class TraphServerFactory(Factory):
    """Factory that opens the Traph of one corpus and shares it between protocols.

    A single ``Traph`` instance is created in ``__init__`` and the very same
    object is handed to every ``TraphProtocol`` returned by ``buildProtocol``.
    """

    # Fallback webentity creation rule (a regular expression over
    # "|"-terminated stems). As written it matches a scheme stem
    # ("s:<letters>|"), an optional "t:<digits>|" stem, then either exactly two
    # "h:" stems or a single "h:" stem holding "localhost", a dotted IPv4-like
    # address or a bracketed hexadecimal address.
    default_WECR = '(s:[a-zA-Z]+\\|(t:[0-9]+\\|)?(h:[^\\|]+\\|(h:[^\\|]+\\|)|h:(localhost|(\\d{1,3}\\.){3}\\d{1,3}|\\[[\\da-f]*:[\\da-f:]*\\])\\|))'

    # Prefix-specific creation rules, keyed by prefix. The single entry uses the
    # same shape as the default rule, but allows one or more extra "h:" stems
    # and requires exactly one "p:" stem at the end.
    WECRs = {
        's:http|h:com|h:world|': '(s:[a-zA-Z]+\\|(t:[0-9]+\\|)?(h:[^\\|]+\\|(h:[^\\|]+\\|)+|h:(localhost|(\\d{1,3}\\.){3}\\d{1,3}|\\[[\\da-f]*:[\\da-f:]*\\])\\|)(p:[^\\|]+\\|){1})'
    }

    def __init__(self, corpus, traph_dir="traph-data", default_WECR=None, WECRs=None):
        """Make sure ``traph_dir`` exists and open the traph of ``corpus`` in it.

        corpus       -- name of the corpus; used as the traph folder name
                        inside ``traph_dir``.
        traph_dir    -- directory holding the traph folders; created
                        (with missing parents) when absent.
        default_WECR -- default creation rule; any falsy value falls back to
                        the class attribute of the same name.
        WECRs        -- prefix-specific creation rules; any falsy value falls
                        back to the class attribute of the same name.

        Raises OSError when ``traph_dir`` cannot be created or already exists
        as something other than a directory.
        """
        # Imported locally so this block does not rely on a module-level import.
        import errno

        self.traph_dir = traph_dir
        self.corpus = corpus

        # Create first and tolerate "already exists": a check-then-create
        # sequence can crash when another process creates the directory
        # between the check and the makedirs call.
        try:
            os.makedirs(self.traph_dir)
        except OSError as error:
            if error.errno != errno.EEXIST or not os.path.isdir(self.traph_dir):
                raise

        self.traph = Traph(
            folder=os.path.join(self.traph_dir, corpus),
            default_webentity_creation_rule=default_WECR or self.default_WECR,
            webentity_creation_rules=WECRs or self.WECRs
        )

    def ready(self):
        """Announce on standard output that the traph is ready."""
        import sys

        # The READY line is meant to be read by the process talking to this
        # server to know when the traph is ready. The parenthesised form prints
        # the same text under Python 2 and also parses under Python 3.
        print("READY")
        # A pipe is block-buffered by default: flush so the reader is not left
        # waiting for a message that is still sitting in the buffer.
        sys.stdout.flush()

    def buildProtocol(self, addr):
        """Return a new TraphProtocol bound to the shared traph (``addr`` is unused)."""
        return TraphProtocol(self.traph)

    def close(self):
        """Close the traph opened by this factory."""
        self.traph.close()
class TraphServerFactory(Factory):
    """Factory that opens the Traph of one corpus and shares it between protocols.

    A single ``Traph`` instance is created in ``__init__`` and the very same
    object is handed to every ``TraphProtocol`` returned by ``buildProtocol``.
    """

    # Fallback webentity creation rule (a regular expression over
    # "|"-terminated stems). As written it matches a scheme stem
    # ("s:<letters>|"), an optional "t:<digits>|" stem, then either exactly two
    # "h:" stems or a single "h:" stem holding "localhost", a dotted IPv4-like
    # address or a bracketed hexadecimal address.
    default_WECR = '(s:[a-zA-Z]+\\|(t:[0-9]+\\|)?(h:[^\\|]+\\|(h:[^\\|]+\\|)|h:(localhost|(\\d{1,3}\\.){3}\\d{1,3}|\\[[\\da-f]*:[\\da-f:]*\\])\\|))'

    # Prefix-specific creation rules, keyed by prefix. The single entry uses the
    # same shape as the default rule, but allows one or more extra "h:" stems
    # and requires exactly one "p:" stem at the end.
    WECRs = {
        's:http|h:com|h:world|': '(s:[a-zA-Z]+\\|(t:[0-9]+\\|)?(h:[^\\|]+\\|(h:[^\\|]+\\|)+|h:(localhost|(\\d{1,3}\\.){3}\\d{1,3}|\\[[\\da-f]*:[\\da-f:]*\\])\\|)(p:[^\\|]+\\|){1})'
    }

    def __init__(self, corpus, traph_dir="traph-data", default_WECR=None, WECRs=None):
        """Make sure ``traph_dir`` exists and open the traph of ``corpus`` in it.

        corpus       -- name of the corpus; used as the traph folder name
                        inside ``traph_dir``.
        traph_dir    -- directory holding the traph folders; created
                        (with missing parents) when absent.
        default_WECR -- default creation rule; any falsy value falls back to
                        the class attribute of the same name.
        WECRs        -- prefix-specific creation rules; any falsy value falls
                        back to the class attribute of the same name.

        Raises OSError when ``traph_dir`` cannot be created or already exists
        as something other than a directory.
        """
        # Imported locally so this block does not rely on a module-level import.
        import errno

        self.traph_dir = traph_dir
        self.corpus = corpus

        # Create first and tolerate "already exists": a check-then-create
        # sequence can crash when another process creates the directory
        # between the check and the makedirs call.
        try:
            os.makedirs(self.traph_dir)
        except OSError as error:
            if error.errno != errno.EEXIST or not os.path.isdir(self.traph_dir):
                raise

        self.traph = Traph(
            folder=os.path.join(self.traph_dir, corpus),
            default_webentity_creation_rule=default_WECR or self.default_WECR,
            webentity_creation_rules=WECRs or self.WECRs
        )

    def ready(self):
        """Announce on standard output that the traph is ready."""
        import sys

        # The READY line is meant to be read by the process talking to this
        # server to know when the traph is ready. The parenthesised form prints
        # the same text under Python 2 and also parses under Python 3.
        print("READY")
        # A pipe is block-buffered by default: flush so the reader is not left
        # waiting for a message that is still sitting in the buffer.
        sys.stdout.flush()

    def buildProtocol(self, addr):
        """Return a new TraphProtocol bound to the shared traph (``addr`` is unused)."""
        return TraphProtocol(self.traph)

    def close(self):
        """Close the traph opened by this factory."""
        self.traph.close()
include_internal=False, include_outbound=False)
# NOTE(review): the line above is the tail of a call that starts before this
# excerpt; presumably it is the call that assigns `valjean_inlinks` -- confirm
# against the preceding lines.

# List the links held in `valjean_inlinks`: each item unpacks to
# (source_lru, lru, weight); only the weight and the source LRU are printed.
for source_lru, lru, weight in valjean_inlinks:
    print '\t<- (weight %s) \t%s' % (weight, source_lru)

# Empty line separating the two listings.
print ''

# Query the links of the page 's:http|h:com|h:valjean|' again, this time with
# only the outbound flag enabled (inbound and internal are switched off).
valjean_outlinks = traph.get_page_links('s:http|h:com|h:valjean|', include_inbound=False, include_internal=False, include_outbound=True)

# Each item unpacks to (lru, target_lru, weight); only the weight and the
# target LRU are printed.
for lru, target_lru, weight in valjean_outlinks:
    print '\t-> (weight %s) \t%s' % (weight, target_lru)

# Disabled snippet, kept for reference: it would build a networkx graph from
# traph.get_webentities_links(), labelling nodes from `webentity_store`, and
# write it to './scripts/data/dump.gexf'.
# import networkx as nx
# g = nx.Graph()
# w = traph.get_webentities_links()
# for source, targets in w.items():
#     source_label = webentity_store.data['webentities'][source][1]
#     g.add_node(source, label=source_label)
#     for target in targets:
#         target_label = webentity_store.data['webentities'][target][1]
#         g.add_node(target, label=target_label)
#         g.add_edge(source, target)
# nx.write_gexf(g, './scripts/data/dump.gexf')

# The script is done with the traph: close it.
traph.close()