예제 #1
0
파일: server.py 프로젝트: medialab/hyphe
class TraphServerFactory(Factory):
    """Factory owning a single Traph for one corpus.

    The Traph is opened under ``<traph_dir>/<corpus>`` when the factory is
    constructed, and every protocol returned by ``buildProtocol`` is handed
    that same Traph instance.
    """

    # Pattern handed to Traph as ``default_webentity_creation_rule`` when the
    # caller does not provide one.
    default_WECR = '(s:[a-zA-Z]+\\|(t:[0-9]+\\|)?(h:[^\\|]+\\|(h:[^\\|]+\\|)|h:(localhost|(\\d{1,3}\\.){3}\\d{1,3}|\\[[\\da-f]*:[\\da-f:]*\\])\\|))'
    # Mapping handed to Traph as ``webentity_creation_rules`` when the caller
    # does not provide one.
    # NOTE(review): keys look like LRU prefixes and values like per-prefix
    # patterns -- confirm the exact semantics against Traph.
    WECRs = {
        's:http|h:com|h:world|': '(s:[a-zA-Z]+\\|(t:[0-9]+\\|)?(h:[^\\|]+\\|(h:[^\\|]+\\|)+|h:(localhost|(\\d{1,3}\\.){3}\\d{1,3}|\\[[\\da-f]*:[\\da-f:]*\\])\\|)(p:[^\\|]+\\|){1})',
    }

    def __init__(self, corpus, traph_dir="traph-data", default_WECR=None,
                 WECRs=None):
        """Create the storage directory if needed and open the corpus Traph.

        Args:
            corpus: Name joined to ``traph_dir`` to form the Traph folder.
            traph_dir: Parent directory for Traph data; created if missing.
            default_WECR: Override for the class-level ``default_WECR``.
                Any falsy value falls back to the class attribute.
            WECRs: Override for the class-level ``WECRs``. Any falsy value
                falls back to the class attribute.

        Raises:
            OSError: If ``traph_dir`` cannot be created and is not already
                an existing directory.
        """
        self.traph_dir = traph_dir
        self.corpus = corpus
        # Create first and inspect the failure afterwards: checking isdir()
        # before makedirs() can fail when another process creates the
        # directory between the two calls.
        try:
            os.makedirs(self.traph_dir)
        except OSError:
            if not os.path.isdir(self.traph_dir):
                raise
        self.traph = Traph(
            folder=os.path.join(self.traph_dir, corpus),
            default_webentity_creation_rule=default_WECR or self.default_WECR,
            webentity_creation_rules=WECRs or self.WECRs
        )

    def ready(self):
        """Emit the ``READY`` line announcing that the traph is ready.

        NOTE(review): the original comment called this a "stdin message
        received by childprocess"; the line is written to this process's
        standard output, presumably read by whoever launched it -- confirm.
        """
        import sys

        # Parenthesised form prints the same text on Python 2 and 3.
        print("READY")
        # Standard output is buffered when it is not a terminal; flush so the
        # listener is not left waiting for the handshake.
        sys.stdout.flush()

    def buildProtocol(self, addr):
        """Return a new TraphProtocol bound to the shared Traph.

        ``addr`` is accepted but not used.
        """
        return TraphProtocol(self.traph)

    def close(self):
        """Close the underlying Traph."""
        self.traph.close()
예제 #2
0
class TraphServerFactory(Factory):
    """Twisted-style factory that shares one corpus Traph across protocols.

    Construction opens a Traph in ``<traph_dir>/<corpus>``; each call to
    ``buildProtocol`` wraps that same Traph in a new TraphProtocol.
    """

    # Fallback value for Traph's ``default_webentity_creation_rule`` argument.
    default_WECR = '(s:[a-zA-Z]+\\|(t:[0-9]+\\|)?(h:[^\\|]+\\|(h:[^\\|]+\\|)|h:(localhost|(\\d{1,3}\\.){3}\\d{1,3}|\\[[\\da-f]*:[\\da-f:]*\\])\\|))'
    # Fallback value for Traph's ``webentity_creation_rules`` argument.
    # NOTE(review): presumably maps an LRU prefix to the pattern applied
    # under that prefix -- verify against Traph.
    WECRs = {
        's:http|h:com|h:world|':
        '(s:[a-zA-Z]+\\|(t:[0-9]+\\|)?(h:[^\\|]+\\|(h:[^\\|]+\\|)+|h:(localhost|(\\d{1,3}\\.){3}\\d{1,3}|\\[[\\da-f]*:[\\da-f:]*\\])\\|)(p:[^\\|]+\\|){1})'
    }

    def __init__(self,
                 corpus,
                 traph_dir="traph-data",
                 default_WECR=None,
                 WECRs=None):
        """Ensure the data directory exists, then open the corpus Traph.

        Args:
            corpus: Sub-folder name (under ``traph_dir``) holding the Traph.
            traph_dir: Directory containing all Traph folders; it is created
                when absent.
            default_WECR: Replacement for the class-level ``default_WECR``;
                falsy values select the class attribute.
            WECRs: Replacement for the class-level ``WECRs``; falsy values
                select the class attribute.

        Raises:
            OSError: When ``traph_dir`` does not exist as a directory and
                cannot be created.
        """
        self.traph_dir = traph_dir
        self.corpus = corpus
        # Attempt the creation and tolerate "already there": a separate
        # isdir() check followed by makedirs() is racy if several processes
        # start at the same time.
        try:
            os.makedirs(self.traph_dir)
        except OSError:
            if not os.path.isdir(self.traph_dir):
                raise
        self.traph = Traph(folder=os.path.join(self.traph_dir, corpus),
                           default_webentity_creation_rule=default_WECR
                           or self.default_WECR,
                           webentity_creation_rules=WECRs or self.WECRs)

    def ready(self):
        """Print ``READY`` so a listener knows the traph is ready.

        NOTE(review): the original comment described a "stdin message
        received by childprocess"; what the code does is write to this
        process's standard output -- confirm who consumes it.
        """
        import sys

        # Same output under Python 2 and Python 3.
        print("READY")
        # Push the line out immediately; piped standard output is buffered.
        sys.stdout.flush()

    def buildProtocol(self, addr):
        """Build a TraphProtocol around the shared Traph (``addr`` unused)."""
        return TraphProtocol(self.traph)

    def close(self):
        """Close the Traph held by this factory."""
        self.traph.close()
예제 #3
0
                                       include_internal=False,
                                       include_outbound=False)
# Print one line per inbound link: "<- (weight W) <source LRU>".
# (valjean_inlinks is assigned by the statement that ends just above.)
for source_lru, lru, weight in valjean_inlinks:
    inbound_line = '\t<- (weight %s) \t%s' % (weight, source_lru)
    print(inbound_line)
print('')

# Query the same page again, this time keeping outbound links only.
valjean_outlinks = traph.get_page_links(
    's:http|h:com|h:valjean|',
    include_inbound=False,
    include_internal=False,
    include_outbound=True,
)
# Print one line per outbound link: "-> (weight W) <target LRU>".
for lru, target_lru, weight in valjean_outlinks:
    outbound_line = '\t-> (weight %s) \t%s' % (weight, target_lru)
    print(outbound_line)

# Disabled: dump the webentity link graph to a GEXF file with networkx.
# import networkx as nx

# g = nx.Graph()

# w = traph.get_webentities_links()

# for source, targets in w.items():
#     source_label = webentity_store.data['webentities'][source][1]
#     g.add_node(source, label=source_label)

#     for target in targets:
#         target_label = webentity_store.data['webentities'][target][1]
#         g.add_node(target, label=target_label)
#         g.add_edge(source, target)

# nx.write_gexf(g, './scripts/data/dump.gexf')

# Release the traph once all queries are done.
traph.close()