Beispiel #1
0
    def __init__(self, **kwargs):
        '''
        Set up the networkx mirror graph and the default visual properties,
        then initialise the GephiStreamerManager base.

        :param kwargs:
                default_node_vis:  dict of default node properties; when
                                   omitted, {'red':1, 'size':10} is used
                default_edge_vis:  dict of default edge properties; when
                                   omitted, grey (0.5/0.5/0.5) with weight 1
                                   is used
                Every remaining keyword is passed unchanged to
                GephiStreamerManager.__init__ (the original notes mention
                iGephiUrl, the URL of the gephi master server).

        Note: both defaults are stored on the GraphVis class itself, not on
        the instance, so each construction overwrites them class-wide.
        '''
        fallback_node_vis = {'red':1, 'size':10}
        fallback_edge_vis = {'red':0.5, 'blue':0.5, 'green':0.5, 'weight':1}

        # Local networkx graph that mirrors the visualized data
        self.G = nx.Graph()

        # Strip the visualization keywords out of kwargs before they are
        # forwarded to the base class below.
        node_vis = kwargs.pop('default_node_vis', fallback_node_vis)
        edge_vis = kwargs.pop('default_edge_vis', fallback_edge_vis)
        GraphVis.default_node_vis = node_vis
        GraphVis.default_edge_vis = edge_vis

        GephiStreamerManager.__init__(self, **kwargs)
Beispiel #2
0
from ws4py.client.threadedclient import WebSocketClient
from GephiStreamer import Node, Edge, GephiStreamerManager
import json

# Module-level GephiStreamerManager created with its default arguments.
# NOTE(review): nothing in the visible excerpt uses `t`; presumably the
# websocket client below streams its nodes through it -- confirm.
t = GephiStreamerManager()


class DummyClient(WebSocketClient):
    """WebSocket client that builds a Gephi ``Node`` for each received message.

    Each incoming message is parsed as JSON; the transaction found under the
    ``'x'`` key becomes a node keyed by its hash.

    The ``print`` calls use a single pre-formatted string so that the class is
    valid, and prints the same text, on both Python 2 and Python 3.
    """

    # Bootstrap to register to the websocket
    def opened(self):
        """Send the subscription request once the connection is open."""
        self.send('{"op":"unconfirmed_sub"}')

    def closed(self, code, reason=None):
        """Print the close code and reason when the connection goes down.

        :param code: close code handed over by the websocket layer
        :param reason: optional close reason (defaults to None)
        """
        print("Closed down %s %s" % (code, reason))

    # Called when we receive a new message
    def received_message(self, m):
        """Parse one message and build the node describing its transaction.

        :param m: received message; it is converted to text with ``"%s" % m``
                  and must then be a JSON document with an ``'x'`` object
        :raises KeyError: if ``'x'`` or one of the expected fields is missing
        """
        # Load the data as JSON
        data = json.loads("%s" % m)
        transaction = data['x']
        print("==%s==" % transaction['hash'])

        # Create the node that represents the transaction
        transactionNode = Node(transaction['hash'], blue=1)

        # Copy a fixed set of transaction fields onto the node
        for prop in ('vin_sz', 'vout_sz', 'lock_time', 'relayed_by',
                     'tx_index', 'time'):
            transactionNode.property[prop] = transaction[prop]

        # Hack: store the transaction size under another key so it does not
        # end up as the node's own "size" property.
        transactionNode.property['transaction_size'] = transaction['size']

        # NOTE(review): the original ends here with the comment
        # "we type our node"; the typing step itself, and any streaming of
        # the node, is not present in this excerpt.
Beispiel #3
0
 def open_spider(self, spider):
     """Create the Gephi streamer for this pipeline and log the target URI.

     :param spider: the spider being opened (not used here)
     """
     uri = self.gephi_uri
     workspace = self.gephi_ws
     self.gephi = GephiStreamerManager(iGephiUrl=uri, iGephiWorkspace=workspace)
     self.logger.info('GephiStream connected {}'.format(uri))
Beispiel #4
0
class GephiPipeline(object):
    """Item pipeline that streams every processed item to Gephi as a graph.

    The method names follow the Scrapy item-pipeline interface
    (``from_crawler``, ``open_spider``, ``close_spider``, ``process_item``).
    Each item becomes a 'patent' node; its citations and cited-by entries
    become 'link' nodes and its inventors/assignees become 'entity' nodes,
    all connected to the patent node by edges.
    """

    def __init__(self, gephi_uri, gephi_ws):
        """Store the connection settings; the streamer is built in open_spider.

        :param gephi_uri: value later passed as ``iGephiUrl``
        :param gephi_ws: value later passed as ``iGephiWorkspace``
        """
        self.logger = logging.getLogger(type(self).__name__)
        self.gephi_uri = gephi_uri
        self.gephi_ws = gephi_ws
        # Publication numbers of processed items plus the ids of their
        # citation / cited-by nodes; decides between add_node and change_node.
        self.nodes = set()

    @classmethod
    def from_crawler(cls, crawler):
        """Build the pipeline from the GEPHI_URI / GEPHI_WS crawler settings."""
        return cls(
            gephi_uri=crawler.settings.get('GEPHI_URI'),
            gephi_ws=crawler.settings.get('GEPHI_WS')
        )

    def open_spider(self, spider):
        """Create the Gephi streamer and log the URI it was given."""
        self.gephi = GephiStreamerManager(iGephiUrl=self.gephi_uri, iGephiWorkspace=self.gephi_ws)
        self.logger.info('GephiStream connected {}'.format(self.gephi_uri))

    def close_spider(self, spider):
        """Nothing to release when the spider closes."""
        pass

    def _add_typed_node(self, node_id, node_type, vis_args):
        """Create a node, tag it with ``node_type``, queue it and return it."""
        node = Node(node_id, **vis_args)
        node.property['type'] = node_type
        self.gephi.add_node(node)
        return node

    def process_item(self, item, spider):
        """Queue the item's nodes and edges, commit them, and return the item.

        :param item: mapping with a mandatory 'publication_number' and the
                     optional fields read below
        :param spider: the spider that produced the item (not used here)
        :return: the unchanged item
        :raises KeyError: if the item has no 'publication_number'

        A ConnectionError raised by the commit is logged and swallowed, so
        the item is still returned and still recorded as seen.
        """
        publication_number = item['publication_number']

        patent_args = {'size': 5, 'red': 1, 'green': 0, 'blue': 0}
        patent_node = Node(publication_number, **patent_args)
        patent_node.property['type'] = 'patent'
        for field in ('title', 'filing_date', 'publication_date',
                      'priority_date', 'grant_date', 'pdf'):
            patent_node.property[field] = item.get(field)

        # A publication number seen before (as an item or as a link node)
        # already has a node, so update it instead of adding it again.
        if publication_number in self.nodes:
            self.gephi.change_node(patent_node)
        else:
            self.gephi.add_node(patent_node)

        # NOTE(review): the third Edge argument (True) presumably marks the
        # edge as directed -- confirm against the GephiStreamer Edge class.
        link_args = {'size': 5, 'red': 0, 'green': 0, 'blue': 1}
        for citation in item.get('citations', []):
            citation_node = self._add_typed_node(citation, 'link', link_args)
            self.gephi.add_edge(Edge(patent_node, citation_node, True))
            self.nodes.add(citation)

        for cited_by in item.get('cited_by', []):
            cited_by_node = self._add_typed_node(cited_by, 'link', link_args)
            self.gephi.add_edge(Edge(cited_by_node, patent_node, True))
            self.nodes.add(cited_by)

        # Inventors and assignees are merged into one set, so a name that
        # appears in both lists yields a single entity node.
        entity_args = {'size': 5, 'red': 0, 'green': 1, 'blue': 0}
        entities = set(item.get('inventors', []) + item.get('assignees', []))
        for entity in entities:
            entity_node = self._add_typed_node(entity, 'entity', entity_args)
            self.gephi.add_edge(Edge(entity_node, patent_node, True))

        self.logger.info('Publishing item {}'.format(publication_number))

        # "except X as e" is valid on Python 2.6+ and Python 3; the former
        # "except X, e" spelling is a SyntaxError on Python 3.
        # NOTE(review): on Python 2 ConnectionError is not a builtin and has
        # to be imported elsewhere in the file -- confirm.
        try:
            self.gephi.commit()
        except ConnectionError as e:
            self.logger.error(e)

        self.nodes.add(publication_number)
        return item
Beispiel #5
0
    :param: gs  GraphVis instance
    :param: rgb node color
    :param: id_fcn  function to generate node label
    '''

    gnodes = make_gephi_nodes(G.nodes(), rgb=rgb, size=10)

    for k,v1 in gnodes.items():
        gs.add_node(v1)
        try:
            for v2 in make_gephi_nodes(G[k].keys()).values():  # G[k] is networkx node neighbor dictionary
                gs.add_node(v2)
                gs.add_edge(Edge(v1, v2, False, weight=1))
            gs.commit()
        except Exception, e:
            print "Unable to add", k
            print e

    print "Graph sent"
    return gnodes

# Quick-and-dirty manual test; runs only when this file is executed as a
# script, not when it is imported.
if __name__ == "__main__":
    # Imported here so the module is only required for this manual run.
    # NOTE(review): scratchpad is presumably a project-local helper -- confirm.
    import scratchpad

    # Load the graph identified by 'R-3' through the scratchpad helper.
    G = scratchpad.load_fhx_graph('R-3')

    # Streamer configured with the hard-coded Gephi URL 'Tannhauser:8080'.
    gs=GephiStreamerManager(iGephiUrl='Tannhauser:8080')

    # Stream the loaded graph through the streamer.
    send_graph(G, gs)