Example #1
    def pull(self):
        # Re-fetch this node from the server and refresh the locally
        # cached properties (legacy py2neo START syntax).
        query = CypherQuery(self.graph, "START a=node({a}) RETURN a")
        results = query.execute(a=self._id)
        node, = results[0].values
        super(Node, self).properties.clear()
        super(Node, self).properties.update(node.properties)
        self._Node__stale.clear()
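The START clause is legacy Cypher from the py2neo 1.6 era. A minimal usage sketch, assuming Node here is a custom subclass bound to a py2neo graph; the lookup helper is hypothetical:

node = get_person_node(graph, "alice")   # hypothetical helper returning a Node
node.pull()                              # re-read properties from the server
print node.properties                    # now reflects the server-side state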
def get_movie(title):
    query = CypherQuery(graph, "MATCH (movie:Movie {title:{title}}) "
                               "OPTIONAL MATCH (movie)<-[r]-(person:Person) "
                               "RETURN movie.title as title,"
                               "collect([person.name, head(split(lower(type(r)),'_')), r.roles]) as cast "
                               "LIMIT 1")
    results = query.execute(title=title)
    row = results.data[0]
    return {"title": row["title"],
            "cast": [dict(zip(("name", "job", "role"), member)) for member in row["cast"]]}
    def cypher_prop(self):
        """Expose CypherQuery.stream and .execute as methods bound to self.graph_db."""
        # noinspection PyUnresolvedReferences
        from py2neo.neo4j import CypherQuery

        cypher_ns = SimpleNamespace()
        cypher_ns.stream = six.create_bound_method(
            lambda s, q, **ps: CypherQuery(s, q).stream(**ps), self.graph_db)
        cypher_ns.execute = six.create_bound_method(
            lambda s, q, **ps: CypherQuery(s, q).execute(**ps), self.graph_db)
        return cypher_ns
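A usage sketch, assuming an object whose graph_db attribute is a py2neo 1.6 GraphDatabaseService (the name obj is illustrative):

cypher = obj.cypher_prop()
count, = cypher.execute("MATCH (n) RETURN count(n)").data[0].values
print count
for record in cypher.stream("MATCH (n) RETURN n LIMIT 5"):
    print record.values[0]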
def get_search():
    try:
        q = request.query["q"]
    except KeyError:
        return []
    else:
        query = CypherQuery(graph, "MATCH (movie:Movie) "
                                   "WHERE movie.title =~ {title} "
                                   "RETURN movie")
        # (?i) makes the regex case-insensitive; .* on both sides gives substring matching
        results = query.execute(title="(?i).*" + q + ".*")
        response.content_type = "application/json"
        return json.dumps([{"movie": row["movie"].get_cached_properties()} for row in results.data])
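With the movies dataset loaded, GET /search?q=matrix would return JSON along these lines (values illustrative):

# [{"movie": {"title": "The Matrix", "released": 1999, "tagline": "Welcome to the Real World"}},
#  {"movie": {"title": "The Matrix Reloaded", "released": 2003, "tagline": "Free your mind"}}]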
Example #7
def unity():
    results = CypherQuery(
        graph,
        'MATCH (game)-[:USES_MECHANIC]->(mechanic) '
        'WHERE mechanic.mechanic = "Trick-taking" '
        'RETURN game.bgg_name ORDER BY game.bgg_name'
    ).execute()
    return render_template("games.html", games=results)
    def export_funded_relationships_to_csv(
            self, out_file_name='funded_relations.tab', limit=9999999):
        """Export edges to csv file to be read in with Gephi.

        Abstraction layer for export_relationships_to_csv."""
        rel_type = 'funded'
        query_str = ('match (a)-[r:' + rel_type + ']->(b) '
                     'return a.permalink as source, r, '
                     'b.permalink as target, id(r) as id')
        initial_dict = {'label': rel_type, 'source_id': ''}
        funded_fields = [
            u'source', u'target', u'type', u'source_id', u'id', u'label',
            u'name', u'category_code', u'crunchbase_url', u'round_code',
            u'raised_amount', u'permalink', u'source_url',
            u'raised_currency_code', u'funded_year'
        ]
        # Source descriptions have lots of non-standard characters--u'source_description',
        # Probably don't need: u'funded_month', u'funded_day'
        result = CypherQuery(
            self, 'match ()-[r:' + rel_type + ']->() ' +
            ' return count(r);').execute()
        count, = result.data[0].values
        count = min(count, limit)
        print 'Count rels', count
        self.export_relations_to_csv('funded',
                                     query_str,
                                     count,
                                     out_file_name,
                                     funded_fields,
                                     initial_dict,
                                     sep='\n')
    def export_company_node_to_csv(self,
                                   out_file_name='company_nodes.tab',
                                   limit=9999999):
        """Export company nodes to csv file to be read in with Gephi.

        Abstraction layer for export_nodes_to_csv."""
        node_type = 'company'
        query_str = 'match (n:' + node_type + ') ' + ' return n '
        initial_dict = {'label': node_type}
        company_fields = [
            u'nodes', u'id', u'label', u'name', u'category_code',
            u'crunchbase_url', u'description', u'number_of_employees',
            u'alias_list', u'deadpooled_year', u'total_money_raised', u'error',
            u'founded_year'
        ]
        # Probably don't need:  u'deadpooled_month',u'created_at', u'updated_at', u'founded_day',
        #    u'deadpooled_day', u'deadpooled_url', u'twitter_username', u'homepage_url',
        #    u'blog_url', u'blog_feed_url', u'founded_month', u'email_address',
        result = CypherQuery(
            self,
            'match (n:' + node_type + ') ' + ' return count(n);').execute()
        count, = result.data[0].values
        count = min(count, limit)
        print '\nBeginning export of {} company nodes'.format(count)
        self.export_nodes_to_csv('company',
                                 query_str,
                                 count,
                                 out_file_name,
                                 company_fields,
                                 initial_dict,
                                 sep='\n')
    def export_financial_nodes_to_csv(self,
                                      out_file_name='financial_nodes.tab',
                                      limit=9999999):
        """Export financial institution nodes to csv file to be read in with Gephi.

        Abstraction layer for export_nodes_to_csv."""
        node_type = 'funder'
        query_str = 'match (n:' + node_type + ') ' + ' return n '
        initial_dict = {'label': node_type}
        funder_fields = [
            u'nodes', u'id', u'label', u'name', u'permalink',
            u'crunchbase_url', u'homepage_url', u'description', u'overview',
            u'twitter_username', u'founded_year', u'alias_list', u'tag_list',
            u'deadpooled_month', u'deadpooled_year', u'total_money_raised',
            u'error'
        ]
        # Don't need:  u'blog_url', u'blog_feed_url', u'phone_number', u'email_address', u'founded_month',
        #   u'created_at', u'updated_at', u'founded_day', u'deadpooled_day', u'deadpooled_url',
        result = CypherQuery(
            self,
            'match (n:' + node_type + ') ' + ' return count(n);').execute()
        count, = result.data[0].values
        count = min(count, limit)
        print '\nBeginning export of {} financial-institution nodes'.format(
            count)
        self.export_nodes_to_csv(node_type,
                                 query_str,
                                 count,
                                 out_file_name,
                                 funder_fields,
                                 initial_dict,
                                 sep='\n')
    def export_person_nodes_to_csv(self,
                                   out_file_name='person_nodes.tab',
                                   limit=9999999):
        """Export person nodes to csv file to be read in with Gephi.

        Abstraction layer for export_nodes_to_csv."""
        node_type = 'person'
        query_str = 'match (n:' + node_type + ') ' + ' return n'
        initial_dict = {'label': node_type}
        person_fields = [
            u'nodes', u'id', u'label', u'first_name', u'last_name',
            u'affiliation_name', u'alias_list', u'crunchbase_url', u'born_year'
        ]
        # Probably don't need: u'created_at', u'updated_at', u'twitter_username', u'blog_feed_url',
        #      u'blog_url', u'born_month', u'homepage_url', u'born_day',
        result = CypherQuery(
            self,
            'match (n:' + node_type + ') ' + ' return count(n);').execute()
        count, = result.data[0].values
        count = min(count, limit)
        print '\nBeginning export of {} person nodes'.format(count)
        self.export_nodes_to_csv(node_type,
                                 query_str,
                                 count,
                                 out_file_name,
                                 person_fields,
                                 initial_dict,
                                 sep='\n')
Example #12
def execute(graph, query, params=None, row_handler=None,
            metadata_handler=None, error_handler=None):
    # CypherError and Metadata are assumed to be imported from the
    # surrounding py2neo 1.x compat module
    query = CypherQuery(graph, query)
    data, metadata = [], None
    try:
        results = query.execute(**params or {})
    except CypherError as err:
        if error_handler:
            error_handler(err.message, err.exception, err.stack_trace)
        else:
            raise
    else:
        metadata = Metadata(results.columns)
        if metadata_handler:
            metadata_handler(metadata)
        if row_handler:
            for record in results:
                row_handler(list(record))
            # rows were consumed by the handler, so data stays empty
            return data, metadata
        else:
            return [list(record) for record in results], metadata
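A usage sketch of the handler-based API; graph and the query text are illustrative:

def on_metadata(metadata):
    print metadata.columns            # column names arrive before any rows

def on_row(row):
    print row                         # called once per result row

execute(graph, "MATCH (n) RETURN n.name LIMIT {n}", params={"n": 5},
        metadata_handler=on_metadata, row_handler=on_row)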
def get_graph():
    query = CypherQuery(graph, "MATCH (m:Movie)<-[:ACTED_IN]-(a:Person) "
                               "RETURN m.title as movie, collect(a.name) as cast "
                               "LIMIT {limit}")
    # Bottle query params are strings; Cypher's LIMIT parameter must be an integer
    results = query.execute(limit=int(request.query.get("limit", 100)))
    nodes = []
    rels = []
    i = 0
    for movie, cast in results.data:
        nodes.append({"title": movie, "label": "movie"})
        target = i
        i += 1
        for name in cast:
            actor = {"title": name, "label": "actor"}
            try:
                source = nodes.index(actor)
            except ValueError:
                nodes.append(actor)
                source = i
                i += 1
            rels.append({"source": source, "target": target})
    return {"nodes": nodes, "links": rels}
    def export_nodes_to_csv(self,
                            type,
                            query_str,
                            count,
                            out_file,
                            fields,
                            initial_dict=None,
                            sep=','):
        """Export general nodes to csv for import to Gephi.

        :param str type: person, funder, or company
        :param str query_str: cypher query string to return the nodes or relations
        :param str out_file: output file
        :param list fields: fields to write
        :param dict initial_dict: dict with any vars not in node
        :param str sep: separator to use in output file (currently unused;
            the excel-tab dialect is hardcoded below)
        :rtype: None
        """
        initial_dict = initial_dict or {}  # avoid sharing a mutable default
        field_set = set()
        header_set = set()
        n_exported = 0
        n_errors = 0
        query_size = 5000

        with open(out_file, 'wb') as fil:
            dw = csv.DictWriter(fil,
                                fields,
                                extrasaction='ignore',
                                dialect='excel-tab')

            # Generate and write the header line
            header = dict()
            for txt in fields:
                header[txt] = txt
                header_set.add(txt)
            dw.writerow(header)

            for first in xrange(0, count, query_size):
                # skip/limit paging without ORDER BY assumes a stable row order
                query_str_with_limits = query_str + ' skip ' + str(
                    first) + ' limit ' + str(query_size) + ';'

                query = CypherQuery(self, query_str_with_limits)
                for item in query.stream():
                    # each streamed record carries a single node column
                    anode = item.values[0]
                    try:
                        d = copy.copy(initial_dict)
                        anode['permalink'] = self.get_permalink(anode)
                        d['nodes'] = anode['permalink']
                        d['id'] = anode._id
                        nd = dict()

                        for key, val in anode.get_properties().iteritems():
                            field_set.add(key)
                            nd[key] = val
                        d.update(nd)
                        dw.writerow(d)

                        n_exported += 1
                        if (n_exported % 1000) == 0:
                            print 'Nodes exported ', n_exported
                    except UnicodeEncodeError as uee:
                        n_errors += 1
                        print 'Unicode Error Inside on Nodes', uee.args
                    except ValueError as ve:
                        n_errors += 1
                        print 'Value Error Inside on Nodes', ve.args

        print '\nExport of {} {} nodes complete. There were {} errors.'.format(
            n_exported, type, n_errors)
        print '   Unexported fields: {}'.format(field_set - header_set)
    def export_relations_to_csv(self,
                                type,
                                query_str,
                                count,
                                out_file,
                                fields,
                                initial_dict=None,
                                sep=','):
        """Export general relationships to csv for import to Gephi.

        :param str type: person, funder, or company
        :param str query_str: cypher query string to return the nodes or relations
        :param str out_file: output file
        :param list fields: fields to write
        :param dict initial_dict: dict with any vars not in node
        :param str sep: separator to use in output file (currently unused;
            the excel-tab dialect is hardcoded below)
        :rtype: None
        """

        initial_dict = initial_dict or {}  # avoid sharing a mutable default
        field_set = set()
        header_set = set()
        n_exported = 0
        n_errors = 0
        query_size = 2000

        with open(out_file, 'wb') as fil:
            dw = csv.DictWriter(fil,
                                fields,
                                extrasaction='ignore',
                                dialect='excel-tab')

            # Generate and write the header line
            header = dict()
            for txt in fields:
                header[txt] = txt
                header_set.add(txt)
            dw.writerow(header)

            for first in xrange(0, count, query_size):
                query_str_with_limits = query_str + ' skip ' + str(
                    first) + ' limit ' + str(query_size) + ';'

                query = CypherQuery(self, query_str_with_limits)
                for relationship in query.stream():

                    # print 'try to pull out rels parts', len(relationship)
                    rel_parts = relationship.values[1]
                    # fresh dict per row: re-using initial_dict would leak
                    # values from one relationship into the next
                    d = copy.copy(initial_dict)
                    d['label'] = rel_parts.type
                    d['type'] = rel_parts.type
                    d['source'] = self.encode_chars(rel_parts.start_node)
                    d['target'] = self.encode_chars(rel_parts.end_node)
                    d['permalink'] = rel_parts.start_node[
                        'permalink'] + '__' + rel_parts.end_node['permalink']
                    edge_properties = self.encode_chars(
                        rel_parts.get_properties())
                    d['source']['overview'] = ''
                    d['target']['overview'] = ''
                    # print 'Edge_props (cleaned)', edge_properties
                    # print '  Source props (cleaned)', d['permalink']
                    # print '  Source', d['source']
                    # TODO: unicode errors are in source or target node information

                    try:
                        d['id'] = d['permalink']
                        for key in d:
                            field_set.add(key)
                        d.update(edge_properties)

                        dw.writerow(self.encode_chars(d))
                        n_exported += 1
                        if (n_exported % 1000) == 0:
                            print 'Relationships exported: ', n_exported
                    except UnicodeEncodeError as uee:
                        n_errors += 1
                        print 'Unicode Error Inside in Export Relationships', uee.args
                    except ValueError as err:
                        n_errors += 1
                        print 'Value Error Inside in Export Relationships', err.args

        print '\nExport of {} {} relationships complete. There were {} errors.'.format(
            n_exported, type, n_errors)
        print '   Unexported fields: {}'.format(field_set - header_set)
Example #17
    def date_range(self, start_date=None, end_date=None):
        """ Fetch the calendar node representing the date range defined by
        `start_date` and `end_date`. If either is unspecified, this defines an
        open-ended range. Either `start_date` or `end_date` must be specified.
        """
        #                         (CAL)
        #                           |
        #                     [:DATE_RANGE]
        #                           |
        #                           v
        # (START)<-[:START_DATE]-(RANGE)-[:END_DATE]->(END)
        range_ = GregorianCalendar.DateRange(start_date, end_date)
        start, end = range_.start_date, range_.end_date
        if start and end:
            # if start and end are equal, return the day node instead
            if (start.year, start.month, start.day) == (end.year, end.month,
                                                        end.day):
                return start.get_node(self)
            if (start.year, start.month) == (end.year, end.month):
                root = self.month(start.year, start.month)
            elif start.year == end.year:
                root = self.year(start.year)
            else:
                root = self._calendar
            query = """\
                START z=node({z}), s=node({s}), e=node({e})
                CREATE UNIQUE (s)<-[:START_DATE]-(r {r})-[:END_DATE]->(e),
                              (z)-[:DATE_RANGE]->(r {r})
                RETURN r
            """
            params = {
                "z": root._id,
                "s": start.get_node(self)._id,
                "e": end.get_node(self)._id,
                "r": {
                    "start_date": str(start),
                    "end_date": str(end),
                },
            }
        elif start:
            query = """\
                START z=node({z}), s=node({s})
                CREATE UNIQUE (s)<-[:START_DATE]-(r {r}),
                              (z)-[:DATE_RANGE]->(r {r})
                RETURN r
            """
            params = {
                "z": self._calendar._id,
                "s": start.get_node(self)._id,
                "r": {
                    "start_date": str(start),
                },
            }
        elif end:
            query = """\
                START z=node({z}), e=node({e})
                CREATE UNIQUE (r {r})-[:END_DATE]->(e),
                              (z)-[:DATE_RANGE]->(r {r})
                RETURN r
            """
            params = {
                "z": self._calendar._id,
                "e": end.get_node(self)._id,
                "r": {
                    "end_date": str(end),
                },
            }
        else:
            raise ValueError("Either start or end date must be supplied "
                             "for a date range")
        return CypherQuery(self._graph, query).execute_one(**params)
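A usage sketch, assuming calendar is an object exposing this method (a GregorianCalendar-style wrapper around a py2neo graph) and that the arguments are datetime.date values:

from datetime import date

january = calendar.date_range(date(2014, 1, 1), date(2014, 1, 31))  # closed range
onwards = calendar.date_range(start_date=date(2014, 1, 1))          # open-ended range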
Example #19
import json
from py2neo.neo4j import CypherQuery, WriteBatch
from py2neo import neo4j

db = neo4j.GraphDatabaseService()

business_index_query = CypherQuery(db, "CREATE INDEX ON :Business(id)")
business_index_query.execute()

category_index_query = CypherQuery(db, "CREATE INDEX ON :Category(name)")
category_index_query.execute()

create_business_query = '''
// MERGE ON categories
CREATE (b:Business {id: {business_id}, name: {name}, lat:{latitude}, lon:{longitude},
	stars: {stars}, review_count: {review_count}})
'''

merge_category_query = '''
MATCH (b:Business {id: {business_id}})
MERGE (c:Category {name: {category}})
CREATE UNIQUE (c)<-[:IS_IN]-(b)
'''

print "Beginning business batch"
with open('data/yelp_academic_dataset_business.json', 'r') as f:
    business_batch = WriteBatch(db)
    count = 0
    for b in (json.loads(l) for l in f):
        business_batch.append_cypher(create_business_query, b)
        count += 1
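        # NOTE: the original snippet is truncated above; the lines below are an
        # assumed completion, not from the source. Flushing every 10000 rows
        # via WriteBatch.run() keeps the batch a manageable size.
        if count % 10000 == 0:
            business_batch.run()
            business_batch = WriteBatch(db)
    # send whatever remains after the loop (assumed)
    business_batch.run()

print "Loaded", count, "businesses"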