    def export_nodes_to_csv(self, type, query_str, count, out_file, fields, initial_dict=None, sep=","):
        """Export general nodes to CSV for import into Gephi.

        :param str type: person, funder, or company
        :param str query_str: cypher query string to return the nodes or relations
        :param int count: total number of nodes to page through
        :param str out_file: output file
        :param list fields: fields to write
        :param dict initial_dict: dict with any vars not in node
        :param str sep: separator to use in output file (currently unused; the excel-tab dialect is written)
        :rtype: None
        """
        if initial_dict is None:
            initial_dict = {}

        field_set = set()
        header_set = set()
        n_exported = 0
        n_errors = 0
        query_size = 5000

        with open(out_file, "wb") as fil:
            dw = csv.DictWriter(fil, fields, extrasaction="ignore", dialect="excel-tab")

            # Generate and write the header line
            header = dict()
            for txt in fields:
                header[txt] = txt
                header_set.add(txt)
            dw.writerow(header)

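            # Page through the results with skip/limit so large result sets are
            # fetched from Neo4j in batches of query_size rows.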
            for first in xrange(0, count, query_size):
                query_str_with_limits = query_str + " skip " + str(first) + " limit " + str(query_size) + ";"

                query = CypherQuery(self, query_str_with_limits)
                for item in query.stream():
                    # Each result row carries the node in its first column.
                    anode = item.values[0]
                    try:
                        d = copy.copy(initial_dict)
                        anode["permalink"] = self.get_permalink(anode)
                        d["nodes"] = anode["permalink"]
                        d["id"] = anode._id
                        nd = dict()

                        for key, val in anode.get_properties().iteritems():
                            field_set.add(key)
                            nd[key] = val
                        d.update(nd)
                        dw.writerow(d)

                        n_exported += 1
                        if (n_exported % 1000) == 0:
                            print "Nodes exported ", n_exported
                    except UnicodeEncodeError as uee:
                        n_errors += 1
                        print "Unicode Error Inside on Nodes", uee.args
                    except ValueError as ve:
                        n_errors += 1
                        print "Value Error Inside on Nodes", ve.args

        print "\nExport of {} {} nodes complete. There were {} errors.".format(n_exported, type, n_errors)
        print "   Unexported fields: {}".format(field_set - header_set)
    def export_relations_to_csv(self, type, query_str, count, out_file, fields, initial_dict=None, sep=","):
        """Export general relationships to CSV for import into Gephi.

        :param str type: person, funder, or company
        :param str query_str: cypher query string to return the nodes or relations
        :param int count: total number of relationships to page through
        :param str out_file: output file
        :param list fields: fields to write
        :param dict initial_dict: dict with any vars not in node
        :param str sep: separator to use in output file (currently unused; the excel-tab dialect is written)
        :rtype: None
        """

        if initial_dict is None:
            initial_dict = {}

        field_set = set()
        header_set = set()
        n_exported = 0
        n_errors = 0
        query_size = 2000

        with open(out_file, "wb") as fil:
            dw = csv.DictWriter(fil, fields, extrasaction="ignore", dialect="excel-tab")

            # Generate and write the header line
            header = dict()
            for txt in fields:
                header[txt] = txt
                header_set.add(txt)
            dw.writerow(header)

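            # Page through the results with skip/limit so large result sets are
            # fetched from Neo4j in batches of query_size rows.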
            for first in xrange(0, count, query_size):
                query_str_with_limits = query_str + " skip " + str(first) + " limit " + str(query_size) + ";"

                query = CypherQuery(self, query_str_with_limits)
                for relationship in query.stream():

                    # print 'try to pull out rels parts', len(relationship)
                    rel_parts = relationship.values[1]
                    d = copy.copy(initial_dict)  # copy so the caller's dict is not mutated between rows
                    d["label"] = rel_parts.type
                    d["type"] = rel_parts.type
                    d["source"] = self.encode_chars(rel_parts.start_node)
                    d["target"] = self.encode_chars(rel_parts.end_node)
                    d["permalink"] = rel_parts.start_node["permalink"] + "__" + rel_parts.end_node["permalink"]
                    edge_properties = self.encode_chars(rel_parts.get_properties())
                    d["source"]["overview"] = ""
                    d["target"]["overview"] = ""
                    print "Edge_props (cleaned)", edge_properties
                    print "  Source props (cleaned)", d["permalink"]
                    # print '  Source', d['source']
                    # TODO: unicode errors are in source or target node information

                    try:
                        d["id"] = d["permalink"]
                        for key in d:
                            field_set.add(key)
                        d.update(edge_properties)

                        dw.writerow(self.encode_chars(d))
                        n_exported += 1
                        if (n_exported % 1000) == 0:
                            print "Relationships exported: ", n_exported
                    except UnicodeEncodeError as uee:
                        n_errors += 1
                        print "Unicode Error Inside in Export Relationships", uee.args
                    except ValueError as err:
                        n_errors += 1
                        print "Value Error Inside in Export Relationships", err.args

        print "\nExport of {} {} relationships complete. There were {} errors.".format(n_exported, type, n_errors)
        print "   Unexported fields: {}".format(field_set - header_set)