def export_nodes_to_csv(self, type, query_str, count, out_file, fields, initial_dict=None, sep=","):
    """Export general nodes to a tab-delimited file for import to Gephi.

    The query is run in pages of 5000 rows by appending ``skip``/``limit``
    clauses and a closing semicolon to ``query_str``. The node is read from
    the first column of every result row and written as one output row
    built from ``initial_dict``, the ``nodes`` and ``id`` columns, and the
    node's own properties (which win on key clashes).

    A progress line is printed every 1000 rows and a summary at the end,
    listing node property names that are not part of ``fields``.

    :param str type: person, funder, or company; only used in the summary
        message
    :param str query_str: cypher query string to return the nodes, without
        trailing skip/limit clauses or semicolon
    :param int count: upper bound for the ``skip`` offsets; paging stops
        once the offset reaches this value
    :param str out_file: output file
    :param list fields: fields to write; row keys outside this list are
        silently dropped
    :param dict initial_dict: dict with any vars not in node; it is copied
        for every row and never modified (``None`` means no extra vars)
    :param str sep: accepted for compatibility but currently unused; the
        output always uses the ``excel-tab`` dialect
    :rtype None:
    """
    query_size = 5000
    base_row = {} if initial_dict is None else initial_dict
    header_set = set(fields)
    field_set = set()
    n_exported = 0
    n_errors = 0

    # Prints use the single-argument form: same output under Python 2 and
    # still valid syntax under Python 3.
    with open(out_file, "wb") as fil:
        dw = csv.DictWriter(fil, fields, extrasaction="ignore", dialect="excel-tab")
        dw.writeheader()

        for first in xrange(0, count, query_size):
            query_str_with_limits = query_str + " skip " + str(first) + " limit " + str(query_size) + ";"
            query = CypherQuery(self, query_str_with_limits)
            for item in query.stream():
                # Exactly one row per record: the node lives in column 0.
                # Looping over every column would export it repeatedly.
                anode = item.values[0]
                try:
                    d = copy.copy(base_row)
                    # NOTE(review): item assignment on the node may write the
                    # property back to the database - confirm.
                    anode["permalink"] = self.get_permalink(anode)
                    d["nodes"] = anode["permalink"]
                    d["id"] = anode._id
                    properties = anode.get_properties()
                    field_set.update(properties)
                    d.update(properties)
                    dw.writerow(d)
                    n_exported += 1
                    if (n_exported % 1000) == 0:
                        print("Nodes exported  %s" % (n_exported,))
                except UnicodeEncodeError as uee:
                    n_errors += 1
                    print("Unicode Error Inside on Nodes %s" % (uee.args,))
                except ValueError as ve:
                    n_errors += 1
                    print("Value Error Inside on Nodes %s" % (ve.args,))

    print("\nExport of {} {} nodes complete. There were {} errors.".format(n_exported, type, n_errors))
    print(" Unexported fields: {}".format(field_set - header_set))
def export_nodes_to_csv(self, type, query_str, count, out_file, fields, initial_dict=None, sep=','):
    """Export general nodes to a tab-delimited file for import to Gephi.

    The query is run in pages of 5000 rows by appending ``skip``/``limit``
    clauses and a closing semicolon to ``query_str``. The node is read from
    the first column of every result row and written as one output row
    built from ``initial_dict``, the ``nodes`` and ``id`` columns, and the
    node's own properties (which win on key clashes).

    A progress line is printed every 1000 rows and a summary at the end,
    listing node property names that are not part of ``fields``.

    :param str type: person, funder, or company; only used in the summary
        message
    :param str query_str: cypher query string to return the nodes, without
        trailing skip/limit clauses or semicolon
    :param int count: upper bound for the ``skip`` offsets; paging stops
        once the offset reaches this value
    :param str out_file: output file
    :param list fields: fields to write; row keys outside this list are
        silently dropped
    :param dict initial_dict: dict with any vars not in node; it is copied
        for every row and never modified (``None`` means no extra vars)
    :param str sep: accepted for compatibility but currently unused; the
        output always uses the ``excel-tab`` dialect
    :rtype None:
    """
    query_size = 5000
    base_row = {} if initial_dict is None else initial_dict
    header_set = set(fields)
    field_set = set()
    n_exported = 0
    n_errors = 0

    # Prints use the single-argument form: same output under Python 2 and
    # still valid syntax under Python 3.
    with open(out_file, 'wb') as fil:
        dw = csv.DictWriter(fil, fields, extrasaction='ignore', dialect='excel-tab')
        dw.writeheader()

        for first in xrange(0, count, query_size):
            query_str_with_limits = query_str + ' skip ' + str(first) + ' limit ' + str(query_size) + ';'
            query = CypherQuery(self, query_str_with_limits)
            for item in query.stream():
                # Exactly one row per record: the node lives in column 0.
                # Looping over every column would export it repeatedly.
                anode = item.values[0]
                try:
                    d = copy.copy(base_row)
                    # NOTE(review): item assignment on the node may write the
                    # property back to the database - confirm.
                    anode['permalink'] = self.get_permalink(anode)
                    d['nodes'] = anode['permalink']
                    d['id'] = anode._id
                    properties = anode.get_properties()
                    field_set.update(properties)
                    d.update(properties)
                    dw.writerow(d)
                    n_exported += 1
                    if (n_exported % 1000) == 0:
                        print('Nodes exported  %s' % (n_exported,))
                except UnicodeEncodeError as uee:
                    n_errors += 1
                    print('Unicode Error Inside on Nodes %s' % (uee.args,))
                except ValueError as ve:
                    n_errors += 1
                    print('Value Error Inside on Nodes %s' % (ve.args,))

    print('\nExport of {} {} nodes complete. There were {} errors.'.format(n_exported, type, n_errors))
    print(' Unexported fields: {}'.format(field_set - header_set))
def export_relations_to_csv(self, type, query_str, count, out_file, fields, initial_dict=None, sep=','):
    """Export general relationships to a tab-delimited file for import to Gephi.

    The query is run in pages of 2000 rows by appending ``skip``/``limit``
    clauses and a closing semicolon to ``query_str``. The relationship is
    read from the second column of every result row. Each output row starts
    from a fresh copy of ``initial_dict``, gets the ``label``, ``type``,
    ``source``, ``target``, ``permalink`` and ``id`` columns, and is then
    overlaid with the relationship's own properties.

    The cleaned edge properties and the permalink are printed for every
    relationship, a progress line every 1000 rows, and a summary at the
    end listing row keys that are not part of ``fields``.

    :param str type: person, funder, or company; only used in the summary
        message
    :param str query_str: cypher query string to return the relations,
        without trailing skip/limit clauses or semicolon
    :param int count: upper bound for the ``skip`` offsets; paging stops
        once the offset reaches this value
    :param str out_file: output file
    :param list fields: fields to write; row keys outside this list are
        silently dropped
    :param dict initial_dict: dict with any vars not in the relationship;
        it is copied for every row and never modified (``None`` means no
        extra vars)
    :param str sep: accepted for compatibility but currently unused; the
        output always uses the ``excel-tab`` dialect
    :rtype None:
    """
    query_size = 2000
    base_row = {} if initial_dict is None else initial_dict
    header_set = set(fields)
    field_set = set()
    n_exported = 0
    n_errors = 0

    # Prints use the single-argument form: same output under Python 2 and
    # still valid syntax under Python 3.
    with open(out_file, 'wb') as fil:
        dw = csv.DictWriter(fil, fields, extrasaction='ignore', dialect='excel-tab')
        dw.writeheader()

        for first in xrange(0, count, query_size):
            query_str_with_limits = query_str + ' skip ' + str(first) + ' limit ' + str(query_size) + ';'
            query = CypherQuery(self, query_str_with_limits)
            for relationship in query.stream():
                rel_parts = relationship.values[1]
                # Fresh copy per row: reusing the caller's dict would mutate
                # it and carry one edge's properties over into the next row.
                d = dict(base_row)
                d['label'] = rel_parts.type
                d['type'] = rel_parts.type
                d['source'] = self.encode_chars(rel_parts.start_node)
                d['target'] = self.encode_chars(rel_parts.end_node)
                d['permalink'] = rel_parts.start_node['permalink'] + '__' + rel_parts.end_node['permalink']
                edge_properties = self.encode_chars(rel_parts.get_properties())
                # Blank the overview of both endpoints before writing;
                # presumably to keep long free text out of the export.
                d['source']['overview'] = ''
                d['target']['overview'] = ''
                print('Edge_props (cleaned) %s' % (edge_properties,))
                print(' Source props (cleaned) %s' % (d['permalink'],))
                # TODO: unicode errors are in source or target node information
                try:
                    d['id'] = d['permalink']
                    d.update(edge_properties)
                    # Record keys after the merge so edge property names
                    # show up in the "Unexported fields" report.
                    field_set.update(d)
                    dw.writerow(self.encode_chars(d))
                    n_exported += 1
                    if (n_exported % 1000) == 0:
                        print('Relationships exported:  %s' % (n_exported,))
                except UnicodeEncodeError as uee:
                    n_errors += 1
                    print('Unicode Error Inside in Export Relationships %s' % (uee.args,))
                except ValueError as err:
                    n_errors += 1
                    print('Unknown Error Inside in Export Relationships %s' % (err.args,))

    print('\nExport of {} {} relationships complete. There were {} errors.'.format(n_exported, type, n_errors))
    print(' Unexported fields: {}'.format(field_set - header_set))
def export_relations_to_csv(self, type, query_str, count, out_file, fields, initial_dict=None, sep=","):
    """Export general relationships to a tab-delimited file for import to Gephi.

    The query is run in pages of 2000 rows by appending ``skip``/``limit``
    clauses and a closing semicolon to ``query_str``. The relationship is
    read from the second column of every result row. Each output row starts
    from a fresh copy of ``initial_dict``, gets the ``label``, ``type``,
    ``source``, ``target``, ``permalink`` and ``id`` columns, and is then
    overlaid with the relationship's own properties.

    The cleaned edge properties and the permalink are printed for every
    relationship, a progress line every 1000 rows, and a summary at the
    end listing row keys that are not part of ``fields``.

    :param str type: person, funder, or company; only used in the summary
        message
    :param str query_str: cypher query string to return the relations,
        without trailing skip/limit clauses or semicolon
    :param int count: upper bound for the ``skip`` offsets; paging stops
        once the offset reaches this value
    :param str out_file: output file
    :param list fields: fields to write; row keys outside this list are
        silently dropped
    :param dict initial_dict: dict with any vars not in the relationship;
        it is copied for every row and never modified (``None`` means no
        extra vars)
    :param str sep: accepted for compatibility but currently unused; the
        output always uses the ``excel-tab`` dialect
    :rtype None:
    """
    query_size = 2000
    base_row = {} if initial_dict is None else initial_dict
    header_set = set(fields)
    field_set = set()
    n_exported = 0
    n_errors = 0

    # Prints use the single-argument form: same output under Python 2 and
    # still valid syntax under Python 3.
    with open(out_file, "wb") as fil:
        dw = csv.DictWriter(fil, fields, extrasaction="ignore", dialect="excel-tab")
        dw.writeheader()

        for first in xrange(0, count, query_size):
            query_str_with_limits = query_str + " skip " + str(first) + " limit " + str(query_size) + ";"
            query = CypherQuery(self, query_str_with_limits)
            for relationship in query.stream():
                rel_parts = relationship.values[1]
                # Fresh copy per row: reusing the caller's dict would mutate
                # it and carry one edge's properties over into the next row.
                d = dict(base_row)
                d["label"] = rel_parts.type
                d["type"] = rel_parts.type
                d["source"] = self.encode_chars(rel_parts.start_node)
                d["target"] = self.encode_chars(rel_parts.end_node)
                d["permalink"] = rel_parts.start_node["permalink"] + "__" + rel_parts.end_node["permalink"]
                edge_properties = self.encode_chars(rel_parts.get_properties())
                # Blank the overview of both endpoints before writing;
                # presumably to keep long free text out of the export.
                d["source"]["overview"] = ""
                d["target"]["overview"] = ""
                print("Edge_props (cleaned) %s" % (edge_properties,))
                print(" Source props (cleaned) %s" % (d["permalink"],))
                # TODO: unicode errors are in source or target node information
                try:
                    d["id"] = d["permalink"]
                    d.update(edge_properties)
                    # Record keys after the merge so edge property names
                    # show up in the "Unexported fields" report.
                    field_set.update(d)
                    dw.writerow(self.encode_chars(d))
                    n_exported += 1
                    if (n_exported % 1000) == 0:
                        print("Relationships exported:  %s" % (n_exported,))
                except UnicodeEncodeError as uee:
                    n_errors += 1
                    print("Unicode Error Inside in Export Relationships %s" % (uee.args,))
                except ValueError as err:
                    n_errors += 1
                    print("Unknown Error Inside in Export Relationships %s" % (err.args,))

    print("\nExport of {} {} relationships complete. There were {} errors.".format(n_exported, type, n_errors))
    print(" Unexported fields: {}".format(field_set - header_set))