def load(self):
    """Generate query rows and load them into the graph."""
    graph = get_graph()
    begin_time = time()
    rp = self.engine.execute(self.query)
    log.debug("Query time: %.5fms", (time() - begin_time) * 1000)
    stats = {'rows': 0, 'nodes': 0, 'rels': 0}
    while True:
        graphtx = graph.begin()
        rows = rp.fetchmany(10000)
        if not len(rows):
            break
        for row in rows:
            stats['rows'] += 1
            self.update(graphtx, dict(row.items()), stats)
            if stats['rows'] % 1000 == 0:
                elapsed = (time() - begin_time)
                stats['per_node'] = max(stats['nodes'], 1) / elapsed
                log.info("Loaded: %(rows)s [%(nodes)s nodes, "
                         "%(rels)s edges], %(per_node).5f n/s", stats)
        graphtx.commit()
    log.info("Done. Loaded %(rows)s rows, %(nodes)s nodes, "
             "%(rels)s edges.", stats)
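# NOTE: a stripped-down, runnable sketch of the fetchmany/commit loop
# above, using SQLAlchemy 1.x implicit execution (matching the era of
# this code). The table and rows are made up for illustration.
from sqlalchemy import create_engine

engine = create_engine("sqlite://")  # in-memory stand-in for the source DB
engine.execute("CREATE TABLE people (name TEXT)")
engine.execute("INSERT INTO people VALUES ('alice'), ('bob')")

rp = engine.execute("SELECT name FROM people")
while True:
    rows = rp.fetchmany(10000)    # stream the result set in chunks
    if not len(rows):
        break
    for row in rows:
        print(dict(row.items()))  # each RowProxy flattens to a plain dict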
def upgrade():
    graph = get_graph()
    if graph is None:
        return
    # graph.delete_all()
    for node_type in NodeType.all():
        node_type.ensure_indices(graph)
def test():
    from aleph.model import Entity
    graph = get_graph()
    tx = graph.begin()
    for entity_id in Entity.all_ids():
        remove_entity(tx, entity_id)
    tx.commit()
from contextlib import contextmanager

@contextmanager
def transaction():
    graph = get_graph()
    if graph is None:
        yield None
    else:
        tx = graph.begin()
        yield tx
        tx.commit()
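# NOTE: a minimal usage sketch, not part of the source. With the
# contextmanager decorator above, callers batch work inside a with
# block; Entity and load_entity are borrowed from the other snippets.
with transaction() as tx:
    if tx is not None:  # tx is None when no graph store is configured
        for entity in Entity.all():
            load_entity(tx, entity)
# the generator resumes and commits here, provided no exception escaped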
def load_documents():
    graph = get_graph()
    tx = graph.begin()
    for i, document in enumerate(Document.all()):
        load_document(tx, document)
        if i > 0 and i % 1000 == 0:
            tx.commit()
            tx = graph.begin()
    tx.commit()
def load_documents():
    graph = get_graph()
    tx = graph.begin()
    for i, document in enumerate(Document.all()):
        log.info("Load doc [%s]: %r", document.id, document.meta)
        load_document(tx, document)
        if i > 0 and i % 1000 == 0:
            tx.commit()
            tx = graph.begin()
    tx.commit()
def upgrade_graph():
    graph = get_graph()
    if graph is None:
        return
    # graph.delete_all()
    cur = graph.run("MATCH (n) WHERE NOT (n)--() DELETE n;")
    log.debug("Deleted %(nodes_deleted)s orphan nodes.", cur.stats())
    for node_type in NodeType.all():
        node_type.ensure_indices(graph)
@contextmanager
def transaction():
    graph = get_graph()
    if graph is None:
        yield None
    else:
        # this produces deadlocks en masse:
        # tx = graph.begin()
        # yield tx
        # tx.commit()
        yield graph
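# NOTE: a usage sketch, not part of the source. This variant yields the
# graph handle itself, so every run() autocommits independently and no
# long-lived transaction can pile up locks; the label and property in
# the MERGE below are placeholders.
with transaction() as graph:
    if graph is not None:
        graph.run("MERGE (n:Aleph {fingerprint: {fp}})", fp="placeholder")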
def load_entities():
    graph = get_graph()
    tx = graph.begin()
    q = Entity.all()
    q = q.filter(Entity.state == Entity.STATE_ACTIVE)
    for i, entity in enumerate(q):
        load_entity(tx, entity)
        if i > 0 and i % 10000 == 0:
            tx.commit()
            tx = graph.begin()
    tx.commit()
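# NOTE: a sketch, not part of the source. load_documents and
# load_entities share the same commit-every-N shape; a helper along
# these lines could factor it out (the name and batch_size default are
# inventions for illustration).
def load_batched(items, loader, batch_size=1000):
    """Apply loader(tx, item) to each item, committing every batch_size."""
    graph = get_graph()
    tx = graph.begin()
    for i, item in enumerate(items):
        loader(tx, item)
        if i > 0 and i % batch_size == 0:
            tx.commit()         # flush the current batch...
            tx = graph.begin()  # ...and start a fresh transaction
    tx.commit()                 # commit the final partial batch

# e.g. load_batched(Document.all(), load_document)
#      load_batched(Entity.all(), load_entity, batch_size=10000)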
def graph_metadata():
    graph = get_graph()
    if graph is None:
        return {'active': False}
    ignore_labels = ['Collection', BASE_NODE]
    labels = [l for l in graph.node_labels if l not in ignore_labels]
    types = [t for t in graph.relationship_types if t != 'PART_OF']
    return {
        'active': True,
        'labels': labels,
        'types': types,
        'icons': get_config('GRAPH_ICONS'),
        'colors': get_config('GRAPH_COLORS')
    }
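# NOTE: an illustration, not taken from the source: one way a metadata
# helper like this might be exposed over HTTP. The Flask blueprint and
# route below are assumptions.
from flask import Blueprint, jsonify

blueprint = Blueprint('graph', __name__)

@blueprint.route('/api/graph/metadata')
def metadata():
    # returns {'active': False} when no graph store is configured
    return jsonify(graph_metadata())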
def upgrade_graph():
    graph = get_graph()
    if graph is None:
        return
    # graph.delete_all()
    cur = graph.run("MATCH (n) WHERE NOT (n)--() DELETE n;")
    log.debug("Deleted %(nodes_deleted)s orphan nodes.", cur.stats())
    # Common base type indexes
    if 'fingerprint' not in graph.schema.get_indexes(BASE_NODE):
        graph.schema.create_index(BASE_NODE, 'fingerprint')
    # if 'id' not in graph.schema.get_uniqueness_constraints(BASE_NODE):
    #     graph.schema.create_uniqueness_constraint(BASE_NODE, 'id')
    if 'id' not in graph.schema.get_indexes(BASE_NODE):
        graph.schema.create_index(BASE_NODE, 'id')
    for node_type in NodeType.all():
        node_type.ensure_indices(graph)
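# NOTE: for reference only, not part of the source. The
# schema.create_index calls above issue Neo4j 3.x Cypher of roughly
# this shape; labels cannot be query parameters in Cypher, so the
# label is interpolated into the string. BASE_NODE's value here is an
# assumption.
BASE_NODE = 'Aleph'  # assumed; the real constant is defined elsewhere
graph.run("CREATE INDEX ON :%s(fingerprint)" % BASE_NODE)
graph.run("CREATE INDEX ON :%s(id)" % BASE_NODE)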
def test():
    # from py2neo.database.cypher import cypher_repr
    graph = get_graph()
    collections = range(1, 100)
    collections = [251]
    # collections = cypher_repr(collections)
    # print(cypher_repr(u"huhu this has ' quotäää"))
    # return
    q = "MATCH (n:Entity)-[r]-(d:Document) " \
        "MATCH (n)-[:PART_OF]->(c1:Collection) " \
        "MATCH (d)-[:PART_OF]->(c2:Collection) " \
        "WHERE c1.alephCollection IN {acl} " \
        "AND c2.alephCollection IN {acl} " \
        "RETURN n, r, d LIMIT 5 "
    # q = q % (collections, collections)
    for res in graph.data(q, acl=collections):
        print(dir(res.get('r')))
        print(res.get('r').__uuid__)
def load(self):
    """Generate query rows and load them into the graph."""
    graph = get_graph()
    self.collection = self.create_collection(graph)
    begin_time = time()
    rp = self.engine.execute(self.query)
    log.info("Query time: %.5fms", (time() - begin_time) * 1000)
    row_count = 0
    while True:
        graphtx = graph.begin()
        rows = rp.fetchmany(10000)
        if not len(rows):
            break
        for row in rows:
            row_count += 1
            self.update(graphtx, dict(row.items()))
            if row_count % 1000 == 0:
                per_sec = row_count / (time() - begin_time)
                log.info("Loaded: %s, %.1frows/s", row_count, per_sec)
        graphtx.commit()
    log.info("Finished %s rows.", row_count)
def load(self):
    """Generate query rows and load them into the graph."""
    collection = Collection.create({
        'foreign_id': self.config.get('collection'),
        'label': self.config.get('collection'),
        'managed': True
    })
    db.session.commit()
    graph = get_graph()
    coll_type = NodeType.get('Collection')
    collection = coll_type.merge(graph,
                                 name=collection.label,
                                 fingerprint=collection.foreign_id,
                                 alephCollection=collection.id)
    begin_time = time()
    rp = self.engine.execute(self.query)
    log.debug("Query time: %.5fms", (time() - begin_time) * 1000)
    stats = {'rows': 0, 'nodes': 0, 'rels': 0}
    while True:
        graphtx = graph.begin()
        rows = rp.fetchmany(10000)
        if not len(rows):
            break
        for row in rows:
            stats['rows'] += 1
            self.update(graphtx, collection, dict(row.items()), stats)
            if stats['rows'] % 1000 == 0:
                elapsed = (time() - begin_time)
                stats['per_node'] = max(stats['nodes'], 1) / elapsed
                log.info("Loaded: %(rows)s [%(nodes)s nodes, "
                         "%(rels)s edges], %(per_node).5f n/s", stats)
        graphtx.commit()
    log.info("Done. Loaded %(rows)s rows, %(nodes)s nodes, "
             "%(rels)s edges.", stats)