def loop_data(g, dataset):
    """Upsert caller/callee 'sim' vertices for every row of the dataset and
    record each row as an 'info' call-detail edge between them."""
    print("running ....")
    for _, row in dataset.iterrows():
        # Get-or-create the A-number vertex (carries the handset TAC).
        a_vertex = (
            g.V().has('sim', 'name', row["a_number"])
            .fold()
            .coalesce(
                __.unfold(),
                __.addV('sim')
                .property('name', row["a_number"])
                .property('tac', row["tac"]))
            .next()
        )
        # Get-or-create the B-number vertex (no TAC available for it).
        b_vertex = (
            g.V().has('sim', 'name', row["b_number"])
            .fold()
            .coalesce(
                __.unfold(),
                __.addV('sim').property('name', row["b_number"]))
            .next()
        )
        # Every row becomes its own edge — repeated calls produce parallel edges.
        (g.V(a_vertex).addE('info').to(b_vertex)
         .property('service_type', row["service_type"])
         .property('b_prefix', row["b_prefix"])
         .property('start_time', row["start_time"])
         .property('duration', row["duration"])
         .iterate())
def __write_edges(self, g: traversal, edges: List[Dict], scan_id: str) -> None:
    """
    Writes the edges to the labeled property graph.

    Edges are chained onto one traversal and flushed in batches of 100
    (plus a final flush on the last edge). Missing endpoint vertices are
    created on the fly via fold/coalesce/unfold upserts.

    :param g: The graph traversal source
    :param edges: A list of dictionaries for each edge ('~id', '~label',
        '~from', '~to' keys; '~from'/'~to' are ARNs)
    :param scan_id: Scan identifier suffixed onto every vertex id
    :return: None
    :raises NeptuneLoadGraphException: when a batch write fails
    """
    cnt = 0
    t = g
    for r in edges:
        # Vertex ids are scoped per scan so repeated scans do not collide.
        to_id = f'{r["~to"]}_{scan_id}'
        from_id = f'{r["~from"]}_{scan_id}'
        t = (
            t.addE(r["~label"])
            .property(T.id, str(r["~id"]))
            # Upsert the source vertex: reuse it if present, else create it
            # labeled with the resource component of its ARN.
            .from_(
                __.V(from_id)
                .fold()
                .coalesce(
                    __.unfold(),
                    __.addV(self.parse_arn(r["~from"])["resource"])
                    .property(T.id, from_id)
                    .property("scan_id", scan_id)
                    .property("arn", r["~from"]),
                )
            )
            # Same upsert for the destination vertex.
            .to(
                __.V(to_id)
                .fold()
                .coalesce(
                    __.unfold(),
                    __.addV(self.parse_arn(r["~to"])["resource"])
                    .property(T.id, to_id)
                    .property("scan_id", scan_id)
                    .property("arn", r["~to"]),
                )
            )
        )
        cnt += 1
        # Flush every 100 edges and once more at the end of the list.
        if cnt % 100 == 0 or cnt == len(edges):
            try:
                self.logger.info(
                    event=LogEvent.NeptunePeriodicWrite,
                    msg=f"Writing edges {cnt} of {len(edges)}",
                )
                t.next()
                # Start a fresh traversal for the next batch.
                t = g
            except Exception as err:
                self.logger.error(event=LogEvent.NeptuneLoadError, msg=str(err))
                raise NeptuneLoadGraphException(
                    f"Error loading edge {r} " f"with {str(t.bytecode)}"
                ) from err
def upsert_vertices_for_label(rows):
    """Batch-upsert one partition of vertices under the enclosing scope's
    label, flushing the chained traversal every `batch_size` rows."""
    conn = self.gremlin_utils.remote_connection()
    g = self.gremlin_utils.traversal_source(connection=conn)
    pending = g
    batched = 0
    for row in rows:
        entries = row.asDict()
        insert = __.addV(label)
        for raw_key, value in entries.items():
            # Column names may carry a ':type' suffix; strip it.
            prop = raw_key.split(':')[0]
            if prop == '~id':
                insert = insert.property(id, value)
            elif prop != '~label':
                insert = insert.property(prop, value)
        pending = pending.V(entries['~id']).fold().coalesce(__.unfold(), insert)
        batched += 1
        if batched == batch_size:
            self.retry_query(pending)
            pending = g
            batched = 0
    # Flush whatever is left over from the final partial batch.
    if batched > 0:
        self.retry_query(pending)
    conn.close()
def get_or_create_vertex(self, label_value, id):
    """Fetch the vertex with the given id, creating it (with id, ID and
    creation-date properties) when it does not exist yet."""
    lookup = self.g.V(id).fold()
    create = (
        __.addV(label_value)
        .property(T.id, id)
        .property(ID, id)
        .property(DATE, utils.get_date_now())
    )
    return self._do_next(lookup.coalesce(__.unfold(), create))
def upsert_vertex(record, vertex_mapping, g):
    """Upsert a vertex described by vertex_mapping, matched on its lookup
    properties; extra properties are set only when the vertex is inserted.

    :param record: source row providing property values
    :param vertex_mapping: dict with 'vertex_label', 'lookup_properties',
        and 'other_properties' (source field -> property key)
    :param g: graph traversal source
    :return: None; failures are logged and the record skipped (best-effort)
    """
    vertex_label = vertex_mapping['vertex_label']
    # Ensure all lookup values are present first
    lookup_values = get_lookup_values(record, vertex_mapping['lookup_properties'])
    if lookup_values is None:
        return
    # Setup traversals
    try:
        traversal = g.V().hasLabel(vertex_label)
        insertion_traversal = __.addV(vertex_label).property(
            'type', vertex_label)
        # Match on every lookup property; mirror them on the insert branch.
        for prop_key, lookup_value in lookup_values.items():
            traversal = traversal.has(prop_key, lookup_value)
            insertion_traversal = insertion_traversal.property(
                prop_key, lookup_value)
        # Add Vertex insertion partial traversal (insert-only properties).
        for source_field, prop_key in vertex_mapping['other_properties'].items():
            insertion_traversal = insertion_traversal.property(
                prop_key, record[source_field])
        traversal.fold().coalesce(__.unfold(), insertion_traversal).next()
    except Exception:
        # Was a bare `except:` — narrowed so SystemExit/KeyboardInterrupt
        # are no longer swallowed; still best-effort per record.
        print("Vertex error - skipping: {0}({1})".format(
            vertex_label, lookup_values))
def __add_author(self, t, author, post_url):
    """Upsert the author vertex, add a 'written_by' edge from the post, and
    backfill image properties only when the vertex does not have them yet."""
    img_src = None
    if 'img_src' in author:
        img_src = author['img_src']
        img_height = author['img_height']
        img_width = author['img_width']
    upsert = (
        t.V(author['name'])
        .fold()
        .coalesce(
            __.unfold(),
            __.addV('author')
            .property(T.id, author['name'])
            .property('name', author['name']))
        .as_('p')
        .addE('written_by')
        .from_(__.V(post_url))
    )
    # Conditionally set img_src/img_height/img_width if they do not exist.
    if img_src:
        upsert = upsert.sideEffect(
            __.select('p')
            .hasNot('img_src')
            .property('img_src', img_src)
            .property('img_height', img_height)
            .property('img_width', img_width)
        )
    return upsert
def handle_youtube_video_added(self, video_id, user_id, name, description, location,
                               preview_image_location, tags, added_date, timestamp):
    """Record an uploaded YouTube video in the graph: create the video vertex,
    link it to the uploading user via 'uploaded', and connect each unique tag
    via 'taggedWith' (upserting tag vertices as needed).

    NOTE(review): `location` is logged but never stored on the vertex — confirm
    whether that is intentional.
    """
    # make sure tags are unique (no duplicates)
    unique_tags = set(tags)
    logging.debug('SuggestedVideosService:handle_youtube_video_added, video ID: ' + str(video_id) +
                  ', user ID: ' + str(user_id) + ', name: ' + name + ', description: ' + description +
                  ', location: ' + location + ', preview_image_location: ' + preview_image_location +
                  ', tags: ' + str(unique_tags) + ', timestamp: ' + str(timestamp))
    # Note: building a single traversal, but broken into several steps for readability
    # locate user vertex
    traversal = self.graph.V().has('user', 'userId', user_id).as_('^user')
    # add video vertex
    traversal = traversal.addV('video').property('videoId', video_id)\
        .property('added_date', added_date) \
        .property('description', description) \
        .property('name', name) \
        .property('preview_image_location', preview_image_location) \
        .as_('^video')
    # add edge from user to video vertex
    traversal = traversal.addE('uploaded').from_('^user').to('^video').property('added_date', added_date)
    # find vertices for tags and add edges from video vertex
    # (coalesce: reuse an existing tag vertex, else create one dated to this upload)
    for tag in unique_tags:
        traversal = traversal.addE('taggedWith').from_('^video').to(__.coalesce(
            __.V().has('tag', 'name', tag),
            __.addV('tag').property('name', tag).property('tagged_date', added_date)))
    # execute the traversal
    traversal.iterate()
def _build_gremlin_vertices(g: GraphTraversalSource, row: Any) -> GraphTraversalSource:
    """Chain an id-keyed vertex upsert for `row` onto `g`, then append its
    property steps, and return the extended traversal."""
    vertex_id = str(row["~id"])
    upsert = g.V(vertex_id).fold().coalesce(
        __.unfold(),
        __.addV(row["~label"]).property(T.id, vertex_id))
    return _build_gremlin_properties(upsert, row)
def upsert_node(self, node_id: str, node_label: str,
                node_properties: Dict[str, Any]) -> None:
    """Create the node if absent, then apply `node_properties` to it and
    execute the traversal."""
    insert_step = __.addV(node_label).property(T.id, node_id)
    upsert = (
        self.get_graph().V().has(T.id, node_id)
        .fold()
        .coalesce(__.unfold(), insert_step)
    )
    # Property updates apply whether the node was found or just created.
    upsert = NeptuneSessionClient.update_entity_properties_on_traversal(
        upsert, node_properties)
    upsert.next()
def _build_gremlin_insert_vertices(
        g: GraphTraversalSource, row: Any,
        use_header_cardinality: bool = False) -> GraphTraversalSource:
    """Chain an id-keyed vertex upsert for `row` onto `g`, then set its
    properties (honoring header cardinality when requested)."""
    vertex_id = str(row["~id"])
    upserted = g.V(vertex_id).fold().coalesce(
        __.unfold(),
        __.addV(row["~label"]).property(T.id, vertex_id))
    return _set_properties(upserted, use_header_cardinality, row)
def test_explain(self) -> None:
    """Smoke-test _explain on a translated upsert-plus-property traversal."""
    proxy = self.get_proxy()
    user_label = VertexTypes.User.value.label
    traversal = (
        proxy.g.V()
        .has(user_label, proxy.key_property_name, 'jack')
        .fold()
        .coalesce(
            __.unfold(),
            __.addV(user_label).property(
                Cardinality.single, proxy.key_property_name, 'jack'))
        .property(Cardinality.single, 'email', '*****@*****.**')
    )
    query = ScriptTranslator.translateT(traversal)
    traversal.iterate()  # just enough to not explode
    proxy._explain(query)
def _write_vertex(self, vertex_id: str, vertex_labels: List[str]) -> Traversal:
    """Return an upsert traversal for `vertex_id` carrying the given labels.

    Backends without multi-label support get only the first label; others
    get the '::'-joined compound label.
    """
    logger.debug("Writing vertex %s", vertex_id)
    if self.supports_multiple_labels:
        label = "::".join(vertex_labels)
    else:
        label = vertex_labels[0]
    create = __.addV(label).property(T.id, vertex_id)
    return self.g.V(vertex_id).fold().coalesce(__.unfold(), create)
def get_last_checkpoint(client, tablename):
    """Return the stored checkpoint value for `tablename`, creating the
    Checkpoint vertex (seeded to 2015-01-01) when it does not exist yet.

    :param client: connection factory exposing remote_connection() and
        traversal_source()
    :param tablename: table whose checkpoint to fetch
    :return: the checkpoint 'value' property (a datetime)
    """
    conn = client.remote_connection()
    try:
        g = client.traversal_source(conn)
        checkpoint = (
            g.V().hasLabel('Checkpoint').has('table', tablename)
            .fold()
            .coalesce(
                __.unfold(),
                __.addV('Checkpoint')
                .property('table', tablename)
                .property('value', datetime.datetime(2015, 1, 1, 0, 0)))
            .values('value')
            .next())
    finally:
        # Close the connection even when the query raises (it leaked before).
        conn.close()
    return checkpoint
def __add_tag(self, t, tag, post_url):
    """Upsert the tag vertex (keyed by the tag text) and attach a 'tagged'
    edge from the post to it."""
    create_tag = (
        __.addV('tag')
        .property(T.id, tag)
        .property('tag', tag)
    )
    return (
        t.V(tag)
        .fold()
        .coalesce(__.unfold(), create_tag)
        .addE('tagged')
        .from_(__.V(post_url))
    )
def __add_entities(self, t, entity, post_url):
    """Upsert an entity vertex keyed by text+type and link it to the post
    with a score-carrying 'found_in' edge."""
    entity_key = f'{entity["Text"]}_{entity["Type"]}'
    create_entity = (
        __.addV(entity["Type"].lower())
        .property(T.id, entity_key)
        .property("text", entity["Text"])
        .property("type", entity["Type"])
    )
    return (
        t.V(entity_key)
        .fold()
        .coalesce(__.unfold(), create_entity)
        .addE('found_in')
        .from_(__.V(post_url))
        .property('score', entity['Score'])
    )
def upsert_vertex(t, row, **kwargs):
    """Chain a vertex upsert for `row` onto traversal `t` and return it.

    :param t: traversal to extend
    :param row: mapping of source column -> value
    :keyword mappings: column-mapping helper (required)
    :keyword label: vertex label; defaults to mappings.get_label(row)
    :keyword on_upsert: None | 'updateSingleCardinalityProperties'
        | 'updateAllProperties' | 'replaceAllProperties'
    :return: the extended traversal
    """
    mappings = kwargs['mappings']
    label = kwargs['label'] if 'label' in kwargs else mappings.get_label(row)
    on_upsert = kwargs.get('on_upsert', None)
    # Supported on_upsert modes:
    #   updateSingleCardinalityProperties
    #   updateAllProperties
    #   replaceAllProperties
    create_traversal = __.addV(label)
    # Properties applied after the coalesce, i.e. also on existing vertices.
    updateable_items = []
    for key, value in row.items():
        mapping = mappings.mapping_for(key)
        if mapping.is_id_token():
            create_traversal = create_traversal.property(id, value)
        elif not mapping.is_token():
            if not on_upsert:
                # Insert-only: every property goes on the create branch.
                create_traversal = create_traversal.property(
                    mapping.name, mapping.convert(value))
            elif on_upsert == 'updateSingleCardinalityProperties':
                # Single-cardinality props are updated on existing vertices;
                # multi-cardinality ones are still insert-only.
                if mapping.cardinality == 'single':
                    updateable_items.append((key, value))
                else:
                    create_traversal = create_traversal.property(
                        get_cardinality(mapping.cardinality), mapping.name,
                        mapping.convert(value))
            elif on_upsert == 'updateAllProperties':
                updateable_items.append((key, value))
            elif on_upsert == 'replaceAllProperties':
                # NOTE(review): not implemented — properties silently dropped
                # in this mode; confirm intended behavior.
                pass
    t = t.V(mappings.get_id(row)).fold().coalesce(__.unfold(), create_traversal)
    if updateable_items:
        for key, value in updateable_items:
            mapping = mappings.mapping_for(key)
            t = t.property(get_cardinality(mapping.cardinality), mapping.name,
                           mapping.convert(value))
    return t
def upsert_vertices_for_label(rows):
    """Upsert every vertex in `rows` (one round-trip per row) under the
    enclosing scope's `label`, then close the connection.

    :param rows: iterable of Spark Row objects with '~id'/'~label' columns
    """
    conn = self.remote_connection()
    try:
        g = self.traversal_source(conn)
        for row in rows:
            entries = row.asDict()
            create_traversal = __.addV(label)
            # Was `entries.iteritems()` — Python 2 only; this file is
            # Python 3 (f-strings elsewhere), so it would raise AttributeError.
            for key, value in entries.items():
                # Column names may carry a ':type' suffix; strip it.
                key = key.split(':')[0]
                if key == '~id':
                    create_traversal = create_traversal.property(id, value)
                elif key == '~label':
                    pass
                else:
                    create_traversal = create_traversal.property(key, value)
            g.V(entries['~id']).fold().coalesce(
                __.unfold(), create_traversal).next()
    finally:
        # Always release the connection, even when a query fails.
        conn.close()
def __add_post(self, t, post):
    """Upsert the post vertex keyed by its URL and label the step 'post'
    for later references in the same traversal."""
    create_post = (
        __.addV('post')
        .property(T.id, post['url'])
        .property('title', post['title'])
        .property('post_date', post['date'])
        .property('img_src', post['img_src'])
        .property('img_height', post['img_height'])
        .property('img_width', post['img_width'])
    )
    return (
        t.V(post['url'])
        .fold()
        .coalesce(__.unfold(), create_post)
        .as_('post')
    )
def test_big_result_set_secure(secure_client):
    """Exercise response streaming over a secure client with progressively
    larger result sets."""
    g = Graph().traversal()

    def run(traversal):
        # Submit the traversal's bytecode and collect every result chunk.
        message = RequestMessage(
            'traversal', 'bytecode',
            {'gremlin': traversal.bytecode, 'aliases': {'g': 'g'}})
        collected = []
        for chunk in secure_client.submit(message):
            collected += chunk
        return collected

    # Seed 20000 vertices; count() collapses to a single result.
    seed = g.inject(1).repeat(
        __.addV('person').property('name', __.loops())).times(20000).count()
    assert len(run(seed)) == 1
    for limit in (10, 100, 1000, 10000):
        assert len(run(g.V().limit(limit))) == limit
def __write_vertices(self, g: traversal, vertices: List[Dict], scan_id: str) -> None:
    """
    Writes the vertices to the labeled property graph.

    Vertices are chained onto one traversal and flushed in batches of 100
    (plus a final flush on the last vertex).

    :param g: The graph traversal source
    :param vertices: A list of dictionaries for each vertex
    :param scan_id: Scan identifier suffixed onto every vertex id
    :return: None
    :raises NeptuneLoadGraphException: when a batch write fails
    """
    cnt = 0
    t = g
    for r in vertices:
        # Vertex ids are scoped per scan so repeated scans do not collide.
        vertex_id = f'{r["~id"]}_{scan_id}'
        t = (
            t.V(vertex_id)
            .fold()
            .coalesce(
                __.unfold(),
                __.addV(self.parse_arn(r["~label"])["resource"]).property(T.id, vertex_id),
            )
        )
        for k in r.keys():
            # Need to handle numbers that are bigger than a Long in Java, for now we stringify it.
            # Java Long range is [-2**63, 2**63 - 1]; the lower bound was
            # previously off by one (-2**63 itself is a valid Long).
            if isinstance(r[k], int) and (
                r[k] > 9223372036854775807 or r[k] < -9223372036854775808
            ):
                r[k] = str(r[k])
            if k not in ["~id", "~label"]:
                t = t.property(k, r[k])
        cnt += 1
        # Flush every 100 vertices and once more at the end of the list.
        if cnt % 100 == 0 or cnt == len(vertices):
            try:
                self.logger.info(
                    event=LogEvent.NeptunePeriodicWrite,
                    msg=f"Writing vertices {cnt} of {len(vertices)}",
                )
                t.next()
                t = g
            except Exception as err:
                # Structured logging (was a bare print) for consistency with
                # the edge-writing path.
                self.logger.error(event=LogEvent.NeptuneLoadError, msg=str(err))
                raise NeptuneLoadGraphException(
                    f"Error loading vertex {r} " f"with {str(t.bytecode)}"
                ) from err
def test_big_result_set_secure(authenticated_client):
    """Exercise response streaming over an authenticated client with
    progressively larger result sets."""
    g = Graph().traversal()

    def run(traversal):
        # Submit the traversal's bytecode and collect every result chunk.
        message = RequestMessage(
            'traversal', 'bytecode',
            {'gremlin': traversal.bytecode, 'aliases': {'g': 'g'}})
        collected = []
        for chunk in authenticated_client.submit(message):
            collected += chunk
        return collected

    # Seed 20000 vertices; count() collapses to a single result.
    seed = g.inject(1).repeat(
        __.addV('person').property('name', __.loops())).times(20000).count()
    assert len(run(seed)) == 1
    for limit in (10, 100, 1000, 10000):
        assert len(run(g.V().limit(limit))) == limit
def upsert_vertices_for_label(rows):
    """Upsert every vertex in `rows` under the enclosing scope's `label`,
    logging (not raising) on failure and always closing the connection.

    :param rows: iterable of Spark Row objects with '~id'/'~label' columns
    """
    conn = None
    try:
        conn = self.gremlin_utils.remote_connection()
        g = self.gremlin_utils.traversal_source(connection=conn)
        for row in rows:
            entries = row.asDict()
            create_traversal = __.addV(label)
            for key, value in entries.items():
                # Column names may carry a ':type' suffix; strip it.
                key = key.split(':')[0]
                if key == '~id':
                    create_traversal.property(id, value)
                elif key == '~label':
                    pass
                else:
                    create_traversal.property(key, value)
            g.V(entries['~id']).fold().coalesce(
                __.unfold(), create_traversal).next()
    except GremlinServerError as err:
        print("Neptune error: {0}".format(err))
    except Exception:
        # Was a bare `except:` — narrowed so SystemExit/KeyboardInterrupt
        # propagate instead of being swallowed.
        print("Unexpected error:", sys.exc_info()[0])
    finally:
        # Close the connection even on error (it leaked on failure before,
        # since close() sat inside the try body after the loop).
        if conn is not None:
            conn.close()
def _get_or_create_node(self, label: str, uri: str):
    """Return a traversal resolving to the node with this label and URI,
    adding it when missing. Note: the traversal is not executed here."""
    existing = self.g.V().has(URI, uri).hasLabel(label).fold()
    create = __.addV(label).property(URI, uri)
    return existing.coalesce(__.unfold(), create)
from gremlin_python.process.graph_traversal import __
from gremlin_python.structure.graph import Vertex

# DSE Graph example: create a 'modern' graph, define its schema, then insert
# two person vertices and a 'knows' edge between them in one batch.
# NOTE(review): Cluster, GraphExecutionProfile, GraphOptions, DseGraph and the
# EXEC_PROFILE_* constants are presumably imported earlier in this file — confirm.
graph_name = 'modern'
# Two execution profiles: one for schema DDL, one (traversal-based) for queries.
ep_schema = GraphExecutionProfile(graph_options=GraphOptions(graph_name=graph_name))
ep = DseGraph.create_execution_profile(graph_name)
cluster = Cluster(execution_profiles={'schema': ep_schema, EXEC_PROFILE_GRAPH_DEFAULT: ep})
session = cluster.connect()

# Define schema
session.execute_graph("system.graph(name).create()", {'name': graph_name},
                      execution_profile=EXEC_PROFILE_GRAPH_SYSTEM_DEFAULT)
session.execute_graph("schema.propertyKey('neighborhood').Bigint().create()", execution_profile='schema')
session.execute_graph("schema.propertyKey('name').Text().create()", execution_profile='schema')
session.execute_graph("schema.propertyKey('age').Bigint().create()", execution_profile='schema')
session.execute_graph("schema.propertyKey('weight').Float().create()", execution_profile='schema')
session.execute_graph("schema.vertexLabel('person').partitionKey('neighborhood').clusteringKey('name').properties('age').create()", execution_profile='schema')
session.execute_graph("schema.edgeLabel('knows').properties('weight').connection('person', 'person').create()", execution_profile='schema')

# Execute batch: edge endpoints are addressed by their full primary key
# (partition key + clustering key) plus '~label'.
batch = DseGraph.batch()
batch.add(__.addV('person').property('neighborhood', 0).property('name', 'bob').property('age', 23))
batch.add(__.addV('person').property('neighborhood', 0).property('name', 'alice').property('age', 21))
batch.add(__.addE('knows')
          .from_(Vertex({'neighborhood': 0, 'name': 'bob', '~label': 'person'}))
          .to(Vertex({'neighborhood': 0, 'name': 'alice', '~label': 'person'}))
          .property('weight', 2.3))
session.execute_graph(batch.as_graph_statement())
cluster.close()
def get_or_create_vertice(self, label: str, name: str, value: str):
    """Return the vertex matching (label, name == value), creating it with
    that single property when it does not exist."""
    create = __.addV(label).property(name, value)
    return (
        self.g.V().has(label, name, value)
        .fold()
        .coalesce(__.unfold(), create)
        .next()
    )