def undecided_user_audience_check(g, transient_id, website_url, thank_you_page_url, since, min_visited_count):
    """
    Build a traversal answering whether a transient id belongs to an audience.

    The traversal starts at the vertex ``transient_id`` (label
    ``"transientId"``), steps in and then back out over ``"has_identity"``
    edges (presumably reaching every transient id of the same user -- confirm
    against the graph schema), and inspects the outgoing ``"visited"`` edges
    whose ``ts`` is greater than ``since``.

    It is meant to emit ``True`` when both hold, and ``False`` otherwise:

    * more than ``min_visited_count`` of those edges have ``visited_url``
      equal to ``website_url`` (strict comparison, ``P.gt``)
    * none of those edges have ``visited_url`` equal to ``thank_you_page_url``

    The traversal is returned un-iterated; the caller is responsible for
    executing it (e.g. with ``next()``).
    """
    # Both counters are accumulated in the same "visits" side-effect map,
    # under different constant keys.
    tally_page_visit = groupCount("visits").by(constant("page_visits"))
    tally_thank_you_visit = groupCount("visits").by(
        constant("thank_you_page_vists"))

    # "visited" edges newer than `since`, over all linked identities.
    recent_visits = (
        g.V(transient_id)
        .hasLabel("transientId")
        .in_("has_identity")
        .out("has_identity")
        .outE("visited")
        .has("ts", P.gt(since))
    )

    visit_counts = (
        recent_visits
        .choose(has("visited_url", website_url), tally_page_visit)
        .choose(has("visited_url", thank_you_page_url), tally_thank_you_visit)
        .cap("visits")
    )

    # A missing thank-you counter is treated as zero visits.
    no_thank_you_visit = coalesce(
        select("thank_you_page_vists"), constant(0)).is_(0)
    enough_page_visits = select("page_visits").is_(P.gt(min_visited_count))
    member = and_(no_thank_you_visit, enough_page_visits).choose(
        count().is_(1), constant(True))

    # Fall back to False when the membership branch yields nothing.
    return visit_counts.coalesce(member, constant(False))
def topology_subgraph(self, topology_id: str,
                      topology_ref: str) -> GraphTraversalSource:
    """ Returns a traversal source restricted to one topology's sub-graph.

    The returned source is derived from ``self.graph_traversal`` with a
    SubgraphStrategy that only admits vertices whose ``topology_ref`` and
    ``topology_id`` properties equal the supplied values.

    Arguments:
        topology_id (str):  The topology identification string.
        topology_ref (str): The reference string for the version of the
                            topology to sub-graph.

    Returns:
        A GraphTraversalSource instance limited to the matching vertices.
    """
    LOG.debug(
        "Creating traversal source for topology %s subgraph with "
        "reference: %s",
        topology_id,
        topology_ref,
    )

    # Vertex filter: both properties must match.
    vertex_filter = has("topology_ref", topology_ref).has(
        "topology_id", topology_id)
    strategy = SubgraphStrategy(vertices=vertex_filter)

    return self.graph_traversal.withStrategies(strategy)
def query_users_active_in_n_days(g, n=30, today=datetime(2016, 6, 22, 23, 59), limit=1000):
    """Get users that were active in the ``n`` days before ``today``.

    Builds a single condition -- ``ts`` strictly greater than
    ``today - n days`` -- and delegates to
    ``query_users_active_in_given_date_intervals`` with it and ``limit``.

    Note that ``today`` defaults to a fixed date (2016-06-22 23:59), not the
    current time; pass it explicitly to query relative to another moment.
    """
    window_start = today - timedelta(days=n)
    recent_activity = has("ts", P.gt(window_start))
    return query_users_active_in_given_date_intervals(
        g, [recent_activity], limit)
def get_component_paths(graph_client: GremlinClient, topology_id: str,
                        topology_ref: str) -> List[List[str]]:
    """ Gets component level paths through the specified topology: one path
    for every source/sink component pair that is connected.

    NOTE(review): this function was previously described as cached, but no
    caching is visible in this block -- confirm whether a caching decorator
    is applied where it is defined.

    Arguments:
        graph_client (GremlinClient):   The graph database client instance.
        topology_id (str):  The topology identification string.
        topology_ref (str): The topology graph identification string.

    Returns:
        List[List[str]]: A list of component name string path lists. For
        example [["A", "B", "D"], ["A", "C", "D"]]. A source/sink pair with
        no connecting path contributes no entry.

    Raises:
        StopIteration: If no vertex exists for one of the source components
                       (propagated from the traversal's ``next()``).
    """
    sources_sinks: Dict[str, List[str]] = get_source_and_sink_comps(
        graph_client, topology_id, topology_ref)

    sgt: GraphTraversalSource = graph_client.topology_subgraph(
        topology_id, topology_ref)

    output: List[List[str]] = []

    for source in sources_sinks["sources"]:

        # Pick a start vertex for this source
        start: Vertex = sgt.V().has("component", source).next()

        for sink in sources_sinks["sinks"]:

            LOG.debug(
                "Finding paths from source component: %s to sink component: %s",
                source,
                sink,
            )

            # Find one path from the source vertex to any sink vertex. out()
            # only places vertices on the path, so a single by("component")
            # modulator maps every path element to its component name. (A
            # second, empty by() would be applied round-robin and leave every
            # other vertex unconverted, dropping its component from the
            # result.)
            # toList() is used instead of next() so that an unreachable sink
            # yields an empty list rather than raising StopIteration.
            found = (sgt.V(start)
                     .repeat(out("logically_connected").simplePath())
                     .until(has("component", sink))
                     .path().by("component")
                     .limit(1)
                     .toList())

            if not found:
                LOG.debug(
                    "No path found from source component: %s to sink "
                    "component: %s", source, sink)
                continue

            output.append(list(found[0]))

    return output
def undecided_users_audience(g, website_url, thank_you_page_url, since, min_visited_count):
    """
    Build a traversal selecting the users that meet the audience conditions.

    Starting from the ``"website"`` vertex ``website_url``, the traversal
    groups the visitors' parent vertices (reached through ``"has_identity"``)
    by how many ``"visited"`` edges newer than ``since`` lead to the website.
    It keeps a parent when:

    * its visit count is greater than ``min_visited_count`` (strict
      comparison, ``P.gt``)
    * its ``pid`` is not among the pids collected from identities with a
      ``"visited"`` edge to ``thank_you_page_url`` newer than ``since``

    For the remaining parents it emits the ``uid`` values of the vertices on
    their outgoing ``"has_identity"`` edges (presumably the transient ids --
    confirm against the graph schema).

    The traversal is returned un-iterated; the caller executes it.
    """
    # Entry of the groupCount map: keep when the count exceeds the minimum.
    visited_often_enough = select(Column.values).is_(
        P.gt(min_visited_count))

    # Per candidate: folded list of pids that recently hit the thank-you page.
    thank_you_visitor_pids = (
        out("has_identity")
        .outE("visited")
        .has("visited_url", thank_you_page_url)
        .has("ts", P.gt(since))
        .outV()
        .in_("has_identity")
        .dedup()
        .values("pid")
        .fold()
    )

    is_thank_you_visitor = has("pid", where(P.within("pids_that_visited")))

    frequent_visitors = (
        g.V(website_url)
        .hasLabel("website")
        .inE("visited")
        .has("ts", P.gt(since))
        .outV()
        .in_("has_identity")
        .groupCount()
        .unfold()
        .dedup()
        .where(visited_often_enough)
        .select(Column.keys)
        .as_("pids")
    )

    return (
        frequent_visitors
        .map(thank_you_visitor_pids)
        .as_("pids_that_visited")
        .select("pids")
        .not_(is_thank_you_visitor)
        .out("has_identity")
        .values("uid")
    )