def run_gray(self):
    logging.info("---- Start G-Ray ----")

    st = time.time()
    self.computeRWR()
    ed = time.time()
    logging.info("#### Compute RWR: %f [s]" % (ed - st))

    st = time.time()
    ext = extract.Extract(self.graph, self.graph_rwr)
    ext.computeExtract()
    self.extracts[''] = ext
    ed = time.time()
    logging.info("#### Compute Paths: %f [s]" % (ed - st))

    # pr = cProfile.Profile()
    # pr.enable()
    st = time.time()
    self.process_gray()
    ed = time.time()
    logging.info("#### Compute G-Ray: %f [s]" % (ed - st))
def process_multiple_gray(seed_list, g_file, q_seed, q_args):
    query, cond, _, _, _, _ = parse_args(q_args)

    st = time.time()
    g = load_graph(g_file)
    ed = time.time()
    print("Load graph: %f" % (ed - st))

    st = time.time()
    g_rwr = computeRWR(g)
    ed = time.time()
    print("RWR time: %f" % (ed - st))

    st = time.time()
    g_ext = extract.Extract(g, g_rwr)
    g_ext.computeExtract()
    ed = time.time()
    print("Extract time: %f" % (ed - st))

    for seed in seed_list:
        process_single_gray(seed, q_seed, query, cond, g_rwr, g_ext.pre)
    return len(seed_list)
def getExtract(self, label):
    if label not in self.extracts:
        ext = extract.Extract(self.graph, self.graph_rwr, label)
        ext.computeExtract()
        self.extracts[label] = ext
    return self.extracts[label]
def run_parallel_gray(gfile, qargs, num_proc):
    """
    :param gfile: Graph JSON file
    :param qargs: Query args
    :param num_proc: Number of processes
    :return:
    """
    manager = Manager()
    patterns = manager.dict()  # Shared result store: seed vertex -> QueryResult
    g_pre = dict()

    g_ = load_graph(gfile)
    query_, cond_, _, _, _, _ = parse_args(qargs)

    st = time.time()
    g_rwr_ = computeRWR(g_)
    ed = time.time()
    print("RWR time: %f" % (ed - st))

    st = time.time()
    g_ext_ = extract.Extract(g_, g_rwr_)
    g_ext_.computeExtract()
    # Copy the predecessor table into plain dicts so it can be passed to workers
    for src, dsts in g_ext_.pre.items():
        d = dict()
        for dst, p in dsts.items():
            d[dst] = p
        g_pre[src] = d
    ed = time.time()
    print("Extract time: %f" % (ed - st))

    # Find query seed candidates
    q_seed_ = list(query_.nodes())[0]
    kl = Condition.get_node_label(query_, q_seed_)
    kp = Condition.get_node_props(query_, q_seed_)
    seeds = Condition.filter_nodes(g_, kl, kp)  # Find all candidates
    if not seeds:  ## No seed candidates
        print("No more seed vertices available. Exit G-Ray algorithm.")
        return

    # Split the seed list into one chunk per process
    num_seeds = len(seeds)
    num_members = num_seeds // num_proc
    seed_lists = list()
    for i in range(num_proc):
        st = i * num_members
        ed = num_seeds if (i == num_proc - 1) else (i + 1) * num_members
        seed_lists.append(seeds[st:ed])

    def process_multiple_gray(seed_list, q_seed, q_args, g_rwr, g_pre):
        # Note: this nested variant shadows the module-level process_multiple_gray()
        query, cond, _, _, _, _ = parse_args(q_args)
        for seed in seed_list:
            process_single_gray(seed, q_seed, query, cond, g_rwr, g_pre)

    def process_single_gray(seed, q_seed, query, cond, g_rwr, g_pre):
        """
        :param seed: Seed vertex of data graph
        :param q_seed: Seed vertex of query graph
        :param cond: Condition parser
        :return:
        """
        def getRWR(i, j):
            if i not in g_rwr:
                return 0.0
            return g_rwr[i].get(j, 0.0)

        def bridge(i, j):
            # Reconstruct the path from i to j by walking the predecessor table
            lst = list()
            if i not in g_pre:
                return lst
            if j not in g_pre[i]:
                return lst
            v = j
            while v != i:
                lst.append(v)
                if v not in g_pre[i]:
                    return []
                v = g_pre[i][v]
            lst.reverse()
            return lst

        def neighbor_expander(i, k, l, ret, rev_edge):
            # Collect the candidate vertices j with the highest RWR score w.r.t. i
            max_good = float('-inf')
            candidates_j = g_rwr.keys()
            j = []
            for j_ in candidates_j:
                if j_ in ret.nodes() or j_ == i:
                    continue
                if rev_edge:
                    log_good = log(getRWR(j_, i) + 1.0e-10)  # avoid math domain errors when the vertex is unreachable
                else:
                    log_good = log(getRWR(i, j_) + 1.0e-10)  # avoid math domain errors when the vertex is unreachable
                if log_good > max_good:
                    j = [j_]
                    max_good = log_good
                elif log_good >= max_good - 1.0e-5:  ## Almost the same, may be a little smaller due to limited precision
                    j.append(j_)
            return j

        result = nx.Graph()
        touched = []
        nodemap = {}  ## Query vertex -> graph vertex
        unproc = query.copy()

        nodemap[q_seed] = seed
        result.add_node(seed)
        touched.append(q_seed)

        ## Process neighbors
        snapshot_stack = list()
        snapshot_stack.append((result, touched, nodemap, unproc))
        while snapshot_stack:
            result, touched, nodemap, unproc = snapshot_stack.pop()
            if unproc.number_of_edges() == 0:
                if valid_result(result, query, nodemap):
                    # append_results(cond, seed, result, nodemap)
                    print("Append results: %s" % str(result.nodes()))
                continue

            # Pick an unprocessed query edge (k, l) whose endpoint k is already matched
            k = None
            l = None
            reversed_edge = False
            for k_ in touched:  ## Edge
                for l_ in query.neighbors(k_):
                    if not unproc.has_edge(k_, l_):
                        continue
                    l = l_
                    break
                if l is not None:
                    k = k_
                    break
            if l is None:  # No more matched vertices
                continue
            i = nodemap[k]
            touched.append(l)

            #### Find a path or edge (Begin)
            src, dst = (l, k) if reversed_edge else (k, l)
            elabel = Condition.get_edge_label(unproc, src, dst)
            if elabel is None:  # Any label is OK
                eid = None
                el = ''
            else:
                eid = elabel[0]
                el = elabel[1]
            Condition.remove_edge_from_id(unproc, src, dst, eid)

            # Find a neighbor and connecting edge
            if l in nodemap:
                jlist = [nodemap[l]]
            else:
                jlist = neighbor_expander(i, k, l, result, reversed_edge)  # find j(l) from i(k)
            if not jlist:  ## No more neighbor candidates
                continue

            for j in jlist:
                g_src, g_dst = (j, i) if reversed_edge else (i, j)
                if g_src == g_dst:  ## No bridge process necessary
                    continue
                path = bridge(g_src, g_dst)
                if not path:
                    continue

                result_ = nx.MultiGraph(result)
                touched_ = list(touched)
                nodemap_ = dict(nodemap)
                unproc_ = nx.MultiGraph(unproc)

                if l in nodemap_ and nodemap_[l] != j:  ## Need to replace mapping
                    prevj = nodemap_[l]
                    result_.remove_node(prevj)
                nodemap_[l] = j
                result_.add_node(j)  ## Property addition is not necessary

                prev = g_src
                valid = True
                for n in path:
                    # Edge-label check along the path is currently disabled
                    result_.add_edge(prev, n)
                    prev = n
                if valid:
                    snapshot_stack.append((result_, touched_, nodemap_, unproc_))

    def append_results(cond, seed, result, nodemap):
        if cond is not None and not cond.eval(result, nodemap):
            return False  ## Does not satisfy the complex condition
        for r in patterns.values():
            rg = r.get_graph()
            if equal_graphs(rg, result):
                return False  ## Duplicate pattern
        qresult = QueryResult.QueryResult(result, nodemap)
        patterns[seed] = qresult
        print("Append Results: %s" % str(result.nodes()))
        return True

    st = time.time()
    pool = Pool(num_proc)
    # pool = pp.ThreadPool(num_proc)
    # pool = pp.ProcessPool(num_proc)  ## Multiprocessing is slow
    pool.map_async(partial(process_multiple_gray,
                           q_seed=q_seed_,
                           q_args=list(qargs),
                           g_rwr=g_rwr_,
                           g_pre=g_pre),
                   seed_lists)
    pool.close()
    pool.join()
    ed = time.time()
    print("Parallel G-Ray time: %f" % (ed - st))
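# --- Hypothetical driver (assumption, not part of the original module) -------
# Minimal sketch of how run_parallel_gray() might be invoked. The graph file
# path and the query-argument list below are placeholders; their exact format
# depends on load_graph() and parse_args(), which are defined elsewhere in the
# project. The __main__ guard matters because Pool workers re-import the module
# on some platforms.
if __name__ == '__main__':
    sample_gfile = "data/graph.json"    # placeholder Graph JSON file
    sample_qargs = ["data/query.json"]  # placeholder query args for parse_args()
    run_parallel_gray(sample_gfile, sample_qargs, num_proc=4)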