def get_all_obds(pattern, hist):
    """Collect an OBD for every viable root node of the pattern.

    For each predicate in ``hist`` (a sequence of ``(predicate_name, count)``
    pairs), every pattern node carrying that predicate (with
    ``valueinpattern == 0``) is tried as an OBD start node.  The
    ``'references'`` predicate is never used as a root.

    Parameters:
        pattern: pattern graph (old NetworkX API: ``pattern.node[x]`` dicts
            with ``'predicate'`` and ``'valueinpattern'`` keys).
        hist: label-occurrence histogram; only ``entry[0]`` (the predicate
            name) is read from each entry.

    Returns:
        list of OBDs (one per root node for which the heuristic found one);
        root nodes with no OBD are reported on stdout and skipped.
    """
    obds = []
    for entry in hist:
        root_node_predicate_name = entry[0]  # choose the root node predicate
        if root_node_predicate_name == 'references':
            continue
        possible_root_nodes = [x for x in pattern.nodes()
                               if pattern.node[x]['predicate'] == root_node_predicate_name
                               and pattern.node[x]['valueinpattern'] == 0]
        if len(possible_root_nodes) == 0:
            continue
        for root_node in possible_root_nodes:
            # Compute the OBD once (the original called the search twice:
            # once for the None check and once more to append).
            obd = OBDsearch.get_heuristic4_OBD(pattern, startNode=root_node)
            if obd == None:
                print("no obd search for: ", root_node)
            else:
                obds.append(obd)
    return obds
def prepare_params(args):
    """Prepare all inputs for an embedding-counting run from parsed CLI args.

    Loads the data graph (gpickle, falling back to GML) and the pattern,
    ensures the output directory exists, determines a root node (taken from
    the command line, or chosen from the label-frequency histogram when
    either ``args.root_node_name`` or ``args.root_node_id`` is missing), and
    builds an OBD for the pattern, falling back to a flat node list when no
    OBD exists for the chosen root.

    Returns:
        (data_graph, pattern, OBdecomp, root_node, root_node_predicate_name,
         interval, max_time, monitoring_marks, root_nodes, Plist)
        where ``root_nodes`` are the images of the root predicate in the data
        graph and ``Plist`` is the OBD flattened to a single node list.
    """
    if args.t is None:
        args.t = args.max_time
    monitoring_marks = utils.generate_monitoring_marks(args.t, args.max_time)
    # The data graph may be stored either as a gpickle or as GML; try the
    # binary form first and fall back on any read failure.
    try:
        data_graph = nx.read_gpickle(args.d)
    except Exception:
        data_graph = nx.read_gml(args.d)
    pattern = nx.read_gml(args.p)
    if not os.path.exists(args.o):
        os.makedirs(args.o)
    # DETERMINING ROOT NODE
    if args.root_node_name is None or args.root_node_id is None:
        #### hops make this faster
        hist = analyzer.get_sorted_labels_by_occurence_frequency_in_graph_hops(
            data_graph)
        # max degree node in pattern (hoPS)
        max_degree_nodes = None  # analyzer.get_maximum_degree_nodes(pattern)
        root_node, root_node_predicate_name = utils.choose_root_node(
            pattern, None, hist, max_degree_nodes)
    else:
        root_node = args.root_node_id
        root_node_predicate_name = args.root_node_name
    # get images of root node in the data graph
    root_nodes = [x for x in data_graph.nodes()
                  if data_graph.node[x]['predicate'] == root_node_predicate_name]
    OBdecomp = OBDsearch.get_heuristic4_OBD(pattern, startNode=root_node)
    if OBdecomp is None:
        # No OBD exists for this root node; use a flat decomposition instead.
        # (A dead local flag `no_obd_decomp` was removed here — it was never
        # read or returned.)
        OBdecomp = OBDsearch.get_flatList(pattern, startNode=root_node)
    Plist = [item for sublist in OBdecomp for item in sublist]
    return (data_graph, pattern, OBdecomp, root_node, root_node_predicate_name,
            args.t, args.max_time, monitoring_marks, root_nodes, Plist)
def _read_int_matrix(path):
    """Read rows of space-separated integers from ``path``.

    Returns a list of integer lists, or None when the file does not exist
    (matching the original behavior of appending None for missing
    equivalence/non-equivalence files).
    """
    if not os.path.isfile(path):
        return None
    rows = []
    with open(path) as f:
        for line in f:
            rows.append([int(tok) for tok in line.rstrip().split(" ")])
    return rows


def get_pattern_infos(pattern_path):
    """Load a pattern and its companion metadata files from ``pattern_path``.

    Reads:
      - ``pattern.gml``          — the pattern graph,
      - ``startNodeId.info``     — integer start node for the OBD search,
      - ``rootNode.info``        — root node name (string),
      - ``indices.info``         — one integer per line,
      - ``equivalence.info``     — optional rows of ints (None if absent),
      - ``non_equivalence.info`` — optional rows of ints (None if absent).

    Returns six parallel single-element lists:
        (patterns, OBDPatterns, root_nodes_patterns, indices, equiv, non_equiv)
    """
    pattern = nx.read_gml(os.path.join(pattern_path, 'pattern.gml'))
    patterns = [pattern]
    with open(os.path.join(pattern_path, 'startNodeId.info')) as f:
        start_node = int(f.readline().rstrip())
    OBDPatterns = [OBDsearch.get_heuristic4_OBD(pattern, startNode=start_node)]
    with open(os.path.join(pattern_path, 'rootNode.info')) as f:
        root_nodes_patterns = [f.readline().rstrip()]
    with open(os.path.join(pattern_path, 'indices.info')) as f:
        indices = [[int(line.rstrip()) for line in f]]
    # The two optional files share one parser (was duplicated inline twice).
    equiv = [_read_int_matrix(os.path.join(pattern_path, 'equivalence.info'))]
    non_equiv = [_read_int_matrix(os.path.join(pattern_path, 'non_equivalence.info'))]
    return patterns, OBDPatterns, root_nodes_patterns, indices, equiv, non_equiv
def get_nr_embedding(data_graph, pattern, root_node, root_nodes, n_iter):
    """Estimate the number of embeddings of ``pattern`` in ``data_graph``.

    Runs ``n_iter`` independent Furer-style sampling rounds.  Each round
    picks a uniformly random image of the root node, runs the embedding
    sampler from it, and scales the result by the number of root images.
    Prints iteration count, wall time, mean estimate and spread, then
    returns ``(mean_estimate, per_round_estimates)``.
    """
    started = time.time()
    decomposition = OBDsearch.get_heuristic4_OBD(pattern, startNode=root_node)
    n_images = len(root_nodes)
    estimates = []
    for _ in range(n_iter):
        # sample first image of u
        image = root_nodes[random.randrange(n_images)]
        budget = [1]
        outcome = ad.find_embeddings_Furer([image], data_graph, pattern,
                                           decomposition, 0, [], budget,
                                           None, None, None, None)
        estimates.append(outcome[0] * n_images)
    elapsed = time.time() - started
    nr_emb = sum(estimates) / float(n_iter)
    # spread figure: sqrt of the squared deviations, scaled by 1/n_iter
    stddev = math.sqrt(sum((x - nr_emb) ** 2 for x in estimates)) / float(n_iter)
    print('n_iter', n_iter)
    print('time', elapsed)
    print('estimate', nr_emb)
    print('stddev', stddev)
    return nr_emb, estimates
# NOTE(review): this is the `else:` branch of a script-level `if` whose head
# lies outside this chunk; `args`, `train_data` and `test_data` come from that
# enclosing scope — confirm against the full file.
else:
    # Group outputs of this run in a per-max_time "furer_<T>" folder.
    experiment = "furer_" + str(args.max_time)
    output = os.path.join(args.o, experiment)
    if not os.path.isdir(output):
        os.makedirs(output)
    # Count CSVs for the train/test splits plus per-split timing dictionaries.
    output_train_csv = os.path.join(args.o, experiment, 'train.csv')
    output_test_csv = os.path.join(args.o, experiment, 'test.csv')
    time_dict_train_csv = os.path.join(args.o, experiment, 'time_dict_train.csv')
    time_dict_test_csv = os.path.join(args.o, experiment, 'time_dict_test.csv')
    pattern_path = args.p
    # Single pattern plus its OBD / root / index / (non-)equivalence metadata.
    patterns, OBDPatterns, root_nodes_patterns, indices, pattern_equivalence, non_equivalence = get_pattern_infos(pattern_path)
    target = gtp.get_target_graph(args.const, args.attr)
    OBDTarget = OBDsearch.get_heuristic4_OBD(target, startNode=int(args.sT))
    root_node_target = args.rT
    fieldnames = ['dummy', 'target', patterns[0].name]
    if args.e == "exact":
        # args.t selects which split to count; None means both splits.
        if args.t == None or (args.t != None and args.t == "train"):
            exact.generate_csv_exact_counts(train_data, target, args.const, args.attr, OBDTarget, root_node_target, patterns, OBDPatterns, indices, root_nodes_patterns, pattern_equivalence, non_equivalence, output_train_csv, fieldnames, time_dict_train_csv, args.max_time)
            print("Training data counted ...")
        if args.t == None or (args.t != None and args.t == "test"):
            exact.generate_csv_exact_counts(test_data, target, args.const, args.attr, OBDTarget, root_node_target, patterns, OBDPatterns, indices, root_nodes_patterns, pattern_equivalence, non_equivalence, output_test_csv, fieldnames, time_dict_test_csv, args.max_time)
            print("Test data counted ...")
    # NOTE(review): the body of this branch continues outside this chunk.
    if args.e == "furer":
# NOTE(review): the first five statements are the tail of a loop/function
# whose start lies outside this chunk; `target`, `OBD`, `root_node`,
# `dict_res`, `fieldcounter` and `writer` come from that enclosing scope.
ground_pattern = gtp.ground_pattern(target, pattern)
nr_pat = furer_OBD(ground_pattern, data_graph, OBD, root_node)
# Record this pattern's estimate under its CSV column, then advance.
dict_res[fieldnames[fieldcounter]] = nr_pat
fieldcounter = fieldcounter + 1
writer.writerow(dict_res)


if __name__ == '__main__':
    # Ad-hoc driver with hard-coded paths into the yeast dataset.
    data_graph = '/home/irma/work/DATA/DATA/yeast/YEAST_equiv.gpickle'
    data_graph = nx.read_gpickle(data_graph)
    # First grounding of the 'function'/'constant' target predicates.
    tg = gtp.find_all_groundings_of_predicates(data_graph, 'function', 'constant')[0]
    pattern = nx.read_gml('/home/irma/work/DATA/DATA/yeast/pattern1.gml')
    ground_pattern = gtp.ground_pattern(tg, pattern)
    OBD1 = OBDsearch.get_heuristic4_OBD(pattern, startNode=4)
    target_attr = 'function'
    target_constant = 'constant'
    root_node_target = 'function'
    OBDTarget = [[1], [2]]  # hand-written two-level OBD for the target
    patterns = [pattern]
    OBDPatterns = [OBD1]
    root_nodes_patterns = ['protein_class']
    csvfile = '/home/irma/work/DATA/DATA/yeast/test.csv'
    fieldnames = ['target', 'patt1']
    # NOTE(review): this call passes 10 arguments while the
    # exact.generate_csv_exact_counts call seen elsewhere in this file takes
    # 16 — presumably a different (local) function; verify the signature.
    generate_csv_exact_counts(data_graph, target_attr, target_constant, OBDTarget, root_node_target, patterns, OBDPatterns, root_nodes_patterns, csvfile, fieldnames)
# NOTE(review): the first line is the continuation of a parser.add_argument(...)
# call that starts outside this chunk.
default=None, help='name of a root node')
args = parser.parse_args()
# Preparing the inputs
data_graph, pattern, OBdecomp, root_node, root_node_predicate_name, interval, max_time, monitoring_marks, root_nodes, Plist = prepare_inputs.prepare_params(
    args)
output_folder = 'fk_AD_results'
output_path = os.path.join(args.o, output_folder)
if not os.path.exists(output_path):
    os.makedirs(output_path)
if OBdecomp != None:
    # Degenerate ("false") OBD: every node becomes its own singleton level.
    OBdecomp_false = [[item] for sublist in OBdecomp for item in sublist]
else:
    OBdecomp_false = OBDsearch.get_flatList(pattern, startNode=root_node)
# Main procedure
monitoring_reports = {}
all_furer_times = []
fudicts = []
average_time = 0
start = time.time()
# NOTE(review): argument order looks suspicious — the get_nr_embedding seen
# elsewhere is (data_graph, pattern, root_node, root_nodes, n_iter), yet this
# passes an OBD and a predicate name in those positions; confirm this call
# matches the signature of the get_nr_embedding actually in scope here.
m, estimates = get_nr_embedding(data_graph, pattern, OBdecomp_false, root_node_predicate_name, monitoring_marks)
end = time.time()
average_time += end - start
# Collect the per-iteration dictionaries/timings accumulated by the sampler.
fdictionaries_Furer = globals_sampling.globalist_furer
times_Furer = globals_sampling.globaltimes_furer[1:]
def _min_label_candidates(pattern, hist):
    """Root-node candidates for the least-frequent usable predicate.

    Walks ``hist`` (sorted by label occurrence) and returns the pattern nodes
    matching the first predicate — skipping ``'references'`` — that has at
    least one node with ``valueinpattern == 0``.  Returns ``[]`` when no
    predicate yields candidates (matching the original inline loops).
    """
    possible_root_nodes = []
    for entry in hist:
        predicate = entry[0]  # predicate with min label occurrence first
        if predicate == 'references':
            continue
        possible_root_nodes = [x for x in pattern.nodes()
                               if pattern.node[x]['predicate'] == predicate
                               and pattern.node[x]['valueinpattern'] == 0]
        if len(possible_root_nodes) == 0:
            continue
        break
    return possible_root_nodes


def choose_root_node(pattern, root_node_predicate_name, hist, root_node_list=None, root_node_alg=None):
    """Choose a root node of ``pattern`` according to ``root_node_alg``.

    Strategies (hops code): "MaxDeg" picks randomly from ``root_node_list``;
    "MinLabel" picks randomly among the rarest-label candidates;
    "MinLabelMaxDegree" picks the highest-degree such candidate; "Central"
    picks a random center node; "MinLabelCentral" picks the rarest-label
    candidate closest to the center.  With no algorithm and no predicate
    name, the rarest-label candidate that admits an OBD is chosen; with an
    explicit predicate name, a random node with that predicate is chosen.

    Returns ``(root_node, predicate_of_root_node)``.

    NOTE(review): as in the original, if a strategy finds no candidate the
    function fails (unbound ``root_node``) — callers are assumed to pass
    patterns where a root exists.
    """
    # start hops code: choose different root nodes
    if root_node_list is not None and root_node_alg == "MaxDeg":
        root_node = root_node_list[random.randint(0, len(root_node_list) - 1)]
    elif root_node_alg == "MinLabel":
        possible_root_nodes = _min_label_candidates(pattern, hist)
        root_node = possible_root_nodes[random.randint(0, len(possible_root_nodes) - 1)]
    elif root_node_alg == "MinLabelMaxDegree":
        # Highest-degree candidate among the rarest-label nodes.
        possible_root_nodes = _min_label_candidates(pattern, hist)
        max_degree = 0
        for r_node in possible_root_nodes:
            if pattern.degree(r_node) > max_degree:
                max_degree = pattern.degree(r_node)
                root_node = r_node
    elif root_node_alg == "Central":
        centers = get_center_nodes(pattern)
        root_node = centers[random.randint(0, len(centers) - 1)]
    elif root_node_alg == "MinLabelCentral":
        # Rarest-label candidate nearest to the pattern center.
        possible_root_nodes = _min_label_candidates(pattern, hist)
        for node in get_dist_to_center(pattern):
            if node[0] in possible_root_nodes:
                root_node = node[0]
                break
    # end hops code
    elif root_node_predicate_name is None:
        # Default: rarest-label candidate that actually admits an OBD.
        for entry in hist:
            predicate = entry[0]  # choose the root node predicate
            if predicate == 'references':
                continue
            possible_root_nodes = [x for x in pattern.nodes()
                                   if pattern.node[x]['predicate'] == predicate
                                   and pattern.node[x]['valueinpattern'] == 0]
            if len(possible_root_nodes) == 0:
                continue
            root_node = possible_root_nodes[random.randint(0, len(possible_root_nodes) - 1)]
            if OBDsearch.get_heuristic4_OBD(pattern, startNode=root_node) is None:
                continue
            break
    else:
        # Explicit predicate: any node carrying it (no valueinpattern filter).
        possible_root_nodes = [x for x in pattern.nodes()
                               if pattern.node[x]['predicate'] == root_node_predicate_name]
        root_node = possible_root_nodes[random.randint(0, len(possible_root_nodes) - 1)]
    return root_node, pattern.node[root_node]['predicate']