def _load_data_graph(data_graph_path):
    """Load the data graph, trying the pickle format first and GML second.

    NOTE(review): ``nx.read_gpickle`` unpickles the file, so only trusted
    graph files should be passed here.
    """
    try:
        return nx.read_gpickle(data_graph_path)
    except Exception:
        # Not readable as a pickle: fall back to GML. ``Exception`` (rather
        # than a bare ``except``) lets KeyboardInterrupt/SystemExit through.
        return nx.read_gml(data_graph_path)


def _load_or_choose_root_node(pattern, data_graph_path, output_path):
    """Return ``(root_node, root_node_predicate_name)`` for *pattern*.

    If ``root_node.dec`` does not exist in *output_path*, the root node is
    chosen with ``ut.choose_root_node`` and the decision is written to that
    file. Otherwise the decision is parsed from the first line of the file
    (``"<node id> <predicate name>"``). Both values are ``None`` when the
    file exists but is empty.
    """
    decision_path = os.path.join(output_path, 'root_node.dec')

    if not os.path.exists(decision_path):
        hist = analyzer.get_sorted_labels_by_occurence_frequency_in_graph(
            data_graph_path)
        # ``None``: no predefined predicate, the chooser picks one.
        root_node, root_node_predicate_name = ut.choose_root_node(
            pattern, None, hist)
        with open(decision_path, 'w') as f:
            f.write(str(root_node) + " ")
            f.write(str(root_node_predicate_name) + "\n")
            f.write("Determined by exhaustive approach")
        return root_node, root_node_predicate_name

    root_node = None
    root_node_predicate_name = None
    with open(decision_path, 'r') as f:
        # Only the first line carries the decision; the rest is a remark.
        first_line = f.readline()
    if first_line:
        fields = first_line.split(" ")
        root_node = int(fields[0])
        root_node_predicate_name = str(fields[1].strip())
    return root_node, root_node_predicate_name


def get_nr_emb_within_time(data_graph_path, pattern_path, output_path,
                           time_seconds):
    """Run the exhaustive embedding count for a limited amount of time.

    The search (``exhaustive.find_nr_emb``) is started in a daemon thread,
    the calling thread sleeps for *time_seconds*, and then the counts found
    in ``experiments.globals.fdict_exhaustive_limited`` are summed. The
    worker thread is not joined or stopped by this function.

    Args:
        data_graph_path: Path to the data graph (gpickle, or GML as a
            fallback).
        pattern_path: Path to the pattern graph in GML format.
        output_path: Directory for ``root_node.dec`` and
            ``no_obdecomp.info``; created if it does not exist.
        time_seconds: How long to let the search run before reading the
            result.

    Returns:
        The sum of all values in ``fdict_exhaustive_limited``, or ``None``
        if that dictionary is empty or ``Wrong_root_node`` was raised.
    """
    print("Exhaustive checkup ....")
    nr_emb = None
    monitoring_marks = utils.generate_monitoring_marks(time_seconds,
                                                       time_seconds)
    data_graph = _load_data_graph(data_graph_path)
    pattern = nx.read_gml(pattern_path)

    if not os.path.exists(output_path):
        os.makedirs(output_path)

    root_node, root_node_predicate_name = _load_or_choose_root_node(
        pattern, data_graph_path, output_path)
    # The print calls use one pre-formatted argument so the emitted text is
    # the same whether ``print`` is a statement or a function.
    print("root node predicate name:  %s" % (root_node_predicate_name,))

    # Candidate start nodes: data nodes carrying the root predicate.
    root_nodes = [
        x for x in data_graph.nodes()
        if data_graph.node[x]['predicate'] == root_node_predicate_name
    ]
    print("Number of root nodes:  %s" % (len(root_nodes),))

    print("Root node, %s" % (pattern.node[root_node],))
    OBdecomp = OBDsearch.get_heuristic4_OBD(pattern, startNode=root_node)
    if OBdecomp is None:
        # No decomposition found: record that fact and fall back to the
        # flat list provided by OBDsearch.
        print("No ombdecomp!")
        with open(os.path.join(output_path, 'no_obdecomp.info'), 'w') as f:
            f.write("No OBDecomp!")
        OBdecomp = OBDsearch.get_flatList(pattern, startNode=root_node)

    # Flatten the decomposition into a single ordered list of pattern nodes.
    Plist = [item for sublist in OBdecomp for item in sublist]
    print("Using OBD: %s" % str(OBdecomp))
    print("and Plist: %s" % str(Plist))
    print("monitoring marks:  %s" % (monitoring_marks,))

    start = timeit.default_timer()
    try:
        lock = threading.Lock()
        print("starting scheduler")
        s = sched.scheduler(time.time, time.sleep)
        s.enter(0, 4, exhaustive.find_nr_emb,
                (data_graph, pattern, Plist, root_nodes, output_path, lock))
        t = threading.Thread(target=s.run)
        # Daemon thread: it must not keep the process alive after the
        # time budget has been spent.
        t.daemon = True
        t.start()
        time.sleep(time_seconds)
        end = timeit.default_timer()
        print("Main finished after  %s seconds" % (end - start,))

        freq_dict = experiments.globals.fdict_exhaustive_limited
        # Snapshot the values before summing: the worker thread was not
        # joined and may still be running.
        nr_emb = sum(list(freq_dict.values())) if freq_dict else None
    except Wrong_root_node:
        # NOTE(review): exceptions raised inside the worker thread do not
        # propagate to this thread, so this handler probably cannot fire
        # for errors in find_nr_emb -- confirm the intended behaviour.
        print("Exception for the node occurred!")
    return nr_emb
f.write("The pattern in question was not selected by exhaustive algorithm w.r.t. selection criteria") sys.exit() #check if pattern invalid. if yes there is no point in calculation / calculate anyway for to be safe. but output file invalid.info if os.path.exists(os.path.join(args.pattern_path,"invalid.info")): print "Pattern is invalid... problem with sampling approaches" with open(os.path.join(output_path,"invalid.info"),"w") as f: f.write("invalid pattern "+args.pattern_path) #writing input gml into output if(not os.path.exists(output_path)): os.makedirs(output_path) nx.write_gml(pattern, output_path+'/input_pattern.gml') #choose or load root node and nr observations hist=analyzer.get_sorted_labels_by_occurence_frequency_in_graph(args.data_graph_path) if not os.path.exists(os.path.join(args.output_path,'root_node.dec')): hist=analyzer.get_sorted_labels_by_occurence_frequency_in_graph(args.data_graph_path) root_node,root_predicate_name=u.choose_root_node(pattern,None,hist) with open(os.path.join(args.output_path,'root_node.dec'),'w') as f: f.write(str(root_node)+" "+root_predicate_name.rstrip().lstrip()+"\n") f.write("Chosen by furer during the selection ...") else: #else, it is not selection so root node was already decided by someone with open(os.path.join(args.output_path,'root_node.dec'),'r') as f: for line in f.readlines(): root_node=int(line.split(" ")[0]) root_node_predicate_name=str(line.split(" ")[1].rstrip().lstrip()) break all_randnode_times = [] rndicts = []
def _count_pattern_values(pattern, counts):
    """Add to *counts* each node value of *pattern* with ``valueinpattern == 1``."""
    for node in pattern.nodes():
        attrs = pattern.node[node]
        if attrs.get('valueinpattern') == 1:
            value = attrs['value']
            counts[value] = counts.get(value, 0) + 1


def _write_value_frequencies(file_path, counts):
    """Write *counts* as ``value count`` lines, most frequent value first."""
    ranked = sorted(counts.items(), key=operator.itemgetter(1), reverse=True)
    with open(file_path, "w") as out:
        out.writelines(
            "%s %s\n" % (value, count) for value, count in ranked)


def main(path_to_data_graph, path_to_results, output_path, init_pattern_size,
         end_pattern_size):
    """Count how often node values occur in selected / not selected patterns.

    Walks ``<path_to_results>/patterns_size_<i>/batch*/<pattern dir>/`` for
    every ``i`` from *init_pattern_size* to *end_pattern_size* (inclusive)
    and reads each ``input_pattern.gml`` found there. A pattern directory
    containing ``selected.info`` counts as selected; all others count as not
    selected. For every pattern node with ``valueinpattern == 1`` the node's
    ``value`` is tallied.

    The tallies are written, sorted by descending frequency, to
    ``randvarValuesFreqSelected.info`` and
    ``randvarValuesFreqNotSelected.info`` in *output_path*, one
    ``value count`` pair per line.

    Args:
        path_to_data_graph: Path to the pickled data graph.
        path_to_results: Root directory holding the ``patterns_size_<i>``
            folders.
        output_path: Existing directory that receives the two ``.info``
            files.
        init_pattern_size: First pattern size to scan.
        end_pattern_size: Last pattern size to scan (inclusive).
    """
    labels = g.get_sorted_labels_by_occurence_frequency_in_graph(
        path_to_data_graph)
    freq = {}
    freq_not_selected = {}
    D = nx.read_gpickle(path_to_data_graph)
    # NOTE(review): label_values is not used by anything in this function;
    # it is kept because the helper in ``g`` is not visible from here.
    # Drop it (and the graph load) if the helper has no side effects.
    label_values = {}
    for label in labels:
        label_values[label] = (
            g.get_all_possible_nodes_wtih_values_in_data_graph(D, label[0]))

    for size in range(init_pattern_size, end_pattern_size + 1):
        size_dir = os.path.join(path_to_results,
                                "patterns_size_" + str(size))
        for batch in os.listdir(size_dir):
            if not batch.startswith("batch"):
                continue
            batch_dir = os.path.join(size_dir, str(batch))
            print(batch_dir)
            for pattern_dir_name in os.listdir(batch_dir):
                pattern_dir = os.path.join(batch_dir, pattern_dir_name)
                path_to_pattern = os.path.join(pattern_dir,
                                               "input_pattern.gml")
                if not os.path.exists(path_to_pattern):
                    continue
                pattern = nx.read_gml(path_to_pattern)
                is_selected = os.path.exists(
                    os.path.join(pattern_dir, "selected.info"))
                _count_pattern_values(
                    pattern, freq if is_selected else freq_not_selected)

    # The original called the non-existent ``sorted.reverse``, opened the
    # files read-only and tried to add a tuple to a string; the helper
    # sorts in descending order and writes formatted lines in "w" mode.
    _write_value_frequencies(
        os.path.join(output_path, "randvarValuesFreqSelected.info"), freq)
    _write_value_frequencies(
        os.path.join(output_path, "randvarValuesFreqNotSelected.info"),
        freq_not_selected)