def get_nr_emb_within_time(data_graph_path, pattern_path, output_path,
                           time_seconds):
    """Count embeddings found by the exhaustive search within a time budget.

    Starts ``exhaustive.find_nr_emb`` on a daemon thread, sleeps for
    ``time_seconds`` in the calling thread, and then sums the counts that
    are present in ``experiments.globals.fdict_exhaustive_limited`` at that
    moment. The worker thread is not joined or stopped by this function.

    Side effects visible in this function:
      * ``output_path`` is created if it does not exist;
      * ``root_node.dec`` is written there when it is not already present,
        otherwise the root node is read from its first line;
      * ``no_obdecomp.info`` is written there when
        ``OBDsearch.get_heuristic4_OBD`` returns ``None``.

    Args:
        data_graph_path: path of the data graph; read as a gpickle first,
            then as GML if that fails.
        pattern_path: path of the pattern graph in GML format.
        output_path: directory for the marker files listed above.
        time_seconds: number of seconds to let the search run.

    Returns:
        The sum of the values of the frequency dictionary, or ``None`` when
        the dictionary is empty or ``Wrong_root_node`` was raised.
    """
    print("Exhaustive checkup ....")
    nr_emb = None
    monitoring_marks = utils.generate_monitoring_marks(time_seconds,
                                                       time_seconds)

    # Try the pickle format first and fall back to GML. Only ordinary errors
    # trigger the fallback, so Ctrl-C / SystemExit are no longer swallowed.
    try:
        data_graph = nx.read_gpickle(data_graph_path)
    except Exception:
        data_graph = nx.read_gml(data_graph_path)

    pattern = nx.read_gml(pattern_path)
    if not os.path.exists(output_path):
        os.makedirs(output_path)

    root_node = None
    root_node_predicate_name = None
    root_node_file = os.path.join(output_path, 'root_node.dec')
    if not os.path.exists(root_node_file):
        # Nobody decided on a root node yet: choose one here and record it.
        hist = analyzer.get_sorted_labels_by_occurence_frequency_in_graph(
            data_graph_path)
        root_node, root_node_predicate_name = ut.choose_root_node(
            pattern, None, hist)
        with open(root_node_file, 'w') as f:
            f.write(str(root_node) + " ")
            f.write(str(root_node_predicate_name) + "\n")
            f.write("Determined by exhaustive approach")
    else:
        # Only the first line ("<node id> <predicate name>") is relevant.
        with open(root_node_file, 'r') as f:
            first_line = f.readline()
        if first_line:
            fields = first_line.split(" ")
            root_node = int(fields[0])
            root_node_predicate_name = fields[1].strip()

    # Single-argument print(...) yields the same text as the former Python 2
    # print statements and is also valid Python 3 syntax.
    print("root node predicate name:  %s" % (root_node_predicate_name,))
    root_nodes = [
        x for x in data_graph.nodes()
        if data_graph.node[x]['predicate'] == root_node_predicate_name
    ]
    print("Number of root nodes:  %s" % (len(root_nodes),))

    print("Root node, %s" % (pattern.node[root_node],))
    OBdecomp = OBDsearch.get_heuristic4_OBD(pattern, startNode=root_node)

    if OBdecomp is None:
        # Record the missing decomposition and fall back to the flat list.
        print("No ombdecomp!")
        with open(os.path.join(output_path, 'no_obdecomp.info'), 'w') as f:
            f.write("No OBDecomp!")
        OBdecomp = OBDsearch.get_flatList(pattern, startNode=root_node)

    # Flatten the decomposition into one ordered list of pattern nodes.
    Plist = [item for sublist in OBdecomp for item in sublist]
    print("Using OBD: %s" % str(OBdecomp))
    print("and Plist: %s" % str(Plist))
    print("monitoring marks:  %s" % (monitoring_marks,))
    start = timeit.default_timer()
    try:
        lock = threading.Lock()
        print("starting scheduler")
        s = sched.scheduler(time.time, time.sleep)
        s.enter(
            0, 4, exhaustive.find_nr_emb,
            (data_graph, pattern, Plist, root_nodes, output_path, lock))
        t = threading.Thread(target=s.run)
        # Daemon thread: it must not keep the process alive after the budget.
        t.daemon = True
        t.start()
        time.sleep(time_seconds)
        end = timeit.default_timer()
        print("Main finished after  %s seconds" % (end - start,))
        freq_dict = experiments.globals.fdict_exhaustive_limited
        if freq_dict:
            # The worker thread was not joined and may still be running, so
            # sum over a snapshot of the values rather than a live view.
            nr_emb = sum(list(freq_dict.values()))
        else:
            nr_emb = None
    except Wrong_root_node:
        print("Exception for the node occurred!")
    return nr_emb
            f.write("The pattern in question was not selected by exhaustive algorithm w.r.t. selection criteria")
            sys.exit()

    #check if pattern invalid. if yes there is no point in calculation / calculate anyway for to be safe. but output file invalid.info
    if os.path.exists(os.path.join(args.pattern_path,"invalid.info")):
        print "Pattern is invalid... problem with sampling approaches"
        with open(os.path.join(output_path,"invalid.info"),"w") as f:
            f.write("invalid pattern "+args.pattern_path)
    
    #writing input gml into output
    if(not os.path.exists(output_path)):
        os.makedirs(output_path)
    nx.write_gml(pattern, output_path+'/input_pattern.gml')
    
    #choose or load root node and nr observations
    hist=analyzer.get_sorted_labels_by_occurence_frequency_in_graph(args.data_graph_path)
    if not os.path.exists(os.path.join(args.output_path,'root_node.dec')):
        hist=analyzer.get_sorted_labels_by_occurence_frequency_in_graph(args.data_graph_path)
        root_node,root_predicate_name=u.choose_root_node(pattern,None,hist)
        with open(os.path.join(args.output_path,'root_node.dec'),'w') as f:
            f.write(str(root_node)+" "+root_predicate_name.rstrip().lstrip()+"\n")
            f.write("Chosen by furer during the selection ...")
    else: #else, it is not selection so root node was already decided by someone
        with open(os.path.join(args.output_path,'root_node.dec'),'r') as f:
            for line in f.readlines():
                root_node=int(line.split(" ")[0])
                root_node_predicate_name=str(line.split(" ")[1].rstrip().lstrip())
                break
    
    all_randnode_times = []
    rndicts = []
Example #3
0
def _tally_pattern_values(pattern, counter):
    """Add 1 to counter[value] for each pattern node with valueinpattern == 1."""
    for node in pattern.nodes():
        attrs = pattern.node[node]
        if attrs.get('valueinpattern') == 1:
            value = attrs['value']
            counter[value] = counter.get(value, 0) + 1


def _write_frequencies(path, entries):
    """Write str(entry) for each entry on its own line, replacing the file."""
    with open(path, "w") as out:
        out.writelines(str(entry) + "\n" for entry in entries)


def main(path_to_data_graph, path_to_results, output_path, init_pattern_size,
         end_pattern_size):
    """Count node values in stored patterns, split by selection status.

    For every size in ``init_pattern_size..end_pattern_size`` (inclusive) the
    directory ``<path_to_results>/patterns_size_<size>`` is scanned. Inside
    each entry whose name starts with ``batch``, every sub-directory holding
    an ``input_pattern.gml`` is read; the ``value`` attribute of each node
    with ``valueinpattern == 1`` is counted. Patterns whose directory
    contains ``selected.info`` go to the "selected" tally, all others to the
    "not selected" tally.

    The tallies are written, sorted by descending count, to
    ``randvarValuesFreqSelected.info`` and
    ``randvarValuesFreqNotSelected.info`` in ``output_path``; each line is
    the ``str()`` of a ``(value, count)`` tuple.

    Args:
        path_to_data_graph: unused by this function; retained so that
            existing callers keep working.
        path_to_results: root directory holding the ``patterns_size_*``
            directories.
        output_path: existing directory that receives the two output files.
        init_pattern_size: first pattern size to scan.
        end_pattern_size: last pattern size to scan (inclusive).

    Raises:
        OSError: if a ``patterns_size_<size>`` directory cannot be listed.
    """
    freq = {}
    freq_not_selected = {}
    for size in range(init_pattern_size, end_pattern_size + 1):
        size_dir = os.path.join(path_to_results, "patterns_size_" + str(size))
        for batch in os.listdir(size_dir):
            if not batch.startswith("batch"):
                continue
            batch_dir = os.path.join(size_dir, str(batch))
            print(batch_dir)
            for p in os.listdir(batch_dir):
                pattern_dir = os.path.join(batch_dir, p)
                path_to_pattern = os.path.join(pattern_dir,
                                               "input_pattern.gml")
                if not os.path.exists(path_to_pattern):
                    continue
                pattern = nx.read_gml(path_to_pattern)
                selected = os.path.exists(
                    os.path.join(pattern_dir, "selected.info"))
                _tally_pattern_values(
                    pattern, freq if selected else freq_not_selected)

    # Most frequent values first.
    by_count = operator.itemgetter(1)
    sorted_x = sorted(freq.items(), key=by_count, reverse=True)
    sorted_x_NS = sorted(freq_not_selected.items(), key=by_count,
                         reverse=True)

    _write_frequencies(
        os.path.join(output_path, "randvarValuesFreqSelected.info"), sorted_x)
    _write_frequencies(
        os.path.join(output_path, "randvarValuesFreqNotSelected.info"),
        sorted_x_NS)