Example #1
def get_all_obds(pattern, hist):
    """Collect an OBD for every viable root node, trying predicates in histogram order."""
    obds = []
    for i in range(len(hist)):
        root_node_predicate_name = hist[i][0]  # choose the root node predicate
        if root_node_predicate_name == 'references':
            continue
        possible_root_nodes = [x for x in pattern.nodes()
                               if pattern.node[x]['predicate'] == root_node_predicate_name
                               and pattern.node[x]['valueinpattern'] == 0]
        if len(possible_root_nodes) == 0:
            continue
        for root_node in possible_root_nodes:
            obd = OBDsearch.get_heuristic4_OBD(pattern, startNode=root_node)
            if obd is None:
                print("no obd search for: ", root_node)
            else:
                obds.append(obd)
    return obds
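For reference, a minimal input sketch for this helper, assuming a NetworkX pattern graph (a version where Graph.node is still available, i.e. pre-2.4) whose nodes carry 'predicate' and 'valueinpattern' attributes, plus a (label, frequency) histogram sorted by occurrence. The labels and counts below are made up, and the final call assumes this project's OBDsearch module is importable.

import networkx as nx

pattern = nx.Graph()
pattern.add_node(1, predicate='protein_class', valueinpattern=0)  # toy nodes
pattern.add_node(2, predicate='function', valueinpattern=0)
pattern.add_edge(1, 2)

hist = [('protein_class', 3), ('function', 17)]  # toy label-frequency histogram

obds = get_all_obds(pattern, hist)  # requires this project's OBDsearch module
print(len(obds), 'OBDs collected')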
Example #2
def prepare_params(args):
    if args.t is None:
        args.t = args.max_time
    monitoring_marks = utils.generate_monitoring_marks(args.t, args.max_time)

    # the data graph may be stored either as a gpickle or as GML
    try:
        data_graph = nx.read_gpickle(args.d)
    except Exception:
        data_graph = nx.read_gml(args.d)

    pattern = nx.read_gml(args.p)
    if not os.path.exists(args.o):
        os.makedirs(args.o)

    # DETERMINING ROOT NODE
    if args.root_node_name is None or args.root_node_id is None:
        # HOPS: sorted label-occurrence histogram (used to make root selection faster)
        hist = analyzer.get_sorted_labels_by_occurence_frequency_in_graph_hops(
            data_graph)
        # max-degree nodes in the pattern (HOPS); currently disabled
        max_degree_nodes = None  # analyzer.get_maximum_degree_nodes(pattern)
        root_node, root_node_predicate_name = utils.choose_root_node(
            pattern, None, hist, max_degree_nodes)
    else:
        root_node = args.root_node_id
        root_node_predicate_name = args.root_node_name

    # get images of the root node in the data graph
    root_nodes = [
        x for x in data_graph.nodes()
        if data_graph.node[x]['predicate'] == root_node_predicate_name
    ]

    # fall back to a flat decomposition when no OBD exists for this root
    OBdecomp = OBDsearch.get_heuristic4_OBD(pattern, startNode=root_node)
    if OBdecomp is None:
        no_obd_decomp = True
        OBdecomp = OBDsearch.get_flatList(pattern, startNode=root_node)
    Plist = [item for sublist in OBdecomp for item in sublist]
    return (data_graph, pattern, OBdecomp, root_node, root_node_predicate_name,
            args.t, args.max_time, monitoring_marks, root_nodes, Plist)
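A minimal sketch of the argument object prepare_params expects. The attribute names are taken from the code above, while every value here is purely illustrative (hypothetical paths and time budget).

import argparse

args = argparse.Namespace(
    t=None,                      # monitoring interval; defaults to max_time
    max_time=600,                # assumed time budget
    d='data_graph.gpickle',      # hypothetical data-graph path (gpickle or GML)
    p='pattern.gml',             # hypothetical pattern path
    o='results',                 # hypothetical output folder
    root_node_id=None,           # leave both as None to let choose_root_node pick the root
    root_node_name=None,
)
# params = prepare_params(args)  # requires nx, os, utils, analyzer, OBDsearch from this project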
Example #3
def get_pattern_infos(pattern_path):
    patterns = []
    OBDPatterns = []
    root_nodes_patterns = []
    indices = []
    equiv = []
    non_equiv = []

    # pattern graph and its OBD, computed from the stored start node
    pattern = nx.read_gml(os.path.join(pattern_path, 'pattern.gml'))
    patterns.append(pattern)
    with open(os.path.join(pattern_path, 'startNodeId.info')) as f:
        start_node = int(f.readline().rstrip())
    OBD = OBDsearch.get_heuristic4_OBD(pattern, startNode=start_node)
    OBDPatterns.append(OBD)

    with open(os.path.join(pattern_path, 'rootNode.info')) as f:
        root_node = f.readline().rstrip()
    root_nodes_patterns.append(root_node)

    ind = []
    with open(os.path.join(pattern_path, 'indices.info')) as f:
        for line in f:
            ind.append(int(line.rstrip()))
    indices.append(ind)

    # optional equivalence constraints: space-separated integer groups, one per line
    if not os.path.isfile(os.path.join(pattern_path, 'equivalence.info')):
        equiv.append(None)
    else:
        indices_equiv = []
        with open(os.path.join(pattern_path, 'equivalence.info')) as f:
            for line in f:
                indices_equiv.append([int(elem) for elem in line.rstrip().split(" ")])
        equiv.append(indices_equiv)

    # optional non-equivalence constraints, same format
    if not os.path.isfile(os.path.join(pattern_path, 'non_equivalence.info')):
        non_equiv.append(None)
    else:
        indices_non_equiv = []
        with open(os.path.join(pattern_path, 'non_equivalence.info')) as f:
            for line in f:
                indices_non_equiv.append([int(elem) for elem in line.rstrip().split(" ")])
        non_equiv.append(indices_non_equiv)

    return patterns, OBDPatterns, root_nodes_patterns, indices, equiv, non_equiv
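As a reading aid, the on-disk layout this loader expects, with file names taken from the code above and descriptions inferred from how each file is parsed:

# <pattern_path>/
#     pattern.gml           - the pattern graph (GML)
#     startNodeId.info      - a single integer: start node for the OBD search
#     rootNode.info         - a single line: root node predicate name
#     indices.info          - one integer per line
#     equivalence.info      - optional: space-separated integer groups, one per line
#     non_equivalence.info  - optional: same format as equivalence.info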
Example #4
def get_nr_embedding(data_graph, pattern, root_node, root_nodes, n_iter):

    sum_estimates = 0
    estimates = list()

    start = time.time()

    OBdecomp = OBDsearch.get_heuristic4_OBD(pattern, startNode=root_node)

    for iteration_counter in range(n_iter):

        # sample the first image of u uniformly from the candidate root images
        vi = random.randrange(len(root_nodes))
        v = root_nodes[vi]

        list_for_spent = [1]

        result = ad.find_embeddings_Furer([v], data_graph, pattern, OBdecomp,
                                          0, [], list_for_spent, None, None,
                                          None, None)

        # scale the sampled count by the number of candidate root images
        c = result[0] * len(root_nodes)

        estimates.append(c)
        sum_estimates += c

    end = time.time()
    nr_emb = sum_estimates / float(n_iter)
    # sqrt(sum((x - mean)^2)) / n_iter equals the standard error of the mean
    # (population standard deviation divided by sqrt(n_iter))
    stddev = math.sqrt(sum([(x - nr_emb)**2
                            for x in estimates])) / float(n_iter)

    print('n_iter', n_iter)
    print('time', end - start)
    print('estimate', nr_emb)
    print('stddev', stddev)

    return nr_emb, estimates
Example #5
        else:
            experiment = "furer_" + str(args.max_time)

        output = os.path.join(args.o, experiment)
        if not os.path.isdir(output):
            os.makedirs(output)

        output_train_csv = os.path.join(args.o, experiment, 'train.csv')
        output_test_csv = os.path.join(args.o, experiment, 'test.csv')
        time_dict_train_csv = os.path.join(args.o, experiment, 'time_dict_train.csv')
        time_dict_test_csv = os.path.join(args.o, experiment, 'time_dict_test.csv')

        # load pattern metadata, the grounded target graph, and its OBD
        pattern_path = args.p
        patterns, OBDPatterns, root_nodes_patterns, indices, pattern_equivalence, non_equivalence = get_pattern_infos(pattern_path)
        target = gtp.get_target_graph(args.const, args.attr)
        OBDTarget = OBDsearch.get_heuristic4_OBD(target, startNode=int(args.sT))
        root_node_target = args.rT

        fieldnames = ['dummy', 'target', patterns[0].name]
        if args.e == "exact":
            if args.t is None or args.t == "train":
                exact.generate_csv_exact_counts(
                    train_data, target, args.const, args.attr, OBDTarget,
                    root_node_target, patterns, OBDPatterns, indices,
                    root_nodes_patterns, pattern_equivalence, non_equivalence,
                    output_train_csv, fieldnames, time_dict_train_csv, args.max_time)
            print("Training data counted ...")

            if args.t is None or args.t == "test":
                exact.generate_csv_exact_counts(
                    test_data, target, args.const, args.attr, OBDTarget,
                    root_node_target, patterns, OBDPatterns, indices,
                    root_nodes_patterns, pattern_equivalence, non_equivalence,
                    output_test_csv, fieldnames, time_dict_test_csv, args.max_time)
            print("Test data counted ...")
        if args.e == "furer":
Example #6
                ground_pattern = gtp.ground_pattern(target, pattern)
                nr_pat = furer_OBD(ground_pattern, data_graph, OBD, root_node)
                dict_res[fieldnames[fieldcounter]] = nr_pat
                fieldcounter = fieldcounter + 1
            writer.writerow(dict_res)


if __name__ == '__main__':
    data_graph = '/home/irma/work/DATA/DATA/yeast/YEAST_equiv.gpickle'
    data_graph = nx.read_gpickle(data_graph)
    tg = gtp.find_all_groundings_of_predicates(data_graph, 'function',
                                               'constant')[0]
    pattern = nx.read_gml('/home/irma/work/DATA/DATA/yeast/pattern1.gml')
    ground_pattern = gtp.ground_pattern(tg, pattern)

    OBD1 = OBDsearch.get_heuristic4_OBD(pattern, startNode=4)
    target_attr = 'function'
    target_constant = 'constant'
    root_node_target = 'function'
    OBDTarget = [[1], [2]]

    patterns = [pattern]
    OBDPatterns = [OBD1]
    root_nodes_patterns = ['protein_class']
    csvfile = '/home/irma/work/DATA/DATA/yeast/test.csv'
    fieldnames = ['target', 'patt1']

    generate_csv_exact_counts(data_graph, target_attr, target_constant,
                              OBDTarget, root_node_target, patterns,
                              OBDPatterns, root_nodes_patterns, csvfile,
                              fieldnames)
Example #7
                        default=None,
                        help='name of a root node')
    args = parser.parse_args()

    #Preparing the inputs
    data_graph, pattern, OBdecomp, root_node, root_node_predicate_name, interval, max_time, monitoring_marks, root_nodes, Plist = prepare_inputs.prepare_params(
        args)
    output_folder = 'fk_AD_results'
    output_path = os.path.join(args.o, output_folder)
    if not os.path.exists(output_path):
        os.makedirs(output_path)

    if OBdecomp is not None:
        OBdecomp_false = [[item] for sublist in OBdecomp for item in sublist]
    else:
        OBdecomp_false = OBDsearch.get_flatList(pattern, startNode=root_node)

    #Main procedure
    monitoring_reports = {}
    all_furer_times = []
    fudicts = []
    average_time = 0
    start = time.time()

    m, estimates = get_nr_embedding(data_graph, pattern, OBdecomp_false,
                                    root_node_predicate_name, monitoring_marks)

    end = time.time()
    average_time += end - start
    fdictionaries_Furer = globals_sampling.globalist_furer
    times_Furer = globals_sampling.globaltimes_furer[1:]
Example #8
def choose_root_node(pattern, root_node_predicate_name, hist, root_node_list=None, root_node_alg=None):
    # start HOPS code: choose the root node according to the requested strategy
    if root_node_list is not None and root_node_alg == "MaxDeg":
        # pick a random node from the precomputed maximum-degree list
        ran = random.randint(0, len(root_node_list) - 1)
        root_node = root_node_list[ran]
    elif root_node_alg == "MinLabel":
        # least frequent usable predicate, then one of its candidate nodes at random
        for i in range(len(hist)):
            root_node_predicate_name = hist[i][0]  # predicate with minimum label occurrence
            if root_node_predicate_name == 'references':
                continue
            possible_root_nodes = [x for x in pattern.nodes()
                                   if pattern.node[x]['predicate'] == root_node_predicate_name
                                   and pattern.node[x]['valueinpattern'] == 0]
            if len(possible_root_nodes) == 0:
                continue
            break
        ran = random.randint(0, len(possible_root_nodes) - 1)
        root_node = possible_root_nodes[ran]
    elif root_node_alg == "MinLabelMaxDegree":
        # least frequent usable predicate, then the candidate of maximum degree
        for i in range(len(hist)):
            root_node_predicate_name = hist[i][0]  # predicate with minimum label occurrence
            if root_node_predicate_name == 'references':
                continue
            possible_root_nodes = [x for x in pattern.nodes()
                                   if pattern.node[x]['predicate'] == root_node_predicate_name
                                   and pattern.node[x]['valueinpattern'] == 0]
            if len(possible_root_nodes) == 0:
                continue
            break
        max_degree = 0
        for r_node in possible_root_nodes:
            if pattern.degree(r_node) > max_degree:
                max_degree = pattern.degree(r_node)
                root_node = r_node
    elif root_node_alg == "Central":
        # pick a random node among the graph centers
        centers = get_center_nodes(pattern)
        ran = random.randint(0, len(centers) - 1)
        root_node = centers[ran]
    elif root_node_alg == "MinLabelCentral":
        # least frequent usable predicate, then the candidate closest to the center
        for i in range(len(hist)):
            root_node_predicate_name = hist[i][0]  # predicate with minimum label occurrence
            if root_node_predicate_name == 'references':
                continue
            possible_root_nodes = [x for x in pattern.nodes()
                                   if pattern.node[x]['predicate'] == root_node_predicate_name
                                   and pattern.node[x]['valueinpattern'] == 0]
            if len(possible_root_nodes) == 0:
                continue
            break
        dist_to_center = get_dist_to_center(pattern)
        for node in dist_to_center:
            if node[0] in possible_root_nodes:
                root_node = node[0]
                break
    # end HOPS code
    elif root_node_predicate_name is None:
        # default: least frequent usable predicate whose candidate admits an OBD
        for i in range(len(hist)):
            root_node_predicate_name = hist[i][0]  # choose the root node predicate
            if root_node_predicate_name == 'references':
                continue
            possible_root_nodes = [x for x in pattern.nodes()
                                   if pattern.node[x]['predicate'] == root_node_predicate_name
                                   and pattern.node[x]['valueinpattern'] == 0]
            if len(possible_root_nodes) == 0:
                continue

            ran = random.randint(0, len(possible_root_nodes) - 1)
            root_node = possible_root_nodes[ran]
            if OBDsearch.get_heuristic4_OBD(pattern, startNode=root_node) is None:
                continue
            break
    else:
        # a predicate name was given explicitly: pick one of its nodes at random
        possible_root_nodes = [x for x in pattern.nodes()
                               if pattern.node[x]['predicate'] == root_node_predicate_name]
        ran = random.randint(0, len(possible_root_nodes) - 1)
        root_node = possible_root_nodes[ran]
    return root_node, pattern.node[root_node]['predicate']
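A minimal usage sketch for the strategy switch above, assuming choose_root_node is importable from this project's utils module and a NetworkX version where Graph.node is still available (pre-2.4). The two-node pattern and histogram are toy values; "MinLabel" is chosen here because that branch needs no other project helpers.

import networkx as nx
# from utils import choose_root_node  # assumed project import

pattern = nx.Graph()
pattern.add_node(1, predicate='protein_class', valueinpattern=0)
pattern.add_node(2, predicate='function', valueinpattern=0)
pattern.add_edge(1, 2)

hist = [('protein_class', 3), ('function', 17)]  # toy label frequencies, least frequent first

root_node, root_predicate = choose_root_node(pattern, None, hist, root_node_alg="MinLabel")
print(root_node, root_predicate)  # expected: a 'protein_class' node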