def vk_graph(my_id=57573111, appid='5989878', login='******',
             password='******'):
    """Build a graph of a VK user's active friends and their mutual links.

    Node 0 is the user ``my_id``; every friend whose record has no
    ``'deactivated'`` entry gets the next free node index.  An edge is added
    from node 0 to each entry returned by ``friends.getMutual`` and from that
    entry to each of its ``'common_friends'`` that is a known node.

    Args:
        my_id: VK id of the user the graph is centred on.
        appid: VK application id used for authentication.
        login: VK login used for authentication.
        password: VK password used for authentication.

    Returns:
        The populated ``MyGraph`` instance.

    Raises:
        ValueError: if ``friends.getMutual`` reports an ``'id'`` that is not
            one of the collected node ids.
    """
    session = vk.AuthSession(appid, login, password, scope='friends')
    vk_api = vk.API(session)
    mygraph = MyGraph()
    me = vk_api.users.get(user_ids=my_id, fields='city,photo_400_orig,domain')
    mygraph.add_single_node(0, info=me)
    my_friends_full = vk_api.friends.get(user_id=my_id,
                                         fields='city,photo_400_orig,domain')
    # active_friends keeps insertion order (it is sent to the API below);
    # node_index maps each id to its first position for O(1) lookups.
    active_friends = [my_id]
    node_index = {my_id: 0}
    for friend in my_friends_full:
        if friend.get('deactivated') is None:
            friend_id = friend.get('user_id')
            active_friends.append(friend_id)
            # setdefault keeps the first index if an id shows up twice,
            # matching what list.index() would have returned.
            index = node_index.setdefault(friend_id, len(active_friends) - 1)
            mygraph.add_single_node(index, info=friend)
    mutual = vk_api.friends.getMutual(source_uid=my_id,
                                      target_uids=active_friends)
    for m in mutual:
        source = m.get('id')
        if source not in node_index:
            # Same exception type list.index() raised for an unknown id.
            raise ValueError('%r is not in list' % (source,))
        source_id = node_index[source]
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(source_id)
        mygraph.add_single_edge(0, source_id, weight=1)
        for target in m.get('common_friends'):
            target_id = node_index.get(target)
            if target_id is not None:
                mygraph.add_single_edge(source_id, target_id, weight=1)
    return mygraph
    def __init__(self):
        """Initialise an empty solver state.

        Creates the input graph ``G`` and the result graph ``H``, zeroes all
        counters and scores, and configures an ``Optimize`` instance with a
        five-minute timeout (the value handed to it is in milliseconds).
        """
        # Input graph and the graph that will hold the decoded solution.
        self.G = MyGraph()
        self.H = MyGraph()

        # Confusion-matrix counters (kept as floats).
        self.count_TN = self.count_FN = self.count_FP = self.count_TP = 0.0

        self.M1 = 0

        # Quality scores.
        self.precision = self.recall = self.accuracy = 0.0
        self.SMTvalue = 0.0

        # Optimizer with a 5 minute budget, expressed in milliseconds.
        self.o = Optimize()
        timeout_ms = 5 * 60 * 1000
        self.o.set("timeout", timeout_ms)
        print('timeout = ',timeout_ms/1000/60, 'mins')

        # Solver model plus the term <-> id <-> variable lookup tables.
        self.model = None
        self.term2id, self.id2term, self.id2encode = {}, {}, {}

        # Edge bookkeeping.
        self.existing_equivalent_edges = []
        self.existing_attacking_edges = []  # edges already present in the graph
        self.additional_attacking_edges = []  # candidate edges not in the graph
        self.removed_edges = []
        self.num_subgraphs = 0
        self.num_removed_edges = 0

        self.pos = None
Beispiel #3
0
def make_graph(n):
    """Create a ``MyGraph`` containing the nodes ``1 .. n``.

    Args:
        n: number of nodes to add; nodes are labelled 1 through ``n``.

    Returns:
        The newly built ``MyGraph``.
    """
    # The size attribute is "width,height" with an optional trailing "!";
    # the previous value '20, 11, 25!' had three components and looks like a
    # mangled '20,11.25!' (the value the other graph builders here use).
    graph = MyGraph(graph_type='graph', size='20,11.25!', ratio='fill',
                    fontsize=40)

    for v in range(1, n + 1):
        graph.add_nodes(v)

    return graph
Beispiel #4
0
def make_graph(n_nodes, label):
    """Create a labelled ``MyGraph`` containing the nodes ``1 .. n_nodes``.

    Args:
        n_nodes: number of nodes to add; nodes are labelled from 1 upwards.
        label: value forwarded as the graph's ``label`` option.

    Returns:
        The newly built ``MyGraph``.
    """
    options = {
        'graph_type': 'graph',
        'size': '20,11.25!',
        'ratio': 'fill',
        'label': label,
        'fontsize': 40,
    }
    graph = MyGraph(**options)

    last_node = n_nodes + 1
    for node in range(1, last_node):
        graph.add_nodes(node)

    return graph
Beispiel #5
0
def pins(data):
    """Check whether ``paint`` succeeds from every not-yet-visited vertex.

    Items 0 and 1 of each entry in ``data`` are added as an edge of a fresh
    ``MyGraph``.  ``paint`` is then started from each vertex that no earlier
    call has reported; its result is merged into the visited set.

    Returns:
        ``False`` as soon as ``paint`` returns ``None``, otherwise ``True``.
    """
    graph = MyGraph()
    for entry in data:
        graph.add_edge(entry[0], entry[1])

    seen = set()
    for vertex in graph.vertices():
        if vertex in seen:
            continue
        painted = paint(graph, vertex)
        if painted is None:
            return False
        seen.update(painted)
    return True
Beispiel #6
0
 def create_graphs(self):
     """Reset the graph containers and build ``self.num_graph`` graphs.

     ``self.graphs`` and ``self.graph_solvers`` are replaced by empty lists,
     ``self.num_graph`` is set to 1, and each graph is created from the
     instance's ``C``, ``N``, ``M``, ``alpha`` and ``beta`` settings.
     """
     self.graphs, self.graph_solvers = [], []
     self.num_graph = 1
     for _ in range(self.num_graph):
         settings = {
             'concepts_size': self.C,
             'N': self.N,
             'M': self.M,
             'alpha': self.alpha,
             'beta': self.beta,
         }
         new_graph = MyGraph(**settings)
         new_graph.create_graph()
         # new_graph.show_graph()  # enable to visualise the graph
         print('how many nodes are there? ', len(new_graph.G.nodes))
         self.graphs.append(new_graph)
Beispiel #7
0
def get_graph(voters):
    """Create a graph with one node per voter, all placed in one cluster.

    Args:
        voters: number of voters; nodes are labelled 1 through ``voters``.

    Returns:
        The ``MyGraph`` whose nodes all belong to the "unknows" cluster.
    """
    cluster_id = "unknows"
    graph = MyGraph(graph_type='graph', size='20,11.25!', ratio='fill')
    # Cluster caption is Portuguese for "votes to be counted".
    graph.add_cluster(cluster_id, "Votos a serem apurados")

    last_voter = voters + 1
    for voter in range(1, last_voter):
        graph.add_nodes(voter)
        graph.add_nodes_cluster(cluster_id, voter)

    return graph
def add(image, model_folder, name):
    """Add one face image for ``name`` to an existing model and refit it.

    The image is saved to the temp area, its embedding is computed, appended
    to the model's stored embeddings together with a label for ``name``, the
    image is copied into the person's folder, and the model is refitted and
    saved.

    Args:
        image: image payload handed to ``helpers.save_temp_face``.
        model_folder: name of the model directory under ``Globals.model_path``.
        name: person the image belongs to.  A name that differs from an
            existing class only by letter case is treated as that class.

    Returns:
        ``(True, "")`` on success, ``(False, error_message)`` otherwise.
    """
    # Create temp image
    file_path, error = helpers.save_temp_face(image)

    if error != "":
        return False, error

    # Get facenet embeddings
    model_path = os.path.join(Globals.model_path, model_folder)
    classifier_file = os.path.join(model_path, "classifier.pkl")

    features = classifier.get_features(Globals.temp_path, MyGraph(), classifier_file)

    # NOTE(review): equality test kept on purpose; `success` may not be a
    # strict bool, in which case `not features.success` would behave differently.
    if features.success == False:
        return False, features.error

    # Load model
    (model, class_names, emb_array, labels) = helpers.load_model(features.classifier_filename_exp)

    # Add new embedding to array
    print(emb_array.shape)
    print(features.emb_array.shape)
    emb_array = np.append(emb_array, features.emb_array, axis=0)

    print("Emb array")
    print(emb_array.shape)

    # Resolve the class this image belongs to: exact match first, then a
    # case-insensitive match.  Previously the case-insensitive hit was only
    # used to decide whether to append, and class_names.index(name) then
    # raised ValueError when the stored name differed in letter case.
    if name in class_names:
        known_name = name
    else:
        wanted = name.lower()
        known_name = next((n for n in class_names if n.lower() == wanted), None)

    if known_name is None:
        print("Name not found... adding new name")
        class_names.append(name)
        known_name = name

    name_index = class_names.index(known_name)
    labels.append(name_index)

    # NOTE(review): the folder is still derived from the name as supplied;
    # confirm whether get_person_folder_path normalises letter case.
    folder_name, folder_path = helpers.get_person_folder_path(model_path, name)

    if not os.path.exists(folder_path):
        os.makedirs(folder_path)

    file_name = os.path.basename(file_path)
    copyfile(file_path, os.path.join(folder_path, file_name))

    print(len(labels))
    print(len(class_names))

    # Retrain
    model.fit(emb_array, labels)

    # Save the new model / embeddings etc
    helpers.save_model(features.classifier_filename_exp, model, class_names, emb_array, labels)

    # Cleanup
    shutil.rmtree(Globals.temp_path)

    return True, ""
Beispiel #9
0
def retrain(model_folder_name, model_type):
    """Retrain the classifier of an existing model from its ``data`` folder.

    Args:
        model_folder_name: model directory name under ``Globals.model_path``.
        model_type: classifier type forwarded to ``classifier.train``.

    Returns:
        Always ``(True, "")``.
    """
    model_dir = os.path.join(Globals.model_path, model_folder_name)
    train_args = {
        'data_dir': os.path.join(model_dir, "data"),
        'session': MyGraph(),
        'classifier_filename': os.path.join(model_dir, "classifier.pkl"),
        'model_type': model_type,
    }
    classifier.train(**train_args)

    return True, ""
Beispiel #10
0
def train(input_folder_path, model_folder_name, model_type):
    """Align the faces in a folder and train a new classifier from them.

    Args:
        input_folder_path: folder holding the raw training images.
        model_folder_name: name of the model directory to create under
            ``Globals.model_path``; it must not exist yet.
        model_type: classifier type; for ``"svc"`` a person folder holding a
            single image gets that image duplicated before training.

    Returns:
        ``(True, "")`` on success, or ``(False, message)`` when the input
        folder is missing or the model already exists.
    """
    print("Input Folder Path:", input_folder_path)
    print("Model Folder Name:", model_folder_name)

    print("Checking Directories...")
    if not os.path.exists(input_folder_path):
        return False, "Invalid input folder!"

    model_dir = os.path.join(Globals.model_path, model_folder_name)
    if os.path.exists(model_dir):
        return False, "Model already exists!"

    print("Aligning faces...")
    processed_dir = os.path.join(model_dir, "data")
    my_graph = MyGraph()
    align.align_faces(AlignOptions(input_folder_path, processed_dir, my_graph))

    directories = os.listdir(processed_dir)

    # An SVC cannot be trained on a class with a single sample, so a lone
    # image is copied next to itself with a "_2" suffix.
    if model_type == "svc":
        for entry in directories:
            subdir = os.path.join(processed_dir, entry)
            if not os.path.isdir(subdir):
                continue

            files = os.listdir(subdir)
            if len(files) != 1:
                continue

            only_file = files[0]
            stem, extension = os.path.splitext(only_file)
            file_path_from = os.path.join(subdir, only_file)
            file_path_to = os.path.join(subdir, stem + "_2" + extension)
            print("Only 1 image found for training... Duplicating ",
                  file_path_from)
            copyfile(file_path_from, file_path_to)

    print("Training...")

    classifier_path = os.path.join(model_dir, "classifier.pkl")
    classifier.train(data_dir=processed_dir,
                     session=my_graph,
                     classifier_filename=classifier_path,
                     model_type=model_type)

    return True, ""
Beispiel #11
0
def predict(image, model_folder, verbose):
    """Classify the face in ``image`` with the model in ``model_folder``.

    The image is stored in the temp area, base64-encoded for the response,
    aligned into a scratch ``temp_predict`` folder and then passed to the
    classifier.  Both scratch folders are removed along the way.

    Args:
        image: image payload handed to ``helpers.save_temp_face``.
        model_folder: model directory name under ``Globals.model_path``.
        verbose: flag forwarded to ``classifier.prediction``.

    Returns:
        The classifier's prediction response, or a ``PredictResponse``
        carrying an error message when saving or face detection fails.
    """
    file_path, error = helpers.save_temp_face(image)
    if error != "":
        return PredictResponse(error)

    # Base64 text form of the stored image, forwarded to the classifier.
    with open(file_path, "rb") as image_file:
        encoded_image = base64.b64encode(image_file.read()).decode('utf-8')

    my_graph = MyGraph()

    align_started = time.time()

    print("Align image to work with classifier")
    temp_predict = os.path.join(Globals.data_path, "temp_predict")
    align_options = AlignOptions(Globals.temp_path, temp_predict, my_graph, True)
    align.align_faces(align_options)
    shutil.rmtree(Globals.temp_path)

    align_finished = time.time()
    print("Time taken (Align):", align_finished - align_started)

    print("Classify image")
    # Without a "data" folder, alignment produced no face to classify.
    if not os.path.exists(os.path.join(temp_predict, "data")):
        return PredictResponse("Could not detect face")

    model_path = os.path.join(Globals.model_path, model_folder)
    classifier_file = os.path.join(model_path, "classifier.pkl")

    predict_started = time.time()

    predict_response = classifier.prediction(temp_predict, my_graph,
                                             classifier_file, model_path,
                                             verbose, encoded_image)
    print("Cleanup...")
    shutil.rmtree(temp_predict)

    predict_finished = time.time()
    print("Time taken (Prediction):", predict_finished - predict_started)

    return predict_response
Beispiel #12
0
def load():
    """Read GraphML files and convert each one into a ``MyGraph``.

    Only ``graph5.graphml`` is read at the moment (``range(5, 6)``); widen
    that range to load more files.  Each vertex becomes a node numbered by
    its position in ``graph.vs``, and every edge selected with that position
    as ``_source`` is added with weight 1.

    Returns:
        A list with one ``MyGraph`` per file read.
    """
    graphs = []
    # CHANGE HERE FOR ALL GRAPHS
    for graph_num in range(5, 6):
        graphs.append(Graph.Read_GraphML('graph%d.graphml' % graph_num))
        # Report only after the file has actually been read.
        print('loaded %d' % graph_num)

    graphs_return = []
    for graph in graphs:
        mygraph = MyGraph()
        for count, vertex in enumerate(graph.vs):
            mygraph.add_single_node(count, graphmlid=vertex["id"])
            for edge in graph.es.select(_source=count):
                source, target = edge.tuple
                mygraph.add_single_edge(source, target, weight=1)
        graphs_return.append(mygraph)
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print(graphs_return)
    return graphs_return
class GraphSolver():

    def __init__(self):
        """Initialise an empty solver state.

        Creates the input graph ``G`` and the result graph ``H``, zeroes all
        counters and scores, and configures an ``Optimize`` instance with a
        five-minute timeout (the value handed to it is in milliseconds).
        """
        # Input graph and the graph that will hold the decoded solution.
        self.G = MyGraph()
        self.H = MyGraph()

        # Confusion-matrix counters (kept as floats).
        self.count_TN = self.count_FN = self.count_FP = self.count_TP = 0.0

        self.M1 = 0

        # Quality scores.
        self.precision = self.recall = self.accuracy = 0.0
        self.SMTvalue = 0.0

        # Optimizer with a 5 minute budget, expressed in milliseconds.
        self.o = Optimize()
        timeout_ms = 5 * 60 * 1000
        self.o.set("timeout", timeout_ms)
        print('timeout = ',timeout_ms/1000/60, 'mins')

        # Solver model plus the term <-> id <-> variable lookup tables.
        self.model = None
        self.term2id, self.id2term, self.id2encode = {}, {}, {}

        # Edge bookkeeping.
        self.existing_equivalent_edges = []
        self.existing_attacking_edges = []  # edges already present in the graph
        self.additional_attacking_edges = []  # candidate edges not in the graph
        self.removed_edges = []
        self.num_subgraphs = 0
        self.num_removed_edges = 0

        self.pos = None

    def same_domain(self, t1, t2):
        """Tell whether two terms have the same ``domain`` component.

        Both terms are split with ``tldextract.extract``; only the ``domain``
        part of each result is compared (subdomain and suffix are ignored).

        Returns:
            ``True`` when the two domain parts are equal, else ``False``.
        """
        first_domain = tldextract.extract(t1).domain
        second_domain = tldextract.extract(t2).domain
        return first_domain == second_domain

    def compare_names(self, t1, t2):
        """Compare the last path segments of two terms modulo URL escaping.

        The text after the final ``/`` of each term is taken as its name.

        Returns:
            ``IDENTICAL`` when one name equals the percent-encoded form of
            the other; ``CONV_IDENTICAL`` when that holds once ``()``, then
            additionally ``'``, then additionally ``,`` are left unescaped;
            ``DIFFERENT`` otherwise.
        """
        quote = urllib.parse.quote
        name1 = t1.rsplit('/', 1)[-1]
        name2 = t2.rsplit('/', 1)[-1]

        if quote(name2) == name1 or name2 == quote(name1):
            return IDENTICAL

        # Retry with a growing set of characters exempt from escaping.
        # Quoting the whole name with `safe=` yields the same text as
        # escaping it character by character and skipping those characters.
        for unescaped in ("()", "()'", "()',"):
            keep = '/' + unescaped  # '/' is quote()'s default safe character
            if name1 == quote(name2, safe=keep) or quote(name1, safe=keep) == name2:
                return CONV_IDENTICAL  # identical after conversion

        return DIFFERENT

    def find_existing_attacking_edges(self):
        """Sort the graph's same-host edges into attacking and equivalent.

        Only edges whose two terms both have a non-empty subdomain and agree
        on domain and subdomain are considered.  Such an edge is appended to
        ``existing_attacking_edges`` when ``compare_names`` reports
        ``DIFFERENT`` and to ``existing_equivalent_edges`` otherwise.  Every
        attacking edge collected so far is printed at the end.
        """
        for (t1, t2) in self.G.subgraphs[0].edges:
            first = tldextract.extract(t1)
            second = tldextract.extract(t2)

            if first.subdomain == '' or second.subdomain == '':
                continue
            if first.domain != second.domain or first.subdomain != second.subdomain:
                continue

            if self.compare_names(t1, t2) == DIFFERENT:
                self.existing_attacking_edges.append((t1, t2))
            else:
                self.existing_equivalent_edges.append((t1, t2))

        for edge in self.existing_attacking_edges:
            print ('existing_attacking_edges: ', edge)

    def find_additional_attacking_edges(self):
        for x in self.domain_subdomain.keys():
            if len(self.domain_subdomain[x]) >= 2:
                for t1 in self.domain_subdomain[x]:
                    for t2 in self.domain_subdomain[x]:
                        if t1 != t2:
                            if (self.compare_names(t1, t2) == DIFFERENT):
                                self.additional_attacking_edges.append((t1, t2))


    # def compute_weight(self, t1, t2): # the most important function for now
    #     weight = 0
    #     if (t1, t2) in self.G.subgraphs[0].edges:
    #         weight = 10
    #     else:
    #         weight = -6
    #     return weight

    def load_graph(self, file_name):
        self.G.load_graph(file_name)

    def load_node_manual_label (self, file_name):
        self.G.load_node_manual_label(file_name)

    def preprocessing_before_encode(self):
        """Group the graph's nodes by domain and by ``subdomain.domain``.

        Rebuilds ``self.domain`` (domain -> nodes) and
        ``self.domain_subdomain`` ("subdomain.domain" -> nodes).  Nodes whose
        subdomain is empty or ``www`` are left out of the second mapping.
        """
        self.domain = {}
        self.domain_subdomain = {}

        for node in self.G.subgraphs[0].nodes:
            node_domain = tldextract.extract(node).domain
            self.domain.setdefault(node_domain, []).append(node)

        # Second pass runs domain by domain so the key order of
        # domain_subdomain follows the domain grouping.
        for domain_name, terms in self.domain.items():
            for term in terms:
                subdomain = tldextract.extract(term).subdomain
                if subdomain in ('', 'www'):
                    continue
                host = subdomain + '.' + domain_name
                self.domain_subdomain.setdefault(host, []).append(term)
        # print ('subdomain = ', self.domain_subdomain)
        # for k in self.domain_subdomain.keys():
        #     print ('domain.subdomain = ', k)
        #     print (self.domain_subdomain[k])




    def encode(self):
        """Translate the graph into constraints on the optimizer ``self.o``.

        Every node gets an integer id and a non-negative integer variable.
        Equality between the variables of two terms is then asserted:

        * as a hard constraint for existing attacking edges, existing
          equivalent edges and remaining edges within one domain;
        * as a soft constraint weighted ``WEIGHT_WEAKER_NORMAL_EDGES`` for
          remaining edges across domains;
        * as a soft constraint weighted ``WEIGHT_ADDITIONAL_ATTACKING_EDGES``
          for the additional attacking pairs.
        """
        g = self.G.subgraphs[0]

        def same_group(t1, t2):
            # Expression stating that both terms get the same group number.
            return self.id2encode[self.term2id[t1]] == self.id2encode[self.term2id[t2]]

        # Encode each node with an integer and a non-negative variable.
        for node_id, n in enumerate(g.nodes):
            self.term2id[n] = node_id
            self.id2term[node_id] = n
            variable = Int(str(node_id))
            self.id2encode[node_id] = variable
            self.o.add(variable >= 0)

        # Group nodes by host before any edge is classified.
        self.preprocessing_before_encode()

        # Existing attacking / equivalent edges. TODO: change the weight function
        print ('There are in total ', len (self.G.subgraphs[0].edges))
        edges = list(g.edges)
        self.find_existing_attacking_edges()
        for (t1, t2) in self.existing_attacking_edges:
            self.o.add(same_group(t1, t2))
        print('\tThere are in total: ', len (self.existing_attacking_edges), ' existing attacking edges!')
        for (t1, t2) in self.existing_equivalent_edges:
            self.o.add(same_group(t1, t2))
        print('\tThere are in total: ', len (self.existing_equivalent_edges), ' existing equivalence edges!')

        # Everything not classified above is a "normal" edge.
        edges = [
            e for e in edges
            if e not in self.existing_attacking_edges
            and e not in self.existing_equivalent_edges
        ]
        print ('Now there are normal', len(edges), ' edges left')
        for (t1, t2) in edges:
            # Edges across domains are only a weighted preference.
            if self.same_domain(t1, t2):
                self.o.add(same_group(t1, t2))
            else:
                self.o.add_soft(same_group(t1, t2), WEIGHT_WEAKER_NORMAL_EDGES)

        # Additional attacking pairs found inside each host group.
        self.find_additional_attacking_edges()
        for (t1, t2) in self.additional_attacking_edges:
            self.o.add_soft(same_group(t1, t2), WEIGHT_ADDITIONAL_ATTACKING_EDGES)
        print('There are in total: ', len (self.additional_attacking_edges), ' additional attacking edges!')


    def solve(self):
        result = self.o.check()
        print ('solving result = ', result)
        self.model = self.o.model()
        # update the SMT value
        self.calculate_SMTvalue()

    def calculate_SMTvalue (self):

        SMT_value = 0.0
        g = self.G.subgraphs[0]
        # find existing attacking edges: #TODO change the weight function
        # print ('There are in total ', len (self.G.subgraphs[0].edges))
        # edges = list(g.edges).copy()
        # self.find_existing_attacking_edges()
        for (t1, t2) in self.existing_attacking_edges:
            if self.model.evaluate(self.id2encode[self.term2id[t1]] == self.id2encode[self.term2id[t2]]):
                SMT_value += WEIGHT_EXISTING_ATTACKING_EDGES
            # print('existing attacking edge: ', t1, t2)
        # print('\tThere are in total: ', len (self.existing_attacking_edges), ' existing attacking edges!')
        for (t1, t2) in self.existing_equivalent_edges:
            if self.model.evaluate(self.id2encode[self.term2id[t1]] == self.id2encode[self.term2id[t2]]):
                SMT_value += WEIGHT_EXISTING_EQUIVALENT_EDGES
            # print('existing equivalent edge: ', t1, t2)
        # print('\tThere are in total: ', len (self.existing_equivalent_edges), ' existing equivalence edges!')
        edges = list(g.edges).copy()
        edges = list(filter(lambda x: x not in self.existing_attacking_edges, edges))
        edges = list(filter(lambda x: x not in self.existing_equivalent_edges, edges))
        # print ('Now there are normal', len(edges), ' edges left')
        # other normal edges
        for (t1, t2) in edges:
            if self.model.evaluate(self.id2encode[self.term2id[t1]] == self.id2encode[self.term2id[t2]]):
                SMT_value += WEIGHT_NORMAL_EDGES # each edge within graphs

        # find additional attacking edges:
        # self.find_additional_attacking_edges()
        for (t1, t2) in self.additional_attacking_edges:
            # self.o.add(Not(self.id2encode[self.term2id[t1]] == self.id2encode[self.term2id[t2]])) # each edge within graphs
            if self.model.evaluate(self.id2encode[self.term2id[t1]] == self.id2encode[self.term2id[t2]]):
                SMT_value += WEIGHT_ADDITIONAL_ATTACKING_EDGES # each edge within graphs
        # print('There are in total: ', len (self.additional_attacking_edges), ' additional attacking edges!')
        print ('SMT value is', SMT_value)
        self.SMTvalue = SMT_value

    def decode(self):
        """Turn the solver model into subgraphs stored on ``self.H``.

        Nodes are placed into one ``nx.Graph`` per group number assigned by
        the model, each group receives the input-graph edges among its own
        nodes, and groups without nodes are dropped while the rest are
        renumbered from 0.  Also records the input-graph edges that ended up
        in no group (``removed_edges``), their count derived from the edge
        totals (``num_removed_edges``) and the number of groups kept
        (``num_subgraphs``).
        """
        source = self.G.subgraphs[0]

        def value_of(node_id):
            # Model value of the integer variable attached to a node id.
            return self.model.evaluate(self.id2encode[node_id])

        # Largest group number in use (never below 0), plus one.
        group_size = 0
        for node_id in self.id2encode.keys():
            candidate = int(value_of(node_id).as_string())
            if group_size < candidate:
                group_size = candidate
        group_size += 1

        for m in range(group_size):
            self.H.subgraphs[m] = nx.Graph()

        # Drop every term into the group the model chose for it.
        for node_id in self.id2encode.keys():
            group_id = int(value_of(node_id).as_long())
            self.H.subgraphs[group_id].add_node(self.id2term[node_id])

        # Copy over the input edges that stay inside a single group.
        for m in range(group_size):
            group = self.H.subgraphs[m]
            induced = source.subgraph(group.nodes)
            for (t1, t2) in induced.edges:
                left = int(value_of(self.term2id[t1]).as_long())
                right = int(value_of(self.term2id[t2]).as_long())
                if left == right:
                    group.add_edge(t1, t2)

        # Renumber the non-empty groups and strip every group's edges from a
        # copy of the input graph; what is left there was removed.
        leftover = self.G.subgraphs[0].copy()
        compacted = {}
        next_index = 0
        acc_num_edges = 0
        for key in self.H.subgraphs.keys():
            group = self.H.subgraphs[key]
            leftover.remove_edges_from(group.edges)
            if len(group.nodes) == 0:
                continue
            acc_num_edges += len(group.edges)
            compacted[next_index] = group
            next_index += 1
        self.H.subgraphs = compacted
        print('there are in total ', next_index, ' subgraphs in the solution')
        print ('and they have ', acc_num_edges, ' edges')

        self.removed_edges = leftover.edges

        self.num_removed_edges = len(self.G.subgraphs[0].edges) - acc_num_edges
        print ('SHOULD BE EQUAL: ', self.num_removed_edges, ' = ',len(self.removed_edges))
        self.num_subgraphs = next_index

    # def obtain_statistics(self, file_name):
    #     # dict_al = {}
    #     #
    #     # print ('obtain statistics now!')
    #     # print ('compare against the manual decision from AL in the file ', file_name)
    #     # # now load the data in
    #     # # file_name = str(n) + '_annotation.txt'
    #     # print ('File Name = ', file_name)
    #     # file = open(file_name, 'r')
    #     # reader = csv.DictReader(file, delimiter = '\t')
    #     # for row in reader:
    #     #     e = row["Entity"]
    #     #     o = row["Annotation"]
    #     #     dict_al [e] = o
    #     #
    #     # # al_count_remain = 0
    #     # al_remain = []
    #     # # al_count_remove = 0
    #     # self.G.should_remove = []
    #     #
    #     # my_remain = list(filter(lambda v: v not in self.removed_edges, self.G.subgraphs[0].edges))
    #     # my_removed = self.removed_edges
    #     #
    #     # count_edges_involving_unknow = 0
    #     #
    #     # for (l, r) in self.G.subgraphs[0].edges:
    #     #     if dict_al[l] != 'Uncertain' and dict_al[r] != 'Uncertain': # Error
    #     #         if dict_al[l] == dict_al[r] :
    #     #             al_remain.append((l,r))
    #     #         else:
    #     #             # al_count_remove += 1
    #     #             self.G.should_remove.append((l,r))
    #     #
    #     # print ('# al removed: ', len(self.G.should_remove))
    #     # print ('# al remain: ', len(al_remain))
    #     #
    #     # print('# my removed:', len(my_removed))
    #     # print('# my remain:', len(my_remain))
    #     print ('#my removed edges:', len(self.removed_edges))
    #     for e in self.removed_edges:
    #         (l, r) = e
    #         f = (r, l)
    #         if e in self.G.should_remove or f in self.G.should_remove:
    #             print ('\t*removed edges: ', e)
    #         else:
    #             print ('\tremoved edges: ', e)
    #
    #
    #     print ('# SHOULD REMOVE: ',len(self.G.should_remove))
    #     for e in self.G.should_remove:
    #         (l, r) = e
    #         f = (r, l)
    #         if e in self.removed_edges or f in self.removed_edges:
    #             print ('\t*should remove edge: ', e)
    #         else:
    #             print ('\tshould remove edge: ', e)
    #
    #
    #     # collectFN = []
    #     # collectTP = []
    #     collect_visited_edges = []
    #     for e in self.G.subgraphs[0].edges:
    #         (l, r) = e
    #         f = (r, l)
    #         collect_visited_edges.append(e)
    #         if f in collect_visited_edges:
    #             print ('!!!!ERROR: ', f)
    #         if ((e not in self.removed_edges) and (f not in self.removed_edges))and ((e not in self.G.should_remove) and (f not in self.G.should_remove)):
    #             self.count_TN += 1
    #         elif ((e in self.removed_edges) or (f in self.removed_edges)) and ((e in self.G.should_remove) or (f in self.G.should_remove)):
    #             self.count_TP += 1
    #             # collectTP.append(e)
    #         elif ((e not in self.removed_edges) and (f not in self.removed_edges) )  and ((e in self.G.should_remove) or (f in self.G.should_remove)):
    #             self.count_FN += 1
    #             # collectFN.append(e)
    #         elif ((e in self.removed_edges) or (f in self.removed_edges)) and ((e not in self.G.should_remove) and (f not in self.G.should_remove)):
    #             self.count_FP += 1
    #         else:
    #             print ('ERROR : error', l, ' and ', r)
    #     print ('Total edges ', len(self.G.subgraphs[0].edges))
    #     # print ('There are in total ', count_edges_involving_unknow, ' edges involving unknown')
    #
    #     count_diff = 0
    #     for e in self.G.subgraphs[0].edges:
    #         (l,r) = e
    #         if self.G.node_label[l] != self.G.node_label[r]:
    #             count_diff += 1
    #             print('l = ', l, ': ', self.G.node_label[l])
    #             print('r = ', r, ': ', self.G.node_label[r])
    #     print ('VERIFY: COUNT_DIFF    = ', count_diff)
    #     print ('VERIFY: SHOULD_REMOVE = ', len(self.G.should_remove))
    #
    #     print ('==============================')
    #
    #     print ('TP = both remove: ', self.count_TP)
    #     print ('TN = both keep:   ', self.count_TN)
    #     print ('FP = predicted to remove but SHOULD KEEP: ', self.count_FP)
    #     print ('FN = predicted to keep but SHOULD REMOVE: ', self.count_FN)
    #     # print ('FN = ', collectFN)
    #     # print ('TP = ', collectTP)
    #     print ('==============================')
    #
    #     if self.count_TP + self.count_FP  != 0:
    #         self.precision = self.count_TP / (self.count_TP + self.count_FP)
    #         print('precision = TP/(TP+FP) = ', self.precision)  #TP/TP + FP
    #     if self.count_TP + self.count_FN != 0:
    #         self.recall = self.count_TP / (self.count_TP + self.count_FN )
    #         print('recall  = TP / (FN+TP) = ', self.recall) # TP / ( FN +  TP)
    #
    #     self.accuracy = (self.count_TN + self.count_TP) / (len(self.G.subgraphs[0].edges))
    #     print('accuracy = ', self.accuracy) #


    def obtain_new_statistics(self):
        # calculae M1 using self.removed_edges

        count_P = 0 # edges cross domain
        for e in self.G.subgraphs[0].edges:
            # compare l and r and see if they are in the same domain_domain
            (l, r) = e
            if not self.same_domain (l, r):
                count_P += 1

        count_P_minus = 0 # remained
        for e in self.removed_edges:
            (l,r) = e
            if not self.same_domain (l, r):
                count_P_minus += 1
        self.M1 = (count_P - count_P_minus) / count_P
        # print ('countP = ', count_P)
        # print ('countP- = ', count_P_minus)
        print ('M1 = ',self.M1)