コード例 #1
0
ファイル: subMassive.py プロジェクト: MaestroGraph/SUBMASSIVE
class SubM:
    """Cycle analysis over the rdfs:subClassOf triples of an HDT file.

    Every subClassOf triple becomes a directed edge ``(subject_id, object_id)``
    in ``self.graph``.  The graph is then pruned (leaf classes, reflexive
    edges, edges also covered by an equivalence statement) and the remaining
    cycles are extracted.
    """

    # Per the original author's note, this is the only term whose id differs
    # between subject and object position, so it is mapped by hand.
    _INTANGIBLE_TERM = "akt742:Intangible-Thing"
    _INTANGIBLE_ID = 2601100675

    # Column layout shared by every CSV file written by this class.
    _CSV_HEADER = [
        "SUBJECT_ID", "SUBJECT", "OBJECT_ID", "OBJECT", "SUGGESTION",
        "DECISION"
    ]

    # Initializer
    def __init__(self, path_hdt=PATH_LOD, path_eq=PATH_EQ):
        """Open the HDT file and create an empty graph.

        path_hdt: path of the HDT file to open.
        path_eq: accepted for compatibility; unused while the equivalence
            manager is disabled (``self.equi_graph_manager`` stays None).
        """
        self.hdt = HDTDocument(path_hdt)
        self.subClassOf = "http://www.w3.org/2000/01/rdf-schema#subClassOf"
        self.id_subClassOf = self.hdt.convert_term(
            self.subClassOf, IdentifierPosition.Predicate)
        self.equivalent = "http://www.w3.org/2002/07/owl#equivalentClass"
        self.id_equivalentClass = self.hdt.convert_term(
            self.equivalent, IdentifierPosition.Predicate)

        self.graph = nx.DiGraph()

        self.equi_graph_manager = None  # equiClassManager(path_eq)
        # The result: (sub, sup, comment) for every edge removed through
        # remove_relation.
        self.diagnosed_relations = []
        # (subject_id, object_id, suggestion) triples loaded from CSV files.
        self.suggestion_on_relations = []
        self.leaf_classes = set()

    # the graph includes all the triples with subClassOf as predicate
    def setup_graph(self):
        """Add one edge (subject_id, object_id) per subClassOf triple."""
        (subclass_triple_ids,
         _) = self.enquiry(query=(0, self.id_subClassOf, 0), mode="default")
        self.graph.add_edges_from(
            (s_id, o_id) for (s_id, _, o_id) in subclass_triple_ids)

    # for the sake of efficiency, we remove all the leaf nodes of the graph
    # (classes that do not have subclasses; they cannot be part of a cycle)
    def filter_leaf_classes(self):
        """Remove every node that never occurs as object of subClassOf."""
        for c in self.graph.nodes:
            # a leaf has no triple (?, subClassOf, c)
            (_, cardi) = self.enquiry(query=(0, self.id_subClassOf, c),
                                      mode="default")
            if cardi == 0:
                self.leaf_classes.add(c)
        print('there are a total of', len(self.leaf_classes),
              'leaf nodes removed')
        for c in self.leaf_classes:
            self.remove_class(c)

    # a similar function as that of networkx
    def remove_class(self, c, comment='remove'):
        """Remove node ``c`` (and its incident edges) if it is in the graph.

        ``comment`` is accepted for interface compatibility and is unused.
        """
        if self.graph.has_node(c):
            # removing a node also removes all the edges attached to it
            self.graph.remove_node(c)

    # a similar function as that of networkx
    def remove_class_from(self, cs, comment='remove'):
        """Call remove_class for every node in ``cs``."""
        for c in cs:
            self.remove_class(c, comment)

    # This is for future use of the SUBMASSIVE system. A user may ignore this
    # for now.
    def enquiry(self, query, mode="subm"):
        """Run the id-based triple pattern ``query`` = (s, p, o).

        Only ``mode="default"`` is implemented; it returns whatever
        ``self.hdt.search_triples_ids`` returns.  Any other mode (including
        the default argument value) is a placeholder and returns None.
        """
        (s, p, o) = query
        if mode == "default":
            return self.hdt.search_triples_ids(s, p, o)
        # placeholder: examine the filtered part first
        return None

    def diagnose_relations(self, sub, sup, comment='default'):
        """Record ``(sub, sup, comment)`` in ``self.diagnosed_relations``.

        remove_relation relies on this method; it was missing from the
        class, so the first successful removal raised AttributeError.
        """
        self.diagnosed_relations.append((sub, sup, comment))

    # Similar as that of networkx
    def remove_relation(self, sub, sup, comment='remove'):
        """Remove the edge sub -> sup if present and record the decision."""
        if self.graph.has_edge(sub, sup):
            self.graph.remove_edge(sub, sup)
            self.diagnose_relations(sub, sup, comment)

    # Similar as that of networkx
    def remove_relation_from(self, relation_list, comment='remove'):
        """Call remove_relation for every (sub, sup) pair in the list."""
        for (sub, sup) in relation_list:
            self.remove_relation(sub, sup, comment)

    # there is only one term that has different id when retrieved as Subject
    # or Object
    def convert_to_id(self, term):
        """Return the id of ``term`` (looked up in subject position)."""
        if term == self._INTANGIBLE_TERM:
            return self._INTANGIBLE_ID
        return self.hdt.convert_term(term, IdentifierPosition.Subject)

    # there is only one term that has different id when retrieved as Subject
    # or Object
    def convert_to_term(self, id):
        """Return the term for ``id`` (looked up in subject position)."""
        if id == self._INTANGIBLE_ID:
            return self._INTANGIBLE_TERM
        return self.hdt.convert_id(id, IdentifierPosition.Subject)

    # remove the reflexive edges
    def filter_reflexsive(self):
        """Log every self-loop to 'reflexive.csv' and remove it."""
        # ``with`` closes (and flushes) the report; the handle used to leak.
        with open('reflexive.csv', 'w', newline='') as file:
            writer = csv.writer(file)
            writer.writerow(self._CSV_HEADER)
            to_remove = {(l, r) for (l, r) in self.graph.edges() if l == r}
            print('Number of removed reflexive relations', len(to_remove))
            for (l, r) in to_remove:
                l_term = self.convert_to_term(l)
                r_term = self.convert_to_term(r)
                writer.writerow([l, l_term, r, r_term, 'remove', 'o'])

        self.graph.remove_edges_from(list(to_remove))

    def print_graph_info(self):
        """Print the current number of nodes and edges."""
        print('there are ', len(self.graph.nodes()), ' nodes')
        print('there are ', len(self.graph.edges()), ' edges')

    # compare against the owl:sameAs relations and owl:equivalentClass
    # relations; an edge that is also covered by one of them is removed.
    def obtain_unnecessary_relations(self):
        """Remove edges whose endpoints are also declared equivalent.

        Each removed edge is written to
        'equivalent-unnecessary-relations.csv' with a DECISION label:
        'i' when an equivalentClass triple exists in either direction,
        's' when the equivalence manager reports the terms as equivalent,
        'is' when both hold.
        """
        to_remove = set()
        count_i = 0
        count_s = 0
        with open('equivalent-unnecessary-relations.csv', 'w',
                  newline='') as file:
            writer = csv.writer(file)
            writer.writerow(self._CSV_HEADER)
            for e in self.graph.edges():
                label = ''
                (l, r) = e
                l_term = self.convert_to_term(l)
                r_term = self.convert_to_term(r)

                # Step 1: equivalentClass, checked in both directions
                (_, cardinality1) = self.enquiry(
                    query=(l, self.id_equivalentClass, r), mode="default")
                (_, cardinality2) = self.enquiry(
                    query=(r, self.id_equivalentClass, l), mode="default")
                if cardinality1 == 1 or cardinality2 == 1:
                    label = 'i'
                    count_i += 1

                # Step 2: owl:sameAs.  __init__ leaves the manager as None,
                # so skip this step instead of crashing when it is absent.
                if (self.equi_graph_manager is not None
                        and self.equi_graph_manager.test_equivalent(
                            l_term, r_term)):
                    label += 's'
                    count_s += 1

                if label != '':
                    to_remove.add(e)
                    writer.writerow([l, l_term, r, r_term, 'remove', label])
        print('count_s = ', count_s)
        print('count_i = ', count_i)
        print('Number of removed unnecessary relations', len(to_remove))
        self.graph.remove_edges_from(list(to_remove))

    # for the sake of memory efficiency, we can load these unnecessary
    # relations directly; this is because the sameAs data is very big.
    def load_unnecessary_relations(self):  # to self.suggestion_on_relations
        """Append the rows of 'equivalent-unnecessary-relations.csv'.

        Each row becomes ``(subject_id, object_id, suggestion)`` in
        ``self.suggestion_on_relations``.
        """
        with open('equivalent-unnecessary-relations.csv', 'r') as eq_file:
            for row in csv.DictReader(eq_file):
                s_id = int(row["SUBJECT_ID"])
                o_id = int(row["OBJECT_ID"])
                sug = row["SUGGESTION"]  # should be remove
                self.suggestion_on_relations.append((s_id, o_id, sug))
        print(len(self.suggestion_on_relations), ' total  relations loaded')

    # load the manual decisions on size-two cycles
    def load_manually_decided_relations(
            self):  # to self.suggestion_on_relations
        """Append the rows of 'lod-two-cycle.csv' to the suggestions.

        Returns the list of all subject and object ids read (duplicates
        kept, in file order).
        """
        coll_nodes = []
        with open('lod-two-cycle.csv', 'r') as man_file:
            for row in csv.DictReader(man_file):
                s_id = int(row["SUBJECT_ID"])
                o_id = int(row["OBJECT_ID"])
                sug = row["SUGGESTION"]
                coll_nodes.append(s_id)
                coll_nodes.append(o_id)
                self.suggestion_on_relations.append((s_id, o_id, sug))
        print(len(self.suggestion_on_relations), ' total relations loaded')
        return coll_nodes

    def find_nodes_in_cycles(self, hint_nodes, max, found_min):
        """Collect nodes that take part in at least one cycle.

        Works on a copy of the graph: repeatedly finds a cycle, removes one
        randomly chosen edge of it, and remembers the cycle's nodes.

        hint_nodes: nodes from which the cycle search is started first; when
            no cycle is found from them the whole graph is searched.
        max: stop once at least this many nodes were collected ...
        found_min: ... and at least this many cycles were found.

        Returns the collected nodes as a list.
        """
        tmp_graph = self.graph.copy()
        nodes = set()
        # Work on a private list: a set/tuple argument used to raise a
        # TypeError on concatenation below, which the broad ``except``
        # swallowed, silently ending the search after one cycle.
        hint_nodes = [] if hint_nodes is None else list(hint_nodes)

        count_found_cycles = 0
        while True:
            try:
                try:
                    c = nx.find_cycle(tmp_graph, hint_nodes)
                except nx.NetworkXException:
                    # the hints did not lead to a cycle; search everywhere
                    c = nx.find_cycle(tmp_graph)
            except nx.NetworkXNoCycle as e:
                # there is no cycle anymore
                print(e)
                break

            count_found_cycles += 1
            print('Found Cyclce ', count_found_cycles, ' is: ', c)
            c_nodes = [x for (x, y) in c]

            # break this cycle so that the next search finds another one
            (l_tmp, r_tmp) = random.choice(c)
            tmp_graph.remove_edge(l_tmp, r_tmp)
            nodes.update(c_nodes)
            if len(nodes) >= max and count_found_cycles >= found_min:
                print('total nodes = ', len(nodes))
                break
            # nodes of known cycles are good starting points for the next one
            hint_nodes = c_nodes + hint_nodes

        nodes = list(nodes)
        print('there are in total ', len(nodes),
              '  nodes that participate in cycles')
        print(nodes)
        return nodes

    def get_cycles_from_nodes(self, nodes):
        """Return the simple cycles of the subgraph induced by ``nodes``.

        Each cycle is a list of edges ``(n_i, n_i+1)`` whose last edge leads
        back to the first node, ready for encoding.
        """
        subg = self.graph.subgraph(nodes)

        simp_c = list(nx.simple_cycles(subg))
        print(' and these nodes has ', len(simp_c),
              ' simple cycles among them')
        # Pair every node with its successor, wrapping around at the end.
        # This covers cycles of any length, including size one and two.
        return [list(zip(c, c[1:] + c[:1])) for c in simp_c]
コード例 #2
0
class SubP:
    """Cycle analysis over the rdfs:subPropertyOf triples of an HDT file.

    Every subPropertyOf triple becomes a directed edge
    ``(subject_id, object_id)`` in ``self.graph``.
    """

    # Per the original author's note, this is the only term whose id differs
    # between subject and object position, so it is mapped by hand.
    _INTANGIBLE_TERM = "akt742:Intangible-Thing"
    _INTANGIBLE_ID = 2601100675

    # Initializer / Instance Attributes
    def __init__(self, path_hdt=PATH_LOD, path_eq=PATH_EQ):
        """Open the HDT file, resolve predicate ids, create an empty graph.

        path_hdt: path of the HDT file to open.
        path_eq: accepted for compatibility; unused while the equivalence
            manager is disabled (``self.equi_graph_manager`` stays None).
        """
        self.hdt = HDTDocument(path_hdt)

        self.subClassOf = "http://www.w3.org/2000/01/rdf-schema#subClassOf"
        self.id_subClassOf = self.hdt.convert_term(
            self.subClassOf, IdentifierPosition.Predicate)

        self.equivalent = "http://www.w3.org/2002/07/owl#equivalentClass"
        self.id_equivalentClass = self.hdt.convert_term(
            self.equivalent, IdentifierPosition.Predicate)

        self.subPropertyOf = "http://www.w3.org/2000/01/rdf-schema#subPropertyOf"
        self.id_subPropertyOf = self.hdt.convert_term(
            self.subPropertyOf, IdentifierPosition.Predicate)

        self.equivalentProperty = "http://www.w3.org/2002/07/owl#equivalentProperty"
        self.id_equivalentProperty = self.hdt.convert_term(
            self.equivalentProperty, IdentifierPosition.Predicate)

        self.graph = nx.DiGraph()

        self.equi_graph_manager = None  # equiClassManager(path_eq)
        print('set up the equivalence class manager')
        self.diagnosed_relations = []  # the result
        # (subject_id, object_id, suggestion) triples from the manual
        # decisions and the sameAs data.
        self.suggestion_on_relations = []
        self.leaf_classes = set()

        print('finished initialization')

    def setup_graph(self):
        """Add one edge (subject_id, object_id) per subPropertyOf triple."""
        print('set up the graph')
        (subprop_triple_ids,
         _) = self.enquiry(query=(0, self.id_subPropertyOf, 0),
                           mode="default")
        collect_pairs = [(s_id, o_id)
                         for (s_id, _, o_id) in subprop_triple_ids]

        print('there are ', len(collect_pairs), 'edges')
        self.graph.add_edges_from(collect_pairs)

    def convert_to_id(self, term):
        """Return the id of ``term`` (looked up in subject position)."""
        if term == self._INTANGIBLE_TERM:
            return self._INTANGIBLE_ID
        return self.hdt.convert_term(term, IdentifierPosition.Subject)

    def convert_to_term(self, id):
        """Return the term for ``id`` (looked up in subject position)."""
        if id == self._INTANGIBLE_ID:
            return self._INTANGIBLE_TERM
        return self.hdt.convert_id(id, IdentifierPosition.Subject)

    def enquiry(self, query, mode="subp"):
        """Run the id-based triple pattern ``query`` = (s, p, o).

        Only ``mode="default"`` is implemented; it returns whatever
        ``self.hdt.search_triples_ids`` returns.  Any other mode (including
        the default argument value) is a placeholder and returns None.
        """
        (s, p, o) = query
        if mode == "default":
            return self.hdt.search_triples_ids(s, p, o)
        # placeholder: examine the filtered part first
        return None

    def _print_annotations(self, term):
        """Print the label/comment triples that have ``term`` as subject."""
        predicate_names = [
            "http://sw.cyc.com/CycAnnotations_v1#label",
            "http://www.w3.org/2000/01/rdf-schema#comment",
            "http://www.w3.org/2000/01/rdf-schema#label"
        ]
        for predicate in predicate_names:
            (triples, _) = self.hdt.search_triples(term, predicate, "")
            # distinct names: the old inner loop rebound the outer loop's
            # variable
            for (_, found_predicate, value) in triples:
                print('\tPREDICATE: ', found_predicate)
                print('\t\t Comments/labels  :', value, '\n')

    def print_info(self, sbj, obj):
        """Print the labels and comments of two terms for manual review."""
        print('SUBJECT: ', sbj)
        self._print_annotations(sbj)
        print('OBJECT: ', obj)
        self._print_annotations(obj)

        print('\n\n========================\n\n')

    def export_cycle(self, file_name=None):
        """Classify all simple cycles and write removal suggestions to CSV.

        file_name: output path.  The method used to read an undefined name
            ``file_name``; when the argument is omitted, a module-level
            ``file_name`` is still honoured if one exists, otherwise
            'pre-subP.csv' (the file load_removed reads) is used.
            NOTE(review): confirm that default with the project owner.

        DECISION column written per edge:
          'o' self-loop, 'e' size-two cycle with an equivalentProperty
          triple, '2' other size-two cycle (both directions are written),
          'x' edge of a longer cycle.
        """
        if file_name is None:
            file_name = globals().get('file_name', 'pre-subP.csv')

        simp_c = list(nx.simple_cycles(self.graph))
        print('find simple cycle in graph')
        print('there are ', len(simp_c), ' simple cycles')

        # The owl:sameAs test is disabled, so this stays 0; it is kept
        # because the summary below still reports it.
        count_sameas = 0
        count_two = 0
        count_bigger = 0

        collect_self_loop = []
        collect_eq = []
        collect_others = []
        collect_bigger = []
        for c in simp_c:
            if len(c) == 1:
                collect_self_loop.append(c)
            elif len(c) == 2:
                count_two += 1
                (_, cardinality) = self.enquiry(
                    query=(c[0], self.id_equivalentProperty, c[1]),
                    mode="default")
                if cardinality > 0:
                    print('There is a owl:equivalentProperty in between')
                    collect_eq.append(c)
                else:
                    # left for manual decision
                    collect_others.append(c)
            else:
                count_bigger += 1
                # Every edge of the cycle, wrapping around at the end.  The
                # old code assumed length three and emitted a wrong closing
                # edge (and dropped the rest) for longer cycles.
                collect_bigger.extend(zip(c, c[1:] + c[:1]))

        print('there are ', len(collect_self_loop), ' reflexive cycles')

        print('there are ', count_sameas, ' sameAs relations')
        print('there are ', len(collect_eq), ' eqProp relations')
        print('there are ', count_two, ' size-two cycles')
        print('there are ', count_bigger, ' bigger cycles')

        with open(file_name, 'w', newline='') as file:
            writer = csv.writer(file)
            writer.writerow([
                "SUBJECT_ID", "SUBJECT", "OBJECT_ID", "OBJECT", "SUGGESTION",
                "DECISION"
            ])
            for [s_id] in collect_self_loop:
                s_term = self.convert_to_term(s_id)
                # removed by the automatic method
                writer.writerow([s_id, s_term, s_id, s_term, 'remove', 'o'])

            for (s_id, o_id) in collect_eq:
                s_term = self.convert_to_term(s_id)
                o_term = self.convert_to_term(o_id)
                # removed by the automatic method
                writer.writerow([s_id, s_term, o_id, o_term, 'remove', 'e'])

            for (s_id, o_id) in collect_others:
                s_term = self.convert_to_term(s_id)
                o_term = self.convert_to_term(o_id)
                self.print_info(s_term, o_term)
                # manual step: both directions of the two-cycle
                writer.writerow([s_id, s_term, o_id, o_term, 'remove', '2'])
                writer.writerow([o_id, o_term, s_id, s_term, 'remove', '2'])

            for (s_id, o_id) in collect_bigger:
                s_term = self.convert_to_term(s_id)
                o_term = self.convert_to_term(o_id)
                # manual step
                writer.writerow([s_id, s_term, o_id, o_term, 'remove', 'x'])

    def load_removed(self):
        """Remove from the graph every edge marked 'remove' in 'pre-subP.csv'."""
        coll_removed = []
        # ``with`` closes the file; the handle used to leak.
        with open('pre-subP.csv', 'r') as subp_file:
            for row in csv.DictReader(subp_file):
                if row["SUGGESTION"] == 'remove':
                    coll_removed.append(
                        (int(row["SUBJECT_ID"]), int(row["OBJECT_ID"])))
        print('number of removed edges:', len(coll_removed))
        self.graph.remove_edges_from(coll_removed)

    def test_cycle(self):
        """Print one cycle of the graph, or 'no cycle' when there is none."""
        try:
            c = nx.find_cycle(self.graph)
        except nx.NetworkXNoCycle:
            # Only the "no cycle" signal is reported as such; any other
            # error now propagates instead of being mislabelled.
            print('no cycle')
        else:
            print('cycle = ', c)

    def export_graph_nt(self, name):
        """Serialize the graph's edges as subPropertyOf triples to ``name``.

        The output is written in N-Triples format.
        """
        g = Graph()
        # the predicate is the same for every edge: build it once
        predicate = URIRef(self.subPropertyOf)
        for (s_id, o_id) in self.graph.edges:
            s_term = self.convert_to_term(s_id)
            o_term = self.convert_to_term(o_id)
            g.add((URIRef(s_term), predicate, URIRef(o_term)))

        g.serialize(destination=name, format='nt')
コード例 #3
0
def generate_reduced():
    """Print, for hard-coded cycles, each node's name and position ids.

    The data set is chosen by the first command-line argument: 'lod' opens
    PATH_LOD, anything else (including no argument, which used to raise
    IndexError) opens PATH_DBpedia.

    For every id of every listed cycle the function prints the id, the term
    it denotes in subject position, and the ids that term has in subject and
    in object position.  Nothing is written to disk; the unused output file
    names and subClassOf lookups of the earlier version were dropped.
    """
    dataset = sys.argv[1] if len(sys.argv) > 1 else ''
    hdt_file = HDTDocument(PATH_LOD if dataset == 'lod' else PATH_DBpedia)

    def print_cycle_ids(header, cycles):
        """Print the id/name/subject-id/object-id report for ``cycles``."""
        for c in cycles:
            print(header, c)
            for n in c:
                print('id =', n)
                name = hdt_file.convert_id(int(n), IdentifierPosition.Subject)
                print('name = ', name)
                s_id = hdt_file.convert_term(name, IdentifierPosition.Subject)
                print('when its subject = ', s_id)
                o_id = hdt_file.convert_term(name, IdentifierPosition.Object)
                print('when its object  = ', o_id)

    # Ids are kept as strings so that the printed cycles look as before.
    print_cycle_ids('\n\n this cycle = ', [
        ['196338233', '196338418', '196338419'],
        ['196338233', '196338325', '196338412'],
        ['196337995', '196338014', '196338013'],
        ['196338014', '196338063', '196338410'],
    ])

    print('==================================')

    print_cycle_ids('\n\n that cycle = ', [
        ['1193056652', '1193056593', '1193056657'],
        ['1146303708', '1146299369', '1146331327'],
        ['196338400', '196338312', '196338288'],
        ['196338013', '196337995', '196338014'],
        ['196338242', '196338410', '196337957'],
        ['196338418', '196338419', '196338233'],
        ['196338233', '196338325', '196338412'],
        ['196338014', '196338063', '196338410'],
        ['196338014', '196337975', '196338007'],
        ['196338050', '196338049', '196337975'],
        ['196338197', '196338462', '196338406'],
        ['196338220', '196338217', '196338034'],
        ['196338145', '196338152', '196338419'],
        ['196338288', '196338116', '196337978'],
        ['196338070', '196338360', '196338241'],
        ['114657709', '114657713', '125181834'],
    ])
コード例 #4
0
class SubM:
    """Pruning of the rdfs:subClassOf graph stored in an HDT file.

    Every subClassOf triple becomes a directed edge
    ``(subject_id, object_id)`` in ``self.graph``.  Removed edges and nodes
    are recorded, with a comment, in ``self.diagnosed_relations`` and
    ``self.diagnosed_classes``.
    """

    # Per the original author's note, this is the only term whose id differs
    # between subject and object position, so it is mapped by hand.
    _INTANGIBLE_TERM = "akt742:Intangible-Thing"
    _INTANGIBLE_ID = 2601100675

    # Initializer / Instance Attributes
    def __init__(self, path_hdt=PATH_LOD):
        """Open the HDT file at ``path_hdt`` and create empty containers."""
        self.hdt = HDTDocument(path_hdt)
        self.subClassOf = "http://www.w3.org/2000/01/rdf-schema#subClassOf"
        self.id_subClassOf = self.hdt.convert_term(
            self.subClassOf, IdentifierPosition.Predicate)
        self.equivalent = "http://www.w3.org/2002/07/owl#equivalentClass"
        self.id_equivalentClass = self.hdt.convert_term(
            self.equivalent, IdentifierPosition.Predicate)
        self.graph = nx.DiGraph()
        self.equi_graph = nx.Graph()
        self.diagnosed_relations = {}  # (sub, sup) -> comment
        self.diagnosed_classes = {}  # class id -> comment
        self.leaf_classes = set()

    def setup_graph(self):
        """Add one edge (subject_id, object_id) per subClassOf triple."""
        (subclass_triple_ids,
         _) = self.enquiry(query=(0, self.id_subClassOf, 0), mode="default")
        self.graph.add_edges_from(
            (s_id, o_id) for (s_id, _, o_id) in subclass_triple_ids)

    def remove_unnecessary_relations(self):
        """Remove edges between classes that are declared equivalent.

        For every node n, each triple (n, equivalentClass, m) removes the
        edge n -> m and each triple (m, equivalentClass, n) removes the edge
        m -> n, provided m is a node of the graph.
        """
        for n in self.graph.nodes():
            (eq_triple_ids,
             _) = self.enquiry(query=(n, self.id_equivalentClass, 0),
                               mode="default")
            for (_, _, m) in eq_triple_ids:
                if m in self.graph.nodes():
                    self.remove_relation(n, m, 'equivalence')
            (eq_triple_ids,
             _) = self.enquiry(query=(0, self.id_equivalentClass, n),
                               mode="default")
            for (m, _, _) in eq_triple_ids:
                if m in self.graph.nodes():
                    self.remove_relation(m, n, 'equivalence')
        print('total relations diagnosed:', len(self.diagnosed_relations))

    def export_graph(self, export_file=None):
        """Write all edges of the graph to the CSV file ``export_file``.

        Does nothing when ``export_file`` is None.
        """
        if export_file is None:
            return
        # ``with`` closes (and flushes) the file; the handle used to leak.
        with open(export_file, 'w', newline='') as file:
            writer = csv.writer(file)
            writer.writerow(["SUBJECT_ID", "SUBJECT", "OBJECT_ID", "OBJECT"])
            for (s_id, o_id) in self.graph.edges:
                s_term = self.convert_to_term(s_id)
                o_term = self.convert_to_term(o_id)
                # Same order as the header; id and term used to be swapped.
                writer.writerow([s_id, s_term, o_id, o_term])

    def remove_all_two_cycles(self):
        """Remove every edge whose reverse edge is also in the graph."""
        to_remove = set()
        for (l, r) in self.graph.edges():
            if self.graph.has_edge(r, l):
                to_remove.add((l, r))
                to_remove.add((r, l))
        print('there are in total', len(to_remove), ' two-cycle edges removed')
        for (l, r) in to_remove:
            self.graph.remove_edge(l, r)

    def load_manual_decisions(self, file, mode="remove"):
        """Apply the manual decisions stored in the CSV file ``file``.

        Rows whose (SUBJECT_ID, OBJECT_ID) pair was already diagnosed are
        skipped.  A row with SUGGESTION 'remove' removes that edge; any
        other suggestion is only recorded in ``self.diagnosed_relations``.

        mode: currently unused.  The original notes describe an intended
            'ignore'/'remove' switch for rows marked as unknown.
        """
        removed = 0
        with open(file, newline='') as two:
            for row in csv.DictReader(two):
                # Graph nodes are integer ids; the raw CSV strings never
                # matched any node or any already diagnosed pair.
                sub = int(row['SUBJECT_ID'])
                sup = int(row['OBJECT_ID'])
                if (sub, sup) in self.diagnosed_relations:
                    continue
                if row['SUGGESTION'] == 'remove':
                    if self.graph.has_edge(sub, sup):
                        removed += 1
                    self.remove_relation(sub, sup, comment='remove')
                else:
                    self.diagnosed_relations[(sub, sup)] = row['SUGGESTION']
        # Count the real removals; the old list was never filled.
        print('there are in total ', removed,
              ' relations removed from mannual decisions')

    def enquiry(self, query, mode="subm"):
        """Run the id-based triple pattern ``query`` = (s, p, o).

        Only ``mode="default"`` is implemented; it returns whatever
        ``self.hdt.search_triples_ids`` returns.  Any other mode (including
        the default argument value) is a placeholder and returns None.
        """
        (s, p, o) = query
        if mode == "default":
            return self.hdt.search_triples_ids(s, p, o)
        # placeholder: examine the filtered part first
        return None

    def convert_to_id(self, term):
        """Return the id of ``term`` (looked up in subject position)."""
        if term == self._INTANGIBLE_TERM:
            return self._INTANGIBLE_ID
        return self.hdt.convert_term(term, IdentifierPosition.Subject)

    def convert_to_term(self, id):
        """Return the term for ``id`` (looked up in subject position)."""
        if id == self._INTANGIBLE_ID:
            return self._INTANGIBLE_TERM
        return self.hdt.convert_id(id, IdentifierPosition.Subject)

    def remove_relation(self, sub, sup, comment='remove'):
        """Remove the edge sub -> sup if present and record ``comment``."""
        if self.graph.has_edge(sub, sup):
            self.graph.remove_edge(sub, sup)
            self.diagnose_relations(sub, sup, comment)

    def remove_relation_from(self, relation_list, comment='remove'):
        """Call remove_relation for every (sub, sup) pair in the list."""
        for (sub, sup) in relation_list:
            self.remove_relation(sub, sup, comment)

    def diagnose_relations(self, sub, sup, comment='default'):
        """Record ``comment`` for the relation (sub, sup)."""
        self.diagnosed_relations[(sub, sup)] = comment

    def diagnose_class(self, c, comment='default'):
        """Record ``comment`` for the class ``c``."""
        # The attribute is ``diagnosed_classes``; the misspelt
        # ``diagnosed_class`` raised AttributeError.
        self.diagnosed_classes[c] = comment
        # TODO, split the cases of removal and comment

    def remove_class(self, c, comment='remove'):
        """Remove node ``c`` (and its incident edges) and record ``comment``."""
        if self.graph.has_node(c):
            # removing a node also removes all the edges attached to it
            self.graph.remove_node(c)
            self.diagnosed_classes[c] = comment

    def remove_class_from(self, cs, comment='remove'):
        """Call remove_class for every node in ``cs``."""
        for c in cs:
            self.remove_class(c, comment)

    def filter_leaf_classes(self):
        """Remove every node that never occurs as object of subClassOf."""
        count = len(self.diagnosed_classes)

        for c in self.graph.nodes:
            # a leaf has no triple (?, subClassOf, c)
            (_, cardi) = self.enquiry(query=(0, self.id_subClassOf, c),
                                      mode="default")
            if cardi == 0:
                self.leaf_classes.add(c)
        for c in self.leaf_classes:
            self.remove_class(c)
        print('there are a total of',
              len(self.diagnosed_classes) - count, 'leaf nodes removed')

    def get_domain_from_id(self, id):
        """Return the domain ``tldextract`` extracts from the term of ``id``."""
        t = self.convert_to_term(id)
        return tldextract.extract(t).domain

    def filter_domain_classes(self, domain):
        """Remove every node whose term has the given ``domain``."""
        filtered = {c for c in self.graph.nodes
                    if self.get_domain_from_id(c) == domain}
        print('a total of ', len(filtered), ' removed w.r.t. domain ', domain)
        self.remove_class_from(list(filtered))

    def filter_reflexsive(self):
        """Remove every self-loop from the graph."""
        to_remove = {(l, r) for (l, r) in self.graph.edges() if l == r}
        print('removed reflexive relations', len(to_remove))
        self.graph.remove_edges_from(list(to_remove))

    def print_cycles(self):
        """Print cycles one by one, removing each from the graph.

        Destructive: all edges of every printed cycle are removed from
        ``self.graph``.  Stops when no cycle is left.
        """
        while True:
            try:
                cycle = nx.find_cycle(self.graph)
            except nx.NetworkXNoCycle as e:
                print(e)
                break
            print('find cycle', cycle)
            (l, r) = cycle[0]
            print(self.get_domain_from_id(l))
            self.graph.remove_edges_from(cycle)