Esempio n. 1
0
    def __neighbor_search(self, input_data, relation: Relation):
        """
        Collect the neighbors reachable through ``relation`` from every
        element of the input.

        :param input_data: anything ``self._general_to_drs`` accepts (the
            original documentation lists an nid, a node tuple, a Hit or a
            DRS)
        :param relation: the Relation to follow; when
            ``relation.from_metadata()`` is truthy the neighbors come from
            ``self.md_search`` instead of a direct network lookup
        :return: a DRS that absorbed the provenance of the input and the
            result of each per-element neighbor lookup
        """
        source_drs = self._general_to_drs(input_data)

        # Start from an empty result that carries the input's provenance.
        result = DRS([], Operation(OP.NONE)).absorb_provenance(source_drs)

        # Table-mode input is handed to the field conversion helper. Its
        # return value is not used here, so this presumably relies on the
        # helper updating ``source_drs`` in place -- TODO confirm.
        if source_drs.mode == DRSMode.TABLE:
            self._general_to_field_drs(source_drs)

        if relation.from_metadata():
            # Metadata relations: search the metadata first, then resolve
            # the found neighbors on the network.
            md_relation = self._relation_to_mdrelation(relation)
            for hit in source_drs:
                md_neighbors = self.md_search(hit, md_relation)
                found = self._network.md_neighbors_id(
                    hit, md_neighbors, relation)
                result = result.absorb(found)
        else:
            # Regular relations are resolved directly on the network.
            for hit in source_drs:
                found = self._network.neighbors_id(hit, relation)
                result = result.absorb(found)
        return result
Esempio n. 2
0
 def drs_expand_to_table(self, drs: DRS) -> DRS:
     """
     Build a DRS with the hits of every table referenced by the input.

     For each element of ``drs`` its ``source_name`` is used to fetch the
     table's hits from the network; those hits are wrapped in a DRS tagged
     with an ``OP.TABLE`` operation (the originating element as parameter)
     and merged into the result.

     :param drs: the input DRS
     :return: a DRS that absorbed one per-table DRS for each input element
     """
     o_drs = DRS([], Operation(OP.NONE))
     for h in drs:
         hits = self.__network.get_hits_from_table(h.source_name)
         # A separate name keeps the ``drs`` parameter intact while it is
         # being iterated.
         table_drs = DRS(list(hits), Operation(OP.TABLE, params=[h]))
         # Keep the value returned by absorb so the result does not depend
         # on absorb mutating its receiver in place.
         o_drs = o_drs.absorb(table_drs)
     return o_drs
Esempio n. 3
0
 def schema_neighbors_of(self, i_drs: DRS) -> DRS:
     """
     Return the hits of the tables that the elements of the input belong to.

     :param i_drs: the input DRS; when it is in table mode it is switched to
         fields mode and absorbs the DRS built by ``drs_from_table_hit`` for
         each of its elements before the lookup
     :return: a DRS carrying the provenance of the input plus, for every
         element, an ``OP.TABLE`` DRS with the hits of its ``source_name``
     """
     result = DRS([], Operation(OP.NONE)).absorb_provenance(i_drs)

     # Table-mode input is first expanded through drs_from_table_hit.
     if i_drs.mode == DRSMode.TABLE:
         i_drs.set_fields_mode()
         for table_hit in i_drs:
             i_drs = i_drs.absorb(self.drs_from_table_hit(table_hit))

     for hit in i_drs:
         table_hits = self.__network.get_hits_from_table(hit.source_name)
         table_drs = DRS(list(table_hits), Operation(OP.TABLE, params=[hit]))
         result = result.absorb(table_drs)
     return result
Esempio n. 4
0
 def similar_content_to(self, i_drs: DRS) -> DRS:
     """
     Return the ``Relation.CONTENT_SIM`` neighbors of every element of the
     input.

     :param i_drs: the input DRS; when it is in table mode it is switched to
         fields mode and absorbs the DRS built by ``drs_from_table_hit`` for
         each of its elements before the lookup
     :return: a DRS carrying the provenance of the input plus the neighbors
         found for each element
     """
     result = DRS([], Operation(OP.NONE)).absorb_provenance(i_drs)

     # Table-mode input is first expanded through drs_from_table_hit.
     if i_drs.mode == DRSMode.TABLE:
         i_drs.set_fields_mode()
         for table_hit in i_drs:
             i_drs = i_drs.absorb(self.drs_from_table_hit(table_hit))

     for hit in i_drs:
         similar = self.__network.neighbors_id(hit, Relation.CONTENT_SIM)
         result = result.absorb(similar)
     return result
Esempio n. 5
0
 def pkfk_of(self, i_drs: DRS) -> DRS:
     """
     Return the ``Relation.PKFK`` neighbors of every element of the input.

     :param i_drs: the input DRS; when it is in table mode it is switched to
         fields mode and absorbs the DRS built by ``drs_from_table_hit`` for
         each of its elements before the lookup
     :return: a DRS carrying the provenance of the input plus the neighbors
         found for each element
     """
     result = DRS([], Operation(OP.NONE)).absorb_provenance(i_drs)

     # Table-mode input is first expanded through drs_from_table_hit.
     if i_drs.mode == DRSMode.TABLE:
         i_drs.set_fields_mode()
         for table_hit in i_drs:
             i_drs = i_drs.absorb(self.drs_from_table_hit(table_hit))

     for hit in i_drs:
         linked = self.__network.neighbors_id(hit, Relation.PKFK)
         result = result.absorb(linked)
     return result
Esempio n. 6
0
 def table_names_search(self, kws: [str], max_results=10) -> DRS:
     """
     Run ``table_name_search`` for each keyword and merge the results.

     :param kws: collection (iterable) of keywords (strings)
     :param max_results: forwarded unchanged to every ``table_name_search``
         call
     :return: a DRS that absorbed the result of each individual search
     """
     merged = DRS([], Operation(OP.NONE))
     for keyword in kws:
         matches = self.table_name_search(keyword, max_results=max_results)
         merged = merged.absorb(matches)
     return merged
Esempio n. 7
0
    def paths(self,
              drs_a: DRS,
              drs_b: DRS = None,
              relation=Relation.PKFK,
              max_hops=2,
              lean_search=False) -> DRS:
        """
        Is there a transitive relationship between any element in a with any
        element in b?
        This function finds the answer constrained on the primitive
        (singular for now) that is passed as a parameter.
        If b is not passed, assumes the user is searching for paths between
        elements in a.

        :param drs_a: source elements; converted with ``_general_to_drs``
        :param drs_b: target elements; converted with ``_general_to_drs``.
            When omitted (``None``) the converted ``drs_a`` is used as the
            target as well
        :param relation: the Relation the paths are constrained to
        :param max_hops: forwarded to the network path search
        :param lean_search: forwarded to the table-mode path search only
        :return: a DRS that absorbed the provenance of the inputs and the
            result of the path search for every (a, b) pair
        """
        drs_a = self._general_to_drs(drs_a)
        # create b if it wasn't passed in.
        drs_b = drs_a if drs_b is None else self._general_to_drs(drs_b)

        self._assert_same_mode(drs_a, drs_b)

        # absorb the provenance of both a and b; keep the returned value so
        # the result does not depend on absorb_provenance mutating in place
        o_drs = DRS([], Operation(OP.NONE))
        o_drs = o_drs.absorb_provenance(drs_a)
        if drs_b != drs_a:
            o_drs = o_drs.absorb_provenance(drs_b)

        for h1, h2 in itertools.product(drs_a, drs_b):

            # there are different network operations for table and field mode
            if drs_a.mode == DRSMode.FIELDS:
                res_drs = self._network.find_path_hit(h1,
                                                      h2,
                                                      relation,
                                                      max_hops=max_hops)
            else:
                res_drs = self._network.find_path_table(
                    h1,
                    h2,
                    relation,
                    self,
                    max_hops=max_hops,
                    lean_search=lean_search)

            o_drs = o_drs.absorb(res_drs)

        return o_drs
Esempio n. 8
0
 def paths(self, a: DRS, primitives) -> DRS:
     """
     Search for paths between the elements of a single DRS.

     In fields mode every ordered pair of elements that do not compare
     equal is passed to the network's ``find_path_hit``; in table mode
     every ordered pair (equal ones included) is passed to
     ``find_path_table``. Any other mode yields only the provenance of
     the input.

     :param a: the DRS whose elements are paired with each other
     :param primitives: the constraint forwarded to the network path search
     :return: a DRS carrying the provenance of ``a`` plus every path result
     """
     found = DRS([], Operation(OP.NONE)).absorb_provenance(a)
     if a.mode == DRSMode.FIELDS:
         for source in a:  # source is a Hit
             for target in a:  # target is a Hit
                 if not source == target:
                     path = self.__network.find_path_hit(
                         source, target, primitives)
                     found = found.absorb(path)
     elif a.mode == DRSMode.TABLE:
         for source in a:  # source is a table: str
             for target in a:  # target is a table: str
                 path = self.__network.find_path_table(
                     source, target, primitives, self)
                 found = found.absorb(path)
     return found
Esempio n. 9
0
 def paths_between(self, a: DRS, b: DRS, primitives, max_hops=2) -> DRS:
     """
     Is there a transitive relationship between any element in a with any element in b?
     This function finds the answer constrained on the primitive (singular for now) that is passed
     as a parameter.

     Pairs whose source and target compare equal are skipped; every other
     pair is still searched, so the result does not depend on the order in
     which the elements are visited.

     :param a: DRS with the source elements
     :param b: DRS with the target elements; must be in the same mode as a
     :param primitives: the constraint forwarded to the network path search
     :param max_hops: forwarded to the network path search
     :return: a DRS that absorbed the provenance of a and b and the result
         of the path search for every examined pair
     """
     assert a.mode == b.mode, "a and b must be in the same mode"
     o_drs = DRS([], Operation(OP.NONE))
     # Keep the returned value so the result does not depend on
     # absorb_provenance mutating its receiver in place.
     o_drs = o_drs.absorb_provenance(a)
     o_drs = o_drs.absorb_provenance(b)
     if a.mode == DRSMode.FIELDS:
         for h1 in a:  # h1 is a Hit
             for h2 in b:  # h2 is a Hit
                 if h1 == h2:
                     # same source and target field: skip only this pair
                     continue
                 res_drs = self.__network.find_path_hit(h1,
                                                        h2,
                                                        primitives,
                                                        max_hops=max_hops)
                 o_drs = o_drs.absorb(res_drs)
     elif a.mode == DRSMode.TABLE:
         for h1 in a:  # h1 is a table: str
             for h2 in b:  # h2 is a table: str
                 if h1 == h2:
                     # same source and target table: skip only this pair
                     continue
                 res_drs = self.__network.find_path_table(h1,
                                                          h2,
                                                          primitives,
                                                          self,
                                                          max_hops=max_hops)
                 o_drs = o_drs.absorb(res_drs)
     return o_drs