def __neighbor_search(self, input_data, relation: Relation):
    """
    Collect the neighbors of every element of the input under a relation.

    :param input_data: an nid, node tuple, Hit, or DRS; it is normalized
        with ``self._general_to_drs`` before the search
    :param relation: the Relation used to look up neighbors
    :return: a DRS that absorbed the provenance of the input and every
        neighbor DRS returned by the network
    """
    # Normalize whatever was passed in to a DRS.
    source_drs = self._general_to_drs(input_data)

    # Start from an empty result that carries the input's provenance.
    result = DRS([], Operation(OP.NONE))
    result = result.absorb_provenance(source_drs)

    # Table-mode inputs are handed to the field conversion helper first.
    # NOTE(review): the helper's return value is not used, so this only
    # matters if the helper updates its argument in place -- confirm.
    if source_drs.mode == DRSMode.TABLE:
        self._general_to_field_drs(source_drs)

    if relation.from_metadata():
        # Metadata relations need a metadata search per hit before the
        # network can resolve the neighbors.
        md_relation = self._relation_to_mdrelation(relation)
        for hit in source_drs:
            neighbors = self.md_search(hit, md_relation)
            found = self._network.md_neighbors_id(hit, neighbors, relation)
            result = result.absorb(found)
    else:
        for hit in source_drs:
            found = self._network.neighbors_id(hit, relation)
            result = result.absorb(found)

    return result
def drs_expand_to_table(self, drs: DRS) -> DRS:
    """
    Build a DRS with the hits of every table referenced by the input.

    For each hit of ``drs`` the network is asked for the hits of the hit's
    ``source_name``; those are wrapped in a DRS tagged with an ``OP.TABLE``
    operation (the originating hit as its parameter) and absorbed into the
    result.

    :param drs: the input DRS
    :return: a DRS that absorbed one table DRS per input hit
    """
    o_drs = DRS([], Operation(OP.NONE))
    for h in drs:
        hits = self.__network.get_hits_from_table(h.source_name)
        # Use a dedicated name: rebinding ``drs`` here would shadow the
        # parameter that is currently being iterated.
        table_drs = DRS(list(hits), Operation(OP.TABLE, params=[h]))
        # Keep the DRS returned by absorb instead of relying on in-place
        # mutation of ``o_drs``.
        o_drs = o_drs.absorb(table_drs)
    return o_drs
def schema_neighbors_of(self, i_drs: DRS) -> DRS:
    """
    Given a DRS, return another DRS with the hits of the tables that the
    input elements belong to.

    :param i_drs: the input DRS
    :return: a DRS that absorbed the provenance of the input and, for each
        input hit, a DRS (tagged ``OP.TABLE``) with the hits of its table
    """
    result = DRS([], Operation(OP.NONE))
    result = result.absorb_provenance(i_drs)

    # A table-mode input is switched to fields mode and the fields of each
    # table hit are absorbed into it.
    if i_drs.mode == DRSMode.TABLE:
        i_drs.set_fields_mode()
        for table_hit in i_drs:
            table_fields = self.drs_from_table_hit(table_hit)
            i_drs = i_drs.absorb(table_fields)

    for hit in i_drs:
        table_hits = self.__network.get_hits_from_table(hit.source_name)
        table_drs = DRS(list(table_hits), Operation(OP.TABLE, params=[hit]))
        result = result.absorb(table_drs)

    return result
def similar_content_to(self, i_drs: DRS) -> DRS:
    """
    Return the ``Relation.CONTENT_SIM`` neighbors of every element of the
    input DRS.

    :param i_drs: the input DRS
    :return: a DRS that absorbed the provenance of the input and the
        neighbor DRS of each of its hits
    """
    result = DRS([], Operation(OP.NONE))
    result = result.absorb_provenance(i_drs)

    # A table-mode input is switched to fields mode and the fields of each
    # table hit are absorbed into it.
    if i_drs.mode == DRSMode.TABLE:
        i_drs.set_fields_mode()
        for table_hit in i_drs:
            table_fields = self.drs_from_table_hit(table_hit)
            i_drs = i_drs.absorb(table_fields)

    for hit in i_drs:
        similar = self.__network.neighbors_id(hit, Relation.CONTENT_SIM)
        result = result.absorb(similar)

    return result
def pkfk_of(self, i_drs: DRS) -> DRS:
    """
    Return the ``Relation.PKFK`` neighbors of every element of the input
    DRS.

    :param i_drs: the input DRS
    :return: a DRS that absorbed the provenance of the input and the
        neighbor DRS of each of its hits
    """
    result = DRS([], Operation(OP.NONE))
    result = result.absorb_provenance(i_drs)

    # A table-mode input is switched to fields mode and the fields of each
    # table hit are absorbed into it.
    if i_drs.mode == DRSMode.TABLE:
        i_drs.set_fields_mode()
        for table_hit in i_drs:
            table_fields = self.drs_from_table_hit(table_hit)
            i_drs = i_drs.absorb(table_fields)

    for hit in i_drs:
        linked = self.__network.neighbors_id(hit, Relation.PKFK)
        result = result.absorb(linked)

    return result
def table_names_search(self, kws: [str], max_results=10) -> DRS:
    """
    Run ``table_name_search`` for each keyword and merge the results.

    :param kws: collection (iterable) of keywords (strings)
    :param max_results: forwarded unchanged to every per-keyword search
    :return: a DRS that absorbed the result of each search
    """
    merged = DRS([], Operation(OP.NONE))
    for keyword in kws:
        matches = self.table_name_search(keyword, max_results=max_results)
        merged = merged.absorb(matches)
    return merged
def paths(self, drs_a: DRS, drs_b: DRS = None, relation=Relation.PKFK,
          max_hops=2, lean_search=False) -> DRS:
    """
    Is there a transitive relationship between any element in a with any
    element in b? This function finds the answer constrained on the
    relation (singular for now) that is passed as a parameter.

    If ``drs_b`` is not passed, the search is for paths between the
    elements of ``drs_a``.

    :param drs_a: source elements; normalized with ``_general_to_drs``
    :param drs_b: target elements; defaults to ``drs_a`` when omitted
    :param relation: the Relation the paths are constrained to
    :param max_hops: forwarded to the network path search
    :param lean_search: forwarded to the table-mode path search only
    :return: a DRS that absorbed the provenance of both inputs and the
        result of the path search for every (a, b) pair
    """
    drs_a = self._general_to_drs(drs_a)
    # Create b if it wasn't passed in.
    if drs_b is None:
        drs_b = drs_a
    else:
        drs_b = self._general_to_drs(drs_b)
    self._assert_same_mode(drs_a, drs_b)

    # Absorb the provenance of both a and b, keeping the returned DRS
    # instead of relying on in-place mutation.
    o_drs = DRS([], Operation(OP.NONE))
    o_drs = o_drs.absorb_provenance(drs_a)
    if drs_b != drs_a:
        o_drs = o_drs.absorb_provenance(drs_b)

    for h1, h2 in itertools.product(drs_a, drs_b):
        # There are different network operations for table and field mode.
        if drs_a.mode == DRSMode.FIELDS:
            res_drs = self._network.find_path_hit(
                h1, h2, relation, max_hops=max_hops)
        else:
            res_drs = self._network.find_path_table(
                h1, h2, relation, self,
                max_hops=max_hops, lean_search=lean_search)
        o_drs = o_drs.absorb(res_drs)

    return o_drs
def paths(self, a: DRS, primitives) -> DRS:
    """
    Search for transitive relationships between the elements of ``a``,
    constrained on the primitive (singular for now) passed as parameter.

    In fields mode every ordered pair of distinct hits is searched; in
    table mode every ordered pair is searched, identical pairs included.

    NOTE(review): another method named ``paths`` with a different
    signature is visible in this file; if both belong to the same class
    only the later definition is reachable -- confirm which is intended.

    :param a: the DRS whose elements are paired with each other
    :param primitives: the relation passed to the network path search
    :return: a DRS that absorbed the provenance of ``a`` and the result of
        each pairwise path search
    """
    result = DRS([], Operation(OP.NONE))
    result = result.absorb_provenance(a)

    if a.mode == DRSMode.FIELDS:
        for src in a:  # src is a Hit
            for dst in a:  # dst is a Hit
                if src == dst:
                    continue
                found = self.__network.find_path_hit(src, dst, primitives)
                result = result.absorb(found)
    elif a.mode == DRSMode.TABLE:
        for src in a:  # src is a table: str
            for dst in a:  # dst is a table: str
                found = self.__network.find_path_table(
                    src, dst, primitives, self)
                result = result.absorb(found)

    return result
def paths_between(self, a: DRS, b: DRS, primitives, max_hops=2) -> DRS:
    """
    Is there a transitive relationship between any element in a with any
    element in b? This function finds the answer constrained on the
    primitive (singular for now) that is passed as a parameter.

    Pairs whose source and target compare equal are skipped; all other
    pairs are still searched.

    :param a: DRS with the source elements
    :param b: DRS with the target elements; must be in the same mode as a
    :param primitives: the relation passed to the network path search
    :param max_hops: forwarded to the network path search
    :return: a DRS that absorbed the provenance of both inputs and the
        result of each pairwise path search
    :raises AssertionError: if ``a`` and ``b`` are in different modes
    """
    assert (a.mode == b.mode)

    # Keep the DRS returned by absorb_provenance instead of relying on
    # in-place mutation.
    o_drs = DRS([], Operation(OP.NONE))
    o_drs = o_drs.absorb_provenance(a)
    o_drs = o_drs.absorb_provenance(b)

    if a.mode == DRSMode.FIELDS:
        for h1 in a:  # h1 is a Hit
            for h2 in b:  # h2 is a Hit
                if h1 == h2:
                    # Same source and target field: skip this pair only.
                    # Returning here would drop every remaining pair.
                    continue
                res_drs = self.__network.find_path_hit(
                    h1, h2, primitives, max_hops=max_hops)
                o_drs = o_drs.absorb(res_drs)
    elif a.mode == DRSMode.TABLE:
        for h1 in a:  # h1 is a table: str
            for h2 in b:  # h2 is a table: str
                if h1 == h2:
                    # Same source and target table: skip this pair only.
                    continue
                res_drs = self.__network.find_path_table(
                    h1, h2, primitives, self, max_hops=max_hops)
                o_drs = o_drs.absorb(res_drs)

    return o_drs