def apply(self, doc, split, train, **kwargs):
    """Featurize the candidates of one document and store the results.

    The candidate lists returned for the document are flattened into one
    iterable so the Feature records are built and upserted in a single
    pass, which helps reduce the number of queries needed to update.

    This is a generator that never yields an item: the work happens when
    the generator is first advanced, after which it is exhausted.

    :param doc: A document to process.
    :param split: Which split to use.
    :param train: Whether or not to insert new FeatureKeys.
    :param kwargs: Extra keyword arguments; accepted but not used here.
    """
    logger.debug(f"Document: {doc}")

    # Handed to get_mapping and, when training, to upsert_keys.
    # NOTE(review): presumably get_mapping fills this with the keys it
    # encounters -- confirm against get_mapping.
    key_map = {}

    # One flat stream of candidates instead of a list of candidate lists.
    candidates = itertools.chain.from_iterable(
        get_cands_list_from_split(
            self.session, self.candidate_classes, doc, split
        )
    )

    feature_records = list(
        get_mapping(self.session, Feature, candidates, get_all_feats, key_map)
    )
    batch_upsert_records(self.session, Feature, feature_records)

    # FeatureKeys are only inserted for training runs.
    if train:
        upsert_keys(self.session, FeatureKey, key_map)

    # Delegating to an empty iterable keeps this a generator with no items.
    yield from ()
def apply(self, doc, split, train, lfs, **kwargs):
    """Label the candidates of one document and store the results.

    Each group of candidates returned for the document is turned into
    Label records and upserted separately. The given ``lfs`` are stored on
    ``self.lfs`` before any labeling happens.

    This is a generator that never yields an item: the work (including the
    ``lfs`` check) happens when the generator is first advanced.

    :param doc: A document to process.
    :param split: Which split to use.
    :param train: Whether or not to insert new LabelKeys.
    :param lfs: The list of functions to use to generate labels.
    :param kwargs: Extra keyword arguments; accepted but not used here.
    :raises ValueError: If ``lfs`` is None.
    """
    logger.debug("Document: {}".format(doc))

    if lfs is None:
        raise ValueError("Must provide lfs kwarg.")
    self.lfs = lfs

    # Handed to get_mapping and, when training, to upsert_keys.
    # NOTE(review): presumably get_mapping fills this with the keys it
    # encounters -- confirm against get_mapping.
    key_map = {}

    cand_groups = get_cands_list_from_split(
        self.session, self.candidate_classes, doc, split
    )
    for group in cand_groups:
        label_records = list(
            get_mapping(self.session, Label, group, self._f_gen, key_map)
        )
        batch_upsert_records(self.session, Label, label_records)

    # LabelKeys are only inserted for training runs.
    if train:
        upsert_keys(self.session, LabelKey, key_map)

    # Delegating to an empty iterable keeps this a generator with no items.
    yield from ()
def apply(self, doc, split, train, **kwargs):
    """Featurize the candidates of one document and store the results.

    Each group of candidates returned for the document is turned into
    Feature records and upserted separately.

    This is a generator that never yields an item: the work happens when
    the generator is first advanced, after which it is exhausted.

    :param doc: A document to process.
    :param split: Which split to use.
    :param train: Whether or not to insert new FeatureKeys.
    :param kwargs: Extra keyword arguments; accepted but not used here.
    """
    logger.debug(f"Document: {doc}")

    # Handed to get_mapping and, when training, to upsert_keys.
    # NOTE(review): presumably get_mapping fills this with the keys it
    # encounters -- confirm against get_mapping.
    key_map = {}

    cand_groups = get_cands_list_from_split(
        self.session, self.candidate_classes, doc, split
    )
    for group in cand_groups:
        feature_records = list(
            get_mapping(self.session, Feature, group, get_all_feats, key_map)
        )
        batch_upsert_records(self.session, Feature, feature_records)

    # FeatureKeys are only inserted for training runs.
    if train:
        upsert_keys(self.session, FeatureKey, key_map)

    # Delegating to an empty iterable keeps this a generator with no items.
    yield from ()
def _after_apply(self, train: bool = False, **kwargs: Any) -> None:
    """Rebuild the FeatureKey table from the stored Features when training.

    Every Feature in the session is read, and each of its keys is mapped to
    the table names of the candidate classes it appears with. The existing
    FeatureKey rows are then deleted and the collected keys are upserted.

    :param train: Whether or not to insert new FeatureKeys. When False,
        nothing is done.
    :param kwargs: Extra keyword arguments; accepted but not used here.
    """
    if not train:
        return

    # key name -> table names of the candidate classes that carry this key
    tables_by_key: DefaultDict[str, set] = defaultdict(set)
    for feature in self.session.query(Feature).all():
        owner = feature.candidate
        for key_name in feature.keys:
            tables_by_key[key_name].add(owner.__class__.__tablename__)

    # Clear the current keys before writing the freshly collected ones.
    self.session.query(FeatureKey).delete(synchronize_session="fetch")

    # TODO: upsert is too much. insert is fine as all keys are deleted.
    upsert_keys(self.session, FeatureKey, tables_by_key)
def upsert_keys(
    self,
    keys: Iterable[Union[str, Callable]],
    candidate_classes: Optional[
        Union[Type[Candidate], List[Type[Candidate]]]
    ] = None,
) -> None:
    """Upsert the specified keys to LabelKey.

    Each key is stored under its ``__name__`` if it has one, otherwise
    under its ``name`` attribute, otherwise under the key itself.

    :param keys: The labeling functions (or key names) to upsert. A list or
        tuple is used as-is; any other iterable (set, generator, ...) is
        iterated as well; a single string, callable, or object with a
        ``name`` attribute is treated as one key.
    :param candidate_classes: A Candidate class, or a list/tuple of them, to
        upsert the keys for. Classes not associated with this Labeler are
        ignored. If None (or empty), the keys are upserted for all
        candidate classes associated with this Labeler.
    """
    # Normalize ``keys`` to a list. Lists and tuples keep their historical
    # handling; other iterables are now expanded instead of being mistaken
    # for one key (a set used to fail as an unhashable dict key, and a
    # generator used to be stored under its own ``__name__``).
    if not isinstance(keys, (list, tuple)):
        is_single_key = (
            isinstance(keys, (str, bytes))
            or callable(keys)
            or hasattr(keys, "name")
        )
        if is_single_key:
            keys = [keys]
        else:
            try:
                keys = list(keys)
            except TypeError:
                # Not iterable at all: treat it as one key.
                keys = [keys]

    if candidate_classes:
        # Make sure candidate_classes is iterable
        if not isinstance(candidate_classes, (list, tuple)):
            candidate_classes = [candidate_classes]

        # Ensure only candidate classes associated with the labeler are used.
        table_names = [
            cls.__tablename__
            for cls in candidate_classes
            if cls in self.candidate_classes
        ]
        if not table_names:
            logger.warning(
                "You didn't specify valid candidate classes for this Labeler."
            )
            return
    else:
        # If unspecified, just use all candidate classes
        table_names = [cls.__tablename__ for cls in self.candidate_classes]

    # Build the dict expected by the upsert_keys utility; every key gets
    # its own set so the entries never share state.
    key_map = {}
    for key in keys:
        if hasattr(key, "__name__"):
            # Assume key is an LF
            key_name = key.__name__
        elif hasattr(key, "name"):
            key_name = key.name
        else:
            key_name = key
        key_map[key_name] = set(table_names)

    upsert_keys(self.session, LabelKey, key_map)
def _after_apply(
    self, train: bool = False, table: Table = Label, **kwargs: Any
) -> None:
    """Rebuild the key table that belongs to ``table`` when training.

    Every row of ``table`` in the session is read, and each of its keys is
    mapped to the table names of the candidate classes it appears with.
    The rows of the matching key table (LabelKey when ``table`` is Label,
    GoldLabelKey otherwise) are then deleted and the collected keys are
    upserted.

    :param train: Whether or not to insert new keys. When False, nothing
        is done.
    :param table: The table whose rows are scanned for keys.
    :param kwargs: Extra keyword arguments; accepted but not used here.
    """
    if not train:
        return

    # key name -> table names of the candidate classes that carry this key
    tables_by_key: DefaultDict[str, set] = defaultdict(set)
    for row in self.session.query(table).all():
        owner = row.candidate
        for key_name in row.keys:
            tables_by_key[key_name].add(owner.__class__.__tablename__)

    # Pick the key table that pairs with the scanned table.
    if table == Label:
        key_table = LabelKey
    else:
        key_table = GoldLabelKey

    # Clear the current keys before writing the freshly collected ones.
    self.session.query(key_table).delete(synchronize_session="fetch")

    # TODO: upsert is too much. insert is fine as all keys are deleted.
    upsert_keys(self.session, key_table, tables_by_key)
def upsert_keys(self, keys, candidate_classes=None):
    """Upsert the specified keys to LabelKey.

    Each key is stored under its ``__name__`` when it has one (as a
    labeling function does), otherwise under the key itself.

    :param keys: A list of labeling functions to upsert. A value that is
        not a list or tuple is treated as a single key.
    :type keys: list, tuple
    :param candidate_classes: A list of the Candidates to upsert the key
        for. Classes not associated with this Labeler are ignored. If None
        (or empty), upsert the keys for all candidate classes associated
        with this Labeler.
    :type candidate_classes: list, tuple
    """
    # Wrap a lone key so the loop below can treat both cases alike.
    if not isinstance(keys, (list, tuple)):
        keys = [keys]

    if candidate_classes:
        # Wrap a lone candidate class the same way.
        if not isinstance(candidate_classes, (list, tuple)):
            candidate_classes = [candidate_classes]

        # Keep only the classes this labeler is associated with.
        table_names = [
            cls.__tablename__
            for cls in candidate_classes
            if cls in self.candidate_classes
        ]
        if not table_names:
            logger.warning(
                "You didn't specify valid candidate classes for this Labeler."
            )
            return
    else:
        # Nothing specified: fall back to every class of this labeler.
        table_names = [cls.__tablename__ for cls in self.candidate_classes]

    # Mapping handed to the upsert_keys utility; every key gets its own set.
    key_map = {}
    for key in keys:
        # An LF contributes its __name__; anything else is used directly.
        key_map[getattr(key, "__name__", key)] = set(table_names)

    upsert_keys(self.session, LabelKey, key_map)
def upsert_keys(
    self,
    keys: Iterable[str],
    candidate_classes: Optional[Iterable[Candidate]] = None,
) -> None:
    """Upsert the specified keys to FeatureKey.

    :param keys: The FeatureKey names to upsert. A list or tuple is used
        as-is; any other iterable (set, generator, ...) is iterated as
        well; a single string is treated as one key.
    :type keys: list | tuple
    :param candidate_classes: A Candidate class, or a list/tuple of them, to
        upsert the keys for. Classes not associated with this Featurizer
        are ignored. If None (or empty), upsert the keys for all candidate
        classes associated with this Featurizer.
    :type candidate_classes: list | tuple
    """
    # Normalize ``keys`` to a list. Lists and tuples keep their historical
    # handling; other iterables are now expanded instead of being wrapped
    # as one key (a set of names used to fail as an unhashable dict key).
    if not isinstance(keys, (list, tuple)):
        if isinstance(keys, (str, bytes)):
            keys = [keys]
        else:
            try:
                keys = list(keys)
            except TypeError:
                # Not iterable at all: treat it as one key.
                keys = [keys]

    if candidate_classes:
        # Make sure candidate_classes is iterable
        if not isinstance(candidate_classes, (list, tuple)):
            candidate_classes = [candidate_classes]

        # Ensure only candidate classes associated with the featurizer
        # are used.
        table_names = [
            cls.__tablename__
            for cls in candidate_classes
            if cls in self.candidate_classes
        ]
        if not table_names:
            logger.warning(
                "You didn't specify valid candidate classes for this featurizer."
            )
            return
    else:
        # If unspecified, just use all candidate classes
        table_names = [cls.__tablename__ for cls in self.candidate_classes]

    # Build the dict expected by the upsert_keys utility; every key gets
    # its own set so the entries never share state.
    key_map = {key: set(table_names) for key in keys}

    upsert_keys(self.session, FeatureKey, key_map)