def apply(self, doc, split, train, **kwargs):
    """Featurize all candidates of this document and persist the features.

    :param doc: A document to process.
    :param split: Which split to use.
    :param train: Whether or not to insert new FeatureKeys.
    """
    logger.debug(f"Document: {doc}")
    # Get all the candidates in this doc that will be featurized
    cands_list = get_cands_list_from_split(
        self.session, self.candidate_classes, doc, split
    )

    feature_map = {}
    # Make a flat list of all candidates from the list of list of
    # candidates. This helps reduce the number of queries needed to update.
    all_cands = itertools.chain.from_iterable(cands_list)
    records = list(
        get_mapping(self.session, Feature, all_cands, get_all_feats, feature_map)
    )
    batch_upsert_records(self.session, Feature, records)

    # Insert all Feature Keys
    if train:
        upsert_keys(self.session, FeatureKey, feature_map)

    # This return + yield makes a completely empty generator
    return
    yield
def apply(self, doc, split, train, lfs, **kwargs):
    """Label all candidates of this document and persist the labels.

    :param doc: A document to process.
    :param split: Which split to use.
    :param train: Whether or not to insert new LabelKeys.
    :param lfs: The list of functions to use to generate labels.
    :raises ValueError: If ``lfs`` is None.
    """
    logger.debug(f"Document: {doc}")

    if lfs is None:
        raise ValueError("Must provide lfs kwarg.")

    self.lfs = lfs

    # Get all the candidates in this doc that will be labeled
    cands_list = get_cands_list_from_split(
        self.session, self.candidate_classes, doc, split
    )

    label_keys = set()
    for cands in cands_list:
        records = list(
            get_mapping(self.session, Label, cands, self._f_gen, label_keys)
        )
        batch_upsert_records(self.session, Label, records)

    # Insert all Label Keys
    if train:
        add_keys(self.session, LabelKey, label_keys)

    # This return + yield makes a completely empty generator
    return
    yield
def apply(  # type: ignore
    self,
    doc: Document,
    train: bool,
    lfs: List[List[Callable]],
    table: Table = Label,
    **kwargs: Any,
):
    """Label all candidates of this document and persist the labels.

    :param doc: A document to process.
    :param train: Whether or not to insert new LabelKeys.
    :param lfs: The list of functions to use to generate labels.
    :param table: The table to upsert the generated records into
        (defaults to ``Label``).
    :raises ValueError: If ``lfs`` is None.
    """
    logger.debug(f"Document: {doc}")

    if lfs is None:
        raise ValueError("Must provide lfs kwarg.")

    self.lfs = lfs

    # Get all the candidates in this doc that will be labeled
    cands_list = get_cands_list_from_doc(self.session, self.candidate_classes, doc)
    for cands in cands_list:
        records = list(get_mapping(self.session, table, cands, self._f_gen))
        batch_upsert_records(self.session, table, records)

    # This return + yield makes a completely empty generator
    return
    yield
def apply(  # type: ignore
    self,
    doc: Document,
    lfs: List[List[Callable]],
    table: Table = Label,
    **kwargs: Any,
) -> List[List[Dict[str, Any]]]:
    """Label all candidates of this document and return the label records.

    :param doc: A document to process.
    :param lfs: The list of functions to use to generate labels.
    :param table: The table the generated records are destined for
        (defaults to ``Label``).
    :return: One list of label records per candidate class, in the order
        of ``self.candidate_classes``.
    :raises ValueError: If ``lfs`` is None.
    """
    logger.debug(f"Document: {doc}")

    if lfs is None:
        raise ValueError("Must provide lfs kwarg.")

    self.lfs = lfs

    # Get all the candidates in this doc that will be labeled
    cands_list = [
        getattr(doc, candidate_class.__tablename__ + "s")
        for candidate_class in self.candidate_classes
    ]
    return [list(get_mapping(table, cands, self._f_gen)) for cands in cands_list]
def apply(self, doc, split, train, **kwargs):
    """Featurize all candidates of this document and persist the features.

    :param doc: A document to process.
    :param split: Which split to use.
    :param train: Whether or not to insert new FeatureKeys.
    """
    logger.debug(f"Document: {doc}")
    # Get all the candidates in this doc that will be featurized
    cands_list = get_cands_list_from_split(
        self.session, self.candidate_classes, doc, split
    )

    feature_map = {}
    for cands in cands_list:
        records = list(
            get_mapping(self.session, Feature, cands, get_all_feats, feature_map)
        )
        batch_upsert_records(self.session, Feature, records)

    # Insert all Feature Keys
    if train:
        upsert_keys(self.session, FeatureKey, feature_map)

    # This return + yield makes a completely empty generator
    return
    yield
def apply(self, doc: Document, **kwargs: Any) -> List[List[Dict[str, Any]]]:
    """Featurize all candidates of this document and return feature records.

    :param doc: A document to process.
    :return: One list of feature records per candidate class, in the order
        of ``self.candidate_classes``.
    """
    logger.debug(f"Document: {doc}")
    # Get all the candidates in this doc that will be featurized
    cands_list = [
        getattr(doc, candidate_class.__tablename__ + "s")
        for candidate_class in self.candidate_classes
    ]
    return [
        list(get_mapping(Feature, cands, self.feature_extractors.extract))
        for cands in cands_list
    ]
def apply(self, doc: Document, **kwargs: Any) -> Iterator[Any]:
    """Featurize all candidates of this document and persist the features.

    :param doc: A document to process.
    """
    logger.debug(f"Document: {doc}")
    # Get all the candidates in this doc that will be featurized
    cands_list = [
        getattr(doc, candidate_class.__tablename__ + "s")
        for candidate_class in self.candidate_classes
    ]

    # Make a flat list of all candidates from the list of list of
    # candidates. This helps reduce the number of queries needed to update.
    all_cands = itertools.chain.from_iterable(cands_list)
    records = list(get_mapping(Feature, all_cands, self.feature_extractors.extract))
    batch_upsert_records(self.session, Feature, records)

    # This return + yield makes a completely empty generator
    return
    yield
def apply(  # type: ignore
    self, doc: Document, train: bool, **kwargs: Any
) -> Iterator[Any]:
    """Featurize all candidates of this document and persist the features.

    :param doc: A document to process.
    :param train: Whether or not to insert new FeatureKeys.
    """
    logger.debug(f"Document: {doc}")
    # Get all the candidates in this doc that will be featurized
    cands_list = get_cands_list_from_doc(self.session, self.candidate_classes, doc)

    # Make a flat list of all candidates from the list of list of
    # candidates. This helps reduce the number of queries needed to update.
    all_cands = itertools.chain.from_iterable(cands_list)
    records = list(
        get_mapping(
            self.session, Feature, all_cands, self.feature_extractors.extract
        )
    )
    batch_upsert_records(self.session, Feature, records)

    # This return + yield makes a completely empty generator
    return
    yield