Example #1
0
    def apply(self, doc, split, train, **kwargs):
        """Build and upsert Feature records for the candidates of a document.

        The candidates found for ``doc`` in ``split`` are flattened into one
        stream, handed to ``get_mapping`` together with ``get_all_feats``, and
        the resulting records are written with a single
        ``batch_upsert_records`` call.

        :param doc: A document to process.
        :param split: Which split to use.
        :param train: Whether or not to insert new FeatureKeys.
        :return: A generator that yields nothing. Because this is a generator
            function, the work above only runs once the generator is advanced.
        """
        logger.debug(f"Document: {doc}")

        # Shared with get_mapping below and later handed to upsert_keys.
        key_map = {}

        # Candidates of this doc, as a list of lists of candidates.
        cand_groups = get_cands_list_from_split(
            self.session, self.candidate_classes, doc, split
        )

        # Flatten lazily so all candidates go through one mapping pass and
        # one batch upsert, which keeps the number of update queries down.
        flat_cands = (cand for group in cand_groups for cand in group)
        feature_records = list(
            get_mapping(
                self.session, Feature, flat_cands, get_all_feats, key_map
            )
        )
        batch_upsert_records(self.session, Feature, feature_records)

        # Feature keys are only written when training.
        if train:
            upsert_keys(self.session, FeatureKey, key_map)

        # Delegating to an empty iterable keeps this a generator that emits
        # no items.
        yield from ()
Example #2
0
    def apply(self, doc, split, train, lfs, **kwargs):
        """Build and upsert Label records for the candidates of a document.

        Each group of candidates found for ``doc`` in ``split`` is handed to
        ``get_mapping`` together with ``self._f_gen`` and the resulting
        records are upserted one group at a time.

        :param doc: A document to process.
        :param split: Which split to use.
        :param train: Whether or not to insert new LabelKeys.
        :param lfs: The list of functions to use to generate labels.
        :raises ValueError: If ``lfs`` is None.
        :return: A generator that yields nothing. Because this is a generator
            function, the work above only runs once the generator is advanced.
        """
        logger.debug("Document: {}".format(doc))

        if lfs is None:
            raise ValueError("Must provide lfs kwarg.")

        # Stored on the instance before self._f_gen is used below
        # (presumably _f_gen reads self.lfs -- confirm against its definition).
        self.lfs = lfs

        # Candidates of this doc that will be labeled, one group at a time.
        cand_groups = get_cands_list_from_split(
            self.session, self.candidate_classes, doc, split
        )

        # Shared with get_mapping across all groups, then handed to add_keys.
        seen_keys = set()
        for group in cand_groups:
            label_records = list(
                get_mapping(
                    self.session, Label, group, self._f_gen, seen_keys
                )
            )
            batch_upsert_records(self.session, Label, label_records)

        # Label keys are only written when training.
        if train:
            add_keys(self.session, LabelKey, seen_keys)

        # The unreachable yield marks this function as a generator; the bare
        # return before it means no item is ever produced.
        return
        yield
Example #3
0
    def apply(self, doc, split, train, **kwargs):
        """Build and upsert Feature records for the candidates of a document.

        Each group of candidates found for ``doc`` in ``split`` is handed to
        ``get_mapping`` together with ``get_all_feats`` and the resulting
        records are upserted one group at a time.

        :param doc: A document to process.
        :param split: Which split to use.
        :param train: Whether or not to insert new FeatureKeys.
        :return: A generator that yields nothing. Because this is a generator
            function, the work above only runs once the generator is advanced.
        """
        logger.debug(f"Document: {doc}")

        # Candidates of this doc that will be featurized, one group at a time.
        cand_groups = get_cands_list_from_split(
            self.session, self.candidate_classes, doc, split
        )

        # Shared with get_mapping across all groups, then handed to
        # upsert_keys.
        key_map = {}
        for group in cand_groups:
            batch_upsert_records(
                self.session,
                Feature,
                list(
                    get_mapping(
                        self.session, Feature, group, get_all_feats, key_map
                    )
                ),
            )

        # Feature keys are only written when training.
        if train:
            upsert_keys(self.session, FeatureKey, key_map)

        # Delegating to an empty iterable keeps this a generator that emits
        # no items.
        yield from ()