Python get_mapping Examples, fonduer.utils.utils_udf.get_mapping Python Examples

Example #1

0

Show file

File: featurizer.py Project: sbrown-ai/fonduer

    def apply(self, doc, split, train, **kwargs):
        """Featurize the candidates of ``doc`` in ``split`` and upsert them.

        The Feature records produced by ``get_mapping`` are written with a
        single ``batch_upsert_records`` call. When ``train`` is true, the
        accumulated ``feature_map`` is additionally passed to ``upsert_keys``
        for ``FeatureKey``.

        This is a generator function that never yields a value; its body runs
        when the returned generator is iterated.

        :param doc: A document to process.
        :param split: Which split to use.
        :param train: Whether or not to insert new FeatureKeys.
        """
        logger.debug(f"Document: {doc}")

        # One entry per candidate class: the candidates to be featurized.
        per_class_cands = get_cands_list_from_split(
            self.session, self.candidate_classes, doc, split
        )

        key_map = {}

        # Flatten the per-class lists into one stream so all records can be
        # upserted together, which reduces the number of queries needed.
        flat_cands = itertools.chain.from_iterable(per_class_cands)
        feature_records = [
            *get_mapping(
                self.session, Feature, flat_cands, get_all_feats, key_map
            )
        ]
        batch_upsert_records(self.session, Feature, feature_records)

        # Feature keys are only inserted when requested by the caller.
        if train:
            upsert_keys(self.session, FeatureKey, key_map)

        # Delegating to an empty iterable keeps this a generator that
        # produces no items.
        yield from ()

Example #2

0

Show file

File: labeler.py Project: robingong/fonduer

    def apply(self, doc, split, train, lfs, **kwargs):
        """Label the candidates of ``doc`` in ``split`` and upsert the Labels.

        ``lfs`` is stored on ``self.lfs`` before any labeling happens. Label
        records are then built and upserted one candidate class at a time.
        When ``train`` is true, the collected label keys are passed to
        ``add_keys`` for ``LabelKey``.

        This is a generator function that never yields a value; its body runs
        when the returned generator is iterated.

        :param doc: A document to process.
        :param split: Which split to use.
        :param train: Whether or not to insert new LabelKeys.
        :param lfs: The list of functions to use to generate labels.
        :raises ValueError: If ``lfs`` is None.
        """
        logger.debug("Document: {}".format(doc))

        if lfs is None:
            raise ValueError("Must provide lfs kwarg.")
        self.lfs = lfs

        # One entry per candidate class: the candidates to be labeled.
        candidate_groups = get_cands_list_from_split(
            self.session, self.candidate_classes, doc, split
        )

        # Shared across all groups and handed to get_mapping on every call.
        seen_keys = set()
        for group in candidate_groups:
            label_records = [
                *get_mapping(
                    self.session, Label, group, self._f_gen, seen_keys
                )
            ]
            batch_upsert_records(self.session, Label, label_records)

        # Label keys are only inserted when requested by the caller.
        if train:
            add_keys(self.session, LabelKey, seen_keys)

        # Delegating to an empty iterable keeps this a generator that
        # produces no items.
        yield from ()

Example #3

0

Show file

    def apply(  # type: ignore
        self,
        doc: Document,
        train: bool,
        lfs: List[List[Callable]],
        table: Table = Label,
        **kwargs: Any,
    ):
        """Label the candidates of ``doc`` and upsert the records into ``table``.

        ``lfs`` is stored on ``self.lfs`` before any labeling happens. Records
        are then built and upserted one candidate class at a time.

        This is a generator function that never yields a value; its body runs
        when the returned generator is iterated.

        :param doc: A document to process.
        :param train: Whether or not to insert new LabelKeys. Not read by
            this method body.
        :param lfs: The list of functions to use to generate labels.
        :param table: The table the records are mapped for and upserted into
            (defaults to ``Label``).
        :raises ValueError: If ``lfs`` is None.
        """
        logger.debug(f"Document: {doc}")

        if lfs is None:
            raise ValueError("Must provide lfs kwarg.")
        self.lfs = lfs

        # One entry per candidate class: the candidates to be labeled.
        candidate_groups = get_cands_list_from_doc(
            self.session, self.candidate_classes, doc
        )

        for group in candidate_groups:
            group_records = [
                *get_mapping(self.session, table, group, self._f_gen)
            ]
            batch_upsert_records(self.session, table, group_records)

        # Delegating to an empty iterable keeps this a generator that
        # produces no items.
        yield from ()

Example #4

0

Show file

File: labeler.py Project: sudhu26/fonduer

    def apply(  # type: ignore
        self,
        doc: Document,
        lfs: List[List[Callable]],
        table: Table = Label,
        **kwargs: Any,
    ) -> List[List[Dict[str, Any]]]:
        """Build label records for the candidates attached to ``doc``.

        ``lfs`` is stored on ``self.lfs`` before any labeling happens. Nothing
        is written by this method; the records are returned to the caller.

        :param doc: A document to process.
        :param lfs: The list of functions to use to generate labels.
        :param table: The table passed to ``get_mapping`` (defaults to
            ``Label``).
        :raises ValueError: If ``lfs`` is None.
        :return: One list of records per candidate class, in the order of
            ``self.candidate_classes``.
        """
        logger.debug(f"Document: {doc}")

        if lfs is None:
            raise ValueError("Must provide lfs kwarg.")
        self.lfs = lfs

        # The candidates of each class are read from the doc attribute named
        # after the class's table name with an "s" appended.
        candidate_groups = []
        for cand_cls in self.candidate_classes:
            candidate_groups.append(getattr(doc, cand_cls.__tablename__ + "s"))

        # All candidate groups are collected first, then mapped to records.
        per_class_records = []
        for group in candidate_groups:
            per_class_records.append(
                list(get_mapping(table, group, self._f_gen))
            )
        return per_class_records

Example #5

0

Show file

    def apply(self, doc, split, train, **kwargs):
        """Featurize the candidates of ``doc`` in ``split`` and upsert them.

        Feature records are built and upserted one candidate class at a time.
        When ``train`` is true, the accumulated ``feature_map`` is passed to
        ``upsert_keys`` for ``FeatureKey``.

        This is a generator function that never yields a value; its body runs
        when the returned generator is iterated.

        :param doc: A document to process.
        :param split: Which split to use.
        :param train: Whether or not to insert new FeatureKeys.
        """
        logger.debug(f"Document: {doc}")

        # One entry per candidate class: the candidates to be featurized.
        candidate_groups = get_cands_list_from_split(
            self.session, self.candidate_classes, doc, split
        )

        # Shared across all groups and handed to get_mapping on every call.
        key_map = {}
        for group in candidate_groups:
            group_records = [
                *get_mapping(
                    self.session, Feature, group, get_all_feats, key_map
                )
            ]
            batch_upsert_records(self.session, Feature, group_records)

        # Feature keys are only inserted when requested by the caller.
        if train:
            upsert_keys(self.session, FeatureKey, key_map)

        # Delegating to an empty iterable keeps this a generator that
        # produces no items.
        yield from ()

Example #6

0

Show file

    def apply(self, doc: Document,
              **kwargs: Any) -> List[List[Dict[str, Any]]]:
        """Build feature records for the candidates attached to ``doc``.

        Nothing is written by this method; the records are returned to the
        caller.

        :param doc: A document to process.
        :return: One list of records per candidate class, in the order of
            ``self.candidate_classes``.
        """
        logger.debug(f"Document: {doc}")

        # The candidates of each class are read from the doc attribute named
        # after the class's table name with an "s" appended.
        candidate_groups = []
        for cand_cls in self.candidate_classes:
            candidate_groups.append(getattr(doc, cand_cls.__tablename__ + "s"))

        # All candidate groups are collected first, then mapped to records.
        per_class_records = []
        for group in candidate_groups:
            mapping = get_mapping(
                Feature, group, self.feature_extractors.extract
            )
            per_class_records.append(list(mapping))
        return per_class_records

Example #7

0

Show file

File: featurizer.py Project: moloodbahar/fonduer

    def apply(self, doc: Document, **kwargs: Any) -> Iterator[Any]:
        """Featurize the candidates attached to ``doc`` and upsert them.

        The Feature records of all candidate classes are written with a
        single ``batch_upsert_records`` call.

        This is a generator function that never yields a value; its body runs
        when the returned generator is iterated.

        :param doc: A document to process.
        """
        logger.debug(f"Document: {doc}")

        # The candidates of each class are read from the doc attribute named
        # after the class's table name with an "s" appended.
        candidate_groups = []
        for cand_cls in self.candidate_classes:
            candidate_groups.append(getattr(doc, cand_cls.__tablename__ + "s"))

        # Flatten the per-class lists into one stream so all records can be
        # upserted together, which reduces the number of queries needed.
        flat_cands = itertools.chain.from_iterable(candidate_groups)
        feature_records = [
            *get_mapping(Feature, flat_cands, self.feature_extractors.extract)
        ]
        batch_upsert_records(self.session, Feature, feature_records)

        # Delegating to an empty iterable keeps this a generator that
        # produces no items.
        yield from ()

Example #8

0

Show file

    def apply(  # type: ignore
            self, doc: Document, train: bool, **kwargs: Any) -> Iterator[Any]:
        """Featurize the candidates of ``doc`` and upsert the Feature records.

        The Feature records of all candidate classes are written with a
        single ``batch_upsert_records`` call.

        This is a generator function that never yields a value; its body runs
        when the returned generator is iterated.

        :param doc: A document to process.
        :param train: Whether or not to insert new FeatureKeys. Not read by
            this method body.
        """
        logger.debug(f"Document: {doc}")

        # One entry per candidate class: the candidates to be featurized.
        per_class_cands = get_cands_list_from_doc(
            self.session, self.candidate_classes, doc
        )

        # Flatten the per-class lists into one stream so all records can be
        # upserted together, which reduces the number of queries needed.
        flat_cands = itertools.chain.from_iterable(per_class_cands)
        feature_records = [
            *get_mapping(
                self.session,
                Feature,
                flat_cands,
                self.feature_extractors.extract,
            )
        ]
        batch_upsert_records(self.session, Feature, feature_records)

        # Delegating to an empty iterable keeps this a generator that
        # produces no items.
        yield from ()