Ejemplo n.º 1
0
    def apply(self, doc, split, train, **kwargs):
        """Featurize all candidates of one document and store the result.

        The candidate lists returned by ``get_cands_list_from_split`` are
        flattened so that a single ``batch_upsert_records`` call covers the
        whole document.

        Because this is a generator function, none of the work below runs
        until the returned generator is consumed.

        :param doc: A document to process.
        :param split: Which split to use.
        :param train: Whether or not to insert new FeatureKeys.
        :return: A generator that never yields an item.
        """
        logger.debug(f"Document: {doc}")

        # Candidates of this doc that will be featurized (a list of lists).
        nested_cands = get_cands_list_from_split(
            self.session, self.candidate_classes, doc, split
        )

        # Handed to get_mapping and, when training, on to upsert_keys;
        # presumably get_mapping records the encountered keys in it.
        key_map = {}

        # Flattening the nested lists keeps the number of update queries low.
        flat_cands = itertools.chain.from_iterable(nested_cands)
        rows = get_mapping(
            self.session, Feature, flat_cands, get_all_feats, key_map
        )
        batch_upsert_records(self.session, Feature, list(rows))

        # Feature keys are only inserted in training mode.
        if train:
            upsert_keys(self.session, FeatureKey, key_map)

        # Delegating to an empty tuple keeps this a generator with no items.
        yield from ()
Ejemplo n.º 2
0
    def apply(self, doc, split, train, lfs, **kwargs):
        """Label the candidates of one document and store the result.

        Each candidate list returned by ``get_cands_list_from_split`` is
        mapped with ``self._f_gen`` and upserted as ``Label`` records in its
        own batch.

        Because this is a generator function, none of the work below
        (including the ``lfs`` check) runs until the returned generator is
        consumed.

        :param doc: A document to process.
        :param split: Which split to use.
        :param train: Whether or not to insert new LabelKeys.
        :param lfs: The list of functions to use to generate labels.
        :raises ValueError: If ``lfs`` is None.
        :return: A generator that never yields an item.
        """
        logger.debug("Document: {}".format(doc))

        if lfs is None:
            raise ValueError("Must provide lfs kwarg.")

        # Kept on the instance; presumably read back by self._f_gen.
        self.lfs = lfs

        # Candidates of this doc that will be labeled (a list of lists).
        candidate_groups = get_cands_list_from_split(
            self.session, self.candidate_classes, doc, split
        )

        # Handed to get_mapping and, when training, on to upsert_keys.
        key_map = {}
        for group in candidate_groups:
            rows = get_mapping(
                self.session, Label, group, self._f_gen, key_map
            )
            batch_upsert_records(self.session, Label, list(rows))

        # Label keys are only inserted in training mode.
        if train:
            upsert_keys(self.session, LabelKey, key_map)

        # Delegating to an empty tuple keeps this a generator with no items.
        yield from ()
Ejemplo n.º 3
0
    def apply(self, doc, split, train, **kwargs):
        """Featurize the candidates of one document, one group at a time.

        Each candidate list returned by ``get_cands_list_from_split`` is
        mapped with ``get_all_feats`` and upserted as ``Feature`` records in
        its own batch.

        Because this is a generator function, none of the work below runs
        until the returned generator is consumed.

        :param doc: A document to process.
        :param split: Which split to use.
        :param train: Whether or not to insert new FeatureKeys.
        :return: A generator that never yields an item.
        """
        logger.debug(f"Document: {doc}")

        # Candidates of this doc that will be featurized (a list of lists).
        candidate_groups = get_cands_list_from_split(
            self.session, self.candidate_classes, doc, split
        )

        # Handed to get_mapping and, when training, on to upsert_keys.
        key_map = {}
        for group in candidate_groups:
            rows = get_mapping(
                self.session, Feature, group, get_all_feats, key_map
            )
            batch_upsert_records(self.session, Feature, list(rows))

        # Feature keys are only inserted in training mode.
        if train:
            upsert_keys(self.session, FeatureKey, key_map)

        # Delegating to an empty tuple keeps this a generator with no items.
        yield from ()
Ejemplo n.º 4
0
 def _after_apply(self, train: bool = False, **kwargs: Any) -> None:
     """Rebuild the FeatureKey table from the stored Features.

     Does nothing unless ``train`` is true. Otherwise every ``Feature`` row
     is read, each of its keys is associated with the table name of the
     row's candidate class, all existing ``FeatureKey`` rows are deleted and
     the collected keys are written back.

     :param train: Whether the feature keys should be rebuilt.
     :param kwargs: Accepted but not used here.
     """
     if not train:
         return

     # feature key -> table names of the candidate classes it occurs with
     tables_by_key: DefaultDict[str, set] = defaultdict(set)
     for feat in self.session.query(Feature).all():
         owner = feat.candidate
         for feat_key in feat.keys:
             tables_by_key[feat_key].add(owner.__class__.__tablename__)

     # Wipe the existing keys before writing the freshly collected ones.
     self.session.query(FeatureKey).delete(synchronize_session="fetch")
     # TODO: upsert is too much. insert is fine as all keys are deleted.
     upsert_keys(self.session, FeatureKey, tables_by_key)
Ejemplo n.º 5
0
    def upsert_keys(
        self,
        keys: Iterable[Union[str, Callable]],
        candidate_classes: Optional[
            Union[Type[Candidate], List[Type[Candidate]]]
        ] = None,
    ) -> None:
        """Upsert the given keys into LabelKey.

        Each key is stored under its ``__name__`` if it has one, else under
        its ``name`` attribute if it has one, else under the key itself.

        :param keys: A list of labeling functions to upsert. A value that is
            not a list or tuple is treated as one single key.
        :param candidate_classes: A list of the Candidates to upsert the key
            for. Classes not associated with this Labeler are ignored. If
            None, upsert the keys for all candidate classes associated with
            this Labeler.
        """
        # Anything that is not a list/tuple counts as one single key.
        if not isinstance(keys, (list, tuple)):
            keys = [keys]

        if not candidate_classes:
            # Nothing requested: use every candidate class of this Labeler.
            table_names = [cls.__tablename__ for cls in self.candidate_classes]
        else:
            if not isinstance(candidate_classes, (list, tuple)):
                candidate_classes = [candidate_classes]

            # Keep only the classes this Labeler is associated with.
            table_names = [
                cls.__tablename__
                for cls in candidate_classes
                if cls in self.candidate_classes
            ]

            if not table_names:
                logger.warning(
                    "You didn't specify valid candidate classes for this Labeler."
                )
                return

        # Build the {key name: set of table names} dict expected by the utils.
        key_map = {}
        for key in keys:
            if hasattr(key, "__name__"):
                # Assume the key is a labeling function.
                key_name = key.__name__
            elif hasattr(key, "name"):
                key_name = key.name
            else:
                key_name = key
            # Every key gets its own set instance.
            key_map[key_name] = set(table_names)

        upsert_keys(self.session, LabelKey, key_map)
Ejemplo n.º 6
0
 def _after_apply(
     self, train: bool = False, table: Table = Label, **kwargs: Any
 ) -> None:
     """Rebuild the key table that belongs to ``table`` from its stored rows.

     Does nothing unless ``train`` is true. Otherwise every row of ``table``
     is read, each of its keys is associated with the table name of the
     row's candidate class, all rows of the matching key table are deleted
     and the collected keys are written back.

     :param train: Whether the keys should be rebuilt.
     :param table: The table to read rows from. ``Label`` selects
         ``LabelKey`` as key table; anything else selects ``GoldLabelKey``.
     :param kwargs: Accepted but not used here.
     """
     if not train:
         return

     # label key -> table names of the candidate classes it occurs with
     tables_by_key: DefaultDict[str, set] = defaultdict(set)
     for row in self.session.query(table).all():
         owner = row.candidate
         for row_key in row.keys:
             tables_by_key[row_key].add(owner.__class__.__tablename__)

     if table == Label:
         key_table = LabelKey
     else:
         key_table = GoldLabelKey

     # Wipe the existing keys before writing the freshly collected ones.
     self.session.query(key_table).delete(synchronize_session="fetch")
     # TODO: upsert is too much. insert is fine as all keys are deleted.
     upsert_keys(self.session, key_table, tables_by_key)
Ejemplo n.º 7
0
    def upsert_keys(self, keys, candidate_classes=None):
        """Upsert the given keys into LabelKey.

        Each key is stored under its ``__name__`` if it has one, otherwise
        under the key itself.

        :param keys: A list of labeling functions to upsert. A value that is
            not a list or tuple is treated as one single key.
        :type keys: list, tuple
        :param candidate_classes: A list of the Candidates to upsert the key
            for. Classes not associated with this Labeler are ignored. If
            None, upsert the keys for all candidate classes associated with
            this Labeler.
        :type candidate_classes: list, tuple
        """
        # Anything that is not a list/tuple counts as one single key.
        if not isinstance(keys, (list, tuple)):
            keys = [keys]

        if not candidate_classes:
            # Nothing requested: use every candidate class of this Labeler.
            table_names = [cls.__tablename__ for cls in self.candidate_classes]
        else:
            if not isinstance(candidate_classes, (list, tuple)):
                candidate_classes = [candidate_classes]

            # Keep only the classes this Labeler is associated with.
            table_names = [
                cls.__tablename__
                for cls in candidate_classes
                if cls in self.candidate_classes
            ]

            if not table_names:
                logger.warning(
                    "You didn't specify valid candidate classes for this Labeler."
                )
                return

        # Build the {key name: set of table names} dict expected by the utils.
        key_map = {}
        for key in keys:
            try:
                # Assume the key is a labeling function.
                key_name = key.__name__
            except AttributeError:
                key_name = key
            # Every key gets its own set instance.
            key_map[key_name] = set(table_names)

        upsert_keys(self.session, LabelKey, key_map)
Ejemplo n.º 8
0
    def upsert_keys(
        self,
        keys: Iterable[str],
        candidate_classes: Optional[Iterable[Candidate]] = None,
    ) -> None:
        """Upsert the given keys into FeatureKey.

        :param keys: A list of FeatureKey names to upsert. A value that is
            not a list or tuple is treated as one single key.
        :type keys: list | tuple
        :param candidate_classes: A list of the Candidates to upsert the key
            for. Classes not associated with this Featurizer are ignored. If
            None, upsert the keys for all candidate classes associated with
            this Featurizer.
        :type candidate_classes: list | tuple
        """
        # Anything that is not a list/tuple counts as one single key.
        if not isinstance(keys, (list, tuple)):
            keys = [keys]

        if not candidate_classes:
            # Nothing requested: use every candidate class of this featurizer.
            table_names = [cls.__tablename__ for cls in self.candidate_classes]
        else:
            if not isinstance(candidate_classes, (list, tuple)):
                candidate_classes = [candidate_classes]

            # Keep only the classes this featurizer is associated with.
            table_names = [
                cls.__tablename__
                for cls in candidate_classes
                if cls in self.candidate_classes
            ]

            if not table_names:
                logger.warning(
                    "You didn't specify valid candidate classes for this featurizer."
                )
                return

        # Build the {key: set of table names} dict expected by the utils;
        # every key gets its own set instance.
        key_map = {key: set(table_names) for key in keys}
        upsert_keys(self.session, FeatureKey, key_map)