def _encode_new_dataset(self, dataset, params: EncoderParams):
    """Encode `dataset` and return a new RepertoireDataset holding the result.

    The returned dataset is constructed from the same repertoires, params
    and metadata file as `dataset`; the encoding is attached to it through
    `add_encoded_data`.

    Args:
        dataset: dataset to encode; must expose `repertoires`, `params`,
            `metadata_file` and `get_data()`.
        params: encoder parameters, passed through unchanged to
            `_encode_repertoires`.

    Returns:
        The newly created RepertoireDataset with the EncodedData attached.
    """
    result = RepertoireDataset(
        repertoires=dataset.repertoires,
        params=dataset.params,
        metadata_file=dataset.metadata_file,
    )

    examples, labels = self._encode_repertoires(dataset, params)
    annotations = self._get_feature_info()

    # One feature per entry under "sequence_id" in the feature annotations
    # (presumably a table-like object keyed by column name -- confirm
    # against _get_feature_info).
    feature_names = list(annotations["sequence_id"])

    # Example ids follow the iteration order of dataset.get_data().
    example_ids = [item.identifier for item in dataset.get_data()]

    encoded = EncodedData(
        examples=examples,
        labels=labels,
        feature_names=feature_names,
        feature_annotations=annotations,
        example_ids=example_ids,
        encoding=MatchedSequencesEncoder.__name__,
    )
    result.add_encoded_data(encoded)

    return result
Ejemplo n.º 2
0
    def _encode_new_dataset(self, dataset, params: EncoderParams):
        """Encode `dataset` and return a new RepertoireDataset holding the result.

        `_load_regex_df` is invoked first, before anything else is built.
        The returned dataset is constructed from the same repertoires,
        params and metadata file as `dataset`; the encoding is attached to
        it through `add_encoded_data`.

        Args:
            dataset: dataset to encode; must expose `repertoires`, `params`,
                `metadata_file` and `get_metadata()`.
            params: encoder parameters, passed through unchanged to
                `_encode_repertoires`.

        Returns:
            The newly created RepertoireDataset with the EncodedData attached.
        """
        self._load_regex_df()

        result = RepertoireDataset(
            repertoires=dataset.repertoires,
            params=dataset.params,
            metadata_file=dataset.metadata_file,
        )

        annotations = self._get_feature_info()
        examples, labels = self._encode_repertoires(dataset, params)

        # Only "subject_id" is requested, so the first (and presumably only)
        # value of the returned mapping is used as the example ids.
        subject_metadata = dataset.get_metadata(["subject_id"])
        example_ids = list(subject_metadata.values())[0]

        # One feature per entry under "chain_id" in the feature annotations.
        feature_names = list(annotations["chain_id"])

        encoded = EncodedData(
            examples=examples,
            example_ids=example_ids,
            feature_names=feature_names,
            feature_annotations=annotations,
            labels=labels,
            encoding=MatchedRegexEncoder.__name__,
        )
        result.add_encoded_data(encoded)

        return result