def create_dummy_receptordataset(self, path):
        """
        Create a small ReceptorDataset fixture holding two paired alpha/beta receptors.

        Receptor "1" carries the sequences AAATTT / ATATAT and the custom parameter "custom1";
        receptor "2" carries AAAAAA / AAAAAA and the custom parameter "custom2". All chains use
        the V1 / J1 genes of their locus, frame type "IN" and a "d_call" custom parameter.

        Arguments:
            path: directory (supporting the / operator) under which a "receptors" folder is built

        Returns:
            the result of ReceptorDataset.build for the two receptors, called with 2 as its
            second positional argument and the "receptors" folder as its third
        """

        def make_chain(sequence, sequence_id, chain, locus, custom_key):
            # locus is "A" or "B" and selects the TR<locus>V1 / TR<locus>J1 / TR<locus>D1 names
            chain_metadata = SequenceMetadata(v_gene=f"TR{locus}V1",
                                              j_gene=f"TR{locus}J1",
                                              chain=chain,
                                              frame_type="IN",
                                              custom_params={"d_call": f"TR{locus}D1",
                                                             custom_key: "cust1"})
            return ReceptorSequence(amino_acid_sequence=sequence,
                                    identifier=sequence_id,
                                    metadata=chain_metadata)

        # (receptor identifier, alpha sequence, beta sequence, name of the extra custom parameter)
        receptor_specs = [("1", "AAATTT", "ATATAT", "custom1"),
                          ("2", "AAAAAA", "AAAAAA", "custom2")]

        receptors = [
            TCABReceptor(identifier=receptor_id,
                         alpha=make_chain(alpha_sequence, receptor_id + "a", Chain.ALPHA, "A", custom_key),
                         beta=make_chain(beta_sequence, receptor_id + "b", Chain.BETA, "B", custom_key))
            for receptor_id, alpha_sequence, beta_sequence, custom_key in receptor_specs
        ]

        receptors_path = path / "receptors"
        PathBuilder.build(receptors_path)
        return ReceptorDataset.build(receptors, 2, receptors_path)
Beispiel #2
0
    def _implant_signals_in_receptors(simulation_state: SimulationState) -> Dataset:
        """
        Implant signals into the receptors of the simulation dataset and wrap them in a new dataset.

        The receptors are produced by SignalImplanter._implant_signals, using
        SignalImplanter._process_receptor as the per-receptor callback. The new dataset reuses
        the file size and name of the original dataset and is built at
        simulation_state.result_path.

        Arguments:
            simulation_state: state object providing dataset, signals and result_path

        Returns:
            the dataset built from the processed receptors; its labels are the labels of the
            original dataset (if any) plus one entry per signal mapped to [True, False]
        """
        implanted_receptors = SignalImplanter._implant_signals(simulation_state,
                                                               SignalImplanter._process_receptor)

        source_dataset = simulation_state.dataset
        processed_dataset = ReceptorDataset.build(receptors=implanted_receptors,
                                                  file_size=source_dataset.file_size,
                                                  name=source_dataset.name,
                                                  path=simulation_state.result_path)

        # labels of the original dataset come first, so a signal with the same key overrides them
        inherited_labels = simulation_state.dataset.labels
        if inherited_labels is None:
            inherited_labels = {}
        signal_labels = {signal: [True, False] for signal in simulation_state.signals}

        merged_labels = {}
        merged_labels.update(inherited_labels)
        merged_labels.update(signal_labels)
        processed_dataset.labels = merged_labels

        return processed_dataset
    def _construct_test_dataset(self, path, dataset_size: int = 50):
        """
        Create a two-receptor dataset together with a matching label configuration.

        Both receptors carry an "l1" metadata entry (1 and 2 respectively), and the returned
        LabelConfiguration declares the label "l1" with the values [1, 2].

        Arguments:
            path: location passed to PathBuilder.build and to ReceptorDataset.build
            dataset_size: not used by this method

        Returns:
            a tuple (dataset, label configuration)
        """
        # (identifier, alpha sequence, beta sequence, value of label l1)
        receptor_specs = [("1", "AAAA", "ATA", 1),
                          ("2", "ATA", "ATT", 2)]

        receptors = []
        for receptor_id, alpha_sequence, beta_sequence, label_value in receptor_specs:
            alpha_chain = ReceptorSequence(amino_acid_sequence=alpha_sequence)
            beta_chain = ReceptorSequence(amino_acid_sequence=beta_sequence)
            receptors.append(TCABReceptor(alpha=alpha_chain,
                                          beta=beta_chain,
                                          metadata={"l1": label_value},
                                          identifier=receptor_id))

        PathBuilder.build(path)

        label_config = LabelConfiguration()
        label_config.add_label("l1", [1, 2])

        return ReceptorDataset.build(receptors, 2, path), label_config
    def _import_from_files(filenames: List[str], generic_params: DatasetImportParams) -> ReceptorDataset:
        """
        Import receptors from delimited text files in which each row describes one receptor.

        Each file is read with pandas using the separator and column subset given in
        generic_params. Rows containing missing values and duplicated rows are removed, and the
        columns are renamed through generic_params.column_mapping. Sequence columns, when
        present, are trimmed: one character from each end of the amino acid sequences and three
        characters from each end of the nucleotide sequences. For every row, one
        ReceptorSequence per chain in generic_params.receptor_chains is created and the chains
        are combined into a receptor through ReceptorBuilder.build_object.

        After renaming, each file must provide the "count" and "identifier" columns and, per
        chain, the v/j gene, allele and subgroup columns read below.

        Arguments:
            filenames: paths of the files to import
            generic_params: import parameters; separator, columns_to_load, column_mapping,
                receptor_chains, region_type, sequence_file_size and result_path are read

        Returns:
            the result of ReceptorDataset.build for the receptors of all files
        """
        elements = []

        for file in filenames:
            df = pd.read_csv(file, sep=generic_params.separator, usecols=generic_params.columns_to_load)
            # dropna / drop_duplicates return a new frame unless inplace=True; without it the
            # filtered result would be thrown away and the rows would remain in df
            df.dropna(inplace=True)
            df.drop_duplicates(inplace=True)
            df.rename(columns=generic_params.column_mapping, inplace=True)

            # NOTE(review): presumably strips the conserved flanking residues (junction -> CDR3);
            # confirm against the expected input format
            for chain_prefix in ("alpha", "beta"):
                amino_acid_column = chain_prefix + "_amino_acid_sequence"
                if amino_acid_column in df:
                    df[amino_acid_column] = df[amino_acid_column].str[1:-1]
            for chain_prefix in ("alpha", "beta"):
                nucleotide_column = chain_prefix + "_nucleotide_sequence"
                if nucleotide_column in df:
                    df[nucleotide_column] = df[nucleotide_column].str[3:-3]

            chain_vals = list(generic_params.receptor_chains.value)
            chain_names = [Chain.get_chain(ch).name.lower() for ch in chain_vals]

            for chain_name in chain_names:
                df = SingleLineReceptorImport.make_gene_columns(df, ["v", "j"], chain_name)

            # columns whose name contains any of these substrings are consumed explicitly below
            # and are therefore kept out of the receptor metadata
            non_metadata_markers = ["v_gene", 'j_gene', "count", "identifier"] + chain_names

            for _, row in df.iterrows():
                sequences = {}
                for chain_val, chain_name in zip(chain_vals, chain_names):
                    amino_acid_key = chain_name + "_amino_acid_sequence"
                    nucleotide_key = chain_name + "_nucleotide_sequence"

                    sequence_metadata = SequenceMetadata(
                        v_gene=row[f"{chain_name}_v_gene"], v_allele=row[f"{chain_name}_v_allele"],
                        v_subgroup=row[f'{chain_name}_v_subgroup'],
                        j_gene=row[f"{chain_name}_j_gene"], j_allele=row[f"{chain_name}_j_allele"],
                        j_subgroup=row[f'{chain_name}_j_subgroup'],
                        chain=chain_name, count=row["count"], region_type=generic_params.region_type.value)

                    # sequence columns are optional; a missing column yields None for that field
                    sequences[chain_val] = ReceptorSequence(
                        amino_acid_sequence=row[amino_acid_key] if amino_acid_key in row else None,
                        nucleotide_sequence=row[nucleotide_key] if nucleotide_key in row else None,
                        metadata=sequence_metadata)

                receptor_metadata = {key: row[key] for key in row.keys()
                                     if all(marker not in key for marker in non_metadata_markers)}

                elements.append(ReceptorBuilder.build_object(sequences, row["identifier"], receptor_metadata))

        return ReceptorDataset.build(elements, generic_params.sequence_file_size, generic_params.result_path)
    def construct_test_flatten_dataset(self, path):
        """
        Create a three-receptor dataset in which the last two receptors have identical content.

        Receptor "1" (AAATTT / ATATAT) has the metadata entry l1 = 1; the two receptors with
        identifier "2" (AAAAAA / AAAAAA) both have l1 = 2. Each receptor is a separate object.

        Arguments:
            path: location passed as the third positional argument to ReceptorDataset.build

        Returns:
            the result of ReceptorDataset.build, called with 10 as its second positional argument
        """
        # (identifier, alpha sequence, beta sequence, value of label l1); the repeated last
        # entry is intentional and produces a second, independent receptor object
        receptor_specs = [
            ("1", "AAATTT", "ATATAT", 1),
            ("2", "AAAAAA", "AAAAAA", 2),
            ("2", "AAAAAA", "AAAAAA", 2),
        ]

        receptors = []
        for receptor_id, alpha_sequence, beta_sequence, label_value in receptor_specs:
            alpha_chain = ReceptorSequence(amino_acid_sequence=alpha_sequence,
                                           identifier=receptor_id + "a")
            beta_chain = ReceptorSequence(amino_acid_sequence=beta_sequence,
                                          identifier=receptor_id + "b")
            receptors.append(TCABReceptor(alpha=alpha_chain,
                                          beta=beta_chain,
                                          metadata={"l1": label_value},
                                          identifier=receptor_id))

        return ReceptorDataset.build(receptors, 10, path)