Python ImportHelper.prepare_frame_type_list Examples

Programming Language: Python

Namespace/Package Name: source.util.ImportHelper

Class/Type: ImportHelper

Method/Function: prepare_frame_type_list

Examples at hotexamples.com: 2

Python ImportHelper.prepare_frame_type_list - 2 examples found. These are the top-rated real-world Python examples of source.util.ImportHelper.ImportHelper.prepare_frame_type_list, extracted from open source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

drop_empty_sequences(9)

drop_illegal_character_sequences(8)

junction_to_cdr3(7)

load_chains_from_genes(6)

import_dataset(6)

update_gene_info(5)

import_receptors(3)

get_sequence_filenames(2)

prepare_frame_type_list(2)

standardize_none_values(2)

import_items(1)

import_repertoire_dataset(1)

extract_sequence_dataset_params(1)

load_chains(1)

load_chains_from_column(1)

load_dataset_if_exists(1)

store_sequence_items(1)

Example #1

Show file

    def preprocess_dataframe(dataframe: pd.DataFrame, params: DatasetImportParams):
        """
        Filters and normalizes an imported dataframe, performing the following steps in order:
            - frame_types values are upper-cased and only rows whose frame type is in the list returned by
              ImportHelper.prepare_frame_type_list(params) are kept
            - region_types is set to the name of params.region_type for every remaining row
            - if the region type is IMGT_CDR3, the first and last character are removed from sequence_aas and the
              sequences column (if present) is cut to the 3 * (len(sequence_aa) - 2) characters ending at index 78;
              for any other region type the sequences column (if present) is cut to the 3 * len(sequence_aa)
              characters ending at index 81
            - gene names are passed through AdaptiveImportHelper.parse_adaptive_germline_to_imgt and missing values
              through ImportHelper.standardize_none_values
            - empty / illegal-character sequences are handled by the corresponding ImportHelper functions
            - the chains column is (re)computed: from the existing chains column if there is one, otherwise from
              v_subgroups if present, otherwise from the genes

        Arguments:
            dataframe: the imported data; it must contain a frame_types column, and a sequence_aas column whenever
                the region type is IMGT_CDR3 or a sequences column is present. Its frame_types column is
                upper-cased in place on the object that was passed in.
            params: import parameters; region_type, organism, import_empty_aa_sequences,
                import_empty_nt_sequences and import_illegal_characters are read here.

        Returns:
            the filtered and preprocessed dataframe
        """
        dataframe.loc[:, "frame_types"] = dataframe.frame_types.str.upper()

        frame_type_list = ImportHelper.prepare_frame_type_list(params)
        # Take an explicit copy of the filtered rows: everything below assigns into this frame with .loc, and
        # writing to the result of a boolean-mask selection otherwise triggers pandas' SettingWithCopyWarning.
        dataframe = dataframe[dataframe["frame_types"].isin(frame_type_list)].copy()
        dataframe.loc[:, "region_types"] = params.region_type.name

        # NOTE(review): 84/78/81 look like fixed offsets of the CDR3 within the nucleotide read - confirm against
        # the data format. For amino acid sequences longer than 28 (IMGT_CDR3) or 27 (otherwise) characters the
        # start index becomes negative and is then interpreted relative to the end of the string - verify that
        # such sequences cannot occur.
        if params.region_type == RegionType.IMGT_CDR3:
            if "sequences" in dataframe.columns:
                # the length is taken from the still untrimmed amino acid sequence, so the slice is 3 * (len - 2)
                # characters long and lines up with the trimmed sequence_aas assigned right below
                dataframe.loc[:, 'sequences'] = [y[(84 - 3 * len(x)): 78] if x is not None else None for x, y in zip(dataframe['sequence_aas'], dataframe['sequences'])]
            dataframe.loc[:, 'sequence_aas'] = dataframe["sequence_aas"].str[1:-1]
        elif "sequences" in dataframe.columns:
            dataframe.loc[:, 'sequences'] = [y[(81 - 3 * len(x)): 81] if x is not None else None for x, y in zip(dataframe['sequence_aas'], dataframe['sequences'])]

        dataframe = AdaptiveImportHelper.parse_adaptive_germline_to_imgt(dataframe, params.organism)
        dataframe = ImportHelper.standardize_none_values(dataframe)
        ImportHelper.drop_empty_sequences(dataframe, params.import_empty_aa_sequences, params.import_empty_nt_sequences)
        ImportHelper.drop_illegal_character_sequences(dataframe, params.import_illegal_characters)

        if "chains" in dataframe.columns:
            dataframe.loc[:, "chains"] = ImportHelper.load_chains(dataframe)
        else:
            # loading from v_subgroups is preferred as sometimes v_genes is None when v_subgroups is defined
            if "v_subgroups" in dataframe.columns:
                dataframe.loc[:, "chains"] = ImportHelper.load_chains_from_column(dataframe, "v_subgroups")
            else:
                dataframe.loc[:, "chains"] = ImportHelper.load_chains_from_genes(dataframe)

        return dataframe

Example #2

Show file

    def preprocess_dataframe(df: pd.DataFrame, params: DatasetImportParams):
        """
        Function for preprocessing data from a dataframe containing AIRR data, such that:
            - productive sequences, sequences with stop codons or out of frame sequences are filtered according to specification
            - if RegionType is CDR3, the leading C and trailing W are removed from the sequence to match the CDR3 definition
            - if no chain column was specified, the chain is extracted from the v gene name
            - the allele information is removed from the V and J genes

        Details of the frame type handling:
            - if a productive column exists, frame_types starts as OUT and is set to IN where productive is True;
              without a productive column it starts as None
            - vj_in_frame == True then sets IN and stop_codon == True sets STOP, in that order, so STOP takes
              precedence when both flags are set
            - rows are filtered by the list from ImportHelper.prepare_frame_type_list(params) only if a productive
              column exists

        Arguments:
            df: the imported data; a frame_types column is written to the passed object, and when no productive
                column is present (no filtering takes place) the remaining modifications are applied to the
                passed object as well
            params: import parameters; region_type, column_mapping, import_empty_aa_sequences,
                import_empty_nt_sequences and import_illegal_characters are read here

        Returns:
            the preprocessed dataframe
        """
        if "productive" in df.columns:
            df["frame_types"] = SequenceFrameType.OUT.name
            # element-wise comparison is intentional: it only selects values equal to True and leaves missing or
            # otherwise non-boolean entries untouched
            df.loc[df["productive"] == True,
                   "frame_types"] = SequenceFrameType.IN.name
        else:
            df["frame_types"] = None

        if "vj_in_frame" in df.columns:
            df.loc[df["vj_in_frame"] == True,
                   "frame_types"] = SequenceFrameType.IN.name
        if "stop_codon" in df.columns:
            df.loc[df["stop_codon"] == True,
                   "frame_types"] = SequenceFrameType.STOP.name

        if "productive" in df.columns:
            frame_type_list = ImportHelper.prepare_frame_type_list(params)
            # Take an explicit copy of the filtered rows: the code below renames columns in place and assigns
            # with .loc, and writing to the result of a boolean-mask selection otherwise triggers pandas'
            # SettingWithCopyWarning.
            df = df[df["frame_types"].isin(frame_type_list)].copy()

        if params.region_type == RegionType.IMGT_CDR3:
            if "sequence_aas" not in df.columns and "sequences" not in df.columns:
                # no sequence columns were mapped: fall back to the cdr3 / cdr3_aa columns if they are available
                if "cdr3" in df.columns:
                    df.rename(columns={"cdr3": "sequences"}, inplace=True)
                if "cdr3_aa" in df.columns:
                    df.rename(columns={"cdr3_aa": "sequence_aas"},
                              inplace=True)
                df.loc[:, "region_types"] = params.region_type.name
            elif "junction" in params.column_mapping or "junction_aa" in params.column_mapping:
                ImportHelper.junction_to_cdr3(df, params.region_type)
        # todo else: support "full_sequence" import through regiontype?

        if "chains" not in df.columns:
            df.loc[:, "chains"] = ImportHelper.load_chains_from_genes(df)

        df = ImportHelper.update_gene_info(df)
        ImportHelper.drop_empty_sequences(df, params.import_empty_aa_sequences,
                                          params.import_empty_nt_sequences)
        ImportHelper.drop_illegal_character_sequences(
            df, params.import_illegal_characters)

        return df