Esempio n. 1
0
# Per-chain field suffixes / gene-segment letters.
# NOTE(review): presumably combined with a chain prefix (e.g. 'TRA' + 'V') to
# build some of the column names below -- confirm against the code that uses it.
stitch_list_fields = [
    '_name',
    'V',
    'J',
    'C',
    '_CDR3',
    '_leader',
]

# Ordered header names (presumably the columns of the output table -- confirm
# against the writer code, which is not visible here).
out_headers = [
    # Identifier
    'TCR_name',
    # Nucleotide and amino-acid sequences for each chain
    'TRA_nt',
    'TRB_nt',
    'TRA_aa',
    'TRB_aa',
    # Alpha-chain V / J / CDR3
    'TRAV',
    'TRAJ',
    'TRA_CDR3',
    # Beta-chain V / J / CDR3
    'TRBV',
    'TRBJ',
    'TRB_CDR3',
    # Constant regions and leaders
    'TRAC',
    'TRBC',
    'TRA_leader',
    'TRB_leader',
    # Linker and linked sequences
    'Linker',
    'Linked_nt',
    'Linked_aa',
    # Free-text diagnostics
    'Warnings/Errors',
]

if __name__ == '__main__':

    # Get input arguments, get the required data read in
    fxn.check_scripts_dir()
    # args() is defined elsewhere; vars() exposes its result as a plain dict
    # (presumably an argparse Namespace -- confirm)
    input_args = vars(args())

    # The species string is upper-cased before being handed to the fxn helpers
    codons = fxn.get_optimal_codons(input_args['codon_usage'],
                                    input_args['species'].upper())
    linker_dict = fxn.get_linker_dict()

    # Per-chain results of fxn.get_imgt_data, keyed by 'TRA' / 'TRB'
    tcr_dat = {}
    tcr_functionality = {}

    for c in ['TRA', 'TRB']:

        # get_imgt_data is unpacked as a 2-tuple here: (data, functionality)
        tmp_tcr_dat, tmp_functionality = fxn.get_imgt_data(
            c, st.gene_types, input_args['species'].upper())
        tcr_dat[c] = tmp_tcr_dat
        tcr_functionality[c] = tmp_functionality

    # Then go through in file and stitch each TCR on each line
    if not os.path.isfile(input_args['in_file']):
        raise IOError(
Esempio n. 2
0
        specific_args['name'], used_alleles['v'], used_alleles['j'],
        used_alleles['c'], specific_args['cdr3'], used_alleles['l'] + '(L)'
    ]

    # TODO add information to output header if additional 5'/3' sequences specified?
    return out_bits, stitched_nt, transl_offset


# Snapshot of the values of fxn.regions as a list; passed to the fxn data-loading helpers below
gene_types = list(fxn.regions.values())

if __name__ == '__main__':

    # Get input arguments, determine the TCR chain in use, get codon table, then load the IMGT data in
    fxn.check_scripts_dir()
    # sort_input returns the (possibly adjusted) argument dict together with the chain
    input_args, chain = fxn.sort_input(vars(args()))
    codons = fxn.get_optimal_codons(input_args['codon_usage_path'],
                                    input_args['species'])
    # get_imgt_data is unpacked as a 3-tuple here; in the visible code the third item
    # ('partial') is only forwarded to get_preferred_alleles
    imgt_dat, tcr_functionality, partial = fxn.get_imgt_data(
        chain, gene_types, input_args['species'])

    # If extra genes were requested, replace both data structures with the versions
    # returned by get_additional_genes, and force the 'skip_c_checks' option on
    if input_args['extra_genes']:
        imgt_dat, tcr_functionality = fxn.get_additional_genes(
            imgt_dat, tcr_functionality)
        input_args['skip_c_checks'] = True

    # Load preferred alleles only when a path was supplied; otherwise fall back to an empty dict
    if input_args['preferred_alleles_path']:
        preferred_alleles = fxn.get_preferred_alleles(
            input_args['preferred_alleles_path'], gene_types, imgt_dat,
            partial, chain)
    else:
        preferred_alleles = {}
Esempio n. 3
0
    # Record the start time (presumably used later to report run duration -- confirm)
    start = time()

    # Get species, in order of command line arg > inferred from input file name > default human
    if input_args['species']:
        # An explicitly requested species must be one that fxn.find_species_covered() reports;
        # the comparison is made on the upper-cased name
        if input_args['species'].upper() in fxn.find_species_covered():
            species = input_args['species'].upper()
        else:
            raise IOError("No data available for requested species: " + input_args['species'])
    else:
        # No species given: try to infer it from the input file, else default to 'HUMAN'
        species_inference = fxn.infer_species(input_args['in_file'])
        if species_inference:
            species = species_inference
        else:
            species = 'HUMAN'

    codons = fxn.get_optimal_codons(input_args['codon_usage'], species)
    linker_dict = fxn.get_linker_dict()

    # Containers filled in by code after this excerpt (not visible here)
    tcr_dat = {}
    tcr_functionality = {}
    preferences = {}

    # Figure out whether a/b or g/d
    if input_args['receptor']:
        input_receptor = input_args['receptor'].upper()
        # Matching is done on single letters anywhere in the upper-cased string: it must contain
        # A and/or B but neither G nor D (alpha/beta), or G and/or D but neither A nor B (gamma/delta).
        # Strings mixing letters from both sets, or containing none of them, are rejected.
        if ('A' in input_receptor or 'B' in input_receptor) and not ('G' in input_receptor or 'D' in input_receptor):
            receptor = 'TRA/TRB'
        elif ('G' in input_receptor or 'D' in input_receptor) and not ('A' in input_receptor or 'B' in input_receptor):
            receptor = 'TRG/TRD'
        else:
            raise IOError("Unable to determine receptor from '-r' command string: " + input_receptor + ". ")
Esempio n. 4
0
        preferred_file = values['find_preferred_alleles']
        just_file = preferred_file.split('/')[-1]
        window['preferred_allele_button'].update(just_file)

    elif event == 'Run Stitchr':
        # Warning text collected per key; with a str factory, missing keys start as ''
        # (assuming coll is the collections module -- confirm against the imports)
        warning_msgs = coll.defaultdict(str)

        # Reset the 'linked_out' and 'linked_log' window elements to empty text
        window['linked_out'].update('')
        window['linked_log'].update('')

        # Disable stitchr button while code is running
        window['Run Stitchr'].update(disabled=True)

        # Loop through both chains, determine which are asked for, and read data in
        # An empty string is passed as the first argument (presumably meaning no custom
        # codon usage file -- confirm against fxn.get_optimal_codons)
        codons = fxn.get_optimal_codons('', species)
        # NOTE(review): defaultdict() is created without a default factory, so it behaves
        # like a plain dict (missing keys raise KeyError) -- confirm this is intended
        outputs = coll.defaultdict()

        # If additional genes provided, read in and run rudimentary checks
        if values['additional_genes'] != extra_gene_text + '\n':
            outputs['additional_fastas_raw'] = [
                x for x in read_fasta_box(
                    values['additional_genes'].split('\n') + ['>\n'])
            ][:-1]

            # Check no redundant gene names
            if len(list(set([x[0] for x in outputs['additional_fastas_raw']]))) != \
                    len(outputs['additional_fastas_raw']):
                window['additional_genes'].update(
                    "Multiple FASTAs detected with the same identifier name.\n"
                    "Additional genes ignored; correct and retry")