Example #1
0
    # Run the scripts-directory check, then turn the command line arguments into
    # the run settings, the TCR chain in use and the codon table
    fxn.check_scripts_dir()
    parsed_cli = vars(args())
    input_args, chain, codons = fxn.sort_input(parsed_cli)

    # Load the IMGT data (plus the functionality information) for that chain and species
    imgt_dat, tcr_functionality = fxn.get_imgt_data(
        chain, gene_types, input_args['species'])

    # Stitch, then build the header shared by both FASTA records from the returned fields
    out_list, stitched = stitch(
        input_args, chain, imgt_dat, tcr_functionality, codons)
    header_fields = '|'.join(out_list)
    out_str = header_fields + '(L)'

    # Divider line, then the nucleotide record followed by its translation
    print('----------------------------------------------------------------------------------------------')
    nt_record = fxn.fastafy('nt|' + out_str, stitched)
    print(nt_record)
    aa_record = fxn.fastafy('aa|' + out_str, fxn.translate_nt(stitched))
    print(aa_record)

    # If a known/partial amino acid sequence provided, ensure they match up with a quick printed alignment
    if 'aa' in input_args:
        from Bio import pairwise2
        from Bio.pairwise2 import format_alignment
        alignments = pairwise2.align.globalxx(input_args['aa'],
                                              fxn.translate_nt(stitched))
        for i in range(0, 600, 60):
            print('\n')
            if i > len(alignments[0][0]):
                break
            for y in [
                    x[i:i + 60]
                    for x in format_alignment(*alignments[0]).split('\n')[:3]
Example #2
0
                                        preferred_alleles)
    out_str = '|'.join(out_list)

    # Validate the requested output mode before anything is written to stdout
    output_mode = input_args['mode']
    known_modes = ('BOTH_FA', 'AA_FA', 'NT_FA', 'AA', 'NT')
    if output_mode not in known_modes:
        raise IOError(
            "Unknown output mode detected: " + output_mode + ". \n" +
            "Should be one of 'BOTH_FA' (default), 'AA_FA', 'NT_FA', 'AA', 'NT'.")

    if '_FA' in output_mode:
        # FASTA modes: a divider line first, then whichever record(s) were asked for
        print('----------------------------------------------------------------------------------------------')
        if output_mode in ('BOTH_FA', 'NT_FA'):
            print(fxn.fastafy('nt|' + out_str, stitched))

        if output_mode in ('BOTH_FA', 'AA_FA'):
            # 5' pad the stitched sequence with 'offset' Ns so that any non-codon-length
            # 5' additions do not shift the translation
            padded_nt = 'N' * offset + stitched
            print(fxn.fastafy('aa|' + out_str, fxn.translate_nt(padded_nt)))

    elif output_mode == 'NT':
        # Bare nucleotide sequence, no FASTA header
        print(stitched)

    elif output_mode == 'AA':
        # Bare translation, using the same 5' N padding as the FASTA output
        print(fxn.translate_nt('N' * offset + stitched))

    # If a known/partial amino acid sequence provided, ensure they match up with a quick printed alignment
    if input_args['aa']:
Example #3
0
if __name__ == '__main__':

    # Get input arguments, determine the TCR chain in use, get codon table, then load the IMGT data in
    fxn.check_scripts_dir()
    input_args, chain, codons = fxn.sort_input(vars(args()))

    imgt_dat, tcr_functionality = fxn.get_imgt_data(chain, gene_types,
                                                    input_args['species'])

    # Stitch the sequence; the returned list supplies the fields of the FASTA header
    out_list, stitched = stitch(input_args, chain, imgt_dat, tcr_functionality,
                                codons)
    out_str = '-'.join(out_list)

    # Single-argument print(...) calls give identical output whether interpreted as the
    # Python 2 print statement or the Python 3 print function
    print('----------------------------------------------------------------------------------------------')
    print(fxn.fastafy('nt-' + out_str, stitched))
    print(fxn.fastafy('aa-' + out_str, fxn.translate_nt(stitched)))

    # If a known/partial amino acid sequence provided, ensure they match up with a quick printed alignment
    if 'aa' in input_args:
        from Bio import pairwise2
        from Bio.pairwise2 import format_alignment
        alignments = pairwise2.align.globalxx(input_args['aa'],
                                              fxn.translate_nt(stitched))
        for i in range(0, 600, 60):
            print '\n'
            if i > len(alignments[0][0]):
                break
            for y in [
                    x[i:i + 60]
                    for x in format_alignment(*alignments[0]).split('\n')[:3]
Example #4
0
        # Nothing can be split without the IMGT FASTA for this species, so report and abort.
        # The message is built by concatenation so the species name sits inside the quotes
        # (passing it as a second print argument inserted a stray space: for' human').
        print("Error: imgt-data.fasta file not detected for '" + species +
              "'. Please generate and place it in the appropriate Data subdirectory.")
        # Exit with a non-zero status so callers can tell the run failed
        sys.exit(1)

    # If so, check the modification time for the imgt-data.fasta file, assuming that's the last download time
    input_imgt_file = species_dir + 'imgt-data.fasta'
    mod_date = datetime.fromtimestamp(
        os.path.getmtime(input_imgt_file)).strftime('%Y-%m-%d')

    # Then read through the FASTA and sort into the appropriate chains.
    # Opened with plain 'r': the old 'U' flag was a deprecated no-op in Python 3 text mode
    # and is rejected with a ValueError from Python 3.11 onwards.
    with open(input_imgt_file, 'r') as in_file, \
            open(species_dir + 'TRA.fasta', 'w') as TRA, \
            open(species_dir + 'TRB.fasta', 'w') as TRB:

        # NOTE(review): 'prot' (and 'allele' below) are not used in the visible code -
        # confirm nothing later relies on them before removing
        prot = coll.defaultdict(coll.defaultdict)

        for fasta_id, seq, blank in fxn.read_fa(in_file):
            # The second '|'-delimited header field is expected to be 'GENE*ALLELE';
            # anything that does not split into exactly two parts raises a ValueError here
            gene, allele = fasta_id.split('|')[1].split('*')

            # NB: TRDV included with TRA genes due to the evidence that even non 'TRAV/DV' genes can recombine with TRAJ
            if 'TRA' in gene or 'TRDV' in gene:
                TRA.write(fxn.fastafy(fasta_id, seq))
            elif 'TRB' in gene:
                TRB.write(fxn.fastafy(fasta_id, seq))
            # Entries matching neither test are not written to either file

    # Finally log the dates: the source file's modification date and when this split was run
    log_txt = 'imgt-data.fasta_last_modified ' + mod_date + '\nsplit-imgt-data.py_last_run ' + fxn.today(
    )
    with open(species_dir + 'data-production-date.txt', 'w') as log_file:
        log_file.write(log_txt)
Example #5
0
                        # Can't do C checks if user providing genes, as it may be a C
                        # (i.e. the 'additional_genes' field differs from its placeholder text plus newline)
                        if values['additional_genes'] != extra_gene_text + '\n':
                            tcr_bits['skip_c_checks'] = True

                        # Pass the collected inputs through the autofill helper for this chain
                        tcr_bits = fxn.autofill_input(tcr_bits, chain)

                        # Run the stitching
                        # The three returned values are stored under per-chain keys:
                        # '<chain>_out_list', '<chain>_stitched' and '<chain>_offset'
                        outputs[ref_chain + '_out_list'], \
                        outputs[ref_chain + '_stitched'], \
                        outputs[ref_chain + '_offset'] = st.stitch(tcr_bits, tcr_dat, functionality,
                                                                   partial, codons, 3, preferred)

                        # Join the output fields into a '|'-delimited header, then build the
                        # nucleotide FASTA record from that header and the stitched sequence
                        outputs[ref_chain + '_out_str'] = '|'.join(
                            outputs[ref_chain + '_out_list'])
                        outputs[ref_chain + '_fasta'] = fxn.fastafy(
                            'nt|' + outputs[ref_chain + '_out_str'],
                            outputs[ref_chain + '_stitched'])

                        # Push the FASTA text into this chain's '_out' window element
                        window[ref_chain + '_out'].update(outputs[ref_chain +
                                                                  '_fasta'])

                    # Any failure above is captured as text against this chain's output key
                    # instead of propagating
                    except Exception as message:
                        warning_msgs[ref_chain + '_out'] = str(message)

                # Reached when the preceding condition (not visible here) was false but at
                # least one of the V, J or CDR3 fields for this chain has a value
                elif values[ref_chain +
                            'V'] or values[ref_chain +
                                           'J'] or values[ref_chain + '_CDR3']:
                    warnings.warn(
                        'V gene, J gene, and CDR3 sequence are all required to stitch a TCR chain.'
                    )