# Get input arguments, determine the TCR chain in use, get codon table, then load the IMGT data in fxn.check_scripts_dir() input_args, chain, codons = fxn.sort_input(vars(args())) imgt_dat, tcr_functionality = fxn.get_imgt_data(chain, gene_types, input_args['species']) out_list, stitched = stitch(input_args, chain, imgt_dat, tcr_functionality, codons) out_str = '|'.join(out_list) + '(L)' print( '----------------------------------------------------------------------------------------------' ) print(fxn.fastafy('nt|' + out_str, stitched)) print(fxn.fastafy('aa|' + out_str, fxn.translate_nt(stitched))) # If a known/partial amino acid sequence provided, ensure they match up with a quick printed alignment if 'aa' in input_args: from Bio import pairwise2 from Bio.pairwise2 import format_alignment alignments = pairwise2.align.globalxx(input_args['aa'], fxn.translate_nt(stitched)) for i in range(0, 600, 60): print('\n') if i > len(alignments[0][0]): break for y in [ x[i:i + 60] for x in format_alignment(*alignments[0]).split('\n')[:3]
preferred_alleles) out_str = '|'.join(out_list) # Output the appropriate strings to stdout if input_args['mode'] not in ['BOTH_FA', 'AA_FA', 'NT_FA', 'AA', 'NT']: raise IOError( "Unknown output mode detected: " + input_args['mode'] + ". \n" "Should be one of 'BOTH_FA' (default), 'AA_FA', 'NT_FA', 'AA', 'NT'." ) if '_FA' in input_args['mode']: print( '----------------------------------------------------------------------------------------------' ) if input_args['mode'] == 'BOTH_FA' or input_args['mode'] == 'NT_FA': print(fxn.fastafy('nt|' + out_str, stitched)) if input_args['mode'] == 'BOTH_FA' or input_args['mode'] == 'AA_FA': # Use the offset to 5' pad the stitched sequence with 'N's to make up for non-codon length 5' added seqs print( fxn.fastafy('aa|' + out_str, fxn.translate_nt('N' * offset + stitched))) elif input_args['mode'] == 'NT': print(stitched) elif input_args['mode'] == 'AA': print(fxn.translate_nt('N' * offset + stitched)) # If a known/partial amino acid sequence provided, ensure they match up with a quick printed alignment if input_args['aa']:
if __name__ == '__main__': # Get input arguments, determine the TCR chain in use, get codon table, then load the IMGT data in fxn.check_scripts_dir() input_args, chain, codons = fxn.sort_input(vars(args())) imgt_dat, tcr_functionality = fxn.get_imgt_data(chain, gene_types, input_args['species']) out_list, stitched = stitch(input_args, chain, imgt_dat, tcr_functionality, codons) out_str = '-'.join(out_list) print '----------------------------------------------------------------------------------------------' print fxn.fastafy('nt-' + out_str, stitched) print fxn.fastafy('aa-' + out_str, fxn.translate_nt(stitched)) # If a known/partial amino acid sequence provided, ensure they match up with a quick printed alignment if 'aa' in input_args: from Bio import pairwise2 from Bio.pairwise2 import format_alignment alignments = pairwise2.align.globalxx(input_args['aa'], fxn.translate_nt(stitched)) for i in range(0, 600, 60): print '\n' if i > len(alignments[0][0]): break for y in [ x[i:i + 60] for x in format_alignment(*alignments[0]).split('\n')[:3]
# Split a species' IMGT download (imgt-data.fasta) into per-chain FASTA files (TRA.fasta / TRB.fasta)
# and record when the download was last modified and when this split was last run.
# Relies on `species`, `species_dir` and the `fxn` helper module being set up earlier in the script.
input_imgt_file = species_dir + 'imgt-data.fasta'

# NOTE(review): the `if` header guarding this error/exit pair is not visible in this excerpt; the
# condition below is reconstructed from the error message itself - confirm against the original header.
if not os.path.isfile(input_imgt_file):
    # Quote the species name properly (previously printed as: for' SPECIES'.)
    print("Error: imgt-data.fasta file not detected for '" + species +
          "'. Please generate and place it in the appropriate Data subdirectory.")
    sys.exit()

# If so, check the modification time for the imgt-data.fasta file, assuming that's the last download time
mod_date = datetime.fromtimestamp(os.path.getmtime(input_imgt_file)).strftime('%Y-%m-%d')

# Then read through the FASTA and sort into the appropriate chains
# The 'U' mode flag was removed in Python 3.11 (raises ValueError); plain text mode already
# performs universal-newline translation, so 'r' reads the same content.
with open(input_imgt_file, 'r') as in_file, \
        open(species_dir + 'TRA.fasta', 'w') as TRA, \
        open(species_dir + 'TRB.fasta', 'w') as TRB:

    # Kept for compatibility: assigned here but not read anywhere in the visible code
    prot = coll.defaultdict(coll.defaultdict)

    for fasta_id, seq, blank in fxn.read_fa(in_file):
        # The second '|'-delimited header field is split on '*' into exactly two parts;
        # only the gene part is used for routing below
        gene, allele = fasta_id.split('|')[1].split('*')

        # NB: TRDV included with TRA genes due to the evidence that even non 'TRAV/DV' genes can recombine with TRAJ
        if 'TRA' in gene or 'TRDV' in gene:
            TRA.write(fxn.fastafy(fasta_id, seq))
        elif 'TRB' in gene:
            TRB.write(fxn.fastafy(fasta_id, seq))
        # Entries matching neither pattern are not written to either file

# Finally log the dates
log_txt = 'imgt-data.fasta_last_modified ' + mod_date + '\nsplit-imgt-data.py_last_run ' + fxn.today()

with open(species_dir + 'data-production-date.txt', 'w') as log_file:
    log_file.write(log_txt)
# Can't do C checks if user providing genes, as it may be a C if values['additional_genes'] != extra_gene_text + '\n': tcr_bits['skip_c_checks'] = True tcr_bits = fxn.autofill_input(tcr_bits, chain) # Run the stitching outputs[ref_chain + '_out_list'], \ outputs[ref_chain + '_stitched'], \ outputs[ref_chain + '_offset'] = st.stitch(tcr_bits, tcr_dat, functionality, partial, codons, 3, preferred) outputs[ref_chain + '_out_str'] = '|'.join( outputs[ref_chain + '_out_list']) outputs[ref_chain + '_fasta'] = fxn.fastafy( 'nt|' + outputs[ref_chain + '_out_str'], outputs[ref_chain + '_stitched']) window[ref_chain + '_out'].update(outputs[ref_chain + '_fasta']) except Exception as message: warning_msgs[ref_chain + '_out'] = str(message) elif values[ref_chain + 'V'] or values[ref_chain + 'J'] or values[ref_chain + '_CDR3']: warnings.warn( 'V gene, J gene, and CDR3 sequence are all required to stitch a TCR chain.' )