if __name__ == '__main__':
    # Script entry point: read the command line arguments, load the codon,
    # linker and IMGT reference data for both chains, then open the input
    # file of TCRs. `fxn`, `st`, `args` and `out_headers` are defined outside
    # this block.

    # Get input arguments, get the required data read in
    fxn.check_scripts_dir()
    input_args = vars(args())
    codons = fxn.get_optimal_codons(input_args['codon_usage'],
                                    input_args['species'].upper())
    linker_dict = fxn.get_linker_dict()

    # Reference data and functionality annotations, keyed by chain name
    tcr_dat = {}
    tcr_functionality = {}
    for c in ['TRA', 'TRB']:
        tmp_tcr_dat, tmp_functionality = fxn.get_imgt_data(
            c, st.gene_types, input_args['species'].upper())
        tcr_dat[c] = tmp_tcr_dat
        tcr_functionality[c] = tmp_functionality

    # Then go through in file and stitch each TCR on each line
    if not os.path.isfile(input_args['in_file']):
        raise IOError(
            input_args['in_file'] +
            " not detected - please check and specify in file again.")

    # TODO opener function for gzipped
    # NOTE: the old 'rU' mode was removed in Python 3.11 (open() raises
    # ValueError for it); plain text mode already applies universal-newline
    # translation on Python 3, so 'r' reads the same lines.
    with open(input_args['in_file'], 'r') as in_file:
        line_count = 0
        # Output rows start with the tab-separated header line
        out_data = ['\t'.join(out_headers)]
# Single-letter region codes mapped to the region labels passed on to
# fxn.get_imgt_data() when the reference data are loaded.
regions = {
    "v": "V-REGION",
    "j": "J-REGION",
    "c": "EX1+EX2+EX3+EX4",
    "l": "L-PART1+L-PART2",
}

# The region labels alone, in the same order as the mapping above
gene_types = [regions[code] for code in regions]

if __name__ == "__main__":

    # Get input arguments, determine the TCR chain in use, get codon table,
    # then load the IMGT data in
    fxn.check_scripts_dir()
    input_args, chain, codons = fxn.sort_input(vars(args()))

    imgt_dat, tcr_functionality = fxn.get_imgt_data(
        chain, gene_types, input_args["species"])

    # Stitch the requested TCR; out_list holds the pieces of the FASTA header
    out_list, stitched = stitch(
        input_args, chain, imgt_dat, tcr_functionality, codons)
    out_str = "{}(L)".format("|".join(out_list))

    # Report the nucleotide record, then its translation, under a divider
    print("----------------------------------------------------------------------------------------------")
    print(fxn.fastafy("nt|" + out_str, stitched))
    print(fxn.fastafy("aa|" + out_str, fxn.translate_nt(stitched)))

    # If a known/partial amino acid sequence provided, ensure they match up
    # with a quick printed alignment
    if "aa" in input_args:
        # Biopython is only imported when an alignment is actually requested
        from Bio import pairwise2
        from Bio.pairwise2 import format_alignment
# Then stitch each individual chain... for ref_chain in ['TR1', 'TR2']: chain = convert_chains[receptor][ref_chain] window[ref_chain + '_out'].update('') window[ref_chain + '_log'].update('') with warnings.catch_warnings(record=True) as chain_log: warnings.simplefilter("always") if values[ref_chain + 'V'] and values[ref_chain + 'J'] and values[ref_chain + '_CDR3']: try: tcr_dat, functionality, partial = fxn.get_imgt_data( chain, st.gene_types, species) # If additional genes provided, just add them to all possible gene segment types if values['additional_genes'] != extra_gene_text + '\n': for extra_gene in outputs['additional_fastas']: gene, allele = extra_gene[0].split('*') for gene_type in tcr_dat.keys(): if gene not in tcr_dat[gene_type]: tcr_dat[gene_type][ gene] = coll.defaultdict(list) if allele in tcr_dat[gene_type][gene]: raise warnings.warn( "User provided gene/allele combination "
else: # If not explicitly provided, infer from input TSV headers with fxn.opener(input_args['in_file']) as in_file: for line in in_file: if 'TRAV' in line and 'TRGV' not in line: receptor = 'TRA/TRB' elif 'TRGV' in line and 'TRAV' not in line: receptor = 'TRG/TRD' else: raise IOError("Unable to determine receptor from input file header, please check template. ") break # Define the individual receptors (chains or loci, i.e. TRA and TRB or TRG and TRD) in play r1, r2 = receptor.split('/') for c in [r1, r2]: tmp_tcr_dat, tmp_functionality, partial = fxn.get_imgt_data(c, st.gene_types, species) tcr_dat[c] = tmp_tcr_dat tcr_functionality[c] = tmp_functionality if 'extra_genes' in input_args: if input_args['extra_genes']: tcr_dat[c], tcr_functionality[c] = fxn.get_additional_genes(tcr_dat[c], tcr_functionality[c]) input_args['skip_c_checks'] = True else: input_args['skip_c_checks'] = False # Allow for provision of preferred alleles if input_args['preferred_alleles_path']: preferences[c] = fxn.get_preferred_alleles(input_args['preferred_alleles_path'], list(fxn.regions.values()), tcr_dat[c], partial, c) else:
if __name__ == '__main__':
    # Script entry point: read the command line arguments, load the IMGT data
    # for the chain in use, then resolve the gene and allele requested for
    # each region. `fxn` and `args` are defined outside this block.

    # TODO move all this to one large bracketing function?

    # Get input arguments, determine the TCR chain in use, get codon table,
    # then load the IMGT data in
    fxn.check_scripts_dir()
    input_args, chain, codons = fxn.sort_input(vars(args()))

    # Single-letter region codes (also the input argument names) mapped to the
    # region labels used to index the loaded IMGT data
    regions = {
        'v': 'V-REGION',
        'j': 'J-REGION',
        'c': 'EX1+EX2+EX3+EX4',
        'l': 'L-PART1+L-PART2'
    }

    # Materialise as a real list: on Python 3 dict.values() is only a view
    gene_types = list(regions.values())
    imgt_dat, functionality = fxn.get_imgt_data(chain, gene_types)

    # Then find each of the appropriate sequences
    done = {}
    for r in regions:
        if '*' in input_args[r]:
            # An explicit allele was requested, as GENE*ALLELE
            gene, allele = input_args[r].split('*')

            if allele not in imgt_dat[regions[r]][gene]:
                # One pre-built string keeps the output identical whether
                # print is the Python 2 statement or the Python 3 function.
                print("\tCannot find " + r.upper() + " gene " + input_args[r] +
                      ": attempting prototypical allele (" + gene + "*01)")
                allele = '01'
        else:
            # No allele given: default to the prototypical *01 allele
            gene = input_args[r]
            allele = '01'