# Per-chain field labels for stitching.
# NOTE(review): the code that consumes this list is outside this excerpt.
stitch_list_fields = ['_name', 'V', 'J', 'C', '_CDR3', '_leader']

# Output column names - presumably written as the header row of the results
# file; confirm against the writer, which is not visible here.
out_headers = [
    'TCR_name', 'TRA_nt', 'TRB_nt', 'TRA_aa', 'TRB_aa',
    'TRAV', 'TRAJ', 'TRA_CDR3', 'TRBV', 'TRBJ', 'TRB_CDR3',
    'TRAC', 'TRBC', 'TRA_leader', 'TRB_leader',
    'Linker', 'Linked_nt', 'Linked_aa', 'Warnings/Errors'
]

if __name__ == '__main__':

    # Get input arguments, get the required data read in
    fxn.check_scripts_dir()
    # vars() exposes the parsed arguments as a mapping so they can be indexed by name below
    input_args = vars(args())
    # The species argument is upper-cased before being handed to the fxn helpers
    codons = fxn.get_optimal_codons(input_args['codon_usage'], input_args['species'].upper())
    linker_dict = fxn.get_linker_dict()

    # Per-chain lookups, keyed by 'TRA' / 'TRB'
    tcr_dat = {}
    tcr_functionality = {}
    for c in ['TRA', 'TRB']:
        # get_imgt_data returns a pair per chain; each half is stored under the chain name
        tmp_tcr_dat, tmp_functionality = fxn.get_imgt_data(
            c, st.gene_types, input_args['species'].upper())
        tcr_dat[c] = tmp_tcr_dat
        tcr_functionality[c] = tmp_functionality

    # Then go through in file and stitch each TCR on each line
    if not os.path.isfile(input_args['in_file']):
        # Fail before any processing when the input path is not an existing file
        raise IOError(  # NOTE(review): the call's arguments continue beyond this excerpt
# NOTE(review): everything down to the `return` is the tail of a function whose
# `def` line and the opening bracket of this list literal lie above the excerpt.
        specific_args['name'], used_alleles['v'], used_alleles['j'],
        used_alleles['c'], specific_args['cdr3'], used_alleles['l'] + '(L)'
    ]

    # TODO add information to output header if additional 5'/3' sequences specified?
    return out_bits, stitched_nt, transl_offset


# List built from the values of fxn.regions; passed to the data loaders below
gene_types = list(fxn.regions.values())

if __name__ == '__main__':

    # Get input arguments, determine the TCR chain in use, get codon table, then load the IMGT data in
    fxn.check_scripts_dir()
    # sort_input receives the argument mapping and hands back the arguments plus the chain
    input_args, chain = fxn.sort_input(vars(args()))
    codons = fxn.get_optimal_codons(input_args['codon_usage_path'], input_args['species'])
    imgt_dat, tcr_functionality, partial = fxn.get_imgt_data(
        chain, gene_types, input_args['species'])

    # When the 'extra_genes' option is set, both loaded structures are replaced by
    # the versions returned from get_additional_genes
    if input_args['extra_genes']:
        imgt_dat, tcr_functionality = fxn.get_additional_genes(
            imgt_dat, tcr_functionality)
        # NOTE(review): nesting reconstructed from a whitespace-collapsed excerpt;
        # presumably this disables constant-region checks for extra genes - confirm
        input_args['skip_c_checks'] = True

    # Preferred alleles are only read when a path was supplied; otherwise an empty dict is used
    if input_args['preferred_alleles_path']:
        preferred_alleles = fxn.get_preferred_alleles(
            input_args['preferred_alleles_path'], gene_types, imgt_dat, partial, chain)
    else:
        preferred_alleles = {}
start = time()

# Resolve the species. Precedence: command line argument, then whatever can be
# inferred from the input file name, then the human default.
if not input_args['species']:
    species_inference = fxn.infer_species(input_args['in_file'])
    species = species_inference if species_inference else 'HUMAN'
elif input_args['species'].upper() in fxn.find_species_covered():
    species = input_args['species'].upper()
else:
    raise IOError("No data available for requested species: " + input_args['species'])

codons = fxn.get_optimal_codons(input_args['codon_usage'], species)
linker_dict = fxn.get_linker_dict()
tcr_dat, tcr_functionality, preferences = {}, {}, {}

# Work out whether the a/b or the g/d receptor was requested: the upper-cased
# '-r' string must mention letters from exactly one of the two pairs.
if input_args['receptor']:
    input_receptor = input_args['receptor'].upper()
    mentions_ab = any(letter in input_receptor for letter in 'AB')
    mentions_gd = any(letter in input_receptor for letter in 'GD')

    if mentions_ab and not mentions_gd:
        receptor = 'TRA/TRB'
    elif mentions_gd and not mentions_ab:
        receptor = 'TRG/TRD'
    else:
        # Either both pairs or neither pair were mentioned
        raise IOError("Unable to determine receptor from '-r' command string: " + input_receptor + ". ")
# NOTE(review): excerpt from inside an event-dispatch chain. The first three
# statements close the preceding branch, whose `if`/`elif` header is above this excerpt.
    preferred_file = values['find_preferred_alleles']
    # Only the last '/'-separated component of the chosen path is shown on the button
    just_file = preferred_file.split('/')[-1]
    window['preferred_allele_button'].update(just_file)

elif event == 'Run Stitchr':

    warning_msgs = coll.defaultdict(str)
    # Blank the 'linked_out' and 'linked_log' elements before this run
    window['linked_out'].update('')
    window['linked_log'].update('')

    # Disable stitchr button while code is running
    window['Run Stitchr'].update(disabled=True)

    # Loop through both chains, determine which are asked for, and read data in
    codons = fxn.get_optimal_codons('', species)
    # NOTE(review): assuming `coll` is the collections module, a defaultdict built
    # without a factory raises KeyError on missing keys, just like a plain dict
    outputs = coll.defaultdict()

    # If additional genes provided, read in and run rudimentary checks
    if values['additional_genes'] != extra_gene_text + '\n':
        # A bare '>' line is appended before parsing and the final parsed entry is
        # dropped again by [:-1] - presumably a sentinel that makes read_fasta_box
        # emit the last real record; confirm against that helper
        outputs['additional_fastas_raw'] = [
            x for x in read_fasta_box(
                values['additional_genes'].split('\n') + ['>\n'])
        ][:-1]

        # Check no redundant gene names
        # (element 0 of each entry is compared; the message below calls it the identifier)
        if len(list(set([x[0] for x in outputs['additional_fastas_raw']]))) != \
                len(outputs['additional_fastas_raw']):
            window['additional_genes'].update(
                "Multiple FASTAs detected with the same identifier name.\n"
                "Additional genes ignored; correct and retry")