for l in infos: mouse = l['subject'] epitope = l['epitope'] cdr3a = l['cdr3a'] cdr3b = l['cdr3b'] ## note that we are using mm1 reps here that also dont have allele info va_rep = l['va_label_rep'] ja_rep = l['ja_label_rep'] vb_rep = l['vb_label_rep'] jb_rep = l['jb_label_rep'] if junction_bars: a_junction_results = tcr_sampler.analyze_junction( organism, l['va_gene'], l['ja_gene'], cdr3a, l['cdr3a_nucseq'], return_cdr3_nucseq_src=True ) b_junction_results = tcr_sampler.analyze_junction( organism, l['vb_gene'], l['jb_gene'], cdr3b, l['cdr3b_nucseq'], return_cdr3_nucseq_src=True ) cdr3a_new_nucseq, cdr3a_protseq_masked, cdr3a_protseq_new_nucleotide_countstring,\ a_trims, a_inserts, cdr3a_nucseq_src = a_junction_results cdr3b_new_nucseq, cdr3b_protseq_masked, cdr3b_protseq_new_nucleotide_countstring,\ b_trims, b_inserts, cdr3b_nucseq_src = b_junction_results ## try to distinguish between N before D and N after D for i in range(len(cdr3b_nucseq_src)): if cdr3b_nucseq_src[i] == 'N': if cdr3b_nucseq_src[:i].count('D')==0: cdr3b_nucseq_src[i] = 'N1' else: cdr3b_nucseq_src[i] = 'N2' else:
def make_default_logo_svg_cmds(upper_left,
                               width,
                               height,
                               organism,
                               tcr_infos,
                               ab,
                               distance_params=None,
                               rep_dists=None,
                               add_fake_alleles=False,
                               show_full_cdr3=False):
    """Build a single-chain TCR logo scaled to the requested width and height.

    The function collects per-clone tuples and a pairwise distance matrix
    from ``tcr_infos`` and hands them to ``make_tcr_logo``, scaling the
    module-level ``default_*`` layout sizes by ``width / default_width``
    and ``height / default_height``.

    Args:
        upper_left: forwarded unchanged as the first argument of
            ``make_tcr_logo``.
        width: target width; divided by ``default_width`` to get the
            horizontal scale factor.
        height: target height; divided by ``default_height`` to get the
            vertical scale factor.
        organism: passed to the distance, gene-lookup and junction-analysis
            helpers.
        tcr_infos: re-iterable collection of dict-like records (it is walked
            twice).  Each record is read for the keys ``subject``,
            ``epitope``, ``cdr3a``, ``cdr3b``, ``va_gene``, ``ja_gene``,
            ``va_genes``, ``vb_gene``, ``jb_gene``, ``vb_genes``,
            ``clone_id``, the ``*_label_rep`` / ``*_label_rep_color`` keys
            and, when junction bars are enabled, ``cdr3a_nucseq`` or
            ``cdr3b_nucseq``.
        ab: chain selector; must satisfy ``ab in 'AB'``.  Only ``'A'`` and
            ``'B'`` trigger junction analysis.
        distance_params: distance settings; when None a
            ``tcr_distances.DistanceParams(config_string=None)`` is created.
        rep_dists: V-region distances; when None they are computed with
            ``tcr_distances.compute_all_v_region_distances``.
        add_fake_alleles: when true, ``'*01'`` is appended to every gene
            name that does not already contain ``'*'``.
        show_full_cdr3: forwarded unchanged as the last argument of
            ``make_tcr_logo``.

    Returns:
        Whatever ``make_tcr_logo`` returns (the original comment calls
        these "cmds").
    """
    # Single-chain only for now.
    assert ab in 'AB'

    if distance_params is None:
        distance_params = tcr_distances.DistanceParams(config_string=None)

    if rep_dists is None:
        # Precompute the V-region distances once for the whole repertoire.
        rep_dists = tcr_distances.compute_all_v_region_distances(
            organism, distance_params)

    # NOTE(review): presumably this fills in the '*_label_rep' and
    # '*_label_rep_color' keys read below -- confirm against util.
    util.assign_label_reps_and_colors_based_on_most_common_genes_in_repertoire(
        tcr_infos, organism)

    # Map every label rep to its color; same visiting order as before
    # (V alpha, V beta, J alpha, J beta), so later entries still win.
    rep_colors = {}
    for info in tcr_infos:
        for gene_key in ('va', 'vb', 'ja', 'jb'):
            label = info[gene_key + '_label_rep']
            rep_colors[label] = info[gene_key + '_label_rep_color']

    def with_allele(gene_name):
        # Hacky: tack on a '*01' allele when the name carries none.
        if '*' in gene_name:
            return gene_name
        return gene_name + '*01'

    tcrs = []
    dist_tcrs = []

    for l in tcr_infos:
        mouse = l['subject']
        epitope = l['epitope']  # not used below; lookup kept as in the original
        cdr3a = l['cdr3a']
        cdr3b = l['cdr3b']

        # Gene names used for the distance computation.
        va_gene = l['va_gene']
        ja_gene = l['ja_gene']
        va_genes = l['va_genes'].split(';')

        vb_gene = l['vb_gene']
        jb_gene = l['jb_gene']
        vb_genes = l['vb_genes'].split(';')

        if add_fake_alleles:
            va_genes = [with_allele(g) for g in va_genes]
            vb_genes = [with_allele(g) for g in vb_genes]
            va_gene = with_allele(va_gene)
            ja_gene = with_allele(ja_gene)
            vb_gene = with_allele(vb_gene)
            jb_gene = with_allele(jb_gene)

        gene_table = all_genes.all_genes[organism]
        va_reps = frozenset(gene_table[g].rep for g in va_genes)
        vb_reps = frozenset(gene_table[g].rep for g in vb_genes)

        dist_tcrs.append([va_reps, vb_reps, cdr3a, cdr3b])

        # Note: these label reps (mm1 reps) do not carry allele info.
        va_rep = l['va_label_rep']
        ja_rep = l['ja_label_rep']
        vb_rep = l['vb_label_rep']
        jb_rep = l['jb_label_rep']

        # Placeholder per-nucleotide sources, overwritten below for the
        # selected chain when junction bars are enabled.
        cdr3a_nucseq_src = ['V'] * (3 * len(cdr3a))
        cdr3b_nucseq_src = ['V'] * (3 * len(cdr3b))

        # `junction_bars` is not defined in this function; it is resolved
        # from the enclosing (presumably module) scope.
        if junction_bars:
            if ab == 'A':
                alpha_results = tcr_sampler.analyze_junction(
                    organism, va_gene, ja_gene, cdr3a,
                    l['cdr3a_nucseq'].lower(),
                    return_cdr3_nucseq_src=True)
                # Six-way unpack kept so a wrong-sized result still raises.
                _, _, _, _, _, cdr3a_nucseq_src = alpha_results
            elif ab == 'B':
                beta_results = tcr_sampler.analyze_junction(
                    organism, vb_gene, jb_gene, cdr3b,
                    l['cdr3b_nucseq'].lower(),
                    return_cdr3_nucseq_src=True)
                _, _, _, _, _, cdr3b_nucseq_src = beta_results

                # Distinguish N positions before the first D ('N1') from
                # those after it ('N2').
                seen_d = False
                for pos, src in enumerate(cdr3b_nucseq_src):
                    if src == 'D':
                        seen_d = True
                    elif src == 'N':
                        cdr3b_nucseq_src[pos] = 'N2' if seen_d else 'N1'

        assert len(cdr3a_nucseq_src) == 3 * len(cdr3a)
        assert len(cdr3b_nucseq_src) == 3 * len(cdr3b)

        tcrs.append((mouse, va_rep, ja_rep, vb_rep, jb_rep,
                     cdr3a, cdr3b,
                     cdr3a_nucseq_src, cdr3b_nucseq_src,
                     l['clone_id']))

    # Symmetric pairwise distance matrix; the logo construction uses it to
    # pick the center TCR to align against.
    chains = ab
    num_tcrs = len(dist_tcrs)
    all_dists = np.zeros((num_tcrs, num_tcrs))
    for i in range(num_tcrs):
        for j in range(i + 1, num_tcrs):
            pair_dist = tcr_distances.compute_distance(
                dist_tcrs[i], dist_tcrs[j], chains, rep_dists,
                distance_params)
            all_dists[i][j] = pair_dist
            all_dists[j][i] = pair_dist

    # Every clone is a member of the logo.
    members = range(len(tcrs))

    # Scale the default layout to the requested width and height.
    scale_w = float(width) / default_width
    scale_h = float(height) / default_height

    return make_tcr_logo(upper_left,
                         tcrs,
                         members,
                         all_dists,
                         ab,
                         rep_colors,
                         scale_w * default_vj_logo_width,
                         scale_w * default_pwmplusgaps_width,
                         scale_w * default_xpad,
                         scale_h * default_pwm_height,
                         scale_h * default_junction_bars_height,
                         scale_h * default_ypad,
                         show_full_cdr3)
if new_cdr3b_nucseq != cdr3b_nucseq: ## note note note new_cdr3b_protseq = read_sanger_data.get_translation( new_cdr3b_nucseq, '+1')[0] else: new_cdr3b_protseq = cdr3b_protseq[:] assert new_cdr3b_protseq == read_sanger_data.get_translation( cdr3b_nucseq, '+1')[0] bprob_protseq = tcr_sampler.beta_cdr3_protseq_probability( organism, vb_gene, jb_gene, new_cdr3b_protseq, verbose=verbose) vals = dict(l) #line.split('\t') + ['']*(len(outfields)-len(infields)) if add_masked_seqs: ## junction analysis a_junction_results = tcr_sampler.analyze_junction( organism, va_gene, ja_gene, cdr3a_protseq, cdr3a_nucseq) b_junction_results = tcr_sampler.analyze_junction( organism, vb_gene, jb_gene, cdr3b_protseq, cdr3b_nucseq) cdr3a_new_nucseq, cdr3a_protseq_masked, cdr3a_protseq_new_nucleotide_countstring,a_trims,a_inserts \ = a_junction_results cdr3b_new_nucseq, cdr3b_protseq_masked, cdr3b_protseq_new_nucleotide_countstring,b_trims,b_inserts \ = b_junction_results # from tcr_sampler.py: # trims = ( v_trim, d0_trim, d1_trim, j_trim ) # inserts = ( best_d_id, n_vd_insert, n_dj_insert, n_vj_insert ) assert a_trims[1] + a_trims[2] + a_inserts[0] + a_inserts[ 1] + a_inserts[2] + b_inserts[3] == 0 assert a_inserts[3] == len(cdr3a_new_nucseq)
if chain == 'A': if correct_cdr3_seqs: tmp_results = tcr_sampler.analyze_junction\ ( organism, v_gene, j_gene, cdr3_protseq, cdr3_nucseq, return_corrected_cdr3_seqs = True, mismatch_score = mismatch_score_for_correcting_cdr3_seqs ) corrected_cdr3_nucseq, corrected_cdr3_protseq = list( tmp_results)[-2:] if corrected_cdr3_nucseq != cdr3_nucseq: print 'fixing early sequence error', cdr3_nucseq, '==>', corrected_cdr3_nucseq cdr3_nucseq = corrected_cdr3_nucseq[:] cdr3_protseq = corrected_cdr3_protseq[:] junction_results = tcr_sampler.analyze_junction( organism, v_gene, j_gene, cdr3_protseq, cdr3_nucseq) new_nucseq, cdr3_protseq_masked, cdr3_protseq_new_nucleotide_countstring, trims, inserts \ = junction_results (v_trim, d0_trim, d1_trim, j_trim) = trims (d_id, n_vd_insert, n_dj_insert, n_vj_insert) = inserts if not new_nucseq: new_nucseq = '-' assert d0_trim + d1_trim + n_vd_insert + n_dj_insert == 0 junction_info = '%s %s -%d -%d +%d' % ( new_nucseq, cdr3_protseq_masked, v_trim, j_trim, n_vj_insert) else: junction_infos = [] possible_d_ids = tcr_rearrangement_new.all_trbd_nucseq[ organism].keys()[:]