def __init__(self, organism, params=None):
    """Store the organism and distance parameters and precompute V-region distances.

    Args:
        organism: Organism identifier; a full slice of it is kept on the
            instance and the original value is forwarded to
            ``tcr_distances.compute_all_v_region_distances``.
        params: Optional distance-parameter object. When ``None`` a default
            ``tcr_distances.DistanceParams()`` is created; otherwise a deep
            copy is stored so later changes to the caller's object do not
            affect this instance.
    """
    self.organism = organism[:]
    self.params = (tcr_distances.DistanceParams() if params is None
                   else copy.deepcopy(params))
    # Computed once up front and kept on the instance as ``rep_dists``.
    self.rep_dists = tcr_distances.compute_all_v_region_distances(
        organism, self.params)
# Command-line option declarations on the parser object ``p``.
# NOTE(review): ``p`` is created outside this view; if these declarations
# belong inside an enclosing block (e.g. ``with ... as p:``), indent them to
# match that block -- TODO confirm against the lines above.
p.flag('intrasubject_nbrdists')
p.multiword('clones_files').cast(lambda x: x.split())
p.multiword('dist_chains').cast(
    lambda x: x.split()
).default(
    'A B AB'
).described_as(
    'The chains over which the distance calcn will be performed; default is all three ("A B AB")'
)
p.multiword('epitope_prefixes').cast(lambda x: x.split())
# Alternative default that was previously considered: "5 10 25 -1 -5 -10"
p.multiword('nbrdist_percentiles').cast(
    lambda x: list(map(int, x.split()))
).default("5 10 25")
p.str('distance_params')

# internal legacy hack: new_nbrdists is simply the negation of the flag
new_nbrdists = not intrasubject_nbrdists

# Rebind the name: the config string is replaced by the parsed parameter object.
distance_params = tcr_distances.DistanceParams(config_string=distance_params)

# Fall back to the single ``clones_file`` when no list of files was given.
if not clones_files:
    assert clones_file
    clones_files = [clones_file]

# Default to one empty prefix per clones file.
if not epitope_prefixes:
    epitope_prefixes = [''] * len(clones_files)

assert len(epitope_prefixes) == len(clones_files)

# Single-argument parenthesised print emits the same text whether ``print``
# is the Python 2 statement or the print function.
print('precomputing v-region distances')
rep_dists = tcr_distances.compute_all_v_region_distances(
    organism, distance_params)
print('done precomputing v-region distances')
def make_default_logo_svg_cmds(upper_left,
                               width,
                               height,
                               organism,
                               tcr_infos,
                               ab,
                               distance_params=None,
                               rep_dists=None,
                               add_fake_alleles=False,
                               show_full_cdr3=False):
    """Build the logo drawing commands for one chain of a set of TCR records.

    Right now this is single-chain only.

    Args:
        upper_left: Passed through unchanged to ``make_tcr_logo``.
        width: Desired width; layout widths are scaled by
            ``width / default_width``.
        height: Desired height; layout heights are scaled by
            ``height / default_height``.
        organism: Organism key used for the gene lookup in
            ``all_genes.all_genes`` and for distance / junction helpers.
        tcr_infos: Re-iterable collection of dict-like records. Each record
            is read for the keys ``subject``, ``epitope``, ``cdr3a``,
            ``cdr3b``, ``va_gene``, ``ja_gene``, ``va_genes``, ``vb_gene``,
            ``jb_gene``, ``vb_genes`` (the ``*_genes`` values are
            ``;``-separated strings), ``clone_id``, the
            ``{va,vb,ja,jb}_label_rep`` / ``_label_rep_color`` keys, and, when
            junction bars are enabled, ``cdr3a_nucseq`` or ``cdr3b_nucseq``.
            NOTE(review): the ``*_label_rep*`` keys are presumably filled in
            by the ``util.assign_label_reps_and_colors_...`` call below --
            confirm.
        ab: Chain selector, compared against ``'A'`` and ``'B'``.
        distance_params: Optional distance parameters; defaults to
            ``tcr_distances.DistanceParams(config_string=None)``.
        rep_dists: Optional precomputed V-region distances; computed from
            ``organism`` and ``distance_params`` when ``None``.
        add_fake_alleles: When true, gene names lacking ``'*'`` get ``'*01'``
            appended before lookup.
        show_full_cdr3: Passed through unchanged to ``make_tcr_logo``.

    Returns:
        Whatever ``make_tcr_logo`` returns (the drawing commands).

    Note:
        Reads the names ``junction_bars``, ``default_width``,
        ``default_height`` and the other ``default_*`` layout values from the
        enclosing scope.
    """
    # NOTE(review): this is a substring test, so '' and 'AB' pass as well as
    # 'A' and 'B'.
    assert ab in 'AB'

    if distance_params is None:
        distance_params = tcr_distances.DistanceParams(config_string=None)
    if rep_dists is None:
        rep_dists = tcr_distances.compute_all_v_region_distances(
            organism, distance_params)

    util.assign_label_reps_and_colors_based_on_most_common_genes_in_repertoire(
        tcr_infos, organism)

    # Map every label rep to its colour; later records overwrite earlier ones.
    rep_colors = {}
    for record in tcr_infos:
        for segment in ('va', 'vb', 'ja', 'jb'):
            label_rep = record[segment + '_label_rep']
            label_color = record[segment + '_label_rep_color']
            rep_colors[label_rep] = label_color

    def _with_fake_allele(gene):
        # add '*01' -- hacky!
        return gene if '*' in gene else gene + '*01'

    gene_table = all_genes.all_genes[organism] if False else None  # placeholder, see below
    del gene_table  # lookup is done per record to keep the original failure behaviour

    tcrs = []
    dist_tcrs = []
    for record in tcr_infos:
        mouse = record['subject']
        epitope = record['epitope']  # read but otherwise unused
        cdr3a = record['cdr3a']
        cdr3b = record['cdr3b']

        ## for computing distances
        va_gene = record['va_gene']
        ja_gene = record['ja_gene']
        va_genes = record['va_genes'].split(';')
        vb_gene = record['vb_gene']
        jb_gene = record['jb_gene']
        vb_genes = record['vb_genes'].split(';')

        if add_fake_alleles:
            va_genes = [_with_fake_allele(g) for g in va_genes]
            vb_genes = [_with_fake_allele(g) for g in vb_genes]
            va_gene = _with_fake_allele(va_gene)
            ja_gene = _with_fake_allele(ja_gene)
            vb_gene = _with_fake_allele(vb_gene)
            jb_gene = _with_fake_allele(jb_gene)

        va_reps = frozenset(
            all_genes.all_genes[organism][g].rep for g in va_genes)
        vb_reps = frozenset(
            all_genes.all_genes[organism][g].rep for g in vb_genes)
        dist_tcrs.append([va_reps, vb_reps, cdr3a, cdr3b])

        ## note that we are using mm1 reps here that also dont have allele info
        va_rep = record['va_label_rep']
        ja_rep = record['ja_label_rep']
        vb_rep = record['vb_label_rep']
        jb_rep = record['jb_label_rep']

        # Placeholder per-nucleotide sources (three per residue); replaced
        # below for the selected chain when junction bars are enabled.
        cdr3a_nucseq_src = ['V'] * (3 * len(cdr3a))  ## hack, unused
        cdr3b_nucseq_src = ['V'] * (3 * len(cdr3b))

        if junction_bars:
            if ab == 'A':
                # Only the last of the six returned values is used here.
                _, _, _, _, _, cdr3a_nucseq_src = tcr_sampler.analyze_junction(
                    organism, va_gene, ja_gene, cdr3a,
                    record['cdr3a_nucseq'].lower(),
                    return_cdr3_nucseq_src=True)
            elif ab == 'B':
                _, _, _, _, _, cdr3b_nucseq_src = tcr_sampler.analyze_junction(
                    organism, vb_gene, jb_gene, cdr3b,
                    record['cdr3b_nucseq'].lower(),
                    return_cdr3_nucseq_src=True)
                ## try to distinguish between N before D and N after D
                seen_d = False
                for pos, src in enumerate(cdr3b_nucseq_src):
                    if src == 'D':
                        seen_d = True
                    elif src == 'N':
                        cdr3b_nucseq_src[pos] = 'N2' if seen_d else 'N1'

        assert len(cdr3a_nucseq_src) == 3 * len(cdr3a)
        assert len(cdr3b_nucseq_src) == 3 * len(cdr3b)

        tcrs.append((mouse, va_rep, ja_rep, vb_rep, jb_rep, cdr3a, cdr3b,
                     cdr3a_nucseq_src, cdr3b_nucseq_src, record['clone_id']))

    ## compute distances, used in logo construction for picking the center tcr
    ## for aligning against
    chains = ab
    num_tcrs = len(dist_tcrs)
    all_dists = np.zeros((num_tcrs, num_tcrs))
    for i in range(num_tcrs):
        for j in range(i + 1, num_tcrs):
            pair_dist = tcr_distances.compute_distance(
                dist_tcrs[i], dist_tcrs[j], chains, rep_dists, distance_params)
            # Fill both triangles; the diagonal stays zero.
            all_dists[i, j] = pair_dist
            all_dists[j, i] = pair_dist

    # now make the logo
    members = range(len(tcrs))

    # scale everything by our desired height, width
    scale_w = float(width) / default_width
    scale_h = float(height) / default_height

    return make_tcr_logo(upper_left, tcrs, members, all_dists, ab, rep_colors,
                         scale_w * default_vj_logo_width,
                         scale_w * default_pwmplusgaps_width,
                         scale_w * default_xpad,
                         scale_h * default_pwm_height,
                         scale_h * default_junction_bars_height,
                         scale_h * default_ypad,
                         show_full_cdr3)