def get_dbg_str(indelfo): if len(indelfo['qr_gap_seq']) != len(indelfo['gl_gap_seq']): print indelfo['qr_gap_seq'] print indelfo['gl_gap_seq'] raise Exception('different length qr and gl gap seqs (see previous lines)') qrprintstr, glprintstr = [], [] for ich in range(len(indelfo['qr_gap_seq'])): qrb, glb = indelfo['qr_gap_seq'][ich], indelfo['gl_gap_seq'][ich] qrcolor, glcolor = None, None if qrb in utils.gap_chars or glb in utils.gap_chars: qrcolor = 'light_blue' glcolor = 'light_blue' elif qrb in utils.ambiguous_bases: qrcolor = 'light_blue' elif glb in utils.ambiguous_bases: glcolor = 'light_blue' elif qrb != glb: qrcolor = 'red' qrprintstr.append(utils.color(qrcolor, qrb if qrb not in utils.gap_chars else '*')) # change it to a start just cause that's what it originally was... at some point should switch to just leaving it whatever gap char it was glprintstr.append(utils.color(glcolor, glb if glb not in utils.gap_chars else '*')) qrprintstr = ''.join(qrprintstr) glprintstr = ''.join(glprintstr) gene_str = '' gwidth = str(len('query')) if 'v' in indelfo['genes']: gene_str = utils.color_gene(indelfo['genes']['v'], width=int(gwidth), leftpad=True) gwidth = str(utils.len_excluding_colors(gene_str)) dj_gene_str = ' '.join([utils.color_gene(indelfo['genes'][r]) for r in 'dj' if r in indelfo['genes']]) dbg_str_list = [(' %' + gwidth + 's %s %s') % (gene_str, glprintstr, dj_gene_str), (' %' + gwidth + 's %s') % ('query', qrprintstr)] for idl in indelfo['indels']: dbg_str_list.append('%10s: %d base%s at %d (%s)' % (idl['type'], idl['len'], utils.plural(idl['len']), idl['pos'], idl['seqstr'])) return '\n'.join(dbg_str_list)
def print_partitions(self, reco_info=None, extrastr='', abbreviate=True, print_header=True, n_to_print=None, calc_missing_values='none', highlight_cluster_indices=None, print_partition_indices=False, ipart_center=None, sort_by_size=True): assert calc_missing_values in ['none', 'all', 'best'] if reco_info is not None and calc_missing_values == 'all': self.calculate_missing_values(reco_info) if print_header: print ' %s%7s %10s %-7s %s%5s %4s' % ( ' ' * utils.len_excluding_colors(extrastr), '', 'logprob', 'delta', 'index ' if print_partition_indices else '', 'clusters', 'n_procs'), if reco_info is not None or self.we_have_a_ccf: print ' %5s %5s' % ('purity', 'completeness'), print '' for ip in self.get_surrounding_partitions(n_to_print, i_center=ipart_center): if reco_info is not None and calc_missing_values == 'best' and ip == self.i_best: self.calculate_missing_values(reco_info, only_ip=ip) mark = ' ' if ip == self.i_best: mark = 'best ' if ip == self.i_best_minus_x: mark = mark[:-2] + '* ' if mark.count(' ') < len(mark): mark = utils.color('yellow', mark) right_extrastr = '' if self.n_seqs( ) < 200 else mark # if line is going to be really long, put the yellow stuff also on the right side self.print_partition( ip, reco_info, extrastr=extrastr + mark, abbreviate=abbreviate, highlight_cluster_indices=highlight_cluster_indices, print_partition_indices=print_partition_indices, right_extrastr=right_extrastr, sort_by_size=sort_by_size)
def run(self, args): if not args.dry_run: open(self.logfname, 'w').close() for name, info in self.tests.items(): if args.quick and name not in self.quick_tests: continue self.prepare_to_run(args, name, info) action = info['action'] cmd_str = info['bin'] + ' ' + action cmd_str += ' ' + ' '.join(info['extras'] + self.common_extras) if name == 'simulate': cmd_str += ' --outfname ' + self.infnames['new']['simu'] cmd_str += ' --indel-frequency 0.01 --indel-location v' elif 'cache-parameters-' not in name: cmd_str += ' --outfname ' + self.dirs[ 'new'] + '/' + name + '.csv' logstr = '%s %s' % (utils.color( 'green', name, width=30, padside='right'), cmd_str) print logstr if utils.len_excluding_colors( logstr ) < args.print_width else logstr[:args.print_width] + '[...]' if args.dry_run: continue logfile = open(self.logfname, 'a') logfile.write(logstr + '\n') logfile.close() start = time.time() try: check_call(cmd_str + ' 1>>' + self.logfname + ' 2>>' + self.logfname, shell=True) except CalledProcessError, err: # print err # this just says it exited with code != 0 print ' log tail:' print utils.pad_lines(check_output(['tail', self.logfname])) sys.exit(1) # raise Exception('exited with error') self.run_times[name] = time.time() - start # seconds
def print_seq_in_reco_event(original_line, iseq, extra_str='', label='', one_line=False, seed_uid=None, check_line_integrity=False):
    """
    Print ascii summary of recombination event and mutation.
    If <one_line>, then skip the germline lines, and only print the final_seq line.

    Four strings are built, all in the same column coordinates: the insertions, the d germline, the v and j germlines, and the query sequence <iseq>.
    Each gets <extra_str> prepended and a suffix appended (gene names, uid, mutation frequency), and the result is printed with no trailing newline of its own.
    The d germline line is dropped if the locus (from line['v_gene']) has no d gene.
    If <label> is set, it overwrites characters near the end of <extra_str> on the first line (and a warning is printed if what it overwrote wasn't just whitespace).
    If <check_line_integrity>, everything is done on a deep copy of <original_line>, which is compared to the original at the end, raising an Exception if they differ.
    """
    line = original_line
    if check_line_integrity:  # it's very important not to modify <line> -- this lets you verify that you aren't
        line = copy.deepcopy(original_line)  # copy that we can modify without changing <line>

    # one dot per deleted base, for each erosion
    delstrs = {d: '.' * line[d + '_del'] for d in utils.all_erosions}  # NOTE len(delstrs[<del>]) is not in general the same as len(line[<del>_del])
    if len(delstrs['v_5p']) > 50:  # don't print a million dots if left-side v deletion is really big
        delstrs['v_5p'] = '.%d.' % len(delstrs['v_5p'])

    # if there isn't enough space for dots in the vj line, we add some dashes to everybody so things fit (rare in heavy chain rearrangements, but pretty common in light chain)
    d_plus_inserts_length = len(line['vd_insertion'] + line['d_gl_seq'] + line['dj_insertion'])
    if line['v_3p_del'] + line['j_5p_del'] > d_plus_inserts_length:  # if dots for v and j interior deletions will be longer than <d_plus_inserts_length>
        # replace the interior dots with the abbreviated '.<n>.' form
        delstrs['v_3p'] = '.%d.' % line['v_3p_del']
        delstrs['j_5p'] = '.%d.' % line['j_5p_del']
        gapstr = '-' * (len(delstrs['v_3p'] + delstrs['j_5p']) - d_plus_inserts_length)  # NOTE a negative multiplier gives an empty string
        gap_insert_point = len(line['fv_insertion'] + delstrs['v_5p'] + line['v_gl_seq'])  # it doesn't really matter exactly where we put the blue dashes, as long as it's the same place in all four lines, but this is a good spot
        extra_space_because_of_fixed_nospace = max(0, d_plus_inserts_length - len(delstrs['v_3p'] + delstrs['j_5p']))  # if shortening the <delstrs> already over-compensated for the lack of space (i.e., if the number of dashes necessary is zero), then we need to add some dots to the vj line below
    else:
        gapstr = ''
        gap_insert_point = None
        extra_space_because_of_fixed_nospace = 0

    # each region's germline sequence, with its deletion strings on either side
    eroded_seqs_dots = {r: delstrs[r + '_5p'] + line[r + '_gl_seq'] + delstrs[r + '_3p'] for r in utils.regions}

    # build the three germline lines
    insert_line = ' ' * (len(line['fv_insertion']) + line['lengths']['v'] + len(delstrs['v_5p'])) \
                  + line['vd_insertion'] + ' ' * line['lengths']['d'] + line['dj_insertion'] \
                  + ' ' * (line['lengths']['j'] + line['j_3p_del'] + len(line['jf_insertion']))
    germline_d_start = len(line['fv_insertion']) + line['lengths']['v'] + len(line['vd_insertion']) - line['d_5p_del']
    germline_d_end = germline_d_start + line['d_5p_del'] + line['lengths']['d'] + line['d_3p_del']
    d_line = ' ' * (germline_d_start + len(delstrs['v_5p'])) \
             + eroded_seqs_dots['d'] \
             + ' ' * (len(line['j_gl_seq']) + len(line['dj_insertion']) - line['d_3p_del'] + line['j_3p_del'] + len(line['jf_insertion']))
    germline_v_end = len(line['fv_insertion']) + len(line['v_gl_seq']) + line['v_3p_del'] - 1  # position in the query sequence at which we find the last base of the v match. NOTE we subtract off the v_5p_del because we're *not* adding dots for that deletion (it's just too long)
    germline_j_start = germline_d_end + 1 - line['d_3p_del'] + len(line['dj_insertion']) - line['j_5p_del']
    vj_line = ' ' * len(line['fv_insertion']) + eroded_seqs_dots['v'] + '.' * extra_space_because_of_fixed_nospace \
              + ' ' * (germline_j_start - germline_v_end - 2) + eroded_seqs_dots['j'] + ' ' * len(line['jf_insertion'])
    # and the query line
    qrseq_line = ' ' * len(delstrs['v_5p']) + line['seqs'][iseq] + ' ' * line['j_3p_del']

    outstrs = [insert_line, d_line, vj_line, qrseq_line]
    check_outsr_lengths(line, outstrs, fix=True)  # I think the only way they can be different is if the d right side erosion is so long that it hangs over the right side of the j

    if gap_insert_point is not None:
        for istr in [0, 1, 3]:  # everybody except the vj line, which already has the modified interior delstrs above
            outstrs[istr] = outstrs[istr][:gap_insert_point] + gapstr + outstrs[istr][gap_insert_point:]

    check_outsr_lengths(line, outstrs, fix=True)

    # one (initially empty) list of colors for each character in each line
    colors = [[[] for _ in range(len(ostr))] for ostr in outstrs]
    if indelutils.has_indels(line['indelfos'][iseq]):
        # outstrs, colors = old_indel_shenanigans(line, iseq, outstrs, colors)
        outstrs, colors = indel_shenanigans(line, iseq, outstrs, colors)
    outstrs = add_colors(outstrs, colors, line)

    # text that goes to the right of each of the four lines (same order as <outstrs>)
    suffixes = [
        'insert%s\n' % ('s' if utils.has_d_gene(utils.get_locus(line['v_gene'])) else ''),
        '%s\n' % (utils.color_gene(line['d_gene'])),
        '%s %s\n' % (utils.color_gene(line['v_gene']), utils.color_gene(line['j_gene'])),
        '%s %4.2f mut %s\n' % (get_uid_str(line, iseq, seed_uid), line['mut_freqs'][iseq], utils.color('red', utils.is_functional_dbg_str(line, iseq)))
    ]
    outstrs = ['%s%s %s' % (extra_str, ostr, suf) for ostr, suf in zip(outstrs, suffixes)]

    if label != '':  # this doesn't really work if the edge of the removed string is the middle of a color code... but oh well, it doesn't really happen any more since I shortened the kbound label from waterer.py
        offset = max(0, len(extra_str) - 2)  # skootch <label> this many positions leftward into <extra_str>
        removed_str = outstrs[0][offset:offset + utils.len_excluding_colors(label)]
        outstrs[0] = outstrs[0][:offset] + label + outstrs[0][utils.len_excluding_colors(label) + offset:]  # NOTE this *replaces* the bases in <extra_str> with <label>, which is only fine if they're spaces
        if removed_str.strip() != '':  # warn if the label covered up something other than whitespace
            print '%s%s (covered by label \'%s\')' % (' ' * offset, utils.color('red', removed_str), label)

    if one_line:
        outstrs = outstrs[-1:]  # remove all except the query seq line
    elif not utils.has_d_gene(utils.get_locus(line['v_gene'])):
        outstrs.pop(1)  # remove the d germline line

    print ''.join(outstrs),  # trailing comma: each line already ends with the newline from its suffix

    if check_line_integrity:  # make sure nothing above modified the copy
        if set(line.keys()) != set(original_line.keys()):
            raise Exception('ack 1')
        for k in line:
            if line[k] != original_line[k]:
                print 'key %s differs:\n %s\n %s ' % (k, line[k], original_line[k])
                raise Exception('')
def read_gex(outdir, min_dprod=0.001, debug=True): # barcodes barcode_vals = [] with open('%s/%s' % (outdir, barcodefname)) as bfile: for il, line in enumerate(bfile): lstrs = line.strip().split() icount = int(lstrs.pop(0).strip('[]')) assert icount == len( barcode_vals ) + 1 # <icount> is the R-style (1-based) index of the first element in this line barcode_vals += [s.strip('"') for s in lstrs] if debug: print ' read %d barcodes' % len(barcode_vals) # pca values rotation_vals = collections.OrderedDict( ) # relationship between pca and gene names (map from gene name to list of pca components) with open('%s/%s' % (outdir, pcafname)) as pfile: pca_comps = None # names for each pca component (like PC3) for il, line in enumerate(pfile): if il == 0: pca_comps = line.strip().split() for ipc, pc in enumerate(pca_comps): assert pc[:2] == 'PC' assert int(pc[2:]) == ipc + 1 continue lstrs = line.strip().split() gene = lstrs.pop(0) assert len(lstrs) == len(pca_comps) rotation_vals[gene] = [float(vstr) for vstr in lstrs] if debug: print ' %d pca components for %d genes: %s' % ( len(pca_comps), len(rotation_vals), ' '.join(rotation_vals)) # umap values umap_vals = [] # list of (x, y) umap values for each cell with open('%s/%s' % (outdir, umapfname)) as ufile: for il, line in enumerate(ufile): lstrs = line.strip().split() if il == 0: assert lstrs == ['[,%d]' % i for i in [1, 2]] else: icount = int(lstrs.pop(0).strip('[]').rstrip(',')) assert icount == len(umap_vals) + 1 umap_vals.append([float(v) for v in lstrs]) if debug: print ' %d umap values' % len(umap_vals) assert len(umap_vals) == len(barcode_vals) # cluster assignments cluster_vals = [] with open('%s/%s' % (outdir, clusterfname)) as cfile: for il, line in enumerate(cfile): lstrs = line.strip().split() if lstrs[0] != 'Levels:': icount = int(lstrs.pop(0).strip('[]')) assert icount == len( cluster_vals ) + 1 # <icount> is the R-style (1-based) index of the first element in this line cluster_vals += [int(c) for c in lstrs] 
else: # last line lists the clusters (not sure why they're called "levels" cluster_ints = [ int(c) for c in lstrs[1:] ] # names of the clusters (1-based integer index) assert cluster_ints == list( range(min(cluster_ints), max(cluster_ints) + 1)) assert set(cluster_ints) == set(cluster_vals) if debug: print ' %d values in %d clusters: %s' % ( len(cluster_vals), len(cluster_ints), ' '.join( str(c) for c in cluster_ints)) assert len(cluster_vals) == len(barcode_vals) # markers for each cluster pairwise_cmarkers = { '%d-%d' % (c1, c2): [] for c1, c2 in itertools.permutations(cluster_ints, 2) } # reversing them (1-2 vs 2-1) the values are just the negative of each other if they're both there, but you don't get all the same genes summary_cmarkers = {'%d-summary' % c: [] for c in cluster_ints} for cname in cluster_ints: other_clusters = [c for c in cluster_ints if c != cname] with open('%s/%s' % (outdir, markfname(cname))) as cfile: reader = csv.DictReader(cfile) assert list(reader.fieldnames)[:5] == [ '', 'Top', 'p.value', 'FDR', 'summary.logFC' ] # summary.logFC is the log-fold change from the comparison with the lowest p-value (not necessarily the min/max log fold change) assert list(reader.fieldnames)[5:] == [ 'logFC.%d' % i for i in other_clusters ] # should be a column for each pairwise comparison with another cluster for il, line in enumerate(reader): gene = line[''] logfc_vals = { i: float(line['logFC.%d' % i]) for i in other_clusters } summary_cmarkers['%d-summary' % cname].append( (gene, float(line['summary.logFC']))) for c2 in logfc_vals: pairwise_cmarkers['%d-%d' % (cname, c2)].append( (gene, logfc_vals[c2])) for ckey in pairwise_cmarkers: pairwise_cmarkers[ckey] = collections.OrderedDict( sorted(pairwise_cmarkers[ckey], key=operator.itemgetter(1), reverse=True)) for ckey in summary_cmarkers: summary_cmarkers[ckey] = collections.OrderedDict( sorted(summary_cmarkers[ckey], key=operator.itemgetter(1), reverse=True)) # reference marker genes fabfo, waickfo = 
read_ref_data() print ' interpretation: "this cluster is much more <type>-like than <clusters>, based on relative upregulation of <N genes>"' print ' type any (N genes) vs. single clusters gene contributions (sum over clusters)' for cname in cluster_ints: print ' %s' % utils.color('green', 'cluster %d' % cname) for vtype in waickfo: clprods = [] all_contribs = {} for ic2, c2 in enumerate([c for c in cluster_ints if c != cname]): dprod, gene_contribs = gexdot( waickfo[vtype], pairwise_cmarkers['%d-%d' % (cname, c2)], return_gene_contributions=True, lbstr='%8s %s ' % ((vtype + ':') if ic2 == 0 else '', utils.color('blue', str(c2)))) #, debug=True) if dprod < min_dprod: continue clprods.append({ 'c2': c2, 'dprod': dprod, 'gene_contribs': gene_contribs }) for tg, contr in gene_contribs.items(): if tg not in all_contribs: all_contribs[tg] = 0. all_contribs[tg] += gene_contribs[tg] clprods = sorted(clprods, key=lambda x: x['dprod'], reverse=True) anydprod, anygcontribs = gexdot( waickfo[vtype], summary_cmarkers['%d-summary' % cname], return_gene_contributions=True) # lbstr=XXX sumclprod = {'dprod': anydprod, 'gene_contribs': anygcontribs} if debug and len(clprods) > 0: def dcol(d): if d['dprod'] > 0.1: return 'red' elif d['dprod'] > 0.01: return 'yellow' else: return None def dpstr(d): return utils.color(dcol(d), '%.3f' % d['dprod']) def cstr(d): return utils.color('blue', '%d' % d['c2']) tmpstr = ' '.join('%s %s' % (cstr(d), dpstr(d)) for d in clprods) anystr = '' if sumclprod['dprod'] > min_dprod: anystr = '%s (%2d)' % (dpstr(sumclprod), len(sumclprod['gene_contribs'])) print ' %s %-s %-s %s' % ( utils.color('purple', vtype, width=8), # utils.color('blue', ' '.join('%d'%d['c2'] for d in clprods), width=20, padside='right'), anystr + ' ' * (12 - utils.len_excluding_colors(anystr)), tmpstr + ' ' * (70 - utils.len_excluding_colors(tmpstr)), ' '.join('%s %.1f' % (g.lower(), c) for g, c in sorted(all_contribs.items(), key=operator.itemgetter(1), reverse=True)), )
def finalize_region(self, region, sorted_gene_counts, annotations=None, debug=False):
    """
    Decide which <region> genes to keep, then add them to self.genes_to_keep (and everybody else in self.glfo['seqs'][region] to self.genes_to_remove), and set self.finalized.

    <sorted_gene_counts>: iterable of (gene, counts) pairs (presumably sorted by decreasing counts, going by the name -- not verified here).
    The genes are first split into classes by self.separate_into_classes(). Within each class, genes with a fraction of the total counts
    below self.args.min_allele_prevalence_fraction are never kept, the first (remaining) gene is always kept, and after that we keep genes that
    differ in sequence from the class's first gene, until we've kept self.args.n_alleles_per_gene of them.
    If this results in keeping nothing, we instead keep everything.
    If <debug>, prints a table of kept and removed genes, which includes simulation info if self.simglfo is set, and (if <annotations> and self.reco_info are both set) the simulation genes for the queries assigned to each kept gene.
    """
    easycounts = {gene: counts for gene, counts in sorted_gene_counts}  # same info, but as a dict
    total_counts = sum([counts for counts in easycounts.values()])
    class_counts = self.separate_into_classes(region, sorted_gene_counts, easycounts)  # list of classes, where each class is a list of dicts (keys read below: 'gene', 'counts', 'seq', 'hdist')

    genes_to_keep = set()

    if debug:
        print ' %s groups separated by %d snps (-: same group as previous kept gene)' % (utils.color('blue', region), self.n_max_snps[region])
        # table header (no newline at the end, since each row printed below starts with one)
        print ' %-20s %5s %s removed genes (snps counts%s)%s%s' % (
            'genes to keep',
            'counts',
            '' if self.simglfo is None else utils.color('blue', 'sim'),
            '' if self.simglfo is None else utils.color('blue', ' sim counts'),
            '' if self.simglfo is None else (' ' + utils.color('red', 'x:') + ' not in simulation'),
            '' if (annotations is None or self.reco_info is None) else (' %s sim counts/genes for the queries assigned to this kept gene %s' % (utils.color('blue', '['), utils.color('blue', ']'))),
        ),

        def count_str(cnt):  # one decimal place for small counts, otherwise none
            if cnt < 10.:
                return '%.1f' % cnt
            else:
                return '%.0f' % cnt

        def simcountstr(gene, ws):  # counts in simulation for <gene> (note that this is _not_ the same as sim_gene_count_str(), since this takes no account of _which_ queries these counts occur in [plus it's coming from the opposite point of view])
            # <ws>: field width (as a string) for the returned string
            if self.simglfo is None:
                rstr = ''
            elif gene in self.simglfo['seqs'][utils.get_region(gene)]:
                rstr = utils.color('blue', (' %' + ws + 'd') % self.simcounts[utils.get_region(gene)][gene])
            else:  # gene isn't in simulation
                rstr = utils.color('red', (' %' + ws + 's') % 'x')
            return rstr

        def sim_gene_count_str(kgene):  # figure out simulation genes and counts for the uids assigned to <kgene>
            if annotations is None or self.reco_info is None:
                return ''
            uids_this_gene = [uid for uid, line in annotations.items() if line[region + '_gene'] == kgene]
            sim_genes = {}  # simulation genes for the uids that we assigned to <kgene> (note that self.simcounts doesn't have this per-uid information)
            for uid in uids_this_gene:
                sgene = self.reco_info[uid][region + '_gene']
                if sgene not in sim_genes:
                    sim_genes[sgene] = 0
                sim_genes[sgene] += 1
            sorted_sim_gene_counts = sorted(sim_genes.items(), key=operator.itemgetter(1), reverse=True)
            # NOTE this local variable hides the count_str() fcn defined above, but only within this fcn
            count_str = ' '.join([utils.color('blue' if sg == kgene else 'red', str(c)) for sg, c in sorted_sim_gene_counts])  # blue if the simulation gene is the same as the one we assigned, otherwise red
            sgene_str = ' '.join([utils.color_gene(sg) for sg, _ in sorted_sim_gene_counts])
            return '%s %s' % (count_str, sgene_str)

    for iclass in range(len(class_counts)):
        gclass = class_counts[iclass]
        kept_this_class = []
        for ig in range(len(gclass)):
            gfo = gclass[ig]

            if float(gfo['counts']) / total_counts < self.args.min_allele_prevalence_fraction:  # always skip everybody that's super uncommon
                pass  # don't keep it
            elif ig == 0:  # keep the first one from this class
                genes_to_keep.add(gfo['gene'])
                kept_this_class.append(gfo['gene'])
            elif utils.hamming_distance(gclass[0]['seq'], gclass[ig]['seq']) == 0:  # don't keep it if it's indistinguishable from the most common one (the matches are probably mostly really the best one)
                pass  # don't keep it
            elif len(kept_this_class) < self.args.n_alleles_per_gene:  # always keep the most common <self.args.n_alleles_per_gene> in each class [note: defaults to 1 if looking for new alleles, otherwise 2]
                genes_to_keep.add(gfo['gene'])
                kept_this_class.append(gfo['gene'])
            else:
                pass  # don't keep it

            if debug and gfo['gene'] in genes_to_keep:  # start a new row for each kept gene
                snpstr = ' ' if ig == 0 else '(%d)' % utils.hamming_distance(gclass[0]['seq'], gfo['seq'])  # only happens if we keep more than one from this class
                print '\n %s%-s %7s%s %-3s' % ('- ' if ig > 0 else ' ', utils.color_gene(gfo['gene'], width=20), count_str(gfo['counts']), simcountstr(gfo['gene'], '4'), snpstr),

        if debug:
            if len(kept_this_class) == 0:  # still need a row on which to list the removed genes
                print '\n %s%-s %7s%4s %-3s' % (' ', utils.color('blue', 'none', width=20, padside='right'), '-', '', ''),
            # NOTE <genes_to_keep> includes genes from previous classes, but presumably each gene is only in one class -- not verified here
            removedfo = [gfo for gfo in gclass if gfo['gene'] not in genes_to_keep]
            removed_str = ''
            if len(removedfo) > 0:
                number_strs = ['(%d %3s%s)' % (gfo['hdist'], count_str(gfo['counts']), simcountstr(gfo['gene'], '1')) for gfo in removedfo]
                name_strs = ['%s' % (utils.color_gene(gfo['gene'])) for gfo in removedfo]
                removed_str = '%s %s' % (' '.join(number_strs), ' '.join(name_strs))
            annotation_str = ''
            if (annotations is not None and self.reco_info is not None) and len(kept_this_class) > 0:
                annotation_str = '%s %s %s' % (utils.color('blue', '['), sim_gene_count_str(kept_this_class[-1]), utils.color('blue', ']'))  # NOTE only for the last gene that we kept from this class
            print ' %s %s %s' % (removed_str, (70 - utils.len_excluding_colors(removed_str)) * ' ', annotation_str),  # finish the row (padding <removed_str> out to 70 visible characters)
    if debug:
        print ''  # finish the last row of the table

    genes_to_remove = set(self.glfo['seqs'][region]) - genes_to_keep

    print ' keeping %d / %d %s gene%s' % (len(genes_to_keep), len(self.glfo['seqs'][region]), region, utils.plural(len(genes_to_keep)))
    if len(genes_to_keep) == 0:
        print ' would\'ve kept zero genes, instead keeping all of them'
        genes_to_keep = copy.deepcopy(genes_to_remove)  # at this point <genes_to_remove> is everybody in self.glfo['seqs'][region]
        genes_to_remove.clear()

    if self.simglfo is not None:  # warn about simulation genes that we didn't keep
        missing_genes = set(self.simglfo['seqs'][region]) - genes_to_keep
        if len(missing_genes) > 0:
            print ' %s %d simulation genes (counts): %s' % (utils.color('red', 'missing'), len(missing_genes), ' '.join([('%s %d' % (utils.color_gene(g), self.simcounts[region][g])) for g in sorted(missing_genes)]))
        completely_absent_genes = missing_genes - genes_to_remove  # simulation genes that aren't in self.glfo['seqs'][region] at all
        if len(completely_absent_genes) > 0:
            print '%s %d simulation genes completely absent: %s' % (utils.color('red', 'warning'), len(completely_absent_genes), ' '.join([('%s %d' % (utils.color_gene(g), self.simcounts[region][g])) for g in sorted(completely_absent_genes)]))

    self.genes_to_keep |= genes_to_keep  # add the ones from _this_ region (rhs) to the ones from all regions (lhs)
    self.genes_to_remove |= genes_to_remove

    self.finalized = True
g for base in args.bases for g in get_genes(base, args.allele_numbers) ] if len(genes) == 0: raise Exception( 'couldn\'t find any genes for the specified --bases %s\n choices:\n %s' % (' '.join(args.bases), ' '.join( sorted(set([get_base(g) for g in glfo['seqs'][args.region]]))))) args.other_genes = utils.get_arg_list(args.other_genes) if args.other_genes is not None: genes += args.other_genes seqstrs = ['' for _ in range(len(genes))] snpstrs = ['' for _ in range(len(genes))] gene_str_width = max( [utils.len_excluding_colors(utils.color_gene(g)) for g in genes]) codon_positions = glfo[utils.conserved_codons[args.locus][args.region] + '-positions'] if args.region != 'd' else None max_seq_len = max([len(glfo['seqs'][args.region][g]) for g in genes]) ref_gene = genes[0] if args.ref_allele is None else utils.rejoin_gene( args.locus, args.region, utils.primary_version(genes[0]), utils.sub_version(genes[0]), args.ref_allele) if ref_gene != genes[0]: genes.remove(ref_gene) genes.insert(0, ref_gene) ref_seq = glfo['seqs'][args.region][ref_gene] ref_pos = codon_positions[ref_gene] for igene in range(0, len(genes)): gene = genes[igene]