def reconstruct_indelfo_from_indel_list(indel_list, line, iseq, debug=False):  # old-style files
    if 'reversed_seq' in indel_list:  # handle super-old files
        print '%s encountered file with super old, unhandled indel format, proceeding, but indel info may be inconsistent' % (utils.color('red', 'error'))
        return

    line['indelfos'][iseq] = get_empty_indel()
    if len(indel_list) == 0:
        return

    ifo_positions = [ifo['pos'] for ifo in indel_list]
    if len(ifo_positions) != len(set(ifo_positions)):
        print '%s two indels at the same position, everything will be kinda messed up' % utils.color('red', 'error')
    ifos_by_pos = {ifo['pos'] : ifo for ifo in indel_list}
    qr_gap_seq, gl_gap_seq = [], []
    iqr, igl, iindel = 0, 0, 0
    if debug:
        print len(line['input_seqs'][iseq]), line['input_seqs'][iseq]
        print len(line['naive_seq']), line['naive_seq']
    while iqr < len(line['input_seqs'][iseq]):
        if debug:
            print '  %3d %3d' % (iqr, igl),
        if iindel in ifos_by_pos:
            ifo = ifos_by_pos[iindel]
            if ifo['type'] == 'insertion':
                if ifo['seqstr'] != line['input_seqs'][iseq][iqr : iqr + ifo['len']]:
                    print '%s indel info seqstr doesn\'t match input seq str:' % utils.color('red', 'error')
                    utils.color_mutants(ifo['seqstr'], line['input_seqs'][iseq][iqr : iqr + ifo['len']], align=True, print_result=True, extra_str='    ')
                qr_gap_seq += ifo['seqstr'].split()
                gl_gap_seq += [ifo['len'] * utils.gap_chars[0]]
                if debug:
                    print '  %s %s' % (ifo['seqstr'].split(), [ifo['len'] * utils.gap_chars[0]])
                iqr += ifo['len']
            else:
                if ifo['seqstr'] != line['naive_seq'][igl : igl + ifo['len']]:
                    print '%s indel info seqstr doesn\'t match naive seq str:' % utils.color('red', 'error')
                    utils.color_mutants(ifo['seqstr'], line['naive_seq'][igl : igl + ifo['len']], align=True, print_result=True, extra_str='    ')
                qr_gap_seq += [ifo['len'] * utils.gap_chars[0]]
                gl_gap_seq += ifo['seqstr'].split()
                if debug:
                    print '  %s %s' % ([ifo['len'] * utils.gap_chars[0]], ifo['seqstr'].split())
                igl += ifo['len']
            del ifos_by_pos[iindel]
            iindel += ifo['len']
        else:
            qr_gap_seq += [line['input_seqs'][iseq][iqr]]
            gl_gap_seq += [line['naive_seq'][igl]]
            if debug:
                print '  %s %s' % (line['input_seqs'][iseq][iqr], line['naive_seq'][igl])
            iqr += 1
            igl += 1
            iindel += 1

    line['indelfos'][iseq]['qr_gap_seq'] = ''.join(qr_gap_seq)
    line['indelfos'][iseq]['gl_gap_seq'] = ''.join(gl_gap_seq)
    line['indelfos'][iseq]['indels'] = indel_list
    line['indelfos'][iseq]['reversed_seq'] = line['indel_reversed_seqs'][iseq]
    line['indelfos'][iseq]['genes'] = {r : line[r + '_gene'] for r in utils.regions}
    if debug:
        print '  reconstructed indelfo'
        print get_dbg_str(line['indelfos'][iseq])
def get_dbg_str(indelfo):
    if len(indelfo['qr_gap_seq']) != len(indelfo['gl_gap_seq']):
        print indelfo['qr_gap_seq']
        print indelfo['gl_gap_seq']
        raise Exception('different length qr and gl gap seqs (see previous lines)')
    qrprintstr, glprintstr = [], []
    for ich in range(len(indelfo['qr_gap_seq'])):
        qrb, glb = indelfo['qr_gap_seq'][ich], indelfo['gl_gap_seq'][ich]
        qrcolor, glcolor = None, None
        if qrb in utils.gap_chars or glb in utils.gap_chars:
            qrcolor = 'light_blue'
            glcolor = 'light_blue'
        elif qrb in utils.ambiguous_bases:
            qrcolor = 'light_blue'
        elif glb in utils.ambiguous_bases:
            glcolor = 'light_blue'
        elif qrb != glb:
            qrcolor = 'red'
        qrprintstr.append(utils.color(qrcolor, qrb if qrb not in utils.gap_chars else '*'))  # change it to a star just 'cause that's what it originally was... at some point should switch to just leaving it whatever gap char it was
        glprintstr.append(utils.color(glcolor, glb if glb not in utils.gap_chars else '*'))
    qrprintstr = ''.join(qrprintstr)
    glprintstr = ''.join(glprintstr)

    gene_str = ''
    gwidth = str(len('query'))
    if 'v' in indelfo['genes']:
        gene_str = utils.color_gene(indelfo['genes']['v'], width=int(gwidth), leftpad=True)
        gwidth = str(utils.len_excluding_colors(gene_str))
    dj_gene_str = ' '.join([utils.color_gene(indelfo['genes'][r]) for r in 'dj' if r in indelfo['genes']])
    dbg_str_list = [('  %' + gwidth + 's  %s  %s') % (gene_str, glprintstr, dj_gene_str),
                    ('  %' + gwidth + 's  %s') % ('query', qrprintstr)]
    for idl in indelfo['indels']:
        dbg_str_list.append('%10s: %d base%s at %d (%s)' % (idl['type'], idl['len'], utils.plural(idl['len']), idl['pos'], idl['seqstr']))
    return '\n'.join(dbg_str_list)
def tree_x(fs: FileSystem, depth: int, level=0):
    """
    Print the directory tree structure, in the style of the Unix `tree` command:
    .
    └── test
        ├── css
        ├── img
        │   └── head
        └── js
    :param fs: the file system object
    :param depth: maximum depth to print
    :param level: file depth already reached
    :return:
    """
    if depth == 0:
        return
    pwd_cat = fs.load_pwd_obj()  # get the current directory
    file_list = pwd_cat.file_name_and_types()
    for name, flag in file_list:
        if flag == DIR_TYPE:  # directory
            print("│ " * level, end="")
            print("├──", color(name, DIR_COLOR_F, DIR_COLOR_B))
            flag_x = fs.ch_sig_dir(name, info=False)
            if flag_x:
                tree_x(fs, depth - 1, level + 1)
                fs.ch_sig_dir("..")
        if flag == FILE_TYPE:  # file
            print("│ " * level, end="")
            print("├──", color(name, FILE_COLOR_F, FILE_COLOR_B))
def print_partition(self, ip, reco_info=None, extrastr='', abbreviate=True, highlight_cluster_indices=None, print_cluster_indices=False, right_extrastr=''):  # NOTE <highlight_cluster_indices> and <print_cluster_indices> are quite different despite sounding similar, but I can't think of something else to call the latter that makes more sense
    # NOTE it's nicer to *not* sort by cluster size here, since preserving the order tends to frequently make it obvious which clusters are merging as your eye scans downwards through the output
    if ip > 0:  # delta between this logprob and the previous one
        delta_str = '%.1f' % (self.logprobs[ip] - self.logprobs[ip-1])
    else:
        delta_str = ''
    print '  %s %-12.2f%-7s %s%-5d  %4d' % (extrastr, self.logprobs[ip], delta_str, ('%-5d ' % ip) if print_cluster_indices else '', len(self.partitions[ip]), self.n_procs[ip]),

    print '  ' + self.get_ccf_str(ip),

    # clusters
    sorted_clusters = sorted(self.partitions[ip], key=lambda c: len(c), reverse=True)
    for iclust in range(len(sorted_clusters)):
        cluster = sorted_clusters[iclust]
        if abbreviate:
            cluster_str = ':'.join(['o' if len(uid) > 3 else uid for uid in cluster])
        else:
            # cluster_str = ':'.join(sorted([str(uid) for uid in cluster]))
            cluster_str = ':'.join([str(uid) for uid in cluster])

        if reco_info is not None and not utils.from_same_event(reco_info, cluster):
            cluster_str = utils.color('red', cluster_str)

        if self.seed_unique_id is not None and self.seed_unique_id in cluster:
            cluster_str = utils.color('reverse_video', cluster_str)

        if highlight_cluster_indices is not None and iclust in highlight_cluster_indices:
            cluster_str = utils.color('red', cluster_str)

        if abbreviate:
            print ' %s' % cluster_str,
        else:
            print '   %s' % cluster_str,
    print '%s' % right_extrastr,
    print ''
def RunSmbFinger(host):
    try:
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        s.connect(host)
        s.settimeout(0.7)
        h = SMBHeader(cmd="\x72", flag1="\x18", flag2="\x53\xc8")
        n = SMBNego(data=SMBNegoFingerData())
        n.calculate()
        Packet = str(h) + str(n)
        Buffer = struct.pack(">i", len(''.join(Packet))) + Packet
        s.send(Buffer)
        data = s.recv(2048)
        if data[8:10] == "\x72\x00":
            Header = SMBHeader(cmd="\x73", flag1="\x18", flag2="\x17\xc8", uid="\x00\x00")
            Body = SMBSessionFingerData()
            Body.calculate()
            Packet = str(Header) + str(Body)
            Buffer = struct.pack(">i", len(''.join(Packet))) + Packet
            s.send(Buffer)
            data = s.recv(2048)
            if data[8:10] == "\x73\x16":
                return OsNameClientVersion(data)
    except:
        print color("[!] ", 1, 1) + " Fingerprint failed"
        return None
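# A minimal sketch of the 4-byte big-endian length prefix used above to frame
# each SMB packet for the NetBIOS session service; `payload` is a hypothetical
# stand-in for the str(header) + str(body) packet built inside RunSmbFinger().
import struct

def frame_netbios(payload):
    # prepend the payload length as a 4-byte big-endian integer
    return struct.pack(">i", len(payload)) + payload

framed = frame_netbios(b"\x00" * 10)
assert framed[:4] == struct.pack(">i", 10)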
def compare_performance(self):
    # NOTE does *not* regenerate the reference performance file based on the reference outputs UPDATE hm, wait, do I still use the performance files?
    print "comparing to reference performance"
    refkeys = set(self.perf_info["ref"].keys())
    newkeys = set(self.perf_info["new"].keys())
    if len(refkeys - newkeys) > 0 or len(newkeys - refkeys) > 0:
        print "  %d keys only in ref" % len(refkeys - newkeys)
        print "  %d keys only in new" % len(newkeys - refkeys)
        print "  %d in common" % len(refkeys & newkeys)
        raise Exception("")

    for name in self.perf_info["ref"]:  # don't use the sets above so we get the nice ordering
        ref_val = self.perf_info["ref"][name]
        new_val = self.perf_info["new"][name]
        val_type = name.split("-")[-1]
        print "  %-28s %-15s %-5.3f" % (name.replace("-" + val_type, ""), val_type, ref_val),
        fractional_change = (new_val - ref_val) / ref_val  # NOTE not the abs value yet
        if abs(fractional_change) > self.eps_vals[val_type]:
            print "--> %-5.3f %s" % (new_val, utils.color("red", "(%+.3f)" % fractional_change)),
        elif abs(fractional_change) > self.tiny_eps:
            print "--> %-5.3f %s" % (new_val, utils.color("yellow", "(%+.3f)" % fractional_change)),
        else:
            print "  ok  ",
        print ""
def compare_run_times(self):
    print 'checking run times'

    def read_run_times(stype):
        times[stype] = {}
        with open(self.dirs[stype] + '/run-times.csv') as timefile:
            reader = csv.DictReader(timefile)
            for line in reader:
                times[stype][line['name']] = float(line['seconds'])

    times = {}
    for stype in self.stypes:
        read_run_times(stype)

    for name in times['ref']:
        if args.quick and name not in self.quick_tests:
            continue
        if args.only_ref and '-ref-' not in name:
            continue
        if args.skip_ref and '-ref-' in name:
            continue
        print '  %30s  %7.1f' % (name, times['ref'][name]),
        if name not in times['new']:
            print '  no new time for %s' % utils.color('red', name)
            continue
        fractional_change = (times['new'][name] - times['ref'][name]) / times['ref'][name]
        if abs(fractional_change) > 0.2:
            print '--> %-5.1f %s' % (times['new'][name], utils.color('red', '(%+.3f)' % fractional_change)),
        elif abs(fractional_change) > 0.1:
            print '--> %-5.1f %s' % (times['new'][name], utils.color('yellow', '(%+.3f)' % fractional_change)),
        else:
            print '  ok  ',
        print ''
def compare_performance(self, input_stype):
    performance_metric_list = [n for n in self.perf_info['ref'] if input_stype in n]
    if len(performance_metric_list) == 0:
        return

    print '  performance with %s simulation and parameters' % input_stype

    # make sure there's a new performance value for each reference one, and vice versa
    refkeys = set(self.perf_info['ref'].keys())
    newkeys = set(self.perf_info['new'].keys())
    if len(refkeys - newkeys) > 0 or len(newkeys - refkeys) > 0:
        print '  %d keys only in ref' % len(refkeys - newkeys)
        print '  %d keys only in new' % len(newkeys - refkeys)
        print '  %d in common' % len(refkeys & newkeys)
        raise Exception('')

    for name in performance_metric_list:  # don't use the sets above so we get the nice ordering
        ref_val = self.perf_info['ref'][name]
        new_val = self.perf_info['new'][name]
        val_type = name.split('-')[-1]
        print '    %-28s %-15s %-5.3f' % (name.replace('-' + val_type, ''), val_type, ref_val),
        fractional_change = (new_val - ref_val) / ref_val  # NOTE not the abs value yet
        if abs(fractional_change) > self.eps_vals[val_type]:
            print '--> %-5.3f %s' % (new_val, utils.color('red', '(%+.3f)' % fractional_change)),
        elif abs(fractional_change) > self.tiny_eps:
            print '--> %-5.3f %s' % (new_val, utils.color('yellow', '(%+.3f)' % fractional_change)),
        else:
            print '  ok  ',
        print ''
def check_single_ifo(old_ifo, new_ifo):
    if debug:
        print '  len %d  pos %d  seqstr %s' % (old_ifo['len'], old_ifo['pos'], old_ifo['seqstr']),
    if new_ifo != old_ifo:
        if debug:
            print '  %s' % utils.color('red', 'nope')
        new_seqstr, old_seqstr = utils.color_mutants(old_ifo['seqstr'], new_ifo['seqstr'], return_ref=True, align=True)  # len(old_ifo['seqstr']) != len(new_ifo['seqstr'])
        if print_on_err:
            print '  pos %d --> %s    len %d --> %s    seqstr %s --> %s' % (old_ifo['pos'], utils.color(None if new_ifo['pos'] == old_ifo['pos'] else 'red', '%d' % new_ifo['pos']),
                                                                            old_ifo['len'], utils.color(None if new_ifo['len'] == old_ifo['len'] else 'red', '%d' % new_ifo['len']),
                                                                            old_seqstr, new_seqstr)
        return False
    else:
        if debug:
            print '  %s' % utils.color('green', 'ok')
        return True
def epoch_info_printer(epoch, mean_loss, epoch_time, total_time, lr, train_samples,
                       valid_samples=0, valid_loss=None, mean_blue=None, valid_time=0):
    valid_loss = round(valid_loss, 5) if valid_loss else None
    valid_blue = round(mean_blue, 5) if mean_blue else None
    print("\n========== Epoch Summary ==========")
    print(color(" Epoch: %s Finished. " % epoch, 1))
    print(" Train Mean Loss: %s " % color(round(mean_loss, 5), 2))
    print(" Valid Mean Loss: %s " % color(valid_loss, 2))
    print(" Valid Mean BLEU: %s " % color(valid_blue, 2))
    print(" Train Data Size: %s " % color(train_samples, 2))
    print(" Valid Data Size: %s " % color(valid_samples, 2))
    print(" Epoch Time Consumed: %ss " % color(epoch_time, 2))
    print(" Valid Time Consumed: %ss " % color(valid_time, 2))
    print(" Total Time Consumed: %ss " % color(total_time, 2))
    print(" Current Learning Rate: %s " % color(round(lr, 8), 2))
    print("===================================")
    print("\n\n\n")
def get_indel_info(self, query_name, cigarstr, qrseq, glseq, gene):
    cigars = re.findall('[0-9][0-9]*[A-Z]', cigarstr)  # split cigar string into its parts
    cigars = [(cstr[-1], int(cstr[:-1])) for cstr in cigars]  # split each part into the code and the length

    codestr = ''
    qpos = 0  # position within query sequence
    indelfo = utils.get_empty_indel()  # replacement_seq: query seq with insertions removed and germline bases inserted at the position of deletions
    tmp_indices = []
    for code, length in cigars:
        codestr += length * code
        if code == 'I':  # advance qr seq but not gl seq
            indelfo['indels'].append({'type' : 'insertion', 'pos' : qpos, 'len' : length, 'seqstr' : ''})  # insertion begins at <pos>
            tmp_indices += [len(indelfo['indels']) - 1 for _ in range(length)]  # indel index corresponding to this position in the alignment
        elif code == 'D':  # advance gl seq but not qr seq
            indelfo['indels'].append({'type' : 'deletion', 'pos' : qpos, 'len' : length, 'seqstr' : ''})  # first deleted base is <pos> (well, first base which is in the position of the first deleted base)
            tmp_indices += [len(indelfo['indels']) - 1 for _ in range(length)]  # indel index corresponding to this position in the alignment
        else:
            tmp_indices += [None for _ in range(length)]  # indel index corresponding to this position in the alignment
        qpos += length

    qrprintstr, glprintstr = '', ''
    iqr, igl = 0, 0
    for icode in range(len(codestr)):
        code = codestr[icode]
        if code == 'M':
            qrbase = qrseq[iqr]
            if qrbase != glseq[igl]:
                qrbase = utils.color('red', qrbase)
            qrprintstr += qrbase
            glprintstr += glseq[igl]
            indelfo['reversed_seq'] += qrseq[iqr]  # add the base to the overall sequence with all indels reversed
        elif code == 'S':
            continue
        elif code == 'I':
            qrprintstr += utils.color('light_blue', qrseq[iqr])
            glprintstr += utils.color('light_blue', '*')
            indelfo['indels'][tmp_indices[icode]]['seqstr'] += qrseq[iqr]  # and to the sequence of just this indel
            igl -= 1
        elif code == 'D':
            qrprintstr += utils.color('light_blue', '*')
            glprintstr += utils.color('light_blue', glseq[igl])
            indelfo['reversed_seq'] += glseq[igl]  # add the base to the overall sequence with all indels reversed
            indelfo['indels'][tmp_indices[icode]]['seqstr'] += glseq[igl]  # and to the sequence of just this indel
            iqr -= 1
        else:
            raise Exception('unhandled code %s' % code)

        iqr += 1
        igl += 1

    if self.debug:
        print '\n      indels in %s' % query_name
        print '          %20s %s' % (gene, glprintstr)
        print '          %20s %s' % ('query', qrprintstr)
        for idl in indelfo['indels']:
            print '          %10s: %d bases at %d (%s)' % (idl['type'], idl['len'], idl['pos'], idl['seqstr'])
    # utils.undo_indels(indelfo)
    # print '  %s' % self.input_info[query_name]['seq']

    return indelfo
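# A minimal standalone sketch of the cigar-string split used at the top of
# get_indel_info(): the regex pulls out each length+code chunk, then each chunk
# is split into its (code, length) pair.  The example cigar string is made up.
import re

cigarstr = '3M1I4M2D5M'
cigars = re.findall('[0-9][0-9]*[A-Z]', cigarstr)          # ['3M', '1I', '4M', '2D', '5M']
cigars = [(cstr[-1], int(cstr[:-1])) for cstr in cigars]   # [('M', 3), ('I', 1), ('M', 4), ('D', 2), ('M', 5)]
print(cigars)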
def check_droplet_id_groups(tdbg=False):
    # check against the droplet id method (we could just do it this way, but it would only work for 10x, and only until they change their naming convention)
    pgroup_strs = set(':'.join(sorted(pg)) for pg in pid_groups)
    all_uids = list(set([su for l in cpaths for c in cpaths[l].best() for u in c for su in [u] + utils.per_seq_val(all_antns[u], 'paired-uids', u)]))
    n_not_found = 0
    for dropid, drop_queries in itertools.groupby(sorted(all_uids, key=utils.get_droplet_id), key=utils.get_droplet_id):
        dqlist = list(drop_queries)
        found = ':'.join(sorted(dqlist)) in pgroup_strs
        if not found:
            overlaps = [g for g in pgroup_strs if dropid in g]
            overlaps = utils.get_single_entry(overlaps)
            n_not_found += 1
        if tdbg or not found:
            print '    %25s %s  %s  %s' % (utils.color('green', '-') if found else utils.color('red', 'x'), dropid,
                                           ' '.join(sorted(utils.get_contig_id(q) for q in dqlist)),
                                           utils.color('red', ' '.join(sorted(utils.get_contig_id(q) for q in overlaps.split(':'))) if not found else ''))
    if n_not_found > 0:
        print '  %s droplet id group check failed for %d groups' % (utils.color('red', 'error'), n_not_found)
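# A small sketch of the itertools.groupby() pattern above: groupby() only groups
# *adjacent* items, so the uids have to be sorted by the same key first.  Here
# utils.get_droplet_id() is stood in for by splitting a made-up '<droplet>_<contig>' uid.
import itertools

def droplet_id(uid):
    return uid.split('_')[0]

uids = ['d2_1', 'd1_2', 'd1_1', 'd2_2']
for dropid, queries in itertools.groupby(sorted(uids, key=droplet_id), key=droplet_id):
    print(dropid, list(queries))  # d1 ['d1_2', 'd1_1'] then d2 ['d2_1', 'd2_2']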
def parse_ramesh_seqs(glseqs, outdir, debug=False):
    for locus in glseqs:
        glutils.remove_glfo_files(outdir, locus)

        # write to a glfo dir without extra info
        for region in glseqs[locus]:
            fn = glutils.get_fname(outdir, locus, region)
            if not os.path.exists(os.path.dirname(fn)):
                os.makedirs(os.path.dirname(fn))
            with open(fn, 'w') as ofile:
                for gene, seq in glseqs[locus][region].items():
                    ofile.write('>%s\n%s\n' % (gene, seq))

        # figure out extra info
        template_glfo = glutils.read_glfo('data/germlines/macaque', locus)
        glfo = glutils.read_glfo(outdir, locus, template_glfo=template_glfo, remove_bad_genes=True, debug=True)

        # trim non-coding stuff upstream of v (and remove non-full-length ones)
        gene_groups = {}
        for region in ['v']:
            group_labels = sorted(set([utils.gene_family(g) for g in glfo['seqs'][region]]))
            gene_groups[region] = [(glabel, {g : glfo['seqs'][region][g] for g in glfo['seqs'][region] if utils.gene_family(g) == glabel}) for glabel in group_labels]
        for region in [r for r in utils.regions if r in gene_groups]:
            if debug:
                print '%s' % utils.color('reverse_video', utils.color('green', region))
            for group_label, group_seqs in gene_groups[region]:  # ok, this isn't really doing anything any more
                if debug:
                    print '  %s' % utils.color('blue', group_label)
                for gene, seq in group_seqs.items():
                    trim_and_remove_genes(region, gene, seq, glfo, template_glfo, debug=debug)

        # remove any seqs with ambiguous bases
        for region in [r for r in utils.regions if r in glfo['seqs']]:
            for gene, seq in glfo['seqs'][region].items():
                if utils.ambig_frac(seq) > 0.:
                    if debug:
                        print '    %d ambiguous bases: %s' % (len(seq) * utils.ambig_frac(seq), utils.color_gene(gene))
                    glutils.remove_gene(glfo, gene)

        # glutils.print_glfo(glfo)

        # write final result
        glutils.write_glfo(outdir, glfo, debug=True)
def get_data_plots(args, baseoutdir, methods, study, dsets):
    metafos = heads.read_metadata(study)
    assert len(set([metafos[ds]['locus'] for ds in dsets])) == 1  # make sure everybody has the same locus
    mfo = metafos[dsets[0]]
    data_outdirs = [heads.get_datadir(study, 'processed', extra_str='gls-gen-paper-' + args.label) + '/' + ds for ds in dsets]
    outdir = get_outdir(args, baseoutdir, varname='data', varval=study + '/' + '-vs-'.join(dsets))  # for data, only the plots go here, since datascripts puts its output somewhere else
    if len(dsets) > 1 and len(methods) == 1:  # sample vs sample
        glslabels = dsets
        title = get_dset_title([metafos[ds] for ds in dsets])
        if study != 'kate-qrs':
            title += '  %s' % methstr(methods[0])
        title_color = methods[0]
        legends = get_dset_legends([metafos[ds] for ds in dsets])
        legend_title = methstr(methods[0]) if study == 'kate-qrs' else None  # for kate-qrs we need to put the subject _and_ the isotype in the title, so there's no room for the method
        pie_chart_faces = False
        print '%s:' % utils.color('green', methods[0]),
    elif len(methods) > 1 and len(dsets) == 1:  # method vs method
        glslabels = methods
        title = get_dset_title([mfo])
        title_color = None
        legends = [methstr(m) + ' only' for m in methods]
        legend_title = None
        pie_chart_faces = True
        print '%s:' % utils.color('green', dsets[0]),
    else:
        raise Exception('one of \'em has to be length 1: %d %d' % (len(methods), len(dsets)))
    print '%s' % (' %s ' % utils.color('light_blue', 'vs')).join(glslabels)

    make_gls_tree_plot(args, outdir + '/' + '-vs-'.join(methods) + '/gls-gen-plots', study + '-' + '-vs-'.join(dsets),
                       glsfnames=[get_gls_fname(ddir, meth, locus=mfo['locus'], data=True) for ddir in data_outdirs for meth in methods],
                       glslabels=glslabels,
                       locus=mfo['locus'],
                       title=title,
                       title_color=title_color,
                       legends=legends,
                       legend_title=legend_title,
                       pie_chart_faces=pie_chart_faces)
def construct_dict(self):
    """Build the vocabulary from the training data."""
    print("\nConstructing Dict ...\n")
    self.train_lines = self.read_lines_from_file(self.train_data, with_target=True)
    self.valid_lines = self.read_lines_from_file(self.valid_data, with_target=True)
    self.test_lines = None if not self.test_data else self.read_lines_from_file(self.test_data, with_target=True)
    self.special_tokens = {
        self.PAD_token: self.PAD_idx,
        self.BOS_token: self.BOS_idx,
        self.EOS_token: self.EOS_idx,
        self.OOV_token: self.OOV_idx
    }
    word_indexer = list({
        word.strip()
        for line in self.train_lines
        for word in line[0] + line[1]
        if len(word.strip()) > 1
    })
    char_indexer = list(set("".join(word_indexer)))
    for token, index in self.special_tokens.items():
        word_indexer.insert(index, token)
        char_indexer.insert(index, token)
    self.index2word = {index: word for index, word in enumerate(word_indexer)}
    self.word2index = {word: index for index, word in enumerate(word_indexer)}
    self.index2char = {index: word for index, word in enumerate(char_indexer)}
    self.char2index = {word: index for index, word in enumerate(char_indexer)}
    self.flag_constructed_dict = True

    vocab_file = {"word2index": self.word2index, "char2index": self.char2index}
    torch.save(vocab_file, self.args.vocab_file)

    print(color("\nSuccessfully Constructed Dict.", 2))
    print(color("\nVocabulary File Has Been Saved.\n", 1))
    print("Number of words: ", color(len(self.word2index), 2))
    print("Number of chars: ", color(len(self.char2index), 2), "\n\n")
    return self.word2index, self.char2index, self.index2word, self.index2char
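# A toy sketch of the two-way indexer pattern used in construct_dict(): special
# tokens are inserted at fixed indices, then forward and reverse lookups are
# built from one list.  The token names here are illustrative, not the class's own.
specials = {"<pad>": 0, "<bos>": 1, "<eos>": 2, "<oov>": 3}
words = sorted({"hello", "world"})
for token, index in sorted(specials.items(), key=lambda kv: kv[1]):
    words.insert(index, token)
word2index = {w: i for i, w in enumerate(words)}
index2word = {i: w for i, w in enumerate(words)}
assert index2word[word2index["hello"]] == "hello"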
def __init__(self, **kwargs):
    self.border_width = kwargs.pop('border_width', 1)
    self.cell_size = kwargs.pop('cell_size', 7)
    self.color_empty = kwargs.pop('color_empty', color(255, 255, 255))
    self.color_filled = kwargs.pop('color_filled', color())
    self.color_border = kwargs.pop('color_border', color(100, 100, 100))
    self.cell_plus_border = self.border_width + self.cell_size
async def on_member_update(self, before: discord.Member, after: discord.Member):
    log_embed = self.log_embed("edit", footer=f"ID: {before.id}", user=after)
    if before.nick != after.nick:
        self.logger.info(
            color(
                f"member `{after}` nickname changed: {before.nick} -> {after.nick}",
                "blue",
            ))
        log_embed.description = f"**Nickname changed: {after.mention}**"
        log_embed.add_field(name="Before", value=before.nick, inline=False)
        log_embed.add_field(name="After", value=after.nick, inline=False)
    elif before.roles != after.roles:
        if len(before.roles) < len(after.roles):
            added: typing.List[discord.Role] = [
                role for role in after.roles if role not in before.roles
            ]
            self.logger.info(
                color(
                    f"member `{after}` roles added: {', '.join([r.name for r in added])}",
                    "green",
                ))
            log_embed.description = f"**Role{'s' if len(added) > 1 else ''} added to {after.mention}:**"
            log_embed.colour = discord.Colour.green()
            log_embed.add_field(
                name="Added roles",
                value=", ".join([role.mention for role in added]),
                inline=False,
            )
        else:
            removed: typing.List[discord.Role] = [
                role for role in before.roles if role not in after.roles
            ]
            self.logger.info(
                color(
                    f"member `{after}` roles removed: {', '.join([r.name for r in removed])}",
                    "red",
                ))
            log_embed.description = f"**Role{'s' if len(removed) > 1 else ''} removed from {after.mention}**"
            log_embed.colour = discord.Colour.red()
            log_embed.add_field(
                name="Removed roles",
                value=", ".join([role.mention for role in removed]),
                inline=False,
            )
    else:
        return
    await self.log_channel.send(embed=log_embed)
def handle_colors(self, world, text):
    """ Prints out all the colors we know about."""
    response = ''
    for background in range(40, 48):
        for foreground in range(30, 38):
            response += color(str(foreground), foreground, background)
            response += color(str(foreground), foreground, background, 1)
        response += "\33[0m\n"
    self.write(response)
def print_key_differences(vtype, refkeys, newkeys):
    print '    %s keys' % vtype
    if len(refkeys - newkeys) > 0 or len(newkeys - refkeys) > 0:
        if len(refkeys - newkeys) > 0:
            print utils.color('red', '      %d only in ref version' % len(refkeys - newkeys))
        if len(newkeys - refkeys) > 0:
            print utils.color('red', '      %d only in new version' % len(newkeys - refkeys))
        print '      %d in common' % len(refkeys & newkeys)
    else:
        print '      %d identical keys in new and ref cache' % len(refkeys)
def ref_updated(event):
    if "submitter" not in event:
        # Implies an auto-generated merge-commit was pushed by the system
        # Ignore since we only care about reporting direct-pushes
        return

    updated_ref = event["refUpdate"]
    branch = updated_ref["refName"]
    if branch in branch_ignore:
        return

    to_hash = shorten_hash(updated_ref['newRev'])
    from_hash = shorten_hash(updated_ref['oldRev'])
    project = utils.project_from_change(updated_ref)
    submitter = username_from_person(event["submitter"])
    link = utils.link_from_project(project)

    msg_project_branch = utils.build_repo_branch(project, branch) + color()
    msg_owner = color(GREEN) + submitter + color()
    msg_old_ref = color(bold=True) + from_hash + color()
    msg_new_ref = color(bold=True) + to_hash + color(GREY)
    msg_link = color(NAVY, underline=True) + link + color()

    message = "%s updated %s from %s to %s : %s" % (
        msg_owner, msg_project_branch, msg_old_ref, msg_new_ref, msg_link)
    emit_message(message)
def get_uid_str(line, iseq, seed_uid, duplicated_uids=None):
    uid_width = max([len(uid) for uid in line['unique_ids']])
    fstr = '%' + str(uid_width) + 's'
    uidstr = fstr % line['unique_ids'][iseq]
    if seed_uid is not None and line['unique_ids'][iseq] == seed_uid:
        uidstr = utils.color('red', uidstr)
    if duplicated_uids is not None and line['unique_ids'][iseq] in duplicated_uids:
        uidstr += ' ' + utils.color('red', 'duplicate: %d' % duplicated_uids[line['unique_ids'][iseq]])
    return uidstr
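# A tiny sketch of the dynamic fixed-width format string built in get_uid_str():
# the pad width is computed from the longest uid so the columns line up.
uids = ['a1', 'longer_uid']
fstr = '%' + str(max(len(u) for u in uids)) + 's'
print([fstr % u for u in uids])  # ['        a1', 'longer_uid']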
def simcountstr(gene, ws):  # counts in simulation for <gene> (note that this is _not_ the same as sim_gene_count_str(), since this takes no account of _which_ queries these counts occur in [plus it's coming from the opposite point of view])
    if self.simglfo is None:
        rstr = ''
    elif gene in self.simglfo['seqs'][utils.get_region(gene)]:
        rstr = utils.color('blue', (' %' + ws + 'd') % self.simcounts[utils.get_region(gene)][gene])
    else:
        rstr = utils.color('red', (' %' + ws + 's') % 'x')
    return rstr
def change_merged(event):
    change = event["change"]
    branch = change["branch"]
    if branch in branch_ignore:
        return

    owner = username_from_person(change["owner"])
    msg_owner = color(GREEN) + owner + "'s" + color()
    msg_description = describe_patchset(change)

    message = "Applied %s change on %s" % (msg_owner, msg_description)
    emit_message(message)
def compare_data_annotation(self, input_stype):
    ptest = 'annotate-' + input_stype + '-data'
    if args.quick and ptest not in self.quick_tests:
        return
    print '  %s data annotation' % input_stype
    infnames = [self.dirs[version_stype] + '/' + ptest + '.csv' for version_stype in self.stypes]
    cmd = 'diff -u ' + ' '.join(infnames) + ' | grep "^+[^+]" | wc -l'
    n_diff_lines = int(check_output(cmd, shell=True))
    if n_diff_lines == 0:
        print '    ok'
    else:
        print utils.color('red', '    %d lines differ' % n_diff_lines),
        print '   (%s)' % cmd
def infer_tree_from_leaves(self, region, in_tree, leafseqs, naive_seq):
    if 'dendropy' not in sys.modules:
        import dendropy
    dendropy = sys.modules['dendropy']
    taxon_namespace = dendropy.TaxonNamespace()
    with tempfile.NamedTemporaryFile() as tmpfile:
        tmpfile.write('>%s\n%s\n' % ('naive', naive_seq))
        for iseq in range(len(leafseqs)):
            tmpfile.write('>t%s\n%s\n' % (iseq + 1, leafseqs[iseq]))  # NOTE the order of the leaves/names is checked when reading bppseqgen output
        tmpfile.flush()  # BEWARE if you forget this you are f****d
        with open(os.devnull, 'w') as fnull:
            out_tree = subprocess.check_output('./bin/FastTree -gtr -nt ' + tmpfile.name, shell=True, stderr=fnull)
        out_dtree = dendropy.Tree.get_from_string(out_tree, 'newick', taxon_namespace=taxon_namespace)
        out_dtree.reroot_at_node(out_dtree.find_node_with_taxon_label('naive'), update_bipartitions=True)
        out_tree = out_dtree.as_string(schema='newick', suppress_rooting=True)

    in_height = treegenerator.get_mean_height(in_tree)
    out_height = treegenerator.get_mean_height(out_tree)
    base_width = 100
    print '  %s trees:' % ('full sequence' if region == 'all' else region)
    print '    %s' % utils.color('blue', 'input:')
    print treegenerator.get_ascii_tree(in_tree, extra_str='      ', width=base_width)
    print '    %s' % utils.color('blue', 'output:')
    print treegenerator.get_ascii_tree(out_tree, extra_str='      ', width=int(base_width * out_height / in_height))

    in_dtree = dendropy.Tree.get_from_string(in_tree, 'newick', taxon_namespace=taxon_namespace)
    if self.args.debug:
        print '            heights: %.3f  %.3f' % (in_height, out_height)
        print '    symmetric difference: %d' % dendropy.calculate.treecompare.symmetric_difference(in_dtree, out_dtree)
        print '    euclidean distance: %f' % dendropy.calculate.treecompare.euclidean_distance(in_dtree, out_dtree)
        print '    r-f distance: %f' % dendropy.calculate.treecompare.robinson_foulds_distance(in_dtree, out_dtree)
def print_partition(self, ip, reco_info=None, extrastr='', abbreviate=True, smc_print=False):
    # NOTE it's nicer to *not* sort by cluster size here, since preserving the order tends to frequently make it obvious which clusters are merging as your eye scans downwards through the output
    if ip > 0:  # delta between this logprob and the previous one
        delta_str = '%.1f' % (self.logprobs[ip] - self.logprobs[ip - 1])
    else:
        delta_str = ''
    print '  %s %-12.2f%-7s %-5d  %4d' % (extrastr, self.logprobs[ip], delta_str, len(self.partitions[ip]), self.n_procs[ip]),

    # logweight (and inverse of number of potential parents)
    if self.logweights[ip] is not None and smc_print:
        way_str, logweight_str = '', ''
        expon = math.exp(self.logweights[ip])
        n_ways = 0 if expon == 0. else 1. / expon
        way_str = ('%.1f' % n_ways) if n_ways < 1e7 else ('%8.1e' % n_ways)
        logweight_str = '%8.3f' % self.logweights[ip]

    print '  ' + self.get_ccf_str(ip),

    if self.logweights[ip] is not None and smc_print:
        print '  %10s  %8s  ' % (way_str, logweight_str),

    # clusters
    for cluster in sorted(self.partitions[ip], key=lambda c: len(c), reverse=True):
        if abbreviate:
            cluster_str = ':'.join(['o' if len(uid) > 3 else uid for uid in cluster])
        else:
            # cluster_str = ':'.join(sorted([str(uid) for uid in cluster]))
            cluster_str = ':'.join([str(uid) for uid in cluster])
        if reco_info is not None and not utils.from_same_event(reco_info, cluster):
            cluster_str = utils.color('red', cluster_str)
        if self.seed_unique_id is not None and self.seed_unique_id in cluster:
            cluster_str = utils.color('reverse_video', cluster_str)
        if abbreviate:
            print ' %s' % cluster_str,
        else:
            print '   %s' % cluster_str,
    print ''
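# A standalone look at the "inverse of number of potential parents" computation
# in print_partition(): the logweight is the log of 1/n_ways, so exponentiating
# and inverting recovers n_ways.  The value 4 below is a made-up example.
import math

logweight = math.log(1. / 4)  # pretend this partition had 4 potential parents
expon = math.exp(logweight)
n_ways = 0 if expon == 0. else 1. / expon
print(n_ways)  # 4.0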
async def on_ready(self):
    self.cg_client = codingame.Client(is_async=True)

    for cog in Config.DEFAULT_COGS:
        self.load_extension(cog)
    self.logger.info(color("loaded all cogs", "green"))

    await self.change_presence(activity=discord.Game(name=f"{Config.PREFIX}help"))
    self.logger.debug(color(f"set status to `{Config.PREFIX}help`", "cyan"))
    self.logger.info(color(f"logged in as user `{self.user}`", "green"))
def comment_added(event):
    change = event["change"]
    branch = change["branch"]
    if branch in branch_ignore:
        return

    author = event["author"]
    author = username_from_person(author)
    msg_author = color(GREEN) + author + color()
    msg_description = describe_patchset(change)

    message = "%s reviewed %s" % (msg_author, msg_description)
    emit_message(message)
def compare_production_results(self):
    if args.quick:
        return
    print 'diffing production results'
    for fname in ['test/parameters/data', 'test/simu.csv', 'test/parameters/simu']:
        print '    %-30s' % fname,
        cmd = 'diff -qbr ' + ' '.join(self.dirs[st] + '/' + fname for st in self.stypes)
        proc = Popen(cmd.split(), stdout=PIPE, stderr=PIPE)
        out, err = proc.communicate()
        if proc.returncode == 0:
            print '       ok'
        else:
            differlines = [l for l in out.split('\n') if 'differ' in l]
            onlylines = [l for l in out.split('\n') if 'Only' in l]
            print ''
            if len(differlines) > 0:
                n_total_files = int(check_output('find ' + self.dirs['ref'] + '/' + fname + ' -type f | wc -l', shell=True))
                if n_total_files == 1:
                    assert len(differlines) == 1
                    print utils.color('red', '      file differs'),
                else:
                    print utils.color('red', '      %d / %d files differ' % (len(differlines), n_total_files)),
            if len(onlylines) > 0:
                for st in self.stypes:
                    theseonlylines = [l for l in onlylines if self.dirs[st] + '/' + fname in l]
                    if len(theseonlylines) > 0:
                        print utils.color('red', '      %d files only in %s' % (len(theseonlylines), st)),
            if len(differlines) == 0 and len(onlylines) == 0:  # diff failed for some other reason
                print utils.color('red', '      not sure why, but diff returned %d' % proc.returncode),
            print '  (%s)' % cmd
            if err != '':
                print err
def show_lls_info(self):
    """
    Print detailed file information for the current directory.
    :return:
    """
    pwd_cat = self.load_pwd_obj()
    for name, inode_id in pwd_cat.son_list():
        inode = self.get_inode(inode_id)
        res = inode.show_ll_info(self.fp)
        if inode.target_type == DIR_TYPE:
            name = color(name, DIR_COLOR_F, DIR_COLOR_B)
        else:
            name = color(name, FILE_COLOR_F, FILE_COLOR_B)
        print(' '.join(res) + ' ' + name)
def shark(render, **kwargs):
    # barycentric
    w, v, u = kwargs['bar']  # coords
    A, B, C = kwargs['triangle']

    t = A.x * w + B.x * u + C.x * v
    grey = int(t * 256)
    if grey < 0:
        grey = 0
    if grey > 255:
        grey = 255
    tcolor = color(grey, 150, 170)

    # normals
    nA, nB, nC = kwargs['varying_normals']

    # light intensity
    iA, iB, iC = [dot(n, render.light) for n in (nA, nB, nC)]
    intensity = w * iA + u * iB + v * iC

    if intensity > 0.85:
        intensity = 1
    elif intensity > 0.60:
        intensity = 0.80
    elif intensity > 0.45:
        intensity = 0.60
    elif intensity > 0.30:
        intensity = 0.45
    elif intensity > 0.15:
        intensity = 0.30
    else:
        intensity = 0

    r = int(tcolor[2] * intensity)
    if r < 0:
        r = 0
    elif r > 255:
        r = 255

    g = int(tcolor[1] * intensity)
    if g < 0:
        g = 0
    elif g > 255:
        g = 255

    b = int(tcolor[0] * intensity)
    if b < 0:
        b = 0
    elif b > 255:
        b = 255

    return color(r, g, b)
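# A standalone sketch of the intensity quantization ("cel shading") step in
# shark(): a continuous diffuse intensity is snapped to a few discrete bands,
# which is what gives the toon-style look.  Thresholds mirror the ones above.
def quantize_intensity(intensity):
    # snap a [0, 1] intensity to one of six discrete shading bands
    for threshold, band in [(0.85, 1.0), (0.60, 0.80), (0.45, 0.60), (0.30, 0.45), (0.15, 0.30)]:
        if intensity > threshold:
            return band
    return 0.0

print(quantize_intensity(0.7))   # 0.8
print(quantize_intensity(0.05))  # 0.0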
def compare_data_annotation(self, input_stype):
    # NOTE don't really need to do this for simulation, since for simulation we already compare the performance info
    ptest = 'annotate-' + input_stype + '-data'
    if args.quick and ptest not in self.quick_tests:
        return
    print '  %s data annotation' % input_stype
    infnames = [self.dirs[version_stype] + '/' + ptest + '.csv' for version_stype in self.stypes]
    cmd = 'diff -u ' + ' '.join(infnames) + ' | grep "^+[^+]" | wc -l'
    n_diff_lines = int(check_output(cmd, shell=True))
    if n_diff_lines == 0:
        print '    ok'
    else:
        n_total_lines = int(check_output(['wc', '-l', infnames[0]]).split()[0])
        print utils.color('red', '    %d / %d lines differ' % (n_diff_lines, n_total_lines)),
        print '   (%s)' % cmd
def remove_reference_results(self, expected_content):
    print '  remove ref files'
    dir_content = set([os.path.basename(f) for f in glob.glob(self.dirs['ref'] + '/*')])
    if len(dir_content - expected_content) > 0 or len(expected_content - dir_content) > 0:
        if len(dir_content - expected_content) > 0:
            print 'in ref dir but not expected\n    %s' % (utils.color('red', ' '.join(dir_content - expected_content)))
        if len(expected_content - dir_content) > 0:
            print 'expected but not in ref dir\n    %s' % (utils.color('red', ' '.join(expected_content - dir_content)))
        raise Exception('unexpected or missing content in reference dir')
    for fname in [self.dirs['ref'] + '/' + ec for ec in expected_content]:
        print '    rm %s' % fname
        if os.path.isdir(fname):
            shutil.rmtree(fname)
        else:
            os.remove(fname)
def _process(self, value):
    """Process a value from theme.json and return the color code."""
    if self.hex:
        try:
            code = int(value)
        except ValueError:
            pass
        else:
            if code > 15:
                raise ValueError('Using extended color along with hex')

    # Quick note about extended color codes:
    # 0-7 are standard, binary: 0bBGR with 0% or 68% color
    # 8-15 are somehow standard, binary: 0bBGR with 0% or 100% color
    # 16-231 are RGB with components between 0 and 5 (216 values)
    # 232-255 are B&W colors from black to white (24 values)
    code = utils.color(value)
    if code is None or code > 15:
        if code is None:
            red, green, blue = utils.colorx(value)
        elif code < 232:
            code = code - 16
            red, green, blue = code // 36, (code % 36) // 6, code % 6
            red, green, blue = [x * 1000 // 6 for x in (red, green, blue)]
        else:
            red, green, blue = [(code - 232) * 1000 // 23] * 3
        code = self.add_rgb(red, green, blue)
    return code
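# A quick standalone check of the 6x6x6 color-cube arithmetic in _process():
# codes 16-231 decompose into R, G, B components in 0..5, each then scaled to
# a 0..1000 range for the terminal's RGB interface.
def cube_to_rgb(code):
    assert 16 <= code <= 231
    code -= 16
    red, green, blue = code // 36, (code % 36) // 6, code % 6
    return tuple(x * 1000 // 6 for x in (red, green, blue))

print(cube_to_rgb(16))   # (0, 0, 0), the black corner of the cube
print(cube_to_rgb(231))  # (833, 833, 833), the brightest cube entry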
def print_partitions(self, reco_info=None, extrastr='', abbreviate=True, print_header=True, n_to_print=None, smc_print=False, calc_missing_values='none'):
    assert calc_missing_values in ['none', 'all', 'best']
    if reco_info is not None and calc_missing_values == 'all':
        self.calculate_missing_values(reco_info)

    if print_header:
        print '    %7s %10s   %-7s %5s  %4s' % ('', 'logprob', 'delta', 'clusters', 'n_procs'),
        if reco_info is not None or self.we_have_an_adj_mi:
            print ' %5s' % ('adj mi'),
            print ' %5s %5s' % ('ccf under', 'over'),
        if self.logweights[0] is not None and smc_print:
            print '  %10s  %7s' % ('pot.parents', 'logweight'),
        print ''

    for ip in self.get_surrounding_partitions(n_partitions=n_to_print):
        if reco_info is not None and calc_missing_values == 'best' and ip == self.i_best:
            self.calculate_missing_values(reco_info, only_ip=ip)
        mark = '      '
        if ip == self.i_best:
            mark = 'best  '
        if ip == self.i_best_minus_x:
            mark = mark[:-2] + '* '
        if mark.count(' ') < len(mark):
            mark = utils.color('yellow', mark)
        self.print_partition(ip, reco_info, extrastr=mark+extrastr, abbreviate=abbreviate, smc_print=smc_print)
def readlines(self, lines):
    for line in lines:
        if "path_index" in line and int(line["path_index"]) != self.initial_path_index:  # if <lines> contains more than one path_index, that means they represent more than one path, so you need to use glomerator, not just one ClusterPath
            raise Exception(
                "path index in lines %d doesn't match my initial path index %d"
                % (int(line["path_index"]), self.initial_path_index)
            )
        if "partition" not in line:
            raise Exception("'partition' not among headers, maybe this isn't a partition file?")

        if "seed_unique_id" in line and line["seed_unique_id"] != "":
            if self.seed_unique_id is None:
                self.seed_unique_id = line["seed_unique_id"]
            if line["seed_unique_id"] != self.seed_unique_id:
                print "%s seed uids for each line not all the same %s %s" % (
                    utils.color("yellow", "warning"),
                    line["seed_unique_id"],
                    self.seed_unique_id,
                )

        partitionstr = line["partition"]
        partition = [cluster_str.split(":") for cluster_str in partitionstr.split(";")]
        ccfs = [None, None]
        if "ccf_under" in line and "ccf_over" in line and line["ccf_under"] != "" and line["ccf_over"] != "":
            ccfs = [float(line["ccf_under"]), float(line["ccf_over"])]
            self.we_have_a_ccf = True

        self.add_partition(
            partition,
            float(line["logprob"]),
            int(line.get("n_procs", 1)),
            logweight=float(line.get("logweight", 0)),
            ccfs=ccfs,
        )
def run(self, args):
    open(self.logfname, 'w').close()

    for name, info in self.tests.items():
        if args.quick and name not in self.quick_tests:
            continue
        self.prepare_to_run(args, name, info)

        action = info['action']
        cmd_str = info['bin'] + ' ' + action
        cmd_str += ' ' + ' '.join(info['extras'] + self.common_extras)
        if name == 'simulate':
            cmd_str += ' --outfname ' + self.simfnames['new']
        elif 'cache-parameters-' not in name:
            cmd_str += ' --outfname ' + self.dirs['new'] + '/' + name + '.csv'

        logstr = '%s   %s' % (utils.color('green', name, width=30, padside='right'), cmd_str)
        print logstr
        if args.dry_run:
            continue
        logfile = open(self.logfname, 'a')
        logfile.write(logstr + '\n')
        logfile.close()
        start = time.time()
        try:
            check_call(cmd_str + ' 1>>' + self.logfname + ' 2>>' + self.logfname, shell=True)
        except CalledProcessError, err:
            # print err  # this just says it exited with code != 0
            print '  log tail:'
            print utils.pad_lines(check_output(['tail', self.logfname]))
            sys.exit(1)  # raise Exception('exited with error')
        self.run_times[name] = time.time() - start  # seconds
def print_partitions(
    self,
    reco_info=None,
    extrastr="",
    abbreviate=True,
    print_header=True,
    n_to_print=None,
    smc_print=False,
    calc_missing_values="none",
):
    assert calc_missing_values in ["none", "all", "best"]
    if reco_info is not None and calc_missing_values == "all":
        self.calculate_missing_values(reco_info)

    if print_header:
        print "    %7s %10s   %-7s %5s  %4s" % ("", "logprob", "delta", "clusters", "n_procs"),
        if reco_info is not None or self.we_have_a_ccf:
            print " %5s %5s" % ("purity", "completeness"),
        if self.logweights[0] is not None and smc_print:
            print "  %10s  %7s" % ("pot.parents", "logweight"),
        print ""

    for ip in self.get_surrounding_partitions(n_partitions=n_to_print):
        if reco_info is not None and calc_missing_values == "best" and ip == self.i_best:
            self.calculate_missing_values(reco_info, only_ip=ip)
        mark = "      "
        if ip == self.i_best:
            mark = "best  "
        if ip == self.i_best_minus_x:
            mark = mark[:-2] + "* "
        if mark.count(" ") < len(mark):
            mark = utils.color("yellow", mark)
        self.print_partition(ip, reco_info, extrastr=mark + extrastr, abbreviate=abbreviate, smc_print=smc_print)
def compare_production_results(self):
    if args.quick:
        return
    print 'diffing production results'
    for fname in ['test/parameters/data', 'test/simu.csv', 'test/parameters/simu/hmm-true', 'test/parameters/simu/sw', 'test/parameters/simu/hmm']:
        print '    %s' % fname
        cmd = 'diff -qbr ' + ' '.join(self.dirs[st] + '/' + fname for st in self.stypes)
        proc = Popen(cmd.split(), stdout=PIPE, stderr=PIPE)
        out, err = proc.communicate()
        if proc.returncode != 0:
            outlines = [l for l in out.split('\n') if 'differ' in l]
            n_total_files = int(check_output('find ' + self.dirs['ref'] + '/' + fname + ' -type f | wc -l', shell=True))
            print utils.color('red', '      %d / %d files differ' % (len(outlines), n_total_files)),
            print '  (%s)' % cmd
            if err != '':
                print err
def downsample_iseqs_by_multiplicity(cluster_line, multiplicity_seqmeta, max_sequences_count, always_include_ids):
    """First take the always-include seqs, then take as many as you can of the remaining seqs, in order of highest multiplicity."""
    warnings.warn(
        utils.color(
            "red",
            "Downsampling cluster sequences by multiplicity. Should there be many sequences with equal multiplicity (e.g. 'singlets' all with multiplicity of 1), downsampling will be done arbitrarily among them.",
        ))
    if len(multiplicity_seqmeta["multiplicities"]) != len(cluster_line["input_seqs"]):
        raise Exception(
            "Something went wrong internally, multiplicities are calculated for each seq in the cluster annotation but the number of seqs in the annotation does not match the number of multiplicities"
        )
    always_include_iseqs = [
        iseq for iseq in range(len(cluster_line["input_seqs"]))
        if cluster_line["unique_ids"][iseq] in always_include_ids
    ]
    rest_iseqs = [
        iseq for iseq in range(len(cluster_line["input_seqs"]))
        if cluster_line["unique_ids"][iseq] not in always_include_ids
    ]
    remaining_seqs_to_take_count = max_sequences_count - len(always_include_ids)
    downsampled_iseqs = (
        always_include_iseqs + sorted(
            rest_iseqs,
            key=lambda iseq: multiplicity_seqmeta["multiplicities"][iseq],  # sort by multiplicity
            reverse=True,  # descending order
        )[:remaining_seqs_to_take_count])
    return downsampled_iseqs
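# A toy run of the downsampling logic above, with made-up ids and multiplicities,
# showing that always-include ids survive regardless of their multiplicity while
# the rest are kept in descending-multiplicity order up to the cap.
ids = ["a", "b", "c", "d"]
mults = {"a": 1, "b": 9, "c": 3, "d": 5}
always = {"a"}
rest = sorted((i for i in ids if i not in always), key=lambda i: mults[i], reverse=True)
kept = list(always) + rest[:4 - len(always)]
print(kept)  # ['a', 'b', 'd', 'c']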
def print_partition(self, ip, reco_info=None, extrastr="", abbreviate=True, smc_print=False): # NOTE it's nicer to *not* sort by cluster size here, since preserving the order tends to frequently make it obvious which clusters are merging as your eye scans downwards through the output if ip > 0: # delta between this logprob and the previous one delta_str = "%.1f" % (self.logprobs[ip] - self.logprobs[ip - 1]) else: delta_str = "" print " %s %-12.2f%-7s %-5d %4d" % ( extrastr, self.logprobs[ip], delta_str, len(self.partitions[ip]), self.n_procs[ip], ), # logweight (and inverse of number of potential parents) if self.logweights[ip] is not None and smc_print: way_str, logweight_str = "", "" expon = math.exp(self.logweights[ip]) n_ways = 0 if expon == 0.0 else 1.0 / expon way_str = ("%.1f" % n_ways) if n_ways < 1e7 else ("%8.1e" % n_ways) logweight_str = "%8.3f" % self.logweights[ip] print " " + self.get_ccf_str(ip), if self.logweights[ip] is not None and smc_print: print " %10s %8s " % (way_str, logweight_str), # clusters for cluster in self.partitions[ip]: if abbreviate: cluster_str = ":".join(["o" if len(uid) > 3 else uid for uid in cluster]) else: # cluster_str = ':'.join(sorted([str(uid) for uid in cluster])) cluster_str = ":".join([str(uid) for uid in cluster]) if reco_info is not None and not utils.from_same_event(reco_info, cluster): cluster_str = utils.color("red", cluster_str) if self.seed_unique_id is not None and self.seed_unique_id in cluster: cluster_str = utils.color("reverse_video", cluster_str) if abbreviate: print " %s" % cluster_str, else: print " %s" % cluster_str, print ""
def compare_production_results(self): if args.quick: return print "diffing production results" for fname in ["test/parameters/data", "test/simu.csv", "test/parameters/simu"]: cmd = " diff -qbr " + " ".join(self.dirs[st] + "/" + fname for st in self.stypes) print cmd proc = Popen(cmd.split(), stdout=PIPE, stderr=PIPE) out, err = proc.communicate() if proc.returncode != 0: outlines = [l for l in out.split("\n") if "differ" in l] n_total_files = int( check_output("find " + self.dirs["ref"] + "/" + fname + " -type f | wc -l", shell=True) ) print utils.color("red", " %d / %d files differ" % (len(outlines), n_total_files)) if err != "": print err
def handleInput(self, world, input):
    comm = input.split(" ", 1)[0]
    if ("handle_%s" % comm) in self._dir:
        exec("self.handle_%s(world, input)" % comm)
    else:
        # CATCH ALL for bad commands
        self.write(color("huh? '%s'\n" % input, 35))
    self.write("> ")
def print_partition(self, ip, reco_info=None, extrastr='', one_line=True, abbreviate=True):
    if one_line:
        if ip > 0:  # delta between this logprob and the previous one
            delta_str = '%.1f' % (self.logprobs[ip] - self.logprobs[ip-1])
        else:
            delta_str = ''
        print '  %5s %-12.2f%-7s %-5d %5d' % (extrastr, self.logprobs[ip], delta_str, len(self.partitions[ip]), self.n_procs[ip]),

        # logweight (and inverse of number of potential parents)
        if self.logweights[ip] is not None:
            way_str, logweight_str = '', ''
            expon = math.exp(self.logweights[ip])
            n_ways = 0 if expon == 0. else 1. / expon
            way_str = ('%.1f' % n_ways) if n_ways < 1e7 else ('%8.1e' % n_ways)
            logweight_str = '%8.3f' % self.logweights[ip]

        # adj mi
        if reco_info is not None:
            adj_mi_str = ''
            if self.adj_mis[ip] is None:
                adj_mi_str = 'na'
            else:
                if self.adj_mis[ip] > 1e-3:
                    adj_mi_str = '%-8.3f' % self.adj_mis[ip]
                else:
                    adj_mi_str = '%-8.0e' % self.adj_mis[ip]
            print '  %8s  ' % (adj_mi_str),
        if self.logweights[ip] is not None:
            print '  %10s  %8s  ' % (way_str, logweight_str),
    else:
        print '  %5s partition  %-15.2f' % (extrastr, self.logprobs[ip]),
        if reco_info is not None:
            print '  %-8.2f' % (self.adj_mis[ip]),
        print ''
        print '   clonal?  ids'

    # clusters
    for cluster in self.partitions[ip]:
        same_event = utils.from_same_event(reco_info is None, reco_info, cluster)
        if same_event is None:
            same_event = -1

        if abbreviate:
            cluster_str = ':'.join(['o' for uid in cluster])
        else:
            cluster_str = ':'.join([str(uid) for uid in cluster])
        if not same_event:
            cluster_str = utils.color('red', cluster_str)
        if one_line:
            if abbreviate:
                print ' %s' % cluster_str,
            else:
                print '   %s' % cluster_str,
        else:
            print '     %d    %s' % (int(same_event), cluster_str)
    if one_line:
        print ''
def change_abandoned(event):
    change = event["change"]
    branch = change["branch"]
    if branch in branch_ignore:
        return

    owner = username_from_person(change["owner"])
    abandoner = username_from_person(event["abandoner"])
    if owner != abandoner:
        msg_owner = color(GREEN) + owner + "'s" + color()
    else:
        msg_owner = "their"
    msg_abandoner = color(GREEN) + abandoner + color()
    msg_description = describe_patchset(change)

    message = "%s abandoned %s change on %s" % (msg_abandoner, msg_owner, msg_description)
    emit_message(message)
def remove_reference_results(self, expected_content): print " remove ref files" dir_content = set([os.path.basename(f) for f in glob.glob(self.dirs["ref"] + "/*")]) if len(dir_content - expected_content) > 0 or len(expected_content - dir_content) > 0: if len(dir_content - expected_content) > 0: print "in ref dir but not expected\n %s" % ( utils.color("red", " ".join(dir_content - expected_content)) ) if len(expected_content - dir_content) > 0: print "expected but not in ref dir\n %s" % ( utils.color("red", " ".join(expected_content - dir_content)) ) raise Exception("unexpected or missing content in reference dir") for fname in [self.dirs["ref"] + "/" + ec for ec in expected_content]: print " rm %s" % fname if os.path.isdir(fname): shutil.rmtree(fname) else: os.remove(fname)
def fit_istart(self, gene, istart, positions_to_try_to_fit, subxyvals, fitfo, debug=False):
    residuals = {}
    for pos in positions_to_try_to_fit:
        # skip positions that are too close to the 5' end of V (misassigned insertions look like snps)
        if pos > len(self.glfo['seqs'][utils.get_region(gene)][gene]) - self.n_five_prime_positions_to_exclude - 1:
            continue

        # as long as we already have a few non-candidate positions, skip positions that have no frequencies greater than the min y intercept (note that they could in principle still have a large y intercept, but we don't really care)
        if len(residuals) > istart + self.min_non_candidate_positions_to_fit and len([f for f in subxyvals[pos]['freqs'] if f > self.min_y_intercept]) == 0:
            continue

        if sum(subxyvals[pos]['total']) < self.n_total_min:
            continue

        # also skip positions that only have a few points to fit (i.e. genes that were very rare, or I guess maybe if they were always eroded past this position)
        if len(subxyvals[pos]['n_mutelist']) < 3:
            continue

        zero_icpt_fit = self.get_curvefit(subxyvals[pos]['n_mutelist'], subxyvals[pos]['freqs'], subxyvals[pos]['errs'], y_icpt_bounds=(0. - self.small_number, 0. + self.small_number))
        big_icpt_fit = self.get_curvefit(subxyvals[pos]['n_mutelist'], subxyvals[pos]['freqs'], subxyvals[pos]['errs'], y_icpt_bounds=self.big_y_icpt_bounds)

        residuals[pos] = {'zero_icpt' : zero_icpt_fit['residuals_over_ndof'], 'big_icpt' : big_icpt_fit['residuals_over_ndof']}

        self.fitted_positions[gene].add(pos)  # if we already did the fit for another <istart>, it'll already be in there

    if len(residuals) <= istart:  # needs to be at least one longer, so we have the first-non-snp
        if debug:
            print '      not enough observations to fit more than %d snps' % (istart - 1)
        return

    residual_ratios = {pos : float('inf') if r['big_icpt'] == 0. else r['zero_icpt'] / r['big_icpt'] for pos, r in residuals.items()}
    sorted_ratios = sorted(residual_ratios.items(), key=operator.itemgetter(1), reverse=True)  # sort the positions in decreasing order of residual ratio
    candidate_snps = [pos for pos, _ in sorted_ratios[:istart]]  # the first <istart> positions are the "candidate snps"
    max_non_snp, max_non_snp_ratio = sorted_ratios[istart]  # position and ratio for largest non-candidate
    min_candidate_ratio = min([residual_ratios[cs] for cs in candidate_snps])

    # fitfo['scores'][istart] = (min_candidate_ratio - max_non_snp_ratio) / max(self.small_number, max_non_snp_ratio)
    fitfo['min_snp_ratios'][istart] = min([residual_ratios[cs] for cs in candidate_snps])
    fitfo['candidates'][istart] = {cp : residual_ratios[cp] for cp in candidate_snps}

    if debug:
        # if debug > 1:
        #     print '%70s %s' % ('', ''.join(['%11d' % nm for nm in subxyvals[max_non_snp]['n_mutelist']]))
        for pos in candidate_snps + [max_non_snp, ]:
            xtrastrs = ('[', ']') if pos == max_non_snp else (' ', ' ')
            pos_str = '%3s' % str(pos)
            if residual_ratios[pos] > self.min_min_candidate_ratio:
                pos_str = utils.color('yellow', pos_str)
            print '        %s %s  %5s  (%5s / %-5s)  %4d / %-4d %s' % (xtrastrs[0], pos_str, fstr(residual_ratios[pos]),
                                                                       fstr(residuals[pos]['zero_icpt']), fstr(residuals[pos]['big_icpt']),
                                                                       sum(subxyvals[pos]['obs']), sum(subxyvals[pos]['total']), xtrastrs[1]),
            # if debug > 1:
            #     print '  ', ''.join(['%4d / %-4d' % (subxyvals[pos]['obs'][inm], subxyvals[pos]['total'][inm]) for inm in range(len(subxyvals[pos]['n_mutelist']))])
            print ''
def restrict_to_genes(glfo, only_genes, debug=False):
    """ remove from <glfo> any genes which are not in <only_genes> """
    if only_genes is None:
        return
    only_genes_not_in_glfo = set(only_genes) - set([g for r in utils.regions for g in glfo['seqs'][r]])
    if len(only_genes_not_in_glfo) > 0:
        print '  %s genes %s in --only-genes aren\'t in glfo to begin with' % (utils.color('red', 'warning'), ' '.join(only_genes_not_in_glfo))
    genes_to_remove = set([g for r in utils.regions for g in glfo['seqs'][r]]) - set(only_genes)
    if debug:
        print '    removing %d genes from glfo' % len(genes_to_remove)
    remove_genes(glfo, genes_to_remove)
def print_partition(self, ip, reco_info=None, extrastr='', abbreviate=True, smc_print=False):
    if ip > 0:  # delta between this logprob and the previous one
        delta_str = '%.1f' % (self.logprobs[ip] - self.logprobs[ip-1])
    else:
        delta_str = ''
    print '  %s %-12.2f%-7s %-5d  %4d' % (extrastr, self.logprobs[ip], delta_str, len(self.partitions[ip]), self.n_procs[ip]),

    # logweight (and inverse of number of potential parents)
    if self.logweights[ip] is not None and smc_print:
        way_str, logweight_str = '', ''
        expon = math.exp(self.logweights[ip])
        n_ways = 0 if expon == 0. else 1. / expon
        way_str = ('%.1f' % n_ways) if n_ways < 1e7 else ('%8.1e' % n_ways)
        logweight_str = '%8.3f' % self.logweights[ip]

    print '  ' + self.get_ccf_str(ip),

    if self.logweights[ip] is not None and smc_print:
        print '  %10s  %8s  ' % (way_str, logweight_str),

    # clusters
    for cluster in self.partitions[ip]:
        if abbreviate:
            cluster_str = ':'.join(['o' if len(uid) > 3 else uid for uid in cluster])
        else:
            # cluster_str = ':'.join(sorted([str(uid) for uid in cluster]))
            cluster_str = ':'.join([str(uid) for uid in cluster])
        if reco_info is not None and not utils.from_same_event(reco_info, cluster):
            cluster_str = utils.color('red', cluster_str)
        if self.seed_unique_id is not None and self.seed_unique_id in cluster:
            cluster_str = utils.color('reverse_video', cluster_str)
        if abbreviate:
            print ' %s' % cluster_str,
        else:
            print '   %s' % cluster_str,
    print ''
def patchset_created(event):
    change = event["change"]
    branch = change["branch"]
    if branch in branch_ignore:
        return

    uploader = username_from_person(event["uploader"])
    trac_id = utils.extract_trac_id(change['subject'])
    number = int(event['patchSet']['number'])

    msg_owner = color(GREEN) + uploader + color()
    msg_description = describe_patchset(change)
    msg_verb = 'updated' if number > 1 else 'submitted'

    message = "%s %s %s" % (msg_owner, msg_verb, msg_description)

    if trac_id is not None:
        trac_link = utils.link_from_trac_id(trac_id)
        msg_trac_link = color(NAVY, underline=True) + trac_link + color(GREY)
        message += " : %s" % (msg_trac_link)

    emit_message(message)