Example #1
0
def reconstruct_indelfo_from_indel_list(indel_list, line, iseq, debug=False):  # old-style files
    if 'reversed_seq' in indel_list:  # handle super-old files
        print '%s encountered file with super old, unhandled indel format, proceeding, but indel info may be inconsistent' % (utils.color('red', 'error'))
        return

    line['indelfos'][iseq] = get_empty_indel()
    if len(indel_list) == 0:
        return

    ifo_positions = [ifo['pos'] for ifo in indel_list]
    if len(ifo_positions) != len(set(ifo_positions)):
        print '%s two indels at the same position, everything will be kinda messed up' % utils.color('red', 'error')
    ifos_by_pos = {ifo['pos'] : ifo for ifo in indel_list}
    qr_gap_seq, gl_gap_seq = [], []
    iqr, igl, iindel = 0, 0, 0
    if debug:
        print len(line['input_seqs'][iseq]), line['input_seqs'][iseq]
        print len(line['naive_seq']), line['naive_seq']
    while iqr < len(line['input_seqs'][iseq]):
        if debug:
            print '  %3d  %3d' % (iqr, igl),
        if iindel in ifos_by_pos:
            ifo = ifos_by_pos[iindel]
            if ifo['type'] == 'insertion':
                if ifo['seqstr'] != line['input_seqs'][iseq][iqr : iqr + ifo['len']]:
                    print '%s indel info seqstr doesn\'t match input seq str:' % utils.color('red', 'error')
                    utils.color_mutants(ifo['seqstr'], line['input_seqs'][iseq][iqr : iqr + ifo['len']], align=True, print_result=True, extra_str='        ')
                qr_gap_seq += ifo['seqstr'].split()
                gl_gap_seq += [ifo['len'] * utils.gap_chars[0]]
                if debug:
                    print '  %s    %s' % (ifo['seqstr'].split(), [ifo['len'] * utils.gap_chars[0]])
                iqr += ifo['len']
            else:
                if ifo['seqstr'] != line['naive_seq'][igl : igl + ifo['len']]:
                    print '%s indel info seqstr doesn\'t match naive seq str:' % utils.color('red', 'error')
                    utils.color_mutants(ifo['seqstr'], line['naive_seq'][igl : igl + ifo['len']], align=True, print_result=True, extra_str='        ')
                qr_gap_seq += [ifo['len'] * utils.gap_chars[0]]
                gl_gap_seq += ifo['seqstr'].split()
                if debug:
                    print '  %s    %s' % ([ifo['len'] * utils.gap_chars[0]], ifo['seqstr'].split())
                igl += ifo['len']
            del ifos_by_pos[iindel]
            iindel += ifo['len']
        else:
            qr_gap_seq += [line['input_seqs'][iseq][iqr]]
            gl_gap_seq += [line['naive_seq'][igl]]
            if debug:
                print '  %s    %s' % (line['input_seqs'][iseq][iqr], line['naive_seq'][igl])
            iqr += 1
            igl += 1
            iindel += 1

    line['indelfos'][iseq]['qr_gap_seq'] = ''.join(qr_gap_seq)
    line['indelfos'][iseq]['gl_gap_seq'] = ''.join(gl_gap_seq)
    line['indelfos'][iseq]['indels'] = indel_list
    line['indelfos'][iseq]['reversed_seq'] = line['indel_reversed_seqs'][iseq]
    line['indelfos'][iseq]['genes'] = {r : line[r + '_gene'] for r in utils.regions}
    if debug:
        print '  reconstructed indelfo'
        print get_dbg_str(line['indelfos'][iseq])
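
A minimal, self-contained sketch of the same gap-sequence reconstruction (toy inputs only; the gap character and the partis 'line'/'utils' machinery are replaced by plain arguments): insertions put gap characters in the germline row, deletions put them in the query row.

def build_gap_seqs(input_seq, naive_seq, indels, gap_char='.'):
    # standalone illustration of the loop above: walk the query, and at each recorded
    # indel position emit either (inserted bases, gaps) or (gaps, deleted bases)
    qr_gap, gl_gap = [], []
    ifos_by_pos = {ifo['pos']: ifo for ifo in indels}
    iqr, igl, iindel = 0, 0, 0
    while iqr < len(input_seq):
        if iindel in ifos_by_pos:
            ifo = ifos_by_pos.pop(iindel)
            if ifo['type'] == 'insertion':  # present in query, absent from germline
                qr_gap.append(input_seq[iqr : iqr + ifo['len']])
                gl_gap.append(gap_char * ifo['len'])
                iqr += ifo['len']
            else:  # deletion: present in germline, absent from query
                qr_gap.append(gap_char * ifo['len'])
                gl_gap.append(naive_seq[igl : igl + ifo['len']])
                igl += ifo['len']
            iindel += ifo['len']
        else:  # matched (or point-mutated) base
            qr_gap.append(input_seq[iqr])
            gl_gap.append(naive_seq[igl])
            iqr, igl, iindel = iqr + 1, igl + 1, iindel + 1
    return ''.join(qr_gap), ''.join(gl_gap)

# e.g. a 2-base insertion at position 3:
#   build_gap_seqs('ACGTTACG', 'ACGACG', [{'type': 'insertion', 'pos': 3, 'len': 2}])
#   -> ('ACGTTACG', 'ACG..ACG')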
Example #2
0
def get_dbg_str(indelfo):
    if len(indelfo['qr_gap_seq']) != len(indelfo['gl_gap_seq']):
        print indelfo['qr_gap_seq']
        print indelfo['gl_gap_seq']
        raise Exception('different length qr and gl gap seqs (see previous lines)')
    qrprintstr, glprintstr = [], []
    for ich in range(len(indelfo['qr_gap_seq'])):
        qrb, glb = indelfo['qr_gap_seq'][ich], indelfo['gl_gap_seq'][ich]
        qrcolor, glcolor = None, None
        if qrb in utils.gap_chars or glb in utils.gap_chars:
            qrcolor = 'light_blue'
            glcolor = 'light_blue'
        elif qrb in utils.ambiguous_bases:
            qrcolor = 'light_blue'
        elif glb in utils.ambiguous_bases:
            glcolor = 'light_blue'
        elif qrb != glb:
            qrcolor = 'red'
        qrprintstr.append(utils.color(qrcolor, qrb if qrb not in utils.gap_chars else '*'))  # change it to a star just because that's what it originally was... at some point we should switch to just leaving it as whatever gap char it was
        glprintstr.append(utils.color(glcolor, glb if glb not in utils.gap_chars else '*'))
    qrprintstr = ''.join(qrprintstr)
    glprintstr = ''.join(glprintstr)

    gene_str = ''
    gwidth = str(len('query'))
    if 'v' in indelfo['genes']:
        gene_str = utils.color_gene(indelfo['genes']['v'], width=int(gwidth), leftpad=True)
        gwidth = str(utils.len_excluding_colors(gene_str))
    dj_gene_str = ' '.join([utils.color_gene(indelfo['genes'][r]) for r in 'dj' if r in indelfo['genes']])
    dbg_str_list = [('  %' + gwidth + 's  %s  %s') % (gene_str, glprintstr, dj_gene_str),
                    ('  %' + gwidth + 's  %s') % ('query', qrprintstr)]
    for idl in indelfo['indels']:
        dbg_str_list.append('%10s: %d base%s at %d (%s)' % (idl['type'], idl['len'], utils.plural(idl['len']), idl['pos'], idl['seqstr']))
    return '\n'.join(dbg_str_list)
Example #3
0
def tree_x(fs: FileSystem, depth: int, level=0):
    """
    Generate a directory tree listing (in the style shown below).
    .
    └── test
        ├── css
        ├── img
        │   └── head
        └── js
    :param fs:
    :param depth: how many levels deep to print
    :param level: the directory depth reached so far
    :return:
    """
    if depth == 0:
        return
    pwd_cat = fs.load_pwd_obj()  # get the current directory object
    file_list = pwd_cat.file_name_and_types()
    for name, flag in file_list:
        if flag == DIR_TYPE:  # directory
            print("│   " * level, end="")
            print("├──", color(name, DIR_COLOR_F, DIR_COLOR_B))
            flag_x = fs.ch_sig_dir(name, info=False)
            if flag_x:
                tree_x(fs, depth - 1, level + 1)
                fs.ch_sig_dir("..")
        if flag == FILE_TYPE:  # file
            print("│   " * level, end="")
            print("├──", color(name, FILE_COLOR_F, FILE_COLOR_B))
Example #4
0
    def print_partition(self, ip, reco_info=None, extrastr='', abbreviate=True, highlight_cluster_indices=None, print_cluster_indices=False, right_extrastr=''):  # NOTE <highlight_cluster_indices> and <print_cluster_indices> are quite different despite sounding similar, but I can't think of something else to call the latter that makes more sense
        #  NOTE it's nicer to *not* sort by cluster size here, since preserving the order tends to frequently make it obvious which clusters are merging as your eye scans downwards through the output
        if ip > 0:  # delta between this logprob and the previous one
            delta_str = '%.1f' % (self.logprobs[ip] - self.logprobs[ip-1])
        else:
            delta_str = ''
        print '      %s  %-12.2f%-7s   %s%-5d  %4d' % (extrastr, self.logprobs[ip], delta_str, ('%-5d  ' % ip) if print_cluster_indices else '', len(self.partitions[ip]), self.n_procs[ip]),

        print '    ' + self.get_ccf_str(ip),

        # clusters
        sorted_clusters = sorted(self.partitions[ip], key=lambda c: len(c), reverse=True)
        for iclust in range(len(sorted_clusters)):
            cluster = sorted_clusters[iclust]
            if abbreviate:
                cluster_str = ':'.join(['o' if len(uid) > 3 else uid for uid in cluster])
            else:
                # cluster_str = ':'.join(sorted([str(uid) for uid in cluster]))
                cluster_str = ':'.join([str(uid) for uid in cluster])

            if reco_info is not None and not utils.from_same_event(reco_info, cluster):
                cluster_str = utils.color('red', cluster_str)

            if self.seed_unique_id is not None and self.seed_unique_id in cluster:
                cluster_str = utils.color('reverse_video', cluster_str)

            if highlight_cluster_indices is not None and iclust in highlight_cluster_indices:
                cluster_str = utils.color('red', cluster_str)
            
            if abbreviate:
                print ' %s' % cluster_str,
            else:
                print '   %s' % cluster_str,
        print '%s' % right_extrastr,
        print ''
Example #5
0
def RunSmbFinger(host):
	try:
		s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
		s.connect(host)
		s.settimeout(0.7)

		h = SMBHeader(cmd="\x72",flag1="\x18",flag2="\x53\xc8")
		n = SMBNego(data = SMBNegoFingerData())
		n.calculate()
		
		Packet = str(h)+str(n)
		Buffer = struct.pack(">i", len(''.join(Packet)))+Packet
		s.send(Buffer)
		data = s.recv(2048)
		
		if data[8:10] == "\x72\x00":
			Header = SMBHeader(cmd="\x73",flag1="\x18",flag2="\x17\xc8",uid="\x00\x00")
			Body = SMBSessionFingerData()
			Body.calculate()

			Packet = str(Header)+str(Body)
			Buffer = struct.pack(">i", len(''.join(Packet)))+Packet  

			s.send(Buffer) 
			data = s.recv(2048)

		if data[8:10] == "\x73\x16":
			return OsNameClientVersion(data)
	except:
		print color("[!] ", 1, 1) +" Fingerprint failed"
		return None
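
The framing used above is just a four-byte big-endian length prefix in front of the payload (NetBIOS session service style); a standalone sketch with a toy payload rather than the real SMB structures. The snippet above is Python 2, where str is already a byte string; here bytes are used explicitly.

import struct

def frame_packet(payload):
    # prepend the payload length as a 4-byte big-endian integer
    return struct.pack(">i", len(payload)) + payload

# frame_packet(b"\x72\x00hello") -> b'\x00\x00\x00\x07r\x00hello'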
Example #6
0
    def compare_performance(self):
        # NOTE does *not* regenerate the reference performance file based on the reference outputs  UPDATE hm, wait, do I still use the performance files?
        print "comparing to reference performance"

        refkeys = set(self.perf_info["ref"].keys())
        newkeys = set(self.perf_info["new"].keys())
        if len(refkeys - newkeys) > 0 or len(newkeys - refkeys) > 0:
            print "  %d keys only in ref" % len(refkeys - newkeys)
            print "  %d keys only in new" % len(newkeys - refkeys)
            print "  %d in common" % len(refkeys & newkeys)
            raise Exception("")

        for name in self.perf_info["ref"]:  # don't use the sets above so we get the nice ordering
            ref_val = self.perf_info["ref"][name]
            new_val = self.perf_info["new"][name]
            val_type = name.split("-")[-1]
            print "  %-28s %-15s       %-5.3f" % (name.replace("-" + val_type, ""), val_type, ref_val),
            fractional_change = (new_val - ref_val) / ref_val  # NOTE not the abs value yet
            if abs(fractional_change) > self.eps_vals[val_type]:
                print "--> %-5.3f %s" % (new_val, utils.color("red", "(%+.3f)" % fractional_change)),
            elif abs(fractional_change) > self.tiny_eps:
                print "--> %-5.3f %s" % (new_val, utils.color("yellow", "(%+.3f)" % fractional_change)),
            else:
                print "    ok   ",
            print ""
Example #7
0
    def compare_run_times(self):
        print 'checking run times'

        def read_run_times(stype):
            times[stype] = {}
            with open(self.dirs[stype] + '/run-times.csv') as timefile:
                reader = csv.DictReader(timefile)
                for line in reader:
                    times[stype][line['name']] = float(line['seconds'])
        times = {}
        for stype in self.stypes:
            read_run_times(stype)

        for name in times['ref']:
            if args.quick and name not in self.quick_tests:
                continue
            if args.only_ref and '-ref-' not in name:
                continue
            if args.skip_ref and '-ref-' in name:
                continue
            print '  %30s   %7.1f' % (name, times['ref'][name]),
            if name not in times['new']:
                print '  no new time for %s' % utils.color('red', name)
                continue
            fractional_change = (times['new'][name] - times['ref'][name]) / times['ref'][name]
            if abs(fractional_change) > 0.2:
                print '--> %-5.1f %s' % (times['new'][name], utils.color('red', '(%+.3f)' % fractional_change)),
            elif abs(fractional_change) > 0.1:
                print '--> %-5.1f %s' % (times['new'][name], utils.color('yellow', '(%+.3f)' % fractional_change)),
            else:
                print '    ok   ',
            print ''
Example #8
0
    def compare_performance(self, input_stype):
        performance_metric_list = [n for n in self.perf_info['ref'] if input_stype in n]
        if len(performance_metric_list) == 0:
            return

        print '  performance with %s simulation and parameters' % input_stype

        # make sure there's a new performance value for each reference one, and vice versa
        refkeys = set(self.perf_info['ref'].keys())
        newkeys = set(self.perf_info['new'].keys())
        if len(refkeys - newkeys) > 0 or len(newkeys - refkeys) > 0:
            print '  %d keys only in ref' % len(refkeys - newkeys)
            print '  %d keys only in new' % len(newkeys - refkeys)
            print '  %d in common' % len(refkeys & newkeys)
            raise Exception('')

        for name in performance_metric_list:  # don't use the sets above so we get the nice ordering
            ref_val = self.perf_info['ref'][name]
            new_val = self.perf_info['new'][name]
            val_type = name.split('-')[-1]
            print '    %-28s %-15s       %-5.3f' % (name.replace('-' + val_type, ''), val_type, ref_val),
            fractional_change = (new_val - ref_val) / ref_val  # NOTE not the abs value yet
            if abs(fractional_change) > self.eps_vals[val_type]:
                print '--> %-5.3f %s' % (new_val, utils.color('red', '(%+.3f)' % fractional_change)),
            elif abs(fractional_change) > self.tiny_eps:
                print '--> %-5.3f %s' % (new_val, utils.color('yellow', '(%+.3f)' % fractional_change)),
            else:
                print '    ok   ',
            print ''
Example #9
0
def RunSmbFinger(host):
    try:
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        s.connect(host)
        s.settimeout(0.7)

        h = SMBHeader(cmd="\x72", flag1="\x18", flag2="\x53\xc8")
        n = SMBNego(data=SMBNegoFingerData())
        n.calculate()

        Packet = str(h) + str(n)
        Buffer = struct.pack(">i", len(''.join(Packet))) + Packet
        s.send(Buffer)
        data = s.recv(2048)

        if data[8:10] == "\x72\x00":
            Header = SMBHeader(cmd="\x73",
                               flag1="\x18",
                               flag2="\x17\xc8",
                               uid="\x00\x00")
            Body = SMBSessionFingerData()
            Body.calculate()

            Packet = str(Header) + str(Body)
            Buffer = struct.pack(">i", len(''.join(Packet))) + Packet

            s.send(Buffer)
            data = s.recv(2048)

        if data[8:10] == "\x73\x16":
            return OsNameClientVersion(data)
    except:
        print color("[!] ", 1, 1) + " Fingerprint failed"
        return None
Example #10
0
 def check_single_ifo(old_ifo, new_ifo):
     if debug:
         print '  len %d  pos %d  seqstr %s' % (
             old_ifo['len'], old_ifo['pos'], old_ifo['seqstr']),
     if new_ifo != old_ifo:
         if debug:
             print '  %s' % utils.color('red', 'nope')
         new_seqstr, old_seqstr = utils.color_mutants(
             old_ifo['seqstr'],
             new_ifo['seqstr'],
             return_ref=True,
             align=True)  #len(old_ifo['seqstr']) != len(new_ifo['seqstr']))
         if print_on_err:
             print '  pos %d --> %s    len %d --> %s    seqstr %s --> %s' % (
                 old_ifo['pos'],
                 utils.color(
                     None if new_ifo['pos'] == old_ifo['pos'] else 'red',
                     '%d' % new_ifo['pos']), old_ifo['len'],
                 utils.color(
                     None if new_ifo['len'] == old_ifo['len'] else 'red',
                     '%d' % new_ifo['len']), old_seqstr, new_seqstr)
         return False
     else:
         if debug:
             print '  %s' % utils.color('green', 'ok')
         return True
Example #11
0
    def compare_run_times(self):
        print 'checking run times'

        def read_run_times(stype):
            times[stype] = {}
            with open(self.dirs[stype] + '/run-times.csv') as timefile:
                reader = csv.DictReader(timefile)
                for line in reader:
                    times[stype][line['name']] = float(line['seconds'])
        times = {}
        for stype in self.stypes:
            read_run_times(stype)

        for name in times['ref']:
            if args.quick and name not in self.quick_tests:
                continue
            if args.only_ref and '-ref-' not in name:
                continue
            if args.skip_ref and '-ref-' in name:
                continue
            print '  %30s   %7.1f' % (name, times['ref'][name]),
            if name not in times['new']:
                print '  no new time for %s' % utils.color('red', name)
                continue
            fractional_change = (times['new'][name] - times['ref'][name]) / times['ref'][name]
            if abs(fractional_change) > 0.2:
                print '--> %-5.1f %s' % (times['new'][name], utils.color('red', '(%+.3f)' % fractional_change)),
            elif abs(fractional_change) > 0.1:
                print '--> %-5.1f %s' % (times['new'][name], utils.color('yellow', '(%+.3f)' % fractional_change)),
            else:
                print '    ok   ',
            print ''
Example #12
0
    def compare_performance(self, input_stype):
        performance_metric_list = [n for n in self.perf_info['ref'] if input_stype in n]
        if len(performance_metric_list) == 0:
            return

        print '  performance with %s simulation and parameters' % input_stype

        # make sure there's a new performance value for each reference one, and vice versa
        refkeys = set(self.perf_info['ref'].keys())
        newkeys = set(self.perf_info['new'].keys())
        if len(refkeys - newkeys) > 0 or len(newkeys - refkeys) > 0:
            print '  %d keys only in ref' % len(refkeys - newkeys)
            print '  %d keys only in new' % len(newkeys - refkeys)
            print '  %d in common' % len(refkeys & newkeys)
            raise Exception('')

        for name in performance_metric_list:  # don't use the sets above so we get the nice ordering
            ref_val = self.perf_info['ref'][name]
            new_val = self.perf_info['new'][name]
            val_type = name.split('-')[-1]
            print '    %-28s %-15s       %-5.3f' % (name.replace('-' + val_type, ''), val_type, ref_val),
            fractional_change = (new_val - ref_val) / ref_val  # NOTE not the abs value yet
            if abs(fractional_change) > self.eps_vals[val_type]:
                print '--> %-5.3f %s' % (new_val, utils.color('red', '(%+.3f)' % fractional_change)),
            elif abs(fractional_change) > self.tiny_eps:
                print '--> %-5.3f %s' % (new_val, utils.color('yellow', '(%+.3f)' % fractional_change)),
            else:
                print '    ok   ',
            print ''
Example #13
0
def epoch_info_printer(epoch,
                       mean_loss,
                       epoch_time,
                       total_time,
                       lr,
                       train_samples,
                       valid_samples=0,
                       valid_loss=None,
                       mean_blue=None,
                       valid_time=0):

    valid_loss = round(valid_loss, 5) if valid_loss is not None else None
    valid_blue = round(mean_blue, 5) if mean_blue is not None else None

    print("\n========== Epoch Summary ==========")
    print(color("  Epoch: %s Finished. " % epoch, 1))
    print("  Train Mean Loss: %s " % color(round(mean_loss, 5), 2))
    print("  Valid Mean Loss: %s " % color(valid_loss, 2))
    print("  Valid Mean BLEU: %s " % color(valid_blue, 2))
    print("  Train Data Size: %s " % color(train_samples, 2))
    print("  Valid Data Size: %s " % color(valid_samples, 2))
    print("  Epoch Time Consumed: %ss " % color(epoch_time, 2))
    print("  Valid Time Consumed: %ss " % color(valid_time, 2))
    print("  Total Time Consumed: %ss " % color(total_time, 2))
    print("  Current Learning Rate: %s " % color(round(lr, 8), 2))
    print("===================================")
    print("\n\n\n")
Example #14
0
    def get_indel_info(self, query_name, cigarstr, qrseq, glseq, gene):
        cigars = re.findall('[0-9][0-9]*[A-Z]', cigarstr)  # split cigar string into its parts
        cigars = [(cstr[-1], int(cstr[:-1])) for cstr in cigars]  # split each part into the code and the length

        codestr = ''
        qpos = 0  # position within query sequence
        indelfo = utils.get_empty_indel()  # replacement_seq: query seq with insertions removed and germline bases inserted at the position of deletions
        tmp_indices = []
        for code, length in cigars:
            codestr += length * code
            if code == 'I':  # advance qr seq but not gl seq
                indelfo['indels'].append({'type' : 'insertion', 'pos' : qpos, 'len' : length, 'seqstr' : ''})  # insertion begins at <pos>
                tmp_indices += [len(indelfo['indels']) - 1 for _ in range(length)]  # indel index corresponding to this position in the alignment
            elif code == 'D':  # advance gl seq but not qr seq
                indelfo['indels'].append({'type' : 'deletion', 'pos' : qpos, 'len' : length, 'seqstr' : ''})  # first deleted base is <pos> (i.e. the base at the position of the first deleted base)
                tmp_indices += [len(indelfo['indels']) - 1 for _ in range(length)]  # indel index corresponding to this position in the alignment
            else:
                tmp_indices += [None  for _ in range(length)]  # indel index corresponding to this position in the alignment
            qpos += length

        qrprintstr, glprintstr = '', ''
        iqr, igl = 0, 0
        for icode in range(len(codestr)):
            code = codestr[icode]
            if code == 'M':
                qrbase = qrseq[iqr]
                if qrbase != glseq[igl]:
                    qrbase = utils.color('red', qrbase)
                qrprintstr += qrbase
                glprintstr += glseq[igl]
                indelfo['reversed_seq'] += qrseq[iqr]  # add the base to the overall sequence with all indels reversed
            elif code == 'S':
                continue
            elif code == 'I':
                qrprintstr += utils.color('light_blue', qrseq[iqr])
                glprintstr += utils.color('light_blue', '*')
                indelfo['indels'][tmp_indices[icode]]['seqstr'] += qrseq[iqr]  # and to the sequence of just this indel
                igl -= 1
            elif code == 'D':
                qrprintstr += utils.color('light_blue', '*')
                glprintstr += utils.color('light_blue', glseq[igl])
                indelfo['reversed_seq'] += glseq[igl]  # add the base to the overall sequence with all indels reversed
                indelfo['indels'][tmp_indices[icode]]['seqstr'] += glseq[igl]  # and to the sequence of just this indel
                iqr -= 1
            else:
                raise Exception('unhandled code %s' % code)

            iqr += 1
            igl += 1

        if self.debug:
            print '\n      indels in %s' % query_name
            print '          %20s %s' % (gene, glprintstr)
            print '          %20s %s' % ('query', qrprintstr)
            for idl in indelfo['indels']:
                print '          %10s: %d bases at %d (%s)' % (idl['type'], idl['len'], idl['pos'], idl['seqstr'])
        # utils.undo_indels(indelfo)
        # print '                       %s' % self.input_info[query_name]['seq']

        return indelfo
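
The CIGAR-splitting step on its own, as a tiny standalone helper (same regex idea as above).

import re

def split_cigar(cigarstr):
    # '3M2I4M' -> [('M', 3), ('I', 2), ('M', 4)]
    return [(cstr[-1], int(cstr[:-1])) for cstr in re.findall('[0-9]+[A-Z]', cigarstr)]

# split_cigar('5S10M2D8M') -> [('S', 5), ('M', 10), ('D', 2), ('M', 8)]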
Example #15
0
 def check_droplet_id_groups(tdbg=False):
     # check against the droplet id method (we could just do it this way, but it would only work for 10x, and only until they change their naming convention)
     pgroup_strs = set(':'.join(sorted(pg)) for pg in pid_groups)
     all_uids = list(
         set([
             su for l in cpaths for c in cpaths[l].best() for u in c
             for su in [u] +
             utils.per_seq_val(all_antns[u], 'paired-uids', u)
         ]))
     n_not_found = 0
     for dropid, drop_queries in itertools.groupby(
             sorted(all_uids, key=utils.get_droplet_id),
             key=utils.get_droplet_id):
         dqlist = list(drop_queries)
         found = ':'.join(sorted(dqlist)) in pgroup_strs
         if not found:
             overlaps = [g for g in pgroup_strs if dropid in g]
             overlaps = utils.get_single_entry(overlaps)
             n_not_found += 1
         if tdbg or not found:
             print '  %25s %s  %s  %s' % (
                 utils.color('green', '-') if found else utils.color(
                     'red', 'x'), dropid, ' '.join(
                         sorted(utils.get_contig_id(q) for q in dqlist)),
                 utils.color(
                     'red', ' '.join(
                         sorted(
                             utils.get_contig_id(q)
                             for q in overlaps.split(':')))
                     if not found else ''))
     if n_not_found > 0:
         print '  %s droplet id group check failed for %d groups' % (
             utils.color('red', 'error'), n_not_found)
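
Since itertools.groupby only merges adjacent items, the sort by the same key function (as above) is required. A toy sketch with a hypothetical "<droplet>_<contig>" uid convention; the real utils.get_droplet_id knows the actual 10x naming scheme.

import itertools

def toy_droplet_id(uid):  # hypothetical naming convention, for illustration only
    return uid.rsplit('_', 1)[0]

uids = ['d1_c1', 'd2_c1', 'd1_c2', 'd2_c2', 'd3_c1']
for dropid, drop_queries in itertools.groupby(sorted(uids, key=toy_droplet_id), key=toy_droplet_id):
    print(dropid, sorted(drop_queries))
# d1 ['d1_c1', 'd1_c2']
# d2 ['d2_c1', 'd2_c2']
# d3 ['d3_c1']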
Example #16
0
def parse_ramesh_seqs(glseqs, outdir, debug=False):
    for locus in glseqs:
        glutils.remove_glfo_files(outdir, locus)
        # write to a glfo dir without extra info
        for region in glseqs[locus]:
            fn = glutils.get_fname(outdir, locus, region)
            if not os.path.exists(os.path.dirname(fn)):
                os.makedirs(os.path.dirname(fn))
            with open(fn, 'w') as ofile:
                for gene, seq in glseqs[locus][region].items():
                    ofile.write('>%s\n%s\n' % (gene, seq))

        # figure out extra info
        template_glfo = glutils.read_glfo('data/germlines/macaque', locus)
        glfo = glutils.read_glfo(outdir,
                                 locus,
                                 template_glfo=template_glfo,
                                 remove_bad_genes=True,
                                 debug=True)

        # trim non-coding stuff upstream of v (and remove non-full-length ones)
        gene_groups = {}
        for region in ['v']:
            group_labels = sorted(
                set([utils.gene_family(g) for g in glfo['seqs'][region]]))
            gene_groups[region] = [(glabel, {
                g: glfo['seqs'][region][g]
                for g in glfo['seqs'][region] if utils.gene_family(g) == glabel
            }) for glabel in group_labels]
        for region in [r for r in utils.regions if r in gene_groups]:
            if debug:
                print '%s' % utils.color('reverse_video',
                                         utils.color('green', region))
            for group_label, group_seqs in gene_groups[
                    region]:  # ok, this isn't really doing anything any more
                if debug:
                    print '  %s' % utils.color('blue', group_label)
                for gene, seq in group_seqs.items():
                    trim_and_remove_genes(region,
                                          gene,
                                          seq,
                                          glfo,
                                          template_glfo,
                                          debug=debug)

        # remove any seqs with ambiguous bases
        for region in [r for r in utils.regions if r in glfo['seqs']]:
            for gene, seq in glfo['seqs'][region].items():
                if utils.ambig_frac(seq) > 0.:
                    if debug:
                        print '   %d ambiguous bases: %s' % (
                            len(seq) * utils.ambig_frac(seq),
                            utils.color_gene(gene))
                    glutils.remove_gene(glfo, gene)

        # glutils.print_glfo(glfo)

        # write final result
        glutils.write_glfo(outdir, glfo, debug=True)
Example #17
0
def get_data_plots(args, baseoutdir, methods, study, dsets):
    metafos = heads.read_metadata(study)
    assert len(set([metafos[ds]['locus'] for ds in dsets])) == 1  # make sure everybody has the same locus
    mfo = metafos[dsets[0]]
    data_outdirs = [
        heads.get_datadir(
            study, 'processed', extra_str='gls-gen-paper-' + args.label) +
        '/' + ds for ds in dsets
    ]
    outdir = get_outdir(
        args,
        baseoutdir,
        varname='data',
        varval=study + '/' + '-vs-'.join(dsets)
    )  # for data, only the plots go here, since datascripts puts its output somewhere else
    if len(dsets) > 1 and len(methods) == 1:  # sample vs sample
        glslabels = dsets
        title = get_dset_title([metafos[ds] for ds in dsets])
        if study != 'kate-qrs':
            title += '  %s' % methstr(methods[0])
        title_color = methods[0]
        legends = get_dset_legends([metafos[ds] for ds in dsets])
        legend_title = methstr(
            methods[0]
        ) if study == 'kate-qrs' else None  # for kate-qrs we need to put the subject _and_ the isotype in the title, so there's no room for the method
        pie_chart_faces = False
        print '%s:' % utils.color('green', methods[0]),
    elif len(methods) > 1 and len(dsets) == 1:  # method vs method
        glslabels = methods
        title = get_dset_title([mfo])
        title_color = None
        legends = [methstr(m) + ' only' for m in methods]
        legend_title = None
        pie_chart_faces = True
        print '%s:' % utils.color('green', dsets[0]),
    else:
        raise Exception('one of \'em has to be length 1: %d %d' %
                        (len(methods), len(dsets)))
    print '%s' % (' %s ' % utils.color('light_blue', 'vs')).join(glslabels)
    make_gls_tree_plot(args,
                       outdir + '/' + '-vs-'.join(methods) + '/gls-gen-plots',
                       study + '-' + '-vs-'.join(dsets),
                       glsfnames=[
                           get_gls_fname(ddir,
                                         meth,
                                         locus=mfo['locus'],
                                         data=True) for ddir in data_outdirs
                           for meth in methods
                       ],
                       glslabels=glslabels,
                       locus=mfo['locus'],
                       title=title,
                       title_color=title_color,
                       legends=legends,
                       legend_title=legend_title,
                       pie_chart_faces=pie_chart_faces)
Example #18
0
    def construct_dict(self):
        """从训练数据中构建词典"""
        print("\nConstructing Dict ...\n")
        self.train_lines = self.read_lines_from_file(self.train_data,
                                                     with_target=True)
        self.valid_lines = self.read_lines_from_file(self.valid_data,
                                                     with_target=True)
        self.test_lines = None if not self.test_data else self.read_lines_from_file(
            self.test_data, with_target=True)

        self.special_tokens = {
            self.PAD_token: self.PAD_idx,
            self.BOS_token: self.BOS_idx,
            self.EOS_token: self.EOS_idx,
            self.OOV_token: self.OOV_idx
        }

        word_indexer = list({
            word.strip()
            for line in self.train_lines for word in line[0] + line[1]
            if len(word.strip()) > 1
        })
        char_indexer = list(set("".join(word_indexer)))

        for token, index in self.special_tokens.items():
            word_indexer.insert(index, token)
            char_indexer.insert(index, token)

        self.index2word = {
            index: word
            for index, word in enumerate(word_indexer)
        }
        self.word2index = {
            word: index
            for index, word in enumerate(word_indexer)
        }
        self.index2char = {
            index: word
            for index, word in enumerate(char_indexer)
        }
        self.char2index = {
            word: index
            for index, word in enumerate(char_indexer)
        }

        self.flag_constructed_dict = True
        vocab_file = {
            "word2index": self.word2index,
            "char2index": self.char2index
        }
        torch.save(vocab_file, self.args.vocab_file)
        print(color("\nSuccessfully Constructed Dict.", 2))
        print(color("\nVocabulary File Has Been Saved.\n", 1))
        print("Number of words: ", color(len(self.word2index), 2))
        print("Number of chars: ", color(len(self.char2index), 2), "\n\n")

        return self.word2index, self.char2index, self.index2word, self.index2char
Example #19
0
    def __init__(self, **kwargs):
        self.border_width = kwargs.pop('border_width', 1)
        self.cell_size = kwargs.pop('cell_size', 7)

        self.color_empty = kwargs.pop('color_empty', color(255, 255, 255))
        self.color_filled = kwargs.pop('color_filled', color())
        self.color_border = kwargs.pop('color_border', color(100, 100, 100))

        self.cell_plus_border = self.border_width + self.cell_size
Example #20
0
    async def on_member_update(self, before: discord.Member,
                               after: discord.Member):
        log_embed = self.log_embed("edit",
                                   footer=f"ID: {before.id}",
                                   user=after)

        if before.nick != after.nick:
            self.logger.info(
                color(
                    f"member `{after}` nickname changed: {before.nick} -> {after.nick}",
                    "blue",
                ))
            log_embed.description = f"**Nickname changed: {after.mention}**"
            log_embed.add_field(name="Before", value=before.nick, inline=False)
            log_embed.add_field(name="After", value=after.nick, inline=False)

        elif before.roles != after.roles:
            if len(before.roles) < len(after.roles):
                added: typing.List[discord.Role] = [
                    role for role in after.roles if role not in before.roles
                ]
                self.logger.info(
                    color(
                        f"member `{after}` roles added: {', '.join([r.name for r in added])}",
                        "green",
                    ))

                log_embed.description = f"**Role{'s' if len(added) > 1 else ''} added to {after.mention}:**"
                log_embed.colour = discord.Colour.green()
                log_embed.add_field(
                    name="Added roles",
                    value=", ".join([role.mention for role in added]),
                    inline=False,
                )
            else:
                removed: typing.List[discord.Role] = [
                    role for role in before.roles if role not in after.roles
                ]
                self.logger.info(
                    color(
                        f"member `{after}` roles removed: {', '.join([r.name for r in removed])}",
                        "red",
                    ))

                log_embed.description = f"**Role{'s' if len(removed) > 1 else ''} removed from {after.mention}**"
                log_embed.colour = discord.Colour.red()
                log_embed.add_field(
                    name="Removed roles",
                    value=", ".join([role.mention for role in removed]),
                    inline=False,
                )

        else:
            return

        await self.log_channel.send(embed=log_embed)
Example #21
0
    def handle_colors(self, world, text):
        """ Prints out all the colors we know about."""
        response = ''
        for background in range(40, 48):
            for foreground in range(30, 38):
                response += color(str(foreground), foreground, background)
                response += color(str(foreground), foreground, background, 1)
            response += "\33[0m\n"

        self.write(response)
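
The color() helper isn't shown here; with raw ANSI escape codes the same table can be produced directly (foreground codes 30-37, background codes 40-47, ';1' for the bold variant), roughly as follows.

def ansi(text, foreground, background, bold=False):
    # wrap <text> in an ANSI SGR escape sequence and reset afterwards
    prefix = '\033[%d;%d%sm' % (foreground, background, ';1' if bold else '')
    return prefix + text + '\033[0m'

def print_color_table():
    for background in range(40, 48):
        row = ''
        for foreground in range(30, 38):
            row += ansi(str(foreground), foreground, background)
            row += ansi(str(foreground), foreground, background, bold=True)
        print(row)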
Example #22
0
  def handle_colors(self, world, text):
    """ Prints out all the colors we know about."""
    response = ''
    for background in range(40,48):
      for foreground in range(30,38):
        response += color(str(foreground), foreground, background)
        response += color(str(foreground), foreground, background, 1)
      response += "\33[0m\n"

    self.write(response)
Example #23
0
 def print_key_differences(vtype, refkeys, newkeys):
     print '    %s keys' % vtype
     if len(refkeys - newkeys) > 0 or len(newkeys - refkeys) > 0:
         if len(refkeys - newkeys) > 0:
             print utils.color('red', '      %d only in ref version' % len(refkeys - newkeys))
         if len(newkeys - refkeys) > 0:
             print utils.color('red', '      %d only in new version' % len(newkeys - refkeys))
         print '      %d in common' % len(refkeys & newkeys)
     else:
         print '        %d identical keys in new and ref cache' % len(refkeys)
Example #24
0
 def print_key_differences(vtype, refkeys, newkeys):
     print '    %s keys' % vtype
     if len(refkeys - newkeys) > 0 or len(newkeys - refkeys) > 0:
         if len(refkeys - newkeys) > 0:
             print utils.color('red', '      %d only in ref version' % len(refkeys - newkeys))
         if len(newkeys - refkeys) > 0:
             print utils.color('red', '      %d only in new version' % len(newkeys - refkeys))
         print '      %d in common' % len(refkeys & newkeys)
     else:
         print '        %d identical keys in new and ref cache' % len(refkeys)
Example #25
0
def ref_updated(event):
    if "submitter" not in event:
        # Implies an auto-generated merge-commit was pushed by the system
        # Ignore since we only care about reporting direct-pushes
        return

    updated_ref = event["refUpdate"]

    branch = updated_ref["refName"]
    if branch in branch_ignore: return

    to_hash = shorten_hash(updated_ref['newRev'])
    from_hash = shorten_hash(updated_ref['oldRev'])

    project = utils.project_from_change(updated_ref)
    submitter = username_from_person(event["submitter"])
    link = utils.link_from_project(project)

    msg_project_branch = utils.build_repo_branch(project, branch) + color()
    msg_owner = color(GREEN) + submitter + color()
    msg_old_ref = color(bold=True) + from_hash + color()
    msg_new_ref = color(bold=True) + to_hash + color(GREY)
    msg_link = color(NAVY, underline=True) + link + color()

    message = "%s updated %s from %s to %s : %s" % (
        msg_owner, msg_project_branch, msg_old_ref, msg_new_ref, msg_link)
    emit_message(message)
Example #26
0
def get_uid_str(line, iseq, seed_uid, duplicated_uids=None):
    uid_width = max([len(uid) for uid in line['unique_ids']])
    fstr = '%' + str(uid_width) + 's'
    uidstr = fstr % line['unique_ids'][iseq]
    if seed_uid is not None and line['unique_ids'][iseq] == seed_uid:
        uidstr = utils.color('red', uidstr)
    if duplicated_uids is not None and line['unique_ids'][
            iseq] in duplicated_uids:
        uidstr += ' ' + utils.color(
            'red', 'duplicate: %d' % duplicated_uids[line['unique_ids'][iseq]])
    return uidstr
Example #27
0
 def simcountstr(
     gene, ws
 ):  # counts in simulation for <gene> (note that this is _not_ the same as sim_gene_count_str(), since this takes no account of _which_ queries these counts occur in [plus it's coming from the opposite point of view])
     if self.simglfo is None:
         rstr = ''
     elif gene in self.simglfo['seqs'][utils.get_region(gene)]:
         rstr = utils.color(
             'blue', (' %' + ws + 'd') %
             self.simcounts[utils.get_region(gene)][gene])
     else:
         rstr = utils.color('red', (' %' + ws + 's') % 'x')
     return rstr
Example #28
0
def change_merged(event):
    change = event["change"]

    branch = change["branch"]
    if branch in branch_ignore: return

    owner = username_from_person(change["owner"])

    msg_owner = color(GREEN) + owner + "'s" + color()
    msg_description = describe_patchset(change)

    message = "Applied %s change on %s" % (msg_owner, msg_description)
    emit_message(message)
Example #29
0
 def compare_data_annotation(self, input_stype):
     ptest = 'annotate-' + input_stype + '-data'
     if args.quick and ptest not in self.quick_tests:
         return
     print '  %s data annotation' % input_stype
     infnames = [self.dirs[version_stype] + '/' + ptest + '.csv' for version_stype in self.stypes]
     cmd = 'diff -u ' + ' '.join(infnames) + ' | grep "^+[^+]" | wc -l'
     n_diff_lines = int(check_output(cmd, shell=True))
     if n_diff_lines == 0:
         print '      ok'
     else:
         print utils.color('red', '      %d lines differ' % n_diff_lines),
         print '   (%s)' % cmd
Example #30
0
def change_merged(event):
    change = event["change"]

    branch = change["branch"]
    if branch in branch_ignore: return

    owner = username_from_person(change["owner"])

    msg_owner = color(GREEN) + owner + "'s" + color()
    msg_description = describe_patchset(change)

    message = "Applied %s change on %s" % (msg_owner, msg_description)
    emit_message(message)
Example #31
0
    def infer_tree_from_leaves(self, region, in_tree, leafseqs, naive_seq):
        if 'dendropy' not in sys.modules:
            import dendropy
        dendropy = sys.modules['dendropy']
        taxon_namespace = dendropy.TaxonNamespace()
        with tempfile.NamedTemporaryFile() as tmpfile:
            tmpfile.write('>%s\n%s\n' % ('naive', naive_seq))
            for iseq in range(len(leafseqs)):
                tmpfile.write(
                    '>t%s\n%s\n' % (iseq + 1, leafseqs[iseq])
                )  # NOTE the order of the leaves/names is checked when reading bppseqgen output
            tmpfile.flush()  # BEWARE: without this flush, FastTree will read an empty (or truncated) file
            with open(os.devnull, 'w') as fnull:
                out_tree = subprocess.check_output('./bin/FastTree -gtr -nt ' +
                                                   tmpfile.name,
                                                   shell=True,
                                                   stderr=fnull)
            out_dtree = dendropy.Tree.get_from_string(
                out_tree, 'newick', taxon_namespace=taxon_namespace)
            out_dtree.reroot_at_node(
                out_dtree.find_node_with_taxon_label('naive'),
                update_bipartitions=True)
            out_tree = out_dtree.as_string(schema='newick',
                                           suppress_rooting=True)

        in_height = treegenerator.get_mean_height(in_tree)
        out_height = treegenerator.get_mean_height(out_tree)
        base_width = 100
        print '  %s trees:' % ('full sequence' if region == 'all' else region)
        print '    %s' % utils.color('blue', 'input:')
        print treegenerator.get_ascii_tree(in_tree,
                                           extra_str='      ',
                                           width=base_width)
        print '    %s' % utils.color('blue', 'output:')
        print treegenerator.get_ascii_tree(out_tree,
                                           extra_str='        ',
                                           width=int(base_width * out_height /
                                                     in_height))

        in_dtree = dendropy.Tree.get_from_string(
            in_tree, 'newick', taxon_namespace=taxon_namespace)

        if self.args.debug:
            print '                   heights: %.3f   %.3f' % (in_height,
                                                               out_height)
            print '      symmetric difference: %d' % dendropy.calculate.treecompare.symmetric_difference(
                in_dtree, out_dtree)
            print '        euclidean distance: %f' % dendropy.calculate.treecompare.euclidean_distance(
                in_dtree, out_dtree)
            print '              r-f distance: %f' % dendropy.calculate.treecompare.robinson_foulds_distance(
                in_dtree, out_dtree)
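
A minimal, standalone version of the dendropy comparison step (assumes dendropy is installed; both trees have to share one TaxonNamespace for the treecompare metrics to be meaningful).

import dendropy
from dendropy.calculate import treecompare

tns = dendropy.TaxonNamespace()
t1 = dendropy.Tree.get(data='((A:1,B:1):1,C:2);', schema='newick', taxon_namespace=tns)
t2 = dendropy.Tree.get(data='((A:1,C:1):1,B:2);', schema='newick', taxon_namespace=tns)
print('  symmetric difference: %d' % treecompare.symmetric_difference(t1, t2))
print('          r-f distance: %f' % treecompare.robinson_foulds_distance(t1, t2))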
Example #32
0
    def print_partition(self,
                        ip,
                        reco_info=None,
                        extrastr='',
                        abbreviate=True,
                        smc_print=False):
        #  NOTE it's nicer to *not* sort by cluster size here, since preserving the order tends to frequently make it obvious which clusters are merging as your eye scans downwards through the output
        if ip > 0:  # delta between this logprob and the previous one
            delta_str = '%.1f' % (self.logprobs[ip] - self.logprobs[ip - 1])
        else:
            delta_str = ''
        print '      %s  %-12.2f%-7s   %-5d  %4d' % (
            extrastr, self.logprobs[ip], delta_str, len(
                self.partitions[ip]), self.n_procs[ip]),

        # logweight (and inverse of number of potential parents)
        if self.logweights[ip] is not None and smc_print:
            way_str, logweight_str = '', ''
            expon = math.exp(self.logweights[ip])
            n_ways = 0 if expon == 0. else 1. / expon
            way_str = ('%.1f' % n_ways) if n_ways < 1e7 else ('%8.1e' % n_ways)
            logweight_str = '%8.3f' % self.logweights[ip]

        print '    ' + self.get_ccf_str(ip),

        if self.logweights[ip] is not None and smc_print:
            print '   %10s    %8s   ' % (way_str, logweight_str),

        # clusters
        for cluster in sorted(self.partitions[ip],
                              key=lambda c: len(c),
                              reverse=True):
            if abbreviate:
                cluster_str = ':'.join(
                    ['o' if len(uid) > 3 else uid for uid in cluster])
            else:
                # cluster_str = ':'.join(sorted([str(uid) for uid in cluster]))
                cluster_str = ':'.join([str(uid) for uid in cluster])

            if reco_info is not None and not utils.from_same_event(
                    reco_info, cluster):
                cluster_str = utils.color('red', cluster_str)

            if self.seed_unique_id is not None and self.seed_unique_id in cluster:
                cluster_str = utils.color('reverse_video', cluster_str)

            if abbreviate:
                print ' %s' % cluster_str,
            else:
                print '   %s' % cluster_str,
        print ''
Example #33
0
    async def on_ready(self):
        self.cg_client = codingame.Client(is_async=True)

        for cog in Config.DEFAULT_COGS:
            self.load_extension(cog)

        self.logger.info(color("loaded all cogs", "green"))

        await self.change_presence(activity=discord.Game(
            name=f"{Config.PREFIX}help"))
        self.logger.debug(color(f"set status to `{Config.PREFIX}help`",
                                "cyan"))

        self.logger.info(color(f"logged in as user `{self.user}`", "green"))
Example #34
0
def comment_added(event):
    change = event["change"]

    branch = change["branch"]
    if branch in branch_ignore: return

    author = event["author"]
    author = username_from_person(author)

    msg_author = color(GREEN) + author + color()
    msg_description = describe_patchset(change)

    message = "%s reviewed %s" % (msg_author, msg_description)
    emit_message(message)
Example #35
0
 def compare_production_results(self):
     if args.quick:
         return
     print 'diffing production results'
     for fname in ['test/parameters/data', 'test/simu.csv', 'test/parameters/simu']:
         print '    %-30s' % fname,
         cmd = 'diff -qbr ' + ' '.join(self.dirs[st] + '/' + fname for st in self.stypes)
         proc = Popen(cmd.split(), stdout=PIPE, stderr=PIPE)
         out, err = proc.communicate()
         if proc.returncode == 0:
             print '       ok'
         else:
             differlines = [ l for l in out.split('\n') if 'differ' in l]
             onlylines = [ l for l in out.split('\n') if 'Only' in l]
             print ''
             if len(differlines) > 0:
                 n_total_files = int(check_output('find ' + self.dirs['ref'] + '/' + fname + ' -type f | wc -l', shell=True))
                 if n_total_files == 1:
                     assert len(differlines) == 1
                     print utils.color('red', '      file differs'),
                 else:
                     print utils.color('red', '      %d / %d files differ' % (len(differlines), n_total_files)),
             if len(onlylines) > 0:
                 for st in self.stypes:
                     theseonlylines = [l for l in onlylines if self.dirs[st] + '/' + fname in l]
                     if len(theseonlylines) > 0:
                         print utils.color('red', '      %d files only in %s' % (len(theseonlylines), st)),
             if len(differlines) == 0 and len(onlylines) == 0:
                 print utils.color('red', '      not sure why, but diff returned %d' % proc.returncode),
             print '  (%s)' % cmd
             if err != '':
                 print err
Example #36
0
def comment_added(event):
    change = event["change"]

    branch = change["branch"]
    if branch in branch_ignore: return

    author = event["author"]
    author = username_from_person(author)

    msg_author = color(GREEN) + author + color()
    msg_description = describe_patchset(change)

    message = "%s reviewed %s" % (msg_author, msg_description)
    emit_message(message)
Example #37
0
 def compare_production_results(self):
     if args.quick:
         return
     print 'diffing production results'
     for fname in ['test/parameters/data', 'test/simu.csv', 'test/parameters/simu']:
         print '    %-30s' % fname,
         cmd = 'diff -qbr ' + ' '.join(self.dirs[st] + '/' + fname for st in self.stypes)
         proc = Popen(cmd.split(), stdout=PIPE, stderr=PIPE)
         out, err = proc.communicate()
         if proc.returncode == 0:
             print '       ok'
         else:
             differlines = [ l for l in out.split('\n') if 'differ' in l]
             onlylines = [ l for l in out.split('\n') if 'Only' in l]
             print ''
             if len(differlines) > 0:
                 n_total_files = int(check_output('find ' + self.dirs['ref'] + '/' + fname + ' -type f | wc -l', shell=True))
                 if n_total_files == 1:
                     assert len(differlines) == 1
                     print utils.color('red', '      file differs'),
                 else:
                     print utils.color('red', '      %d / %d files differ' % (len(differlines), n_total_files)),
             if len(onlylines) > 0:
                 for st in self.stypes:
                     theseonlylines = [l for l in onlylines if self.dirs[st] + '/' + fname in l]
                     if len(theseonlylines) > 0:
                         print utils.color('red', '      %d files only in %s' % (len(theseonlylines), st)),
             if len(differlines) == 0 and len(onlylines) == 0:
                 print utils.color('red', '      not sure why, but diff returned %d' % proc.returncode),
             print '  (%s)' % cmd
             if err != '':
                 print err
Example #38
0
 def show_lls_info(self):
     """
     Print detailed file information for the current directory.
     :return:
     """
     pwd_cat = self.load_pwd_obj()
     for name, inode_id in pwd_cat.son_list():
         inode = self.get_inode(inode_id)
         res = inode.show_ll_info(self.fp)
         if inode.target_type == DIR_TYPE:
             name = color(name, DIR_COLOR_F, DIR_COLOR_B)
         else:
             name = color(name, FILE_COLOR_F, FILE_COLOR_B)
         print(' '.join(res) + ' ' + name)
Example #39
0
def shark(render, **kwargs):
    # barycentric
    w, v, u = kwargs['bar']
    # coords
    A, B, C = kwargs['triangle']

    t = A.x * w + B.x * u + C.x * v
    grey = int(t * 256)
    if grey < 0:
        grey = 0
    if grey > 255:
        grey = 255
    tcolor = color(grey, 150, 170)
    # normals
    nA, nB, nC = kwargs['varying_normals']

    # light intensity
    iA, iB, iC = [dot(n, render.light) for n in (nA, nB, nC)]
    intensity = w * iA + u * iB + v * iC

    if (intensity > 0.85):
        intensity = 1
    elif (intensity > 0.60):
        intensity = 0.80
    elif (intensity > 0.45):
        intensity = 0.60
    elif (intensity > 0.30):
        intensity = 0.45
    elif (intensity > 0.15):
        intensity = 0.30
    else:
        intensity = 0

    r = int(tcolor[2] * intensity)
    if r < 0:
        r = 0
    elif r > 255:
        r = 255
    g = int(tcolor[1] * intensity)
    if g < 0:
        g = 0
    elif g > 255:
        g = 255
    b = int(tcolor[0] * intensity)
    if b < 0:
        b = 0
    elif b > 255:
        b = 255
    return color(r, g, b)
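
The per-channel clamping and the stepped intensity can be factored into two small helpers; a sketch with the break points copied from the shader above.

def clamp(x, lo=0, hi=255):
    # keep a channel value inside [lo, hi]
    return max(lo, min(hi, x))

def quantize_intensity(intensity):
    # map a continuous light intensity onto the discrete levels used above
    steps = [(0.85, 1.0), (0.60, 0.80), (0.45, 0.60), (0.30, 0.45), (0.15, 0.30)]
    for threshold, level in steps:
        if intensity > threshold:
            return level
    return 0

# clamp(300) -> 255 ; quantize_intensity(0.5) -> 0.6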
Example #40
0
 def compare_data_annotation(self, input_stype):
     # NOTE don't really need to do this for simulation, since for simulation we already compare the performance info
     ptest = 'annotate-' + input_stype + '-data'
     if args.quick and ptest not in self.quick_tests:
         return
     print '  %s data annotation' % input_stype
     infnames = [self.dirs[version_stype] + '/' + ptest + '.csv' for version_stype in self.stypes]
     cmd = 'diff -u ' + ' '.join(infnames) + ' | grep "^+[^+]" | wc -l'
     n_diff_lines = int(check_output(cmd, shell=True))
     if n_diff_lines == 0:
         print '      ok'
     else:
         n_total_lines = int(check_output(['wc', '-l', infnames[0]]).split()[0])
         print utils.color('red', '      %d / %d lines differ' % (n_diff_lines, n_total_lines)),
         print '   (%s)' % cmd
Example #41
0
 def remove_reference_results(self, expected_content):
     print '  remove ref files'
     dir_content = set([os.path.basename(f) for f in glob.glob(self.dirs['ref'] + '/*')])
     if len(dir_content - expected_content) > 0 or len(expected_content - dir_content) > 0:
         if len(dir_content - expected_content) > 0:
             print 'in ref dir but not expected\n    %s' % (utils.color('red', ' '.join(dir_content - expected_content)))
         if len(expected_content - dir_content) > 0:
             print 'expected but not in ref dir\n    %s' % (utils.color('red', ' '.join(expected_content - dir_content)))
         raise Exception('unexpected or missing content in reference dir')
     for fname in [self.dirs['ref'] + '/' + ec for ec in expected_content]:
         print '    rm %s' % fname
         if os.path.isdir(fname):
             shutil.rmtree(fname)
         else:
             os.remove(fname)
Example #42
0
 def compare_data_annotation(self, input_stype):
     # NOTE don't really need to do this for simulation, since for simulation we already compare the performance info
     ptest = 'annotate-' + input_stype + '-data'
     if args.quick and ptest not in self.quick_tests:
         return
     print '  %s data annotation' % input_stype
     infnames = [self.dirs[version_stype] + '/' + ptest + '.csv' for version_stype in self.stypes]
     cmd = 'diff -u ' + ' '.join(infnames) + ' | grep "^+[^+]" | wc -l'
     n_diff_lines = int(check_output(cmd, shell=True))
     if n_diff_lines == 0:
         print '      ok'
     else:
         n_total_lines = int(check_output(['wc', '-l', infnames[0]]).split()[0])
         print utils.color('red', '      %d / %d lines differ' % (n_diff_lines, n_total_lines)),
         print '   (%s)' % cmd
Example #43
0
 def remove_reference_results(self, expected_content):
     print '  remove ref files'
     dir_content = set([os.path.basename(f) for f in glob.glob(self.dirs['ref'] + '/*')])
     if len(dir_content - expected_content) > 0 or len(expected_content - dir_content) > 0:
         if len(dir_content - expected_content) > 0:
             print 'in ref dir but not expected\n    %s' % (utils.color('red', ' '.join(dir_content - expected_content)))
         if len(expected_content - dir_content) > 0:
             print 'expected but not in ref dir\n    %s' % (utils.color('red', ' '.join(expected_content - dir_content)))
         raise Exception('unexpected or missing content in reference dir')
     for fname in [self.dirs['ref'] + '/' + ec for ec in expected_content]:
         print '    rm %s' % fname
         if os.path.isdir(fname):
             shutil.rmtree(fname)
         else:
             os.remove(fname)
Example #44
0
 def _process(self, value):
     """Process a value from theme.json and returns the color code."""
     if self.hex:
         try:
             code = int(value)
         except ValueError:
             pass
         else:
             if code > 15:
                 raise ValueError('Using extended color along with hex')
     # Quick note about extended color codes:
     # 0-7 are standard, binary: 0bBGR with 0% or 68% color
     # 8-15 are somehow standard, binary: 0bBGR with 0% or 100% color
     # 16-231 are RGB with components between 0 and 5 (216 values)
     # 232-255 are B&W colors from black to white (24 values)
     code = utils.color(value)
     if code is None or code > 15:
         if code is None:
             red, green, blue = utils.colorx(value)
         elif code < 232:
             code = code - 16
             red, green, blue = code // 36, (code % 36) // 6, code % 6
             red, green, blue = [x * 1000 // 6 for x in (red, green, blue)]
         else:
             red, green, blue = [(code - 232) * 1000 // 23] * 3
         code = self.add_rgb(red, green, blue)
     return code
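As a side note, the 256-color layout described in the comment above can be exercised on its own. The following is a minimal sketch (a hypothetical standalone helper, not part of the original class) that maps an extended color code to the same 0-1000 scaled RGB triple the method computes.

def xterm256_to_rgb(code):
    """Hypothetical helper: map an xterm-256 extended color code (16-255) to an
    (r, g, b) triple scaled 0-1000, mirroring the arithmetic in _process() above."""
    if code < 16 or code > 255:
        raise ValueError('expected an extended color code in 16-255, got %d' % code)
    if code < 232:  # 6x6x6 color cube, components 0-5
        code -= 16
        red, green, blue = code // 36, (code % 36) // 6, code % 6
        return tuple(x * 1000 // 6 for x in (red, green, blue))
    return ((code - 232) * 1000 // 23,) * 3  # 24-step grayscale ramp

print(xterm256_to_rgb(196))  # (833, 0, 0) -- bright red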
Example #45
0
    def print_partitions(self, reco_info=None, extrastr='', abbreviate=True, print_header=True, n_to_print=None, smc_print=False, calc_missing_values='none'):
        assert calc_missing_values in ['none', 'all', 'best']
        if reco_info is not None and calc_missing_values == 'all':
            self.calculate_missing_values(reco_info)

        if print_header:
            print '    %7s %10s   %-7s %5s  %4s' % ('', 'logprob', 'delta', 'clusters', 'n_procs'),
            if reco_info is not None or self.we_have_an_adj_mi:
                print ' %5s' % ('adj mi'),
                print ' %5s %5s' % ('ccf under', 'over'),
            if self.logweights[0] is not None and smc_print:
                print '  %10s  %7s' % ('pot.parents', 'logweight'),
            print ''

        for ip in self.get_surrounding_partitions(n_partitions=n_to_print):
            if reco_info is not None and calc_missing_values == 'best' and ip == self.i_best:
                self.calculate_missing_values(reco_info, only_ip=ip)
            mark = '      '
            if ip == self.i_best:
                mark = 'best  '
            if ip == self.i_best_minus_x:
                mark = mark[:-2] + '* '
            if mark.count(' ') < len(mark):
                mark = utils.color('yellow', mark)
            self.print_partition(ip, reco_info, extrastr=mark+extrastr, abbreviate=abbreviate, smc_print=smc_print)
Example #46
0
 def readlines(self, lines):
     for line in lines:
         if (
             "path_index" in line and int(line["path_index"]) != self.initial_path_index
         ):  # if <lines> contains more than one path_index, that means they represent more than one path, so you need to use glomerator, not just one ClusterPath
             raise Exception(
                 "path index in lines %d doesn't match my initial path index %d"
                 % (int(line["path_index"]), self.initial_path_index)
             )
         if "partition" not in line:
             raise Exception("'partition' not among headers, maybe this isn't a partition file?")
         if "seed_unique_id" in line and line["seed_unique_id"] != "":
             if self.seed_unique_id is None:
                 self.seed_unique_id = line["seed_unique_id"]
             if line["seed_unique_id"] != self.seed_unique_id:
                 print "%s seed uids for each line not all the same %s %s" % (
                     utils.color("yellow", "warning"),
                     line["seed_unique_id"],
                     self.seed_unique_id,
                 )
         partitionstr = line["partition"]
         partition = [cluster_str.split(":") for cluster_str in partitionstr.split(";")]
         ccfs = [None, None]
         if "ccf_under" in line and "ccf_over" in line and line["ccf_under"] != "" and line["ccf_over"] != "":
             ccfs = [float(line["ccf_under"]), float(line["ccf_over"])]
             self.we_have_a_ccf = True
         self.add_partition(
             partition,
             float(line["logprob"]),
             int(line.get("n_procs", 1)),
             logweight=float(line.get("logweight", 0)),
             ccfs=ccfs,
         )
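For context, the partition column that readlines() parses is just colon-separated sequence ids within a cluster, with clusters separated by semicolons. A minimal sketch of that parsing, with made-up ids and only the columns used above:

line = {"partition": "a:b:c;d;e:f", "logprob": "-42.7"}  # hypothetical row
partition = [cluster_str.split(":") for cluster_str in line["partition"].split(";")]
print(partition)  # [['a', 'b', 'c'], ['d'], ['e', 'f']] -- three clusters, six sequence ids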
Example #47
0
    def run(self, args):
        open(self.logfname, 'w').close()

        for name, info in self.tests.items():
            if args.quick and name not in self.quick_tests:
                continue

            self.prepare_to_run(args, name, info)

            action = info['action']
            cmd_str = info['bin'] + ' ' + action
            cmd_str += ' ' + ' '.join(info['extras'] + self.common_extras)
            if name == 'simulate':
                cmd_str += ' --outfname ' + self.simfnames['new']
            elif 'cache-parameters-' not in name:
                cmd_str += ' --outfname ' + self.dirs['new'] + '/' + name + '.csv'

            logstr = '%s   %s' % (utils.color('green', name, width=30, padside='right'), cmd_str)
            print logstr
            if args.dry_run:
                continue
            logfile = open(self.logfname, 'a')
            logfile.write(logstr + '\n')
            logfile.close()
            start = time.time()
            try:
                check_call(cmd_str + ' 1>>' + self.logfname + ' 2>>' + self.logfname, shell=True)
            except CalledProcessError, err:
                # print err  # this just says it exited with code != 0
                print '  log tail:'
                print utils.pad_lines(check_output(['tail', self.logfname]))
                sys.exit(1)  # raise Exception('exited with error')
            self.run_times[name] = time.time() - start  # seconds
Example #48
0
    def print_partitions(
        self,
        reco_info=None,
        extrastr="",
        abbreviate=True,
        print_header=True,
        n_to_print=None,
        smc_print=False,
        calc_missing_values="none",
    ):
        assert calc_missing_values in ["none", "all", "best"]
        if reco_info is not None and calc_missing_values == "all":
            self.calculate_missing_values(reco_info)

        if print_header:
            print "    %7s %10s   %-7s %5s  %4s" % ("", "logprob", "delta", "clusters", "n_procs"),
            if reco_info is not None or self.we_have_a_ccf:
                print " %5s %5s" % ("purity", "completeness"),
            if self.logweights[0] is not None and smc_print:
                print "  %10s  %7s" % ("pot.parents", "logweight"),
            print ""

        for ip in self.get_surrounding_partitions(n_partitions=n_to_print):
            if reco_info is not None and calc_missing_values == "best" and ip == self.i_best:
                self.calculate_missing_values(reco_info, only_ip=ip)
            mark = "      "
            if ip == self.i_best:
                mark = "best  "
            if ip == self.i_best_minus_x:
                mark = mark[:-2] + "* "
            if mark.count(" ") < len(mark):
                mark = utils.color("yellow", mark)
            self.print_partition(ip, reco_info, extrastr=mark + extrastr, abbreviate=abbreviate, smc_print=smc_print)
Example #49
0
 def compare_production_results(self):
     if args.quick:
         return
     print 'diffing production results'
     for fname in ['test/parameters/data', 'test/simu.csv', 'test/parameters/simu/hmm-true', 'test/parameters/simu/sw', 'test/parameters/simu/hmm']:
         print '    %s' % fname
         cmd = 'diff -qbr ' + ' '.join(self.dirs[st] + '/' + fname for st in self.stypes)
         proc = Popen(cmd.split(), stdout=PIPE, stderr=PIPE)
         out, err = proc.communicate()
         if proc.returncode != 0:
             outlines = [ l for l in out.split('\n') if 'differ' in l ]
             n_total_files = int(check_output('find ' + self.dirs['ref'] + '/' + fname + ' -type f | wc -l', shell=True))
             print utils.color('red', '      %d / %d files differ' % (len(outlines), n_total_files)),
             print '  (%s)' % cmd
             if err != '':
                 print err
Example #50
0
def downsample_iseqs_by_multiplicity(cluster_line, multiplicity_seqmeta,
                                     max_sequences_count, always_include_ids):
    """ First take the always keep, then take as many as you can of the remaining seqs, in order of highest multiplicity """
    warnings.warn(
        utils.color(
            "red",
            "Downsampling cluster sequences by multiplicity. Should there be many sequences with equal multiplicity (e.g. 'singlets' all with multiplicity of 1), downsampling will be done arbitrarily among them.",
        ))
    if len(multiplicity_seqmeta["multiplicities"]) != len(
            cluster_line["input_seqs"]):
        raise Exception(
            "Something went wrong internally, mutiplicities are calculated for each seq in the cluster annotation but the number of seqs in the annotation does not match the number of multiplicities"
        )
    always_include_iseqs = [
        iseq for iseq in range(len(cluster_line["input_seqs"]))
        if cluster_line["unique_ids"][iseq] in always_include_ids
    ]
    rest_iseqs = [
        iseq for iseq in range(len(cluster_line["input_seqs"]))
        if cluster_line["unique_ids"][iseq] not in always_include_ids
    ]
    remaining_seqs_to_take_count = max_sequences_count - len(
        always_include_ids)
    downsampled_iseqs = (
        always_include_iseqs + sorted(
            rest_iseqs,
            key=lambda iseq: multiplicity_seqmeta["multiplicities"][
                iseq],  # Sort by multiplicity
            reverse=True,
        )[:remaining_seqs_to_take_count])  # Descending order
    return downsampled_iseqs
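A small usage sketch, assuming the function above and its imports (warnings, utils) are available; the ids, seqs, and multiplicities below are invented:

cluster_line = {
    "unique_ids": ["a", "b", "c", "d"],
    "input_seqs": ["AAA", "CCC", "GGG", "TTT"],
}
multiplicity_seqmeta = {"multiplicities": [1, 5, 2, 1]}
iseqs = downsample_iseqs_by_multiplicity(cluster_line, multiplicity_seqmeta,
                                         max_sequences_count=3, always_include_ids=["d"])
print(iseqs)  # [3, 1, 2]: 'd' is kept first, then the two highest-multiplicity remaining seqs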
Example #51
0
    def print_partition(self, ip, reco_info=None, extrastr="", abbreviate=True, smc_print=False):
        #  NOTE it's nicer to *not* sort by cluster size here, since preserving the order tends to frequently make it obvious which clusters are merging as your eye scans downwards through the output
        if ip > 0:  # delta between this logprob and the previous one
            delta_str = "%.1f" % (self.logprobs[ip] - self.logprobs[ip - 1])
        else:
            delta_str = ""
        print "      %s  %-12.2f%-7s   %-5d  %4d" % (
            extrastr,
            self.logprobs[ip],
            delta_str,
            len(self.partitions[ip]),
            self.n_procs[ip],
        ),

        # logweight (and inverse of number of potential parents)
        if self.logweights[ip] is not None and smc_print:
            way_str, logweight_str = "", ""
            expon = math.exp(self.logweights[ip])
            n_ways = 0 if expon == 0.0 else 1.0 / expon
            way_str = ("%.1f" % n_ways) if n_ways < 1e7 else ("%8.1e" % n_ways)
            logweight_str = "%8.3f" % self.logweights[ip]

        print "    " + self.get_ccf_str(ip),

        if self.logweights[ip] is not None and smc_print:
            print "   %10s    %8s   " % (way_str, logweight_str),

        # clusters
        for cluster in self.partitions[ip]:
            if abbreviate:
                cluster_str = ":".join(["o" if len(uid) > 3 else uid for uid in cluster])
            else:
                # cluster_str = ':'.join(sorted([str(uid) for uid in cluster]))
                cluster_str = ":".join([str(uid) for uid in cluster])

            if reco_info is not None and not utils.from_same_event(reco_info, cluster):
                cluster_str = utils.color("red", cluster_str)

            if self.seed_unique_id is not None and self.seed_unique_id in cluster:
                cluster_str = utils.color("reverse_video", cluster_str)

            if abbreviate:
                print " %s" % cluster_str,
            else:
                print "   %s" % cluster_str,
        print ""
Example #52
0
 def compare_production_results(self):
     if args.quick:
         return
     print "diffing production results"
     for fname in ["test/parameters/data", "test/simu.csv", "test/parameters/simu"]:
         cmd = "  diff -qbr " + " ".join(self.dirs[st] + "/" + fname for st in self.stypes)
         print cmd
         proc = Popen(cmd.split(), stdout=PIPE, stderr=PIPE)
         out, err = proc.communicate()
         if proc.returncode != 0:
             outlines = [l for l in out.split("\n") if "differ" in l]
             n_total_files = int(
                 check_output("find " + self.dirs["ref"] + "/" + fname + " -type f | wc -l", shell=True)
             )
             print utils.color("red", "    %d / %d files differ" % (len(outlines), n_total_files))
             if err != "":
                 print err
Example #53
0
 def handleInput(self, world, input):
   comm = input.split(" ", 1)[0]
   handler = getattr(self, "handle_%s" % comm, None)  # look up the command handler by name rather than exec'ing a built string
   if handler is not None:
     handler(world, input)
   else:
     # CATCH ALL for bad commands
     self.write(color("huh? '%s'\n" % input, 35))
   self.write("> ")
Example #54
0
    def print_partition(self, ip, reco_info=None, extrastr='', one_line=True, abbreviate=True):
        if one_line:
            if ip > 0:  # delta between this logprob and the previous one
                delta_str = '%.1f' % (self.logprobs[ip] - self.logprobs[ip-1])
            else:
                delta_str = ''
            print '      %5s  %-12.2f%-7s   %-5d  %5d' % (extrastr, self.logprobs[ip], delta_str, len(self.partitions[ip]), self.n_procs[ip]),

            # logweight (and inverse of number of potential parents)
            if self.logweights[ip] is not None:
                way_str, logweight_str = '', ''
                expon = math.exp(self.logweights[ip])
                n_ways = 0 if expon == 0. else 1. / expon
                way_str = ('%.1f' % n_ways) if n_ways < 1e7 else ('%8.1e' % n_ways)
                logweight_str = '%8.3f' % self.logweights[ip]

            # adj mi
            if reco_info is not None:
                adj_mi_str = ''
                if self.adj_mis[ip] is None:
                    adj_mi_str = 'na'
                else:
                    if self.adj_mis[ip] > 1e-3:
                        adj_mi_str = '%-8.3f' % self.adj_mis[ip]
                    else:
                        adj_mi_str = '%-8.0e' % self.adj_mis[ip]
                print '      %8s   ' % (adj_mi_str),
            if self.logweights[ip] is not None:
                print '   %10s    %8s   ' % (way_str, logweight_str),
        else:
            print '  %5s partition   %-15.2f' % (extrastr, self.logprobs[ip]),
            if reco_info is not None:
                print '    %-8.2f' % (self.adj_mis[ip]),
            print ''
            print '   clonal?   ids'

        # clusters
        for cluster in self.partitions[ip]:
            same_event = utils.from_same_event(reco_info is None, reco_info, cluster)
            if same_event is None:
                same_event = -1

            if abbreviate:
                cluster_str = ':'.join(['o' for uid in cluster])
            else:
                cluster_str = ':'.join([str(uid) for uid in cluster])
            if not same_event:
                cluster_str = utils.color('red', cluster_str)
            
            if one_line:
                if abbreviate:
                    print ' %s' % cluster_str,
                else:
                    print '   %s' % cluster_str,
            else:
                print '     %d    %s' % (int(same_event), cluster_str)
        if one_line:
            print ''
Example #55
0
def change_abandoned(event):
    change = event["change"]

    branch = change["branch"]
    if branch in branch_ignore: return

    owner = username_from_person(change["owner"])
    abandoner = username_from_person(event["abandoner"])

    if owner != abandoner:
        msg_owner = color(GREEN) + owner + "'s" + color()
    else:
        msg_owner = "their"
    msg_abandoner = color(GREEN) + abandoner + color()
    msg_description = describe_patchset(change)

    message = "%s abandoned %s change on %s" % (msg_abandoner, msg_owner, msg_description)
    emit_message(message)
Example #56
0
 def remove_reference_results(self, expected_content):
     print "  remove ref files"
     dir_content = set([os.path.basename(f) for f in glob.glob(self.dirs["ref"] + "/*")])
     if len(dir_content - expected_content) > 0 or len(expected_content - dir_content) > 0:
         if len(dir_content - expected_content) > 0:
             print "in ref dir but not expected\n    %s" % (
                 utils.color("red", " ".join(dir_content - expected_content))
             )
         if len(expected_content - dir_content) > 0:
             print "expected but not in ref dir\n    %s" % (
                 utils.color("red", " ".join(expected_content - dir_content))
             )
         raise Exception("unexpected or missing content in reference dir")
     for fname in [self.dirs["ref"] + "/" + ec for ec in expected_content]:
         print "    rm %s" % fname
         if os.path.isdir(fname):
             shutil.rmtree(fname)
         else:
             os.remove(fname)
Example #57
0
    def fit_istart(self, gene, istart, positions_to_try_to_fit, subxyvals, fitfo, debug=False):
        residuals = {}
        for pos in positions_to_try_to_fit:
            # skip positions that are too close to the 5' end of V (misassigned insertions look like snps)
            if pos > len(self.glfo['seqs'][utils.get_region(gene)][gene]) - self.n_five_prime_positions_to_exclude - 1:
                continue

            # as long as we already have a few non-candidate positions, skip positions that have no frequencies greater than the min y intercept (note that they could in principle still have a large y intercept, but we don't really care)
            if len(residuals) > istart + self.min_non_candidate_positions_to_fit and len([f for f in subxyvals[pos]['freqs'] if f > self.min_y_intercept]) == 0:
                continue

            if sum(subxyvals[pos]['total']) < self.n_total_min:
                continue

            # also skip positions that only have a few points to fit (i.e. genes that were very rare, or I guess maybe if they were always eroded past this position)
            if len(subxyvals[pos]['n_mutelist']) < 3:
                continue

            zero_icpt_fit = self.get_curvefit(subxyvals[pos]['n_mutelist'], subxyvals[pos]['freqs'], subxyvals[pos]['errs'], y_icpt_bounds=(0. - self.small_number, 0. + self.small_number))
            big_icpt_fit = self.get_curvefit(subxyvals[pos]['n_mutelist'], subxyvals[pos]['freqs'], subxyvals[pos]['errs'], y_icpt_bounds=self.big_y_icpt_bounds)

            residuals[pos] = {'zero_icpt' : zero_icpt_fit['residuals_over_ndof'], 'big_icpt' : big_icpt_fit['residuals_over_ndof']}

            self.fitted_positions[gene].add(pos)  # if we already did the fit for another <istart>, it'll already be in there

        if len(residuals) <= istart:  # needs to be at least one longer, so we have the first-non-snp
            if debug:
                print '      not enough observations to fit more than %d snps' % (istart - 1)
            return

        residual_ratios = {pos : float('inf') if r['big_icpt'] == 0. else r['zero_icpt'] / r['big_icpt'] for pos, r in residuals.items()}
        sorted_ratios = sorted(residual_ratios.items(), key=operator.itemgetter(1), reverse=True)  # sort the positions in decreasing order of residual ratio
        candidate_snps = [pos for pos, _ in sorted_ratios[:istart]]  # the first <istart> positions are the "candidate snps"
        max_non_snp, max_non_snp_ratio = sorted_ratios[istart]  # position and ratio for largest non-candidate
        min_candidate_ratio = min([residual_ratios[cs] for cs in candidate_snps])

        # fitfo['scores'][istart] = (min_candidate_ratio - max_non_snp_ratio) / max(self.small_number, max_non_snp_ratio)
        fitfo['min_snp_ratios'][istart] = min([residual_ratios[cs] for cs in candidate_snps])
        fitfo['candidates'][istart] = {cp : residual_ratios[cp] for cp in candidate_snps}

        if debug:
            # if debug > 1:
            #     print '%70s %s' % ('', ''.join(['%11d' % nm for nm in subxyvals[max_non_snp]['n_mutelist']]))
            for pos in candidate_snps + [max_non_snp, ]:
                xtrastrs = ('[', ']') if pos == max_non_snp else (' ', ' ')
                pos_str = '%3s' % str(pos)
                if residual_ratios[pos] > self.min_min_candidate_ratio:
                    pos_str = utils.color('yellow', pos_str)
                print '               %s %s    %5s   (%5s / %-5s)       %4d / %-4d %s' % (xtrastrs[0], pos_str, fstr(residual_ratios[pos]),
                                                                                       fstr(residuals[pos]['zero_icpt']), fstr(residuals[pos]['big_icpt']),
                                                                                       sum(subxyvals[pos]['obs']), sum(subxyvals[pos]['total']), xtrastrs[1]),
                # if debug > 1:
                #     print '      ', ''.join(['%4d / %-4d' % (subxyvals[pos]['obs'][inm], subxyvals[pos]['total'][inm]) for inm in range(len(subxyvals[pos]['n_mutelist']))])
                print ''
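The candidate selection above boils down to ranking positions by the ratio of the zero-intercept fit residual to the large-intercept fit residual; a minimal standalone sketch of that ranking, with invented residuals:

import operator

# hypothetical residuals_over_ndof per position: a true snp fits badly with a zero y intercept
residuals = {10: {'zero_icpt': 8.0, 'big_icpt': 1.0},
             42: {'zero_icpt': 1.2, 'big_icpt': 1.1},
             77: {'zero_icpt': 0.9, 'big_icpt': 2.5}}
residual_ratios = {pos: float('inf') if r['big_icpt'] == 0. else r['zero_icpt'] / r['big_icpt'] for pos, r in residuals.items()}
sorted_ratios = sorted(residual_ratios.items(), key=operator.itemgetter(1), reverse=True)
istart = 1
candidate_snps = [pos for pos, _ in sorted_ratios[:istart]]  # top <istart> ratios are the candidate snps
max_non_snp, max_non_snp_ratio = sorted_ratios[istart]       # best of the rest, used as the comparison point
print(candidate_snps)  # [10] -- position 10 looks like a snp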
Example #58
0
def restrict_to_genes(glfo, only_genes, debug=False):
    """ remove from <glfo> any genes which are not in <only_genes> """
    if only_genes is None:
        return

    only_genes_not_in_glfo = set(only_genes) - set([g for r in utils.regions for g in glfo['seqs'][r]])
    if len(only_genes_not_in_glfo) > 0:
        print '  %s genes %s in --only-genes aren\'t in glfo to begin with' % (utils.color('red', 'warning'), ' '.join(only_genes_not_in_glfo))

    genes_to_remove = set([g for r in utils.regions for g in glfo['seqs'][r]]) - set(only_genes)
    if debug:
        print '    removing %d genes from glfo' % len(genes_to_remove)
    remove_genes(glfo, genes_to_remove)
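The gene-removal step is a plain set difference over all regions; a tiny sketch with a made-up, miniature glfo (structure inferred from the lookups above):

glfo = {'seqs': {'v': {'IGHV1-2*01': 'ACGT'}, 'd': {'IGHD3-10*01': 'GGGA'}, 'j': {'IGHJ4*02': 'TTAC'}}}
only_genes = ['IGHV1-2*01', 'IGHJ4*02']
all_genes = set(g for r in glfo['seqs'] for g in glfo['seqs'][r])
genes_to_remove = all_genes - set(only_genes)
print(sorted(genes_to_remove))  # ['IGHD3-10*01'] -- the D gene gets dropped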
Example #59
0
    def print_partition(self, ip, reco_info=None, extrastr='', abbreviate=True, smc_print=False):
        if ip > 0:  # delta between this logprob and the previous one
            delta_str = '%.1f' % (self.logprobs[ip] - self.logprobs[ip-1])
        else:
            delta_str = ''
        print '      %s  %-12.2f%-7s   %-5d  %4d' % (extrastr, self.logprobs[ip], delta_str, len(self.partitions[ip]), self.n_procs[ip]),

        # logweight (and inverse of number of potential parents)
        if self.logweights[ip] is not None and smc_print:
            way_str, logweight_str = '', ''
            expon = math.exp(self.logweights[ip])
            n_ways = 0 if expon == 0. else 1. / expon
            way_str = ('%.1f' % n_ways) if n_ways < 1e7 else ('%8.1e' % n_ways)
            logweight_str = '%8.3f' % self.logweights[ip]

        print '    ' + self.get_ccf_str(ip),

        if self.logweights[ip] is not None and smc_print:
            print '   %10s    %8s   ' % (way_str, logweight_str),

        # clusters
        for cluster in self.partitions[ip]:
            if abbreviate:
                cluster_str = ':'.join(['o' if len(uid) > 3 else uid for uid in cluster])
            else:
                # cluster_str = ':'.join(sorted([str(uid) for uid in cluster]))
                cluster_str = ':'.join([str(uid) for uid in cluster])

            if reco_info is not None and not utils.from_same_event(reco_info, cluster):
                cluster_str = utils.color('red', cluster_str)

            if self.seed_unique_id is not None and self.seed_unique_id in cluster:
                cluster_str = utils.color('reverse_video', cluster_str)
            
            if abbreviate:
                print ' %s' % cluster_str,
            else:
                print '   %s' % cluster_str,
        print ''
Example #60
0
def patchset_created(event):
    change = event["change"]

    branch = change["branch"]
    if branch in branch_ignore: return

    uploader = username_from_person(event["uploader"])
    trac_id = utils.extract_trac_id(change['subject'])
    number = int(event['patchSet']['number'])

    msg_owner = color(GREEN) + uploader + color()
    msg_description = describe_patchset(change)
    msg_verb = 'updated' if number > 1  else 'submitted'

    message = "%s %s %s" % (msg_owner, msg_verb, msg_description)

    if trac_id is not None:
        trac_link = utils.link_from_trac_id(trac_id)
        msg_trac_link = color(NAVY, underline=True) + trac_link + color(GREY)
        message += " : %s" % (msg_trac_link)

    emit_message(message)