Ejemplo n.º 1
0
    def test_get_next_from_file(self):
        '''get_next_from_file() should read seqs from OK, and raise error at badly formatted file'''
        bad_files = [
            'fastn_unittest_fail_no_AT.fq', 'fastn_unittest_fail_no_seq.fq',
            'fastn_unittest_fail_no_plus.fq', 'fastn_unittest_fail_no_qual.fq'
        ]

        for fname in bad_files:
            f_in = utils.open_file_read(fname)
            try:
                fq = fastn.Fastq()
                # Keep reading records until the malformed one is reached.
                with self.assertRaises(fastn.Error):
                    while fq.get_next_from_file(f_in):
                        pass
            finally:
                # Close the handle even when the expected error is not raised.
                utils.close(f_in)

        fname = 'fastn_unittest_good_file.fq'
        try:
            f_in = open(fname)
        except IOError:
            # A missing fixture is reported as a test failure instead of
            # calling sys.exit() from inside a test.
            self.fail("Error opening '" + fname + "'")

        try:
            fq = fastn.Fastq()
            # Every record in the good file is expected to be identical.
            while fq.get_next_from_file(f_in):
                self.assertEqual(fq, fastn.Fastq('ID', 'ACGTA', 'IIIII'))
        finally:
            utils.close(f_in)
Ejemplo n.º 2
0
    def barplot_of_one_stat_sorted(self, stat, outprefix, main='', stat2=None):
        '''Write <outprefix>.R, which draws a sorted bar plot of one statistic, and run it.

        One bar is drawn for each scaffolder whose (self.data_type, scaffolder)
        pair is not in bad_runs. Bars are sorted by ascending height, ties
        broken by scaffolder name. The script plots to pdf, png and svg files
        called <outprefix>.<format>.

        stat      -- key into self.results[scaffolder]; also used as the y label
        outprefix -- prefix of the R script and of the image files
        main      -- plot title
        stat2     -- optional second key; when given, its value is added to the
                     value of stat before plotting
        '''
        # Apply the bad_runs filter once so heights, names and colours stay aligned.
        good_scaffs = [s for s in scaffolders if (self.data_type, s) not in bad_runs]

        # NOTE(review): the two values are added before int() is applied, as in
        # the original code; if results are stored as strings this concatenates
        # them instead of summing - confirm the stored type.
        if stat2 is None:
            heights = [self.results[s][stat] for s in good_scaffs]
        else:
            heights = [self.results[s][stat] + self.results[s][stat2] for s in good_scaffs]

        all_data = sorted(zip([int(x) for x in heights],
                              good_scaffs,
                              [r_colours[s] for s in good_scaffs]))

        bar_heights = ','.join(str(x[0]) for x in all_data)
        names = 'c(' + ','.join('"' + x[1] + '"' for x in all_data) + ')'
        cols = ','.join('"' + x[2] + '"' for x in all_data)

        r_script = outprefix + '.R'
        f = utils.open_file_write(r_script)
        try:
            # "device" is the name of the R graphics device function to call.
            for device in ['pdf', 'png', 'svg']:
                print(device + '("' + outprefix + '.' + device + '")', file=f)
                print('par(mar=c(10,4,4,2) + 0.1)', file=f)

                print('barplot(c(', bar_heights, '), ',
                      'names.arg=', names, ', ',
                      'main="', main, '",',
                      ' ylab="', stat, '", ',
                      'col=c(', cols, '), ',
                      'las=2',
                      ')', sep='', file=f)

                print('dev.off()', file=f)
        finally:
            # Do not leak the handle if writing fails part way through.
            utils.close(f)

        run_r_script(r_script)
Ejemplo n.º 3
0
def barplot_by_input_data(stat_to_plot, prefix, plot_width=21, plot_height=7):
    '''Write <prefix>.R, which draws a bar plot of one statistic for every data type, and run it.

    Bars are emitted in the order of test_data_types, and within each data
    type in the order of scaffolders. Every scaffolder is plotted; bad_runs is
    not consulted here. The script plots to <prefix>.png and <prefix>.pdf.

    stat_to_plot -- key into results[data type].results[scaffolder]; also the y label
    prefix       -- prefix of the R script and of the image files
    plot_width   -- width passed to the R graphics device
    plot_height  -- height passed to the R graphics device
    '''
    bar_heights = []
    colours = []

    for t in test_data_types:
        for scaff in scaffolders:
            # str() so that join() below works whatever type the value is stored as.
            bar_heights.append(str(results[t].results[scaff][stat_to_plot]))
            colours.append('"' + r_colours[scaff] + '"')

    heights_str = ','.join(bar_heights)
    colours_str = ','.join(colours)

    r_script = prefix + '.R'
    f = utils.open_file_write(r_script)
    try:
        # "device" is the name of the R graphics device function to call.
        for device in ['png', 'pdf']:
            # The image file name is built from the prefix parameter (the
            # original referenced an undefined name, outprefix).
            print(device + '("' + prefix + '.' + device + '", width=', plot_width, ', height=', plot_height, ')', file=f)

            print('barplot(c(', heights_str, '), ',
                  ' ylab="', stat_to_plot, '", ',
                  'col=c(', colours_str, ') ',
                  ')', sep='', file=f)

            print('dev.off()', file=f)
    finally:
        # Do not leak the handle if writing fails part way through.
        utils.close(f)

    run_r_script(r_script)
Ejemplo n.º 4
0
def test_nth_variation():
    '''nth_variation() should match the reference values for orders 0 to 4'''
    a, b = create_cases()

    # (order, expected value) pairs, checked in ascending order; checking
    # stops at the first mismatch.
    expected_by_order = (
        (0, 178),
        (1, 1.80865306195869),
        (2, 0.02414940510773),
        (3, 0.00036023819549),
        (4, 5.71352726e-06),
    )

    assert all(
        close(nth_variation(a, b, order), expected)
        for order, expected in expected_by_order
    )
Ejemplo n.º 5
0
    def plot_scatter(self, stat1, stat2, outprefix, legend=False, main=''):
        '''Write <outprefix>.R, which draws a scatter plot of stat1 against stat2, and run it.

        One point is drawn for each scaffolder whose (self.data_type,
        scaffolder) pair is not in bad_runs. The script plots to pdf, png and
        svg files called <outprefix>.<format>.

        stat1     -- key into self.results[scaffolder] for the x axis; also the x label
        stat2     -- key into self.results[scaffolder] for the y axis; also the y label
        outprefix -- prefix of the R script and of the image files
        legend    -- if true, add the output of r_legend('topleft') to each plot
        main      -- plot title
        '''
        # Apply the bad_runs filter once so x and y coordinates stay aligned.
        good_scaffs = [s for s in scaffolders if (self.data_type, s) not in bad_runs]
        x_coords = ','.join(str(int(self.results[s][stat1])) for s in good_scaffs)
        y_coords = ','.join(str(int(self.results[s][stat2])) for s in good_scaffs)

        # Only the first and third of the four returned values are used here.
        r_syms_v, _syms_l, r_cols_v, _cols_l = self.get_r_vectors()

        r_script = outprefix + '.R'
        f = utils.open_file_write(r_script)
        try:
            # "device" is the name of the R graphics device function to call.
            for device in ['pdf', 'png', 'svg']:
                print(device + '("' + outprefix + '.' + device + '")', file=f)

                print('plot(c(', x_coords, '), ',
                      'c(', y_coords, '), ',
                      'xlab="', stat1, '", ',
                      'ylab="', stat2, '", ',
                      'main="', main, '",',
                      'col=', r_cols_v, ', ',
                      'pch=', r_syms_v, ', ',
                      'bg=', r_cols_v,
                      ')', sep='', file=f)

                if legend:
                    print(r_legend('topleft'), file=f)

                print('dev.off()', file=f)
        finally:
            # Do not leak the handle if writing fails part way through.
            utils.close(f)

        run_r_script(r_script)
Ejemplo n.º 6
0
def insert_history_china():
    '''Replace the contents of the history_china table with freshly fetched data.

    The rows come from get_history_china(). The delete and all inserts run in
    a single transaction: it is committed once at the end and rolled back if
    anything fails, so an error does not leave the table empty or half
    filled. The connection is always closed.
    '''
    history_china = get_history_china()
    # Get a database connection.
    conn, cursor = utils.get_conn()
    # The date arrives as a string and is reformatted before being inserted, e.g.
    #  insert into history_china(date) values ('2011-04-08 00:00:00');
    sql = '''insert into  
    history_china(date,confirmed_count,confirm_add,suspect,suspect_add,cure,cure_add,dead,dead_add,
    current_confirmed_count,current_confirmed_Incr) values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)'''

    try:
        cursor.execute('delete from history_china')

        for day in history_china:
            # dateId is parsed as YYYYMMDD and rewritten as YYYY-MM-DD.
            date_id = str(day['dateId'])
            dt = time.strftime("%Y-%m-%d", time.strptime(date_id, "%Y%m%d"))
            cursor.execute(sql, [
                dt, day['confirmedCount'], day['confirmedIncr'],
                day['suspectedCount'], day['suspectedCountIncr'],
                day['curedCount'], day['curedIncr'],
                day['deadCount'], day['deadIncr'],
                day['currentConfirmedCount'], day['currentConfirmedIncr']
            ])

        # Commit the transaction.
        conn.commit()
    except Exception:
        # Undo the delete and any inserts made so far, then let the error propagate.
        conn.rollback()
        raise
    else:
        print("中国历史数据数据插入成功")
    finally:
        utils.close(conn, cursor)
def get_scaff_results(dir):
    '''Return the flag counts read from <dir>/check_scaffolds.log.

    The returned dict has one key per entry of possible_flags plus 'skipped',
    'lost' and 'bad_joins', all starting at zero. Log lines whose first field
    is a number set the count for that (integer) flag; lines starting with
    'lost' or 'skipped' set those counts; other lines are ignored.
    'bad_joins' is the sum of every count except those for flags 0 and 16 and
    for 'skipped'.

    If the log file does not exist, a warning is printed to stderr and all
    counts are zero.
    '''
    flag_counts = {k: 0 for k in possible_flags}
    flag_counts['skipped'] = 0
    flag_counts['lost'] = 0

    log_file = dir + '/check_scaffolds.log'

    if os.path.exists(log_file):
        f = utils.open_file_read(log_file)
        try:
            for line in f:
                a = line.split()

                # A blank line has no fields; skip it rather than raise IndexError.
                if not a:
                    continue

                if a[0].isdigit():
                    flag_counts[int(a[0])] = int(a[1])
                elif a[0] in ['lost', 'skipped']:
                    flag_counts[a[0]] = int(a[1])
        finally:
            # Close the handle even if a line cannot be parsed.
            utils.close(f)
    else:
        print('Warning: no log file', log_file, file=sys.stderr)

    # The sum is fully evaluated before the new key is added to the dict.
    flag_counts['bad_joins'] = sum(
        count for flag, count in flag_counts.items()
        if flag not in (0, 16, 'skipped')
    )

    return flag_counts
Ejemplo n.º 8
0
def file_reader(fname):
    '''Generator that yields one BlastHit for each line of the file fname.

    The file is closed when iteration finishes, when the generator is closed
    or discarded early, or when building a BlastHit raises.
    '''
    f = utils.open_file_read(fname)

    try:
        for line in f:
            yield BlastHit(line)
    finally:
        utils.close(f)
Ejemplo n.º 9
0
def insert_taday_province_china():
    '''Replace the contents of the today_province_china table with freshly fetched data.

    The rows come from get_history_china_details(). The delete and all
    inserts run in a single transaction: it is committed once at the end and
    rolled back if anything fails, so an error does not leave the table empty
    or half filled. The connection is always closed.
    '''
    details_china = get_history_china_details()
    # Get a database connection.
    conn, cursor = utils.get_conn()
    sql = '''insert into  
            today_province_china(id,update_time,province,city,current_confirmed_count,current_confirmed_Incr,cure,cure_add,dead,dead_add) values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)'''
    # Current local time; %X is the locale's time representation.
    data = time.strftime("%Y-%m-%d %X")

    try:
        cursor.execute('delete from today_province_china')

        # Row ids are assigned sequentially, starting at 1.
        for row_id, province in enumerate(details_china, start=1):
            # NOTE(review): the city column is filled with provinceName, the
            # same value as the province column - confirm this is intended.
            cursor.execute(sql, [
                row_id, data, province['provinceName'], province['provinceName'],
                province['currentConfirmedCount'], province['confirmedIncr'],
                province['curedCount'], province['curedIncr'],
                province['deadCount'], province['deadIncr']
            ])

        # Commit the transaction.
        conn.commit()
    except Exception:
        # Undo the delete and any inserts made so far, then let the error propagate.
        conn.rollback()
        raise
    else:
        print("中国各省数据插入成功")
    finally:
        utils.close(conn, cursor)
Ejemplo n.º 10
0
def test_model(env, model):
    '''Run model in env for env.max_path_length (set to 200) steps, rendering each one.

    At every step the model is given the resized window image and the first
    three entries of the observation. The done flag returned by env.step() is
    not consulted, so the loop always runs the full number of steps.
    '''
    model.eval()
    env.max_path_length = 200

    def as_batch_tensor(array):
        # Add a leading axis, then convert to a float tensor on the configured device.
        batched = np.expand_dims(array, axis=0)
        return torch.from_numpy(batched).to(dtype=torch.float,
                                            device=args.device)

    with torch.no_grad():
        obs = env.reset()
        for step in range(env.max_path_length):
            # Prepare tensors
            frame = env._get_viewer('human')._read_pixels_as_in_window()
            t_img = as_batch_tensor(resize_img(frame))
            t_config = as_batch_tensor(obs[:3])

            # Execute action
            action = model(t_img, t_config)
            obs, _reward, _done, _info = env.step(action.squeeze().cpu().numpy())
            env.render()

            # Pause briefly after the first step only.
            if step < 1:
                time.sleep(0.25)
        close(env)
Ejemplo n.º 11
0
def file_reader(fname):
    '''Generator that yields one BlastHit for each line of the file fname.

    The file is closed when iteration finishes, when the generator is closed
    or discarded early, or when building a BlastHit raises.
    '''
    f = utils.open_file_read(fname)

    try:
        for line in f:
            yield BlastHit(line)
    finally:
        utils.close(f)
Ejemplo n.º 12
0
def file_reader(fname):
    '''Generator that yields one MpileupLine for each line of the file fname.

    The file is closed when iteration finishes, when the generator is closed
    or discarded early, or when building an MpileupLine raises.
    '''
    f = utils.open_file_read(fname)

    try:
        for line in f:
            yield MpileupLine(line)
    finally:
        utils.close(f)
Ejemplo n.º 13
0
    def __init__(self, filename):
        '''Load the mutations from the genome diff file filename.

        Sets self.version to the last whitespace-separated field of the first
        line, and fills self.mutations with one Mutation for each data line
        whose first tab-separated field is in mutation_types. Other lines
        starting with '#' are ignored.

        Raises Error if the first line does not start with #=GENOME_DIFF.
        '''
        f = utils.open_file_read(filename)

        self.version = None
        # (seq name, position) -> Mutation. A later line with the same key
        # replaces the earlier entry.
        self.mutations = {}

        try:
            for line in f:
                # first line should define that this is a genome diff file
                if self.version is None:
                    if not line.startswith('#=GENOME_DIFF'):
                        raise Error("Error. first line of file '" + filename +
                                    "' should start with: #=GENOME_DIFF")

                    self.version = line.rstrip().split()[-1]
                    continue

                # for now, ignore the rest of the metadata
                if line.startswith('#'):
                    continue

                fields = line.rstrip().split('\t')

                if fields[0] in mutation_types:
                    mutation = Mutation(line)
                    self.mutations[mutation.seq_id, mutation.position] = mutation
        finally:
            # Close the handle even when the file is rejected above.
            utils.close(f)
Ejemplo n.º 14
0
    def write_gff(self, filename):
        '''Write the to_gff() output of every mutation to filename, one per line.

        Lines are ordered by the keys of self.mutations, i.e. by reference
        name and then position.
        '''
        f = utils.open_file_write(filename)

        try:
            # Keys are (reference name, position) tuples, so sorting them
            # gives the order described above.
            for k in sorted(self.mutations):
                print(self.mutations[k].to_gff(), file=f)
        finally:
            # Close the handle even if to_gff() or the write fails.
            utils.close(f)
def print_dict_as_tsv(d, filename):
    '''Write the intervals stored in d to filename as tab-separated lines.

    d maps a key to an iterable of objects with start and end attributes.
    Each interval becomes one line: key, start + 1, end + 1. Keys are written
    in the iteration order of d.
    '''
    f = utils.open_file_write(filename)

    try:
        for key in d:
            for interval in d[key]:
                # presumably converts 0-based coordinates to 1-based - TODO confirm
                print(key, interval.start + 1, interval.end + 1, sep='\t', file=f)
    finally:
        # Close the handle even if a value is not an interval.
        utils.close(f)
Ejemplo n.º 16
0
def file_reader(fname):
    '''Generator that yields a Caf for each record read from the file fname.

    A single Caf object is created and refilled by get_next_from_file() for
    every record, so the same object is yielded each time; copy it if it must
    outlive the next iteration.

    The file is closed when iteration finishes, when the generator is closed
    or discarded early, or when reading a record raises.
    '''
    f = utils.open_file_read(fname)
    c = Caf()

    try:
        while c.get_next_from_file(f):
            yield c
    finally:
        utils.close(f)
Ejemplo n.º 17
0
def reverse_complement(infile, outfile):
    '''Write every sequence of infile to outfile after calling revcomp() on it.'''
    seq_reader = file_reader(infile)
    fout = utils.open_file_write(outfile)

    try:
        for seq in seq_reader:
            seq.revcomp()
            print(seq, file=fout)
    finally:
        # Close the output even if reading or converting a sequence fails.
        utils.close(fout)
Ejemplo n.º 18
0
def replace_bases(infile, outfile, old, new):
    '''Write every sequence of infile to outfile after calling replace_bases(old, new) on it.'''
    seq_reader = file_reader(infile)
    f_out = utils.open_file_write(outfile)

    try:
        for seq in seq_reader:
            seq.replace_bases(old, new)
            print(seq, file=f_out)
    finally:
        # Close the output even if reading or editing a sequence fails.
        utils.close(f_out)
Ejemplo n.º 19
0
def file_reader(fname):
    '''Generator that yields one SamRecord for each line of fname not starting with '@'.

    The file is closed when iteration finishes, when the generator is closed
    or discarded early, or when building a SamRecord raises.
    '''
    f = utils.open_file_read(fname)

    try:
        for line in f:
            # Lines starting with '@' are skipped.
            if line.startswith('@'):
                continue

            yield SamRecord(line)
    finally:
        utils.close(f)
Ejemplo n.º 20
0
 def test_file_reader_mpileup(self):
     '''file_reader should iterate through a pileup file correctly'''
     tmp_out = 'tmp.mpileup'
     fout = utils.open_file_write(tmp_out)
     try:
         try:
             # Round trip: printing every record should reproduce the input file.
             for mp in mpileup.file_reader('mpileup_unittest.mpileup'):
                 print(mp, file=fout)
         finally:
             utils.close(fout)
         self.assertTrue(filecmp.cmp('mpileup_unittest.mpileup', tmp_out))
     finally:
         # Remove the temporary file even when the test fails.
         if os.path.exists(tmp_out):
             os.unlink(tmp_out)
Ejemplo n.º 21
0
def fastn_to_quasr_primers(infile, outfile):
    '''Write each sequence of infile and its reverse complement to outfile.

    Each output line holds two tab-separated columns: the sequence as read,
    and the sequence of a copy on which revcomp() was called.
    '''
    seq_reader = file_reader(infile)
    f_out = utils.open_file_write(outfile)

    try:
        for seq in seq_reader:
            # Work on a copy so the original sequence stays unchanged.
            seq2 = copy.copy(seq)
            seq2.revcomp()
            print(seq.seq, seq2.seq, sep='\t', file=f_out)
    finally:
        # Close the output even if reading or converting a sequence fails.
        utils.close(f_out)
Ejemplo n.º 22
0
def trim(infile, outfile, start, end):
    '''Write every sequence of infile to outfile after calling trim(start, end) on it.

    Sequences whose length is zero after trimming are not written.
    '''
    seq_reader = file_reader(infile)
    fout = utils.open_file_write(outfile)

    try:
        for seq in seq_reader:
            seq.trim(start, end)
            if len(seq):
                print(seq, file=fout)
    finally:
        # Close the output even if reading or trimming a sequence fails.
        utils.close(fout)
Ejemplo n.º 23
0
 def test_file_reader(self):
     '''file_reader should iterate through a nucmer file correctly'''
     tmp_out = 'nucmer_unittest.coords.tmp'
     fout = utils.open_file_write(tmp_out)
     try:
         try:
             # Print every hit; the result is compared with the expected output file.
             for hit in nucmer.file_reader('nucmer_unittest.coords'):
                 print(hit, file=fout)
         finally:
             utils.close(fout)
         self.assertTrue(filecmp.cmp('nucmer_unittest.coords.out', tmp_out))
     finally:
         # Remove the temporary file even when the test fails.
         if os.path.exists(tmp_out):
             os.unlink(tmp_out)
Ejemplo n.º 24
0
 def test_file_reader_sam(self):
     '''file_reader should iterate through a BAM file correctly'''
     tmp_sam_out = 'tmp.sam'
     fout = utils.open_file_write(tmp_sam_out)
     try:
         try:
             # Print every record read from the BAM file; the result is
             # compared with the expected SAM file.
             for sam_record in sam.file_reader('sam_unittest.bam'):
                 print(sam_record, file=fout)
         finally:
             utils.close(fout)
         self.assertTrue(filecmp.cmp('sam_unittest.sam', tmp_sam_out))
     finally:
         # Remove the temporary file even when the test fails.
         if os.path.exists(tmp_sam_out):
             os.unlink(tmp_sam_out)
Ejemplo n.º 25
0
    def test_get_next_from_file(self):
        '''get_next_from_file() should read seqs from OK, including weirdness in file'''
        f_in = utils.open_file_read('fastn_unittest.fa')
        try:
            fa = fastn.Fasta()
            counter = 1

            # Records are expected to be named 1, 2, 3, ... and to all have
            # the sequence ACGTA.
            while fa.get_next_from_file(f_in):
                self.assertEqual(fa, fastn.Fasta(str(counter), 'ACGTA'))
                counter += 1
        finally:
            # Close the handle even when an assertion fails.
            utils.close(f_in)
Ejemplo n.º 26
0
def file_reader(fname):
    '''Generator that yields one NucmerHit for each line after the header of the file fname.

    The header is every line up to and including the first line that starts
    with '['.

    The file is closed when iteration finishes, when the generator is closed
    or discarded early, or when building a NucmerHit raises.
    '''
    f = utils.open_file_read(fname)
    in_header = True

    try:
        for line in f:
            if in_header:
                # The line starting with '[' is the last header line; it is
                # skipped too.
                if line.startswith('['):
                    in_header = False
                continue
            yield NucmerHit(line)
    finally:
        utils.close(f)
Ejemplo n.º 27
0
    def test_file_reader(self):
        '''file_reader should iterate through a blast file correctly'''
        tmp_out = 'blast_unittest.m8.tmp'

        for fname in ['blast_unittest.m8', 'blast_unittest.m8.with_lengths']:
            fout = utils.open_file_write(tmp_out)
            try:
                try:
                    # Read the file of this iteration (the original always
                    # read and compared the first file, so the second was
                    # never tested).
                    for hit in blast.file_reader(fname):
                        print(hit, file=fout)
                finally:
                    utils.close(fout)
                # Round trip: printing every hit should reproduce the input file.
                self.assertTrue(filecmp.cmp(fname, tmp_out))
            finally:
                # Remove the temporary file even when the test fails.
                if os.path.exists(tmp_out):
                    os.unlink(tmp_out)
Ejemplo n.º 28
0
def add_sequence_lengths(infile, ref_fai, qry_fai, outfile):
    '''Write the hits of infile to outfile with sequence lengths added.

    Lengths are loaded from ref_fai and qry_fai with fastn.lengths_from_fai(),
    then passed to add_sequence_lengths() of every hit before it is printed.
    '''
    ref_lengths = {}
    qry_lengths = {}

    # lengths_from_fai() fills the dict passed as its second argument.
    fastn.lengths_from_fai(ref_fai, ref_lengths)
    fastn.lengths_from_fai(qry_fai, qry_lengths)

    f = utils.open_file_write(outfile)
    try:
        for hit in file_reader(infile):
            hit.add_sequence_lengths(ref_lengths, qry_lengths)
            print(hit, file=f)
    finally:
        # Close the output even if reading or updating a hit fails.
        utils.close(f)
Ejemplo n.º 29
0
def add_sequence_lengths(infile, ref_fai, qry_fai, outfile):
    '''Write the hits of infile to outfile with sequence lengths added.

    Lengths are loaded from ref_fai and qry_fai with fastn.lengths_from_fai(),
    then passed to add_sequence_lengths() of every hit before it is printed.
    '''
    ref_lengths = {}
    qry_lengths = {}

    # lengths_from_fai() fills the dict passed as its second argument.
    fastn.lengths_from_fai(ref_fai, ref_lengths)
    fastn.lengths_from_fai(qry_fai, qry_lengths)

    f = utils.open_file_write(outfile)
    try:
        for hit in file_reader(infile):
            hit.add_sequence_lengths(ref_lengths, qry_lengths)
            print(hit, file=f)
    finally:
        # Close the output even if reading or updating a hit fails.
        utils.close(f)
Ejemplo n.º 30
0
def insert_hotdata():
    '''Insert the items returned by get_hotdata() into the hotsearch table.

    Every item is stored with the same timestamp, taken once before the
    inserts. All inserts are committed together; if anything fails they are
    rolled back. The connection is always closed.
    '''
    # Get a database connection.
    conn, cursor = utils.get_conn()
    sql = 'insert into hotsearch(dt,content) values (%s,%s)'

    try:
        datas = get_hotdata()
        # Current local time, shared by every inserted row.
        dt = time.strftime("%Y-%m-%d %X")
        for item in datas:
            cursor.execute(sql, (dt, item))

        # Commit once, after all rows were inserted.
        conn.commit()
    except Exception:
        # Undo any inserts made so far, then let the error propagate.
        conn.rollback()
        raise
    else:
        print('数据插入成功')
    finally:
        utils.close(conn, cursor)
Ejemplo n.º 31
0
def test_minkowsky():
    '''minkowsky() should reject order 0 and match the reference values for orders 1 to 4'''
    a, b = create_cases()

    # Order 0 must raise ValueError; returning normally is a failure.
    try:
        minkowsky(a, b, 0)
    except ValueError:
        pass
    else:
        assert False

    # (order, expected value) pairs, checked in ascending order; checking
    # stops at the first mismatch.
    expected_by_order = (
        (1, 1.80865306195869),
        (2, 0.15540078863291),
        (3, 0.07115355218523),
        (4, 0.04889067980003),
    )

    assert all(
        close(minkowsky(a, b, order), expected)
        for order, expected in expected_by_order
    )
    def __init__(self, bsub_o, log_file, max_joins, extra_cpu=0, extra_mem=0):
        '''Collect join counts from log_file and CPU/memory usage from bsub_o.

        bsub_o    -- bsub output file, summarised by running bsub-out2stats.py
        log_file  -- log with one "<flag or name> <count>" pair per line; if
                     it does not exist a warning is printed and counts stay 0
        max_joins -- number of joins that could potentially be made
        extra_cpu -- CPU added on top of the value from bsub_o for 'Total CPU'
        extra_mem -- memory compared with the value from bsub_o for max_mem

        The asserts require bsub-out2stats.py to report exactly one line with
        an exit code of 0.
        '''
        # get flag counts etc from the log file
        self.flag_counts = {k: 0 for k in ScaffResults.possible_flags}
        self.stats = {k: 0 for k in ScaffResults.evaluation_score_keys}

        if os.path.exists(log_file):
            f = utils.open_file_read(log_file)
            try:
                for line in f:
                    a = line.split()

                    # A blank line has no fields; skip it rather than raise IndexError.
                    if not a:
                        continue

                    if a[0].isdigit():
                        self.flag_counts[int(a[0])] = int(a[1])
                    elif a[0] == 'lost':
                        self.stats['Lost tags'] = int(a[1])
                    elif a[0] == 'skipped':
                        self.stats['Skipped tags'] = int(a[1])
            finally:
                # Close the handle even if a line cannot be parsed.
                utils.close(f)

            # Every flag other than 0 and 16 counts as a bad join, as do lost tags.
            self.stats['Bad joins'] = sum(
                count for flag, count in self.flag_counts.items()
                if flag not in (0, 16)
            ) + self.stats['Lost tags']
        else:
            print('Warning: no log file', log_file, file=sys.stderr)

        # get cpu and mem from bsub file
        bsub_out = utils.syscall_get_stdout('bsub-out2stats.py -s ' + bsub_o)
        assert len(bsub_out) == 1
        (attempt_no, exit_code, wall_hrs, cpu_secs, cpu_hrs, mem, swap,
         filename) = bsub_out[0].split('\t')
        assert exit_code == '0'

        self.stats['Correct joins'] = self.flag_counts[0]
        self.cpu = int(round(float(cpu_secs), 0))
        self.mem = int(mem)
        self.extra_cpu = extra_cpu
        self.extra_mem = extra_mem
        self.stats['Total CPU'] = self.cpu + extra_cpu
        self.max_mem = max(self.mem, extra_mem)
        self.scores = {k: -1 for k in ScaffResults.evaluation_score_keys}
        self.worksheet_row = -1
        self.potential_joins = max_joins
        self.total_joins = self.stats['Correct joins'] + self.stats['Bad joins']

        if self.total_joins > 0:
            self.percent_joins_correct = self.stats[
                'Correct joins'] / self.total_joins
        else:
            self.percent_joins_correct = 0

        # Same guard as above: avoid dividing by zero when no joins were possible.
        if self.potential_joins > 0:
            self.percent_correct_joins_made = self.stats[
                'Correct joins'] / self.potential_joins
        else:
            self.percent_correct_joins_made = 0