def test_get_next_from_file(self):
    '''get_next_from_file() should read seqs from file OK, and raise an error at a badly formatted file'''
    bad_files = [
        'fastn_unittest_fail_no_AT.fq',
        'fastn_unittest_fail_no_seq.fq',
        'fastn_unittest_fail_no_plus.fq',
        'fastn_unittest_fail_no_qual.fq'
    ]

    for fname in bad_files:
        f_in = utils.open_file_read(fname)
        fq = fastn.Fastq()
        with self.assertRaises(fastn.Error):
            while fq.get_next_from_file(f_in):
                pass
        utils.close(f_in)

    fname = 'fastn_unittest_good_file.fq'
    try:
        f_in = open(fname)
    except IOError:
        print("Error opening '" + fname + "'", file=sys.stderr)
        sys.exit(1)

    fq = fastn.Fastq()
    while fq.get_next_from_file(f_in):
        self.assertEqual(fq, fastn.Fastq('ID', 'ACGTA', 'IIIII'))
    utils.close(f_in)

def barplot_of_one_stat_sorted(self, stat, outprefix, main='', stat2=None):
    r_script = outprefix + '.R'
    f = utils.open_file_write(r_script)

    if stat2 is None:
        bar_heights = [self.results[scaff][stat] for scaff in scaffolders if (self.data_type, scaff) not in bad_runs]
    else:
        bar_heights = [self.results[scaff][stat] + self.results[scaff][stat2] for scaff in scaffolders if (self.data_type, scaff) not in bad_runs]

    all_data = list(zip([int(x) for x in bar_heights],
                        [scaff for scaff in scaffolders if (self.data_type, scaff) not in bad_runs],
                        [r_colours[scaff] for scaff in scaffolders if (self.data_type, scaff) not in bad_runs]))
    all_data.sort()
    bar_heights = [str(x[0]) for x in all_data]
    names = 'c(' + ','.join(['"' + x[1] + '"' for x in all_data]) + ')'
    cols = ['"' + t[2] + '"' for t in all_data]

    for plot_type in ['pdf', 'png', 'svg']:
        print(plot_type + '("' + outprefix + '.' + plot_type + '")', file=f)
        print('par(mar=c(10,4,4,2) + 0.1)', file=f)
        print('barplot(c(' + ','.join(bar_heights), '), ',
              'names.arg=', names, ', ',
              'main="', main, '",',
              ' ylab="', stat, '", ',
              'col=c(' + ','.join(cols) + '), ',
              'las=2',
              ')', sep='', file=f)
        print('dev.off()', file=f)

    utils.close(f)
    run_r_script(r_script)

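# For reference, a hedged sketch of the script barplot_of_one_stat_sorted()
# emits (scaffolder names, stat name, and numbers here are made up; one such
# block is written per device type):
_EXAMPLE_BARPLOT_R = (
    'pdf("outprefix.pdf")\n'
    'par(mar=c(10,4,4,2) + 0.1)\n'
    'barplot(c(10,42), names.arg=c("scaffA","scaffB"), main="", '
    'ylab="Correct joins", col=c("red","blue"), las=2)\n'
    'dev.off()\n'
)
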
def barplot_by_input_data(stat_to_plot, prefix, plot_width=21, plot_height=7):
    bar_heights = []
    colours = []

    for t in test_data_types:
        for scaff in scaffolders:
            bar_heights.append(results[t].results[scaff][stat_to_plot])
            colours.append(r_colours[scaff])

    r_script = prefix + '.R'
    f = utils.open_file_write(r_script)

    for plot_type in ['png', 'pdf']:
        print(plot_type + '("' + prefix + '.' + plot_type + '", width=', plot_width, ', height=', plot_height, ')', file=f)
        print('barplot(c(' + ','.join(str(x) for x in bar_heights), '), ',
              #'names.arg=', names, ', ',
              ' ylab="', stat_to_plot, '", ',
              'col=c(', ','.join(['"' + x + '"' for x in colours]), ') ',
              ')', sep='', file=f)
        print('dev.off()', file=f)

    utils.close(f)
    run_r_script(r_script)

def test_nth_variation():
    a, b = create_cases()
    assert (close(nth_variation(a, b, 0), 178)
            and close(nth_variation(a, b, 1), 1.80865306195869)
            and close(nth_variation(a, b, 2), 0.02414940510773)
            and close(nth_variation(a, b, 3), 0.00036023819549)
            and close(nth_variation(a, b, 4), 5.71352726e-06))

def plot_scatter(self, stat1, stat2, outprefix, legend=False, main=''):
    r_script = outprefix + '.R'
    f = utils.open_file_write(r_script)
    x_coords = [int(self.results[scaff][stat1]) for scaff in scaffolders if (self.data_type, scaff) not in bad_runs]
    y_coords = [int(self.results[scaff][stat2]) for scaff in scaffolders if (self.data_type, scaff) not in bad_runs]
    x_max = max(x_coords)
    y_max = max(y_coords)
    r_syms_v, r_syms_l, r_cols_v, r_cols_l = self.get_r_vectors()

    for plot_type in ['pdf', 'png', 'svg']:
        print(plot_type + '("' + outprefix + '.' + plot_type + '")', file=f)
        print('plot(c(' + ','.join(str(x) for x in x_coords), '), ',
              'c(', ','.join(str(x) for x in y_coords), '), ',
              'xlab="', stat1, '", ',
              'ylab="', stat2, '", ',
              #'xlim=c(0,', x_max, '), ',
              #'ylim=c(0,', y_max, '), ',
              'main="', main, '",',
              'col=', r_cols_v, ', ',
              'pch=', r_syms_v, ', ',
              'bg=', r_cols_v, ')', sep='', file=f)
        if legend:
            print(r_legend('topleft'), file=f)
        print('dev.off()', file=f)

    utils.close(f)
    run_r_script(r_script)

def insert_history_china():
    history_china = get_history_china()
    # get a database connection
    conn, cursor = utils.get_conn()
    # dates are strings and cannot be written to the database as-is, e.g.
    # insert into history_china(date) values ('2011-04-08 00:00:00');
    sql = '''insert into history_china(date,confirmed_count,confirm_add,suspect,suspect_add,cure,cure_add,dead,dead_add,
             current_confirmed_count,current_confirmed_Incr)
             values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)'''
    cursor.execute('delete from history_china')
    # commit the transaction
    conn.commit()

    for i in history_china:
        # convert the date string to Y-m-d form
        datastr = str(i['dateId'])
        tup = time.strptime(datastr, "%Y%m%d")
        dt = time.strftime("%Y-%m-%d", tup)
        cursor.execute(sql, [
            dt, i['confirmedCount'], i['confirmedIncr'], i['suspectedCount'],
            i['suspectedCountIncr'], i['curedCount'], i['curedIncr'],
            i['deadCount'], i['deadIncr'], i['currentConfirmedCount'],
            i['currentConfirmedIncr']
        ])
        # commit the transaction
        conn.commit()

    print("China historical data inserted successfully")
    utils.close(conn, cursor)

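# The insert functions here assume a pair of helpers in utils. A minimal
# sketch of what they might look like, assuming pymysql; the connection
# parameters below are placeholders, not the project's real config:
import pymysql

def get_conn():
    # open a connection and return it together with a cursor
    conn = pymysql.connect(host='127.0.0.1', user='root', password='123456',
                           db='cov', charset='utf8')
    return conn, conn.cursor()

def close(conn, cursor):
    # close the cursor and connection in reverse order of creation
    if cursor:
        cursor.close()
    if conn:
        conn.close()
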
def get_scaff_results(dir):
    flag_counts = {k: 0 for k in possible_flags}
    flag_counts['skipped'] = 0
    flag_counts['lost'] = 0
    log_file = dir + '/check_scaffolds.log'

    if os.path.exists(log_file):
        f = utils.open_file_read(log_file)
        for line in f:
            a = line.split()
            if a[0].isdigit():
                flag_counts[int(a[0])] = int(a[1])
            elif a[0] in ['lost', 'skipped']:
                flag_counts[a[0]] = int(a[1])
        utils.close(f)
    else:
        print('Warning: no log file', log_file, file=sys.stderr)

    flag_counts['bad_joins'] = sum([flag_counts[x] for x in flag_counts.keys() if x not in [0, 16, 'skipped']])
    return flag_counts

def file_reader(fname):
    f = utils.open_file_read(fname)
    for line in f:
        yield BlastHit(line)
    utils.close(f)

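# A minimal usage sketch of the generator-based reader above (the file name
# is hypothetical). Each iteration yields one parsed BlastHit; the handle is
# closed only when the generator is exhausted, so breaking out early would
# leave it open.
def print_all_hits(fname):
    for hit in file_reader(fname):
        print(hit)
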
def insert_today_province_china():
    details_china = get_history_china_details()
    # get a database connection
    conn, cursor = utils.get_conn()
    cursor.execute('delete from today_province_china')
    # commit the transaction
    conn.commit()
    sql = '''insert into today_province_china(id,update_time,province,city,current_confirmed_count,current_confirmed_Incr,cure,cure_add,dead,dead_add)
             values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)'''
    # current timestamp
    data = time.strftime("%Y-%m-%d %X")
    i = 1

    for province in details_china:
        cursor.execute(sql, [
            i, data, province['provinceName'], province['provinceName'],
            province['currentConfirmedCount'], province['confirmedIncr'],
            province['curedCount'], province['curedIncr'],
            province['deadCount'], province['deadIncr']
        ])
        i += 1
        # commit the transaction
        conn.commit()

    print("China per-province data inserted successfully")
    utils.close(conn, cursor)

def test_model(env, model):
    model.eval()
    env.max_path_length = 200

    with torch.no_grad():
        obs = env.reset()
        for step in range(env.max_path_length):
            # prepare input tensors
            img = env._get_viewer('human')._read_pixels_as_in_window()
            img = resize_img(img)
            img = np.expand_dims(img, axis=0)
            t_img = torch.from_numpy(img).to(dtype=torch.float, device=args.device)
            config = np.expand_dims(obs[:3], axis=0)
            t_config = torch.from_numpy(config).to(dtype=torch.float, device=args.device)

            # execute action
            action = model(t_img, t_config)
            a = action.squeeze().cpu().numpy()
            obs, reward, done, info = env.step(a)
            env.render()

            # Save video
            if step < 1:
                time.sleep(0.25)

    close(env)

def file_reader(fname):
    f = utils.open_file_read(fname)
    for line in f:
        yield MpileupLine(line)
    utils.close(f)

def __init__(self, filename):
    f = utils.open_file_read(filename)
    self.version = None
    self.mutations = {}  # (seq name, position) -> Mutation

    for line in f:
        # the first line should declare that this is a genome diff file
        if self.version is None:
            if not line.startswith('#=GENOME_DIFF'):
                raise Error("Error. First line of file '" + filename + "' should start with: #=GENOME_DIFF")
            self.version = line.rstrip().split()[-1]
            continue

        # for now, ignore the rest of the metadata
        if line.startswith('#'):
            continue

        fields = line.rstrip().split('\t')
        if fields[0] in mutation_types:
            mutation = Mutation(line)
            self.mutations[mutation.seq_id, mutation.position] = mutation

    utils.close(f)

def write_gff(self, filename):
    # sort the output by reference name, then position
    f = utils.open_file_write(filename)
    for k in sorted(self.mutations.keys()):
        print(self.mutations[k].to_gff(), file=f)
    utils.close(f)

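# A usage sketch tying the parser and writer above together; the class name
# GenomeDiff and the file names are assumptions for illustration.
def genome_diff_to_gff(infile, outfile):
    gd = GenomeDiff(infile)  # parses the #=GENOME_DIFF header and mutation lines
    gd.write_gff(outfile)    # one GFF line per mutation, sorted by (seq name, position)
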
def print_dict_as_tsv(d, filename):
    f = utils.open_file_write(filename)
    for id in d:
        for interval in d[id]:
            print(id, interval.start + 1, interval.end + 1, sep='\t', file=f)
    utils.close(f)

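# A worked sketch of the 0-based to 1-based conversion above, assuming a
# minimal interval type (the project's real interval class is not shown here):
class _Interval:
    def __init__(self, start, end):
        self.start = start
        self.end = end

def _demo_print_dict_as_tsv():
    # writes the line: chr1<TAB>1<TAB>100
    print_dict_as_tsv({'chr1': [_Interval(0, 99)]}, 'demo.tsv')
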
def file_reader(fname):
    f = utils.open_file_read(fname)
    c = Caf()
    while c.get_next_from_file(f):
        yield c
    utils.close(f)

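# Note that, unlike the line-based readers in this section, the Caf reader
# yields the same Caf object each time, refilled by get_next_from_file(), so
# callers who want to keep records past one iteration must copy them. A
# hedged sketch:
def caf_records_to_list(fname):
    import copy
    return [copy.copy(c) for c in file_reader(fname)]
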
def reverse_complement(infile, outfile):
    seq_reader = file_reader(infile)
    fout = utils.open_file_write(outfile)
    for seq in seq_reader:
        seq.revcomp()
        print(seq, file=fout)
    utils.close(fout)

def replace_bases(infile, outfile, old, new):
    seq_reader = file_reader(infile)
    f_out = utils.open_file_write(outfile)
    for seq in seq_reader:
        seq.replace_bases(old, new)
        print(seq, file=f_out)
    utils.close(f_out)

def file_reader(fname):
    f = utils.open_file_read(fname)
    for line in f:
        # skip SAM header lines
        if line.startswith('@'):
            continue
        yield SamRecord(line)
    utils.close(f)

def test_file_reader_mpileup(self):
    '''file_reader should iterate through a pileup file correctly'''
    tmp_out = 'tmp.mpileup'
    fout = utils.open_file_write(tmp_out)
    mpileup_reader = mpileup.file_reader('mpileup_unittest.mpileup')
    for mp in mpileup_reader:
        print(mp, file=fout)
    utils.close(fout)
    self.assertTrue(filecmp.cmp('mpileup_unittest.mpileup', tmp_out))
    os.unlink(tmp_out)

def fastn_to_quasr_primers(infile, outfile):
    seq_reader = file_reader(infile)
    f_out = utils.open_file_write(outfile)
    for seq in seq_reader:
        seq2 = copy.copy(seq)
        seq2.revcomp()
        print(seq.seq, seq2.seq, sep='\t', file=f_out)
    utils.close(f_out)

def trim(infile, outfile, start, end):
    seq_reader = file_reader(infile)
    fout = utils.open_file_write(outfile)
    for seq in seq_reader:
        seq.trim(start, end)
        if len(seq):
            print(seq, file=fout)
    utils.close(fout)

def test_file_reader(self):
    '''file_reader should iterate through a nucmer file correctly'''
    tmp_out = 'nucmer_unittest.coords.tmp'
    fout = utils.open_file_write(tmp_out)
    nucmer_reader = nucmer.file_reader('nucmer_unittest.coords')
    for hit in nucmer_reader:
        print(hit, file=fout)
    utils.close(fout)
    self.assertTrue(filecmp.cmp('nucmer_unittest.coords.out', tmp_out))
    os.unlink(tmp_out)

def test_file_reader_sam(self):
    '''file_reader should iterate through a BAM file correctly'''
    tmp_sam_out = 'tmp.sam'
    fout = utils.open_file_write(tmp_sam_out)
    sam_reader = sam.file_reader('sam_unittest.bam')
    for sam_record in sam_reader:
        print(sam_record, file=fout)
    utils.close(fout)
    self.assertTrue(filecmp.cmp('sam_unittest.sam', tmp_sam_out))
    os.unlink(tmp_sam_out)

def test_get_next_from_file(self):
    '''get_next_from_file() should read seqs from file OK, including weirdness in file'''
    f_in = utils.open_file_read('fastn_unittest.fa')
    fa = fastn.Fasta()
    counter = 1
    while fa.get_next_from_file(f_in):
        self.assertEqual(fa, fastn.Fasta(str(counter), 'ACGTA'))
        counter += 1
    utils.close(f_in)

def file_reader(fname):
    f = utils.open_file_read(fname)
    in_header = True

    for line in f:
        if in_header:
            # the header ends at the first line starting with '[' (the column headings)
            if line.startswith('['):
                in_header = False
            continue
        yield NucmerHit(line)

    utils.close(f)

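# The reader above appears to assume "show-coords -T"-style nucmer output,
# where everything up to and including the first line starting with '[' is
# header and every later line is one hit. A hedged sketch of such a file,
# with made-up values:
_EXAMPLE_COORDS = (
    'ref.fa qry.fa\n'
    'NUCMER\n'
    '\n'
    '[S1]\t[E1]\t[S2]\t[E2]\t[LEN 1]\t[LEN 2]\t[% IDY]\t[TAGS]\n'
    '1\t1000\t1\t1000\t1000\t1000\t99.90\tref\tqry\n'
)
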
def test_file_reader(self):
    '''file_reader should iterate through a blast file correctly'''
    tmp_out = 'blast_unittest.m8.tmp'
    for fname in ['blast_unittest.m8', 'blast_unittest.m8.with_lengths']:
        # reading either file should reproduce the plain m8 output
        blast_reader = blast.file_reader(fname)
        fout = utils.open_file_write(tmp_out)
        for hit in blast_reader:
            print(hit, file=fout)
        utils.close(fout)
        self.assertTrue(filecmp.cmp('blast_unittest.m8', tmp_out))
        os.unlink(tmp_out)

def add_sequence_lengths(infile, ref_fai, qry_fai, outfile):
    ref_lengths = {}
    qry_lengths = {}
    fastn.lengths_from_fai(ref_fai, ref_lengths)
    fastn.lengths_from_fai(qry_fai, qry_lengths)
    f = utils.open_file_write(outfile)
    blast_reader = file_reader(infile)
    for hit in blast_reader:
        hit.add_sequence_lengths(ref_lengths, qry_lengths)
        print(hit, file=f)
    utils.close(f)

def insert_hotdata():
    # get a database connection
    conn, cursor = utils.get_conn()
    sql = 'insert into hotsearch(dt,content) values (%s,%s)'
    datas = get_hotdata()
    # current timestamp
    dt = time.strftime("%Y-%m-%d %X")
    for item in datas:
        cursor.execute(sql, (dt, item))
    conn.commit()
    print('Data inserted successfully')
    utils.close(conn, cursor)

def test_minkowsky():
    a, b = create_cases()

    # order 0 is invalid and should raise
    try:
        minkowsky(a, b, 0)
        assert False
    except ValueError:
        assert True

    assert (close(minkowsky(a, b, 1), 1.80865306195869)
            and close(minkowsky(a, b, 2), 0.15540078863291)
            and close(minkowsky(a, b, 3), 0.07115355218523)
            and close(minkowsky(a, b, 4), 0.04889067980003))

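# A hedged sketch of the distance the test above appears to exercise,
# assuming the usual Minkowski definition; the project's own minkowsky()
# may differ, e.g. in normalisation:
def minkowsky_sketch(a, b, p):
    if p < 1:
        raise ValueError('Minkowski order p must be >= 1')
    return sum(abs(x - y) ** p for x, y in zip(a, b)) ** (1 / p)
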
def __init__(self, bsub_o, log_file, max_joins, extra_cpu=0, extra_mem=0):
    # get flag counts etc from the log file
    self.flag_counts = {k: 0 for k in ScaffResults.possible_flags}
    self.stats = {k: 0 for k in ScaffResults.evaluation_score_keys}

    if os.path.exists(log_file):
        f = utils.open_file_read(log_file)
        for line in f:
            a = line.split()
            if a[0].isdigit():
                self.flag_counts[int(a[0])] = int(a[1])
            elif a[0] == 'lost':
                self.stats['Lost tags'] = int(a[1])
            elif a[0] == 'skipped':
                self.stats['Skipped tags'] = int(a[1])
        utils.close(f)
        self.stats['Bad joins'] = sum([self.flag_counts[x] for x in self.flag_counts.keys() if x not in [0, 16]]) + self.stats['Lost tags']
    else:
        print('Warning: no log file', log_file, file=sys.stderr)

    # get cpu and mem from bsub file
    bsub_out = utils.syscall_get_stdout('bsub-out2stats.py -s ' + bsub_o)
    assert len(bsub_out) == 1
    (attempt_no, exit_code, wall_hrs, cpu_secs, cpu_hrs, mem, swap, filename) = bsub_out[0].split('\t')
    assert exit_code == '0'

    self.stats['Correct joins'] = self.flag_counts[0]
    self.cpu = int(round(float(cpu_secs), 0))
    self.mem = int(mem)
    self.extra_cpu = extra_cpu
    self.extra_mem = extra_mem
    self.stats['Total CPU'] = self.cpu + extra_cpu
    self.max_mem = max(self.mem, extra_mem)
    self.scores = {k: -1 for k in ScaffResults.evaluation_score_keys}
    self.worksheet_row = -1
    self.potential_joins = max_joins
    self.total_joins = self.stats['Correct joins'] + self.stats['Bad joins']

    if self.total_joins > 0:
        self.percent_joins_correct = self.stats['Correct joins'] / self.total_joins
    else:
        self.percent_joins_correct = 0

    self.percent_correct_joins_made = self.stats['Correct joins'] / self.potential_joins