def main(mode=ct.PAPER_TRADING, start_date='20170815', end_date='20171113'):
    if mode == ct.PAPER_TRADING:
        cash = 100000
        beta = 9.49
        mean = -0.282
        std = 34.73
        feed = dataFramefeed.Feed()
        instruments = ['300296', '300613']
        fpath = '/Users/hellobiek/Documents/workspace/python/quant/smart_deal_tool/configure/tushare.json'
        ts_client = get_tushare_client(fpath)
        for code in instruments:
            df = ts.pro_bar(pro_api=ts_client, ts_code=add_suffix(code), adj='qfq',
                            start_date=start_date, end_date=end_date)
            df = df.rename(columns={"ts_code": "code", "trade_date": "date",
                                    "vol": "volume", "pct_change": "pchange"})
            df['date'] = df.date.apply(lambda x: time.strftime('%Y-%m-%d', time.strptime(x, "%Y%m%d")))
            df = df.set_index("date")
            feed.addBarsFromDataFrame(code, df)
        # Broker setup: commission model
        broker_commission = broker.backtesting.TradePercentage(0.002)
        # Fill strategy: full volume limit, no slippage
        fill_stra = broker.fillstrategy.DefaultStrategy(volumeLimit=1.0)
        sli_stra = broker.slippage.NoSlippage()
        fill_stra.setSlippageModel(sli_stra)
        # Assemble the backtesting broker
        brk = broker.backtesting.Broker(cash, feed, broker_commission)
        brk.setFillStrategy(fill_stra)
        pStrategy = PairTradingStrategy(feed, instruments, brk, beta, mean, std, cash)
        # Attach analyzers for returns, Sharpe ratio, drawdown, and trade stats
        returnsAnalyzer = sreturn.Returns()
        pStrategy.attachAnalyzer(returnsAnalyzer)
        sharpeRatioAnalyzer = sharpe.SharpeRatio()
        pStrategy.attachAnalyzer(sharpeRatioAnalyzer)
        drawDownAnalyzer = drawdown.DrawDown()
        pStrategy.attachAnalyzer(drawDownAnalyzer)
        tradesAnalyzer = trades.Trades()
        pStrategy.attachAnalyzer(tradesAnalyzer)
        plt = plotter.StrategyPlotter(pStrategy)
        # Plot the simple returns on each bar.
        plt.getOrCreateSubplot("returns").addDataSeries("Simple returns", returnsAnalyzer.getReturns())
        pStrategy.run()
        plt.plot()
        print("Final portfolio value: $%.2f" % pStrategy.getResult())
        print("Cumulative returns: %.2f %%" % (returnsAnalyzer.getCumulativeReturns()[-1] * 100))
        print("Sharpe ratio: %.2f" % sharpeRatioAnalyzer.getSharpeRatio(0.05))
        print("Max. drawdown: %.2f %%" % (drawDownAnalyzer.getMaxDrawDown() * 100))
        print("Longest drawdown duration: %s" % drawDownAnalyzer.getLongestDrawDownDuration())
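# The hard-coded beta/mean/std above look like offline calibration outputs for
# the '300296'/'300613' pair. A minimal sketch of how they could be fit,
# assuming an OLS hedge ratio on close prices (calibrate_pair and the closes
# DataFrame are illustrative names, not part of this repo):
import numpy as np

def calibrate_pair(closes, code1='300296', code2='300613'):
    closes = closes.dropna()  # drop days where either stock did not trade
    y = closes[code1].astype(float).values
    x = closes[code2].astype(float).values
    beta = np.polyfit(x, y, 1)[0]  # OLS slope used as the hedge ratio
    spread = y - beta * x          # residual spread the strategy trades on
    return beta, spread.mean(), spread.std()

# Usage sketch: closes = select_code(['300296', '300613'], '20160815', '20170815')
#               beta, mean, std = calibrate_pair(closes)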
def process(infiles, opts, replicons):
    scriptpath = os.path.dirname(os.path.abspath(__file__))

    # Split summary files
    split_files_all = list()
    split_files_q0 = list()
    for norm_file in infiles:
        outfile_all = common.add_suffix(norm_file, common.ALL_SUFFIX)
        outfile_q0 = common.add_suffix(norm_file, common.Q0_SUFFIX)
        common.run_cmd(["python", os.path.join(scriptpath, "split_sum.py"),
                        norm_file, outfile_all, outfile_q0])
        split_files_all.append(outfile_all)
        split_files_q0.append(outfile_q0)

    # Compile summary sets
    readscomp = os.path.join(opts.workdir, common.READSCOMP)
    cmd = ["python", os.path.join(scriptpath, "compile_sets.py"), "--outfile", readscomp]
    if opts.ok_locs_file:
        cmd.extend(["--ok_locs_file", opts.ok_locs_file])
    cmd.extend(split_files_all)
    cmd.extend(split_files_q0)
    common.run_cmd(cmd)

    # Annotate positions
    reads_anno = common.add_suffix(readscomp, common.ANNO_SUFFIX)
    cmd = ["python", os.path.join(scriptpath, "annotate.py"),
           "--infile", readscomp, "--annofiles", opts.annofiles,
           "--outfile", reads_anno, "--fasta", opts.reference_fa]
    common.run_cmd(cmd)

    # Merge annotations and counts
    anno_hits_file = os.path.join(opts.workdir, opts.outfile_anno)
    anno_reduced = common.add_suffix(reads_anno, common.TAB_SUFFIX)
    # Join the replicon names, ordered by their keys
    replicon_names = ",".join(replicons[key] for key in sorted(replicons))
    common.run_cmd(["python", os.path.join(scriptpath, "merge_anno.py"),
                    "--reads_file", readscomp, "--anno_file", reads_anno,
                    "--names", replicon_names,
                    "--outfile1", anno_hits_file, "--outfile2", anno_reduced])

    # Tabulate by gene
    tab_file = os.path.join(opts.workdir, opts.outfile_tab)
    cmd = ["python", os.path.join(scriptpath, "tabulate.py"),
           "--infile", anno_reduced, "--annofiles", opts.annofiles,
           "--outfile", tab_file, "--fasta", opts.reference_fa]
    common.run_cmd(cmd)

    print "Processing complete. Output in " + anno_hits_file + " and " + tab_file
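# The pipeline above leans on two helpers from the repo's common module that
# are defined elsewhere. Plausible stand-ins, shown only to document the
# behavior the calls rely on (these sketches are assumptions, not the repo's
# actual code):
import os
import subprocess

def add_suffix_sketch(path, suffix):
    # Splice a suffix in before the file extension:
    # add_suffix_sketch("sample.fastq", "_ch") -> "sample_ch.fastq"
    (root, ext) = os.path.splitext(path)
    return root + suffix + ext

def run_cmd_sketch(cmd):
    # Echo and run a command list, failing loudly on a non-zero exit status
    print " ".join(cmd)
    subprocess.check_call(cmd)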
def select_code(code_list, start_date, end_date):
    date_arrays = list()
    for mdate in get_dates_array(start_date, end_date, dformat="%Y%m%d"):
        if CCalendar.is_trading_day(transfer_int_to_date_string(mdate), redis=mredis):
            date_arrays.append(mdate)
    # Choose stocks that have not been suspended for very long
    total_df = pd.DataFrame()
    for code in code_list:
        df = ts.pro_bar(pro_api=ts_client, ts_code=add_suffix(code), adj='qfq',
                        start_date=start_date, end_date=end_date)
        if df is None:
            continue
        # Keep the stock only if it traded on more than 80% of the trading days
        if len(df) > int(0.8 * len(date_arrays)):
            df = df.rename(columns={"ts_code": "code", "trade_date": "date", "pct_change": "pchange"})
            df = df.set_index('date')
            total_df[code] = df.close
    return total_df
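# select_code returns one close-price column per surviving code. A typical
# next step in a pair-trading workflow, sketched here under that assumption
# (pair_candidates and threshold are illustrative names, not from this repo),
# is ranking candidate pairs by the correlation of their closes before fitting
# a hedge ratio:
def pair_candidates(total_df, threshold=0.9):
    corr = total_df.corr()  # pairwise Pearson correlation of close prices
    pairs = list()
    cols = list(corr.columns)
    for i, c1 in enumerate(cols):
        for c2 in cols[i + 1:]:
            if corr.loc[c1, c2] > threshold:
                pairs.append((c1, c2, corr.loc[c1, c2]))
    # Best-correlated pairs first
    return sorted(pairs, key=lambda p: -p[2])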
def process(infiles, opts):
    scriptpath = os.path.dirname(os.path.abspath(__file__))
    label = os.path.splitext(os.path.basename(infiles[0]))[0]

    # Group the input files (read1, index, read2) if using index reads and/or 2nd-end reads
    infile_groups = list()
    try:
        while len(infiles) > 0:
            fq1 = infiles.pop(0)
            group = (fq1, None, None)
            if opts.demux_r2 and (opts.demux_i or opts.verify_i):
                group = (fq1, infiles.pop(0), infiles.pop(0))
            elif opts.demux_r2:
                group = (fq1, None, infiles.pop(0))
            elif opts.demux_i or opts.verify_i:
                group = (fq1, infiles.pop(0), None)
            infile_groups.append(group)
    except IndexError:
        print "ERROR: wrong number of input files"
        exit(1)
    print "Processing " + str(len(infile_groups)) + " Tn-seq fileset(s)"

    # Validation: make sure we have the required input files for the chosen options
    (r1_file, i_file, r2_file) = infile_groups[0]
    if (opts.verify_i or opts.demux_i) and i_file is None:
        print "Please supply index fastq files when using demultiplex-by-index or verify-by-index options"
        exit(1)
    if opts.demux_r2 and r2_file is None:
        print "Please supply 2nd-end fastq files when using demultiplex-by-read2 option"
        exit(1)

    # Create the working directory if needed
    try:
        os.makedirs(opts.workdir)
    except OSError:
        if not os.path.isdir(opts.workdir):
            raise

    # Chastity filter
    chaste_files = list()
    if opts.dochastity:
        for (r1_file, i_file, r2_file) in infile_groups:
            fname = common.add_suffix(os.path.basename(r1_file), common.CHASTE_SUFFIX)
            r1_out = os.path.join(opts.workdir, fname)
            cmd = ["python", os.path.join(scriptpath, "ch_filter.py"),
                   "--end1", r1_file, "--outfile_e1", r1_out]
            if r2_file is None:
                r2_out = None
            else:
                fname = common.add_suffix(os.path.basename(r2_file), common.CHASTE_SUFFIX)
                r2_out = os.path.join(opts.workdir, fname)
                cmd.extend(["--end2", r2_file, "--outfile_e2", r2_out])
            if i_file is None:
                i_out = None
            else:
                fname = common.add_suffix(os.path.basename(i_file), common.CHASTE_SUFFIX)
                i_out = os.path.join(opts.workdir, fname)
                cmd.extend(["--index", i_file, "--outfile_i", i_out])
            common.run_cmd(cmd)
            chaste_files.append((r1_out, i_out, r2_out))
    else:
        # No chastity filtering: just stage copies in the working directory
        for (r1_file, i_file, r2_file) in infile_groups:
            if r1_file:
                r1_out = os.path.join(opts.workdir, os.path.basename(r1_file))
                shutil.copyfile(r1_file, r1_out)
            else:
                r1_out = None
            if r2_file:
                r2_out = os.path.join(opts.workdir, os.path.basename(r2_file))
                shutil.copyfile(r2_file, r2_out)
            else:
                r2_out = None
            if i_file:
                i_out = os.path.join(opts.workdir, os.path.basename(i_file))
                shutil.copyfile(i_file, i_out)
            else:
                i_out = None
            chaste_files.append((r1_out, i_out, r2_out))

    # De-multiplex using index, keeping read1 files
    demux_files = list()
    if opts.demux_i:
        barcodes = read_barcodes(opts.barcodefile)
        print "Expected barcodes: " + ", ".join(barcodes)
        for (r1_file, i_file, r2_file) in chaste_files:
            (out_prefix_r1, out_extn) = os.path.splitext(r1_file)
            cmd = ["python", os.path.join(scriptpath, "demux.py"),
                   "--seqs", opts.barcodefile, "--end1", r1_file, "--index", i_file]
            if r2_file:
                cmd.extend(["--end2", r2_file])
            common.run_cmd(cmd)
            for seq in barcodes:
                r1_file = out_prefix_r1 + "_" + seq + out_extn
                demux_files.append((r1_file, None, None))
    else:
        demux_files = chaste_files

    # Hash and count index reads
    if opts.verify_i:
        hash_logs = list()
        for (r1_ch_file, ind_ch_file, r2_ch_file) in chaste_files:
            parts = os.path.splitext(ind_ch_file)
            outfile = parts[0] + common.HASH_EXTENSION
            common.run_cmd(["python", os.path.join(scriptpath, "hash_index_reads.py"),
                            "--infile", ind_ch_file, "--outfile", outfile,
                            "--tn_end_length", str(len(opts.tn_end_seq))])
            hash_logs.append((r1_ch_file, ind_ch_file, r2_ch_file, outfile))

    # Filter first-end reads if they passed the hash check
    if opts.verify_i:
        filtered_files = list()
        for (r1_ch_file, ind_ch_file, r2_ch_file, hash_log) in hash_logs:
            (index_ok, ratio) = check_hash(hash_log, opts.tn_end_seq)
            if index_ok:
                print "Index sequences are primarily " + opts.tn_end_seq + " (" + str(ratio * 100) + "%)"
                outfile1 = common.add_suffix(r1_ch_file, common.TNEND_SUFFIX)
                cmd = ["python", os.path.join(scriptpath, "tnend_filter.py"),
                       "--end1", r1_ch_file, "--index", ind_ch_file,
                       "--outfile1", outfile1, "--tn_end", opts.tn_end_seq]
                if r2_ch_file:
                    outfile2 = common.add_suffix(r2_ch_file, common.TNEND_SUFFIX)
                    cmd.extend(["--end2", r2_ch_file, "--outfile2", outfile2])
                else:
                    outfile2 = None
                common.run_cmd(cmd)
                filtered_files.append((outfile1, None, outfile2))
            else:
                print "WARNING: most index counts do not match expected sequence - skipping Tn end filter (" + str(ratio * 100) + "% " + opts.tn_end_seq + ")"
                filtered_files.append((r1_ch_file, None, r2_ch_file))
    else:
        filtered_files = demux_files

    # Filter and trim first-end reads
    if opts.verify_r1:
        trimmed_files = list()
        for (r1_file, i_file, r2_file) in filtered_files:
            outfile = common.add_suffix(r1_file, common.TRIM_SUFFIX)
            common.run_cmd(["python", os.path.join(scriptpath, "r1_filter.py"),
                            "--end1", r1_file, "--outfile", outfile, "--seq", opts.tn_end_seq])
            trimmed_files.append((outfile, None, None))
    else:
        trimmed_files = filtered_files

    # De-multiplex using read2, keeping read1 and index files
    mappable_files = list()
    if opts.demux_r2:
        barcodes = read_barcodes(opts.barcodefile)
        for (r1_file, i_file, r2_file) in trimmed_files:
            (out_prefix_r1, out_extn) = os.path.splitext(r1_file)
            cmd = ["python", os.path.join(scriptpath, "demux.py"),
                   "--seqs", opts.barcodefile, "--end1", r1_file, "--end2", r2_file]
            if i_file:
                cmd.extend(["--index", i_file])
                (out_prefix_i, _) = os.path.splitext(i_file)
            common.run_cmd(cmd)
            for seq in barcodes:
                r1_out = out_prefix_r1 + "_" + seq + out_extn
                if i_file:
                    i_out = out_prefix_i + "_" + seq + out_extn
                else:
                    i_out = None
                mappable_files.append((r1_out, i_out))
    else:
        mappable_files = [(r1, ind) for (r1, ind, r2) in trimmed_files]

    # Map primary reads against genome using the desired aligner,
    # leaving one core free on larger machines
    cores = multiprocessing.cpu_count()
    if cores > 2:
        cores -= 1
    sam_files = list()
    if opts.use_bowtie:
        print "Aligning reads with Bowtie"
        common.run_cmd([common.BOWTIE_BUILD, "--quiet", opts.reference_fa, opts.reference_fa])
        for (e1_filtered_fq, _) in mappable_files:
            parts = os.path.splitext(e1_filtered_fq)
            label = parts[0]
            sam_file = label + common.SAM_EXTENSION
            common.run_cmd([common.BOWTIE, "--sam", "--threads", str(cores),
                            opts.reference_fa, e1_filtered_fq, sam_file])
            sam_files.append(sam_file)
    else:
        print "Aligning reads with BWA"
        common.run_cmd([common.BWA, "index", opts.reference_fa])
        for (e1_filtered_fq, _) in mappable_files:
            parts = os.path.splitext(e1_filtered_fq)
            label = parts[0]
            aln_file = label + common.ALN_EXTENSION
            with open(aln_file, "w") as aln_fh:
                common.run_cmd_file_out([common.BWA, "aln", "-l", "1000", "-t", str(cores),
                                         "-n", str(common.BWA_PCT_MISSING),
                                         opts.reference_fa, e1_filtered_fq], aln_fh)
            sam_file = label + common.SAM_EXTENSION
            with open(sam_file, "w") as sam_fh:
                common.run_cmd_file_out([common.BWA, "samse", opts.reference_fa,
                                         aln_file, e1_filtered_fq], sam_fh)
            sam_files.append(sam_file)

    # Summarize mappings
    sum_files = list()
    for sam_file in sam_files:
        parts = os.path.splitext(sam_file)
        outfile = parts[0] + common.SUM_EXTENSION
        run_cmd_list = ["python", os.path.join(scriptpath, "summarize_mappings.py"),
                        "--infile", sam_file, "--outfile", outfile]
        if opts.backendseq:
            run_cmd_list += ["--backendseq"]
        common.run_cmd(run_cmd_list)
        sum_files.append(outfile)

    # Merge slipped reads
    sum_mg_files = list()
    for sum_file in sum_files:
        if opts.merge_slipped:
            outfile = common.add_suffix(sum_file, common.MERGE_SUFFIX)
            common.run_cmd(["python", os.path.join(scriptpath, "merge_slipped.py"),
                            "--infile", sum_file, "--outfile", outfile])
            sum_mg_files.append(outfile)
        else:
            sum_mg_files.append(sum_file)

    # Normalize read counts; a norm factor of 0 disables normalization,
    # and None falls back to norm.py's default factor
    norm_files = list()
    for mg_file in sum_mg_files:
        if opts.norm_factor is None or opts.norm_factor > 0:
            parts = os.path.splitext(mg_file)
            norm_file = parts[0] + common.NORM_SUFFIX + parts[1]
            cmd = ["python", os.path.join(scriptpath, "norm.py"),
                   "--infile", mg_file, "--outfile", norm_file]
            if opts.norm_factor is not None:
                cmd.extend(["--norm-factor", str(opts.norm_factor)])
            common.run_cmd(cmd)
            norm_files.append(norm_file)
        else:
            norm_files.append(mg_file)

    # This file list should be pasted into the annotation command
    print "Finished reads lists: " + " ".join(norm_files)
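# read_barcodes and check_hash are helpers defined elsewhere in this repo. For
# reference, a minimal read_barcodes consistent with how it is called above
# (one barcode sequence per line of opts.barcodefile) might look like this
# sketch; the real implementation may differ:
def read_barcodes_sketch(barcodefile):
    barcodes = list()
    with open(barcodefile) as fh:
        for line in fh:
            line = line.strip()
            if line:
                barcodes.append(line.split()[0])  # first field is the sequence
    return barcodes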