def process(infiles, opts, replicons):
    scriptpath = os.path.dirname(os.path.abspath(__file__))

    # Split summary files
    split_files_all = list()
    split_files_q0 = list()
    for norm_file in infiles:
        outfile_all = common.add_suffix(norm_file, common.ALL_SUFFIX)
        outfile_q0 = common.add_suffix(norm_file, common.Q0_SUFFIX)
        common.run_cmd([
            "python",
            os.path.join(scriptpath, "split_sum.py"), norm_file, outfile_all,
            outfile_q0
        ])
        split_files_all.append(outfile_all)
        split_files_q0.append(outfile_q0)

    # Compile summary sets
    readscomp = os.path.join(opts.workdir, common.READSCOMP)
    cmd = ["python", os.path.join(scriptpath, "compile_sets.py")]
    cmd.extend(split_files_all)
    cmd.extend(split_files_q0)
    cmd.append(readscomp)
    common.run_cmd(cmd)

    # Annotate positions
    reads_anno = common.add_suffix(readscomp, common.ANNO_SUFFIX)
    cmd = [
        "python", os.path.join(scriptpath, "annotate.py"),
        "--infile", readscomp, "--annofiles", opts.annofiles,
        "--outfile", reads_anno, "--fasta", opts.reference_fa
    ]
    common.run_cmd(cmd)

    # Merge annotations and counts
    anno_hits_file = os.path.join(opts.workdir, opts.outfile_anno)
    anno_reduced = common.add_suffix(reads_anno, common.TAB_SUFFIX)
    replicon_names = ",".join(sorted(replicons.values(), key=replicons.get))
    common.run_cmd([
        "python",
        os.path.join(scriptpath, "merge_anno.py"), "--reads_file", readscomp,
        "--anno_file", reads_anno, "--names", replicon_names, "--outfile1",
        anno_hits_file, "--outfile2", anno_reduced
    ])

    # Tabulate by gene
    tab_file = os.path.join(opts.workdir, opts.outfile_tab)
    cmd = [
        "python", os.path.join(scriptpath, "tabulate.py"),
        "--infile", anno_reduced, "--annofiles", opts.annofiles,
        "--outfile", tab_file, "--fasta", opts.reference_fa
    ]
    common.run_cmd(cmd)

    print "Processing complete. Output in " + anno_hits_file + " and " + tab_file
Example 2
def main(mode = ct.PAPER_TRADING, start_date = '20170815', end_date = '20171113'):
    if mode == ct.PAPER_TRADING:
        cash = 100000
        beta = 9.49
        mean = -0.282
        std  = 34.73
        feed = dataFramefeed.Feed()
        instruments = ['300296', '300613']
        fpath       = '/Users/hellobiek/Documents/workspace/python/quant/smart_deal_tool/configure/tushare.json' 
        ts_client   = get_tushare_client(fpath)
        for code in instruments:
            df = ts.pro_bar(pro_api = ts_client, ts_code = add_suffix(code), adj = 'qfq', start_date = start_date, end_date = end_date)
            df = df.rename(columns = {"ts_code": "code", "trade_date": "date", "vol": "volume", "pct_change": "pchange"})
            df['date'] = df.date.apply(lambda x: time.strftime('%Y-%m-%d', time.strptime(x, "%Y%m%d")))
            df = df.set_index("date")
            feed.addBarsFromDataFrame(code, df)

        # broker setting
        # broker commission类设置
        broker_commission = broker.backtesting.TradePercentage(0.002)
        # fill strategy设置
        fill_stra = broker.fillstrategy.DefaultStrategy(volumeLimit = 1.0)
        sli_stra  = broker.slippage.NoSlippage()
        fill_stra.setSlippageModel(sli_stra)
        # 完善broker类
        brk = broker.backtesting.Broker(cash, feed, broker_commission)
        brk.setFillStrategy(fill_stra)
    else:
        raise NotImplementedError("only paper trading is configured in this example")

    pStrategy = PairTradingStrategy(feed, instruments, brk, beta, mean, std, cash)

    returnsAnalyzer = sreturn.Returns()
    pStrategy.attachAnalyzer(returnsAnalyzer)

    sharpeRatioAnalyzer = sharpe.SharpeRatio()
    pStrategy.attachAnalyzer(sharpeRatioAnalyzer)

    drawDownAnalyzer = drawdown.DrawDown()
    pStrategy.attachAnalyzer(drawDownAnalyzer)

    tradesAnalyzer = trades.Trades()
    pStrategy.attachAnalyzer(tradesAnalyzer)

    plt = plotter.StrategyPlotter(pStrategy)

    # Plot the simple returns on each bar.
    plt.getOrCreateSubplot("returns").addDataSeries("Simple returns", returnsAnalyzer.getReturns())

    pStrategy.run()

    plt.plot()

    print("Final portfolio value: $%.2f" % pStrategy.getResult())
    print("Cumulative returns: %.2f %%" % (returnsAnalyzer.getCumulativeReturns()[-1] * 100))
    print("Sharpe ratio: %.2f" % (sharpeRatioAnalyzer.getSharpeRatio(0.05)))
    print("Max. drawdown: %.2f %%" % (drawDownAnalyzer.getMaxDrawDown() * 100))
    print("Longest drawdown duration: %s" % (drawDownAnalyzer.getLongestDrawDownDuration()))
Example 3
def process(infiles, opts, replicons):
    scriptpath = os.path.dirname(os.path.abspath(__file__))

    # Split summary files
    split_files_all = list()
    split_files_q0 = list()
    for norm_file in infiles:
        outfile_all = common.add_suffix(norm_file, common.ALL_SUFFIX)
        outfile_q0 = common.add_suffix(norm_file, common.Q0_SUFFIX)
        common.run_cmd(["python", os.path.join(scriptpath, "split_sum.py"), norm_file, outfile_all, outfile_q0])
        split_files_all.append(outfile_all)
        split_files_q0.append(outfile_q0)

    # Compile summary sets
    readscomp = os.path.join(opts.workdir, common.READSCOMP)
    cmd = ["python", os.path.join(scriptpath, "compile_sets.py"), "--outfile", readscomp]
    if opts.ok_locs_file:
        cmd.extend(["--ok_locs_file", opts.ok_locs_file])
    cmd.extend(split_files_all)
    cmd.extend(split_files_q0)
    common.run_cmd(cmd)

    # Annotate positions
    reads_anno = common.add_suffix(readscomp, common.ANNO_SUFFIX)
    cmd = ["python", os.path.join(scriptpath, "annotate.py"), "--infile", readscomp, "--annofiles", opts.annofiles, "--outfile", reads_anno, "--fasta", opts.reference_fa]
    common.run_cmd(cmd)

    # Merge annotations and counts
    anno_hits_file = os.path.join(opts.workdir, opts.outfile_anno)
    anno_reduced = common.add_suffix(reads_anno, common.TAB_SUFFIX)
    replicon_names = ",".join(sorted(replicons.values(), key=replicons.get))
    common.run_cmd(["python", os.path.join(scriptpath, "merge_anno.py"), "--reads_file", readscomp, "--anno_file", reads_anno, "--names", replicon_names, "--outfile1", anno_hits_file, "--outfile2", anno_reduced])

    # Tabulate by gene
    tab_file = os.path.join(opts.workdir, opts.outfile_tab)
    cmd = ["python", os.path.join(scriptpath, "tabulate.py"), "--infile", anno_reduced, "--annofiles", opts.annofiles, "--outfile", tab_file, "--fasta", opts.reference_fa]
    common.run_cmd(cmd)

    print "Processing complete. Output in " + anno_hits_file + " and " + tab_file
Example 4
def select_code(code_list, start_date, end_date):
    date_arrays = list()
    for mdate in get_dates_array(start_date, end_date, dformat = "%Y%m%d"):
        if CCalendar.is_trading_day(transfer_int_to_date_string(mdate), redis = mredis):
            date_arrays.append(mdate)
    # choose stocks that have not been suspended for very long
    total_df = pd.DataFrame()
    for code in code_list:
        df = ts.pro_bar(pro_api = ts_client, ts_code = add_suffix(code), adj = 'qfq', start_date = start_date, end_date = end_date)
        if df is None: continue
        if len(df) > int(0.8 * len(date_arrays)):
            df = df.rename(columns = {"ts_code": "code", "trade_date": "date", "pct_change": "pchange"})
            df = df.set_index('date')
            total_df[code] = df.close
    return total_df
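
A hypothetical call, assuming ts_client and mredis are initialized at module level as the function expects; columns left with gaps can be dropped before downstream pair selection:

candidates = ['300296', '300613', '600036']      # hypothetical code list
total_df = select_code(candidates, '20170815', '20171113')
total_df = total_df.dropna(axis=1)               # drop codes with missing closes
print(total_df.tail())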
Example 5
def process(infiles, opts):
    scriptpath = os.path.dirname(os.path.abspath(__file__))
    label = os.path.splitext(os.path.basename(infiles[0]))[0]

    # Group the input files (read1, index, read2) if using index reads and/or 2nd-end reads
    infile_groups = list()
    try:
        while len(infiles) > 0:
            fq1 = infiles.pop(0)
            group = (fq1, None, None)
            if opts.demux_r2 and (opts.demux_i or opts.verify_i):
                group = (fq1, infiles.pop(0), infiles.pop(0))
            elif opts.demux_r2:
                group = (fq1, None, infiles.pop(0))
            elif opts.demux_i or opts.verify_i:
                group = (fq1, infiles.pop(0), None)
            infile_groups.append(group)
    except IndexError:
        print("ERROR: wrong number of input files")
        exit(1)
    print("Processing " + str(len(infile_groups)) + " Tn-seq fileset(s)")

    # Validation: make sure we have the required input files for the chosen options
    (r1_file, i_file, r2_file) = infile_groups[0]
    if (opts.verify_i or opts.demux_i) and i_file is None:
        print("Please supply index fastq files when using demultiplex-by-index or verify-by-index options")
        exit(1)
    if opts.demux_r2 and r2_file is None:
        print("Please supply 2nd-end fastq files when using demultiplex-by-read2 option")
        exit(1)

    # Create the working directory if needed
    try:
        os.makedirs(opts.workdir)
    except OSError:
        if not os.path.isdir(opts.workdir):
            raise

    # Chastity filter
    chaste_files = list()
    if opts.dochastity:
        for (r1_file, i_file, r2_file) in infile_groups:
            fname = common.add_suffix(os.path.basename(r1_file),
                                      common.CHASTE_SUFFIX)
            r1_out = os.path.join(opts.workdir, fname)
            cmd = [
                "python",
                os.path.join(scriptpath, "ch_filter.py"), "--end1", r1_file,
                "--outfile_e1", r1_out
            ]
            if r2_file is None:
                r2_out = None
            else:
                fname = common.add_suffix(os.path.basename(r2_file),
                                          common.CHASTE_SUFFIX)
                r2_out = os.path.join(opts.workdir, fname)
                cmd.extend(["--end2", r2_file, "--outfile_e2", r2_out])
            if i_file is None:
                i_out = None
            else:
                fname = common.add_suffix(os.path.basename(i_file),
                                          common.CHASTE_SUFFIX)
                i_out = os.path.join(opts.workdir, fname)
                cmd.extend(["--index", i_file, "--outfile_i", i_out])
            common.run_cmd(cmd)
            chaste_group = (r1_out, i_out, r2_out)
            chaste_files.append(chaste_group)
    else:
        for (r1_file, i_file, r2_file) in infile_groups:
            if r1_file:
                r1_out = os.path.join(opts.workdir, os.path.basename(r1_file))
                shutil.copyfile(r1_file, r1_out)
            else:
                r1_out = None
            if r2_file:
                r2_out = os.path.join(opts.workdir, os.path.basename(r2_file))
                shutil.copyfile(r2_file, r2_out)
            else:
                r2_out = None
            if i_file:
                i_out = os.path.join(opts.workdir, os.path.basename(i_file))
                shutil.copyfile(i_file, i_out)
            else:
                i_out = None
            chaste_files.append((r1_out, i_out, r2_out))

    # De-multiplex using index, keeping read1 files
    demux_files = list()
    if opts.demux_i:
        barcodes = read_barcodes(opts.barcodefile)
        print "Expected barcodes: " + ", ".join(barcodes)
        for (r1_file, i_file, r2_file) in chaste_files:
            (out_prefix_r1, out_extn) = os.path.splitext(r1_file)
            cmd = [
                "python",
                os.path.join(scriptpath, "demux.py"), "--seqs",
                opts.barcodefile, "--end1", r1_file, "--index", i_file
            ]
            if r2_file:
                cmd.extend(["--end2", r2_file])
            common.run_cmd(cmd)
            for seq in barcodes:
                r1_file = out_prefix_r1 + "_" + seq + out_extn
                demux_files.append((r1_file, None, None))
    else:
        demux_files = chaste_files

    # Hash and count
    if opts.verify_i:
        hash_logs = list()
        for (r1_ch_file, ind_ch_file, r2_ch_file) in chaste_files:
            parts = os.path.splitext(ind_ch_file)
            outfile = parts[0] + common.HASH_EXTENSION
            common.run_cmd([
                "python",
                os.path.join(scriptpath, "hash_index_reads.py"), "--infile",
                ind_ch_file, "--outfile", outfile, "--tn_end_length",
                str(len(opts.tn_end_seq))
            ])
            hash_logs.append((r1_ch_file, ind_ch_file, r2_ch_file, outfile))

    # Filter first-end reads if passed hash
    if opts.verify_i:
        filtered_files = list()
        for (r1_ch_file, ind_ch_file, r2_ch_file, hash_log) in hash_logs:
            (index_ok, ratio) = check_hash(hash_log, opts.tn_end_seq)
            if index_ok:
                print("Index sequences are primarily %s (%.1f%%)" % (opts.tn_end_seq, ratio * 100))
                outfile1 = common.add_suffix(r1_ch_file, common.TNEND_SUFFIX)
                cmd = [
                    "python",
                    os.path.join(scriptpath, "tnend_filter.py"), "--end1",
                    r1_ch_file, "--index", ind_ch_file, "--outfile1", outfile1,
                    "--tn_end", opts.tn_end_seq
                ]
                if r2_ch_file:
                    outfile2 = common.add_suffix(r2_ch_file,
                                                 common.TNEND_SUFFIX)
                    cmd.extend(["--end2", r2_ch_file, "--outfile2", outfile2])
                else:
                    outfile2 = None
                common.run_cmd(cmd)
                filtered_files.append((outfile1, None, outfile2))
            else:
                print "WARNING: most index counts do not match expected sequence - skipping Tn end filter (" + str(
                    ratio * 100) + "% " + opts.tn_end_seq + ")"
                filtered_files.append((r1_ch_file, None, r2_ch_file))
    else:
        filtered_files = demux_files

    # Filter and trim first-end reads
    if opts.verify_r1:
        trimmed_files = list()
        for (r1_file, i_file, r2_file) in filtered_files:
            outfile = common.add_suffix(r1_file, common.TRIM_SUFFIX)
            common.run_cmd([
                "python",
                os.path.join(scriptpath, "r1_filter.py"), "--end1", r1_file,
                "--outfile", outfile, "--seq", opts.tn_end_seq
            ])
            trimmed_files.append((outfile, None, None))
    else:
        trimmed_files = filtered_files

    # De-multiplex using read2, keeping read1 and index files
    mappable_files = list()
    if opts.demux_r2:
        barcodes = read_barcodes(opts.barcodefile)
        for (r1_file, i_file, r2_file) in trimmed_files:
            (out_prefix_r1, out_extn) = os.path.splitext(r1_file)
            cmd = [
                "python",
                os.path.join(scriptpath, "demux.py"), "--seqs",
                opts.barcodefile, "--end1", r1_file, "--end2", r2_file
            ]
            if i_file:
                cmd.extend(["--index", i_file])
                (out_prefix_i, _) = os.path.splitext(i_file)
            common.run_cmd(cmd)
            for seq in barcodes:
                r1_out = out_prefix_r1 + "_" + seq + out_extn
                if i_file:
                    i_out = out_prefix_i + "_" + seq + out_extn
                else:
                    i_out = None
                mappable_files.append((r1_out, i_out))
    else:
        mappable_files = [(r1, ind) for (r1, ind, r2) in trimmed_files]

    # Map primary reads against genome using desired aligner
    cores = multiprocessing.cpu_count()
    if cores > 2:
        cores -= 1
    sam_files = list()
    if opts.use_bowtie:
        print "Aligning reads with Bowtie"
        common.run_cmd([
            common.BOWTIE_BUILD, "--quiet", opts.reference_fa,
            opts.reference_fa
        ])
        for (e1_filtered_fq, _) in mappable_files:
            parts = os.path.splitext(e1_filtered_fq)
            label = parts[0]
            sam_file = label + common.SAM_EXTENSION
            common.run_cmd([
                common.BOWTIE, "--sam", "--threads",
                str(cores), opts.reference_fa, e1_filtered_fq, sam_file
            ])
            sam_files.append(sam_file)
    else:
        print "Aligning reads with BWA"
        common.run_cmd([common.BWA, "index", opts.reference_fa])
        for (e1_filtered_fq, _) in mappable_files:
            parts = os.path.splitext(e1_filtered_fq)
            label = parts[0]
            aln_file = label + common.ALN_EXTENSION
            with open(aln_file, "w") as aln_fh:
                #                common.run_cmd_file_out([common.BWA, "aln", "-t", str(cores), opts.reference_fa, e1_filtered_fq], aln_fh)
                common.run_cmd_file_out([
                    common.BWA, "aln", "-l", "1000", "-t",
                    str(cores), "-n",
                    str(common.BWA_PCT_MISSING), opts.reference_fa,
                    e1_filtered_fq
                ], aln_fh)
            sam_file = label + common.SAM_EXTENSION
            with open(sam_file, "w") as sam_fh:
                common.run_cmd_file_out([
                    common.BWA, "samse", opts.reference_fa, aln_file,
                    e1_filtered_fq
                ], sam_fh)
            sam_files.append(sam_file)

    # Summarize mappings
    sum_files = list()
    for sam_file in sam_files:
        parts = os.path.splitext(sam_file)
        outfile = parts[0] + common.SUM_EXTENSION
        run_cmd_list = [
            "python",
            os.path.join(scriptpath, "summarize_mappings.py"), "--infile",
            sam_file, "--outfile", outfile
        ]
        if opts.backendseq:
            run_cmd_list += ["--backendseq"]
        common.run_cmd(run_cmd_list)
        sum_files.append(outfile)

    # Merge slipped reads
    sum_mg_files = list()
    for sum_file in sum_files:
        outfile = common.add_suffix(sum_file, common.MERGE_SUFFIX)
        if opts.merge_slipped:
            common.run_cmd([
                "python",
                os.path.join(scriptpath, "merge_slipped.py"), "--infile",
                sum_file, "--outfile", outfile
            ])
            sum_mg_files.append(outfile)
        else:
            sum_mg_files.append(sum_file)

    # Normalize read counts
    norm_files = list()
    for mg_file in sum_mg_files:
        # A norm_factor of None means "use the default normalization";
        # a value <= 0 disables normalization entirely.
        if opts.norm_factor is None or opts.norm_factor > 0:
            parts = os.path.splitext(mg_file)
            norm_file = parts[0] + common.NORM_SUFFIX + parts[1]
            cmd = [
                "python",
                os.path.join(scriptpath, "norm.py"), "--infile", mg_file,
                "--outfile", norm_file
            ]
            if opts.norm_factor is not None:
                cmd.extend(["--norm-factor", str(opts.norm_factor)])
            common.run_cmd(cmd)
            norm_files.append(norm_file)
        else:
            norm_files.append(mg_file)

    # This file list should be pasted into the annotation command
    print "Finished reads lists: " + " ".join(norm_files)
Example 6
def process(infiles, opts):
    scriptpath = os.path.dirname(os.path.abspath(__file__))
    label = os.path.splitext(os.path.basename(infiles[0]))[0]

    # Group the input files (read1, index, read2) if using index reads and/or 2nd-end reads
    infile_groups = list()
    try:
        while len(infiles) > 0:
            fq1 = infiles.pop(0)
            group = (fq1, None, None)
            if opts.demux_r2 and (opts.demux_i or opts.verify_i):
                group = (fq1, infiles.pop(0), infiles.pop(0))
            elif opts.demux_r2:
                group = (fq1, None, infiles.pop(0))
            elif opts.demux_i or opts.verify_i:
                group = (fq1, infiles.pop(0), None)
            infile_groups.append(group)
    except IndexError:
        print("ERROR: wrong number of input files")
        exit(1)
    print("Processing " + str(len(infile_groups)) + " Tn-seq fileset(s)")

    # Validation: make sure we have the required input files for the chosen options
    (r1_file, i_file, r2_file) = infile_groups[0]
    if (opts.verify_i or opts.demux_i) and i_file is None:
        print("Please supply index fastq files when using demultiplex-by-index or verify-by-index options")
        exit(1)
    if opts.demux_r2 and r2_file is None:
        print("Please supply 2nd-end fastq files when using demultiplex-by-read2 option")
        exit(1)

    # Create the working directory if needed
    try:
        os.makedirs(opts.workdir)
    except OSError:
        if not os.path.isdir(opts.workdir):
            raise

    # Chastity filter
    chaste_files = list()
    if opts.dochastity:
        for (r1_file, i_file, r2_file) in infile_groups:
            fname = common.add_suffix(os.path.basename(r1_file), common.CHASTE_SUFFIX)
            r1_out = os.path.join(opts.workdir, fname)
            cmd = ["python", os.path.join(scriptpath, "ch_filter.py"), "--end1", r1_file, "--outfile_e1", r1_out]
            if r2_file is None:
                r2_out = None
            else:
                fname = common.add_suffix(os.path.basename(r2_file), common.CHASTE_SUFFIX)
                r2_out = os.path.join(opts.workdir, fname)
                cmd.extend(["--end2", r2_file, "--outfile_e2", r2_out])
            if i_file is None:
                i_out = None
            else:
                fname = common.add_suffix(os.path.basename(i_file), common.CHASTE_SUFFIX)
                i_out = os.path.join(opts.workdir, fname)
                cmd.extend(["--index", i_file, "--outfile_i", i_out])
            common.run_cmd(cmd)
            chaste_group = (r1_out, i_out, r2_out)
            chaste_files.append(chaste_group)
    else:
        for (r1_file, i_file, r2_file) in infile_groups:
            if r1_file:
                r1_out = os.path.join(opts.workdir, os.path.basename(r1_file))
                shutil.copyfile(r1_file, r1_out)
            else:
                r1_out = None
            if r2_file:
                r2_out = os.path.join(opts.workdir, os.path.basename(r2_file))
                shutil.copyfile(r2_file, r2_out)
            else:
                r2_out = None
            if i_file:
                i_out = os.path.join(opts.workdir, os.path.basename(i_file))
                shutil.copyfile(i_file, i_out)
            else:
                i_out = None
            chaste_files.append((r1_out, i_out, r2_out))

    # De-multiplex using index, keeping read1 files
    demux_files = list()
    if opts.demux_i:
        barcodes = read_barcodes(opts.barcodefile)
        print "Expected barcodes: " + ", ".join(barcodes)
        for (r1_file, i_file, r2_file) in chaste_files:
            (out_prefix_r1, out_extn) = os.path.splitext(r1_file)
            cmd = ["python", os.path.join(scriptpath, "demux.py"), "--seqs", opts.barcodefile, "--end1", r1_file, "--index", i_file]
            if r2_file:
                cmd.extend(["--end2", r2_file])
            common.run_cmd(cmd)
            for seq in barcodes:
                r1_file = out_prefix_r1 + "_" + seq + out_extn
                demux_files.append((r1_file, None, None))
    else:
        demux_files = chaste_files

    # Hash and count
    if opts.verify_i:
        hash_logs = list()
        for (r1_ch_file, ind_ch_file, r2_ch_file) in chaste_files:
            parts = os.path.splitext(ind_ch_file)
            outfile = parts[0] + common.HASH_EXTENSION
            common.run_cmd(["python", os.path.join(scriptpath, "hash_index_reads.py"), "--infile", ind_ch_file, "--outfile", outfile, "--tn_end_length", str(len(opts.tn_end_seq))])
            hash_logs.append((r1_ch_file, ind_ch_file, r2_ch_file, outfile))

    # Filter first-end reads if passed hash
    if opts.verify_i:
        filtered_files = list()
        for (r1_ch_file, ind_ch_file, r2_ch_file, hash_log) in hash_logs:
            (index_ok, ratio) = check_hash(hash_log, opts.tn_end_seq)
            if index_ok:
                print("Index sequences are primarily %s (%.1f%%)" % (opts.tn_end_seq, ratio * 100))
                outfile1 = common.add_suffix(r1_ch_file, common.TNEND_SUFFIX)
                cmd = ["python", os.path.join(scriptpath, "tnend_filter.py"), "--end1", r1_ch_file, "--index", ind_ch_file, "--outfile1", outfile1, "--tn_end", opts.tn_end_seq]
                if r2_ch_file:
                    outfile2 = common.add_suffix(r2_ch_file, common.TNEND_SUFFIX)
                    cmd.extend(["--end2", r2_ch_file, "--outfile2", outfile2])
                else:
                    outfile2 = None
                common.run_cmd(cmd)
                filtered_files.append((outfile1, None, outfile2))
            else:
                print "WARNING: most index counts do not match expected sequence - skipping Tn end filter (" + str(ratio * 100) + "% " + opts.tn_end_seq + ")" 
                filtered_files.append((r1_ch_file, None, r2_ch_file))
    else:
        filtered_files = demux_files

    # Filter and trim first-end reads
    if opts.verify_r1:
        trimmed_files = list()
        for (r1_file, i_file, r2_file) in filtered_files:
            outfile = common.add_suffix(r1_file, common.TRIM_SUFFIX)
            common.run_cmd(["python", os.path.join(scriptpath, "r1_filter.py"), "--end1", r1_file, "--outfile", outfile, "--seq", opts.tn_end_seq])
            trimmed_files.append((outfile, None, None))
    else:
        trimmed_files = filtered_files

    # De-multiplex using read2, keeping read1 and index files
    mappable_files = list()
    if opts.demux_r2:
        barcodes = read_barcodes(opts.barcodefile)
        for (r1_file, i_file, r2_file) in trimmed_files:
            (out_prefix_r1, out_extn) = os.path.splitext(r1_file)
            cmd = ["python", os.path.join(scriptpath, "demux.py"), "--seqs", opts.barcodefile, "--end1", r1_file, "--end2", r2_file]
            if i_file:
                cmd.extend(["--index", i_file])
                (out_prefix_i, _) = os.path.splitext(i_file)
            common.run_cmd(cmd)
            for seq in barcodes:
                r1_out = out_prefix_r1 + "_" + seq + out_extn
                if i_file:
                    i_out = out_prefix_i + "_" + seq + out_extn
                else:
                    i_out = None
                mappable_files.append((r1_out, i_out))
    else:
        mappable_files = [(r1, ind) for (r1, ind, r2) in trimmed_files]

    # Map primary reads against genome using desired aligner
    cores = multiprocessing.cpu_count()
    if cores > 2:
        cores -= 1
    sam_files = list()
    if opts.use_bowtie:
        print "Aligning reads with Bowtie"
        common.run_cmd([common.BOWTIE_BUILD, "--quiet", opts.reference_fa, opts.reference_fa])
        for (e1_filtered_fq, _) in mappable_files:
            parts = os.path.splitext(e1_filtered_fq)
            label = parts[0]
            sam_file = label + common.SAM_EXTENSION
            common.run_cmd([common.BOWTIE, "--sam", "--threads", str(cores), opts.reference_fa, e1_filtered_fq, sam_file])
            sam_files.append(sam_file)
    else:
        print "Aligning reads with BWA"
        common.run_cmd([common.BWA, "index", opts.reference_fa])
        for (e1_filtered_fq, _) in mappable_files:
            parts = os.path.splitext(e1_filtered_fq)
            label = parts[0]
            aln_file = label + common.ALN_EXTENSION
            with open(aln_file, "w") as aln_fh:
#                common.run_cmd_file_out([common.BWA, "aln", "-t", str(cores), opts.reference_fa, e1_filtered_fq], aln_fh)
                common.run_cmd_file_out([common.BWA, "aln", "-l", "1000", "-t", str(cores), "-n", str(common.BWA_PCT_MISSING), opts.reference_fa, e1_filtered_fq], aln_fh)
            sam_file = label + common.SAM_EXTENSION
            with open(sam_file, "w") as sam_fh:
                common.run_cmd_file_out([common.BWA, "samse", opts.reference_fa, aln_file, e1_filtered_fq], sam_fh)
            sam_files.append(sam_file)

    # Summarize mappings
    sum_files = list()
    for sam_file in sam_files:
        parts = os.path.splitext(sam_file)
        outfile = parts[0] + common.SUM_EXTENSION
        common.run_cmd(["python", os.path.join(scriptpath, "summarize_mappings.py"), "--infile", sam_file, "--outfile", outfile])
        sum_files.append(outfile)

    # Merge slipped reads
    sum_mg_files = list()
    for sum_file in sum_files:
        outfile = common.add_suffix(sum_file, common.MERGE_SUFFIX)
        if opts.merge_slipped:
            common.run_cmd(["python", os.path.join(scriptpath, "merge_slipped.py"), "--infile", sum_file, "--outfile", outfile])
            sum_mg_files.append(outfile)
        else:
            sum_mg_files.append(sum_file)

    # Normalize read counts
    norm_files = list()
    for mg_file in sum_mg_files:
        # A norm_factor of None means "use the default normalization"; a value <= 0 disables normalization.
        if opts.norm_factor is None or opts.norm_factor > 0:
            parts = os.path.splitext(mg_file)
            norm_file = parts[0] + common.NORM_SUFFIX + parts[1]
            cmd = ["python", os.path.join(scriptpath, "norm.py"), "--infile", mg_file, "--outfile", norm_file]
            if opts.norm_factor is not None:
                cmd.extend(["--norm-factor", str(opts.norm_factor)])
            common.run_cmd(cmd)
            norm_files.append(norm_file)
        else:
            norm_files.append(mg_file)

    # This file list should be pasted into the annotation command
    print "Finished reads lists: " + " ".join(norm_files)