Beispiel #1
0
def contamination(args):
    """
    %prog contamination folder Ecoli.fasta

    Remove contaminated reads.
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text,
    # so it is kept verbatim.
    #
    # For every read set yielded by iter_project(folder, 2), run the bowtie
    # `align` wrapper against the contaminant reference and ask it to keep
    # either the unmapped reads (default) or the mapped ones (--mapped).
    from jcvi.apps.bowtie import align

    p = OptionParser(contamination.__doc__)
    p.add_option("--mapped", default=False, action="store_true",
                 help="Retain contaminated reads instead [default: %default]")
    p.set_cutoff(cutoff=800)
    p.set_mateorientation()
    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    folder, ecoli = args
    ecoli = get_abs_path(ecoli)

    if opts.mapped:
        keep_flag = "--mapped"
    else:
        keep_flag = "--unmapped"

    for reads, _prefix in iter_project(folder, 2):
        align_opts = [ecoli] + reads
        align_opts.extend(["--bam", keep_flag])
        align_opts.append("--cutoff={0}".format(opts.cutoff))
        if opts.mateorientation:
            align_opts.append(
                "--mateorientation={0}".format(opts.mateorientation))
        samfile, logfile = align(align_opts)
Beispiel #2
0
    def __init__(self, filename, select=None):
        """
        Load contig sizes from `filename`.

        `filename` is either a `.sizes` file or another existing file (FASTA,
        per the original comment). In the latter case a `<filename>.sizes`
        companion is (re)generated with `faSize -detailed` whenever
        `need_update` reports it is stale, and that companion is loaded.

        If `select` is given, only entries whose size is >= `select` are kept.

        Sets `sizes_mapping`, `ctgs`, `sizes`, `cumsizes` and
        `cumsizes_mapping` on the instance.
        """
        assert op.exists(filename), "File `{0}` not found".format(filename)

        # filename can be both .sizes file or FASTA formatted file
        sizesname = filename

        if not filename.endswith(".sizes"):
            sizesname = filename + ".sizes"
            filename = get_abs_path(filename)
            if need_update(filename, sizesname):
                cmd = "faSize"
                cmd += " -detailed {0}".format(filename)
                sh(cmd, outfile=sizesname)
            filename = sizesname

        assert filename.endswith(".sizes")

        super(Sizes, self).__init__(filename)
        # NOTE(review): the handle stays open for the life of the object;
        # presumably iter_sizes() reads from self.fp -- confirm before closing.
        self.fp = open(filename)

        # get sizes for individual contigs, both in list and dict
        # this is to preserve the input order in the sizes file
        sizes = list(self.iter_sizes())
        if select:
            assert select > 0
            sizes = [x for x in sizes if x[1] >= select]
        self.sizes_mapping = dict(sizes)

        # get cumulative sizes, both in list and dict
        # zip(*[]) yields nothing to unpack, so an empty (or fully filtered)
        # sizes list needs explicit handling instead of an obscure ValueError.
        if sizes:
            ctgs, sizes = zip(*sizes)
        else:
            ctgs, sizes = (), ()
        self.sizes = sizes
        cumsizes = np.cumsum([0] + list(sizes))
        self.ctgs = ctgs
        self.cumsizes = cumsizes
        # cumsizes has one more element than ctgs; zip() pairs each contig
        # with the cumulative size *before* it.
        self.cumsizes_mapping = dict(zip(ctgs, cumsizes))
Beispiel #3
0
 def update_abs_path(self):
     """
     Rewrite, in place, the value of every row whose value is an existing
     path so that it holds the result of get_abs_path(); each change is
     logged at debug level. Rows with an empty or non-existent value are
     left untouched.
     """
     for row in self:
         current = row.value
         if not current or not op.exists(current):
             continue
         resolved = get_abs_path(current)
         logging.debug("{0}={1} => {2}".format(row.tag, current, resolved))
         row.value = resolved
Beispiel #4
0
    def __init__(self, filename, select=None):
        """
        Load contig sizes from `filename`.

        `filename` is either a `.sizes` file or another existing file (FASTA,
        per the original comment). In the latter case a `<filename>.sizes`
        companion is (re)generated with `faSize -detailed` whenever
        `need_update` reports it is stale, and that companion is loaded.

        If `select` is given, only entries whose size is >= `select` are kept.

        Sets `sizes_mapping`, `ctgs`, `sizes`, `cumsizes` and
        `cumsizes_mapping` on the instance.
        """
        assert op.exists(filename), "File `{0}` not found".format(filename)

        # filename can be both .sizes file or FASTA formatted file
        sizesname = filename

        if not filename.endswith(".sizes"):
            sizesname = filename + ".sizes"
            filename = get_abs_path(filename)
            if need_update(filename, sizesname):
                cmd = "faSize"
                cmd += " -detailed {0}".format(filename)
                sh(cmd, outfile=sizesname)
            filename = sizesname

        assert filename.endswith(".sizes")

        super(Sizes, self).__init__(filename)
        # NOTE(review): the handle stays open for the life of the object;
        # presumably iter_sizes() reads from self.fp -- confirm before closing.
        self.fp = open(filename)

        # get sizes for individual contigs, both in list and dict
        # this is to preserve the input order in the sizes file
        sizes = list(self.iter_sizes())
        if select:
            assert select > 0
            sizes = [x for x in sizes if x[1] >= select]
        self.sizes_mapping = dict(sizes)

        # get cumulative sizes, both in list and dict
        # zip(*[]) yields nothing to unpack, so an empty (or fully filtered)
        # sizes list needs explicit handling instead of an obscure ValueError.
        if sizes:
            ctgs, sizes = zip(*sizes)
        else:
            ctgs, sizes = (), ()
        self.sizes = sizes
        cumsizes = np.cumsum([0] + list(sizes))
        self.ctgs = ctgs
        self.cumsizes = cumsizes
        # cumsizes has one more element than ctgs; zip() pairs each contig
        # with the cumulative size *before* it.
        self.cumsizes_mapping = dict(zip(ctgs, cumsizes))
Beispiel #5
0
def run_megablast(infile=None, outfile=None, db=None, wordsize=None,
        pctid=98, hitlen=100, best=None, evalue=0.01, task="megablast", cpus=16):
    """
    Run `blastn` for `infile` against `db`, writing tabular (-outfmt 6) hits
    to `outfile`.

    The database is first passed through run_formatdb(). `wordsize`, `pctid`
    and `best` add -word_size, -perc_identity and -max_target_seqs when
    truthy. When both `pctid` and `hitlen` are truthy, the result is
    additionally passed through run_blast_filter() and the filtered file
    replaces `outfile`.
    """
    assert db, "Need to specify database fasta file."

    db = get_abs_path(db)
    # Prefer the `.00.nin` index name when that file exists
    multi_volume_index = db + ".00.nin"
    if op.exists(multi_volume_index):
        nin = multi_volume_index
    else:
        nin = db + ".nin"
    run_formatdb(infile=db, outfile=nin)

    pieces = [
        "blastn",
        "-query {0} -db {1} -out {2}".format(infile, db, outfile),
        "-evalue {0} -outfmt 6 -num_threads {1}".format(evalue, cpus),
        "-task {0}".format(task),
    ]
    if wordsize:
        pieces.append("-word_size {0}".format(wordsize))
    if pctid:
        pieces.append("-perc_identity {0}".format(pctid))
    if best:
        pieces.append("-max_target_seqs {0}".format(best))
    sh(" ".join(pieces))

    if not (pctid and hitlen):
        return

    # Filter into a temporary sibling file, then move it over the raw output
    filtered = outfile + ".P{0}L{1}".format(pctid, hitlen)
    run_blast_filter(infile=outfile, outfile=filtered,
            pctid=pctid, hitlen=hitlen)
    shutil.move(filtered, outfile)
Beispiel #6
0
    def make_link(self, firstN=0):
        """
        Make `self.link` available inside the `self.genome` folder.

        With a positive `firstN`, `first()` is asked to write the leading
        `firstN` records of `self.fastq` to `self.link`. Otherwise
        `self.link` becomes a symlink to the absolute path of `self.fastq`,
        replacing any symlink already at that location.
        """
        mkdir(self.genome)

        if firstN > 0:
            subset_args = [str(firstN), self.fastq,
                           "--outfile={0}".format(self.link)]
            first(subset_args)
        else:
            if op.islink(self.link):
                os.unlink(self.link)
            source = get_abs_path(self.fastq)
            os.symlink(source, self.link)
Beispiel #7
0
def fastq(args):
    """
    %prog fastq fastqfile

    Convert reads formatted as FASTQ file, and convert to CA frg file.
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text,
    # so it is kept verbatim.
    #
    # Builds a `fastqToCA` command line from the given FASTQ files and writes
    # its output to `<libname>.frg`. A non-zero --size marks the library as
    # mated (one or two FASTQ files allowed).
    p = OptionParser(fastq.__doc__)
    p.add_option("--sanger", dest="sanger", default=False, action="store_true",
            help="Are the qv sanger encodings? [default: %default]")
    p.add_option("--outtie", dest="outtie", default=False, action="store_true",
            help="Are these outie reads? [default: %default]")
    add_size_option(p)

    opts, args = p.parse_args(args)

    if len(args) < 1:
        # print_help() returns None, so `not ...` yields a non-zero exit
        # status on a usage error, consistent with the other entry points.
        sys.exit(not p.print_help())

    fastqfiles = [get_abs_path(x) for x in args]

    mated = (opts.size != 0)
    outtie = opts.outtie

    # Library name: platform prefix, then either the file-derived stem plus
    # "_Mated", or the fixed word "UnMated" for unmated libraries.
    prefix = "IlluminaMP_" if outtie else "IlluminaPE_"
    if mated:
        stem = op.basename(fastqfiles[0]).split(".")[0]
        stem = stem.replace("_1_sequence", "")
        libname = prefix + stem + "_Mated"
    else:
        libname = prefix + "UnMated"
    frgfile = libname + ".frg"

    mean, sv = get_mean_sv(opts.size)

    cmd = CAPATH("fastqToCA")
    cmd += " -libraryname {0} ".format(libname)
    fastqs = " ".join("-reads {0}".format(x) for x in fastqfiles)
    if mated:
        assert len(args) in (1, 2), "you need one or two fastq files for mated library"
        fastqs = "-mates {0}".format(",".join(fastqfiles))
        cmd += "-insertsize {0} {1} ".format(mean, sv)
    cmd += fastqs

    if opts.sanger:
        cmd += " -type sanger "
    if outtie:
        cmd += " -outtie "

    sh(cmd, outfile=frgfile)
Beispiel #8
0
Datei: bwa.py Projekt: rrane/jcvi
def align(args):
    """
    %prog align database.fasta read1.fq [read2.fq]

    Wrapper for three modes of BWA - mem (default), aln, bwasw (long reads).
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text,
    # so it is kept verbatim.
    #
    # Two positional arguments mean single-end, three mean paired-end. The
    # mode/layout combination picks one of the sibling command builders
    # (mem, bwasw, samse, sampe). Returns (samfile, None).
    valid_modes = ("bwasw", "aln", "mem")
    p = OptionParser(align.__doc__)
    p.add_option("--mode", default="mem", choices=valid_modes,
                 help="BWA mode [default: %default]")
    p.set_cutoff(cutoff=800)
    p.set_sam_options()

    opts, args = p.parse_args(args)
    mode = opts.mode
    nargs = len(args)

    if nargs not in (2, 3):
        sys.exit(not p.print_help())

    if nargs == 3:
        assert mode != "bwasw", "Cannot use --bwasw with paired-end mode"
        layout = "Paired-end alignment"
        runner = sampe if mode == "aln" else mem
    else:
        layout = "Single-end alignment"
        if mode == "bwasw":
            runner = bwasw
        elif mode == "aln":
            runner = samse
        else:
            runner = mem

    logging.debug("bwa-{0}: ".format(mode) + layout)

    args[0] = get_abs_path(args[0])
    cmd, samfile = runner(args, opts)
    if cmd:
        cmd = output_bam(cmd, samfile)

    # NOTE(review): sh() is called even when `cmd` is falsy, exactly as
    # before; presumably sh() tolerates that -- confirm.
    sh(cmd)

    if opts.unmapped:
        # Extract the unmapped reads, then discard the alignment file
        mopts = [samfile, "--unmapped"]
        if opts.bam:
            mopts.append("--bam")
        mapped(mopts)
        FileShredder([samfile])

    return samfile, None
Beispiel #9
0
Datei: ca.py Projekt: rrane/jcvi
def fastq(args):
    """
    %prog fastq fastqfile

    Convert reads formatted as FASTQ file, and convert to CA frg file.
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text,
    # so it is kept verbatim.
    #
    # Builds a `fastqToCA` command line from the given FASTQ files and writes
    # its output to `<libname>.frg`, where libname is derived from the first
    # file name. A non-zero --size marks the library as mated.
    from jcvi.formats.fastq import guessoffset

    p = OptionParser(fastq.__doc__)
    p.add_option("--outtie", dest="outtie", default=False, action="store_true",
            help="Are these outie reads? [default: %default]")
    p.set_phred()
    p.set_size()

    opts, args = p.parse_args(args)

    if len(args) < 1:
        # print_help() returns None, so `not ...` yields a non-zero exit
        # status on a usage error, consistent with the other entry points.
        sys.exit(not p.print_help())

    fastqfiles = [get_abs_path(x) for x in args]
    size = opts.size
    outtie = opts.outtie
    if size > 1000 and (not outtie):
        logging.debug("[warn] long insert size {0} but not outtie".format(size))

    mated = (size != 0)
    libname = op.basename(args[0]).split(".")[0]
    libname = libname.replace("_1_sequence", "")

    frgfile = libname + ".frg"
    mean, sv = get_mean_sv(size)

    cmd = "fastqToCA"
    cmd += " -libraryname {0} ".format(libname)
    fastqs = " ".join("-reads {0}".format(x) for x in fastqfiles)
    if mated:
        assert len(args) in (1, 2), "you need one or two fastq files for mated library"
        fastqs = "-mates {0}".format(",".join(fastqfiles))
        cmd += "-insertsize {0} {1} ".format(mean, sv)
    cmd += fastqs

    # Use the explicit --phred offset when given, otherwise guess it from the
    # first FASTQ file; an offset of 64 selects `-type illumina`.
    offset = int(opts.phred) if opts.phred else guessoffset([fastqfiles[0]])
    if offset == 64:
        cmd += " -type illumina"
    if outtie:
        cmd += " -outtie"

    sh(cmd, outfile=frgfile)
Beispiel #10
0
def index(args):
    """
    %prog index database.fasta

    Wrapper for `bowtie2-build`. Same interface.
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text,
    # so it is kept verbatim.
    p = OptionParser(index.__doc__)
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    # Exactly one positional argument: resolve it, then hand it to
    # check_index()
    database = get_abs_path(args[0])
    check_index(database)
Beispiel #11
0
def fastq(args):
    """
    %prog fastq fastqfile

    Convert reads formatted as FASTQ file, and convert to CA frg file.
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text,
    # so it is kept verbatim.
    #
    # Builds a `fastqToCA` command line from the given FASTQ files and writes
    # its output to `<libname>.frg`, where libname is derived from the first
    # file name. A non-zero --size marks the library as mated.
    from jcvi.formats.fastq import guessoffset

    p = OptionParser(fastq.__doc__)
    phdchoices = ("33", "64")
    p.add_option("--outtie", dest="outtie", default=False, action="store_true",
            help="Are these outie reads? [default: %default]")
    p.add_option("--phred", default=None, choices=phdchoices,
            help="Phred score offset {0} [default: guess]".format(phdchoices))
    add_size_option(p)

    opts, args = p.parse_args(args)

    if len(args) < 1:
        # print_help() returns None, so `not ...` yields a non-zero exit
        # status on a usage error, consistent with the other entry points.
        sys.exit(not p.print_help())

    fastqfiles = [get_abs_path(x) for x in args]

    mated = (opts.size != 0)
    outtie = opts.outtie
    libname = op.basename(args[0]).split(".")[0]
    libname = libname.replace("_1_sequence", "")

    frgfile = libname + ".frg"
    mean, sv = get_mean_sv(opts.size)

    cmd = CAPATH("fastqToCA")
    cmd += " -libraryname {0} ".format(libname)
    fastqs = " ".join("-reads {0}".format(x) for x in fastqfiles)
    if mated:
        assert len(args) in (1, 2), "you need one or two fastq files for mated library"
        fastqs = "-mates {0}".format(",".join(fastqfiles))
        cmd += "-insertsize {0} {1} ".format(mean, sv)
    cmd += fastqs

    # Use the explicit --phred offset when given, otherwise guess it from the
    # first FASTQ file; an offset of 64 selects `-type illumina`.
    offset = int(opts.phred) if opts.phred else guessoffset([fastqfiles[0]])
    if offset == 64:
        cmd += " -type illumina"
    if outtie:
        cmd += " -outtie"

    sh(cmd, outfile=frgfile)
Beispiel #12
0
Datei: sam.py Projekt: rrane/jcvi
def merge(args):
    """
    %prog merge merged_bams bams1_dir bams2_dir ...

    Merge BAM files. Treat the bams with the same prefix as a set.
    Output the commands first.
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text,
    # so it is kept verbatim.
    from jcvi.apps.softlink import get_abs_path
    from jcvi.apps.grid import MakeManager

    p = OptionParser(merge.__doc__)
    p.add_option("--sep", default="_",
                 help="Separator to group per prefix")
    opts, args = p.parse_args(args)

    if len(args) < 2:
        sys.exit(not p.print_help())

    merged_bams, bamdirs = args[0], args[1:]
    mkdir(merged_bams)

    # Collect every *.bam under the input folders, skipping any path that
    # contains "nsorted"
    bams = []
    for bamdir in bamdirs:
        bams.extend(glob(op.join(bamdir, "*.bam")))
    bams = [bam for bam in bams if "nsorted" not in bam]

    logging.debug("Found a total of {0} BAM files.".format(len(bams)))

    sep = opts.sep

    def prefix_of(path):
        # Grouping key: the part of the file name before the first separator
        return op.basename(path).split(sep)[0]

    # groupby() only groups adjacent items, hence the sort on the same key
    bams.sort(key=prefix_of)
    mm = MakeManager()
    for _prefix, members in groupby(bams, key=prefix_of):
        members = sorted(members)
        target = op.join(merged_bams, op.basename(members[0]))
        if len(members) == 1:
            # A lone BAM is simply symlinked into the output folder
            link_cmd = "ln -s {0} {1}".format(get_abs_path(members[0]), target)
            mm.add("", target, link_cmd)
        else:
            merge_cmds = [
                "rm {0}".format(target),
                "samtools merge {0} {1}".format(target, " ".join(members)),
            ]
            mm.add(members, target, merge_cmds)
    mm.write()
Beispiel #13
0
def run_vecscreen(infile=None, outfile=None, db="UniVec_Core",
        pctid=None, hitlen=None):
    """
    Run `blastn` for `infile` against `db` with a fixed set of VecScreen-style
    parameters, writing tabular hits to `outfile`.

    BLASTN parameters reference:
    http://www.ncbi.nlm.nih.gov/VecScreen/VecScreen_docs.html

    `pctid` and `hitlen` are accepted but not used by this function.
    """
    db = get_abs_path(db)
    run_formatdb(infile=db, outfile=db + ".nin")

    pieces = (
        "blastn",
        "-task blastn",
        "-query {0} -db {1} -out {2}".format(infile, db, outfile),
        "-penalty -5 -gapopen 4 -gapextend 4 -dust yes -soft_masking true",
        "-searchsp 1750000000000 -evalue 0.01 -outfmt 6 -num_threads 8",
    )
    sh(" ".join(pieces))
Beispiel #14
0
def soapX(args):
    """
    %prog soapX folder tag [*.fastq]

    Run SOAP on a folder of paired reads and apply tag before assembly.
    Optional *.fastq in the argument list will be symlinked in each folder and
    co-assembled.
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text,
    # so it is kept verbatim.
    p = OptionParser(soapX.__doc__)
    opts, args = p.parse_args(args)

    if len(args) < 2:
        sys.exit(not p.print_help())

    folder = args[0]
    # The tag argument is a comma-separated list
    tags = args[1].split(",")
    # Anything after the tag is an extra read file, resolved to absolute path
    extra = [get_abs_path(path) for path in args[2:]]

    for reads, prefix in iter_project(folder, n=3):
        soap_trios(reads, prefix, tags, extra)
Beispiel #15
0
def prepare(args):
    """
    %prog prepare barcode_key.csv reference.fasta

    Prepare TASSEL pipeline.
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text,
    # so it is kept verbatim.
    #
    # Creates the working folder layout and writes `run.sh`, a script that
    # chains the pipeline plugins with an alignment step in between.
    #
    # The enzymes are spelled out one per item: the previous "|"-joined
    # string literal lacked a separator at a line break, which fused
    # "PstI-MspI" and "PstI-TaqI" into one bogus choice and made both
    # enzymes unselectable.
    valid_enzymes = (
        "ApeKI", "ApoI", "BamHI", "EcoT22I", "HinP1I", "HpaII", "MseI",
        "MspI", "NdeI", "PasI", "PstI", "Sau3AI", "SbfI",
        "AsiSI-MspI", "BssHII-MspI", "FseI-MspI", "PaeR7I-HhaI",
        "PstI-ApeKI", "PstI-EcoT22I", "PstI-MspI", "PstI-TaqI",
        "SalI-MspI", "SbfI-MspI",
    )
    p = OptionParser(prepare.__doc__)
    p.add_option("--enzyme", default="ApeKI", choices=valid_enzymes,
                 help="Restriction enzyme used [default: %default]")
    p.set_home("tassel")
    p.set_aligner(aligner="bwa")
    p.set_cpus()
    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    barcode, reference = args
    thome = opts.tassel_home
    reference = get_abs_path(reference)
    folders = ("fastq", "tagCounts", "mergedTagCounts", "topm",
               "tbt", "mergedTBT", "hapmap", "hapmap/raw",
               "hapmap/mergedSNPs", "hapmap/filt", "hapmap/bpec")
    for f in folders:
        mkdir(f)

    # Build the pipeline
    runsh = []
    o = "-i fastq -k {0} -e {1} -o tagCounts".format(barcode, opts.enzyme)
    cmd = run_pipeline(thome, "FastqToTagCountPlugin", o)
    runsh.append(cmd)

    o = "-i tagCounts -o mergedTagCounts/myMasterTags.cnt"
    o += " -c 5 -t mergedTagCounts/myMasterTags.cnt.fq"
    cmd = run_pipeline(thome, "MergeMultipleTagCountPlugin", o)
    runsh.append(cmd)

    # The alignment runs from inside mergedTagCounts, then steps back out
    runsh.append("cd mergedTagCounts")

    cmd = "python -m jcvi.apps.{0} align --cpus {1}".\
                format(opts.aligner, opts.cpus)
    cmd += " {0} myMasterTags.cnt.fq".format(reference)
    runsh.append(cmd)
    runsh.append("cd ..")

    o = "-i mergedTagCounts/*.sam -o topm/myMasterTags.topm"
    cmd = run_pipeline(thome, "SAMConverterPlugin", o)
    runsh.append(cmd)

    o = "-i mergedTBT/myStudy.tbt.byte -y -m topm/myMasterTags.topm"
    o += " -mUpd topm/myMasterTagsWithVariants.topm"
    o += " -o hapmap/raw/myGBSGenos_chr+.hmp.txt"
    o += " -mnF 0.8 -p myPedigreeFile.ped -mnMAF 0.02 -mnMAC 100000"
    o += " -ref {0} -sC 1 -eC 10".format(reference)
    cmd = run_pipeline(thome, "TagsToSNPByAlignmentPlugin", o)
    runsh.append(cmd)

    o = "-hmp hapmap/raw/myGBSGenos_chr+.hmp.txt"
    o += " -o hapmap/mergedSNPs/myGBSGenos_mergedSNPs_chr+.hmp.txt"
    o += " -misMat 0.1 -p myPedigreeFile.ped -callHets -sC 1 -eC 10"
    cmd = run_pipeline(thome, "MergeDuplicateSNPsPlugin", o)
    runsh.append(cmd)

    o = "-hmp hapmap/mergedSNPs/myGBSGenos_mergedSNPs_chr+.hmp.txt"
    o += " -o hapmap/filt/myGBSGenos_mergedSNPsFilt_chr+.hmp.txt"
    o += " -mnTCov 0.01 -mnSCov 0.2 -mnMAF 0.01 -sC 1 -eC 10"
    #o += "-hLD -mnR2 0.2 -mnBonP 0.005"
    cmd = run_pipeline(thome, "GBSHapMapFiltersPlugin", o)
    runsh.append(cmd)

    runfile = "run.sh"
    write_file(runfile, "\n".join(runsh), meta="run script")
Beispiel #16
0
def align(args):
    """
    %prog align database.fasta read1.fq [read2.fq]

    Wrapper for `bowtie2` single-end or paired-end, depending on the number of args.
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text,
    # so it is kept verbatim.
    #
    # Returns (samfile, logfile). If need_update() reports the output as
    # current relative to the index, bowtie2 is not run again.
    from jcvi.formats.fastq import guessoffset

    p = OptionParser(align.__doc__)
    p.set_firstN(firstN=0)
    p.add_option("--full", default=False, action="store_true",
                 help="Enforce end-to-end alignment [default: local]")
    p.add_option("--reorder", default=False, action="store_true",
                 help="Keep the input read order [default: %default]")
    p.set_cutoff(cutoff=800)
    p.set_mateorientation(mateorientation="+-")
    p.set_sam_options(bowtie=True)

    opts, args = p.parse_args(args)

    # Translate the mate orientation into the matching bowtie2 flag. The
    # leading space matters: without it the flag was glued onto whatever the
    # user supplied via the extra options (e.g. "-N 1--rf").
    extra = opts.extra or ""
    mo = opts.mateorientation
    if mo == "-+":
        extra += " --rf"
    elif mo != "+-":
        extra += " --ff"
    # "+-" needs no flag

    nargs = len(args)
    if nargs == 2:
        logging.debug("Single-end alignment")
        PE = False
    elif nargs == 3:
        logging.debug("Paired-end alignment")
        PE = True
    else:
        sys.exit(not p.print_help())

    firstN = opts.firstN
    mapped = opts.mapped
    unmapped = opts.unmapped
    gl = "--end-to-end" if opts.full else "--local"

    dbfile, readfile = args[0:2]
    dbfile = get_abs_path(dbfile)
    safile = check_index(dbfile)
    prefix = get_prefix(readfile, dbfile)
    samfile, mapped, unmapped = get_samfile(
        readfile, dbfile, bowtie=True, mapped=mapped, unmapped=unmapped, bam=opts.bam
    )
    logfile = prefix + ".log"
    offset = guessoffset([readfile])

    if not need_update(safile, samfile):
        logging.error("`{0}` exists. `bowtie2` already run.".format(samfile))
        return samfile, logfile

    cmd = "bowtie2 -x {0}".format(dbfile)
    if PE:
        r1, r2 = args[1:3]
        cmd += " -1 {0} -2 {1}".format(r1, r2)
        cmd += " --maxins {0}".format(opts.cutoff)
        mtag, utag = "--al-conc", "--un-conc"
    else:
        cmd += " -U {0}".format(readfile)
        mtag, utag = "--al", "--un"

    if mapped:
        cmd += " {0} {1}".format(mtag, mapped)
    if unmapped:
        cmd += " {0} {1}".format(utag, unmapped)

    if firstN:
        cmd += " --upto {0}".format(firstN)
    cmd += " -p {0}".format(opts.cpus)
    cmd += " --phred{0}".format(offset)
    cmd += " {0}".format(gl)
    if opts.reorder:
        cmd += " --reorder"

    cmd += " {0}".format(extra)
    # Finally the log
    cmd += " 2> {0}".format(logfile)

    cmd = output_bam(cmd, samfile)
    sh(cmd)

    # Echo the bowtie2 log to stderr, closing the handle afterwards
    with open(logfile) as fp:
        sys.stderr.write(fp.read() + "\n")

    return samfile, logfile
Beispiel #17
0
def parallel(args):
    """
    %prog parallel genome.fasta N

    Partition the genome into parts and run separately. This is useful if MAKER
    is to be run on the grid.
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text,
    # so it is kept verbatim.
    #
    # Side effects: creates the tmp folder, splits the genome into `outdir`,
    # creates one folder per part holding copies of the *.ctl files, and
    # writes `jobs`, `array.sh` and (unless the engine is PBS) `qsub.sh`.
    from jcvi.formats.base import split

    p = OptionParser(parallel.__doc__)
    p.set_home("maker")
    p.set_tmpdir(tmpdir="tmp")
    p.set_grid_opts(array=True)
    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    genome, NN = args
    threaded = opts.threaded or 1
    tmpdir = opts.tmpdir

    mkdir(tmpdir)
    tmpdir = get_abs_path(tmpdir)

    N = int(NN)
    # The message now matches the condition, which does allow N == 1
    assert 1 <= N < 1000, "Required: 1 <= N < 1000!"

    outdir = "outdir"
    fs = split([genome, outdir, NN])

    c = CTLFile("maker_opts.ctl")
    c.update_abs_path()
    if threaded > 1:
        c.update_tag("cpus", threaded)

    cwd = os.getcwd()
    dirs = []
    for name in fs.names:
        fn = get_abs_path(name)
        bn = op.basename(name)
        dirs.append(bn)
        c.update_tag("genome", fn)
        mkdir(bn)
        sh("cp *.ctl {0}".format(bn))

        # Overwrite the copied maker_opts.ctl with the per-part settings,
        # always returning to the starting folder.
        os.chdir(bn)
        try:
            c.write_file("maker_opts.ctl")
        finally:
            os.chdir(cwd)

    jobs = "jobs"
    # One folder name per line; `with` closes the file even on error
    with open(jobs, "w") as fw:
        fw.write("\n".join(dirs) + "\n")

    # Submit to grid
    ncmds = len(dirs)
    runfile = "array.sh"
    cmd = op.join(opts.maker_home, "bin/maker")
    if tmpdir:
        cmd += " -TMP {0}".format(tmpdir)

    engine = get_grid_engine()
    if engine == "SGE":
        contents = arraysh.format(jobs, cmd)
    else:
        contents = arraysh_ua.format(N, threaded, jobs, cmd)
    write_file(runfile, contents, meta="run script")

    if engine == "PBS":
        return

    # qsub script
    # Literal backslash + "$": same value as before, written with a valid
    # escape sequence.
    outfile = "maker.\\$TASK_ID.out"
    proc = GridProcess(runfile, outfile=outfile, errfile=outfile,
                       arr=ncmds, grid_opts=opts)
    qsubfile = "qsub.sh"
    qsub = proc.build()
    write_file(qsubfile, qsub, meta="run script")
Beispiel #18
0
def pairs(args):
    """
    %prog pairs folder reference.fasta

    Estimate insert size distribution. Compatible with a variety of aligners,
    including CLC, BOWTIE and BWA.
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text,
    # so it is kept verbatim.
    #
    # For each read set: sample the first N reads, align them inside the
    # `pairs-<aligner>` folder, compute pair statistics, and derive a library
    # label "<MP|PE>-<median rounded up to two significant digits>". The
    # resulting "original file -> suggested name" table is written to
    # `f.meta`.
    p = OptionParser(pairs.__doc__)
    p.set_firstN()
    p.set_mates()
    p.set_aligner()
    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    cwd = os.getcwd()
    aligner = opts.aligner
    work = "-".join(("pairs", aligner))
    mkdir(work)

    if aligner == "clc":
        from jcvi.apps.clc import align
        from jcvi.formats.cas import pairs as ps
    else:
        from jcvi.formats.sam import pairs as ps

    if aligner == "bowtie":
        from jcvi.apps.bowtie import align
    elif aligner == "bwa":
        from jcvi.apps.bwa import align

    folder, ref = args
    ref = get_abs_path(ref)
    messages = []
    for reads, prefix in iter_project(folder, 2):
        samplefq = op.join(work, prefix + ".first.fastq")
        first([str(opts.firstN)] + reads + ["-o", samplefq])

        # Run the aligner from inside the work folder, and always come back
        # even if alignment or the statistics step fails.
        os.chdir(work)
        try:
            align_args = [ref, op.basename(samplefq)]
            if aligner != "clc":
                align_args += ["--bam"]
            outfile, logfile = align(align_args)
            bedfile, stats = ps([outfile, "--rclip={0}".format(opts.rclip)])
        finally:
            os.chdir(cwd)

        median = stats.median
        tag = "MP" if median > 1000 else "PE"
        lib = "{0}-{1}".format(tag, _round_up_two_digits(median))
        for i, xp in enumerate(reads):
            suffix = "fastq.gz" if xp.endswith(".gz") else "fastq"
            link = "{0}-{1}.{2}.{3}".format(lib, prefix.replace("-", ""),
                                            i + 1, suffix)
            m = "\t".join(str(x) for x in (xp, link))
            messages.append(m)

    messages = "\n".join(messages)
    write_file("f.meta", messages, tee=True)


def _round_up_two_digits(value):
    # Return str(value) rounded up to two significant digits.
    #
    # Works on the decimal text of an integer-like value. Unlike the previous
    # inline code it copes with values of one or two digits (no trailing part
    # to inspect) and with a carry into a third digit (995 -> "1000" rather
    # than "100").
    digits = str(value)
    head, tail = digits[:2], digits[2:]
    if not tail or int(tail) == 0:
        return digits
    return str((int(head) + 1) * 10 ** len(tail))