예제 #1
0
def make_blastdb(type, file, name):
    """Run ``makeblastdb`` on ``file`` unless a current index already exists.

    The index is looked up as ``name + ".nin"`` when ``type`` is ``'nucl'``
    and as ``name + ".pin"`` for any other value. The database is (re)built
    when that index file is missing or its ctime is older than the ctime of
    ``file``.

    Args:
        type: value handed to ``-dbtype``.
        file: sequence file handed to ``-in``.
        name: database base name handed to ``-out``.
    """
    suffix = ".nin" if type == 'nucl' else ".pin"
    index_path = name + suffix

    # Guard clause: nothing to do when the index is at least as new as input.
    up_to_date = (os.path.exists(index_path) and
                  os.path.getctime(index_path) >= os.path.getctime(file))
    if up_to_date:
        return

    build_cmd = ['makeblastdb', '-dbtype', type, '-in', file, '-out', name]
    printCMD(build_cmd)
    call(build_cmd, stdout=DEVNULL, stderr=DEVNULL)
예제 #2
0
def run_dipspades(parser, args):
    """Assemble FASTQ reads with dipSPAdes and copy the results out.

    Builds a ``dipspades.py`` command line from ``args``, runs it, then copies
    the consensus contigs to the final output file and the paired/unpaired
    consensus contigs next to it.

    Args:
        parser: not used by this function.
        args: parsed options. Reads ``workdir``, ``cpus``, ``memory``,
            ``assembler_args``, ``haplocontigs``, ``tmpdir``, ``left``,
            ``right``, ``debug``, ``out`` and ``pipe``; ``args.workdir`` is
            filled in with a PID-based name when empty.

    Exits with status 1 when no forward reads (``args.left``) are given.
    """
    if not args.workdir:
        args.workdir = 'dipspades_' + str(os.getpid())

    runcmd = [
        'dipspades.py', '--threads',
        str(args.cpus), '--cov-cutoff', 'auto', '--mem', args.memory, '-o',
        args.workdir
    ]

    if args.assembler_args:
        runcmd.extend(args.assembler_args)

    if args.haplocontigs:
        runcmd.extend(['--hap', args.haplocontigs])

    if args.tmpdir:
        runcmd.extend(['--tmp-dir', args.tmpdir])

    # find reads -- use --left/right
    forReads, revReads = (None, ) * 2
    if args.left:
        forReads = os.path.abspath(args.left)
    if args.right:
        revReads = os.path.abspath(args.right)
    if not forReads:
        status('Unable to located FASTQ raw reads, provide --left')
        sys.exit(1)

    if not revReads:
        runcmd = runcmd + ['-s', forReads]
    else:
        runcmd = runcmd + ['--pe1-1', forReads, '--pe1-2', revReads]

    # An existing workdir overrides everything built above: the previous run
    # is resumed with --continue instead of starting a new assembly.
    if os.path.isdir(args.workdir):
        runcmd = ['dipspades.py', '-o', args.workdir, '--continue']

    # now run the dipspades job
    status('Assembling FASTQ data using Spades')

    printCMD(runcmd)
    if args.debug:
        subprocess.run(runcmd)
    else:
        # subprocess.DEVNULL avoids leaking an opened os.devnull handle.
        subprocess.run(runcmd,
                       stdout=subprocess.DEVNULL,
                       stderr=subprocess.DEVNULL)

    # pull out assembly
    # NOTE(review): `prefix` is not defined inside this function; presumably a
    # module-level name -- confirm, otherwise these lines raise NameError.
    if args.out:
        finalOut = args.out
    else:
        finalOut = prefix + '.dipspades.fasta'

    dipspadesoutdir = os.path.join(args.workdir, 'dipspades')
    # Look at the workdir root first (original behaviour), then fall back to
    # the dipspades/ sub-directory where the other consensus files are read.
    consensus = os.path.join(args.workdir, 'consensus_contigs.fasta')
    if not os.path.isfile(consensus):
        consensus = os.path.join(dipspadesoutdir, 'consensus_contigs.fasta')

    if os.path.isfile(consensus):
        paired_out = prefix + ".dipspades_consensus_paired.fasta"
        unpaired_out = prefix + ".dipspades_consensus_unpaired.fasta"
        shutil.copyfile(consensus, finalOut)
        shutil.copyfile(
            os.path.join(dipspadesoutdir, 'paired_consensus_contigs.fasta'),
            paired_out)
        # Fixed: the unpaired output used to be a second copy of the paired
        # contigs.
        shutil.copyfile(
            os.path.join(dipspadesoutdir, 'unpaired_consensus_contigs.fasta'),
            unpaired_out)
        status('Dipspades assembly finished: {:}'.format(finalOut))
        # Fixed: both file names go into one message instead of handing the
        # second name to status() as an extra positional argument.
        status('Dipspades assembly copied over: {:} {:}'.format(
            unpaired_out, paired_out))
        numSeqs, assemblySize = fastastats(finalOut)
        status('Assembly is {:,} scaffolds and {:,} bp'.format(
            numSeqs, assemblySize))
    else:
        status(
            'Spades assembly output missing -- check Dipspades logfile in {:}.'
            .format(os.path.join(dipspadesoutdir, 'dipspades.log')))

    if not args.pipe:
        status(
            'Your next command might be:\n\tAAFTF vecscreen -i {:} -c {:}\n'.
            format(finalOut, args.cpus))
예제 #3
0
def run_megahit(parser, args):
    """Assemble FASTQ reads with megahit and copy out the final contigs.

    Args:
        parser: not used by this function.
        args: parsed options. Reads ``workdir``, ``cpus``, ``assembler_args``,
            ``memory``, ``tmpdir``, ``left``, ``right``, ``debug``, ``out`` and
            ``pipe``; ``args.workdir`` is filled in with a PID-based name when
            empty.

    Exits with status 1 when no forward reads (``args.left``) are given.
    """
    if not args.workdir:
        args.workdir = 'megahit_' + str(os.getpid())

    runcmd = ['megahit', '-t', str(args.cpus), '-o', args.workdir]

    if args.assembler_args:
        runcmd.extend(args.assembler_args)

    if args.memory:
        runcmd.extend(['--memory', args.memory])

    if args.tmpdir:
        runcmd.extend(['--tmp-dir', args.tmpdir])

    # find reads -- use --left/right
    forReads, revReads = (None, ) * 2
    if args.left:
        forReads = os.path.abspath(args.left)
    if args.right:
        revReads = os.path.abspath(args.right)
    if not forReads:
        status('Unable to located FASTQ raw reads, provide --left')
        sys.exit(1)

    if not revReads:
        runcmd = runcmd + ['-r', forReads]
    else:
        runcmd = runcmd + ['-1', forReads, '-2', revReads]

    # NOTE(review): this only reports the problem; the command is still
    # launched afterwards. Confirm whether the function should stop here.
    if os.path.isdir(args.workdir):
        status("Cannot re-run with existing folder {}".format(args.workdir))

    # now run the megahit job
    status('Assembling FASTQ data using megahit')
    printCMD(runcmd)
    if args.debug:
        subprocess.run(runcmd)
    else:
        # subprocess.DEVNULL avoids leaking an opened os.devnull handle.
        subprocess.run(runcmd,
                       stdout=subprocess.DEVNULL,
                       stderr=subprocess.DEVNULL)

    # pull out assembly
    # NOTE(review): `prefix` is not defined inside this function; presumably a
    # module-level name -- confirm, otherwise this raises NameError when
    # args.out is empty.
    if args.out:
        finalOut = args.out
    else:
        finalOut = prefix + '.megahit.fasta'

    contigs = os.path.join(args.workdir, 'final.contigs.fa')
    if os.path.isfile(contigs):
        shutil.copyfile(contigs, finalOut)
        status('Megahit assembly finished: {:}'.format(finalOut))
        numSeqs, assemblySize = fastastats(finalOut)
        status('Assembly is {:,} scaffolds and {:,} bp'.format(
            numSeqs, assemblySize))
    else:
        status('Megahit assembly output missing -- check megahit logfile.')

    if not args.pipe:
        status(
            'Your next command might be:\n\tAAFTF vecscreen -i {:} -c {:}\n'.
            format(finalOut, args.cpus))
예제 #4
0
def run(parser, args):
    """Purge contigs by sourmash taxonomy and, optionally, by read coverage.

    Steps:
      1. copy ``args.input`` into the working directory;
      2. classify every contig with ``sourmash lca classify`` and mark for
         removal the classified contigs whose lineage contains none of
         ``args.phylum``;
      3. when reads are given, map them with BWA, compute per-contig coverage
         with ``samtools bedcov`` and mark contigs whose coverage is at or
         below ``args.mincovpct`` percent of the mean coverage of the contigs
         at least N50 long;
      4. write the surviving contigs to ``args.outfile``.

    Args:
        parser: not used by this function.
        args: parsed options. Reads ``workdir``, ``cpus``, ``left``, ``right``,
            ``sourdb``, ``AAFTF_DB``, ``input``, ``taxonomy``, ``phylum``,
            ``debug``, ``mincovpct``, ``outfile`` and ``pipe``.

    Exits with status 1 when the sourmash database cannot be located, and
    (unchanged from the original) right after the taxonomy report when
    ``args.taxonomy`` is set.
    """
    if not args.workdir:
        args.workdir = 'aaftf-sourpurge_' + str(uuid.uuid4())[:8]
    if not os.path.exists(args.workdir):
        os.mkdir(args.workdir)

    bamthreads = 4
    if args.cpus < 4:
        bamthreads = 1

    # find reads; without them the coverage filter is skipped, not fatal
    forReads, revReads = (None, ) * 2
    if args.left:
        forReads = os.path.abspath(args.left)
    if args.right:
        revReads = os.path.abspath(args.right)
    if not forReads:
        status(
            'Unable to located FASTQ raw reads, low coverage will be skipped. Provide -l,--left or -r,--right to enable low coverage filtering.'
        )

    # parse database locations
    if not args.sourdb:
        try:
            DB = os.environ["AAFTF_DB"]
        except KeyError:
            if not args.AAFTF_DB:
                status(
                    "$AAFTF_DB/genbank-k31.lca.json.gz not found, pass --sourdb"
                )
                sys.exit(1)
            # Fixed: DB used to stay unbound on this path, so the join below
            # raised NameError.
            DB = args.AAFTF_DB
        SOUR = os.path.join(DB, 'genbank-k31.lca.json.gz')
        if not os.path.isfile(SOUR):
            status(
                "{:} sourmash database not found, download and rename to genbank-k31.lca.json.gz"
                .format(SOUR))
            sys.exit(1)
    else:
        SOUR = os.path.abspath(args.sourdb)

    # hard coded tmpfile names (relative to the working directory)
    assembly_working = 'assembly.fasta'
    blobBAM = 'remapped.bam'
    shutil.copyfile(args.input, os.path.join(args.workdir, assembly_working))
    numSeqs, assemblySize = fastastats(
        os.path.join(args.workdir, assembly_working))
    status('Assembly is {:,} contigs and {:,} bp'.format(
        numSeqs, assemblySize))

    # now filter for taxonomy with sourmash lca classify
    status('Running SourMash to get taxonomy classification for each contig')
    sour_sketch = os.path.basename(assembly_working) + '.sig'
    sour_compute = [
        'sourmash', 'compute', '-k', '31', '--scaled=1000', '--singleton',
        assembly_working
    ]
    printCMD(sour_compute)
    subprocess.run(sour_compute, cwd=args.workdir, stderr=subprocess.DEVNULL)
    sour_classify = [
        'sourmash', 'lca', 'classify', '--db', SOUR, '--query', sour_sketch
    ]
    printCMD(sour_classify)
    # output csv: ID,status,superkingdom,phylum,class,order,family,genus,species,strain
    Taxonomy = {}
    UniqueTax = []
    sourmashTSV = os.path.join(args.workdir, 'sourmash.csv')
    with open(sourmashTSV, 'w') as sour_out:
        for line in execute(sour_classify, args.workdir):
            sour_out.write(line)
            if not line or line.startswith('\n') or line.startswith(
                    'ID') or line.count(',') < 9:
                continue
            line = line.strip()
            cols = line.split(',')
            # Everything after the status column is the lineage.
            if 'found' in cols:
                idx = cols.index('found')
                Taxonomy[cols[0]] = cols[idx + 1:]
                taxClean = [x for x in cols[idx + 1:] if x]
                UniqueTax.append('{:}'.format(';'.join(taxClean)))
            elif 'nomatch' in cols:
                idx = cols.index('nomatch')
                Taxonomy[cols[0]] = cols[idx + 1:]
    UniqueTax = set(UniqueTax)
    status('Found {:} taxonomic classifications for contigs:\n{:}'.format(
        len(UniqueTax), '\n'.join(UniqueTax)))
    if args.taxonomy:
        sys.exit(1)
    Tax2Drop = []
    for k, v in Taxonomy.items():
        v = [x for x in v if x]  # remove empty items from list
        if args.debug:
            print('{:}\t{:}'.format(k, v))
        # Unclassified contigs (empty lineage) are kept.
        if len(v) > 0:
            if not any(i in v for i in args.phylum):
                Tax2Drop.append(k)

    # drop contigs from taxonomy before calculating coverage
    status('Dropping {:} contigs from taxonomy screen'.format(len(Tax2Drop)))
    sourTax = os.path.join(args.workdir, 'sourmashed-tax-screen.fasta')
    tax_drop_ids = set(Tax2Drop)  # O(1) membership tests in the loop below
    # Plain 'r': the 'U' mode flag was removed in Python 3.11.
    with open(sourTax, 'w') as outfile:
        with open(os.path.join(args.workdir, assembly_working),
                  'r') as infile:
            for record in SeqIO.parse(infile, 'fasta'):
                if record.id not in tax_drop_ids:
                    SeqIO.write(record, outfile, 'fasta')

    # only do coverage trimming if reads provided
    Contigs2Drop = []  # stays empty if no reads were given
    if forReads:
        # check if BAM present, if so skip running
        if not os.path.isfile(os.path.join(args.workdir, blobBAM)):
            # index
            bwa_index = ['bwa', 'index', os.path.basename(sourTax)]
            status('Building BWA index')
            printCMD(bwa_index)
            subprocess.run(bwa_index,
                           cwd=args.workdir,
                           stderr=subprocess.DEVNULL)
            # map reads to assembly using BWA
            bwa_cmd = [
                'bwa',
                'mem',
                '-t',
                str(args.cpus),
                os.path.basename(sourTax),  # assembly index base
                forReads
            ]
            if revReads:
                bwa_cmd.append(revReads)

            # Fixed: the alignment used to be nested under `if revReads:`, so
            # single-end input never produced a BAM file.
            # run BWA and pipe to samtools sort
            status('Aligning reads to assembly with BWA')
            printCMD(bwa_cmd)
            p1 = subprocess.Popen(bwa_cmd,
                                  cwd=args.workdir,
                                  stdout=subprocess.PIPE,
                                  stderr=subprocess.DEVNULL)
            p2 = subprocess.Popen([
                'samtools', 'sort', '--threads',
                str(bamthreads), '-o', blobBAM, '-'
            ],
                                  cwd=args.workdir,
                                  stdout=subprocess.PIPE,
                                  stderr=subprocess.DEVNULL,
                                  stdin=p1.stdout)
            p1.stdout.close()
            p2.communicate()
            subprocess.run(['samtools', 'index', blobBAM], cwd=args.workdir)

        # now calculate coverage from BAM file
        status('Calculating read coverage per contig')
        FastaBed = os.path.join(args.workdir, 'assembly.bed')
        lengths = []
        with open(FastaBed, 'w') as bedout:
            with open(sourTax, 'r') as SeqIn:
                for record in SeqIO.parse(SeqIn, 'fasta'):
                    bedout.write('{:}\t{:}\t{:}\n'.format(
                        record.id, 0, len(record.seq)))
                    lengths.append(len(record.seq))

        N50 = calcN50(lengths)
        Coverage = {}
        coverageBed = os.path.join(args.workdir, 'coverage.bed')
        cov_cmd = ['samtools', 'bedcov', os.path.basename(FastaBed), blobBAM]
        printCMD(cov_cmd)
        with open(coverageBed, 'w') as bed_out:
            for line in execute(cov_cmd, args.workdir):
                bed_out.write(line)

                if not line or line.startswith('\n') or line.count('\t') < 3:
                    continue

                line = line.strip()
                cols = line.split('\t')
                # column 4 divided by column 3 (the BED end written above,
                # i.e. the contig length) is the coverage stored per contig
                cov = int(cols[3]) / float(cols[2])
                Coverage[cols[0]] = (int(cols[2]), cov)

        # get average coverage of N50 contigs
        n50Cov = []
        for k, v in Coverage.items():
            if args.debug:
                print('{:}; Len: {:}; Cov: {:.2f}'.format(k, v[0], v[1]))
            if v[0] >= N50:
                n50Cov.append(v[1])
        # Guard against an empty coverage table (e.g. the mapping produced no
        # output) instead of dying with ZeroDivisionError.
        n50AvgCov = sum(n50Cov) / len(n50Cov) if n50Cov else 0.0
        minpct = args.mincovpct / 100
        min_coverage = float(n50AvgCov * minpct)
        status('Average coverage for N50 contigs is {:}X'.format(
            int(n50AvgCov)))

        # Start list of contigs to drop
        for k, v in Coverage.items():
            if v[1] <= min_coverage:
                Contigs2Drop.append(k)
        status(
            'Found {:,} contigs with coverage less than {:.2f}X ({:}%)'.format(
                len(Contigs2Drop), min_coverage, args.mincovpct))

    if args.debug:
        print('Contigs dropped due to coverage: {:}'.format(
            ','.join(Contigs2Drop)))
        print('Contigs dropped due to taxonomy: {:}'.format(
            ','.join(Tax2Drop)))

    DropFinal = set(Contigs2Drop + Tax2Drop)
    status('Dropping {:,} total contigs based on taxonomy and coverage'.format(
        len(DropFinal)))
    with open(args.outfile, 'w') as outfile, open(sourTax, 'r') as seqin:
        for record in SeqIO.parse(seqin, 'fasta'):
            if record.id not in DropFinal:
                SeqIO.write(record, outfile, 'fasta')

    numSeqs, assemblySize = fastastats(args.outfile)
    status('Sourpurged assembly is {:,} contigs and {:,} bp'.format(
        numSeqs, assemblySize))
    # Suggested output name for the next pipeline step.
    if '_' in args.outfile:
        nextOut = args.outfile.split('_')[0] + '.rmdup.fasta'
    elif '.' in args.outfile:
        nextOut = args.outfile.split('.')[0] + '.rmdup.fasta'
    else:
        nextOut = args.outfile + '.rmdup.fasta'

    # Keep a copy of the taxonomy table next to the results before the
    # working directory is removed.
    if checkfile(sourmashTSV):
        baseinput = os.path.basename(args.input)
        if '.' in baseinput:
            baseinput = baseinput.rsplit('.', 1)[0]

        shutil.copy(sourmashTSV, baseinput + '.sourmash-taxonomy.csv')

    if not args.debug:
        SafeRemove(args.workdir)

    if not args.pipe:
        status('Your next command might be:\n\tAAFTF rmdup -i {:} -o {:}\n'.
               format(args.outfile, nextOut))
예제 #5
0
def run(parser, args):
    """Assemble a mitochondrial genome with NOVOPlasty and write ``args.out``.

    Writes a NOVOPlasty config from the bundled template, runs NOVOPlasty in
    the working directory, then either rotates a circularized assembly with
    ``orient_to_start`` or writes the uncircularized contigs renamed to
    ``contig_N``.

    Args:
        parser: not used by this function.
        args: parsed options. Reads ``workdir``, ``left``, ``right``, ``seed``,
            ``reference``, ``minlen``, ``maxlen``, ``starting``, ``out`` and
            ``pipe``.

    Exits with status 1 when a required program is missing or when NOVOPlasty
    left no assembly file in the working directory.
    """
    # One spelling for both the availability check and the command; the
    # original checked 'NOVOplasty.pl' but executed 'NOVOPlasty.pl', which are
    # different files on a case-sensitive filesystem.
    novoplasty = 'NOVOPlasty.pl'
    for x in (novoplasty, 'minimap2'):
        if not which_path(x):
            status('ERROR: {} is not installed, exiting'.format(x))
            sys.exit(1)

    # first we need to generate working directory
    unique_id = str(uuid.uuid4())[:8]
    if not args.workdir:
        args.workdir = 'mito_' + unique_id
    if not os.path.isdir(args.workdir):
        os.makedirs(args.workdir)

    # now estimate read lengths of FASTQ
    read_len = GuessRL(args.left)

    # seed priority: --seed, then --reference, then the bundled seed
    if not args.seed:
        if not args.reference:
            seedFasta = os.path.abspath(
                os.path.join(os.path.dirname(__file__), 'mito-seed.fasta'))
        else:
            seedFasta = os.path.abspath(args.reference)
    else:
        seedFasta = os.path.abspath(args.seed)

    # now write the novoplasty config file by filling in the template
    defaultConfig = os.path.join(os.path.dirname(__file__),
                                 'novoplasty-config.txt')
    novoConfig = os.path.join(args.workdir, 'novo-config.txt')
    if args.reference:
        refgenome = os.path.abspath(args.reference)
    else:
        refgenome = ''
    checkWords = ("<PROJECT>", "<MINLEN>", "<MAXLEN>", "<MAXMEM>", "<SEED>",
                  "<READLEN>", "<FORWARD>", "<REVERSE>", "<REFERENCE>")
    repWords = (unique_id, str(args.minlen), str(args.maxlen),
                str(int(getRAM() * .75)), seedFasta, str(read_len),
                os.path.abspath(args.left), os.path.abspath(args.right),
                refgenome)
    with open(novoConfig, 'w') as outfile:
        with open(defaultConfig, 'r') as infile:
            for line in infile:
                for check, rep in zip(checkWords, repWords):
                    line = line.replace(check, rep)
                outfile.write(line)

    # now we can finally run NOVOPlasty (config path is relative to cwd)
    status('De novo assembling mitochondrial genome using NOVOplasty')
    cmd = [novoplasty, '-c', 'novo-config.txt']
    printCMD(cmd)
    novolog = os.path.join(args.workdir, 'novoplasty.log')
    with open(novolog, 'w') as logfile:
        p1 = subprocess.Popen(cmd,
                              cwd=args.workdir,
                              stdout=logfile,
                              stderr=logfile)
        p1.communicate()

    # now parse the results
    # Pick the result by priority rather than by os.listdir() order, which is
    # arbitrary and could select a Contigs_1_ file even though a circularized
    # assembly is present.
    draftMito = None
    circular = False
    produced = sorted(os.listdir(args.workdir))
    for marker in ('Circularized_assembly_', 'Contigs_1_',
                   'Uncircularized_assemblies_'):
        hit = next((f for f in produced if f.startswith(marker)), None)
        if hit is not None:
            draftMito = os.path.join(args.workdir, hit)
            circular = marker == 'Circularized_assembly_'
            break

    if draftMito is None:
        # Previously this fell through to open(None) and raised TypeError.
        status('ERROR: NOVOplasty did not produce an assembly, check {}'.format(
            novolog))
        sys.exit(1)

    if circular:
        status('NOVOplasty assembled complete circular genome')
        if args.starting:
            status('Rotating assembly to start with {}'.format(args.starting))
        else:
            status('Rotating assembly to start with Cytochrome b (cob) gene')
        orient_to_start(draftMito,
                        args.out,
                        folder=args.workdir,
                        start=args.starting)
    else:
        numContigs = 0
        contigLength = 0
        with open(args.out, 'w') as outfile:
            with open(draftMito, 'r') as infile:
                for title, seq in SimpleFastaParser(infile):
                    numContigs += 1
                    contigLength += len(seq)
                    outfile.write('>contig_{}\n{}\n'.format(
                        numContigs, softwrap(seq)))
        status(
            'NOVOplasty assembled {} contigs consiting of {:,} bp, but was unable to circularize genome'
            .format(numContigs, contigLength))

    status('AAFTF mito complete: {}'.format(args.out))
    if not args.pipe:
        shutil.rmtree(args.workdir)
예제 #6
0
def run(parser, args):
    """Polish an assembly with ``args.iterations`` rounds of BWA + Pilon.

    Each round maps the reads to the current assembly with ``bwa mem``, sorts
    and indexes the alignments with samtools, runs Pilon, reports the number
    of changes and removes that round's index and BAM files. The last round's
    FASTA is copied to the output file.

    Args:
        parser: not used by this function.
        args: parsed options. Reads ``left``, ``right``, ``workdir``, ``cpus``,
            ``iterations``, ``infile``, ``memory``, ``outfile``, ``debug`` and
            ``pipe``.

    Exits with status 1 when no forward reads (``args.left``) are given.
    """
    # find reads for pilon
    forReads, revReads = (None,)*2
    if args.left:
        forReads = os.path.abspath(args.left)
    if args.right:
        revReads = os.path.abspath(args.right)

    if not forReads:
        status('Unable to located FASTQ raw reads, pass via -l,--left and/or -r,--right')
        sys.exit(1)

    # Only an auto-generated working directory is removed at the end.
    custom_workdir = 1
    if not args.workdir:
        custom_workdir = 0
        args.workdir = 'aaftf-pilon_'+str(uuid.uuid4())[:8]
    if not os.path.exists(args.workdir):
        os.mkdir(args.workdir)

    bamthreads = 4
    if args.cpus < 4:
        bamthreads = args.cpus

    for i in range(1, args.iterations+1):
        status('Starting Pilon polishing iteration {:}'.format(i))
        correctedFasta = 'pilon'+str(i)+'.fasta'
        if i == 1:  # first loop: work on a copy of the input
            initialFasta = args.infile
            shutil.copyfile(args.infile,
                            os.path.join(args.workdir,
                                         os.path.basename(args.infile)))
        else:
            initialFasta = os.path.join(args.workdir, 'pilon'+str(i-1)+'.fasta')

        # All commands run with cwd=args.workdir, hence the bare file names.
        pilonBAM = os.path.basename(initialFasta)+'.bwa.bam'
        if not os.path.isfile(os.path.join(args.workdir, pilonBAM)):
            bwa_index = ['bwa', 'index', os.path.basename(initialFasta)]
            printCMD(bwa_index)
            subprocess.run(bwa_index, cwd=args.workdir,
                           stderr=subprocess.DEVNULL)
            bwa_cmd = ['bwa', 'mem', '-t', str(args.cpus),
                       os.path.basename(initialFasta), forReads]
            if revReads:
                bwa_cmd.append(revReads)

            # run BWA and pipe to samtools sort
            printCMD(bwa_cmd)
            p1 = subprocess.Popen(bwa_cmd, cwd=args.workdir,
                                  stdout=subprocess.PIPE,
                                  stderr=subprocess.DEVNULL)
            p2 = subprocess.Popen(['samtools', 'sort',
                                   '-@', str(bamthreads), '-o', pilonBAM, '-'],
                                  cwd=args.workdir, stdout=subprocess.PIPE,
                                  stderr=subprocess.DEVNULL, stdin=p1.stdout)
            p1.stdout.close()
            p2.communicate()

            # BAM file needs to be indexed for Pilon
            subprocess.run(['samtools', 'index', pilonBAM], cwd=args.workdir)

        # run Pilon
        pilon_cmd = ['pilon', '--genome', os.path.basename(initialFasta),
                     '--frags', pilonBAM,
                     '-Xmx{}g'.format(args.memory),
                     '--output', correctedFasta.split('.fasta')[0],
                     '--threads', str(args.cpus),
                     '--changes']
        pilon_log = 'pilon'+str(i)+'.log'
        printCMD(pilon_cmd)
        with open(os.path.join(args.workdir, pilon_log), 'w') as logfile:
            subprocess.run(pilon_cmd, cwd=args.workdir, stderr=logfile,
                           stdout=logfile)
        num_changes = line_count(os.path.join(args.workdir, 'pilon'+str(i)+'.changes'))

        status('Found {:,} changes in Pilon iteration {:}'.format(num_changes, i))

        # clean-up as we iterate to prevent tmp directory from blowing up.
        # Fixed: every path is now built from the working-directory copy. The
        # first iteration used to join the workdir twice for the BAM/BAI (so
        # they were never removed with a relative workdir) and built the index
        # paths from args.infile, which could miss the workdir copies or hit
        # index files next to the user's input.
        index_base = os.path.join(args.workdir, os.path.basename(initialFasta))
        dirty = [index_base + ext
                 for ext in ('.sa', '.amb', '.ann', '.pac', '.bwt')]
        dirty.append(os.path.join(args.workdir, pilonBAM))
        dirty.append(os.path.join(args.workdir, pilonBAM + '.bai'))
        for f in dirty:
            if os.path.isfile(f):
                os.remove(f)

    # copy last iteration to output
    if args.outfile:
        polishedFasta = args.outfile
    else:
        polishedFasta = os.path.basename(args.infile).split('.f')[0]+'.pilon.fasta'
    shutil.copyfile(os.path.join(args.workdir, 'pilon'+str(args.iterations)+'.fasta'), polishedFasta)

    status('AAFTF pilon completed {:} iterations.'.format(args.iterations))
    status('Pilon polished assembly: {:}'.format(polishedFasta))
    # Suggested output name for the next pipeline step.
    if '_' in polishedFasta:
        nextOut = polishedFasta.split('_')[0]+'.final.fasta'
    elif '.' in polishedFasta:
        nextOut = polishedFasta.split('.')[0]+'.final.fasta'
    else:
        nextOut = polishedFasta+'.final.fasta'

    if not args.debug and not custom_workdir:
        SafeRemove(args.workdir)

    if not args.pipe:
        status('Your next command might be:\n\tAAFTF sort -i {:} -o {:}\n'.format(polishedFasta, nextOut))
예제 #7
0
def run(parser, args):
    """Screen an assembly for contaminant, mitochondrial and vector sequence.

    Steps:
      1. build BLAST databases from the ``DB_Links`` entries (downloading
         them when missing);
      2. BLAST the input against CONTAM_EUKS and CONTAM_PROKS and cut the
         matching regions out of the contigs;
      3. BLAST against MITO and mark matching contigs for removal;
      4. run repeated VecScreen-style BLAST rounds via
         ``parse_clean_blastn`` until no more vector hits are reported;
      5. write the cleaned contigs to ``args.outfile`` and the mitochondrial
         contigs to ``<prefix>.mitochondria.fasta`` next to it.

    Args:
        parser: not used by this function.
        args: parsed options. Reads ``workdir``, ``AAFTF_DB``, ``infile``,
            ``outfile``, ``cpus``, ``stringency``, ``pipe`` and ``debug``.
    """
    if not args.workdir:
        args.workdir = 'aaftf-vecscreen_' + str(uuid.uuid4())[:8]
    if not os.path.exists(args.workdir):
        os.mkdir(args.workdir)

    # parse database locations: --AAFTF_DB wins, then $AAFTF_DB, else None
    if args.AAFTF_DB:
        DB = args.AAFTF_DB
    else:
        DB = os.environ.get("AAFTF_DB")

    infile = args.infile
    outfile = os.path.basename(args.outfile)
    outdir = os.path.dirname(args.outfile)
    if '.f' in outfile:
        prefix = outfile.rsplit('.f', 1)[0]
        print("prefix is ", prefix)
    else:
        prefix = str(os.getpid())

    outfile_vec = os.path.join(args.workdir,
                               "%s.tmp_vecscreen.fasta" % (prefix))

    # Common Euk/Prot contaminats for blastable DB later on
    status('Building BLAST databases for contamination screen.')
    for d in DB_Links:
        if d == 'sourmash':
            continue
        url = DB_Links[d]
        dbname = os.path.basename(str(url))
        if DB:
            dbfile = os.path.join(DB, dbname)
        else:
            dbfile = os.path.join(args.workdir, dbname)
        if dbfile.endswith(".gz"):
            # BLAST needs the uncompressed FASTA; unpack once and reuse it.
            nogz = os.path.splitext(dbfile)[0]
            if not os.path.exists(nogz):
                if not os.path.exists(dbfile):
                    urllib.request.urlretrieve(url, dbfile)

                with gzip.open(dbfile, 'rb') as ingz, open(nogz,
                                                           'wb') as outfa:
                    shutil.copyfileobj(ingz, outfa)
            make_blastdb('nucl', nogz, os.path.join(args.workdir, d))
        else:
            if not os.path.exists(dbfile):
                urllib.request.urlretrieve(url, dbfile)
            make_blastdb('nucl', dbfile, os.path.join(args.workdir, d))

    # Module-level on purpose: declared global in the original.
    # NOTE(review): presumably filled in by parse_clean_blastn -- confirm.
    global contigs_to_remove
    contigs_to_remove = {}
    regions_to_trim = {}

    # qaccver saccver pident length mismatch gapopen qstart qend sstart send evalue bitscore
    for contam in ["CONTAM_EUKS", "CONTAM_PROKS"]:
        status("%s Contamination Screen" % (contam))
        blastreport = os.path.join(args.workdir,
                                   "%s.%s.blastn" % (contam, prefix))
        blastnargs = [
            'blastn', '-query', infile, '-db',
            os.path.join(args.workdir, contam), '-num_threads',
            str(args.cpus), '-dust', 'yes', '-soft_masking', 'true',
            '-perc_identity', BlastPercent_ID_ContamMatch, '-lcase_masking',
            '-outfmt', '6', '-out', blastreport
        ]
        printCMD(blastnargs)
        call(blastnargs)
        with open(blastreport) as report:
            colparser = csv.reader(report, delimiter="\t")
            for row in colparser:
                pident = float(row[2])
                alnlen = int(row[3])
                if ((pident >= 98.0 and alnlen >= 50)
                        or (pident >= 94.0 and alnlen >= 100)
                        or (pident >= 90.0 and alnlen >= 200)):
                    # Fixed: coordinates are taken from THIS row for every
                    # hit; previously the 2nd and later hits of a contig
                    # reused start/end left over from an earlier row.
                    qstart, qend = int(row[6]), int(row[7])
                    start, end = min(qstart, qend), max(qstart, qend)
                    regions_to_trim.setdefault(row[0], []).append(
                        (start, end, contam, row[1], pident))
        status('{:} screening finished'.format(contam))

    eukCleaned = os.path.join(args.workdir,
                              "%s.euk-prot_cleaned.fasta" % (prefix))
    if len(regions_to_trim) > 0:
        # Plain 'r': the 'U' mode flag was removed in Python 3.11.
        with open(eukCleaned, 'w') as cleanout:
            with open(infile, 'r') as fastain:
                for record in SeqIO.parse(fastain, 'fasta'):
                    if record.id not in regions_to_trim:
                        cleanout.write('>{:}\n{:}\n'.format(
                            record.id, softwrap(str(record.seq))))
                        continue

                    Seq = str(record.seq)
                    # Process hits left to right so that lastpos never moves
                    # backwards, also with overlapping or unordered hits.
                    regions = sorted(regions_to_trim[record.id],
                                     key=lambda r: (r[0], r[1]))
                    status('Splitting {:} due to contamination: {:}'.format(
                        record.id, regions))
                    lastpos = 0
                    pieces = []
                    for region in regions:
                        # BLAST query coordinates are 1-based and inclusive:
                        # the clean stretch ends at 0-based index start-1
                        # (exclusive) and resumes at index `end`.
                        clean_end = max(lastpos, region[0] - 1)
                        if clean_end > lastpos:
                            pieces.append(Seq[lastpos:clean_end])
                        lastpos = max(lastpos, region[1])
                    if lastpos < len(Seq):
                        pieces.append(Seq[lastpos:])
                    # Empty fragments are skipped so no zero-length FASTA
                    # records are written.
                    for i, piece in enumerate(pieces):
                        cleanout.write('>split{:}_{:}\n{:}\n'.format(
                            i, record.id, softwrap(piece)))
    else:
        eukCleaned = infile

    # MITO screen
    status('Mitochondria Contamination Screen')
    mitoHits = []
    blastreport = os.path.join(args.workdir, "%s.%s.blastn" % ('MITO', prefix))
    blastnargs = [
        'blastn', '-query', eukCleaned, '-db',
        os.path.join(args.workdir, 'MITO'), '-num_threads',
        str(args.cpus), '-dust', 'yes', '-soft_masking', 'true',
        '-perc_identity', BlastPercent_ID_MitoMatch, '-lcase_masking',
        '-outfmt', '6', '-out', blastreport
    ]
    printCMD(blastnargs)
    call(blastnargs)
    with open(blastreport) as report:
        colparser = csv.reader(report, delimiter="\t")
        for row in colparser:
            if int(row[3]) >= 120:
                contigs_to_remove[row[0]] = ('MitoScreen', row[1],
                                             float(row[2]))
                mitoHits.append(row[0])
    status('Mito screening finished.')

    # vecscreen starts here
    status(
        'Starting VecScreen, will remove terminal matches and split internal matches'
    )
    # Each round BLASTs the current FASTA against UniVec and cleans it; the
    # loop ends with the first round that reports zero hits.
    rnd = 0
    count = 1
    while (count > 0):
        filepref = "%s.r%d" % (prefix, rnd)
        report = os.path.join(args.workdir, "%s.vecscreen.tab" % (filepref))
        if not os.path.exists(report):
            cmd = [
                'blastn', '-task', 'blastn', '-reward', '1', '-penalty', '-5',
                '-gapopen', '3', '-gapextend', '3', '-dust', 'yes',
                '-soft_masking', 'true', '-evalue', '700', '-searchsp',
                '1750000000000', '-db',
                os.path.join(args.workdir, 'UniVec'), '-outfmt',
                '6 qaccver saccver pident length mismatch gapopen qstart qend sstart send evalue bitscore score qlen',
                '-num_threads',
                str(args.cpus), '-query', eukCleaned, '-out', report
            ]
            call(cmd)
        status("Parsing VecScreen round {:}: {:} for {:}".format(
            rnd + 1, filepref, report))
        (count,
         cleanfile) = parse_clean_blastn(eukCleaned,
                                         os.path.join(args.workdir, filepref),
                                         report, args.stringency)
        status("count is %d cleanfile is %s" % (count, cleanfile))
        if count == 0:  # no vector matches left: keep the current FASTA
            status("copying %s to %s" % (eukCleaned, outfile_vec))
            shutil.copy(eukCleaned, outfile_vec)
        else:
            rnd += 1
            eukCleaned = cleanfile

    status("{:,} contigs will be removed:".format(len(contigs_to_remove)))
    for k, v in sorted(contigs_to_remove.items()):
        print('\t{:} --> dbhit={:}; hit={:}; pident={:}'.format(
            k, v[0], v[1], v[2]))

    # Removed contigs that were mitochondrial hits go to their own file; the
    # other removed contigs are discarded.
    mitochondria = os.path.join(outdir, prefix + '.mitochondria.fasta')
    with open(args.outfile, "w") as output_handle, open(mitochondria,
                                                        'w') as mito_handle:
        for record in SeqIO.parse(outfile_vec, "fasta"):
            if record.id not in contigs_to_remove:
                SeqIO.write(record, output_handle, "fasta")
            elif record.id in mitoHits:
                SeqIO.write(record, mito_handle, "fasta")
    status('Writing {:,} cleaned contigs to: {:}'.format(
        countfasta(args.outfile), args.outfile))
    status('Writing {:,} mitochondrial contigs to: {:}'.format(
        countfasta(mitochondria), mitochondria))
    # Suggested output name for the next pipeline step.
    if '_' in args.outfile:
        nextOut = args.outfile.split('_')[0] + '.sourpurge.fasta'
    elif '.' in args.outfile:
        nextOut = args.outfile.split('.')[0] + '.sourpurge.fasta'
    else:
        nextOut = args.outfile + '.sourpurge.fasta'

    if not args.pipe:
        status(
            'Your next command might be:\n\tAAFTF sourpurge -i {:} -o {:} -c {:} --phylum Ascomycota\n'
            .format(args.outfile, nextOut, args.cpus))

    if not args.debug:
        SafeRemove(args.workdir)
예제 #8
0
파일: filter.py 프로젝트: stajichlab/AAFTF
def _filter_fetch(url, dest):
    """Download *url* to *dest* unless *dest* already exists.

    Returns the ctime of *dest* so callers can track the newest source file.
    """
    if not os.path.exists(dest):
        urllib.request.urlretrieve(url, dest)
    return os.path.getctime(dest)


def _filter_sorted_bam(align_cmd, bam, workdir, threads):
    """Run *align_cmd* inside *workdir* and pipe its stdout to samtools sort.

    The sorted output is written to ``basename(bam)`` relative to *workdir*;
    stderr of both processes is discarded.
    """
    printCMD(align_cmd)
    aligner = subprocess.Popen(align_cmd,
                               cwd=workdir,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.DEVNULL)
    sorter = subprocess.Popen(
        ['samtools', 'sort', '-@',
         str(threads), '-o',
         os.path.basename(bam), '-'],
        cwd=workdir,
        stdout=subprocess.PIPE,
        stderr=subprocess.DEVNULL,
        stdin=aligner.stdout)
    # Close our copy of the pipe so the aligner gets SIGPIPE if sort exits.
    aligner.stdout.close()
    sorter.communicate()
    # Reap the aligner as well so it does not linger as a zombie.
    aligner.wait()


def _filter_report(args, for_out, rev_out=None):
    """Print the 'Filtering complete' summary and the suggested next command.

    *rev_out* is None for single-end data, in which case only *for_out* is
    reported and the suggested command omits ``-r``.
    """
    assembly = args.basename + '.spades.fasta'
    if rev_out:
        status('Filtering complete:\n\tFor: {:}\n\tRev: {:}'.format(
            for_out, rev_out))
        if not args.pipe:
            status(
                'Your next command might be:\n\tAAFTF assemble -l {:} -r {:} -c {:} -o {:}\n'
                .format(for_out, rev_out, args.cpus, assembly))
    else:
        status('Filtering complete:\n\tSingle: {:}'.format(for_out))
        if not args.pipe:
            status(
                'Your next command might be:\n\tAAFTF assemble -l {:} -c {:} -o {:}\n'
                .format(for_out, args.cpus, assembly))


def run(parser, args):
    """Remove reads that match a contamination database.

    Builds ``contamdb.fa`` in the working directory from the entries of
    ``Contaminant_Accessions``, ``DB_Links['UniVec']`` and any
    ``--screen_accessions`` / ``--screen_urls`` / ``--screen_local`` inputs,
    then filters the reads with the tool named by ``args.aligner``
    (``bbduk``, ``bowtie2``, ``bwa`` or ``minimap2``).

    Attributes read from *args*: workdir, AAFTF_DB, cpus, screen_accessions,
    screen_urls, screen_local, left, right, basename, aligner, memory, debug,
    pipe.  ``args.workdir`` and ``args.basename`` are filled in when empty.
    *parser* is not used.

    Exits with status 1 when ``--left`` is missing or the aligner is unknown.
    """
    custom_workdir = bool(args.workdir)
    if not custom_workdir:
        args.workdir = 'aaftf-filter_' + str(uuid.uuid4())[:8]
    if not os.path.exists(args.workdir):
        os.mkdir(args.workdir)

    # database location: command line first, then the AAFTF_DB environment
    # variable; None/empty means "download into the working directory"
    DB = args.AAFTF_DB or os.environ.get("AAFTF_DB")
    download_dir = DB if DB else args.workdir

    bamthreads = min(4, args.cpus)

    # ctime of the most recently changed source file; the combined database
    # is rebuilt whenever it is older than this
    newest_source = -1
    contam_filenames = []

    # db of contaminant (PhiX)
    for url in Contaminant_Accessions.values():
        acc_file = os.path.join(download_dir, os.path.basename(url))
        contam_filenames.append(acc_file)
        newest_source = max(newest_source, _filter_fetch(url, acc_file))

    # download univec too
    url = DB_Links['UniVec']
    acc_file = os.path.join(download_dir, os.path.basename(url))
    contam_filenames.append(acc_file)
    # the file age is taken into account whether or not it was just fetched
    newest_source = max(newest_source, _filter_fetch(url, acc_file))

    for acc in args.screen_accessions or []:
        # prefer a copy already present in DB, otherwise use the workdir
        acc_file = os.path.join(args.workdir, acc + ".fna")
        if DB:
            db_copy = os.path.join(DB, acc + ".fna")
            if os.path.exists(db_copy):
                acc_file = db_copy
        contam_filenames.append(acc_file)
        newest_source = max(
            newest_source,
            _filter_fetch(SeqDBs['nucleotide'] % (acc), acc_file))

    for url in args.screen_urls or []:
        url_file = os.path.join(args.workdir, os.path.basename(url))
        contam_filenames.append(url_file)
        newest_source = max(newest_source, _filter_fetch(url, url_file))

    for f in args.screen_local or []:
        contam_filenames.append(os.path.abspath(f))

    # concat vector db
    status('Generating combined contamination database:\n{:}'.format(
        '\n'.join(contam_filenames)))
    contamdb = os.path.join(args.workdir, 'contamdb.fa')
    if (not os.path.exists(contamdb)
            or os.path.getctime(contamdb) < newest_source):
        with open(contamdb, 'wb') as wfd:
            for fname in contam_filenames:
                with open(fname, 'rb') as fd:
                    # reasonably fast copy for append
                    shutil.copyfileobj(fd, wfd)

    # find reads
    forReads = os.path.abspath(args.left) if args.left else None
    revReads = os.path.abspath(args.right) if args.right else None
    if not forReads:
        status("Must provide --left, unable to locate FASTQ reads")
        sys.exit(1)
    total = countfastq(forReads)
    if revReads:
        total = total * 2
    status('Loading {:,} total reads'.format(total))

    # default basename: file name up to the first '_' (or, failing that, '.')
    if not args.basename:
        read_name = os.path.basename(forReads)
        if '_' in read_name:
            args.basename = read_name.split('_')[0]
        elif '.' in read_name:
            args.basename = read_name.split('.')[0]
        else:
            args.basename = read_name

    alignBAM = os.path.join(args.workdir, args.basename + '_contam_db.bam')
    clean_reads = args.basename + "_filtered"
    refmatch_bbduk = [contamdb, 'phix', 'artifacts', 'lambda']

    if args.aligner == "bbduk":
        status('Kmer filtering reads using BBDuk')
        if args.memory:
            MEM = '-Xmx{:}g'.format(args.memory)
        else:
            MEM = '-Xmx{:}g'.format(round(0.6 * getRAM()))
        cmd = [
            'bbduk.sh', MEM, 't={:}'.format(args.cpus), 'hdist=1', 'k=27',
            'overwrite=true',
            'in=%s' % (forReads),
            'out=%s_1.fastq.gz' % (clean_reads)
        ]
        if revReads:
            cmd.extend(
                ['in2=%s' % (revReads),
                 'out2=%s_2.fastq.gz' % (clean_reads)])
        cmd.extend(['ref=%s' % (",".join(refmatch_bbduk))])
        printCMD(cmd)
        if args.debug:
            subprocess.run(cmd)
        else:
            subprocess.run(cmd, stderr=subprocess.DEVNULL)

        # only an auto-generated workdir is cleaned up here
        if not args.debug and not custom_workdir:
            SafeRemove(args.workdir)

        clean = countfastq('{:}_1.fastq.gz'.format(clean_reads))
        if revReads:
            clean = clean * 2
        status('{:,} reads mapped to contamination database'.format(
            (total - clean)))
        status('{:,} reads unmapped and writing to file'.format(clean))

        # single-end runs only produce the _1 file, so do not advertise _2
        _filter_report(args, clean_reads + '_1.fastq.gz',
                       clean_reads + '_2.fastq.gz' if revReads else None)
        return

    elif args.aligner == 'bowtie2':
        # likely not used and less accurate than bbmap?
        if not os.path.isfile(alignBAM):
            status('Aligning reads to contamination database using bowtie2')
            if (not os.path.exists(contamdb + ".1.bt2")
                    or os.path.getctime(contamdb + ".1.bt2") <
                    os.path.getctime(contamdb)):
                # (re)build index if no index or index is older than the db
                bowtie_index = ['bowtie2-build', contamdb, contamdb]
                printCMD(bowtie_index)
                subprocess.run(bowtie_index,
                               stderr=subprocess.DEVNULL,
                               stdout=subprocess.DEVNULL)

            # the aligner runs with cwd=workdir, hence the bare db name
            bowtie_cmd = [
                'bowtie2', '-x',
                os.path.basename(contamdb), '-p',
                str(args.cpus), '--very-sensitive'
            ]
            if revReads:
                bowtie_cmd += ['-1', forReads, '-2', revReads]
            else:
                bowtie_cmd += ['-U', forReads]
            _filter_sorted_bam(bowtie_cmd, alignBAM, args.workdir, bamthreads)

    elif args.aligner == 'bwa':
        # likely less accurate than bbduk so may not be used
        if not os.path.isfile(alignBAM):
            status('Aligning reads to contamination database using BWA')
            if (not os.path.exists(contamdb + ".amb")
                    or os.path.getctime(contamdb + ".amb") <
                    os.path.getctime(contamdb)):
                bwa_index = ['bwa', 'index', contamdb]
                printCMD(bwa_index)
                subprocess.run(bwa_index,
                               stderr=subprocess.DEVNULL,
                               stdout=subprocess.DEVNULL)

            bwa_cmd = [
                'bwa', 'mem', '-t',
                str(args.cpus),
                os.path.basename(contamdb), forReads
            ]
            if revReads:
                bwa_cmd.append(revReads)
            _filter_sorted_bam(bwa_cmd, alignBAM, args.workdir, bamthreads)

    elif args.aligner == 'minimap2':
        # likely not used but may be useful for pacbio/nanopore?
        if not os.path.isfile(alignBAM):
            status('Aligning reads to contamination database using minimap2')
            minimap2_cmd = [
                'minimap2', '-ax', 'sr', '-t',
                str(args.cpus),
                os.path.basename(contamdb), forReads
            ]
            if revReads:
                minimap2_cmd.append(revReads)
            _filter_sorted_bam(minimap2_cmd, alignBAM, args.workdir,
                               bamthreads)
    else:
        status("Must specify bowtie2, bwa, or minimap2 for filtering")
        # stop here instead of falling through to a possibly stale BAM
        sys.exit(1)

    if os.path.isfile(alignBAM):
        # display mapping stats in terminal
        subprocess.run(['samtools', 'index', alignBAM])
        mapped, unmapped = bam_read_count(alignBAM)
        status('{:,} reads mapped to contamination database'.format(mapped))
        status('{:,} reads unmapped and writing to file'.format(unmapped))
        # now output unmapped reads from bamfile
        if revReads:
            # -f 12 = read unmapped (4) + mate unmapped (8): keep only pairs
            # in which neither mate hit the contamination database
            samtools_cmd = [
                'samtools', 'fastq', '-f', '12', '-1',
                clean_reads + '_1.fastq.gz', '-2', clean_reads + '_2.fastq.gz',
                alignBAM
            ]
        else:
            # single-end reads carry neither the READ1 nor the READ2 flag;
            # samtools fastq routes those to the -0 file (with -1 they would
            # be written to stdout and the output file left empty)
            samtools_cmd = [
                'samtools', 'fastq', '-f', '4', '-0',
                clean_reads + '.fastq.gz', alignBAM
            ]
        subprocess.run(samtools_cmd, stderr=subprocess.DEVNULL)
        # NOTE(review): unlike the bbduk path this also removes a
        # user-supplied workdir - confirm that is intended
        if not args.debug:
            SafeRemove(args.workdir)
        if revReads:
            _filter_report(args, clean_reads + '_1.fastq.gz',
                           clean_reads + '_2.fastq.gz')
        else:
            _filter_report(args, clean_reads + '.fastq.gz')
예제 #9
0
파일: trim.py 프로젝트: stajichlab/AAFTF
def _trim_locate_adaptors(jarfile, paired):
    """Guess the location of the TruSeq3 adaptor FASTA shipped with a jar.

    Looks for ``adapters/TruSeq3-{PE,SE}.fa`` next to *jarfile*; failing
    that, walks up the directory tree to the first directory containing
    ``share`` and returns ``share/trimmomatic/adapters/...`` below it.
    Returns '' when no ``share`` directory is found.  The returned path is
    not guaranteed to exist; the caller must check.
    """
    fasta = 'TruSeq3-PE.fa' if paired else 'TruSeq3-SE.fa'
    jardir = os.path.dirname(jarfile)
    beside_jar = os.path.join(jardir, 'adapters', fasta)
    if os.path.exists(beside_jar):
        return beside_jar

    findpath = jardir
    while findpath:
        if os.path.exists(os.path.join(findpath, 'share')):
            return os.path.join(findpath, 'share', 'trimmomatic', 'adapters',
                                fasta)
        parent = os.path.dirname(findpath)
        if parent == findpath:
            # reached the filesystem root (dirname('/') == '/'); stop here
            # instead of looping forever
            break
        findpath = parent
    return ''


def _trim_report(args, paired):
    """Print the 'Trimming finished' summary and the suggested next command."""
    if paired:
        fwd = args.basename + '_1P.fastq.gz'
        rev = args.basename + '_2P.fastq.gz'
        status('Trimming finished:\n\tFor: {:}\n\tRev {:}'.format(fwd, rev))
        if not args.pipe:
            status(
                'Your next command might be:\n\tAAFTF filter -l {:} -r {:} -o {:} -c {:}\n'
                .format(fwd, rev, args.basename, args.cpus))
    else:
        single = args.basename + '_1U.fastq.gz'
        status('Trimming finished:\n\tSingle: {:}'.format(single))
        if not args.pipe:
            status(
                'Your next command might be:\n\tAAFTF filter -l {:} -o {:} -c {:}\n'
                .format(single, args.basename, args.cpus))


def run(parser, args):
    """Adapter/quality trim FASTQ reads with BBDuk or Trimmomatic.

    Output files are named ``<basename>_1P/_2P.fastq.gz`` for paired input
    and ``<basename>_1U.fastq.gz`` for single-end input.

    Attributes read from *args*: left, right, basename, method, memory,
    cpus, minlen, debug, pipe, and for Trimmomatic: trimmomatic,
    trimmomatic_adaptors, trimmomatic_leadingwindow,
    trimmomatic_trailingwindow, trimmomatic_slidingwindow,
    trimmomatic_quality, trimmomatic_clip.  ``args.basename`` is filled in
    when empty.  *parser* is not used.

    Exits with status 1 when no ``left`` reads are given or the Trimmomatic
    jar cannot be located; returns early when no adaptor file is found.
    """
    if not args.left:
        # fail with a message rather than a TypeError from os.path.basename
        status("Must provide left and right pairs or single read set")
        sys.exit(1)
    paired = bool(args.right)

    # default basename: file name up to the first '_' (or, failing that, '.')
    if not args.basename:
        read_name = os.path.basename(args.left)
        if '_' in read_name:
            args.basename = read_name.split('_')[0]
        elif '.' in read_name:
            args.basename = read_name.split('.')[0]
        else:
            args.basename = read_name

    total = countfastq(args.left)
    if paired:
        total = total * 2
    status('Loading {:,} total reads'.format(total))

    # tool stderr is silenced unless --debug is given
    quiet = {} if args.debug else {'stderr': subprocess.DEVNULL}

    if args.method == 'bbduk':
        if args.memory:
            MEM = '-Xmx{:}g'.format(args.memory)
        else:
            MEM = '-Xmx{:}g'.format(round(0.6 * getRAM()))

        status('Adapter trimming using BBDuk')
        cmd = [
            'bbduk.sh', MEM, 'ref=adapters', 't={:}'.format(args.cpus),
            'ktrim=r', 'k=23', 'mink=11', 'minlen={:}'.format(args.minlen),
            'hdist=1', 'ftm=5', 'tpe', 'tbo', 'overwrite=true'
        ]
        if paired:
            cmd += [
                'in1={:}'.format(args.left), 'in2={:}'.format(args.right),
                'out1={:}_1P.fastq.gz'.format(args.basename),
                'out2={:}_2P.fastq.gz'.format(args.basename)
            ]
        else:
            cmd += [
                'in={:}'.format(args.left),
                'out={:}_1U.fastq.gz'.format(args.basename)
            ]

        printCMD(cmd)
        subprocess.run(cmd, **quiet)

        if paired:
            clean = countfastq('{:}_1P.fastq.gz'.format(args.basename)) * 2
        else:
            clean = countfastq('{:}_1U.fastq.gz'.format(args.basename))
        status('{:,} reads remaining and writing to file'.format(clean))
        _trim_report(args, paired)

    elif args.method == 'trimmomatic':
        # find path
        trimmomatic_path = find_trimmomatic()
        if trimmomatic_path:
            jarfile = trimmomatic_path
        elif args.trimmomatic:
            jarfile = args.trimmomatic
        else:
            status(
                'Trimmomatic cannot be found - please provide location of trimmomatic.jar file.'
            )
            sys.exit(1)

        path_to_adaptors = args.trimmomatic_adaptors
        if not path_to_adaptors or not os.path.exists(path_to_adaptors):
            # fall back to the adaptor files distributed with trimmomatic
            path_to_adaptors = _trim_locate_adaptors(jarfile, paired)
            if not os.path.exists(path_to_adaptors):
                status("Cannot find adaptors file, please specify manually")
                return

        leadingwindow = "LEADING:%d" % (args.trimmomatic_leadingwindow)
        trailingwindow = "TRAILING:%d" % (args.trimmomatic_trailingwindow)
        slidingwindow = "SLIDINGWINDOW:%s" % (args.trimmomatic_slidingwindow)
        quality = "-%s" % (args.trimmomatic_quality)  # add leading dash
        clipstr = args.trimmomatic_clip % (path_to_adaptors)
        steps = [
            clipstr, leadingwindow, trailingwindow, slidingwindow,
            "MINLEN:%d" % (args.minlen)
        ]

        if paired:
            cmd = [
                'java', '-jar', jarfile, 'PE', '-threads',
                str(args.cpus), quality, args.left, args.right,
                args.basename + '_1P.fastq', args.basename + '_1U.fastq',
                args.basename + '_2P.fastq', args.basename + '_2U.fastq'
            ] + steps
        else:
            cmd = [
                'java', '-jar', jarfile, 'SE', '-threads',
                str(args.cpus), quality, args.left,
                args.basename + '_1U.fastq'
            ] + steps

        status('Running trimmomatic adapter and quality trimming')
        printCMD(cmd)
        subprocess.run(cmd, **quiet)

        if paired:
            status('Compressing trimmed PE FASTQ files')
            Fzip_inplace(args.basename + '_1P.fastq', args.cpus)
            Fzip_inplace(args.basename + '_2P.fastq', args.cpus)
            # unpaired survivors are not kept for paired-end runs
            SafeRemove(args.basename + '_1U.fastq')
            SafeRemove(args.basename + '_2U.fastq')
        else:
            status('Compressing trimmed SE FASTQ file')
            Fzip_inplace(args.basename + '_1U.fastq', args.cpus)
        _trim_report(args, paired)