Esempio n. 1
0
def transrate(submitted, trinitydir, transrate_dir, transrate_out,
              trinity_fasta, sample, trimdir, sra, mmetsp):
    """Queue a read-based transrate evaluation of one assembly.

    The job is only queued when both trimmed paired-end read files
    (``<trimdir><sra>.trim_1P.fq`` and ``<trimdir><sra>.trim_2P.fq``) exist.

    Args:
        submitted: list that collects the ``mmetsp`` ids that were queued;
            it is mutated in place and also returned.
        trinitydir: unused by this function.
        transrate_dir: directory prefix that receives ``assemblies.csv`` and
            is passed to ``clusterfunc_py3.qsub_file``.
        transrate_out: unused by this function.
        trinity_fasta: path of the assembly given to ``--assembly``.
        sample: suffix of the scratch directory ``/tmp/transrate_out.<sample>``.
        trimdir: directory prefix holding the trimmed reads.
        sra: read-file basename.
        mmetsp: id used for the copied csv name and as the job file name.

    Returns:
        The same ``submitted`` list.
    """
    left_reads = trimdir + sra + ".trim_1P.fq"
    right_reads = trimdir + sra + ".trim_2P.fq"

    # Guard clause: nothing to evaluate without both mates.
    if not all(os.path.isfile(path) for path in (left_reads, right_reads)):
        print("trimfiles not present:", left_reads, right_reads)
        return submitted

    # The single backslashes are Python line continuations inside the
    # literal, so the transrate invocation ends up on one shell line.
    job_script = """
transrate --assembly={} --threads=8 \
--left={}{}.trim_1P.fq \
--right={}{}.trim_2P.fq \
--output=/tmp/transrate_out.{}
cp /tmp/transrate_out.{}/assemblies.csv {}{}.assemblies.csv
rm -rf /tmp/transrate_out.{}
""".format(trinity_fasta, trimdir, sra, trimdir, sra, sample, sample,
           transrate_dir, mmetsp, sample)
    print(job_script)

    # Record the id before submitting, as the bookkeeping has always done.
    submitted.append(mmetsp)
    clusterfunc_py3.qsub_file(transrate_dir, "transrate", "", mmetsp,
                              [job_script])
    return submitted
Esempio n. 2
0
def get_sourmash_command(mmetsp):
    """Queue a ``sourmash compute`` job for one Trinity assembly.

    Despite the name, nothing is returned: the command is built and handed
    straight to ``clusterfunc_py3.qsub_file``.

    Args:
        mmetsp: sample id; selects the assembly fasta and names the job file.
    """
    job_script = """
sourmash compute --dna --protein /mnt/home/ljcohen/mmetsp_assemblies_trinity2.2.0/{}.trinity_out_2.2.0.Trinity.fasta -k 21 --name-from-first
""".format(mmetsp)
    clusterfunc_py3.qsub_file(
        "/mnt/home/ljcohen/mmetsp_sourmash/",
        "sourmash",
        [""],
        mmetsp,
        [job_script],
    )
Esempio n. 3
0
def consolidate(mmetsp_dir, item):
    """Queue the four-step "consolidate" job for one sample.

    The command strings come, in order, from ``combine_orphaned``,
    ``rename_pe``, ``split_reads`` and ``combine``; each is called with
    ``(mmetsp_dir, item)``.

    Args:
        mmetsp_dir: sample directory, also passed to ``qsub_file``.
        item: sample id, used as the job file name.
    """
    builders = (combine_orphaned, rename_pe, split_reads, combine)
    job_steps = [build(mmetsp_dir, item) for build in builders]
    clusterfunc_py3.qsub_file(
        mmetsp_dir,
        "consolidate",
        ["GNU/4.8.3", "khmer/2.0"],
        item,
        job_steps,
    )
Esempio n. 4
0
def run_move_files(trimdir, sra):
    """Queue the "move" job for one sample.

    The job consists of the command from ``make_orphans`` followed by the
    two commands returned by ``move_files``.

    Args:
        trimdir: trim directory, also passed to ``qsub_file``.
        sra: sample id, used as the job file name.
    """
    orphan_cmd = make_orphans(trimdir, sra)
    first_move, second_move = move_files(trimdir, sra)
    job_steps = [orphan_cmd, first_move, second_move]
    clusterfunc_py3.qsub_file(trimdir, "move", "", sra, job_steps)
Esempio n. 5
0
def _submit_trimmomatic_job(trimdir, file1, file2, sra):
    """Build the Trimmomatic PE command for one sample and queue it."""
    trim_command = """
java -jar /mnt/home/ljcohen/bin/Trimmomatic-0.33/trimmomatic-0.33.jar PE \\
-baseout {}.trim.fq \\
{} {} \\
ILLUMINACLIP:/mnt/home/ljcohen/bin/Trimmomatic-0.33/adapters/combined.fa:2:40:15 \\
SLIDINGWINDOW:4:2 \\
LEADING:2 \\
TRAILING:2 \\
MINLEN:25 &> trim.{}.log
""".format(sra, file1, file2, sra)
    orphan_string = make_orphans(trimdir, sra)
    clusterfunc_py3.qsub_file(trimdir, "trim", "", sra,
                              [trim_command, orphan_string])


def run_trimmomatic_TruSeq(missing, trimmed, remaining, trimdir, file1, file2,
                           sra):
    """Queue Trimmomatic for a sample unless its log shows it already ran.

    The decision is based on ``<trimdir>qsub_files/``:

    * no entry whose name contains ``trim.<sra>.log``: ``trimdir`` is
      appended to ``remaining`` and a job is queued;
    * a matching entry exists and ``trim.<sra>.log`` contains the
      Trimmomatic success line: ``sra`` is appended to ``trimmed``;
    * a matching entry exists but no success line is found (including the
      case where the exact log file is absent): ``trimdir`` is appended to
      ``missing`` and a job is queued.

    NOTE(review): ``missing`` and ``remaining`` receive ``trimdir`` while
    ``trimmed`` receives ``sra``; kept as-is, confirm against callers.

    Args:
        missing, trimmed, remaining: bookkeeping lists, mutated in place.
        trimdir: trim directory prefix (expected to end with a separator).
        file1, file2: paired-end input read files.
        sra: sample id; names the outputs, the log and the job file.

    Returns:
        The tuple ``(missing, trimmed, remaining)``.
    """
    log_dir = trimdir + "qsub_files/"
    log_name = "trim." + sra + ".log"
    success_marker = "TrimmomaticPE: Completed successfully"

    clusterfunc_py3.check_dir(log_dir)
    matching = [entry for entry in os.listdir(log_dir) if log_name in entry]

    if not matching:
        remaining.append(trimdir)
        _submit_trimmomatic_job(trimdir, file1, file2, sra)
        return missing, trimmed, remaining

    # `matching` is a substring match, so the exact log may be absent even
    # when it is non-empty; default to "no lines" instead of leaving the
    # variable unbound (previously a NameError).
    content = []
    trim_file = log_dir + log_name
    if os.path.isfile(trim_file):
        with open(trim_file) as log_handle:
            content = log_handle.readlines()

    if any(success_marker in line for line in content):
        print("Already trimmed:", matching)
        trimmed.append(sra)
    else:
        missing.append(trimdir)
        _submit_trimmomatic_job(trimdir, file1, file2, sra)
    return missing, trimmed, remaining
Esempio n. 6
0
def run_filter_abund(diginormdir, sra):
    """Queue the khmer ``filter-abund.py`` job plus paired-read extraction.

    ``filter-abund.py`` is given the ``norm.C20k20.ct`` graph in
    ``diginormdir`` and every ``*.keep`` file under
    ``<diginormdir>qsub_files/``. The second command comes from
    ``extract_paired(diginormdir)``.

    Args:
        diginormdir: diginorm directory prefix, also passed to ``qsub_file``.
        sra: sample id, used as the job file name.
    """
    keep_prefix = diginormdir + "qsub_files/"
    filter_cmd = """
filter-abund.py -V -Z 18 {}norm.C20k20.ct {}*.keep
""".format(diginormdir, keep_prefix)
    job_steps = [filter_cmd, extract_paired(diginormdir)]
    clusterfunc_py3.qsub_file(diginormdir, "filtabund",
                              ["GNU/4.8.3", "khmer/2.0"], sra, job_steps)
Esempio n. 7
0
def run_diginorm(diginormdir, interleavedir, trimdir, sra):
    """Queue a khmer ``normalize-by-median.py`` job.

    The graph is saved to ``<diginormdir>norm.C20k20.ct``, unpaired reads are
    taken from ``<trimdir>orphans.fq.gz`` and the inputs are every ``*.fq``
    under ``interleavedir``.

    Args:
        diginormdir: output directory prefix, also passed to ``qsub_file``.
        interleavedir: directory prefix with the interleaved reads.
        trimdir: directory prefix holding ``orphans.fq.gz``.
        sra: sample id, used as the job file name.
    """
    khmer_modules = ["GNU/4.8.3", "khmer/2.0"]
    diginorm_script = """
normalize-by-median.py -p -k 20 -C 20 -M 4e9 \\
--savegraph {}norm.C20k20.ct \\
-u {}orphans.fq.gz \\
{}*.fq
""".format(diginormdir, trimdir, interleavedir)
    clusterfunc_py3.qsub_file(diginormdir, "diginorm", khmer_modules, sra,
                              [diginorm_script])
Esempio n. 8
0
def run_diginorm(mmetsp_dir, mmetsp):
    """Queue a khmer ``normalize-by-median.py`` job for one sample directory.

    Graph, orphan reads and ``*.interleaved.fq`` inputs all live under
    ``mmetsp_dir``.

    Args:
        mmetsp_dir: sample directory prefix, also passed to ``qsub_file``.
        mmetsp: sample id, used as the job file name.
    """
    khmer_modules = ["GNU/4.8.3", "khmer/2.0"]
    # The same directory prefix fills all three placeholders.
    diginorm_script = """
normalize-by-median.py -p -k 20 -C 20 -M 4e9 \\
--savegraph {}norm.C20k20.ct \\
-u {}orphans.fq.gz \\
{}*.interleaved.fq
""".format(*([mmetsp_dir] * 3))
    clusterfunc_py3.qsub_file(mmetsp_dir, "diginorm", khmer_modules, mmetsp,
                              [diginorm_script])
Esempio n. 9
0
def transrate(transrate_dir, sample, trinity_fasta, mmetsp_assemblies_dir, filename):
    """Queue a reference-based transrate comparison.

    The job writes to ``/tmp/<sample>``, copies ``assemblies.csv`` to
    ``<transrate_dir><sample>.assemblies.csv`` and removes ``/tmp/<sample>*``.

    Args:
        transrate_dir: directory prefix receiving the csv.
        sample: sample id; names the scratch dir, the csv and the job file.
        trinity_fasta: assembly passed to ``--assembly``.
        mmetsp_assemblies_dir: directory passed to ``qsub_file``.
        filename: fasta passed to ``--reference``.
    """
    reference_fasta = filename  # the job file itself is named after `sample`
    job_script = """
transrate -o /tmp/{} \\
--assembly {} \\
--reference {} \\
--threads 8
cp /tmp/{}/assemblies.csv {}{}.assemblies.csv
rm -rf /tmp/{}*
""".format(sample, trinity_fasta, reference_fasta, sample, transrate_dir,
           sample, sample)
    clusterfunc_py3.qsub_file(mmetsp_assemblies_dir, "trans_ref", "", sample,
                              [job_script])
Esempio n. 10
0
def transrate(transrate_dir, sample, trinity_fasta, mmetsp_assemblies_dir, filename):
    """Queue a reference-based transrate run and keep its ``contigs.csv``.

    The job writes to ``/tmp/<sample>_forw``, copies
    ``/tmp/<sample>_forw/<sample>*/contigs.csv`` to
    ``<transrate_dir><sample>.contigs.csv`` and removes the scratch output.

    Args:
        transrate_dir: directory prefix receiving the csv.
        sample: sample id; names the scratch dir, the csv and the job file.
        trinity_fasta: assembly passed to ``--assembly``.
        mmetsp_assemblies_dir: directory passed to ``qsub_file``.
        filename: fasta passed to ``--reference``.
    """
    reference_fasta = filename  # the job file itself is named after `sample`
    job_script = """
transrate -o /tmp/{}_forw \\
--assembly {} \\
--reference {} \\
--threads 8
cp /tmp/{}_forw/{}*/contigs.csv {}{}.contigs.csv
rm -rf /tmp/{}_forw*
""".format(sample, trinity_fasta, reference_fasta, sample, sample,
           transrate_dir, sample, sample)
    clusterfunc_py3.qsub_file(mmetsp_assemblies_dir, "trans_ref", "", sample,
                              [job_script])
Esempio n. 11
0
def interleave_reads(trimdir, sra, interleavedir):
    """Queue ``interleave-reads.py`` for one sample unless output exists.

    If ``<interleavedir><sra>.trimmed.interleaved.fq`` is already a file the
    function only prints a notice; otherwise it queues a job that interleaves
    the ``.trim_1P.fq`` / ``.trim_2P.fq`` pair from ``trimdir`` into it.

    Args:
        trimdir: directory prefix holding the trimmed reads.
        sra: sample id; names the inputs, the output and the job file.
        interleavedir: output directory prefix, also passed to ``qsub_file``.
    """
    interleavefile = interleavedir + sra + ".trimmed.interleaved.fq"
    if os.path.isfile(interleavefile):
        print("already interleaved")
        return

    read_prefix = trimdir + sra
    job_script = "".join([
        "interleave-reads.py ",
        read_prefix, ".trim_1P.fq ",
        read_prefix, ".trim_2P.fq > ",
        interleavefile,
    ])
    print(job_script)
    clusterfunc_py3.qsub_file(interleavedir, "interleave",
                              ["GNU/4.8.3", "khmer/2.0"], sra, [job_script])
Esempio n. 12
0
def transrate_reverse(transrate_dir, sample, trinity_fasta, mmetsp_assemblies_dir, filename):
    """Queue the reverse reference-based transrate comparison.

    Here ``filename`` is the ``--assembly`` and ``trinity_fasta`` is the
    ``--reference``. The job writes to ``/tmp/<sample>_rev``, copies
    ``assemblies.csv`` to ``<transrate_dir><sample>.assemblies.csv`` and
    removes ``/tmp/<sample>_rev*``.

    Args:
        transrate_dir: directory prefix receiving the csv.
        sample: sample id; names the scratch dir, the csv and the job file.
        trinity_fasta: fasta passed to ``--reference``.
        mmetsp_assemblies_dir: directory passed to ``qsub_file``.
        filename: fasta passed to ``--assembly``.
    """
    assembly_fasta, reference_fasta = filename, trinity_fasta
    job_script = """
transrate -o /tmp/{}_rev \\
--assembly {} \\
--reference {} \\
--threads 8
cp /tmp/{}_rev/assemblies.csv {}{}.assemblies.csv
rm -rf /tmp/{}_rev*
""".format(sample, assembly_fasta, reference_fasta, sample, transrate_dir,
           sample, sample)
    print(job_script)
    clusterfunc_py3.qsub_file(mmetsp_assemblies_dir, "trans_ref_reverse", "",
                              sample, [job_script])
Esempio n. 13
0
def run_trinity(trinitydir, left, right, mmetsp):
    """Queue a Trinity 2.2.0 assembly job.

    The generated script exits early when
    ``<trinitydir>trinity_out_2.2.0.Trinity.fasta`` exists, otherwise runs
    Trinity into ``/tmp/<mmetsp>.trinity_out_2.2.0``, copies the resulting
    fasta to ``/mnt/home/ljcohen/oysterriver_assemblies/`` and cleans up.

    NOTE(review): the early-exit test looks in ``trinitydir`` for a file
    without the ``<mmetsp>.`` prefix, while the result is copied elsewhere
    with that prefix; confirm the check can ever succeed.

    Args:
        trinitydir: directory prefix for the early-exit test and ``qsub_file``.
        left: left reads file.
        right: right reads file.
        mmetsp: sample id; names the scratch output and the job file.
    """
    scratch_fields = (mmetsp,) * 3
    job_script = """
set -x
# stops execution if there is an error
set -e
if [ -f {}trinity_out_2.2.0.Trinity.fasta ]; then exit 0 ; fi
Trinity --left {} \\
--right {} --output /tmp/{}.trinity_out_2.2.0 --full_cleanup --seqType fq --max_memory 50G --CPU 16
cp /tmp/{}.trinity_out_2.2.0.Trinity.fasta /mnt/home/ljcohen/oysterriver_assemblies/
rm -rf /tmp/{}.trinity_out_2.2.0*
""".format(trinitydir, left, right, *scratch_fields)
    clusterfunc_py3.qsub_file(trinitydir, "trinity_2.2.0", ["trinity/2.2.0"],
                              mmetsp, [job_script])
Esempio n. 14
0
def rename_files(trinitydir, diginormdir, diginormfile, SRA):
    """Queue the "rename" job that prepares left/right reads for Trinity.

    Steps, in order: the command from ``combine_orphans(diginormdir)``,
    ``split-paired-reads.py`` on ``diginormfile``, concatenation of the
    ``*.1`` / ``*.2`` files into ``<trinitydir><SRA>.left.fq`` /
    ``.right.fq``, and appending the gunzipped orphans to the left file.

    Args:
        trinitydir: directory prefix receiving the left/right files.
        diginormdir: diginorm directory prefix, also passed to ``qsub_file``.
        diginormfile: file handed to ``split-paired-reads.py``.
        SRA: sample id; names the outputs and the job file.
    """
    orphan_cmd = combine_orphans(diginormdir)

    left_fq = trinitydir + SRA + ".left.fq"
    right_fq = trinitydir + SRA + ".right.fq"

    split_cmd = "".join(
        ["split-paired-reads.py -d ", diginormdir, " ", diginormfile])
    cat_left_cmd = "".join(["cat ", diginormdir, "*.1 > ", left_fq])
    cat_right_cmd = "".join(["cat ", diginormdir, "*.2 > ", right_fq])
    append_orphans_cmd = "".join(
        ["gunzip -c ", diginormdir, "orphans.keep.abundfilt.fq.gz >> ",
         left_fq])

    job_steps = [orphan_cmd, split_cmd, cat_left_cmd, cat_right_cmd,
                 append_orphans_cmd]
    clusterfunc_py3.qsub_file(diginormdir, "rename",
                              ["GNU/4.8.3", "khmer/2.0"], SRA, job_steps)
Esempio n. 15
0
def fastqc_report(fastq_file_list, newdir, fastqcdir, filename):
    """Queue a FastQC job for a sample unless its reports already exist.

    A sample counts as done when any file matching
    ``<fastqcdir><filename>_*_fastqc.zip`` exists; in that case nothing is
    queued. Previously that path fell through to the submission code and
    failed because the command string had never been built.

    Args:
        fastq_file_list: fastq paths, joined with spaces on the command line.
        newdir: unused by this function.
        fastqcdir: FastQC output directory prefix, also passed to
            ``qsub_file``.
        filename: sample id; used in the completion check and as the job
            file name.
    """
    print(fastq_file_list)
    print(fastqcdir + filename)

    if glob.glob(fastqcdir + filename + "_*_fastqc.zip"):
        print("fastqc already complete:", filename)
        return

    # One fastqc invocation covering every file in the list.
    fastqc_string = "fastqc -o " + fastqcdir + " " + " ".join(fastq_file_list)
    print("fastqc reports being generated for: " + str(fastq_file_list))
    clusterfunc_py3.qsub_file(fastqcdir, "fastqc", "", filename,
                              [fastqc_string])
Esempio n. 16
0
def run_busco(busco_dir, sample, basedir, filename):
    """Queue a BUSCO transcriptome run against ``eukaryota_odb9``.

    The job script sets up its own environment (bashrc, modules, the
    ``busco_v3`` environment) and then runs ``run_BUSCO.py`` on
    ``<basedir><filename>`` with output name ``sample``.

    Args:
        busco_dir: directory passed to ``qsub_file``.
        sample: BUSCO output name and job file name.
        basedir: directory prefix of the input fasta.
        filename: input fasta name under ``basedir``.
    """
    # The single backslashes are Python line continuations inside the
    # literal, so the python invocation ends up on one shell line.
    job_script = """
source ~/.bashrc
module load GNU/4.8.3
module unload python
module load parallel
source activate busco_v3

python /mnt/home/ljcohen/bin/busco/scripts/run_BUSCO.py \
-i {}{} \
-o {} -l /mnt/home/ljcohen/bin/busco/eukaryota_odb9 \
-m tran --cpu 8
""".format(basedir, filename, sample)
    print(job_script)
    clusterfunc_py3.qsub_file(busco_dir, "busco_euk", "", sample,
                              [job_script])
Esempio n. 17
0
def run_streaming_diginorm(trimdir, SRA, diginormdir):
    """Queue a streaming digital-normalisation pipeline for one sample.

    The job pipes interleaved trimmed reads plus ``orphans.fq.gz`` through
    ``trim-low-abund.py`` and ``extract-paired-reads.py``, which writes
    ``<diginormdir><SRA>.paired.gz`` and ``<diginormdir><SRA>.single.gz``.

    Args:
        trimdir: directory prefix holding the trimmed reads and orphans.
        SRA: sample id; names the inputs, the outputs and the job file.
        diginormdir: output directory prefix, also passed to ``qsub_file``.
    """
    # Script path from an earlier write-to-disk approach; computed but unused.
    diginormfile = diginormdir + SRA + ".stream.diginorm.sh"

    read_fields = (trimdir, SRA, trimdir, SRA, trimdir)
    output_fields = (diginormdir, SRA, diginormdir, SRA)
    pipeline = """#!/bin/bash
(interleave-reads.py {}{}.trim_1P.fq {}{}.trim_2P.fq && zcat {}orphans.fq.gz)| \\
(trim-low-abund.py -V -k 20 -Z 18 -C 2 - -o - -M 4e9 --diginorm --diginorm-coverage=20) | \\
(extract-paired-reads.py --gzip -p {}{}.paired.gz -s {}{}.single.gz) > /dev/null
""".format(*(read_fields + output_fields))
    print(pipeline)
    clusterfunc_py3.qsub_file(diginormdir, "diginorm_stream", [], SRA,
                              [pipeline])
Esempio n. 18
0
def interleave_reads(mmetsp_dir, mmetsp):
    """Queue a job that interleaves every trimmed read pair in a directory.

    The shell script changes into ``mmetsp_dir`` and, for each
    ``*.trim_1P.fq``, derives the ``_2P`` mate name and writes a gzipped
    stream to ``<base without _1P>.interleaved.fq``.

    Args:
        mmetsp_dir: sample directory, also passed to ``qsub_file``.
        mmetsp: sample id, used as the job file name.
    """
    khmer_modules = ["GNU/4.8.3", "khmer/2.0"]
    # Doubled braces are str.format escapes for the shell's ${...} syntax.
    loop_script = """
cd {}
for filename in *.trim_1P.fq
do
	base=$(basename $filename .fq)
	echo $base
	base2=${{base/_1P/_2P}}
	echo $base2
	output=${{base/_1P/}}.interleaved.fq
	#echo $output
	(interleave-reads.py ${{base}}.fq ${{base2}}.fq | gzip > $output)
done
""".format(mmetsp_dir)
    print(loop_script)
    clusterfunc_py3.qsub_file(mmetsp_dir, "interleave", khmer_modules, mmetsp,
                              [loop_script])
Esempio n. 19
0
def quant_salmon(salmon_indexdir, salmondir, sra, mmetsp, newdir,
                 trinity_fasta):
    """Queue salmon indexing and quantification for one sample.

    The presence of both trimmed mates under ``<newdir>trim/`` is reported
    on stdout (a missing second mate used to go unreported); the job is
    queued either way. The index name and index command come from
    ``salmon_index``.

    Args:
        salmon_indexdir: forwarded to ``salmon_index``.
        salmondir: output directory prefix, also passed to ``qsub_file``.
        sra: read-file basename and job file name.
        mmetsp: sample id, used in the ``.quant`` output name.
        newdir: sample directory prefix containing ``trim/``.
        trinity_fasta: forwarded to ``salmon_index``.
    """
    file1 = newdir + "trim/" + sra + ".trim_1P.fq"
    file2 = newdir + "trim/" + sra + ".trim_2P.fq"
    for read_file in (file1, file2):
        if os.path.isfile(read_file):
            print("file exists:", read_file)
        else:
            print("missing:", read_file)

    index, salmon_index_string = salmon_index(salmondir, salmon_indexdir, sra,
                                              trinity_fasta)

    salmon_string = "salmon quant -i " + index + " --libType IU -1 " + file1 + \
        " -2 " + file2 + " -o " + salmondir + mmetsp + "_" + sra + ".quant --dumpEq --auxDir aux"

    print(salmon_index_string)
    print(salmon_string)
    # The index is built first, then quantified against, in the same job.
    clusterfunc_py3.qsub_file(salmondir, "salmon", "", sra,
                              [salmon_index_string, salmon_string])
Esempio n. 20
0
def run_dammit(dammit_string,dammitdir,mmetsp):
    """Queue a prepared dammit command as a cluster job.

    Args:
        dammit_string: complete shell command to run.
        dammitdir: directory passed to ``clusterfunc_py3.qsub_file``.
        mmetsp: sample id, used as the job file name.
    """
    # Use the `dammitdir` parameter: the body previously referenced an
    # undefined name `dammit_dir`, which fails unless a global of that
    # name happens to exist.
    clusterfunc_py3.qsub_file(dammitdir, "dammit", [], mmetsp,
                              [dammit_string])
Esempio n. 21
0
def run_dammit(dammit_string,dammitdir,mmetsp):
    """Queue a prepared dammit command as a cluster job.

    Args:
        dammit_string: complete shell command to run.
        dammitdir: directory passed to ``clusterfunc_py3.qsub_file``.
        mmetsp: sample id, used as the job file name.
    """
    # Use the `dammitdir` parameter: the body previously referenced an
    # undefined name `dammit_dir`, which fails unless a global of that
    # name happens to exist.
    clusterfunc_py3.qsub_file(dammitdir, "dammit", [], mmetsp,
                              [dammit_string])
Esempio n. 22
0
import os
import os.path
from os.path import basename
import subprocess
from subprocess import Popen, PIPE
import glob
# custom Lisa module
import clusterfunc_py3


def fastqc_report(fastq_file_list, newdir, fastqcdir, filename):
    """Queue a FastQC job covering every file in ``fastq_file_list``.

    Rewritten as valid Python 3: the Python 2 print statements and the
    stray indentation made the original a syntax error.

    Args:
        fastq_file_list: fastq paths, joined with spaces on the command line.
        newdir: unused by this function.
        fastqcdir: FastQC output directory prefix, also passed to
            ``qsub_file``.
        filename: sample id, used as the job file name.
    """
    print(fastq_file_list)
    print(fastqcdir + filename)
    fastqc_string = "fastqc -o " + fastqcdir + " " + " ".join(fastq_file_list)
    print("fastqc reports being generated for: " + str(fastq_file_list))
    clusterfunc_py3.qsub_file(fastqcdir, "fastqc", "", filename,
                              [fastqc_string])

with open("~/trimmed_files.txt") as