Beispiel #1
0
def run_trinity(assemblydir):
    """Queue one Trinity assembly job per sample directory in *assemblydir*.

    For every sub-directory ``<assemblydir>/<genus_species>/`` a shell script
    is generated that reads ``<genus_species>.left.fq`` and
    ``<genus_species>.right.fq`` from that directory, writes to
    ``trinity_out`` inside it, and exits early when
    ``trinity_out/Trinity.fasta`` already exists.  The script is passed to
    ``clusterfunc.qsub_file`` with the ``trinity/2.2.0`` module.

    Args:
        assemblydir: directory holding one sub-directory per sample; a
            trailing slash is optional.
    """
    script_lines = [
        "",
        "set -x",
        "# stops execution if there is an error",
        "set -e",
        "if [ -f {d}trinity_out/Trinity.fasta ]; then exit 0 ; fi",
        "if [ -d {d}trinity_out ]; then mv {d}trinity_out_dir {d}trinity_out_dir0 || true ; fi",
        "",
        "Trinity --left {d}{name}.left.fq \\",
        "--right {d}{name}.right.fq \\",
        "--output {d}trinity_out --seqType fq --monitoring --bflyCalculateCPU --max_memory 150G\t\\",
        "--CPU 32",
        "",
    ]
    template = "\n".join(script_lines)

    for genus_species in os.listdir(assemblydir):
        print(genus_species)
        # Joining with a trailing "" yields ".../<genus_species>/" whether or
        # not assemblydir itself ends with a slash.
        genus_species_dir = os.path.join(assemblydir, genus_species, "")
        if not os.path.isdir(genus_species_dir):
            # Stray files next to the sample directories would make the
            # os.listdir() call below raise; skip them.
            continue
        print(os.listdir(genus_species_dir))
        trinity_command = template.format(d=genus_species_dir,
                                          name=genus_species)
        print(trinity_command)
        clusterfunc.qsub_file(genus_species_dir, "trinity", ["trinity/2.2.0"],
                              genus_species, [trinity_command])
Beispiel #2
0
def run_move_files(trimdir, sra):
    """Queue a "move" job for one sample.

    The job consists of the string returned by ``make_orphans`` followed by
    the two strings returned by ``move_files``, in that order.
    """
    orphan_cmd = make_orphans(trimdir, sra)
    first_mv, second_mv = move_files(trimdir, sra)
    clusterfunc.qsub_file(trimdir, "move", "", sra,
                          [orphan_cmd, first_mv, second_mv])
Beispiel #3
0
def rename_files(trinitydir, diginormdir, mmetsp):
    """Queue a "rename" job that assembles the left/right read files.

    The job concatenates ``<diginormdir>*.1`` into ``<mmetsp>.left.fq`` and
    ``<diginormdir>*.2`` into ``<mmetsp>.right.fq`` under *trinitydir*, then
    appends the decompressed ``orphans.keep.abundfilt.fq.gz`` to the left
    file.

    Args:
        trinitydir: output directory; a trailing slash is optional.
        diginormdir: input directory prefix (used by plain concatenation, so
            it must end with a slash).
        mmetsp: sample identifier used for the output file names and as the
            job file name.
    """
    # Always insert the separator so all three commands address the same
    # files; a doubled slash is harmless when trinitydir already ends in "/".
    out_prefix = trinitydir + "/" + mmetsp
    left_fq = out_prefix + ".left.fq"
    right_fq = out_prefix + ".right.fq"
    commands = [
        "cat " + diginormdir + "*.1 > " + left_fq,
        "cat " + diginormdir + "*.2 > " + right_fq,
        "gunzip -c " + diginormdir + "orphans.keep.abundfilt.fq.gz >> " + left_fq,
    ]
    clusterfunc.qsub_file(diginormdir, "rename", ["GNU/4.8.3", "khmer/2.0"],
                          mmetsp, commands)
Beispiel #4
0
def send_to_cluster(newdir, command_list, sra, names):
    """Pass *command_list* to ``clusterfunc.qsub_file`` as a single job.

    The call is made once, with the complete list, rather than once per
    command with a growing prefix of the list.

    Args:
        newdir: directory argument forwarded to ``qsub_file``.
        command_list: iterable of command strings.
        sra: forwarded as the job file name.
        names: forwarded as the process name.
    """
    commands = list(command_list)
    if not commands:
        # Nothing to run, so no job is queued.
        return
    clusterfunc.qsub_file(newdir, names, "", sra, commands)
Beispiel #5
0
def run_trimmomatic_TruSeq(missing, trimmed, remaining, trimdir, file1, file2,
                           sra):
    """Queue a Trimmomatic PE run for *sra* unless its log reports success.

    The log ``<trimdir>qsub_files/trim.<sra>.log`` is searched for the text
    "TrimmomaticPE: Completed successfully".

    Args:
        missing: list; *trimdir* is appended when a matching log entry exists
            but success is not reported (a job is queued).
        trimmed: list; *sra* is appended when the log reports success (no job
            is queued).
        remaining: list; *trimdir* is appended when no log entry matches (a
            job is queued).
        trimdir: directory prefix, used by plain string concatenation.
        file1: first paired-end input file handed to Trimmomatic.
        file2: second paired-end input file handed to Trimmomatic.
        sra: sample identifier used in output, log and job file names.

    Returns:
        The tuple ``(missing, trimmed, remaining)`` of the lists passed in.
    """
    qsub_dir = trimdir + "qsub_files/"
    log_name = "trim." + sra + ".log"
    clusterfunc.check_dir(qsub_dir)
    matching = [s for s in os.listdir(qsub_dir) if log_name in s]

    # Start from "no log lines": `matching` is a substring match, so it can be
    # non-empty even when the exact log file does not exist.
    content = []
    trim_file = qsub_dir + log_name
    if os.path.isfile(trim_file):
        with open(trim_file) as f:
            content = f.readlines()

    if matching:
        done_marker = "TrimmomaticPE: Completed successfully"
        if any(done_marker in line for line in content):
            print("Already trimmed: " + str(matching))
            trimmed.append(sra)
            return missing, trimmed, remaining
        missing.append(trimdir)
    else:
        remaining.append(trimdir)

    j = """
java -jar /mnt/home/ljcohen/bin/Trimmomatic-0.33/trimmomatic-0.33.jar PE \\
-baseout {}.trim.fq \\
{} {} \\
ILLUMINACLIP:/mnt/home/ljcohen/bin/Trimmomatic-0.33/adapters/combined.fa:2:40:15 \\
SLIDINGWINDOW:4:2 \\
LEADING:2 \\
TRAILING:2 \\
MINLEN:25 &> trim.{}.log
""".format(sra, file1, file2, sra)
    orphan_string = make_orphans(trimdir, sra)
    clusterfunc.qsub_file(trimdir, "trim", "", sra, [j, orphan_string])
    return missing, trimmed, remaining
Beispiel #6
0
def send_to_cluster(newdir, command_list, sra, names):
    """Queue every command in *command_list* as one job.

    ``clusterfunc.qsub_file`` is invoked a single time with the whole list;
    invoking it per command would hand it a growing prefix of the list on
    each iteration.

    Args:
        newdir: directory argument forwarded to ``qsub_file``.
        command_list: iterable of command strings.
        sra: forwarded as the job file name.
        names: forwarded as the process name.
    """
    commands = [command for command in command_list]
    if commands:
        clusterfunc.qsub_file(newdir, names, "", sra, commands)
Beispiel #7
0
def rename_files(trinitydir, diginormdir, mmetsp):
    """Queue a "rename" job that builds ``<mmetsp>.left.fq`` / ``.right.fq``.

    ``<diginormdir>*.1`` files are concatenated into the left file and
    ``<diginormdir>*.2`` files into the right file under *trinitydir*; the
    decompressed ``orphans.keep.abundfilt.fq.gz`` is then appended to the
    left file.

    Args:
        trinitydir: output directory; a trailing slash is optional.
        diginormdir: input directory prefix (concatenated as-is, so it must
            end with a slash).
        mmetsp: sample identifier for output names and the job file name.
    """
    # The orphan reads must be appended to the very file created by the first
    # command, so derive both paths once (a doubled "/" is harmless).
    left_fq = trinitydir + "/" + mmetsp + ".left.fq"
    right_fq = trinitydir + "/" + mmetsp + ".right.fq"
    orphans = diginormdir + "orphans.keep.abundfilt.fq.gz"
    commands = [
        "cat " + diginormdir + "*.1 > " + left_fq,
        "cat " + diginormdir + "*.2 > " + right_fq,
        "gunzip -c " + orphans + " >> " + left_fq,
    ]
    module_name_list = ["GNU/4.8.3", "khmer/2.0"]
    clusterfunc.qsub_file(diginormdir, "rename", module_name_list, mmetsp,
                          commands)
Beispiel #8
0
def run_trimmomatic_TruSeq(missing, trimmed, remaining, trimdir, file1, file2, sra):
    """Queue Trimmomatic PE for *sra* unless a log already reports success.

    Success is detected by the text "TrimmomaticPE: Completed successfully"
    in ``<trimdir>qsub_files/trim.<sra>.log``.

    Args:
        missing: list; receives *trimdir* when a matching log entry exists
            without a success line (a job is queued).
        trimmed: list; receives *sra* when the log reports success.
        remaining: list; receives *trimdir* when no log entry matches (a job
            is queued).
        trimdir: directory prefix, used by plain string concatenation.
        file1: first paired-end input file.
        file2: second paired-end input file.
        sra: sample identifier used in output, log and job file names.

    Returns:
        ``(missing, trimmed, remaining)``, the same list objects passed in.
    """
    log_dir = trimdir + "qsub_files/"
    log_name = "trim." + sra + ".log"
    clusterfunc.check_dir(log_dir)
    matching = [s for s in os.listdir(log_dir) if log_name in s]

    # `matching` is a substring test, so a name such as "trim.X.log.old" can
    # match while the exact log is absent; keep `content` defined regardless.
    content = []
    trim_file = log_dir + log_name
    if os.path.isfile(trim_file):
        with open(trim_file) as f:
            content = f.readlines()

    if not matching:
        remaining.append(trimdir)
    elif [m for m in content if "TrimmomaticPE: Completed successfully" in m]:
        print("Already trimmed: " + str(matching))
        trimmed.append(sra)
        return missing, trimmed, remaining
    else:
        missing.append(trimdir)

    # Single place where the Trimmomatic command is built and queued.
    j = """
java -jar /mnt/home/ljcohen/bin/Trimmomatic-0.33/trimmomatic-0.33.jar PE \\
-baseout {}.trim.fq \\
{} {} \\
ILLUMINACLIP:/mnt/home/ljcohen/bin/Trimmomatic-0.33/adapters/combined.fa:2:40:15 \\
SLIDINGWINDOW:4:2 \\
LEADING:2 \\
TRAILING:2 \\
MINLEN:25 &> trim.{}.log
""".format(sra, file1, file2, sra)
    orphan_string = make_orphans(trimdir, sra)
    clusterfunc.qsub_file(trimdir, "trim", "", sra, [j, orphan_string])
    return missing, trimmed, remaining
Beispiel #9
0
def run_busco(busco_dir, trinity_fasta, sample):
    """Queue a BUSCO transcriptome run against the metazoa lineage set.

    Args:
        busco_dir: directory argument forwarded to ``clusterfunc.qsub_file``.
        trinity_fasta: assembly passed to ``busco -in``.
        sample: used for the ``-o <sample>.metazoa`` output name and as the
            job file name.
    """
    # Written on one line: the resulting command has no line break between
    # the input file and the remaining options.
    template = ("\n"
                "busco -m trans -in {} "
                "--cpu 16 -l /mnt/research/ged/lisa/busco/metazoa -o {}.metazoa"
                "\n")
    busco_command = template.format(trinity_fasta, sample)
    print(busco_command)
    clusterfunc.qsub_file(busco_dir, "busco", "", sample, [busco_command])
Beispiel #10
0
def split_paired_reads(trinitydir, diginormdir, mmetsp):
    """Queue one ``split-paired-reads.py`` job per ``.pe`` file.

    Files are looked up in ``<diginormdir>qsub_files/`` and the split output
    is directed to *diginormdir* via ``-d``.  *trinitydir* is not used.
    """
    pe_dir = diginormdir + "qsub_files/"
    for entry in os.listdir(pe_dir):
        if not entry.endswith(".pe"):
            continue
        split_cmd = "split-paired-reads.py -d " + diginormdir + " " + pe_dir + entry
        clusterfunc.qsub_file(diginormdir, "split",
                              ["GNU/4.8.3", "khmer/2.0"], mmetsp, [split_cmd])
Beispiel #11
0
def run_busco(busco_dir, trinity_fasta, sample, sra):
    """Queue a BUSCO transcriptome run against the eukaryota lineage set.

    Args:
        busco_dir: directory argument forwarded to ``clusterfunc.qsub_file``.
        trinity_fasta: assembly passed to ``busco -in``.
        sample: used for the ``-o <sample>.euk`` output name.
        sra: used as the job file name.
    """
    # The command is a single line: input file, then the remaining options.
    pieces = [
        "\n",
        "busco -m trans -in {} ",
        "--cpu 30 -l /mnt/research/ged/lisa/busco/eukaryota -o {}.euk",
        "\n",
    ]
    busco_command = "".join(pieces).format(trinity_fasta, sample)
    print(busco_command)
    clusterfunc.qsub_file(busco_dir, "busco", "", sra, [busco_command])
Beispiel #12
0
def run_rap_clust(salmondir, rapclustdir, sra):
    """Queue a RapClust job using ``<rapclustdir><sra>_config.yaml``.

    ``get_quant_file`` and ``get_config_file`` are called first; the return
    value of the latter is not used here.
    """
    quant_file = get_quant_file(salmondir, sra)
    get_config_file(quant_file, rapclustdir, sra)
    rapclust_string = "RapClust --config " + str(rapclustdir + sra + "_config.yaml")
    print(rapclust_string)
    clusterfunc.qsub_file(rapclustdir, "rapclust", "", sra, [rapclust_string])
Beispiel #13
0
def run_filter_abund(diginormdir, sra):
    """Queue a "filtabund" job: ``filter-abund.py`` plus ``extract_paired()``.

    ``filter-abund.py`` is pointed at ``<diginormdir>norm.C20k20.ct`` and the
    ``*.keep`` files under ``<diginormdir>qsub_files/``.
    """
    keep_glob = diginormdir + "qsub_files/"
    filter_string = (
        "\nfilter-abund.py -V -Z 18 {}norm.C20k20.ct {}*.keep\n"
    ).format(diginormdir, keep_glob)
    job = [filter_string, extract_paired()]
    clusterfunc.qsub_file(diginormdir, "filtabund",
                          ["GNU/4.8.3", "khmer/2.0"], sra, job)
Beispiel #14
0
def run_rap_clust(salmondir, rapclustdir, sra):
    """Queue ``RapClust --config <rapclustdir><sra>_config.yaml`` as a job.

    The quant file is obtained from ``get_quant_file`` and handed to
    ``get_config_file``; only the call itself matters here, its result is
    discarded.
    """
    get_config_file(get_quant_file(salmondir, sra), rapclustdir, sra)
    config_path = rapclustdir + sra + "_config.yaml"
    command = "RapClust --config " + str(config_path)
    print(command)
    clusterfunc.qsub_file(rapclustdir, "rapclust", "", sra, [command])
Beispiel #15
0
def run_filter_abund(diginormdir, sra):
    """Queue ``filter-abund.py`` followed by the ``extract_paired()`` command.

    Inputs are ``<diginormdir>norm.C20k20.ct`` and every ``*.keep`` file in
    ``<diginormdir>qsub_files/``.
    """
    template = "\n".join([
        "",
        "filter-abund.py -V -Z 18 {graph}norm.C20k20.ct {keep}*.keep",
        "",
    ])
    filter_cmd = template.format(graph=diginormdir,
                                 keep=diginormdir + "qsub_files/")
    extract_cmd = extract_paired()
    modules = ["GNU/4.8.3", "khmer/2.0"]
    clusterfunc.qsub_file(diginormdir, "filtabund", modules, sra,
                          [filter_cmd, extract_cmd])
Beispiel #16
0
def rename_files(trinitydir, diginormdir, diginormfile, SRA):
    """Queue a "rename" job that splits *diginormfile* and collects reads.

    The job runs, in order: the value returned by
    ``combine_orphans(diginormdir)``, ``split-paired-reads.py`` on
    *diginormfile*, two ``cat`` commands building ``<SRA>.left.fq`` and
    ``<SRA>.right.fq`` in *trinitydir*, and a ``gunzip -c`` appending the
    orphan reads to the left file.
    """
    rename_orphans = combine_orphans(diginormdir)
    left_fq = trinitydir + SRA + ".left.fq"
    right_fq = trinitydir + SRA + ".right.fq"
    commands = [
        rename_orphans,
        "split-paired-reads.py -d " + diginormdir + " " + diginormfile,
        "cat " + diginormdir + "*.1 > " + left_fq,
        "cat " + diginormdir + "*.2 > " + right_fq,
        "gunzip -c " + diginormdir + "orphans.keep.abundfilt.fq.gz >> " + left_fq,
    ]
    clusterfunc.qsub_file(diginormdir, "rename", ["GNU/4.8.3", "khmer/2.0"],
                          SRA, commands)
Beispiel #17
0
def split_paired_reads(trinitydir, diginormdir, mmetsp):
    """Queue ``split-paired-reads.py`` for each ``.pe`` file found.

    The ``.pe`` files are read from ``<diginormdir>qsub_files/``; output goes
    to *diginormdir* (``-d``).  *trinitydir* is accepted but unused.
    """
    source_dir = diginormdir + "qsub_files/"
    pe_files = [name for name in os.listdir(source_dir)
                if name.endswith(".pe")]
    for pe_file in pe_files:
        command = "split-paired-reads.py -d " + diginormdir + " " + source_dir + pe_file
        modules = ["GNU/4.8.3", "khmer/2.0"]
        clusterfunc.qsub_file(diginormdir, "split", modules, mmetsp,
                              [command])
Beispiel #18
0
def run_normalize_by_median(diginormdir, mmetsp):
    """Queue a "diginorm" job running ``normalize-by-median.py``.

    The command processes ``<diginormdir>*.fq`` and saves the graph to
    ``<diginormdir>norm.C20k20.ct``.
    """
    script = """
normalize-by-median.py -p -k 20 -C 20 -M 4e9 \\
--savegraph {0}norm.C20k20.ct \\
{0}*.fq
""".format(diginormdir)
    clusterfunc.qsub_file(diginormdir, "diginorm", ["GNU/4.8.3", "khmer/2.0"],
                          mmetsp, [script])
Beispiel #19
0
def run_normalize_by_median(diginormdir, mmetsp):
    """Queue ``normalize-by-median.py`` over ``<diginormdir>*.fq``.

    The count graph is written to ``<diginormdir>norm.C20k20.ct``; *mmetsp*
    is used as the job file name.
    """
    lines = [
        "",
        "normalize-by-median.py -p -k 20 -C 20 -M 4e9 \\",
        "--savegraph {d}norm.C20k20.ct \\",
        "{d}*.fq",
        "",
    ]
    command = "\n".join(lines).format(d=diginormdir)
    modules = ["GNU/4.8.3", "khmer/2.0"]
    clusterfunc.qsub_file(diginormdir, "diginorm", modules, mmetsp, [command])
Beispiel #20
0
def run_diginorm(diginormdir, interleavedir, trimdir, sra):
    """Queue ``normalize-by-median.py`` for one sample.

    Paired input is ``<interleavedir>*.fq``, unpaired input (``-u``) is
    ``<trimdir>orphans.fq.gz`` and the graph is saved to
    ``<diginormdir>norm.C20k20.ct``.
    """
    script = """
normalize-by-median.py -p -k 20 -C 20 -M 4e9 \\
--savegraph {graph}norm.C20k20.ct \\
-u {orphans}orphans.fq.gz \\
{reads}*.fq
""".format(graph=diginormdir, orphans=trimdir, reads=interleavedir)
    clusterfunc.qsub_file(diginormdir, "diginorm", ["GNU/4.8.3", "khmer/2.0"],
                          sra, [script])
Beispiel #21
0
def interleave_reads(trimdir, sra, interleavedir):
    """Queue ``interleave-reads.py`` for a sample unless output exists.

    Reads ``<trimdir><sra>.trim_1P.fq`` / ``.trim_2P.fq`` and redirects the
    result to ``<interleavedir><sra>.trimmed.interleaved.fq``.
    """
    interleavefile = interleavedir + sra + ".trimmed.interleaved.fq"
    if os.path.isfile(interleavefile):
        print("already interleaved")
        return
    read_prefix = trimdir + sra
    interleave_string = ("interleave-reads.py " + read_prefix + ".trim_1P.fq "
                         + read_prefix + ".trim_2P.fq > " + interleavefile)
    print(interleave_string)
    clusterfunc.qsub_file(interleavedir, "interleave",
                          ["GNU/4.8.3", "khmer/2.0"], sra,
                          [interleave_string])
Beispiel #22
0
def run_diginorm(diginormdir, interleavedir, trimdir, sra):
    """Queue a "diginorm" job for *sra*.

    ``normalize-by-median.py`` reads ``<interleavedir>*.fq`` plus the
    unpaired file ``<trimdir>orphans.fq.gz`` and saves its graph as
    ``<diginormdir>norm.C20k20.ct``.
    """
    lines = [
        "",
        "normalize-by-median.py -p -k 20 -C 20 -M 4e9 \\",
        "--savegraph {0}norm.C20k20.ct \\",
        "-u {1}orphans.fq.gz \\",
        "{2}*.fq",
        "",
    ]
    command = "\n".join(lines).format(diginormdir, trimdir, interleavedir)
    modules = ["GNU/4.8.3", "khmer/2.0"]
    clusterfunc.qsub_file(diginormdir, "diginorm", modules, sra, [command])
Beispiel #23
0
def interleave_reads(trimdir, sra, interleavedir):
    """Queue an "interleave" job when the interleaved file is not present.

    Input files are ``<trimdir><sra>.trim_1P.fq`` and
    ``<trimdir><sra>.trim_2P.fq``; output is redirected to
    ``<interleavedir><sra>.trimmed.interleaved.fq``.
    """
    interleavefile = interleavedir + sra + ".trimmed.interleaved.fq"
    if not os.path.isfile(interleavefile):
        first_reads = trimdir + sra + ".trim_1P.fq "
        second_reads = trimdir + sra + ".trim_2P.fq > "
        interleave_string = ("interleave-reads.py " + first_reads
                             + second_reads + interleavefile)
        print(interleave_string)
        modules = ["GNU/4.8.3", "khmer/2.0"]
        clusterfunc.qsub_file(interleavedir, "interleave", modules, sra,
                              [interleave_string])
    else:
        print("already interleaved")
Beispiel #24
0
def combine_orphans(diginormdir, mmetsp):
    """Queue an "orphans" job that gzips every ``*.se`` file into one archive.

    The ``.se`` files are taken from ``<diginormdir>qsub_files/`` and appended
    to ``<diginormdir>orphans.keep.abundfilt.fq.gz``.
    """
    script = """
touch {out}orphans.keep.abundfilt.fq.gz
for file in {src}*.se
do
        gzip -9c ${{file}} >> {out}orphans.keep.abundfilt.fq.gz
done
""".format(out=diginormdir, src=diginormdir + "qsub_files/")
    clusterfunc.qsub_file(diginormdir, "orphans", ["GNU/4.8.3", "khmer/2.0"],
                          mmetsp, [script])
Beispiel #25
0
def combine_orphans(diginormdir, mmetsp):
    """Queue a shell loop collecting single-end files into one gzip file.

    Each ``<diginormdir>qsub_files/*.se`` file is compressed with ``gzip -9c``
    and appended to ``<diginormdir>orphans.keep.abundfilt.fq.gz``.
    """
    lines = [
        "",
        "touch {0}orphans.keep.abundfilt.fq.gz",
        "for file in {1}*.se",
        "do",
        "        gzip -9c ${{file}} >> {0}orphans.keep.abundfilt.fq.gz",
        "done",
        "",
    ]
    se_dir = diginormdir + "qsub_files/"
    command = "\n".join(lines).format(diginormdir, se_dir)
    modules = ["GNU/4.8.3", "khmer/2.0"]
    clusterfunc.qsub_file(diginormdir, "orphans", modules, mmetsp, [command])
Beispiel #26
0
def run_diginorm(mmetsp_dir, mmetsp):
    """Queue ``normalize-by-median.py`` with all paths under *mmetsp_dir*.

    Paired input is ``*.interleaved.fq``, unpaired input is
    ``orphans.fq.gz`` and the graph is saved as ``norm.C20k20.ct``.
    """
    script = """
normalize-by-median.py -p -k 20 -C 20 -M 4e9 \\
--savegraph {0}norm.C20k20.ct \\
-u {0}orphans.fq.gz \\
{0}*.interleaved.fq
""".format(mmetsp_dir)
    clusterfunc.qsub_file(mmetsp_dir, "diginorm", ["GNU/4.8.3", "khmer/2.0"],
                          mmetsp, [script])
Beispiel #27
0
def get_trinity(trinitydir, left, right, SRA):
    """Queue a Trinity job writing to ``<trinitydir>trinity_out``.

    The generated script exits immediately when
    ``<trinitydir>trinity_out/Trinity.fasta`` already exists.
    """
    script = """
set -x
# stops execution if there is an error
set -e
if [ -f {out}trinity_out/Trinity.fasta ]; then exit 0 ; fi
#if [ -d {out}trinity_out ]; then mv {out}trinity_out_dir {out}trinity_out_dir0 || true ; fi
Trinity --left {left} \\
--right {right} --output {out}trinity_out --seqType fq --JM 20G --CPU 16
""".format(out=trinitydir, left=left, right=right)
    clusterfunc.qsub_file(trinitydir, "trinity", ["trinity/20140413p1"], SRA,
                          [script])
Beispiel #28
0
def interleave_reads(fastq_list, mmetsp_dir, mmetsp, diginormdir):
    """Queue one "interleave" job per consecutive pair in *fastq_list*.

    Entries at even positions are treated as the left file and the following
    entry as the right file.  Output goes to
    ``<diginormdir><name>.interleaved.fq`` where ``<name>`` is the left file
    name up to its first dot.  *mmetsp* is not used.
    """
    for i in range(0, len(fastq_list), 2):
        print(i)
        left = fastq_list[i]
        print(left)
        right = fastq_list[i + 1]
        print(right)
        stem = left.split(".")[0]
        interleave_file = diginormdir + stem + ".interleaved.fq"
        interleave_string = ("interleave-reads.py " + mmetsp_dir + left + " "
                             + mmetsp_dir + right + " > " + interleave_file)
        print(interleave_string)
        clusterfunc.qsub_file(diginormdir, "interleave",
                              ["GNU/4.8.3", "khmer/2.0"],
                              left.split(".")[0], [interleave_string])
Beispiel #29
0
def interleave_reads(fastq_list, mmetsp_dir, mmetsp, diginormdir):
    """Pair up *fastq_list* two at a time and queue an interleave job each.

    For every pair the left/right names are prefixed with *mmetsp_dir* and
    the output is redirected to ``<diginormdir><name>.interleaved.fq``, with
    ``<name>`` being the part of the left file name before the first dot.
    *mmetsp* is accepted but unused.
    """
    pair_start = 0
    while pair_start < len(fastq_list):
        print(pair_start)
        left = fastq_list[pair_start]
        print(left)
        right = fastq_list[pair_start + 1]
        print(right)
        target = diginormdir + left.split(".")[0] + ".interleaved.fq"
        command = "interleave-reads.py " + mmetsp_dir + left + " " + mmetsp_dir + right + " > " + target
        print(command)
        modules = ["GNU/4.8.3", "khmer/2.0"]
        job_name = left.split(".")[0]
        clusterfunc.qsub_file(diginormdir, "interleave", modules, job_name,
                              [command])
        pair_start += 2
Beispiel #30
0
def rename_files(trinitydir, diginormdir, diginormfile, SRA):
    """Queue a "rename" job: split *diginormfile*, then gather the reads.

    Command order: result of ``combine_orphans(diginormdir)``,
    ``split-paired-reads.py``, ``cat *.1`` into ``<SRA>.left.fq``,
    ``cat *.2`` into ``<SRA>.right.fq``, and ``gunzip -c`` of the orphan
    archive appended to the left file.
    """
    commands = [combine_orphans(diginormdir)]
    commands.append("split-paired-reads.py -d " + diginormdir + " " + diginormfile)
    out_prefix = trinitydir + SRA
    commands.append("cat " + diginormdir + "*.1 > " + out_prefix + ".left.fq")
    commands.append("cat " + diginormdir + "*.2 > " + out_prefix + ".right.fq")
    commands.append("gunzip -c " + diginormdir
                    + "orphans.keep.abundfilt.fq.gz >> "
                    + out_prefix + ".left.fq")
    modules = ["GNU/4.8.3", "khmer/2.0"]
    clusterfunc.qsub_file(diginormdir, "rename", modules, SRA, commands)
Beispiel #31
0
def run_trinity(trinitydir, left, right, SRA):
    """Queue a Trinity (module ``trinity/20140413p1``) job for one sample.

    Output is written to ``<trinitydir>trinity_out``; the script exits early
    when ``Trinity.fasta`` is already present there.
    """
    lines = [
        "",
        "set -x",
        "# stops execution if there is an error",
        "set -e",
        "if [ -f {0}trinity_out/Trinity.fasta ]; then exit 0 ; fi",
        "#if [ -d {0}trinity_out ]; then mv {0}trinity_out_dir {0}trinity_out_dir0 || true ; fi",
        "Trinity --left {1} \\",
        "--right {2} --output {0}trinity_out --seqType fq --JM 20G --CPU 16",
        "",
    ]
    command = "\n".join(lines).format(trinitydir, left, right)
    clusterfunc.qsub_file(trinitydir, "trinity", ["trinity/20140413p1"], SRA,
                          [command])
def run_trinity(trinitydir, left, right, mmetsp, output_dir, file_extension):
    """Queue a Trinity 2.2.0 job that assembles in ``/tmp`` and copies out.

    Trinity writes to ``/tmp/<mmetsp><file_extension>``; afterwards
    ``/tmp/<mmetsp>*.fasta`` is copied to *output_dir* and ``/tmp/<mmetsp>*``
    is removed.
    """
    script = """
set -x
# stops execution if there is an error
set -e

Trinity --left {left} \\
--right {right} --output /tmp/{sample}{ext} --full_cleanup --seqType fq --max_memory 20G --CPU 16

cp /tmp/{sample}*.fasta {dest}
rm -rf /tmp/{sample}*
""".format(left=left, right=right, sample=mmetsp, ext=file_extension,
           dest=output_dir)
    clusterfunc.qsub_file(trinitydir, "trinity_2.2.0", ["trinity/2.2.0"],
                          mmetsp, [script])
Beispiel #33
0
def quant_salmon(salmondir, sra, newdir, trinity_fasta):
    """Queue a "salmon" job: the index command followed by ``salmon quant``.

    The paired inputs are ``<newdir>trim/<sra>.trim_1P.fq`` and
    ``.trim_2P.fq``; their presence is reported on stdout (a missing second
    file is not reported).  The index name and index command come from
    ``salmon_index``.
    """
    trim_prefix = newdir + "trim/" + sra
    file1 = trim_prefix + ".trim_1P.fq"
    file2 = trim_prefix + ".trim_2P.fq"
    if os.path.isfile(file1):
        print("file exists: " + file1)
    else:
        print("missing: " + file1)
    if os.path.isfile(file2):
        print("file exists: " + file2)
    index, salmon_index_string = salmon_index(salmondir, sra, trinity_fasta)
    salmon_string = ("salmon quant -i " + index + " --libType IU -1 " + file1
                     + " -2 " + file2 + " -o " + salmondir + sra
                     + ".quant --dumpEq --auxDir aux")
    clusterfunc.qsub_file(salmondir, "salmon", "", sra,
                          [salmon_index_string, salmon_string])
Beispiel #34
0
def transrate(trinitydir, transrate_dir, transrate_out, trinity_fasta, sample,
              trimdir, sra):
    """Queue a "transrate" job for one assembly.

    The assembly is ``<trinitydir><sample>.Trinity.fixed.fa``, reads are
    ``<trimdir><sra>.trim_1P.fq`` / ``.trim_2P.fq`` and results go to
    *transrate_out*.  *trinity_fasta* is not used.
    """
    # The command is a single line; options are separated by one space.
    options = [
        "transrate --assembly={}{}.Trinity.fixed.fa --threads=27",
        "--left={}{}.trim_1P.fq",
        "--right={}{}.trim_2P.fq",
        "--output={}",
    ]
    template = "\n" + " ".join(options) + "\n"
    transrate_command = template.format(trinitydir, sample, trimdir, sra,
                                        trimdir, sra, transrate_out)
    print(transrate_command)
    clusterfunc.qsub_file(transrate_dir, "transrate", "", sra,
                          [transrate_command])
Beispiel #35
0
def run_trinity(trinitydir, left, right, mmetsp):
    """Queue a Trinity 2.2.0 job that assembles in ``/tmp``.

    The script exits early when
    ``<trinitydir>trinity_out_2.2.0.Trinity.fasta`` exists.  Otherwise the
    assembly ``/tmp/<mmetsp>.trinity_out_2.2.0.Trinity.fasta`` is copied to
    ``/mnt/home/ljcohen/mmetsp_assemblies_trinity2.2.0/`` and the temporary
    files are removed.
    """
    script = """
set -x
# stops execution if there is an error
set -e
if [ -f {outdir}trinity_out_2.2.0.Trinity.fasta ]; then exit 0 ; fi

Trinity --left {left} \\
--right {right} --output /tmp/{sample}.trinity_out_2.2.0 --full_cleanup --seqType fq --max_memory 20G --CPU 16

cp /tmp/{sample}.trinity_out_2.2.0.Trinity.fasta /mnt/home/ljcohen/mmetsp_assemblies_trinity2.2.0/
rm -rf /tmp/{sample}.trinity_out_2.2.0*
""".format(outdir=trinitydir, left=left, right=right, sample=mmetsp)
    clusterfunc.qsub_file(trinitydir, "trinity_2.2.0", ["trinity/2.2.0"],
                          mmetsp, [script])
def run_trinity(trinitydir, left, right, mmetsp):
    """Queue a "trinity_2.2.0" job for *mmetsp*.

    Trinity runs with ``--output /tmp/<mmetsp>.trinity_out_2.2.0``; the
    resulting fasta is copied to
    ``/mnt/home/ljcohen/mmetsp_assemblies_trinity2.2.0/`` and the ``/tmp``
    files are deleted.  The script exits first if
    ``<trinitydir>trinity_out_2.2.0.Trinity.fasta`` already exists.
    """
    lines = [
        "",
        "set -x",
        "# stops execution if there is an error",
        "set -e",
        "if [ -f {0}trinity_out_2.2.0.Trinity.fasta ]; then exit 0 ; fi",
        "",
        "Trinity --left {1} \\",
        "--right {2} --output /tmp/{3}.trinity_out_2.2.0 --full_cleanup --seqType fq --max_memory 20G --CPU 16",
        "",
        "cp /tmp/{3}.trinity_out_2.2.0.Trinity.fasta /mnt/home/ljcohen/mmetsp_assemblies_trinity2.2.0/",
        "rm -rf /tmp/{3}.trinity_out_2.2.0*",
        "",
    ]
    command = "\n".join(lines).format(trinitydir, left, right, mmetsp)
    modules = ["trinity/2.2.0"]
    clusterfunc.qsub_file(trinitydir, "trinity_2.2.0", modules, mmetsp,
                          [command])
Beispiel #37
0
def fastqc_report(fastq_file_list, newdir, fastqcdir, filename):
    """Queue a FastQC job for *fastq_file_list* unless reports already exist.

    Reports count as present when any file matches
    ``<fastqcdir><filename>_*_fastqc.zip``; in that case nothing is queued.

    Args:
        fastq_file_list: list of fastq paths handed to ``fastqc``.
        newdir: unused; kept for interface compatibility.
        fastqcdir: output directory prefix (``fastqc -o``), concatenated
            as-is with *filename* for the report lookup.
        filename: sample name used for the report lookup and the job file.
    """
    print(fastq_file_list)
    print(fastqcdir + filename)
    if glob.glob(fastqcdir + filename + "_*_fastqc.zip"):
        print("fastqc already complete: " + filename)
        # No command was built for this sample, so there is nothing to queue.
        return
    fastqc_string = "fastqc -o " + fastqcdir + " " + " ".join(fastq_file_list)
    print("fastqc reports being generated for: " + str(fastq_file_list))
    clusterfunc.qsub_file(fastqcdir, "fastqc", "", filename, [fastqc_string])
Beispiel #38
0
def fastqc_report(fastq_file_list, newdir, fastqcdir, filename):
    """Queue ``fastqc`` for the given files when no report zip is present.

    A sample is considered done when ``<fastqcdir><filename>_*_fastqc.zip``
    matches at least one file; the function then returns without queueing.

    Args:
        fastq_file_list: list of fastq paths to analyse.
        newdir: unused; kept for interface compatibility.
        fastqcdir: output directory prefix passed to ``fastqc -o``.
        filename: sample name used for the report lookup and the job file.
    """
    print(fastq_file_list)
    print(fastqcdir + filename)
    existing_reports = glob.glob(fastqcdir + filename + "_*_fastqc.zip")
    if existing_reports:
        print("fastqc already complete: " + filename)
        # Returning here avoids using a command string that was never built.
        return
    file_string = " ".join(fastq_file_list)
    fastqc_command = ["fastqc -o " + fastqcdir + " " + file_string]
    print("fastqc reports being generated for: " + str(fastq_file_list))
    clusterfunc.qsub_file(fastqcdir, "fastqc", "", filename, fastqc_command)
Beispiel #39
0
def get_sourmash_command(SRA, trinitydir):
    """Queue ``sourmash compute`` for a sample whose left reads are non-empty.

    ``<trinitydir><SRA>.left.fq`` is only checked for existence and size; the
    command itself operates on
    ``/mnt/scratch/ljcohen/mmetsp_tmp/<SRA>.left.fq.head``.  Nothing happens
    when the left file does not exist.
    """
    left_name = SRA + ".left.fq"
    left_path = trinitydir + left_name
    if not os.path.isfile(left_path):
        return
    if os.stat(left_path).st_size == 0:
        print("File is empty: " + left_name)
        return
    # NOTE(review): this function does not create the ".head" file itself;
    # presumably it is produced elsewhere - confirm.
    sourmash_command = (
        "\nsourmash compute --protein -k 18,21 -f "
        "/mnt/scratch/ljcohen/mmetsp_tmp/{}.head\n"
    ).format(left_name)
    clusterfunc.qsub_file("/mnt/scratch/ljcohen/mmetsp_tmp/", "sourmash",
                          [""], SRA, [sourmash_command])
Beispiel #40
0
def interleave_reads(mmetsp_dir, mmetsp):
    """Queue a shell loop interleaving every ``*.trim_1P.fq`` in *mmetsp_dir*.

    For each ``X.trim_1P.fq`` the loop pairs it with ``X.trim_2P.fq`` and
    writes the gzip-compressed result to ``X.trim.interleaved.fq``.
    """
    # Shell braces are doubled because the template goes through str.format.
    lines = [
        "",
        "cd {}",
        "for filename in *.trim_1P.fq",
        "do",
        "\tbase=$(basename $filename .fq)",
        "\techo $base",
        "\tbase2=${{base/_1P/_2P}}",
        "\techo $base2",
        "\toutput=${{base/_1P/}}.interleaved.fq",
        "\t#echo $output",
        "\t(interleave-reads.py ${{base}}.fq ${{base2}}.fq | gzip > $output)",
        "done",
        "",
    ]
    interleave_string = "\n".join(lines).format(mmetsp_dir)
    print(interleave_string)
    clusterfunc.qsub_file(mmetsp_dir, "interleave",
                          ["GNU/4.8.3", "khmer/2.0"], mmetsp,
                          [interleave_string])
Beispiel #41
0
def run_streaming_diginorm(trimdir, SRA, diginormdir):
    """Queue a "diginorm_stream" job running a three-stage shell pipeline.

    The pipeline interleaves ``<trimdir><SRA>.trim_1P.fq`` / ``.trim_2P.fq``,
    adds ``<trimdir>orphans.fq.gz``, pipes through ``trim-low-abund.py`` and
    ``extract-paired-reads.py``, and writes ``<diginormdir><SRA>.paired.gz``
    and ``<diginormdir><SRA>.single.gz``.
    """
    stream_string = """#!/bin/bash
(interleave-reads.py {trim}{sra}.trim_1P.fq {trim}{sra}.trim_2P.fq && zcat {trim}orphans.fq.gz)| \\
(trim-low-abund.py -V -k 20 -Z 18 -C 2 - -o - -M 4e9 --diginorm --diginorm-coverage=20) | \\
(extract-paired-reads.py --gzip -p {out}{sra}.paired.gz -s {out}{sra}.single.gz) > /dev/null
""".format(trim=trimdir, sra=SRA, out=diginormdir)
    print(stream_string)
    clusterfunc.qsub_file(diginormdir, "diginorm_stream", [], SRA,
                          [stream_string])
Beispiel #42
0
def run_streaming_diginorm(trimdir, SRA, diginormdir):
    """Queue the streaming digital-normalisation pipeline for *SRA*.

    Stages: ``interleave-reads.py`` on the trimmed pair plus ``zcat`` of
    ``<trimdir>orphans.fq.gz``; ``trim-low-abund.py`` with ``--diginorm``;
    ``extract-paired-reads.py`` writing ``<SRA>.paired.gz`` and
    ``<SRA>.single.gz`` into *diginormdir*.  No environment modules are
    requested.
    """
    stages = [
        "#!/bin/bash",
        "(interleave-reads.py {0}{1}.trim_1P.fq {0}{1}.trim_2P.fq && zcat {0}orphans.fq.gz)| \\",
        "(trim-low-abund.py -V -k 20 -Z 18 -C 2 - -o - -M 4e9 --diginorm --diginorm-coverage=20) | \\",
        "(extract-paired-reads.py --gzip -p {2}{1}.paired.gz -s {2}{1}.single.gz) > /dev/null",
        "",
    ]
    stream_string = "\n".join(stages).format(trimdir, SRA, diginormdir)
    print(stream_string)
    module_load_list = []
    clusterfunc.qsub_file(diginormdir, "diginorm_stream", module_load_list,
                          SRA, [stream_string])
Beispiel #43
0
def get_assemblies(assemblydir):
    """Queue a "transrate" job for every sample with a finished assembly.

    For each entry in *assemblydir* the presence of the left/right read files
    is reported on stdout.  When ``trinity_out/Trinity.fasta`` exists, the
    fasta is passed through ``fix_fasta`` and the command returned by
    ``transrate`` is queued; otherwise the sample is reported as not
    completed.
    """
    for genus_species in os.listdir(assemblydir):
        sample_dir = assemblydir + genus_species + "/"
        left = sample_dir + genus_species + ".left.fq"
        right = sample_dir + genus_species + ".right.fq"
        for reads in (left, right):
            if os.path.isfile(reads):
                print(reads)
            else:
                print("there's a problem: " + reads)
        trinity_out_dir = sample_dir + "trinity_out/"
        trinity_fasta = trinity_out_dir + "Trinity.fasta"
        if not os.path.isfile(trinity_fasta):
            print("Assembly not completed: " + genus_species)
            continue
        fixed_trinity_fasta = fix_fasta(trinity_fasta, trinity_out_dir,
                                        genus_species)
        transrate_command = transrate(fixed_trinity_fasta, genus_species,
                                      left, right)
        clusterfunc.qsub_file(trinity_out_dir, "transrate", ["BLAST+/2.2.31"],
                              genus_species, [transrate_command])
Beispiel #44
0
def send_to_cluster(basedir, commands, name):
    """Pass *commands* to ``clusterfunc.qsub_file`` as a "delete" job.

    *name* is used as the job file name; no modules are requested.
    """
    clusterfunc.qsub_file(basedir, "delete", "", name, commands)
Beispiel #45
0
#6. Create symbolic link from data files to working directory

def sym_link(newdir):
    """Print an ``ln -fs`` command for each ``.subset100k.fastq`` in *newdir*.

    The link target is ``/mnt/mmetsp/<file>``.  The commands are only
    printed; they are not executed here.
    """
    for entry in os.listdir(newdir):
        if not entry.endswith(".subset100k.fastq"):
            continue
        symlink_string = "ln -fs " + newdir + entry + " /mnt/mmetsp/" + entry
        print(symlink_string)
	    
=======
    	print "fastqc reports being generated for: "+str(fastq_file_list)
	fastqc_command=[fastqc_string]
        process_name="fastqc"
        module_name_list=""
        filename=filename
        clusterfunc.qsub_file(fastqcdir,process_name,module_name_list,filename,fastqc_command)
>>>>>>> .merge_file_eM2D9z

# this is the main function to execute

def execute(basedir,url_data):
    for item in url_data.keys():
        #Creates directory for each file to be downloaded
        #Directory will be located according to organism and read type (single or paired)
        organism=item[0]
        seqtype=item[1]
        org_seq_dir=basedir+organism+"/"
	print org_seq_dir
	clusterfunc.check_dir(org_seq_dir)
        url_list=url_data[item]
        for url in url_list:
Beispiel #46
0
def send_to_cluster(basedir, commands, name):
    """Queue *commands* under the process name "delete".

    Arguments are forwarded to ``clusterfunc.qsub_file`` with an empty module
    list and *name* as the job file name.
    """
    job_args = (basedir, "delete", "", name, commands)
    clusterfunc.qsub_file(*job_args)
Beispiel #47
0
		print "file exists:",file2
<<<<<<< .merge_file_ylVy1d
	else:
		print "missing:",file2
	salmon_string="salmon quant -i "+index+" --libType IU -1 "+file1+" -2 "+file2+" -o "+salmondir+sra+".quant"
        s=subprocess.Popen(salmon_string,shell=True)
	s.wait()
	os.chdir("/home/ubuntu/MMETSP/")
=======
	index,salmon_index_string = salmon_index(salmondir,sra,trinity_fasta)
	salmon_string="salmon quant -i "+index+" --libType IU -1 "+file1+" -2 "+file2+" -o "+salmondir+sra+".quant --dumpEq --auxDir aux"
	commands = [salmon_index_string,salmon_string]
	process_name = "salmon"
	module_name_list = ""
	filename = sra
	clusterfunc.qsub_file(salmondir,process_name,module_name_list,filename,commands)	
	
def gather_counts():
    """Return the shell command that runs the ``gather-counts.py`` script."""
    return "python /home/ubuntu/MMETSP/gather-counts.py"
>>>>>>> .merge_file_hxAl3W
	


def gather_counts(salmondir):
    """Change into *salmondir* and print the ``gather-counts.py`` command.

    The current working directory is printed after the change.  The command
    is only printed; it is not executed here.
    """
    os.chdir(salmondir)
    command = "python /home/ubuntu/MMETSP/gather-counts.py"
    print(os.getcwd())
    print(command)