Ejemplo n.º 1
0
def execute(listoffiles, trimdir, interleavedir, diginormdir, assemblydir):
    """Hand one "split" job per population to ``clusterfunc.sbatch_file``.

    The directory listing of ``interleavedir`` is grouped by
    ``get_files_population`` and the resulting dictionary is printed. For
    every key of that dictionary the command returned by
    ``split_reads(diginormdir, population)`` is wrapped in a one-element list
    and passed to ``clusterfunc.sbatch_file`` with ``diginormdir`` as the
    first argument, ``"split"`` as the process name, an empty string as the
    module list and the population as the file name.

    ``listoffiles``, ``trimdir`` and ``assemblydir`` are accepted but not
    used by the currently active code path.
    """
    interleaved_names = os.listdir(interleavedir)
    populations = get_files_population(interleaved_names, interleavedir)
    print(populations)

    for population in populations.keys():
        # The rename job is not submitted at the moment; the helper is still
        # invoked because it is defined elsewhere and may have side effects.
        get_rename(diginormdir, population)

        commands = [split_reads(diginormdir, population)]
        clusterfunc.sbatch_file(diginormdir, "split", "", population, commands)
Ejemplo n.º 2
0
def combine_files(merge_dictionary, basedir, combine_dir):
    """Build and hand off ``cat`` commands merging each sample's read files.

    Args:
        merge_dictionary: mapping of sample key -> iterable of file paths.
            The file name (last ``/``-separated component) is split on ``_``;
            field 3 must be ``"R1"`` or ``"R2"``.
        basedir: first argument forwarded to ``clusterfunc.sbatch_file``.
        combine_dir: prefix prepended to the merged output file names.

    For every sample the R1 and R2 paths are collected and printed, and two
    shell commands ``cat <files> > <combine_dir><name>_R{1,2}<ext>`` are
    passed to ``clusterfunc.sbatch_file`` under the process name
    ``"combine"``. ``<name>`` is field 0 of the first R1 file name and
    ``<ext>`` is field 4 of that name with its first three characters
    removed.

    Paths whose file name does not carry an R1/R2 marker in field 3 are
    reported with "Wrong field." and ignored. A sample without any R1 file
    is reported and skipped, because the output name is derived from the
    first R1 file.
    """
    for sample in merge_dictionary:
        R1 = []
        R2 = []
        for path in merge_dictionary[sample]:
            # Use the last path component so the function does not depend on
            # how deep in the directory tree the files live.
            fields = path.split("/")[-1].split("_")
            read = fields[3] if len(fields) > 3 else None
            if read == "R1":
                R1.append(path)
            elif read == "R2":
                R2.append(path)
            else:
                print("Wrong field. %s" % (fields,))
        print("%s R1 files: %s" % (sample, R1))
        print("%s R2 files: %s" % (sample, R2))

        if not R1:
            print("No R1 files for %s; skipping." % (sample,))
            continue

        fields_read = R1[0].split("/")[-1].split("_")
        sample_name = fields_read[0]
        extension = fields_read[4][3:]
        newfilename_R1 = combine_dir + sample_name + "_R1" + extension
        newfilename_R2 = combine_dir + sample_name + "_R2" + extension
        combine_string_R1 = "cat " + " ".join(R1) + " > " + newfilename_R1
        combine_string_R2 = "cat " + " ".join(R2) + " > " + newfilename_R2
        print(combine_string_R1)
        print(combine_string_R2)

        combine_command = [combine_string_R1, combine_string_R2]
        module_load_list = [""]
        process_name = "combine"
        clusterfunc.sbatch_file(basedir, process_name, module_load_list,
                                sample_name, combine_command)
Ejemplo n.º 3
0
def run_filt_abund(sample_diginormdir, sample):
    """Hand the abundance-filter command for one sample to the batch helper.

    Args:
        sample_diginormdir: directory passed to ``get_filter_abund`` and used
            as the first argument of ``clusterfunc.sbatch_file``.
        sample: sample identifier; also used as the batch file name.

    The command string returned by ``get_filter_abund`` is wrapped in a
    one-element list and passed to ``clusterfunc.sbatch_file`` under the
    process name ``"abundfilt"`` with an empty module list.
    """
    abund_filt_command = [get_filter_abund(sample_diginormdir, sample)]
    process_name = "abundfilt"
    module_name_list = ""
    # Use the directory this function was given, not a name that is not
    # defined in this scope.
    clusterfunc.sbatch_file(sample_diginormdir, process_name,
                            module_name_list, sample, abund_filt_command)
Ejemplo n.º 4
0
def split_reads(assemblydir):
    """Hand one "combine" job per sub-directory of ``assemblydir`` to sbatch.

    For every entry ``genus_species`` returned by ``os.listdir(assemblydir)``
    the directory ``assemblydir + genus_species + "/"`` is listed. A
    ``split-paired-reads.py`` command is built for each file ending in
    ``pe.keep.abundfilt.fq``, but those commands are not submitted. Only the
    combine command (concatenating ``*.1`` / ``*.2`` into ``.left.fq`` /
    ``.right.fq`` and appending the gunzipped orphans to ``.left.fq``) is
    passed to ``clusterfunc.sbatch_file``.
    """
    split_template = (
        "\nsplit-paired-reads.py -1 {d}{f}.1 -2 {d}{f}.2 {d}{f}\n"
    )
    combine_template = (
        "\n"
        "cat {d}*.1 > {d}{s}.left.fq\n"
        "cat {d}*.2 > {d}{s}.right.fq\n"
        "gunzip -c {d}*orphans.keep.abundfilt.fq.gz >> {d}{s}.left.fq\n"
    )

    for genus_species in os.listdir(assemblydir):
        species_dir = assemblydir + genus_species + "/"

        # NOTE: submission of the split jobs is disabled; the commands are
        # only assembled here. Next time this is run, specify the output
        # file, otherwise the output will be put in the sbatch_files
        # directory (it was moved manually on 2/7/2016).
        pending_split_jobs = [
            [split_template.format(d=species_dir, f=entry)]
            for entry in os.listdir(species_dir)
            if entry.endswith("pe.keep.abundfilt.fq")
        ]

        combine_command = [
            combine_template.format(d=species_dir, s=genus_species)
        ]
        clusterfunc.sbatch_file(species_dir, "combine", "", genus_species,
                                combine_command)
Ejemplo n.º 5
0
def combine_orphans_after_diginorm(genus_species_diginormdir, sample):
    """Hand the orphan-consolidation command for ``sample`` to sbatch.

    The string returned by ``consolidate(genus_species_diginormdir, sample)``
    is wrapped in a one-element list and passed to
    ``clusterfunc.sbatch_file`` with ``genus_species_diginormdir`` as the
    first argument, ``"gzip"`` as the process name, an empty string as the
    module list and ``sample`` as the file name.
    """
    commands = [consolidate(genus_species_diginormdir, sample)]
    clusterfunc.sbatch_file(
        genus_species_diginormdir, "gzip", "", sample, commands
    )
Ejemplo n.º 6
0
def run_extract_paired(genus_species_dir, sample):
    """Hand the extract-paired command for ``sample`` to sbatch.

    The abundance-filter file name is ``genus_species_dir + sample +
    ".abundfilt"``. It is given to ``get_extract_paired`` together with the
    directory and the sample. The returned command is wrapped in a list and
    passed to ``clusterfunc.sbatch_file`` under the process name
    ``"extract"`` with an empty module list.
    """
    commands = [
        get_extract_paired(
            genus_species_dir,
            sample,
            genus_species_dir + sample + ".abundfilt",
        )
    ]
    clusterfunc.sbatch_file(
        genus_species_dir, "extract", "", sample, commands
    )
Ejemplo n.º 7
0
def interleave_reads(trimdir, interleavedir, files_list, sample):
    """Hand the interleave command for ``sample`` to sbatch.

    The target path is ``interleavedir + sample + ".interleaved.fq.gz"``.
    ``get_pairs(files_list, trimdir, sample)`` supplies the pair list that
    ``get_interleave_string`` turns into the command. That command is passed
    (as a one-element list) to ``clusterfunc.sbatch_file`` under the process
    name ``"interleave"`` with an empty module list.

    Returns:
        The interleaved file path described above.
    """
    target = interleavedir + sample + ".interleaved.fq.gz"
    pairs = get_pairs(files_list, trimdir, sample)
    commands = [get_interleave_string(pairs, target)]
    clusterfunc.sbatch_file(
        interleavedir, "interleave", "", sample, commands
    )
    return target
Ejemplo n.º 8
0
def execute(assemblydirs, assemblydir):
    """Hand one "cat" job per entry of ``assemblydirs`` to sbatch.

    For each ``genus_species`` the directory ``assemblydir + genus_species +
    "/"`` is formed, ``get_cat_command`` builds the command for it, and the
    command is passed (as a one-element list) to ``clusterfunc.sbatch_file``
    with ``[""]`` as the module list and ``"cat"`` as the process name.
    """
    for genus_species in assemblydirs:
        species_dir = assemblydir + genus_species + "/"
        commands = [get_cat_command(species_dir, genus_species)]
        clusterfunc.sbatch_file(
            species_dir, "cat", [""], genus_species, commands
        )
Ejemplo n.º 9
0
def combine_split():
    """Hand the left/right read combine command to sbatch.

    NOTE(review): this function takes no arguments and reads
    ``genus_species_dir`` and ``genus_species`` from an enclosing scope
    (presumably module globals; confirm they are defined before calling).

    The command concatenates ``*.1`` into ``<name>.left.fq`` and ``*.2`` into
    ``<name>.right.fq``, then appends the ``gunzip -c`` output of
    ``*orphans.keep.abundfilt.fq`` to ``<name>.left.fq``. It is passed to
    ``clusterfunc.sbatch_file`` under the process name ``"combine"``.
    """
    script_lines = [
        "",
        "cat {d}*.1 > {d}{s}.left.fq",
        "cat {d}*.2 > {d}{s}.right.fq",
        "gunzip -c {d}*orphans.keep.abundfilt.fq >> {d}{s}.left.fq",
        "",
    ]
    script = "\n".join(script_lines).format(
        d=genus_species_dir, s=genus_species
    )
    clusterfunc.sbatch_file(
        genus_species_dir, "combine", "", genus_species, [script]
    )
Ejemplo n.º 10
0
def run_filt_abund(diginormdir, graph_count_filename, diginorm_sample):
    """Hand the abundance-filter command for ``diginorm_sample`` to sbatch.

    ``get_filter_abund(diginormdir, diginorm_sample)`` supplies the command,
    which is passed (as a one-element list) to ``clusterfunc.sbatch_file``
    with ``diginormdir`` as the first argument, ``"abundfilt"`` as the
    process name, an empty module list and the sample as the file name.

    ``graph_count_filename`` is accepted but not used.
    """
    commands = [get_filter_abund(diginormdir, diginorm_sample)]
    clusterfunc.sbatch_file(
        diginormdir, "abundfilt", "", diginorm_sample, commands
    )
Ejemplo n.º 11
0
def transrate(transratedir, transrate_out, trinity_fasta, sample, left, right):
    """Build, print and hand off a ``transrate`` command for ``sample``.

    The command is a single line of the form
    ``transrate --assembly=<trinity_fasta> --threads=4 --left=<left>
    --right=<right> --output=<transrate_out>``, preceded and followed by a
    newline. It is printed and then passed (as a one-element list) to
    ``clusterfunc.sbatch_file`` with ``transratedir`` as the first argument,
    ``"transrate"`` as the process name and an empty module list.
    """
    # Written as adjacent literals so the single-line shape of the command
    # is explicit.
    command = (
        "\n"
        "transrate --assembly={assembly} --threads=4 "
        "--left={left} "
        "--right={right} "
        "--output={output}\n"
    ).format(
        assembly=trinity_fasta,
        left=left,
        right=right,
        output=transrate_out,
    )
    print(command)
    clusterfunc.sbatch_file(transratedir, "transrate", "", sample, [command])
Ejemplo n.º 12
0
def get_assemblies(assemblydir, genus_species_dirs=None):
    """Hand one "bwa" job per assembly directory to sbatch.

    Args:
        assemblydir: prefix under which each assembly directory lives.
        genus_species_dirs: optional iterable of directory names to process.
            When omitted, the two names ``"F_heteroclitus.MDPP"`` and
            ``"F_heteroclitus.MDPL"`` are used, so existing callers keep
            their behaviour. Pass ``os.listdir(assemblydir)`` to process
            every directory.

    For each name the paths ``<dir>/Trinity.fasta``, ``<dir>/<name>.bam`` and
    ``<dir>/<name>.flagstat.txt`` are given to ``bwa_mem``, and the returned
    command is passed (as a one-element list) to ``clusterfunc.sbatch_file``
    with the modules ``bwa/0.7.9a`` and ``samtools/1.2``.
    """
    if genus_species_dirs is None:
        genus_species_dirs = ["F_heteroclitus.MDPP", "F_heteroclitus.MDPL"]

    module_load_list = ["bwa/0.7.9a", "samtools/1.2"]
    process_name = "bwa"
    for genus_species in genus_species_dirs:
        print(genus_species)
        genus_species_dir = assemblydir + genus_species + "/"
        assemblyfile = genus_species_dir + "Trinity.fasta"
        bam_out = genus_species_dir + genus_species + ".bam"
        flagstat_out = genus_species_dir + genus_species + ".flagstat.txt"
        bwa_command = [bwa_mem(assemblyfile, bam_out, flagstat_out)]
        # A fresh list per call, in case the helper mutates its argument.
        clusterfunc.sbatch_file(genus_species_dir, process_name,
                                list(module_load_list), genus_species,
                                bwa_command)
Ejemplo n.º 13
0
def get_orphans(files_list, trimdir, sample):
    """Hand the orphan-read command for ``sample`` to sbatch.

    Entries of ``files_list`` ending in ``1U.fq`` or ``2U.fq`` are collected
    in their original order and given to ``make_orphans`` together with
    ``trimdir`` and ``sample``. The returned command is passed (as a
    one-element list) to ``clusterfunc.sbatch_file`` with ``trimdir`` as the
    first argument, ``"orphans"`` as the process name and an empty module
    list.
    """
    orphan_suffixes = ("1U.fq", "2U.fq")
    orphans = [
        name for name in files_list if name.endswith(orphan_suffixes)
    ]
    commands = [make_orphans(trimdir, orphans, sample)]
    clusterfunc.sbatch_file(trimdir, "orphans", "", sample, commands)
Ejemplo n.º 14
0
def quant_salmon(newdir, dirname, genus_species, trinity_fasta, species):
    """Build, print and hand off the salmon index + quant commands.

    ``salmon_index(newdir, genus_species, trinity_fasta)`` returns the index
    command and the index name. A shell loop is then built that runs
    ``salmon quant`` for every ``<dirname><species>*.trim_1P.fq`` file,
    pairing it with the matching ``.trim_2P.fq`` file and writing to
    ``<newdir>$BASE.quant``. Both commands are printed and passed, in that
    order, to ``clusterfunc.sbatch_file`` under the process name
    ``"salmon"`` with an empty module list.
    """
    index_command, index = salmon_index(newdir, genus_species, trinity_fasta)
    print(index_command)

    loop_lines = [
        "",
        "for i in {reads}{species}*.trim_1P.fq",
        "do",
        "\tBASE=$(basename $i .trim_1P.fq)",
        "\tsalmon quant -i {out}{index} --libType IU"
        " -1 {reads}$BASE.trim_1P.fq -2 {reads}$BASE.trim_2P.fq"
        " -o {out}$BASE.quant;",
        "done",
        "",
    ]
    quant_command = "\n".join(loop_lines).format(
        reads=dirname, species=species, out=newdir, index=index
    )
    print(quant_command)

    clusterfunc.sbatch_file(
        newdir, "salmon", "", genus_species, [index_command, quant_command]
    )
Ejemplo n.º 15
0
def run_trinity(assemblydir):
    """Hand one Trinity assembly job per sub-directory of ``assemblydir``.

    For every entry ``genus_species`` of ``os.listdir(assemblydir)`` a shell
    script is built that exits early if ``trinity_out/Trinity.fasta``
    already exists and otherwise runs ``Trinity`` on
    ``<genus_species>.left.fq`` / ``<genus_species>.right.fq`` with
    ``--max_memory 14G`` and ``--CPU ${THREADS:-2}``. The script is printed
    and passed to ``clusterfunc.sbatch_file`` with the modules
    ``rsem/1.2.23`` and ``trinity/2.0.5``.
    """
    # NOTE(review): the second guard tests for "trinity_out" but moves
    # "trinity_out_dir"; the mv fails and "|| true" hides the failure, so an
    # existing trinity_out directory is left in place. Confirm whether that
    # is intended before changing the script text.
    template = """
set -x
# stops execution if there is an error
set -e
if [ -f {d}trinity_out/Trinity.fasta ]; then exit 0 ; fi
if [ -d {d}trinity_out ]; then mv {d}trinity_out_dir {d}trinity_out_dir0 || true ; fi

Trinity --left {d}{s}.left.fq \\
--right {d}{s}.right.fq \\
--output {d}trinity_out --seqType fq --max_memory 14G\t\\
--CPU ${{THREADS:-2}}
"""
    for genus_species in os.listdir(assemblydir):
        species_dir = assemblydir + genus_species + "/"
        # The listing itself is not used; the call is kept so that an
        # unreadable or non-directory entry still fails here.
        os.listdir(species_dir)

        script = template.format(d=species_dir, s=genus_species)
        print(script)
        clusterfunc.sbatch_file(
            species_dir,
            "trinity",
            ["rsem/1.2.23", "trinity/2.0.5"],
            genus_species,
            [script],
        )
Ejemplo n.º 16
0
def fastqc_report(fastqcdir, fastq_file, sample_name):
    """Hand a ``fastqc`` command for ``fastq_file`` to sbatch.

    The command is ``"fastqc -o " + fastqcdir + " " + fastq_file``. It is
    passed (as a one-element list) to ``clusterfunc.sbatch_file`` with the
    module ``fastqc/0.10.1``, the process name ``"fastqc"`` and
    ``sample_name`` as the file name.

    NOTE(review): the first argument given to ``sbatch_file`` is ``basedir``,
    which is not a parameter of this function and must be supplied by an
    enclosing scope (presumably a module global; confirm).
    """
    commands = ["fastqc -o " + fastqcdir + " " + fastq_file]
    clusterfunc.sbatch_file(
        basedir, "fastqc", ["fastqc/0.10.1"], sample_name, commands
    )
Ejemplo n.º 17
0
def run_trimmomatic(trimdir, file1, file2, sample):
    """Hand the Trimmomatic command for one read pair to sbatch.

    ``get_trimmomatic(trimdir, file1, file2, sample)`` supplies the command,
    which is passed (as a one-element list) to ``clusterfunc.sbatch_file``
    with ``trimdir`` as the first argument, ``"trim"`` as the process name,
    an empty string as the module list and ``sample`` as the file name.
    """
    commands = [get_trimmomatic(trimdir, file1, file2, sample)]
    clusterfunc.sbatch_file(trimdir, "trim", "", sample, commands)