def clean_fastqc_output(param, readfile):
    """
    Function that deletes unwanted FastQC output files and collects basic
    quality control information about the read file.

    Takes two arguments :
        - param [dict] : dictionary containing all parameters; only
          param['output'] (the output directory) is read here
        - readfile : fastq file given to FastQC; its prefix, as returned
          by ce.get_file_prefix, is used to locate the FastQC outputs
    Returns one argument :
        - file1 [dict] : quality control information about the readfile.
          Possible keys are 'filename', 'encoding', 'total_sequence',
          'sequence_length' and 'GC_perc'; a key is only present when the
          matching line exists in fastqc_data.txt. Values are the raw
          strings read from that file.

    The '<prefix>_fastqc.zip' archive and the 'fastqc_data.txt' report must
    exist, otherwise the underlying OS error is raised. The HTML report and
    the other files to delete are handled on a best-effort basis: a missing
    one is simply skipped.
    """
    # Local import: only this function needs shutil.
    import shutil

    # All paths are built explicitly instead of changing the working
    # directory, so the caller's working directory is never modified, even
    # when an error is raised half-way through.
    prefix = ce.get_file_prefix(readfile)
    fastqc_dir = os.path.join(param['output'], 'Fastqc')
    report_dir = os.path.join(fastqc_dir, '{0}_fastqc'.format(prefix))
    html_name = '{0}_fastqc.html'.format(prefix)

    # Delete unwanted files ----------------------------------------------------
    os.remove(os.path.join(fastqc_dir, '{0}_fastqc.zip'.format(prefix)))

    # Move the HTML report inside the extracted report directory. The full
    # destination path is given so that an existing report is overwritten.
    html_source = os.path.join(fastqc_dir, html_name)
    if os.path.isfile(html_source):
        shutil.move(html_source, os.path.join(report_dir, html_name))

    for unwanted in ('fastqc_report.html', 'fastqc.fo', 'summary.txt',
                     'Icons'):
        target = os.path.join(report_dir, unwanted)
        if os.path.isdir(target) and not os.path.islink(target):
            shutil.rmtree(target)
        elif os.path.lexists(target):
            os.remove(target)

    # Get Quality control information about readfile ---------------------------
    # Label found at the start of a line (before the first tab) mapped to the
    # key under which its value is stored.
    wanted = {
        'Filename': 'filename',
        'Encoding': 'encoding',
        'Total Sequences': 'total_sequence',
        'Sequence length': 'sequence_length',
        '%GC': 'GC_perc',
    }

    file1 = {}
    with open(os.path.join(report_dir, 'fastqc_data.txt'), 'rt') as handle:
        for line in handle:
            label, tab, value = line.rstrip('\n').partition('\t')
            # 'tab' is empty when the line holds no tabulation at all.
            if tab and label in wanted:
                file1[wanted[label]] = value

    return file1
def commandline_input_output(param, cmd, nb, inout):
    """
    Function that appends to 'cmd' the layout, the number of threads and the
    input and output files, depending on whether this is the first or the
    second trimming step (information given by nb).

    Takes 4 arguments :
        - param [dict] : dictionary containing all parameters; the keys
          'layout', 'threads', 'input', 'output' and, optionally,
          'compress' are read
        - cmd [string] : base command line of the program (trimmomatic)
        - nb [integer] : number of trimming commands already executed
          (0 for the first step, 1 for the second one)
        - inout [dict] : dictionary containing all generated files on the
          user's computer
    Returns two arguments :
        - cmd [string] : the command line with the input and output files
        - inout [dict] : the same dictionary, completed with the new file
          names when nb is 0

    A layout other than 'SE' or 'PE' returns both arguments untouched. A
    value of nb other than 0 or 1 only adds the layout and threads options.
    """
    layout = param['layout']
    if layout != 'SE' and layout != 'PE':
        return cmd, inout

    def build_name(kind, prefix):
        # '<output>/<kind>_<prefix>.fastq', followed by the compression
        # suffix when one was chosen.
        name = "{0}/{1}_{2}.fastq".format(param['output'], kind, prefix)
        if 'compress' in param:
            name += "{0}".format(param['compress'])
        return name

    first_step = (nb == 0)
    second_step = (nb == 1)

    # SINGLE-END DATA ----------------------------------------------------------
    if layout == 'SE':
        cmd += ' SE -threads {0}'.format(param['threads'])

        out_reads = build_name('trimmed', ce.get_file_prefix(param['input'][0]))

        if first_step:
            cmd += ' {0} {1}'.format(param['input'][0], out_reads)
            # remember the input and output files of this first step
            inout['input'] = param['input']
            inout['trimmed'] = out_reads
        elif second_step:
            # the second step reads what the first step produced
            cmd += ' {0} {1}'.format(inout['tmp'], out_reads)

        return cmd, inout

    # PAIRED-END DATA ----------------------------------------------------------
    cmd += ' PE -threads {0} '.format(param['threads'])

    prefix_fwd = ce.get_file_prefix(param['input'][0])
    prefix_rev = ce.get_file_prefix(param['input'][1])

    paired = (build_name('trimmed', prefix_fwd),
              build_name('trimmed', prefix_rev))
    orphans = (build_name('single', prefix_fwd),
               build_name('single', prefix_rev))

    # order of the outputs: paired 1, single 1, paired 2, single 2
    outputs = (paired[0], orphans[0], paired[1], orphans[1])

    if first_step:
        cmd += ' {0} {1} {2} {3} {4} {5}'.format(param['input'][0],
                                                 param['input'][1],
                                                 *outputs)
        # remember the input and output files of this first step
        inout['input'] = param['input']
        inout['trimmed'] = paired
        inout['single'] = orphans
    elif second_step:
        # the second step reads what the first step produced
        cmd += '{0} {1} {2} {3} {4} {5}'.format(inout['tmp'][0],
                                                inout['tmp'][1],
                                                *outputs)

    return cmd, inout
def change_output_as_input(inout, param):
    """
    Function that changes step 1 output files into step 2 input files.

    The trimmed file(s) of step 1 are renamed into temporary file(s) and,
    for paired-end data, the singleton read files of step 1 are deleted.

    Takes 2 arguments :
        - inout [dict] : dictionary containing all generated files on the
          user's working directory; 'trimmed' (and 'single' for paired-end
          data) must already be filled in
        - param [dict] : dictionary containing all parameters; the keys
          'layout', 'input', 'output' and, optionally, 'compress' are read
    Returns one argument :
        - inout [dict] : the same dictionary, with the temporary file
          name(s) stored under 'tmp' (a string for single-end data, a pair
          for any other layout)
    """
    def temporary_name(read_file):
        # '<output>/tmp<prefix>.fastq' followed by the compression suffix.
        # The suffix is appended exactly as it is for the trimmed files:
        # the temporary file is only a renamed trimmed file, so both must
        # carry the same extension.
        name = '{0}/tmp{1}.fastq'.format(param['output'],
                                         ce.get_file_prefix(read_file))
        if 'compress' in param:
            name += '{0}'.format(param['compress'])
        return name

    # SINGLE-END ---------------------------------------------------------------
    if param['layout'] == 'SE':
        tmp = temporary_name(param['input'][0])

        # Rename step 1 trimming file in temporary
        os.rename(inout['trimmed'], tmp)
        inout['tmp'] = tmp

    # PAIRED-END ---------------------------------------------------------------
    else:
        tmp_1 = temporary_name(param['input'][0])
        tmp_2 = temporary_name(param['input'][1])

        # Rename step 1 trimming files in temporary
        os.rename(inout['trimmed'][0], tmp_1)
        os.rename(inout['trimmed'][1], tmp_2)
        inout['tmp'] = tmp_1, tmp_2

        # Delete step 1 singleton read files
        os.remove(inout['single'][0])
        os.remove(inout['single'][1])

    return inout