def process(self):
    """
    Trim adaptors from reads with cutadapt: single-end when no R2 files are
    present, paired-end otherwise. Stdout/stderr of each run are captured
    into per-sample files.
    """
    # Fix: the original condition was `if self.min_overlap == None`, which
    # emitted "--overlap None" only when the value was unset and silently
    # dropped the option when a value WAS provided.
    cmd = self.get_exec_path("cutadapt") + \
        " --error-rate " + str(self.error_rate) + \
        (" --overlap " + str(self.min_overlap) if self.min_overlap is not None else "") + \
        (" --discard-untrimmed" if self.discard_untrimmed else "") + \
        " -" + self.adaptor_type + " file:" + self.adaptor_file
    if len(self.in_R2) == 0:
        # Process single read cutadapt: $1 input R1, $2 trimmed output,
        # $3/$4 capture stdout/stderr.
        cmd += " --output $2" + \
            " $1" + \
            " > $3" + \
            " 2> $4"
        cutadapt_fct = ShellFunction(cmd, cmd_format='{EXE} {IN} {OUT}')
        MultiMap(cutadapt_fct,
                 inputs=[self.in_R1],
                 outputs=[self.out_R1, self.stdout, self.stderr],
                 includes=[self.adaptor_file])
    else:
        # Process paired-end cutadapt: $1/$2 input R1/R2, $3/$4 trimmed
        # outputs, $5/$6 capture stdout/stderr.
        cmd += " --output $3" + \
            " --paired-output $4" + \
            " $1" + \
            " $2" + \
            " > $5" + \
            " 2> $6"
        cutadapt_fct = ShellFunction(cmd, cmd_format='{EXE} {IN} {OUT}')
        MultiMap(
            cutadapt_fct,
            inputs=[self.in_R1, self.in_R2],
            outputs=[self.out_R1, self.out_R2, self.stdout, self.stderr],
            includes=[self.adaptor_file])
def process(self):
    """
    Write the list of alignment files then build the mSINGS baseline from
    them with create_baseline.py.
    """
    # List file consumed by create_baseline.py (one alignment path per line).
    aln_list_path = os.path.join(self.output_directory, "aln_list.txt")
    with open(aln_list_path, "w") as writer:
        writer.writelines(curr_aln + "\n" for curr_aln in self.aln)
    # The optional annotations file, when present, occupies $1 and shifts
    # every following placeholder by one position.
    has_annot = self.status_annotations is not None
    first_pos = 2 if has_annot else 1
    cmd_inputs = ([self.status_annotations] if has_annot else []) + \
        [self.genome, self.intervals, self.targets, aln_list_path]
    # Assemble the command from literal segments.
    segments = [
        self.get_exec_path("msings_venv"), " ", self.get_exec_path("create_baseline.py"),
        " --java-path ", self.get_exec_path("java"),
        " --java-mem ", str(self.java_mem)
    ]
    if has_annot:
        segments.append(" --input-annotations $1")
    segments.extend([
        " --input-genome ${}".format(first_pos),
        " --input-intervals ${}".format(first_pos + 1),
        " --input-targets ${}".format(first_pos + 2),
        " --input-list ${}".format(first_pos + 3),
        " --output-baseline ${}".format(first_pos + 4),
        " 2> ${}".format(first_pos + 5)
    ])
    baseline_fct = ShellFunction("".join(segments), cmd_format='{EXE} {IN} {OUT}')
    baseline_fct(inputs=cmd_inputs,
                 outputs=[self.baseline, self.stderr],
                 includes=self.aln)
def process(self):
    """
    Write one locus-report list per sample (locus position, name, report
    path) and gather them into a sample report with gatherLocusRes.py.
    """
    targets = getAreas(self.targets_design)
    per_spl_reports = two_dim_stack(self.loci_reports, len(targets))
    loci_reports_list_in_spl = []
    for spl_idx, spl_reports in enumerate(per_spl_reports):
        curr_list = os.path.join(
            self.output_directory,
            "spl_{}_comb_reports_list.tsv".format(spl_idx))
        loci_reports_list_in_spl.append(curr_list)
        with open(curr_list, "w") as writer:
            writer.write("#Locus_position\tLocus_name\tFilepath\n")
            for target_idx, report_path in enumerate(spl_reports):
                curr_target = targets[target_idx]
                # Locus ID uses a 0-based start (start - 1).
                locus_id = "{}:{}-{}".format(
                    curr_target.chrom, curr_target.start - 1, curr_target.end)
                writer.write("{}\t{}\t{}\n".format(
                    locus_id, curr_target.name, report_path))
    # Command template; $1-$3 are filled from inputs/outputs by ShellFunction.
    cmd = "".join([
        self.get_exec_path("gatherLocusRes.py"),
        " --method-name '{}'".format(self.result_method),
        " --method-class-name '{}'".format(self.result_class_name),
        " --result-keys 'nb_by_length=nb_by_length'",
        " --input-loci-metrics-list $1",
        " --output-report $2 ",
        " 2> $3"
    ])
    report_fct = ShellFunction(cmd, cmd_format='{EXE} {IN} {OUT}')
    # One gather execution per sample.
    for spl_idx in range(len(self.samples_names)):
        report_fct(
            inputs=[loci_reports_list_in_spl[spl_idx]],
            outputs=[self.out_report[spl_idx], self.stderr[spl_idx]],
            includes=[per_spl_reports[spl_idx]])
def process(self):
    """
    Merge overlapping R1/R2 read pairs with FLASH2 (one run per sample),
    then produce a per-sample merging report from the combined and
    uncombined outputs.
    """
    # Combine reads
    for idx, curr_prefix in enumerate(self.prefixes):
        flash2 = ShellFunction(
            self.get_exec_path("flash2") +
            " --compress " +
            " --threads " + str(self.nb_threads) +
            # Fix: --min-overlap was previously emitted unconditionally
            # (rendering "None" when unset) AND a second time through this
            # conditional; it is now added exactly once, only when set.
            ("" if self.min_overlap is None else " --min-overlap " + str(self.min_overlap)) +
            " --max-mismatch-density " + str(self.mismatch_ratio) +
            " --phred-offset " + str(self.phred_offset) +
            " --output-prefix " + os.path.basename(curr_prefix) +
            " --output-directory " + self.output_directory +
            " $1 " +
            " $2 " +
            " 2> $3",
            cmd_format='{EXE} {IN} {OUT}'
        )
        # $1/$2 are the R1/R2 inputs; $3 ("2> $3") is the first output
        # (stderr); the remaining outputs are produced under --output-prefix.
        flash2(
            inputs=[self.R1[idx], self.R2[idx]],
            outputs=[self.stderr[idx], self.out_hist[idx], self.out_combined[idx],
                     self.out_histogram[idx], self.out_not_combined_R1[idx],
                     self.out_not_combined_R2[idx]]
        )
    # Write report
    report_fct = PythonFunction(writeReport, cmd_format="{EXE} {IN} {OUT}")
    MultiMap(report_fct,
             inputs=[self.out_combined, self.out_not_combined_R1],
             outputs=[self.out_report])
def process(self):
    """
    Write one combination-report list per sample (locus position, name,
    report path) and tag MSI status by amplicon size with tagMSIAmplSize.py.
    """
    targets = getAreas(self.targets_design)
    per_spl_reports = two_dim_stack(self.combination_report, len(targets))
    comb_reports_list_in_spl = []
    for spl_idx, spl_reports in enumerate(per_spl_reports):
        curr_list = os.path.join(
            self.output_directory,
            "spl_{}_comb_reports_list.tsv".format(spl_idx))
        comb_reports_list_in_spl.append(curr_list)
        with open(curr_list, "w") as writer:
            writer.write("#Locus_position\tLocus_name\tFilepath\n")
            for target_idx, report_path in enumerate(spl_reports):
                curr_target = targets[target_idx]
                # Locus ID uses a 0-based start (start - 1).
                locus_id = "{}:{}-{}".format(
                    curr_target.chrom, curr_target.start - 1, curr_target.end)
                writer.write("{}\t{}\t{}\n".format(
                    locus_id, curr_target.name, report_path))
    # Command template; $1-$4 are filled from inputs/outputs by ShellFunction.
    cmd = "".join([
        self.get_exec_path("tagMSIAmplSize.py"),
        " --min-support " + str(self.min_support),
        " --input-combined-list $1",
        " --input-models $2",
        " --output-report $3 ",
        " 2> $4"
    ])
    report_fct = ShellFunction(cmd, cmd_format='{EXE} {IN} {OUT}')
    # One tagging execution per sample against the shared models file.
    for spl_idx in range(len(self.samples_names)):
        report_fct(
            inputs=[comb_reports_list_in_spl[spl_idx], self.models],
            outputs=[self.out_report[spl_idx], self.stderr[spl_idx]],
            includes=[per_spl_reports[spl_idx]])
def process(self):
    """
    Extract the reads overlapping target areas from alignment files and
    write them back as R1/R2 FASTQ with bamAreasToFastq.py.
    """
    if self.split_targets:
        self.process_split_targets()
    # Exec command
    # Placeholder numbering follows cmd_format '{EXE} {OUT} {IN}': outputs
    # come first ($1-$3), inputs after ($4 and up).
    cmd = self.get_exec_path("bamAreasToFastq.py") + \
        " --min-overlap " + str(self.min_overlap) + \
        " --input-targets $4" + \
        " --input-aln $5" + \
        ("" if len(self.R1) == 0 else " --input-R1 $6") + \
        ("" if len(self.R2) == 0 else " --input-R2 $7") + \
        " --output-R1 $1" + \
        " --output-R2 $2" + \
        " 2> $3"
    bam2fastq_fct = ShellFunction(cmd, cmd_format='{EXE} {OUT} {IN}')
    # When targets were split, the "repeated_*" lists pair each target with
    # its alignment/read files for the per-item MultiMap executions.
    inputs = [
        self.repeated_targets,
        (self.repeated_aln if self.split_targets else self.aln)
    ]
    # NOTE(review): the command adds --input-R1 ($6) whenever R1 alone is
    # non-empty, but $6/$7 are only supplied when BOTH R1 and R2 are
    # non-empty — confirm callers always set R1 and R2 together.
    if len(self.R1) > 0 and len(self.R2) > 0:
        inputs.extend([
            (self.repeated_R1 if self.split_targets else self.R1),
            (self.repeated_R2 if self.split_targets else self.R2)
        ])
    MultiMap(
        bam2fastq_fct,
        inputs=inputs,
        outputs=[self.out_R1, self.out_R2, self.stderr],
    )
def process(self):
    """
    Annotate loci in each MSI report with addLociAnnotations.py using a
    shared annotations file.
    """
    # The annotations file is passed literally on the command line (same
    # pattern as the references file in the classification step).
    cmd = self.get_exec_path("addLociAnnotations.py") + \
        " --input-loci-annot " + self.annotations_file + \
        " --input-report $1 " + \
        " --output-report $2" + \
        " 2> $3"
    add_fct = ShellFunction(cmd, cmd_format='{EXE} {IN} {OUT}')
    # Fix: annotations_file was ALSO listed in `inputs`, which shifted every
    # positional placeholder by one ($1 resolved to the annotations file
    # instead of the report). Only the per-sample reports are placeholder
    # inputs; the annotations file is already embedded in the command.
    MultiMap(add_fct,
             inputs=[self.msi_files],
             outputs=[self.out_report, self.stderr],
             includes=self.info_file)
def process(self):
    """
    Merge each pair of MSI reports into a single report with
    MSIMergeReports.py (one merge per sample).
    """
    merge_cmd = "".join([
        self.get_exec_path("MSIMergeReports.py"),
        " --inputs-reports $1 $2",
        " --output-report $3",
        " 2> $4"
    ])
    merges_fct = ShellFunction(merge_cmd, cmd_format='{EXE} {IN} {OUT}')
    # Pairs first_report[i] with second_report[i] to produce out_report[i].
    MultiMap(
        merges_fct,
        inputs=[self.first_report, self.second_report],
        outputs=[self.out_report, self.stderr]
    )
def process(self): tmp_report = [curr_path + ".tmp" for curr_path in self.report] # Process mSINGS cmd = self.get_exec_path("msings_venv") + " " + self.get_exec_path("run_msings.py") + \ " --java-path " + self.get_exec_path("java") + \ " --java-mem " + str(self.java_mem) + \ " --multiplier " + str(self.multiplier) + \ " --msi-min-threshold " + str(self.msi_min_threshold) + \ " --msi-max-threshold " + str(self.msi_max_threshold) + \ " --input-baseline " + self.baseline + \ " --input-genome " + self.genome + \ " --input-intervals " + self.intervals + \ " --input-targets " + self.targets + \ " --input-aln $1 " + \ " --output-analyzer $2 " + \ " --output-report $3 " + \ " 2> $4" msings_fct = ShellFunction(cmd, cmd_format='{EXE} {IN} {OUT}') MultiMap(msings_fct, inputs=[self.aln], outputs=[self.analysis, tmp_report, self.msings_stderr], includes=[self.genome, self.intervals, self.baseline, self.targets]) # Remove suffix in samples names cmd = self.get_exec_path("sed") + \ " -e 's/_analysis//'" + \ " $1" + \ " > $2" + \ " 2> $3" rename_fct = ShellFunction(cmd, cmd_format='{EXE} {IN} {OUT}') MultiMap(rename_fct, inputs=[tmp_report], outputs=[self.report, self.rename_stderr]) # Aggregate report and analysis cmd = self.get_exec_path("mSINGSToReport.py") + \ " --input-report $1 " + \ " --input-analysis $2 " + \ " --output $3 " + \ " 2> $4" convert_fct = ShellFunction(cmd, cmd_format='{EXE} {IN} {OUT}') MultiMap(convert_fct, inputs=[self.report, self.analysis], outputs=[self.aggreg_report, self.aggreg_stderr])
def process(self):
    """
    Apply consensus filtering on MSI reports with msiFilter.py (one
    execution per report).
    """
    segments = [
        self.get_exec_path("msiFilter.py"),
        " --consensus-method " + str(self.consensus_method),
        " --method-name " + str(self.method_name),
        " --min-voting-loci " + str(self.min_voting_loci),
        " --min-distrib-support " + str(self.min_distrib_support),
        " --undetermined-weight " + str(self.undetermined_weight)
    ]
    if self.locus_weight_is_score:
        segments.append(" --locus-weight-is-score")
    # The instability threshold option depends on the consensus method used.
    if self.consensus_method == "ratio":
        segments.append(" --instability-ratio " + str(self.instability_ratio))
    if self.consensus_method == "count":
        segments.append(" --instability-count " + str(self.instability_count))
    segments.extend([
        " --input-reports $1",
        " --output-reports $2",
        " 2> $3"
    ])
    filter_fct = ShellFunction("".join(segments), cmd_format='{EXE} {IN} {OUT}')
    MultiMap(filter_fct,
             inputs=[self.in_reports],
             outputs=[self.out_report, self.stderr])
def add_shell_execution(self, source, inputs=None, outputs=None, arguments=None,
                        includes=None, cmd_format=None, map=False, shell=None,
                        collect=False, local=False):
    """
    Register a shell command execution, either as a single call or as a
    (multi)map over paired inputs/outputs, and record it in the trace.

    :param source: the shell command template.
    :param inputs: input files (list required when map=True).
    :param outputs: output files (list required when map=True).
    :param arguments: extra command arguments.
    :param includes: dependency files that are not positional arguments.
    :param cmd_format: placeholder format string (e.g. '{EXE} {IN} {OUT}').
    :param map: when True, pair inputs[i] with outputs[i] via MultiMap.
      (Parameter name shadows the builtin but is kept for API compatibility.)
    :param shell: shell used to run the command.
    :param collect: passed through to MultiMap.
    :param local: passed through to MultiMap.
    :raises Exception: when map=True and inputs/outputs are not lists.
    """
    # Fix: the original signature used mutable default arguments (`=[]`),
    # which are shared between calls; None sentinels avoid that pitfall
    # while remaining backward compatible.
    inputs = [] if inputs is None else inputs
    outputs = [] if outputs is None else outputs
    arguments = [] if arguments is None else arguments
    includes = [] if includes is None else includes
    shell_function = ShellFunction(source, shell=shell, cmd_format=cmd_format,
                                   modules=self.modules)
    # if abstraction is map or multimap
    if map:
        # inputs and outputs must be list-like (list or a FileList subclass)
        # so they can be paired element by element.
        if issubclass(inputs.__class__, list) and issubclass(outputs.__class__, list):
            MultiMap(shell_function,
                     inputs=inputs,
                     outputs=outputs,
                     includes=includes,
                     collect=collect,
                     local=local,
                     arguments=arguments)
        else:
            msg = "add_shell_execution: '" + source + \
                "' map requires a list as inputs and output"
            logging.getLogger("jflow").exception(msg)
            raise Exception(msg)
    else:
        shell_function(inputs=inputs,
                       outputs=outputs,
                       arguments=arguments,
                       includes=includes)
    self.__write_trace(source, inputs, outputs, arguments, cmd_format, map, "Shell")
def process(self):
    """
    Classify the MSI status of evaluated samples against reference samples
    with miamsClassify.py.
    """
    segments = [self.get_exec_path("miamsClassify.py")]
    if self.random_seed is not None:
        segments.append(" --random-seed " + str(self.random_seed))
    segments.append(" --classifier " + self.classifier)
    if self.classifier_params is not None:
        segments.append(" --classifier-params '" + self.classifier_params + "'")
    segments.extend([
        " --consensus-method " + self.consensus_method,
        " --method-name " + self.method_name,
        " --min-voting-loci " + str(self.min_voting_loci),
        " --min-support-fragments " + str(self.min_support_fragments),
        " --undetermined-weight " + str(self.undetermined_weight)
    ])
    if self.locus_weight_is_score:
        segments.append(" --locus-weight-is-score")
    # The instability threshold option depends on the consensus method used.
    if self.consensus_method == "ratio":
        segments.append(" --instability-ratio " + str(self.instability_ratio))
    if self.consensus_method == "count":
        segments.append(" --instability-count " + str(self.instability_count))
    segments.extend([
        " --input-references " + self.references_samples,
        " --input-evaluated $1 ",
        " --output-report $2 ",
        " 2> $3"
    ])
    classifier_fct = ShellFunction("".join(segments), cmd_format='{EXE} {IN} {OUT}')
    MultiMap(classifier_fct,
             inputs=[self.evaluated_samples],
             outputs=[self.out_report, self.stderr],
             includes=[self.references_samples])
def process(self):
    """ Run the component, can be implemented by subclasses for a more complex process """
    # get all parameters
    # Partition the declared parameters (in declaration order) into plain
    # parameters, input files and output files.
    parameters = []
    inputs = []
    outputs = []
    for param_name in self.params_order:
        param = self.__getattribute__(param_name)
        if isinstance(param, AbstractParameter):
            if isinstance(param, AbstractInputFile):
                inputs.append(param)
            elif isinstance(param, AbstractOutputFile):
                outputs.append(param)
            else:
                parameters.append(param)
    # sort parameters using argpos
    parameters = sorted(parameters, key=attrgetter('argpos'))
    inputs = sorted(inputs, key=attrgetter('argpos'))
    outputs = sorted(outputs, key=attrgetter('argpos'))
    filteredparams = []
    commandline = self.get_exec_path(self.get_command())
    # Plain parameters: bool flags are emitted only when set; valued
    # parameters only when they have a default/value.
    for p in parameters:
        if isinstance(p, BoolParameter):
            if p:
                commandline += " %s " % p.cmd_format
        else:
            if p.default:
                commandline += " %s %s " % (p.cmd_format, p.default)
    abstraction = self.get_abstraction()
    # No abstraction: one shell call; files are numbered $1..$n in argpos
    # order (file lists contribute one placeholder per element).
    if abstraction == None:
        cpt = 1
        for file in inputs + outputs:
            if isinstance(file, InputFile) or isinstance(file, OutputFile):
                commandline += ' %s $%s ' % (file.cmd_format, cpt)
                cpt += 1
            # input file list or output file list / pattern / ends with
            else:
                for e in file:
                    commandline += ' %s $%s ' % (file.cmd_format, cpt)
                    cpt += 1
        function = ShellFunction(commandline, cmd_format='{EXE} {IN} {OUT}', modules=self.modules)
        function(inputs=inputs, outputs=outputs)
    # weaver map abstraction
    elif abstraction == 'map':
        # NOTE(review): display_error_message reports the problem; whether it
        # also aborts execution is defined elsewhere — confirm before relying
        # on the loops below not running on invalid counts.
        if not (len(inputs) == len(outputs) == 1):
            display_error_message(
                "You can only have one type of input and one type of output for the map abstraction"
            )
        # $1 is the (single) input, $2 the (single) output; ParameterList
        # wrappers are unwrapped so Map iterates their elements.
        for file in inputs:
            commandline += ' %s $1 ' % file.cmd_format
            if isinstance(file, ParameterList):
                inputs = file
        for file in outputs:
            commandline += ' %s $2 ' % file.cmd_format
            if isinstance(file, ParameterList):
                outputs = file
        function = ShellFunction(commandline, cmd_format='{EXE} {IN} {OUT}', modules=self.modules)
        exe = Map(function, inputs=inputs, outputs=outputs)
    # jflow multimap
    elif abstraction == 'multimap':
        # Every file must be a ParameterList; each gets one $n placeholder.
        cpt = 1
        for file in inputs + outputs:
            if not (isinstance(file, ParameterList)):
                display_error_message(
                    "Multimap abstraction can be used only with ParameterList"
                )
            commandline += ' %s $%s ' % (file.cmd_format, cpt)
            cpt += 1
        function = ShellFunction(commandline, cmd_format='{EXE} {IN} {OUT}', modules=self.modules)
        exe = MultiMap(function, inputs=inputs, outputs=outputs)
    # anything other than that will be considered errored
    else:
        raise Exception('Unsupported abstraction %s ' % abstraction)