def _get_common_parameters(version):
    """Create the AdapterRemoval command builder shared by the nodes.

    The version requirement matching `version` is attached to the builder
    via CHECK_VERSION, and the "--trimns" and "--trimqualities" options
    are set with fixed=False. A deprecation warning is printed at most
    once (tracked by the module-level _DEPRECATION_WARNING_PRINTED flag)
    when the version reported by the requirement is older than 2.0.

    Raises CmdError if `version` is neither VERSION_14 nor VERSION_15.
    """
    global _DEPRECATION_WARNING_PRINTED

    known_versions = (
        (VERSION_14, _VERSION_14_CHECK),
        (VERSION_15, _VERSION_15_CHECK),
    )
    for known_version, requirement in known_versions:
        if version == known_version:
            break
    else:
        raise CmdError("Unknown version: %s" % version)

    cmd = AtomicCmdBuilder("AdapterRemoval", CHECK_VERSION=requirement)
    # "--trimns": trim Ns at read ends.
    # "--trimqualities": trim low quality scores.
    for flag in ("--trimns", "--trimqualities"):
        cmd.set_option(flag, fixed=False)

    try:
        if not _DEPRECATION_WARNING_PRINTED:
            if requirement.version < (2, 0):
                import pypeline.ui as ui

                warning_lines = (
                    "\nWARNING: AdapterRemoval v1.5.x is deprecated;",
                    "         Upgrading to 2.1.x is strongly adviced!\n",
                    "         Download the newest version of AdapterRemoval at ",
                    "         https://github.com/MikkelSchubert/adapterremoval\n",
                )
                for line in warning_lines:
                    ui.print_warn(line)

                _DEPRECATION_WARNING_PRINTED = True
    except versions.VersionRequirementError:
        # Best effort only: a failed version requirement is deliberately
        # ignored here, so no warning is printed and the flag stays unset.
        pass

    return cmd
def __init__(self, parameters):
    """Assemble the paired-end AdapterRemoval command set.

    Builds one decompression command per mate, the finalized
    AdapterRemoval command taken from `parameters.command`, and one
    compression command per output file, and runs them all as a single
    ParallelCmds set.

    Raises CmdError if the number of mate 1 and mate 2 input files differ.
    """
    self._version = parameters.version
    self._basename = parameters.basename

    if len(parameters.input_files_1) != len(parameters.input_files_2):
        counts = (len(parameters.input_files_1),
                  len(parameters.input_files_2))
        raise CmdError(
            "Number of mate 1 files differ from mate 2 files: %i != %i"
            % counts)

    def compress(suffix):
        # Compression command for the output file ending in `suffix`.
        return _build_zip_command(parameters.output_format,
                                  parameters.output_prefix,
                                  suffix)

    decompressors = [
        _build_unicat_command(parameters.input_files_1,
                              "uncompressed_input_1"),
        _build_unicat_command(parameters.input_files_2,
                              "uncompressed_input_2"),
    ]
    pair_compressors = [compress(".pair1.truncated"),
                        compress(".pair2.truncated")]
    discarded_compressor = compress(".discarded")
    adapterrm = parameters.command.finalize()

    # The set of remaining output files depends on the requested version.
    if parameters.version == VERSION_15:
        suffixes = (".collapsed",
                    ".collapsed.truncated",
                    ".singleton.truncated")
    else:
        suffixes = (".singleton.aln.truncated",
                    ".singleton.unaln.truncated")
    version_compressors = [compress(suffix) for suffix in suffixes]

    # Opening a pipe blocks, so the order of these commands depends on the
    # order in which atomiccmd and the programs themselves open files.
    commands = ParallelCmds([adapterrm]
                            + pair_compressors
                            + version_compressors
                            + [discarded_compressor]
                            + decompressors)

    input_description = fileutils.describe_files(parameters.input_files_1)
    description = "<PE_AdapterRM: %s -> '%s.*'>" % (
        input_description.replace("file", "pair"),
        parameters.output_prefix,
    )

    CommandNode.__init__(self,
                         command=commands,
                         description=description,
                         dependencies=parameters.dependencies)
def __init__(self, commands):
    """Initialize the set from one or more commands.

    `commands` is coerced to a tuple; every element must be an AtomicCmd
    or a _CommandSet instance, otherwise CmdError is raised. The coerced
    tuple is then handed to _CommandSet.__init__.
    """
    self._ready = False

    commands = safe_coerce_to_tuple(commands)
    accepted_types = (AtomicCmd, _CommandSet)
    if not all(isinstance(command, accepted_types) for command in commands):
        raise CmdError(
            "ParallelCmds must only contain AtomicCmds or other ParallelCmds!"
        )

    _CommandSet.__init__(self, commands)
def _are_fastq_checks_required(version):
    """Return True if FASTQ checks are required for `version`.

    Always True for VERSION_14. For VERSION_15 the result is True when the
    version reported by _VERSION_15_CHECK is older than 2.0, or when that
    lookup raises VersionRequirementError.

    Raises CmdError for any other `version`.
    """
    if version == VERSION_14:
        return True

    if not version == VERSION_15:
        raise CmdError("Unknown version: %s" % version)

    try:
        installed_version = _VERSION_15_CHECK.version
        return installed_version < (2, 0)
    except versions.VersionRequirementError:
        # The requirement failed, so fall back to requiring the checks.
        return True
def _build_zip_command(output_format, prefix, name, output=None):
    """Build an AtomicCmd compressing one temporary file to stdout.

    The command ("bzip2 -c" for "bz2", "gzip -c" for "gz") takes the
    temporary file `basename(prefix) + name` as TEMP_IN_STDIN, and its
    OUT_STDOUT is `prefix + (output or name)` plus the format's extension.

    Raises CmdError if `output_format` is neither "gz" nor "bz2".
    """
    supported = (
        ("bz2", "bzip2", ".bz2"),
        ("gz", "gzip", ".gz"),
    )
    for format_name, program, extension in supported:
        if output_format == format_name:
            break
    else:
        raise CmdError(
            "Invalid output-format (%s), please select 'gz' or 'bz2'"
            % repr(output_format))

    temp_input = os.path.basename(prefix) + name
    destination = prefix + (output or name) + extension

    return AtomicCmd([program, "-c"],
                     TEMP_IN_STDIN=temp_input,
                     OUT_STDOUT=destination)
def _build_zip_command(output_format, prefix, name, output=None):
    """Build a finalized "zip" factory command for one temporary file.

    The command gets "--format" set to `output_format`, takes the
    temporary file `basename(prefix) + name` as TEMP_IN_PIPE, and its
    OUT_STDOUT is `prefix + (output or name) + "." + output_format`.

    Raises CmdError if `output_format` is neither "gz" nor "bz2".
    """
    supported_formats = ("gz", "bz2")
    if output_format not in supported_formats:
        raise CmdError(
            "Invalid output-format (%r), please select 'gz' or 'bz2'"
            % (output_format, ))

    temp_basename = os.path.basename(prefix)

    builder = factory.new("zip")
    builder.set_option("--format", output_format)
    builder.add_value("%(TEMP_IN_PIPE)s")

    pipe_name = temp_basename + name
    destination = prefix + (output or name) + "." + output_format
    builder.set_kwargs(TEMP_IN_PIPE=pipe_name, OUT_STDOUT=destination)

    return builder.finalize()
def _get_common_parameters(version):
    """Create the AdapterRemoval command builder shared by the nodes.

    The version requirement matching `version` is attached to the builder
    via CHECK_VERSION, and the "--trimns" and "--trimqualities" options
    are set with fixed=False.

    Raises CmdError if `version` is neither VERSION_14 nor VERSION_15.
    """
    known_versions = (
        (VERSION_14, _VERSION_14_CHECK),
        (VERSION_15, _VERSION_15_CHECK),
    )
    for known_version, requirement in known_versions:
        if version == known_version:
            break
    else:
        raise CmdError("Unknown version: %s" % version)

    builder = AtomicCmdBuilder("AdapterRemoval", CHECK_VERSION=requirement)
    # "--trimns": trim Ns at read ends.
    # "--trimqualities": trim low quality scores.
    for flag in ("--trimns", "--trimqualities"):
        builder.set_option(flag, fixed=False)

    return builder
def _validate_commands(self):
    """Check that the commands in this set can safely be run together.

    Two conditions are verified on `self._commands`:

    1. No command is included more than once.
    2. No temporary file (expected or optional) is listed by more than
       one command, or more than once by the same command.

    Raises:
        ValueError: If the same command is included multiple times.
        CmdError: If any temporary filename is listed more than once.
    """
    if len(self._commands) != len(set(self._commands)):
        raise ValueError("Same command included multiple times in %s"
                         % (self.__class__.__name__,))

    usage_counts = collections.defaultdict(int)
    for command in self._commands:
        temp_file_groups = (command.expected_temp_files,
                            command.optional_temp_files)
        for filenames in temp_file_groups:
            for filename in filenames:
                usage_counts[filename] += 1

    # Sorted so that the error message is deterministic regardless of the
    # iteration order of the dictionary.
    clobbered = sorted(filename
                       for (filename, count) in usage_counts.items()
                       if count > 1)

    # Test the list itself rather than any(clobbered): any() inspects the
    # truthiness of each filename, and would therefore silently ignore a
    # clobbered file whose name is falsy (e.g. an empty string).
    if clobbered:
        raise CmdError("Commands clobber each others' files: %s"
                       % (", ".join(clobbered), ))
def _get_common_parameters(version):
    """Create the AdapterRemoval command builder with default options.

    The version requirement matching `version` is attached to the builder
    via CHECK_VERSION, and the default options listed below are all set
    with fixed=False.

    Raises CmdError if `version` is neither VERSION_14 nor VERSION_15.
    """
    if version == VERSION_14:
        requirement = _VERSION_14_CHECK
    elif version == VERSION_15:
        requirement = _VERSION_15_CHECK
    else:
        raise CmdError("Unknown version: %s" % version)

    builder = AtomicCmdBuilder("AdapterRemoval", CHECK_VERSION=requirement)

    default_options = (
        # Allow 1/3 mismatches in the aligned region
        ("--mm", 3),
        # Minimum length of trimmed reads
        ("--minlength", 25),
        # Trim Ns at read ends
        ("--trimns",),
        # Trim low quality scores
        ("--trimqualities",),
        # Offset of quality scores
        ("--qualitybase", 33),
    )
    for option in default_options:
        builder.set_option(*option, fixed=False)

    return builder
def stdout(self):
    """Unconditionally raise CmdError naming the class of `self`."""
    class_name = self.__class__.__name__
    message = "%s does not implement property 'stdout'!" % (class_name,)
    raise CmdError(message)
def __init__(self, commands):
    """Store `commands` as a tuple and validate the resulting set.

    Raises CmdError if no commands were given; any error raised by
    `self._validate_commands()` is propagated unchanged.
    """
    coerced = safe_coerce_to_tuple(commands)
    # Stored before the emptiness check, so the attribute is set even when
    # the check below raises.
    self._commands = coerced

    if not coerced:
        raise CmdError("Empty list passed to command set")

    self._validate_commands()
def __init__(self, parameters):
    """Assemble the paired-end AdapterRemoval command set.

    Builds one decompression command per mate, the finalized
    AdapterRemoval command taken from `parameters.command`, and one
    compression command per output file, and runs them all as a single
    ParallelCmds set. For VERSION_15, the commands for the collapsed
    outputs are only included when `parameters.collapse` is set.

    Raises CmdError if the number of mate 1 and mate 2 input files differ.
    """
    self._quality_offset = parameters.quality_offset
    self._version = parameters.version
    self._basename = parameters.basename
    self._collapse = parameters.collapse
    self._check_fastqs = _are_fastq_checks_required(parameters.version)

    if len(parameters.input_files_1) != len(parameters.input_files_2):
        counts = (len(parameters.input_files_1),
                  len(parameters.input_files_2))
        raise CmdError(
            "Number of mate 1 files differ from mate 2 files: "
            "%i != %i" % counts)

    def compress(suffix):
        # Compression command for the output file ending in `suffix`.
        return _build_zip_command(parameters.output_format,
                                  parameters.output_prefix,
                                  suffix)

    decompressors = [
        _build_cat_command(parameters.input_files_1,
                           "uncompressed_input_1"),
        _build_cat_command(parameters.input_files_2,
                           "uncompressed_input_2"),
    ]
    pair_compressors = [compress(".pair1.truncated"),
                        compress(".pair2.truncated")]
    discarded_compressor = compress(".discarded")
    adapterrm = parameters.command.finalize()

    # The set of remaining output files depends on the requested version
    # and, for VERSION_15, on whether collapsing was requested.
    if parameters.version == VERSION_15:
        singleton_compressor = compress(".singleton.truncated")
        version_compressors = []
        if parameters.collapse:
            version_compressors.append(compress(".collapsed"))
            version_compressors.append(compress(".collapsed.truncated"))
        version_compressors.append(singleton_compressor)
    else:
        version_compressors = [compress(".singleton.aln.truncated"),
                               compress(".singleton.unaln.truncated")]

    commands = ParallelCmds([adapterrm]
                            + pair_compressors
                            + version_compressors
                            + [discarded_compressor]
                            + decompressors)

    input_description = fileutils.describe_paired_files(
        parameters.input_files_1, parameters.input_files_2)
    description = "<AdapterRM (PE): %s -> '%s.*'>" % (
        input_description,
        parameters.output_prefix,
    )

    CommandNode.__init__(self,
                         command=commands,
                         description=description,
                         dependencies=parameters.dependencies)