Beispiel #1
0
    def __init__(
            self,
            input_file_1,
            input_file_2,
            output_prefix,
            collapse=True,
            threads=1,
            options=None,
            dependencies=(),
    ):
        """Set up the command for a paired-end ("PE") AdapterRM node.

        Parameters:
            input_file_1: File passed to the command via ``--file1``.
            input_file_2: File passed to the command via ``--file2``.
            output_prefix: Prefix shared by all output files; the
                ``.settings`` file and the ``.<kind>.gz`` read files are
                named relative to it.
            collapse: When true, ``--collapse`` is set and the two
                ``collapsed`` output files are registered as well.
            threads: Forwarded to ``_get_common_parameters`` and to
                ``CommandNode.__init__``.
            options: Mapping forwarded to ``_get_common_parameters`` and
                ``apply_options``. ``None`` (the default) is treated as
                an empty mapping.
            dependencies: Forwarded unchanged to ``CommandNode.__init__``.
        """
        # A literal ``{}`` default would be a single dict shared by every
        # call; the helpers below receive it and are not visible here, so
        # hand each call its own fresh mapping instead.
        if options is None:
            options = {}

        cmd = _get_common_parameters(threads=threads, options=options)

        # Prefix for output files, to ensure that all end up in temp folder
        cmd.set_option("--basename", "%(TEMP_OUT_BASENAME)s")

        output_tmpl = output_prefix + ".%s.gz"
        cmd.set_kwargs(
            TEMP_OUT_BASENAME=os.path.basename(output_prefix),
            OUT_SETTINGS=output_prefix + ".settings",
            OUT_READS_1=output_tmpl % ("pair1.truncated", ),
            OUT_READS_2=output_tmpl % ("pair2.truncated", ),
            OUT_SINGLETON=output_tmpl % ("singleton.truncated", ),
            OUT_DISCARDED=output_tmpl % ("discarded", ),
        )

        if collapse:
            cmd.set_option("--collapse")

            # These two outputs are only registered when collapsing
            cmd.set_kwargs(
                OUT_COLLAPSED=output_tmpl % ("collapsed", ),
                OUT_COLLAPSED_TRUNC=output_tmpl % ("collapsed.truncated", ),
            )

        cmd.set_option("--file1", "%(IN_READS_1)s")
        cmd.set_option("--file2", "%(IN_READS_2)s")
        cmd.set_kwargs(IN_READS_1=input_file_1, IN_READS_2=input_file_2)

        # Applied last, after all options set above
        apply_options(cmd, options)

        CommandNode.__init__(
            self,
            command=cmd.finalize(),
            threads=threads,
            description="<AdapterRM (PE): %s -> '%s.*'>" % (
                fileutils.describe_paired_files(input_file_1, input_file_2),
                output_prefix,
            ),
            dependencies=dependencies,
        )
Beispiel #2
0
    def __init__(self, parameters):
        """Create a paired-end ("PE") AdapterRM node from `parameters`.

        The finalized `parameters.command` is used as the node's command.
        If more than one file is listed in `parameters.input_files_1`, a
        cat command is built for each of the two input file lists and all
        three commands are wrapped in a single `ParallelCmds`. Whether
        that was the case is recorded in `self._multi_file_input`.
        """
        node_command = parameters.command.finalize()

        self._multi_file_input = len(parameters.input_files_1) > 1
        if self._multi_file_input:
            cat_commands = (
                _build_cat_command(parameters.input_files_1,
                                   "uncompressed_input_1"),
                _build_cat_command(parameters.input_files_2,
                                   "uncompressed_input_2"),
            )
            node_command = ParallelCmds((node_command,) + cat_commands)

        input_summary = fileutils.describe_paired_files(
            parameters.input_files_1,
            parameters.input_files_2,
        )
        node_description = "<AdapterRM (PE): %s -> '%s.*'>" % (
            input_summary,
            parameters.output_prefix,
        )

        CommandNode.__init__(
            self,
            command=node_command,
            threads=parameters.threads,
            description=node_description,
            dependencies=parameters.dependencies,
        )
Beispiel #3
0
def _get_node_description(name, input_files_1, input_files_2=(),
                          algorithm=None, prefix=None, threads=1):
    """Return a "<name (details): files>" label describing a node.

    The details are, in order and each only when applicable: the basename
    of `prefix` with a trailing ".fasta" / ".fa" extension removed, the
    `algorithm`, and the thread count when `threads` is greater than 1.
    The files part is produced by `describe_paired_files`.
    """
    details = []

    if prefix is not None:
        label = os.path.basename(prefix)
        if label.endswith((".fasta", ".fa")):
            # Cut at the last dot, i.e. drop just the extension
            label = label.rpartition(".")[0]

        details.append(label)

    if algorithm is not None:
        details.append(algorithm)

    if threads > 1:
        details.append("%i threads" % (threads,))

    # A falsy second file list is replaced by an empty tuple
    paired_desc = describe_paired_files(input_files_1, input_files_2 or ())

    return "<%s (%s): %s>" % (name, ", ".join(details), paired_desc)
Beispiel #4
0
def _get_node_description(name, input_files_1, input_files_2=None,
                          algorithm=None, prefix=None, threads=1):
    """Compose the description string for a node.

    The result has the form "<name (a, b, c): files>", where the
    comma-separated items are the extension-less basename of `prefix`,
    the `algorithm` and the number of threads; items that were not given
    (or, for threads, that do not exceed 1) are left out. The files part
    comes from `describe_paired_files`.
    """
    parts = []

    if prefix is not None:
        basename = os.path.basename(prefix)
        for extension in (".fasta", ".fa"):
            if basename.endswith(extension):
                # Strip the matched FASTA extension, including its dot
                basename = basename[:-len(extension)]
                break

        parts.append(basename)

    if algorithm is not None:
        parts.append(algorithm)

    if threads > 1:
        parts.append("%i threads" % (threads,))

    # `None` (or any other falsy value) stands for "no second file list"
    files_summary = describe_paired_files(input_files_1, input_files_2 or ())
    joined_parts = ", ".join(parts)

    return "<%s (%s): %s>" % (name, joined_parts, files_summary)
def test_describe_paired_files__different_path_and_files():
    """Pairs spread over different folders are described by count only."""
    mate_1_files = ("foo/1_abc", "bar/2_def")
    mate_2_files = ("zed/3_ghi", "not/4_jkl")
    assert_equal(
        describe_paired_files(mate_1_files, mate_2_files),
        "2 pair(s) of files",
    )
def test_describe_paired_files__same_path__similar_files__too_different():
    """Equal-length names in one folder can still yield just a count."""
    mate_1_files = ("foo/1a_abc", "foo/1a_def")
    mate_2_files = ("foo/2b_ghi", "foo/2b_jkl")
    assert_equal(
        describe_paired_files(mate_1_files, mate_2_files),
        "2 pair(s) of files in 'foo'",
    )
def test_describe_paired_files__same_path__different_file_lens():
    """Names of differing length in one folder yield a count and folder."""
    mate_1_files = ("foo/1_a", "foo/2_de")
    mate_2_files = ("foo/3_g", "foo/4_jk")
    assert_equal(
        describe_paired_files(mate_1_files, mate_2_files),
        "2 pair(s) of files in 'foo'",
    )
def test_describe_paired_files__identical_files():
    """Passing the same single file for both mates yields its repr."""
    filename = "/var/foo/bar"
    both_mates = (filename,)
    description = describe_paired_files(both_mates, both_mates)
    assert_equal(description, repr(filename))
def test_describe_paired_files__same_path__similar_files__different_prefixes():
    """Similar names in one folder are merged into a glob-like pattern."""
    mate_1_files = ("foo/1_abc", "foo/1_def")
    mate_2_files = ("foo/2_ghi", "foo/2_jkl")
    assert_equal(
        describe_paired_files(mate_1_files, mate_2_files),
        "'foo/[12]_???'",
    )
Beispiel #10
0
def test_describe_paired_files__different_path_and_files():
    """With no shared folder, only the number of pairs is reported."""
    first_mates = ("foo/1_abc", "bar/2_def")
    second_mates = ("zed/3_ghi", "not/4_jkl")

    observed = describe_paired_files(first_mates, second_mates)

    assert_equal(observed, "2 pair(s) of files")
def test_describe_paired_files__single_file():
    """One file and an empty second list are described by the file's repr."""
    filename = "/var/foo/bar"
    description = describe_paired_files((filename,), ())
    assert_equal(description, repr(filename))
Beispiel #12
0
def test_describe_paired_files__same_path__different_file_lens():
    """Filenames of unequal length in 'foo' give a pair count plus folder."""
    first_mates = ("foo/1_a", "foo/2_de")
    second_mates = ("foo/3_g", "foo/4_jk")

    observed = describe_paired_files(first_mates, second_mates)

    assert_equal(observed, "2 pair(s) of files in 'foo'")
Beispiel #13
0
def test_describe_paired_files__same_path__similar_files__too_different():
    """These same-folder names give a pair count plus folder, no pattern."""
    first_mates = ("foo/1a_abc", "foo/1a_def")
    second_mates = ("foo/2b_ghi", "foo/2b_jkl")

    observed = describe_paired_files(first_mates, second_mates)

    assert_equal(observed, "2 pair(s) of files in 'foo'")
Beispiel #14
0
def test_describe_paired_files__same_path__similar_files__different_prefixes():
    """These same-folder names are collapsed into the 'foo/[12]_???' pattern."""
    first_mates = ("foo/1_abc", "foo/1_def")
    second_mates = ("foo/2_ghi", "foo/2_jkl")

    observed = describe_paired_files(first_mates, second_mates)

    assert_equal(observed, "'foo/[12]_???'")
Beispiel #15
0
def test_describe_paired_files__identical_files():
    """The same one-file tuple given twice is described by the file's repr."""
    path = "/var/foo/bar"
    files = (path, )

    observed = describe_paired_files(files, files)

    assert_equal(observed, repr(path))
Beispiel #16
0
def test_describe_paired_files__single_file():
    """A lone file with no second-mate files is described by its repr."""
    path = "/var/foo/bar"
    first_mates = (path, )
    second_mates = ()

    observed = describe_paired_files(first_mates, second_mates)

    assert_equal(observed, repr(path))