Exemple #1
0
 def _run(self, _config, _temp):
     """Validate every input FASTA file, then touch the single output
     file to mark this task as completed."""
     for input_filename in self.input_files:
         check_fasta_file(input_filename)
     # Tuple-unpacking enforces that exactly one output file exists.
     (destination,) = self.output_files
     make_dirs(os.path.dirname(destination))
     # Create an empty flag file; no contents are required.
     open(destination, "w").close()
Exemple #2
0
 def _create_temp_dir(self, config):
     """Create and return the temporary folder used by 'run'.

     The location is deterministic (derived from the output template)
     rather than random, so interrupted runs can restart from
     checkpoints in the same folder."""
     temp_root = os.path.join(self._dirname, self._template % ("temp",))
     fileutils.make_dirs(temp_root)
     return temp_root
Exemple #3
0
 def _create_temp_dir(self, _config):
     """Called by 'run' in order to create a temporary folder.
     To allow restarting from checkpoints, we use a fixed folder
     determined by the output_template."""
     # Deterministic path (rather than mkdtemp) so re-runs reuse it.
     temp = os.path.join(self._dirname, self._template % ("temp", ))
     fileutils.make_dirs(temp)
     return temp
Exemple #4
0
 def _run(self, _config, _temp):
     # Validate each input file as FASTA; check_fasta_file is expected
     # to raise on malformed input.
     for filename in self.input_files:
         check_fasta_file(filename)
     # Exactly one output file is expected; unpacking enforces this.
     output_file, = self.output_files
     make_dirs(os.path.dirname(output_file))
     # Touch an empty output file to signal successful validation.
     with open(output_file, "w"):
         pass
Exemple #5
0
def test_move_file__move_to_existing_folder(temp_folder):
    """Moving a file into an existing folder must relocate it intact."""
    src_dir = os.path.join(temp_folder, "src")
    dst_dir = os.path.join(temp_folder, "dst")
    assert make_dirs(src_dir)
    assert make_dirs(dst_dir)
    src_file = os.path.join(src_dir, "file_1")
    dst_file = os.path.join(dst_dir, "file_2")
    set_file_contents(src_file, "2")
    move_file(src_file, dst_file)
    # Source folder must now be empty; destination holds the file,
    # renamed and with its contents unchanged.
    assert_equal(os.listdir(src_dir), [])
    assert_equal(os.listdir(dst_dir), ["file_2"])
    assert_equal(get_file_contents(dst_file), "2")
Exemple #6
0
def test_move_file__move_to_existing_folder(temp_folder):
    # Create source and destination folders, with one file in 'src'.
    assert make_dirs(os.path.join(temp_folder, "src"))
    assert make_dirs(os.path.join(temp_folder, "dst"))
    file_1 = os.path.join(temp_folder, "src", "file_1")
    file_2 = os.path.join(temp_folder, "dst", "file_2")
    set_file_contents(file_1, "2")
    move_file(file_1, file_2)
    # File must be gone from 'src' and present (unchanged) in 'dst'.
    assert_equal(os.listdir(os.path.dirname(file_1)), [])
    assert_equal(os.listdir(os.path.dirname(file_2)), ["file_2"])
    assert_equal(get_file_contents(file_2), "2")
Exemple #7
0
def test_copy_file__copy_to_new_folder(temp_folder):
    """copy_file must create a missing destination folder on demand."""
    src_dir = os.path.join(temp_folder, "src")
    assert make_dirs(src_dir)
    src_file = os.path.join(src_dir, "file_1")
    dst_file = os.path.join(temp_folder, "dst", "file_2")
    set_file_contents(src_file, "2")
    copy_file(src_file, dst_file)
    # Both the original and the copy must exist, with equal contents.
    assert_equal(os.listdir(os.path.dirname(src_file)), ["file_1"])
    assert_equal(os.listdir(os.path.dirname(dst_file)), ["file_2"])
    assert_equal(get_file_contents(src_file), "2")
    assert_equal(get_file_contents(dst_file), "2")
Exemple #8
0
    def _write_config_file(self, config, defaults):
        """Write a basic config file and terminate the program.

        The values previously read from config files and those given on
        the command-line are recorded under a "Defaults" section in the
        last file of the search path."""
        parser = ConfigParser.SafeConfigParser()
        parser.add_section("Defaults")
        for key in defaults:
            value = getattr(config, key)
            # Sequences are flattened to a ';'-separated string.
            if isinstance(value, (types.ListType, types.TupleType)):
                value = ";".join(value)
            parser.set("Defaults", key, str(value))

        filename = self._filenames[-1]
        make_dirs(os.path.dirname(filename))
        with open(filename, "w") as handle:
            parser.write(handle)

        print_info("Wrote config file %r" % (filename,))
        # Writing the config is a terminal action; exit immediately.
        sys.exit(0)
Exemple #9
0
def test_copy_file__copy_to_new_folder(temp_folder):
    # Only the source folder exists; 'dst' must be created by copy_file.
    assert make_dirs(os.path.join(temp_folder, "src"))
    file_1 = os.path.join(temp_folder, "src", "file_1")
    file_2 = os.path.join(temp_folder, "dst", "file_2")
    set_file_contents(file_1, "2")
    copy_file(file_1, file_2)
    # Both files must exist afterwards, with identical contents.
    assert_equal(os.listdir(os.path.dirname(file_1)), ["file_1"])
    assert_equal(os.listdir(os.path.dirname(file_2)), ["file_2"])
    assert_equal(get_file_contents(file_1), "2")
    assert_equal(get_file_contents(file_2), "2")
Exemple #10
0
    def _write_config_file(self, config, defaults):
        """Writes a basic config files, using the values previously found in the
        config files, and specified on the command-line."""
        defaults_cfg = ConfigParser.SafeConfigParser()
        defaults_cfg.add_section("Defaults")
        for key in defaults:
            value = getattr(config, key)
            # Sequences are stored as ';'-separated strings.
            if isinstance(value, (types.ListType, types.TupleType)):
                value = ";".join(value)

            defaults_cfg.set("Defaults", key, str(value))

        # The last filename in the search path receives the new config.
        filename = self._filenames[-1]
        make_dirs(os.path.dirname(filename))
        with open(filename, "w") as handle:
            defaults_cfg.write(handle)

        print_info("Wrote config file %r" % (filename, ))
        # Writing the config file terminates the program.
        sys.exit(0)
Exemple #11
0
def with_temp_folder(func):
    """Decorator for unit-tests:
    Creates a unique temporary folder before running 'func'. The
    decorated function is assumed to take at least one parameter, the
    first of which represents the temporary folder."""
    scratch_root = os.path.join(tempfile.gettempdir(), os.getlogin())
    # Ensure the per-user scratch folder exists before any test runs.
    make_dirs(scratch_root)

    @nose.tools.istest
    def _wrapper(*args, **kwargs):
        temp_folder = None
        try:
            temp_folder = tempfile.mkdtemp(dir=scratch_root,
                                           prefix="pypeline_unit")
            func(temp_folder, *args, **kwargs)
        finally:
            # Clean up only if a folder was actually created.
            if temp_folder:
                shutil.rmtree(temp_folder)

    _wrapper.__name__ = func.__name__ + "__wrapped_by_with_temp_folder"
    return _wrapper
Exemple #12
0
def with_temp_folder(func):
    """Decorator for unit-tests:
    Creates a unique temporary folder before running 'func'. The
    function is assumed to take at least one parameter, the first
    of which is assumed to represent the temporary folder."""
    temp_root = os.path.join(tempfile.gettempdir(), os.getlogin())
    make_dirs(temp_root) # Ensure that this subdirectory exists

    @nose.tools.istest
    def _wrapper(*args, **kwargs):
        try:
            # Set first so the 'finally' clause can tell whether a
            # folder was actually created before cleaning up.
            temp_folder = None
            temp_folder = tempfile.mkdtemp(dir    = temp_root,
                                           prefix = "pypeline_unit")
            func(temp_folder, *args, **kwargs)
        finally:
            if temp_folder:
                shutil.rmtree(temp_folder)
    _wrapper.__name__ = func.__name__ + "__wrapped_by_with_temp_folder"
    return _wrapper
Exemple #13
0
def convert_reads(config, destination, record, sink_cache):
    """Write the filtered reads of 'record' into per-lane sinks under
    'destination', creating sinks lazily and caching them in
    'sink_cache' so split lanes can be re-merged."""
    # Source name is used, to re-merge split lanes
    name = record.tags.get("PU_src")
    destination = os.path.join(destination, name)
    make_dirs(os.path.join(config.destination, destination))

    def _open_se_sink(reads_type):
        # Open (at most once) a single-end sink for this lane/type.
        key = (name, reads_type)
        if not get_in(sink_cache, key):
            filename = ReadSink.get_filename(destination, reads_type.lower())
            set_in(sink_cache, key, ReadSink.open(config.destination,
                                                  filename))
        return key

    for (reads_type, bam_files) in record.bams.iteritems():
        # Processed reads are pre-aligned BAMs which have been cleaned up
        if reads_type in ("Paired", "Processed"):
            # Record "Single" reads; these may result from orphan SE reads
            _open_se_sink("Single")

            key = (name, "Paired")
            if not get_in(sink_cache, key):
                set_in(sink_cache, key,
                       PEReadSink.open(config.destination, destination))
        else:
            key = _open_se_sink(reads_type)

        sink = get_in(sink_cache, key)
        for filename in bam_files:
            print("%sProcessing file %r" % (_INDENTATION * 4, filename))
            with pysam.Samfile(filename) as handle:

                def _keep_record(record):
                    # Keep only records meeting the configured minimum
                    # quality and length thresholds.
                    return (record.qual >= config.min_quality) and \
                        (len(record.seq) >= config.min_length)

                sink.write_records(record for record in handle
                                   if _keep_record(record))
Exemple #14
0
def test_move_dirs__permission_denied(temp_folder):
    """move_file must propagate errors other than ENOENT."""
    dst_folder = os.path.join(temp_folder, "dst")
    src_file = os.path.join(temp_folder, "file")
    dst_file = os.path.join(dst_folder, "file")
    set_file_contents(src_file, "1")

    # Strip every write bit from the destination folder
    assert make_dirs(dst_folder)
    current_mode = os.stat(dst_folder).st_mode
    read_only = current_mode & ~(stat.S_IWUSR | stat.S_IWGRP | stat.S_IWOTH)
    os.chmod(dst_folder, read_only)

    # Errors other than ENOENT must be re-raised, not swallowed:
    assert_raises(IOError, move_file, src_file, dst_file)
Exemple #15
0
def test_make_dirs__creation_preemted(temp_folder):
    """make_dirs must return False when the directory is created by
    someone else first (simulated race)."""
    real_makedirs = os.makedirs
    invocations = []

    def _racy_makedirs(*args, **kwargs):
        # Create the directory twice, mimicking a concurrent creator
        # winning the race; the second call fails with EEXIST, which
        # make_dirs is expected to absorb.
        invocations.append(True)
        real_makedirs(*args, **kwargs)
        real_makedirs(*args, **kwargs)

    with Monkeypatch("os.makedirs", _racy_makedirs):
        work_folder = os.path.join(temp_folder, "test")
        assert not make_dirs(work_folder)
        assert os.path.exists(work_folder)
        assert_equal(os.listdir(temp_folder), ["test"])
        assert_equal(invocations, [True])
Exemple #16
0
    def run(self, _):
        """Merge records from all input BAMs in sorted order, group
        reads observed at the same position, and hand each group to
        '_process_reads'; on success the output files are created
        empty ("touched")."""
        handles = []
        try:
            sequences = []
            for fpath in self.input_files:
                handle = pysam.Samfile(fpath)
                handles.append(handle)

                # Pair every record with the file it originated from.
                sequence = izip_longest(handle, (), fillvalue=fpath)
                sequences.append(sequence)

            position = 0
            records = chain_sorted(*sequences, key=self._key_by_tid_pos)
            observed_reads = collections.defaultdict(list)
            for (record, fpath) in records:
                if record.pos != position:
                    # A new position was reached; flush collected reads.
                    self._process_reads(observed_reads, self.output_files)
                    observed_reads.clear()
                    position = record.pos
                elif record.is_unmapped:
                    # NOTE(review): assumes unmapped records sort after
                    # all mapped records, so iteration may stop -- verify.
                    break

                # Ignore supplementary / secondary alignments
                if not record.flag & 0x900:
                    key = (record.is_reverse, record.qname,
                           record.seq, record.qual)
                    observed_reads[key].append(fpath)
            self._process_reads(observed_reads, self.output_files)

            # Everything is ok, touch the output files
            for fpath in self.output_files:
                make_dirs(os.path.dirname(fpath))
                with open(fpath, "w"):
                    pass
        finally:
            # Always close every opened BAM handle, even on error.
            for handle in handles:
                handle.close()
Exemple #17
0
    def run(self, _):
        """Merge records from all input BAMs in sorted order, group
        reads observed at the same position, and pass each group to
        '_process_reads'; on success the output files are touched."""
        handles = []
        try:
            sequences = []
            for fpath in self.input_files:
                handle = pysam.Samfile(fpath)
                handles.append(handle)

                # Tag every record with the file it came from.
                sequence = izip_longest(handle, (), fillvalue=fpath)
                sequences.append(sequence)

            position = 0
            records = chain_sorted(*sequences, key=self._key_by_tid_pos)
            observed_reads = collections.defaultdict(list)
            for (record, fpath) in records:
                if record.pos != position:
                    # New position reached; flush the reads collected
                    # at the previous position.
                    self._process_reads(observed_reads, self.output_files)
                    observed_reads.clear()
                    position = record.pos
                elif record.is_unmapped:
                    # NOTE(review): assumes unmapped records sort last,
                    # allowing early termination -- verify.
                    break

                # Ignore supplementary / secondary alignments
                if not record.flag & 0x900:
                    key = (record.is_reverse, record.qname, record.seq,
                           record.qual)
                    observed_reads[key].append(fpath)
            self._process_reads(observed_reads, self.output_files)

            # Everything is ok, touch the output files
            for fpath in self.output_files:
                make_dirs(os.path.dirname(fpath))
                with open(fpath, "w"):
                    pass
        finally:
            # Always close the BAM handles, even on error.
            for handle in handles:
                handle.close()
Exemple #18
0
def convert_reads(config, destination, record, sink_cache):
    """Write the filtered reads of 'record' into per-lane sinks below
    'destination'; sinks are created lazily and shared via
    'sink_cache' so that split lanes are re-merged."""
    # The source name ("PU_src" tag) identifies the original lane.
    lane_name = record.tags.get("PU_src")
    dest_dir = os.path.join(destination, lane_name)
    make_dirs(os.path.join(config.destination, dest_dir))

    def _open_se_sink(reads_type):
        # Open (at most once) a single-end sink for this lane / type.
        cache_key = (lane_name, reads_type)
        if not get_in(sink_cache, cache_key):
            fname = ReadSink.get_filename(dest_dir, reads_type.lower())
            set_in(sink_cache, cache_key,
                   ReadSink.open(config.destination, fname))
        return cache_key

    for (reads_type, bam_files) in record.bams.iteritems():
        # "Processed" reads are pre-aligned BAMs that were cleaned up.
        if reads_type in ("Paired", "Processed"):
            # A "Single" sink is opened as well, since orphaned SE
            # reads may be encountered among paired reads.
            _open_se_sink("Single")

            cache_key = (lane_name, "Paired")
            if not get_in(sink_cache, cache_key):
                set_in(sink_cache, cache_key,
                       PEReadSink.open(config.destination, dest_dir))
        else:
            cache_key = _open_se_sink(reads_type)

        sink = get_in(sink_cache, cache_key)
        for filename in bam_files:
            print("%sProcessing file %r" % (_INDENTATION * 4, filename))
            with pysam.Samfile(filename) as handle:
                def _passes_filters(rec):
                    # Minimum quality / length thresholds from config.
                    return (rec.qual >= config.min_quality) \
                        and (len(rec.seq) >= config.min_length)

                sink.write_records(rec for rec in handle
                                   if _passes_filters(rec))
Exemple #19
0
def test_make_dirs__creation_preemted(temp_folder):
    # Simulates a race where another process creates the directory
    # first; make_dirs must absorb the failure and return False.
    unwrapped, preempted = os.makedirs, []

    def _wrap_os_makedirs(*args, **kwargs):
        # Simulate somebody else creating the directory first
        preempted.append(True)
        unwrapped(*args, **kwargs)
        unwrapped(*args, **kwargs)

    with Monkeypatch("os.makedirs", _wrap_os_makedirs):
        work_folder = os.path.join(temp_folder, "test")
        assert not make_dirs(work_folder)
        assert os.path.exists(work_folder)
        assert_equal(os.listdir(temp_folder), ["test"])
        assert_equal(preempted, [True])
Exemple #20
0
def main(argv):
    """Convert existing BAM-pipeline makefiles, rewriting each for the
    single prefix selected via the options.

    Returns 0 on success, or 1 on error (bad options, a makefile that
    would overwrite its own source, or a missing prefix)."""
    config, args = parse_options(argv)
    if config is None:
        return 1

    # Get default options for bam_pipeline
    bam_config, _ = bam_cfg.parse_config(args)
    makefiles = bam_pipeline.read_makefiles(bam_config, args)
    # Build .fai files for reference .fasta files
    bam_pipeline.index_references(bam_config, makefiles)

    for makefile in makefiles:
        mkfile_fname = makefile["Statistics"]["Filename"]
        bam_config.destination = os.path.dirname(mkfile_fname)
        tasks = bam_pipeline.build_pipeline_full(bam_config, makefile,
                                                 return_nodes=False)

        make_dirs(config.destination)
        makefile_name = add_postfix(makefile["Statistics"]["Filename"],
                                    config.postfix)
        makefile_path = reroot_path(config.destination, makefile_name)
        # Refuse to overwrite the makefile that is being converted.
        if samefile(makefile["Statistics"]["Filename"], makefile_path):
            sys.stderr.write("ERROR: Would overwrite source makefile at %r\n" % (makefile_path,))
            sys.stderr.write("       Please set --destination and/or --output-name-postfix\n")
            sys.stderr.write("       before continuing.\n")
            return 1

        print("Writing makefile", makefile_path)

        # Only the prefix selected via the options is converted; all
        # other prefixes are reported and skipped.
        found_prefix = False
        for prefix in makefile["Prefixes"]:
            if prefix != config.prefix:
                print("%sSkipping %s" % (_INDENTATION, prefix))
            else:
                found_prefix = True

        if not found_prefix:
            sys.stderr.write("\nERROR:\n")
            sys.stderr.write("Could not find prefix %r in %r! Aborting ...\n"
                             % (config.prefix, mkfile_fname))
            return 1

        with open(makefile_path, "w") as makefile_handle:
            bam_mkfile.print_header(dst=makefile_handle)
            makefile_handle.write("\n" * 3)

            # Emit one nested section per target/sample/library/lane.
            for target in tasks:
                target_name = add_postfix(target.name, config.postfix)
                print("%sTarget: %s -> %s" % (_INDENTATION,
                                              target.name,
                                              target_name))

                makefile_handle.write('%s"%s":\n' % (_INDENTATION * 0,
                                                     target_name))
                for prefix in target.prefixes:
                    if prefix.name != config.prefix:
                        continue

                    for sample in prefix.samples:
                        print("%sSample: %s" % (_INDENTATION * 2, sample.name))

                        makefile_handle.write('%s"%s":\n' % (_INDENTATION * 1,
                                                             sample.name))

                        for library in sample.libraries:
                            print("%sLibrary: %s" % (_INDENTATION * 3,
                                                     library.name))
                            makefile_handle.write('%s"%s":\n'
                                                  % (_INDENTATION * 2,
                                                     library.name))

                            sink_cache = {}
                            destination = os.path.join(target_name,
                                                       "reads",
                                                       sample.name,
                                                       library.name)

                            # Convert each lane; sinks are shared via
                            # 'sink_cache' and closed afterwards.
                            for lane in library.lanes:
                                convert_reads(config, destination, lane, sink_cache)
                            ReadSink.close_all_sinks()

                            for lane_name in sorted(sink_cache):
                                makefile_handle.write('%s"%s":\n' % (_INDENTATION * 3, lane_name))
                                for (reads_type, sink) in sorted(sink_cache[lane_name].items()):
                                    makefile_handle.write('%s%s "%s"\n'
                                                          % (_INDENTATION * 4,
                                                             ("%s:" % (reads_type,)).ljust(20),
                                                             sink.filename))
                                makefile_handle.write("\n")
        print("\tDone ...")
        print()

    return 0
Exemple #21
0
def test_make_dirs__empty_directory():
    """make_dirs must not raise when given an empty path."""
    make_dirs("")
Exemple #22
0
def test_make_dirs__permissions(temp_folder):
    """The requested mode must be applied to the created directory."""
    work_dir = os.path.join(temp_folder, "test_1")
    # The 0o… octal spelling is valid on Python 2.6+ and required on
    # Python 3; the legacy 0511 form is a SyntaxError under Python 3.
    assert make_dirs(work_dir, mode=0o511)
    stats = os.stat(work_dir)
    assert_equal(oct(stats.st_mode & 0o777), oct(0o511))
Exemple #23
0
def test_make_dirs__sub_directories(temp_folder):
    """make_dirs must create intermediate directories recursively."""
    assert not os.listdir(temp_folder)
    nested = os.path.join(temp_folder, "test", "123")
    assert make_dirs(nested)
    # Only "test" appears at the top level, with "123" inside it.
    assert_equal(os.listdir(temp_folder), ["test"])
    assert_equal(os.listdir(os.path.join(temp_folder, "test")), ["123"])
Exemple #24
0
def test_make_dirs__subdirs_return_values(temp_folder):
    """make_dirs returns True when it creates a folder, and False
    when the folder already exists."""
    parent = os.path.join(temp_folder, "test")
    child = os.path.join(parent, "234")
    assert make_dirs(parent)
    assert make_dirs(child)
    # Second attempt: the folder already exists.
    assert not make_dirs(child)
Exemple #25
0
def test_make_dirs__empty_directory():
    # An empty path must be accepted without raising an exception.
    make_dirs("")
Exemple #26
0
def test_make_dirs__permissions(temp_folder):
    # Verify that the requested mode is applied to the new directory.
    # NOTE(review): 0511 / 0777 are Python 2 octal literals; they are
    # a SyntaxError under Python 3 (use 0o511 / 0o777 there).
    work_dir = os.path.join(temp_folder, "test_1")
    assert make_dirs(work_dir, mode = 0511)
    stats   = os.stat(work_dir)
    assert_equal(oct(stats.st_mode & 0777), oct(0511))
Exemple #27
0
def test_make_dirs__create_dir(temp_folder):
    """A single new directory must be created where requested."""
    assert not os.listdir(temp_folder)
    target = os.path.join(temp_folder, "test123")
    assert make_dirs(target)
    # The new folder is now the only entry in the temp folder.
    assert_equal(os.listdir(temp_folder), ["test123"])
Exemple #28
0
 def _run(self, _config, _temp):
     """Validate the FASTQ input files against the configured quality
     offset, then touch the single output file to mark completion."""
     check_fastq_files(self.input_files, self._offset, True)
     destination = tuple(self.output_files)[0]
     make_dirs(os.path.dirname(destination))
     # Create an empty flag file; no contents are required.
     open(destination, "w").close()
Exemple #29
0
def test_make_dirs__subdirs_return_values(temp_folder):
    # make_dirs returns True when it creates the folder ...
    assert make_dirs(os.path.join(temp_folder, "test"))
    assert make_dirs(os.path.join(temp_folder, "test", "234"))
    # ... and False when the folder already exists.
    assert not make_dirs(os.path.join(temp_folder, "test", "234"))
Exemple #30
0
def test_make_dirs__sub_directories(temp_folder):
    # Intermediate directories must be created recursively.
    assert not os.listdir(temp_folder)
    assert make_dirs(os.path.join(temp_folder, "test", "123"))
    assert_equal(os.listdir(temp_folder), ["test"])
    assert_equal(os.listdir(os.path.join(temp_folder, "test")), ["123"])
Exemple #31
0
 def _run(self, _config, _temp):
     # Validate the FASTQ inputs against the expected quality offset.
     check_fastq_files(self.input_files, self._offset, True)
     # Touch the single output file to mark the task as completed.
     output_file = tuple(self.output_files)[0]
     make_dirs(os.path.dirname(output_file))
     with open(output_file, "w"):
         pass
Exemple #32
0
def main(argv):
    """Convert existing BAM-pipeline makefiles, rewriting each for the
    single prefix selected via the options.

    Returns 0 on success, or 1 on error (bad options, a makefile that
    would overwrite its own source, or a missing prefix)."""
    config, args = parse_options(argv)
    if config is None:
        return 1

    # Get default options for bam_pipeline
    bam_config, _ = bam_cfg.parse_config(args)
    makefiles = bam_pipeline.read_makefiles(bam_config, args)
    # Build .fai files for reference .fasta files
    bam_pipeline.index_references(bam_config, makefiles)

    for makefile in makefiles:
        mkfile_fname = makefile["Statistics"]["Filename"]
        bam_config.destination = os.path.dirname(mkfile_fname)
        tasks = bam_pipeline.build_pipeline_full(bam_config,
                                                 makefile,
                                                 return_nodes=False)

        make_dirs(config.destination)
        makefile_name = add_postfix(makefile["Statistics"]["Filename"],
                                    config.postfix)
        makefile_path = reroot_path(config.destination, makefile_name)
        # Refuse to overwrite the makefile that is being converted.
        if samefile(makefile["Statistics"]["Filename"], makefile_path):
            sys.stderr.write("ERROR: Would overwrite source makefile at %r\n" %
                             (makefile_path, ))
            sys.stderr.write(
                "       Please set --destination and/or --output-name-postfix\n"
            )
            sys.stderr.write("       before continuing.\n")
            return 1

        print("Writing makefile", makefile_path)

        # Only the prefix selected via the options is converted; all
        # other prefixes are reported and skipped.
        found_prefix = False
        for prefix in makefile["Prefixes"]:
            if prefix != config.prefix:
                print("%sSkipping %s" % (_INDENTATION, prefix))
            else:
                found_prefix = True

        if not found_prefix:
            sys.stderr.write("\nERROR:\n")
            sys.stderr.write("Could not find prefix %r in %r! Aborting ...\n" %
                             (config.prefix, mkfile_fname))
            return 1

        with open(makefile_path, "w") as makefile_handle:
            bam_mkfile.print_header(dst=makefile_handle)
            makefile_handle.write("\n" * 3)

            # Emit one nested section per target/sample/library/lane.
            for target in tasks:
                target_name = add_postfix(target.name, config.postfix)
                print("%sTarget: %s -> %s" %
                      (_INDENTATION, target.name, target_name))

                makefile_handle.write('%s"%s":\n' %
                                      (_INDENTATION * 0, target_name))
                for prefix in target.prefixes:
                    if prefix.name != config.prefix:
                        continue

                    for sample in prefix.samples:
                        print("%sSample: %s" % (_INDENTATION * 2, sample.name))

                        makefile_handle.write('%s"%s":\n' %
                                              (_INDENTATION * 1, sample.name))

                        for library in sample.libraries:
                            print("%sLibrary: %s" %
                                  (_INDENTATION * 3, library.name))
                            makefile_handle.write(
                                '%s"%s":\n' % (_INDENTATION * 2, library.name))

                            sink_cache = {}
                            destination = os.path.join(target_name, "reads",
                                                       sample.name,
                                                       library.name)

                            # Convert each lane; sinks are shared via
                            # 'sink_cache' and closed afterwards.
                            for lane in library.lanes:
                                convert_reads(config, destination, lane,
                                              sink_cache)
                            ReadSink.close_all_sinks()

                            for lane_name in sorted(sink_cache):
                                makefile_handle.write(
                                    '%s"%s":\n' %
                                    (_INDENTATION * 3, lane_name))
                                for (reads_type, sink) in sorted(
                                        sink_cache[lane_name].items()):
                                    makefile_handle.write(
                                        '%s%s "%s"\n' %
                                        (_INDENTATION * 4,
                                         ("%s:" % (reads_type, )).ljust(20),
                                         sink.filename))
                                makefile_handle.write("\n")
        print("\tDone ...")
        print()

    return 0
Exemple #33
0
def test_make_dirs__create_dir(temp_folder):
    # Creating a single folder must succeed, and the folder must be
    # the only entry in the (previously empty) temp folder.
    assert not os.listdir(temp_folder)
    assert make_dirs(os.path.join(temp_folder, "test123"))
    assert_equal(os.listdir(temp_folder), ["test123"])