def _run(self, _config, _temp):
    for filename in self.input_files:
        check_fasta_file(filename)

    output_file, = self.output_files
    make_dirs(os.path.dirname(output_file))
    with open(output_file, "w"):
        pass
def _create_temp_dir(self, _config):
    """Called by 'run' in order to create a temporary folder.
    To allow restarting from checkpoints, we use a fixed folder
    determined by the output_template."""
    temp = os.path.join(self._dirname, self._template % ("temp",))
    fileutils.make_dirs(temp)
    return temp
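# A worked example of how the fixed temporary folder is derived; the
# template and dirname values below are hypothetical, not taken from the
# original code:
#
#   self._dirname  = "analysis"
#   self._template = "results.%s"
#   self._template % ("temp",)        -> "results.temp"
#   os.path.join(self._dirname, ...)  -> "analysis/results.temp"
#
# Because the path is deterministic rather than randomly generated, a re-run
# after a crash finds any partial results left in the same folder.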
def test_move_file__move_to_existing_folder(temp_folder):
    assert make_dirs(os.path.join(temp_folder, "src"))
    assert make_dirs(os.path.join(temp_folder, "dst"))
    file_1 = os.path.join(temp_folder, "src", "file_1")
    file_2 = os.path.join(temp_folder, "dst", "file_2")
    set_file_contents(file_1, "2")
    move_file(file_1, file_2)
    assert_equal(os.listdir(os.path.dirname(file_1)), [])
    assert_equal(os.listdir(os.path.dirname(file_2)), ["file_2"])
    assert_equal(get_file_contents(file_2), "2")
def test_copy_file__copy_to_new_folder(temp_folder):
    assert make_dirs(os.path.join(temp_folder, "src"))
    file_1 = os.path.join(temp_folder, "src", "file_1")
    file_2 = os.path.join(temp_folder, "dst", "file_2")
    set_file_contents(file_1, "2")
    copy_file(file_1, file_2)
    assert_equal(os.listdir(os.path.dirname(file_1)), ["file_1"])
    assert_equal(os.listdir(os.path.dirname(file_2)), ["file_2"])
    assert_equal(get_file_contents(file_1), "2")
    assert_equal(get_file_contents(file_2), "2")
def _write_config_file(self, config, defaults):
    """Writes a basic config file, using the values previously found in
    the config files, and those specified on the command-line."""
    defaults_cfg = ConfigParser.SafeConfigParser()
    defaults_cfg.add_section("Defaults")
    for key in defaults:
        value = getattr(config, key)
        if isinstance(value, (types.ListType, types.TupleType)):
            value = ";".join(value)
        defaults_cfg.set("Defaults", key, str(value))

    filename = self._filenames[-1]
    make_dirs(os.path.dirname(filename))
    with open(filename, "w") as handle:
        defaults_cfg.write(handle)

    print_info("Wrote config file %r" % (filename,))
    sys.exit(0)
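# For illustration, a hedged sketch of the "Defaults" section written above;
# the option names are hypothetical, but the ";"-joined list format matches
# the isinstance(...) branch in _write_config_file:
#
#   [Defaults]
#   max_threads = 4
#   temp_root = /tmp/pypeline
#   makefiles = first.yaml;second.yaml
#
# Reading such a file back with the same stdlib parser:
#
#   parser = ConfigParser.SafeConfigParser()
#   parser.read(filename)
#   parser.get("Defaults", "makefiles").split(";")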
def with_temp_folder(func):
    """Decorator for unit-tests: Creates a unique temporary folder before
    running 'func'. The function is assumed to take at least one parameter,
    the first of which is assumed to represent the temporary folder."""
    temp_root = os.path.join(tempfile.gettempdir(), os.getlogin())
    make_dirs(temp_root)  # Ensure that this subdirectory exists

    @nose.tools.istest
    def _wrapper(*args, **kwargs):
        try:
            temp_folder = None
            temp_folder = tempfile.mkdtemp(dir=temp_root,
                                           prefix="pypeline_unit")
            func(temp_folder, *args, **kwargs)
        finally:
            if temp_folder:
                shutil.rmtree(temp_folder)
    _wrapper.__name__ = func.__name__ + "__wrapped_by_with_temp_folder"
    return _wrapper
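# A minimal usage sketch for the decorator above; the test body is
# hypothetical, but the calling convention follows from _wrapper, which
# passes the temporary folder as the first positional argument:
#
#   @with_temp_folder
#   def test_something(temp_folder):
#       assert os.path.isdir(temp_folder)
#
# The folder is removed in the 'finally' clause even when the test raises.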
def convert_reads(config, destination, record, sink_cache):
    # Source name is used, to re-merge split lanes
    name = record.tags.get("PU_src")
    destination = os.path.join(destination, name)
    make_dirs(os.path.join(config.destination, destination))

    def _open_se_sink(reads_type):
        key = (name, reads_type)
        if not get_in(sink_cache, key):
            filename = ReadSink.get_filename(destination, reads_type.lower())
            set_in(sink_cache, key, ReadSink.open(config.destination, filename))
        return key

    for (reads_type, bam_files) in record.bams.iteritems():
        # Processed reads are pre-aligned BAMs which have been cleaned up
        if reads_type in ("Paired", "Processed"):
            # Record "Single" reads; these may result from orphan SE reads
            _open_se_sink("Single")

            key = (name, "Paired")
            if not get_in(sink_cache, key):
                set_in(sink_cache, key,
                       PEReadSink.open(config.destination, destination))
        else:
            key = _open_se_sink(reads_type)

        sink = get_in(sink_cache, key)
        for filename in bam_files:
            print("%sProcessing file %r" % (_INDENTATION * 4, filename))
            with pysam.Samfile(filename) as handle:
                def _keep_record(record):
                    return (record.qual >= config.min_quality) and \
                        (len(record.seq) >= config.min_length)

                sink.write_records(record for record in handle
                                   if _keep_record(record))
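# Hedged sketch: the get_in/set_in helpers used above are assumed to be
# nested-dict accessors taking a tuple of keys. This illustrates the assumed
# semantics; it is not the library's actual implementation.
def _get_in_sketch(dictionary, keys, default=None):
    # Walk down one nesting level per key; a missing key yields the default.
    for key in keys:
        if not isinstance(dictionary, dict) or key not in dictionary:
            return default
        dictionary = dictionary[key]
    return dictionary


def _set_in_sketch(dictionary, keys, value):
    # Create intermediate dicts as needed, then assign at the final key.
    for key in keys[:-1]:
        dictionary = dictionary.setdefault(key, {})
    dictionary[keys[-1]] = value
# Under these semantics, sink_cache ends up shaped like
# {name: {"Paired": sink, "Single": sink}}, which matches the
# sorted(sink_cache[lane_name].items()) iteration in main() below.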
def test_move_dirs__permission_denied(temp_folder):
    dst_folder = os.path.join(temp_folder, "dst")
    file_1 = os.path.join(temp_folder, "file")
    file_2 = os.path.join(dst_folder, "file")
    set_file_contents(file_1, "1")

    # Make destination folder read-only
    assert make_dirs(os.path.join(temp_folder, "dst"))
    mode = os.stat(dst_folder).st_mode
    ro_mode = mode & ~(stat.S_IWUSR | stat.S_IWGRP | stat.S_IWOTH)
    os.chmod(dst_folder, ro_mode)

    # Non ENOENT errors should be re-raised:
    assert_raises(IOError, move_file, file_1, file_2)
def test_make_dirs__creation_preemted(temp_folder):
    unwrapped, preempted = os.makedirs, []

    def _wrap_os_makedirs(*args, **kwargs):
        # Simulate somebody else creating the directory first
        preempted.append(True)
        unwrapped(*args, **kwargs)
        unwrapped(*args, **kwargs)

    with Monkeypatch("os.makedirs", _wrap_os_makedirs):
        work_folder = os.path.join(temp_folder, "test")
        assert not make_dirs(work_folder)
        assert os.path.exists(work_folder)
        assert_equal(os.listdir(temp_folder), ["test"])
        assert_equal(preempted, [True])
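# The make_dirs() under test is assumed to return True when it creates the
# directory and False when the directory already exists (as when creation is
# preempted above), re-raising all other errors. A minimal sketch of a
# wrapper with those semantics; an illustration, not necessarily the tested
# implementation:
import errno
import os


def _make_dirs_sketch(directory, mode=0777):
    try:
        os.makedirs(directory, mode=mode)
        return True
    except OSError as error:
        # Tolerate only "already exists"; permission errors and the like
        # propagate to the caller (cf. test_move_dirs__permission_denied).
        if error.errno != errno.EEXIST:
            raise
        return False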
def run(self, _):
    handles = []
    try:
        sequences = []
        for fpath in self.input_files:
            handle = pysam.Samfile(fpath)
            handles.append(handle)

            sequence = izip_longest(handle, (), fillvalue=fpath)
            sequences.append(sequence)

        position = 0
        records = chain_sorted(*sequences, key=self._key_by_tid_pos)
        observed_reads = collections.defaultdict(list)
        for (record, fpath) in records:
            if record.pos != position:
                self._process_reads(observed_reads, self.output_files)
                observed_reads.clear()
                position = record.pos
            elif record.is_unmapped:
                break

            # Ignore supplementary / secondary alignments
            if not record.flag & 0x900:
                key = (record.is_reverse, record.qname,
                       record.seq, record.qual)
                observed_reads[key].append(fpath)
        self._process_reads(observed_reads, self.output_files)

        # Everything is ok, touch the output files
        for fpath in self.output_files:
            make_dirs(os.path.dirname(fpath))
            with open(fpath, "w"):
                pass
    finally:
        for handle in handles:
            handle.close()
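# chain_sorted() above is assumed to merge several individually sorted
# iterators into one sorted stream, much like heapq.merge() but with support
# for a key function (which Python 2's heapq.merge() lacks). A sketch of the
# assumed behavior, using decorate/merge/undecorate with the input index as
# a tie-breaker so the items themselves are never compared:
import heapq


def _chain_sorted_sketch(*sequences, **kwargs):
    key = kwargs["key"]

    def decorate(index, sequence):
        # Tag every item with (sort_key, input_index) before merging.
        for item in sequence:
            yield (key(item), index, item)

    merged = heapq.merge(*(decorate(index, sequence)
                           for (index, sequence) in enumerate(sequences)))
    for (_, _, item) in merged:
        yield item
# In run() above, each sequence yields (record, fpath) pairs, and the key
# (self._key_by_tid_pos) is applied to each such pair.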
def main(argv): config, args = parse_options(argv) if config is None: return 1 # Get default options for bam_pipeline bam_config, _ = bam_cfg.parse_config(args) makefiles = bam_pipeline.read_makefiles(bam_config, args) # Build .fai files for reference .fasta files bam_pipeline.index_references(bam_config, makefiles) for makefile in makefiles: mkfile_fname = makefile["Statistics"]["Filename"] bam_config.destination = os.path.dirname(mkfile_fname) tasks = bam_pipeline.build_pipeline_full(bam_config, makefile, return_nodes=False) make_dirs(config.destination) makefile_name = add_postfix(makefile["Statistics"]["Filename"], config.postfix) makefile_path = reroot_path(config.destination, makefile_name) if samefile(makefile["Statistics"]["Filename"], makefile_path): sys.stderr.write("ERROR: Would overwrite source makefile at %r\n" % (makefile_path,)) sys.stderr.write(" Please set --destination and/or --output-name-postfix\n") sys.stderr.write(" before continuing.\n") return 1 print("Writing makefile", makefile_path) found_prefix = False for prefix in makefile["Prefixes"]: if prefix != config.prefix: print("%sSkipping %s" % (_INDENTATION, prefix)) else: found_prefix = True if not found_prefix: sys.stderr.write("\nERROR:\n") sys.stderr.write("Could not find prefix %r in %r! Aborting ...\n" % (config.prefix, mkfile_fname)) return 1 with open(makefile_path, "w") as makefile_handle: bam_mkfile.print_header(dst=makefile_handle) makefile_handle.write("\n" * 3) for target in tasks: target_name = add_postfix(target.name, config.postfix) print("%sTarget: %s -> %s" % (_INDENTATION, target.name, target_name)) makefile_handle.write('%s"%s":\n' % (_INDENTATION * 0, target_name)) for prefix in target.prefixes: if prefix.name != config.prefix: continue for sample in prefix.samples: print("%sSample: %s" % (_INDENTATION * 2, sample.name)) makefile_handle.write('%s"%s":\n' % (_INDENTATION * 1, sample.name)) for library in sample.libraries: print("%sLibrary: %s" % (_INDENTATION * 3, library.name)) makefile_handle.write('%s"%s":\n' % (_INDENTATION * 2, library.name)) sink_cache = {} destination = os.path.join(target_name, "reads", sample.name, library.name) for lane in library.lanes: convert_reads(config, destination, lane, sink_cache) ReadSink.close_all_sinks() for lane_name in sorted(sink_cache): makefile_handle.write('%s"%s":\n' % (_INDENTATION * 3, lane_name)) for (reads_type, sink) in sorted(sink_cache[lane_name].items()): makefile_handle.write('%s%s "%s"\n' % (_INDENTATION * 4, ("%s:" % (reads_type,)).ljust(20), sink.filename)) makefile_handle.write("\n") print("\tDone ...") print() return 0
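# A conventional entry point for the tool above; a sketch assuming main()
# receives the command-line arguments minus the program name, as the
# parse_options(argv) call suggests:
#
#   if __name__ == "__main__":
#       sys.exit(main(sys.argv[1:]))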
def test_make_dirs__empty_directory():
    make_dirs("")
def test_make_dirs__permissions(temp_folder): work_dir = os.path.join(temp_folder, "test_1") assert make_dirs(work_dir, mode=0511) stats = os.stat(work_dir) assert_equal(oct(stats.st_mode & 0777), oct(0511))
def test_make_dirs__sub_directories(temp_folder):
    assert not os.listdir(temp_folder)
    assert make_dirs(os.path.join(temp_folder, "test", "123"))
    assert_equal(os.listdir(temp_folder), ["test"])
    assert_equal(os.listdir(os.path.join(temp_folder, "test")), ["123"])
def test_make_dirs__subdirs_return_values(temp_folder):
    assert make_dirs(os.path.join(temp_folder, "test"))
    assert make_dirs(os.path.join(temp_folder, "test", "234"))
    assert not make_dirs(os.path.join(temp_folder, "test", "234"))
def test_make_dirs__permissions(temp_folder): work_dir = os.path.join(temp_folder, "test_1") assert make_dirs(work_dir, mode = 0511) stats = os.stat(work_dir) assert_equal(oct(stats.st_mode & 0777), oct(0511))
def test_make_dirs__create_dir(temp_folder):
    assert not os.listdir(temp_folder)
    assert make_dirs(os.path.join(temp_folder, "test123"))
    assert_equal(os.listdir(temp_folder), ["test123"])
def _run(self, _config, _temp):
    check_fastq_files(self.input_files, self._offset, True)
    output_file = tuple(self.output_files)[0]
    make_dirs(os.path.dirname(output_file))
    with open(output_file, "w"):
        pass