def test_od_copy():
    od = OrderedDiot()
    od.i = 0
    od2 = od.copy()
    assert od2.i == 0
    od2.j = 1
    od3 = od.copy()
    assert "j" not in od3
def __init__(self, *procs, **kwargs):
    """@API
    Constructor
    @params:
        *procs (Proc) : the set of processes
        **kwargs: Other arguments to instantiate a `ProcSet`
            depends (bool): Whether to auto-deduce depends. Default: `True`
            id (str): The id of the procset. Default: `None` (the variable name)
            tag (str): The tag of the processes. Default: `None`
            copy (bool): Whether to copy the processes or just use them. Default: `True`
    """
    self.__dict__['id'] = kwargs.get('id') or varname(context=101)
    self.__dict__['tag'] = kwargs.get('tag')
    self.__dict__['starts'] = Proxy()
    self.__dict__['ends'] = Proxy()
    self.__dict__['delegates'] = OrderedDiot()
    self.__dict__['procs'] = OrderedDiot()
    self.__dict__['modules'] = Diot(diot_nest=False)
    # save initial states before a module is called
    # states will be resumed before each module is called
    self.__dict__['initials'] = Diot(diot_nest=False)

    ifcopy = kwargs.get('copy', True)
    depends = kwargs.get('depends', True)

    prevproc = None
    for proc in procs:
        assert hasattr(proc, 'id') and hasattr(proc, 'tag'), \
            'Argument has to be a Proc object: %r.' % proc

        if ifcopy:
            self.procs[proc.id] = proc.copy(
                proc.id,
                tag=(self.tag or proc.tag.split('@', 1)[0]) + '@' + self.id)
        else:
            self.procs[proc.id] = proc
            proc.config.tag = (self.tag
                               or proc.tag.split('@', 1)[0]) + '@' + self.id

        if depends and prevproc is None:
            self.starts.add(self[proc.id])

        if depends and prevproc:
            self.procs[proc.id].depends = prevproc

        prevproc = self.procs[proc.id]

    if depends and prevproc:
        self.ends.add(prevproc)

    self.delegate('input', 'starts')
    self.delegate('depends', 'starts')
    self.delegate('ex*', 'ends')
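# Usage sketch for the ProcSet constructor above. This is a minimal,
# hypothetical example: it assumes `Proc` and `ProcSet` are importable from
# the `pyppl` package (as elsewhere in this project); the ids and tags are
# placeholders, not taken from the source.
from pyppl import Proc, ProcSet

pAlign = Proc(id='pAlign', tag='demo')
pSort = Proc(id='pSort', tag='demo')

# With the defaults (copy=True, depends=True), both procs are copied and
# retagged as '<tag>@pAlignSort', pSort is made to depend on pAlign, and
# pAlign/pSort become the start/end of the set.
pset = ProcSet(pAlign, pSort, id='pAlignSort')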
def test_od_iter():
    od = OrderedDiot([("b", 1), ("a", 2)])
    assert list(od) == ["b", "a"]
    it = iter(od)
    assert next(it) == "b"
    assert next(it) == "a"

    od.__diot__["orderedkeys"] = ["a", "b"]
    assert list(od) == ["a", "b"]
    it = iter(od)
    assert next(it) == "a"
    assert next(it) == "b"
def suffix(self):
    """@API
    Calculate a uid for the process according to the configuration
    The philosophy:
    1. procs from different scripts must have different suffixes (sys.argv[0])
    2. procs from the same script:
       - procs with different id or tag have different suffixes
       - procs with different input have different suffixes (depends, input)
    @returns:
        (str): The unique id of the process
    """
    if self.props._suffix:
        return self.props._suffix

    sigs = OrderedDiot()
    # use cmdy.which instead? what about "python test.py"
    sigs.argv0 = path.realpath(sys.argv[0])
    sigs.id = self.id
    sigs.tag = self.tag

    if isinstance(self.config.input, dict):
        sigs.input = self.config.input.copy()
        for key, val in self.config.input.items():
            # lambda is not picklable
            # convert others to strings to make sure they are picklable. Issue #65
            sigs.input[key] = utils.funcsig(val) if callable(val) else str(val)
    else:
        sigs.input = str(self.config.input)

    # Add depends to avoid the same suffix for processes with the same depends
    # but different input files.
    # They could otherwise get the same suffix because they are using input
    # callbacks, and the callbacks can be identical even if the input files differ.
    if self.depends:
        sigs.depends = [p.name(True) + '#' + p.suffix for p in self.depends]

    try:
        signature = sigs.to_json()
    except TypeError as exc:  # pragma: no cover
        raise ProcInputError('Unexpected input data type: %s' % exc) from None

    logger.debug('Suffix decided by: %s' % signature, proc=self.id)
    # the suffix depends only on where the process comes from (sys.argv[0]) and
    # its name (id and tag), to avoid too many different workdirs being generated
    self.props._suffix = utils.uid(signature)
    #self.props.suffix = utils.uid(path.realpath(sys.argv[0]) + ':' + self.id)
    return self._suffix
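# The actual `utils.uid` helper is not shown in this section. As an
# illustration only (an assumption, not the project's implementation), a
# short deterministic digest of the JSON signature could look like this:
import hashlib

def illustrative_uid(signature: str, length: int = 8) -> str:
    # Hypothetical stand-in for utils.uid: a stable, truncated digest so that
    # identical (argv0, id, tag, input, depends) signatures map to the same
    # workdir suffix.
    return hashlib.md5(signature.encode('utf-8')).hexdigest()[:length]

assert illustrative_uid('{"id": "pSort"}') == illustrative_uid('{"id": "pSort"}')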
def test_clear():
    bx = OrderedDiot()
    bx.a = 1
    bx.c = 4
    bx['g'] = 7
    bx.d = 2
    assert list(bx.keys()) == ['a', 'c', 'g', 'd']

    bx.clear()
    assert bx == {}
    assert list(bx.keys()) == []
    assert bx.__diot__['orderedkeys'] == []
def test_succeed(job0, caplog):
    job0.rc = 1
    job0.proc.rc = [0]
    assert not job0.succeed()

    job0.proc.rc = [0, 1]
    (job0.dir / 'output').mkdir()
    job0.proc.expect = TemplateLiquid('')
    assert job0.succeed()

    job0.output = OrderedDiot(outfile=('file',
                                       job0.dir / 'output' / 'notexists'))
    job0.rc = 1
    caplog.clear()
    assert not job0.succeed()
    assert 'Outfile not generated' in caplog.text
    assert job0.rc == 1 + (1 << 9)

    (job0.dir / 'output' / 'notexists').write_text('')
    job0.proc.expect = TemplateLiquid('grep abc {{o.outfile}}')
    job0.rc = 1
    caplog.clear()
    assert not job0.succeed()
    assert 'Check expectation' in caplog.text
    assert job0.rc == 1 + (1 << 10)
def annotate_args(cls, args, warn_missing):
    if not args:
        cls.annotated.args = None
        return

    cls.annotated.args = OrderedDiot(diot_nest=False)
    parsed_items = {
        parsed_item.name: parsed_item
        for parsed_item in args.section
    }
    for key, val in cls.args.items():
        if key not in parsed_items:
            if warn_missing:
                warnings.warn(
                    f"Missing annotation for args: {key}",
                    AnnotateMissingWarning,
                )
            cls.annotated.args[key] = ParsedItem(
                name=key,
                type=None,
                desc="Undescribed.",
                more=[
                    ParsedPara([f'Default: {repr(val) if val == "" else val}'])
                ],
            )
        else:
            item = parsed_items[key]
            cls.annotated.args[key] = ParsedItem(
                name=key,
                type=item.type,
                desc=item.desc,
                more=(item.more or [])
                + [ParsedPara([f'Default: {repr(val) if val == "" else val}'])],
            )
def annotate_input(cls, input, warn_missing):
    if not input:
        cls.annotated.input = None
        return

    cls.annotated.input = OrderedDiot(diot_nest=False)
    parsed_items = {
        parsed_item.name: parsed_item
        for parsed_item in input.section
    }
    input_keys = cls.input
    if isinstance(input_keys, str):
        input_keys = [input_key.strip() for input_key in input_keys.split(",")]

    for input_key_type in input_keys or []:
        if ":" not in input_key_type:
            input_key_type = f"{input_key_type}:{ProcInputType.VAR}"

        input_key, input_type = input_key_type.split(":", 1)
        if input_key not in parsed_items:
            if warn_missing:
                warnings.warn(
                    f"Missing annotation for input: {input_key}",
                    AnnotateMissingWarning,
                )
            cls.annotated.input[input_key] = ParsedItem(
                name=input_key,
                type=input_type,
                desc=None,
                more=None,
            )
        else:
            item = parsed_items[input_key]
            cls.annotated.input[input_key] = ParsedItem(
                name=item.name,
                type=input_type,
                desc=item.desc,
                more=item.more,
            )
def test_reset(job0):
    job0.ntry = 0
    (job0.dir / 'output').mkdir()
    (job0.dir / 'output' / 'outfile.txt').write_text('')
    (job0.dir / 'output' / '.jobcache').mkdir()
    (job0.dir / 'job.rc').write_text('')
    (job0.dir / 'job.stdout').write_text('out')
    (job0.dir / 'job.stderr').write_text('err')
    (job0.dir / 'job.pid').write_text('')
    (job0.dir / 'retry.1').mkdir()
    job0.reset()
    assert not fs.exists(job0.dir / 'retry.1')
    assert not fs.exists(job0.dir / 'job.rc')
    # recreated
    assert (job0.dir / 'job.stdout').read_text() == ''
    assert (job0.dir / 'job.stderr').read_text() == ''
    assert not fs.exists(job0.dir / 'job.pid')
    assert fs.exists(job0.dir / 'output')  # recreated
    assert not fs.exists(job0.dir / 'output' / 'outfile.txt')

    job0.ntry = 1
    (job0.dir / 'output' / 'outfile.txt').write_text('')
    (job0.dir / 'output' / '.jobcache' / 'cached.txt').write_text('')
    job0.reset()
    assert fs.exists(job0.dir / 'retry.1')
    assert not fs.exists(job0.dir / 'retry.1' / '.jobcache')
    assert fs.exists(job0.dir / 'output' / '.jobcache' / 'cached.txt')

    # remove whole output directory
    job0.ntry = 0
    fs.remove(job0.dir / 'output' / '.jobcache')
    (job0.dir / 'output' / 'outfile.txt').write_text('')
    job0.reset()
    assert not fs.exists(job0.dir / 'output' / 'outfile.txt')

    # move whole output directory
    job0.ntry = 1
    fs.remove(job0.dir / 'output' / '.jobcache')
    (job0.dir / 'output' / 'outfile.txt').write_text('')
    job0.reset()
    assert not fs.exists(job0.dir / 'output' / 'outfile.txt')

    # restore output directory and stdout, stderr
    job0.output = OrderedDiot(
        outdir=('dir', job0.dir / 'output' / 'outdir'),
        outfile=('stdout', job0.dir / 'output' / 'outfile'),
        errfile=('stderr', job0.dir / 'output' / 'errfile'),
    )
    job0.ntry = 0
    job0.reset()
    assert fs.isdir(job0.dir / 'output' / 'outdir')
    assert fs.islink(job0.dir / 'output' / 'outfile')
    assert fs.islink(job0.dir / 'output' / 'errfile')
    assert fs.samefile(job0.dir / 'job.stdout',
                       job0.dir / 'output' / 'outfile')
    assert fs.samefile(job0.dir / 'job.stderr',
                       job0.dir / 'output' / 'errfile')
    # what if outdir exists
    job0.reset()
def _docSecs(self):
    ret = OrderedDiot(desc=[])
    name = 'desc'
    for line in self.docs:
        if not line.startswith('@'):
            ret[name].append(line)
        else:
            name = line.strip('@: ')
            ret[name] = []
    return ret
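# Standalone sketch of what the splitter above produces, applying the same
# logic to illustrative docstring lines (the sample text is made up):
from diot import OrderedDiot

def doc_sections(lines):
    # Everything before the first '@xxx:' marker goes into 'desc'; each
    # '@xxx:' line starts a new section, and insertion order is kept.
    ret = OrderedDiot(desc=[])
    name = 'desc'
    for line in lines:
        if not line.startswith('@'):
            ret[name].append(line)
        else:
            name = line.strip('@: ')
            ret[name] = []
    return ret

secs = doc_sections(['Sort the blocks.', '@params:', '    infile: the input'])
assert list(secs.keys()) == ['desc', 'params']
assert secs.desc == ['Sort the blocks.']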
def sort(self):
    """Sort the blocks in a wiggle file by chrom and start."""
    block_ids = sorted(
        self.blocks.keys(),
        key=lambda block: (_chrom_to_sortable(self.blocks[block].chrom),
                           self.blocks[block].start))
    orig_blocks = self.blocks
    self.blocks = OrderedDiot()
    for block_id in block_ids:
        self.blocks[block_id] = orig_blocks[block_id]
    del orig_blocks
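# `_chrom_to_sortable` is referenced above but not defined in this section.
# A minimal sketch of what such a key function might do (purely an
# assumption about its behavior, not the actual helper):
import re

def _chrom_to_sortable_sketch(chrom):
    # Hypothetical: numeric chromosomes sort numerically (chr2 before chr10);
    # non-numeric ones (chrX, chrY, chrM) sort after them, alphabetically.
    match = re.fullmatch(r'(?:chr)?(\d+)', chrom)
    if match:
        return (0, int(match.group(1)), '')
    return (1, 0, chrom)

assert sorted(['chr10', 'chr2', 'chrX'], key=_chrom_to_sortable_sketch) == \
    ['chr2', 'chr10', 'chrX']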
def annotate_output(cls, output, warn_missing):
    if not output:
        cls.annotated.output = None
        return

    cls.annotated.output = OrderedDiot(diot_nest=False)
    parsed_items = {
        parsed_item.name: parsed_item
        for parsed_item in output.section
    }
    # output can be an arbitrary template string;
    # its structure is resolved after it's rendered.
    # Here we try to parse the output only if it is plain strings.
    # For example:
    # >>> output = "afile:file:..., bfile:file:..."
    # or
    # >>> output = ["afile:file:...", "bfile:file:..."]
    # Give up parsing if any error happens.
    output = cls.output

    def parse_one_output(out):
        parts = out.split(":")
        if not parts[0].isidentifier():
            return None
        if len(parts) < 3:
            return parts[0], ProcInputType.VAR, parts[1]
        return parts

    if not isinstance(output, (list, tuple)):
        output = [out.strip() for out in output.split(",")]

    for out in output:
        parsed = parse_one_output(out)
        if not parsed:
            continue

        if parsed[0] not in parsed_items:
            if warn_missing:
                warnings.warn(
                    f"Missing annotation for output: {parsed[0]}",
                    AnnotateMissingWarning,
                )
            cls.annotated.output[parsed[0]] = ParsedItem(
                name=parsed[0],
                type=parsed[1],
                desc="Undescribed.",
                more=[ParsedPara([f"Default: {parsed[2]}"])],
            )
        else:
            cls.annotated.output[parsed[0]] = ParsedItem(
                name=parsed[0],
                type=parsed[1],
                desc=parsed_items[parsed[0]].desc,
                more=(parsed_items[parsed[0]].more or [])
                + [ParsedPara([f"Default: {parsed[2]}"])],
            )
def get_enabled_plugins(self, raw: bool = False) -> Dict[str, SimplugWrapper]:
    """Get a mapping of all enabled plugins

    Args:
        raw: Whether to return the raw plugin (the one passed in at
            registration). If a plugin was registered as a module by its
            name, the module is returned.

    Returns:
        The mapping of all enabled plugins
    """
    return OrderedDiot([
        (name, plugin.plugin if raw else plugin)
        for name, plugin in self.hooks._registry.items()
        if plugin.enabled
    ])
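# Usage sketch for the method above, assuming the surrounding class is
# `Simplug` from the `simplug` package (as the SimplugWrapper type hint
# suggests); the project name and plugin class are made up for illustration.
from simplug import Simplug

simplug = Simplug('demo-project')

class MyPlugin:
    """An illustrative, do-nothing plugin."""

simplug.register(MyPlugin)

# name -> SimplugWrapper, in registration order; raw=True unwraps to the
# object/module/class that was registered.
wrapped = simplug.get_enabled_plugins()
raw = simplug.get_enabled_plugins(raw=True)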
def test_prepoutput(job0, tmpdir):
    job0.proc.output = OrderedDiot()
    job0._prepOutput()
    assert len(job0.output) == 0

    job0.proc.output.out = ('var', TemplateLiquid('abc'))
    job0.proc.output.outfile = ('file',
                                TemplateLiquid('outfile{{job.index}}.txt'))
    job0._prepOutput()
    assert len(job0.output) == 2
    assert job0.output.out == ('var', 'abc')
    assert job0.output.outfile == ('file',
                                   job0.dir / 'output' / 'outfile0.txt')

    job0.proc.output.clear()
    job0.proc.output.abs = ('file', TemplateLiquid('/a/b/c'))
    with pytest.raises(JobOutputParseError):
        job0._prepOutput()
def test_or_ior():
    a = Diot({"data": 2, "count": 5})
    b = Diot(data=2, count=5)

    c = a | {"data": 3}
    assert c == {"data": 3, "count": 5}

    c = a | [("data", 3)]
    assert c == {"data": 3, "count": 5}

    a |= {"data": 3}
    assert a == {"data": 3, "count": 5}

    with pytest.raises(TypeError):
        a | 1

    od = OrderedDiot([("b", 1), ("a", 2)])
    od |= {"a": 1, "b": 2}
    assert od.__diot__["orderedkeys"] == ["b", "a"]
    assert od.a == 1
    assert od.b == 2
def test_inheritance_copy():
    class Box2(Diot):
        pass

    b = Box2(a=1)
    c = b.copy()
    assert c == b
    assert isinstance(c, Diot)

    c = b.__copy__()
    assert c == b
    assert isinstance(c, Diot)

    d = OrderedDiot()
    d.b = 1
    d.a = 0
    d.x = 9
    assert list(d.copy().keys()) == ['b', 'a', 'x']
def test_signature(job0, tmpdir, caplog):
    fs.remove(job0.dir / 'job.script')
    assert job0.signature() == ''

    (job0.dir / 'job.script').write_text('')
    assert job0.signature() == Diot(
        script=filesig(job0.dir / 'job.script'),
        i={'var': {}, 'file': {}, 'files': {}},
        o={'var': {}, 'file': {}, 'dir': {}})

    infile = tmpdir / 'test_signature_input.txt'
    infile.write_text('')
    infile1 = tmpdir / 'test_signature_input_not_exists.txt'
    job0.input = Diot(invar=('var', 'abc'),
                      infile=('file', infile),
                      infiles=('files', [infile]))
    job0._signature = None
    assert job0.signature().i == {
        'var': {'invar': 'abc'},
        'file': {'infile': filesig(infile)},
        'files': {'infiles': [filesig(infile)]},
    }

    job0.input = Diot(invar=('var', 'abc'), infile=('file', infile1))
    job0._signature = None
    assert job0.signature() == ''
    assert 'Empty signature because of input file' in caplog.text

    job0.input = Diot(invar=('var', 'abc'), infiles=('files', [infile1]))
    job0._signature = None
    assert job0.signature() == ''
    assert 'Empty signature because of one of input files' in caplog.text

    job0.input = {}
    outfile = tmpdir / 'test_signature_outfile.txt'
    outfile.write_text('')
    outfile1 = tmpdir / 'test_signature_outfile_not_exists.txt'
    outdir = tmpdir / 'test_signature_outdir'
    outdir.mkdir()
    outdir1 = tmpdir / 'test_signature_outdir_not_exists'
    job0.output = OrderedDiot(out=('var', 'abc'),
                              outfile=('file', outfile),
                              outdir=('dir', outdir))
    job0._signature = None
    assert job0.signature().o == {
        'var': {'out': 'abc'},
        'file': {'outfile': filesig(outfile)},
        'dir': {'outdir': filesig(outdir, dirsig=job0.proc.dirsig)}
    }

    job0.output = OrderedDiot(outfile=('file', outfile1))
    job0._signature = None
    assert job0.signature() == ''
    assert 'Empty signature because of output file:' in caplog.text

    job0.output = OrderedDiot(outdir=('dir', outdir1))
    job0._signature = None
    assert job0.signature() == ''
    assert 'Empty signature because of output dir:' in caplog.text
def test_ordereddiot_repr():
    d = OrderedDiot(a_b=1)
    assert d.a_b == 1
    assert repr(d) == "OrderedDiot([('a_b', 1)])"
def test_reversed():
    bx = OrderedDiot()
    bx.a = 1
    bx.c = 2
    assert list(reversed(bx)) == ['c', 'a']
def test_iter():
    bx = OrderedDiot()
    bx.a = 1
    bx.c = 2
    assert list(bx.__iter__()) == ['a', 'c']
def test_ordered_box():
    bx = OrderedDiot(h=1)
    bx.a = 1
    bx.c = 4
    bx['g'] = 7
    bx.d = 2
    assert list(bx.keys()) == ['h', 'a', 'c', 'g', 'd']

    del bx.a
    bx.pop('c')
    bx.__delattr__('g')
    assert list(bx.keys()) == ['h', 'd']
def test_export(job0, tmpdir, caplog):
    job0.proc.exdir = ''
    job0.export()
    assert 'Exported' not in caplog.text

    job0.proc.exdir = '/path/not/exists'
    with pytest.raises(AssertionError):
        job0.export()

    job0.proc.exdir = tmpdir / 'test_export'
    job0.proc.exdir.mkdir()
    job0.proc.expart = None
    with pytest.raises(AssertionError):
        job0.export()

    job0.proc.expart = []
    job0.export()
    assert 'Exported' not in caplog.text

    # export everything
    outfile1 = job0.dir / 'output' / 'test_export_outfile.txt'
    outfile1.parent.mkdir()
    outfile1.write_text('')
    job0.output = OrderedDiot(outfile=('file', outfile1))
    job0.proc.exhow = 'copy'
    job0.proc.exow = True
    job0.proc._log.shorten = 0
    job0.export()
    assert fs.exists(job0.proc.exdir / outfile1.name)
    assert not fs.islink(outfile1)
    assert not fs.samefile(outfile1, job0.proc.exdir / outfile1.name)
    assert ('Exported: %s' % (job0.proc.exdir / outfile1.name)) in caplog.text

    job0.proc.exhow = 'move'
    job0.export()
    assert fs.exists(job0.proc.exdir / outfile1.name)
    assert fs.islink(outfile1)
    assert fs.samefile(outfile1, job0.proc.exdir / outfile1.name)
    assert ('Exported: %s' % (job0.proc.exdir / outfile1.name)) in caplog.text

    # outfile is a link, then copy the file
    job0.export()
    assert fs.exists(job0.proc.exdir / outfile1.name)
    assert not fs.islink(job0.proc.exdir / outfile1.name)
    assert fs.islink(outfile1)
    assert fs.samefile(outfile1, job0.proc.exdir / outfile1.name)

    job0.proc.exhow = 'link'
    job0.export()
    assert fs.exists(job0.proc.exdir / outfile1.name)
    assert fs.islink(job0.proc.exdir / outfile1.name)
    assert not fs.islink(outfile1)
    assert fs.samefile(outfile1, job0.proc.exdir / outfile1.name)

    job0.proc.exhow = 'gzip'
    job0.export()
    assert fs.exists(job0.proc.exdir / (outfile1.name + '.gz'))

    job0.proc.expart = [TemplateLiquid('outfile')]
    fs.remove(job0.proc.exdir / (outfile1.name + '.gz'))
    job0.export()
    assert fs.exists(job0.proc.exdir / (outfile1.name + '.gz'))

    job0.proc.expart = [TemplateLiquid('*.txt')]
    fs.remove(job0.proc.exdir / (outfile1.name + '.gz'))
    job0.export()
    assert fs.exists(job0.proc.exdir / (outfile1.name + '.gz'))
def test_isexptcached(job0, tmpdir, caplog):
    job0.proc.cache = False
    assert not job0.isExptCached()

    job0.proc.cache = 'export'
    job0.proc.exhow = 'link'
    assert not job0.isExptCached()
    assert 'Job is not export-cached using symlink export.' in caplog.text
    caplog.clear()

    job0.proc.exhow = 'copy'
    job0.proc.expart = [TemplateLiquid('outfile')]
    assert not job0.isExptCached()
    assert 'Job is not export-cached using partial export.' in caplog.text
    caplog.clear()

    job0.proc.expart = None
    job0.proc.exdir = ''
    assert not job0.isExptCached()
    assert ('Job is not export-cached since export directory is not set.'
            in caplog.text)
    caplog.clear()

    job0.proc.exdir = tmpdir / 'test_isexptcached_exdir'
    job0.proc.exdir.mkdir()
    outfile1 = tmpdir / 'test_isexptcached_outfile1.txt'
    outfile1.write_text('')
    outfile2 = tmpdir / 'test_isexptcached_outfile_not_exists.txt'
    outdir1 = tmpdir / 'test_isexptcached_outdir1'
    outdir1.mkdir()
    fs.gzip(outfile1, job0.proc.exdir / (outfile1.name + '.gz'))
    fs.gzip(outdir1, job0.proc.exdir / (outdir1.name + '.tgz'))
    job0.output = OrderedDiot(outfile=('file', outfile1),
                              outdir=('dir', outdir1),
                              out=('var', 'abc'))
    # overwriting existing
    (job0.dir / 'output').mkdir()
    (job0.dir / 'output' / outfile1.name).write_text('')
    job0.proc.exhow = 'gzip'
    assert job0.isExptCached()
    assert 'Overwrite file for export-caching:' in caplog.text
    assert job0.isTrulyCached()
    caplog.clear()

    fs.remove(job0.proc.exdir / (outfile1.name + '.gz'))
    assert not job0.isExptCached()
    assert 'Job is not export-cached since exported file not exists:' in caplog.text
    caplog.clear()

    job0.output = OrderedDiot(outfile=('file', outfile1))
    job0.proc.exhow = 'move'
    assert not job0.isExptCached()
    assert 'Job is not export-cached since exported file not exists:' in caplog.text

    fs.link(outfile1, job0.proc.exdir / outfile1.name)
    assert job0.isExptCached()
    caplog.clear()

    # overwriting existing
    fs.remove(job0.proc.exdir / outfile1.name)
    (job0.proc.exdir / outfile1.name).write_text('')
    assert job0.isExptCached()
    assert 'Overwrite file for export-caching: ' in caplog.text
def test_prebuild(job0, tmp_path, caplog):
    job0.proc.config.export_dir = False
    assert not job0.is_cached()

    job0.proc.cache = 'export'
    job0.proc.config.export_dir = 'export'
    job0.proc.config.export_how = 'link'
    job_prebuild(job0)
    assert not job0.is_cached()
    assert 'Job is not export-cached using symlink export.' in caplog.text
    caplog.clear()

    job0.proc.config.export_how = 'copy'
    job0.proc.config.export_part = ['outfile']
    job_prebuild(job0)
    assert not job0.is_cached()
    assert 'Job is not export-cached using partial export.' in caplog.text
    caplog.clear()

    job0.proc.config.export_part = None
    job0.proc.config.export_dir = ''
    job_prebuild(job0)
    assert not job0.is_cached()
    caplog.clear()

    job0.proc.config.export_dir = tmp_path / 'test_is_cached_exdir'
    job0.proc.config.export_dir.mkdir()
    outfile1 = tmp_path / 'test_is_cached_outfile1.txt'
    outfile1.write_text('')
    outfile2 = tmp_path / 'test_is_cached_outfile_not_exists.txt'
    outdir1 = tmp_path / 'test_is_cached_outdir1'
    outdir1.mkdir()
    fs.gzip(outfile1, job0.proc.config.export_dir / (outfile1.name + '.gz'))
    fs.gzip(outdir1, job0.proc.config.export_dir / (outdir1.name + '.tgz'))
    job0.__attrs_property_cached__['output'] = OrderedDiot(
        outfile=('file', outfile1),
        outdir=('dir', outdir1),
        out=('var', 'abc'))
    # overwriting existing
    (job0.dir / 'output').mkdir()
    (job0.dir / 'output' / outfile1.name).write_text('')
    job0.proc.config.export_how = 'gzip'
    job_prebuild(job0)
    assert job0.is_cached()
    assert 'Overwrite file for export-caching:' in caplog.text
    assert job0.is_cached()
    caplog.clear()

    fs.remove(job0.proc.config.export_dir / (outfile1.name + '.gz'))
    job_prebuild(job0)
    assert 'Job is not export-cached since exported file not exists:' in caplog.text
    caplog.clear()

    job0.__attrs_property_cached__['output'] = OrderedDiot(
        outfile=('file', outfile1))
    job0.proc.config.export_how = 'move'
    job_prebuild(job0)
    assert 'Job is not export-cached since exported file not exists:' in caplog.text

    fs.link(outfile1, job0.proc.config.export_dir / outfile1.name)
    job_prebuild(job0)
    assert job0.is_cached()
    caplog.clear()

    # overwriting existing
    fs.remove(job0.proc.config.export_dir / outfile1.name)
    (job0.proc.config.export_dir / outfile1.name).write_text('')
    job_prebuild(job0)
    assert job0.is_cached()
    assert 'Overwrite file for export-caching: ' in caplog.text
def test_export(job0, tmp_path, caplog):
    job0.proc.config.export_dir = ''
    job_done(job0, 'succeeded')
    assert 'Exported' not in caplog.text

    job0.proc.config.export_dir = tmp_path / 'test_export'
    proc_prerun(job0.proc)
    job0.proc.config.export_part = []
    job_done(job0, 'succeeded')
    assert 'Exported' not in caplog.text

    # export everything
    outfile1 = job0.dir / 'output' / 'test_export_outfile.txt'
    outfile1.parent.mkdir(exist_ok=True)
    outfile1.write_text('')
    job0.__attrs_property_cached__['output'] = OrderedDiot(
        outfile=('file', outfile1))
    job0.proc.config.export_how = 'copy'
    job0.proc.config.export_ow = True
    job_done(job0, 'succeeded')
    assert fs.exists(job0.proc.config.export_dir / outfile1.name)
    assert not fs.islink(outfile1)
    assert not fs.samefile(outfile1,
                           job0.proc.config.export_dir / outfile1.name)
    assert ('Exported: %s' %
            (job0.proc.config.export_dir / outfile1.name)) in caplog.text

    job0.proc.config.export_how = 'move'
    job_done(job0, 'succeeded')
    assert fs.exists(job0.proc.config.export_dir / outfile1.name)
    assert fs.islink(outfile1)
    assert fs.samefile(outfile1, job0.proc.config.export_dir / outfile1.name)
    assert ('Exported: %s' %
            (job0.proc.config.export_dir / outfile1.name)) in caplog.text

    # outfile is a link, then copy the file
    job_done(job0, 'succeeded')
    assert fs.exists(job0.proc.config.export_dir / outfile1.name)
    assert not fs.islink(job0.proc.config.export_dir / outfile1.name)
    assert fs.islink(outfile1)
    assert fs.samefile(outfile1, job0.proc.config.export_dir / outfile1.name)

    job0.proc.config.export_how = 'link'
    job_done(job0, 'succeeded')
    assert fs.exists(job0.proc.config.export_dir / outfile1.name)
    assert fs.islink(job0.proc.config.export_dir / outfile1.name)
    assert not fs.islink(outfile1)
    assert fs.samefile(outfile1, job0.proc.config.export_dir / outfile1.name)

    job0.proc.config.export_how = 'gzip'
    job_done(job0, 'succeeded')
    assert fs.exists(job0.proc.config.export_dir / (outfile1.name + '.gz'))

    job0.proc.config.export_part = ['outfile']
    fs.remove(job0.proc.config.export_dir / (outfile1.name + '.gz'))
    job_done(job0, 'succeeded')
    assert fs.exists(job0.proc.config.export_dir / (outfile1.name + '.gz'))

    job0.proc.config.export_part = ['*.txt']
    fs.remove(job0.proc.config.export_dir / (outfile1.name + '.gz'))
    job_done(job0, 'succeeded')
    assert fs.exists(job0.proc.config.export_dir / (outfile1.name + '.gz'))
def proc_postrun(proc, status):
    """Generate report for the process"""
    # skip if process failed or cached
    # pylint: disable=too-many-locals,too-many-branches,too-many-statements
    report_file = proc.workdir.joinpath('proc.report.md')
    template = proc.config.report_template
    template_file = None
    if template and template.startswith('file:'):
        template_file = Path(template[5:])
        logger.debug("Using report template: %s", template_file, proc=proc.id)

    if not template and report_file.is_file():
        report_file.unlink()

    if not template or status == 'failed':
        return

    signature = OrderedDiot([(key, value)
                             for key, value in sorted(proc.config.items())
                             if key.startswith('report_')])
    if template_file and template_file.is_file():
        signature.template = filesig(template_file)
    signature = sha256(toml.dumps(signature).encode()).hexdigest()

    if status == 'cached' and report_file.is_file():
        with report_file.open() as frpt:
            if frpt.readline().strip() == '<!--- %s -->' % signature:
                logger.debug("Report markdown file cached, skip.",
                             proc=proc.id)
                return
        fs.remove(report_file)

    logger.debug('Rendering report template ...', proc=proc.id)
    if template_file:
        template = template_file.read_text()

    template = proc.template(textwrap.dedent(template), **proc.envs)
    rptdata = dict(jobs=[None] * proc.size, proc=proc, args=proc.args)
    for i, job in enumerate(proc.jobs):
        rptdata['jobs'][i] = job.data.job.copy()
        rptdata['jobs'][i]['i'] = job.data.i
        rptdata['jobs'][i]['o'] = job.data.o
        datafile = job.dir / 'output/job.report.data.toml'
        if datafile.is_file():
            with datafile.open() as fdata:
                rptdata['jobs'][i].update(toml.load(fdata))

    rptenvs = Diot(level=1, pre='', post='', title=proc.desc)
    rptenvs.update(proc.config.report_envs)
    rptdata['report'] = rptenvs
    try:
        reportmd = template.render(rptdata)
    except Exception as exc:
        raise RuntimeError(
            'Failed to render report markdown for process: %s' % (proc)
        ) from exc

    reportmd = reportmd.splitlines()

    codeblock = False
    for i, line in enumerate(reportmd):
        if line.startswith('#') and not codeblock:
            reportmd[i] = '#' * (rptenvs.level - 1) + line
        elif codeblock:
            if line.startswith('```') and len(line) - len(
                    line.lstrip('`')) == codeblock:
                codeblock = False
        elif line.startswith('```'):
            codeblock = len(line) - len(line.lstrip('`'))

    report_file.write_text(
        '<!--- %s -->' % signature +
        proc.template(textwrap.dedent(rptenvs.pre),
                      **proc.envs).render(rptdata) + '\n\n' +
        '\n'.join(reportmd) + '\n\n' +
        proc.template(textwrap.dedent(rptenvs.post),
                      **proc.envs).render(rptdata) + '\n'
    )
DEFAULT_CONFIG = dict(default=dict(
    # default plugins
    _plugins=['pyppl_report', 'pyppl_flowchart'],
    # log options
    _log=dict(
        file=None,
        theme='greenOnBlack',
        levels='normal',
        leveldiffs=[],
        pbar=50,
        shorten=0,
    ),
    # The command to run after jobs start
    afterCmd='',
    # The extra arguments for the process
    args=OrderedDiot(diot_nest=True),
    # The command to run before jobs start
    beforeCmd='',
    # The cache option, True/False/export
    cache=True,
    # Do cleanup for cached jobs?
    acache=False,
    # The description of the job
    desc='No description',
    # Whether to expand a directory when checking the signature
    dirsig=True,
    # Whether to echo the stdout and stderr of the jobs to the screen
    # Could also be:
    # {
    #     # or [0, 1, 2], just echo output of those jobs.
    #     'jobs': 0
def __init__(self, id=None, tag='notag', desc='No description.', **kwargs):
    """@API
    Proc constructor
    @params:
        tag (str) : The tag of the process
        desc (str): The description of the process
        id (str)  : The identifier of the process
        **kwargs: Other properties of the process,
            which can be set by `proc.xxx` later.
    """
    # Do not go through __getattr__ and __setattr__
    # Get configuration from config
    self.__dict__['config'] = Config()
    # computed props
    self.__dict__['props'] = Diot(diot_nest=False)

    defaultconfig = dict.copy(utils.config)
    # The id (actually, it's the showing name) of the process
    defaultconfig['id'] = id if id else utils.varname()

    if ' ' in tag:
        raise ProcTagError("No space allowed in tag.")

    defaultconfig['tag'] = tag
    defaultconfig['desc'] = desc
    # The extra arguments for the process
    defaultconfig['args'] = dict.copy(defaultconfig['args'])
    # The callfront function of the process
    defaultconfig['callfront'] = None
    # The callback function of the process
    defaultconfig['callback'] = None
    # The dependencies specified
    defaultconfig['depends'] = []
    # The input that user specified
    defaultconfig['input'] = ''
    # The output that user specified
    defaultconfig['output'] = ''
    # resume flag of the process
    # ''       : Normal, do not resume
    # 'skip+'  : Load data from previous run, pipeline resumes from future processes
    # 'resume+': Deduce input from 'skip+' processes
    # 'skip'   : Just skip, do not load data
    # 'resume' : Load data from previous run, resume pipeline
    defaultconfig['resume'] = ''
    # The template environment, keep processes independent, even for the subconfigs
    defaultconfig['envs'] = utils.tryDeepCopy(defaultconfig['envs'])

    # The output channel of the process
    self.props.channel = Channel.create()
    # The dependencies computed
    self.props.depends = []
    # the computed echo option
    self.props.echo = {}
    # computed expart
    self.props.expart = []
    # computed expect
    self.props.expect = None
    # The computed input
    self.props.input = {}
    # The jobs
    self.props.jobs = []
    # The locker for the process
    self.props.lock = None
    # non-cached job ids
    self.props.ncjobids = []
    # The original name of the process if it's copied
    self.props.origin = defaultconfig['id']
    # The computed output
    self.props.output = OrderedDiot()
    # data for proc.xxx in template
    self.props.procvars = {}
    # Valid return codes
    self.props.rc = [0]
    # get the runner from the profile
    self.props.runner = 'local'
    # The computed script. Template object
    self.props.script = None
    # The unique identity of the process; cache the suffix
    self.props._suffix = ''
    # The template class
    self.props.template = None
    # timer for running time
    self.props.timer = None
    # The computed workdir
    self.props.workdir = ''
    # Remember the attrs being set; they have the highest priority
    self.props.sets = set()

    # convert aliases to their original names
    for aliaskey, aliasval in Proc.ALIAS.items():
        if aliaskey in kwargs:
            kwargs[aliasval] = kwargs.pop(aliaskey)

    for key in kwargs:
        if key not in defaultconfig:
            raise ProcAttributeError(key)

    # update the conf with kwargs
    defaultconfig.update(kwargs)
    # collapse the loading trace, we don't need it anymore.
    self.config._load({'default': defaultconfig})

    for key, val in kwargs.items():
        if key[0] != '_':
            setattr(self, key, val)
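# Usage sketch for the Proc constructor above. A minimal, hypothetical
# example: it assumes `Proc` is importable from the `pyppl` package, and the
# input/output/script values are placeholders, not taken from the source.
from pyppl import Proc

# A tag containing a space raises ProcTagError; an unknown keyword argument
# raises ProcAttributeError.
pSort = Proc(
    id='pSort',
    tag='demo',
    desc='Sort an input file.',
    input='infile:file',
    output='outfile:file:sorted.txt',
    script='sort {{i.infile}} > {{o.outfile}}',
)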
        None,
        # =>
        '{ a : 1,\n'
        ' b11: 2, }'),
    (
        {"a": 1, "b": 2},
        4,
        'x11',
        # =>
        '[x11] { a: 1,\n'
        ' b: 2, }'),
    (
        OrderedDiot([("a", 1), ("b", 2)]),
        4,
        'x11',
        # =>
        '[x11] <OrderedDiot> \n'
        ' { a: 1,\n'
        ' b: 2, }'),
])
def test_formatDict(val, keylen, alias, expt):
    assert formatDict(val, keylen, alias) == expt


@pytest.mark.parametrize('val,expt,asserts', [
    (None, None, ['is']),
    (1, 1, ['is']),
    (sys, sys, ['is']),
    (Diot(a=Diot(b=Diot(c=1))), Diot(a=Diot(b=Diot(c=1))), ['=']),
def __init__(self):
    self._registry = OrderedDiot()  # type: OrderedDiot
    self._specs = {}  # type: Dict[str, SimplugHook]
    self._registry_sorted = False  # type: bool