Example #1
def test_od_copy():

    od = OrderedDiot()
    od.i = 0
    od2 = od.copy()
    assert od2.i == 0
    od2.j = 1

    od3 = od.copy()
    assert "j" not in od3
Example #2
	def __init__(self, *procs, **kwargs):
		"""@API
		Constructor
		@params:
			*procs (Proc) : the set of processes
			**kwargs: Other arguments to instantiate a `ProcSet`
				depends (bool): Whether to automatically deduce dependencies. Default: `True`
				id (str): The id of the procset. Default: `None` (the variable name)
				tag (str): The tag of the processes. Default: `None`
				copy (bool): Whether to copy the processes or just use them. Default: `True`
		"""

		self.__dict__['id']        = kwargs.get('id') or varname(context = 101)
		self.__dict__['tag']       = kwargs.get('tag')
		self.__dict__['starts']    = Proxy()
		self.__dict__['ends']      = Proxy()
		self.__dict__['delegates'] = OrderedDiot()
		self.__dict__['procs']     = OrderedDiot()
		self.__dict__['modules']   = Diot(diot_nest = False)
		# save initial states before a module is called
		# states will be resumed before each module is called
		self.__dict__['initials']  = Diot(diot_nest = False)

		ifcopy  = kwargs.get('copy', True)
		depends = kwargs.get('depends', True)

		prevproc = None
		for proc in procs:
			assert hasattr(proc, 'id') and hasattr(proc, 'tag'), \
				'Argument has to be a Proc object: %r.' % proc
			if ifcopy:
				self.procs[proc.id] = proc.copy(proc.id,
					tag = (self.tag or proc.tag.split('@', 1)[0]) + '@' + self.id)
			else:
				self.procs[proc.id] = proc
				proc.config.tag = (self.tag or proc.tag.split('@', 1)[0]) + '@' + self.id

			if depends and prevproc is None:
				self.starts.add(self[proc.id])

			if depends and prevproc:
				self.procs[proc.id].depends = prevproc

			prevproc = self.procs[proc.id]

		if depends and prevproc:
			self.ends.add(prevproc)

		self.delegate('input', 'starts')
		self.delegate('depends', 'starts')
		self.delegate('ex*', 'ends')
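
The constructor above wires the given processes into a linear pipeline when `depends` is `True`: the first process becomes a start, each later process depends on the previous one, and the last becomes an end. The sketch below replays only that chaining logic with hypothetical stand-in objects, not the real PyPPL `Proc` API.

class FakeProc:  # hypothetical stand-in, only to illustrate the chaining
    def __init__(self, id):
        self.id = id
        self.depends = None

procs = [FakeProc("p1"), FakeProc("p2"), FakeProc("p3")]
starts, ends, prevproc = [], [], None
for proc in procs:
    if prevproc is None:
        starts.append(proc)       # first process is a start
    else:
        proc.depends = prevproc   # later processes depend on the previous one
    prevproc = proc
if prevproc:
    ends.append(prevproc)         # last process is an end

assert [p.id for p in starts] == ["p1"]
assert [p.id for p in ends] == ["p3"]
assert procs[1].depends.id == "p1" and procs[2].depends.id == "p2"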
Example #3
def test_od_iter():
    od = OrderedDiot([("b", 1), ("a", 2)])
    assert list(od) == ["b", "a"]

    it = iter(od)
    assert next(it) == "b"
    assert next(it) == "a"

    od.__diot__["orderedkeys"] = ["a", "b"]
    assert list(od) == ["a", "b"]

    it = iter(od)
    assert next(it) == "a"
    assert next(it) == "b"
Example #4
    def suffix(self):
        """@API
		Calculate a uid for the process according to the configuration
		The philosophy:
		1. procs from different scripts must have different suffixes (sys.argv[0])
		2. procs from the same script:
			- procs with different id or tag have different suffix
			- procs with different input have different suffix (depends, input)
		@returns:
			(str): The unique id of the process
		"""
        if self.props._suffix:
            return self.props._suffix

        sigs = OrderedDiot()
        # use cmdy.which instead? what about "python test.py"
        sigs.argv0 = path.realpath(sys.argv[0])
        sigs.id = self.id
        sigs.tag = self.tag

        if isinstance(self.config.input, dict):
            sigs.input = self.config.input.copy()
            for key, val in self.config.input.items():
                # lambdas are not picklable
                # convert others to strings to make sure they are picklable. Issue #65
                sigs.input[key] = utils.funcsig(val) if callable(val) else str(
                    val)
        else:
            sigs.input = str(self.config.input)

        # Add depends to avoid the same suffix for processes with the same depends
        # but different input files.
        # They could have the same suffix because they are using input callbacks,
        # and the callbacks could be the same even if the input files are different.
        if self.depends:
            sigs.depends = [
                p.name(True) + '#' + p.suffix for p in self.depends
            ]
        try:
            signature = sigs.to_json()
        except TypeError as exc:  # pragma: no cover
            raise ProcInputError('Unexpected input data type: %s' %
                                 exc) from None
        logger.debug('Suffix decided by: %s' % signature, proc=self.id)
        # the suffix depends only on where the process comes from (sys.argv[0]) and
        # its name (id and tag), to avoid generating too many different workdirs
        self.props._suffix = utils.uid(signature)
        #self.props.suffix = utils.uid(path.realpath(sys.argv[0]) + ':' + self.id)
        return self._suffix
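
A rough, self-contained sketch of the idea above: collect an ordered signature (script path, id, tag, stringified input), serialize it, and hash it into a short suffix. `utils.uid` is PyPPL-specific; the md5-based id here is only an illustrative stand-in and will not reproduce its real output.

import hashlib
import json
from collections import OrderedDict

sigs = OrderedDict()
sigs["argv0"] = "/path/to/pipeline.py"   # where the process comes from
sigs["id"] = "pSort"                     # process id (hypothetical)
sigs["tag"] = "notag"                    # process tag
sigs["input"] = {"infile": "data.txt"}   # stringified input

signature = json.dumps(sigs)
suffix = hashlib.md5(signature.encode()).hexdigest()[:8]
# same configuration -> same suffix; changing id/tag/input -> different suffix
print(suffix)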
Example #5
def test_clear():
    bx = OrderedDiot()
    bx.a = 1
    bx.c = 4
    bx['g'] = 7
    bx.d = 2
    assert list(bx.keys()) == ['a', 'c', 'g', 'd']
    bx.clear()
    assert bx == {}
    assert list(bx.keys()) == []
    assert bx.__diot__['orderedkeys'] == []
Example #6
def test_succeed(job0, caplog):
    job0.rc = 1
    job0.proc.rc = [0]
    assert not job0.succeed()

    job0.proc.rc = [0, 1]
    (job0.dir / 'output').mkdir()
    job0.proc.expect = TemplateLiquid('')
    assert job0.succeed()

    job0.output = OrderedDiot(outfile=('file',
                                       job0.dir / 'output' / 'notexists'))
    job0.rc = 1
    caplog.clear()
    assert not job0.succeed()
    assert 'Outfile not generated' in caplog.text
    assert job0.rc == 1 + (1 << 9)

    (job0.dir / 'output' / 'notexists').write_text('')
    job0.proc.expect = TemplateLiquid('grep abc {{o.outfile}}')
    job0.rc = 1
    caplog.clear()
    assert not job0.succeed()
    assert 'Check expectation' in caplog.text
    assert job0.rc == 1 + (1 << 10)
Example #7
def annotate_args(cls, args, warn_missing):
    if not args:
        cls.annotated.args = None
        return

    cls.annotated.args = OrderedDiot(diot_nest=False)
    parsed_items = {
        parsed_item.name: parsed_item
        for parsed_item in args.section
    }

    for key, val in cls.args.items():
        if key not in parsed_items and warn_missing:
            warnings.warn(f"Missing annotation for args: {key}",
                          AnnotateMissingWarning)
            cls.annotated.args[key] = ParsedItem(
                name=key,
                type=None,
                desc="Undescribed.",
                more=[
                    ParsedPara([f'Default: {repr(val) if val == "" else val}'])
                ],
            )
        else:
            item = parsed_items[key]
            cls.annotated.args[key] = ParsedItem(
                name=key,
                type=item.type,
                desc=item.desc,
                more=(item.more or []) +
                [ParsedPara([f'Default: {repr(val) if val == "" else val}'])],
            )
Example #8
def annotate_input(cls, input, warn_missing):
    if not input:
        cls.annotated.input = None
        return

    cls.annotated.input = OrderedDiot(diot_nest=False)
    parsed_items = {
        parsed_item.name: parsed_item
        for parsed_item in input.section
    }

    input_keys = cls.input
    if isinstance(input_keys, str):
        input_keys = [input_key.strip() for input_key in input_keys.split(",")]

    for input_key_type in input_keys or []:
        if ":" not in input_key_type:
            input_key_type = f"{input_key_type}:{ProcInputType.VAR}"
        input_key, input_type = input_key_type.split(":", 1)
        if input_key not in parsed_items and warn_missing:
            warnings.warn(
                f"Missing annotation for input: {input_key}",
                AnnotateMissingWarning,
            )
            cls.annotated.input[input_key] = ParsedItem(name=input_key,
                                                        type=input_type,
                                                        desc=None,
                                                        more=None)
        else:
            item = parsed_items[input_key]
            cls.annotated.input[input_key] = ParsedItem(name=item.name,
                                                        type=input_type,
                                                        desc=item.desc,
                                                        more=item.more)
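
A minimal sketch of the input-key normalization used above: a comma-separated spec is split into keys, and bare keys fall back to the default `var` type before being split into `(name, type)` pairs (here `"var"` stands in for `ProcInputType.VAR`).

spec = "infile:file, invar"
keys = [key.strip() for key in spec.split(",")]

normalized = []
for key_type in keys:
    if ":" not in key_type:
        key_type = f"{key_type}:var"   # default type, stand-in for ProcInputType.VAR
    normalized.append(tuple(key_type.split(":", 1)))

assert normalized == [("infile", "file"), ("invar", "var")]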
Example #9
def test_reest(job0):
    job0.ntry = 0
    (job0.dir / 'output').mkdir()
    (job0.dir / 'output' / 'outfile.txt').write_text('')
    (job0.dir / 'output' / '.jobcache').mkdir()
    (job0.dir / 'job.rc').write_text('')
    (job0.dir / 'job.stdout').write_text('out')
    (job0.dir / 'job.stderr').write_text('err')
    (job0.dir / 'job.pid').write_text('')
    (job0.dir / 'retry.1').mkdir()
    job0.reset()
    assert not fs.exists(job0.dir / 'retry.1')
    assert not fs.exists(job0.dir / 'job.rc')
    # recreated
    assert (job0.dir / 'job.stdout').read_text() == ''
    assert (job0.dir / 'job.stderr').read_text() == ''
    assert not fs.exists(job0.dir / 'job.pid')
    assert fs.exists(job0.dir / 'output')
    # recreated
    assert not fs.exists(job0.dir / 'output' / 'outfile.txt')

    job0.ntry = 1
    (job0.dir / 'output' / 'outfile.txt').write_text('')
    (job0.dir / 'output' / '.jobcache' / 'cached.txt').write_text('')
    job0.reset()
    assert fs.exists(job0.dir / 'retry.1')
    assert not fs.exists(job0.dir / 'retry.1' / '.jobcache')
    assert fs.exists(job0.dir / 'output' / '.jobcache' / 'cached.txt')

    # remove whole output directory
    job0.ntry = 0
    fs.remove(job0.dir / 'output' / '.jobcache')
    (job0.dir / 'output' / 'outfile.txt').write_text('')
    job0.reset()
    assert not fs.exists(job0.dir / 'output' / 'outfile.txt')
    # move whole output directory
    job0.ntry = 1
    fs.remove(job0.dir / 'output' / '.jobcache')
    (job0.dir / 'output' / 'outfile.txt').write_text('')
    job0.reset()
    assert not fs.exists(job0.dir / 'output' / 'outfile.txt')

    # restore output directory and stdout, stderr
    job0.output = OrderedDiot(
        outdir=('dir', job0.dir / 'output' / 'outdir'),
        outfile=('stdout', job0.dir / 'output' / 'outfile'),
        errfile=('stderr', job0.dir / 'output' / 'errfile'),
    )
    job0.ntry = 0
    job0.reset()
    assert fs.isdir(job0.dir / 'output' / 'outdir')
    assert fs.islink(job0.dir / 'output' / 'outfile')
    assert fs.islink(job0.dir / 'output' / 'errfile')
    assert fs.samefile(job0.dir / 'job.stdout',
                       job0.dir / 'output' / 'outfile')
    assert fs.samefile(job0.dir / 'job.stderr',
                       job0.dir / 'output' / 'errfile')

    # what if outdir exists
    job0.reset()
Example #10
	def _docSecs(self):
		ret   = OrderedDiot(desc = [])
		name  = 'desc'
		for line in self.docs:
			if not line.startswith('@'):
				ret[name].append(line)
			else:
				name = line.strip('@: ')
				ret[name] = []
		return ret
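
A minimal worked example of the sectioning logic above, assuming the `diot` package is installed: lines before the first `@` tag accumulate under `desc`, and every `@name:` line starts a new, ordered section.

from diot import OrderedDiot

docs = [
    "Sort the input file.",
    "@params:",
    "    infile: the input file",
    "@returns:",
    "    the sorted file",
]

ret = OrderedDiot(desc=[])
name = "desc"
for line in docs:
    if not line.startswith("@"):
        ret[name].append(line)
    else:
        name = line.strip("@: ")
        ret[name] = []

assert list(ret.keys()) == ["desc", "params", "returns"]
assert ret.desc == ["Sort the input file."]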
Example #11
 def sort(self):
     """Sort the blocks in a wiggle file by chrom and start. """
     block_ids = sorted(self.blocks.keys(),
                        key=lambda block: (_chrom_to_sortable(self.blocks[
                            block].chrom), self.blocks[block].start))
     orig_blocks = self.blocks
     self.blocks = OrderedDiot()
     for block_id in block_ids:
         self.blocks[block_id] = orig_blocks[block_id]
     del orig_blocks
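
Since OrderedDiot preserves insertion order, reordering it means rebuilding it in the desired order (or rewriting `__diot__["orderedkeys"]` directly, as Example #3 shows). A minimal sketch:

from diot import OrderedDiot

blocks = OrderedDiot()
blocks.b2 = {"chrom": "chr2", "start": 5}
blocks.b1 = {"chrom": "chr1", "start": 3}

ordered = OrderedDiot()
for key in sorted(blocks, key=lambda k: (blocks[k]["chrom"], blocks[k]["start"])):
    ordered[key] = blocks[key]

assert list(blocks.keys()) == ["b2", "b1"]
assert list(ordered.keys()) == ["b1", "b2"]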
Example #12
def annotate_output(cls, output, warn_missing):
    if not output:
        cls.annotated.output = None
        return

    cls.annotated.output = OrderedDiot(diot_nest=False)
    parsed_items = {
        parsed_item.name: parsed_item
        for parsed_item in output.section
    }

    # output can be an arbitrary template string.
    # its structure is resolved after it's rendered.
    # here we try to parse the output if it's just plain strings
    # For example:
    # >>> output = "afile:file:..., bfile:file:..."
    # or
    # >>> output = ["afile:file:...", "bfile:file:..."]
    # give up parsing if any error happens
    output = cls.output

    def parse_one_output(out):
        parts = out.split(":")
        if not parts[0].isidentifier():
            return None
        if len(parts) < 3:
            return parts[0], ProcInputType.VAR, parts[1]
        return parts

    if not isinstance(output, (list, tuple)):
        output = [out.strip() for out in output.split(",")]

    for out in output:
        parsed = parse_one_output(out)
        if not parsed:
            continue
        if parsed[0] not in parsed_items and warn_missing:
            warnings.warn(
                f"Missing annotation for output: {parsed[0]}",
                AnnotateMissingWarning,
            )
            cls.annotated.output[parsed[0]] = ParsedItem(
                name=parsed[0],
                type=parsed[1],
                desc="Undescribed.",
                more=[ParsedPara([f"Default: {parsed[2]}"])],
            )
        else:
            cls.annotated.output[parsed[0]] = ParsedItem(
                name=parsed[0],
                type=parsed[1],
                desc=parsed_items[parsed[0]].desc,
                more=(parsed_items[parsed[0]].more or []) +
                [ParsedPara([f"Default: {parsed[2]}"])],
            )
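
A worked sketch of the output-spec parsing above: each entry is `name:type:template`; when only two parts are present the type defaults to `var` (a stand-in for `ProcInputType.VAR` here), and entries whose name is not a valid identifier are skipped.

def parse_one_output(out):
    parts = out.split(":")
    if not parts[0].isidentifier():
        return None                       # give up on unparseable specs
    if len(parts) < 3:
        return parts[0], "var", parts[1]  # default type
    return parts

assert parse_one_output("outfile:file:{{in.infile}}.sorted") == [
    "outfile", "file", "{{in.infile}}.sorted"
]
assert parse_one_output("out:{{in.x}}") == ("out", "var", "{{in.x}}")
assert parse_one_output("{{dynamic}}:file:x") is None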
Example #13
    def get_enabled_plugins(self,
                            raw: bool = False) -> Dict[str, SimplugWrapper]:
        """Get a mapping of all enabled plugins

        Args:
            raw: Whether to return the raw plugin or not
                (the one passed in when it was registered).
                If a plugin is registered as a module by its name, the module
                is returned.

        Returns:
            The mapping of all enabled plugins
        """
        return OrderedDiot([(name, plugin.plugin if raw else plugin)
                            for name, plugin in self.hooks._registry.items()
                            if plugin.enabled])
Example #14
def test_prepoutput(job0, tmpdir):
    job0.proc.output = OrderedDiot()
    job0._prepOutput()
    assert len(job0.output) == 0

    job0.proc.output.out = ('var', TemplateLiquid('abc'))
    job0.proc.output.outfile = ('file',
                                TemplateLiquid('outfile{{job.index}}.txt'))
    job0._prepOutput()
    assert len(job0.output) == 2
    assert job0.output.out == ('var', 'abc')
    assert job0.output.outfile == ('file',
                                   job0.dir / 'output' / 'outfile0.txt')

    job0.proc.output.clear()
    job0.proc.output.abs = ('file', TemplateLiquid('/a/b/c'))
    with pytest.raises(JobOutputParseError):
        job0._prepOutput()
Example #15
def test_or_ior():
    a = Diot({"data": 2, "count": 5})
    b = Diot(data=2, count=5)

    c = a | {"data": 3}
    assert c == {"data": 3, "count": 5}

    c = a | [("data", 3)]
    assert c == {"data": 3, "count": 5}

    a |= {"data": 3}
    assert a == {"data": 3, "count": 5}

    with pytest.raises(TypeError):
        a | 1

    od = OrderedDiot([("b", 1), ("a", 2)])
    od |= {"a": 1, "b": 2}

    assert od.__diot__["orderedkeys"] == ["b", "a"]
    assert od.a == 1
    assert od.b == 2
Example #16
def test_inheritance_copy():
    class Box2(Diot):
        pass

    b = Box2(a=1)
    c = b.copy()
    assert c == b
    assert isinstance(c, Diot)
    c = b.__copy__()
    assert c == b
    assert isinstance(c, Diot)

    d = OrderedDiot()
    d.b = 1
    d.a = 0
    d.x = 9
    assert list(d.copy().keys()) == ['b', 'a', 'x']
Example #17
def test_signature(job0, tmpdir, caplog):
    fs.remove(job0.dir / 'job.script')
    assert job0.signature() == ''
    (job0.dir / 'job.script').write_text('')
    assert job0.signature() == Diot(script=filesig(job0.dir / 'job.script'),
                                    i={
                                        'var': {},
                                        'file': {},
                                        'files': {}
                                    },
                                    o={
                                        'var': {},
                                        'file': {},
                                        'dir': {}
                                    })
    infile = tmpdir / 'test_signature_input.txt'
    infile.write_text('')
    infile1 = tmpdir / 'test_signature_input_not_exists.txt'
    job0.input = Diot(invar=('var', 'abc'),
                      infile=('file', infile),
                      infiles=('files', [infile]))
    job0._signature = None
    assert job0.signature().i == {
        'var': {
            'invar': 'abc'
        },
        'file': {
            'infile': filesig(infile)
        },
        'files': {
            'infiles': [filesig(infile)]
        },
    }

    job0.input = Diot(invar=('var', 'abc'), infile=('file', infile1))
    job0._signature = None
    assert job0.signature() == ''
    assert 'Empty signature because of input file' in caplog.text

    job0.input = Diot(invar=('var', 'abc'), infiles=('files', [infile1]))
    job0._signature = None
    assert job0.signature() == ''
    assert 'Empty signature because of one of input files' in caplog.text

    job0.input = {}
    outfile = tmpdir / 'test_signature_outfile.txt'
    outfile.write_text('')
    outfile1 = tmpdir / 'test_signature_outfile_not_exists.txt'
    outdir = tmpdir / 'test_signature_outdir'
    outdir.mkdir()
    outdir1 = tmpdir / 'test_signature_outdir_not_exists'
    job0.output = OrderedDiot(out=('var', 'abc'),
                              outfile=('file', outfile),
                              outdir=('dir', outdir))
    job0._signature = None
    assert job0.signature().o == {
        'var': {
            'out': 'abc'
        },
        'file': {
            'outfile': filesig(outfile)
        },
        'dir': {
            'outdir': filesig(outdir, dirsig=job0.proc.dirsig)
        }
    }

    job0.output = OrderedDiot(outfile=('file', outfile1))
    job0._signature = None
    assert job0.signature() == ''
    assert 'Empty signature because of output file:' in caplog.text

    job0.output = OrderedDiot(outdir=('dir', outdir1))
    job0._signature = None
    assert job0.signature() == ''
    assert 'Empty signature because of output dir:' in caplog.text
Example #18
def test_ordereddiot_repr():
    d = OrderedDiot(a_b=1)
    assert d.a_b == 1
    assert repr(d) == "OrderedDiot([('a_b', 1)])"
Example #19
def test_revesed():
    bx = OrderedDiot()
    bx.a = 1
    bx.c = 2
    assert list(reversed(bx)) == ['c', 'a']
Example #20
def test_iter():
    bx = OrderedDiot()
    bx.a = 1
    bx.c = 2
    assert list(bx.__iter__()) == ['a', 'c']
Example #21
def test_ordered_box():
    bx = OrderedDiot(h=1)
    bx.a = 1
    bx.c = 4
    bx['g'] = 7
    bx.d = 2
    assert list(bx.keys()) == ['h', 'a', 'c', 'g', 'd']
    del bx.a
    bx.pop('c')
    bx.__delattr__('g')
    assert list(bx.keys()) == ['h', 'd']
Example #22
def test_export(job0, tmpdir, caplog):
    job0.proc.exdir = ''
    job0.export()
    assert 'Exported' not in caplog.text

    job0.proc.exdir = '/path/not/exists'
    with pytest.raises(AssertionError):
        job0.export()

    job0.proc.exdir = tmpdir / 'test_export'
    job0.proc.exdir.mkdir()

    job0.proc.expart = None
    with pytest.raises(AssertionError):
        job0.export()

    job0.proc.expart = []
    job0.export()
    assert 'Exported' not in caplog.text

    # export everything
    outfile1 = job0.dir / 'output' / 'test_export_outfile.txt'
    outfile1.parent.mkdir()
    outfile1.write_text('')
    job0.output = OrderedDiot(outfile=('file', outfile1))
    job0.proc.exhow = 'copy'
    job0.proc.exow = True
    job0.proc._log.shorten = 0
    job0.export()
    assert fs.exists(job0.proc.exdir / outfile1.name)
    assert not fs.islink(outfile1)
    assert not fs.samefile(outfile1, job0.proc.exdir / outfile1.name)
    assert ('Exported: %s' % (job0.proc.exdir / outfile1.name)) in caplog.text

    job0.proc.exhow = 'move'
    job0.export()
    assert fs.exists(job0.proc.exdir / outfile1.name)
    assert fs.islink(outfile1)
    assert fs.samefile(outfile1, job0.proc.exdir / outfile1.name)
    assert ('Exported: %s' % (job0.proc.exdir / outfile1.name)) in caplog.text

    # outfile is a link, then copy the file
    job0.export()
    assert fs.exists(job0.proc.exdir / outfile1.name)
    assert not fs.islink(job0.proc.exdir / outfile1.name)
    assert fs.islink(outfile1)
    assert fs.samefile(outfile1, job0.proc.exdir / outfile1.name)

    job0.proc.exhow = 'link'
    job0.export()
    assert fs.exists(job0.proc.exdir / outfile1.name)
    assert fs.islink(job0.proc.exdir / outfile1.name)
    assert not fs.islink(outfile1)
    assert fs.samefile(outfile1, job0.proc.exdir / outfile1.name)

    job0.proc.exhow = 'gzip'
    job0.export()
    assert fs.exists(job0.proc.exdir / (outfile1.name + '.gz'))

    job0.proc.expart = [TemplateLiquid('outfile')]
    fs.remove(job0.proc.exdir / (outfile1.name + '.gz'))
    job0.export()
    assert fs.exists(job0.proc.exdir / (outfile1.name + '.gz'))

    job0.proc.expart = [TemplateLiquid('*.txt')]
    fs.remove(job0.proc.exdir / (outfile1.name + '.gz'))
    job0.export()
    assert fs.exists(job0.proc.exdir / (outfile1.name + '.gz'))
Example #23
def test_isexptcached(job0, tmpdir, caplog):
    job0.proc.cache = False
    assert not job0.isExptCached()

    job0.proc.cache = 'export'
    job0.proc.exhow = 'link'
    assert not job0.isExptCached()
    assert 'Job is not export-cached using symlink export.' in caplog.text
    caplog.clear()

    job0.proc.exhow = 'copy'
    job0.proc.expart = [TemplateLiquid('outfile')]
    assert not job0.isExptCached()
    assert 'Job is not export-cached using partial export.' in caplog.text
    caplog.clear()

    job0.proc.expart = None
    job0.proc.exdir = ''
    assert not job0.isExptCached()
    assert 'Job is not export-cached since export directory is not set.' in caplog.text
    caplog.clear()

    job0.proc.exdir = tmpdir / 'test_isexptcached_exdir'
    job0.proc.exdir.mkdir()
    outfile1 = tmpdir / 'test_isexptcached_outfile1.txt'
    outfile1.write_text('')
    outfile2 = tmpdir / 'test_isexptcached_outfile_not_exists.txt'
    outdir1 = tmpdir / 'test_isexptcached_outdir1'
    outdir1.mkdir()
    fs.gzip(outfile1, job0.proc.exdir / (outfile1.name + '.gz'))
    fs.gzip(outdir1, job0.proc.exdir / (outdir1.name + '.tgz'))
    job0.output = OrderedDiot(outfile=('file', outfile1),
                              outdir=('dir', outdir1),
                              out=('var', 'abc'))
    # overwriting existing
    (job0.dir / 'output').mkdir()
    (job0.dir / 'output' / outfile1.name).write_text('')
    job0.proc.exhow = 'gzip'
    assert job0.isExptCached()
    assert 'Overwrite file for export-caching:' in caplog.text
    assert job0.isTrulyCached()
    caplog.clear()

    fs.remove(job0.proc.exdir / (outfile1.name + '.gz'))
    assert not job0.isExptCached()
    assert 'Job is not export-cached since exported file not exists:' in caplog.text
    caplog.clear()

    job0.output = OrderedDiot(outfile=('file', outfile1))
    job0.proc.exhow = 'move'
    assert not job0.isExptCached()
    assert 'Job is not export-cached since exported file not exists:' in caplog.text

    fs.link(outfile1, job0.proc.exdir / outfile1.name)
    assert job0.isExptCached()
    caplog.clear()

    # overwriting existing
    fs.remove(job0.proc.exdir / outfile1.name)
    (job0.proc.exdir / outfile1.name).write_text('')
    assert job0.isExptCached()
    assert 'Overwrite file for export-caching: ' in caplog.text
Example #24
def test_prebuild(job0, tmp_path, caplog):
    job0.proc.config.export_dir = False
    assert not job0.is_cached()

    job0.proc.cache = 'export'
    job0.proc.config.export_dir = 'export'
    job0.proc.config.export_how = 'link'
    job_prebuild(job0)
    assert not job0.is_cached()
    assert 'Job is not export-cached using symlink export.' in caplog.text
    caplog.clear()

    job0.proc.config.export_how = 'copy'
    job0.proc.config.export_part = [('outfile')]
    job_prebuild(job0)
    assert not job0.is_cached()
    assert 'Job is not export-cached using partial export.' in caplog.text
    caplog.clear()

    job0.proc.config.export_part = None
    job0.proc.config.export_dir = ''
    job_prebuild(job0)
    assert not job0.is_cached()
    caplog.clear()

    job0.proc.config.export_dir = tmp_path / 'test_is_cached_exdir'
    job0.proc.config.export_dir.mkdir()
    outfile1 = tmp_path / 'test_is_cached_outfile1.txt'
    outfile1.write_text('')
    outfile2 = tmp_path / 'test_is_cached_outfile_not_exists.txt'
    outdir1 = tmp_path / 'test_is_cached_outdir1'
    outdir1.mkdir()
    fs.gzip(outfile1, job0.proc.config.export_dir / (outfile1.name + '.gz'))
    fs.gzip(outdir1, job0.proc.config.export_dir / (outdir1.name + '.tgz'))
    job0.__attrs_property_cached__['output'] = OrderedDiot(outfile=('file',
                                                                    outfile1),
                                                           outdir=('dir',
                                                                   outdir1),
                                                           out=('var', 'abc'))
    # overwriting existing
    (job0.dir / 'output').mkdir()
    (job0.dir / 'output' / outfile1.name).write_text('')
    job0.proc.config.export_how = 'gzip'
    job_prebuild(job0)
    assert job0.is_cached()
    assert 'Overwrite file for export-caching:' in caplog.text
    assert job0.is_cached()
    caplog.clear()

    fs.remove(job0.proc.config.export_dir / (outfile1.name + '.gz'))
    job_prebuild(job0)
    assert 'Job is not export-cached since exported file not exists:' in caplog.text
    caplog.clear()

    job0.__attrs_property_cached__['output'] = OrderedDiot(outfile=('file',
                                                                    outfile1))
    job0.proc.config.export_how = 'move'
    job_prebuild(job0)
    assert 'Job is not export-cached since exported file not exists:' in caplog.text

    fs.link(outfile1, job0.proc.config.export_dir / outfile1.name)
    job_prebuild(job0)
    assert job0.is_cached()
    caplog.clear()

    # overwriting existing
    fs.remove(job0.proc.config.export_dir / outfile1.name)
    (job0.proc.config.export_dir / outfile1.name).write_text('')
    job_prebuild(job0)
    assert job0.is_cached()
    assert 'Overwrite file for export-caching: ' in caplog.text
Example #25
def test_export(job0, tmp_path, caplog):
    job0.proc.config.export_dir = ''
    job_done(job0, 'succeeded')
    assert 'Exported' not in caplog.text

    job0.proc.config.export_dir = tmp_path / 'test_export'
    proc_prerun(job0.proc)

    job0.proc.config.export_part = []
    job_done(job0, 'succeeded')
    assert 'Exported' not in caplog.text

    # export everything
    outfile1 = job0.dir / 'output' / 'test_export_outfile.txt'
    outfile1.parent.mkdir(exist_ok=True)
    outfile1.write_text('')
    job0.__attrs_property_cached__['output'] = OrderedDiot(outfile=('file',
                                                                    outfile1))
    job0.proc.config.export_how = 'copy'
    job0.proc.config.export_ow = True
    job_done(job0, 'succeeded')
    assert fs.exists(job0.proc.config.export_dir / outfile1.name)
    assert not fs.islink(outfile1)
    assert not fs.samefile(outfile1,
                           job0.proc.config.export_dir / outfile1.name)
    assert ('Exported: %s' %
            (job0.proc.config.export_dir / outfile1.name)) in caplog.text

    job0.proc.config.export_how = 'move'
    job_done(job0, 'succeeded')
    assert fs.exists(job0.proc.config.export_dir / outfile1.name)
    assert fs.islink(outfile1)
    assert fs.samefile(outfile1, job0.proc.config.export_dir / outfile1.name)
    assert ('Exported: %s' %
            (job0.proc.config.export_dir / outfile1.name)) in caplog.text

    # outfile is a link, then copy the file
    job_done(job0, 'succeeded')
    assert fs.exists(job0.proc.config.export_dir / outfile1.name)
    assert not fs.islink(job0.proc.config.export_dir / outfile1.name)
    assert fs.islink(outfile1)
    assert fs.samefile(outfile1, job0.proc.config.export_dir / outfile1.name)

    job0.proc.config.export_how = 'link'
    job_done(job0, 'succeeded')
    assert fs.exists(job0.proc.config.export_dir / outfile1.name)
    assert fs.islink(job0.proc.config.export_dir / outfile1.name)
    assert not fs.islink(outfile1)
    assert fs.samefile(outfile1, job0.proc.config.export_dir / outfile1.name)

    job0.proc.config.export_how = 'gzip'
    job_done(job0, 'succeeded')
    assert fs.exists(job0.proc.config.export_dir / (outfile1.name + '.gz'))

    job0.proc.config.export_part = ['outfile']
    fs.remove(job0.proc.config.export_dir / (outfile1.name + '.gz'))
    job_done(job0, 'succeeded')
    assert fs.exists(job0.proc.config.export_dir / (outfile1.name + '.gz'))

    job0.proc.config.export_part = ['*.txt']
    fs.remove(job0.proc.config.export_dir / (outfile1.name + '.gz'))
    job_done(job0, 'succeeded')
    assert fs.exists(job0.proc.config.export_dir / (outfile1.name + '.gz'))
Example #26
def proc_postrun(proc, status):
    """Generate report for the process"""
    # skip if process failed or cached
    # pylint: disable=too-many-locals,too-many-branches,too-many-statements
    report_file = proc.workdir.joinpath('proc.report.md')
    template = proc.config.report_template
    template_file = None
    if template and template.startswith('file:'):
        template_file = Path(template[5:])
        logger.debug("Using report template: %s", template_file, proc=proc.id)

    if not template and report_file.is_file():
        report_file.unlink()

    if not template or status == 'failed':
        return

    signature = OrderedDiot([(key, value)
                             for key, value in sorted(proc.config.items())
                             if key.startswith('report_')])
    if template_file and template_file.is_file():
        signature.template = filesig(template_file)
    signature = sha256(toml.dumps(signature).encode()).hexdigest()

    if status == 'cached' and report_file.is_file():
        with report_file.open() as frpt:
            if frpt.readline().strip() == '<!--- %s -->' % signature:
                logger.debug("Report markdown file cached, skip.", proc=proc.id)
                return

    fs.remove(report_file)
    logger.debug('Rendering report template ...', proc=proc.id)
    if template_file:
        template = template_file.read_text()

    template = proc.template(textwrap.dedent(template), **proc.envs)
    rptdata = dict(jobs=[None] * proc.size, proc=proc, args=proc.args)
    for i, job in enumerate(proc.jobs):
        rptdata['jobs'][i] = job.data.job.copy()
        rptdata['jobs'][i]['i'] = job.data.i
        rptdata['jobs'][i]['o'] = job.data.o

        datafile = job.dir / 'output/job.report.data.toml'
        if datafile.is_file():
            with datafile.open() as fdata:
                rptdata['jobs'][i].update(toml.load(fdata))

    rptenvs = Diot(level=1, pre='', post='', title=proc.desc)
    rptenvs.update(proc.config.report_envs)
    rptdata['report'] = rptenvs
    try:
        reportmd = template.render(rptdata)
    except Exception as exc:
        raise RuntimeError('Failed to render report markdown for process: %s' %
                           (proc)) from exc
    reportmd = reportmd.splitlines()

    codeblock = False
    for i, line in enumerate(reportmd):
        if line.startswith('#') and not codeblock:
            reportmd[i] = '#' * (rptenvs.level - 1) + line
        elif codeblock:
            if line.startswith('```') and len(line) - len(
                    line.lstrip('`')) == codeblock:
                codeblock = False
        elif line.startswith('```'):
            codeblock = len(line) - len(line.lstrip('`'))

    report_file.write_text(
        '<!--- %s -->' % signature +
        proc.template(textwrap.dedent(rptenvs.pre),
                      **proc.envs).render(rptdata) + '\n\n' +
        '\n'.join(reportmd) + '\n\n' +
        proc.template(textwrap.dedent(rptenvs.post),
                      **proc.envs).render(rptdata) + '\n'
    )
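
A minimal sketch of the heading-adjustment loop above: markdown headings are demoted by `report.level - 1`, while lines inside fenced code blocks (tracked by the fence length) are left untouched.

reportmd = [
    "# Results",
    "```python",
    "# a comment inside a code fence, not a heading",
    "```",
    "## Details",
]
level = 2          # stands in for rptenvs.level in the code above
codeblock = False
for i, line in enumerate(reportmd):
    if line.startswith('#') and not codeblock:
        reportmd[i] = '#' * (level - 1) + line
    elif codeblock:
        if line.startswith('```') and len(line) - len(line.lstrip('`')) == codeblock:
            codeblock = False
    elif line.startswith('```'):
        codeblock = len(line) - len(line.lstrip('`'))

assert reportmd == ["## Results", "```python",
                    "# a comment inside a code fence, not a heading",
                    "```", "### Details"]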
Example #27
DEFAULT_CONFIG = dict(default=dict(
    # default plugins
    _plugins=['pyppl_report', 'pyppl_flowchart'],
    # log options
    _log=dict(
        file=None,
        theme='greenOnBlack',
        levels='normal',
        leveldiffs=[],
        pbar=50,
        shorten=0,
    ),
    # The command to run after jobs start
    afterCmd='',
    # The extra arguments for the process
    args=OrderedDiot(diot_nest=True),
    # The command to run before jobs start
    beforeCmd='',
    # The cache option, True/False/export
    cache=True,
    # Do cleanup for cached jobs?
    acache=False,
    # The description of the job
    desc='No description',
    # Whether expand directory to check signature
    dirsig=True,
    # Whether to echo the stdout and stderr of the jobs to the screen
    # Could also be:
    # {
    #    # or [0, 1, 2], just echo output of those jobs.
    #   'jobs': 0
Example #28
    def __init__(self, id=None, tag='notag', desc='No description.', **kwargs):
        """@API
		Proc constructor
		@params:
			tag  (str)   : The tag of the process
			desc (str)   : The description of the process
			id   (str)   : The identity of the process
			**kwargs: Other properties of the process, which can be set by `proc.xxx` later.
		"""
        # Do not go through __getattr__ and __setattr__
        # Get configuration from config
        self.__dict__['config'] = Config()
        # computed props
        self.__dict__['props'] = Diot(diot_nest=False)

        defaultconfig = dict.copy(utils.config)
        # The id (actually, the display name) of the process
        defaultconfig['id'] = id if id else utils.varname()
        if ' ' in tag:
            raise ProcTagError("No space allowed in tag.")

        defaultconfig['tag'] = tag
        defaultconfig['desc'] = desc
        # The extra arguments for the process
        defaultconfig['args'] = dict.copy(defaultconfig['args'])
        # The callfront function of the process
        defaultconfig['callfront'] = None
        # The callback function of the process
        defaultconfig['callback'] = None
        # The dependencies specified
        defaultconfig['depends'] = []
        # The input that user specified
        defaultconfig['input'] = ''
        # The output that user specified
        defaultconfig['output'] = ''
        # resume flag of the process
        # ''       : Normal, do not resume
        # 'skip+'  : Load data from previous run, pipeline resumes from future processes
        # 'resume+': Deduce input from 'skip+' processes
        # 'skip'   : Just skip, do not load data
        # 'resume' : Load data from previous run, resume pipeline
        defaultconfig['resume'] = ''
        # The template environment; keep processes independent, even for the subconfigs
        defaultconfig['envs'] = utils.tryDeepCopy(defaultconfig['envs'])

        # The output channel of the process
        self.props.channel = Channel.create()
        # The dependencies computed
        self.props.depends = []
        # the computed echo option
        self.props.echo = {}
        # computed expart
        self.props.expart = []
        # computed expect
        self.props.expect = None
        # The computed input
        self.props.input = {}
        # The jobs
        self.props.jobs = []
        # The locker for the process
        self.props.lock = None
        # non-cached job ids
        self.props.ncjobids = []
        # The original name of the process if it's copied
        self.props.origin = defaultconfig['id']
        # The computed output
        self.props.output = OrderedDiot()
        # data for proc.xxx in template
        self.props.procvars = {}
        # Valid return code
        self.props.rc = [0]
        # get the runner from the profile
        self.props.runner = 'local'
        # The computed script. Template object
        self.props.script = None
        # The unique identity of the process
        # cache the suffix
        self.props._suffix = ''
        # The template class
        self.props.template = None
        # timer for running time
        self.props.timer = None
        # The computed workdir
        self.props.workdir = ''
        # Remember the attr being set, they have the highest priority
        self.props.sets = set()

        # convert alias to its original name
        for aliaskey, aliasval in Proc.ALIAS.items():
            if aliaskey in kwargs:
                kwargs[aliasval] = kwargs.pop(aliaskey)

        for key in kwargs:
            if key not in defaultconfig:
                raise ProcAttributeError(key)

        # update the conf with kwargs
        defaultconfig.update(kwargs)
        # collapse the loading trace, we don't need it anymore.
        self.config._load({'default': defaultconfig})
        for key, val in kwargs.items():
            if key[0] != '_':
                setattr(self, key, val)
Example #29
            None,
            # =>
            '{ a  : 1,\n'
            '      b11: 2, }'),
        (
            {
                "a": 1,
                "b": 2
            },
            4,
            'x11',
            # =>
            '[x11] { a: 1,\n'
            '                b: 2, }'),
        (
            OrderedDiot([("a", 1), ("b", 2)]),
            4,
            'x11',
            # =>
            '[x11] <OrderedDiot> \n'
            '              { a: 1,\n'
            '                b: 2, }'),
    ])
def test_formatDict(val, keylen, alias, expt):
    assert formatDict(val, keylen, alias) == expt


@pytest.mark.parametrize(
    'val,expt,asserts',
    [(None, None, ['is']), (1, 1, ['is']), (sys, sys, ['is']),
     (Diot(a=Diot(b=Diot(c=1))), Diot(a=Diot(b=Diot(c=1))), ['=']),
Example #30
    def __init__(self):

        self._registry = OrderedDiot()  # type: OrderedDiot
        self._specs = {}  # type: Dict[str, SimplugHook]
        self._registry_sorted = False  # type: bool