def test_linkinfile(job0, tmpdir):
    """Check the paths returned by ``job0._linkInfile`` for clashing basenames.

    Three empty source files sharing one basename are created in separate
    sub-directories of ``tmpdir`` and handed to ``_linkInfile`` one after the
    other. Each call is expected to return a path inside ``job0.dir / 'input'``;
    clashing names are expected to come back with a ``[N]`` prefix.
    """
    plain_name = 'test_linkinfile.infile.txt'
    input_dir = job0.dir / 'input'

    def make_source(dirname):
        # Create an empty file called `plain_name` in a new sub-directory.
        source = tmpdir / dirname / plain_name
        source.parent.mkdir()
        source.write_text('')
        return source

    # Start from a cleared input directory.
    fs.mkdir(input_dir, overwrite=True)

    # First file: expected under its own name.
    first = make_source('indir1')
    assert job0._linkInfile(first) == input_dir / plain_name

    # A different file with the same basename is expected to be renamed.
    second = make_source('indir2')
    assert job0._linkInfile(second) == (
        input_dir / '[1]test_linkinfile.infile.txt')

    # Linking the very same file again is expected to yield the same
    # "[1]" path rather than a new index.
    assert job0._linkInfile(second) == (
        input_dir / '[1]test_linkinfile.infile.txt')

    # A file whose bracket prefix is not a number ("[a]") already sits in the
    # input directory; the next clash is still expected to get index 2.
    (input_dir / '[a]test_linkinfile.infile.txt').write_text('')
    third = make_source('indir3')
    assert job0._linkInfile(third) == (
        input_dir / '[2]test_linkinfile.infile.txt')
def test_buildinput(tmpdir, caplog):
    """Drive ``Proc._buildInput`` through several input configurations.

    The same process object is reconfigured step by step, so the statements
    below depend on the state left by the previous ones. Covered cases, in
    order: explicit keys with literal data, a malformed key, data taken from
    a dependency's channel through a callable, missing data for some keys,
    and finally resuming from saved settings.

    NOTE(review): the process variable names may be significant to ``Proc()``
    (the workdir literal below embeds ``p10``) -- confirm before renaming.
    """
    p10 = Proc()
    # Keys are assigned first, then the data, through the same attribute.
    p10.input = 'a, b:file, '
    p10.input = ('1', 'infile')
    p10._buildInput()
    # After building, each key maps to a (type, values) tuple.
    assert len(p10.input) == 2
    assert p10.input['a'] == ('var', ['1'])
    assert p10.input['b'] == ('file', ['infile'])
    assert p10.size == 1

    # A key with two ':' separators is expected to be rejected.
    p10.input = 'a:x:y'
    with pytest.raises(ProcInputError):
        p10._buildInput()

    # Input computed by a callable from the channel of a dependency.
    p101 = Proc()
    p101.props.channel = Channel.create([(1, 3), (2, 4)])
    p10.depends = p101
    p10.input = 'a, b, c'
    p10.input = lambda ch: ch.cbind(1).cbind(2)
    caplog.clear()
    p10._buildInput()
    # The callable yields four columns for three keys, hence the warning.
    assert 'Not all data are used as input' in caplog.text
    assert len(p10.input) == 3
    assert p10.size == 2
    assert p10.input['a'] == ('var', [1, 2])
    assert p10.input['b'] == ('var', [3, 4])
    assert p10.input['c'] == ('var', [1, 1])

    # Fewer data columns than keys: the keys without data get empty values.
    p10.input = 'a:files, b:files, c'
    p10.input = Channel.create([['infile1'], ['infile2']])
    p10._buildInput()
    assert 'No data found for input key "b"' in caplog.text
    assert 'No data found for input key "c"' in caplog.text
    caplog.clear()
    assert len(p10.input) == 3
    assert p10.size == 2
    assert p10.input['a'] == ('files', [['infile1'], ['infile2']])
    assert p10.input['b'] == ('files', [[], []])
    assert p10.input['c'] == ('var', ['', ''])

    # Resume mode with the settings file removed is expected to fail.
    p10.props.template = TemplateLiquid
    p10.props.workdir = tmpdir / 'test_buildinput_p10'
    p10.resume = 'resume'
    fs.remove(Path(p10.workdir) / 'proc.settings.yaml')
    with pytest.raises(ProcInputError):
        p10._buildInput()

    # Save the settings, drop the in-memory input and build again; the
    # assertions below expect the saved values to come back.
    fs.mkdir(p10.workdir)
    p10.props.input = OBox()
    p10.input['a'] = ('files', [['infile1'], ['infile2']])
    p10.input['b'] = ('files', [[], []])
    p10.input['c'] = ('var', ['', ''])
    p10._saveSettings()
    p10.props.input = None
    p10._buildInput()
    assert len(p10.input) == 3
    assert p10.size == 2
    assert p10.input['a'] == ('files', [['infile1'], ['infile2']])
    assert p10.input['b'] == ('files', [[], []])
    assert p10.input['c'] == ('var', ['', ''])
def test_gzip(tmpdir):
    """Round-trip a file and then a directory through ``fs.gzip``/``fs.gunzip``.

    Both phases also check that a call with a trailing ``False`` argument
    raises ``OSError`` (presumably an "overwrite" flag -- confirm against the
    ``fs`` module).
    """
    root = Path(tmpdir)

    # --- single file ---
    archive = root / 'test1.gz'
    plain = root / 'test2'
    restored = root / 'test3'
    plain.write_text('1')

    # Arguments swapped and trailing False: expected to raise.
    with pytest.raises(OSError):
        fs.gzip(archive, plain, False)

    fs.gzip(plain, archive)
    assert fs.exists(archive)

    fs.gunzip(archive, restored)
    assert fs.exists(restored)
    assert restored.read_text() == '1'

    # --- directory ---
    src_dir = root / 'dir1'
    dst_dir = root / 'dir2'
    tarball = root / 'dir1.tgz'
    fs.mkdir(src_dir)
    member = src_dir / 'test'
    member.write_text('2')

    fs.gzip(src_dir, tarball)
    assert fs.exists(tarball)

    fs.gunzip(tarball, dst_dir)
    assert fs.isdir(dst_dir)
    assert fs.exists(dst_dir / 'test')

    # The destination exists now; with a trailing False this must raise.
    with pytest.raises(OSError):
        fs.gunzip(tarball, dst_dir, False)
def test_mkdir(tmpdir):
    """Check ``fs.mkdir`` on a directory that already exists.

    With ``False`` as second argument the call must raise ``OSError``; the
    plain call must succeed and leave a directory in place.
    """
    existing = Path(tmpdir).joinpath('testdir1')
    existing.mkdir()

    with pytest.raises(OSError):
        fs.mkdir(existing, False)

    fs.mkdir(existing)
    assert fs.isdir(existing)
def job0(tmp_path):
    """Return job #0 of a process rooted at ``tmp_path / 'pJob'``.

    The job directory and an empty ``job.script`` are created before the job
    is returned.

    NOTE(review): tests request this by name as a fixture; the
    ``@pytest.fixture`` decorator is not visible here -- confirm it exists.
    """
    proc_config = Diot(echo_jobs=0, types='stderr')
    prepared = Job(0, Proc(
        workdir=tmp_path / 'pJob',
        dirsig=True,
        config=proc_config,
    ))

    # pretend it's running
    prepared.proc.runtime_config = dict(dirsig=True)

    job_dir = prepared.dir
    fs.mkdir(job_dir)
    (job_dir / 'job.script').write_text('')
    return prepared
def job1(tmpdir):
    """Return a ``RunnerTest2`` job #0 whose process works in ``tmpdir``.

    The job directory and an empty ``job.script`` are created before the job
    is returned.

    NOTE(review): presumably registered as a pytest fixture; the decorator is
    not visible here -- confirm.
    """
    proc_vars = {'proc': {'errhow': 'terminate'}, 'args': {}}
    runner_job = RunnerTest2(
        0,
        Proc(workdir=tmpdir, size=1, procvars=proc_vars),
    )

    job_dir = runner_job.dir
    fs.mkdir(job_dir)
    (job_dir / 'job.script').write_text('')
    return runner_job
def test_issignaturevalid(job0, tmpdir, caplog):
    """Walk ``job0._isSignatureValid`` through its failure reasons.

    Each scenario prepares input/output files, resets ``job0._signature`` so
    it is rebuilt, and then checks both the boolean result and the message
    captured in ``caplog``. Scenarios, in order: missing output file, valid
    signature, script newer than output, missing input file, input newer
    than output. The steps share state and must stay in this order.
    """
    fs.mkdir(job0.dir / DIR_OUTPUT)
    job0.rc = 0
    assert not job0._isSignatureValid('')

    # Output file recorded in the signature, then removed.
    outfile1 = tmpdir / 'test_issignaturevalid_out1.txt'
    outfile1.write_text('')
    job0.output = {'outfile': ('file', outfile1)}
    job0._signature = None
    job0.signature()
    fs.remove(outfile1)
    assert not job0._isSignatureValid()
    assert 'Outfile (o.outfile) not exists:' in caplog.text
    caplog.clear()

    # Output file back in place: the signature is valid.
    outfile1.write_text('')
    job0._signature = None
    assert job0._isSignatureValid()

    # Push the output's timestamps far into the past (100s after the epoch)
    # so that the script file is the newer one.
    utime(outfile1, (100, 100))
    job0._signature = None
    assert not job0._isSignatureValid()
    assert 'Script file is newer than output file' in caplog.text
    caplog.clear()

    # Input file recorded in the signature, then removed.
    infile1 = tmpdir / 'test_issignaturevalid_in1.txt'
    infile1.write_text('')
    outfile1.write_text('')
    job0._signature = None
    job0.input = {'infile': ('file', infile1)}
    job0.signature()
    fs.remove(infile1)
    assert not job0._isSignatureValid()
    assert 'Infile (i.infile) not exists' in caplog.text
    caplog.clear()

    # Make the output newer than the script (+100) and the input newer than
    # the output (+200), both relative to the script time in the signature.
    Path(job0.script).write_text('')
    infile1.write_text('')
    job0._signature = None
    job0.signature()
    utime(outfile1, (job0._signature.script[1] + 100, ) * 2)
    utime(infile1, (job0._signature.script[1] + 200, ) * 2)
    job0._signature = None
    assert not job0._isSignatureValid()
    assert 'Infile (i.infile) is newer than output file:' in caplog.text
    caplog.clear()
def job0(tmpdir):
    """Return a ``RunnerTest`` job #0 whose process works in ``tmpdir``.

    The process is created with ``dirsig`` enabled, echo settings for job 0
    on ``stderr`` and log shortening set to 0. The job directory and an empty
    ``job.script`` are created before the job is returned.

    NOTE(review): presumably registered as a pytest fixture; the decorator is
    not visible here -- confirm.
    """
    proc_vars = {'proc': {'errhow': 'terminate'}, 'args': {}}
    runner_job = RunnerTest(
        0,
        Proc(
            workdir=tmpdir,
            size=1,
            dirsig=True,
            echo=Box(jobs=[0], type=['stderr']),
            procvars=proc_vars,
            _log=Box({'shorten': 0}),
        ),
    )

    job_dir = runner_job.dir
    fs.mkdir(job_dir)
    (job_dir / 'job.script').write_text('')
    return runner_job
def test_run(tmpdir, caplog):
    """Check the log messages of ``Proc.run`` for the resume modes.

    Covers ``resume`` set to ``'resume'``, ``'skip'`` and ``'skip+'``; for
    ``'resume'`` and ``'skip+'`` a minimal ``proc.settings.yaml`` is written
    into the work directory beforehand.

    NOTE(review): the process variable names may be significant to ``Proc()``
    (the workdir literals embed them) -- confirm before renaming.
    """
    # NOTE(review): sys.argv is overwritten here and never restored, so the
    # value stays in place for whatever runs afterwards in this process.
    sys.argv = ['pytest']

    # resume = 'resume', run with the 'dry' runner.
    p24 = Proc()
    p24.resume = 'resume'
    p24.props.workdir = tmpdir / 'test_run_p24'
    fs.mkdir(p24.workdir)
    (p24.workdir / 'proc.settings.yaml').write_text('input: ')
    p24.run('dry', Config())
    assert 'Previous processes skipped.' in caplog.text
    assert p24.runner == 'dry'

    # resume = 'skip': no work directory is prepared for this run.
    p25 = Proc()
    p25.resume = 'skip'
    caplog.clear()
    p25.run(None, Config())
    assert 'Pipeline will resume from future processes.' in caplog.text

    # resume = 'skip+': a settings file is provided before running.
    p25.resume = 'skip+'
    caplog.clear()
    p25.props.workdir = tmpdir / 'test_run_p25'
    fs.mkdir(p25.workdir)
    (p25.workdir / 'proc.settings.yaml').write_text('input: ')
    p25.run(None, Config())
    assert 'Data loaded, pipeline will resume from future processes.' in caplog.text
def test_istrulycached(job0, tmpdir, caplog):
    """Walk ``job0.isTrulyCached`` through its "not cached" reasons.

    Most scenarios follow the same pattern: set up input/output, reset
    ``job0._signature`` and call ``job0.cache()``, change one thing, reset
    the signature again and check that ``isTrulyCached()`` is false with the
    matching message in ``caplog``. The scenarios share files and timestamps
    (``mtime``), so their order matters.
    """
    scriptfile = job0.dir / 'job.script'
    cachefile = job0.dir / 'job.cache'
    fs.mkdir(job0.dir / DIR_OUTPUT)
    job0.rc = 0

    # Caching switched off on the process.
    job0.proc.cache = False
    job0._signature = None
    job0.cache()
    assert not job0.isTrulyCached()

    # Caching switched on, but no cache file has been written.
    job0.proc.cache = True
    job0._signature = None
    assert not job0.isTrulyCached()
    assert 'Not cached as cache file not exists.' in caplog.text

    # Cache file exists but is empty.
    cachefile.write_text('')
    job0._signature = None
    assert not job0.isTrulyCached()
    assert 'Not cached because previous signature is empty.' in caplog.text

    # Script file removed: the current signature cannot be computed.
    job0.input = {}
    job0.output = {}
    fs.remove(scriptfile)
    cachefile.write_text('')
    job0._signature = None
    assert not job0.isTrulyCached()
    assert 'Empty signature because of script file' in caplog.text

    # CACHE_SCRIPT_NEWER
    # Cache with the script dated 10s earlier, then restore its real mtime.
    scriptfile.write_text('')
    mtime = scriptfile.stat().st_mtime
    utime(scriptfile, (mtime - 10, mtime - 10))
    job0._signature = None
    job0.cache()
    utime(scriptfile, (mtime, mtime))
    caplog.clear()
    job0._signature = None
    assert not job0.isTrulyCached()
    assert 'Not cached because script file(script) is newer' in caplog.text

    # CACHE_SIGINVAR_DIFF
    job0.input = {'in': ('var', 'abc')}
    job0._signature = None
    job0.cache()
    job0.input = {'in': ('var', 'abc1')}
    caplog.clear()
    job0._signature = None
    assert not job0.isTrulyCached()
    assert 'Not cached because input variable(in) is different' in caplog.text

    # CACHE_SIGINFILE_DIFF
    infile1 = tmpdir / 'test_istrulycached1.txt'
    infile2 = tmpdir / 'test_istrulycached2.txt'
    infile1.write_text('')
    infile2.write_text('')
    job0.input = {'infile': ('file', infile1)}
    job0._signature = None
    job0.cache()
    job0.input = {'infile': ('file', infile2)}
    caplog.clear()
    job0._signature = None
    assert not job0.isTrulyCached()
    assert 'Not cached because input file(infile) is different:' in caplog.text

    # CACHE_SIGINFILE_NEWER
    # Same input file as cached, but touched 100s into the future afterwards.
    job0._signature = None
    job0.cache()
    utime(infile2, (mtime + 100, mtime + 100))
    caplog.clear()
    job0._signature = None
    assert not job0.isTrulyCached()
    assert 'Not cached because input file(infile) is newer: ' in caplog.text

    # CACHE_SIGINFILES_DIFF
    job0.input = {'infiles': ('files', [infile1])}
    job0._signature = None
    job0.cache()
    job0.input = {'infiles': ('files', [infile1, infile2])}
    caplog.clear()
    job0._signature = None
    assert not job0.isTrulyCached()
    assert 'Not cached because lengths are different for input [files:infiles]:' in caplog.text

    # CACHE_SIGINFILES_NEWER
    # NOTE(review): unlike the other scenarios, the signature is not reset
    # before this cache() call -- confirm that is intended.
    job0.cache()
    utime(infile2, (mtime + 200, mtime + 200))
    caplog.clear()
    job0._signature = None
    assert not job0.isTrulyCached()
    assert 'Not cached because file 2 is newer for input [files:infiles]:' in caplog.text

    # CACHE_SIGOUTVAR_DIFF
    job0.input = {}
    job0.output = {'out': ('var', 'abc')}
    job0._signature = None
    job0.cache()
    job0.output = {'out': ('var', 'abc1')}
    caplog.clear()
    job0._signature = None
    assert not job0.isTrulyCached()
    assert 'Not cached because output variable(out) is different' in caplog.text

    # CACHE_SIGOUTFILE_DIFF
    outfile1 = tmpdir / 'test_istrulycached_out1.txt'
    outfile2 = tmpdir / 'test_istrulycached_out2.txt'
    outfile1.write_text('')
    outfile2.write_text('')
    job0.output = {'outfile': ('file', outfile1)}
    job0._signature = None
    job0.cache()
    job0.output = {'outfile': ('file', outfile2)}
    caplog.clear()
    job0._signature = None
    assert not job0.isTrulyCached()
    assert 'Not cached because output file(outfile) is different:' in caplog.text

    # CACHE_SIGOUTDIR_DIFF
    outdir1 = tmpdir / 'test_istrulycached_dir1.txt'
    outdir2 = tmpdir / 'test_istrulycached_dir2.txt'
    outdir1.mkdir()
    outdir2.mkdir()
    job0.output = {'outdir': ('dir', outdir1)}
    job0._signature = None
    job0.cache()
    job0.output = {'outdir': ('dir', outdir2)}
    caplog.clear()
    job0._signature = None
    assert not job0.isTrulyCached()
    assert 'Not cached because output dir(outdir) is different:' in caplog.text

    # Positive case: cache the current state and expect a hit.
    job0._signature = None
    job0.cache()
    assert job0.isTrulyCached()
    fs.remove(job0.dir / 'job.cache')
    # even without cache file
    assert job0.isTrulyCached()
    caplog.clear()

    # With the script removed and the signature set to an empty string,
    # the job must not be reported as cached.
    job0.cache() # create cache file
    fs.remove(job0.dir / 'job.script')
    job0._signature = ''
    assert not job0.isTrulyCached()