class ScriptData(object):
    """Manage a scratch copy of the pristine ``data`` directory beside this file.

    The pristine tree is copied into a temporary directory with
    :meth:`copy_data`, and :meth:`verify_data` later checks that the
    pristine tree and the copy still agree.
    """

    def __init__(self, tmpdir):
        """Store *tmpdir*; nothing is copied until :meth:`copy_data` is called.

        :param tmpdir: directory object providing ``mkdir`` (presumably
            pytest's ``tmpdir`` fixture -- confirm against callers).
        """
        # Parent directory under which the working copy is created.
        self.tmpdir = tmpdir
        # Working copy of the data tree; None until copy_data() has run.
        self.tmpdata = None
        # Reference tree: the 'data' directory next to this source file.
        self.pristine_data = LocalPath(os.path.dirname(__file__)).join('data')
        # Cached result of installed_packages(); None until first requested.
        self.installed_packages = None

    def copy_data(self):
        """Create a fresh working copy of the pristine data.

        Any working copy left from an earlier call is removed first.

        :returns: the path object of the new working copy.
        """
        if self.tmpdata and self.tmpdata.exists():
            self.tmpdata.remove(ignore_errors=True)
        self.tmpdata = self.tmpdir.mkdir('data')
        self.pristine_data.copy(self.tmpdata, mode=True)

        # Can't add .git directories to the index; the pristine '.hg'
        # directory is copied into the working tree under the name '.git'.
        pristine_hg = self.pristine_data.join('scripts/project/.hg')
        tmp_git = self.tmpdata.join('scripts/project/.git')
        pristine_hg.copy(tmp_git)
        return self.tmpdata

    def sysexec(self, script):
        """Run *script* with its own directory as the working directory.

        :param script: path-like object providing ``sysexec`` and ``dirpath``.
        :returns: whatever ``script.sysexec`` returns.
        """
        print('Executing Script: %s' % script)
        return script.sysexec(cwd=str(script.dirpath()))

    def verify_data(self):
        """Check that the working copy still mirrors the pristine tree.

        :returns: ``False`` when no working copy exists, ``True`` when every
            check passes.
        :raises AssertionError: when the pristine tree contains ``.pyc``
            files, when a pristine entry is missing or changed in the copy,
            or when the copy contains an entry the pristine tree lacks.
        """
        if not self.tmpdata:
            return False

        # Pass 1: every pristine entry must exist, unchanged, in the copy.
        for prissy in self.pristine_data.visit():
            assert prissy.ext != '.pyc', \
                'Pristine has Python bytecode indicating execution from pristine directory!'
            rel = prissy.relto(self.pristine_data)
            tmp = self.tmpdata.join(rel)
            if prissy.check(dir=True):
                assert tmp.check(dir=True), 'Data integrity test failed: %s' % rel
            elif prissy.check(file=True):
                assert tmp.check(file=True), 'Data integrity test failed: %s' % rel
                assert prissy.computehash() == tmp.computehash(), 'Hash mismatch: %s' % rel

        # Pass 2: the copy must not have gained entries. Paths containing
        # '.git' or '__pycache__', and '.pyc' files, are ignored.
        for tmp in self.tmpdata.visit():
            if '.git' in tmp.strpath or '__pycache__' in tmp.strpath or tmp.ext == '.pyc':
                continue
            rel = tmp.relto(self.tmpdata)
            prissy = self.pristine_data.join(rel)
            if tmp.check(dir=True):
                assert prissy.check(dir=True), 'Directory created in tmpdir: %s' % rel
            elif tmp.check(file=True):
                assert prissy.check(file=True), 'File created in tmpdir: %s' % rel
        return True

    def copy_installed(self):
        """Return a deep copy of the cached ``installed_packages()`` result.

        The lookup runs at most once; callers receive an independent copy so
        they cannot modify the cached value.
        """
        # Compare against None so an empty (but valid) result stays cached
        # instead of triggering a new lookup on every call.
        if self.installed_packages is None:
            self.installed_packages = installed_packages()
        return copy.deepcopy(self.installed_packages)
def test_raises_valueerror_when_metadata_is_inconsistent(
    self,
    archive_meta_params: Tuple[str, str, SortedDict],
    archive_dir: LocalPath,
    import_dir: LocalPath,
):
    """Archiving with an inconsistent metadata file must raise ``ValueError``.

    The ``<filename>-inconsistent.meta`` data file is copied into
    *import_dir* and passed to ``syphon.archive``. The error message is
    expected to mention the first metadata column that holds more than one
    distinct value. Afterwards no ``#lock`` entry may remain next to the
    data file.
    """
    import re  # function-scope import: only needed to escape the column name

    filename: str
    schema: SortedDict
    # The second tuple element (the expected file name) is not used here.
    filename, _, schema = archive_meta_params

    datafile = os.path.join(get_data_path(), filename + ".csv")
    bad_metafile = LocalPath(
        os.path.join(get_data_path(), filename + "-inconsistent.meta"))
    metafile = import_dir.join(filename + ".meta")
    bad_metafile.copy(metafile)

    schemafile = os.path.join(archive_dir, syphon.schema.DEFAULT_FILE)
    syphon.init(schema, schemafile)

    # Find the column that will be in the message. `next` with a None
    # default keeps `column` None when no column qualifies, so the
    # assertion below actually guards the fixture data.
    metaframe = DataFrame(read_csv(metafile, dtype=str))
    column: Optional[str] = next(
        (
            name
            for name in metaframe.columns
            if len(metaframe[name].drop_duplicates().values) > 1
        ),
        None,
    )
    del metaframe
    assert column is not None

    # `match` is applied as a regular expression; escape the column name so
    # it is compared literally.
    with pytest.raises(ValueError, match=re.escape(column)):
        syphon.archive(
            archive_dir,
            [datafile],
            meta_files=[metafile],
            schema_filepath=schemafile,
            overwrite=True,
        )

    assert not os.path.exists(
        os.path.join(os.path.dirname(datafile), "#lock"))