class DiskJobResultTests(TestCase): def setUp(self): self.scratch_dir = TemporaryDirectory() def tearDown(self): self.scratch_dir.cleanup() def test_smoke(self): result = DiskJobResult({}) self.assertEqual(str(result), "None") self.assertEqual(repr(result), "<DiskJobResult outcome:None>") self.assertIsNone(result.outcome) self.assertIsNone(result.comments) self.assertEqual(result.io_log, ()) self.assertIsNone(result.return_code) def test_everything(self): result = DiskJobResult({ 'outcome': IJobResult.OUTCOME_PASS, 'comments': "it said blah", 'io_log_filename': make_io_log([ (0, 'stdout', b'blah\n') ], self.scratch_dir.name), 'return_code': 0 }) self.assertEqual(str(result), "pass") self.assertEqual(repr(result), "<DiskJobResult outcome:'pass'>") self.assertEqual(result.outcome, IJobResult.OUTCOME_PASS) self.assertEqual(result.comments, "it said blah") self.assertEqual(result.io_log, ((0, 'stdout', b'blah\n'),)) self.assertEqual(result.return_code, 0)
class TestStatelog(unittest.TestCase): def setUp(self): self.tmpdir = TemporaryDirectory() self.logpath = os.path.join(self.tmpdir.name, 'statelog') self.nonexist = os.path.join(self.tmpdir.name, 'nonexist') with open(self.logpath, 'wb') as fw: fw.write(b'001\n') fw.write(b'002\n') def tearDown(self): self.tmpdir.cleanup() def test_load(self): state = pop3.statelog_load(self.logpath) self.assertEqual(state, {b'001', b'002'}) def test_load_fallback(self): state = pop3.statelog_load(self.nonexist) self.assertEqual(state, set()) def test_create(self): pop3.statelog_save(self.logpath, {b'001', b'002'}) with open(self.logpath, 'rb') as fp: self.assertEqual(fp.readline(), b'001\n') self.assertEqual(fp.readline(), b'002\n')
class NamedFileInTemporaryDirectory(object): def __init__(self, filename, mode='w+b', bufsize=-1, **kwds): """ Open a file named `filename` in a temporary directory. This context manager is preferred over `NamedTemporaryFile` in stdlib `tempfile` when one needs to reopen the file. Arguments `mode` and `bufsize` are passed to `open`. Rest of the arguments are passed to `TemporaryDirectory`. """ self._tmpdir = TemporaryDirectory(**kwds) path = _os.path.join(self._tmpdir.name, filename) self.file = open(path, mode, bufsize) def cleanup(self): self.file.close() self._tmpdir.cleanup() __del__ = cleanup def __enter__(self): return self.file def __exit__(self, type, value, traceback): self.cleanup()
class NamedFileInTemporaryDirectory(object): """Open a file named `filename` in a temporary directory. This context manager is preferred over :class:`tempfile.NamedTemporaryFile` when one needs to reopen the file, because on Windows only one handle on a file can be open at a time. You can close the returned handle explicitly inside the context without deleting the file, and the context manager will delete the whole directory when it exits. Arguments `mode` and `bufsize` are passed to `open`. Rest of the arguments are passed to `TemporaryDirectory`. Usage example:: with NamedFileInTemporaryDirectory('myfile', 'wb') as f: f.write('stuff') f.close() # You can now pass f.name to things that will re-open the file """ def __init__(self, filename, mode='w+b', bufsize=-1, **kwds): self._tmpdir = TemporaryDirectory(**kwds) path = _os.path.join(self._tmpdir.name, filename) self.file = open(path, mode, bufsize) def cleanup(self): self.file.close() self._tmpdir.cleanup() __del__ = cleanup def __enter__(self): return self.file def __exit__(self, type, value, traceback): self.cleanup()
def test_dcm2niix_run(): dicomDir = os.path.join(TEST_DATA_DIR, "sourcedata", "sub-01") tmpBase = os.path.join(TEST_DATA_DIR, "tmp") #tmpDir = TemporaryDirectory(dir=tmpBase) tmpDir = TemporaryDirectory() app = Dcm2niix([dicomDir], tmpDir.name) app.run() helperDir = os.path.join( tmpDir.name, DEFAULT.tmpDirName, DEFAULT.helperDir, "*") ls = sorted(glob(helperDir)) firstMtime = [os.stat(_).st_mtime for _ in ls] assert 'localizer_20100603125600' in ls[0] #files should not be change after a rerun app.run() secondMtime = [os.stat(_).st_mtime for _ in ls] assert firstMtime == secondMtime #files should be change after a forced rerun app.run(force=True) thirdMtime = [os.stat(_).st_mtime for _ in ls] assert firstMtime != thirdMtime tmpDir.cleanup()
def test_load_from_file_with_relative_paths(self): """ When explicitly setting a config file, paths should be relative to the config file, not the working directory. """ config_dir = TemporaryDirectory() config_fname = os.path.join(config_dir.name, 'mkdocs.yml') docs_dir = os.path.join(config_dir.name, 'src') os.mkdir(docs_dir) config_file = open(config_fname, 'w') try: config_file.write("docs_dir: src\nsite_name: MkDocs Test\n") config_file.flush() config_file.close() cfg = base.load_config(config_file=config_file) self.assertTrue(isinstance(cfg, base.Config)) self.assertEqual(cfg['site_name'], 'MkDocs Test') self.assertEqual(cfg['docs_dir'], docs_dir) self.assertEqual(cfg.config_file_path, config_fname) self.assertIsInstance(cfg.config_file_path, utils.text_type) finally: config_dir.cleanup()
class GitRepositoryTest(TestCase): def setUp(self): self.tmpdir = TemporaryDirectory() self.repo1 = GitRepository( self.tmpdir.name, url="https://github.com/st-tu-dresden/inloop.git", branch="master" ) self.repo2 = GitRepository( self.tmpdir.name, url="https://github.com/st-tu-dresden/inloop-java-repository-example.git", branch="master" ) def tearDown(self): self.tmpdir.cleanup() def test_git_operations(self): self.repo1.synchronize() self.assertTrue(self.get_path(".git").exists()) self.assertTrue(self.get_path("manage.py").exists()) self.assertEqual(b"", self.run_command("git status -s")) self.repo2.synchronize() self.assertFalse(self.get_path("manage.py").exists()) self.assertTrue(self.get_path("build.xml").exists()) self.assertEqual(b"", self.run_command("git status -s")) def get_path(self, name): return Path(self.tmpdir.name).joinpath(name) def run_command(self, command): return check_output(command.split(), cwd=self.tmpdir.name)
class TemporaryRepository(object): """A Git repository initialized in a temporary directory as a context manager. usage: with TemporaryRepository() as tempRepo: print("workdir:", tempRepo.workdir) print("path:", tempRepo.path) index = repo.index index.read() index.add("...") index.write() tree = index.write_tree() repo.create_commit('HEAD', author, comitter, message, tree, []) """ def __init__(self, is_bare=False, clone_from_repo=None): self.temp_dir = TemporaryDirectory() if clone_from_repo: self.repo = clone_repository(clone_from_repo.path, self.temp_dir.name) else: self.repo = init_repository(self.temp_dir.name, is_bare) def __enter__(self): return self.repo def __exit__(self, type, value, traceback): self.temp_dir.cleanup()
def testInitNotExistingsRepo(self): dir = TemporaryDirectory() repo = quit.git.Repository(dir.name, create=True) self.assertFalse(repo.is_bare) self.assertEqual(len(repo.revisions()), 0) dir.cleanup()
def save_document(self): tempdirectory = TemporaryDirectory() document = self.generate_document(tempdirectory.name) if document: with open(document, 'rb') as f: self.data_file.save(path.basename(document), File(f)) self.last_update_of_data_file = datetime.datetime.now() tempdirectory.cleanup()
def test_dcm2bids(): tmpBase = os.path.join(TEST_DATA_DIR, "tmp") #bidsDir = TemporaryDirectory(dir=tmpBase) bidsDir = TemporaryDirectory() tmpSubDir = os.path.join(bidsDir.name, DEFAULT.tmpDirName, "sub-01") shutil.copytree( os.path.join(TEST_DATA_DIR, "sidecars"), tmpSubDir) app = Dcm2bids( [TEST_DATA_DIR], "01", os.path.join(TEST_DATA_DIR, "config_test.json"), bidsDir.name ) app.run() layout = BIDSLayout(bidsDir.name, validate=False) assert layout.get_subjects() == ["01"] assert layout.get_sessions() == [] assert layout.get_tasks() == ["rest"] assert layout.get_runs() == [1,2,3] app = Dcm2bids( [TEST_DATA_DIR], "01", os.path.join(TEST_DATA_DIR, "config_test.json"), bidsDir.name ) app.run() fmapFile = os.path.join( bidsDir.name, "sub-01", "fmap", "sub-01_echo-492_fmap.json") data = load_json(fmapFile) fmapMtime = os.stat(fmapFile).st_mtime assert data["IntendedFor"] == "dwi/sub-01_dwi.nii.gz" data = load_json(os.path.join( bidsDir.name, "sub-01", "localizer", "sub-01_run-01_localizer.json")) assert data["ProcedureStepDescription"] == "Modify by dcm2bids" #rerun shutil.rmtree(tmpSubDir) shutil.copytree( os.path.join(TEST_DATA_DIR, "sidecars"), tmpSubDir) app = Dcm2bids( [TEST_DATA_DIR], "01", os.path.join(TEST_DATA_DIR, "config_test.json"), bidsDir.name ) app.run() fmapMtimeRerun = os.stat(fmapFile).st_mtime assert fmapMtime == fmapMtimeRerun bidsDir.cleanup()
def testCloneNotExistingRepo(self): environ["QUIT_SSH_KEY_HOME"] = "./tests/assets/sshkey/" REMOTE_URL = '[email protected]:AKSW/ThereIsNoQuitStoreRepo.git' dir = TemporaryDirectory() with self.assertRaises(Exception) as context: quit.git.Repository(dir.name, create=True, origin=REMOTE_URL) dir.cleanup()
class MyTest(TestCase): def setUp(self): self.test_dir = TemporaryDirectory() def tearDown(self): self.test_dir.cleanup() # Test methods follow # 2016.07.08 add def test_sample(self): print(self.test_dir)
def testCloneRepo(self): REMOTE_NAME = 'origin' REMOTE_URL = 'git://github.com/AKSW/QuitStore.example.git' dir = TemporaryDirectory() repo = quit.git.Repository(dir.name, create=True, origin=REMOTE_URL) self.assertTrue(path.exists(path.join(dir.name, 'example.nq'))) self.assertFalse(repo.is_bare) dir.cleanup()
def testCloneRepoViaSSH(self): environ["QUIT_SSH_KEY_HOME"] = "./tests/assets/sshkey/" REMOTE_URL = '[email protected]:AKSW/QuitStore.example.git' dir = TemporaryDirectory() repo = quit.git.Repository(dir.name, create=True, origin=REMOTE_URL) self.assertTrue(path.exists(path.join(dir.name, 'example.nt'))) self.assertFalse(repo.is_bare) dir.cleanup()
class LoadScriptTest(unittest.TestCase): def setUp(self): self.tempdir = TemporaryDirectory() for filename, contents in self.script_files: with open(os.path.join(self.tempdir.name, filename), "xt") as f: f.write(contents) def tearDown(self): self.tempdir.cleanup()
def testCloneRepoViaSSHNoKeyFiles(self): environ["QUIT_SSH_KEY_HOME"] = "./tests/assets/nosshkey/" if "SSH_AUTH_SOCK" in environ: del environ["SSH_AUTH_SOCK"] REMOTE_URL = '[email protected]:AKSW/QuitStore.example.git' dir = TemporaryDirectory() with self.assertRaises(Exception) as context: quit.git.Repository(dir.name, create=True, origin=REMOTE_URL) dir.cleanup()
class BaseRecorderTest(unittest.TestCase): def setUp(self): unittest.TestCase.setUp(self) self.original_dir = os.getcwd() self.temp_dir = TemporaryDirectory() os.chdir(self.temp_dir.name) def tearDown(self): self.temp_dir.cleanup() os.chdir(self.original_dir) unittest.TestCase.tearDown(self)
class CLITestCase(unittest.TestCase): def setUp(self): self.temp_dir = TemporaryDirectory() self.output_file_index = 0 def tearDown(self): self.temp_dir.cleanup() def exec(self, string): args = ['-b', self.temp_dir.name] args.extend(shlex.split(string)) process_args(args) def list_output(self, string=''): temp_text_file = os.path.join(self.temp_dir.name, 'temp{}.txt'.format(self.output_file_index)) self.output_file_index += 1 self.exec('list --pipe-to "cat > {}" {}'.format(temp_text_file, string)) with open(temp_text_file) as f: text = f.read() return text def populate(self, num_entries=20): for i in range(num_entries): self.exec('new -m "{}"'.format(shlex.quote(random_text()))) def test_list(self): self.populate(20) text = self.list_output() self.assertTrue(len(text.split('\n'))>20*3) def test_edit(self): self.exec('new --message "Hello world"') original_text = self.list_output() self.exec('edit -m "New text"') modified_text = self.list_output() self.assertTrue(re.search('Hello world', original_text)) self.assertTrue(re.search('New text', modified_text)) self.assertTrue(re.sub('Hello world', 'New text', original_text)) def test_search(self): self.populate(2) search_string = 'stringthatwontgetgeneratedbyaccident' self.exec('new -m "hello world\n test text {}inthisentry"'.format(search_string)) self.populate(2) all_entries = self.list_output() search_matches = self.list_output(search_string) self.assertNotEqual(all_entries, search_matches) self.assertTrue(re.search(search_string, all_entries)) self.assertTrue(re.search(search_string, search_matches))
def setUp(self): self.dir = TemporaryDirectory() self.remotedir = TemporaryDirectory() self.file = NamedTemporaryFile(dir=self.dir.name, delete=False) self.filename = path.basename(self.file.name) self.author = Signature('QuitStoreTest', '*****@*****.**') self.comitter = Signature('QuitStoreTest', '*****@*****.**') # Initialize repository init_repository(self.dir.name, False)
class TestCaseSnapshotPath(TestCaseCfg): def setUp(self): super(TestCaseSnapshotPath, self).setUp() #use a new TemporaryDirectory for snapshotPath to avoid #side effects on leftovers self.tmpDir = TemporaryDirectory() self.cfg.dict['profile1.snapshots.path'] = self.tmpDir.name self.snapshotPath = self.cfg.snapshotsFullPath() def tearDown(self): super(TestCaseSnapshotPath, self).tearDown() self.tmpDir.cleanup()
class FileSystemBackendTest(unittest.TestCase): def setUp(self) -> None: self.tmpdir = TemporaryDirectory() self.cwd = os.getcwd() os.chdir(self.tmpdir.name) dirname = 'fsbackendtest' os.mkdir(dirname) # replace by temporary directory self.backend = FilesystemBackend(dirname) self.testdata = 'dshiuasduzchjbfdnbewhsdcuzd' self.alternative_testdata = "8u993zhhbn\nb3tadgadg" self.identifier = 'some name' def tearDown(self) -> None: os.chdir(self.cwd) self.tmpdir.cleanup() def test_put_and_get_normal(self) -> None: # first put the data self.backend.put(self.identifier, self.testdata) # then retrieve it again data = self.backend.get(self.identifier) self.assertEqual(data, self.testdata) def test_put_file_exists_no_overwrite(self) -> None: name = 'test_put_file_exists_no_overwrite' self.backend.put(name, self.testdata) with self.assertRaises(FileExistsError): self.backend.put(name, self.alternative_testdata) self.assertEqual(self.testdata, self.backend.get(name)) def test_put_file_exists_overwrite(self) -> None: name = 'test_put_file_exists_overwrite' self.backend.put(name, self.testdata) self.backend.put(name, self.alternative_testdata, overwrite=True) self.assertEqual(self.alternative_testdata, self.backend.get(name)) def test_instantiation_fail(self) -> None: with self.assertRaises(NotADirectoryError): FilesystemBackend("C\\#~~") def test_exists(self) -> None: name = 'test_exists' self.backend.put(name, self.testdata) self.assertTrue(self.backend.exists(name)) self.assertFalse(self.backend.exists('exists_not')) def test_get_not_existing(self) -> None: name = 'test_get_not_existing' with self.assertRaises(FileNotFoundError): self.backend.get(name)
def create_archive(archive, excluded): """Creates a new zip archive file by excluding files at positions found in excluded. """ new_archive_file = NamedTemporaryFile() temporary_directory = TemporaryDirectory() new_archive = ZipFile(new_archive_file, 'w') for index, filename in enumerate(archive.namelist()): if index not in excluded: archive.extract(filename, path=temporary_directory.name) new_archive.write(os.path.join(temporary_directory.name, filename)) temporary_directory.cleanup() return new_archive_file
class TestCase(unittest.TestCase): def __init__(self, methodName): os.environ['LANGUAGE'] = 'en_US.UTF-8' self.cfgFile = os.path.abspath(os.path.join(__file__, os.pardir, 'config')) logger.APP_NAME = 'BIT_unittest' logger.openlog() super(TestCase, self).__init__(methodName) def setUp(self): logger.DEBUG = '-v' in sys.argv self.run = False self.sharePathObj = TemporaryDirectory() self.sharePath = self.sharePathObj.name def tearDown(self): self.sharePathObj.cleanup() def callback(self, func, *args): func(*args) self.run = True def assertExists(self, *path): full_path = os.path.join(*path) if not os.path.exists(full_path): self.fail('File does not exist: {}'.format(full_path)) def assertNotExists(self, *path): full_path = os.path.join(*path) if os.path.exists(full_path): self.fail('File does unexpected exist: {}'.format(full_path)) def assertIsFile(self, *path): full_path = os.path.join(*path) if not os.path.isfile(full_path): self.fail('Not a File: {}'.format(full_path)) def assertIsNoFile(self, *path): full_path = os.path.join(*path) if os.path.isfile(full_path): self.fail('Unexpected File: {}'.format(full_path)) def assertIsDir(self, *path): full_path = os.path.join(*path) if not os.path.isdir(full_path): self.fail('Not a directory: {}'.format(full_path)) def assertIsLink(self, *path): full_path = os.path.join(*path) if not os.path.islink(full_path): self.fail('Not a symlink: {}'.format(full_path))
def _run_interface(self, runtime): # Get all inputs from the ApplyTransforms object ifargs = self.inputs.get() # Extract number of input images and transforms in_files = ifargs.pop('input_image') num_files = len(in_files) transforms = ifargs.pop('transforms') # Get number of parallel jobs num_threads = ifargs.pop('num_threads') save_cmd = ifargs.pop('save_cmd') # Remove certain keys for key in ['environ', 'ignore_exception', 'terminal_output', 'output_image']: ifargs.pop(key, None) # Get a temp folder ready tmp_folder = TemporaryDirectory(prefix='tmp-', dir=runtime.cwd) xfms_list = _arrange_xfms(transforms, num_files, tmp_folder) assert len(xfms_list) == num_files # Inputs are ready to run in parallel if num_threads < 1: num_threads = None if num_threads == 1: out_files = [_applytfms(( in_file, in_xfm, ifargs, i, runtime.cwd)) for i, (in_file, in_xfm) in enumerate(zip(in_files, xfms_list)) ] else: from concurrent.futures import ThreadPoolExecutor with ThreadPoolExecutor(max_workers=num_threads) as pool: out_files = list(pool.map(_applytfms, [ (in_file, in_xfm, ifargs, i, runtime.cwd) for i, (in_file, in_xfm) in enumerate(zip(in_files, xfms_list))] )) tmp_folder.cleanup() # Collect output file names, after sorting by index self._results['out_files'] = [el[0] for el in out_files] if save_cmd: self._results['log_cmdline'] = os.path.join(runtime.cwd, 'command.txt') with open(self._results['log_cmdline'], 'w') as cmdfile: print('\n-------\n'.join([el[1] for el in out_files]), file=cmdfile) return runtime
class TestDataSet(unittest.TestCase): def setUp(self): # Creating a temporary directory self.output_dir = TemporaryDirectory(prefix="post_hit_dataset_test_") def test_dataset(self): self.assertTrue(isinstance(dataset.DataSet(dataset_path = "roadmap_epigenomic.json"), dataset.DataSet)) self.assertTrue(isinstance(dataset.DataSet(dataset_path = "".join([POST_HIT_PATH,"data/datasets/phyloP100way.json"])), dataset.DataSet)) self.assertTrue(isinstance(dataset.DataSet(dataset_path ="ensembl"), dataset.DataSet)) self.assertTrue(isinstance(dataset.DataSet(dataset_path = "gtex.json"), dataset.DataSet)) self.assertRaises(ValueError, dataset.DataSet,"gtex.txt") self.assertRaises(ValueError, dataset.DataSet,"dummy") self.assertRaises(ValueError, dataset.DataSet,"dummy.json") self.dummy_dataset = dataset.DataSet(dataset_path = POST_HIT_PATH + "tmp/dummy_test.json", project= "TEST", description= "Dummy Test JSON FILE", project_link= "dummy.test", version= 0, data_path= "tmp/", protocole= "ftp", file_type= "gtf", ids=["DummyTest"], download_links=["ftp://ftp.ensembl.org/pub/release-75//gtf/homo_sapiens"], filenames=["DummyTest.gtf.gz"], metadata= [], data_representation={}) self.assertTrue(isinstance(self.dummy_dataset, dataset.DataSet)) self.assertTrue(os.path.exists(POST_HIT_PATH + "tmp/DummyTest.gtf.gz"), dataset.DataSet) folder = POST_HIT_PATH + 'tmp/' for the_file in os.listdir(folder): file_path = os.path.join(folder, the_file) try: if os.path.isfile(file_path): os.unlink(file_path) elif os.path.isdir(file_path): shutil.rmtree(file_path) except Exception as e: print(e) def tearDown(self): """Finishes the test.""" # Deleting the output directory self.output_dir.cleanup()
class TestRestoreSSH(generic.SSHSnapshotsWithSidTestCase, TestRestoreLocal): def setUp(self): super(TestRestoreSSH, self).setUp() self.include = TemporaryDirectory() generic.create_test_files(os.path.join(self.remoteSIDBackupPath, self.include.name[1:])) #mount self.cfg.setCurrentHashId(mount.Mount(cfg = self.cfg).mount()) def tearDown(self): #unmount mount.Mount(cfg = self.cfg).umount(self.cfg.current_hash_id) super(TestRestoreSSH, self).tearDown() self.include.cleanup()
class ArchiveCopy(ar.Archive): def __init__(self, path): self.__dir = TemporaryDirectory() dest = '%s/%s' % (self.__dir.name, os.path.basename(path)) shutil.copyfile(path, dest) super().__init__(dest) def __enter__(self): self.__dir.__enter__() return super().__enter__() def __exit__(self, type, value, traceback): super().__exit__(type, value, traceback) self.__dir.__exit__(type, value, traceback)
def setUp(self): super(TestTakeSnapshotSSH, self).setUp() self.include = TemporaryDirectory() generic.create_test_files(self.include.name) #mount self.cfg.setCurrentHashId(mount.Mount(cfg = self.cfg).mount())
def setUp(self): super(TestRestoreSSH, self).setUp() self.include = TemporaryDirectory() generic.create_test_files(os.path.join(self.remoteSIDBackupPath, self.include.name[1:])) #mount self.cfg.setCurrentHashId(mount.Mount(cfg = self.cfg).mount())
def setUp(self): self.test_dir = TemporaryDirectory() self.test_path = Path(self.test_dir.name)
def run_cli( self, hql: Union[str, str], schema: Optional[str] = None, verbose: bool = True, hive_conf: Optional[Dict[Any, Any]] = None, ) -> Any: """ Run an hql statement using the hive cli. If hive_conf is specified it should be a dict and the entries will be set as key/value pairs in HiveConf :param hive_conf: if specified these key value pairs will be passed to hive as ``-hiveconf "key"="value"``. Note that they will be passed after the ``hive_cli_params`` and thus will override whatever values are specified in the database. :type hive_conf: dict >>> hh = HiveCliHook() >>> result = hh.run_cli("USE airflow;") >>> ("OK" in result) True """ conn = self.conn schema = schema or conn.schema if schema: hql = f"USE {schema};\n{hql}" with TemporaryDirectory(prefix='airflow_hiveop_') as tmp_dir: with NamedTemporaryFile(dir=tmp_dir) as f: hql += '\n' f.write(hql.encode('UTF-8')) f.flush() hive_cmd = self._prepare_cli_cmd() env_context = get_context_from_env_var() # Only extend the hive_conf if it is defined. if hive_conf: env_context.update(hive_conf) hive_conf_params = self._prepare_hiveconf(env_context) if self.mapred_queue: hive_conf_params.extend([ '-hiveconf', f'mapreduce.job.queuename={self.mapred_queue}', '-hiveconf', f'mapred.job.queue.name={self.mapred_queue}', '-hiveconf', f'tez.queue.name={self.mapred_queue}', ]) if self.mapred_queue_priority: hive_conf_params.extend([ '-hiveconf', f'mapreduce.job.priority={self.mapred_queue_priority}' ]) if self.mapred_job_name: hive_conf_params.extend([ '-hiveconf', f'mapred.job.name={self.mapred_job_name}' ]) hive_cmd.extend(hive_conf_params) hive_cmd.extend(['-f', f.name]) if verbose: self.log.info("%s", " ".join(hive_cmd)) sub_process: Any = subprocess.Popen(hive_cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, cwd=tmp_dir, close_fds=True) self.sub_process = sub_process stdout = '' while True: line = sub_process.stdout.readline() if not line: break stdout += line.decode('UTF-8') if verbose: self.log.info(line.decode('UTF-8').strip()) sub_process.wait() if sub_process.returncode: raise AirflowException(stdout) return stdout
def hasherAndTree(hasher): tdir = TemporaryDirectory().name store = FileHashStore(tdir) m = CompactMerkleTree(hasher=hasher, hashStore=store) return hasher, m
def update(packages, no_detect, env_name, eager, all_packages, infra, global_install, admin, force, dev, as_module, self, quiet): """If the option --env is supplied, the update will be applied using that named virtual env. Unless the option --global is selected, the update will only affect the current user. Of course, this will have no effect if a virtual env is in use. The desired name of the admin user can be set with the `_DEFAULT_ADMIN_` environment variable. When performing a global update, your system may use an older version of pip that is incompatible with some features such as --eager. To force the use of these features, use --force. With no packages nor options selected, this will update packages by looking for a `requirements.txt` or a dev version of that in the current directory. If no --env is chosen, this will attempt to detect a project and use its virtual env before resorting to the default pip. No project detection will occur if a virtual env is active. To update this tool, use the --self flag. All other methods of updating will ignore `hatch`. See: https://github.com/pypa/pip/issues/1299 """ command = ['install', '--upgrade'] + (['-q'] if quiet else []) if not global_install or force: # no cov command.extend(['--upgrade-strategy', 'eager' if eager else 'only-if-needed']) infra_packages = ['pip', 'setuptools', 'wheel'] temp_dir = None # Windows' `runas` allows only a single argument for the # command so we catch this case and turn our command into # a string later. windows_admin_command = None if self: # no cov as_module = True if not self and env_name: venv_dir = os.path.join(get_venv_dir(), env_name) if not os.path.exists(venv_dir): echo_failure('Virtual env named `{}` does not exist.'.format(env_name)) sys.exit(1) with venv(venv_dir): executable = ( [get_proper_python(), '-m', 'pip'] if as_module or (infra and ON_WINDOWS) else [get_proper_pip()] ) command = executable + command if all_packages: installed_packages = infra_packages if infra else get_installed_packages() else: installed_packages = None elif not self and not venv_active() and not no_detect and is_project(): venv_dir = os.path.join(os.getcwd(), get_venv_folder()) if not is_venv(venv_dir): echo_info('A project has been detected!') echo_waiting('Creating a dedicated virtual env... ', nl=False) create_venv(venv_dir) echo_success('complete!') with venv(venv_dir): echo_waiting('Installing this project in the virtual env... 
', nl=False) install_packages(['-q', '-e', '.']) echo_success('complete!') with venv(venv_dir): executable = ( [get_proper_python(), '-m', 'pip'] if as_module or (infra and ON_WINDOWS) else [get_proper_pip()] ) command = executable + command if all_packages: installed_packages = infra_packages if infra else get_installed_packages() else: installed_packages = None else: venv_dir = None executable = ( [sys.executable if self else get_proper_python(), '-m', 'pip'] if as_module or (infra and ON_WINDOWS) else [get_proper_pip()] ) command = executable + command if all_packages: installed_packages = infra_packages if infra else get_installed_packages() else: installed_packages = None if not venv_active(): # no cov if global_install: if not admin: if ON_WINDOWS: windows_admin_command = get_admin_command() else: command = get_admin_command() + command else: command.append('--user') if self: # no cov command.append('hatch') if ON_WINDOWS: echo_warning('After the update you may want to press Enter to flush stdout.') subprocess.Popen(command, shell=NEED_SUBPROCESS_SHELL) sys.exit() else: result = subprocess.run(command, shell=NEED_SUBPROCESS_SHELL) sys.exit(result.returncode) elif infra: command.extend(infra_packages) elif all_packages: installed_packages = [ package for package in installed_packages if package not in infra_packages and package != 'hatch' ] if not installed_packages: echo_failure('No packages installed.') sys.exit(1) command.extend(installed_packages) elif packages: packages = [package for package in packages if package != 'hatch'] if not packages: echo_failure('No packages to install.') sys.exit(1) command.extend(packages) # When https://github.com/pypa/pipfile is finalized, we'll use it. else: reqs = get_requirements_file(os.getcwd(), dev=dev) if not reqs: echo_failure('Unable to locate a requirements file.') sys.exit(1) with open(reqs, 'r') as f: lines = f.readlines() matches = [] for line in lines: match = re.match(r'^[^=<>]+', line.lstrip()) if match and match.group(0) == 'hatch': matches.append(line) if matches: for line in matches: lines.remove(line) temp_dir = TemporaryDirectory() reqs = os.path.join(temp_dir.name, basepath(reqs)) with open(reqs, 'w') as f: f.writelines(lines) command.extend(['-r', reqs]) if windows_admin_command: # no cov command = windows_admin_command + [' '.join(command)] if venv_dir: with venv(venv_dir): if env_name: echo_waiting('Updating virtual env `{}`...'.format(env_name)) else: echo_waiting('Updating for this project...') result = subprocess.run(command, shell=NEED_SUBPROCESS_SHELL) else: echo_waiting('Updating...') result = subprocess.run(command, shell=NEED_SUBPROCESS_SHELL) if temp_dir is not None: temp_dir.cleanup() sys.exit(result.returncode)
def report_main(args_parser): """ report mainline The mainline for the 'report' action. Args: args_parser: the argument parser to use for argument processing Returns: the exit code """ args_parser.add_argument('--full-config', '-C', action='store_true') args_parser.add_argument('--no-sanitize', action='store_true') args_parser.add_argument('--offline', action='store_true') known_args = sys.argv[1:] args, unknown_args = args_parser.parse_known_args(known_args) if unknown_args: logger.warn('unknown arguments: {}'.format(' '.join(unknown_args))) rv = 0 work_dir = args.work_dir if args.work_dir else os.getcwd() # setup sphinx engine to extract configuration config = {} configuration_load_issue = None confluence_instance_info = None publisher = ConfluencePublisher() try: with TemporaryDirectory() as tmp_dir: with docutils_namespace(): print('fetching configuration information...') app = Sphinx( work_dir, # document sources work_dir, # directory with configuration tmp_dir, # output for built documents tmp_dir, # output for doctree files ConfluenceReportBuilder.name) # builder to execute if app.config.confluence_publish: process_ask_configs(app.config) # extract configuration information for k, v in app.config.values.items(): raw = getattr(app.config, k) if raw is None: continue if callable(raw): value = '(callable)' else: value = raw if not args.full_config and not k.startswith('confluence_'): continue # always extract some known builder configurations if args.full_config and k.startswith(IGNORE_BUILDER_CONFS): continue config[k] = value # initialize the publisher (if needed later) publisher.init(app.config) except Exception as ex: logger.error(ex) if os.path.isfile(os.path.join(work_dir, 'conf.py')): configuration_load_issue = 'unable to load configuration' else: configuration_load_issue = 'no documentation/missing configuration' rv = 1 # attempt to fetch confluence instance version confluence_publish = config.get('confluence_publish') confluence_server_url = config.get('confluence_server_url') if not args.offline and confluence_publish and confluence_server_url: base_url = ConfluenceUtil.normalizeBaseUrl(confluence_server_url) info = '' session = None try: print('connecting to confluence instance...') publisher.connect() info += ' connected: yes\n' session = publisher.rest_client.session except Exception as ex: logger.error(ex) info += ' connected: no\n' rv = 1 if session: try: # fetch print('fetching confluence instance information...') manifest_url = base_url + MANIFEST_PATH rsp = session.get(manifest_url) if rsp.status_code == 200: info += ' fetched: yes\n' # extract print('decoding information...') rsp.encoding = 'utf-8' raw_data = rsp.text info += ' decoded: yes\n' # parse print('parsing information...') xml_data = ElementTree.fromstring(raw_data) info += ' parsed: yes\n' root = ElementTree.ElementTree(xml_data) for o in root.findall('typeId'): info += ' type: ' + o.text + '\n' for o in root.findall('version'): info += ' version: ' + o.text + '\n' for o in root.findall('buildNumber'): info += ' build: ' + o.text + '\n' else: logger.error('bad response from server ({})'.format( rsp.status_code)) info += ' fetched: error ({})\n'.format(rsp.status_code) rv = 1 except Exception as ex: logger.error(ex) info += 'failure to determine confluence data\n' rv = 1 confluence_instance_info = info def sensitive_config(key): if key in config: if config[key]: config[key] = '(set)' else: config[key] = '(set; empty)' # always sanitize out sensitive information 
sensitive_config('confluence_client_cert_pass') sensitive_config('confluence_server_pass') # optional sanitization if not args.no_sanitize: sensitive_config('author') sensitive_config('confluence_client_cert') sensitive_config('confluence_global_labels') sensitive_config('confluence_jira_servers') sensitive_config('confluence_parent_page') sensitive_config('confluence_parent_page_id_check') sensitive_config('confluence_proxy') sensitive_config('confluence_server_auth') sensitive_config('confluence_server_cookies') sensitive_config('confluence_server_user') sensitive_config('project') # remove confluence instance (attempt to keep scheme) if 'confluence_server_url' in config: value = config['confluence_server_url'] parsed = urlparse(value) if parsed.scheme: value = parsed.scheme + '://<removed>' else: value = '(set; no scheme)' if parsed.netloc and parsed.netloc.endswith('atlassian.net'): value += ' (cloud)' config['confluence_server_url'] = value # remove space name, but track casing if 'confluence_space_name' in config: value = config['confluence_space_name'] if value.isupper(): value = '(set; upper)' elif value.islower(): value = '(set; upper)' else: value = '(set; mixed)' config['confluence_space_name'] = value print('') print('Confluence builder report has been generated.') print('Please copy the following text for the GitHub issue:') print('') logger.note('------------[ cut here ]------------') print('(system)') print(' platform:', single_line_version(platform.platform())) print(' python:', single_line_version(sys.version)) print(' sphinx:', single_line_version(sphinx_version)) print(' requests:', single_line_version(requests_version)) print(' builder:', single_line_version(scb_version)) print('') print('(configuration)') if config: for k, v in OrderedDict(sorted(config.items())).items(): print('{}: {}'.format(k, v)) else: print('~default configuration~') if configuration_load_issue: print('') print('(error loading configuration)') print(configuration_load_issue) if confluence_instance_info: print('') print('(confluence instance)') print(confluence_instance_info.rstrip()) logger.note('------------[ cut here ]------------') return rv
def correction(activity: dict, collection_id=None, **kwargs): """Celery task to deal with Surface Reflectance processors.""" execution = execution_from_collection(activity, collection_id=collection_id, activity_type=correction.__name__) collection: Collection = execution.activity.collection scene_id = activity['sceneid'] logging.info( f'Starting Correction Task for {collection.name}(id={collection.id}, scene_id={scene_id})' ) data_collection = get_provider_collection_from_activity(activity) try: output_path = data_collection.path(collection) if collection._metadata and collection._metadata.get('processors'): processor_name = collection._metadata['processors'][0]['name'] with TemporaryDirectory(prefix='correction_', suffix=f'_{scene_id}') as tmp: shutil.unpack_archive(activity['args']['compressed_file'], tmp) # Process environment env = dict(**os.environ, INDIR=str(tmp), OUTDIR=str(output_path)) entry = scene_id entries = list(Path(tmp).iterdir()) if len(entries) == 1 and entries[0].suffix == '.SAFE': entry = entries[0].name if processor_name.lower() == 'sen2cor': output_path.parent.mkdir(exist_ok=True, parents=True) sen2cor_conf = Config.SEN2COR_CONFIG logging.info(f'Using {entry} of sceneid {scene_id}') # TODO: Use custom sen2cor version (2.5 or 2.8) cmd = f'''docker run --rm -i \ -v $INDIR:/mnt/input-dir \ -v $OUTDIR:/mnt/output-dir \ -v {sen2cor_conf["SEN2COR_AUX_DIR"]}:/home/lib/python2.7/site-packages/sen2cor/aux_data \ -v {sen2cor_conf["SEN2COR_CONFIG_DIR"]}:/root/sen2cor/2.8 \ {sen2cor_conf["SEN2COR_DOCKER_IMAGE"]} {entry}''' env['OUTDIR'] = str(Path(tmp) / 'output') else: output_path.mkdir(exist_ok=True, parents=True) lasrc_conf = Config.LASRC_CONFIG cmd = f'''docker run --rm -i \ -v $INDIR:/mnt/input-dir \ -v $OUTDIR:/mnt/output-dir \ --env INDIR=/mnt/input-dir \ --env OUTDIR=/mnt/output-dir \ -v {lasrc_conf["LASRC_AUX_DIR"]}:/mnt/lasrc-aux:ro \ -v {lasrc_conf["LEDAPS_AUX_DIR"]}:/mnt/ledaps-aux:ro \ {lasrc_conf["LASRC_DOCKER_IMAGE"]} {entry}''' logging.debug(cmd) # subprocess process = subprocess.Popen(cmd, shell=True, env=env, stdin=subprocess.PIPE) process.wait() assert process.returncode == 0 # TODO: We should be able to get output name from execution if processor_name.lower() == 'sen2cor': # Since sen2cor creates an custom directory name (based in scene_id) and changing processing date # we create it inside "output" folder. After that, get first entry of that directory output_tmp = list(Path(env['OUTDIR']).iterdir())[0] output_path = output_path.parent / output_tmp.name if execution.activity.args.get('file'): last_processed_file = execution.activity.args['file'] if last_processed_file and os.path.exists(last_processed_file) and \ last_processed_file.endswith('.SAFE'): # TODO: validate scene id (without processing_date) if len(os.listdir(last_processed_file)) < 9: shutil.rmtree(last_processed_file, ignore_errors=True) shutil.move(output_tmp, output_path) refresh_execution_args(execution, activity, file=str(output_path)) else: raise RuntimeError( f'Processor not supported. Check collection {collection.name} metadata processors' ) except Exception as e: logging.error(f'Error in correction {scene_id} - {str(e)}', exc_info=True) raise e return activity
def test_start_and_stop_schedule_cron_tab(restore_cron_tab, ): # pylint:disable=unused-argument,redefined-outer-name with TemporaryDirectory() as tempdir: instance = define_scheduler_instance(tempdir) with get_test_external_repo() as external_repo: # Start schedule instance.start_schedule_and_update_storage_state( external_repo.get_external_schedule( "no_config_pipeline_every_min_schedule")) cron_jobs = get_cron_jobs() assert len(cron_jobs) == 1 # Try starting it again with pytest.raises(DagsterSchedulerError): instance.start_schedule_and_update_storage_state( external_repo.get_external_schedule( "no_config_pipeline_every_min_schedule")) cron_jobs = get_cron_jobs() assert len(cron_jobs) == 1 # Start another schedule instance.start_schedule_and_update_storage_state( external_repo.get_external_schedule( "no_config_pipeline_daily_schedule")) cron_jobs = get_cron_jobs() assert len(cron_jobs) == 2 # Stop second schedule instance.stop_schedule_and_update_storage_state( external_repo.get_external_schedule( "no_config_pipeline_daily_schedule"). get_external_origin_id()) cron_jobs = get_cron_jobs() assert len(cron_jobs) == 1 # Try stopping second schedule again instance.stop_schedule_and_update_storage_state( external_repo.get_external_schedule( "no_config_pipeline_daily_schedule"). get_external_origin_id()) cron_jobs = get_cron_jobs() assert len(cron_jobs) == 1 # Start second schedule instance.start_schedule_and_update_storage_state( external_repo.get_external_schedule( "no_config_pipeline_daily_schedule")) cron_jobs = get_cron_jobs() assert len(cron_jobs) == 2 # Reconcile schedule state, should be in the same state instance.reconcile_scheduler_state(external_repo) cron_jobs = get_cron_jobs() assert len(cron_jobs) == 2 instance.start_schedule_and_update_storage_state( external_repo.get_external_schedule( "default_config_pipeline_every_min_schedule")) cron_jobs = get_cron_jobs() assert len(cron_jobs) == 3 # Reconcile schedule state, should be in the same state instance.reconcile_scheduler_state(external_repo) cron_jobs = get_cron_jobs() assert len(cron_jobs) == 3 # Stop all schedules instance.stop_schedule_and_update_storage_state( external_repo.get_external_schedule( "no_config_pipeline_every_min_schedule"). get_external_origin_id()) instance.stop_schedule_and_update_storage_state( external_repo.get_external_schedule( "no_config_pipeline_daily_schedule"). get_external_origin_id()) instance.stop_schedule_and_update_storage_state( external_repo.get_external_schedule( "default_config_pipeline_every_min_schedule"). get_external_origin_id()) cron_jobs = get_cron_jobs() assert len(cron_jobs) == 0 # Reconcile schedule state, should be in the same state instance.reconcile_scheduler_state(external_repo) cron_jobs = get_cron_jobs() assert len(cron_jobs) == 0
def test_start_non_existent_schedule(restore_cron_tab, ): # pylint:disable=unused-argument,redefined-outer-name with TemporaryDirectory() as tempdir: instance = define_scheduler_instance(tempdir) with pytest.raises(DagsterScheduleDoesNotExist): instance.stop_schedule_and_update_storage_state("asdf")
class GPGMailTests(unittest.TestCase): def setUp(self): self.temp_gpg_homedir = TemporaryDirectory() gpg = gnupg.GPG(gnupghome=self.temp_gpg_homedir.name) alice_input = gpg.gen_key_input( name_real="Alice", name_email="*****@*****.**", key_type="RSA", key_length=4096, key_usage="", subkey_type="RSA", subkey_length=4096, passphrase="test", subkey_usage="encrypt,sign,auth", ) self.alice_key = gpg.gen_key(alice_input) self.assertIsNotNone(self.alice_key) self.assertIsNotNone(self.alice_key.fingerprint) def tearDown(self): self.temp_gpg_homedir.cleanup() def test_encrypt_decrypt(self): mail = ( "Return-Path: <*****@*****.**>\nReceived: from example.com (example.com " + "[127.0.0.1])\n by example.com (Postfix) with ESMTPSA id E8DB612009F\n" + " for <*****@*****.**>; Tue, 7 Jan 2020 19:30:03 +0200 (CEST)\n" + 'Content-Type: text/plain; charset="utf-8"\nMIME-Version: 1.0\n' + "Content-Transfer-Encoding: 7bit\nSubject: Test\nFrom: " + "[email protected]\nTo: [email protected]\nDate: Tue, 07 Jan 2020 " + "19:30:03 -0000\nMessage-ID:\n <*****@*****.**>" + "\n\nThis is a test message.") msg = "This is a test message." p = Popen( [ "./gpgmail", "-e", "*****@*****.**", "--gnupghome", self.temp_gpg_homedir.name, ], stdout=PIPE, stdin=PIPE, stderr=PIPE, encoding="utf8", ) encrypted = p.communicate(input=mail)[0] self.assertTrue(msg not in encrypted) p = Popen( [ "./gpgmail", "-p", "test", "-d", "--gnupghome", self.temp_gpg_homedir.name, ], stdout=PIPE, stdin=PIPE, stderr=PIPE, encoding="utf8", ) decrypted = p.communicate(input=encrypted)[0] self.assertTrue(msg in decrypted) regex = ( r'Content-Type: multipart/mixed; protected-headers="v1";? boundary="' + r'===============\d+=="\nMIME-Version: 1\.0\nReturn-Path: <alice@example' + r"\.com>\nReceived: from example\.com \(example.com \[127\.0\.0\.1\]\)\n" + r" by example\.com \(Postfix\) with ESMTPSA id E8DB612009F\n for " + r"<alice@example\.com>; Tue, 7 Jan 2020 19:30:03 \+0200 \(CEST\)\n" + r"Subject: Test\nFrom: alice@example\.com\nTo: alice@example\.com\nDate: " + r"Tue, 07 Jan 2020 19:30:03 -0000\nMessage-ID: \n <123456789\.123456\." + r"123456789@example\.com>\n\n--===============\d+==\nContent-Type: " + r'text/rfc822-headers; protected-headers="v1"\nContent-Disposition: ' + r"inline\n(Date: Tue, 07 Jan 2020 19:30:03 -0000\n|Subject: Test\n|From: " + r"alice@example\.com\n|To: alice@example\.com\n|Message-ID: \n <123456789" + r"\.123456\.123456789@example\.com>\n)+\n\n--===============\d+==\n" + r'Content-Type: text/plain; charset="utf-8"\nContent-Transfer-Encoding: ' + r"7bit\n\nThis is a test message\.\n--===============\d+==--\n") self.assertIsNotNone(re.fullmatch(regex, decrypted)) def test_sign(self): mail = ( "Return-Path: <*****@*****.**>\nReceived: from example.com (example.com " + "[127.0.0.1])\n by example.com (Postfix) with ESMTPSA id E8DB612009F\n" + " for <*****@*****.**>; Tue, 7 Jan 2020 19:30:03 +0200 (CEST)\n" + 'Content-Type: text/plain; charset="utf-8"\nMIME-Version: 1.0\n' + "Content-Transfer-Encoding: 7bit\nSubject: Test\nFrom: " + "[email protected]\nTo: [email protected]\nDate: Tue, 07 Jan 2020 " + "19:30:03 -0000\nMessage-ID:\n <*****@*****.**>" + "\n\nThis is a test message.") msg = "This is a test message." 
p = Popen( [ "./gpgmail", "-s", "*****@*****.**", "--gnupghome", self.temp_gpg_homedir.name, "-p", "test", ], stdout=PIPE, stdin=PIPE, stderr=PIPE, encoding="utf8", ) signed = p.communicate(input=mail)[0] self.assertTrue(msg in signed) regex = ( r"Content-Type: multipart/signed; micalg=\"pgp-sha512\"; " + r"protocol=\"application/pgp-signature\"; " + r"boundary=\"===============\d+==\"\nMIME-Version: 1\.0\n" + r"Return-Path: <alice@example\.com>\nReceived: from " + r"example\.com \(example\.com \[127\.0\.0\.1\]\)\n by " + r"example\.com \(Postfix\) with ESMTPSA id E8DB612009F\n " + r"for <alice@example\.com>; Tue, 7 Jan 2020 19:30:03 \+0200 " + r"\(CEST\)\nSubject: Test\nFrom: alice@example\.com\nTo: " + r"alice@example\.com\nDate: Tue, 07 Jan 2020 19:30:03 " + r"-0000\nMessage-ID: \n <123456789\.123456\.123456789@example" + r"\.com>\n\n--===============\d+==\nContent-Type: " + r"multipart/mixed; protected-headers=\"v1\"; " + r"boundary=\"===============\d+==\"\nMIME-Version: 1\.0\n" + r"Return-Path: <alice@example\.com>\nReceived: from " + r"example\.com \(example\.com \[127\.0\.0\.1\]\)\n by " + r"example.com \(Postfix\) with ESMTPSA id E8DB612009F\n for" + r" <alice@example\.com>; Tue, 7 Jan 2020 19:30:03 \+0200 " + r"\(CEST\)\nSubject: Test\nFrom: [email protected]\nTo: " + r"alice@example\.com\nDate: Tue, 07 Jan 2020 19:30:03 -0000\n" + r"Message-ID: \n <123456789\.123456\.123456789@example\.com>\n" + r"\n--===============\d+==\nContent-Type: text/rfc822-headers;" + r" protected-headers=\"v1\"\nContent-Disposition: inline\n" + r"(Date: Tue, 07 Jan 2020 19:30:03 -0000\n|Subject: Test\n|" + r"From: alice@example\.com\n|To: alice@example\.com\n|" + r"Message-ID: \n <123456789\.123456\.123456789@example\.com>\n" + r")+\n\n--===============\d+==\nContent-Type: text/plain; " + r"charset=\"utf-8\"\nContent-Transfer-Encoding: 7bit\n\nThis " + r"is a test message\.\n--===============\d+==--\n\n" + r"--===============\d+==\nContent-Type: application/pgp-" + r"signature; name=\"signature\.asc\"\nContent-Description: " + r"OpenPGP digital signature\nContent-Disposition: attachment; " + r"filename=\"signature\.asc\"\n\n-----BEGIN PGP SIGNATURE-----" + r"\n\n[\w\+/\n=]+-----END PGP SIGNATURE-----\n\n" + r"--===============\d+==--\n") self.assertIsNotNone(re.fullmatch(regex, signed)) def test_sign_encrypt_decrypt(self): mail = ( "Return-Path: <*****@*****.**>\nReceived: from example.com (example.com " + "[127.0.0.1])\n by example.com (Postfix) with ESMTPSA id E8DB612009F\n" + " for <*****@*****.**>; Tue, 7 Jan 2020 19:30:03 +0200 (CEST)\n" + 'Content-Type: text/plain; charset="utf-8"\n MIME-Version: 1.0\n' + "Content-Transfer-Encoding: 7bit\nSubject: Test\nFrom: [email protected]" + "\nTo: [email protected]\nDate: Tue, 07 Jan 2020 19:30:03 -0000\n" + "Message-ID:\n <*****@*****.**>\n\nThis is a " + "test message.") msg = "This is a test message." 
p = Popen( [ "./gpgmail", "-E", "*****@*****.**", "--gnupghome", self.temp_gpg_homedir.name, "-p", "test", ], stdout=PIPE, stdin=PIPE, stderr=PIPE, encoding="utf8", ) encrypted = p.communicate(input=mail)[0] self.assertTrue(msg not in encrypted) p = Popen( [ "./gpgmail", "-p", "test", "-d", "--gnupghome", self.temp_gpg_homedir.name, ], stdout=PIPE, stdin=PIPE, stderr=PIPE, encoding="utf8", ) decrypted = p.communicate(input=encrypted)[0] self.assertTrue(msg in decrypted) regex = ( r'Content-Type: multipart/mixed; protected-headers="v1"; boundary="' + r'===============\d+=="\nMIME-Version: 1\.0\nReturn-Path: <alice@example' + r"\.com>\nReceived: from example\.com \(example.com \[127\.0\.0\.1\]\)\n" + r" by example\.com \(Postfix\) with ESMTPSA id E8DB612009F\n for " + r"<alice@example\.com>; Tue, 7 Jan 2020 19:30:03 \+0200 \(CEST\)\n" + r"Subject: Test\nFrom: alice@example\.com\nTo: alice@example\.com\nDate: " + r"Tue, 07 Jan 2020 19:30:03 -0000\nMessage-ID: \n <123456789\.123456\." + r"123456789@example\.com>\n\n--===============\d+==\nContent-Type: text/" + r'rfc822-headers; protected-headers="v1"\nContent-Disposition: inline\n' + r"(Date: Tue, 07 Jan 2020 19:30:03 -0000\n|Subject: Test\n|From: " + r"alice@example\.com\n|To: alice@example\.com\n|Message-ID: \n" + r" <123456789\.123456\.123456789@example\.com>\n)+\n\n--===============" + r'\d+==\nContent-Type: text/plain; charset="utf-8"\n MIME-Version: 1.0\n' + r"Content-Transfer-Encoding: 7bit\n\nThis is a test message\.\n--" + r"===============\d+==--\n") self.assertIsNotNone(re.fullmatch(regex, decrypted)) def test_encryptheaders(self): mail = ( "Return-Path: <*****@*****.**>\nReceived: from example.com (example.com " + "[127.0.0.1])\n by example.com (Postfix) with ESMTPSA id E8DB612009F\n" + " for <*****@*****.**>; Tue, 7 Jan 2020 19:30:03 +0200 (CEST)\n" + 'Content-Type: text/plain; charset="utf-8"\nMIME-Version: 1.0\n' + "Content-Transfer-Encoding: 7bit\nSubject: Test\nFrom: [email protected]" + "\nTo: [email protected]\nDate: Tue, 07 Jan 2020 19:30:03 -0000\n" + "Message-ID:\n <*****@*****.**>\n\nThis is a " + "test message.") msg = "This is a test message." p = Popen( [ "./gpgmail", "-e", "*****@*****.**", "--gnupghome", self.temp_gpg_homedir.name, "-H", ], stdout=PIPE, stdin=PIPE, stderr=PIPE, encoding="utf8", ) encrypted, stderr = p.communicate(input=mail) self.assertTrue(msg not in encrypted) self.assertIn("Date: ...\n", encrypted) self.assertIn("From: ...\n", encrypted) self.assertIn("Message-ID: ...\n", encrypted) self.assertIn("Subject: ...\n", encrypted) self.assertIn("To: ...\n", encrypted) def test_encryptfail(self): mail = ( "Return-Path: <*****@*****.**>\nReceived: from example.com (example.com " + "[127.0.0.1])\n by example.com (Postfix) with ESMTPSA id E8DB612009F\n" + " for <*****@*****.**>; Tue, 7 Jan 2020 19:30:03 +0200 (CEST)\n" + 'Content-Type: text/plain; charset="utf-8"\nMIME-Version: 1.0\n' + "Content-Transfer-Encoding: 7bit\nSubject: Test\nFrom: [email protected]" + "\nTo: [email protected]\nDate: Tue, 07 Jan 2020 19:30:03 -0000\n" + "Message-ID:\n <*****@*****.**>\n\nThis is a " + "test message.") p = Popen( [ "./gpgmail", "-e", "*****@*****.**", "--gnupghome", self.temp_gpg_homedir.name, "-H", ], stdout=PIPE, stdin=PIPE, stderr=PIPE, encoding="utf8", ) encrypted, stderr = p.communicate(input=mail) self.assertEqual(mail, encrypted)
def package_csar(self, output_file: str, service_template: str = None, csar_format: str = "zip") -> str: meta = self.parse_csar_meta() try: if not service_template: root_yaml_files = self.get_root_yaml_files() if meta is None and len(root_yaml_files) != 1: raise ParseError( f"You didn't specify the CSAR TOSCA entrypoint with '-t/--service-template' option. Therefore " f"there should be one YAML file in the root of the CSAR to select it as the entrypoint. More " f"than one YAML has been found: {list(map(str, root_yaml_files))}. Please select one of the " f"files as the CSAR entrypoint using '-t/--service-template' flag or remove all the excessive " f"YAML files.", self) service_template = root_yaml_files[0].name else: if not self._member_exists(PurePath(service_template)): raise ParseError( f"The supplied TOSCA service template file '{service_template}' does not exist in folder " f"'{self.csar_dir}'.", self) meta = self.parse_csar_meta() if meta is not None: template_entry = meta.entry_definitions if service_template and template_entry != service_template: raise ParseError( f"The file entry '{template_entry}' defined within 'Entry-Definitions' in " f"'TOSCA-Metadata/TOSCA.meta' does not match with the file name '{service_template}' supplied " f"in service_template CLI argument.", self) # check if "Entry-Definitions" points to an existing # template file in the CSAR if template_entry is not None and not self._member_exists( PurePath(template_entry)): raise ParseError( f"The file '{template_entry}' defined within 'Entry-Definitions' in " f"'TOSCA-Metadata/TOSCA.meta' does not exist.", self) return shutil.make_archive(output_file, csar_format, self.csar_dir) else: with TemporaryDirectory(prefix="opera-package-") as tempdir: extract_path = Path(tempdir) / "extract" shutil.copytree(self.csar_dir, extract_path) # create TOSCA-Metadata/TOSCA.meta file using the specified # TOSCA service template or directory root YAML file content = (f"TOSCA-Meta-File-Version: 1.1\n" f"CSAR-Version: 1.1\n" f"Created-By: xOpera TOSCA orchestrator\n" f"Entry-Definitions: {service_template}\n") meta_file_folder = extract_path / "TOSCA-Metadata" meta_file = (meta_file_folder / "TOSCA.meta") meta_file_folder.mkdir() meta_file.touch() meta_file.write_text(content, encoding="utf-8") return shutil.make_archive(output_file, csar_format, extract_path) except Exception as e: # noqa: W0703 raise ParseError(f"Error creating CSAR:\n{traceback.format_exc()}", self) from e
# # Unit test cases for buku # import os import re import sqlite3 from genericpath import exists from tempfile import TemporaryDirectory import pytest import unittest from unittest import mock as mock from buku import BukuDb, parse_tags, prompt TEST_TEMP_DIR_OBJ = TemporaryDirectory(prefix='bukutest_') TEST_TEMP_DIR_PATH = TEST_TEMP_DIR_OBJ.name TEST_TEMP_DBDIR_PATH = os.path.join(TEST_TEMP_DIR_PATH, 'buku') TEST_TEMP_DBFILE_PATH = os.path.join(TEST_TEMP_DBDIR_PATH, 'bookmarks.db') TEST_BOOKMARKS = [ [ 'http://slashdot.org', 'SLASHDOT', parse_tags(['old,news']), "News for old nerds, stuff that doesn't matter" ], [ 'http://www.zażółćgęśląjaźń.pl/', 'ZAŻÓŁĆ', parse_tags(['zażółć,gęślą,jaźń']), "Testing UTF-8, zażółć gęślą jaźń." ], [
def process_view(out_dir, subject, action, subaction, camera): subj_dir = path.join('extracted', subject) base_filename = metadata.get_base_filename(subject, action, subaction, camera) # Load joint position annotations with pycdf.CDF( path.join(subj_dir, 'Poses_D2_Positions', base_filename + '.cdf')) as cdf: poses_2d = np.array(cdf['Pose']) poses_2d = poses_2d.reshape(poses_2d.shape[1], 32, 2) with pycdf.CDF( path.join(subj_dir, 'Poses_D3_Positions_mono_universal', base_filename + '.cdf')) as cdf: poses_3d_univ = np.array(cdf['Pose']) poses_3d_univ = poses_3d_univ.reshape(poses_3d_univ.shape[1], 32, 3) with pycdf.CDF( path.join(subj_dir, 'Poses_D3_Positions_mono', base_filename + '.cdf')) as cdf: poses_3d = np.array(cdf['Pose']) poses_3d = poses_3d.reshape(poses_3d.shape[1], 32, 3) # Infer camera intrinsics camera_int = infer_camera_intrinsics(poses_2d, poses_3d) camera_int_univ = infer_camera_intrinsics(poses_2d, poses_3d_univ) frame_indices = select_frame_indices_to_include(subject, poses_3d_univ) frames = frame_indices + 1 video_file = path.join(subj_dir, 'Videos', base_filename + '.mp4') frames_dir = path.join(out_dir, 'imageSequence', camera) makedirs(frames_dir, exist_ok=True) # Check to see whether the frame images have already been extracted previously existing_files = {f for f in listdir(frames_dir)} frames_are_extracted = True for i in frames: filename = 'img_%06d.jpg' % i if filename not in existing_files: frames_are_extracted = False break if not frames_are_extracted: with TemporaryDirectory() as tmp_dir: # Use ffmpeg to extract frames into a temporary directory call([ 'ffmpeg', '-nostats', '-loglevel', '0', '-i', video_file, '-qscale:v', '3', path.join(tmp_dir, 'img_%06d.jpg') ]) # Move included frame images into the output directory for i in frames: filename = 'img_%06d.jpg' % i move(path.join(tmp_dir, filename), path.join(frames_dir, filename)) return { 'pose/2d': poses_2d[frame_indices], 'pose/3d-univ': poses_3d_univ[frame_indices], 'pose/3d': poses_3d[frame_indices], 'intrinsics/' + camera: camera_int, 'intrinsics-univ/' + camera: camera_int_univ, 'frame': frames, 'camera': np.full(frames.shape, int(camera)), 'subject': np.full(frames.shape, int(included_subjects[subject])), 'action': np.full(frames.shape, int(action)), 'subaction': np.full(frames.shape, int(subaction)), }
def load_df( self, df: pandas.DataFrame, table: str, field_dict: Optional[Dict[Any, Any]] = None, delimiter: str = ',', encoding: str = 'utf8', pandas_kwargs: Any = None, **kwargs: Any, ) -> None: """ Loads a pandas DataFrame into hive. Hive data types will be inferred if not passed but column names will not be sanitized. :param df: DataFrame to load into a Hive table :type df: pandas.DataFrame :param table: target Hive table, use dot notation to target a specific database :type table: str :param field_dict: mapping from column name to hive data type. Note that it must be OrderedDict so as to keep columns' order. :type field_dict: collections.OrderedDict :param delimiter: field delimiter in the file :type delimiter: str :param encoding: str encoding to use when writing DataFrame to file :type encoding: str :param pandas_kwargs: passed to DataFrame.to_csv :type pandas_kwargs: dict :param kwargs: passed to self.load_file """ def _infer_field_types_from_df(df: pandas.DataFrame) -> Dict[Any, Any]: dtype_kind_hive_type = { 'b': 'BOOLEAN', # boolean 'i': 'BIGINT', # signed integer 'u': 'BIGINT', # unsigned integer 'f': 'DOUBLE', # floating-point 'c': 'STRING', # complex floating-point 'M': 'TIMESTAMP', # datetime 'O': 'STRING', # object 'S': 'STRING', # (byte-)string 'U': 'STRING', # Unicode 'V': 'STRING', # void } order_type = OrderedDict() for col, dtype in df.dtypes.iteritems(): order_type[col] = dtype_kind_hive_type[dtype.kind] return order_type if pandas_kwargs is None: pandas_kwargs = {} with TemporaryDirectory(prefix='airflow_hiveop_') as tmp_dir: with NamedTemporaryFile(dir=tmp_dir, mode="w") as f: if field_dict is None: field_dict = _infer_field_types_from_df(df) df.to_csv( path_or_buf=f, sep=delimiter, header=False, index=False, encoding=encoding, date_format="%Y-%m-%d %H:%M:%S", **pandas_kwargs, ) f.flush() return self.load_file(filepath=f.name, table=table, delimiter=delimiter, field_dict=field_dict, **kwargs)
def setUp(self) -> None: os.chdir(os.path.join(TEST_ROOT, "assets/nixpkgs")) self.directory = TemporaryDirectory() os.environ["HOME"] = self.directory.name os.environ["GITHUB_TOKEN"] = "0000000000000000000000000000000000000000"
def harmonization(activity: dict, collection_id=None, **kwargs): """Harmonize Landsat and Sentinel-2 products.""" execution = execution_from_collection(activity, collection_id=collection_id, activity_type=harmonization.__name__) collection = execution.activity.collection from sensor_harm import landsat_harmonize, sentinel_harmonize with TemporaryDirectory(prefix='harmonization', suffix=activity['sceneid']) as tmp: data_collection = get_provider_collection_from_activity(activity) data_collection.path(collection) target_dir = str(data_collection.path(collection)) target_tmp_dir = Path(tmp) / 'target' target_tmp_dir.mkdir(exist_ok=True, parents=True) reflectance_dir = Path(activity['args']['file']) glob = list( reflectance_dir.glob(f'**/{activity["sceneid"]}_Fmask4.tif')) fmask = glob[0] shutil.copy(str(fmask), target_tmp_dir) if activity['sceneid'].startswith('S2'): shutil.unpack_archive(activity['args']['compressed_file'], tmp) entry = activity['sceneid'] entries = list(Path(tmp).glob('*.SAFE')) if len(entries) == 1 and entries[0].suffix == '.SAFE': entry = entries[0].name l1 = Path(tmp) / entry sentinel_harmonize(l1, activity['args']['file'], target_tmp_dir, apply_bandpass=True) else: product_version = int(data_collection.parser.satellite()) sat_sensor = '{}{}'.format(data_collection.parser.source()[:2], product_version) landsat_harmonize(sat_sensor, activity["sceneid"], activity['args']['file'], str(target_tmp_dir)) Path(target_dir).mkdir(exist_ok=True, parents=True) for entry in Path(target_tmp_dir).iterdir(): entry_name = entry.name target_entry = Path(target_dir) / entry_name if target_entry.exists(): os.remove(str(target_entry)) shutil.move(str(entry), target_dir) activity['args']['file'] = target_dir return activity
def pdf_to_images(self) -> list:
    with TemporaryDirectory() as path:
        pil_images = convert_from_path(self.file_path, output_folder=path)
        # With output_folder set, pdf2image backs each PIL image with a file in
        # the temporary directory; load them eagerly so they remain usable after
        # the directory is cleaned up.
        for image in pil_images:
            image.load()
        return pil_images
def download(activity: dict, **kwargs): """Celery tasks to deal with download data product from given providers.""" execution = create_execution(activity) collector_extension = flask_app.extensions['bdc:collector'] collection = execution.activity.collection scene_id = execution.activity.sceneid logging.info( f'Starting Download Task for {collection.name}(id={collection.id}, scene_id={scene_id})' ) # Use parallel flag for providers which has number maximum of connections per client (Sentinel-Hub only) download_order = collector_extension.get_provider_order(collection, lazy=True, parallel=True, progress=False) if len(download_order) == 0: raise RuntimeError( f'No provider set for collection {collection.id}({collection.name})' ) data_collection = get_provider_collection_from_activity(activity) download_file = data_collection.compressed_file(collection) has_compressed_file = download_file is not None # For files that does not have compressed file (Single file/folder), use native path if download_file is None: download_file = data_collection.path(collection) is_valid_file = False item = Item.query().filter(Item.collection_id == collection.id, Item.name == scene_id).first() if item: # TODO: Get asset name of download file item_path = item.assets['asset']['href'] item_path = item_path if not item_path.startswith( '/') else item_path[1:] item_path = Path(Config.DATA_DIR) / item_path if item_path.exists(): logging.info( f'Item {scene_id} exists. {str(item_path)} -> {str(download_file)}' ) download_file = item_path if download_file.exists() and has_compressed_file: logging.info('File {} downloaded. Checking file integrity...'.format( str(download_file))) # TODO: Should we validate using Factory Provider.is_valid() ? is_valid_file = is_valid_compressed_file( str(download_file)) if download_file.is_file() else False if not download_file.exists() or not is_valid_file: # Ensure file is removed since it may be corrupted if download_file.exists() and download_file.is_file(): download_file.unlink() if not has_compressed_file: download_file.mkdir(exist_ok=True, parents=True) else: download_file.parent.mkdir(exist_ok=True, parents=True) with TemporaryDirectory(prefix='download_', suffix=f'_{scene_id}') as tmp: temp_file: Path = None should_retry = False for collector in download_order: try: logging.info( f'Trying to download from {collector.provider_name}(id={collector.instance.id})' ) temp_file = Path( collector.download( scene_id, output=tmp, dataset=activity['args']['dataset'])) activity['args']['provider_id'] = collector.instance.id break except DataOfflineError: should_retry = True except Exception as e: logging.error( f'Download error in provider {collector.provider_name} - {str(e)}' ) if temp_file is None or not temp_file.exists(): if should_retry: raise DataOfflineError(scene_id) raise RuntimeError(f'Download fails {activity["sceneid"]}.') shutil.move(str(temp_file), str(download_file)) refresh_execution_args(execution, activity, compressed_file=str(download_file)) return activity
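# is_valid_compressed_file() is called above but not shown. A minimal sketch,
# assuming it only needs to distinguish intact from truncated or corrupted
# archives; the production check may be stricter.
import tarfile
import zipfile

def is_valid_compressed_file_sketch(file_path: str) -> bool:
    """Return True when a .zip or .tar* archive can be read end to end."""
    try:
        if zipfile.is_zipfile(file_path):
            with zipfile.ZipFile(file_path) as archive:
                # testzip() returns the first bad member name, or None if all CRCs match.
                return archive.testzip() is None
        if tarfile.is_tarfile(file_path):
            with tarfile.open(file_path) as archive:
                archive.getmembers()
            return True
    except (OSError, zipfile.BadZipFile, tarfile.TarError):
        return False
    return False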
def _delete_cluster(self, terraform_dir=None): module_logger.debug( 'cluster._delete_cluster() entry, terraform_dir={} self.deploy_dir={} self.platform={}' .format(terraform_dir, self.deploy_dir, self.platform)) if terraform_dir is None: if self.name is None: raise ClusterNotInitialized( 'Cannot delete if no cluster name is specified.') terraform_dir = self.deploy_dir if 'openshift4-aws' in self.platform: deploy_dir_suffix = '/aws-ipi' else: deploy_dir_suffix = '' tfstate_file = '{}{}/terraform.tfstate'.format(terraform_dir, deploy_dir_suffix) module_logger.debug('tfstate_file={} exists={}'.format( tfstate_file, os.path.isfile(tfstate_file))) self._modify_modules_json() tfvars_file = '{}/terraform_inputs/.{}.tfvars'.format( self.git_repo.git_dir, self.platform) sh_env = os.environ.copy() sh_env['TF_VAR_private_key_file'] = self.private_key_file sh_env['TF_VAR_public_key_file'] = '{}.pub'.format( self.private_key_file) with with_cd(self.git_repo.git_dir): with TemporaryDirectory() as temp_tfstate_dir: temp_tfstate_dir = str(Path(temp_tfstate_dir).resolve()) if 'openshift4-aws' in self.platform: run('sed -i="" -e "s|__W3_EMAIL__|$TF_VAR_user_name|g" -e "s|__AWS_ACCESS_KEY__|$TF_VAR_aws_access_key|g" -e "s|__AWS_SECRET_ACCESS_KEY__|$TF_VAR_aws_secret_key|g" -e "s|__RH_SUBSCRIPTION_USERNAME__|$TF_VAR_rhel_subscription_username|g" -e "s|__RH_SUBSCRIPTION_PASSWORD__|$TF_VAR_rhel_subscription_password|g" -e "s|__OCP__PULL_SECRET_FILE__|$OCP_PULL_SECRET_FILE|g" -e "s|__PRIVATE_KEY_FILE__|$PRIVATE_KEY_FILE|g" -e "s|__PUBLIC_KEY_FILE__|$PUBLIC_KEY_FILE|g" -e "s|__EDITION__|$EDITION|g" -e "s|__FIXPACK__|$FIXPACK|g" -e "s|__VERSION__|$VERSION|g" -e "s|__REPO__|$DEPLOY_REPO|g" -e "s|__ARTIFACTORY_USER__|$ARTIFACTORY_USER|g" -e "s|__ARTIFACTORY_API_KEY__|$ARTIFACTORY_TOKEN|g" {}' .format(tfvars_file), shell=True, check=True, env=sh_env) if not os.path.isfile('pull-secret'): run('openssl aes-256-cbc -K ${} -iv ${} -in pull-secret.enc -out pull-secret -d' .format(os.environ['OCP_PULL_SECRET_FILE_VAR_KEY'], os.environ['OCP_PULL_SECRET_FILE_VAR_IV']), shell=True, check=True, env=sh_env) if 'travis' in self.platform: module_logger.debug( 'found openshift4-aws-travis in self.platform; running sed on __CLUSTER_NAME__ of {}.' .format(tfvars_file)) run('sed -i="" -e "s|__CLUSTER_NAME__|{}|g" {}'.format( self._get_terraform_output('cluster-name', self.deploy_dir, length_of_output=1), tfvars_file), shell=True, check=True, env=sh_env) if os.path.isfile('{}.enc'.format(tfstate_file)): decrypted_tfstate_file = '{}/terraform.tfstate'.format( temp_tfstate_dir) run_cmd = 'openssl enc -aes-256-cbc -d -in {}.enc -out {} -k {}'.format( tfstate_file, decrypted_tfstate_file, 'afro-donkey-seldom-waterfall-compute') run(run_cmd, shell=True) else: decrypted_tfstate_file = tfstate_file # aws-ipi needs this abomination because of its directory hierarchy; else cluster destroys fail destroyer_cmd = 'cd ..; make -s terraform:destroy TERRAFORM_DIR={}/aws-ipi TERRAFORM_VARS_FILE={} TERRAFORM_STATE_FILE={}'.format( self.deploy_dir, tfvars_file, decrypted_tfstate_file) else: destroyer_cmd = 'make -s terraform:destroy TERRAFORM_DIR={} TERRAFORM_VARS_FILE={} TERRAFORM_STATE_FILE={}'.format( self.deploy_dir, tfvars_file, tfstate_file) module_logger.debug( 'terraform invocation:\n {}'.format(destroyer_cmd)) output = run(destroyer_cmd, shell=True, env=sh_env) if output.returncode != 0: module_logger.info( 'Destroy completed with errors, but everything is likely cleaned up.' )
def install_frontend(self, source, oauth_key, oauth_secret, backend_url, settings_file=None, network='public', hub_id=None): """ Install connector-frontend in Odin Automation Hub, --source can be http(s):// or filepath""" with TemporaryDirectory() as tdir: is_http_source = True if source.startswith('http://') or source.startswith('https://') \ else False if is_http_source: package_name = _download_file(source, target=tdir) else: package_name = os.path.basename(source) copyfile(os.path.expanduser(source), os.path.join(tdir, package_name)) package_path = os.path.join(tdir, package_name) with zipfile.ZipFile(package_path, 'r') as zip_ref: meta_path = zip_ref.extract('APP-META.xml', path=tdir) tenant_schema_path = zip_ref.extract('schemas/tenant.schema', tdir) app_schema_path = zip_ref.extract('schemas/app.schema', tdir) try: zip_ref.extract('schemas/user.schema', tdir) user_service = True except KeyError: user_service = False tree = xml_et.ElementTree(file=meta_path) namespace = '{http://aps-standard.org/ns/2}' connector_id = tree.find('{}id'.format(namespace)).text version = tree.find('{}version'.format(namespace)).text release = tree.find('{}release'.format(namespace)).text # Get connector name from id as <name> field may not be unique url_path = urlparse(connector_id).path connector_name = os.path.split(url_path)[-1] if not settings_file: settings_file = {} else: settings_file = json.load(open(settings_file)) if backend_url.startswith('http://'): print( "WARN: Make sure that the APS development mode enabled for http backend. " "Run `apsconnect aps_devel_mode` command.") elif backend_url.startswith('https://'): pass else: print("Backend url must be URL http(s)://, got {}".format( backend_url)) sys.exit(1) cfg, hub = _get_cfg(), _get_hub() with open(package_path, 'rb') as package_binary: print("Importing connector {} {}-{}".format( connector_id, version, release)) import_kwargs = {'package_url': source} if is_http_source \ else {'package_body': xmlrpclib.Binary(package_binary.read())} response = hub.APS.importPackage(**import_kwargs) _osaapi_raise_for_status(response) application_id = str(response['result']['application_id']) print("Connector {} imported with id={} [ok]".format( connector_id, application_id)) payload = { 'aps': { 'package': { 'type': connector_id, 'version': version, 'release': release, }, 'endpoint': backend_url, 'network': network, 'auth': { 'oauth': { 'key': oauth_key, 'secret': oauth_secret, }, }, }, } # Get Unique OA id for using as hubId parameter while endpoint deploying base_aps_url = _get_aps_url( **{k: _get_cfg()[k] for k in APS_CONNECT_PARAMS}) app_properties = _get_properties(app_schema_path) if 'hubId' in app_properties: url = '{}/{}'.format( base_aps_url, 'aps/2/resources?implementing(http://parallels.com/aps/types/pa/poa/1.0)', ) response = request(method='GET', url=url, headers=_get_user_token(hub, cfg['user']), verify=False) response.raise_for_status() try: data = json.loads(response.content.decode('utf-8')) except ValueError: print("APSController provided non-json format") sys.exit(1) if not data and not hub_id: raise Exception( "Core OA resource is not found\n" "Use --hub-id={value} argument to specify the ID " "manually or --hub-id=auto to generate it automatically" ) elif data: hub_id = data[0]['aps']['id'] elif hub_id == 'auto': hub_id = str(uuid.uuid4()) payload.update({'app': {'hubId': hub_id}}) payload.update(settings_file) response = request(method='POST', url='{}/{}'.format(base_aps_url, 'aps/2/applications/'), headers=_get_user_token(hub, 
cfg['user']), verify=False, json=payload) try: response.raise_for_status() except Exception as e: if 'error' in response.json(): err = "{} {}".format(response.json()['error'], response.json()['message']) else: err = str(e) print("Installation of connector {} FAILED.\n" "Hub APS API response {} code.\n" "Error: {}".format(connector_id, response.status_code, err)) # Create app, tenant, users resource types resource_uid = json.loads( response.content.decode('utf-8'))['app']['aps']['id'] core_resource_types_payload = [ { 'resclass_name': 'rc.saas.service.link', 'name': connector_name, 'act_params': [ { 'var_name': 'app_id', 'var_value': application_id }, { 'var_name': 'resource_uid', 'var_value': resource_uid }, ] }, { 'resclass_name': 'rc.saas.service', 'name': '{} tenant'.format(connector_name), 'act_params': [ { 'var_name': 'app_id', 'var_value': application_id }, { 'var_name': 'service_id', 'var_value': 'tenant' }, { 'var_name': 'autoprovide_service', 'var_value': '1' }, ] }, ] # Collect ids for service template creation resource_types_ids = [] limited_resources = {} for type in core_resource_types_payload: response = hub.addResourceType(**type) _osaapi_raise_for_status(response) resource_types_ids.append( response['result']['resource_type_id']) for id in list(resource_types_ids): limited_resources[id] = 1 if user_service: user_resource_type_payload = { 'resclass_name': 'rc.saas.service', 'name': '{} users'.format(connector_name), 'act_params': [ { 'var_name': 'app_id', 'var_value': application_id }, { 'var_name': 'service_id', 'var_value': 'user' }, { 'var_name': 'autoprovide_service', 'var_value': '0' }, ] } response = hub.addResourceType(**user_resource_type_payload) _osaapi_raise_for_status(response) resource_types_ids.append( response['result']['resource_type_id']) # Create counters resource types counters = _get_counters(tenant_schema_path) for counter in counters: payload = { 'resclass_name': "rc.saas.resource.unit", 'name': '{} {}'.format(connector_name, counter), 'act_params': [ { 'var_name': 'app_id', 'var_value': application_id }, { 'var_name': 'service_id', 'var_value': "tenant" }, { 'var_name': 'resource_id', 'var_value': counter }, ] } response = hub.addResourceType(**payload) _osaapi_raise_for_status(response) resource_types_ids.append( response['result']['resource_type_id']) # Create parameters resource types parameters = _get_parameters(tenant_schema_path) for parameter in parameters: payload = { 'resclass_name': "rc.saas.resource.unit", 'name': '{} {}'.format(connector_name, parameter), 'act_params': [ { 'var_name': 'app_id', 'var_value': application_id }, { 'var_name': 'service_id', 'var_value': "tenant" }, { 'var_name': 'resource_id', 'var_value': parameter }, ] } response = hub.addResourceType(**payload) _osaapi_raise_for_status(response) resource_types_ids.append( response['result']['resource_type_id']) limited_resources[response['result']['resource_type_id']] = 0 print("Resource types creation [ok]") # Create service template payload = { 'name': connector_name, 'owner_id': 1, 'resources': [], } for type_id in resource_types_ids: payload['resources'].append({'resource_type_id': type_id}) response = hub.addServiceTemplate(**payload) _osaapi_raise_for_status(response) service_template_id = response['result']['st_id'] print("Service template \"{}\" created with id={} [ok]".format( connector_name, service_template_id)) # Set up limits payload = { 'st_id': service_template_id, 'limits': [], } for type_id, limit in limited_resources.items(): payload['limits'].append({ 
'resource_id': type_id, 'resource_limit64': str(limit) }) response = hub.setSTRTLimits(**payload) _osaapi_raise_for_status(response) print("Limits for Service template \"{}\" are applied [ok]".format( service_template_id))
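# _download_file() is used by install_frontend() above but not defined in this
# snippet. A minimal sketch, assuming it streams the package into the target
# directory and returns the local file name; the real helper may differ.
import os
import requests

def _download_file_sketch(url: str, target: str) -> str:
    """Download `url` into directory `target` and return the saved file name."""
    package_name = os.path.basename(url) or "package.zip"
    response = requests.get(url, stream=True)
    response.raise_for_status()
    with open(os.path.join(target, package_name), "wb") as fh:
        for chunk in response.iter_content(chunk_size=8192):
            fh.write(chunk)
    return package_name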
def main():
    print(f"\x1b[33m[{now()}]\x1b[0m")
    base = Path(gettempdir()) / "texclip"
    text = ClipBoard.read(CF_UNICODETEXT)
    with TemporaryDirectory() as workspace:
        workspace = Path(workspace)
        template = tex_content(text)
        texfile = workspace / "main.tex"
        texfile.write_bytes(template.encode("utf-8"))
        tex_cmd = ["xelatex", texfile.as_posix()]
        try:
            tex_status = run(tex_cmd, shell=False, cwd=workspace, stdout=PIPE, stderr=PIPE, timeout=10)
            tex_status.check_returncode()
        except TimeoutExpired as e:
            print(f"error in tex: {text}")
            print("\x1b[31m")
            print(e.stdout.decode("utf-8"))
            print("\x1b[0m")
            return
        except CalledProcessError as e:
            print(f"error in tex: {text}")
            print("\x1b[31m")
            print(e.stderr.decode("utf-8"))
            print("\x1b[0m")
            return
        # main.pdf has been generated at this point
        pdffile = workspace / "main.pdf"
        outimgfile = base / f"texclip-{strftime('%Y%m%d-%H%M%S', localtime())}.png"
        magick_cmd = [
            "magick", "-density", "300", pdffile.as_posix(),
            "-quality", "100", "-trim", outimgfile.absolute().as_posix()
        ]
        try:
            magick_status = run(magick_cmd, shell=False, cwd=workspace, stdout=PIPE, stderr=PIPE, timeout=5)
            magick_status.check_returncode()
        except TimeoutExpired as e:
            print(f"[{now()}] error in magick")
            print("\x1b[31m")
            print(e.stdout.decode("utf-8"))
            print("\x1b[0m")
            return
        except CalledProcessError as e:
            print(f"[{now()}] error in magick")
            print("\x1b[31m")
            print(e.stderr.decode("utf-8"))
            print("\x1b[0m")
            return
        print(f"\x1b[32m{text[:10]}.. saved at {outimgfile.as_posix()}\x1b[0m")
        ToastNotifier().show_toast("Task Completed", f"{outimgfile.absolute().as_posix()}")
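# tex_content() is used above but not shown. A minimal sketch, assuming the
# clipboard text is compiled as display math in a borderless standalone
# document; the actual template is an assumption.
def tex_content_sketch(text: str) -> str:
    return "\n".join([
        r"\documentclass[preview,border=2pt]{standalone}",
        r"\usepackage{amsmath,amssymb}",
        r"\begin{document}",
        r"\[",
        text,
        r"\]",
        r"\end{document}",
    ])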
def generate(self, initialized): with self.git_repo as r: creating_dir = '{}/{}/{}'.format(r.working_dir, self.cluster_directory, creating_id()) creating_file = '{}/{}'.format(creating_dir, CREATING_NAME) if not os.path.exists(creating_dir): os.makedirs(creating_dir) with open(creating_file, 'w') as f: f.write(self.identifying_info) r.index.add([creating_file]) # if we are not initialized, trigger another travis build if initialized: r.index.commit('cluster build started [skip ci]') else: r.index.commit('cluster build started') tfvars_file = '{}/terraform_inputs/.{}.tfvars'.format( r.working_dir, self.platform) with TemporaryDirectory() as temp_dir: temp_dir = str(Path(temp_dir).resolve()) self._temp_deploy_dir = temp_dir sh_env = os.environ.copy() sh_env['TF_VAR_private_key_file'] = self.private_key_file sh_env['TF_VAR_public_key_file'] = '{}.pub'.format( self.private_key_file) sh_env['TF_VAR_keypair_override'] = 'icp-cicd-pipeline-keypair' sh_env['TF_VAR_instance_name'] = INSTANCE_NAME exception = None tries = 1 try: if 'openstack' in self.platform: run('make -s deploy:openstack OPENSTACK_DEPLOY_DIR={} OPENSTACK_TERRAFORM_VARS_FILE={}' .format(temp_dir, tfvars_file), shell=True, check=True, env=sh_env) elif 'openshift-aws' in self.platform: run('make -s deploy:openshift:aws OPENSHIFT_AWS_DEPLOY_DIR={} OPENSHIFT_AWS_TERRAFORM_VARS_FILE={}' .format(temp_dir, tfvars_file), shell=True, check=True, env=sh_env) elif 'openshift4-aws' in self.platform: if 'travis' in self.platform: run('sed -i="" -e "s|__CLUSTER_NAME__|pool-os4-{}|g" {}' .format(creating_id(), tfvars_file), shell=True, check=True, env=sh_env) run('sed -i="" -e "s|__W3_EMAIL__|$TF_VAR_user_name|g" -e "s|__AWS_ACCESS_KEY__|$TF_VAR_aws_access_key|g" -e "s|__AWS_SECRET_ACCESS_KEY__|$TF_VAR_aws_secret_key|g" -e "s|__RH_SUBSCRIPTION_USERNAME__|$TF_VAR_rhel_subscription_username|g" -e "s|__RH_SUBSCRIPTION_PASSWORD__|$TF_VAR_rhel_subscription_password|g" -e "s|__OCP__PULL_SECRET_FILE__|$OCP_PULL_SECRET_FILE|g" -e "s|__PRIVATE_KEY_FILE__|$PRIVATE_KEY_FILE|g" -e "s|__PUBLIC_KEY_FILE__|$PUBLIC_KEY_FILE|g" -e "s|__EDITION__|$EDITION|g" -e "s|__FIXPACK__|$FIXPACK|g" -e "s|__VERSION__|$VERSION|g" -e "s|__REPO__|$DEPLOY_REPO|g" -e "s|__ARTIFACTORY_USER__|$ARTIFACTORY_USER|g" -e "s|__ARTIFACTORY_API_KEY__|$ARTIFACTORY_TOKEN|g" {}' .format(tfvars_file), shell=True, check=True, env=sh_env) run('make -s deploy:openshift4:aws OPENSHIFT_4_AWS_DEPLOY_DIR={} OPENSHIFT_4_AWS_TERRAFORM_VARS_FILE={}' .format(temp_dir, tfvars_file), shell=True, check=True, env=sh_env) elif 'aws' in self.platform: run('make -s deploy:aws AWS_DEPLOY_DIR={} AWS_TERRAFORM_VARS_FILE={}' .format(temp_dir, tfvars_file), shell=True, check=True, env=sh_env) else: raise ClusterException( 'No rule in place to handle the tfvars file passed: {}. Ensure that you are trying to deploy a supported cluster.' .format(tfvars_file)) exception = None except CalledProcessError as e: exception = e module_logger.error( 'Cluster failed to create successfully; retries left: {}'. format(self.max_tries - tries)) while tries < self.max_tries: tries += 1 if exception is None: # If we don't have an exception, there is no need to retry. 
break try: run('make -s terraform:apply TERRAFORM_DIR={} TERRAFORM_VARS_FILE={}' .format(temp_dir, tfvars_file), shell=True, check=True, env=sh_env) exception = None except CalledProcessError as e: exception = e module_logger.error( 'Cluster failed to create successfully; retries left: {}' .format(self.max_tries - tries)) if exception is not None: module_logger.error( 'Cluster failed to create successfully; deleting cluster') module_logger.error(exception) if not self.allow_failed_create: self.delete(terraform_dir=temp_dir, skip_ci=False) return else: module_logger.info( 'Deletion skipped; we are allowing failed clusters') self.name = self._get_terraform_output('cluster-name', self.deploy_dir, length_of_output=1) if self.name == '': self.name = None self.delete(terraform_dir=temp_dir) return # If there is a duplicate directory, shutil.copytree will fail with FileExistsError # TODO determine what to do when this happens try: with self.git_repo as r: dest_repo_dir = '{}/{}'.format(self.cluster_dir, DEPLOY_DIR_NAME) if 'openshift4-aws' in self.platform: src_repo_tfstate_dir = '{}/aws-ipi'.format( self.deploy_dir, DEPLOY_DIR_NAME) dest_repo_tfstate_dir = '{}/aws-ipi'.format( dest_repo_dir, DEPLOY_DIR_NAME) else: src_repo_tfstate_dir = self.deploy_dir dest_repo_tfstate_dir = dest_repo_dir shutil.copytree(temp_dir, '{}'.format(dest_repo_dir), ignore=shutil.ignore_patterns( '.git*', 'terraform.tfstate')) # encrypt tfstate file, remove real one before pushing to github run_cmd = 'openssl enc -aes-256-cbc -salt -in {}/terraform.tfstate -out {}/terraform.tfstate.enc -k {}'.format( src_repo_tfstate_dir, dest_repo_tfstate_dir, 'afro-donkey-seldom-waterfall-compute') run(run_cmd, shell=True) touch('{}/{}'.format(self.cluster_dir, AVAILABLE_NAME)) self._modify_modules_json(key='Dir', value='{}/{}/{}'.format( self.cluster_directory, self.name, DEPLOY_DIR_NAME)) r.index.add([self.cluster_dir]) r.index.move([ creating_file, '{}/source-job.md'.format(self.cluster_dir) ]) r.index.commit('save cluster state {} [skip ci]'.format( self.name)) with with_cd(self.git_repo.git_dir): run('pwd; git status', shell=True) except Exception as e: module_logger.error( 'Error trying to save the cluster. Deleting.') module_logger.error(e) self.delete(terraform_dir=temp_dir) raise e self._temp_deploy_dir = None
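# with_cd() is used by the deployment helpers above but not defined here. A
# minimal sketch, assuming it is a plain change-directory context manager that
# always restores the previous working directory.
import os
from contextlib import contextmanager

@contextmanager
def with_cd_sketch(path):
    previous = os.getcwd()
    os.chdir(path)
    try:
        yield path
    finally:
        os.chdir(previous)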
def predict(self, dataset: Dataset, **kwargs) -> Iterator[Forecast]: with TemporaryDirectory() as tempdir: predictor_path = Path(tempdir) self._base_predictor.serialize(predictor_path) # TODO: Consider using shared memory for the data transfer. self._input_queues = [mp.Queue() for _ in range(self._num_workers)] self._output_queue = mp.Queue() workers = [] for worker_id, in_q in enumerate(self._input_queues): worker = mp.Process( target=_worker_loop, args=(predictor_path, in_q, self._output_queue, worker_id), kwargs=kwargs, ) worker.daemon = True worker.start() workers.append(worker) self._num_running_workers += 1 self._workers = workers chunked_data = self._grouper(dataset, self._chunk_size) self._send_idx = 0 self._next_idx = 0 self._data_buffer = {} worker_ids = list(range(self._num_workers)) def receive(): idx, worker_id, result = self._output_queue.get() if isinstance(idx, WorkerError): self._num_running_workers -= 1 self.terminate() raise Exception(idx.msg) if idx is not None: self._data_buffer[idx] = result return idx, worker_id, result def get_next_from_buffer(): while self._next_idx in self._data_buffer: result_batch = self._data_buffer.pop(self._next_idx) self._next_idx += 1 yield from result_batch def send(worker_id, chunk): q = self._input_queues[worker_id] q.put((self._send_idx, chunk)) self._send_idx += 1 try: # prime the queues for wid in worker_ids: chunk = next(chunked_data) send(wid, chunk) while True: idx, wid, result = receive() yield from get_next_from_buffer() chunk = next(chunked_data) send(wid, chunk) except StopIteration: # signal workers end of data for q in self._input_queues: q.put((None, None)) # collect any outstanding results while self._num_running_workers > 0: idx, worker_id, result = receive() if idx is None: self._num_running_workers -= 1 continue yield from get_next_from_buffer() assert len(self._data_buffer) == 0 assert self._send_idx == self._next_idx
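# _worker_loop() is launched in each subprocess above but not shown. A minimal
# sketch of the queue protocol implied by the parent loop: read (idx, chunk)
# pairs, reply with (idx, worker_id, results), report failures with a
# WorkerError-like object carrying a msg attribute, and acknowledge the
# (None, None) end-of-data signal. Predictor deserialization is abstracted
# away behind predict_fn, which is an assumption.
class WorkerErrorSketch:
    """Stand-in for the WorkerError type the parent checks with isinstance()."""

    def __init__(self, msg):
        self.msg = msg


def _worker_loop_sketch(predictor_path, input_queue, output_queue, worker_id, predict_fn=None, **kwargs):
    predict_fn = predict_fn or (lambda chunk: list(chunk))  # placeholder for predictor.predict()
    while True:
        idx, chunk = input_queue.get()
        if idx is None:
            # End-of-data marker: tell the parent this worker has finished.
            output_queue.put((None, worker_id, None))
            break
        try:
            output_queue.put((idx, worker_id, predict_fn(chunk)))
        except Exception as error:
            output_queue.put((WorkerErrorSketch(str(error)), worker_id, None))
            break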
def temp_dir(): with TemporaryDirectory() as tempdir: yield tempdir
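# temp_dir() above is a generator-style helper that yields a scratch path and
# cleans it up afterwards. A minimal usage sketch, assuming it is registered as
# a pytest fixture in the real test module; the test body is illustrative.
import os

def test_writes_into_temp_dir(temp_dir):
    target = os.path.join(temp_dir, "example.txt")
    with open(target, "w") as fh:
        fh.write("hello")
    assert os.path.exists(target)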
# We check the kinetic energy to be sure that the particles are not overlapping and do not explode def checkKineticEnergy(lowerBound, upperBound): Ekin = kineticEnergy() if (Ekin > upperBound or Ekin < lowerBound): O.pause() raise YadeCheckError( "Kinetic energy %E is not within bounds from %E to %E! Error!" % (Ekin, lowerBound, upperBound)) else: print("Kinetic energy OK %E" % (Ekin)) #write some restart files with TemporaryDirectory() as tmp_dir: O.save(tmp_dir + '/restartMinWorkEx_' + partType + '_Initial') # Run simulation to check the kinetic energy O.run(10, True) checkKineticEnergy(0.6e-9, 1.4e-9) # Let the object settle O.run(100000, True) curIter = O.iter checkKineticEnergy(0.07, 0.16) # Save simulation O.save(tmp_dir + '/restartMinWorkEx_%s_%d' % (partType, curIter)) time.sleep(1)
def handle(self): output_directory = Path(self.args.output_directory) # We simulate pybabel and sphinx-build commands. Variable names are chosen to match upstream code. # For sphinx-build, the code path is: # # * bin/sphinx-build calls main() in sphinx, which calls build_main(), which calls main() in sphinx.cmdline # * main() calls Sphinx(…).build(…) in sphinx.application # sphinx-build -E -q … kwargs = { 'confoverrides': { 'source_suffix': ['.rst', '.md'], 'source_parsers': { '.md': CommonMarkParser, }, }, 'freshenv': True, 'parallel': 1, } if not self.args.verbose: kwargs.update(status=None) # For pybabel, the code path is: # # * bin/pybabel calls main() in babel.messages.frontend # * main() calls CommandLineInterface().run(sys.argv) # * CommandLineInterface() calls extract_messages(), which: # 1. Reads the input path and method map from command-line options # 2. Instantiates a catalog # 3. Calls extract_from_dir() in babel.messages.extract to extract messages # 4. extract_from_dir() calls check_and_call_extract_file() to find the method in the method map # 5. check_and_call_extract_file() calls extract_from_file() to open a file for extraction # 6. extract_from_file() calls extract() to extract messages # 7. Adds the messages to the catalog # 8. Writes a POT file # 1. Reads the input path and method map from command-line options arguments = [ # pybabel extract -F babel_ocds_codelist.cfg . -o $(POT_DIR)/$(DOMAIN_PREFIX)codelists.pot ('codelists.pot', [ ('codelists/*.csv', extract_codelist), ]), # pybabel extract -F babel_ocds_schema.cfg . -o $(POT_DIR)/$(DOMAIN_PREFIX)schema.pot ('schema.pot', [ ('*-schema.json', extract_schema), ('extension.json', extract_extension_metadata), ]), ] for version in self.versions(): if not version.download_url: logger.warning('No Download URL for {}=={}'.format( version.id, version.version)) outdir = output_directory / version.id / version.version outdir.mkdir(parents=True, exist_ok=True) # See the `files` method of `ExtensionVersion` for similar code. response = requests.get(version.download_url, allow_redirects=True) response.raise_for_status() with closing(ZipFile(BytesIO(response.content))) as zipfile: names = zipfile.namelist() start = len(names[0]) for output_file, method_map in arguments: # 2. Instantiates a catalog catalog = Catalog() # 3. Calls extract_from_dir() in babel.messages.extract to extract messages for name in names[1:]: filename = name[start:] # 4. extract_from_dir() calls check_and_call_extract_file() for pattern, method in method_map: if not pathmatch(pattern, filename): continue # 5. check_and_call_extract_file() calls extract_from_file() with zipfile.open(name) as fileobj: # 6. extract_from_file() calls extract() to extract messages for lineno, message, comments, context in extract( method, fileobj): # 7. Adds the messages to the catalog catalog.add(message, None, [(filename, lineno)], auto_comments=comments, context=context) break # 8. Writes a POT file if catalog: with open(outdir / output_file, 'wb') as outfile: write_po(outfile, catalog) with TemporaryDirectory() as srcdir: for info in zipfile.infolist()[1:]: filename = info.filename[start:] if filename[-1] != '/' and filename.startswith( 'docs/') or filename == 'README.md': info.filename = filename zipfile.extract(info, srcdir) with cd(srcdir): # Eliminates a warning, without change to output. with open('contents.rst', 'w') as f: f.write( '.. 
toctree::\n :glob:\n\n docs/*\n README' ) # sphinx-build -b gettext $(DOCS_DIR) $(POT_DIR) app = Sphinx('.', None, '.', '.', 'gettext', **kwargs) app.build(True) # https://stackoverflow.com/questions/15408348 content = subprocess.run(['msgcat', *glob('*.pot')], check=True, stdout=subprocess.PIPE).stdout with open(outdir / 'docs.pot', 'wb') as f: f.write(content)
def start_python_pipeline(  # pylint: disable=too-many-arguments
    self,
    variables: dict,
    py_file: str,
    py_options: List[str],
    py_interpreter: str = "python3",
    py_requirements: Optional[List[str]] = None,
    py_system_site_packages: bool = False,
    process_line_callback: Optional[Callable[[str], None]] = None,
):
    """
    Starts an Apache Beam Python pipeline.

    :param variables: Variables passed to the pipeline.
    :type variables: Dict
    :param py_file: Path to the Python pipeline file.
    :type py_file: str
    :param py_options: Additional options.
    :type py_options: List[str]
    :param py_interpreter: Python version of the Apache Beam pipeline. Defaults to python3.
        To track python versions supported by Beam and related issues check:
        https://issues.apache.org/jira/browse/BEAM-1251
    :type py_interpreter: str
    :param py_requirements: Additional python package(s) to install.
        If a value is passed to this parameter, a new virtual environment will be created with the
        additional packages installed.
        You could also install the apache-beam package if it is not installed on your system, or if you
        want to use a different version.
    :type py_requirements: List[str]
    :param py_system_site_packages: Whether to include system_site_packages in your virtualenv.
        See virtualenv documentation for more information.
        This option is only relevant if the ``py_requirements`` parameter is not None.
    :type py_system_site_packages: bool
    :param process_line_callback: Callback called for each line of process output.
    :type process_line_callback: callable
    """
    if "labels" in variables:
        variables["labels"] = [f"{key}={value}" for key, value in variables["labels"].items()]

    if py_requirements is not None:
        if not py_requirements and not py_system_site_packages:
            warning_invalid_environment = textwrap.dedent(
                """\
                Invalid method invocation. You have disabled inclusion of system packages and empty
                list required for installation, so it is not possible to create a valid virtual
                environment. In the virtual environment, the apache-beam package must be installed
                for your job to be executed.

                To fix this problem:
                * install apache-beam on the system, then set parameter py_system_site_packages
                  to True,
                * add apache-beam to the list of required packages in parameter py_requirements.
                """
            )
            raise AirflowException(warning_invalid_environment)

        with TemporaryDirectory(prefix="apache-beam-venv") as tmp_dir:
            py_interpreter = prepare_virtualenv(
                venv_directory=tmp_dir,
                python_bin=py_interpreter,
                system_site_packages=py_system_site_packages,
                requirements=py_requirements,
            )
            command_prefix = [py_interpreter] + py_options + [py_file]

            self._start_pipeline(
                variables=variables,
                command_prefix=command_prefix,
                process_line_callback=process_line_callback,
            )
    else:
        command_prefix = [py_interpreter] + py_options + [py_file]

        self._start_pipeline(
            variables=variables,
            command_prefix=command_prefix,
            process_line_callback=process_line_callback,
        )
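# A minimal usage sketch for start_python_pipeline() above. The hook object,
# pipeline path, and variables are illustrative assumptions, not values from
# the original code.
def run_wordcount_example(beam_hook) -> None:
    beam_hook.start_python_pipeline(
        variables={"output": "/tmp/beam-output", "labels": {"team": "data"}},
        py_file="/opt/pipelines/wordcount.py",
        py_options=[],
        py_interpreter="python3",
        # A non-empty requirements list exercises the TemporaryDirectory /
        # prepare_virtualenv branch shown above.
        py_requirements=["apache-beam"],
        py_system_site_packages=False,
        process_line_callback=print,
    )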
def test_command_line_interface(): """Test the CLI.""" runner = CliRunner() help_result = runner.invoke(cli.main, ['--help']) assert help_result.exit_code == 0 assert re.search(r'--help\s+Show this message and exit.', help_result.output, flags=re.DOTALL) is not None # Test replacing multi-char deletion and SNPs and setting sample name via command-line with TemporaryDirectory(prefix='vcf_consensus_builder', dir='/tmp') as tempdir: temppath = Path(tempdir) full_fasta_output = temppath / OUTPUT_FASTA sample_name = 'SAMPLE1' result = runner.invoke(cli.main, ['-v', VCF_DEL, '-d', DEPTHS, '-r', REF_FASTA, '-o', full_fasta_output, '--sample-name', sample_name]) assert result.exit_code == 0 rec = SeqIO.read(full_fasta_output, 'fasta') assert str(rec.seq) == 'NACCGTANACAATAN--', 'There must be a deletion of 3 characters in the middle of the seq' assert rec.id == sample_name # Test changing no coverage threshold with TemporaryDirectory(prefix='vcf_consensus_builder', dir='/tmp') as tempdir: temppath = Path(tempdir) full_fasta_output = temppath / OUTPUT_FASTA sample_name = 'SAMPLE1' result = runner.invoke(cli.main, ['-v', VCF_DEL, '-d', DEPTHS, '-r', REF_FASTA, '-o', full_fasta_output, '--sample-name', sample_name, '--no-coverage', 4]) assert result.exit_code == 0 rec = SeqIO.read(full_fasta_output, 'fasta') assert str(rec.seq) == '-ACCGTA-ACAATA---', 'Positions below 5X coverage must be replaced with "-"' assert rec.id == sample_name # Test replacing low and no coverage characters with other characters than default N and - respectively with TemporaryDirectory(prefix='vcf_consensus_builder', dir='/tmp') as tempdir: temppath = Path(tempdir) full_fasta_output = temppath / OUTPUT_FASTA sample_name = 'SAMPLE1' result = runner.invoke(cli.main, ['-v', VCF_DEL, '-d', DEPTHS, '-r', REF_FASTA, '-o', full_fasta_output, '--sample-name', sample_name, '--no-cov-char', '=', '--low-cov-char', '@']) assert result.exit_code == 0 rec = SeqIO.read(full_fasta_output, 'fasta') assert str(rec.seq) == '@ACCGTA@ACAATA@==', \ 'No coverage positions must be replaced with "=". Low coverage (<5X) positions must be replaced with "@".' assert rec.id == sample_name # Test replacing multi-char insertion and SNPs with TemporaryDirectory(prefix='vcf_consensus_builder', dir='/tmp') as tempdir: temppath = Path(tempdir) full_fasta_output = temppath / OUTPUT_FASTA result = runner.invoke(cli.main, ['-v', VCF_INS, '-d', DEPTHS, '-r', REF_FASTA, '-o', full_fasta_output]) assert result.exit_code == 0 rec = SeqIO.read(full_fasta_output, 'fasta') assert str(rec.seq) == 'NACCGTATTTGTCNACAATAN--', 'There must be an insertion of "TTT" in the middle of the seq' assert rec.id == 'sample1', \ 'FASTA ID must be the first sample name in the VCF if not explicitly specified as a command-line arg'
mock_detector.save_attributes("FeatureExtraction") attribute_dict = mock_detector.toolset["attributes"] test_id = checkpoint_save_config["test_id"] for attribute_name in attribute_dict: attribute_val = attribute_dict[attribute_name][test_id] dummy_val = dummy_attributes[attribute_name] if isinstance(attribute_val, torch.Tensor): assert torch.all(torch.eq(attribute_val, dummy_val)) else: assert attribute_val == dummy_val @pytest.mark.parametrize( "checkpoint_restore_config", [ TemporaryDirectory().name, os.path.join(TemporaryDirectory().name, "attributes.pkl"), ], indirect=True, ) def test_restore_attribute(checkpoint_restore_config, checkpoint_save_config, dummy_attributes): """ Test attributes restored by checkpointer. Args: checkpoint_save_config (dict): Dictionary with the config to save attributes checkpoint_restore_config (dict): Dictionary with the config to save attributes dummy_attributes (dict): Dictionary with value for the attributes Return: None
def write_pandas( conn: 'SnowflakeConnection', df: 'pandas.DataFrame', table_name: str, database: Optional[str] = None, schema: Optional[str] = None, chunk_size: Optional[int] = None, compression: str = 'gzip', on_error: str = 'abort_statement', parallel: int = 4, quote_identifiers: bool = True ) -> Tuple[bool, int, int, Sequence[Tuple[str, str, int, int, int, int, Optional[str], Optional[int], Optional[int], Optional[str]]]]: """Allows users to most efficiently write back a pandas DataFrame to Snowflake. It works by dumping the DataFrame into Parquet files, uploading them and finally copying their data into the table. Returns whether all files were ingested correctly, number of chunks uploaded, and number of rows ingested with all of the COPY INTO command's output for debugging purposes. Example usage: import pandas from snowflake.connector.pandas_tools import write_pandas df = pandas.DataFrame([('Mark', 10), ('Luke', 20)], columns=['name', 'balance']) success, nchunks, nrows, _ = write_pandas(cnx, df, 'customers') Args: conn: Connection to be used to communicate with Snowflake. df: Dataframe we'd like to write back. table_name: Table name where we want to insert into. database: Database schema and table is in, if not provided the default one will be used (Default value = None). schema: Schema table is in, if not provided the default one will be used (Default value = None). chunk_size: Number of elements to be inserted once, if not provided all elements will be dumped once (Default value = None). compression: The compression used on the Parquet files, can only be gzip, or snappy. Gzip gives supposedly a better compression, while snappy is faster. Use whichever is more appropriate (Default value = 'gzip'). on_error: Action to take when COPY INTO statements fail, default follows documentation at: https://docs.snowflake.com/en/sql-reference/sql/copy-into-table.html#copy-options-copyoptions (Default value = 'abort_statement'). parallel: Number of threads to be used when uploading chunks, default follows documentation at: https://docs.snowflake.com/en/sql-reference/sql/put.html#optional-parameters (Default value = 4). quote_identifiers: By default, identifiers, specifically database, schema, table and column names (from df.columns) will be quoted. If set to False, identifiers are passed on to Snowflake without quoting. I.e. identifiers will be coerced to uppercase by Snowflake. (Default value = True) Returns: Returns the COPY INTO command's results to verify ingestion in the form of a tuple of whether all chunks were ingested correctly, # of chunks, # of ingested rows, and ingest's output. """ if database is not None and schema is None: raise ProgrammingError( "Schema has to be provided to write_pandas when a database is provided" ) # This dictionary maps the compression algorithm to Snowflake put copy into command type # https://docs.snowflake.com/en/sql-reference/sql/copy-into-table.html#type-parquet compression_map = {'gzip': 'auto', 'snappy': 'snappy'} if compression not in compression_map.keys(): raise ProgrammingError( "Invalid compression '{}', only acceptable values are: {}".format( compression, compression_map.keys())) if quote_identifiers: location = ((('"' + database + '".') if database else '') + (('"' + schema + '".') if schema else '') + ('"' + table_name + '"')) else: location = ((database + '.' if database else '') + (schema + '.' 
if schema else '') + (table_name)) if chunk_size is None: chunk_size = len(df) cursor = conn.cursor() stage_name = None # Forward declaration while True: try: stage_name = ''.join( random.choice(string.ascii_lowercase) for _ in range(5)) create_stage_sql = ( 'create temporary stage /* Python:snowflake.connector.pandas_tools.write_pandas() */ ' '"{stage_name}"').format(stage_name=stage_name) logger.debug("creating stage with '{}'".format(create_stage_sql)) cursor.execute(create_stage_sql, _is_internal=True).fetchall() break except ProgrammingError as pe: if pe.msg.endswith('already exists.'): continue raise with TemporaryDirectory() as tmp_folder: for i, chunk in chunk_helper(df, chunk_size): chunk_path = os.path.join(tmp_folder, 'file{}.txt'.format(i)) # Dump chunk into parquet file chunk.to_parquet(chunk_path, compression=compression) # Upload parquet file upload_sql = ( 'PUT /* Python:snowflake.connector.pandas_tools.write_pandas() */ ' '\'file://{path}\' @"{stage_name}" PARALLEL={parallel}' ).format(path=chunk_path.replace('\\', '\\\\').replace('\'', '\\\''), stage_name=stage_name, parallel=parallel) logger.debug("uploading files with '{}'".format(upload_sql)) cursor.execute(upload_sql, _is_internal=True) # Remove chunk file os.remove(chunk_path) if quote_identifiers: columns = '"' + '","'.join(list(df.columns)) + '"' else: columns = ','.join(list(df.columns)) # in Snowflake, all parquet data is stored in a single column, $1, so we must select columns explicitly # see (https://docs.snowflake.com/en/user-guide/script-data-load-transform-parquet.html) parquet_columns = '$1:' + ',$1:'.join(df.columns) copy_into_sql = ( 'COPY INTO {location} /* Python:snowflake.connector.pandas_tools.write_pandas() */ ' '({columns}) ' 'FROM (SELECT {parquet_columns} FROM @"{stage_name}") ' 'FILE_FORMAT=(TYPE=PARQUET COMPRESSION={compression}) ' 'PURGE=TRUE ON_ERROR={on_error}').format( location=location, columns=columns, parquet_columns=parquet_columns, stage_name=stage_name, compression=compression_map[compression], on_error=on_error) logger.debug("copying into with '{}'".format(copy_into_sql)) copy_results = cursor.execute(copy_into_sql, _is_internal=True).fetchall() cursor.close() return (all(e[1] == 'LOADED' for e in copy_results), len(copy_results), sum(e[3] for e in copy_results), copy_results)
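# chunk_helper() is used above but not defined in this snippet. A minimal
# sketch of what the call site implies: yield (index, DataFrame-slice) pairs of
# at most chunk_size rows each.
def chunk_helper_sketch(df, chunk_size):
    for index, start in enumerate(range(0, len(df), chunk_size)):
        yield index, df.iloc[start:start + chunk_size]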
def diff_it(role_dir: Path, rev1: str, rev2: str) -> Any: g = Git(role_dir) g.checkout(rev1) role_v1 = Role.load(role_tmp_dir, role_base_dir) g.checkout(rev2) role_v2 = Role.load(role_tmp_dir, role_base_dir) return [d for d in role_v1.diff(role_v2) if d] if sys.argv[2] == 'all': revs = [t.name for t in Repo(role_dir).tags][::-1] else: revs = sys.argv[2:] with TemporaryDirectory() as tmpd: role_tmp_dir = Path(tmpd) / role_name copytree(role_dir, role_tmp_dir, symlinks=True) for rev2, rev1 in zip(revs, revs[1:]): header = f'{rev1} -> {rev2}' print(header) print('=' * len(header)) print() try: diffs = diff_it(role_tmp_dir, rev1, rev2) for diff in sorted(diffs, key=lambda d: d.object_id): print(diff) print() except AnsibleError as exc: print(exc) print()