def execute_librec_thread(exp_count, base_path, var_params, value_tuple,
                          config_out, config, command):
    paths = get_experiment_paths(exp_count, base_path, create=True)
    exp_path = paths.get_path('exp')

    # Loop over all (variable, value) pairs
    for key, value in zip(var_params, value_tuple):
        config_out[key] = value

    paths.add_to_config(config_out, 'result')
    save_properties(config_out, exp_path / "conf/librec.properties")

    # Pass the list of variables and the value tuple
    save_status("Executing", exp_count, var_params, value_tuple, config, paths)

    # The log file appends by default, so remove any stale log first
    librec_log = Path(LOG_PATH)
    if librec_log.is_file():
        librec_log.unlink()

    execute_librec(exp_path, command)
    save_status("Completed", exp_count, var_params, value_tuple, config, paths)

    # Guard against re-splitting data that already has splits, which
    # would otherwise throw an error.
    flag_val = 0
    try:
        # copy_log(paths.get_path('log'))
        flag_val = flag_val + 1
    except Exception:
        pass
    return flag_val
def _cache_file():
    p = Path(ip_lookup._cache_path())
    p.parent.mkdir(parents=True, exist_ok=True)  # pylint: disable=no-member
    yield p
    if p.exists():
        p.unlink()
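# A minimal usage sketch, assuming _cache_file is registered as a pytest
# fixture; the decorator, fixture name, and test below are hypothetical:
#
# @pytest.fixture(name="cache_file")
# def _cache_file():
#     ...  # body as above: mkdir, yield the path, unlink on teardown
#
# def test_lookup_writes_cache(cache_file):
#     ip_lookup.lookup("127.0.0.1")  # hypothetical call that populates the cache
#     assert cache_file.exists()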
def pytest_unconfigure(config):
    if config_existed:
        config_backup.rename(str(path_config))
    else:
        os.remove(str(path_config))
    if config.option.link_gmx_mpi:
        gmx_mpi = Path('~/gmx_mpi').expanduser()
        gmx_mpi.unlink()
def _skip_test_access_by_naver(self):
    test_data = '21149144.naver'
    lp, is_created = LegacyPlace.get_or_create_smart(test_data)
    path = Path(lp.path_accessed)
    if path.exists():
        path.unlink()
    self.assertEqual(path.exists(), False)
    lp.access()
    self.assertEqual(path.exists(), True)
def test_access_by_kakao(self):
    test_data = '14720610.kakao'
    lp, is_created = LegacyPlace.get_or_create_smart(test_data)
    path = Path(lp.path_accessed)
    if path.exists():
        path.unlink()
    self.assertEqual(path.exists(), False)
    lp.access()
    self.assertEqual(path.exists(), True)
def test_access_by_mango(self):
    test_data = 'f-YvkBx8IemC.mango'
    lp, is_created = LegacyPlace.get_or_create_smart(test_data)
    path = Path(lp.path_accessed)
    if path.exists():
        path.unlink()
    self.assertEqual(path.exists(), False)
    lp.access()
    self.assertEqual(path.exists(), True)
def test_access_by_google(self):
    if WORK_ENVIRONMENT:
        return
    test_data = 'ChIJs1Et3lYABDQR32tSk7gPEK4.google'
    lp, is_created = LegacyPlace.get_or_create_smart(test_data)
    path = Path(lp.path_accessed)
    if path.exists():
        path.unlink()
    self.assertEqual(path.exists(), False)
    lp.access()
    self.assertEqual(path.exists(), True)
def test_access_by_4square(self):
    if WORK_ENVIRONMENT:
        return
    test_data = '4ccffc63f6378cfaace1b1d6.4square'
    lp, is_created = LegacyPlace.get_or_create_smart(test_data)
    path = Path(lp.path_accessed)
    if path.exists():
        path.unlink()
    self.assertEqual(path.exists(), False)
    lp.access_force()
    self.assertEqual(path.exists(), True)
def __skip__test_access_methods(self):
    test_data = '031-724-2733'
    phone, is_created = PhoneNumber.get_or_create_smart(test_data)
    path = Path(phone.path_accessed)
    if path.exists():
        path.unlink()
    self.assertEqual(path.exists(), False)
    phone.access_force()
    self.assertEqual(path.exists(), True)
def __skip__test_access_methods(self):
    test_data = '자기랑 진우랑 찰칵~ ^^'
    inote, is_created = ImageNote.get_or_create_smart(test_data)
    path = Path(inote.path_accessed)
    if path.exists():
        path.unlink()
    self.assertEqual(path.exists(), False)
    inote.access_force()
    self.assertEqual(path.exists(), True)
def __skip__test_access_methods(self):
    test_data = '경기도 하남시 풍산로 270, 206동 402호 (선동, 미사강변도시2단지)'
    addr, is_created = Address.get_or_create_smart(test_data)
    path = Path(addr.path_accessed)
    if path.exists():
        path.unlink()
    self.assertEqual(path.exists(), False)
    addr.access_force()
    self.assertEqual(path.exists(), True)
def __skip__test_access_methods(self):
    test_data = '능이백숙 국물 죽이네~ ㅎㅎ'
    pnote, is_created = PlaceNote.get_or_create_smart(test_data)
    path = Path(pnote.path_accessed)
    if path.exists():
        path.unlink()
    self.assertEqual(path.exists(), False)
    pnote.access_force()
    self.assertEqual(path.exists(), True)
def __skip__test_access_methods(self):
    test_data = '관심'
    tname, is_created = TagName.get_or_create_smart(test_data)
    path = Path(tname.path_accessed)
    if path.exists():
        path.unlink()
    self.assertEqual(path.exists(), False)
    tname.access_force()
    self.assertEqual(path.exists(), True)
def access_local(self, source):
    file = Path(self.path_accessed)
    if not file.parent.exists():
        file.parent.mkdir(parents=True)
    summary = Path(self.path_summarized)
    if not summary.parent.exists():
        summary.parent.mkdir(parents=True)
    if file.exists():
        file.unlink()
    try:
        file.symlink_to(source)
    except OSError:
        print('[Content.access_local()] source file does not exist: %s' % source)
def del_oldest_tile(self):
    """ Deletes the oldest tile from the cache. """
    arr = self._get_cache_arr()
    oldest_addr = None
    oldest_ts = Inf
    for k, v in arr.items():  # iteritems() is Python 2 only
        if v.get('ts', Inf) < oldest_ts:
            oldest_ts = v.get('ts', Inf)  # was 'tx': a typo for the 'ts' key
            oldest_addr = k
    if oldest_addr is not None:
        # Check the raw value before constructing Path; Path(None) raises TypeError
        path = arr[oldest_addr].get('path', None)
        if path is None:
            raise IOError('Invalid Path!')
        Path(path).unlink()
    else:
        raise IOError('No tiles to delete!')
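# A minimal sketch of the cache layout del_oldest_tile assumes (keys and
# values here are hypothetical): each entry maps a tile address to its
# timestamp ('ts') and on-disk path, and the loop evicts the smallest 'ts'.
#
# arr = {
#     (0, 0): {'ts': 1700000000.0, 'path': '/tmp/tiles/0_0.png'},
#     (0, 1): {'ts': 1700000050.0, 'path': '/tmp/tiles/0_1.png'},
# }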
def test_backup_key_create_web(register_builtin_html, site, monkeypatch):
    store_path = Path(cmk.utils.paths.default_config_dir, "backup_keys.mk")
    assert not store_path.exists()

    mode = wato.ModeBackupEditKey()

    # First create a backup key
    mode._create_key({
        "alias": u"älias",
        "passphrase": "passphra$e",
    })
    assert store_path.exists()

    # Then test key existence
    test_mode = wato.ModeBackupEditKey()
    keys = test_mode.load()
    assert len(keys) == 1

    assert store_path.exists()
    store_path.unlink()
def test_acl_from_file():
    from modules.arp.arp_module import ACL
    p = Path('temp_test_acl_from_file.txt')
    with p.open(mode='w') as f:
        f.writelines([
            '10.0.0.1 11:ba:da:a5:55:11\n',
            '10.0.0.2 11:ba:da:a5:55:11\n',
            '192.168.178.5 11:8b:ad:f0:0d:11\n',
            '12.12.12.12 11:de:fa:ce:d0:11\n',
            '12.12.12.12 11:de:fa:ce:d0:13\n'
        ])
    acl = ACL.from_file(p)
    assert dict(acl.acl) == {
        '10.0.0.1': ['11:ba:da:a5:55:11'],
        '10.0.0.2': ['11:ba:da:a5:55:11'],
        '192.168.178.5': ['11:8b:ad:f0:0d:11'],
        '12.12.12.12': ['11:de:fa:ce:d0:11', '11:de:fa:ce:d0:13']
    }
    p.unlink()
def get(self):
    """
    Return an object constructed from the artifact file

    Currently supported types: Numpy.array, pandas.DataFrame, PIL.Image, dict (json)
    All other types will return a pathlib2.Path object pointing to a local
    copy of the artifact's file (or directory)

    :return: One of the following objects: Numpy.array, pandas.DataFrame,
        PIL.Image, dict (json), pathlib2.Path
    """
    if self._object:
        return self._object

    local_file = self.get_local_copy()

    if self.type == 'numpy' and np:
        self._object = np.load(local_file)[self.name]
    elif self.type in ('pandas', Artifacts._pd_artifact_type) and pd:
        self._object = pd.read_csv(local_file)
    elif self.type == 'image':
        self._object = Image.open(local_file)
    elif self.type == 'JSON':
        with open(local_file, 'rt') as f:
            self._object = json.load(f)

    local_file = Path(local_file)

    if self._object is None:
        self._object = local_file
    else:
        from trains.storage.helper import StorageHelper
        # Only if we are not using the cache should we delete the file
        if not hasattr(StorageHelper, 'get_cached_disabled'):
            # delete the temporary file, we already used it
            try:
                local_file.unlink()
            except Exception:
                pass

    return self._object
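# A minimal usage sketch, assuming `task` is a trains Task with a registered
# artifact; the artifact name 'train_data' is hypothetical:
#
# artifact = task.artifacts['train_data']
# df = artifact.get()   # pandas.DataFrame for a 'pandas' artifact;
#                       # unsupported types return a local Path copy instead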
def monolithic_adaboost(haar_feature_sums, num_wc, save_path="c:/", save_list=False):
    """
    Generate a monolithic classifier using AdaBoost

    :param haar_feature_sums: feature sums dataframes (object)
    :param num_wc: number of weak classifiers to have (int)
    :param save_path: directory where progress is saved (str)
    :param save_list: save weak classifier list between steps (bool)
    :return: list of weak classifiers (a strong classifier) (list)
    """
    save_path = Path(save_path) / "wc_lst_progress.pkl"
    if save_path.exists():
        print("Loading prior progress for monolithic classifier")
        with open(str(save_path), 'rb') as f:
            [wc_lst, weights, start] = pickle.load(f)
        haar_feature_sums.face_weights, haar_feature_sums.non_face_weights = weights
    else:
        wc_lst = []
        start = 0

    for it in range(start, num_wc):
        wc = WeakClassifier(haar_feature_sums)
        # Current weak classifier cannot be generated (prior wc was a perfect split)
        if wc.stop_flag:
            break
        wc_lst.append(wc)
        if save_list:
            with open(str(save_path), 'wb') as f:
                weights = haar_feature_sums.face_weights, haar_feature_sums.non_face_weights
                progress = [wc_lst, weights, it + 1]
                pickle.dump(progress, f)
        print("\tCalculated weak classifier: " + str(it + 1) + " / " + str(num_wc))

    if save_path.exists():
        save_path.unlink()
    return wc_lst
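# A minimal usage sketch (HaarFeatureSums and the paths are hypothetical
# stand-ins for however the feature-sum dataframes are actually built):
#
# sums = HaarFeatureSums("features/")
# strong_classifier = monolithic_adaboost(sums, num_wc=50,
#                                         save_path="checkpoints/",
#                                         save_list=True)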
def tearDown(self):
    path = Path(self.filename)
    path.unlink()
class TestReader(TestCase):
    """References TestReader

    Test suite for the Reader class.

    `Dat` and `Csv` may be used in unit tests because they do not
    contain any logic.
    """

    def setUp(self) -> None:
        """Initializing the object to test
        """
        # dummy simple csv for tests purpose
        self.dummy_csv = Path(FileUtils.Csv.CSV_NAME)
        self.dummy_csv.touch()
        self.dummy_csv.write_text(FileUtils.Csv.CSV_CONTENT)

    def tearDown(self) -> None:
        """Reinitialize state after unit tests execution
        """
        self.dummy_csv.unlink()

    def test_invalid_initialization_unknown_file(self):
        """A non-existing file should throw an exception
        """
        with self.assertRaises(FileNotFoundError):
            Reader(FileUtils.Csv.NON_EXISTING_NAME)

    def test_valid_properties_columns(self):
        """The reader should correctly get the columns number
        """
        # arrange
        expected = len(
            FileUtils.Csv.CSV_CONTENT.split(Csv.line_end)[0].split(
                Csv.separator))

        # act
        reader = Reader(str(self.dummy_csv))

        # assert
        self.assertEqual(expected, reader.columns)

    def test_valid_properties_rows(self):
        """The reader should correctly get the rows number
        """
        # arrange
        expected = len(FileUtils.Csv.CSV_CONTENT.split(Csv.line_end))

        # act
        reader = Reader(str(self.dummy_csv))

        # assert
        self.assertEqual(expected, reader.rows)

    def test_valid_read_content(self):
        """The reader should correctly gather the fields
        """
        # arrange
        first_line = FileUtils.Csv.CSV_CONTENT \
            .split(Csv.line_end)[0].split(Csv.separator)

        # extract data from formatted string
        first_line = [
            field[1:-1]
            if field.startswith(Csv.delimiter) and field.endswith(Csv.delimiter)
            else field
            for field in first_line
        ]

        expected_len = len(first_line)
        reader = Reader(str(self.dummy_csv))

        # act
        # read the content of the file
        content = list(reader.read_content())
        # only keep the first line
        content = content[0]

        # assert
        # the length of the content read should be the same as specified
        self.assertEqual(expected_len, len(content))
        # the files written and loaded should be the same
        self.assertListEqual(first_line, content)
def delete_session_file(shared_file_name):
    shared_file_name = Path(tc_parameters.WORKING_DIR, shared_file_name)
    shared_file_name.unlink()
class TestNormalizer(TestCase):
    """References TestNormalizer

    Test suite for the Normalizer class.

    `Dat` and `Csv` may be used in unit tests because they do not
    contain any logic.
    """

    def setUp(self) -> None:
        """Initializing the object to test
        """
        self.normalizer = Normalizer(to_normalize_ext=Dat.ext,
                                     separator=Dat.separator)

        self.dummy_csv = Path(FileUtils.Csv.CSV_NAME)
        self.dummy_csv.touch()

        self.dummy_dat = Path(FileUtils.Csv.DAT_NAME)
        self.dummy_dat.touch()

    def tearDown(self) -> None:
        """Reinitialize state after unit tests execution
        """
        self.dummy_csv.unlink()
        self.dummy_dat.unlink()

    def test_invalid_is_valid_csv_field_number(self):
        """A badly formatted number should be invalid
        """
        # trailing quotes
        self.assertFalse(Normalizer.is_valid_csv_field('1337"'))
        # beginning quotes
        self.assertFalse(Normalizer.is_valid_csv_field('"1337'))
        # no quotes
        self.assertFalse(Normalizer.is_valid_csv_field('1337'))

    def test_valid_is_valid_csv_field_number(self):
        """A well formatted number should be valid
        """
        # int
        self.assertTrue(Normalizer.is_valid_csv_field('"42"'))
        # float
        self.assertTrue(Normalizer.is_valid_csv_field('"13.37"'))
        # negative
        self.assertTrue(Normalizer.is_valid_csv_field('"-3.14"'))

    def test_valid_is_valid_csv_field_string(self):
        """A well formatted string should be valid
        """
        # single string
        self.assertTrue(Normalizer.is_valid_csv_field('"field"'))
        # with spaces
        self.assertTrue(Normalizer.is_valid_csv_field('"some field"'))

    def test_invalid_convert_to_csv_no_file(self):
        """A non-existing file should throw an exception
        """
        # with an incorrect extension too
        with self.assertRaises(FileNotFoundError):
            self.normalizer.convert_to_csv(
                dat_path=FileUtils.Csv.NON_EXISTING_NAME)

        # with the appropriate extension
        with self.assertRaises(FileNotFoundError):
            self.normalizer.convert_to_csv(
                dat_path=FileUtils.Csv.NON_EXISTING_NAME + Dat.ext)

    def test_invalid_convert_to_csv_bad_file_dat_ext(self):
        """A bad DAT file extension should throw an exception
        """
        with self.assertRaises(BadFileFormatException):
            self.normalizer.convert_to_csv(dat_path=str(self.dummy_csv))

    def test_invalid_convert_to_csv_bad_file_dat_csv(self):
        """A bad CSV file extension should throw an exception
        """
        with self.assertRaises(BadFileFormatException):
            self.normalizer.convert_to_csv(dat_path=str(self.dummy_dat),
                                           csv_path=str(self.dummy_dat))

    def test_invalid_convert_to_csv_from_folder_non_existing_folder(self):
        """A non-existing folder should throw an exception
        """
        with self.assertRaises(BadFileFormatException):
            self.normalizer.convert_to_csv_from_folder(
                dat_folder=FileUtils.Csv.NON_EXISTING_NAME)

    def test_invalid_convert_to_csv_from_folder_not_folder(self):
        """A path that is not a folder should throw an exception
        """
        with self.assertRaises(BadFileFormatException):
            self.normalizer.convert_to_csv_from_folder(
                dat_folder=self.dummy_dat)
def delete_session_file(shared_file_name, syslog_ng_testcase):
    working_dir = syslog_ng_testcase.testcase_parameters.get_working_dir()
    shared_file_name = Path(working_dir, shared_file_name)
    shared_file_name.unlink()
class AdaBoostCascade:
    def __init__(self, hf_data, hf_path, neg_set="", pos_val_set="",
                 neg_val_set="", extension="", max_layer_fpr=1,
                 min_layer_tpr=0, target_fpr=1):
        """
        Generate a cascade of classifiers

        :param hf_data: Haar-like features training dataframes (object)
        :param hf_path: path to pckl files of Haar-like feature for first cascade (string)
        :param neg_set: path to nonface training data (string)
        :param pos_val_set: path to face validation set (string)
        :param neg_val_set: path to nonface validation set (string)
        :param extension: extension of training data (string)
        :param max_layer_fpr: maximum allowable false positive rate per layer (float)
        :param min_layer_tpr: minimum allowable true positive rate per layer (float)
        :param target_fpr: desired cumulative product of layer FPRs (float)
        """
        cascade_path = Path(hf_path) / "cascade.pkl"
        extended_cascade_path = Path(hf_path) / "extended_cascade.pkl"

        # Load existing classifier or generate/continue generating cascade
        if cascade_path.exists():
            with open(str(cascade_path), 'rb') as f:
                self.cascade = pickle.load(f)
        elif extended_cascade_path.exists():
            with open(str(extended_cascade_path), 'rb') as f:
                self.cascade = pickle.load(f)
        else:
            self.progress_path = Path(hf_path) / "cascade_progress.pkl"
            # Load progress later if it exists
            if not self.progress_path.exists():
                # Convert positive and negative validation sets to integral images
                self.pos_validation, self.neg_validation, self.cascade_neg_test = [
                    self.set_test_intg_img_with_position(path, extension)
                    for path in [pos_val_set, neg_val_set, neg_set]
                ]
                if 0 in [
                        len(self.pos_validation),
                        len(self.neg_validation),
                        len(self.cascade_neg_test)
                ]:
                    pos_check = "Positive validation set has: " + str(
                        len(self.pos_validation)) + " images\n"
                    neg_check = "Negative validation set has: " + str(
                        len(self.neg_validation)) + " images\n"
                    train_check = "Negative training set has: " + str(
                        len(self.cascade_neg_test)) + " images"
                    raise Exception(pos_check + neg_check + train_check)
                print("Positive validation set:", len(self.pos_validation))
                print("Negative validation set:", len(self.neg_validation))

                # Initialize training dataframes and cascade
                self.hf = hf_data
                self.cascade = {}

            # Train cascade
            self.train_cascade_layer(max_layer_fpr, target_fpr, min_layer_tpr)
            if self.progress_path.exists():
                self.progress_path.unlink()
            with open(str(cascade_path), 'wb') as f:
                pickle.dump(self.cascade, f)

    def train_cascade_layer(self, max_layer_fpr, target_fpr, min_layer_tpr):
        """
        Train a cascade of strong classifiers

        :param max_layer_fpr: maximum allowable false positive rate per layer (float)
        :param target_fpr: desired cumulative product of layer FPRs (float)
        :param min_layer_tpr: minimum allowable true positive rate per layer (float)
        :return:
        """
        wc_lst, pos_removals, neg_removals = [], [], []
        threshold = float('inf')
        if not self.progress_path.exists():
            all_true_negatives = []
            no_cascade = 1
            fpr_product = 1.0
        else:
            with open(str(self.progress_path), 'rb') as f:
                print("Loading cascade progress")
                [
                    no_cascade, fpr_product, all_true_negatives, self.hf,
                    self.cascade, self.pos_validation, self.neg_validation,
                    self.cascade_neg_test
                ] = pickle.load(f)

        # While the product of layer FPRs is above the desired holistic FPR product
        while fpr_product > target_fpr:
            fpr_layer = 1.0
            print("\nTraining layer", no_cascade)

            # While the current layer's FPR is higher than the maximum acceptable FPR
            while fpr_layer > max_layer_fpr:
                # Add a single weak classifier per iteration - weights are updated per iteration
                print("\tStrong classifier might have", len(wc_lst) + 1,
                      "weak classifier(s)")
                wc = WeakC.monolithic_adaboost(self.hf, 1)
                if len(wc) == 0:
                    break
                wc_lst.append(wc[0])

                # Evaluate threshold on positive and negative validation sets
                threshold, pos_removals = self.evaluate_positive_validation_set(
                    wc_lst, min_layer_tpr)
                fpr_layer, neg_removals = self.evaluate_negative_validation(
                    threshold, wc_lst)

            # Remove false negatives and true negatives
            print("Removing", len(pos_removals),
                  "false negatives from the positive validation set")
            for idx in pos_removals:
                self.pos_validation.pop(idx)
            print("Removing", len(neg_removals),
                  "true negatives from the negative validation set")
            for idx in neg_removals:
                self.neg_validation.pop(idx)

            # Update cumulative product and update cascade/training set if needed
            fpr_product *= fpr_layer
            self.cascade[no_cascade] = [wc_lst, threshold]
            if fpr_product <= target_fpr:
                print("Target false positive rate reached - saving strong classifier")
                break
            elif len(self.pos_validation.keys()) == 0 or len(
                    self.neg_validation.keys()) == 0:
                print("No more positive or negative validation images - "
                      "saving strong classifier")
                break
            else:
                o_len_neg_test = len(self.cascade_neg_test.keys())
                df_imno_to_remove = self.determine_training_nonface_images(
                    threshold, wc_lst)

                # New strong classifier classified negative training set completely correctly
                if len(df_imno_to_remove) == 0:
                    print("No false positives from nonface training set")
                    break

                # Record the layer's true negatives and prune them from the
                # training dataframes before starting the next layer
                all_true_negatives += [df_imno_to_remove]
                self.hf.cascade_remove_negative_features(all_true_negatives)
                no_cascade += 1
                wc_lst = []

                print("Saving cascade layer")
                with open(str(self.progress_path), 'wb') as f:
                    progress = [
                        no_cascade, fpr_product, all_true_negatives, self.hf,
                        self.cascade, self.pos_validation,
                        self.neg_validation, self.cascade_neg_test
                    ]
                    pickle.dump(progress, f)

    def evaluate_positive_validation_set(self, wc_lst, min_layer_tpr):
        """
        Mock evaluation of cascade - iteratively removing false negatives:
        - Evaluate an initial TPR with the default threshold and collect false
          negatives' thresholds
        - Decrease threshold to meet minimum acceptable layer TPR if needed
        - Identify which images are false negatives with the determined threshold
        - If the threshold's FPR is acceptable - false negatives will be removed
          from subsequent evaluations to replicate a cascade

        :param wc_lst: weak classifiers accumulated for layer (list)
        :param min_layer_tpr: minimum acceptable true positive rate (float)
        :return: threshold that meets minimum acceptable TPR and false negatives to remove (float, list)
        """
        pos_val_len = len(self.pos_validation.keys())
        threshold = sum([wc.weight for wc in wc_lst]) / 2
        tpr_counter, failed_alphas, idx_removals = self.evaluate_cascade_layer(
            self.pos_validation, threshold, wc_lst)
        layer_tpr = tpr_counter / pos_val_len
        print("\tInitial layer TPR:", tpr_counter, "/", pos_val_len, "=", layer_tpr)

        # If TPR is lower than desired, lower the threshold to meet the minimum
        if tpr_counter / pos_val_len < min_layer_tpr:
            min_pos_hits = round(pos_val_len * min_layer_tpr)
            num_to_include = min_pos_hits - tpr_counter
            threshold = failed_alphas[-1 * num_to_include]
            idx_removals = idx_removals[0:-1 * num_to_include]
        return threshold, idx_removals

    def evaluate_negative_validation(self, threshold, wc_lst):
        """
        Evaluate threshold on negative validation set to get the layer's false
        positive rate and extract true negatives

        :param threshold: threshold to test against (float)
        :param wc_lst: strong classifier (list)
        :return: false positive rate and true negatives to remove (float, list)
        """
        neg_val_len = len(self.neg_validation)
        fpr_counter, _, idx_removals = self.evaluate_cascade_layer(
            self.neg_validation, threshold, wc_lst)
        layer_fpr = fpr_counter / neg_val_len
        print("\tLayer FPR:", fpr_counter, "/", neg_val_len, "=", layer_fpr, "\n")
        return layer_fpr, idx_removals

    def determine_training_nonface_images(self, threshold, wc_lst):
        """
        Evaluate threshold on the negative training set, drop its true
        negatives, and return the image numbers that remain as the next
        layer's nonface training set

        :param threshold: threshold to test against (float)
        :param wc_lst: strong classifier (list)
        :return: remaining nonface training image numbers (list)
        """
        _, _, idx_removals = self.evaluate_cascade_layer(
            self.cascade_neg_test, threshold, wc_lst)
        print("Removing", len(idx_removals), "nonface images from training set\n")
        for idx in idx_removals:
            self.cascade_neg_test.pop(idx)
        df_imno_to_remove = list(self.cascade_neg_test.keys())
        print(len(df_imno_to_remove), "images to make up nonface training set")
        return df_imno_to_remove

    @staticmethod
    def set_test_intg_img_with_position(img_set, extension):
        """
        Initialize image test set for determining subsequent nonface training data:
        - Included images are associated with their original image numbers
        - Correlations are for expedited removal from training dataframes and
          mock evaluation of cascade

        :param img_set: path to nonface training set (string)
        :param extension: image file extension (string)
        :return: integral image and corresponding reference image number (dict)
        """
        test_dict = {}
        neg_test_intg_imgs = HaarF.paths_to_integral_images(img_set, extension)
        for idx in range(0, len(neg_test_intg_imgs)):
            test_dict[idx] = neg_test_intg_imgs[idx]
        return test_dict

    @staticmethod
    def evaluate_cascade_layer(validation_intg_imgs, threshold, classifiers):
        """
        Evaluate how many face detections occur with the layer's weak classifiers

        :param validation_intg_imgs: validation integral images (dict)
        :param threshold: threshold to compare against (float)
        :param classifiers: strong classifier (list)
        :return: number of positives, thresholds of negatives, and their
            image indexes (int, list, list)
        """
        low_alphas = []
        counter = 0

        # If the cumulative threshold is insufficient, the corresponding
        # image's threshold and index are saved
        for im_no, intg_img in validation_intg_imgs.items():
            cumulative_alpha = WeakC.evaluate_weak_classifiers(intg_img, classifiers)
            if cumulative_alpha >= threshold:
                counter += 1
            else:
                low_alphas.append((cumulative_alpha, im_no))

        low_alphas = sorted(low_alphas)
        idx_removals = [pair[1] for pair in low_alphas]
        low_alphas = [pair[0] for pair in low_alphas]
        return counter, low_alphas, idx_removals
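# A minimal construction sketch (paths and the HaarFeatureData loader are
# hypothetical; parameter meanings follow the __init__ docstring above):
#
# hf = HaarFeatureData("features/")
# cascade = AdaBoostCascade(hf, "features/",
#                           neg_set="data/nonfaces/",
#                           pos_val_set="data/val_faces/",
#                           neg_val_set="data/val_nonfaces/",
#                           extension=".png",
#                           max_layer_fpr=0.5,
#                           min_layer_tpr=0.995,
#                           target_fpr=1e-3)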
def _zip_package(package_root, includes, excludes=None, dockerize_pip=False,
                 follow_symlinks=False, python_path=None,
                 requirements_files=None, use_pipenv=False, **kwargs):
    """Create zip file in memory with package dependencies.

    Args:
        package_root (str): Base directory to copy files from.
        includes (List[str]): Inclusion patterns. Only files matching those
            patterns will be included in the result.
        excludes (List[str]): Exclusion patterns. Files matching those
            patterns will be excluded from the result. Exclusions take
            precedence over inclusions.
        dockerize_pip (Union[bool, str]): Whether to use docker or under
            what conditions docker will be used to run ``pip``.
        follow_symlinks (bool): If true, symlinks will be included in the
            resulting zip file.
        python_path (Optional[str]): Explicit python interpreter to be used.
            pipenv must be installed and executable using ``-m`` if provided.
        requirements_files (Dict[str, bool]): Map of requirement file names
            and whether they exist.
        use_pipenv (bool): Whether to use pipenv to export a Pipfile as
            requirements.txt.
        kwargs (Any): Advanced options for subprocess and docker. See source
            code to determine what is supported.

    Returns:
        Tuple[str, str]: Content of the ZIP file as a byte string and
        calculated hash of all the files.

    """
    kwargs.setdefault("pipenv_timeout", 300)

    temp_root = os.path.join(os.path.expanduser("~"), ".runway_cache")
    if not os.path.isdir(temp_root):
        os.makedirs(temp_root)

    # exclude potential virtual environments in the package
    excludes = list(excludes or [])  # the default is None; avoid appending to it
    excludes.append(".venv/")

    with tempfile.TemporaryDirectory(prefix="cfngin", dir=temp_root) as tmpdir:
        tmp_req = os.path.join(tmpdir, "requirements.txt")
        copydir(package_root, tmpdir, includes, excludes, follow_symlinks)
        tmp_req = handle_requirements(
            package_root=package_root,
            dest_path=tmpdir,
            requirements=requirements_files,
            python_path=python_path,
            use_pipenv=use_pipenv,
            pipenv_timeout=kwargs["pipenv_timeout"],
        )

        if should_use_docker(dockerize_pip):
            dockerized_pip(tmpdir, **kwargs)
        else:
            tmp_script = Path(tmpdir) / "__runway_run_pip_install.py"
            pip_cmd = [
                python_path or sys.executable,
                "-m", "pip", "install",
                "--target", tmpdir,
                "--requirement", tmp_req,
                "--no-color",
            ]

            subprocess_args = {}
            if kwargs.get("python_dontwritebytecode"):
                subprocess_args["env"] = dict(os.environ, PYTHONDONTWRITEBYTECODE="1")

            # Pyinstaller build or explicit python path
            if getattr(sys, "frozen", False) and not python_path:
                script_contents = os.linesep.join([
                    "import runpy",
                    "from runway.util import argv",
                    "with argv(*{}):".format(json.dumps(pip_cmd[2:])),
                    '    runpy.run_module("pip", run_name="__main__")\n',
                ])
                # TODO remove python 2 logic when dropping python 2
                tmp_script.write_text(script_contents
                                      if sys.version_info.major > 2
                                      else script_contents.decode("UTF-8"))
                cmd = [sys.executable, "run-python", str(tmp_script)]
            else:
                if not _pip_has_no_color_option(pip_cmd[0]):
                    pip_cmd.remove("--no-color")
                cmd = pip_cmd

            LOGGER.info(
                "The following output from pip may include incompatibility "
                "errors. These can generally be ignored (pip will erroneously "
                "warn about conflicts between the packages in your Lambda zip "
                "and your host system).")

            try:
                subprocess.check_call(cmd, **subprocess_args)
            except subprocess.CalledProcessError:
                raise PipError
            finally:
                if tmp_script.is_file():
                    tmp_script.unlink()

        if kwargs.get("python_exclude_bin_dir") and os.path.isdir(
                os.path.join(tmpdir, "bin")):
            LOGGER.debug("Removing python /bin directory from Lambda files")
            shutil.rmtree(os.path.join(tmpdir, "bin"))

        if kwargs.get("python_exclude_setuptools_dirs"):
            for i in os.listdir(tmpdir):
                if i.endswith(".egg-info") or i.endswith(".dist-info"):
                    LOGGER.debug("Removing directory %s from Lambda files", i)
                    shutil.rmtree(os.path.join(tmpdir, i))

        req_files = _find_files(tmpdir, includes="**", follow_symlinks=False)
        return _zip_files(req_files, tmpdir)
def _upload_data_audit_artifacts(self, name):
    logger = self._task.get_logger()
    pd_artifact = self._artifacts_container.get(name)
    pd_metadata = self._artifacts_container.get_metadata(name)

    # remove from artifacts watch list
    if name in self._unregister_request:
        try:
            self._unregister_request.remove(name)
        except KeyError:
            pass
        self._artifacts_container.unregister_artifact(name)

    if pd_artifact is None:
        return

    override_filename_ext_in_uri = self._save_format
    override_filename_in_uri = name
    fd, local_csv = mkstemp(prefix=quote(name, safe="") + '.',
                            suffix=override_filename_ext_in_uri)
    os.close(fd)
    local_csv = Path(local_csv)
    pd_artifact.to_csv(local_csv.as_posix(), index=False,
                       compression=self._compression)
    current_sha2, file_sha2 = self.sha256sum(local_csv.as_posix(),
                                             skip_header=32)
    if name in self._last_artifacts_upload:
        previous_sha2 = self._last_artifacts_upload[name]
        if previous_sha2 == current_sha2:
            # nothing to do, we can skip the upload
            try:
                local_csv.unlink()
            except Exception:
                pass
            return
    self._last_artifacts_upload[name] = current_sha2

    # If old trains-server, upload as debug image
    if not Session.check_min_api_version('2.3'):
        logger.report_image(title='artifacts', series=name,
                            local_path=local_csv.as_posix(),
                            delete_after_upload=True,
                            iteration=self._task.get_last_iteration(),
                            max_image_history=2)
        return

    # Find our artifact
    artifact = None
    for an_artifact in self._task_artifact_list:
        if an_artifact.key == name:
            artifact = an_artifact
            break

    file_size = local_csv.stat().st_size

    # upload file
    uri = self._upload_local_file(
        local_csv, name,
        delete_after_upload=True,
        override_filename=override_filename_in_uri,
        override_filename_ext=override_filename_ext_in_uri)

    # update task artifacts
    with self._task_edit_lock:
        if not artifact:
            artifact = tasks.Artifact(key=name, type=self._pd_artifact_type)
            self._task_artifact_list.append(artifact)
        artifact_type_data = tasks.ArtifactTypeData()

        artifact_type_data.data_hash = current_sha2
        artifact_type_data.content_type = "text/csv"
        artifact_type_data.preview = str(
            pd_artifact.__repr__()) + '\n\n' + self._get_statistics(
                {name: pd_artifact})

        artifact.type_data = artifact_type_data
        artifact.uri = uri
        artifact.content_size = file_size
        artifact.hash = file_sha2
        artifact.timestamp = int(time())
        artifact.display_data = [
            (str(k), str(v)) for k, v in pd_metadata.items()
        ] if pd_metadata else None

        self._task.set_artifacts(self._task_artifact_list)
def _zip_package(package_root, includes, excludes=None, dockerize_pip=False,
                 follow_symlinks=False, python_path=None,
                 requirements_files=None, use_pipenv=False, **kwargs):
    """Create zip file in memory with package dependencies.

    Args:
        package_root (str): Base directory to copy files from.
        includes (List[str]): Inclusion patterns. Only files matching those
            patterns will be included in the result.
        excludes (List[str]): Exclusion patterns. Files matching those
            patterns will be excluded from the result. Exclusions take
            precedence over inclusions.
        dockerize_pip (Union[bool, str]): Whether to use docker or under
            what conditions docker will be used to run ``pip``.
        follow_symlinks (bool): If true, symlinks will be included in the
            resulting zip file.
        python_path (Optional[str]): Explicit python interpreter to be used.
            pipenv must be installed and executable using ``-m`` if provided.
        requirements_files (Dict[str, bool]): Map of requirement file names
            and whether they exist.
        use_pipenv (bool): Whether to use pipenv to export a Pipfile as
            requirements.txt.
        kwargs (Any): Advanced options for subprocess and docker. See source
            code to determine what is supported.

    Returns:
        Tuple[str, str]: Content of the ZIP file as a byte string and
        calculated hash of all the files.

    """
    kwargs.setdefault('pipenv_timeout', 300)

    temp_root = os.path.join(os.path.expanduser('~'), '.runway_cache')
    if not os.path.isdir(temp_root):
        os.makedirs(temp_root)

    # exclude potential virtual environments in the package
    excludes = list(excludes or [])  # the default is None; avoid appending to it
    excludes.append('.venv/')

    with tempfile.TemporaryDirectory(prefix='cfngin', dir=temp_root) as tmpdir:
        tmp_req = os.path.join(tmpdir, 'requirements.txt')
        copydir(package_root, tmpdir, includes, excludes, follow_symlinks)
        tmp_req = handle_requirements(package_root=package_root,
                                      dest_path=tmpdir,
                                      requirements=requirements_files,
                                      python_path=python_path,
                                      use_pipenv=use_pipenv,
                                      pipenv_timeout=kwargs['pipenv_timeout'])

        if should_use_docker(dockerize_pip):
            dockerized_pip(tmpdir, **kwargs)
        else:
            tmp_script = Path(tmpdir) / '__runway_run_pip_install.py'
            pip_cmd = [
                python_path or sys.executable, '-m', 'pip', 'install',
                '--target', tmpdir, '--requirement', tmp_req, '--no-color'
            ]

            # Pyinstaller build or explicit python path
            if getattr(sys, 'frozen', False) and not python_path:
                script_contents = os.linesep.join([
                    'import runpy',
                    'from runway.util import argv',
                    'with argv(*{}):'.format(json.dumps(pip_cmd[2:])),
                    '    runpy.run_module("pip", run_name="__main__")\n'
                ])
                # TODO remove python 2 logic when dropping python 2
                tmp_script.write_text(script_contents
                                      if sys.version_info.major > 2
                                      else script_contents.decode('UTF-8'))
                cmd = [sys.executable, 'run-python', str(tmp_script)]
            else:
                if not _pip_has_no_color_option(pip_cmd[0]):
                    pip_cmd.remove('--no-color')
                cmd = pip_cmd

            LOGGER.info(
                'The following output from pip may include incompatibility '
                'errors. These can generally be ignored (pip will erroneously '
                'warn about conflicts between the packages in your Lambda zip '
                'and your host system).')

            try:
                subprocess.check_call(cmd)
            except subprocess.CalledProcessError:
                raise PipError
            finally:
                if tmp_script.is_file():
                    tmp_script.unlink()

        req_files = _find_files(tmpdir, includes='**', follow_symlinks=False)
        return _zip_files(req_files, tmpdir)