Example #1
0
def execute_librec_thread(exp_count, base_path, var_params, value_tuple,
                          config_out, config, command):
    paths = get_experiment_paths(exp_count, base_path, create=True)
    exp_path = paths.get_path('exp')
    for key, value in zip(var_params, value_tuple):
        config_out[key] = value  # Loop over all variables, value pairs
    paths.add_to_config(config_out, 'result')
    save_properties(config_out, exp_path / "conf/librec.properties")
    # Pass list of vars and tuple
    save_status("Executing", exp_count, var_params, value_tuple, config, paths)
    # log file appends by default

    #        os.unlink(LOG_PATH)
    # Try block might be better?
    librec_log = Path(LOG_PATH)
    if librec_log.is_file():
        librec_log.unlink()

    execute_librec(exp_path, command)
    save_status("Completed", exp_count, var_params, value_tuple, config, paths)

    # Aldo's fault - This is only because if we 'split' something with splits made already, it will throw an error
    flag_val = 0
    try:
        # copy_log(paths.get_path('log'))
        flag_val = flag_val + 1
    except Exception:
        pass

    return flag_val
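The inline comment above asks whether a try block might be better than the is_file() check before unlink(). A minimal sketch of that alternative, assuming the same LOG_PATH constant from the snippet; catching FileNotFoundError removes the window between the existence check and the deletion:

from pathlib import Path

def remove_stale_log(log_path):
    """Delete a leftover log file if present, tolerating a missing file."""
    try:
        Path(log_path).unlink()
    except FileNotFoundError:
        pass  # nothing to clean up

# e.g. remove_stale_log(LOG_PATH) before calling execute_librec()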
Example #2
0
def _cache_file():
    p = Path(ip_lookup._cache_path())
    p.parent.mkdir(parents=True, exist_ok=True)  # pylint: disable=no-member

    yield p

    if p.exists():
        p.unlink()
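The generator above follows the pytest yield-fixture pattern: everything before the yield is setup, the test receives the yielded path, and the code after the yield removes the cache file as teardown. A minimal self-contained sketch of how such a fixture is typically declared and consumed (the @pytest.fixture decorator, the tmp_path location, and the test body are assumptions, not part of the original snippet):

import pytest

@pytest.fixture
def cache_file(tmp_path):
    p = tmp_path / "ip_lookup.cache"   # hypothetical cache location
    p.parent.mkdir(parents=True, exist_ok=True)
    yield p                            # setup done; the test receives the path
    if p.exists():
        p.unlink()                     # teardown runs after the test finishes

def test_cache_is_written(cache_file):
    cache_file.write_text("127.0.0.1")
    assert cache_file.exists()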
Example #3
0
def pytest_unconfigure(config):
    if config_existed:
        config_backup.rename(str(path_config))
    else:
        os.remove(str(path_config))
    if config.option.link_gmx_mpi:
        gmx_mpi = Path('~/gmx_mpi').expanduser()
        gmx_mpi.unlink()
Example #5
0
 def _skip_test_access_by_naver(self):
     test_data = '21149144.naver'
     lp, is_created = LegacyPlace.get_or_create_smart(test_data)
     path = Path(lp.path_accessed)
     if path.exists():
         path.unlink()
     self.assertEqual(path.exists(), False)
     lp.access()
     self.assertEqual(path.exists(), True)
Example #6
0
 def test_access_by_kakao(self):
     test_data = '14720610.kakao'
     lp, is_created = LegacyPlace.get_or_create_smart(test_data)
     path = Path(lp.path_accessed)
     if path.exists():
         path.unlink()
     self.assertEqual(path.exists(), False)
     lp.access()
     self.assertEqual(path.exists(), True)
Example #7
0
 def test_access_by_mango(self):
     test_data = 'f-YvkBx8IemC.mango'
     lp, is_created = LegacyPlace.get_or_create_smart(test_data)
     path = Path(lp.path_accessed)
     if path.exists():
         path.unlink()
     self.assertEqual(path.exists(), False)
     lp.access()
     self.assertEqual(path.exists(), True)
Example #8
0
 def test_access_by_google(self):
     if WORK_ENVIRONMENT: return
     test_data = 'ChIJs1Et3lYABDQR32tSk7gPEK4.google'
     lp, is_created = LegacyPlace.get_or_create_smart(test_data)
     path = Path(lp.path_accessed)
     if path.exists():
         path.unlink()
     self.assertEqual(path.exists(), False)
     lp.access()
     self.assertEqual(path.exists(), True)
Example #9
0
 def test_access_by_4square(self):
     if WORK_ENVIRONMENT: return
     test_data = '4ccffc63f6378cfaace1b1d6.4square'
     lp, is_created = LegacyPlace.get_or_create_smart(test_data)
     path = Path(lp.path_accessed)
     if path.exists():
         path.unlink()
     self.assertEqual(path.exists(), False)
     lp.access_force()
     self.assertEqual(path.exists(), True)
Example #10
0
    def __skip__test_access_methods(self):
        test_data = '031-724-2733'
        phone, is_created = PhoneNumber.get_or_create_smart(test_data)

        path = Path(phone.path_accessed)
        if path.exists():
            path.unlink()

        self.assertEqual(path.exists(), False)
        phone.access_force()
        self.assertEqual(path.exists(), True)
Example #11
0
    def __skip__test_access_methods(self):
        test_data = '자기랑 진우랑 찰칵~ ^^'
        inote, is_created = ImageNote.get_or_create_smart(test_data)

        path = Path(inote.path_accessed)
        if path.exists():
            path.unlink()

        self.assertEqual(path.exists(), False)
        inote.access_force()
        self.assertEqual(path.exists(), True)
Example #12
0
    def __skip__test_access_methods(self):
        test_data = '경기도 하남시 풍산로 270, 206동 402호 (선동, 미사강변도시2단지)'
        addr, is_created = Address.get_or_create_smart(test_data)

        path = Path(addr.path_accessed)
        if path.exists():
            path.unlink()

        self.assertEqual(path.exists(), False)
        addr.access_force()
        self.assertEqual(path.exists(), True)
Example #13
0
    def __skip__test_access_methods(self):
        test_data = '능이백숙 국물 죽이네~ ㅎㅎ'
        pnote, is_created = PlaceNote.get_or_create_smart(test_data)

        path = Path(pnote.path_accessed)
        if path.exists():
            path.unlink()

        self.assertEqual(path.exists(), False)
        pnote.access_force()
        self.assertEqual(path.exists(), True)
Example #14
0
    def __skip__test_access_methods(self):
        test_data = '관심'
        tname, is_created = TagName.get_or_create_smart(test_data)

        path = Path(tname.path_accessed)
        if path.exists():
            path.unlink()

        self.assertEqual(path.exists(), False)
        tname.access_force()
        self.assertEqual(path.exists(), True)
Example #15
0
 def access_local(self, source):
     file = Path(self.path_accessed)
     if not file.parent.exists():
         file.parent.mkdir(parents=True)
     summary = Path(self.path_summarized)
     if not summary.parent.exists():
         summary.parent.mkdir(parents=True)
     if file.exists():
         file.unlink()
     try:
         file.symlink_to(source)
     except OSError:
         print('[Content.access_local()] source file does not exist: %s' %
               source)
Example #16
0
    def del_oldest_tile(self):
        """ Deletes the oldest tile from the cache. """
        arr = self._get_cache_arr()
        oldestAddr = None
        oldestTs = float('inf')
        for k, v in arr.items():  # dict.iteritems() is Python 2 only
            if v.get('ts', float('inf')) < oldestTs:
                oldestTs = v.get('ts', float('inf'))  # was v.get('tx', ...), a typo
                oldestAddr = k

        if oldestAddr is not None:
            tile_path = arr[oldestAddr].get('path')
            if tile_path is None:  # check before building the Path; Path(None) raises TypeError
                raise IOError('Invalid Path!')
            Path(tile_path).unlink()
        else:
            raise IOError('No tiles to delete!')
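The manual scan for the oldest timestamp above can also be expressed with min() over the cache entries. A minimal sketch under the assumption, implied by the snippet, that the cache is a dict mapping tile addresses to dicts with 'ts' and 'path' keys:

from pathlib import Path

def del_oldest_tile(cache_arr):
    """Delete the tile with the smallest 'ts' value from a cache dict."""
    if not cache_arr:
        raise IOError('No tiles to delete!')
    oldest_addr = min(cache_arr, key=lambda k: cache_arr[k].get('ts', float('inf')))
    tile_path = cache_arr[oldest_addr].get('path')
    if tile_path is None:
        raise IOError('Invalid Path!')
    Path(tile_path).unlink()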
Example #17
0
def test_backup_key_create_web(register_builtin_html, site, monkeypatch):
    store_path = Path(cmk.utils.paths.default_config_dir, "backup_keys.mk")

    assert not store_path.exists()
    mode = wato.ModeBackupEditKey()

    # First create a backup key
    mode._create_key({
        "alias": u"älias",
        "passphrase": "passphra$e",
    })

    assert store_path.exists()

    # Then test key existence
    test_mode = wato.ModeBackupEditKey()
    keys = test_mode.load()
    assert len(keys) == 1

    assert store_path.exists()
    store_path.unlink()
Example #18
0
def test_acl_from_file():
    from modules.arp.arp_module import ACL

    p = Path('temp_test_acl_from_file.txt')
    with p.open(mode='w') as f:
        f.writelines([
            '10.0.0.1 11:ba:da:a5:55:11\n', '10.0.0.2 11:ba:da:a5:55:11\n',
            '192.168.178.5 11:8b:ad:f0:0d:11\n',
            '12.12.12.12 11:de:fa:ce:d0:11\n',
            '12.12.12.12 11:de:fa:ce:d0:13\n'
        ])

    acl = ACL.from_file(p)

    assert dict(acl.acl) == {
        '10.0.0.1': ['11:ba:da:a5:55:11'],
        '10.0.0.2': ['11:ba:da:a5:55:11'],
        '192.168.178.5': ['11:8b:ad:f0:0d:11'],
        '12.12.12.12': ['11:de:fa:ce:d0:11', '11:de:fa:ce:d0:13']
    }

    p.unlink()
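The test above writes temp_test_acl_from_file.txt into the current working directory and unlinks it by hand, so a failed assertion leaves the file behind. A variant using pytest's built-in tmp_path fixture avoids the manual cleanup entirely (a sketch assuming the same ACL.from_file interface; the file name and reduced data are illustrative):

def test_acl_from_file(tmp_path):
    from modules.arp.arp_module import ACL

    p = tmp_path / 'acl.txt'  # created under a per-test temporary directory
    p.write_text('10.0.0.1 11:ba:da:a5:55:11\n'
                 '10.0.0.2 11:ba:da:a5:55:11\n')

    acl = ACL.from_file(p)

    assert dict(acl.acl) == {
        '10.0.0.1': ['11:ba:da:a5:55:11'],
        '10.0.0.2': ['11:ba:da:a5:55:11'],
    }
    # no unlink needed: pytest removes tmp_path after the test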
Example #19
0
    def get(self):
        """
        Return an object constructed from the artifact file

        Currently supported types: Numpy.array, pandas.DataFrame, PIL.Image, dict (json)
        All other types will return a pathlib2.Path object pointing to a local copy of the artifacts file (or directory)

        :return: One of the following objects Numpy.array, pandas.DataFrame, PIL.Image, dict (json), pathlib2.Path
        """
        if self._object:
            return self._object

        local_file = self.get_local_copy()

        if self.type == 'numpy' and np:
            self._object = np.load(local_file)[self.name]
        elif self.type in ('pandas', Artifacts._pd_artifact_type) and pd:
            self._object = pd.read_csv(local_file)
        elif self.type == 'image':
            self._object = Image.open(local_file)
        elif self.type == 'JSON':
            with open(local_file, 'rt') as f:
                self._object = json.load(f)

        local_file = Path(local_file)

        if self._object is None:
            self._object = local_file
        else:
            from trains.storage.helper import StorageHelper
            # only if we are not using the cache should we delete the file
            if not hasattr(StorageHelper, 'get_cached_disabled'):
                # delete the temporary file, we already used it
                try:
                    local_file.unlink()
                except Exception:
                    pass

        return self._object
Example #20
0
def monolithic_adaboost(haar_feature_sums,
                        num_wc,
                        save_path="c:/",
                        save_list=False):
    """
    Generate a monolithic classifier using AdaBoost

    :param haar_feature_sums: feature sums dataframes               (object)
    :param num_wc: number of weak classifiers to have               (int)
    :param save_path: directory where progress is saved             (str)
    :param save_list: save weak classifier list between steps       (bool)
    :return: list of weak classifiers (a strong classifier)         (list)
    """
    save_path = Path(save_path) / "wc_lst_progress.pkl"
    if save_path.exists():
        print("Loading prior progress for monolithic classifier")
        with open(str(save_path), 'rb') as f:
            [wc_lst, weights, start] = pickle.load(f)
            haar_feature_sums.face_weights, haar_feature_sums.non_face_weights = weights
    else:
        wc_lst = []
        start = 0
    for it in range(start, num_wc):
        wc = WeakClassifier(haar_feature_sums)
        # Current weak classifier cannot be generated (prior wc was a perfect split)
        if wc.stop_flag:
            break
        wc_lst.append(wc)
        if save_list:
            with open(save_path, 'wb') as f:
                weights = haar_feature_sums.face_weights, haar_feature_sums.non_face_weights
                progress = [wc_lst, weights, it + 1]
                pickle.dump(progress, f)
        print("\tCalculated weak classifier: " + str(it + 1) + " / " +
              str(num_wc))
    if save_path.exists():
        save_path.unlink()
    return wc_lst
Example #21
0
 def tearDown(self):
     path = Path(self.filename)
     path.unlink()
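A tearDown like the one above raises FileNotFoundError if the test never created the file. Since Python 3.8, Path.unlink(missing_ok=True) tolerates a missing file; on older versions, contextlib.suppress gives the same effect. A minimal sketch (the test class and the filename set in setUp are assumptions):

import contextlib
import unittest
from pathlib import Path

class FileBackedTest(unittest.TestCase):      # hypothetical test case
    def setUp(self):
        self.filename = 'scratch_output.txt'  # assumed per-test output file

    def tearDown(self):
        # Python 3.8+: Path(self.filename).unlink(missing_ok=True)
        with contextlib.suppress(FileNotFoundError):
            Path(self.filename).unlink()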
Example #22
0
class TestReader(TestCase):
    """References TestReader

    Test suite for the Reader class.

    `Dat` and `Csv` may be used in unit tests because they do not
    contain any logic.
    """
    def setUp(self) -> None:
        """Initializing the object to test
        """
        # dummy simple csv for tests purpose
        self.dummy_csv = Path(FileUtils.Csv.CSV_NAME)
        self.dummy_csv.touch()
        self.dummy_csv.write_text(FileUtils.Csv.CSV_CONTENT)

    def tearDown(self) -> None:
        """Reinitialize state after unit tests execution
        """
        self.dummy_csv.unlink()

    def test_invalid_initialization_unknown_file(self):
        """A non-existing file should throw an exception
        """
        with self.assertRaises(FileNotFoundError):
            Reader(FileUtils.Csv.NON_EXISTING_NAME)

    def test_valid_properties_columns(self):
        """The reader should correctly get the columns number
        """
        # arrange
        expected = len(
            FileUtils.Csv.CSV_CONTENT.split(Csv.line_end)[0].split(
                Csv.separator))

        # act
        reader = Reader(str(self.dummy_csv))

        # assert
        self.assertEqual(expected, reader.columns)

    def test_valid_properties_rows(self):
        """The reader should correctly get the columns number
        """
        # arrange
        expected = len(FileUtils.Csv.CSV_CONTENT.split(Csv.line_end))

        # act
        reader = Reader(str(self.dummy_csv))

        # assert
        self.assertEqual(expected, reader.rows)

    def test_valid_read_content(self):
        """The reader should correctly gather the fields
        """
        # arrange
        first_line = FileUtils.Csv.CSV_CONTENT \
            .split(Csv.line_end)[0].split(Csv.separator)
        # extract data from formatted string
        first_line = [
            field[1:-1] if field.startswith(Csv.delimiter)
            and field.endswith(Csv.delimiter) else field
            for field in first_line
        ]
        expected_len = len(first_line)
        reader = Reader(str(self.dummy_csv))

        # act
        # read the content of the file
        content = list(reader.read_content())
        # only stores the first line
        content = content[0]

        # assert
        # the length of the content read should be the same as specified
        self.assertEqual(expected_len, len(content))

        # the fields written and loaded should be the same
        self.assertListEqual(first_line, content)
Example #23
0
def delete_session_file(shared_file_name):
    shared_file_name = Path(tc_parameters.WORKING_DIR, shared_file_name)
    shared_file_name.unlink()
Example #24
0
class TestNormalizer(TestCase):
    """References TestNormalizer

    Test suite for the Normalizer class.

    `Dat` and `Csv` may be used in unit tests because they do not
    contain any logic.
    """
    def setUp(self) -> None:
        """Initializing the object to test
        """
        self.normalizer = Normalizer(to_normalize_ext=Dat.ext,
                                     separator=Dat.separator)
        self.dummy_csv = Path(FileUtils.Csv.CSV_NAME)
        self.dummy_csv.touch()

        self.dummy_dat = Path(FileUtils.Csv.DAT_NAME)
        self.dummy_dat.touch()

    def tearDown(self) -> None:
        """Reinitialize state after unit tests execution
        """
        self.dummy_csv.unlink()
        self.dummy_dat.unlink()

    def test_invalid_is_valid_csv_field_number(self):
        """A bad formatted number should be invalid
        """
        # trailing quotes
        self.assertFalse(Normalizer.is_valid_csv_field('1337"'))
        # beginning quotes
        self.assertFalse(Normalizer.is_valid_csv_field('"1337'))
        # no quotes
        self.assertFalse(Normalizer.is_valid_csv_field('1337'))

    def test_valid_is_valid_csv_field_number(self):
        """A well formatted number should be valid
        """
        # int
        self.assertTrue(Normalizer.is_valid_csv_field('"42"'))
        # float
        self.assertTrue(Normalizer.is_valid_csv_field('"13.37"'))
        # negative
        self.assertTrue(Normalizer.is_valid_csv_field('"-3.14"'))

    def test_valid_is_valid_csv_field_string(self):
        """A well formatted string should be valid
        """
        # single string
        self.assertTrue(Normalizer.is_valid_csv_field('"field"'))
        # with spaces
        self.assertTrue(Normalizer.is_valid_csv_field('"some field"'))

    def test_invalid_convert_to_csv_no_file(self):
        """A non-existing file should throw an exception
        """
        # with an incorrect extension too
        with self.assertRaises(FileNotFoundError):
            self.normalizer.convert_to_csv(
                dat_path=FileUtils.Csv.NON_EXISTING_NAME)

        # with the appropriate extension
        with self.assertRaises(FileNotFoundError):
            self.normalizer.convert_to_csv(
                dat_path=FileUtils.Csv.NON_EXISTING_NAME + Dat.ext)

    def test_invalid_convert_to_csv_bad_file_dat_ext(self):
        """A bad DAT file extension should throw an exception
        """
        with self.assertRaises(BadFileFormatException):
            self.normalizer.convert_to_csv(dat_path=str(self.dummy_csv))

    def test_invalid_convert_to_csv_bad_file_dat_csv(self):
        """A bad CSV file extension should throw an exception
        """
        with self.assertRaises(BadFileFormatException):
            self.normalizer.convert_to_csv(dat_path=str(self.dummy_dat),
                                           csv_path=str(self.dummy_dat))

    def test_invalid_convert_to_csv_from_folder_non_existing_folder(self):
        """A non-existing folder should throw an exception
        """
        with self.assertRaises(BadFileFormatException):
            self.normalizer.convert_to_csv_from_folder(
                dat_folder=FileUtils.Csv.NON_EXISTING_NAME)

    def test_invalid_convert_to_csv_from_folder_not_folder(self):
        """A non-existing folder should throw an exception
        """
        with self.assertRaises(BadFileFormatException):
            self.normalizer.convert_to_csv_from_folder(
                dat_folder=self.dummy_dat)
Example #25
0
def delete_session_file(shared_file_name, syslog_ng_testcase):
    working_dir = syslog_ng_testcase.testcase_parameters.get_working_dir()
    shared_file_name = Path(working_dir, shared_file_name)
    shared_file_name.unlink()
Example #26
0
class AdaBoostCascade:
    def __init__(self,
                 hf_data,
                 hf_path,
                 neg_set="",
                 pos_val_set="",
                 neg_val_set="",
                 extension="",
                 max_layer_fpr=1,
                 min_layer_tpr=0,
                 target_fpr=1):
        """
        Generate a cascade of classifiers

        :param hf_data: Haar-like features training dataframes                          (object)
        :param hf_path: path to pckl files of Haar-like feature for first cascade       (string)
        :param neg_set: path to nonface training data                                   (string)
        :param pos_val_set: path to face validation set                                 (string)
        :param neg_val_set: path to nonface validation set                              (string)
        :param extension: extension of training data                                    (string)
        :param max_layer_fpr: maximum allowable false positive rate per layer           (float)
        :param min_layer_tpr: minimum allowable true positive rate per layer            (float)
        :param target_fpr: desired cumulative product of layer FPRs                     (float)
        """
        cascade_path = Path(hf_path) / "cascade.pkl"
        extended_cascade_path = Path(hf_path) / "extended_cascade.pkl"
        # Load existing classifier or generate/continue generating cascade
        if cascade_path.exists():
            with open(str(cascade_path), 'rb') as f:
                self.cascade = pickle.load(f)
        elif extended_cascade_path.exists():
            with open(str(extended_cascade_path), 'rb') as f:
                self.cascade = pickle.load(f)
        else:
            self.progress_path = Path(hf_path) / "cascade_progress.pkl"

            # Load progress later if it exists
            if not self.progress_path.exists():
                # Convert positive and negative validation sets to integral images
                self.pos_validation, self.neg_validation, self.cascade_neg_test = [
                    self.set_test_intg_img_with_position(path, extension)
                    for path in [pos_val_set, neg_val_set, neg_set]
                ]
                if 0 in [
                        len(self.pos_validation),
                        len(self.neg_validation),
                        len(self.cascade_neg_test)
                ]:
                    pos_check = "Positive validation set has: " + str(
                        len(self.pos_validation)) + " images\n"
                    neg_check = "Negative validation set has: " + str(
                        len(self.neg_validation)) + " images\n"
                    train_check = "Negative training set has: " + str(
                        len(self.cascade_neg_test)) + " images"
                    raise Exception(pos_check + neg_check + train_check)
                print("Positive validation set:", len(self.pos_validation))
                print("Negative validation set:", len(self.neg_validation))
                # Initialize training dataframes and cascade
                self.hf = hf_data
                self.cascade = {}

            # Train cascade
            self.train_cascade_layer(max_layer_fpr, target_fpr, min_layer_tpr)
            if self.progress_path.exists():
                self.progress_path.unlink()
            with open(str(cascade_path), 'wb') as f:
                pickle.dump(self.cascade, f)

    def train_cascade_layer(self, max_layer_fpr, target_fpr, min_layer_tpr):
        """
        Train a cascade of strong classifiers

        :param max_layer_fpr: maximum allowable false positive rate per layer          (float)
        :param target_fpr: desired cumulative product of layer FPRs                    (float)
        :param min_layer_tpr: minimum allowable true positive rate per layer           (float)
        :return:
        """
        wc_lst, pos_removals, neg_removals = [], [], []
        threshold = float('inf')
        if not self.progress_path.exists():
            all_true_negatives = []
            no_cascade = 1
            fpr_product = 1.0
        else:
            with open(str(self.progress_path), 'rb') as f:
                print("Loading cascade progress")
                [
                    no_cascade, fpr_product, all_true_negatives, self.hf,
                    self.cascade, self.pos_validation, self.neg_validation,
                    self.cascade_neg_test
                ] = pickle.load(f)
        # While the product of layer FPRs is above the desired holistic FPR product
        while fpr_product > target_fpr:
            fpr_layer = 1.0
            print("\nTraining layer", no_cascade)

            # While the current layer's FPR is higher than the maximum acceptable FPR
            while fpr_layer > max_layer_fpr:
                # Add a single weak classifier per iteration - weights are updated per iteration
                print("\tStrong classifier might have",
                      len(wc_lst) + 1, "weak classifier(s)")
                wc = WeakC.monolithic_adaboost(self.hf, 1)
                if len(wc) == 0:
                    break
                wc_lst.append(wc[0])

                # Evaluate threshold on positive and negative validation sets
                threshold, pos_removals = self.evaluate_positive_validation_set(
                    wc_lst, min_layer_tpr)
                fpr_layer, neg_removals = self.evaluate_negative_validation(
                    threshold, wc_lst)

            # Remove false negatives and true negatives
            print("Removing", len(pos_removals),
                  "false negatives from the positive validation set")
            for idx in pos_removals:
                self.pos_validation.pop(idx)
            print("Removing", len(neg_removals),
                  "true negatives from the negative validation set")
            for idx in neg_removals:
                self.neg_validation.pop(idx)

            # Update cumulative product and update cascade/training set if needed
            fpr_product *= fpr_layer
            self.cascade[no_cascade] = [wc_lst, threshold]
            if fpr_product <= target_fpr:
                print(
                    "Target false positive rate reached - saving strong classifier"
                )
                break
            elif len(self.pos_validation.keys()) == 0 or len(
                    self.neg_validation.keys()) == 0:
                print(
                    "No more positive or negative validation images - saving strong classifier"
                )
                break
            else:
                o_len_neg_test = len(self.cascade_neg_test.keys())
                df_imno_to_remove = self.determine_training_nonface_images(
                    threshold, wc_lst)
                # New strong classifier classified negative training set completely correctly
                if len(df_imno_to_remove) == 0:
                    print("No false positives from nonface training set")
                    break
                # Reset weights if no nonface training images are to be removed, remove true negatives otherwise
                all_true_negatives += [df_imno_to_remove]
                self.hf.cascade_remove_negative_features(all_true_negatives)
                no_cascade += 1
                wc_lst = []

                print("Saving cascade layer")
                with open(str(self.progress_path), 'wb') as f:
                    progress = [
                        no_cascade, fpr_product, all_true_negatives, self.hf,
                        self.cascade, self.pos_validation, self.neg_validation,
                        self.cascade_neg_test
                    ]
                    pickle.dump(progress, f)

    def evaluate_positive_validation_set(self, wc_lst, min_layer_tpr):
        """
        Mock evaluation of cascade - iteratively removing false negatives:
            - Evaluate an initial TPR with the default threshold and collect false negatives' thresholds
            - Decrease threshold to meet minimum acceptable layer TPR if needed
            - Identify which images are false positives with the determined threshold
                - If the threshold's FPR is acceptable - false negatives will be removed from subsequent
                  evaluations to replicate a cascade

        :param wc_lst: weak classifiers accumulated for layer                                           (list)
        :param min_layer_tpr: minimum acceptable true positive rate                                     (float)
        :return: threshold that meets minimum acceptable TPR and false negatives to remove              (float, list)
        """
        pos_val_len = len(self.pos_validation.keys())
        threshold = sum([wc.weight for wc in wc_lst]) / 2
        tpr_counter, failed_alphas, idx_removals = self.evaluate_cascade_layer(
            self.pos_validation, threshold, wc_lst)
        layer_tpr = tpr_counter / pos_val_len
        print("\tInitial layer TPR:", tpr_counter, "/", pos_val_len, "=",
              layer_tpr)

        # If TPR is lower than desired, lower the threshold to meet the minimum
        if tpr_counter / pos_val_len < min_layer_tpr:
            min_pos_hits = round(pos_val_len * min_layer_tpr)
            num_to_include = min_pos_hits - tpr_counter
            threshold = failed_alphas[-1 * num_to_include]
            idx_removals = idx_removals[0:-1 * num_to_include]

        return threshold, idx_removals

    def evaluate_negative_validation(self, threshold, wc_lst):
        """
        Evaluate threshold on negative validation set to get false positive of layer and extract true negatives

        :param threshold: threshold to test against                             (float)
        :param wc_lst: strong classifier                                        (list)
        :return: false positive rate and true negatives to remove               (float, list)
        """
        neg_val_len = len(self.neg_validation)
        fpr_counter, _, idx_removals = self.evaluate_cascade_layer(
            self.neg_validation, threshold, wc_lst)
        layer_fpr = fpr_counter / neg_val_len
        print("\tLayer FPR:", fpr_counter, "/", neg_val_len, "=", layer_fpr,
              "\n")
        return layer_fpr, idx_removals

    def determine_training_nonface_images(self, threshold, wc_lst):
        """
        Evaluate the threshold on the nonface training set and determine which images remain for the next layer

        :param threshold: threshold to test against                             (float)
        :param wc_lst: strong classifier                                        (list)
        :return: image numbers remaining in the nonface training set            (list)
        """
        _, _, idx_removals = self.evaluate_cascade_layer(
            self.cascade_neg_test, threshold, wc_lst)
        print("Removing", len(idx_removals),
              "nonface images from training set\n")
        for idx in idx_removals:
            self.cascade_neg_test.pop(idx)
        df_imno_to_remove = list(self.cascade_neg_test.keys())
        print(len(df_imno_to_remove), "images to make up nonface training set")
        return df_imno_to_remove

    @staticmethod
    def set_test_intg_img_with_position(img_set, extension):
        """
        Initialize image test set for determining subsequent nonface training data:
            - Included images are associated with their original image numbers
            - Correlations are for expedited removal from training dataframes and mock evaluation of cascade

        :param img_set: path to nonface training set                                (string)
        :param extension: image file extension                                      (string)
        :return: integral image and corresponding reference image number            (dict)
        """
        test_dict = {}
        neg_test_intg_imgs = HaarF.paths_to_integral_images(img_set, extension)
        for idx in range(0, len(neg_test_intg_imgs)):
            test_dict[idx] = neg_test_intg_imgs[idx]
        return test_dict

    @staticmethod
    def evaluate_cascade_layer(validation_intg_imgs, threshold, classifiers):
        """
        Evaluate how many face detections occur with the layer's weak classifiers

        :param validation_intg_imgs: validation integral images                                   (dict)
        :param threshold: threshold to compare against                                            (float)
        :param classifiers: strong classifier                                                     (list)
        :return: number of positives, thresholds of negatives, indexes of negatives               (int, list, list)
        """
        low_alphas = []
        counter = 0
        # If the cumulative threshold is insufficient, the corresponding image's threshold and index are saved
        for im_no, intg_img in validation_intg_imgs.items():
            cumulative_alpha = WeakC.evaluate_weak_classifiers(
                intg_img, classifiers)
            if cumulative_alpha >= threshold:
                counter += 1
            else:
                low_alphas.append((cumulative_alpha, im_no))
        low_alphas = sorted(low_alphas)
        idx_removals = [pair[1] for pair in low_alphas]
        low_alphas = [pair[0] for pair in low_alphas]
        return counter, low_alphas, idx_removals
Example #27
0
def _zip_package(package_root,
                 includes,
                 excludes=None,
                 dockerize_pip=False,
                 follow_symlinks=False,
                 python_path=None,
                 requirements_files=None,
                 use_pipenv=False,
                 **kwargs):
    """Create zip file in memory with package dependencies.

    Args:
        package_root (str): Base directory to copy files from.
        includes (List[str]): Inclusion patterns. Only files  matching those
            patterns will be included in the result.
        excludes (List[str]): Exclusion patterns. Files matching those
            patterns will be excluded from the result. Exclusions take
            precedence over inclusions.
        dockerize_pip (Union[bool, str]): Whether to use docker or under what
            conditions docker will be used to run ``pip``.
        follow_symlinks (bool): If true, symlinks will be included in the
            resulting zip file.
        python_path (Optional[str]): Explicit python interpreter to be used.
            pipenv must be installed and executable using ``-m`` if provided.
        requirements_files (Dict[str, bool]): Map of requirement file names and
            whether they exist.
        use_pipenv (bool): Whether to use pipenv to export a Pipfile as
            requirements.txt.
        kwargs (Any): Advanced options for subprocess and docker. See source
            code to determine what is supported.

    Returns:
        Tuple[str, str]: Content of the ZIP file as a byte string and
        calculated hash of all the files

    """
    kwargs.setdefault("pipenv_timeout", 300)

    temp_root = os.path.join(os.path.expanduser("~"), ".runway_cache")
    if not os.path.isdir(temp_root):
        os.makedirs(temp_root)

    # exclude potential virtual environments in the package
    excludes = list(excludes or [])  # guard against the default of None
    excludes.append(".venv/")

    with tempfile.TemporaryDirectory(prefix="cfngin", dir=temp_root) as tmpdir:
        tmp_req = os.path.join(tmpdir, "requirements.txt")
        copydir(package_root, tmpdir, includes, excludes, follow_symlinks)
        tmp_req = handle_requirements(
            package_root=package_root,
            dest_path=tmpdir,
            requirements=requirements_files,
            python_path=python_path,
            use_pipenv=use_pipenv,
            pipenv_timeout=kwargs["pipenv_timeout"],
        )

        if should_use_docker(dockerize_pip):
            dockerized_pip(tmpdir, **kwargs)
        else:
            tmp_script = Path(tmpdir) / "__runway_run_pip_install.py"
            pip_cmd = [
                python_path or sys.executable,
                "-m",
                "pip",
                "install",
                "--target",
                tmpdir,
                "--requirement",
                tmp_req,
                "--no-color",
            ]

            subprocess_args = {}
            if kwargs.get("python_dontwritebytecode"):
                subprocess_args["env"] = dict(os.environ,
                                              PYTHONDONTWRITEBYTECODE="1")

            # Pyinstaller build or explicit python path
            if getattr(sys, "frozen", False) and not python_path:
                script_contents = os.linesep.join([
                    "import runpy",
                    "from runway.util import argv",
                    "with argv(*{}):".format(json.dumps(pip_cmd[2:])),
                    '   runpy.run_module("pip", run_name="__main__")\n',
                ])
                # TODO remove python 2 logic when dropping python 2
                tmp_script.write_text(script_contents if sys.version_info.major
                                      > 2 else script_contents.decode("UTF-8"))
                cmd = [sys.executable, "run-python", str(tmp_script)]
            else:
                if not _pip_has_no_color_option(pip_cmd[0]):
                    pip_cmd.remove("--no-color")
                cmd = pip_cmd

            LOGGER.info(
                "The following output from pip may include incompatibility errors. "
                "These can generally be ignored (pip will erroneously warn "
                "about conflicts between the packages in your Lambda zip and "
                "your host system).")

            try:
                subprocess.check_call(cmd, **subprocess_args)
            except subprocess.CalledProcessError:
                raise PipError
            finally:
                if tmp_script.is_file():
                    tmp_script.unlink()

        if kwargs.get("python_exclude_bin_dir") and os.path.isdir(
                os.path.join(tmpdir, "bin")):
            LOGGER.debug("Removing python /bin directory from Lambda files")
            shutil.rmtree(os.path.join(tmpdir, "bin"))
        if kwargs.get("python_exclude_setuptools_dirs"):
            for i in os.listdir(tmpdir):
                if i.endswith(".egg-info") or i.endswith(".dist-info"):
                    LOGGER.debug("Removing directory %s from Lambda files", i)
                    shutil.rmtree(os.path.join(tmpdir, i))

        req_files = _find_files(tmpdir, includes="**", follow_symlinks=False)
        return _zip_files(req_files, tmpdir)
Example #28
0
    def _upload_data_audit_artifacts(self, name):
        logger = self._task.get_logger()
        pd_artifact = self._artifacts_container.get(name)
        pd_metadata = self._artifacts_container.get_metadata(name)

        # remove from artifacts watch list
        if name in self._unregister_request:
            try:
                self._unregister_request.remove(name)
            except KeyError:
                pass
            self._artifacts_container.unregister_artifact(name)

        if pd_artifact is None:
            return

        override_filename_ext_in_uri = self._save_format
        override_filename_in_uri = name
        fd, local_csv = mkstemp(prefix=quote(name, safe="") + '.',
                                suffix=override_filename_ext_in_uri)
        os.close(fd)
        local_csv = Path(local_csv)
        pd_artifact.to_csv(local_csv.as_posix(),
                           index=False,
                           compression=self._compression)
        current_sha2, file_sha2 = self.sha256sum(local_csv.as_posix(),
                                                 skip_header=32)
        if name in self._last_artifacts_upload:
            previous_sha2 = self._last_artifacts_upload[name]
            if previous_sha2 == current_sha2:
                # nothing to do, we can skip the upload
                try:
                    local_csv.unlink()
                except Exception:
                    pass
                return
        self._last_artifacts_upload[name] = current_sha2

        # If old trains-server, upload as debug image
        if not Session.check_min_api_version('2.3'):
            logger.report_image(title='artifacts',
                                series=name,
                                local_path=local_csv.as_posix(),
                                delete_after_upload=True,
                                iteration=self._task.get_last_iteration(),
                                max_image_history=2)
            return

        # Find our artifact
        artifact = None
        for an_artifact in self._task_artifact_list:
            if an_artifact.key == name:
                artifact = an_artifact
                break

        file_size = local_csv.stat().st_size

        # upload file
        uri = self._upload_local_file(
            local_csv,
            name,
            delete_after_upload=True,
            override_filename=override_filename_in_uri,
            override_filename_ext=override_filename_ext_in_uri)

        # update task artifacts
        with self._task_edit_lock:
            if not artifact:
                artifact = tasks.Artifact(key=name,
                                          type=self._pd_artifact_type)
                self._task_artifact_list.append(artifact)
            artifact_type_data = tasks.ArtifactTypeData()

            artifact_type_data.data_hash = current_sha2
            artifact_type_data.content_type = "text/csv"
            artifact_type_data.preview = str(
                pd_artifact.__repr__()) + '\n\n' + self._get_statistics(
                    {name: pd_artifact})

            artifact.type_data = artifact_type_data
            artifact.uri = uri
            artifact.content_size = file_size
            artifact.hash = file_sha2
            artifact.timestamp = int(time())
            artifact.display_data = [
                (str(k), str(v)) for k, v in pd_metadata.items()
            ] if pd_metadata else None

            self._task.set_artifacts(self._task_artifact_list)
Example #29
0
def _zip_package(package_root,
                 includes,
                 excludes=None,
                 dockerize_pip=False,
                 follow_symlinks=False,
                 python_path=None,
                 requirements_files=None,
                 use_pipenv=False,
                 **kwargs):
    """Create zip file in memory with package dependencies.

    Args:
        package_root (str): Base directory to copy files from.
        includes (List[str]): Inclusion patterns. Only files  matching those
            patterns will be included in the result.
        excludes (List[str]): Exclusion patterns. Files matching those
            patterns will be excluded from the result. Exclusions take
            precedence over inclusions.
        dockerize_pip (Union[bool, str]): Whether to use docker or under what
            conditions docker will be used to run ``pip``.
        follow_symlinks (bool): If true, symlinks will be included in the
            resulting zip file.
        python_path (Optional[str]): Explicit python interpreter to be used.
            pipenv must be installed and executable using ``-m`` if provided.
        requirements_files (Dict[str, bool]): Map of requirement file names and
            whether they exist.
        use_pipenv (bool): Whether to use pipenv to export a Pipfile as
            requirements.txt.
        kwargs (Any): Advanced options for subprocess and docker. See source
            code to determine what is supported.

    Returns:
        Tuple[str, str]: Content of the ZIP file as a byte string and
        calculated hash of all the files

    """
    kwargs.setdefault('pipenv_timeout', 300)

    temp_root = os.path.join(os.path.expanduser('~'), '.runway_cache')
    if not os.path.isdir(temp_root):
        os.makedirs(temp_root)

    # exclude potential virtual environments in the package
    excludes = list(excludes or [])  # guard against the default of None
    excludes.append('.venv/')

    with tempfile.TemporaryDirectory(prefix='cfngin', dir=temp_root) as tmpdir:
        tmp_req = os.path.join(tmpdir, 'requirements.txt')
        copydir(package_root, tmpdir, includes, excludes, follow_symlinks)
        tmp_req = handle_requirements(package_root=package_root,
                                      dest_path=tmpdir,
                                      requirements=requirements_files,
                                      python_path=python_path,
                                      use_pipenv=use_pipenv,
                                      pipenv_timeout=kwargs['pipenv_timeout'])

        if should_use_docker(dockerize_pip):
            dockerized_pip(tmpdir, **kwargs)
        else:
            tmp_script = Path(tmpdir) / '__runway_run_pip_install.py'
            pip_cmd = [
                python_path or sys.executable, '-m', 'pip', 'install',
                '--target', tmpdir, '--requirement', tmp_req, '--no-color'
            ]

            # Pyinstaller build or explicit python path
            if getattr(sys, 'frozen', False) and not python_path:
                script_contents = os.linesep.join([
                    'import runpy', 'from runway.util import argv',
                    'with argv(*{}):'.format(json.dumps(pip_cmd[2:])),
                    '   runpy.run_module("pip", run_name="__main__")\n'
                ])
                # TODO remove python 2 logic when dropping python 2
                tmp_script.write_text(script_contents if sys.version_info.major
                                      > 2 else script_contents.decode('UTF-8'))
                cmd = [sys.executable, 'run-python', str(tmp_script)]
            else:
                if not _pip_has_no_color_option(pip_cmd[0]):
                    pip_cmd.remove('--no-color')
                cmd = pip_cmd

            LOGGER.info(
                'The following output from pip may include incompatibility errors. '
                'These can generally be ignored (pip will erroneously warn '
                'about conflicts between the packages in your Lambda zip and '
                'your host system).')

            try:
                subprocess.check_call(cmd)
            except subprocess.CalledProcessError:
                raise PipError
            finally:
                if tmp_script.is_file():
                    tmp_script.unlink()

        req_files = _find_files(tmpdir, includes='**', follow_symlinks=False)
        return _zip_files(req_files, tmpdir)