Example 1
import sys
import tarfile

import numpy as np

# Spinner, unpickle, get_data_params, and get_datanames are helpers
# defined elsewhere in the source project.
def parse_cifar(dataset, mode):
    features = []
    labels = []
    coarse_labels = []
    batch_names = []

    TARFILE, label_data, label_labels, label_coarse = get_data_params(dataset)
    datanames = get_datanames(dataset, mode)

    try:
        spinner = Spinner(prefix="Loading {} data...".format(mode))
        spinner.start()
        tf = tarfile.open(TARFILE)
        for dataname in datanames:
            ti = tf.getmember(dataname)
            data = unpickle(tf.extractfile(ti))
            features.append(data[label_data])
            labels.append(data[label_labels])
            batch_names.extend([dataname.split('/')[1]] *
                               len(data[label_data]))
            if dataset == 'cifar100superclass':
                coarse_labels.append(data[label_coarse])
        features = np.concatenate(features)
        # CIFAR pickles store flat channel-first pixels; reshape to
        # NCHW, then transpose to NHWC uint8 images.
        features = features.reshape(features.shape[0], 3, 32, 32)
        features = features.transpose(0, 2, 3, 1).astype('uint8')
        labels = np.concatenate(labels)
        if dataset == 'cifar100superclass':
            coarse_labels = np.concatenate(coarse_labels)
        spinner.stop()
        tf.close()
    except KeyboardInterrupt:
        spinner.stop()
        sys.exit(1)

    return features, labels, coarse_labels, batch_names
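
All five examples share the same Spinner contract: construct it with a prefix, call start() before the slow work and stop() afterwards. The helper itself is project-local and not shown; below is a minimal thread-based sketch of that interface (the rendering details are assumptions, not the original implementation):

import itertools
import sys
import threading
import time


class Spinner:
    """Minimal sketch of the Spinner interface assumed above."""

    def __init__(self, prefix=""):
        self.prefix = prefix
        self._stop_event = threading.Event()
        self._thread = None

    def _spin(self):
        # Redraw the prefix with a rotating frame until stop() is called.
        for frame in itertools.cycle("|/-\\"):
            if self._stop_event.is_set():
                break
            sys.stdout.write("\r{} {}".format(self.prefix, frame))
            sys.stdout.flush()
            time.sleep(0.1)
        sys.stdout.write("\n")

    def start(self):
        self._thread = threading.Thread(target=self._spin)
        self._thread.daemon = True  # never block interpreter exit
        self._thread.start()

    def stop(self):
        self._stop_event.set()
        if self._thread is not None:
            self._thread.join()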
Example 2
# Requires `from pathlib import Path` and `import joblib`; `logger` and
# Spinner come from the surrounding module.
def load_hash_dict(self, load_path, use_cache, target_dir):
    if load_path and Path(load_path).exists() and use_cache:
        logger.debug("Load hash cache: {}".format(load_path))
        spinner = Spinner(prefix="Loading hash cache...")
        spinner.start()
        self.hash_dict = joblib.load(load_path)
        spinner.stop()
        is_update = self.update_hash_dict()
        # True only if the cached hashes were already up to date.
        return not is_update
    else:
        self.hash_dict = {}
        self.update_hash_dict()
        return False
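
The cache that joblib.load reads back must have been written by an earlier run. The save side is not part of this example; a plausible counterpart is sketched below (the method name save_hash_dict is hypothetical):

import joblib

def save_hash_dict(self, save_path):
    # Persist the filename -> hash mapping so the next run can reuse it.
    joblib.dump(self.hash_dict, save_path)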
Example 3
    # Requires `import six` and `from multiprocessing import cpu_count`;
    # Spinner and self.gen_hash are defined in the surrounding module.
    def update_hash_dict(self):
        if self.num_proc is None:
            self.num_proc = cpu_count() - 1

        # check current hash_dict
        current_files = set(self.image_filenames)
        # set() makes the difference work on Python 2 too, where
        # dict.keys() returns a plain list.
        cache_files = set(self.hash_dict.keys())
        lost_set = cache_files - current_files
        target_files = list(current_files - cache_files)

        if len(lost_set) + len(target_files) > 0:
            try:
                if len(self.hash_dict) == 0:
                    prefix = "Calculating image hashes (hash-bits={} num-proc={})..."
                else:
                    prefix = "Updating image hashes (hash-bits={} num-proc={})..."
                spinner = Spinner(
                    prefix=prefix.format(self.hash_bits, self.num_proc))
                spinner.start()

                # del lost_set from hash_dict
                for f in lost_set:
                    del self.hash_dict[f]

                if six.PY2:
                    from pathos.multiprocessing import ProcessPool as Pool
                elif six.PY3:
                    from multiprocessing import Pool
                pool = Pool(self.num_proc)
                hashes = pool.map(self.gen_hash, target_files)
                pool.close()
                pool.join()
                for filename, hash_value in zip(target_files, hashes):
                    self.hash_dict[filename] = hash_value
                spinner.stop()
            except KeyboardInterrupt:
                pool.terminate()
                pool.join()
                spinner.stop()
                sys.exit(1)
            return True
        else:
            return False
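
The bookkeeping above is plain set arithmetic between the files currently on disk and the keys of the cached mapping. A standalone illustration with invented file names:

current_files = {"a.png", "b.png", "c.png"}          # on disk now
cached = {"a.png": "hash-a", "stale.png": "hash-x"}  # previous cache

lost_set = set(cached) - current_files       # cached, but gone from disk
target_files = current_files - set(cached)   # new, still needs hashing

for f in lost_set:
    del cached[f]
# only {"b.png", "c.png"} get hashed; the hash for "a.png" is reused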
Example 4
def load(self, load_path, use_cache, target_dir):
    if load_path and Path(load_path).exists() and use_cache:
        # Reuse the cache only if it is newer than everything in target_dir.
        cache_mtime = self.check_mtime(load_path)
        target_mtime = self.check_latest_dir_mtime(target_dir)
        if cache_mtime > target_mtime:
            logger.debug("Load hash cache: {}".format(load_path))
            spinner = Spinner(prefix="Loading hash cache...")
            spinner.start()
            self.cache = joblib.load(load_path)
            spinner.stop()
            return True
        else:
            self.cache = []
            self.make_hash_list()
            return False
    else:
        self.cache = []
        self.make_hash_list()
        return False
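
check_mtime and check_latest_dir_mtime are project-local helpers. A sketch of what they plausibly compute, assuming the cache counts as fresh when it is newer than every file under target_dir (both bodies are assumptions):

import os
from pathlib import Path

def check_mtime(self, path):
    # Modification time of the cache file itself.
    return os.path.getmtime(path)

def check_latest_dir_mtime(self, target_dir):
    # Newest modification time under target_dir; if anything changed
    # after the cache was written, the cache is stale.
    return max(
        (os.path.getmtime(str(p)) for p in Path(target_dir).rglob("*")),
        default=0.0)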
Example 5
    def make_hash_list(self):
        if self.num_proc is None:
            self.num_proc = cpu_count() - 1

        try:
            spinner = Spinner(
                prefix="Calculating image hashes (hash-bits={} num-proc={})...".format(
                    self.hash_bits, self.num_proc))
            spinner.start()
            if six.PY2:
                from pathos.multiprocessing import ProcessPool as Pool
            elif six.PY3:
                from multiprocessing import Pool
            pool = Pool(self.num_proc)
            self.cache = pool.map(self.gen_hash, self.image_filenames)
            pool.close()
            pool.join()
            spinner.stop()
        except KeyboardInterrupt:
            pool.terminate()
            pool.join()
            spinner.stop()
            sys.exit(1)
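
Examples 3 and 5 pick pathos on Python 2 and the stdlib Pool on Python 3 because Python 2's pickle cannot serialize bound methods such as self.gen_hash, while pathos (built on dill) can. Targeting Python 3 only, the interrupt-safe pool pattern they implement reduces to the sketch below (hash_all is a hypothetical free function, not part of the original code):

import sys
from multiprocessing import Pool, cpu_count

def hash_all(gen_hash, filenames, num_proc=None):
    # Map gen_hash over filenames, tearing the workers down cleanly
    # whether the run finishes or is interrupted with Ctrl-C.
    pool = Pool(num_proc or max(cpu_count() - 1, 1))
    try:
        results = pool.map(gen_hash, filenames)
        pool.close()       # no more tasks
        pool.join()        # wait for workers to exit
        return results
    except KeyboardInterrupt:
        pool.terminate()   # kill workers immediately
        pool.join()
        sys.exit(1)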