def master(src_cfg, suffix_in, suffix_out, K, N, nr_processes, double_norm):
    """Dispatch per-sample sufficient-statistics post-processing.

    Builds the list of train + test sample names for the dataset, picks the
    worker (`double_normalization` when `double_norm` is set, else `merge`),
    and either runs it inline or fans the samples out over `nr_processes`
    subprocesses.

    Parameters
    ----------
    src_cfg: dataset configuration passed to `Dataset`.
    suffix_in, suffix_out: suffixes appended to `dataset.FEAT_DIR` to locate
        the input and output statistics directories.
    K: number of GMM clusters (vocabulary size).
    N: forwarded to the worker via `kwargs` — semantics defined by the
        worker; presumably a slicing/grouping parameter (TODO confirm).
    nr_processes: number of parallel worker processes; <= 1 runs inline.
    double_norm: selects the double-normalization worker (needs the GMM).
    """
    D = 64  # Descriptor dimensionality; hard-coded for this feature type.
    dataset = Dataset(src_cfg, nr_clusters=K)
    samples = [
        str(sample)
        for sample in dataset.get_data('train')[0] + dataset.get_data('test')[0]]

    if double_norm:
        worker = double_normalization
        suffix = '.double_norm'
        # Double normalization needs the trained GMM from the input features.
        gmm = load_gmm(
            os.path.join(dataset.FEAT_DIR + suffix_in, 'gmm', 'gmm_%d' % K))
    else:
        worker = merge
        suffix = ''
        gmm = None

    path_in = os.path.join(
        dataset.FEAT_DIR + suffix_in,
        'statistics_k_%d' % dataset.VOC_SIZE, 'stats.tmp')
    path_out = os.path.join(
        dataset.FEAT_DIR + suffix_out,
        'statistics_k_%d' % dataset.VOC_SIZE, 'stats.tmp' + suffix)

    sstats_in = SstatsMap(path_in)
    sstats_out = SstatsMap(path_out)

    # Statistic length: K mixing weights + K * D means + K * D variances.
    len_sstats = dataset.VOC_SIZE + 2 * D * dataset.VOC_SIZE
    kwargs = {
        'N': N,
        'sstats_in': sstats_in,
        'sstats_out': sstats_out,
        'len_sstats': len_sstats,
        'gmm': gmm}

    if nr_processes > 1:
        # Ceiling division. The original `len(samples) / nr_processes + 1`
        # over-allocated by one whenever the lengths divided evenly, which
        # spawned a trailing worker with an empty slice; `-(-a // b)` is
        # exact and works under both Python 2 and 3 division semantics.
        chunk = -(-len(samples) // nr_processes)
        processes = []
        for ii in xrange(nr_processes):
            process = mp.Process(
                target=worker,
                args=(samples[ii * chunk: (ii + 1) * chunk], ),
                kwargs=kwargs)
            process.start()
            processes.append(process)
        # Wait for all workers to finish; previously the function returned
        # immediately, so callers could not tell when the statistics in
        # `path_out` were complete.
        for process in processes:
            process.join()
    else:
        worker(samples, **kwargs)
def get_slice_data_from_file(dataset, split, class_idx, gmm, nr_pos, nr_neg):
    """Load merged sufficient statistics for one class as a SliceData.

    Selects `nr_pos` positive and `nr_neg` negative samples of class
    `class_idx` from the given `split`, reads their merged statistics from
    the dataset's `stats.tmp` map, and pairs each row with a binary label.
    """
    selected = _get_samples(
        dataset, class_idx, data_type=split, nr_pos=nr_pos, nr_neg=nr_neg)[0]
    # One statistic vector per sample: K weights + K * D means + K * D
    # variances for a K-component, D-dimensional GMM.
    descs_len = gmm.k + 2 * gmm.d * gmm.k
    stats_map = SstatsMap(os.path.join(dataset.SSTATS_DIR, 'stats.tmp'))
    raw_stats, labels, info = stats_map.get_merged(selected, descs_len)
    stacked = raw_stats.reshape((-1, descs_len))
    return SliceData(
        stacked, tuple_labels_to_list_labels(labels, class_idx), info)