def opt(data_root_folder=None, one_hot=True, partitions=None, shuffle=False, seed=None):
    """
    data_folder_name = 'mnist'
    if data_root_folder is None:
        data_root_folder = os.path.join(os.getcwd(), 'DATA')
        if not os.path.exists(data_root_folder):
            os.mkdir(data_root_folder)
    data_folder = os.path.join(data_root_folder, data_folder_name)
    """
    # datasets = se.read_data_semeion()
    datasets = Digit.read_opt()
    train = em.Dataset(datasets.train.images, datasets.train.labels, name="opt")
    validation = em.Dataset(datasets.validation.images, datasets.validation.labels, name="opt")
    test = em.Dataset(datasets.test.images, datasets.test.labels, name="opt")
    res = [train, validation, test]
    if partitions:
        # redivide_data takes `rand`, not `seed`; pass the seed through as the random state
        res = redivide_data(res, partition_proportions=partitions, shuffle=shuffle, rand=seed)
    return em.Datasets.from_list(res)
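# Usage sketch for the loader above (the proportions and seed are illustrative, and it
# assumes Digit.read_opt() can find the optdigits files): load the data and re-split it
# 60/20/20 with shuffling.
def _example_opt_loader():
    dts = opt(partitions=(.6, .2), shuffle=True, seed=0)
    print(dts.train.data.shape, dts.validation.data.shape, dts.test.data.shape)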
def get_reward(chain, trainset, n_class):
    reward = np.zeros(n_class)
    for i in range(n_class):
        if i in chain:
            ind = chain.index(i)
            train_next = em.Dataset(trainset.data[ind:ind + 1], trainset.target[ind:ind + 1])
            tr_supplier = train_next.create_supplier(x, y)
            val_supplier = val.create_supplier(x, y)
            # test_supplier = test.create_supplier(x, y)
            tf.global_variables_initializer().run()
            # tr_accs, val_accs, test_accs = [], [], []
            # hyper_step(T, inner_objective_feed_dicts=tr_supplier, outer_objective_feed_dicts=val_supplier)
            # res = sess.run(far.hyperparameters()) + [accuracy.eval(tr_supplier()), accuracy.eval(val_supplier())]
            tr_accs, val_accs = [], []
            run(T, inner_objective_feed_dicts=tr_supplier, outer_objective_feed_dicts=val_supplier)
            tr_accs.append(accuracy.eval(tr_supplier()))
            val_accs.append(accuracy.eval(val_supplier()))
            print('training accuracy', tr_accs[-1])
            print('validation accuracy', val_accs[-1])
            print('-' * 50)
            reward[i] = val_accs[-1]
        else:
            reward[i] = 0
    return reward
def all_data(self, partition_proportions=None, seed=None):
    if not self._loaded_images:
        self.load_all_images()
        while not self.check_loaded_images(600):
            time.sleep(5)
    data, targets = [], []
    for k, c in enumerate(sorted(self._loaded_images)):
        data += list(self._loaded_images[c].values())
        targets += [k] * 600
    if self.info['one_hot_enc']:
        targets = em.to_one_hot_enc(targets, dimension=len(self._loaded_images))
    _dts = [em.Dataset(data=np.stack(data), target=np.array(targets), name='MiniImagenet_full')]
    if seed:
        np.random.seed(seed)
    if partition_proportions:
        _dts = redivide_data(_dts, partition_proportions=partition_proportions, shuffle=True)
    return em.Datasets.from_list(_dts)
def random_classification_datasets(n_samples, features=100, classes=2, informative=.1,
                                   partition_proportions=(.5, .3), rnd=None, one_hot=True,
                                   **mk_cls_kwargs):
    rnd_state = em.get_rand_state(rnd)
    X, Y = make_classification(n_samples, features, n_classes=classes,
                               random_state=rnd_state, **mk_cls_kwargs)
    if one_hot:
        Y = utils.to_one_hot_enc(Y)
    print('range of Y', np.min(Y), np.max(Y))
    info = utils.merge_dicts({'informative': informative, 'random_seed': rnd}, mk_cls_kwargs)
    name = em.utils.name_from_dict(info, 'w')
    dt = em.Dataset(X, Y, name=name, info=info)
    datasets = em.Datasets.from_list(redivide_data([dt], partition_proportions))
    print('conditioning of X^T X', np.linalg.cond(datasets.train.data.T @ datasets.train.data))
    return datasets
def random_regression_datasets(n_samples, features=100, outs=1, informative=.1,
                               partition_proportions=(.5, .3), rnd=None, **mk_rgr_kwargs):
    rnd_state = em.get_rand_state(rnd)
    X, Y, w = make_regression(n_samples, features, int(features * informative), outs,
                              random_state=rnd_state, coef=True, **mk_rgr_kwargs)
    if outs == 1:
        Y = np.reshape(Y, (n_samples, 1))
    print('range of Y', np.min(Y), np.max(Y))
    info = utils.merge_dicts({'informative': informative, 'random_seed': rnd, 'w': w},
                             mk_rgr_kwargs)
    name = em.utils.name_from_dict(info, 'w')
    dt = em.Dataset(X, Y, name=name, info=info)
    datasets = em.Datasets.from_list(redivide_data([dt], partition_proportions))
    print('conditioning of X^T X', np.linalg.cond(datasets.train.data.T @ datasets.train.data))
    return datasets
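# Usage sketch for the synthetic dataset helpers above (the sizes below are just an
# example): generate a small regression problem with the default 50/30/20 split and
# check the shapes of the resulting partitions.
def _example_random_regression():
    dts = random_regression_datasets(1000, features=50, outs=1, rnd=0)
    print(dts.train.data.shape, dts.validation.data.shape, dts.test.data.shape)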
def get_reward(chain, trainset, n_class):
    reward = np.zeros(n_class)
    for i in range(n_class):
        ind = chain.index(i)
        train_next = em.Dataset(trainset.data[ind:ind + 1], trainset.target[ind:ind + 1])
        tr_supplier = train_next.create_supplier(x, y)
        val_supplier = validation.create_supplier(x, y)
        test_supplier = test.create_supplier(x, y)
        tf.global_variables_initializer().run()
        tr_accs, val_accs, test_accs = [], [], []
        run(T, inner_objective_feed_dicts=tr_supplier, outer_objective_feed_dicts=val_supplier)
        tr_accs.append(accuracy.eval(tr_supplier()))
        val_accs.append(accuracy.eval(val_supplier()))
        test_accs.append(accuracy.eval(test_supplier()))
        print('training accuracy', tr_accs[-1])
        print('validation accuracy', val_accs[-1])
        print('test accuracy', test_accs[-1])
        print('learning rate', lr.eval())
        print('norm of examples weight', tf.norm(weights).eval())
        # print(n)
        print('-' * 50)
        reward[i] = val_accs[-1]
    return reward
def make_dataset(sample_set, label_set):
    sample_train = np.array(sample_set, dtype=np.float32)
    label_train = np.zeros((len(label_set), n_class))
    for i in range(len(label_set)):
        label_train[i][int(label_set[i])] = 1
    options = dict(dtype=dtypes.float32, reshape=True, seed=None)
    train = DataSet(sample_train, label_train, **options)
    validation = DataSet(sample_train, label_train, **options)
    test = DataSet(sample_train, label_train, **options)
    dataset = base.Datasets(train=train, validation=validation, test=test)
    train = em.Dataset(dataset.train.images, dataset.train.labels, name="CLIMATE")
    validation = em.Dataset(dataset.validation.images, dataset.validation.labels, name="CLIMATE")
    test = em.Dataset(dataset.test.images, dataset.test.labels, name="CLIMATE")
    res = [train, validation, test]
    return em.Datasets.from_list(res)
def generate_datasets(self, rand=None, num_classes=None, num_examples=None, wait_for_n_min=None):
    rand = em.get_rand_state(rand)
    if wait_for_n_min:
        import time
        while not self.check_loaded_images(wait_for_n_min):
            time.sleep(5)
    if not num_examples:
        num_examples = self.kwargs['num_examples']
    if not num_classes:
        num_classes = self.kwargs['num_classes']
    clss = self._loaded_images if self._loaded_images else self.info['classes']
    random_classes = rand.choice(list(clss.keys()), size=(num_classes,), replace=False)
    rand_class_dict = {rnd: k for k, rnd in enumerate(random_classes)}
    _dts = []
    for ns in em.as_tuple_or_list(num_examples):
        classes = balanced_choice_wr(random_classes, ns, rand)
        all_images = {cls: list(clss[cls]) for cls in classes}
        data, targets, sample_info = [], [], []
        for c in classes:
            rand.shuffle(all_images[c])
            img_name = all_images[c][0]
            all_images[c].remove(img_name)
            sample_info.append({'name': img_name, 'label': c})
            if self._loaded_images:
                data.append(clss[c][img_name])
            else:
                from scipy.misc import imread, imresize
                data.append(imresize(imread(join(self.info['base_folder'], join(c, img_name)), mode='RGB'),
                                     size=(self.info['resize'], self.info['resize'], 3)) / 255.)
            targets.append(rand_class_dict[c])
        if self.info['one_hot_enc']:
            targets = em.to_one_hot_enc(targets, dimension=num_classes)
        _dts.append(em.Dataset(data=np.array(np.stack(data)), target=targets,
                               sample_info=sample_info, info={'all_classes': random_classes}))
    return em.Datasets.from_list(_dts)
def get_dataset(data_dict, coarse_label, superclass):
    data_index = np.where(coarse_label == superclass)[0]
    data = data_dict[b'data'][data_index].reshape(len(data_index), 32, 32, 3)
    label = np.array(data_dict[b'fine_labels'])[data_index]
    u = np.unique(label)
    target = np.zeros(shape=(len(data_index), num_class))
    for i in range(len(label)):
        ii = np.where(u == label[i])
        target[i, ii] = 1
    return em.Dataset(data, target)
def omni_light(folder=join(DATA_FOLDER, 'omniglot-light'), add_bias=False):
    """
    Extract of the omniglot dataset with rotated images: 100 classes,
    3 examples per class in the training set,
    3 examples per class in the validation set,
    15 examples per class in the test set.
    """
    file = h5py.File(os.path.join(folder, 'omni-light.h5'), 'r')
    return em.Datasets.from_list([
        em.Dataset(np.array(file['X_ft_tr']), np.array(file['Y_tr']),
                   info={'original images': np.array(file['X_orig_tr'])}, add_bias=add_bias),
        em.Dataset(np.array(file['X_ft_val']), np.array(file['Y_val']),
                   info={'original images': np.array(file['X_orig_val'])}, add_bias=add_bias),
        em.Dataset(np.array(file['X_ft_test']), np.array(file['Y_test']),
                   info={'original images': np.array(file['X_orig_test'])}, add_bias=add_bias)
    ])
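# Usage sketch (assumes the omni-light.h5 file is already present under DATA_FOLDER):
# load the reduced Omniglot splits and inspect the size of each partition.
def _example_omni_light():
    dts = omni_light()
    for part in (dts.train, dts.validation, dts.test):
        print(part.data.shape, part.target.shape)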
def mnist(folder=None, one_hot=True, partitions=None, filters=None, maps=None, shuffle=False):
    if not folder:
        folder = MNIST_DIR
    datasets = read_data_sets(folder, one_hot=one_hot)
    train = em.Dataset(datasets.train.images, datasets.train.labels, name='MNIST')
    validation = em.Dataset(datasets.validation.images, datasets.validation.labels, name='MNIST')
    test = em.Dataset(datasets.test.images, datasets.test.labels, name='MNIST')
    res = [train, validation, test]
    if partitions:
        res = redivide_data(res, partition_proportions=partitions, filters=filters, maps=maps, shuffle=shuffle)
        res += [None] * (3 - len(res))
    return em.Datasets.from_list(res)
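# Usage sketch (proportions below are illustrative, and MNIST_DIR must point at a folder
# where the raw MNIST files can be downloaded): load MNIST and re-split it so that only a
# small fraction of examples goes to training and validation, with the rest used for test.
def _example_mnist_partitions():
    dts = mnist(partitions=(.02, .01), shuffle=True)
    print(dts.train.data.shape, dts.validation.data.shape, dts.test.data.shape)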
def generate_datasets(self, rand=None, num_classes=None, num_examples=None):
    rand = em.get_rand_state(rand)
    if not num_examples:
        num_examples = self.kwargs['num_examples']
    if not num_classes:
        num_classes = self.kwargs['num_classes']
    clss = self._loaded_images if self._loaded_images else self.info['classes']
    random_classes = rand.choice(list(clss.keys()), size=(num_classes,), replace=False)
    rand_class_dict = {rnd: k for k, rnd in enumerate(random_classes)}
    _dts = []
    for ns in em.as_tuple_or_list(num_examples):
        classes = balanced_choice_wr(random_classes, ns, rand)
        all_images = {cls: list(clss[cls]) for cls in classes}
        data, targets, sample_info = [], [], []
        for c in classes:
            rand.shuffle(all_images[c])
            img_name = all_images[c][0]
            all_images[c].remove(img_name)
            sample_info.append({'name': img_name, 'label': c})
            data.append(clss[c][img_name])
            targets.append(rand_class_dict[c])
        if self.info['one_hot_enc']:
            targets = em.to_one_hot_enc(targets, dimension=num_classes)
        _dts.append(em.Dataset(data=np.array(np.stack(data)), target=targets,
                               sample_info=sample_info, info={'all_classes': random_classes}))
    return em.Datasets.from_list(_dts)
    gIndex.append(gg)

val_supplier = val.create_supplier(x, y)
test_supplier = test.create_supplier(x, y)
tf.global_variables_initializer().run()
for i in range(0, numData, batch):
    r = np.array([gIndex[0][chain[0][i]]])
    for j in range(1, numSet):
        ggg = np.reshape(gIndex[j][chain[j][i]], (1,))
        r = np.concatenate((r, ggg), axis=0)
    next = np.argmax(r)
    # ll = chain[next][i]
    # percentage[next][ll] += 1
    train_next = em.Dataset(train[next].data[i:i + 1], train[next].target[i:i + 1])
    tr_supplier = train_next.create_supplier(x, y)
    tr_accs, val_accs, test_accs = [], [], []
    run(T, inner_objective_feed_dicts=tr_supplier, outer_objective_feed_dicts=val_supplier)
    tr_accs.append(accuracy.eval(tr_supplier()))
    val_accs.append(accuracy.eval(val_supplier()))
    test_accs.append(accuracy.eval(test_supplier()))
    # y_test_pred = sess.run(y_pred, feed_dict={x: test.data})
    # y_test_label = np.argmax(y_test_pred, axis=1)
    # y_true_label = np.argmax(test.target, axis=1)
    # for ii in range(n_class):
    #     ccount = 0
    #     yyy = np.zeros(len(y_true_label))
train_coarse_label = np.array(train_dict[b'coarse_labels'])
train = []
val_data = np.zeros(shape=(1, 32, 32, 3))
val_target = np.zeros(shape=(1, num_class))
for i in range(numSet):
    t = get_dataset(train_dict, train_coarse_label, SuperClass[i])
    val_index = np.random.choice(numData, 10, replace=False)
    val_data = np.concatenate([val_data, t.data[val_index]])
    val_target = np.concatenate([val_target, t.target[val_index]])
    if i == 0:
        # drop the all-zero placeholder row used to initialize the two arrays
        val_data = val_data[1:]
        val_target = val_target[1:]
    train.append(t)
    print(t.data.shape)
    print(t.target.shape)
validation = em.Dataset(val_data, val_target)
print(validation.data.shape)
print(validation.target.shape)

file = 'cifar-100-python/test'
test_dict = get_data(file)
test_coarse_label = np.array(test_dict[b'coarse_labels'])
test_train = get_dataset(train_dict, train_coarse_label, 8)
test_test = get_dataset(test_dict, test_coarse_label, 8)

mb_dict = defaultdict(list)  # meta_batch dictionary
for _ in range(meta_batch_size):
    x, y = get_placeholders()
    mb_dict['x'].append(x)
    mb_dict['y'].append(y)
    hyper_repr = build_hyper_representation(x, auto_reuse=True)
action_i = 0
temp_V = 0
for j in range(numSet):
    class_at_j = int(label_at_i[numSet - j - 1])
    r_at_j = reward[j][class_at_j]
    # one-step Bellman backup: immediate reward plus discounted expected value of the next state
    bell = r_at_j
    for m in range(n_class ** numSet):
        bell += P_a[j][state, m] * (gamma * V[m])
    if bell > temp_V:
        temp_V = bell
        action_i = j
ll = chain[action_i][i]
percentage[action_i][ll] += 1
train_next = em.Dataset(train[action_i].data[i:i + batch], train[action_i].target[i:i + batch])
tr_supplier = train_next.create_supplier(x, y)
tr_accs, val_accs, test_accs = [], [], []
run(T, inner_objective_feed_dicts=tr_supplier, outer_objective_feed_dicts=val_supplier)
tr_accs.append(accuracy.eval(tr_supplier()))
val_accs.append(accuracy.eval(val_supplier()))
test_accs.append(accuracy.eval(test_supplier()))
# y_test_pred = sess.run(y_pred, feed_dict={x: test.data})
# y_test_label = np.argmax(y_test_pred, axis=1)
# y_true_label = np.argmax(test.target, axis=1)
# for ii in range(n_class):
#     ccount = 0
zip(tf.model_variables(), far.utils.hyperparameters()[:4])})

print('Variables (or tensors) that will store the values of the hypergradients')
print(*far.hypergradients(), sep='\n')

T = 100
next = 0
val_supplier = val.create_supplier(x, y)
test_supplier = test.create_supplier(x, y)
tf.global_variables_initializer().run()
for i in range(0, numData, batch):
    if next == numSet:
        next = 0
    train_next = em.Dataset(train[next].data[i:i + batch], train[next].target[i:i + batch])
    next += 1
    tr_supplier = train_next.create_supplier(x, y)
    tr_accs, val_accs, test_accs = [], [], []
    run(T, inner_objective_feed_dicts=tr_supplier, outer_objective_feed_dicts=val_supplier)
    tr_accs.append(accuracy.eval(tr_supplier()))
    val_accs.append(accuracy.eval(val_supplier()))
    test_accs.append(accuracy.eval(test_supplier()))
    print(next)
    print('training accuracy', tr_accs[-1])
    print('validation accuracy', val_accs[-1])
    print('test accuracy', test_accs[-1])
    print('-' * 50)
x = tf.placeholder(tf.float32, shape=(None, 28 ** 2), name='x')
y = tf.placeholder(tf.float32, shape=(None, 10), name='y')

batch = 100
size = 1400
datasets = em.load.semeion()
datasets = em.Datasets.from_list(datasets)
train1 = datasets.train
datasets = em.load.opt()
datasets = em.Datasets.from_list(datasets)
train2 = datasets.train
train3 = datasets.validation
train44 = datasets.test
train4 = em.Dataset(train44.data[0:size], train44.target[0:size])
train5 = em.Dataset(train44.data[size:], train44.target[size:])
datasets2 = em.load.mnist(folder=os.path.join(os.getcwd(), 'MNIST_DATA/mnist'), partitions=(.02143, 0.02))
datasets2 = em.Datasets.from_list(datasets2)
validation = datasets2.validation
test = datasets2.test

with tf.variable_scope('model'):
    h1 = tcl.fully_connected(x, 300)
    out = tcl.fully_connected(h1, datasets.train.dim_target)
print('Ground model weights (parameters)')
[print(e) for e in tf.model_variables()]

with tf.variable_scope('inital_weight_model'):
    h1_hyp = tcl.fully_connected(x, 300, variables_collections=far.HYPERPARAMETERS_COLLECTIONS,
def redivide_data(datasets, partition_proportions=None, shuffle=False, filters=None,
                  maps=None, balance_classes=False, rand=None):
    """
    Function that redivides datasets. Can also be used to shuffle, filter or map examples.

    :param rand:
    :param balance_classes: # TODO RICCARDO
    :param datasets: original datasets, instances of class Dataset (works with get_data and get_targets
                     for compatibility with mnist datasets)
    :param partition_proportions: (optional, default None) list of fractions that can either sum up to 1
                                  or less than one, in which case one additional partition is created with
                                  proportion 1 - sum(partition_proportions). If None, the same proportion of
                                  samples found in datasets is retained.
    :param shuffle: (optional, default False) if True shuffles the examples
    :param filters: (optional, default None) filter or list of filters: functions with signature
                    (data, target, sample_info, index) -> boolean (accept or reject the sample)
    :param maps: (optional, default None) map or list of maps: functions with signature
                 (data, target, sample_info, index) -> (new_data, new_target) (maps the old sample to a
                 new one, possibly also to more than one sample, for data augmentation)
    :return: a list of datasets of length equal to the (possibly augmented) partition_proportions
    """
    rnd = em.get_rand_state(rand)

    all_data = vstack([get_data(d) for d in datasets])
    all_labels = stack_or_concat([get_targets(d) for d in datasets])
    all_infos = np.concatenate([d.sample_info for d in datasets])

    N = all_data.shape[0]

    if partition_proportions:  # argument check
        partition_proportions = list([partition_proportions] if isinstance(partition_proportions, float)
                                     else partition_proportions)
        sum_proportions = sum(partition_proportions)
        assert sum_proportions <= 1, "partition proportions must sum up to at most one: %g" % sum_proportions
        if sum_proportions < 1.:
            partition_proportions += [1. - sum_proportions]
    else:
        partition_proportions = [1. * get_data(d).shape[0] / N for d in datasets]

    if shuffle:
        if sp and isinstance(all_data, sp.sparse.csr.csr_matrix):
            raise NotImplementedError()
        # if sk_shuffle:  # TODO this does not work!!! find a way to shuffle these matrices
        #                 # while keeping compatibility with tensorflow!
        #     all_data, all_labels, all_infos = sk_shuffle(all_data, all_labels, all_infos)
        # else:
        permutation = np.arange(all_data.shape[0])
        rnd.shuffle(permutation)
        all_data = all_data[permutation]
        all_labels = np.array(all_labels[permutation])
        all_infos = np.array(all_infos[permutation])

    if filters:
        if sp and isinstance(all_data, sp.sparse.csr.csr_matrix):
            raise NotImplementedError()
        filters = as_list(filters)
        data_triple = [(x, y, d) for x, y, d in zip(all_data, all_labels, all_infos)]
        for fiat in filters:
            data_triple = [xy for i, xy in enumerate(data_triple) if fiat(xy[0], xy[1], xy[2], i)]
        all_data = np.vstack([e[0] for e in data_triple])
        all_labels = np.vstack([e[1] for e in data_triple])
        all_infos = np.vstack([e[2] for e in data_triple])

    if maps:
        if sp and isinstance(all_data, sp.sparse.csr.csr_matrix):
            raise NotImplementedError()
        maps = as_list(maps)
        data_triple = [(x, y, d) for x, y, d in zip(all_data, all_labels, all_infos)]
        for _map in maps:
            data_triple = [_map(xy[0], xy[1], xy[2], i) for i, xy in enumerate(data_triple)]
        all_data = np.vstack([e[0] for e in data_triple])
        all_labels = np.vstack([e[1] for e in data_triple])
        all_infos = np.vstack([e[2] for e in data_triple])

    N = all_data.shape[0]
    assert N == all_labels.shape[0]

    calculated_partitions = reduce(
        lambda v1, v2: v1 + [sum(v1) + v2],
        [int(N * prp) for prp in partition_proportions],
        [0])
    calculated_partitions[-1] = N

    print('datasets.redivide_data: computed partition numbers -', calculated_partitions, 'len all', N, end=' ')

    new_general_info_dict = {}
    for data in datasets:
        new_general_info_dict = {**new_general_info_dict, **data.info}

    if balance_classes:
        new_datasets = []
        forbidden_indices = np.empty(0, dtype=np.int64)
        for d1, d2 in zip(calculated_partitions[:-1], calculated_partitions[1:-1]):
            indices = np.array(get_indices_balanced_classes(d2 - d1, all_labels, forbidden_indices))
            dataset = em.Dataset(data=all_data[indices], target=all_labels[indices],
                                 sample_info=all_infos[indices], info=new_general_info_dict)
            new_datasets.append(dataset)
            forbidden_indices = np.append(forbidden_indices, indices)
            test_if_balanced(dataset)
        remaining_indices = np.array(list(set(list(range(N))) - set(forbidden_indices)))
        new_datasets.append(em.Dataset(data=all_data[remaining_indices], target=all_labels[remaining_indices],
                                       sample_info=all_infos[remaining_indices], info=new_general_info_dict))
    else:
        new_datasets = [
            em.Dataset(data=all_data[d1:d2], target=all_labels[d1:d2],
                       sample_info=all_infos[d1:d2], info=new_general_info_dict)
            for d1, d2 in zip(calculated_partitions, calculated_partitions[1:])
        ]

    print('DONE')
    return new_datasets
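# Usage sketch for redivide_data (illustrative only: the toy dataset, proportions and
# filter below are made-up examples, not part of the original scripts). It shows the
# expected call pattern: a list of em.Dataset objects plus partition proportions,
# optionally with shuffling and a filter with signature (data, target, sample_info, index).
def _example_redivide_data():
    X = np.random.randn(1000, 64)
    Y = em.to_one_hot_enc(list(np.random.randint(0, 10, size=1000)))
    full = em.Dataset(X, Y, name='toy')
    drop_class_zero = lambda data, target, info, idx: np.argmax(target) != 0
    train, valid, test = redivide_data([full],
                                       partition_proportions=(.7, .15),  # remaining .15 becomes the test split
                                       shuffle=True,
                                       filters=drop_class_zero)
    print(train.data.shape, valid.data.shape, test.data.shape)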
x = tf.placeholder(tf.float32, shape=(None, 28 ** 2), name='x')
y = tf.placeholder(tf.float32, shape=(None, 10), name='y')

batch = 100
size = 1400
datasets = em.load.semeion()
datasets = em.Datasets.from_list(datasets)
train1 = datasets.train
datasets = em.load.opt()
datasets = em.Datasets.from_list(datasets)
train2 = datasets.train
train3 = datasets.validation
train44 = datasets.test
train4 = em.Dataset(train44.data[0:size], train44.target[0:size])
train5 = em.Dataset(train44.data[size:], train44.target[size:])
datasets2 = em.load.mnist(folder=os.path.join(os.getcwd(), 'MNIST_DATA/mnist'), partitions=(.02143, 0.02))
datasets2 = em.Datasets.from_list(datasets2)
validation = datasets2.validation
test = datasets2.test

with tf.variable_scope('model'):
    h1 = tcl.fully_connected(x, 300)
    out = tcl.fully_connected(h1, datasets.train.dim_target)
print('Ground model weights (parameters)')
print('Variables (or tensors) that will store the values of the hypergradients')
print(*far.hypergradients(), sep='\n')

T = 100
count = [1, 1, 1, 1, 1]
acc = []
for i in range(numSet):
    next = i
    train_next = em.Dataset(train[next].data[0:batch], train[next].target[0:batch])
    tr_supplier = train_next.create_supplier(x, y)
    val_supplier = val.create_supplier(x, y)
    test_supplier = test.create_supplier(x, y)
    tf.global_variables_initializer().run()
    '''
    hyper_step(T, inner_objective_feed_dicts=tr_supplier, outer_objective_feed_dicts=val_supplier)
    res = sess.run(far.hyperparameters()) + [accuracy.eval(tr_supplier()), accuracy.eval(val_supplier()),
                                             accuracy.eval(test_supplier())]
    '''
    tr_accs, val_accs, test_accs = [], [], []
    run(T,