def get_distribution_problem(name, dataset_dir=None, load_to_memory=True, **kw):
    """
    Creates train/valid/test distribution estimation MLProblems from
    dataset ``name``.

    ``name`` must be one of the supported datasets (see variable
    ``distribution_names`` of this module).

    Option ``load_to_memory`` determines whether the dataset should be
    loaded into memory or always read from its files.

    If environment variable MLPYTHON_DATASET_REPO has been set to a valid
    directory path, this function will look into its appropriate
    subdirectory to find the dataset. Alternatively, the subdirectory path
    can be given by the user through option ``dataset_dir``.
    """

    if name not in distribution_names:
        raise ValueError('dataset ' + name + ' unknown for distribution learning')
    exec 'import mlpython.datasets.' + name + ' as mldataset'

    if dataset_dir is None:
        # Try to find dataset in MLPYTHON_DATASET_REPO
        import os
        repo = os.environ.get('MLPYTHON_DATASET_REPO')
        if repo is None:
            raise ValueError('environment variable MLPYTHON_DATASET_REPO is not defined')
        dataset_dir = repo + '/' + name

    all_data = mldataset.load(dataset_dir, load_to_memory=load_to_memory, **kw)

    train_data, train_metadata = all_data['train']
    valid_data, valid_metadata = all_data['valid']
    test_data, test_metadata = all_data['test']

    import mlpython.mlproblems.generic as mlpb
    if name in ['binarized_mnist', 'nips', 'nips_russ']:
        trainset = mlpb.MLProblem(train_data, train_metadata)
    else:
        trainset = mlpb.SubsetFieldsProblem(train_data, train_metadata)
    validset = trainset.apply_on(valid_data, valid_metadata)
    testset = trainset.apply_on(test_data, test_metadata)

    return trainset, validset, testset
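# Usage sketch (illustrative, not part of the original source): assuming
# this module is importable as mlpython.datasets.store and that
# MLPYTHON_DATASET_REPO points at a repository containing the requested
# dataset, the three MLProblems can be obtained and iterated over like so:
#
#   import mlpython.datasets.store as dataset_store
#   trainset, validset, testset = \
#       dataset_store.get_distribution_problem('binarized_mnist')
#   for example in trainset:
#       pass  # each example is a single input vector (no target)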
def verify_gradients(self):
    """
    Verifies the analytic gradients computed by ``bprop`` against
    central finite-difference estimates of the gradient of the
    training cost. WARNING: calling verify_gradients reinitializes
    the learner.
    """

    print 'WARNING: calling verify_gradients reinitializes the learner'

    rng = np.random.mtrand.RandomState(1234)
    input, target = (rng.rand(20) < 0.5, 2)

    # Deterministic stand-in for a k-means clustering: always assigns
    # the same cluster, whatever the input.
    class fake_clustering:
        def __init__(self, cluster):
            self.cluster = cluster

        def compute_cluster(self, input):
            return self.cluster

    self.seed = 1234
    self.hidden_size = 3
    self.n_clusters = 3
    self.n_k_means_stages = 0
    self.n_k_means = 3
    self.n_k_means_inputs = 3
    self.autoencoder_regularization = 0.1
    self.autoencoder_missing_fraction = 0
    self.activation_function = 'tanh'
    self.initialize(mlpb.MLProblem([(input, target)],
                                   {'input_size': 20, 'targets': set([0, 1, 2])}))
    epsilon = 1e-6
    self.learning_rate = 1
    self.decrease_constant = 0

    import copy

    def loss(input, target):
        # Cost whose gradients are being verified: negative log-likelihood
        # of the target plus the regularized autoencoder reconstruction error.
        self.fprop(input)
        return (-np.log(self.output[target])
                + self.autoencoder_regularization * np.sum((self.dae_output - input)**2))

    for l in range(10):
        input, target = (rng.rand(20) < 0.5, 2)
        self.clusterings = [fake_clustering(cluster=int(rng.rand() * self.n_clusters))
                            for i in range(self.n_k_means)]

        # Hidden weights Ws: central finite differences vs. bprop gradients.
        Ws_copy = copy.deepcopy(self.Ws)
        emp_dWs = copy.deepcopy(self.Ws)
        for h in range(self.n_k_means * self.n_clusters):
            for i in range(self.Ws[h].shape[0]):
                for j in range(self.Ws[h].shape[1]):
                    self.Ws[h][i, j] += epsilon
                    a = loss(input, target)
                    self.Ws[h][i, j] -= 2. * epsilon
                    b = loss(input, target)
                    self.Ws[h][i, j] += epsilon   # restore original value
                    emp_dWs[h][i, j] = (a - b) / (2. * epsilon)

        self.bprop(target)
        self.Ws = Ws_copy
        for h in range(self.n_k_means * self.n_clusters):
            print 'dWs[' + str(h) + '] diff.:', np.sum(np.abs(self.dWs[h].ravel() - emp_dWs[h].ravel())) / self.Ws[h].ravel().shape[0]

        # Hidden biases cs.
        cs_copy = copy.deepcopy(self.cs)
        emp_dcs = copy.deepcopy(self.cs)
        for h in range(self.n_k_means * self.n_clusters):
            for i in range(self.cs[h].shape[0]):
                self.cs[h][i] += epsilon
                a = loss(input, target)
                self.cs[h][i] -= 2. * epsilon
                b = loss(input, target)
                self.cs[h][i] += epsilon   # restore original value
                emp_dcs[h][i] = (a - b) / (2. * epsilon)

        self.bprop(target)
        self.cs = cs_copy
        for h in range(self.n_k_means * self.n_clusters):
            print 'dcs[' + str(h) + '] diff.:', np.sum(np.abs(self.dcs[h].ravel() - emp_dcs[h].ravel())) / self.cs[h].ravel().shape[0]

        # Output weights Vs.
        Vs_copy = copy.deepcopy(self.Vs)
        emp_dVs = copy.deepcopy(self.Vs)
        for h in range(self.n_k_means * self.n_clusters):
            for i in range(self.Vs[h].shape[0]):
                for j in range(self.Vs[h].shape[1]):
                    self.Vs[h][i, j] += epsilon
                    a = loss(input, target)
                    self.Vs[h][i, j] -= 2. * epsilon
                    b = loss(input, target)
                    self.Vs[h][i, j] += epsilon   # restore original value
                    emp_dVs[h][i, j] = (a - b) / (2. * epsilon)

        self.bprop(target)
        self.Vs = Vs_copy
        for h in range(self.n_k_means * self.n_clusters):
            print 'dVs[' + str(h) + '] diff.:', np.sum(np.abs(self.dVs[h].ravel() - emp_dVs[h].ravel())) / self.Vs[h].ravel().shape[0]

        # Output biases d.
        d_copy = np.array(self.d)
        emp_dd = np.zeros(self.d.shape)
        for i in range(self.d.shape[0]):
            self.d[i] += epsilon
            a = loss(input, target)
            self.d[i] -= 2. * epsilon
            b = loss(input, target)
            self.d[i] += epsilon   # restore original value
            emp_dd[i] = (a - b) / (2. * epsilon)

        self.bprop(target)
        self.d[:] = d_copy
        print 'dd diff.:', np.sum(np.abs(self.dd.ravel() - emp_dd.ravel())) / self.d.ravel().shape[0]

        # Autoencoder output biases dae_d.
        dae_d_copy = np.array(self.dae_d)
        emp_dae_dd = np.zeros(self.dae_d.shape)
        for i in range(self.dae_d.shape[0]):
            self.dae_d[i] += epsilon
            a = loss(input, target)
            self.dae_d[i] -= 2. * epsilon
            b = loss(input, target)
            self.dae_d[i] += epsilon   # restore original value
            emp_dae_dd[i] = (a - b) / (2. * epsilon)

        self.bprop(target)
        self.dae_d[:] = dae_d_copy
        print 'dae_dd diff.:', np.sum(np.abs(self.dae_dd.ravel() - emp_dae_dd.ravel())) / self.dae_d.ravel().shape[0]

        # Setting gradients to 0
        self.update()
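# Note (illustrative sketch, not from the original source): the check above
# uses the central finite-difference approximation
#   df/dw ~= (f(w + eps) - f(w - eps)) / (2 * eps),
# applied one parameter at a time. The same idea on a toy loss with a known
# gradient, showing the expected magnitude of the reported diffs:
#
#   import numpy as np
#   f = lambda w: np.sum(w ** 2)              # toy loss, exact gradient 2*w
#   w, eps = np.array([0.3, -1.2]), 1e-6
#   emp = np.zeros(w.shape)
#   for i in range(w.shape[0]):
#       w[i] += eps; a = f(w)
#       w[i] -= 2 * eps; b = f(w)
#       w[i] += eps                           # restore
#       emp[i] = (a - b) / (2. * eps)
#   print np.max(np.abs(emp - 2 * w))         # ~1e-10 for a correct gradient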