Example #1
def get_distribution_problem(name,
                             dataset_dir=None,
                             load_to_memory=True,
                             **kw):
    """
    Creates train/valid/test distribution estimation MLProblems from dataset ``name``.

    ``name`` must be one of the supported datasets (see the variable
    ``distribution_names`` of this module).

    Option ``load_to_memory`` determines whether the dataset should
    be loaded into memory or always read from its files.

    If environment variable MLPYTHON_DATASET_REPO has been set to a
    valid directory path, this function will look into its appropriate
    subdirectory to find the dataset. Alternatively, the subdirectory path
    can be given explicitly through the ``dataset_dir`` option.
    """

    if name not in distribution_names:
        raise ValueError('dataset ' + name +
                         ' unknown for distribution learning')

    import importlib
    mldataset = importlib.import_module('mlpython.datasets.' + name)

    if dataset_dir is None:
        # Try to find the dataset under MLPYTHON_DATASET_REPO
        import os
        repo = os.environ.get('MLPYTHON_DATASET_REPO')
        if repo is None:
            raise ValueError(
                'environment variable MLPYTHON_DATASET_REPO is not defined')
        dataset_dir = os.path.join(repo, name)

    all_data = mldataset.load(dataset_dir, load_to_memory=load_to_memory, **kw)

    train_data, train_metadata = all_data['train']
    valid_data, valid_metadata = all_data['valid']
    test_data, test_metadata = all_data['test']

    import mlpython.mlproblems.generic as mlpb
    if name in ('binarized_mnist', 'nips', 'nips_russ'):
        trainset = mlpb.MLProblem(train_data, train_metadata)
    else:
        trainset = mlpb.SubsetFieldsProblem(train_data, train_metadata)
    validset = trainset.apply_on(valid_data, valid_metadata)
    testset = trainset.apply_on(test_data, test_metadata)

    return trainset, validset, testset
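
A minimal usage sketch, assuming the ``binarized_mnist`` dataset has already been downloaded into the dataset repository (the repository path below is hypothetical):

import os
os.environ['MLPYTHON_DATASET_REPO'] = '/path/to/datasets'  # hypothetical location

trainset, validset, testset = get_distribution_problem('binarized_mnist')
for example in trainset:
    # Each example is a single input vector, suited for distribution estimation.
    break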
Example #2
   def verify_gradients(self):

      import copy
      import numpy as np
      import mlpython.mlproblems.generic as mlpb

      print('WARNING: calling verify_gradients reinitializes the learner')

      rng = np.random.RandomState(1234)
      # Random 20-dimensional binary input vector, with a fixed target class 2.
      input, target = (rng.rand(20) < 0.5, 2)

      # Stub clustering object that always returns the same fixed cluster.
      class fake_clustering:
          def __init__(self, cluster):
              self.cluster = cluster

          def compute_cluster(self, input):
              return self.cluster

      self.seed = 1234
      self.hidden_size = 3
      self.n_clusters = 3
      self.n_k_means_stages = 0
      self.n_k_means = 3
      self.n_k_means_inputs = 3
      self.autoencoder_regularization = 0.1
      self.autoencoder_missing_fraction = 0
      self.activation_function = 'tanh'
      self.initialize(mlpb.MLProblem([(input, target)],
                                     {'input_size': 20, 'targets': set([0, 1, 2])}))
      epsilon = 1e-6
      self.learning_rate = 1
      self.decrease_constant = 0

      for l in range(10):
          # Fresh random example and fresh random cluster assignments per round.
          input, target = (rng.rand(20) < 0.5, 2)
          self.clusterings = [fake_clustering(cluster=int(rng.rand() * self.n_clusters))
                              for i in range(self.n_k_means)]
          Ws_copy = copy.deepcopy(self.Ws)   # keep the original weights
          emp_dWs = copy.deepcopy(self.Ws)   # same shapes; filled with finite-difference estimates
          # Finite-difference check of the weight matrices Ws: perturb each
          # entry by +/- epsilon and take the central difference
          # (a - b) / (2 * epsilon) as the empirical gradient.
          for h in range(self.n_k_means * self.n_clusters):
              for i in range(self.Ws[h].shape[0]):
                  for j in range(self.Ws[h].shape[1]):
                      self.Ws[h][i, j] += epsilon
                      self.fprop(input)
                      a = -np.log(self.output[target]) + self.autoencoder_regularization * np.sum((self.dae_output - input)**2)
                      self.Ws[h][i, j] -= epsilon

                      self.Ws[h][i, j] -= epsilon
                      self.fprop(input)
                      b = -np.log(self.output[target]) + self.autoencoder_regularization * np.sum((self.dae_output - input)**2)
                      self.Ws[h][i, j] += epsilon

                      emp_dWs[h][i, j] = (a - b) / (2. * epsilon)

          self.bprop(target)   # analytic gradients, accumulated into self.dWs
          self.Ws = Ws_copy
          for h in range(self.n_k_means * self.n_clusters):
              print('dWs[' + str(h) + '] diff.:',
                    np.sum(np.abs(self.dWs[h].ravel() - emp_dWs[h].ravel())) / self.Ws[h].ravel().shape[0])
          
          # Same central-difference check for the bias vectors cs.
          cs_copy = copy.deepcopy(self.cs)
          emp_dcs = copy.deepcopy(self.cs)
          for h in range(self.n_k_means * self.n_clusters):
              for i in range(self.cs[h].shape[0]):
                  self.cs[h][i] += epsilon
                  self.fprop(input)
                  a = -np.log(self.output[target]) + self.autoencoder_regularization * np.sum((self.dae_output - input)**2)
                  self.cs[h][i] -= epsilon

                  self.cs[h][i] -= epsilon
                  self.fprop(input)
                  b = -np.log(self.output[target]) + self.autoencoder_regularization * np.sum((self.dae_output - input)**2)
                  self.cs[h][i] += epsilon

                  emp_dcs[h][i] = (a - b) / (2. * epsilon)

          self.bprop(target)
          self.cs = cs_copy
          for h in range(self.n_k_means * self.n_clusters):
              print('dcs[' + str(h) + '] diff.:',
                    np.sum(np.abs(self.dcs[h].ravel() - emp_dcs[h].ravel())) / self.cs[h].ravel().shape[0])
          
          # Same check for the weight matrices Vs.
          Vs_copy = copy.deepcopy(self.Vs)
          emp_dVs = copy.deepcopy(self.Vs)
          for h in range(self.n_k_means * self.n_clusters):
              for i in range(self.Vs[h].shape[0]):
                  for j in range(self.Vs[h].shape[1]):
                      self.Vs[h][i, j] += epsilon
                      self.fprop(input)
                      a = -np.log(self.output[target]) + self.autoencoder_regularization * np.sum((self.dae_output - input)**2)
                      self.Vs[h][i, j] -= epsilon

                      self.Vs[h][i, j] -= epsilon
                      self.fprop(input)
                      b = -np.log(self.output[target]) + self.autoencoder_regularization * np.sum((self.dae_output - input)**2)
                      self.Vs[h][i, j] += epsilon

                      emp_dVs[h][i, j] = (a - b) / (2. * epsilon)

          self.bprop(target)
          self.Vs = Vs_copy
          for h in range(self.n_k_means * self.n_clusters):
              print('dVs[' + str(h) + '] diff.:',
                    np.sum(np.abs(self.dVs[h].ravel() - emp_dVs[h].ravel())) / self.Vs[h].ravel().shape[0])
          
          # Same check for the bias vector d.
          d_copy = np.array(self.d)
          emp_dd = np.zeros(self.d.shape)
          for i in range(self.d.shape[0]):
              self.d[i] += epsilon
              self.fprop(input)
              a = -np.log(self.output[target]) + self.autoencoder_regularization * np.sum((self.dae_output - input)**2)
              self.d[i] -= epsilon

              self.d[i] -= epsilon
              self.fprop(input)
              b = -np.log(self.output[target]) + self.autoencoder_regularization * np.sum((self.dae_output - input)**2)
              self.d[i] += epsilon

              emp_dd[i] = (a - b) / (2. * epsilon)

          self.bprop(target)
          self.d[:] = d_copy
          print('dd diff.:', np.sum(np.abs(self.dd.ravel() - emp_dd.ravel())) / self.d.ravel().shape[0])

          # Same check for the autoencoder bias vector dae_d.
          dae_d_copy = np.array(self.dae_d)
          emp_dae_dd = np.zeros(self.dae_d.shape)
          for i in range(self.dae_d.shape[0]):
              self.dae_d[i] += epsilon
              self.fprop(input)
              a = -np.log(self.output[target]) + self.autoencoder_regularization * np.sum((self.dae_output - input)**2)
              self.dae_d[i] -= epsilon

              self.dae_d[i] -= epsilon
              self.fprop(input)
              b = -np.log(self.output[target]) + self.autoencoder_regularization * np.sum((self.dae_output - input)**2)
              self.dae_d[i] += epsilon

              emp_dae_dd[i] = (a - b) / (2. * epsilon)

          self.bprop(target)
          self.dae_d[:] = dae_d_copy
          print('dae_dd diff.:', np.sum(np.abs(self.dae_dd.ravel() - emp_dae_dd.ravel())) / self.dae_d.ravel().shape[0])

          # Setting gradients to 0
          self.update()
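
The pattern repeated above is a standard central-difference gradient check. A minimal standalone sketch of the same idea, independent of MLPython (the ``check_gradient`` helper and the quadratic loss are illustrative, not part of the library):

import numpy as np

def check_gradient(loss, grad, w, epsilon=1e-6):
    # Empirical gradient by central differences:
    #   dloss/dw_i ~ (loss(w + eps*e_i) - loss(w - eps*e_i)) / (2*eps)
    emp = np.zeros_like(w)
    for i in range(w.shape[0]):
        w[i] += epsilon
        a = loss(w)
        w[i] -= 2 * epsilon
        b = loss(w)
        w[i] += epsilon  # restore w[i]
        emp[i] = (a - b) / (2. * epsilon)
    # Mean absolute difference between analytic and empirical gradients;
    # for a correct gradient this should be very close to zero.
    return np.mean(np.abs(grad(w) - emp))

# Example: for loss(w) = sum(w**2) the exact gradient is 2*w.
w = np.array([0.5, -1.0, 2.0])
print(check_gradient(lambda v: np.sum(v**2), lambda v: 2 * v, w))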