def test(self, test_X=None):
     log.maybeLog(self.logger, "\nTesting---------\n")
     if test_X is None:
         log.maybeLog(self.logger, "Testing using data given during initialization of GSN.\n")
         test_X  = self.test_X
         if test_X is None:
             log.maybeLog(self.logger, "\nPlease provide a test dataset!\n")
             raise AssertionError("Please provide a test dataset")
     else:
         log.maybeLog(self.logger, "Testing using data provided to test function.\n")
         
     ###########
     # TESTING #
     ###########
     n_examples = 100
     tests = test_X.get_value()[0:n_examples]
     noisy_tests = self.f_noise(tests)
     cost, reconstructed = self.f_recon(noisy_tests) 
     # Concatenate stuff if it is an image
     if self.is_image:
         stacked = numpy.vstack([numpy.vstack([tests[i*10 : (i+1)*10], noisy_tests[i*10 : (i+1)*10], reconstructed[i*10 : (i+1)*10]]) for i in range(10)])
         number_reconstruction = PIL.Image.fromarray(tile_raster_images(stacked, (self.image_height,self.image_width), (10,30)))
         
         number_reconstruction.save(self.outdir+'gsn_image_reconstruction_test.png')
     # Otherwise, save reconstructed numpy array as csv
     else:
         numpy.savetxt(self.outdir+'gsn_reconstruction_test.csv', reconstructed, delimiter=",")
         
     log.maybeLog(self.logger, "----------------\n\nAverage test cost is "+str(cost)+"\n\n-----------------")
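 # Hedged usage sketch (names are hypothetical, not defined in this file;
 # `gsn` is an already-constructed instance and `other_X` a Theano shared
 # variable holding test data):
 #     gsn.test()          # falls back to the test set given at initialization
 #     gsn.test(other_X)   # or evaluate on an explicitly provided dataset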
 def plot_samples(self, epoch_number="", leading_text="", n_samples=400):
     to_sample = time.time()
     initial = self.test_X.get_value(borrow=True)[:1]
     rand_idx = numpy.random.choice(range(self.test_X.get_value(borrow=True).shape[0]))
     rand_init = self.test_X.get_value(borrow=True)[rand_idx:rand_idx+1]
     
     V, _ = self.sample(initial, n_samples)
     rand_V, _ = self.sample(rand_init, n_samples)
     
     img_samples = PIL.Image.fromarray(tile_raster_images(V, (self.image_height, self.image_width), closest_to_square_factors(n_samples)))
     rand_img_samples = PIL.Image.fromarray(tile_raster_images(rand_V, (self.image_height, self.image_width), closest_to_square_factors(n_samples)))
     
     fname = self.outdir+leading_text+'samples_epoch_'+str(epoch_number)+'.png'
     img_samples.save(fname)
     rfname = self.outdir+leading_text+'samples_rand_epoch_'+str(epoch_number)+'.png'
     rand_img_samples.save(rfname) 
     log.maybeLog(self.logger, 'Took ' + make_time_units_string(time.time() - to_sample) + ' to sample '+str(n_samples*2)+' numbers')
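 # Note: two sampling chains are drawn above -- one seeded from the first test
 # example and one from a randomly chosen test example -- and each is tiled
 # into a grid chosen by closest_to_square_factors. For the default
 # n_samples=400 that is presumably a 20x20 grid, matching the hard-coded
 # (20, 20) tile shapes in the variants below.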
 def plot_samples(self, epoch_number="", leading_text="", n_samples=400):
     to_sample = time.time()
     initial = self.test_X.get_value()[:1]
     V = self.sample(initial, n_samples)
     img_samples = PIL.Image.fromarray(tile_raster_images(V, (self.root_N_input,self.root_N_input), (int(ceil(sqrt(n_samples))), int(ceil(sqrt(n_samples))))))
     
     fname = self.outdir+leading_text+'samples_epoch_'+str(epoch_number)+'.png'
     img_samples.save(fname) 
     log.maybeLog(self.logger, 'Took ' + str(time.time() - to_sample) + ' to sample '+str(n_samples)+' numbers')
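 # Grid-shape check for the ceil(sqrt(.)) tiling above: with the default
 # n_samples=400, int(ceil(sqrt(400))) == 20, so the samples land on the same
 # 20x20 grid that the fixed-size variants below hard-code.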
 def plot_samples(epoch_number, iteration):
     to_sample = time.time()
     if layers == 1:
         # one layer model
         V = sample_some_numbers_single_layer()
     else:
         V, H0 = sample_some_numbers()
     img_samples =   PIL.Image.fromarray(tile_raster_images(V, (root_N_input,root_N_input), (20,20)))
     
     fname       =   outdir+'samples_iteration_'+str(iteration)+'_epoch_'+str(epoch_number)+'.png'
     img_samples.save(fname) 
     print 'Took ' + str(time.time() - to_sample) + ' to sample 400 numbers'
 def plot_samples(epoch_number, leading_text):
     to_sample = time.time()
     if layers == 1:
         # one layer model
         V = sample_some_numbers_single_layer()
     else:
         V, H0 = sample_some_numbers()
     img_samples =   PIL.Image.fromarray(tile_raster_images(V, (root_N_input,root_N_input), (20,20)))
     
     fname       =   outdir+leading_text+'samples_epoch_'+str(epoch_number)+'.png'
     img_samples.save(fname) 
     logger.log('Took ' + str(time.time() - to_sample) + ' to sample 400 numbers')
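 # For intuition, a minimal pure-numpy stand-in for what tile_raster_images is
 # used for in these plot_samples variants: laying out n flattened images (one
 # per row of V) onto a (rows, cols) grid. This sketch ignores the spacing and
 # normalization options of the real utility, relies on this module's
 # top-level numpy import, and is illustrative only -- not original code.
 def tile_images_sketch(V, img_shape, tile_shape):
     """Arrange the rows of V (flattened images) onto a tile_shape grid."""
     h, w = img_shape
     rows, cols = tile_shape
     out = numpy.zeros((rows * h, cols * w), dtype=V.dtype)
     for idx in range(min(V.shape[0], rows * cols)):
         r, c = divmod(idx, cols)   # row-major placement on the grid
         out[r * h:(r + 1) * h, c * w:(c + 1) * w] = V[idx].reshape(h, w)
     return out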
    def train_GSN(train_X, train_Y, valid_X, valid_Y, test_X, test_Y):
        logger.log("\n-----------TRAINING GSN------------\n")
        
        # TRAINING
        n_epoch     =   state.n_epoch
        batch_size  =   state.gsn_batch_size
        STOP        =   False
        counter     =   0
        learning_rate.set_value(cast32(state.learning_rate))  # learning rate
        times = []
        best_cost = float('inf')
        best_params = None
        patience = 0
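        # Early-stopping bookkeeping: `patience` counts epochs whose validation
        # cost failed to beat best_cost * state.early_stop_threshold; training
        # stops once it reaches state.early_stop_length (checked further below).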
                    
        logger.log(['train X size:',str(train_X.shape.eval())])
        logger.log(['valid X size:',str(valid_X.shape.eval())])
        logger.log(['test X size:',str(test_X.shape.eval())])
        
        if state.vis_init:
            bias_list[0].set_value(logit(numpy.clip(train_X.get_value().mean(axis=0), 0.001, 0.9)))
    
        if state.test_model:
            # If testing, do not train and go directly to generating samples, parzen window estimation, and inpainting
            logger.log('Testing : skip training')
            STOP    =   True
    
        while not STOP:
            counter += 1
            t = time.time()
            logger.append([counter,'\t'])
                
            #shuffle the data
            data.sequence_mnist_data(train_X, train_Y, valid_X, valid_Y, test_X, test_Y, dataset, rng)
                
            #train
            train_costs = []
            for i in xrange(len(train_X.get_value(borrow=True)) / batch_size):
                x = train_X.get_value()[i * batch_size : (i+1) * batch_size]
                cost = f_learn_gsn(x)
                train_costs.append([cost])
            train_costs = numpy.mean(train_costs)
            # record it
            logger.append(['Train:',trunc(train_costs),'\t'])
            with open(gsn_train_convergence,'a') as f:
                f.write("{0!s},".format(train_costs))
                f.write("\n")
    
    
            #valid
            valid_costs = []
            for i in xrange(len(valid_X.get_value(borrow=True)) / batch_size):
                x = valid_X.get_value()[i * batch_size : (i+1) * batch_size]
                cost = f_cost_gsn(x)
                valid_costs.append([cost])                    
            valid_costs = numpy.mean(valid_costs)
            # record it
            logger.append(['Valid:',trunc(valid_costs), '\t'])
            with open(gsn_valid_convergence,'a') as f:
                f.write("{0!s},".format(valid_costs))
                f.write("\n")
    
    
            #test
            test_costs = []
            for i in xrange(len(test_X.get_value(borrow=True)) / batch_size):
                x = test_X.get_value()[i * batch_size : (i+1) * batch_size]
                cost = f_cost_gsn(x)
                test_costs.append([cost])                
            test_costs = numpy.mean(test_costs)
            # record it 
            logger.append(['Test:',trunc(test_costs), '\t'])
            with open(gsn_test_convergence,'a') as f:
                f.write("{0!s},".format(test_costs))
                f.write("\n")
            
            
            #check for early stopping
            cost = numpy.sum(valid_costs)
            if cost < best_cost*state.early_stop_threshold:
                patience = 0
                best_cost = cost
                # save the parameters that made it the best
                best_params = save_params(gsn_params)
            else:
                patience += 1
    
            if counter >= n_epoch or patience >= state.early_stop_length:
                STOP = True
                if best_params is not None:
                    restore_params(gsn_params, best_params)
                save_params_to_file('gsn', counter, gsn_params)
    
            timing = time.time() - t
            times.append(timing)
    
            logger.append('time: '+make_time_units_string(timing)+'\t')
            
            logger.log('remaining: '+make_time_units_string((n_epoch - counter) * numpy.mean(times)))
    
            if (counter % state.save_frequency) == 0 or STOP is True:
                n_examples = 100
                random_idx = numpy.array(R.sample(range(len(test_X.get_value(borrow=True))), n_examples))
                numbers = test_X.get_value(borrow=True)[random_idx]
                noisy_numbers = f_noise(test_X.get_value(borrow=True)[random_idx])
                reconstructed = f_recon_gsn(noisy_numbers) 
                # Concatenate stuff
                stacked = numpy.vstack([numpy.vstack([numbers[i*10 : (i+1)*10], noisy_numbers[i*10 : (i+1)*10], reconstructed[i*10 : (i+1)*10]]) for i in range(10)])
                number_reconstruction = PIL.Image.fromarray(tile_raster_images(stacked, (root_N_input,root_N_input), (10,30)))
                    
                number_reconstruction.save(outdir+'gsn_number_reconstruction_epoch_'+str(counter)+'.png')
        
                #sample_numbers(counter, 'seven')
                plot_samples(counter, 'gsn')
        
                #save gsn_params
                save_params_to_file('gsn', counter, gsn_params)
         
            # ANNEAL!
            new_lr = learning_rate.get_value() * annealing
            learning_rate.set_value(new_lr)

        
        # 10k samples
        print 'Generating 10,000 samples'
        samples, _  =   sample_some_numbers(N=10000)
        f_samples   =   outdir+'samples.npy'
        numpy.save(f_samples, samples)
        print 'saved digits'
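    # A self-contained sketch of the "# ANNEAL!" schedule used in the loop
    # above: the learning rate decays geometrically each epoch,
    # lr_t = lr_0 * annealing**t. (Illustrative helper, not original code.)
    def annealed_learning_rate(lr_0, annealing, epoch):
        """Learning rate after `epoch` multiplicative decays."""
        return lr_0 * annealing ** epoch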
 def train_regression(iteration, train_X, train_Y, valid_X, valid_Y, test_X, test_Y):
     print '-------------------------------------------'
     print 'TRAINING RECURRENT REGRESSION FOR ITERATION',iteration
     with open(logfile,'a') as f:
         f.write("--------------------------\nTRAINING RECURRENT REGRESSION FOR ITERATION {0!s}\n".format(iteration))
     
     # TRAINING
     n_epoch     =   state.n_epoch
     batch_size  =   state.batch_size
     STOP        =   False
     counter     =   0
     if iteration == 0:
         recurrent_learning_rate.set_value(cast32(state.learning_rate))  # learning rate
     times = []
     best_cost = float('inf')
     patience = 0
         
     print 'learning rate:',recurrent_learning_rate.get_value()
     
     print 'train X size:',str(train_X.shape.eval())
     print 'valid X size:',str(valid_X.shape.eval())
     print 'test X size:',str(test_X.shape.eval())
 
     train_costs =   []
     valid_costs =   []
     test_costs  =   []
     
     if state.vis_init:
         bias_list[0].set_value(logit(numpy.clip(train_X.get_value().mean(axis=0), 0.001, 0.9)))
 
     if state.test_model:
         # If testing, do not train and go directly to generating samples, parzen window estimation, and inpainting
         print 'Testing : skip training'
         STOP    =   True
 
 
     while not STOP:
         counter += 1
         t = time.time()
         print counter,'\t',
         
         #shuffle the data
         data.sequence_mnist_data(train_X, train_Y, valid_X, valid_Y, test_X, test_Y, dataset, rng)
         
         #train
         #init recurrent hiddens as zero
         recurrent_hiddens = [T.zeros((batch_size,recurrent_layer_size)).eval() for recurrent_layer_size in recurrent_layer_sizes]
         train_cost = []
         for i in range(len(train_X.get_value(borrow=True)) / batch_size):
             x = train_X.get_value()[i * batch_size : (i+1) * batch_size]
             x1 = train_X.get_value()[(i * batch_size) + 1 : ((i+1) * batch_size) + 1]
             [x,x1], recurrent_hiddens = fix_input_size([x,x1], recurrent_hiddens)
             _ins = recurrent_hiddens + [x,x1]
             _outs = recurrent_f_learn(*_ins)
             recurrent_hiddens = _outs[:len(recurrent_hiddens)]
             cost = _outs[-1]
             train_cost.append(cost)
             
         train_cost = numpy.mean(train_cost) 
         train_costs.append(train_cost)
         print 'rTrain : ',trunc(train_cost), '\t',
         with open(logfile,'a') as f:
             f.write("rTrain : {0!s}\t".format(trunc(train_cost)))
         with open(recurrent_train_convergence,'a') as f:
             f.write("{0!s},".format(train_cost))
 
         #valid
         #init recurrent hiddens as zero
         recurrent_hiddens = [T.zeros((batch_size,recurrent_layer_size)).eval() for recurrent_layer_size in recurrent_layer_sizes]
         valid_cost  =  []
         for i in range(len(valid_X.get_value(borrow=True)) / batch_size):
             x = valid_X.get_value()[i * batch_size : (i+1) * batch_size]
             x1 = valid_X.get_value()[(i * batch_size) + 1 : ((i+1) * batch_size) + 1]
             [x,x1], recurrent_hiddens = fix_input_size([x,x1], recurrent_hiddens)
             _ins = recurrent_hiddens + [x,x1]
             _outs = f_cost(*_ins)
             recurrent_hiddens = _outs[:len(recurrent_hiddens)]
             cost = _outs[-1]
             valid_cost.append(cost)
                 
         valid_cost = numpy.mean(valid_cost) 
         valid_costs.append(valid_cost)
         print 'rValid : ', trunc(valid_cost), '\t',
         with open(logfile,'a') as f:
             f.write("rValid : {0!s}\t".format(trunc(valid_cost)))
         with open(recurrent_valid_convergence,'a') as f:
             f.write("{0!s},".format(valid_cost))
 
         #test
         recurrent_hiddens = [T.zeros((batch_size,recurrent_layer_size)).eval() for recurrent_layer_size in recurrent_layer_sizes]
         test_cost  =   []
         for i in range(len(test_X.get_value(borrow=True)) / batch_size):
             x = test_X.get_value()[i * batch_size : (i+1) * batch_size]
             x1 = test_X.get_value()[(i * batch_size) + 1 : ((i+1) * batch_size) + 1]
             [x,x1], recurrent_hiddens = fix_input_size([x,x1], recurrent_hiddens)
             _ins = recurrent_hiddens + [x,x1]
             _outs = f_cost(*_ins)
             recurrent_hiddens = _outs[:len(recurrent_hiddens)]
             cost = _outs[-1]
             test_cost.append(cost)
             
         test_cost = numpy.mean(test_cost) 
         test_costs.append(test_cost)
         print 'rTest  : ', trunc(test_cost), '\t',
         with open(logfile,'a') as f:
             f.write("rTest : {0!s}\t".format(trunc(test_cost)))
         with open(recurrent_test_convergence,'a') as f:
             f.write("{0!s},".format(test_cost))
 
         #check for early stopping
         cost = train_cost
         if iteration != 0:
             cost = cost + train_cost
         if cost < best_cost*state.early_stop_threshold:
             patience = 0
             best_cost = cost
         else:
             patience += 1
             
         timing = time.time() - t
         times.append(timing)
 
         print 'time : ', trunc(timing),
         
         print 'remaining: ', trunc((n_epoch - counter) * numpy.mean(times) / 60 / 60), 'hrs'
         
         with open(logfile,'a') as f:
             f.write("B : {0!s}\t".format(str([trunc(vb.get_value().mean()) for vb in recurrent_bias_list])))
             
         with open(logfile,'a') as f:
             f.write("W : {0!s}\t".format(str([trunc(abs(v.get_value(borrow=True)).mean()) for v in recurrent_weights_list_encode])))
         
         with open(logfile,'a') as f:
             f.write("V : {0!s}\t".format(str([trunc(abs(v.get_value(borrow=True)).mean()) for v in recurrent_weights_list_decode])))
             
         with open(logfile,'a') as f:
             f.write("Time : {0!s} seconds\n".format(trunc(timing)))
                 
         if (counter % state.save_frequency) == 0:
             # Checking reconstruction
             nums = test_X.get_value()[range(100)]
             noisy_nums = f_noise(test_X.get_value()[range(100)])
             reconstructed = []
             reconstructed_prediction = []
             #init recurrent hiddens as zero
             recurrent_hiddens = [T.zeros((batch_size,recurrent_layer_size)).eval() for recurrent_layer_size in recurrent_layer_sizes]
             for num in noisy_nums:
                 _ins = recurrent_hiddens + [num]
                 _outs = f_recon(*_ins)
                 recurrent_hiddens = _outs[:len(recurrent_hiddens)]
                 [recon,recon_pred] = _outs[len(recurrent_hiddens):]
                 reconstructed.append(recon)
                 reconstructed_prediction.append(recon_pred)
             # Concatenate stuff
             stacked = numpy.vstack([numpy.vstack([nums[i*10 : (i+1)*10], noisy_nums[i*10 : (i+1)*10], reconstructed[i*10 : (i+1)*10], reconstructed_prediction[i*10 : (i+1)*10]]) for i in range(10)])
             
             number_reconstruction   =   PIL.Image.fromarray(tile_raster_images(stacked, (root_N_input,root_N_input), (10,40)))
             #epoch_number    =   reduce(lambda x,y : x + y, ['_'] * (4-len(str(counter)))) + str(counter)
             number_reconstruction.save(outdir+'recurrent_number_reconstruction_iteration_'+str(iteration)+'_epoch_'+str(counter)+'.png')
     
             #sample_numbers(counter, 'seven')
             plot_samples(counter, iteration)
     
             #save params
             save_params_to_file('recurrent', counter, params, iteration)
      
         # ANNEAL!
         new_r_lr = recurrent_learning_rate.get_value() * annealing
         recurrent_learning_rate.set_value(new_r_lr)
 
     # if test
 
     # 10k samples
     print 'Generating 10,000 samples'
     samples, _  =   sample_some_numbers(N=10000)
     f_samples   =   outdir+'samples.npy'
     numpy.save(f_samples, samples)
     print 'saved digits'
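 # Minimal sketch of the paired-minibatch slicing used in the loops above:
 # each batch x is matched with x1, the same slice shifted forward one step,
 # so the recurrent regression learns next-step prediction. (Illustrative
 # helper, not part of the original code; the original instead relies on
 # fix_input_size to repair the ragged final batch.)
 def paired_batches(data, batch_size):
     """Yield (x, x1) with x1 shifted one time step ahead of x."""
     for i in range((len(data) - 1) // batch_size):
         x = data[i * batch_size:(i + 1) * batch_size]
         x1 = data[i * batch_size + 1:(i + 1) * batch_size + 1]
         yield x, x1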
    def train_regression(iteration, train_X, train_Y, valid_X, valid_Y, test_X, test_Y):
        logger.log('-------------TRAINING REGRESSION FOR ITERATION {0!s}-------------'.format(iteration))

        # TRAINING
        n_epoch = state.n_epoch
        batch_size = state.batch_size
        STOP = False
        counter = 0
        best_cost = float('inf')
        best_params = None
        patience = 0
        if iteration == 0:
            regression_learning_rate.set_value(cast32(state.learning_rate))  # learning rate
        times = []

        logger.log(['learning rate:', regression_learning_rate.get_value()])

        logger.log(['train X size:', str(train_X.shape.eval())])
        logger.log(['valid X size:', str(valid_X.shape.eval())])
        logger.log(['test X size:', str(test_X.shape.eval())])

        if state.test_model:
            # If testing, do not train and go directly to generating samples, parzen window estimation, and inpainting
            logger.log('Testing : skip training')
            STOP = True

        while not STOP:
            counter += 1
            t = time.time()
            logger.append([counter, '\t'])

            # shuffle the data
            # data.sequence_mnist_data(train_X, train_Y, valid_X, valid_Y, test_X, test_Y, dataset, rng)

            # train
            train_costs = []
            train_errors = []
            for i in range(len(train_X.get_value(borrow=True)) / batch_size):
                xs = [train_X.get_value(borrow=True)[
                      (i * batch_size) + sequence_idx: ((i + 1) * batch_size) + sequence_idx] for sequence_idx in
                      range(len(Xs))]
                xs, _ = fix_input_size(xs)
                _ins = xs  # + [sequence_window_size]
                cost, error = regression_f_learn(*_ins)
                # print trunc(cost)
                # print [numpy.asarray(a) for a in f_check(*_ins)]
                train_costs.append(cost)
                train_errors.append(error)

            train_costs = numpy.mean(train_costs)
            train_errors = numpy.mean(train_errors)
            logger.append(['rTrain: ', trunc(train_costs), trunc(train_errors), '\t'])
            with open(regression_train_convergence, 'a') as f:
                f.write("{0!s},".format(train_costs))
                f.write("\n")

            # valid
            valid_costs = []
            for i in range(len(valid_X.get_value(borrow=True)) / batch_size):
                xs = [valid_X.get_value(borrow=True)[
                      (i * batch_size) + sequence_idx: ((i + 1) * batch_size) + sequence_idx] for sequence_idx in
                      range(len(Xs))]
                xs, _ = fix_input_size(xs)
                _ins = xs  # + [sequence_window_size]
                cost, _ = regression_f_cost(*_ins)
                valid_costs.append(cost)

            valid_costs = numpy.mean(valid_costs)
            logger.append(['rValid: ', trunc(valid_costs), '\t'])
            with open(regression_valid_convergence, 'a') as f:
                f.write("{0!s},".format(valid_costs))
                f.write("\n")

            # test
            test_costs = []
            test_errors = []
            for i in range(len(test_X.get_value(borrow=True)) / batch_size):
                xs = [test_X.get_value(borrow=True)[
                      (i * batch_size) + sequence_idx: ((i + 1) * batch_size) + sequence_idx] for sequence_idx in
                      range(len(Xs))]
                xs, _ = fix_input_size(xs)
                _ins = xs  # + [sequence_window_size]
                cost, error = regression_f_cost(*_ins)
                test_costs.append(cost)
                test_errors.append(error)

            test_costs = numpy.mean(test_costs)
            test_errors = numpy.mean(test_errors)
            logger.append(['rTest: ', trunc(test_costs), trunc(test_errors), '\t'])
            with open(regression_test_convergence, 'a') as f:
                f.write("{0!s},".format(test_costs))
                f.write("\n")

            # check for early stopping
            cost = numpy.sum(valid_costs)
            if cost < best_cost * state.early_stop_threshold:
                patience = 0
                best_cost = cost
                # keep the best params so far
                best_params = save_params(regression_params)
            else:
                patience += 1

            if counter >= n_epoch or patience >= state.early_stop_length:
                STOP = True
                if best_params is not None:
                    restore_params(regression_params, best_params)
                save_params_to_file('regression', counter, regression_params, iteration)
                logger.log(["next learning rate should be", regression_learning_rate.get_value() * annealing])

            timing = time.time() - t
            times.append(timing)

            logger.append('time: ' + make_time_units_string(timing))

            logger.log('remaining: ' + make_time_units_string((n_epoch - counter) * numpy.mean(times)))

            if (counter % state.save_frequency) == 0 or STOP is True:
                n_examples = 100 + sequence_window_size
                # Checking reconstruction
                # grab 100 numbers in the sequence from the test set
                nums = test_X.get_value()[range(n_examples)]
                noisy_nums = f_noise(test_X.get_value()[range(n_examples)])

                reconstructed_prediction = []
                reconstructed = []
                for i in range(n_examples):
                    if i >= sequence_window_size:
                        xs = [noisy_nums[i - x] for x in range(len(Xs))]
                        xs.reverse()
                        _ins = xs  # + [sequence_window_size]
                        _outs = f_recon(*_ins)
                        prediction = _outs[0]
                        reconstruction = _outs[1]
                        reconstructed_prediction.append(prediction)
                        reconstructed.append(reconstruction)
                nums = nums[sequence_window_size:]
                noisy_nums = noisy_nums[sequence_window_size:]
                reconstructed_prediction = numpy.array(reconstructed_prediction)
                reconstructed = numpy.array(reconstructed)

                # Concatenate stuff
                stacked = numpy.vstack([numpy.vstack([nums[i * 10: (i + 1) * 10], noisy_nums[i * 10: (i + 1) * 10],
                                                      reconstructed_prediction[i * 10: (i + 1) * 10],
                                                      reconstructed[i * 10: (i + 1) * 10]]) for i in range(10)])

                number_reconstruction = PIL.Image.fromarray(
                    tile_raster_images(stacked, (root_N_input, root_N_input), (10, 40)))
                # epoch_number    =   reduce(lambda x,y : x + y, ['_'] * (4-len(str(counter)))) + str(counter)
                number_reconstruction.save(
                    outdir + 'regression_number_reconstruction_iteration_' + str(iteration) + '_epoch_' + str(
                        counter) + '.png')

                # save gsn_params
                save_params_to_file('regression', counter, regression_params, iteration)

            # ANNEAL!
            new_r_lr = regression_learning_rate.get_value() * annealing
            regression_learning_rate.set_value(new_r_lr)
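        # The window batching above generalizes the (x, x1) pairing: each
        # minibatch is a list of len(Xs) slices of the data, the k-th slice
        # shifted k steps forward in time, giving the model a fixed-length
        # input window over consecutive frames.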
 def train(self, train_X=None, valid_X=None, test_X=None, continue_training=False):
     log.maybeLog(self.logger, "\nTraining---------\n")
     if train_X is None:
         log.maybeLog(self.logger, "Training using data given during initialization of GSN.\n")
         train_X = self.train_X
         if train_X is None:
             log.maybeLog(self.logger, "\nPlease provide a training dataset!\n")
             raise AssertionError("Please provide a training dataset!")
     else:
         log.maybeLog(self.logger, "Training using data provided to training function.\n")
     if valid_X is None:
         valid_X = self.valid_X
     if test_X is None:
         test_X  = self.test_X
         
     train_X = raise_data_to_list(train_X)
     valid_X = raise_data_to_list(valid_X)
     test_X  = raise_data_to_list(test_X)
         
     
     ############
     # TRAINING #
     ############
     log.maybeLog(self.logger, "-----------TRAINING GSN FOR {0!s} EPOCHS-----------".format(self.n_epoch))
     STOP        = False
     counter     = 0
     if not continue_training:
         self.learning_rate.set_value(self.init_learn_rate)  # learning rate
     times       = []
     best_cost   = float('inf')
     best_params = None
     patience    = 0
                 
     log.maybeLog(self.logger, ['train X size:',str(train_X[0].shape.eval())])
     if valid_X is not None:
         log.maybeLog(self.logger, ['valid X size:',str(valid_X[0].shape.eval())])
     if test_X is not None:
         log.maybeLog(self.logger, ['test X size:',str(test_X[0].shape.eval())])
     
     if self.vis_init:
         self.bias_list[0].set_value(logit(numpy.clip(train_X[0].get_value().mean(axis=0), 0.001, 0.9)))
 
     while not STOP:
         counter += 1
         t = time.time()
         log.maybeAppend(self.logger, [counter,'\t'])
         
         #train
         train_costs = data.apply_cost_function_to_dataset(self.f_learn, train_X, self.batch_size)
         log.maybeAppend(self.logger, ['Train:',trunc(numpy.mean(train_costs)), '\t'])
 
         #valid
         if valid_X is not None:
             valid_costs = data.apply_cost_function_to_dataset(self.f_cost, valid_X, self.batch_size)
             log.maybeAppend(self.logger, ['Valid:',trunc(numpy.mean(valid_costs)), '\t'])
 
         #test
         if test_X is not None:
             test_costs = data.apply_cost_function_to_dataset(self.f_cost, test_X, self.batch_size)
             log.maybeAppend(self.logger, ['Test:',trunc(numpy.mean(test_costs)), '\t'])
             
         #check for early stopping
         if valid_X is not None:
             cost = numpy.sum(valid_costs)
         else:
             cost = numpy.sum(train_costs)
         if cost < best_cost*self.early_stop_threshold:
             patience = 0
             best_cost = cost
             # save the parameters that made it the best
             best_params = save_params(self.params)
         else:
             patience += 1
 
         if counter >= self.n_epoch or patience >= self.early_stop_length:
             STOP = True
             if best_params is not None:
                 restore_params(self.params, best_params)
             save_params_to_file(counter, self.params, self.outdir, self.logger)
 
         timing = time.time() - t
         times.append(timing)
 
         log.maybeAppend(self.logger, 'time: '+make_time_units_string(timing)+'\t')
         
         log.maybeLog(self.logger, 'remaining: '+make_time_units_string((self.n_epoch - counter) * numpy.mean(times)))
     
         if (counter % self.save_frequency) == 0 or STOP is True:
             if self.is_image:
                 n_examples = 100
                 # test_X was wrapped into a list by raise_data_to_list above
                 tests = test_X[0].get_value()[0:n_examples]
                 noisy_tests = self.f_noise(tests)
                 _, reconstructed = self.f_recon(noisy_tests) 
                 # Concatenate stuff if it is an image
                 stacked = numpy.vstack([numpy.vstack([tests[i*10 : (i+1)*10], noisy_tests[i*10 : (i+1)*10], reconstructed[i*10 : (i+1)*10]]) for i in range(10)])
                 number_reconstruction = PIL.Image.fromarray(tile_raster_images(stacked, (self.image_height,self.image_width), (10,30)))
                 
                 number_reconstruction.save(self.outdir+'gsn_image_reconstruction_epoch_'+str(counter)+'.png')
     
             #save gsn_params
             save_params_to_file(counter, self.params, self.outdir, self.logger)
      
         # ANNEAL!
         new_lr = self.learning_rate.get_value() * self.annealing
         self.learning_rate.set_value(new_lr)
         
         new_hidden_sigma = self.hidden_add_noise_sigma.get_value() * self.noise_annealing
         self.hidden_add_noise_sigma.set_value(new_hidden_sigma)
         
         new_salt_pepper = self.input_salt_and_pepper.get_value() * self.noise_annealing
         self.input_salt_and_pepper.set_value(new_salt_pepper)
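 # Hedged usage sketch (constructor signature as used elsewhere in this file;
 # the argument values are placeholders):
 #     gsn = GSN(train_X=train_X, valid_X=valid_X, test_X=test_X,
 #               state=args, logger=logger)
 #     gsn.train()   # anneals the learning rate and both noise levels each epoch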
    def train_RNN_GSN(train_X, train_Y, valid_X, valid_Y, test_X, test_Y):
        # If we are using Hessian-free training
        if state.hessian_free == 1:
            pass
#         gradient_dataset = hf_sequence_dataset([train_X.get_value()], batch_size=None, number_batches=5000)
#         cg_dataset = hf_sequence_dataset([train_X.get_value()], batch_size=None, number_batches=1000)
#         valid_dataset = hf_sequence_dataset([valid_X.get_value()], batch_size=None, number_batches=1000)
#
#         s = x_samples
#         costs = [cost, show_cost]
#         hf_optimizer(params, [Xs], s, costs, u, ua).train(gradient_dataset, cg_dataset, initial_lambda=1.0, preconditioner=True, validation=valid_dataset)

        # If we are using SGD training
        else:
            # Define the re-used loops for f_learn and f_cost
            def apply_cost_function_to_dataset(function, dataset):
                costs = []
                for i in xrange(
                        len(dataset.get_value(borrow=True)) / batch_size):
                    xs = dataset.get_value(
                        borrow=True)[i * batch_size:(i + 1) * batch_size]
                    cost = function(xs)
                    costs.append([cost])
                return numpy.mean(costs)

            logger.log("\n-----------TRAINING RNN-GSN------------\n")
            # TRAINING
            n_epoch = state.n_epoch
            batch_size = state.batch_size
            STOP = False
            counter = 0
            learning_rate.set_value(cast32(
                state.learning_rate))  # learning rate
            times = []
            best_cost = float('inf')
            best_params = None
            patience = 0

            logger.log(['train X size:', str(train_X.shape.eval())])
            logger.log(['valid X size:', str(valid_X.shape.eval())])
            logger.log(['test X size:', str(test_X.shape.eval())])

            if state.vis_init:
                bias_list[0].set_value(
                    logit(
                        numpy.clip(0.9, 0.001,
                                   train_X.get_value().mean(axis=0))))

            if state.test_model:
                # If testing, do not train and go directly to generating samples, parzen window estimation, and inpainting
                logger.log('Testing : skip training')
                STOP = True

            while not STOP:
                counter += 1
                t = time.time()
                logger.append([counter, '\t'])

                #shuffle the data
                data.sequence_mnist_data(train_X, train_Y, valid_X, valid_Y,
                                         test_X, test_Y, dataset, rng)

                #train
                train_costs = apply_cost_function_to_dataset(f_learn, train_X)
                # record it
                logger.append(['Train:', trunc(train_costs), '\t'])
                with open(train_convergence, 'a') as f:
                    f.write("{0!s},".format(train_costs))
                    f.write("\n")

                #valid
                valid_costs = apply_cost_function_to_dataset(f_cost, valid_X)
                # record it
                logger.append(['Valid:', trunc(valid_costs), '\t'])
                with open(valid_convergence, 'a') as f:
                    f.write("{0!s},".format(valid_costs))
                    f.write("\n")

                #test
                test_costs = apply_cost_function_to_dataset(f_cost, test_X)
                # record it
                logger.append(['Test:', trunc(test_costs), '\t'])
                with open(test_convergence, 'a') as f:
                    f.write("{0!s},".format(test_costs))
                    f.write("\n")

                #check for early stopping
                cost = numpy.sum(valid_costs)
                if cost < best_cost * state.early_stop_threshold:
                    patience = 0
                    best_cost = cost
                    # save the parameters that made it the best
                    best_params = save_params(params)
                else:
                    patience += 1

                if counter >= n_epoch or patience >= state.early_stop_length:
                    STOP = True
                    if best_params is not None:
                        restore_params(params, best_params)
                    save_params_to_file('all', counter, params)

                timing = time.time() - t
                times.append(timing)

                logger.append('time: ' + make_time_units_string(timing) + '\t')

                logger.log('remaining: ' +
                           make_time_units_string((n_epoch - counter) *
                                                  numpy.mean(times)))

                if (counter % state.save_frequency) == 0 or STOP is True:
                    n_examples = 100
                    nums = test_X.get_value(borrow=True)[range(n_examples)]
                    noisy_nums = f_noise(
                        test_X.get_value(borrow=True)[range(n_examples)])
                    reconstructions = []
                    for i in xrange(0, len(noisy_nums)):
                        recon = f_recon(noisy_nums[max(0, (i + 1) -
                                                       batch_size):i + 1])
                        reconstructions.append(recon)
                    reconstructed = numpy.array(reconstructions)

                    # Concatenate stuff
                    stacked = numpy.vstack([
                        numpy.vstack([
                            nums[i * 10:(i + 1) * 10],
                            noisy_nums[i * 10:(i + 1) * 10],
                            reconstructed[i * 10:(i + 1) * 10]
                        ]) for i in range(10)
                    ])
                    number_reconstruction = PIL.Image.fromarray(
                        tile_raster_images(stacked,
                                           (root_N_input, root_N_input),
                                           (10, 30)))

                    number_reconstruction.save(
                        outdir + 'rnngsn_number_reconstruction_epoch_' +
                        str(counter) + '.png')

                    #sample_numbers(counter, 'seven')
                    plot_samples(counter, 'rnngsn')

                    #save params
                    save_params_to_file('all', counter, params)

                # ANNEAL!
                new_lr = learning_rate.get_value() * annealing
                learning_rate.set_value(new_lr)

            # 10k samples
            print 'Generating 10,000 samples'
            samples, _ = sample_some_numbers(N=10000)
            f_samples = outdir + 'samples.npy'
            numpy.save(f_samples, samples)
            print 'saved digits'
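            # The reconstruction pass above feeds f_recon a sliding window of
            # up to batch_size noisy frames ending at index i
            # (noisy_nums[max(0, (i + 1) - batch_size):i + 1]), so the earliest
            # frames are reconstructed from shorter histories.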
    def train(self, train_X=None, train_Y=None, valid_X=None, valid_Y=None, test_X=None, test_Y=None, is_artificial=False, artificial_sequence=1, continue_training=False):
        log.maybeLog(self.logger, "\nTraining---------\n")
        if train_X is None:
            log.maybeLog(self.logger, "Training using data given during initialization of RNN-GSN.\n")
            train_X = self.train_X
            train_Y = self.train_Y
            if train_X is None:
                log.maybeLog(self.logger, "\nPlease provide a training dataset!\n")
                raise AssertionError("Please provide a training dataset!")
        else:
            log.maybeLog(self.logger, "Training using data provided to training function.\n")
        if valid_X is None:
            valid_X = self.valid_X
            valid_Y = self.valid_Y
        if test_X is None:
            test_X  = self.test_X
            test_Y  = self.test_Y
            
        # Input data - make sure it is a list of shared datasets
        train_X = raise_to_list(train_X)
        train_Y = raise_to_list(train_Y)
        valid_X = raise_to_list(valid_X)
        valid_Y = raise_to_list(valid_Y)
        test_X  = raise_to_list(test_X)
        test_Y  = raise_to_list(test_Y)
            
        ##########################################################
        # Train the GSN first to get good weights initialization #
        ##########################################################
        if self.train_gsn_first:
            log.maybeLog(self.logger, "\n\n----------Initially training the GSN---------\n\n")
            # init_gsn = GSN(train_X=train_X, valid_X=valid_X, test_X=test_X, state=self.gsn_args, logger=self.logger)
            # init_gsn.train()
            print "NOT IMPLEMENTED"
    
        
        #########################################
        # If we are using Hessian-free training #
        #########################################
        if self.hessian_free:
            pass
#         gradient_dataset = hf_sequence_dataset([train_X.get_value()], batch_size=None, number_batches=5000)
#         cg_dataset = hf_sequence_dataset([train_X.get_value()], batch_size=None, number_batches=1000)
#         valid_dataset = hf_sequence_dataset([valid_X.get_value()], batch_size=None, number_batches=1000)
#         
#         s = x_samples
#         costs = [cost, show_cost]
#         hf_optimizer(params, [Xs], s, costs, u, ua).train(gradient_dataset, cg_dataset, initial_lambda=1.0, preconditioner=True, validation=valid_dataset)
        
        ################################
        # If we are using SGD training #
        ################################
        else:
            log.maybeLog(self.logger, "\n-----------TRAINING RNN-GSN------------\n")
            # TRAINING
            STOP        =   False
            counter     =   0
            if not continue_training:
                self.learning_rate.set_value(self.init_learn_rate)  # learning rate
            times = []
            best_cost = float('inf')
            best_params = None
            patience = 0
                        
            log.maybeLog(self.logger, ['train X size:',str(train_X[0].get_value(borrow=True).shape)])
            if valid_X is not None:
                log.maybeLog(self.logger, ['valid X size:',str(valid_X[0].get_value(borrow=True).shape)])
            if test_X is not None:
                log.maybeLog(self.logger, ['test X size:',str(test_X[0].get_value(borrow=True).shape)])
            
            if self.vis_init:
                self.bias_list[0].set_value(logit(numpy.clip(train_X[0].get_value(borrow=True).mean(axis=0), 0.001, 0.9)))  # clip mean activations away from 0/1 before the logit
                
            start_time = time.time()
        
            while not STOP:
                counter += 1
                t = time.time()
                log.maybeAppend(self.logger, [counter,'\t'])
                    
#                 if is_artificial:
#                     data.sequence_mnist_data(train_X[0], train_Y[0], valid_X[0], valid_Y[0], test_X[0], test_Y[0], artificial_sequence, rng)
                     
                #train
                train_costs = []
                train_errors = []
                for train_data in train_X:
                    costs_and_errors = data.apply_cost_function_to_dataset(self.f_learn, train_data, self.batch_size)
                    train_costs.extend([cost for (cost, error) in costs_and_errors])
                    train_errors.extend([error for (cost, error) in costs_and_errors])
                log.maybeAppend(self.logger, ['Train:',trunc(numpy.mean(train_costs)),trunc(numpy.mean(train_errors)),'\t'])
         
         
                #valid
                if valid_X is not None:
                    valid_costs = []
                    for valid_data in valid_X:
                        cs = data.apply_cost_function_to_dataset(self.f_cost, valid_data, self.batch_size)
                        valid_costs.extend([c for c,e in cs])
                    log.maybeAppend(self.logger, ['Valid:',trunc(numpy.mean(valid_costs)), '\t'])
         
         
                #test
                if test_X is not None:
                    test_costs = []
                    test_errors = []
                    for test_data in test_X:
                        costs_and_errors = data.apply_cost_function_to_dataset(self.f_cost, test_data, self.batch_size)
                        test_costs.extend([cost for (cost, error) in costs_and_errors])
                        test_errors.extend([error for (cost, error) in costs_and_errors])
                    log.maybeAppend(self.logger, ['Test:',trunc(numpy.mean(test_costs)),trunc(numpy.mean(test_errors)), '\t'])
                
                 
                #check for early stopping
                if valid_X is not None:
                    cost = numpy.sum(valid_costs)
                else:
                    cost = numpy.sum(train_costs)
                if cost < best_cost*self.early_stop_threshold:
                    patience = 0
                    best_cost = cost
                    # save the parameters that made it the best
                    best_params = copy_params(self.params)
                else:
                    patience += 1
         
                if counter >= self.n_epoch or patience >= self.early_stop_length:
                    STOP = True
                    if best_params is not None:
                        restore_params(self.params, best_params)
                    self.save_params('all', counter, self.params)
         
                timing = time.time() - t
                times.append(timing)
         
                log.maybeAppend(self.logger, 'time: '+make_time_units_string(timing)+'\t')
            
                log.maybeLog(self.logger, 'remaining: '+make_time_units_string((self.n_epoch - counter) * numpy.mean(times)))
        
                if (counter % self.save_frequency) == 0 or STOP:
                    n_examples = 100
                    xs_test = test_X[0].get_value(borrow=True)[range(n_examples)]
                    noisy_xs_test = self.f_noise(test_X[0].get_value(borrow=True)[range(n_examples)])
                    reconstructions = []
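                    # Reconstruct one frame at a time, feeding a sliding window
                    # of up to batch_size noisy frames ending at i so the RNN
                    # state sees the preceding sequence; keep only the newest
                    # reconstruction from each window.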
                    for i in xrange(0, len(noisy_xs_test)):
                        recon, recon_cost = self.f_recon(noisy_xs_test[max(0,(i+1)-self.batch_size):i+1])
                        reconstructions.append(recon[-1])
                    reconstructed = numpy.array(reconstructions)
                    if self.is_image:
                        # Concatenate stuff
                        stacked = numpy.vstack([numpy.vstack([xs_test[i*10 : (i+1)*10], noisy_xs_test[i*10 : (i+1)*10], reconstructed[i*10 : (i+1)*10]]) for i in range(10)])
                        number_reconstruction = PIL.Image.fromarray(tile_raster_images(stacked, (self.image_height, self.image_width), (10,30)))
                            
                        number_reconstruction.save(self.outdir+'rnngsn_reconstruction_epoch_'+str(counter)+'.png')
            
                        #sample_numbers(counter, 'seven')
                        # plot_samples(counter, 'rnngsn')

            
                    #save params
                    self.save_params('all', counter, self.params)
             
                # ANNEAL!
                new_lr = self.learning_rate.get_value() * self.annealing
                self.learning_rate.set_value(new_lr)
                
                new_noise = self.input_salt_and_pepper.get_value() * self.noise_annealing
                self.input_salt_and_pepper.set_value(new_noise)
                
            log.maybeLog(self.logger, "\n------------TOTAL RNN-GSN TRAIN TIME TOOK {0!s}---------".format(make_time_units_string(time.time()-start_time)))
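
# --- Editor's sketch (not part of the original source): the loop above uses
# patience-based early stopping with in-memory checkpointing of the best
# parameters. The same logic distilled, assuming costs arrive one per epoch:
def early_stopping_step(cost, state, threshold=0.99, max_patience=30):
    """Update `state` (a dict holding best_cost and patience) after one epoch
    and return True when training should stop. threshold < 1 demands a
    relative improvement, mirroring early_stop_threshold above."""
    if cost < state['best_cost'] * threshold:
        state['patience'] = 0
        state['best_cost'] = cost  # a real trainer would also checkpoint params here
    else:
        state['patience'] += 1
    return state['patience'] >= max_patience

# e.g. state = {'best_cost': float('inf'), 'patience': 0}; feed each epoch's
# validation cost in and stop once early_stopping_step(...) returns True.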
Exemple #19
    def train_GSN(iteration, train_X, train_Y, valid_X, valid_Y, test_X, test_Y):
        logger.log('----------------TRAINING GSN FOR ITERATION ' + str(iteration) + "--------------\n")

        # TRAINING
        n_epoch = state.n_epoch
        batch_size = state.batch_size
        STOP = False
        counter = 0
        if iteration == 0:
            learning_rate.set_value(cast32(state.learning_rate))  # learning rate
        times = []
        best_cost = float('inf')
        best_params = None
        patience = 0

        logger.log(['learning rate:', learning_rate.get_value()])

        logger.log(['train X size:', str(train_X.shape.eval())])
        logger.log(['valid X size:', str(valid_X.shape.eval())])
        logger.log(['test X size:', str(test_X.shape.eval())])

        if state.vis_init:
            bias_list[0].set_value(logit(numpy.clip(train_X.get_value().mean(axis=0), 0.001, 0.9)))  # clip mean activations away from 0/1 before the logit

        if state.test_model:
            # If testing, do not train and go directly to generating samples, parzen window estimation, and inpainting
            logger.log('Testing : skip training')
            STOP = True

        while not STOP:
            counter += 1
            t = time.time()
            logger.append([counter, '\t'])

            # shuffle the data
            # data.sequence_mnist_data(train_X, train_Y, valid_X, valid_Y, test_X, test_Y, dataset, rng)

            # train
            train_costs = []
            train_errors = []
            if iteration == 0:
                for i in range(len(train_X.get_value(borrow=True)) // batch_size):
                    x = train_X.get_value(borrow=True)[i * batch_size: (i + 1) * batch_size]
                    cost, error = gsn_f_learn_init(x)
                    train_costs.append([cost])
                    train_errors.append([error])
            else:
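                # After the first iteration, training switches to sequence
                # windows: each entry of xs is the same minibatch shifted
                # forward by one time step, so the GSN sees len(Xs)
                # consecutive frames per update.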
                for i in range(len(train_X.get_value(borrow=True)) // batch_size):
                    xs = [train_X.get_value(borrow=True)[
                          (i * batch_size) + sequence_idx: ((i + 1) * batch_size) + sequence_idx] for sequence_idx in
                          range(len(Xs))]
                    xs, _ = fix_input_size(xs)
                    _ins = xs  # + [sequence_window_size]
                    cost, error = gsn_f_learn(*_ins)
                    train_costs.append(cost)
                    train_errors.append(error)

            train_costs = numpy.mean(train_costs)
            train_errors = numpy.mean(train_errors)
            logger.append(['Train: ', trunc(train_costs), trunc(train_errors), '\t'])
            with open(train_convergence, 'a') as f:
                f.write("{0!s},".format(train_costs))
                f.write("\n")

            # valid
            valid_costs = []
            if iteration == 0:
                for i in range(len(valid_X.get_value(borrow=True)) // batch_size):
                    x = valid_X.get_value(borrow=True)[i * batch_size: (i + 1) * batch_size]
                    cost, _ = gsn_f_cost_init(x)
                    valid_costs.append([cost])
            else:
                for i in range(len(valid_X.get_value(borrow=True)) // batch_size):
                    xs = [valid_X.get_value(borrow=True)[
                          (i * batch_size) + sequence_idx: ((i + 1) * batch_size) + sequence_idx] for sequence_idx in
                          range(len(Xs))]
                    xs, _ = fix_input_size(xs)
                    _ins = xs  # + [sequence_window_size]
                    costs, _ = gsn_f_cost(*_ins)
                    valid_costs.append(costs)

            valid_costs = numpy.mean(valid_costs)
            logger.append(['Valid: ', trunc(valid_costs), '\t'])
            with open(valid_convergence, 'a') as f:
                f.write("{0!s},".format(valid_costs))
                f.write("\n")

            # test
            test_costs = []
            test_errors = []
            if iteration == 0:
                for i in range(len(test_X.get_value(borrow=True)) // batch_size):
                    x = test_X.get_value(borrow=True)[i * batch_size: (i + 1) * batch_size]
                    cost, error = gsn_f_cost_init(x)
                    test_costs.append([cost])
                    test_errors.append([error])
            else:
                for i in range(len(test_X.get_value(borrow=True)) // batch_size):
                    xs = [test_X.get_value(borrow=True)[
                          (i * batch_size) + sequence_idx: ((i + 1) * batch_size) + sequence_idx] for sequence_idx in
                          range(len(Xs))]
                    xs, _ = fix_input_size(xs)
                    _ins = xs  # + [sequence_window_size]
                    costs, errors = gsn_f_cost(*_ins)
                    test_costs.append(costs)
                    test_errors.append(errors)

            test_costs = numpy.mean(test_costs)
            test_errors = numpy.mean(test_errors)
            logger.append(['Test: ', trunc(test_costs), trunc(test_errors), '\t'])
            with open(test_convergence, 'a') as f:
                f.write("{0!s},".format(test_costs))
                f.write("\n")

            # check for early stopping
            cost = numpy.sum(valid_costs)
            if cost < best_cost * state.early_stop_threshold:
                patience = 0
                best_cost = cost
                # save the parameters that made it the best
                best_params = save_params(gsn_params)
            else:
                patience += 1

            if counter >= n_epoch or patience >= state.early_stop_length:
                STOP = True
                if best_params is not None:
                    restore_params(gsn_params, best_params)
                save_params_to_file('gsn', counter, gsn_params, iteration)
                logger.log(["next learning rate should be", learning_rate.get_value() * annealing])

            timing = time.time() - t
            times.append(timing)

            logger.append('time: ' + make_time_units_string(timing))

            logger.log('remaining: ' + make_time_units_string((n_epoch - counter) * numpy.mean(times)))

            if (counter % state.save_frequency) == 0 or STOP:
                n_examples = 100
                if iteration == 0:
                    random_idx = numpy.array(R.sample(range(len(test_X.get_value())), n_examples))
                    numbers = test_X.get_value()[random_idx]
                    noisy_numbers = f_noise(test_X.get_value()[random_idx])
                    reconstructed = f_recon_init(noisy_numbers)
                    # Concatenate stuff
                    stacked = numpy.vstack([numpy.vstack(
                        [numbers[i * 10: (i + 1) * 10], noisy_numbers[i * 10: (i + 1) * 10],
                         reconstructed[i * 10: (i + 1) * 10]]) for i in range(10)])
                    number_reconstruction = PIL.Image.fromarray(
                        tile_raster_images(stacked, (root_N_input, root_N_input), (10, 30)))
                else:
                    n_examples = n_examples + sequence_window_size
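                    # Grab sequence_window_size extra frames so 100 full
                    # reconstructions remain after the warm-up frames are
                    # trimmed off below.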
                    # Checking reconstruction
                    # grab 100 numbers in the sequence from the test set
                    nums = test_X.get_value()[range(n_examples)]
                    noisy_nums = f_noise(test_X.get_value()[range(n_examples)])

                    reconstructed_prediction = []
                    reconstructed = []
                    for i in range(n_examples):
                        if i >= sequence_window_size:
                            xs = [noisy_nums[i - x] for x in range(len(Xs))]
                            xs.reverse()
                            _ins = xs  # + [sequence_window_size]
                            _outs = f_recon(*_ins)
                            prediction = _outs[0]
                            reconstruction = _outs[1]
                            reconstructed_prediction.append(prediction)
                            reconstructed.append(reconstruction)
                    nums = nums[sequence_window_size:]
                    noisy_nums = noisy_nums[sequence_window_size:]
                    reconstructed_prediction = numpy.array(reconstructed_prediction)
                    reconstructed = numpy.array(reconstructed)

                    # Concatenate stuff
                    stacked = numpy.vstack([numpy.vstack([nums[i * 10: (i + 1) * 10], noisy_nums[i * 10: (i + 1) * 10],
                                                          reconstructed_prediction[i * 10: (i + 1) * 10],
                                                          reconstructed[i * 10: (i + 1) * 10]]) for i in range(10)])
                    number_reconstruction = PIL.Image.fromarray(
                        tile_raster_images(stacked, (root_N_input, root_N_input), (10, 40)))

                # epoch_number    =   reduce(lambda x,y : x + y, ['_'] * (4-len(str(counter)))) + str(counter)
                number_reconstruction.save(
                    outdir + 'gsn_number_reconstruction_iteration_' + str(iteration) + '_epoch_' + str(
                        counter) + '.png')

                # sample_numbers(counter, 'seven')
                plot_samples(counter, iteration)

                # save gsn_params
                save_params_to_file('gsn', counter, gsn_params, iteration)

            # ANNEAL!
            new_lr = learning_rate.get_value() * annealing
            learning_rate.set_value(new_lr)

        # 10k samples
        logger.log('Generating 10,000 samples')
        samples, _ = sample_some_numbers(N=10000)
        f_samples = outdir + 'samples.npy'
        numpy.save(f_samples, samples)
        logger.log('saved digits')
    def train_RNN_GSN(train_X, train_Y, valid_X, valid_Y, test_X, test_Y):
        # If we are using Hessian-free training
        if state.hessian_free == 1:
            pass
#         gradient_dataset = hf_sequence_dataset([train_X.get_value()], batch_size=None, number_batches=5000)
#         cg_dataset = hf_sequence_dataset([train_X.get_value()], batch_size=None, number_batches=1000)
#         valid_dataset = hf_sequence_dataset([valid_X.get_value()], batch_size=None, number_batches=1000)
#         
#         s = x_samples
#         costs = [cost, show_cost]
#         hf_optimizer(params, [Xs], s, costs, u, ua).train(gradient_dataset, cg_dataset, initial_lambda=1.0, preconditioner=True, validation=valid_dataset)
        
        # If we are using SGD training
        else:
            # Define the re-used loops for f_learn and f_cost
            def apply_cost_function_to_dataset(function, dataset):
                costs = []
                for i in xrange(len(dataset.get_value(borrow=True)) // batch_size):
                    xs = dataset.get_value(borrow=True)[i * batch_size : (i+1) * batch_size]
                    cost = function(xs)
                    costs.append([cost])
                return numpy.mean(costs)
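            # Note: the floor division above drops any final partial batch, so
            # each reported cost is a mean over full minibatches only.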
            
            logger.log("\n-----------TRAINING RNN-GSN------------\n")
            # TRAINING
            n_epoch     =   state.n_epoch
            batch_size  =   state.batch_size
            STOP        =   False
            counter     =   0
            learning_rate.set_value(cast32(state.learning_rate))  # learning rate
            times = []
            best_cost = float('inf')
            best_params = None
            patience = 0
                        
            logger.log(['train X size:',str(train_X.shape.eval())])
            logger.log(['valid X size:',str(valid_X.shape.eval())])
            logger.log(['test X size:',str(test_X.shape.eval())])
            
            if state.vis_init:
                bias_list[0].set_value(logit(numpy.clip(train_X.get_value().mean(axis=0), 0.001, 0.9)))  # clip mean activations away from 0/1 before the logit
        
            if state.test_model:
                # If testing, do not train and go directly to generating samples, parzen window estimation, and inpainting
                logger.log('Testing : skip training')
                STOP    =   True
        
            while not STOP:
                counter += 1
                t = time.time()
                logger.append([counter,'\t'])
                    
                #shuffle the data
                data.sequence_mnist_data(train_X, train_Y, valid_X, valid_Y, test_X, test_Y, dataset, rng)
                     
                #train
                train_costs = apply_cost_function_to_dataset(f_learn, train_X)
                # record it
                logger.append(['Train:',trunc(train_costs),'\t'])
                with open(train_convergence,'a') as f:
                    f.write("{0!s},".format(train_costs))
                    f.write("\n")
         
         
                #valid
                valid_costs = apply_cost_function_to_dataset(f_cost, valid_X)
                # record it
                logger.append(['Valid:',trunc(valid_costs), '\t'])
                with open(valid_convergence,'a') as f:
                    f.write("{0!s},".format(valid_costs))
                    f.write("\n")
         
         
                #test
                test_costs = apply_cost_function_to_dataset(f_cost, test_X)
                # record it 
                logger.append(['Test:',trunc(test_costs), '\t'])
                with open(test_convergence,'a') as f:
                    f.write("{0!s},".format(test_costs))
                    f.write("\n")
                 
                 
                #check for early stopping
                cost = numpy.sum(valid_costs)
                if cost < best_cost*state.early_stop_threshold:
                    patience = 0
                    best_cost = cost
                    # save the parameters that made it the best
                    best_params = save_params(params)
                else:
                    patience += 1
         
                if counter >= n_epoch or patience >= state.early_stop_length:
                    STOP = True
                    if best_params is not None:
                        restore_params(params, best_params)
                    save_params_to_file('all', counter, params)
         
                timing = time.time() - t
                times.append(timing)
         
                logger.append('time: '+make_time_units_string(timing)+'\t')
            
                logger.log('remaining: '+make_time_units_string((n_epoch - counter) * numpy.mean(times)))
        
                if (counter % state.save_frequency) == 0 or STOP:
                    n_examples = 100
                    nums = test_X.get_value(borrow=True)[range(n_examples)]
                    noisy_nums = f_noise(test_X.get_value(borrow=True)[range(n_examples)])
                    reconstructions = []
                    for i in xrange(0, len(noisy_nums)):
                        recon = f_recon(noisy_nums[max(0,(i+1)-batch_size):i+1])
                        reconstructions.append(recon)
                    reconstructed = numpy.array(reconstructions)

                    # Concatenate stuff
                    stacked = numpy.vstack([numpy.vstack([nums[i*10 : (i+1)*10], noisy_nums[i*10 : (i+1)*10], reconstructed[i*10 : (i+1)*10]]) for i in range(10)])
                    number_reconstruction = PIL.Image.fromarray(tile_raster_images(stacked, (root_N_input,root_N_input), (10,30)))
                        
                    number_reconstruction.save(outdir+'rnngsn_number_reconstruction_epoch_'+str(counter)+'.png')
            
                    #sample_numbers(counter, 'seven')
                    plot_samples(counter, 'rnngsn')
            
                    #save params
                    save_params_to_file('all', counter, params)
             
                # ANNEAL!
                new_lr = learning_rate.get_value() * annealing
                learning_rate.set_value(new_lr)
    
            
            # 10k samples
            print 'Generating 10,000 samples'
            samples, _  =   sample_some_numbers(N=10000)
            f_samples   =   outdir+'samples.npy'
            numpy.save(f_samples, samples)
            print 'saved digits'
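
# --- Editor's sketch (not part of the original source): the reconstruction
# montages above stack images so each display row of the (10, 30) grid shows
# 10 originals, 10 noisy inputs, then 10 reconstructions. The same vstack
# pattern, factored into a helper for clarity (illustrative only):
import numpy

def stack_triples(originals, noisy, reconstructed, rows=10, per_row=10):
    """Interleave blocks of originals/noisy/reconstructed row-wise, producing
    the array that tile_raster_images lays out as a (rows, 3*per_row) grid."""
    blocks = []
    for i in range(rows):
        s = slice(i * per_row, (i + 1) * per_row)
        blocks.append(numpy.vstack([originals[s], noisy[s], reconstructed[s]]))
    return numpy.vstack(blocks)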
    def train(self, train_X=None, train_Y=None, valid_X=None, valid_Y=None, test_X=None, test_Y=None, is_artificial=False, artificial_sequence=1, continue_training=False):
        log.maybeLog(self.logger, "\nTraining---------\n")
        if train_X is None:
            log.maybeLog(self.logger, "Training using data given during initialization of RNN-GSN.\n")
            train_X = self.train_X
            train_Y = self.train_Y
            if train_X is None:
                log.maybeLog(self.logger, "\nPlease provide a training dataset!\n")
                raise AssertionError("Please provide a training dataset!")
        else:
            log.maybeLog(self.logger, "Training using data provided to training function.\n")
        if valid_X is None:
            valid_X = self.valid_X
            valid_Y = self.valid_Y
        if test_X is None:
            test_X  = self.test_X
            test_Y  = self.test_Y
            
        ##########################################################
        # Train the GSN first to get good weights initialization #
        ##########################################################
        if self.train_gsn_first:
            log.maybeLog(self.logger, "\n\n----------Initially training the GSN---------\n\n")
            init_gsn = generative_stochastic_network.GSN(train_X=train_X, valid_X=valid_X, test_X=test_X, args=self.gsn_args, logger=self.logger)
            init_gsn.train()
    
        #############################
        # Save the model parameters #
        #############################
        def save_params_to_file(name, n, gsn_params):
            print 'saving parameters...'
            save_path = self.outdir+name+'_params_epoch_'+str(n)+'.pkl'
            with open(save_path, 'wb') as f:
                cPickle.dump(gsn_params, f, protocol=cPickle.HIGHEST_PROTOCOL)
                
        def save_params(params):
            values = [param.get_value(borrow=True) for param in params]
            return values
        
        def restore_params(params, values):
            for i in range(len(params)):
                params[i].set_value(values[i])
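
        # save_params/restore_params checkpoint shared-variable values in
        # memory for early stopping; save_params_to_file pickles them to disk.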
    
        
        #########################################
        # If we are using Hessian-free training #
        #########################################
        if self.hessian_free:
            pass
#         gradient_dataset = hf_sequence_dataset([train_X.get_value()], batch_size=None, number_batches=5000)
#         cg_dataset = hf_sequence_dataset([train_X.get_value()], batch_size=None, number_batches=1000)
#         valid_dataset = hf_sequence_dataset([valid_X.get_value()], batch_size=None, number_batches=1000)
#         
#         s = x_samples
#         costs = [cost, show_cost]
#         hf_optimizer(params, [Xs], s, costs, u, ua).train(gradient_dataset, cg_dataset, initial_lambda=1.0, preconditioner=True, validation=valid_dataset)
        
        ################################
        # If we are using SGD training #
        ################################
        else:
            log.maybeLog(self.logger, "\n-----------TRAINING RNN-GSN------------\n")
            # TRAINING
            STOP        =   False
            counter     =   0
            if not continue_training:
                self.learning_rate.set_value(self.init_learn_rate)  # learning rate
            times = []
            best_cost = float('inf')
            best_params = None
            patience = 0
                        
            log.maybeLog(self.logger, ['train X size:',str(train_X.shape.eval())])
            if valid_X is not None:
                log.maybeLog(self.logger, ['valid X size:',str(valid_X.shape.eval())])
            if test_X is not None:
                log.maybeLog(self.logger, ['test X size:',str(test_X.shape.eval())])
            
            if self.vis_init:
                self.bias_list[0].set_value(logit(numpy.clip(train_X.get_value().mean(axis=0), 0.001, 0.9)))  # clip mean activations away from 0/1 before the logit
        
            while not STOP:
                counter += 1
                t = time.time()
                log.maybeAppend(self.logger, [counter,'\t'])
                    
                if is_artificial:
                    data.sequence_mnist_data(train_X, train_Y, valid_X, valid_Y, test_X, test_Y, artificial_sequence, rng)
                     
                #train
                train_costs = data.apply_cost_function_to_dataset(self.f_learn, train_X, self.batch_size)
                # record it
                log.maybeAppend(self.logger, ['Train:',trunc(train_costs),'\t'])
         
         
                #valid
                valid_costs = data.apply_cost_function_to_dataset(self.f_cost, valid_X, self.batch_size)
                # record it
                log.maybeAppend(self.logger, ['Valid:',trunc(valid_costs), '\t'])
         
         
                #test
                test_costs = data.apply_cost_function_to_dataset(self.f_cost, test_X, self.batch_size)
                # record it 
                log.maybeAppend(self.logger, ['Test:',trunc(test_costs), '\t'])
                 
                 
                #check for early stopping
                cost = numpy.sum(valid_costs)
                if cost < best_cost*self.early_stop_threshold:
                    patience = 0
                    best_cost = cost
                    # save the parameters that made it the best
                    best_params = save_params(self.params)
                else:
                    patience += 1
         
                if counter >= self.n_epoch or patience >= self.early_stop_length:
                    STOP = True
                    if best_params is not None:
                        restore_params(self.params, best_params)
                    save_params_to_file('all', counter, self.params)
         
                timing = time.time() - t
                times.append(timing)
         
                log.maybeAppend(self.logger, 'time: '+make_time_units_string(timing)+'\t')
            
                log.maybeLog(self.logger, 'remaining: '+make_time_units_string((self.n_epoch - counter) * numpy.mean(times)))
        
                if (counter % self.save_frequency) == 0 or STOP is True:
                    n_examples = 100
                    nums = test_X.get_value(borrow=True)[range(n_examples)]
                    noisy_nums = self.f_noise(test_X.get_value(borrow=True)[range(n_examples)])
                    reconstructions = []
                    for i in xrange(0, len(noisy_nums)):
                        recon = self.f_recon(noisy_nums[max(0,(i+1)-self.batch_size):i+1])
                        reconstructions.append(recon)
                    reconstructed = numpy.array(reconstructions)

                    # Concatenate stuff
                    stacked = numpy.vstack([numpy.vstack([nums[i*10 : (i+1)*10], noisy_nums[i*10 : (i+1)*10], reconstructed[i*10 : (i+1)*10]]) for i in range(10)])
                    number_reconstruction = PIL.Image.fromarray(tile_raster_images(stacked, (self.root_N_input,self.root_N_input), (10,30)))
                        
                    number_reconstruction.save(self.outdir+'rnngsn_number_reconstruction_epoch_'+str(counter)+'.png')
                    
                    #save params
                    save_params_to_file('all', counter, self.params)
             
                # ANNEAL!
                new_lr = self.learning_rate.get_value() * self.annealing
                self.learning_rate.set_value(new_lr)
Exemple #22
    def train(self,
              train_X=None,
              train_Y=None,
              valid_X=None,
              valid_Y=None,
              test_X=None,
              test_Y=None,
              is_artificial=False,
              artificial_sequence=1,
              continue_training=False):
        log.maybeLog(self.logger, "\nTraining---------\n")
        if train_X is None:
            log.maybeLog(
                self.logger,
                "Training using data given during initialization of RNN-GSN.\n"
            )
            train_X = self.train_X
            train_Y = self.train_Y
            if train_X is None:
                log.maybeLog(self.logger,
                             "\nPlease provide a training dataset!\n")
                raise AssertionError("Please provide a training dataset!")
        else:
            log.maybeLog(
                self.logger,
                "Training using data provided to training function.\n")
        if valid_X is None:
            valid_X = self.valid_X
            valid_Y = self.valid_Y
        if test_X is None:
            test_X = self.test_X
            test_Y = self.test_Y

        ##########################################################
        # Train the GSN first to get good weights initialization #
        ##########################################################
        if self.train_gsn_first:
            log.maybeLog(
                self.logger,
                "\n\n----------Initially training the GSN---------\n\n")
            init_gsn = generative_stochastic_network.GSN(train_X=train_X,
                                                         valid_X=valid_X,
                                                         test_X=test_X,
                                                         args=self.gsn_args,
                                                         logger=self.logger)
            init_gsn.train()

        #############################
        # Save the model parameters #
        #############################
        def save_params_to_file(name, n, gsn_params):
            print 'saving parameters...'
            save_path = self.outdir + name + '_params_epoch_' + str(n) + '.pkl'
            with open(save_path, 'wb') as f:
                cPickle.dump(gsn_params, f, protocol=cPickle.HIGHEST_PROTOCOL)

        def save_params(params):
            values = [param.get_value(borrow=True) for param in params]
            return values

        def restore_params(params, values):
            for i in range(len(params)):
                params[i].set_value(values[i])

        #########################################
        # If we are using Hessian-free training #
        #########################################
        if self.hessian_free:
            pass
#         gradient_dataset = hf_sequence_dataset([train_X.get_value()], batch_size=None, number_batches=5000)
#         cg_dataset = hf_sequence_dataset([train_X.get_value()], batch_size=None, number_batches=1000)
#         valid_dataset = hf_sequence_dataset([valid_X.get_value()], batch_size=None, number_batches=1000)
#
#         s = x_samples
#         costs = [cost, show_cost]
#         hf_optimizer(params, [Xs], s, costs, u, ua).train(gradient_dataset, cg_dataset, initial_lambda=1.0, preconditioner=True, validation=valid_dataset)

        ################################
        # If we are using SGD training #
        ################################
        else:
            log.maybeLog(self.logger,
                         "\n-----------TRAINING RNN-GSN------------\n")
            # TRAINING
            STOP = False
            counter = 0
            if not continue_training:
                self.learning_rate.set_value(
                    self.init_learn_rate)  # learning rate
            times = []
            best_cost = float('inf')
            best_params = None
            patience = 0

            log.maybeLog(
                self.logger,
                ['train X size:', str(train_X.shape.eval())])
            if valid_X is not None:
                log.maybeLog(self.logger,
                             ['valid X size:',
                              str(valid_X.shape.eval())])
            if test_X is not None:
                log.maybeLog(
                    self.logger,
                    ['test X size:', str(test_X.shape.eval())])

            if self.vis_init:
                self.bias_list[0].set_value(
                    logit(
                        numpy.clip(train_X.get_value().mean(axis=0), 0.001,
                                   0.9)))  # clip mean activations away from 0/1

            while not STOP:
                counter += 1
                t = time.time()
                log.maybeAppend(self.logger, [counter, '\t'])

                if is_artificial:
                    data.sequence_mnist_data(train_X, train_Y, valid_X,
                                             valid_Y, test_X, test_Y,
                                             artificial_sequence, rng)

                #train
                train_costs = data.apply_cost_function_to_dataset(
                    self.f_learn, train_X, self.batch_size)
                # record it
                log.maybeAppend(self.logger,
                                ['Train:', trunc(train_costs), '\t'])

                #valid
                valid_costs = data.apply_cost_function_to_dataset(
                    self.f_cost, valid_X, self.batch_size)
                # record it
                log.maybeAppend(self.logger,
                                ['Valid:', trunc(valid_costs), '\t'])

                #test
                test_costs = data.apply_cost_function_to_dataset(
                    self.f_cost, test_X, self.batch_size)
                # record it
                log.maybeAppend(self.logger,
                                ['Test:', trunc(test_costs), '\t'])

                #check for early stopping
                cost = numpy.sum(valid_costs)
                if cost < best_cost * self.early_stop_threshold:
                    patience = 0
                    best_cost = cost
                    # save the parameters that made it the best
                    best_params = save_params(self.params)
                else:
                    patience += 1

                if counter >= self.n_epoch or patience >= self.early_stop_length:
                    STOP = True
                    if best_params is not None:
                        restore_params(self.params, best_params)
                    save_params_to_file('all', counter, self.params)

                timing = time.time() - t
                times.append(timing)

                log.maybeAppend(
                    self.logger,
                    'time: ' + make_time_units_string(timing) + '\t')

                log.maybeLog(
                    self.logger, 'remaining: ' + make_time_units_string(
                        (self.n_epoch - counter) * numpy.mean(times)))

                if (counter % self.save_frequency) == 0 or STOP:
                    n_examples = 100
                    nums = test_X.get_value(borrow=True)[range(n_examples)]
                    noisy_nums = self.f_noise(
                        test_X.get_value(borrow=True)[range(n_examples)])
                    reconstructions = []
                    for i in xrange(0, len(noisy_nums)):
                        recon = self.f_recon(
                            noisy_nums[max(0, (i + 1) - self.batch_size):i +
                                       1])
                        reconstructions.append(recon)
                    reconstructed = numpy.array(reconstructions)

                    # Concatenate stuff
                    stacked = numpy.vstack([
                        numpy.vstack([
                            nums[i * 10:(i + 1) * 10],
                            noisy_nums[i * 10:(i + 1) * 10],
                            reconstructed[i * 10:(i + 1) * 10]
                        ]) for i in range(10)
                    ])
                    number_reconstruction = PIL.Image.fromarray(
                        tile_raster_images(
                            stacked, (self.root_N_input, self.root_N_input),
                            (10, 30)))

                    number_reconstruction.save(
                        self.outdir + 'rnngsn_number_reconstruction_epoch_' +
                        str(counter) + '.png')

                    #save params
                    save_params_to_file('all', counter, self.params)

                # ANNEAL!
                new_lr = self.learning_rate.get_value() * self.annealing
                self.learning_rate.set_value(new_lr)
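
# --- Editor's sketch (not part of the original source): vis_init above seeds
# the visible bias with the logit (inverse sigmoid) of the clipped mean
# training activations. Assuming logit here is the standard inverse sigmoid,
# a minimal definition:
import numpy

def logit(p):
    """Inverse sigmoid; p must lie strictly in (0, 1), hence the clip to
    [0.001, 0.9] before the calls above."""
    return numpy.log(p / (1.0 - p))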