Example #1
 def gen_10k_samples(self):
     for i,x in enumerate(self.test_X):
         log.maybeLog(self.logger, 'Generating 10,000 samples {0!s}/{1!s}'.format(i,len(self.test_X)))
         samples, _ = self.sample(x.get_value()[1:2], 1000, 1)
         f_samples = 'samples_test{0!s}.npy'.format(i)
         numpy.save(f_samples, samples)
         log.maybeLog(self.logger, 'saved digits')
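A minimal usage sketch for reading the arrays written above, assuming the filename pattern produced by f_samples; the index 0 is illustrative.

    import numpy

    # Load the first batch of samples that gen_10k_samples saved above.
    samples = numpy.load('samples_test0.npy')
    print samples.shape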
Example #2
        def recurrent_step(x_t, u_tm1, add_noise):
            # Make current guess for hiddens based on U
            for i in range(self.gsn_layers):
                if i % 2 == 0:
                    log.maybeLog(
                        self.logger, "Using {0!s} and {1!s}".format(
                            self.recurrent_to_gsn_weights_list[(i + 1) / 2],
                            self.bias_list[i + 1]))
            h_t = T.concatenate([
                self.hidden_activation(self.bias_list[i + 1] + T.dot(
                    u_tm1, self.recurrent_to_gsn_weights_list[(i + 1) / 2]))
                for i in range(self.gsn_layers) if i % 2 == 0
            ],
                                axis=0)

            # Make a GSN to update U
            _, hs = generative_stochastic_network.build_gsn(
                x_t, self.weights_list, self.bias_list, add_noise,
                self.noiseless_h1, self.hidden_add_noise_sigma,
                self.input_salt_and_pepper, self.input_sampling, self.MRG,
                self.visible_activation, self.hidden_activation,
                self.walkbacks, self.logger)
            htop_t = hs[-1]
            ins_t = htop_t

            ua_t = T.dot(ins_t, self.W_ins_u) + T.dot(
                u_tm1, self.W_u_u) + self.recurrent_bias
            u_t = self.recurrent_hidden_activation(ua_t)
            return [ua_t, u_t, h_t]
Example #3
     def sample_some_numbers(n_samples):
         # The network's initial state
         init_vis       = initial
         noisy_init_vis = self.f_noise(init_vis)
         
         network_state  = [[noisy_init_vis] + [numpy.zeros((initial.shape[0],self.hidden_size), dtype='float32') for _ in self.bias_list[1:]]]
         
         visible_chain  = [init_vis]
         noisy_h0_chain = [noisy_init_vis]
         sampled_h = []
         
         times = []
         for i in xrange(n_samples-1):
             _t = time.time()
            
             # feed the last state into the network, compute new state, and obtain visible units expectation chain 
             net_state_out, vis_pX_chain = sampling_wrapper(network_state[-1])
 
             # append to the visible chain
             visible_chain += vis_pX_chain
 
             # append state output to the network state chain
             network_state.append(net_state_out)
             
             noisy_h0_chain.append(net_state_out[0])
             
             if i%k == 0:
                 sampled_h.append(T.stack(net_state_out[1:]))
                 if i == k:
                     log.maybeLog(self.logger, "About "+make_time_units_string(numpy.mean(times)*(n_samples-1-i))+" remaining...")
                 
             times.append(time.time() - _t)
 
         log.maybeLog(self.logger, "Sampling done.")
         return numpy.vstack(visible_chain), sampled_h
Example #4
     def sample_some_numbers(n_samples):
         # The network's initial state
         init_vis       = initial
         noisy_init_vis = self.f_noise(init_vis)
         
         network_state  = [[noisy_init_vis] + [numpy.zeros((initial.shape[0],self.hidden_size), dtype='float32') for _ in self.bias_list[1:]]]
         
         visible_chain  = [init_vis]
         noisy_h0_chain = [noisy_init_vis]
         sampled_h = []
         
         times = []
         for i in xrange(n_samples-1):
             _t = time.time()
            
             # feed the last state into the network, compute new state, and obtain visible units expectation chain 
             net_state_out, vis_pX_chain = sampling_wrapper(network_state[-1])
 
             # append to the visible chain
             visible_chain += vis_pX_chain
 
             # append state output to the network state chain
             network_state.append(net_state_out)
             
             noisy_h0_chain.append(net_state_out[0])
             
             if i%k == 0:
                 sampled_h.append(T.stack(net_state_out[1:]))
                 if i == k:
                     log.maybeLog(self.logger, "About "+make_time_units_string(numpy.mean(times)*(n_samples-1-i))+" remaining...")
                 
             times.append(time.time() - _t)
 
         log.maybeLog(self.logger, "Sampling done.")
         return numpy.vstack(visible_chain), sampled_h
Example #5
 def gen_10k_samples(self):
     for i,x in enumerate(self.test_X):
         log.maybeLog(self.logger, 'Generating 10,000 samples {0!s}/{1!s}'.format(i,len(self.test_X)))
         samples, _ = self.sample(x.get_value()[1:2], 1000, 1)
         f_samples = 'samples_test{0!s}.npy'.format(i)
         numpy.save(f_samples, samples)
         log.maybeLog(self.logger, 'saved digits')
Example #6
def build_gsn_given_hiddens(X,
                            hiddens,
                            weights_list,
                            bias_list,
                            add_noise              = defaults["add_noise"],
                            noiseless_h1           = defaults["noiseless_h1"],
                            hidden_add_noise_sigma = defaults["hidden_add_noise_sigma"],
                            input_salt_and_pepper  = defaults["input_salt_and_pepper"],
                            input_sampling         = defaults["input_sampling"],
                            MRG                    = defaults["MRG"],
                            visible_activation     = defaults["visible_activation"],
                            hidden_activation      = defaults["hidden_activation"],
                            walkbacks              = defaults["walkbacks"],
                            cost_function          = defaults["cost_function"],
                            logger = None):
    
    log.maybeLog(logger, ["Building the GSN graph given hiddens with", walkbacks,"walkbacks"])
    p_X_chain = []
    for i in range(walkbacks):
        log.maybeLog(logger, "GSN (prediction) Walkback {!s}/{!s}".format(i+1,walkbacks))
        update_layers_reverse(hiddens, weights_list, bias_list, p_X_chain, add_noise, noiseless_h1, hidden_add_noise_sigma, input_salt_and_pepper, input_sampling, MRG, visible_activation, hidden_activation, logger)
        

    x_sample = p_X_chain[-1]
    
    costs     = [cost_function(rX, X) for rX in p_X_chain]
    show_cost = costs[-1] # for logging to show progress
    cost      = numpy.sum(costs)
    
    return x_sample, cost, show_cost
Example #7
 def test(self, test_X=None):
     log.maybeLog(self.logger, "\nTesting---------\n")
     if test_X is None:
         log.maybeLog(self.logger, "Testing using data given during initialization of GSN.\n")
         test_X  = self.test_X
         if test_X is None:
             log.maybeLog(self.logger, "\nPlease provide a test dataset!\n")
             raise AssertionError("Please provide a test dataset")
     else:
         log.maybeLog(self.logger, "Testing using data provided to test function.\n")
         
     ###########
     # TESTING #
     ###########
     n_examples = 100
     tests = test_X.get_value()[0:n_examples]
     noisy_tests = self.f_noise(test_X.get_value()[0:n_examples])
     cost, reconstructed = self.f_recon(noisy_tests) 
     # Concatenate stuff if it is an image
     if self.is_image:
         stacked = numpy.vstack([numpy.vstack([tests[i*10 : (i+1)*10], noisy_tests[i*10 : (i+1)*10], reconstructed[i*10 : (i+1)*10]]) for i in range(10)])
         number_reconstruction = PIL.Image.fromarray(tile_raster_images(stacked, (self.image_height,self.image_width), (10,30)))
         
         number_reconstruction.save(self.outdir+'gsn_image_reconstruction_test.png')
     # Otherwise, save reconstructed numpy array as csv
     else:
         numpy.savetxt(self.outdir+'gsn_reconstruction_test.csv', reconstructed, delimiter=",")
         
     log.maybeLog(self.logger, "----------------\n\nAverage test cost is "+str(cost)+"\n\n-----------------")
Example #8
 def save_params(self, name, n, params):
     log.maybeLog(self.logger, 'saving parameters...')
     save_path = self.outdir+name+'_params_epoch_'+str(n)+'.pkl'
     f = open(save_path, 'wb')
     try:
         cPickle.dump(params, f, protocol=cPickle.HIGHEST_PROTOCOL)
     finally:
         f.close()
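For symmetry, a minimal sketch of the matching load step, assuming the pickle layout save_params writes; the example path is hypothetical.

    import cPickle

    def load_params_from_file(save_path):
        # Read back the parameter list pickled by save_params above.
        f = open(save_path, 'rb')
        try:
            return cPickle.load(f)
        finally:
            f.close()

    # Hypothetical path following the save_params naming scheme.
    params = load_params_from_file('outputs/all_params_epoch_10.pkl')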
Example #9
 def save_params(self, name, n, params):
     log.maybeLog(self.logger, 'saving parameters...')
     save_path = self.outdir+name+'_params_epoch_'+str(n)+'.pkl'
     f = open(save_path, 'wb')
     try:
         cPickle.dump(params, f, protocol=cPickle.HIGHEST_PROTOCOL)
     finally:
         f.close()
Example #10
 def plot_samples(self, epoch_number="", leading_text="", n_samples=400):
     to_sample = time.time()
     initial = self.test_X.get_value()[:1]
     V = self.sample(initial, n_samples)
     img_samples = PIL.Image.fromarray(tile_raster_images(V, (self.root_N_input,self.root_N_input), (ceil(sqrt(n_samples)), ceil(sqrt(n_samples)))))
     
     fname = self.outdir+leading_text+'samples_epoch_'+str(epoch_number)+'.png'
     img_samples.save(fname) 
      log.maybeLog(self.logger, 'Took ' + str(time.time() - to_sample) + ' to sample '+str(n_samples)+' numbers')
Example #11
 def sample_some_numbers_single_layer(n_samples):
     x0 = initial
     samples = [x0]
     x = self.f_noise(x0)
     for _ in xrange(n_samples-1):
         x = self.f_sample(x)
         samples.append(x)
         x = rng.binomial(n=1, p=x, size=x.shape).astype('float32')
         x = self.f_noise(x)
     
     log.maybeLog(self.logger, "Sampling done.")
     return numpy.vstack(samples), None
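The rng.binomial line is the visible resampling step between denoising passes. A standalone numpy sketch of just that step, with an illustrative probability array:

    import numpy

    rng = numpy.random.RandomState(1)
    # p holds per-unit Bernoulli means in [0, 1], e.g. a sigmoid output.
    p = numpy.array([[0.1, 0.5, 0.9]], dtype='float32')
    x = rng.binomial(n=1, p=p, size=p.shape).astype('float32')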
Example #12
 def sample_some_numbers_single_layer(n_samples):
     x0 = initial
     samples = [x0]
     x = self.f_noise(x0)
     for _ in xrange(n_samples-1):
         x = self.f_sample(x)
         samples.append(x)
         x = rng.binomial(n=1, p=x, size=x.shape).astype('float32')
         x = self.f_noise(x)
     
     log.maybeLog(self.logger, "Sampling done.")
     return numpy.vstack(samples), None
Example #13
def update_layers_scan_step(
        hiddens_t,
        weights_list,
        bias_list,
        add_noise=defaults["add_noise"],
        noiseless_h1=defaults["noiseless_h1"],
        hidden_add_noise_sigma=defaults["hidden_add_noise_sigma"],
        input_salt_and_pepper=defaults["input_salt_and_pepper"],
        input_sampling=defaults["input_sampling"],
        MRG=defaults["MRG"],
        visible_activation=defaults["visible_activation"],
        hidden_activation=defaults["hidden_activation"],
        logger=None):
    p_X_chain = []
    log.maybeLog(logger, "One full update step for layers.")
    # One update over the odd layers + one update over the even layers
    log.maybeLog(logger, 'odd layer updates')
    # update the odd layers
    update_odd_layers(hiddens_t, weights_list, bias_list, add_noise,
                      noiseless_h1, hidden_add_noise_sigma,
                      input_salt_and_pepper, input_sampling, MRG,
                      visible_activation, hidden_activation, logger)
    log.maybeLog(logger, 'even layer updates')
    # update the even layers
    update_even_layers(hiddens_t, weights_list, bias_list, p_X_chain,
                       add_noise, noiseless_h1, hidden_add_noise_sigma,
                       input_salt_and_pepper, input_sampling, MRG,
                       visible_activation, hidden_activation, logger)
    log.maybeLog(logger, 'done full update.\n')
    # return the generated sample and the updated hiddens
    return p_X_chain[0], hiddens_t
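As the name suggests, a step function like this is meant to be driven by theano.scan. A simplified, self-contained sketch with a single hidden tensor (the real code threads a whole list of hidden layers through each step):

    import theano
    import theano.tensor as T

    h0 = T.fmatrix('h0')
    W = T.fmatrix('W')

    def step(h_tm1, W):
        # One simplified layer update per scan iteration.
        return T.tanh(T.dot(h_tm1, W))

    hs, updates = theano.scan(fn=step,
                              outputs_info=[h0],
                              non_sequences=[W],
                              n_steps=10)
    f = theano.function([h0, W], hs[-1], updates=updates)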
Example #14
    def plot_samples(self, epoch_number="", leading_text="", n_samples=400):
        to_sample = time.time()
        initial = self.test_X.get_value()[:1]
        V = self.sample(initial, n_samples)
        img_samples = PIL.Image.fromarray(
            tile_raster_images(V, (self.root_N_input, self.root_N_input),
                               (ceil(sqrt(n_samples)), ceil(sqrt(n_samples)))))

        fname = self.outdir + leading_text + 'samples_epoch_' + str(
            epoch_number) + '.png'
        img_samples.save(fname)
        log.maybeLog(
            self.logger, 'Took ' + str(time.time() - to_sample) +
            ' to sample ' + str(n_samples) + ' numbers')
Example #15
def update_layers_scan_step(hiddens_t,
                            weights_list,
                            bias_list,
                            add_noise=defaults["add_noise"],
                            noiseless_h1=defaults["noiseless_h1"],
                            hidden_add_noise_sigma=defaults["hidden_add_noise_sigma"],
                            input_salt_and_pepper=defaults["input_salt_and_pepper"],
                            input_sampling=defaults["input_sampling"],
                            MRG=defaults["MRG"],
                            visible_activation=defaults["visible_activation"],
                            hidden_activation=defaults["hidden_activation"],
                            logger=None):
    p_X_chain = []
    log.maybeLog(logger, "One full update step for layers.")
    # One update over the odd layers + one update over the even layers
    log.maybeLog(logger, 'odd layer updates')
    # update the odd layers
    update_odd_layers(hiddens_t, weights_list, bias_list, add_noise, noiseless_h1, hidden_add_noise_sigma,
                      input_salt_and_pepper, input_sampling, MRG, visible_activation, hidden_activation, logger)
    log.maybeLog(logger, 'even layer updates')
    # update the even layers
    update_even_layers(hiddens_t, weights_list, bias_list, p_X_chain, add_noise, noiseless_h1, hidden_add_noise_sigma,
                       input_salt_and_pepper, input_sampling, MRG, visible_activation, hidden_activation, logger)
    log.maybeLog(logger, 'done full update.\n')
    # return the generated sample and the updated hiddens
    return p_X_chain[0], hiddens_t
Example #16
 def recurrent_step(x_t, u_tm1, add_noise):
     # Make current guess for hiddens based on U
     for i in range(self.gsn_layers):
         if i%2 == 0:
             log.maybeLog(self.logger, "Using {0!s} and {1!s}".format(self.recurrent_to_gsn_weights_list[(i+1)/2],self.bias_list[i+1]))
     h_t = T.concatenate([self.hidden_activation(self.bias_list[i+1] + T.dot(u_tm1, self.recurrent_to_gsn_weights_list[(i+1)/2])) for i in range(self.gsn_layers) if i%2 == 0],axis=0)
     
     # Make a GSN to update U
     _, hs = generative_stochastic_network.build_gsn(x_t, self.weights_list, self.bias_list, add_noise, self.noiseless_h1, self.hidden_add_noise_sigma, self.input_salt_and_pepper, self.input_sampling, self.MRG, self.visible_activation, self.hidden_activation, self.walkbacks, self.logger)
     htop_t = hs[-1]
     ins_t = htop_t
     
     ua_t = T.dot(ins_t, self.W_ins_u) + T.dot(u_tm1, self.W_u_u) + self.recurrent_bias
     u_t = self.recurrent_hidden_activation(ua_t)
     return [ua_t, u_t, h_t]
Example #17
def update_odd_layers(hiddens,
                      weights_list,
                      bias_list,
                      add_noise              = defaults["add_noise"],
                      noiseless_h1           = defaults["noiseless_h1"],
                      hidden_add_noise_sigma = defaults["hidden_add_noise_sigma"],
                      input_salt_and_pepper  = defaults["input_salt_and_pepper"],
                      input_sampling         = defaults["input_sampling"],
                      MRG                    = defaults["MRG"],
                      visible_activation     = defaults["visible_activation"],
                      hidden_activation      = defaults["hidden_activation"],
                      logger = None):
    # Loop over the odd layers
    for i in range(1, len(hiddens), 2):
        log.maybeLog(logger, ['updating layer',i])
        simple_update_layer(hiddens, weights_list, bias_list, None, i, add_noise, noiseless_h1, hidden_add_noise_sigma, input_salt_and_pepper, input_sampling, MRG, visible_activation, hidden_activation, logger)
Example #18
 def plot_samples(self, epoch_number="", leading_text="", n_samples=400):
     to_sample = time.time()
     initial = self.test_X.get_value(borrow=True)[:1]
     rand_idx = numpy.random.choice(range(self.test_X.get_value(borrow=True).shape[0]))
     rand_init = self.test_X.get_value(borrow=True)[rand_idx:rand_idx+1]
     
     V, _ = self.sample(initial, n_samples)
     rand_V, _ = self.sample(rand_init, n_samples)
     
     img_samples = PIL.Image.fromarray(tile_raster_images(V, (self.image_height, self.image_width), closest_to_square_factors(n_samples)))
     rand_img_samples = PIL.Image.fromarray(tile_raster_images(rand_V, (self.image_height, self.image_width), closest_to_square_factors(n_samples)))
     
     fname = self.outdir+leading_text+'samples_epoch_'+str(epoch_number)+'.png'
     img_samples.save(fname)
     rfname = self.outdir+leading_text+'samples_rand_epoch_'+str(epoch_number)+'.png'
     rand_img_samples.save(rfname) 
     log.maybeLog(self.logger, 'Took ' + make_time_units_string(time.time() - to_sample) + ' to sample '+str(n_samples*2)+' numbers')
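A minimal sketch of the tiling-and-saving pattern used above, assuming the deeplearning.net-style tile_raster_images helper these classes import; the import path and the random data are illustrative.

    import numpy
    import PIL.Image
    from utils import tile_raster_images  # hypothetical module path for the helper

    # 100 fake 28x28 'samples', one flattened image per row.
    V = numpy.random.rand(100, 784).astype('float32')
    img = PIL.Image.fromarray(tile_raster_images(V, (28, 28), (10, 10)))
    img.save('samples_demo.png')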
Example #19
    def test(self, test_X=None):
        log.maybeLog(self.logger, "\nTesting---------\n")
        if test_X is None:
            log.maybeLog(
                self.logger,
                "Testing using data given during initialization of GSN.\n")
            test_X = self.test_X
            if test_X is None:
                log.maybeLog(self.logger, "\nPlease provide a test dataset!\n")
                raise AssertionError("Please provide a test dataset")
        else:
            log.maybeLog(self.logger,
                         "Testing using data provided to test function.\n")

        ###########
        # TESTING #
        ###########
        n_examples = 100
        tests = test_X.get_value()[0:n_examples]
        noisy_tests = self.f_noise(test_X.get_value()[0:n_examples])
        cost, reconstructed = self.f_recon(noisy_tests)
        # Concatenate stuff if it is an image
        if self.is_image:
            stacked = numpy.vstack([
                numpy.vstack([
                    tests[i * 10:(i + 1) * 10],
                    noisy_tests[i * 10:(i + 1) * 10],
                    reconstructed[i * 10:(i + 1) * 10]
                ]) for i in range(10)
            ])
            number_reconstruction = PIL.Image.fromarray(
                tile_raster_images(stacked,
                                   (self.image_height, self.image_width),
                                   (10, 30)))

            number_reconstruction.save(self.outdir +
                                       'gsn_image_reconstruction_test.png')
        # Otherwise, save reconstructed numpy array as csv
        else:
            numpy.savetxt(self.outdir + 'gsn_reconstruction_test.csv',
                          reconstructed,
                          delimiter=",")

        log.maybeLog(
            self.logger, "----------------\n\nAverage test cost is " +
            str(cost) + "\n\n-----------------")
Example #20
 def plot_samples(self, epoch_number="", leading_text="", n_samples=400):
     to_sample = time.time()
     initial = self.test_X.get_value(borrow=True)[:1]
     rand_idx = numpy.random.choice(range(self.test_X.get_value(borrow=True).shape[0]))
     rand_init = self.test_X.get_value(borrow=True)[rand_idx:rand_idx+1]
     
     V, _ = self.sample(initial, n_samples)
     rand_V, _ = self.sample(rand_init, n_samples)
     
     img_samples = PIL.Image.fromarray(tile_raster_images(V, (self.image_height, self.image_width), closest_to_square_factors(n_samples)))
     rand_img_samples = PIL.Image.fromarray(tile_raster_images(rand_V, (self.image_height, self.image_width), closest_to_square_factors(n_samples)))
     
     fname = self.outdir+leading_text+'samples_epoch_'+str(epoch_number)+'.png'
     img_samples.save(fname)
     rfname = self.outdir+leading_text+'samples_rand_epoch_'+str(epoch_number)+'.png'
     rand_img_samples.save(rfname) 
     log.maybeLog(self.logger, 'Took ' + make_time_units_string(time.time() - to_sample) + ' to sample '+str(n_samples*2)+' numbers')
Example #21
def build_gsn_scan(X,
                   weights_list,
                   bias_list,
                   add_noise=defaults["add_noise"],
                   noiseless_h1=defaults["noiseless_h1"],
                   hidden_add_noise_sigma=defaults["hidden_add_noise_sigma"],
                   input_salt_and_pepper=defaults["input_salt_and_pepper"],
                   input_sampling=defaults["input_sampling"],
                   MRG=defaults["MRG"],
                   visible_activation=defaults["visible_activation"],
                   hidden_activation=defaults["hidden_activation"],
                   walkbacks=defaults["walkbacks"],
                   cost_function=defaults["cost_function"],
                   logger=None):

    # Whether or not to corrupt the visible input X
    if add_noise:
        X_init = salt_and_pepper(X, input_salt_and_pepper)
    else:
        X_init = X
    # init hiddens with zeros
    hiddens_0 = [X_init]
    for w in weights_list:
        hiddens_0.append(T.zeros_like(T.dot(hiddens_0[-1], w)))

    log.maybeLog(logger,
                 ["Building the GSN graph with", walkbacks, "walkbacks"])
    p_X_chain = []
    for i in range(walkbacks):
        log.maybeLog(
            logger,
            "GSN (after scan) Walkback {!s}/{!s}".format(i + 1, walkbacks))
        update_layers(hiddens_0, weights_list, bias_list, p_X_chain, add_noise,
                      noiseless_h1, hidden_add_noise_sigma,
                      input_salt_and_pepper, input_sampling, MRG,
                      visible_activation, hidden_activation, logger)

    x_sample = p_X_chain[-1]

    costs = [cost_function(rX, X) for rX in p_X_chain]
    show_cost = costs[-1]  # for logging to show progress
    cost = numpy.sum(costs)

    return x_sample, cost, show_cost  #, updates
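The returned cost is a symbolic sum over the per-walkback reconstruction costs, so training is plain (momentum-free) SGD on it. A self-contained compile sketch, with a stand-in quadratic cost in place of the real GSN graph:

    import numpy
    import theano
    import theano.tensor as T
    from collections import OrderedDict

    X = T.fmatrix('X')
    W = theano.shared(numpy.zeros((784, 500), dtype='float32'), name='W')
    params = [W]

    # Stand-in for the symbolic cost that build_gsn_scan would return.
    cost = T.mean(T.sqr(T.dot(T.dot(X, W), W.T) - X))

    learning_rate = 0.25
    grads = T.grad(cost, params)
    updates = OrderedDict((p, p - learning_rate * g) for p, g in zip(params, grads))
    f_learn = theano.function(inputs=[X], outputs=cost, updates=updates)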
Example #22
def update_odd_layers(
        hiddens,
        weights_list,
        bias_list,
        add_noise=defaults["add_noise"],
        noiseless_h1=defaults["noiseless_h1"],
        hidden_add_noise_sigma=defaults["hidden_add_noise_sigma"],
        input_salt_and_pepper=defaults["input_salt_and_pepper"],
        input_sampling=defaults["input_sampling"],
        MRG=defaults["MRG"],
        visible_activation=defaults["visible_activation"],
        hidden_activation=defaults["hidden_activation"],
        logger=None):
    # Loop over the odd layers
    for i in range(1, len(hiddens), 2):
        log.maybeLog(logger, ['updating layer', i])
        simple_update_layer(hiddens, weights_list, bias_list, None, i,
                            add_noise, noiseless_h1, hidden_add_noise_sigma,
                            input_salt_and_pepper, input_sampling, MRG,
                            visible_activation, hidden_activation, logger)
Example #23
     def recurrent_step(x_t, u_tm1, add_noise):
         # Make current guess for hiddens based on U
         for i in range(self.layers):
             if i%2 == 0:
                 log.maybeLog(self.logger, "Using {0!s} and {1!s}".format(self.recurrent_to_gsn_weights_list[(i+1)/2],self.bias_list[i+1]))
         h_t = T.concatenate([self.hidden_activation(self.bias_list[i+1] + T.dot(u_tm1, self.recurrent_to_gsn_weights_list[(i+1)/2])) for i in range(self.layers) if i%2 == 0],axis=0)
         
         generate = x_t is None
         if generate:
             pass
         
         # Make a GSN to update U
 #         chain, hs = gsn.build_gsn(x_t, weights_list, bias_list, add_noise, state.noiseless_h1, state.hidden_add_noise_sigma, state.input_salt_and_pepper, state.input_sampling, MRG, visible_activation, hidden_activation, walkbacks, logger)
 #         htop_t = hs[-1]
 #         denoised_x_t = chain[-1]
         # Update U
 #         ua_t = T.dot(denoised_x_t, W_x_u) + T.dot(htop_t, W_h_u) + T.dot(u_tm1, W_u_u) + recurrent_bias
         ua_t = T.dot(x_t, self.W_x_u) + T.dot(u_tm1, self.W_u_u) + self.recurrent_bias
         u_t = self.recurrent_hidden_activation(ua_t)
         return None if generate else [ua_t, u_t, h_t]
Example #24
     def recurrent_step(x_t, u_tm1, add_noise):
         # Make current guess for hiddens based on U
         for i in range(self.layers):
             if i%2 == 0:
                 log.maybeLog(self.logger, "Using {0!s} and {1!s}".format(self.recurrent_to_gsn_weights_list[(i+1)/2],self.bias_list[i+1]))
         h_t = T.concatenate([self.hidden_activation(self.bias_list[i+1] + T.dot(u_tm1, self.recurrent_to_gsn_weights_list[(i+1)/2])) for i in range(self.layers) if i%2 == 0],axis=0)
         
         generate = x_t is None
         if generate:
             pass
         
         # Make a GSN to update U
 #         chain, hs = gsn.build_gsn(x_t, weights_list, bias_list, add_noise, state.noiseless_h1, state.hidden_add_noise_sigma, state.input_salt_and_pepper, state.input_sampling, MRG, visible_activation, hidden_activation, walkbacks, logger)
 #         htop_t = hs[-1]
 #         denoised_x_t = chain[-1]
         # Update U
 #         ua_t = T.dot(denoised_x_t, W_x_u) + T.dot(htop_t, W_h_u) + T.dot(u_tm1, W_u_u) + recurrent_bias
         ua_t = T.dot(x_t, self.W_x_u) + T.dot(u_tm1, self.W_u_u) + self.recurrent_bias
         u_t = self.recurrent_hidden_activation(ua_t)
         return None if generate else [ua_t, u_t, h_t]
Example #25
def update_layers_reverse(
        hiddens,
        weights_list,
        bias_list,
        p_X_chain,
        add_noise=defaults["add_noise"],
        noiseless_h1=defaults["noiseless_h1"],
        hidden_add_noise_sigma=defaults["hidden_add_noise_sigma"],
        input_salt_and_pepper=defaults["input_salt_and_pepper"],
        input_sampling=defaults["input_sampling"],
        MRG=defaults["MRG"],
        visible_activation=defaults["visible_activation"],
        hidden_activation=defaults["hidden_activation"],
        logger=None):
    # One update over the even layers + one update over the odd layers
    log.maybeLog(logger, 'even layer updates')
    # update the even layers
    update_even_layers(hiddens, weights_list, bias_list, p_X_chain, add_noise,
                       noiseless_h1, hidden_add_noise_sigma,
                       input_salt_and_pepper, input_sampling, MRG,
                       visible_activation, hidden_activation, logger)
    log.maybeLog(logger, 'odd layer updates')
    # update the odd layers
    update_odd_layers(hiddens, weights_list, bias_list, add_noise,
                      noiseless_h1, hidden_add_noise_sigma,
                      input_salt_and_pepper, input_sampling, MRG,
                      visible_activation, hidden_activation, logger)
    log.maybeLog(logger, 'done full update.\n')
Example #26
def build_gsn_scan(X,
                   weights_list,
                   bias_list,
                   add_noise              = defaults["add_noise"],
                   noiseless_h1           = defaults["noiseless_h1"],
                   hidden_add_noise_sigma = defaults["hidden_add_noise_sigma"],
                   input_salt_and_pepper  = defaults["input_salt_and_pepper"],
                   input_sampling         = defaults["input_sampling"],
                   MRG                    = defaults["MRG"],
                   visible_activation     = defaults["visible_activation"],
                   hidden_activation      = defaults["hidden_activation"],
                   walkbacks              = defaults["walkbacks"],
                   cost_function          = defaults["cost_function"],
                   logger = None):
    
    # Whether or not to corrupt the visible input X
    if add_noise:
        X_init = salt_and_pepper(X, input_salt_and_pepper)
    else:
        X_init = X
    # init hiddens with zeros
    hiddens_0 = [X_init]
    for w in weights_list:
        hiddens_0.append(T.zeros_like(T.dot(hiddens_0[-1], w)))
    
    log.maybeLog(logger, ["Building the GSN graph with", walkbacks,"walkbacks"])
    p_X_chain = []
    for i in range(walkbacks):
        log.maybeLog(logger, "GSN (after scan) Walkback {!s}/{!s}".format(i+1,walkbacks))
        update_layers(hiddens_0, weights_list, bias_list, p_X_chain, add_noise, noiseless_h1, hidden_add_noise_sigma, input_salt_and_pepper, input_sampling, MRG, visible_activation, hidden_activation, logger)
        

    x_sample = p_X_chain[-1]
    
    costs     = [cost_function(rX, X) for rX in p_X_chain]
    show_cost = costs[-1] # for logging to show progress
    cost      = numpy.sum(costs)
    
    return x_sample, cost, show_cost#, updates
Example #27
def build_gsn_given_hiddens(
        X,
        hiddens,
        weights_list,
        bias_list,
        add_noise=defaults["add_noise"],
        noiseless_h1=defaults["noiseless_h1"],
        hidden_add_noise_sigma=defaults["hidden_add_noise_sigma"],
        input_salt_and_pepper=defaults["input_salt_and_pepper"],
        input_sampling=defaults["input_sampling"],
        MRG=defaults["MRG"],
        visible_activation=defaults["visible_activation"],
        hidden_activation=defaults["hidden_activation"],
        walkbacks=defaults["walkbacks"],
        cost_function=defaults["cost_function"],
        logger=None):

    log.maybeLog(
        logger,
        ["Building the GSN graph given hiddens with", walkbacks, "walkbacks"])
    p_X_chain = []
    for i in range(walkbacks):
        log.maybeLog(
            logger,
            "GSN (prediction) Walkback {!s}/{!s}".format(i + 1, walkbacks))
        update_layers_reverse(hiddens, weights_list, bias_list, p_X_chain,
                              add_noise, noiseless_h1, hidden_add_noise_sigma,
                              input_salt_and_pepper, input_sampling, MRG,
                              visible_activation, hidden_activation, logger)

    x_sample = p_X_chain[-1]

    costs = [cost_function(rX, X) for rX in p_X_chain]
    show_cost = costs[-1]  # for logging to show progress
    cost = numpy.sum(costs)

    return x_sample, cost, show_cost
Example #28
 def load_params(self, filename):
     '''
     self.params = self.weights_list + self.bias_list + self.recurrent_to_gsn_weights_list + [self.W_u_u, self.W_x_u, self.recurrent_bias]
     '''
     def set_param(loaded_params, start, param):
         [p.set_value(lp.get_value(borrow=False)) for lp, p in zip(loaded_params[start:start+len(param)], param)]
         return start + len(param)
         
     if os.path.isfile(filename):
         log.maybeLog(self.logger, "\nLoading existing RNN-GSN parameters...")
          loaded_params = cPickle.load(open(filename,'rb'))
         start = 0
         start = set_param(loaded_params, start, self.weights_list)
         start = set_param(loaded_params, start, self.bias_list)
         start = set_param(loaded_params, start, self.recurrent_to_gsn_weights_list)
         set_param(loaded_params, start, self.u_params)
         log.maybeLog(self.logger, "Parameters loaded.\n")
     else:
         log.maybeLog(self.logger, "\n\nCould not find existing RNN-GSN parameter file {}.\n\n".format(filename))
Example #29
 def load_params(self, filename):
     '''
     self.params = self.weights_list + self.bias_list + self.recurrent_to_gsn_weights_list + [self.W_u_u, self.W_x_u, self.recurrent_bias]
     '''
     def set_param(loaded_params, start, param):
         [p.set_value(lp.get_value(borrow=False)) for lp, p in zip(loaded_params[start:start+len(param)], param)]
         return start + len(param)
         
     if os.path.isfile(filename):
         log.maybeLog(self.logger, "\nLoading existing RNN-GSN parameters...")
          loaded_params = cPickle.load(open(filename,'rb'))
         start = 0
         start = set_param(loaded_params, start, self.weights_list)
         start = set_param(loaded_params, start, self.bias_list)
         start = set_param(loaded_params, start, self.recurrent_to_gsn_weights_list)
         set_param(loaded_params, start, self.u_params)
         log.maybeLog(self.logger, "Parameters loaded.\n")
     else:
         log.maybeLog(self.logger, "\n\nCould not find existing RNN-GSN parameter file {}.\n\n".format(filename))
Example #30
def update_layers_reverse(hiddens,
                          weights_list,
                          bias_list,
                          p_X_chain, 
                          add_noise              = defaults["add_noise"],
                          noiseless_h1           = defaults["noiseless_h1"],
                          hidden_add_noise_sigma = defaults["hidden_add_noise_sigma"],
                          input_salt_and_pepper  = defaults["input_salt_and_pepper"],
                          input_sampling         = defaults["input_sampling"],
                          MRG                    = defaults["MRG"],
                          visible_activation     = defaults["visible_activation"],
                          hidden_activation      = defaults["hidden_activation"],
                          logger = None):
    # One update over the even layers + one update over the odd layers
    log.maybeLog(logger, 'even layer updates')
    # update the even layers
    update_even_layers(hiddens, weights_list, bias_list, p_X_chain, add_noise, noiseless_h1, hidden_add_noise_sigma, input_salt_and_pepper, input_sampling, MRG, visible_activation, hidden_activation, logger)
    log.maybeLog(logger, 'odd layer updates')
    # update the odd layers
    update_odd_layers(hiddens, weights_list, bias_list, add_noise, noiseless_h1, hidden_add_noise_sigma, input_salt_and_pepper, input_sampling, MRG, visible_activation, hidden_activation, logger)
    log.maybeLog(logger, 'done full update.\n')
Example #31
 def __init__(self, train_X=None, valid_X=None, test_X=None, args=None, logger=None):
     # Output logger
     self.logger = logger
     self.outdir = args.get("output_path", defaults["output_path"])
     if self.outdir[-1] != '/':
         self.outdir = self.outdir+'/'
     # Input data - make sure it is a list of shared datasets
     self.train_X = raise_data_to_list(train_X)
     self.valid_X = raise_data_to_list(valid_X)
     self.test_X  = raise_data_to_list(test_X)
     
     # variables from the dataset that are used for initialization and image reconstruction
     if train_X is None:
         self.N_input = args.get("input_size")
         if args.get("input_size") is None:
             raise AssertionError("Please either specify input_size in the arguments or provide an example train_X for input dimensionality.")
     else:
         self.N_input = train_X[0].eval().shape[1]
     self.root_N_input = numpy.sqrt(self.N_input)
     
     self.is_image = args.get('is_image', defaults['is_image'])
     if self.is_image:
         self.image_width  = args.get('width', self.root_N_input)
         self.image_height = args.get('height', self.root_N_input)
     
     #######################################
     # Network and training specifications #
     #######################################
     self.layers          = args.get('layers', defaults['layers']) # number hidden layers
     self.walkbacks       = args.get('walkbacks', defaults['walkbacks']) # number of walkbacks
     self.learning_rate   = theano.shared(cast32(args.get('learning_rate', defaults['learning_rate'])))  # learning rate
     self.init_learn_rate = cast32(args.get('learning_rate', defaults['learning_rate']))
     self.momentum        = theano.shared(cast32(args.get('momentum', defaults['momentum']))) # momentum term
     self.annealing       = cast32(args.get('annealing', defaults['annealing'])) # exponential annealing coefficient
     self.noise_annealing = cast32(args.get('noise_annealing', defaults['noise_annealing'])) # exponential noise annealing coefficient
     self.batch_size      = args.get('batch_size', defaults['batch_size'])
     self.n_epoch         = args.get('n_epoch', defaults['n_epoch'])
     self.early_stop_threshold = args.get('early_stop_threshold', defaults['early_stop_threshold'])
     self.early_stop_length = args.get('early_stop_length', defaults['early_stop_length'])
     self.save_frequency  = args.get('save_frequency', defaults['save_frequency'])
     
     self.noiseless_h1           = args.get('noiseless_h1', defaults["noiseless_h1"])
     self.hidden_add_noise_sigma = theano.shared(cast32(args.get('hidden_add_noise_sigma', defaults["hidden_add_noise_sigma"])))
     self.input_salt_and_pepper  = theano.shared(cast32(args.get('input_salt_and_pepper', defaults["input_salt_and_pepper"])))
     self.input_sampling         = args.get('input_sampling', defaults["input_sampling"])
     self.vis_init               = args.get('vis_init', defaults['vis_init'])
     
     self.layer_sizes = [self.N_input] + [args.get('hidden_size', defaults['hidden_size'])] * self.layers # layer sizes, from h0 to hK (h0 is the visible layer)
     
     self.f_recon = None
     self.f_noise = None
     
     # Activation functions!            
     if args.get('hidden_activation') is not None:
         log.maybeLog(self.logger, 'Using specified activation for hiddens')
         self.hidden_activation = args.get('hidden_activation')
     elif args.get('hidden_act') is not None:
         self.hidden_activation = get_activation_function(args.get('hidden_act'))
         log.maybeLog(self.logger, 'Using {0!s} activation for hiddens'.format(args.get('hidden_act')))
     else:
         log.maybeLog(self.logger, "Using default activation for hiddens")
         self.hidden_activation = defaults['hidden_activation']
         
     # Visible layer activation
     if args.get('visible_activation') is not None:
         log.maybeLog(self.logger, 'Using specified activation for visible layer')
         self.visible_activation = args.get('visible_activation')
     elif args.get('visible_act') is not None:
         self.visible_activation = get_activation_function(args.get('visible_act'))
         log.maybeLog(self.logger, 'Using {0!s} activation for visible layer'.format(args.get('visible_act')))
     else:
         log.maybeLog(self.logger, 'Using default activation for visible layer')
         self.visible_activation = defaults['visible_activation']
         
     # Cost function!
     if args.get('cost_function') is not None:
         log.maybeLog(self.logger, '\nUsing specified cost function for training\n')
         self.cost_function = args.get('cost_function')
     elif args.get('cost_funct') is not None:
         self.cost_function = get_cost_function(args.get('cost_funct'))
         log.maybeLog(self.logger, 'Using {0!s} for cost function'.format(args.get('cost_funct')))
     else:
         log.maybeLog(self.logger, '\nUsing default cost function for training\n')
         self.cost_function = defaults['cost_function']
     
     ############################
     # Theano variables and RNG #
     ############################
     self.X   = T.fmatrix('X') # for use in sampling
     self.MRG = RNG_MRG.MRG_RandomStreams(1)
     rng.seed(1)
     
     ###############
     # Parameters! #
     ###############
     # initialize a list of weights and biases based on layer_sizes for the GSN
     if args.get('weights_list') is None:
         self.weights_list = [get_shared_weights(self.layer_sizes[layer], self.layer_sizes[layer+1], name="W_{0!s}_{1!s}".format(layer,layer+1)) for layer in range(self.layers)] # initialize each layer to uniform sample from sqrt(6. / (n_in + n_out))
     else:
         self.weights_list = args.get('weights_list')
     if args.get('bias_list') is None:
         self.bias_list    = [get_shared_bias(self.layer_sizes[layer], name='b_'+str(layer)) for layer in range(self.layers + 1)] # initialize each layer to 0's.
     else:
         self.bias_list    = args.get('bias_list')
     self.params = self.weights_list + self.bias_list
     
     #################
     # Build the GSN #
     #################
     log.maybeLog(self.logger, "\nBuilding GSN graphs for training and testing")
     # GSN for training - with noise
     add_noise = True
     p_X_chain, _ = build_gsn(self.X,
                              self.weights_list,
                              self.bias_list,
                              add_noise,
                              self.noiseless_h1,
                              self.hidden_add_noise_sigma,
                              self.input_salt_and_pepper,
                              self.input_sampling,
                              self.MRG,
                              self.visible_activation,
                              self.hidden_activation,
                              self.walkbacks,
                              self.logger)
     
     # GSN for reconstruction checks along the way - no noise
     add_noise = False
     p_X_chain_recon, _ = build_gsn(self.X,
                                    self.weights_list,
                                    self.bias_list,
                                    add_noise,
                                    self.noiseless_h1,
                                    self.hidden_add_noise_sigma,
                                    self.input_salt_and_pepper,
                                    self.input_sampling,
                                    self.MRG,
                                    self.visible_activation,
                                    self.hidden_activation,
                                    self.walkbacks,
                                    self.logger)
     
     #######################
     # Costs and gradients #
     #######################
     log.maybeLog(self.logger, 'Cost w.r.t p(X|...) at every step in the graph for the GSN')
     gsn_costs     = [self.cost_function(rX, self.X) for rX in p_X_chain]
     show_gsn_cost = gsn_costs[-1] # for logging to show progress
     gsn_cost      = numpy.sum(gsn_costs)
     
     gsn_costs_recon     = [self.cost_function(rX, self.X) for rX in p_X_chain_recon]
     show_gsn_cost_recon = gsn_costs_recon[-1]
     
     log.maybeLog(self.logger, ["gsn params:", self.params])
     
     # Stochastic gradient descent!
     gradient        =   T.grad(gsn_cost, self.params)              
     gradient_buffer =   [theano.shared(numpy.zeros(param.get_value().shape, dtype='float32')) for param in self.params] 
     m_gradient      =   [self.momentum * gb + (cast32(1) - self.momentum) * g for (gb, g) in zip(gradient_buffer, gradient)]
     param_updates   =   [(param, param - self.learning_rate * mg) for (param, mg) in zip(self.params, m_gradient)]
     gradient_buffer_updates = zip(gradient_buffer, m_gradient)
     updates         =   OrderedDict(param_updates + gradient_buffer_updates)
     
     ############
     # Sampling #
     ############
     # the input to the sampling function
     X_sample = T.fmatrix("X_sampling")
     self.network_state_input = [X_sample] + [T.fmatrix("H_sampling_"+str(i+1)) for i in range(self.layers)]
    
     # "Output" state of the network (noisy)
     # initialized with input, then we apply updates
     self.network_state_output = [X_sample] + self.network_state_input[1:]
     visible_pX_chain = []
 
     # ONE update
     log.maybeLog(self.logger, "Performing one walkback in network state sampling.")
     update_layers(self.network_state_output,
                   self.weights_list,
                   self.bias_list,
                   visible_pX_chain, 
                   True,
                   self.noiseless_h1,
                   self.hidden_add_noise_sigma,
                   self.input_salt_and_pepper,
                   self.input_sampling,
                   self.MRG,
                   self.visible_activation,
                   self.hidden_activation,
                   self.logger)
     
     #################################
     #     Create the functions      #
     #################################
     log.maybeLog(self.logger, "Compiling functions...")
     t = time.time()
     
     self.f_learn = theano.function(inputs  = [self.X],
                               updates = updates,
                               outputs = show_gsn_cost,
                               name='gsn_f_learn')
 
     self.f_cost  = theano.function(inputs  = [self.X],
                               outputs = show_gsn_cost,
                               name='gsn_f_cost')
     
     # used for checkpoints and testing - no noise in network
     self.f_recon = theano.function(inputs  = [self.X],
                                    outputs = [show_gsn_cost_recon, p_X_chain_recon[-1]],
                                    name='gsn_f_recon')
     
     self.f_noise = theano.function(inputs = [self.X],
                                    outputs = salt_and_pepper(self.X, self.input_salt_and_pepper),
                                    name='gsn_f_noise')
 
     if self.layers == 1: 
         self.f_sample = theano.function(inputs = [X_sample], 
                                         outputs = visible_pX_chain[-1], 
                                         name='gsn_f_sample_single_layer')
     else:
          # The unused-input warning is expected here: the first odd layers
          # are computed directly from the even layers, so some of the
          # network_state_input tensors never appear in the outputs.
         self.f_sample = theano.function(inputs = self.network_state_input,
                                         outputs = self.network_state_output + visible_pX_chain,
                                         on_unused_input='warn',
                                         name='gsn_f_sample')
     
     log.maybeLog(self.logger, "Compiling done. Took "+make_time_units_string(time.time() - t)+".\n")
Example #32
    def train(self, train_X=None, train_Y=None, valid_X=None, valid_Y=None, test_X=None, test_Y=None, is_artificial=False, artificial_sequence=1, continue_training=False):
        log.maybeLog(self.logger, "\nTraining---------\n")
        if train_X is None:
            log.maybeLog(self.logger, "Training using data given during initialization of RNN-GSN.\n")
            train_X = self.train_X
            train_Y = self.train_Y
            if train_X is None:
                log.maybeLog(self.logger, "\nPlease provide a training dataset!\n")
                raise AssertionError("Please provide a training dataset!")
        else:
            log.maybeLog(self.logger, "Training using data provided to training function.\n")
        if valid_X is None:
            valid_X = self.valid_X
            valid_Y = self.valid_Y
        if test_X is None:
            test_X  = self.test_X
            test_Y  = self.test_Y
            
        # Input data - make sure it is a list of shared datasets
        train_X = raise_to_list(train_X)
        train_Y = raise_to_list(train_Y)
        valid_X = raise_to_list(valid_X)
        valid_Y = raise_to_list(valid_Y)
        test_X  = raise_to_list(test_X)
        test_Y =  raise_to_list(test_Y)
            
        ##########################################################
        # Train the GSN first to get good weights initialization #
        ##########################################################
        if self.train_gsn_first:
            log.maybeLog(self.logger, "\n\n----------Initially training the GSN---------\n\n")
            # init_gsn = GSN(train_X=train_X, valid_X=valid_X, test_X=test_X, state=self.gsn_args, logger=self.logger)
            # init_gsn.train()
            print "NOT IMPLEMENTED"
    
        
        #########################################
        # If we are using Hessian-free training #
        #########################################
        if self.hessian_free:
            pass
#         gradient_dataset = hf_sequence_dataset([train_X.get_value()], batch_size=None, number_batches=5000)
#         cg_dataset = hf_sequence_dataset([train_X.get_value()], batch_size=None, number_batches=1000)
#         valid_dataset = hf_sequence_dataset([valid_X.get_value()], batch_size=None, number_batches=1000)
#         
#         s = x_samples
#         costs = [cost, show_cost]
#         hf_optimizer(params, [Xs], s, costs, u, ua).train(gradient_dataset, cg_dataset, initial_lambda=1.0, preconditioner=True, validation=valid_dataset)
        
        ################################
        # If we are using SGD training #
        ################################
        else:
            log.maybeLog(self.logger, "\n-----------TRAINING RNN-GSN------------\n")
            # TRAINING
            STOP        =   False
            counter     =   0
            if not continue_training:
                self.learning_rate.set_value(self.init_learn_rate)  # learning rate
            times = []
            best_cost = float('inf')
            best_params = None
            patience = 0
                        
            log.maybeLog(self.logger, ['train X size:',str(train_X[0].get_value(borrow=True).shape)])
            if valid_X is not None:
                log.maybeLog(self.logger, ['valid X size:',str(valid_X[0].get_value(borrow=True).shape)])
            if test_X is not None:
                log.maybeLog(self.logger, ['test X size:',str(test_X[0].get_value(borrow=True).shape)])
            
            if self.vis_init:
                self.bias_list[0].set_value(logit(numpy.clip(train_X[0].get_value(borrow=True).mean(axis=0), 0.001, 0.9)))
                
            start_time = time.time()
        
            while not STOP:
                counter += 1
                t = time.time()
                log.maybeAppend(self.logger, [counter,'\t'])
                    
#                 if is_artificial:
#                     data.sequence_mnist_data(train_X[0], train_Y[0], valid_X[0], valid_Y[0], test_X[0], test_Y[0], artificial_sequence, rng)
                     
                #train
                train_costs = []
                train_errors = []
                for train_data in train_X:
                    costs_and_errors = data.apply_cost_function_to_dataset(self.f_learn, train_data, self.batch_size)
                    train_costs.extend([cost for (cost, error) in costs_and_errors])
                    train_errors.extend([error for (cost, error) in costs_and_errors])
                log.maybeAppend(self.logger, ['Train:',trunc(numpy.mean(train_costs)),trunc(numpy.mean(train_errors)),'\t'])
         
         
                #valid
                if valid_X is not None:
                    valid_costs = []
                    for valid_data in valid_X:
                        cs = data.apply_cost_function_to_dataset(self.f_cost, valid_data, self.batch_size)
                        valid_costs.extend([c for c,e in cs])
                    log.maybeAppend(self.logger, ['Valid:',trunc(numpy.mean(valid_costs)), '\t'])
         
         
                #test
                if test_X is not None:
                    test_costs = []
                    test_errors = []
                    for test_data in test_X:
                        costs_and_errors = data.apply_cost_function_to_dataset(self.f_cost, test_data, self.batch_size)
                        test_costs.extend([cost for (cost, error) in costs_and_errors])
                        test_errors.extend([error for (cost, error) in costs_and_errors])
                    log.maybeAppend(self.logger, ['Test:',trunc(numpy.mean(test_costs)),trunc(numpy.mean(test_errors)), '\t'])
                
                 
                #check for early stopping
                if valid_X is not None:
                    cost = numpy.sum(valid_costs)
                else:
                    cost = numpy.sum(train_costs)
                if cost < best_cost*self.early_stop_threshold:
                    patience = 0
                    best_cost = cost
                    # save the parameters that made it the best
                    best_params = copy_params(self.params)
                else:
                    patience += 1
         
                if counter >= self.n_epoch or patience >= self.early_stop_length:
                    STOP = True
                    if best_params is not None:
                        restore_params(self.params, best_params)
                    self.save_params('all', counter, self.params)
         
                timing = time.time() - t
                times.append(timing)
         
                log.maybeAppend(self.logger, 'time: '+make_time_units_string(timing)+'\t')
            
                log.maybeLog(self.logger, 'remaining: '+make_time_units_string((self.n_epoch - counter) * numpy.mean(times)))
        
                if (counter % self.save_frequency) == 0 or STOP is True:
                    n_examples = 100
                    xs_test = test_X[0].get_value(borrow=True)[range(n_examples)]
                    noisy_xs_test = self.f_noise(test_X[0].get_value(borrow=True)[range(n_examples)])
                    reconstructions = []
                    for i in xrange(0, len(noisy_xs_test)):
                        recon, recon_cost = self.f_recon(noisy_xs_test[max(0,(i+1)-self.batch_size):i+1])
                        reconstructions.append(recon[-1])
                    reconstructed = numpy.array(reconstructions)
                    if (self.is_image):
                        # Concatenate stuff
                        stacked = numpy.vstack([numpy.vstack([xs_test[i*10 : (i+1)*10], noisy_xs_test[i*10 : (i+1)*10], reconstructed[i*10 : (i+1)*10]]) for i in range(10)])
                        number_reconstruction = PIL.Image.fromarray(tile_raster_images(stacked, (self.image_height, self.image_width), (10,30)))
                            
                        number_reconstruction.save(self.outdir+'rnngsn_reconstruction_epoch_'+str(counter)+'.png')
            
                        #sample_numbers(counter, 'seven')
                        # plot_samples(counter, 'rnngsn')

            
                    #save params
                    self.save_params('all', counter, self.params)
             
                # ANNEAL!
                new_lr = self.learning_rate.get_value() * self.annealing
                self.learning_rate.set_value(new_lr)
                
                new_noise = self.input_salt_and_pepper.get_value() * self.noise_annealing
                self.input_salt_and_pepper.set_value(new_noise)
                
            log.maybeLog(self.logger, "\n------------TOTAL RNN-GSN TRAIN TIME TOOK {0!s}---------".format(make_time_units_string(time.time()-start_time)))
Example #33
    def train(self, train_X=None, train_Y=None, valid_X=None, valid_Y=None, test_X=None, test_Y=None, is_artificial=False, artificial_sequence=1, continue_training=False):
        log.maybeLog(self.logger, "\nTraining---------\n")
        if train_X is None:
            log.maybeLog(self.logger, "Training using data given during initialization of RNN-GSN.\n")
            train_X = self.train_X
            train_Y = self.train_Y
            if train_X is None:
                log.maybeLog(self.logger, "\nPlease provide a training dataset!\n")
                raise AssertionError("Please provide a training dataset!")
        else:
            log.maybeLog(self.logger, "Training using data provided to training function.\n")
        if valid_X is None:
            valid_X = self.valid_X
            valid_Y = self.valid_Y
        if test_X is None:
            test_X  = self.test_X
            test_Y  = self.test_Y
            
        ##########################################################
        # Train the GSN first to get good weights initialization #
        ##########################################################
        if self.train_gsn_first:
            log.maybeLog(self.logger, "\n\n----------Initially training the GSN---------\n\n")
            init_gsn = generative_stochastic_network.GSN(train_X=train_X, valid_X=valid_X, test_X=test_X, args=self.gsn_args, logger=self.logger)
            init_gsn.train()
    
        #############################
        # Save the model parameters #
        #############################
        def save_params_to_file(name, n, gsn_params):
            print 'saving parameters...'
            save_path = self.outdir+name+'_params_epoch_'+str(n)+'.pkl'
            f = open(save_path, 'wb')
            try:
                cPickle.dump(gsn_params, f, protocol=cPickle.HIGHEST_PROTOCOL)
            finally:
                f.close()
                
        def save_params(params):
            values = [param.get_value(borrow=True) for param in params]
            return values
        
        def restore_params(params, values):
            for i in range(len(params)):
                params[i].set_value(values[i])
    
        
        #########################################
        # If we are using Hessian-free training #
        #########################################
        if self.hessian_free:
            pass  # Hessian-free training is disabled; the original code is kept below for reference.
#         gradient_dataset = hf_sequence_dataset([train_X.get_value()], batch_size=None, number_batches=5000)
#         cg_dataset = hf_sequence_dataset([train_X.get_value()], batch_size=None, number_batches=1000)
#         valid_dataset = hf_sequence_dataset([valid_X.get_value()], batch_size=None, number_batches=1000)
#         
#         s = x_samples
#         costs = [cost, show_cost]
#         hf_optimizer(params, [Xs], s, costs, u, ua).train(gradient_dataset, cg_dataset, initial_lambda=1.0, preconditioner=True, validation=valid_dataset)
        
        ################################
        # If we are using SGD training #
        ################################
        else:
            log.maybeLog(self.logger, "\n-----------TRAINING RNN-GSN------------\n")
            # TRAINING
            STOP        =   False
            counter     =   0
            if not continue_training:
                self.learning_rate.set_value(self.init_learn_rate)  # learning rate
            times = []
            best_cost = float('inf')
            best_params = None
            patience = 0
                        
            log.maybeLog(self.logger, ['train X size:',str(train_X.shape.eval())])
            if valid_X is not None:
                log.maybeLog(self.logger, ['valid X size:',str(valid_X.shape.eval())])
            if test_X is not None:
                log.maybeLog(self.logger, ['test X size:',str(test_X.shape.eval())])
            
            if self.vis_init:
                self.bias_list[0].set_value(logit(numpy.clip(train_X.get_value().mean(axis=0), 0.001, 0.9)))
        
            while not STOP:
                counter += 1
                t = time.time()
                log.maybeAppend(self.logger, [counter,'\t'])
                    
                if is_artificial:
                    data.sequence_mnist_data(train_X, train_Y, valid_X, valid_Y, test_X, test_Y, artificial_sequence, rng)
                     
                #train
                train_costs = data.apply_cost_function_to_dataset(self.f_learn, train_X, self.batch_size)
                # record it
                log.maybeAppend(self.logger, ['Train:',trunc(train_costs),'\t'])
         
         
                #valid
                valid_costs = data.apply_cost_function_to_dataset(self.f_cost, valid_X, self.batch_size)
                # record it
                log.maybeAppend(self.logger, ['Valid:',trunc(valid_costs), '\t'])
         
         
                #test
                test_costs = data.apply_cost_function_to_dataset(self.f_cost, test_X, self.batch_size)
                # record it 
                log.maybeAppend(self.logger, ['Test:',trunc(test_costs), '\t'])
                 
                 
                #check for early stopping
                cost = numpy.sum(valid_costs)
                if cost < best_cost*self.early_stop_threshold:
                    patience = 0
                    best_cost = cost
                    # save the parameters that made it the best
                    best_params = save_params(self.params)
                else:
                    patience += 1
         
                if counter >= self.n_epoch or patience >= self.early_stop_length:
                    STOP = True
                    if best_params is not None:
                        restore_params(self.params, best_params)
                    save_params_to_file('all', counter, self.params)
         
                timing = time.time() - t
                times.append(timing)
         
                log.maybeAppend(self.logger, 'time: '+make_time_units_string(timing)+'\t')
            
                log.maybeLog(self.logger, 'remaining: '+make_time_units_string((self.n_epoch - counter) * numpy.mean(times)))
        
                if (counter % self.save_frequency) == 0 or STOP is True:
                    n_examples = 100
                    nums = test_X.get_value(borrow=True)[range(n_examples)]
                    noisy_nums = self.f_noise(test_X.get_value(borrow=True)[range(n_examples)])
                    reconstructions = []
                    for i in xrange(0, len(noisy_nums)):
                        recon = self.f_recon(noisy_nums[max(0,(i+1)-self.batch_size):i+1])
                        reconstructions.append(recon)
                    reconstructed = numpy.array(reconstructions)

                    # Concatenate stuff
                    stacked = numpy.vstack([numpy.vstack([nums[i*10 : (i+1)*10], noisy_nums[i*10 : (i+1)*10], reconstructed[i*10 : (i+1)*10]]) for i in range(10)])
                    number_reconstruction = PIL.Image.fromarray(tile_raster_images(stacked, (int(self.root_N_input), int(self.root_N_input)), (10,30)))
                        
                    number_reconstruction.save(self.outdir+'rnngsn_number_reconstruction_epoch_'+str(counter)+'.png')
                    
                    #save params
                    save_params_to_file('all', counter, self.params)
             
                # ANNEAL!
                new_lr = self.learning_rate.get_value() * self.annealing
                self.learning_rate.set_value(new_lr)
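The "ANNEAL!" block above implements exponential learning-rate decay: after t epochs the rate is lr_0 * annealing**t. A minimal sketch with assumed values (0.25 and 0.995 are illustrative, not taken from this code):

lr, annealing = 0.25, 0.995
for epoch in range(3):
    lr *= annealing
    print(lr)  # 0.24875, 0.24750625, 0.2462687...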
Ejemplo n.º 34
0
def build_gsn(X,
              weights_list,
              bias_list,
              add_noise              = defaults["add_noise"],
              noiseless_h1           = defaults["noiseless_h1"],
              hidden_add_noise_sigma = defaults["hidden_add_noise_sigma"],
              input_salt_and_pepper  = defaults["input_salt_and_pepper"],
              input_sampling         = defaults["input_sampling"],
              MRG                    = defaults["MRG"],
              visible_activation     = defaults["visible_activation"],
              hidden_activation      = defaults["hidden_activation"],
              walkbacks              = defaults["walkbacks"],
              logger = None):
    """
    Construct a GSN (unimodal transition operator) for k walkbacks on the input X.
    Returns the list of predicted X's after k walkbacks and the resulting layer values.

    @type  X: Theano symbolic variable
    @param X: The variable representing the visible input.
    
    @type  weights_list: List(matrix)
    @param weights_list: The list of weights to use between layers.
    
    @type  bias_list: List(vector)
    @param bias_list: The list of biases to use for each layer.
    
    @type  add_noise: Boolean
    @param add_noise: Whether or not to add noise in the computational graph.
    
    @type  noiseless_h1: Boolean
    @param noiseless_h1: Whether or not to add noise in the first hidden layer.
    
    @type  hidden_add_noise_sigma: Float
    @param hidden_add_noise_sigma: The sigma value for the hidden noise function.
    
    @type  input_salt_and_pepper: Float
    @param input_salt_and_pepper: The amount of masking noise to use.
    
    @type  input_sampling: Boolean
    @param input_sampling: Whether to sample from each walkback prediction (like Gibbs).
    
    @type  MRG: Theano random generator
    @param MRG: Random generator.
    
    @type  visible_activation: Function
    @param visible_activation: The visible layer X activation function.
    
    @type  hidden_activation: Function
    @param hidden_activation: The hidden layer activation function.
    
    @type  walkbacks: Integer
    @param walkbacks: The k number of walkbacks to use for the GSN.
    
    @type  logger: Logger
    @param logger: The output log to use.
    
    @rtype:   List
    @return:  predicted_x_chain, hiddens
    """
    p_X_chain = []
    # Whether or not to corrupt the visible input X
    if add_noise:
        X_init = salt_and_pepper(X, input_salt_and_pepper)
    else:
        X_init = X
    # init hiddens with zeros
    hiddens = [X_init]
    for w in weights_list:
        hiddens.append(T.zeros_like(T.dot(hiddens[-1], w)))
    # The layer update scheme
    log.maybeLog(logger, ["Building the GSN graph :", walkbacks,"updates"])
    for i in range(walkbacks):
        log.maybeLog(logger, "GSN Walkback {!s}/{!s}".format(i+1,walkbacks))
        update_layers(hiddens, weights_list, bias_list, p_X_chain, add_noise, noiseless_h1, hidden_add_noise_sigma, input_salt_and_pepper, input_sampling, MRG, visible_activation, hidden_activation, logger)
        
    return p_X_chain, hiddens
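A minimal usage sketch for build_gsn (not from the original repo; the layer sizes, init scale, and seed are assumptions), building a GSN graph with two hidden layers over a 784-dimensional input:

import numpy
import theano
import theano.tensor as T
import theano.sandbox.rng_mrg as RNG_MRG

layer_sizes = [784, 1000, 1000]
np_rng = numpy.random.RandomState(1)
# shared weights between consecutive layers and a bias per layer
weights_list = [theano.shared(np_rng.uniform(-0.05, 0.05, (layer_sizes[i], layer_sizes[i + 1])).astype('float32'),
                              name="W_{0!s}_{1!s}".format(i, i + 1))
                for i in range(len(layer_sizes) - 1)]
bias_list = [theano.shared(numpy.zeros(size, dtype='float32'), name='b_' + str(i))
             for i, size in enumerate(layer_sizes)]

X = T.fmatrix('X')
p_X_chain, hiddens = build_gsn(X, weights_list, bias_list,
                               add_noise=True,
                               walkbacks=4,
                               MRG=RNG_MRG.MRG_RandomStreams(1))
# p_X_chain[-1] is the reconstruction of X after the final walkback.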
Ejemplo n.º 35
0
    def __init__(self,
                 train_X=None,
                 valid_X=None,
                 test_X=None,
                 args=None,
                 logger=None):
        # Output logger
        self.logger = logger
        self.outdir = args.get("output_path", defaults["output_path"])
        if self.outdir[-1] != '/':
            self.outdir = self.outdir + '/'
        # Input data - make sure it is a list of shared datasets
        self.train_X = raise_data_to_list(train_X)
        self.valid_X = raise_data_to_list(valid_X)
        self.test_X = raise_data_to_list(test_X)

        # variables from the dataset that are used for initialization and image reconstruction
        if train_X is None:
            self.N_input = args.get("input_size")
            if args.get("input_size") is None:
                raise AssertionError(
                    "Please either specify input_size in the arguments or provide an example train_X for input dimensionality."
                )
        else:
            self.N_input = train_X[0].eval().shape[1]
        self.root_N_input = int(numpy.sqrt(self.N_input))

        self.is_image = args.get('is_image', defaults['is_image'])
        if self.is_image:
            self.image_width = args.get('width', self.root_N_input)
            self.image_height = args.get('height', self.root_N_input)

        #######################################
        # Network and training specifications #
        #######################################
        self.layers = args.get('layers',
                               defaults['layers'])  # number hidden layers
        self.walkbacks = args.get('walkbacks',
                                  defaults['walkbacks'])  # number of walkbacks
        self.learning_rate = theano.shared(
            cast32(args.get('learning_rate',
                            defaults['learning_rate'])))  # learning rate
        self.init_learn_rate = cast32(
            args.get('learning_rate', defaults['learning_rate']))
        self.momentum = theano.shared(
            cast32(args.get('momentum',
                            defaults['momentum'])))  # momentum term
        self.annealing = cast32(args.get(
            'annealing',
            defaults['annealing']))  # exponential annealing coefficient
        self.noise_annealing = cast32(
            args.get('noise_annealing', defaults['noise_annealing'])
        )  # exponential noise annealing coefficient
        self.batch_size = args.get('batch_size', defaults['batch_size'])
        self.n_epoch = args.get('n_epoch', defaults['n_epoch'])
        self.early_stop_threshold = args.get('early_stop_threshold',
                                             defaults['early_stop_threshold'])
        self.early_stop_length = args.get('early_stop_length',
                                          defaults['early_stop_length'])
        self.save_frequency = args.get('save_frequency',
                                       defaults['save_frequency'])

        self.noiseless_h1 = args.get('noiseless_h1', defaults["noiseless_h1"])
        self.hidden_add_noise_sigma = theano.shared(
            cast32(
                args.get('hidden_add_noise_sigma',
                         defaults["hidden_add_noise_sigma"])))
        self.input_salt_and_pepper = theano.shared(
            cast32(
                args.get('input_salt_and_pepper',
                         defaults["input_salt_and_pepper"])))
        self.input_sampling = args.get('input_sampling',
                                       defaults["input_sampling"])
        self.vis_init = args.get('vis_init', defaults['vis_init'])

        self.layer_sizes = [self.N_input] + [
            args.get('hidden_size', defaults['hidden_size'])
        ] * self.layers  # layer sizes, from h0 to hK (h0 is the visible layer)

        self.f_recon = None
        self.f_noise = None

        # Activation functions!
        if args.get('hidden_activation') is not None:
            log.maybeLog(self.logger, 'Using specified activation for hiddens')
            self.hidden_activation = args.get('hidden_activation')
        elif args.get('hidden_act') is not None:
            self.hidden_activation = get_activation_function(
                args.get('hidden_act'))
            log.maybeLog(
                self.logger, 'Using {0!s} activation for hiddens'.format(
                    args.get('hidden_act')))
        else:
            log.maybeLog(self.logger, "Using default activation for hiddens")
            self.hidden_activation = defaults['hidden_activation']

        # Visible layer activation
        if args.get('visible_activation') is not None:
            log.maybeLog(self.logger,
                         'Using specified activation for visible layer')
            self.visible_activation = args.get('visible_activation')
        elif args.get('visible_act') is not None:
            self.visible_activation = get_activation_function(
                args.get('visible_act'))
            log.maybeLog(
                self.logger, 'Using {0!s} activation for visible layer'.format(
                    args.get('visible_act')))
        else:
            log.maybeLog(self.logger,
                         'Using default activation for visible layer')
            self.visible_activation = defaults['visible_activation']

        # Cost function!
        if args.get('cost_function') is not None:
            log.maybeLog(self.logger,
                         '\nUsing specified cost function for training\n')
            self.cost_function = args.get('cost_function')
        elif args.get('cost_funct') is not None:
            self.cost_function = get_cost_function(args.get('cost_funct'))
            log.maybeLog(
                self.logger,
                'Using {0!s} for cost function'.format(args.get('cost_funct')))
        else:
            log.maybeLog(self.logger,
                         '\nUsing default cost function for training\n')
            self.cost_function = defaults['cost_function']

        ############################
        # Theano variables and RNG #
        ############################
        self.X = T.fmatrix('X')  # for use in sampling
        self.MRG = RNG_MRG.MRG_RandomStreams(1)
        rng.seed(1)

        ###############
        # Parameters! #
        ###############
        # initialize a list of weights and biases based on layer_sizes for the GSN
        if args.get('weights_list') is None:
            self.weights_list = [
                get_shared_weights(self.layer_sizes[layer],
                                   self.layer_sizes[layer + 1],
                                   name="W_{0!s}_{1!s}".format(
                                       layer, layer + 1))
                for layer in range(self.layers)
            ]  # initialize each layer to uniform sample from sqrt(6. / (n_in + n_out))
        else:
            self.weights_list = args.get('weights_list')
        if args.get('bias_list') is None:
            self.bias_list = [
                get_shared_bias(self.layer_sizes[layer],
                                name='b_' + str(layer))
                for layer in range(self.layers + 1)
            ]  # initialize each layer to 0's.
        else:
            self.bias_list = args.get('bias_list')
        self.params = self.weights_list + self.bias_list

        #################
        # Build the GSN #
        #################
        log.maybeLog(self.logger,
                     "\nBuilding GSN graphs for training and testing")
        # GSN for training - with noise
        add_noise = True
        p_X_chain, _ = build_gsn(
            self.X, self.weights_list, self.bias_list, add_noise,
            self.noiseless_h1, self.hidden_add_noise_sigma,
            self.input_salt_and_pepper, self.input_sampling, self.MRG,
            self.visible_activation, self.hidden_activation, self.walkbacks,
            self.logger)

        # GSN for reconstruction checks along the way - no noise
        add_noise = False
        p_X_chain_recon, _ = build_gsn(
            self.X, self.weights_list, self.bias_list, add_noise,
            self.noiseless_h1, self.hidden_add_noise_sigma,
            self.input_salt_and_pepper, self.input_sampling, self.MRG,
            self.visible_activation, self.hidden_activation, self.walkbacks,
            self.logger)

        #######################
        # Costs and gradients #
        #######################
        log.maybeLog(
            self.logger,
            'Cost w.r.t p(X|...) at every step in the graph for the GSN')
        gsn_costs = [self.cost_function(rX, self.X) for rX in p_X_chain]
        show_gsn_cost = gsn_costs[-1]  # for logging to show progress
        gsn_cost = numpy.sum(gsn_costs)

        gsn_costs_recon = [
            self.cost_function(rX, self.X) for rX in p_X_chain_recon
        ]
        show_gsn_cost_recon = gsn_costs_recon[-1]

        log.maybeLog(self.logger, ["gsn params:", self.params])

        # Stochastic gradient descent!
        gradient = T.grad(gsn_cost, self.params)
        gradient_buffer = [
            theano.shared(numpy.zeros(param.get_value().shape,
                                      dtype='float32'))
            for param in self.params
        ]
        m_gradient = [
            self.momentum * gb + (cast32(1) - self.momentum) * g
            for (gb, g) in zip(gradient_buffer, gradient)
        ]
        param_updates = [(param, param - self.learning_rate * mg)
                         for (param, mg) in zip(self.params, m_gradient)]
        gradient_buffer_updates = zip(gradient_buffer, m_gradient)
        updates = OrderedDict(param_updates + gradient_buffer_updates)

        ############
        # Sampling #
        ############
        # the input to the sampling function
        X_sample = T.fmatrix("X_sampling")
        self.network_state_input = [X_sample] + [
            T.fmatrix("H_sampling_" + str(i + 1)) for i in range(self.layers)
        ]

        # "Output" state of the network (noisy)
        # initialized with input, then we apply updates
        self.network_state_output = [X_sample] + self.network_state_input[1:]
        visible_pX_chain = []

        # ONE update
        log.maybeLog(self.logger,
                     "Performing one walkback in network state sampling.")
        update_layers(self.network_state_output, self.weights_list,
                      self.bias_list, visible_pX_chain, True,
                      self.noiseless_h1, self.hidden_add_noise_sigma,
                      self.input_salt_and_pepper, self.input_sampling,
                      self.MRG, self.visible_activation,
                      self.hidden_activation, self.logger)

        #################################
        #     Create the functions      #
        #################################
        log.maybeLog(self.logger, "Compiling functions...")
        t = time.time()

        self.f_learn = theano.function(inputs=[self.X],
                                       updates=updates,
                                       outputs=show_gsn_cost,
                                       name='gsn_f_learn')

        self.f_cost = theano.function(inputs=[self.X],
                                      outputs=show_gsn_cost,
                                      name='gsn_f_cost')

        # used for checkpoints and testing - no noise in network
        self.f_recon = theano.function(
            inputs=[self.X],
            outputs=[show_gsn_cost_recon, p_X_chain_recon[-1]],
            name='gsn_f_recon')

        self.f_noise = theano.function(inputs=[self.X],
                                       outputs=salt_and_pepper(
                                           self.X, self.input_salt_and_pepper),
                                       name='gsn_f_noise')

        if self.layers == 1:
            self.f_sample = theano.function(inputs=[X_sample],
                                            outputs=visible_pX_chain[-1],
                                            name='gsn_f_sample_single_layer')
        else:
            # Theano warns about unused inputs here because the odd hidden
            # layers are never fed in directly; they are computed from the
            # even layers. We allow the warning with on_unused_input='warn'.
            self.f_sample = theano.function(inputs=self.network_state_input,
                                            outputs=self.network_state_output +
                                            visible_pX_chain,
                                            on_unused_input='warn',
                                            name='gsn_f_sample')

        log.maybeLog(
            self.logger, "Compiling done. Took " +
            make_time_units_string(time.time() - t) + ".\n")
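A hypothetical instantiation of the class above (assuming it is the GSN class referenced elsewhere as generative_stochastic_network.GSN; the argument values are illustrative, and anything omitted falls back to `defaults`). train_X, valid_X, and test_X are assumed to be Theano shared matrices of flattened examples:

args = {'output_path':   'outputs/gsn/',
        'input_size':    784,
        'is_image':      True,
        'layers':        2,
        'walkbacks':     4,
        'hidden_size':   1500,
        'learning_rate': 0.25,
        'momentum':      0.5,
        'batch_size':    100,
        'n_epoch':       300}
gsn = GSN(train_X=train_X, valid_X=valid_X, test_X=test_X, args=args, logger=None)
gsn.train()  # assumes the class also defines train(), as used by init_gsn.train() above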
Ejemplo n.º 36
0
def simple_update_layer(
        hiddens,
        weights_list,
        bias_list,
        p_X_chain,
        i,
        add_noise=defaults["add_noise"],
        noiseless_h1=defaults["noiseless_h1"],
        hidden_add_noise_sigma=defaults["hidden_add_noise_sigma"],
        input_salt_and_pepper=defaults["input_salt_and_pepper"],
        input_sampling=defaults["input_sampling"],
        MRG=defaults["MRG"],
        visible_activation=defaults["visible_activation"],
        hidden_activation=defaults["hidden_activation"],
        logger=None):
    # Compute the dot product, whatever layer
    # If the visible layer X
    if i == 0:
        log.maybeLog(logger, 'using ' + str(weights_list[i]) + '.T')
        hiddens[i] = T.dot(hiddens[i + 1], weights_list[i].T) + bias_list[i]
    # If the top layer
    elif i == len(hiddens) - 1:
        log.maybeLog(logger, ['using', weights_list[i - 1]])
        hiddens[i] = T.dot(hiddens[i - 1], weights_list[i - 1]) + bias_list[i]
    # Otherwise in-between layers
    else:
        log.maybeLog(logger, [
            "using {0!s} and {1!s}.T".format(weights_list[i - 1],
                                             weights_list[i])
        ])
        # next layer        :   hiddens[i+1], assigned weights : W_i
        # previous layer    :   hiddens[i-1], assigned weights : W_(i-1)
        hiddens[i] = T.dot(hiddens[i + 1], weights_list[i].T) + T.dot(
            hiddens[i - 1], weights_list[i - 1]) + bias_list[i]

    # Add pre-activation noise if NOT input layer
    if i == 1 and noiseless_h1:
        log.maybeLog(logger, '>>NO noise in first hidden layer')
        add_noise = False

    # pre activation noise
    if i != 0 and add_noise:
        log.maybeLog(logger,
                     ['Adding pre-activation gaussian noise for layer', i])
        hiddens[i] = add_gaussian_noise(hiddens[i], hidden_add_noise_sigma)

    # ACTIVATION!
    if i == 0:
        log.maybeLog(logger, 'Activation for visible layer')
        hiddens[i] = visible_activation(hiddens[i])
    else:
        log.maybeLog(logger, ['Hidden units activation for layer', i])
        hiddens[i] = hidden_activation(hiddens[i])

    # post-activation noise
    # Why noise after the activation as well? Combined with the pre-activation noise, it doubles the amount of noise injected between each hidden activation.
    if i != 0 and add_noise:
        log.maybeLog(logger,
                     ['Adding post-activation gaussian noise for layer', i])
        hiddens[i] = add_gaussian_noise(hiddens[i], hidden_add_noise_sigma)

    # build the reconstruction chain if updating the visible layer X
    if i == 0:
        # if input layer -> append p(X|...)
        p_X_chain.append(hiddens[i])

        # sample from p(X|...) - SAMPLING NEEDS TO BE CORRECT FOR INPUT TYPES I.E. FOR BINARY MNIST SAMPLING IS BINOMIAL. real-valued inputs should be gaussian
        if input_sampling:
            log.maybeLog(logger, 'Sampling from input')
            sampled = MRG.binomial(p=hiddens[i],
                                   size=hiddens[i].shape,
                                   dtype='float32')
        else:
            log.maybeLog(logger, '>>NO input sampling')
            sampled = hiddens[i]
        # add noise
        sampled = salt_and_pepper(sampled, input_salt_and_pepper)

        # set input layer
        hiddens[i] = sampled
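To make the in-between branch above concrete, here is a plain-numpy sketch (shapes are made up for the demo) of the pre-activation that combines the layer above through W_i.T and the layer below through W_(i-1), mirroring the two T.dot terms:

import numpy

batch, n_below, n_i, n_above = 4, 10, 8, 6
h_below = numpy.random.rand(batch, n_below).astype('float32')   # hiddens[i-1]
h_above = numpy.random.rand(batch, n_above).astype('float32')   # hiddens[i+1]
W_im1 = numpy.random.rand(n_below, n_i).astype('float32')       # weights_list[i-1]
W_i = numpy.random.rand(n_i, n_above).astype('float32')         # weights_list[i]
b_i = numpy.zeros(n_i, dtype='float32')                         # bias_list[i]

pre_activation = h_above.dot(W_i.T) + h_below.dot(W_im1) + b_i
hidden_i = numpy.tanh(pre_activation)  # tanh stands in for hidden_activation
print(hidden_i.shape)  # (4, 8)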
Ejemplo n.º 37
0
def build_gsn(X,
              weights_list,
              bias_list,
              add_noise=defaults["add_noise"],
              noiseless_h1=defaults["noiseless_h1"],
              hidden_add_noise_sigma=defaults["hidden_add_noise_sigma"],
              input_salt_and_pepper=defaults["input_salt_and_pepper"],
              input_sampling=defaults["input_sampling"],
              MRG=defaults["MRG"],
              visible_activation=defaults["visible_activation"],
              hidden_activation=defaults["hidden_activation"],
              walkbacks=defaults["walkbacks"],
              logger=None):
    """
    Construct a GSN (unimodal transition operator) for k walkbacks on the input X.
    Returns the list of predicted X's after k walkbacks and the resulting layer values.

    @type  X: Theano symbolic variable
    @param X: The variable representing the visible input.
    
    @type  weights_list: List(matrix)
    @param weights_list: The list of weights to use between layers.
    
    @type  bias_list: List(vector)
    @param bias_list: The list of biases to use for each layer.
    
    @type  add_noise: Boolean
    @param add_noise: Whether or not to add noise in the computational graph.
    
    @type  noiseless_h1: Boolean
    @param noiseless_h1: Whether or not to add noise in the first hidden layer.
    
    @type  hidden_add_noise_sigma: Float
    @param hidden_add_noise_sigma: The sigma value for the hidden noise function.
    
    @type  input_salt_and_pepper: Float
    @param input_salt_and_pepper: The amount of masking noise to use.
    
    @type  input_sampling: Boolean
    @param input_sampling: Whether to sample from each walkback prediction (like Gibbs).
    
    @type  MRG: Theano random generator
    @param MRG: Random generator.
    
    @type  visible_activation: Function
    @param visible_activation: The visible layer X activation function.
    
    @type  hidden_activation: Function
    @param hidden_activation: The hidden layer activation function.
    
    @type  walkbacks: Integer
    @param walkbacks: The k number of walkbacks to use for the GSN.
    
    @type  logger: Logger
    @param logger: The output log to use.
    
    @rtype:   List
    @return:  predicted_x_chain, hiddens
    """
    p_X_chain = []
    # Whether or not to corrupt the visible input X
    if add_noise:
        X_init = salt_and_pepper(X, input_salt_and_pepper)
    else:
        X_init = X
    # init hiddens with zeros
    hiddens = [X_init]
    for w in weights_list:
        hiddens.append(T.zeros_like(T.dot(hiddens[-1], w)))
    # The layer update scheme
    log.maybeLog(logger, ["Building the GSN graph :", walkbacks, "updates"])
    for i in range(walkbacks):
        log.maybeLog(logger, "GSN Walkback {!s}/{!s}".format(i + 1, walkbacks))
        update_layers(hiddens, weights_list, bias_list, p_X_chain, add_noise,
                      noiseless_h1, hidden_add_noise_sigma,
                      input_salt_and_pepper, input_sampling, MRG,
                      visible_activation, hidden_activation, logger)

    return p_X_chain, hiddens
Ejemplo n.º 38
0
 def sample(self, initial, n_samples=400, k=1):
     log.maybeLog(self.logger, "Starting sampling...")
     def sample_some_numbers_single_layer(n_samples):
         x0 = initial
         samples = [x0]
         x = self.f_noise(x0)
         for _ in xrange(n_samples-1):
             x = self.f_sample(x)
             samples.append(x)
             x = rng.binomial(n=1, p=x, size=x.shape).astype('float32')
             x = self.f_noise(x)
         
         log.maybeLog(self.logger, "Sampling done.")
         return numpy.vstack(samples), None
             
     def sampling_wrapper(NSI):
         # * is the "splat" operator: It takes a list as input, and expands it into actual positional arguments in the function call.
         out = self.f_sample(*NSI)
         NSO = out[:len(self.network_state_output)]
         vis_pX_chain = out[len(self.network_state_output):]
         return NSO, vis_pX_chain
 
     def sample_some_numbers(n_samples):
         # The network's initial state
         init_vis       = initial
         noisy_init_vis = self.f_noise(init_vis)
         
         network_state  = [[noisy_init_vis] + [numpy.zeros((initial.shape[0],self.hidden_size), dtype='float32') for _ in self.bias_list[1:]]]
         
         visible_chain  = [init_vis]
         noisy_h0_chain = [noisy_init_vis]
         sampled_h = []
         
         times = []
         for i in xrange(n_samples-1):
             _t = time.time()
            
             # feed the last state into the network, compute new state, and obtain visible units expectation chain 
             net_state_out, vis_pX_chain = sampling_wrapper(network_state[-1])
 
             # append to the visible chain
             visible_chain += vis_pX_chain
 
             # append state output to the network state chain
             network_state.append(net_state_out)
             
             noisy_h0_chain.append(net_state_out[0])
             
             if i%k == 0:
                 sampled_h.append(T.stack(net_state_out[1:]))
                 if i == k:
                     log.maybeLog(self.logger, "About "+make_time_units_string(numpy.mean(times)*(n_samples-1-i))+" remaining...")
                 
             times.append(time.time() - _t)
 
         log.maybeLog(self.logger, "Sampling done.")
         return numpy.vstack(visible_chain), sampled_h
     
     if self.layers == 1:
         return sample_some_numbers_single_layer(n_samples)
     else:
         return sample_some_numbers(n_samples)
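A hypothetical call to the sample method above (the model and test_X names are assumptions): seed the chain with a single test example, draw 400 visible samples, and record the stacked hidden states every k steps.

seed = test_X.get_value(borrow=True)[0:1].astype('float32')  # one seed example
visible_samples, sampled_h = model.sample(seed, n_samples=400, k=5)
numpy.save('gsn_samples.npy', visible_samples)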
Ejemplo n.º 40
0
    def __init__(self, train_X=None, train_Y=None, valid_X=None, valid_Y=None, test_X=None, test_Y=None, args=None, logger=None):
        # Output logger
        self.logger = logger
        self.outdir = args.get("output_path", defaults["output_path"])
        if self.outdir[-1] != '/':
            self.outdir = self.outdir+'/'
            
        data.mkdir_p(self.outdir)
        
        # Configuration
        config_filename = self.outdir+'config'
        log.maybeLog(self.logger, 'Saving config')
        with open(config_filename, 'w') as f:
            f.write(str(args))
 
        # Input data - make sure it is a list of shared datasets if it isn't. THIS WILL KEEP 'NONE' AS 'NONE' no need to worry :)
        self.train_X = raise_to_list(train_X)
        self.train_Y = raise_to_list(train_Y)
        self.valid_X = raise_to_list(valid_X)
        self.valid_Y = raise_to_list(valid_Y)
        self.test_X  = raise_to_list(test_X)
        self.test_Y  = raise_to_list(test_Y)
                
        # variables from the dataset that are used for initialization and image reconstruction
        if self.train_X is None:
            self.N_input = args.get("input_size")
            if args.get("input_size") is None:
                raise AssertionError("Please either specify input_size in the arguments or provide an example train_X for input dimensionality.")
        else:
            self.N_input = self.train_X[0].get_value(borrow=True).shape[1]
        
        self.is_image = args.get('is_image', defaults['is_image'])
        if self.is_image:
            (_h, _w) = closest_to_square_factors(self.N_input)
            self.image_width  = args.get('width', _w)
            self.image_height = args.get('height', _h)
            
        #######################################
        # Network and training specifications #
        #######################################
        self.layers          = args.get('layers', defaults['layers']) # number hidden layers
        self.walkbacks       = args.get('walkbacks', defaults['walkbacks']) # number of walkbacks
        self.learning_rate   = theano.shared(cast32(args.get('learning_rate', defaults['learning_rate'])))  # learning rate
        self.init_learn_rate = cast32(args.get('learning_rate', defaults['learning_rate']))
        self.momentum        = theano.shared(cast32(args.get('momentum', defaults['momentum']))) # momentum term
        self.annealing       = cast32(args.get('annealing', defaults['annealing'])) # exponential annealing coefficient
        self.noise_annealing = cast32(args.get('noise_annealing', defaults['noise_annealing'])) # exponential noise annealing coefficient
        self.batch_size      = args.get('batch_size', defaults['batch_size'])
        self.gsn_batch_size = args.get('gsn_batch_size', defaults['gsn_batch_size'])
        self.n_epoch         = args.get('n_epoch', defaults['n_epoch'])
        self.early_stop_threshold = args.get('early_stop_threshold', defaults['early_stop_threshold'])
        self.early_stop_length = args.get('early_stop_length', defaults['early_stop_length'])
        self.save_frequency  = args.get('save_frequency', defaults['save_frequency'])
        
        self.noiseless_h1           = args.get('noiseless_h1', defaults["noiseless_h1"])
        self.hidden_add_noise_sigma = theano.shared(cast32(args.get('hidden_add_noise_sigma', defaults["hidden_add_noise_sigma"])))
        self.input_salt_and_pepper  = theano.shared(cast32(args.get('input_salt_and_pepper', defaults["input_salt_and_pepper"])))
        self.input_sampling         = args.get('input_sampling', defaults["input_sampling"])
        self.vis_init               = args.get('vis_init', defaults['vis_init'])
        self.initialize_gsn         = args.get('initialize_gsn', defaults['initialize_gsn'])
        self.hessian_free           = args.get('hessian_free', defaults['hessian_free'])
        
        self.hidden_size = args.get('hidden_size', defaults['hidden_size'])
        self.layer_sizes = [self.N_input] + [self.hidden_size] * self.layers # layer sizes, from h0 to hK (h0 is the visible layer)
        self.recurrent_hidden_size = args.get('recurrent_hidden_size', defaults['recurrent_hidden_size'])
        
        self.f_recon = None
        self.f_noise = None
        
        # Activation functions!
        # For the GSN:
        if args.get('hidden_activation') is not None:
            log.maybeLog(self.logger, 'Using specified activation for GSN hiddens')
            self.hidden_activation = args.get('hidden_activation')
        elif args.get('hidden_act') is not None:
            self.hidden_activation = get_activation_function(args.get('hidden_act'))
            log.maybeLog(self.logger, 'Using {0!s} activation for GSN hiddens'.format(args.get('hidden_act')))
        else:
            log.maybeLog(self.logger, "Using default activation for GSN hiddens")
            self.hidden_activation = defaults['hidden_activation']
            
        # For the RNN:
        if args.get('recurrent_hidden_activation') is not None:
            log.maybeLog(self.logger, 'Using specified activation for RNN hiddens')
            self.recurrent_hidden_activation = args.get('recurrent_hidden_activation')
        elif args.get('recurrent_hidden_act') is not None:
            self.recurrent_hidden_activation = get_activation_function(args.get('recurrent_hidden_act'))
            log.maybeLog(self.logger, 'Using {0!s} activation for RNN hiddens'.format(args.get('recurrent_hidden_act')))
        else:
            log.maybeLog(self.logger, "Using default activation for RNN hiddens")
            self.recurrent_hidden_activation = defaults['recurrent_hidden_activation']
            
        # Visible layer activation
        if args.get('visible_activation') is not None:
            log.maybeLog(self.logger, 'Using specified activation for visible layer')
            self.visible_activation = args.get('visible_activation')
        elif args.get('visible_act') is not None:
            self.visible_activation = get_activation_function(args.get('visible_act'))
            log.maybeLog(self.logger, 'Using {0!s} activation for visible layer'.format(args.get('visible_act')))
        else:
            log.maybeLog(self.logger, 'Using default activation for visible layer')
            self.visible_activation = defaults['visible_activation']
            
        # Cost function!
        if args.get('cost_function') is not None:
            log.maybeLog(self.logger, '\nUsing specified cost function for GSN training\n')
            self.cost_function = args.get('cost_function')
        elif args.get('cost_funct') is not None:
            self.cost_function = get_cost_function(args.get('cost_funct'))
            log.maybeLog(self.logger, 'Using {0!s} for cost function'.format(args.get('cost_funct')))
        else:
            log.maybeLog(self.logger, '\nUsing default cost function for GSN training\n')
            self.cost_function = defaults['cost_function']
        
        ############################
        # Theano variables and RNG #
        ############################
        self.X = T.fmatrix('X') #single (batch) for training gsn
        self.Xs = T.fmatrix('Xs') #sequence for training rnn-gsn
        self.MRG = RNG_MRG.MRG_RandomStreams(1)
        
        ###############
        # Parameters! #
        ###############
        #gsn
        self.weights_list = [get_shared_weights(self.layer_sizes[i], self.layer_sizes[i+1], name="W_{0!s}_{1!s}".format(i,i+1)) for i in range(self.layers)] # initialize each layer to uniform sample from sqrt(6. / (n_in + n_out))
        self.bias_list    = [get_shared_bias(self.layer_sizes[i], name='b_'+str(i)) for i in range(self.layers + 1)] # initialize each layer to 0's.
        
        #recurrent
        self.recurrent_to_gsn_weights_list = [get_shared_weights(self.recurrent_hidden_size, self.layer_sizes[layer], name="W_u_h{0!s}".format(layer)) for layer in range(self.layers+1) if layer%2 != 0]
        self.W_u_u = get_shared_weights(self.recurrent_hidden_size, self.recurrent_hidden_size, name="W_u_u")
        self.W_x_u = get_shared_weights(self.N_input, self.recurrent_hidden_size, name="W_x_u")
        self.recurrent_bias = get_shared_bias(self.recurrent_hidden_size, name='b_u')
        
        #lists for use with gradients
        self.gsn_params = self.weights_list + self.bias_list
        self.u_params   = [self.W_u_u, self.W_x_u, self.recurrent_bias]
        self.params     = self.gsn_params + self.recurrent_to_gsn_weights_list + self.u_params
        
        ###########################################################
        #           load initial parameters of gsn                #
        ###########################################################
        self.train_gsn_first = False
        if self.initialize_gsn:
            params_to_load = 'gsn_params_epoch_30.pkl'
            if not os.path.isfile(params_to_load):
                self.train_gsn_first = True 
            else:
                log.maybeLog(self.logger, "\nLoading existing GSN parameters\n")
                loaded_params = cPickle.load(open(params_to_load, 'rb'))
                [p.set_value(lp.get_value(borrow=False)) for lp, p in zip(loaded_params[:len(self.weights_list)], self.weights_list)]
                [p.set_value(lp.get_value(borrow=False)) for lp, p in zip(loaded_params[len(self.weights_list):], self.bias_list)]
                
        if self.initialize_gsn:
            self.gsn_args = {'weights_list':       self.weights_list,
                             'bias_list':          self.bias_list,
                             'hidden_activation':  self.hidden_activation,
                             'visible_activation': self.visible_activation,
                             'cost_function':      self.cost_function,
                             'layers':             self.layers,
                             'walkbacks':          self.walkbacks,
                             'hidden_size':        self.hidden_size,
                             'learning_rate':      args.get('learning_rate', defaults['learning_rate']),
                             'momentum':           args.get('momentum', defaults['momentum']),
                             'annealing':          self.annealing,
                             'noise_annealing':    self.noise_annealing,
                             'batch_size':         self.gsn_batch_size,
                             'n_epoch':            self.n_epoch,
                             'early_stop_threshold':   self.early_stop_threshold,
                             'early_stop_length':      self.early_stop_length,
                             'save_frequency':         self.save_frequency,
                             'noiseless_h1':           self.noiseless_h1,
                             'hidden_add_noise_sigma': args.get('hidden_add_noise_sigma', defaults['hidden_add_noise_sigma']),
                             'input_salt_and_pepper':  args.get('input_salt_and_pepper', defaults['input_salt_and_pepper']),
                             'input_sampling':      self.input_sampling,
                             'vis_init':            self.vis_init,
                             'output_path':         self.outdir+'gsn/',
                             'is_image':            self.is_image,
                             'input_size':          self.N_input
                             }
            
        ############
        # Sampling #
        ############
        # the input to the sampling function
        X_sample = T.fmatrix("X_sampling")
        self.network_state_input = [X_sample] + [T.fmatrix("H_sampling_"+str(i+1)) for i in range(self.layers)]
       
        # "Output" state of the network (noisy)
        # initialized with input, then we apply updates
        self.network_state_output = [X_sample] + self.network_state_input[1:]
        visible_pX_chain = []
    
        # ONE update
        _add_noise = True
        log.maybeLog(self.logger, "Performing one walkback in network state sampling.")
        GSN.update_layers(self.network_state_output,
                          self.weights_list,
                          self.bias_list,
                          visible_pX_chain, 
                          _add_noise,
                          self.noiseless_h1,
                          self.hidden_add_noise_sigma,
                          self.input_salt_and_pepper,
                          self.input_sampling,
                          self.MRG,
                          self.visible_activation,
                          self.hidden_activation,
                          self.logger)
    
               
        #############################################
        #      Build the graphs for the RNN-GSN     #
        #############################################
        # If `x_t` is given, run the deterministic recurrence to compute u_t.
        # Otherwise we would first need to generate x_t; that generative branch
        # is left unimplemented in this example.
        def recurrent_step(x_t, u_tm1, add_noise):
            # Make current guess for hiddens based on U
            for i in range(self.layers):
                if i%2 == 0:
                    log.maybeLog(self.logger, "Using {0!s} and {1!s}".format(self.recurrent_to_gsn_weights_list[(i+1)/2],self.bias_list[i+1]))
            h_t = T.concatenate([self.hidden_activation(self.bias_list[i+1] + T.dot(u_tm1, self.recurrent_to_gsn_weights_list[(i+1)/2])) for i in range(self.layers) if i%2 == 0],axis=0)
            
            generate = x_t is None
            if generate:
                pass  # generative branch not implemented here
            
            # Make a GSN to update U
    #         chain, hs = gsn.build_gsn(x_t, weights_list, bias_list, add_noise, state.noiseless_h1, state.hidden_add_noise_sigma, state.input_salt_and_pepper, state.input_sampling, MRG, visible_activation, hidden_activation, walkbacks, logger)
    #         htop_t = hs[-1]
    #         denoised_x_t = chain[-1]
            # Update U
    #         ua_t = T.dot(denoised_x_t, W_x_u) + T.dot(htop_t, W_h_u) + T.dot(u_tm1, W_u_u) + recurrent_bias
            ua_t = T.dot(x_t, self.W_x_u) + T.dot(u_tm1, self.W_u_u) + self.recurrent_bias
            u_t = self.recurrent_hidden_activation(ua_t)
            return None if generate else [ua_t, u_t, h_t]
        
        log.maybeLog(self.logger, "\nCreating recurrent step scan.")
        # For training, the deterministic recurrence is used to compute all the
        # {h_t, 1 <= t <= T} given Xs. Conditional GSNs can then be trained
        # in batches using those parameters.
        u0 = T.zeros((self.recurrent_hidden_size,))  # initial value for the RNN hidden units
        (ua, u, h_t), updates_recurrent = theano.scan(fn=lambda x_t, u_tm1, *_: recurrent_step(x_t, u_tm1, True),
                                                           sequences=self.Xs,
                                                           outputs_info=[None, u0, None],
                                                           non_sequences=self.params)
        
        log.maybeLog(self.logger, "Now for reconstruction sample without noise")
        (_, _, h_t_recon), updates_recurrent_recon = theano.scan(fn=lambda x_t, u_tm1, *_: recurrent_step(x_t, u_tm1, False),
                                                           sequences=self.Xs,
                                                           outputs_info=[None, u0, None],
                                                           non_sequences=self.params)
        # put together the hiddens list
        h_list = [T.zeros_like(self.Xs)]
        for layer, w in enumerate(self.weights_list):
            if layer%2 != 0:
                h_list.append(T.zeros_like(T.dot(h_list[-1], w)))
            else:
                h_list.append((h_t.T[(layer/2)*self.hidden_size:(layer/2+1)*self.hidden_size]).T)
                
        h_list_recon = [T.zeros_like(self.Xs)]
        for layer, w in enumerate(self.weights_list):
            if layer%2 != 0:
                h_list_recon.append(T.zeros_like(T.dot(h_list_recon[-1], w)))
            else:
                h_list_recon.append((h_t_recon.T[(layer/2)*self.hidden_size:(layer/2+1)*self.hidden_size]).T)
        
        #with noise
        _, _, cost, show_cost, error = GSN.build_gsn_given_hiddens(self.Xs, h_list, self.weights_list, self.bias_list, True, self.noiseless_h1, self.hidden_add_noise_sigma, self.input_salt_and_pepper, self.input_sampling, self.MRG, self.visible_activation, self.hidden_activation, self.walkbacks, self.cost_function)
        #without noise for reconstruction
        x_sample_recon, _, _, recon_show_cost, _ = GSN.build_gsn_given_hiddens(self.Xs, h_list_recon, self.weights_list, self.bias_list, False, self.noiseless_h1, self.hidden_add_noise_sigma, self.input_salt_and_pepper, self.input_sampling, self.MRG, self.visible_activation, self.hidden_activation, self.walkbacks, self.cost_function)
        
        # Copy the scan updates so that adding the SGD parameter updates to
        # updates_train below does not leak them into the cost-only function.
        updates_train = OrderedDict(updates_recurrent)
        updates_cost = OrderedDict(updates_recurrent)
        
        #############
        #   COSTS   #
        #############
        log.maybeLog(self.logger, '\nCost w.r.t p(X|...) at every step in the graph')
        start_functions_time = time.time()

        # if we are not using Hessian-free training create the normal sgd functions
        if not self.hessian_free:
            gradient      = T.grad(cost, self.params)      
            gradient_buffer = [theano.shared(numpy.zeros(param.get_value().shape, dtype='float32')) for param in self.params]
            
            m_gradient    = [self.momentum * gb + (cast32(1) - self.momentum) * g for (gb, g) in zip(gradient_buffer, gradient)]
            param_updates = [(param, param - self.learning_rate * mg) for (param, mg) in zip(self.params, m_gradient)]
            gradient_buffer_updates = zip(gradient_buffer, m_gradient)
                
            updates = OrderedDict(param_updates + gradient_buffer_updates)
            updates_train.update(updates)
        
            log.maybeLog(self.logger, "rnn-gsn learn...")
            self.f_learn = theano.function(inputs  = [self.Xs],
                                      updates = updates_train,
                                      outputs = [show_cost, error],
                                      on_unused_input='warn',
                                      name='rnngsn_f_learn')
            
            log.maybeLog(self.logger, "rnn-gsn cost...")
            self.f_cost  = theano.function(inputs  = [self.Xs],
                                      updates = updates_cost,
                                      outputs = [show_cost, error],
                                      on_unused_input='warn',
                                      name='rnngsn_f_cost')
        
        log.maybeLog(self.logger, "Training/cost functions done.")
        
        # Denoise some numbers : show number, noisy number, predicted number, reconstructed number
        log.maybeLog(self.logger, "Creating graph for noisy reconstruction function at checkpoints during training.")
        self.f_recon = theano.function(inputs=[self.Xs],
                                       outputs=[x_sample_recon[-1], recon_show_cost],
                                       name='rnngsn_f_recon')
        
        # a function to add salt and pepper noise
        self.f_noise = theano.function(inputs = [self.X],
                                       outputs = salt_and_pepper(self.X, self.input_salt_and_pepper),
                                       name='rnngsn_f_noise')
        # Sampling functions
        log.maybeLog(self.logger, "Creating sampling function...")
        if self.layers == 1: 
            self.f_sample = theano.function(inputs = [X_sample],
                                            outputs = visible_pX_chain[-1],
                                            name='rnngsn_f_sample_single_layer')
        else:
            self.f_sample = theano.function(inputs = self.network_state_input,
                                            outputs = self.network_state_output + visible_pX_chain,
                                            on_unused_input='warn',
                                            name='rnngsn_f_sample')
        
    
        log.maybeLog(self.logger, "Done compiling all functions.")
        compilation_time = time.time() - start_functions_time
        # Show the compile time with appropriate easy-to-read units.
        log.maybeLog(self.logger, "Total compilation time took "+make_time_units_string(compilation_time)+".\n\n")
Ejemplo n.º 41
0
def simple_update_layer(hiddens,
                        weights_list,
                        bias_list,
                        p_X_chain,
                        i,
                        add_noise              = defaults["add_noise"],
                        noiseless_h1           = defaults["noiseless_h1"],
                        hidden_add_noise_sigma = defaults["hidden_add_noise_sigma"],
                        input_salt_and_pepper  = defaults["input_salt_and_pepper"],
                        input_sampling         = defaults["input_sampling"],
                        MRG                    = defaults["MRG"],
                        visible_activation     = defaults["visible_activation"],
                        hidden_activation      = defaults["hidden_activation"],
                        logger = None):   
    # Compute the dot product, whatever layer
    # If the visible layer X
    if i == 0:
        log.maybeLog(logger, 'using '+str(weights_list[i])+'.T')
        hiddens[i] = T.dot(hiddens[i+1], weights_list[i].T) + bias_list[i]           
    # If the top layer
    elif i == len(hiddens)-1:
        log.maybeLog(logger, ['using',weights_list[i-1]])
        hiddens[i] = T.dot(hiddens[i-1], weights_list[i-1]) + bias_list[i]
    # Otherwise in-between layers
    else:
        log.maybeLog(logger, ["using {0!s} and {1!s}.T".format(weights_list[i-1], weights_list[i])])
        # next layer        :   hiddens[i+1], assigned weights : W_i
        # previous layer    :   hiddens[i-1], assigned weights : W_(i-1)
        hiddens[i] = T.dot(hiddens[i+1], weights_list[i].T) + T.dot(hiddens[i-1], weights_list[i-1]) + bias_list[i]

    # Add pre-activation noise if NOT input layer
    if i==1 and noiseless_h1:
        log.maybeLog(logger, '>>NO noise in first hidden layer')
        add_noise = False

    # pre activation noise       
    if i != 0 and add_noise:
        log.maybeLog(logger, ['Adding pre-activation gaussian noise for layer', i])
        hiddens[i] = add_gaussian_noise(hiddens[i], hidden_add_noise_sigma)
   
    # ACTIVATION!
    if i == 0:
        log.maybeLog(logger, 'Activation for visible layer')
        hiddens[i] = visible_activation(hiddens[i])
    else:
        log.maybeLog(logger, ['Hidden units activation for layer', i])
        hiddens[i] = hidden_activation(hiddens[i])

    # post-activation noise
    # Why noise after the activation as well? Combined with the pre-activation noise, it doubles the amount of noise injected between each hidden activation.
    if i != 0 and add_noise:
        log.maybeLog(logger, ['Adding post-activation gaussian noise for layer', i])
        hiddens[i] = add_gaussian_noise(hiddens[i], hidden_add_noise_sigma)

    # build the reconstruction chain if updating the visible layer X
    if i == 0:
        # if input layer -> append p(X|...)
        p_X_chain.append(hiddens[i])
        
        # sample from p(X|...) - SAMPLING NEEDS TO BE CORRECT FOR INPUT TYPES I.E. FOR BINARY MNIST SAMPLING IS BINOMIAL. real-valued inputs should be gaussian
        if input_sampling:
            log.maybeLog(logger, 'Sampling from input')
            sampled = MRG.binomial(p = hiddens[i], size=hiddens[i].shape, dtype='float32')
        else:
            log.maybeLog(logger, '>>NO input sampling')
            sampled = hiddens[i]
        # add noise
        sampled = salt_and_pepper(sampled, input_salt_and_pepper)
        
        # set input layer
        hiddens[i] = sampled
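
For orientation, here is a minimal sketch of how a routine like simple_update_layer is driven over GSN walkbacks: odd layers are updated first, then even layers (including the visible layer), and the visible reconstruction is recorded after each pass. This numpy stand-in is illustrative only; the helper names and the sigmoid nonlinearity are assumptions, and the noise injection is omitted for brevity.

import numpy

def sigmoid(x):
    return 1.0 / (1.0 + numpy.exp(-x))

def update(hiddens, weights, biases, i):
    # Same neighbor arithmetic as simple_update_layer, without the noise.
    pre = numpy.zeros_like(hiddens[i]) + biases[i]
    if i > 0:
        pre += hiddens[i - 1].dot(weights[i - 1])    # from the layer below
    if i < len(hiddens) - 1:
        pre += hiddens[i + 1].dot(weights[i].T)      # from the layer above
    hiddens[i] = sigmoid(pre)

def walkback_sketch(hiddens, weights, biases, walkbacks):
    # hiddens[k]: (batch, size_k) arrays; weights[k] connects layer k to k+1.
    p_X_chain = []
    for _ in range(walkbacks):
        for i in range(1, len(hiddens), 2):          # odd layers first
            update(hiddens, weights, biases, i)
        for i in range(0, len(hiddens), 2):          # then even layers, incl. visible
            update(hiddens, weights, biases, i)
        p_X_chain.append(hiddens[0])                 # record p(X|...) each pass
    return p_X_chain
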
Ejemplo n.º 42
0
    def train(self,
              train_X=None,
              valid_X=None,
              test_X=None,
              continue_training=False):
        log.maybeLog(self.logger, "\nTraining---------\n")
        if train_X is None:
            log.maybeLog(
                self.logger,
                "Training using data given during initialization of GSN.\n")
            train_X = self.train_X
            if train_X is None:
                log.maybeLog(self.logger,
                             "\nPlease provide a training dataset!\n")
                raise AssertionError("Please provide a training dataset!")
        else:
            log.maybeLog(
                self.logger,
                "Training using data provided to training function.\n")
        if valid_X is None:
            valid_X = self.valid_X
        if test_X is None:
            test_X = self.test_X

        train_X = raise_data_to_list(train_X)
        valid_X = raise_data_to_list(valid_X)
        test_X = raise_data_to_list(test_X)

        ############
        # TRAINING #
        ############
        log.maybeLog(
            self.logger,
            "-----------TRAINING GSN FOR {0!s} EPOCHS-----------".format(
                self.n_epoch))
        STOP = False
        counter = 0
        if not continue_training:
            self.learning_rate.set_value(self.init_learn_rate)  # learning rate
        times = []
        best_cost = float('inf')
        best_params = None
        patience = 0

        log.maybeLog(
            self.logger,
            ['train X size:', str(train_X[0].shape.eval())])
        if valid_X is not None:
            log.maybeLog(self.logger,
                         ['valid X size:',
                          str(valid_X[0].shape.eval())])
        if test_X is not None:
            log.maybeLog(
                self.logger,
                ['test X size:', str(test_X[0].shape.eval())])

        if self.vis_init:
            self.bias_list[0].set_value(
                logit(
                    numpy.clip(train_X[0].get_value().mean(axis=0), 0.001,
                               0.9)))

        while not STOP:
            counter += 1
            t = time.time()
            log.maybeAppend(self.logger, [counter, '\t'])

            #train
            train_costs = data.apply_cost_function_to_dataset(
                self.f_learn, train_X, self.batch_size)
            log.maybeAppend(
                self.logger,
                ['Train:', trunc(numpy.mean(train_costs)), '\t'])

            #valid
            if valid_X is not None:
                valid_costs = data.apply_cost_function_to_dataset(
                    self.f_cost, valid_X, self.batch_size)
                log.maybeAppend(
                    self.logger,
                    ['Valid:', trunc(numpy.mean(valid_costs)), '\t'])

            #test
            if test_X is not None:
                test_costs = data.apply_cost_function_to_dataset(
                    self.f_cost, test_X, self.batch_size)
                log.maybeAppend(
                    self.logger,
                    ['Test:', trunc(numpy.mean(test_costs)), '\t'])

            #check for early stopping
            if valid_X is not None:
                cost = numpy.sum(valid_costs)
            else:
                cost = numpy.sum(train_costs)
            if cost < best_cost * self.early_stop_threshold:
                patience = 0
                best_cost = cost
                # save the parameters that made it the best
                best_params = save_params(self.params)
            else:
                patience += 1

            if counter >= self.n_epoch or patience >= self.early_stop_length:
                STOP = True
                if best_params is not None:
                    restore_params(self.params, best_params)
                save_params_to_file(counter, self.params, self.outdir,
                                    self.logger)

            timing = time.time() - t
            times.append(timing)

            log.maybeAppend(self.logger,
                            'time: ' + make_time_units_string(timing) + '\t')

            log.maybeLog(
                self.logger, 'remaining: ' + make_time_units_string(
                    (self.n_epoch - counter) * numpy.mean(times)))

            if (counter % self.save_frequency) == 0 or STOP is True:
                if self.is_image:
                    n_examples = 100
                    tests = test_X[0].get_value()[0:n_examples]
                    noisy_tests = self.f_noise(
                        test_X[0].get_value()[0:n_examples])
                    _, reconstructed = self.f_recon(noisy_tests)
                    # Concatenate stuff if it is an image
                    stacked = numpy.vstack([
                        numpy.vstack([
                            tests[i * 10:(i + 1) * 10],
                            noisy_tests[i * 10:(i + 1) * 10],
                            reconstructed[i * 10:(i + 1) * 10]
                        ]) for i in range(10)
                    ])
                    number_reconstruction = PIL.Image.fromarray(
                        tile_raster_images(
                            stacked, (self.image_height, self.image_width),
                            (10, 30)))

                    number_reconstruction.save(
                        self.outdir + 'gsn_image_reconstruction_epoch_' +
                        str(counter) + '.png')

                #save gsn_params
                save_params_to_file(counter, self.params, self.outdir,
                                    self.logger)

            # ANNEAL!
            new_lr = self.learning_rate.get_value() * self.annealing
            self.learning_rate.set_value(new_lr)

            new_hidden_sigma = self.hidden_add_noise_sigma.get_value(
            ) * self.noise_annealing
            self.hidden_add_noise_sigma.set_value(new_hidden_sigma)

            new_salt_pepper = self.input_salt_and_pepper.get_value(
            ) * self.noise_annealing
            self.input_salt_and_pepper.set_value(new_salt_pepper)
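
The ANNEAL! block above applies exponential decay: every epoch the learning rate is multiplied by self.annealing and both noise levels by self.noise_annealing, so after t epochs the learning rate equals init_learn_rate * annealing**t. A tiny sketch of how fast that schedule decays (the factor and initial value here are assumed for illustration, not taken from the defaults):

init_lr, annealing = 0.25, 0.99     # assumed values for illustration
lr_after = lambda t: init_lr * annealing ** t
print(lr_after(0), lr_after(100))   # 0.25, ~0.0915 (0.99**100 is about 0.366)
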
Ejemplo n.º 43
0
    def train(self, train_X=None, train_Y=None, valid_X=None, valid_Y=None, test_X=None, test_Y=None, is_artificial=False, artificial_sequence=1, continue_training=False):
        log.maybeLog(self.logger, "\nTraining---------\n")
        if train_X is None:
            log.maybeLog(self.logger, "Training using data given during initialization of RNN-GSN.\n")
            train_X = self.train_X
            train_Y = self.train_Y
            if train_X is None:
                log.maybeLog(self.logger, "\nPlease provide a training dataset!\n")
                raise AssertionError("Please provide a training dataset!")
        else:
            log.maybeLog(self.logger, "Training using data provided to training function.\n")
        if valid_X is None:
            valid_X = self.valid_X
            valid_Y = self.valid_Y
        if test_X is None:
            test_X  = self.test_X
            test_Y  = self.test_Y
            
        # Input data - make sure it is a list of shared datasets
        train_X = raise_to_list(train_X)
        train_Y = raise_to_list(train_Y)
        valid_X = raise_to_list(valid_X)
        valid_Y = raise_to_list(valid_Y)
        test_X  = raise_to_list(test_X)
        test_Y  = raise_to_list(test_Y)
            
        ##########################################################
        # Train the GSN first to get good weights initialization #
        ##########################################################
        # if self.train_gsn_first:
        #     log.maybeLog(self.logger, "\n\n----------Initially training the GSN---------\n\n")
        #     init_gsn = generative_stochastic_network.GSN(train_X=train_X, valid_X=valid_X, test_X=test_X, args=self.gsn_args, logger=self.logger)
        #     init_gsn.train()
    
        
        #########################################
        # If we are using Hessian-free training #
        #########################################
        if self.hessian_free:
            pass
#         gradient_dataset = hf_sequence_dataset([train_X.get_value()], batch_size=None, number_batches=5000)
#         cg_dataset = hf_sequence_dataset([train_X.get_value()], batch_size=None, number_batches=1000)
#         valid_dataset = hf_sequence_dataset([valid_X.get_value()], batch_size=None, number_batches=1000)
#         
#         s = x_samples
#         costs = [cost, show_cost]
#         hf_optimizer(params, [Xs], s, costs, u, ua).train(gradient_dataset, cg_dataset, initial_lambda=1.0, preconditioner=True, validation=valid_dataset)
        
        ################################
        # If we are using SGD training #
        ################################
        else:
            log.maybeLog(self.logger, "\n-----------TRAINING RNN-GSN------------\n")
            # TRAINING
            STOP        =   False
            counter     =   0
            if not continue_training:
                self.learning_rate.set_value(self.init_learn_rate)  # learning rate
            times = []
            best_cost = float('inf')
            best_params = None
            patience = 0
                        
            log.maybeLog(self.logger, ['train X size:',str(train_X[0].get_value(borrow=True).shape)])
            if valid_X is not None:
                log.maybeLog(self.logger, ['valid X size:',str(valid_X[0].get_value(borrow=True).shape)])
            if test_X is not None:
                log.maybeLog(self.logger, ['test X size:',str(test_X[0].get_value(borrow=True).shape)])
            
            if self.vis_init:
                self.bias_list[0].set_value(logit(numpy.clip(train_X[0].get_value(borrow=True).mean(axis=0), 0.001, 0.9)))
                
            start_time = time.time()
        
            while not STOP:
                counter += 1
                t = time.time()
                log.maybeAppend(self.logger, [counter,'\t'])
                    
#                 if is_artificial:
#                     data.sequence_mnist_data(train_X[0], train_Y[0], valid_X[0], valid_Y[0], test_X[0], test_Y[0], artificial_sequence, rng)
                     
                #train
                train_costs = []
                train_errors = []
                for train_data in train_X:
                    costs_and_errors = data.apply_cost_function_to_dataset(self.f_learn, train_data, self.batch_size)
                    train_costs.extend([cost for (cost, error) in costs_and_errors])
                    train_errors.extend([error for (cost, error) in costs_and_errors])
                log.maybeAppend(self.logger, ['Train:',trunc(numpy.mean(train_costs)),trunc(numpy.mean(train_errors)),'\t'])
         
         
                #valid
                if valid_X is not None:
                    valid_costs = []
                    for valid_data in valid_X:
                        cs = data.apply_cost_function_to_dataset(self.f_cost, valid_data, self.batch_size)
                        valid_costs.extend([c for c,e in cs])
                    log.maybeAppend(self.logger, ['Valid:',trunc(numpy.mean(valid_costs)), '\t'])
         
         
                #test
                if test_X is not None:
                    test_costs = []
                    test_errors = []
                    for test_data in test_X:
                        costs_and_errors = data.apply_cost_function_to_dataset(self.f_cost, test_data, self.batch_size)
                        test_costs.extend([cost for (cost, error) in costs_and_errors])
                        test_errors.extend([error for (cost, error) in costs_and_errors])
                    log.maybeAppend(self.logger, ['Test:',trunc(numpy.mean(test_costs)),trunc(numpy.mean(test_errors)), '\t'])
                
                 
                #check for early stopping
                if valid_X is not None:
                    cost = numpy.sum(valid_costs)
                else:
                    cost = numpy.sum(train_costs)
                if cost < best_cost*self.early_stop_threshold:
                    patience = 0
                    best_cost = cost
                    # save the parameters that made it the best
                    best_params = copy_params(self.params)
                else:
                    patience += 1
         
                if counter >= self.n_epoch or patience >= self.early_stop_length:
                    STOP = True
                    if best_params is not None:
                        restore_params(self.params, best_params)
                    self.save_params('all', counter, self.params)
         
                timing = time.time() - t
                times.append(timing)
         
                log.maybeAppend(self.logger, 'time: '+make_time_units_string(timing)+'\t')
            
                log.maybeLog(self.logger, 'remaining: '+make_time_units_string((self.n_epoch - counter) * numpy.mean(times)))
        
                if (counter % self.save_frequency) == 0 or STOP is True:
                    n_examples = 100
                    xs_test = test_X[0].get_value(borrow=True)[range(n_examples)]
                    noisy_xs_test = self.f_noise(test_X[0].get_value(borrow=True)[range(n_examples)])
                    reconstructions = []
                    for i in xrange(0, len(noisy_xs_test)):
                        recon, recon_cost = self.f_recon(noisy_xs_test[max(0,(i+1)-self.batch_size):i+1])
                        reconstructions.append(recon)
                    reconstructed = numpy.array(reconstructions)
                    if (self.is_image):
                        # Concatenate stuff
                        # stacked = numpy.vstack([numpy.vstack([xs_test[i*10 : (i+1)*10], noisy_xs_test[i*10 : (i+1)*10], reconstructed[i*10 : (i+1)*10]]) for i in range(10)])
                        # number_reconstruction = PIL.Image.fromarray(tile_raster_images(stacked, (self.image_height, self.image_width), (10,30)))
                            
                        # number_reconstruction.save(self.outdir+'rnngsn_reconstruction_epoch_'+str(counter)+'.png')
            
                        #sample_numbers(counter, 'seven')
#                         plot_samples(counter, 'rnngsn')
                        pass
            
                    #save params
                    self.save_params('all', counter, self.params)
             
                # ANNEAL!
                new_lr = self.learning_rate.get_value() * self.annealing
                self.learning_rate.set_value(new_lr)
                
                new_noise = self.input_salt_and_pepper.get_value() * self.noise_annealing
                self.input_salt_and_pepper.set_value(new_noise)
                
            log.maybeLog(self.logger, "\n------------TOTAL RNN-GSN TRAIN TIME TOOK {0!s}---------".format(make_time_units_string(time.time()-start_time)))
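
Both train loops use the same patience-based early stopping: the monitored cost must beat the best cost scaled by the multiplicative early_stop_threshold to reset patience, otherwise patience grows, and training stops once it reaches early_stop_length (or the epoch budget runs out). A standalone sketch of that bookkeeping; the class name and default values are illustrative, not from the original code:

class PatienceTracker(object):
    # Mirrors the early-stopping logic in the training loops above.
    def __init__(self, threshold=0.9995, length=30):
        self.threshold = threshold    # new cost must beat best * threshold
        self.length = length          # epochs to tolerate without improvement
        self.best = float('inf')
        self.patience = 0

    def should_stop(self, cost):
        if cost < self.best * self.threshold:
            self.best = cost
            self.patience = 0         # improvement: reset and remember the best
        else:
            self.patience += 1
        return self.patience >= self.length
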
Ejemplo n.º 44
0
 def train(self, train_X=None, valid_X=None, test_X=None, continue_training=False):
     log.maybeLog(self.logger, "\nTraining---------\n")
     if train_X is None:
         log.maybeLog(self.logger, "Training using data given during initialization of GSN.\n")
         train_X = self.train_X
         if train_X is None:
             log.maybeLog(self.logger, "\nPlease provide a training dataset!\n")
             raise AssertionError("Please provide a training dataset!")
     else:
         log.maybeLog(self.logger, "Training using data provided to training function.\n")
     if valid_X is None:
         valid_X = self.valid_X
     if test_X is None:
         test_X  = self.test_X
         
     train_X = raise_data_to_list(train_X)
     valid_X = raise_data_to_list(valid_X)
     test_X  = raise_data_to_list(test_X)
         
     
     ############
     # TRAINING #
     ############
     log.maybeLog(self.logger, "-----------TRAINING GSN FOR {0!s} EPOCHS-----------".format(self.n_epoch))
     STOP        = False
     counter     = 0
     if not continue_training:
         self.learning_rate.set_value(self.init_learn_rate)  # learning rate
     times       = []
     best_cost   = float('inf')
     best_params = None
     patience    = 0
                 
     log.maybeLog(self.logger, ['train X size:',str(train_X[0].shape.eval())])
     if valid_X is not None:
         log.maybeLog(self.logger, ['valid X size:',str(valid_X[0].shape.eval())])
     if test_X is not None:
         log.maybeLog(self.logger, ['test X size:',str(test_X[0].shape.eval())])
     
     if self.vis_init:
          self.bias_list[0].set_value(logit(numpy.clip(train_X[0].get_value().mean(axis=0), 0.001, 0.9)))
 
     while not STOP:
         counter += 1
         t = time.time()
         log.maybeAppend(self.logger, [counter,'\t'])
         
         #train
         train_costs = data.apply_cost_function_to_dataset(self.f_learn, train_X, self.batch_size)
         log.maybeAppend(self.logger, ['Train:',trunc(numpy.mean(train_costs)), '\t'])
 
         #valid
         if valid_X is not None:
             valid_costs = data.apply_cost_function_to_dataset(self.f_cost, valid_X, self.batch_size)
             log.maybeAppend(self.logger, ['Valid:',trunc(numpy.mean(valid_costs)), '\t'])
 
         #test
         if test_X is not None:
             test_costs = data.apply_cost_function_to_dataset(self.f_cost, test_X, self.batch_size)
             log.maybeAppend(self.logger, ['Test:',trunc(numpy.mean(test_costs)), '\t'])
             
         #check for early stopping
         if valid_X is not None:
             cost = numpy.sum(valid_costs)
         else:
             cost = numpy.sum(train_costs)
         if cost < best_cost*self.early_stop_threshold:
             patience = 0
             best_cost = cost
             # save the parameters that made it the best
             best_params = save_params(self.params)
         else:
             patience += 1
 
         if counter >= self.n_epoch or patience >= self.early_stop_length:
             STOP = True
             if best_params is not None:
                 restore_params(self.params, best_params)
             save_params_to_file(counter, self.params, self.outdir, self.logger)
 
         timing = time.time() - t
         times.append(timing)
 
         log.maybeAppend(self.logger, 'time: '+make_time_units_string(timing)+'\t')
         
         log.maybeLog(self.logger, 'remaining: '+make_time_units_string((self.n_epoch - counter) * numpy.mean(times)))
     
         if (counter % self.save_frequency) == 0 or STOP is True:
             if self.is_image:
                 n_examples = 100
                  tests = test_X[0].get_value()[0:n_examples]
                  noisy_tests = self.f_noise(test_X[0].get_value()[0:n_examples])
                 _, reconstructed = self.f_recon(noisy_tests) 
                 # Concatenate stuff if it is an image
                 stacked = numpy.vstack([numpy.vstack([tests[i*10 : (i+1)*10], noisy_tests[i*10 : (i+1)*10], reconstructed[i*10 : (i+1)*10]]) for i in range(10)])
                 number_reconstruction = PIL.Image.fromarray(tile_raster_images(stacked, (self.image_height,self.image_width), (10,30)))
                 
                 number_reconstruction.save(self.outdir+'gsn_image_reconstruction_epoch_'+str(counter)+'.png')
     
             #save gsn_params
             save_params_to_file(counter, self.params, self.outdir, self.logger)
      
         # ANNEAL!
         new_lr = self.learning_rate.get_value() * self.annealing
         self.learning_rate.set_value(new_lr)
         
         new_hidden_sigma = self.hidden_add_noise_sigma.get_value() * self.noise_annealing
         self.hidden_add_noise_sigma.set_value(new_hidden_sigma)
         
         new_salt_pepper = self.input_salt_and_pepper.get_value() * self.noise_annealing
         self.input_salt_and_pepper.set_value(new_salt_pepper)
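
The f_noise function used at the reconstruction checkpoints wraps salt_and_pepper, which corrupts a random fraction of the input entries by forcing each corrupted entry to 0 or 1 with equal probability. A numpy stand-in for the same corruption (the original is a Theano op built on MRG_RandomStreams; this version is for illustration only):

import numpy

def salt_and_pepper_np(X, p, rng=numpy.random):
    keep = rng.binomial(1, 1.0 - p, size=X.shape)   # 1 = leave the entry alone
    salt = rng.binomial(1, 0.5, size=X.shape)       # 0/1 replacement values
    return X * keep + salt * (1 - keep)
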
Ejemplo n.º 45
0
    def __init__(self,
                 train_X=None,
                 train_Y=None,
                 valid_X=None,
                 valid_Y=None,
                 test_X=None,
                 test_Y=None,
                 args=None,
                 logger=None):
        # Output logger
        self.logger = logger
        self.outdir = args.get("output_path", defaults["output_path"])
        if self.outdir[-1] != '/':
            self.outdir = self.outdir + '/'
        # Input data
        self.train_X = train_X
        self.train_Y = train_Y
        self.valid_X = valid_X
        self.valid_Y = valid_Y
        self.test_X = test_X
        self.test_Y = test_Y

        # variables from the dataset that are used for initialization and image reconstruction
        if train_X is None:
            self.N_input = args.get("input_size")
            if args.get("input_size") is None:
                raise AssertionError(
                    "Please either specify input_size in the arguments or provide an example train_X for input dimensionality."
                )
        else:
            self.N_input = train_X.eval().shape[1]
        self.root_N_input = numpy.sqrt(self.N_input)

        self.is_image = args.get('is_image', defaults['is_image'])
        if self.is_image:
            self.image_width = args.get('width', self.root_N_input)
            self.image_height = args.get('height', self.root_N_input)

        #######################################
        # Network and training specifications #
        #######################################
        self.gsn_layers = args.get(
            'gsn_layers', defaults['gsn_layers'])  # number hidden layers
        self.walkbacks = args.get('walkbacks',
                                  defaults['walkbacks'])  # number of walkbacks
        self.learning_rate = theano.shared(
            cast32(args.get('learning_rate',
                            defaults['learning_rate'])))  # learning rate
        self.init_learn_rate = cast32(
            args.get('learning_rate', defaults['learning_rate']))
        self.momentum = theano.shared(
            cast32(args.get('momentum',
                            defaults['momentum'])))  # momentum term
        self.annealing = cast32(args.get(
            'annealing',
            defaults['annealing']))  # exponential annealing coefficient
        self.noise_annealing = cast32(
            args.get('noise_annealing', defaults['noise_annealing'])
        )  # exponential noise annealing coefficient
        self.batch_size = args.get('batch_size', defaults['batch_size'])
        self.gsn_batch_size = args.get('gsn_batch_size',
                                       defaults['gsn_batch_size'])
        self.n_epoch = args.get('n_epoch', defaults['n_epoch'])
        self.early_stop_threshold = args.get('early_stop_threshold',
                                             defaults['early_stop_threshold'])
        self.early_stop_length = args.get('early_stop_length',
                                          defaults['early_stop_length'])
        self.save_frequency = args.get('save_frequency',
                                       defaults['save_frequency'])

        self.noiseless_h1 = args.get('noiseless_h1', defaults["noiseless_h1"])
        self.hidden_add_noise_sigma = theano.shared(
            cast32(
                args.get('hidden_add_noise_sigma',
                         defaults["hidden_add_noise_sigma"])))
        self.input_salt_and_pepper = theano.shared(
            cast32(
                args.get('input_salt_and_pepper',
                         defaults["input_salt_and_pepper"])))
        self.input_sampling = args.get('input_sampling',
                                       defaults["input_sampling"])
        self.vis_init = args.get('vis_init', defaults['vis_init'])
        self.load_params = args.get('load_params', defaults['load_params'])
        self.hessian_free = args.get('hessian_free', defaults['hessian_free'])

        self.hidden_size = args.get('hidden_size', defaults['hidden_size'])  # also needed later when slicing h_t into per-layer hiddens
        self.layer_sizes = [self.N_input] + [self.hidden_size] * self.gsn_layers  # layer sizes, from h0 to hK (h0 is the visible layer)
        self.recurrent_hidden_size = args.get(
            'recurrent_hidden_size', defaults['recurrent_hidden_size'])
        self.top_layer_sizes = [self.recurrent_hidden_size] + [self.hidden_size] * self.gsn_layers  # layer sizes for the top GSN (its visible layer is the recurrent hidden state)

        self.f_recon = None
        self.f_noise = None

        # Activation functions!
        # For the GSN:
        if args.get('hidden_activation') is not None:
            log.maybeLog(self.logger,
                         'Using specified activation for GSN hiddens')
            self.hidden_activation = args.get('hidden_activation')
        elif args.get('hidden_act') == 'sigmoid':
            log.maybeLog(self.logger,
                         'Using sigmoid activation for GSN hiddens')
            self.hidden_activation = T.nnet.sigmoid
        elif args.get('hidden_act') == 'rectifier':
            log.maybeLog(self.logger,
                         'Using rectifier activation for GSN hiddens')
            self.hidden_activation = lambda x: T.maximum(cast32(0), x)
        elif args.get('hidden_act') == 'tanh':
            log.maybeLog(
                self.logger,
                'Using hyperbolic tangent activation for GSN hiddens')
            self.hidden_activation = lambda x: T.tanh(x)
        elif args.get('hidden_act') is not None:
            log.maybeLog(
                self.logger,
                "Did not recognize hidden activation {0!s}, please use tanh, rectifier, or sigmoid for GSN hiddens"
                .format(args.get('hidden_act')))
            raise NotImplementedError(
                "Did not recognize hidden activation {0!s}, please use tanh, rectifier, or sigmoid for GSN hiddens"
                .format(args.get('hidden_act')))
        else:
            log.maybeLog(self.logger,
                         "Using default activation for GSN hiddens")
            self.hidden_activation = defaults['hidden_activation']
        # For the RNN:
        if args.get('recurrent_hidden_activation') is not None:
            log.maybeLog(self.logger,
                         'Using specified activation for RNN hiddens')
            self.recurrent_hidden_activation = args.get(
                'recurrent_hidden_activation')
        elif args.get('recurrent_hidden_act') == 'sigmoid':
            log.maybeLog(self.logger,
                         'Using sigmoid activation for RNN hiddens')
            self.recurrent_hidden_activation = T.nnet.sigmoid
        elif args.get('recurrent_hidden_act') == 'rectifier':
            log.maybeLog(self.logger,
                         'Using rectifier activation for RNN hiddens')
            self.recurrent_hidden_activation = lambda x: T.maximum(
                cast32(0), x)
        elif args.get('recurrent_hidden_act') == 'tanh':
            log.maybeLog(
                self.logger,
                'Using hyperbolic tangent activation for RNN hiddens')
            self.recurrent_hidden_activation = lambda x: T.tanh(x)
        elif args.get('recurrent_hidden_act') is not None:
            log.maybeLog(
                self.logger,
                "Did not recognize hidden activation {0!s}, please use tanh, rectifier, or sigmoid for RNN hiddens"
                .format(args.get('recurrent_hidden_act')))
            raise NotImplementedError(
                "Did not recognize hidden activation {0!s}, please use tanh, rectifier, or sigmoid for RNN hiddens"
                .format(args.get('recurrent_hidden_act')))
        else:
            log.maybeLog(self.logger,
                         "Using default activation for RNN hiddens")
            self.recurrent_hidden_activation = defaults[
                'recurrent_hidden_activation']
        # Visible layer activation
        if args.get('visible_activation') is not None:
            log.maybeLog(self.logger,
                         'Using specified activation for visible layer')
            self.visible_activation = args.get('visible_activation')
        elif args.get('visible_act') == 'sigmoid':
            log.maybeLog(self.logger,
                         'Using sigmoid activation for visible layer')
            self.visible_activation = T.nnet.sigmoid
        elif args.get('visible_act') == 'softmax':
            log.maybeLog(self.logger,
                         'Using softmax activation for visible layer')
            self.visible_activation = T.nnet.softmax
        elif args.get('visible_act') is not None:
            log.maybeLog(
                self.logger,
                "Did not recognize visible activation {0!s}, please use sigmoid or softmax"
                .format(args.get('visible_act')))
            raise NotImplementedError(
                "Did not recognize visible activation {0!s}, please use sigmoid or softmax"
                .format(args.get('visible_act')))
        else:
            log.maybeLog(self.logger,
                         'Using default activation for visible layer')
            self.visible_activation = defaults['visible_activation']

        # Cost function!
        if args.get('cost_function') is not None:
            log.maybeLog(self.logger,
                         '\nUsing specified cost function for GSN training\n')
            self.cost_function = args.get('cost_function')
        elif args.get('cost_funct') == 'binary_crossentropy':
            log.maybeLog(self.logger, '\nUsing binary cross-entropy cost!\n')
            self.cost_function = lambda x, y: T.mean(
                T.nnet.binary_crossentropy(x, y))
        elif args.get('cost_funct') == 'square':
            log.maybeLog(self.logger, "\nUsing square error cost!\n")
            #cost_function = lambda x,y: T.log(T.mean(T.sqr(x-y)))
            self.cost_function = lambda x, y: T.log(T.sum(T.pow((x - y), 2)))
        elif args.get('cost_funct') is not None:
            log.maybeLog(
                self.logger,
                "\nDid not recognize cost function {0!s}, please use binary_crossentropy or square\n"
                .format(args.get('cost_funct')))
            raise NotImplementedError(
                "Did not recognize cost function {0!s}, please use binary_crossentropy or square"
                .format(args.get('cost_funct')))
        else:
            log.maybeLog(self.logger,
                         '\nUsing default cost function for GSN training\n')
            self.cost_function = defaults['cost_function']

        ############################
        # Theano variables and RNG #
        ############################
        self.X = T.fmatrix('X')  #single (batch) for training gsn
        self.Xs = T.fmatrix('Xs')  #sequence for training rnn
        self.MRG = RNG_MRG.MRG_RandomStreams(1)

        ###############
        # Parameters! #
        ###############
        #visible gsn
        self.weights_list = [
            get_shared_weights(self.layer_sizes[i],
                               self.layer_sizes[i + 1],
                               name="W_{0!s}_{1!s}".format(i, i + 1))
            for i in range(self.gsn_layers)
        ]  # initialize each layer to uniform sample from sqrt(6. / (n_in + n_out))
        self.bias_list = [
            get_shared_bias(self.layer_sizes[i], name='b_' + str(i))
            for i in range(self.gsn_layers + 1)
        ]  # initialize each layer to 0's.

        #recurrent
        self.recurrent_to_gsn_weights_list = [
            get_shared_weights(self.recurrent_hidden_size,
                               self.layer_sizes[layer],
                               name="W_u_h{0!s}".format(layer))
            for layer in range(self.gsn_layers + 1) if layer % 2 != 0
        ]
        self.W_u_u = get_shared_weights(self.recurrent_hidden_size,
                                        self.recurrent_hidden_size,
                                        name="W_u_u")
        self.W_ins_u = get_shared_weights(args.get('hidden_size',
                                                   defaults['hidden_size']),
                                          self.recurrent_hidden_size,
                                          name="W_ins_u")
        self.recurrent_bias = get_shared_bias(self.recurrent_hidden_size,
                                              name='b_u')

        #top layer gsn
        self.top_weights_list = [
            get_shared_weights(self.top_layer_sizes[i],
                               self.top_layer_sizes[i + 1],
                               name="Wtop_{0!s}_{1!s}".format(i, i + 1))
            for i in range(self.gsn_layers)
        ]  # initialize each layer to uniform sample from sqrt(6. / (n_in + n_out))
        self.top_bias_list = [
            get_shared_bias(self.top_layer_sizes[i], name='btop_' + str(i))
            for i in range(self.gsn_layers + 1)
        ]  # initialize each layer to 0's.

        #lists for use with gradients
        self.gsn_params = self.weights_list + self.bias_list
        self.u_params = [self.W_u_u, self.W_ins_u, self.recurrent_bias]
        self.top_params = self.top_weights_list + self.top_bias_list
        self.params = self.gsn_params + self.recurrent_to_gsn_weights_list + self.u_params + self.top_params

        ###################################################
        #          load initial parameters                #
        ###################################################
        if self.load_params:
            # Each pickle stores a flat list of shared variables: weights first, then biases.
            params_to_load = 'gsn_params.pkl'
            log.maybeLog(self.logger, "\nLoading existing GSN parameters\n")
            loaded_params = cPickle.load(open(params_to_load, 'rb'))
            for lp, p in zip(loaded_params[:len(self.weights_list)],
                             self.weights_list):
                p.set_value(lp.get_value(borrow=False))
            for lp, p in zip(loaded_params[len(self.weights_list):],
                             self.bias_list):
                p.set_value(lp.get_value(borrow=False))

            params_to_load = 'rnn_params.pkl'
            log.maybeLog(self.logger, "\nLoading existing RNN parameters\n")
            loaded_params = cPickle.load(open(params_to_load, 'rb'))
            n_recurrent = len(self.recurrent_to_gsn_weights_list)
            for lp, p in zip(loaded_params[:n_recurrent],
                             self.recurrent_to_gsn_weights_list):
                p.set_value(lp.get_value(borrow=False))
            # W_u_u, W_ins_u, and recurrent_bias are single shared variables,
            # so they are set directly rather than zipped as lists.
            self.W_u_u.set_value(
                loaded_params[n_recurrent].get_value(borrow=False))
            self.W_ins_u.set_value(
                loaded_params[n_recurrent + 1].get_value(borrow=False))
            self.recurrent_bias.set_value(
                loaded_params[n_recurrent + 2].get_value(borrow=False))

            params_to_load = 'top_gsn_params.pkl'
            log.maybeLog(self.logger,
                         "\nLoading existing top level GSN parameters\n")
            loaded_params = cPickle.load(open(params_to_load, 'rb'))
            for lp, p in zip(loaded_params[:len(self.top_weights_list)],
                             self.top_weights_list):
                p.set_value(lp.get_value(borrow=False))
            for lp, p in zip(loaded_params[len(self.top_weights_list):],
                             self.top_bias_list):
                p.set_value(lp.get_value(borrow=False))

        self.gsn_args = {
            'weights_list':          self.weights_list,
            'bias_list':             self.bias_list,
            'hidden_activation':     self.hidden_activation,
            'visible_activation':    self.visible_activation,
            'cost_function':         self.cost_function,
            'layers':                self.gsn_layers,
            'walkbacks':             self.walkbacks,
            'hidden_size':           args.get('hidden_size', defaults['hidden_size']),
            'learning_rate':         args.get('learning_rate', defaults['learning_rate']),
            'momentum':              args.get('momentum', defaults['momentum']),
            'annealing':             self.annealing,
            'noise_annealing':       self.noise_annealing,
            'batch_size':            self.gsn_batch_size,
            'n_epoch':               self.n_epoch,
            'early_stop_threshold':  self.early_stop_threshold,
            'early_stop_length':     self.early_stop_length,
            'save_frequency':        self.save_frequency,
            'noiseless_h1':          self.noiseless_h1,
            'hidden_add_noise_sigma': args.get('hidden_add_noise_sigma', defaults['hidden_add_noise_sigma']),
            'input_salt_and_pepper': args.get('input_salt_and_pepper', defaults['input_salt_and_pepper']),
            'input_sampling':        self.input_sampling,
            'vis_init':              self.vis_init,
            'output_path':           self.outdir + 'gsn/',
            'is_image':              self.is_image,
            'input_size':            self.N_input
        }

        self.top_gsn_args = {
            'weights_list':          self.top_weights_list,
            'bias_list':             self.top_bias_list,
            'hidden_activation':     self.hidden_activation,
            'visible_activation':    self.recurrent_hidden_activation,
            'cost_function':         self.cost_function,
            'layers':                self.gsn_layers,
            'walkbacks':             self.walkbacks,
            'hidden_size':           args.get('hidden_size', defaults['hidden_size']),
            'learning_rate':         args.get('learning_rate', defaults['learning_rate']),
            'momentum':              args.get('momentum', defaults['momentum']),
            'annealing':             self.annealing,
            'noise_annealing':       self.noise_annealing,
            'batch_size':            self.gsn_batch_size,
            'n_epoch':               self.n_epoch,
            'early_stop_threshold':  self.early_stop_threshold,
            'early_stop_length':     self.early_stop_length,
            'save_frequency':        self.save_frequency,
            'noiseless_h1':          self.noiseless_h1,
            'hidden_add_noise_sigma': args.get('hidden_add_noise_sigma', defaults['hidden_add_noise_sigma']),
            'input_salt_and_pepper': args.get('input_salt_and_pepper', defaults['input_salt_and_pepper']),
            'input_sampling':        self.input_sampling,
            'vis_init':              self.vis_init,
            'output_path':           self.outdir + 'top_gsn/',
            'is_image':              False,
            'input_size':            self.recurrent_hidden_size
        }

        ############
        # Sampling #
        ############
        # the input to the sampling function
        X_sample = T.fmatrix("X_sampling")
        self.network_state_input = [X_sample] + [
            T.fmatrix("H_sampling_" + str(i + 1))
            for i in range(self.gsn_layers)
        ]

        # "Output" state of the network (noisy)
        # initialized with input, then we apply updates
        self.network_state_output = [X_sample] + self.network_state_input[1:]
        visible_pX_chain = []

        # ONE update
        log.maybeLog(self.logger,
                     "Performing one walkback in network state sampling.")
        generative_stochastic_network.update_layers(
            self.network_state_output, self.weights_list, self.bias_list,
            visible_pX_chain, True, self.noiseless_h1,
            self.hidden_add_noise_sigma, self.input_salt_and_pepper,
            self.input_sampling, self.MRG, self.visible_activation,
            self.hidden_activation, self.logger)

        ##############################################
        #        Build the graphs for the SEN        #
        ##############################################
        # If `x_t` is given, deterministic recurrence to compute u_t. Otherwise, x_t is generated first and then used to update u_t.
        def recurrent_step(x_t, u_tm1, add_noise):
            # Make current guess for hiddens based on U
            for i in range(self.gsn_layers):
                if i % 2 == 0:
                    log.maybeLog(
                        self.logger, "Using {0!s} and {1!s}".format(
                            self.recurrent_to_gsn_weights_list[(i + 1) / 2],
                            self.bias_list[i + 1]))
            h_t = T.concatenate([
                self.hidden_activation(self.bias_list[i + 1] + T.dot(
                    u_tm1, self.recurrent_to_gsn_weights_list[(i + 1) / 2]))
                for i in range(self.gsn_layers) if i % 2 == 0
            ],
                                axis=0)

            # Make a GSN to update U
            _, hs = generative_stochastic_network.build_gsn(
                x_t, self.weights_list, self.bias_list, add_noise,
                self.noiseless_h1, self.hidden_add_noise_sigma,
                self.input_salt_and_pepper, self.input_sampling, self.MRG,
                self.visible_activation, self.hidden_activation,
                self.walkbacks, self.logger)
            htop_t = hs[-1]
            ins_t = htop_t

            ua_t = T.dot(ins_t, self.W_ins_u) + T.dot(
                u_tm1, self.W_u_u) + self.recurrent_bias
            u_t = self.recurrent_hidden_activation(ua_t)
            return [ua_t, u_t, h_t]

        log.maybeLog(self.logger, "\nCreating recurrent step scan.")
        # For training, the deterministic recurrence is used to compute all the
        # {h_t, 1 <= t <= T} given Xs. Conditional GSNs can then be trained
        # in batches using those parameters.
        u0 = T.zeros((self.recurrent_hidden_size,))  # initial value for the RNN hidden units
        (ua, u, h_t), updates_recurrent = theano.scan(
            fn=lambda x_t, u_tm1, *_: recurrent_step(x_t, u_tm1, True),
            sequences=self.Xs,
            outputs_info=[None, u0, None],
            non_sequences=self.params)

        log.maybeLog(self.logger,
                     "Now for reconstruction sample without noise")
        (_, _, h_t_recon), updates_recurrent_recon = theano.scan(
            fn=lambda x_t, u_tm1, *_: recurrent_step(x_t, u_tm1, False),
            sequences=self.Xs,
            outputs_info=[None, u0, None],
            non_sequences=self.params)
        # put together the hiddens list
        h_list = [T.zeros_like(self.Xs)]
        for layer, w in enumerate(self.weights_list):
            if layer % 2 != 0:
                h_list.append(T.zeros_like(T.dot(h_list[-1], w)))
            else:
                h_list.append(
                    (h_t.T[(layer / 2) * self.hidden_size:(layer / 2 + 1) *
                           self.hidden_size]).T)

        h_list_recon = [T.zeros_like(self.Xs)]
        for layer, w in enumerate(self.weights_list):
            if layer % 2 != 0:
                h_list_recon.append(T.zeros_like(T.dot(h_list_recon[-1], w)))
            else:
                h_list_recon.append(
                    (h_t_recon.T[(layer / 2) *
                                 self.hidden_size:(layer / 2 + 1) *
                                 self.hidden_size]).T)

        #with noise
        _, cost, show_cost = generative_stochastic_network.build_gsn_given_hiddens(
            self.Xs, h_list, self.weights_list, self.bias_list, True,
            self.noiseless_h1, self.hidden_add_noise_sigma,
            self.input_salt_and_pepper, self.input_sampling, self.MRG,
            self.visible_activation, self.hidden_activation, self.walkbacks,
            self.cost_function, self.logger)
        #without noise for reconstruction
        x_sample_recon, _, _ = generative_stochastic_network.build_gsn_given_hiddens(
            self.Xs, h_list_recon, self.weights_list, self.bias_list, False,
            self.noiseless_h1, self.hidden_add_noise_sigma,
            self.input_salt_and_pepper, self.input_sampling, self.MRG,
            self.visible_activation, self.hidden_activation, self.walkbacks,
            self.cost_function, self.logger)

        updates_train = updates_recurrent
        updates_cost = updates_recurrent

        #############
        #   COSTS   #
        #############
        log.maybeLog(self.logger,
                     '\nCost w.r.t p(X|...) at every step in the graph')
        start_functions_time = time.time()

        # if we are not using Hessian-free training create the normal sgd functions
        if not self.hessian_free:
            gradient = T.grad(cost, self.params)
            gradient_buffer = [
                theano.shared(
                    numpy.zeros(param.get_value().shape, dtype='float32'))
                for param in self.params
            ]

            m_gradient = [
                self.momentum * gb + (cast32(1) - self.momentum) * g
                for (gb, g) in zip(gradient_buffer, gradient)
            ]
            param_updates = [(param, param - self.learning_rate * mg)
                             for (param, mg) in zip(self.params, m_gradient)]
            gradient_buffer_updates = zip(gradient_buffer, m_gradient)

            updates = OrderedDict(param_updates + gradient_buffer_updates)
            updates_train.update(updates)

            log.maybeLog(self.logger, "rnn-gsn learn...")
            self.f_learn = theano.function(inputs=[self.Xs],
                                           updates=updates_train,
                                           outputs=show_cost,
                                           on_unused_input='warn',
                                           name='rnngsn_f_learn')

            log.maybeLog(self.logger, "rnn-gsn cost...")
            self.f_cost = theano.function(inputs=[self.Xs],
                                          updates=updates_cost,
                                          outputs=show_cost,
                                          on_unused_input='warn',
                                          name='rnngsn_f_cost')

        log.maybeLog(self.logger, "Training/cost functions done.")

        # Denoise some numbers : show number, noisy number, predicted number, reconstructed number
        log.maybeLog(
            self.logger,
            "Creating graph for noisy reconstruction function at checkpoints during training."
        )
        self.f_recon = theano.function(inputs=[self.Xs],
                                       outputs=x_sample_recon[-1],
                                       updates=updates_recurrent_recon,
                                       name='rnngsn_f_recon')

        # a function to add salt and pepper noise
        self.f_noise = theano.function(inputs=[self.X],
                                       outputs=salt_and_pepper(
                                           self.X, self.input_salt_and_pepper),
                                       name='rnngsn_f_noise')
        # Sampling functions
        log.maybeLog(self.logger, "Creating sampling function...")
        if self.gsn_layers == 1:
            self.f_sample = theano.function(
                inputs=[X_sample],
                outputs=visible_pX_chain[-1],
                name='rnngsn_f_sample_single_layer')
        else:
            # Theano reports unused inputs here because the odd layers are not
            # read directly -- they are computed from the even layers -- so
            # on_unused_input='warn' downgrades that error to a warning.
            self.f_sample = theano.function(inputs=self.network_state_input,
                                            outputs=self.network_state_output +
                                            visible_pX_chain,
                                            on_unused_input='warn',
                                            name='rnngsn_f_sample')

        log.maybeLog(self.logger, "Done compiling all functions.")
        compilation_time = time.time() - start_functions_time
        # Show the compile time with appropriate easy-to-read units.
        log.maybeLog(
            self.logger, "Total compilation time took " +
            make_time_units_string(compilation_time) + ".\n\n")
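
The SGD branch above keeps one gradient_buffer per parameter and, each step, blends the fresh gradient into the buffer with the momentum coefficient before updating the parameter. A compact numpy sketch of that exact update rule (the values are placeholders; the real code builds these as Theano shared-variable updates):

import numpy

def momentum_sgd_step(params, grads, buffers, lr=0.25, momentum=0.5):
    # buffer <- momentum*buffer + (1-momentum)*grad; param <- param - lr*buffer
    for p, g, b in zip(params, grads, buffers):
        b[:] = momentum * b + (1.0 - momentum) * g
        p -= lr * b
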
Ejemplo n.º 46
0
    def __init__(self, train_X=None, train_Y=None, valid_X=None, valid_Y=None, test_X=None, test_Y=None, args=None, logger=None):
        # Output logger
        self.logger = logger
        self.outdir = args.get("output_path", defaults["output_path"])
        if self.outdir[-1] != '/':
            self.outdir = self.outdir+'/'
            
        data.mkdir_p(self.outdir)
        
        # Configuration
        config_filename = self.outdir+'config'
        log.maybeLog(self.logger, 'Saving config')
        with open(config_filename, 'w') as f:
            f.write(str(args))
 
        # Input data - make sure everything is a list of shared datasets; None values pass through unchanged.
        self.train_X = raise_to_list(train_X)
        self.train_Y = raise_to_list(train_Y)
        self.valid_X = raise_to_list(valid_X)
        self.valid_Y = raise_to_list(valid_Y)
        self.test_X  = raise_to_list(test_X)
        self.test_Y  = raise_to_list(test_Y)
                
        # variables from the dataset that are used for initialization and image reconstruction
        if self.train_X is None:
            self.N_input = args.get("input_size")
            if args.get("input_size") is None:
                raise AssertionError("Please either specify input_size in the arguments or provide an example train_X for input dimensionality.")
        else:
            self.N_input = self.train_X[0].get_value(borrow=True).shape[1]
        
        self.is_image = args.get('is_image', defaults['is_image'])
        if self.is_image:
            (_h, _w) = closest_to_square_factors(self.N_input)
            self.image_width  = args.get('width', _w)
            self.image_height = args.get('height', _h)
            
        #######################################
        # Network and training specifications #
        #######################################
        self.layers          = args.get('layers', defaults['layers']) # number hidden layers
        self.walkbacks       = args.get('walkbacks', defaults['walkbacks']) # number of walkbacks
        self.learning_rate   = theano.shared(cast32(args.get('learning_rate', defaults['learning_rate'])))  # learning rate
        self.init_learn_rate = cast32(args.get('learning_rate', defaults['learning_rate']))
        self.momentum        = theano.shared(cast32(args.get('momentum', defaults['momentum']))) # momentum term
        self.annealing       = cast32(args.get('annealing', defaults['annealing'])) # exponential annealing coefficient
        self.noise_annealing = cast32(args.get('noise_annealing', defaults['noise_annealing'])) # exponential noise annealing coefficient
        self.batch_size      = args.get('batch_size', defaults['batch_size'])
        self.gsn_batch_size = args.get('gsn_batch_size', defaults['gsn_batch_size'])
        self.n_epoch         = args.get('n_epoch', defaults['n_epoch'])
        self.early_stop_threshold = args.get('early_stop_threshold', defaults['early_stop_threshold'])
        self.early_stop_length = args.get('early_stop_length', defaults['early_stop_length'])
        self.save_frequency  = args.get('save_frequency', defaults['save_frequency'])
        
        self.noiseless_h1           = args.get('noiseless_h1', defaults["noiseless_h1"])
        self.hidden_add_noise_sigma = theano.shared(cast32(args.get('hidden_add_noise_sigma', defaults["hidden_add_noise_sigma"])))
        self.input_salt_and_pepper  = theano.shared(cast32(args.get('input_salt_and_pepper', defaults["input_salt_and_pepper"])))
        self.input_sampling         = args.get('input_sampling', defaults["input_sampling"])
        self.vis_init               = args.get('vis_init', defaults['vis_init'])
        self.initialize_gsn         = args.get('initialize_gsn', defaults['initialize_gsn'])
        self.hessian_free           = args.get('hessian_free', defaults['hessian_free'])
        
        self.hidden_size = args.get('hidden_size', defaults['hidden_size'])
        self.layer_sizes = [self.N_input] + [self.hidden_size] * self.layers # layer sizes, from h0 to hK (h0 is the visible layer)
        self.recurrent_hidden_size = args.get('recurrent_hidden_size', defaults['recurrent_hidden_size'])
        
        self.f_recon = None
        self.f_noise = None
        
        # Activation functions!
        # For the GSN:
        if args.get('hidden_activation') is not None:
            log.maybeLog(self.logger, 'Using specified activation for GSN hiddens')
            self.hidden_activation = args.get('hidden_activation')
        elif args.get('hidden_act') is not None:
            self.hidden_activation = get_activation_function(args.get('hidden_act'))
            log.maybeLog(self.logger, 'Using {0!s} activation for GSN hiddens'.format(args.get('hidden_act')))
        else:
            log.maybeLog(self.logger, "Using default activation for GSN hiddens")
            self.hidden_activation = defaults['hidden_activation']
            
        # For the RNN:
        if args.get('recurrent_hidden_activation') is not None:
            log.maybeLog(self.logger, 'Using specified activation for RNN hiddens')
            self.recurrent_hidden_activation = args.get('recurrent_hidden_activation')
        elif args.get('recurrent_hidden_act') is not None:
            self.recurrent_hidden_activation = get_activation_function(args.get('recurrent_hidden_act'))
            log.maybeLog(self.logger, 'Using {0!s} activation for RNN hiddens'.format(args.get('recurrent_hidden_act')))
        else:
            log.maybeLog(self.logger, "Using default activation for RNN hiddens")
            self.recurrent_hidden_activation = defaults['recurrent_hidden_activation']
            
        # Visible layer activation
        if args.get('visible_activation') is not None:
            log.maybeLog(self.logger, 'Using specified activation for visible layer')
            self.visible_activation = args.get('visible_activation')
        elif args.get('visible_act') is not None:
            self.visible_activation = get_activation_function(args.get('visible_act'))
            log.maybeLog(self.logger, 'Using {0!s} activation for visible layer'.format(args.get('visible_act')))
        else:
            log.maybeLog(self.logger, 'Using default activation for visible layer')
            self.visible_activation = defaults['visible_activation']
            
        # Cost function!
        if args.get('cost_function') is not None:
            log.maybeLog(self.logger, '\nUsing specified cost function for GSN training\n')
            self.cost_function = args.get('cost_function')
        elif args.get('cost_funct') is not None:
            self.cost_function = get_cost_function(args.get('cost_funct'))
            log.maybeLog(self.logger, 'Using {0!s} for cost function'.format(args.get('cost_funct')))
        else:
            log.maybeLog(self.logger, '\nUsing default cost function for GSN training\n')
            self.cost_function = defaults['cost_function']
        
        ############################
        # Theano variables and RNG #
        ############################
        self.X = T.fmatrix('X') #single (batch) for training gsn
        self.Xs = T.fmatrix('Xs') #sequence for training rnn-gsn
        self.MRG = RNG_MRG.MRG_RandomStreams(1)
        
        ###############
        # Parameters! #
        ###############
        #gsn
        self.weights_list = [get_shared_weights(self.layer_sizes[i], self.layer_sizes[i+1], name="W_{0!s}_{1!s}".format(i,i+1)) for i in range(self.layers)] # initialize each layer to uniform sample from sqrt(6. / (n_in + n_out))
        self.bias_list    = [get_shared_bias(self.layer_sizes[i], name='b_'+str(i)) for i in range(self.layers + 1)] # initialize each layer to 0's.
        
        #recurrent
        self.recurrent_to_gsn_weights_list = [get_shared_weights(self.recurrent_hidden_size, self.layer_sizes[layer], name="W_u_h{0!s}".format(layer)) for layer in range(self.layers+1) if layer%2 != 0]
        self.W_u_u = get_shared_weights(self.recurrent_hidden_size, self.recurrent_hidden_size, name="W_u_u")
        self.W_x_u = get_shared_weights(self.N_input, self.recurrent_hidden_size, name="W_x_u")
        self.recurrent_bias = get_shared_bias(self.recurrent_hidden_size, name='b_u')
        
        #lists for use with gradients
        self.gsn_params = self.weights_list + self.bias_list
        self.u_params   = [self.W_u_u, self.W_x_u, self.recurrent_bias]
        self.params     = self.gsn_params + self.recurrent_to_gsn_weights_list + self.u_params
        
        ###########################################################
        #           load initial parameters of gsn                #
        ###########################################################
        self.train_gsn_first = False
        if self.initialize_gsn:
            params_to_load = 'gsn_params.pkl'
            if not os.path.isfile(params_to_load):
                self.train_gsn_first = True 
            else:
                log.maybeLog(self.logger, "\nLoading existing GSN parameters\n")
                loaded_params = cPickle.load(open(params_to_load,'rb'))
                [p.set_value(lp.get_value(borrow=False)) for lp, p in zip(loaded_params[:len(self.weights_list)], self.weights_list)]
                [p.set_value(lp.get_value(borrow=False)) for lp, p in zip(loaded_params[len(self.weights_list):], self.bias_list)]
                
        if self.initialize_gsn:
            self.gsn_args = {'weights_list':       self.weights_list,
                             'bias_list':          self.bias_list,
                             'hidden_activation':  self.hidden_activation,
                             'visible_activation': self.visible_activation,
                             'cost_function':      self.cost_function,
                             'layers':             self.layers,
                             'walkbacks':          self.walkbacks,
                             'hidden_size':        self.hidden_size,
                             'learning_rate':      args.get('learning_rate', defaults['learning_rate']),
                             'momentum':           args.get('momentum', defaults['momentum']),
                             'annealing':          self.annealing,
                             'noise_annealing':    self.noise_annealing,
                             'batch_size':         self.gsn_batch_size,
                             'n_epoch':            self.n_epoch,
                             'early_stop_threshold':   self.early_stop_threshold,
                             'early_stop_length':      self.early_stop_length,
                             'save_frequency':         self.save_frequency,
                             'noiseless_h1':           self.noiseless_h1,
                             'hidden_add_noise_sigma': args.get('hidden_add_noise_sigma', defaults['hidden_add_noise_sigma']),
                             'input_salt_and_pepper':  args.get('input_salt_and_pepper', defaults['input_salt_and_pepper']),
                             'input_sampling':      self.input_sampling,
                             'vis_init':            self.vis_init,
                             'output_path':         self.outdir+'gsn/',
                             'is_image':            self.is_image,
                             'input_size':          self.N_input
                             }
            
        ############
        # Sampling #
        ############
        # the input to the sampling function
        X_sample = T.fmatrix("X_sampling")
        self.network_state_input = [X_sample] + [T.fmatrix("H_sampling_"+str(i+1)) for i in range(self.layers)]
       
        # "Output" state of the network (noisy)
        # initialized with input, then we apply updates
        self.network_state_output = [X_sample] + self.network_state_input[1:]
        visible_pX_chain = []
    
        # ONE update
        _add_noise = True
        log.maybeLog(self.logger, "Performing one walkback in network state sampling.")
        GSN.update_layers(self.network_state_output,
                          self.weights_list,
                          self.bias_list,
                          visible_pX_chain, 
                          _add_noise,
                          self.noiseless_h1,
                          self.hidden_add_noise_sigma,
                          self.input_salt_and_pepper,
                          self.input_sampling,
                          self.MRG,
                          self.visible_activation,
                          self.hidden_activation,
                          self.logger)
    
               
        #############################################
        #      Build the graphs for the RNN-GSN     #
        #############################################
        # Deterministic recurrence: given the current input x_t and the previous state u_tm1, compute the new recurrent state u_t.
        def recurrent_step(x_t, u_tm1, add_noise):
            # Make current guess for hiddens based on U
            for i in range(self.layers):
                if i%2 == 0:
                    log.maybeLog(self.logger, "Using {0!s} and {1!s}".format(self.recurrent_to_gsn_weights_list[(i+1)/2],self.bias_list[i+1]))
            h_t = T.concatenate([self.hidden_activation(self.bias_list[i+1] + T.dot(u_tm1, self.recurrent_to_gsn_weights_list[(i+1)/2])) for i in range(self.layers) if i%2 == 0],axis=0)
            
            # NOTE: the scan below always supplies x_t, so this recurrence is
            # fully deterministic; a generative mode (sampling x_t first)
            # would need its own step function.

            # Alternative kept for reference: build a GSN to denoise x_t and
            # feed its top hidden layer into the recurrent update.
            # chain, hs = gsn.build_gsn(x_t, weights_list, bias_list, add_noise, state.noiseless_h1, state.hidden_add_noise_sigma, state.input_salt_and_pepper, state.input_sampling, MRG, visible_activation, hidden_activation, walkbacks, logger)
            # htop_t = hs[-1]
            # denoised_x_t = chain[-1]
            # ua_t = T.dot(denoised_x_t, W_x_u) + T.dot(htop_t, W_h_u) + T.dot(u_tm1, W_u_u) + recurrent_bias

            # Update U
            ua_t = T.dot(x_t, self.W_x_u) + T.dot(u_tm1, self.W_u_u) + self.recurrent_bias
            u_t = self.recurrent_hidden_activation(ua_t)
            return [ua_t, u_t, h_t]
        
        log.maybeLog(self.logger, "\nCreating recurrent step scan.")
        # For training, the deterministic recurrence is used to compute all the
        # {h_t, 1 <= t <= T} given Xs. Conditional GSNs can then be trained
        # in batches using those parameters.
        u0 = T.zeros((self.recurrent_hidden_size,))  # initial value for the RNN hidden units
        (ua, u, h_t), updates_recurrent = theano.scan(fn=lambda x_t, u_tm1, *_: recurrent_step(x_t, u_tm1, True),
                                                           sequences=self.Xs,
                                                           outputs_info=[None, u0, None],
                                                           non_sequences=self.params)
        
        log.maybeLog(self.logger, "Now for reconstruction sample without noise")
        (_, _, h_t_recon), updates_recurrent_recon = theano.scan(fn=lambda x_t, u_tm1, *_: recurrent_step(x_t, u_tm1, False),
                                                           sequences=self.Xs,
                                                           outputs_info=[None, u0, None],
                                                           non_sequences=self.params)
        # put together the hiddens list
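        # (even-indexed layers take their state from the recurrent guess h_t,
        #  which concatenated them along the unit axis; odd-indexed layers
        #  start at zero and are filled in by the GSN walkbacks)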
        h_list = [T.zeros_like(self.Xs)]
        for layer, w in enumerate(self.weights_list):
            if layer%2 != 0:
                h_list.append(T.zeros_like(T.dot(h_list[-1], w)))
            else:
                h_list.append((h_t.T[(layer/2)*self.hidden_size:(layer/2+1)*self.hidden_size]).T)
                
        h_list_recon = [T.zeros_like(self.Xs)]
        for layer, w in enumerate(self.weights_list):
            if layer%2 != 0:
                h_list_recon.append(T.zeros_like(T.dot(h_list_recon[-1], w)))
            else:
                h_list_recon.append((h_t_recon.T[(layer/2)*self.hidden_size:(layer/2+1)*self.hidden_size]).T)
        
        #with noise
        _, _, cost, show_cost, error = GSN.build_gsn_given_hiddens(self.Xs, h_list, self.weights_list, self.bias_list, True, self.noiseless_h1, self.hidden_add_noise_sigma, self.input_salt_and_pepper, self.input_sampling, self.MRG, self.visible_activation, self.hidden_activation, self.walkbacks, self.cost_function)
        #without noise for reconstruction
        x_sample_recon, _, _, recon_show_cost, _ = GSN.build_gsn_given_hiddens(self.Xs, h_list_recon, self.weights_list, self.bias_list, False, self.noiseless_h1, self.hidden_add_noise_sigma, self.input_salt_and_pepper, self.input_sampling, self.MRG, self.visible_activation, self.hidden_activation, self.walkbacks, self.cost_function)
        
        updates_train = updates_recurrent
        updates_cost = updates_recurrent
        
        #############
        #   COSTS   #
        #############
        log.maybeLog(self.logger, '\nCost w.r.t p(X|...) at every step in the graph')
        start_functions_time = time.time()

        # if we are not using Hessian-free training create the normal sgd functions
        if not self.hessian_free:
            gradient      = T.grad(cost, self.params)      
            gradient_buffer = [theano.shared(numpy.zeros(param.get_value().shape, dtype='float32')) for param in self.params]
            
            m_gradient    = [self.momentum * gb + (cast32(1) - self.momentum) * g for (gb, g) in zip(gradient_buffer, gradient)]
            param_updates = [(param, param - self.learning_rate * mg) for (param, mg) in zip(self.params, m_gradient)]
            gradient_buffer_updates = zip(gradient_buffer, m_gradient)
                
            updates = OrderedDict(param_updates + gradient_buffer_updates)
            updates_train.update(updates)
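            # the update rule above is momentum-smoothed SGD:
            #   buffer <- momentum * buffer + (1 - momentum) * gradient
            #   param  <- param - learning_rate * buffer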
        
            log.maybeLog(self.logger, "rnn-gsn learn...")
            self.f_learn = theano.function(inputs  = [self.Xs],
                                      updates = updates_train,
                                      outputs = [show_cost, error],
                                      on_unused_input='warn',
                                      name='rnngsn_f_learn')
            
            log.maybeLog(self.logger, "rnn-gsn cost...")
            self.f_cost  = theano.function(inputs  = [self.Xs],
                                      updates = updates_cost,
                                      outputs = [show_cost, error],
                                      on_unused_input='warn',
                                      name='rnngsn_f_cost')
        
        log.maybeLog(self.logger, "Training/cost functions done.")
        
        # Denoise some numbers : show number, noisy number, predicted number, reconstructed number
        log.maybeLog(self.logger, "Creating graph for noisy reconstruction function at checkpoints during training.")
        self.f_recon = theano.function(inputs=[self.Xs],
                                       outputs=[x_sample_recon[-1], recon_show_cost],
                                       updates=updates_recurrent_recon,
                                       name='rnngsn_f_recon')
        
        # a function to add salt and pepper noise
        self.f_noise = theano.function(inputs = [self.X],
                                       outputs = salt_and_pepper(self.X, self.input_salt_and_pepper),
                                       name='rnngsn_f_noise')
        # Sampling functions
        log.maybeLog(self.logger, "Creating sampling function...")
        if self.layers == 1: 
            self.f_sample = theano.function(inputs = [X_sample],
                                            outputs = visible_pX_chain[-1],
                                            name='rnngsn_f_sample_single_layer')
        else:
            self.f_sample = theano.function(inputs = self.network_state_input,
                                            outputs = self.network_state_output + visible_pX_chain,
                                            on_unused_input='warn',
                                            name='rnngsn_f_sample')
        
    
        log.maybeLog(self.logger, "Done compiling all functions.")
        compilation_time = time.time() - start_functions_time
        # Show the compile time with appropriate easy-to-read units.
        log.maybeLog(self.logger, "Total compilation time took "+make_time_units_string(compilation_time)+".\n\n")
Example #47
0
    def __init__(self, train_X=None, train_Y=None, valid_X=None, valid_Y=None, test_X=None, test_Y=None, args=None, logger=None):
        # Output logger
        self.logger = logger
        self.outdir = args.get("output_path", defaults["output_path"])
        if self.outdir[-1] != '/':
            self.outdir = self.outdir+'/'
        # Input data
        self.train_X = train_X
        self.train_Y = train_Y
        self.valid_X = valid_X
        self.valid_Y = valid_Y
        self.test_X  = test_X
        self.test_Y  = test_Y
        
        # variables from the dataset that are used for initialization and image reconstruction
        if train_X is None:
            self.N_input = args.get("input_size")
            if args.get("input_size") is None:
                raise AssertionError("Please either specify input_size in the arguments or provide an example train_X for input dimensionality.")
        else:
            self.N_input = train_X.eval().shape[1]
        self.root_N_input = numpy.sqrt(self.N_input)
        
        self.is_image = args.get('is_image', defaults['is_image'])
        if self.is_image:
            self.image_width  = args.get('width', self.root_N_input)
            self.image_height = args.get('height', self.root_N_input)
            
        #######################################
        # Network and training specifications #
        #######################################
        self.gsn_layers      = args.get('gsn_layers', defaults['gsn_layers']) # number hidden layers
        self.walkbacks       = args.get('walkbacks', defaults['walkbacks']) # number of walkbacks
        self.learning_rate   = theano.shared(cast32(args.get('learning_rate', defaults['learning_rate'])))  # learning rate
        self.init_learn_rate = cast32(args.get('learning_rate', defaults['learning_rate']))
        self.momentum        = theano.shared(cast32(args.get('momentum', defaults['momentum']))) # momentum term
        self.annealing       = cast32(args.get('annealing', defaults['annealing'])) # exponential annealing coefficient
        self.noise_annealing = cast32(args.get('noise_annealing', defaults['noise_annealing'])) # exponential noise annealing coefficient
        self.batch_size      = args.get('batch_size', defaults['batch_size'])
        self.gsn_batch_size = args.get('gsn_batch_size', defaults['gsn_batch_size'])
        self.n_epoch         = args.get('n_epoch', defaults['n_epoch'])
        self.early_stop_threshold = args.get('early_stop_threshold', defaults['early_stop_threshold'])
        self.early_stop_length = args.get('early_stop_length', defaults['early_stop_length'])
        self.save_frequency  = args.get('save_frequency', defaults['save_frequency'])
        
        self.noiseless_h1           = args.get('noiseless_h1', defaults["noiseless_h1"])
        self.hidden_add_noise_sigma = theano.shared(cast32(args.get('hidden_add_noise_sigma', defaults["hidden_add_noise_sigma"])))
        self.input_salt_and_pepper  = theano.shared(cast32(args.get('input_salt_and_pepper', defaults["input_salt_and_pepper"])))
        self.input_sampling         = args.get('input_sampling', defaults["input_sampling"])
        self.vis_init               = args.get('vis_init', defaults['vis_init'])
        self.load_params            = args.get('load_params', defaults['load_params'])
        self.hessian_free           = args.get('hessian_free', defaults['hessian_free'])
        
        self.hidden_size = args.get('hidden_size', defaults['hidden_size'])
        self.layer_sizes = [self.N_input] + [self.hidden_size] * self.gsn_layers # layer sizes, from h0 to hK (h0 is the visible layer)
        self.recurrent_hidden_size = args.get('recurrent_hidden_size', defaults['recurrent_hidden_size'])
        self.top_layer_sizes = [self.recurrent_hidden_size] + [self.hidden_size] * self.gsn_layers # layer sizes for the top GSN, whose "visible" layer is the recurrent state
        
        self.f_recon = None
        self.f_noise = None
        
        # Activation functions!
        # For the GSN:
        if args.get('hidden_activation') is not None:
            log.maybeLog(self.logger, 'Using specified activation for GSN hiddens')
            self.hidden_activation = args.get('hidden_activation')
        elif args.get('hidden_act') == 'sigmoid':
            log.maybeLog(self.logger, 'Using sigmoid activation for GSN hiddens')
            self.hidden_activation = T.nnet.sigmoid
        elif args.get('hidden_act') == 'rectifier':
            log.maybeLog(self.logger, 'Using rectifier activation for GSN hiddens')
            self.hidden_activation = lambda x : T.maximum(cast32(0), x)
        elif args.get('hidden_act') == 'tanh':
            log.maybeLog(self.logger, 'Using hyperbolic tangent activation for GSN hiddens')
            self.hidden_activation = lambda x : T.tanh(x)
        elif args.get('hidden_act') is not None:
            log.maybeLog(self.logger, "Did not recognize hidden activation {0!s}, please use tanh, rectifier, or sigmoid for GSN hiddens".format(args.get('hidden_act')))
            raise NotImplementedError("Did not recognize hidden activation {0!s}, please use tanh, rectifier, or sigmoid for GSN hiddens".format(args.get('hidden_act')))
        else:
            log.maybeLog(self.logger, "Using default activation for GSN hiddens")
            self.hidden_activation = defaults['hidden_activation']
        # For the RNN:
        if args.get('recurrent_hidden_activation') is not None:
            log.maybeLog(self.logger, 'Using specified activation for RNN hiddens')
            self.recurrent_hidden_activation = args.get('recurrent_hidden_activation')
        elif args.get('recurrent_hidden_act') == 'sigmoid':
            log.maybeLog(self.logger, 'Using sigmoid activation for RNN hiddens')
            self.recurrent_hidden_activation = T.nnet.sigmoid
        elif args.get('recurrent_hidden_act') == 'rectifier':
            log.maybeLog(self.logger, 'Using rectifier activation for RNN hiddens')
            self.recurrent_hidden_activation = lambda x : T.maximum(cast32(0), x)
        elif args.get('recurrent_hidden_act') == 'tanh':
            log.maybeLog(self.logger, 'Using hyperbolic tangent activation for RNN hiddens')
            self.recurrent_hidden_activation = lambda x : T.tanh(x)
        elif args.get('recurrent_hidden_act') is not None:
            log.maybeLog(self.logger, "Did not recognize hidden activation {0!s}, please use tanh, rectifier, or sigmoid for RNN hiddens".format(args.get('hidden_act')))
            raise NotImplementedError("Did not recognize hidden activation {0!s}, please use tanh, rectifier, or sigmoid for RNN hiddens".format(args.get('hidden_act')))
        else:
            log.maybeLog(self.logger, "Using default activation for RNN hiddens")
            self.recurrent_hidden_activation = defaults['recurrent_hidden_activation']
        # Visible layer activation
        if args.get('visible_activation') is not None:
            log.maybeLog(self.logger, 'Using specified activation for visible layer')
            self.visible_activation = args.get('visible_activation')
        elif args.get('visible_act') == 'sigmoid':
            log.maybeLog(self.logger, 'Using sigmoid activation for visible layer')
            self.visible_activation = T.nnet.sigmoid
        elif args.get('visible_act') == 'softmax':
            log.maybeLog(self.logger, 'Using softmax activation for visible layer')
            self.visible_activation = T.nnet.softmax
        elif args.get('visible_act') is not None:
            log.maybeLog(self.logger, "Did not recognize visible activation {0!s}, please use sigmoid or softmax".format(args.get('visible_act')))
            raise NotImplementedError("Did not recognize visible activation {0!s}, please use sigmoid or softmax".format(args.get('visible_act')))
        else:
            log.maybeLog(self.logger, 'Using default activation for visible layer')
            self.visible_activation = defaults['visible_activation']
            
        # Cost function!
        if args.get('cost_function') is not None:
            log.maybeLog(self.logger, '\nUsing specified cost function for GSN training\n')
            self.cost_function = args.get('cost_function')
        elif args.get('cost_funct') == 'binary_crossentropy':
            log.maybeLog(self.logger, '\nUsing binary cross-entropy cost!\n')
            self.cost_function = lambda x,y: T.mean(T.nnet.binary_crossentropy(x,y))
        elif args.get('cost_funct') == 'square':
            log.maybeLog(self.logger, "\nUsing square error cost!\n")
            #cost_function = lambda x,y: T.log(T.mean(T.sqr(x-y)))
            self.cost_function = lambda x,y: T.log(T.sum(T.pow((x-y),2)))
        elif args.get('cost_funct') is not None:
            log.maybeLog(self.logger, "\nDid not recognize cost function {0!s}, please use binary_crossentropy or square\n".format(args.get('cost_funct')))
            raise NotImplementedError("Did not recognize cost function {0!s}, please use binary_crossentropy or square".format(args.get('cost_funct')))
        else:
            log.maybeLog(self.logger, '\nUsing default cost function for GSN training\n')
            self.cost_function = defaults['cost_function']
        
        ############################
        # Theano variables and RNG #
        ############################
        self.X = T.fmatrix('X') #single (batch) for training gsn
        self.Xs = T.fmatrix('Xs') #sequence for training rnn
        self.MRG = RNG_MRG.MRG_RandomStreams(1)
        
        ###############
        # Parameters! #
        ###############
        #visible gsn
        self.weights_list = [get_shared_weights(self.layer_sizes[i], self.layer_sizes[i+1], name="W_{0!s}_{1!s}".format(i,i+1)) for i in range(self.gsn_layers)] # initialize each layer to uniform sample from sqrt(6. / (n_in + n_out))
        self.bias_list    = [get_shared_bias(self.layer_sizes[i], name='b_'+str(i)) for i in range(self.gsn_layers + 1)] # initialize each layer to 0's.
        
        #recurrent
        self.recurrent_to_gsn_weights_list = [get_shared_weights(self.recurrent_hidden_size, self.layer_sizes[layer], name="W_u_h{0!s}".format(layer)) for layer in range(self.gsn_layers+1) if layer%2 != 0]
        self.W_u_u = get_shared_weights(self.recurrent_hidden_size, self.recurrent_hidden_size, name="W_u_u")
        self.W_ins_u = get_shared_weights(args.get('hidden_size', defaults['hidden_size']), self.recurrent_hidden_size, name="W_ins_u")
        self.recurrent_bias = get_shared_bias(self.recurrent_hidden_size, name='b_u')
        
        #top layer gsn
        self.top_weights_list = [get_shared_weights(self.top_layer_sizes[i], self.top_layer_sizes[i+1], name="Wtop_{0!s}_{1!s}".format(i,i+1)) for i in range(self.gsn_layers)] # initialize each layer to uniform sample from sqrt(6. / (n_in + n_out))
        self.top_bias_list    = [get_shared_bias(self.top_layer_sizes[i], name='btop_'+str(i)) for i in range(self.gsn_layers + 1)] # initialize each layer to 0's.
        
        #lists for use with gradients
        self.gsn_params = self.weights_list + self.bias_list
        self.u_params   = [self.W_u_u, self.W_ins_u, self.recurrent_bias]
        self.top_params = self.top_weights_list + self.top_bias_list
        self.params     = self.gsn_params + self.recurrent_to_gsn_weights_list + self.u_params + self.top_params
        
        ###################################################
        #          load initial parameters                #
        ###################################################
        if self.load_params:
            params_to_load = 'gsn_params.pkl'
            log.maybeLog(self.logger, "\nLoading existing GSN parameters\n")
            loaded_params = cPickle.load(open(params_to_load,'rb'))
            [p.set_value(lp.get_value(borrow=False)) for lp, p in zip(loaded_params[:len(self.weights_list)], self.weights_list)]
            [p.set_value(lp.get_value(borrow=False)) for lp, p in zip(loaded_params[len(self.weights_list):], self.bias_list)]
            
            params_to_load = 'rnn_params.pkl'
            log.maybeLog(self.logger, "\nLoading existing RNN parameters\n")
            loaded_params = cPickle.load(open(params_to_load,'rb'))
            [p.set_value(lp.get_value(borrow=False)) for lp, p in zip(loaded_params[:len(self.recurrent_to_gsn_weights_list)], self.recurrent_to_gsn_weights_list)]
            [p.set_value(lp.get_value(borrow=False)) for lp, p in zip(loaded_params[len(self.recurrent_to_gsn_weights_list):len(self.recurrent_to_gsn_weights_list)+1], [self.W_u_u])]
            [p.set_value(lp.get_value(borrow=False)) for lp, p in zip(loaded_params[len(self.recurrent_to_gsn_weights_list)+1:len(self.recurrent_to_gsn_weights_list)+2], [self.W_ins_u])]
            [p.set_value(lp.get_value(borrow=False)) for lp, p in zip(loaded_params[len(self.recurrent_to_gsn_weights_list)+2:], [self.recurrent_bias])]
            
            params_to_load = 'top_gsn_params.pkl'
            log.maybeLog(self.logger, "\nLoading existing top level GSN parameters\n")
            loaded_params = cPickle.load(open(params_to_load,'rb'))
            [p.set_value(lp.get_value(borrow=False)) for lp, p in zip(loaded_params[:len(self.top_weights_list)], self.top_weights_list)]
            [p.set_value(lp.get_value(borrow=False)) for lp, p in zip(loaded_params[len(self.top_weights_list):], self.top_bias_list)]
                
        self.gsn_args = {'weights_list':       self.weights_list,
                         'bias_list':          self.bias_list,
                         'hidden_activation':  self.hidden_activation,
                         'visible_activation': self.visible_activation,
                         'cost_function':      self.cost_function,
                         'layers':             self.gsn_layers,
                         'walkbacks':          self.walkbacks,
                         'hidden_size':        args.get('hidden_size', defaults['hidden_size']),
                         'learning_rate':      args.get('learning_rate', defaults['learning_rate']),
                         'momentum':           args.get('momentum', defaults['momentum']),
                         'annealing':          self.annealing,
                         'noise_annealing':    self.noise_annealing,
                         'batch_size':         self.gsn_batch_size,
                         'n_epoch':            self.n_epoch,
                         'early_stop_threshold':   self.early_stop_threshold,
                         'early_stop_length':      self.early_stop_length,
                         'save_frequency':         self.save_frequency,
                         'noiseless_h1':           self.noiseless_h1,
                         'hidden_add_noise_sigma': args.get('hidden_add_noise_sigma', defaults['hidden_add_noise_sigma']),
                         'input_salt_and_pepper':  args.get('input_salt_and_pepper', defaults['input_salt_and_pepper']),
                         'input_sampling':      self.input_sampling,
                         'vis_init':            self.vis_init,
                         'output_path':         self.outdir+'gsn/',
                         'is_image':            self.is_image,
                         'input_size':          self.N_input
                         }
        
        self.top_gsn_args = {'weights_list':       self.top_weights_list,
                             'bias_list':          self.top_bias_list,
                             'hidden_activation':  self.hidden_activation,
                             'visible_activation': self.recurrent_hidden_activation,
                             'cost_function':      self.cost_function,
                             'layers':             self.gsn_layers,
                             'walkbacks':          self.walkbacks,
                             'hidden_size':        args.get('hidden_size', defaults['hidden_size']),
                             'learning_rate':      args.get('learning_rate', defaults['learning_rate']),
                             'momentum':           args.get('momentum', defaults['momentum']),
                             'annealing':          self.annealing,
                             'noise_annealing':    self.noise_annealing,
                             'batch_size':         self.gsn_batch_size,
                             'n_epoch':            self.n_epoch,
                             'early_stop_threshold':   self.early_stop_threshold,
                             'early_stop_length':      self.early_stop_length,
                             'save_frequency':         self.save_frequency,
                             'noiseless_h1':           self.noiseless_h1,
                             'hidden_add_noise_sigma': args.get('hidden_add_noise_sigma', defaults['hidden_add_noise_sigma']),
                             'input_salt_and_pepper':  args.get('input_salt_and_pepper', defaults['input_salt_and_pepper']),
                             'input_sampling':      self.input_sampling,
                             'vis_init':            self.vis_init,
                             'output_path':         self.outdir+'top_gsn/',
                             'is_image':            False,
                             'input_size':          self.recurrent_hidden_size
                             }
            
        ############
        # Sampling #
        ############
        # the input to the sampling function
        X_sample = T.fmatrix("X_sampling")
        self.network_state_input = [X_sample] + [T.fmatrix("H_sampling_"+str(i+1)) for i in range(self.gsn_layers)]
       
        # "Output" state of the network (noisy)
        # initialized with input, then we apply updates
        self.network_state_output = [X_sample] + self.network_state_input[1:]
        visible_pX_chain = []
    
        # ONE update
        log.maybeLog(self.logger, "Performing one walkback in network state sampling.")
        generative_stochastic_network.update_layers(self.network_state_output,
                          self.weights_list,
                          self.bias_list,
                          visible_pX_chain, 
                          True,
                          self.noiseless_h1,
                          self.hidden_add_noise_sigma,
                          self.input_salt_and_pepper,
                          self.input_sampling,
                          self.MRG,
                          self.visible_activation,
                          self.hidden_activation,
                          self.logger)
    
               
        ##############################################
        #        Build the graphs for the SEN        #
        ##############################################
        # Deterministic recurrence: denoise x_t with the visible GSN, then use its top hidden layer to update the recurrent state u_t.
        def recurrent_step(x_t, u_tm1, add_noise):
            # Make current guess for hiddens based on U
            for i in range(self.gsn_layers):
                if i%2 == 0:
                    log.maybeLog(self.logger, "Using {0!s} and {1!s}".format(self.recurrent_to_gsn_weights_list[(i+1)/2],self.bias_list[i+1]))
            h_t = T.concatenate([self.hidden_activation(self.bias_list[i+1] + T.dot(u_tm1, self.recurrent_to_gsn_weights_list[(i+1)/2])) for i in range(self.gsn_layers) if i%2 == 0],axis=0)
            
            # Make a GSN to update U
            _, hs = generative_stochastic_network.build_gsn(x_t, self.weights_list, self.bias_list, add_noise, self.noiseless_h1, self.hidden_add_noise_sigma, self.input_salt_and_pepper, self.input_sampling, self.MRG, self.visible_activation, self.hidden_activation, self.walkbacks, self.logger)
            htop_t = hs[-1]
            ins_t = htop_t
            
            ua_t = T.dot(ins_t, self.W_ins_u) + T.dot(u_tm1, self.W_u_u) + self.recurrent_bias
            u_t = self.recurrent_hidden_activation(ua_t)
            return [ua_t, u_t, h_t]
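        # The step above runs the visible GSN on x_t, takes its top hidden
        # layer htop_t as the recurrent input, and computes
        #   u_t = recurrent_hidden_activation(htop_t . W_ins_u + u_{t-1} . W_u_u + b_u)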
        
        log.maybeLog(self.logger, "\nCreating recurrent step scan.")
        # For training, the deterministic recurrence is used to compute all the
        # {h_t, 1 <= t <= T} given Xs. Conditional GSNs can then be trained
        # in batches using those parameters.
        u0 = T.zeros((self.recurrent_hidden_size,))  # initial value for the RNN hidden units
        (ua, u, h_t), updates_recurrent = theano.scan(fn=lambda x_t, u_tm1, *_: recurrent_step(x_t, u_tm1, True),
                                                           sequences=self.Xs,
                                                           outputs_info=[None, u0, None],
                                                           non_sequences=self.params)
        
        log.maybeLog(self.logger, "Now for reconstruction sample without noise")
        (_, _, h_t_recon), updates_recurrent_recon = theano.scan(fn=lambda x_t, u_tm1, *_: recurrent_step(x_t, u_tm1, False),
                                                           sequences=self.Xs,
                                                           outputs_info=[None, u0, None],
                                                           non_sequences=self.params)
        # put together the hiddens list
        h_list = [T.zeros_like(self.Xs)]
        for layer, w in enumerate(self.weights_list):
            if layer%2 != 0:
                h_list.append(T.zeros_like(T.dot(h_list[-1], w)))
            else:
                h_list.append((h_t.T[(layer/2)*self.hidden_size:(layer/2+1)*self.hidden_size]).T)
                
        h_list_recon = [T.zeros_like(self.Xs)]
        for layer, w in enumerate(self.weights_list):
            if layer%2 != 0:
                h_list_recon.append(T.zeros_like(T.dot(h_list_recon[-1], w)))
            else:
                h_list_recon.append((h_t_recon.T[(layer/2)*self.hidden_size:(layer/2+1)*self.hidden_size]).T)
        
        #with noise
        _, cost, show_cost = generative_stochastic_network.build_gsn_given_hiddens(self.Xs, h_list, self.weights_list, self.bias_list, True, self.noiseless_h1, self.hidden_add_noise_sigma, self.input_salt_and_pepper, self.input_sampling, self.MRG, self.visible_activation, self.hidden_activation, self.walkbacks, self.cost_function, self.logger)
        #without noise for reconstruction
        x_sample_recon, _, _ = generative_stochastic_network.build_gsn_given_hiddens(self.Xs, h_list_recon, self.weights_list, self.bias_list, False, self.noiseless_h1, self.hidden_add_noise_sigma, self.input_salt_and_pepper, self.input_sampling, self.MRG, self.visible_activation, self.hidden_activation, self.walkbacks, self.cost_function, self.logger)
        
        updates_train = updates_recurrent
        updates_cost = updates_recurrent
        
        #############
        #   COSTS   #
        #############
        log.maybeLog(self.logger, '\nCost w.r.t p(X|...) at every step in the graph')
        start_functions_time = time.time()

        # if we are not using Hessian-free training create the normal sgd functions
        if not self.hessian_free:
            gradient      = T.grad(cost, self.params)      
            gradient_buffer = [theano.shared(numpy.zeros(param.get_value().shape, dtype='float32')) for param in self.params]
            
            m_gradient    = [self.momentum * gb + (cast32(1) - self.momentum) * g for (gb, g) in zip(gradient_buffer, gradient)]
            param_updates = [(param, param - self.learning_rate * mg) for (param, mg) in zip(self.params, m_gradient)]
            gradient_buffer_updates = zip(gradient_buffer, m_gradient)
                
            updates = OrderedDict(param_updates + gradient_buffer_updates)
            updates_train.update(updates)
        
            log.maybeLog(self.logger, "rnn-gsn learn...")
            self.f_learn = theano.function(inputs  = [self.Xs],
                                      updates = updates_train,
                                      outputs = show_cost,
                                      on_unused_input='warn',
                                      name='rnngsn_f_learn')
            
            log.maybeLog(self.logger, "rnn-gsn cost...")
            self.f_cost  = theano.function(inputs  = [self.Xs],
                                      updates = updates_cost,
                                      outputs = show_cost, 
                                      on_unused_input='warn',
                                      name='rnngsn_f_cost')
        
        log.maybeLog(self.logger, "Training/cost functions done.")
        
        # Denoise some numbers : show number, noisy number, predicted number, reconstructed number
        log.maybeLog(self.logger, "Creating graph for noisy reconstruction function at checkpoints during training.")
        self.f_recon = theano.function(inputs=[self.Xs],
                                       outputs=x_sample_recon[-1],
                                       updates=updates_recurrent_recon,
                                       name='rnngsn_f_recon')
        
        # a function to add salt and pepper noise
        self.f_noise = theano.function(inputs = [self.X],
                                       outputs = salt_and_pepper(self.X, self.input_salt_and_pepper),
                                       name='rnngsn_f_noise')
        # Sampling functions
        log.maybeLog(self.logger, "Creating sampling function...")
        if self.gsn_layers == 1: 
            self.f_sample = theano.function(inputs = [X_sample],
                                            outputs = visible_pX_chain[-1],
                                            name='rnngsn_f_sample_single_layer')
        else:
            # on_unused_input='warn' is expected here: the odd-numbered hidden
            # layers are computed directly from the even layers, so their
            # sampling inputs are never read.
            self.f_sample = theano.function(inputs = self.network_state_input,
                                            outputs = self.network_state_output + visible_pX_chain,
                                            on_unused_input='warn',
                                            name='rnngsn_f_sample')
         
    
        log.maybeLog(self.logger, "Done compiling all functions.")
        compilation_time = time.time() - start_functions_time
        # Show the compile time with appropriate easy-to-read units.
        log.maybeLog(self.logger, "Total compilation time took "+make_time_units_string(compilation_time)+".\n\n")
Example #48
0
    def train(self,
              train_X=None,
              train_Y=None,
              valid_X=None,
              valid_Y=None,
              test_X=None,
              test_Y=None,
              is_artificial=False,
              artificial_sequence=1,
              continue_training=False):
        log.maybeLog(self.logger, "\nTraining---------\n")
        if train_X is None:
            log.maybeLog(
                self.logger,
                "Training using data given during initialization of RNN-GSN.\n"
            )
            train_X = self.train_X
            train_Y = self.train_Y
            if train_X is None:
                log.maybeLog(self.logger,
                             "\nPlease provide a training dataset!\n")
                raise AssertionError("Please provide a training dataset!")
        else:
            log.maybeLog(
                self.logger,
                "Training using data provided to training function.\n")
        if valid_X is None:
            valid_X = self.valid_X
            valid_Y = self.valid_Y
        if test_X is None:
            test_X = self.test_X
            test_Y = self.test_Y

        ##########################################################
        # Train the GSN first to get good weights initialization #
        ##########################################################
        if self.train_gsn_first:
            log.maybeLog(
                self.logger,
                "\n\n----------Initially training the GSN---------\n\n")
            init_gsn = generative_stochastic_network.GSN(train_X=train_X,
                                                         valid_X=valid_X,
                                                         test_X=test_X,
                                                         args=self.gsn_args,
                                                         logger=self.logger)
            init_gsn.train()

        #############################
        # Save the model parameters #
        #############################
        def save_params_to_file(name, n, gsn_params):
            print 'saving parameters...'
            save_path = self.outdir + name + '_params_epoch_' + str(n) + '.pkl'
            f = open(save_path, 'wb')
            try:
                cPickle.dump(gsn_params, f, protocol=cPickle.HIGHEST_PROTOCOL)
            finally:
                f.close()

        def save_params(params):
            # copy the values (borrow=False) so that later in-place training
            # updates cannot mutate the saved snapshot
            values = [param.get_value(borrow=False) for param in params]
            return values

        def restore_params(params, values):
            for i in range(len(params)):
                params[i].set_value(values[i])

        #########################################
        # If we are using Hessian-free training #
        #########################################
        if self.hessian_free:
            pass
            # gradient_dataset = hf_sequence_dataset([train_X.get_value()], batch_size=None, number_batches=5000)
            # cg_dataset = hf_sequence_dataset([train_X.get_value()], batch_size=None, number_batches=1000)
            # valid_dataset = hf_sequence_dataset([valid_X.get_value()], batch_size=None, number_batches=1000)
            #
            # s = x_samples
            # costs = [cost, show_cost]
            # hf_optimizer(params, [Xs], s, costs, u, ua).train(gradient_dataset, cg_dataset, initial_lambda=1.0, preconditioner=True, validation=valid_dataset)

        ################################
        # If we are using SGD training #
        ################################
        else:
            log.maybeLog(self.logger,
                         "\n-----------TRAINING RNN-GSN------------\n")
            # TRAINING
            STOP = False
            counter = 0
            if not continue_training:
                self.learning_rate.set_value(
                    self.init_learn_rate)  # learning rate
            times = []
            best_cost = float('inf')
            best_params = None
            patience = 0

            log.maybeLog(
                self.logger,
                ['train X size:', str(train_X.shape.eval())])
            if valid_X is not None:
                log.maybeLog(self.logger,
                             ['valid X size:',
                              str(valid_X.shape.eval())])
            if test_X is not None:
                log.maybeLog(
                    self.logger,
                    ['test X size:', str(test_X.shape.eval())])

            if self.vis_init:
                # initialize the visible bias to the logit of the mean pixel
                # values, clipped away from 0 and 1 for numerical stability
                self.bias_list[0].set_value(
                    logit(
                        numpy.clip(train_X.get_value().mean(axis=0), 0.001,
                                   0.9)))

            while not STOP:
                counter += 1
                t = time.time()
                log.maybeAppend(self.logger, [counter, '\t'])

                if is_artificial:
                    data.sequence_mnist_data(train_X, train_Y, valid_X,
                                             valid_Y, test_X, test_Y,
                                             artificial_sequence, rng)

                #train
                train_costs = data.apply_cost_function_to_dataset(
                    self.f_learn, train_X, self.batch_size)
                # record it
                log.maybeAppend(self.logger,
                                ['Train:', trunc(train_costs), '\t'])

                #valid
                valid_costs = data.apply_cost_function_to_dataset(
                    self.f_cost, valid_X, self.batch_size)
                # record it
                log.maybeAppend(self.logger,
                                ['Valid:', trunc(valid_costs), '\t'])

                #test
                test_costs = data.apply_cost_function_to_dataset(
                    self.f_cost, test_X, self.batch_size)
                # record it
                log.maybeAppend(self.logger,
                                ['Test:', trunc(test_costs), '\t'])

                #check for early stopping
                cost = numpy.sum(valid_costs)
                if cost < best_cost * self.early_stop_threshold:
                    patience = 0
                    best_cost = cost
                    # save the parameters that made it the best
                    best_params = save_params(self.params)
                else:
                    patience += 1
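                # early_stop_threshold (typically just below 1) demands a
                # relative improvement in the summed validation cost; after
                # early_stop_length epochs without one, training stops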

                if counter >= self.n_epoch or patience >= self.early_stop_length:
                    STOP = True
                    if best_params is not None:
                        restore_params(self.params, best_params)
                    save_params_to_file('all', counter, self.params)

                timing = time.time() - t
                times.append(timing)

                log.maybeAppend(
                    self.logger,
                    'time: ' + make_time_units_string(timing) + '\t')

                log.maybeLog(
                    self.logger, 'remaining: ' + make_time_units_string(
                        (self.n_epoch - counter) * numpy.mean(times)))

                if (counter % self.save_frequency) == 0 or STOP is True:
                    n_examples = 100
                    nums = test_X.get_value(borrow=True)[range(n_examples)]
                    noisy_nums = self.f_noise(
                        test_X.get_value(borrow=True)[range(n_examples)])
                    reconstructions = []
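                    # reconstruct each frame from the window of up to
                    # batch_size noisy frames ending at i, so the recurrent
                    # state carries sequence context into the reconstruction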
                    for i in xrange(0, len(noisy_nums)):
                        recon = self.f_recon(
                            noisy_nums[max(0, (i + 1) - self.batch_size):i +
                                       1])
                        reconstructions.append(recon)
                    reconstructed = numpy.array(reconstructions)

                    # Concatenate stuff
                    stacked = numpy.vstack([
                        numpy.vstack([
                            nums[i * 10:(i + 1) * 10],
                            noisy_nums[i * 10:(i + 1) * 10],
                            reconstructed[i * 10:(i + 1) * 10]
                        ]) for i in range(10)
                    ])
                    number_reconstruction = PIL.Image.fromarray(
                        tile_raster_images(
                            stacked, (self.root_N_input, self.root_N_input),
                            (10, 30)))

                    number_reconstruction.save(
                        self.outdir + 'rnngsn_number_reconstruction_epoch_' +
                        str(counter) + '.png')

                    #save params
                    save_params_to_file('all', counter, self.params)

                # ANNEAL!
                new_lr = self.learning_rate.get_value() * self.annealing
                self.learning_rate.set_value(new_lr)
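
The final two lines implement exponential learning-rate annealing, so after epoch t the rate is init_learn_rate * annealing**t. A quick illustration with made-up values (not taken from the examples):

lr, annealing = 0.25, 0.99
for epoch in range(1, 4):
    lr *= annealing
    print epoch, lr  # approx. 0.2475, 0.245025, 0.24257475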