Example #1
def step(hiddens, x):
    gsn = GSN(inputs_hook=(28 * 28, x),
              hiddens_hook=(gsn_hiddens, hiddens),
              params_hook=gsn_params,
              **gsn_args)
    # return the reconstruction and cost!
    return gsn.get_outputs(), gsn.get_train_cost()
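
The `step` closure above is meant to be driven by `theano.scan`, one timestep per call. A minimal sketch of that wiring, where `rnn_hiddens_3d` and `inputs_3d` are hypothetical symbolic sequence variables (the full version appears in Example #3):

    # hedged sketch: scan passes one slice of each sequence per timestep
    (outputs, costs), scan_updates = theano.scan(
        fn=step,
        sequences=[rnn_hiddens_3d, inputs_3d],
        outputs_info=[None, None])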
Example #2
def step(hiddens, x):
    gsn = GSN(
        inputs_hook=(28*28, x),
        hiddens_hook=(gsn_hiddens, hiddens),
        params_hook=gsn_params,
        **gsn_args
    )
    # return the reconstruction and cost!
    return gsn.get_outputs(), gsn.get_train_cost()
Example #3
    def __init__(self):
        super(RNN_GSN, self).__init__()

        gsn_hiddens = 500
        gsn_layers = 2

        # RNN that takes in images (3D sequences) and outputs gsn hiddens (3D sequence of them)
        self.rnn = RNN(
            input_size=28 * 28,
            hidden_size=100,
            # needs to output hidden units for odd layers of GSN
            output_size=gsn_hiddens * (math.ceil(gsn_layers / 2.)),
            layers=1,
            activation='tanh',
            hidden_activation='relu',
            weights_init='uniform',
            weights_interval='montreal',
            r_weights_init='identity')

        # Create the GSN that will encode the input space
        gsn = GSN(input_size=28 * 28,
                  hidden_size=gsn_hiddens,
                  layers=gsn_layers,
                  walkbacks=4,
                  visible_activation='sigmoid',
                  hidden_activation='tanh',
                  image_height=28,
                  image_width=28)
        # grab the input arguments
        gsn_args = gsn.args.copy()
        # grab the parameters it initialized
        gsn_params = gsn.get_params()

        # Now hook the two up! RNN should output hiddens for GSN into a 3D tensor (1 set for each timestep)
        # Therefore, we need to use scan to create the GSN reconstruction for each timestep given the hiddens
        def step(hiddens, x):
            gsn = GSN(inputs_hook=(28 * 28, x),
                      hiddens_hook=(gsn_hiddens, hiddens),
                      params_hook=gsn_params,
                      **gsn_args)
            # return the reconstruction and cost!
            return gsn.get_outputs(), gsn.get_train_cost()

        (outputs, costs), scan_updates = theano.scan(
            fn=lambda h, x: step(h, x),
            sequences=[self.rnn.output, self.rnn.input],
            outputs_info=[None, None])

        self.outputs = outputs

        self.updates = dict()
        self.updates.update(self.rnn.get_updates())
        self.updates.update(scan_updates)

        self.cost = costs.sum()
        self.params = gsn_params + self.rnn.get_params()
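
A hypothetical usage sketch, not part of the source: the `cost`, `params`, and `updates` collected by this constructor can be compiled into a plain-SGD training function. The learning rate and the use of `self.rnn.input` as the symbolic input are assumptions.

    import theano
    import theano.tensor as T

    model = RNN_GSN()
    lr = 0.001  # assumed learning rate
    # gradient of the summed scan cost w.r.t. all GSN and RNN parameters
    grads = T.grad(model.cost, model.params)
    updates = dict(model.updates)
    updates.update({p: p - lr * g for p, g in zip(model.params, grads)})
    train_fn = theano.function(inputs=[model.rnn.input],
                               outputs=model.cost,
                               updates=updates)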
Example #4
def main():
    ########################################
    # Initialization things with arguments #
    ########################################
    # use these arguments to reproduce the results from the paper referenced above
    _train_args = {"epochs": 1000,  # maximum number of times to run through the dataset
                   "batch_size": 100,  # number of examples to process in parallel (minibatch)
                   "min_batch_size": 1,  # the minimum number of examples for a batch to be considered
                   "save_freq": 1,  # how many epochs between saving parameters
                   "stop_threshold": .9995,  # multiplier for how much the train cost to improve to not stop early
                   "stop_patience": 500,  # how many epochs to wait to see if the threshold has been reached
                   "learning_rate": .25,  # initial learning rate for SGD
                   "lr_decay": 'exponential',  # the decay function to use for the learning rate parameter
                   "lr_decay_factor": .995,  # by how much to decay the learning rate each epoch
                   "momentum": 0.5,  # the parameter momentum amount
                   'momentum_decay': False,  # how to decay the momentum each epoch (if applicable)
                   'momentum_factor': 0,  # by how much to decay the momentum (in this case not at all)
                   'nesterov_momentum': False,  # whether to use nesterov momentum update (accelerated momentum)
    }

    config_root_logger()
    log.info("Creating a new GSN")

    mnist = MNIST(concat_train_valid=True)
    gsn = GSN(layers=2,
              walkbacks=4,
              hidden_size=1500,
              visible_activation='sigmoid',
              hidden_activation='tanh',
              input_size=28*28,
              tied_weights=True,
              hidden_add_noise_sigma=2,
              input_salt_and_pepper=0.4,
              outdir='outputs/test_gsn/',
              vis_init=False,
              noiseless_h1=True,
              input_sampling=True,
              weights_init='uniform',
              weights_interval='montreal',
              bias_init=0,
              cost_function='binary_crossentropy')

    recon_cost_channel = MonitorsChannel(name='cost')
    recon_cost_channel.add(Monitor('recon_cost', gsn.get_monitors()['recon_cost'], test=True))
    recon_cost_channel.add(Monitor('noisy_recon_cost', gsn.get_monitors()['noisy_recon_cost'], test=True))

    # Load initial weights and biases from file
    # params_to_load = '../../../outputs/gsn/mnist/trained_epoch_395.pkl'
    # gsn.load_params(params_to_load)

    optimizer = SGD(model=gsn, dataset=mnist, **_train_args)
    # optimizer = AdaDelta(model=gsn, dataset=mnist, epochs=200, batch_size=100, learning_rate=1e-6)
    optimizer.train(monitor_channels=recon_cost_channel)

    # Save some reconstruction output images
    n_examples = 100
    xs_test = mnist.test_inputs[:n_examples]
    noisy_xs_test = gsn.f_noise(xs_test)
    reconstructed = gsn.run(noisy_xs_test)
    # Concatenate stuff
    stacked = numpy.vstack(
        [numpy.vstack([xs_test[i * 10: (i + 1) * 10],
                       noisy_xs_test[i * 10: (i + 1) * 10],
                       reconstructed[i * 10: (i + 1) * 10]])
         for i in range(10)])
    number_reconstruction = PIL.Image.fromarray(
        tile_raster_images(stacked, (gsn.image_height, gsn.image_width), (10, 30))
    )

    number_reconstruction.save(gsn.outdir + 'reconstruction.png')
    log.info("saved output image!")

    # Construct image from the weight matrix
    image = PIL.Image.fromarray(
        tile_raster_images(
            X=gsn.weights_list[0].get_value(borrow=True).T,
            img_shape=(28, 28),
            tile_shape=closest_to_square_factors(gsn.hidden_size),
            tile_spacing=(1, 1)
        )
    )
    image.save(gsn.outdir + "gsn_mnist_weights.png")
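
For reference, `closest_to_square_factors(n)` is used above to choose a tile grid for `n` hidden-unit filters. A plausible stand-in (assumed behavior, not the library's actual implementation) is:

    import math

    def closest_to_square_factors(n):
        # walk down from sqrt(n) to the nearest divisor, yielding the
        # factor pair of n whose shape is closest to a square grid
        a = int(math.floor(math.sqrt(n)))
        while n % a != 0:
            a -= 1
        return a, n // a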
Example #5
    def recurrent_step(self, x_t, u_tm1):
        """
        Performs one timestep for recurrence.

        Parameters
        ----------
        x_t : tensor
            The input at time t.
        u_tm1 : tensor
            The previous timestep (t-1) recurrent hiddens.

        Returns
        -------
        tuple
            Current generated visible x_t and recurrent u_t if generating (no x_t given as parameter),
            otherwise current recurrent u_t and hiddens h_t.
        """
        # If `x_t` is given, deterministic recurrence to compute the u_t. Otherwise, first generate.
        # Make current guess for hiddens based on U
        h_list = []
        for i in range(self.layers):
            if i % 2 == 0:
                # integer division keeps the index an int under Python 3
                log.debug("Using {0!s} and {1!s}".format(
                    self.recurrent_to_gsn_weights_list[(i+1) // 2], self.bias_list[i+1]))
                h = T.dot(u_tm1, self.recurrent_to_gsn_weights_list[(i+1) // 2]) + self.bias_list[i+1]
                h = self.hidden_activation_func(h)
                h_list.append(h)
        h_t = T.concatenate(h_list, axis=0)

        generate = x_t is None
        if generate:
            h_list_generate = [T.shape_padleft(h) for h in h_list]
            # create a GSN to generate x_t
            assert self.input_size, self
            gsn = GSN(
                inputs_hook        = (self.input_size, self.input),
                input_size         = self.input_size,
                hiddens_hook       = (self.hidden_size, T.concatenate(h_list_generate, axis=1)),
                params_hook        = self.gsn_params,
                outdir             = os.path.join(self.outdir, 'gsn_generate/'),
                layers             = self.layers,
                walkbacks          = self.walkbacks,
                visible_activation = self.visible_activation_func,
                hidden_activation  = self.hidden_activation_func,
                input_sampling     = self.input_sampling,
                mrg                = self.mrg,
                tied_weights       = self.tied_weights,
                cost_function      = self.cost_function,
                cost_args          = self.cost_args,
                add_noise          = self.add_noise,
                noiseless_h1       = self.noiseless_h1,
                hidden_noise       = self.hidden_noise,
                hidden_noise_level = self.hidden_noise_level,
                input_noise        = self.input_noise,
                input_noise_level  = self.input_noise_level,
                noise_decay        = self.noise_decay,
                noise_annealing    = self.noise_annealing,
                image_width        = self.image_width,
                image_height       = self.image_height
            )

            x_t = gsn.get_outputs().flatten()

        ua_t = T.dot(x_t, self.W_x_u) + T.dot(u_tm1, self.W_u_u) + self.recurrent_bias
        u_t = self.rnn_hidden_activation_func(ua_t)
        return [x_t, u_t] if generate else [u_t, h_t]
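
The even-index test above routes recurrent projections only to the odd GSN layers (h1, h3, ...). A quick illustration of which weight and bias each `i` selects, assuming `layers=4` for the sake of the example:

    layers = 4  # assumed for illustration
    for i in range(layers):
        if i % 2 == 0:
            # i=0 -> hidden layer 1 uses W[0], b[1]; i=2 -> hidden layer 3 uses W[1], b[3]
            print("hidden layer %d -> recurrent W[%d], bias b[%d]"
                  % (i + 1, (i + 1) // 2, i + 1))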
Example #6
    def _build_rnngsn(self):
        """
        Creates the updates and other return variables for the computation graph.

        Returns
        -------
        List
            the sample at the end of the computation graph, the train cost function, the train monitors,
            the computation updates, the generated visible list, the generated computation updates, the ending
            recurrent states
        """
        # For training, the deterministic recurrence is used to compute all the
        # {h_t, 1 <= t <= T} given Xs. Conditional GSNs can then be trained
        # in batches using those parameters.
        (u, h_ts), updates_train = theano.scan(fn=lambda x_t, u_tm1: self.recurrent_step(x_t, u_tm1),
                                               sequences=self.input,
                                               outputs_info=[self.u0, None],
                                               name="rnngsn_computation_scan")

        h_list = [T.zeros_like(self.input)]
        for layer, w in enumerate(self.weights_list[:self.layers]):
            if layer % 2 != 0:
                h_list.append(T.zeros_like(T.dot(h_list[-1], w)))
            else:
                h_list.append((h_ts.T[(layer // 2) * self.hidden_size:(layer // 2 + 1) * self.hidden_size]).T)

        gsn = GSN(
            inputs_hook        = (self.input_size, self.input),
            input_size         = self.input_size,
            hiddens_hook       = (self.hidden_size, GSN.pack_hiddens(h_list)),
            params_hook        = self.gsn_params,
            outdir             = os.path.join(self.outdir, 'gsn_noisy/'),
            layers             = self.layers,
            walkbacks          = self.walkbacks,
            visible_activation = self.visible_activation_func,
            hidden_activation  = self.hidden_activation_func,
            input_sampling     = self.input_sampling,
            mrg                = self.mrg,
            tied_weights       = self.tied_weights,
            cost_function      = self.cost_function,
            cost_args          = self.cost_args,
            add_noise          = self.add_noise,
            noiseless_h1       = self.noiseless_h1,
            hidden_noise       = self.hidden_noise,
            hidden_noise_level = self.hidden_noise_level,
            input_noise        = self.input_noise,
            input_noise_level  = self.input_noise_level,
            noise_decay        = self.noise_decay,
            noise_annealing    = self.noise_annealing,
            image_width        = self.image_width,
            image_height       = self.image_height
        )

        cost = gsn.get_train_cost()
        monitors = gsn.get_monitors()  # frame-level error would be the 'mse' monitor from GSN
        x_sample_recon = gsn.get_outputs()

        # symbolic loop for sequence generation
        (x_ts, u_ts), updates_generate = theano.scan(lambda u_tm1: self.recurrent_step(None, u_tm1),
                                                     outputs_info=[None, self.generate_u0],
                                                     n_steps=self.n_steps,
                                                     name="rnngsn_generate_scan")

        return x_sample_recon, cost, monitors, updates_train, x_ts, updates_generate, u_ts[-1]
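
A hypothetical sketch (names assumed) of compiling the generation branch returned by `_build_rnngsn` into a callable sampler; generation takes no symbolic inputs because the scan starts from `self.generate_u0` and runs `self.n_steps` steps internally:

    import theano

    x_sample, cost, monitors, updates_train, x_ts, updates_generate, u_last = \
        model._build_rnngsn()
    generate_fn = theano.function(inputs=[],
                                  outputs=x_ts,
                                  updates=updates_generate)
    samples = generate_fn()  # one generated visible vector per timestep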
Example #7
    def recurrent_step(self, x_t, u_tm1):
        """
        Performs one timestep for recurrence.

        Parameters
        ----------
        x_t : tensor
            The input at time t.
        u_tm1 : tensor
            The previous timestep (t-1) recurrent hiddens.

        Returns
        -------
        tuple
            Current generated visible x_t and recurrent u_t if generating (no x_t given as parameter),
            otherwise current recurrent u_t and hiddens h_t.
        """
        # If `x_t` is given, deterministic recurrence to compute the u_t. Otherwise, first generate.
        # Make current guess for hiddens based on U
        h_list = []
        for i in range(self.layers):
            if i % 2 == 0:
                # integer division keeps the index an int under Python 3
                log.debug("Using {0!s} and {1!s}".format(
                    self.recurrent_to_gsn_weights_list[(i+1) // 2], self.bias_list[i+1]))
                h = T.dot(u_tm1, self.recurrent_to_gsn_weights_list[(i+1) // 2]) + self.bias_list[i+1]
                h = self.hidden_activation_func(h)
                h_list.append(h)
        h_t = T.concatenate(h_list, axis=0)

        generate = x_t is None
        if generate:
            h_list_generate = [T.shape_padleft(h) for h in h_list]
            # create a GSN to generate x_t
            gsn = GSN(
                inputs_hook        = (self.input_size, self.input),
                hiddens_hook       = (self.hidden_size, T.concatenate(h_list_generate, axis=1)),
                params_hook        = self.gsn_params,
                outdir             = os.path.join(self.outdir, 'gsn_generate/'),
                layers             = self.layers,
                walkbacks          = self.walkbacks,
                visible_activation = self.visible_activation_func,
                hidden_activation  = self.hidden_activation_func,
                input_sampling     = self.input_sampling,
                mrg                = self.mrg,
                tied_weights       = self.tied_weights,
                cost_function      = self.cost_function,
                cost_args          = self.cost_args,
                add_noise          = self.add_noise,
                noiseless_h1       = self.noiseless_h1,
                hidden_noise       = self.hidden_noise,
                hidden_noise_level = self.hidden_noise_level,
                input_noise        = self.input_noise,
                input_noise_level  = self.input_noise_level,
                noise_decay        = self.noise_decay,
                noise_annealing    = self.noise_annealing,
                image_width        = self.image_width,
                image_height       = self.image_height
            )

            x_t = gsn.get_outputs().flatten()

        ua_t = T.dot(x_t, self.W_x_u) + T.dot(u_tm1, self.W_u_u) + self.recurrent_bias
        u_t = self.rnn_hidden_activation_func(ua_t)
        return [x_t, u_t] if generate else [u_t, h_t]
Example #8
    def _build_rnngsn(self):
        """
        Creates the updates and other return variables for the computation graph.

        Returns
        -------
        List
            the sample at the end of the computation graph, the train cost function, the train monitors,
            the computation updates, the generated visible list, the generated computation updates, the ending
            recurrent states
        """
        # For training, the deterministic recurrence is used to compute all the
        # {h_t, 1 <= t <= T} given Xs. Conditional GSNs can then be trained
        # in batches using those parameters.
        (u, h_ts), updates_train = theano.scan(fn=lambda x_t, u_tm1: self.recurrent_step(x_t, u_tm1),
                                               sequences=self.input,
                                               outputs_info=[self.u0, None],
                                               name="rnngsn_computation_scan")

        h_list = [T.zeros_like(self.input)]
        for layer, w in enumerate(self.weights_list[:self.layers]):
            if layer % 2 != 0:
                h_list.append(T.zeros_like(T.dot(h_list[-1], w)))
            else:
                h_list.append((h_ts.T[(layer // 2) * self.hidden_size:(layer // 2 + 1) * self.hidden_size]).T)

        gsn = GSN(
            inputs_hook        = (self.input_size, self.input),
            hiddens_hook       = (self.hidden_size, GSN.pack_hiddens(h_list)),
            params_hook        = self.gsn_params,
            outdir             = os.path.join(self.outdir, 'gsn_noisy/'),
            layers             = self.layers,
            walkbacks          = self.walkbacks,
            visible_activation = self.visible_activation_func,
            hidden_activation  = self.hidden_activation_func,
            input_sampling     = self.input_sampling,
            mrg                = self.mrg,
            tied_weights       = self.tied_weights,
            cost_function      = self.cost_function,
            cost_args          = self.cost_args,
            add_noise          = self.add_noise,
            noiseless_h1       = self.noiseless_h1,
            hidden_noise       = self.hidden_noise,
            hidden_noise_level = self.hidden_noise_level,
            input_noise        = self.input_noise,
            input_noise_level  = self.input_noise_level,
            noise_decay        = self.noise_decay,
            noise_annealing    = self.noise_annealing,
            image_width        = self.image_width,
            image_height       = self.image_height
        )

        cost = gsn.get_train_cost()
        monitors = gsn.get_monitors()  # frame-level error would be the 'mse' monitor from GSN
        x_sample_recon = gsn.get_outputs()

        # symbolic loop for sequence generation
        (x_ts, u_ts), updates_generate = theano.scan(lambda u_tm1: self.recurrent_step(None, u_tm1),
                                                     outputs_info=[None, self.generate_u0],
                                                     n_steps=self.n_steps,
                                                     name="rnngsn_generate_scan")

        return x_sample_recon, cost, monitors, updates_train, x_ts, updates_generate, u_ts[-1]
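
`GSN.pack_hiddens(h_list)` above hands the per-layer hidden states to `hiddens_hook` as a single tensor. A plausible stand-in (assumed behavior only; the library's actual implementation is not shown here) would concatenate along the feature axis:

    import theano.tensor as T

    def pack_hiddens(h_list):
        # assumed: join per-layer hidden states into one matrix so that
        # hiddens_hook receives a single symbolic variable
        return T.concatenate(h_list, axis=1)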
Example #9
    def __init__(self):
        super(RNN_GSN, self).__init__()

        gsn_hiddens = 500
        gsn_layers = 2

        # RNN that takes in images (3D sequences) and outputs gsn hiddens (3D sequence of them)
        self.rnn = RNN(
            input_size=28 * 28,
            hidden_size=100,
            # needs to output hidden units for odd layers of GSN
            output_size=gsn_hiddens * (math.ceil(gsn_layers/2.)),
            layers=1,
            activation='tanh',
            hidden_activation='relu',
            weights_init='uniform', weights_interval='montreal',
            r_weights_init='identity'
        )

        # Create the GSN that will encode the input space
        gsn = GSN(
            input_size=28 * 28,
            hidden_size=gsn_hiddens,
            layers=gsn_layers,
            walkbacks=4,
            visible_activation='sigmoid',
            hidden_activation='tanh',
            image_height=28,
            image_width=28
        )
        # grab the input arguments
        gsn_args = gsn.args.copy()
        # grab the parameters it initialized
        gsn_params = gsn.get_params()

        # Now hook the two up! RNN should output hiddens for GSN into a 3D tensor (1 set for each timestep)
        # Therefore, we need to use scan to create the GSN reconstruction for each timestep given the hiddens
        def step(hiddens, x):
            gsn = GSN(
                inputs_hook=(28*28, x),
                hiddens_hook=(gsn_hiddens, hiddens),
                params_hook=gsn_params,
                **gsn_args
            )
            # return the reconstruction and cost!
            return gsn.get_outputs(), gsn.get_train_cost()

        (outputs, costs), scan_updates = theano.scan(
            fn=lambda h, x: step(h, x),
            sequences=[self.rnn.output, self.rnn.input],
            outputs_info=[None, None]
        )

        self.outputs = outputs

        self.updates = dict()
        self.updates.update(self.rnn.get_updates())
        self.updates.update(scan_updates)

        self.cost = costs.sum()
        self.params = gsn_params + self.rnn.get_params()
Example #10
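Note: this variant appears to target an older OpenDeep API. The keys `n_epoch`, `minimum_batch_size`, `save_frequency`, `early_stop_threshold`, `early_stop_length`, and `lr_factor` correspond to `epochs`, `min_batch_size`, `save_freq`, `stop_threshold`, `stop_patience`, and `lr_decay_factor` in Example #4, and the test split is fetched through `getSubset` rather than `test_inputs`.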
def main():
    ########################################
    # Initialization things with arguments #
    ########################################
    # use these arguments to reproduce the results from the paper referenced above
    _train_args = {"n_epoch": 1000,  # maximum number of times to run through the dataset
                   "batch_size": 100,  # number of examples to process in parallel (minibatch)
                   "minimum_batch_size": 1,  # the minimum number of examples for a batch to be considered
                   "save_frequency": 1,  # how many epochs between saving parameters
                   "early_stop_threshold": .9995,  # multiplier for how much the train cost to improve to not stop early
                   "early_stop_length": 500,  # how many epochs to wait to see if the threshold has been reached
                   "learning_rate": .25,  # initial learning rate for SGD
                   "lr_decay": 'exponential',  # the decay function to use for the learning rate parameter
                   "lr_factor": .995,  # by how much to decay the learning rate each epoch
                   "momentum": 0.5,  # the parameter momentum amount
                   'momentum_decay': False,  # how to decay the momentum each epoch (if applicable)
                   'momentum_factor': 0,  # by how much to decay the momentum (in this case not at all)
                   'nesterov_momentum': False,  # whether to use nesterov momentum update (accelerated momentum)
    }

    config_root_logger()
    log.info("Creating a new GSN")

    mnist = MNIST(concat_train_valid=True)
    gsn = GSN(layers=2,
              walkbacks=4,
              hidden_size=1500,
              visible_activation='sigmoid',
              hidden_activation='tanh',
              input_size=28*28,
              tied_weights=True,
              hidden_add_noise_sigma=2,
              input_salt_and_pepper=0.4,
              outdir='outputs/test_gsn/',
              vis_init=False,
              noiseless_h1=True,
              input_sampling=True,
              weights_init='uniform',
              weights_interval='montreal',
              bias_init=0,
              cost_function='binary_crossentropy')

    recon_cost_channel = MonitorsChannel(name='cost')
    recon_cost_channel.add(Monitor('recon_cost', gsn.get_monitors()['recon_cost'], test=True))
    recon_cost_channel.add(Monitor('noisy_recon_cost', gsn.get_monitors()['noisy_recon_cost'], test=True))

    # Load initial weights and biases from file
    # params_to_load = '../../../outputs/gsn/mnist/trained_epoch_395.pkl'
    # gsn.load_params(params_to_load)

    optimizer = SGD(model=gsn, dataset=mnist, **_train_args)
    # optimizer = AdaDelta(model=gsn, dataset=mnist, n_epoch=200, batch_size=100, learning_rate=1e-6)
    optimizer.train(monitor_channels=recon_cost_channel)

    # Save some reconstruction output images
    import opendeep.data.dataset as datasets
    n_examples = 100
    xs_test, _ = mnist.getSubset(datasets.TEST)
    xs_test = xs_test[:n_examples].eval()
    noisy_xs_test = gsn.f_noise(xs_test)
    reconstructed = gsn.run(noisy_xs_test)
    # Concatenate stuff
    stacked = numpy.vstack(
        [numpy.vstack([xs_test[i * 10: (i + 1) * 10],
                       noisy_xs_test[i * 10: (i + 1) * 10],
                       reconstructed[i * 10: (i + 1) * 10]])
         for i in range(10)])
    number_reconstruction = PIL.Image.fromarray(
        tile_raster_images(stacked, (gsn.image_height, gsn.image_width), (10, 30))
    )

    number_reconstruction.save(gsn.outdir + 'reconstruction.png')
    log.info("saved output image!")

    # Construct image from the weight matrix
    image = PIL.Image.fromarray(
        tile_raster_images(
            X=gsn.weights_list[0].get_value(borrow=True).T,
            img_shape=(28, 28),
            tile_shape=closest_to_square_factors(gsn.hidden_size),
            tile_spacing=(1, 1)
        )
    )
    image.save(gsn.outdir + "gsn_mnist_weights.png")