def __init__(self):
        super(RNN_GSN, self).__init__()

        gsn_hiddens = 500
        gsn_layers = 2

        # RNN that takes in images (3D sequences) and outputs gsn hiddens (3D sequence of them)
        self.rnn = RNN(
            input_size=28 * 28,
            hidden_size=100,
            # the RNN output supplies the hidden state for each odd layer of the GSN
            output_size=gsn_hiddens * int(math.ceil(gsn_layers / 2.)),
            layers=1,
            activation='tanh',
            hidden_activation='relu',
            weights_init='uniform',
            weights_interval='montreal',
            r_weights_init='identity')

        # Create the GSN that will encode the input space
        gsn = GSN(input_size=28 * 28,
                  hidden_size=gsn_hiddens,
                  layers=gsn_layers,
                  walkbacks=4,
                  visible_activation='sigmoid',
                  hidden_activation='tanh',
                  image_height=28,
                  image_width=28)
        # grab the input arguments
        gsn_args = gsn.args.copy()
        # grab the parameters it initialized
        gsn_params = gsn.get_params()

        # Now hook the two up! RNN should output hiddens for GSN into a 3D tensor (1 set for each timestep)
        # Therefore, we need to use scan to create the GSN reconstruction for each timestep given the hiddens
        def step(hiddens, x):
            gsn = GSN(inputs_hook=(28 * 28, x),
                      hiddens_hook=(gsn_hiddens, hiddens),
                      params_hook=gsn_params,
                      **gsn_args)
            # return the reconstruction and cost!
            return gsn.get_outputs(), gsn.get_train_cost()

        (outputs, costs), scan_updates = theano.scan(
            fn=step,
            sequences=[self.rnn.output, self.rnn.input],
            outputs_info=[None, None])

        self.outputs = outputs

        self.updates = dict()
        self.updates.update(self.rnn.get_updates())
        self.updates.update(scan_updates)

        self.cost = costs.sum()
        self.params = gsn_params + self.rnn.get_params()
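This constructor only builds the computation graph: it leaves `self.cost`, `self.params`, and `self.updates` for an optimizer to consume. Below is a minimal training sketch patterned on the optimizer and Monitor calls in Example #2; the `RNN_GSN()` instantiation mirrors this class, but the `mnist` dataset variable and the exact AdaDelta/Monitor keyword names are assumptions, not the library's confirmed API.

# Minimal training sketch (assumptions: RNN_GSN is a trainable Model subclass,
# `mnist` is a sequential MNIST dataset object, and AdaDelta/Monitor accept the
# same keyword arguments used in Example #2 below).
rnn_gsn = RNN_GSN()

cost_monitor = Monitor("cost", rnn_gsn.cost, train=True, valid=True, test=True)

optimizer = AdaDelta(model=rnn_gsn,
                     dataset=mnist,      # hypothetical dataset variable
                     epochs=200,
                     batch_size=100,
                     learning_rate=1e-6)
optimizer.train(monitor_channels=cost_monitor)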
Example #2
# NOTE: assumed imports (OpenDeep-style; exact module paths may differ by version):
#   import theano.sandbox.rng_mrg as RNG_MRG
#   from opendeep.data import TextDataset
#   from opendeep.models import RNN
#   from opendeep.monitor import Monitor
#   from opendeep.optimization import RMSProp
def main():
    # Character-level Shakespeare text: each target sequence is the input sequence
    # shifted one character into the future (next-character prediction).
    data = TextDataset(
        path='../../../../datasets/shakespeare_input.txt',
        source="http://cs.stanford.edu/people/karpathy/char-rnn/shakespeare_input.txt",
        target_n_future=1,
        sequence_length=50)

    rnn = RNN(outdir='outputs/rnn/',
              input_size=len(data.vocab),   # one input unit per character in the vocabulary
              hidden_size=128,
              output_size=len(data.vocab),  # softmax distribution over the next character
              layers=2,
              activation='softmax',
              hidden_activation='relu',
              mrg=RNG_MRG.MRG_RandomStreams(1),
              weights_init='uniform',
              weights_interval='montreal',
              bias_init=0.0,
              r_weights_init='identity',
              r_bias_init=0.0,
              cost_function='nll',
              cost_args=None,
              noise='dropout',
              noise_level=.7,
              noise_decay='exponential',
              noise_decay_amount=.99,
              direction='forward')

    cost_monitor = Monitor("cost",
                           rnn.get_train_cost(),
                           train=False,
                           valid=True,
                           test=True)

    optimizer = RMSProp(model=rnn,
                        dataset=data,
                        grad_clip=5.,
                        hard_clip=False,
                        learning_rate=2e-3,
                        lr_decay='exponential',
                        lr_decay_factor=0.97,
                        decay=0.95,
                        batch_size=50,
                        epochs=50)
    # Alternatively: optimizer = AdaDelta(model=rnn, dataset=data, epochs=200, batch_size=100, learning_rate=1e-6)
    optimizer.train(monitor_channels=cost_monitor)
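For intuition about `target_n_future=1` with `sequence_length=50`: each training example is a window of characters and its target is the same window shifted one step into the future, so the softmax output at every timestep is trained to predict the next character (which is why both `input_size` and `output_size` are `len(data.vocab)`). A standalone illustration in plain Python with a toy window length (not the TextDataset API):

# Toy illustration of next-character windows (plain Python, not OpenDeep code).
text = "to be or not to be"
seq_len, n_future = 5, 1

inputs, targets = [], []
for i in range(0, len(text) - seq_len - n_future + 1, seq_len):
    inputs.append(text[i:i + seq_len])                          # input window
    targets.append(text[i + n_future:i + seq_len + n_future])   # same window shifted by one

print(list(zip(inputs, targets)))
# [('to be', 'o be '), (' or n', 'or no'), ('ot to', 't to ')]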