Example #1
0
def jobman(state, channel):
    # load dataset
    rng = numpy.random.RandomState(state['seed'])

    # declare the dimensionalies of the input and output
    if state['chunks'] == 'words':
        state['n_in'] = 10000
        state['n_out'] = 10000
    else:
        state['n_in'] = 50
        state['n_out'] = 50
    train_data, valid_data, test_data = get_text_data(state)

    ## BEGIN Tutorial
    ### Define Theano Input Variables
    x = TT.lvector('x')
    y = TT.lvector('y')
    h0 = theano.shared(
        numpy.zeros((eval(state['nhids'])[-1], ), dtype='float32'))

    ### Neural Implementation of the Operators: \oplus
    #### Word Embedding
    emb_words = MultiLayer(rng,
                           n_in=state['n_in'],
                           n_hids=eval(state['inp_nhids']),
                           activation=eval(state['inp_activ']),
                           init_fn='sample_weights_classic',
                           weight_noise=state['weight_noise'],
                           rank_n_approx=state['rank_n_approx'],
                           scale=state['inp_scale'],
                           sparsity=state['inp_sparse'],
                           learn_bias=True,
                           bias_scale=eval(state['inp_bias']),
                           name='emb_words')

    #### Deep Transition Recurrent Layer
    rec = eval(state['rec_layer'])(
        rng,
        eval(state['nhids']),
        activation=eval(state['rec_activ']),
        #activation = 'TT.nnet.sigmoid',
        bias_scale=eval(state['rec_bias']),
        scale=eval(state['rec_scale']),
        sparsity=eval(state['rec_sparse']),
        init_fn=eval(state['rec_init']),
        weight_noise=state['weight_noise'],
        name='rec')

    #### Stiching them together
    ##### (1) Get the embedding of a word
    x_emb = emb_words(x, no_noise_bias=state['no_noise_bias'])
    ##### (2) Embedding + Hidden State via DT Recurrent Layer
    reset = TT.scalar('reset')
    rec_layer = rec(x_emb,
                    n_steps=x.shape[0],
                    init_state=h0 * reset,
                    no_noise_bias=state['no_noise_bias'],
                    truncate_gradient=state['truncate_gradient'],
                    batch_size=1)

    ## BEGIN Exercise: DOT-RNN
    ### Neural Implementation of the Operators: \lhd

    #### Exercise (1)
    #### TODO: Define a layer from the hidden state to the intermediate layer
    emb_layer = MultiLayer(rng, )

    #### Exercise (1)
    #### TODO: Define a layer from the input to the intermediate Layer

    #### Hidden State: Combine emb_state and emb_words_out
    #### Exercise (1)
    #### TODO: Define an activation layer

    #### Exercise (2)
    #### TODO: Define a dropout layer

    #### Softmax Layer
    output_layer = SoftmaxLayer(rng,
                                eval(state['dout_nhid']),
                                state['n_out'],
                                scale=state['out_scale'],
                                bias_scale=state['out_bias_scale'],
                                init_fn="sample_weights_classic",
                                weight_noise=state['weight_noise'],
                                sparsity=state['out_sparse'],
                                sum_over_time=True,
                                name='out')

    ### Few Optional Things
    #### Direct shortcut from x to y
    if state['shortcut_inpout']:
        shortcut = MultiLayer(rng,
                              n_in=state['n_in'],
                              n_hids=eval(state['inpout_nhids']),
                              activations=eval(state['inpout_activ']),
                              init_fn='sample_weights_classic',
                              weight_noise=state['weight_noise'],
                              scale=eval(state['inpout_scale']),
                              sparsity=eval(state['inpout_sparse']),
                              learn_bias=eval(state['inpout_learn_bias']),
                              bias_scale=eval(state['inpout_bias']),
                              name='shortcut')

    #### Learning rate scheduling (1/(1+n/beta))
    state['clr'] = state['lr']

    def update_lr(obj, cost):
        stp = obj.step
        if isinstance(obj.state['lr_start'],
                      int) and stp > obj.state['lr_start']:
            time = float(stp - obj.state['lr_start'])
            new_lr = obj.state['clr'] / (1 + time / obj.state['lr_beta'])
            obj.lr = new_lr

    if state['lr_adapt']:
        rec.add_schedule(update_lr)

    ### Neural Implementations of the Language Model
    #### Training
    if state['shortcut_inpout']:
        additional_inputs = [rec_layer, shortcut(x)]
    else:
        additional_inputs = [rec_layer]

    ##### Exercise (1): Compute the output intermediate layer
    ##### TODO: Compute the output intermediate layer

    ##### Exercise (2): Apply Dropout
    ##### TODO: Apply the dropout layer

    train_model = output_layer(outhid,
                               no_noise_bias=state['no_noise_bias'],
                               additional_inputs=additional_inputs).train(
                                   target=y,
                                   scale=numpy.float32(1. / state['seqlen']))

    nw_h0 = rec_layer.out[rec_layer.out.shape[0] - 1]
    if state['carry_h0']:
        train_model.updates += [(h0, nw_h0)]

    #### Validation
    h0val = theano.shared(
        numpy.zeros((eval(state['nhids'])[-1], ), dtype='float32'))
    rec_layer = rec(emb_words(x, use_noise=False),
                    n_steps=x.shape[0],
                    batch_size=1,
                    init_state=h0val * reset,
                    use_noise=False)
    nw_h0 = rec_layer.out[rec_layer.out.shape[0] - 1]

    ##### Exercise (1):
    ##### TODO: Compute the output intermediate layer

    ##### Exercise (2): Apply Dropout
    ##### TODO: Apply the dropout layer without noise

    if state['shortcut_inpout']:
        additional_inputs = [rec_layer, shortcut(x, use_noise=False)]
    else:
        additional_inputs = [rec_layer]
    valid_model = output_layer(outhid,
                               additional_inputs=additional_inputs,
                               use_noise=False).validate(target=y,
                                                         sum_over_time=True)

    valid_updates = []
    if state['carry_h0']:
        valid_updates = [(h0val, nw_h0)]

    valid_fn = theano.function([x, y, reset],
                               valid_model.cost,
                               name='valid_fn',
                               updates=valid_updates)

    #### Sampling
    ##### single-step sampling
    def sample_fn(word_tm1, h_tm1):
        x_emb = emb_words(word_tm1, use_noise=False, one_step=True)
        h0 = rec(x_emb, state_before=h_tm1, one_step=True, use_noise=False)[-1]
        outhid = outhid_dropout(outhid_activ(
            emb_state(h0, use_noise=False, one_step=True) +
            emb_words_out(word_tm1, use_noise=False, one_step=True),
            one_step=True),
                                use_noise=False,
                                one_step=True)
        word = output_layer.get_sample(state_below=outhid,
                                       additional_inputs=[h0],
                                       temp=1.)
        return word, h0

    ##### scan for iterating the single-step sampling multiple times
    [samples, summaries], updates = scan(sample_fn,
                                         states=[
                                             TT.alloc(numpy.int64(0),
                                                      state['sample_steps']),
                                             TT.alloc(numpy.float32(0), 1,
                                                      eval(state['nhids'])[-1])
                                         ],
                                         n_steps=state['sample_steps'],
                                         name='sampler_scan')

    ##### build a Theano function for sampling
    sample_fn = theano.function([], [samples],
                                updates=updates,
                                profile=False,
                                name='sample_fn')

    ##### Load a dictionary
    dictionary = numpy.load(state['dictionary'])
    if state['chunks'] == 'chars':
        dictionary = dictionary['unique_chars']
    else:
        dictionary = dictionary['unique_words']

    def hook_fn():
        sample = sample_fn()[0]
        print 'Sample:',
        if state['chunks'] == 'chars':
            print "".join(dictionary[sample])
        else:
            for si in sample:
                print dictionary[si],
            print

    ### Build and Train a Model
    #### Define a model
    model = LM_Model(cost_layer=train_model,
                     weight_noise_amount=state['weight_noise_amount'],
                     valid_fn=valid_fn,
                     clean_before_noise_fn=False,
                     noise_fn=None,
                     rng=rng)

    if state['reload']:
        model.load(state['prefix'] + 'model.npz')

    #### Define a trainer
    ##### Training algorithm (SGD)
    if state['moment'] < 0:
        algo = SGD(model, state, train_data)
    else:
        algo = SGD_m(model, state, train_data)
    ##### Main loop of the trainer
    main = MainLoop(train_data,
                    valid_data,
                    test_data,
                    model,
                    algo,
                    state,
                    channel,
                    train_cost=False,
                    hooks=hook_fn,
                    validate_postprocess=eval(state['validate_postprocess']))
    ## Run!
    main.main()
Example #2
0
def main():
    logging.basicConfig(
        level=logging.DEBUG,
        format="%(asctime)s: %(name)s: %(levelname)s: %(message)s")

    parser = argparse.ArgumentParser(
        "Case study of generating simple 1d sequences with RNN.",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        "mode",
        choices=["train", "plot"],
        help="The mode to run. Use `train` to train a new model"
        " and `plot` to plot a sequence generated by an"
        " existing one.")
    parser.add_argument("prefix",
                        default="sine",
                        help="The prefix for model, timing and state files")
    parser.add_argument("--input-noise",
                        type=float,
                        default=0.0,
                        help="Adds Gaussian noise of given intensity to the "
                        " training sequences.")
    parser.add_argument(
        "--function",
        default="lambda a, x: numpy.sin(a * x)",
        help="An analytical description of the sequence family to learn."
        " The arguments before the last one are considered parameters.")
    parser.add_argument("--steps",
                        type=int,
                        default=100,
                        help="Number of steps to plot")
    parser.add_argument("--params", help="Parameter values for plotting")
    args = parser.parse_args()

    function = eval(args.function)
    num_params = len(inspect.getargspec(function).args) - 1

    class Emitter(TrivialEmitter):
        @application
        def cost(self, readouts, outputs):
            """Compute MSE."""
            return ((readouts - outputs)**2).sum(axis=readouts.ndim - 1)

    transition = GatedRecurrent(name="transition",
                                activation=Tanh(),
                                dim=10,
                                weights_init=Orthogonal())
    with_params = AddParameters(transition,
                                num_params,
                                "params",
                                name="with_params")
    generator = SequenceGenerator(LinearReadout(
        readout_dim=1,
        source_names=["states"],
        emitter=Emitter(name="emitter"),
        name="readout"),
                                  with_params,
                                  weights_init=IsotropicGaussian(0.01),
                                  biases_init=Constant(0),
                                  name="generator")
    generator.allocate()
    logger.debug("Parameters:\n" +
                 pprint.pformat(Selector(generator).get_params().keys()))

    if args.mode == "train":
        seed = 1
        rng = numpy.random.RandomState(seed)
        batch_size = 10

        generator.initialize()

        cost = Cost(
            generator.cost(tensor.tensor3('x'),
                           params=tensor.matrix("params")).sum())
        if args.input_noise:
            cost.apply_noise(cost.inputs, args.input_noise)

        gh_model = GroundhogModel(generator, cost)
        state = GroundhogState(args.prefix, batch_size,
                               learning_rate=0.0001).as_dict()
        data = SeriesIterator(rng, function, 100, batch_size)
        trainer = SGD(gh_model, state, data)
        main_loop = MainLoop(data, None, None, gh_model, trainer, state, None)
        main_loop.load()
        main_loop.main()
    elif args.mode == "plot":
        load_params(generator, args.prefix + "model.npz")

        params = tensor.matrix("params")
        sample = theano.function([params],
                                 generator.generate(params=params,
                                                    n_steps=args.steps,
                                                    batch_size=1))

        param_values = numpy.array(map(float, args.params.split()),
                                   dtype=floatX)
        states, outputs, _ = sample(param_values[None, :])
        actual = outputs[:, 0, 0]
        desired = numpy.array(
            [function(*(list(param_values) + [T])) for T in range(args.steps)])
        print("MSE: {}".format(((actual - desired)**2).sum()))

        pyplot.plot(numpy.hstack([actual[:, None], desired[:, None]]))
        pyplot.show()
    else:
        assert False
Example #3
0
def main():
    logging.basicConfig(
        level=logging.DEBUG,
        format="%(asctime)s: %(name)s: %(levelname)s: %(message)s")

    parser = argparse.ArgumentParser(
        "Case study of generating a Markov chain with RNN.",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        "mode",
        choices=["train", "sample"],
        help="The mode to run. Use `train` to train a new model"
        " and `sample` to sample a sequence generated by an"
        " existing one.")
    parser.add_argument("prefix",
                        default="sine",
                        help="The prefix for model, timing and state files")
    parser.add_argument("--steps",
                        type=int,
                        default=100,
                        help="Number of steps to plot")
    args = parser.parse_args()

    dim = 10
    num_states = ChainIterator.num_states
    feedback_dim = 8

    transition = GatedRecurrent(name="transition", activation=Tanh(), dim=dim)
    generator = SequenceGenerator(LinearReadout(
        readout_dim=num_states,
        source_names=["states"],
        emitter=SoftmaxEmitter(name="emitter"),
        feedbacker=LookupFeedback(num_states, feedback_dim, name='feedback'),
        name="readout"),
                                  transition,
                                  weights_init=IsotropicGaussian(0.01),
                                  biases_init=Constant(0),
                                  name="generator")
    generator.allocate()
    logger.debug("Parameters:\n" + pprint.pformat(
        [(key, value.get_value().shape)
         for key, value in Selector(generator).get_params().items()],
        width=120))

    if args.mode == "train":
        rng = numpy.random.RandomState(1)
        batch_size = 50

        generator.push_initialization_config()
        transition.weights_init = Orthogonal()
        generator.initialize()
        logger.debug("transition.weights_init={}".format(
            transition.weights_init))

        cost = generator.cost(tensor.lmatrix('x')).sum()
        gh_model = GroundhogModel(generator, cost)
        state = GroundhogState(args.prefix, batch_size,
                               learning_rate=0.0001).as_dict()
        data = ChainIterator(rng, 100, batch_size)
        trainer = SGD(gh_model, state, data)
        main_loop = MainLoop(data, None, None, gh_model, trainer, state, None)
        main_loop.main()
    elif args.mode == "sample":
        load_params(generator, args.prefix + "model.npz")

        sample = ComputationGraph(
            generator.generate(n_steps=args.steps, batch_size=1,
                               iterate=True)).function()

        states, outputs, costs = [data[:, 0] for data in sample()]

        numpy.set_printoptions(precision=3, suppress=True)
        print("Generation cost:\n{}".format(costs.sum()))

        freqs = numpy.bincount(outputs).astype(floatX)
        freqs /= freqs.sum()
        print("Frequencies:\n {} vs {}".format(freqs,
                                               ChainIterator.equilibrium))

        trans_freqs = numpy.zeros((num_states, num_states), dtype=floatX)
        for a, b in zip(outputs, outputs[1:]):
            trans_freqs[a, b] += 1
        trans_freqs /= trans_freqs.sum(axis=1)[:, None]
        print("Transition frequencies:\n{}\nvs\n{}".format(
            trans_freqs, ChainIterator.trans_prob))
    else:
        assert False
Example #4
0
def main():
    logging.basicConfig(
        level=logging.DEBUG,
        format="%(asctime)s: %(name)s: %(levelname)s: %(message)s")

    parser = argparse.ArgumentParser(
        "Case study of language modeling with RNN",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        "mode",
        choices=["train", "sample"],
        help="The mode to run. Use `train` to train a new model"
        " and `sample` to sample a sequence generated by an"
        " existing one.")
    parser.add_argument("prefix",
                        default="sine",
                        help="The prefix for model, timing and state files")
    parser.add_argument("state",
                        nargs="?",
                        default="",
                        help="Changes to Groundhog state")
    parser.add_argument("--path", help="Path to a language dataset")
    parser.add_argument("--dict", help="Path to the dataset dictionary")
    parser.add_argument("--restart", help="Start anew")
    parser.add_argument("--reset",
                        action="store_true",
                        default=False,
                        help="Reset the hidden state between batches")
    parser.add_argument("--steps",
                        type=int,
                        default=100,
                        help="Number of steps to plot for the 'sample' mode"
                        " OR training sequence length for the 'train' mode.")
    args = parser.parse_args()
    logger.debug("Args:\n" + str(args))

    dim = 200
    num_chars = 50

    transition = GatedRecurrent(name="transition",
                                activation=Tanh(),
                                dim=dim,
                                weights_init=Orthogonal())
    generator = SequenceGenerator(LinearReadout(
        readout_dim=num_chars,
        source_names=["states"],
        emitter=SoftmaxEmitter(name="emitter"),
        feedbacker=LookupFeedback(num_chars, dim, name='feedback'),
        name="readout"),
                                  transition,
                                  weights_init=IsotropicGaussian(0.01),
                                  biases_init=Constant(0),
                                  name="generator")
    generator.allocate()
    logger.debug("Parameters:\n" + pprint.pformat(
        [(key, value.get_value().shape)
         for key, value in Selector(generator).get_params().items()],
        width=120))

    if args.mode == "train":
        batch_size = 1
        seq_len = args.steps

        generator.initialize()

        # Build cost computation graph that uses the saved hidden states.
        # An issue: for Groundhog this is completely transparent, that's
        # why it does not carry the hidden state over the period when
        # validation in done. We should find a way to fix in the future.
        x = tensor.lmatrix('x')
        init_states = shared_floatx_zeros((batch_size, dim),
                                          name='init_states')
        reset = tensor.scalar('reset')
        cost = ComputationGraph(
            generator.cost(x, states=init_states * reset).sum())
        # TODO: better search routine
        states = [
            v for v in cost.variables if hasattr(v.tag, 'application_call')
            and v.tag.application_call.brick == generator.transition and
            (v.tag.application_call.application == generator.transition.apply)
            and v.tag.role == VariableRole.OUTPUT and v.tag.name == 'states'
        ]
        assert len(states) == 1
        states = states[0]

        gh_model = GroundhogModel(generator, cost)
        gh_model.properties.append(
            ('bpc', cost.outputs[0] * numpy.log(2) / seq_len))
        gh_model.properties.append(('mean_init_state', init_states.mean()))
        gh_model.properties.append(('reset', reset))
        if not args.reset:
            gh_model.updates.append((init_states, states[-1]))

        state = GroundhogState(args.prefix, batch_size,
                               learning_rate=0.0001).as_dict()
        changes = eval("dict({})".format(args.state))
        state.update(changes)

        def output_format(x, y, reset):
            return dict(x=x[:, None], reset=reset)

        train, valid, test = [
            LMIterator(batch_size=batch_size,
                       use_infinite_loop=mode == 'train',
                       path=args.path,
                       seq_len=seq_len,
                       mode=mode,
                       chunks='chars',
                       output_format=output_format,
                       can_fit=True) for mode in ['train', 'valid', 'test']
        ]

        trainer = SGD(gh_model, state, train)
        state['on_nan'] = 'warn'
        state['cutoff'] = 1.

        main_loop = MainLoop(train, valid, None, gh_model, trainer, state,
                             None)
        if not args.restart:
            main_loop.load()
        main_loop.main()
    elif args.mode == "sample":
        load_params(generator, args.prefix + "model.npz")

        chars = numpy.load(args.dict)['unique_chars']

        sample = ComputationGraph(
            generator.generate(n_steps=args.steps, batch_size=10,
                               iterate=True)).function()

        states, outputs, costs = sample()

        for i in range(10):
            print("Generation cost: {}".format(costs[:, i].sum()))
            print("".join([chars[o] for o in outputs[:, i]]))
    else:
        assert False