Python SGD Examples

Programming Language: Python

Namespace/Package Name: groundhog.trainer.SGD

Class/Type: SGD

Examples at hotexamples.com: 4

Python SGD - 4 examples found. These are the top rated real world Python examples of groundhog.trainer.SGD.SGD extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

SGD(4)

Frequently Used Methods

SGD (4)

Example #1

Show file

def jobman(state, channel):
    # load dataset
    rng = numpy.random.RandomState(state['seed'])

    # declare the dimensionalies of the input and output
    if state['chunks'] == 'words':
        state['n_in'] = 10000
        state['n_out'] = 10000
    else:
        state['n_in'] = 50
        state['n_out'] = 50
    train_data, valid_data, test_data = get_text_data(state)

    ## BEGIN Tutorial
    ### Define Theano Input Variables
    x = TT.lvector('x')
    y = TT.lvector('y')
    h0 = theano.shared(
        numpy.zeros((eval(state['nhids'])[-1], ), dtype='float32'))

    ### Neural Implementation of the Operators: \oplus
    #### Word Embedding
    emb_words = MultiLayer(rng,
                           n_in=state['n_in'],
                           n_hids=eval(state['inp_nhids']),
                           activation=eval(state['inp_activ']),
                           init_fn='sample_weights_classic',
                           weight_noise=state['weight_noise'],
                           rank_n_approx=state['rank_n_approx'],
                           scale=state['inp_scale'],
                           sparsity=state['inp_sparse'],
                           learn_bias=True,
                           bias_scale=eval(state['inp_bias']),
                           name='emb_words')

    #### Deep Transition Recurrent Layer
    rec = eval(state['rec_layer'])(
        rng,
        eval(state['nhids']),
        activation=eval(state['rec_activ']),
        #activation = 'TT.nnet.sigmoid',
        bias_scale=eval(state['rec_bias']),
        scale=eval(state['rec_scale']),
        sparsity=eval(state['rec_sparse']),
        init_fn=eval(state['rec_init']),
        weight_noise=state['weight_noise'],
        name='rec')

    #### Stiching them together
    ##### (1) Get the embedding of a word
    x_emb = emb_words(x, no_noise_bias=state['no_noise_bias'])
    ##### (2) Embedding + Hidden State via DT Recurrent Layer
    reset = TT.scalar('reset')
    rec_layer = rec(x_emb,
                    n_steps=x.shape[0],
                    init_state=h0 * reset,
                    no_noise_bias=state['no_noise_bias'],
                    truncate_gradient=state['truncate_gradient'],
                    batch_size=1)

    ## BEGIN Exercise: DOT-RNN
    ### Neural Implementation of the Operators: \lhd

    #### Exercise (1)
    #### TODO: Define a layer from the hidden state to the intermediate layer
    emb_layer = MultiLayer(rng, )

    #### Exercise (1)
    #### TODO: Define a layer from the input to the intermediate Layer

    #### Hidden State: Combine emb_state and emb_words_out
    #### Exercise (1)
    #### TODO: Define an activation layer

    #### Exercise (2)
    #### TODO: Define a dropout layer

    #### Softmax Layer
    output_layer = SoftmaxLayer(rng,
                                eval(state['dout_nhid']),
                                state['n_out'],
                                scale=state['out_scale'],
                                bias_scale=state['out_bias_scale'],
                                init_fn="sample_weights_classic",
                                weight_noise=state['weight_noise'],
                                sparsity=state['out_sparse'],
                                sum_over_time=True,
                                name='out')

    ### Few Optional Things
    #### Direct shortcut from x to y
    if state['shortcut_inpout']:
        shortcut = MultiLayer(rng,
                              n_in=state['n_in'],
                              n_hids=eval(state['inpout_nhids']),
                              activations=eval(state['inpout_activ']),
                              init_fn='sample_weights_classic',
                              weight_noise=state['weight_noise'],
                              scale=eval(state['inpout_scale']),
                              sparsity=eval(state['inpout_sparse']),
                              learn_bias=eval(state['inpout_learn_bias']),
                              bias_scale=eval(state['inpout_bias']),
                              name='shortcut')

    #### Learning rate scheduling (1/(1+n/beta))
    state['clr'] = state['lr']

    def update_lr(obj, cost):
        stp = obj.step
        if isinstance(obj.state['lr_start'],
                      int) and stp > obj.state['lr_start']:
            time = float(stp - obj.state['lr_start'])
            new_lr = obj.state['clr'] / (1 + time / obj.state['lr_beta'])
            obj.lr = new_lr

    if state['lr_adapt']:
        rec.add_schedule(update_lr)

    ### Neural Implementations of the Language Model
    #### Training
    if state['shortcut_inpout']:
        additional_inputs = [rec_layer, shortcut(x)]
    else:
        additional_inputs = [rec_layer]

    ##### Exercise (1): Compute the output intermediate layer
    ##### TODO: Compute the output intermediate layer

    ##### Exercise (2): Apply Dropout
    ##### TODO: Apply the dropout layer

    train_model = output_layer(outhid,
                               no_noise_bias=state['no_noise_bias'],
                               additional_inputs=additional_inputs).train(
                                   target=y,
                                   scale=numpy.float32(1. / state['seqlen']))

    nw_h0 = rec_layer.out[rec_layer.out.shape[0] - 1]
    if state['carry_h0']:
        train_model.updates += [(h0, nw_h0)]

    #### Validation
    h0val = theano.shared(
        numpy.zeros((eval(state['nhids'])[-1], ), dtype='float32'))
    rec_layer = rec(emb_words(x, use_noise=False),
                    n_steps=x.shape[0],
                    batch_size=1,
                    init_state=h0val * reset,
                    use_noise=False)
    nw_h0 = rec_layer.out[rec_layer.out.shape[0] - 1]

    ##### Exercise (1):
    ##### TODO: Compute the output intermediate layer

    ##### Exercise (2): Apply Dropout
    ##### TODO: Apply the dropout layer without noise

    if state['shortcut_inpout']:
        additional_inputs = [rec_layer, shortcut(x, use_noise=False)]
    else:
        additional_inputs = [rec_layer]
    valid_model = output_layer(outhid,
                               additional_inputs=additional_inputs,
                               use_noise=False).validate(target=y,
                                                         sum_over_time=True)

    valid_updates = []
    if state['carry_h0']:
        valid_updates = [(h0val, nw_h0)]

    valid_fn = theano.function([x, y, reset],
                               valid_model.cost,
                               name='valid_fn',
                               updates=valid_updates)

    #### Sampling
    ##### single-step sampling
    def sample_fn(word_tm1, h_tm1):
        x_emb = emb_words(word_tm1, use_noise=False, one_step=True)
        h0 = rec(x_emb, state_before=h_tm1, one_step=True, use_noise=False)[-1]
        outhid = outhid_dropout(outhid_activ(
            emb_state(h0, use_noise=False, one_step=True) +
            emb_words_out(word_tm1, use_noise=False, one_step=True),
            one_step=True),
                                use_noise=False,
                                one_step=True)
        word = output_layer.get_sample(state_below=outhid,
                                       additional_inputs=[h0],
                                       temp=1.)
        return word, h0

    ##### scan for iterating the single-step sampling multiple times
    [samples, summaries], updates = scan(sample_fn,
                                         states=[
                                             TT.alloc(numpy.int64(0),
                                                      state['sample_steps']),
                                             TT.alloc(numpy.float32(0), 1,
                                                      eval(state['nhids'])[-1])
                                         ],
                                         n_steps=state['sample_steps'],
                                         name='sampler_scan')

    ##### build a Theano function for sampling
    sample_fn = theano.function([], [samples],
                                updates=updates,
                                profile=False,
                                name='sample_fn')

    ##### Load a dictionary
    dictionary = numpy.load(state['dictionary'])
    if state['chunks'] == 'chars':
        dictionary = dictionary['unique_chars']
    else:
        dictionary = dictionary['unique_words']

    def hook_fn():
        sample = sample_fn()[0]
        print 'Sample:',
        if state['chunks'] == 'chars':
            print "".join(dictionary[sample])
        else:
            for si in sample:
                print dictionary[si],
            print

    ### Build and Train a Model
    #### Define a model
    model = LM_Model(cost_layer=train_model,
                     weight_noise_amount=state['weight_noise_amount'],
                     valid_fn=valid_fn,
                     clean_before_noise_fn=False,
                     noise_fn=None,
                     rng=rng)

    if state['reload']:
        model.load(state['prefix'] + 'model.npz')

    #### Define a trainer
    ##### Training algorithm (SGD)
    if state['moment'] < 0:
        algo = SGD(model, state, train_data)
    else:
        algo = SGD_m(model, state, train_data)
    ##### Main loop of the trainer
    main = MainLoop(train_data,
                    valid_data,
                    test_data,
                    model,
                    algo,
                    state,
                    channel,
                    train_cost=False,
                    hooks=hook_fn,
                    validate_postprocess=eval(state['validate_postprocess']))
    ## Run!
    main.main()

Example #2

Show file

def main():
    logging.basicConfig(
        level=logging.DEBUG,
        format="%(asctime)s: %(name)s: %(levelname)s: %(message)s")

    parser = argparse.ArgumentParser(
        "Case study of generating simple 1d sequences with RNN.",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        "mode",
        choices=["train", "plot"],
        help="The mode to run. Use `train` to train a new model"
        " and `plot` to plot a sequence generated by an"
        " existing one.")
    parser.add_argument("prefix",
                        default="sine",
                        help="The prefix for model, timing and state files")
    parser.add_argument("--input-noise",
                        type=float,
                        default=0.0,
                        help="Adds Gaussian noise of given intensity to the "
                        " training sequences.")
    parser.add_argument(
        "--function",
        default="lambda a, x: numpy.sin(a * x)",
        help="An analytical description of the sequence family to learn."
        " The arguments before the last one are considered parameters.")
    parser.add_argument("--steps",
                        type=int,
                        default=100,
                        help="Number of steps to plot")
    parser.add_argument("--params", help="Parameter values for plotting")
    args = parser.parse_args()

    function = eval(args.function)
    num_params = len(inspect.getargspec(function).args) - 1

    class Emitter(TrivialEmitter):
        @application
        def cost(self, readouts, outputs):
            """Compute MSE."""
            return ((readouts - outputs)**2).sum(axis=readouts.ndim - 1)

    transition = GatedRecurrent(name="transition",
                                activation=Tanh(),
                                dim=10,
                                weights_init=Orthogonal())
    with_params = AddParameters(transition,
                                num_params,
                                "params",
                                name="with_params")
    generator = SequenceGenerator(LinearReadout(
        readout_dim=1,
        source_names=["states"],
        emitter=Emitter(name="emitter"),
        name="readout"),
                                  with_params,
                                  weights_init=IsotropicGaussian(0.01),
                                  biases_init=Constant(0),
                                  name="generator")
    generator.allocate()
    logger.debug("Parameters:\n" +
                 pprint.pformat(Selector(generator).get_params().keys()))

    if args.mode == "train":
        seed = 1
        rng = numpy.random.RandomState(seed)
        batch_size = 10

        generator.initialize()

        cost = Cost(
            generator.cost(tensor.tensor3('x'),
                           params=tensor.matrix("params")).sum())
        if args.input_noise:
            cost.apply_noise(cost.inputs, args.input_noise)

        gh_model = GroundhogModel(generator, cost)
        state = GroundhogState(args.prefix, batch_size,
                               learning_rate=0.0001).as_dict()
        data = SeriesIterator(rng, function, 100, batch_size)
        trainer = SGD(gh_model, state, data)
        main_loop = MainLoop(data, None, None, gh_model, trainer, state, None)
        main_loop.load()
        main_loop.main()
    elif args.mode == "plot":
        load_params(generator, args.prefix + "model.npz")

        params = tensor.matrix("params")
        sample = theano.function([params],
                                 generator.generate(params=params,
                                                    n_steps=args.steps,
                                                    batch_size=1))

        param_values = numpy.array(map(float, args.params.split()),
                                   dtype=floatX)
        states, outputs, _ = sample(param_values[None, :])
        actual = outputs[:, 0, 0]
        desired = numpy.array(
            [function(*(list(param_values) + [T])) for T in range(args.steps)])
        print("MSE: {}".format(((actual - desired)**2).sum()))

        pyplot.plot(numpy.hstack([actual[:, None], desired[:, None]]))
        pyplot.show()
    else:
        assert False

Example #3

Show file

File: markov_chain.py Project: madisonmay/blocks

def main():
    logging.basicConfig(
        level=logging.DEBUG,
        format="%(asctime)s: %(name)s: %(levelname)s: %(message)s")

    parser = argparse.ArgumentParser(
        "Case study of generating a Markov chain with RNN.",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        "mode",
        choices=["train", "sample"],
        help="The mode to run. Use `train` to train a new model"
        " and `sample` to sample a sequence generated by an"
        " existing one.")
    parser.add_argument("prefix",
                        default="sine",
                        help="The prefix for model, timing and state files")
    parser.add_argument("--steps",
                        type=int,
                        default=100,
                        help="Number of steps to plot")
    args = parser.parse_args()

    dim = 10
    num_states = ChainIterator.num_states
    feedback_dim = 8

    transition = GatedRecurrent(name="transition", activation=Tanh(), dim=dim)
    generator = SequenceGenerator(LinearReadout(
        readout_dim=num_states,
        source_names=["states"],
        emitter=SoftmaxEmitter(name="emitter"),
        feedbacker=LookupFeedback(num_states, feedback_dim, name='feedback'),
        name="readout"),
                                  transition,
                                  weights_init=IsotropicGaussian(0.01),
                                  biases_init=Constant(0),
                                  name="generator")
    generator.allocate()
    logger.debug("Parameters:\n" + pprint.pformat(
        [(key, value.get_value().shape)
         for key, value in Selector(generator).get_params().items()],
        width=120))

    if args.mode == "train":
        rng = numpy.random.RandomState(1)
        batch_size = 50

        generator.push_initialization_config()
        transition.weights_init = Orthogonal()
        generator.initialize()
        logger.debug("transition.weights_init={}".format(
            transition.weights_init))

        cost = generator.cost(tensor.lmatrix('x')).sum()
        gh_model = GroundhogModel(generator, cost)
        state = GroundhogState(args.prefix, batch_size,
                               learning_rate=0.0001).as_dict()
        data = ChainIterator(rng, 100, batch_size)
        trainer = SGD(gh_model, state, data)
        main_loop = MainLoop(data, None, None, gh_model, trainer, state, None)
        main_loop.main()
    elif args.mode == "sample":
        load_params(generator, args.prefix + "model.npz")

        sample = ComputationGraph(
            generator.generate(n_steps=args.steps, batch_size=1,
                               iterate=True)).function()

        states, outputs, costs = [data[:, 0] for data in sample()]

        numpy.set_printoptions(precision=3, suppress=True)
        print("Generation cost:\n{}".format(costs.sum()))

        freqs = numpy.bincount(outputs).astype(floatX)
        freqs /= freqs.sum()
        print("Frequencies:\n {} vs {}".format(freqs,
                                               ChainIterator.equilibrium))

        trans_freqs = numpy.zeros((num_states, num_states), dtype=floatX)
        for a, b in zip(outputs, outputs[1:]):
            trans_freqs[a, b] += 1
        trans_freqs /= trans_freqs.sum(axis=1)[:, None]
        print("Transition frequencies:\n{}\nvs\n{}".format(
            trans_freqs, ChainIterator.trans_prob))
    else:
        assert False

Example #4

Show file

def main():
    logging.basicConfig(
        level=logging.DEBUG,
        format="%(asctime)s: %(name)s: %(levelname)s: %(message)s")

    parser = argparse.ArgumentParser(
        "Case study of language modeling with RNN",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        "mode",
        choices=["train", "sample"],
        help="The mode to run. Use `train` to train a new model"
        " and `sample` to sample a sequence generated by an"
        " existing one.")
    parser.add_argument("prefix",
                        default="sine",
                        help="The prefix for model, timing and state files")
    parser.add_argument("state",
                        nargs="?",
                        default="",
                        help="Changes to Groundhog state")
    parser.add_argument("--path", help="Path to a language dataset")
    parser.add_argument("--dict", help="Path to the dataset dictionary")
    parser.add_argument("--restart", help="Start anew")
    parser.add_argument("--reset",
                        action="store_true",
                        default=False,
                        help="Reset the hidden state between batches")
    parser.add_argument("--steps",
                        type=int,
                        default=100,
                        help="Number of steps to plot for the 'sample' mode"
                        " OR training sequence length for the 'train' mode.")
    args = parser.parse_args()
    logger.debug("Args:\n" + str(args))

    dim = 200
    num_chars = 50

    transition = GatedRecurrent(name="transition",
                                activation=Tanh(),
                                dim=dim,
                                weights_init=Orthogonal())
    generator = SequenceGenerator(LinearReadout(
        readout_dim=num_chars,
        source_names=["states"],
        emitter=SoftmaxEmitter(name="emitter"),
        feedbacker=LookupFeedback(num_chars, dim, name='feedback'),
        name="readout"),
                                  transition,
                                  weights_init=IsotropicGaussian(0.01),
                                  biases_init=Constant(0),
                                  name="generator")
    generator.allocate()
    logger.debug("Parameters:\n" + pprint.pformat(
        [(key, value.get_value().shape)
         for key, value in Selector(generator).get_params().items()],
        width=120))

    if args.mode == "train":
        batch_size = 1
        seq_len = args.steps

        generator.initialize()

        # Build cost computation graph that uses the saved hidden states.
        # An issue: for Groundhog this is completely transparent, that's
        # why it does not carry the hidden state over the period when
        # validation in done. We should find a way to fix in the future.
        x = tensor.lmatrix('x')
        init_states = shared_floatx_zeros((batch_size, dim),
                                          name='init_states')
        reset = tensor.scalar('reset')
        cost = ComputationGraph(
            generator.cost(x, states=init_states * reset).sum())
        # TODO: better search routine
        states = [
            v for v in cost.variables if hasattr(v.tag, 'application_call')
            and v.tag.application_call.brick == generator.transition and
            (v.tag.application_call.application == generator.transition.apply)
            and v.tag.role == VariableRole.OUTPUT and v.tag.name == 'states'
        ]
        assert len(states) == 1
        states = states[0]

        gh_model = GroundhogModel(generator, cost)
        gh_model.properties.append(
            ('bpc', cost.outputs[0] * numpy.log(2) / seq_len))
        gh_model.properties.append(('mean_init_state', init_states.mean()))
        gh_model.properties.append(('reset', reset))
        if not args.reset:
            gh_model.updates.append((init_states, states[-1]))

        state = GroundhogState(args.prefix, batch_size,
                               learning_rate=0.0001).as_dict()
        changes = eval("dict({})".format(args.state))
        state.update(changes)

        def output_format(x, y, reset):
            return dict(x=x[:, None], reset=reset)

        train, valid, test = [
            LMIterator(batch_size=batch_size,
                       use_infinite_loop=mode == 'train',
                       path=args.path,
                       seq_len=seq_len,
                       mode=mode,
                       chunks='chars',
                       output_format=output_format,
                       can_fit=True) for mode in ['train', 'valid', 'test']
        ]

        trainer = SGD(gh_model, state, train)
        state['on_nan'] = 'warn'
        state['cutoff'] = 1.

        main_loop = MainLoop(train, valid, None, gh_model, trainer, state,
                             None)
        if not args.restart:
            main_loop.load()
        main_loop.main()
    elif args.mode == "sample":
        load_params(generator, args.prefix + "model.npz")

        chars = numpy.load(args.dict)['unique_chars']

        sample = ComputationGraph(
            generator.generate(n_steps=args.steps, batch_size=10,
                               iterate=True)).function()

        states, outputs, costs = sample()

        for i in range(10):
            print("Generation cost: {}".format(costs[:, i].sum()))
            print("".join([chars[o] for o in outputs[:, i]]))
    else:
        assert False