Example #1
def test_rmsprop(random_learning_rate, decay_rate, epsilon):
    rmsprop_args = {
        'learning_rate': random_learning_rate,
        'epsilon': epsilon,
        'decay_rate': decay_rate
    }

    rmsprop_ref = RMSPropReference(**rmsprop_args)
    rms = RMSProp(**rmsprop_args)

    # test baseline against reference
    compare_optimizer(rms, rmsprop_ref)
Example #2
def test_rmsprop(random_learning_rate, decay_rate, epsilon, select_variables):
    rmsprop_args = {'learning_rate': random_learning_rate,
                    'epsilon': epsilon,
                    'decay_rate': decay_rate}

    rmsprop_ref = RMSPropReference(**rmsprop_args)
    rms = RMSProp(**rmsprop_args)

    # test baseline against reference
    if select_variables:
        compare_optimizer_variable_select(rms, rmsprop_ref)
    else:
        compare_optimizer(rms, rmsprop_ref)
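For context, both tests above compare the RMSProp implementation against a reference optimizer. A minimal sketch of the standard RMSProp update such a reference typically computes is given below; the NumPy formulation and variable names are illustrative assumptions, not the actual RMSPropReference code.

import numpy as np

def rmsprop_step(param, grad, state, learning_rate, decay_rate, epsilon):
    # Running average of squared gradients (illustrative sketch, not library code).
    state = decay_rate * state + (1.0 - decay_rate) * grad ** 2
    # Scale the update by the root of the accumulated average.
    param = param - learning_rate * grad / (np.sqrt(state) + epsilon)
    return param, state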
Example #3
def make_optimizer(name=None, weight_clip_value=None, loss_type="WGAN-GP"):
    if loss_type == "WGAN":
        optimizer = RMSProp(learning_rate=5e-5,
                            decay_rate=0.99,
                            epsilon=1e-8,
                            weight_clip_value=weight_clip_value)

    elif loss_type == "WGAN-GP":
        optimizer = Adam(learning_rate=1e-4,
                         beta_1=0.5,
                         beta_2=0.9,
                         epsilon=1e-8,
                         weight_clip_value=weight_clip_value)

    return optimizer
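A hypothetical call to the helper above; the clip value of 0.01 is an illustrative assumption, not taken from the source:

# Hypothetical usage: build the WGAN critic optimizer with weight clipping.
critic_optimizer = make_optimizer(weight_clip_value=0.01, loss_type="WGAN")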
Example #4
if args.layer_type == "rnn":
    rlayer = Recurrent(hidden_size, init, activation=Tanh())
elif args.layer_type == "birnn":
    rlayer = BiRNN(hidden_size,
                   init,
                   activation=Tanh(),
                   return_sequence=True,
                   sum_out=True)

# model initialization
seq1 = Sequential([
    layer_0, rlayer,
    Affine(init, activation=Softmax(), bias_init=init, axes=(ax.Y, ))
])

optimizer = RMSProp()

train_prob = seq1(inputs['inp_txt'])
train_loss = ng.cross_entropy_multi(train_prob,
                                    ng.one_hot(inputs['tgt_txt'], axis=ax.Y),
                                    usebits=True)
batch_cost = ng.sequential(
    [optimizer(train_loss),
     ng.mean(train_loss, out_axes=())])
train_outputs = dict(batch_cost=batch_cost)

with Layer.inference_mode_on():
    inference_prob = seq1(inputs['inp_txt'])

eval_loss = ng.cross_entropy_multi(inference_prob,
                                   ng.one_hot(inputs['tgt_txt'], axis=ax.Y),
                                   usebits=True)
Example #5
    rlayer = BiRNN(hidden_size,
                   init,
                   activation=Tanh(),
                   reset_cells=True,
                   return_sequence=False,
                   sum_out=True)

# model initialization
seq1 = Sequential([
    LookupTable(vocab_size, embed_size, init, update=True, pad_idx=pad_idx),
    rlayer,
    Affine(init, activation=Softmax(), bias_init=init, axes=(ax.Y, ))
])

optimizer = RMSProp(decay_rate=0.95,
                    learning_rate=2e-3,
                    epsilon=1e-6,
                    gradient_clip_value=gradient_clip_value)

train_prob = seq1(inputs['review'])
train_loss = ng.cross_entropy_multi(train_prob,
                                    ng.one_hot(inputs['label'], axis=ax.Y),
                                    usebits=True)
batch_cost = ng.sequential(
    [optimizer(train_loss),
     ng.mean(train_loss, out_axes=())])
train_outputs = dict(batch_cost=batch_cost)

with Layer.inference_mode_on():
    inference_prob = seq1(inputs['review'])
eval_loss = ng.cross_entropy_multi(inference_prob,
                                   ng.one_hot(inputs['label'], axis=ax.Y),
                                   usebits=True)
Example #6
        u_i_list.append(score)

    output_prob = ng.softmax(ng.stack(u_i_list, axis=ax.Y, pos=0), ax.Y)
    u_list.append(output_prob)

pointer_out = ng.stack(u_list, axis=rec_axis, pos=2)

# specify loss function, calculate loss and update weights
one_hot_target = ng.one_hot(inputs['tgt_txt'], axis=ax.Y)

loss = ng.cross_entropy_multi(pointer_out,
                              one_hot_target,
                              usebits=True)

mean_cost = ng.mean(loss, out_axes=[])
optimizer = RMSProp(decay_rate=0.96, learning_rate=args.lr, epsilon=1e-6,
                    gradient_clip_value=gradient_clip_value)
updates = optimizer(loss)

# provide outputs for bound computation
train_outputs = dict(batch_cost=mean_cost, updates=updates, pointer_out=pointer_out)

# Train Loop
with closing(ngt.make_transformer()) as transformer:
    # bind the computations
    train_computation = make_bound_computation(transformer, train_outputs, inputs)

    eval_frequency = 500
    loss = []
    # iterate over training set
    for idx, data in enumerate(train_set):
        train_output = train_computation(data)
Example #7
    rlayer1 = LSTM(hidden_size,
                   init,
                   activation=Tanh(),
                   gate_activation=Logistic(),
                   return_sequence=True)
    rlayer2 = LSTM(hidden_size,
                   init,
                   activation=Tanh(),
                   gate_activation=Logistic(),
                   return_sequence=True)

# model initialization
seq1 = Sequential([
    Preprocess(functor=expand_onehot), rlayer1, rlayer2,
    Affine(init, activation=Softmax(), bias_init=init, axes=(ax.Y, ))
])

optimizer = RMSProp(gradient_clip_value=gradient_clip_value)

train_prob = seq1(inputs['inp_txt'])
train_loss = ng.cross_entropy_multi(train_prob,
                                    ng.one_hot(inputs['tgt_txt'], axis=ax.Y),
                                    usebits=True)
batch_cost = ng.sequential(
    [optimizer(train_loss),
     ng.mean(train_loss, out_axes=())])
train_outputs = dict(batch_cost=batch_cost)

with Layer.inference_mode_on():
    inference_prob = seq1(inputs['inp_txt'])

errors = ng.not_equal(ng.argmax(inference_prob, reduction_axes=[ax.Y]),
                      inputs['tgt_txt'])
Example #8
    }

    optimizer = GradientDescentMomentum(learning_rate=learning_rate_policy,
                                        momentum_coef=0.5,
                                        wdecay=4e-5,
                                        iteration=inputs['iteration'])
elif args.optimizer_name == 'rmsprop':
    learning_rate_policy = {
        'name': 'schedule',
        'schedule': list(80000 * np.arange(1, 10, 1)),
        'gamma': 0.94,
        'base_lr': 0.01
    }
    optimizer = RMSProp(learning_rate=learning_rate_policy,
                        wdecay=4e-5,
                        decay_rate=0.9,
                        momentum_coef=0.9,
                        epsilon=1.,
                        iteration=inputs['iteration'])
else:
    raise NotImplementedError("Unrecognized Optimizer")

# Build the main and auxiliary loss functions
y_onehot = ng.one_hot(inputs['label'], axis=ax.Y)
train_prob_main = inception.seq2(inception.seq1(inputs['image']))
train_prob_main = ng.map_roles(train_prob_main, {"C": ax.Y.name})
train_loss_main = ng.cross_entropy_multi(train_prob_main,
                                         y_onehot,
                                         enable_softmax_opt=False)

train_prob_aux = inception.seq_aux(inception.seq1(inputs['image']))
train_prob_aux = ng.map_roles(train_prob_aux, {"C": ax.Y.name})
Example #9
inputs = dict(X=ng.placeholder(in_axes), y=ng.placeholder(out_axes),
              iteration=ng.placeholder(axes=()))
preds_inputs = dict(X=inputs['X'])

# define model
n_hidden = list(map(int, args.n_hidden.split(",")))
filter_shape = list(map(int, args.filter_shape.split(",")))
if args.modeltype in ["RNN", "LSTM"]:
    seq1 = Sequential(recurrent_model.define_model(out_axis, celltype=args.modeltype, recurrent_units=n_hidden, return_sequence=args.predict_seq).layers + [Rectlin()])
elif args.modeltype == "CNN":
    seq1 = convolutional_model.define_model(out_axis, filter_shapes=filter_shape, n_filters=n_hidden)
    layers_modified = [lambda op: ng.map_roles(op, {'REC': 'W', 'F': 'C'})] + seq1.layers + [Rectlin()]
    seq1 = Sequential(layers_modified)

# Optimizer
optimizer = RMSProp(learning_rate=args.lr, gradient_clip_value=args.grad_clip_value)

# Define the loss function (squared L2 loss)
fwd_prop = seq1(inputs['X'])
train_loss = ng.squared_L2(fwd_prop - inputs['y'])

# Cost calculation
batch_cost = ng.sequential([optimizer(train_loss), ng.mean(train_loss, out_axes=())])
train_computation = ng.computation(batch_cost, "all")

# Forward prop of test set
# Required for correct functioning of batch norm and dropout layers during inference mode
with Layer.inference_mode_on():
    preds = seq1(inputs['X'])
    preds = ng.axes_with_order(preds, out_axes)
eval_loss = ng.mean(ng.squared_L2(preds - inputs['y']), out_axes=())
Example #10
    Affine(weight_init=init,
           activation=Softmax(),
           bias_init=init,
           axes=(ax.Y, ax.REC))
])

# Bind axes lengths:
ax.Y.length = len(tree_bank_data.vocab)
ax.REC.length = time_steps
ax.N.length = args.batch_size

# placeholders with descriptive names
inputs = dict(inp_txt=ng.placeholder([ax.REC, ax.N]),
              tgt_txt=ng.placeholder([ax.REC, ax.N]))

optimizer = RMSProp(decay_rate=0.95, learning_rate=2e-3, epsilon=1e-6)
output_prob = seq1.train_outputs(inputs['inp_txt'])
loss = ng.cross_entropy_multi(output_prob,
                              ng.one_hot(inputs['tgt_txt'], axis=ax.Y),
                              usebits=True)
mean_cost = ng.mean(loss, out_axes=[])
updates = optimizer(loss)

train_outputs = dict(batch_cost=mean_cost, updates=updates)
loss_outputs = dict(cross_ent_loss=loss)

# Now bind the computations we are interested in
transformer = ngt.make_transformer()
train_computation = make_bound_computation(transformer, train_outputs, inputs)
loss_computation = make_bound_computation(transformer, loss_outputs, inputs)
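The bound computations above would then be driven by a training loop like the one shown in Example #6. A minimal sketch follows; the train_set iterator and the reporting details are assumptions for illustration, not part of the original example.

# Illustrative loop: train_set is assumed to yield dicts matching the placeholders.
for idx, data in enumerate(train_set):
    train_output = train_computation(data)
    if idx % 100 == 0:
        # 'batch_cost' mirrors the key used in train_outputs above.
        print("iteration {}: batch cost {:.4f}".format(
            idx, float(train_output['batch_cost'])))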