def define_recurrent_layers(out_axes=None,
                            celltype='RNN',
                            recurrent_units=[32],
                            init=GlorotInit(),
                            return_sequence=True):
    layers = []
    for e, i in enumerate(recurrent_units):
        layer_return_sequence = e < len(recurrent_units) - 1 or return_sequence
        if celltype == 'RNN':
            layers.append(Recurrent(nout=i, init=init, backward=False,
                                    activation=Tanh(),
                                    return_sequence=layer_return_sequence))
        elif celltype == 'LSTM':
            layers.append(LSTM(nout=i, init=init, backward=False,
                               activation=Tanh(), gate_activation=Logistic(),
                               return_sequence=layer_return_sequence))
    if out_axes is not None:
        affine_layer = Affine(weight_init=init, bias_init=init,
                              activation=Identity(), axes=out_axes)
        layers.append(affine_layer)
    return layers
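A minimal usage sketch of the helper above, assuming an output axis `out_axis` created elsewhere with ng.make_axis and the same Sequential container used by the models later in this section:

# Illustrative sketch (not from the original source): build a two-layer LSTM stack
# with an Affine readout via define_recurrent_layers, then wrap it the way the
# seq1 models below are wrapped. 'out_axis' is an assumed, pre-existing axis.
layers = define_recurrent_layers(out_axes=(out_axis,),
                                 celltype='LSTM',
                                 recurrent_units=[64, 32],
                                 init=GlorotInit(),
                                 return_sequence=False)
seq1 = Sequential(layers)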
def test_inference_reuse_lstm(recurrent_input):
    layer = LSTM(10, dummy_init, activation=lambda x: x)
    layer(recurrent_input)
    train_params = (layer.W_input["f"], layer.W_recur["f"])
    with Layer.inference_mode_on():
        layer(recurrent_input)
    inference_params = (layer.W_input["f"], layer.W_recur["f"])

    for train_param, inference_param in zip(train_params, inference_params):
        assert train_param is inference_param
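The test above only checks the forget-gate weights. A sketch of a stricter variant, assuming the same gate-keyed W_input, W_recur, and b dictionaries used by the fprop checks further down:

# Hypothetical extension of the test (an assumption, not original code): verify that
# every gate's parameters are reused between training and inference mode.
gates = ['i', 'f', 'o', 'g']
train_params = [(layer.W_input[k], layer.W_recur[k], layer.b[k]) for k in gates]
with Layer.inference_mode_on():
    layer(recurrent_input)
inference_params = [(layer.W_input[k], layer.W_recur[k], layer.b[k]) for k in gates]
for train_tuple, inference_tuple in zip(train_params, inference_params):
    for train_param, inference_param in zip(train_tuple, inference_tuple):
        assert train_param is inference_param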
# number of classes
ax.Y.length = time_steps

# create iterator and placeholders for training data
train_set = TSPSequentialArrayIterator(data_arrays=tsp_data['train'],
                                       nfeatures=num_features,
                                       batch_size=args.batch_size,
                                       time_steps=time_steps,
                                       total_iterations=args.num_iterations)
inputs = train_set.make_placeholders()

# weight initialization
init = UniformInit(low=-0.08, high=0.08)

# build computational graph
enc = LSTM(args.hs, init, activation=Tanh(), reset_cells=True,
           gate_activation=Logistic(), return_sequence=True)
dec = LSTM(args.hs, init, activation=Tanh(), reset_cells=True,
           gate_activation=Logistic(), return_sequence=True)

if args.emb is True:
    # encoder input embedding
    hidden_feature_axis = ng.make_axis(length=args.hs, name='hidden_feature_axis')
    feature_axis = ng.make_axis(length=num_features, name='feature_axis')

    W_emb = ng.variable(axes=[hidden_feature_axis, feature_axis], initial_value=init)
    emb_enc_inputs = ng.dot(W_emb, inputs['inp_txt'])

    # decoder input embedding
    emb_dec_input = []
    ax.N.length = args.batch_size
    for i in range(ax.N.length):
inputs = train_set.make_placeholders()
ax.Y.length = len(tree_bank_data.vocab)


def expand_onehot(x):
    return ng.one_hot(x, axis=ax.Y)


# weight initialization
init = UniformInit(low=-0.08, high=0.08)

if args.layer_type == "lstm":
    rlayer1 = LSTM(hidden_size, init, activation=Tanh(),
                   gate_activation=Logistic(), return_sequence=True)
    rlayer2 = LSTM(hidden_size, init, activation=Tanh(),
                   gate_activation=Logistic(), return_sequence=True)

# model initialization
seq1 = Sequential([Preprocess(functor=expand_onehot),
                   rlayer1,
                   rlayer2,
                   Affine(init, activation=Softmax(), bias_init=init, axes=(ax.Y,))])

optimizer = RMSProp(gradient_clip_value=gradient_clip_value)
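A possible continuation after the optimizer, shown only as a sketch: the placeholder keys, the callable-Sequential usage, and the loss wiring below are assumptions and not part of the original script.

# Illustrative continuation (assumed, not original): attach a cross-entropy loss and
# the RMSProp updates to the model defined above. Placeholder keys are assumed.
fwd_prop = seq1(inputs['inp_txt'])
train_loss = ng.cross_entropy_multi(fwd_prop,
                                    ng.one_hot(inputs['tgt_txt'], axis=ax.Y))
batch_cost = ng.sequential([optimizer(train_loss), ng.mean(train_loss, out_axes=())])
train_computation = ng.computation(batch_cost, "all")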
def check_lstm(seq_len, input_size, hidden_size, batch_size,
               init_func, return_seq=True, backward=False,
               reset_cells=False, num_iter=2):

    Cin = ng.make_axis(input_size)
    REC = ng.make_axis(seq_len, name='R')
    N = ng.make_axis(batch_size, name='N')

    with ExecutorFactory() as ex:
        np.random.seed(0)

        inp_ng = ng.placeholder([Cin, REC, N])

        lstm_ng = LSTM(hidden_size, init_func, activation=Tanh(),
                       gate_activation=Logistic(), reset_cells=reset_cells,
                       return_sequence=return_seq, backward=backward)

        out_ng = lstm_ng.train_outputs(inp_ng)

        fprop_neon_fun = ex.executor(out_ng, inp_ng)

        fprop_neon_list = []
        input_value_list = []

        for i in range(num_iter):
            # fprop on random inputs
            input_value = rng.uniform(-1, 1, inp_ng.axes)
            fprop_neon = fprop_neon_fun(input_value).copy()

            if return_seq is True:
                fprop_neon = fprop_neon[:, :, 0]

            input_value_list.append(input_value)
            fprop_neon_list.append(fprop_neon)

            if reset_cells is False:
                # look at the last hidden states
                assert ng.testing.allclose(fprop_neon[:, -1].reshape(-1, 1),
                                           lstm_ng.h_init.value.get(None),
                                           rtol=rtol, atol=atol)

        # after the rnn graph has been executed, we can get the W values. Get copies so
        # shared values don't confuse derivatives
        # concatenate weights for i, f, o, g together (in this order)
        gates = ['i', 'f', 'o', 'g']
        Wxh_neon = [lstm_ng.W_input[k].value.get(None).copy().T for k in gates]
        Whh_neon = [lstm_ng.W_recur[k].value.get(None).copy().T for k in gates]
        bh_neon = [lstm_ng.b[k].value.get(None).copy() for k in gates]

        # reference numpy LSTM
        lstm_ref = RefLSTM()
        WLSTM = lstm_ref.init(input_size, hidden_size)

        # make ref weights and biases match the neon model
        WLSTM[0, :] = np.concatenate(bh_neon)
        WLSTM[1:input_size + 1, :] = np.concatenate(Wxh_neon, 1)
        WLSTM[input_size + 1:] = np.concatenate(Whh_neon, 1)

        # transpose input X and do fprop
        fprop_ref_list = []
        c0 = h0 = None
        for i in range(num_iter):
            input_value = input_value_list[i]
            inp_ref = input_value.copy().transpose([1, 2, 0])
            (Hout_ref, cprev, hprev, batch_cache) = lstm_ref.forward(inp_ref, WLSTM,
                                                                     c0, h0)
            if reset_cells is False:
                c0 = cprev
                h0 = hprev

            # the output needs transpose as well
            Hout_ref = Hout_ref.reshape(seq_len * batch_size, hidden_size).T
            fprop_ref_list.append(Hout_ref)

        for i in range(num_iter):
            assert ng.testing.allclose(fprop_neon_list[i],
                                       fprop_ref_list[i], rtol=rtol, atol=atol)
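An invocation sketch for the check above; the concrete sizes are illustrative, and rng, rtol, and atol are assumed to be module-level fixtures of the test file.

# Illustrative call: small dimensions keep the numpy reference LSTM fast.
# GlorotInit is used by other snippets in this section; any neon initializer works.
check_lstm(seq_len=5, input_size=3, hidden_size=4, batch_size=2,
           init_func=GlorotInit(), return_seq=True, reset_cells=True, num_iter=2)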
def check_stacked_lstm(seq_len, input_size, hidden_size, batch_size,
                       init_func, return_seq=True, backward=False,
                       reset_cells=False, num_iter=2):

    Cin = ng.make_axis(input_size)
    REC = ng.make_axis(seq_len, name='R')
    N = ng.make_axis(batch_size, name='N')

    with ExecutorFactory() as ex:
        np.random.seed(0)

        inp_ng = ng.placeholder([Cin, REC, N])

        lstm_ng_1 = LSTM(hidden_size, init_func, activation=Tanh(),
                         gate_activation=Logistic(), reset_cells=reset_cells,
                         return_sequence=return_seq, backward=backward)
        lstm_ng_2 = LSTM(hidden_size, init_func, activation=Tanh(),
                         gate_activation=Logistic(), reset_cells=reset_cells,
                         return_sequence=return_seq, backward=backward)

        out_ng_1 = lstm_ng_1.train_outputs(inp_ng)
        out_ng_2 = lstm_ng_2.train_outputs(out_ng_1)

        fprop_neon_fun_2 = ex.executor(out_ng_2, inp_ng)

        # fprop on random inputs for multiple iterations
        fprop_neon_2_list = []
        input_value_list = []

        for i in range(num_iter):
            input_value = rng.uniform(-1, 1, inp_ng.axes)
            fprop_neon_2 = fprop_neon_fun_2(input_value).copy()

            # comparing outputs
            if return_seq is True:
                fprop_neon_2 = fprop_neon_2[:, :, 0]

            input_value_list.append(input_value)
            fprop_neon_2_list.append(fprop_neon_2)

            if reset_cells is False:
                # look at the last hidden states
                assert ng.testing.allclose(fprop_neon_2[:, -1].reshape(-1, 1),
                                           lstm_ng_2.h_init.value.get(None),
                                           rtol=rtol, atol=atol)

        # after the rnn graph has been executed, we can get the W values. Get copies so
        # shared values don't confuse derivatives
        # concatenate weights for i, f, o, g together (in this order)
        gates = ['i', 'f', 'o', 'g']
        Wxh_neon_1 = \
            np.concatenate([lstm_ng_1.W_input[k].value.get(None).copy().T for k in gates], 1)
        Whh_neon_1 = \
            np.concatenate([lstm_ng_1.W_recur[k].value.get(None).copy().T for k in gates], 1)
        bh_neon_1 = \
            np.concatenate([lstm_ng_1.b[k].value.get(None).copy() for k in gates])
        Wxh_neon_2 = \
            np.concatenate([lstm_ng_2.W_input[k].value.get(None).copy().T for k in gates], 1)
        Whh_neon_2 = \
            np.concatenate([lstm_ng_2.W_recur[k].value.get(None).copy().T for k in gates], 1)
        bh_neon_2 = \
            np.concatenate([lstm_ng_2.b[k].value.get(None).copy() for k in gates])

        # reference numpy LSTM
        lstm_ref_1 = RefLSTM()
        lstm_ref_2 = RefLSTM()
        WLSTM_1 = lstm_ref_1.init(input_size, hidden_size)
        WLSTM_2 = lstm_ref_2.init(hidden_size, hidden_size)

        # make ref weights and biases the same as the neon model
        WLSTM_1[0, :] = bh_neon_1
        WLSTM_1[1:input_size + 1, :] = Wxh_neon_1
        WLSTM_1[input_size + 1:] = Whh_neon_1
        WLSTM_2[0, :] = bh_neon_2
        WLSTM_2[1:hidden_size + 1, :] = Wxh_neon_2
        WLSTM_2[hidden_size + 1:] = Whh_neon_2

        # transpose input X and do fprop
        fprop_ref_2_list = []
        c0_1 = h0_1 = None
        c0_2 = h0_2 = None
        for i in range(num_iter):
            input_value = input_value_list[i]
            inp_ref = input_value.copy().transpose([1, 2, 0])
            (Hout_ref_1, cprev_1, hprev_1, batch_cache) = lstm_ref_1.forward(inp_ref,
                                                                             WLSTM_1,
                                                                             c0_1, h0_1)
            (Hout_ref_2, cprev_2, hprev_2, batch_cache) = lstm_ref_2.forward(Hout_ref_1,
                                                                             WLSTM_2,
                                                                             c0_2, h0_2)

            if reset_cells is False:
                c0_1 = cprev_1
                h0_1 = hprev_1
                c0_2 = cprev_2
                h0_2 = hprev_2

            # the output needs transpose as well
            Hout_ref_2 = Hout_ref_2.reshape(seq_len * batch_size, hidden_size).T
            fprop_ref_2_list.append(Hout_ref_2)

        for i in range(num_iter):
            assert ng.testing.allclose(fprop_neon_2_list[i],
                                       fprop_ref_2_list[i], rtol=rtol, atol=atol)
    out_axes = ng.make_axes([batch_axis, time_axis, out_axis])
else:
    out_axes = ng.make_axes([batch_axis, out_axis])

# Build placeholders for the created axes
inputs = {'X': ng.placeholder(in_axes),
          'y': ng.placeholder(out_axes),
          'iteration': ng.placeholder(axes=())}

# Network Definition
seq1 = Sequential([LSTM(nout=recurrent_units, init=init_uni, backward=False,
                        activation=Logistic(), gate_activation=Tanh(),
                        return_sequence=predict_seq),
                   Affine(weight_init=init_uni, bias_init=init_uni,
                          activation=Identity(), axes=out_axis)])

# Optimizer
# The following policy sets the initial learning rate to 0.05 (base_lr).
# At iteration (num_iterations // 5), the learning rate is multiplied by gamma (new lr = .005).
# At iteration (num_iterations // 2), it is reduced by gamma again (new lr = .0005).
schedule = [num_iterations // 5, num_iterations // 2]
learning_rate_policy = {'name': 'schedule',
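The snippet above is cut off inside the policy dictionary. A plausible completion, inferred only from the comments (base_lr = 0.05, decay by gamma = 0.1 at each scheduled iteration) and offered as an assumption rather than the original code:

# Assumed completion of the truncated dict above, based only on the comments:
# base learning rate 0.05, decayed by gamma = 0.1 at each iteration listed in `schedule`.
learning_rate_policy = {'name': 'schedule',
                        'schedule': schedule,
                        'gamma': 0.1,
                        'base_lr': 0.05}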
# Build placeholders for the created axes
inputs = {'X': ng.placeholder(in_axes),
          'y': ng.placeholder(out_axes),
          'iteration': ng.placeholder(axes=())}

# Network Definition
if use_embedding is False:
    seq1 = Sequential([Preprocess(functor=expand_onehot),
                       LSTM(nout=recurrent_units, init=init_uni, backward=False,
                            reset_cells=True, activation=Logistic(),
                            gate_activation=Tanh(), return_sequence=True),
                       Affine(weight_init=init_uni, bias_init=init_uni,
                              activation=Softmax(), axes=out_axis)])
else:
    embedding_dim = 8
    seq1 = Sequential([LookupTable(len(shakes.vocab) + 1, embedding_dim,
                                   init_uni, update=True),
def __init__(self):
    super(LSTMLayer, self).__init__()
    self.layer = LSTM(nout=16, init=ConstantInit(0.0), activation=Tanh(),
                      gate_activation=Tanh())
# Use Array Iterator for training set
train_set = ArrayIterator(train, batch_size=params_dict['batch_size'],
                          total_iterations=params_dict['num_iterations'])
# Use Array Iterator for validation set
valid_set = ArrayIterator(dev, batch_size=params_dict['batch_size'],
                          total_iterations=params_dict['num_iterations'])
# Make placeholders for training
inputs = train_set.make_placeholders(include_iteration=True)

# Encoding Layer
rlayer_1 = LSTM(hidden_size, init, activation=Tanh(), reset_cells=True,
                gate_activation=Logistic(), return_sequence=True)

# Embedding Layer
embed_layer = LookupTable(params_dict['vocab_size'], params_dict['embed_size'],
                          embeddings, update=False, pad_idx=params_dict['pad_idx'])

# Initializers for LSTM Cells
input_placeholder, input_value = make_placeholder(2 * hidden_size, 1,
                                                  params_dict['batch_size'])
input_placeholder_a, input_value = make_placeholder(2 * hidden_size, 1,
                                                    params_dict['batch_size'])