def test_inference_reuse_lut(recurrent_axis, batch_axis):
    # The lookup table's weight W must be created once and shared
    # between the training and inference graphs.
    seq_input = ng.placeholder(axes=[recurrent_axis, batch_axis])
    layer = LookupTable(20, 10, dummy_init)
    layer(seq_input)
    train_params = (layer.W,)

    with Layer.inference_mode_on():
        layer(seq_input)
    inference_params = (layer.W,)

    for train_param, inference_param in zip(train_params, inference_params):
        assert train_param is inference_param
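# A minimal NumPy stand-in (hypothetical TinyLookup class, not part of
# ngraph) sketching the lazy, shared weight the test above asserts: the
# table is created on the first call and the very same object is reused
# when the layer is re-applied under inference mode.
import numpy as np


class TinyLookup(object):
    def __init__(self, vocab_size, embed_dim):
        self.W = None
        self._shape = (vocab_size, embed_dim)

    def __call__(self, ids):
        if self.W is None:          # created once, then shared across calls
            self.W = np.zeros(self._shape)
        return self.W[ids]


_layer = TinyLookup(20, 10)
_layer(np.array([1, 2]))
_w_train = _layer.W
_layer(np.array([3]))               # "inference" call reuses the same table
assert _layer.W is _w_train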
def make_embedding_layer(vocab_size):
    # Row 0 is an all-zero pad embedding; rows 1..vocab_size hold the
    # one-hot vectors for the tokens. The table is fixed (update=False).
    vectors = []
    vectors.append(np.zeros((1, vocab_size)))
    vectors.append(np.eye(vocab_size))
    vectors = np.concatenate(vectors)
    embed_init = ConstantInit(vectors)
    embed_layer = LookupTable(vocab_size + 1, vocab_size, embed_init,
                              update=False, pad_idx=0)
    return embed_layer
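# A quick NumPy check (toy vocab_size, illustration only) of the table
# make_embedding_layer builds: row 0 is the zero pad embedding and row
# i (for i >= 1) is the one-hot vector for token i - 1.
import numpy as np

_vocab_size = 4
_vectors = np.concatenate([np.zeros((1, _vocab_size)), np.eye(_vocab_size)])
assert _vectors.shape == (_vocab_size + 1, _vocab_size)
assert not _vectors[0].any()                                # pad row is zeros
assert np.array_equal(_vectors[2], np.eye(_vocab_size)[1])  # token 2 -> one-hot 1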
def __init__(self, number_embeddings_features, tokens_in_embeddings,
             deep_parameters, deep_activation_fn, drop_out_rate=0.0):
    super(WideDeepClassifier, self).__init__(name="WideAndDeep")

    # Embeddings: one lookup table per categorical feature.
    self.luts = []
    for e in range(len(number_embeddings_features)):
        init_uniform = UniformInit(0, 1)
        # pad_idx has to be initialized to 0 explicitly.
        lut = LookupTable(tokens_in_embeddings[e],
                          number_embeddings_features[e],
                          init_uniform, pad_idx=0, update=True)
        self.luts.append(lut)

    # Model specification: the deep path is a stack of Affine layers,
    # optionally interleaved with Dropout.
    init_xavier = XavierInit()
    layers = []
    for i in range(len(deep_parameters)):
        layers.append(Affine(nout=deep_parameters[i],
                             weight_init=init_xavier,
                             activation=deep_activation_fn))
        if drop_out_rate > 0.0:
            # Dropout's `keep` argument is the keep probability, i.e. the
            # complement of the dropout rate.
            layers.append(Dropout(keep=1.0 - drop_out_rate))
    layers.append(Affine(axes=tuple(), weight_init=init_xavier))

    self.deep_layers = Sequential(layers)
    self.linear_layer = Affine(axes=tuple(), weight_init=init_xavier)
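# A minimal NumPy sketch (hypothetical shapes and weights, illustration
# only) of a wide-and-deep style forward pass like the one this class
# sets up: embeddings are looked up and fed through the deep MLP, a
# separate linear unit forms the wide path, and here the two scalar
# outputs are assumed to be summed (the combination itself is not shown
# in the excerpt).
import numpy as np

_rng = np.random.default_rng(0)

# Two categorical fields with embedding widths 3 and 2 (cf. the luts above)
_luts = [_rng.uniform(0, 1, size=(10, 3)), _rng.uniform(0, 1, size=(7, 2))]
_cat_ids = [np.array([4]), np.array([1])]               # one sample

# Deep path: concatenated embeddings -> ReLU MLP -> scalar
_deep_in = np.concatenate([t[i] for t, i in zip(_luts, _cat_ids)], axis=1)
_W1, _b1 = _rng.normal(size=(5, 8)), np.zeros(8)
_W2, _b2 = _rng.normal(size=(8, 1)), np.zeros(1)
_deep_out = np.maximum(_deep_in @ _W1 + _b1, 0.0) @ _W2 + _b2

# Wide path: a single linear unit over the wide (dense) features
_wide_x = _rng.normal(size=(1, 6))
_Ww, _bw = _rng.normal(size=(6, 1)), np.zeros(1)
_wide_out = _wide_x @ _Ww + _bw

_logit = _deep_out + _wide_out                          # combined prediction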
batch_size=args.batch_size,
                              time_steps=time_steps)
inputs = train_set.make_placeholders()
ax.Y.length = len(tree_bank_data.vocab)


def expand_onehot(x):
    return ng.one_hot(x, axis=ax.Y)


# weight initialization
init = UniformInit(low=-0.08, high=0.08)

# Front end: either a trainable embedding or a fixed one-hot encoding.
if args.use_lut:
    layer_0 = LookupTable(50, 100, init, update=True, pad_idx=0)
else:
    layer_0 = Preprocess(functor=expand_onehot)

if args.layer_type == "rnn":
    rlayer = Recurrent(hidden_size, init, activation=Tanh())
elif args.layer_type == "birnn":
    rlayer = BiRNN(hidden_size, init, activation=Tanh(),
                   return_sequence=True, sum_out=True)

# model initialization
seq1 = Sequential([
    layer_0,
    rlayer,
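# The two branches above are interchangeable front ends: one-hot encoding
# a token id is the same as indexing a fixed identity table, whereas the
# LookupTable branch learns a dense table. A tiny NumPy check (toy sizes,
# illustration only):
import numpy as np

_V = 5
_ids = np.array([3, 0, 4])
_table = np.eye(_V)                  # an identity "embedding" table
_one_hot = np.eye(_V)[_ids]          # row-wise one-hot encoding of the ids
_looked_up = _table[_ids]            # LookupTable-style indexing
assert np.array_equal(_one_hot, _looked_up)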
    rlayer = Recurrent(hidden_size, init, activation=Tanh(),
                       reset_cells=True, return_sequence=False)
else:
    rlayer = BiRNN(hidden_size, init, activation=Tanh(),
                   reset_cells=True, return_sequence=False, sum_out=True)

# model initialization
seq1 = Sequential([
    LookupTable(vocab_size, embed_size, init, update=True, pad_idx=pad_idx),
    rlayer,
    Affine(init, activation=Softmax(), bias_init=init, axes=(ax.Y,))
])

optimizer = RMSProp(decay_rate=0.95, learning_rate=2e-3, epsilon=1e-6,
                    gradient_clip_value=gradient_clip_value)

train_prob = seq1(inputs['review'])
train_loss = ng.cross_entropy_multi(train_prob,
                                    ng.one_hot(inputs['label'], axis=ax.Y),
                                    usebits=True)
batch_cost = ng.sequential([optimizer(train_loss),
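# usebits=True reports the cross-entropy in bits (log base 2). A toy
# NumPy version (illustration only) for a single one-hot label:
import numpy as np

_probs = np.array([0.7, 0.2, 0.1])               # model output over 3 classes
_label = np.array([1.0, 0.0, 0.0])               # one-hot target: class 0
_loss_bits = -(_label * np.log2(_probs)).sum()   # ~0.515 bits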
             init=init_uni,
             backward=False,
             reset_cells=True,
             activation=Logistic(),
             gate_activation=Tanh(),
             return_sequence=True),
        Affine(weight_init=init_uni, bias_init=init_uni,
               activation=Softmax(), axes=out_axis)
    ])
else:
    embedding_dim = 8
    seq1 = Sequential([
        LookupTable(len(shakes.vocab) + 1, embedding_dim, init_uni,
                    update=True),
        LSTM(nout=recurrent_units, init=init_uni, backward=False,
             reset_cells=True, activation=Logistic(), gate_activation=Tanh(),
             return_sequence=True),
        Affine(weight_init=init_uni, bias_init=init_uni,
               activation=Softmax(), axes=out_axis)
    ])

# Optimizer