def input_layers(self, analytics_input, init, activation, gate):
    """
    Return the input layers. We currently support convolutional and LSTM.

    :return: list of input layers, or a MergeMultistream layer when analytics
             input is combined with content input
    """
    if self.recurrent:
        if analytics_input:
            # support analytics + content
            input_layers = MergeMultistream([
                [
                    LSTM(300, init, init_inner=Kaiming(), activation=activation,
                         gate_activation=gate, reset_cells=True),
                    RecurrentSum()
                ],
                [Affine(30, init, activation=activation)]
            ], 'stack')
        else:
            # content only
            input_layers = [
                LSTM(300, init, init_inner=Kaiming(), activation=activation,
                     gate_activation=gate, reset_cells=True),
                RecurrentSum()
            ]
    else:
        if analytics_input:
            # support analytics + content
            input_layers = MergeMultistream([
                self.conv_net(activation),
                [Affine(30, init, activation=Logistic())]
            ], 'stack')
        else:
            # content only
            input_layers = self.conv_net(activation)

    return input_layers
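# Hedged usage sketch (not part of the original file): one way the stack
# returned by input_layers() could be capped with a classification head and
# wrapped in a neon Model. Building the content-only LSTM branch standalone,
# the 2-unit Affine/Softmax output, and the GlorotUniform init are
# illustrative assumptions, not taken from the surrounding code.
from neon.backends import gen_backend
from neon.initializers import GlorotUniform, Kaiming
from neon.layers import LSTM, RecurrentSum, Affine
from neon.models import Model
from neon.transforms import Tanh, Logistic, Softmax

be = gen_backend(backend='cpu', batch_size=32)
init = GlorotUniform()

# content-only recurrent input stack, mirroring the branch above
content_only = [
    LSTM(300, init, init_inner=Kaiming(), activation=Tanh(),
         gate_activation=Logistic(), reset_cells=True),
    RecurrentSum()
]

# append an illustrative 2-class output head and wrap in a Model
model = Model(layers=content_only + [Affine(2, init, bias=init,
                                            activation=Softmax())])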
                       depth=1, reset_cells=True, batch_norm=False, bi_sum=False)
elif args.rlayer_type == 'bibnrnn':
    rlayer = DeepBiRNN(hidden_size, g_uni, activation=Tanh(),
                       depth=1, reset_cells=True, batch_norm=True)

layers = [
    LookupTable(vocab_size=vocab_size, embedding_dim=embedding_dim, init=uni),
    rlayer,
    RecurrentSum(),
    Dropout(keep=0.5),
    Affine(2, g_uni, bias=g_uni, activation=Softmax())
]

model = Model(layers=layers)

cost = GeneralizedCost(costfunc=CrossEntropyMulti(usebits=True))
optimizer = Adagrad(learning_rate=0.01,
                    gradient_clip_value=gradient_clip_value)

# configure callbacks
callbacks = Callbacks(model, eval_set=valid_set, **args.callback_args)

# train model
model.fit(train_set,
def test_reshape_layer_model(backend_default, fargs):
    """
    test cases:
    - conv before RNNs
    - conv after RNNs
    - conv after LUT
    """
    np.random.seed(seed=0)

    nin, nout, bsz = fargs
    be = backend_default
    be.bsz = bsz
    input_size = (nin, be.bsz)

    init = Uniform(-0.1, 0.1)
    g_uni = GlorotUniform()

    inp_np = np.random.rand(nin, be.bsz)
    delta_np = np.random.rand(nout, be.bsz)

    inp = be.array(inp_np)
    delta = be.array(delta_np)

    conv_lut_1 = [
        LookupTable(vocab_size=2000, embedding_dim=400, init=init),
        Reshape(reshape=(4, 100, -1)),
        Conv((3, 3, 16), init=init),
        LSTM(64, g_uni, activation=Tanh(),
             gate_activation=Logistic(), reset_cells=True),
        RecurrentSum(),
        Affine(nout, init, bias=init, activation=Softmax())
    ]

    conv_lut_2 = [
        LookupTable(vocab_size=1000, embedding_dim=400, init=init),
        Reshape(reshape=(4, 50, -1)),
        Conv((3, 3, 16), init=init),
        Pooling(2, strides=2),
        Affine(nout=nout, init=init, bias=init, activation=Softmax()),
    ]

    conv_rnn_1 = [
        LookupTable(vocab_size=2000, embedding_dim=400, init=init),
        LSTM(64, g_uni, activation=Tanh(),
             gate_activation=Logistic(), reset_cells=True),
        Reshape(reshape=(4, 32, -1)),
        Conv((3, 3, 16), init=init),
        Affine(nout, init, bias=init, activation=Softmax())
    ]

    conv_rnn_2 = [
        LookupTable(vocab_size=2000, embedding_dim=400, init=init),
        Recurrent(64, g_uni, activation=Tanh(), reset_cells=True),
        Reshape(reshape=(4, -1, 32)),
        Conv((3, 3, 16), init=init),
        Affine(nout, init, bias=init, activation=Softmax())
    ]

    lut_sum_1 = [
        LookupTable(vocab_size=1000, embedding_dim=128, init=init),
        RecurrentSum(),
        Affine(nout=nout, init=init, bias=init, activation=Softmax()),
    ]

    lut_birnn_1 = [
        LookupTable(vocab_size=1000, embedding_dim=200, init=init),
        DeepBiRNN(32, init=GlorotUniform(), batch_norm=True, activation=Tanh(),
                  reset_cells=True, depth=1),
        Reshape((4, 32, -1)),
        Conv((3, 3, 16), init=init),
        Affine(nout=nout, init=init, bias=init, activation=Softmax())
    ]

    layers_test = [conv_lut_1, conv_lut_2, conv_rnn_1, conv_rnn_2,
                   lut_sum_1, lut_birnn_1]

    for lg in layers_test:
        model = Model(layers=lg)
        cost = GeneralizedCost(costfunc=CrossEntropyBinary())
        model.initialize(input_size, cost)
        model.fprop(inp)
        model.bprop(delta)
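# Hedged sketch (not part of the original test file): exercising one of the
# stacks above outside of pytest, without the backend_default/fargs fixtures.
# The CPU backend, the (nin, nout, bsz) values, and the integer token indices
# are illustrative assumptions chosen to mirror the lut_sum_1 case.
from neon.backends import gen_backend
from neon.initializers import Uniform
from neon.layers import LookupTable, RecurrentSum, Affine, GeneralizedCost
from neon.models import Model
from neon.transforms import Softmax, CrossEntropyBinary
import numpy as np

nin, nout, bsz = 32, 2, 16
be = gen_backend(backend='cpu', batch_size=bsz)
init = Uniform(-0.1, 0.1)

layers = [
    LookupTable(vocab_size=1000, embedding_dim=128, init=init),
    RecurrentSum(),
    Affine(nout=nout, init=init, bias=init, activation=Softmax())
]
model = Model(layers=layers)
model.initialize((nin, be.bsz), GeneralizedCost(costfunc=CrossEntropyBinary()))

# forward and backward passes on random token indices / random deltas
inp = be.array(np.random.randint(0, 1000, size=(nin, be.bsz)))
delta = be.array(np.random.rand(nout, be.bsz))
model.fprop(inp)
model.bprop(delta)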
def load_sent_encoder(model_dict, expand_vocab=False, orig_vocab=None,
                      w2v_vocab=None, w2v_path=None, use_recur_last=False):
    """
    Custom function to load the model saved from skip-thought vector training
    and reconstruct another model using just the LUT and encoding layer for
    transferring sentence representations.

    Arguments:
        model_dict: saved s2v model dict
        expand_vocab: Bool to indicate if w2v vocab expansion should be attempted
        orig_vocab: If using expand_vocab, the original vocabulary dict is
                    needed for expansion
        w2v_vocab: If using expand_vocab, w2v vocab dict
        w2v_path: Path to trained w2v binary (GoogleNews)
        use_recur_last: If True, a RecurrentLast layer is used as the final
                        layer; if False, a RecurrentSum layer is used as the
                        last layer of the returned model.
    """
    embed_dim = model_dict['model']['config']['embed_dim']
    model_train = Model(model_dict)

    # RecurrentLast should be used for semantic similarity evaluation
    if use_recur_last:
        last_layer = RecurrentLast()
    else:
        last_layer = RecurrentSum()

    if expand_vocab:
        assert orig_vocab and w2v_vocab, ("All vocabs and w2v_path " +
                                          "need to be specified when using expand_vocab")

        neon_logger.display("Computing vocab expansion regression...")
        # Build inverse word dictionary (word -> index)
        word_idict = dict()
        for kk, vv in orig_vocab.items():
            # Add 2 to the index to allow for padding and oov tokens as 0 and 1
            word_idict[vv + 2] = kk
        word_idict[0] = ''
        word_idict[1] = 'UNK'

        # Create dictionary of word -> vec
        orig_word_vecs = get_embeddings(model_train.layers.layer_dict['lookupTable'],
                                        word_idict)

        # Load GoogleNews w2v weights
        w2v_W, w2v_dim, _ = get_google_word2vec_W(w2v_path, w2v_vocab)

        # Compute the expanded vocab lookup table from a linear mapping of
        # word2vec into the RNN word space
        init_embed = compute_vocab_expansion(orig_word_vecs, w2v_W, w2v_vocab,
                                             word_idict)

        init_embed_dev = model_train.be.array(init_embed)
        w2v_vocab_size = len(w2v_vocab)

        table = LookupTable(vocab_size=w2v_vocab_size, embedding_dim=embed_dim,
                            init=init_embed_dev, pad_idx=0)

        model = Model(layers=[table,
                              model_train.layers.layer_dict['encoder'],
                              last_layer])
    else:
        model = Model(layers=[model_train.layers.layer_dict['lookupTable'],
                              model_train.layers.layer_dict['encoder'],
                              last_layer])

    return model
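# Hedged usage sketch (not part of the original file): rebuilding the sentence
# encoder from a saved skip-thought checkpoint without vocab expansion. The
# checkpoint filename and the CPU backend settings are hypothetical
# placeholders, not values from the surrounding code.
from neon.backends import gen_backend
from neon.util.persist import load_obj

be = gen_backend(backend='cpu', batch_size=1)
model_dict = load_obj('s2v_checkpoint.prm')  # hypothetical checkpoint path

# RecurrentLast pooling, as recommended above for semantic similarity tasks
encoder_model = load_sent_encoder(model_dict, use_recur_last=True)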