def create_model(vocab_size, rlayer_type):
    """
    Create LSTM/GRU model for bAbI dataset.

    Args:
        vocab_size (int): Size of the bAbI data vocabulary.
        rlayer_type (string): Type of recurrent layer to use (gru or lstm).

    Returns:
        Model: Model of the created network.
    """
    # recurrent layer parameters (default gru)
    rlayer_obj = GRU if rlayer_type == 'gru' else LSTM
    rlayer_params = dict(output_size=100, reset_cells=True,
                         init=GlorotUniform(), init_inner=Orthonormal(0.5),
                         activation=Tanh(), gate_activation=Logistic())

    # if using lstm, swap the activation functions
    if rlayer_type == 'lstm':
        rlayer_params.update(dict(activation=Logistic(), gate_activation=Tanh()))

    # lookup layer parameters
    lookup_params = dict(vocab_size=vocab_size, embedding_dim=50,
                         init=Uniform(-0.05, 0.05))

    # Model construction
    story_path = [LookupTable(**lookup_params), rlayer_obj(**rlayer_params)]
    query_path = [LookupTable(**lookup_params), rlayer_obj(**rlayer_params)]

    layers = [MergeMultistream(layers=[story_path, query_path], merge="stack"),
              Affine(vocab_size, init=GlorotUniform(), activation=Softmax())]

    return Model(layers=layers)
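# Hedged usage sketch: shows how create_model above might be called. The `babi`
# object (exposing vocab_size) and an already-generated backend are assumptions
# for illustration, not part of the snippet above.
model = create_model(vocab_size=babi.vocab_size, rlayer_type='gru')  # or 'lstm'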
def conv_net(self, activation, init=Kaiming(), version=-1):
    width = max([self.width, self.lookup_dim])

    if version == -1:
        if self.lookup_size:
            pre_layers = [
                LookupTable(vocab_size=self.lookup_size, embedding_dim=width,
                            init=GlorotUniform()),
                Reshape((1, self.num_words, width)),
            ]
            first_width = width
        else:
            pre_layers = [
                Conv((1, width, width), padding=0, init=init, activation=activation)
            ]
            first_width = 1

        return pre_layers + [
            MergeBroadcast(
                [
                    [Conv((3, first_width, 15), padding={'pad_h': 1, 'pad_w': 0},
                          init=init, activation=activation)],
                    [Conv((5, first_width, 15), padding={'pad_h': 2, 'pad_w': 0},
                          init=init, activation=activation)],
                    [Conv((7, first_width, 15), padding={'pad_h': 3, 'pad_w': 0},
                          init=init, activation=activation)],
                ],
                merge='depth'
            ),
            NoisyDropout(keep=0.5, noise_pct=1.0, noise_std=0.001),
            Conv((5, 1, 15),
                 strides={'str_h': 2 if self.num_words > 59 else 1, 'str_w': 1},
                 padding=0, init=init, activation=activation),
            NoisyDropout(keep=0.9, noise_pct=1.0, noise_std=0.00001),
            Conv((3, 1, 9), strides={'str_h': 2, 'str_w': 1}, padding=0,
                 init=init, activation=activation),
            NoisyDropout(keep=0.9, noise_pct=1.0, noise_std=0.00001),
            Conv((9, 1, 9), strides={'str_h': 2, 'str_w': 1}, padding=0,
                 init=init, activation=activation)
        ]
                       g_uni, activation=Tanh(),
                       depth=1, reset_cells=True,
                       batch_norm=False, bi_sum=False)
elif args.rlayer_type == 'bibnrnn':
    rlayer = DeepBiRNN(hidden_size, g_uni, activation=Tanh(),
                       depth=1, reset_cells=True, batch_norm=True)

layers = [
    LookupTable(vocab_size=vocab_size, embedding_dim=embedding_dim, init=uni),
    rlayer,
    RecurrentSum(),
    Dropout(keep=0.5),
    Affine(2, g_uni, bias=g_uni, activation=Softmax())
]

model = Model(layers=layers)

cost = GeneralizedCost(costfunc=CrossEntropyMulti(usebits=True))
optimizer = Adagrad(learning_rate=0.01,
                    gradient_clip_value=gradient_clip_value)

# configure callbacks
callbacks = Callbacks(model, eval_set=valid_set, **args.callback_args)
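# Hedged sketch of the training call that typically follows this setup in neon
# examples; `train_set` and `args.epochs` are assumed to be defined earlier.
model.fit(train_set, optimizer=optimizer, num_epochs=args.epochs,
          cost=cost, callbacks=callbacks)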
hidden_size = 128
reset_cells = True
num_epochs = args.epochs

# setup backend
be = gen_backend(**extract_valid_args(args, gen_backend))
be.bsz = 1

# define same model as in train
init_glorot = GlorotUniform()
init_emb = Uniform(low=-0.1 / embedding_dim, high=0.1 / embedding_dim)
nclass = 2
layers = [
    LookupTable(vocab_size=vocab_size, embedding_dim=embedding_dim, init=init_emb,
                pad_idx=0, update=True),
    LSTM(hidden_size, init_glorot, activation=Tanh(),
         gate_activation=Logistic(), reset_cells=True),
    RecurrentSum(),
    Dropout(keep=0.5),
    Affine(nclass, init_glorot, bias=init_glorot, activation=Softmax())
]

# load the weights
print("Initialized the models - ")
model_new = Model(layers=layers)
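# Hedged sketch of the next step implied by the "load the weights" comment above:
# restoring trained parameters into the rebuilt model. The weights path
# (`args.model_weights`) is an assumption for illustration.
model_new.load_params(args.model_weights)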
def test_reshape_layer_model(backend_default, fargs):
    """
    test cases:
    - conv before RNNs
    - conv after RNNs
    - conv after LUT
    """
    np.random.seed(seed=0)

    nin, nout, bsz = fargs
    be = backend_default
    be.bsz = bsz
    input_size = (nin, be.bsz)

    init = Uniform(-0.1, 0.1)
    g_uni = GlorotUniform()

    inp_np = np.random.rand(nin, be.bsz)
    delta_np = np.random.rand(nout, be.bsz)

    inp = be.array(inp_np)
    delta = be.array(delta_np)

    conv_lut_1 = [
        LookupTable(vocab_size=2000, embedding_dim=400, init=init),
        Reshape(reshape=(4, 100, -1)),
        Conv((3, 3, 16), init=init),
        LSTM(64, g_uni, activation=Tanh(),
             gate_activation=Logistic(), reset_cells=True),
        RecurrentSum(),
        Affine(nout, init, bias=init, activation=Softmax())
    ]

    conv_lut_2 = [
        LookupTable(vocab_size=1000, embedding_dim=400, init=init),
        Reshape(reshape=(4, 50, -1)),
        Conv((3, 3, 16), init=init),
        Pooling(2, strides=2),
        Affine(nout=nout, init=init, bias=init, activation=Softmax()),
    ]

    conv_rnn_1 = [
        LookupTable(vocab_size=2000, embedding_dim=400, init=init),
        LSTM(64, g_uni, activation=Tanh(),
             gate_activation=Logistic(), reset_cells=True),
        Reshape(reshape=(4, 32, -1)),
        Conv((3, 3, 16), init=init),
        Affine(nout, init, bias=init, activation=Softmax())
    ]

    conv_rnn_2 = [
        LookupTable(vocab_size=2000, embedding_dim=400, init=init),
        Recurrent(64, g_uni, activation=Tanh(), reset_cells=True),
        Reshape(reshape=(4, -1, 32)),
        Conv((3, 3, 16), init=init),
        Affine(nout, init, bias=init, activation=Softmax())
    ]

    lut_sum_1 = [
        LookupTable(vocab_size=1000, embedding_dim=128, init=init),
        RecurrentSum(),
        Affine(nout=nout, init=init, bias=init, activation=Softmax()),
    ]

    lut_birnn_1 = [
        LookupTable(vocab_size=1000, embedding_dim=200, init=init),
        DeepBiRNN(32, init=GlorotUniform(), batch_norm=True, activation=Tanh(),
                  reset_cells=True, depth=1),
        Reshape((4, 32, -1)),
        Conv((3, 3, 16), init=init),
        Affine(nout=nout, init=init, bias=init, activation=Softmax())
    ]

    layers_test = [conv_lut_1, conv_lut_2, conv_rnn_1, conv_rnn_2, lut_sum_1, lut_birnn_1]

    for lg in layers_test:
        model = Model(layers=lg)
        cost = GeneralizedCost(costfunc=CrossEntropyBinary())
        model.initialize(input_size, cost)
        model.fprop(inp)
        model.bprop(delta)
# model initialization
rlayer_params = {
    "output_size": hidden_size,
    "init": init,
    "activation": Tanh(),
    "gate_activation": Logistic()
}
if args.rlayer_type == 'lstm':
    rlayer1, rlayer2 = LSTM(**rlayer_params), LSTM(**rlayer_params)
else:
    rlayer1, rlayer2 = GRU(**rlayer_params), GRU(**rlayer_params)

layers = [
    LookupTable(vocab_size=len(train_set.vocab), embedding_dim=hidden_size, init=init),
    rlayer1,
    rlayer2,
    Affine(len(train_set.vocab), init, bias=init, activation=Softmax())
]

cost = GeneralizedCost(costfunc=CrossEntropyMulti(usebits=True))

model = Model(layers=layers)

# vanilla gradient descent with decay schedule on learning rate and gradient scaling
learning_rate_sched = Schedule(list(range(5, args.epochs)), .5)
optimizer = GradientDescentMomentum(1, 0, gradient_clip_norm=gradient_clip_norm,
                                    schedule=learning_rate_sched)
                      path, vocab_size=vocab_size, sentence_length=sentence_length)

print("Vocab size - ", vocab_size)
print("Sentence Length - ", sentence_length)
print("# of train sentences", X_train.shape[0])
print("# of test sentences", X_test.shape[0])

train_set = DataIterator(X_train, y_train, nclass=2)
valid_set = DataIterator(X_test, y_test, nclass=2)

# weight initialization
init_emb = Uniform(low=-0.1 / embedding_dim, high=0.1 / embedding_dim)
init_glorot = GlorotUniform()

layers = [
    LookupTable(vocab_size=vocab_size, embedding_dim=embedding_dim, init=init_emb),
    LSTM(hidden_size, init_glorot, activation=Tanh(),
         gate_activation=Logistic(), reset_cells=True),
    RecurrentSum(),
    Dropout(keep=0.5),
    Affine(2, init_glorot, bias=init_glorot, activation=Softmax())
]

cost = GeneralizedCost(costfunc=CrossEntropyMulti(usebits=True))
metric = Accuracy()

model = Model(layers=layers)

optimizer = Adagrad(learning_rate=0.01, clip_gradients=clip_gradients)
# setup backend
be = gen_backend(**extract_valid_args(args, gen_backend))

# load data
train_path = os.path.join(args.data_dir, 'nmt', dataset)
train_set = TextNMT(time_steps, train_path, get_prev_target=True, onehot_input=False,
                    split='train', dataset=dataset, subset_pct=args.subset_pct)
valid_set = TextNMT(time_steps, train_path, get_prev_target=False, onehot_input=False,
                    split='valid', dataset=dataset)

# weight initialization
init = Uniform(low=-0.08, high=0.08)

# Standard or Conditional encoder / decoder:
encoder = [LookupTable(vocab_size=len(train_set.s_vocab), embedding_dim=embedding_dim,
                       init=init, name="LUT_en")]
decoder = [LookupTable(vocab_size=len(train_set.t_vocab), embedding_dim=embedding_dim,
                       init=init, name="LUT_de")]
decoder_connections = []  # link up recurrent layers

for ii in range(num_layers):
    encoder.append(GRU(hidden_size, init, activation=Tanh(), gate_activation=Logistic(),
                       reset_cells=True, name="GRU1Enc"))
    decoder.append(GRU(hidden_size, init, activation=Tanh(), gate_activation=Logistic(),
                       reset_cells=True, name="GRU1Dec"))
    decoder_connections.append(ii)

decoder.append(Affine(train_set.nout, init, bias=init, activation=Softmax(),
                      name="Affout"))

layers = Seq2Seq([encoder, decoder],
                 decoder_connections=decoder_connections,
                 name="Seq2Seq")
                     init=GlorotUniform(), init_inner=Orthonormal(0.5),
                     activation=Tanh(), gate_activation=Logistic())

# if using lstm, swap the activation functions
if args.rlayer_type == 'lstm':
    rlayer_params.update(dict(activation=Logistic(), gate_activation=Tanh()))

# lookup layer parameters
lookup_params = dict(vocab_size=babi.vocab_size, embedding_dim=50,
                     init=Uniform(-0.05, 0.05))

# Model construction
story_path = [LookupTable(**lookup_params), rlayer_obj(**rlayer_params)]
query_path = [LookupTable(**lookup_params), rlayer_obj(**rlayer_params)]

layers = [
    MergeMultistream(layers=[story_path, query_path], merge="stack"),
    Affine(babi.vocab_size, init=GlorotUniform(), activation=Softmax())
]

model = Model(layers=layers)

# setup callbacks
callbacks = Callbacks(model, train_set, eval_set=valid_set, **args.callback_args)
# categorize training and testing datasets
train_set = imdb.train_iter
test_set = imdb.test_iter
valid_set = imdb.test_iter

# Model specification

# Initialization
init_glorot = GlorotUniform()
init_uniform = Uniform(-0.1 / 128, 0.1 / 128)

# List of layers in the network
layers = [
    LookupTable(vocab_size=vocab_size, embedding_dim=128, init=init_uniform),
    LSTM(output_size=128, init=init_glorot, activation=Tanh(),
         gate_activation=Logistic(), reset_cells=True),
    RecurrentSum(),
    Dropout(keep=0.5),
    Affine(nout=2, init=init_glorot, bias=init_glorot, activation=Softmax())
]

# cost, optimizer and callbacks
cost = GeneralizedCost(costfunc=CrossEntropyMulti(usebits=True))
optimizer = Adagrad(learning_rate=0.01)
def load_sent_encoder(model_dict, expand_vocab=False, orig_vocab=None, w2v_vocab=None,
                      w2v_path=None, use_recur_last=False):
    """
    Custom function to load the model saved from skip-thought vector training
    and reconstruct another model using just the LUT and encoding layer for
    transferring sentence representations.

    Arguments:
        model_dict: saved s2v model dict
        expand_vocab: Bool to indicate if w2v vocab expansion should be attempted
        orig_vocab: If using expand_vocab, original vocabulary dict is needed for expansion
        w2v_vocab: If using expand_vocab, w2v vocab dict
        w2v_path: Path to trained w2v binary (GoogleNews)
        use_recur_last: If True, a RecurrentLast layer is used as the final layer;
                        if False, a RecurrentSum layer is used as the last layer
                        of the returned model.
    """
    embed_dim = model_dict['model']['config']['embed_dim']
    model_train = Model(model_dict)

    # RecurrentLast should be used for semantic similarity evaluation
    if use_recur_last:
        last_layer = RecurrentLast()
    else:
        last_layer = RecurrentSum()

    if expand_vocab:
        assert orig_vocab and w2v_vocab, ("All vocabs and w2v_path "
                                          "need to be specified when using expand_vocab")

        neon_logger.display("Computing vocab expansion regression...")
        # Build inverse word dictionary (word -> index)
        word_idict = dict()
        for kk, vv in orig_vocab.items():
            # Add 2 to the index to allow for padding and oov tokens as 0 and 1
            word_idict[vv + 2] = kk
        word_idict[0] = ''
        word_idict[1] = 'UNK'

        # Create dictionary of word -> vec
        orig_word_vecs = get_embeddings(model_train.layers.layer_dict['lookupTable'],
                                        word_idict)

        # Load GoogleNews w2v weights
        w2v_W, w2v_dim, _ = get_google_word2vec_W(w2v_path, w2v_vocab)

        # Compute the expanded vocab lookup table from a linear mapping of
        # word2vec into the RNN word space
        init_embed = compute_vocab_expansion(orig_word_vecs, w2v_W, w2v_vocab, word_idict)

        init_embed_dev = model_train.be.array(init_embed)
        w2v_vocab_size = len(w2v_vocab)

        table = LookupTable(vocab_size=w2v_vocab_size, embedding_dim=embed_dim,
                            init=init_embed_dev, pad_idx=0)

        model = Model(layers=[table,
                              model_train.layers.layer_dict['encoder'],
                              last_layer])
    else:
        model = Model(layers=[model_train.layers.layer_dict['lookupTable'],
                              model_train.layers.layer_dict['encoder'],
                              last_layer])

    return model
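# Hedged usage sketch: rebuild an encoder-only model from a saved skip-thought
# checkpoint. The checkpoint filename is hypothetical; load_obj is neon's helper
# for reading serialized model dicts.
from neon.util.persist import load_obj

model_dict = load_obj('s2v_checkpoint.prm')  # hypothetical path
encoder_model = load_sent_encoder(model_dict, use_recur_last=True)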