Example #1
from neon.initializers import GlorotUniform, Orthonormal, Uniform
from neon.layers import Affine, GRU, LSTM, LookupTable, MergeMultistream
from neon.models import Model
from neon.transforms import Logistic, Softmax, Tanh

def create_model(vocab_size, rlayer_type):
    """
    Create LSTM/GRU model for bAbI dataset.

    Args:
        vocab_size (int) : Size of the vocabulary (number of unique tokens).
        rlayer_type (string) : Type of recurrent layer to use (gru or lstm).

    Returns:
        Model : the constructed network.
    """
    # recurrent layer parameters (default gru)
    rlayer_obj = GRU if rlayer_type == 'gru' else LSTM
    rlayer_params = dict(output_size=100, reset_cells=True,
                         init=GlorotUniform(), init_inner=Orthonormal(0.5),
                         activation=Tanh(), gate_activation=Logistic())

    # if using lstm, swap the activation functions
    if rlayer_type == 'lstm':
        rlayer_params.update(dict(activation=Logistic(), gate_activation=Tanh()))

    # lookup layer parameters
    lookup_params = dict(vocab_size=vocab_size, embedding_dim=50, init=Uniform(-0.05, 0.05))

    # Model construction
    story_path = [LookupTable(**lookup_params), rlayer_obj(**rlayer_params)]
    query_path = [LookupTable(**lookup_params), rlayer_obj(**rlayer_params)]

    layers = [MergeMultistream(layers=[story_path, query_path], merge="stack"),
              Affine(vocab_size, init=GlorotUniform(), activation=Softmax())]

    return Model(layers=layers)
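A minimal usage sketch for this helper (not part of the original example): the data iterators babi, train_set, and valid_set are assumed stand-ins for a bAbI setup, and the optimizer choice is an assumption.

from neon.callbacks.callbacks import Callbacks
from neon.layers import GeneralizedCost
from neon.optimizers import Adagrad
from neon.transforms import CrossEntropyMulti

# hypothetical driver code around create_model(); babi, train_set, and
# valid_set are assumed names, not from the example above
model = create_model(babi.vocab_size, rlayer_type='gru')
cost = GeneralizedCost(costfunc=CrossEntropyMulti(usebits=True))
optimizer = Adagrad(learning_rate=0.01)
callbacks = Callbacks(model, eval_set=valid_set)
model.fit(train_set, optimizer=optimizer, num_epochs=20,
          cost=cost, callbacks=callbacks)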
Example #2
    def conv_net(self, activation, init=Kaiming(), version=-1):
        """Build the conv layer stack over word embeddings; returns a list of layers."""
        width = max([self.width, self.lookup_dim])
        if version == -1:
            if self.lookup_size:
                pre_layers = [
                    LookupTable(vocab_size=self.lookup_size,
                                embedding_dim=width,
                                init=GlorotUniform()),
                    Reshape((1, self.num_words, width)),
                ]
                first_width = width
            else:
                pre_layers = [
                    Conv((1, width, width),
                         padding=0,
                         init=init,
                         activation=activation)
                ]
                first_width = 1

            return pre_layers + [
                       # three parallel conv branches (filter heights 3, 5, 7) merged along depth
                       MergeBroadcast(
                           [
                               [
                                   Conv((3, first_width, 15), padding={'pad_h': 1, 'pad_w': 0}, init=init,
                                        activation=activation)
                               ],
                               [
                                   Conv((5, first_width, 15), padding={'pad_h': 2, 'pad_w': 0}, init=init,
                                        activation=activation)
                               ],
                               [
                                   Conv((7, first_width, 15), padding={'pad_h': 3, 'pad_w': 0}, init=init,
                                        activation=activation)
                               ],
                           ],
                           merge='depth'
                       ),
                       NoisyDropout(keep=0.5, noise_pct=1.0, noise_std=0.001),
                       Conv((5, 1, 15), strides={'str_h': 2 if self.num_words > 59 else 1,
                                                 'str_w': 1}, padding=0, init=init,
                            activation=activation),
                       NoisyDropout(keep=0.9, noise_pct=1.0, noise_std=0.00001),
                       Conv((3, 1, 9), strides={'str_h': 2, 'str_w': 1}, padding=0, init=init,
                            activation=activation),
                       NoisyDropout(keep=0.9, noise_pct=1.0, noise_std=0.00001),
                       Conv((9, 1, 9), strides={'str_h': 2, 'str_w': 1}, padding=0, init=init,
                            activation=activation)
                   ]
Example #3
elif args.rlayer_type == 'birnn':
    rlayer = DeepBiRNN(hidden_size,
                       g_uni,
                       activation=Tanh(),
                       depth=1,
                       reset_cells=True,
                       batch_norm=False,
                       bi_sum=False)
elif args.rlayer_type == 'bibnrnn':
    rlayer = DeepBiRNN(hidden_size,
                       g_uni,
                       activation=Tanh(),
                       depth=1,
                       reset_cells=True,
                       batch_norm=True)

layers = [
    LookupTable(vocab_size=vocab_size, embedding_dim=embedding_dim, init=uni),
    rlayer,
    RecurrentSum(),
    Dropout(keep=0.5),
    Affine(2, g_uni, bias=g_uni, activation=Softmax())
]

model = Model(layers=layers)

cost = GeneralizedCost(costfunc=CrossEntropyMulti(usebits=True))
optimizer = Adagrad(learning_rate=0.01,
                    gradient_clip_value=gradient_clip_value)

# configure callbacks
callbacks = Callbacks(model, eval_set=valid_set, **args.callback_args)
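The fragment ends once the callbacks are configured; the training call that would normally follow, using the cost, optimizer, and callbacks built above (train_set comes from earlier in the original script, outside this fragment):

# continuation sketch: launch training with the pieces configured above
model.fit(train_set,
          optimizer=optimizer,
          num_epochs=args.epochs,
          cost=cost,
          callbacks=callbacks)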
Example #4
hidden_size = 128
reset_cells = True
num_epochs = args.epochs

# setup backend
be = gen_backend(**extract_valid_args(args, gen_backend))
be.bsz = 1

# define the same model as used in training
init_glorot = GlorotUniform()
init_emb = Uniform(low=-0.1 / embedding_dim, high=0.1 / embedding_dim)
nclass = 2
layers = [
    LookupTable(vocab_size=vocab_size,
                embedding_dim=embedding_dim,
                init=init_emb,
                pad_idx=0,
                update=True),
    LSTM(hidden_size,
         init_glorot,
         activation=Tanh(),
         gate_activation=Logistic(),
         reset_cells=True),
    RecurrentSum(),
    Dropout(keep=0.5),
    Affine(nclass, init_glorot, bias=init_glorot, activation=Softmax())
]

# rebuild the model; the trained weights are loaded afterwards
print("Initialized the model")
model_new = Model(layers=layers)
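The snippet stops after rebuilding the network; neon's Model.load_params would perform the actual restore. A sketch, with a placeholder weights path:

# restore trained parameters into the rebuilt network
# ('imdb_lstm.prm' is a placeholder path, not from the original)
model_new.load_params('imdb_lstm.prm')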
Example #5
def test_reshape_layer_model(backend_default, fargs):
    """
    test cases:
    - conv before RNNs
    - conv after RNNs
    - conv after LUT
    """
    np.random.seed(seed=0)

    nin, nout, bsz = fargs
    be = backend_default
    be.bsz = bsz
    input_size = (nin, be.bsz)

    init = Uniform(-0.1, 0.1)
    g_uni = GlorotUniform()

    inp_np = np.random.rand(nin, be.bsz)
    delta_np = np.random.rand(nout, be.bsz)

    inp = be.array(inp_np)
    delta = be.array(delta_np)

    conv_lut_1 = [
        LookupTable(vocab_size=2000, embedding_dim=400, init=init),
        Reshape(reshape=(4, 100, -1)),
        Conv((3, 3, 16), init=init),
        LSTM(64,
             g_uni,
             activation=Tanh(),
             gate_activation=Logistic(),
             reset_cells=True),
        RecurrentSum(),
        Affine(nout, init, bias=init, activation=Softmax())
    ]

    conv_lut_2 = [
        LookupTable(vocab_size=1000, embedding_dim=400, init=init),
        Reshape(reshape=(4, 50, -1)),
        Conv((3, 3, 16), init=init),
        Pooling(2, strides=2),
        Affine(nout=nout, init=init, bias=init, activation=Softmax()),
    ]

    conv_rnn_1 = [
        LookupTable(vocab_size=2000, embedding_dim=400, init=init),
        LSTM(64,
             g_uni,
             activation=Tanh(),
             gate_activation=Logistic(),
             reset_cells=True),
        Reshape(reshape=(4, 32, -1)),
        Conv((3, 3, 16), init=init),
        Affine(nout, init, bias=init, activation=Softmax())
    ]

    conv_rnn_2 = [
        LookupTable(vocab_size=2000, embedding_dim=400, init=init),
        Recurrent(64, g_uni, activation=Tanh(), reset_cells=True),
        Reshape(reshape=(4, -1, 32)),
        Conv((3, 3, 16), init=init),
        Affine(nout, init, bias=init, activation=Softmax())
    ]

    lut_sum_1 = [
        LookupTable(vocab_size=1000, embedding_dim=128, init=init),
        RecurrentSum(),
        Affine(nout=nout, init=init, bias=init, activation=Softmax()),
    ]

    lut_birnn_1 = [
        LookupTable(vocab_size=1000, embedding_dim=200, init=init),
        DeepBiRNN(32,
                  init=GlorotUniform(),
                  batch_norm=True,
                  activation=Tanh(),
                  reset_cells=True,
                  depth=1),
        Reshape((4, 32, -1)),
        Conv((3, 3, 16), init=init),
        Affine(nout=nout, init=init, bias=init, activation=Softmax())
    ]

    layers_test = [
        conv_lut_1, conv_lut_2, conv_rnn_1, conv_rnn_2, lut_sum_1, lut_birnn_1
    ]

    for lg in layers_test:
        model = Model(layers=lg)
        cost = GeneralizedCost(costfunc=CrossEntropyBinary())
        model.initialize(input_size, cost)
        model.fprop(inp)
        model.bprop(delta)
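Outside the pytest harness the same check can be driven directly; backend_default is normally a fixture, so a CPU backend stands in here and the fargs values are arbitrary:

from neon.backends import gen_backend

be = gen_backend(backend='cpu', batch_size=32)  # stand-in for the fixture
test_reshape_layer_model(be, (128, 10, 32))     # fargs = (nin, nout, bsz)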
Example #6
# model initialization
rlayer_params = {
    "output_size": hidden_size,
    "init": init,
    "activation": Tanh(),
    "gate_activation": Logistic()
}
if args.rlayer_type == 'lstm':
    rlayer1, rlayer2 = LSTM(**rlayer_params), LSTM(**rlayer_params)
else:
    rlayer1, rlayer2 = GRU(**rlayer_params), GRU(**rlayer_params)

layers = [
    LookupTable(vocab_size=len(train_set.vocab),
                embedding_dim=hidden_size,
                init=init), rlayer1, rlayer2,
    Affine(len(train_set.vocab), init, bias=init, activation=Softmax())
]

cost = GeneralizedCost(costfunc=CrossEntropyMulti(usebits=True))

model = Model(layers=layers)

# vanilla gradient descent with a learning rate decay schedule and gradient clipping
learning_rate_sched = Schedule(list(range(5, args.epochs)), 0.5)
optimizer = GradientDescentMomentum(1,
                                    0,
                                    gradient_clip_norm=gradient_clip_norm,
                                    schedule=learning_rate_sched)
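With a list-style Schedule the base learning rate of 1 is multiplied by 0.5 at each listed epoch, so it halves every epoch from epoch 5 onward. A small sketch of that arithmetic, assuming neon's Schedule.get_learning_rate helper:

from neon.optimizers import Schedule

sched = Schedule(list(range(5, 10)), 0.5)
for epoch in (0, 4, 5, 6, 7):
    # the base rate is rescaled cumulatively at each configured epoch
    print(epoch, sched.get_learning_rate(learning_rate=1.0, epoch=epoch))
# expected: 1.0, 1.0, 0.5, 0.25, 0.125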
Example #7
    path, vocab_size=vocab_size, sentence_length=sentence_length)

print "Vocab size - ", vocab_size
print "Sentence Length - ", sentence_length
print "# of train sentences", X_train.shape[0]
print "# of test sentence", X_test.shape[0]

train_set = DataIterator(X_train, y_train, nclass=2)
valid_set = DataIterator(X_test, y_test, nclass=2)

# weight initialization
init_emb = Uniform(low=-0.1/embedding_dim, high=0.1/embedding_dim)
init_glorot = GlorotUniform()

layers = [
    LookupTable(vocab_size=vocab_size, embedding_dim=embedding_dim, init=init_emb),
    LSTM(hidden_size, init_glorot, activation=Tanh(),
         gate_activation=Logistic(), reset_cells=True),
    RecurrentSum(),
    Dropout(keep=0.5),
    Affine(2, init_glorot, bias=init_glorot, activation=Softmax())
]

cost = GeneralizedCost(costfunc=CrossEntropyMulti(usebits=True))
metric = Accuracy()

model = Model(layers=layers)

optimizer = Adagrad(learning_rate=0.01, clip_gradients=clip_gradients)
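The fragment wires up cost, metric, and optimizer but stops before training; a hedged continuation (the epoch count is an assumption):

from neon.callbacks.callbacks import Callbacks

callbacks = Callbacks(model, eval_set=valid_set)
model.fit(train_set, optimizer=optimizer, num_epochs=2,
          cost=cost, callbacks=callbacks)
print("Test Accuracy - ", 100 * model.eval(valid_set, metric=metric))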

Example #8
# setup backend
be = gen_backend(**extract_valid_args(args, gen_backend))

# load data
train_path = os.path.join(args.data_dir, 'nmt', dataset)
train_set = TextNMT(time_steps, train_path, get_prev_target=True, onehot_input=False,
                    split='train', dataset=dataset, subset_pct=args.subset_pct)
valid_set = TextNMT(time_steps, train_path, get_prev_target=False, onehot_input=False,
                    split='valid', dataset=dataset)

# weight initialization
init = Uniform(low=-0.08, high=0.08)

# Standard or Conditional encoder / decoder:
encoder = [LookupTable(vocab_size=len(train_set.s_vocab), embedding_dim=embedding_dim,
                       init=init, name="LUT_en")]
decoder = [LookupTable(vocab_size=len(train_set.t_vocab), embedding_dim=embedding_dim,
                       init=init, name="LUT_de")]
decoder_connections = []  # link up recurrent layers
for ii in range(num_layers):
    encoder.append(GRU(hidden_size, init, activation=Tanh(), gate_activation=Logistic(),
                       reset_cells=True, name="GRU1Enc"))
    decoder.append(GRU(hidden_size, init, activation=Tanh(), gate_activation=Logistic(),
                       reset_cells=True, name="GRU1Dec"))
    decoder_connections.append(ii)
decoder.append(Affine(train_set.nout, init, bias=init, activation=Softmax(), name="Affout"))

layers = Seq2Seq([encoder, decoder],
                 decoder_connections=decoder_connections,
                 name="Seq2Seq")
Example #9
# recurrent layer parameters (default gru)
rlayer_obj = GRU if args.rlayer_type == 'gru' else LSTM
rlayer_params = dict(output_size=100,
                     reset_cells=True,
                     init=GlorotUniform(),
                     init_inner=Orthonormal(0.5),
                     activation=Tanh(),
                     gate_activation=Logistic())

# if using lstm, swap the activation functions
if args.rlayer_type == 'lstm':
    rlayer_params.update(dict(activation=Logistic(), gate_activation=Tanh()))

# lookup layer parameters
lookup_params = dict(vocab_size=babi.vocab_size,
                     embedding_dim=50,
                     init=Uniform(-0.05, 0.05))

# Model construction
story_path = [LookupTable(**lookup_params), rlayer_obj(**rlayer_params)]
query_path = [LookupTable(**lookup_params), rlayer_obj(**rlayer_params)]

layers = [
    MergeMultistream(layers=[story_path, query_path], merge="stack"),
    Affine(babi.vocab_size, init=GlorotUniform(), activation=Softmax())
]

model = Model(layers=layers)

# setup callbacks
callbacks = Callbacks(model,
                      train_set,
                      eval_set=valid_set,
                      **args.callback_args)
Example #10
# set up the training, test, and validation iterators
train_set = imdb.train_iter
test_set = imdb.test_iter
valid_set = imdb.test_iter
# model specification and weight initialization

init_glorot = GlorotUniform()
init_uniform = Uniform(-0.1 / 128, 0.1 / 128)

# the following list of layers defines the network

layers = [
    LookupTable(vocab_size=vocab_size, embedding_dim=128, init=init_uniform),
    LSTM(output_size=128,
         init=init_glorot,
         activation=Tanh(),
         gate_activation=Logistic(),
         reset_cells=True),
    RecurrentSum(),
    Dropout(keep=0.5),
    Affine(nout=2, init=init_glorot, bias=init_glorot, activation=Softmax())
]

# cost, optimizer, and callbacks

cost = GeneralizedCost(costfunc=CrossEntropyMulti(usebits=True))
optimizer = Adagrad(learning_rate=0.01)
Example #11
def load_sent_encoder(model_dict, expand_vocab=False, orig_vocab=None,
                      w2v_vocab=None, w2v_path=None, use_recur_last=False):
    """
    Custom function to load the model saved from skip-thought vector training
    and reconstruct another model just using the LUT and encoding layer for
    transferring sentence representations.

    Arguments:
        model_dict: saved s2v model dict
        expand_vocab: Bool to indicate if w2v vocab expansion should be attempted
        orig_vocab: If using expand_vocab, original vocabulary dict is needed for expansion
        w2v_vocab: If using expand_vocab, w2v vocab dict
        w2v_path: Path to trained w2v binary (GoogleNews)
        use_recur_last: If True a RecurrentLast layer is used as the final layer, if False
                        a RecurrentSum layer is used as the last layer of the returned model.
    """

    embed_dim = model_dict['model']['config']['embed_dim']
    model_train = Model(model_dict)

    # RecurrentLast should be used for semantic similarity evaluation
    if use_recur_last:
        last_layer = RecurrentLast()
    else:
        last_layer = RecurrentSum()

    if expand_vocab:
        assert orig_vocab and w2v_vocab and w2v_path, (
            "All vocabs and w2v_path need to be specified when using expand_vocab")

        neon_logger.display("Computing vocab expansion regression...")
        # Build inverse word dictionary (word -> index)
        word_idict = dict()
        for kk, vv in orig_vocab.items():
            # Add 2 to the index to allow for padding and oov tokens as 0 and 1
            word_idict[vv + 2] = kk
        word_idict[0] = ''
        word_idict[1] = 'UNK'

        # Create dictionary of word -> vec
        orig_word_vecs = get_embeddings(model_train.layers.layer_dict['lookupTable'], word_idict)

        # Load GoogleNews w2v weights
        w2v_W, w2v_dim, _ = get_google_word2vec_W(w2v_path, w2v_vocab)

        # Compute the expanded vocab lookup table from a linear mapping of
        # word2vec into the RNN word space
        init_embed = compute_vocab_expansion(orig_word_vecs, w2v_W, w2v_vocab, word_idict)

        init_embed_dev = model_train.be.array(init_embed)
        w2v_vocab_size = len(w2v_vocab)

        table = LookupTable(vocab_size=w2v_vocab_size, embedding_dim=embed_dim,
                            init=init_embed_dev, pad_idx=0)

        model = Model(layers=[table,
                              model_train.layers.layer_dict['encoder'],
                              last_layer])

    else:
        model = Model(layers=[model_train.layers.layer_dict['lookupTable'],
                              model_train.layers.layer_dict['encoder'],
                              last_layer])
    return model
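A usage sketch for this loader: the saved model dict would typically come from neon's load_obj, and the .prm path here is a placeholder:

from neon.util.persist import load_obj

model_dict = load_obj('s2v_model.prm')  # placeholder path
sent_encoder = load_sent_encoder(model_dict, use_recur_last=True)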