예제 #1
0
파일: ra.py 프로젝트: jingmouren/monogreedy
    def __init__(self, name='ra', nimg=2048, nnh=512, na=512, nh=512, nw=512, nout=8843, npatch=30, model_file=None):
        """Build the region-attention captioning model graph.

        The keyword sizes are overridden by the attrs stored in
        ``model_file`` when one is given, so a reloaded model always
        matches its saved weights.

        Args:
            name: prefix used to name all layers/parameters.
            nimg: dimensionality of the raw image patch features.
            nnh: hidden size of the attention scoring network.
            na: dimensionality the patch features are projected to.
            nh: LSTM hidden size.
            nw: word-embedding size.
            nout: output vocabulary size.
            npatch: number of image patches per example.
            model_file: optional HDF5 checkpoint with config attrs and weights.
        """
        self.name = name
        if model_file is not None:
            with h5py.File(model_file, 'r') as f:
                nimg = f.attrs['nimg']
                nnh = f.attrs['nnh']
                na = f.attrs['na']
                nh = f.attrs['nh']
                nw = f.attrs['nw']
                nout = f.attrs['nout']
                # npatch = f.attrs['npatch']
        self.config = {'nimg': nimg, 'nnh': nnh, 'na': na, 'nh': nh, 'nw': nw, 'nout': nout, 'npatch': npatch}

        # word embedding layer
        self.embedding = Embedding(n_emb=nout, dim_emb=nw, name=self.name+'@embedding')

        # initialization mlp layer: init_mlp maps the mean projected feature
        # to a 2*nh vector that (concatenated with the feature itself, see
        # init_state below) seeds the scan; proj_mlp projects patches nimg->na.
        self.init_mlp = MLP(layer_sizes=[na, 2*nh], output_type='tanh', name=self.name+'@init_mlp')
        self.proj_mlp = MLP(layer_sizes=[nimg, na], output_type='tanh', name=self.name+'@proj_mlp')

        # lstm: input is [attended feature (na), word embedding (nw)]
        self.lstm = BasicLSTM(dim_x=na+nw, dim_h=nh, name=self.name+'@lstm')

        # prediction mlp over the concatenated (na+nh+nw) step features
        self.pred_mlp = MLP(layer_sizes=[na+nh+nw, nout], output_type='softmax', name=self.name+'@pred_mlp')

        # attention layer
        self.attention = Attention(dim_item=na, dim_context=na+nw+nh, hsize=nnh, name=self.name+'@attention')

        # inputs: cap is time-major word ids (scan iterates its first axis)
        cap = T.imatrix('cap')
        img = T.tensor3('img')
        self.inputs = [cap, img]

        # go through sequence: feed (current word, next word) pairs so the
        # step function can emit a per-position loss
        feat = self.proj_mlp.compute(img)
        init_e = feat.mean(axis=1)
        init_state = T.concatenate([init_e, self.init_mlp.compute(init_e)], axis=-1)
        (state, self.p, loss, self.alpha), _ = theano.scan(fn=self.scan_func,
                                                           sequences=[cap[0:-1, :], cap[1:, :]],
                                                           outputs_info=[init_state, None, None, None],
                                                           non_sequences=[feat])

        # loss function: mean of the per-step losses
        loss = T.mean(loss)
        self.costs = [loss]

        # layers and parameters (flattened across all layers)
        self.layers = [self.embedding, self.init_mlp, self.proj_mlp, self.attention, self.lstm, self.pred_mlp]
        self.params = sum([l.params for l in self.layers], [])

        # load weights from file, if model_file is not None
        if model_file is not None:
            self.load_weights(model_file)

        # these functions and variables are used in test stage
        self._init_func = None
        self._step_func = None
        self._proj_func = None
        self._feat_shared = theano.shared(np.zeros((1, npatch, nimg)).astype(theano.config.floatX))
예제 #2
0
class SkipGram:
    """Skip-gram word2vec model trained with negative-sampling losses."""

    def __init__(self, vocab_size, hidden_size, window_size, corpus):
        # Input/output weight matrices: small random init, float32.
        embed_w = 0.01 * np.random.randn(vocab_size, hidden_size).astype('f')
        out_w = 0.01 * np.random.randn(vocab_size, hidden_size).astype('f')

        # A shared embedding plus one loss layer per context position.
        self.embed_layer = Embedding(embed_w)
        self.ns_loss_layers = [
            NegativeSamplingLoss(out_w, corpus)
            for _ in range(2 * window_size)
        ]

        # Flatten every layer's parameters and gradients into single lists.
        self.params = []
        self.grads = []
        for layer in [self.embed_layer] + self.ns_loss_layers:
            self.params += layer.params
            self.grads += layer.grads

        # The input embedding doubles as the distributed word representation.
        self.word_vecs = embed_w

    def forward(self, contexts, target):
        """Return the loss summed over all context positions."""
        hidden = self.embed_layer.forward(target)
        total = 0
        for i, layer in enumerate(self.ns_loss_layers):
            total += layer.forward(hidden, contexts[:, i])
        return total

    def backward(self, dl=1):
        """Backpropagate through every loss layer into the embedding."""
        dhidden = sum(layer.backward(dl) for layer in self.ns_loss_layers)
        self.embed_layer.backward(dhidden)
예제 #3
0
    def __init__(self,
                 input_size=INPUT_SIZE,
                 output_size=OUTPUT_SIZE,
                 hidden_size=HIDDEN_SIZE,
                 embed_size=EMBED_SIZE,
                 lr=LEARNING_RATE,
                 clip_grad=CLIP_GRAD,
                 init_range=INIT_RANGE):
        """Build the encoder and decoder layer stacks of the seq2seq model."""
        # Encoder: embeds the input sequence and encodes it with an LSTM,
        # producing a vector representation of the input.
        encoder_embed = Embedding(input_size, embed_size, init_range)
        encoder_lstm = Lstm(embed_size, hidden_size, init_range)
        self.input_layers = [encoder_embed, encoder_lstm]

        # Decoder: its LSTM is chained to the encoder LSTM, and a softmax
        # projects hidden states onto the output vocabulary.
        self.output_layers = [
            Embedding(output_size, embed_size, init_range),
            Lstm(embed_size, hidden_size, init_range, previous=encoder_lstm),
            Softmax(hidden_size, output_size, init_range),
        ]

        # Remember the hyper-parameters for later use.
        self.hidden_size = hidden_size
        self.embed_size = embed_size
        self.input_size = input_size
        self.output_size = output_size
        self.lr = lr
        self.clip_grad = clip_grad
예제 #4
0
    def forward(self, xs: np.ndarray) -> np.ndarray:
        """Embed every time step of ``xs`` ((N, T) ids) into an (N, T, D) array."""
        batch, steps = xs.shape
        _, dim = self.W.shape

        # One Embedding per time step, all sharing the same weight matrix.
        self.layers = [Embedding(self.W) for _ in range(steps)]

        out = np.empty((batch, steps, dim), dtype=float)
        for t, layer in enumerate(self.layers):
            out[:, t, :] = layer.forward(xs[:, t])

        return out
예제 #5
0
    def __init__(self, vocab_size, embedding_size, hidden_size):
        """Encoder: an embedding layer feeding an LSTM."""
        self.vocab_size = vocab_size
        self.embedding_size = embedding_size
        self.hidden_size = hidden_size

        self.embedding = Embedding(vocab_size, embedding_size)
        self.lstm = LSTM(embedding_size, hidden_size)
        self.layers = [self.embedding, self.lstm]

        # Flatten the parameter lists of every layer that exposes one.
        self.params = [
            p
            for layer in self.layers if hasattr(layer, 'params')
            for p in layer.params
        ]
예제 #6
0
    def forward(self, idxs):
        """Look up embeddings for each step of ``idxs`` ((N, T) ids) -> (N, T, D)."""
        w, = self.params
        batch, steps = idxs.shape
        _, dim = w.shape  # vocabulary size, embedding dimension

        self.layers = []
        ys = np.empty((batch, steps, dim), dtype='f')

        # One weight-sharing Embedding layer per time step.
        for t in range(steps):
            emb = Embedding(w)
            ys[:, t, :] = emb.forward(idxs[:, t])
            self.layers.append(emb)

        return ys
예제 #7
0
파일: model.py 프로젝트: tallbru/TTTGen
class Encoder(tf.keras.Model):
    """Field-gating LSTM encoder over embedded table records."""

    def __init__(self, word_vocab_size, word_emb_dim, field_vocab_size,
                 field_emb_dim, pos_vocab_size, pos_emb_dim, fglstm_dim):
        super(Encoder, self).__init__()
        self.embedding_layer = Embedding(word_vocab_size,
                                         word_emb_dim,
                                         field_vocab_size,
                                         field_emb_dim,
                                         pos_vocab_size,
                                         pos_emb_dim)
        # Width of the field+position embedding, read from the embedding
        # layer's reported output shapes.
        self._field_pos_emb_dim = self.embedding_layer.get_output_shape()[1][2]
        self.cell = FieldGatingLSTMCell(fglstm_dim,
                                        word_emb_dim,
                                        self._field_pos_emb_dim)
        self.rnn = tf.keras.layers.RNN(self.cell,
                                       return_sequences=True,
                                       return_state=True)

    def get_field_pos_emb_dim(self):
        """Return the field+position embedding width."""
        return self._field_pos_emb_dim

    def call(self, inputs):
        """Embed ``inputs`` and run the field-gating RNN over the result."""
        table_embeddings, field_pos_embeddings = self.embedding_layer(inputs)
        outputs, h, c = self.rnn((table_embeddings, field_pos_embeddings))
        return outputs, (h, c), field_pos_embeddings
예제 #8
0
    def __init__(self, FLAGS):
        """Wire up the embedding layer and the gated-CNN block from FLAGS."""
        self.embeddingLayers = Embedding(FLAGS.vocab_size, FLAGS.embedding_dim)
        self.cnnGLUBlock = CnnGLUBlock(
            dropout_rate=FLAGS.dropout_rate,
            is_batch_norm=FLAGS.is_batch_norm,
            is_training=FLAGS.is_training,
            pad_format=FLAGS.pad_format,
        )
예제 #9
0
    def __init__(self, name='ra', nimg=2048, na=512, nh=512, nw=512, nout=8843, npatch=30, model_file=None):
        """Build the region-attention captioning model graph.

        The keyword sizes are overridden by the attrs stored in
        ``model_file`` when one is given.

        Args:
            name: prefix used to name all layers/parameters.
            nimg: dimensionality of the raw image patch features.
            na: dimensionality the patch features are projected to.
            nh: LSTM hidden size (also used as attention hidden size here).
            nw: word-embedding size.
            nout: output vocabulary size.
            npatch: number of image patches per example.
            model_file: optional HDF5 checkpoint with config attrs and weights.
        """
        self.name = name
        if model_file is not None:
            with h5py.File(model_file, 'r') as f:
                nimg = f.attrs['nimg']
                na = f.attrs['na']
                nh = f.attrs['nh']
                nw = f.attrs['nw']
                nout = f.attrs['nout']
                # npatch = f.attrs['npatch']
        self.config = {'nimg': nimg, 'na': na, 'nh': nh, 'nw': nw, 'nout': nout, 'npatch': npatch}

        # word embedding layer
        self.embedding = Embedding(n_emb=nout, dim_emb=nw, name=self.name+'@embedding')

        # initialization mlp layer: init_mlp maps the mean projected feature
        # to a 2*nh vector used (with the feature itself) as the scan's
        # initial state; proj_mlp projects patches nimg->na.
        self.init_mlp = MLP(layer_sizes=[na, 2*nh], output_type='tanh', name=self.name+'@init_mlp')
        self.proj_mlp = MLP(layer_sizes=[nimg, na], output_type='tanh', name=self.name+'@proj_mlp')

        # lstm: input is [attended feature (na), word embedding (nw)]
        self.lstm = BasicLSTM(dim_x=na+nw, dim_h=nh, name=self.name+'@lstm')

        # prediction mlp over the concatenated (na+nh+nw) step features
        self.pred_mlp = MLP(layer_sizes=[na+nh+nw, nout], output_type='softmax', name=self.name+'@pred_mlp')

        # attention layer
        self.attention = Attention(dim_item=na, dim_context=na+nw+nh, hsize=nh, name=self.name+'@attention')

        # inputs: cap is time-major word ids (scan iterates its first axis)
        cap = T.imatrix('cap')
        img = T.tensor3('img')
        self.inputs = [cap, img]

        # go through sequence: feed (current word, next word) pairs so the
        # step function can emit a per-position loss
        feat = self.proj_mlp.compute(img)
        init_e = feat.mean(axis=1)
        init_state = T.concatenate([init_e, self.init_mlp.compute(init_e)], axis=-1)
        (state, self.p, loss, self.alpha), _ = theano.scan(fn=self.scan_func,
                                                           sequences=[cap[0:-1, :], cap[1:, :]],
                                                           outputs_info=[init_state, None, None, None],
                                                           non_sequences=[feat])

        # loss function: mean of the per-step losses
        loss = T.mean(loss)
        self.costs = [loss]

        # layers and parameters (flattened across all layers)
        self.layers = [self.embedding, self.init_mlp, self.proj_mlp, self.attention, self.lstm, self.pred_mlp]
        self.params = sum([l.params for l in self.layers], [])

        # load weights from file, if model_file is not None
        if model_file is not None:
            self.load_weights(model_file)

        # these functions and variables are used in test stage
        self._init_func = None
        self._step_func = None
        self._proj_func = None
        # NOTE(review): a sibling variant of this class allocates this buffer
        # with nimg (raw feature size) as the last dim — confirm na is intended.
        self._feat_shared = theano.shared(np.zeros((1, npatch, na)).astype(theano.config.floatX))
예제 #10
0
class Decoder(Sequential):
    """Seq2seq decoder.

    An LSTM whose hidden state is projected (tanh) and softmaxed over the
    vocabulary; at each step the embedded argmax prediction is fed back
    as the next input.
    """

    def __init__(self, vocab_size, embedding_size, hidden_size, output_size):
        self.vocab_size = vocab_size
        self.embedding_size = embedding_size
        self.hidden_size = hidden_size
        self.output_size = output_size

        self.lstm = LSTM(embedding_size, hidden_size)
        self.lstm_output = TimeDistributed(hidden_size,
                                           output_size,
                                           activation='tanh')
        self.softmax = TimeDistributed(output_size,
                                       vocab_size,
                                       activation='softmax')
        self.embedding = Embedding(vocab_size, embedding_size)

        self.layers = [
            self.lstm, self.lstm_output, self.softmax, self.embedding
        ]
        # Collect parameters only from layers that actually expose some.
        self.params = list(
            itertools.chain(*[
                layer.params for layer in self.layers
                if hasattr(layer, 'params')
            ]))

    def forward(self, ec_H, ec_C, mask):
        """Decode from the encoder's final state.

        Args:
            ec_H, ec_C: initial hidden/cell state (typically from an encoder).
            mask: (sens_size, batch_size) step mask; scan walks its first axis.

        Returns the stacked per-step softmax probabilities (scan output 0).
        """
        (sens_size, batch_size) = T.shape(mask)

        def step(m, prev_Y, prev_H, prev_C):
            """Forward a time step of the decoder."""
            # LSTM forward time step
            (H, C) = self.lstm.step(prev_Y, m, prev_H, prev_C)
            # LSTM output
            O = self.lstm_output.forward(H)
            # Apply softmax to LSTM output
            P = self.softmax.forward(O)
            # Make prediction
            one_hot_Y = T.argmax(P, axis=1)
            # Feed the output to the next time step
            Y = self.embedding.forward(one_hot_Y)
            # FIXME: Deal with differ length ?
            return (P, Y, H, C)

        # P is output-only (no recurrence); Y/H/C recur with tap -1, starting
        # from a zero embedding and the encoder state.
        results, updates = theano.scan(fn=step,
                                       sequences=[mask],
                                       outputs_info=[
                                           None,
                                           dict(initial=T.zeros(
                                               (batch_size,
                                                self.embedding_size)),
                                                taps=[-1]),
                                           dict(initial=ec_H, taps=[-1]),
                                           dict(initial=ec_C, taps=[-1])
                                       ])

        # return np.swapaxes(results[0], 0, 1)       # returns the softmax probabilities
        return results[0]
예제 #11
0
파일: models.py 프로젝트: Yevgnen/seq2seq
    def __init__(self, vocab_size, embedding_size, hidden_size):
        """Encoder made of an Embedding layer followed by an LSTM."""
        self.vocab_size = vocab_size
        self.embedding_size = embedding_size
        self.hidden_size = hidden_size

        self.embedding = Embedding(vocab_size, embedding_size)
        self.lstm = LSTM(embedding_size, hidden_size)
        self.layers = [self.embedding, self.lstm]

        # Flatten the parameter lists of layers that carry parameters.
        self.params = []
        for layer in self.layers:
            if hasattr(layer, 'params'):
                self.params.extend(layer.params)
예제 #12
0
def create_output_node(model=None, input_sequences=None, num_gru=None, old_h0s=None, reset=None, num_pixelCNN_layer = None):
	"""Build the embedding -> pixelCNN -> stacked-GRU -> softmax graph.

	All layers are registered on ``model``. Returns a pair:
	(softmax output, stacked final hidden states of the GRU layers).
	"""
	assert(model is not None)
	assert(input_sequences is not None)
	assert(num_gru is not None)
	assert(old_h0s is not None)
	assert(reset is not None)
	assert(num_pixelCNN_layer is not None)

	# When ``reset`` is set, start the GRUs from zero states instead of the
	# carried-over ones.
	new_h0s = T.zeros_like(old_h0s)
	h0s = theano.ifelse.ifelse(reset, new_h0s, old_h0s)

	embedding_layer = Embedding(Q_LEVELS, DIM, input_sequences, name = model.name+"Embedding.Q_LEVELS")
	model.add_layer(embedding_layer)


	# Reshape the embedded output so its trailing dims are (WIDTH, DEPTH)
	# before feeding the pixel CNN.
	prev_out = embedding_layer.output()
	last_layer = WrapperLayer(prev_out.reshape((prev_out.shape[0], prev_out.shape[1], WIDTH, DEPTH)))

	# NOTE(review): num_layers uses the global NUM_PIXEL_CNN_LAYER; the
	# ``num_pixelCNN_layer`` argument is asserted above but never used —
	# confirm whether the parameter was meant to be passed here.
	pixel_CNN = pixelConv(
		last_layer,
		DEPTH,
		DEPTH,
		name = model.name + ".pxCNN",
		num_layers = NUM_PIXEL_CNN_LAYER
	)

	# Flatten the pixel-CNN output back to a sequence for the GRU stack.
	prev_out = pixel_CNN.output()
	last_layer = WrapperLayer(prev_out.reshape((prev_out.shape[0], prev_out.shape[1], -1)))

	last_hidden_list = []

	# Stack num_gru GRU layers, each seeded with its own slice of h0s;
	# keep each layer's final hidden state so callers can carry it over.
	for i in range(num_gru):
		gru_layer = GRU(DIM, DIM, last_layer, s0 = h0s[i,:,:], name = model.name+"GRU_{}".format(i))
		last_hidden_list.append(gru_layer.output()[:,-1])
		model.add_layer(gru_layer)
		last_layer = gru_layer

	# Project to Q_LEVELS classes and normalize.
	fc1 = FC(DIM, Q_LEVELS, last_layer, name = model.name+"FullyConnected")
	model.add_layer(fc1)

	softmax = Softmax(fc1, name= model.name+"Softmax")
	model.add_layer(softmax)

	return softmax.output(), T.stack(last_hidden_list, axis = 0)
class EmbeddingDot:
    """Dot-product score between hidden vectors and selected embedding rows."""

    def __init__(self, w):
        self.embed = Embedding(w)
        self.params = self.embed.params
        self.grads = self.embed.grads
        self.cache = None  # (h, selected rows) saved by forward for backward

    def forward(self, h, idx):
        """Return the per-sample dot product between h and the rows at idx."""
        rows = self.embed.forward(idx)
        score = np.sum(h * rows, axis=1)
        self.cache = (h, rows)
        return score

    def backward(self, ds):
        """Propagate the scalar score gradients into the embedding and h."""
        # Reshape to broadcast the per-sample gradient over features.
        ds = ds.reshape(ds.shape[0], 1)
        h, rows = self.cache
        self.embed.backward(ds * h)
        return ds * rows
예제 #14
0
    def __init__(self, vocab_size, hidden_size, window_size, corpus):
        """Initialise the weights and per-context-position loss layers."""
        # Input/output weight matrices: small random init, float32.
        w_in = 0.01 * np.random.randn(vocab_size, hidden_size).astype('f')
        w_out = 0.01 * np.random.randn(vocab_size, hidden_size).astype('f')

        # A shared embedding plus one negative-sampling loss per context slot.
        self.embed_layer = Embedding(w_in)
        self.ns_loss_layers = [
            NegativeSamplingLoss(w_out, corpus)
            for _ in range(2 * window_size)
        ]

        # Aggregate parameters and gradients across every layer.
        self.params, self.grads = [], []
        for layer in [self.embed_layer] + self.ns_loss_layers:
            self.params += layer.params
            self.grads += layer.grads

        # Word vectors are the rows of the input weight matrix.
        self.word_vecs = w_in
예제 #15
0
    def __init__(self, name='ss', nimg=2048, nh=512, nw=512, nout=8843, ns=80, model_file=None):
        """Build the scene-conditioned captioning model graph.

        The keyword sizes are overridden by the attrs stored in
        ``model_file`` when one is given.

        Args:
            name: prefix used to name all layers/parameters.
            nimg: dimensionality of the global image feature.
            nh: LSTM hidden size.
            nw: word-embedding size.
            nout: output vocabulary size.
            ns: scene-vector size.
            model_file: optional HDF5 checkpoint with config attrs and weights.
        """
        self.name = name
        if model_file is not None:
            with h5py.File(model_file, 'r') as f:
                nimg = f.attrs['nimg']
                nh = f.attrs['nh']
                nw = f.attrs['nw']
                ns = f.attrs['ns']
                nout = f.attrs['nout']
        self.config = {'nimg': nimg, 'nh': nh, 'nw': nw, 'nout': nout, 'ns': ns}

        # word embedding layer
        self.embedding = Embedding(n_emb=nout, dim_emb=nw, name=self.name+'@embedding')

        # initialization mlp layer: maps the image feature to the 2*nh vector
        # used as the scan's initial state
        self.proj_mlp = MLP(layer_sizes=[nimg, 2*nh], output_type='tanh', name=self.name+'@proj_mlp')

        # lstm: input is [word embedding (nw), scene vector (ns)]
        self.lstm = BasicLSTM(dim_x=nw+ns, dim_h=nh, name=self.name+'@lstm')

        # prediction mlp over the concatenated (nh+nw) step features
        self.pred_mlp = MLP(layer_sizes=[nh+nw, nout], output_type='softmax', name=self.name+'@pred_mlp')

        # inputs: cap is time-major word ids (scan iterates its first axis)
        cap = T.imatrix('cap')
        img = T.matrix('img')
        scene = T.matrix('scene')
        self.inputs = [cap, img, scene]

        # go through sequence: feed (current word, next word) pairs so the
        # step function can emit a per-position loss
        init_state = self.proj_mlp.compute(img)
        (state, self.p, loss), _ = theano.scan(fn=self.scan_func,
                                               sequences=[cap[0:-1, :], cap[1:, :]],
                                               outputs_info=[init_state, None, None],
                                               non_sequences=[scene])

        # loss function: mean of the per-step losses
        loss = T.mean(loss)
        self.costs = [loss]

        # layers and parameters (flattened across all layers)
        self.layers = [self.embedding, self.proj_mlp, self.lstm, self.pred_mlp]
        self.params = sum([l.params for l in self.layers], [])

        # load weights from file, if model_file is not None
        if model_file is not None:
            self.load_weights(model_file)

        # initialization for test stage
        self._init_func = None
        self._step_func = None
        self._scene_shared = theano.shared(np.zeros((1, ns)).astype(theano.config.floatX))
예제 #16
0
파일: model.py 프로젝트: tallbru/TTTGen
 def __init__( self
             , word_vocab_size
             , word_emb_dim
             , field_vocab_size
             , field_emb_dim
             , pos_vocab_size
             , pos_emb_dim
             , fglstm_dim):
     """Set up the table embedding, field-gating LSTM cell and RNN wrapper.

     NOTE(review): this fragment calls ``super(Encoder, self)`` — it appears
     to belong inside an ``Encoder`` class defined elsewhere.
     """
     super(Encoder, self).__init__()
     self.embedding_layer = Embedding( word_vocab_size
                                     , word_emb_dim
                                     , field_vocab_size
                                     , field_emb_dim
                                     , pos_vocab_size
                                     , pos_emb_dim)
     # Width of the field+position embedding, read from the embedding
     # layer's reported output shapes.
     self._field_pos_emb_dim = self.embedding_layer.get_output_shape()[1][2]
     self.cell = FieldGatingLSTMCell( fglstm_dim
                                    , word_emb_dim
                                    , self._field_pos_emb_dim)
     self.rnn = tf.keras.layers.RNN( self.cell
                                   , return_sequences=True
                                   , return_state=True)
예제 #17
0
	def __init__(self, num_layers, d_model, num_heads, dff, vocab_size, batch_size, rate=0.1, use_stats=False):
		"""Assemble embedding, encoder, decoder and the output projection."""
		super(Transformer, self).__init__()

		# Hyper-parameters.
		self.num_layers = num_layers
		self.vocab_size = vocab_size
		self.batch_size = batch_size
		self.model_depth = d_model
		self.num_heads = num_heads

		# Sub-networks.
		self.embedding = Embedding(vocab_size, d_model)
		self.encoder = Encoder(num_layers, d_model, num_heads, dff, vocab_size, rate)
		self.decoder = Decoder(num_layers, d_model, num_heads, dff, vocab_size, rate, use_stats)
		self.final_layer = tf.keras.layers.Dense(vocab_size)
예제 #18
0
파일: models.py 프로젝트: Yevgnen/seq2seq
    def __init__(self, vocab_size, embedding_size, hidden_size, output_size):
        """Decoder: LSTM -> tanh projection -> softmax, plus an embedding."""
        self.vocab_size = vocab_size
        self.embedding_size = embedding_size
        self.hidden_size = hidden_size
        self.output_size = output_size

        self.lstm = LSTM(embedding_size, hidden_size)
        self.lstm_output = TimeDistributed(hidden_size, output_size,
                                           activation='tanh')
        self.softmax = TimeDistributed(output_size, vocab_size,
                                       activation='softmax')
        self.embedding = Embedding(vocab_size, embedding_size)

        self.layers = [self.lstm, self.lstm_output, self.softmax,
                       self.embedding]
        # Collect parameters from layers that define any.
        self.params = []
        for layer in self.layers:
            if hasattr(layer, 'params'):
                self.params.extend(layer.params)
class EmbeddingDot:
    """Dot-product score between hidden vectors and selected embedding rows."""

    def __init__(self, W: np.ndarray) -> None:
        self.embed = Embedding(W)
        self.params = self.embed.params
        self.grads = self.embed.grads
        self.cache = None  # (h, target_W) stashed by forward for backward

    def forward(self, h: np.ndarray, idx: List[int]):
        """Return the per-sample dot product ``h[i] . W[idx[i]]``."""
        target_W = self.embed.forward(idx)
        self.cache = (h, target_W)
        return np.sum(target_W * h, axis=1)

    def backward(self, dout: np.ndarray) -> np.ndarray:
        """Backprop the scalar scores into the embedding and into h."""
        h, target_W = self.cache

        # Reshape to broadcast the per-sample gradient over features.
        dout = dout.reshape(dout.shape[0], 1)
        self.embed.backward(dout * h)
        return dout * target_W
예제 #20
0
    def __init__(self, vocab_size, embedding_size, hidden_size, output_size):
        """Build the decoder layers: LSTM, tanh projection, softmax, embedding."""
        self.vocab_size = vocab_size
        self.embedding_size = embedding_size
        self.hidden_size = hidden_size
        self.output_size = output_size

        self.lstm = LSTM(embedding_size, hidden_size)
        self.lstm_output = TimeDistributed(
            hidden_size, output_size, activation='tanh')
        self.softmax = TimeDistributed(
            output_size, vocab_size, activation='softmax')
        self.embedding = Embedding(vocab_size, embedding_size)

        self.layers = [
            self.lstm,
            self.lstm_output,
            self.softmax,
            self.embedding,
        ]
        # Flatten parameter lists from layers exposing ``params``.
        self.params = [
            p
            for layer in self.layers if hasattr(layer, 'params')
            for p in layer.params
        ]
예제 #21
0
    def _def_layers(self):
        """Instantiate the embedding, LSTM encoder and action-scoring heads."""
        # Word embeddings.
        self.word_embedding = Embedding(embedding_size=self.embedding_size,
                                        vocab_size=self.word_vocab_size,
                                        enable_cuda=self.enable_cuda)

        # LSTM encoder over the embedded words.
        self.encoder = FastUniLSTM(
            ninp=self.embedding_size,
            nhids=self.encoder_rnn_hidden_size,
            dropout_between_rnn_layers=self.dropout_between_rnn_layers)

        # Shared trunk followed by one vocabulary-sized linear head per
        # generated word position.
        self.action_scorer_shared = torch.nn.Linear(
            self.encoder_rnn_hidden_size[-1], self.action_scorer_hidden_dim)
        self.action_scorers = torch.nn.ModuleList([
            torch.nn.Linear(self.action_scorer_hidden_dim,
                            self.word_vocab_size,
                            bias=False)
            for _ in range(self.generate_length)
        ])
        self.fake_recurrent_mask = None
예제 #22
0
파일: models.py 프로젝트: Yevgnen/seq2seq
class Encoder(Sequential):
    """Sequence encoder: embedding lookup followed by an LSTM."""

    def __init__(self, vocab_size, embedding_size, hidden_size):
        self.vocab_size = vocab_size
        self.embedding_size = embedding_size
        self.hidden_size = hidden_size

        self.embedding = Embedding(vocab_size, embedding_size)
        self.lstm = LSTM(embedding_size, hidden_size)
        self.layers = [self.embedding, self.lstm]
        # Flatten the parameters of every layer that has any.
        self.params = [
            p
            for layer in self.layers if hasattr(layer, 'params')
            for p in layer.params
        ]

    def forward(self, batch, mask):
        """Encode ``batch`` (rows are padded id sequences) under ``mask``.

        Returns the final LSTM states ``(H[-1], C[-1])``.
        """
        embedded = self.embedding.forward(batch)
        (H, C) = self.lstm.forward(embedded, mask)
        return (H[-1], C[-1])
예제 #23
0
파일: models.py 프로젝트: Yevgnen/seq2seq
class Decoder(Sequential):
    """Seq2seq decoder: LSTM + tanh projection + softmax, feeding the
    embedded argmax prediction back in at each step."""

    def __init__(self, vocab_size, embedding_size, hidden_size, output_size):
        self.vocab_size = vocab_size
        self.embedding_size = embedding_size
        self.hidden_size = hidden_size
        self.output_size = output_size

        self.lstm = LSTM(embedding_size, hidden_size)
        self.lstm_output = TimeDistributed(hidden_size, output_size, activation='tanh')
        self.softmax = TimeDistributed(output_size, vocab_size, activation='softmax')
        self.embedding = Embedding(vocab_size, embedding_size)

        self.layers = [self.lstm, self.lstm_output, self.softmax, self.embedding]
        # Collect parameters only from layers that actually expose some.
        self.params = list(itertools.chain(*[layer.params for layer in self.layers if hasattr(layer, 'params')]))

    def forward(self, ec_H, ec_C, mask):
        """Decode from the encoder state ``(ec_H, ec_C)`` under ``mask``.

        ``mask`` is (sens_size, batch_size); scan walks its first axis.
        Returns the stacked per-step softmax probabilities (scan output 0).
        """
        (sens_size, batch_size) = T.shape(mask)

        def step(m, prev_Y, prev_H, prev_C):
            """Forward a time step of the decoder."""
            # LSTM forward time step
            (H, C) = self.lstm.step(prev_Y, m, prev_H, prev_C)
            # LSTM output
            O = self.lstm_output.forward(H)
            # Apply softmax to LSTM output
            P = self.softmax.forward(O)
            # Make prediction
            one_hot_Y = T.argmax(P, axis=1)
            # Feed the output to the next time step
            Y = self.embedding.forward(one_hot_Y)
            # FIXME: Deal with differ length ?
            return (P, Y, H, C)

        # P is output-only (no recurrence); Y/H/C recur with tap -1, starting
        # from a zero embedding and the encoder's final state.
        results, updates = theano.scan(
            fn=step,
            sequences=[mask],
            outputs_info=[
                None,
                dict(initial=T.zeros((batch_size, self.embedding_size)), taps=[-1]),
                dict(initial=ec_H, taps=[-1]),
                dict(initial=ec_C, taps=[-1])
            ]
        )

        # return np.swapaxes(results[0], 0, 1)       # returns the softmax probabilities
        return results[0]
예제 #24
0
    def __init__(self, vocab_size: int, hidden_size: int, window_size: int, corpus: List[int]) -> None:
        """Create the shared-weight input embeddings and the sampling loss."""
        W_in = 0.01 * np.random.randn(vocab_size, hidden_size).astype(float)
        W_out = 0.01 * np.random.randn(vocab_size, hidden_size).astype(float)

        # One weight-sharing embedding per context position.
        self.in_layers = [Embedding(W_in) for _ in range(2 * window_size)]
        self.ns_loss = NegativeSamplingLoss(W_out, corpus, power=0.75, sample_size=5)

        # Gather parameters and gradients from every layer.
        self.params, self.grads = [], []
        for layer in self.in_layers + [self.ns_loss]:
            self.params += layer.params
            self.grads += layer.grads

        # Word vectors are the rows of the input weight matrix.
        self.word_vecs = W_in
예제 #25
0
 def __init__(self, config):
     """Assemble the AES model: embedding, alternating modeling/attention
     blocks, and a final output layer, all sized from ``config``."""
     super(AESModel, self).__init__()
     self.config = config
     self.e0 = Embedding(config.vocab_size, config.embedding_output, config)
     self.m0 = Modeling(config.embedding_output, config.hidden_size, config)
     self.a0 = Attn(2 * config.hidden_size,
                    2 * config.hidden_size,
                    config.max_length_sent,
                    config,
                    dropout_p=config.dropout)
     self.m1 = Modeling(4 * config.hidden_size, config.hidden_size, config)
     self.a1 = Attn(2 * config.hidden_size,
                    2 * config.hidden_size,
                    config.max_length_sent,
                    config,
                    dropout_p=config.dropout)
     self.m2 = Modeling(4 * config.hidden_size, config.hidden_size, config)
     # self.m2 = Modeling(config.hidden_size, config.hidden_size, config)
     # Output layer sized for 2*hidden * max_length_sent * max_num_sent inputs.
     self.o0 = Output(
         2 * config.hidden_size * config.max_length_sent *
         config.max_num_sent, config)
예제 #26
0
class Encoder(Sequential):
    """Embedding + LSTM encoder returning the final hidden/cell state."""

    def __init__(self, vocab_size, embedding_size, hidden_size):
        self.vocab_size = vocab_size
        self.embedding_size = embedding_size
        self.hidden_size = hidden_size

        self.embedding = Embedding(vocab_size, embedding_size)
        self.lstm = LSTM(embedding_size, hidden_size)
        self.layers = [self.embedding, self.lstm]

        # Collect parameters from layers that define any.
        self.params = []
        for layer in self.layers:
            if hasattr(layer, 'params'):
                self.params.extend(layer.params)

    def forward(self, batch, mask):
        """Run a batch of padded id sequences through embedding and LSTM.

        Returns ``(H[-1], C[-1])`` — the states after the last time step.
        """
        emb = self.embedding.forward(batch)
        H, C = self.lstm.forward(emb, mask)
        return (H[-1], C[-1])
예제 #27
0
class LSTM_DQN(torch.nn.Module):
    """DQN over text: embeds words, encodes with an LSTM, and scores one
    vocabulary-sized action head per generated word position."""

    model_name = 'lstm_dqn'

    def __init__(self,
                 model_config,
                 word_vocab,
                 generate_length=5,
                 enable_cuda=False):
        """Read the config, build the layers and initialise the weights.

        Args:
            model_config: dict of layer sizes (see read_config for keys).
            word_vocab: id -> word mapping; its length is the vocab size.
            generate_length: number of action heads (words to generate).
            enable_cuda: forwarded to the embedding layer.
        """
        super(LSTM_DQN, self).__init__()
        self.model_config = model_config
        self.enable_cuda = enable_cuda
        self.word_vocab_size = len(word_vocab)
        self.id2word = word_vocab
        self.generate_length = generate_length
        self.read_config()
        self._def_layers()
        self.init_weights()
        # self.print_parameters()

    def print_parameters(self):
        """Print the total and trainable parameter counts."""
        amount = 0
        for p in self.parameters():
            amount += np.prod(p.size())
        print("total number of parameters: %s" % (amount))
        parameters = filter(lambda p: p.requires_grad, self.parameters())
        amount = 0
        for p in parameters:
            amount += np.prod(p.size())
        print("number of trainable parameters: %s" % (amount))

    def read_config(self):
        """Copy the layer sizes out of model_config onto attributes."""
        # model config
        self.embedding_size = self.model_config['embedding_size']
        self.encoder_rnn_hidden_size = self.model_config[
            'encoder_rnn_hidden_size']
        self.action_scorer_hidden_dim = self.model_config[
            'action_scorer_hidden_dim']
        self.dropout_between_rnn_layers = self.model_config[
            'dropout_between_rnn_layers']

    def _def_layers(self):
        """Instantiate the embedding, LSTM encoder and action-scoring heads."""

        # word embeddings
        self.word_embedding = Embedding(embedding_size=self.embedding_size,
                                        vocab_size=self.word_vocab_size,
                                        enable_cuda=self.enable_cuda)

        # lstm encoder
        self.encoder = FastUniLSTM(
            ninp=self.embedding_size,
            nhids=self.encoder_rnn_hidden_size,
            dropout_between_rnn_layers=self.dropout_between_rnn_layers)

        # shared trunk followed by one vocabulary-sized head per word position
        self.action_scorer_shared = torch.nn.Linear(
            self.encoder_rnn_hidden_size[-1], self.action_scorer_hidden_dim)
        action_scorers = []
        for _ in range(self.generate_length):
            action_scorers.append(
                torch.nn.Linear(self.action_scorer_hidden_dim,
                                self.word_vocab_size,
                                bias=False))
        self.action_scorers = torch.nn.ModuleList(action_scorers)
        self.fake_recurrent_mask = None

    def init_weights(self):
        """Xavier-initialise the scorer weights; zero the shared bias."""
        torch.nn.init.xavier_uniform_(self.action_scorer_shared.weight.data)
        for i in range(len(self.action_scorers)):
            torch.nn.init.xavier_uniform_(self.action_scorers[i].weight.data)
        self.action_scorer_shared.bias.data.fill_(0)

    def representation_generator(self, _input_words):
        """Encode word ids into a single masked-mean state vector per example."""
        embeddings, mask = self.word_embedding.forward(
            _input_words)  # batch x time x emb
        encoding_sequence, _, _ = self.encoder.forward(
            embeddings, mask)  # batch x time x h
        mean_encoding = masked_mean(encoding_sequence, mask)  # batch x h
        return mean_encoding

    def action_scorer(self, state_representation):
        """Score the vocabulary for each generated word position.

        Returns a list of ``generate_length`` (batch x n_vocab) tensors.
        """
        hidden = self.action_scorer_shared.forward(
            state_representation)  # batch x hid
        hidden = F.relu(hidden)  # batch x hid
        action_ranks = []
        for i in range(len(self.action_scorers)):
            action_ranks.append(
                self.action_scorers[i].forward(hidden))  # batch x n_vocab
        return action_ranks
 def __init__(self, w):
     """Wrap an Embedding over ``w`` and alias its params/grads lists."""
     self.embed = Embedding(w)
     self.params = self.embed.params
     self.grads = self.embed.grads
     self.cache = None  # populated by forward (not shown in this fragment)
예제 #29
0
class Model(object):
    """
    Region Attention model

    Theano image-captioning model that attends over projected image-region
    features at every decoding step of an LSTM language model.
    """
    def __init__(self, name='ra', nimg=2048, na=512, nh=512, nw=512, nout=8843, npatch=30, model_file=None):
        """Build the symbolic training graph.

        name: prefix used to name all sub-layers.
        nimg: dimension of the raw image-region features.
        na: dimension of the projected region features (attention space).
        nh: LSTM hidden size.  nw: word-embedding size.  nout: vocabulary size.
        npatch: number of image regions per image.
        model_file: optional HDF5 checkpoint; its stored hyper-parameters
            override the arguments and its weights are loaded at the end.
        """
        self.name = name
        if model_file is not None:
            with h5py.File(model_file, 'r') as f:
                nimg = f.attrs['nimg']
                na = f.attrs['na']
                nh = f.attrs['nh']
                nw = f.attrs['nw']
                nout = f.attrs['nout']
                # npatch = f.attrs['npatch']
        self.config = {'nimg': nimg, 'na': na, 'nh': nh, 'nw': nw, 'nout': nout, 'npatch': npatch}

        # word embedding layer
        self.embedding = Embedding(n_emb=nout, dim_emb=nw, name=self.name+'@embedding')

        # initialization mlp layer: init_mlp maps the mean region feature to
        # the initial LSTM (c, h); proj_mlp projects raw features nimg -> na
        self.init_mlp = MLP(layer_sizes=[na, 2*nh], output_type='tanh', name=self.name+'@init_mlp')
        self.proj_mlp = MLP(layer_sizes=[nimg, na], output_type='tanh', name=self.name+'@proj_mlp')

        # lstm: input is [attended region e_t, word embedding]
        self.lstm = BasicLSTM(dim_x=na+nw, dim_h=nh, name=self.name+'@lstm')

        # prediction mlp: softmax over the vocabulary from [e_t, h_t, w_{t-1}]
        self.pred_mlp = MLP(layer_sizes=[na+nh+nw, nout], output_type='softmax', name=self.name+'@pred_mlp')

        # attention layer: context is [e_{t-1}, h_{t-1}, w_{t-1}]
        self.attention = Attention(dim_item=na, dim_context=na+nw+nh, hsize=nh, name=self.name+'@attention')

        # inputs: cap appears to be (time, batch) word ids and img
        # (batch, npatch, nimg) region features — confirm with data pipeline
        cap = T.imatrix('cap')
        img = T.tensor3('img')
        self.inputs = [cap, img]

        # go through sequence: at each step feed word t-1 and score word t
        feat = self.proj_mlp.compute(img)
        init_e = feat.mean(axis=1)
        init_state = T.concatenate([init_e, self.init_mlp.compute(init_e)], axis=-1)
        (state, self.p, loss, self.alpha), _ = theano.scan(fn=self.scan_func,
                                                           sequences=[cap[0:-1, :], cap[1:, :]],
                                                           outputs_info=[init_state, None, None, None],
                                                           non_sequences=[feat])

        # loss function: mean cross-entropy over time steps and batch
        loss = T.mean(loss)
        self.costs = [loss]

        # layers and parameters
        self.layers = [self.embedding, self.init_mlp, self.proj_mlp, self.attention, self.lstm, self.pred_mlp]
        self.params = sum([l.params for l in self.layers], [])

        # load weights from file, if model_file is not None
        if model_file is not None:
            self.load_weights(model_file)

        # these functions and variables are used in test stage
        self._init_func = None
        self._step_func = None
        self._proj_func = None
        # holds the projected (na-dim) region features of the current image
        # at test time; init_func overwrites it via set_value
        self._feat_shared = theano.shared(np.zeros((1, npatch, na)).astype(theano.config.floatX))

    def compute(self, state, w_idx, feat):
        """One decoding step.

        state packs [e, c, h]; returns (new_state, next-word distribution p,
        attention weights alpha).
        """
        # word embedding
        word_vec = self.embedding.compute(w_idx)
        # split states
        e_tm1, c_tm1, h_tm1 = split_state(state, scheme=[(1, self.config['na']), (2, self.config['nh'])])
        # attention
        e_t, alpha = self.attention.compute(feat, T.concatenate([e_tm1, h_tm1, word_vec], axis=1))
        # lstm step
        e_w = T.concatenate([e_t, word_vec], axis=-1)
        c_t, h_t = self.lstm.compute(e_w, c_tm1, h_tm1)  # (mb,nh)
        # merge state
        new_state = T.concatenate([e_t, c_t, h_t], axis=-1)
        # predict word probability
        p = self.pred_mlp.compute(T.concatenate([e_t, h_t, word_vec], axis=-1))
        return new_state, p, alpha

    def scan_func(self, w_tm1, w_t, state, feat):
        """theano.scan body: step on w_{t-1} and score the gold word w_t."""
        # update state
        new_state, p, alpha = self.compute(state, w_tm1, feat)
        # cross-entropy loss
        loss = T.nnet.categorical_crossentropy(p, w_t)
        return new_state, p, loss, alpha

    def init_func(self, img_value):
        """Project an image's region features and return the initial state.

        Lazily compiles the projection and initialisation functions on first
        call, then stores the projected features in the shared variable.
        """
        if self._proj_func is None:
            img = T.tensor3()
            self._proj_func = theano.function([img], self.proj_mlp.compute(img))
        if self._init_func is None:
            init_e = self._feat_shared.mean(axis=1)
            init_state = T.concatenate([init_e, self.init_mlp.compute(init_e)], axis=-1)
            self._init_func = theano.function([], init_state)
        self._feat_shared.set_value(self._proj_func(img_value))
        return self._init_func()

    def step_func(self, state_value, w_value):
        """Advance one test-time step; returns (new_state, log-probabilities)."""
        if self._step_func is None:
            w = T.ivector()
            state = T.matrix()
            new_state, p, _ = self.compute(state, w, self._feat_shared)
            self._step_func = theano.function([state, w], [new_state, T.log(p)])
        return self._step_func(state_value, w_value)

    def save_to_dir(self, save_dir, idx):
        """Save layer weights plus hyper-parameters to <name>.h5.<idx>."""
        save_file = osp.join(save_dir, self.name+'.h5.'+str(idx))
        for l in self.layers:
            l.save_weights(save_file)
        # NOTE(review): h5py.File is opened without an explicit mode; newer
        # h5py versions require one — confirm the pinned h5py version.
        with h5py.File(save_file) as f:
            for k, v in self.config.items():
                f.attrs[k] = v

    def load_weights(self, model_file):
        """Load every layer's weights from an HDF5 checkpoint."""
        for l in self.layers:
            l.load_weights(model_file)
예제 #30
0
    def _def_layers(self):
        """Instantiate every sub-module of the network.

        Covers: word/node/relation embeddings, the text encoder stacks, the
        relational GCN, attention blocks, recurrent memories, and the heads
        for each auxiliary task (command generation, observation generation,
        action/state prediction, deep graph infomax).
        """

        # word embeddings (optionally initialised from pretrained vectors)
        if self.use_pretrained_embedding:
            self.word_embedding = Embedding(
                embedding_size=self.word_embedding_size,
                vocab_size=self.word_vocab_size,
                id2word=self.word_vocab,
                dropout_rate=self.embedding_dropout,
                load_pretrained=True,
                trainable=self.word_embedding_trainable,
                embedding_oov_init="random",
                pretrained_embedding_path=self.pretrained_embedding_path)
        else:
            self.word_embedding = Embedding(
                embedding_size=self.word_embedding_size,
                vocab_size=self.word_vocab_size,
                trainable=self.word_embedding_trainable,
                dropout_rate=self.embedding_dropout)

        # node embeddings
        self.node_embedding = Embedding(
            embedding_size=self.node_embedding_size,
            vocab_size=self.node_vocab_size,
            trainable=self.node_embedding_trainable,
            dropout_rate=self.embedding_dropout)

        # relation embeddings
        self.relation_embedding = Embedding(
            embedding_size=self.relation_embedding_size,
            vocab_size=self.relation_vocab_size,
            trainable=self.relation_embedding_trainable,
            dropout_rate=self.embedding_dropout)

        self.word_embedding_prj = torch.nn.Linear(
            self.word_embedding_size, self.block_hidden_dim, bias=False)
        self.encoder = torch.nn.ModuleList([
            EncoderBlock(conv_num=self.encoder_conv_num,
                         ch_num=self.block_hidden_dim,
                         k=5,
                         block_hidden_dim=self.block_hidden_dim,
                         n_head=self.n_heads,
                         dropout=self.block_dropout)
            for _ in range(self.encoder_layers)])

        # relational GCN over the (node, relation) graph; node/relation inputs
        # are their embeddings concatenated with encoded text features
        self.rgcns = StackedRelationalGraphConvolution(
            entity_input_dim=self.node_embedding_size + self.block_hidden_dim,
            relation_input_dim=self.relation_embedding_size + self.block_hidden_dim,
            num_relations=self.relation_vocab_size,
            hidden_dims=self.gcn_hidden_dims,
            num_bases=self.gcn_num_bases,
            use_highway_connections=self.gcn_highway_connections,
            dropout_rate=self.dropout,
            real_valued_graph=self.real_valued_graph)
        self.attention = CQAttention(
            block_hidden_dim=self.block_hidden_dim,
            dropout=self.attention_dropout)
        self.attention_prj = torch.nn.Linear(
            self.block_hidden_dim * 4, self.block_hidden_dim, bias=False)

        self.self_attention_text = SelfAttention(
            self.block_hidden_dim, self.n_heads, self.dropout)
        self.self_attention_graph = SelfAttention(
            self.block_hidden_dim, self.n_heads, self.dropout)

        # recurrent memories
        self.recurrent_memory_bi_input = LSTMCell(
            self.block_hidden_dim * 2, self.block_hidden_dim, use_bias=True)
        self.recurrent_memory_single_input = LSTMCell(
            self.block_hidden_dim, self.block_hidden_dim, use_bias=True)

        # scorer heads use noisy linear layers when noisy-net exploration is on
        make_linear = NoisyLinear if self.noisy_net else torch.nn.Linear
        self.action_scorer_linear_1_tri_input = make_linear(
            self.block_hidden_dim * 3, self.block_hidden_dim)
        self.action_scorer_linear_1_bi_input = make_linear(
            self.block_hidden_dim * 2, self.block_hidden_dim)
        self.action_scorer_linear_2 = make_linear(self.block_hidden_dim, 1)

        # text encoder for pretraining tasks
        # (kept separate so the RL text encoder is never initialised from
        # pretrained parameters)
        self.encoder_for_pretraining_tasks = torch.nn.ModuleList([
            EncoderBlock(conv_num=self.encoder_conv_num,
                         ch_num=self.block_hidden_dim,
                         k=5,
                         block_hidden_dim=self.block_hidden_dim,
                         n_head=self.n_heads,
                         dropout=self.block_dropout)
            for _ in range(self.encoder_layers)])

        # command generation
        self.cmd_gen_attention = CQAttention(
            block_hidden_dim=self.block_hidden_dim,
            dropout=self.attention_dropout)
        self.cmd_gen_attention_prj = torch.nn.Linear(
            self.block_hidden_dim * 4, self.block_hidden_dim, bias=False)
        self.decoder = torch.nn.ModuleList([
            DecoderBlock(ch_num=self.block_hidden_dim,
                         k=5,
                         block_hidden_dim=self.block_hidden_dim,
                         n_head=self.n_heads,
                         dropout=self.block_dropout)
            for _ in range(self.decoder_layers)])
        self.tgt_word_prj = torch.nn.Linear(
            self.block_hidden_dim, self.word_vocab_size, bias=False)
        self.pointer_softmax = PointerSoftmax(
            input_dim=self.block_hidden_dim, hidden_dim=self.block_hidden_dim)

        # observation generation
        self.obs_gen_attention = CQAttention(
            block_hidden_dim=self.block_hidden_dim,
            dropout=self.attention_dropout)
        self.obs_gen_attention_prj = torch.nn.Linear(
            self.block_hidden_dim * 4, self.block_hidden_dim, bias=False)
        self.obs_gen_decoder = torch.nn.ModuleList([
            DecoderBlockForObsGen(ch_num=self.block_hidden_dim,
                                  k=5,
                                  block_hidden_dim=self.block_hidden_dim,
                                  n_head=self.n_heads,
                                  dropout=self.block_dropout)
            for _ in range(self.decoder_layers)])
        self.obs_gen_tgt_word_prj = torch.nn.Linear(
            self.block_hidden_dim, self.word_vocab_size, bias=False)
        self.obs_gen_linear_1 = torch.nn.Linear(
            self.block_hidden_dim, self.block_hidden_dim)
        # scores every (relation/2, node, node) triple of the predicted graph
        self.obs_gen_linear_2 = torch.nn.Linear(
            self.block_hidden_dim,
            int(len(self.relation_vocab) / 2) * len(self.node_vocab) * len(self.node_vocab))
        self.obs_gen_attention_to_rnn_input = torch.nn.Linear(
            self.block_hidden_dim * 4, self.block_hidden_dim)
        self.obs_gen_graph_rnncell = torch.nn.GRUCell(
            self.block_hidden_dim, self.block_hidden_dim)
        self.observation_discriminator = ObservationDiscriminator(self.block_hidden_dim)

        # action prediction
        self.ap_attention = CQAttention(
            block_hidden_dim=self.block_hidden_dim,
            dropout=self.attention_dropout)
        self.ap_attention_prj = torch.nn.Linear(
            self.block_hidden_dim * 4, self.block_hidden_dim, bias=False)
        self.ap_self_attention = SelfAttention(
            self.block_hidden_dim * 3, self.n_heads, self.dropout)
        self.ap_linear_1 = torch.nn.Linear(
            self.block_hidden_dim * 3, self.block_hidden_dim)
        self.ap_linear_2 = torch.nn.Linear(self.block_hidden_dim, 1)

        # state prediction
        self.sp_attention = CQAttention(
            block_hidden_dim=self.block_hidden_dim,
            dropout=self.attention_dropout)
        self.sp_attention_prj = torch.nn.Linear(
            self.block_hidden_dim * 4, self.block_hidden_dim, bias=False)
        self.sp_self_attention = SelfAttention(
            self.block_hidden_dim * 3, self.n_heads, self.dropout)
        self.sp_linear_1 = torch.nn.Linear(
            self.block_hidden_dim * 3, self.block_hidden_dim)
        self.sp_linear_2 = torch.nn.Linear(self.block_hidden_dim, 1)

        # deep graph infomax
        self.dgi_discriminator = DGIDiscriminator(self.gcn_hidden_dims[-1])
예제 #31
0
from tensor import Tensor
from layers import Embedding
from rnn import RNNCell
from losses import CrossEntropyLoss
from optimizers import SGD

# Character-level language-model data setup on the Shakespeare corpus.
with open('data/shakespear.txt', 'r') as f:
    raw = f.read()

# vocabulary = distinct characters; map each to an integer id
# (iteration order of set() is arbitrary, so ids differ between runs)
vocab = list(set(raw))
word2index = {}
for i, word in enumerate(vocab):
    word2index[word] = i
# np is presumably numpy imported earlier in the file — TODO confirm
indices = np.array(list(map(lambda x: word2index[x], raw)))

embed = Embedding(vocab_size=len(vocab), dim=512)
model = RNNCell(n_inputs=512, n_hidden=512, n_output=len(vocab))

criterion = CrossEntropyLoss()
optim = SGD(parameters=model.get_parameters() + embed.get_parameters(),
            alpha=0.01)

batch_size = 32
bptt = 16
# drop the tail so the sequence reshapes evenly into batch_size rows
n_batches = int((indices.shape[0] / batch_size))
trimmed_indices = indices[:n_batches * batch_size]
# batch_indices: each column represents a sub-sequence from indices -> continuous
batched_indices = trimmed_indices.reshape(batch_size, n_batches)
batched_indices = batched_indices.transpose()

# model inputs are all rows but the last; the matching shifted targets are
# presumably defined just below (outside this excerpt)
input_batched_indices = batched_indices[:-1]
예제 #32
0
d_vocab_size = len(d_w2i)  # decoder (target-side) vocabulary size

# source-side input: padded batches of word ids; the mask m is 1.0 where
# x != -1 and 0.0 elsewhere (so -1 presumably marks padding — confirm)
x = tf.placeholder(tf.int32, [None, None], name='x')
m = tf.cast(tf.not_equal(x, -1), tf.float32)
# target-side: teacher-forced input t_in and gold output t_out, shifted by one
t = tf.placeholder(tf.int32, [None, None], name='t')
t_in = t[:, :-1]
t_out = t[:, 1:]
t_out_one_hot = tf.one_hot(t_out, depth=d_vocab_size, dtype=tf.float32)

# Attention mask: a large negative value at positions equal to PADDING_ID so
# a downstream softmax assigns them ~zero weight; 1.0 elsewhere.
# NOTE(review): this uses PADDING_ID while m above tests against -1 — confirm
# the two padding conventions agree.
ma = tf.where(condition=tf.equal(x, PADDING_ID),
              x=tf.ones_like(x, dtype=tf.float32) * np.float32(-1e+10),
              y=tf.ones_like(x, dtype=tf.float32))

# bidirectional GRU encoder: shared embedding, a forward GRU, and a backward
# GRU that receives the time-reversed mask
encoder = [
    Embedding(e_vocab_size, EMB_DIM),
    GRU(EMB_DIM, HID_DIM, m),
    GRU(EMB_DIM, HID_DIM, m[:, ::-1])
]

x_emb = f_props(encoder[:1], x)
h_ef = f_props(encoder[1:2], x_emb)                      # forward states
h_eb = f_props(encoder[2:], x_emb[:, ::-1])[:, ::-1, :]  # backward states, re-reversed
h_e = tf.concat([h_ef, h_eb], axis=2)                    # concat fwd/bwd along feature axis
# mean over the time axis initialises both decoder GRU layers below
h_d1_0 = tf.reduce_mean(h_e, axis=1)
h_d2_0 = tf.reduce_mean(h_e, axis=1)
decoder = [
    Embedding(d_vocab_size, EMB_DIM),
    GRU(EMB_DIM, 2 * HID_DIM, tf.ones_like(t_in, dtype='float32'), h_0=h_d1_0),
    Attention(2 * HID_DIM, 2 * HID_DIM, h_e, ma),
예제 #33
0
파일: model.py 프로젝트: wcAlex/QA-XL
    def __init__(self, args):
        """Build the QA-XL reading-comprehension model.

        args: dict of hyper-parameters; must contain 'data_dir' (holding
        word_emb.json, char2id.json, tune_word_idx.pkl), the size/dim entries
        read below, and the 'use_cuda'/'use_xl' flags.
        """
        super(QAxl, self).__init__()

        hidden_size = args['hidden_size']
        dropout = args['dropout']
        attention_size = args['attention_size']
        # pretrained word vectors: one row per vocabulary entry
        word_emb = np.array(read_json(args['data_dir'] + 'word_emb.json'),
                            dtype=np.float32)
        word_size = word_emb.shape[0]
        word_dim = word_emb.shape[1]
        char_dim = args['char_dim']
        char_len = len(read_json(args['data_dir'] + 'char2id.json'))
        pos_dim = args['pos_dim']
        ner_dim = args['ner_dim']

        self.args = args
        self.train_loss = AverageMeter()
        self.use_cuda = args['use_cuda']
        self.use_xl = args['use_xl']

        # optional Transformer-XL contextual features (1024-d hidden states);
        # xl_dim is only defined — and only used — when use_xl is set
        if self.use_xl:
            self.xl = TransfoXLModel.from_pretrained('transfo-xl-wt103')
            xl_dim = 1024

        ## Embedding Layer
        print('Building embedding...')
        self.word_embeddings = nn.Embedding(word_emb.shape[0],
                                            word_dim,
                                            padding_idx=0)
        self.word_embeddings.weight.data = torch.from_numpy(word_emb)
        self.char_embeddings = nn.Embedding(char_len, char_dim, padding_idx=0)
        self.pos_embeddings = nn.Embedding(args['pos_size'],
                                           args['pos_dim'],
                                           padding_idx=0)
        self.ner_embeddings = nn.Embedding(args['ner_size'],
                                           args['ner_dim'],
                                           padding_idx=0)
        # indices NOT listed in tune_word_idx.pkl are kept fixed at their
        # pretrained vectors (fixed_embedding stores those rows)
        with open(args['data_dir'] + 'tune_word_idx.pkl', 'rb') as f:
            tune_idx = pkl.load(f)
        self.fixed_idx = list(
            set([i for i in range(word_size)]) - set(tune_idx))
        fixed_embedding = torch.from_numpy(word_emb)[self.fixed_idx]
        self.register_buffer('fixed_embedding', fixed_embedding)
        # NOTE(review): this plain assignment immediately shadows the buffer
        # registered on the previous line — looks redundant; confirm intent.
        self.fixed_embedding = fixed_embedding

        # input widths of the low-level passage (p) and question (q) encoders
        low_p_dim = word_dim + word_dim + args['pos_dim'] + args['ner_dim'] + 4
        low_q_dim = word_dim + args['pos_dim'] + args['ner_dim']
        if self.use_xl:
            low_p_dim += xl_dim
            low_q_dim += xl_dim

        self.emb_char = Embedding(word_dim, char_dim, hidden_size)

        ## Forward Layers Declaration
        high_p_dim = 2 * hidden_size
        full_q_dim = 2 * high_p_dim
        attention_dim = word_dim + full_q_dim
        if self.use_xl:
            attention_dim += xl_dim

        self.word_attention_layer = WordAttention(word_dim, attention_size,
                                                  dropout)

        # stacked bidirectional RNNs over padded sequences, one per level
        self.low_rnn = StackedPaddedRNN(low_p_dim,
                                        hidden_size,
                                        1,
                                        dropout=dropout)
        self.high_rnn = StackedPaddedRNN(high_p_dim,
                                         hidden_size,
                                         1,
                                         dropout=dropout)
        self.full_rnn = StackedPaddedRNN(full_q_dim,
                                         hidden_size,
                                         1,
                                         dropout=dropout)

        # passage-question attention at each representation level
        self.low_attention_layer = MultiAttention(attention_dim,
                                                  attention_size, dropout)
        self.high_attention_layer = MultiAttention(attention_dim,
                                                   attention_size, dropout)
        self.full_attention_layer = MultiAttention(attention_dim,
                                                   attention_size, dropout)

        ## Fusion Layer and Final Attention + Final RNN
        fuse_dim = 10 * hidden_size
        self_attention_dim = 12 * hidden_size + word_dim + ner_dim + pos_dim + 1
        if self.use_xl:
            self_attention_dim += xl_dim

        self.fuse_rnn = StackedPaddedRNN(fuse_dim,
                                         hidden_size,
                                         1,
                                         dropout=dropout)
        self.self_attention_layer = MultiAttention(self_attention_dim,
                                                   attention_size, dropout)
        self.self_rnn = StackedPaddedRNN(4 * hidden_size,
                                         hidden_size,
                                         1,
                                         dropout=dropout)

        ## Verifier and output
        self.summ_layer = PointerS(2 * hidden_size,
                                   dropout=dropout,
                                   use_cuda=self.use_cuda)
        self.summ_layer2 = PointerS(2 * hidden_size,
                                    dropout=dropout,
                                    use_cuda=self.use_cuda)
        self.pointer_layer = PointerNet(2 * hidden_size,
                                        use_cuda=self.use_cuda)
        # binary "has answer" classifier over the summarised state
        self.has_ans = nn.Sequential(nn.Dropout(p=dropout),
                                     nn.Linear(6 * hidden_size, 2))
예제 #34
0
파일: ra.py 프로젝트: jingmouren/monogreedy
class Model(object):
    """
    Region Attention model

    Theano image-captioning model that attends over projected image-region
    features at every decoding step of an LSTM language model.
    """
    def __init__(self, name='ra', nimg=2048, nnh=512, na=512, nh=512, nw=512, nout=8843, npatch=30, model_file=None):
        """Build the symbolic training graph.

        name: prefix used to name all sub-layers.
        nimg: dimension of the raw image-region features.
        nnh: hidden size of the attention scorer MLP.
        na: dimension of the projected region features (attention space).
        nh: LSTM hidden size.  nw: word-embedding size.  nout: vocabulary size.
        npatch: number of image regions per image.
        model_file: optional HDF5 checkpoint; its stored hyper-parameters
            override the arguments and its weights are loaded at the end.
        """
        self.name = name
        if model_file is not None:
            with h5py.File(model_file, 'r') as f:
                nimg = f.attrs['nimg']
                nnh = f.attrs['nnh']
                na = f.attrs['na']
                nh = f.attrs['nh']
                nw = f.attrs['nw']
                nout = f.attrs['nout']
                # npatch = f.attrs['npatch']
        self.config = {'nimg': nimg, 'nnh': nnh, 'na': na, 'nh': nh, 'nw': nw, 'nout': nout, 'npatch': npatch}

        # word embedding layer
        self.embedding = Embedding(n_emb=nout, dim_emb=nw, name=self.name+'@embedding')

        # initialization mlp layer: init_mlp maps the mean region feature to
        # the initial LSTM (c, h); proj_mlp projects raw features nimg -> na
        self.init_mlp = MLP(layer_sizes=[na, 2*nh], output_type='tanh', name=self.name+'@init_mlp')
        self.proj_mlp = MLP(layer_sizes=[nimg, na], output_type='tanh', name=self.name+'@proj_mlp')

        # lstm: input is [attended region e_t, word embedding]
        self.lstm = BasicLSTM(dim_x=na+nw, dim_h=nh, name=self.name+'@lstm')

        # prediction mlp: softmax over the vocabulary from [e_t, h_t, w_{t-1}]
        self.pred_mlp = MLP(layer_sizes=[na+nh+nw, nout], output_type='softmax', name=self.name+'@pred_mlp')

        # attention layer: context is [e_{t-1}, h_{t-1}, w_{t-1}]
        self.attention = Attention(dim_item=na, dim_context=na+nw+nh, hsize=nnh, name=self.name+'@attention')

        # inputs: cap appears to be (time, batch) word ids and img
        # (batch, npatch, nimg) region features — confirm with data pipeline
        cap = T.imatrix('cap')
        img = T.tensor3('img')
        self.inputs = [cap, img]

        # go through sequence: at each step feed word t-1 and score word t
        feat = self.proj_mlp.compute(img)
        init_e = feat.mean(axis=1)
        init_state = T.concatenate([init_e, self.init_mlp.compute(init_e)], axis=-1)
        (state, self.p, loss, self.alpha), _ = theano.scan(fn=self.scan_func,
                                                           sequences=[cap[0:-1, :], cap[1:, :]],
                                                           outputs_info=[init_state, None, None, None],
                                                           non_sequences=[feat])

        # loss function: mean cross-entropy over time steps and batch
        loss = T.mean(loss)
        self.costs = [loss]

        # layers and parameters
        self.layers = [self.embedding, self.init_mlp, self.proj_mlp, self.attention, self.lstm, self.pred_mlp]
        self.params = sum([l.params for l in self.layers], [])

        # load weights from file, if model_file is not None
        if model_file is not None:
            self.load_weights(model_file)

        # these functions and variables are used in test stage
        self._init_func = None
        self._step_func = None
        self._proj_func = None
        # NOTE(review): init_func stores *projected* (na-dim) features here,
        # and a sibling version of this class uses (1, npatch, na); only the
        # ndim of this placeholder matters since set_value replaces it, but
        # the last dimension looks inconsistent — confirm.
        self._feat_shared = theano.shared(np.zeros((1, npatch, nimg)).astype(theano.config.floatX))

    def compute(self, state, w_idx, feat):
        """One decoding step.

        state packs [e, c, h]; returns (new_state, next-word distribution p,
        attention weights alpha).
        """
        # word embedding
        word_vec = self.embedding.compute(w_idx)
        # split states
        e_tm1, c_tm1, h_tm1 = split_state(state, scheme=[(1, self.config['na']), (2, self.config['nh'])])
        # attention
        e_t, alpha = self.attention.compute(feat, T.concatenate([e_tm1, h_tm1, word_vec], axis=1))
        # lstm step
        e_w = T.concatenate([e_t, word_vec], axis=-1)
        c_t, h_t = self.lstm.compute(e_w, c_tm1, h_tm1)  # (mb,nh)
        # merge state
        new_state = T.concatenate([e_t, c_t, h_t], axis=-1)
        # predict word probability
        p = self.pred_mlp.compute(T.concatenate([e_t, h_t, word_vec], axis=-1))
        return new_state, p, alpha

    def scan_func(self, w_tm1, w_t, state, feat):
        """theano.scan body: step on w_{t-1} and score the gold word w_t."""
        # update state
        new_state, p, alpha = self.compute(state, w_tm1, feat)
        # cross-entropy loss
        loss = T.nnet.categorical_crossentropy(p, w_t)
        return new_state, p, loss, alpha

    def init_func(self, img_value):
        """Project an image's region features and return the initial state.

        Lazily compiles the projection and initialisation functions on first
        call, then stores the projected features in the shared variable.
        """
        if self._proj_func is None:
            img = T.tensor3()
            self._proj_func = theano.function([img], self.proj_mlp.compute(img))
        if self._init_func is None:
            init_e = self._feat_shared.mean(axis=1)
            init_state = T.concatenate([init_e, self.init_mlp.compute(init_e)], axis=-1)
            self._init_func = theano.function([], init_state)
        self._feat_shared.set_value(self._proj_func(img_value))
        return self._init_func()

    def step_func(self, state_value, w_value):
        """Advance one test-time step; returns (new_state, log-probabilities)."""
        if self._step_func is None:
            w = T.ivector()
            state = T.matrix()
            new_state, p, _ = self.compute(state, w, self._feat_shared)
            self._step_func = theano.function([state, w], [new_state, T.log(p)])
        return self._step_func(state_value, w_value)

    def save_to_dir(self, save_dir, idx):
        """Save layer weights plus hyper-parameters to <name>.h5.<idx>."""
        save_file = osp.join(save_dir, self.name+'.h5.'+str(idx))
        for l in self.layers:
            l.save_weights(save_file)
        # NOTE(review): h5py.File is opened without an explicit mode; newer
        # h5py versions require one — confirm the pinned h5py version.
        with h5py.File(save_file) as f:
            for k, v in self.config.items():
                f.attrs[k] = v

    def load_weights(self, model_file):
        """Load every layer's weights from an HDF5 checkpoint."""
        for l in self.layers:
            l.load_weights(model_file)
예제 #35
0
    def __init__(self,
                 name='gnic',
                 nimg=2048,
                 nh=512,
                 nw=512,
                 nout=8843,
                 model_file=None):
        """Build the Google-NIC-style captioner's training graph.

        nimg: image-feature size; nh: LSTM hidden size; nw: word-embedding
        size; nout: vocabulary size.  When model_file is given, its stored
        hyper-parameters override the arguments and weights are loaded.
        """
        self.name = name
        if model_file is not None:
            with h5py.File(model_file, 'r') as f:
                nimg = f.attrs['nimg']
                nh = f.attrs['nh']
                nw = f.attrs['nw']
                nout = f.attrs['nout']
        self.config = {'nimg': nimg, 'nh': nh, 'nw': nw, 'nout': nout}

        # word embedding layer
        self.embedding = Embedding(n_emb=nout,
                                   dim_emb=nw,
                                   name=self.name + '@embedding')

        # initialization mlp layer: image feature -> initial LSTM (c, h)
        self.proj_mlp = MLP(layer_sizes=[nimg, 2 * nh],
                            output_type='tanh',
                            name=self.name + '@proj_mlp')

        # lstm
        self.lstm = BasicLSTM(dim_x=nw, dim_h=nh, name=self.name + '@lstm')

        # prediction mlp: softmax over the vocabulary from [h_t, w_{t-1}]
        self.pred_mlp = MLP(layer_sizes=[nh + nw, nout],
                            output_type='softmax',
                            name=self.name + '@pred_mlp')

        # inputs: cap appears to be (time, batch) word ids and img a
        # (batch, nimg) feature matrix — confirm with data pipeline
        cap = T.imatrix('cap')
        img = T.matrix('img')
        self.inputs = [cap, img]

        # go through sequence: at each step feed word t-1 and score word t
        init_state = self.proj_mlp.compute(img)
        (state, self.p,
         loss), _ = theano.scan(fn=self.scan_func,
                                sequences=[cap[0:-1, :], cap[1:, :]],
                                outputs_info=[init_state, None, None])

        # loss function: mean cross-entropy over time steps and batch
        loss = T.mean(loss)
        self.costs = [loss]

        # layers and parameters
        self.layers = [self.embedding, self.proj_mlp, self.lstm, self.pred_mlp]
        self.params = sum([l.params for l in self.layers], [])

        # load weights from file, if model_file is not None
        if model_file is not None:
            self.load_weights(model_file)

        # these functions are used in test stage
        self._init_func = None
        self._step_func = None
예제 #36
0
class Model(object):
    """
    scene-specific contexts

    LSTM captioner whose per-step input is the word embedding concatenated
    with a fixed scene-context vector (built with Theano).
    """
    def __init__(self, name='ss', nimg=2048, nh=512, nw=512, nout=8843, ns=80, model_file=None):
        """Build the symbolic training graph.

        nimg: image-feature size; nh: LSTM hidden size; nw: word-embedding
        size; nout: vocabulary size; ns: scene-context vector size.
        When model_file is given, its stored hyper-parameters override the
        arguments and weights are loaded at the end.
        """
        self.name = name
        if model_file is not None:
            with h5py.File(model_file, 'r') as f:
                nimg = f.attrs['nimg']
                nh = f.attrs['nh']
                nw = f.attrs['nw']
                ns = f.attrs['ns']
                nout = f.attrs['nout']
        self.config = {'nimg': nimg, 'nh': nh, 'nw': nw, 'nout': nout, 'ns': ns}

        # word embedding layer
        self.embedding = Embedding(n_emb=nout, dim_emb=nw, name=self.name+'@embedding')

        # initialization mlp layer: image feature -> initial LSTM (c, h)
        self.proj_mlp = MLP(layer_sizes=[nimg, 2*nh], output_type='tanh', name=self.name+'@proj_mlp')

        # lstm: input is [word embedding, scene context]
        self.lstm = BasicLSTM(dim_x=nw+ns, dim_h=nh, name=self.name+'@lstm')

        # prediction mlp: softmax over the vocabulary from [h_t, w_{t-1}]
        self.pred_mlp = MLP(layer_sizes=[nh+nw, nout], output_type='softmax', name=self.name+'@pred_mlp')

        # inputs: cap appears to be (time, batch) word ids; img and scene are
        # per-example feature matrices — confirm with data pipeline
        cap = T.imatrix('cap')
        img = T.matrix('img')
        scene = T.matrix('scene')
        self.inputs = [cap, img, scene]

        # go through sequence: at each step feed word t-1 and score word t
        init_state = self.proj_mlp.compute(img)
        (state, self.p, loss), _ = theano.scan(fn=self.scan_func,
                                               sequences=[cap[0:-1, :], cap[1:, :]],
                                               outputs_info=[init_state, None, None],
                                               non_sequences=[scene])

        # loss function: mean cross-entropy over time steps and batch
        loss = T.mean(loss)
        self.costs = [loss]

        # layers and parameters
        self.layers = [self.embedding, self.proj_mlp, self.lstm, self.pred_mlp]
        self.params = sum([l.params for l in self.layers], [])

        # load weights from file, if model_file is not None
        if model_file is not None:
            self.load_weights(model_file)

        # initialization for test stage
        self._init_func = None
        self._step_func = None
        # holds the scene vector of the current image at test time
        self._scene_shared = theano.shared(np.zeros((1, ns)).astype(theano.config.floatX))

    def compute(self, state, w_idx, scene):
        """One decoding step; state packs [c, h].  Returns (new_state, p)."""
        # word embedding
        word_vec = self.embedding.compute(w_idx)
        # split states
        c_tm1, h_tm1 = split_state(state, scheme=[(2, self.config['nh'])])
        # lstm step
        w_s = T.concatenate([word_vec, scene], axis=1)
        c_t, h_t = self.lstm.compute(w_s, c_tm1, h_tm1)
        # merge state
        new_state = T.concatenate([c_t, h_t], axis=-1)
        # add w_{t-1} as feature
        h_and_w = T.concatenate([h_t, word_vec], axis=-1)
        # predict probability
        p = self.pred_mlp.compute(h_and_w)
        return new_state, p

    def scan_func(self, w_tm1, w_t, state, scene):
        """theano.scan body: step on w_{t-1} and score the gold word w_t."""
        # update state
        new_state, p = self.compute(state, w_tm1, scene)
        # cross-entropy loss
        loss = T.nnet.categorical_crossentropy(p, w_t)
        return new_state, p, loss

    def init_func(self, img_value, scene_value):
        """Store the scene vector and return the initial LSTM state."""
        if self._init_func is None:
            img = T.matrix()
            init_state = self.proj_mlp.compute(img)
            self._init_func = theano.function([img], init_state)
        self._scene_shared.set_value(scene_value)
        return self._init_func(img_value)

    def step_func(self, state_value, w_value):
        """Advance one test-time step; returns (new_state, log-probabilities)."""
        if self._step_func is None:
            w = T.ivector()
            state = T.matrix()
            new_state, p = self.compute(state, w, self._scene_shared)
            self._step_func = theano.function([state, w], [new_state, T.log(p)])
        return self._step_func(state_value, w_value)

    def save_to_dir(self, save_dir, idx):
        """Save layer weights plus hyper-parameters to <name>.h5.<idx>."""
        save_file = osp.join(save_dir, self.name+'.h5.'+str(idx))
        for l in self.layers:
            l.save_weights(save_file)
        # NOTE(review): h5py.File is opened without an explicit mode; newer
        # h5py versions require one — confirm the pinned h5py version.
        with h5py.File(save_file) as f:
            for k, v in self.config.items():
                f.attrs[k] = v

    def load_weights(self, model_file):
        """Load every layer's weights from an HDF5 checkpoint."""
        for l in self.layers:
            l.load_weights(model_file)
예제 #37
0
class Model(object):
    """
    an re-implementation of google NIC system, used as the baseline in our paper
    """
    def __init__(self,
                 name='gnic',
                 nimg=2048,
                 nh=512,
                 nw=512,
                 nout=8843,
                 model_file=None):
        # name: prefix for layer names and saved files
        # nimg: image-feature dim; nh: LSTM hidden dim; nw: word-embedding dim
        # nout: vocabulary size
        # model_file: optional HDF5 file -- when given, hyper-parameters are
        #   read from its attributes and weights are loaded at the end
        self.name = name
        if model_file is not None:
            # hyper-parameters stored in the file override the arguments
            with h5py.File(model_file, 'r') as f:
                nimg = f.attrs['nimg']
                nh = f.attrs['nh']
                nw = f.attrs['nw']
                nout = f.attrs['nout']
        self.config = {'nimg': nimg, 'nh': nh, 'nw': nw, 'nout': nout}

        # word embedding layer
        self.embedding = Embedding(n_emb=nout,
                                   dim_emb=nw,
                                   name=self.name + '@embedding')

        # initialization mlp layer: projects the image feature to the
        # initial packed (c, h) LSTM state, hence the 2 * nh output size
        self.proj_mlp = MLP(layer_sizes=[nimg, 2 * nh],
                            output_type='tanh',
                            name=self.name + '@proj_mlp')

        # lstm
        self.lstm = BasicLSTM(dim_x=nw, dim_h=nh, name=self.name + '@lstm')

        # prediction mlp: maps [h_t, w_{t-1}] to a softmax over the vocabulary
        self.pred_mlp = MLP(layer_sizes=[nh + nw, nout],
                            output_type='softmax',
                            name=self.name + '@pred_mlp')

        # inputs: cap holds integer word ids, img holds image features
        cap = T.imatrix('cap')
        img = T.matrix('img')
        self.inputs = [cap, img]

        # go through sequence: at every step feed w_{t-1} and score w_t
        init_state = self.proj_mlp.compute(img)
        (state, self.p,
         loss), _ = theano.scan(fn=self.scan_func,
                                sequences=[cap[0:-1, :], cap[1:, :]],
                                outputs_info=[init_state, None, None])

        # loss function: mean cross-entropy over all scan steps
        loss = T.mean(loss)
        self.costs = [loss]

        # layers and parameters
        self.layers = [self.embedding, self.proj_mlp, self.lstm, self.pred_mlp]
        self.params = sum([l.params for l in self.layers], [])

        # load weights from file, if model_file is not None
        if model_file is not None:
            self.load_weights(model_file)

        # these functions are used in test stage (compiled lazily on first use)
        self._init_func = None
        self._step_func = None

    def compute(self, state, w_idx):
        """One LSTM step: embed w_idx, update the packed [c, h] state and
        predict the next-word distribution.

        Returns the new packed state and the softmax probabilities.
        """
        # word embedding
        word_vec = self.embedding.compute(w_idx)
        # split states
        c_tm1, h_tm1 = split_state(state, scheme=[(2, self.config['nh'])])
        # lstm step
        c_t, h_t = self.lstm.compute(word_vec, c_tm1, h_tm1)
        # merge state
        new_state = T.concatenate([c_t, h_t], axis=-1)
        # add w_{t-1} as feature
        h_and_w = T.concatenate([h_t, word_vec], axis=-1)
        # predict probability
        p = self.pred_mlp.compute(h_and_w)
        return new_state, p

    def scan_func(self, w_tm1, w_t, state):
        """theano.scan body: advance the state with w_{t-1} and compute the
        cross-entropy of the ground-truth word w_t."""
        # update state
        new_state, p = self.compute(state, w_tm1)
        # cross-entropy loss
        loss = T.nnet.categorical_crossentropy(p, w_t)
        return new_state, p, loss

    def init_func(self, img_value):
        """Compile (on first call) and evaluate the initial-state function."""
        if self._init_func is None:
            img = T.matrix()
            init_state = self.proj_mlp.compute(img)
            self._init_func = theano.function([img], init_state)
        return self._init_func(img_value)

    def step_func(self, state_value, w_value):
        """Compile (on first call) and run one decoding step; returns the
        new state and log-probabilities (log-space for search scoring)."""
        if self._step_func is None:
            w = T.ivector()
            state = T.matrix()
            new_state, p = self.compute(state, w)
            self._step_func = theano.function([state, w],
                                              [new_state, T.log(p)])
        return self._step_func(state_value, w_value)

    def save_to_dir(self, save_dir, idx):
        """Append all layer weights and the config to
        ``<save_dir>/<name>.h5.<idx>``."""
        save_file = osp.join(save_dir, self.name + '.h5.' + str(idx))
        for l in self.layers:
            l.save_weights(save_file)
        # NOTE(review): no mode is passed here; h5py >= 3 opens read-only by
        # default, which would break the attribute writes below -- confirm
        # the targeted h5py version (pre-3.x defaulted to append mode)
        with h5py.File(save_file) as f:
            for k, v in self.config.items():
                f.attrs[k] = v

    def load_weights(self, model_file):
        """Restore every layer's weights from an HDF5 model file."""
        for l in self.layers:
            l.load_weights(model_file)
예제 #38
0
import numpy as np
from numpy.random import randn
from random import randint

from layers import Lstm, Softmax, Embedding

# finite-difference step and acceptance tolerance (presumably for a numerical
# gradient check elsewhere in this script -- TODO confirm)
DELTA = 1e-5
THRESHOLD = 1e-2

# end-of-sequence token id; HIDDEN_SIZE is not referenced in the visible code
EOS = 0
HIDDEN_SIZE = 10

# encoder stack: embeds token ids (vocab 5) into 10-d vectors, then an LSTM
input_layers = [
    Embedding(5, 10),
    Lstm(10, 10),
]

# decoder stack: its LSTM is chained to the encoder LSTM via `previous`,
# and a softmax maps the 10-d hidden state to 4 output classes
output_layers = [
    Embedding(5, 10),
    Lstm(10, 10, previous=input_layers[1]),
    Softmax(10, 4),
]

# one random two-token source (ids 0..4) / target (ids 0..3) pair
X = [randint(0, 4), randint(0, 4)]
Y = [randint(0, 3), randint(0, 3)]


def train():
    # reset state
    for layer in input_layers:
        layer.initSequence()