Example 1
class ObliqueForwardNet(object):
    def __init__(self, n_h):
        self.unit = GRU(n_in=n_h * 2, n_h=n_h)
        self.params = self.unit.params

    def forward_all(self, x, h_prev, h0):
        """
        :param x: 1D: n_prds, 2D: n_words, 3D: batch, 4D: dim_h
        :param h_prev: 1D: n_words, 2D: batch, 3D: dim_h
        :param h0: 1D: batch, 2D: dim_h
        :return: 1D: n_prds, 2D: n_words, 3D: batch, 4D: dim_h
        """
        h, _ = theano.scan(fn=self.forward_row,
                           sequences=[x],
                           outputs_info=[h_prev],
                           non_sequences=[h0])
        return h

    def forward_row(self, x, h_prev, h0):
        """
        :param x: 1D: n_words, 2D: batch, 3D: dim_h
        :param h_prev: 1D: n_words, 2D: batch, 3D: dim_h
        :param h0: 1D: batch, 2D: dim_h
        :return: 1D: n_words, 2D: batch, 3D: dim_h
        """
        return self.forward_column(T.concatenate([x, h_prev], axis=2), h0)

    def forward_column(self, x, h):
        """
        :param x: 1D: n_words, 2D: batch, 3D: dim_h * 2
        :param h: 1D: batch, 2D: dim_h
        :return: 1D: n_words, 2D: batch, 3D: dim_h
        """
        return self.unit.forward_all(x, h)
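A minimal usage sketch for the class above, assuming the GRU it wraps accepts symbolic Theano tensors; the tensor names and shapes below mirror the docstrings and are illustrative, not from the original repository:

import theano
import theano.tensor as T

n_h = 32
net = ObliqueForwardNet(n_h=n_h)

x = T.tensor4('x')             # (n_prds, n_words, batch, dim_h)
h_prev = T.tensor3('h_prev')   # (n_words, batch, dim_h)
h0 = T.matrix('h0')            # (batch, dim_h)

h = net.forward_all(x, h_prev, h0)                       # (n_prds, n_words, batch, dim_h)
f = theano.function(inputs=[x, h_prev, h0], outputs=h)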
Example 2
    def __init__(self,
                 rng,
                 embedding,
                 vocab_size,
                 hidden_size,
                 max_length,
                 num_layers=1):
        """
        model init
        :param rng: np random with seed.
        :param embedding: decoder embedding
        :param vocab_size: target vocab_size
        :param hidden_size: hidden size for gru layer
        :param max_length: sequence max length
        :param num_layers: num of layers
        """
        self.embedding = embedding
        self.hidden_size = hidden_size
        self.vocab_size = vocab_size
        self.num_layers = num_layers
        self.max_length = max_length
        self.gru_layer = GRU(rng, hidden_size, hidden_size)
        self.linear = theano.shared(value=(rng.randn(hidden_size, vocab_size) *
                                           0.1).astype(theano.config.floatX),
                                    name="linear",
                                    borrow=True)

        self.params = [self.linear]
        self.params += self.gru_layer.params
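The decoder's forward pass is not part of the snippet; the sketch below is only a guess at how self.linear and self.gru_layer might be combined for one decoding step (the method name forward_step, the gru_layer.step API, and indexing self.embedding by token id are all assumptions):

    def forward_step(self, y_prev, h_prev):
        # Assumes `import theano.tensor as T`, as in the surrounding module.
        # y_prev: (batch,) previous token ids; h_prev: (batch, hidden_size)
        emb = self.embedding[y_prev]              # look up decoder embeddings
        h = self.gru_layer.step(emb, h_prev)      # assumed single-step GRU API
        scores = T.dot(h, self.linear)            # (batch, vocab_size)
        return T.nnet.softmax(scores), h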
Example 3
 def __init__(self, rng, embedding, hidden_size, num_layers=1):
     """
     model init.
     :param rng: np random with seed.
     :param embedding: encoder embedding
     :param hidden_size: hidden_size for gru.
     :param num_layers: num of layers.
     """
     self.embedding = embedding
     self.num_layers = num_layers
     self.hidden_size = hidden_size
     self.gru_layer = GRU(rng, hidden_size, hidden_size)
     self.params = []
     self.params += self.gru_layer.params
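The encoder's forward method is not shown; a hedged sketch of how it might scan the GRU over a source sequence (the theano.scan wiring and the gru_layer.step signature are assumptions, not the original code):

 def forward(self, src_ids, h0):
     # Assumes `import theano` at module level.
     # src_ids: (seq_len, batch) token ids; h0: (batch, hidden_size)
     emb = self.embedding[src_ids]                # (seq_len, batch, hidden_size)
     h_seq, _ = theano.scan(fn=self.gru_layer.step,
                            sequences=[emb],
                            outputs_info=[h0])
     return h_seq                                 # (seq_len, batch, hidden_size)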
Example 4
    def __init__(self, rnn_type, input_size, node_fdim, hidden_size, depth):
        super(MPNEncoder, self).__init__()
        self.hidden_size = hidden_size
        self.input_size = input_size
        self.depth = depth
        self.W_o = nn.Sequential( 
                nn.Linear(node_fdim + hidden_size, hidden_size), 
                nn.ReLU()
        )

        if rnn_type == 'GRU':
            self.rnn = GRU(input_size, hidden_size, depth) 
        elif rnn_type == 'LSTM':
            self.rnn = LSTM(input_size, hidden_size, depth) 
        else:
            raise ValueError('unsupported rnn cell type ' + rnn_type)
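A hedged sketch of the readout that W_o typically implements in a message-passing encoder: the RNN-produced messages are aggregated per node and concatenated with the raw node features (the function and tensor names are illustrative; the actual forward pass is not shown):

import torch

def node_readout(encoder, node_feats, agg_messages):
    # node_feats:   (num_nodes, node_fdim)    raw node features
    # agg_messages: (num_nodes, hidden_size)  incoming messages aggregated per node
    return encoder.W_o(torch.cat([node_feats, agg_messages], dim=-1))  # (num_nodes, hidden_size)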
Example 5
 def __init__(self,
              vocab_size,
              embedding_dim,
              hidden_dim,
              n_classes=1,
              bidirectional=False,
              padding_idx=0,
              n_layers=1,
              dropout=0.2):
     super(SentimentGRU, self).__init__()
     self.bridge = nn.Linear(embedding_dim, embedding_dim)
     self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx)
     self.rnn = GRU(
         embedding_dim,
         hidden_dim,
     )
     self.out = nn.Linear(hidden_dim * n_layers, n_classes)
     self.n_layers = n_layers
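A hedged sketch of a matching forward pass, assuming the imported GRU behaves like torch.nn.GRU with time-major input and returns (output, h_n), and that n_layers matches the GRU's layer count; feeding the final hidden state to self.out is an assumption, not the original code:

 def forward(self, token_ids):
     # token_ids: (batch, seq_len)
     emb = self.bridge(self.embedding(token_ids))             # (batch, seq_len, embedding_dim)
     output, h_n = self.rnn(emb.transpose(0, 1))              # h_n: (n_layers, batch, hidden_dim)
     h = h_n.transpose(0, 1).reshape(token_ids.size(0), -1)   # (batch, hidden_dim * n_layers)
     return self.out(h)                                       # (batch, n_classes)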
Example 6
 def __init__(self, word_dim, hidden_dim, output_dim, bptt_truncate=1):
     GRU.__init__(self, word_dim, hidden_dim, output_dim, bptt_truncate)
     # Uniform initialization scaled by 1/sqrt(word_dim).
     self.wi = np.random.uniform(-np.sqrt(1. / word_dim), np.sqrt(1. / word_dim), (hidden_dim, word_dim))
     self.wh = np.random.uniform(-np.sqrt(1. / word_dim), np.sqrt(1. / word_dim), (hidden_dim, hidden_dim))
     self.aw = np.random.uniform(-np.sqrt(1. / word_dim), np.sqrt(1. / word_dim), (hidden_dim, hidden_dim))
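The uniform bounds above follow the usual 1/sqrt(fan_in) heuristic; a small helper expressing the same initialization (uniform_init is an illustrative name, not part of the original class):

import numpy as np

def uniform_init(shape, fan_in):
    """Sample weights uniformly in [-1/sqrt(fan_in), 1/sqrt(fan_in)]."""
    bound = np.sqrt(1. / fan_in)
    return np.random.uniform(-bound, bound, shape)

# e.g. self.wi = uniform_init((hidden_dim, word_dim), fan_in=word_dim)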
Example 7
# NOTE ==============================================
# This is where your model code will be called.
if args.model == 'RNN':
    model = RNN(emb_size=args.emb_size,
                hidden_size=args.hidden_size,
                seq_len=args.seq_len,
                batch_size=args.batch_size,
                vocab_size=vocab_size,
                num_layers=args.num_layers,
                dp_keep_prob=args.dp_keep_prob)
elif args.model == 'GRU':
    model = GRU(emb_size=args.emb_size,
                hidden_size=args.hidden_size,
                seq_len=args.seq_len,
                batch_size=args.batch_size,
                vocab_size=vocab_size,
                num_layers=args.num_layers,
                dp_keep_prob=args.dp_keep_prob)
elif args.model == 'TRANSFORMER':
    if args.debug:  # use a very small model
        model = TRANSFORMER(vocab_size=vocab_size, n_units=16, n_blocks=2)
    else:
        # Note that we're using num_layers and hidden_size to mean slightly
        # different things here than in the RNNs.
        # The Transformer also has other hyperparameters
        # (such as the number of attention heads) which can change its behavior.
        model = TRANSFORMER(vocab_size=vocab_size,
                            n_units=args.hidden_size,
                            n_blocks=args.num_layers,
                            dropout=1. - args.dp_keep_prob)
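Whichever branch runs, a common follow-up is moving the model to the training device and reporting its size; a hedged sketch, assuming RNN, GRU, and TRANSFORMER are torch.nn.Module subclasses:

import torch

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)
print('parameters:', sum(p.numel() for p in model.parameters()))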
Example 8
def get_lstm(opt):

    print('rnn_type ', opt.rnn_type)

    # LSTM
    if opt.rnn_type == "LSTM":
        core = LSTM.LSTM(opt.input_encoding_size, opt.vocab_size + 1,
                         opt.rnn_size, opt.drop_prob_lm)
    elif opt.rnn_type == "LSTM_SOFT_ATT":
        core = LSTM.LSTM_SOFT_ATT(opt.input_encoding_size, opt.vocab_size + 1,
                                  opt.rnn_size, opt.att_size, opt.drop_prob_lm)
    elif opt.rnn_type == "LSTM_DOUBLE_ATT":
        core = LSTM.LSTM_DOUBLE_ATT(opt.input_encoding_size, opt.vocab_size + 1,
                                    opt.rnn_size, opt.att_size, opt.drop_prob_lm)
    elif opt.rnn_type == "LSTM_SOFT_ATT_STACK":
        core = LSTM.LSTM_SOFT_ATT_STACK(opt.input_encoding_size, opt.vocab_size + 1, opt.num_layers,
                                        opt.rnn_size, opt.att_size, dropout=opt.drop_prob_lm)
    elif opt.rnn_type == "LSTM_DOUBLE_ATT_STACK":
        core = LSTM.LSTM_DOUBLE_ATT_STACK(opt.input_encoding_size, opt.vocab_size + 1, opt.num_layers,
                                          opt.rnn_size, opt.att_size, dropout=opt.drop_prob_lm)
    elif opt.rnn_type == "LSTM_DOUBLE_ATT_STACK_PARALLEL":
        core = LSTM.LSTM_DOUBLE_ATT_STACK_PARALLEL(opt.input_encoding_size, opt.vocab_size + 1, opt.num_layers,
                                                    opt.num_parallels, opt.rnn_size, opt.att_size,
                                                    dropout=opt.drop_prob_lm)
    elif opt.rnn_type == "LSTM_DOUBLE_ATT_STACK_PARALLEL_POLICY":
        core = LSTM.LSTM_DOUBLE_ATT_STACK_PARALLEL_POLICY(opt.input_encoding_size, opt.vocab_size + 1,
                                                          opt.num_layers,
                                                          opt.num_parallels, opt.rnn_size, opt.att_size,
                                                          dropout=opt.drop_prob_lm)
    elif opt.rnn_type == "LSTM_DOUBLE_ATT_STACK_PARALLEL_BN":
        core = LSTM.LSTM_DOUBLE_ATT_STACK_PARALLEL_BN(opt.input_encoding_size, opt.vocab_size + 1,
                                                      opt.num_layers,
                                                      opt.num_parallels, opt.rnn_size, opt.att_size,
                                                      dropout=opt.drop_prob_lm)
    elif opt.rnn_type == "LSTM_DOUBLE_ATT_STACK_PARALLEL_BN_RELU":
        core = LSTM.LSTM_DOUBLE_ATT_STACK_PARALLEL_BN_RELU(opt.input_encoding_size, opt.vocab_size + 1,
                                                           opt.num_layers,
                                                           opt.num_parallels, opt.rnn_size, opt.att_size,
                                                           dropout=opt.drop_prob_lm)
    elif opt.rnn_type == "LSTM_DOUBLE_ATT_STACK_PARALLEL_DROPOUT":
        core = LSTM.LSTM_DOUBLE_ATT_STACK_PARALLEL_DROPOUT(opt.input_encoding_size, opt.vocab_size + 1,
                                                           opt.num_layers,
                                                           opt.num_parallels, opt.rnn_size, opt.att_size,
                                                           dropout=opt.drop_prob_lm)
    elif opt.rnn_type == "LSTM_DOUBLE_ATT_STACK_PARALLEL_DROPOUT_SET":
        core = LSTM.LSTM_DOUBLE_ATT_STACK_PARALLEL_DROPOUT_SET(opt.input_encoding_size, opt.vocab_size + 1,
                                                               opt.num_layers,
                                                               opt.num_parallels,
                                                               opt.rnn_size,
                                                               opt.rnn_size_list, opt.att_size,
                                                               dropout=opt.drop_prob_lm)
    elif opt.rnn_type == "GRU_DOUBLE_ATT_STACK_PARALLEL_DROPOUT":
        core = GRU.GRU_DOUBLE_ATT_STACK_PARALLEL_DROPOUT(opt.input_encoding_size, opt.vocab_size + 1,
                                                         opt.num_layers,
                                                         opt.num_parallels, opt.rnn_size, opt.att_size,
                                                         dropout=opt.drop_prob_lm)
    elif opt.rnn_type == "LSTM_IT_ATT":
        core = LSTM1.LSTM_IT_ATT(opt.input_encoding_size,
                                 opt.vocab_size + 1,
                                 opt.rnn_size,
                                 opt.att_size,
                                 opt.drop_prob_lm,
                                 opt.num_layers,
                                 opt.word_input_layer,
                                 opt.att_input_layer)
    elif opt.rnn_type == "LSTM_IT_ATT_COMBINE":
        core = LSTM1.LSTM_IT_ATT_COMBINE(opt.input_encoding_size,
                                         opt.vocab_size + 1,
                                         opt.rnn_size,
                                         opt.att_size,
                                         opt.drop_prob_lm,
                                         opt.num_layers,
                                         opt.word_input_layer,
                                         opt.att_input_layer)
    elif opt.rnn_type == "FO_IT_ATT_COMBINE":
        core = LSTM1.FO_IT_ATT_COMBINE(opt.input_encoding_size,
                                       opt.vocab_size + 1,
                                       opt.rnn_size,
                                       opt.att_size,
                                       opt.drop_prob_lm,
                                       opt.num_layers,
                                       opt.word_input_layer,
                                       opt.att_input_layer)
    elif opt.rnn_type == "CONV_IT_ATT_COMBINE":
        core = LSTM1.CONV_IT_ATT_COMBINE(opt.input_encoding_size,
                                         opt.vocab_size + 1,
                                         opt.rnn_size,
                                         opt.att_size,
                                         opt.drop_prob_lm,
                                         opt.num_layers,
                                         opt.word_input_layer,
                                         opt.att_input_layer)
    elif opt.rnn_type == "CONV_LSTM":
        core = LSTM1.CONV_LSTM(opt.input_encoding_size, opt.vocab_size + 1,
                               opt.rnn_size, opt.drop_prob_lm, opt.num_layers, opt.block_num, opt.use_proj_mul)
    elif opt.rnn_type == "LSTM_DOUBLE_ATT_STACK_PARALLEL_NEW":
        core = LSTM1.LSTM_DOUBLE_ATT_STACK_PARALLEL(opt.input_encoding_size, opt.vocab_size + 1, opt.num_layers,
                                                    opt.num_parallels, opt.rnn_size, opt.att_size,
                                                    dropout=opt.drop_prob_lm)
    elif opt.rnn_type == "LSTM_DOUBLE_ATT_STACK_PARALLEL_MUL_OUT":
        core = LSTM1.LSTM_DOUBLE_ATT_STACK_PARALLEL_MUL_OUT(opt.input_encoding_size, opt.vocab_size + 1, opt.num_layers,
                                                    opt.num_parallels, opt.rnn_size, opt.att_size,
                                                    dropout=opt.drop_prob_lm)
    elif opt.rnn_type == "LSTM_DOUBLE_ATT_STACK_PARALLEL_MUL_OUT_NEW":
        core = LSTM1.LSTM_DOUBLE_ATT_STACK_PARALLEL_MUL_OUT_NEW(opt.input_encoding_size, opt.vocab_size + 1, opt.num_layers,
                                                    opt.num_parallels, opt.rnn_size, opt.att_size,
                                                    dropout=opt.drop_prob_lm)
    elif opt.rnn_type == "LSTM_DOUBLE_ATT_STACK_PARALLEL_MUL_OUT_ATT":
        core = LSTM2.LSTM_DOUBLE_ATT_STACK_PARALLEL_MUL_OUT_ATT(opt.input_encoding_size, opt.vocab_size + 1, opt.num_layers,
                                                    opt.num_parallels, opt.rnn_size, opt.att_size,
                                                    dropout=opt.drop_prob_lm)
    elif opt.rnn_type == "LSTM_DOUBLE_ATT_STACK_PARALLEL_MUL_OUT_ATT_WITH_BU":
        core = LSTM2.LSTM_DOUBLE_ATT_STACK_PARALLEL_MUL_OUT_ATT_WITH_BU(opt.input_encoding_size, opt.vocab_size + 1, opt.num_layers,
                                                    opt.num_parallels, opt.rnn_size, opt.att_size, opt.bu_size,
                                                    dropout=opt.drop_prob_lm)
    elif opt.rnn_type == "LSTM_DOUBLE_ATT_STACK_PARALLEL_MUL_OUT_ATT_NEW":
        core = LSTM2.LSTM_DOUBLE_ATT_STACK_PARALLEL_MUL_OUT_ATT_NEW(opt.input_encoding_size, opt.vocab_size + 1, opt.num_layers,
                                                    opt.num_parallels, opt.rnn_size, opt.att_size,
                                                    dropout=opt.drop_prob_lm)
    elif opt.rnn_type == "LSTM_DOUBLE_ATT_STACK_PARALLEL_MUL_OUT_ATT_LSTM_MUL":
        core = LSTM2.LSTM_DOUBLE_ATT_STACK_PARALLEL_MUL_OUT_ATT_LSTM_MUL(opt.input_encoding_size, opt.vocab_size + 1, opt.num_layers,
                                                    opt.num_parallels, opt.rnn_size, opt.att_size,
                                                    opt.drop_prob_lm, opt.block_num)
    elif opt.rnn_type == "LSTM_DOUBLE_ATT_STACK_PARALLEL_A":
        core = LSTM2.LSTM_DOUBLE_ATT_STACK_PARALLEL_A(opt.input_encoding_size, opt.vocab_size + 1, opt.num_layers,
                                                    opt.num_parallels, opt.rnn_size, opt.att_size,
                                                    dropout=opt.drop_prob_lm)
    elif opt.rnn_type == "LSTM_SOFT_ATT_STACK_PARALLEL":
        core = LSTM2.LSTM_SOFT_ATT_STACK_PARALLEL(opt.input_encoding_size, opt.vocab_size + 1, opt.num_layers,
                                                    opt.num_parallels, opt.rnn_size, opt.att_size,
                                                    dropout=opt.drop_prob_lm)
    elif opt.rnn_type == "LSTM_SOFT_ATT_STACK_PARALLEL_WITH_WEIGHT":
        core = LSTM2.LSTM_SOFT_ATT_STACK_PARALLEL_WITH_WEIGHT(opt.input_encoding_size, opt.vocab_size + 1, opt.num_layers,
                                                    opt.num_parallels, opt.rnn_size, opt.att_size,
                                                    dropout=opt.drop_prob_lm)
    elif opt.rnn_type == "LSTM_SOFT_ATT_STACK_PARALLEL_WITH_MUL_WEIGHT":
        core = LSTM2.LSTM_SOFT_ATT_STACK_PARALLEL_WITH_MUL_WEIGHT(opt.input_encoding_size, opt.vocab_size + 1, opt.num_layers,
                                                    opt.num_parallels, opt.rnn_size, opt.att_size,
                                                    dropout=opt.drop_prob_lm)
    elif opt.rnn_type == "LSTM_DOUBLE_ATT_STACK_PARALLEL_MUL_OUT_ATT_WITH_WEIGHT":
        core = LSTM2.LSTM_DOUBLE_ATT_STACK_PARALLEL_MUL_OUT_ATT_WITH_WEIGHT(opt.input_encoding_size, opt.vocab_size + 1, opt.num_layers,
                                                    opt.num_parallels, opt.rnn_size, opt.att_size,
                                                    dropout=opt.drop_prob_lm)
    elif opt.rnn_type == "LSTM_SOFT_ATT_STACK_PARALLEL_WITH_WEIGHT_SPP":
        core = LSTM3.LSTM_SOFT_ATT_STACK_PARALLEL_WITH_WEIGHT_SPP(opt.input_encoding_size, opt.vocab_size + 1, opt.num_layers,
                                                          opt.num_parallels, opt.rnn_size, opt.att_size,
                                                          opt.pool_size, opt.spp_num,
                                                          dropout=opt.drop_prob_lm)
    elif opt.rnn_type == "LSTM_SOFT_ATT_STACK_PARALLEL_SPP":
        core = LSTM3.LSTM_SOFT_ATT_STACK_PARALLEL_SPP(opt.input_encoding_size, opt.vocab_size + 1, opt.num_layers,
                                                          opt.num_parallels, opt.rnn_size, opt.att_size,
                                                          opt.pool_size, opt.spp_num,
                                                          dropout=opt.drop_prob_lm)
    elif opt.rnn_type == "LSTM_SOFT_ATT_STACK_PARALLEL_MEMORY":
        core = LSTM4.LSTM_SOFT_ATT_STACK_PARALLEL_MEMORY(opt.input_encoding_size, opt.vocab_size + 1, opt.num_layers,
                                                          opt.num_parallels, opt.rnn_size, opt.att_size, opt.memory_num_hop,
                                                          dropout=opt.drop_prob_lm)
    elif opt.rnn_type == "LSTM_SOFT_ATT_STACK_PARALLEL_NO_MEMORY":
        core = LSTM4.LSTM_SOFT_ATT_STACK_PARALLEL_NO_MEMORY(opt.input_encoding_size, opt.vocab_size + 1, opt.num_layers,
                                                          opt.num_parallels, opt.rnn_size, opt.att_size,
                                                          dropout=opt.drop_prob_lm)
    elif opt.rnn_type == "LSTM_SOFT_ATT_STACK_PARALLEL_WITH_WEIGHT_BU":
        core = LSTM5.LSTM_SOFT_ATT_STACK_PARALLEL_WITH_WEIGHT_BU(opt.input_encoding_size, opt.vocab_size + 1,
                                                              opt.num_layers,
                                                              opt.num_parallels, opt.rnn_size, opt.att_size, opt.bu_size,
                                                              dropout=opt.drop_prob_lm)
    elif opt.rnn_type == "LSTM_C_S_ATT_STACK_PARALLEL_WITH_WEIGHT_BU":
        core = LSTM5.LSTM_C_S_ATT_STACK_PARALLEL_WITH_WEIGHT_BU(opt.input_encoding_size, opt.vocab_size + 1,
                                                              opt.num_layers,
                                                              opt.num_parallels, opt.rnn_size, opt.att_size, opt.bu_size,
                                                              dropout=opt.drop_prob_lm)
    elif opt.rnn_type == "LSTM_WITH_TOP_DOWN_ATTEN":
        core = LSTM6.LSTM_WITH_TOP_DOWN_ATTEN(opt.input_encoding_size, opt.vocab_size + 1,
                                                                 opt.num_layers,
                                                                 opt.num_parallels, opt.rnn_size, opt.att_size,
                                                                 opt.bu_size,
                                                                 opt.bu_num,
                                                                 dropout=opt.drop_prob_lm)
    elif opt.rnn_type == "LSTM_SOFT_ATT_STACK_PARALLEL_WITH_FC_WEIGHT":
        core = LSTM2.LSTM_SOFT_ATT_STACK_PARALLEL_WITH_FC_WEIGHT(opt.input_encoding_size, opt.vocab_size + 1,
                                                              opt.num_layers,
                                                              opt.num_parallels, opt.rnn_size, opt.att_size,
                                                              dropout=opt.drop_prob_lm)
    else:
        raise ValueError("rnn type not supported: {}".format(opt.rnn_type))

    return core
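A hedged usage sketch of the factory above, using an argparse-style namespace that carries only the options the plain LSTM branch reads (the concrete values are illustrative):

from types import SimpleNamespace

opt = SimpleNamespace(rnn_type='LSTM',
                      input_encoding_size=512,
                      vocab_size=9487,
                      rnn_size=512,
                      drop_prob_lm=0.5)
core = get_lstm(opt)   # prints rnn_type and builds LSTM.LSTM(...)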
Example 9
 def __init__(self, n_h):
     self.unit = GRU(n_in=n_h * 2, n_h=n_h)
     self.params = self.unit.params
Example 10
        loss_log = []

        if 'ResNestedLSTM' == rnn_type:
            rnn = ResNestedLSTM(x_size, state_size, layer_norm=layer_norm)
        elif 'ResLSTM' == rnn_type:
            rnn = ResLSTM(x_size, state_size, layer_norm=layer_norm)
        elif 'ResRNN' == rnn_type:
            rnn = ResRNN(x_size, state_size, layer_norm=layer_norm)
        elif 'NestedLSTM' == rnn_type:
            rnn = NestedLSTM(x_size, state_size, layer_norm=layer_norm)
        elif 'LSTM' == rnn_type:
            rnn = LSTM(x_size, state_size, layer_norm=layer_norm)
        elif 'DoubleGRU' == rnn_type:
            rnn = DoubleGRU(x_size, state_size, layer_norm=layer_norm)
        elif 'GRU' == rnn_type:
            rnn = GRU(x_size, state_size, layer_norm=layer_norm)
        elif "ResGRU" == rnn_type:
            rnn = ResGRU(x_size, state_size, layer_norm=layer_norm)
        adam = optim.SGD(rnn.parameters(), lr=lr)
        adam.zero_grad()
        classifier = Variable(rand_vector.clone(), requires_grad=True)

        for i in range(n_epochs):
            X, Y = Variable(X.data), Variable(Y.data)
            state_vars = [
                Variable(torch.zeros(batch_size, state_size))
                for i in range(rnn.n_state_vars)
            ]
            for j in range(X.shape[1]):
                x, y = X[:, j], Y[:, j]
                prediction, state_vars = pred_fxn(rnn, state_vars, classifier,