Python LSTMLayer Examples

Programming Language: Python

Namespace/Package Name: lstm

Class/Type: LSTMLayer

Examples at hotexamples.com: 9

Python LSTMLayer - 9 examples found. These are the top-rated real-world Python examples of lstm.LSTMLayer, extracted from open-source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

LSTMLayer(9)

forward(1)

hidden(1)

init_hidden(1)

parameters(1)

train(1)

Example #1

Show file

    def __init__(self, num_input=256, num_hidden=512, num_output=256):
        """Build a bidirectional LSTM graph and compile its Theano functions.

        A forward and a backward ``LSTMLayer`` read the same input layer and
        both feed one ``FullyConnectedLayer``.  The prediction is the sigmoid
        of that layer's output averaged over axis 0.  Two compiled functions
        are stored on the instance: ``self.train`` (returns the blended cost
        and the last-step cost, and applies the momentum updates) and
        ``self.predict`` (returns ``Y_hat[-1]``).

        Args:
            num_input: input size handed to both LSTM layers.
            num_hidden: hidden size of each LSTM layer.
            num_output: output size of the fully connected layer.
        """
        # Symbolic inputs of the compiled functions.
        x_sym = T.matrix()
        y_sym = T.matrix()
        eta_sym = T.scalar()    # forwarded to momentum(); presumably the learning rate
        alpha_sym = T.scalar()  # blend weight between the two cost terms

        self.num_input = num_input
        self.num_hidden = num_hidden
        self.num_output = num_output

        def _cross_entropy(target, estimate):
            # Negated mean of the element-wise binary cross-entropy terms.
            return -T.mean(target * T.log(estimate)
                           + (1 - target) * T.log(1 - estimate))

        source = InputLayer(x_sym, name="inputs")
        forward_lstm = LSTMLayer(num_input,
                                 num_hidden,
                                 input_layers=[source],
                                 name="lstm1f")
        backward_lstm = LSTMLayer(num_input,
                                  num_hidden,
                                  input_layers=[source],
                                  name="lstm1b",
                                  go_backwards=True)

        # The dense layer sees both directions, hence 2 * num_hidden inputs.
        dense = FullyConnectedLayer(2 * num_hidden,
                                    num_output,
                                    input_layers=[forward_lstm, backward_lstm],
                                    name="yhat")

        y_hat = sigmoid(T.mean(dense.output(), axis=0))

        self.layers = (source, forward_lstm, backward_lstm, dense)

        weights = get_params(self.layers)
        weight_caches = make_caches(weights)

        whole_cost = _cross_entropy(y_sym, y_hat)
        final_cost = _cross_entropy(y_sym[-1], y_hat[-1])

        blended_cost = alpha_sym * whole_cost + (1 - alpha_sym) * final_cost

        step_updates = momentum(blended_cost,
                                weights,
                                weight_caches,
                                eta_sym,
                                clip_at=3.0)

        self.train = theano.function([x_sym, y_sym, eta_sym, alpha_sym],
                                     [blended_cost, final_cost],
                                     updates=step_updates,
                                     allow_input_downcast=True)

        self.predict = theano.function([x_sym],
                                       [y_hat[-1]],
                                       allow_input_downcast=True)

Example #2

Show file

File: spec.py Project: shijogeorge24/seq2sql

  def create_rnn_layer(self, hidden_dim, input_dim, vocab_size, is_encoder):
    """Instantiate the recurrent layer selected by ``self.rnn_type``.

    ``'vanillarnn'`` and ``'gru'`` map to their own layer classes; both
    ``'lstm'`` and ``'atnh'`` build an ``LSTMLayer``.  Every layer receives
    ``create_init_state=is_encoder``.

    Raises:
      Exception: if ``self.rnn_type`` is none of the supported names.
    """
    kind = self.rnn_type
    # Resolve the class lazily so that only the selected name is looked up.
    if kind == 'vanillarnn':
      layer_cls = VanillaRNNLayer
    elif kind == 'gru':
      layer_cls = GRULayer
    elif kind in ('lstm', 'atnh'):
      layer_cls = LSTMLayer
    else:
      raise Exception('Unrecognized rnn_type %s' % kind)

    return layer_cls(hidden_dim, input_dim, vocab_size,
                     create_init_state=is_encoder)

Example #3

Show file

File: char_rnn.py Project: zackchase/PyRNN

    def __init__(self):
        """Build a two-layer LSTM with a softmax head and compile it.

        The layer sizes are fixed in the body (256 inputs, 500 hidden units,
        256 outputs).  Two Theano functions are stored on the instance:
        ``self.train`` (returns the mean categorical cross-entropy and applies
        the momentum updates) and ``self.predict_char`` (returns the softmax
        output and applies the updates from ``one_step_updates``).
        """
        # Symbolic inputs of the compiled functions.
        x_sym = T.matrix()
        y_sym = T.matrix()
        eta_sym = T.scalar()   # forwarded to momentum(); presumably the learning rate
        temp_sym = T.scalar()  # handed to the softmax layer as its temperature

        n_in, n_hid, n_out = 256, 500, 256

        source = InputLayer(x_sym, name="inputs")
        first_lstm = LSTMLayer(n_in, n_hid, input_layer=source, name="lstm1")
        second_lstm = LSTMLayer(n_hid,
                                n_hid,
                                input_layer=first_lstm,
                                name="lstm2")
        head = SoftmaxLayer(n_hid,
                            n_out,
                            input_layer=second_lstm,
                            name="yhat",
                            temperature=temp_sym)

        y_hat = head.output()

        self.layers = (source, first_lstm, second_lstm, head)

        weights = get_params(self.layers)
        weight_caches = make_caches(weights)

        per_row_loss = T.nnet.categorical_crossentropy(y_hat, y_sym)
        loss = T.mean(per_row_loss)
        train_updates = momentum(loss, weights, weight_caches, eta_sym)

        self.train = theano.function([x_sym, y_sym, eta_sym, temp_sym],
                                     loss,
                                     updates=train_updates,
                                     allow_input_downcast=True)

        # Prediction uses its own update set, built from the layers.
        sampling_updates = one_step_updates(self.layers)
        self.predict_char = theano.function([x_sym, temp_sym],
                                            y_hat,
                                            updates=sampling_updates,
                                            allow_input_downcast=True)

Example #4

Show file

    def __init__(self,
                 num_input=256,
                 num_hidden=None,
                 num_output=256,
                 clip_at=0.0,
                 scale_norm=0.0):
        """Build a stacked LSTM with a sigmoid head and compile it.

        One ``LSTMLayer`` is created per entry of ``num_hidden``; each is
        followed by a ``DropoutLayer`` whose probability is a symbolic scalar
        supplied at call time.  Three Theano functions are stored on the
        instance: ``train_func``, ``predict_func`` (last row of the output)
        and ``predict_sequence_func`` (the whole output).

        Args:
            num_input: input size of the first LSTM layer.
            num_hidden: iterable of hidden sizes, one per LSTM layer.
                Defaults to ``[512, 512]``; a fresh list is created for every
                instance so the default is never shared between objects.
            num_output: output size of the sigmoid layer.
            clip_at: forwarded to ``momentum`` as ``clip_at``.
            scale_norm: forwarded to ``momentum`` as ``scale_norm``.
        """
        if num_hidden is None:
            # Avoid a mutable default argument: the list is stored on self.
            num_hidden = [512, 512]

        # Symbolic inputs of the compiled functions.
        X = T.matrix()
        Y = T.matrix()
        eta = T.scalar()
        alpha = T.scalar()
        lambda2 = T.scalar()
        dropout_lstm = T.scalar()

        self.num_input = num_input
        self.num_hidden = num_hidden
        self.num_output = num_output
        self.clip_at = clip_at
        self.scale_norm = scale_norm

        inputs = InputLayer(X, name="inputs")
        num_prev = num_input
        prev_layer = inputs

        self.layers = [inputs]
        for i, num_curr in enumerate(num_hidden):
            # NOTE(review): `drop_prob` is not defined anywhere in this
            # constructor; unless it is a module-level name this line raises
            # NameError.  Confirm whether `dropout_lstm` was intended.
            lstm = LSTMLayer(num_prev,
                             num_curr,
                             input_layers=[prev_layer],
                             name="lstm{0}".format(i + 1),
                             drop_prob=drop_prob)
            num_prev = num_curr
            # The next layer reads from the dropout wrapper, but only the
            # LSTM itself is recorded in self.layers.
            prev_layer = DropoutLayer(input_layers=[lstm],
                                      dropout_probability=dropout_lstm)
            self.layers.append(lstm)
        sigmoid = SigmoidLayer(num_prev,
                               num_output,
                               input_layers=[prev_layer],
                               name="yhat")
        self.layers.append(sigmoid)
        Y_hat = sigmoid.output()

        params = get_params(self.layers)
        caches = make_caches(params)

        # Binary cross-entropy over the full output and over its last row.
        mean_cost = -T.mean(Y * T.log(Y_hat) + (1 - Y) * T.log(1 - Y_hat))

        last_step_cost = -T.mean(Y[-1] * T.log(Y_hat[-1]) +
                                 (1 - Y[-1]) * T.log(1 - Y_hat[-1]))

        # alpha blends the two cost terms.
        cost = alpha * mean_cost + (1 - alpha) * last_step_cost

        updates = momentum(cost,
                           params,
                           caches,
                           eta,
                           clip_at=self.clip_at,
                           scale_norm=self.scale_norm,
                           lambda2=lambda2)

        self.train_func = theano.function(
            [X, Y, eta, alpha, lambda2, dropout_lstm], [cost, last_step_cost],
            updates=updates,
            allow_input_downcast=True)

        self.predict_func = theano.function([X, dropout_lstm], [Y_hat[-1]],
                                            allow_input_downcast=True)

        self.predict_sequence_func = theano.function([X, dropout_lstm],
                                                     [Y_hat],
                                                     allow_input_downcast=True)

Example #5

Show file

File: main.py Project: wzzzzZ1/Context-Aware-Sequential-Recommendation

def main(num_epochs=NUM_EPOCHS, vocab_size=VOCAB_SIZE):
    """Build the network selected by MODEL_TYPE, then train and evaluate it.

    The function builds a Lasagne recurrent layer followed by a dense layer,
    compiles Theano ``predict`` / ``train`` / ``compute_cost`` functions, and
    runs the training loop.  After every pass over the training data the model
    is evaluated; once ``current_epoch`` exceeds FIXED_EPOCHS, prefixes of the
    test sequences are appended to the training set.

    Configuration and data are read from module-level names (MODEL_TYPE,
    N_HIDDEN, BATCH_SIZE, train_data, test_data, ...).

    Args:
        num_epochs: number of passes over the training data before stopping.
        vocab_size: number of output units of the dense layer; also the range
            negative samples are drawn from during evaluation.
    """
    logging.info("Building network ...")

    # First, we build the network, starting with an input layer
    # Recurrent layers expect input of shape
    # (batch size, SEQ_LENGTH, num_features)
    l_in = lasagne.layers.InputLayer(shape=(None, None, NDIM))
    l_mask = lasagne.layers.InputLayer(shape=(None, None))

    # We now build the LSTM layer which takes l_in as the input layer
    # We clip the gradients at GRAD_CLIP to prevent the problem of exploding gradients.
    l_forward = None

    if MODEL_TYPE == 'LSTM' or MODEL_TYPE == 'LSTM_T':
        l_t = lasagne.layers.InputLayer(
            shape=(None, None)) if USE_TIME_INPUT else None
        l_forward = LSTMLayer(l_in,
                              time_input=l_t,
                              mask_input=l_mask,
                              num_units=N_HIDDEN,
                              peepholes=True,
                              ingate=lasagne.layers.Gate(),
                              forgetgate=lasagne.layers.Gate(),
                              cell=lasagne.layers.Gate(
                                  W_cell=None,
                                  nonlinearity=lasagne.nonlinearities.tanh),
                              outgate=lasagne.layers.Gate(),
                              cell_init=lasagne.init.Constant(0.),
                              hid_init=lasagne.init.Constant(0.),
                              grad_clipping=GRAD_CLIP,
                              nonlinearity=lasagne.nonlinearities.tanh,
                              bn=BN,
                              only_return_final=False)
    elif MODEL_TYPE == 'TLSTM1':
        l_t = lasagne.layers.InputLayer(shape=(None, None))
        l_forward = TLSTM1Layer(
            l_in,
            time_input=l_t,
            num_units=N_HIDDEN,
            mask_input=l_mask,
            peepholes=True,
            ingate=lasagne.layers.Gate(),
            forgetgate=lasagne.layers.Gate(),
            cell=lasagne.layers.Gate(W_cell=None,
                                     nonlinearity=lasagne.nonlinearities.tanh),
            outgate=OutGate(),
            nonlinearity=lasagne.nonlinearities.tanh,
            cell_init=lasagne.init.Constant(0.),
            hid_init=lasagne.init.Constant(0.),
            grad_clipping=GRAD_CLIP,
            only_return_final=False,
            bn=BN,
        )
    elif MODEL_TYPE == 'TLSTM2':
        l_t = lasagne.layers.InputLayer(shape=(None, None))
        l_forward = TLSTM2Layer(
            l_in,
            time_input=l_t,
            num_units=N_HIDDEN,
            mask_input=l_mask,
            peepholes=True,
            ingate=lasagne.layers.Gate(),
            forgetgate=lasagne.layers.Gate(),
            cell=lasagne.layers.Gate(W_cell=None,
                                     nonlinearity=lasagne.nonlinearities.tanh),
            outgate=OutGate(),
            nonlinearity=lasagne.nonlinearities.tanh,
            cell_init=lasagne.init.Constant(0.),
            hid_init=lasagne.init.Constant(0.),
            grad_clipping=GRAD_CLIP,
            only_return_final=False,
            bn=BN,
        )
    elif MODEL_TYPE == 'TLSTM3':
        l_t = lasagne.layers.InputLayer(shape=(None, None))
        l_forward = TLSTM3Layer(
            l_in,
            time_input=l_t,
            num_units=N_HIDDEN,
            mask_input=l_mask,
            peepholes=True,
            ingate=lasagne.layers.Gate(),
            # forgetgate=lasagne.layers.Gate(),
            cell=lasagne.layers.Gate(W_cell=None,
                                     nonlinearity=lasagne.nonlinearities.tanh),
            outgate=OutGate(),
            nonlinearity=lasagne.nonlinearities.tanh,
            cell_init=lasagne.init.Constant(0.),
            hid_init=lasagne.init.Constant(0.),
            grad_clipping=GRAD_CLIP,
            only_return_final=False,
            bn=BN,
        )
    elif MODEL_TYPE == 'PLSTM':
        l_t = lasagne.layers.InputLayer(shape=(None, None))
        l_forward = PLSTMLayer(l_in,
                               time_input=l_t,
                               num_units=N_HIDDEN,
                               mask_input=l_mask,
                               grad_clipping=GRAD_CLIP,
                               bn=BN,
                               timegate=PLSTMTimeGate())

    # Theano tensor for the targets
    target_values = T.matrix('target_values', dtype='int32')
    # The output of l_forward of shape (batch_size, time_sequence, N_HIDDEN) is then passed through the
    # softmax nonlinearity to
    # create probability distribution of the prediction
    # The output of this stage is (batch_size, time_sequence, vocab_size)
    l_out = lasagne.layers.DenseLayer(l_forward,
                                      num_units=vocab_size,
                                      W=lasagne.init.Normal(),
                                      num_leading_axes=2,
                                      nonlinearity=None)
    # lasagne.layers.get_output produces a variable for the output of the net
    network_output = lasagne.layers.get_output(l_out)
    # We need to sum up all the cost through time.
    # network_output (time_sequence, batch_size, vocab_size)
    network_output = network_output.dimshuffle(1, 0, 2)

    def calculate_softmax(n_input):
        """Scan step: apply softmax to one time slice of the output."""
        return T.nnet.softmax(n_input)

    def merge_cost(n_input, n_target, n_mask, cost_prev):
        """Scan step: add this slice's masked cross-entropy to the total."""
        n_target = n_target.ravel()
        n_cost = T.nnet.categorical_crossentropy(n_input, n_target)
        n_cost = n_cost * n_mask
        n_cost = n_cost.sum()

        return cost_prev + n_cost

    network_output_softmax, _ = theano.scan(fn=calculate_softmax,
                                            sequences=network_output)

    # The loss function is calculated as the mean of the (categorical) cross-entropy between the prediction and target.
    m_cost, _ = theano.scan(fn=merge_cost,
                            sequences=[
                                network_output_softmax, target_values.T,
                                l_mask.input_var.T
                            ],
                            outputs_info=T.constant(0.))
    # The last scan output is the running total over all time steps;
    # dividing by the mask sum averages over the unmasked positions.
    m_cost = m_cost[-1]
    cost = m_cost / l_mask.input_var.sum()

    # convert back to: (batch_size, time_sequence, vocab_size)
    network_output_softmax = network_output_softmax.dimshuffle(1, 0, 2)

    # Compute AdaGrad updates for training
    logging.info("Computing updates ...")
    all_params = lasagne.layers.get_all_params(l_out, trainable=True)
    updates = lasagne.updates.adagrad(cost, all_params, LEARNING_RATE)

    # Theano functions for training, predict
    logging.info("Compiling functions ...")
    input_var = [l_in.input_var, l_mask.input_var]
    if USE_TIME_INPUT:
        input_var += [l_t.input_var]

    predict = theano.function(input_var,
                              network_output_softmax,
                              allow_input_downcast=True)
    input_var += [target_values]
    train = theano.function(input_var,
                            cost,
                            updates=updates,
                            allow_input_downcast=True)
    # compute_cost returns the cost but applies no update
    compute_cost = theano.function(input_var, cost, allow_input_downcast=True)

    def do_evaluate(test_x,
                    test_y,
                    test_mask,
                    lengths,
                    test_t=None,
                    n=100,
                    test_batch=5):
        """Predict on the test set and log Recall@10, MRR@10 and NDCG@10.

        For every test sequence the distribution at the last time step is
        kept.  The ground-truth item is then ranked against ``n`` randomly
        sampled items, SAMPLE_TIME times per sequence, and the averaged
        scores are logged.  ``lengths`` is accepted but not used here.
        """
        logging.info("Evaluate: Start predicting")

        p = 0
        probs_all_time = None
        while True:
            input_var = [test_x[p:p + test_batch], test_mask[p:p + test_batch]]
            if test_t is not None:
                input_var += [test_t[p:p + test_batch]]
            batch_probs = predict(*input_var)
            if probs_all_time is None:
                probs_all_time = np.zeros(
                    (test_x.shape[0], batch_probs.shape[2]))

            # Store each batch at the rows of the samples it was computed
            # from.  The cursor must advance only AFTER this write: advancing
            # first shifts every prediction by one batch, so row idx would no
            # longer correspond to test_y[idx].
            probs_all_time[p:p + batch_probs.shape[0], :] = batch_probs[:,
                                                                        -1, :]
            p += test_batch
            if p >= test_x.shape[0]:
                break

        logging.info("Evaluate: End predicting")
        total_size = test_x.shape[0]
        recall10 = 0.
        MRR10_score = 0.
        NDCG_score = 0.
        rate_sum = 0

        sample_time = SAMPLE_TIME

        for idx in range(total_size):
            gnd = test_y[idx]
            probs = probs_all_time[idx, :]
            # Rank of the ground truth among the whole vocabulary (1-based).
            prob_index = np.argsort(probs)[-1::-1].tolist()
            gnd_rate = prob_index.index(gnd) + 1
            rate_sum += gnd_rate
            # Sample multiple times to reduce randomness
            for _ in range(sample_time):
                samples = np.random.choice(range(vocab_size),
                                           n + 1,
                                           replace=False).tolist()
                # for i, sample in enumerate(samples):
                #     o = 0
                #     while sample in test_x[idx].tolist() and o < 10:
                #         sample = random.choice(range(vocab_size))
                #         samples[i] = sample
                #         o+=1

                # make sure the first element is gnd
                try:
                    samples.remove(gnd)
                    samples.insert(0, gnd)
                except ValueError:
                    samples[0] = gnd

                sample_probs = probs[samples]
                # Position 0 is the ground truth, so its place in the sorted
                # order is its rank among the sampled candidates.
                prob_index = np.argsort(sample_probs)[-1::-1].tolist()
                rate = prob_index.index(0) + 1

                # calculate Recall@10, NDCG@10 and MRR@10
                if rate <= 10:
                    recall10 += 1
                    MRR10_score += 1. / rate
                    NDCG_score += 1. / math.log(rate + 1, 2)

        logging.info("Evaluate: End calculating scores")

        count = total_size * sample_time
        recall10 = recall10 / count
        MRR10_score = MRR10_score / count
        NDCG_score = NDCG_score / count
        avg_rate = float(rate_sum) / total_size

        logging.info('Recall@10 {}'.format(recall10))
        logging.info('MRR@10 1/rate {}'.format(MRR10_score))
        logging.info('NDCG@10 1/rate {}'.format(NDCG_score))
        logging.info('Average rate {}'.format(avg_rate))

    def onehot2int(onehot_vec):
        """Return, for each one-hot row, the index of its first 1."""
        return [onehot.tolist().index(1) for onehot in onehot_vec]

    def get_short_test_data(length):
        """Return the test set truncated to its first ``length`` steps.

        For sequences longer than ``length`` the target becomes the item at
        position ``length``; shorter sequences keep their original target.
        """
        print("Get short test data")
        # generate short sequence in the test_data.
        test_x = test_data['x'][:, :length]
        test_mask = test_data['mask'][:, :length]
        test_t = test_data['t'][:, :length] if USE_TIME_INPUT else None
        lengths = np.sum(test_mask, axis=1).astype('int')

        test_y = test_data['y'].copy()
        for idx in range(test_y.shape[0]):
            whole_length = test_data['lengths'][idx]
            if length < whole_length:
                test_y[idx] = test_data['x'][idx, length, :].tolist().index(
                    1) if ONE_HOT else test_data['x'][idx, length, 0]
        logging.info("Finished getting short test data")
        return test_x, test_y, test_mask, lengths, test_t

    def evaluate(model, current_epoch, additional_test_length):
        """Evaluate on the full test set, optionally on a truncated one, and
        save the model unless DEBUG is set."""
        logging.info('Evaluate')
        test_x = test_data['x']
        test_y = test_data['y']
        test_mask = test_data['mask']
        lengths = test_data['lengths']
        logging.info(
            '-----------Evaluate Normal:{},{},{}-------------------'.format(
                MODEL_TYPE, DATA_TYPE, N_HIDDEN))
        do_evaluate(test_x,
                    test_y,
                    test_mask,
                    lengths,
                    test_data['t'] if USE_TIME_INPUT else None,
                    test_batch=TEST_BATCH)
        # Evaluate the model on short data
        if additional_test_length > 0:
            logging.info('-----------Evaluate Additional---------------')
            test_x, test_y, test_mask, lengths, test_t = get_short_test_data(
                additional_test_length)
            do_evaluate(test_x,
                        test_y,
                        test_mask,
                        lengths,
                        test_t,
                        test_batch=TEST_BATCH)
        logging.info('-----------Evaluate End----------------------')
        if not DEBUG:
            utils.save_model(
                '{}-{}-{}-{}'.format(MODEL_TYPE, current_epoch,
                                     DATA_TYPE, N_HIDDEN),
                str(datetime.datetime.now()), model, '_new')

        logging.info("Done saving")

    def add_test_to_train(length):
        """Append the first ``length`` steps of every test sequence to the
        training set, after dropping whatever was appended previously."""
        logging.info('Length {} test cases added to train set'.format(length))
        global train_data
        logging.info('Old train data size {}'.format(len(train_data['x'])))
        # Remove the train_data added before
        train_data['x'] = train_data['x'][:train_data_size]
        train_data['y'] = train_data['y'][:train_data_size]
        if 't' in train_data:
            train_data['t'] = train_data['t'][:train_data_size]
        test_x = test_data['x']
        lengths = test_data['lengths']
        for idx in range(test_x.shape[0]):
            n_length = length
            # To make sure the complete test case will not be added into train set
            if lengths[idx] <= length:
                n_length = length - 1
            if ONE_HOT:
                # if ONE_HOT is used, we convert one hot vector to int first.
                new_x = onehot2int(test_x[idx, :n_length, :])
                new_y = onehot2int(test_x[idx, 1:n_length + 1, :])
            else:
                new_x = test_x[idx, :n_length, 0]
                new_y = test_x[idx, 1:n_length + 1, 0]
            train_data['x'].append(new_x)
            train_data['y'].append(new_y)
            if 't' in train_data:
                test_t = test_data['t']
                new_t = test_t[idx, :n_length].tolist()
                train_data['t'].append(new_t)
        logging.info('New train data size {}'.format(len(train_data['x'])))
        logging.info('--Data Added--')

    logging.info("Training ...")
    logging.info('Data size {},Max epoch {},Batch {}'.format(
        train_data_size, num_epochs, BATCH_SIZE))

    p = 0
    current_epoch = 0
    it = 0
    data_size = train_data_size
    last_it = 0
    avg_cost = 0
    avg_seq_len = 0
    try:
        while True:
            #logging.info("Load batch")
            batch_data = gen_data(p, train_data, batch_size=BATCH_SIZE)
            x = batch_data['x']
            y = batch_data['y']
            mask = batch_data['mask']
            avg_seq_len += x.shape[1]
            input_var = [x, mask, y]

            #logging.info("Train batch")

            if USE_TIME_INPUT:
                # The time input sits between the mask and the targets,
                # matching the argument order the train function was compiled with.
                t = batch_data['t']
                input_var.insert(2, t)
            avg_cost += train(*input_var)
            it += 1
            p += BATCH_SIZE
            #logging.info("Done batch")
            #if True:
            if (p >= data_size):
                p = 0
                last_it = it
                current_epoch += 1
                # First stage: Using original train data to train model in #FIXED_EPOCHS
                # Second stage: After that add part of test data to train data.
                # The first stage is using user information with similar interest, and the second stage is using history information
                additional_length = int(
                    (current_epoch - FIXED_EPOCHS) * test_data_length /
                    (NUM_EPOCHS - FIXED_EPOCHS))
                #if current_epoch % 2 == 0:
                evaluate(l_out,
                         current_epoch=current_epoch,
                         additional_test_length=additional_length)

                if current_epoch >= num_epochs:
                    break
                if current_epoch > FIXED_EPOCHS:
                    data_size = train_data_size + test_data_size
                    logging.info(
                        '>> length {} test cases added to train set.'.format(
                            additional_length))
                    add_test_to_train(additional_length)
                logging.info('Epoch {} Carriage Return'.format(current_epoch))
            if it % PRINT_FREQ == 0:
                logging.info(
                    "Epoch {}-{},iter {} average seq length = {} average loss = {}"
                    .format(current_epoch,
                            (it - last_it) * 1.0 * BATCH_SIZE / data_size, it,
                            avg_seq_len / PRINT_FREQ, avg_cost / PRINT_FREQ))
                avg_cost = 0
                avg_seq_len = 0
        logging.info('End')
    except KeyboardInterrupt:
        # Ctrl-C is the intended way to stop training early.
        pass

Example #6

Show file

File: model.py Project: gray-stanton/dropout-analytical

    def __init__(self,
                 rnn_type,
                 ntoken,
                 ninp,
                 nhid,
                 nlayers,
                 dropout=0.5,
                 dropouth=0.5,
                 dropouti=0.5,
                 dropoute=0.1,
                 wdrop=0,
                 tie_weights=False,
                 no_dropout=False,
                 custom_lstm=False):
        """Assemble the embedding, the recurrent stack and the decoder.

        Args:
            rnn_type: one of ``'LSTM'``, ``'QRNN'`` or ``'GRU'``.
            ntoken: embedding rows and decoder output size.
            ninp: embedding width, i.e. input size of the first layer.
            nhid: hidden size of the recurrent layers.
            nlayers: number of stacked recurrent layers.
            dropout, dropouth, dropouti: probabilities of the three
                ``nn.Dropout`` modules; also stored on the instance.
            dropoute: only stored on the instance here.
            wdrop: dropout value passed to ``WeightDrop``; ``None`` counts as
                0, and it is forced to 0 when ``no_dropout`` is set.
            tie_weights: share the decoder weight with the encoder; for LSTM
                and QRNN it also makes the last layer emit ``ninp`` features.
            no_dropout: stored inverted as ``self.use_dropout``.
            custom_lstm: for ``'LSTM'``, build ``LSTMLayer`` objects instead
                of ``torch.nn.LSTM``.
        """
        super(RNNModel, self).__init__()
        self.lockdrop = LockedDropout()
        self.idrop = nn.Dropout(dropouti)
        self.hdrop = nn.Dropout(dropouth)
        self.drop = nn.Dropout(dropout)
        self.encoder = nn.Embedding(ntoken, ninp)
        self.use_dropout = not no_dropout

        # Weight dropout is off when unspecified or when dropout is disabled.
        if wdrop is None or not self.use_dropout:
            wdrop = 0
        assert rnn_type in ['LSTM', 'QRNN', 'GRU'], 'RNN type is not supported'

        top = nlayers - 1

        def input_width(depth):
            # Only the first layer reads the embedding.
            return ninp if depth == 0 else nhid

        def output_width(depth):
            # The last layer matches the embedding width when weights are tied.
            if depth != top:
                return nhid
            return ninp if tie_weights else nhid

        if rnn_type == 'LSTM':
            # we need to use own lstm for second order derivative
            stack = []
            if not custom_lstm:
                for depth in range(nlayers):
                    stack.append(
                        torch.nn.LSTM(input_width(depth),
                                      output_width(depth),
                                      1,
                                      dropout=0))
                recurrent_weight = 'weight_hh_l0'
            else:
                for depth in range(nlayers):
                    stack.append(
                        LSTMLayer(input_width(depth), output_width(depth)))
                recurrent_weight = 'weight_hh'
            self.rnns = [
                WeightDrop(cell, [recurrent_weight], dropout=wdrop)
                for cell in stack
            ]
        elif rnn_type == 'GRU':
            # The last GRU layer always emits ninp features, whatever
            # tie_weights says.
            stack = []
            for depth in range(nlayers):
                stack.append(
                    torch.nn.GRU(input_width(depth),
                                 nhid if depth != top else ninp,
                                 1,
                                 dropout=0))
            # NOTE(review): torch.nn.GRU names its recurrent weight
            # 'weight_hh_l0'; confirm that WeightDrop accepts 'weight_hh' here.
            self.rnns = [
                WeightDrop(cell, ['weight_hh'], dropout=wdrop)
                for cell in stack
            ]
        elif rnn_type == 'QRNN':
            from torchqrnn import QRNNLayer
            self.rnns = [
                QRNNLayer(input_size=input_width(depth),
                          hidden_size=output_width(depth),
                          save_prev_x=True,
                          zoneout=0,
                          window=2 if depth == 0 else 1,
                          output_gate=True) for depth in range(nlayers)
            ]
            for cell in self.rnns:
                cell.linear = WeightDrop(cell.linear, ['weight'],
                                         dropout=wdrop)
        print(self.rnns)
        self.rnns = torch.nn.ModuleList(self.rnns)
        self.decoder = nn.Linear(nhid, ntoken)

        # Optionally tie weights as in:
        # "Using the Output Embedding to Improve Language Models" (Press & Wolf 2016)
        # https://arxiv.org/abs/1608.05859
        # and
        # "Tying Word Vectors and Word Classifiers: A Loss Framework for Language Modeling" (Inan et al. 2016)
        # https://arxiv.org/abs/1611.01462
        if tie_weights:
            #if nhid != ninp:
            #    raise ValueError('When using the tied flag, nhid must be equal to emsize')
            self.decoder.weight = self.encoder.weight

        self.init_weights()

        # Keep the hyper-parameters around on the instance.
        self.rnn_type = rnn_type
        self.ninp = ninp
        self.nhid = nhid
        self.nlayers = nlayers
        self.wdrop = wdrop
        self.dropout = dropout
        self.dropouti = dropouti
        self.dropouth = dropouth
        self.dropoute = dropoute
        self.tie_weights = tie_weights
Example #7

Show file

File: run_length_exp.py Project: wzzzzZ1/Context-Aware-Sequential-Recommendation

def main(num_epochs=NUM_EPOCHS, vocab_size=VOCAB_SIZE):
    """Build the network, restore a saved model and evaluate it per sequence length.

    The recurrent layer is selected by the module-level ``MODEL_TYPE``. After
    the Theano ``predict`` / ``train`` / ``compute_cost`` functions have been
    compiled, parameters are restored with ``utils.load_model`` and
    ``do_evaluate`` is run once for every entry of a fixed list of sequence
    lengths.

    Besides its two parameters the function reads module-level configuration
    (``MODEL_TYPE``, ``USE_TIME_INPUT``, ``N_HIDDEN``, ``GRAD_CLIP``, ``BN``,
    ``NDIM``, ``LEARNING_RATE``, ``SAMPLE_TIME``, ``TEST_BATCH``, ``ONE_HOT``,
    ``DEBUG``, ``DATA_TYPE``, ``BATCH_SIZE``, ``result_dir``) and the
    module-level ``train_data`` / ``test_data`` / ``train_data_size``.

    Args:
        num_epochs: Only reported in the "Data size ..." log line here.
        vocab_size: Number of units of the output layer; also the range the
            evaluation samples candidate items from.
    """
    logging.info("Building network ...")

    # First, we build the network, starting with an input layer
    # Recurrent layers expect input of shape
    # (batch size, SEQ_LENGTH, num_features)
    l_in = lasagne.layers.InputLayer(shape=(None, None, NDIM))
    l_mask = lasagne.layers.InputLayer(shape=(None, None))

    # We now build the LSTM layer which takes l_in as the input layer
    # We clip the gradients at GRAD_CLIP to prevent the problem of exploding gradients.
    # NOTE(review): l_forward stays None for an unrecognised MODEL_TYPE; the
    # DenseLayer construction below then receives None as its input layer.
    l_forward = None

    if MODEL_TYPE == 'LSTM' or MODEL_TYPE == 'LSTM_T':
        l_t = lasagne.layers.InputLayer(
            shape=(None, None)) if USE_TIME_INPUT else None
        l_forward = LSTMLayer(l_in,
                              time_input=l_t,
                              mask_input=l_mask,
                              num_units=N_HIDDEN,
                              peepholes=True,
                              ingate=lasagne.layers.Gate(),
                              forgetgate=lasagne.layers.Gate(),
                              cell=lasagne.layers.Gate(
                                  W_cell=None,
                                  nonlinearity=lasagne.nonlinearities.tanh),
                              outgate=lasagne.layers.Gate(),
                              cell_init=lasagne.init.Constant(0.),
                              hid_init=lasagne.init.Constant(0.),
                              grad_clipping=GRAD_CLIP,
                              nonlinearity=lasagne.nonlinearities.tanh,
                              bn=BN,
                              only_return_final=False)
    elif MODEL_TYPE == 'TLSTM1':
        l_t = lasagne.layers.InputLayer(shape=(None, None))
        l_forward = TLSTM1Layer(
            l_in,
            time_input=l_t,
            num_units=N_HIDDEN,
            mask_input=l_mask,
            peepholes=True,
            ingate=lasagne.layers.Gate(),
            forgetgate=lasagne.layers.Gate(),
            cell=lasagne.layers.Gate(W_cell=None,
                                     nonlinearity=lasagne.nonlinearities.tanh),
            outgate=OutGate(),
            nonlinearity=lasagne.nonlinearities.tanh,
            cell_init=lasagne.init.Constant(0.),
            hid_init=lasagne.init.Constant(0.),
            grad_clipping=GRAD_CLIP,
            only_return_final=False,
            bn=BN,
        )
    elif MODEL_TYPE == 'TLSTM2':
        l_t = lasagne.layers.InputLayer(shape=(None, None))
        l_forward = TLSTM2Layer(
            l_in,
            time_input=l_t,
            num_units=N_HIDDEN,
            mask_input=l_mask,
            peepholes=True,
            ingate=lasagne.layers.Gate(),
            forgetgate=lasagne.layers.Gate(),
            cell=lasagne.layers.Gate(W_cell=None,
                                     nonlinearity=lasagne.nonlinearities.tanh),
            outgate=OutGate(),
            nonlinearity=lasagne.nonlinearities.tanh,
            cell_init=lasagne.init.Constant(0.),
            hid_init=lasagne.init.Constant(0.),
            grad_clipping=GRAD_CLIP,
            only_return_final=False,
            bn=BN,
        )
    elif MODEL_TYPE == 'TLSTM3':
        l_t = lasagne.layers.InputLayer(shape=(None, None))
        # No forgetgate argument is passed for this layer type.
        l_forward = TLSTM3Layer(
            l_in,
            time_input=l_t,
            num_units=N_HIDDEN,
            mask_input=l_mask,
            peepholes=True,
            ingate=lasagne.layers.Gate(),
            cell=lasagne.layers.Gate(W_cell=None,
                                     nonlinearity=lasagne.nonlinearities.tanh),
            outgate=OutGate(),
            nonlinearity=lasagne.nonlinearities.tanh,
            cell_init=lasagne.init.Constant(0.),
            hid_init=lasagne.init.Constant(0.),
            grad_clipping=GRAD_CLIP,
            only_return_final=False,
            bn=BN,
        )
    elif MODEL_TYPE == 'PLSTM':
        l_t = lasagne.layers.InputLayer(shape=(None, None))
        l_forward = PLSTMLayer(l_in,
                               time_input=l_t,
                               num_units=N_HIDDEN,
                               mask_input=l_mask,
                               grad_clipping=GRAD_CLIP,
                               bn=BN,
                               timegate=PLSTMTimeGate())

    # Theano tensor for the targets
    target_values = T.matrix('target_values', dtype='int32')
    # The output of l_forward of shape (batch_size, time_sequence, N_HIDDEN) is
    # passed through a dense layer; the softmax is applied further below.
    # The output of this stage is (batch_size, time_sequence, vocab_size)
    l_out = lasagne.layers.DenseLayer(l_forward,
                                      num_units=vocab_size,
                                      W=lasagne.init.Normal(),
                                      num_leading_axes=2,
                                      nonlinearity=None)
    # lasagne.layers.get_output produces a variable for the output of the net
    network_output = lasagne.layers.get_output(l_out)
    # We need to sum up the cost through time, so put the time axis first:
    # network_output (time_sequence, batch_size, vocab_size)
    network_output = network_output.dimshuffle(1, 0, 2)

    def calculate_softmax(n_input):
        """Apply a softmax to one scanned slice of the network output."""
        return T.nnet.softmax(n_input)

    def merge_cost(n_input, n_target, n_mask, cost_prev):
        """Add the masked cross-entropy of one scanned slice to ``cost_prev``."""
        n_target = n_target.ravel()
        n_cost = T.nnet.categorical_crossentropy(n_input, n_target)
        n_cost = n_cost * n_mask
        n_cost = n_cost.sum()

        return cost_prev + n_cost

    network_output_softmax, _ = theano.scan(fn=calculate_softmax,
                                            sequences=network_output)

    # The loss is the masked (categorical) cross-entropy between prediction and
    # target, accumulated over the scan and divided by the sum of the mask.
    m_cost, _ = theano.scan(fn=merge_cost,
                            sequences=[
                                network_output_softmax, target_values.T,
                                l_mask.input_var.T
                            ],
                            outputs_info=T.constant(0.))
    # The scan yields every running total; only the final one is the full sum.
    m_cost = m_cost[-1]
    cost = m_cost / l_mask.input_var.sum()

    # convert back to: (batch_size, time_sequence, vocab_size)
    network_output_softmax = network_output_softmax.dimshuffle(1, 0, 2)

    # Compute AdaGrad updates for training
    logging.info("Computing updates ...")
    all_params = lasagne.layers.get_all_params(l_out, trainable=True)
    updates = lasagne.updates.adagrad(cost, all_params, LEARNING_RATE)

    # Theano functions for training, predict
    logging.info("Compiling functions ...")
    input_var = [l_in.input_var, l_mask.input_var]
    if USE_TIME_INPUT:
        input_var += [l_t.input_var]

    predict = theano.function(input_var,
                              network_output_softmax,
                              allow_input_downcast=True)
    # train / compute_cost additionally take the targets as their last input.
    input_var += [target_values]
    train = theano.function(input_var,
                            cost,
                            updates=updates,
                            allow_input_downcast=True)
    # compute_cost return cost but without update
    compute_cost = theano.function(input_var, cost, allow_input_downcast=True)

    def do_evaluate(current_epoch,
                    test_x,
                    test_y,
                    test_mask,
                    lengths,
                    test_t=None,
                    n=100,
                    test_batch=5,
                    name=None):
        """Predict in batches and log sampled Recall@10, MRR@10, NDCG and average rank.

        Args:
            current_epoch: Forwarded unchanged to ``log_results``.
            test_x, test_mask, test_t: Inputs sliced along the first axis in
                steps of ``test_batch`` and passed to ``predict``.
            test_y: Ground-truth item per test case.
            lengths: Per test case; ``lengths[idx] - 1`` is the time step whose
                prediction is scored.
            n: Each sampling round draws ``n + 1`` distinct candidate items.
            test_batch: Number of test cases per ``predict`` call.
            name: Forwarded unchanged to ``log_results``.
        """
        # evaluate and calculate recall@10, MRR@10
        p = 0
        probs_all_time = None
        while True:
            input_var = [test_x[p:p + test_batch], test_mask[p:p + test_batch]]
            if test_t is not None:
                input_var += [test_t[p:p + test_batch]]
            batch_probs = predict(*input_var)
            p += test_batch
            probs_all_time = batch_probs if probs_all_time is None else np.concatenate(
                [probs_all_time, batch_probs], axis=0)
            if p >= test_x.shape[0]:
                break

        total_size = test_x.shape[0]
        recall10 = 0.
        MRR10_score = 0.
        NDCG_score = 0.
        rate_sum = 0

        sample_time = SAMPLE_TIME

        for idx in range(total_size):
            gnd = test_y[idx]
            probs = probs_all_time[idx, lengths[idx] - 1, :]
            # Rank (1-based) of the ground truth among all items.
            prob_index = np.argsort(probs)[-1::-1].tolist()
            gnd_rate = prob_index.index(gnd) + 1
            rate_sum += gnd_rate
            # Sample multiple times to reduce randomness
            for _ in range(sample_time):
                samples = np.random.choice(range(vocab_size),
                                           n + 1,
                                           replace=False).tolist()
                # make sure the first element is gnd
                try:
                    samples.remove(gnd)
                    samples.insert(0, gnd)
                except ValueError:
                    samples[0] = gnd

                sample_probs = probs[samples]
                prob_index = np.argsort(sample_probs)[-1::-1].tolist()
                # gnd sits at position 0 of the sample, so this is its rank.
                rate = prob_index.index(0) + 1

                # calculate Recall@10 and MRR@10
                if rate <= 10:
                    recall10 += 1
                    MRR10_score += 1. / rate
                    NDCG_score += 1. / np.log2(rate + 1)

        count = total_size * sample_time
        recall10 = recall10 / count
        MRR10_score = MRR10_score / count
        NDCG_score = NDCG_score / count
        avg_rate = float(rate_sum) / total_size

        logging.info('Recall@10 {}'.format(recall10))
        logging.info('MRR@10 1/rate {}'.format(MRR10_score))
        logging.info('NDCG@10 {}'.format(NDCG_score))
        logging.info('Average rate {}'.format(avg_rate))

        from log import log_results
        # NOTE(review): `cost` here is the symbolic Theano expression built
        # above, not a number -- confirm this is what log_results expects.
        log_results(result_dir, current_epoch, recall10, MRR10_score,
                    NDCG_score, avg_rate, cost, name)

    def onehot2int(onehot_vec):
        """Return, for every row of ``onehot_vec``, the index of its first 1."""
        # convert onehot vector to index
        ret = []
        for onehot in onehot_vec:
            ret.append(onehot.tolist().index(1))
        return ret

    def get_short_test_data(length):
        """Return the test data truncated to the first ``length`` time steps.

        For test cases longer than ``length`` the target becomes the item found
        at position ``length`` of the input sequence.
        """
        # generate short sequence in the test_data.
        test_x = test_data['x'][:, :length]
        test_mask = test_data['mask'][:, :length]
        test_t = test_data['t'][:, :length] if USE_TIME_INPUT else None
        lengths = np.sum(test_mask, axis=1).astype('int')

        test_y = test_data['y'].copy()
        for idx in range(test_y.shape[0]):
            whole_length = test_data['lengths'][idx]
            if length < whole_length:
                test_y[idx] = test_data['x'][idx, length, :].tolist().index(
                    1) if ONE_HOT else test_data['x'][idx, length, 0]

        return test_x, test_y, test_mask, lengths, test_t

    def evaluate(model, current_epoch, additional_test_length):
        """Evaluate on the full test data, optionally on truncated data, then save.

        The truncated evaluation runs only when ``additional_test_length`` is
        positive; the model is saved unless ``DEBUG`` is set.
        """
        # Evaluate the model
        logging.info('Evaluate')
        test_x = test_data['x']
        test_y = test_data['y']
        test_mask = test_data['mask']
        lengths = test_data['lengths']
        logging.info(
            '-----------Evaluate Normal:{},{},{}-------------------'.format(
                MODEL_TYPE, DATA_TYPE, N_HIDDEN))
        do_evaluate(current_epoch,
                    test_x,
                    test_y,
                    test_mask,
                    lengths,
                    test_data['t'] if USE_TIME_INPUT else None,
                    test_batch=TEST_BATCH,
                    name='additional')
        # Evaluate the model on short data
        if additional_test_length > 0:
            logging.info('-----------Evaluate Additional---------------')
            test_x, test_y, test_mask, lengths, test_t = get_short_test_data(
                additional_test_length)
            do_evaluate(current_epoch,
                        test_x,
                        test_y,
                        test_mask,
                        lengths,
                        test_t,
                        test_batch=TEST_BATCH,
                        name='additional_test')
        logging.info('-----------Evaluate End----------------------')
        if not DEBUG:
            utils.save_model(
                '{}-{}-{}-{}'.format(MODEL_TYPE, current_epoch,
                                     DATA_TYPE, N_HIDDEN),
                str(datetime.datetime.now()), model, '_new')

    def add_test_to_train(length):
        """Append a prefix of every test sequence to the global ``train_data``.

        Entries appended by an earlier call are dropped first by truncating
        the lists back to ``train_data_size``.
        """
        logging.info('Length {} test cases added to train set'.format(length))
        global train_data
        logging.info('Old train data size {}'.format(len(train_data['x'])))
        # Remove the train_data added before
        train_data['x'] = train_data['x'][:train_data_size]
        train_data['y'] = train_data['y'][:train_data_size]
        # `in` instead of dict.has_key(), which no longer exists in Python 3.
        if 't' in train_data:
            train_data['t'] = train_data['t'][:train_data_size]
        test_x = test_data['x']
        lengths = test_data['lengths']
        for idx in range(test_x.shape[0]):
            n_length = length
            # To make sure the complete test case will not be added into train set
            if lengths[idx] <= length:
                n_length = length - 1
            if ONE_HOT:
                # if ONE_HOT is used, we convert one hot vector to int first.
                new_x = onehot2int(test_x[idx, :n_length, :])
                new_y = onehot2int(test_x[idx, 1:n_length + 1, :])
            else:
                new_x = test_x[idx, :n_length, 0]
                new_y = test_x[idx, 1:n_length + 1, 0]
            train_data['x'].append(new_x)
            train_data['y'].append(new_y)
            if 't' in train_data:
                test_t = test_data['t']
                new_t = test_t[idx, :n_length].tolist()
                train_data['t'].append(new_t)
        logging.info('New train data size {}'.format(len(train_data['x'])))
        logging.info('--Data Added--')

    logging.info("Training ...")
    logging.info('Data size {},Max epoch {},Batch {}'.format(
        train_data_size, num_epochs, BATCH_SIZE))

    logging.info("Load pickle")
    utils.load_model("TLSTM3-9-music-128_2019-10-16 14:00:39.099161", l_out)

    # Kept under its own name: the per-iteration clipped lengths below must not
    # rebind the list that drives the loop.
    eval_lengths = [25, 50, 100, 200]
    max_length = 200

    for seq_length in eval_lengths:
        # Scalar difference for the current length (the list itself cannot be
        # subtracted from an int).
        mask_length = max_length - seq_length
        # Evaluate the model
        logging.info('Evaluate')
        test_x = test_data['x']
        test_y = test_data['y']

        test_mask = np.copy(test_data['mask'])
        # NOTE(review): this forces the first `mask_length` mask columns to 1;
        # confirm this matches the intended padding layout of the test data.
        test_mask[:, :mask_length] = 1
        clipped_lengths = np.minimum(test_data['lengths'], seq_length)

        logging.info(
            '-----------Evaluate length: {}-------------------'.format(
                seq_length))
        # do_evaluate requires `current_epoch` as its first argument. No
        # training epoch exists in this experiment, so the evaluated length is
        # passed in that slot to keep the logged rows distinguishable.
        do_evaluate(seq_length,
                    test_x,
                    test_y,
                    test_mask,
                    clipped_lengths,
                    test_data['t'] if USE_TIME_INPUT else None,
                    test_batch=TEST_BATCH)

Example #8

Show file

    # Fragment: the enclosing function header is not part of this excerpt.
    # Batches are drawn in random order through an explicit sampler, which is
    # why `shuffle` is left False on the DataLoader.
    sampler = RandomSampler(dataset)
    loader = DataLoader(dataset,
                        batch_size=batch_size,
                        sampler=sampler,
                        shuffle=False,
                        num_workers=2)

    #    dataiter = iter(loader)
    #    images, labels = dataiter.next()
    #    print (images)
    #    images=tensor_to_img(images)
    #    print (labels)
    #    print (images)

    # Model parts: `net`, the LSTM layer and the loss object.
    net = Net(14 * batch_size)
    lstm = LSTMLayer(7 * 7 * (16 + 5 * 2), 64, 14 * 14 * (num_class + 5 * 2),
                     2, batch_size)
    lossfunction = Loss(batch_size)
    # Two parameter groups: `net` uses the optimizer-level lr (0), `lstm`
    # overrides it with 0.0001.
    # NOTE(review): `optimizer` is rebound at the end of this fragment to an
    # Adam over net.parameters() only, so this two-group optimizer (and with
    # it lstm.parameters()) is discarded -- confirm which one is intended.
    optimizer = optim.Adam([{
        'params': net.parameters()
    }, {
        'params': lstm.parameters(),
        'lr': 0.0001
    }],
                           lr=0,
                           weight_decay=0)
    # Optionally restore the weights of `net` from SAVE_PATH.
    if load_checkpoint:
        net.load_state_dict(torch.load(SAVE_PATH))

    # NOTE(review): only `net` is moved with .cuda() in the visible lines;
    # `lstm` is not.
    net.cuda()

    # Replaces the optimizer created above.
    optimizer = optim.Adam(net.parameters(), lr=0.0001)

Example #9

Show file

def main(num_epochs=NUM_EPOCHS, vocab_size=VOCAB_SIZE):
    """Build the recurrent network, then train and evaluate it epoch by epoch.

    The recurrent layer is selected by the module-level ``MODEL_TYPE``. The
    training loss is a masked cross-entropy that is additionally weighted per
    position by the ``l_pos`` input. After every pass over the training data
    the model is evaluated; once ``current_epoch`` exceeds ``FIXED_EPOCHS``,
    prefixes of the test sequences are appended to the training data.

    Besides its two parameters the function reads module-level configuration
    (``MODEL_TYPE``, ``USE_TIME_INPUT``, ``USE_DURATION``, ``N_HIDDEN``,
    ``GRAD_CLIP``, ``BN``, ``NDIM``, ``LEARNING_RATE``, ``SAMPLE_TIME``,
    ``RANK``, ``TEST_BATCH``, ``ONE_HOT``, ``DEBUG``, ``DATA_TYPE``,
    ``BATCH_SIZE``, ``PRINT_FREQ``, ``FIXED_EPOCHS``, ``NUM_EPOCHS``) and the
    module-level data objects (``train_data``, ``test_data`` and their sizes).

    Args:
        num_epochs: Training stops once this many epochs have completed.
        vocab_size: Number of units of the output layer; also the number of
            candidate items ranked during evaluation.
    """
    logging.info("Building network ...")
    # (batch size, SEQ_LENGTH, num_features)
    # None means the size of that dimension is not fixed at compile time.
    # InputLayer represents a network input; the first tensor dimension is
    # usually the batch dimension.
    l_in = lasagne.layers.InputLayer(shape=(None, None, NDIM))
    l_mask = lasagne.layers.InputLayer(shape=(None, None))

    # Extra input used only as a per-position weight in the training loss.
    l_pos = lasagne.layers.InputLayer(shape=(None, None))

    # We now build the LSTM layer which takes l_in as the input layer
    # We clip the gradients at GRAD_CLIP to prevent the problem of exploding gradients.
    l_forward = None
    if MODEL_TYPE == 'LSTM' or MODEL_TYPE == 'LSTM_T':
        l_t = lasagne.layers.InputLayer(shape=(None, None)) if USE_TIME_INPUT else None
        l_forward = LSTMLayer(
            l_in,
            time_input=l_t,
            mask_input=l_mask,
            num_units=N_HIDDEN,
            peepholes=True,
            ingate=lasagne.layers.Gate(),
            forgetgate=lasagne.layers.Gate(),
            cell=lasagne.layers.Gate(W_cell=None, nonlinearity=lasagne.nonlinearities.tanh),
            outgate=lasagne.layers.Gate(),
            cell_init=lasagne.init.Constant(0.),
            hid_init=lasagne.init.Constant(0.),
            grad_clipping=GRAD_CLIP,
            nonlinearity=lasagne.nonlinearities.tanh,
            bn=BN,
            only_return_final=False)
    elif MODEL_TYPE == 'RNN':
        l_t = lasagne.layers.InputLayer(shape=(None, None)) if USE_TIME_INPUT else None
        l_forward = RNNLayer(
            l_in,
            time_input=l_t,
            mask_input=l_mask,
            num_units=N_HIDDEN,
            peepholes=True,
            ingate=lasagne.layers.Gate(),
            forgetgate=lasagne.layers.Gate(),
            cell=lasagne.layers.Gate(W_cell=None, nonlinearity=lasagne.nonlinearities.tanh),
            outgate=lasagne.layers.Gate(),
            cell_init=lasagne.init.Constant(0.),
            hid_init=lasagne.init.Constant(0.),
            grad_clipping=GRAD_CLIP,
            nonlinearity=lasagne.nonlinearities.tanh,
            bn=BN,
            only_return_final=False)
    elif MODEL_TYPE == 'DTLSTM':
        l_t = lasagne.layers.InputLayer(shape=(None, None))
        l_d = lasagne.layers.InputLayer(shape=(None, None))
        l_forward = VDTLSTMLayer(
            l_in,
            time_input=l_t,
            duration_input=l_d,
            num_units=N_HIDDEN,
            mask_input=l_mask,
            peepholes=True,
            ingate=lasagne.layers.Gate(),
            forgetgate=lasagne.layers.Gate(),
            cell=lasagne.layers.Gate(W_cell=None, nonlinearity=lasagne.nonlinearities.tanh),
            outgate=OutGate(),
            nonlinearity=lasagne.nonlinearities.tanh,
            cell_init=lasagne.init.Constant(0.),
            hid_init=lasagne.init.Constant(0.),
            grad_clipping=GRAD_CLIP,
            only_return_final=False,
            bn=BN,
        )
    elif MODEL_TYPE == 'DTLSTM_EM':
        l_t = lasagne.layers.InputLayer(shape=(None, None))
        l_d = lasagne.layers.InputLayer(shape=(None, None))
        # No forgetgate argument is passed for this layer type.
        l_forward = VDTLSTMEMLayer(
            l_in,
            time_input=l_t,
            duration_input=l_d,
            num_units=N_HIDDEN,
            mask_input=l_mask,
            peepholes=True,
            ingate=lasagne.layers.Gate(),
            cell=lasagne.layers.Gate(W_cell=None, nonlinearity=lasagne.nonlinearities.tanh),
            outgate=OutGate(),
            nonlinearity=lasagne.nonlinearities.tanh,
            cell_init=lasagne.init.Constant(0.),
            hid_init=lasagne.init.Constant(0.),
            grad_clipping=GRAD_CLIP,
            only_return_final=False,
            bn=BN,
        )
    elif MODEL_TYPE == 'TLSTM2':
        l_t = lasagne.layers.InputLayer(shape=(None, None))
        l_forward = VTLSTM2Layer(
            l_in,
            time_input=l_t,
            num_units=N_HIDDEN,
            mask_input=l_mask,
            peepholes=True,
            ingate=lasagne.layers.Gate(),
            forgetgate=lasagne.layers.Gate(),
            cell=lasagne.layers.Gate(W_cell=None, nonlinearity=lasagne.nonlinearities.tanh),
            outgate=OutGate(),
            nonlinearity=lasagne.nonlinearities.tanh,
            cell_init=lasagne.init.Constant(0.),
            hid_init=lasagne.init.Constant(0.),
            grad_clipping=GRAD_CLIP,
            only_return_final=False,
            bn=BN,
        )
    else:
        # The logged message reads "no such model type".
        # NOTE(review): the process exits with status 0 although this is an
        # error path -- confirm whether a non-zero status is wanted.
        logging.info('没有这种模型类型')
        exit(0)

    target_values = T.matrix('target_values', dtype='int32')

    # Output layer mapping N_HIDDEN features to vocab_size scores.
    # l_forward output: (num_batch, sequence_length, num_units)
    l_out = lasagne.layers.DenseLayer(l_forward, num_units=vocab_size, W=lasagne.init.Normal(),
                                      num_leading_axes=2, nonlinearity=None)

    # Symbolic output of the output layer.
    # l_out output: (num_batch, sequence_length, vocab_size)
    network_output = lasagne.layers.get_output(l_out)

    # dimshuffle examples:
    # (2, 0, 1) -> AxBxC to CxAxB
    # (0, 'x', 1) -> AxB to Ax1xB
    # (1, 'x', 0) -> AxB to Bx1xA
    # Result here: (sequence_length, num_batch, vocab_size)
    network_output = network_output.dimshuffle(1, 0, 2)

    def calculate_softmax(n_input):
        """Apply a softmax to one scanned slice of the network output."""
        return T.nnet.softmax(n_input)

    def merge_cost(n_input, n_target, n_mask, n_pos, cost_prev):
        """Add the masked, position-weighted cross-entropy of one slice to ``cost_prev``."""
        # Flatten the target slice with ravel.
        n_target = n_target.ravel()

        n_cost = T.nnet.categorical_crossentropy(n_input, n_target)
        n_cost = n_cost * n_mask * n_pos
        n_cost = n_cost.sum()
        return cost_prev + n_cost

    network_output_softmax, _ = theano.scan(fn=calculate_softmax, sequences=network_output)

    # The loss is the masked, position-weighted (categorical) cross-entropy
    # between prediction and target; this scan accumulates its running sum.
    m_cost, _ = theano.scan(fn=merge_cost,
                            sequences=[network_output_softmax, target_values.T, l_mask.input_var.T, l_pos.input_var.T],

                            outputs_info=T.constant(0.))
    # m_cost is the sequence of running totals; only the last one is needed.
    m_cost = m_cost[-1]
    # Average cost: divide by the sum of the mask.
    cost = m_cost / l_mask.input_var.sum()

    # Convert back to: (batch_size, time_sequence, vocab_size)
    network_output_softmax = network_output_softmax.dimshuffle(1, 0, 2)

    # Compute AdaGrad updates for training
    logging.info("Computing updates ...")
    # Collect the trainable parameters of all layers below l_out.
    all_params = lasagne.layers.get_all_params(l_out, trainable=True)
    # Update all_params from cost with learning rate LEARNING_RATE.
    updates = lasagne.updates.adagrad(cost, all_params, LEARNING_RATE)

    # Theano functions for training, predict
    logging.info("Compiling functions ...")
    input_var = [l_in.input_var, l_mask.input_var]
    if USE_TIME_INPUT:
        input_var += [l_t.input_var]
        if USE_DURATION:
            input_var += [l_d.input_var]

    predict = theano.function(input_var, network_output_softmax, allow_input_downcast=True)
    input_var += [target_values]
    # The position weights go right after the mask.
    input_var.insert(2, l_pos.input_var)

    # Training inputs, in order:
    # [l_in, l_mask, l_pos, (l_t), (l_d), target_values]
    train = theano.function(input_var, cost, updates=updates, allow_input_downcast=True)
    # compute_cost return cost but without update
    compute_cost = theano.function(input_var, cost, allow_input_downcast=True)

    def do_evaluate(test_x, test_y, test_mask, lengths, test_t=None, test_d=None, n=1000, test_batch=5):
        """Predict in batches and log recall, MRR and the average rank.

        Args:
            test_x, test_mask, test_t, test_d: Inputs sliced along the first
                axis in steps of ``test_batch`` and passed to ``predict``.
                ``test_d`` is only used when ``test_t`` is given.
            test_y: Ground-truth item per test case.
            lengths: Per test case; ``lengths[idx] - 1`` is the time step whose
                prediction is scored.
            n: Not used by this implementation.
            test_batch: Number of test cases per ``predict`` call.
        """
        # evaluate and calculate recall@10, MRR@10
        p = 0
        probs_all_time = None  # predictions of all batches, concatenated
        while True:
            input_var = [test_x[p:p + test_batch], test_mask[p:p + test_batch]]
            if test_t is not None:
                input_var += [test_t[p:p + test_batch]]
                if test_d is not None:
                    input_var += [test_d[p:p + test_batch]]
            batch_probs = predict(*input_var)
            p += test_batch
            probs_all_time = batch_probs if probs_all_time is None else np.concatenate([probs_all_time, batch_probs],
                                                                                       axis=0)
            if p >= test_x.shape[0]:
                break

        total_size = test_x.shape[0]
        recall10 = 0.
        MRR10_score = 0.
        rate_sum = 0

        sample_time = SAMPLE_TIME

        for idx in range(total_size):
            gnd = test_y[idx]
            # Prediction at the last valid step of this test case.
            probs = probs_all_time[idx, lengths[idx] - 1, :]
            # argsort returns indices in ascending order of value; reversed
            # here so the most probable item comes first.
            prob_index = np.argsort(probs)[-1::-1].tolist()
            gnd_rate = prob_index.index(gnd) + 1
            # Rank of the ground truth among all items.
            rate_sum += gnd_rate
            # Sample multiple times to reduce randomness
            for _ in range(sample_time):

                # Draws a random permutation of the whole vocabulary.
                samples = np.random.choice(range(vocab_size), vocab_size, replace=False).tolist()
                # make sure the first element is gnd, so that looking up
                # index 0 after sorting gives its rank
                try:
                    samples.remove(gnd)
                    samples.insert(0, gnd)
                except ValueError:
                    samples[0] = gnd

                sample_probs = probs[samples]
                prob_index = np.argsort(sample_probs)[-1::-1].tolist()
                # Rank of the ground truth among the sampled candidates.
                rate = prob_index.index(0) + 1

                # calculate Recall and MRR; the cut-off is RANK although the
                # log lines below are labelled "@10".
                if rate <= RANK:
                    recall10 += 1
                    MRR10_score += 1. / rate

        count = total_size * sample_time
        recall10 = recall10 / count
        MRR10_score = MRR10_score / count
        avg_rate = float(rate_sum) / total_size

        logging.info('Recall@10 {}'.format(recall10))
        logging.info('MRR@10 1/rate {}'.format(MRR10_score))
        logging.info('Average rate {}'.format(avg_rate))

    def onehot2int(onehot_vec):
        """Return, for every row of ``onehot_vec``, the index of its first 1."""
        # convert onehot vector to index
        ret = []
        for onehot in onehot_vec:
            ret.append(onehot.tolist().index(1))
        return ret

    def get_short_test_data(length):
        """Return the test data truncated to the first ``length`` time steps.

        For test cases longer than ``length`` the target becomes the item found
        at position ``length`` of the input sequence.
        """
        # generate short sequence in the test_data.
        test_x = test_data['x'][:, :length]
        test_mask = test_data['mask'][:, :length]
        test_t = test_data['t'][:, :length] if USE_TIME_INPUT else None
        test_d = test_data['d'][:, :length] if USE_DURATION else None

        lengths = np.sum(test_mask, axis=1).astype('int')

        test_y = test_data['y'].copy()
        for idx in range(test_y.shape[0]):
            whole_length = test_data['lengths'][idx]
            if length < whole_length:
                test_y[idx] = test_data['x'][idx, length, :].tolist().index(1) if ONE_HOT else test_data['x'][
                    idx, length, 0]

        return test_x, test_y, test_mask, lengths, test_t, test_d

    def evaluate(model, current_epoch, additional_test_length):
        """Evaluate on the full test data, optionally on truncated data, then save.

        The truncated evaluation runs only when ``additional_test_length`` is
        positive; the model is saved unless ``DEBUG`` is set.
        """
        # Evaluate the model
        logging.info('Evaluate')
        # Uses the whole test set.
        test_x = test_data['x']
        test_y = test_data['y']
        test_mask = test_data['mask']
        lengths = test_data['lengths']
        logging.info('-----------Evaluate Normal:{},{},{}-------------------'.format(MODEL_TYPE, DATA_TYPE, N_HIDDEN))
        do_evaluate(test_x, test_y, test_mask, lengths,
                    test_data['t'] if USE_TIME_INPUT else None,
                    test_data['d'] if USE_DURATION else None,
                    test_batch=TEST_BATCH)
        # Evaluate the model on short data
        if additional_test_length > 0:
            logging.info('-----------Evaluate Additional---------------')
            test_x, test_y, test_mask, lengths, test_t, test_d = get_short_test_data(additional_test_length)
            do_evaluate(test_x, test_y, test_mask, lengths, test_t, test_d, test_batch=TEST_BATCH)
        logging.info('-----------Evaluate End----------------------')
        if not DEBUG:
            vutils.save_model('{}-{}-{}-{}'.format(MODEL_TYPE, current_epoch, DATA_TYPE, N_HIDDEN),
                              str(datetime.datetime.now()), model, '_new')

    def add_test_to_train(length):
        """Append a prefix of every test sequence to the global ``train_data``.

        Entries appended by an earlier call are dropped first by truncating
        the lists back to ``train_data_size``.
        """
        logging.info('Length {} test cases added to train set'.format(length))
        global train_data
        logging.info('Old train data size {}'.format(len(train_data['x'])))
        # Remove the train_data added before
        train_data['x'] = train_data['x'][:train_data_size]
        train_data['y'] = train_data['y'][:train_data_size]
        # `in` instead of dict.has_key(), which no longer exists in Python 3.
        if 't' in train_data:
            train_data['t'] = train_data['t'][:train_data_size]
            if 'd' in train_data:
                train_data['d'] = train_data['d'][:train_data_size]

        test_x = test_data['x']
        lengths = test_data['lengths']
        for idx in range(test_x.shape[0]):
            n_length = length
            # To make sure the complete test case will not be added into train set
            if lengths[idx] <= length:
                n_length = length - 1
            if ONE_HOT:
                # if ONE_HOT is used, we convert one hot vector to int first.
                new_x = onehot2int(test_x[idx, :n_length, :])
                new_y = onehot2int(test_x[idx, 1:n_length + 1, :])
            else:
                new_x = test_x[idx, :n_length, 0]
                new_y = test_x[idx, 1:n_length + 1, 0]
            train_data['x'].append(new_x)
            train_data['y'].append(new_y)
            if 't' in train_data:
                test_t = test_data['t']
                new_t = test_t[idx, :n_length].tolist()
                train_data['t'].append(new_t)

                if 'd' in train_data:
                    test_d = test_data['d']
                    new_d = test_d[idx, :n_length].tolist()
                    train_data['d'].append(new_d)

        logging.info('New train data size {}'.format(len(train_data['x'])))
        logging.info('--Data Added--')

    logging.info("Training ...")
    logging.info('Data size {},Max epoch {},Batch {}'.format(train_data_size, num_epochs, BATCH_SIZE))
    p = 0
    current_epoch = 0
    it = 0
    data_size = train_data_size
    last_it = 0  # iteration count at the end of the last finished epoch
    avg_cost = 0  # loss accumulated since the last progress log
    avg_seq_len = 0  # sequence length accumulated since the last progress log

    # Randomised batch positions.
    # NOTE(review): plist is generated once from the initial data_size, but
    # data_size grows after FIXED_EPOCHS -- confirm against vutils.genPlist
    # that the index used below cannot run past the end of plist.
    plist = vutils.genPlist(data_size, BATCH_SIZE)

    try:
        while True:
            # Floor division keeps the index an int on Python 2 and Python 3.
            randP = plist[p // BATCH_SIZE]
            batch_data = gen_data(randP, train_data, batch_size=BATCH_SIZE)
            # Original author's notes on the batch contents:
            # mask: [[1 1 1 1 1...0 0 0 0 0],[1 1 1 ... 0 0]], the number of 1s is the sequence length
            # lengths_x: [1519 1596 ...], each number is one user's sequence length
            # y: list of next-item ids [0 0 0 1 0 ...]
            x = batch_data['x']
            y = batch_data['y']
            mask = batch_data['mask']
            pos = batch_data['pos']
            avg_seq_len += x.shape[1]

            input_var = [x, mask, pos, y]

            if USE_TIME_INPUT:
                t = batch_data['t']
                # The time input goes right before the targets.
                input_var.insert(3, t)
                if USE_DURATION:
                    d = batch_data['d']
                    input_var.insert(4, d)
            # Main training call; inputs are [x, mask, pos, (t), (d), y].
            avg_cost += train(*input_var)
            it += 1
            p += BATCH_SIZE
            if (p >= data_size):  # one full pass over the data is finished
                p = 0
                last_it = it
                current_epoch += 1
                # First stage: Using original train data to train model in #FIXED_EPOCHS
                # Second stage: After that add part of test data to train data.
                # The first stage is using user information with similar interest, and the second stage is using history information
                # NOTE(review): this uses the module constant NUM_EPOCHS while
                # the stop condition below uses the num_epochs argument.
                additional_length = int((current_epoch - FIXED_EPOCHS) * test_data_length / (NUM_EPOCHS - FIXED_EPOCHS))
                evaluate(l_out, current_epoch=current_epoch, additional_test_length=additional_length)
                if current_epoch >= num_epochs:
                    break
                if current_epoch > FIXED_EPOCHS:
                    data_size = train_data_size + test_data_size
                    logging.info('>> length {} test cases added to train set.'.format(additional_length))
                    add_test_to_train(additional_length)
                logging.info('Epoch {} Carriage Return'.format(current_epoch))
            if it % PRINT_FREQ == 0:
                # Progress is logged once every PRINT_FREQ iterations;
                # current_epoch is the number of finished epochs.
                logging.info("Epoch {}-{},iter {} average seq length = {} average loss = {}".format(current_epoch, (
                        it - last_it) * 1.0 * BATCH_SIZE / data_size, it, avg_seq_len / PRINT_FREQ,
                                                                                                    avg_cost / PRINT_FREQ))
                avg_cost = 0
                avg_seq_len = 0
        logging.info('End')
    except KeyboardInterrupt:
        # The logged message reads "the program stopped because you interrupted it".
        logging.info('由于你的自行中断,程序已经停止.')