Example #1
def analyze_model(test_inters, testtype, model):
    path_to_load = c.PATH_TO_RESULTS + "ByIntersection" + os.sep
    load_folder = path_to_load + testtype + os.sep
    save_folder = load_folder + "TestOn" + ",".join(
        [str(i) for i in test_inters]) + os.sep
    Ypred = None
    if "LSTM" in model:
        Xtrain, Ytrain = du.getFeaturesLSTM(
            load_folder, testtype,
            list({1, 2, 3, 4, 5, 6, 7, 8, 9} - set(test_inters)))
        #Xtest, Ytest = du.getFeaturesLSTM(load_folder, testtype, test_inters)
        means, stddevs = du.normalize_get_params(Xtrain)
        Xtrain = du.normalize(Xtrain, means, stddevs)
        numFeatures = Xtrain.shape[2]
        Xtest, Ytest = createAnalysisTestData(numFeatures,
                                              traj_len=Xtrain.shape[1])
        #train the LSTM again
        Ypred, timeFit, timePred, all_tests_x, all_tests_y = LSTM.run_LSTM(
            (Xtrain, Ytrain), (Xtest, Ytest),
            model=model,
            save_path="ignore.out")
    else:
        Xtrain, Ytrain = du.getFeaturesnonLSTM(
            load_folder, testtype,
            list({1, 2, 3, 4, 5, 6, 7, 8, 9} - set(test_inters)))
        #Xtest, Ytest = du.getFeaturesnonLSTM(load_folder, testtype, test_inters)
        means, stddevs = du.normalize_get_params(Xtrain)
        Xtrain = du.normalize(Xtrain, means, stddevs)
        numFeatures = Xtrain.shape[1]
        Xtest, _ = createAnalysisTestData(numFeatures, traj_len=1)
        classifier = skflow.DNNClassifier(
            feature_columns=tf.contrib.learn.infer_real_valued_columns_from_input(
                Xtrain),
            hidden_units=[128, 128],
            n_classes=3)  #, model_dir=save_folder)
        #try:
        #    Ypred = classifier.predict_proba(Xtest)
        #except:
        print("Could not load saved model, re-training :(.")
        Ytrain = [int(i - 1) for i in Ytrain]
        start = time.perf_counter()  # time.clock() was removed in Python 3.8
        max_epochs = 10
        if max_epochs:
            start2 = time.perf_counter()
            for epoch in range(max_epochs):
                classifier.fit(Xtrain, Ytrain, steps=1000)
                end2 = time.perf_counter()
                print("Epoch", epoch, "Done. Took:", end2 - start2)
                start2 = end2
        else:
            classifier.fit(Xtrain, Ytrain)  #, logdir=log_path)
        Ypred = classifier.predict_proba(Xtest)
        end = time.perf_counter()
        timeFit = end - start
    print("Done fitting, time spent:", timeFit)

    np.savetxt(save_folder + "analysis_Ypred_" + model, np.array(Ypred))
    print(model, "analysis predictions saved, test", testtype, save_folder,
          "analysis_Ypred_", model)
    return Ypred
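A hypothetical call site for analyze_model might look like this (the intersection IDs, test type, and model name are illustrative, not taken from the source):

# Hypothetical usage: hold out intersections 8 and 9, train on the rest.
Ypred = analyze_model(test_inters=[8, 9], testtype="hypothetical_test",
                      model="LSTM_128x2")
print(None if Ypred is None else np.array(Ypred).shape)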
Example #2
    def calculate_loss(self):
        """
        Entry point for training: computes the loss function.
        :return:
        """
        # Logarithm of the sales volume
        raw_features = self.create_features()
        embedding_dic = self.create_embedding()
        # Embed the time attributes
        embedding_featrues = self.encoder_embedding(embedding_dic)
        # Reduce the user-profile features with a fully connected network
        embedding_profile_feature = tf.layers.dense(self.profile_features,
                                                    10,
                                                    activation=tf.nn.relu)
        encoder_features = tf.concat(
            [raw_features, embedding_featrues, embedding_profile_feature],
            axis=2)

        outputs, final_state = LSTM(encoder_features, self.encode_len,
                                    self.batch_size, self.n_hidden_units)
        c, h = final_state
        # Add two fully connected layers
        output_d1 = tf.layers.dense(h,
                                    self.n_hidden_units,
                                    activation=tf.nn.relu)
        output_d2 = tf.layers.dense(output_d1 + h,
                                    self.num_class,
                                    activation=None)
        # Mask the logits so that predictions stay within the recommendation list
        mask_output = output_d2 * self.plan_mask
        # Predicted class: argmax over the softmax probabilities
        self.preds = tf.argmax(tf.nn.softmax(mask_output), axis=-1)
        self.labels = tf.one_hot(self.click_mode, 12)
        # Add class weights
        # weight_list = np.ones(12, dtype=np.float32)
        # weight_list[[3, 4, 8, 10, 11]] = 0.1
        # weight_list[[7, 9]] = 2.5
        # weight_list[[2, 5]] = 3
        # weight_list = np.array([93., 141., 273., 49., 25., 95., 24., 156., 4., 20., 30., 12.], dtype=np.float32)
        weight_list = np.array(
            [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.], dtype=np.float32)
        weight_tensor = tf.convert_to_tensor(weight_list)
        weight_labels = self.labels * weight_tensor
        self.loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
            logits=mask_output, labels=weight_labels),
                                   axis=-1)
        # self.loss = tf.reduce_mean(tf.losses.softmax_cross_entropy(logits=mask_output, labels=weight_labels), axis=-1)
        # (batch,64)
        self.score = tf.reduce_mean(
            tf.cast(tf.equal(self.click_mode, tf.cast(self.preds, tf.int32)),
                    tf.float32))
        # self.score = f1_score()
        self.prediction_tensors = {
            'labels': self.click_mode,
            'preds': self.preds,
            'sid': self.sid,
        }

        return self.loss
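The LSTM(...) helper called above is not part of this example. A minimal sketch of what such a TF1-style wrapper could look like, assuming a single BasicLSTMCell unrolled with dynamic_rnn and masked by the sequence lengths (not the original implementation):

import tensorflow as tf

def LSTM(inputs, seq_len, batch_size, n_hidden_units):
    # Sketch only: one LSTM layer; seq_len masks each sequence's padded tail.
    cell = tf.nn.rnn_cell.BasicLSTMCell(n_hidden_units)
    init_state = cell.zero_state(batch_size, tf.float32)
    outputs, final_state = tf.nn.dynamic_rnn(cell, inputs,
                                             sequence_length=seq_len,
                                             initial_state=init_state)
    # final_state is an LSTMStateTuple, matching the `c, h = final_state`
    # unpacking in calculate_loss above.
    return outputs, final_state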
Example #3
    def __init__(self,
                 state_space,
                 act_n,
                 batch_size,
                 il_weight,
                 device="cpu"):
        nn.Module.__init__(self)

        self.state_space = state_space
        self.batch_size = batch_size
        self.il_weight = il_weight

        self.device = torch.device(device)

        self.rnd_target = self._rnd_net(state_space)
        self.rnd = self._rnd_net(state_space)

        self.conv = nn.Sequential(
            self._conv_block(state_space[-1], 32, 5, 2, 0),
            self._conv_block(32, 32, 3, 2, 0),
            self._conv_block(32, 64, 3, 2, 0),
            self._conv_block(64, 64, 3, 2, 0),
            self._conv_block(64, 64, 3, 1, 0))

        self.linear = nn.Sequential(nn.Linear(1024, 256), nn.ReLU(),
                                    nn.Dropout(p=0.5))

        self.lstm = LSTM(self.batch_size, device, 256, 256, 1)
        self.lstm_dropout = nn.Dropout(p=0.2)

        self.policy = nn.Sequential(nn.Linear(256, act_n))

        self.noise = GaussianNoise(mean=0, std=0.02)

        self.value = nn.Linear(256, 1)

        self.loss = nn.CrossEntropyLoss(reduction="none")
        self.mse_loss = nn.MSELoss()

        self.optimizer = torch.optim.Adam(self.parameters(), lr=1e-4)
Example #4
    def __init__(self, args):
        super(BIDAF, self).__init__(args)
        '''
        already declared in BaseModel:
        self.config = args
        self.embedding = nn.Embedding(args.word_num, args.embed_dim)
        self.loss_func = MarginLoss(args.adv_temperature, margin=args.margin)
        '''
        self.context_LSTM = LSTM(input_size=args.embed_dim,
                                 hidden_size=args.hidden_dim,
                                 num_layers=1,
                                 dropout=args.dropout)
        self.att_Linear = nn.Linear(args.hidden_dim, 1)
        self.att_Linear2 = nn.Linear(args.hidden_dim, 1)
        self.att_Linear3 = nn.Linear(args.hidden_dim, 1)
        self.model_LSTM = LSTM(input_size=4 * args.hidden_dim,
                               hidden_size=args.hidden_dim,
                               num_layers=2,
                               dropout=args.dropout,
                               batch_first=True,
                               bidirectional=True)
        self.output_Linear = nn.Linear(6 * args.hidden_dim, 1)
        return
Example #5
def validateLSTM(test_folder, Xtrain, Ytrain, Xtest, Ytest, numEpochs=2):
    print(Xtrain.shape)
    print(Ytrain.shape)
    print(Xtest.shape)
    print(Ytest.shape)
    for model in ["LSTM_128x2"]:  #, "test", "test1", "test2", "test3", "test4"]:
        print("\n")
        print("===========================")
        print("Starting:", model)
        p_dists, timeTrain, timePred, all_tests_x, all_tests_y = LSTM.run_LSTM(
            (Xtrain, Ytrain), (Xtest, Ytest),
            model=model,
            save_path=None,
            numEpochs=numEpochs)
        np.savetxt(test_folder + "Ypred_LSTM", np.array(p_dists))
        score = 0
        numWrong = 0
        #all_tests_x = np.reshape(Xtest, (Xtest.shape[0] * Xtest.shape[1], Xtest.shape[2]))
        #all_tests_y = np.reshape(Ytest, (Ytest.shape[0] * Ytest.shape[1], Ytest.shape[2]))
        print(all_tests_x.shape)
        print(all_tests_y.shape)
        typesOfY = {}
        for val in all_tests_y.flat:
            typesOfY[val] = typesOfY.get(val, 0) + 1
        print(typesOfY)

        print("Total predictions:", len(all_tests_y))
        print(p_dists.shape)
        for i in range(len(all_tests_y)):
            if i >= len(p_dists):  # avoid indexing past the predictions
                break
            actual = all_tests_y[i]
            p_right = p_dists[i][int(actual)]
            score += 1 - p_right
            if p_right != max(p_dists[i]):
                numWrong += 1
        print("Score:", score)
        print("Num wrong:", numWrong)
        sumP0 = sum(p_dists[:, 0])
        print("Sum of P for 0:", sumP0)
    return
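The scoring loop in validateLSTM can also be written as vectorized NumPy; a sketch that assumes integer class labels and uses at most len(p_dists) labels:

import numpy as np

def score_predictions(p_dists, y_true):
    # Equivalent to the loop above: accumulate (1 - p_correct) and count
    # the samples whose correct class is not the argmax.
    n = min(len(p_dists), len(y_true))
    y = np.asarray(y_true).astype(int).ravel()[:n]
    p_right = p_dists[np.arange(n), y]
    score = float(np.sum(1.0 - p_right))
    numWrong = int(np.sum(np.argmax(p_dists[:n], axis=1) != y))
    return score, numWrong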
Example #6
    def create_lstm_net(self):
        """
        Build the LSTM network.
        :return:
        """
        self.encode_features = tf.concat([
            tf.expand_dims(self.distance_encode, 2),
            tf.expand_dims(self.eta_encode, 2),
            tf.expand_dims(self.price_encode, 2),
            tf.one_hot(self.mode_encode, 12),
            # tf.tile(tf.reshape(self.o1, shape=(tf.shape(self.o1)[0], 1, 1)), (1, self.num_encode_steps, 1)),
            # tf.tile(tf.reshape(self.o2, shape=(tf.shape(self.o2)[0], 1, 1)), (1, self.num_encode_steps, 1)),
            # tf.tile(tf.reshape(self.d1, shape=(tf.shape(self.d1)[0], 1, 1)), (1, self.num_encode_steps, 1)),
            # tf.tile(tf.reshape(self.d2, shape=(tf.shape(self.d2)[0], 1, 1)), (1, self.num_encode_steps, 1)),
            # tf.tile(tf.reshape(self.euc_dis, shape=(tf.shape(self.euc_dis)[0], 1, 1)), (1, self.num_encode_steps, 1))
        ], axis=2)
        # encoder_features = tf.concat([self.encode_features, embedding_featrues, embedding_profile_feature], axis=2)
        outputs, final_state = LSTM(self.encode_features, self.encode_len, self.batch_size, self.n_hidden_units)
        # Add attention (left as a TODO; a possible sketch follows this example)

        return outputs, final_state
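The trailing comment leaves attention as a TODO. One plausible way to fill it in is Luong-style dot-product attention over the encoder outputs, with the final hidden state as the query; a sketch under that assumption, not part of the original model:

import tensorflow as tf

def luong_attention(outputs, final_h):
    # outputs: (batch, time, hidden); final_h: (batch, hidden)
    scores = tf.matmul(outputs, tf.expand_dims(final_h, 2))  # (batch, time, 1)
    weights = tf.nn.softmax(scores, axis=1)                  # attention weights
    context = tf.reduce_sum(outputs * weights, axis=1)       # (batch, hidden)
    return context, weights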
Example #7
def test(Xtrain,
         Ytrain,
         Xtest,
         Ytest,
         model,
         testnum,
         save_path=None,
         exper=False):
    all_tests_x = np.array([])
    all_tests_y = np.array([])
    print("Starting model", model, "test", testnum)
    if exper:
        Xtrain = Xtrain[::8]
        Ytrain = Ytrain[::8]
        save_path = None
    if model == "SVM":
        random.seed(42)
        combined = list(zip(Xtrain, Ytrain))
        random.shuffle(combined)
        Xtrain[:], Ytrain[:] = zip(*combined)
        iters = 50000
        if exper:
            iters = 100000
            Xtrain = Xtrain[::3]  # together with the earlier [::8], every 24th sample
            Ytrain = Ytrain[::3]
        Ypred, timeFit, timePred = trainTestSVM(Xtrain,
                                                Ytrain,
                                                Xtest,
                                                Ytest,
                                                testnum,
                                                save_path,
                                                probs=True,
                                                num_iter=iters)
    elif model == "nb":
        random.seed(42)
        combined = list(zip(Xtrain, Ytrain))
        random.shuffle(combined)
        Xtrain[:], Ytrain[:] = zip(*combined)
        Ypred, timeFit, timePred = trainTestNaiveBayes(Xtrain,
                                                       Ytrain,
                                                       Xtest,
                                                       Ytest,
                                                       testnum,
                                                       probs=True)
    elif model == "DNN":
        max_epochs = 40
        nsteps = 1000
        if exper:
            max_epochs = 2
            nsteps = 10000
        Ypred, timeFit, timePred = trainTestDNN(Xtrain,
                                                Ytrain.astype(int),
                                                Xtest,
                                                Ytest.astype(int),
                                                testnum,
                                                save_path,
                                                max_epochs=max_epochs,
                                                nsteps=nsteps)
    elif "LSTM" in model:
        if exper:
            numEpochs = 5
        else:
            numEpochs = 30
        Ypred, timeFit, timePred, all_tests_x, all_tests_y = LSTM.run_LSTM(
            (Xtrain, Ytrain), (Xtest, Ytest),
            model=model,
            save_path=save_path,
            numEpochs=numEpochs)
        #np.save(save_path + "usedY", all_tests_y)
        #np.save(save_path + "usedX", all_tests_x)
    elif "Marginal" in model:
        Ypred, timeFit, timePred = sutil.trainTestMarginal(
            Xtrain, Ytrain.astype(int), Xtest, Ytest.astype(int), testnum,
            save_path)
    elif "Conditional" in model:
        Ypred, timeFit, timePred = sutil.trainTestConditional(
            Xtrain, Ytrain.astype(int), Xtest, Ytest.astype(int), testnum,
            save_path)
    else:
        print("Invalid model type:", model, "Running test number:", testnum)
        return None
    return Ypred, timeFit, timePred, all_tests_x, all_tests_y
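A hypothetical call site for test (the model name and test number are illustrative):

# Quick experimental run on a 1/8 subsample (exper=True also disables saving).
result = test(Xtrain, Ytrain, Xtest, Ytest,
              model="LSTM_128x2", testnum=1, save_path=None, exper=True)
if result is not None:
    Ypred, timeFit, timePred, all_tests_x, all_tests_y = result
    print("fit:", timeFit, "s; predict:", timePred, "s")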
Example #8
class Model(nn.Module):
    def __init__(self,
                 state_space,
                 act_n,
                 batch_size,
                 il_weight,
                 device="cpu"):
        nn.Module.__init__(self)

        self.state_space = state_space
        self.batch_size = batch_size
        self.il_weight = il_weight

        self.device = torch.device(device)

        self.rnd_target = self._rnd_net(state_space)
        self.rnd = self._rnd_net(state_space)

        self.conv = nn.Sequential(
            self._conv_block(state_space[-1], 32, 5, 2, 0),
            self._conv_block(32, 32, 3, 2, 0),
            self._conv_block(32, 64, 3, 2, 0),
            self._conv_block(64, 64, 3, 2, 0),
            self._conv_block(64, 64, 3, 1, 0))

        self.linear = nn.Sequential(nn.Linear(1024, 256), nn.ReLU(),
                                    nn.Dropout(p=0.5))

        self.lstm = LSTM(self.batch_size, device, 256, 256, 1)
        self.lstm_dropout = nn.Dropout(p=0.2)

        self.policy = nn.Sequential(nn.Linear(256, act_n))

        self.noise = GaussianNoise(mean=0, std=0.02)

        self.value = nn.Linear(256, 1)

        self.loss = nn.CrossEntropyLoss(reduction="none")
        self.mse_loss = nn.MSELoss()

        self.optimizer = torch.optim.Adam(self.parameters(), lr=1e-4)

    def _conv_no_dropout(self, filt_in, filt_out, kernel, stride, padding):
        return nn.Sequential(
            nn.Conv2d(filt_in, filt_out, kernel, stride, padding),
            nn.ReLU(),
        )

    def _conv_block(self, filt_in, filt_out, kernel, stride, padding):
        return nn.Sequential(
            self._conv_no_dropout(filt_in, filt_out, kernel, stride, padding),
            nn.Dropout(p=0.2),
        )

    def _rnd_net(self, state_space):
        return nn.Sequential(
            self._conv_no_dropout(state_space[-1], 32, 5, 2, 0),
            self._conv_no_dropout(32, 32, 3, 2, 0),
            self._conv_no_dropout(32, 64, 3, 2, 0),
            self._conv_no_dropout(64, 64, 3, 2, 0),
            self._conv_no_dropout(64, 64, 3, 1, 0))

    def forward(self, inp):
        conv = self.conv(inp)
        conv = conv.view(-1, 1024)

        linear = self.linear(conv)
        linear = linear.view(len(inp) // self.batch_size, -1, 256)

        lstm = self.lstm(linear)
        lstm = lstm.view(-1, 256)
        lstm = self.lstm_dropout(lstm)

        policy = self.policy(lstm)

        actions = nn.Softmax(-1)(policy)
        actions = torch.distributions.Categorical(actions)
        actions = actions.sample()

        value = self.value(lstm)

        return actions, policy, value

    def step(self, inp, stochastic=True):
        conv = self.conv(inp)
        conv = conv.view(-1, 1024)

        linear = self.linear(conv)
        linear = linear.view(1, 1, 256)

        lstm = self.lstm(linear)
        lstm = lstm.view(-1, 256)
        lstm = self.lstm_dropout(lstm)

        policy = self.policy(lstm)

        actions = nn.Softmax(-1)(policy)
        actions = torch.distributions.Categorical(actions)
        actions = actions.sample()

        actions = actions[0].detach().item()
        policy = policy[0].detach().cpu().numpy()

        value = self.value(lstm)
        value = value.item()

        mse = nn.MSELoss()
        rnd_reward = mse(self.rnd(inp), self.rnd_target(inp).detach()).item()

        return actions, policy, value, rnd_reward

    def train_supervised(self, states, actions):
        self.optimizer.zero_grad()
        states = self.noise(states)

        self.lstm.reset_hidden()

        new_acts, policy, value = self(states)

        policy_loss = self.il_weight * self.loss(policy, actions.argmax(1))
        policy_loss = policy_loss.mean()

        policy_loss.backward()
        self.optimizer.step()

        return policy_loss.cpu().detach().numpy()

    def train_reinforce(self, rollouts):
        self.lstm.reset_hidden()

        states, acts, rewards, advs = [
            torch.from_numpy(tensor).to(self.device) for tensor in rollouts
        ]

        states = states.permute(0, 3, 1, 2)

        actions, policy, value = self(states)

        policy_loss = advs.unsqueeze(1) * self.loss(policy, acts.argmax(1))
        policy_loss = policy_loss.mean()

        value_loss = self.mse_loss(value, rewards.unsqueeze(1))

        rnd_loss = self.mse_loss(self.rnd(states),
                                 self.rnd_target(states).detach())

        loss = policy_loss + value_loss + rnd_loss

        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

        return loss.item()

    def reset_hidden_state(self):
        """
        Resets the hidden state for the LSTM
        """
        self.lstm.reset_hidden()

    def save(self, save_path):
        """
        Saves the model at the given save path

        save_path : The path to save the model at
        """
        torch.save(self.state_dict(), save_path)

    def load(self, load_path):
        """
        Loads the model at the given load path

        load_path : The path of the model to load
        """
        self.load_state_dict(torch.load(load_path))
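The LSTM wrapper used by Model is not shown. Its call sites imply a signature of LSTM(batch_size, device, input_size, hidden_size, num_layers), hidden state carried across forward passes, and a reset_hidden() method; a minimal sketch under those assumptions, not the original implementation:

import torch
import torch.nn as nn

class LSTM(nn.Module):
    # Sketch of the stateful wrapper Model assumes: hidden state persists
    # between calls and is cleared with reset_hidden().
    def __init__(self, batch_size, device, input_size, hidden_size, num_layers):
        super().__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers)
        self.batch_size = batch_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.device = torch.device(device)
        self.hidden = None

    def reset_hidden(self):
        # Drop the carried state; it is re-created lazily on the next forward.
        self.hidden = None

    def forward(self, x):
        # x: (seq_len, batch, input_size); re-init state if the batch changed.
        if self.hidden is None or self.hidden[0].shape[1] != x.shape[1]:
            zeros = torch.zeros(self.num_layers, x.shape[1], self.hidden_size,
                                device=self.device)
            self.hidden = (zeros, zeros.clone())
        out, self.hidden = self.lstm(x, self.hidden)
        # Detach so gradients do not flow across training iterations.
        self.hidden = tuple(h.detach() for h in self.hidden)
        return out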
Example #9
# from utils import estimating_transition_matrix

num_options = 5
num_presses = 1000

# Hyper-parameters
sequence_length = 1
input_size = 15
hidden_size = 128
num_layers = 1
output_size = 21
batch_size = 1
num_epochs = 50
learning_rate = 0.01

model = LSTM(input_size, hidden_size, num_layers, output_size).to(device)

# inp = torch.from_numpy(np.zeros((20, 1)).reshape(-1, sequence_length, input_size)).to(device)
# print(inp.shape)
# model(inp.float())
#
# Loss and optimizer
# criterion = nn.CrossEntropyLoss()
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Train the model
total_step = num_presses - 3

transition_matrix = generating_transition_matrix()
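The snippet ends before the training loop. A minimal loop consistent with criterion = nn.MSELoss() could look like the following; how the inputs and targets are built from the key presses and transition_matrix is not shown in the source, so placeholder tensors stand in for them:

for epoch in range(num_epochs):
    for step in range(total_step):
        # Placeholders: the real inputs/targets would come from the key-press
        # sequence and transition_matrix, which this snippet does not show.
        inputs = torch.zeros(batch_size, sequence_length, input_size,
                             device=device)
        targets = torch.zeros(batch_size, output_size, device=device)

        outputs = model(inputs)
        loss = criterion(outputs, targets)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()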
Example #10
plt.imshow(transition_matrix, cmap='magma', interpolation='nearest')
plt.xticks(np.arange(num_options), xticks, fontsize=9)
plt.yticks(np.arange(num_options**2), yticks, fontsize=9)
plt.colorbar()

plt.savefig('transition_matrix.pdf')
plt.show()

input_size = 15
hidden_size = 128
num_layers = 1
output_size = 21
sequence_length = 1

model = LSTM(input_size, hidden_size, num_layers, output_size).to(device)
model.load_state_dict(torch.load('model_LSTM.ckpt'))

sequence_test1 = np.array([[1, 2, 3, 4, 5],
                           [4, 3, 3, 4, 5],
                           [1, 2, 3, 4, 5],
                           [4, 2, 1, 5, 3],
                           [4, 2, 1, 2, 3],
                           [2, 5, 4, 3, 3],
                           [2, 5, 4, 2, 1]])

sequence_test2 = np.array([[1, 2, 3, 3, 4, 5],
                           [4, 3, 3, 3, 4, 5],
                           [1, 2, 3, 3, 4, 5],
                           [4, 2, 1, 1, 5, 3],
                           [4, 2, 1, 1, 2, 3],
Example #11
    def __init__(self, args, pretrained):
        super(BiDAF, self).__init__()
        self.args = args

        # 1. Character Embedding Layer
        self.char_embedding = nn.Embedding(args.char_vocab_size,
                                           args.char_dim,
                                           padding_idx=1)
        nn.init.uniform_(self.char_embedding.weight, -0.001, 0.001)

        self.char_convolution = nn.Sequential(
            nn.Conv2d(1, args.char_channel_size,
                      (args.char_dim, args.char_channel_width)),
            nn.ReLU())

        # 2. Word Embedding Layer
        # initialize word embedding with GloVe
        self.word_embedding = nn.Embedding.from_pretrained(pretrained,
                                                           freeze=True)

        # highway network
        assert self.args.hidden_size * 2 == (self.args.char_channel_size +
                                             self.args.word_dim)
        for i in range(2):
            setattr(
                self, 'highway_linear{}'.format(i),
                nn.Sequential(
                    Linear(args.hidden_size * 2, args.hidden_size * 2),
                    nn.ReLU()))
            setattr(
                self, 'highway_gate{}'.format(i),
                nn.Sequential(
                    Linear(args.hidden_size * 2, args.hidden_size * 2),
                    nn.Sigmoid()))

        # 3. Contextual Embedding Layer
        self.context_LSTM = LSTM(input_size=args.hidden_size * 2,
                                 hidden_size=args.hidden_size,
                                 bidirectional=True,
                                 batch_first=True,
                                 dropout=args.dropout)

        # 4. Attention Flow Layer
        self.att_weight_c = Linear(args.hidden_size * 2, 1)
        self.att_weight_q = Linear(args.hidden_size * 2, 1)
        self.att_weight_cq = Linear(args.hidden_size * 2, 1)

        # 5. Modeling Layer
        self.modeling_LSTM1 = LSTM(input_size=args.hidden_size * 8,
                                   hidden_size=args.hidden_size,
                                   bidirectional=True,
                                   batch_first=True,
                                   dropout=args.dropout)

        self.modeling_LSTM2 = LSTM(input_size=args.hidden_size * 2,
                                   hidden_size=args.hidden_size,
                                   bidirectional=True,
                                   batch_first=True,
                                   dropout=args.dropout)

        # 6. Output Layer
        self.p1_weight_g = Linear(args.hidden_size * 8,
                                  1,
                                  dropout=args.dropout)
        self.p1_weight_m = Linear(args.hidden_size * 2,
                                  1,
                                  dropout=args.dropout)
        self.p2_weight_g = Linear(args.hidden_size * 8,
                                  1,
                                  dropout=args.dropout)
        self.p2_weight_m = Linear(args.hidden_size * 2,
                                  1,
                                  dropout=args.dropout)

        self.output_LSTM = LSTM(input_size=args.hidden_size * 2,
                                hidden_size=args.hidden_size,
                                bidirectional=True,
                                batch_first=True,
                                dropout=args.dropout)

        self.dropout = nn.Dropout(p=args.dropout)
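For reference, the two highway_linear{i}/highway_gate{i} pairs built in step 2 are typically applied like this in the forward pass (a sketch of the standard highway update x = g * H(x) + (1 - g) * x, consistent with the assert in __init__ but not copied from this repository):

def highway_network(self, x1, x2):
    # x1: char-level features, x2: word-level features; their concatenation
    # has size hidden_size * 2, matching the assert in __init__.
    x = torch.cat([x1, x2], dim=-1)
    for i in range(2):
        h = getattr(self, 'highway_linear{}'.format(i))(x)
        g = getattr(self, 'highway_gate{}'.format(i))(x)
        x = g * h + (1 - g) * x
    return x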