def analyze_model(test_inters, testtype, model):
    path_to_load = c.PATH_TO_RESULTS + "ByIntersection" + os.sep
    load_folder = path_to_load + testtype + os.sep
    save_folder = load_folder + "TestOn" + ",".join(
        [str(i) for i in test_inters]) + os.sep
    Ypred = None
    if "LSTM" in model:
        Xtrain, Ytrain = du.getFeaturesLSTM(
            load_folder, testtype,
            list({1, 2, 3, 4, 5, 6, 7, 8, 9} - set(test_inters)))
        #Xtest, Ytest = du.getFeaturesLSTM(load_folder, testtype, test_inters)
        means, stddevs = du.normalize_get_params(Xtrain)
        Xtrain = du.normalize(Xtrain, means, stddevs)
        numFeatures = Xtrain.shape[2]
        Xtest, Ytest = createAnalysisTestData(numFeatures,
                                              traj_len=Xtrain.shape[1])
        # train the LSTM again
        Ypred, timeFit, timePred, all_tests_x, all_tests_y = LSTM.run_LSTM(
            (Xtrain, Ytrain), (Xtest, Ytest), model=model,
            save_path="ignore.out")
    else:
        Xtrain, Ytrain = du.getFeaturesnonLSTM(
            load_folder, testtype,
            list({1, 2, 3, 4, 5, 6, 7, 8, 9} - set(test_inters)))
        #Xtest, Ytest = du.getFeaturesnonLSTM(load_folder, testtype, test_inters)
        means, stddevs = du.normalize_get_params(Xtrain)
        Xtrain = du.normalize(Xtrain, means, stddevs)
        numFeatures = Xtrain.shape[1]
        Xtest, _ = createAnalysisTestData(numFeatures, traj_len=1)
        classifier = skflow.DNNClassifier(
            feature_columns=tf.contrib.learn.
            infer_real_valued_columns_from_input(Xtrain),
            hidden_units=[128, 128],
            n_classes=3)  #, model_dir=save_folder)
        #try:
        #    Ypred = classifier.predict_proba(Xtest)
        #except:
        print("Could not load saved model, re-training :(.")
        Ytrain = [int(i - 1) for i in Ytrain]
        start = time.clock()
        max_epochs = 10
        if max_epochs:
            start2 = time.clock()
            for epoch in range(max_epochs):
                classifier.fit(Xtrain, Ytrain, steps=1000)
                end2 = time.clock()
                print("Epoch", epoch, "Done. Took:", end2 - start2)
                start2 = end2
        else:
            classifier.fit(Xtrain, Ytrain)  #, logdir=log_path)
        Ypred = classifier.predict_proba(Xtest)
        end = time.clock()
        timeFit = end - start
        print("Done fitting, time spent:", timeFit)
    np.savetxt(save_folder + "analysis_Ypred_" + model, np.array(Ypred))
    print(model, "analysis predictions saved, test", testtype, save_folder,
          "analysis_Ypred_", model)
    return Ypred
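# The du.normalize_get_params / du.normalize pair above is assumed to follow
# the usual z-scoring contract: fit statistics on the training set, reuse them
# unchanged on test data. A minimal NumPy sketch of that assumption (not the
# real du module):
import numpy as np

def normalize_get_params_sketch(X):
    # Per-feature mean and standard deviation over the sample axis.
    means = X.mean(axis=0)
    stddevs = X.std(axis=0)
    stddevs[stddevs == 0] = 1.0  # avoid division by zero for constant features
    return means, stddevs

def normalize_sketch(X, means, stddevs):
    # Same statistics for train and test, so both live on one scale.
    return (X - means) / stddevs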
def calculate_loss(self):
    """
    Training entry point: builds the network and computes the loss.
    :return:
    """
    # Log of the sales volume
    raw_features = self.create_features()
    embedding_dic = self.create_embedding()
    # Embed the time attributes
    embedding_featrues = self.encoder_embedding(embedding_dic)
    # Reduce the user profile features with a fully connected network
    embedding_profile_feature = tf.layers.dense(self.profile_features,
                                                10,
                                                activation=tf.nn.relu)
    encoder_features = tf.concat(
        [raw_features, embedding_featrues, embedding_profile_feature],
        axis=2)
    outputs, final_state = LSTM(encoder_features, self.encode_len,
                                self.batch_size, self.n_hidden_units)
    c, h = final_state
    # Add two fully connected layers
    output_d1 = tf.layers.dense(h, self.n_hidden_units,
                                activation=tf.nn.relu)
    output_d2 = tf.layers.dense(output_d1 + h, self.num_class,
                                activation=None)
    # Mask the outputs so predictions fall within the recommended plan list
    mask_output = output_d2 * self.plan_mask
    # Predicted class index (the softmax outputs lie between 0 and 1)
    self.preds = tf.argmax(tf.nn.softmax(mask_output), axis=-1)
    self.labels = tf.one_hot(self.click_mode, 12)
    # Add class weights (currently uniform; earlier experiments commented out)
    # weight_list = np.ones(12, dtype=np.float32)
    # weight_list[[3, 4, 8, 10, 11]] = 0.1
    # weight_list[[7, 9]] = 2.5
    # weight_list[[2, 5]] = 3
    # weight_list = np.array([93., 141., 273., 49., 25., 95., 24., 156., 4., 20., 30., 12.], dtype=np.float32)
    weight_list = np.array(
        [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
        dtype=np.float32)
    weight_tensor = tf.convert_to_tensor(weight_list)
    weight_labels = self.labels * weight_tensor
    self.loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
        logits=mask_output, labels=weight_labels),
                               axis=-1)
    # self.loss = tf.reduce_mean(tf.losses.softmax_cross_entropy(logits=mask_output, labels=weight_labels), axis=-1)  # (batch, 64)
    self.score = tf.reduce_mean(
        tf.cast(tf.equal(self.click_mode, tf.cast(self.preds, tf.int32)),
                tf.float32))
    # self.score = f1_score()
    self.prediction_tensors = {
        'labels': self.click_mode,
        'preds': self.preds,
        'sid': self.sid,
    }
    return self.loss
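# One subtlety in the masking step above: multiplying logits by a 0/1
# plan_mask sets masked logits to 0 rather than removing them, so a masked
# class still receives exp(0) worth of softmax mass. A common alternative
# (an assumption for illustration, not what calculate_loss does) is additive
# masking, sketched here:
import tensorflow as tf

logits = tf.constant([[2.0, 0.5, -1.0]])
mask = tf.constant([[1.0, 1.0, 0.0]])   # third class is not in the plan list
masked = logits + (mask - 1.0) * 1e9    # masked logits driven to -1e9
probs = tf.nn.softmax(masked)           # masked class gets ~0 probability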
def __init__(self, args):
    super(BIDAF, self).__init__(args)
    '''
    already declared in BaseModel:
    self.config = args
    self.embedding = nn.Embedding(args.word_num, args.embed_dim)
    self.loss_func = MarginLoss(args.adv_temperature, margin=args.margin)
    '''
    self.context_LSTM = LSTM(input_size=args.embed_dim,
                             hidden_size=args.hidden_dim,
                             num_layers=1,
                             dropout=args.dropout)
    self.att_Linear = nn.Linear(args.hidden_dim, 1)
    self.att_Linear2 = nn.Linear(args.hidden_dim, 1)
    self.att_Linear3 = nn.Linear(args.hidden_dim, 1)
    self.model_LSTM = LSTM(input_size=4 * args.hidden_dim,
                           hidden_size=args.hidden_dim,
                           num_layers=2,
                           dropout=args.dropout,
                           batch_first=True,
                           bidirectional=True)
    self.output_Linear = nn.Linear(6 * args.hidden_dim, 1)
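# The three rank-1 projections above (att_Linear, att_Linear2, att_Linear3)
# are the usual ingredients of the BiDAF similarity matrix
# S[t, j] = w1·c_t + w2·q_j + w3·(c_t * q_j). A self-contained sketch of that
# combination (names and shapes assumed, not taken from this model's forward
# pass; w3's bias term is omitted):
import torch
import torch.nn as nn

hidden = 8
w1, w2, w3 = nn.Linear(hidden, 1), nn.Linear(hidden, 1), nn.Linear(hidden, 1)
c = torch.randn(2, 5, hidden)  # (batch, context_len, hidden)
q = torch.randn(2, 3, hidden)  # (batch, question_len, hidden)
s = (w1(c)                                 # (2, 5, 1), broadcast over questions
     + w2(q).transpose(1, 2)               # (2, 1, 3), broadcast over context
     + torch.matmul(c * w3.weight.squeeze(0), q.transpose(1, 2)))
print(s.shape)                             # torch.Size([2, 5, 3])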
def validateLSTM(test_folder, Xtrain, Ytrain, Xtest, Ytest, numEpochs=2):
    print(Xtrain.shape)
    print(Ytrain.shape)
    print(Xtest.shape)
    print(Ytest.shape)
    for model in ["LSTM_128x2"]:  #, "test", "test1", "test2", "test3", "test4"]:
        print("\n")
        print("===========================")
        print("Starting:", model)
        p_dists, timeTrain, timePred, all_tests_x, all_tests_y = LSTM.run_LSTM(
            (Xtrain, Ytrain), (Xtest, Ytest), model=model, save_path=None,
            numEpochs=numEpochs)
        np.savetxt(test_folder + "Ypred_LSTM", np.array(p_dists))
        score = 0
        numWrong = 0
        #all_tests_x = np.reshape(Xtest, (Xtest.shape[0] * Xtest.shape[1], Xtest.shape[2]))
        #all_tests_y = np.reshape(Ytest, (Ytest.shape[0] * Ytest.shape[1], Ytest.shape[2]))
        print(all_tests_x.shape)
        print(all_tests_y.shape)
        # Histogram of the label values present in the test set
        typesOfY = {}
        for val in all_tests_y.flat:
            typesOfY[val] = typesOfY.get(val, 0) + 1
        print(typesOfY)
        print("Total predictions:", len(all_tests_y))
        print(p_dists.shape)
        for i in range(len(all_tests_y)):  # was len(all_tests_y) - 1, which skipped the last prediction
            if i >= len(p_dists):  # was "i > len(p_dists)", which could index one past the end
                break
            actual = all_tests_y[i]
            p_right = p_dists[i][int(actual)]
            score += 1 - p_right
            if p_right != max(p_dists[i]):
                numWrong += 1
        print("Score:", score)
        print("Num wrong:", numWrong)
        sumP0 = sum(p_dists[:, 0])
        print("Sum of P for 0:", sumP0)
    return
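# A vectorized equivalent of the scoring loop in validateLSTM (a sketch; it
# assumes p_dists rows line up one-to-one with all_tests_y):
import numpy as np

def score_predictions(p_dists, y):
    y = np.asarray(y).ravel()
    n = min(len(p_dists), len(y))
    p_right = p_dists[np.arange(n), y[:n].astype(int)]
    score = np.sum(1.0 - p_right)                          # mass missed on the true class
    num_wrong = np.sum(p_right < p_dists[:n].max(axis=1))  # argmax disagreed with the label
    return score, num_wrong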
def create_lstm_net(self):
    """
    Build the LSTM network.
    :return:
    """
    self.encode_features = tf.concat([
        tf.expand_dims(self.distance_encode, 2),
        tf.expand_dims(self.eta_encode, 2),
        tf.expand_dims(self.price_encode, 2),
        tf.one_hot(self.mode_encode, 12),
        # tf.tile(tf.reshape(self.o1, shape=(tf.shape(self.o1)[0], 1, 1)), (1, self.num_encode_steps, 1)),
        # tf.tile(tf.reshape(self.o2, shape=(tf.shape(self.o2)[0], 1, 1)), (1, self.num_encode_steps, 1)),
        # tf.tile(tf.reshape(self.d1, shape=(tf.shape(self.d1)[0], 1, 1)), (1, self.num_encode_steps, 1)),
        # tf.tile(tf.reshape(self.d2, shape=(tf.shape(self.d2)[0], 1, 1)), (1, self.num_encode_steps, 1)),
        # tf.tile(tf.reshape(self.euc_dis, shape=(tf.shape(self.euc_dis)[0], 1, 1)), (1, self.num_encode_steps, 1))
    ], axis=2)
    # encoder_features = tf.concat([self.encode_features, embedding_featrues, embedding_profile_feature], axis=2)
    outputs, final_state = LSTM(self.encode_features, self.encode_len,
                                self.batch_size, self.n_hidden_units)
    # TODO: add attention
    return outputs, final_state
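# The trailing comment above leaves attention as a to-do; one possible shape
# for it (an assumed Luong-style dot-product pooling over the encoder
# outputs, not the author's design):
import tensorflow as tf

def attention_pool(outputs, final_h):
    # outputs: (batch, steps, hidden); final_h: (batch, hidden)
    scores = tf.matmul(outputs, tf.expand_dims(final_h, 2))  # (batch, steps, 1)
    weights = tf.nn.softmax(scores, axis=1)                  # attend over time
    return tf.reduce_sum(weights * outputs, axis=1)          # (batch, hidden)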
def test(Xtrain, Ytrain, Xtest, Ytest, model, testnum, save_path=None,
         exper=False):
    all_tests_x = np.array([])
    all_tests_y = np.array([])
    print("Starting model", model, "test", testnum)
    if exper:
        Xtrain = Xtrain[::8]
        Ytrain = Ytrain[::8]
        save_path = None
    if model == "SVM":
        random.seed(42)
        combined = list(zip(Xtrain, Ytrain))
        random.shuffle(combined)
        Xtrain[:], Ytrain[:] = zip(*combined)
        iters = 50000
        if exper:
            iters = 100000
            Xtrain = Xtrain[::3]  # 24th --- a further every 5th to make it every 25th
            Ytrain = Ytrain[::3]
        Ypred, timeFit, timePred = trainTestSVM(Xtrain, Ytrain, Xtest, Ytest,
                                                testnum, save_path,
                                                probs=True, num_iter=iters)
    elif model == "nb":
        random.seed(42)
        combined = list(zip(Xtrain, Ytrain))
        random.shuffle(combined)
        Xtrain[:], Ytrain[:] = zip(*combined)
        Ypred, timeFit, timePred = trainTestNaiveBayes(Xtrain, Ytrain, Xtest,
                                                       Ytest, testnum,
                                                       probs=True)
    elif model == "DNN":
        max_epochs = 40
        nsteps = 1000
        if exper:
            max_epochs = 2
            nsteps = 10000
        Ypred, timeFit, timePred = trainTestDNN(Xtrain, Ytrain.astype(int),
                                                Xtest, Ytest.astype(int),
                                                testnum, save_path,
                                                max_epochs=max_epochs,
                                                nsteps=nsteps)
    elif "LSTM" in model:
        if exper:
            numEpochs = 5
        else:
            numEpochs = 30
        Ypred, timeFit, timePred, all_tests_x, all_tests_y = LSTM.run_LSTM(
            (Xtrain, Ytrain), (Xtest, Ytest), model=model,
            save_path=save_path, numEpochs=numEpochs)
        #np.save(save_path + "usedY", all_tests_y)
        #np.save(save_path + "usedX", all_tests_x)
    elif "Marginal" in model:
        Ypred, timeFit, timePred = sutil.trainTestMarginal(
            Xtrain, Ytrain.astype(int), Xtest, Ytest.astype(int), testnum,
            save_path)
    elif "Conditional" in model:
        Ypred, timeFit, timePred = sutil.trainTestConditional(
            Xtrain, Ytrain.astype(int), Xtest, Ytest.astype(int), testnum,
            save_path)
    else:
        print("Invalid model type:", model, "Running test number:", testnum)
        return None
    return Ypred, timeFit, timePred, all_tests_x, all_tests_y
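# The in-place shuffle idiom used in the SVM and nb branches above, isolated.
# Slice assignment (Xtrain[:] = ...) writes back into the original sequence,
# which is why features and labels stay paired after the shuffle:
import random

Xs = [[1], [2], [3]]
Ys = [0, 1, 2]
random.seed(42)
pairs = list(zip(Xs, Ys))
random.shuffle(pairs)
Xs[:], Ys[:] = zip(*pairs)  # the same permutation is applied to both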
class Model(nn.Module):
    def __init__(self, state_space, act_n, batch_size, il_weight,
                 device="cpu"):
        nn.Module.__init__(self)
        self.state_space = state_space
        self.batch_size = batch_size
        self.il_weight = il_weight
        self.device = torch.device(device)
        self.rnd_target = self._rnd_net(state_space)
        self.rnd = self._rnd_net(state_space)
        self.conv = nn.Sequential(
            self._conv_block(state_space[-1], 32, 5, 2, 0),
            self._conv_block(32, 32, 3, 2, 0),
            self._conv_block(32, 64, 3, 2, 0),
            self._conv_block(64, 64, 3, 2, 0),
            self._conv_block(64, 64, 3, 1, 0))
        self.linear = nn.Sequential(nn.Linear(1024, 256), nn.ReLU(),
                                    nn.Dropout(p=0.5))
        self.lstm = LSTM(self.batch_size, device, 256, 256, 1)
        self.lstm_dropout = nn.Dropout(p=0.2)
        self.policy = nn.Sequential(nn.Linear(256, act_n))
        self.noise = GaussianNoise(mean=0, std=0.02)
        self.value = nn.Linear(256, 1)
        self.loss = nn.CrossEntropyLoss(reduction="none")
        self.mse_loss = nn.MSELoss()
        self.optimizer = torch.optim.Adam(self.parameters(), lr=1e-4)

    def _conv_no_dropout(self, filt_in, filt_out, kernel, stride, padding):
        return nn.Sequential(
            nn.Conv2d(filt_in, filt_out, kernel, stride, padding),
            nn.ReLU(),
        )

    def _conv_block(self, filt_in, filt_out, kernel, stride, padding):
        return nn.Sequential(
            self._conv_no_dropout(filt_in, filt_out, kernel, stride, padding),
            nn.Dropout(p=0.2),
        )

    def _rnd_net(self, state_space):
        # Target/predictor networks for Random Network Distillation (RND)
        return nn.Sequential(
            self._conv_no_dropout(state_space[-1], 32, 5, 2, 0),
            self._conv_no_dropout(32, 32, 3, 2, 0),
            self._conv_no_dropout(32, 64, 3, 2, 0),
            self._conv_no_dropout(64, 64, 3, 2, 0),
            self._conv_no_dropout(64, 64, 3, 1, 0))

    def forward(self, inp):
        conv = self.conv(inp)
        conv = conv.view(-1, 1024)
        linear = self.linear(conv)
        linear = linear.view(len(inp) // self.batch_size, -1, 256)
        lstm = self.lstm(linear)
        lstm = lstm.view(-1, 256)
        lstm = self.lstm_dropout(lstm)
        policy = self.policy(lstm)
        actions = nn.Softmax(-1)(policy)
        actions = torch.distributions.Categorical(actions)
        actions = actions.sample()
        value = self.value(lstm)
        return actions, policy, value

    def step(self, inp, stochastic=True):
        conv = self.conv(inp)
        conv = conv.view(-1, 1024)
        linear = self.linear(conv)
        linear = linear.view(1, 1, 256)
        lstm = self.lstm(linear)
        lstm = lstm.view(-1, 256)
        lstm = self.lstm_dropout(lstm)
        policy = self.policy(lstm)
        actions = nn.Softmax(-1)(policy)
        actions = torch.distributions.Categorical(actions)
        actions = actions.sample()
        actions = actions[0].detach().item()
        policy = policy[0].detach().cpu().numpy()
        value = self.value(lstm)
        value = value.item()
        mse = nn.MSELoss()
        rnd_reward = mse(self.rnd(inp), self.rnd_target(inp).detach()).item()
        return actions, policy, value, rnd_reward

    def train_supervised(self, states, actions):
        self.optimizer.zero_grad()
        states = self.noise(states)
        self.lstm.reset_hidden()
        new_acts, policy, value = self(states)
        policy_loss = self.il_weight * self.loss(policy, actions.argmax(1))
        policy_loss = policy_loss.mean()
        policy_loss.backward()
        self.optimizer.step()
        return policy_loss.cpu().detach().numpy()

    def train_reinforce(self, rollouts):
        self.lstm.reset_hidden()
        states, acts, rewards, advs = [
            torch.from_numpy(tensor).to(self.device) for tensor in rollouts
        ]
        states = states.permute(0, 3, 1, 2)
        actions, policy, value = self(states)
        # advs and the per-sample losses are both 1-D; the original
        # advs.unsqueeze(1) would broadcast (N, 1) * (N,) into an (N, N) matrix
        policy_loss = advs * self.loss(policy, acts.argmax(1))
        policy_loss = policy_loss.mean()
        value_loss = self.mse_loss(value, rewards.unsqueeze(1))
        rnd_loss = self.mse_loss(self.rnd(states),
                                 self.rnd_target(states).detach())
        loss = policy_loss + value_loss + rnd_loss
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

        return loss.item()

    def reset_hidden_state(self):
        """
        Resets the hidden state for the LSTM
        """
        self.lstm.reset_hidden()

    def save(self, save_path):
        """
        Saves the model at the given save path

        save_path : The path to save the model at
        """
        torch.save(self.state_dict(), save_path)

    def load(self, load_path):
        """
        Loads the model at the given load path

        load_path : The path of the model to load
        """
        self.load_state_dict(torch.load(load_path))
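# Shape check for Model's conv stack (a standalone sketch: with zero padding,
# the five blocks reach the 1024-unit linear layer exactly when the input
# frames are 128x128; the 3-channel input is an assumption):
import torch
import torch.nn as nn

conv = nn.Sequential(
    nn.Conv2d(3, 32, 5, 2, 0), nn.ReLU(),
    nn.Conv2d(32, 32, 3, 2, 0), nn.ReLU(),
    nn.Conv2d(32, 64, 3, 2, 0), nn.ReLU(),
    nn.Conv2d(64, 64, 3, 2, 0), nn.ReLU(),
    nn.Conv2d(64, 64, 3, 1, 0), nn.ReLU(),
)
out = conv(torch.zeros(1, 3, 128, 128))
print(out.shape)  # torch.Size([1, 64, 4, 4]); 64 * 4 * 4 = 1024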
# from utils import estimating_transition_matrix

num_options = 5
num_presses = 1000

# Hyper-parameters
sequence_length = 1
input_size = 15
hidden_size = 128
num_layers = 1
output_size = 21
batch_size = 1
num_epochs = 50
learning_rate = 0.01

model = LSTM(input_size, hidden_size, num_layers, output_size).to(device)

# inp = torch.from_numpy(np.zeros((20, 1)).reshape(-1, sequence_length, input_size)).to(device)
# print(inp.shape)
# model(inp.float())

# Loss and optimizer
# criterion = nn.CrossEntropyLoss()
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Train the model
total_step = num_presses - 3
transition_matrix = generating_transition_matrix()
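# One hypothetical training step with the setup above, written against the
# script's own names (LSTM, device, and generating_transition_matrix are
# defined elsewhere in this file, so this is a sketch rather than standalone
# code; the zero tensors stand in for real batches):
# for step in range(total_step):
#     x = torch.zeros(batch_size, sequence_length, input_size).to(device)
#     target = torch.zeros(batch_size, output_size).to(device)
#     optimizer.zero_grad()
#     loss = criterion(model(x), target)  # MSE between prediction and target
#     loss.backward()
#     optimizer.step()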
plt.imshow(transition_matrix, cmap='magma', interpolation='nearest')
plt.xticks(np.arange(num_options), xticks, fontsize=9)
plt.yticks(np.arange(num_options**2), yticks, fontsize=9)
plt.colorbar()
plt.savefig('transition_matrix.pdf')
plt.show()

input_size = 15
hidden_size = 128
num_layers = 1
output_size = 21
sequence_length = 1

model = LSTM(input_size, hidden_size, num_layers, output_size).to(device)
model.load_state_dict(torch.load('model_LSTM.ckpt'))

sequence_test1 = np.array([[1, 2, 3, 4, 5],
                           [4, 3, 3, 4, 5],
                           [1, 2, 3, 4, 5],
                           [4, 2, 1, 5, 3],
                           [4, 2, 1, 2, 3],
                           [2, 5, 4, 3, 3],
                           [2, 5, 4, 2, 1]])
sequence_test2 = np.array([[1, 2, 3, 3, 4, 5],
                           [4, 3, 3, 3, 4, 5],
                           [1, 2, 3, 3, 4, 5],
                           [4, 2, 1, 1, 5, 3],
                           [4, 2, 1, 1, 2, 3],
def __init__(self, args, pretrained):
    super(BiDAF, self).__init__()
    self.args = args

    # 1. Character Embedding Layer
    self.char_embedding = nn.Embedding(args.char_vocab_size,
                                       args.char_dim,
                                       padding_idx=1)
    nn.init.uniform_(self.char_embedding.weight, -0.001, 0.001)
    self.char_convolution = nn.Sequential(
        nn.Conv2d(1, args.char_channel_size,
                  (args.char_dim, args.char_channel_width)), nn.ReLU())

    # 2. Word Embedding Layer
    # initialize word embedding with GloVe
    self.word_embedding = nn.Embedding.from_pretrained(pretrained,
                                                       freeze=True)

    # highway network
    assert self.args.hidden_size * 2 == (self.args.char_channel_size +
                                         self.args.word_dim)
    for i in range(2):
        setattr(
            self, 'highway_linear{}'.format(i),
            nn.Sequential(Linear(args.hidden_size * 2, args.hidden_size * 2),
                          nn.ReLU()))
        setattr(
            self, 'highway_gate{}'.format(i),
            nn.Sequential(Linear(args.hidden_size * 2, args.hidden_size * 2),
                          nn.Sigmoid()))

    # 3. Contextual Embedding Layer
    self.context_LSTM = LSTM(input_size=args.hidden_size * 2,
                             hidden_size=args.hidden_size,
                             bidirectional=True,
                             batch_first=True,
                             dropout=args.dropout)

    # 4. Attention Flow Layer
    self.att_weight_c = Linear(args.hidden_size * 2, 1)
    self.att_weight_q = Linear(args.hidden_size * 2, 1)
    self.att_weight_cq = Linear(args.hidden_size * 2, 1)

    # 5. Modeling Layer
    self.modeling_LSTM1 = LSTM(input_size=args.hidden_size * 8,
                               hidden_size=args.hidden_size,
                               bidirectional=True,
                               batch_first=True,
                               dropout=args.dropout)
    self.modeling_LSTM2 = LSTM(input_size=args.hidden_size * 2,
                               hidden_size=args.hidden_size,
                               bidirectional=True,
                               batch_first=True,
                               dropout=args.dropout)

    # 6. Output Layer
    self.p1_weight_g = Linear(args.hidden_size * 8, 1, dropout=args.dropout)
    self.p1_weight_m = Linear(args.hidden_size * 2, 1, dropout=args.dropout)
    self.p2_weight_g = Linear(args.hidden_size * 8, 1, dropout=args.dropout)
    self.p2_weight_m = Linear(args.hidden_size * 2, 1, dropout=args.dropout)
    self.output_LSTM = LSTM(input_size=args.hidden_size * 2,
                            hidden_size=args.hidden_size,
                            bidirectional=True,
                            batch_first=True,
                            dropout=args.dropout)

    self.dropout = nn.Dropout(p=args.dropout)
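# How setattr-built highway layers like the ones above are typically consumed
# in forward(): the sigmoid gate blends the transformed signal with the
# carried input. A self-contained sketch of that standard combination (not a
# copy of this model's forward pass):
import torch
import torch.nn as nn

class HighwaySketch(nn.Module):
    def __init__(self, dim):
        super(HighwaySketch, self).__init__()
        for i in range(2):
            setattr(self, 'highway_linear{}'.format(i),
                    nn.Sequential(nn.Linear(dim, dim), nn.ReLU()))
            setattr(self, 'highway_gate{}'.format(i),
                    nn.Sequential(nn.Linear(dim, dim), nn.Sigmoid()))

    def forward(self, x):
        for i in range(2):
            h = getattr(self, 'highway_linear{}'.format(i))(x)
            g = getattr(self, 'highway_gate{}'.format(i))(x)
            x = g * h + (1 - g) * x  # gated blend of transform and carry
        return x

print(HighwaySketch(8)(torch.randn(2, 8)).shape)  # torch.Size([2, 8])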