def __next__(self):
    """Produce the next (train, test) pair of DataStreamer column views.

    Advances both the training and test batch-index iterators, and selects
    the corresponding columns of the underlying streamer's X and Y.
    """
    source = self.data_streamer

    def column_subset(index_batch):
        # Column-select X and Y by the batch's permutation indices (on GPU).
        perm = cuda.LongTensor(index_batch)
        return DataStreamer(X=source.X[:, perm], Y=source.Y[:, perm])

    return (column_subset(next(self.training_batches)),
            column_subset(next(self.test_batches)))
def train():
    """Train a BentesModel on the iceberg dataset with a KL-annealed loss.

    Runs an effectively endless loop: each step draws a rotated training
    batch, optimizes cross-entropy plus BETA * mean-KL, evaluates on a fixed
    50-example validation slice, and prints extensive diagnostics including
    the first kernel's parameters and gradients.
    """
    PYRNG = Random(0)  # fixed seed so splits/batches are reproducible
    ttv_proportions = dict(test=0.001, train=.96, validation=0.039)
    # Whiten, add flips, mask regions outside circle, train/test/val split
    DATA = (get_training_data().to_gpu().normalize().enrich().mask_circle().
            test_train_validation(PYRNG, **ttv_proportions))
    # Fixed validation sample reused every step for a comparable val metric.
    VALDATA = DATA.validation.get_examples(50, PYRNG)
    VALIMGS = Variable(T.from_numpy(VALDATA.images).type(TP.FloatTensor))
    VALCLASSES = Variable(TP.LongTensor(VALDATA.is_iceberg))
    BETA = 1e1          # initial KL weight; decayed geometrically each step
    BETA_FACTOR = .9999
    BATCH_SIZE = 32
    model = BentesModel()
    if T.cuda.is_available():
        model = model.cuda()
    optimizer = optim.Adam(model.parameters())
    scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9999)
    # Effectively "train forever"; interrupted manually.
    for i in range(1_000_000_000):
        scheduler.step()
        optimizer.zero_grad()
        batch = DATA.train.get_examples(BATCH_SIZE, PYRNG).rotate(PYRNG)
        imgvar = Variable(T.from_numpy(batch.images).type(TP.FloatTensor))
        result = model(imgvar)
        classvar = Variable(TP.LongTensor(batch.is_iceberg))
        # "accuracy" is actually the cross-entropy term of the loss.
        accuracy = F.cross_entropy(result.activations, classvar)
        kl = T.mean(result.kl)
        loss = accuracy + BETA * kl
        loss.backward()
        # Validation forward pass is for monitoring only; its loss is never
        # backpropagated (backward() was already called above).
        valresult = model(VALIMGS)
        valaccuracy = F.cross_entropy(valresult.activations, VALCLASSES)
        optimizer.step()
        gf = lambda t: f'{t.data[0]:12.3f}'  # noqa: E731
        print(f'Step: {i:6d} CE: {gf(accuracy)} KL: {gf(kl)} loss: {gf(loss)} '
              f'val: {gf(valaccuracy)}')
        # Log-probabilities assigned to the true class of each batch example.
        scores = (F.log_softmax(
            result.activations,
            dim=1).data.cpu().numpy()[list(range(BATCH_SIZE)),
                                      batch.is_iceberg])
        # Histogram of those true-class log-probs, printed as (count, edge) rows.
        print(
            np.array(
                list(zip(*(s.astype(float)
                           for s in np.histogram(scores))))).T)
        probs = F.softmax(result.activations).data.cpu().numpy().tolist()
        pprint(list(zip(batch.is_iceberg, probs)))
        BETA *= BETA_FACTOR  # anneal the KL weight
        print('first layer parameters/gradients for first kernel')
        print('convolution')
        print(model.layers[1].layer.weight[0])
        print(model.layers[1].layer.weight.grad[0])
        print('noise')
        print(model.layers[1].noise.weight[0])
        print(model.layers[1].noise.weight.grad[0])
        print('prior mean')
        print(model.layers[1].prior.mean[0])
        print(model.layers[1].prior.mean.grad[0])
        print('prior alpha')
        print(model.layers[1].prior.alpha[0])
        print(model.layers[1].prior.alpha.grad[0])
def forward(self, batch_item_index, place_correlation):
    """
    The forward pass of the autoencoder.
    :param batch_item_index: a list of arrays that each array stores the place id a user has been to
    :param place_correlation: the pairwise poi relation matrix
    :return: the predicted ratings
    """
    # NOTE(review): item embeddings are read directly out of linear1's weight
    # matrix (one column per place id); assumes batch_item_index entries are
    # integer arrays — confirm against the caller.
    item_vector = self.linear1.weight[:, T.LongTensor(batch_item_index[0].
                                                      astype(np.int32))]
    # Compute the neighbor inner products
    inner_product = item_vector.t().mm(self.linear4.weight.t())
    # Sparse correlation rows for the visited places, densified per user.
    item_corr = Variable(
        torch.from_numpy(
            place_correlation[batch_item_index[0]].toarray()).type(
                T.FloatTensor))
    inner_product = inner_product * item_corr
    # Sum the correlation-weighted products over visited places: one row
    # of neighborhood scores for user 0.
    neighbor_product = inner_product.sum(dim=0).unsqueeze(0)
    # Compute the self attention score
    score = F.tanh(self.attention_matrix1.mm(item_vector))
    score = F.softmax(score, dim=1)
    embedding_matrix = score.mm(item_vector.t())
    linear_z = self.self_attention(embedding_matrix.t()).t()
    # print score
    # Remaining users in the batch: same computation, accumulated by
    # concatenation along dim 0.
    for i in range(1, len(batch_item_index)):
        item_vector = self.linear1.weight[:, T.LongTensor(batch_item_index[i].
                                                          astype(np.int32))]
        # Compute the neighbor inner products
        inner_product = item_vector.t().mm(self.linear4.weight.t())
        item_corr = Variable(
            torch.from_numpy(
                place_correlation[batch_item_index[i]].toarray()).type(
                    T.FloatTensor))
        inner_product = inner_product * item_corr
        inner_product = inner_product.sum(dim=0).unsqueeze(0)
        neighbor_product = torch.cat((neighbor_product, inner_product), 0)
        # Compute the self attention score
        score = F.tanh(self.attention_matrix1.mm(item_vector))
        score = F.softmax(score, dim=1)
        embedding_matrix = score.mm(item_vector.t())
        tmp_z = self.self_attention(embedding_matrix.t()).t()
        linear_z = torch.cat((linear_z, tmp_z), 0)
    # Decoder MLP: tanh + dropout stack, then sigmoid over the item scores
    # plus the neighborhood term.
    z = F.tanh(linear_z)
    z = F.dropout(z, training=self.training, p=self.dropout_rate)
    z = F.tanh(self.linear2(z))
    z = F.dropout(z, training=self.training, p=self.dropout_rate)
    d_z = F.tanh(self.linear3(z))
    d_z = F.dropout(d_z, training=self.training, p=self.dropout_rate)
    y_pred = F.sigmoid(self.linear4(d_z) + neighbor_product)
    return y_pred
def neg_log_likelihood(self, sentences, tags):
    """CRF negative log-likelihood, summed over a batch of sentences.

    Boundary markers (first and last token/tag of every sentence) are
    stripped before scoring. Per sentence the loss is the log-partition
    (forward algorithm) minus the gold-path score.
    """
    batch_loss = Variable(cuda.FloatTensor([0]))
    for words, gold in zip(sentences, tags):
        # Drop the sentence start/end markers before encoding.
        word_var = Variable(cuda.LongTensor(words[1:-1]))
        gold_var = Variable(cuda.LongTensor(gold[1:-1]))
        emissions = self._get_lstm_features(word_var)
        # log Z(x) - score(x, y_gold)
        batch_loss = batch_loss + (self._forward_alg(emissions) -
                                   self._score_sentence(emissions, gold_var))
    return batch_loss
def forward(self, input_user, input_item):
    """Encode (user, item), then decode a tip from an empty-output seed.

    Returns the encoder's regression result and review softmax together
    with the decoder's output.
    """
    regression_result, review_softmax, context = self.encoder.forward(
        input_user, input_item)
    # One empty-output template per batch element seeds the decoder.
    seed = self.empty_output * len(input_user)
    decoder_input = Variable(device.LongTensor(seed))
    output, _hidden = self.decoder.forward(decoder_input, context)
    return regression_result, review_softmax, output
def init_start_input(self, batch_size):
    """Return the cached decoder GO (SOS) input tensor.

    Rebuilds the cache whenever it is missing or its batch dimension no
    longer matches the requested batch size.
    """
    cached = self.init_input
    if cached is not None and cached.size(0) == batch_size:
        return cached
    # One SOS token id per batch element, shaped (batch_size, 1).
    sos_row = [[self.vocab.SOS_token_id] * batch_size]
    self.init_input = Variable(device.LongTensor(sos_row)).view(batch_size, -1)
    return self.init_input
def teacher_forcing(self, feed_x, is_train=False):
    """Teacher-forced NLL over a batch of sequences.

    Feeds the ground-truth token at each step (starting from the start
    token), accumulates NLLLoss over the sequence, and optionally takes an
    optimizer step. Returns (loss as numpy scalar, per-step log predictions
    of shape batch x seq_len x vocab).
    """
    loss_fn = nn.NLLLoss()
    if type(feed_x) is np.ndarray:
        feed_x = tcg.LongTensor(feed_x)
    batch_size, seq_len = feed_x.size()
    # Initial input: the start token replicated across the batch.
    x_t = train.Variable(
        tc.LongTensor(
            np.array([self.start_token] * batch_size, dtype=np.int32)))
    log_g_prediction = train.Variable(
        tc.zeros(batch_size, seq_len, self.vocab_size))
    feed_x = feed_x.permute(1, 0)  # seq_len x batch_size
    h = self.__h0_getter(batch_size)
    c = self.__h0_getter(batch_size)
    if self.is_cuda:
        x_t = x_t.cuda()
    loss = 0
    for i in range(seq_len):
        log_pred, h, c = self.forward(x_t, h, c)
        # Teacher forcing: next input is the ground-truth token at step i,
        # which is also the target for this step's prediction.
        x_t = feed_x[i]
        loss += loss_fn(log_pred, x_t)
        log_g_prediction[:, i, :] = log_pred
    # NOTE(review): divides by self.sequence_length, not the actual seq_len
    # of this batch — fine if they always match, otherwise the average is
    # off; confirm against callers.
    loss /= self.sequence_length
    if is_train:
        self.g_opt.zero_grad()
        loss.backward()
        self.g_opt.step()
    return loss.detach().cpu().numpy(), log_g_prediction
def train(data, model, optimizer, verbose=True):
    """Run one training pass of the action model over ``data``.

    For each (sentence, actions) pair with more than two tokens, runs the
    model, accumulates NLL loss over the actions taken, backpropagates, and
    steps the optimizer. Tracks action-level accuracy.

    :param data: iterable of (sentence, actions) pairs
    :param model: model with ``use_cuda``, ``refresh()`` and
        ``model(sentence, actions) -> (outputs, _, actions_done)``
    :param optimizer: torch optimizer over the model's parameters
    :param verbose: when True, print instance counts, accuracy and avg loss
    :return: (accuracy, average loss) over all processed instances.
        Raises ZeroDivisionError if no instance qualifies (all sentences
        have length <= 2), matching the original behavior.
    """
    criterion = nn.NLLLoss()
    if model.use_cuda:
        criterion.cuda()
    correct_actions = 0
    total_actions = 0
    tot_loss = 0.
    instance_count = 0
    for sentence, actions in data:
        # Skip degenerate sentences.
        if len(sentence) <= 2:
            continue
        optimizer.zero_grad()
        model.refresh()
        outputs, _, actions_done = model(sentence, actions)
        # Build the loss accumulator and gold-action indices on the same
        # device as the model.
        if model.use_cuda:
            loss = ag.Variable(cuda.FloatTensor([0]))
            action_idxs = [
                ag.Variable(cuda.LongTensor([a])) for a in actions_done
            ]
        else:
            loss = ag.Variable(torch.FloatTensor([0]))
            action_idxs = [
                ag.Variable(torch.LongTensor([a])) for a in actions_done
            ]
        for output, act in zip(outputs, action_idxs):
            loss += criterion(output.view(-1, 3), act)
        tot_loss += utils.to_scalar(loss.data)
        instance_count += 1
        # Action-level accuracy against the gold actions.
        for gold, output in zip(actions_done, outputs):
            pred_act = utils.argmax(output.data)
            if pred_act == gold:
                correct_actions += 1
        total_actions += len(outputs)
        loss.backward()
        optimizer.step()
    acc = float(correct_actions) / total_actions
    avg_loss = float(tot_loss) / instance_count
    if verbose:
        print(
            "Number of instances: {} Number of network actions: {}".format(
                instance_count, total_actions))
        # Reuse the values computed above instead of recomputing them.
        print("Acc: {} Loss: {}".format(acc, avg_loss))
    # Return metrics for symmetry with evaluate(); existing callers that
    # ignore the return value are unaffected.
    return acc, avg_loss
def forward(self, minibatch):
    """Character-level word encoder over a minibatch of sentences.

    Flattens all words of the minibatch into one batch of character
    sequences, sorts them by length for packing, runs the character RNN,
    un-sorts the final hidden states, projects them to word embeddings, and
    re-splits the flat result back into per-sentence rows.
    Returns a tensor of size (batch_size x max_seq_len x emb_size).
    """
    out_stack = []  # NOTE(review): unused — left over from an older version
    minibatch = list(minibatch)
    minibatch_lengths = [len(sent) for sent in minibatch]
    # Flatten: every word of every sentence becomes one RNN input.
    batch_of_words = list(chain.from_iterable(minibatch))
    self.init_state(len(batch_of_words))
    # a hack to get index of the sorted words, so i can unsort them back after they are processed
    # print(batch_of_words)
    sent, ridx = self.len_sort(batch_of_words)
    padded, seq_lengths = self.pad(sent, 0)
    # print(padded)
    out = self.emb(Variable(cuda.LongTensor(padded)))
    # out is of size (all_words x max_len x char_emb_size)
    # print("out size: {0}".format(out.size()))
    # Packing requires the length-sorted order established above.
    out = rnn.pack_padded_sequence(out, seq_lengths, batch_first=True)
    out, hidden_state = self.rnn(out, self.hidden_state)
    # hidden_state[0] is of size: (num_dir x batch_size x lstm_hidden_dim)
    # print("hidden state size: {0}".format(hidden_state[0].size()))
    # TODO verify
    # unsorting IMPORTANT. cos we initially sorted the seq of chars to pass it to rnn.
    hidden_state = torch.index_select(hidden_state[0],
                                      dim=1,
                                      index=Variable(
                                          cuda.LongTensor(ridx)))
    # TODO verify that this is indeed the last outputs of both forward rnn and backward rnn
    out = cat([hidden_state[0], hidden_state[1]], dim=1)
    # print("cat out size: {0}".format(out.size()))
    cfg.ver_print("Hidden state concat", out)
    out = self.linear(out)
    out = self.tanh(out)
    # print("before split and pad function {0}".format(out.size()))
    # this will split 1d tensor of word embeddings, into 2d array of word embeddings based on lengths
    final_out = self.split_and_pad(out, minibatch_lengths)
    # final_out is of size (batch_size x max_seq_len x emb_size)
    # print(final_out.size())
    return final_out
def forward(self, sentences):
    """Viterbi-decode a batch of sentences (inference path).

    Not to be confused with _forward_alg, which computes the training-time
    log-partition. Each sentence's boundary tokens are stripped before the
    BiLSTM feature pass, and the decoded path is wrapped back in the
    start/end tag ids.
    """
    start_tag = self.tag_idx[cfg.SENT_START]
    end_tag = self.tag_idx[cfg.SENT_END]
    decoded_paths = []
    for sentence in sentences:
        inner_var = Variable(cuda.LongTensor(sentence[1:-1]))
        feats = self._get_lstm_features(inner_var)
        # Best tag path given the emission features.
        best_path = self._viterbi_decode(feats)
        decoded_paths.append([start_tag] + best_path + [end_tag])
    return decoded_paths
def evaluate(data, model, verbose=False):
    """Evaluate the action model on ``data`` without updating weights.

    Mirrors train(): runs the model on every sentence longer than one
    token, accumulates NLL loss and action-level accuracy.

    :param data: iterable of (sentence, actions) pairs
    :param model: model with ``use_cuda`` and
        ``model(sentence, actions) -> (outputs, _, actions_done)``
    :param verbose: when True, print counts, accuracy and average loss
    :return: (accuracy, average loss). Raises ZeroDivisionError when no
        sentence qualifies, matching the original behavior.
    """
    correct_actions = 0
    total_actions = 0
    tot_loss = 0.
    instance_count = 0
    criterion = nn.NLLLoss()
    if model.use_cuda:
        criterion.cuda()
    for sentence, actions in data:
        if len(sentence) > 1:
            outputs, _, actions_done = model(sentence, actions)
            # Loss accumulator and gold indices on the model's device.
            if model.use_cuda:
                loss = ag.Variable(cuda.FloatTensor([0]))
                action_idxs = [
                    ag.Variable(cuda.LongTensor([a])) for a in actions_done
                ]
            else:
                loss = ag.Variable(torch.FloatTensor([0]))
                action_idxs = [
                    ag.Variable(torch.LongTensor([a])) for a in actions_done
                ]
            for output, act in zip(outputs, action_idxs):
                loss += criterion(output.view((-1, 3)), act)
            tot_loss += utils.to_scalar(loss.data)
            instance_count += 1
            for gold, output in zip(actions_done, outputs):
                pred_act = utils.argmax(output.data)
                if pred_act == gold:
                    correct_actions += 1
            total_actions += len(outputs)
    acc = float(correct_actions) / total_actions
    loss = float(tot_loss) / instance_count
    if verbose:
        print(
            "Number of instances: {} Number of network actions: {}".format(
                instance_count, total_actions))
        # Reuse the metrics computed above instead of recomputing them.
        print("Acc: {} Loss: {}".format(acc, loss))
    return acc, loss
def to_variables(X, C, POS, Y):
    """Package raw (X, C, POS, Y) inputs for the model.

    In "multi" batch mode the inputs pass through unchanged except that Y
    is flattened; otherwise X, POS and Y are wrapped as CUDA Variables.
    lm_X mirrors X with every token id clamped below cfg.LM_MAX_VOCAB_SIZE
    so the language-model head sees a bounded vocabulary.
    """
    cap = cfg.LM_MAX_VOCAB_SIZE

    def clamp(token):
        # Map out-of-vocab ids onto the last LM vocabulary slot.
        return cap - 1 if token >= cap else token

    if cfg.BATCH_TYPE == "multi":
        lm_X = [[clamp(token) for token in row] for row in X]
        y_var = list(chain.from_iterable(list(Y)))
        return X, C, POS, y_var, lm_X

    x_var = Variable(cuda.LongTensor([X]))
    # f_var = Variable(torch.from_numpy(f)).float().unsqueeze(dim=0).cuda()
    pos_var = Variable(torch.from_numpy(POS).cuda()).unsqueeze(dim=0)
    lm_X = [clamp(token) for token in X]
    y_var = Variable(cuda.LongTensor(Y))
    return x_var, C, pos_var, y_var, lm_X
def main():
    """Cooperative training (CoT) of a generator against a mediator.

    Loads pretrained oracle LSTM parameters, samples an oracle corpus, then
    alternates: the mediator is trained by teacher forcing on mixed
    generated/real batches, and the generator by cooperative training on
    the mediator's predictions. Oracle NLL and test loss are logged per
    epoch to stdout and save/cot.log.
    """
    # Fix: open the pickle inside a context manager so the file handle is
    # always released (the original pickle.load(open(...)) leaked it).
    with open('save/target_params_py3.pkl', 'rb') as params_file:
        target_params = pickle.load(params_file)
    target_lstm = TARGET_LSTM(VOCAB_SIZE, BATCH_SIZE, 32, 32, SEQ_LENGTH,
                              START_TOKEN, target_params)  # The oracle model
    train_data = target_lstm.generate(batch_size=10000)
    generator = Generator(VOCAB_SIZE, BATCH_SIZE, 32, 32, SEQ_LENGTH,
                          START_TOKEN, learning_rate=1e-3)
    # The mediator is wider (64 vs 32 hidden) than the generator.
    mediator = Generator(VOCAB_SIZE, BATCH_SIZE, 64, 64, SEQ_LENGTH,
                         START_TOKEN, learning_rate=1e-3)
    data_loader = tcdata.DataLoader(tcdata.TensorDataset(
        tcg.LongTensor(train_data)), batch_size=32, shuffle=True)
    # Fix: context manager guarantees the log is flushed and closed even if
    # training raises part-way through the 20000 epochs.
    with open("save/cot.log", "w") as log_cot:
        for epoch in range(20000):
            for i, (x, ) in enumerate(data_loader):
                # Mediator sees half generated, half real data.
                m_loss, _ = mediator.teacher_forcing(tc.cat(
                    (generator.generate(32, keep_torch=True), x), dim=0),
                    is_train=True)
                gen_x = generator.generate(64)
                _, log_pred = mediator.teacher_forcing(gen_x)
                generator.cooperative_training(gen_x, log_pred)
                if i % 20 == 0:
                    print("mediator loss at iteration #%d-%d" % (epoch, i),
                          m_loss)
            # Per-epoch diagnostics, to stdout and to the log file.
            print("oracle loss at epoch #%d" % epoch,
                  target_lstm.calc_nll(generator.generate(64)))
            print("test loss at epoch #%d" % epoch,
                  generator.teacher_forcing(target_lstm.generate(64))[0])
            print("oracle loss at epoch #%d" % epoch,
                  target_lstm.calc_nll(generator.generate(64)),
                  file=log_cot)
            print("test loss at epoch #%d" % epoch,
                  generator.teacher_forcing(target_lstm.generate(64))[0],
                  file=log_cot)
def forward(self, chars):
    """Encode each word's character sequence into a single embedding.

    Each word is embedded, run through the (bidirectional) RNN, and the
    two final hidden states are concatenated, projected and squashed.
    Results are stacked along dim 1.
    """
    def encode_word(word):
        embedded = self.emb(Variable(cuda.LongTensor(word)))
        rnn_input = unsqueeze(embedded, dim=0)
        _, hidden = self.rnn(rnn_input, self.hidden_state)
        # TODO(review): verify hidden[0][0] / hidden[0][1] really are the
        # last outputs of the forward and backward passes, and that this
        # concatenation order is correct.
        joined = cat([hidden[0][0], hidden[0][1]], dim=1)
        cfg.ver_print("Hidden state concat", joined)
        return self.tanh(self.linear(joined))

    return stack([encode_word(word) for word in chars], dim=1)
def __getitem__(self, index):
    """Assemble one event: (calorimeter, labels[, baseline, scaler]).

    Track/photon/neutral-hadron eta/phi indices and their energy values are
    read from the HDF5-backed arrays as GPU tensors on this worker's rank,
    combined into a calorimeter image, and the labels (and optionally the
    baseline) are scaled to match.
    """
    if not hasattr(self, 'hdf5_dataset'):
        # Lazily (re)open the HDF5 file, e.g. after a dataloader fork.
        self.open_hdf5()

    rank = self.rank

    def index_tensor(source):
        # Coordinate arrays go in as a single-row LongTensor.
        return tcuda.LongTensor([source[index]], device=rank)

    def value_tensor(source):
        return tcuda.FloatTensor(source[index], device=rank)

    calorimeter, scaler = self.process_images(
        index_tensor(self.eFTrack_Eta), index_tensor(self.eFPhoton_Eta),
        index_tensor(self.eFNHadron_Eta), index_tensor(self.eFTrack_Phi),
        index_tensor(self.eFPhoton_Phi), index_tensor(self.eFNHadron_Phi),
        value_tensor(self.eFTrack_PT), value_tensor(self.eFPhoton_ET),
        value_tensor(self.eFNHadron_ET))

    # Set labels
    labels_processed = self.process_labels(value_tensor(self.labels), scaler)

    if self.return_baseline:
        base_processed = self.process_baseline(value_tensor(self.base))
        return calorimeter, labels_processed, base_processed, scaler
    return calorimeter, labels_processed
def variableFromSentence(lang, sentence, target=False):
    """Encode a sentence as a (seq_len x 1) CUDA index Variable.

    When target is exactly True, the SOS token is prepended so the result
    can seed a decoder. Returns (variable, sentence length).
    """
    indexes, length = indexesFromSentence(lang, sentence)
    if target is True:
        indexes.insert(0, SOS_token)  # decoder targets start with SOS
    column = cuda.LongTensor(indexes).view(-1, 1)
    variable = Variable(column, requires_grad=False)
    return (variable, length)
def train_autoencoder(train_matrix, test_set):
    """Train the POI autoencoder and evaluate top-k recommendations.

    Trains on an implicit-feedback matrix with confidence weighting, then
    ranks unseen items per user and prints precision/recall/MAP at
    k in {5, 10, 15, 20}.

    :param train_matrix: scipy sparse user x item interaction matrix
        (binarized in place below)
    :param test_set: per-user held-out item lists used by eval_metrics
    """
    num_users, num_items = train_matrix.shape
    # Confidence weights before binarization (weights need raw counts).
    weight_matrix = log_surplus_confidence_matrix(train_matrix,
                                                  alpha=args.alpha,
                                                  epsilon=args.epsilon)
    # Binarize in place: implicit feedback, any interaction becomes 1.
    train_matrix[train_matrix > 0] = 1.0
    place_correlation = scipy.sparse.load_npz(
        './data/Foursquare/place_correlation_gamma60.npz')
    assert num_items == place_correlation.shape[0]
    print(train_matrix.shape)
    # Construct the model by instantiating the class defined in model.py
    model = AutoEncoder(num_items,
                        args.inner_layers,
                        num_items,
                        da=args.num_attention,
                        dropout_rate=args.dropout_rate)
    if torch.cuda.is_available():
        model.cuda()
    # NOTE(review): size_average/reduce kwargs are the pre-1.0 PyTorch API
    # (replaced by reduction='none') — confirm the pinned torch version.
    criterion = torch.nn.MSELoss(size_average=False, reduce=False)
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.learning_rate,
                                 weight_decay=args.weight_decay)
    batch_size = args.batch_size
    user_indexes = np.arange(num_users)
    model.train()
    for t in range(args.epoch):
        print("epoch:{}".format(t))
        np.random.shuffle(user_indexes)
        avg_cost = 0.
        # Trailing users (num_users % batch_size) are dropped each epoch.
        for batchID in range(int(num_users / batch_size)):
            start = batchID * batch_size
            end = start + batch_size
            batch_user_index = user_indexes[start:end]
            batch_x, batch_x_weight, batch_item_index = get_mini_batch(
                train_matrix, weight_matrix, batch_user_index)
            # Shift weights so zero-confidence entries still contribute.
            batch_x_weight += 1
            batch_x = Variable(torch.from_numpy(batch_x).type(T.FloatTensor),
                               requires_grad=False)
            y_pred = model(batch_item_index, place_correlation)
            # Compute and print loss
            batch_x_weight = Variable(torch.from_numpy(batch_x_weight).type(
                T.FloatTensor),
                                      requires_grad=False)
            # Confidence-weighted elementwise MSE, averaged per user.
            loss = (batch_x_weight *
                    criterion(y_pred, batch_x)).sum() / batch_size
            print(batchID, loss.data)
            # Zero gradients, perform a backward pass, and update the weights.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            avg_cost += loss / num_users * batch_size
        print("Avg loss:{}".format(avg_cost))
    # print the prediction score for the user 0
    print(
        model([train_matrix.getrow(0).indices], place_correlation)
        [:, T.LongTensor(train_matrix.getrow(0).indices.astype(np.int32))])
    print(model([train_matrix.getrow(0).indices], place_correlation))
    # Evaluation
    model.eval()
    topk = 20
    recommended_list = []
    for user_id in range(num_users):
        user_rating_vector = train_matrix.getrow(user_id).toarray()
        pred_rating_vector = model([train_matrix.getrow(user_id).indices],
                                   place_correlation)
        pred_rating_vector = pred_rating_vector.cpu().data.numpy()
        user_rating_vector = user_rating_vector[0]
        pred_rating_vector = pred_rating_vector[0]
        # Never recommend items the user already interacted with.
        pred_rating_vector[user_rating_vector > 0] = 0
        item_recommended_dict = dict()
        for item_inner_id, score in enumerate(pred_rating_vector):
            item_recommended_dict[item_inner_id] = score
        # Top-k item ids by predicted score.
        sorted_item = heapq.nlargest(topk,
                                     item_recommended_dict,
                                     key=item_recommended_dict.get)
        recommended_list.append(sorted_item)
        print(test_set[user_id], sorted_item[:topk])
        print(pred_rating_vector[sorted_item[0]],
              pred_rating_vector[sorted_item[1]],
              pred_rating_vector[sorted_item[2]],
              pred_rating_vector[sorted_item[3]],
              pred_rating_vector[sorted_item[4]])
        print("user:%d, precision@5:%f, precision@10:%f" %
              (user_id,
               eval_metrics.precision_at_k_per_sample(test_set[user_id],
                                                      sorted_item[:5], 5),
               eval_metrics.precision_at_k_per_sample(
                   test_set[user_id], sorted_item[:topk], topk)))
    precision, recall, MAP = [], [], []
    for k in [5, 10, 15, 20]:
        precision.append(
            eval_metrics.precision_at_k(test_set, recommended_list, k))
        recall.append(eval_metrics.recall_at_k(test_set, recommended_list, k))
        MAP.append(eval_metrics.mapk(test_set, recommended_list, k))
    print(precision)
    print(recall)
    print(MAP)
# Adversarial-example generation over positively-labeled logs.
# NOTE(review): this span appears to be the start of a longer loop body
# continuing beyond this chunk; `ff` is opened here but not closed in the
# visible span — confirm it is closed later or move it into a `with`.
noises = t.tensor(noises).cuda()
totalMisclassifications = 0
num_adversarial_logs = 0
ff = open("../data/true_adversarial_logs.txt", "w")
with open("../data/log_adversarials.txt", "w") as f:
    for log, label_true in tqdm(zip(
            logs, label_trues)):  # enumeration of the dataset
        # Only craft adversarials for the positive class.
        if label_true == 1:
            # Wrap log as a variable
            log = log.float()
            label_true = t.tensor([label_true.long()]).cuda()
            # requires_grad=True so the input gradient is available for the
            # perturbation step below.
            log = Variable(torch.FloatTensor(log.reshape(1, 200)),
                           requires_grad=True)
            label_true = Variable(torch.LongTensor(label_true),
                                  requires_grad=False)
            # Classification before Adv
            _, label_pred = t.max(net(log).data,
                                  1)  # find the index of the biggest value
            # Forward pass
            # print(log.size())
            outputs = net(log)
            loss = SoftmaxWithXent(outputs, label_true)
            loss.backward()  # obtain gradients on x
            # Add perturbation
            log_adversarial = []
            epsilon = 0.004  # FGSM-style step size (presumably; continues below)
def variableFromPersona(lang, persona):
    """Map a persona to a (1 x 1) CUDA index Variable.

    Rare personas (seen 20 times or fewer) fall back to the UNK index.
    """
    if lang.persona2count[persona] > 20:
        chosen = lang.persona2index[persona]
    else:
        chosen = lang.persona2index['UNK']
    indexes = [chosen]
    return Variable(cuda.LongTensor(indexes).view(-1, 1), requires_grad=False)
def train_a_epoch(name, data, tag_idx, is_oov, model, optimizer,
                  seq_criterion, lm_f_criterion, lm_b_criterion, att_loss,
                  gamma):
    """Train the tagger for one epoch and collect evaluation data.

    Combines the sequence-tagging loss with optional forward/backward
    language-model losses (weighted by ``gamma``) and an optional character
    attention loss (when cfg.CHAR_LEVEL == "Attention").

    :param name: label for the Evaluator
    :param data: iterable of (SENT, X, C, POS, Y, P) batches
    :param tag_idx: tag -> id mapping
    :param is_oov: per-token-id out-of-vocabulary flags
    :param gamma: LM-loss weight; 0 disables the LM terms entirely
    :return: (evaluator, model)
    """
    evaluator = Evaluator(name, [0, 1],
                          main_label_name=cfg.POSITIVE_LABEL,
                          label2id=tag_idx,
                          conll_eval=True)
    t = tqdm(data, total=len(data))
    if is_oov[0] == 1:
        print("Yes, UNKNOWN token is out of vocab")
    else:
        print("No, UNKNOWN token is not out of vocab")
    for SENT, X, C, POS, Y, P in t:
        batch_size = len(SENT)
        # zero the parameter gradients
        optimizer.zero_grad()
        model.zero_grad()
        model.init_state(len(X))
        x_var, c_var, pos_var, y_var, lm_X = to_variables(X=X,
                                                          C=C,
                                                          POS=POS,
                                                          Y=Y)
        # Fix: threshold=np.nan raises on modern NumPy; np.inf is the
        # documented value for "always print the full array".
        np.set_printoptions(threshold=np.inf)
        if cfg.CHAR_LEVEL == "Attention":
            lm_f_out, lm_b_out, seq_out, seq_lengths, emb, char_emb = model(
                x_var, c_var)
            unrolled_x_var = list(chain.from_iterable(x_var))
            # -1 marks OOV tokens, +1 in-vocab, for the attention loss.
            not_oov_seq = [-1 if is_oov[idx] else 1 for idx in unrolled_x_var]
            # emb is detached: the attention loss trains char_emb toward the
            # word embeddings, not the other way round.
            char_att_loss = att_loss(
                emb.detach(), char_emb,
                Variable(torch.cuda.LongTensor(not_oov_seq))) / batch_size
        else:
            lm_f_out, lm_b_out, seq_out, seq_lengths = model(x_var, c_var)
        logger.debug("lm_f_out : {0}".format(lm_f_out))
        logger.debug("lm_b_out : {0}".format(lm_b_out))
        logger.debug("seq_out : {0}".format(seq_out))
        logger.debug("tensor X variable: {0}".format(x_var))
        # remove start and stop tags
        pred = argmax(seq_out)
        logger.debug("Predicted output {0}".format(pred))
        seq_loss = seq_criterion(
            seq_out,
            Variable(torch.LongTensor(y_var)).cuda()) / batch_size
        # to limit the vocab size of the sample sentence ( trick used to improve lm model)
        # TODO make sure that start and end symbol of sentence gets through this filtering.
        logger.debug("Sample input {0}".format(lm_X))
        if gamma != 0:
            # Forward LM predicts tokens 1..n, backward LM tokens 0..n-1.
            lm_X_f = [x1d[1:] for x1d in lm_X]
            lm_X_b = [x1d[:-1] for x1d in lm_X]
            lm_X_f = list(chain.from_iterable(lm_X_f))
            lm_X_b = list(chain.from_iterable(lm_X_b))
            lm_f_loss = lm_f_criterion(
                lm_f_out.squeeze(),
                Variable(cuda.LongTensor(lm_X_f)).squeeze()) / batch_size
            lm_b_loss = lm_b_criterion(
                lm_b_out.squeeze(),
                Variable(cuda.LongTensor(lm_X_b)).squeeze()) / batch_size
            if cfg.CHAR_LEVEL == "Attention":
                total_loss = seq_loss + Variable(cuda.FloatTensor(
                    [gamma])) * (lm_f_loss + lm_b_loss) + char_att_loss
            else:
                total_loss = seq_loss + Variable(cuda.FloatTensor(
                    [gamma])) * (lm_f_loss + lm_b_loss)
        else:
            if cfg.CHAR_LEVEL == "Attention":
                total_loss = seq_loss + char_att_loss
            else:
                total_loss = seq_loss
        # Progress-bar description mirrors the loss decomposition.
        desc = "total_loss: {0:.4f} = seq_loss: {1:.4f}".format(
            to_scalar(total_loss), to_scalar(seq_loss))
        if gamma != 0:
            desc += " + gamma: {0} * (lm_f_loss: {1:.4f} + lm_b_loss: {2:.4f})".format(
                gamma, to_scalar(lm_f_loss), to_scalar(lm_b_loss))
        if cfg.CHAR_LEVEL == "Attention":
            desc += " + char_att_loss: {0:.4f}".format(
                to_scalar(char_att_loss))
        t.set_description(desc)
        # Re-segment the flat predictions into per-sentence sequences.
        preds = roll(pred, seq_lengths)
        for pred, x, y in zip(preds, X, Y):
            evaluator.append_data(to_scalar(total_loss), pred, x, y)
        total_loss.backward()
        if cfg.CLIP is not None:
            clip_grad_norm(model.parameters(), cfg.CLIP)
        optimizer.step()
    evaluator.classification_report()
    return evaluator, model
def __next__(self):
    """Advance to the next index batch and slice X/Y columns by it."""
    batch_indices = next(self.batch_iterator)
    column_perm = cuda.LongTensor(batch_indices)
    return (self.X[:, column_perm], self.Y[:, column_perm])
def get_candidatelist(epoch, test_dist, dataset, model, config, R,
                      flag='test'):
    """Score geographically-near candidate venues for each test/valid user.

    For every user's held-out target, venues within an R-km bounding box of
    the target's location are scored by the model; the top 100 become the
    candidate list. Returns (candidatelist[uid][target] -> ids, test_geo).

    :param epoch: current epoch (only used by the disabled geo-dist cache)
    :param test_dist: unused in the visible body — TODO confirm
    :param R: search radius in kilometres (bounding box via 111 km/degree)
    :param flag: 'test' or anything else for the validation split
    """
    # R = config['R']
    test_geo = defaultdict()  # NOTE(review): only returned, never filled here
    location = dataset.location
    candidatelist = defaultdict(dict)
    all_venues = range(dataset.item_nums)
    if flag == 'test':
        test = dataset.test
        user = test['test_user']
    else:
        test = dataset.valid
        user = test['valid_user']
    for i, uid in enumerate(tqdm.tqdm(user, desc="test")):
        train_checks = dataset.data[uid]['item'][:-1]
        if flag == 'test':
            target = test['test_target_item'][i]
            target_time = test['test_target_time'][i]
            history = test['test_history'][i]
            seq = test['test_seq_item'][i]
            seq_time = test['test_seq_time'][i]
            seq_dist = test['test_seq_dist'][i]
            # seq_dist = [1]
            delatime = test['test_delatime'][i]
        else:
            # NOTE(review): the valid split never sets target_time/seq_time/
            # seq_dist/delatime, so the recommend_new branch below would use
            # stale values from a previous iteration — confirm valid mode is
            # only run with recommend_new disabled.
            target = test['valid_target_item'][i]
            history = test['valid_history'][i]
            seq = test['valid_seq_item'][i]
        # Skip targets the user already visited during training.
        if target in train_checks:
            continue
        if config['recommend_new']:
            # Candidate pool: all venues the user has not checked into.
            recommend_list = np.setdiff1d(np.array(all_venues),
                                          np.array(train_checks))
            current_location = list(
                location.loc[location.vid == target].values[0])[1:]
            x, y = current_location
            # Degree-space bounding box: ~111 km per degree latitude, and
            # longitude scaled by cos(latitude).
            lat_max = R / 111 + x
            lat_min = x - R / 111
            lon_max = R / (111 * np.cos(x * math.pi / 180.0)) + y
            lon_min = y - R / (111 * np.cos(x * math.pi / 180.0))
            near_location = location[(location["lon"] > lon_min) & \
                                     (location["lon"] < lon_max) & \
                                     (location["lat"] > lat_min) & \
                                     (location["lat"] < lat_max)]
            neighbors = list(
                np.intersect1d(near_location.vid.values, recommend_list))
            # geo_dist = []
            if epoch == 0:
                #geo_dist = dataset.place_correlation[neighbors][:, history].toarray()
                geo_dist = []
                # test_geo[uid] = geo_dist
            else:
                # geo_dist = test_geo[uid]
                geo_dist = []
            # Batch-score every neighbor for this user in one model call.
            overall_scores = model(T.LongTensor([uid] * len(neighbors)),
                                   T.LongTensor(seq),
                                   T.LongTensor(seq_time),
                                   T.LongTensor(history),
                                   seq_dist,
                                   geo_dist,
                                   T.LongTensor(neighbors),
                                   T.LongTensor([target_time] *
                                                len(neighbors)),
                                   T.LongTensor(delatime),
                                   flag='test').cpu().detach().numpy()
            predict_scores = zip(neighbors, list(overall_scores))
            predict_scores = sorted(predict_scores,
                                    key=lambda x: x[1],
                                    reverse=True)[0:100]
            candidatelist[uid][target] = [x[0] for x in predict_scores]
        else:
            current_location = list(
                location.loc[location.vid == target].values[0])[1:]
            x, y = current_location
            lat_max = R / 111 + x
            lat_min = x - R / 111
            lon_max = R / (111 * np.cos(x * math.pi / 180.0)) + y
            lon_min = y - R / (111 * np.cos(x * math.pi / 180.0))
            near_location = location[(location["lon"] > lon_min) & \
                                     (location["lon"] < lon_max) & \
                                     (location["lat"] > lat_min) & \
                                     (location["lat"] < lat_max)]
            neighbors = list(near_location.vid.values)
            overall_scores = model(T.LongTensor([uid] * len(neighbors)),
                                   T.LongTensor(seq),
                                   T.LongTensor(history),
                                   T.LongTensor(neighbors),
                                   flag='test').cpu().detach().numpy()
            predict_scores = zip(neighbors, list(overall_scores))
            predict_scores = sorted(predict_scores,
                                    key=lambda x: x[1],
                                    reverse=True)[0:100]
            candidatelist[uid][target] = [x[0] for x in predict_scores]
    return candidatelist, test_geo
# prioritized experience replay if DEVICE == torch.device(type='cpu'): # use latest episode for training agent.learn( (FloatTensor(states), LongTensor(actions), FloatTensor(rewards), FloatTensor(next_states), FloatTensor(dones)), (1. - (1. / action_size))) # use enhanced training data agent.learn( (FloatTensor(total_states), LongTensor(total_actions), FloatTensor(total_rewards), FloatTensor(total_next_states), FloatTensor(total_dones)), (1. - (1. / action_size))) else: # use latest episode for training agent.learn((cuda.FloatTensor(states), cuda.LongTensor(actions), cuda.FloatTensor(rewards), cuda.FloatTensor(next_states), cuda.FloatTensor(dones)), (1. - (1. / action_size))) # use enhanced training data agent.learn( (cuda.FloatTensor(total_states), cuda.LongTensor(total_actions), cuda.FloatTensor(total_rewards), cuda.FloatTensor(total_next_states), cuda.FloatTensor(total_dones)), (1. - (1. / action_size))) total_scores.append(score) if len(total_scores) > 100: total_scores = total_scores[(len(total_scores) - 100):] avg_score = float(sum(total_scores)) / float(len(total_scores)) with open('data.csv', 'a+') as f: f.write("{},{},{},{}\n".format(i, score, eps, avg_score))
def forward(self, batch_pairs, train=True):
    """Seq2seq forward pass over a batch of (question, answer) pairs.

    Python 2 code. Encodes the padded input batch, then either runs the
    decoder teacher-forced on the target batch (train=True) or greedily
    decodes a response (train=False). Returns (response string, loss).

    NOTE(review): several latent issues, left untouched here:
    - the persona branch references ``pair``, which is never defined in
      this scope (likely should be one of batch_pairs);
    - in the greedy branch ``decoder_input`` is read before any assignment
      on the first iteration;
    - with train=True, ``loss`` is still the int 0 when loss.backward() is
      called, and the ``assert False`` makes the train path unreachable
      past the decoder call.
    """
    N = len(batch_pairs)
    # pair = tuple of (question, answer)
    if self.persona is True:
        (persona1, input_variable, input_length, persona2, target_variable,
         target_length) = utils.variablesFromPairPersona(self.lang, pair)
        p1 = self.persona_embedding(persona1).view(1, -1)
        p2 = self.persona_embedding(persona2).view(1, -1)
    else:
        # Zero-padded fixed-length batches; targets get one extra slot.
        input_batch = Variable(cuda.LongTensor(N, self.max_length).zero_(),
                               requires_grad=False)
        target_batch = Variable(
            cuda.LongTensor(N, self.max_length + 1).zero_(),
            requires_grad=False)  # start with SOS token
        input_batch_len = []
        target_batch_len = []
        for i in xrange(N):
            (input_variable, input_length, target_variable,
             target_length) = utils.variablesFromPair(
                 self.lang, batch_pairs[i])
            input_batch[i] = input_variable
            target_batch[i] = target_variable
            input_batch_len.append(input_length)
            target_batch_len.append(target_length)
        input_batch_len = cuda.LongTensor(input_batch_len)
        target_batch_len = cuda.LongTensor(target_batch_len)
        p1 = None
        p2 = None
    if train is False:
        print input_variable
    encoder_hidden = self.encoder.initHidden(N)
    decoder_hidden = self.decoder.initHidden(N)
    self.encoder_optimizer.zero_grad()
    self.decoder_optimizer.zero_grad()
    # input_length = input_variable.size()[0]
    # target_length = target_variable.size()[0]
    loss = 0
    if self.attention is True:
        encoder_states = Variable(
            cuda.FloatTensor(input_length, self.encoder.hidden_size).zero_())
    # Encode the sentence
    # for ei in range(input_length):
    #     encoder_output, encoder_hidden = self.encoder(input_variable[ei], encoder_hidden)
    #     if self.attention is True:
    #         encoder_states[ei] = encoder_output[0][0]  # First element in batch, only hidden state and not cell state
    # print encoder_hidden[0].size(), input_batch.size()
    encoder_output, encoder_hidden = self.encoder(input_batch,
                                                  encoder_hidden)
    encoder_hidden_states = Variable(
        cuda.FloatTensor(N, self.encoder.hidden_size).zero_())
    # Take each sequence's last real (non-padding) encoder output.
    for i in xrange(N):
        encoder_hidden_states[i] = encoder_output[i,
                                                  input_batch_len[i] - 1, :]
    # if self.attention is True:
    #     self.wf = torch.t(self.wf_layer(encoder_states))  # D x f
    # print torch.mean(encoder_output)
    del input_variable
    # Decode with start symbol as SOS
    response = []
    if train is True:
        decoder_output, decoder_hidden = self.decoder(
            target_batch, decoder_hidden, encoder_hidden_states, p1, p2)
        assert False
        # for di in xrange(self.max_length):
        #     if self.attention is True:
        #         decoder_output, decoder_hidden = self.decoder(decoder_input, decoder_hidden, encoder_states, self.wf, p1, p2)
        #     else:
        #         decoder_output, decoder_hidden = self.decoder(decoder_input, decoder_hidden, encoder_output[0][0], p1, p2)
        #     # TODO change the loss to batch loss considering pad symbols
        #     if di == target_length:
        #         break
        #     loss += self.criterion(decoder_output[0], target_variable[di])
        #     decoder_input = target_variable[di]  # Teacher forcing
        #     ind = target_variable[di][0]
    else:
        # greedy decode
        response = []
        for di in xrange(self.max_length):
            if self.attention is True:
                decoder_output, decoder_hidden = self.decoder(
                    decoder_input, decoder_hidden, encoder_states, self.wf,
                    p1, p2)
            else:
                decoder_output, decoder_hidden = self.decoder(
                    decoder_input, decoder_hidden, encoder_output[0][0], p1,
                    p2)
            topv, topi = decoder_output.data.topk(1)
            ind = topi[0][0]
            if ind == utils.EOS_token:
                break
            decoder_input = Variable(cuda.LongTensor([[ind]]),
                                     requires_grad=False)
            response.append(self.lang.index2word[ind])
    # This implementation of beam search is wrong, we need to predict and follow the pointers back.
    # beam_size = 5
    # di = 0
    # while di < self.max_length:
    # tf.summary.scalar('loss', loss)
    # Step back
    if train is True:
        loss.backward()
        self.encoder_optimizer.step()
        self.decoder_optimizer.step()
    del encoder_hidden
    del decoder_hidden
    del decoder_output
    del target_variable
    response = ' '.join(response)
    return response, loss
def sequence_to_variable(sequence, to_ix, use_cuda=False):
    """Encode tokens as an autograd Variable of indices via ``to_ix``.

    The tensor is a CUDA LongTensor when use_cuda is True, otherwise a CPU
    LongTensor.
    """
    indices = [to_ix[token] for token in sequence]
    tensor_type = cuda.LongTensor if use_cuda else torch.LongTensor
    return ag.Variable(tensor_type(indices))
def prepare_sequence(seq, to_ix):
    """Look up each token's index and wrap the result as a CUDA Variable."""
    index_tensor = cuda.LongTensor([to_ix[token] for token in seq])
    return Variable(index_tensor)