def evaluate_auc(model, pos_data, neg_data, question_data, batch_size):
    auc = AUCMeter()
    evaluate_pair_set(model, pos_data, 1, question_data, auc, batch_size)
    evaluate_pair_set(model, neg_data, 0, question_data, auc, batch_size)
    return auc.value(max_fpr=0.05)
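# A minimal sketch of the AUCMeter interface assumed throughout these
# snippets: a torchnet-style add(output, target) meter whose value() takes
# a max_fpr cutoff. The normalization of the truncated area by max_fpr is
# an assumption; some implementations report the raw partial area instead.
import numpy as np
from sklearn.metrics import roc_curve

class SimpleAUCMeter(object):
    def __init__(self):
        self.reset()

    def reset(self):
        self.scores, self.targets = [], []

    def add(self, output, target):
        # output: similarity scores; target: 1 = similar, 0 = not similar.
        self.scores.extend(np.asarray(output).ravel().tolist())
        self.targets.extend(np.asarray(target).ravel().tolist())

    def value(self, max_fpr=1.0):
        fpr, tpr, _ = roc_curve(self.targets, self.scores)
        # Keep only the part of the ROC curve with FPR <= max_fpr.
        keep = fpr <= max_fpr
        return np.trapz(tpr[keep], fpr[keep]) / max_fpr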
def eval_part2(model, android_data, use_dev, model_type,
               using_part1_model=False, batch_size=1, tfidf_weighting=False):
    print "Begin eval_part2..."
    auc_eval = AUCMeter()
    num_batches = len(android_data.dev_data) / batch_size if use_dev \
        else len(android_data.test_data) / batch_size
    for i in xrange(num_batches):
        title, body, similar = android_data.get_next_eval_feature(
            use_dev, tfidf_weighting=tfidf_weighting)
        h = None
        if using_part1_model:
            h = run_model(model, title, body, True, True, model_type)
        else:
            title_vectors, title_masks = title
            body_vectors, body_masks = body
            h, _ = run_part2_model(model, title_vectors, body_vectors,
                                   title_masks, body_masks, model_type, False)
        candidate_scores = []
        # The candidates are all results after the first one, which is h_q.
        h_q = h[0]
        for c in h[1:]:
            candidate_scores.append(get_cosine_similarity(h_q, c))
        # Get the correct labels.
        # (1 if the candidate is similar to the query question, 0 otherwise.)
        labels = np.zeros(len(candidate_scores))
        for similar_idx in similar:
            labels[similar_idx] = 1
        auc_eval.add(np.array(candidate_scores), labels)
    print "Part 2 AUC for %s: %f" % ("dev" if use_dev else "test",
                                     auc_eval.value(.05))
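# `get_cosine_similarity` is not shown in this excerpt; a plausible helper,
# assuming h_q and c are 1-D numpy vectors (hypothetical reconstruction).
import numpy as np

def get_cosine_similarity(a, b):
    # Cosine similarity of two 1-D vectors, returned as a Python float.
    return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b) + 1e-8))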
def unsupervised_methods_helper(android_data, use_dev):
    auc_eval = AUCMeter()
    batch_size = 1
    num_batches = len(android_data.dev_data) / batch_size if use_dev \
        else len(android_data.test_data) / batch_size
    for i in xrange(num_batches):
        bows, labels = android_data.get_next_eval_bow_feature(use_dev, batch_size)
        for j in xrange(batch_size):
            # TODO: this currently only works when batch size is 1, fix indexing
            query = bows[0]
            scores = []
            for sample in bows[1:]:
                scores.append(get_tfidf_cosine_similarity(query, sample))
            assert len(scores) == len(labels[j])
            auc_eval.add(np.array(scores), labels[j])
    # Report AUC.
    print "AUC for %s: %f" % ("dev" if use_dev else "test", auc_eval.value(.05))
def evaluate_tfidf_auc(data, tfidf_vectors, query_to_index):
    auc = AUCMeter()
    for entry_id, eval_query_result in data.items():
        similar_ids = eval_query_result.similar_ids
        positives = set(similar_ids)
        candidate_ids = eval_query_result.candidate_ids
        entry_encoding = tfidf_vectors[query_to_index[entry_id]]
        candidate_similarities = []
        targets = []
        for candidate_id in candidate_ids:
            candidate_encoding = tfidf_vectors[query_to_index[candidate_id]]
            similarity = cosine(entry_encoding, candidate_encoding)
            candidate_similarities.append(similarity.item(0))
            targets.append(IS_SIMMILAR_LABEL if candidate_id in positives
                           else NOT_SIMMILAR_LABEL)
        similarities = torch.Tensor(candidate_similarities)
        auc.add(similarities, torch.Tensor(targets))
    return auc.value(MAXIMUM_FALSE_POSITIVE_RATIO)
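# A plausible way to build the tfidf_vectors / query_to_index inputs above,
# assuming a `corpus` dict mapping question id -> (title, body) strings; the
# names here are illustrative, not from the original repo. The .item(0) call
# above suggests `cosine` wraps sklearn's cosine_similarity, which returns a
# numpy matrix.
from sklearn.feature_extraction.text import TfidfVectorizer

ids = list(corpus.keys())
query_to_index = {qid: i for i, qid in enumerate(ids)}
texts = [corpus[qid][0] + " " + corpus[qid][1] for qid in ids]
tfidf_vectors = TfidfVectorizer().fit_transform(texts)  # one sparse row per question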
def evaluate(self, dev_or_test):
    '''dev_or_test must be one of 'dev' or 'test' '''
    print('lv0')
    self.reset_params()
    auc_meter = AUCMeter()
    # Get the id batches.
    pos_ids_batches_pair = self.pre.eval_split_into_batches(
        is_pos=True, dev_or_test=dev_or_test)
    neg_ids_batches_pair = self.pre.eval_split_into_batches(
        is_pos=False, dev_or_test=dev_or_test)
    # Start looping through the batches.
    data_sets = [neg_ids_batches_pair, pos_ids_batches_pair]
    print('lv1')
    i_target = 0
    for ids_batches_pair in data_sets:
        assert i_target < 2
        print('dataset number %d' % (i_target))
        ids_batches_left = ids_batches_pair[0]
        ids_batches_right = ids_batches_pair[1]
        for i in xrange(len(ids_batches_left)):
            ids_batch_left = ids_batches_left[i]
            ids_batch_right = ids_batches_right[i]
            feats_left = self.get_output(ids_batch_left)
            feats_right = self.get_output(ids_batch_right)
            preds = self.get_cosine_scores(feats_left, feats_right).data.numpy()
            targets = np.ones(len(preds)) * i_target  # 0s if neg, 1s if pos
            auc_meter.add(preds, targets)
        i_target += 1
    print('lv3')
    # All predictions are added; now get the AUC value.
    auc_value = auc_meter.value(params.auc_max_fpr)
    print('AUC(%f) value for %s = %f' % (params.auc_max_fpr, dev_or_test, auc_value))
def evaluation(args, padding_id, android_ids_corpus, model, vocab_map, embeddings):
    print "starting evaluation"
    if args.model == 'lstm':
        lstm = model
    else:
        cnn = model
    meter = AUCMeter()
    android_test_pos_path = os.path.join(args.android_path, 'test.pos.txt')
    android_test_neg_path = os.path.join(args.android_path, 'test.neg.txt')
    android_test_annotations = android_pairs_to_annotations(
        android_test_pos_path, android_test_neg_path)
    android_test_batches = corpus.create_eval_batches(
        android_ids_corpus, android_test_annotations, padding_id)
    count = 0
    for batch in android_test_batches:
        titles, bodies, qlabels = batch
        if args.model == 'lstm':
            model = lstm
        else:
            model = cnn
        hidden = vectorize_question(args, batch, model, vocab_map,
                                    embeddings, padding_id)
        query = hidden[0].unsqueeze(0)
        examples = hidden[1:]
        cos_similarity = F.cosine_similarity(query, examples, dim=1)
        target = torch.DoubleTensor(qlabels)
        meter.add(cos_similarity.data, target)
    print meter.value(0.05)
def evaluate(dataset):
    meter = AUCMeter()
    dev_loader = torch.utils.data.DataLoader(dataset,
                                             batch_size=dev_batch_size,
                                             shuffle=True)
    start_time = time.time()
    for dev_batch in tqdm(dev_loader):
        model.eval()
        domain_model.eval()
        dev_x = autograd.Variable(dev_batch["x"])
        if args.cuda:
            dev_x = dev_x.cuda()
        dev_pad_title = dev_batch["pad_title"]
        dev_pad_body = dev_batch["pad_body"]
        if args.model == "lstm":
            hidden2 = model.init_hidden(dev_x.shape[1])
            hidden2 = repackage_hidden(hidden2)
            out_dev_x_raw, _ = model(dev_x, dev_pad_title, dev_pad_body, hidden2)
        else:
            out_dev_x_raw, _ = model(dev_x, dev_pad_title, dev_pad_body)
        out_dev_x = out_dev_x_raw.data
        truth = [0] * len(out_dev_x[0])
        truth[0] = 1
        truth = np.asarray(truth)
        for i in range(len(out_dev_x)):
            meter.add(out_dev_x[i], truth)
        print("auc middle", meter.value(0.05))
    print("AUC DONE", meter.value(0.05))
def evaluate_model(model, data, corpus, word_to_index, cuda):
    auc = AUCMeter()
    for query in data.keys():
        positives = set(data[query][0])
        candidates = data[query][1]
        embeddings = [pad(merge_title_and_body(corpus[query]), len(word_to_index))]
        targets = []
        for candidate in candidates:
            embeddings.append(pad(merge_title_and_body(corpus[candidate]),
                                  len(word_to_index)))
            targets.append(IS_SIMMILAR_LABEL if candidate in positives
                           else NOT_SIMMILAR_LABEL)
        embeddings = Variable(torch.from_numpy(np.array(embeddings)))
        targets = torch.from_numpy(np.array(targets))
        if cuda:
            embeddings = embeddings.cuda()
        encodings = model(embeddings)
        query_encoding = encodings[0]
        candidate_encodings = encodings[1:]
        similarities = F.cosine_similarity(
            candidate_encodings,
            query_encoding.repeat(len(encodings) - 1, 1),
            dim=1)
        auc.add(similarities.data, targets)
    return auc.value(MAXIMUM_FALSE_POSITIVE_RATIO)
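# `merge_title_and_body` and `pad` are not shown in this excerpt; a minimal
# sketch under the assumption that corpus entries hold (title, body) token-id
# lists and that len(word_to_index) serves as the padding id. MAX_LENGTH is
# an illustrative truncation length, not from the original.
MAX_LENGTH = 100

def merge_title_and_body(entry):
    title, body = entry
    return list(title) + list(body)

def pad(token_ids, padding_id, max_length=MAX_LENGTH):
    # Truncate to max_length, then right-pad with the padding id.
    token_ids = token_ids[:max_length]
    return token_ids + [padding_id] * (max_length - len(token_ids))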
def calculate_meter(data):
    """Calculate the AUC score."""
    positives = {}
    negatives = {}
    print "loading data"
    # dev.[pos|neg].txt and test.[pos|neg].txt format: id \w id
    if data == 'dev':
        pos_ids_X, pos_Y = load_data("../Android/dev.pos.txt", True)
    else:
        pos_ids_X, pos_Y = load_data("../Android/test.pos.txt", True)
    for q1, q2 in pos_ids_X:
        if q1 in positives:
            positives[q1].append(q2)
        else:
            positives[q1] = [q2]
    if data == 'dev':
        neg_ids_X, neg_Y = load_data("../Android/dev.neg.txt", False)
    else:
        neg_ids_X, neg_Y = load_data("../Android/test.neg.txt", False)
    for q1, q2 in neg_ids_X:
        if q1 in negatives:
            negatives[q1].append(q2)
        else:
            negatives[q1] = [q2]
    vectorizer = TfidfVectorizer()
    print "tfidf fit"
    vectorizer.fit(all_sequences)  # 36404 unique words
    # print len(vectorizer.vocabulary_)
    meter = AUCMeter()
    qlabels = []
    all_questions = []
    question_ids = set()
    question_ids.update(positives.keys())
    question_ids.update(negatives.keys())
    for qid in question_ids:
        questions = [raw_corpus[qid][0] + " " + raw_corpus[qid][1]]
        questions.extend([raw_corpus[nid][0] + " " + raw_corpus[nid][1]
                          for nid in negatives[qid]])
        questions.extend([raw_corpus[pid][0] + " " + raw_corpus[pid][1]
                          for pid in positives[qid]])
        all_questions.append(questions)
        qlabels.append([0] * len(negatives[qid]) + [1] * len(positives[qid]))
    for question, qlabel in zip(all_questions, qlabels):
        query = torch.DoubleTensor(vectorizer.transform([question[0]]).todense())
        examples = torch.DoubleTensor(vectorizer.transform(question[1:]).todense())
        cos_similarity = F.cosine_similarity(query, examples, dim=1)
        target = torch.DoubleTensor(qlabel)
        meter.add(cos_similarity, target)
    print meter.value(0.05)
def evaluate_auc(args, model, embedding, batches, padding_id):
    model.eval()
    meter = AUCMeter()
    for i, batch in enumerate(batches):
        title_ids, body_ids, labels = batch
        hidden = forward(args, model, embedding, title_ids, body_ids, padding_id)
        q = hidden[0].unsqueeze(0)
        p = hidden[1:]
        scores = F.cosine_similarity(q, p, dim=1).cpu().data
        assert len(scores) == len(labels)
        target = torch.DoubleTensor(labels)
        meter.add(scores, target)
    auc_score = meter.value(0.05)
    print 'AUC(0.05): {}'.format(auc_score)
    return auc_score
def do_eval(embedding_layer, eval_name, batch_first=False):
    if eval_name == 'Dev':
        eval_data = dev_android
    elif eval_name == 'Test':
        eval_data = test_android
    eval_map = {}
    for qid_ in eval_data.keys():
        eval_map[qid_] = process_eval_batch(qid_, eval_data, batch_first=batch_first)
    labels = []
    auc = AUCMeter()
    for qid_ in eval_map.keys():
        eval_title_batch, eval_body_batch, eval_title_len, eval_body_len = \
            eval_map[qid_]  # process_eval_batch(qid_, eval_data)
        embedding_layer.title_hidden = embedding_layer.init_hidden(
            eval_title_batch.shape[1])
        embedding_layer.body_hidden = embedding_layer.init_hidden(
            eval_body_batch.shape[1])
        eval_title_qs = Variable(torch.FloatTensor(eval_title_batch))
        eval_body_qs = Variable(torch.FloatTensor(eval_body_batch))
        if cuda_available:
            eval_title_qs, eval_body_qs = eval_title_qs.cuda(), eval_body_qs.cuda()
        embeddings = embedding_layer(eval_title_qs, eval_body_qs,
                                     eval_title_len, eval_body_len)
        cos_scores = evaluate(embeddings).cpu().data.numpy()
        true_labels = np.array(eval_data[qid_]['label'])
        auc.add(cos_scores, true_labels)
        labels.append(true_labels[np.argsort(cos_scores)][::-1])
    auc_stdout = eval_name + ' AUC ' + str(auc.value(0.05))
    print(auc_stdout)
    logging.debug(auc_stdout)
    eval_metrics(labels, eval_name)
    return auc.value(0.05)
def evaluate(model, test_data, test_labels):
    m = AUCMeter()
    cos_sims = []
    labels = []
    titles, bodies = test_data
    print "Getting test query embeddings"
    title_output = model(Variable(torch.FloatTensor(titles)))
    body_output = model(Variable(torch.FloatTensor(bodies)))
    question_embeddings = (title_output + body_output) / 2
    print "Getting cosine similarities"
    for i in range(len(question_embeddings) / 2):
        q_ind = 2 * i
        r_ind = 2 * i + 1
        q_emb = question_embeddings[q_ind]
        r_emb = question_embeddings[r_ind]
        cos_sim = F.cosine_similarity(q_emb, r_emb, dim=0, eps=1e-6)
        cos_sims.append(cos_sim.data[0])
        labels.append(test_labels[q_ind])
        if i % 3000 == 0 or i == len(question_embeddings) / 2:
            print "index ", q_ind
    m.add(torch.FloatTensor(cos_sims), torch.IntTensor(labels))
    print m.value(max_fpr=0.05)
for q, v in android_data.items():
    t, t_mask, b, b_mask = v
    t = list(map(lambda x: x[0],
                 filter(lambda x: x[1] == 1 and x[0] and fil(x[0]),
                        zip(t, t_mask))))
    b = list(map(lambda x: x[0],
                 filter(lambda x: x[1] == 1 and x[0] and fil(x[0]),
                        zip(b, b_mask))))
    contents.append(' '.join(t) + ' ' + ' '.join(b))
    question_ids[q] = count
    count += 1

stop_words = stopwords.words('english')
stop_words.append('')
meter = AUCMeter()
vectorizer = TfidfVectorizer(lowercase=True,
                             stop_words=stop_words,
                             use_idf=True,
                             ngram_range=(1, 2),
                             tokenizer=lambda x: x.split(' '))
vs = vectorizer.fit_transform(contents)
res = []
for q, p, ns in tqdm.tqdm(android_test):
    sims = []
    question = vs[question_ids[q]]
    for candidate in [p] + ns:
        cos_sim = cosine_similarity(question, vs[question_ids[candidate]])
        sims.append(cos_sim[0][0])
    sims = np.array(sims)
    ind = np.argsort(sims)[::-1]
    labels = np.array([1] + [0] * len(ns))
    labels = labels[ind]
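    # Plausible continuation (the excerpt cuts off above): feed the meter the
    # raw scores and targets, mirroring the other snippets; `res.append` is an
    # assumption based on the otherwise unused `res = []` above.
    meter.add(sims, np.array([1] + [0] * len(ns)))
    res.append(labels)

print(meter.value(0.05))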
def run_epoch(train_data, dev_data, test_data, model, domain_model,
              optimizer_feature, optimizer_domain, args, is_training):
    data_loader = torch.utils.data.DataLoader(train_data,
                                              batch_size=args.batch_size,
                                              shuffle=True)
    losses_feature = 0
    losses_domain = 0
    if is_training:
        model.train()
    else:
        model.eval()
    dev_batch_size = 15
    count = 0

    def evaluate(dataset):
        dev_loader = torch.utils.data.DataLoader(dataset,
                                                 batch_size=dev_batch_size,
                                                 shuffle=True)
        start_time = time.time()
        for dev_batch in tqdm(dev_loader):
            print("TIME START", time.time() - start_time)
            dev_x = autograd.Variable(dev_batch["x"])
            if args.cuda:
                dev_x = dev_x.cuda()
            dev_pad_title = dev_batch["pad_title"]
            dev_pad_body = dev_batch["pad_body"]
            if args.model == "lstm":
                hidden2 = model.init_hidden(dev_x.shape[1])
                hidden2 = repackage_hidden(hidden2)
                out_dev_x_raw, _ = model(dev_x, dev_pad_title, dev_pad_body, hidden2)
            else:
                out_dev_x_raw, _ = model(dev_x, dev_pad_title, dev_pad_body)
            out_dev_x = out_dev_x_raw.data
            truth = [0] * len(out_dev_x[0])
            truth[0] = 1
            truth = np.asarray(truth)
            print(out_dev_x.shape)
            for i in range(len(out_dev_x)):
                meter.add(out_dev_x[i], truth)
            print("auc middle", meter.value(0.05))
            print("TIME END", time.time() - start_time)
        print("AUC DONE", meter.value(0.05))

    meter = AUCMeter()
    # Switch between the two datasets
    for batch in tqdm(data_loader):
        count += 1
        x = autograd.Variable(batch["x"])
        y = autograd.Variable(torch.zeros(batch["x"].shape[0]))
        if args.cuda:
            x = x.cuda()
            y = y.cuda()
        pad_title = batch["pad_title"]
        pad_body = batch["pad_body"]
        if is_training:
            optimizer_feature.zero_grad()
        if args.model == "lstm":
            hidden = model.init_hidden(batch["x"].shape[1])
            hidden = repackage_hidden(hidden)
            out, embeddings = model(x, pad_title, pad_body, hidden)
        else:
            out, embeddings = model(x, pad_title, pad_body)
        loss_feature = F.multi_margin_loss(out, y.long(), margin=args.margin)
        # loss_domain = F.binary_cross_entropy(predictions, dataset_y)
        # loss_domain = F.cross_entropy(domain_out, dataset_y.long())
        if is_training:
            # total_loss = loss_feature - args.lambda_val * loss_domain
            total_loss = loss_feature
            total_loss.backward()
            optimizer_feature.step()
        # if count % 400 == 0: evaluate()
        print("FEATURE LOSS", loss_feature.data[0])
        losses_feature += loss_feature.data
        # losses_domain += loss_domain.data
    print("DEV DATA")
    evaluate(dev_data)
    print("TEST DATA")
    evaluate(test_data)
def run_epoch(args, ubuntu_loader, android_loader, qr_model, qr_criterion,
              qr_optimizer, dc_model, dc_criterion, dc_optimizer, epoch,
              mode='train'):
    queries_per_batch = args.batch_size / args.examples_per_query
    data_and_target_loaders = [izip(ubuntu_loader, repeat(0)),
                               izip(android_loader, repeat(1))]
    data_and_target_loader = roundrobin(*data_and_target_loaders)
    # if mode == 'train':
    #     print "Training..."
    #     data_and_target_loaders = [izip(ubuntu_loader, repeat(0)),
    #                                izip(android_loader, repeat(1))]
    #     data_and_target_loader = roundrobin(*data_and_target_loaders)
    # elif mode == 'val':
    #     print "Validation..."
    #     data_and_target_loader = izip(android_loader, repeat(1))
    print "Epoch {}".format(epoch)
    qr_total_loss = 0
    dc_total_loss = 0
    dc_count = 0
    qr_metrics = QuestionRetrievalMetrics()
    auc_meter = AUCMeter()
    for i_batch, (data, target_domain) in enumerate(data_and_target_loader):
        padded_things, ys = data
        print "Epoch {}, Batch #{}, Domain {}".format(epoch, i_batch, target_domain)
        ys = create_variable(ys)
        # padded_things might also be packed:
        # qt is (PackedSequence, perm_idx) or (seq_tensor, seq_lengths).
        qt, qb, ot, ob = padded_things

        # Step 1. PyTorch accumulates gradients, so clear them out
        # before each instance.
        for model in [qr_model, dc_model]:
            model.zero_grad()

        # Generate embeddings.
        query_title = qr_model.get_embed(*qt)
        query_body = qr_model.get_embed(*qb)
        other_title = qr_model.get_embed(*ot)
        other_body = qr_model.get_embed(*ob)
        query_embed = (query_title + query_body) / 2
        other_embed = (other_title + other_body) / 2

        grl = GradientReversalLayer(args.dc_factor)
        # Classify their domains.
        other_domain = dc_model(grl(other_embed))
        target = create_variable(
            torch.FloatTensor([float(target_domain)] * other_domain.size(0)))
        auc_meter.add(other_domain.data, target.data)

        if mode == 'train':
            # Compute batch loss.
            qr_batch_loss = qr_criterion(query_embed, other_embed, ys)
            qr_total_loss += qr_batch_loss.data[0]
            print "avg QR loss for batch {} was {}".format(
                i_batch, qr_batch_loss.data[0] / queries_per_batch)
            dc_batch_loss = dc_criterion(other_domain, target)
            dc_total_loss += dc_batch_loss.data[0]
            print "avg DC loss for batch {} was {}".format(
                i_batch, dc_batch_loss.data[0] / args.batch_size)
            dc_count += args.batch_size
            if target_domain == 0:
                # Ubuntu. We don't have Android training data for QR.
                qr_batch_loss.backward(retain_graph=True)
            else:
                pass  # Android.
            dc_batch_loss.backward()
            qr_optimizer.step()
            dc_optimizer.step()
        if mode == "val" and target_domain == 0:
            pass
        else:
            update_metrics_for_batch(args, query_embed, other_embed, ys, qr_metrics)
        if i_batch % args.stats_display_interval == 0:
            qr_metrics.display(i_batch)
    print "AUC Meter {} final stats for epoch {} was {}".format(
        mode, epoch, auc_meter.value(0.05))
    if mode == 'train':
        qr_avg_loss = qr_total_loss / qr_metrics.queries_count
        dc_avg_loss = dc_total_loss / dc_count
        print "average {} QR loss for epoch {} was {}".format(mode, epoch, qr_avg_loss)
        print "average {} DC loss for epoch {} was {}".format(mode, epoch, dc_avg_loss)
def evaluate(pairs, label, text_data, question_lookup, auc):
    for p in pairs:
        id_1, id_2 = p
        cos = cosine_similarity(text_data.getrow(question_lookup[id_1]),
                                text_data.getrow(question_lookup[id_2]))
        cos = float(cos)
        auc.add(torch.DoubleTensor([cos]), torch.LongTensor([label]))

print >> sys.stderr, "LOADING DATA..."
question_lookup, text_raw = read_raw_data(CORPUS_PATH)
dev_pos_pairs = import_pairs(DEV_POS_PATH)
dev_neg_pairs = import_pairs(DEV_NEG_PATH)
test_pos_pairs = import_pairs(TEST_POS_PATH)
test_neg_pairs = import_pairs(TEST_NEG_PATH)

print >> sys.stderr, "COMPUTING TF-IDF FEATURES..."
text_tfidf = compute_tfidf(text_raw)

print >> sys.stderr, "COMPUTING AUC..."
d_auc = AUCMeter()
t_auc = AUCMeter()
evaluate(dev_pos_pairs, 1, text_tfidf, question_lookup, d_auc)
evaluate(dev_neg_pairs, 0, text_tfidf, question_lookup, d_auc)
evaluate(test_pos_pairs, 1, text_tfidf, question_lookup, t_auc)
evaluate(test_neg_pairs, 0, text_tfidf, question_lookup, t_auc)
print "Dev AUC: %.2f" % (d_auc.value(max_fpr=0.05))
print "Test AUC: %.2f" % (t_auc.value(max_fpr=0.05))
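# `compute_tfidf` is not shown in this excerpt; a minimal sketch with
# scikit-learn, assuming text_raw is a list of question strings indexed
# consistently with question_lookup. The returned sparse row-per-question
# matrix matches the .getrow() calls above.
from sklearn.feature_extraction.text import TfidfVectorizer

def compute_tfidf(text_raw):
    return TfidfVectorizer().fit_transform(text_raw)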
def run_adversarial_epoch(data, is_training, encoder, encoder_optimizer,
                          classifier, classifier_optimizer):
    # Make batches
    data_loader = torch.utils.data.DataLoader(
        data,
        batch_size=10,
        shuffle=True,
        num_workers=4,
        drop_last=False)
    losses = []
    bce_losses = []
    actual = []
    expected = []

    if is_training:
        encoder.train()
        classifier.train()
    else:
        encoder.eval()
        classifier.eval()

    for batch in data_loader:
        # Unpack training instances
        pid_title = torch.unsqueeze(Variable(batch['pid_title']), 1).cuda()  # Size: batch_size x 1 x title_length=40
        pid_title_mask = torch.unsqueeze(Variable(batch['pid_title_mask']), 1).cuda()
        pid_body = torch.unsqueeze(Variable(batch['pid_body']), 1).cuda()  # Size: batch_size x 1 x body_length=100
        pid_body_mask = torch.unsqueeze(Variable(batch['pid_body_mask']), 1).cuda()
        candidate_title = Variable(batch['candidate_titles']).cuda()  # Size: batch_size x # candidates (21 in training) x title_length=40
        candidate_title_mask = Variable(batch['candidate_titles_mask']).cuda()
        candidate_body = Variable(batch['candidate_body']).cuda()  # Size: batch_size x # candidates x body_length=100
        candidate_body_mask = Variable(batch['candidate_body_mask']).cuda()
        if is_training:
            android_title = torch.unsqueeze(Variable(batch['android_title']), 1).cuda()
            android_title_mask = torch.unsqueeze(Variable(batch['android_title_mask']), 1).cuda()
            android_body = torch.unsqueeze(Variable(batch['android_body']), 1).cuda()
            android_body_mask = torch.unsqueeze(Variable(batch['android_body_mask']), 1).cuda()
        sz = pid_title.size()[0]

        if is_training:
            encoder_optimizer.zero_grad()
            classifier_optimizer.zero_grad()

        # Run text through model
        pid_title = encoder(pid_title)  # batch_size x 1 x output_size=500 x title_length (-kernel_size+1 if CNN)
        pid_body = encoder(pid_body)  # batch_size x 1 x output_size=500 x body_length
        candidate_title = encoder(candidate_title)  # batch_size x # candidates x output_size=500 x title_length
        candidate_body = encoder(candidate_body)  # batch_size x # candidates x output_size=500 x body_length
        if is_training:
            android_title = encoder(android_title)
            android_body = encoder(android_body)

        # Expand the masks to match the encoder output shapes.
        pid_title_mask = torch.unsqueeze(pid_title_mask, 2).expand_as(pid_title)
        pid_body_mask = torch.unsqueeze(pid_body_mask, 2).expand_as(pid_body)
        candidate_title_mask = torch.unsqueeze(candidate_title_mask, 2).expand_as(candidate_title)
        candidate_body_mask = torch.unsqueeze(candidate_body_mask, 2).expand_as(candidate_body)
        if is_training:
            android_title_mask = torch.unsqueeze(android_title_mask, 2).expand_as(android_title)
            android_body_mask = torch.unsqueeze(android_body_mask, 2).expand_as(android_body)

        # Masked sums over the word dimension.
        good_title = torch.sum(pid_title * pid_title_mask, 3)  # batch_size x 1 x output_size=500
        good_body = torch.sum(pid_body * pid_body_mask, 3)
        cand_titles = torch.sum(candidate_title * candidate_title_mask, 3)  # batch_size x # candidates x output_size=500
        cand_bodies = torch.sum(candidate_body * candidate_body_mask, 3)
        if is_training:
            android_title = torch.sum(android_title * android_title_mask, 3)
            android_body = torch.sum(android_body * android_body_mask, 3)

        good_tensor = (good_title + good_body) / 2  # batch_size x 1 x output_size=500
        cand_tensor = (cand_titles + cand_bodies) / 2  # batch_size x # candidates x output_size=500
        if is_training:
            android_tensor = (android_title + android_body) / 2

        if is_training:
            good_domain = classifier(good_tensor.view(sz, -1))
            android_domain = classifier(android_tensor.view(sz, -1))
            softmax = nn.Softmax(dim=1)
            good_dist = softmax(good_domain)
            android_dist = softmax(android_domain)
            dists = torch.cat((good_dist, android_dist)).clamp(min=0.0001, max=0.9999)
            expected = Variable(torch.FloatTensor([0] * sz + [1] * sz)).cuda()
            bce_loss = torch.nn.BCELoss()(dists[:, 0], expected)
            l = loss(good_tensor, cand_tensor, 1.0) - 0.01 * bce_loss
            l.backward()
            losses.append(l.cpu().data[0])
            bce_losses.append(bce_loss.cpu().data[0])
            encoder_optimizer.step()
            classifier_optimizer.step()
        else:
            similarity = cosine_sim(good_tensor.expand_as(cand_tensor), cand_tensor, dim=2)
            similarity = torch.FloatTensor(similarity.data.cpu().numpy())
            labels = batch['labels']

            def predict(sim, labels):
                predictions = []
                for i in range(sim.shape[0]):
                    sorted_cand = (-sim[i]).argsort()
                    predictions.append(labels[i][sorted_cand])
                return predictions

            for sim in similarity:
                actual.append(sim)
            expected.extend(labels.view(-1))

    if is_training:
        avg_loss = np.mean(losses)
        avg_bce_loss = np.mean(bce_losses)
        return avg_loss, avg_bce_loss
    else:
        auc = AUCMeter()
        auc.reset()
        auc.add(torch.cat(actual), torch.LongTensor(expected))
        return auc.value(max_fpr=0.05)
        pos[main_qid].add(candidate_qid)
    with open(filepath_neg, 'r') as f:
        for line in f.readlines():
            main_qid, candidate_qid = line.split()
            neg[main_qid].add(candidate_qid)
    return pos, neg

# Computes the cosine similarity between two sparse vectors x and y and maps
# the result to [0, 1]: 0 means the two vectors are opposite, 1 means they
# are the same.
def cosine(x, y):
    return (1 + cosine_similarity(sparse.vstack([x, y]))[0][1]) / 2

if __name__ == '__main__':
    get_id_to_vector()
    meter = AUCMeter()
    pos, neg = get_dev_data_android(use_test_data=False)
    # Only use questions that have at least one positive match.
    for main_qid in pos:
        main_vector = X[id_to_index[main_qid]]
        similiarities, targets = [], []
        # For all positive matches, append the similarity score and a 1 to the targets.
        for pos_match_qid in pos[main_qid]:
            similiarities.append(cosine(main_vector, X[id_to_index[pos_match_qid]]))
            targets.append(1)
        # For all negative matches, append the similarity score and a 0 to the targets.
        for neg_match_qid in neg[main_qid]:
            similiarities.append(cosine(main_vector, X[id_to_index[neg_match_qid]]))
            targets.append(0)
        meter.add(np.array(similiarities), np.array(targets))
def evaluate_model(model, use_test_data=False, use_lstm=True):
    if use_test_data:
        print "Running evaluate on the TEST data:"
    else:
        print "Running evaluate on the DEV data:"
    # Set the model to eval mode
    model.eval()
    # samples has shape (num_dev_samples, 22)
    samples = get_dev_data_android(use_test_data=use_test_data)
    num_samples = len(samples)
    num_batches = int(math.ceil(1. * num_samples / BATCH_SIZE))
    score_matrix = torch.Tensor().cuda() if USE_GPU else torch.Tensor()
    orig_time = time()
    for i in range(num_batches):
        # Get the samples ready
        batch = samples[i * BATCH_SIZE:(i + 1) * BATCH_SIZE]
        # If this is the last batch, pad it to get the same shape as expected.
        if i == num_batches - 1 and num_samples % BATCH_SIZE != 0:
            batch = np.concatenate(
                (batch, np.full(((i + 1) * BATCH_SIZE - num_samples, 22), "0")),
                axis=0)
        # Convert from numpy arrays to tensors
        title_tensor, title_lengths = get_tensor_from_batch(batch, use_title=True)
        body_tensor, body_lengths = get_tensor_from_batch(batch, use_title=False)
        # Run the model
        model.hidden = model.init_hidden()
        title_lstm = model(title_tensor)
        title_encoding = get_encodings(title_lstm, title_lengths, use_lstm=use_lstm)
        model.hidden = model.init_hidden()
        body_lstm = model(body_tensor)
        body_encoding = get_encodings(body_lstm, body_lengths, use_lstm=use_lstm)
        # Compute evaluation
        X, _ = generate_score_matrix(title_encoding, body_encoding)
        X = X.data
        if i == num_batches - 1 and num_samples % BATCH_SIZE != 0:
            score_matrix = torch.cat([score_matrix, X[:num_samples - i * BATCH_SIZE]])
        else:
            score_matrix = torch.cat([score_matrix, X])
        # print "Finished batch " + str(i) + " after " + str(time() - orig_time) + " seconds"
    # score_matrix is a shape (num_dev_samples, 21) matrix that contains the
    # cosine similarity scores.
    meter = AUCMeter()
    similarities, targets = [], []
    for i in range(len(score_matrix)):
        similarities.append(score_matrix[i][0])
        targets.append(1)
        for j in range(1, 21):
            similarities.append(score_matrix[i][j])
            targets.append(0)
    meter.add(np.array(similarities), np.array(targets))
    print "The AUC(0.05) value is " + str(meter.value(0.05))
    # Set the model back to train mode
    model.train()
def evaluation(args, padding_id, ids_corpus, vocab_map, embeddings, model):
    """Calculate the AUC score of the model on Android data."""
    meter = AUCMeter()
    print "starting evaluation"
    val_data = corpus.read_annotations(args.test)
    print "number of lines in test data: " + str(len(val_data))
    val_batches = corpus.create_eval_batches(ids_corpus, val_data, padding_id)
    count = 0
    similarities = []
    for batch in val_batches:
        titles, bodies, qlabels = batch
        title_length, title_num_questions = titles.shape
        body_length, body_num_questions = bodies.shape
        title_embeddings, body_embeddings = corpus.get_embeddings(
            titles, bodies, vocab_map, embeddings)

        # title
        if args.model == 'lstm':
            if args.cuda:
                title_inputs = [autograd.Variable(torch.FloatTensor(title_embeddings).cuda())]
                title_inputs = torch.cat(title_inputs).view(title_length, title_num_questions, -1)
                title_hidden = (autograd.Variable(torch.zeros(1, title_num_questions, args.hidden_size).cuda()),
                                autograd.Variable(torch.zeros((1, title_num_questions, args.hidden_size)).cuda()))
            else:
                title_inputs = [autograd.Variable(torch.FloatTensor(title_embeddings))]
                title_inputs = torch.cat(title_inputs).view(title_length, title_num_questions, -1)
                # title_inputs = torch.cat(title_inputs).view(title_num_questions, title_length, -1)
                title_hidden = (autograd.Variable(torch.zeros(1, title_num_questions, args.hidden_size)),
                                autograd.Variable(torch.zeros((1, title_num_questions, args.hidden_size))))
        else:
            if args.cuda:
                title_inputs = [autograd.Variable(torch.FloatTensor(title_embeddings).cuda())]
            else:
                title_inputs = [autograd.Variable(torch.FloatTensor(title_embeddings))]
            title_inputs = torch.cat(title_inputs).transpose(0, 1).transpose(1, 2)

        if args.model == 'lstm':
            title_out, title_hidden = model(title_inputs, title_hidden)
        else:
            title_out = model(title_inputs)
            title_out = F.tanh(title_out)
            title_out = title_out.transpose(1, 2).transpose(0, 1)
        average_title_out = average_questions(title_out, titles, padding_id)

        # body
        if args.model == 'lstm':
            if args.cuda:
                body_inputs = [autograd.Variable(torch.FloatTensor(body_embeddings).cuda())]
                body_inputs = torch.cat(body_inputs).view(body_length, body_num_questions, -1)
                body_hidden = (autograd.Variable(torch.zeros(1, body_num_questions, args.hidden_size).cuda()),
                               autograd.Variable(torch.zeros((1, body_num_questions, args.hidden_size)).cuda()))
            else:
                body_inputs = [autograd.Variable(torch.FloatTensor(body_embeddings))]
                body_inputs = torch.cat(body_inputs).view(body_length, body_num_questions, -1)
                body_hidden = (autograd.Variable(torch.zeros(1, body_num_questions, args.hidden_size)),
                               autograd.Variable(torch.zeros((1, body_num_questions, args.hidden_size))))
        else:
            if args.cuda:
                body_inputs = [autograd.Variable(torch.FloatTensor(body_embeddings).cuda())]
                # body_inputs = torch.cat(body_inputs).view(body_num_questions, 200, -1)
            else:
                body_inputs = [autograd.Variable(torch.FloatTensor(body_embeddings))]
                # body_inputs = torch.cat(body_inputs).view(body_num_questions, 200, -1)
            body_inputs = torch.cat(body_inputs).transpose(0, 1).transpose(1, 2)

        if args.model == 'lstm':
            body_out, body_hidden = model(body_inputs, body_hidden)
        else:
            body_out = model(body_inputs)
            body_out = F.tanh(body_out)
            body_out = body_out.transpose(1, 2).transpose(0, 1)
        # average all words of each question from body_out
        average_body_out = average_questions(body_out, bodies, padding_id)

        # Average the body and title representations of the questions
        # as found by the CNN. 560 x 100
        hidden = (average_title_out + average_body_out) * 0.5
        query = torch.DoubleTensor(hidden[0].unsqueeze(0).cpu().data.numpy())
        examples = torch.DoubleTensor(hidden[1:].cpu().data.numpy())
        cos_similarity = F.cosine_similarity(query, examples, dim=1)
        qlabels = [float(qlabel) for qlabel in list(qlabels)]
        target = torch.DoubleTensor(qlabels)
        meter.add(cos_similarity, target)
    print meter.value(0.05)
def main():
    # load users, ads and their feature information
    users, ads, u_feat_infos, a_feat_infos = load_users_and_ads(
        cfg["data"]["user_fn"],
        cfg["data"]["ad_fn"],
        cfg["data"]["user_fi_fn"],
        cfg["data"]["ad_fi_fn"],
    )
    r_feat_infos = load_feature_infos(cfg["data"]["r_fi_fp"])
    logging.info("There are {} users.".format(len(users)))
    logging.info("There are {} ads.".format(len(ads)))

    # load data list and history features
    if not args.test:
        train_list = load_data_list(cfg["train_fp"])
        train_rfeats = load_rfeats(cfg["data"]["train_rfeat_fp"])
        valid_list = load_data_list(cfg["valid_fp"])
        valid_rfeats = load_rfeats(cfg["data"]["valid_rfeat_fp"])
    else:
        test_list = load_data_list(cfg["test_fp"])
        test_rfeats = load_rfeats(cfg["data"]["test_rfeat_fp"])

    filter = cfg["feat"]["filter"]
    # construct mappings and filters
    [fi.construct_mapping() for fi in u_feat_infos]
    [fi.construct_mapping() for fi in a_feat_infos]
    [fi.construct_mapping() for fi in r_feat_infos]
    # filter out low-frequency features
    for fi in u_feat_infos:
        if fi.name in filter:
            fi.construct_filter(l_freq=filter[fi.name])
        else:
            fi.construct_filter(l_freq=0)
    logging.warning("Users Filtering!!!")
    for fi in a_feat_infos:
        if fi.name in filter:
            fi.construct_filter(l_freq=filter[fi.name])
        else:
            fi.construct_filter(l_freq=0)
    logging.warning("Ads Filtering!!!")

    reg = cfg["reg"]
    if not args.test:
        train_dataset = DatasetYouth(users, u_feat_infos,
                                     ads, a_feat_infos,
                                     train_rfeats, r_feat_infos,
                                     train_list,
                                     cfg["feat"]["u_enc"],
                                     cfg["feat"]["a_enc"],
                                     cfg["feat"]["r_enc"],
                                     reg=reg,
                                     pos_weight=cfg["train"]["pos_weight"],
                                     has_label=True)
        if cfg["train"]["use_radio_sampler"]:
            radio_sampler = RadioSampler(train_dataset,
                                         p2n_radio=cfg["train"]["p2n_radio"])
            logging.info("Using radio sampler with p:n={}".format(
                cfg["train"]["p2n_radio"]))
        valid_dataset = DatasetYouth(users, u_feat_infos,
                                     ads, a_feat_infos,
                                     valid_rfeats, r_feat_infos,
                                     valid_list,
                                     cfg["feat"]["u_enc"],
                                     cfg["feat"]["a_enc"],
                                     cfg["feat"]["r_enc"],
                                     reg=reg,
                                     pos_weight=cfg["train"]["pos_weight"],
                                     has_label=True)
        dataset = train_dataset
    else:
        test_dataset = DatasetYouth(users, u_feat_infos,
                                    ads, a_feat_infos,
                                    test_rfeats, r_feat_infos,
                                    test_list,
                                    cfg["feat"]["u_enc"],
                                    cfg["feat"]["a_enc"],
                                    cfg["feat"]["r_enc"],
                                    reg=reg,
                                    has_label=False)
        dataset = test_dataset
    logging.info("shuffle: {}".format(
        False if cfg["train"]["use_radio_sampler"] else True))

    # set up model
    embedding_cfgs = {}
    embedding_cfgs.update(cfg["feat"]["u_embed_cfg"])
    embedding_cfgs.update(cfg["feat"]["a_embed_cfg"])
    loss_cfg = cfg["loss"]
    # create model
    model = eval(cfg["model_name"])(
        n_out=1,
        u_embedding_feat_infos=dataset.embedding_u_feat_infos,
        u_one_hot_feat_infos=dataset.one_hot_u_feat_infos,
        a_embedding_feat_infos=dataset.embedding_a_feat_infos,
        a_one_hot_feat_infos=dataset.one_hot_a_feat_infos,
        r_embedding_feat_infos=dataset.embedding_r_feat_infos,
        embedding_cfgs=embedding_cfgs,
        loss_cfg=loss_cfg,
    )
    # model = DataParallel(model, device_ids=cfg["gpus"])
    # logging.info("Using model {}.".format(cfg["model_name"]))

    # optimizers
    # TODO: lr, weight decay
    optimizer = Adam(model.get_train_policy(),
                     lr=cfg["optim"]["lr"],
                     weight_decay=cfg["optim"]["weight_decay"],
                     amsgrad=True)
    # optimizer = optim.SGD(model.parameters(), lr=0.005, momentum=0.9,
    #                       weight_decay=cfg["optim"]["weight_decay"])
    logging.info("Using optimizer {}.".format(optimizer))

    if cfg["train"]["resume"] or args.test:
        checkpoint_file = cfg["resume_fp"]
        state = load_checkpoint(checkpoint_file)
        logging.info("Load checkpoint file {}.".format(checkpoint_file))
        st_epoch = state["cur_epoch"] + 1
        logging.info("Start from {}th epoch.".format(st_epoch))
        model.load_state_dict(state["model_state"])
        optimizer.load_state_dict(state["optimizer_state"])
    else:
        st_epoch = 1
    ed_epoch = cfg["train"]["ed_epoch"]

    # move tensors to gpu and wrap them with Variable
    to_gpu_variable = dataset.get_to_gpu_variable_func()

    if args.extract_weight:
        model = model.module
        path = os.path.join(cfg["output_path"], "weight")
        os.makedirs(path, exist_ok=True)
        u_embedder = model.u_embedder
        u_embedder.save_weight(path)
        a_embedder = model.a_embedder
        a_embedder.save_weight(path)
        exit(0)

    def evaluate(output, label, label_weights):
        '''
        Note: the inputs to this function should be converted to .data first.
        :param output:
        :param label_weights:
        :param target:
        :return:
        '''
        output = output.view(-1)
        label = label.view(-1).byte()
        scores = torch.sigmoid(output)
        output = scores > 0.1
        tp = ((output == label) * label).float().sum()
        fp = ((output != label) * output).float().sum()
        fn = ((output != label) * (1 - output)).float().sum()
        tn = ((output == label) * (1 - label)).float().sum()
        return tp, fp, fn, tn, scores.cpu()

    def valid(cur_train_epoch, phase="valid", extract_features=False):
        '''
        :param cur_train_epoch:
        :param phase: "valid" or "test"
        :return:
        '''
        assert phase in ["valid", "test"]
        results = []
        valid_detail_meters = {
            "loss": SumMeter(),
            "model_loss": SumMeter(),
            "tp": SumMeter(),
            "fn": SumMeter(),
            "fp": SumMeter(),
            "tn": SumMeter(),
            "batch_time": AverageMeter(),
            "io_time": AverageMeter(),
        }
        if phase == "valid":
            logging.info("Valid data.")
            dataset = valid_dataset
        else:
            logging.info("Test data.")
            dataset = test_dataset
        model.eval()
        logging.info("Set network to eval mode.")
        if extract_features:
            features = np.zeros(shape=(dataset.original_len, model.n_output_feat),
                                dtype=np.float32)
            features_ctr = 0
        batch_idx = 0
        # chunked here
        chunk_size = 200
        n_chunk = (dataset.original_len + (cfg[phase]["batch_size"] * chunk_size) - 1) \
            // (cfg[phase]["batch_size"] * chunk_size)
        n_batch = (dataset.original_len + cfg[phase]["batch_size"] - 1) \
            // cfg[phase]["batch_size"]
        for chunk_idx in range(n_chunk):
            s = chunk_idx * cfg[phase]["batch_size"] * chunk_size
            e = (chunk_idx + 1) * cfg[phase]["batch_size"] * chunk_size
            dataloader = DataLoader(
                dataset.slice(s, e),
                batch_size=cfg[phase]["batch_size"],
                shuffle=False,
                num_workers=cfg[phase]["n_worker"],
                collate_fn=dataset.get_collate_func(),
                pin_memory=True,
                drop_last=False,
            )
            batch_time_s = time.time()
            for samples in dataloader:
                batch_idx = batch_idx + 1
                cur_batch = batch_idx
                valid_detail_meters["io_time"].update(time.time() - batch_time_s)
                # move to gpu
                samples = to_gpu_variable(samples, volatile=True)
                # forward
                loss, output, model_loss, reg_loss, d = model(samples)
                if phase == "valid":
                    # evaluate metrics
                    valid_detail_meters["loss"].update(
                        loss.data[0] * samples["size"], samples["size"])
                    valid_detail_meters["model_loss"].update(
                        model_loss.data[0] * samples["size"], samples["size"])
                    tp, fp, fn, tn, scores = evaluate(
                        output.data,
                        samples["labels"].data,
                        samples["label_weights"].data)
                    valid_detail_meters["tp"].update(tp, samples["size"])
                    valid_detail_meters["fp"].update(fp, samples["size"])
                    valid_detail_meters["fn"].update(fn, samples["size"])
                    valid_detail_meters["tn"].update(tn, samples["size"])
                    # the larger the better
                    tp_rate = valid_detail_meters["tp"].sum / (
                        valid_detail_meters["tp"].sum +
                        valid_detail_meters["fn"].sum + 1e-20)
                    # the smaller the better
                    fp_rate = valid_detail_meters["fp"].sum / (
                        valid_detail_meters["fp"].sum +
                        valid_detail_meters["tn"].sum + 1e-20)
                    valid_detail_meters["batch_time"].update(time.time() - batch_time_s)
                    batch_time_s = time.time()
                else:
                    scores = torch.sigmoid(output.data)
                    valid_detail_meters["batch_time"].update(time.time() - batch_time_s)
                    batch_time_s = time.time()
                # collect results
                uids = samples["uids"]
                aids = samples["aids"]
                results.extend(zip(aids, uids, scores))
                # collect features
                if extract_features:
                    bs = samples["size"]
                    features[features_ctr:features_ctr + bs, :] = d.data.cpu().numpy()
                    features_ctr += bs
                # log results
                if phase == "valid":
                    if cur_batch % cfg["valid"]["logging_freq"] == 0:
                        logging.info(
                            "Valid Batch [{cur_batch}/{ed_batch}] "
                            "loss: {loss} "
                            "model_loss: {model_loss} "
                            "tp: {tp} fn: {fn} fp: {fp} tn: {tn} "
                            "tp_rate: {tp_rate} fp_rate: {fp_rate} "
                            "io time: {io_time}s batch time {batch_time}s".format(
                                cur_batch=cur_batch,
                                ed_batch=n_batch,
                                loss=valid_detail_meters["loss"].mean,
                                model_loss=valid_detail_meters["model_loss"].mean,
                                tp=valid_detail_meters["tp"].sum,
                                fn=valid_detail_meters["fn"].sum,
                                fp=valid_detail_meters["fp"].sum,
                                tn=valid_detail_meters["tn"].sum,
                                tp_rate=tp_rate,
                                fp_rate=fp_rate,
                                io_time=valid_detail_meters["io_time"].mean,
                                batch_time=valid_detail_meters["batch_time"].mean,
                            ))
                else:
                    if cur_batch % cfg["test"]["logging_freq"] == 0:
                        logging.info(
                            "Test Batch [{cur_batch}/{ed_batch}] "
                            "io time: {io_time}s batch time {batch_time}s".format(
                                cur_batch=cur_batch,
                                ed_batch=n_batch,
                                io_time=valid_detail_meters["io_time"].mean,
                                batch_time=valid_detail_meters["batch_time"].mean,
                            ))
        if phase == "valid":
            logging.info("{phase} for {cur_train_epoch} train epoch "
                         "loss: {loss} "
                         "model_loss: {model_loss} "
                         "tp_rate: {tp_rate} fp_rate: {fp_rate} "
                         "io time: {io_time}s batch time {batch_time}s".format(
                             phase=phase,
                             cur_train_epoch=cur_train_epoch,
                             loss=valid_detail_meters["loss"].mean,
                             model_loss=valid_detail_meters["model_loss"].mean,
                             tp_rate=tp_rate,
                             fp_rate=fp_rate,
                             io_time=valid_detail_meters["io_time"].mean,
                             batch_time=valid_detail_meters["batch_time"].mean,
                         ))
            # write results to file
            res_fn = "{}_{}".format(cfg["valid_res_fp"], cur_train_epoch)
            with open(res_fn, 'w') as f:
                f.write("aid,uid,score\n")
                for res in results:
                    f.write("{},{},{:.8f}\n".format(res[0], res[1], res[2]))
            # evaluate results
            avg_auc, aucs = cal_avg_auc(res_fn, cfg["valid_fp"])
            logging.info("Valid for {cur_train_epoch} train epoch "
                         "average auc {avg_auc}".format(
                             cur_train_epoch=cur_train_epoch,
                             avg_auc=avg_auc,
                         ))
            logging.info("aucs: ")
            logging.info(pprint.pformat(aucs))
        else:
            logging.info("Test for {} train epoch ends.".format(cur_train_epoch))
            res_fn = "{}_{}".format(cfg["test_res_fp"], cur_train_epoch)
            with open(res_fn, 'w') as f:
                f.write("aid,uid,score\n")
                for res in results:
                    f.write("{},{},{:.8f}\n".format(res[0], res[1], res[2]))
        # extract features
        if extract_features:
            import pickle as pkl
            with open(cfg["extracted_features_fp"], "wb") as f:
                pkl.dump(features, f, protocol=pkl.HIGHEST_PROTOCOL)

    model.cuda()
    logging.info("Move network to gpu.")

    if args.test:
        valid(st_epoch - 1, phase="test", extract_features=args.extract_features)
        exit(0)
    elif cfg["valid"]["init_valid"]:
        valid(st_epoch - 1)
        model.train()
        logging.info("Set network to train mode.")

    # train: main loop
    model.train()
    logging.info("Set network to train mode.")
    # original_lambda = cfg["reg"]["lambda"]
    total_n_batch = 0
    warnings.warn("total_n_batch always starts at 0...")
    for cur_epoch in range(st_epoch, ed_epoch + 1):
        # meters
        k = cfg["train"]["logging_freq"]
        detail_meters = {
            "loss": RunningValue(k),
            "epoch_loss": SumMeter(),
            "model_loss": RunningValue(k),
            "epoch_model_loss": SumMeter(),
            "tp": RunningValue(k),
            "fn": RunningValue(k),
            "fp": RunningValue(k),
            "tn": RunningValue(k),
            "auc": AUCMeter(),
            "batch_time": AverageMeter(),
            "io_time": AverageMeter(),
        }
        # adjust lr
        adjust_learning_rate(cfg["optim"]["lr"], optimizer, cur_epoch,
                             cfg["train"]["lr_steps"],
                             lr_decay=cfg["train"]["lr_decay"])
        # dynamically adjust cfg["reg"]["lambda"]
        # decay = 1 / (0.5 ** (sum(cur_epoch > np.array(cfg["train"]["lr_steps"]))))
        # cfg["reg"]["lambda"] = original_lambda * decay
        # print("using dynamic regularizer, {}".format(cfg["reg"]["lambda"]))
        train_dataset.shuffle()
        batch_idx = -1
        # Chunked here because of a memory issue: we create a new DataLoader
        # after every few hundred batches.
        chunk_size = 200
        n_chunk = (train_dataset.original_len + (cfg["train"]["batch_size"] * chunk_size) - 1) \
            // (cfg["train"]["batch_size"] * chunk_size)
        n_batch = (train_dataset.original_len + cfg["train"]["batch_size"] - 1) \
            // cfg["train"]["batch_size"]
        for chunk_idx in range(n_chunk):
            s = chunk_idx * cfg["train"]["batch_size"] * chunk_size
            e = (chunk_idx + 1) * cfg["train"]["batch_size"] * chunk_size
            train_dataloader = DataLoader(
                train_dataset.slice(s, e),
                batch_size=cfg["train"]["batch_size"],
                shuffle=False if cfg["train"]["use_radio_sampler"] else True,
                num_workers=cfg["train"]["n_worker"],
                collate_fn=train_dataset.get_collate_func(),
                sampler=radio_sampler if cfg["train"]["use_radio_sampler"] else None,
                pin_memory=True,
                drop_last=True,
            )
            batch_time_s = time.time()
            for samples in train_dataloader:
                total_n_batch += 1
                batch_idx = batch_idx + 1
                detail_meters["io_time"].update(time.time() - batch_time_s)
                # move to gpu
                samples = to_gpu_variable(samples)
                # forward
                loss, output, model_loss, reg_loss, d = model(samples)
                # clear grads
                optimizer.zero_grad()
                # backward
                loss.backward()
                # This is a little useful.
                warnings.warn("Using gradient clipping")
                clip_grad_norm(model.parameters(), max_norm=5)
                # update weights
                optimizer.step()
                # evaluate metrics
                detail_meters["loss"].update(loss.data[0])
                detail_meters["epoch_loss"].update(loss.data[0])
                detail_meters["model_loss"].update(model_loss.data[0])
                detail_meters["epoch_model_loss"].update(model_loss.data[0])
                tp, fp, fn, tn, scores = evaluate(
                    output.data,
                    samples["labels"].data,
                    samples["label_weights"].data)
                detail_meters["tp"].update(tp)
                detail_meters["fp"].update(fp)
                detail_meters["fn"].update(fn)
                detail_meters["tn"].update(tn)
                # the larger the better
                tp_rate = detail_meters["tp"].sum / (
                    detail_meters["tp"].sum + detail_meters["fn"].sum + 1e-20)
                # the smaller the better
                fp_rate = detail_meters["fp"].sum / (
                    detail_meters["fp"].sum + detail_meters["tn"].sum + 1e-20)
                detail_meters["batch_time"].update(time.time() - batch_time_s)
                # collect results
                uids = samples["uids"]
                aids = samples["aids"]
                preds = zip(aids, uids, scores)
                gts = zip(aids, uids, samples["labels"].cpu().data)
                detail_meters["auc"].update(preds, gts)
                batch_time_s = time.time()
                # log results
                if (batch_idx + 1) % cfg["train"]["logging_freq"] == 0:
                    logging.info(
                        "Train Batch [{cur_batch}/{ed_batch}] "
                        "loss: {loss} "
                        "model_loss: {model_loss} "
                        "auc: {auc} "
                        "tp: {tp} fn: {fn} fp: {fp} tn: {tn} "
                        "tp_rate: {tp_rate} fp_rate: {fp_rate} "
                        "io time: {io_time}s batch time {batch_time}s".format(
                            cur_batch=batch_idx + 1,
                            ed_batch=n_batch,
                            loss=detail_meters["loss"].mean,
                            model_loss=detail_meters["model_loss"].mean,
                            tp=detail_meters["tp"].sum,
                            fn=detail_meters["fn"].sum,
                            fp=detail_meters["fp"].sum,
                            tn=detail_meters["tn"].sum,
                            auc=detail_meters["auc"].auc,
                            tp_rate=tp_rate,
                            fp_rate=fp_rate,
                            io_time=detail_meters["io_time"].mean,
                            batch_time=detail_meters["batch_time"].mean,
                        ))
                    detail_meters["auc"].reset()
                if (total_n_batch % cfg["train"]["backup_freq_batch"] == 0
                        and total_n_batch >= cfg["train"]["start_backup_batch"]):
                    state_to_save = {
                        "cur_epoch": cur_epoch,
                        "model_state": model.state_dict(),
                        "optimizer_state": optimizer.state_dict(),
                    }
                    checkpoint_file = os.path.join(
                        cfg["output_path"],
                        "epoch_{}_tbatch_{}.checkpoint".format(cur_epoch, total_n_batch))
                    save_checkpoint(state_to_save, checkpoint_file)
                    logging.info("Save checkpoint to {}.".format(checkpoint_file))
                if (total_n_batch % cfg["train"]["valid_freq_batch"] == 0
                        and total_n_batch >= cfg["train"]["start_valid_batch"]):
                    valid(cur_epoch)
                    model.train()
                    logging.info("Set network to train mode.")
        logging.info("Train Epoch [{cur_epoch}] "
                     "loss: {loss} "
                     "model_loss: {model_loss} ".format(
                         cur_epoch=cur_epoch,
                         loss=detail_meters["epoch_loss"].mean,
                         model_loss=detail_meters["epoch_model_loss"].mean,
                     ))
        # back up
        if (cur_epoch % cfg["train"]["backup_freq_epoch"] == 0
                and cur_epoch >= cfg["train"]["start_backup_epoch"]):
            state_to_save = {
                "cur_epoch": cur_epoch,
                "model_state": model.state_dict(),
                "optimizer_state": optimizer.state_dict(),
            }
            checkpoint_file = os.path.join(cfg["output_path"],
                                           "epoch_{}.checkpoint".format(cur_epoch))
            save_checkpoint(state_to_save, checkpoint_file)
            logging.info("Save checkpoint to {}.".format(checkpoint_file))
        # validate on the valid dataset
        if (cur_epoch % cfg["train"]["valid_freq_epoch"] == 0
                and cur_epoch >= cfg["train"]["start_valid_epoch"]):
            valid(cur_epoch)
            model.train()
            logging.info("Set network to train mode.")
def run_epoch(data, is_training, model, optimizer, transfer=False):
    # Make batches
    data_loader = torch.utils.data.DataLoader(
        data,
        batch_size=10,
        shuffle=True,
        num_workers=4,
        drop_last=False)
    losses = []
    actual = []
    expected = []

    if is_training:
        model.train()
    else:
        model.eval()

    for batch in data_loader:
        # Unpack training instances
        pid_title = torch.unsqueeze(Variable(batch['pid_title']), 1).cuda()  # Size: batch_size x 1 x title_length=40
        pid_title_mask = torch.unsqueeze(Variable(batch['pid_title_mask']), 1).cuda()
        pid_body = torch.unsqueeze(Variable(batch['pid_body']), 1).cuda()  # Size: batch_size x 1 x body_length=100
        pid_body_mask = torch.unsqueeze(Variable(batch['pid_body_mask']), 1).cuda()
        candidate_title = Variable(batch['candidate_titles']).cuda()  # Size: batch_size x # candidates (21 in training) x title_length=40
        candidate_title_mask = Variable(batch['candidate_titles_mask']).cuda()
        candidate_body = Variable(batch['candidate_body']).cuda()  # Size: batch_size x # candidates x body_length=100
        candidate_body_mask = Variable(batch['candidate_body_mask']).cuda()

        if is_training:
            optimizer.zero_grad()

        # Run text through model
        pid_title = model(pid_title)  # batch_size x 1 x output_size=500 x title_length (-kernel_size+1 if CNN)
        pid_body = model(pid_body)  # batch_size x 1 x output_size=500 x body_length
        candidate_title = model(candidate_title)  # batch_size x # candidates x output_size=500 x title_length
        candidate_body = model(candidate_body)  # batch_size x # candidates x output_size=500 x body_length

        # Expand the masks to match the encoder output shapes.
        pid_title_mask = torch.unsqueeze(pid_title_mask, 2).expand_as(pid_title)
        pid_body_mask = torch.unsqueeze(pid_body_mask, 2).expand_as(pid_body)
        candidate_title_mask = torch.unsqueeze(candidate_title_mask, 2).expand_as(candidate_title)
        candidate_body_mask = torch.unsqueeze(candidate_body_mask, 2).expand_as(candidate_body)

        # Masked sums over the word dimension.
        good_title = torch.sum(pid_title * pid_title_mask, 3)  # batch_size x 1 x output_size=500
        good_body = torch.sum(pid_body * pid_body_mask, 3)
        cand_titles = torch.sum(candidate_title * candidate_title_mask, 3)  # batch_size x # candidates x output_size=500
        cand_bodies = torch.sum(candidate_body * candidate_body_mask, 3)

        good_tensor = (good_title + good_body) / 2  # batch_size x 1 x output_size=500
        cand_tensor = (cand_titles + cand_bodies) / 2  # batch_size x # candidates x output_size=500

        if is_training:
            l = loss(good_tensor, cand_tensor, 1.0)
            l.backward()
            losses.append(l.cpu().data[0])
            optimizer.step()
        else:
            similarity = cosine_sim(good_tensor.expand_as(cand_tensor), cand_tensor, dim=2)
            if transfer:
                similarity = torch.FloatTensor(similarity.data.cpu().numpy())
            else:
                similarity = similarity.data.cpu().numpy()
            if transfer:
                labels = batch['labels']
            else:
                labels = batch['labels'].numpy()

            def predict(sim, labels):
                predictions = []
                for i in range(sim.shape[0]):
                    sorted_cand = (-sim[i]).argsort()
                    predictions.append(labels[i][sorted_cand])
                return predictions

            if transfer:
                for sim in similarity:
                    actual.append(sim)
                expected.extend(labels.view(-1))
            else:
                l = predict(similarity, labels)
                losses.extend(l)

    if is_training:
        avg_loss = np.mean(losses)
        return avg_loss
    else:
        if transfer:
            auc = AUCMeter()
            auc.reset()
            auc.add(torch.cat(actual), torch.LongTensor(expected))
            return auc.value(max_fpr=0.05)
        else:
            e = Evaluation(losses)
            MAP = e.MAP() * 100
            MRR = e.MRR() * 100
            P1 = e.Precision(1) * 100
            P5 = e.Precision(5) * 100
            return (MAP, MRR, P1, P5)