def evaluate(dataset):
    meter = AUCMeter()
    dev_loader = torch.utils.data.DataLoader(dataset, batch_size=dev_batch_size, shuffle=True)
    start_time = time.time()
    for dev_batch in tqdm(dev_loader):
        model.eval()
        domain_model.eval()
        dev_x = autograd.Variable(dev_batch["x"])
        if args.cuda:
            dev_x = dev_x.cuda()
        dev_pad_title = dev_batch["pad_title"]
        dev_pad_body = dev_batch["pad_body"]
        if args.model == "lstm":
            hidden2 = model.init_hidden(dev_x.shape[1])
            hidden2 = repackage_hidden(hidden2)
            out_dev_x_raw, _ = model(dev_x, dev_pad_title, dev_pad_body, hidden2)
        else:
            out_dev_x_raw, _ = model(dev_x, dev_pad_title, dev_pad_body)
        out_dev_x = out_dev_x_raw.data
        # Each row of scores has the positive candidate first, negatives after.
        truth = [0] * len(out_dev_x[0])
        truth[0] = 1
        truth = np.asarray(truth)
        for i in range(len(out_dev_x)):
            meter.add(out_dev_x[i], truth)
        print("auc middle", meter.value(0.05))
    print("AUC DONE", meter.value(0.05))
def evaluation(args, padding_id, android_ids_corpus, model, vocab_map, embeddings):
    print "starting evaluation"
    meter = AUCMeter()
    android_test_pos_path = os.path.join(args.android_path, 'test.pos.txt')
    android_test_neg_path = os.path.join(args.android_path, 'test.neg.txt')
    android_test_annotations = android_pairs_to_annotations(
        android_test_pos_path, android_test_neg_path)
    android_test_batches = corpus.create_eval_batches(
        android_ids_corpus, android_test_annotations, padding_id)
    for batch in android_test_batches:
        titles, bodies, qlabels = batch
        hidden = vectorize_question(args, batch, model, vocab_map, embeddings,
                                    padding_id)
        # The first vector is the query question; the rest are its candidates.
        query = hidden[0].unsqueeze(0)
        examples = hidden[1:]
        cos_similarity = F.cosine_similarity(query, examples, dim=1)
        target = torch.DoubleTensor(qlabels)
        meter.add(cos_similarity.data, target)
    print meter.value(0.05)
def calculate_meter(data):
    """Calculate the AUC score."""
    positives = {}
    negatives = {}
    print "loading data"
    # dev.[pos|neg].txt and test.[pos|neg].txt format: id \w id
    if data == 'dev':
        pos_ids_X, pos_Y = load_data("../Android/dev.pos.txt", True)
    else:
        pos_ids_X, pos_Y = load_data("../Android/test.pos.txt", True)
    for q1, q2 in pos_ids_X:
        positives.setdefault(q1, []).append(q2)
    if data == 'dev':
        neg_ids_X, neg_Y = load_data("../Android/dev.neg.txt", False)
    else:
        neg_ids_X, neg_Y = load_data("../Android/test.neg.txt", False)
    for q1, q2 in neg_ids_X:
        negatives.setdefault(q1, []).append(q2)
    vectorizer = TfidfVectorizer()
    print "tfidf fit"
    vectorizer.fit(all_sequences)  # 36404 unique words
    meter = AUCMeter()
    qlabels = []
    all_questions = []
    question_ids = set()
    question_ids.update(positives.keys())
    question_ids.update(negatives.keys())
    for qid in question_ids:
        # Title and body are concatenated into one text per question. Use
        # .get() so a qid that appears in only one of the files does not
        # raise a KeyError.
        negs = negatives.get(qid, [])
        poss = positives.get(qid, [])
        questions = [raw_corpus[qid][0] + " " + raw_corpus[qid][1]]
        questions.extend([raw_corpus[nid][0] + " " + raw_corpus[nid][1] for nid in negs])
        questions.extend([raw_corpus[pid][0] + " " + raw_corpus[pid][1] for pid in poss])
        all_questions.append(questions)
        qlabels.append([0] * len(negs) + [1] * len(poss))
    for question, qlabel in zip(all_questions, qlabels):
        query = torch.DoubleTensor(vectorizer.transform([question[0]]).todense())
        examples = torch.DoubleTensor(vectorizer.transform(question[1:]).todense())
        cos_similarity = F.cosine_similarity(query, examples, dim=1)
        target = torch.DoubleTensor(qlabel)
        meter.add(cos_similarity, target)
    print meter.value(0.05)
def evaluate_auc(model, pos_data, neg_data, question_data, batch_size):
    auc = AUCMeter()
    evaluate_pair_set(model, pos_data, 1, question_data, auc, batch_size)
    evaluate_pair_set(model, neg_data, 0, question_data, auc, batch_size)
    return auc.value(max_fpr=0.05)
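# `evaluate_pair_set` is not shown in this section. A minimal sketch of what it
# is assumed to do -- embed each (query, candidate) pair, score it with cosine
# similarity, and feed score/label pairs into the shared AUCMeter. The helper
# `embed_question` is hypothetical; the real code likely batches these lookups.
def evaluate_pair_set(model, pairs, label, question_data, auc, batch_size):
    for query_id, candidate_id in pairs:
        q = embed_question(model, question_data[query_id])      # hypothetical helper
        c = embed_question(model, question_data[candidate_id])  # hypothetical helper
        score = F.cosine_similarity(q.unsqueeze(0), c.unsqueeze(0), dim=1)
        auc.add(score.data, torch.LongTensor([label]))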
def eval_part2(model, android_data, use_dev, model_type, using_part1_model=False,
               batch_size=1, tfidf_weighting=False):
    print "Begin eval_part2..."
    auc_eval = AUCMeter()
    num_batches = len(android_data.dev_data) / batch_size if use_dev \
        else len(android_data.test_data) / batch_size
    for i in xrange(num_batches):
        title, body, similar = android_data.get_next_eval_feature(
            use_dev, tfidf_weighting=tfidf_weighting)
        if using_part1_model:
            h = run_model(model, title, body, True, True, model_type)
        else:
            title_vectors, title_masks = title
            body_vectors, body_masks = body
            h, _ = run_part2_model(model, title_vectors, body_vectors,
                                   title_masks, body_masks, model_type, False)
        # The candidates are all results after the first one, which is h_q.
        h_q = h[0]
        candidate_scores = [get_cosine_similarity(h_q, c) for c in h[1:]]
        # Correct labels: 1 if the candidate is similar to the query question,
        # 0 otherwise.
        labels = np.zeros(len(candidate_scores))
        for similar_idx in similar:
            labels[similar_idx] = 1
        auc_eval.add(np.array(candidate_scores), labels)
    print "Part 2 AUC for %s: %f" % ("dev" if use_dev else "test",
                                     auc_eval.value(.05))
def do_eval(embedding_layer, eval_name, batch_first=False):
    if eval_name == 'Dev':
        eval_data = dev_android
    elif eval_name == 'Test':
        eval_data = test_android
    eval_map = {}
    for qid_ in eval_data.keys():
        eval_map[qid_] = process_eval_batch(qid_, eval_data, batch_first=batch_first)
    labels = []
    auc = AUCMeter()
    for qid_ in eval_map.keys():
        eval_title_batch, eval_body_batch, eval_title_len, eval_body_len = eval_map[qid_]
        embedding_layer.title_hidden = embedding_layer.init_hidden(
            eval_title_batch.shape[1])
        embedding_layer.body_hidden = embedding_layer.init_hidden(
            eval_body_batch.shape[1])
        eval_title_qs = Variable(torch.FloatTensor(eval_title_batch))
        eval_body_qs = Variable(torch.FloatTensor(eval_body_batch))
        if cuda_available:
            eval_title_qs, eval_body_qs = eval_title_qs.cuda(), eval_body_qs.cuda()
        embeddings = embedding_layer(eval_title_qs, eval_body_qs,
                                     eval_title_len, eval_body_len)
        cos_scores = evaluate(embeddings).cpu().data.numpy()
        true_labels = np.array(eval_data[qid_]['label'])
        auc.add(cos_scores, true_labels)
        # Record labels sorted by descending score for the ranking metrics.
        labels.append(true_labels[np.argsort(cos_scores)][::-1])
    auc_stdout = eval_name + ' AUC ' + str(auc.value(0.05))
    print(auc_stdout)
    logging.debug(auc_stdout)
    eval_metrics(labels, eval_name)
    return auc.value(0.05)
def evaluate(model, test_data, test_labels):
    m = AUCMeter()
    cos_sims = []
    labels = []
    titles, bodies = test_data
    print "Getting test query embeddings"
    title_output = model(Variable(torch.FloatTensor(titles)))
    body_output = model(Variable(torch.FloatTensor(bodies)))
    question_embeddings = (title_output + body_output) / 2
    print "Getting cosine similarities"
    # Embeddings come in (query, candidate) pairs: even index = query,
    # odd index = candidate.
    for i in range(len(question_embeddings) / 2):
        q_ind = 2 * i
        r_ind = 2 * i + 1
        q_emb = question_embeddings[q_ind]
        r_emb = question_embeddings[r_ind]
        cos_sim = F.cosine_similarity(q_emb, r_emb, dim=0, eps=1e-6)
        cos_sims.append(cos_sim.data[0])
        labels.append(test_labels[q_ind])
        if i % 3000 == 0:
            print "index ", q_ind
    m.add(torch.FloatTensor(cos_sims), torch.IntTensor(labels))
    print m.value(max_fpr=0.05)
def evaluate_tfidf_auc(data, tfidf_vectors, query_to_index):
    auc = AUCMeter()
    for entry_id, eval_query_result in data.items():
        similar_ids = eval_query_result.similar_ids
        positives = set(similar_ids)
        candidate_ids = eval_query_result.candidate_ids
        entry_encoding = tfidf_vectors[query_to_index[entry_id]]
        candidate_similarities = []
        targets = []
        for candidate_id in candidate_ids:
            candidate_encoding = tfidf_vectors[query_to_index[candidate_id]]
            similarity = cosine(entry_encoding, candidate_encoding)
            candidate_similarities.append(similarity.item(0))
            targets.append(IS_SIMMILAR_LABEL if candidate_id in positives
                           else NOT_SIMMILAR_LABEL)
        similarities = torch.Tensor(candidate_similarities)
        auc.add(similarities, torch.Tensor(targets))
    return auc.value(MAXIMUM_FALSE_POSITIVE_RATIO)
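# A usage sketch for evaluate_tfidf_auc, assuming `tfidf_vectors` is the sparse
# matrix returned by TfidfVectorizer.fit_transform (one row per question) and
# `query_to_index` maps a question id to its row. `corpus_texts`, `corpus_ids`,
# and `dev_data` are hypothetical names standing in for this project's loaders.
vectorizer = TfidfVectorizer()
tfidf_vectors = vectorizer.fit_transform(corpus_texts)
query_to_index = {qid: i for i, qid in enumerate(corpus_ids)}
print evaluate_tfidf_auc(dev_data, tfidf_vectors, query_to_index)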
def unsupervised_methods_helper(android_data, use_dev):
    auc_eval = AUCMeter()
    batch_size = 1
    num_batches = len(android_data.dev_data) / batch_size if use_dev \
        else len(android_data.test_data) / batch_size
    for i in xrange(num_batches):
        bows, labels = android_data.get_next_eval_bow_feature(use_dev, batch_size)
        for j in xrange(batch_size):
            # TODO: this currently only works when batch_size is 1; fix indexing.
            query = bows[0]
            scores = [get_tfidf_cosine_similarity(query, sample) for sample in bows[1:]]
            assert len(scores) == len(labels[j])
            auc_eval.add(np.array(scores), labels[j])
    # Report AUC.
    print "AUC for %s: %f" % ("dev" if use_dev else "test", auc_eval.value(.05))
def evaluate(self, dev_or_test):
    '''dev_or_test must be one of 'dev' or 'test'.'''
    self.reset_params()
    auc_meter = AUCMeter()
    # Get the id batches.
    pos_ids_batches_pair = self.pre.eval_split_into_batches(is_pos=True,
                                                            dev_or_test=dev_or_test)
    neg_ids_batches_pair = self.pre.eval_split_into_batches(is_pos=False,
                                                            dev_or_test=dev_or_test)
    # Loop through the batches: negatives get target 0, positives target 1.
    data_sets = [neg_ids_batches_pair, pos_ids_batches_pair]
    for i_target, ids_batches_pair in enumerate(data_sets):
        ids_batches_left, ids_batches_right = ids_batches_pair
        for i in xrange(len(ids_batches_left)):
            feats_left = self.get_output(ids_batches_left[i])
            feats_right = self.get_output(ids_batches_right[i])
            preds = self.get_cosine_scores(feats_left, feats_right).data.numpy()
            targets = np.ones(len(preds)) * i_target  # 0s if neg, 1s if pos
            auc_meter.add(preds, targets)
    # All predictions are added; now get the AUC value.
    auc_value = auc_meter.value(params.auc_max_fpr)
    print('AUC(%f) value for %s = %f' % (params.auc_max_fpr, dev_or_test, auc_value))
def evaluate_model(model, data, corpus, word_to_index, cuda):
    auc = AUCMeter()
    for query in data.keys():
        positives = set(data[query][0])
        candidates = data[query][1]
        embeddings = [pad(merge_title_and_body(corpus[query]), len(word_to_index))]
        targets = []
        for candidate in candidates:
            embeddings.append(pad(merge_title_and_body(corpus[candidate]),
                                  len(word_to_index)))
            targets.append(IS_SIMMILAR_LABEL if candidate in positives
                           else NOT_SIMMILAR_LABEL)
        embeddings = Variable(torch.from_numpy(np.array(embeddings)))
        targets = torch.from_numpy(np.array(targets))
        if cuda:
            embeddings = embeddings.cuda()
        encodings = model(embeddings)
        query_encoding = encodings[0]
        candidate_encodings = encodings[1:]
        similarities = F.cosine_similarity(
            candidate_encodings,
            query_encoding.repeat(len(encodings) - 1, 1), dim=1)
        auc.add(similarities.data, targets)
    return auc.value(MAXIMUM_FALSE_POSITIVE_RATIO)
def evaluate_auc(args, model, embedding, batches, padding_id):
    model.eval()
    meter = AUCMeter()
    for i, batch in enumerate(batches):
        title_ids, body_ids, labels = batch
        hidden = forward(args, model, embedding, title_ids, body_ids, padding_id)
        q = hidden[0].unsqueeze(0)
        p = hidden[1:]
        scores = F.cosine_similarity(q, p, dim=1).cpu().data
        assert len(scores) == len(labels)
        target = torch.DoubleTensor(labels)
        meter.add(scores, target)
    auc_score = meter.value(0.05)
    print 'AUC(0.05): {}'.format(auc_score)
    return auc_score
stop_words = stopwords.words('english')
stop_words.append('')
meter = AUCMeter()
vectorizer = TfidfVectorizer(lowercase=True, stop_words=stop_words, use_idf=True,
                             ngram_range=(1, 2), tokenizer=lambda x: x.split(' '))
vs = vectorizer.fit_transform(contents)
for q, p, ns in tqdm.tqdm(android_test):
    sims = []
    question = vs[question_ids[q]]
    for candidate in [p] + ns:
        cos_sim = cosine_similarity(question, vs[question_ids[candidate]])
        sims.append(cos_sim[0][0])
    sims = np.array(sims)
    labels = np.array([1] + [0] * len(ns))
    # Add scores and labels sorted by descending similarity; indexing labels
    # with ind exactly once keeps each score aligned with its own label.
    ind = np.argsort(sims)[::-1]
    meter.add(sims[ind], labels[ind])
print(meter.value(0.05))
def run_epoch(args, ubuntu_loader, android_loader, qr_model, qr_criterion,
              qr_optimizer, dc_model, dc_criterion, dc_optimizer, epoch,
              mode='train'):
    queries_per_batch = args.batch_size / args.examples_per_query
    # Alternate between source (ubuntu, domain 0) and target (android, domain 1)
    # batches.
    data_and_target_loaders = [izip(ubuntu_loader, repeat(0)),
                               izip(android_loader, repeat(1))]
    data_and_target_loader = roundrobin(*data_and_target_loaders)
    print "Epoch {}".format(epoch)
    qr_total_loss = 0
    dc_total_loss = 0
    dc_count = 0
    qr_metrics = QuestionRetrievalMetrics()
    auc_meter = AUCMeter()
    for i_batch, (data, target_domain) in enumerate(data_and_target_loader):
        padded_things, ys = data
        print "Epoch {}, Batch #{}, Domain {}".format(epoch, i_batch, target_domain)
        ys = create_variable(ys)
        # padded_things might also be packed: qt is (PackedSequence, perm_idx),
        # or (seq_tensor, seq_lengths).
        qt, qb, ot, ob = padded_things
        # Step 1. PyTorch accumulates gradients, so clear them out before each
        # instance.
        for model in [qr_model, dc_model]:
            model.zero_grad()
        # Generate embeddings.
        query_title = qr_model.get_embed(*qt)
        query_body = qr_model.get_embed(*qb)
        other_title = qr_model.get_embed(*ot)
        other_body = qr_model.get_embed(*ob)
        query_embed = (query_title + query_body) / 2
        other_embed = (other_title + other_body) / 2
        grl = GradientReversalLayer(args.dc_factor)
        # Classify their domains.
        other_domain = dc_model(grl(other_embed))
        target = create_variable(
            torch.FloatTensor([float(target_domain)] * other_domain.size(0)))
        auc_meter.add(other_domain.data, target.data)
        if mode == 'train':
            # Compute batch losses.
            qr_batch_loss = qr_criterion(query_embed, other_embed, ys)
            qr_total_loss += qr_batch_loss.data[0]
            print "avg QR loss for batch {} was {}".format(
                i_batch, qr_batch_loss.data[0] / queries_per_batch)
            dc_batch_loss = dc_criterion(other_domain, target)
            dc_total_loss += dc_batch_loss.data[0]
            print "avg DC loss for batch {} was {}".format(
                i_batch, dc_batch_loss.data[0] / args.batch_size)
            dc_count += args.batch_size
            if target_domain == 0:
                # ubuntu. We don't have android training data for QR.
                qr_batch_loss.backward(retain_graph=True)
            dc_batch_loss.backward()
            qr_optimizer.step()
            dc_optimizer.step()
        if not (mode == 'val' and target_domain == 0):
            update_metrics_for_batch(args, query_embed, other_embed, ys, qr_metrics)
        if i_batch % args.stats_display_interval == 0:
            qr_metrics.display(i_batch)
    print "AUC Meter {} final stats for epoch {} was {}".format(
        mode, epoch, auc_meter.value(0.05))
    if mode == 'train':
        qr_avg_loss = qr_total_loss / qr_metrics.queries_count
        dc_avg_loss = dc_total_loss / dc_count
        print "average {} QR loss for epoch {} was {}".format(mode, epoch, qr_avg_loss)
        print "average {} DC loss for epoch {} was {}".format(mode, epoch, dc_avg_loss)
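# GradientReversalLayer is used above but not defined in this section. A minimal
# sketch of the standard DANN-style construction -- identity on the forward
# pass, gradient negated and scaled by a factor on the backward pass -- written
# against the old-style torch.autograd.Function API that the surrounding
# Variable-era code assumes. The constructor argument matches the grl =
# GradientReversalLayer(args.dc_factor) call above.
class GradientReversalLayer(torch.autograd.Function):
    def __init__(self, lambd):
        self.lambd = lambd

    def forward(self, x):
        # Identity on the forward pass.
        return x.view_as(x)

    def backward(self, grad_output):
        # Reverse (and scale) the gradient flowing back into the encoder, so
        # the encoder learns domain-invariant features while the classifier
        # still learns to discriminate domains.
        return grad_output.neg() * self.lambd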
def run_adversarial_epoch(data, is_training, encoder, encoder_optimizer,
                          classifier, classifier_optimizer):
    # Make batches.
    data_loader = torch.utils.data.DataLoader(
        data, batch_size=10, shuffle=True, num_workers=4, drop_last=False)
    losses = []
    bce_losses = []
    actual = []
    expected = []
    if is_training:
        encoder.train()
        classifier.train()
    else:
        encoder.eval()
        classifier.eval()
    for batch in data_loader:
        # Unpack training instances.
        pid_title = torch.unsqueeze(Variable(batch['pid_title']), 1).cuda()            # batch_size x 1 x title_length=40
        pid_title_mask = torch.unsqueeze(Variable(batch['pid_title_mask']), 1).cuda()  # batch_size x 1 x title_length=40
        pid_body = torch.unsqueeze(Variable(batch['pid_body']), 1).cuda()              # batch_size x 1 x body_length=100
        pid_body_mask = torch.unsqueeze(Variable(batch['pid_body_mask']), 1).cuda()    # batch_size x 1 x body_length=100
        candidate_title = Variable(batch['candidate_titles']).cuda()                   # batch_size x #candidates (21 in training) x title_length=40
        candidate_title_mask = Variable(batch['candidate_titles_mask']).cuda()         # batch_size x #candidates x title_length=40
        candidate_body = Variable(batch['candidate_body']).cuda()                      # batch_size x #candidates x body_length=100
        candidate_body_mask = Variable(batch['candidate_body_mask']).cuda()            # batch_size x #candidates x body_length=100
        if is_training:
            android_title = torch.unsqueeze(Variable(batch['android_title']), 1).cuda()
            android_title_mask = torch.unsqueeze(Variable(batch['android_title_mask']), 1).cuda()
            android_body = torch.unsqueeze(Variable(batch['android_body']), 1).cuda()
            android_body_mask = torch.unsqueeze(Variable(batch['android_body_mask']), 1).cuda()
        sz = pid_title.size()[0]
        if is_training:
            encoder_optimizer.zero_grad()
            classifier_optimizer.zero_grad()
        # Run text through the encoder.
        pid_title = encoder(pid_title)              # batch_size x 1 x output_size=500 x title_length=40 (-kernel_size+1 if CNN)
        pid_body = encoder(pid_body)                # batch_size x 1 x output_size=500 x body_length=100 (-kernel_size+1 if CNN)
        candidate_title = encoder(candidate_title)  # batch_size x #candidates x output_size=500 x title_length
        candidate_body = encoder(candidate_body)    # batch_size x #candidates x output_size=500 x body_length
        if is_training:
            android_title = encoder(android_title)
            android_body = encoder(android_body)
        pid_title_mask = torch.unsqueeze(pid_title_mask, 2).expand_as(pid_title)
        pid_body_mask = torch.unsqueeze(pid_body_mask, 2).expand_as(pid_body)
        candidate_title_mask = torch.unsqueeze(candidate_title_mask, 2).expand_as(candidate_title)
        candidate_body_mask = torch.unsqueeze(candidate_body_mask, 2).expand_as(candidate_body)
        if is_training:
            android_title_mask = torch.unsqueeze(android_title_mask, 2).expand_as(android_title)
            android_body_mask = torch.unsqueeze(android_body_mask, 2).expand_as(android_body)
        # Masked sums over the word dimension, then title/body averaging.
        good_title = torch.sum(pid_title * pid_title_mask, 3)                # batch_size x 1 x output_size=500
        good_body = torch.sum(pid_body * pid_body_mask, 3)                   # batch_size x 1 x output_size=500
        cand_titles = torch.sum(candidate_title * candidate_title_mask, 3)   # batch_size x #candidates x output_size=500
        cand_bodies = torch.sum(candidate_body * candidate_body_mask, 3)     # batch_size x #candidates x output_size=500
        if is_training:
            android_title = torch.sum(android_title * android_title_mask, 3)
            android_body = torch.sum(android_body * android_body_mask, 3)
        good_tensor = (good_title + good_body) / 2    # batch_size x 1 x output_size=500
        cand_tensor = (cand_titles + cand_bodies) / 2 # batch_size x #candidates x output_size=500
        if is_training:
            android_tensor = (android_title + android_body) / 2
            # Classify the domain of the ubuntu and android encodings.
            good_domain = classifier(good_tensor.view(sz, -1))
            android_domain = classifier(android_tensor.view(sz, -1))
            softmax = nn.Softmax(dim=1)
            good_dist = softmax(good_domain)
            android_dist = softmax(android_domain)
            dists = torch.cat((good_dist, android_dist)).clamp(min=0.0001, max=0.9999)
            expected = Variable(torch.FloatTensor([0] * sz + [1] * sz)).cuda()
            bce_loss = torch.nn.BCELoss()(dists[:, 0], expected)
            # Adversarial objective: retrieval loss minus a small
            # domain-classifier term.
            l = loss(good_tensor, cand_tensor, 1.0) - 0.01 * bce_loss
            l.backward()
            losses.append(l.cpu().data[0])
            bce_losses.append(bce_loss.cpu().data[0])
            encoder_optimizer.step()
            classifier_optimizer.step()
        else:
            similarity = cosine_sim(good_tensor.expand_as(cand_tensor), cand_tensor, dim=2)
            similarity = torch.FloatTensor(similarity.data.cpu().numpy())
            labels = batch['labels']
            for sim in similarity:
                actual.append(sim)
            expected.extend(labels.view(-1))
    if is_training:
        avg_loss = np.mean(losses)
        avg_bce_loss = np.mean(bce_losses)
        return avg_loss, avg_bce_loss
    else:
        auc = AUCMeter()
        auc.reset()
        auc.add(torch.cat(actual), torch.LongTensor(expected))
        return auc.value(max_fpr=0.05)
def evaluate_model(model, use_test_data=False, use_lstm=True):
    if use_test_data:
        print "Running evaluate on the TEST data:"
    else:
        print "Running evaluate on the DEV data:"
    # Set the model to eval mode.
    model.eval()
    # samples has shape (num_dev_samples, 22).
    samples = get_dev_data_android(use_test_data=use_test_data)
    num_samples = len(samples)
    num_batches = int(math.ceil(1. * num_samples / BATCH_SIZE))
    score_matrix = torch.Tensor().cuda() if USE_GPU else torch.Tensor()
    orig_time = time()
    for i in range(num_batches):
        # Get the samples ready.
        batch = samples[i * BATCH_SIZE:(i + 1) * BATCH_SIZE]
        # If this is the last batch, pad it to the expected shape.
        if i == num_batches - 1 and num_samples % BATCH_SIZE != 0:
            batch = np.concatenate(
                (batch, np.full(((i + 1) * BATCH_SIZE - num_samples, 22), "0")),
                axis=0)
        # Convert from numpy arrays to tensors.
        title_tensor, title_lengths = get_tensor_from_batch(batch, use_title=True)
        body_tensor, body_lengths = get_tensor_from_batch(batch, use_title=False)
        # Run the model.
        model.hidden = model.init_hidden()
        title_lstm = model(title_tensor)
        title_encoding = get_encodings(title_lstm, title_lengths, use_lstm=use_lstm)
        model.hidden = model.init_hidden()
        body_lstm = model(body_tensor)
        body_encoding = get_encodings(body_lstm, body_lengths, use_lstm=use_lstm)
        # Compute evaluation scores, dropping any padded rows in the last batch.
        X, _ = generate_score_matrix(title_encoding, body_encoding)
        X = X.data
        if i == num_batches - 1 and num_samples % BATCH_SIZE != 0:
            score_matrix = torch.cat([score_matrix, X[:num_samples - i * BATCH_SIZE]])
        else:
            score_matrix = torch.cat([score_matrix, X])
    # score_matrix has shape (num_dev_samples, 21) and contains the cosine
    # similarity scores; column 0 is the positive candidate.
    meter = AUCMeter()
    similarities, targets = [], []
    for i in range(len(score_matrix)):
        similarities.append(score_matrix[i][0])
        targets.append(1)
        for j in range(1, 21):
            similarities.append(score_matrix[i][j])
            targets.append(0)
    meter.add(np.array(similarities), np.array(targets))
    print "The AUC(0.05) value is " + str(meter.value(0.05))
    # Set the model back to train mode.
    model.train()
def evaluate(pairs, label, text_data, question_lookup, auc):
    for p in pairs:
        id_1, id_2 = p
        cos = cosine_similarity(text_data.getrow(question_lookup[id_1]),
                                text_data.getrow(question_lookup[id_2]))
        cos = float(cos)
        auc.add(torch.DoubleTensor([cos]), torch.LongTensor([label]))


print >> sys.stderr, "LOADING DATA..."
question_lookup, text_raw = read_raw_data(CORPUS_PATH)
dev_pos_pairs = import_pairs(DEV_POS_PATH)
dev_neg_pairs = import_pairs(DEV_NEG_PATH)
test_pos_pairs = import_pairs(TEST_POS_PATH)
test_neg_pairs = import_pairs(TEST_NEG_PATH)

print >> sys.stderr, "COMPUTING TF-IDF FEATURES..."
text_tfidf = compute_tfidf(text_raw)

print >> sys.stderr, "COMPUTING AUC..."
d_auc = AUCMeter()
t_auc = AUCMeter()
evaluate(dev_pos_pairs, 1, text_tfidf, question_lookup, d_auc)
evaluate(dev_neg_pairs, 0, text_tfidf, question_lookup, d_auc)
evaluate(test_pos_pairs, 1, text_tfidf, question_lookup, t_auc)
evaluate(test_neg_pairs, 0, text_tfidf, question_lookup, t_auc)
print "Dev AUC: %.2f" % (d_auc.value(max_fpr=0.05))
print "Test AUC: %.2f" % (t_auc.value(max_fpr=0.05))
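# compute_tfidf is not shown here. A minimal sketch consistent with how
# text_tfidf is used above (a sparse TF-IDF row per question, indexed via
# question_lookup), assuming scikit-learn; the real implementation may set
# tokenizer/stop-word options:
def compute_tfidf(text_raw):
    from sklearn.feature_extraction.text import TfidfVectorizer
    vectorizer = TfidfVectorizer()
    return vectorizer.fit_transform(text_raw)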
def run_epoch(data, is_training, model, optimizer, transfer=False):
    # Make batches.
    data_loader = torch.utils.data.DataLoader(
        data, batch_size=10, shuffle=True, num_workers=4, drop_last=False)
    losses = []
    actual = []
    expected = []
    if is_training:
        model.train()
    else:
        model.eval()
    for batch in data_loader:
        # Unpack training instances.
        pid_title = torch.unsqueeze(Variable(batch['pid_title']), 1).cuda()            # batch_size x 1 x title_length=40
        pid_title_mask = torch.unsqueeze(Variable(batch['pid_title_mask']), 1).cuda()  # batch_size x 1 x title_length=40
        pid_body = torch.unsqueeze(Variable(batch['pid_body']), 1).cuda()              # batch_size x 1 x body_length=100
        pid_body_mask = torch.unsqueeze(Variable(batch['pid_body_mask']), 1).cuda()    # batch_size x 1 x body_length=100
        candidate_title = Variable(batch['candidate_titles']).cuda()                   # batch_size x #candidates (21 in training) x title_length=40
        candidate_title_mask = Variable(batch['candidate_titles_mask']).cuda()         # batch_size x #candidates x title_length=40
        candidate_body = Variable(batch['candidate_body']).cuda()                      # batch_size x #candidates x body_length=100
        candidate_body_mask = Variable(batch['candidate_body_mask']).cuda()            # batch_size x #candidates x body_length=100
        if is_training:
            optimizer.zero_grad()
        # Run text through the model.
        pid_title = model(pid_title)              # batch_size x 1 x output_size=500 x title_length=40 (-kernel_size+1 if CNN)
        pid_body = model(pid_body)                # batch_size x 1 x output_size=500 x body_length=100 (-kernel_size+1 if CNN)
        candidate_title = model(candidate_title)  # batch_size x #candidates x output_size=500 x title_length
        candidate_body = model(candidate_body)    # batch_size x #candidates x output_size=500 x body_length
        pid_title_mask = torch.unsqueeze(pid_title_mask, 2).expand_as(pid_title)
        pid_body_mask = torch.unsqueeze(pid_body_mask, 2).expand_as(pid_body)
        candidate_title_mask = torch.unsqueeze(candidate_title_mask, 2).expand_as(candidate_title)
        candidate_body_mask = torch.unsqueeze(candidate_body_mask, 2).expand_as(candidate_body)
        # Masked sums over the word dimension, then title/body averaging.
        good_title = torch.sum(pid_title * pid_title_mask, 3)                # batch_size x 1 x output_size=500
        good_body = torch.sum(pid_body * pid_body_mask, 3)                   # batch_size x 1 x output_size=500
        cand_titles = torch.sum(candidate_title * candidate_title_mask, 3)   # batch_size x #candidates x output_size=500
        cand_bodies = torch.sum(candidate_body * candidate_body_mask, 3)     # batch_size x #candidates x output_size=500
        good_tensor = (good_title + good_body) / 2    # batch_size x 1 x output_size=500
        cand_tensor = (cand_titles + cand_bodies) / 2 # batch_size x #candidates x output_size=500
        if is_training:
            l = loss(good_tensor, cand_tensor, 1.0)
            l.backward()
            losses.append(l.cpu().data[0])
            optimizer.step()
        else:
            similarity = cosine_sim(good_tensor.expand_as(cand_tensor), cand_tensor, dim=2)
            if transfer:
                similarity = torch.FloatTensor(similarity.data.cpu().numpy())
                labels = batch['labels']
            else:
                similarity = similarity.data.cpu().numpy()
                labels = batch['labels'].numpy()

            def predict(sim, labels):
                # Rank candidate labels by descending similarity.
                predictions = []
                for i in range(sim.shape[0]):
                    sorted_cand = (-sim[i]).argsort()
                    predictions.append(labels[i][sorted_cand])
                return predictions

            if transfer:
                # AUC over raw similarity scores against binary labels.
                for sim in similarity:
                    actual.append(sim)
                expected.extend(labels.view(-1))
            else:
                # Ranked label lists for MAP/MRR/P@k; `losses` doubles as
                # storage in eval mode.
                losses.extend(predict(similarity, labels))
    if is_training:
        avg_loss = np.mean(losses)
        return avg_loss
    else:
        if transfer:
            auc = AUCMeter()
            auc.reset()
            auc.add(torch.cat(actual), torch.LongTensor(expected))
            return auc.value(max_fpr=0.05)
        else:
            e = Evaluation(losses)
            MAP = e.MAP() * 100
            MRR = e.MRR() * 100
            P1 = e.Precision(1) * 100
            P5 = e.Precision(5) * 100
            return (MAP, MRR, P1, P5)
def evaluation(args, padding_id, ids_corpus, vocab_map, embeddings, model):
    """Calculate the AUC score of the model on Android data."""
    meter = AUCMeter()
    print "starting evaluation"
    val_data = corpus.read_annotations(args.test)
    print "number of lines in test data: " + str(len(val_data))
    val_batches = corpus.create_eval_batches(ids_corpus, val_data, padding_id)
    for batch in val_batches:
        titles, bodies, qlabels = batch
        title_length, title_num_questions = titles.shape
        body_length, body_num_questions = bodies.shape
        title_embeddings, body_embeddings = corpus.get_embeddings(
            titles, bodies, vocab_map, embeddings)

        # Title representations.
        if args.model == 'lstm':
            if args.cuda:
                title_inputs = [autograd.Variable(torch.FloatTensor(title_embeddings).cuda())]
                title_inputs = torch.cat(title_inputs).view(title_length, title_num_questions, -1)
                title_hidden = (autograd.Variable(torch.zeros(1, title_num_questions, args.hidden_size).cuda()),
                                autograd.Variable(torch.zeros(1, title_num_questions, args.hidden_size).cuda()))
            else:
                title_inputs = [autograd.Variable(torch.FloatTensor(title_embeddings))]
                title_inputs = torch.cat(title_inputs).view(title_length, title_num_questions, -1)
                title_hidden = (autograd.Variable(torch.zeros(1, title_num_questions, args.hidden_size)),
                                autograd.Variable(torch.zeros(1, title_num_questions, args.hidden_size)))
        else:
            if args.cuda:
                title_inputs = [autograd.Variable(torch.FloatTensor(title_embeddings).cuda())]
            else:
                title_inputs = [autograd.Variable(torch.FloatTensor(title_embeddings))]
            title_inputs = torch.cat(title_inputs).transpose(0, 1).transpose(1, 2)
        if args.model == 'lstm':
            title_out, title_hidden = model(title_inputs, title_hidden)
        else:
            title_out = model(title_inputs)
            title_out = F.tanh(title_out)
            title_out = title_out.transpose(1, 2).transpose(0, 1)
        average_title_out = average_questions(title_out, titles, padding_id)

        # Body representations.
        if args.model == 'lstm':
            if args.cuda:
                body_inputs = [autograd.Variable(torch.FloatTensor(body_embeddings).cuda())]
                body_inputs = torch.cat(body_inputs).view(body_length, body_num_questions, -1)
                body_hidden = (autograd.Variable(torch.zeros(1, body_num_questions, args.hidden_size).cuda()),
                               autograd.Variable(torch.zeros(1, body_num_questions, args.hidden_size).cuda()))
            else:
                body_inputs = [autograd.Variable(torch.FloatTensor(body_embeddings))]
                body_inputs = torch.cat(body_inputs).view(body_length, body_num_questions, -1)
                body_hidden = (autograd.Variable(torch.zeros(1, body_num_questions, args.hidden_size)),
                               autograd.Variable(torch.zeros(1, body_num_questions, args.hidden_size)))
        else:
            if args.cuda:
                body_inputs = [autograd.Variable(torch.FloatTensor(body_embeddings).cuda())]
            else:
                body_inputs = [autograd.Variable(torch.FloatTensor(body_embeddings))]
            body_inputs = torch.cat(body_inputs).transpose(0, 1).transpose(1, 2)
        if args.model == 'lstm':
            body_out, body_hidden = model(body_inputs, body_hidden)
        else:
            body_out = model(body_inputs)
            body_out = F.tanh(body_out)
            body_out = body_out.transpose(1, 2).transpose(0, 1)
        # Average all words of each question from body_out.
        average_body_out = average_questions(body_out, bodies, padding_id)

        # Average the body and title representations of the questions.
        hidden = (average_title_out + average_body_out) * 0.5
        query = torch.DoubleTensor(hidden[0].unsqueeze(0).cpu().data.numpy())
        examples = torch.DoubleTensor(hidden[1:].cpu().data.numpy())
        cos_similarity = F.cosine_similarity(query, examples, dim=1)
        qlabels = [float(qlabel) for qlabel in list(qlabels)]
        target = torch.DoubleTensor(qlabels)
        meter.add(cos_similarity, target)
    print meter.value(0.05)
    for line in f.readlines():
        main_qid, candidate_qid = line.split()
        neg[main_qid].add(candidate_qid)
    return pos, neg


def cosine(x, y):
    # Cosine similarity between two sparse vectors x and y, mapped to [0, 1]:
    # 0 means the vectors are opposite, 1 means they are identical.
    return (1 + cosine_similarity(sparse.vstack([x, y]))[0][1]) / 2


if __name__ == '__main__':
    get_id_to_vector()
    meter = AUCMeter()
    pos, neg = get_dev_data_android(use_test_data=False)
    # Only use questions that have at least one positive match.
    for main_qid in pos:
        main_vector = X[id_to_index[main_qid]]
        similarities, targets = [], []
        # For each positive match, append its similarity score and a 1 target.
        for pos_match_qid in pos[main_qid]:
            similarities.append(cosine(main_vector, X[id_to_index[pos_match_qid]]))
            targets.append(1)
        # For each negative match, append its similarity score and a 0 target.
        for neg_match_qid in neg[main_qid]:
            similarities.append(cosine(main_vector, X[id_to_index[neg_match_qid]]))
            targets.append(0)
        meter.add(np.array(similarities), np.array(targets))
    print "The AUC(0.05) value on the TF-IDF weighted vectors is " + str(meter.value(0.05))
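# Every snippet above reports meter.value(0.05), i.e. area under the ROC curve
# restricted to false-positive rates <= 0.05. A minimal sketch of that metric
# using scikit-learn; whether the area is normalized by max_fpr varies between
# AUCMeter implementations, and this sketch normalizes so that a perfect ranker
# scores 1.0:
import numpy as np
from sklearn.metrics import roc_curve

def auc_at_max_fpr(scores, targets, max_fpr=0.05):
    # Full ROC curve from raw similarity scores and binary labels.
    fpr, tpr, _ = roc_curve(targets, scores)
    # Interpolate the TPR at the cutoff so the truncated area is exact, then
    # keep only the part of the curve with fpr <= max_fpr.
    cutoff_tpr = np.interp(max_fpr, fpr, tpr)
    keep = fpr <= max_fpr
    fpr = np.append(fpr[keep], max_fpr)
    tpr = np.append(tpr[keep], cutoff_tpr)
    # Trapezoidal area under the truncated curve, normalized by max_fpr.
    return np.trapz(tpr, fpr) / max_fpr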