def evaluate(dataset):
    meter = AUCMeter()
    dev_loader = torch.utils.data.DataLoader(dataset,
                                             batch_size=dev_batch_size,
                                             shuffle=True)
    # Evaluation only: put both models in eval mode once, outside the loop.
    model.eval()
    domain_model.eval()
    for dev_batch in tqdm(dev_loader):
        dev_x = autograd.Variable(dev_batch["x"])
        if args.cuda:
            dev_x = dev_x.cuda()

        dev_pad_title = dev_batch["pad_title"]
        dev_pad_body = dev_batch["pad_body"]
        if args.model == "lstm":
            hidden2 = model.init_hidden(dev_x.shape[1])
            hidden2 = repackage_hidden(hidden2)
            out_dev_x_raw, _ = model(dev_x, dev_pad_title, dev_pad_body,
                                     hidden2)
        else:
            out_dev_x_raw, _ = model(dev_x, dev_pad_title, dev_pad_body)

        out_dev_x = out_dev_x_raw.data
        # The first candidate in each row is the known duplicate; the rest
        # are negatives.
        truth = np.asarray([1] + [0] * (len(out_dev_x[0]) - 1))

        for i in range(len(out_dev_x)):
            meter.add(out_dev_x[i], truth)
        print("auc middle", meter.value(0.05))
    print("AUC DONE", meter.value(0.05))
Example #2
def evaluation(args, padding_id, android_ids_corpus, model, vocab_map,
               embeddings):
    print "starting evaluation"

    meter = AUCMeter()

    android_test_pos_path = os.path.join(args.android_path, 'test.pos.txt')
    android_test_neg_path = os.path.join(args.android_path, 'test.neg.txt')
    android_test_annotations = android_pairs_to_annotations(
        android_test_pos_path, android_test_neg_path)
    android_test_batches = corpus.create_eval_batches(
        android_ids_corpus, android_test_annotations, padding_id)

    for batch in android_test_batches:
        titles, bodies, qlabels = batch

        hidden = vectorize_question(args, batch, model, vocab_map, embeddings,
                                    padding_id)
        query = hidden[0].unsqueeze(0)
        examples = hidden[1:]
        cos_similarity = F.cosine_similarity(query, examples, dim=1)
        target = torch.DoubleTensor(qlabels)
        meter.add(cos_similarity.data, target)
    print meter.value(0.05)
Example #3
def calculate_meter(data):
	"""Calculate the AUC score.
	"""
	positives = {}
	negatives = {}

	print "loading data"
	# dev.[pos|neg].txt and test.[pos|neg].txt format:
	# one pair of question ids per line, separated by whitespace
	if data == 'dev':
		pos_ids_X, pos_Y = load_data("../Android/dev.pos.txt", True)
	else:
		pos_ids_X, pos_Y = load_data("../Android/test.pos.txt", True)
	for q1, q2 in pos_ids_X:
		if q1 in positives:
			positives[q1].append(q2)
		else:
			positives[q1] = [q2]

	if data == 'dev':
		neg_ids_X, neg_Y = load_data("../Android/dev.neg.txt", False)
	else:
		neg_ids_X, neg_Y = load_data("../Android/test.neg.txt", False)
	for q1, q2 in neg_ids_X:
		if q1 in negatives:
			negatives[q1].append(q2)
		else:
			negatives[q1] = [q2]

	vectorizer = TfidfVectorizer()
	print "tfidf fit"
	vectorizer.fit(all_sequences)
	# 36404 unique words
	# print len(vectorizer.vocabulary_)

	meter = AUCMeter()

	qlabels = []
	all_questions = []
	question_ids = set()
	question_ids.update(positives.keys())
	question_ids.update(negatives.keys())
	for qid in question_ids:
		questions = [raw_corpus[qid][0] + " " + raw_corpus[qid][1]]
		questions.extend([raw_corpus[nid][0] + " " + raw_corpus[nid][1] for nid in negatives[qid]])
		questions.extend([raw_corpus[pid][0] + " " + raw_corpus[pid][1] for pid in positives[qid]])
		all_questions.append(questions)
		qlabels.append([0]*len(negatives[qid]) + [1]*len(positives[qid]))

	for question, qlabel in zip(all_questions, qlabels):
		query = torch.DoubleTensor(vectorizer.transform([question[0]]).todense())
		examples = torch.DoubleTensor(vectorizer.transform(question[1:]).todense())

		cos_similarity = F.cosine_similarity(query, examples, dim=1)
		target = torch.DoubleTensor(qlabel)
		meter.add(cos_similarity, target)

	print meter.value(0.05)
Example #4
def evaluate_auc(model, pos_data, neg_data, question_data, batch_size):
    auc = AUCMeter()

    evaluate_pair_set(model, pos_data, 1, question_data, auc, batch_size)
    evaluate_pair_set(model, neg_data, 0, question_data, auc, batch_size)

    return auc.value(max_fpr=0.05)
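evaluate_pair_set is not shown in this example. A plausible sketch, assuming each pair is (query_id, candidate_id), question_data maps an id to a model-ready input, and the model returns an embedding vector (all names here are hypothetical):

import torch
import torch.nn.functional as F

def evaluate_pair_set(model, pairs, label, question_data, auc, batch_size):
    # Score each (query, candidate) pair by the cosine similarity of its
    # embeddings and feed it to the meter with the shared label.
    # batch_size is unused in this simplified sketch.
    for q_id, c_id in pairs:
        q_emb = model(question_data[q_id])
        c_emb = model(question_data[c_id])
        score = float(F.cosine_similarity(q_emb, c_emb, dim=0))
        auc.add(torch.DoubleTensor([score]), torch.LongTensor([label]))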
Example #5
def eval_part2(model,
               android_data,
               use_dev,
               model_type,
               using_part1_model=False,
               batch_size=1,
               tfidf_weighting=False):
    print "Begin eval_part2..."
    auc_eval = AUCMeter()
    num_batches = len(android_data.dev_data) / batch_size if use_dev \
                  else len(android_data.test_data) / batch_size
    for i in xrange(num_batches):
        title, body, similar = android_data.get_next_eval_feature(
            use_dev, tfidf_weighting=tfidf_weighting)
        h = None
        if using_part1_model:
            h = run_model(model, title, body, True, True, model_type)
        else:
            title_vectors, title_masks = title
            body_vectors, body_masks = body
            h, _ = run_part2_model(model, title_vectors, body_vectors,
                                   title_masks, body_masks, model_type, False)
        candidate_scores = []
        # The candidates are all results after the first one, which is h_q.
        h_q = h[0]
        for c in h[1:]:
            candidate_scores.append(get_cosine_similarity(h_q, c))
        # Get the correct labels.
        # (1 if the candidate is similar to query question, 0 otherwise.)
        labels = np.zeros(len(candidate_scores))
        for similar_idx in similar:
            labels[similar_idx] = 1
        auc_eval.add(np.array(candidate_scores), labels)
    print "Part 2 AUC for %s: %f" % ("dev" if use_dev else "test",
                                     auc_eval.value(.05))
Example #6
def do_eval(embedding_layer, eval_name, batch_first=False):

    if eval_name == 'Dev':
        eval_data = dev_android

    elif eval_name == 'Test':
        eval_data = test_android

    eval_map = {}
    for qid_ in eval_data.keys():
        eval_map[qid_] = process_eval_batch(qid_,
                                            eval_data,
                                            batch_first=batch_first)

    labels = []
    auc = AUCMeter()

    for qid_ in eval_map.keys():
        (eval_title_batch, eval_body_batch,
         eval_title_len, eval_body_len) = eval_map[qid_]
        embedding_layer.title_hidden = embedding_layer.init_hidden(
            eval_title_batch.shape[1])
        embedding_layer.body_hidden = embedding_layer.init_hidden(
            eval_body_batch.shape[1])
        eval_title_qs = Variable(torch.FloatTensor(eval_title_batch))
        eval_body_qs = Variable(torch.FloatTensor(eval_body_batch))

        if cuda_available:
            eval_title_qs = eval_title_qs.cuda()
            eval_body_qs = eval_body_qs.cuda()
        embeddings = embedding_layer(eval_title_qs, eval_body_qs,
                                     eval_title_len, eval_body_len)
        cos_scores = evaluate(embeddings).cpu().data.numpy()
        true_labels = np.array(eval_data[qid_]['label'])
        auc.add(cos_scores, true_labels)
        labels.append(true_labels[np.argsort(cos_scores)][::-1])

    auc_stdout = eval_name + ' AUC ' + str(auc.value(0.05))
    print(auc_stdout)
    logging.debug(auc_stdout)
    eval_metrics(labels, eval_name)
    return auc.value(0.05)
Example #7
def evaluate(model, test_data, test_labels):
    m = AUCMeter()
    cos_sims = []
    labels = []
    titles, bodies = test_data
    print "Getting test query embeddings"
    title_output = model(Variable(torch.FloatTensor(titles)))
    body_output = model(Variable(torch.FloatTensor(bodies)))
    question_embeddings = (title_output + body_output) / 2
    print "Getting cosine similarities"
    for i in range(len(question_embeddings) / 2):
        q_ind = 2 * i
        r_ind = 2 * i + 1
        q_emb = question_embeddings[q_ind]
        r_emb = question_embeddings[r_ind]
        cos_sim = F.cosine_similarity(q_emb, r_emb, dim=0, eps=1e-6)
        cos_sims.append(cos_sim.data[0])
        labels.append(test_labels[q_ind])
        if i % 3000 == 0 or i == len(question_embeddings) / 2 - 1:
            print "index ", q_ind
    m.add(torch.FloatTensor(cos_sims), torch.IntTensor(labels))
    print m.value(max_fpr=0.05)
Example #8
def evaluate_tfidf_auc(data, tfidf_vectors, query_to_index):
    auc = AUCMeter()
    for entry_id, eval_query_result in data.items():
        similar_ids = eval_query_result.similar_ids
        positives = set(similar_ids)
        candidate_ids = eval_query_result.candidate_ids

        entry_encoding = tfidf_vectors[query_to_index[entry_id]]
        candidate_similarities = []
        targets = []
        for candidate_id in candidate_ids:
            candidate_encoding = tfidf_vectors[query_to_index[candidate_id]]
            similarity = cosine(entry_encoding, candidate_encoding)
            candidate_similarities.append(similarity.item(0))
            targets.append(IS_SIMMILAR_LABEL if candidate_id in positives else NOT_SIMMILAR_LABEL)

        similarities = torch.Tensor(candidate_similarities)
        auc.add(similarities, torch.Tensor(targets))
    return auc.value(MAXIMUM_FALSE_POSITIVE_RATIO)
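The cosine helper above is not defined in this example (Example #20 below shows a variant that also rescales to [0, 1]). A minimal sketch, assuming the TF-IDF rows are scipy sparse vectors as produced by TfidfVectorizer:

from sklearn.metrics.pairwise import cosine_similarity

def cosine(x, y):
    # Returns a 1x1 numpy array, hence the .item(0) in the caller above.
    return cosine_similarity(x, y)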
Example #9
def unsupervised_methods_helper(android_data, use_dev):
    auc_eval = AUCMeter()
    batch_size = 1
    num_batches = len(android_data.dev_data) / batch_size if use_dev \
                  else len(android_data.test_data) / batch_size
    for i in xrange(num_batches):
        bows, labels = android_data.get_next_eval_bow_feature(
            use_dev, batch_size)
        for j in xrange(batch_size):
            # TODO: this currently only works when batch size is 1, fix indexing
            query = bows[0]
            scores = []
            for sample in bows[1:]:
                scores.append(get_tfidf_cosine_similarity(query, sample))
            assert len(scores) == len(labels[j])
            auc_eval.add(np.array(scores), labels[j])
    # Report AUC.
    print "AUC for %s: %f" % ("dev" if use_dev else "test",
                              auc_eval.value(.05))
Example #10
	def evaluate(self, dev_or_test):
		''' dev_or_test must be one of 'dev' or 'test'
		'''
		print('lv0')
		self.reset_params()
		auc_meter = AUCMeter()

		# get the id batches
		pos_ids_batches_pair = self.pre.eval_split_into_batches(is_pos=True, dev_or_test=dev_or_test)
		neg_ids_batches_pair = self.pre.eval_split_into_batches(is_pos=False, dev_or_test=dev_or_test)

		# start looping thru the batches
		data_sets = [neg_ids_batches_pair, pos_ids_batches_pair]
		print('lv1')
		i_target = 0
		for ids_batches_pair in data_sets:
			assert i_target < 2
			print('dataset number %d'%(i_target))

			ids_batches_left = ids_batches_pair[0]
			ids_batches_right = ids_batches_pair[1]

			for i in xrange(len(ids_batches_left)):
				ids_batch_left = ids_batches_left[i]
				ids_batch_right = ids_batches_right[i]

				feats_left = self.get_output(ids_batch_left)
				feats_right = self.get_output(ids_batch_right)

				preds = self.get_cosine_scores(feats_left, feats_right).data.numpy()
				targets = np.ones(len(preds)) * i_target # 0s if neg, 1s if pos
				auc_meter.add(preds, targets)

			i_target += 1
			
		print('lv3')
		# all predictions are added
		# now get the AUC value
		auc_value = auc_meter.value(params.auc_max_fpr)
		print('AUC(%f) value for %s  =  %f'
			%(params.auc_max_fpr, dev_or_test, auc_value))
Example #11
def evaluate_model(model, data, corpus, word_to_index, cuda):
    auc = AUCMeter()
    for query in data.keys():
        positives = set(data[query][0])
        candidates = data[query][1]

        embeddings = [pad(merge_title_and_body(corpus[query]), len(word_to_index))]
        targets = []
        for candidate in candidates:
            embeddings.append(pad(merge_title_and_body(corpus[candidate]), len(word_to_index)))
            targets.append(IS_SIMMILAR_LABEL if candidate in positives else NOT_SIMMILAR_LABEL)
        embeddings = Variable(torch.from_numpy(np.array(embeddings)))
        targets = torch.from_numpy(np.array(targets))
        if cuda:
            embeddings = embeddings.cuda()

        encodings = model(embeddings)
        query_encoding = encodings[0]
        candidate_encodings = encodings[1:]
        similarities = (F.cosine_similarity(candidate_encodings, query_encoding.repeat(len(encodings)-1, 1), dim=1))
        auc.add(similarities.data, targets)
    return auc.value(MAXIMUM_FALSE_POSITIVE_RATIO)
Example #12
def evaluate_auc(args, model, embedding, batches, padding_id):
    model.eval()
    meter = AUCMeter()

    for i, batch in enumerate(batches):
        title_ids, body_ids, labels = batch

        hidden = forward(args, model, embedding,
                         title_ids, body_ids, padding_id)

        q = hidden[0].unsqueeze(0)
        p = hidden[1:]

        scores = F.cosine_similarity(q, p, dim=1).cpu().data
        assert len(scores) == len(labels)

        target = torch.DoubleTensor(labels)
        meter.add(scores, target)

    auc_score = meter.value(0.05)

    print 'AUC(0.05): {}'.format(auc_score)
    return auc_score
Example #13
stop_words = stopwords.words('english')
stop_words.append('')
meter = AUCMeter()

vectorizer = TfidfVectorizer(lowercase=True, stop_words=stop_words, use_idf=True, ngram_range=(1, 2), tokenizer=lambda x: x.split(' '))
vs = vectorizer.fit_transform(contents)

res = []
for q, p, ns in tqdm.tqdm(android_test):
    sims = []
    question = vs[question_ids[q]]
    for candidate in [p]+ns:
        cos_sim = cosine_similarity(question, vs[question_ids[candidate]])
        sims.append(cos_sim[0][0])
    sims = np.array(sims)
    ind = np.argsort(sims)[::-1]
    labels = np.array([1] + [0] * len(ns))
    labels = labels[ind]
    meter.add(sims[ind], labels[ind])

# predicted = np.array(predicted)
# map, mrr, p_at_one, p_at_five = Evaluation(predicted).evaluate()
# print('\n')
# print("MAP: {0}, MRR: {1}, P@1: {2}, P@5: {3}".format(map, mrr, p_at_one, p_at_five))

print(meter.value(0.05))
Example #14
def run_epoch(args,
              ubuntu_loader,
              android_loader,
              qr_model,
              qr_criterion,
              qr_optimizer,
              dc_model,
              dc_criterion,
              dc_optimizer,
              epoch,
              mode='train'):
    queries_per_batch = args.batch_size / args.examples_per_query
    data_and_target_loaders = [
        izip(ubuntu_loader, repeat(0)),
        izip(android_loader, repeat(1))
    ]
    data_and_target_loader = roundrobin(*data_and_target_loaders)
    #if mode == 'train':
    #    print "Training..."
    #    data_and_target_loaders = [ izip(ubuntu_loader , repeat(0)),
    #                                izip(android_loader, repeat(1)) ]
    #    data_and_target_loader = roundrobin(*data_and_target_loaders)
    #elif mode == 'val':
    #    print "Validation..."
    #    data_and_target_loader = izip(android_loader, repeat(1))

    print "Epoch {}".format(epoch)
    qr_total_loss = 0
    dc_total_loss = 0
    dc_count = 0

    qr_metrics = QuestionRetrievalMetrics()
    auc_meter = AUCMeter()

    for i_batch, (data, target_domain) in enumerate(data_and_target_loader):
        padded_things, ys = data

        print "Epoch {}, Batch #{}, Domain {}".format(epoch, i_batch,
                                                      target_domain)
        ys = create_variable(ys)

        qt, qb, ot, ob = padded_things  # padded_things might also be packed.
        # qt is (PackedSequence, perm_idx), or (seq_tensor, set_lengths)

        # Step 1. Remember that Pytorch accumulates gradients.
        # We need to clear them out before each instance
        for model in [qr_model, dc_model]:
            model.zero_grad()

        # Generate embeddings.
        query_title = qr_model.get_embed(*qt)
        query_body = qr_model.get_embed(*qb)
        other_title = qr_model.get_embed(*ot)
        other_body = qr_model.get_embed(*ob)

        query_embed = (query_title + query_body) / 2
        other_embed = (other_title + other_body) / 2
        grl = GradientReversalLayer(args.dc_factor)
        # Classify their domains
        other_domain = dc_model(grl(other_embed))
        target = create_variable(
            torch.FloatTensor([float(target_domain)] * other_domain.size(0)))
        auc_meter.add(other_domain.data, target.data)
        if mode == 'train':
            # Compute batch loss
            qr_batch_loss = qr_criterion(query_embed, other_embed, ys)
            qr_total_loss += qr_batch_loss.data[0]
            print "avg QR loss for batch {} was {}".format(
                i_batch, qr_batch_loss.data[0] / queries_per_batch)
            dc_batch_loss = dc_criterion(other_domain, target)
            dc_total_loss += dc_batch_loss.data[0]
            print "avg DC loss for batch {} was {}".format(
                i_batch, dc_batch_loss.data[0] / args.batch_size)
            dc_count += args.batch_size

            if target_domain == 0:  # ubuntu. We don't have android training data for QR.
                qr_batch_loss.backward(retain_graph=True)
            else:
                pass  # android.
            dc_batch_loss.backward()
            qr_optimizer.step()
            dc_optimizer.step()

        if mode == "val" and target_domain == 0:
            pass
        else:
            update_metrics_for_batch(args, query_embed, other_embed, ys,
                                     qr_metrics)
        if i_batch % args.stats_display_interval == 0:
            qr_metrics.display(i_batch)

    print "AUC Meter {} final stats for epoch {} was {}".format(
        mode, epoch, auc_meter.value(0.05))
    if mode == 'train':
        qr_avg_loss = qr_total_loss / qr_metrics.queries_count
        dc_avg_loss = dc_total_loss / dc_count
        print "average {} QR loss for epoch {} was {}".format(
            mode, epoch, qr_avg_loss)
        print "average {} DC  loss for epoch {} was {}".format(
            mode, epoch, dc_avg_loss)
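GradientReversalLayer is not shown in this example. A minimal sketch of the standard DANN-style layer it appears to be: identity on the forward pass, gradient multiplied by -lambda on the backward pass (this sketch uses the modern torch.autograd.Function API, whereas the example targets an older PyTorch):

import torch
from torch.autograd import Function

class _ReverseGrad(Function):
    @staticmethod
    def forward(ctx, x, lambd):
        ctx.lambd = lambd
        return x.view_as(x)

    @staticmethod
    def backward(ctx, grad_output):
        # Flip and scale the gradient; None matches the lambd argument.
        return -ctx.lambd * grad_output, None

class GradientReversalLayer(torch.nn.Module):
    def __init__(self, lambd):
        super(GradientReversalLayer, self).__init__()
        self.lambd = lambd

    def forward(self, x):
        return _ReverseGrad.apply(x, self.lambd)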
Example #15
def run_adversarial_epoch(data, is_training, encoder, encoder_optimizer, classifier, classifier_optimizer):
	
	# Make batches
	data_loader = torch.utils.data.DataLoader(
		data,
		batch_size=10,
		shuffle=True,
		num_workers=4,
		drop_last=False)

	losses = []
	bce_losses = []
	actual = []
	expected = []

	if is_training:
		encoder.train()
		classifier.train()
	else:
		encoder.eval()
		classifier.eval()
	
	for batch in data_loader:
		# Unpack training instances
		pid_title = torch.unsqueeze(Variable(batch['pid_title']), 1).cuda() # Size: batch_size x 1 x title_length=40
		pid_title_mask = torch.unsqueeze(Variable(batch['pid_title_mask']), 1).cuda() # Size: batch_size x 1 x title_length=40
		pid_body = torch.unsqueeze(Variable(batch['pid_body']), 1).cuda() # Size: batch_size x 1 x body_length=100
		pid_body_mask = torch.unsqueeze(Variable(batch['pid_body_mask']), 1).cuda() # Size: batch_size x 1 x body_length=100
		candidate_title = Variable(batch['candidate_titles']).cuda() # Size: batch_size x # candidates (21 in training) x title_length=40
		candidate_title_mask = Variable(batch['candidate_titles_mask']).cuda() # Size: batch_size x # candidates (21 in training) x title_length=40
		candidate_body = Variable(batch['candidate_body']).cuda() # Size: batch_size x # candidates (21 in training) x body_length=100
		candidate_body_mask = Variable(batch['candidate_body_mask']).cuda() # Size: batch_size x # candidates (21 in training) x body_length=100
		if is_training:
			android_title = torch.unsqueeze(Variable(batch['android_title']), 1).cuda()
			android_title_mask = torch.unsqueeze(Variable(batch['android_title_mask']), 1).cuda()
			android_body = torch.unsqueeze(Variable(batch['android_body']), 1).cuda()
			android_body_mask = torch.unsqueeze(Variable(batch['android_body_mask']), 1).cuda()
		sz = pid_title.size()[0]
		
		if is_training:
			encoder_optimizer.zero_grad()
			classifier_optimizer.zero_grad()
		
		# Run text through model
		pid_title = encoder(pid_title) # batch_size x 1 x output_size=500 x title_length=40(-kernel_size+1 if CNN)
		pid_body = encoder(pid_body) # batch_size x 1 x output_size=500 x body_length=100(-kernel_size+1 if CNN)
		candidate_title = encoder(candidate_title) # batch_size x # candidates (21 in training) x output_size=500 x title_length=40(-kernel_size+1 if CNN)
		candidate_body = encoder(candidate_body) # batch_size x # candidates (21 in training) x output_size=500 x body_length=100(-kernel_size+1 if CNN)
		if is_training:
			android_title = encoder(android_title)
			android_body = encoder(android_body)
		
		pid_title_mask = torch.unsqueeze(pid_title_mask, 2).expand_as(pid_title) # batch_size x 1 x output_size=500 x title_length=40(-kernel_size+1 if CNN)
		pid_body_mask = torch.unsqueeze(pid_body_mask, 2).expand_as(pid_body) # batch_size x 1 x output_size=500 x body_length=100(-kernel_size+1 if CNN)
		candidate_title_mask = torch.unsqueeze(candidate_title_mask, 2).expand_as(candidate_title)# batch_size x # candidates (21 in training) x output_size=500 x title_length=40(-kernel_size+1 if CNN)
		candidate_body_mask = torch.unsqueeze(candidate_body_mask, 2).expand_as(candidate_body) # batch_size x # candidates (21 in training) x output_size=500 x body_length=100(-kernel_size+1 if CNN)
		if is_training:
			android_title_mask = torch.unsqueeze(android_title_mask, 2).expand_as(android_title)
			android_body_mask = torch.unsqueeze(android_body_mask, 2).expand_as(android_body)

		good_title = torch.sum(pid_title * pid_title_mask, 3) # batch_size x 1 x output_size=500
		good_body = torch.sum(pid_body * pid_body_mask, 3) # batch_size x 1 x output_size=500
		cand_titles = torch.sum(candidate_title * candidate_title_mask, 3) # batch_size x # candidates (21 in training) x output_size=500
		cand_bodies = torch.sum(candidate_body * candidate_body_mask, 3) # batch_size x # candidates (21 in training) x output_size=500
		
		if is_training:
			android_title = torch.sum(android_title * android_title_mask, 3)
			android_body = torch.sum(android_body * android_body_mask, 3)
		
		good_tensor = (good_title + good_body)/2 # batch_size x 1 x output_size=500
		cand_tensor = (cand_titles + cand_bodies)/2 # batch_size x # candidates (21 in training) x output_size=500
		if is_training:
			android_tensor = (android_title + android_body)/2
		
		if is_training:
			good_domain = classifier(good_tensor.view(sz, -1))
			android_domain = classifier(android_tensor.view(sz, -1))
			
			softmax = nn.Softmax(dim=1)
			good_dist = softmax(good_domain)
			android_dist = softmax(android_domain)
		
			dists = torch.cat((good_dist, android_dist)).clamp(min=0.0001, max=0.9999)
			expected = Variable(torch.FloatTensor([0] * sz + [1] * sz)).cuda()
			
			bce_loss = torch.nn.BCELoss()(dists[:,0], expected)
			l = loss(good_tensor, cand_tensor, 1.0) - 0.01 * bce_loss
			l.backward()
			losses.append(l.cpu().data[0])
			bce_losses.append(bce_loss.cpu().data[0])
			encoder_optimizer.step()
			classifier_optimizer.step()
		else:
			similarity = cosine_sim(good_tensor.expand_as(cand_tensor), cand_tensor, dim=2)
			similarity = torch.FloatTensor(similarity.data.cpu().numpy())
			labels = batch['labels']
			def predict(sim, labels):
				predictions = []
				for i in range(sim.shape[0]):
					sorted_cand = (-sim[i]).argsort()
					predictions.append(labels[i][sorted_cand])
				return predictions
			for sim in similarity:
				actual.append(sim)
			expected.extend(labels.view(-1))

	if is_training:
		avg_loss = np.mean(losses)
		avg_bce_loss = np.mean(bce_losses)
		return avg_loss, avg_bce_loss
	else:
		auc = AUCMeter()
		auc.reset()
		auc.add(torch.cat(actual), torch.LongTensor(expected))
		return auc.value(max_fpr=0.05)
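The loss(good_tensor, cand_tensor, margin) helper used above is not shown. A plausible max-margin sketch, assuming the first candidate in each row is the positive one and the rest are negatives:

import torch.nn.functional as F

def loss(good_tensor, cand_tensor, margin):
    # good_tensor: batch x 1 x d, cand_tensor: batch x n_candidates x d
    sims = F.cosine_similarity(good_tensor.expand_as(cand_tensor),
                               cand_tensor, dim=2)   # batch x n_candidates
    pos = sims[:, 0].unsqueeze(1)   # similarity to the known duplicate
    neg = sims[:, 1:]               # similarities to negative candidates
    hinge = (margin - pos + neg).clamp(min=0)
    return hinge.max(dim=1)[0].mean()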
Example #16
def evaluate_model(model, use_test_data=False, use_lstm=True):
    if use_test_data:
        print "Running evaluate on the TEST data:"
    else:
        print "Running evaluate on the DEV data:"
    # Set the model to eval mode
    model.eval()

    # samples has shape (num_dev_samples, 22)
    samples = get_dev_data_android(use_test_data=use_test_data)
    num_samples = len(samples)

    num_batches = int(math.ceil(1. * num_samples / BATCH_SIZE))
    score_matrix = torch.Tensor().cuda() if USE_GPU else torch.Tensor()
    orig_time = time()
    for i in range(num_batches):
        # Get the samples ready
        batch = samples[i * BATCH_SIZE:(i + 1) * BATCH_SIZE]
        # If this is the last batch, then need to pad the batch to get the same shape as expected
        if i == num_batches - 1 and num_samples % BATCH_SIZE != 0:
            batch = np.concatenate(
                (batch, np.full(
                    ((i + 1) * BATCH_SIZE - num_samples, 22), "0")),
                axis=0)

        # Convert from numpy arrays to tensors
        title_tensor, title_lengths = get_tensor_from_batch(batch,
                                                            use_title=True)
        body_tensor, body_lengths = get_tensor_from_batch(batch,
                                                          use_title=False)

        # Run the model
        model.hidden = model.init_hidden()
        title_lstm = model(title_tensor)
        title_encoding = get_encodings(title_lstm,
                                       title_lengths,
                                       use_lstm=use_lstm)
        model.hidden = model.init_hidden()
        body_lstm = model(body_tensor)
        body_encoding = get_encodings(body_lstm,
                                      body_lengths,
                                      use_lstm=use_lstm)

        # Compute evaluation
        X, _ = generate_score_matrix(title_encoding, body_encoding)
        X = X.data
        if i == num_batches - 1 and num_samples % BATCH_SIZE != 0:
            score_matrix = torch.cat(
                [score_matrix, X[:num_samples - i * BATCH_SIZE]])
        else:
            score_matrix = torch.cat([score_matrix, X])

        #print "Finished batch " + str(i) + " after " + str(time() - orig_time) + " seconds"

    # score_matrix is a shape (num_dev_samples, 21) matrix that contains the cosine similarity scores
    meter = AUCMeter()
    similarities, targets = [], []
    for i in range(len(score_matrix)):
        similarities.append(score_matrix[i][0])
        targets.append(1)
        for j in range(1, 21):
            similarities.append(score_matrix[i][j])
            targets.append(0)
    meter.add(np.array(similarities), np.array(targets))
    print "The AUC(0.05) value is " + str(meter.value(0.05))

    # Set the model back to train mode
    model.train()
Example #17
def evaluate(pairs, label, text_data, question_lookup, auc):
    for p in pairs:
        id_1, id_2 = p
        cos = cosine_similarity(text_data.getrow(question_lookup[id_1]),
                                text_data.getrow(question_lookup[id_2]))
        cos = float(cos)

        auc.add(torch.DoubleTensor([cos]), torch.LongTensor([label]))


print >> sys.stderr, "LOADING DATA..."
question_lookup, text_raw = read_raw_data(CORPUS_PATH)
dev_pos_pairs = import_pairs(DEV_POS_PATH)
dev_neg_pairs = import_pairs(DEV_NEG_PATH)
test_pos_pairs = import_pairs(TEST_POS_PATH)
test_neg_pairs = import_pairs(TEST_NEG_PATH)

print >> sys.stderr, "COMPUTING TF-IDF FEATURES..."
text_tfidf = compute_tfidf(text_raw)

print >> sys.stderr, "COMPUTING AUC..."
d_auc = AUCMeter()
t_auc = AUCMeter()
evaluate(dev_pos_pairs, 1, text_tfidf, question_lookup, d_auc)
evaluate(dev_neg_pairs, 0, text_tfidf, question_lookup, d_auc)
evaluate(test_pos_pairs, 1, text_tfidf, question_lookup, t_auc)
evaluate(test_neg_pairs, 0, text_tfidf, question_lookup, t_auc)

print "Dev AUC: %.2f" % (d_auc.value(max_fpr=0.05))
print "Test AUC: %.2f" % (t_auc.value(max_fpr=0.05))
Example #18
def run_epoch(data, is_training, model, optimizer, transfer=False):
	
	# Make batches
	data_loader = torch.utils.data.DataLoader(
		data,
		batch_size=10,
		shuffle=True,
		num_workers=4,
		drop_last=False)

	losses = []
	actual = []
	expected = []

	if is_training:
		model.train()
	else:
		model.eval()
	
	for batch in data_loader:
		# Unpack training instances
		pid_title = torch.unsqueeze(Variable(batch['pid_title']), 1).cuda() # Size: batch_size x 1 x title_length=40
		pid_title_mask = torch.unsqueeze(Variable(batch['pid_title_mask']), 1).cuda() # Size: batch_size x 1 x title_length=40
		pid_body = torch.unsqueeze(Variable(batch['pid_body']), 1).cuda() # Size: batch_size x 1 x body_length=100
		pid_body_mask = torch.unsqueeze(Variable(batch['pid_body_mask']), 1).cuda() # Size: batch_size x 1 x body_length=100
		candidate_title = Variable(batch['candidate_titles']).cuda() # Size: batch_size x # candidates (21 in training) x title_length=40
		candidate_title_mask = Variable(batch['candidate_titles_mask']).cuda() # Size: batch_size x # candidates (21 in training) x title_length=40
		candidate_body = Variable(batch['candidate_body']).cuda() # Size: batch_size x # candidates (21 in training) x body_length=100
		candidate_body_mask = Variable(batch['candidate_body_mask']).cuda() # Size: batch_size x # candidates (21 in training) x body_length=100
		
		if is_training:
			optimizer.zero_grad()
		
		# Run text through model
		pid_title = model(pid_title) # batch_size x 1 x output_size=500 x title_length=40(-kernel_size+1 if CNN)
		pid_body = model(pid_body) # batch_size x 1 x output_size=500 x body_length=100(-kernel_size+1 if CNN)
		candidate_title = model(candidate_title) # batch_size x # candidates (21 in training) x output_size=500 x title_length=40(-kernel_size+1 if CNN)
		candidate_body = model(candidate_body) # batch_size x # candidates (21 in training) x output_size=500 x body_length=100(-kernel_size+1 if CNN)
		
		pid_title_mask = torch.unsqueeze(pid_title_mask, 2).expand_as(pid_title) # batch_size x 1 x output_size=500 x title_length=40(-kernel_size+1 if CNN)
		pid_body_mask = torch.unsqueeze(pid_body_mask, 2).expand_as(pid_body) # batch_size x 1 x output_size=500 x body_length=100(-kernel_size+1 if CNN)
		candidate_title_mask = torch.unsqueeze(candidate_title_mask, 2).expand_as(candidate_title)# batch_size x # candidates (21 in training) x output_size=500 x title_length=40(-kernel_size+1 if CNN)
		candidate_body_mask = torch.unsqueeze(candidate_body_mask, 2).expand_as(candidate_body) # batch_size x # candidates (21 in training) x output_size=500 x body_length=100(-kernel_size+1 if CNN)

		good_title = torch.sum(pid_title * pid_title_mask, 3) # batch_size x 1 x output_size=500
		good_body = torch.sum(pid_body * pid_body_mask, 3) # batch_size x 1 x output_size=500
		cand_titles = torch.sum(candidate_title * candidate_title_mask, 3) # batch_size x # candidates (21 in training) x output_size=500
		cand_bodies = torch.sum(candidate_body * candidate_body_mask, 3) # batch_size x # candidates (21 in training) x output_size=500
		
		good_tensor = (good_title + good_body)/2 # batch_size x 1 x output_size=500
		cand_tensor = (cand_titles + cand_bodies)/2 # batch_size x # candidates (21 in training) x output_size=500
		
		if is_training:
			l = loss(good_tensor, cand_tensor, 1.0)
			l.backward()
			losses.append(l.cpu().data[0])
			optimizer.step()
		else:
			similarity = cosine_sim(good_tensor.expand_as(cand_tensor), cand_tensor, dim=2)
			if transfer:
				similarity = torch.FloatTensor(similarity.data.cpu().numpy())
			else:
				similarity = similarity.data.cpu().numpy()
			if transfer:
				labels = batch['labels']
			else:
				labels = batch['labels'].numpy()
			def predict(sim, labels):
				predictions = []
				for i in range(sim.shape[0]):
					sorted_cand = (-sim[i]).argsort()
					predictions.append(labels[i][sorted_cand])
				return predictions
			if transfer:
				for sim in similarity:
					actual.append(sim)
				expected.extend(labels.view(-1))
			else:
				l = predict(similarity, labels)
				losses.extend(l)

	if is_training:
		avg_loss = np.mean(losses)
		return avg_loss
	else:
		if transfer:
			auc = AUCMeter()
			auc.reset()
			auc.add(torch.cat(actual), torch.LongTensor(expected))
			return auc.value(max_fpr=0.05)
		else:
			e = Evaluation(losses)
			MAP = e.MAP()*100
			MRR = e.MRR()*100
			P1 = e.Precision(1)*100
			P5 = e.Precision(5)*100
			return (MAP, MRR, P1, P5)
Example #19
def evaluation(args, padding_id, ids_corpus, vocab_map, embeddings, model):
    """Calculate the AUC score of the model on Android data.
    """
    meter = AUCMeter()

    print "starting evaluation"
    val_data = corpus.read_annotations(args.test)
    print "number of lines in test data: " + str(len(val_data))
    val_batches = corpus.create_eval_batches(ids_corpus, val_data, padding_id)
    count = 0
    similarities = []

    for batch in val_batches:
        titles, bodies, qlabels = batch
        title_length, title_num_questions = titles.shape
        body_length, body_num_questions = bodies.shape
        title_embeddings, body_embeddings = corpus.get_embeddings(titles, bodies, vocab_map, embeddings)
        
        if args.model == 'lstm':
            if args.cuda:
                title_inputs = [autograd.Variable(torch.FloatTensor(title_embeddings).cuda())]
                title_inputs = torch.cat(title_inputs).view(title_length, title_num_questions, -1)

                title_hidden = (autograd.Variable(torch.zeros(1, title_num_questions, args.hidden_size).cuda()),
                      autograd.Variable(torch.zeros((1, title_num_questions, args.hidden_size)).cuda()))
            else:
                title_inputs = [autograd.Variable(torch.FloatTensor(title_embeddings))]
                title_inputs = torch.cat(title_inputs).view(title_length, title_num_questions, -1)
                # title_inputs = torch.cat(title_inputs).view(title_num_questions, title_length, -1)

                title_hidden = (autograd.Variable(torch.zeros(1, title_num_questions, args.hidden_size)),
                      autograd.Variable(torch.zeros((1, title_num_questions, args.hidden_size))))
        else:
            if args.cuda:
                title_inputs = [autograd.Variable(torch.FloatTensor(title_embeddings).cuda())]
            else:
                title_inputs = [autograd.Variable(torch.FloatTensor(title_embeddings))]
            title_inputs = torch.cat(title_inputs).transpose(0,1).transpose(1,2)

        if args.model == 'lstm':
            title_out, title_hidden = model(title_inputs, title_hidden)
        else:
            title_out = model(title_inputs)
            title_out = F.tanh(title_out)
            title_out = title_out.transpose(1,2).transpose(0,1)

        average_title_out = average_questions(title_out, titles, padding_id)

        # body
        if args.model == 'lstm':
            if args.cuda:
                body_inputs = [autograd.Variable(torch.FloatTensor(body_embeddings).cuda())]
                body_inputs = torch.cat(body_inputs).view(body_length, body_num_questions, -1)

                body_hidden = (autograd.Variable(torch.zeros(1, body_num_questions, args.hidden_size).cuda()),
                      autograd.Variable(torch.zeros((1, body_num_questions, args.hidden_size)).cuda()))
            else:
                body_inputs = [autograd.Variable(torch.FloatTensor(body_embeddings))]
                body_inputs = torch.cat(body_inputs).view(body_length, body_num_questions, -1)

                body_hidden = (autograd.Variable(torch.zeros(1, body_num_questions, args.hidden_size)),
                      autograd.Variable(torch.zeros((1, body_num_questions, args.hidden_size))))
        else:
            if args.cuda:
                body_inputs = [autograd.Variable(torch.FloatTensor(body_embeddings).cuda())]
                #body_inputs = torch.cat(body_inputs).view(body_num_questions, 200, -1)
            else:
                body_inputs = [autograd.Variable(torch.FloatTensor(body_embeddings))]
                #body_inputs = torch.cat(body_inputs).view(body_num_questions, 200, -1)
            body_inputs = torch.cat(body_inputs).transpose(0,1).transpose(1,2)
        
        if args.model == 'lstm':
            body_out, body_hidden = model(body_inputs, body_hidden)
        else:
            body_out = model(body_inputs)
            body_out = F.tanh(body_out)
            body_out = body_out.transpose(1,2).transpose(0,1)

        # average all words of each question from body_out
        average_body_out = average_questions(body_out, bodies, padding_id)

        # average body and title
        # representations of the questions as found by the CNN
        # 560 x 100
        hidden = (average_title_out + average_body_out) * 0.5

        query = torch.DoubleTensor(hidden[0].unsqueeze(0).cpu().data.numpy())
        examples = torch.DoubleTensor(hidden[1:].cpu().data.numpy())

        cos_similarity = F.cosine_similarity(query, examples, dim=1)
        qlabels = [float(qlabel) for qlabel in list(qlabels)]
        target = torch.DoubleTensor(qlabels)
        meter.add(cos_similarity, target)

    print meter.value(0.05)
Example #20
        for line in f.readlines():
            main_qid, candidate_qid = line.split()
            neg[main_qid].add(candidate_qid)
    return pos, neg

# Computes the cosine similarity between two vectors x and y and maps the result to [0, 1]:
# 0 means the two vectors are opposite, 1 means they are identical.
# x and y are sparse vectors.
def cosine(x, y):
    return (1 + cosine_similarity(sparse.vstack([x, y]))[0][1]) / 2
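# Worked check of the (1 + cos) / 2 rescaling (illustrative values):
#   cos = -1.0 (opposite)   -> 0.0
#   cos =  0.0 (orthogonal) -> 0.5
#   cos =  1.0 (identical)  -> 1.0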

if __name__ == '__main__':
    get_id_to_vector()
    meter = AUCMeter()

    pos, neg = get_dev_data_android(use_test_data=False)
    # Only use questions that have at least one positive match
    for main_qid in pos:
        main_vector = X[id_to_index[main_qid]]
        similarities, targets = [], []
        # For each positive match, append the similarity score and a 1 to targets
        for pos_match_qid in pos[main_qid]:
            similarities.append(cosine(main_vector, X[id_to_index[pos_match_qid]]))
            targets.append(1)
        # For each negative match, append the similarity score and a 0 to targets
        for neg_match_qid in neg[main_qid]:
            similarities.append(cosine(main_vector, X[id_to_index[neg_match_qid]]))
            targets.append(0)
        meter.add(np.array(similarities), np.array(targets))
    print "The AUC(0.05) value on the TfIdF weighted vectors are " + str(meter.value(0.05))