def get_precision(topicid, docs):
	"""Return (P@5, P@10, NDCG, per-doc relevance list) for the ranked docs."""
	j = Judge()
	rels = []
	for docno in docs:
		rel = j.check(topicid, docno)
		rels.append(rel)
	ndcg = get_ndcg(rels)

	# P@5 and P@10 assume docs holds the top 10 retrieved documents.
	return (sum(rels[:5]) / 5., sum(rels) / 10., ndcg, rels)
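These snippets all rely on a project-level get_ndcg helper that is not shown on this page. A minimal sketch of such a helper for a list of 0/1 relevance grades (its exact interface here is an assumption) could be:

import math

def get_ndcg(rels, k=None):
    # Sketch only: NDCG over a list of binary relevance grades (assumed interface).
    if k is None:
        k = len(rels)
    rels = rels[:k]
    # DCG: gain rel_i discounted by log2(i + 2) for the doc at 0-based rank i.
    dcg = sum(rel / math.log(i + 2, 2) for i, rel in enumerate(rels))
    # Ideal DCG: the same gains arranged in the best possible order.
    ideal = sorted(rels, reverse=True)
    idcg = sum(rel / math.log(i + 2, 2) for i, rel in enumerate(ideal))
    return dcg / idcg if idcg > 0 else 0.0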
Example 2
def performance_ndcg(Predict, data_path, at_num,
                     user_id_dict, doc_id_dict, current_user_like_dict):
    """Average NDCG@at_num over the test users that have a non-empty like list."""
    # Each line of the test file: a user id followed by the doc ids that user liked.
    user_like_list_file = open(data_path + '/user_like_list_in_test.dat.txt')
    user_dict = {}
    for user in user_like_list_file.readlines():
        splits = user.split()
        like_list = []
        for i in range(1, len(splits)):
            like_list.append(doc_id_dict[int(splits[i])])
        user_dict[user_id_dict[int(splits[0])]] = like_list
    user_like_list_file.close()

    (m, n) = Predict.shape
    
    total_ndcg = 0.0
    effective_user_num = 0
    for user_id in user_dict.keys():
        true_like_list = user_dict[user_id]
        if len(true_like_list) == 0:
            continue
        current_like_list = current_user_like_dict[user_id]
        # Rank all docs for this user by predicted score, highest first.
        p_like_list = list(Predict[user_id - 1, :])
        p_like_dict = dict(zip(range(n), p_like_list))
        sort_p_like_list = sorted(p_like_dict.items(), key=lambda kv: kv[1], reverse=True)

        sort_p_like_doc_ids = []
        effective_doc_num = 0
        for i in range(n):
            if effective_doc_num == at_num:
                break
            p_doc_id = sort_p_like_list[i][0] + 1
            # Skip docs already in the user's current like list, unless they also appear in the test likes.
            if (p_doc_id in current_like_list) and (p_doc_id not in true_like_list):
                continue
            sort_p_like_doc_ids.append(p_doc_id)
            effective_doc_num += 1
  
        # Binary relevance list over the top at_num recommendations.
        rank_list = []
        for doc_id in sort_p_like_doc_ids:
            if doc_id in true_like_list:
                rank_list.append(1)
            else:
                rank_list.append(0)
        user_ndcg = ndcg.get_ndcg(rank_list, len(rank_list))
        total_ndcg += user_ndcg
        effective_user_num += 1

    if effective_user_num == 0:
        avg_ndcg = 0
    else:
        avg_ndcg = total_ndcg / effective_user_num

    return avg_ndcg
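For a single user, the binary rank_list built above is what ndcg.get_ndcg scores; a toy illustration with invented values (using the sketch from the first example):

# 1 marks a recommended doc the user actually liked, in predicted order.
rank_list = [1, 0, 1, 0, 0]
print get_ndcg(rank_list, len(rank_list))   # ~0.92 with a log2-discounted DCG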
def get_rel_top(docs, topicid):
	"""Return (precision, NDCG, average x, normalized x ratio) for the top 10 docs."""
	j = Judge()  # relevance judge, as in the first example

	# Each doc is a "docno,x,y" string; collect every x to normalize against.
	allx = []
	for doc in docs:
		docno, x, y = doc.split(",")
		allx.append(int(x))

	if len(allx) > 0:
		minx = min(allx)
		maxx = max(allx)
	else:
		minx = 0
		maxx = 0

	rels = []
	xs = []

	# Judge only the top 10 retrieved documents.
	for doc in docs[:10]:
		docno, x, y = doc.split(",")
		rel = j.check(topicid, docno)
		rels.append(rel)
		xs.append(int(x))

	if len(xs) == 0:
		avg_x = -1
		ratio_x = -1
	else:
		avg_x = float(sum(xs)) / len(xs)
		
		if maxx - minx == 0 :
			ratio_x = -1
		else:
			ratio_x = float(avg_x - minx) / (maxx - minx)
		
	ndcg = get_ndcg(rels)
		
	if len(rels) == 0:
		return 0, ndcg, avg_x, ratio_x
		
	return (float(sum(rels)) / len(rels), ndcg, avg_x, ratio_x)
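The x values are min-max normalized inside get_rel_top; a made-up numeric illustration of that step:

# Invented "docno,x,y" entries, just to show the avg_x / ratio_x arithmetic.
docs = ["d1,10,0", "d2,20,0", "d3,40,0"]
xs = [int(d.split(",")[1]) for d in docs]
avg_x = float(sum(xs)) / len(xs)                        # 23.33...
ratio_x = (avg_x - min(xs)) / float(max(xs) - min(xs))  # (23.33 - 10) / 30 ~= 0.44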
Example 4
import re

# Assumed setup: `f` is an already-open results file with one space-separated
# list of relevance grades per query line; ap, ndcg and p are project metric modules.
k = [1, 3, 10]
apTtl = ttl1 = ttl3 = ttl10 = 0.0
p1 = p3 = p10 = 0.0
qCount = 0
count1 = 1
count3 = 1
count10 = 1
while True:
    line = f.readline()
    if not line:
        break
    arrays = re.split(r" ", line)
    s = [float(i) for i in arrays]
    qCount += 1
    apTtl += ap.get_ap(s)
    for tmp_k in k:
        # Crude length check: skip cutoffs that this (short) line cannot support.
        if len(line) < (4 * tmp_k - 1):
            continue
        if tmp_k == 1:
            ttl1 += ndcg.get_ndcg(s, tmp_k)
            p1 += p.get_p(s, tmp_k)
            count1 += 1
        if tmp_k == 3:
            ttl3 += ndcg.get_ndcg(s, tmp_k)
            p3 += p.get_p(s, tmp_k)
            count3 += 1
        if tmp_k == 10:
            ttl10 += ndcg.get_ndcg(s, tmp_k)
            p10 += p.get_p(s, tmp_k)
            count10 += 1
print "NDCG@1: %f, NDCG@3: %f, NDCG@10: %f, MAP: %f"%(ttl1/count1,ttl3/count3,ttl10/count10, apTtl/qCount)
Example 6
import os

import tensorflow as tf


def model(features, labels, mode, params):
    """Estimator model_fn: scores each item with a small dense net and a ranking loss."""
    x = features['features']
    r = features['rating']

    def bucket(x):
        # Bucketed scoring head: four tanh outputs combined by a cumulative
        # minimum and then summed into a single score.
        s = tf.layers.dense(
            x,
            units=4,
            activation=tf.nn.tanh,
            kernel_initializer=tf.truncated_normal_initializer(stddev=0.0001),
            kernel_regularizer=tf.contrib.layers.l2_regularizer(0.004),
            name='layer_2')
        weights = tf.get_default_graph().get_tensor_by_name(
            os.path.split(s.name)[0] + '/kernel:0')
        s1, s2, s3, s4 = tf.split(s, 4, -1)
        s2 = tf.minimum(s1, s2)
        s3 = tf.minimum(s2, s3)
        s4 = tf.minimum(s3, s4)
        s = s1 + s2 + s3 + s4
        return s, weights

    def dense(x):
        # Linear scoring head: a single output unit with no activation.
        s = tf.layers.dense(
            x,
            units=1,
            activation=None,
            kernel_initializer=tf.truncated_normal_initializer(stddev=0.0001),
            kernel_regularizer=tf.contrib.layers.l2_regularizer(0.004),
            name='layer_2')
        weights = tf.get_default_graph().get_tensor_by_name(
            os.path.split(s.name)[0] + '/kernel:0')
        return s, weights

    with tf.variable_scope('net'):
        # tf.initializers.glorot_normal()
        # x = tf.nn.batch_normalization(x)
        if mode == tf.estimator.ModeKeys.TRAIN:
            # Broadcast one dropout mask over the feature dimension so each
            # example's feature vector is kept or dropped as a whole.
            noise_shape = tf.concat(
                [tf.shape(x)[:-1],
                 tf.ones(1, dtype=tf.int32)], 0)
            x = tf.nn.dropout(x, keep_prob=0.9, noise_shape=noise_shape)
        #x = tf.layers.dense(x, units=128, activation=tf.nn.tanh, kernel_initializer=tf.truncated_normal_initializer(stddev=0.001), kernel_regularizer=tf.contrib.layers.l2_regularizer(0.004), name='layer_1')
        s, weights = dense(x)
        #s, weights = bucket(x)
        s = tf.squeeze(s, axis=-1)
    tf.summary.histogram('weights', weights)
    tf.summary.histogram('score', s)

    # Ranking loss on the predicted scores, plus the L2 penalties registered by the dense layers.
    loss = rank_loss(r, s)
    #loss = regression_loss(r, s, 'label_loss')
    total_loss = loss + tf.add_n(tf.losses.get_regularization_losses())

    #s = tf.nn.relu(s)
    #gains_loss = regression_loss(6*tf.pow(2., tf.nn.relu(r)), 6*tf.pow(2., tf.nn.relu(s)), 'my_loss')

    ndcg = get_ndcg(s, r)
    ndcg = tf.reduce_mean(ndcg)
    tf.summary.scalar('ndcg', ndcg)
    if mode == tf.estimator.ModeKeys.EVAL:
        metrics = {
            #'my_loss': tf.metrics.mean(gains_loss),
            #'label_loss': tf.metrics.mean(loss),
            'ranking_loss': tf.metrics.mean(loss),
            'ndcg': tf.metrics.mean(ndcg)
        }  #, 'accuracy': tf.metrics.mean(accuracy)
        return tf.estimator.EstimatorSpec(mode,
                                          loss=total_loss,
                                          eval_metric_ops=metrics)

    with tf.variable_scope('train_op'):
        global_step = tf.train.get_global_step()
        learning_rate = tf.train.exponential_decay(
            learning_rate=params['starter_learning_rate'],
            global_step=global_step,
            decay_steps=params['stepvalue'],
            decay_rate=params['gamma'],
            staircase=True)
        #optimizer = tf.train.GradientDescentOptimizer(learning_rate)
        #optimizer = tf.train.MomentumOptimizer(learning_rate, 0.9)
        optimizer = tf.train.AdamOptimizer(learning_rate)

        train_op = optimizer.minimize(total_loss, global_step=global_step)
    tf.summary.scalar("learning_rate", learning_rate)
    return tf.estimator.EstimatorSpec(mode, loss=total_loss, train_op=train_op)
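A hypothetical way to wire this model_fn into a TF 1.x Estimator; the checkpoint directory, hyperparameter values and train_input_fn below are illustrative assumptions:

# Illustrative wiring only; the values and train_input_fn are not from the original code.
estimator = tf.estimator.Estimator(
    model_fn=model,
    model_dir='/tmp/rank_model',            # hypothetical checkpoint directory
    params={
        'starter_learning_rate': 0.001,     # assumed value
        'stepvalue': 10000,                 # decay the learning rate every this many steps
        'gamma': 0.5,                       # decay rate
    })

# train_input_fn must yield ({'features': ..., 'rating': ...}, labels) batches.
estimator.train(input_fn=train_input_fn, steps=100000)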