def get_precision(topicid, docs):
    """Return (P@5, P@10, NDCG, relevance list) for a ranked doc list."""
    j = Judge()
    rels = [j.check(topicid, docno) for docno in docs]
    ndcg = get_ndcg(rels)
    # Slice before dividing so P@10 is correct even when fewer or more
    # than 10 docs are passed in.
    return (sum(rels[:5]) / 5., sum(rels[:10]) / 10., ndcg, rels)
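# The get_ndcg helper called throughout this file is defined elsewhere in
# the repo. A minimal stand-in, assuming additive gains and log2 rank
# discounting (a sketch, not the repo's actual implementation):
import math

def get_ndcg(rels, at=None):
    """NDCG over a relevance list; `at` truncates the list first."""
    if at is not None:
        rels = rels[:at]
    dcg = sum(rel / math.log(i + 2, 2) for i, rel in enumerate(rels))
    ideal = sorted(rels, reverse=True)
    idcg = sum(rel / math.log(i + 2, 2) for i, rel in enumerate(ideal))
    return dcg / idcg if idcg > 0 else 0.0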
def performance_ndcg(Predict, data_path, at_num, user_id_dict, doc_id_dict,
                     current_user_like_dict):
    """Average NDCG@at_num over users in the test like-list file."""
    user_like_list_file = open(data_path + '/user_like_list_in_test.dat.txt')
    user_dict = {}
    for user in user_like_list_file.readlines():
        splits = user.split()
        like_list = [doc_id_dict[int(s)] for s in splits[1:]]
        user_dict[user_id_dict[int(splits[0])]] = like_list
    user_like_list_file.close()

    (m, n) = Predict.shape
    total_ndcg = 0.0
    effective_user_num = 0
    for user_id in user_dict.keys():
        true_like_list = user_dict[user_id]
        if len(true_like_list) == 0:
            continue
        current_like_list = current_user_like_dict[user_id]
        p_like_list = list(Predict[user_id - 1, :])
        # Sort candidate doc indices by predicted score, highest first
        # (key-based sort instead of the removed Python 2 cmp argument).
        sort_p_like_list = sorted(zip(range(n), p_like_list),
                                  key=lambda kv: kv[1], reverse=True)
        sort_p_like_doc_ids = []
        effective_doc_num = 0
        for i in range(n):
            if effective_doc_num == at_num:
                break
            p_doc_id = sort_p_like_list[i][0] + 1
            # Skip docs the user already liked at training time, unless
            # they also appear in the test likes.
            if (p_doc_id in current_like_list) and (p_doc_id not in true_like_list):
                continue
            sort_p_like_doc_ids.append(p_doc_id)
            effective_doc_num += 1
        rank_list = [1 if doc_id in true_like_list else 0
                     for doc_id in sort_p_like_doc_ids]
        total_ndcg += ndcg.get_ndcg(rank_list, len(rank_list))
        effective_user_num += 1
    if effective_user_num == 0:
        return 0.0
    return total_ndcg / effective_user_num
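# A toy end-to-end check for performance_ndcg (illustrative values only;
# assumes the companion `ndcg` module is importable): two users, three
# docs, identity id mappings, NDCG@10.
import numpy as np

def _demo_performance_ndcg(tmp_dir='.'):
    with open(tmp_dir + '/user_like_list_in_test.dat.txt', 'w') as fh:
        fh.write('1 2 3\n')   # user 1 liked docs 2 and 3 in the test period
        fh.write('2 1\n')     # user 2 liked doc 1
    Predict = np.array([[0.1, 0.9, 0.8],
                        [0.7, 0.2, 0.3]])
    ident = dict((i, i) for i in range(4))
    current_likes = {1: [], 2: []}
    return performance_ndcg(Predict, tmp_dir, 10, ident, ident, current_likes)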
def get_rel_top(docs, topicid):
    """Precision and NDCG over the top-10 docs, plus the average x of those
    docs and its normalized position within the full list's x range."""
    j = Judge()
    allx = []
    for doc in docs:
        docno, x, y = doc.split(",")
        allx.append(int(x))
    if len(allx) > 0:
        minx = min(allx)
        maxx = max(allx)
    else:
        minx = 0
        maxx = 0
    rels = []
    xs = []
    for doc in docs[:10]:
        docno, x, y = doc.split(",")
        rels.append(j.check(topicid, docno))
        xs.append(int(x))
    if len(xs) == 0:
        avg_x = -1
        ratio_x = -1
    else:
        avg_x = float(sum(xs)) / len(xs)
        if maxx - minx == 0:
            ratio_x = -1
        else:
            ratio_x = float(avg_x - minx) / (maxx - minx)
    ndcg = get_ndcg(rels)
    if len(rels) == 0:
        return (0, ndcg, avg_x, ratio_x)
    return (float(sum(rels)) / len(rels), ndcg, avg_x, ratio_x)
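# Example call for get_rel_top (illustrative values; assumes Judge can
# score the topic and that each doc string is "docno,x,y"):
example_docs = ["doc-001,10,0", "doc-002,25,1", "doc-003,40,0"]
prec10, ndcg10, avg_x, ratio_x = get_rel_top(example_docs, 301)
# prec10/ndcg10 cover the (up to) top-10 docs; ratio_x places the mean x
# of those docs within the [min, max] x range of the full list.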
# f is assumed to be an open handle on a file with one query per line:
# space-separated relevance labels in ranked order. Counts start at 0 and
# the final averages guard against empty buckets.
k = [1, 3, 10]
qCount = 0
apTtl = 0.0
ttl1 = ttl3 = ttl10 = 0.0
p1 = p3 = p10 = 0.0
count1 = count3 = count10 = 0
while 1:
    line = f.readline()
    if not line:
        break
    s = [float(i) for i in line.split()]
    qCount += 1
    apTtl += ap.get_ap(s)
    for tmp_k in k:
        # Only score cutoffs the line actually has enough labels for.
        if len(s) < tmp_k:
            continue
        if tmp_k == 1:
            ttl1 += ndcg.get_ndcg(s, tmp_k)
            p1 += p.get_p(s, tmp_k)
            count1 += 1
        if tmp_k == 3:
            ttl3 += ndcg.get_ndcg(s, tmp_k)
            p3 += p.get_p(s, tmp_k)
            count3 += 1
        if tmp_k == 10:
            ttl10 += ndcg.get_ndcg(s, tmp_k)
            p10 += p.get_p(s, tmp_k)
            count10 += 1
print "NDCG@1: %f, NDCG@3: %f, NDCG@10: %f, MAP: %f" % (
    ttl1 / max(count1, 1), ttl3 / max(count3, 1),
    ttl10 / max(count10, 1), apTtl / max(qCount, 1))
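# ap.get_ap and p.get_p come from companion modules not shown in this
# section. Minimal stand-ins, assuming binary relevance labels and, for AP,
# normalization by the relevant items seen in the list (sketches, not the
# repo's actual implementations):

def get_ap(rels):
    """Average precision over a ranked binary relevance list."""
    hits = 0
    ap_sum = 0.0
    for i, rel in enumerate(rels):
        if rel > 0:
            hits += 1
            ap_sum += hits / float(i + 1)
    return ap_sum / hits if hits else 0.0

def get_p(rels, at):
    """Precision@at over a ranked binary relevance list."""
    return sum(1 for rel in rels[:at] if rel > 0) / float(at)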
import os

import tensorflow as tf


def model(features, labels, mode, params):
    """Estimator model_fn: scores feature vectors and trains with a ranking
    loss. `labels` is unused; the rating travels in `features`."""
    x = features['features']
    r = features['rating']

    def bucket(x):
        # Four sub-scores with cascaded minimums (each sub-score can only
        # lower the next), summed into one score.
        s = tf.layers.dense(
            x,
            units=4,
            activation=tf.nn.tanh,
            kernel_initializer=tf.truncated_normal_initializer(stddev=0.0001),
            kernel_regularizer=tf.contrib.layers.l2_regularizer(0.004),
            name='layer_2')
        weights = tf.get_default_graph().get_tensor_by_name(
            os.path.split(s.name)[0] + '/kernel:0')
        s1, s2, s3, s4 = tf.split(s, 4, -1)
        s2 = tf.minimum(s1, s2)
        s3 = tf.minimum(s2, s3)
        s4 = tf.minimum(s3, s4)
        s = s1 + s2 + s3 + s4
        return s, weights

    def dense(x):
        # Single linear scoring layer.
        s = tf.layers.dense(
            x,
            units=1,
            activation=None,
            kernel_initializer=tf.truncated_normal_initializer(stddev=0.0001),
            kernel_regularizer=tf.contrib.layers.l2_regularizer(0.004),
            name='layer_2')
        weights = tf.get_default_graph().get_tensor_by_name(
            os.path.split(s.name)[0] + '/kernel:0')
        return s, weights

    with tf.variable_scope('net'):
        if mode == tf.estimator.ModeKeys.TRAIN:
            # Share one dropout decision across the feature dimension, so
            # whole feature vectors are kept or dropped together.
            noise_shape = tf.concat(
                [tf.shape(x)[:-1], tf.ones(1, dtype=tf.int32)], 0)
            x = tf.nn.dropout(x, keep_prob=0.9, noise_shape=noise_shape)
        # x = tf.layers.dense(x, units=128, activation=tf.nn.tanh,
        #                     kernel_initializer=tf.truncated_normal_initializer(stddev=0.001),
        #                     kernel_regularizer=tf.contrib.layers.l2_regularizer(0.004),
        #                     name='layer_1')  # optional hidden layer
        s, weights = dense(x)
        # s, weights = bucket(x)  # alternative scoring head
        s = tf.squeeze(s, axis=-1)

    tf.summary.histogram('weights', weights)
    tf.summary.histogram('score', s)

    # rank_loss, regression_loss, and get_ndcg are defined elsewhere in
    # this repo.
    loss = rank_loss(r, s)
    # loss = regression_loss(r, s, 'label_loss')  # pointwise alternative
    total_loss = loss + tf.add_n(tf.losses.get_regularization_losses())

    ndcg = get_ndcg(s, r)
    ndcg = tf.reduce_mean(ndcg)
    tf.summary.scalar('ndcg', ndcg)

    if mode == tf.estimator.ModeKeys.EVAL:
        metrics = {
            'ranking_loss': tf.metrics.mean(loss),
            'ndcg': tf.metrics.mean(ndcg),
        }
        return tf.estimator.EstimatorSpec(
            mode, loss=total_loss, eval_metric_ops=metrics)

    with tf.variable_scope('train_op'):
        global_step = tf.train.get_global_step()
        learning_rate = tf.train.exponential_decay(
            learning_rate=params['starter_learning_rate'],
            global_step=global_step,
            decay_steps=params['stepvalue'],
            decay_rate=params['gamma'],
            staircase=True)
        # optimizer = tf.train.GradientDescentOptimizer(learning_rate)
        # optimizer = tf.train.MomentumOptimizer(learning_rate, 0.9)
        optimizer = tf.train.AdamOptimizer(learning_rate)
        train_op = optimizer.minimize(total_loss, global_step=global_step)
        tf.summary.scalar("learning_rate", learning_rate)
    return tf.estimator.EstimatorSpec(mode, loss=total_loss, train_op=train_op)
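# Wiring the model_fn into an Estimator (TF 1.x). The model_dir, the
# hyperparameter values, and train_input_fn below are placeholders, not
# values from this repo:
estimator = tf.estimator.Estimator(
    model_fn=model,
    model_dir='/tmp/rank_model',                # hypothetical path
    params={'starter_learning_rate': 1e-3,      # illustrative values
            'stepvalue': 10000,
            'gamma': 0.5})
estimator.train(input_fn=train_input_fn, steps=100000)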