# print number of clusters print "Number of clusters:", len(list_of_clusters) # map each test session to its keyword list_test_keywords = map(lambda x : Keyword(x.keyword_id), list_of_test_sessions) # compute predictions for CTR list_CTR_predictions = [] count = 0 for kw in list_test_keywords: print kw for cluster in list_of_clusters: if cluster.getKeyword(kw) != None: print "FOUND" list_CTR_predictions.append(cluster.computeCTR()) count += 1 break print "found_ratio", count/float(len(list_test_keywords)) # Compute Mean Squared Error for predictions m_s_e = evaluator(list_CTR_predictions) print m_s_e
# NOTE(review): this line is a collapsed fragment that begins MID-LOOP —
# the `cluster_list.remove(secondCluster)` / `else: continue` tail belongs
# to a merge loop whose header is not visible in this chunk, so the correct
# indentation/attachment of the `else` clause cannot be verified from here.
# Left byte-identical; restore formatting against the original file.
# After the merge loop, the fragment: prints the final cluster count, maps
# each test session to a Keyword, looks up each keyword's node in graph
# g_new and its cluster via node_cluster_dict, collects both the plain
# cluster CTR and a weighted CTR per keyword, then prints the evaluator
# (MSE) score for each prediction list in turn.
# remove the cluster we merged in cluster_list.remove(secondCluster) else: continue # Print out number of clusters: print "Number of Clusters:", len(cluster_list) # Compute predictions for keywords in test set list_test_keywords = map(lambda x : Keyword(x.keyword_id), list_of_test_sessions) list_CTR_predictions = [] list_CTR_weighted_predictions = [] for kw in list_test_keywords: node = g_new.findNode(kw) corresponding_cluster = node_cluster_dict[node] list_CTR_predictions.append(corresponding_cluster.computeCTR()) list_CTR_weighted_predictions.append(corresponding_cluster.weightedCTR(node, g, g_new)) # Compute Mean Squared Error for predictions m_s_e = evaluator(list_CTR_predictions) print m_s_e # Compute Mean Squared Error for predictions m_s_e = evaluator(list_CTR_weighted_predictions) print m_s_e