Esempio n. 1
0
def centrality_leaders(budgetYears):
    """Tag, draw and log the centrality leaders of a collaboration network.

    Loads the network for ``budgetYears``, reduces a copy of its graph to the
    largest component, obtains candidates and tiered rankings from
    ``cl.centrality_leaders`` and, for the first ``topK`` tiers, stores a
    ``centrality_leader`` vertex attribute. The graph is then drawn to
    ``<root>/figures/<start>-<end>-centrality-leaders.png`` and the leader
    names are logged in tier order.

    budgetYears -- indexable sequence of years; the first and last entries
                   are used in the figure's file name.
    """
    topK = 10

    network = load_network_for(budgetYears)
    component = ResearchCollaborationNetwork.largest_component(network.g.copy())

    candidates, rankings = cl.centrality_leaders(component)

    leader_names = []
    # Only the first topK tiers are visited.
    for tier in range(min(topK, len(rankings))):
        for member in list(rankings[tier]):
            vertex = component.vs[candidates[member]]
            leader_names.append(vertex['name'])
            # The higher the value the better: tier 0 receives topK + 1.
            vertex['centrality_leader'] = topK + 1 - tier

    first_year = budgetYears[0]
    last_year = budgetYears[-1]

    figure_path = '%s/figures/%s-%s-centrality-leaders.png' % (
        root_folder(), first_year, last_year)
    draw(component, figure_path)

    logger.info(leader_names)
Esempio n. 2
0
def centrality_leaders(budgetYears):
    """Mark the top-tier centrality leaders, draw the graph and log them.

    Works on the largest component of a copy of the graph loaded for
    ``budgetYears``. ``cl.centrality_leaders`` supplies ``candidates`` and
    ``rankings``; each entry of ``rankings`` is treated as one tier whose
    items index into ``candidates``. Vertices in the first ``topK`` tiers
    get a ``centrality_leader`` attribute, the graph is drawn to a PNG under
    ``<root>/figures`` and the collected vertex names are logged.

    budgetYears -- indexable sequence of years; its first and last items
                   appear in the output file name.
    """
    graph = load_network_for(budgetYears).g.copy()
    graph = ResearchCollaborationNetwork.largest_component(graph)

    topK = 10
    candidates, rankings = cl.centrality_leaders(graph)

    ordered_list = []
    for tier in range(len(rankings)):
        if tier >= topK:
            # Tiers beyond the first topK are ignored.
            break
        # The higher the better: earlier tiers get larger values.
        weight = topK + 1 - tier
        for position in list(rankings[tier]):
            candidate = candidates[position]
            ordered_list.append(graph.vs[candidate]['name'])
            graph.vs[candidate]['centrality_leader'] = weight

    startBudgetYear = budgetYears[0]
    endBudgetYear = budgetYears[-1]

    draw(graph, '%s/figures/%s-%s-centrality-leaders.png' % (
        root_folder(), startBudgetYear, endBudgetYear))

    logger.info(ordered_list)
Esempio n. 3
0
def update_graphml(budgetYears):
    """Write centrality-leader scores into the network's GraphML file.

    Every vertex of the loaded network first gets ``centrality_leader = 0``.
    Leaders are then computed by ``cl.centrality_leaders`` on the largest
    component of a copy of the graph, and for the first ``topK`` tiers the
    matching vertices of the full graph (matched by ``name``) receive
    ``topK - tier``. Finally the network is written to
    ``<root>/data/networks/<start>-<end>.graphml``.

    budgetYears -- indexable sequence of integer years; the first and last
                   items form the file name.
    """
    first_year, last_year = budgetYears[0], budgetYears[-1]

    network = load_network_for(budgetYears)

    # Set the default before copying so the copy carries the attribute too.
    network.g.vs['centrality_leader'] = 0
    component = ResearchCollaborationNetwork.largest_component(network.g.copy())

    topK = 50
    candidates, rankings = cl.centrality_leaders(component)

    for tier in range(min(topK, len(rankings))):
        # The higher the better: tier 0 receives topK.
        score = topK - tier
        for member in list(rankings[tier]):
            leader_name = component.vs[candidates[member]]['name']
            # Leaders were found on the component copy, so they are mapped
            # back to the full graph through their 'name' attribute.
            matches = network.g.vs.select(name_eq=leader_name)
            matches['centrality_leader'] = score

    graphml_path = '%s/data/networks/%d-%d.graphml' % (
        root_folder(), first_year, last_year)

    network.write(graphml_path)
Esempio n. 4
0
def load_network_for(budgetYears):
    """Read the collaboration network stored for a span of budget years.

    The file path is ``<root>/data/networks/<start>-<end>.graphml`` where
    start and end are the first and last items of ``budgetYears`` (formatted
    with ``%d``). Returns whatever ``ResearchCollaborationNetwork.read``
    returns for that path.
    """
    first_year, last_year = budgetYears[0], budgetYears[-1]
    graphml_path = '%s/data/networks/%d-%d.graphml' % (
        root_folder(), first_year, last_year)
    return ResearchCollaborationNetwork.read(graphml_path)
Esempio n. 5
0
def load_network_for(budgetYears):
    """Load the network saved for the given budget years.

    budgetYears -- indexable sequence of integer years; only the first and
                   last entries are used, to build the path
                   ``<root>/data/networks/<first>-<last>.graphml``.

    Returns the result of ``ResearchCollaborationNetwork.read`` on that path.
    """
    year_span = (budgetYears[0], budgetYears[-1])

    # Prepend the root folder to the (first, last) pair for formatting.
    path = '%s/data/networks/%d-%d.graphml' % ((root_folder(),) + year_span)

    return ResearchCollaborationNetwork.read(path)
Esempio n. 6
0
def network_to_d3(budgetYears):
    """Export three JSON views of a network via ``ResearchCollaborationNetwork.d3``.

    Files written under ``<root>/data/networks``:
      * ``<start>-<end>-complete.json``          -- the graph as loaded
      * ``<start>-<end>.json``                   -- a simplified copy
      * ``<start>-<end>-largest-component.json`` -- largest component of a copy

    budgetYears -- indexable sequence of years; first and last items name
                   the files.
    """
    network = load_network_for(budgetYears)
    first_year, last_year = budgetYears[0], budgetYears[-1]

    # 1. The complete graph, untouched.
    complete_path = '%s/data/networks/%s-%s-complete.json' % (
        root_folder(), first_year, last_year)
    ResearchCollaborationNetwork.d3(network.g, complete_path)

    # 2. A simplified copy (the original comment says this removes isolated
    #    nodes; the simplify() implementation is not visible here).
    simplified = ResearchCollaborationNetwork.simplify(network.g.copy())
    simplified_path = '%s/data/networks/%s-%s.json' % (root_folder(), first_year,
                                                       last_year)
    ResearchCollaborationNetwork.d3(simplified, simplified_path)

    # 3. Only the largest component, taken from a fresh copy.
    largest = ResearchCollaborationNetwork.largest_component(network.g.copy())
    largest_path = '%s/data/networks/%s-%s-largest-component.json' % (
        root_folder(), first_year, last_year)
    ResearchCollaborationNetwork.d3(largest, largest_path)
Esempio n. 7
0
def draw_g(budgetYears):
    """Draw the simplified network and its largest component to PNG files.

    A copy of the loaded graph is passed through
    ``ResearchCollaborationNetwork.simplify`` and drawn to
    ``<root>/figures/<start>-<end>-<vertex count>.png``. The largest
    component of that simplified graph is then drawn to
    ``<root>/figures/<start>-<end>-<vertex count>-largest-component.png``.

    budgetYears -- indexable sequence of years; first and last items are
                   used in the file names.
    """
    network = load_network_for(budgetYears)

    simplified = ResearchCollaborationNetwork.simplify(network.g.copy())

    first_year = budgetYears[0]
    last_year = budgetYears[-1]

    overview_path = '%s/figures/%s-%s-%d.png' % (
        root_folder(), first_year, last_year, len(simplified.vs))
    draw(simplified, overview_path)

    # The component is derived from the simplified graph, not the raw one.
    component = ResearchCollaborationNetwork.largest_component(simplified)

    component_path = '%s/figures/%s-%s-%d-largest-component.png' % (
        root_folder(), first_year, last_year, len(component.vs))
    draw(component, component_path)
Esempio n. 8
0
def network_to_d3(budgetYears):
    """Dump the network to JSON in three variants.

    For each variant the graph is handed to
    ``ResearchCollaborationNetwork.d3`` together with a file name under
    ``<root>/data/networks`` built from the first and last budget year:
    the complete graph, a simplified copy, and the largest component of a
    copy.
    """
    network = load_network_for(budgetYears)
    startBudgetYear = budgetYears[0]
    endBudgetYear = budgetYears[-1]

    # (graph transformation or None, file name pattern), in export order.
    exports = (
        (None,
         '%s/data/networks/%s-%s-complete.json'),
        # Original comment: "remove isolated nodes".
        (ResearchCollaborationNetwork.simplify,
         '%s/data/networks/%s-%s.json'),
        # Only the largest component.
        (ResearchCollaborationNetwork.largest_component,
         '%s/data/networks/%s-%s-largest-component.json'),
    )

    for transform, pattern in exports:
        if transform is None:
            graph = network.g
        else:
            # Transformations always run on a fresh copy of the full graph.
            graph = transform(network.g.copy())
        filename = pattern % (root_folder(), startBudgetYear, endBudgetYear)
        ResearchCollaborationNetwork.d3(graph, filename)
Esempio n. 9
0
def test():
    """Plot per-user and per-network ROC curves side by side and save them.

    Each of the two subplots shows three curves produced by ``plot_auc``
    (2006-2009 in blue, 2010-2012 in green, 2006-2012 in red). A shared
    legend is attached through ``plt.legend`` and the figure is saved to
    ``<root>/figures/roc-curve.eps``.
    """
    from matplotlib import rc
    rc('text', usetex=False)
    rc('font', family='serif')

    fig = plt.figure(figsize=(8, 16))

    periods = ((2006, 2009, 'b'), (2010, 2012, 'g'), (2006, 2012, 'r'))
    panels = (('per_user', 'Per-user Model'),
              ('per_network', 'Per-network Model'))

    curves = []
    for position, (task, title) in enumerate(panels, 1):
        ax = fig.add_subplot(1, 2, position)
        roc.roc_curve_init(ax)

        # Keep only the handles of the most recent panel for the legend.
        curves = []
        for first_year, last_year, color in periods:
            _, (ax, curve) = plot_auc(first_year, last_year, task, ax, color)
            curves.append(curve)

        ax.set_title(title)

    fig.subplots_adjust(left=None,
                        bottom=None,
                        right=None,
                        top=None,
                        wspace=.3,
                        hspace=.2)
    # The legend is anchored with a negative offset relative to the axes.
    plt.legend(tuple(curves),
               ('RCN (2006 - 2009)', 'RCN (2010 - 2012)', 'RCN (2006 - 2012)'),
               bbox_to_anchor=(-0.15, -0.4),
               loc='center',
               prop={'size': 12})

    plt.savefig('%s/figures/roc-curve.eps' % (root_folder()),
                bbox_inches='tight',
                dpi=600)
Esempio n. 10
0
def plot_auc(startBudgetYear,endBudgetYear, task, ax, color):
    """Plot the ROC curve for previously saved samples on ``ax``.

    Samples are loaded from
    ``<root>/data/<start>-<end>.<task>.roc.samples.npy``; each row is
    unpacked as ``(key, label, score)`` and label and score are converted to
    Python floats before being passed to ``roc.roc_curve``.

    startBudgetYear, endBudgetYear -- years used in the file name
    task  -- task name used in the file name (e.g. 'per_user')
    ax    -- axes object handed to ``roc.roc_curve``
    color -- line color handed to ``roc.roc_curve``

    Returns ``(area, [ax, lines])`` as produced by ``roc.roc_curve``.
    """
    filename = '%s/data/%s-%s.%s.roc.samples.npy' % (
        root_folder(), startBudgetYear, endBudgetYear, task)

    roc_samples = np.load(filename)

    labels = []
    scores = []
    for _, label, score in roc_samples:
        # Use the built-in float: ``np.float`` was only an alias for it and
        # has been removed from NumPy (deprecated 1.20, removed 1.24).
        labels.append(float(label))
        scores.append(float(score))

    area, [ax, lines] = roc.roc_curve(labels=np.array(labels),
                                      scores=np.array(scores),
                                      ax=ax,
                                      linewidth=1.5,
                                      color=color)

    return area, [ax, lines]
Esempio n. 11
0
def plot_all_combined():
    """Plot power-law fits for three budget-year spans into one figure.

    ``get_data`` is called for 2006-2009, 2010-2012 and 2006-2012 (logging a
    separator and the year range before each call). The third value of each
    result is then passed to ``plot_powerlaw_combined`` with instance
    numbers 1..3, and the figure is saved to
    ``<root>/figures/powerlaw_degree_distribution.eps``.
    """
    import matplotlib.pyplot as plt

    spans = (
        (range(2006, 2010), 'RCN 2006 - 2009'),
        (range(2010, 2013), 'RCN 2010 - 2012'),
        (range(2006, 2013), 'RCN 2006 - 2012'),
    )

    # Collect the data for every span before any plotting happens.
    series = []
    for budgetYears, _ in spans:
        logger.info("================================================================")
        logger.info(budgetYears)
        # NOTE(review): only the third returned value (named strength_* in
        # the original) is plotted, although the output file is called
        # "powerlaw_degree_distribution" -- confirm this is intended.
        _, _, strength = get_data(budgetYears)
        series.append(strength)

    f = plt.figure(figsize=(12, 6))
    for data_inst, ((_, units), data) in enumerate(zip(spans, series), 1):
        plot_powerlaw_combined(data, data_inst, f, units)

    f.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=.3, hspace=.2)
    f.savefig('%s/figures/powerlaw_degree_distribution.eps'%(root_folder()), bbox_inches='tight')
Esempio n. 12
0
def rwr_scores(budgetYears):
    """Compute RWR scores for all vertex pairs and save the significant ones.

    Every pair ``(i, j)`` with ``i < j`` of the loaded graph is scored by
    ``pgrank.rwr_score``. Pairs whose score exceeds 0.001 are stored under
    the key ``"<name of i>,<name of j>"`` and dumped as JSON to
    ``<root>/data/networks/<start>-<end>-rwr.json``.

    budgetYears -- indexable sequence of integer years; first and last
                   items are logged and used in the file name.
    """
    startBudgetYear = budgetYears[0]
    endBudgetYear = budgetYears[-1]

    logger.info('---------------- %s-%s -------------------' %
                (startBudgetYear, endBudgetYear))

    network = load_network_for(budgetYears)

    g = network.g.copy()
    # NOTE(review): the return value is discarded, so this call only has an
    # effect if simplify() modifies ``g`` in place -- confirm.
    ResearchCollaborationNetwork.simplify(g)

    logger.info(g.summary())

    # The dense adjacency matrix the original built here was never read, so
    # it is no longer computed.

    vertex_count = len(g.vs)
    links = []
    for i in range(vertex_count):
        for j in range(i + 1, vertex_count):
            links.append('%d,%d' % (i, j))

    # Named ``pair_scores`` so the local does not shadow this function.
    pair_scores = pgrank.rwr_score(g, links)

    rwrs = {}
    for link, score in pair_scores.items():
        endpoints = link.split(',')
        v1 = int(endpoints[0])
        v2 = int(endpoints[1])

        # Only scores above the 0.001 threshold are kept.
        if float(score) > 0.001:
            rwrs['%s,%s' % (g.vs[v1]['name'], g.vs[v2]['name'])] = score

    filename = '%s/data/networks/%d-%d-rwr.json' % (root_folder(),
                                                    startBudgetYear, endBudgetYear)

    with open(filename, 'w') as out:
        json.dump(rwrs, out)
Esempio n. 13
0
def rwr_scores(budgetYears):
    """Score every vertex pair with RWR and write the results as JSON.

    All index pairs ``i < j`` of the loaded graph are encoded as ``"i,j"``
    strings and passed to ``pgrank.rwr_score``. Each returned score above
    0.001 is re-keyed by the two vertices' ``name`` attributes and the
    resulting dict is written to
    ``<root>/data/networks/<start>-<end>-rwr.json``.

    budgetYears -- indexable sequence of integer years; the first and last
                   items appear in the log line and the file name.
    """
    startBudgetYear = budgetYears[0]
    endBudgetYear = budgetYears[-1]

    logger.info('---------------- %s-%s -------------------' %
                (startBudgetYear, endBudgetYear))

    network = load_network_for(budgetYears)

    g = network.g.copy()
    # NOTE(review): simplify()'s return value is ignored; this relies on it
    # changing ``g`` in place -- confirm.
    ResearchCollaborationNetwork.simplify(g)

    logger.info(g.summary())

    # The original also materialised the full adjacency matrix here without
    # ever using it; that dead computation has been dropped.

    m = len(g.vs)
    links = ['%d,%d' % (i, j) for i in range(m) for j in range(i + 1, m)]

    # A distinct local name avoids shadowing the enclosing function.
    scores_by_link = pgrank.rwr_score(g, links)

    rwrs = {}
    for link, score in scores_by_link.items():
        if not float(score) > 0.001:
            # Scores at or below the threshold are not stored.
            continue
        v = link.split(',')
        source_name = g.vs[int(v[0])]['name']
        target_name = g.vs[int(v[1])]['name']
        rwrs['%s,%s' % (source_name, target_name)] = score

    filename = '%s/data/networks/%d-%d-rwr.json' % (
        root_folder(), startBudgetYear, endBudgetYear)

    with open(filename, 'w') as out:
        json.dump(rwrs, out)
Esempio n. 14
0
def plot_auc(startBudgetYear, endBudgetYear, task, ax, color):
    """Draw the ROC curve of stored samples and return its area.

    Loads ``<root>/data/<start>-<end>.<task>.roc.samples.npy``, reads each
    row as ``(key, label, score)``, converts labels and scores to floats and
    delegates the drawing to ``roc.roc_curve``.

    startBudgetYear, endBudgetYear -- years used in the sample file name
    task  -- task name used in the sample file name
    ax    -- axes object forwarded to ``roc.roc_curve``
    color -- line color forwarded to ``roc.roc_curve``

    Returns ``(area, [ax, lines])`` exactly as ``roc.roc_curve`` returned it.
    """
    filename = '%s/data/%s-%s.%s.roc.samples.npy' % (
        root_folder(), startBudgetYear, endBudgetYear, task)

    roc_samples = np.load(filename)

    # The built-in float replaces ``np.float``, an alias that NumPy
    # deprecated in 1.20 and removed in 1.24; the conversion is identical.
    labels = [float(label) for _, label, _ in roc_samples]
    scores = [float(score) for _, _, score in roc_samples]

    area, [ax, lines] = roc.roc_curve(labels=np.array(labels),
                                      scores=np.array(scores),
                                      ax=ax,
                                      linewidth=1.5,
                                      color=color)

    return area, [ax, lines]
Esempio n. 15
0
def test():
    """Render the per-user and per-network ROC panels into one EPS figure.

    Both panels are filled by ``plot_auc`` with the 2006-2009, 2010-2012 and
    2006-2012 curves. The legend uses the line handles of the second panel,
    and the figure is saved to ``<root>/figures/roc-curve.eps``.
    """
    from matplotlib import rc
    rc('text', usetex=False)
    rc('font', family='serif')

    fig = plt.figure(figsize=(8,16))

    def draw_panel(position, task, title):
        # One subplot: initialise it, add the three curves, set the title.
        ax = fig.add_subplot(1,2,position)
        roc.roc_curve_init(ax)

        _, (ax, blue) = plot_auc(2006, 2009, task, ax, 'b')
        _, (ax, green) = plot_auc(2010, 2012, task, ax, 'g')
        _, (ax, red) = plot_auc(2006, 2012, task, ax, 'r')

        ax.set_title(title)
        return blue, green, red

    draw_panel(1, 'per_user', 'Per-user Model')
    handles = draw_panel(2, 'per_network', 'Per-network Model')

    fig.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=.3, hspace=.2)
    # Legend placed with a negative anchor offset, using the handles
    # returned for the second panel.
    plt.legend(handles, ('RCN (2006 - 2009)',  'RCN (2010 - 2012)',  'RCN (2006 - 2012)'), bbox_to_anchor=(-0.15,-0.4),loc='center', prop={'size':12})

    plt.savefig('%s/figures/roc-curve.eps'%(root_folder()),bbox_inches='tight', dpi=600)
Esempio n. 16
0
def update_graphml(budgetYears):
    """Record each leader's tier rank in the network and rewrite the GraphML.

    All vertices start with ``centrality_leader = 0``. Leaders are computed
    by ``cl.centrality_leaders`` on the largest component of a copy of the
    graph; for the first ``topK`` tiers the vertices of the full graph with
    the same ``name`` receive ``tier + 1`` (so the first tier is 1). The
    network is then written back to
    ``<root>/data/networks/<start>-<end>.graphml``.

    budgetYears -- indexable sequence of integer years; first and last
                   items form the file name.
    """
    startBudgetYear = budgetYears[0]
    endBudgetYear = budgetYears[-1]

    network = load_network_for(budgetYears)

    # Default for every vertex; set before the copy is taken.
    network.g.vs['centrality_leader'] = 0
    component = ResearchCollaborationNetwork.largest_component(network.g.copy())

    topK = 50

    candidates, rankings = cl.centrality_leaders(component)

    tier_count = min(len(rankings), topK)
    tier = 0
    while tier < tier_count:
        logger.info('tier: %d' % tier)

        # Ranks are 1-based so that 0 keeps meaning "not a leader".
        rank = tier + 1
        for position in list(rankings[tier]):
            node_name = component.vs[candidates[position]]['name']
            # Match by name: the leaders come from the component copy.
            matches = network.g.vs.select(name_eq=node_name)
            matches['centrality_leader'] = rank

        tier += 1

    filename = '%s/data/networks/%d-%d.graphml' % (
        root_folder(), startBudgetYear, endBudgetYear)

    network.write(filename)
Esempio n. 17
0
def per_network(budgetYears):
    """Cross-validate RWR link prediction on pairs around 20 random vertices.

    Steps:
      1. Load the network, copy its graph and build the adjacency matrix.
      2. Pick 20 random vertex indices; every pair ``(i, j)`` with ``i`` among
         them and ``j > i`` becomes a sample, split into existing and
         non-existing edges.
      3. Run ``kfold`` (10) rounds using ``utils.k_fold_cross_validation``:
         validation edges are removed from a copy of the graph, the
         validation pairs are scored with ``pgrank.rwr_score`` and evaluated
         with ``benchmarks.auc`` and ``benchmarks.apk`` at k = 3, 5, 10.
      4. Log the averaged metrics and save all ``(pair, label, score)``
         samples to ``<root>/data/<start>-<end>.per_network.roc.samples.npy``.

    budgetYears -- indexable sequence of years; the first and last items are
                   logged and used in the output file name.
    """
    startBudgetYear = budgetYears[0]
    endBudgetYear = budgetYears[-1]

    logger.info('---------------- %s-%s -------------------'%(startBudgetYear, endBudgetYear))

    network = load_network_for(budgetYears)

    g = network.g.copy()

    # NOTE(review): the return value is discarded, so this call only matters
    # if simplify() modifies ``g`` in place -- confirm.
    ResearchCollaborationNetwork.simplify(g)

    logger.info(g.summary())

    # Randomly pick 20 users. shuffle() needs a mutable sequence, so the
    # range is materialised as a list (a bare range fails on Python 3).
    candidates = list(range(len(g.vs)))
    shuffle(candidates)
    candidates = set(candidates[:20])

    adj = np.array(g.get_adjacency(igraph.GET_ADJACENCY_BOTH).data)

    m, _ = adj.shape

    nonobservedlinks = {}
    nonobserved_actual_edges = []
    nonobserved_nonexist_edges = []

    for i in range(m):
        # Undirected graph, so only care whether the source is a candidate.
        if i not in candidates:
            continue
        for j in range(i + 1, m):
            key = '%d,%d'%(i,j)
            nonobservedlinks[key] = adj[i,j]

            if adj[i,j] > 0:
                nonobserved_actual_edges.append(key)
            else:
                nonobserved_nonexist_edges.append(key)

    auc = 0.0
    apk = {3: 0.0, 5: 0.0, 10: 0.0}
    kfold = 10
    roc_samples = []

    # Positive and negative samples are folded separately and paired up, so
    # every round sees both classes.
    folds = zip(utils.k_fold_cross_validation(list(nonobserved_actual_edges), kfold),
                utils.k_fold_cross_validation(list(nonobserved_nonexist_edges), kfold))

    for cnt, ((es_p_training, es_p_validation), (es_m_training, es_m_validation)) in enumerate(folds):

        logger.info('--------iteration %d-------------'%cnt)

        logger.info('xxxxxxxxxxxxxxxxxxxxxxxx')
        logger.info('positive training: %d'%len(es_p_training))
        logger.info('positive validation: %d'%len(es_p_validation))
        logger.info('------------------------')
        logger.info('negative training: %d'%len(es_m_training))
        logger.info('negative validation: %d'%len(es_m_validation))

        validation = es_p_validation + es_m_validation

        # Create the training graph by removing the validation edges.
        trainingG = g.copy()

        edges_2_delete = []
        for link in validation:
            v = link.split(',')
            v1 = int(v[0])
            v2 = int(v[1])
            # error=False makes get_eid return -1 for pairs with no edge.
            eId = trainingG.get_eid(v1,v2, directed=False, error=False)
            if eId != -1:
                edges_2_delete.append(eId)

        trainingG.delete_edges(edges_2_delete)

        rwr_scores = pgrank.rwr_score(trainingG, validation)

        actual = []
        posterior = []
        actual_edges = []

        for k in validation:
            actual.append(nonobservedlinks[k])
            if nonobservedlinks[k] > 0:
                actual_edges.append(k)

            posterior.append(rwr_scores[k])

            roc_samples.append((k, nonobservedlinks[k], rwr_scores[k]))

        auc += benchmarks.auc(actual, posterior)

        # Iterate the keys directly instead of the Python-2-only iteritems().
        for topK in apk:
            # Indexes of the topK highest-scored validation pairs.
            predictedIndexes = sorted(range(len(posterior)), reverse=True, key=lambda idx: posterior[idx])[:topK]
            predicted = np.array(validation)[predictedIndexes]

            apk[topK] += benchmarks.apk(actual_edges, predicted, topK)

    # take a look at http://www.machinedlearnings.com/2012/06/thought-on-link-prediction.html
    logger.info('auc: %f'%(auc/kfold))
    for topK in apk:
        logger.info('ap@%d: %f'%(topK, (apk[topK]/kfold)))

    np.save('%s/data/%s-%s.per_network.roc.samples.npy'%(root_folder(),startBudgetYear, endBudgetYear), np.array(roc_samples))
Esempio n. 18
0
def per_candidate(budgetYears):
    """Cross-validate RWR link prediction separately for up to 10 candidates.

    Candidates are random vertices with degree greater than 15. For each
    candidate ``c`` every pair ``(c, j)`` (``j`` over all vertex indices,
    including ``c`` itself) is a sample, labelled by the adjacency matrix.
    The samples are evaluated in ``kfold`` (5) rounds: validation edges are
    removed from a copy of the graph, the validation pairs are scored with
    ``pgrank.rwr_score`` and measured with ``benchmarks.auc`` and
    ``benchmarks.apk`` at k = 3, 5, 10.

    Per-candidate AUC / ap@k and overall AUC / map@k are logged, and all
    ``(pair, label, score)`` samples are saved to
    ``<root>/data/<start>-<end>.per_user.roc.samples.npy``.

    budgetYears -- indexable sequence of years; the first and last items are
                   logged and used in the output file name.
    """
    startBudgetYear = budgetYears[0]
    endBudgetYear = budgetYears[-1]

    logger.info('---------------- %s-%s -------------------'%(startBudgetYear, endBudgetYear))

    network = load_network_for(budgetYears)

    g = network.g.copy()

    # NOTE(review): the return value is discarded, so this call only matters
    # if simplify() modifies ``g`` in place -- confirm.
    ResearchCollaborationNetwork.simplify(g)

    logger.info(g.summary())

    adj = np.array(g.get_adjacency(igraph.GET_ADJACENCY_BOTH).data)

    m, _ = adj.shape

    # Up to 10 randomly chosen vertices with degree > 15.
    candidates = [cNode.index for cNode in g.vs.select(_degree_gt=15)]
    shuffle(candidates)
    candidates = candidates[:10]

    cutoffs = (3, 5, 10)
    kfold = 5

    total_auc = 0.0
    # The original bound ``mapk`` and every per-candidate ``apk`` to one and
    # the same dict, so each fold was counted twice and per-candidate values
    # never reset. Each accumulator now gets its own dict.
    mapk = dict.fromkeys(cutoffs, 0.0)

    roc_samples = []

    progress = len(candidates)

    # For each candidate we do training and testing...
    for c in candidates:

        logger.info('%d-----------------------'%progress)

        nonobservedlinks = {}
        nonobserved_actual_edges = []
        nonobserved_nonexist_edges = []

        for j in range(m):
            key = '%d,%d'%(c,j)
            nonobservedlinks[key] = adj[c,j]

            if adj[c,j] > 0:
                nonobserved_actual_edges.append(key)
            else:
                nonobserved_nonexist_edges.append(key)

        auc = 0.0
        # Average precision at k is defined per candidate: fresh dict each time.
        apk = dict.fromkeys(cutoffs, 0.0)

        # Positive and negative samples are folded separately and paired up.
        folds = zip(utils.k_fold_cross_validation(list(nonobserved_actual_edges), kfold),
                    utils.k_fold_cross_validation(list(nonobserved_nonexist_edges), kfold))

        for (_, es_p_validation), (_, es_m_validation) in folds:

            validation = es_p_validation + es_m_validation

            # Create the training graph by removing the validation edges.
            trainingG = g.copy()

            edges_2_delete = []
            for link in validation:
                v = link.split(',')
                v1 = int(v[0])
                v2 = int(v[1])
                # error=False makes get_eid return -1 for pairs with no edge.
                eId = trainingG.get_eid(v1,v2, directed=False, error=False)
                if eId != -1:
                    edges_2_delete.append(eId)

            trainingG.delete_edges(edges_2_delete)

            rwr_scores = pgrank.rwr_score(trainingG, validation)

            # values() works on Python 2 and 3, unlike iteritems().
            for rwr_score in rwr_scores.values():
                if rwr_score > 1:
                    logger.info('overflow? rwr_score: %0.2f'%(rwr_score))

            actual = []
            posterior = []
            actual_edges = []

            for k in validation:
                actual.append(nonobservedlinks[k])
                if nonobservedlinks[k] > 0:
                    actual_edges.append(k)

                posterior.append(rwr_scores[k])

                roc_samples.append((k, nonobservedlinks[k], rwr_scores[k]))

            auc_ = benchmarks.auc(actual, posterior)
            auc += auc_
            total_auc += auc_

            for topK in cutoffs:
                # Indexes of the topK highest-scored validation pairs.
                predictedIndexes = sorted(range(len(posterior)), reverse=True, key=lambda idx: posterior[idx])[:topK]
                predicted = np.array(validation)[predictedIndexes]

                apk_ = benchmarks.apk(actual_edges, predicted, topK)
                apk[topK] += apk_
                mapk[topK] += apk_

        logger.info('%d: auc: %f'%(c, float(auc)/kfold))

        for topK in cutoffs:
            logger.info('%d: ap@%d: %f'%(c, topK, (apk[topK]/kfold)))

        progress -= 1

    logger.info('auc: %f'%(float(total_auc)/(kfold*len(candidates))))
    for topK in cutoffs:
        logger.info('map@%d: %f'%(topK, (mapk[topK]/(kfold*len(candidates)))))

    np.save('%s/data/%s-%s.per_user.roc.samples.npy'%(root_folder(),startBudgetYear, endBudgetYear), np.array(roc_samples))