Esempio n. 1
0
def main(args):
    """Build the training matrices, save them to a .mat file and run training.

    ``args`` is read positionally:
        args[0] db        -- given to DBReader and to the graphutils loaders
        args[1] date1     -- fourth argument of the base collab graph call and
                             third argument of the delta collab graph call
                             (presumably end of base / start of delta --
                             confirm against graphutils)
        args[2] date2     -- fourth argument of the delta collab graph and of
                             the feature graph call
        args[3] date3     -- third argument of the base collab graph call
        args[4] k         -- converted with int() and forwarded to
                             consolidate_features_add
        args[5] basename  -- stem of the .mat file under LEARNING_ROOT and
                             prefix of the two matrix names
    """
    db, date1, date2, date3 = args[0], args[1], args[2], args[3]
    k = int(args[4])
    basename = args[5]

    db_reader = DBReader(db)
    print("Getting uid")
    uid = db_reader.uid()

    print("Getting all the feature graphs")
    feat_graphs = graphutils.get_feat_graphs(db, uid, None, date2)

    print("Getting Gcollab_delta graph")
    delta_collab = graphutils.get_collab_graph(db, uid, date1, date2)
    base_collab = graphutils.get_collab_graph(db, uid, date3, date1)

    # Bundle the base collab graph with the feature graphs and report sizes.
    graph_bundle = graphutils.get_base_dict(base_collab, feat_graphs)
    graphutils.print_stats(graph_bundle)
    graphutils.print_graph_stats("Gcollab_delta", delta_collab)

    # Output location and the names the two matrices are stored under.
    mat_path = os.path.join(LEARNING_ROOT, basename + ".mat")
    feat_name = "%s_%s" % (basename, FEATURES)
    label_name = "%s_%s" % (basename, LABELS)

    feats = consolidateFeatures.consolidate_features_add(
        graph_bundle, k, delta_collab)
    labels = consolidateFeatures.consolidate_labels(feats, delta_collab)

    train_pair = interface.matwrapTrain(feats, labels)
    np_train, np_output = train_pair
    interface.writeTrain(np_train, np_output, mat_path, feat_name, label_name)

    # Extend the mlab search path with LEARNING_ROOT so the .m functions there
    # can be called as mlab attributes, then run the training routine.
    mlab.path(mlab.path(), LEARNING_ROOT)
    mlab.training(np_train, np_output)
Esempio n. 2
0
def main(args):
    """Assemble features and labels for the collab graphs and train on them.

    Expected positional contents of ``args``: db, date1, date2, date3, k
    (parsed with int()) and basename. The base collab graph is requested with
    (date3, date1), the delta collab graph with (date1, date2), and the
    feature graphs with (None, date2). The resulting matrices are written to
    ``<LEARNING_ROOT>/<basename>.mat`` and then passed to ``mlab.training``.
    """
    db = args[0]
    date1 = args[1]
    date2 = args[2]
    date3 = args[3]
    k = int(args[4])
    basename = args[5]

    uid_source = DBReader(db)
    print("Getting uid")
    uid = uid_source.uid()

    print("Getting all the feature graphs")
    per_feature = graphutils.get_feat_graphs(db, uid, None, date2)

    print("Getting Gcollab_delta graph")
    collab_delta = graphutils.get_collab_graph(db, uid, date1, date2)
    collab_base = graphutils.get_collab_graph(db, uid, date3, date1)

    all_base = graphutils.get_base_dict(collab_base, per_feature)
    graphutils.print_stats(all_base)
    graphutils.print_graph_stats("Gcollab_delta", collab_delta)

    out_path = os.path.join(LEARNING_ROOT, basename + ".mat")
    x_name = "%s_%s" % (basename, FEATURES)
    y_name = "%s_%s" % (basename, LABELS)

    # Features come from the base graphs; labels are derived from those
    # features together with the delta graph.
    x_rows = consolidateFeatures.consolidate_features_add(
        all_base, k, collab_delta)
    y_rows = consolidateFeatures.consolidate_labels(x_rows, collab_delta)

    np_train, np_output = interface.matwrapTrain(x_rows, y_rows)
    interface.writeTrain(np_train, np_output, out_path, x_name, y_name)

    # LEARNING_ROOT is appended to the mlab path so its .m functions are
    # reachable as mlab attributes before training is invoked.
    mlab.path(mlab.path(), LEARNING_ROOT)
    mlab.training(np_train, np_output)
Esempio n. 3
0
def main(args):
    """Sweep beta for the random-walk predictor and log averaged accuracy.

    Positional ``args``:
        args[0] db       -- given to DBReader and the graphutils loaders
        args[1] date1    -- fourth argument of the base/feature graph calls
                            and third argument of the delta graph call
                            (presumably end of base / start of delta --
                            confirm against graphutils)
        args[2] date2    -- fourth argument of the delta graph call
        args[3] date3    -- third argument of the base collab graph call
        args[4] k        -- must parse as int; not otherwise used here
        args[5] OUTFILE  -- path of the tab-separated result file (truncated)
        args[6] beta     -- optional; when given it is used as both ends of
                            the sweep instead of the default 0..20

    Up to 10 distinct source nodes present in both the base and delta collab
    graphs are drawn at random; a node is kept only if ``evaluate.getYList``
    returns a non-empty list for it. For every beta produced by ``frange``,
    recall, average rank and ``len(preck)`` are averaged over the kept sources
    and written through ``print_and_log``.
    """
    db = args[0]
    date1 = args[1]
    date2 = args[2]
    date3 = args[3]
    k = int(args[4])  # kept so a non-integer argument is still rejected
    OUTFILE = args[5]

    if len(args) >= 7:
        beta = float(args[6])
        bstart = beta
        bfinish = beta
    else:
        bstart = 0
        bfinish = 20

    reader = DBReader(db)
    print("Getting uid")
    uid = reader.uid()

    print("Getting all the base graphs")
    feature_graphs = graphutils.get_feat_graphs(db, uid, None, date1)
    Gcollab_base = graphutils.get_collab_graph(db, uid, date3, date1)
    base_graphs = graphutils.get_base_dict(Gcollab_base, feature_graphs)

    print("Getting Gcollab_delta graph")
    Gcollab_delta = graphutils.get_collab_graph(db, uid, date1, date2)

    baseG = base_graphs[graphutils.Graph.COLLAB]
    featG = graphutils.split_feat_graphs(base_graphs)

    # Candidate sources are the node ids that appear in both graphs.
    deltaNIDs = [node.GetId() for node in Gcollab_delta.Nodes()]
    baseNIDs = [node.GetId() for node in baseG.Nodes()]
    common_nodes = list(set(deltaNIDs).intersection(baseNIDs))

    ktop = 20
    subGraphK = 10
    n_iterations = 10
    sources = []

    # The context manager closes the log file even when a later call raises.
    with open(OUTFILE, "w") as f:
        print_and_log("# Beta\tRecall\tAvg. Rank\tPrec 20\n", f)

        # Draw without replacement: every drawn node leaves the pool, so a
        # rejected node is never redrawn and the loop ends once the pool is
        # exhausted instead of spinning or calling random.choice on [].
        while len(sources) < n_iterations and common_nodes:
            src = random.choice(common_nodes)
            common_nodes.remove(src)
            subBaseG = graphutils.getSubGraph(baseG, src, subGraphK)
            actual = evaluate.getYList(subBaseG, Gcollab_delta, src)

            # Keep the node only if it forms at least one edge in the delta.
            if len(actual) > 0:
                sources.append(src)
            else:
                print("Warning. Ignoring node with 0 new edges")

        if len(sources) < n_iterations:
            print("Warning. Only %d of %d source nodes could be selected"
                  % (len(sources), n_iterations))
        print("number of nodes and edges in graph: %s %s"
              % (baseG.GetNodes(), baseG.GetEdges()))

        if not sources:
            # Nothing to average over; leave only the header in the log.
            print("Warning. No usable source node; skipping evaluation")
            return

        # Average over the sources actually evaluated, not the target count.
        num = float(len(sources))

        # NOTE(review): with an explicit beta, bstart == bfinish; confirm that
        # frange yields that single value (i.e. the end point is inclusive).
        for beta in frange(bstart, bfinish, 4):
            total_recall = 0
            total_avg_rank = 0
            total_preck = 0

            for src in sources:
                subBaseG = graphutils.getSubGraph(baseG, src, subGraphK)
                print("sub graph nodes: %s" % (subBaseG.GetNodes(),))
                print("sub graph edges: %s" % (subBaseG.GetEdges(),))
                topIDs = runrw.runrw(subBaseG, featG, src, beta)

                # Compare the ranked ids with the edges that actually formed.
                actual = evaluate.getYList(subBaseG, Gcollab_delta, src)
                recall, preck, average_rank = evaluate.getAccuracy(
                    topIDs, actual, ktop)
                print("%s %s %s" % (recall, preck, average_rank))

                total_recall += recall
                total_avg_rank += average_rank
                # preck is a sized collection; its length is what is averaged.
                total_preck += len(preck)

            print_and_log(
                "%f\t%f\t%f\t%f\n" % (beta, total_recall / num,
                                      total_avg_rank / num,
                                      total_preck / num), f)
Esempio n. 4
0
def main(args):
    """Sweep beta for the random-walk predictor and log averaged accuracy.

    Positional ``args``:
        args[0] db       -- given to DBReader and the graphutils loaders
        args[1] date1    -- fourth argument of the base/feature graph calls
                            and third argument of the delta graph call
                            (presumably end of base / start of delta --
                            confirm against graphutils)
        args[2] date2    -- fourth argument of the delta graph call
        args[3] date3    -- third argument of the base collab graph call
        args[4] k        -- must parse as int; not otherwise used here
        args[5] OUTFILE  -- path of the tab-separated result file (truncated)
        args[6] beta     -- optional; when given it is used as both ends of
                            the sweep instead of the default 0..20

    Up to 10 distinct source nodes present in both the base and delta collab
    graphs are drawn at random; a node is kept only if ``evaluate.getYList``
    returns a non-empty list for it. For every beta produced by ``frange``,
    recall, average rank and ``len(preck)`` are averaged over the kept sources
    and written through ``print_and_log``.
    """
    db = args[0]
    date1 = args[1]
    date2 = args[2]
    date3 = args[3]
    k = int(args[4])  # kept so a non-integer argument is still rejected
    OUTFILE = args[5]

    if len(args) >= 7:
        beta = float(args[6])
        bstart = beta
        bfinish = beta
    else:
        bstart = 0
        bfinish = 20

    reader = DBReader(db)
    print("Getting uid")
    uid = reader.uid()

    print("Getting all the base graphs")
    feature_graphs = graphutils.get_feat_graphs(db, uid, None, date1)
    Gcollab_base = graphutils.get_collab_graph(db, uid, date3, date1)
    base_graphs = graphutils.get_base_dict(Gcollab_base, feature_graphs)

    print("Getting Gcollab_delta graph")
    Gcollab_delta = graphutils.get_collab_graph(db, uid, date1, date2)

    baseG = base_graphs[graphutils.Graph.COLLAB]
    featG = graphutils.split_feat_graphs(base_graphs)

    # Candidate sources are the node ids that appear in both graphs.
    deltaNIDs = [node.GetId() for node in Gcollab_delta.Nodes()]
    baseNIDs = [node.GetId() for node in baseG.Nodes()]
    common_nodes = list(set(deltaNIDs).intersection(baseNIDs))

    ktop = 20
    subGraphK = 10
    n_iterations = 10
    sources = []

    # The context manager closes the log file even when a later call raises.
    with open(OUTFILE, "w") as f:
        print_and_log("# Beta\tRecall\tAvg. Rank\tPrec 20\n", f)

        # Draw without replacement: every drawn node leaves the pool, so a
        # rejected node is never redrawn and the loop ends once the pool is
        # exhausted instead of spinning or calling random.choice on [].
        while len(sources) < n_iterations and common_nodes:
            src = random.choice(common_nodes)
            common_nodes.remove(src)
            subBaseG = graphutils.getSubGraph(baseG, src, subGraphK)
            actual = evaluate.getYList(subBaseG, Gcollab_delta, src)

            # Keep the node only if it forms at least one edge in the delta.
            if len(actual) > 0:
                sources.append(src)
            else:
                print("Warning. Ignoring node with 0 new edges")

        if len(sources) < n_iterations:
            print("Warning. Only %d of %d source nodes could be selected"
                  % (len(sources), n_iterations))
        print("number of nodes and edges in graph: %s %s"
              % (baseG.GetNodes(), baseG.GetEdges()))

        if not sources:
            # Nothing to average over; leave only the header in the log.
            print("Warning. No usable source node; skipping evaluation")
            return

        # Average over the sources actually evaluated, not the target count.
        num = float(len(sources))

        # NOTE(review): with an explicit beta, bstart == bfinish; confirm that
        # frange yields that single value (i.e. the end point is inclusive).
        for beta in frange(bstart, bfinish, 4):
            total_recall = 0
            total_avg_rank = 0
            total_preck = 0

            for src in sources:
                subBaseG = graphutils.getSubGraph(baseG, src, subGraphK)
                print("sub graph nodes: %s" % (subBaseG.GetNodes(),))
                print("sub graph edges: %s" % (subBaseG.GetEdges(),))
                topIDs = runrw.runrw(subBaseG, featG, src, beta)

                # Compare the ranked ids with the edges that actually formed.
                actual = evaluate.getYList(subBaseG, Gcollab_delta, src)
                recall, preck, average_rank = evaluate.getAccuracy(
                    topIDs, actual, ktop)
                print("%s %s %s" % (recall, preck, average_rank))

                total_recall += recall
                total_avg_rank += average_rank
                # preck is a sized collection; its length is what is averaged.
                total_preck += len(preck)

            print_and_log(
                "%f\t%f\t%f\t%f\n" % (beta, total_recall / num,
                                      total_avg_rank / num,
                                      total_preck / num), f)
Esempio n. 5
0
import path
from db.interface import * 
from analysis import graphutils
import utils

# Smoke-test script: load the small database, look up one user twice, then
# build the collab and feature graphs and print their statistics.
db = utils.get_small_db()
reader = DBReader(db)

uid = reader.uid()
# list(...) keeps the first-value lookup working whether values() returns a
# list (Python 2) or a non-indexable view (Python 3); an empty mapping still
# raises IndexError as before.
rowid = list(uid.values())[0]
# Single-argument print(...) produces the same output on Python 2 and 3.
print(reader.get_users([rowid, rowid]))

Gcollab = graphutils.get_collab_graph(db, uid)
feature_graphs = graphutils.get_feat_graphs(db, uid)

# Combine the collab graph with the feature graphs and report their sizes.
base_graphs = graphutils.get_base_dict(Gcollab, feature_graphs)
graphutils.print_stats(base_graphs)