# Training pipeline: build feature/label matrices from the base graphs and
# hand them to the MATLAB learner.
# Assumed imports, inferred from the names used below; db.interface and
# analysis.graphutils are this repo's modules (see the demo script at the end
# of this section). LEARNING_ROOT, FEATURES, and LABELS are constants assumed
# to be defined or imported elsewhere in this module.
import os

from db.interface import DBReader
from analysis import graphutils
import consolidateFeatures
import interface
from mlabwrap import mlab  # assumed: the mlabwrap MATLAB bridge


def main(args):
    db = args[0]
    date1 = args[1]
    date2 = args[2]
    date3 = args[3]
    k = int(args[4])
    basename = args[5]

    reader = DBReader(db)
    print("Getting uid")
    uid = reader.uid()

    # The base collaboration graph covers [date3, date1]; the delta graph
    # holds the new collaboration edges formed in [date1, date2].
    print("Getting all the feature graphs")
    feature_graphs = graphutils.get_feat_graphs(db, uid, None, date2)

    print("Getting Gcollab_delta graph")
    Gcollab_delta = graphutils.get_collab_graph(db, uid, date1, date2)
    Gcollab_base = graphutils.get_collab_graph(db, uid, date3, date1)

    base_graphs = graphutils.get_base_dict(Gcollab_base, feature_graphs)
    graphutils.print_stats(base_graphs)
    graphutils.print_graph_stats("Gcollab_delta", Gcollab_delta)

    filepath = os.path.join(LEARNING_ROOT, basename + ".mat")
    features_matrix_name = "%s_%s" % (basename, FEATURES)
    labels_matrix_name = "%s_%s" % (basename, LABELS)

    # Build the feature matrix, then derive the labels from the delta graph.
    features = consolidateFeatures.consolidate_features_add(base_graphs, k, Gcollab_delta)
    #features = consolidateFeatures.consolidate_features(base_graphs, Gcollab_delta, k)
    labels = consolidateFeatures.consolidate_labels(features, Gcollab_delta)

    # Serialize the matrices to a .mat file and run the MATLAB training routine.
    np_train, np_output = interface.matwrapTrain(features, labels)
    interface.writeTrain(np_train, np_output, filepath,
                         features_matrix_name, labels_matrix_name)

    # Add learning root to mlab path so that all .m functions are available
    # as mlab attributes.
    mlab.path(mlab.path(), LEARNING_ROOT)
    mlab.training(np_train, np_output)
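# For reference, a minimal sketch of how the training entry point above might
# be invoked from the command line. The __main__ guard and the example
# argument values are assumptions, not part of the original script.
import sys

if __name__ == "__main__":
    # Positional arguments, in the order main() unpacks them:
    #   db date1 date2 date3 k basename
    # e.g. (hypothetical values):
    #   python train.py data/repo.db 2013-01-01 2013-06-01 2012-06-01 5 run1
    main(sys.argv[1:])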
# Evaluation pipeline: sweep the beta parameter of the random walk and report
# recall, average rank, and precision@20 against the delta graph.
# Assumed imports, inferred from the names used below. frange and
# print_and_log are project helpers not shown in this section; a sketch of
# both follows this script.
from __future__ import print_function  # the original mixed Python 2/3 print styles

import random

from db.interface import DBReader
from analysis import graphutils
import evaluate
import runrw


def main(args):
    db = args[0]
    date1 = args[1]
    date2 = args[2]
    date3 = args[3]
    k = int(args[4])  # kept for a consistent CLI; unused in this script
    OUTFILE = args[5]

    # An explicit seventh argument pins the sweep to a single beta;
    # otherwise sweep beta over [0, 20].
    if len(args) >= 7:
        beta = float(args[6])
        bstart = beta
        bfinish = beta
    else:
        bstart = 0
        bfinish = 20

    reader = DBReader(db)
    print("Getting uid")
    uid = reader.uid()

    print("Getting all the base graphs")
    feature_graphs = graphutils.get_feat_graphs(db, uid, None, date1)
    Gcollab_base = graphutils.get_collab_graph(db, uid, date3, date1)
    base_graphs = graphutils.get_base_dict(Gcollab_base, feature_graphs)

    print("Getting Gcollab_delta graph")
    Gcollab_delta = graphutils.get_collab_graph(db, uid, date1, date2)

    # From the base graph, take a random source node in every iteration.
    baseG = base_graphs[graphutils.Graph.COLLAB]
    featG = graphutils.split_feat_graphs(base_graphs)

    # Candidate sources must exist in both the base and the delta graph.
    deltaNIDs = [node.GetId() for node in Gcollab_delta.Nodes()]
    baseNIDs = [node.GetId() for node in baseG.Nodes()]
    common_nodes = list(set(deltaNIDs).intersection(baseNIDs))

    ktop = 20
    subGraphK = 10

    f = open(OUTFILE, "w")
    print_and_log("# Beta\tRecall\tAvg. Rank\tPrec 20\n", f)

    # Sample source nodes; keep a node only if it forms at least one edge
    # in the delta graph.
    n_iterations = 10
    it = 0
    sources = []
    while it < n_iterations:
        src = random.choice(common_nodes)
        subBaseG = graphutils.getSubGraph(baseG, src, subGraphK)
        #print('subgraph:', subBaseG.GetNodes(), subBaseG.GetEdges())
        actual = evaluate.getYList(subBaseG, Gcollab_delta, src)
        #actual = evaluate.getYList(baseG, Gcollab_delta, src)
        if len(actual) > 0:
            sources.append(src)
            common_nodes.remove(src)
            it += 1
        else:
            print("Warning: ignoring node with 0 new edges")

    print("number of nodes and edges in graph:", baseG.GetNodes(), baseG.GetEdges())

    for beta in frange(bstart, bfinish, 4):
        total_recall = 0
        total_avg_rank = 0
        total_preck = 0
        for src in sources:
            subBaseG = graphutils.getSubGraph(baseG, src, subGraphK)
            print("sub graph nodes:", subBaseG.GetNodes())
            print("sub graph edges:", subBaseG.GetEdges())

            # Run the random walk, then compare its top-ranked IDs with the
            # edges the source actually formed in the delta graph.
            topIDs = runrw.runrw(subBaseG, featG, src, beta)
            #topIDs = runrw.runrw(baseG, featG, Gcollab_delta, src, beta)
            actual = evaluate.getYList(subBaseG, Gcollab_delta, src)
            #actual = evaluate.getYList(baseG, Gcollab_delta, src)

            recall, preck, average_rank = evaluate.getAccuracy(topIDs, actual, ktop)
            print(recall, preck, average_rank)
            total_recall += recall
            total_avg_rank += average_rank
            total_preck += len(preck)

        num = float(n_iterations)
        print_and_log("%f\t%f\t%f\t%f\n" % (beta, total_recall / num,
                                            total_avg_rank / num,
                                            total_preck / num), f)
    f.close()
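# The evaluation script above depends on two helpers that do not appear in
# this section: frange (Python's built-in range rejects float steps) and
# print_and_log. Minimal sketches consistent with how they are called follow;
# the inclusive stop in frange is an assumption, chosen so the single-beta
# case (bstart == bfinish) still yields one iteration. The project's actual
# helpers may differ.
import sys


def frange(start, stop, step):
    # Float analogue of range(); the stop value is included (assumption).
    x = float(start)
    while x <= stop:
        yield x
        x += step


def print_and_log(line, f):
    # Echo a result line to stdout and append it to the open results file.
    sys.stdout.write(line)
    f.write(line)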
from __future__ import print_function  # the original used a Python 2 print statement

import path
from db.interface import *
from analysis import graphutils
import utils

# Smoke test: load the small sample database and print basic graph stats.
db = utils.get_small_db()
reader = DBReader(db)
uid = reader.uid()

rowid = list(uid.values())[0]  # wrap in list() so this also works under Python 3
print(reader.get_users([rowid, rowid]))

Gcollab = graphutils.get_collab_graph(db, uid)
feature_graphs = graphutils.get_feat_graphs(db, uid)
base_graphs = graphutils.get_base_dict(Gcollab, feature_graphs)
graphutils.print_stats(base_graphs)