def insert_day_n(loc, day, log_loc):
    """Build the topical graph from the tweets in ``loc`` and log its growth.

    Each non-blank line of ``loc`` is read as tab-separated fields: the
    author's user id first, the tweet timestamp (an integer) second.
    Authors found in the main graph (localhost:7474) are inserted into the
    topical graph (localhost:7475) and linked with a "follows" edge to
    every friend that is already present in the topical graph.

    Whenever a tweet's timestamp is more than ``30 * 60`` past the start of
    the current window (presumably seconds, i.e. 30 minutes -- confirm), the
    window restarts at that tweet, ``connected_comp.main(ts)`` is called and
    its five results are written with ``dump_log``.

    Args:
        loc: path of the tab-separated tweet file to read.
        day: value stored under the "day" key of every inserted profile.
        log_loc: passed to ``dump_log`` as the log destination.
    """
    graph_db = neo4j.GraphDatabaseService("http://localhost:7474/db/data/")
    graph_db_topic = neo4j.GraphDatabaseService(
        "http://localhost:7475/db/data/")
    c1 = 0  # tweets read so far
    c2 = 0  # tweets whose author exists in the main graph
    window = 0

    # The context manager closes the file even when a graph call or the
    # timestamp parsing raises part-way through.
    with open(loc, "r") as f:
        for line in f:
            # A blank (e.g. trailing) line carries no tweet; skip it rather
            # than fail on the missing timestamp field.
            if not line.strip():
                continue

            tobj = line.split("\t")
            id_ = tobj[0]
            ts = int(tobj[1])

            # Initialise the window with the timestamp of the first tweet.
            if c1 == 0:
                window = ts

            n = graph_db.get_indexed_node("users", "uid", str(id_))
            c1 += 1

            # Only authors present in the main graph are copied over.
            if n:
                c2 += 1
                profile = graph_db.get_properties(n)[0]
                friends = n.get_related_nodes(
                    neo4j.Direction.OUTGOING, "follows")
                profile['day'] = day
                profile['ts'] = ts

                # Insert the author if not already in the topical graph.
                author = graph_db_topic.get_or_create_indexed_node(
                    "users", "uid", id_, profile)

                for friend in friends:
                    fid = friend["uid"]
                    fn = graph_db_topic.get_indexed_node(
                        "users", "uid", str(fid))
                    # Link only to friends already in the topical graph.
                    if fn:
                        graph_db_topic.create((author, "follows", fn))

            # Roll the window and log the connected-component sizes.
            if ts > window + 30 * 60:
                window = ts
                l1, l2, mid, avg, l = connected_comp.main(ts)
                dump_log(
                    log_loc,
                    [c1, c2, l1, l2, mid, avg, l, datetime.now()])
def main():
    """Log connected-component stats over a growing time range.

    The range always starts at ``first_tweet``; its end starts one
    30 * 60 step later and advances by that step while it stays below
    ``last_tweet``. For every end point the list returned by
    ``connected_comp.main`` is written with ``dump_log``, followed by the
    current time and the human-readable range bounds.

    NOTE(review): this file defines ``main`` more than once; the last
    definition is the one that remains bound at import time.
    """
    log_path = "/home/pranayag/validation/evolving_graph/mark_stats.txt"
    first_tweet = 1370394609
    last_tweet = 1370907519
    step = 30 * 60

    # The start bound never moves, so each range contains the previous one.
    for range_end in range(first_tweet + step, last_tweet, step):
        start_label = time.ctime(first_tweet)
        end_label = time.ctime(range_end)
        stats = connected_comp.main(
            1, START=first_tweet, END=range_end, mode=2)
        row = stats + [datetime.now(), start_label, "to", end_label]
        dump_log(log_path, row)
def main():
    """Log connected-component stats over a growing time range.

    The range always starts at ``first_tweet``; its end starts one
    30 * 60 step later and advances by that step while it stays below
    ``last_tweet``. For every end point the list returned by
    ``connected_comp.main`` is written with ``dump_log``, followed by the
    current time and the human-readable range bounds.
    """
    log_path = "/home/pranayag/validation/evolving_graph/mark_stats.txt"
    first_tweet = 1370394609
    last_tweet = 1370907519
    step = 30 * 60

    # The start bound never moves, so each range contains the previous one.
    for range_end in range(first_tweet + step, last_tweet, step):
        start_label = time.ctime(first_tweet)
        end_label = time.ctime(range_end)
        stats = connected_comp.main(
            1, START=first_tweet, END=range_end, mode=2)
        row = stats + [datetime.now(), start_label, "to", end_label]
        dump_log(log_path, row)
def insert_day_n(loc, day, log_loc):
    """Create the evolving graph for the tweet cluster of topic ``loc``.

    Each non-blank line of ``loc`` is read as tab-separated fields: the
    author's user id first, the tweet timestamp (an integer) second.
    Authors found in the main graph (localhost:7474) and not yet in the
    topical graph (localhost:7475) are inserted there and linked with a
    "follows" edge to every friend already present in the topical graph.

    Graph stats are written to ``log_loc`` via ``dump_log``:
      * a progress row every 100 tweets read, and
      * a row with fresh ``connected_comp.main(ts)`` results whenever a
        tweet is more than ``30 * 60`` past the start of the current
        window (presumably seconds, i.e. 30 minutes -- confirm).

    Args:
        loc: path of the tab-separated tweet file to read.
        day: unused by this implementation; kept so existing callers
            keep working.
        log_loc: passed to ``dump_log`` as the log destination.
    """
    graph_db = neo4j.GraphDatabaseService("http://localhost:7474/db/data/")
    graph_db_topic = neo4j.GraphDatabaseService(
        "http://localhost:7475/db/data/")
    c1 = 0  # tweets read so far
    c2 = 0  # tweets whose author exists in the main graph
    duplicates = 0  # authors that were already in the topical graph
    edges_count = 0  # "follows" edges created in the topical graph
    window = 0

    # Latest connected-component results. They are only computed when a
    # window rolls over, but the progress row below reads them earlier, so
    # start with None instead of hitting an UnboundLocalError.
    l1 = l2 = mid = avg = l = None

    # The context manager closes the file even when a graph call or the
    # timestamp parsing raises part-way through.
    with open(loc, "r") as f:
        for line in f:
            # A blank (e.g. trailing) line carries no tweet; skip it rather
            # than fail on the missing timestamp field.
            if not line.strip():
                continue

            tobj = line.split("\t")
            id_ = tobj[0]
            ts = int(tobj[1])

            # Initialise the window with the timestamp of the first tweet.
            if c1 == 0:
                window = ts

            n = graph_db.get_indexed_node("users", "uid", str(id_))
            c1 += 1

            # Only authors present in the main graph are copied over.
            if n:
                c2 += 1
                m = graph_db_topic.get_indexed_node(
                    "users", "uid", str(id_))
                if m:
                    # Author already in the topical graph: nothing to add.
                    # NOTE(review): this also skips the progress and window
                    # checks below for this tweet -- confirm that is
                    # intended.
                    duplicates += 1
                    continue

                profile = graph_db.get_properties(n)[0]
                friends = n.get_related_nodes(
                    neo4j.Direction.OUTGOING, "follows")
                profile['ts'] = ts

                author = graph_db_topic.get_or_create_indexed_node(
                    "users", "uid", id_, profile)

                for friend in friends:
                    fid = friend["uid"]
                    fn = graph_db_topic.get_indexed_node(
                        "users", "uid", str(fid))
                    # Link only to friends already in the topical graph.
                    if fn:
                        graph_db_topic.create((author, "follows", fn))
                        edges_count += 1

            # Periodic progress row.
            if c1 % 100 == 0:
                dump_log(log_loc, [
                    c1, c2, l1, l2, mid, avg, l, edges_count,
                    datetime.now(), duplicates, "Duplicates"
                ])

            # Roll the window and log freshly computed component stats.
            if ts > window + 30 * 60:
                window = ts
                start = datetime.now()
                l1, l2, mid, avg, l = connected_comp.main(ts)
                end = datetime.now()
                dump_log(log_loc, [
                    c1, c2, l1, l2, mid, avg, l, edges_count,
                    start, end, ts
                ])
def insert_day_n(loc, day, log_loc):
    """Create the evolving graph for the tweet cluster of topic ``loc``.

    Each non-blank line of ``loc`` is read as tab-separated fields: the
    author's user id first, the tweet timestamp (an integer) second.
    Authors found in the main graph (localhost:7474) and not yet in the
    topical graph (localhost:7475) are inserted there and linked with a
    "follows" edge to every friend already present in the topical graph.

    Graph stats are written to ``log_loc`` via ``dump_log``:
      * a progress row every 100 tweets read, and
      * a row with fresh ``connected_comp.main(ts)`` results whenever a
        tweet is more than ``30 * 60`` past the start of the current
        window (presumably seconds, i.e. 30 minutes -- confirm).

    Args:
        loc: path of the tab-separated tweet file to read.
        day: unused by this implementation; kept so existing callers
            keep working.
        log_loc: passed to ``dump_log`` as the log destination.
    """
    graph_db = neo4j.GraphDatabaseService("http://localhost:7474/db/data/")
    graph_db_topic = neo4j.GraphDatabaseService(
        "http://localhost:7475/db/data/")
    c1 = 0  # tweets read so far
    c2 = 0  # tweets whose author exists in the main graph
    duplicates = 0  # authors that were already in the topical graph
    edges_count = 0  # "follows" edges created in the topical graph
    window = 0

    # Latest connected-component results. They are only computed when a
    # window rolls over, but the progress row below reads them earlier, so
    # start with None instead of hitting an UnboundLocalError.
    l1 = l2 = mid = avg = l = None

    # The context manager closes the file even when a graph call or the
    # timestamp parsing raises part-way through.
    with open(loc, "r") as f:
        for line in f:
            # A blank (e.g. trailing) line carries no tweet; skip it rather
            # than fail on the missing timestamp field.
            if not line.strip():
                continue

            tobj = line.split("\t")
            id_ = tobj[0]
            ts = int(tobj[1])

            # Initialise the window with the timestamp of the first tweet.
            if c1 == 0:
                window = ts

            n = graph_db.get_indexed_node("users", "uid", str(id_))
            c1 += 1

            # Only authors present in the main graph are copied over.
            if n:
                c2 += 1
                m = graph_db_topic.get_indexed_node(
                    "users", "uid", str(id_))
                if m:
                    # Author already in the topical graph: nothing to add.
                    # NOTE(review): this also skips the progress and window
                    # checks below for this tweet -- confirm that is
                    # intended.
                    duplicates += 1
                    continue

                profile = graph_db.get_properties(n)[0]
                friends = n.get_related_nodes(
                    neo4j.Direction.OUTGOING, "follows")
                profile['ts'] = ts

                author = graph_db_topic.get_or_create_indexed_node(
                    "users", "uid", id_, profile)

                for friend in friends:
                    fid = friend["uid"]
                    fn = graph_db_topic.get_indexed_node(
                        "users", "uid", str(fid))
                    # Link only to friends already in the topical graph.
                    if fn:
                        graph_db_topic.create((author, "follows", fn))
                        edges_count += 1

            # Periodic progress row.
            if c1 % 100 == 0:
                dump_log(log_loc, [
                    c1, c2, l1, l2, mid, avg, l, edges_count,
                    datetime.now(), duplicates, "Duplicates"
                ])

            # Roll the window and log freshly computed component stats.
            if ts > window + 30 * 60:
                window = ts
                start = datetime.now()
                l1, l2, mid, avg, l = connected_comp.main(ts)
                end = datetime.now()
                dump_log(log_loc, [
                    c1, c2, l1, l2, mid, avg, l, edges_count,
                    start, end, ts
                ])