Ejemplo n.º 1
0
def insert_day_n(loc, day, log_loc):
    """Insert the tweet authors listed in ``loc`` into the topical graph.

    Every non-blank line of ``loc`` is split on tabs; the first field is used
    as the user id and the second is parsed as an integer timestamp. Authors
    that exist in the main graph (port 7474) are copied, together with their
    properties, into the topical graph (port 7475) and linked by "follows"
    edges to those of their friends that are already in the topical graph.

    Whenever a timestamp is more than 30 minutes past the start of the
    current window, the window restarts at that timestamp and the values
    returned by ``connected_comp.main`` are written through ``dump_log``.

    Args:
        loc: path of the tab-separated input file.
        day: value stored under the ``day`` key of each inserted profile.
        log_loc: log destination handed to ``dump_log``.
    """
    graph_db = neo4j.GraphDatabaseService("http://localhost:7474/db/data/")
    graph_db_topic = neo4j.GraphDatabaseService("http://localhost:7475/db/data/")

    c1 = 0  # lines processed so far
    c2 = 0  # authors that were found in the main graph
    window = 0

    # The context manager closes the file even if a lookup or parse fails.
    with open(loc, "r") as f:
        for line in f:
            # A blank line (e.g. a trailing newline) carries no record.
            if not line.strip():
                continue

            tobj = line.split("\t")
            id_ = tobj[0]
            ts = int(tobj[1])

            # Initialise the window with the timestamp of the first tweet.
            if c1 == 0:
                window = ts

            n = graph_db.get_indexed_node("users", "uid", str(id_))
            c1 += 1

            # Only authors present in the main graph are inserted.
            if n:
                c2 += 1
                profile = graph_db.get_properties(n)[0]
                friends = n.get_related_nodes(neo4j.Direction.OUTGOING, "follows")

                profile['day'] = day
                profile['ts'] = ts
                # Insert the author unless already in the topical graph.
                author = graph_db_topic.get_or_create_indexed_node(
                    "users", "uid", id_, profile)

                for friend in friends:
                    fid = friend["uid"]
                    fn = graph_db_topic.get_indexed_node("users", "uid", str(fid))
                    # Link only to friends already present in the topical graph.
                    if fn:
                        graph_db_topic.create((author, "follows", fn))

            # Every 30 minutes of tweet time: restart the window and log the
            # connected-component values.
            if ts > window + 30 * 60:
                window = ts
                l1, l2, mid, avg, l = connected_comp.main(ts)
                dump_log(log_loc, [c1, c2, l1, l2, mid, avg, l, datetime.now()])
Ejemplo n.º 2
0
def main():
    """Log connected-component results over growing intervals of tweet time.

    The interval start stays fixed at the first tweet while the interval end
    advances in 30-minute steps, for as long as the end is strictly before
    the last tweet. For each interval the list returned by
    ``connected_comp.main`` is extended with the current time and the
    human-readable interval bounds, then passed to ``dump_log``.
    """
    log_path = "/home/pranayag/validation/evolving_graph/mark_stats.txt"
    first_tweet = 1370394609
    last_tweet = 1370907519
    step = 30 * 60

    # range() stops before last_tweet, matching the strict "<" bound.
    for interval_end in range(first_tweet + step, last_tweet, step):
        start_label = time.ctime(first_tweet)
        end_label = time.ctime(interval_end)
        stats = connected_comp.main(
            1, START=first_tweet, END=interval_end, mode=2)
        dump_log(
            log_path,
            stats + [datetime.now(), start_label, "to", end_label])
Ejemplo n.º 3
0
def main():
    """Write connected-component results for successive 30-minute cut-offs.

    Starting 30 minutes after the first tweet, the upper bound is moved
    forward by 30 minutes per iteration until it is no longer before the last
    tweet; the lower bound never moves. Each result list from
    ``connected_comp.main`` is logged via ``dump_log`` together with the
    current time and the formatted bounds.
    """
    stats_file = "/home/pranayag/validation/evolving_graph/mark_stats.txt"
    span_start, span_stop = 1370394609, 1370907519
    half_hour = 30 * 60

    upper = span_start + half_hour
    while upper < span_stop:
        lower_text = time.ctime(span_start)
        upper_text = time.ctime(upper)
        components = connected_comp.main(
            1, START=span_start, END=upper, mode=2)
        dump_log(
            stats_file,
            components + [datetime.now(), lower_text, "to", upper_text])
        upper += half_hour
Ejemplo n.º 4
0
def insert_day_n(loc, day, log_loc):
    """Create the evolving graph for the tweet cluster of topic ``loc``.

    Every non-blank line of ``loc`` is split on tabs; the first field is used
    as the user id and the second is parsed as an integer timestamp. Authors
    found in the main graph (port 7474) but not yet in the topical graph
    (port 7475) are inserted there and linked by "follows" edges to those of
    their friends that are already in the topical graph. Graph statistics are
    written to ``log_loc`` through ``dump_log``.

    Args:
        loc: path of the tab-separated input file.
        day: accepted for interface compatibility; not used by this version.
        log_loc: log destination handed to ``dump_log``.
    """
    graph_db = neo4j.GraphDatabaseService("http://localhost:7474/db/data/")
    graph_db_topic = neo4j.GraphDatabaseService(
        "http://localhost:7475/db/data/")
    c1 = 0  # lines processed so far
    c2 = 0  # authors that were found in the main graph
    duplicates = 0  # authors that were already in the topical graph
    edges_count = 0
    window = 0
    # The progress log below can fire before the first call to
    # connected_comp.main, so these need a value from the start; otherwise
    # that log would raise UnboundLocalError.
    l1 = l2 = mid = avg = l = None

    # The context manager closes the file even if a lookup or parse fails.
    with open(loc, "r") as f:
        for line in f:
            # A blank line (e.g. a trailing newline) carries no record.
            if not line.strip():
                continue

            tobj = line.split("\t")
            id_ = tobj[0]
            ts = int(tobj[1])
            # Initialise the window with the timestamp of the first tweet.
            if c1 == 0:
                window = ts
            n = graph_db.get_indexed_node("users", "uid", str(id_))
            c1 += 1
            # Only authors present in the main graph are considered.
            if n:
                c2 += 1
                m = graph_db_topic.get_indexed_node("users", "uid", str(id_))
                if m:
                    # Author already in the topical graph: count and skip.
                    # NOTE(review): this also skips the progress log and the
                    # window check below for this line - confirm intended.
                    duplicates += 1
                    continue

                profile = graph_db.get_properties(n)[0]
                friends = n.get_related_nodes(neo4j.Direction.OUTGOING,
                                              "follows")

                profile['ts'] = ts
                author = graph_db_topic.get_or_create_indexed_node(
                    "users", "uid", id_, profile)

                for friend in friends:
                    fid = friend["uid"]
                    fn = graph_db_topic.get_indexed_node(
                        "users", "uid", str(fid))
                    # Link only to friends already in the topical graph.
                    if fn:
                        graph_db_topic.create((author, "follows", fn))
                        edges_count += 1

            # Progress log every 100 processed lines; the component values
            # are None until the first window has elapsed.
            if c1 % 100 == 0:
                dump_log(log_loc, [
                    c1, c2, l1, l2, mid, avg, l, edges_count,
                    datetime.now(), duplicates, "Duplicates"
                ])

            # Every 30 minutes of tweet time: restart the window, recompute
            # the connected-component values and log them with the timing.
            if ts > window + 30 * 60:
                window = ts
                start = datetime.now()
                l1, l2, mid, avg, l = connected_comp.main(ts)
                end = datetime.now()
                dump_log(
                    log_loc,
                    [c1, c2, l1, l2, mid, avg, l, edges_count, start, end, ts])
Ejemplo n.º 5
0
def insert_day_n(loc, day, log_loc):
    """Create the evolving graph for the tweet cluster of topic ``loc``.

    Each non-blank line of ``loc`` is tab-separated: the first field is used
    as the user id, the second is parsed as an integer timestamp. An author
    that exists in the main graph (port 7474) and is not yet in the topical
    graph (port 7475) is inserted into the topical graph and connected by
    "follows" edges to each of their friends already present there. Graph
    statistics are written to ``log_loc`` through ``dump_log``.

    Args:
        loc: path of the tab-separated input file.
        day: accepted for interface compatibility; not used by this version.
        log_loc: log destination handed to ``dump_log``.
    """
    graph_db = neo4j.GraphDatabaseService(
        "http://localhost:7474/db/data/")
    graph_db_topic = neo4j.GraphDatabaseService(
        "http://localhost:7475/db/data/")
    c1 = 0  # lines processed so far
    c2 = 0  # authors that were found in the main graph
    duplicates = 0  # authors that were already in the topical graph
    edges_count = 0
    window = 0
    # Give the component values a defined state up front: the every-100-lines
    # log may run before connected_comp.main has been called, which would
    # otherwise raise UnboundLocalError.
    l1 = l2 = mid = avg = l = None

    # Closing is handled by the context manager, including on errors.
    with open(loc, "r") as f:
        for line in f:
            # Skip blank lines (such as a trailing newline at end of file).
            if not line.strip():
                continue

            tobj = line.split("\t")
            id_ = tobj[0]
            ts = int(tobj[1])
            # Initialise the window with the timestamp of the first tweet.
            if c1 == 0:
                window = ts
            n = graph_db.get_indexed_node("users", "uid", str(id_))
            c1 += 1
            # Only authors present in the main graph are considered.
            if n:
                c2 += 1
                m = graph_db_topic.get_indexed_node("users", "uid", str(id_))
                if m:
                    # Author already in the topical graph: count and skip.
                    # NOTE(review): the skip bypasses the progress log and
                    # the window check below as well - confirm intended.
                    duplicates += 1
                    continue

                profile = graph_db.get_properties(n)[0]
                friends = n.get_related_nodes(
                    neo4j.Direction.OUTGOING, "follows")

                profile['ts'] = ts
                author = graph_db_topic.get_or_create_indexed_node(
                    "users", "uid", id_, profile)

                for friend in friends:
                    fid = friend["uid"]
                    fn = graph_db_topic.get_indexed_node(
                        "users", "uid", str(fid))
                    # Link only to friends already in the topical graph.
                    if fn:
                        graph_db_topic.create((author, "follows", fn))
                        edges_count += 1

            # Progress log every 100 processed lines; component values stay
            # None until the first 30-minute window has elapsed.
            if c1 % 100 == 0:
                dump_log(
                    log_loc,
                    [c1, c2, l1, l2, mid, avg, l, edges_count,
                     datetime.now(), duplicates, "Duplicates"])

            # Every 30 minutes of tweet time: restart the window, recompute
            # the connected-component values and log them with the timing.
            if ts > window + 30 * 60:
                window = ts
                start = datetime.now()
                l1, l2, mid, avg, l = connected_comp.main(ts)
                end = datetime.now()
                dump_log(
                    log_loc,
                    [c1, c2, l1, l2, mid, avg, l, edges_count, start, end, ts])