Ejemplo n.º 1
0
class FB():
    def __init__(self):
        db_path = joinpath(globals.data_dir, globals.db_name)

        #if not exists(db_path): 	create in db manager if not present
        #	log.error('db file not found')
        #	raise FBError

        self._db = DBManager(db_path)
        self._db.connect()

        self._types = "(type = 46 OR type = 80 OR type = 128 OR type = 247 OR type = 308 OR type = 60)"
        self._fql_limit = globals.max_fb_api_calls

    def __del__(self):
        self._db.disconnect()

    def do_fql(self, query):
        try:
            fbaccess = FBAccess(self._db)
            self._access_token = fbaccess.token
        except FBAccessException as fbae:
            raise FBError(fbae.message)

        if self._fql_limit == 0:
            log.error('FQL call limit exceeded')
            raise FBError
        self._fql_limit -= 1

        query_url = 'https://graph.facebook.com/fql?' + urllib.urlencode(
            {
                'q': query,
                'access_token': self._access_token
            })
        log.info(query_url)

        try:
            response = urllib.urlopen(query_url)
            log.info(response.getcode())
        except Exception as e:
            log.error(e.strerror)
            raise FBError

        try:
            data = response.read()
            jdata = json.loads(data)
            log.info('record count = %d' % len(jdata['data']))
        except Exception as e:
            log.error(e.strerror)
            log.info(data)
            raise FBError
        return jdata

    def get_friends(self):
        friends_query = """SELECT uid, first_name, middle_name, last_name, sex, relationship_status, pic, 
				significant_other_id, age_range, birthday_date, current_location, friend_count, 
				hometown_location FROM user WHERE uid IN (SELECT uid2 FROM friend WHERE uid1=me()) OR uid=me()"""

        jdata = self.do_fql(friends_query)

        users = self._db.query("SELECT * FROM user WHERE deleted = 0")
        user_ids = [i['id'] for i in users]

        for record in jdata['data']:
            for k in record:
                if (record[k] == ''):
                    record[k] = None
            if (record['friend_count'] != None):
                record['friend_count'] = int(record['friend_count'])
            if (record['sex'] != None):
                record['sex'] = record['sex'][0]
            if (record['hometown_location'] != None):
                record['hometown_location'] = record['hometown_location'][
                    'name']
            if (record['current_location'] != None):
                record['current_location'] = record['current_location']['name']
            if (record['age_range'] != None):
                record['age_range'] = str(record['age_range']['min'])

            if record['uid'] in user_ids:
                user = [u for u in users if u['id'] == record['uid']][0]
                update_values = {}

                for key in record.keys():
                    if key in ['uid']:
                        continue
                    if record[key] != user[key.encode()]:
                        update_values[key.encode()] = record[key]

                if len(update_values) > 0:
                    self._db.update('user', update_values,
                                    "id = \'%s\'" % record['uid'])
                    print update_values
                    print 'u ',
                    sys.stdout.flush()
                    #raw_input()

                user_ids.remove(record['uid'])
            else:
                self._db.insert(
                    'user',
                    (record['uid'], record['first_name'],
                     record['middle_name'], record['last_name'], record['sex'],
                     record['relationship_status'], record['pic'],
                     record['significant_other_id'], record['age_range'],
                     record['birthday_date'], record['current_location'],
                     record['friend_count'], record['hometown_location'], 0))
                log.info('new friend: %s %s' %
                         (record['first_name'], record['last_name']))

        for user_id in user_ids:
            self._db.update('user', {'deleted': 1}, "id = \'%s\'" % user_id)
            user = [u for u in users if u['id'] == user_id][0]
            log.info('deleted friend: %s %s' %
                     (user['first_name'], user['last_name']))

        self._db.commit()

    def get_stream(self, start_time=None, end_time=None, cont=False):
        def db_insert(jdata):
            for record in jdata['data']:
                for k in record:
                    if (record[k] == ''):
                        record[k] = None

                self._db.insert(
                    'stream', (record['post_id'], record['actor_id'],
                               record['created_time'], record['type'],
                               record['like_info']['like_count'],
                               record['comment_info']['comment_count'],
                               1 if record['is_popular'] else 0,
                               record['message'], record['share_count'],
                               record['permalink'], None, None, None, 0, 0, 0)
                )  #last 3: updated(remove), update_likes, update_comments

                self._db.commit()
                print '.',
                sys.stdout.flush()

        if not (start_time and end_time):
            if cont:
                if os.path.exists('./end_time'):
                    end_time = int(open('./end_time', 'r').read().strip())
                else:
                    end_time = int(
                        time())  #+ int(timedelta(days=-6).total_seconds())
            else:
                end_time = int(
                    time())  #+ int(timedelta(days=-6).total_seconds())
            start_time = end_time + int(timedelta(hours=-12).total_seconds())

        stream_query = """SELECT post_id, actor_id, created_time, type, like_info, comment_info, 
				is_popular, message, share_count, permalink FROM stream WHERE source_id """
        source_clause = "IN (SELECT uid2 FROM friend WHERE uid1=me())"
        limit_clause = ' LIMIT 500'
        time_clause = ' AND created_time > %d AND created_time < %d' % (
            start_time, end_time)

        jdata = self.do_fql(stream_query + source_clause + time_clause)
        db_insert(jdata)

        source_clause = "= me()"

        jdata = self.do_fql(stream_query + source_clause + time_clause)
        db_insert(jdata)

        if cont: open('end_time', 'w').write(str(start_time))

    def update_stream(self):
        post_ids_query = "SELECT post_id, like_count, comment_count, share_count FROM stream WHERE created_time > %d AND %s"\
          %(int(time()) + timedelta(days=-4).total_seconds(), self._types)
        result = self._db.query(post_ids_query)

        rc = len(result)
        while (rc > 0):
            post_ids = ''
            for i in range(
                    len(result) - rc,
                    len(result) - rc + 20 if rc > 20 else len(result)):
                post_ids += "'%s'," % result[i]['post_id']
            rc -= 20
            post_ids = post_ids.rstrip(',')

            stream_query = "SELECT post_id, like_info, comment_info, share_count FROM stream WHERE post_id IN (%s)" % (
                post_ids)

            jdata = self.do_fql(stream_query)

            def result_find(post_id):
                for row in result:
                    if row['post_id'] == post_id:
                        return row

            for record in jdata['data']:
                post_id = record['post_id']
                like_count = record['like_info']['like_count']
                comment_count = record['comment_info']['comment_count']
                share_count = record['share_count']

                row = result_find(post_id)
                likes_changed = row['like_count'] != like_count
                comments_changed = row['comment_count'] != comment_count

                if likes_changed or comments_changed:
                    update_query = """UPDATE stream SET like_count = %d, comment_count = %d, share_count = %d, 
							update_likes = %d, update_comments = %d WHERE post_id = '%s'"""\
                      %(like_count, comment_count, share_count, 1 if likes_changed else 0, 1 if comments_changed else 0, post_id)
                    self._db.query(update_query)
                    print('.'),
                    sys.stdout.flush()
            self._db.commit()

    def get_comments(self):
        while (True):
            post_ids_query2 = "SELECT post_id FROM stream LIMIT 30"
            post_ids_query = """SELECT post_id FROM stream WHERE (post_id NOT IN 
					(SELECT DISTINCT post_id FROM comment) OR update_comments = 1) AND comment_count > 0 AND 
					%s LIMIT 20""" % (self._types)
            result = self._db.query(post_ids_query)
            if len(result) == 0:
                return

            post_ids = ''
            for row in result:
                post_ids += "'%s'," % row['post_id']
            post_ids = post_ids.rstrip(',')

            like_query = "SELECT post_fbid, post_id, fromid FROM comment WHERE post_id IN (%s)" % post_ids  #AND user_id IN (SELECT uid2 FROM friend WHERE uid1=me())"%post_ids

            jdata = self.do_fql(like_query)

            for record in jdata['data']:
                self._db.insert(
                    'comment',
                    (record['post_fbid'], record['post_id'], record['fromid']))
                print '.',
                sys.stdout.flush()

            self._db.query(
                "UPDATE stream SET update_comments = 0 WHERE post_id in (%s)" %
                (post_ids))
            self._db.commit()

    def get_likes(self):
        while (True):
            post_ids_query = """SELECT post_id FROM stream WHERE (post_id NOT IN 
					(SELECT DISTINCT post_id FROM like) OR update_likes = 1) AND like_count > 0 AND 
					%s LIMIT 20""" % (self._types)
            result = self._db.query(post_ids_query)
            if len(result) == 0:
                return

            post_ids = ''
            for row in result:
                post_ids += "'%s'," % row['post_id']
            post_ids = post_ids.rstrip(',')

            like_query = "SELECT post_id, user_id FROM like WHERE post_id IN (%s)" % post_ids  #AND user_id IN (SELECT uid2 FROM friend WHERE uid1=me())"%post_ids

            jdata = self.do_fql(like_query)

            for record in jdata['data']:
                self._db.insert('like', (record['post_id'], record['user_id']))

                print '.',
                sys.stdout.flush()

            self._db.query(
                "UPDATE stream SET update_likes = 0 WHERE post_id in (%s)" %
                (post_ids))
            self._db.commit()

    def clean_duplicates(self):
        delete_dup_query = None
        with open(joinpath(dirname(realpath(__file__)), 'clean_stream.sql'),
                  'r') as f:
            delete_dup_query = f.read()

        self._db.executescript(delete_dup_query)

    def render_graph(self, start=None, end=None):
        drop_temp_tables = "drop table likejoin; drop table result;"

        filter_self_likes = True
        filter_self_comments = True

        time_period_clause = ""
        if start and end:
            time_period_clause = "AND s.created_time BETWEEN %d AND %d" % (
                self.get_timestamp(start), self.get_timestamp(end))

        likejoin_query = """CREATE TEMP TABLE likejoin AS 
				SELECT s.source_id AS user1, l.user_id AS user2 FROM stream AS s 
				JOIN like AS l 
				ON s.post_id = l.post_id %s WHERE (l.user_id IN (SELECT id FROM user WHERE deleted = 0)
				AND s.source_id IN (SELECT id FROM user WHERE deleted = 0)) %s"""\
          %("AND s.source_id <> l.user_id" if filter_self_likes else "", time_period_clause)

        commentjoin_query = """INSERT INTO likejoin 
				SELECT s.source_id AS user1, c.user_id AS user2 FROM stream AS s 
				JOIN comment AS c 
				ON s.post_id = c.post_id %s WHERE (c.user_id IN (SELECT id FROM user WHERE deleted = 0)
				AND s.source_id IN (SELECT id FROM user WHERE deleted = 0)) %s"""\
          %("AND s.source_id <> c.user_id" if filter_self_comments else "", time_period_clause)

        result_query = """CREATE TEMP TABLE result AS 
				SELECT min(t1.user1, t1.user2) AS fuser1, max(t1.user1, t1.user2) AS fuser2,
				t1.c+coalesce(t2.c, 0) AS count 
				FROM 
				(SELECT *, count(user1) AS c FROM likejoin GROUP BY user1, user2) AS t1 
				LEFT JOIN 
				(SELECT *, count(user1) AS c FROM likejoin GROUP BY user1, user2) AS t2 
				ON t1.user2 = t2.user1 AND t1.user1 = t2.user2 AND t1.user1 <> t1.user2 
				GROUP BY fuser1, fuser2"""

        users_query = """SELECT id, first_name, last_name FROM user WHERE id IN 
				(SELECT fuser1 FROM result UNION SELECT fuser2 FROM result)"""

        self._db.query(likejoin_query)
        self._db.query(commentjoin_query)
        self._db.query(result_query)

        min_max_count = self._db.query(
            "SELECT min(count) as min_count, max(count) as max_count FROM result"
        )[0]

        g = Graph(min_max_count['min_count'], min_max_count['max_count'])

        users = self._db.query(users_query)
        for user in users:
            g.add_node(str(user['id']),
                       str(user['first_name'] + '.' + user['last_name'][0]))

        likes = self._db.query("select * from result;")
        for like in likes:
            g.add_edge(str(like['fuser1']), str(like['fuser2']), like['count'])

        g.render(label='test123')

    def render_plot(self, type, count=10, first_name=None, last_name=None):
        plots = {}
        plots['top_posts'] = PlotInfo(query = """SELECT s.count, u.first_name || '.' || substr(u.last_name, 1, 1) AS name FROM 
				(SELECT count(*) AS count, source_id FROM stream WHERE %s GROUP BY source_id) AS s JOIN user AS u 
				ON s.source_id = u.id WHERE u.deleted = 0 ORDER BY s.count DESC LIMIT %d"""%(self._types, count),\
          title = 'Top %d Posts'%(count), x = 'name')

        plots['top_likes'] = PlotInfo(query = """SELECT s.count, u.first_name || '.' || substr(u.last_name, 1, 1) AS name FROM 
				(SELECT sum(like_count) AS count, source_id FROM stream WHERE %s GROUP BY source_id) AS s JOIN user AS u 
				ON s.source_id = u.id WHERE u.deleted = 0 ORDER BY s.count DESC LIMIT %d"""%(self._types, count),\
          title = 'Top %d Likes'%(count), x = 'name')

        plots['user_posts'] = PlotInfo(query = """select count(post_id) as count , strftime("%%m/%%d", datetime(created_time, 'unixepoch')) as day 
					from stream where source_id in (select id from user
					where first_name = '%s' and last_name = '%s') and %s group by day"""%(first_name, last_name, self._types),\
           title = 'Timeline: %s %s'%(first_name, last_name), x = 'day')

        result = self._db.query(plots[type].query)
        data = ''
        count = len(result)
        for row in result:
            data += '%s %s\n' % (row['count'], row[plots[type].x])

        plot_filepath = joinpath(dirname(realpath(__file__)), 'plot.gp')
        plot = Popen(["gnuplot", "-e",  "title='%s'"%(plots[type].title), "-e",\
           "size = '%d, %d'"%(count*60, int(count*60/1.6)), plot_filepath],\
           stdin=PIPE)
        plot.communicate(data)

    def add_job_period(self, start=None, end=None):
        if start and end:
            start_timestamp = (datetime.strptime(start, '%d%b%Y') -
                               datetime(1970, 1, 1)).total_seconds()
            end_timestamp = (datetime.strptime(end, '%d%b%Y') -
                             datetime(1970, 1, 1)).total_seconds()
        else:
            start_timestamp = self._db.query(
                "SELECT MAX(end_time) FROM job_period")[0][0]
            if start_timestamp is not None and start_timestamp > time():
                return

            if start_timestamp is None:
                start_timestamp = int(time())

            end_timestamp = start_timestamp + int(
                timedelta(days=globals.job_period_days).total_seconds())

        while start_timestamp < end_timestamp:
            self._db.insert('job_period',
                            (start_timestamp, start_timestamp +
                             timedelta(hours=12).total_seconds(), 0))
            start_timestamp += timedelta(hours=12).total_seconds()
        self._db.commit()

    def get_stream_job(self):
        time_periods_query = "SELECT * FROM job_period WHERE end_time <= %d AND get_count < 2" % (
            int(time()) - timezone)
        time_periods = self._db.query(time_periods_query)

        for tp in time_periods:
            self.get_stream(tp['start_time'], tp['end_time'])
            self._db.query(
                "UPDATE job_period SET get_count = %d WHERE end_time = %d" %
                (tp['get_count'] + 1, tp['end_time']))
            self._db.commit()

    def get_timestamp(self, date_string):
        return (datetime.strptime(date_string, '%d%b%Y') -
                datetime(1970, 1, 1)).total_seconds()

    def get_photos(self, start_time=None, end_time=None, cont=False):
        def db_insert(jdata):
            for record in jdata['data']:
                for k in record:
                    if (record[k] == ''):
                        record[k] = None

                self._db.insert(
                    'photo',
                    (record['pid'], record['aid'], record['caption'],
                     record['comment_info']['comment_count'],
                     record['like_info']['like_count'], record['created'],
                     record['link'], record['owner'], record['place_id'],
                     record['src_big'], 0, 0))

                self._db.commit()
                print '.',
                sys.stdout.flush()

        if not (start_time and end_time):
            if cont:
                if os.path.exists('./end_time'):
                    end_time = int(open('./end_time', 'r').read().strip())
                else:
                    end_time = int(
                        time())  #+ int(timedelta(days=-6).total_seconds())
            else:
                end_time = int(
                    time())  #+ int(timedelta(days=-6).total_seconds())
            start_time = end_time + int(timedelta(hours=-12).total_seconds())

        stream_query = """SELECT pid, aid, caption, comment_info, like_info, created, 
				link, owner, place_id, src_big FROM photo WHERE owner """
        source_clause = "IN (SELECT uid2 FROM friend WHERE uid1=me())"
        limit_clause = ' LIMIT 500'
        time_clause = ' AND created > %d AND created < %d' % (start_time,
                                                              end_time)

        jdata = self.do_fql(stream_query + source_clause)  # + time_clause)
        db_insert(jdata)

        source_clause = "= me()"

        jdata = self.do_fql(stream_query + source_clause)  # + time_clause)
        db_insert(jdata)

        if cont: open('end_time', 'w').write(str(start_time))
Ejemplo n.º 2
0
Archivo: fb.py Proyecto: amol9/fbstats
class FB():
	def __init__(self):
		db_path = joinpath(globals.data_dir, globals.db_name)

		#if not exists(db_path): 	create in db manager if not present
		#	log.error('db file not found')
		#	raise FBError

		self._db = DBManager(db_path)
		self._db.connect()

		self._types = "(type = 46 OR type = 80 OR type = 128 OR type = 247 OR type = 308 OR type = 60)"
		self._fql_limit = globals.max_fb_api_calls

		
	def __del__(self):
		self._db.disconnect()


	def do_fql(self, query):
		try:
			fbaccess = FBAccess(self._db)
			self._access_token = fbaccess.token
		except FBAccessException as fbae:
			raise FBError(fbae.message)

		if self._fql_limit == 0:
			log.error('FQL call limit exceeded')
			raise FBError
		self._fql_limit -= 1

		query_url = 'https://graph.facebook.com/fql?' + urllib.urlencode({'q' : query, 'access_token' : self._access_token})
		log.info(query_url)
	
		try:	
			response = urllib.urlopen(query_url)
			log.info(response.getcode())
		except Exception as e:
			log.error(e.strerror)
			raise FBError

		try:
			data = response.read()
			jdata = json.loads(data)
			log.info('record count = %d'%len(jdata['data']))
		except Exception as e:
			log.error(e.strerror)
			log.info(data)
			raise FBError
		return jdata


	def get_friends(self):
		friends_query = """SELECT uid, first_name, middle_name, last_name, sex, relationship_status, pic, 
				significant_other_id, age_range, birthday_date, current_location, friend_count, 
				hometown_location FROM user WHERE uid IN (SELECT uid2 FROM friend WHERE uid1=me()) OR uid=me()"""

		jdata = self.do_fql(friends_query)

		users = self._db.query("SELECT * FROM user WHERE deleted = 0")
		user_ids = [i['id'] for i in users]

		for record in jdata['data']:
			for k in record:
				if (record[k] == ''):
					record[k] = None
			if (record['friend_count'] != None):
				record['friend_count'] = int(record['friend_count'])
			if (record['sex'] != None):
				record['sex'] = record['sex'][0]
			if (record['hometown_location'] != None):
				record['hometown_location'] = record['hometown_location']['name']
			if (record['current_location'] != None):
				record['current_location'] = record['current_location']['name']
			if (record['age_range'] != None):
				record['age_range'] = str(record['age_range']['min'])

			if record['uid'] in user_ids:
				user = [u for u in users if u['id'] == record['uid']][0]
				update_values = {}

				for key in record.keys():
					if key in ['uid']:
						continue
					if record[key] != user[key.encode()]:
						update_values[key.encode()] = record[key]

				if len(update_values) > 0:
					self._db.update('user', update_values, "id = \'%s\'"%record['uid'])
					print update_values
					print 'u ',; sys.stdout.flush()
					#raw_input()

				user_ids.remove(record['uid'])
			else:
				self._db.insert('user', (record['uid'], record['first_name'], record['middle_name'],
					record['last_name'], record['sex'], record['relationship_status'], record['pic'], 
					record['significant_other_id'], record['age_range'], record['birthday_date'], 
					record['current_location'], record['friend_count'], record['hometown_location'],
					0))
				log.info('new friend: %s %s'%(record['first_name'], record['last_name']))

		for user_id in user_ids:
			self._db.update('user', {'deleted': 1}, "id = \'%s\'"%user_id)
			user = [u for u in users if u['id'] == user_id][0]
			log.info('deleted friend: %s %s'%(user['first_name'], user['last_name']))

		self._db.commit()
	

	def get_stream(self, start_time=None, end_time=None, cont=False):
		def db_insert(jdata):
			for record in jdata['data']:
				for k in record:
					if (record[k] == ''):
						record[k] = None

				self._db.insert('stream', (record['post_id'], record['actor_id'], record['created_time'], 
					record['type'], record['like_info']['like_count'], record['comment_info']['comment_count'], 
					1 if record['is_popular'] else 0, record['message'], record['share_count'], 
					record['permalink'], None, None, None, 0, 0, 0))	#last 3: updated(remove), update_likes, update_comments

				self._db.commit()
				print '.',; sys.stdout.flush()

		if not (start_time and end_time):
			if cont:
				if os.path.exists('./end_time'):
					end_time = int(open('./end_time', 'r').read().strip())
				else:
					end_time = int(time()) #+ int(timedelta(days=-6).total_seconds())
			else:
				end_time = int(time()) #+ int(timedelta(days=-6).total_seconds())
			start_time = end_time + int(timedelta(hours=-12).total_seconds())

		
		stream_query = """SELECT post_id, actor_id, created_time, type, like_info, comment_info, 
				is_popular, message, share_count, permalink FROM stream WHERE source_id """ 
		source_clause =	"IN (SELECT uid2 FROM friend WHERE uid1=me())"
		limit_clause = ' LIMIT 500'
		time_clause = ' AND created_time > %d AND created_time < %d'%(start_time, end_time)

		jdata = self.do_fql(stream_query + source_clause + time_clause)
		db_insert(jdata)

		source_clause = "= me()"

		jdata = self.do_fql(stream_query + source_clause + time_clause)				
		db_insert(jdata)

		if cont: open('end_time', 'w').write(str(start_time))


	def update_stream(self):
		post_ids_query = "SELECT post_id, like_count, comment_count, share_count FROM stream WHERE created_time > %d AND %s"\
				%(int(time()) + timedelta(days=-4).total_seconds(), self._types)
		result = self._db.query(post_ids_query)

		rc = len(result)
		while(rc > 0):
			post_ids = ''
			for i in range(len(result)-rc, len(result)-rc+20 if rc>20 else len(result)):
				post_ids += "'%s',"%result[i]['post_id']
			rc -= 20
			post_ids = post_ids.rstrip(',')

			stream_query = "SELECT post_id, like_info, comment_info, share_count FROM stream WHERE post_id IN (%s)"%(post_ids)
			
			jdata = self.do_fql(stream_query)

			def result_find(post_id):
				for row in result:
					if row['post_id'] == post_id:
						return row
			
			for record in jdata['data']:
				post_id = record['post_id']
				like_count = record['like_info']['like_count']
				comment_count = record['comment_info']['comment_count']
				share_count = record['share_count']

				row = result_find(post_id)
				likes_changed = row['like_count'] != like_count 
				comments_changed = row['comment_count'] != comment_count

				if likes_changed or comments_changed:
					update_query = """UPDATE stream SET like_count = %d, comment_count = %d, share_count = %d, 
							update_likes = %d, update_comments = %d WHERE post_id = '%s'"""\
							%(like_count, comment_count, share_count, 1 if likes_changed else 0, 1 if comments_changed else 0, post_id)
					self._db.query(update_query)
					print('.'),; sys.stdout.flush()
			self._db.commit()
		

	def get_comments(self):
		while(True):
			post_ids_query2 = "SELECT post_id FROM stream LIMIT 30"
			post_ids_query = """SELECT post_id FROM stream WHERE (post_id NOT IN 
					(SELECT DISTINCT post_id FROM comment) OR update_comments = 1) AND comment_count > 0 AND 
					%s LIMIT 20"""%(self._types)
			result = self._db.query(post_ids_query)
			if len(result) == 0:
				return

			post_ids = ''
			for row in result:
				post_ids += "'%s',"%row['post_id']
			post_ids = post_ids.rstrip(',')
			
			like_query = "SELECT post_fbid, post_id, fromid FROM comment WHERE post_id IN (%s)"%post_ids 	#AND user_id IN (SELECT uid2 FROM friend WHERE uid1=me())"%post_ids
			
			jdata = self.do_fql(like_query)

			for record in jdata['data']:
				self._db.insert('comment', (record['post_fbid'], record['post_id'], record['fromid']))
				print '.',; sys.stdout.flush()

			self._db.query("UPDATE stream SET update_comments = 0 WHERE post_id in (%s)"%(post_ids))
			self._db.commit()


	def get_likes(self):
		while(True):
			post_ids_query = """SELECT post_id FROM stream WHERE (post_id NOT IN 
					(SELECT DISTINCT post_id FROM like) OR update_likes = 1) AND like_count > 0 AND 
					%s LIMIT 20"""%(self._types)
			result = self._db.query(post_ids_query)
			if len(result) == 0:
				return

			post_ids = ''
			for row in result:
				post_ids += "'%s',"%row['post_id']
			post_ids = post_ids.rstrip(',')
			
			like_query = "SELECT post_id, user_id FROM like WHERE post_id IN (%s)"%post_ids 	#AND user_id IN (SELECT uid2 FROM friend WHERE uid1=me())"%post_ids
			
			jdata = self.do_fql(like_query)

			for record in jdata['data']:
				self._db.insert('like', (record['post_id'], record['user_id']))

				print '.',; sys.stdout.flush()

			self._db.query("UPDATE stream SET update_likes = 0 WHERE post_id in (%s)"%(post_ids))
			self._db.commit()

		
	def clean_duplicates(self):
		delete_dup_query = None
		with open(joinpath(dirname(realpath(__file__)), 'clean_stream.sql'), 'r') as f:
			delete_dup_query = f.read()

		self._db.executescript(delete_dup_query)
		

	def render_graph(self, start=None, end=None):
		drop_temp_tables = "drop table likejoin; drop table result;"

		filter_self_likes = True
		filter_self_comments = True

		time_period_clause = ""
		if start and end:
			time_period_clause = "AND s.created_time BETWEEN %d AND %d"%(self.get_timestamp(start), self.get_timestamp(end))

		likejoin_query = """CREATE TEMP TABLE likejoin AS 
				SELECT s.source_id AS user1, l.user_id AS user2 FROM stream AS s 
				JOIN like AS l 
				ON s.post_id = l.post_id %s WHERE (l.user_id IN (SELECT id FROM user WHERE deleted = 0)
				AND s.source_id IN (SELECT id FROM user WHERE deleted = 0)) %s"""\
				%("AND s.source_id <> l.user_id" if filter_self_likes else "", time_period_clause) 
		
		commentjoin_query = """INSERT INTO likejoin 
				SELECT s.source_id AS user1, c.user_id AS user2 FROM stream AS s 
				JOIN comment AS c 
				ON s.post_id = c.post_id %s WHERE (c.user_id IN (SELECT id FROM user WHERE deleted = 0)
				AND s.source_id IN (SELECT id FROM user WHERE deleted = 0)) %s"""\
				%("AND s.source_id <> c.user_id" if filter_self_comments else "", time_period_clause) 

		result_query = """CREATE TEMP TABLE result AS 
				SELECT min(t1.user1, t1.user2) AS fuser1, max(t1.user1, t1.user2) AS fuser2,
				t1.c+coalesce(t2.c, 0) AS count 
				FROM 
				(SELECT *, count(user1) AS c FROM likejoin GROUP BY user1, user2) AS t1 
				LEFT JOIN 
				(SELECT *, count(user1) AS c FROM likejoin GROUP BY user1, user2) AS t2 
				ON t1.user2 = t2.user1 AND t1.user1 = t2.user2 AND t1.user1 <> t1.user2 
				GROUP BY fuser1, fuser2"""

		users_query= """SELECT id, first_name, last_name FROM user WHERE id IN 
				(SELECT fuser1 FROM result UNION SELECT fuser2 FROM result)"""

		self._db.query(likejoin_query);
		self._db.query(commentjoin_query);
		self._db.query(result_query);

		min_max_count = self._db.query("SELECT min(count) as min_count, max(count) as max_count FROM result")[0]

		g = Graph(min_max_count['min_count'], min_max_count['max_count'])

		users = self._db.query(users_query)
		for user in users:
			g.add_node(str(user['id']), str(user['first_name'] + '.' + user['last_name'][0]))

		likes = self._db.query("select * from result;")
		for like in likes:
			g.add_edge(str(like['fuser1']), str(like['fuser2']), like['count'])

		g.render(label='test123')

	
	def render_plot(self, type, count=10, first_name=None, last_name=None):
		plots = {}
		plots['top_posts'] = PlotInfo(query = """SELECT s.count, u.first_name || '.' || substr(u.last_name, 1, 1) AS name FROM 
				(SELECT count(*) AS count, source_id FROM stream WHERE %s GROUP BY source_id) AS s JOIN user AS u 
				ON s.source_id = u.id WHERE u.deleted = 0 ORDER BY s.count DESC LIMIT %d"""%(self._types, count),\
				title = 'Top %d Posts'%(count), x = 'name')

		plots['top_likes'] = PlotInfo(query = """SELECT s.count, u.first_name || '.' || substr(u.last_name, 1, 1) AS name FROM 
				(SELECT sum(like_count) AS count, source_id FROM stream WHERE %s GROUP BY source_id) AS s JOIN user AS u 
				ON s.source_id = u.id WHERE u.deleted = 0 ORDER BY s.count DESC LIMIT %d"""%(self._types, count),\
				title = 'Top %d Likes'%(count), x = 'name')

		plots['user_posts'] = PlotInfo(query = """select count(post_id) as count , strftime("%%m/%%d", datetime(created_time, 'unixepoch')) as day 
					from stream where source_id in (select id from user
					where first_name = '%s' and last_name = '%s') and %s group by day"""%(first_name, last_name, self._types),\
					title = 'Timeline: %s %s'%(first_name, last_name), x = 'day')


		result = self._db.query(plots[type].query)
		data = ''
		count = len(result)
		for row in result:
			data += '%s %s\n'%(row['count'], row[plots[type].x])

		plot_filepath = joinpath(dirname(realpath(__file__)), 'plot.gp')
		plot = Popen(["gnuplot", "-e",  "title='%s'"%(plots[type].title), "-e",\
					"size = '%d, %d'"%(count*60, int(count*60/1.6)), plot_filepath],\
					stdin=PIPE)
		plot.communicate(data)

		
	def add_job_period(self, start=None, end=None):
		if start and end:
			start_timestamp = (datetime.strptime(start, '%d%b%Y') - datetime(1970, 1, 1)).total_seconds() 
			end_timestamp = (datetime.strptime(end, '%d%b%Y') - datetime(1970, 1, 1)).total_seconds() 
		else:
			start_timestamp = self._db.query("SELECT MAX(end_time) FROM job_period")[0][0]
			if start_timestamp is not None and start_timestamp > time():
				return

			if start_timestamp is None:
				start_timestamp = int(time())

			end_timestamp = start_timestamp + int(timedelta(days=globals.job_period_days).total_seconds())

		while start_timestamp < end_timestamp:
			self._db.insert('job_period', (start_timestamp, start_timestamp + timedelta(hours=12).total_seconds(), 0))
			start_timestamp += timedelta(hours=12).total_seconds()
		self._db.commit()

	
	def get_stream_job(self):
		time_periods_query = "SELECT * FROM job_period WHERE end_time <= %d AND get_count < 2"%(int(time()) - timezone)
		time_periods = self._db.query(time_periods_query)
		
		for tp in time_periods:
			self.get_stream(tp['start_time'], tp['end_time'])
			self._db.query("UPDATE job_period SET get_count = %d WHERE end_time = %d"%(tp['get_count'] + 1, tp['end_time']))
			self._db.commit()

		
	def get_timestamp(self, date_string):
		return (datetime.strptime(date_string, '%d%b%Y') - datetime(1970, 1, 1)).total_seconds()


	def get_photos(self, start_time=None, end_time=None, cont=False):
		def db_insert(jdata):
			for record in jdata['data']:
				for k in record:
					if (record[k] == ''):
						record[k] = None

				self._db.insert('photo', (record['pid'], record['aid'], record['caption'], 
					record['comment_info']['comment_count'], record['like_info']['like_count'], record['created'], 
					record['link'], record['owner'], record['place_id'], record['src_big'], 0, 0))

				self._db.commit()
				print '.',; sys.stdout.flush()

		if not (start_time and end_time):
			if cont:
				if os.path.exists('./end_time'):
					end_time = int(open('./end_time', 'r').read().strip())
				else:
					end_time = int(time()) #+ int(timedelta(days=-6).total_seconds())
			else:
				end_time = int(time()) #+ int(timedelta(days=-6).total_seconds())
			start_time = end_time + int(timedelta(hours=-12).total_seconds())

		
		stream_query = """SELECT pid, aid, caption, comment_info, like_info, created, 
				link, owner, place_id, src_big FROM photo WHERE owner """ 
		source_clause =	"IN (SELECT uid2 FROM friend WHERE uid1=me())"
		limit_clause = ' LIMIT 500'
		time_clause = ' AND created > %d AND created < %d'%(start_time, end_time)

		jdata = self.do_fql(stream_query + source_clause) # + time_clause)
		db_insert(jdata)

		source_clause = "= me()"

		jdata = self.do_fql(stream_query + source_clause) # + time_clause)				
		db_insert(jdata)

		if cont: open('end_time', 'w').write(str(start_time))
Ejemplo n.º 3
0
#!/usr/bin/env python2

from dbmanager import DBManager
from mqtt import MQTTDatabaseRunner

from config import DB_CONNECTION_STRING

db_manager = DBManager(DB_CONNECTION_STRING)
db_manager.connect()

db_runner = MQTTDatabaseRunner(db_manager)
db_runner.start(threadded = False)

#db_runner.wait()