def _get_all(fb_post_id, objects_type, after=None, start_with=None):
    """Generator that yields every object of `objects_type` (e.g. 'comments' or
    'likes') attached to the post, following the paging cursors returned by the
    Graph API. `start_with` may hold an already-fetched first page; `after` is
    the paging cursor to resume from."""
    global graph
    if start_with is not None:
        for item in start_with['data']:
            yield item
        if 'next' in start_with['paging']:
            for item in _get_all(fb_post_id, objects_type,
                                 after=start_with['paging']['cursors']['after']):
                yield item
        return
    if after is not None:
        #additional = graph.request(fb_post_id + '/' + objects_type, {'after': after})
        additional = _call_and_retry(graph.request, 3,
                                     fb_post_id + '/' + objects_type, {'after': after})
    else:
        #additional = graph.request(fb_post_id + '/' + objects_type)
        additional = _call_and_retry(graph.request, 3, fb_post_id + '/' + objects_type)
    for item in additional['data']:
        yield item
    if 'next' in additional['paging']:
        for item in _get_all(fb_post_id, objects_type,
                             after=additional['paging']['cursors']['after']):
            yield item
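# `_call_and_retry` is used throughout this module but defined elsewhere. The
# sketch below (deliberately named `_call_and_retry_sketch` so it does not
# shadow the real helper) only illustrates the behaviour implied by its call
# sites: call `func(*args, **kwargs)` up to `retries` times and re-raise the
# last exception if every attempt fails. The one-second pause between attempts
# is an assumption, not taken from the original code.
import time

def _call_and_retry_sketch(func, retries, *args, **kwargs):
    """Call `func(*args, **kwargs)`, retrying up to `retries` times (sketch)."""
    for attempt in range(retries):
        try:
            return func(*args, **kwargs)
        except Exception:
            if attempt == retries - 1:
                raise          # give up after the last attempt
            time.sleep(1)      # assumed back-off between attempts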
def get_user_data(user_fbid):
    '''Retrieves the interesting fields from the user's public profile.
    The request is retried 3 times before failing.
    Returns a dict containing user information, or None if it wasn't retrieved.'''
    global graph
    logger = logging.getLogger('root')
    try:
        return _call_and_retry(
            graph.get_object, 3, user_fbid,
            fields='id,first_name,last_name,relationship_status,gender,age_range')
    except Exception as e:
        logger.exception(e)
        logger.warning('Unable to retrieve profile for user with fbid={0}'.format(user_fbid))
        return None
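# Illustrative only: one way get_user_data could be combined with _get_all to
# print the public profile of everyone who commented on a post. The 'comments'
# edge and the shape of the 'from' field are assumptions about the Graph API
# responses handled here; this helper is not part of the original module.
def _example_print_commenter_profiles(fb_post_id):
    """Print basic profile info for every commenter on `fb_post_id` (sketch)."""
    for comment in _get_all(fb_post_id, 'comments'):
        profile = get_user_data(comment['from']['id'])
        if profile is not None:
            print('{0} {1} ({2})'.format(profile.get('first_name'),
                                         profile.get('last_name'),
                                         profile.get('gender')))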
def load_posts(engine, page_id, access_token, requests_limit, log_config_fname=None):
    """Load data and metadata for all posts on the page."""
    global graph
    global logger
    logger = logging.getLogger('root')
    session = sessionmaker(bind=engine)()
    requests_num = 0
    graph = facebook.GraphAPI(access_token)
    ids = post_ids(graph, page_id)
    if not ids:
        logger.info('No posts found.')
    start = time.time()
    for post_id in ids:
        if session.query(Post).filter_by(fbid=unicode(post_id, 'utf8')).first():
            logger.debug('Post with ID %s already loaded. Skip.' % post_id)
            continue
        # Query and add post to session.
        requests_num += 1
        try:
            data_dict = _call_and_retry(graph.get_object, 3, post_id, metadata=1)
            try:
                _add_post(session, data_dict)
                # Mark the post as complete (i.e. all the associated data was
                # retrieved).
                session.query(Post).filter_by(fbid=unicode(post_id, 'utf8')).update(
                    {'incomplete': False})
            except Exception as e:
                logger.exception(e)
                logger.warning('Incomplete data for post with id: %s' % (post_id))
        except Exception as e:
            logger.exception(e)
            logger.warning('Skipped post with id: %s' % (post_id))
            session.add(Post(fbid=unicode(post_id, 'utf8'), incomplete=True))
        session.flush()
        session.commit()
        logger.debug('%s posts loaded, %s seconds passed.' % (requests_num, time.time() - start))
    logger.info('Done in %s seconds.' % (time.time() - start))
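# load_posts above relies on a SQLAlchemy `Post` model with (at least) `fbid`
# and `incomplete` columns, plus an `_add_post` helper, both defined elsewhere
# in the project. A minimal sketch of such a model follows under the name
# `PostSketch`; the table name, the column types and the choice of `fbid` as
# primary key are assumptions, not taken from the original code.
from sqlalchemy import Boolean, Column, Unicode
from sqlalchemy.ext.declarative import declarative_base

_SketchBase = declarative_base()

class PostSketch(_SketchBase):
    """Minimal stand-in for the project's Post model (illustration only)."""
    __tablename__ = 'posts'                        # assumed table name
    fbid = Column(Unicode(64), primary_key=True)   # Facebook post id, e.g. '<pageid>_<postid>'
    incomplete = Column(Boolean, default=True)     # False once all associated data is stored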
def post_ids(graph, page_id, limit=500):
    """Generator that yields post IDs ordered descending by 'created_time'
    (from the newest post to the oldest)."""
    bottom_time = int(round(time.time()))
    while True:
        query = '''
            SELECT post_id, created_time FROM stream
            WHERE source_id="%s" AND actor_id="%s" AND created_time < %s
            ORDER BY created_time DESC LIMIT %s
        ''' % (page_id, page_id, bottom_time, limit)
        #results = graph.fql({'ids': query})[0]['fql_result_set']
        results = _call_and_retry(graph.fql, 3, {'ids': query})[0]['fql_result_set']
        if not results:
            return
        for result in results:
            bottom_time = result['created_time']
            yield result['post_id']
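# Illustrative driver, not part of the original module: how load_posts might be
# invoked end to end. The SQLite connection string, the page id and the access
# token are placeholders, the logging setup is a minimal assumption, and the
# Post table is assumed to have been created elsewhere in the project.
if __name__ == '__main__':
    import logging
    from sqlalchemy import create_engine

    logging.basicConfig(level=logging.DEBUG)   # the module logs through logging.getLogger('root')

    engine = create_engine('sqlite:///facebook_posts.db')   # placeholder database
    load_posts(engine,
               page_id='<PAGE_ID>',
               access_token='<ACCESS_TOKEN>',
               requests_limit=1000)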