# Example 1
def _get_all(fb_post_id, objects_type, after=None, start_with=None):
    """Yield every object of `objects_type` attached to post `fb_post_id`,
    transparently following Graph API cursor pagination.

    Parameters:
        fb_post_id -- Facebook ID of the post to read from.
        objects_type -- Graph API connection name (appended as
            '<post_id>/<objects_type>' to the request path).
        after -- pagination cursor to resume from (internal use).
        start_with -- an already-fetched first page; its 'data' items are
            yielded before any further requests are made.

    Each request is retried up to 3 times via _call_and_retry.
    """
    global graph
    # Iterate instead of recursing per page: the original recursive form
    # grew one stack frame per page and could hit the recursion limit on
    # posts with many pages of results.
    page = start_with
    while True:
        if page is None:
            path = fb_post_id + '/' + objects_type
            if after is not None:
                page = _call_and_retry(graph.request, 3, path, {'after': after})
            else:
                page = _call_and_retry(graph.request, 3, path)
        for item in page['data']:
            yield item
        # .get guards against the API omitting 'paging' on the final page,
        # which would previously raise KeyError.
        paging = page.get('paging', {})
        if 'next' not in paging:
            return
        after = paging['cursors']['after']
        page = None
# Example 2
def get_user_data(user_fbid):
    """Fetch selected public-profile fields for the given Facebook user.

    The Graph API request is retried up to 3 times before giving up.

    Returns a dict with the user's information, or None if it could not
    be retrieved.
    """
    global graph
    logger = logging.getLogger('root')
    fields = 'id,first_name,last_name,relationship_status,gender,age_range'
    try:
        return _call_and_retry(graph.get_object, 3, user_fbid, fields=fields)
    except Exception as e:
        # Best-effort: log the failure and signal it with None rather
        # than propagating the exception to the caller.
        logger.exception(e)
        logger.warning('Unable to retrieve profile for user with fbid={0}'.format(user_fbid))
        return None
# Example 3
def load_posts(engine, page_id, access_token, requests_limit, log_config_fname=None):
    """
    Load data and metadata for all posts on page.

    Each post not already present in the database is fetched via the
    Graph API (with retries) and stored through `_add_post`; posts whose
    data could not be fully retrieved are recorded with incomplete=True.

    Parameters:
        engine -- SQLAlchemy engine used to open a session.
        page_id -- Facebook ID of the page whose posts are loaded.
        access_token -- Graph API access token.
        requests_limit -- currently unused in this function body.
        log_config_fname -- currently unused in this function body.
    """
    # NOTE: the docstring must be the first statement in the function to
    # be picked up as __doc__; it previously appeared after the `global`
    # declarations and was a no-op string expression.
    global graph
    global logger

    logger = logging.getLogger('root')

    session = sessionmaker(bind=engine)()
    requests_num = 0
    graph = facebook.GraphAPI(access_token)
    ids = post_ids(graph, page_id)
    if not ids:
        logger.info('No posts found.')
    start = time.time()
    for post_id in ids:
        # Skip posts that were already loaded in a previous run.
        if session.query(Post).filter_by(fbid=unicode(post_id, 'utf8')).first():
            logger.debug(
                    'Post with ID %s already loaded. Skip.' %
                    post_id)
            continue
        # Query and add post to session.
        requests_num += 1
        try:
            data_dict = _call_and_retry(graph.get_object, 3, post_id, metadata=1)
            try:
                _add_post(session, data_dict)
                # Mark the post as complete (i.e. all the associated data was
                # retrived)
                session.query(Post).filter_by(fbid=unicode(post_id, 'utf8')).update({'incomplete': False})
            except Exception as e:
                # Post row exists but some associated data failed to load.
                logger.exception(e)
                logger.warning('Incomplete data for post with id: %s' % (post_id))
        except Exception as e:
            # The post itself could not be fetched: record a placeholder
            # row so the failure is visible and re-tryable later.
            logger.exception(e)
            logger.warning('Skipped post with id: %s' % (post_id))
            session.add(Post(fbid=unicode(post_id, 'utf8'), incomplete=True))
            session.flush()

        # Commit per post so progress survives a crash mid-run.
        session.commit()
        logger.debug(
                '%s posts loaded, %s seconds passed. ' %
                (requests_num, time.time() - start))
    logger.info('Done in %s seconds.' % (time.time() - start))
# Example 4
def post_ids(graph, page_id, limit=500):
    """
    Generator that returns post IDs ordered descending by
    'create_time' (from newest post to oldest).
    """
    # Walk backwards in time: each FQL query fetches the batch of posts
    # strictly older than the oldest post seen so far; an empty batch
    # means the stream is exhausted.
    # NOTE(review): the FQL query is built by string interpolation —
    # acceptable for a trusted page_id, but never pass untrusted input.
    bottom_time = int(round(time.time()))
    while True:
        query = '''
            SELECT post_id, created_time
            FROM stream
            WHERE source_id="%s" AND actor_id="%s" AND created_time < %s
            ORDER BY created_time DESC
            LIMIT %s
        ''' % (page_id, page_id, bottom_time, limit)
        # The request is retried up to 3 times before failing.
        batch = _call_and_retry(graph.fql, 3, {'ids': query})[0]['fql_result_set']
        if not batch:
            return
        for row in batch:
            bottom_time = row['created_time']
            yield row['post_id']