def collect_users(call, users_config_filepath, output_folder, config):
    """Fetch the users listed in a JSON config file, chunk by chunk, with a resumable index.

    The JSON file at ``users_config_filepath`` must contain a ``'users'`` list and
    may contain a ``'current_ix'`` integer marking where a previous run stopped.
    After every chunk the updated ``current_ix`` is stored back into the config
    dict and handed to ``flash_cmd_config`` (presumably persisting it to disk --
    confirm against that helper), then the function sleeps ``WAIT_TIME`` seconds.

    Args:
        call: Passed through unchanged to ``TwitterCrawler.fetch_users``.
        users_config_filepath: Path of the JSON config file to read.
        output_folder: Passed to ``TwitterCrawler`` and ``flash_cmd_config``.
        config: Mapping with an ``'apikeys'`` mapping; its last value is used.

    Returns:
        None.
    """
    # Use the last set of credentials found in the 'apikeys' mapping.
    apikeys = list(config['apikeys'].values()).pop()

    with open(os.path.abspath(users_config_filepath), 'r') as users_config_rf:
        users_config = json.load(users_config_rf)

    # Chunk size; presumably matches the per-request user limit of the
    # underlying API call -- TODO confirm.
    max_range = 100
    current_ix = users_config.get('current_ix', 0)

    all_users = users_config['users']
    # Progress is logged as an absolute index, so the denominator must be the
    # size of the whole list. Counting only the remaining slice made a resumed
    # run report values such as [300/800] and finish above 100%.
    total = len(all_users)
    user_chunks = util.chunks(all_users[current_ix:], max_range)

    for users in user_chunks:
        try:
            twitter_crawler = TwitterCrawler(apikeys=apikeys, client_args=CLIENT_ARGS, output_folder=output_folder)
            twitter_crawler.fetch_users(call=call, users=users)
            current_ix += len(users)
        except Exception as exc:
            # Best effort: log and move on to the next chunk. Ctrl+C
            # (KeyboardInterrupt) is not an Exception subclass, so it is not
            # caught here; a restart resumes from the last stored index and
            # may fetch some users twice, which is accepted.
            # NOTE(review): a failed chunk does not advance current_ix while
            # later successful chunks do, so after a failure the stored index
            # no longer points at the chunk that failed -- confirm whether
            # that chunk should be retried on restart.
            logger.error(exc)
            logger.error(util.full_stack())

        users_config['current_ix'] = current_ix

        flash_cmd_config(users_config, users_config_filepath, output_folder)

        logger.info('COMPLETED -> (current_ix: [%d/%d])'%(current_ix, total))
        logger.info('PAUSE %ds to CONTINUE...'%WAIT_TIME)
        time.sleep(WAIT_TIME)

    # The loop never breaks, so this runs whenever the loop ends normally.
    logger.info('[collect_users] ALL COMPLETED')
Example #2
0
def collect_users(call, users_config_filepath, output_folder, config):
    """Fetch users from a JSON config file in chunks, recording a resume index.

    Reads the JSON file at ``users_config_filepath``, which must hold a
    ``'users'`` list and may hold a ``'current_ix'`` integer left by an earlier
    run. Users from ``current_ix`` onward are split into chunks and each chunk
    is passed to ``TwitterCrawler.fetch_users``. After each chunk the config
    dict gets the new ``current_ix`` and is passed to ``flash_cmd_config``
    (presumably writing it back out -- confirm against that helper), followed
    by a ``WAIT_TIME``-second pause.

    Args:
        call: Forwarded as-is to ``TwitterCrawler.fetch_users``.
        users_config_filepath: Location of the JSON config file.
        output_folder: Forwarded to ``TwitterCrawler`` and ``flash_cmd_config``.
        config: Mapping containing an ``'apikeys'`` mapping; the last value is used.

    Returns:
        None.
    """
    # The last entry of the 'apikeys' mapping supplies the credentials.
    apikeys = list(config['apikeys'].values()).pop()

    with open(os.path.abspath(users_config_filepath), 'r') as users_config_rf:
        users_config = json.load(users_config_rf)

    # Number of users per request; presumably the API's per-call maximum --
    # TODO confirm.
    max_range = 100
    current_ix = users_config.get('current_ix', 0)

    all_users = users_config['users']
    # current_ix is an absolute position in the full list, so report it
    # against the full length. Using the length of the remaining slice
    # produced misleading progress (e.g. [1000/800]) on resumed runs.
    total = len(all_users)
    user_chunks = util.chunks(all_users[current_ix:], max_range)

    for users in user_chunks:
        try:
            twitter_crawler = TwitterCrawler(apikeys=apikeys, client_args=CLIENT_ARGS, output_folder=output_folder)
            twitter_crawler.fetch_users(call=call, users=users)
            current_ix += len(users)
        except Exception as exc:
            # Deliberately best effort: the error is logged and the loop
            # continues. KeyboardInterrupt is not derived from Exception, so
            # Ctrl+C still aborts; restarting resumes from the stored index
            # and duplicates are tolerated.
            # NOTE(review): because only successful chunks advance
            # current_ix, the stored index drifts away from the failed chunk
            # once later chunks succeed -- verify the intended retry policy.
            logger.error(exc)
            logger.error(util.full_stack())

        users_config['current_ix'] = current_ix

        flash_cmd_config(users_config, users_config_filepath, output_folder)

        logger.info('COMPLETED -> (current_ix: [%d/%d])'%(current_ix, total))
        logger.info('PAUSE %ds to CONTINUE...'%WAIT_TIME)
        time.sleep(WAIT_TIME)

    # No break exists in the loop, so this is reached on every normal finish.
    logger.info('[collect_users] ALL COMPLETED')
def collect_users_all(user_screen_names_ids, deep, output_folder, config):
    """Fetch one user's profile, then timeline, then friend relationships.

    Each step runs only if the previous one returned ``'ok'``. Any exception
    raised while crawling is logged and turned into the return value
    ``'error'`` (without pausing). Otherwise the function sleeps ``WAIT_TIME``
    seconds and returns the status of the last step that was executed.

    Args:
        user_screen_names_ids: A single user identifier string; an all-digit
            string is treated as a user id, anything else as a screen name.
        deep: Forwarded to ``TwitterCrawler.fetch_user_relationships``.
        output_folder: Forwarded to ``TwitterCrawler.fetch_users``.
        config: Mapping with an ``'apikeys'`` mapping; its last value is used.

    Returns:
        ``'error'`` on exception, else the status returned by the crawler.
    """
    apikeys = list(config['apikeys'].values()).pop()

    # Decide the lookup key before any '@' is stripped, as the original does.
    call = 'user_id' if user_screen_names_ids.isdigit() else 'screen_name'

    try:
        handle = user_screen_names_ids.strip('@')
        crawler = TwitterCrawler(apikeys=apikeys, client_args=CLIENT_ARGS)
        ret = crawler.fetch_users(call=call, users=[handle], output_folder=output_folder)

        if ret == 'ok':
            # NOTE(review): the timeline call receives the identifier with any
            # '@' still attached, unlike the other two calls -- confirm this
            # is intended.
            ret, _since_id, _remove = crawler.fetch_user_timeline(user_screen_names_ids, since_id=1)

            if ret == 'ok':
                ret = crawler.fetch_user_relationships(call='/friends/ids', user_id=handle, deep=deep)
    except Exception as exc:
        logger.error(exc)
        return 'error'

    logger.info('PAUSE %ds to CONTINUE...' % WAIT_TIME)
    time.sleep(WAIT_TIME)
    return ret
def collect_users(user_screen_names_ids, output_folder, config):
    """Fetch the profile of a single user and pause afterwards.

    An exception raised while crawling is logged and reported as the return
    value ``'error'`` (without pausing). Otherwise the function sleeps
    ``WAIT_TIME_COLLECT_USER`` seconds and returns whatever
    ``TwitterCrawler.fetch_users`` returned.

    Args:
        user_screen_names_ids: A single user identifier string; an all-digit
            string is treated as a user id, anything else as a screen name.
        output_folder: Forwarded to ``TwitterCrawler.fetch_users``.
        config: Mapping with an ``'apikeys'`` mapping; its last value is used.

    Returns:
        ``'error'`` on exception, else the crawler's return value.
    """
    apikeys = list(config['apikeys'].values()).pop()

    # The id/screen-name decision is made on the raw string, before stripping '@'.
    call = 'user_id' if user_screen_names_ids.isdigit() else 'screen_name'

    try:
        handle = user_screen_names_ids.strip('@')
        crawler = TwitterCrawler(apikeys=apikeys, client_args=CLIENT_ARGS)
        ret = crawler.fetch_users(call=call, users=[handle], output_folder=output_folder)
    except Exception as exc:
        logger.error(exc)
        return 'error'

    logger.info('PAUSE %ds to CONTINUE...' % WAIT_TIME_COLLECT_USER)
    time.sleep(WAIT_TIME_COLLECT_USER)
    return ret