def collect_users(call, users_config_filepath, output_folder, config):
    """Fetch the users listed in a JSON config file chunk by chunk, resumably.

    The JSON file must contain a ``users`` list and may contain a
    ``current_ix`` offset. Processing starts at that offset, and after every
    chunk the updated offset is written into the config dict and handed to
    ``flash_cmd_config``, followed by a pause of ``WAIT_TIME`` seconds.

    Args:
        call: Forwarded unchanged as the ``call`` argument of
            ``TwitterCrawler.fetch_users``.
        users_config_filepath: Path of the JSON users config file.
        output_folder: Forwarded to ``TwitterCrawler`` and to
            ``flash_cmd_config``.
        config: Mapping with an ``apikeys`` mapping; the last of its values
            is used as the API credentials.

    NOTE(review): a later definition in this module reuses the name
    ``collect_users`` with a different signature and therefore rebinds it;
    confirm which of the two is meant to be callable under that name.
    """
    apikeys = list(config['apikeys'].values()).pop()

    with open(os.path.abspath(users_config_filepath), 'r') as users_config_rf:
        users_config = json.load(users_config_rf)

    chunk_size = 100
    current_ix = users_config.get('current_ix', 0)
    all_users = users_config['users']
    pending_users = all_users[current_ix:]
    # current_ix is an absolute offset into the full list, so progress has to
    # be reported against the full length; using the remaining count made a
    # resumed run log impossible values such as [150/50].
    total = len(all_users)

    # presumably util.chunks yields successive slices of at most chunk_size
    # items -- verify against util.
    for users in util.chunks(pending_users, chunk_size):
        try:
            crawler = TwitterCrawler(apikeys=apikeys, client_args=CLIENT_ARGS, output_folder=output_folder)
            crawler.fetch_users(call=call, users=users)
            current_ix += len(users)
        except Exception as exc:
            # Best effort: log and move on to the next chunk. Ctrl+C
            # (KeyboardInterrupt) is not an Exception and is deliberately not
            # handled; a restart resumes from the saved offset and may fetch
            # some data twice.
            # NOTE(review): a failed chunk is skipped without advancing
            # current_ix, so after a failure the saved offset no longer
            # matches the chunks actually fetched -- confirm this is accepted.
            logger.error(exc)
            logger.error(util.full_stack())

        users_config['current_ix'] = current_ix
        # presumably persists the updated config so a restart can resume --
        # verify against flash_cmd_config.
        flash_cmd_config(users_config, users_config_filepath, output_folder)

        logger.info('COMPLETED -> (current_ix: [%d/%d])' % (current_ix, total))
        logger.info('PAUSE %ds to CONTINUE...' % WAIT_TIME)
        time.sleep(WAIT_TIME)

    # The loop never breaks, so this always runs once the chunks are exhausted
    # (same effect as the original for/else).
    logger.info('[collect_users] ALL COMPLETED')
def collect_users_all(user_screen_names_ids, deep, output_folder, config):
    """Fetch one user's profile, then their timeline, then their friend ids.

    Each step only runs when the previous one returned ``'ok'``. Unless an
    exception occurs, the function then pauses for ``WAIT_TIME`` seconds.

    Args:
        user_screen_names_ids: A single identifier string. An all-digit value
            is looked up as ``user_id``, anything else as ``screen_name``; '@'
            characters at either end are stripped for the profile and
            relationship calls.
        deep: Forwarded as ``deep`` to ``fetch_user_relationships``.
        output_folder: Forwarded as ``output_folder`` to ``fetch_users``.
        config: Mapping with an ``apikeys`` mapping; the last of its values
            is used as the API credentials.

    Returns:
        The status returned by the last crawler step that ran, or ``'error'``
        if any step raised an exception.
    """
    apikeys = list(config['apikeys'].values()).pop()
    call = 'user_id' if user_screen_names_ids.isdigit() else 'screen_name'

    try:
        identifier = user_screen_names_ids.strip('@')
        crawler = TwitterCrawler(apikeys=apikeys, client_args=CLIENT_ARGS)

        ret = crawler.fetch_users(call=call, users=[identifier], output_folder=output_folder)

        if ret == 'ok':
            # The timeline call receives the identifier exactly as passed in
            # (not stripped); only the status of the returned triple is used.
            ret, _since_id, _remove = crawler.fetch_user_timeline(user_screen_names_ids, since_id=1)

        # ret is only reassigned inside the branch above, so this check can
        # pass only when both earlier steps reported 'ok'.
        if ret == 'ok':
            ret = crawler.fetch_user_relationships(call='/friends/ids', user_id=identifier, deep=deep)
    except Exception as exc:
        logger.error(exc)
        return 'error'

    logger.info('PAUSE %ds to CONTINUE...' % WAIT_TIME)
    time.sleep(WAIT_TIME)
    return ret
def collect_users(user_screen_names_ids, output_folder, config):
    """Fetch the profile of a single user, then pause.

    Args:
        user_screen_names_ids: A single identifier string. An all-digit value
            is looked up as ``user_id``, anything else as ``screen_name``; '@'
            characters at either end are stripped before the lookup.
        output_folder: Forwarded as ``output_folder`` to ``fetch_users``.
        config: Mapping with an ``apikeys`` mapping; the last of its values
            is used as the API credentials.

    Returns:
        Whatever ``TwitterCrawler.fetch_users`` returns, or ``'error'`` if it
        raised an exception (in which case there is no pause).

    NOTE(review): this module defines ``collect_users`` earlier with a
    different signature; this later definition is the one the name ends up
    bound to. Confirm that is intended.
    """
    apikeys = list(config['apikeys'].values()).pop()
    call = 'user_id' if user_screen_names_ids.isdigit() else 'screen_name'

    try:
        identifier = user_screen_names_ids.strip('@')
        crawler = TwitterCrawler(apikeys=apikeys, client_args=CLIENT_ARGS)
        ret = crawler.fetch_users(call=call, users=[identifier], output_folder=output_folder)
    except Exception as exc:
        logger.error(exc)
        return 'error'

    logger.info('PAUSE %ds to CONTINUE...' % WAIT_TIME_COLLECT_USER)
    time.sleep(WAIT_TIME_COLLECT_USER)
    return ret