def collect_users_all(user_screen_names_ids, deep, output_folder, config):
    """Fetch one user's profile, timeline, and friends graph in sequence.

    The user may be given as a numeric id or an (optionally @-prefixed)
    screen name. Each step only runs if the previous one returned 'ok'.
    Sleeps WAIT_TIME seconds before returning so callers can loop without
    tripping rate limits.

    Returns the last crawler status string, or 'error' on any exception.
    """
    apikeys = list(config['apikeys'].values()).pop()
    # All-digit input is treated as a user_id, anything else as a screen_name.
    call = 'user_id' if user_screen_names_ids.isdigit() else 'screen_name'
    try:
        crawler = TwitterCrawler(apikeys=apikeys, client_args=CLIENT_ARGS)
        handle = user_screen_names_ids.strip('@')
        ret = crawler.fetch_users(call=call, users=[handle],
                                  output_folder=output_folder)
        if ret == 'ok':
            # Start from the beginning of the timeline (since_id=1).
            # NOTE(review): the raw (possibly @-prefixed) value is passed
            # here, while the stripped handle is used elsewhere — confirm
            # which form fetch_user_timeline expects.
            ret, since_id, remove = crawler.fetch_user_timeline(
                user_screen_names_ids, since_id=1)
            if ret == 'ok':
                ret = crawler.fetch_user_relationships(
                    call='/friends/ids', user_id=handle, deep=deep)
    except Exception as exc:
        logger.error(exc)
        return 'error'
    logger.info('PAUSE %ds to CONTINUE...' % WAIT_TIME)
    time.sleep(WAIT_TIME)
    return ret
def collect_tweets_by_user_ids(user_id, output_folder, config, since_id=1):
    """Crawl a single user's timeline starting from *since_id*.

    Sleeps WAIT_TIME_TREETS seconds before returning; returns the crawler's
    status string, or 'error' on any exception.

    NOTE(review): this name is re-defined later in the file, so this
    definition is shadowed at import time — confirm which version callers
    are meant to get.
    """
    apikeys = list(config['apikeys'].values()).pop()
    try:
        crawler = TwitterCrawler(apikeys=apikeys, client_args=CLIENT_ARGS,
                                 output_folder=output_folder)
        ret, since_id, remove = crawler.fetch_user_timeline(
            user_id, since_id=since_id)
    except Exception as exc:
        logger.error(exc)
        return 'error'
    logger.info('COMPLETED -> (user_id: [%s]; since_id: [%d]; remove: [%s])'
                % (user_id, since_id, remove))
    logger.info('PAUSE %ds to CONTINUE...' % WAIT_TIME_TREETS)
    time.sleep(WAIT_TIME_TREETS)
    return ret
def collect_tweets_by_user_ids(users_config_filepath, output_folder, config):
    """Continuously crawl timelines for every user in a JSON config file.

    Cycles forever over the config entries (itertools.cycle never ends),
    skipping users flagged 'remove', checkpointing updated since_id/remove
    state back to disk after each user, and pausing WAIT_TIME seconds
    between users.

    NOTE(review): unpacks two values from fetch_user_timeline while the
    earlier single-user helpers unpack three — confirm against the
    TwitterCrawler API which is correct.
    """
    apikeys = list(config['apikeys'].values()).pop()
    with open(os.path.abspath(users_config_filepath), 'r') as users_config_rf:
        users_config = json.load(users_config_rf)
    for user_config_id in itertools.cycle(users_config):
        user_config = users_config[user_config_id]
        if user_config.get('remove'):
            continue  # exhausted/deleted on a previous pass
        user_id = user_config['user_id']
        since_id = user_config.get('since_id', 1)
        logger.info('REQUEST -> (user_id: [%d]; since_id: [%d])'
                    % (user_id, since_id))
        remove = False
        try:
            crawler = TwitterCrawler(apikeys=apikeys, client_args=CLIENT_ARGS,
                                     output_folder=output_folder)
            since_id, remove = crawler.fetch_user_timeline(
                user_id, since_id=since_id)
        except Exception as exc:
            # Best-effort: log and keep cycling rather than abort the crawl.
            logger.error(exc)
            logger.error(util.full_stack())
        user_config['since_id'] = since_id
        user_config['remove'] = remove
        users_config[user_config_id] = user_config
        # Checkpoint so a restart resumes from the latest since_id.
        flash_cmd_config(users_config, users_config_filepath, output_folder)
        logger.info('COMPLETED -> (user_id: [%d]; since_id: [%d]; remove: [%s])'
                    % (user_id, since_id, remove))
        logger.info('PAUSE %ds to CONTINUE...' % WAIT_TIME)
        time.sleep(WAIT_TIME)
def collect_tweets_by_user_ids(users_config_filepath, output_folder, config):
    """Endlessly crawl the timeline of each user listed in a JSON config.

    Skips entries whose 'remove' flag is truthy, writes the updated
    since_id/remove state back after every user, and sleeps WAIT_TIME
    seconds between users. Never returns (itertools.cycle).

    NOTE(review): this is a near-exact duplicate of the definition above
    and silently replaces it at import time — one of the two should
    probably be removed or renamed.
    """
    apikeys = list(config['apikeys'].values()).pop()
    config_path = os.path.abspath(users_config_filepath)
    with open(config_path, 'r') as users_config_rf:
        users_config = json.load(users_config_rf)
    for user_config_id in itertools.cycle(users_config):
        user_config = users_config[user_config_id]
        if 'remove' in user_config and user_config['remove']:
            # Flagged on an earlier pass; nothing more to fetch.
            continue
        user_id = user_config['user_id']
        since_id = user_config['since_id'] if 'since_id' in user_config else 1
        logger.info('REQUEST -> (user_id: [%d]; since_id: [%d])'
                    % (user_id, since_id))
        remove = False
        try:
            crawler = TwitterCrawler(apikeys=apikeys,
                                     client_args=CLIENT_ARGS,
                                     output_folder=output_folder)
            since_id, remove = crawler.fetch_user_timeline(user_id,
                                                           since_id=since_id)
        except Exception as exc:
            # Log and continue: one bad user must not stop the whole crawl.
            logger.error(exc)
            logger.error(util.full_stack())
        user_config['since_id'] = since_id
        user_config['remove'] = remove
        users_config[user_config_id] = user_config
        # Persist state so progress survives a crash or restart.
        flash_cmd_config(users_config, users_config_filepath, output_folder)
        logger.info('COMPLETED -> (user_id: [%d]; since_id: [%d]; remove: [%s])'
                    % (user_id, since_id, remove))
        logger.info('PAUSE %ds to CONTINUE...' % WAIT_TIME)
        time.sleep(WAIT_TIME)