import json
import logging
import sys

from searchtweets import (ResultStream, gen_params_from_config,
                          load_credentials, merge_dicts, read_config,
                          write_result_stream)

# NOTE: parse_cmd_args(), _filter_sensitive_args(), REQUIRED_KEYS and logger
# are defined elsewhere in the full script (a sketch of the filter follows
# below).


def main():
    args_dict = vars(parse_cmd_args().parse_args())
    if args_dict.get("debug") is True:
        logger.setLevel(logging.DEBUG)
        logger.debug("command line args dict:")
        logger.debug(json.dumps(args_dict, indent=4))

    if args_dict.get("config_filename") is not None:
        configfile_dict = read_config(args_dict["config_filename"])
    else:
        configfile_dict = {}

    extra_headers_str = args_dict.get("extra_headers")
    if extra_headers_str is not None:
        args_dict['extra_headers_dict'] = json.loads(extra_headers_str)
        del args_dict['extra_headers']

    logger.debug("config file ({}) arguments sans sensitive args:".format(
        args_dict["config_filename"]))
    logger.debug(json.dumps(_filter_sensitive_args(configfile_dict), indent=4))

    creds_dict = load_credentials(filename=args_dict["credential_file"],
                                  account_type=args_dict["account_type"],
                                  yaml_key=args_dict["credential_yaml_key"],
                                  env_overwrite=args_dict["env_overwrite"])

    dict_filter = lambda x: {k: v for k, v in x.items() if v is not None}

    config_dict = merge_dicts(dict_filter(configfile_dict),
                              dict_filter(creds_dict),
                              dict_filter(args_dict))

    logger.debug("combined dict (cli, config, creds) sans password:")
    logger.debug(json.dumps(_filter_sensitive_args(config_dict), indent=4))

    if len(dict_filter(config_dict).keys() & REQUIRED_KEYS) < len(REQUIRED_KEYS):
        print(REQUIRED_KEYS - dict_filter(config_dict).keys())
        logger.error("ERROR: not enough arguments for the program to work")
        sys.exit(1)

    stream_params = gen_params_from_config(config_dict)
    logger.debug(
        "full arguments passed to the ResultStream object sans password")
    logger.debug(json.dumps(_filter_sensitive_args(stream_params), indent=4))

    rs = ResultStream(tweetify=False, **stream_params)
    logger.debug(str(rs))

    if config_dict.get("filename_prefix") is not None:
        stream = write_result_stream(
            rs,
            filename_prefix=config_dict.get("filename_prefix"),
            results_per_file=config_dict.get("results_per_file"))
    else:
        stream = rs.stream()

    for tweet in stream:
        if config_dict["print_stream"] is True:
            print(json.dumps(tweet))
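# _filter_sensitive_args() is called above but never shown. A minimal sketch,
# assuming the sensitive key names below (they are an assumption, not
# necessarily the script's own list):

def _filter_sensitive_args(args_dict):
    """Return a copy of args_dict with credential-bearing keys removed."""
    sensitive_args = ['password', 'consumer_key', 'consumer_secret',
                      'bearer_token']  # assumed key names
    return {k: v for k, v in args_dict.items() if k not in sensitive_args}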
def main():
    parser = parse_cmd_args()
    # Reuse the parser built above instead of calling parse_cmd_args() twice.
    args_dict = vars(parser.parse_args())
    if args_dict.get("debug") is True:
        logger.setLevel(logging.DEBUG)
        logger.debug(json.dumps(args_dict, indent=4))

    if args_dict.get("config_filename") is not None:
        configfile_dict = read_config(args_dict["config_filename"])
    else:
        configfile_dict = {}

    creds_dict = load_credentials(filename=args_dict["credential_file"],
                                  account_type=args_dict["account_type"],
                                  yaml_key=args_dict["credential_yaml_key"],
                                  env_overwrite=args_dict["env_overwrite"])

    dict_filter = lambda x: {k: v for k, v in x.items() if v is not None}

    # Later arguments win on key collisions, so credentials override CLI args
    # here. Note that this variant logs the merged dict unfiltered, so
    # credentials can end up in debug output.
    config_dict = merge_dicts(dict_filter(configfile_dict),
                              dict_filter(args_dict),
                              dict_filter(creds_dict))

    logger.debug(json.dumps(config_dict, indent=4))

    if len(dict_filter(config_dict).keys() & REQUIRED_KEYS) < len(REQUIRED_KEYS):
        print(REQUIRED_KEYS - dict_filter(config_dict).keys())
        logger.error("ERROR: not enough arguments for the program to work")
        sys.exit(1)

    stream_params = gen_params_from_config(config_dict)
    logger.debug(json.dumps(stream_params, indent=4))

    rs = ResultStream(tweetify=False, **stream_params)
    logger.debug(str(rs))

    if config_dict.get("filename_prefix") is not None:
        # .get() avoids a KeyError when results_per_file is unset.
        stream = write_result_stream(
            rs,
            filename_prefix=config_dict["filename_prefix"],
            results_per_file=config_dict.get("results_per_file"))
    else:
        stream = rs.stream()

    for tweet in stream:
        if config_dict["print_stream"] is True:
            print(json.dumps(tweet))
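# For reference, load_credentials() above reads a YAML credentials file. Per
# the searchtweets README, the section named by yaml_key looks roughly like
# this (endpoint and account_type values are placeholders):
#
#   search_tweets_api:
#     account_type: premium
#     endpoint: https://api.twitter.com/1.1/tweets/search/30day/dev.json
#     consumer_key: <CONSUMER_KEY>
#     consumer_secret: <CONSUMER_SECRET>
#
# Minimal standalone usage sketch:
from searchtweets import load_credentials

search_args = load_credentials(filename="~/.twitter_keys.yaml",
                               yaml_key="search_tweets_api",
                               env_overwrite=False)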
elif args['output'] == 'csv':
    # save to csv
    print('[INFO] - Output file set to csv')
else:
    print('[INFO] - Invalid output file! Valid options are pickle or csv. Exiting...')
    sys.exit()  # bare `exit` was a no-op; actually terminate

# load twitter keys
search_creds = load_credentials('.twitter_keys.yaml',
                                yaml_key='search_tweets_v2',
                                env_overwrite=False)

# load configuration for search query
config = read_config('search_config.yaml')

# fields for v2 api
tweetfields = ",".join([
    "attachments",
    "author_id",
    "conversation_id",
    "created_at",
    "entities",
    "geo",
    "id",
    "in_reply_to_user_id",
    "lang",
    "public_metrics",
    "possibly_sensitive",
    "referenced_tweets",
    # the fragment breaks off above; the remaining documented v2 tweet
    # fields below are assumed to complete the list
    "reply_settings",
    "source",
    "text",
    "withheld",
])
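# A plausible next step (not shown in the original fragment): hand the joined
# tweetfields string to the v2 request builder and collect results. The
# config keys "query" and "max_results" are assumptions, and depending on the
# searchtweets-v2 version gen_request_parameters may also require a
# granularity argument:
from searchtweets import collect_results, gen_request_parameters

query = gen_request_parameters(config['query'],
                               results_per_call=config['max_results'],
                               tweet_fields=tweetfields)
tweets = collect_results(query,
                         max_tweets=config['max_results'],
                         result_stream_args=search_creds)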
# This polling variant needs the same imports as the scripts above, plus the
# standard-library time module.

def main():
    args_dict = vars(parse_cmd_args().parse_args())
    if args_dict.get("debug") is True:
        logger.setLevel(logging.DEBUG)
        logger.debug("command line args dict:")
        logger.debug(json.dumps(args_dict, indent=4))

    if args_dict.get("config_filename") is not None:
        configfile_dict = read_config(args_dict["config_filename"])
    else:
        configfile_dict = {}

    extra_headers_str = args_dict.get("extra_headers")
    if extra_headers_str is not None:
        args_dict['extra_headers_dict'] = json.loads(extra_headers_str)
        del args_dict['extra_headers']

    logger.debug("config file ({}) arguments sans sensitive args:".format(
        args_dict["config_filename"]))
    logger.debug(json.dumps(_filter_sensitive_args(configfile_dict), indent=4))

    creds_dict = load_credentials(filename=args_dict["credential_file"],
                                  yaml_key=args_dict["credential_yaml_key"],
                                  env_overwrite=args_dict["env_overwrite"])

    dict_filter = lambda x: {k: v for k, v in x.items() if v is not None}

    config_dict = merge_dicts(dict_filter(configfile_dict),
                              dict_filter(creds_dict),
                              dict_filter(args_dict))

    logger.debug("combined dict (cli, config, creds):")
    logger.debug(json.dumps(_filter_sensitive_args(config_dict), indent=4))

    if len(dict_filter(config_dict).keys() & REQUIRED_KEYS) < len(REQUIRED_KEYS):
        print(REQUIRED_KEYS - dict_filter(config_dict).keys())
        logger.error("ERROR: not enough arguments for the script to work")
        sys.exit(1)

    stream_params = gen_params_from_config(config_dict)
    logger.debug(
        "full arguments passed to the ResultStream object sans credentials")
    logger.debug(json.dumps(_filter_sensitive_args(stream_params), indent=4))

    while True:
        start = time.time()

        rs = ResultStream(tweetify=False, **stream_params)
        logger.debug(str(rs))

        if config_dict.get("filename_prefix") is not None:
            stream = write_result_stream(
                rs,
                filename_prefix=config_dict.get("filename_prefix"),
                results_per_file=config_dict.get("results_per_file"))
        else:
            stream = rs.stream()

        first_tweet = True
        tweets_num = 0

        # Iterate through the Tweet array and handle output.
        for tweet in stream:
            tweets_num += 1
            # Grab the Tweet ID from the first (newest) Tweet.
            if first_tweet:
                newest_id = tweet['id']
                first_tweet = False
            if config_dict["print_stream"] is True:
                print(json.dumps(tweet))

        # This polling script switches to since_id requests, dropping the
        # start_time parameter if it was used for backfill, and pages forward
        # from the newest Tweet ID. Guarded so that an empty poll (where
        # newest_id is never assigned) does not crash the loop.
        if tweets_num > 0:
            print(f"{tweets_num} new Tweets. Newest_id: {newest_id}")
            request_json = json.loads(stream_params['request_parameters'])
            if 'start_time' in request_json.keys():
                del request_json['start_time']
            request_json.update(since_id=newest_id)
            stream_params['request_parameters'] = json.dumps(request_json)
        else:
            print("0 new Tweets.")

        # Sleep for the remainder of the polling interval; if the request ran
        # longer than the interval, fall back to a full interval.
        duration = time.time() - start
        sleep_interval = (float(config_dict["interval"]) * 60) - duration
        if sleep_interval < 0:
            sleep_interval = float(config_dict["interval"]) * 60
        time.sleep(sleep_interval)
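# The since_id bookkeeping above is easy to test in isolation. A hypothetical
# helper (not part of the original script) performing the same transformation:

import json

def advance_request_parameters(request_parameters: str, newest_id: str) -> str:
    """Drop any backfill start_time and page forward from the newest Tweet ID."""
    request_json = json.loads(request_parameters)
    request_json.pop('start_time', None)  # since_id supersedes backfill
    request_json['since_id'] = newest_id
    return json.dumps(request_json)

# Example:
#   advance_request_parameters(
#       '{"query": "snow", "start_time": "2021-01-01T00:00:00Z"}', "1234")
#   -> '{"query": "snow", "since_id": "1234"}'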