import logging

from ravenpackapi import RPApi

logging.basicConfig(level=logging.DEBUG)

# initialize the API (here we use the RP_API_KEY in os.environ)
api = RPApi()

# *** query the json endpoint for a dataset ***
# use the public dataset with id 'us30'
ds = api.get_dataset(dataset_id='us30')

# query the dataset analytics with the json endpoint
print(ds)
data = ds.json(
    start_date='2018-01-05 18:00:00',
    end_date='2018-01-05 18:01:00',
)
for record in data:
    print(record)

# *** query the ad-hoc json endpoint ***
adhoc_data = api.json(
    start_date='2018-01-05 18:00:00',
    end_date='2018-01-05 18:01:00',
    fields=ds.fields,
    filters=ds.filters,
)
print(adhoc_data)
for record in adhoc_data:
    print(record)
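# A small follow-up sketch: aggregate the ad-hoc records per entity with the standard
# library. It assumes each record exposes entity_name as an attribute (as in the realtime
# example below) and that the result set can be iterated again; otherwise re-run the query.
from collections import Counter

mentions = Counter(record.entity_name for record in adhoc_data)
for entity_name, count in mentions.most_common(10):
    print(entity_name, count)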
# New video tutorials are available at https://www.microprediction.com/python-1 to help you
# get started creating streams (see the 4th module in particular)

# This might be broken
import logging
import random

from ravenpackapi import RPApi, ApiConnectionError

logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)

# initialize the API (COVID_API should hold your RavenPack API key)
api = RPApi(api_key=COVID_API)

# query the realtime feed of the COVID dataset (COVID_UUID is its dataset id)
ds = api.get_dataset(dataset_id=COVID_UUID)


def wait_between_attempts():
    """ Incremental backoff between connection attempts """
    wait_time = 19.3  # time is in seconds
    while True:
        yield wait_time
        wait_time = min(wait_time * 1.5, 30)
        wait_time *= (100 + random.randint(0, 50)) / 100


wait_time = wait_between_attempts()


def get_live_data_keyword_count(keywords):
    """ Count mentions of the given keywords in the realtime feed """
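# A minimal sketch of one possible body for get_live_data_keyword_count, since the
# original implementation is not shown here. It is a hypothetical version that assumes
# each realtime record exposes an entity_name attribute (as in the realtime example
# further below); adjust the field you match on to whatever your dataset returns.
def get_live_data_keyword_count_sketch(keywords):
    """ Yield running counts of realtime records that mention one of the keywords """
    counts = {keyword: 0 for keyword in keywords}
    for record in ds.request_realtime():
        for keyword in keywords:
            if keyword.lower() in record.entity_name.lower():
                counts[keyword] += 1
        yield dict(counts)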
""" Download all data from the chosen dataset in a time range Download files are compressed, and chunked per year """ import os from ravenpackapi import RPApi from ravenpackapi.util import time_intervals, SPLIT_WEEKLY api = RPApi(api_key='YOUR_API_KEY') ds = api.get_dataset('YOUR_DATASET_ID') start_date = '2018-01-01' end_date = '2018-01-10' GET_COMPRESSED = True output_folder = './output' os.makedirs(output_folder, exist_ok=True) # create folder for output for range_start, range_end in time_intervals( start_date, end_date, split=SPLIT_WEEKLY, # available splits: # SPLIT_YEARLY, SPLIT_WEEKLY, SPLIT_DAILY # or SPLIT_MONTHLY (the default) ): job = ds.request_datafile(
if dataset_id is None:
    dataset = Dataset(
        api=api,
        filters={},
        name='Average sentiment',
        frequency='daily',
        fields=[
            {'average_ess': {'avg': {'field': 'EVENT_SENTIMENT_SCORE'}}},
        ],
    )
    dataset_id = dataset.save()
else:
    dataset = api.get_dataset(dataset_id)

# job = Job(api=api,
#           token='xxx')  # if you already have a job you can use this

# ... or request a new one
job = dataset.request_datafile(
    start_date='2018-01-01 00:00:00',
    end_date='2018-01-02 00:00:00',
)

# write only the ROLLUP rows
for line in job.iterate_results():
    timestamp, entity_id, entity_name, avg_sentiment = line
    if entity_id == 'ROLLUP':
        print(line)
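# A small follow-up sketch, shown as an alternative body for the loop above: instead of
# printing each ROLLUP row, collect them into a (timestamp, sentiment) series and compute
# the overall average. It assumes the sentiment column parses as a float.
daily_sentiment = []
for line in job.iterate_results():
    timestamp, entity_id, entity_name, avg_sentiment = line
    if entity_id == 'ROLLUP':
        daily_sentiment.append((timestamp, float(avg_sentiment)))

if daily_sentiment:
    overall = sum(value for _, value in daily_sentiment) / len(daily_sentiment)
    print('Average sentiment over %d days: %.4f' % (len(daily_sentiment), overall))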
import logging
import random
import time

from ravenpackapi import RPApi, ApiConnectionError

logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)

# initialize the API (here we use the RP_API_KEY in os.environ)
api = RPApi()

# query the realtime feed
ds = api.get_dataset(dataset_id='us500')


def wait_between_attempts():
    """ Incremental backoff between connection attempts """
    wait_time = 0.3  # time is in seconds
    while True:
        yield wait_time
        wait_time = min(wait_time * 1.5, 30)
        wait_time *= (100 + random.randint(0, 50)) / 100


wait_time = wait_between_attempts()

while True:
    try:
        for record in ds.request_realtime():
            print(record)
            print(record.timestamp_utc, record.entity_name,
                  record.event_relevance)  # any other dataset field can be printed here
    except (KeyboardInterrupt, SystemExit):
        break
    except (ApiConnectionError, ValueError) as e:
        # the connection dropped or returned bad data: back off, then reconnect
        wait = next(wait_time)
        logger.error("Connection error: %s - reconnecting in %.1f seconds", e, wait)
        time.sleep(wait)
def check_realtime():
    print("Realtime ...")
    for record in ds.request_realtime():
        assert isinstance(record, Result)
        break
    print('Realtime - OK')
    results['realtime'] = True


if __name__ == '__main__':
    args = parser.parse_args()
    api = RPApi(args.key)
    ds = api.get_dataset('all-granular-data')
    date_end = datetime.datetime.utcnow() - datetime.timedelta(minutes=60)
    date_start = date_end - datetime.timedelta(minutes=3)  # 3 minutes of data

    if not api.api_key:
        print("Please provide an APIKEY with the --key parameter "
              "or by setting the RP_API_KEY environment variable")
        exit(1)

    print("Checking connection with APIKEY: %s" % api.api_key)
    checks = [
        threading.Thread(target=check_datafile),
        threading.Thread(target=check_json),
        threading.Thread(target=check_realtime),
    ]
    results = dict(
        datafile=False,
        json=False,
        realtime=False,
    )
    # run all checks in parallel and report the outcome
    for check in checks:
        check.start()
    for check in checks:
        check.join()
    print(results)
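# check_datafile and check_json are defined earlier in the original script. A minimal
# hypothetical sketch of what check_json could look like, assuming the ds, date_start,
# date_end and results globals set up in the main block above:
def check_json_sketch():
    print("JSON ...")
    for record in ds.json(
            start_date=date_start.strftime('%Y-%m-%d %H:%M:%S'),
            end_date=date_end.strftime('%Y-%m-%d %H:%M:%S'),
    ):
        assert isinstance(record, Result)
        break
    print('JSON - OK')
    results['json'] = True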