Example 1
from ravenpackapi import RPApi
import logging

logging.basicConfig(level=logging.DEBUG)
# initialize the API (here we use the RP_API_KEY in os.environ)
api = RPApi()

# query the json endpoint for a dataset
# use the public dataset with id 'us30'
ds = api.get_dataset(dataset_id='us30')
print(ds)

# query the dataset analytics with the json endpoint
data = ds.json(
    start_date='2018-01-05 18:00:00',
    end_date='2018-01-05 18:01:00',
)

for record in data:
    print(record)

# query the ad-hoc json endpoint
adhoc_data = api.json(
    start_date='2018-01-05 18:00:00',
    end_date='2018-01-05 18:01:00',
    fields=ds.fields,
    filters=ds.filters,
)
print(adhoc_data)
for record in adhoc_data:
    print(record)
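
A small follow-up sketch: tally how many records each entity produced in the queried minute. Attribute access such as record.entity_name mirrors the realtime example further down and is assumed to work the same way for json results.

from collections import Counter

entity_counts = Counter(
    record.entity_name
    for record in ds.json(
        start_date='2018-01-05 18:00:00',
        end_date='2018-01-05 18:01:00',
    )
)
for entity_name, count in entity_counts.most_common():
    print(entity_name, count)
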
Example 2
# New video tutorials are available at https://www.microprediction.com/python-1 to help you
# get started creating streams (see the 4th module in particular)

# This might be broken

import logging
import random

from ravenpackapi import RPApi, ApiConnectionError

logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)

# initialize the API (COVID_API is assumed to hold your RavenPack API key)
api = RPApi(api_key=COVID_API)

# query the realtime feed (COVID_UUID is assumed to hold the id of your COVID dataset)
ds = api.get_dataset(dataset_id=COVID_UUID)


def wait_between_attempts():
    """ Incremental backoff between connection attempts """
    wait_time = 19.3  # time is in seconds
    while True:
        yield wait_time
        wait_time = min(wait_time * 1.5, 30)
        wait_time *= (100 + random.randint(0, 50)) / 100


wait_time = wait_between_attempts()


def get_live_data_keyword_count(keywords):
    ...

"""

Download all data from the chosen dataset in a time range
Download files are compressed, and chunked per year

"""

import os

from ravenpackapi import RPApi
from ravenpackapi.util import time_intervals, SPLIT_WEEKLY

api = RPApi(api_key='YOUR_API_KEY')
ds = api.get_dataset('YOUR_DATASET_ID')

start_date = '2018-01-01'
end_date = '2018-01-10'
GET_COMPRESSED = True

output_folder = './output'

os.makedirs(output_folder, exist_ok=True)  # create folder for output
for range_start, range_end in time_intervals(
        start_date,
        end_date,
        split=SPLIT_WEEKLY,
        # available splits:
        # SPLIT_YEARLY, SPLIT_WEEKLY, SPLIT_DAILY
        # or SPLIT_MONTHLY (the default)
):
    job = ds.request_datafile(
        start_date=range_start,
        end_date=range_end,
        compressed=GET_COMPRESSED,  # assumed keyword for requesting compressed datafiles
    )
    # illustrative output name: one file per weekly interval, labelled by its start date
    chunk_name = 'data_%s.csv' % str(range_start)[:10]
    job.save_to_file(os.path.join(output_folder, chunk_name))

Example 4

from ravenpackapi import RPApi, Dataset

api = RPApi()  # uses the RP_API_KEY environment variable
dataset_id = None  # set to an existing dataset id to reuse it instead of creating one

if dataset_id is None:
    dataset = Dataset(api=api,
                      filters={},
                      name='Average sentiment',
                      frequency='daily',
                      fields=[{
                          'average_ess': {
                              'avg': {
                                  'field': 'EVENT_SENTIMENT_SCORE'
                              }
                          }
                      }])
    dataset_id = dataset.save()
else:
    dataset = api.get_dataset(dataset_id)

# job = Job(api=api,
#           token='xxx')  # if you already have a job you can use this

# ... or request a new one
job = dataset.request_datafile(
    start_date='2018-01-01 00:00:00',
    end_date='2018-01-02 00:00:00',
)

# write only the ROLLUP rows
for line in job.iterate_results():
    timestamp, entity_id, entity_name, avg_sentiment = line
    if entity_id == 'ROLLUP':
        print(line)
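
A hedged variant of the loop above: collect the ROLLUP rows into a mapping from timestamp to average sentiment instead of printing them.

rollup_series = {}
for line in job.iterate_results():
    timestamp, entity_id, entity_name, avg_sentiment = line
    if entity_id == 'ROLLUP':
        rollup_series[timestamp] = float(avg_sentiment)  # assumes a numeric score (or numeric string)

for timestamp in sorted(rollup_series):
    print(timestamp, rollup_series[timestamp])
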
Example 5
import logging
import random
import time

from ravenpackapi import RPApi, ApiConnectionError

logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)

# initialize the API (here we use the RP_API_KEY in os.environ)
api = RPApi()

# query the realtime feed
ds = api.get_dataset(dataset_id='us500')


def wait_between_attempts():
    """ Incremental backoff between connection attempts """
    wait_time = 0.3  # time is in seconds
    while True:
        yield wait_time
        wait_time = min(wait_time * 1.5, 30)
        wait_time *= (100 + random.randint(0, 50)) / 100


wait_time = wait_between_attempts()
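
# Illustrative only: peek at the first few delays the backoff generator yields,
# to show how the wait grows with random jitter toward the ~30 second cap.
_preview = wait_between_attempts()
print([round(next(_preview), 2) for _ in range(5)])
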
while True:
    try:
        for record in ds.request_realtime():
            print(record)
            print(record.timestamp_utc, record.entity_name)
    except KeyboardInterrupt:
        logger.info("Stopping the realtime stream")
        break
    except ApiConnectionError as e:
        # on connection problems, wait with incremental backoff and reconnect
        wait = next(wait_time)
        logger.error("Connection error %s - retrying in %.1f seconds" % (e, wait))
        time.sleep(wait)
Example 6

import argparse
import datetime
import threading

from ravenpackapi import RPApi
from ravenpackapi.models.results import Result  # import path assumed; Result is the record type

# minimal parser for the --key option used below
parser = argparse.ArgumentParser(description="Check the connection to the RavenPack API")
parser.add_argument('--key', default=None,
                    help="RavenPack API key (falls back to the RP_API_KEY environment variable)")


def check_realtime():
    print("Realtime ...")
    for record in ds.request_realtime():
        assert isinstance(record, Result)
        break
    print('Realtime - OK')
    results['realtime'] = True
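

# Hypothetical sketch (mirroring check_realtime above): a check for the json endpoint.
# It assumes ds, date_start, date_end and the results dict defined elsewhere in this script.
def check_json():
    print("JSON ...")
    data = ds.json(
        start_date=date_start.strftime('%Y-%m-%d %H:%M:%S'),
        end_date=date_end.strftime('%Y-%m-%d %H:%M:%S'),
    )
    for record in data:
        assert isinstance(record, Result)
        break
    print('JSON - OK')
    results['json'] = True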


if __name__ == '__main__':
    args = parser.parse_args()
    api = RPApi(args.key)

    ds = api.get_dataset('all-granular-data')
    date_end = datetime.datetime.utcnow() - datetime.timedelta(minutes=60)
    date_start = date_end - datetime.timedelta(minutes=3)  # 3 minutes of data

    if not api.api_key:
        print("Please provide an APIKEY: with the --key parameter or setting the RP_API_KEY environment variable")
        exit(1)
    print("Checking connection with APIKEY: %s" % api.api_key)

    checks = [
        threading.Thread(target=check_datafile),
        threading.Thread(target=check_json),
        threading.Thread(target=check_realtime),
    ]
    results = dict(
        datafile=False,