class TestRealtimeFeed:
    api = RPApi()

    @pytest.mark.slow
    def test_get_something_from_us500(self, dataset='all-granular-data', max_received=5):
        ds = self.api.get_dataset(dataset_id=dataset)
        received = 0
        for record in ds.request_realtime():
            assert isinstance(record, Result)
            received += 1
            logger.info("Got {received}/{max} from us500".format(
                received=received, max=max_received
            ))
            errors = record.is_invalid
            assert errors is False, 'Record is invalid: %s' % errors
            if received > max_received:
                break

    def test_missing_dataset(self):
        ds = self.api.get_dataset(dataset_id='missing-dataset')
        with pytest.raises(APIException) as e:
            for record in ds.request_realtime():
                pass
        assert e.value.response.status_code == 403

class TestDatafile(object):
    api = RPApi()

    @pytest.mark.slow
    @pytest.mark.datafile
    def test_small_async_download(self):
        ds = self.api.get_dataset(dataset_id='swiss20')
        job = ds.request_datafile(
            start_date='2018-01-01 18:00:00',
            end_date='2018-01-02 18:00:00',
        )
        assert isinstance(job, Job)
        with tempfile.NamedTemporaryFile() as fp:
            job.save_to_file(filename=fp.name)

    @pytest.mark.slow
    @pytest.mark.datafile
    def test_small_async_with_headers(self):
        ds = self.api.get_dataset(dataset_id='swiss20')
        job = ds.request_datafile(start_date='2018-01-01 18:00:00',
                                  end_date='2018-01-01 18:05:00',
                                  fields=['rp_story_id', 'timestamp_utc'])
        records = []
        for record in job.iterate_results(include_headers=True):
            records.append(record)
        assert len(records) > 1
        assert records[0] == ['RP_STORY_ID', 'TIMESTAMP_UTC']  # we want the headers

class TestEntityTypeReference(object):
    api = RPApi()
    team_reference = api.get_entity_type_reference('team')

    @pytest.mark.slow
    def test_save_team_reference(self):
        """ Get the team reference as a CSV """
        team_reference = self.api.get_entity_type_reference('team')
        f = tempfile.NamedTemporaryFile(prefix='test_reference', delete=False)
        filepath = f.name
        team_reference.write_to_file(filepath)

        with io.open(filepath, encoding='latin-1') as f:
            lines = f.readlines()
        assert len(lines) > 100, "We should have several rows"
        assert lines[0] == 'RP_ENTITY_ID,ENTITY_TYPE,DATA_TYPE,DATA_VALUE,RANGE_START,RANGE_END\n'
        os.unlink(f.name)

    def test_team_reference_as_map(self):
        club = self.team_reference['022568']
        assert club.name == 'Olympique de Marseille'

    def test_team_iterate_entities(self):
        valid_entities = set()
        for entity in self.team_reference:
            last_name = entity.entity_names[-1]
            if last_name.is_valid():
                valid_entities.add(entity.rp_entity_id)
        assert len(valid_entities) > 100, "We should have several valid teams"

class TestDeleteAllByName(object):
    api = RPApi()
    base_dataset = Dataset(
        name='testing_api_delete_all',
        filters={},  # a dataset without filters
    )

    def test_delete_all_by_name(self):
        dataset_name = self.base_dataset.name
        delete_all_datasets_by_name(self.api, dataset_name)
        assert len(get_datasets_by_name(self.api, dataset_name)) == 0, \
            "Seems we have datasets that should be deleted"

        ds1 = self.api.create_dataset(self.base_dataset)  # create 1...
        ds2 = self.api.create_dataset(self.base_dataset)  # create 2...
        assert len(get_datasets_by_name(self.api, dataset_name)) == 2, \
            "We should have just created 2 datasets"

        # we can also check that the new ones are in the owned datasets
        owned_dataset = self.api.list_datasets()
        assert ds1 in owned_dataset
        assert ds2 in owned_dataset

        delete_all_datasets_by_name(self.api, dataset_name)
        assert len(get_datasets_by_name(self.api, dataset_name)) == 0, \
            "Seems we have datasets that should be deleted"

class TestDatasetUpdate(object):
    """ Try to Create a dataset, Read it, Update it and Delete it """
    api = RPApi()
    dataset_name = 'testing_ds_update'

    def test_create_and_update(self):
        delete_all_datasets_by_name(self.api, self.dataset_name)

        filters = {"rp_entity_id": {"$in": ['AAAAA']}}
        dataset = Dataset(
            name=self.dataset_name,
            filters=filters,  # a dataset with a filter
        )
        dataset = self.api.create_dataset(dataset)
        assert dataset.id is not None
        dataset_id = dataset.id

        # change the dataset
        new_filters = {"rp_entity_id": {"$in": ['BBBBB']}}
        dataset.filters = new_filters
        dataset.save()

        # get the dataset again
        dataset = self.api.get_dataset(dataset_id)
        assert dataset.filters == new_filters

        new_filters = {"rp_entity_id": {"$in": ['CCCCC']}}
        dataset.filters = new_filters
        dataset.save()

        dataset.delete()
        assert delete_all_datasets_by_name(self.api, self.dataset_name) == 0

class TestUploadFlow:
    api = RPApi()

    def test_quota(self):
        data = self.api.upload.quota()
        for field in ('files', 'quota'):
            assert field in data

class TestEncoding(object):
    api = RPApi()

    @classmethod
    def setup_class(cls):
        cls.ds = cls.api.create_dataset(
            Dataset(
                name='testing_encoding',
                filters={
                    "rp_entity_id": '9BFEB5'  # this entity has a non-ascii name
                },
            ))

    params = dict(
        start_date='2018-05-01 21:51',  # we have an event here
        end_date='2018-05-01 21:52',
    )

    def test_json_iterate(self):
        self.api.log_curl_commands = True
        results = self.ds.json(**self.params)
        assert results, 'We should have some result in the timerange'
        for analytic_row in results:
            print(analytic_row)

    def test_dump_iterate(self):
        results = self.ds.request_datafile(**self.params)
        for analytic_row in results:
            print(analytic_row)

    @classmethod
    def teardown_class(cls):
        cls.ds.delete()

class TestEntityMapping(object):
    api = RPApi()

    def test_matching_entity_mapping(self):
        entities = [{
            'ticker': 'AAPL',
            'name': 'Apple Inc.'
        }, {
            'ticker': 'JPM'
        }, {
            'listing': 'XNYS:DVN'
        }]
        api = self.api
        mapping = api.get_entity_mapping(entities)
        assert not mapping.errors
        assert len(mapping.matched) == len(mapping.submitted) == 3

        # let's get the mapped entities
        rp_entity_ids = [match.id for match in mapping.matched]
        assert rp_entity_ids == ['D8442A', '619882', '14BA06']

    def test_mismatch_mapping(self):
        entities = ["unknown!"]
        api = self.api
        mapping = api.get_entity_mapping(entities)
        rp_entity_ids = [match.id for match in mapping.matched]
        assert rp_entity_ids == []

class TestDatasetList(object):
    api = RPApi()

    def test_list_public_filters(self):
        # we get name and id for each dataset
        datasets = self.api.list_datasets(scope="public")
        # iterating it should automatically get the dataset to return the filters
        for ds in datasets[:3]:
            assert ds.id and ds.uuid and ds.name and ds.filters

class TestRecentAnalyticsRetried:
    api = RPApi()

    def test_upload_delete_retry(self):
        """ When we delete immediately after creation we get a 404.
            The API should silently retry for some time. """
        api = self.api
        filename = "upload_sample.txt"
        f = api.upload.file(os.path.join(os.path.dirname(__file__), filename))
        f.delete()

class TestDatafile(object):
    api = RPApi()

    def test_premium_url(self):
        premium_story_id = 'B5461869942657A8D4956BE409DEC944'
        url = self.api.get_document_url(premium_story_id)
        assert "ravenpack.com" in url

    def test_nonpremium_url(self):
        nonpremium_story_id = '691D5D416F8E9752DDD9C2F8C30FBE53'
        url = self.api.get_document_url(nonpremium_story_id)
        assert 'https://www.india.com/' in url

class TestDatasetUpdate(object):
    """ Try to Create a dataset, Read it, Update it and Delete it """
    api = RPApi()
    dataset_name = 'testing_ds_update'

    def test_create_and_update(self):
        delete_all_datasets_by_name(self.api, self.dataset_name)

        filters = {"rp_entity_id": {"$in": ['AAAAAA']}}
        dataset = Dataset(
            name=self.dataset_name,
            filters=filters,  # a dataset with a filter
        )
        dataset = self.api.create_dataset(dataset)
        assert dataset.id is not None
        dataset_id = dataset.id

        # change the dataset
        new_filters = {"rp_entity_id": {"$in": ['BBBBBB']}}
        dataset.filters = new_filters
        dataset.save()

        # get the dataset again
        dataset = self.api.get_dataset(dataset_id)
        assert dataset.filters == new_filters

        new_filters = {"rp_entity_id": {"$in": ['CCCCCC']}}
        dataset.filters = new_filters
        dataset.save()

        dataset.delete()
        assert delete_all_datasets_by_name(self.api, self.dataset_name) == 0

    def test_simple_update(self):
        filters = {"rp_entity_id": {"$in": ['D8442A']}}
        ds = self.api.create_dataset(
            Dataset(
                name=self.dataset_name,
                filters=filters,  # a dataset with a filter
            )
        )
        assert ds._lazy_retrieve_on_get is False
        dataset_id = ds.id

        ds = self.api.get_dataset(dataset_id)  # retrieve the dataset
        assert ds._lazy_retrieve_on_get is True  # it still has to be lazy loaded here

        ds.filters = {"rp_entity_id": {"$in": ["228D42"]}}  # update the dataset ***
        ds.save()

        for r in ds.json('2019-01-01', '2019-01-02'):
            assert r['rp_entity_id'] == '228D42', \
                "Expecting entity to be 228D42 - got %s" % r['rp_entity_id']
            break

class TestDatasetRetrieval(object):
    api = RPApi()

    def test_get_dataset(self):
        dataset_id = 'us30'
        ds_by_id = Dataset(api=self.api, id=dataset_id)
        filters = ds_by_id.filters
        assert isinstance(filters, dict)

        ds_via_api = self.api.get_dataset(dataset_id)
        ds_by_uuid = Dataset(api=self.api, uuid=dataset_id)
        assert ds_via_api.filters == ds_by_id.filters == ds_by_uuid.filters

class TestEntityMapping(object):
    api = RPApi()

    def test_matching_entity_mapping(self):
        entities = [{'ticker': 'AAPL', 'name': 'Apple Inc.'},
                    {'ticker': 'JPM'},
                    {'listing': 'XNYS:DVN'}]
        mapping = self.api.get_entity_mapping(entities)
        assert not mapping.errors
        assert len(mapping.matched) == len(mapping.submitted) == 3

        # let's get the mapped entities
        rp_entity_ids = [match.id for match in mapping.matched]
        assert rp_entity_ids == ['D8442A', '619882', '14BA06']

    def test_mismatch_mapping(self):
        entities = ["unknown!"]
        mapping = self.api.get_entity_mapping(entities)
        rp_entity_ids = [match.id for match in mapping.matched]
        assert rp_entity_ids == []

    def test_mapping_example(self):
        invalid_entity_request = "Unknown entity specified"
        universe = [
            "RavenPack",
            {'ticker': 'AAPL'},
            'California USA',
            {  # Amazon, specifying various fields
                "client_id": "12345-A",
                "date": "2017-01-01",
                "name": "Amazon Inc.",
                "entity_type": "COMP",
                "isin": "US0231351067",
                "cusip": "023135106",
                "sedol": "B58WM62",
                "listing": "XNAS:AMZN"
            },
            invalid_entity_request,
        ]
        mapping = self.api.get_entity_mapping(universe)
        assert len(mapping.matched) == 4
        assert [m.name for m in mapping.matched] == [
            "RavenPack International S.L.",
            "Apple Inc.",
            "California, U.S.",
            "Amazon.com Inc."
        ]
        assert len(mapping.errors) == 1
        assert mapping.errors[0].request == invalid_entity_request

class TestEntityReference(object):
    api = RPApi()

    def test_apple(self):
        reference = self.api.get_entity_reference(APPLE_RP_ENTITY_ID)
        assert reference.rp_entity_id == APPLE_RP_ENTITY_ID
        assert reference.names[-1].value == reference.name == 'APPLE INC.'
        assert reference.tickers[-1].value == 'AAPL'

    def test_failing(self):
        try:
            self.api.get_entity_reference('invalid')
            assert False, "Invalid entity should raise an Exception"
        except APIException:
            pass

class TestAdHocJson(object):
    api = RPApi()

    def test_small_adhoc(self):
        data = self.api.json(
            start_date='2018-01-01 18:00:00',
            end_date='2018-01-01 18:05:00',
            fields=['timestamp_utc', 'rp_entity_id', 'headline'],
            filters={
                "entity_type": {"$in": ['PROD']},
                # "entity_type": "PROD",
            })
        assert isinstance(data, Results)
        assert len(data) > 0, 'We should have some product in those 5 minutes'

class TestDatasetJson(object):
    api = RPApi()

    def test_known_swiss(self):
        ds = self.api.get_dataset(dataset_id='swiss20')
        data = ds.json(
            start_date='2018-01-01 18:00:00',
            end_date='2018-01-02 18:00:00',
        )
        assert isinstance(data, Results)
        assert len(data) > 500, 'We should have more data in 1 day of swiss20'

    def test_indicator_dataset(self):
        indicator_dataset = Dataset(
            name='Test-indicator-dataset',
            filters={"$and": [{"rp_entity_id": {"$in": ["D8442A"]}}]},
            fields=[{"average": {"avg": {"field": "EVENT_SENTIMENT_SCORE"}}}],
            frequency='daily',
        )
        indicator_dataset = self.api.create_dataset(indicator_dataset)
        try:
            # ask the indicator dataset for its data
            response = indicator_dataset.json('2018-01-01 00:00', '2018-01-02 00:00')
            assert len(response) == 2  # we should get 2 rows
            assert {r['rp_entity_id'] for r in response} == {'D8442A', 'ROLLUP'}

            # do a request overriding fields and frequency to see the underlying data
            response = indicator_dataset.json('2018-01-01 00:00', '2018-01-02 00:00',
                                              fields=['rp_story_id', 'rp_entity_id'],
                                              frequency='granular')
            assert len(response) > 200, "We should have many granular analytics rows"
            assert {r['rp_entity_id'] for r in response} == {'D8442A'}, "All rows should be D8442A"
        finally:
            indicator_dataset.delete()

    def test_granular_dataset(self):
        self.api.log_curl_commands = True
        granular_dataset = Dataset(
            name='Test-granular-dataset',
            filters={"$and": [{"rp_entity_id": {"$in": ["D8442A"]}},
                              {"relevance": 90}]},
        )
        granular_dataset = self.api.create_dataset(granular_dataset)
        try:
            granular_dataset.json('2018-01-01 00:00', '2018-01-02 00:00')
        finally:
            granular_dataset.delete()

class TestDatasetCount(object):
    api = RPApi()

    @pytest.mark.json
    def test_count_timezone(self):
        ds = self.api.get_dataset(dataset_id='us30')
        count_results_utc = ds.count(
            start_date="2019-05-14",
            end_date="2019-05-15",
        )
        assert isinstance(count_results_utc, dict)

        count_results_london = ds.count(start_date="2019-05-14",
                                        end_date="2019-05-15",
                                        time_zone="Europe/London")
        assert isinstance(count_results_london, dict)
        assert count_results_london != count_results_utc

class TestJobCancellation(object):
    api = RPApi()
    ds = None

    @classmethod
    def setup_class(cls):
        cls.ds = cls.api.create_dataset(
            Dataset(
                name='test_job_cancel',
                filters={
                    "rp_entity_id": 'D8442A'
                },
            )
        )

    def test_job_cancel(self):
        params = dict(
            start_date='2018-05-10 21:51',  # we have an event here
            end_date='2018-05-10 21:52',
        )
        job = self.ds.request_datafile(**params)
        status = job.get_status()
        try:
            job.cancel()
        except APIException as exception:
            # cancel raised an exception: the job was already being processed
            assert status == 'processing'
            assert exception.response.status_code == 400
        else:
            assert status == 'enqueued'
            assert job.get_status() == 'cancelled'
            assert job.is_processing is False
            with pytest.raises(JobNotProcessing):
                job.wait_for_completion()

    @classmethod
    def teardown_class(cls):
        cls.ds.delete()

class TestDatasetCRUD(object):
    """ Try to Create a dataset, Read it, Update it and Delete it """
    api = RPApi()
    dataset_name = 'testing_api_crud'

    def test_get_public_dataset_list(self):
        datasets = self.api.list_datasets(scope='public')
        assert 'us30' in datasets, 'US30 should be in public datasets'
        assert len(datasets) > 100, 'We expect at least 100 public RavenPack datasets'

    def test_get_private_dataset_list(self):
        datasets = self.api.list_datasets()
        assert len(datasets) > 0, "Don't you have a dataset?"

    def test_create_and_delete(self):
        # the test dataset may already be there, so delete it first
        # (we can have multiple datasets with the same name, so delete all of them)
        delete_all_datasets_by_name(self.api, self.dataset_name)

        # create the dataset
        filters = {"rp_entity_id": {"$in": ['D8442A']}}
        dataset = Dataset(
            name=self.dataset_name,
            filters=filters,  # a dataset with a filter
        )
        new_dataset = self.api.create_dataset(dataset)
        assert new_dataset.filters == dataset.filters, "Created dataset filters are not as expected"
        assert new_dataset.id is not None, "We should have a dataset id"

        owned_dataset = self.api.list_datasets()
        assert new_dataset.id in owned_dataset, "We should own the new dataset"

        new_dataset.delete()
        owned_dataset = self.api.list_datasets()
        assert new_dataset.id not in owned_dataset, "The new dataset should be deleted"

import logging
import random

from pprint import pprint

from microprediction import MicroWriter
from microprediction.config_private import COVID_API, COVID_UUID, TRAFFIC_WRITE_KEY

# New video tutorials are available at https://www.microprediction.com/python-1 to help you
# get started creating streams (see the 4th module in particular)

# This might be broken
from ravenpackapi import RPApi, ApiConnectionError

logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)

# initialize the API (here we use the RP_API_KEY in os.environ)
api = RPApi(api_key=COVID_API)

# query the realtime feed
ds = api.get_dataset(dataset_id=COVID_UUID)


def wait_between_attempts():
    """ Incremental backoff between connection attempts """
    wait_time = 19.3  # time is in seconds
    while True:
        yield wait_time
        wait_time = min(wait_time * 1.5, 30)
        wait_time *= (100 + random.randint(0, 50)) / 100


wait_time = wait_between_attempts()

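# --- Sketch of the missing consumption loop (not part of the original) ------
# The script above stops right after creating the backoff generator. The loop
# below is an assumed sketch of how the realtime feed could be drained with
# reconnection, using only calls that appear elsewhere in these examples:
# ds.request_realtime(), ApiConnectionError, and next() on the generator.
# Publishing each record (e.g. with MicroWriter) is left out.
import time

while True:
    try:
        for record in ds.request_realtime():
            logger.info("Got realtime record: %s", record)
            # ... process/publish the record here ...
    except ApiConnectionError as e:
        pause = next(wait_time)
        logger.warning("Connection error %s - retrying in %.1f seconds", e, pause)
        time.sleep(pause)
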
from ravenpackapi import RPApi
import logging

logging.basicConfig(level=logging.DEBUG)

# initialize the API (here we use the RP_API_KEY in os.environ)
api = RPApi()

# query the json endpoint for a dataset ***
# use the public dataset with id 'us30'
ds = api.get_dataset(dataset_id='us30')

# query the dataset analytics with the json endpoint
print(ds)
data = ds.json(
    start_date='2018-01-05 18:00:00',
    end_date='2018-01-05 18:01:00',
)
for record in data:
    print(record)

# query the ad-hoc json endpoint ***
adhoc_data = api.json(
    start_date='2018-01-05 18:00:00',
    end_date='2018-01-05 18:01:00',
    fields=ds.fields,
    filters=ds.filters,
)
print(adhoc_data)
for record in adhoc_data:
    print(record)

""" Download all data from the chosen dataset in a time range Download files are compressed, and chunked per year """ import os from ravenpackapi import RPApi from ravenpackapi.util import time_intervals, SPLIT_WEEKLY api = RPApi(api_key='YOUR_API_KEY') ds = api.get_dataset('YOUR_DATASET_ID') start_date = '2018-01-01' end_date = '2018-01-10' GET_COMPRESSED = True output_folder = './output' os.makedirs(output_folder, exist_ok=True) # create folder for output for range_start, range_end in time_intervals( start_date, end_date, split=SPLIT_WEEKLY, # available splits: # SPLIT_YEARLY, SPLIT_WEEKLY, SPLIT_DAILY # or SPLIT_MONTHLY (the default) ): job = ds.request_datafile(
from ravenpackapi import RPApi, Dataset
import logging

logging.basicConfig(level=logging.INFO)

# initialize the API (here we use the RP_API_KEY in os.environ)
api = RPApi()

# get the us30 dataset (its filters contain the top 30 US companies)
us30 = Dataset(api=api, id='us30')
print(us30.filters)

# creating a new dataset with modified filters and fields
# the filters are an aggregation of the us30 ones with some additional rules
new_filters = {
    "$and": [
        us30.filters,
        {"relevance": {"$gte": 90}},
        {"event_similarity_days": {"$gte": 1}},
    ]
}
new_fields = [{
    "daily_average_ess_1d": {
        "avg": {
            "field": "EVENT_SENTIMENT_SCORE",
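            # NOTE: the original snippet is truncated at this point. The lines
            # below are an assumed completion: they just close the field
            # definition and create the dataset, following the indicator
            # dataset pattern (fields + frequency='daily') used in the tests
            # above. The dataset name is hypothetical.
        }
    }
}]

new_dataset = api.create_dataset(
    Dataset(
        name='us30_daily_average_ess',  # hypothetical name
        filters=new_filters,
        fields=new_fields,
        frequency='daily',
    )
)
print(new_dataset.id)
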
import pickle

import numpy as np
import pandas as pd
import torch

from ravenpackapi import RPApi

# we temporarily cannot share the api key
api = RPApi(api_key='')


def read_data(args):
    path = args.path + args.region + '/' + args.sector + '/'
    batch = args.batch_size

    with open(path + "input_ef.pkl", "rb") as f:
        input_ef = np.array(pickle.load(f))

    if args.mode == 'price_spike':
        with open(path + "input_y_price.pkl", "rb") as f:
            label_y = np.array(pickle.load(f))
    elif args.mode == 'volume_spike':
        with open(path + "input_y_volume.pkl", "rb") as f:
            label_y = np.array(pickle.load(f))
    else:
        raise ValueError

    with open(path + "companies_list.pkl", 'rb') as f:
        company_list = pickle.load(f)
    with open(path + "input_et.pkl", "rb") as f:
        input_et = np.array(pickle.load(f))
    with open(path + "input_pt.pkl", "rb") as f:
        input_pt = np.array(pickle.load(f))

import os

from ravenpackapi import RPApi

api = RPApi()

reference_filename = 'reference.csv'

if os.path.isfile(reference_filename):
    # use the locally saved reference file if it exists
    reference = api.get_entity_type_reference_from_file(reference_filename)
else:
    print("Retrieving the company mapping file")
    # get the latest reference file for all the COMP entities
    # call it without arguments to get all entities of all types
    reference = api.get_entity_type_reference('COMP')
    reference.write_to_file(reference_filename)

# with the reference we can also ask for a single entity given the ID
for rp_entity_id in [
    '4A6F00', '01F2E5'
]:  # add here as many as you want - they won't cause extra requests
    company = reference[rp_entity_id]
    valid_sedols = [
        sedol.value
        for sedol in company.sedols
        if sedol.is_valid()  # get all the sedols that are valid now
        # (you can pass a date to is_valid to get the ones valid point-in-time)
    ]
    print(company.name, valid_sedols)

from ravenpackapi import RPApi
from ravenpackapi.exceptions import APIException

# initialize the API (here we use the RP_API_KEY in os.environ)
api = RPApi()

# query the json endpoint for a dataset ***
# use the public dataset with id 'us30'
ds = api.get_dataset(dataset_id='us30')

data = ds.json(
    start_date='2019-08-05 18:00:00',
    end_date='2019-08-05 18:01:00',
)

for record in list(data)[:5]:
    # get the url of the first documents
    rp_story_id = record['rp_story_id']
    try:
        url = api.get_document_url(rp_story_id)
    except APIException as e:
        if e.response.status_code == 404:
            # when the document is not found, handle it gracefully
            url = None
        else:
            raise
    print(
        rp_story_id,
        record['headline'],
        url,
    )

from ravenpackapi import RPApi

if __name__ == '__main__':
    entities = [{
        'ticker': 'AAPL',
        'name': 'Apple Inc.'
    }, {
        'ticker': 'JPM'
    }, {
        'listing': 'XNYS:DVN'
    }]
    api = RPApi()
    mapping = api.get_entity_mapping(entities)

    # show the matched entities
    for match in mapping.matched:
        print(match.id, match.name, match.type, match.request)
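
    # Possible extension (not in the original example): requests that could
    # not be matched are reported in mapping.errors, and each error keeps the
    # submitted request, as in the entity-mapping tests above.
    for error in mapping.errors:
        print("Could not match:", error.request)
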
from ravenpackapi import RPApi
from ravenpackapi import Dataset
import pandas as pd

apikey = "**********************"
api = RPApi(api_key=apikey)


# Extracts data (positive news count) from one entity
def get_counts(entity_id, ltgt, start_date, end_date, filename):
    label = "count_pos"
    if ltgt == "lt":
        label = "count_neg"
    global api
    custom_dataset = Dataset(
        name="Test set",
        frequency="daily",
        filters={
            "and": [
                {"rp_entity_id": entity_id},
                {"event_relevance": {"gte": 90}},
                {"event_sentiment_score": {