Example #1
    def test_create_and_update(self):
        delete_all_datasets_by_name(self.api, self.dataset_name)
        filters = {"rp_entity_id": {"$in": ['AAAAAA']}}
        dataset = Dataset(
            name=self.dataset_name,
            filters=filters,  # a dataset with a filter
        )
        dataset = self.api.create_dataset(dataset)

        assert dataset.id is not None
        dataset_id = dataset.id

        # change the dataset
        new_filters = {"rp_entity_id": {"$in": ['BBBBBB']}}
        dataset.filters = new_filters
        dataset.save()

        # get the dataset again
        dataset = self.api.get_dataset(dataset_id)
        assert dataset.filters == new_filters
        new_filters = {"rp_entity_id": {"$in": ['CCCCCC']}}
        dataset.filters = new_filters
        dataset.save()

        dataset.delete()

        assert delete_all_datasets_by_name(self.api, self.dataset_name) == 0
Example #2
def create_dataset(self, dataset):
    # be sure to work on a copy of the dataset definition
    new_dataset_data = dataset.as_dict()
    if 'uuid' in new_dataset_data:
        # drop the uuid so that save() creates a new dataset
        del new_dataset_data['uuid']
    new_dataset = Dataset(api=self, **new_dataset_data)
    new_dataset.save()
    dataset_id = new_dataset.id
    logger.info("Created dataset %s", dataset_id)
    return new_dataset
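A minimal usage sketch for the create_dataset helper above (hedged: us30 is the sample dataset id used elsewhere on this page, and the cleanup at the end is optional):

from ravenpackapi import RPApi

api = RPApi()
us30 = api.get_dataset('us30')   # an existing dataset
copy = api.create_dataset(us30)  # the uuid is dropped, so a brand-new dataset is created
print(copy.id)                   # the copy gets its own id
copy.delete()                    # clean up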
Example #3
    def test_get_dataset(self):
        dataset_id = 'us30'
        ds_by_id = Dataset(api=self.api, id=dataset_id)
        filters = ds_by_id.filters

        assert isinstance(filters, dict)

        ds_via_api = self.api.get_dataset(dataset_id)
        ds_by_uuid = Dataset(api=self.api, uuid=dataset_id)

        assert ds_via_api.filters == ds_by_id.filters == ds_by_uuid.filters
Example #4
def daily_download(dataset_id):
    end_date = datetime.datetime.utcnow().replace(hour=0,
                                                  minute=0,
                                                  second=0,
                                                  microsecond=0)
    start_date = end_date - datetime.timedelta(days=1)
    print("Date range: %s-%s" % (start_date, end_date))
    dataset = Dataset(api=api, id=dataset_id)
    job = dataset.request_datafile(start_date, end_date)
    print("Waiting for the job to complete")
    job.wait_for_completion()
    output_filename = "dailydata-%s.csv" % start_date.strftime("%Y-%m-%d")
    job.save_to_file(output_filename)
    print("Daily download saved on:", output_filename)
Example #5
def setup_class(cls):
    cls.ds = cls.api.create_dataset(
        Dataset.from_dict({
            "name": "Test custom dataset",
            "fields": ["timestamp_utc", "rp_entity_id",
                       "entity_name", "AVG_REL"],
            "filters": {"relevance": {"$gte": 90}},
            "custom_fields": [
                {"AVG_REL": {"avg": {"field": "RELEVANCE",
                                     "mode": "daily"}}},
            ],
            "conditions": {
                "$and": [
                    {"AVG_REL": {"$gt": 30}},
                    {"rp_entity_id": {"$in": ["ROLLUP"]}},
                ]
            },
            "frequency": "daily",
            "tags": [],
        }))
Example #6
class TestDeleteAllByName(object):
    api = RPApi()
    base_dataset = Dataset(
        name='testing_api_delete_all',
        filters={},  # a dataset without filters
    )

    def test_delete_all_by_name(self):
        dataset_name = self.base_dataset.name

        delete_all_datasets_by_name(self.api, dataset_name)
        assert len(get_datasets_by_name(self.api, dataset_name)) == 0, \
            "Seems we have datasets that should be deleted"

        ds1 = self.api.create_dataset(self.base_dataset)  # create 1...
        ds2 = self.api.create_dataset(self.base_dataset)  # create 2...

        assert len(get_datasets_by_name(self.api, dataset_name)) == 2, \
            "We should have just created 2 datasets"

        # we can also check that the new ones are among the owned datasets
        owned_dataset = self.api.list_datasets()
        assert ds1 in owned_dataset
        assert ds2 in owned_dataset

        delete_all_datasets_by_name(self.api, dataset_name)
        assert len(get_datasets_by_name(self.api, dataset_name)) == 0, \
            "Seems we have datasets that should be deleted"
Example #7
def setup_class(cls):
    cls.ds = cls.api.create_dataset(
        Dataset(
            name='testing_encoding',
            filters={
                "rp_entity_id": '9BFEB5'  # this entity has a non-ASCII name
            },
        ))
Example #8
def setup_class(cls):
    cls.ds = cls.api.create_dataset(
        Dataset(
            name='test_job_cancel',
            filters={
                "rp_entity_id": 'D8442A'
            },
        ))
Example #9
def list_datasets(self, scope=None, tags=None):
    """ Return a DatasetList of the datasets in the given scope """
    response = self.request('/datasets',
                            params=dict(
                                tags=tags or None,
                                scope=scope or 'private',
                            ))
    return DatasetList(
        map(lambda item: Dataset.from_dict(item, api=self),
            response.json()['datasets']))
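A hedged usage sketch for list_datasets, assuming that iterating a DatasetList yields Dataset objects (the membership checks in Example #6 suggest it behaves like a collection of them):

api = RPApi()
for ds in api.list_datasets(scope='private'):
    print(ds.id, ds.name)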
Example #10
def get_counts_universe(universe, ltgt, start_date, end_date, thresh,
                        filename):

    label = "count_pos"

    if ltgt == "lt":
        label = "count_neg"

    global api
    custom_dataset = Dataset(
        name="Test set",
        frequency="daily",
        filters={
            "and": [
                {"rp_entity_id": {"in": universe}},
                {"event_relevance": {"gte": thresh}},
                {"event_sentiment_score": {ltgt: 0.5}},
            ]
        },
        custom_fields=[
            {label: {"count": {"field": "rp_entity_id"}}},
        ])
    ds = api.create_dataset(custom_dataset)
    req_data = ds.request_datafile(start_date=start_date, end_date=end_date)
    fname = filename + ".csv"

    # save_to_file creates the output file itself; no need to pre-open it
    req_data.save_to_file(filename=fname)

    print("Done importing data for {}".format(fname))
Example #11
def test_granular_dataset(self):
    self.api.log_curl_commands = True
    granular_dataset = Dataset(
        name='Test-granular-dataset',
        filters={"$and": [{"rp_entity_id": {"$in": ["D8442A"]}},
                          {"relevance": 90}]},
    )
    granular_dataset = self.api.create_dataset(granular_dataset)
    try:
        granular_dataset.json('2018-01-01 00:00', '2018-01-02 00:00')
    finally:
        granular_dataset.delete()
Example #12
    def list_datasets(self, scope=None, tags=None, options=None):
        """ Return a DatasetList of the datasets in the given scope """
        response = self.request('/datasets',
                                params=dict(
                                    tags=tags or None,
                                    scope=scope or 'private',
                                ))
        datasets = DatasetList(
            map(lambda item: Dataset.from_dict(item, api=self),
                response.json()['datasets']))

        # return the datasets, optionally indexed by name or by id
        if options == 'by_name':
            return datasets.by_name
        if options == 'by_id':
            return datasets.by_id
        return datasets
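A sketch of the options parameter, assuming by_name returns a name-indexed mapping of the datasets:

api = RPApi()
datasets_by_name = api.list_datasets(options='by_name')
us30 = datasets_by_name.get('us30')  # look up a dataset by name, if present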
Example #13
 def test_dataset_copy_updated(self):
     source_dataset = Dataset(api=self.api, id='us30')
     new_dataset = Dataset(
         api=self.api,
         name="copy of the us30 dataset",
         filters=source_dataset.filters,
         fields=['timestamp_utc', 'rp_entity_id', 'avg_sentiment'],
         custom_fields=[{
             "avg_sentiment": {
                 "avg": {
                     "field": "EVENT_SENTIMENT_SCORE",
                 }
             }
         }],
         frequency='daily',
         tags=['copy', 'test'])
     new_dataset.save()
     new_dataset.delete()
Example #14
    def test_simple_update(self):
        filters = {"rp_entity_id": {"$in": ['D8442A']}}
        ds = self.api.create_dataset(
            Dataset(
                name=self.dataset_name,
                filters=filters,  # a dataset with a filter
            )
        )
        assert ds._lazy_retrieve_on_get is False

        dataset_id = ds.id

        ds = self.api.get_dataset(dataset_id)  # retrieve the dataset
        assert ds._lazy_retrieve_on_get is True  # it still has to be lazily loaded here
        ds.filters = {"rp_entity_id": {"$in": ["228D42"]}}  # update the dataset
        ds.save()

        for r in ds.json('2019-01-01', '2019-01-02'):
            assert r['rp_entity_id'] == '228D42', "Expecting entity to be 228D42 - got %s" % r['rp_entity_id']
            break
Example #15
    def test_create_and_delete(self):
        # the test dataset may already be there, so let's delete it first;
        # we can have multiple datasets with the same name, deleting all of them
        delete_all_datasets_by_name(self.api, self.dataset_name)

        # create the dataset
        filters = {"rp_entity_id": {"$in": ['D8442A']}}
        dataset = Dataset(
            name=self.dataset_name,
            filters=filters,  # a dataset with a filter
        )
        new_dataset = self.api.create_dataset(dataset)
        assert new_dataset.filters == dataset.filters, "Created dataset filters are not as expected"
        assert new_dataset.id is not None, "We should have a dataset id"

        owned_dataset = self.api.list_datasets()
        assert new_dataset.id in owned_dataset, "We should own the new dataset"

        new_dataset.delete()

        owned_dataset = self.api.list_datasets()
        assert new_dataset.id not in owned_dataset, "The new dataset should be deleted"
Example #16
    def test_indicator_dataset(self):
        indicator_dataset = Dataset(
            name='Test-indicator-dataset',
            filters={"$and": [{"rp_entity_id": {"$in": ["D8442A"]}}]},
            fields=[{"average": {"avg": {"field": "EVENT_SENTIMENT_SCORE"}}}],
            frequency='daily',
        )
        indicator_dataset = self.api.create_dataset(indicator_dataset)
        try:
            # ask the indicator dataset for its data
            response = indicator_dataset.json('2018-01-01 00:00',
                                              '2018-01-02 00:00')
            assert len(response) == 2  # we should get 2 rows
            assert {r['rp_entity_id'] for r in response} == {'D8442A', 'ROLLUP'}

            # do a request overriding fields and frequency to see the underlying data
            response = indicator_dataset.json(
                '2018-01-01 00:00',
                '2018-01-02 00:00',
                fields=['rp_story_id', 'rp_entity_id'],
                frequency='granular')
            assert len(response) > 200, \
                "We should have many granular analytics rows"
            assert {r['rp_entity_id'] for r in response} == {'D8442A'}, \
                "All rows should be D8442A"
        finally:
            indicator_dataset.delete()
Example #17
from ravenpackapi import RPApi, Dataset
import logging

logging.basicConfig(level=logging.INFO)
# initialize the API (here we use the RP_API_KEY in os.environ)
api = RPApi()

# get the us30 dataset (its filters contain the top 30 US companies)
us30 = Dataset(api=api, id='us30')

print(us30.filters)

# creating a new dataset with modified filters and fields:
# the filters combine the us30 filters with some additional rules
new_filters = {
    "$and": [
        us30.filters,
        {"relevance": {"$gte": 90}},
        {"event_similarity_days": {"$gte": 1}},
    ]
}
new_fields = [{
    "daily_average_ess_1d": {
        "avg": {
            "field": "EVENT_SENTIMENT_SCORE",
        }
    }
}]
Example #18
import logging

from ravenpackapi import RPApi, Dataset
from ravenpackapi.models.job import Job

logging.basicConfig(level=logging.INFO)
# initialize the API (here we use the RP_API_KEY in os.environ)
api = RPApi()

dataset_id = None  # put here a dataset_id if you have it already

if dataset_id is None:
    dataset = Dataset(api=api,
                      filters={},
                      name='Average sentiment',
                      frequency='daily',
                      fields=[{
                          'average_ess': {
                              'avg': {
                                  'field': 'EVENT_SENTIMENT_SCORE'
                              }
                          }
                      }])
    dataset_id = dataset.save()
else:
    dataset = api.get_dataset(dataset_id)

# job = Job(api=api,
#           token='xxx')  # if you already have a job you can use this

# ... or request a new one
job = dataset.request_datafile(
    start_date='2018-01-01 00:00:00',
    end_date='2018-01-02 00:00:00',  # an assumed end date; the original snippet stops mid-call
)
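Once requested, the job can be awaited and saved, following the same pattern as the daily_download example (the output filename is arbitrary):

job.wait_for_completion()
job.save_to_file('average-sentiment.csv')  # arbitrary output filename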
Example #19
 def test_invalid_additional_fields(self):
     dt = '2020-01-01'
     with pytest.raises(ValueError):
         Dataset(id='us30', creation_time=dt, last_modified=dt, invalid_field=1)
Example #20
 def test_valid_uuid(self):
     d = Dataset(uuid='us30')
     assert d.id == 'us30'
Example #21
 def test_valid_additional_fields(self):
     dt = '2020-01-01'
     d = Dataset(id='us30', creation_time=dt, last_modified=dt)
     assert d.id == 'us30'
     assert d.creation_time == d.last_modified == dt
Example #22
 def get_dataset(self, dataset_id):
     return Dataset(
         api=self,
         uuid=dataset_id,
     )
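get_dataset simply builds a Dataset bound to the API; as Example #14 shows, the dataset is lazily retrieved, so its fields are fetched on first access. A short sketch:

api = RPApi()
ds = api.get_dataset('us30')
print(ds.filters)  # accessing an attribute triggers the actual retrieval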
Example #23
dataset = Dataset(api,
                  name='My Indicator dataset',
                  filters={"relevance": {"$gt": 90}},
                  frequency='daily',
                  fields=[
                      {"avg_1d": {"avg": {"field": "EVENT_SENTIMENT_SCORE",
                                          "lookback": 1,
                                          "mode": "granular"}}},
                      {"avg_7d": {"avg": {"field": "avg_1d",
                                          "lookback": 1,
                                          "mode": "granular"}}},
                      {"buzz_365d": {"buzz": {"field": "RP_ENTITY_ID",
                                              "lookback": 365}}},
                      {"newsvolume_1d": {"count": {"field": "RP_ENTITY_ID",
                                                   "lookback": 1}}},
                      {"newsvolume_365d": {"avg": {"field": "newsvolume_1d",
                                                   "lookback": 365,
                                                   "mode": "granular"}}},
                  ])
Example #24
Dataset(**{
    "product": "rpa",
    "product_version": "1.0",
    "name": "Events in UK - example",
    "fields": [
        "timestamp_utc", "rp_story_id", "rp_entity_id", "entity_type",
        "entity_name", "country_code", "relevance",
        "event_sentiment_score", "topic", "group", "headline"
    ],
    "filters": {
        "$and": [
            {"relevance": {"$gte": 90}},
            {"country_code": {"$in": ["GB"]}},
            {"event_sentiment_score": {"$nbetween": [-0.5, 0.5]}},
        ]
    },
    "frequency": "granular",
})
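A hedged sketch of putting the granular dataset definition above to use, mirroring the datafile flow from Example #4 and assuming the Dataset above is assigned to a variable named dataset (dates and filename are placeholders):

uk_events = api.create_dataset(dataset)  # `dataset` being the Dataset built above
job = uk_events.request_datafile('2018-01-01', '2018-01-02')
job.wait_for_completion()
job.save_to_file('uk-events.csv')  # placeholder filename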
Example #25
from ravenpackapi import RPApi, Dataset
from ravenpackapi.utils.helpers import delete_all_datasets_by_name

api = RPApi()

# Begin creating a dataset with your desired filters (see the RPA user guide for syntax)
# You can then add functions (https://app.ravenpack.com/api-documentation/#indicator-syntax)
# Alternatively you can also create the dataset via the query builder and just use the dataset_uuid
dataset = Dataset(api,
                  name='My Indicator dataset',
                  filters={"relevance": {"$gt": 90}},
                  frequency='daily',
                  fields=[{"avg_1d": {"avg": {"field": "EVENT_SENTIMENT_SCORE", "lookback": 1,
                                              "mode": "granular"}}},
                          {"avg_7d": {
                              "avg": {"field": "avg_1d", "lookback": 1, "mode": "granular"}}},
                          {"buzz_365d": {"buzz": {"field": "RP_ENTITY_ID", "lookback": 365}}},
                          {"newsvolume_1d": {"count": {"field": "RP_ENTITY_ID", "lookback": 1}}},
                          {"newsvolume_365d": {"avg": {"field": "newsvolume_1d", "lookback": 365,
                                                       "mode": "granular"}}}]
                  )
dataset.save()

# you can also change the fields (remember to save afterwards)
dataset.fields = [
    {"avg": {"avg": {"field": "EVENT_SENTIMENT_SCORE", "lookback": 365}}},
]
dataset.save()

# Following this, you can then generate a datafile (for your desired date range)
job = dataset.request_datafile(