Example #1
def check_if_exists(entity_type, entity_value, api_token, api_host=API_HOST):
    """Checks if the Gro-entity_id exists"""
    client = GroClient(api_host, api_token)
    logger = client.get_logger()
    try:
        client.lookup(entity_type, entity_value)
        return entity_value
    except Exception as e:
        message = "Gro-{}_id invalid: '{}'.".format(entity_type, entity_value)
        logger.warning(message)
        raise
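A minimal usage sketch for the helper above. The region id and environment variable are illustrative, and API_HOST is assumed to be a module-level constant, as the signature's default implies:

# Hypothetical usage: validate an entity id before building queries with it.
import os

token = os.environ['GROAPI_TOKEN']
region_id = check_if_exists('regions', 1215, token)  # returns 1215, or raises if invalid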
Example #2
def generate_correlation_scatterplots(api_token,
                                      dataframe,
                                      folder_name,
                                      output_dir='',
                                      api_host=DEFAULT_API_HOST):
    client = GroClient(api_host, api_token)
    logger = client.get_logger()
    folder_path = os.path.join(output_dir, 'ranks_csv', folder_name)
    os.makedirs(folder_path, exist_ok=True)  # savefig fails if the folder does not exist
    sns.set(style="ticks")
    sns_plot = sns.pairplot(dataframe, diag_kind=None)
    correlation_plot_path = os.path.join(folder_path, 'correlation_plot.png')
    logger.info("Saving scatterplots in {}".format(correlation_plot_path))
    return sns_plot.savefig(correlation_plot_path)
Example #3
    def test_missing_token(self):
        # Explicitly unset GROAPI_TOKEN if it's set (e.g., it's set in our Shippable config).
        env_without_token = {
            k: v
            for k, v in os.environ.items() if k != "GROAPI_TOKEN"
        }
        with patch.dict(os.environ, env_without_token, clear=True):
            with self.assertRaisesRegex(RuntimeError,
                                        "environment variable must be set"):
                _ = GroClient(MOCK_HOST)
            with self.assertRaisesRegex(RuntimeError,
                                        "environment variable must be set"):
                _ = GroClient()
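Per the assertions above, GroClient falls back to the GROAPI_TOKEN environment variable when no token argument is given (and to a default production host when no host is given), raising a RuntimeError if the variable is unset. A sketch of the construction paths, assuming a valid token is exported:

import os

client = GroClient('api.gro-intelligence.com', os.environ['GROAPI_TOKEN'])  # explicit host and token
client = GroClient('api.gro-intelligence.com')  # token read from GROAPI_TOKEN
client = GroClient()  # default host; token read from GROAPI_TOKEN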
Example #4
def init_region_mapping(tables):
    client = GroClient('api.gro-intelligence.com', os.environ['GROAPI_TOKEN'])
    gro_regions = [
        r['id']
        for r in client.get_descendant_regions(COUNTRY_ID, REGION_LEVEL)
    ]
    region_mapping = {}
    for region in filter(None, tables.keys()):  # skip empty region names
        for r in client.search('regions', region):
            if r['id'] in gro_regions:
                region_mapping[region] = r['id']
                break
        if region not in region_mapping:
            region_mapping[region] = None
    return region_mapping
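A hedged usage sketch for init_region_mapping; the table keys are hypothetical region names, and COUNTRY_ID / REGION_LEVEL are module globals assumed to be defined as in the original script:

# Hypothetical input: data tables keyed by region name; falsy keys are skipped.
tables = {'Mato Grosso': [], 'Parana': [], '': []}
mapping = init_region_mapping(tables)
# -> {'Mato Grosso': <matching Gro region id or None>, 'Parana': <... or None>}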
Example #5
def save_to_csv(api_token,
                dataframe,
                folder_name,
                file_name='',
                output_dir='',
                api_host=DEFAULT_API_HOST):
    """ save the dataframe into csv file called <output_dir>/ranks_csv/ranks.csv """
    client = GroClient(api_host, api_token)
    logger = client.get_logger()
    folder_path = os.path.join(output_dir, 'ranks_csv', folder_name)
    if not os.path.exists(folder_path):
        os.makedirs(folder_path)
    file_path = os.path.join(folder_path, file_name)
    logger.info("\n Saving {} in {} \n".format(file_name, file_path))
    return dataframe.to_csv(file_path)
Example #6
def get_file_name(api_token,
                  data_series_list,
                  initial_date,
                  final_date,
                  api_host=DEFAULT_API_HOST):
    """Combines region, items, and dates to return a string"""
    client = GroClient(api_host, api_token)
    logger = client.get_logger()
    key_words = [
        client.lookup('regions', data_series_list[0]['region_id'])['name']
    ]
    for data_series in data_series_list:
        key_words.append(
            client.lookup('items', data_series['item_id'])['name'])
    key_words.append(initial_date)
    key_words.append(final_date)
    logger.info(
        "\n Computing analogous years' ranks in {} with respect to {} between {} and {} \n"
        .format(key_words[0], key_words[1:-2], initial_date, final_date))
    combined_name = '_'.join(key_words)
    return combined_name
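For concreteness, the string assembled above is just the region name, the item names, and the two dates joined with underscores. A sketch with hypothetical lookup results:

# If the region resolves to 'Brazil' and the items to 'Coffee' and 'Sugar':
key_words = ['Brazil', 'Coffee', 'Sugar', '2000-01-01', '2010-12-31']
combined_name = '_'.join(key_words)  # 'Brazil_Coffee_Sugar_2000-01-01_2010-12-31'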
Example #7
def main():
    # set up the batch client, same as the normal client
    API_HOST = 'api.gro-intelligence.com'
    ACCESS_TOKEN = os.environ['GROAPI_TOKEN']

    api_client = GroClient(API_HOST, ACCESS_TOKEN)

    # specify everything except region_id
    selection = {
        'metric_id': 860032,
        'item_id': 274,
        'source_id': 25,
        'frequency_id': 9,
        'start_date': '1998-01-01T00:00:00.000Z',
        'end_date': '1998-01-01T00:00:00.000Z'
    }

    # make a list of this query for every county in Mississippi
    mississippi_county_ids = [
        region["id"] for region in api_client.get_descendant_regions(13075, 5)
    ]

    selections = []
    for region_id in mississippi_county_ids:
        selection_temp = dict(selection)
        selection_temp["region_id"] = region_id
        selections.append(selection_temp)

    # make the request in a batch, asynchronous way
    output = api_client.batch_async_get_data_points(selections)

    # the output data is in the same order as the input queries.
    for county_id, data in zip(mississippi_county_ids, output):
        if len(data) > 0:  # some counties are missing data
            print("county_idx=%i produced %.0f tonnes of corn in 1998" %
                  (county_id, data[0]["value"]))
        else:
            print("county_idx=%i has no data for 1998" % county_id)
Example #8
def main():
    client = GroClient(API_HOST, ACCESS_TOKEN)

    selected_entities = {
        'region_id': 1210,   # Ukraine
        'item_id': 95,       # Wheat
        'metric_id': 570001  # Area Harvested (area)
    }

    writer = unicodecsv.writer(open(OUTPUT_FILENAME, 'wb'))

    # Get what possible series there are for that combination of selections
    for data_series in client.get_data_series(**selected_entities):

        # Add a time range restriction to your data request
        # (Optional - otherwise get all points)
        data_series['start_date'] = '2000-01-01'
        data_series['end_date'] = '2012-12-31'

        for point in client.get_data_points(**data_series):
            writer.writerow([
                point['start_date'], point['end_date'], point['value'],
                client.lookup_unit_abbreviation(point['unit_id'])
            ])
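unicodecsv exists mainly for Python 2; on Python 3 the standard library's csv module behaves the same for this use. A sketch of the writer setup with csv swapped in (note the text-mode file and newline=''):

import csv

# Python 3 equivalent of the unicodecsv writer above:
outfile = open(OUTPUT_FILENAME, 'w', newline='')
writer = csv.writer(outfile)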
Example #9
def test_get_data(test_data_1):
    client = GroClient('mock_website', 'mock_access_token')
    start_date_bound = '2019-08-01T00:00:00.000Z'
    expected = pd.DataFrame({'end_date': pd.to_datetime(['2019-08-01T00:00:00.000Z',
                                                         '2019-08-02T00:00:00.000Z',
                                                         '2019-08-03T00:00:00.000Z',
                                                         '2019-08-04T00:00:00.000Z',
                                                         '2019-08-05T00:00:00.000Z',
                                                         '2019-08-06T00:00:00.000Z',
                                                         '2019-08-07T00:00:00.000Z',
                                                         '2019-08-08T00:00:00.000Z',
                                                         '2019-08-09T00:00:00.000Z',
                                                         '2019-08-10T00:00:00.000Z']),
                             'value': [0.13002748115958, 1.17640700229636,
                                       1.17640700229636, 2.39664378851418,
                                       2.39664378851418, 2.39664378851418,
                                       2.39664378851418, 1.10551943121531,
                                       1.10551943121531, 1.10551943121531]})
    expected.index = expected['end_date']
    expected = expected.asfreq('D')

    test_data = get_transform_data.get_data(client, 'metric_id', 'item_id', 'region_id',
                                            'source_id', 'frequency_id', start_date_bound)
    assert_frame_equal(test_data, expected)
Example #10
    def test_explicit_host_and_token(_self):
        client = GroClient(MOCK_HOST, MOCK_TOKEN)
        with patch("groclient.lib.get_available") as get_available:
            _ = client.get_available("items")
            get_available.assert_called_once_with(MOCK_TOKEN, MOCK_HOST,
                                                  "items")
Example #11
    def test_batch_async_get_data_points_multiple_clients(self):
        client = GroClient(MOCK_HOST, MOCK_TOKEN)
        ahc_id1 = id(client._async_http_client)
        client = GroClient(MOCK_HOST, MOCK_TOKEN)
        ahc_id2 = id(client._async_http_client)
        self.assertNotEqual(ahc_id1, ahc_id2)
Example #12
NAFIS = 12
GROYM = 32

# FREQUENCY_ID GLOBALS
DAILY = 1
WEEKLY = 2
EIGHTDAY = 3
SIXTEENDAY = 5
MONTHLY = 6
QUARTERLY = 7
ANNUAL = 9
POINT_IN_TIME = 15

# Set up the Gro API client

client = GroClient(API_HOST, ACCESS_TOKEN)

# BRAZIL COCOA GRINDINGS FROM ICCO (1)
client.add_single_data_series({
    'metric_id': GRINDINGS,
    'item_id': COCOA,
    'region_id': BRAZIL,
    'frequency_id': ANNUAL,
    'source_id': ICCO
})

# BRAZIL ARABICA COMPOSITE PRICES (2)
client.add_single_data_series({
    'metric_id': COMPOSITE_PRICES,
    'item_id': BRAZ_ARABICA,
    'region_id': WORLD,
    # (frequency_id and source_id omitted in this excerpt)
})
Example #13
def analogous_years(api_token,
                    data_series_list,
                    initial_date,
                    final_date,
                    methods_list=['euclidean', 'cumulative', 'ts-features'],
                    all_ranks=None,
                    weights=None,
                    enso=None,
                    enso_weight=None,
                    provided_start_date=None,
                    tsfresh_num_jobs=0,
                    api_host=DEFAULT_API_HOST):
    """
    Use L^2 distance function to combine weighted distances from multiple gro-data_series
    and return the rank
    :param api_token: string, Gro-api token
    :param data_series_list: list of dictionaries containing gro data series
    :param initial_date: A date in YYYY-MM-DD format
    :param final_date: A date in YYYY-MM-DD format
    :param methods_list: a sublist of ['cumulative', 'euclidean', 'ts-features', 'dtw']
    :param all_ranks: Boolean to determine if all ranks will be displayed or a composite rank
    :param weights: Float determining the weight given to each data_series
    :param enso: Boolean to include ENSO
    :param enso_weight: Float
    :param provided_start_date: A string in YYYY-MM-DD format
    :param tsfresh_num_jobs: integer, number of parallel processes in tsfresh
    :param api_host:
    :return: A tuple (string, dataframe)
    The string contains '_' separated region, item, date
    The dataframe contains integer values (ranks)
    """
    client = GroClient(api_host, api_token)
    combined_items_distances = None
    start_info = common_start_date(client, data_series_list,
                                   provided_start_date)
    data_series_list = start_info['data_series']
    start_date = start_info['start_date']
    if not weights:
        weights = [1] * len(data_series_list)
    if enso:
        data_series_list.append(enso_data(start_date))
        if enso_weight:
            weights.append(enso_weight)
        else:
            weights.append(1)
    for i, data_series in enumerate(data_series_list):
        gro_item = client.lookup('items', data_series['item_id'])['name']
        combined_methods_distances_df = combined_methods_distances(
            ranked_df_dictionary(client,
                                 data_series,
                                 initial_date,
                                 final_date,
                                 gro_item,
                                 methods_list,
                                 tsfresh_num_jobs=tsfresh_num_jobs))
        numpy_combined_methods_distances = combined_methods_distances_df.values
        if combined_items_distances is None:
            combined_items_distances = np.zeros(
                numpy_combined_methods_distances.shape)
        combined_items_distances = combined_items_distances + np.power(
            weights[i] * numpy_combined_methods_distances, 2)
    combined_items_distances = pd.DataFrame(
        np.sqrt(combined_items_distances),
        index=combined_methods_distances_df.index,
        columns=combined_methods_distances_df.columns)
    combined_items_distances['composite'] = combined_items_distances.sum(
        axis=1, skipna=True)
    ranks = []
    for column_name in combined_items_distances.columns:
        combined_items_distances.sort_values(by=column_name, inplace=True)
        column_new_name = column_name.split('_')[0] + '_rank'
        combined_items_distances.loc[:, column_new_name] = \
            combined_items_distances.reset_index().index + 1
        ranks.append(column_new_name)
        combined_items_distances.sort_index(inplace=True)
    if all_ranks:
        display_dataframe = combined_items_distances[ranks]
    else:
        display_dataframe = combined_items_distances[['composite_rank']]
    return display_dataframe
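The accumulation loop above is an element-wise root-sum-of-squares over the weighted per-series distance matrices: combined = sqrt(sum_i (w_i * D_i)^2). A minimal numpy sketch of that combination step with made-up numbers:

import numpy as np

# Two per-series distance matrices and their weights (illustrative values):
distances = [np.array([[1.0, 2.0]]), np.array([[3.0, 4.0]])]
weights = [1.0, 0.5]
combined = np.sqrt(sum(np.power(w * d, 2) for w, d in zip(weights, distances)))
# -> array([[1.80277564, 2.82842712]])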
Example #14
class BatchTests(TestCase):
    def setUp(self):
        self.client = GroClient(MOCK_HOST, MOCK_TOKEN)
        self.assertTrue(isinstance(self.client, GroClient))

    def tearDown(self):
        IOLoop.clear_current()

    def test_batch_async_get_data_points(self):
        data_points = self.client.batch_async_get_data_points([
            {
                "metric_id": 1,
                "item_id": 2,
                "region_id": 3,
                "frequency_id": 4,
                "source_id": 5,
            },
            {
                "metric_id": 6,
                "item_id": 7,
                "region_id": 8,
                "frequency_id": 9,
                "source_id": 10,
                "insert_nulls": True,
            },
        ])
        self.assertEqual(data_points[0][0]["start_date"],
                         "2017-01-01T00:00:00.000Z")
        self.assertEqual(data_points[0][0]["end_date"],
                         "2017-12-31T00:00:00.000Z")
        self.assertEqual(data_points[0][0]["value"], 40891)
        self.assertEqual(data_points[0][0]["unit_id"], 14)
        self.assertEqual(data_points[0][0]["reporting_date"], None)
        self.assertEqual(data_points[0][1]["start_date"],
                         "2018-01-01T00:00:00.000Z")
        self.assertEqual(data_points[0][1]["end_date"],
                         "2018-12-31T00:00:00.000Z")
        self.assertEqual(data_points[0][1]["value"], 56789)
        self.assertEqual(data_points[0][1]["unit_id"], 10)
        self.assertEqual(data_points[0][1]["reporting_date"],
                         "2019-03-14T00:00:00.000Z")

    def test_batch_async_get_data_points_map_function(self):
        def sum_results(inputIndex, inputObject, response, summation):
            for point in response:
                summation += point["value"]
            return summation

        summation = self.client.batch_async_get_data_points(
            [{
                "metric_id": 1,
                "item_id": 2,
                "region_id": 3,
                "frequency_id": 4,
                "source_id": 5,
            }],
            output_list=0,
            map_result=sum_results,
        )

        self.assertEqual(summation, 97680)

    # Test that multiple GroClients each have their own AsyncHTTPClient. Note:
    # this tests the fix for the `fetch called on closed AsyncHTTPClient`
    # error. We can't test for that directly since the `fetch` call is mocked,
    # so instead we just ensure that all GroClients have their own
    # AsyncHTTPClient.
    def test_batch_async_get_data_points_multiple_clients(self):
        client = GroClient(MOCK_HOST, MOCK_TOKEN)
        ahc_id1 = id(client._async_http_client)
        client = GroClient(MOCK_HOST, MOCK_TOKEN)
        ahc_id2 = id(client._async_http_client)
        self.assertNotEqual(ahc_id1, ahc_id2)

    def test_batch_async_get_data_points_bad_request_error(self):
        responses = self.client.batch_async_get_data_points(
            [mock_error_selection])
        self.assertTrue(isinstance(responses[0], BatchError))

    def test_batch_async_get_data_points_map_errors(self):
        def raise_exception(idx, query, response, accumulator):
            if isinstance(response, Exception):
                raise response
            accumulator[idx] = response
            return accumulator

        with self.assertRaises(Exception):
            self.client.batch_async_get_data_points([mock_error_selection],
                                                    map_result=raise_exception)

    def test_async_get_df(self):
        self.client.add_single_data_series({
            "metric_id": 1,
            "item_id": 2,
            "region_id": 3,
            "frequency_id": 4,
            "source_id": 5,
        })
        df = self.client.async_get_df()
        self.assertEqual(df.iloc[0]["start_date"].date(), date(2017, 1, 1))
        self.assertEqual(df.iloc[0]["end_date"].date(), date(2017, 12, 31))
        self.assertEqual(df.iloc[0]["value"], 40891)

    def test_batch_async_rank_series_by_source(self):
        list_of_ranked_series_lists = self.client.batch_async_rank_series_by_source(
            [mock_data_series, mock_data_series])
        # There were two inputs, so there should be two outputs:
        self.assertEqual(len(list_of_ranked_series_lists), 2)
        for series_list in list_of_ranked_series_lists:
            # Not necessarily true, but true given the mock_rank_series_by_source() function:
            self.assertEqual(len(series_list), len(mock_data_series))
            for series in series_list:
                self.assertTrue("metric_id" in series)
                self.assertTrue("source_id" in series)
Example #15
def main():  # pragma: no cover
    """Basic Gro API command line interface.

    Note that results are chosen randomly from matching selections, so output is not
    deterministic. This tool is useful for simple queries, but anything more complex should be done
    using the provided Python packages.

    Usage examples:
        gro_client --item=soybeans --region=brazil --partner_region china --metric export
        gro_client --item=sesame --region=ethiopia
        gro_client --user_email=you@example.com --print_token
    For more information use --help
    """
    parser = argparse.ArgumentParser(
        description="Gro API command line interface")
    parser.add_argument("--user_email")
    parser.add_argument("--user_password")
    parser.add_argument("--item")
    parser.add_argument("--metric")
    parser.add_argument("--region")
    parser.add_argument("--partner_region")
    parser.add_argument("--file")
    parser.add_argument(
        "--print_token",
        action="store_true",
        help="Output API access token for the given user email and password. "
        "Save it in GROAPI_TOKEN environment variable.",
    )
    parser.add_argument(
        "--token",
        default=os.environ.get("GROAPI_TOKEN"),
        help="Defaults to GROAPI_TOKEN environment variable.",
    )
    parser.add_argument("--version", action="store_true")
    args = parser.parse_args()

    if args.version:
        print(groclient.lib.get_version_info().get('api-client-version'))
        return

    assert (args.user_email
            or args.token), "Need --token, or --user_email, or $GROAPI_TOKEN"
    access_token = None

    if args.token:
        access_token = args.token
    else:
        if not args.user_password:
            args.user_password = getpass.getpass()
        access_token = groclient.lib.get_access_token(groclient.cfg.API_HOST,
                                                      args.user_email,
                                                      args.user_password)

    if args.print_token:
        print(access_token)
        return

    client = GroClient(groclient.cfg.API_HOST, access_token)

    if (not args.metric and not args.item and not args.region
            and not args.partner_region):
        data_series = pick_random_data_series(client)
    else:
        data_series = next(
            client.find_data_series(
                item=args.item,
                metric=args.metric,
                region=args.region,
                partner_region=args.partner_region,
            ), None)

    if data_series is None:
        print("No data series found.")
        return

    if args.file is not None:
        write_one_data_series(client, data_series, args.file)
    else:
        print_one_data_series(client, data_series)
Example #16
    def setUp(self):
        self.client = GroClient(MOCK_HOST, MOCK_TOKEN)
        self.assertTrue(isinstance(self.client, GroClient))
Example #17
def get_geometry(region_id):
    client = GroClient('api.gro-intelligence.com', os.environ['GROAPI_TOKEN'])
    geom = client.get_geojson(region_id)
    if not geom:
        return None
    return shape(geom['geometries'][0])
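A hedged usage sketch for get_geometry; shapely must be installed, the region id is illustrative, and the attribute accesses are standard shapely geometry properties:

# Hypothetical usage: geometric queries on the returned shapely geometry.
geom = get_geometry(1029)  # illustrative region id
if geom is not None:
    print(geom.centroid.x, geom.centroid.y)  # centroid longitude/latitude
    print(geom.bounds)  # (minx, miny, maxx, maxy) bounding box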
Example #18
class GroClientTests(TestCase):
    def setUp(self):
        self.client = GroClient(MOCK_HOST, MOCK_TOKEN)
        self.client._async_http_client = None  # Force tests to use synchronous http
        self.assertTrue(isinstance(self.client, GroClient))

    def test_get_logger(self):
        # should NOT raise any exception if get_logger correctly returns a logger object:
        self.client.get_logger().debug("Test output")

    def test_get_available(self):
        self.assertTrue("name" in self.client.get_available("units")[0])

    def test_list_available(self):
        self.assertTrue("metric_id" in self.client.list_available({})[0])

    def test_lookup(self):
        self.assertEqual(self.client.lookup("units", 10)["name"], "kilogram")

    def test_lookup_unit_abbreviation(self):
        self.assertEqual(self.client.lookup_unit_abbreviation(10), "kg")

    def test_get_allowed_units(self):
        self.assertTrue(isinstance(
            self.client.get_allowed_units(1, 1)[0], int))

    def test_get_data_series(self):
        self.assertTrue("metric_id" in self.client.get_data_series()[0])

    def test_search(self):
        self.assertTrue(
            isinstance(self.client.search("regions", "United")[0], dict))
        self.assertTrue("id" in self.client.search("regions", "United")[0])

    def test_search_and_lookup(self):
        region = next(self.client.search_and_lookup("regions", "United"))
        self.assertTrue(isinstance(region, dict))
        self.assertTrue("id" in region)
        self.assertEqual(region["name"], "United States")

    def test_lookup_belongs(self):
        self.assertEqual(
            next(self.client.lookup_belongs("regions", 1215))["name"], "World")

    def test_rank_series_by_source(self):
        series = next(self.client.rank_series_by_source([]))
        self.assertTrue("metric_id" in series)
        self.assertTrue("source_id" in series)

    def test_get_geo_centre(self):
        centre = self.client.get_geo_centre(1215)
        self.assertTrue(len(centre) == 1)
        self.assertTrue("centre" in centre[0])
        self.assertTrue("regionName" in centre[0])

    def test_get_geojsons(self):
        geojsons = self.client.get_geojsons(1215, 4)
        self.assertTrue(len(geojsons) == 2)
        self.assertTrue("region_id" in geojsons[0])
        self.assertTrue("region_name" in geojsons[0])
        self.assertTrue("centre" in geojsons[0])
        self.assertTrue("geojson" in geojsons[0])
        self.assertTrue("type" in geojsons[0]["geojson"])
        self.assertTrue("coordinates" in geojsons[0]["geojson"])

    def test_get_geojson(self):
        geojson = self.client.get_geojson(1215)
        self.assertTrue("type" in geojson)
        self.assertTrue("type" in geojson["geometries"][0])
        self.assertTrue("coordinates" in geojson["geometries"][0])
        self.assertTrue(geojson["geometries"][0]['coordinates'][0][0][0] ==
                        [-38.394, -4.225])
        geojson = self.client.get_geojson(1215, 1)
        self.assertTrue("type" in geojson)
        self.assertTrue("type" in geojson["geometries"][0])
        self.assertTrue("coordinates" in geojson["geometries"][0])
        self.assertTrue(
            geojson["geometries"][0]['coordinates'][0][0][0] == [-38, -4])

    def test_get_ancestor(self):
        self.assertTrue(
            "name" in self.client.get_ancestor('metrics', 119)[0])
        self.assertTrue("name" not in self.client.get_ancestor(
            'regions', 12345, include_details=False)[0])

    def test_get_descendant(self):
        self.assertTrue(
            "name" in self.client.get_descendant('metrics', 119)[0])
        self.assertTrue("name" not in self.client.get_descendant(
            'metrics', 119, include_details=False)[0])
        self.assertTrue("name" in self.client.get_descendant_regions(1215)[0])
        self.assertTrue("name" not in self.client.get_descendant_regions(
            1215, include_details=False)[0])

    def test_get_available_timefrequency(self):
        self.assertEqual(
            self.client.get_available_timefrequency()[0]["frequency_id"], 3)

    def test_get_top(self):
        self.assertEqual(
            self.client.get_top("regions",
                                metric_id=860032,
                                item_id=274,
                                frequency_id=9,
                                source_id=2)[0]["regionId"],
            1215,
        )

    def test_get_df(self):
        self.client.add_single_data_series(mock_data_series[0])
        df = self.client.get_df()
        self.assertEqual(df.iloc[0]["start_date"].date(), date(2017, 1, 1))
        self.client.add_single_data_series(mock_data_series[0])
        df = self.client.get_df(reporting_history=True)
        self.assertEqual(df.iloc[0]["start_date"].date(), date(2017, 1, 1))
        indexed_df = self.client.get_df(index_by_series=True)
        self.assertEqual(indexed_df.iloc[0]["start_date"].date(),
                         date(2017, 1, 1))
        series = zip_selections(indexed_df.iloc[0].name)
        self.assertEqual(series, mock_data_series[0])

    def test_get_df_complete_history(self):
        self.client.add_single_data_series(mock_data_series[0])
        df = self.client.get_df(complete_history=True)
        self.assertEqual(df.iloc[0]["reporting_date"].date(), date(2018, 1, 1))
        self.assertEqual(df.iloc[0]["available_date"].date(),
                         date(2018, 1, 31))

    def test_add_points_to_df(self):
        self.client.add_points_to_df(None, mock_data_series[0], [])
        self.assertTrue(self.client.get_df().empty)
        self.assertTrue(self.client.get_df(reporting_history=True).empty)
        self.assertTrue(self.client.get_df(index_by_series=True).empty)

        data_points = self.client.get_data_points(**mock_data_series[0])
        self.client.add_points_to_df(None, mock_data_series[0], data_points)
        self.assertEqual(self.client.get_df().iloc[0]["start_date"].date(),
                         date(2017, 1, 1))
        self.assertEqual(self.client.get_df().iloc[0]["source_id"], 2)

    def test_get_data_points(self):
        # Gives the point's default unit if unit's not specified:
        data_points = self.client.get_data_points(**mock_data_series[0])
        self.assertEqual(data_points[0]["unit_id"], 14)
        self.assertEqual(data_points[0]["value"], 40891)

        # Converts to the given unit:
        selections = dict(
            mock_data_series[0])  # make a copy so we don't modify the original
        selections["unit_id"] = 10
        data_points = self.client.get_data_points(**selections)
        self.assertEqual(data_points[0]["unit_id"], 10)
        self.assertEqual(data_points[0]["value"], 40891000)

    def test_GDH(self):
        df = self.client.GDH("860032-274-1215-0-9-2")
        self.assertEqual(df.iloc[0]["start_date"].date(), date(2017, 1, 1))
        # If you request a series with no data, an empty dataframe should be returned.
        # Extra options can be given, but any fields that duplicate the GDH key itself
        # (metric_id/item_id/etc.) should be ignored:
        df = self.client.GDH("860032-274-1215-0-2-9",
                             insert_nulls=True,
                             metric_id=1)
        self.assertEqual(len(df), 0)

    def test_add_single_data_series_adds_copy(self):
        selections = dict(
            mock_data_series[0])  # don't modify test data. Make a copy
        for region_id in [
                mock_data_series[0]["region_id"],
                mock_data_series[1]["region_id"],
        ]:
            # modify the original selections object
            selections["region_id"] = region_id
            # if add_single_data_series isn't making a copy of the selections passed in,
            # then this test should fail since the original reference has been modified.
            self.client.add_single_data_series(selections)
        self.assertEqual(
            len(self.client.get_df().drop_duplicates().region_id.unique()), 2)

    def test_add_single_data_series_allows_metadata(self):
        selections = dict(mock_data_series[0])
        selections['metadata'] = {'includes_historical_region': True}
        self.client.add_single_data_series(selections)
        self.assertEqual(len(self.client.get_df().item_id), 1)

    def test_get_data_series_list(self):
        self.client.add_single_data_series(mock_data_series[0])
        for key, value in self.client.get_data_series_list()[0].items():
            self.assertEqual(value, mock_data_series[0][key])

    def test_find_data_series(self):
        # TODO: when duplicates are removed, this should equal 2:
        self.assertEqual(
            len(
                list(
                    self.client.find_data_series(
                        metric="Production",
                        region="United",
                        start_date="2000-01-01",
                        end_date="2005-12-31",
                    ))),
            8,
        )

        # TODO: when duplicates are removed, this should equal 2:
        def only_accept_production_quantity(search_result):
            return "metric_id" not in search_result or search_result[
                "metric_id"] == 860032

        self.assertEqual(
            len(
                list(
                    self.client.find_data_series(
                        metric="Production",
                        result_filter=only_accept_production_quantity))),
            8,
        )

    def test_add_data_series(self):
        # TODO: when duplicates are removed, this should equal 2:
        data_series = self.client.add_data_series(metric="Production",
                                                  region="United")
        self.assertEqual(data_series, mock_data_series[0])
        for key, value in self.client.get_data_series_list()[0].items():
            self.assertEqual(value, mock_data_series[0][key])

    def test_search_for_entity(self):
        self.assertEqual(
            self.client.search_for_entity("metrics", "Production"), 860032)

    def test_get_provinces(self):
        self.assertEqual(
            self.client.get_provinces("United")[0],
            mock_entities["regions"][12345])

    def test_get_names_for_selection(self):
        selection = {"metric_id": 860032, "region_id": 0}
        self.assertEqual(
            self.client.get_names_for_selection(selection),
            [("metric", "Production Quantity"), ("region", "World")],
        )

    def test_convert_unit(self):
        self.assertEqual(
            self.client.convert_unit({
                "value": 1,
                "unit_id": 10
            }, 10),
            {
                "value": 1,
                "unit_id": 10
            },
        )
        self.assertEqual(
            self.client.convert_unit({
                "value": 1,
                "unit_id": 10
            }, 14),
            {
                "value": 0.001,
                "unit_id": 14
            },
        )
        self.assertEqual(
            self.client.convert_unit({
                "value": 3,
                "unit_id": 36
            }, 37),
            {
                "value": 42,
                "unit_id": 37
            },
        )
        self.assertEqual(
            self.client.convert_unit({
                "value": 1,
                "unit_id": 37
            }, 36),
            {
                "value": -17.5,
                "unit_id": 36
            },
        )
        self.assertEqual(
            self.client.convert_unit(
                {
                    "value": 20,
                    "unit_id": 10,
                    "metadata": {
                        "conf_interval": 2
                    }
                }, 14),
            {
                "value": 0.02,
                "metadata": {
                    "conf_interval": 0.002
                },
                "unit_id": 14
            },
        )
        self.assertEqual(
            self.client.convert_unit(
                {
                    "value": 20,
                    "unit_id": 10,
                    "metadata": {}
                }, 14),
            {
                "value": 0.02,
                "metadata": {},
                "unit_id": 14
            },
        )

        self.assertEqual(self.client.convert_unit({}, 36), {})

        with self.assertRaises(Exception):
            self.client.convert_unit({"value": 1, "unit_id": 10}, 43)

        self.assertEqual(
            self.client.convert_unit({
                "value": None,
                "unit_id": 37
            }, 36),
            {
                "value": None,
                "unit_id": 36
            },
        )

        with self.assertRaises(Exception):
            self.client.convert_unit({"value": None, "unit_id": 10}, 43)
Example #19
def main():
    client = GroClient(API_HOST, ACCESS_TOKEN)

    # ===================
    # | client.search() |
    # ===================
    # Returns a list of ids, ordered by relevance to your given search term
    # Note that you can search across metrics, items, regions, and sources.

    print('client.search()')
    print(client.search('metrics', 'Exports')[0]) # { 'id': 125 }
    print(client.search('items', 'Wheat')[0]) # { 'id': 95 }
    print(client.search('regions', 'India')[0]) # { 'id': 1094 }
    print(client.search('sources', 'USDA NASS')[0]) # { 'id': 29 }

    # ==============================
    # | client.search_and_lookup() |
    # ==============================
    # Helper function to use the client.lookup() function on each search result
    # and see more details about the result.
    # Returns a generator, which yields one search result at a time. Use the
    # next() method to get the first result:
    print('\nclient.search_and_lookup()')
    print(next(client.search_and_lookup('metrics', 'Export Value')))
    # {'id': 10000, 'contains': [10065, 11078], 'name': 'Export Value',
    #  'definition': 'The value of exports, or goods that have been sent to a \
    #  foreign country for sale. Data is mostly reported as free-on-board, \
    #  which includes the cost of delivering the goods to a designated \
    #  delivery vessel; exports of a good may not necessarily equal imports \
    #  for the partner region, since imports and exports are measured \
    #  differently by different governments.'}

    print(next(client.search_and_lookup('items', 'Wheat')))
    # {'id': 95, 'contains': [3595, 5772], 'name': 'Wheat',
    #  'definition': "Cereals within the genus <i>Triticum</i>, which is one \
    #  of the world's most popular and widely cultivated grain crops. Data \
    #  primarily covers common and durum wheat, as well as spelt."}

    print(next(client.search_and_lookup('regions', 'India')))
    # {'id': 1094, 'contains': [11187, 11190, 11174, 11197, 11188, 11200,
    #  11204, 11186, 11180, 11177, 11207, 11201, 11173, 11178, 11195, 11194,
    #  11183, 11199, 11203, 11202, 11193, 11181, 13475, 11196, 11185, 11175,
    #  11198, 11192, 11179, 11191, 11189, 11176, 11182, 11205, 11184, 11206],
    #  'name': 'India', 'level': 3, 'latitude': 22.8838, 'longitude': 79.6201}

    print(next(client.search_and_lookup('sources', 'USDA NASS')))
    # {'id': 29, 'name': 'USDA NASS Animals', 'longName': 'USDA National \
    #  Agricultural Statistics Database', 'metaType': 'data_series',
    #  'sourceLag': {'annual': '4m15d', 'weekly': '4d', 'monthly': '1m10d'},
    #  'historicalStartDate': '1866-12-01T00:00:00.000Z',
    #  'description': 'The National Agricultural Statistics Service is an arm \
    #  of the USDA and one of its primary intelligence- and data-gathering \
    #  units. The database provides updates almost daily on livestock, crops, \
    #  demographics, economics, and environmental indicators. Metrics covered \
    #  include production, yield, area harvested, price, inputs, stocks, etc. \
    #  The granularity is mostly internal US data and goes back as far as \
    #  1850.', 'resolution': 'District', 'regionalCoverage': 'United States',
    #  'language': 'English', 'fileFormat': 'CSV'}

    # ==========================
    # | client.get_data_series |
    # ==========================
    # Once you have identified one or more entities of interest, you can see
    # what data series are available for those entities using the
    # client.get_data_series() function.

    # The normal process of data discovery using the API is to look up the
    # items and/or regions of interest first, e.g., United States corn. Then
    # you can see what metrics are available for that item and region:
    # production, exports, prices, etc. For example:
    print('\nclient.get_data_series() Part 1: Search by item/region')
    # First look up the item/region of interest as seen in the above examples.
    # We just need the id number, so we will use search(). search_and_lookup()
    # would also work.
    corn = client.search('items', 'corn')[0]
    united_states = client.search('regions', 'united states')[0]
    # Now we can use client.get_data_series() to see what data series exist:
    data_series_list = client.get_data_series(**{
        'item_id': corn['id'],
        'region_id': united_states['id']
    })
    print('There are', len(data_series_list), 'different US Corn data series')
    unique_metrics = set(data_series['metric_name'] for data_series in data_series_list)
    print('Unique metrics:', len(unique_metrics))
    unique_sources = set(data_series['source_name'] for data_series in data_series_list)
    print('Unique sources:', len(unique_sources))

    # If you are interested in a particular source, you can also start there
    # and see what data series exist for it. One frequently asked question is
    # how to see what items/regions Gro publishes yield models for. Here is how
    # one would find out programmatically:
    print('\nclient.get_data_series() Part 2: search by source')
    # Gro publishes its own yield model values under the "Gro Yield Model"
    # source, which is treated as its own source just like any other, and you
    # can find it in the same manner:
    gro_yield_model = client.search('sources', 'Gro Yield Model')[0]
    # Now we can use client.get_data_series() to see what data series exist
    # under that source.
    data_series_list = client.get_data_series(**{
        'source_id': gro_yield_model['id']
    })
    print('There are', len(data_series_list), 'different Gro Yield Model data series')
    # There are thousands of data series in data_series_list since there are
    # many different regions. Let's just check which unique items have yield
    # models:
    unique_items = set(data_series['item_name'] for data_series in data_series_list)
    for item in unique_items:
        print(item)
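    # A hedged follow-up sketch: take one of the discovered series (the first
    # element is an arbitrary choice) and fetch its data points.
    first_series = data_series_list[0]
    for point in client.get_data_points(**first_series):
        print(point['start_date'], point['end_date'], point['value'])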
Example #20
    def test_no_host_and_kwarg_token(self):
        client = GroClient(access_token=MOCK_TOKEN)
        with patch("groclient.lib.get_available") as get_available:
            _ = client.get_available("items")
            get_available.assert_called_once_with(MOCK_TOKEN,
                                                  self.PROD_API_HOST, "items")
Example #21
    def setUp(self):
        self.client = GroClient(MOCK_HOST, MOCK_TOKEN)
        self.client._async_http_client = None  # Force tests to use synchronous http
        self.assertTrue(isinstance(self.client, GroClient))