def get_triples(dcids, limit=utils._MAX_LIMIT):
    """ Returns all triples associated with the given :code:`dcids`.

    A knowledge graph can be described as a collection of `triples` which are
    3-tuples that take the form `(s, p, o)`. Here `s` and `o` are nodes in the
    graph called the *subject* and *object* respectively while `p` is the
    property label of a directed edge from `s` to `o` (sometimes also called
    the *predicate*).

    Args:
      dcids (:obj:`list` of :obj:`str`): A list of dcids to get triples for.
      limit (:obj:`int`, optional): The maximum total number of triples to get.

    Returns:
      A :obj:`dict` mapping dcids to a :obj:`list` of triples `(s, p, o)` where
      `s`, `p`, and `o` are instances of :obj:`str` and either the subject or
      object is the mapped dcid. A dcid that has no entry in the response is
      mapped to an empty list.

    Raises:
      ValueError: If the payload returned by the Data Commons REST API is
        malformed.

    Examples:
      We would like to get five triples associated with
      `California <https://browser.datacommons.org/kg?dcid=geoId/06>`_

      >>> get_triples(["geoId/06"], limit=5)
      {
        "geoId/06": [
          ("geoId/06", "name", "California"),
          ("geoId/06", "typeOf", "State"),
          ("geoId/06", "geoId", "06"),
          ("geoId/0687056", "containedInPlace", "geoId/06"),
          ("geoId/0686440", "containedInPlace", "geoId/06")
        ]
      }
    """
    # Generate the GetTriple query and send the request.
    url = utils._API_ROOT + utils._API_ENDPOINTS['get_triples']
    payload = utils._send_request(url,
                                  req_json={
                                      'dcids': dcids,
                                      'limit': limit
                                  })

    # Create a map from dcid to list of triples.
    results = {}
    for dcid in dcids:
        # Every requested dcid gets a list, even when it has no triples.
        triples = results.setdefault(dcid, [])

        # A dcid that is absent from the payload (or mapped to null) simply
        # contributes no triples instead of raising KeyError/TypeError.
        for t in payload.get(dcid) or []:
            if 'objectId' in t:
                obj = t['objectId']
            elif 'objectValue' in t:
                obj = t['objectValue']
            else:
                # Neither a node nor a literal object: nothing to report.
                continue
            triples.append((t['subjectId'], t['predicate'], obj))
    return results
def get_stat_value(place,
                   stat_var,
                   date=None,
                   measurement_method=None,
                   observation_period=None,
                   unit=None,
                   scaling_factor=None):
    """Returns a value for `place` based on the `stat_var`.

    Args:
      place (`str`): The dcid of Place to query for.
      stat_var (`str`): The dcid of the StatisticalVariable.
      date (`str`): Optional, the preferred date of observation
        in ISO 8601 format. If not specified, returns the latest observation.
      measurement_method (`str`): Optional, the dcid of the preferred
        `measurementMethod` value.
      observation_period (`str`): Optional, the preferred
        `observationPeriod` value.
      unit (`str`): Optional, the dcid of the preferred `unit` value.
      scaling_factor (`int`): Optional, the preferred `scalingFactor` value.

    Returns:
      A `float` the value of `stat_var` for `place`, filtered
      by optional args.

    Raises:
      ValueError: If the request fails with a `ValueError`, or if the
        response does not contain a `value` entry.

    Examples:
      >>> get_stat_value("geoId/05", "Count_Person")
      366331
    """
    url = utils._API_ROOT + utils._API_ENDPOINTS['get_stat_value']
    url += '?place={}&stat_var={}'.format(place, stat_var)

    # Optional filters are appended only when they are set (truthy), in a
    # fixed order.
    optional_params = (
        ('date', date),
        ('measurement_method', measurement_method),
        ('observation_period', observation_period),
        ('unit', unit),
        ('scaling_factor', scaling_factor),
    )
    for name, value in optional_params:
        if value:
            url += '&{}={}'.format(name, value)

    try:
        res_json = utils._send_request(url, post=False, use_payload=False)
    except ValueError:
        raise ValueError('No data in response.')
    # Same contract as get_stat_series: a response without the expected key
    # is reported as ValueError rather than leaking a KeyError.
    if 'value' not in res_json:
        raise ValueError('No data in response.')
    return res_json['value']
def get_places_in(dcids, place_type):
    """ Returns :obj:`Place`s contained in :code:`dcids` of type :code:`place_type`.

    Args:
      dcids (:obj:`iterable` of :obj:`str`): Dcids to get contained in places.
      place_type (:obj:`str`): The type of places contained in the given dcids
        to filter by.

    Returns:
      The returned :obj:`Place`'s are formatted as a :obj:`dict` from a given
      dcid to a list of places identified by dcids of the given `place_type`.

    Raises:
      ValueError: If the payload returned by the Data Commons REST API is
        malformed.

    Examples:
      We would like to get all Counties contained in
      `California <https://browser.datacommons.org/kg?dcid=geoId/06>`_.
      Specifying the :code:`dcids` as a :obj:`list` result in the following.

      >>> get_places_in(["geoId/06"], "County")
      {
        'geoId/06': [
          'geoId/06041',
          'geoId/06089',
          'geoId/06015',
          'geoId/06023',
          'geoId/06067',
          ...
          # and 53 more
        ]
      }
    """
    # NaN is the only value that differs from itself, so this drops NaNs.
    dcids = [dcid for dcid in dcids if dcid == dcid]

    request = {
        'dcids': dcids,
        'place_type': place_type,
    }
    url = utils._API_ROOT + utils._API_ENDPOINTS['get_places_in']
    payload = utils._send_request(url, req_json=request)

    # Expand the payload into the dcid -> places mapping.
    return utils._format_expand_payload(payload, 'place', must_exist=dcids)
def get_stat_series(place,
                    stat_var,
                    measurement_method=None,
                    observation_period=None,
                    unit=None,
                    scaling_factor=None):
    """Returns a `dict` mapping dates to value of `stat_var` for `place`.

    Args:
      place (`str`): The dcid of Place to query for.
      stat_var (`str`): The dcid of the StatisticalVariable.
      measurement_method (`str`): Optional, the dcid of the preferred
        `measurementMethod` value.
      observation_period (`str`): Optional, the preferred
        `observationPeriod` value.
      unit (`str`): Optional, the dcid of the preferred `unit` value.
      scaling_factor (`int`): Optional, the preferred `scalingFactor` value.

    Returns:
      A `dict` mapping dates to value of `stat_var` for `place`,
      filtered by optional args.

    Raises:
      ValueError: If the response does not contain a `series` entry.

    Examples:
      >>> get_stat_series("geoId/05", "Count_Person")
      {"1962":17072000,"2009":36887615,"1929":5531000,"1930":5711000}
    """
    pieces = [
        utils._API_ROOT + utils._API_ENDPOINTS['get_stat_series'],
        '?place={}&stat_var={}'.format(place, stat_var),
    ]
    # Each optional filter contributes a query parameter only when it is set.
    if measurement_method:
        pieces.append('&measurement_method={}'.format(measurement_method))
    if observation_period:
        pieces.append('&observation_period={}'.format(observation_period))
    if unit:
        pieces.append('&unit={}'.format(unit))
    if scaling_factor:
        pieces.append('&scaling_factor={}'.format(scaling_factor))
    url = ''.join(pieces)

    res_json = utils._send_request(url, post=False, use_payload=False)
    if 'series' in res_json:
        return res_json['series']
    raise ValueError('No data in response.')
def get_property_labels(dcids, out=True):
    """ Returns the labels of properties defined for the given :code:`dcids`.

    Args:
      dcids (:obj:`iterable` of :obj:`str`): A list of nodes identified by
        their dcids.
      out (:obj:`bool`, optional): Whether or not the property points away
        from the given list of nodes.

    Returns:
      A :obj:`dict` mapping dcids to lists of property labels. If `out` is
      `True`, then property labels correspond to edges directed away from
      given nodes. Otherwise, they correspond to edges directed towards the
      given nodes.

    Raises:
      ValueError: If the payload returned by the Data Commons REST API is
        malformed.

    Examples:
      To get all outgoing property labels for
      `California <https://browser.datacommons.org/kg?dcid=geoId/06>`_ and
      `Colorado <https://browser.datacommons.org/kg?dcid=geoId/08>`_, we can
      write the following.

      >>> get_property_labels(['geoId/06', 'geoId/08'])
      {
        "geoId/06": [
          "containedInPlace",
          "geoId",
          "kmlCoordinates",
          "name",
          "provenance",
          "typeOf"
        ],
        "geoId/08": [
          "containedInPlace",
          "geoId",
          "kmlCoordinates",
          "name",
          "provenance",
          "typeOf"
        ]
      }

      We can also get incoming property labels by setting `out=False`.

      >>> get_property_labels(['geoId/06', 'geoId/08'], out=False)
      {
        "geoId/06": [
          "addressRegion",
          "containedInPlace",
          "location",
          "overlapsWith"
        ],
        "geoId/08": [
          "addressRegion",
          "containedInPlace",
          "location",
          "overlapsWith"
        ]
      }
    """
    # NaN never equals itself, so the comparison discards NaN entries.
    dcids = [dcid for dcid in dcids if dcid == dcid]

    # Generate the GetProperty query and send the request.
    url = utils._API_ROOT + utils._API_ENDPOINTS['get_property_labels']
    payload = utils._send_request(url, req_json={'dcids': dcids})

    # The orientation selects which label list of each node is returned.
    label_key = 'outLabels' if out else 'inLabels'
    return {dcid: payload[dcid][label_key] for dcid in dcids}
def get_property_values(dcids,
                        prop,
                        out=True,
                        value_type=None,
                        limit=utils._MAX_LIMIT):
    """ Returns property values of given :code:`dcids` along the given property.

    Args:
      dcids (:obj:`iterable` of :obj:`str`): dcids to get property values for.
      prop (:obj:`str`): The property to get property values for.
      out (:obj:`bool`, optional): A flag that indicates the property is
        directed away from the given nodes when set to true.
      value_type (:obj:`str`, optional): A type to filter returned property
        values by.
      limit (:obj:`int`, optional): The maximum number of property values
        returned aggregated over all given nodes.

    Returns:
      Returned property values are formatted as a :obj:`dict` from a given
      dcid to a sorted list of its unique property values. Every given dcid
      is present in the result, possibly mapped to an empty list.

    Raises:
      ValueError: If the payload returned by the Data Commons REST API is
        malformed.

    Examples:
      We would like to get the `name` of a list of states specified by their
      dcid:
      `geoId/06 <https://browser.datacommons.org/kg?dcid=geoId/06>`_,
      `geoId/21 <https://browser.datacommons.org/kg?dcid=geoId/21>`_, and
      `geoId/24 <https://browser.datacommons.org/kg?dcid=geoId/24>`_

      First, let's try specifying the :code:`dcids` as a :obj:`list` of
      :obj:`str`.

      >>> get_property_values(["geoId/06", "geoId/21", "geoId/24"], "name")
      {
        "geoId/06": ["California"],
        "geoId/21": ["Kentucky"],
        "geoId/24": ["Maryland"],
      }
    """
    # NaN is the only value unequal to itself; drop such entries.
    dcids = [dcid for dcid in dcids if dcid == dcid]

    # The same word names the request direction and the payload key.
    direction = 'out' if out else 'in'

    # Format the request to GetPropertyValue.
    req_json = {
        'dcids': dcids,
        'property': prop,
        'limit': limit,
        'direction': direction
    }
    if value_type:
        req_json['value_type'] = value_type

    # Send the request
    url = utils._API_ROOT + utils._API_ENDPOINTS['get_property_values']
    payload = utils._send_request(url, req_json=req_json)

    results = {}
    for dcid in dcids:
        # Nodes for this dcid in the requested direction, if any came back.
        nodes = []
        if dcid in payload and direction in payload[dcid]:
            nodes = payload[dcid][direction]

        # A node is reported by its dcid when it has one, else by its value.
        values = set()
        for node in nodes:
            if 'dcid' in node:
                values.add(node['dcid'])
            elif 'value' in node:
                values.add(node['value'])

        # Every dcid ends up in the result; sets become sorted lists.
        results[dcid] = sorted(values)
    return results
def get_related_places(dcids,
                       population_type,
                       constraining_properties={},
                       measured_property='count',
                       stat_type='measured',
                       within_place='',
                       per_capita=False,
                       same_place_type=False):
    """ Returns :obj:`Place`s related to :code:`dcids` for the given constraints.

    Args:
      dcids (:obj:`iterable` of :obj:`str`): Dcids to get related places.
      population_type (:obj:`str`): The type of statistical population.
      constraining_properties (:obj:`map` from :obj:`str` to :obj:`str`, optional):
        A map from constraining property to the value that the
        :obj:`StatisticalPopulation` should be constrained by.
      measured_property (:obj:`str`): The measured property.
      stat_type (:obj:`str`): The statistical type for the observation.
        Currently not forwarded: the request always carries an empty
        `statType` (see the TODO in the body).
      within_place(:obj:`str`): Optional, the DCID of the place that all the
        related places are contained in.
      per_capita(:obj:`bool`): Optional, whether to take into account
        `PerCapita` when compute the relatedness.
      same_place_type(:obj:`bool`): Optional, whether to require all the
        related places under the same place type.

    Returns:
      The returned :obj:`Place`'s are formatted as a :obj:`dict` from a given
      dcid to a list of related places for the given constraints. The payload
      of the request is returned as-is.

    Raises:
      ValueError: If the payload returned by the Data Commons REST API is
        malformed.

    Examples:
      We would like to get all related places of
      `Santa Clara county <https://browser.datacommons.org/kg?dcid=geoId/06085>`_
      Specifying the :code:`dcids` as a :obj:`list` result in the following.

      >>> get_related_places(["geoId/06085"], "Person", {
      ...     "age": "Years21To64",
      ...     "gender": "Female"
      ... }, "count", "measured")
      {
        'geoId/06085': [
          'geoId/06041',
          'geoId/06089',
          'geoId/06015',
          'geoId/06023',
        ]
      }
    """
    dcids = list(dcids)
    url = utils._API_ROOT + utils._API_ENDPOINTS['get_related_places']

    # One property/value record per constraining property.
    pvs = [{
        'property': p,
        'value': constraining_properties[p]
    } for p in constraining_properties]

    payload = utils._send_request(
        url,
        req_json={
            'dcids': dcids,
            'populationType': population_type,
            'pvs': pvs,
            'measuredProperty': measured_property,
            'statType': '',  # TODO: Set to stat_type when having it in BT data.
            'withinPlace': within_place,
            'perCapita': per_capita,
            'samePlaceType': same_place_type,
        })
    return payload
def get_places_in(dcids, place_type):
    """ Returns :obj:`Place`s contained in :code:`dcids` of type :code:`place_type`.

    Args:
      dcids (Union[:obj:`list` of :obj:`str`, :obj:`pandas.Series`]): Dcids to
        get contained in places.
      place_type (:obj:`str`): The type of places contained in the given dcids
        to filter by.

    Returns:
      When :code:`dcids` is an instance of :obj:`list`, the returned
      :obj:`Place`'s are formatted as a :obj:`dict` from a given dcid to a
      list of places identified by dcids of the given `place_type`.

      When :code:`dcids` is an instance of :obj:`pandas.Series`, the returned
      :obj:`Place`'s are formatted as a :obj:`pandas.Series` where the `i`-th
      entry corresponds to places contained in the place identified by the
      dcid in `i`-th cell if :code:`dcids`. The cells of the returned series
      will always contain a :obj:`list` of place dcids of the given
      `place_type`. The returned series carries the same index as
      :code:`dcids`.

    Raises:
      ValueError: If the payload returned by the Data Commons REST API is
        malformed.

    Examples:
      We would like to get all Counties contained in
      `California <https://browser.datacommons.org/kg?dcid=geoId/06>`_.
      Specifying the :code:`dcids` as a :obj:`list` result in the following.

      >>> get_places_in(["geoId/06"], "County")
      {
        'geoId/06': [
          'geoId/06041',
          'geoId/06089',
          'geoId/06015',
          'geoId/06023',
          'geoId/06067',
          ...
          # and 53 more
        ]
      }

      We can also specify the :code:`dcids` as a :obj:`pandas.Series` like so.

      >>> import pandas as pd
      >>> dcids = pd.Series(["geoId/06"])
      >>> get_places_in(dcids, "County")
      0    [geoId/06041, geoId/06089, geoId/06015, geoId/...
      dtype: object
    """
    # Convert the dcids field and format the request to GetPlacesIn
    dcids, req_dcids = utils._convert_dcids_type(dcids)
    url = utils._API_ROOT + utils._API_ENDPOINTS['get_places_in']
    payload = utils._send_request(url,
                                  req_json={
                                      'dcids': req_dcids,
                                      'place_type': place_type,
                                  })

    # Create the results and format it appropriately
    result = utils._format_expand_payload(payload, 'place', must_exist=dcids)
    if isinstance(dcids, pd.Series):
        # Keep the caller's index so the result aligns with the input series
        # (e.g. when assigned back as a DataFrame column).
        return pd.Series([result[dcid] for dcid in dcids], index=dcids.index)
    return result
def test_send_request_w_api_key(self, urlopen):
    """ Checks :code:`utils._send_request` after an API key has been set.

    The key is set through :code:`dc.set_api_key`, then a dummy url that
    tells the mock to expect a key is requested; the response is expected to
    be an empty :obj:`dict`.

    Args:
      urlopen: Unused in the body; presumably the mock injected by a patch
        decorator that is not visible here.
    """
    # Set the API key
    dc.set_api_key(_TEST_API_KEY)

    # Issue a dummy url that tells the mock to expect a key
    response = utils._send_request(_SEND_REQ_W_KEY)
    self.assertEqual(response, {})
def get_populations(dcids, population_type, constraining_properties={}):
    """ Returns :obj:`StatisticalPopulation`'s located at the given :code:`dcids`.

    Args:
      dcids (Union[:obj:`list` of :obj:`str`, :obj:`pandas.Series`]): Dcids
        identifying :obj:`Place`'s of populations to query for. These dcids are
        treated as the property value associated with returned
        :obj:`Population`'s by the property
        `location <https://browser.datacommons.org/kg?dcid=location>`_
      population_type (:obj:`str`): The population type of the
        :obj:`StatisticalPopulation`
      constraining_properties (:obj:`map` from :obj:`str` to :obj:`str`, optional):
        A map from constraining property to the value that the
        :obj:`StatisticalPopulation` should be constrained by.

    Returns:
      When :code:`dcids` is an instance of :obj:`list`, the returned
      :obj:`StatisticalPopulation` are formatted as a :obj:`dict` from a given
      dcid to the unique :obj:`StatisticalPopulation` located at the dcid as
      specified by the `population_type` and `constraining_properties` *if
      such exists*. A given dcid will *NOT* be a member of the :obj:`dict` if
      such a population does not exist.

      When :code:`dcids` is an instance of :obj:`pandas.Series`, the returned
      :obj:`StatisticalPopulation` are formatted as a :obj:`pandas.Series`
      where the `i`-th entry corresponds to populations located at the given
      dcid specified by the `population_type` and `constraining_properties`
      *if such exists*. Otherwise, the cell is empty.

    Raises:
      ValueError: If the payload returned by the Data Commons REST API is
        malformed.

    Examples:
      We would like to get

      - The `population of employed persons in California <https://browser.datacommons.org/kg?dcid=dc/p/x6t44d8jd95rd>`_
      - The `population of employed persons in Kentucky <https://browser.datacommons.org/kg?dcid=dc/p/fs929fynprzs>`_
      - The `population of employed persons in Maryland <https://browser.datacommons.org/kg?dcid=dc/p/lr52m1yr46r44>`_.

      These populations are specified as having a `population_type` as
      :obj:`Person` and the `constraining_properties` as
      `employment <https://browser.datacommons.org/kg?dcid=employment>`_
      = BLS_Employed

      With a :obj:`list` of dcids for our states, we can get the populations
      we want as follows.

      >>> dcids = ["geoId/06", "geoId/21", "geoId/24"]
      >>> pvs = {'employment': 'BLS_Employed'}
      >>> dc.get_populations(dcids, 'Person', constraining_properties=pvs)
      {
        "geoId/06": "dc/p/x6t44d8jd95rd",
        "geoId/21": "dc/p/fs929fynprzs",
        "geoId/24": "dc/p/lr52m1yr46r44"
      }

      We can also specify the :code:`dcids` as a :obj:`pandas.Series` like so.

      >>> import pandas as pd
      >>> dcids = pd.Series(["geoId/06", "geoId/21", "geoId/24"])
      >>> pvs = {'employment': 'BLS_Employed'}
      >>> dc.get_populations(dcids, 'Person', constraining_properties=pvs)
      0    dc/p/x6t44d8jd95rd
      1     dc/p/fs929fynprzs
      2    dc/p/lr52m1yr46r44
      dtype: object
    """
    # Convert the dcids field for the GetPopulations request.
    dcids, req_dcids = utils._convert_dcids_type(dcids)

    # One property/value record per constraining property.
    pv = []
    for name, value in constraining_properties.items():
        pv.append({'property': name, 'value': value})

    request = {
        'dcids': req_dcids,
        'population_type': population_type,
        'pvs': pv,
    }
    url = utils._API_ROOT + utils._API_ENDPOINTS['get_populations']
    payload = utils._send_request(url, req_json=request)

    # Create the results and format it appropriately
    result = utils._format_expand_payload(payload,
                                          'population',
                                          must_exist=dcids)

    if not isinstance(dcids, pd.Series):
        # Drop empty results while flattening
        return utils._flatten_results(result)

    # For a Series, empty results are flattened with "" as the default value
    # and the input index is kept.
    flattened = utils._flatten_results(result, default_value="")
    cells = [flattened[dcid] for dcid in dcids]
    return pd.Series(cells, index=dcids.index)
def get_stat_all(places, stat_vars):
    """Returns a nested `dict` of all time series for `places` and `stat_vars`.

    Requests are sent in batches of places so that each request covers
    roughly `_STAT_BATCH_SIZE` (place, stat_var) pairs, and always at least
    one place.

    Args:
      places (`Iterable` of `str`): The dcids of Places to query for.
      stat_vars (`Iterable` of `str`): The dcids of the StatisticalVariables.

    Returns:
      A nested `dict` mapping Places to StatisticalVariables and all available
      time series for each Place and StatisticalVariable pair. An empty
      `places` yields an empty `dict` without sending any request.

    Raises:
      ValueError: If a response has no `placeData` entry, or a place in it has
        a missing or empty `statVarData` entry.

    Examples:
      >>> get_stat_all(["geoId/05", "geoId/06"],
      ...              ["Count_Person", "Count_Person_Male"])
      {
        "geoId/05": {
          "Count_Person": {
            "sourceSeries": [
              {
                "val": {
                  "2010": 1633,
                  "2011": 1509,
                  "2012": 1581,
                },
                "observationPeriod": "P1Y",
                "importName": "Wikidata",
                "provenanceDomain": "wikidata.org"
              },
              {
                "val": {
                  "2010": 1333,
                  "2011": 1309,
                  "2012": 131,
                },
                "observationPeriod": "P1Y",
                "importName": "CensusPEPSurvey",
                "provenanceDomain": "census.gov"
              }
            ],
          },
          "Count_Person_Male": {
            "sourceSeries": [
              {
                "val": {
                  "2010": 1633,
                  "2011": 1509,
                  "2012": 1581,
                },
                "observationPeriod": "P1Y",
                "importName": "CensusPEPSurvey",
                "provenanceDomain": "census.gov"
              }
            ],
          }
        },
        "geoId/06": {
          "Count_Person": {},
          "Count_Person_Male": {
            "sourceSeries": [
              {
                "val": {
                  "2010": 13,
                  "2011": 13,
                  "2012": 322,
                },
                "observationPeriod": "P1Y",
                "importName": "CensusPEPSurvey",
                "provenanceDomain": "census.gov"
              }
            ]
          }
        }
      }
    """
    url = utils._API_ROOT + utils._API_ENDPOINTS['get_stat_all']

    # Cast iterable-like to list.
    places = list(places)
    stat_vars = list(stat_vars)

    # Aiming for _STAT_BATCH_SIZE entries total.
    # _STAT_BATCH_SIZE = num places x num stat_vars, so aim for
    # _STAT_BATCH_SIZE/len(stat_vars) places per batch. Both operands are
    # clamped to 1: an empty stat_vars list must not divide by zero (the
    # request is still sent and any error is left to the REST API), and more
    # stat_vars than _STAT_BATCH_SIZE must not yield a zero batch size.
    places_per_batch = max(1, _STAT_BATCH_SIZE // max(1, len(stat_vars)))

    res = {}
    for start in range(0, len(places), places_per_batch):
        req_json = {
            'stat_vars': stat_vars,
            'places': places[start:start + places_per_batch]
        }
        # Send the request
        res_json = utils._send_request(url,
                                       req_json=req_json,
                                       use_payload=False)
        if 'placeData' not in res_json:
            # The REST API spec will always return a dictionary under
            # placeData, even if no places exist or have no data.
            raise ValueError("Unexpected response from REST stat/all API.")

        # Unnest the REST response: place -> stat var -> series data.
        for place_dcid, place in res_json['placeData'].items():
            stat_var_data = place.get('statVarData')
            if not stat_var_data:
                # The REST API spec will always return a dictionary under
                # statVarData, even if no StatVars exist or have no data.
                # NOTE(review): an empty dictionary is also rejected here;
                # confirm that this is intended.
                raise ValueError("Unexpected response from REST stat/all API.")
            res[place_dcid] = dict(stat_var_data)

    return res
def get_populations(dcids, population_type, constraining_properties={}):
    """ Returns :obj:`StatisticalPopulation`'s located at the given :code:`dcids`.

    Args:
      dcids (:obj:`iterable` of :obj:`str`): Dcids
        identifying :obj:`Place`'s of populations to query for. These dcids are
        treated as the property value associated with returned
        :obj:`Population`'s by the property
        `location <https://browser.datacommons.org/kg?dcid=location>`_
      population_type (:obj:`str`): The population type of the
        :obj:`StatisticalPopulation`
      constraining_properties (:obj:`map` from :obj:`str` to :obj:`str`, optional):
        A map from constraining property to the value that the
        :obj:`StatisticalPopulation` should be constrained by.

    Returns:
      The returned :obj:`StatisticalPopulation` are formatted as a
      :obj:`dict` from a given dcid to the unique
      :obj:`StatisticalPopulation` located at the dcid as specified by the
      `population_type` and `constraining_properties` *if such exists*. A
      given dcid will *NOT* be a member of the :obj:`dict` if such a
      population does not exist.

    Raises:
      ValueError: If the payload returned by the Data Commons REST API is
        malformed.

    Examples:
      We would like to get

      - The `population of employed persons in California <https://browser.datacommons.org/kg?dcid=dc/p/x6t44d8jd95rd>`_
      - The `population of employed persons in Kentucky <https://browser.datacommons.org/kg?dcid=dc/p/fs929fynprzs>`_
      - The `population of employed persons in Maryland <https://browser.datacommons.org/kg?dcid=dc/p/lr52m1yr46r44>`_.

      These populations are specified as having a `population_type` as
      :obj:`Person` and the `constraining_properties` as
      `employment <https://browser.datacommons.org/kg?dcid=employment>`_
      = BLS_Employed

      With a :obj:`list` of dcids for our states, we can get the populations
      we want as follows.

      >>> dcids = ["geoId/06", "geoId/21", "geoId/24"]
      >>> pvs = {'employment': 'BLS_Employed'}
      >>> dc.get_populations(dcids, 'Person', constraining_properties=pvs)
      {
        "geoId/06": "dc/p/x6t44d8jd95rd",
        "geoId/21": "dc/p/fs929fynprzs",
        "geoId/24": "dc/p/lr52m1yr46r44"
      }
    """
    # NaN is the only value unequal to itself; drop such entries.
    dcids = [dcid for dcid in dcids if dcid == dcid]

    # One property/value record per constraining property.
    pv = []
    for name, value in constraining_properties.items():
        pv.append({'property': name, 'value': value})

    # Format and send the GetPopulations request.
    request = {
        'dcids': dcids,
        'population_type': population_type,
        'pvs': pv,
    }
    url = utils._API_ROOT + utils._API_ENDPOINTS['get_populations']
    payload = utils._send_request(url, req_json=request)

    # Create the results and format it appropriately
    expanded = utils._format_expand_payload(payload,
                                            'population',
                                            must_exist=dcids)

    # Drop empty results while flattening
    return _flatten_results(expanded)
def get_stats(dcids,
              stats_var,
              obs_dates='latest',
              measurement_method=None,
              unit=None,
              obs_period=None):
    """ Returns :obj:`TimeSeries` for :code:`dcids` \
      based on the :code:`stats_var`.

    Args:
      dcids (:obj:`iterable` of :obj:`str`): Dcids of places to query for.
      stats_var (:obj:`str`): The dcid of the :obj:StatisticalVariable.
      obs_dates (:obj:`str` or :obj:`iterable` of :obj:`str`):
        Which observation to return.
        Can be 'latest', 'all', a single date, or an iterable of dates in
        'YYYY-MM-DD' format.
      measurement_method (:obj:`str`): Optional, the dcid of the preferred
        `measurementMethod` value.
      unit (:obj:`str`): Optional, the dcid of the preferred `unit` value.
      obs_period (:obj:`str`): Optional, the dcid of the preferred
        `observationPeriod` value.

    Returns:
      A :obj:`dict` mapping the :obj:`Place` identified by the given
      :code:`dcid` to its place name and the :obj:`TimeSeries` associated with
      the :obj:`StatisticalVariable` identified by the given
      :code:`stats_var` and filtered by :code:`obs_dates` and optional args.
      Unless :code:`obs_dates` is 'all', places without any data are left out.
      See example below for more detail about how the returned :obj:`dict` is
      structured.

    Raises:
      ValueError: If the payload returned by the Data Commons REST API is
        malformed.

    Examples:
      We would like to get the :obj:`TimeSeries` of the number of males
      at least 25 years old that attended 12th grade but did not receive
      a high school diploma
      (`dc/0hyp6tkn18vcb <https://browser.datacommons.org/kg?dcid=dc/0hyp6tkn18vcb>`_)
      in `Arkansas <https://browser.datacommons.org/kg?dcid=geoId/05>`_
      and `California <https://browser.datacommons.org/kg?dcid=geoId/06>`_.

      >>> get_stats(["geoId/05", "geoId/06"], "dc/0hyp6tkn18vcb")
      {
        'geoId/05': {
          'place_name': 'Arkansas',
          'data': {
            '2011':18136,
            '2012':17279,
            '2013':17459,
            '2014':16966,
            '2015':17173,
            '2016':17041,
            '2017':17783,
            '2018':18003
          },
        },
        'geoId/06': {
          'place_name': 'California',
          'data': {
            '2011':316667,
            '2012':324116,
            '2013':331853,
            '2014':342818,
            '2015':348979,
            '2016':354806,
            '2017':360645,
            '2018':366331
          },
        },
      }
    """
    # NaN is the only value unequal to itself; drop such entries.
    dcids = [dcid for dcid in dcids if dcid == dcid]
    url = utils._API_ROOT + utils._API_ENDPOINTS['get_stats']

    # Resolve the date selection once, before batching.
    keep_all = obs_dates == 'all'
    keep_latest = not keep_all and obs_dates == 'latest'
    wanted_dates = None
    if not keep_all and not keep_latest and obs_dates:
        if isinstance(obs_dates, str):
            # A lone date string is one date, not an iterable of characters.
            wanted_dates = {obs_dates}
        else:
            wanted_dates = set(obs_dates)

    batch_size = utils._QUERY_BATCH_SIZE
    res = {}
    for start in range(0, len(dcids), batch_size):
        req_json = {
            'place': dcids[start:start + batch_size],
            'stats_var': stats_var,
        }
        if measurement_method:
            req_json['measurement_method'] = measurement_method
        if unit:
            req_json['unit'] = unit
        if obs_period:
            req_json['observation_period'] = obs_period
        payload = utils._send_request(url, req_json)

        if keep_all:
            res.update(payload)
            continue
        if not keep_latest and wanted_dates is None:
            # An empty/None obs_dates selects nothing.
            continue

        for geo, stats in payload.items():
            time_series = stats.get('data') if stats else None
            if not time_series:
                continue
            if keep_latest:
                # Shrink the series in place to its greatest date key.
                max_date = max(time_series)
                max_date_stat = time_series[max_date]
                time_series.clear()
                time_series[max_date] = max_date_stat
            else:
                # Iterate over a copy of the keys since entries are removed.
                for date in list(time_series):
                    if date not in wanted_dates:
                        time_series.pop(date)
            res[geo] = stats
    return res
def get_property_values(dcids,
                        prop,
                        out=True,
                        value_type=None,
                        limit=utils._MAX_LIMIT):
    """ Returns property values of given :code:`dcids` along the given property.

    Args:
      dcids (Union[:obj:`list` of :obj:`str`, :obj:`pandas.Series`]): dcids to
        get property values for.
      prop (:obj:`str`): The property to get property values for.
      out (:obj:`bool`, optional): A flag that indicates the property is
        directed away from the given nodes when set to true.
      value_type (:obj:`str`, optional): A type to filter returned property
        values by.
      limit (:obj:`int`, optional): The maximum number of property values
        returned aggregated over all given nodes.

    Returns:
      When :code:`dcids` is an instance of :obj:`list`, the returned property
      values are formatted as a :obj:`dict` from a given dcid to a list of its
      property values.

      When :code:`dcids` is an instance of :obj:`pandas.Series`, the returned
      property values are formatted as a :obj:`pandas.Series` where the `i`-th
      entry corresponds to property values associated with the `i`-th given
      dcid. The cells of the returned series will always contain a
      :obj:`list` of property values.

      In both cases a dcid without values in the requested direction is
      mapped to an empty list.

    Raises:
      ValueError: If the payload returned by the Data Commons REST API is
        malformed.

    Examples:
      We would like to get the `name` of a list of states specified by their
      dcid:
      `geoId/06 <https://browser.datacommons.org/kg?dcid=geoId/06>`_,
      `geoId/21 <https://browser.datacommons.org/kg?dcid=geoId/21>`_, and
      `geoId/24 <https://browser.datacommons.org/kg?dcid=geoId/24>`_

      First, let's try specifying the :code:`dcids` as a :obj:`list` of
      :obj:`str`.

      >>> get_property_values(["geoId/06", "geoId/21", "geoId/24"], "name")
      {
        "geoId/06": ["California"],
        "geoId/21": ["Kentucky"],
        "geoId/24": ["Maryland"],
      }

      Next, we specify :code:`dcids` as a :obj:`pandas.Series`

      >>> import pandas as pd
      >>> dcids = pd.Series(["geoId/06", "geoId/21", "geoId/24"])
      >>> get_property_values(dcids, "name")
      0    [California]
      1      [Kentucky]
      2      [Maryland]
      dtype: object
    """
    # Convert the dcids field and format the request to GetPropertyValue
    dcids, req_dcids = utils._convert_dcids_type(dcids)
    req_json = {'dcids': req_dcids, 'property': prop, 'limit': limit}
    if value_type:
        req_json['value_type'] = value_type

    # Send the request
    url = utils._API_ROOT + utils._API_ENDPOINTS['get_property_values']
    payload = utils._send_request(url, req_json=req_json)

    # Payload key holding the nodes for the requested direction.
    direction = 'out' if out else 'in'

    # Create the result format for when dcids is provided as a list.
    unique_results = defaultdict(set)
    for dcid in dcids:
        # A dcid may be absent from the payload, or present without the
        # requested direction; both cases mean "no values", not an error.
        nodes = []
        if dcid in payload and direction in payload[dcid]:
            nodes = payload[dcid][direction]

        # A node is reported by its dcid when it has one, else by its value.
        for node in nodes:
            if 'dcid' in node:
                unique_results[dcid].add(node['dcid'])
            elif 'value' in node:
                unique_results[dcid].add(node['value'])

    # Make sure each dcid is in the results dict, and convert all sets to
    # lists.
    results = {dcid: sorted(unique_results[dcid]) for dcid in dcids}

    # Format the results as a Series if a Pandas Series is provided.
    if isinstance(dcids, pd.Series):
        return pd.Series([results[dcid] for dcid in dcids], index=dcids.index)
    return results
def get_observations(dcids,
                     measured_property,
                     stats_type,
                     observation_date,
                     observation_period=None,
                     measurement_method=None):
    """ Returns values of :obj:`Observation`'s observing the given :code:`dcids`.

    Args:
      dcids (Union[:obj:`list` of :obj:`str`, :obj:`pandas.Series`]): Dcids
        identifying nodes that returning :obj:`Observation`'s observe. These
        dcids are treated as the property value associated with returned
        :obj:`Observation`'s by the property
        `observedNode <https://browser.datacommons.org/kg?dcid=observedNode>`_
      measured_property (:obj:`str`): The measured property.
      stats_type (:obj:`str`): The statistical type for the observation.
      observation_date (:obj:`str`): The associated observation date in
        ISO8601 format.
      observation_period (:obj:`str`, optional): An optional parameter
        specifying the observation period.
      measurement_method (:obj:`str`, optional): An optional parameter
        specifying the measurement method.

    Raises:
      ValueError: If the payload returned by the Data Commons REST API is
        malformed.

    Returns:
      When :code:`dcids` is an instance of :obj:`list`, the returned
      :obj:`Observation`'s are formatted as a :obj:`dict` from a given dcid to
      the unique :obj:`Observation` observing the dcid where the observation
      is specified by what is given in the other parameters *if such exists*.
      A given dcid will *NOT* be a member of the :obj:`dict` if such an
      observation does not exist. Values are converted to :obj:`float` when
      possible and left unchanged otherwise.

      When :code:`dcids` is an instance of :obj:`pandas.Series`, the returned
      :obj:`Observation`'s are formatted as a :obj:`pandas.Series` where the
      `i`-th entry corresponds to observation observing the given dcid as
      specified by the other parameters *if such exists*. Otherwise, the cell
      holds NaN.

    Examples:
      We would like to get the following for December, 2018:

      - The `total count of employed persons in California <https://browser.datacommons.org/kg?dcid=dc/o/wetnm9026gf73>`_
      - The `total count of employed persons in Kentucky <https://browser.datacommons.org/kg?dcid=dc/o/4nklvdnkfq835>`_
      - The `total count of employed persons in Maryland <https://browser.datacommons.org/kg?dcid=dc/o/nkntbc4vpshn9>`_.

      The observations we want are observations of the populations
      representing employed individuals in each state (to get these, see
      :any:module-datacommons.populations.get_populations). With a list of
      these population dcids, we can get the observations like so.

      >>> dcids = [
      ...   "dc/p/x6t44d8jd95rd",   # Employed individuals in California
      ...   "dc/p/fs929fynprzs",    # Employed individuals in Kentucky
      ...   "dc/p/lr52m1yr46r44"    # Employed individuals in Maryland
      ... ]
      >>> get_observations(dcids, 'count', 'measuredValue', '2018-12',
      ...   observation_period='P1M',
      ...   measurement_method='BLSSeasonallyAdjusted'
      ... )
      {
        "dc/p/x6t44d8jd95rd": 18704962.0,
        "dc/p/fs929fynprzs": 1973955.0,
        "dc/p/lr52m1yr46r44": 3075662.0
      }

      We can also specify the :code:`dcids` as a :obj:`pandas.Series` like so.

      >>> import pandas as pd
      >>> dcids = pd.Series(["dc/p/x6t44d8jd95rd", "dc/p/fs929fynprzs", "dc/p/lr52m1yr46r44"])
      >>> get_observations(dcids, 'count', 'measuredValue', '2018-12',
      ...   observation_period='P1M',
      ...   measurement_method='BLSSeasonallyAdjusted'
      ... )
      0    18704962.0
      1     1973955.0
      2     3075662.0
      dtype: float64
    """
    # Normalize the dcids argument and assemble the GetObservation request.
    dcids, request_dcids = utils._convert_dcids_type(dcids)
    request_body = {
        'dcids': request_dcids,
        'measured_property': measured_property,
        'stats_type': stats_type,
        'observation_date': observation_date,
    }
    # Optional filters are only sent when the caller supplied a truthy value.
    optional_filters = (
        ('observation_period', observation_period),
        ('measurement_method', measurement_method),
    )
    for field_name, field_value in optional_filters:
        if field_value:
            request_body[field_name] = field_value

    # Issue the request to GetObservation.
    endpoint = utils._API_ROOT + utils._API_ENDPOINTS['get_observations']
    response = utils._send_request(endpoint, req_json=request_body)

    expanded = utils._format_expand_payload(response,
                                            'observation',
                                            must_exist=dcids)

    if not isinstance(dcids, pd.Series):
        # Flattening without a default value drops dcids that have no
        # observation; each remaining value becomes a float when it parses
        # as one and is kept as-is otherwise.
        coerced = {}
        for dcid, raw_value in utils._flatten_results(expanded).items():
            try:
                value = float(raw_value)
            except ValueError:
                value = raw_value
            coerced[dcid] = value
        return coerced

    # Series input: keep one cell per given dcid; missing observations are
    # flattened to "" and then coerced to NaN by to_numeric.
    by_dcid = utils._flatten_results(expanded, default_value="")
    column = pd.Series([by_dcid[dcid] for dcid in dcids], index=dcids.index)
    return column.apply(pd.to_numeric, errors='coerce')
def get_pop_obs(dcid):
    """ Returns all :obj:`StatisticalPopulation` and :obj:`Observation` of a :obj:`Thing`.

    Args:
      dcid (:obj:`str`): Dcid of the thing.

    Returns:
      A :obj:`dict` of :obj:`StatisticalPopulation` and :obj:`Observation`
      that are associated to the thing identified by the given :code:`dcid`.
      The given dcid is linked to the returned :obj:`StatisticalPopulation`,
      which are the :obj:`observedNode` of the returned :obj:`Observation`.
      See example below for more detail about how the returned :obj:`dict` is
      structured.

    Raises:
      ValueError: If the payload returned by the Data Commons REST API is
        malformed.

    Examples:
      We would like to get all :obj:`StatisticalPopulation` and
      :obj:`Observations` of
      `Santa Clara <https://browser.datacommons.org/kg?dcid=geoId/06085>`_.

      >>> get_pop_obs("geoId/06085")
      {
        'name': 'Santa Clara',
        'placeType': 'County',
        'populations': {
          'dc/p/zzlmxxtp1el87': {
            'popType': 'Household',
            'numConstraints': 3,
            'propertyValues': {
              'householderAge': 'Years45To64',
              'householderRace': 'USC_AsianAlone',
              'income': 'USDollar35000To39999'
            },
            'observations': [
              {
                'marginOfError': 274,
                'measuredProp': 'count',
                'measuredValue': 1352,
                'measurementMethod': 'CensusACS5yrSurvey',
                'observationDate': '2017'
              },
              {
                'marginOfError': 226,
                'measuredProp': 'count',
                'measuredValue': 1388,
                'measurementMethod': 'CensusACS5yrSurvey',
                'observationDate': '2013'
              }
            ],
          },
        },
        'observations': [
          {
            'meanValue': 4.1583,
            'measuredProp': 'particulateMatter25',
            'measurementMethod': 'CDCHealthTracking',
            'observationDate': '2014-04-04',
            'observedNode': 'geoId/06085'
          },
          {
            'meanValue': 9.4461,
            'measuredProp': 'particulateMatter25',
            'measurementMethod': 'CDCHealthTracking',
            'observationDate': '2014-03-20',
            'observedNode': 'geoId/06085'
          }
        ]
      }

      Notice that the return value is a multi-level :obj:`dict`. The top
      level contains the following keys.

      - :code:`name` and :code:`placeType` provides the name and type of the
        :obj:`Place` identified by the given :code:`dcid`.
      - :code:`populations` maps to a :obj:`dict` containing all
        :obj:`StatisticalPopulation` that have the given :code:`dcid` as its
        :obj:`location`.
      - :code:`observations` maps to a :obj:`list` containing all
        :obj:`Observation` that have the given :code:`dcid` as its
        :obj:`observedNode`.

      The :code:`populations` dictionary is keyed by the dcid of each
      :obj:`StatisticalPopulation`. The mapped dictionary contains the
      following keys.

      - :code:`popType` which gives the population type of the
        :obj:`StatisticalPopulation` identified by the key.
      - :code:`numConstraints` which gives the number of constraining
        properties defined for the identified :obj:`StatisticalPopulation`.
      - :code:`propertyValues` which gives a :obj:`dict` mapping a
        constraining property to its value for the identified
        :obj:`StatisticalPopulation`.
      - :code:`observations` which gives a list of all :obj:`Observation`'s
        that have the identified :obj:`StatisticalPopulation` as their
        :obj:`observedNode`.

      Each :obj:`Observation` is represented by a :code:`dict` that have the
      keys:

      - :code:`measuredProp`: The property measured by the
        :obj:`Observation`.
      - :code:`observationDate`: The date when the :obj:`Observation` was
        made.
      - :code:`observationPeriod` (optional): The period over which the
        :obj:`Observation` was made.
      - :code:`measurementMethod` (optional): A field providing additional
        information on how the :obj:`Observation` was collected.
      - Additional fields that denote values measured by the
        :obj:`Observation`. These may include the following:
        :code:`measuredValue`, :code:`meanValue`, :code:`medianValue`,
        :code:`maxValue`, :code:`minValue`, :code:`sumValue`,
        :code:`marginOfError`, :code:`stdError`, :code:`meanStdError`, and
        others.
    """
    # The dcid travels as a query-string parameter of a GET request.
    endpoint = utils._API_ROOT + utils._API_ENDPOINTS['get_pop_obs']
    query_string = '?dcid={}'.format(dcid)
    return utils._send_request(endpoint + query_string,
                               compress=True,
                               post=False)
def get_stat_all(places, stat_vars):
    """Returns a nested `dict` of all time series for `places` and `stat_vars`.

    Args:
      places (`Iterable` of `str`): The dcids of Places to query for.
      stat_vars (`Iterable` of `str`): The dcids of the StatisticalVariables.

    Returns:
      A nested `dict` mapping Places to StatisticalVariables and all available
      time series for each Place and StatisticalVariable pair. A place whose
      response entry carries no StatisticalVariable data is omitted from the
      result.

    Raises:
      ValueError: If the payload returned by the Data Commons REST API is
        malformed.

    Examples:
      >>> get_stat_all(["geoId/05", "geoId/06"], ["Count_Person", "Count_Person_Male"])
      {
        "geoId/05": {
          "Count_Person": [
            {
              "val": {
                "2010": 1633,
                "2011": 1509,
                "2012": 1581,
              },
              "observationPeriod": "P1Y",
              "importName": "Wikidata",
              "provenanceDomain": "wikidata.org"
            },
            {
              "val": {
                "2010": 1333,
                "2011": 1309,
                "2012": 131,
              },
              "observationPeriod": "P1Y",
              "importName": "CensusPEPSurvey",
              "provenanceDomain": "census.gov"
            }
          ],
          "Count_Person_Male": [
            {
              "val": {
                "2010": 1633,
                "2011": 1509,
                "2012": 1581,
              },
              "observationPeriod": "P1Y",
              "importName": "CensusPEPSurvey",
              "provenanceDomain": "census.gov"
            }
          ],
        },
        "geoId/06": {
          "Count_Person": [],
          "Count_Person_Male": [
            {
              "val": {
                "2010": 13,
                "2011": 13,
                "2012": 322,
              },
              "observationPeriod": "P1Y",
              "importName": "CensusPEPSurvey",
              "provenanceDomain": "census.gov"
            }
          ],
        }
      }
    """
    url = utils._API_ROOT + utils._API_ENDPOINTS['get_stat_all']
    req_json = {'stat_vars': stat_vars, 'places': places}

    # Send the request
    res_json = utils._send_request(url, req_json=req_json, use_payload=False)

    if 'placeData' not in res_json:
        raise ValueError('No data in response.')

    # Unnest the REST response: drop the intermediate 'statVarData' level so
    # the result maps place -> stat var -> series directly.
    place_statvar_series = {}
    for place_dcid, place in res_json['placeData'].items():
        # A place entry may come back without 'statVarData' (or with it
        # empty); such places are left out rather than raising a KeyError.
        stat_var_data = place.get('statVarData')
        if not stat_var_data:
            continue
        place_statvar_series[place_dcid] = dict(stat_var_data)
    return place_statvar_series
def get_place_obs(place_type,
                  observation_date,
                  population_type,
                  constraining_properties=None):
    """ Returns all :obj:`Observation`'s for all places given the place type,
    observation date and the :obj:`StatisticalPopulation` constraints.

    Args:
      place_type (:obj:`str`): The type of places to query
        :obj:`StatisticalPopulation`'s and :obj:`Observation`'s for.
      observation_date (:obj:`str`): The observation date in ISO-8601 format.
      population_type (:obj:`str`): The population type of the
        :obj:`StatisticalPopulation`
      constraining_properties (:obj:`map` from :obj:`str` to :obj:`str`, optional):
        A map from constraining property to the value that the
        :obj:`StatisticalPopulation` should be constrained by. Omitting it (or
        passing :obj:`None`) applies no constraints.

    Returns:
      A list of dictionaries, with each dictionary containing *all*
      :obj:`Observation`'s of a place that conform to the
      :obj:`StatisticalPopulation` constraints. See examples for more details
      on how the format of the return value is structured.

    Raises:
      ValueError: If the payload is malformed.

    Examples:
      We would like to get all :obj:`StatisticalPopulation` and
      :obj:`Observations` for all places of type :obj:`City` in year 2017
      where the populations have a population type of :obj:`Person` is
      specified by the following constraining properties.

      - Persons should have `age <https://browser.datacommons.org/kg?dcid=age>`_
        with value `Years5To17 <https://browser.datacommons.org/kg?dcid=Years5To17>`_
      - Persons should have `placeOfBirth <https://browser.datacommons.org/kg?dcid=placeOfBirth>`_
        with value BornInOtherStateInTheUnitedStates.

      >>> props = {
      ...   'age': 'Years5To17',
      ...   'placeOfBirth': 'BornInOtherStateInTheUnitedStates'
      ... }
      >>> get_place_obs('City', '2017', 'Person', constraining_properties=props)
      [
        {
          'name': 'Marcus Hook borough',
          'place': 'geoId/4247344',
          'populations': {
            'dc/p/pq6frs32sfvk': {
              'observations': [
                {
                  'marginOfError': 39,
                  'measuredProp': 'count',
                  'measuredValue': 67,
                  'type': 'Observation'
                },
                # More observations...
              ],
            }
          }
        },
        # Entries for more cities...
      ]

      The value returned by :code:`get_place_obs` is a :obj:`list` of
      :obj:`dict`'s. Each dictionary corresponds to a
      :obj:`StatisticalPopulation` matching the given :code:`population_type`
      and :code:`constraining_properties` for a single place of the given
      :code:`place_type`. The dictionary contains the following keys.

      - :code:`name`: The name of the place being described.
      - :code:`place`: The dcid associated with the place being described.
      - :code:`populations`: A :obj:`dict` mapping
        :code:`StatisticalPopulation` dcids to a :obj:`dict` with a list of
        :code:`observations`.

      Each :obj:`Observation` is represented by a :obj:`dict` with the
      following keys.

      - :code:`measuredProp`: The property measured by the
        :obj:`Observation`.
      - :code:`measurementMethod` (optional): A field identifying how the
        :obj:`Observation` was made
      - Additional fields that denote values measured by the
        :obj:`Observation`. These may include the following:
        :code:`measuredValue`, :code:`meanValue`, :code:`medianValue`,
        :code:`maxValue`, :code:`minValue`, :code:`sumValue`,
        :code:`marginOfError`, :code:`stdError`, :code:`meanStdError`, and
        others.
    """
    # A None default replaces the former shared mutable `{}` default; an
    # absent or None argument means "no constraints".
    if constraining_properties is None:
        constraining_properties = {}

    # Create the json payload and send it to the REST API.
    pv = [{
        'property': k,
        'value': v
    } for k, v in constraining_properties.items()]
    url = utils._API_ROOT + utils._API_ENDPOINTS['get_place_obs']
    payload = utils._send_request(url,
                                  req_json={
                                      'place_type': place_type,
                                      'observation_date': observation_date,
                                      'population_type': population_type,
                                      'pvs': pv,
                                  },
                                  compress=True)
    return payload['places']
def test_send_request_no_api_key(self, urlopen):
    # Checks that _send_request yields an empty dict when no API key is set
    # in the environment.
    # NOTE(review): `urlopen` is presumably a mock injected by a patch
    # decorator that is not visible in this chunk; it is not used directly.
    api_key_var = utils._ENV_VAR_API_KEY
    del os.environ[api_key_var]

    # Issue a dummy url that tells the mock to not expect a key
    request_body = {'foo': ['bar']}
    response = utils._send_request(_SEND_REQ_NO_KEY, request_body)
    self.assertEqual(response, {})