Beispiel #1
0
  def test_multiple_dcids(self, urlopen_mock):
    """ Calling get_property_values with multiple dcids returns valid
    results.

    Covers a plain list of dcids, a list that also contains a NaN entry, and
    a dcid for which an empty list is expected.
    """
    county_dcids = ['geoId/06085', 'geoId/24031']
    expected_towns = {
      'geoId/06085': ['geoId/0643294', 'geoId/0644112'],
      'geoId/24031': ['geoId/2462850']
    }

    # Towns contained in Santa Clara County and Montgomery County.
    towns = dc.get_property_values(
      county_dcids, 'containedInPlace', out=False, value_type='Town')
    self.assertDictEqual(towns, expected_towns)

    # The same query with a NaN entry appended: the expected result has no
    # key for the NaN entry.
    dcids_with_nan = ['geoId/06085', 'geoId/24031', float('nan')]
    towns = dc.get_property_values(
      dcids_with_nan, 'containedInPlace', out=False, value_type='Town')
    self.assertDictEqual(towns, expected_towns)

    # Names of both counties; the list containing NaN is used here as well.
    expected_names = {
      'geoId/06085': ['Santa Clara County'],
      'geoId/24031': ['Montgomery County']
    }
    names = dc.get_property_values(dcids_with_nan, 'name')
    self.assertDictEqual(names, expected_names)

    # A dcid without data maps to an empty list.
    names = dc.get_property_values(['dc/p/1234'], 'name')
    self.assertDictEqual(names, {'dc/p/1234': []})
Beispiel #2
0
    def test_multiple_dcids(self, post_mock):
        """ Calling get_property_values with multiple dcids returns valid
        results.
        """
        # An API key is set before any request is issued.
        dc.set_api_key('TEST-API-KEY')

        county_dcids = ['geoId/06085', 'geoId/24031']

        # Towns contained in Santa Clara County and Montgomery County.
        expected_towns = {
            'geoId/06085': ['geoId/0643294', 'geoId/0644112'],
            'geoId/24031': ['geoId/2462850']
        }
        actual_towns = dc.get_property_values(
            county_dcids, 'containedInPlace', out=False, value_type='Town')
        self.assertDictEqual(actual_towns, expected_towns)

        # Names of Santa Clara County and Montgomery County.
        expected_names = {
            'geoId/06085': ['Santa Clara County'],
            'geoId/24031': ['Montgomery County']
        }
        actual_names = dc.get_property_values(county_dcids, 'name')
        self.assertDictEqual(actual_names, expected_names)
Beispiel #3
0
    def download_data(self, place='country/USA', level=1):
        """Downloads GeoJSON data for the areas contained in a location.

        Looks up the type of `place`, follows the LEVEL_MAP class constant
        `level` times to find the type of the target areas, and stores the
        geoJsonCoordinates of every place of that type contained in `place`
        in self.geojsons. For example, with place='country/USA' and level=1
        the AdministrativeArea1's within the US (US states) are fetched.

        Args:
            place: A string identifying the Data Commons node to start from
                   (e.g. 'country/USA').
            level: Number of administrative levels down from place that should
                   be fetched. For example if place='country/USA' and level=1,
                   US states will be fetched. If instead level=2, US counties
                   will be fetched, and so on.

        Raises:
            ValueError: If LEVEL_MAP has no entry for one of the levels on the
                        way down. The original documentation also lists a
                        failed Data Commons API call as a cause.
            AssertionError: If place does not have exactly one typeOf value
                            (only while assertions are enabled).
        """
        type_lookup = dc.get_property_values([place], "typeOf")
        # The lookup maps the dcid to a list of values, so the single type
        # has to be taken out of that list explicitly.
        place_types = type_lookup[place]
        assert len(place_types) == 1
        target_type = place_types[0]

        # Step down one administrative level per iteration.
        for _ in range(level):
            if target_type not in self.LEVEL_MAP:
                raise ValueError("Desired level does not exist.")
            target_type = self.LEVEL_MAP[target_type]

        contained = dc.get_places_in([place], target_type)
        self.geojsons = dc.get_property_values(contained[place],
                                               "geoJsonCoordinates")
        # Replace the first stored value of each area with the parsed form of
        # the coordinates yielded by iter_subareas.
        for area, coords in self.iter_subareas():
            self.geojsons[area][0] = geojson.loads(coords)
Beispiel #4
0
    def test_bad_dcids(self, post_mock):
        """ Calling get_property_values with dcids that do not exist returns
        empty results.
        """
        # An API key is set before any request is issued.
        dc.set_api_key('TEST-API-KEY')

        one_unknown = ['geoId/06085', 'dc/MadDcid']
        all_unknown = ['dc/MadDcid', 'dc/MadderDcid']

        # Santa Clara County together with a dcid that does not exist: only
        # the unknown dcid maps to an empty list.
        expected_mixed = {
            'geoId/06085': ['geoId/0644112'],
            'dc/MadDcid': []
        }
        mixed_result = dc.get_property_values(
            one_unknown, 'containedInPlace', out=False)
        self.assertDictEqual(mixed_result, expected_mixed)

        # Two dcids that do not exist: both map to an empty list.
        expected_unknown = {
            'dc/MadDcid': [],
            'dc/MadderDcid': []
        }
        unknown_result = dc.get_property_values(all_unknown,
                                                'containedInPlace')
        self.assertDictEqual(unknown_result, expected_unknown)
Beispiel #5
0
    def download_data(self, place='country/USA'):
        """Downloads GeoJSON data for the areas contained in a location.

        Looks up the type of `place`, maps it through the LEVEL_MAP class
        constant to the type one administrative level below, and stores the
        geoJsonCoordinates of every place of that type contained in `place`
        in self.geojsons. For example, if the input is country/USA, the
        AdministrativeArea1's within the US (US states) are fetched.

        Args:
            place: A string identifying the Data Commons node to start from
                   (e.g. 'country/USA').

        Raises:
            ValueError: Per the original documentation, if a Data Commons API
                        call fails.
            KeyError: If the type of place has no entry in LEVEL_MAP.
            AssertionError: If place does not have exactly one typeOf value
                            (only while assertions are enabled).
        """
        type_lookup = dc.get_property_values([place], "typeOf")
        # The lookup maps the dcid to a list of values, so the single type
        # has to be taken out of that list explicitly.
        place_types = type_lookup[place]
        assert len(place_types) == 1
        child_type = self.LEVEL_MAP[place_types[0]]

        contained = dc.get_places_in([place], child_type)
        self.geojsons = dc.get_property_values(contained[place],
                                               "geoJsonCoordinates")
        # Replace the first stored value of each area with the parsed form of
        # the coordinates yielded by iter_subareas.
        for area, coords in self.iter_subareas():
            self.geojsons[area][0] = geojson.loads(coords)
Beispiel #6
0
def _load_geojsons():
    """Fetch country geometries and their containedInPlace values.

    Returns:
        A tuple (geojsons, cip). geojsons maps each country that has a
        'geoJsonCoordinatesDP2' value to the shape built from the first such
        value; cip is the 'containedInPlace' lookup for all countries.
    """
    country_dcids = dc.get_places_in(['Earth'], 'Country')['Earth']
    raw_coords = dc.get_property_values(country_dcids,
                                        'geoJsonCoordinatesDP2')
    # Countries without any coordinate value are left out.
    shapes = {
        dcid: geometry.shape(json.loads(values[0]))
        for dcid, values in raw_coords.items()
        if values
    }
    print('Got', len(shapes), 'geojsons!')
    contained_in = dc.get_property_values(country_dcids, 'containedInPlace')
    return shapes, contained_in
Beispiel #7
0
def main():
  """Print property labels, contained cities and triples for sample dcids.

  Every result is fetched through the Data Commons client `dc` and written to
  standard output.
  """
  # The nodes to inspect; the first one is Santa Clara County.
  dcids = ['geoId/06085', 'dc/p/zsb968m3v1f97']

  # Print all incoming and outgoing properties for the given dcids. With the
  # default direction the labels are the outgoing ones; out=False requests
  # the incoming ones, so each result is bound to the matching name.
  print('Property Labels for Santa Clara County')
  out_labels = dc.get_property_labels(dcids)
  in_labels = dc.get_property_labels(dcids, out=False)
  print('> Printing properties for {}'.format(dcids))
  print('> Incoming properties: {}'.format(in_labels))
  print('> Outgoing properties: {}'.format(out_labels))

  # Print the cities whose "containedInPlace" points at each dcid.
  print('Property Values for "containedInPlace" of Santa Clara County')
  prop_vals = dc.get_property_values(
    dcids, 'containedInPlace', out=False, value_type='City')
  print('> Cities contained in {}'.format(dcids))
  for dcid in dcids:
    for city_dcid in prop_vals[dcid]:
      print('  - {}'.format(city_dcid))

  # Print the first 5 triples associated with each dcid.
  print('Triples for Santa Clara County')
  triples = dc.get_triples(dcids)
  for dcid in dcids:
    print('> Triples for {}'.format(dcid))
    for s, p, o in triples[dcid][:5]:
      print('  - ("{}", {}, "{}")'.format(s, p, o))
Beispiel #8
0
 def test_bad_property(self, urlopen_mock):
     """ Calling get_property_values with a property that does not exist returns
     empty results.
     """
     county_dcids = ['geoId/06085', 'geoId/24031']
     expected = {'geoId/06085': [], 'geoId/24031': []}

     # Get property values for a property that does not exist.
     actual = dc.get_property_values(county_dcids, 'madProperty')
     self.assertDictEqual(actual, expected)
Beispiel #9
0
    def test_series_no_dcid(self, post_mock):
        """ Calling get_property_values with an empty Pandas Series returns
        an empty Series.
        """
        # Both the input and the expected output are empty series.
        empty_input = pd.Series([])
        empty_expected = pd.Series([])

        # Query with the empty series and compare against the expectation.
        result = dc.get_property_values(empty_input, 'containedInPlace')
        assert_series_equal(result, empty_expected)
Beispiel #10
0
    def test_no_dcids(self, post_mock):
        """ Calling get_property_values with no dcids returns empty results. """
        # An API key is set before any request is issued.
        dc.set_api_key('TEST-API-KEY')

        # An empty list of dcids is expected to produce an empty dict.
        no_dcids = []
        result = dc.get_property_values(no_dcids, 'containedInPlace')
        self.assertDictEqual(result, {})
Beispiel #11
0
    def test_bad_property(self, post_mock):
        """ Calling get_property_values with a property that does not exist
        returns empty results.
        """
        # An API key is set before any request is issued.
        dc.set_api_key('TEST-API-KEY')

        county_dcids = ['geoId/06085', 'geoId/24031']
        expected = {'geoId/06085': [], 'geoId/24031': []}

        # Get property values for a property that does not exist.
        actual = dc.get_property_values(county_dcids, 'madProperty')
        self.assertDictEqual(actual, expected)
Beispiel #12
0
    def test_series_bad_property(self, post_mock):
        """ Calling get_property_values with a Pandas Series and a property
        that does not exist returns an empty result.
        """
        # An API key is set before any request is issued.
        dc.set_api_key('TEST-API-KEY')

        # Two counties in, one empty list per county expected out.
        county_series = pd.Series(['geoId/06085', 'geoId/24031'])
        empty_lists = pd.Series([[], []])

        # Query the non-existent property and compare against the expectation.
        result = dc.get_property_values(county_series, 'madProperty')
        assert_series_equal(result, empty_lists)
Beispiel #13
0
    def test_series_bad_dcids(self, post_mock):
        """ Calling get_property_values with a Pandas Series and dcids that
        do not exist returns an empty result.
        """
        # An API key is set before any request is issued.
        dc.set_api_key('TEST-API-KEY')

        # One series mixes a known dcid with an unknown one; the other holds
        # only unknown dcids.
        one_unknown = pd.Series(['geoId/06085', 'dc/MadDcid'])
        all_unknown = pd.Series(['dc/MadDcid', 'dc/MadderDcid'])
        expected_mixed = pd.Series([['geoId/0644112'], []])
        expected_empty = pd.Series([[], []])

        # Both queries are issued before any assertion runs.
        mixed_result = dc.get_property_values(
            one_unknown, 'containedInPlace', out=False)
        empty_result = dc.get_property_values(
            all_unknown, 'containedInPlace', out=False)

        # Compare each result with its expectation.
        assert_series_equal(mixed_result, expected_mixed)
        assert_series_equal(empty_result, expected_empty)
Beispiel #14
0
    def test_dataframe(self, post_mock):
        """ Calling get_property_values with a Pandas DataFrame returns the
        correct results.
        """
        # An API key is set before any request is issued.
        dc.set_api_key('TEST-API-KEY')

        # The input is a single-column frame; the expected output is a series
        # with one list of towns per row.
        county_frame = pd.DataFrame({'dcids': ['geoId/06085', 'geoId/24031']})
        expected_towns = pd.Series([
            ['geoId/0643294', 'geoId/0644112'],
            ['geoId/2462850'],
        ])

        # Query with the frame as input and compare against the expectation.
        actual_towns = dc.get_property_values(
            county_frame, 'containedInPlace', out=False, value_type='Town')
        assert_series_equal(actual_towns, expected_towns)
Beispiel #15
0
# Prints one CSV-style row per region of the requested NUTS level for the
# country selected on the command line.
args = argparser.parse_args()

# Look up the country name by its key in the metadata table.
aux = read_file(ROOT / "src" / "data" / "metadata.csv")
aux = aux.set_index("key")
country_name = aux.loc[args.country_code, "country_name"]

# Convert the key used on the command line to the 3-letter country code.
iso_codes = read_file(ROOT / "src" / "data" / "country_codes.csv")
iso_codes = iso_codes.set_index("key")
country_code_alpha_3 = iso_codes.loc[args.country_code, "3166-1-alpha-3"]

dc.set_api_key(args.dc_api_key)
country = "country/{}".format(country_code_alpha_3)
nuts_name = "EurostatNUTS{}".format(args.nuts_level)
regions = dc.get_places_in([country], nuts_name)[country]
names = dc.get_property_values(regions, "name")
for key, name in names.items():
    # The first listed name is used for the region.
    region_name = name[0]
    # Last path segment of the dcid with its first two characters removed.
    region_code = key.rsplit("/", 1)[-1][2:]
    print(("{country_code}_{region_code},"
           "{country_code},"
           "{country_name},"
           "{region_code},"
           "{region_name},"
           ",,,0").format(country_code=args.country_code,
                          region_code=region_code,
                          country_name=country_name,
                          region_name=region_name))
Beispiel #16
0
def main():
    """Walk through property labels, property values, triples and DataFrames.

    Every result is fetched through the Data Commons client `dc` and written
    to standard output, with section headers printed by utils._print_header.
    """
    # Set the dcid to be that of Santa Clara County.
    dcids = ['geoId/06085']

    # Print all incoming and outgoing properties from Santa Clara County.
    # With the default direction the labels are the outgoing ones; out=False
    # requests the incoming ones, so each result is bound to the matching
    # name.
    utils._print_header('Property Labels for Santa Clara County')
    out_labels = dc.get_property_labels(dcids)
    in_labels = dc.get_property_labels(dcids, out=False)
    print('> Printing properties for {}'.format(dcids))
    print('> Incoming properties: {}'.format(in_labels))
    print('> Outgoing properties: {}'.format(out_labels))

    # Print the cities whose "containedInPlace" points at Santa Clara County.
    utils._print_header(
        'Property Values for "containedInPlace" of Santa Clara County')
    prop_vals = dc.get_property_values(dcids,
                                       'containedInPlace',
                                       out=False,
                                       value_type='City')
    print('> Cities contained in {}'.format(dcids))
    for dcid in dcids:
        for city_dcid in prop_vals[dcid]:
            print('  - {}'.format(city_dcid))

    # Print the first 5 triples associated with Santa Clara County.
    utils._print_header('Triples for Santa Clara County')
    triples = dc.get_triples(dcids)
    for dcid in dcids:
        print('> Triples for {}'.format(dcid))
        for s, p, o in triples[dcid][:5]:
            print('  - ("{}", {}, "{}")'.format(s, p, o))

    # get_property_values can be easily used to populate Pandas DataFrames.
    # First create a DataFrame with some data.
    utils._print_header('Initialize the DataFrame')
    pd_frame = pd.DataFrame({'county': ['geoId/06085', 'geoId/24031']})
    print(pd_frame)

    # Get the names for the given counties.
    utils._print_header('Get County Names')
    pd_frame['county_name'] = dc.get_property_values(pd_frame['county'],
                                                     'name')
    print(pd_frame)

    # Get the cities contained in these counties.
    utils._print_header('Get Contained Cities')
    pd_frame['city'] = dc.get_property_values(pd_frame['county'],
                                              'containedInPlace',
                                              out=False,
                                              value_type='City')
    print(pd_frame)

    # To expand on a column with get_property_values, the data frame has to be
    # flattened first. Clients can use flatten_frame to do this.
    utils._print_header('Flatten the Frame')
    pd_frame = dc.flatten_frame(pd_frame)
    print(pd_frame)

    # Get the names for each city.
    utils._print_header('Get City Names')
    pd_frame['city_name'] = dc.get_property_values(pd_frame['city'], 'name')
    print(pd_frame)

    # Format the final frame.
    utils._print_header('The Final Frame')
    pd_frame = dc.flatten_frame(pd_frame)
    print(pd_frame)
Beispiel #17
0
 def test_no_dcids(self, urlopen_mock):
     """ Calling get_property_values with no dcids returns empty results. """
     # An empty list of dcids is expected to produce an empty dict.
     no_dcids = []
     result = dc.get_property_values(no_dcids, 'containedInPlace')
     self.assertDictEqual(result, {})
Beispiel #18
0
def main():
    """Walk through population and observation queries for three states.

    The same workflow is shown twice, first with plain lists and then with a
    Pandas DataFrame, followed by a combined population/observation lookup
    for a single place. All results are written to standard output.
    """
    # Create a list of dcids for California, Kentucky, and Maryland
    ca, ky, md = 'geoId/06', 'geoId/21', 'geoId/24'
    dcids = [ca, ky, md]

    # Arguments shared by both observation requests below: the count
    # (measuredValue) for December 2018, with a one month observation period
    # and the BLSSeasonallyAdjusted measurement method.
    observation_args = ('count', 'measuredValue', '2018-12')
    observation_kwargs = {
        'observation_period': 'P1M',
        'measurement_method': 'BLSSeasonallyAdjusted',
    }

    # Get the population of all employed individuals in the above states.
    utils._print_header('Get Populations for All Employed Individuals')
    employed = dc.get_populations(
        dcids,
        'Person',
        constraining_properties={'employment': 'BLS_Employed'})
    print('> Printing all populations of employed individuals\n')
    print(json.dumps(employed, indent=2))

    # Get the December 2018 count for the employed population of Maryland.
    utils._print_header(
        'Get Population Counts for Employed Individuals in Maryland')
    pop_dcids = [employed[md]]
    print('> Requesting observations for {} in December 2018\n'.format(
        pop_dcids))
    obs = dc.get_observations(pop_dcids, *observation_args,
                              **observation_kwargs)
    print(json.dumps(obs, indent=2))

    # We perform the same workflow using a Pandas DataFrame. First, initialize
    # a DataFrame with the three states and add their names.
    utils._print_header('Initialize the DataFrame')
    pd_frame = pd.DataFrame({'state': ['geoId/06', 'geoId/21', 'geoId/24']})
    state_names = dc.get_property_values(pd_frame['state'], 'name')
    pd_frame['state_name'] = state_names
    pd_frame = dc.flatten_frame(pd_frame)
    print(pd_frame)

    # Get populations for employed individuals
    utils._print_header('Add Population and Observation to DataFrame')
    pd_frame['employed_pop'] = dc.get_populations(
        pd_frame['state'],
        'Person',
        constraining_properties={'employment': 'BLS_Employed'})

    # Add the observation for employed individuals
    pd_frame['employed_count'] = dc.get_observations(
        pd_frame['employed_pop'], *observation_args, **observation_kwargs)
    print(pd_frame)

    # Final dataframe. Use the convenience function "clean_frame" to convert
    # columns to numerical types.
    utils._print_header('Final Data Frame')
    pd_frame = dc.clean_frame(pd_frame)
    print(pd_frame)

    # Get all population and observation data of Mountain View.
    utils._print_header('Get Mountain View population and observation')
    popobs = dc.get_pop_obs("geoId/0649670")
    pprint.pprint(popobs)
 def __addNameCol(self, df):
     """Add a 'name' column to df, in place, derived from its index.

     The index is passed to dc.get_property_values for the 'name' property and
     the result is mapped back over the index; the first element of each
     mapped value is then kept.

     Args:
         df: DataFrame to extend. NOTE(review): presumably indexed by dcid -
             confirm against the callers.
     """
     # NOTE(review): presumably a dcid -> list-of-names lookup - confirm.
     names_lookup = dc.get_property_values(df.index, 'name')
     df['name'] = df.index.map(names_lookup)
     # Keep only the first element of every mapped value.
     first_names = df['name'].str[0]
     df['name'] = first_names