Exemple #1
0
def main():
  """Show `get_places_in` with a plain list and with a Pandas DataFrame."""
  # DCIDs of Santa Clara County and Montgomery County.
  santa_clara = 'geoId/06085'
  montgomery = 'geoId/24031'

  # Fetch the CensusTracts of both counties with a single call.
  utils._print_header('Get Census Tracts')
  tracts_by_county = dc.get_places_in([santa_clara, montgomery],
                                      'CensusTract')
  sections = (
      (santa_clara, '> 10 CensusTracts in Santa Clara County'),
      (montgomery, '> 10 CensusTracts in Montgomery County'),
  )
  for position, (county, heading) in enumerate(sections):
    # A blank line always separates the two county sections.
    if position:
      print()
    if county not in tracts_by_county:
      continue
    print(heading)
    # Only the first ten tracts are listed.
    for tract in tracts_by_county[county][:10]:
      print('  - {}'.format(tract))

  # Repeat the task with a DataFrame that starts with one row per county.
  utils._print_header('Initialize the DataFrame')
  counties_frame = pd.DataFrame({'county': [santa_clara, montgomery]})
  print(counties_frame)

  # Add the tracts as a new column, then flatten the frame before printing.
  utils._print_header('Get Census Tracts')
  counties_frame['tracts'] = dc.get_places_in(counties_frame['county'],
                                              'CensusTract')
  counties_frame = dc.flatten_frame(counties_frame)
  print(counties_frame)
Exemple #2
0
def buildPlaceTreeState(state):
    """Build the State -> County -> City place tree for one state.

    Args:
        state: DCID of the state; it is passed both to
            RankStatVarPlace.getPlace and to dc.get_places_in.

    Returns:
        The place object obtained for the state, with one child per county
        and, under each county, one child per city.
    """
    state_node = RankStatVarPlace.getPlace("State", state)
    county_ids = dc.get_places_in([state], "County")[state]
    for county_id in county_ids:
        county_node = RankStatVarPlace.getPlace("County", county_id)
        state_node.addChild(county_node)
        # One lookup per county for the cities it contains.
        city_ids = dc.get_places_in([county_id], "City")[county_id]
        for city_id in city_ids:
            county_node.addChild(RankStatVarPlace.getPlace("City", city_id))
    return state_node
Exemple #3
0
    def download_data(self, place='country/USA'):
        """Downloads GeoJSON data for a specified location.

        Given the specified location, extracts the GeoJSONs of all
        administrative areas one level below it (as specified by the
        LEVEL_MAP class constant). For example, if the input is country/USA,
        extracts all AdministrativeArea1's within the US (US states).

        The result is stored in self.geojsons; nothing is returned.

        Args:
            place: A string that is a valid value for the geoId property of a
                   DataCommons node.

        Raises:
            ValueError: If a Data Commons API call fails, or if the place
                        does not have exactly one typeOf value.
        """
        place_types = dc.get_property_values([place], "typeOf")[place]
        # The child level is looked up from a single place type. Validate
        # with an explicit exception rather than `assert`, which is removed
        # when Python runs with -O.
        if len(place_types) != 1:
            raise ValueError(
                "Expected exactly one typeOf value for {}, got {}.".format(
                    place, len(place_types)))
        geolevel = place_types[0]
        # NOTE(review): a type missing from LEVEL_MAP presumably raises
        # KeyError here (assuming LEVEL_MAP is a dict) -- confirm.
        geos_contained_in_place = dc.get_places_in(
            [place], self.LEVEL_MAP[geolevel])[place]
        self.geojsons = dc.get_property_values(geos_contained_in_place,
                                               "geoJsonCoordinates")
        # There is an extra level of nesting in the stored values, so the
        # parsed GeoJSON replaces the 0th element explicitly.
        for area, coords in self.iter_subareas():
            self.geojsons[area][0] = geojson.loads(coords)
Exemple #4
0
    def download_data(self, place='country/USA', level=1):
        """Downloads GeoJSON data for a specified location.

        Given the specified location, extracts the GeoJSONs of all
        administrative areas `level` levels below it (as specified by the
        LEVEL_MAP class constant). For example, if the input is country/USA
        and level=1, extracts all AdministrativeArea1's within the US
        (US states).

        The result is stored in self.geojsons; nothing is returned.

        Args:
            place: A string that is a valid value for the geoId property of a
                   DataCommons node.
            level: Number of administrative levels down from place that should
                   be fetched. For example if place='country/USA' and level=1,
                   US states will be fetched. If instead level=2, US counties
                   will be fetched, and so on.

        Raises:
            ValueError: If a Data Commons API call fails, if the place does
                        not have exactly one typeOf value, or if LEVEL_MAP
                        has no entry for one of the levels walked.
        """
        place_types = dc.get_property_values([place], "typeOf")[place]
        # The walk below starts from a single place type. Validate with an
        # explicit exception rather than `assert`, which is removed when
        # Python runs with -O.
        if len(place_types) != 1:
            raise ValueError(
                "Expected exactly one typeOf value for {}, got {}.".format(
                    place, len(place_types)))
        geolevel = place_types[0]

        # Step down LEVEL_MAP once per requested level.
        for _ in range(level):
            if geolevel not in self.LEVEL_MAP:
                raise ValueError("Desired level does not exist.")
            geolevel = self.LEVEL_MAP[geolevel]

        geos_contained_in_place = dc.get_places_in([place], geolevel)[place]
        self.geojsons = dc.get_property_values(geos_contained_in_place,
                                               "geoJsonCoordinates")
        # There is an extra level of nesting in the stored values, so the
        # parsed GeoJSON replaces the 0th element explicitly.
        for area, coords in self.iter_subareas():
            self.geojsons[area][0] = geojson.loads(coords)
Exemple #5
0
 def test_no_dcids(self, urlopen):
     """ get_places_in maps each dcid that does not exist to an empty list. """
     missing = ['dc/MadDcid', 'dc/MadderDcid']
     expected = {dcid: [] for dcid in missing}
     # Ask for the cities of two dcids that do not exist.
     self.assertDictEqual(dc.get_places_in(missing, 'City'), expected)
Exemple #6
0
    def test_bad_dcids(self, urlopen):
        """ get_places_in gives an empty list for every dcid that does not
        exist.
        """
        # Each case pairs the queried dcids with the expected mapping:
        # first one missing dcid next to a real one, then two missing dcids.
        cases = (
            (['geoId/06085', 'dc/MadDcid'],
             {'geoId/06085': ['geoId/0649670'], 'dc/MadDcid': []}),
            (['dc/MadDcid', 'dc/MadderDcid'],
             {'dc/MadDcid': [], 'dc/MadderDcid': []}),
        )
        for queried, expected in cases:
            self.assertDictEqual(dc.get_places_in(queried, 'City'), expected)
Exemple #7
0
 def test_multiple_dcids(self, urlopen):
     """ get_places_in returns the contained cities for each valid dcid. """
     county_dcids = ['geoId/06085', 'geoId/24031']
     expected_cities = {
         'geoId/06085': ['geoId/0649670'],
         'geoId/24031': ['geoId/2467675', 'geoId/2476650'],
     }
     # Look up the cities of both counties in one call.
     actual_cities = dc.get_places_in(county_dcids, 'City')
     self.assertDictEqual(actual_cities, expected_cities)
Exemple #8
0
    def test_bad_dcids(self, post_mock):
        """ get_places_in gives an empty list for every dcid that does not
        exist.
        """
        # An API key is set before any request is made.
        dc.set_api_key('TEST-API-KEY')

        # Each case pairs the queried dcids with the expected mapping:
        # first one missing dcid next to a real one, then two missing dcids.
        cases = (
            (['geoId/06085', 'dc/MadDcid'],
             {'geoId/06085': ['geoId/0649670'], 'dc/MadDcid': []}),
            (['dc/MadDcid', 'dc/MadderDcid'],
             {'dc/MadDcid': [], 'dc/MadderDcid': []}),
        )
        for queried, expected in cases:
            self.assertDictEqual(dc.get_places_in(queried, 'City'), expected)
Exemple #9
0
def _load_geojsons():
    """Fetch country geometries and containment data from Data Commons.

    Returns:
        A (geojsons, cip) tuple. `geojsons` maps each country that has at
        least one 'geoJsonCoordinatesDP2' value to the shape built from the
        first value; `cip` is the 'containedInPlace' response for all
        countries.
    """
    country_ids = dc.get_places_in(['Earth'], 'Country')['Earth']
    coords_by_country = dc.get_property_values(country_ids,
                                               'geoJsonCoordinatesDP2')
    # Countries without any coordinate value are left out.
    shapes = {
        country: geometry.shape(json.loads(values[0]))
        for country, values in coords_by_country.items()
        if values
    }
    print('Got', len(shapes), 'geojsons!')
    contained_in = dc.get_property_values(country_ids, 'containedInPlace')
    return shapes, contained_in
Exemple #10
0
    def test_series_bad_dcids(self, post_mock):
        """ get_places_in on a Pandas Series gives an empty list for every
        dcid that does not exist.
        """
        # An API key is set before any request is made.
        dc.set_api_key('TEST-API-KEY')

        # Inputs: one missing dcid next to a real one, then two missing dcids.
        queries = [
            pd.Series(['geoId/06085', 'dc/MadDcid']),
            pd.Series(['dc/MadDcid', 'dc/MadderDcid']),
        ]
        expectations = [
            pd.Series([['geoId/0649670'], []]),
            pd.Series([[], []]),
        ]

        # Run every query first, then compare each result.
        results = [dc.get_places_in(query, 'City') for query in queries]
        for actual, expected in zip(results, expectations):
            assert_series_equal(actual, expected)
Exemple #11
0
    def test_no_dcids(self, urlopen):
        """ get_places_in maps each dcid that does not exist to an empty
        list.
        """
        # An API key is set before any request is made.
        dc.set_api_key('TEST-API-KEY')

        missing = ['dc/MadDcid', 'dc/MadderDcid']
        expected = {dcid: [] for dcid in missing}
        # Ask for the cities of two dcids that do not exist.
        self.assertDictEqual(dc.get_places_in(missing, 'City'), expected)
Exemple #12
0
    def test_multiple_dcids(self, post_mock):
        """ get_places_in returns the contained cities for each valid dcid. """
        # An API key is set before any request is made.
        dc.set_api_key('TEST-API-KEY')

        county_dcids = ['geoId/06085', 'geoId/24031']
        expected_cities = {
            'geoId/06085': ['geoId/0649670'],
            'geoId/24031': ['geoId/2467675', 'geoId/2476650'],
        }
        # Look up the cities of both counties in one call.
        actual_cities = dc.get_places_in(county_dcids, 'City')
        self.assertDictEqual(actual_cities, expected_cities)
Exemple #13
0
    def test_series_no_dcids(self, post_mock):
        """ get_places_in on an empty Pandas Series returns an empty Series. """
        # An API key is set before any request is made.
        dc.set_api_key('TEST-API-KEY')

        # Input and expected output are both empty Series.
        no_dcids = pd.Series([])
        nothing_expected = pd.Series([])

        assert_series_equal(dc.get_places_in(no_dcids, 'City'),
                            nothing_expected)
Exemple #14
0
    def test_series_multiple_dcids(self, post_mock):
        """ get_places_in on a Pandas Series of valid dcids returns a Pandas
        Series holding the cities of each dcid.
        """
        # An API key is set before any request is made.
        dc.set_api_key('TEST-API-KEY')

        # One list of cities is expected per county, in input order.
        county_dcids = pd.Series(['geoId/06085', 'geoId/24031'])
        expected_cities = pd.Series([
            ['geoId/0649670'],
            ['geoId/2467675', 'geoId/2476650'],
        ])

        assert_series_equal(dc.get_places_in(county_dcids, 'City'),
                            expected_cities)
Exemple #15
0
def main():
    """Demonstrate `get_places_in` and `get_stats` with several date filters."""
    # DCIDs of Santa Clara County and Montgomery County.
    santa_clara, montgomery = 'geoId/06085', 'geoId/24031'

    # Fetch the CensusTracts of both counties and list up to ten of each.
    print('Get Census Tracts')
    tracts_by_county = dc.get_places_in([santa_clara, montgomery],
                                        'CensusTract')
    sections = (
        (santa_clara, '> 10 CensusTracts in Santa Clara County'),
        (montgomery, '> 10 CensusTracts in Montgomery County'),
    )
    for county, heading in sections:
        if county not in tracts_by_county:
            continue
        print(heading)
        for tract in tracts_by_county[county][:10]:
            print('  - {}'.format(tract))

    # Every get_stats call uses the same places and stat var; only the
    # obs_dates argument varies (the "latest" query passes none).
    places = ['geoId/05', 'geoId/06', 'dc/madDcid']
    stat_var = 'dc/0hyp6tkn18vcb'
    queries = (
        ('Get place stats -- all', {'obs_dates': 'all'}),
        ('Get place stats -- latest', {}),
        ('Get place stats -- 2014', {'obs_dates': ['2014']}),
        ('Get place stats -- 2014 badly formatted', {'obs_dates': '2014'}),
        ('Get place stats -- 2015-2016', {'obs_dates': ['2015', '2016']}),
    )
    for title, date_filter in queries:
        print(title)
        # A fresh list is passed on every call.
        print(dc.get_stats(list(places), stat_var, **date_filter))
Exemple #16
0
def buildPlaceTree(placeFile):
    """Build the place tree, using `placeFile` as an on-disk cache.

    If `placeFile` exists, every line is split into four tab-separated
    fields (child dcid, child type, parent dcid, parent type) and the child
    place is attached to the parent place. Otherwise the tree is built from
    Data Commons (country -> states, each expanded by buildPlaceTreeState)
    and then written to `placeFile` through dumpPlaces.

    Args:
        placeFile: Path of the cache file to read, or to create when it does
            not exist yet.
    """
    if os.path.isfile(placeFile):
        # The context manager closes the file even if a line is malformed.
        with open(placeFile) as ff:
            for line in ff:
                it = line.strip().split('\t')
                pl = RankStatVarPlace.getPlace(it[1], it[0])
                par = RankStatVarPlace.getPlace(it[3], it[2])
                par.addChild(pl)
    else:
        US = RankStatVarPlace.getPlace("Country", US_dcid)
        states = dc.get_places_in([US_dcid], "State")[US_dcid]
        for state in states:
            st = buildPlaceTreeState(state)
            US.addChild(st)
        # The file is opened only after the whole tree has been built, so a
        # failed download does not leave an empty cache file behind.
        with open(placeFile, "w") as ff:
            for c in US.children:
                dumpPlaces(ff, c, US)
Exemple #17
0
def main():
    """Demonstrate `get_places_in` and `get_related_places`."""
    # DCIDs of Santa Clara County and Montgomery County.
    santa_clara, montgomery = 'geoId/06085', 'geoId/24031'

    # Fetch the CensusTracts of both counties and list up to ten of each.
    print('Get Census Tracts')
    tracts_by_county = dc.get_places_in([santa_clara, montgomery],
                                        'CensusTract')
    sections = (
        (santa_clara, '> 10 CensusTracts in Santa Clara County'),
        (montgomery, '> 10 CensusTracts in Montgomery County'),
    )
    for county, heading in sections:
        if county not in tracts_by_county:
            continue
        print(heading)
        for tract in tracts_by_county[county][:10]:
            print('  - {}'.format(tract))

    # Query related places for Santa Clara County and print the raw result.
    print('Get related places')
    property_values = {'age': "Years21To64", "gender": "Female"}
    print(
        dc.get_related_places(['geoId/06085'], 'Person', property_values,
                              'count', ''))
Exemple #18
0
                       default=os.environ["DATACOMMONS_API_KEY"])
args = argparser.parse_args()

# Get the country name
aux = read_file(ROOT / "src" / "data" / "metadata.csv").set_index("key")
country_name = aux.loc[args.country_code, "country_name"]

# Convert 2-letter to 3-letter country code
iso_codes = read_file(ROOT / "src" / "data" /
                      "country_codes.csv").set_index("key")
country_code_alpha_3 = iso_codes.loc[args.country_code, "3166-1-alpha-3"]

dc.set_api_key(args.dc_api_key)
country = "country/{}".format(country_code_alpha_3)
nuts_name = "EurostatNUTS{}".format(args.nuts_level)
regions = dc.get_places_in([country], nuts_name)[country]
names = dc.get_property_values(regions, "name")
for key, name in names.items():
    region_name = name[0]
    region_code = key.split("/")[-1][2:]
    print(("{country_code}_{region_code},"
           "{country_code},"
           "{country_name},"
           "{region_code},"
           "{region_name},"
           ",,,0").format(
               **{
                   "country_code": args.country_code,
                   "region_code": region_code,
                   "country_name": country_name,
                   "region_name": region_name,
Exemple #19
0
def main():
    """Exercise get_stat_value, get_stat_series and get_stat_all.

    Prints, for each parameter set, the call being made followed by its
    result, then runs two get_stat_all examples and finally a large
    get_stat_all request ("stress test") over all US counties.
    """
    # Keyword arguments for the get_stat_value / get_stat_series calls.
    # Keys absent from a set are passed as None through dict.get().
    param_sets = [
        {
            'place': 'geoId/06085',
            'stat_var': "Count_Person",
        },
        {
            'place': 'geoId/06085',
            'stat_var': "Count_Person",
            'date': '2018',
        },
        {
            'place': 'geoId/06085',
            'stat_var': "Count_Person",
            'date': '2018',
            'measurement_method': 'CensusACS5yrSurvey',
        },
        {
            'place': 'geoId/06085',
            'stat_var': 'UnemploymentRate_Person',
        },
        {
            'place': 'geoId/06085',
            'stat_var': 'UnemploymentRate_Person',
            'observation_period': 'P1Y',
        },
        {
            'place': 'geoId/06085',
            'stat_var': 'UnemploymentRate_Person',
            'observation_period': 'P1Y',
            'measurement_method': 'BLSSeasonallyUnadjusted',
        },
        {
            'place': 'nuts/HU22',
            'stat_var':
            'Amount_EconomicActivity_GrossDomesticProduction_Nominal',
        },
        {
            'place': 'nuts/HU22',
            'stat_var':
            'Amount_EconomicActivity_GrossDomesticProduction_Nominal',
            'observation_period': 'P1Y',
            'unit': 'PurchasingPowerStandard'
        },
    ]

    def call_str(pvs):
        """Helper function to print the minimal call string.

        Only arguments present (and truthy) in `pvs` are shown, in the same
        order in which they are passed to the API call.
        """
        s = "'{}', '{}'".format(pvs.get('place'), pvs.get('stat_var'))
        # Show the date when one is passed; otherwise two different
        # get_stat_value calls would print the same call string.
        if pvs.get('date'):
            s += ", date='{}'".format(pvs.get('date'))
        if pvs.get('measurement_method'):
            s += ", measurement_method='{}'".format(
                pvs.get('measurement_method'))
        if pvs.get('observation_period'):
            s += ", observation_period='{}'".format(
                pvs.get('observation_period'))
        if pvs.get('unit'):
            s += ", unit='{}'".format(pvs.get('unit'))
        if pvs.get('scaling_factor'):
            s += ", scaling_factor={}".format(pvs.get('scaling_factor'))
        return s

    for pvs in param_sets:
        print('\nget_stat_value({})'.format(call_str(pvs)))
        print(
            '>>> ',
            dc.get_stat_value(pvs.get('place'),
                              pvs.get('stat_var'),
                              date=pvs.get('date'),
                              measurement_method=pvs.get('measurement_method'),
                              observation_period=pvs.get('observation_period'),
                              unit=pvs.get('unit'),
                              scaling_factor=pvs.get('scaling_factor')))
    for pvs in param_sets:
        # get_stat_series is called without a date, so drop it (in place)
        # before building the call string.
        pvs.pop('date', None)
        print('\nget_stat_series({})'.format(call_str(pvs)))
        print(
            '>>> ',
            dc.get_stat_series(
                pvs.get('place'),
                pvs.get('stat_var'),
                measurement_method=pvs.get('measurement_method'),
                observation_period=pvs.get('observation_period'),
                unit=pvs.get('unit'),
                scaling_factor=pvs.get('scaling_factor')))

    pp = pprint.PrettyPrinter(indent=4)
    print(
        '\nget_stat_all(["geoId/06085", "country/FRA"], ["Median_Age_Person", "Count_Person"])'
    )
    print('>>> ')
    pp.pprint(
        dc.get_stat_all(["geoId/06085", "country/FRA"],
                        ["Median_Age_Person", "Count_Person"]))

    print(
        '\nget_stat_all(["badPlaceId", "country/FRA"], ["Median_Age_Person", "Count_Person"])'
    )
    print('>>> ')
    pp.pprint(
        dc.get_stat_all(["badPlaceId", "country/FRA"],
                        ["Median_Age_Person", "Count_Person"]))

    print('\nSTRESS TEST FOR GET_STAT_ALL')
    # Stat vars requested for every US county in the stress test. Built
    # outside the try block so that it only wraps the API calls.
    stress_stat_vars = [
        'Count_Person', 'LandAreaSqMeter',
        'PopulationDensityPerSqMeter',
        'Count_Person_BlackOrAfricanAmericanAlone',
        'PercentBlackOrAfricanAmericanAlone', 'Count_Person_Female',
        'Count_Person_Male',
        'Count_Person_AmericanIndianAndAlaskaNativeAlone',
        'Count_Person_AmericanIndianAndAlaskaNativeAloneOrInCombinationWithOneOrMoreOtherRaces',
        'Count_Person_AmericanIndianOrAlaskaNativeAlone',
        'Count_Person_AsianAlone',
        'Count_Person_AsianAloneOrInCombinationWithOneOrMoreOtherRaces',
        'Count_Person_BlackOrAfricanAmericanAloneOrInCombinationWithOneOrMoreOtherRaces',
        'Count_Person_HispanicOrLatino',
        'Count_Person_NativeHawaiianAndOtherPacificIslanderAlone',
        'Count_Person_NativeHawaiianAndOtherPacificIslanderAloneOrInCombinationWithOneOrMoreOtherRaces',
        'Count_Person_NativeHawaiianOrOtherPacificIslanderAlone',
        'Count_Person_SomeOtherRaceAlone',
        'Count_Person_SomeOtherRaceAloneOrInCombinationWithOneOrMoreOtherRaces',
        'Count_Person_TwoOrMoreRaces', 'Count_Person_WhiteAlone',
        'Count_Person_WhiteAloneNotHispanicOrLatino',
        'Count_Person_WhiteAloneOrInCombinationWithOneOrMoreOtherRaces',
        'Count_Person_Upto5Years', 'Count_Person_Upto18Years',
        'Count_Person_65OrMoreYears', 'Count_Person_75OrMoreYears',
        'Count_Person_ForeignBorn',
        'Count_Person_USCitizenByNaturalization',
        'Count_Person_NotAUSCitizen', 'Count_Person_Nonveteran',
        'Count_Person_Veteran', 'Count_Person_NotWorkedFullTime',
        'Count_Person_WorkedFullTime', 'Count_Person_Employed',
        'Count_Person_Unemployed', 'Count_Person_InLaborForce',
        'Count_Person_IncomeOf10000To14999USDollar',
        'Count_Person_IncomeOf15000To24999USDollar',
        'Count_Person_IncomeOf25000To34999USDollar',
        'Count_Person_IncomeOf35000To49999USDollar',
        'Count_Person_IncomeOf50000To64999USDollar',
        'Count_Person_IncomeOf65000To74999USDollar',
        'Count_Person_IncomeOf75000OrMoreUSDollar',
        'Count_Person_IncomeOfUpto9999USDollar',
        'Count_Person_EnrolledInSchool',
        'Count_Person_NotEnrolledInSchool',
        'Count_Person_EnrolledInCollegeUndergraduateYears',
        'Count_Person_EnrolledInGrade1ToGrade4',
        'Count_Person_EnrolledInGrade5ToGrade8',
        'Count_Person_EnrolledInGrade9ToGrade12',
        'Count_Person_EnrolledInKindergarten',
        'Count_Person_EnrolledInNurserySchoolPreschool',
        'Count_Person_GraduateOrProfessionalSchool',
        'Count_Person_EducationalAttainment10ThGrade',
        'Count_Person_EducationalAttainment11ThGrade',
        'Count_Person_EducationalAttainment12ThGradeNoDiploma',
        'Count_Person_EducationalAttainment1StGrade',
        'Count_Person_EducationalAttainment2NdGrade',
        'Count_Person_EducationalAttainment3RdGrade',
        'Count_Person_EducationalAttainment4ThGrade',
        'Count_Person_EducationalAttainment5ThGrade',
        'Count_Person_EducationalAttainment6ThGrade',
        'Count_Person_EducationalAttainment7ThGrade',
        'Count_Person_EducationalAttainment8ThGrade',
        'Count_Person_EducationalAttainment9ThGrade',
        'Count_Person_EducationalAttainmentAssociatesDegree',
        'Count_Person_EducationalAttainmentBachelorsDegree',
        'Count_Person_EducationalAttainmentBachelorsDegreeOrHigher',
        'Count_Person_EducationalAttainmentDoctorateDegree',
        'Count_Person_EducationalAttainmentGedOrAlternativeCredential',
        'Count_Person_EducationalAttainmentKindergarten',
        'Count_Person_EducationalAttainmentMastersDegree',
        'Count_Person_EducationalAttainmentNoSchoolingCompleted',
        'Count_Person_EducationalAttainmentNurserySchool',
        'Count_Person_EducationalAttainmentPrimarySchool',
        'Count_Person_EducationalAttainmentProfessionalSchoolDegree',
        'Count_Person_EducationalAttainmentRegularHighSchoolDiploma',
        'Count_Person_EducationalAttainmentSomeCollege1OrMoreYearsNoDegree',
        'Count_Person_EducationalAttainmentSomeCollegeLessThan1Year',
        'Count_Person_Divorced', 'Count_Person_MarriedAndNotSeparated',
        'Count_Person_NeverMarried', 'Count_Person_Separated',
        'Count_Person_Widowed', 'Count_Person_NowMarried',
        'Count_Person_AbovePovertyLevelInThePast12Months',
        'Count_Person_BelowPovertyLevelInThePast12Months',
        'Percent_Person_20OrMoreYears_WithDiabetes',
        'Percent_Person_20OrMoreYears_Obesity',
        'Percent_Person_20OrMoreYears_PhysicalInactivity',
        'Percent_Person_Upto64Years_NoHealthInsurance',
        'Median_Age_Person', 'Median_Income_Person', 'Count_Death',
        'Count_Death_CertainInfectiousParasiticDiseases',
        'Count_Death_DiseasesOfBloodAndBloodFormingOrgansAndImmuneDisorders',
        'Count_Death_DiseasesOfTheRespiratorySystem'
    ]
    try:
        # Both the county lookup and the stats request stay inside the try,
        # so a ValueError from either one is reported as a failure.
        dc.get_stat_all(
            dc.get_places_in(['country/USA'], 'County')['country/USA'],
            stress_stat_vars)
    except ValueError:
        print('Stress test for get_stat_all FAILED!')
    else:
        print('Stress test for get_stat_all succeeded.')