def main():
    """Show get_places_in with a plain list of dcids and with a DataFrame.

    First prints up to ten CensusTract dcids for Santa Clara County
    (geoId/06085) and Montgomery County (geoId/24031), then repeats the same
    lookup with the counties held in a Pandas DataFrame column and prints the
    flattened frame.
    """
    santa_clara, montgomery = 'geoId/06085', 'geoId/24031'

    def show_sample(tracts_by_county, county, heading):
        """Print the heading and the first ten tracts, if county is a key."""
        if county not in tracts_by_county:
            return
        print(heading)
        for tract in tracts_by_county[county][:10]:
            print(' - {}'.format(tract))

    # List-based lookup: the result is indexed by county dcid.
    utils._print_header('Get Census Tracts')
    tracts = dc.get_places_in([santa_clara, montgomery], 'CensusTract')
    show_sample(tracts, santa_clara,
                '> 10 CensusTracts in Santa Clara County')
    print()
    show_sample(tracts, montgomery,
                '> 10 CensusTracts in Montgomery County')

    # Same task with Pandas: start from a frame holding the two counties.
    utils._print_header('Initialize the DataFrame')
    pd_frame = pd.DataFrame({'county': ['geoId/06085', 'geoId/24031']})
    print(pd_frame)

    # Passing the column instead of a list; the result is stored as a new
    # column and the frame is then flattened before printing.
    utils._print_header('Get Census Tracts')
    pd_frame['tracts'] = dc.get_places_in(pd_frame['county'], 'CensusTract')
    pd_frame = dc.flatten_frame(pd_frame)
    print(pd_frame)
def buildPlaceTreeState(state):
    """Build the State -> County -> City place tree for one state.

    Args:
        state: dcid of the state whose subtree should be built.

    Returns:
        The place object obtained from RankStatVarPlace.getPlace for the
        state, with every county attached as a child and every city attached
        as a child of its county.
    """
    state_node = RankStatVarPlace.getPlace("State", state)
    county_ids = dc.get_places_in([state], "County")[state]
    for county_id in county_ids:
        county_node = RankStatVarPlace.getPlace("County", county_id)
        state_node.addChild(county_node)
        # One lookup per county: fetch the cities contained in this county.
        for city_id in dc.get_places_in([county_id], "City")[county_id]:
            county_node.addChild(RankStatVarPlace.getPlace("City", city_id))
    return state_node
def download_data(self, place='country/USA'):
    """Fetch and parse GeoJSON for the areas one level below a place.

    Looks up the type of `place`, maps it through the LEVEL_MAP class
    constant to the type one administrative level below, fetches every place
    of that type contained in `place`, and stores their parsed
    geoJsonCoordinates in self.geojsons. For example, country/USA yields the
    AdministrativeArea1 places within the US (US states).

    Args:
        place: A string that is a valid value for the geoId property of a
            DataCommons node.

    Raises:
        AssertionError: If the typeOf lookup for `place` does not return
            exactly one value.
        ValueError: Per the original documentation, if a Data Commons API
            call fails (raised by the client, not by this method).
    """
    type_lookup = dc.get_property_values([place], "typeOf")
    # The lookup result holds a list of values per dcid; exactly one type is
    # expected, and it is unwrapped explicitly.
    place_types = type_lookup[place]
    assert len(place_types) == 1
    child_type = self.LEVEL_MAP[place_types[0]]

    contained = dc.get_places_in([place], child_type)[place]
    self.geojsons = dc.get_property_values(contained, "geoJsonCoordinates")
    # Replace the first stored value of each area with its parsed form.
    # NOTE(review): presumably iter_subareas yields (dcid, raw string) pairs
    # taken from self.geojsons; confirm against its definition.
    for subarea, raw_coords in self.iter_subareas():
        self.geojsons[subarea][0] = geojson.loads(raw_coords)
def download_data(self, place='country/USA', level=1):
    """Fetch and parse GeoJSON for areas `level` levels below a place.

    Looks up the type of `place`, walks `level` steps down the LEVEL_MAP
    class constant, fetches every place of the resulting type contained in
    `place`, and stores their parsed geoJsonCoordinates in self.geojsons.

    Args:
        place: A string that is a valid value for the geoId property of a
            DataCommons node.
        level: Number of administrative levels down from place that should
            be fetched. For example, with place='country/USA', level=1
            fetches US states and level=2 fetches US counties.

    Raises:
        AssertionError: If the typeOf lookup for `place` does not return
            exactly one value.
        ValueError: If LEVEL_MAP has no entry for some level on the way
            down, or (per the original documentation) if a Data Commons API
            call fails.
    """
    type_lookup = dc.get_property_values([place], "typeOf")
    # The lookup result holds a list of values per dcid; exactly one type is
    # expected, and it is unwrapped explicitly.
    place_types = type_lookup[place]
    assert len(place_types) == 1
    target_type = place_types[0]

    # Descend one administrative level per iteration.
    for _ in range(level):
        if target_type not in self.LEVEL_MAP:
            raise ValueError("Desired level does not exist.")
        target_type = self.LEVEL_MAP[target_type]

    contained = dc.get_places_in([place], target_type)[place]
    self.geojsons = dc.get_property_values(contained, "geoJsonCoordinates")
    # Replace the first stored value of each area with its parsed form.
    # NOTE(review): presumably iter_subareas yields (dcid, raw string) pairs
    # taken from self.geojsons; confirm against its definition.
    for subarea, raw_coords in self.iter_subareas():
        self.geojsons[subarea][0] = geojson.loads(raw_coords)
def test_no_dcids(self, urlopen):
    """ get_places_in maps each nonexistent dcid to an empty list.

    Despite the test name, the call passes two dcids that do not exist
    rather than an empty list of dcids.
    """
    expected = {
        'dc/MadDcid': [],
        'dc/MadderDcid': []
    }
    actual = dc.get_places_in(['dc/MadDcid', 'dc/MadderDcid'], 'City')
    self.assertDictEqual(actual, expected)
def test_bad_dcids(self, urlopen):
    """ get_places_in yields empty lists for dcids that do not exist. """
    # Case 1: one valid dcid alongside one that does not exist.
    expected_mixed = {
        'geoId/06085': ['geoId/0649670'],
        'dc/MadDcid': []
    }
    actual_mixed = dc.get_places_in(['geoId/06085', 'dc/MadDcid'], 'City')
    self.assertDictEqual(actual_mixed, expected_mixed)

    # Case 2: neither dcid exists.
    expected_missing = {
        'dc/MadDcid': [],
        'dc/MadderDcid': []
    }
    actual_missing = dc.get_places_in(['dc/MadDcid', 'dc/MadderDcid'], 'City')
    self.assertDictEqual(actual_missing, expected_missing)
def test_multiple_dcids(self, urlopen):
    """ get_places_in returns the contained cities for each valid dcid. """
    expected = {
        'geoId/06085': ['geoId/0649670'],
        'geoId/24031': ['geoId/2467675', 'geoId/2476650']
    }
    actual = dc.get_places_in(['geoId/06085', 'geoId/24031'], 'City')
    self.assertDictEqual(actual, expected)
def test_bad_dcids(self, post_mock):
    """ get_places_in yields empty lists for dcids that do not exist. """
    # An API key is set before any request is made.
    dc.set_api_key('TEST-API-KEY')

    # Case 1: one valid dcid alongside one that does not exist.
    expected_mixed = {
        'geoId/06085': ['geoId/0649670'],
        'dc/MadDcid': []
    }
    actual_mixed = dc.get_places_in(['geoId/06085', 'dc/MadDcid'], 'City')
    self.assertDictEqual(actual_mixed, expected_mixed)

    # Case 2: neither dcid exists.
    expected_missing = {
        'dc/MadDcid': [],
        'dc/MadderDcid': []
    }
    actual_missing = dc.get_places_in(['dc/MadDcid', 'dc/MadderDcid'], 'City')
    self.assertDictEqual(actual_missing, expected_missing)
def _load_geojsons():
    """Load a shape per country plus each country's containing places.

    Fetches every Country contained in 'Earth', parses the first
    geoJsonCoordinatesDP2 value of each country that has one into a shape,
    prints how many shapes were built, and fetches the containedInPlace
    values for all countries.

    Returns:
        A (geojsons, cip) tuple: `geojsons` maps country dcid to the object
        returned by geometry.shape, and `cip` is the raw containedInPlace
        lookup result for all countries.
    """
    country_ids = dc.get_places_in(['Earth'], 'Country')['Earth']
    coords_by_country = dc.get_property_values(country_ids,
                                               'geoJsonCoordinatesDP2')
    # Countries with no coordinate values are left out of the mapping.
    shapes = {
        place: geometry.shape(json.loads(values[0]))
        for place, values in coords_by_country.items()
        if values
    }
    print('Got', len(shapes), 'geojsons!')
    containing_places = dc.get_property_values(country_ids, 'containedInPlace')
    return shapes, containing_places
def test_series_bad_dcids(self, post_mock):
    """ get_places_in on a Pandas Series yields empty lists for dcids that
    do not exist.
    """
    # An API key is set before any request is made.
    dc.set_api_key('TEST-API-KEY')

    # Inputs: one Series mixing a valid and a missing dcid, one all-missing.
    mixed_input = pd.Series(['geoId/06085', 'dc/MadDcid'])
    missing_input = pd.Series(['dc/MadDcid', 'dc/MadderDcid'])
    mixed_expected = pd.Series([['geoId/0649670'], []])
    missing_expected = pd.Series([[], []])

    mixed_actual = dc.get_places_in(mixed_input, 'City')
    missing_actual = dc.get_places_in(missing_input, 'City')

    assert_series_equal(mixed_actual, mixed_expected)
    assert_series_equal(missing_actual, missing_expected)
def test_no_dcids(self, urlopen):
    """ get_places_in maps each nonexistent dcid to an empty list.

    Despite the test name, the call passes two dcids that do not exist
    rather than an empty list of dcids.
    """
    # An API key is set before any request is made.
    dc.set_api_key('TEST-API-KEY')

    expected = {
        'dc/MadDcid': [],
        'dc/MadderDcid': []
    }
    actual = dc.get_places_in(['dc/MadDcid', 'dc/MadderDcid'], 'City')
    self.assertDictEqual(actual, expected)
def test_multiple_dcids(self, post_mock):
    """ get_places_in returns the contained cities for each valid dcid. """
    # An API key is set before any request is made.
    dc.set_api_key('TEST-API-KEY')

    expected = {
        'geoId/06085': ['geoId/0649670'],
        'geoId/24031': ['geoId/2467675', 'geoId/2476650']
    }
    actual = dc.get_places_in(['geoId/06085', 'geoId/24031'], 'City')
    self.assertDictEqual(actual, expected)
def test_series_no_dcids(self, post_mock):
    """ get_places_in on an empty Pandas Series returns an empty Series. """
    # An API key is set before any request is made.
    dc.set_api_key('TEST-API-KEY')

    empty_input = pd.Series([])
    empty_expected = pd.Series([])

    result = dc.get_places_in(empty_input, 'City')
    assert_series_equal(result, empty_expected)
def test_series_multiple_dcids(self, post_mock):
    """ get_places_in on a Pandas Series of valid dcids returns a Series
    holding the contained cities for each entry.
    """
    # An API key is set before any request is made.
    dc.set_api_key('TEST-API-KEY')

    counties = pd.Series(['geoId/06085', 'geoId/24031'])
    cities_expected = pd.Series([
        ['geoId/0649670'],
        ['geoId/2467675', 'geoId/2476650'],
    ])

    cities_actual = dc.get_places_in(counties, 'City')
    assert_series_equal(cities_actual, cities_expected)
def main():
    """Print sample census tracts and several get_stats queries.

    Prints up to ten CensusTract dcids for Santa Clara County and Montgomery
    County, then calls dc.get_stats for the same places and statistic with
    different `obs_dates` arguments and prints each result.
    """
    santa_clara, montgomery = 'geoId/06085', 'geoId/24031'

    # Tracts contained in the two counties, indexed by county dcid.
    print('Get Census Tracts')
    tracts = dc.get_places_in([santa_clara, montgomery], 'CensusTract')
    county_headings = (
        (santa_clara, '> 10 CensusTracts in Santa Clara County'),
        (montgomery, '> 10 CensusTracts in Montgomery County'),
    )
    for county, heading in county_headings:
        if county in tracts:
            print(heading)
            for tract in tracts[county][:10]:
                print(' - {}'.format(tract))

    # Each entry is (title to print, extra keyword arguments for get_stats).
    # The empty dict leaves obs_dates at the client's default; the string
    # '2014' is deliberately passed un-listed, as its title says.
    stat_requests = (
        ('Get place stats -- all', {'obs_dates': 'all'}),
        ('Get place stats -- latest', {}),
        ('Get place stats -- 2014', {'obs_dates': ['2014']}),
        ('Get place stats -- 2014 badly formatted', {'obs_dates': '2014'}),
        ('Get place stats -- 2015-2016', {'obs_dates': ['2015', '2016']}),
    )
    for title, extra in stat_requests:
        print(title)
        stats = dc.get_stats(['geoId/05', 'geoId/06', 'dc/madDcid'],
                             'dc/0hyp6tkn18vcb', **extra)
        print(stats)
def buildPlaceTree(placeFile):
    """Build the place tree, using `placeFile` as an on-disk cache.

    If `placeFile` exists, each of its lines is split on tabs into
    (place dcid, place type, parent dcid, parent type) and the place is
    attached to its parent via RankStatVarPlace.getPlace / addChild.

    Otherwise the tree is built from the Data Commons API (US -> states ->
    whatever buildPlaceTreeState adds below each state) and then written to
    `placeFile` through dumpPlaces, once per child of the US node.

    Both file handles are managed by `with`, so they are closed even when
    parsing or dumping raises.

    Args:
        placeFile: Path of the cache file to read from or create.
    """
    if os.path.isfile(placeFile):
        with open(placeFile) as cache:
            for line in cache:
                it = line.strip().split('\t')
                # getPlace takes (type, dcid): columns 1/0 are the place,
                # columns 3/2 are its parent.
                pl = RankStatVarPlace.getPlace(it[1], it[0])
                par = RankStatVarPlace.getPlace(it[3], it[2])
                par.addChild(pl)
        return

    US = RankStatVarPlace.getPlace("Country", US_dcid)
    states = dc.get_places_in([US_dcid], "State")[US_dcid]
    for state in states:
        US.addChild(buildPlaceTreeState(state))

    # The file is opened only after the whole tree has been fetched, so a
    # failed API call does not leave an empty cache file behind.
    with open(placeFile, "w") as cache:
        for c in US.children:
            dumpPlaces(cache, c, US)
def main():
    """Print sample census tracts and a get_related_places query.

    Prints up to ten CensusTract dcids for Santa Clara County and Montgomery
    County, then prints the result of dc.get_related_places for Santa Clara
    County.
    """
    santa_clara, montgomery = 'geoId/06085', 'geoId/24031'

    # Tracts contained in the two counties, indexed by county dcid.
    print('Get Census Tracts')
    tracts = dc.get_places_in([santa_clara, montgomery], 'CensusTract')
    county_headings = (
        (santa_clara, '> 10 CensusTracts in Santa Clara County'),
        (montgomery, '> 10 CensusTracts in Montgomery County'),
    )
    for county, heading in county_headings:
        if county in tracts:
            print(heading)
            for tract in tracts[county][:10]:
                print(' - {}'.format(tract))

    # Related places for the 'Person' population constrained by age and
    # gender, on the 'count' property; the final argument is an empty string.
    print('Get related places')
    constraints = {
        'age': "Years21To64",
        "gender": "Female"
    }
    related_places = dc.get_related_places(['geoId/06085'], 'Person',
                                           constraints, 'count', '')
    print(related_places)
default=os.environ["DATACOMMONS_API_KEY"]) args = argparser.parse_args() # Get the country name aux = read_file(ROOT / "src" / "data" / "metadata.csv").set_index("key") country_name = aux.loc[args.country_code, "country_name"] # Convert 2-letter to 3-letter country code iso_codes = read_file(ROOT / "src" / "data" / "country_codes.csv").set_index("key") country_code_alpha_3 = iso_codes.loc[args.country_code, "3166-1-alpha-3"] dc.set_api_key(args.dc_api_key) country = "country/{}".format(country_code_alpha_3) nuts_name = "EurostatNUTS{}".format(args.nuts_level) regions = dc.get_places_in([country], nuts_name)[country] names = dc.get_property_values(regions, "name") for key, name in names.items(): region_name = name[0] region_code = key.split("/")[-1][2:] print(("{country_code}_{region_code}," "{country_code}," "{country_name}," "{region_code}," "{region_name}," ",,,0").format( **{ "country_code": args.country_code, "region_code": region_code, "country_name": country_name, "region_name": region_name,
def main():
    """Exercise get_stat_value, get_stat_series and get_stat_all.

    For each parameter set, prints the equivalent call string followed by the
    result of dc.get_stat_value, then does the same for dc.get_stat_series
    (which is called without a date). Afterwards pretty-prints two
    dc.get_stat_all results and runs a stress test that requests many
    statistical variables for every US county.

    The printed get_stat_value call string includes `date` when one is
    passed, so parameter sets differing only in their date print distinct
    calls. param_sets is not mutated between the two loops.
    """
    param_sets = [
        {
            'place': 'geoId/06085',
            'stat_var': "Count_Person",
        },
        {
            'place': 'geoId/06085',
            'stat_var': "Count_Person",
            'date': '2018',
        },
        {
            'place': 'geoId/06085',
            'stat_var': "Count_Person",
            'date': '2018',
            'measurement_method': 'CensusACS5yrSurvey',
        },
        {
            'place': 'geoId/06085',
            'stat_var': 'UnemploymentRate_Person',
        },
        {
            'place': 'geoId/06085',
            'stat_var': 'UnemploymentRate_Person',
            'observation_period': 'P1Y',
        },
        {
            'place': 'geoId/06085',
            'stat_var': 'UnemploymentRate_Person',
            'observation_period': 'P1Y',
            'measurement_method': 'BLSSeasonallyUnadjusted',
        },
        {
            'place': 'nuts/HU22',
            'stat_var': 'Amount_EconomicActivity_GrossDomesticProduction_Nominal',
        },
        {
            'place': 'nuts/HU22',
            'stat_var': 'Amount_EconomicActivity_GrossDomesticProduction_Nominal',
            'observation_period': 'P1Y',
            'unit': 'PurchasingPowerStandard'
        },
    ]

    def call_str(pvs):
        """Helper function to print the minimal call string.

        Only arguments with a truthy value in `pvs` are included, in the
        order date, measurement_method, observation_period, unit,
        scaling_factor.
        """
        parts = ["'{}', '{}'".format(pvs.get('place'), pvs.get('stat_var'))]
        if pvs.get('date'):
            parts.append("date='{}'".format(pvs.get('date')))
        if pvs.get('measurement_method'):
            parts.append("measurement_method='{}'".format(
                pvs.get('measurement_method')))
        if pvs.get('observation_period'):
            parts.append("observation_period='{}'".format(
                pvs.get('observation_period')))
        if pvs.get('unit'):
            parts.append("unit='{}'".format(pvs.get('unit')))
        if pvs.get('scaling_factor'):
            parts.append("scaling_factor={}".format(
                pvs.get('scaling_factor')))
        return ', '.join(parts)

    for pvs in param_sets:
        print('\nget_stat_value({})'.format(call_str(pvs)))
        print(
            '>>> ',
            dc.get_stat_value(pvs.get('place'),
                              pvs.get('stat_var'),
                              date=pvs.get('date'),
                              measurement_method=pvs.get('measurement_method'),
                              observation_period=pvs.get('observation_period'),
                              unit=pvs.get('unit'),
                              scaling_factor=pvs.get('scaling_factor')))

    for pvs in param_sets:
        # get_stat_series is called without a date, so drop it from a copy
        # to keep the printed call string truthful.
        series_pvs = {key: val for key, val in pvs.items() if key != 'date'}
        print('\nget_stat_series({})'.format(call_str(series_pvs)))
        print(
            '>>> ',
            dc.get_stat_series(
                series_pvs.get('place'),
                series_pvs.get('stat_var'),
                measurement_method=series_pvs.get('measurement_method'),
                observation_period=series_pvs.get('observation_period'),
                unit=series_pvs.get('unit'),
                scaling_factor=series_pvs.get('scaling_factor')))

    pp = pprint.PrettyPrinter(indent=4)
    print(
        '\nget_stat_all(["geoId/06085", "country/FRA"], ["Median_Age_Person", "Count_Person"])'
    )
    print('>>> ')
    pp.pprint(
        dc.get_stat_all(["geoId/06085", "country/FRA"],
                        ["Median_Age_Person", "Count_Person"]))

    print(
        '\nget_stat_all(["badPlaceId", "country/FRA"], ["Median_Age_Person", "Count_Person"])'
    )
    print('>>> ')
    pp.pprint(
        dc.get_stat_all(["badPlaceId", "country/FRA"],
                        ["Median_Age_Person", "Count_Person"]))

    # Statistical variables requested for every US county in the stress test.
    stress_stat_vars = [
        'Count_Person',
        'LandAreaSqMeter',
        'PopulationDensityPerSqMeter',
        'Count_Person_BlackOrAfricanAmericanAlone',
        'PercentBlackOrAfricanAmericanAlone',
        'Count_Person_Female',
        'Count_Person_Male',
        'Count_Person_AmericanIndianAndAlaskaNativeAlone',
        'Count_Person_AmericanIndianAndAlaskaNativeAloneOrInCombinationWithOneOrMoreOtherRaces',
        'Count_Person_AmericanIndianOrAlaskaNativeAlone',
        'Count_Person_AsianAlone',
        'Count_Person_AsianAloneOrInCombinationWithOneOrMoreOtherRaces',
        'Count_Person_BlackOrAfricanAmericanAloneOrInCombinationWithOneOrMoreOtherRaces',
        'Count_Person_HispanicOrLatino',
        'Count_Person_NativeHawaiianAndOtherPacificIslanderAlone',
        'Count_Person_NativeHawaiianAndOtherPacificIslanderAloneOrInCombinationWithOneOrMoreOtherRaces',
        'Count_Person_NativeHawaiianOrOtherPacificIslanderAlone',
        'Count_Person_SomeOtherRaceAlone',
        'Count_Person_SomeOtherRaceAloneOrInCombinationWithOneOrMoreOtherRaces',
        'Count_Person_TwoOrMoreRaces',
        'Count_Person_WhiteAlone',
        'Count_Person_WhiteAloneNotHispanicOrLatino',
        'Count_Person_WhiteAloneOrInCombinationWithOneOrMoreOtherRaces',
        'Count_Person_Upto5Years',
        'Count_Person_Upto18Years',
        'Count_Person_65OrMoreYears',
        'Count_Person_75OrMoreYears',
        'Count_Person_ForeignBorn',
        'Count_Person_USCitizenByNaturalization',
        'Count_Person_NotAUSCitizen',
        'Count_Person_Nonveteran',
        'Count_Person_Veteran',
        'Count_Person_NotWorkedFullTime',
        'Count_Person_WorkedFullTime',
        'Count_Person_Employed',
        'Count_Person_Unemployed',
        'Count_Person_InLaborForce',
        'Count_Person_IncomeOf10000To14999USDollar',
        'Count_Person_IncomeOf15000To24999USDollar',
        'Count_Person_IncomeOf25000To34999USDollar',
        'Count_Person_IncomeOf35000To49999USDollar',
        'Count_Person_IncomeOf50000To64999USDollar',
        'Count_Person_IncomeOf65000To74999USDollar',
        'Count_Person_IncomeOf75000OrMoreUSDollar',
        'Count_Person_IncomeOfUpto9999USDollar',
        'Count_Person_EnrolledInSchool',
        'Count_Person_NotEnrolledInSchool',
        'Count_Person_EnrolledInCollegeUndergraduateYears',
        'Count_Person_EnrolledInGrade1ToGrade4',
        'Count_Person_EnrolledInGrade5ToGrade8',
        'Count_Person_EnrolledInGrade9ToGrade12',
        'Count_Person_EnrolledInKindergarten',
        'Count_Person_EnrolledInNurserySchoolPreschool',
        'Count_Person_GraduateOrProfessionalSchool',
        'Count_Person_EducationalAttainment10ThGrade',
        'Count_Person_EducationalAttainment11ThGrade',
        'Count_Person_EducationalAttainment12ThGradeNoDiploma',
        'Count_Person_EducationalAttainment1StGrade',
        'Count_Person_EducationalAttainment2NdGrade',
        'Count_Person_EducationalAttainment3RdGrade',
        'Count_Person_EducationalAttainment4ThGrade',
        'Count_Person_EducationalAttainment5ThGrade',
        'Count_Person_EducationalAttainment6ThGrade',
        'Count_Person_EducationalAttainment7ThGrade',
        'Count_Person_EducationalAttainment8ThGrade',
        'Count_Person_EducationalAttainment9ThGrade',
        'Count_Person_EducationalAttainmentAssociatesDegree',
        'Count_Person_EducationalAttainmentBachelorsDegree',
        'Count_Person_EducationalAttainmentBachelorsDegreeOrHigher',
        'Count_Person_EducationalAttainmentDoctorateDegree',
        'Count_Person_EducationalAttainmentGedOrAlternativeCredential',
        'Count_Person_EducationalAttainmentKindergarten',
        'Count_Person_EducationalAttainmentMastersDegree',
        'Count_Person_EducationalAttainmentNoSchoolingCompleted',
        'Count_Person_EducationalAttainmentNurserySchool',
        'Count_Person_EducationalAttainmentPrimarySchool',
        'Count_Person_EducationalAttainmentProfessionalSchoolDegree',
        'Count_Person_EducationalAttainmentRegularHighSchoolDiploma',
        'Count_Person_EducationalAttainmentSomeCollege1OrMoreYearsNoDegree',
        'Count_Person_EducationalAttainmentSomeCollegeLessThan1Year',
        'Count_Person_Divorced',
        'Count_Person_MarriedAndNotSeparated',
        'Count_Person_NeverMarried',
        'Count_Person_Separated',
        'Count_Person_Widowed',
        'Count_Person_NowMarried',
        'Count_Person_AbovePovertyLevelInThePast12Months',
        'Count_Person_BelowPovertyLevelInThePast12Months',
        'Percent_Person_20OrMoreYears_WithDiabetes',
        'Percent_Person_20OrMoreYears_Obesity',
        'Percent_Person_20OrMoreYears_PhysicalInactivity',
        'Percent_Person_Upto64Years_NoHealthInsurance',
        'Median_Age_Person',
        'Median_Income_Person',
        'Count_Death',
        'Count_Death_CertainInfectiousParasiticDiseases',
        'Count_Death_DiseasesOfBloodAndBloodFormingOrgansAndImmuneDisorders',
        'Count_Death_DiseasesOfTheRespiratorySystem',
    ]

    print('\nSTRESS TEST FOR GET_STAT_ALL')
    try:
        # The county lookup stays inside the try so a ValueError from either
        # call is reported as a stress-test failure.
        dc.get_stat_all(
            dc.get_places_in(['country/USA'], 'County')['country/USA'],
            stress_stat_vars)
    except ValueError:
        print('Stress test for get_stat_all FAILED!')
    else:
        print('Stress test for get_stat_all succeeded.')