def upload_to_gcs(self, gcs_bucket):
    """Upload population data from the census to a GCS bucket.

    One file is uploaded per concept (every key of
    SEX_BY_AGE_CONCEPTS_TO_RACE followed by HISPANIC_BY_RACE_CONCEPT), and
    then one more file for TOTAL_POP_VARIABLE_ID.

    Parameters:
        gcs_bucket: Bucket handed to url_file_to_gcs for every upload.

    Returns:
        A truthy value if at least one upload reported a file diff,
        otherwise a falsy value.
    """
    acs_metadata = fetch_acs_metadata(self.base_acs_url)
    variables_by_group = parse_acs_metadata(acs_metadata, list(GROUPS.keys()))

    all_concepts = [*SEX_BY_AGE_CONCEPTS_TO_RACE.keys(), HISPANIC_BY_RACE_CONCEPT]

    any_changed = False
    for concept in all_concepts:
        concept_vars = get_vars_for_group(concept, variables_by_group, 2)
        columns = list(concept_vars.keys())
        request_params = get_census_params(columns, self.county_level)
        concept_changed = url_file_to_gcs.url_file_to_gcs(
            self.base_acs_url,
            request_params,
            gcs_bucket,
            self.get_filename(concept),
        )
        # Keep the first truthy result; later results only matter until then.
        if not any_changed:
            any_changed = concept_changed

    # The total population variable is fetched separately from the concepts.
    total_params = get_census_params([TOTAL_POP_VARIABLE_ID], self.county_level)
    total_changed = url_file_to_gcs.url_file_to_gcs(
        self.base_acs_url,
        total_params,
        gcs_bucket,
        self.add_filename_suffix(TOTAL_POP_VARIABLE_ID),
    )
    if not any_changed:
        any_changed = total_changed

    return any_changed
def upload_to_gcs(self, bucket):
    """Upload one file per poverty group and geography level to a GCS bucket.

    For every group in POVERTY_BY_RACE_SEX_AGE_GROUPS, the county-level file
    is uploaded first and then the non-county file.

    Parameters:
        bucket: Bucket handed to url_file_to_gcs for every upload.

    Returns:
        A truthy value if at least one upload reported a file diff,
        otherwise False.
    """
    any_changed = False
    for group in POVERTY_BY_RACE_SEX_AGE_GROUPS:
        for is_county in (True, False):
            upload_changed = url_file_to_gcs.url_file_to_gcs(
                self.base_url,
                get_params(group, is_county),
                bucket,
                get_filename(group, is_county),
            )
            # Every upload always runs; only the flag update is conditional.
            if upload_changed:
                any_changed = upload_changed
    return any_changed
def upload_to_gcs(self, url, gcs_bucket, filename):
    """Upload household income data from SAIPE to a GCS bucket for all available years.

    One request is made per year (1989, 1993, and 1995 through 2018), and
    each result is stored under '<filename>_<year>.json'.

    Parameters:
        url: Base URL passed to url_file_to_gcs for every request.
        gcs_bucket: Bucket passed to url_file_to_gcs for every upload.
        filename: Prefix for the per-year file names.

    Returns:
        A truthy value if at least one upload reported a file diff,
        otherwise a falsy value.
    """
    # An ordered tuple (rather than a set) guarantees the years are requested
    # chronologically; set iteration order is an implementation detail.
    years = (1989, 1993, *range(1995, 2019))

    # The requested columns do not depend on the year, so look them up once.
    columns = self.get_household_income_columns().keys()

    file_diff = False
    for year in years:
        # Build fresh params for each year because 'time' is written into them.
        url_params = census.get_census_params_by_county(columns)
        url_params['time'] = year
        year_file_diff = url_file_to_gcs.url_file_to_gcs(
            url, url_params, gcs_bucket, '{}_{}.json'.format(filename, year))
        file_diff = file_diff or year_file_diff
    return file_diff
def upload_to_gcs(self, bucket):
    """Upload the health-insurance-by-sex files to a GCS bucket.

    Two files are uploaded: the county-level one first, then the non-county
    one. Both use HEALTH_INSURANCE_BY_SEX_GROUPS_PREFIX to build the request
    parameters.

    Parameters:
        bucket: Bucket handed to url_file_to_gcs for every upload.

    Returns:
        A truthy value if at least one upload reported a file diff,
        otherwise False.
    """
    any_changed = False
    for is_county in (True, False):
        request_params = get_params(HEALTH_INSURANCE_BY_SEX_GROUPS_PREFIX, is_county)
        upload_changed = url_file_to_gcs.url_file_to_gcs(
            self.base_url,
            request_params,
            bucket,
            self.get_filename(is_county),
        )
        if upload_changed:
            any_changed = upload_changed
    return any_changed
def upload_to_gcs(self, gcs_bucket, **attrs):
    """Download a file from a url and upload it as a blob to a GCS bucket.

    Parameters:
        gcs_bucket: Name of the GCS bucket to upload to (without gs://).
        attrs: Additional message attributes such as url and filename that
            are needed for this data source.

    Returns:
        A boolean indication of a file diff. In the case that there are
        many files to download, this will return true if there is at least
        one file that is different.
    """
    source_url = self.get_attr(attrs, 'url')
    target_filename = self.get_attr(attrs, 'filename')
    # No URL parameters are needed for this data source, hence None.
    return url_file_to_gcs.url_file_to_gcs(
        source_url, None, gcs_bucket, target_filename)
def upload_to_gcs(self, bucket):
    """Upload health insurance files by sex and by race to a GCS bucket.

    Uploads, in order: male state, female state, male county and female
    county files, followed by a state and a county file for every race entry
    found in HEALTH_INSURANCE_BY_RACE_GROUP_PREFIXES.

    Parameters:
        bucket: Bucket handed to url_file_to_gcs for every upload.

    Returns:
        A truthy value if at least one upload reported a file diff,
        otherwise a falsy value.
    """
    # --- By sex, state level -------------------------------------------
    params = format_params(
        HEALTH_INSURANCE_BY_SEX_GROUPS_PREFIX,
        HEALTH_INSURANCE_BY_SEX_MALE_SUFFIXES,
    )
    any_changed = url_file_to_gcs.url_file_to_gcs(
        self.base_url, params, bucket, self.get_filename(Sex.MALE, None, False))

    params = format_params(
        HEALTH_INSURANCE_BY_SEX_GROUPS_PREFIX,
        HEALTH_INSURANCE_BY_SEX_FEMALE_SUFFIXES,
    )
    upload_changed = url_file_to_gcs.url_file_to_gcs(
        self.base_url, params, bucket, self.get_filename(Sex.FEMALE, None, False))
    if upload_changed:
        any_changed = upload_changed

    # --- By sex, county level ------------------------------------------
    params = format_params(
        HEALTH_INSURANCE_BY_SEX_GROUPS_PREFIX,
        HEALTH_INSURANCE_BY_SEX_MALE_SUFFIXES,
        True,
    )
    upload_changed = url_file_to_gcs.url_file_to_gcs(
        self.base_url, params, bucket, self.get_filename(Sex.MALE, None, True))
    if upload_changed:
        any_changed = upload_changed

    params = format_params(
        HEALTH_INSURANCE_BY_SEX_GROUPS_PREFIX,
        HEALTH_INSURANCE_BY_SEX_FEMALE_SUFFIXES,
        True,
    )
    upload_changed = url_file_to_gcs.url_file_to_gcs(
        self.base_url, params, bucket, self.get_filename(Sex.FEMALE, None, True))
    if upload_changed:
        any_changed = upload_changed

    # --- By race ---------------------------------------------------------
    # Each entry of HEALTH_INSURANCE_BY_RACE_GROUP_PREFIXES is iterated by key
    # and the race is read from the metadata stored under that key.
    # NOTE(review): format_params is given the whole `prefix` entry here, not
    # `prefix_key`, unlike the by-sex calls which pass a prefix constant.
    # Confirm this is what format_params expects.
    for prefix in HEALTH_INSURANCE_BY_RACE_GROUP_PREFIXES:
        for prefix_key in prefix:
            state_params = format_params(
                prefix, HEALTH_INSURANCE_BY_RACE_GROUP_SUFFIXES)
            race = prefix[prefix_key][MetadataKey.RACE]
            upload_changed = url_file_to_gcs.url_file_to_gcs(
                self.base_url, state_params, bucket,
                self.get_filename(None, race, False))
            if upload_changed:
                any_changed = upload_changed

            county_params = format_params(
                prefix, HEALTH_INSURANCE_BY_RACE_GROUP_SUFFIXES, True)
            upload_changed = url_file_to_gcs.url_file_to_gcs(
                self.base_url, county_params, bucket,
                self.get_filename(None, race, True))
            if upload_changed:
                any_changed = upload_changed

    return any_changed
def upload_to_gcs(self, bucket):
    """Upload the health-insurance-by-race files to a GCS bucket.

    For every prefix key in HEALTH_INSURANCE_BY_RACE_GROUP_PREFIXES, the
    associated race is looked up and two files are uploaded: the county-level
    one first, then the non-county one.

    Parameters:
        bucket: Bucket handed to url_file_to_gcs for every upload.

    Returns:
        A truthy value if at least one upload reported a file diff,
        otherwise False.
    """
    any_changed = False
    # The mapping goes from ACS variable prefix to the race it describes.
    for prefix_key, race in HEALTH_INSURANCE_BY_RACE_GROUP_PREFIXES.items():
        for is_county in (True, False):
            request_params = get_params(prefix_key, is_county)
            upload_changed = url_file_to_gcs.url_file_to_gcs(
                self.base_url,
                request_params,
                bucket,
                self.get_filename(race, is_county),
            )
            if upload_changed:
                any_changed = upload_changed
    return any_changed
def upload_to_gcs(self, url, gcs_bucket, filename):
    """Upload state names and FIPS codes from the census to a GCS bucket.

    Parameters:
        url: URL passed to url_file_to_gcs as the download source.
        gcs_bucket: Bucket passed to url_file_to_gcs as the upload target.
        filename: Name passed to url_file_to_gcs for the uploaded file.

    Returns:
        Whatever url_file_to_gcs returns for this single upload.
    """
    # Request the NAME column for every state.
    state_query = {
        'get': 'NAME',
        'for': 'state:*',
    }
    return url_file_to_gcs.url_file_to_gcs(
        url, state_query, gcs_bucket, filename)
def upload_to_gcs(self, url, gcs_bucket, filename):
    """Upload county names and FIPS codes from the census to a GCS bucket.

    Parameters:
        url: URL passed to url_file_to_gcs as the download source.
        gcs_bucket: Bucket passed to url_file_to_gcs as the upload target.
        filename: Name passed to url_file_to_gcs for the uploaded file.

    Returns:
        Whatever url_file_to_gcs returns for this single upload.
    """
    # Only the NAME column is requested, at county granularity.
    county_query = census.get_census_params_by_county(['NAME'])
    return url_file_to_gcs.url_file_to_gcs(
        url, county_query, gcs_bucket, filename)