zip_code = zip_json[u'attributes'][u'Join_Zip_Code']
case_counts.append(case_count)
zip_codes.append(zip_code)

# ===========================================
# Store previous data in dictionary
# ===========================================
us_cases_rel_scripts_path = os.path.abspath(
    "../processed_data/cases/US")  # Path to data relative to scripts
prev_data_fpath = "%s/oakland-county_cases.csv" % us_cases_rel_scripts_path
prev_data_file = open(prev_data_fpath, 'r')
header = prev_data_file.readline().strip().split(',')

# IMPORTANT: Only append new data if the date isn't already there.
quoted_date = parse_data_utils.date_string_to_quoted(date)
if quoted_date in header:
    prev_data_file.close()
    sys.exit(
        "This date has already been updated. May need to check if multiple"
        " updates were made in the same day.")

data = {}  # Keys are zip codes, values are lists of case counts
for row in prev_data_file:
    values = row.strip().split(',')
    zip_code = values[0]
    case_counts_prev = values[1:]
    data[zip_code] = case_counts_prev
prev_data_file.close()
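# Illustration (hypothetical sample values) of the csv layout the parsing
# above expects: the header row starts with an empty cell followed by
# quoted dates, and each data row starts with a quoted zip code:
#
#   ,"3/28/20","3/29/20"
#   "48009",5,7
#   "48017",2,2
#
# which the loop above turns into
#
#   data == {'"48009"': ['5', '7'], '"48017"': ['2', '2']}
#
# i.e. each quoted zip code maps to its case counts in date order.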
def write_data(file_path, date, zips, cases, location=""):
    """Writes or appends the data to a csv.

    Rows are zip codes and columns are dates.

    Args:
        file_path: The path to the csv to write/append to.
        date: The date we should add data to.
        zips: List of zip codes to be updated.
        cases: List of cases corresponding to the given list of zip codes
            and the given date.
        location: The location for which the data is being scraped.
    """
    # Fetch given csv and store in a dict.
    new_data = {}  # Keys are zip codes, values are lists of case counts
    dates = [""]

    # IMPORTANT: Only append new data if the date isn't already there.
    quoted_date = parse_data_utils.date_string_to_quoted(date)
    overwrite = False
    try:
        read_csv = open(file_path, 'r')
        dates = read_csv.readline().strip().split(',')
        if quoted_date in dates:
            overwrite = True
            print("%s - WARNING: data has already been updated today... "
                  "overwriting data for today with recently fetched data." %
                  location)
        # Fill in new_data with old data from the csv.
        for row in read_csv:
            values = row.strip().split(',')
            zip_code = values[0]
            old_cases = values[1:]
            if overwrite:
                # Drop today's stale column; it will be refilled below.
                old_cases = values[1:-1]
            new_data[zip_code] = old_cases
        read_csv.close()
    except IOError:
        print("%s - Creating csv file at %s" % (location, file_path))

    num_dates = len(dates) - 1  # Excluding today
    if overwrite:
        num_dates = num_dates - 1
    expected_entries = num_dates + 1  # Previous dates plus today

    # Fill in new_data with today's data.
    for zip_code, case_count in zip(zips, cases):
        quoted_zip_code = '"%s"' % zip_code
        if quoted_zip_code in new_data:
            if len(new_data[quoted_zip_code]) == expected_entries:
                # Duplicate zip code in today's scrape: sum the counts.
                new_data[quoted_zip_code][-1] = str(
                    int(new_data[quoted_zip_code][-1]) + case_count)
            else:
                new_data[quoted_zip_code].append(str(case_count))
        else:
            # New zip code: pad the earlier dates with 'NA'.
            new_data[quoted_zip_code] = ['NA'] * num_dates
            new_data[quoted_zip_code].append(str(case_count))

    # Zip codes absent from today's scrape get an 'NA' for today.
    for zip_code in new_data:
        if len(new_data[zip_code]) < expected_entries:
            new_data[zip_code].append("NA")

    # Make sure everything has the same length
    # (There might be duplicates!)
    for zip_code in new_data:
        if len(new_data[zip_code]) != expected_entries:
            sys.exit(
                "%s - ERROR: Inconsistency in number of data points for zip"
                " code %s. Data failed to update." % (location, zip_code))

    # Overwrite csv
    write_csv = open(file_path, 'w+')
    if not overwrite:
        dates.append(quoted_date)
    write_csv.write(','.join(dates) + "\n")
    sorted_zip_codes = sorted(new_data.keys())
    for zip_code in sorted_zip_codes:
        write_csv.write('%s,' % zip_code)
        write_csv.write(','.join(new_data[zip_code]) + "\n")
    write_csv.close()
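# A hedged usage sketch, not part of the pipeline: the zip codes, case
# counts, date string, and output path below are hypothetical stand-ins for
# what a scraper like the Oakland County one above would pass in (the date
# format is an assumption). Calling write_data twice with the same date
# takes the overwrite branch and replaces today's column instead of
# appending a duplicate.
if __name__ == '__main__':
    example_zips = ['48009', '48017', '48025']  # hypothetical
    example_cases = [5, 2, 9]  # hypothetical
    write_data(
        '../processed_data/cases/US/oakland-county_cases.csv',
        '3/30/20',  # hypothetical date string
        example_zips,
        example_cases,
        location='Oakland County')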