def assert_accuracy(i, row, worksite_or_housing, df):
    """Validate the geocoding accuracy of one job row's worksite or housing data.

    Skips the check entirely for ``dol_h``-table rows when checking worksites,
    and for H-2B rows when checking housing. H-2A rows whose housing columns
    are all blank are reported by email but allowed through. Any other row
    whose accuracy value is missing/falsy, below 0.7, or of a known-bad
    accuracy type is reported and marked as failed in ``df``.

    Fix applied: removed the unused local ``table = row["table"]`` — it was
    assigned and never read (the comparison below reads ``row["table"]``
    directly).

    Parameters:
        i: index label of ``row`` within ``df`` (passed to mark_as_failed).
        row: the job record (a pandas Series / DataFrame row).
        worksite_or_housing: either "worksite" or "housing".
        df: the DataFrame the row came from; failures are recorded into it.
    """
    if (row["table"] == "dol_h") and (worksite_or_housing == "worksite"):
        return
    elif (row["Visa type"] == "H-2B") and (worksite_or_housing == "housing"):
        return
    else:
        # if checking for housing and h-2a, let it through if all the housing columns are empty
        if worksite_or_housing == "housing" and pd.isna(
                row["HOUSING_ADDRESS_LOCATION"]) and pd.isna(
                    row["HOUSING_CITY"]) and pd.isna(
                        row["HOUSING_STATE"]) and pd.isna(
                            row["HOUSING_POSTAL_CODE"]):
            print_red_and_email(
                f"{row['CASE_NUMBER']} is H-2A but all of its housing columns are blank. If its worksite was fixed properly, it will be allowed to pass to job central. This was found while implementing fixes.",
                "H-2A job Without Housing Data - Implement Fixes")
            return
        # NOTE(review): if "accuracy type" is NaN, `not NaN` is False and both
        # NaN comparisons below are False, so such a row passes unflagged —
        # confirm whether that is intended.
        if (not row[f"{worksite_or_housing} accuracy type"]) or (
                row[f"{worksite_or_housing} accuracy"] < 0.7) or (
                    row[f"{worksite_or_housing} accuracy type"] in
                    helpers.bad_accuracy_types):
            print_red_and_email(
                f"The {worksite_or_housing} data of {row['CASE_NUMBER']} requires fixing, but its {worksite_or_housing}_fixed_by column was not specified to either address, coordinates, inactive, or impossible.",
                "Address Needs Fixing but Not Fixed")
            mark_as_failed(i, worksite_or_housing, df)
def perform_task_and_catch_errors(task_function, task_name):
    """Run ``task_function`` as a timed, announced step; report any exception.

    Prints a green start/finish banner around the call. If the task raises,
    the error is reported via print_red_and_email instead of propagating.

    Returns:
        True when the task completed without raising, False otherwise.
    """
    start_time = time.time()
    print(Fore.GREEN + f"{task_name}..." + Style.RESET_ALL)
    succeeded = True
    try:
        task_function()
    except Exception as error:
        # boundary handler: any failure is emailed, never re-raised
        print_red_and_email("Error: " + str(error),
                            f"Unanticipated Error {task_name.lower()}!!")
        succeeded = False
    print(Fore.GREEN +
          f"Finished {task_name} in {time.time() - start_time} seconds." +
          "\n" + Style.RESET_ALL)
    return succeeded
def fix_row(i, row, worksite_or_housing, df):
    """Apply the fix indicated by the row's ``{worksite_or_housing}_fixed_by`` column.

    Recognized values: "address" (re-geocode from street address),
    "coordinates" (use supplied coordinates), "NA"/null (no fix needed —
    just verify accuracy), and "impossible"/"inactive" (leave untouched).
    Anything else is reported and the row is marked failed.
    """
    fix_method = row[f"{worksite_or_housing}_fixed_by"]
    if fix_method == "address":
        fix_by_address(i, row, worksite_or_housing, df)
    elif fix_method == "coordinates":
        fix_by_coords(i, worksite_or_housing, df)
    elif fix_method == "NA" or pd.isnull(fix_method):
        # nothing to fix; just confirm the existing geocode quality is acceptable
        assert_accuracy(i, row, worksite_or_housing, df)
    elif fix_method in ("impossible", "inactive"):
        # deliberately unfixable / retired rows are left alone
        pass
    else:
        error_message = f"Cannot fix job with case number: {row['CASE_NUMBER']}. {worksite_or_housing}_fixed_by column must be either `address`, `coordinates`, `impossible`, `NA`, or null - and it's case sensitive!"
        print_red_and_email(error_message, "Incorrect fixed_by Column Value")
        mark_as_failed(i, worksite_or_housing, df)
def fix_by_address(i, row, worksite_or_housing, df):
    """Re-geocode a row from its full street address and write the result into ``df``.

    Builds the address from the worksite or housing columns (depending on
    ``worksite_or_housing``), geocodes it, and stores longitude, latitude,
    accuracy, and accuracy type at row ``i``. The row is marked failed when
    geocoding raises, or when the top result is low quality (accuracy below
    0.7 or a known-bad accuracy type).
    """
    if worksite_or_housing == "worksite":
        full_address = helpers.create_address_from(row["WORKSITE_ADDRESS"],
                                                   row["WORKSITE_CITY"],
                                                   row["WORKSITE_STATE"],
                                                   row["WORKSITE_POSTAL_CODE"])
    elif worksite_or_housing == "housing":
        full_address = helpers.create_address_from(
            row["HOUSING_ADDRESS_LOCATION"], row["HOUSING_CITY"],
            row["HOUSING_STATE"], row["HOUSING_POSTAL_CODE"])
    else:
        print_red_and_email(
            f"There was an error fixing the job with case number: {row['CASE_NUMBER']}. worksite_or_housing parameter in fix_by_address must be either `worksite` or `housing`",
            "Invalid Function Parameter")
        return
    try:
        # take the best (first) geocoding candidate
        top_result = client.geocode(full_address)['results'][0]
        location = top_result['location']
        df.at[i, f"{worksite_or_housing}_long"] = location['lng']
        df.at[i, f"{worksite_or_housing}_lat"] = location['lat']
        df.at[i, f"{worksite_or_housing} accuracy"] = top_result['accuracy']
        df.at[i,
              f"{worksite_or_housing} accuracy type"] = top_result['accuracy_type']
        low_quality = (top_result['accuracy'] < 0.7
                       or top_result['accuracy_type'] in
                       helpers.bad_accuracy_types)
        if low_quality:
            print_red_and_email(
                f"Geocoding the address `{full_address}` (case number {row['CASE_NUMBER']}) resulted in either an accuracy below 0.7 or a bad accuracy type. ",
                "Fixing Failed")
            mark_as_failed(i, worksite_or_housing, df)
    except Exception as error:
        # geocoder errors are reported, not raised; the row is simply marked failed
        print_red_and_email(
            f"Failed to geocode ~{row['CASE_NUMBER']}~ here's the error message:\n{str(error)}",
            "Geocoding Failure in Implement Fixes")
        mark_as_failed(i, worksite_or_housing, df)
def overwrite_our_feature():
    """Rebuild the combined H-2A / forestry H-2B data and overwrite the 'H2Data' ArcGIS layer.

    Pulls, for our target states: H-2A jobs with housing coordinates, H-2A
    jobs without them (mapped at their worksite instead), forestry H-2B jobs
    (also mapped at their worksite), and additional_housing rows enriched
    with their matching job_central columns. The combined frame is pushed to
    ArcGIS via overwrite_feature.

    Fixes applied: ``DataFrame.append`` (removed in pandas 2.0) replaced with
    ``pd.concat`` (same default ``ignore_index=False`` behavior);
    ``fillna(..., inplace=True)`` on a column selection (deprecated chained
    in-place pattern) replaced with plain assignment; per-row
    ``apply(..., axis=1)`` column copies replaced with direct vectorized
    column assignment (identical result, no per-row Python overhead).
    """
    # get all accurate h2a jobs that are in one of our states and have housing coordinates
    h2a_df = pd.read_sql(
        """SELECT * FROM job_central WHERE "Visa type" = 'H-2A' AND LOWER("WORKSITE_STATE") IN ('texas', 'tx', 'kentucky', 'ky', 'tennessee', 'tn', 'arkansas', 'ar', 'louisiana', 'la', 'mississippi', 'ms', 'alabama', 'al') AND housing_lat IS NOT NUll AND housing_long IS NOT NULL""",
        con=engine)
    # get all h2a jobs from job_central that are in one of our states and do not have housing coordinates
    h2a_no_housing_df = pd.read_sql(
        """SELECT * FROM job_central WHERE "Visa type" = 'H-2A' AND LOWER("WORKSITE_STATE") IN ('texas', 'tx', 'kentucky', 'ky', 'tennessee', 'tn', 'arkansas', 'ar', 'louisiana', 'la', 'mississippi', 'ms', 'alabama', 'al') AND (housing_lat IS NUll OR housing_long IS NULL)""",
        con=engine)
    # for arcGIS map purposes because color-coding is based on this column
    h2a_df["TOTAL_OCCUPANCY"] = h2a_df["TOTAL_OCCUPANCY"].fillna(600)
    h2a_no_housing_df["TOTAL_OCCUPANCY"] = h2a_no_housing_df[
        "TOTAL_OCCUPANCY"].fillna(600)
    # get all forestry h2b jobs from job_central that are in one of our states
    forestry_h2b_in_our_states_df = pd.read_sql(
        """SELECT * FROM job_central WHERE "Visa type" = 'H-2B' AND "SOC_CODE" IN ('45-4011.00', '45-4011') AND LOWER("WORKSITE_STATE") IN ('texas', 'tx', 'kentucky', 'ky', 'tennessee', 'tn', 'arkansas', 'ar', 'louisiana', 'la', 'mississippi', 'ms', 'alabama', 'al') """,
        con=engine)
    # set housing coordinates of h2b jobs and h2a jobs without housing to their
    # worksite coordinates so that arcGIS will map them
    forestry_h2b_in_our_states_df["housing_lat"] = (
        forestry_h2b_in_our_states_df["worksite_lat"])
    forestry_h2b_in_our_states_df["housing_long"] = (
        forestry_h2b_in_our_states_df["worksite_long"])
    h2a_no_housing_df["housing_lat"] = h2a_no_housing_df["worksite_lat"]
    h2a_no_housing_df["housing_long"] = h2a_no_housing_df["worksite_long"]
    # combine h2a and forestry data
    h2a_housing_and_no_housing_and_h2b_df = pd.concat(
        [h2a_df, forestry_h2b_in_our_states_df, h2a_no_housing_df])
    # get all additional housing rows that are in one of our states and that have a matching case number in job_central
    additional_housing_df = pd.read_sql(
        """SELECT * FROM additional_housing WHERE "CASE_NUMBER" IN (SELECT "CASE_NUMBER" FROM job_central WHERE "Visa type" = 'H-2A' AND LOWER("WORKSITE_STATE") IN ('texas', 'tx', 'kentucky', 'ky', 'tennessee', 'tn', 'arkansas', 'ar', 'louisiana', 'la', 'mississippi', 'ms', 'alabama', 'al')) """,
        con=engine)
    myprint(f"There will be {len(h2a_df)} normal H2A jobs in the feature.")
    myprint(
        f"There will be {len(h2a_no_housing_df)} H2A jobs mapped using their worksites in the feature."
    )
    myprint(
        f"There will be {len(forestry_h2b_in_our_states_df)} forestry H2B jobs in the feature."
    )
    myprint(
        f"There will be {len(additional_housing_df)} additional housing rows in the feature."
    )
    # get columns that are in the h2a data but not the additional housing data
    # and add each one to the additional housing dataframe
    cols_only_in_h2a = set(h2a_df.columns) - set(additional_housing_df.columns)
    for column in cols_only_in_h2a:
        additional_housing_df[column] = None
    # for each additional housing row, find its matching row in job_central and
    # insert the data about that case number that is in job_central but not the
    # additional_housing row
    for i, row in additional_housing_df.iterrows():
        case_number = row["CASE_NUMBER"]
        job_in_h2a = h2a_df[h2a_df["CASE_NUMBER"] == case_number]
        if len(job_in_h2a) == 1:
            for column in cols_only_in_h2a:
                additional_housing_df.at[i, column] = get_value(
                    job_in_h2a, column)
        else:
            print_red_and_email(
                f"{case_number} is in additional_housing, so I looked for it in job_central, and found a number of matching rows not equal to 1.",
                "Overwriting ArcGIS Layer")
    # append completed additional_housing df to the h2a and forestry data
    full_layer = pd.concat(
        [h2a_housing_and_no_housing_and_h2b_df, additional_housing_df])
    overwrite_feature(ARCGIS_USERNAME, ARCGIS_PASSWORD, full_layer, 'H2Data')