Example #1
0
    def assert_accuracy(i, row, worksite_or_housing, df):
        """Verify that a row with no manual fix has acceptable geocoding.

        Nothing is checked for the worksite of a ``dol_h`` row or for the
        housing of an H-2B row. A housing check on a row whose four housing
        address columns are all NA sends a notification and stops there.
        Otherwise the row is reported and passed to ``mark_as_failed`` when
        its accuracy type is falsy, its accuracy is below 0.7, or its
        accuracy type is listed in ``helpers.bad_accuracy_types``.

        Args:
            i: Index label of ``row``; forwarded to ``mark_as_failed``.
            row: Mapping-like row (indexable by column name).
            worksite_or_housing: ``"worksite"`` or ``"housing"``; selects
                which accuracy columns are inspected.
            df: DataFrame forwarded to ``mark_as_failed``.
        """
        table = row["table"]
        checking_housing = worksite_or_housing == "housing"

        # Combinations that are never validated here.
        if table == "dol_h" and worksite_or_housing == "worksite":
            return
        if row["Visa type"] == "H-2B" and checking_housing:
            return

        # When checking housing, a row with no housing data at all is let
        # through with a notification instead of being failed.
        housing_columns = ("HOUSING_ADDRESS_LOCATION", "HOUSING_CITY",
                           "HOUSING_STATE", "HOUSING_POSTAL_CODE")
        if checking_housing and all(
                pd.isna(row[name]) for name in housing_columns):
            print_red_and_email(
                f"{row['CASE_NUMBER']} is H-2A but all of its housing columns are blank. If its worksite was fixed properly, it will be allowed to pass to job central. This was found while implementing fixes.",
                "H-2A job Without Housing Data - Implement Fixes")
            return

        accuracy_type = row[f"{worksite_or_housing} accuracy type"]
        needs_fixing = (
            not accuracy_type
            or row[f"{worksite_or_housing} accuracy"] < 0.7
            or accuracy_type in helpers.bad_accuracy_types)
        if needs_fixing:
            print_red_and_email(
                f"The {worksite_or_housing} data of {row['CASE_NUMBER']} requires fixing, but its {worksite_or_housing}_fixed_by column was not specified to either address, coordinates, inactive, or impossible.",
                "Address Needs Fixing but Not Fixed")
            mark_as_failed(i, worksite_or_housing, df)
def perform_task_and_catch_errors(task_function, task_name):
    """Run ``task_function`` and report, rather than propagate, any exception.

    A green "<task_name>..." banner is printed before the call and a green
    "Finished ..." line with the elapsed wall-clock seconds after it. If the
    call raises, the error text is passed to ``print_red_and_email``.

    Args:
        task_function: Zero-argument callable to execute.
        task_name: Human-readable task name used in the printed messages
            and (lower-cased) in the error subject.

    Returns:
        True when ``task_function`` returned normally, False when it raised
        an ``Exception``.
    """
    started_at = time.time()
    print(Fore.GREEN + f"{task_name}..." + Style.RESET_ALL)

    try:
        task_function()
    except Exception as error:
        # Deliberately broad: any failure in the task is reported and
        # converted into a False result instead of stopping the caller.
        print_red_and_email("Error: " + str(error),
                            f"Unanticipated Error {task_name.lower()}!!")
        succeeded = False
    else:
        succeeded = True

    elapsed = time.time() - started_at
    print(Fore.GREEN + f"Finished {task_name} in {elapsed} seconds." + "\n" +
          Style.RESET_ALL)
    return succeeded
Example #3
0
 def fix_row(i, row, worksite_or_housing, df):
     """Dispatch a row to the fix selected by its ``*_fixed_by`` column.

     The value of ``row[f"{worksite_or_housing}_fixed_by"]`` decides what
     happens:

     * ``"address"``     -> ``fix_by_address``
     * ``"coordinates"`` -> ``fix_by_coords``
     * ``"NA"`` or null  -> ``assert_accuracy`` (no fix requested, so the
       existing geocoding has to be good enough)
     * ``"impossible"`` / ``"inactive"`` -> nothing is done
     * anything else     -> the problem is reported and the row is passed
       to ``mark_as_failed``

     Args:
         i: Index label of ``row`` in ``df``.
         row: Mapping-like row (indexable by column name).
         worksite_or_housing: ``"worksite"`` or ``"housing"``.
         df: DataFrame forwarded to the fix / failure helpers.
     """
     method = row[f"{worksite_or_housing}_fixed_by"]
     if method == "address":
         fix_by_address(i, row, worksite_or_housing, df)
     elif method == "coordinates":
         fix_by_coords(i, worksite_or_housing, df)
     elif method == "NA" or pd.isnull(method):
         assert_accuracy(i, row, worksite_or_housing, df)
     elif method in ("impossible", "inactive"):
         # Explicitly marked as not fixable / not relevant: leave as is.
         pass
     else:
         # The message lists every accepted value, including `inactive`,
         # so the recipient knows exactly what the column may contain.
         error_message = f"Cannot fix job with case number: {row['CASE_NUMBER']}. {worksite_or_housing}_fixed_by column must be either `address`, `coordinates`, `impossible`, `inactive`, `NA`, or null - and it's case sensitive!"
         print_red_and_email(error_message,
                             "Incorrect fixed_by Column Value")
         mark_as_failed(i, worksite_or_housing, df)
Example #4
0
    def fix_by_address(i, row, worksite_or_housing, df):
        """Re-geocode a row from its address columns and store the result.

        The address is assembled with ``helpers.create_address_from`` from
        the worksite or housing columns of ``row`` and sent to
        ``client.geocode``. The first result's longitude, latitude, accuracy
        and accuracy type are written into ``df`` at index ``i``. If the
        accuracy is below 0.7 or the accuracy type is listed in
        ``helpers.bad_accuracy_types``, or if anything in the geocoding step
        raises, the problem is reported and the row is passed to
        ``mark_as_failed``.

        Args:
            i: Index label of the row inside ``df``.
            row: Mapping-like row (indexable by column name).
            worksite_or_housing: ``"worksite"`` or ``"housing"``; any other
                value is reported and the function returns without changes.
            df: DataFrame updated in place through ``df.at``.
        """
        if worksite_or_housing not in ("worksite", "housing"):
            print_red_and_email(
                f"There was an error fixing the job with case number: {row['CASE_NUMBER']}. worksite_or_housing parameter in fix_by_address must be either `worksite` or `housing`",
                "Invalid Function Parameter")
            return

        if worksite_or_housing == "worksite":
            address_columns = ("WORKSITE_ADDRESS", "WORKSITE_CITY",
                               "WORKSITE_STATE", "WORKSITE_POSTAL_CODE")
        else:
            address_columns = ("HOUSING_ADDRESS_LOCATION", "HOUSING_CITY",
                               "HOUSING_STATE", "HOUSING_POSTAL_CODE")
        street, city, state, postal_code = (
            row[name] for name in address_columns)
        full_address = helpers.create_address_from(street, city, state,
                                                   postal_code)

        try:
            # Only the first geocoding result is used.
            best_match = client.geocode(full_address)['results'][0]

            coordinates = best_match['location']
            df.at[i, f"{worksite_or_housing}_long"] = coordinates['lng']
            df.at[i, f"{worksite_or_housing}_lat"] = coordinates['lat']

            accuracy = best_match['accuracy']
            df.at[i, f"{worksite_or_housing} accuracy"] = accuracy
            accuracy_type = best_match['accuracy_type']
            df.at[i, f"{worksite_or_housing} accuracy type"] = accuracy_type

            # The new values stay in df even when they are judged
            # unreliable; the row is additionally marked as failed.
            unreliable = (accuracy < 0.7
                          or accuracy_type in helpers.bad_accuracy_types)
            if unreliable:
                print_red_and_email(
                    f"Geocoding the address `{full_address}` (case number {row['CASE_NUMBER']}) resulted in either an accuracy below 0.7 or a bad accuracy type. ",
                    "Fixing Failed")
                mark_as_failed(i, worksite_or_housing, df)
        except Exception as error:
            print_red_and_email(
                f"Failed to geocode ~{row['CASE_NUMBER']}~ here's the error message:\n{str(error)}",
                "Geocoding Failure in Implement Fixes")
            mark_as_failed(i, worksite_or_housing, df)
def overwrite_our_feature():
    """Rebuild the data for the 'H2Data' ArcGIS feature and overwrite it.

    Four sets of rows are read through ``engine`` and stacked into one
    DataFrame that is handed to ``overwrite_feature``:

    1. H-2A jobs in the listed states that have housing coordinates.
    2. Forestry H-2B jobs in the listed states, with their housing
       coordinates set to the worksite coordinates.
    3. H-2A jobs in the listed states without housing coordinates, with
       their housing coordinates set to the worksite coordinates.
    4. ``additional_housing`` rows whose case number belongs to an H-2A job
       in the listed states, completed with the job_central columns of the
       matching row from set 1.

    An ``additional_housing`` row that does not match exactly one row of
    set 1 is reported through ``print_red_and_email`` and is still included
    with those extra columns left empty.
    """

    # get all accurate h2a jobs that are in one of our states and have housing coordinates
    h2a_df = pd.read_sql("""SELECT * FROM job_central WHERE
                        "Visa type" = 'H-2A' AND
                        LOWER("WORKSITE_STATE") IN
                        ('texas', 'tx', 'kentucky', 'ky', 'tennessee', 'tn', 'arkansas', 'ar', 'louisiana', 'la', 'mississippi', 'ms', 'alabama', 'al') AND
                        housing_lat IS NOT NUll AND housing_long IS NOT NULL""",
                         con=engine)

    # get all h2a jobs from job_central that are in one of our states and do not have housing coordinates
    h2a_no_housing_df = pd.read_sql("""SELECT * FROM job_central WHERE
                                       "Visa type" = 'H-2A' AND
                                       LOWER("WORKSITE_STATE") IN
                                       ('texas', 'tx', 'kentucky', 'ky', 'tennessee', 'tn', 'arkansas', 'ar', 'louisiana', 'la', 'mississippi', 'ms', 'alabama', 'al') AND
                                       (housing_lat IS NUll OR housing_long IS NULL)""",
                                    con=engine)

    # for arcGIS map purposes because color-coding is based on this column.
    # Assign the result back instead of calling fillna(inplace=True) on the
    # selected column: the chained in-place form is deprecated and does not
    # update the frame under pandas Copy-on-Write.
    for frame in (h2a_df, h2a_no_housing_df):
        frame["TOTAL_OCCUPANCY"] = frame["TOTAL_OCCUPANCY"].fillna(600)

    # get all forestry h2b jobs from job_central that are in one of our states
    forestry_h2b_in_our_states_df = pd.read_sql(
        """SELECT * FROM job_central WHERE
                                                   "Visa type" = 'H-2B' AND
                                                   "SOC_CODE" IN ('45-4011.00', '45-4011') AND
                                                   LOWER("WORKSITE_STATE") IN
                                                   ('texas', 'tx', 'kentucky', 'ky', 'tennessee', 'tn', 'arkansas', 'ar', 'louisiana', 'la', 'mississippi', 'ms', 'alabama', 'al')
                                                    """,
        con=engine)

    # set housing coordinates of h2b jobs and h2a jobs without housing to
    # their worksite coordinates so that arcGIS will map them. A plain column
    # copy also works when a frame is empty, where a row-wise apply would
    # return a DataFrame instead of a Series.
    for frame in (forestry_h2b_in_our_states_df, h2a_no_housing_df):
        frame["housing_lat"] = frame["worksite_lat"]
        frame["housing_long"] = frame["worksite_long"]

    # combine h2a and forestry data (DataFrame.append was removed in
    # pandas 2.0; pd.concat stacks the frames in the same order)
    h2a_housing_and_no_housing_and_h2b_df = pd.concat(
        [h2a_df, forestry_h2b_in_our_states_df, h2a_no_housing_df])

    # get all additional housing rows that are in one of our states and that have a matching case number in job_central
    additional_housing_df = pd.read_sql(
        """SELECT * FROM additional_housing WHERE
                                           "CASE_NUMBER" IN
                                                (SELECT "CASE_NUMBER" FROM job_central WHERE
                                                "Visa type" = 'H-2A' AND
                                                LOWER("WORKSITE_STATE") IN
                                                ('texas', 'tx', 'kentucky', 'ky', 'tennessee', 'tn', 'arkansas', 'ar', 'louisiana', 'la', 'mississippi', 'ms', 'alabama', 'al'))
                                                 """,
        con=engine)

    myprint(f"There will be {len(h2a_df)} normal H2A jobs in the feature.")
    myprint(
        f"There will be {len(h2a_no_housing_df)} H2A jobs mapped using their worksites in the feature."
    )
    myprint(
        f"There will be {len(forestry_h2b_in_our_states_df)} forestry H2B jobs in the feature."
    )
    myprint(
        f"There will be {len(additional_housing_df)} additional housing rows in the feature."
    )

    # get columns that are in the h2a data but not the additional housing data and add each one to the additional housing dataframe
    cols_only_in_h2a = set(h2a_df.columns) - set(additional_housing_df.columns)
    for column in cols_only_in_h2a:
        additional_housing_df[column] = None

    # for each additional housing row, find its matching row in job_central and insert the data about that case number that is in job_central but not the additional_housing row
    for i, row in additional_housing_df.iterrows():
        case_number = row["CASE_NUMBER"]
        job_in_h2a = h2a_df[h2a_df["CASE_NUMBER"] == case_number]

        if len(job_in_h2a) == 1:
            for column in cols_only_in_h2a:
                additional_housing_df.at[i, column] = get_value(
                    job_in_h2a, column)
        else:
            print_red_and_email(
                f"{case_number} is in additional_housing, so I looked for it in job_central, and found a number of matching rows not equal to 1.",
                "Overwriting ArcGIS Layer")

    # append completed additional_housing df to the h2a and forestry data
    full_layer = pd.concat(
        [h2a_housing_and_no_housing_and_h2b_df, additional_housing_df])

    overwrite_feature(ARCGIS_USERNAME, ARCGIS_PASSWORD, full_layer, 'H2Data')