コード例 #1
0
def expand_abbreviations():

    for abbreviation in states_abbreviations:
        state = states_abbreviations[abbreviation]
        make_query(f"""UPDATE job_central
                       SET "WORKSITE_STATE" = '{state}'
                       WHERE "WORKSITE_STATE" IN ('{abbreviation}', '{abbreviation.lower()}')"""
                   )

        make_query(f"""UPDATE low_accuracies
                       SET "WORKSITE_STATE" = '{state}'
                       WHERE "WORKSITE_STATE" IN ('{abbreviation}', '{abbreviation.lower()}')"""
                   )

        make_query(f"""UPDATE job_central
                       SET "HOUSING_STATE" = '{state}'
                       WHERE "HOUSING_STATE" IN ('{abbreviation}', '{abbreviation.lower()}')"""
                   )

        make_query(f"""UPDATE additional_housing
                       SET "HOUSING_STATE" = '{state}'
                       WHERE "HOUSING_STATE" IN ('{abbreviation}', '{abbreviation.lower()}')"""
                   )

        make_query(f"""UPDATE low_accuracies
                       SET "HOUSING_STATE" = '{state}'
                       WHERE "HOUSING_STATE" IN ('{abbreviation}', '{abbreviation.lower()}')"""
                   )
コード例 #2
0
def add_housing_to_postgres():

    file_path = "dol_data/" + input(
        "Put the additional housing file in a folder named `dol_data` in the `scripts` folder. Now enter the name of the file (this is case sensitive).\n"
    ).strip()
    year = input("What year is it? (eg: 2020)\n").strip()
    quarter = input("What quarter it is? (enter 1, 2, 3, or 4)\n").strip()
    input(
        f"Ok, adding additional housing from {file_path} for fiscal year {year}Q{quarter}. If this is correct press any key, othewise press control + c to start over."
    )

    housing = pd.read_excel(file_path)
    accurate_housing, inaccurate_housing = geocode_manage_split_housing(
        housing, year, quarter)

    accurate_housing.to_sql("additional_housing",
                            engine,
                            if_exists='append',
                            index=False)
    inaccurate_housing.to_sql("low_accuracies",
                              engine,
                              if_exists='append',
                              index=False)

    if quarter != 1:
        response = input(
            f"Enter 'yes' or 'y' if you're ready to run the queries to delete the additional_housing rows from the previous quarter ({year}Q{quarter - 1}). You may want to check that adding the current quarter ({year}Q{quarter}) went well first. If it didn't you can always redo it, but geocoding may cost more because I won't be able to steal geocoding results from last quarter's additional housing rows."
        )
        if response in ["y", "yes"]:
            make_query(
                f"""DELETE FROM additional_housing WHERE fy = '{year}Q{quarter - 1}'"""
            )
            make_query(
                f"""DELETE FROM low_accuracies WHERE fy = '{year}Q{quarter - 1}' and "table" = 'dol_h'"""
            )
def mark_inactive_dolH_as_fixed():
    acc_case_nums_statuses_df = pd.read_sql(
        """select "CASE_NUMBER", status from job_central""", con=engine)
    inacc_case_nums_statuses_df = pd.read_sql(
        """select "CASE_NUMBER", status from low_accuracies where "table" != 'dol_h'""",
        con=engine)

    # check that there are no duplicate case numbers
    case_nums_list = acc_case_nums_statuses_df["CASE_NUMBER"].tolist(
    ) + inacc_case_nums_statuses_df["CASE_NUMBER"].tolist()
    assert len(case_nums_list) == len(set(case_nums_list))

    all_case_nums_statuses_df = acc_case_nums_statuses_df.append(
        inacc_case_nums_statuses_df)

    inaccurate_additional_housings = pd.read_sql(
        """select "id", "CASE_NUMBER" from low_accuracies where "table" = 'dol_h'""",
        con=engine)
    myprint(
        f"There are {len(inaccurate_additional_housings)} inaccurate additional housing rows."
    )

    case_nums_with_no_matches = []
    num_fixed = 0
    # iterate over all inaccurate additional housing case numbers and ids. if a rows's corresponding row in job_central is inactive, update the row's fixed column to be true
    for i, job in inaccurate_additional_housings.iterrows():
        job_case_num = job["CASE_NUMBER"]
        central_job = all_case_nums_statuses_df[
            all_case_nums_statuses_df["CASE_NUMBER"] == job_case_num]

        if len(central_job) > 1:
            myprint(
                f"{job_case_num} has {len(central_job)} matching rows in the dataframe with all non dol_h case numbers. This should be impossible."
            )
        elif len(central_job) == 0:
            myprint(
                f"{job_case_num} has no matching rows in the dataframe with all non dol_h case numbers."
            )
            case_nums_with_no_matches.append(job_case_num)
        else:
            job_status = central_job["status"].tolist()[0]
            if job_status not in ["not yet started", "started"]:
                job_id = job["id"]
                make_query(f"""UPDATE low_accuracies SET
                                fixed = true,
                                worksite_fixed_by = 'inactive',
                                housing_fixed_by = 'inactive'
                                WHERE "id" = {job_id}""")
                num_fixed += 1

    # myprint(f"There are {len(case_nums_with_no_matches)} additional housing rows in low_accuracies without a matching central case number.")
    # myprint(f"There are {len(set(case_nums_with_no_matches))} additional housing unique case numbers in low_accuracies without a matching central case number.")
    myprint(
        f"{num_fixed} additional housing rows in low accuracies were marked as fixed."
    )
コード例 #4
0
def append_excel_to_table(file_name, table_name, year, quarter):
    if table_name in all_table_names:
        if check_for_new_columns(file_name, table_name):
            raise Exception(f"Since there are columns in {file_name} that aren't in {table_name}, we can't continue this process or it will fail. Either add the missing columns to PostgreSQL or change the column names in the excel file to match ones that already exist in Postgres.")
    else:
        myprint(f"The table {table_name} doesn't exist yet, so it will be added.")

    df = pd.read_excel(f"dol_data/{file_name}")
    df["fy"] = f"{year}Q{quarter}"

    myprint(f"Adding {len(df)} rows from {file_name} to {table_name}.")

    df.to_sql(table_name, engine, if_exists='append', index=False)
    myprint("Done adding rows.")

    if int(quarter) != 1:
        make_query(f"""DELETE FROM {table_name} WHERE fy = '{year}Q{int(quarter) - 1}'""")
コード例 #5
0
ファイル: interface.py プロジェクト: gpratt/gffutils
    def _relation(self, id, join_on, join_to, level=None, featuretype=None,
                  order_by=None, reverse=False, completely_within=False):

        # The following docstring will be included in the parents() and
        # children() docstrings to maintain consistency, since they both
        # delegate to this method.
        """
        Parameters
        ----------

        id : string or a Feature object

        level : None or int

            If `level=None` (default), then return all children regardless
            of level.  If `level` is an integer, then constrain to just that
            level.
        {_method_doc}
        """

        if isinstance(id, Feature):
            id = id.id

        other = '''
        JOIN relations
        ON relations.{join_on} = features.id
        WHERE relations.{join_to} = ?
        '''.format(**locals())
        args = [id]

        level_clause = ''
        if level is not None:
            level_clause = 'relations.level = ?'
            args.append(level)

        query, args = helpers.make_query(
            args=args,
            other=other,
            extra=level_clause,
            featuretype=featuretype,
            order_by=order_by,
            reverse=reverse,
            completely_within=completely_within,
        )

        # modify _SELECT so that only unique results are returned
        query = query.replace("SELECT", "SELECT DISTINCT")
        for i in self._execute(query, args):
            yield self._feature_returner(**i)
コード例 #6
0
def add_worksites_to_postgres():
    file_path = "dol_data/" + input(
        "Check that the additional worksites file is in a folder named `dol_data` in the `scripts` folder. (If it isn't, exit this using control + c then re-run this script once you've done it.)  Now enter the name of the file (this is case sensitive).\n"
    ).strip()
    year = input("What year is it? (eg: 2020)\n").strip()
    quarter = int(input("What quarter it is? (enter 1, 2, 3, or 4)\n").strip())
    input(
        f"Ok, adding worksites from {file_path} for fiscal year {year}Q{quarter}. If this is correct press any key, othewise press control + c to start over."
    )

    worksites = pd.read_excel(file_path)

    worksites = manage_worksites(worksites, year, quarter)
    myprint(f"Adding {len(worksites)} rows to additional_worksites table.")

    worksites.to_sql("additional_worksites",
                     engine,
                     if_exists='append',
                     index=False)

    if int(quarter) != 1:
        make_query(
            f"""DELETE FROM additional_worksites WHERE fy = '{year}Q{int(quarter) - 1}'"""
        )
コード例 #7
0
def update_halfway_columns():

    # update days_until_halfway column for job_central and low_accuracies
    make_query("""
                UPDATE job_central
                	SET days_until_halfway =
                	CASE WHEN status != 'ended' THEN
                		EXTRACT(DAY FROM ("EMPLOYMENT_BEGIN_DATE" + (("EMPLOYMENT_END_DATE" - "EMPLOYMENT_BEGIN_DATE") / 2)) - CURRENT_DATE)
                	ELSE NULL
                	END
              """)

    make_query("""
                UPDATE low_accuracies
                	SET days_until_halfway =
                	CASE WHEN status != 'ended' THEN
                		EXTRACT(DAY FROM ("EMPLOYMENT_BEGIN_DATE" + (("EMPLOYMENT_END_DATE" - "EMPLOYMENT_BEGIN_DATE") / 2)) - CURRENT_DATE)
                	ELSE NULL
                	END
                WHERE "table" = 'central'
               """)

    # update is_halfway column for job_central and low_accuracies
    make_query("""
                UPDATE job_central
                	SET is_halfway =
                	CASE WHEN days_until_halfway > 0 THEN FALSE
                	WHEN days_until_halfway IS NULL THEN NULL
                	ELSE TRUE
                	END
               """)

    make_query("""
                UPDATE low_accuracies
                	SET is_halfway =
                	CASE WHEN days_until_halfway > 0 THEN FALSE
                	WHEN days_until_halfway IS NULL THEN NULL
                	ELSE TRUE
                	END
                WHERE "table" = 'central'
               """)
コード例 #8
0
ファイル: interface.py プロジェクト: gpratt/gffutils
    def all_features(self, limit=None, strand=None, featuretype=None,
                     order_by=None, reverse=False, completely_within=False):
        """
        Returns an iterator of all :class:`Feature` objects in the database.

        Parameters
        ----------
        {_method_doc}
        """
        query, args = helpers.make_query(
            args=[],
            limit=limit,
            strand=strand,
            featuretype=featuretype,
            order_by=order_by,
            reverse=reverse,
            completely_within=completely_within
        )
        for i in self._execute(query, args):
            yield self._feature_returner(**i)
コード例 #9
0
ファイル: interface.py プロジェクト: gpratt/gffutils
    def features_of_type(self, featuretype, limit=None, strand=None,
                         order_by=None, reverse=False,
                         completely_within=False):
        """
        Returns an iterator of :class:`gffutils.Feature` objects.

        Parameters
        ----------
        {_method_doc}
        """
        query, args = helpers.make_query(
            args=[],
            limit=limit,
            featuretype=featuretype,
            order_by=order_by,
            reverse=reverse,
            strand=strand,
            completely_within=completely_within,
        )

        for i in self._execute(query, args):
            yield self._feature_returner(**i)
コード例 #10
0
def update_status_columns(table_name):

    make_query(f"""UPDATE {table_name}
                    SET "EMPLOYMENT_BEGIN_DATE" = "REQUESTED_BEGIN_DATE"
                    WHERE "EMPLOYMENT_BEGIN_DATE" IS null""")

    make_query(f"""UPDATE {table_name}
                    SET "EMPLOYMENT_END_DATE" = "REQUESTED_END_DATE"
                    WHERE "EMPLOYMENT_END_DATE" IS null""")

    make_query(f"""UPDATE {table_name}
                    SET status =
                    CASE
         			WHEN ("EMPLOYMENT_BEGIN_DATE" IS null) OR ("EMPLOYMENT_END_DATE" IS null) THEN null
         			WHEN ("EMPLOYMENT_BEGIN_DATE" <= CURRENT_DATE) AND (CURRENT_DATE <= "EMPLOYMENT_END_DATE") THEN 'started'
                    WHEN ("EMPLOYMENT_BEGIN_DATE" > CURRENT_DATE) AND (CURRENT_DATE <= "EMPLOYMENT_END_DATE") THEN 'not yet started'
         			ELSE 'ended'
         			END""")
コード例 #11
0
def set_test_database_state(accurates, inaccurates):
    make_query("DELETE FROM job_central")
    make_query("DELETE FROM low_accuracies")
    accurates.to_sql("job_central", engine, if_exists='append', index=False, dtype=helpers.column_types)
    inaccurates.to_sql("low_accuracies", engine, if_exists='append', index=False, dtype=helpers.column_types)
    make_query("REFRESH MATERIALIZED VIEW previously_geocoded")
def mark_inactive_central_as_fixed():
    make_query("""UPDATE low_accuracies SET
                    fixed = true,
                    worksite_fixed_by = 'inactive',
                    housing_fixed_by = 'inactive'
                    WHERE status = 'ended'""")
コード例 #13
0
def send_fixes_to_postgres():
    fixed = pd.read_sql_query('select * from low_accuracies where fixed=true',
                              con=engine)

    if len(fixed) == 0:
        myprint("No jobs have been fixed.")
        return

    myprint(f"{len(fixed)} jobs have been fixed.")
    central, housing, failures = implement_fixes(fixed)
    assert len(central) + len(housing) + len(failures) == len(fixed)
    myprint(f"{len(central)} rows moving from low_accuracies to job_central.")
    myprint(
        f"{len(housing)} rows moving from low_accuracies to additional housing."
    )
    myprint(f"{len(failures)} failed fixes.")

    # saving appropriate fixes to previously_fixed table
    prev_fixed_columns = [
        "HOUSING_ADDRESS_LOCATION", "HOUSING_CITY", "HOUSING_POSTAL_CODE",
        "HOUSING_STATE", "fixed", "housing accuracy", "housing accuracy type",
        "housing_fixed_by", "housing_lat", "housing_long", "notes"
    ]
    fixed_by_vals_to_save_fixes_for = ["coordinates", "address", "impossible"]

    central_for_prev_fixed_table = central[central["housing_fixed_by"].isin(
        fixed_by_vals_to_save_fixes_for)][prev_fixed_columns]
    housing_for_prev_fixed_table = housing[housing["housing_fixed_by"].isin(
        fixed_by_vals_to_save_fixes_for)][prev_fixed_columns]
    for_prev_fixed_table = central_for_prev_fixed_table.append(
        housing_for_prev_fixed_table)

    if not for_prev_fixed_table.empty:
        myprint(
            f"There are {len(for_prev_fixed_table)} rows to add to the previously_fixed table."
        )
        for_prev_fixed_table["initial_address"] = for_prev_fixed_table.apply(
            lambda job: handle_null(job["HOUSING_ADDRESS_LOCATION"]) +
            handle_null(job["HOUSING_CITY"]) + handle_null(job[
                "HOUSING_STATE"]) + handle_null(job["HOUSING_POSTAL_CODE"]),
            axis=1)
        for_prev_fixed_table = for_prev_fixed_table.drop_duplicates(
            subset='initial_address', keep="last")
        for_prev_fixed_table.to_sql("previously_fixed",
                                    engine,
                                    if_exists='append',
                                    index=False)
        myprint(f"All rows successfully added to the previously_fixed table.")
    else:
        myprint(f"No rows to add to the previously_fixed table.")

    # adding fixes to appropriate tables and deleting appropriate rows from low_accuracies
    central.to_sql('job_central', engine, if_exists='append', index=False)
    housing.to_sql('additional_housing',
                   engine,
                   if_exists='append',
                   index=False)
    failures.to_sql('low_accuracies', engine, if_exists='append', index=False)
    make_query("delete from low_accuracies where fixed=true")

    myprint(
        f"Done implementing fixes. There were {len(failures)} failed fixes out of {len(fixed)} attempts."
    )
コード例 #14
0
def update_workers_and_occupancy_columns():

    # merging TOTAL_WORKERS_H-2A_REQUESTED with TOTAL_WORKERS_NEEDED column - this should happen automatically, but just in case something is missed:
    make_query("""
                UPDATE job_central
                    SET "TOTAL_WORKERS_NEEDED" = "TOTAL_WORKERS_H-2A_REQUESTED"
                    WHERE "TOTAL_WORKERS_NEEDED" IS NULL
              """)

    make_query("""
                UPDATE low_accuracies
                    SET "TOTAL_WORKERS_NEEDED" = "TOTAL_WORKERS_H-2A_REQUESTED"
                    WHERE "TOTAL_WORKERS_NEEDED" IS NULL
              """)

    # updating occupancy_minus_workers workers column
    make_query("""
                UPDATE job_central
                    SET occupancy_minus_workers = "TOTAL_OCCUPANCY" - "TOTAL_WORKERS_NEEDED"
                    WHERE "TOTAL_OCCUPANCY" IS NOT NULL AND
                          "TOTAL_WORKERS_NEEDED" IS NOT NULL
              """)

    make_query("""
                UPDATE low_accuracies
                    SET occupancy_minus_workers = "TOTAL_OCCUPANCY" - "TOTAL_WORKERS_NEEDED"
                    WHERE "TOTAL_OCCUPANCY" IS NOT NULL AND
                          "TOTAL_WORKERS_NEEDED" IS NOT NULL
              """)

    # updating W to H Ratio column
    make_query("""
                UPDATE job_central
                    SET "W to H Ratio" =
                    CASE WHEN "TOTAL_WORKERS_NEEDED" > "TOTAL_OCCUPANCY" THEN 'W>H'
                    WHEN "TOTAL_WORKERS_NEEDED" < "TOTAL_OCCUPANCY" THEN 'W<H'
                    ELSE 'W=H'
                    END
                WHERE "TOTAL_OCCUPANCY" IS NOT NULL AND
                      "TOTAL_WORKERS_NEEDED" IS NOT NULL
              """)

    make_query("""
                UPDATE low_accuracies
                    SET "W to H Ratio" =
                    CASE WHEN "TOTAL_WORKERS_NEEDED" > "TOTAL_OCCUPANCY" THEN 'W>H'
                    WHEN "TOTAL_WORKERS_NEEDED" < "TOTAL_OCCUPANCY" THEN 'W<H'
                    ELSE 'W=H'
                    END
                WHERE "TOTAL_OCCUPANCY" IS NOT NULL AND
                      "TOTAL_WORKERS_NEEDED" IS NOT NULL
              """)
コード例 #15
0
def send_fixes_to_postgres():
    fixed = pd.read_sql_query('select * from low_accuracies where fixed=true',
                              con=engine)

    if len(fixed) == 0:
        myprint("No jobs have been fixed.")
        return

    myprint(f"{len(fixed)} jobs have been fixed.")
    central, housing, failures = implement_fixes(fixed)
    assert len(central) + len(housing) + len(failures) == len(fixed)
    myprint(f"{len(central)} rows moving from low_accuracies to job_central.")
    myprint(
        f"{len(housing)} rows moving from low_accuracies to additional housing."
    )
    myprint(f"{len(failures)} failed fixes.")

    # saving appropriate fixes to previously_fixed table
    prev_fixed_columns = [
        "HOUSING_ADDRESS_LOCATION", "HOUSING_CITY", "HOUSING_POSTAL_CODE",
        "HOUSING_STATE", "fixed", "housing accuracy", "housing accuracy type",
        "housing_fixed_by", "housing_lat", "housing_long", "notes"
    ]
    fixed_by_vals_to_save_fixes_for = ["coordinates", "address", "impossible"]

    central_for_prev_fixed_table = central[central["housing_fixed_by"].isin(
        fixed_by_vals_to_save_fixes_for)][prev_fixed_columns]
    housing_for_prev_fixed_table = housing[housing["housing_fixed_by"].isin(
        fixed_by_vals_to_save_fixes_for)][prev_fixed_columns]
    for_prev_fixed_table = central_for_prev_fixed_table.append(
        housing_for_prev_fixed_table)

    if not for_prev_fixed_table.empty:
        myprint(
            f"There are {len(for_prev_fixed_table)} rows to add to the previously_fixed table."
        )
        for_prev_fixed_table["initial_address"] = for_prev_fixed_table.apply(
            lambda job: handle_null(job["HOUSING_ADDRESS_LOCATION"]) +
            handle_null(job["HOUSING_CITY"]) + handle_null(job[
                "HOUSING_STATE"]) + handle_null(job["HOUSING_POSTAL_CODE"]),
            axis=1)
        for_prev_fixed_table = for_prev_fixed_table.drop_duplicates(
            subset='initial_address', keep="last")
        for_prev_fixed_table.to_sql("previously_fixed",
                                    engine,
                                    if_exists='append',
                                    index=False)
        myprint(f"All rows successfully added to the previously_fixed table.")
    else:
        myprint(f"No rows to add to the previously_fixed table.")

    # adding fixes to appropriate tables and deleting appropriate rows from low_accuracies
    central.to_sql('job_central', engine, if_exists='append', index=False)
    housing.to_sql('additional_housing',
                   engine,
                   if_exists='append',
                   index=False)

    # adding failed fixes back to low_accuracies - can't just use to_sql because of primary key
    for i, job in failures.iterrows():
        query = text("""UPDATE low_accuracies SET
                    (fixed, housing_lat, housing_long, "HOUSING_ADDRESS_LOCATION", "HOUSING_CITY",
                    "HOUSING_STATE", "HOUSING_POSTAL_CODE", notes, housing_fixed_by)
                     =
                     (:fixed, :lat, :long, :address, :city, :state, :zip, :notes, :fixed_by)
                     WHERE id = :id""")

        with engine.connect() as connection:
            connection.execute(query,
                               fixed=job["fixed"],
                               lat=job["housing_lat"],
                               long=job["housing_long"],
                               address=job["HOUSING_ADDRESS_LOCATION"],
                               city=job["HOUSING_CITY"],
                               state=job["HOUSING_STATE"],
                               zip=job["HOUSING_POSTAL_CODE"],
                               notes=job["notes"],
                               fixed_by=job["housing_fixed_by"],
                               id=job["id"])

    # all rows that had fixed as true are now either in job_central or in low_accuracies as a new row with fixed_by as failed and fixed as false
    make_query("delete from low_accuracies where fixed=true")

    myprint(
        f"Done implementing fixes. There were {len(failures)} failed fixes out of {len(fixed)} attempts."
    )