def persist_parsed_cases(cases: List[Dict[str, Any]]) -> None:
    """Sends each parsed case to SQL, then logs and emails any case numbers that failed."""
    import persist

    logger.info(
        f"Finished making case list, now will send all {len(cases)} cases to SQL."
    )

    failed_cases = []
    for parsed_case in cases:
        try:
            persist.rest_case(parsed_case)
        except Exception:
            try:
                failed_cases.append(parsed_case["case_number"])
            except KeyError:
                logger.error(
                    "A case failed to be persisted, but it doesn't have a case number."
                )

    if failed_cases:
        error_message = f"Failed to send the following case numbers to SQL:\n{', '.join(failed_cases)}"
        log_and_email(
            error_message,
            "Case Numbers for Which Sending to SQL Failed",
            error=True,
        )
    logger.info("Finished sending cases to SQL.")
def fetch_parsed_case(case_id: str) -> Optional[Dict[str, Any]]:
    query_result = query_case_id(case_id)
    if query_result is None:
        return None
    result_page, register_page = query_result
    result_soup = BeautifulSoup(result_page, "html.parser")
    register_soup = BeautifulSoup(register_page, "html.parser")

    register_url = hearing.get_register_url(result_soup)
    status, case_type = hearing.get_status_and_type(result_soup)

    if status.lower() not in hearing.statuses_map:
        if config.local_dev:
            log_and_email(
                f"Case {case_id} has status '{status}', which is not in our list of known statuses.",
                "Found Unknown Status",
                error=True,
            )
        else:
            logger.info(
                f"Case {case_id} has status '{status}', which is not in our list of known statuses."
            )

    return hearing.make_parsed_case(
        soup=register_soup, status=status, type=case_type, register_url=register_url
    )
def fetch_parsed_case(case_id: str) -> Optional[Dict[str, Any]]:
    query_result = fetch_page.query_case_id(case_id)
    if query_result is None:
        return None
    result_page, register_page = query_result
    result_soup = BeautifulSoup(result_page, "html.parser")
    register_soup = BeautifulSoup(register_page, "html.parser")

    register_url = get_register_url(result_soup)
    status, case_type = get_status_and_type(result_soup)

    if status.lower() not in statuses_map:
        load_dotenv()
        if os.getenv("LOCAL_DEV") != "true":
            log_and_email(
                f"Case {case_id} has status '{status}', which is not in our list of known statuses.",
                "Found Unknown Status",
                error=True)
        else:
            logger.info(
                f"Case {case_id} has status '{status}', which is not in our list of known statuses."
            )

    return make_parsed_case(soup=register_soup,
                            status=status,
                            type=case_type,
                            register_url=register_url)
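# Illustrative call with a hypothetical case ID (not from the original source):
# fetch_parsed_case returns None when the case pages can't be retrieved, otherwise
# the parsed-case dict built by make_parsed_case.
# case = fetch_parsed_case("J1-CV-20-000001")
# if case is not None:
#     print(case["case_number"])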
def active_or_inactive(status: str) -> str:
    """Returns "Active" or "Inactive" based on a case's substatus, or "" if the substatus is unknown."""
    status = status.lower()
    if status in statuses_map:
        return "Active" if statuses_map[status]["is_active"] else "Inactive"
    else:
        log_and_email(
            f"Can't figure out whether case with substatus '{status}' is active or inactive because '{status}' is not in our statuses map dictionary.",
            "Encountered Unknown Substatus",
            error=True)
        return ""
Example #5
def make_case_list(ids_to_parse: List[str]) -> List[Dict[str, Any]]:
    """Gets case details for each case number in `ids_to_parse`"""

    parsed_cases, failed_ids = [], []
    for id_to_parse in ids_to_parse:
        new_case = fetch_page.fetch_parsed_case(id_to_parse)
        if new_case:
            parsed_cases.append(new_case)
        else:
            failed_ids.append(id_to_parse)

    if failed_ids:
        error_message = f"Failed to scrape data for {len(failed_ids)} case numbers. Here they are:\n{', '.join(failed_ids)}"
        log_and_email(error_message, "Failed Case Numbers", error=True)

    return parsed_cases
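# Illustrative usage (fabricated case numbers): any IDs that fail to parse are
# reported via log_and_email, and the rest come back as parsed-case dicts.
# cases = make_case_list(["J1-CV-20-000001", "J1-CV-20-000002"])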
Example #6
def perform_task_and_catch_errors(task_function, task_name):
    """
    Calls the function `task_function` named `task_name` (just used for logging purposes)
    Logs and emails error message if there is one
    """

    before = time.time()
    logger.info(f"\n{task_name}...")
    for tries in range(1, 2):
        try:
            task_function()
            logger.info(f"Finished {task_name} in {round(time.time() - before, 2)} seconds.")
            return
        except Exception as error:
            logger.error(f"Unanticipated Error {task_name} on attempt {tries} of 1:\n{str(error)}")
    log_and_email(f"{task_name} failed on every attempt. Check Heroku logs for more details.", f"{task_name} failed", error=True)
Example #7
def create_jpdata_df() -> DataFrame:
    """Creates a DataFrame with various fields to replicate the JPData2 csv on arcGIS"""
    def handle_null(expected_string: str) -> str:
        if pd.isnull(expected_string):
            return ""
        else:
            return expected_string

    def get_case_status(case):
        substatus = handle_null(case["Substatus"]).lower()
        if substatus in statuses_map:
            return statuses_map[substatus]["status"]
        else:
            log_and_email(
                f"Can't figure out the Status column of the JPData csv for {case['Case_Num']} because '{substatus}' is not in our statuses map dictionary.",
                "Encountered Unknown Substatus",
                error=True)
            return None
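    # The original example is truncated here. A typical continuation (an assumption,
    # not the repo's code) would apply get_case_status row-wise to the cases
    # DataFrame, e.g. df["Status"] = df.apply(get_case_status, axis=1), before
    # returning the assembled JPData DataFrame.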
Example #8
def parse_all_from_parse_filings(case_nums: List[str],
                                 showbrowser=False) -> List[Dict[str, Any]]:
    """
    Gets case details for each case number in `case_nums` and sends the data to PostgreSQL.
    Logs any case numbers for which getting data failed.
    """

    if showbrowser:
        from selenium import webdriver

        fetch_page.driver = webdriver.Chrome("./chromedriver")

    parsed_cases = make_case_list(case_nums)
    logger.info(
        f"Finished making case list, now will send all {len(parsed_cases)} cases to SQL."
    )

    failed_cases = []
    for parsed_case in parsed_cases:
        try:
            persist.rest_case(parsed_case)
        except Exception:
            try:
                failed_cases.append(parsed_case["case_number"])
            except KeyError:
                logger.error(
                    "A case failed to be persisted, but it doesn't have a case number."
                )

    if failed_cases:
        error_message = f"Failed to send the following case numbers to SQL:\n{', '.join(failed_cases)}"
        log_and_email(error_message,
                      "Case Numbers for Which Sending to SQL Failed",
                      error=True)
    logger.info("Finished sending cases to SQL.")

    return parsed_cases
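# Illustrative usage (fabricated case numbers): showbrowser=True assumes a local
# ./chromedriver binary next to the script; otherwise the module's default driver is used.
# parse_all_from_parse_filings(["J1-CV-20-000001", "J1-CV-20-000002"], showbrowser=False)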
Example #9
def update_features(layer_name: str):
    """Handles updating of features for JPPrecincts and JPZips layers because we can't just overwrite them (doing so ruins the joins associated with them on arcGIS)"""

    gis = GIS(url='https://www.arcgis.com',
              username=ARCGIS_USERNAME,
              password=ARCGIS_PASSWORD)

    feature_layer = gis.content.search(f"title: {layer_name}",
                                       'Feature Layer')[0].tables[0]
    feature_set = feature_layer.query()
    all_features = [feature.as_dict for feature in feature_set.features]

    if layer_name == "JPZips":
        logger.info("Updating zip codes csv...")
        new_features = create_zips_df()

        def create_feature(zip_code, num_filings):
            row_with_same_zip_id = [
                feature["attributes"]["ObjectId"] for feature in all_features
                if str(feature["attributes"]["ZIP_Code"]) == zip_code
            ]
            if row_with_same_zip_id:
                assert len(row_with_same_zip_id) == 1
                return {
                    "attributes": {
                        "Number_of_Filings": num_filings,
                        "ObjectId": row_with_same_zip_id[0]
                    }
                }
            else:
                return {
                    "attributes": {
                        "Number_of_Filings": num_filings,
                        "ZIP_Code": zip_code,
                        "GEOID_Data": "8600US" + zip_code
                    }
                }

        all_zip_codes = [
            str(feature["attributes"]["ZIP_Code"]) for feature in all_features
        ]

        features_created = [
            create_feature(row["ZIP_Code"], row["Number_of_Filings"])
            for i, row in new_features.iterrows()
        ]
        for zip_code in all_zip_codes:
            if zip_code not in new_features["ZIP_Code"].tolist():
                features_created.append(create_feature(zip_code, 0))

        features_for_update = [
            feature for feature in features_created
            if "ObjectId" in feature["attributes"]
        ]
        features_to_add = [
            feature for feature in features_created
            if "ZIP_Code" in feature["attributes"]
        ]

        # get zip codes that are in our data but missing from the layer with which we're joining
        to_join_layer = gis.content.search(
            "title: Updated_Travis_ZIPs_12022020",
            'Feature Layer')[0].layers[0]
        to_join_features = [
            feature.as_dict for feature in to_join_layer.query().features
        ]
        to_join_zip_codes = [
            str(feature["attributes"]["ZIP_Code"]) for feature in to_join_features
        ]
        zips_in_our_data_but_not_to_join = [
            zip_code for zip_code in all_zip_codes if zip_code not in to_join_zip_codes
        ]
        if zips_in_our_data_but_not_to_join:
            log_and_email(
                f"The following zip codes are in our scraped data but not the Updated_Travis_ZIPs_12022020 arcGIS later:\n{zips_in_our_data_but_not_to_join}",
                "New Zip Codes")

    else:
        logger.info("Updating precincts CSV...")
        new_features = create_precincts_df()

        def create_feature(row):
            return {
                "attributes": {
                    "Count_":
                    row["Count"],
                    "ObjectId": [
                        feature["attributes"]["ObjectId"]
                        for feature in all_features
                        if int(feature["attributes"]["Preceinct"]) == int(
                            row["Precinct"])
                    ][0]
                }
            }

        features_for_update = [
            create_feature(row) for i, row in new_features.iterrows()
        ]
        features_to_add = []

    update_response = feature_layer.edit_features(updates=features_for_update,
                                                  adds=features_to_add)
    update_statuses = [
        result['success'] for result in update_response['updateResults']
    ]
    add_statuses = [
        result['success'] for result in update_response['addResults']
    ]

    if all(update_statuses) and all(add_statuses):
        logger.info(f"Updating {layer_name} succeeded for all rows.")
    else:
        log_and_email(
            f"Updating {layer_name} failed for at least one row, here's the info: {update_response}",
            "Error Updating ArcGIS CSV",
            error=True)
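# Illustrative usage, not from the original source: only "JPZips" is special-cased,
# so any other layer name (e.g. "JPPrecincts") is handled as the precincts layer.
# Assumes ARCGIS_USERNAME / ARCGIS_PASSWORD are configured as in the surrounding module.
# update_features("JPZips")
# update_features("JPPrecincts")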