Example #1
def mirror_old_style_records_etl():
    """Ensure the old-style records exist for all present new-style ones."""
    start_time = datetime.datetime.now()
    LOG.info("Start: Mirror record documents in old-style naming.")
    conn = credential.UNCPathCredential(path.RLID_DATA_SHARE,
                                        **credential.RLID_DATA_SHARE)
    count = Counter()
    with conn:
        for doc_path in rlid_record_paths():
            doc_name = os.path.basename(doc_path)
            doc_id, ext = os.path.splitext(doc_name)
            old_style_path = rlid_record_path_old(doc_id, ext)
            if not old_style_path:
                count["not in database"] += 1
            elif os.path.exists(old_style_path):
                count["already mirrored"] += 1
            elif place_record_old(doc_path):
                count["mirrored"] += 1
            else:
                count["failed to mirror"] += 1
                LOG.warning("%r failed to mirror to %r.", doc_name,
                            old_style_path)
    document.log_state_counts(count, documents_type="records")
    LOG.info("End: Mirror.")
    elapsed(start_time, LOG)
Example #2
def petition_documents_map():
    """Return mapping of petition ID to list of document attribute dictionaries.

    The Access database has infuriatingly named objects, so we need to munge the
    values in the query rather than joining them from the database table directly.

    Returns:
        dict
    """
    sql = """
        select
            ltrim(rtrim([id_num])) as petition_id,
            replace(
                replace(ucase(ltrim(rtrim([tiffname]))), '.PDF', ''), '.TIF', ''
            ) as document_name,
            ltrim(rtrim([image type])) as document_type
        from [image path]
        where [id_num] is not null and [tiffname] is not null
        group by [id_num], [tiffname], [image type];
    """
    odbc_conn = pyodbc.connect(
        database.access_odbc_string(PATH["petition_database"]))
    unc_conn = credential.UNCPathCredential(path.EUGENE_DATABASE_SHARE,
                                            **credential.CEDOM100)
    with odbc_conn, unc_conn:
        documents = defaultdict(list)
        for petition_id, document_name, document_type in odbc_conn.execute(
                sql):
            documents[petition_id].append({
                "document_name": document_name,
                "document_type": document_type
            })
    return documents
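The munging the query performs on `[tiffname]` is easier to see outside Access SQL. A rough Python equivalent (a sketch only; `raw_name` is a hypothetical value pulled straight from the `[image path]` table, not something defined in this module):
```
def normalized_document_name(raw_name):
    """Trim whitespace, uppercase & drop the extension, as the query does."""
    name = raw_name.strip().upper()
    for extension in (".PDF", ".TIF"):
        name = name.replace(extension, "")
    return name

# e.g. normalized_document_name("  img0042.tif ") == "IMG0042"
```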
Example #3
def plat_maps_update():
    """Run update for RLID plat map repository."""
    root_path = REPO_PATH["plat"]
    source_root_path = os.path.join(path.EUGENE_IMAGES_SHARE, "PLAT")
    conn = credential.UNCPathCredential(
        path.RLID_DATA_SHARE, **credential.RLID_DATA_SHARE
    )
    source_conn = credential.UNCPathCredential(source_root_path, **credential.CEDOM100)
    with conn, source_conn:
        # Currently only Eugene provides plat maps for RLID.
        document.update_repository(
            root_path,
            source_root_path,
            file_extensions=[".jpg", "jpeg", ".pdf", ".tif", ".tiff"],
            flatten_tree=True,
            create_pdf_copies=True,
        )
Example #4
def deeds_records_update():
    """Run update for deeds & records documents RLID repository."""
    start_time = datetime.datetime.now()
    PATH["logfile"] = os.path.join(
        PATH["staging"], "Deeds_Records_Update_{}.log".format(start_time.year))
    conn = credential.UNCPathCredential(PATH["staging"],
                                        **credential.RLID_DATA_SHARE)
    with conn:
        # Attach logfile handler for staging logfile.
        logfile = logging.FileHandler(PATH["logfile"])
        logfile.setLevel(logging.INFO)
        logfile.setFormatter(LOGFILE_FORMATTER)
        LOG.addHandler(logfile)
        LOG.info("START SCRIPT: Update RLID deeds & records repository.")
        LOG.info(
            "Start: Move deeds & records drop-files to staging directory.")
        drop_extensions = [".exe", ".pdf", ".zip"
                           ] + document.IMAGE_FILE_EXTENSIONS
        for file_name in os.listdir(PATH["drop"]):
            file_path = os.path.join(PATH["drop"], file_name)
            file_extension = os.path.splitext(file_name)[-1].lower()
            if os.path.isfile(file_path) and file_extension in drop_extensions:
                move_path = os.path.join(PATH["staging"], file_name)
                shutil.move(file_path, move_path)
                LOG.info("Moved %r to %r.", file_path, move_path)
        LOG.info("End: Move.")
        LOG.info("Start: Extract record archives.")
        count = Counter()
        for file_path in path.folder_file_paths(PATH["staging"]):
            if os.path.splitext(file_path)[-1].lower() in [".exe", ".zip"]:
                count[extract_records(file_path, archive_original=True)] += 1
        document.log_state_counts(count, documents_type="archives")
        # D&R archives include a few log & reference files; delete if present.
        for file_path in path.folder_file_paths(PATH["staging"]):
            for pattern in ["_logfile", "_xreffile"]:
                if pattern.lower() in file_path.lower():
                    os.remove(file_path)
        LOG.info("Start: Replace record images with PDFs.")
        count = Counter()
        for file_path in path.folder_file_paths(PATH["staging"]):
            if (os.path.splitext(file_path)[-1].lower()
                    in document.IMAGE_FILE_EXTENSIONS):
                count[convert_image(file_path, delete_original=True)] += 1
        document.log_state_counts(count, documents_type="images")
        LOG.info("Start: Place record PDFs in RLID repository.")
        count = Counter()
        for file_path in path.folder_file_paths(PATH["staging"]):
            if os.path.splitext(file_path)[-1].lower() == ".pdf":
                old_state = place_record_old(file_path)
                new_state = place_record(
                    file_path, delete_original=(old_state == "placed"))
                count.update([old_state, new_state])
        document.log_state_counts(count, documents_type="records")
    elapsed(start_time, LOG)
    LOG.info("END SCRIPT")
def weekly_datasets_etl():
    """Run ETL for map server datasets with weekly update cycle.

    This script should only be used for updating geodatabase datasets & other managed
    data stores. Purely file-based formats like shapefiles are best updated via
    `file_datasets_etl`, for reasons related to locking mechanisms.
    """
    conn = credential.UNCPathCredential(DATA_PATH, **credential.CPA_MAP_SERVER)
    with conn:
        for kwargs in DATASET_KWARGS_WEEKLY:
            if kwargs.get("source_path"):
                transform.etl_dataset(**kwargs)
def locators_etl():
    """Run ETL for map server locators/geocoders.

    Need to shut down service before rebuilding.
    """
    conn = credential.UNCPathCredential(DATA_PATH, **credential.CPA_MAP_SERVER)
    with conn:
        token = arcetl.services.generate_token(SERVER_URL, **credential.RLID_MAPS)
        for kwargs in LOCATOR_KWARGS:
            arcetl.services.toggle_service(token=token, stop_service=True, **kwargs)
            arcetl.workspace.build_locator(**kwargs)
            arcetl.services.toggle_service(token=token, start_service=True, **kwargs)
def rlidgeo_datasets_etl():
    """Run ETL for map server datasets in the RLIDGeo replica geodatabase."""
    conn = credential.UNCPathCredential(DATA_PATH, **credential.CPA_MAP_SERVER)
    with conn:
        for name in arcetl.workspace.dataset_names(database.RLIDGEO.path):
            if any(
                pattern.lower() in name.lower()
                for pattern in IGNORE_PATTERNS_RLIDGEO_SNAPSHOT
            ):
                LOG.warning("%s matches ignore-pattern: Skipping.", name)
                continue

            transform.etl_dataset(
                source_path=os.path.join(database.RLIDGEO.path, name),
                output_path=os.path.join(DATA_PATH, "RLIDGeo.gdb", name.split(".")[-1]),
            )
def file_datasets_etl():
    """Run ETL for map server file-based datasets.

    This script should only be used for updating shapefiles & other purely file-based
    datasets. Managed data store formats like geodatabases are best updated via
    `etl_gimap_dataset`, for reasons related to locking mechanisms.

    Essentially, file-based formats will not append-load onto shapefiles locked by a
    service. So we pre-load them to a staging copy, where a server-side batch script
    can clear the locks & wholly replace the files.
    """
    conn = credential.UNCPathCredential(STAGING_PATH, **credential.CPA_MAP_SERVER)
    with conn:
        for kwargs in DATASET_KWARGS_FILE:
            if kwargs.get("source_path"):
                transform.etl_dataset(**kwargs)
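The server-side batch replace mentioned in the docstring is not part of this module; a minimal sketch of what that step might look like, assuming hypothetical staging & serving paths and that the locks have already been cleared:
```
import os
import shutil


def replace_staged_shapefiles(staging_path, serving_path):
    """Copy every staged shapefile part over its served counterpart."""
    part_extensions = (".shp", ".shx", ".dbf", ".prj", ".sbn", ".sbx", ".cpg")
    for file_name in os.listdir(staging_path):
        if file_name.lower().endswith(part_extensions):
            shutil.copy2(
                os.path.join(staging_path, file_name),
                os.path.join(serving_path, file_name),
            )
```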
def service_datasets_monthly_etl():
    """Run ETL for GIMap datasets with weekly update cycle.

    This script should only be used for updating geodatabase datasets & other
    managed data stores. Purely file-based formats like shapefiles are best
    updated in another manner, for reasons related to locking mechanisms.
    """
    conn = credential.UNCPathCredential(path.RLID_MAPS_DATA_SHARE,
                                        **credential.CPA_MAP_SERVER)
    with conn:
        for gdb_relpath in sorted(KWARGS_MONTHLY_DATASETS):
            LOG.info("Update datasets in %s", gdb_relpath)
            gdb_path = os.path.join(DATA_PATH, gdb_relpath)
            for kwargs in KWARGS_MONTHLY_DATASETS[gdb_relpath]:
                kwargs["output_path"] = os.path.join(gdb_path,
                                                     kwargs["output_name"])
                transform.etl_dataset(**kwargs)
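For context, an entry in `KWARGS_MONTHLY_DATASETS` presumably maps a geodatabase relative path to a list of dataset keyword-argument dictionaries along these lines (illustrative values only; the real mapping is defined elsewhere in the module):
```
KWARGS_MONTHLY_DATASETS = {
    "RLIDGeo.gdb": [
        {
            "source_path": os.path.join(database.RLIDGEO.path, "dbo.TaxLot"),
            "output_name": "TaxLot",
        },
    ],
}
```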
Example #10
def plat_documents_map():
    """Return mapping of plat document ID to list of document attribute dictionaries.

    The Access database has infuriatingly named objects, so we need to munge the
    values in the query rather than joining them from the database table directly.

    Returns:
        dict
    """
    sql = """
        select
            ltrim(rtrim([plat number])) as document_number,
            ltrim(rtrim([subdivision name])) as plat_name,
            replace(
                replace(ucase(ltrim(rtrim([image name]))), '.PDF', ''), '.TIF', ''
            ) as document_name,
            ucase(ltrim(rtrim([Description]))) as document_type
        from [image path]
        where
            [plat number] is not null
            and [plat number] <> 0
            and [image name] is not null
        group by
            [subdivision name], [plat number], [image name], [description];
    """
    odbc_conn = pyodbc.connect(
        database.access_odbc_string(PATH["plat_database"]))
    unc_conn = credential.UNCPathCredential(path.EUGENE_DATABASE_SHARE,
                                            **credential.CEDOM100)
    with odbc_conn, unc_conn:
        documents = defaultdict(list)
        for (
                document_number,
                plat_name,
                document_name,
                document_type,
        ) in odbc_conn.execute(sql):
            documents[int(document_number)].append({
                "plat_name": plat_name,
                "document_name": document_name,
                "document_type": document_type,
            })
    return documents
Example #11
def lcso_cad_datasets_etl():
    """Run ETL for LSCO CAD delivery datasets."""
    for dataset_name, kwargs in DATASET_KWARGS.items():
        kwargs["output_path"] = os.path.join(DELIVERABLES_PATH, dataset_name + ".shp")
        transform.etl_dataset(**kwargs)
    zip_name = "LCSO_CAD_{}.zip".format(datestamp())
    zip_path = os.path.join(path.RLID_MAPS_WWW_SHARE, "Download", zip_name)
    conn = credential.UNCPathCredential(
        path.RLID_MAPS_WWW_SHARE, **credential.CPA_MAP_SERVER
    )
    with conn:
        path.archive_directory(
            directory_path=DELIVERABLES_PATH,
            archive_path=zip_path,
            directory_as_base=False,
            archive_exclude_patterns=[".lock", ".zip"],
        )
    zip_url = url.RLID_MAPS + "Download/" + zip_name
    send_links_email(urls=[zip_url], **MESSAGE_KWARGS)
Example #12
def property_cards_staging_update():
    """Run update for RLID assessor property card staging repository."""
    LOG.info("Start: Update assessor property card staging repository.")
    start_time = datetime.datetime.now()
    source_paths = document.repository_file_paths(path.LANE_PROPERTY_CARDS)
    conn = credential.UNCPathCredential(
        path.RLID_DATA_STAGING_SHARE, **credential.RLID_DATA_SHARE
    )
    with conn:
        count = Counter()
        for source_path in source_paths:
            staging_path = os.path.join(
                REPO_PATH["property-card-staging"], os.path.basename(source_path)
            )
            if document.changed(staging_path, source_path):
                result_key = document.update_document(source_path, staging_path)
                count[result_key] += 1
    LOG.info("End: Update.")
    document.log_state_counts(count, documents_type="property cards (staging)")
    elapsed(start_time, LOG)
Example #13
def tax_maps_staging_update():
    """Run update for RLID tax map staging repository."""
    LOG.info("Start: Update tax map staging repository.")
    start_time = datetime.datetime.now()
    conn = credential.UNCPathCredential(
        path.RLID_DATA_STAGING_SHARE, **credential.RLID_DATA_SHARE
    )
    with conn:
        count = Counter()
        for source_path in document.repository_file_paths(path.LANE_TAX_MAP_IMAGES):
            staging_path = os.path.join(
                REPO_PATH["tax-map-staging"],
                # Tax maps have a one-deep bin.
                os.path.split(os.path.dirname(source_path))[-1],
                os.path.basename(source_path),
            )
            if document.changed(staging_path, source_path):
                result_key = document.update_document(source_path, staging_path)
                count[result_key] += 1
    document.log_state_counts(count, documents_type="tax maps (staging)")
    elapsed(start_time, LOG)
    LOG.info("End: Update.")
Example #14
def property_cards_update():
    """Run update for assessor property card RLID production repository."""
    LOG.info("Start: Update RLID assessor property card repository.")
    start_time = datetime.datetime.now()
    staging_paths = document.repository_file_paths(
        REPO_PATH["property-card-staging"], file_extensions=[".pdf"]
    )
    conn = credential.UNCPathCredential(
        path.RLID_DATA_SHARE, **credential.RLID_DATA_SHARE
    )
    with conn:
        count = Counter()
        for staging_path in staging_paths:
            rlid_path = rlid_document_path(
                os.path.basename(staging_path), document_type="property-card"
            )
            if document.changed(rlid_path, staging_path):
                result_key = document.update_document(staging_path, rlid_path)
                count[result_key] += 1
    LOG.info("End: Update.")
    document.log_state_counts(count, documents_type="property cards")
    elapsed(start_time, LOG)
def tillamook_delivery_etl():
    """Run ETL for Tillamook delivery."""
    name = "Tillamook"
    gdb_path = os.path.join(PATH["tillamook_deliverables"], name + ".gdb")
    for dataset_name, kwargs in chain(TILLAMOOK_DATASET_KWARGS.items(),
                                      TILLAMOOK_GIS_DATASET_KWARGS.items()):
        kwargs["output_path"] = os.path.join(gdb_path, dataset_name)
        transform.etl_dataset(**kwargs)
    zip_name = "{}_{}.zip".format(name, datestamp())
    zip_path = os.path.join(path.RLID_MAPS_WWW_SHARE, "Download", zip_name)
    conn = credential.UNCPathCredential(path.RLID_MAPS_WWW_SHARE,
                                        **credential.CPA_MAP_SERVER)
    with conn:
        path.archive_directory(
            directory_path=gdb_path,
            archive_path=zip_path,
            directory_as_base=True,
            archive_exclude_patterns=[".lock"],
        )
    zip_url = url.RLID_MAPS + "Download/" + zip_name
    send_message_tillamook(zip_url,
                           metadata_where_sql="in_tillamook = 1",
                           **TILLAMOOK_MESSAGE_KWARGS)
Example #16
def tax_maps_update():
    """Run update for RLID tax map repository."""
    start_time = datetime.datetime.now()
    conn = credential.UNCPathCredential(
        path.RLID_DATA_SHARE, **credential.RLID_DATA_SHARE
    )
    with conn:
        # Attach logfile handler for repository update logfile.
        logfile = logging.FileHandler(
            os.path.join(
                REPO_PATH["tax-map"], "Tax_Map_Update_{}.log".format(start_time.year)
            )
        )
        logfile.setLevel(logging.INFO)
        logfile.setFormatter(LOGFILE_FORMATTER)
        LOG.addHandler(logfile)
        LOG.info("START SCRIPT: Update RLID tax map repository from staging.")
        file_name_release_date = tax_map_file_name_release_map(
            start_datetime=rlid_data_currency("Tax Maps")
        )
        count = Counter()
        # Iterate through path/date map, adding, archiving & updating.
        for file_name, release_date in file_name_release_date.items():
            rlid_path = rlid_document_path(file_name, document_type="tax-map")
            staging_path = rlid_document_path(
                file_name, document_type="tax-map-staging"
            )
            result_key = update_tax_map(
                staging_path, rlid_path, release_date, archive_previous=True
            )
            count[result_key] += 1
    document.log_state_counts(count, documents_type="tax maps")
    # Finally, update tax map repository currency date (if we placed any).
    if count["updated"]:
        rlid_data_currency_setter("Tax Maps", max(file_name_release_date.values()))
    elapsed(start_time, LOG)
    LOG.info("END SCRIPT: Update")
Example #17
def missing_in_rlid_etl():
    """Run ETL for log of deeds & records documents missing in RLID."""
    start_time = datetime.datetime.now()
    LOG.info(
        "Start: Compile table of deeds & records listed in Lane County records system,"
        + " but not present in RLID repository.")
    conn = credential.UNCPathCredential(PATH["staging"],
                                        **credential.RLID_DATA_SHARE)
    csv_path = os.path.join(PATH["staging"], "Missing_in_RLID.csv")
    check_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M")
    missing_count = 0
    with conn:
        with open(csv_path, "wb") as csvfile:
            csvwriter = csv.writer(csvfile)
            csvwriter.writerow(["document_id", "document_path", "check_time"])
            for doc_path in rlid_record_paths():
                if not os.path.exists(doc_path):
                    doc_id = os.path.splitext(os.path.basename(doc_path))[0]
                    csvwriter.writerow((doc_id, doc_path, check_time))
                    missing_count += 1
    LOG.info("Found %s missing documents.", missing_count)
    LOG.info("End: Compile.")
    elapsed(start_time, LOG)
Example #18
def tax_maps_not_in_source_etl():
    """Run ETL for log of tax map documents in RLID but not source repository.

    We used to have an automatic check & retire for RLID tax maps that were no longer
    in the source repository. That check pretty much retired the entire tax map
    repository on the night of 2015-05-07, because there appear to be times when the
    source repository is not reachable and/or reports no documents. For now, we will
    just log potential orphans.

    If you do need to "retire" a tax map no longer in use:
    1. Make an archive copy of the document with this function call:
        ```
        archive_tax_map(
            tax_map_path, archive_date=datetime.datetime.now(), is_replaced=False
        )
        ```
    2. Move the document file to the `RetiredNoReplacement` subfolder.
    3. Execute the following SQL statement:
        ```
        if exists (
            select 1 from RLID.dbo.Taxmap_Retired where image_filename = {file-name}
        ) begin
            update RLID.dbo.Taxmap_Retired
            set date_retired = {same-date-as-archive-above}
            where image_filename = {file-name};
        end
        else begin
            insert into RLID.dbo.Taxmap_Retired(image_filename, date_retired)
            values ({file-name}, {same-date-as-archive-above});
        end;
        delete from RLID.dbo.Taxmap_Image where image_filename = {file-name};
        ```
    """
    start_time = datetime.datetime.now()
    LOG.info(
        "Start: Compile table of tax maps not mirrored between the Lane County & RLID"
        " repositories.\nAny tax maps in RLID not mirrored in the county repositoryare"
        " likely tax maps that no longer exist, and should be researched (and perhaps"
        " retired)."
    )
    conn = credential.UNCPathCredential(
        path.RLID_DATA_SHARE, **credential.RLID_DATA_SHARE
    )
    with conn:
        check_time = start_time.strftime("%Y-%m-%d %H:%M")
        file_names = {
            "County": {
                fixed_file_name(name)
                for _, _, filenames in os.walk(REPO_PATH["tax-map-staging"])
                for name in filenames
                if name.lower().endswith(".pdf")
            },
            "RLID": {
                fixed_file_name(name)
                for name in os.listdir(REPO_PATH["tax-map"])
                if name.lower().endswith(".pdf")
            },
        }
        for repo, other in permutations(["County", "RLID"]):
            LOG.info("Checking %s repository for tax maps not mirrored.", repo)
            unmirrored_file_names = sorted(file_names[repo] - file_names[other])
            csv_path = os.path.join(
                REPO_PATH["tax-map"], "In_{}_Not_{}.csv".format(repo, other)
            )
            csv_file = open(csv_path, "wb")
            with csv_file:
                csv_ = csv.writer(csv_file)
                csv_.writerow(("file_name", "check_time"))
                for file_name in unmirrored_file_names:
                    csv_.writerow((file_name, check_time))
            LOG.info(
                "Found %s tax maps in %s repository not mirrored in %s.",
                len(unmirrored_file_names),
                repo,
                other,
            )
    LOG.info("End: Compile.")
    elapsed(start_time, LOG)