def main(config_file: str, search_text: str, dry_run: bool, delete: bool,
         debug: bool):
    """Create or update policies from config, or delete matching policies."""
    logging.basicConfig(
        format="[%(name)s][%(levelname)s][%(asctime)s] %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
        level=(logging.DEBUG if debug else logging.INFO),
    )
    config = parse_config(config_file=config_file)
    client = get_client(**config)

    if delete:
        if search_text is None:
            logging.error(
                "--delete must be invoked with an explicit value for --search-text"
            )
            return False
        return delete_existing_policies(client=client,
                                        dry_run=dry_run,
                                        search_text=search_text,
                                        debug=debug)
    else:
        return create_or_update_policies(
            client=client,
            config_root=config["config_root"],
            dry_run=dry_run,
            debug=debug,
        )
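
# The entry points in this listing are presumably wired to a command line
# elsewhere in the project. Below is a minimal sketch of such a wrapper for the
# policy entry point above, using argparse; the flag names, defaults, and
# exit-code handling are assumptions for illustration, not the real interface.
import argparse
import sys


def policy_cli():
    parser = argparse.ArgumentParser(
        description="Create, update, or delete policies.")
    parser.add_argument("--config-file", required=True)
    parser.add_argument("--search-text", default=None)
    parser.add_argument("--dry-run", action="store_true")
    parser.add_argument("--delete", action="store_true")
    parser.add_argument("--debug", action="store_true")
    args = parser.parse_args()

    ok = main(
        config_file=args.config_file,
        search_text=args.search_text,
        dry_run=args.dry_run,
        delete=args.delete,
        debug=args.debug,
    )
    # main() returns False when --delete is used without --search-text; this
    # assumes the underlying helpers also return a truthy value on success.
    sys.exit(0 if ok else 1)
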
def main(config_file: str, search_text: str, dry_run: bool, debug: bool):
    """Tag matching data sources and enrich their data dictionary columns."""
    logging.basicConfig(
        format="[%(name)s][%(levelname)s][%(asctime)s] %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
        level=(logging.DEBUG if debug else logging.INFO),
    )
    config = parse_config(config_file=config_file)
    client = get_client(**config)
    tagger = Tagger(config_root=config["config_root"])

    logging.info("Making tags")
    tagger.make_tags(client)

    logging.info("Gathering data sources to tag")
    data_sources_to_tag = []
    with Paginator(client.get_data_source_list,
                   search_text=search_text) as paginator:
        for data_source in paginator:
            data_sources_to_tag.append({
                "id": data_source["id"],
                "name": data_source["name"]
            })

    progress_iterator = tqdm(data_sources_to_tag)
    for data_source in progress_iterator:
        progress_iterator.set_description(
            desc=f"Tagging ID: {data_source['id']}, Name: {data_source['name']} :"
        )
        data_source_tags = tagger.get_tags_for_data_source(
            name=data_source["name"])
        if data_source_tags:
            logging.debug(f"Adding data source tags to {data_source['name']}.")
            if not dry_run:
                client.tag_data_source(id=data_source["id"],
                                       tag_data=data_source_tags)
        dictionary = client.get_data_source_dictionary(id=data_source["id"])
        enriched_columns = tagger.enrich_columns_with_tagging(
            dictionary.metadata)
        if enriched_columns == dictionary.metadata:
            logging.debug(
                f"No change to column tags for data source: {data_source['name']}. Skipping."
            )
            continue
        logging.debug(f"Enriched columns for {data_source['name']}:"
                      f" {dictionary.dict()['metadata']}")
        logging.info(
            f"Change detected to column tags. Updating data source {data_source['name']}'s data dictionary."
        )
        dictionary.metadata = enriched_columns
        if not dry_run:
            client.update_data_source_dictionary(id=data_source["id"],
                                                 dictionary=dictionary)
    logging.info("FIN.")
def main(
    config_file: str, search_text: str, hard_delete: bool, dry_run: bool, debug: bool
):
    """Bulk delete (or merely disable) every data source matching search_text."""
    logging.basicConfig(
        format="[%(name)s][%(levelname)s][%(asctime)s] %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
        level=(logging.DEBUG if debug else logging.INFO),
    )
    config = parse_config(config_file=config_file)
    client = get_client(**config)

    logging.info("Gathering data-stores to delete")
    data_sources_to_delete = []
    with Paginator(client.get_data_source_list, search_text=search_text) as paginator:
        for data_source in paginator:
            data_sources_to_delete.append(
                {"id": data_source["id"], "name": data_source["name"]}
            )

    if dry_run:
        logging.info("bulk-delete dry run")
        for data_source in data_sources_to_delete:
            logging.info(
                f"Data source Id: {data_source['id']}. Name: {data_source['name']}"
            )
    elif hard_delete:
        logging.info(
            f"Hard deleting {len(data_sources_to_delete)} data sources. "
            "These data sources cannot be restored later."
        )
        for data_source in tqdm(data_sources_to_delete, desc="Deleting"):
            logging.debug(f"Hard deleting {data_source['name']}")
            client.delete_data_source(data_source["id"])
    else:
        logging.info(
            f"Disabling {len(data_sources_to_delete)} data sources. "
            "These data sources can be restored later."
        )
        for data_source in tqdm(data_sources_to_delete, desc="Disabling"):
            logging.debug(f"Disabling {data_source['name']}")
            client.disable_data_source(data_source["id"])
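
# For reference, a dry-run invocation of the bulk-delete entry point above only
# logs the matching data sources; nothing is deleted or disabled. The config
# path and search text below are placeholders.
if __name__ == "__main__":
    main(
        config_file="config.yml",  # placeholder path
        search_text="staging_",    # placeholder filter
        hard_delete=False,
        dry_run=True,
        debug=False,
    )
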
def main(config_file: str, glob_prefix: str, debug: bool, dry_run: bool) -> bool:
    """Enroll data sources from dataset spec files; return True on no errors."""
    logging.basicConfig(
        format="[%(name)s][%(levelname)s][%(asctime)s] %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
        level=(logging.DEBUG if debug else logging.INFO),
    )

    no_enrollment_errors = True

    config = parse_config(config_file=config_file)
    client = get_client(**config)

    dataset_spec_filepath = os.path.join(
        config["config_root"], "enrolled_datasets", glob_prefix
    )
    LOGGER.debug(f"Globbing for files in {dataset_spec_filepath}")
    for filepath in glob.glob(dataset_spec_filepath):
        LOGGER.info("Processing file: %s", filepath)
        with open(filepath) as handle:
            dataset_spec = yaml.safe_load(handle)
        credentials = retrieve_credentials(dataset_spec["credentials"])
        dataset_spec["username"] = credentials["username"]
        dataset_spec["password"] = credentials["password"]

        schema_table_mapping = get_tables_in_database(client, dataset_spec)

        failed_tables = set()
        data_sources_to_enroll = [
            (dataset_spec["schemas_to_enroll"], data_sources_enroll_iterator),
            (dataset_spec["schemas_to_bulk_enroll"], data_sources_bulk_enroll_iterator),
        ]

        for schemas, enroll_iter in data_sources_to_enroll:
            if not schemas:
                continue
            for schema_object in schemas:
                for (data_source, handler) in enroll_iter(  # type: ignore
                    client=client,
                    schema_table_mapping=schema_table_mapping,
                    schema_obj=schema_object,
                    config=dataset_spec,
                ):
                    LOGGER.debug("Data source: %s", data_source.json())
                    if isinstance(handler, list):
                        LOGGER.debug("Handler[0]: %s", handler[0].json())
                    elif isinstance(handler, Handler):
                        LOGGER.debug("Handler: %s", handler.json())
                    else:
                        raise TypeError(
                            f"Unexpected type for handler; Got: {type(handler)}"
                        )
                    if not dry_run:
                        if not create_data_source(
                            client=client, data_source=data_source, handler=handler
                        ):
                            failed_tables.add(data_source.name)
        if failed_tables:
            no_enrollment_errors = False
            LOGGER.warning("Tables that failed creation:")
            for table in failed_tables:
                LOGGER.warning(table)

    LOGGER.info("Finished enrollment")
    return no_enrollment_errors
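
# The enrollment loop above reads only a handful of keys from each dataset spec
# file loaded with yaml.safe_load. Reconstructed from those accesses alone, a
# spec looks roughly like the dict below; the shape of the schema entries and
# the credentials reference format are assumptions, and any other fields the
# real specs carry are not shown.
example_dataset_spec = {
    # Passed to retrieve_credentials(); the returned dict must provide
    # "username" and "password", which are copied into the spec at runtime.
    "credentials": "secret-manager-reference",
    # Each entry is handed to the enroll iterators as schema_obj.
    "schemas_to_enroll": [{"schema_name": "public"}],
    # Schemas enrolled in bulk rather than table by table; may be empty.
    "schemas_to_bulk_enroll": [],
}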