def main(user, google_cloud_credentials_file_path, pipeline_configuration_file_path, raw_data_dir):
    """
    Fetches raw data from every source listed in the pipeline configuration.

    Loads the pipeline configuration, builds the Firestore UUID table from credentials downloaded via
    `google_cloud_utils.download_blob_to_string`, then hands each configured raw data source to the fetch
    function that matches its type.

    :param user: Passed through unchanged to the Rapid Pro and Shaqadoon CSV fetchers.
                 NOTE(review): presumably identifies who is running the pipeline - confirm against those functions.
    :param google_cloud_credentials_file_path: Used to download the Firestore UUID table credentials, and passed
                                               on to every fetcher.
    :param pipeline_configuration_file_path: Path of the pipeline configuration file to open and parse.
    :param raw_data_dir: Passed through unchanged to every fetcher.
                         NOTE(review): presumably the directory the fetched data is written to - confirm.
    :raises AssertionError: If a raw data source is not a RapidProSource, GCloudBucketSource or ShaqadoonCSVSource.
    """
    # Read the settings from the configuration file
    log.info("Loading Pipeline Configuration File...")
    with open(pipeline_configuration_file_path) as f:
        pipeline_configuration = PipelineConfiguration.from_configuration_file(f)

    log.info("Downloading Firestore UUID Table credentials...")
    uuid_table_configuration = pipeline_configuration.phone_number_uuid_table
    firestore_uuid_table_credentials = json.loads(google_cloud_utils.download_blob_to_string(
        google_cloud_credentials_file_path,
        uuid_table_configuration.firebase_credentials_file_url
    ))

    phone_number_uuid_table = FirestoreUuidTable(
        uuid_table_configuration.table_name,
        firestore_uuid_table_credentials,
        "avf-phone-uuid-"
    )
    log.info("Initialised the Firestore UUID table")

    # Look the sources up once so the count used in the progress messages is not recomputed on every iteration.
    raw_data_sources = pipeline_configuration.raw_data_sources
    source_count = len(raw_data_sources)

    log.info(f"Fetching data from {source_count} sources...")
    for source_number, raw_data_source in enumerate(raw_data_sources, start=1):
        log.info(f"Fetching from source {source_number}/{source_count}...")
        if isinstance(raw_data_source, RapidProSource):
            fetch_from_rapid_pro(user, google_cloud_credentials_file_path, raw_data_dir, phone_number_uuid_table,
                                 raw_data_source)
        elif isinstance(raw_data_source, GCloudBucketSource):
            fetch_from_gcloud_bucket(google_cloud_credentials_file_path, raw_data_dir, raw_data_source)
        elif isinstance(raw_data_source, ShaqadoonCSVSource):
            fetch_from_shaqadoon_csv(user, google_cloud_credentials_file_path, raw_data_dir, phone_number_uuid_table,
                                     raw_data_source)
        else:
            # Raised explicitly rather than via `assert False` so that the check still fires when Python runs
            # with -O, which strips assert statements. The exception type and message are unchanged.
            raise AssertionError(f"Unknown raw_data_source type {type(raw_data_source)}")
def main(user, google_cloud_credentials_file_path,
         pipeline_configuration_file_path, raw_data_dir):
    """
    Fetches raw data from every source listed in the pipeline configuration.

    Loads the pipeline configuration, sets the logger's project name from it, builds the Firestore UUID table
    from credentials downloaded via `google_cloud_utils.download_blob_to_string`, then hands each configured
    raw data source to the fetch function that matches its type.

    :param user: Passed through unchanged to the Rapid Pro, recovery CSV and Facebook fetchers.
                 NOTE(review): presumably identifies who is running the pipeline - confirm against those functions.
    :param google_cloud_credentials_file_path: Used to download the Firestore UUID table credentials, and passed
                                               on to every fetcher.
    :param pipeline_configuration_file_path: Path of the pipeline configuration file to open and parse.
    :param raw_data_dir: Passed through unchanged to every fetcher.
                         NOTE(review): presumably the directory the fetched data is written to - confirm.
    :raises AssertionError: If a raw data source is not a RapidProSource, GCloudBucketSource, RecoveryCSVSource
                            or FacebookSource.
    """
    # Read the settings from the configuration file
    log.info("Loading Pipeline Configuration File...")
    with open(pipeline_configuration_file_path) as f:
        pipeline_configuration = PipelineConfiguration.from_configuration_file(f)
    Logger.set_project_name(pipeline_configuration.pipeline_name)
    log.debug(f"Pipeline name is {pipeline_configuration.pipeline_name}")

    log.info("Downloading Firestore UUID Table credentials...")
    uuid_table_configuration = pipeline_configuration.uuid_table
    firestore_uuid_table_credentials = json.loads(
        google_cloud_utils.download_blob_to_string(
            google_cloud_credentials_file_path,
            uuid_table_configuration.firebase_credentials_file_url))

    uuid_table = FirestoreUuidTable(
        uuid_table_configuration.table_name,
        firestore_uuid_table_credentials,
        uuid_table_configuration.uuid_prefix)
    log.info("Initialised the Firestore UUID table")

    # Look the sources up once so the count used in the progress messages is not recomputed on every iteration.
    raw_data_sources = pipeline_configuration.raw_data_sources
    source_count = len(raw_data_sources)

    log.info(f"Fetching data from {source_count} sources...")
    for source_number, raw_data_source in enumerate(raw_data_sources, start=1):
        log.info(f"Fetching from source {source_number}/{source_count}...")
        if isinstance(raw_data_source, RapidProSource):
            fetch_from_rapid_pro(user, google_cloud_credentials_file_path,
                                 raw_data_dir, uuid_table, raw_data_source)
        elif isinstance(raw_data_source, GCloudBucketSource):
            fetch_from_gcloud_bucket(google_cloud_credentials_file_path,
                                     raw_data_dir, raw_data_source)
        elif isinstance(raw_data_source, RecoveryCSVSource):
            fetch_from_recovery_csv(user, google_cloud_credentials_file_path,
                                    raw_data_dir, uuid_table, raw_data_source)
        elif isinstance(raw_data_source, FacebookSource):
            fetch_from_facebook(user, google_cloud_credentials_file_path,
                                raw_data_dir, uuid_table, raw_data_source)
        else:
            # Raised explicitly rather than via `assert False` so that the check still fires when Python runs
            # with -O, which strips assert statements. The exception type and message are unchanged.
            raise AssertionError(f"Unknown raw_data_source type {type(raw_data_source)}")
Beispiel #3
0
        metavar="csv-output-file-path",
        help=
        "Path to a CSV file to write the contacts from the locations of interest to. "
        "Exported file is in a format suitable for direct upload to Rapid Pro")

    args = parser.parse_args()

    exclusion_list_file_path = args.exclusion_list_file_path
    google_cloud_credentials_file_path = args.google_cloud_credentials_file_path
    pipeline_configuration_file_path = args.pipeline_configuration_file_path
    traced_data_paths = args.traced_data_paths
    csv_output_file_path = args.csv_output_file_path

    log.info("Loading Pipeline Configuration File...")
    with open(pipeline_configuration_file_path) as f:
        pipeline_configuration = PipelineConfiguration.from_configuration_file(
            f)
    Logger.set_project_name(pipeline_configuration.pipeline_name)
    log.debug(f"Pipeline name is {pipeline_configuration.pipeline_name}")

    log.info("Downloading Firestore UUID Table credentials...")
    firestore_uuid_table_credentials = json.loads(
        google_cloud_utils.download_blob_to_string(
            google_cloud_credentials_file_path, pipeline_configuration.
            phone_number_uuid_table.firebase_credentials_file_url))

    phone_number_uuid_table = FirestoreUuidTable(
        pipeline_configuration.phone_number_uuid_table.table_name,
        firestore_uuid_table_credentials, "avf-phone-uuid-")
    log.info("Initialised the Firestore UUID table")

    uuids = set()