Ejemplo n.º 1
0
def main():
    """Fetch the training config, train the requested model, deploy it, clean up."""
    print('Fetching training configuration')
    train_config = fetch_json(TRAIN_CONFIG)

    # Dispatch on the configured task type.
    task_type = train_config['task_type']
    if task_type == 'classification':
        print('Starting classification')
        accuracy, classes, model_path, acc_plot_path, remove_paths = train_classification(train_config)
        metadata = {'classes': classes}
    else:
        print('Starting sentiment analysis')
        accuracy, model_path, metadata_path, acc_plot_path, remove_paths = train_sa(train_config)
        metadata = {'metadata_filename': metadata_path}

    # Publish the trained model for inference.
    print('Deploying model')
    setup_inference(
        train_config['token'],
        task_type,
        accuracy,
        model_path,
        acc_plot_path,
        metadata,
    )

    # Remove local training artifacts now that they are deployed.
    for path in remove_paths:
        shutil.rmtree(path)

    # Deleting the training config from S3 also shuts down the instance.
    delete_object(TRAIN_CONFIG)
Ejemplo n.º 2
0
def main(username):
    """Run server-side training for *username* end to end.

    Downloads the user's config and dataset from S3, trains the requested
    model (image classification or sentiment analysis), uploads the results,
    registers them in the shared inference config, and finally removes the
    training inputs from S3 and the local data folder.
    """
    print(" In server training ")
    # exist_ok=True: a leftover checkpoints dir from a previous run must not
    # abort training with FileExistsError.
    os.makedirs(os.path.join(DATA_PATH, 'checkpoints'), exist_ok=True)

    print("Created /data/checkpoints folders")

    # Download the user's training configuration file.
    userdata_filename = os.path.join(DATA_PATH, f'{username}.json')
    download_file(
        os.path.join(TRAINING_CONFIG, f'{username}.json'),
        userdata_filename,
    )

    # NOTE: the config also carries `username`, shadowing the parameter.
    (task, username, model_name, ratio, is_reducelrscheduler, patience, factor,
     min_lr, optimizer, batch_size, learning_rate, epochs,
     dataset_filename) = get_config_data(userdata_filename)

    # Download dataset
    download_file(
        os.path.join(TRAINING_CONFIG, dataset_filename),
        os.path.join(DATA_PATH, dataset_filename),
    )

    print(" Completed fetching data from s3 ")
    inference_data = {}
    if task == 'image':
        inference_data = train_image_classification(
            username, model_name, ratio, is_reducelrscheduler, patience,
            factor, min_lr, optimizer, batch_size, learning_rate, epochs,
            dataset_filename)
    elif task == 'text':
        inference_data = train_sentiment_analysis(username, model_name, ratio,
                                                  is_reducelrscheduler,
                                                  patience, factor, min_lr,
                                                  optimizer, batch_size,
                                                  learning_rate, epochs,
                                                  dataset_filename)

    # Upload data to S3
    upload_model_data(task, username)
    print('Uploaded inference data to s3')

    # Register the new model (with a creation timestamp) in inference.json.
    inference_config = fetch_json(INFERENCE_CONFIG)
    inference_config[username] = inference_data
    inference_config[username]['created'] = datetime.now().strftime(
        '%d-%m-%y %H:%M')
    put_object(INFERENCE_CONFIG, inference_config)
    print("Added user information to inference.json and uploaded to s3")

    # Delete train data from S3
    delete_object(os.path.join(TRAINING_CONFIG, dataset_filename))
    delete_object(os.path.join(TRAINING_CONFIG, f'{username}.json'))
    print("Deleted user data from training folder in s3")

    # Delete the local data folder.
    shutil.rmtree(DATA_PATH)
    print("Deleted data folder")
def check_can_delete_manifest(s3_client, package_name, registry_name,
                              push_dest):
    """Check whether *s3_client* can delete a package's manifest objects.

    Pushes a fresh package version, then attempts to (1) read the 'latest'
    manifest pointer, (2) delete the pointer, and (3) delete the raw
    manifest it referenced. Returns True only when both deletes succeed.
    """
    print(
        "Attempting to delete package manifest pointer and raw manifest directly"
    )

    generate_new_package_version(package_name, registry_name, push_dest)

    manifest_pointer_s3_key = f".quilt/named_packages/{package_name}/latest"
    # BUGFIX: str.lstrip("s3://") strips *characters* (s, 3, :, /), not the
    # prefix, and would mangle bucket names beginning with any of them.
    # Remove the scheme prefix and any trailing slash explicitly.
    manifest_pointer_s3_bucket = registry_name
    if manifest_pointer_s3_bucket.startswith("s3://"):
        manifest_pointer_s3_bucket = manifest_pointer_s3_bucket[len("s3://"):]
    manifest_pointer_s3_bucket = manifest_pointer_s3_bucket.rstrip("/")
    raw_manifest_hash = None

    fn_info = FunctionReporter(
        f"Retrieving hash from manifest point s3://{manifest_pointer_s3_bucket}/{manifest_pointer_s3_key}"
    )
    try:
        raw_manifest_hash = s3.get_object_as_string(
            s3_client, manifest_pointer_s3_bucket, manifest_pointer_s3_key)
        fn_info.succeeded(raw_manifest_hash)
    except Exception as ex:
        fn_info.failed(ex)

    fn_info = FunctionReporter(
        f"Deleting manifest pointer file s3://{manifest_pointer_s3_bucket}/{manifest_pointer_s3_key}"
    )
    try:
        delete_response = s3.delete_object(s3_client,
                                           manifest_pointer_s3_bucket,
                                           manifest_pointer_s3_key)
        fn_info.succeeded(delete_response)
        deleted_manifest_pointer = True
    except Exception as ex:
        fn_info.failed(ex)
        deleted_manifest_pointer = False

    # BUGFIX: if the pointer could not be read we do not know the raw
    # manifest key; previously the code tried to delete the bogus key
    # ".quilt/packages/None" and could report a spurious success.
    if raw_manifest_hash is None:
        print("Manifest pointer unreadable; skipping raw manifest delete")
        deleted_raw_manifest = False
    else:
        raw_manifest_s3_key = f".quilt/packages/{raw_manifest_hash}"
        fn_info = FunctionReporter(
            f"Deleting raw manifest file s3://{manifest_pointer_s3_bucket}/{raw_manifest_s3_key}"
        )
        try:
            delete_response = s3.delete_object(s3_client,
                                               manifest_pointer_s3_bucket,
                                               raw_manifest_s3_key)
            fn_info.succeeded(delete_response)
            deleted_raw_manifest = True
        except Exception as ex:
            fn_info.failed(ex)
            deleted_raw_manifest = False

    return deleted_manifest_pointer and deleted_raw_manifest
Ejemplo n.º 4
0
def clean(event, context):
    """Lambda-style handler that deletes inference artifacts older than 2 hours.

    Entries whose token is in WHITELIST_TOKENS are always kept. Returns an
    HTTP-style response dict via create_response; internal errors return a
    500 response instead of raising.
    """
    try:
        # Bail out unless the cleanup job is explicitly enabled.
        clean_status_config = fetch_object('cleanup.json')
        if clean_status_config['status'] != 'active':
            print('Status inactive')
            return create_response({
                'result': 'error',
                'message': 'cleanup status is inactive.'
            })

        # Fetch inference data
        infer_config = fetch_object(INFERENCE_CONFIG)

        # Keep young or whitelisted entries; delete everything else.
        print('Checking configs')
        safe_objects = {}
        current_time = datetime.now()
        for token, infer_vals in infer_config.items():
            if token not in WHITELIST_TOKENS:
                creation_time = datetime.strptime(infer_vals['created'], '%d-%m-%y %H:%M')
                # BUGFIX: timedelta.seconds is only the sub-day component, so
                # an entry older than 24h could wrongly look "young" again.
                # total_seconds() measures the full elapsed time.
                if (current_time - creation_time).total_seconds() < 7200:  # 2 hours
                    safe_objects[token] = infer_vals
                else:  # Delete objects
                    delete_object(infer_vals['model_filename'])
                    if infer_vals['task_type'] == 'sentimentanalysis':
                        delete_object(infer_vals['metadata_filename'])
                    print('Deleted:', token)
            else:
                safe_objects[token] = infer_vals

        # Persist only the surviving entries.
        update_object(INFERENCE_CONFIG, safe_objects)

        return create_response({
            'result': 'success',
            'message': 'Old objects deleted'
        })
    except Exception as e:
        print(repr(e))
        return create_response({
            'result': 'internal_error',
            'message': repr(e),
        }, status_code=500)
Ejemplo n.º 5
0
def delete_all_segments(bucket, prefix):
    segment = 0
    while True:
        object = '%s.%03d' % (prefix, segment)

        # See if object exists
        r = s3.data_to_url('HEAD', '/%s/%s' % (bucket, object), None, None)

        if r.status != 200:
            break

        print 'Deleting', bucket, object
        r = s3.delete_object(bucket, object)

        segment += 1
Ejemplo n.º 6
0
def delete_all_segments(bucket, prefix):
    segment = 0
    while True:
        object = '%s.%03d' % (prefix, segment)

        # See if object exists
        r = s3.data_to_url('HEAD', '/%s/%s' % (bucket, object),
                           None, None)

        if r.status != 200:
            break

        print 'Deleting', bucket, object
        r = s3.delete_object(bucket, object)

        segment += 1
def check_normal_creds_can_create_and_delete_random_file(bucket, key):
    """Verify default-chain boto3 credentials can put and delete s3://bucket/key.

    Returns True when the delete succeeds, False otherwise; each step is
    reported through FunctionReporter.
    """
    s3_client = s3.get_s3_client(use_quilt3_botocore_session=False)

    # Create the throwaway test object.
    put_fn_info = FunctionReporter(f"Putting test file in s3://{bucket}/{key}")
    try:
        put_fn_info.succeeded(s3.put_object(s3_client, bucket, key))
    except Exception as ex:
        put_fn_info.failed(ex)

    # Now try to remove it; the delete result is what we report back.
    del_fn_info = FunctionReporter(
        f"Deleting test file in s3://{bucket}/{key}")
    try:
        del_fn_info.succeeded(s3.delete_object(s3_client, bucket, key))
        return True
    except Exception as ex:
        del_fn_info.failed(ex)
        return False
def main(package_name, bucket):
    """
    Debugging process:

        1. Using normal boto3 credentials, see if there are any restrictions on the bucket/keys in the bucket (legal
           hold, object lock, etc)

        2. Using normal boto3 credentials, confirm that we can delete some object from the bucket

        3. Record information about the normal boto3 credentials

        4. Reproduce the bug using quilt3 library directly

        5. Using normal boto3 credentials, confirm that we can delete the manifest objects (is problem key-specific?).

        6. Using s3_client from data_transfer.get_s3_client(), see if we can delete the manifest objects (is problem a
           bug in delete logic or pure permissions)

        7. Using session.get_botocore_session, see if we can delete the manifest objects (is the problem a bug in the
           data_transfer.get_s3_client logic?)

        8. We expect that the above problem will fail indicating that the problem originates in the permissions
           retrieved from session.get_botocore_session. Record information about the permissions.
    """
    registry_name = f"s3://{bucket}"
    push_dest_key_prefix = "quilt-tmp"
    push_dest = f"{registry_name}/{push_dest_key_prefix}"
    # BUGFIX: this must be a bare S3 key. The old value embedded the full
    # registry URL and misspelled the prefix ("{registry_name}./quilt/..."),
    # so get_info_about_key was probing a nonexistent key. Match the key
    # format used everywhere else (".quilt/named_packages/<pkg>/latest").
    manifest_pointer_s3_key = f".quilt/named_packages/{package_name}/latest"
    test_file_s3_key = "quilt-debug-tmp/tmpfile"

    header("Get bucket info (policy, object lock, etc)")
    get_info_about_bucket(bucket)

    header(
        "Get bucket + key info (legal hold, object retention). (key = 'latest' manifest pointer) "
    )
    generate_new_package_version(package_name, registry_name, push_dest)
    get_info_about_key(bucket, manifest_pointer_s3_key)

    header(
        "Checking if boto3 with default cred provider chain can create and delete a file on s3"
    )
    normal_creds_can_delete = check_normal_creds_can_create_and_delete_random_file(
        bucket=bucket, key=test_file_s3_key)
    print("Normal creds can delete test file?", normal_creds_can_delete)

    header("Getting info about role/user given by default cred provider chain")
    get_info_about_normal_creds()

    header("Trying to reproduce failure during delete package")
    repro(package_name, registry_name, push_dest)

    header(
        "Checking if boto3 with default cred provider chain can delete a manifest"
    )
    normal_creds_can_delete = check_can_delete_manifest(
        s3.get_s3_client(use_quilt3_botocore_session=False), package_name,
        registry_name, push_dest)
    print("Normal creds can delete raw manifest and manifest pointer?",
          normal_creds_can_delete)

    header(
        "Checking if s3_client from quilt3.data_transfer.create_s3_client() can delete a manifest"
    )
    quilt3_create_s3_client_can_delete = check_can_delete_manifest(
        quilt3.data_transfer.create_s3_client(), package_name, registry_name,
        push_dest)
    print(
        "S3 client from quilt3.data_transfer.create_s3_client() can delete raw manifest and manifest pointer?",
        quilt3_create_s3_client_can_delete)

    header(
        "Checking if s3_client from quilt3.session.create_botocore_session() can delete a manifest"
    )
    quilt3_botocore_session_can_delete = check_can_delete_manifest(
        s3.get_s3_client(use_quilt3_botocore_session=True), package_name,
        registry_name, push_dest)
    print(
        "S3 client from quilt3.session.create_botocore_session() can delete raw manifest and manifest pointer?",
        quilt3_botocore_session_can_delete)

    header(
        "Getting IAM info about credentials provided via quilt3.session.create_botocore_session()"
    )
    get_info_about_quilt3_creds()

    # Best-effort cleanup: the debug run leaves package data, manifest
    # pointers, and raw manifests behind; try to remove all of them.
    header("Trying to clean up any leftover files")
    try:
        s3_client = s3.get_s3_client(use_quilt3_botocore_session=False)

        fn_info = FunctionReporter(
            f"Deleting all keys in s3://{bucket}/{push_dest_key_prefix}")
        try:
            s3.empty_keyspace(s3_client, bucket, push_dest_key_prefix)
            fn_info.succeeded(None)
        except Exception as ex:
            fn_info.failed(ex)

        # For each surviving pointer: read the hash it contains, delete the
        # pointer, then delete the raw manifest the hash identifies.
        manifest_pointer_prefix = f".quilt/named_packages/{package_name}/"
        for manifest_pointer_object in s3.list_keyspace(
                s3_client, bucket, prefix=manifest_pointer_prefix):
            manifest_pointer_key = manifest_pointer_object["Key"]

            raw_manifest_hash = None
            fn_info = FunctionReporter(
                f"Trying to get contents of manifest pointer s3://{bucket}/{manifest_pointer_key}"
            )
            try:
                raw_manifest_hash = s3.get_object_as_string(
                    s3_client, bucket, manifest_pointer_key)
                fn_info.succeeded(raw_manifest_hash)
            except Exception as ex:
                fn_info.failed(ex)

            fn_info = FunctionReporter(
                f"Trying to delete manifest_pointer: s3://{bucket}/{manifest_pointer_key}"
            )
            try:
                delete_response = s3.delete_object(s3_client, bucket,
                                                   manifest_pointer_key)
                fn_info.succeeded(delete_response)
            except Exception as ex:
                fn_info.failed(ex)

            if raw_manifest_hash is not None:
                raw_manifest_key = f".quilt/packages/{raw_manifest_hash}"

                fn_info = FunctionReporter(
                    f"Trying to delete raw_manifest: s3://{bucket}/{raw_manifest_key}"
                )
                try:
                    delete_response = s3.delete_object(s3_client, bucket,
                                                       raw_manifest_key)
                    fn_info.succeeded(delete_response)
                except Exception as ex:
                    fn_info.failed(ex)

    except Exception as ex:
        print("Cleanup failed with exception:", ex)