Example #1
0
def run_job(args):
    should_ingest, cols_to_validate, cols_to_aggregate, cols_to_transform, training_datasets = parse_args(
        args)

    resource_id_list = cols_to_validate + cols_to_aggregate + cols_to_transform + training_datasets

    try:
        ctx = Context(s3_path=args.context,
                      cache_dir=args.cache_dir,
                      workload_id=args.workload_id)
    except Exception as e:
        logger.exception("An error occurred, see the logs for more details.")
        sys.exit(1)

    try:
        spark = None  # For the finally clause
        spark = get_spark_session(ctx.workload_id)
        spark.sparkContext.parallelize(
            [1, 2, 3, 4, 5]).count()  # test that executors are allocated
        raw_df = ingest_raw_dataset(spark, ctx, cols_to_validate,
                                    should_ingest)

        if len(cols_to_aggregate) > 0:
            run_aggregators(spark, ctx, cols_to_aggregate, raw_df)

        if len(cols_to_transform) > 0:
            validate_transformers(spark, ctx, cols_to_transform, raw_df)

        create_training_datasets(spark, ctx, training_datasets, raw_df)

        util.log_job_finished(ctx.workload_id)
    except CortexException as e:
        e.wrap("error")
        logger.error(str(e))
        logger.exception(
            "An error occurred, see `cortex logs -v {} {}` for more details.".
            format(
                ctx.id_map[resource_id_list[0]]["resource_type"],
                ctx.id_map[resource_id_list[0]]["name"],
            ))
        sys.exit(1)
    except Exception as e:
        logger.exception(
            "An error occurred, see `cortex logs -v {} {}` for more details.".
            format(
                ctx.id_map[resource_id_list[0]]["resource_type"],
                ctx.id_map[resource_id_list[0]]["name"],
            ))
        sys.exit(1)
    finally:
        if spark is not None:
            spark.stop()
Example #2
0
def train(args):
    ctx = Context(s3_path=args.context,
                  cache_dir=args.cache_dir,
                  workload_id=args.workload_id)

    package.install_packages(ctx.python_packages, ctx.bucket)

    model = ctx.models_id_map[args.model]

    logger.info("Training")

    with util.Tempdir(ctx.cache_dir) as temp_dir:
        model_dir = os.path.join(temp_dir, "model_dir")
        ctx.upload_resource_status_start(model)

        try:
            model_impl = ctx.get_model_impl(model["name"])
            train_util.train(model["name"], model_impl, ctx, model_dir)
            ctx.upload_resource_status_success(model)

            logger.info("Caching")
            logger.info("Caching model " + model["name"])
            model_export_dir = os.path.join(model_dir, "export", "estimator")
            model_zip_path = os.path.join(temp_dir, "model.zip")
            util.zip_dir(model_export_dir, model_zip_path)

            aws.upload_file_to_s3(local_path=model_zip_path,
                                  key=model["key"],
                                  bucket=ctx.bucket)
            util.log_job_finished(ctx.workload_id)

        except CortexException as e:
            ctx.upload_resource_status_failed(model)
            e.wrap("error")
            logger.error(str(e))
            logger.exception(
                "An error occurred, see `cx logs model {}` for more details.".
                format(model["name"]))
            sys.exit(1)
        except Exception as e:
            ctx.upload_resource_status_failed(model)
            logger.exception(
                "An error occurred, see `cx logs model {}` for more details.".
                format(model["name"]))
            sys.exit(1)
Example #3
0
def build(args):
    ctx = Context(s3_path=args.context,
                  cache_dir=args.cache_dir,
                  workload_id=args.workload_id)
    python_packages_list = [
        ctx.pp_id_map[id] for id in args.python_packages.split(",")
    ]
    python_packages = {
        python_package["name"]: python_package
        for python_package in python_packages_list
    }
    ctx.upload_resource_status_start(*python_packages_list)
    try:
        build_packages(python_packages, ctx.bucket)
        util.log_job_finished(ctx.workload_id)
    except Exception as e:
        logger.exception(e)
        ctx.upload_resource_status_failed(*python_packages_list)
    else:
        ctx.upload_resource_status_success(*python_packages_list)