Example #1
import os

# `oss` (the SQLFlow OSS helper module) and `SQLFlowDiagnostic` are assumed
# to be provided by the surrounding SQLFlow runtime package.


def getAlisaBucket():
    """Get the Alisa OSS bucket; connection parameters are read from
    environment variables."""
    ep = os.getenv("SQLFLOW_OSS_ALISA_ENDPOINT")
    ak = os.getenv("SQLFLOW_OSS_AK")
    sk = os.getenv("SQLFLOW_OSS_SK")
    bucketName = os.getenv("SQLFLOW_OSS_ALISA_BUCKET")

    if ep == "" or ak == "" or sk == "":
        return SQLFlowDiagnostic(
            "should define SQLFLOW_OSS_ALISA_ENDPOINT, "
            "SQLFLOW_OSS_ALISA_BUCKET, SQLFLOW_OSS_AK, SQLFLOW_OSS_SK "
            "when using submitter alisa")

    return oss.get_bucket(bucketName, ak, sk, endpoint=ep)
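A minimal usage sketch; the endpoint, bucket name, and credentials below are hypothetical placeholders, and it assumes the returned object exposes the standard oss2.Bucket API:

import os

os.environ["SQLFLOW_OSS_ALISA_ENDPOINT"] = "oss-cn-hangzhou.aliyuncs.com"  # placeholder
os.environ["SQLFLOW_OSS_ALISA_BUCKET"] = "my-alisa-bucket"                 # placeholder
os.environ["SQLFLOW_OSS_AK"] = "my-access-key-id"                          # placeholder
os.environ["SQLFLOW_OSS_SK"] = "my-access-key-secret"                      # placeholder

bucket = getAlisaBucket()
# Upload a small object to verify the credentials and endpoint work.
bucket.put_object("greeting.txt", b"hello from SQLFlow")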
Example #2
import os
import sys
import uuid

import oss2
import requests
import six

# `get_bucket`, `print_job_log_till_finish`, and `OPTFLOW_HTTP_HEADERS` are
# assumed to be defined elsewhere in the surrounding SQLFlow module.


def submit_optflow_job(train_table, result_table, fsl_file_content, solver,
                       user_id):
    """
    Submit the OptFlow job.

    Args:
        train_table (str): the source table name.
        result_table (str): the table name to save the solved results.
        fsl_file_content (str): the FSL file content to submit.
        solver (str): the solver used to solve the model.
        user_id (str): the user id.

    Returns:
        None
    """
    project_name = train_table.split(".")[0]

    snapshot_id = os.getenv("SQLFLOW_OPTFLOW_SNAPSHOT_ID")
    if not snapshot_id:
        raise ValueError("SQLFLOW_OPTFLOW_SNAPSHOT_ID must be set")

    token = os.getenv("SQLFLOW_OPTFLOW_TOKEN")
    if not token:
        raise ValueError("SQLFLOW_OPTFLOW_TOKEN must be set")

    submit_job_url = os.getenv("SQLFLOW_OPTFLOW_SUBMIT_JOB_URL")
    if not submit_job_url:
        raise ValueError("SQLFLOW_OPTFLOW_SUBMIT_JOB_URL must be set")

    query_job_status_url = os.getenv("SQLFLOW_OPTFLOW_QUERY_JOB_STATUS_URL")
    if not query_job_status_url:
        raise ValueError("SQLFLOW_OPTFLOW_QUERY_JOB_STATUS_URL must be set")

    query_job_log_url = os.getenv("SQLFLOW_OPTFLOW_QUERY_JOB_LOG_URL")
    if not query_job_log_url:
        raise ValueError("SQLFLOW_OPTFLOW_QUERY_JOB_LOG_URL must be set")

    bucket_name = "sqlflow-optflow-models"
    bucket = get_bucket(bucket_name)
    try:
        bucket_info = bucket.get_bucket_info()
    except oss2.exceptions.NoSuchBucket:
        # Create the bucket if it does not exist yet.
        bucket.create_bucket()
        bucket_info = bucket.get_bucket_info()

    fsl_file_id = '{}.fsl'.format(uuid.uuid4())
    bucket.put_object(fsl_file_id, fsl_file_content)
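    # Track whether the uploaded FSL object should be cleaned up in the
    # finally block; it is deliberately kept on unexpected errors so the
    # job can still be inspected later.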
    should_delete_object = True
    try:
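        # The object is made public-read so the OptFlow service can fetch
        # the FSL file over plain HTTP via the bucket's extranet endpoint.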
        bucket.put_object_acl(fsl_file_id, oss2.BUCKET_ACL_PUBLIC_READ)
        fsl_url = "http://{}.{}/{}".format(bucket_name,
                                           bucket_info.extranet_endpoint,
                                           fsl_file_id)

        input_params = {
            "input_table": train_table,
            "output_table": result_table,
            "fsl_path": fsl_url,
            "solver_name": solver,
        }

        json_data = {
            "userNumber": user_id,
            "projectName": project_name,
            "snapshotId": snapshot_id,
            "token": token,
            "inputParams": input_params,
        }

        response = requests.post(submit_job_url,
                                 json=json_data,
                                 headers=OPTFLOW_HTTP_HEADERS)
        response.raise_for_status()
        response_json = response.json()
        if not response_json['success']:
            raise ValueError("Job submission fails")

        print('Job submission succeeded')
        record_id = response_json['data']['recordId']
        try:
            success = print_job_log_till_finish(query_job_status_url,
                                                query_job_log_url, record_id,
                                                user_id, token)
            if success:
                print("Job succeeded. Solved results are saved in {}.".format(
                    result_table))
            else:
                print("Job failed.")
        except:  # noqa: E722
            # FIXME(sneaxiy): we should not delete object if there is any
            # network error when querying job status and logs. But when
            # should we clean the object?
            should_delete_object = False
            six.reraise(*sys.exc_info())
    finally:
        if should_delete_object:
            bucket.delete_object(fsl_file_id)
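A minimal invocation sketch; every environment variable value, table name, and the solver name below are hypothetical placeholders for a real OptFlow deployment:

import os

os.environ["SQLFLOW_OPTFLOW_SNAPSHOT_ID"] = "12345"
os.environ["SQLFLOW_OPTFLOW_TOKEN"] = "my-optflow-token"
os.environ["SQLFLOW_OPTFLOW_SUBMIT_JOB_URL"] = "http://optflow.example.com/api/submit"
os.environ["SQLFLOW_OPTFLOW_QUERY_JOB_STATUS_URL"] = "http://optflow.example.com/api/status"
os.environ["SQLFLOW_OPTFLOW_QUERY_JOB_LOG_URL"] = "http://optflow.example.com/api/log"

fsl_source = "..."  # FSL model description to submit

submit_optflow_job(train_table="my_project.demand_data",
                   result_table="my_project.demand_result",
                   fsl_file_content=fsl_source,
                   solver="glpk",
                   user_id="000001")

Note the cleanup contract: the uploaded FSL object is deleted in the finally block once the job finishes, whether it succeeds or fails, but it is intentionally left in the bucket when polling raises an unexpected exception, as flagged by the FIXME above.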