Example 1
def create_task(**kwargs):
    """Creates a new task in our database/object storage.

    Args:
        **kwargs: arbitrary keyword arguments. Recognized keys are
            name, description, tags, image, commands, arguments,
            experiment_notebook, deployment_notebook, is_default,
            and copy_from.

    Returns:
        The task info as a dict.
    """
    name = kwargs.get('name', None)
    description = kwargs.get('description', None)
    tags = kwargs.get('tags', None)
    image = kwargs.get('image', None)
    commands = kwargs.get('commands', None)
    arguments = kwargs.get('arguments', None)
    experiment_notebook = kwargs.get('experiment_notebook', None)
    deployment_notebook = kwargs.get('deployment_notebook', None)
    is_default = kwargs.get('is_default', None)
    copy_from = kwargs.get('copy_from', None)

    if not isinstance(name, str):
        raise BadRequest("name is required")

    if copy_from and (experiment_notebook or deployment_notebook):
        raise BadRequest("Either provide notebooks or a task to copy from")

    if tags is None or len(tags) == 0:
        tags = ["DEFAULT"]

    if any(tag not in VALID_TAGS for tag in tags):
        valid_str = ",".join(VALID_TAGS)
        raise BadRequest(f"Invalid tag. Choose any of {valid_str}")

    # check if image is a valid docker image
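    # accepted format: lowercase name with at least one "/" and an optional ":tag" (e.g. "repo/image:latest")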
    if image:
        pattern = re.compile(r'[a-z0-9.-]+(/[a-z0-9.-]+)+(:[a-z0-9.-]+)?$')
        if pattern.match(image) is None:
            raise BadRequest("invalid docker image name")

    existing_task = db_session.query(Task).filter_by(name=name).first()
    if existing_task:
        raise BadRequest("a task with that name already exists")

    # creates a task with specified name,
    # but copies notebooks from a source task
    if copy_from:
        return copy_task(name, description, tags, copy_from)

    task_id = str(uuid_alpha())

    # loads a sample notebook if none was sent
    if experiment_notebook is None and "DATASETS" not in tags:
        experiment_notebook = EXPERIMENT_NOTEBOOK

    if deployment_notebook is None and "DATASETS" not in tags:
        deployment_notebook = DEPLOYMENT_NOTEBOOK

    # The new task must have its own task_id, experiment_id and operator_id.
    # Notice these values are ignored when a notebook is run in a pipeline.
    # They are only used by JupyterLab interface.
    init_notebook_metadata(task_id, deployment_notebook, experiment_notebook)

    # saves new notebooks to object storage
    if "DATASETS" not in tags:
        obj_name = f"{PREFIX}/{task_id}/Experiment.ipynb"
        experiment_notebook_path = f"minio://{BUCKET_NAME}/{obj_name}"
        put_object(obj_name, dumps(experiment_notebook).encode())

        obj_name = f"{PREFIX}/{task_id}/Deployment.ipynb"
        deployment_notebook_path = f"minio://{BUCKET_NAME}/{obj_name}"
        put_object(obj_name, dumps(deployment_notebook).encode())

        # creates deployment and experiment notebooks on jupyter
        create_jupyter_files(task_name=name,
                             deployment_notebook=dumps(deployment_notebook).encode(),
                             experiment_notebook=dumps(experiment_notebook).encode())
    else:
        experiment_notebook_path = None
        deployment_notebook_path = None

    if commands is None or len(commands) == 0:
        commands = DEFAULT_COMMANDS

    if arguments is None or len(arguments) == 0:
        arguments = DEFAULT_ARGUMENTS

    # saves task info to the database
    task = Task(uuid=task_id,
                name=name,
                description=description,
                tags=tags,
                image=image,
                commands=commands,
                arguments=arguments,
                experiment_notebook_path=experiment_notebook_path,
                deployment_notebook_path=deployment_notebook_path,
                is_default=is_default)
    db_session.add(task)
    db_session.commit()

    return task.as_dict()
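
A minimal usage sketch for the function above, assuming the module-level dependencies (db_session, VALID_TAGS, the object-storage client) are already configured; the tag and image values are illustrative, not taken from the project's actual configuration:

# Hypothetical call: the keyword names match the keys read via kwargs.get() above.
# "DEFAULT" is assumed to be in VALID_TAGS (it is the fallback tag used by create_task),
# and the image name must match the docker-image pattern (at least one "/", optional ":tag").
task_info = create_task(
    name="my-first-task",
    description="trains a model on an example dataset",
    tags=["DEFAULT"],
    image="myrepo/my-task-image:latest",
)
print(task_info)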
Example 2
def copy_task(name, description, tags, copy_from):
    """Makes a copy of a task in our database/object storage.

    Args:
        name (str): the task name.
        description (str): the task description.
        tags (list): the task tags list.
        copy_from (str): the task_id from which the notebooks are copied.

    Returns:
        The task info as a dict.
    """
    task = Task.query.get(copy_from)

    if task is None:
        raise BadRequest("Source task does not exist")

    task_id = uuid_alpha()
    image = task.image
    commands = task.commands
    arguments = task.arguments

    # reads source notebooks from object storage
    source_name = f"{PREFIX}/{copy_from}/Deployment.ipynb"
    deployment_notebook = loads(get_object(source_name))

    source_name = f"{PREFIX}/{copy_from}/Experiment.ipynb"
    experiment_notebook = loads(get_object(source_name))

    # Even though we are creating 'copies', the new task must have
    # its own task_id, experiment_id and operator_id.
    # We don't want to mix models and metrics of different tasks.
    # Notice these values are ignored when a notebook is run in a pipeline.
    # They are only used by JupyterLab interface.
    init_notebook_metadata(task_id, deployment_notebook, experiment_notebook)

    # saves new notebooks to object storage
    destination_name = f"{PREFIX}/{task_id}/Deployment.ipynb"
    deployment_notebook_path = f"minio://{BUCKET_NAME}/{destination_name}"
    put_object(destination_name, dumps(deployment_notebook).encode())

    destination_name = f"{PREFIX}/{task_id}/Experiment.ipynb"
    experiment_notebook_path = f"minio://{BUCKET_NAME}/{destination_name}"
    put_object(destination_name, dumps(experiment_notebook).encode())

    # creates deployment and experiment notebooks on jupyter
    create_jupyter_files(task_name=name,
                         deployment_notebook=dumps(deployment_notebook).encode(),
                         experiment_notebook=dumps(experiment_notebook).encode())

    # saves task info to the database
    task = Task(uuid=task_id,
                name=name,
                description=description,
                tags=tags,
                image=image,
                commands=commands,
                arguments=arguments,
                deployment_notebook_path=deployment_notebook_path,
                experiment_notebook_path=experiment_notebook_path,
                is_default=False)
    db_session.add(task)
    db_session.commit()

    return task.as_dict()
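
A minimal sketch of the copy path, assuming a source task already exists; in the application this branch is normally reached through create_task(copy_from=...), and the identifiers below are illustrative:

# Hypothetical call: copies the notebooks of an existing task into a new one.
existing_task_id = task_info["uuid"]  # assuming as_dict() exposes the uuid column
copied = copy_task(
    name="copy-of-my-first-task",
    description="same notebooks, new task id",
    tags=["DEFAULT"],
    copy_from=existing_task_id,
)
print(copied)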