예제 #1
0
def clone(id):
    """
    Download files from a job.

    This will download the files that were originally uploaded at
    the start of the job.
    """
    # Resolve the job: first try the normalized job name, and if that lookup
    # raises FloydException retry with the value exactly as it was given.
    try:
        experiment = ExperimentClient().get(
            normalize_job_name(id, use_config=False))
    except FloydException:
        experiment = ExperimentClient().get(id)

    # The module id is reached through the experiment's module task instance;
    # without one there is nothing to download.
    task_instance_id = get_module_task_instance_id(experiment.task_instances)
    task_instance = TaskInstanceClient().get(
        task_instance_id) if task_instance_id else None
    if not task_instance:
        sys.exit(
            "Cannot clone this version of the job. Try a different version.")

    # task_instance is always truthy past the guard above (sys.exit raises),
    # so the module can be fetched unconditionally.
    module = ModuleClient().get(task_instance.module_id)
    code_url = "{}/api/v1/resources/{}?content=true&download=true".format(
        floyd.floyd_host, module.resource_id)
    ExperimentClient().download_tar(url=code_url,
                                    untar=True,
                                    delete_after_untar=True)
예제 #2
0
def delete(id, yes):
    """
    Delete project run
    """
    # id:  identifier handed to ExperimentClient for lookup and deletion.
    # yes: when truthy, skip the interactive confirmation prompt.
    experiment = ExperimentClient().get(id)

    # Active jobs have to be stopped before they can be removed.
    if experiment.state in ["queued", "running"]:
        floyd_logger.info(
            "Experiment in {} state cannot be deleted. Stop it first".format(
                experiment.state))
        return

    if not yes:
        # abort=True makes click abort the command if the user declines.
        click.confirm('Delete Run: {}?'.format(experiment.name),
                      abort=True,
                      default=False)

    # Look up the module only once we know the deletion will go ahead.
    # An experiment may have no module task instance; in that case there is
    # no module to remove, but the experiment itself can still be deleted.
    task_instance_id = get_module_task_instance_id(experiment.task_instances)
    task_instance = TaskInstanceClient().get(
        task_instance_id) if task_instance_id else None
    if task_instance and task_instance.module_id:
        ModuleClient().delete(task_instance.module_id)

    if ExperimentClient().delete(id):
        floyd_logger.info("Experiment deleted")
    else:
        floyd_logger.error("Failed to delete experiment")
예제 #3
0
def clone(id):
    """
    Download the code for the experiment to the current path
    """
    experiment = ExperimentClient().get(id)

    # The module id is reached through the experiment's module task instance;
    # without one there is nothing to download.
    task_instance_id = get_module_task_instance_id(experiment.task_instances)
    task_instance = TaskInstanceClient().get(task_instance_id) if task_instance_id else None
    if not task_instance:
        sys.exit("Cannot clone this version of the job. Try a different version.")

    # task_instance is always truthy past the guard above (sys.exit raises),
    # so the module can be fetched unconditionally.
    module = ModuleClient().get(task_instance.module_id)
    code_url = "{}/api/v1/resources/{}?content=true&download=true".format(floyd.floyd_host,
                                                                          module.resource_id)
    ExperimentClient().download_tar(url=code_url,
                                    untar=True,
                                    delete_after_untar=True)
예제 #4
0
def clone(id, path):
    """
    - Download all files from a job

    Eg: alice/projects/mnist/1/

    Note: This will download the files that were originally uploaded at
    the start of the job.

    - Download files in a specific path from a job

    Specify the path to a directory and download all its files and subdirectories.

    Eg: --path models/checkpoint1
    """
    # Resolve the job: first try the normalized job name, and if that lookup
    # raises FloydException retry with the value exactly as it was given.
    try:
        experiment = ExperimentClient().get(
            normalize_job_name(id, use_config=False))
    except FloydException:
        experiment = ExperimentClient().get(id)

    task_instance_id = get_module_task_instance_id(experiment.task_instances)
    task_instance = TaskInstanceClient().get(
        task_instance_id) if task_instance_id else None
    if not task_instance:
        sys.exit(
            "Cannot clone this version of the job. Try a different version.")

    if path:
        # Download a directory from Code.
        # The path is user input that ends up in a query string, so it must
        # be percent-encoded: a raw '&', '#', '%' or space would otherwise
        # change the meaning of the URL. '/' is kept readable.
        try:
            from urllib.parse import quote
        except ImportError:  # Python 2
            from urllib import quote
        # Quote UTF-8 bytes so non-ASCII paths work on both Python 2 and 3.
        raw_path = path if isinstance(path, bytes) else path.encode('utf-8')
        code_url = "{}/api/v1/download/artifacts/code/{}?is_dir=true&path={}".format(
            floyd.floyd_host, experiment.id, quote(raw_path, safe='/'))
    else:
        # Download the full Code. The module is only needed for this URL, so
        # it is fetched here rather than on every call.
        module = ModuleClient().get(task_instance.module_id)
        code_url = "{}/api/v1/resources/{}?content=true&download=true".format(
            floyd.floyd_host, module.resource_id)
    ExperimentClient().download_tar(url=code_url,
                                    untar=True,
                                    delete_after_untar=True)
예제 #5
0
파일: run.py 프로젝트: rmdort/floyd-cli
def run(ctx, gpu, env, message, data, mode, open, tensorboard, command):
    """
    Run a command on Floyd. Floyd will upload contents of the
    current directory and run your command remotely.
    This command will generate a run id for reference.
    """
    # Parameters as used below:
    #   ctx         - not referenced in this function body.
    #   gpu         - truthy selects the 'gpu' arch / GPU_INSTANCE_TYPE,
    #                 otherwise 'cpu' / CPU_INSTANCE_TYPE.
    #   env         - environment name; must be one of the environments the
    #                 server lists for the chosen arch.
    #   message     - job description (the module gets '' when it is falsy).
    #   data        - up to 5 dataset specs, each "NAME_OR_ID" or
    #                 "NAME_OR_ID:PATH".
    #   mode        - 'jupyter' and 'serve' make this call wait for the job
    #                 and print its service URL.
    #   open        - open the Jupyter URL in a browser once it responds.
    #   tensorboard - forwarded to the module as enable_tensorboard.
    #   command     - sequence of words joined with spaces into the command.
    experiment_config = ExperimentConfigManager.get_config()
    if not ProjectClient().exists(experiment_config.family_id):
        floyd_logger.error(
            'Invalid project id, please run '
            '"floyd init PROJECT_NAME" before scheduling a job.')
        return

    access_token = AuthConfigManager.get_access_token()
    experiment_name = "{}/{}".format(access_token.username,
                                     experiment_config.name)

    # Validate the attached datasets before anything is created remotely
    if len(data) > 5:
        floyd_logger.error("Cannot attach more than 5 datasets to an job")
        return

    # Get the data entity from the server to:
    # 1. Confirm that the data id or uri exists and has the right permissions
    # 2. If uri is used, get the id of the dataset
    data_ids = []
    for data_name_or_id in data:
        path = None
        if ':' in data_name_or_id:
            # Split on the first colon only: a spec with more than one colon
            # would otherwise raise ValueError while unpacking.
            data_name_or_id, path = data_name_or_id.split(':', 1)
        data_obj = DataClient().get(data_name_or_id)
        if not data_obj:
            floyd_logger.error(
                "Data not found for name or id: {}".format(data_name_or_id))
            return
        data_ids.append(
            "{}:{}".format(data_obj.id, path) if path else data_obj.id)

    # With zero or one dataset the input gets the default name 'input'
    default_name = 'input' if len(data_ids) <= 1 else None
    module_inputs = [{
        'name': get_data_name(data_str, default_name),
        'type': 'dir'
    } for data_str in data_ids]

    if gpu:
        arch = 'gpu'
        instance_type = GPU_INSTANCE_TYPE
    else:
        arch = 'cpu'
        instance_type = CPU_INSTANCE_TYPE

    # Check the requested environment against what the server supports
    env_map = EnvClient().get_all()
    envs = env_map.get(arch)
    if envs:
        if env not in envs:
            floyd_logger.error(
                "{} is not in the list of supported environments: {}".format(
                    env, ', '.join(envs.keys())))
            return
    else:
        floyd_logger.error("{} is not a supported architecture".format(arch))
        return

    # Create module
    command_str = ' '.join(command)
    module = Module(name=experiment_name,
                    description=message or '',
                    command=command_str,
                    mode=get_mode_parameter(mode),
                    enable_tensorboard=tensorboard,
                    family_id=experiment_config.family_id,
                    inputs=module_inputs,
                    env=env,
                    arch=arch)

    from floyd.exceptions import BadRequestException
    try:
        module_id = ModuleClient().create(module)
    except BadRequestException as e:
        if 'Project not found, ID' in e.message:
            floyd_logger.error(
                'ERROR: Please run "floyd init PROJECT_NAME" before scheduling a job.'
            )
        else:
            floyd_logger.error('ERROR: %s', e.message)
        sys.exit(1)
    floyd_logger.debug("Created module with id : {}".format(module_id))

    # Create experiment request
    # Get the actual command entered in the command line
    full_command = get_command_line(gpu, env, message, data, mode, open,
                                    tensorboard, command)
    experiment_request = ExperimentRequest(
        name=experiment_name,
        description=message,
        full_command=full_command,
        module_id=module_id,
        data_ids=data_ids,
        family_id=experiment_config.family_id,
        instance_type=instance_type)
    expt_cli = ExperimentClient()
    expt_info = expt_cli.create(experiment_request)
    floyd_logger.debug("Created job : {}".format(expt_info['id']))

    table_output = [["JOB NAME"], [expt_info['name']]]
    floyd_logger.info(tabulate(table_output, headers="firstrow"))
    floyd_logger.info("")

    if mode in ['jupyter', 'serve']:
        while True:
            # Wait for the experiment / task instances to become available
            try:
                experiment = expt_cli.get(expt_info['id'])
                if experiment.task_instances:
                    break
            except Exception:
                # Deliberate best-effort polling: a failed lookup is treated
                # like "not ready yet" and reported once by the log below.
                pass

            floyd_logger.debug("Job not available yet: {}".format(
                expt_info['id']))
            sleep(3)

        # Print the path to jupyter notebook
        if mode == 'jupyter':
            jupyter_url = experiment.service_url
            print(
                "Setting up your instance and waiting for Jupyter notebook to become available ...",
                end='')
            if wait_for_url(jupyter_url,
                            sleep_duration_seconds=2,
                            iterations=900):
                floyd_logger.info(
                    "\nPath to jupyter notebook: {}".format(jupyter_url))
                if open:
                    webbrowser.open(jupyter_url)
            else:
                # The URL did not respond in time; still show it, and point
                # the user at the logs.
                floyd_logger.info(
                    "\nPath to jupyter notebook: {}".format(jupyter_url))
                floyd_logger.info(
                    "Notebook is still loading. View logs to track progress")
                floyd_logger.info("   floyd logs {}".format(expt_info['name']))

        # Print the path to serving endpoint
        if mode == 'serve':
            floyd_logger.info("Path to service endpoint: {}".format(
                experiment.service_url))

        # Timeouts shorter than 4 hours get an explanatory notice
        if experiment.timeout_seconds < 4 * 60 * 60:
            floyd_logger.info(
                "\nYour job timeout is currently set to {} seconds".format(
                    experiment.timeout_seconds))
            floyd_logger.info(
                "This is because you are in a trial account. Paid users will have longer timeouts. "
                "See https://www.floydhub.com/pricing for details")

    else:
        floyd_logger.info("To view logs enter:")
        floyd_logger.info("   floyd logs {}".format(expt_info['name']))
예제 #6
0
파일: run.py 프로젝트: longhuei/floyd-cli
def run(ctx, cpu, gpu, env, message, data, mode, open_notebook, follow,
        tensorboard, gpup, cpup, gpu2, cpu2, max_runtime, task, command):
    """
    Run a command on Floyd. Floyd will upload contents of the
    current directory and run your command remotely.
    This command will generate a run id for reference.
    """
    # Exit codes used below: 1 = bad --env usage or unknown project,
    # 2 = dataset processing failed, 3 = invalid environment or a command
    # combined with jupyter/serve mode, 4 = module creation rejected.

    # cli_default is used for any option that has default value
    cli_default = {'description': '', 'command': ''}
    # Error early if more than one --env is passed.  Then get the first/only
    # --env out of the list so all other operations work normally (they don't
    # expect an iterable). For details on this approach, see the comment above
    # the --env click option
    if not env:
        cli_default['env'] = DEFAULT_ENV
        env = None
    elif len(env) > 1:
        floyd_logger.error(
            "You passed more than one environment: {}. Please specify a single environment."
            .format(env))
        sys.exit(1)
    else:
        env = env[0]

    if not mode:
        cli_default['mode'] = 'command'

    experiment_config = ExperimentConfigManager.get_config()
    access_token = AuthConfigManager.get_access_token()
    # The project namespace falls back to the logged-in user's name
    namespace = experiment_config.namespace or access_token.username

    if not ProjectClient().exists(experiment_config.name, namespace=namespace):
        floyd_logger.error(
            'Invalid project id, please run '
            '"floyd init PROJECT_NAME" before scheduling a job.')
        sys.exit(1)

    experiment_name = "{}/{}".format(namespace, experiment_config.name)

    success, data_ids = process_data_ids(data)
    if not success:
        sys.exit(2)

    # Create module
    # With zero or one dataset the input gets the default name 'input'
    default_name = 'input' if len(data_ids) <= 1 else None
    module_inputs = [{
        'name': get_data_name(data_str, default_name),
        'type': 'dir'
    } for data_str in data_ids]

    # The first flag that is set, in this order, decides the instance type;
    # with no flag it stays None and only the CLI default is recorded.
    instance_type = None
    if gpu2:
        instance_type = G2_INSTANCE_TYPE
    elif cpu2:
        instance_type = C2_INSTANCE_TYPE
    elif gpup:
        instance_type = G1P_INSTANCE_TYPE
    elif cpup:
        instance_type = C1P_INSTANCE_TYPE
    elif gpu:
        instance_type = G1_INSTANCE_TYPE
    elif cpu:
        instance_type = C1_INSTANCE_TYPE

    if not instance_type:
        cli_default['instance_type'] = C1_INSTANCE_TYPE

    yaml_config = read_yaml_config()
    arch = INSTANCE_ARCH_MAP[resolve_final_instance_type(
        instance_type, yaml_config, task, cli_default)]
    # cli_default['env'] exists exactly when env is None (see above), so the
    # `or` never reaches a missing key.
    if not validate_env(env or cli_default['env'], arch):
        sys.exit(3)

    command_str = ' '.join(command)
    if command_str and mode in ('jupyter', 'serve'):
        floyd_logger.error(
            'Command argument "%s" cannot be used with mode: %s.\nSee http://docs.floydhub.com/guides/run_a_job/#mode for more information about run modes.',
            command_str, mode)  # noqa
        sys.exit(3)
    if command_str == '':
        # set to none so it won't override floyd config
        command_str = None

    module = Module(name=experiment_name,
                    description=message or '',
                    command=command_str,
                    mode=mode,
                    enable_tensorboard=tensorboard,
                    family_id=experiment_config.family_id,
                    inputs=module_inputs,
                    env=env,
                    instance_type=instance_type,
                    yaml_config=yaml_config,
                    task=task)

    try:
        module_id = ModuleClient().create(module, cli_default)
    except BadRequestException as e:
        if 'Project not found, ID' in e.message:
            floyd_logger.error(
                'ERROR: Please run "floyd init PROJECT_NAME" before scheduling a job.'
            )
        else:
            floyd_logger.error('ERROR: %s', e.message)
        sys.exit(4)
    floyd_logger.debug("Created module with id : %s", module_id)

    # Create experiment request
    # Get the actual command entered in the command line
    if max_runtime:
        max_runtime = int(max_runtime)
    full_command = get_command_line(instance_type, env, message, data, mode,
                                    open_notebook, tensorboard, command_str)
    experiment_request = ExperimentRequest(
        name=experiment_name,
        description=message,
        full_command=full_command,
        module_id=module_id,
        max_runtime=max_runtime,
        env=env,
        data_ids=data_ids,
        family_id=experiment_config.family_id,
        instance_type=instance_type,
        yaml_config=yaml_config,
        task=task)
    expt_client = ExperimentClient()
    expt_info = expt_client.create(experiment_request, cli_default)
    floyd_logger.debug("Created job : %s", expt_info['id'])

    job_name = expt_info['name']
    if not follow:
        show_new_job_info(expt_client, job_name, expt_info, mode,
                          open_notebook)
    else:
        # If the user specified --follow, we assume they're only interested in
        # log output and not in anything that would be displayed by
        # show_new_job_info.
        floyd_logger.info("Opening logs ...")
        instance_log_id = get_log_id(job_name)
        follow_logs(instance_log_id)
예제 #7
0
def run(ctx, gpu, env, message, data, mode, open_notebook, tensorboard, gpup,
        cpup, gpu2, cpu2, command):
    """
    Run a command on Floyd. Floyd will upload contents of the
    current directory and run your command remotely.
    This command will generate a run id for reference.
    """
    # --env arrives as a sequence. Reject more than one value up front, then
    # unwrap the single entry so the rest of the function deals with a plain
    # value (see the comment above the --env click option for the rationale).
    if len(env) > 1:
        too_many_envs = (
            "You passed more than one environment: {}. Please specify a single environment."
            .format(env))
        floyd_logger.error(too_many_envs)
        sys.exit(1)
    env = env[0]

    config = ExperimentConfigManager.get_config()
    token = AuthConfigManager.get_access_token()
    # The project namespace falls back to the logged-in user's name.
    namespace = config.namespace or token.username

    project_found = ProjectClient().exists(config.name, namespace=namespace)
    if not project_found:
        floyd_logger.error(
            'Invalid project id, please run "floyd init PROJECT_NAME" before scheduling a job.')
        sys.exit(1)

    experiment_name = "{}/{}".format(namespace, config.name)

    ok, data_ids = process_data_ids(data)
    if not ok:
        sys.exit(2)

    # Describe the module inputs: one 'dir' input per dataset. With zero or
    # one dataset the input gets the default name 'input'.
    default_name = None
    if len(data_ids) <= 1:
        default_name = 'input'
    module_inputs = []
    for data_str in data_ids:
        module_inputs.append({
            'name': get_data_name(data_str, default_name),
            'type': 'dir',
        })

    # The first flag that is set, in this order, decides the instance type.
    if gpu2:
        instance_type = G2_INSTANCE_TYPE
    elif cpu2:
        instance_type = C2_INSTANCE_TYPE
    elif gpup:
        instance_type = G1P_INSTANCE_TYPE
    elif cpup:
        instance_type = C1P_INSTANCE_TYPE
    elif gpu:
        instance_type = G1_INSTANCE_TYPE
    else:
        instance_type = C1_INSTANCE_TYPE

    env_is_valid = validate_env(env, instance_type)
    if not env_is_valid:
        sys.exit(3)

    # A command makes no sense for the jupyter / serve modes.
    command_str = ' '.join(command)
    if command_str and mode in ('jupyter', 'serve'):
        floyd_logger.error(
            'Command argument "%s" cannot be used with mode: %s.\nSee http://docs.floydhub.com/guides/run_a_job/#mode for more information about run modes.',
            command_str, mode)  # noqa
        sys.exit(3)

    # Create module
    module_fields = {
        'name': experiment_name,
        'description': message or '',
        'command': command_str,
        'mode': get_mode_parameter(mode),
        'enable_tensorboard': tensorboard,
        'family_id': config.family_id,
        'inputs': module_inputs,
        'env': env,
        'arch': INSTANCE_ARCH_MAP[instance_type],
    }
    module = Module(**module_fields)

    try:
        module_id = ModuleClient().create(module)
    except BadRequestException as e:
        if 'Project not found, ID' not in e.message:
            floyd_logger.error('ERROR: %s', e.message)
        else:
            floyd_logger.error(
                'ERROR: Please run "floyd init PROJECT_NAME" before scheduling a job.'
            )
        sys.exit(4)
    else:
        floyd_logger.debug("Created module with id : %s", module_id)

    # Create experiment request, recording the command line as it was entered
    request_fields = {
        'name': experiment_name,
        'description': message,
        'full_command': get_command_line(instance_type, env, message, data,
                                         mode, open_notebook, tensorboard,
                                         command_str),
        'module_id': module_id,
        'env': env,
        'data_ids': data_ids,
        'family_id': config.family_id,
        'instance_type': instance_type,
    }
    experiment_request = ExperimentRequest(**request_fields)
    expt_client = ExperimentClient()
    expt_info = expt_client.create(experiment_request)
    floyd_logger.debug("Created job : %s", expt_info['id'])

    show_new_job_info(expt_client, expt_info['name'], expt_info, mode,
                      open_notebook)
예제 #8
0
def run(ctx, gpu, env, data, mode, command):
    """
    Run a command on Floyd. Floyd will upload contents of the
    current directory and run your command remotely.
    This command will generate a run id for reference.
    """
    command_str = ' '.join(command)
    config = ExperimentConfigManager.get_config()
    token = AuthConfigManager.get_access_token()
    version = config.version
    experiment_name = "{}/{}:{}".format(token.username, config.name, version)

    # Create module
    module_fields = {
        'name': experiment_name,
        'description': version,
        'command': command_str,
        'mode': get_mode_parameter(mode),
        'family_id': config.family_id,
        'default_container': get_docker_image(env, gpu),
        'version': version,
    }
    module_id = ModuleClient().create(Module(**module_fields))
    floyd_logger.debug("Created module with id : {}".format(module_id))

    # Create experiment request
    if gpu:
        instance_type = GPU_INSTANCE_TYPE
    else:
        instance_type = CPU_INSTANCE_TYPE
    request_fields = {
        'name': experiment_name,
        'description': version,
        'module_id': module_id,
        'data_id': data,
        'predecessor': config.experiment_predecessor,
        'family_id': config.family_id,
        'version': version,
        'instance_type': instance_type,
    }
    experiment_id = ExperimentClient().create(
        ExperimentRequest(**request_fields))
    floyd_logger.debug("Created experiment : {}".format(experiment_id))

    # Update expt config including predecessor
    config.increment_version()
    config.set_module_predecessor(module_id)
    config.set_experiment_predecessor(experiment_id)
    ExperimentConfigManager.set_config(config)

    header_row = ["RUN ID", "NAME", "VERSION"]
    value_row = [experiment_id, experiment_name, version]
    floyd_logger.info(tabulate([header_row, value_row], headers="firstrow"))
    floyd_logger.info("")

    if mode != 'default':
        # Keep retrying, one second apart, until the experiment can be
        # fetched. There is no upper bound on the number of attempts.
        fetched = False
        while not fetched:
            try:
                experiment = ExperimentClient().get(experiment_id)
                fetched = True
            except Exception:
                floyd_logger.debug("Experiment not available yet: {}".format(experiment_id))
                sleep(1)

        if mode == 'jupyter':
            # Print the path to jupyter notebook
            task_instance_id = get_module_task_instance_id(experiment.task_instances)
            jupyter_url = get_task_url(task_instance_id)
            floyd_logger.info("Waiting for Jupyter notebook to become available ...")
            if not wait_for_url(jupyter_url):
                floyd_logger.info("Problem starting the notebook. View logs for more information")
            else:
                floyd_logger.info("\nPath to jupyter notebook: {}".format(jupyter_url))
        elif mode == 'serve':
            # Print the path to serving endpoint
            task_instance_id = get_module_task_instance_id(experiment.task_instances)
            service_url = get_task_url(task_instance_id)
            floyd_logger.info("Path to service endpoint: {}".format(service_url))

    floyd_logger.info("""
To view logs enter:
    floyd logs {}
        """.format(experiment_id))
예제 #9
0
def run(ctx, gpu, env, message, data, mode, open, tensorboard, gpup, cpup,
        command):
    """
    Run a command on Floyd. Floyd will upload contents of the
    current directory and run your command remotely.
    This command will generate a run id for reference.
    """
    config = ExperimentConfigManager.get_config()
    project_found = ProjectClient().exists(config.family_id)
    if not project_found:
        floyd_logger.error(
            'Invalid project id, please run "floyd init PROJECT_NAME" before scheduling a job.')
        sys.exit(1)

    token = AuthConfigManager.get_access_token()
    experiment_name = "{}/{}".format(token.username, config.name)

    ok, data_ids = process_data_ids(data)
    if not ok:
        sys.exit(2)

    # Describe the module inputs: one 'dir' input per dataset. With zero or
    # one dataset the input gets the default name 'input'.
    default_name = None
    if len(data_ids) <= 1:
        default_name = 'input'
    module_inputs = []
    for data_str in data_ids:
        module_inputs.append({
            'name': get_data_name(data_str, default_name),
            'type': 'dir',
        })

    # The first flag that is set, in this order, decides the instance type.
    if gpup:
        instance_type = G1P_INSTANCE_TYPE
    elif cpup:
        instance_type = C1P_INSTANCE_TYPE
    elif gpu:
        instance_type = G1_INSTANCE_TYPE
    else:
        instance_type = C1_INSTANCE_TYPE

    env_is_valid = validate_env(env, instance_type)
    if not env_is_valid:
        sys.exit(3)

    # A command makes no sense for the jupyter / serve modes.
    command_str = ' '.join(command)
    if command_str and mode in ('jupyter', 'serve'):
        floyd_logger.error(
            'Command argument "%s" cannot be used with mode: %s.\nSee http://docs.floydhub.com/guides/run_a_job/#mode for more information about run modes.',
            command_str, mode)
        sys.exit(3)

    # Create module
    module_fields = {
        'name': experiment_name,
        'description': message or '',
        'command': command_str,
        'mode': get_mode_parameter(mode),
        'enable_tensorboard': tensorboard,
        'family_id': config.family_id,
        'inputs': module_inputs,
        'env': env,
        'arch': INSTANCE_ARCH_MAP[instance_type],
    }
    module = Module(**module_fields)

    try:
        module_id = ModuleClient().create(module)
    except BadRequestException as e:
        if 'Project not found, ID' not in e.message:
            floyd_logger.error('ERROR: %s', e.message)
        else:
            floyd_logger.error(
                'ERROR: Please run "floyd init PROJECT_NAME" before scheduling a job.'
            )
        sys.exit(4)
    else:
        floyd_logger.debug("Created module with id : %s", module_id)

    # Create experiment request, recording the command line as it was entered
    request_fields = {
        'name': experiment_name,
        'description': message,
        'full_command': get_command_line(instance_type, env, message, data,
                                         mode, open, tensorboard,
                                         command_str),
        'module_id': module_id,
        'data_ids': data_ids,
        'family_id': config.family_id,
        'instance_type': instance_type,
    }
    experiment_request = ExperimentRequest(**request_fields)
    expt_client = ExperimentClient()
    expt_info = expt_client.create(experiment_request)
    floyd_logger.debug("Created job : %s", expt_info['id'])

    # Show the job name in a one-column table framed by blank lines, then
    # hand over to the shared new-job reporting.
    job_name = normalize_job_name(expt_info['name'])
    name_table = tabulate([["JOB NAME"], [job_name]], headers="firstrow")
    floyd_logger.info("")
    floyd_logger.info(name_table)
    floyd_logger.info("")
    show_new_job_info(expt_client, job_name, expt_info, mode)