def clone(id):
    """
    Download files from a job.

    This will download the files that were originally uploaded at
    the start of the job.
    """
    # Prefer the normalized job name; if that lookup raises FloydException,
    # retry with the identifier exactly as it was passed in.
    try:
        normalized_name = normalize_job_name(id, use_config=False)
        job = ExperimentClient().get(normalized_name)
    except FloydException:
        job = ExperimentClient().get(id)

    # Only query the task instance when the job actually exposes an id for it.
    instance_id = get_module_task_instance_id(job.task_instances)
    instance = None
    if instance_id:
        instance = TaskInstanceClient().get(instance_id)

    if not instance:
        sys.exit(
            "Cannot clone this version of the job. Try a different version.")

    # `instance` is guaranteed truthy past the guard above.
    code_module = ModuleClient().get(instance.module_id)
    url_template = "{}/api/v1/resources/{}?content=true&download=true"
    download_url = url_template.format(floyd.floyd_host,
                                       code_module.resource_id)
    ExperimentClient().download_tar(url=download_url,
                                    untar=True,
                                    delete_after_untar=True)
def delete(id, yes):
    """
    Delete project run
    """
    # `id` identifies the experiment to delete; `yes` skips the interactive
    # confirmation prompt when truthy.
    experiment = ExperimentClient().get(id)

    # Active experiments must be stopped before they can be removed.
    if experiment.state in ["queued", "running"]:
        floyd_logger.info(
            "Experiment in {} state cannot be deleted. Stop it first".format(
                experiment.state))
        return

    if not yes:
        # abort=True makes click raise Abort when the user declines.
        click.confirm('Delete Run: {}?'.format(experiment.name),
                      abort=True,
                      default=False)

    # Look up the task instance only once deletion is certain to proceed, and
    # tolerate experiments that have no module task instance: without this
    # guard a missing id/instance would raise before the experiment itself
    # could be deleted.
    task_instance_id = get_module_task_instance_id(experiment.task_instances)
    task_instance = None
    if task_instance_id:
        task_instance = TaskInstanceClient().get(task_instance_id)

    if task_instance and task_instance.module_id:
        ModuleClient().delete(task_instance.module_id)

    if ExperimentClient().delete(id):
        floyd_logger.info("Experiment deleted")
    else:
        floyd_logger.error("Failed to delete experiment")
def clone(id):
    """
    Download the code for the experiment to the current path
    """
    job = ExperimentClient().get(id)

    # Only query the task instance when the job actually exposes an id for it.
    instance_id = get_module_task_instance_id(job.task_instances)
    instance = None
    if instance_id:
        instance = TaskInstanceClient().get(instance_id)

    if not instance:
        sys.exit("Cannot clone this version of the job. Try a different version.")

    # `instance` is guaranteed truthy past the guard above.
    code_module = ModuleClient().get(instance.module_id)
    url_template = "{}/api/v1/resources/{}?content=true&download=true"
    download_url = url_template.format(floyd.floyd_host,
                                       code_module.resource_id)
    ExperimentClient().download_tar(url=download_url,
                                    untar=True,
                                    delete_after_untar=True)
def clone(id, path):
    """
    - Download all files from a job

    Eg: alice/projects/mnist/1/

    Note: This will download the files that were originally uploaded at
    the start of the job.

    - Download files in a specific path from a job

    Specify the path to a directory and download all its files and subdirectories.

    Eg: --path models/checkpoint1
    """
    # Prefer the normalized job name; if that lookup raises FloydException,
    # retry with the identifier exactly as it was passed in.
    try:
        normalized_name = normalize_job_name(id, use_config=False)
        job = ExperimentClient().get(normalized_name)
    except FloydException:
        job = ExperimentClient().get(id)

    # Only query the task instance when the job actually exposes an id for it.
    instance_id = get_module_task_instance_id(job.task_instances)
    instance = None
    if instance_id:
        instance = TaskInstanceClient().get(instance_id)

    if not instance:
        sys.exit(
            "Cannot clone this version of the job. Try a different version.")

    # `instance` is guaranteed truthy past the guard above.
    code_module = ModuleClient().get(instance.module_id)

    if not path:
        # Download the full Code
        download_url = "{}/api/v1/resources/{}?content=true&download=true".format(
            floyd.floyd_host, code_module.resource_id)
    else:
        # Download a directory from Code
        # NOTE(review): `path` is placed in the query string without URL
        # encoding; confirm how characters such as '&' or '#' are handled.
        download_url = "{}/api/v1/download/artifacts/code/{}?is_dir=true&path={}".format(
            floyd.floyd_host, job.id, path)

    ExperimentClient().download_tar(url=download_url,
                                    untar=True,
                                    delete_after_untar=True)
def run(ctx, gpu, env, message, data, mode, open, tensorboard, command):
    """
    Run a command on Floyd. Floyd will upload contents of the
    current directory and run your command remotely.
    This command will generate a run id for reference.
    """
    # Parameters, as used in this body:
    #   ctx         -- not referenced here.
    #   gpu         -- truthy selects the 'gpu' arch / GPU instance type,
    #                  otherwise the 'cpu' arch / CPU instance type.
    #   env         -- environment name; must be a key of the environments
    #                  returned by EnvClient for the chosen arch.
    #   message     -- job description (may be falsy).
    #   data        -- sequence of "NAME_OR_ID" or "NAME_OR_ID:PATH" strings
    #                  (at most 5).
    #   mode        -- 'jupyter' and 'serve' get extra handling; any other
    #                  value only prints the logs hint.
    #   open        -- when truthy, open the notebook URL in a browser.
    #   tensorboard -- forwarded to the module as enable_tensorboard.
    #   command     -- iterable of strings, joined with single spaces.
    # Validation failures log an error and return; a rejected module creation
    # exits the process with status 1.
    experiment_config = ExperimentConfigManager.get_config()
    if not ProjectClient().exists(experiment_config.family_id):
        floyd_logger.error(
            'Invalid project id, please run '
            '"floyd init PROJECT_NAME" before scheduling a job.')
        return

    access_token = AuthConfigManager.get_access_token()
    experiment_name = "{}/{}".format(access_token.username,
                                     experiment_config.name)

    # Create module
    if len(data) > 5:
        floyd_logger.error("Cannot attach more than 5 datasets to an job")
        return

    # Get the data entity from the server to:
    # 1. Confirm that the data id or uri exists and has the right permissions
    # 2. If uri is used, get the id of the dataset
    data_ids = []
    for data_spec in data:
        # A spec with more than one ':' used to crash with an unpacking
        # ValueError; report it like every other validation failure instead.
        if data_spec.count(':') > 1:
            floyd_logger.error(
                "Invalid data argument (expected NAME or NAME:PATH): {}".format(
                    data_spec))
            return

        # `path` is '' when there is no ':' or nothing follows it; both cases
        # are treated as "no path".
        data_name_or_id, _, path = data_spec.partition(':')
        data_obj = DataClient().get(data_name_or_id)
        if not data_obj:
            floyd_logger.error(
                "Data not found for name or id: {}".format(data_name_or_id))
            return
        data_ids.append(
            "{}:{}".format(data_obj.id, path) if path else data_obj.id)

    default_name = 'input' if len(data_ids) <= 1 else None
    module_inputs = [{
        'name': get_data_name(data_str, default_name),
        'type': 'dir'
    } for data_str in data_ids]

    if gpu:
        arch = 'gpu'
        instance_type = GPU_INSTANCE_TYPE
    else:
        arch = 'cpu'
        instance_type = CPU_INSTANCE_TYPE

    # The requested environment must be offered for the selected arch.
    env_map = EnvClient().get_all()
    envs = env_map.get(arch)
    if envs:
        if env not in envs:
            floyd_logger.error(
                "{} is not in the list of supported environments: {}".format(
                    env, ', '.join(envs.keys())))
            return
    else:
        floyd_logger.error("{} is not a supported architecture".format(arch))
        return

    command_str = ' '.join(command)
    module = Module(name=experiment_name,
                    description=message or '',
                    command=command_str,
                    mode=get_mode_parameter(mode),
                    enable_tensorboard=tensorboard,
                    family_id=experiment_config.family_id,
                    inputs=module_inputs,
                    env=env,
                    arch=arch)

    from floyd.exceptions import BadRequestException
    try:
        module_id = ModuleClient().create(module)
    except BadRequestException as e:
        if 'Project not found, ID' in e.message:
            floyd_logger.error(
                'ERROR: Please run "floyd init PROJECT_NAME" before scheduling a job.'
            )
        else:
            floyd_logger.error('ERROR: %s', e.message)
        sys.exit(1)
    floyd_logger.debug("Created module with id : {}".format(module_id))

    # Create experiment request
    # Get the actual command entered in the command line
    full_command = get_command_line(gpu, env, message, data, mode, open,
                                    tensorboard, command)
    experiment_request = ExperimentRequest(
        name=experiment_name,
        description=message,
        full_command=full_command,
        module_id=module_id,
        data_ids=data_ids,
        family_id=experiment_config.family_id,
        instance_type=instance_type)
    expt_cli = ExperimentClient()
    expt_info = expt_cli.create(experiment_request)
    floyd_logger.debug("Created job : {}".format(expt_info['id']))

    table_output = [["JOB NAME"], [expt_info['name']]]
    floyd_logger.info(tabulate(table_output, headers="firstrow"))
    floyd_logger.info("")

    if mode in ['jupyter', 'serve']:
        while True:
            # Wait for the experiment / task instances to become available
            try:
                experiment = expt_cli.get(expt_info['id'])
                if experiment.task_instances:
                    break
            except Exception:
                # Best-effort polling: keep retrying, but record why the
                # lookup failed instead of logging the same line twice.
                floyd_logger.debug("Error while fetching job: %s",
                                   expt_info['id'],
                                   exc_info=True)

            floyd_logger.debug("Job not available yet: {}".format(
                expt_info['id']))
            sleep(3)

        # Print the path to jupyter notebook
        if mode == 'jupyter':
            jupyter_url = experiment.service_url
            print(
                "Setting up your instance and waiting for Jupyter notebook to become available ...",
                end='')
            if wait_for_url(jupyter_url,
                            sleep_duration_seconds=2,
                            iterations=900):
                floyd_logger.info(
                    "\nPath to jupyter notebook: {}".format(jupyter_url))
                if open:
                    webbrowser.open(jupyter_url)
            else:
                floyd_logger.info(
                    "\nPath to jupyter notebook: {}".format(jupyter_url))
                floyd_logger.info(
                    "Notebook is still loading. View logs to track progress")
                floyd_logger.info(" floyd logs {}".format(expt_info['name']))

        # Print the path to serving endpoint
        if mode == 'serve':
            floyd_logger.info("Path to service endpoint: {}".format(
                experiment.service_url))

        # `experiment` is only bound inside this branch, so the timeout hint
        # belongs here. Timeouts under four hours trigger the notice.
        if experiment.timeout_seconds < 4 * 60 * 60:
            floyd_logger.info(
                "\nYour job timeout is currently set to {} seconds".format(
                    experiment.timeout_seconds))
            floyd_logger.info(
                "This is because you are in a trial account. Paid users will have longer timeouts. "
                "See https://www.floydhub.com/pricing for details")
    else:
        floyd_logger.info("To view logs enter:")
        floyd_logger.info(" floyd logs {}".format(expt_info['name']))
def run(ctx, cpu, gpu, env, message, data, mode, open_notebook, follow,
        tensorboard, gpup, cpup, gpu2, cpu2, max_runtime, task, command):
    """
    Run a command on Floyd. Floyd will upload contents of the
    current directory and run your command remotely.
    This command will generate a run id for reference.
    """
    # Defaults for options the user did not set explicitly are collected here
    # and handed to the API clients separately from the explicit values.
    cli_default = {'description': '', 'command': ''}

    # --env arrives as a sequence: reject more than one value, then unwrap the
    # single value so the rest of the function can treat it as a scalar.
    if env and len(env) > 1:
        floyd_logger.error(
            "You passed more than one environment: {}. Please specify a single environment."
            .format(env))
        sys.exit(1)
    if env:
        env = env[0]
    else:
        cli_default['env'] = DEFAULT_ENV
        env = None

    if not mode:
        cli_default['mode'] = 'command'

    config = ExperimentConfigManager.get_config()
    token = AuthConfigManager.get_access_token()
    namespace = config.namespace or token.username

    project_exists = ProjectClient().exists(config.name, namespace=namespace)
    if not project_exists:
        floyd_logger.error(
            'Invalid project id, please run '
            '"floyd init PROJECT_NAME" before scheduling a job.')
        sys.exit(1)

    experiment_name = "{}/{}".format(namespace, config.name)

    success, data_ids = process_data_ids(data)
    if not success:
        sys.exit(2)

    # Create module
    default_name = None if len(data_ids) > 1 else 'input'
    module_inputs = []
    for data_str in data_ids:
        module_inputs.append({
            'name': get_data_name(data_str, default_name),
            'type': 'dir'
        })

    # The first truthy flag wins, in this priority order.
    instance_type = None
    flag_priority = (
        (gpu2, G2_INSTANCE_TYPE),
        (cpu2, C2_INSTANCE_TYPE),
        (gpup, G1P_INSTANCE_TYPE),
        (cpup, C1P_INSTANCE_TYPE),
        (gpu, G1_INSTANCE_TYPE),
        (cpu, C1_INSTANCE_TYPE),
    )
    for flag, candidate in flag_priority:
        if flag:
            instance_type = candidate
            break

    if not instance_type:
        cli_default['instance_type'] = C1_INSTANCE_TYPE

    yaml_config = read_yaml_config()
    final_instance_type = resolve_final_instance_type(instance_type,
                                                      yaml_config, task,
                                                      cli_default)
    arch = INSTANCE_ARCH_MAP[final_instance_type]
    if not validate_env(env or cli_default['env'], arch):
        sys.exit(3)

    command_str = ' '.join(command)
    if command_str and mode in ('jupyter', 'serve'):
        floyd_logger.error(
            'Command argument "%s" cannot be used with mode: %s.\nSee http://docs.floydhub.com/guides/run_a_job/#mode for more information about run modes.',
            command_str, mode)  # noqa
        sys.exit(3)
    if command_str == '':
        # set to none so it won't override floyd config
        command_str = None

    module = Module(name=experiment_name,
                    description=message or '',
                    command=command_str,
                    mode=mode,
                    enable_tensorboard=tensorboard,
                    family_id=config.family_id,
                    inputs=module_inputs,
                    env=env,
                    instance_type=instance_type,
                    yaml_config=yaml_config,
                    task=task)

    try:
        module_id = ModuleClient().create(module, cli_default)
    except BadRequestException as e:
        if 'Project not found, ID' not in e.message:
            floyd_logger.error('ERROR: %s', e.message)
        else:
            floyd_logger.error(
                'ERROR: Please run "floyd init PROJECT_NAME" before scheduling a job.'
            )
        sys.exit(4)
    floyd_logger.debug("Created module with id : %s", module_id)

    # Create experiment request
    # Get the actual command entered in the command line
    if max_runtime:
        max_runtime = int(max_runtime)
    full_command = get_command_line(instance_type, env, message, data, mode,
                                    open_notebook, tensorboard, command_str)
    experiment_request = ExperimentRequest(
        name=experiment_name,
        description=message,
        full_command=full_command,
        module_id=module_id,
        max_runtime=max_runtime,
        env=env,
        data_ids=data_ids,
        family_id=config.family_id,
        instance_type=instance_type,
        yaml_config=yaml_config,
        task=task)
    expt_client = ExperimentClient()
    expt_info = expt_client.create(experiment_request, cli_default)
    floyd_logger.debug("Created job : %s", expt_info['id'])

    job_name = expt_info['name']
    if follow:
        # With --follow the user is assumed to want only the log output, not
        # the summary that show_new_job_info would print.
        floyd_logger.info("Opening logs ...")
        instance_log_id = get_log_id(job_name)
        follow_logs(instance_log_id)
    else:
        show_new_job_info(expt_client, job_name, expt_info, mode,
                          open_notebook)
def run(ctx, gpu, env, message, data, mode, open_notebook, tensorboard, gpup,
        cpup, gpu2, cpu2, command):
    """
    Run a command on Floyd. Floyd will upload contents of the
    current directory and run your command remotely.
    This command will generate a run id for reference.
    """
    # --env arrives as a sequence: reject more than one value, then unwrap the
    # first/only value so the rest of the function can treat it as a scalar.
    too_many_envs = len(env) > 1
    if too_many_envs:
        floyd_logger.error(
            "You passed more than one environment: {}. Please specify a single environment."
            .format(env))
        sys.exit(1)
    env = env[0]

    config = ExperimentConfigManager.get_config()
    token = AuthConfigManager.get_access_token()
    namespace = config.namespace or token.username

    project_exists = ProjectClient().exists(config.name, namespace=namespace)
    if not project_exists:
        floyd_logger.error(
            'Invalid project id, please run '
            '"floyd init PROJECT_NAME" before scheduling a job.')
        sys.exit(1)

    experiment_name = "{}/{}".format(namespace, config.name)

    success, data_ids = process_data_ids(data)
    if not success:
        sys.exit(2)

    # Create module
    default_name = None if len(data_ids) > 1 else 'input'
    module_inputs = []
    for data_str in data_ids:
        module_inputs.append({
            'name': get_data_name(data_str, default_name),
            'type': 'dir'
        })

    # The first truthy flag wins, in this priority order; with no flag set
    # the C1 instance type is used.
    instance_type = C1_INSTANCE_TYPE
    flag_priority = (
        (gpu2, G2_INSTANCE_TYPE),
        (cpu2, C2_INSTANCE_TYPE),
        (gpup, G1P_INSTANCE_TYPE),
        (cpup, C1P_INSTANCE_TYPE),
        (gpu, G1_INSTANCE_TYPE),
    )
    for flag, candidate in flag_priority:
        if flag:
            instance_type = candidate
            break

    if not validate_env(env, instance_type):
        sys.exit(3)

    command_str = ' '.join(command)
    if command_str and mode in ('jupyter', 'serve'):
        floyd_logger.error(
            'Command argument "%s" cannot be used with mode: %s.\nSee http://docs.floydhub.com/guides/run_a_job/#mode for more information about run modes.',
            command_str, mode)  # noqa
        sys.exit(3)

    module = Module(name=experiment_name,
                    description=message or '',
                    command=command_str,
                    mode=get_mode_parameter(mode),
                    enable_tensorboard=tensorboard,
                    family_id=config.family_id,
                    inputs=module_inputs,
                    env=env,
                    arch=INSTANCE_ARCH_MAP[instance_type])

    try:
        module_id = ModuleClient().create(module)
    except BadRequestException as e:
        if 'Project not found, ID' not in e.message:
            floyd_logger.error('ERROR: %s', e.message)
        else:
            floyd_logger.error(
                'ERROR: Please run "floyd init PROJECT_NAME" before scheduling a job.'
            )
        sys.exit(4)
    floyd_logger.debug("Created module with id : %s", module_id)

    # Create experiment request
    # Get the actual command entered in the command line
    full_command = get_command_line(instance_type, env, message, data, mode,
                                    open_notebook, tensorboard, command_str)
    experiment_request = ExperimentRequest(
        name=experiment_name,
        description=message,
        full_command=full_command,
        module_id=module_id,
        env=env,
        data_ids=data_ids,
        family_id=config.family_id,
        instance_type=instance_type)
    expt_client = ExperimentClient()
    expt_info = expt_client.create(experiment_request)
    floyd_logger.debug("Created job : %s", expt_info['id'])

    job_name = expt_info['name']
    show_new_job_info(expt_client, job_name, expt_info, mode, open_notebook)
def run(ctx, gpu, env, data, mode, command):
    """
    Run a command on Floyd. Floyd will upload contents of the
    current directory and run your command remotely.
    This command will generate a run id for reference.
    """
    # Parameters, as used in this body:
    #   ctx     -- not referenced here.
    #   gpu     -- truthy selects GPU_INSTANCE_TYPE, otherwise
    #              CPU_INSTANCE_TYPE; also passed to get_docker_image.
    #   env     -- passed to get_docker_image to pick the default container.
    #   data    -- forwarded unchanged as the experiment's data_id.
    #   mode    -- 'default' skips the wait; 'jupyter' and 'serve' print a URL.
    #   command -- iterable of strings, joined with single spaces.
    # NOTE(review): the whitespace inside the final triple-quoted message was
    # already collapsed in the reviewed source; confirm the intended layout.
    command_str = ' '.join(command)
    experiment_config = ExperimentConfigManager.get_config()
    access_token = AuthConfigManager.get_access_token()
    version = experiment_config.version
    # Name format: <username>/<project name>:<version>
    experiment_name = "{}/{}:{}".format(access_token.username,
                                        experiment_config.name,
                                        version)

    # Create module
    module = Module(name=experiment_name,
                    description=version,
                    command=command_str,
                    mode=get_mode_parameter(mode),
                    family_id=experiment_config.family_id,
                    default_container=get_docker_image(env, gpu),
                    version=version)
    module_id = ModuleClient().create(module)
    floyd_logger.debug("Created module with id : {}".format(module_id))

    # Create experiment request
    instance_type = GPU_INSTANCE_TYPE if gpu else CPU_INSTANCE_TYPE
    experiment_request = ExperimentRequest(name=experiment_name,
                                           description=version,
                                           module_id=module_id,
                                           data_id=data,
                                           predecessor=experiment_config.experiment_predecessor,
                                           family_id=experiment_config.family_id,
                                           version=version,
                                           instance_type=instance_type)
    experiment_id = ExperimentClient().create(experiment_request)
    floyd_logger.debug("Created experiment : {}".format(experiment_id))

    # Update expt config including predecessor
    # The bumped version and the new module/experiment ids are written back
    # through ExperimentConfigManager.set_config.
    experiment_config.increment_version()
    experiment_config.set_module_predecessor(module_id)
    experiment_config.set_experiment_predecessor(experiment_id)
    ExperimentConfigManager.set_config(experiment_config)

    table_output = [["RUN ID", "NAME", "VERSION"],
                    [experiment_id, experiment_name, version]]
    floyd_logger.info(tabulate(table_output, headers="firstrow"))
    floyd_logger.info("")

    if mode != 'default':
        # NOTE(review): this loop has no retry limit and treats every
        # Exception as "not ready yet"; it only ends once get() succeeds.
        while True:
            # Wait for the experiment to become available
            try:
                experiment = ExperimentClient().get(experiment_id)
                break
            except Exception:
                floyd_logger.debug("Experiment not available yet: {}".format(experiment_id))
                sleep(1)
                continue

        # Print the path to jupyter notebook
        if mode == 'jupyter':
            jupyter_url = get_task_url(get_module_task_instance_id(experiment.task_instances))
            floyd_logger.info("Waiting for Jupyter notebook to become available ...")
            if wait_for_url(jupyter_url):
                floyd_logger.info("\nPath to jupyter notebook: {}".format(jupyter_url))
            else:
                floyd_logger.info("Problem starting the notebook. View logs for more information")

        # Print the path to serving endpoint
        if mode == 'serve':
            floyd_logger.info("Path to service endpoint: {}".format(
                get_task_url(get_module_task_instance_id(experiment.task_instances))))

    # Printed for every mode: how to inspect the logs of the new run.
    floyd_logger.info(""" To view logs enter: floyd logs {} """.format(experiment_id))
def run(ctx, gpu, env, message, data, mode, open, tensorboard, gpup, cpup,
        command):
    """
    Run a command on Floyd. Floyd will upload contents of the
    current directory and run your command remotely.
    This command will generate a run id for reference.
    """
    config = ExperimentConfigManager.get_config()
    project_exists = ProjectClient().exists(config.family_id)
    if not project_exists:
        floyd_logger.error(
            'Invalid project id, please run '
            '"floyd init PROJECT_NAME" before scheduling a job.')
        sys.exit(1)

    token = AuthConfigManager.get_access_token()
    experiment_name = "{}/{}".format(token.username, config.name)

    success, data_ids = process_data_ids(data)
    if not success:
        sys.exit(2)

    # Create module
    default_name = None if len(data_ids) > 1 else 'input'
    module_inputs = []
    for data_str in data_ids:
        module_inputs.append({
            'name': get_data_name(data_str, default_name),
            'type': 'dir'
        })

    # The first truthy flag wins, in this priority order; with no flag set
    # the C1 instance type is used.
    instance_type = C1_INSTANCE_TYPE
    flag_priority = (
        (gpup, G1P_INSTANCE_TYPE),
        (cpup, C1P_INSTANCE_TYPE),
        (gpu, G1_INSTANCE_TYPE),
    )
    for flag, candidate in flag_priority:
        if flag:
            instance_type = candidate
            break

    if not validate_env(env, instance_type):
        sys.exit(3)

    command_str = ' '.join(command)
    if command_str and mode in ('jupyter', 'serve'):
        floyd_logger.error(
            'Command argument "%s" cannot be used with mode: %s.\nSee http://docs.floydhub.com/guides/run_a_job/#mode for more information about run modes.',
            command_str, mode)
        sys.exit(3)

    module = Module(name=experiment_name,
                    description=message or '',
                    command=command_str,
                    mode=get_mode_parameter(mode),
                    enable_tensorboard=tensorboard,
                    family_id=config.family_id,
                    inputs=module_inputs,
                    env=env,
                    arch=INSTANCE_ARCH_MAP[instance_type])

    try:
        module_id = ModuleClient().create(module)
    except BadRequestException as e:
        if 'Project not found, ID' not in e.message:
            floyd_logger.error('ERROR: %s', e.message)
        else:
            floyd_logger.error(
                'ERROR: Please run "floyd init PROJECT_NAME" before scheduling a job.'
            )
        sys.exit(4)
    floyd_logger.debug("Created module with id : %s", module_id)

    # Create experiment request
    # Get the actual command entered in the command line
    full_command = get_command_line(instance_type, env, message, data, mode,
                                    open, tensorboard, command_str)
    experiment_request = ExperimentRequest(
        name=experiment_name,
        description=message,
        full_command=full_command,
        module_id=module_id,
        data_ids=data_ids,
        family_id=config.family_id,
        instance_type=instance_type)
    expt_client = ExperimentClient()
    expt_info = expt_client.create(experiment_request)
    floyd_logger.debug("Created job : %s", expt_info['id'])

    # Show the normalized job name in a one-column table framed by blank lines.
    job_name = normalize_job_name(expt_info['name'])
    name_table = tabulate([["JOB NAME"], [job_name]], headers="firstrow")
    floyd_logger.info("")
    floyd_logger.info(name_table)
    floyd_logger.info("")

    show_new_job_info(expt_client, job_name, expt_info, mode)