def delete(ids, yes):
    """
    Delete project runs.

    Iterates over the given job ids, asking for per-job confirmation
    unless ``yes`` is set. Exits with status 1 if any job could not be
    found or deleted.
    """
    failures = False
    # Reuse one client instead of constructing a new one per iteration.
    client = ExperimentClient()

    for job_id in ids:  # renamed from `id` to avoid shadowing the builtin
        experiment = client.get(job_id)
        if not experiment:
            failures = True
            continue

        if not yes and not click.confirm("Delete Job: {}?".format(experiment.name),
                                         abort=False,
                                         default=False):
            # Lazy %-args match the logging style used elsewhere in the file.
            floyd_logger.info("Job %s: Skipped.", experiment.name)
            continue

        if not client.delete(experiment.id):
            failures = True
        else:
            floyd_logger.info("Job %s Deleted", experiment.name)

    if failures:
        sys.exit(1)
def initialize_new_upload(data_config, access_token, description=None, source_dir='.'):
    """
    Start a fresh TUS upload of ``source_dir`` for the given data config.

    Creates a tarball in a temp directory, registers a new data object
    with the API, fetches upload credentials, and initializes the upload
    endpoint. Progress is checkpointed into ``data_config`` after each
    step so a later failure can resume without re-tarring. On any API
    failure the temp directory is removed and the process exits with
    status 1.
    """
    # TODO: hit upload server to check for liveness before moving on

    # Reset any state left over from a previous upload attempt.
    data_config.set_tarball_path(None)
    data_config.set_data_endpoint(None)
    data_config.set_resource_id(None)

    namespace = data_config.namespace or access_token.username
    data_name = "{}/{}".format(namespace, data_config.name)

    # Create tarball of the data using the ID returned from the API
    # TODO: allow to the users to change directory for the compression
    temp_dir = tempfile.mkdtemp()
    tarball_path = os.path.join(temp_dir, "floydhub_data.tar.gz")
    floyd_logger.debug("Creating tarfile with contents of current directory: %s",
                       tarball_path)
    floyd_logger.info("Compressing data...")

    # TODO: purge tarball on Ctrl-C
    create_tarfile(source_dir=source_dir, filename=tarball_path)

    # If starting a new upload fails for some reason down the line, we don't
    # want to re-tar, so save off the tarball path now
    data_config.set_tarball_path(tarball_path)
    DataConfigManager.set_config(data_config)

    # Create data object using API
    data = DataRequest(name=data_name,
                       description=description,
                       family_id=data_config.family_id,
                       data_type='gzip')
    data_info = DataClient().create(data)
    if not data_info:
        # create() is expected to have logged the failure already.
        rmtree(temp_dir)
        sys.exit(1)

    # Checkpoint the server-assigned id/name before requesting credentials.
    data_config.set_data_id(data_info['id'])
    data_config.set_data_name(data_info['name'])
    DataConfigManager.set_config(data_config)

    # fetch auth token for upload server
    creds = DataClient().new_tus_credentials(data_info['id'])
    if not creds:
        # TODO: delete module from server?
        rmtree(temp_dir)
        sys.exit(1)

    # creds[0] is used as the resource id; the full tuple is the auth pair.
    data_resource_id = creds[0]
    data_endpoint = TusDataClient().initialize_upload(
        tarball_path,
        metadata={"filename": data_resource_id},
        auth=creds)
    if not data_endpoint:
        # TODO: delete module from server?
        floyd_logger.error("Failed to get upload URL from Floydhub!")
        rmtree(temp_dir)
        sys.exit(1)

    # Final checkpoint: the endpoint the actual upload will resume against.
    data_config.set_data_endpoint(data_endpoint)
    DataConfigManager.set_config(data_config)
def logs(id, url, tail, sleep_duration=1):
    """
    Print the logs of the run.

    With ``url`` set, only prints the log resource URL and returns.
    With ``tail`` set, polls the log resource every ``sleep_duration``
    seconds forever, printing whatever was appended since the last
    poll. Otherwise prints the current log contents once.
    """
    experiment = ExperimentClient().get(id)
    task_instance = TaskInstanceClient().get(
        get_module_task_instance_id(experiment.task_instances))
    log_url = "{}/api/v1/resources/{}?content=true".format(
        floyd.floyd_host, task_instance.log_id)
    if url:
        floyd_logger.info(log_url)
        return
    if tail:
        floyd_logger.info("Launching job ...")
        current_shell_output = ""
        while True:
            # Get the logs in a loop and log the new lines
            log_file_contents = get_url_contents(log_url)
            # Everything past what was already shown is new output.
            # NOTE(review): assumes the log only ever grows — if the
            # server ever truncates it, the slice would be wrong.
            print_output = log_file_contents[len(current_shell_output):]
            if len(print_output.strip()):
                floyd_logger.info(print_output)
                current_shell_output = log_file_contents
            sleep(sleep_duration)
    else:
        log_file_contents = get_url_contents(log_url)
        if len(log_file_contents.strip()):
            floyd_logger.info(log_file_contents)
        else:
            # No log content yet — the job likely has not started.
            floyd_logger.info("Launching job now. Try after a few seconds.")
def info(job_name_or_id):
    """
    Prints detailed info for the run
    """
    # Prefer the normalized job name; fall back to the raw argument.
    try:
        experiment = ExperimentClient().get(normalize_job_name(job_name_or_id))
    except FloydException:
        experiment = ExperimentClient().get(job_name_or_id)

    task_instance_id = get_module_task_instance_id(experiment.task_instances)
    task_instance = None
    if task_instance_id:
        task_instance = TaskInstanceClient().get(task_instance_id)

    normalized_job_name = normalize_job_name(experiment.name)
    output_name = normalized_job_name + '/output' if task_instance else None

    table = [
        ["Job name", normalized_job_name],
        ["Output name", output_name],
        ["Created", experiment.created_pretty],
        ["Status", experiment.state],
        ["Duration(s)", experiment.duration_rounded],
        ["Instance", experiment.instance_type_trimmed],
        ["Description", experiment.description],
    ]
    if task_instance and task_instance.mode in ['jupyter', 'serving']:
        table.append(["Mode", task_instance.mode])
        table.append(["Url", experiment.service_url])
    if experiment.tensorboard_url:
        table.append(["Tensorboard", experiment.tensorboard_url])
    floyd_logger.info(tabulate(table))
def init(dataset_name):
    """
    Initialize a new dataset at the current dir.

    After init, ensure your data files are in this directory; you can
    then upload them to Floyd with ``floyd data upload``.
    """
    dataset_obj = DatasetClient().get_by_name(dataset_name)

    # Unknown dataset: point the user at the creation page and bail out.
    if not dataset_obj:
        base_url = "{}/datasets/create".format(floyd.floyd_web_host)
        prefilled_url = "{}?name={}".format(base_url, dataset_name)
        floyd_logger.error(
            ("Dataset name does not match your list of datasets. "
             "Create your new dataset in the web dashboard:\n\t%s"),
            base_url)
        webbrowser.open(prefilled_url)
        return

    data_config = DataConfig(name=dataset_name, family_id=dataset_obj.id)
    DataConfigManager.set_config(data_config)
    floyd_logger.info(
        "Data source \"{}\" initialized in current directory".format(dataset_name))
    floyd_logger.info("""
You can now upload your data to Floyd by:
    floyd data upload
""")
def delete(ids, yes):
    """
    Delete data sets.

    Skips any id that is not a dataset version (job output must be
    deleted via the job itself); asks for confirmation per data source
    unless ``yes`` is set. Exits with status 1 if anything failed.
    """
    failures = False

    for data_id in ids:  # renamed from `id` to avoid shadowing the builtin
        data_source = DataClient().get(data_id)
        if not data_source:
            failures = True
            continue

        data_name = normalize_data_name(data_source.name)
        suffix = data_name.split('/')[-1]
        if not suffix.isdigit():
            # Not a dataset version. BUG FIX: previously only the
            # 'output' case skipped ahead, so other non-dataset names
            # fell through and were still deleted below.
            failures = True
            floyd_logger.error('%s is not a dataset, skipped.', data_id)
            if suffix == 'output':
                floyd_logger.error(
                    'To delete job output, please delete the job itself.')
            continue

        if not yes and not click.confirm("Delete Data: {}?".format(data_name),
                                         abort=False,
                                         default=False):
            floyd_logger.info("Data %s: Skipped", data_name)
            continue

        if not DataClient().delete(data_source.id):
            failures = True
        else:
            floyd_logger.info("Data %s: Deleted", data_name)

    if failures:
        sys.exit(1)
def __get_nfiles_to_compress(self):
    """
    Return the number of files to compress

    Note: it should take about 0.1s for counting 100k files
    on a dual core machine
    """
    floyd_logger.info(
        "Get number of files to compress... (this could take a few seconds)"
    )
    # Stack of directories still to visit, seeded with the root.
    paths = [self.source_dir]
    try:
        # Traverse each subdirs of source_dir and count files/dirs
        # (directories themselves are included in the count).
        while paths:
            path = paths.pop()
            for item in scandir(path):
                if item.is_dir():
                    paths.append(item.path)
                    self.__files_to_compress += 1
                elif item.is_file():
                    self.__files_to_compress += 1
    except OSError as e:
        # OSError: [Errno 13] Permission denied
        if e.errno == errno.EACCES:
            self.source_dir = os.getcwd(
            ) if self.source_dir == '.' else self.source_dir  # Expand cwd
            sys.exit(
                ("Permission denied. Make sure to have read permission "
                 "for all the files and directories in the path: %s")
                % (self.source_dir))
        # NOTE(review): any other OSError is silently swallowed here and
        # the traversal simply stops — confirm this is intentional.
    floyd_logger.info("Compressing %d files", self.__files_to_compress)
def delete(self, id):
    """
    Delete the module with the given id.

    Returns True on success, False if the API request failed.
    """
    try:
        self.request("DELETE", "{}{}".format(self.url, id))
        return True
    except FloydException as e:
        # Log at error level (was info) to match the other client
        # classes, with lazy %-args; rendered message is unchanged.
        floyd_logger.error("Module %s: ERROR! %s", id, e.message)
        return False
def init(project):
    """
    Initialize new project at the current dir.

    After init, run your command, e.g.:
    floyd run 'python tensorflow.py > /output/model.1'
    """
    project_obj = ProjectClient().get_by_name(project)

    # Unknown project: point the user at the creation page and bail out.
    if not project_obj:
        base_url = "{}/projects/create".format(floyd.floyd_web_host)
        prefilled_url = "{}?name={}".format(base_url, project)
        floyd_logger.error(
            ("Project name does not match your list of projects. "
             "Create your new project in the web dashboard:\n\t%s"),
            base_url)
        webbrowser.open(prefilled_url)
        return

    experiment_config = ExperimentConfig(name=project,
                                         family_id=project_obj.id)
    ExperimentConfigManager.set_config(experiment_config)
    FloydIgnoreManager.init()
    floyd_logger.info(
        "Project \"{}\" initialized in current directory".format(project))
def delete(id, yes):
    """
    Delete project run

    Refuses to delete queued/running experiments; asks for confirmation
    unless ``yes`` is set. Deletes the backing module first, if any.
    """
    experiment = ExperimentClient().get(id)
    # BUG FIX: the client returns None on failure (and logs the error
    # itself); previously this crashed on attribute access below.
    if not experiment:
        floyd_logger.error("Failed to delete experiment")
        return

    task_instance = TaskInstanceClient().get(
        get_module_task_instance_id(experiment.task_instances))

    if experiment.state in ["queued", "running"]:
        floyd_logger.info(
            "Experiment in {} state cannot be deleted. Stop it first".format(
                experiment.state))
        return

    if not yes:
        # abort=True makes click exit the command when the user declines.
        click.confirm('Delete Run: {}?'.format(experiment.name),
                      abort=True,
                      default=False)

    # BUG FIX: task_instance can be None (no instance, or fetch failed);
    # guard instead of crashing on .module_id.
    if task_instance and task_instance.module_id:
        ModuleClient().delete(task_instance.module_id)

    if ExperimentClient().delete(id):
        floyd_logger.info("Experiment deleted")
    else:
        floyd_logger.error("Failed to delete experiment")
def info(id):
    """
    Prints detailed info for the run
    """
    experiment = ExperimentClient().get(id)
    ti_id = get_module_task_instance_id(experiment.task_instances)
    task_instance = TaskInstanceClient().get(ti_id) if ti_id else None

    # Mode/url rows only apply to live jupyter/serving jobs.
    mode = url = None
    if (experiment.state == "running"
            and task_instance
            and task_instance.mode in ['jupyter', 'serving']):
        mode = task_instance.mode
        url = get_task_url(task_instance.id)

    output_id = task_instance.id if task_instance else None
    table = [
        ["Run ID", experiment.id],
        ["Name", experiment.name],
        ["Created", experiment.created_pretty],
        ["Status", experiment.state],
        ["Duration(s)", experiment.duration_rounded],
        ["Output ID", output_id],
        ["Instance", experiment.instance_type_trimmed],
        ["Version", experiment.description],
    ]
    if mode:
        table.append(["Mode", mode])
    if url:
        table.append(["Url", url])
    floyd_logger.info(tabulate(table))
def version():
    """
    Prints the current version of the CLI
    """
    # Local import keeps the (slow) pkg_resources load off the CLI's
    # startup path for every other command.
    import pkg_resources

    current_version = pkg_resources.require(PROJECT_NAME)[0].version
    floyd_logger.info(current_version)
def print_data(data_sources):
    """
    Print the given data sources as a table.
    """
    headers = ["DATA ID", "CREATED", "DISK USAGE", "NAME", "VERSION"]
    rows = [
        [source.id,
         source.created_pretty,
         source.size,
         source.name,
         source.version]
        for source in data_sources
    ]
    floyd_logger.info(tabulate(rows, headers=headers))
def upload():
    """
    Upload data in the current dir to Floyd.
    """
    data_config = DataConfigManager.get_config()
    access_token = AuthConfigManager.get_access_token()
    version = data_config.version

    # Create data object
    data_name = "{}/{}:{}".format(access_token.username,
                                  data_config.name,
                                  version)
    data = DataRequest(name=data_name,
                       description=version,
                       version=version)
    data_id = DataClient().create(data)
    # BUG FIX: a falsy id from create() previously still reported
    # "Upload finished" and bumped the local version.
    if not data_id:
        floyd_logger.error("Data upload failed")
        sys.exit(1)
    floyd_logger.debug("Created data with id : %s", data_id)
    floyd_logger.info("Upload finished")

    # Update expt config including predecessor
    data_config.increment_version()
    data_config.set_data_predecessor(data_id)
    DataConfigManager.set_config(data_config)

    # Print output
    table_output = [["DATA ID", "NAME", "VERSION"],
                    [data_id, data_name, version]]
    floyd_logger.info(tabulate(table_output, headers="firstrow"))
def wait_for_apikey():
    """
    Drive the browser-based login flow and return the apikey.

    Starts a local HTTP server on a free port, opens the FloydHub login
    page with a callback pointing at that server, then polls a queue the
    request handler fills in. Gives up after roughly 30 seconds and
    exits with status 1. Returns None if no free port was available.
    """
    floyd_logger.info('Waiting for login from browser...')
    # The request handler pushes the received apikey onto this queue.
    key_queue = Queue()
    with get_free_port() as port:
        if not port:
            floyd_logger.error("Failed to allocate TCP port for automatic login.")
            return
        server = LoginServer(('', port), LoginHttpRequestHandler, key_queue)
        # Daemon thread so a hard exit never hangs on the server loop.
        t = threading.Thread(target=server.serve_forever)
        t.daemon = True
        t.start()
        cli_host = 'http://127.0.0.1'
        url = '%s/cli_login?callback=%s:%s' % (floyd.floyd_web_host, cli_host, port)
        # Launch the browser via a subprocess so its stdout/stderr noise
        # does not leak onto our terminal.
        subprocess.check_output(
            [sys.executable, '-m', 'webbrowser', url],
            stderr=subprocess.STDOUT)
        wait_timeout_sec = 0.5
        wait_cnt = 0
        while True:
            # 60 polls x 0.5s queue timeout ≈ 30s before giving up.
            if wait_cnt > 60:
                floyd_logger.error("Failed to get login info from browser, please login manually by creating login key at %s/settings/apikey.", floyd.floyd_web_host)
                server.shutdown()
                sys.exit(1)
            try:
                apikey = key_queue.get(timeout=wait_timeout_sec)
                break
            except QueueEmpty:
                wait_cnt += 1
        server.shutdown()
        return apikey
def output(id, url, download):
    """
    Shows the output url of the run.
    By default opens the output page in your default browser.
    """
    experiment = ExperimentClient().get(id)
    task_instance = TaskInstanceClient().get(
        get_module_task_instance_id(experiment.task_instances))

    # Guard clause: nothing to show without an output resource.
    if "output" not in task_instance.output_ids:
        floyd_logger.error("Output directory not available")
        return

    resource = ResourceClient().get(task_instance.output_ids["output"])
    output_dir_url = "{}/viewer/{}".format(floyd.floyd_host, resource.uri)

    if url:
        floyd_logger.info(output_dir_url)
        return

    if download:
        output_dir_url = "{}&download=true".format(output_dir_url)
        ExperimentClient().download_tar(url=output_dir_url,
                                        untar=True,
                                        delete_after_untar=True)
    else:
        floyd_logger.info(
            "Opening output directory in your browser ...")
        webbrowser.open(output_dir_url)
def delete(names, yes):
    """
    Delete project runs.

    Accepts job names or raw ids; asks for per-job confirmation unless
    ``yes`` is set. Exits with status 1 if any job could not be found
    or deleted.
    """
    failures = False
    # Reuse one client instead of constructing a new one per lookup.
    client = ExperimentClient()

    for name in names:
        try:
            experiment = client.get(normalize_job_name(name))
        except FloydException:
            # Fall back to treating the argument as a raw id.
            experiment = client.get(name)

        if not experiment:
            failures = True
            continue

        if not yes and not click.confirm("Delete Job: {}?".format(experiment.name),
                                         abort=False,
                                         default=False):
            # Lazy %-args match the logging style used elsewhere in the file.
            floyd_logger.info("Job %s: Skipped.", experiment.name)
            continue

        if not client.delete(experiment.id):
            failures = True
        else:
            floyd_logger.info("Job %s Deleted", experiment.name)

    if failures:
        sys.exit(1)
def get(self, resource_id):
    """
    Fetch a resource by id.

    Returns a Resource instance, or None if the API request failed.
    """
    try:
        response = self.request('GET', self.URL_PREFIX + resource_id)
        resource_dict = response.json()
        return Resource.from_dict(resource_dict)
    except FloydException as e:
        # Log at error level (was info): this is a failure path.
        floyd_logger.error("Resource %s: ERROR! %s", resource_id, e.message)
        return None
def get_all(self):
    """
    Fetch the env mapping from the API.

    Returns the parsed JSON payload, or {} if the request failed.
    """
    try:
        response = self.request("GET", self.url)
        return response.json()
    except FloydException as e:
        # Log at error level (was info), lazy %-args; rendered message
        # is unchanged.
        floyd_logger.error("Error while retrieving env: %s", e.message)
        return {}
def warn_purge_exit(info_msg, filename, exit_msg):
    """
    Log a warning, purge the tarball's temp directory, then exit.

    ``info_msg`` is logged, the directory containing ``filename`` is
    removed recursively, and the process exits with ``exit_msg``.
    """
    floyd_logger.info(info_msg)
    # The tarball lives in its own temp directory; drop the whole dir.
    tarball_dir = os.path.dirname(filename)
    rmtree(tarball_dir)
    sys.exit(exit_msg)
def delete(self, data_id):
    """
    Delete a data source by id. Returns True on success.
    """
    try:
        # data delete is a synchronous process, it can take a long time
        self.request("DELETE", self.url + data_id, timeout=60)
    except FloydException as e:
        floyd_logger.error("Data %s: ERROR! %s", data_id, e.message)
        return False
    else:
        floyd_logger.info("Data %s: Deleted", data_id)
        return True
def get(self, id):
    """
    Fetch a task instance by id.

    Returns a TaskInstance, or None if the API request failed.
    """
    try:
        response = self.request("GET", "{}{}".format(self.url, id))
        task_instance_dict = response.json()
        return TaskInstance.from_dict(task_instance_dict)
    except FloydException as e:
        # Log at error level (was info) to match the other client
        # classes; rendered message is unchanged.
        floyd_logger.error("Task Instance %s: ERROR! %s", id, e.message)
        return None
def print_experiments(experiments):
    """
    Prints expt details in a table. Includes urls and mode parameters
    """
    headers = ["RUN ID", "CREATED", "STATUS", "DURATION(s)",
               "NAME", "INSTANCE", "VERSION"]
    rows = [
        [expt.id,
         expt.created_pretty,
         expt.state,
         expt.duration_rounded,
         expt.name,
         expt.instance_type_trimmed,
         expt.description]
        for expt in experiments
    ]
    floyd_logger.info(tabulate(rows, headers=headers))
def create(self, module):
    """
    Register a module with the API, uploading the code files from the
    current directory. Returns the new module id.
    """
    upload_files = get_files_in_directory(path='.', file_type='code')
    payload = {"json": json.dumps(module.to_dict())}
    floyd_logger.debug("Creating module. Uploading {} files ...".format(
        len(upload_files)))
    floyd_logger.info("Syncing code ...")
    # Long timeout: the upload can be large.
    response = self.request("POST",
                            self.url,
                            data=payload,
                            files=upload_files,
                            timeout=3600)
    return response.json().get("id")
def create(self, data):
    """
    Register a data source with the API, uploading the data files from
    the current directory. Returns the new data id.
    """
    upload_files = get_files_in_directory(path='.', file_type='data')
    payload = {"json": json.dumps(data.to_dict())}
    floyd_logger.info(
        "Creating data source. Uploading {} files ...".format(
            len(upload_files)))
    # Long timeout: the upload can be large.
    response = self.request("POST",
                            self.url,
                            data=payload,
                            files=upload_files,
                            timeout=3600)
    return response.json().get("id")
def output(id, url):
    """
    Shows the output url of the run.
    By default opens the output page in your default browser.
    """
    experiment = ExperimentClient().get(id)
    output_dir_url = "%s/%s/output" % (floyd.floyd_web_host, experiment.name)

    # --url: just print it and stop.
    if url:
        floyd_logger.info(output_dir_url)
        return

    floyd_logger.info("Opening output path in your browser ...")
    webbrowser.open(output_dir_url)
def init(project):
    """
    Initialize new project at the current dir.

    After init, run your command, e.g.:
    floyd run python tensorflow.py > /output/model.1
    """
    # New local config keyed by a freshly generated family id.
    config = ExperimentConfig(name=project, family_id=generate_uuid())
    ExperimentConfigManager.set_config(config)
    FloydIgnoreManager.init()
    floyd_logger.info(
        "Project \"{}\" initialized in current directory".format(project))
def delete(id, yes):
    """
    Delete data set.

    Asks for confirmation unless ``yes`` is set.
    """
    data_source = DataClient().get(id)
    # BUG FIX: get() returns None on failure (and logs the error
    # itself); previously this crashed on data_source.name below.
    if not data_source:
        floyd_logger.error("Failed to delete data")
        return

    if not yes:
        # abort=True makes click exit the command when the user declines.
        click.confirm('Delete Data: {}?'.format(data_source.name),
                      abort=True,
                      default=False)

    if DataClient().delete(id):
        floyd_logger.info("Data deleted")
    else:
        floyd_logger.error("Failed to delete data")
def stop(id):
    """
    Stop a run before it can finish.
    """
    experiment = ExperimentClient().get(id)
    # BUG FIX: get() returns None on failure (and logs the error
    # itself); previously this crashed on experiment.state below.
    if not experiment:
        floyd_logger.error("Failed to stop job")
        return

    if experiment.state not in ["queued", "running"]:
        floyd_logger.info(
            "Job in {} state cannot be stopped".format(experiment.state))
        return

    if ExperimentClient().stop(experiment.id):
        floyd_logger.info("Experiment shutdown request submitted. Check status to confirm shutdown")
    else:
        floyd_logger.error("Failed to stop job")
def output(id, url):
    """
    Shows the output url of the run.
    By default opens the output page in your default browser.
    """
    data_source = DataClient().get(id)
    data_url = "{}/api/v1/resources/{}?content=true".format(
        floyd.floyd_host, data_source.resource_id)

    # --url: just print it and stop.
    if url:
        floyd_logger.info(data_url)
        return

    floyd_logger.info("Opening output directory in your browser ...")
    webbrowser.open(data_url)