def init(project):
    """
    Initialize a new project in the current directory.

    After init, run your command. Example:

        floyd run 'python tensorflow.py > /output/model.1'
    """
    project_obj = ProjectClient().get_by_name(project)

    if not project_obj:
        create_project_base_url = "{}/projects/create".format(
            floyd.floyd_web_host)
        create_project_url = "{}?name={}".format(create_project_base_url,
                                                 project)
        floyd_logger.error(
            ("Project name does not match your list of projects. "
             "Create your new project in the web dashboard:\n\t%s"),
            create_project_base_url)
        webbrowser.open(create_project_url)
        return

    experiment_config = ExperimentConfig(name=project,
                                         family_id=project_obj.id)
    ExperimentConfigManager.set_config(experiment_config)
    FloydIgnoreManager.init()

    floyd_logger.info(
        "Project \"{}\" initialized in current directory".format(project))
def initialize_new_upload(data_config, access_token, description=None,
                          source_dir='.'):
    # TODO: hit upload server to check for liveness before moving on

    data_config.set_tarball_path(None)
    data_config.set_data_endpoint(None)
    data_config.set_resource_id(None)

    namespace = data_config.namespace or access_token.username
    data_name = "{}/{}".format(namespace, data_config.name)

    # Create tarball of the data using the ID returned from the API
    # TODO: allow users to change the directory used for compression
    temp_dir = tempfile.mkdtemp()
    tarball_path = os.path.join(temp_dir, "floydhub_data.tar.gz")
    floyd_logger.debug("Creating tarfile with contents of current directory: %s",
                       tarball_path)
    floyd_logger.info("Compressing data...")

    # TODO: purge tarball on Ctrl-C
    create_tarfile(source_dir=source_dir, filename=tarball_path)

    # If starting a new upload fails for some reason down the line, we don't
    # want to re-tar, so save off the tarball path now
    data_config.set_tarball_path(tarball_path)
    DataConfigManager.set_config(data_config)

    # Create data object using API
    data = DataRequest(name=data_name,
                       description=description,
                       family_id=data_config.family_id,
                       data_type='gzip')
    data_info = DataClient().create(data)
    if not data_info:
        rmtree(temp_dir)
        sys.exit(1)

    data_config.set_data_id(data_info['id'])
    data_config.set_data_name(data_info['name'])
    DataConfigManager.set_config(data_config)

    # Fetch auth token for the upload server
    creds = DataClient().new_tus_credentials(data_info['id'])
    if not creds:
        # TODO: delete module from server?
        rmtree(temp_dir)
        sys.exit(1)

    data_resource_id = creds[0]
    data_endpoint = TusDataClient().initialize_upload(
        tarball_path,
        metadata={"filename": data_resource_id},
        auth=creds)
    if not data_endpoint:
        # TODO: delete module from server?
        floyd_logger.error("Failed to get upload URL from Floydhub!")
        rmtree(temp_dir)
        sys.exit(1)

    data_config.set_data_endpoint(data_endpoint)
    DataConfigManager.set_config(data_config)
def wait_for_apikey():
    floyd_logger.info('Waiting for login from browser...')
    key_queue = Queue()
    with get_free_port() as port:
        if not port:
            floyd_logger.error("Failed to allocate TCP port for automatic login.")
            return

        server = LoginServer(('', port), LoginHttpRequestHandler, key_queue)
        t = threading.Thread(target=server.serve_forever)
        t.daemon = True
        t.start()

        cli_host = 'http://127.0.0.1'
        url = '%s/cli_login?callback=%s:%s' % (floyd.floyd_web_host, cli_host, port)
        subprocess.check_output(
            [sys.executable, '-m', 'webbrowser', url],
            stderr=subprocess.STDOUT)

        wait_timeout_sec = 0.5
        wait_cnt = 0
        while True:
            if wait_cnt > 60:
                floyd_logger.error(
                    "Failed to get login info from browser, please log in "
                    "manually by creating a login key at %s/settings/apikey.",
                    floyd.floyd_web_host)
                server.shutdown()
                sys.exit(1)
            try:
                apikey = key_queue.get(timeout=wait_timeout_sec)
                break
            except QueueEmpty:
                wait_cnt += 1

        server.shutdown()
        return apikey
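# Illustrative sketch, not from the source: the handler side of
# wait_for_apikey() above. It assumes the dashboard's /cli_login page calls
# back to the local LoginServer with the key in the query string; the "apikey"
# parameter name, the key_queue attribute on LoginServer, and the handler name
# are assumptions for illustration.
from http.server import BaseHTTPRequestHandler
from urllib.parse import parse_qs, urlparse


class LoginHttpRequestHandlerSketch(BaseHTTPRequestHandler):
    def do_GET(self):
        # e.g. GET /?apikey=<key> issued by the browser after login
        params = parse_qs(urlparse(self.path).query)
        apikey = (params.get('apikey') or [''])[0]
        if apikey:
            # self.server is the LoginServer; key_queue is the Queue that
            # wait_for_apikey() is blocking on
            self.server.key_queue.put(apikey)
        self.send_response(200)
        self.end_headers()
        self.wfile.write(b"Login complete. You can close this tab.")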
def init(dataset_name):
    """
    Initialize a new dataset at the current dir.

    After init ensure that your data files are in this directory.
    Then you can upload them to Floyd. Example:

        floyd data upload
    """
    dataset_obj = DatasetClient().get_by_name(dataset_name)

    if not dataset_obj:
        create_dataset_base_url = "{}/datasets/create".format(
            floyd.floyd_web_host)
        create_dataset_url = "{}?name={}".format(create_dataset_base_url,
                                                 dataset_name)
        floyd_logger.error(
            ("Dataset name does not match your list of datasets. "
             "Create your new dataset in the web dashboard:\n\t%s"),
            create_dataset_base_url)
        webbrowser.open(create_dataset_url)
        return

    data_config = DataConfig(name=dataset_name,
                             family_id=dataset_obj.id)
    DataConfigManager.set_config(data_config)
    floyd_logger.info(
        "Data source \"{}\" initialized in current directory".format(
            dataset_name))
    floyd_logger.info("""
You can now upload your data to Floyd by:
    floyd data upload
""")
def output(id, url, download):
    """
    Shows the output url of the run.
    By default opens the output page in your default browser.
    """
    experiment = ExperimentClient().get(id)
    task_instance = TaskInstanceClient().get(
        get_module_task_instance_id(experiment.task_instances))
    if "output" in task_instance.output_ids:
        resource = ResourceClient().get(task_instance.output_ids["output"])
        output_dir_url = "{}/viewer/{}".format(floyd.floyd_host, resource.uri)
        if url:
            floyd_logger.info(output_dir_url)
        else:
            if download:
                output_dir_url = "{}&download=true".format(output_dir_url)
                ExperimentClient().download_tar(url=output_dir_url,
                                                untar=True,
                                                delete_after_untar=True)
            else:
                floyd_logger.info(
                    "Opening output directory in your browser ...")
                webbrowser.open(output_dir_url)
    else:
        floyd_logger.error("Output directory not available")
def delete(ids, yes):
    """
    Delete data sets.
    """
    failures = False

    for id in ids:
        data_source = DataClient().get(id)
        if not data_source:
            failures = True
            continue

        data_name = normalize_data_name(data_source.name)
        suffix = data_name.split('/')[-1]
        if not suffix.isdigit():
            failures = True
            floyd_logger.error('%s is not a dataset, skipped.', id)
            if suffix == 'output':
                floyd_logger.error(
                    'To delete job output, please delete the job itself.')
            continue

        if not yes and not click.confirm("Delete Data: {}?".format(data_name),
                                         abort=False,
                                         default=False):
            floyd_logger.info("Data %s: Skipped", data_name)
            continue

        if not DataClient().delete(data_source.id):
            failures = True
        else:
            floyd_logger.info("Data %s: Deleted", data_name)

    if failures:
        sys.exit(1)
def delete(id, yes):
    """
    Delete project run
    """
    experiment = ExperimentClient().get(id)
    task_instance = TaskInstanceClient().get(
        get_module_task_instance_id(experiment.task_instances))

    if experiment.state in ["queued", "running"]:
        floyd_logger.info(
            "Experiment in {} state cannot be deleted. Stop it first".format(
                experiment.state))
        return

    if not yes:
        click.confirm('Delete Run: {}?'.format(experiment.name),
                      abort=True,
                      default=False)

    if task_instance.module_id:
        ModuleClient().delete(task_instance.module_id)

    if ExperimentClient().delete(id):
        floyd_logger.info("Experiment deleted")
    else:
        floyd_logger.error("Failed to delete experiment")
def process_data_ids(data):
    if len(data) > 5:
        floyd_logger.error("Cannot attach more than 5 datasets to a job")
        return False, None

    # Get the data entity from the server to:
    # 1. Confirm that the data id or uri exists and has the right permissions
    # 2. If uri is used, get the id of the dataset
    data_ids = []
    for data_name_or_id in data:
        path = None
        if ':' in data_name_or_id:
            data_name_or_id, path = data_name_or_id.split(':')
        data_obj = DataClient().get(
            normalize_data_name(data_name_or_id, use_data_config=False))

        if not data_obj:
            # Try with the raw ID
            data_obj = DataClient().get(data_name_or_id)

        if not data_obj:
            floyd_logger.error(
                "Data not found for name or id: {}".format(data_name_or_id))
            return False, None

        if path:
            data_ids.append("%s:%s" % (data_obj.id, path))
        else:
            data_ids.append(data_obj.id)
    return True, data_ids
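# Usage sketch for process_data_ids() (names, ids, and the parameters dict are
# hypothetical): each CLI argument is "<data-name-or-id>[:<mount-path>]", and
# the helper resolves names to server-side ids, re-attaching the mount path.
success, data_ids = process_data_ids(['mnist/1:input', 'a1b2c3d4'])
if success:
    # e.g. ['<resolved-id>:input', 'a1b2c3d4'], ready to attach to a job request
    parameters = {'data_ids': data_ids}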
def show_new_job_info(expt_client, job_name, expt_info, mode, open_notebook=True):
    if mode in ['jupyter', 'serve']:
        while True:
            # Wait for the experiment / task instances to become available
            try:
                experiment = expt_client.get(expt_info['id'])
                if experiment.task_instances:
                    break
            except Exception:
                pass
            floyd_logger.debug("Job not available yet: %s", expt_info['id'])
            sleep(3)
            continue

        # Print the path to the jupyter notebook
        if mode == 'jupyter':
            jupyter_url = experiment.service_url
            if not jupyter_url:
                floyd_logger.error(
                    "Jupyter URL not available, please check job state and log for error.")
                sys.exit(1)

            print("Setting up your instance and waiting for Jupyter notebook to become available ...",
                  end='')
            if wait_for_url(jupyter_url, sleep_duration_seconds=2, iterations=900):
                sleep(3)  # HACK: sleep extra 3 seconds for traffic route sync
                floyd_logger.info("\nPath to jupyter notebook: %s", jupyter_url)
                if open_notebook:
                    webbrowser.open(jupyter_url)
            else:
                floyd_logger.info("\nPath to jupyter notebook: %s", jupyter_url)
                floyd_logger.info(
                    "Notebook is still loading. View logs to track progress")
                floyd_logger.info("   floyd logs %s", job_name)

        # Print the path to the serving endpoint
        if mode == 'serve':
            floyd_logger.info("Path to service endpoint: %s",
                              experiment.service_url)

        if experiment.timeout_seconds < 24 * 60 * 60:
            floyd_logger.info(
                "\nYour job timeout is currently set to %s seconds",
                experiment.timeout_seconds)
            floyd_logger.info(
                "This is because you are in the free plan. Paid users will have longer timeouts. "
                "See https://www.floydhub.com/pricing for details")
    else:
        floyd_logger.info("To view logs enter:")
        floyd_logger.info("   floyd logs %s", job_name)
def show_new_job_info(expt_client, job_name, expt_info, mode, open_notebook=True):
    table_output = [["JOB NAME"], [job_name]]
    floyd_logger.info('\n' + tabulate(table_output, headers="firstrow") + '\n')

    job_url = '%s/%s' % (floyd.floyd_web_host, job_name)
    floyd_logger.info("URL to job: %s", job_url)

    if mode == 'jupyter':
        floyd_logger.info(
            "\n[!] DEPRECATION NOTICE\n"
            "Jupyter mode will no longer be supported after September 15th.\n"
            "Please migrate your projects to use Workspaces: "
            "https://docs.floydhub.com/guides/workspace/.")

    if mode in ['jupyter', 'serve']:
        while True:
            # Wait for the experiment / task instances to become available
            try:
                experiment = expt_client.get(expt_info['id'])
                if experiment.task_instances:
                    break
            except Exception:
                pass
            floyd_logger.debug("Job not available yet: %s", expt_info['id'])
            sleep(3)
            continue

        # Print the url to the jupyter notebook
        if mode == 'jupyter':
            if not experiment.service_url:
                floyd_logger.error(
                    "Jupyter not available, please check job state and log for error.")
                sys.exit(1)
            if open_notebook:
                webbrowser.open(job_url)

        # Print the url to the serving endpoint
        if mode == 'serve':
            floyd_logger.info("URL to service endpoint: %s",
                              experiment.service_url)

        if experiment.timeout_seconds < 24 * 60 * 60:
            floyd_logger.info(
                "\nYour job timeout is currently set to %s seconds",
                experiment.timeout_seconds)
            floyd_logger.info(
                "This is because you are in the free plan. Paid users will have longer timeouts. "
                "See https://www.floydhub.com/pricing for details")
    else:
        floyd_logger.info("\nTo view logs enter:")
        floyd_logger.info("   floyd logs %s", job_name)
def delete(self, data_id):
    try:
        # Data delete is a synchronous process; it can take a long time
        self.request("DELETE", self.url + data_id, timeout=60)
        return True
    except FloydException as e:
        floyd_logger.error("Data %s: ERROR! %s", data_id, e.message)
        return False
def auto_upgrade():
    try:
        if is_conda_env():
            conda_upgrade()
        else:
            pip_upgrade()
    except Exception as e:
        floyd_logger.error(e)
def upgrade():
    """
    Upgrade floyd command line
    """
    try:
        pip.main(["install", "--upgrade", PROJECT_NAME])
    except Exception as e:
        floyd_logger.error(e)
def upgrade():
    """
    Upgrade floyd command line
    """
    try:
        pip_upgrade()
    except Exception as e:
        floyd_logger.error(e)
def restart(ctx, job_name, data, open_notebook, env, message,
            gpu, cpu, gpup, cpup, command):
    """
    Restart a given job as a new job.
    """
    parameters = {}

    expt_client = ExperimentClient()
    try:
        job = expt_client.get(normalize_job_name(job_name))
    except FloydException:
        job = expt_client.get(job_name)

    if gpup:
        instance_type = G1P_INSTANCE_TYPE
    elif cpup:
        instance_type = C1P_INSTANCE_TYPE
    elif gpu:
        instance_type = G1_INSTANCE_TYPE
    elif cpu:
        instance_type = C1_INSTANCE_TYPE
    else:
        instance_type = job.instance_type

    if instance_type is not None:
        parameters['instance_type'] = instance_type
    else:
        instance_type = job.instance_type

    if env is not None:
        if not validate_env(env, instance_type):
            sys.exit(1)
        parameters['env'] = env

    success, data_ids = process_data_ids(data)
    if not success:
        sys.exit(1)

    if message:
        parameters['message'] = message

    if command:
        parameters['command'] = ' '.join(command)

    floyd_logger.info('Restarting job %s...', job_name)

    new_job_info = expt_client.restart(job.id, parameters=parameters)
    if not new_job_info:
        floyd_logger.error("Failed to restart job")
        sys.exit(1)

    floyd_logger.info('New job created:')
    table_output = [["JOB NAME"], [new_job_info['name']]]
    floyd_logger.info('\n' + tabulate(table_output, headers="firstrow") + '\n')

    show_new_job_info(expt_client, new_job_info['name'], new_job_info,
                      job.mode, open_notebook)
def auto_upgrade():
    try:
        from floyd.cli.utils import is_conda_env
        if is_conda_env():
            conda_upgrade()
        else:
            pip_upgrade()
    except Exception as e:
        floyd_logger.error(e)
def _loads_config(file_path):
    if not os.path.isfile(file_path):
        floyd_logger.error(
            'floyd cli config not found, please use the "floyd login" '
            'command to initialize it.')
        sys.exit(5)

    with open(file_path, "r") as config_file:
        access_token_str = config_file.read()

    return json.loads(access_token_str)
def new_tus_credentials(self, data_id):
    try:
        response = self.request(
            "POST",
            "%s%s/upload_v2_credentials" % (self.url, data_id))
        data_dict = response.json()
        return (data_dict["data_upload_id"], data_dict["token"])
    except FloydException as e:
        floyd_logger.error(
            "Error while fetching data upload metadata for %s:\n\t%s",
            data_id, e.message)
        return ()
def upgrade():
    """
    Upgrade floyd command line
    """
    try:
        # Anaconda builds embed "conda" or "Continuum"/"continuum" (Continuum
        # Analytics) in sys.version; checking for the common suffix "ontinuum"
        # matches either capitalization
        if 'conda' in sys.version or 'ontinuum' in sys.version:
            conda_upgrade()
        else:
            pip_upgrade()
    except Exception as e:
        floyd_logger.error(e)
def get_access_token(cls):
    if not os.path.isfile(cls.CONFIG_FILE_PATH):
        floyd_logger.error(
            'floyd cli config not found, please use the "floyd login" '
            'command to initialize it.')
        sys.exit(5)

    with open(cls.CONFIG_FILE_PATH, "r") as config_file:
        access_token_str = config_file.read()

    return AccessToken.from_dict(json.loads(access_token_str))
def delete(id, yes):
    """
    Delete data set.
    """
    data_source = DataClient().get(id)
    if not data_source:
        # get() returns None on failure and has already logged the error
        sys.exit(1)

    if not yes:
        click.confirm('Delete Data: {}?'.format(data_source.name),
                      abort=True,
                      default=False)

    if DataClient().delete(id):
        floyd_logger.info("Data deleted")
    else:
        floyd_logger.error("Failed to delete data")
def get(self, id):
    try:
        response = self.request("GET", self.url + id)
        data_dict = response.json()
        if data_dict['module_type'] != 'DataModule':
            floyd_logger.error(
                "Data %s: ERROR! Resource given is not a dataset.", id)
            return None
        return Data.from_dict(data_dict)
    except FloydException as e:
        floyd_logger.info("Data %s: ERROR! %s\n", id, e.message)
        return None
def stop(id):
    """
    Stop a run before it can finish.
    """
    experiment = ExperimentClient().get(id)
    if experiment.state not in ["queued", "running"]:
        floyd_logger.info(
            "Job in {} state cannot be stopped".format(experiment.state))
        return

    if ExperimentClient().stop(experiment.id):
        floyd_logger.info(
            "Experiment shutdown request submitted. Check status to confirm shutdown")
    else:
        floyd_logger.error("Failed to stop job")
def get_all(self):
    try:
        access_token = AuthConfigManager.get_access_token()
        response = self.request("GET",
                                self.url,
                                params={
                                    "module_type": "data",
                                    "username": access_token.username
                                })
        data_dict = response.json()
        return [Data.from_dict(data) for data in data_dict]
    except FloydException as e:
        floyd_logger.error("Error while retrieving data: %s", e.message)
        return []
def show_new_job_info(expt_client, job_name, expt_info, mode, open_notebook=True):
    if mode in ['jupyter', 'serve']:
        while True:
            # Wait for the experiment / task instances to become available
            try:
                experiment = expt_client.get(expt_info['id'])
                if experiment.task_instances:
                    break
            except Exception:
                pass
            floyd_logger.debug("Job not available yet: %s", expt_info['id'])
            sleep(3)
            continue

        # Print the path to the jupyter notebook
        if mode == 'jupyter':
            if not experiment.service_url:
                floyd_logger.error(
                    "Jupyter not available, please check job state and log for error.")
                sys.exit(1)
            jupyter_url = '%s/%s' % (floyd.floyd_web_host, job_name)
            floyd_logger.info("\nPath to jupyter notebook: %s", jupyter_url)
            if open_notebook:
                webbrowser.open(jupyter_url)

        # Print the path to the serving endpoint
        if mode == 'serve':
            floyd_logger.info("Path to service endpoint: %s",
                              experiment.service_url)

        if experiment.timeout_seconds < 24 * 60 * 60:
            floyd_logger.info(
                "\nYour job timeout is currently set to %s seconds",
                experiment.timeout_seconds)
            floyd_logger.info(
                "This is because you are in the free plan. Paid users will have longer timeouts. "
                "See https://www.floydhub.com/pricing for details")
    else:
        floyd_logger.info("To view logs enter:")
        floyd_logger.info("   floyd logs %s", job_name)
def validate_env(env, instance_type):
    arch = INSTANCE_ARCH_MAP[instance_type]
    env_map = EnvClient().get_all()
    envs = env_map.get(arch)
    if envs:
        if env not in envs:
            floyd_logger.error(
                "{} is not in the list of supported environments:\n{}".format(
                    env, tabulate([[env_name] for env_name in envs.keys()])))
            return False
    else:
        floyd_logger.error("invalid instance type")
        return False
    return True
def get(self, id):
    try:
        response = self.request("GET", self.url + id)
        data_dict = response.json()
        if data_dict['module_type'] != 'DataModule':
            floyd_logger.error(
                "Data %s: ERROR! Resource given is not a dataset.", id)
            return None
        return Data.from_dict(data_dict)
    except FloydException as e:
        floyd_logger.info(
            "Data %s: ERROR! %s\n"
            "If you have already created the dataset, make sure you have "
            "uploaded at least one version.",
            id, e.message)
        return None
def get_all(self):
    try:
        data_config = DataConfigManager.get_config()
        response = self.request("GET",
                                self.url,
                                params={
                                    "module_type": "data",
                                    "family_id": data_config.family_id
                                })
        data_dict = response.json()
        return [Data.from_dict(data) for data in data_dict]
    except FloydException as e:
        floyd_logger.error("Error while retrieving data: %s", e.message)
        return []
def validate_env(env, arch):
    env_map = EnvClient().get_all()
    envs = env_map.get(arch)
    if envs:
        if env not in envs:
            envlist = tabulate([[env_name] for env_name in sorted(envs.keys())])
            floyd_logger.error(
                "%s is not in the list of supported environments:\n%s",
                env, envlist)
            return False
    else:
        floyd_logger.error("invalid instance type")
        return False
    return True
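# Usage sketch (instance and environment names are hypothetical): callers
# first map the chosen instance type to its architecture, as the older
# validate_env() variant above does with INSTANCE_ARCH_MAP, then validate.
arch = INSTANCE_ARCH_MAP['c1']  # hypothetical instance type key, e.g. -> 'cpu'
if not validate_env('tensorflow-1.8', arch):
    sys.exit(1)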
def resume_upload(self, file_path, file_endpoint, chunk_size=None,
                  headers=None, auth=None, offset=None):
    chunk_size = chunk_size or self.chunk_size

    try:
        offset = self._get_offset(file_endpoint, headers=headers, auth=auth)
    except FloydException as e:
        floyd_logger.error(
            "Failed to fetch offset data from upload server! %s", e.message)
        return False
    except requests.exceptions.ConnectionError:
        floyd_logger.error(
            "Cannot connect to the Floyd data upload server for offset. "
            "Check your internet connection.")
        return False

    total_sent = 0
    file_size = os.path.getsize(file_path)

    with open(file_path, 'rb') as f:
        pb = ProgressBar(filled_char="=", expected_size=file_size)
        while offset < file_size:
            pb.show(offset)
            f.seek(offset)
            data = f.read(chunk_size)
            try:
                offset = self._upload_chunk(data, offset, file_endpoint,
                                            headers=headers, auth=auth)
                total_sent += len(data)
                floyd_logger.debug("%s bytes sent", total_sent)
            except FloydException as e:
                floyd_logger.error(
                    "Failed to upload chunk to the upload server! %s",
                    e.message)
                return False
            except requests.exceptions.ConnectionError:
                floyd_logger.error(
                    "Cannot connect to the Floyd data upload server. "
                    "Check your internet connection.")
                return False

        # Complete the progress bar with one more call to show()
        pb.show(offset)
        pb.done()

    return True
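# A minimal sketch, assuming TusDataClient speaks the tus 1.0 resumable-upload
# protocol: _get_offset() reads the server-side offset from a HEAD request's
# Upload-Offset header, and _upload_chunk() PATCHes raw bytes at that offset.
# Header names come from the tus specification; passing the credential tuple
# straight through as requests' auth is an assumption for illustration.
import requests

TUS_VERSION = '1.0.0'


def _get_offset_sketch(file_endpoint, headers=None, auth=None):
    h = dict(headers or {})
    h['Tus-Resumable'] = TUS_VERSION
    response = requests.head(file_endpoint, headers=h, auth=auth)
    response.raise_for_status()
    return int(response.headers['Upload-Offset'])


def _upload_chunk_sketch(data, offset, file_endpoint, headers=None, auth=None):
    h = dict(headers or {})
    h.update({
        'Tus-Resumable': TUS_VERSION,
        'Upload-Offset': str(offset),
        'Content-Type': 'application/offset+octet-stream',
    })
    response = requests.patch(file_endpoint, data=data, headers=h, auth=auth)
    response.raise_for_status()
    # The server replies with the new offset after appending this chunk
    return int(response.headers['Upload-Offset'])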