Beispiel #1
0
def init(project):
    """
    Initialize new project at the current dir.
    After init run your command. Example:

        floyd run 'python tensorflow.py > /output/model.1'
    """
    existing = ProjectClient().get_by_name(project)
    if not existing:
        # Unknown project server-side: send the user to the dashboard
        # with the name pre-filled instead of failing silently.
        create_project_base_url = "{}/projects/create".format(
            floyd.floyd_web_host)
        create_project_url = "{}?name={}".format(create_project_base_url,
                                                 project)
        floyd_logger.error(
            ("Project name does not match your list of projects. "
             "Create your new project in the web dashboard:\n\t%s"),
            create_project_base_url)
        webbrowser.open(create_project_url)
        return

    # Persist the project linkage and seed a .floydignore in this dir.
    ExperimentConfigManager.set_config(
        ExperimentConfig(name=project, family_id=existing.id))
    FloydIgnoreManager.init()
    floyd_logger.info(
        "Project \"{}\" initialized in current directory".format(project))
Beispiel #2
0
def initialize_new_upload(data_config, access_token, description=None, source_dir='.'):
    """
    Prepare a fresh data upload: tar the source directory, register the
    data object with the API, and fetch TUS upload-server credentials.

    Progress is persisted into ``data_config`` (via DataConfigManager)
    after each step so a failed upload can later resume without
    re-tarring. On any API failure the temp dir is removed and the
    process exits with status 1.

    :param data_config: DataConfig to reset and populate (tarball path,
        endpoint, data id/name).
    :param access_token: supplies the username used as the namespace
        fallback.
    :param description: optional description for the new data object.
    :param source_dir: directory to compress; defaults to the current dir.
    """
    # TODO: hit upload server to check for liveness before moving on
    # Clear any state left behind by a previous (possibly failed) upload.
    data_config.set_tarball_path(None)
    data_config.set_data_endpoint(None)
    data_config.set_resource_id(None)

    namespace = data_config.namespace or access_token.username
    data_name = "{}/{}".format(namespace, data_config.name)

    # Create tarball of the data using the ID returned from the API
    # TODO: allow to the users to change directory for the compression
    temp_dir = tempfile.mkdtemp()
    tarball_path = os.path.join(temp_dir, "floydhub_data.tar.gz")

    floyd_logger.debug("Creating tarfile with contents of current directory: %s",
                       tarball_path)
    floyd_logger.info("Compressing data...")

    # TODO: purge tarball on Ctrl-C
    create_tarfile(source_dir=source_dir, filename=tarball_path)

    # If starting a new upload fails for some reason down the line, we don't
    # want to re-tar, so save off the tarball path now
    data_config.set_tarball_path(tarball_path)
    DataConfigManager.set_config(data_config)

    # Create data object using API
    data = DataRequest(name=data_name,
                       description=description,
                       family_id=data_config.family_id,
                       data_type='gzip')
    data_info = DataClient().create(data)
    if not data_info:
        rmtree(temp_dir)
        sys.exit(1)

    data_config.set_data_id(data_info['id'])
    data_config.set_data_name(data_info['name'])
    DataConfigManager.set_config(data_config)

    # fetch auth token for upload server
    creds = DataClient().new_tus_credentials(data_info['id'])
    if not creds:
        # TODO: delete module from server?
        rmtree(temp_dir)
        sys.exit(1)

    # creds is (data_upload_id, token); the upload id doubles as the
    # resource filename on the upload server.
    data_resource_id = creds[0]
    data_endpoint = TusDataClient().initialize_upload(
        tarball_path,
        metadata={"filename": data_resource_id},
        auth=creds)
    if not data_endpoint:
        # TODO: delete module from server?
        floyd_logger.error("Failed to get upload URL from Floydhub!")
        rmtree(temp_dir)
        sys.exit(1)

    data_config.set_data_endpoint(data_endpoint)
    DataConfigManager.set_config(data_config)
Beispiel #3
0
def wait_for_apikey():
    """
    Run a local HTTP server and wait for the browser-based login flow to
    deliver an API key back to it.

    Returns the api key string on success, or None when no free TCP port
    could be allocated. Exits with status 1 when no key arrives within
    roughly 30 seconds (60 polls x 0.5s queue timeout).
    """
    floyd_logger.info('Waiting for login from browser...')

    key_queue = Queue()
    with get_free_port() as port:
        if not port:
            floyd_logger.error("Failed to allocate TCP port for automatic login.")
            return
        # NOTE(review): the server is constructed inside the
        # get_free_port() context; presumably the context only probes for
        # a free port -- verify there is no race where the port is
        # re-taken between probe and bind.
        server = LoginServer(('', port), LoginHttpRequestHandler, key_queue)

    # Serve in a daemon thread so the CLI can keep polling below and can
    # exit without joining the thread.
    t = threading.Thread(
        target=server.serve_forever)
    t.daemon = True
    t.start()

    cli_host = 'http://127.0.0.1'
    url = '%s/cli_login?callback=%s:%s' % (floyd.floyd_web_host, cli_host, port)
    # Launch the browser via a subprocess so its stdout/stderr are
    # captured instead of polluting the CLI output.
    subprocess.check_output(
        [sys.executable, '-m', 'webbrowser', url], stderr=subprocess.STDOUT)

    wait_timeout_sec = 0.5
    wait_cnt = 0
    while True:
        if wait_cnt > 60:
            floyd_logger.error("Failed to get login info from browser, please login manually by creating login key at %s/settings/apikey.", floyd.floyd_web_host)
            server.shutdown()
            sys.exit(1)
        try:
            # Blocks up to wait_timeout_sec for the login handler to
            # push the key onto the queue.
            apikey = key_queue.get(timeout=wait_timeout_sec)
            break
        except QueueEmpty:
            wait_cnt += 1

    server.shutdown()
    return apikey
Beispiel #4
0
def init(dataset_name):
    """
    Initialize a new dataset at the current dir.
    After init ensure that your data files are in this directory.
    Then you can upload them to Floyd. Example:

        floyd data upload
    """
    existing = DatasetClient().get_by_name(dataset_name)
    if not existing:
        # Unknown dataset server-side: send the user to the dashboard
        # with the name pre-filled instead of failing silently.
        create_dataset_base_url = "{}/datasets/create".format(
            floyd.floyd_web_host)
        create_dataset_url = "{}?name={}".format(create_dataset_base_url,
                                                 dataset_name)
        floyd_logger.error(
            ("Dataset name does not match your list of datasets. "
             "Create your new dataset in the web dashboard:\n\t%s"),
            create_dataset_base_url)
        webbrowser.open(create_dataset_url)
        return

    # Persist the dataset linkage for subsequent `floyd data upload` runs.
    DataConfigManager.set_config(
        DataConfig(name=dataset_name, family_id=existing.id))
    floyd_logger.info(
        "Data source \"{}\" initialized in current directory".format(
            dataset_name))
    floyd_logger.info("""
    You can now upload your data to Floyd by:
        floyd data upload
    """)
Beispiel #5
0
def output(id, url, download):
    """
    Shows the output url of the run.
    By default opens the output page in your default browser.
    """
    experiment = ExperimentClient().get(id)
    task_instance = TaskInstanceClient().get(
        get_module_task_instance_id(experiment.task_instances))

    # Guard clause: nothing to show when the job produced no output.
    if "output" not in task_instance.output_ids:
        floyd_logger.error("Output directory not available")
        return

    resource = ResourceClient().get(task_instance.output_ids["output"])
    output_dir_url = "{}/viewer/{}".format(floyd.floyd_host, resource.uri)
    if url:
        # --url: just print the link.
        floyd_logger.info(output_dir_url)
    elif download:
        # --download: fetch the output as a tarball and unpack it locally.
        output_dir_url = "{}&download=true".format(output_dir_url)
        ExperimentClient().download_tar(url=output_dir_url,
                                        untar=True,
                                        delete_after_untar=True)
    else:
        # Default: open the viewer page in the browser.
        floyd_logger.info(
            "Opening output directory in your browser ...")
        webbrowser.open(output_dir_url)
Beispiel #6
0
def delete(ids, yes):
    """
    Delete data sets.

    Exits with status 1 when any requested deletion failed or was not
    a valid dataset.
    """
    had_errors = False

    for id in ids:
        data_source = DataClient().get(id)
        if not data_source:
            had_errors = True
            continue

        data_name = normalize_data_name(data_source.name)
        # Only names whose last path segment is numeric are treated as
        # deletable datasets here.
        suffix = data_name.split('/')[-1]
        if not suffix.isdigit():
            had_errors = True
            floyd_logger.error('%s is not a dataset, skipped.', id)
            if suffix == 'output':
                floyd_logger.error(
                    'To delete job output, please delete the job itself.')
            continue

        # Prompt unless --yes was given (confirm is short-circuited away).
        confirmed = yes or click.confirm(
            "Delete Data: {}?".format(data_name), abort=False, default=False)
        if not confirmed:
            floyd_logger.info("Data %s: Skipped", data_name)
            continue

        if DataClient().delete(data_source.id):
            floyd_logger.info("Data %s: Deleted", data_name)
        else:
            had_errors = True

    if had_errors:
        sys.exit(1)
Beispiel #7
0
def delete(id, yes):
    """
    Delete project run
    """
    job = ExperimentClient().get(id)
    instance = TaskInstanceClient().get(
        get_module_task_instance_id(job.task_instances))

    # Queued/running jobs must be stopped before they can be deleted.
    if job.state in ["queued", "running"]:
        floyd_logger.info(
            "Experiment in {} state cannot be deleted. Stop it first".format(
                job.state))
        return

    if not yes:
        # abort=True makes click exit the command on a "no" answer.
        click.confirm('Delete Run: {}?'.format(job.name),
                      abort=True,
                      default=False)

    # Remove the backing module first (when present), then the run itself.
    if instance.module_id:
        ModuleClient().delete(instance.module_id)

    if ExperimentClient().delete(id):
        floyd_logger.info("Experiment deleted")
    else:
        floyd_logger.error("Failed to delete experiment")
Beispiel #8
0
def process_data_ids(data):
    """
    Resolve up to 5 user-supplied data names/ids into server-side ids.

    Each entry may be "name_or_id" or "name_or_id:mount_path". Returns
    (True, [id or "id:path", ...]) on success, (False, None) when more
    than 5 datasets are given or any entry cannot be resolved.
    """
    if len(data) > 5:
        floyd_logger.error("Cannot attach more than 5 datasets to a job")
        return False, None

    # Get the data entity from the server to:
    # 1. Confirm that the data id or uri exists and has the right permissions
    # 2. If uri is used, get the id of the dataset
    data_ids = []
    for data_name_or_id in data:
        path = None
        if ':' in data_name_or_id:
            # Split on the first ':' only; a mount path that itself
            # contains ':' would otherwise make the 2-tuple unpacking
            # raise ValueError.
            data_name_or_id, path = data_name_or_id.split(':', 1)
            data_name_or_id = normalize_data_name(data_name_or_id,
                                                  use_data_config=False)

        data_obj = DataClient().get(
            normalize_data_name(data_name_or_id, use_data_config=False))

        if not data_obj:
            # Try with the raw ID
            data_obj = DataClient().get(data_name_or_id)

        if not data_obj:
            floyd_logger.error(
                "Data not found for name or id: {}".format(data_name_or_id))
            return False, None
        if path:
            data_ids.append("%s:%s" % (data_obj.id, path))
        else:
            data_ids.append(data_obj.id)
    return True, data_ids
Beispiel #9
0
def show_new_job_info(expt_client, job_name, expt_info, mode,
                      open_notebook=True):
    """
    Report how to reach a newly created job.

    For 'jupyter' and 'serve' modes, polls the API until the job's task
    instances exist, then prints the notebook URL / service endpoint and
    the free-plan timeout warning. For any other mode, prints the log
    command.

    :param open_notebook: open the Jupyter URL in a browser once it is
        reachable. Defaults to True, which preserves the previous
        behavior where the code mistakenly tested the builtin ``open``
        (always truthy) instead of a flag.
    """
    if mode in ['jupyter', 'serve']:
        while True:
            # Wait for the experiment / task instances to become available
            try:
                experiment = expt_client.get(expt_info['id'])
                if experiment.task_instances:
                    break
            except Exception:
                pass  # the shared debug line below reports the wait

            floyd_logger.debug("Job not available yet: %s", expt_info['id'])
            sleep(3)

        # Print the path to jupyter notebook
        if mode == 'jupyter':
            jupyter_url = experiment.service_url
            if not jupyter_url:
                floyd_logger.error(
                    "Jupyter URL not available, please check job state and log for error."
                )
                sys.exit(1)

            print(
                "Setting up your instance and waiting for Jupyter notebook to become available ...",
                end='')
            if wait_for_url(jupyter_url,
                            sleep_duration_seconds=2,
                            iterations=900):
                sleep(3)  # HACK: sleep extra 3 seconds for traffic route sync
                floyd_logger.info("\nPath to jupyter notebook: %s",
                                  jupyter_url)
                if open_notebook:
                    webbrowser.open(jupyter_url)
            else:
                floyd_logger.info("\nPath to jupyter notebook: %s",
                                  jupyter_url)
                floyd_logger.info(
                    "Notebook is still loading. View logs to track progress")
                floyd_logger.info("   floyd logs %s", job_name)

        # Print the path to serving endpoint
        if mode == 'serve':
            floyd_logger.info("Path to service endpoint: %s",
                              experiment.service_url)

        if experiment.timeout_seconds < 24 * 60 * 60:
            floyd_logger.info(
                "\nYour job timeout is currently set to %s seconds",
                experiment.timeout_seconds)
            floyd_logger.info(
                "This is because you are in the free plan. Paid users will have longer timeouts. "
                "See https://www.floydhub.com/pricing for details")

    else:
        floyd_logger.info("To view logs enter:")
        floyd_logger.info("   floyd logs %s", job_name)
Beispiel #10
0
def show_new_job_info(expt_client,
                      job_name,
                      expt_info,
                      mode,
                      open_notebook=True):
    """
    Print a summary table and URL for a newly created job.

    For 'jupyter' and 'serve' modes, polls the API until the job's task
    instances exist, then (jupyter) optionally opens the job page in a
    browser or (serve) prints the service endpoint, plus a free-plan
    timeout warning. For any other mode, prints the log command.

    :param expt_client: client used to poll the experiment by id.
    :param job_name: fully qualified job name, used in URLs and messages.
    :param expt_info: dict with at least an 'id' key for polling.
    :param mode: job mode string; 'jupyter' and 'serve' are special-cased.
    :param open_notebook: when True, open the job URL in a browser once
        the jupyter job is up.
    """
    table_output = [["JOB NAME"], [job_name]]
    floyd_logger.info('\n' + tabulate(table_output, headers="firstrow") + '\n')

    job_url = '%s/%s' % (floyd.floyd_web_host, job_name)
    floyd_logger.info("URL to job: %s", job_url)

    if mode == 'jupyter':
        floyd_logger.info(
            "\n[!] DEPRECATION NOTICE\n"
            "Jupyter mode will no longer be supported after September 15th.\n"
            "Please migrate your projects to use Workspaces: "
            "https://docs.floydhub.com/guides/workspace/.")

    if mode in ['jupyter', 'serve']:
        while True:
            # Wait for the experiment / task instances to become available
            try:
                experiment = expt_client.get(expt_info['id'])
                if experiment.task_instances:
                    break
            except Exception:
                pass

            floyd_logger.debug("Job not available yet: %s", expt_info['id'])
            sleep(3)
            continue

        # Print the url to jupyter notebook
        if mode == 'jupyter':
            if not experiment.service_url:
                floyd_logger.error(
                    "Jupyter not available, please check job state and log for error."
                )
                sys.exit(1)

            if open_notebook:
                webbrowser.open(job_url)

        # Print the url to serving endpoint
        if mode == 'serve':
            floyd_logger.info("URL to service endpoint: %s",
                              experiment.service_url)

        # Timeouts under 24h indicate the free plan; warn the user.
        if experiment.timeout_seconds < 24 * 60 * 60:
            floyd_logger.info(
                "\nYour job timeout is currently set to %s seconds",
                experiment.timeout_seconds)
            floyd_logger.info(
                "This is because you are in the free plan. Paid users will have longer timeouts. "
                "See https://www.floydhub.com/pricing for details")

    else:
        floyd_logger.info("\nTo view logs enter:")
        floyd_logger.info("   floyd logs %s", job_name)
Beispiel #11
0
 def delete(self, data_id):
     """
     Delete the data module with the given id.

     Returns True on success, False (after logging) on FloydException.
     """
     try:
         # Data delete is synchronous server-side and can take a long
         # time, hence the generous 60s timeout.
         self.request("DELETE", self.url + data_id, timeout=60)
     except FloydException as e:
         floyd_logger.error("Data %s: ERROR! %s", data_id, e.message)
         return False
     return True
Beispiel #12
0
def auto_upgrade():
    """
    Best-effort CLI self-upgrade: conda inside a conda env, pip
    otherwise. Failures are logged, never raised.
    """
    try:
        do_upgrade = conda_upgrade if is_conda_env() else pip_upgrade
        do_upgrade()
    except Exception as e:
        floyd_logger.error(e)
Beispiel #13
0
def upgrade():
    """
    Upgrade floyd command line

    Runs pip in a subprocess instead of calling ``pip.main`` directly:
    the in-process API was removed in pip 10, so ``pip.main`` raises
    AttributeError on modern pip installations. Failures are logged,
    never raised.
    """
    import subprocess
    import sys
    try:
        subprocess.check_call(
            [sys.executable, "-m", "pip", "install", "--upgrade",
             PROJECT_NAME])
    except Exception as e:
        floyd_logger.error(e)
Beispiel #14
0
def upgrade():
    """
    Upgrade floyd command line
    """
    # Log (rather than raise) any failure so the CLI still exits cleanly.
    try:
        pip_upgrade()
    except Exception as e:
        floyd_logger.error(e)
Beispiel #15
0
def restart(ctx, job_name, data, open_notebook, env, message, gpu, cpu, gpup,
            cpup, command):
    """
    Restart a given job as a new job.

    Instance type is taken from the gpu/cpu/gpup/cpup flags when given,
    otherwise inherited from the original job. env, message and command
    likewise override the original job's values when provided. Exits
    with status 1 on an invalid env, unresolvable data, or a failed
    restart request.
    """
    parameters = {}

    expt_client = ExperimentClient()

    # Resolve the job by its normalized name first, falling back to the
    # raw name/id when normalization does not match anything.
    try:
        job = expt_client.get(normalize_job_name(job_name))
    except FloydException:
        job = expt_client.get(job_name)

    # Flags are checked premium-first; with no flags the original job's
    # instance type is inherited.
    if gpup:
        instance_type = G1P_INSTANCE_TYPE
    elif cpup:
        instance_type = C1P_INSTANCE_TYPE
    elif gpu:
        instance_type = G1_INSTANCE_TYPE
    elif cpu:
        instance_type = C1_INSTANCE_TYPE
    else:
        instance_type = job.instance_type

    if instance_type is not None:
        parameters['instance_type'] = instance_type
    else:
        # NOTE(review): this branch re-reads job.instance_type, whose
        # value instance_type already holds here -- appears redundant.
        instance_type = job.instance_type

    if env is not None:
        if not validate_env(env, instance_type):
            sys.exit(1)
        parameters['env'] = env

    # NOTE(review): data_ids is unused below; only the success flag is
    # consumed, so this acts purely as validation of the data arguments.
    success, data_ids = process_data_ids(data)
    if not success:
        sys.exit(1)

    if message:
        parameters['message'] = message

    if command:
        parameters['command'] = ' '.join(command)

    floyd_logger.info('Restarting job %s...', job_name)

    new_job_info = expt_client.restart(job.id, parameters=parameters)
    if not new_job_info:
        floyd_logger.error("Failed to restart job")
        sys.exit(1)

    floyd_logger.info('New job created:')
    table_output = [["JOB NAME"], [new_job_info['name']]]
    floyd_logger.info('\n' + tabulate(table_output, headers="firstrow") + '\n')

    show_new_job_info(expt_client, new_job_info['name'], new_job_info,
                      job.mode, open_notebook)
Beispiel #16
0
def auto_upgrade():
    """
    Best-effort CLI self-upgrade: conda inside a conda env, pip
    otherwise. Failures are logged, never raised.
    """
    try:
        # Imported lazily to avoid a hard dependency at module load.
        from floyd.cli.utils import is_conda_env
        do_upgrade = conda_upgrade if is_conda_env() else pip_upgrade
        do_upgrade()
    except Exception as e:
        floyd_logger.error(e)
Beispiel #17
0
def _loads_config(file_path):
    if not os.path.isfile(file_path):
        floyd_logger.error(
            'floyd cli config not found, please use "floyd login" command initialize it.'
        )
        sys.exit(5)

    with open(file_path, "r") as config_file:
        access_token_str = config_file.read()

    return json.loads(access_token_str)
Beispiel #18
0
 def new_tus_credentials(self, data_id):
     """
     Fetch (data_upload_id, token) credentials for the upload server.

     Returns an empty tuple (after logging) when the request fails
     with a FloydException.
     """
     try:
         payload = self.request(
             "POST",
             "%s%s/upload_v2_credentials" % (self.url, data_id)).json()
         return (payload["data_upload_id"], payload["token"])
     except FloydException as e:
         floyd_logger.error(
             "Error while fetching data upload metadata for %s:\n\t%s",
             data_id, e.message)
         return ()
Beispiel #19
0
def upgrade():
    """
    Upgrade floyd command line
    """
    try:
        # Anaconda builds advertise themselves in sys.version; matching
        # 'ontinuum' covers both "Continuum" and "continuum".
        in_conda = 'conda' in sys.version or 'ontinuum' in sys.version
        if in_conda:
            conda_upgrade()
        else:
            pip_upgrade()
    except Exception as e:
        floyd_logger.error(e)
Beispiel #20
0
    def get_access_token(cls):
        """
        Load the saved access token from CONFIG_FILE_PATH.

        Exits with status 5 (after logging) when the config file is
        missing, i.e. the user has not run "floyd login" yet.
        """
        if not os.path.isfile(cls.CONFIG_FILE_PATH):
            floyd_logger.error(
                'floyd cli config not found, please use "floyd login" command initialize it.'
            )
            sys.exit(5)

        with open(cls.CONFIG_FILE_PATH, "r") as config_file:
            raw = config_file.read()

        return AccessToken.from_dict(json.loads(raw))
Beispiel #21
0
def delete(id, yes):
    """
    Delete data set.

    Prompts for confirmation unless ``yes`` is set; exits with status 1
    when the data id cannot be found.
    """
    data_source = DataClient().get(id)
    if not data_source:
        # Guard: .get() returns None for unknown ids, and the confirm
        # prompt below would otherwise crash on data_source.name.
        floyd_logger.error("Data %s: not found", id)
        sys.exit(1)

    if not yes:
        # abort=True makes click exit the command on a "no" answer.
        click.confirm('Delete Data: {}?'.format(data_source.name), abort=True, default=False)

    if DataClient().delete(id):
        floyd_logger.info("Data deleted")
    else:
        floyd_logger.error("Failed to delete data")
Beispiel #22
0
    def get(self, id):
        """
        Fetch the data module with the given id.

        Returns a Data instance, or None (after logging) when the
        resource is not a data module or the request raises a
        FloydException.
        """
        try:
            payload = self.request("GET", self.url + id).json()
            if payload['module_type'] != 'DataModule':
                floyd_logger.error(
                    "Data %s: ERROR! Resource given is not a data.", id)
                return None
            return Data.from_dict(payload)
        except FloydException as e:
            floyd_logger.info("Data %s: ERROR! %s\n", id, e.message)
            return None
Beispiel #23
0
def stop(id):
    """
    Stop a run before it can finish.
    """
    job = ExperimentClient().get(id)
    # Only queued or running jobs can be stopped.
    if job.state not in ["queued", "running"]:
        floyd_logger.info("Job in {} state cannot be stopped".format(job.state))
        return

    if ExperimentClient().stop(job.id):
        floyd_logger.info("Experiment shutdown request submitted. Check status to confirm shutdown")
    else:
        floyd_logger.error("Failed to stop job")
Beispiel #24
0
 def get_all(self):
     """
     List all data modules owned by the logged-in user.

     Returns a list of Data instances; an empty list (after logging)
     when the request raises a FloydException.
     """
     try:
         access_token = AuthConfigManager.get_access_token()
         query = {
             "module_type": "data",
             "username": access_token.username,
         }
         response = self.request("GET", self.url, params=query)
         return [Data.from_dict(item) for item in response.json()]
     except FloydException as e:
         floyd_logger.error("Error while retrieving data: %s", e.message)
         return []
Beispiel #25
0
def show_new_job_info(expt_client,
                      job_name,
                      expt_info,
                      mode,
                      open_notebook=True):
    """
    Report how to reach a newly created job.

    For 'jupyter' and 'serve' modes, polls the API until the job's task
    instances exist, then prints (and optionally opens) the notebook URL
    or the serving endpoint, plus a free-plan timeout warning. For any
    other mode, prints the log command.

    :param open_notebook: when True, open the Jupyter URL in a browser
        once the job is up.
    """
    if mode in ['jupyter', 'serve']:
        while True:
            # Wait for the experiment / task instances to become available
            try:
                experiment = expt_client.get(expt_info['id'])
                if experiment.task_instances:
                    break
            except Exception:
                # The shared debug line below already reports the wait;
                # logging here as well produced a duplicate message per
                # failed poll.
                pass

            floyd_logger.debug("Job not available yet: %s", expt_info['id'])
            sleep(3)

        # Print the path to jupyter notebook
        if mode == 'jupyter':
            if not experiment.service_url:
                floyd_logger.error(
                    "Jupyter not available, please check job state and log for error."
                )
                sys.exit(1)

            jupyter_url = '%s/%s' % (floyd.floyd_web_host, job_name)
            floyd_logger.info("\nPath to jupyter notebook: %s", jupyter_url)
            if open_notebook:
                webbrowser.open(jupyter_url)

        # Print the path to serving endpoint
        if mode == 'serve':
            floyd_logger.info("Path to service endpoint: %s",
                              experiment.service_url)

        if experiment.timeout_seconds < 24 * 60 * 60:
            floyd_logger.info(
                "\nYour job timeout is currently set to %s seconds",
                experiment.timeout_seconds)
            floyd_logger.info(
                "This is because you are in the free plan. Paid users will have longer timeouts. "
                "See https://www.floydhub.com/pricing for details")

    else:
        floyd_logger.info("To view logs enter:")
        floyd_logger.info("   floyd logs %s", job_name)
Beispiel #26
0
def validate_env(env, instance_type):
    """
    Check that ``env`` is a supported environment for the architecture
    of ``instance_type``. Logs and returns False when it is not.
    """
    arch = INSTANCE_ARCH_MAP[instance_type]
    envs = EnvClient().get_all().get(arch)
    if not envs:
        floyd_logger.error("invalid instance type")
        return False

    if env not in envs:
        floyd_logger.error(
            "{} is not in the list of supported environments:\n{}".format(
                env, tabulate([[env_name] for env_name in envs.keys()])))
        return False

    return True
Beispiel #27
0
    def get(self, id):
        """
        Fetch the data module with the given id.

        Returns a Data instance, or None (after logging) when the
        resource is not a data module or the request raises a
        FloydException.
        """
        try:
            payload = self.request("GET", self.url + id).json()
            if payload['module_type'] != 'DataModule':
                floyd_logger.error(
                    "Data %s: ERROR! Resource given is not a data.", id)
                return None
            return Data.from_dict(payload)
        except FloydException as e:
            floyd_logger.info(
                "Data %s: ERROR! %s\nIf you have already created the dataset, make sure you have uploaded at least one version.",
                id, e.message)
            return None
Beispiel #28
0
    def get_all(self):
        """
        List all data modules in the current data config's family.

        Returns a list of Data instances; an empty list (after logging)
        when the request raises a FloydException.
        """
        try:
            data_config = DataConfigManager.get_config()
            query = {
                "module_type": "data",
                "family_id": data_config.family_id,
            }
            response = self.request("GET", self.url, params=query)
            return [Data.from_dict(item) for item in response.json()]
        except FloydException as e:
            floyd_logger.error("Error while retrieving data: %s", e.message)
            return []
Beispiel #29
0
def validate_env(env, arch):
    """
    Check that ``env`` is a supported environment for ``arch``.
    Logs and returns False when it is not.
    """
    envs = EnvClient().get_all().get(arch)
    if not envs:
        floyd_logger.error("invalid instance type")
        return False

    if env not in envs:
        envlist = tabulate([[env_name]
                            for env_name in sorted(envs.keys())])
        floyd_logger.error(
            "%s is not in the list of supported environments:\n%s", env,
            envlist)
        return False

    return True
Beispiel #30
0
    def resume_upload(self,
                      file_path,
                      file_endpoint,
                      chunk_size=None,
                      headers=None,
                      auth=None,
                      offset=None):
        """
        Resume a chunked (TUS-style) upload of ``file_path`` to
        ``file_endpoint``, showing a progress bar.

        The current offset is always re-fetched from the upload server,
        then the file is streamed chunk by chunk from that offset.
        Returns True on completion, False (after logging) on any
        FloydException or connection error.

        :param chunk_size: bytes per chunk; defaults to self.chunk_size.
        :param headers: extra headers forwarded to the server requests.
        :param auth: credentials forwarded to the server requests.
        :param offset: NOTE(review) -- this argument is immediately
            overwritten by the server-reported offset below and thus has
            no effect; confirm whether callers rely on passing it.
        """
        chunk_size = chunk_size or self.chunk_size

        try:
            offset = self._get_offset(file_endpoint,
                                      headers=headers,
                                      auth=auth)
        except FloydException as e:
            floyd_logger.error(
                "Failed to fetch offset data from upload server! %s",
                e.message)
            return False
        except requests.exceptions.ConnectionError as e:
            floyd_logger.error(
                "Cannot connect to the Floyd data upload server for offset. "
                "Check your internet connection.")
            return False

        total_sent = 0
        file_size = os.path.getsize(file_path)

        with open(file_path, 'rb') as f:

            pb = ProgressBar(filled_char="=", expected_size=file_size)
            while offset < file_size:
                pb.show(offset)
                # Seek each iteration: the server-confirmed offset drives
                # the read position, not our own bookkeeping.
                f.seek(offset)
                data = f.read(chunk_size)
                try:
                    offset = self._upload_chunk(data,
                                                offset,
                                                file_endpoint,
                                                headers=headers,
                                                auth=auth)
                    total_sent += len(data)
                    floyd_logger.debug("%s bytes sent", total_sent)
                except FloydException as e:
                    floyd_logger.error(
                        "Failed to fetch offset data from upload server! %s",
                        e.message)
                    return False
                except requests.exceptions.ConnectionError as e:
                    floyd_logger.error(
                        "Cannot connect to the Floyd data upload server. "
                        "Check your internet connection.")
                    return False

            # Complete the progress bar with one more call to show()
            pb.show(offset)
            pb.done()
        return True