Example #1
    def _upload_chunk(self,
                      data,
                      offset,
                      file_endpoint,
                      headers=None,
                      auth=None):
        floyd_logger.debug("Uploading {} bytes chunk from offset: {}".format(
            len(data), offset))

        h = {
            'Content-Type': 'application/offset+octet-stream',
            'Upload-Offset': str(offset),
            'Tus-Resumable': self.TUS_VERSION,
        }

        if headers:
            h.update(headers)

        response = requests.patch(file_endpoint,
                                  headers=h,
                                  data=data,
                                  auth=auth)
        self.check_response_status(response)

        return int(response.headers["Upload-Offset"])
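
For orientation, the PATCH step above can be exercised on its own. A minimal standalone sketch using requests follows; the endpoint URL, auth, and TUS_VERSION value are placeholders, not FloydHub values.

import requests

TUS_VERSION = "1.0.0"  # assumed protocol version; the client above reads self.TUS_VERSION

def upload_chunk(file_endpoint, data, offset, auth=None):
    # PATCH the next chunk at the given offset, per the tus resumable-upload protocol
    headers = {
        "Content-Type": "application/offset+octet-stream",
        "Upload-Offset": str(offset),
        "Tus-Resumable": TUS_VERSION,
    }
    response = requests.patch(file_endpoint, headers=headers, data=data, auth=auth)
    response.raise_for_status()
    # The server acknowledges the chunk by returning the new offset
    return int(response.headers["Upload-Offset"])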
Example #2
def get_files_in_directory(path, file_type):
    """
    Gets the list of files in the directory and subdirectories
    Respects .floydignore file if present
    """
    local_files = []
    separator = os.path.sep
    ignore_list = FloydIgnoreManager.get_list()
    ignore_list_localized = [
        ".{}{}".format(separator, item) for item in ignore_list
    ]
    floyd_logger.debug("Ignoring list : {}".format(ignore_list_localized))

    for root, dirs, files in os.walk(path):
        ignore_dir = False
        for item in ignore_list_localized:
            if root.startswith(item):
                ignore_dir = True

        if ignore_dir:
            floyd_logger.debug("Ignoring directory : {}".format(root))
            continue

        for file_name in files:
            file_relative_path = os.path.join(root, file_name)
            if separator != '/':  # convert relative paths to Unix style
                file_relative_path = file_relative_path.replace(
                    os.path.sep, '/')

            file_full_path = os.path.join(os.getcwd(), root, file_name)
            local_files.append(
                (file_type, (file_relative_path, open(file_full_path,
                                                      'rb'), 'text/plain')))

    return local_files
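
The tuples collected above use the (field_name, (filename, file_object, content_type)) shape that requests accepts for multipart uploads, which is how later examples pass them via files=. A small illustrative sketch with placeholder paths and a hypothetical endpoint:

import requests

# Placeholder files and endpoint, only to show the tuple shape built above
files = [
    ("code", ("train.py", open("train.py", "rb"), "text/plain")),
    ("code", ("utils/helpers.py", open("utils/helpers.py", "rb"), "text/plain")),
]
response = requests.post("https://api.example.com/modules", files=files)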
Example #3
    def download(self, url, filename, relative=False, headers=None, timeout=5):
        """
        Download the file from the given url at the current path
        """
        request_url = self.base_url + url if relative else url
        floyd_logger.debug("Downloading file from url: {}".format(request_url))

        # Auth headers if access_token is present
        request_headers = {"Authorization": "Bearer {}".format(
            self.access_token.token if self.access_token else None),
        }
        # Add any additional headers
        if headers:
            request_headers.update(headers)

        try:
            response = requests.get(request_url,
                                    headers=request_headers,
                                    timeout=timeout,
                                    stream=True)
            self.check_response_status(response)
            with open(filename, 'wb') as f:
                total_length = int(response.headers.get('content-length'))
                for chunk in progress.bar(response.iter_content(chunk_size=1024),
                                          expected_size=(total_length / 1024) + 1):
                    if chunk:
                        f.write(chunk)
            return filename
        except requests.exceptions.ConnectionError as exception:
            floyd_logger.debug("Exception: {}".format(exception))
            sys.exit("Cannot connect to the Floyd server. Check your internet connection.")
Example #4
    def create(self, module):
        try:
            upload_files, total_file_size = get_files_in_directory(
                path='.', file_type='code')
        except OSError:
            sys.exit(
                "Directory contains too many files to upload. Add unused files and directories to .floydignore file. "
                "Or upload data separately using floyd data command")

        floyd_logger.info("Creating project run. Total upload size: {}".format(
            total_file_size))
        floyd_logger.debug("Creating module. Uploading: {} files".format(
            len(upload_files)))
        floyd_logger.info("Syncing code ...")

        # Add request data
        upload_files.append(("json", json.dumps(module.to_dict())))
        multipart_encoder = MultipartEncoder(fields=upload_files)

        # Attach progress bar
        progress_callback = create_progress_callback(multipart_encoder)
        multipart_encoder_monitor = MultipartEncoderMonitor(
            multipart_encoder, progress_callback)

        response = self.request(
            "POST",
            self.url,
            data=multipart_encoder_monitor,
            headers={"Content-Type": multipart_encoder.content_type},
            timeout=3600)

        floyd_logger.info("Done")
        return response.json().get("id")
Example #5
def upload():
    """
    Upload data in the current dir to Floyd.
    """
    data_config = DataConfigManager.get_config()
    access_token = AuthConfigManager.get_access_token()
    version = data_config.version

    # Create data object
    data_name = "{}/{}:{}".format(access_token.username, data_config.name,
                                  version)
    data = DataRequest(name=data_name, description=version, version=version)
    data_id = DataClient().create(data)
    floyd_logger.debug("Created data with id : {}".format(data_id))
    floyd_logger.info("Upload finished")

    # Update expt config including predecessor
    data_config.increment_version()
    data_config.set_data_predecessor(data_id)
    DataConfigManager.set_config(data_config)

    # Print output
    table_output = [["DATA ID", "NAME", "VERSION"],
                    [data_id, data_name, version]]
    floyd_logger.info(tabulate(table_output, headers="firstrow"))
Example #6
    def check_response_status(self, response):
        """
        Check if response is successful. Else raise Exception.
        """
        if not (200 <= response.status_code < 300):
            try:
                message = response.json()["errors"]
            except Exception:
                message = None
            floyd_logger.debug(
                "Error received : status_code: {}, message: {}".format(
                    response.status_code, message or response.content))
            if response.status_code == 400:
                raise BadRequestException(response)
            elif response.status_code == 401:
                raise AuthenticationException()
            elif response.status_code == 403:
                raise AuthorizationException()
            elif response.status_code == 404:
                raise NotFoundException()
            elif response.status_code == 429:
                raise OverLimitException(response.json().get("message"))
            elif response.status_code == 502:
                raise BadGatewayException()
            elif response.status_code == 504:
                raise GatewayTimeoutException()
            elif 500 <= response.status_code < 600:
                if 'Server under maintenance' in response.content.decode():
                    raise ServerException('Server under maintenance, please try again later.')
                else:
                    raise ServerException()
            else:
                msg = "An error occurred. Server response: {}".format(response.status_code)
                raise FloydException(message=msg)
Example #7
    def init(cls):
        floyd_logger.debug(
            "Setting default floyd ignore in the file {}".format(
                cls.CONFIG_FILE_PATH))

        with open(cls.CONFIG_FILE_PATH, "w") as config_file:
            config_file.write(DEFAULT_FLOYD_IGNORE_LIST)
Example #8
def initialize_new_upload(data_config, access_token, description=None, source_dir='.'):
    # TODO: hit upload server to check for liveness before moving on
    data_config.set_tarball_path(None)
    data_config.set_data_endpoint(None)
    data_config.set_resource_id(None)

    namespace = data_config.namespace or access_token.username
    data_name = "{}/{}".format(namespace, data_config.name)

    # Create tarball of the data using the ID returned from the API
    # TODO: allow to the users to change directory for the compression
    temp_dir = tempfile.mkdtemp()
    tarball_path = os.path.join(temp_dir, "floydhub_data.tar.gz")

    floyd_logger.debug("Creating tarfile with contents of current directory: %s",
                       tarball_path)
    floyd_logger.info("Compressing data...")

    # TODO: purge tarball on Ctrl-C
    create_tarfile(source_dir=source_dir, filename=tarball_path)

    # If starting a new upload fails for some reason down the line, we don't
    # want to re-tar, so save off the tarball path now
    data_config.set_tarball_path(tarball_path)
    DataConfigManager.set_config(data_config)

    # Create data object using API
    data = DataRequest(name=data_name,
                       description=description,
                       family_id=data_config.family_id,
                       data_type='gzip')
    data_info = DataClient().create(data)
    if not data_info:
        rmtree(temp_dir)
        sys.exit(1)

    data_config.set_data_id(data_info['id'])
    data_config.set_data_name(data_info['name'])
    DataConfigManager.set_config(data_config)

    # fetch auth token for upload server
    creds = DataClient().new_tus_credentials(data_info['id'])
    if not creds:
        # TODO: delete module from server?
        rmtree(temp_dir)
        sys.exit(1)

    data_resource_id = creds[0]
    data_endpoint = TusDataClient().initialize_upload(
        tarball_path,
        metadata={"filename": data_resource_id},
        auth=creds)
    if not data_endpoint:
        # TODO: delete module from server?
        floyd_logger.error("Failed to get upload URL from Floydhub!")
        rmtree(temp_dir)
        sys.exit(1)

    data_config.set_data_endpoint(data_endpoint)
    DataConfigManager.set_config(data_config)
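
create_tarfile is referenced above but not shown in these examples. A minimal sketch of such a helper using the standard-library tarfile module; this is an assumption, not necessarily the project's actual implementation:

import tarfile

def create_tarfile(source_dir, filename):
    # Pack the contents of source_dir into a gzip-compressed tarball at filename
    with tarfile.open(filename, "w:gz") as tar:
        tar.add(source_dir, arcname=".")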
Example #9
def show_new_job_info(expt_client, job_name, expt_info, mode, open_notebook=True):
    if mode in ['jupyter', 'serve']:
        while True:
            # Wait for the experiment / task instances to become available
            try:
                experiment = expt_client.get(expt_info['id'])
                if experiment.task_instances:
                    break
            except Exception:
                floyd_logger.debug("Job not available yet: %s",
                                   expt_info['id'])

            floyd_logger.debug("Job not available yet: %s", expt_info['id'])
            sleep(3)
            continue

        # Print the path to jupyter notebook
        if mode == 'jupyter':
            jupyter_url = experiment.service_url
            if not jupyter_url:
                floyd_logger.error(
                    "Jupyter URL not available, please check job state and log for error."
                )
                sys.exit(1)

            print(
                "Setting up your instance and waiting for Jupyter notebook to become available ...",
                end='')
            if wait_for_url(jupyter_url,
                            sleep_duration_seconds=2,
                            iterations=900):
                sleep(3)  # HACK: sleep extra 3 seconds for traffic route sync
                floyd_logger.info("\nPath to jupyter notebook: %s",
                                  jupyter_url)
                if open_notebook:
                    webbrowser.open(jupyter_url)
            else:
                floyd_logger.info("\nPath to jupyter notebook: %s",
                                  jupyter_url)
                floyd_logger.info(
                    "Notebook is still loading. View logs to track progress")
                floyd_logger.info("   floyd logs %s", job_name)

        # Print the path to serving endpoint
        if mode == 'serve':
            floyd_logger.info("Path to service endpoint: %s",
                              experiment.service_url)

        if experiment.timeout_seconds < 24 * 60 * 60:
            floyd_logger.info(
                "\nYour job timeout is currently set to %s seconds",
                experiment.timeout_seconds)
            floyd_logger.info(
                "This is because you are in the free plan. Paid users will have longer timeouts. "
                "See https://www.floydhub.com/pricing for details")

    else:
        floyd_logger.info("To view logs enter:")
        floyd_logger.info("   floyd logs %s", job_name)
Example #10
def show_new_job_info(expt_client,
                      job_name,
                      expt_info,
                      mode,
                      open_notebook=True):
    table_output = [["JOB NAME"], [job_name]]
    floyd_logger.info('\n' + tabulate(table_output, headers="firstrow") + '\n')

    job_url = '%s/%s' % (floyd.floyd_web_host, job_name)
    floyd_logger.info("URL to job: %s", job_url)

    if mode == 'jupyter':
        floyd_logger.info(
            "\n[!] DEPRECATION NOTICE\n"
            "Jupyter mode will no longer be supported after September 15th.\n"
            "Please migrate your projects to use Workspaces: "
            "https://docs.floydhub.com/guides/workspace/.")

    if mode in ['jupyter', 'serve']:
        while True:
            # Wait for the experiment / task instances to become available
            try:
                experiment = expt_client.get(expt_info['id'])
                if experiment.task_instances:
                    break
            except Exception:
                pass

            floyd_logger.debug("Job not available yet: %s", expt_info['id'])
            sleep(3)
            continue

        # Print the url to jupyter notebook
        if mode == 'jupyter':
            if not experiment.service_url:
                floyd_logger.error(
                    "Jupyter not available, please check job state and log for error."
                )
                sys.exit(1)

            if open_notebook:
                webbrowser.open(job_url)

        # Print the url to serving endpoint
        if mode == 'serve':
            floyd_logger.info("URL to service endpoint: %s",
                              experiment.service_url)

        if experiment.timeout_seconds < 24 * 60 * 60:
            floyd_logger.info(
                "\nYour job timeout is currently set to %s seconds",
                experiment.timeout_seconds)
            floyd_logger.info(
                "This is because you are in the free plan. Paid users will have longer timeouts. "
                "See https://www.floydhub.com/pricing for details")

    else:
        floyd_logger.info("\nTo view logs enter:")
        floyd_logger.info("   floyd logs %s", job_name)
Example #11
    def get_content(self, resource_id):
        try:
            response = self.request(
                'GET', self.URL_PREFIX + resource_id + "?content=true")
            return response.content.decode(response.encoding)
        except FloydException as e:
            floyd_logger.debug("Resource %s: ERROR! %s", resource_id,
                               e.message)
            return None
Example #12
    def set_apikey(cls, username, apikey):
        floyd_logger.debug("Setting apikey in the file %s",
                           cls.CONFIG_FILE_PATH)
        with open(cls.CONFIG_FILE_PATH, "w") as config_file:
            config_file.write(
                json.dumps({
                    'username': username,
                    'apikey': apikey
                }))
Example #13
    def resume_upload(self,
                      file_path,
                      file_endpoint,
                      chunk_size=None,
                      headers=None,
                      auth=None,
                      offset=None):
        chunk_size = chunk_size or self.chunk_size

        try:
            offset = self._get_offset(file_endpoint,
                                      headers=headers,
                                      auth=auth)
        except FloydException as e:
            floyd_logger.error(
                "Failed to fetch offset data from upload server! %s",
                e.message)
            return False
        except requests.exceptions.ConnectionError as e:
            floyd_logger.error(
                "Cannot connect to the Floyd data upload server for offset. "
                "Check your internet connection.")
            return False

        total_sent = 0
        file_size = os.path.getsize(file_path)

        with open(file_path, 'rb') as f:

            pb = ProgressBar(filled_char="=", expected_size=file_size)
            while offset < file_size:
                pb.show(offset)
                f.seek(offset)
                data = f.read(chunk_size)
                try:
                    offset = self._upload_chunk(data,
                                                offset,
                                                file_endpoint,
                                                headers=headers,
                                                auth=auth)
                    total_sent += len(data)
                    floyd_logger.debug("%s bytes sent", total_sent)
                except FloydException as e:
                    floyd_logger.error(
                        "Failed to upload chunk to the upload server! %s",
                        e.message)
                    return False
                except requests.exceptions.ConnectionError as e:
                    floyd_logger.error(
                        "Cannot connect to the Floyd data upload server. "
                        "Check your internet connection.")
                    return False

            # Complete the progress bar with one more call to show()
            pb.show(offset)
            pb.done()
        return True
Example #14
    def init(cls):
        if os.path.isfile(cls.CONFIG_FILE_PATH):
            floyd_logger.debug("floyd ignore file already present at %s",
                               cls.CONFIG_FILE_PATH)
            return

        floyd_logger.debug("Setting default floyd ignore in the file %s",
                           cls.CONFIG_FILE_PATH)

        with open(cls.CONFIG_FILE_PATH, "w") as config_file:
            config_file.write(DEFAULT_FLOYD_IGNORE_LIST)
Example #15
    def create(self, module):
        upload_files = get_files_in_directory(path='.', file_type='code')
        request_data = {"json": json.dumps(module.to_dict())}
        floyd_logger.debug("Creating module. Uploading {} files ...".format(
            len(upload_files)))
        floyd_logger.info("Syncing code ...")
        response = self.request("POST",
                                self.url,
                                data=request_data,
                                files=upload_files,
                                timeout=3600)
        return response.json().get("id")
Example #16
    def _get_offset(self, file_endpoint, headers=None, auth=None):
        floyd_logger.debug("Getting offset")

        h = {"Tus-Resumable": self.TUS_VERSION}

        if headers:
            h.update(headers)

        response = requests.head(file_endpoint, headers=h, auth=auth)
        self.check_response_status(response)

        offset = int(response.headers["Upload-Offset"])
        floyd_logger.debug("offset: %s", offset)
        return offset
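
The same offset query can be issued standalone. A minimal sketch with requests; the endpoint is a placeholder and the tus version is assumed to be 1.0.0:

import requests

def get_upload_offset(file_endpoint, auth=None):
    # tus HEAD request: the server reports how many bytes of the file it already holds
    response = requests.head(file_endpoint, headers={"Tus-Resumable": "1.0.0"}, auth=auth)
    response.raise_for_status()
    return int(response.headers["Upload-Offset"])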
Example #17
def show_new_job_info(expt_client,
                      job_name,
                      expt_info,
                      mode,
                      open_notebook=True):
    if mode in ['jupyter', 'serve']:
        while True:
            # Wait for the experiment / task instances to become available
            try:
                experiment = expt_client.get(expt_info['id'])
                if experiment.task_instances:
                    break
            except Exception:
                floyd_logger.debug("Job not available yet: %s",
                                   expt_info['id'])

            floyd_logger.debug("Job not available yet: %s", expt_info['id'])
            sleep(3)
            continue

        # Print the path to jupyter notebook
        if mode == 'jupyter':
            if not experiment.service_url:
                floyd_logger.error(
                    "Jupyter not available, please check job state and log for error."
                )
                sys.exit(1)

            jupyter_url = '%s/%s' % (floyd.floyd_web_host, job_name)
            floyd_logger.info("\nPath to jupyter notebook: %s", jupyter_url)
            if open_notebook:
                webbrowser.open(jupyter_url)

        # Print the path to serving endpoint
        if mode == 'serve':
            floyd_logger.info("Path to service endpoint: %s",
                              experiment.service_url)

        if experiment.timeout_seconds < 24 * 60 * 60:
            floyd_logger.info(
                "\nYour job timeout is currently set to %s seconds",
                experiment.timeout_seconds)
            floyd_logger.info(
                "This is because you are in the free plan. Paid users will have longer timeouts. "
                "See https://www.floydhub.com/pricing for details")

    else:
        floyd_logger.info("To view logs enter:")
        floyd_logger.info("   floyd logs %s", job_name)
Example #18
    def create(self, module):
        try:
            upload_files, total_file_size = get_files_in_current_directory(
                file_type='code')
        except OSError:
            sys.exit(
                "Directory contains too many files to upload. If you have data files in the current directory, "
                "please upload them separately using \"floyd data\" command and remove them from here.\n"
                "See http://docs.floydhub.com/faqs/job/#i-get-too-many-open-files-error-when-i-run-my-project "
                "for more details on how to fix this.")

        if total_file_size > self.MAX_UPLOAD_SIZE:
            sys.exit((
                "Code size too large to sync, please keep it under %s.\n"
                "If you have data files in the current directory, please upload them "
                "separately using \"floyd data\" command and remove them from here.\n"
                "You may find the following documentation useful:\n\n"
                "\thttps://docs.floydhub.com/guides/create_and_upload_dataset/\n"
                "\thttps://docs.floydhub.com/guides/data/mounting_data/\n"
                "\thttps://docs.floydhub.com/guides/floyd_ignore/") %
                     (sizeof_fmt(self.MAX_UPLOAD_SIZE)))

        floyd_logger.info("Creating project run. Total upload size: %s",
                          sizeof_fmt(total_file_size))
        floyd_logger.debug("Creating module. Uploading: %s files",
                           len(upload_files))
        floyd_logger.info("Syncing code ...")

        # Add request data
        upload_files.append(("json", json.dumps(module.to_dict())))
        multipart_encoder = MultipartEncoder(fields=upload_files)

        # Attach progress bar
        progress_callback, bar = create_progress_callback(multipart_encoder)
        multipart_encoder_monitor = MultipartEncoderMonitor(
            multipart_encoder, progress_callback)

        try:
            response = self.request(
                "POST",
                self.url,
                data=multipart_encoder_monitor,
                headers={"Content-Type": multipart_encoder.content_type},
                timeout=3600)
        finally:
            # always make sure we clear the console
            bar.done()
        return response.json().get("id")
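
create_progress_callback is not shown in these examples. A plausible sketch that matches how it is used here (returning a callback plus the bar so the caller can clear it), built on clint's progress bar as in the tus client above; treat it as an approximation rather than the project's exact code:

from clint.textui.progress import Bar as ProgressBar

def create_progress_callback(encoder):
    # Size the bar to the full encoded request body and advance it as bytes are read
    bar = ProgressBar(filled_char='=', expected_size=encoder.len)

    def callback(monitor):
        # MultipartEncoderMonitor passes itself; bytes_read tracks upload progress
        bar.show(monitor.bytes_read)

    return callback, bar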
Example #19
    def create(self, data):
        try:
            upload_files, total_file_size = get_files_in_directory(path='.', file_type='data')
        except OSError:
            sys.exit("Directory contains too many files to upload. Add unused directories to .floydignore file. "
                     "Or download data directly from the internet into FloydHub")

        request_data = {"json": json.dumps(data.to_dict())}
        floyd_logger.info("Creating data source. Total upload size: {}".format(total_file_size))
        floyd_logger.debug("Total files: {}".format(len(upload_files)))
        floyd_logger.info("Uploading {} files ...".format(len(upload_files)))
        response = self.request("POST",
                                self.url,
                                data=request_data,
                                files=upload_files,
                                timeout=3600)
        return response.json().get("id")
Example #20
    def download(self, url, filename, relative=False, headers=None, timeout=5):
        """
        Download the file from the given url at the current path
        """
        request_url = self.base_url + url if relative else url
        floyd_logger.debug("Downloading file from url: {}".format(request_url))

        # Auth headers if access_token is present
        request_headers = {}
        if self.access_token:
            request_headers[
                "Authorization"] = "Bearer " + self.access_token.token
        # Add any additional headers
        if headers:
            request_headers.update(headers)

        try:
            response = requests.get(request_url,
                                    headers=request_headers,
                                    timeout=timeout,
                                    stream=True)
            self.check_response_status(response)
            with open(filename, 'wb') as f:
                # chunk mode response doesn't have content-length so we are
                # using a custom header here
                content_length = response.headers.get(
                    'x-floydhub-content-length')
                if not content_length:
                    content_length = response.headers.get('content-length')
                if content_length:
                    for chunk in progress.bar(
                            response.iter_content(chunk_size=1024),
                            expected_size=(int(content_length) / 1024) + 1):
                        if chunk:
                            f.write(chunk)
                else:
                    for chunk in response.iter_content(chunk_size=1024):
                        if chunk:
                            f.write(chunk)
            return filename
        except requests.exceptions.ConnectionError as exception:
            floyd_logger.debug("Exception: {}".format(exception))
            sys.exit(
                "Cannot connect to the Floyd server. Check your internet connection."
            )
Example #21
    def initialize_upload(self,
                          file_path,
                          base_url=None,
                          headers=None,
                          metadata=None,
                          auth=None):
        base_url = base_url or self.base_url
        floyd_logger.info("Initializing upload...")

        file_size = os.path.getsize(file_path)

        h = {
            "Tus-Resumable": self.TUS_VERSION,
            "Upload-Length": str(file_size),
        }

        if headers:
            h.update(headers)

        if metadata:
            pairs = [
                k + ' ' + base64.b64encode(v.encode('utf-8')).decode()
                for k, v in metadata.items()
            ]
            h["Upload-Metadata"] = ','.join(pairs)

        try:
            response = requests.post(base_url, headers=h, auth=auth)
            self.check_response_status(response)

            location = response.headers["Location"]
            floyd_logger.debug("Data upload endpoint: %s", location)
        except FloydException as e:
            floyd_logger.info("Data upload create: ERROR! %s", e.message)
            location = ""
        except requests.exceptions.ConnectionError as e:
            floyd_logger.error(
                "Cannot connect to the Floyd data upload server for upload url. "
                "Check your internet connection.")
            location = ""

        return location
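
The Upload-Metadata header built above follows the tus convention of comma-separated "key base64(value)" pairs. A quick standalone illustration with a placeholder resource id:

import base64

metadata = {"filename": "abc123"}  # placeholder resource id
pairs = [
    k + ' ' + base64.b64encode(v.encode('utf-8')).decode()
    for k, v in metadata.items()
]
print(','.join(pairs))  # prints: filename YWJjMTIz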
Example #22
def get_files_in_current_directory(file_type):
    """
    Gets the list of files in the current directory and subdirectories.
    Respects .floydignore file if present
    """
    local_files = []
    total_file_size = 0

    ignore_list, whitelist = FloydIgnoreManager.get_lists()

    floyd_logger.debug("Ignoring: %s", ignore_list)
    floyd_logger.debug("Whitelisting: %s", whitelist)

    file_paths = get_unignored_file_paths(ignore_list, whitelist)

    for file_path in file_paths:
        local_files.append((file_type, (unix_style_path(file_path), open(file_path, 'rb'), 'text/plain')))
        total_file_size += os.path.getsize(file_path)

    return (local_files, sizeof_fmt(total_file_size))
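
sizeof_fmt is used here and in several other examples but never shown. A typical human-readable byte formatter of this kind, included as an assumption about its behavior rather than the project's exact code:

def sizeof_fmt(num, suffix='B'):
    # Convert a byte count into a human-readable string, e.g. 10485760 -> '10.0 MiB'
    for unit in ['', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi']:
        if abs(num) < 1024.0:
            return '%3.1f %s%s' % (num, unit, suffix)
        num /= 1024.0
    return '%.1f %s%s' % (num, 'Ei', suffix)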
Example #23
    def request(self,
                method,
                url,
                params=None,
                data=None,
                files=None,
                json=None,
                timeout=5,
                headers=None):
        """
        Execute the request using requests library
        """
        request_url = self.base_url + url
        floyd_logger.debug(
            "Starting request to url: {} with params: {}, data: {}".format(
                request_url, params, data))

        # Auth headers if access_token is present
        request_headers = {
            "Authorization":
            "Bearer {}".format(
                self.access_token.token if self.access_token else None),
        }
        # Add any additional headers
        if headers:
            request_headers.update(headers)

        try:
            response = requests.request(method,
                                        request_url,
                                        params=params,
                                        data=data,
                                        json=json,
                                        headers=request_headers,
                                        files=files,
                                        timeout=timeout)
        except requests.exceptions.ConnectionError as exception:
            floyd_logger.debug("Exception: {}".format(exception))
            sys.exit(
                "Cannot connect to the Floyd server. Check your internet connection."
            )

        try:
            floyd_logger.debug("Response Content: {}, Headers: {}".format(
                response.json(), response.headers))
        except Exception:
            floyd_logger.debug("Request failed. Response: {}".format(
                response.content))

        self.check_response_status(response)
        return response
Example #24
    def request(self,
                method,
                url,
                params=None,
                data=None,
                files=None,
                json=None,
                timeout=5,
                headers=None,
                skip_auth=False):
        """
        Execute the request using requests library
        """
        request_url = self.base_url + url
        floyd_logger.debug(
            "Starting request to url: %s with params: %s, data: %s",
            request_url, params, data)

        request_headers = {'x-floydhub-cli-version': get_cli_version()}
        # Auth headers if access_token is present
        if self.access_token:
            request_headers[
                "Authorization"] = "Bearer " + self.access_token.token
        # Add any additional headers
        if headers:
            request_headers.update(headers)

        try:
            response = requests.request(method,
                                        request_url,
                                        params=params,
                                        data=data,
                                        json=json,
                                        headers=request_headers,
                                        files=files,
                                        timeout=timeout)
        except requests.exceptions.ConnectionError as exception:
            floyd_logger.debug("Exception: %s", exception, exc_info=True)
            sys.exit(
                "Cannot connect to the Floyd server. Check your internet connection."
            )
        except requests.exceptions.Timeout as exception:
            floyd_logger.debug("Exception: %s", exception, exc_info=True)
            sys.exit(
                "Connection to FloydHub server timed out. Please retry or check your internet connection."
            )

        floyd_logger.debug("Response Content: %s, Headers: %s" %
                           (response.content, response.headers))
        self.check_response_status(response)
        return response
Example #25
    def check_response_status(self, response):
        """
        Check if response is successful. Else raise Exception.
        """
        if not (200 <= response.status_code < 300):
            try:
                message = response.json()["errors"]
            except Exception:
                message = None
            floyd_logger.debug("Error received : status_code: {}, message: {}".format(response.status_code,
                                                                                      message or response.content))

            if response.status_code == 401:
                raise AuthenticationException()
            elif response.status_code == 404:
                raise NotFoundException()
            elif response.status_code == 400:
                raise BadRequestException()
            elif response.status_code == 429:
                raise OverLimitException(response.json().get("message"))
            else:
                response.raise_for_status()
Example #26
    def create(self, data):
        """
        Create a temporary directory for the tar file that will be removed at the
        end of the operation.
        """
        with tempfile.TemporaryDirectory() as temp_directory:
            floyd_logger.info("Compressing data ...")
            compressed_file_path = os.path.join(temp_directory, "data.tar.gz")

            # Create tarfile
            floyd_logger.debug("Creating tarfile with contents of current directory: {}".format(compressed_file_path))
            create_tarfile(source_dir='.', filename=compressed_file_path)

            total_file_size = os.path.getsize(compressed_file_path)
            floyd_logger.info("Creating data source. Total upload size: {}".format(sizeof_fmt(total_file_size)))
            floyd_logger.info("Uploading compressed data ...")

            # Add request data
            request_data = []
            request_data.append(("data", ('data.tar', open(compressed_file_path, 'rb'), 'text/plain')))
            request_data.append(("json", json.dumps(data.to_dict())))

            multipart_encoder = MultipartEncoder(
                fields=request_data
            )

            # Attach progress bar
            progress_callback = create_progress_callback(multipart_encoder)
            multipart_encoder_monitor = MultipartEncoderMonitor(multipart_encoder, progress_callback)

            response = self.request("POST",
                                    self.url,
                                    data=multipart_encoder_monitor,
                                    headers={"Content-Type": multipart_encoder.content_type},
                                    timeout=3600)

            floyd_logger.info("Done")
            return response.json().get("id")
Example #27
def get_files_in_directory(path, file_type):
    """
    Gets the list of files in the directory and subdirectories
    Respects .floydignore file if present
    """
    local_files = []
    separator = os.path.sep
    ignore_list = FloydIgnoreManager.get_list()

    # make sure that subdirectories are also excluded
    ignore_list_expanded = ignore_list + ["{}/**".format(item) for item in ignore_list]
    floyd_logger.debug("Ignoring list : {}".format(ignore_list))
    total_file_size = 0

    for root, dirs, files in os.walk(path):
        dirs[:] = [d for d in dirs if d not in ignore_list]
        
        ignore_dir = False
        normalized_path = normalize_path(path, root)
        for item in ignore_list_expanded:
            if PurePath(normalized_path).match(item):
                ignore_dir = True
                break

        if ignore_dir:
            floyd_logger.debug("Ignoring directory : {}".format(root))
            continue

        for file_name in files:
            ignore_file = False
            normalized_path = normalize_path(path, os.path.join(root, file_name))
            for item in ignore_list_expanded:
                if PurePath(normalized_path).match(item):
                    ignore_file = True
                    break

            if ignore_file:
                floyd_logger.debug("Ignoring file : {}".format(normalized_path))
                continue

            file_relative_path = os.path.join(root, file_name)
            if separator != '/':  # convert relative paths to Unix style
                file_relative_path = file_relative_path.replace(os.path.sep, '/')
            file_full_path = os.path.join(os.getcwd(), root, file_name)

            local_files.append((file_type, (file_relative_path, open(file_full_path, 'rb'), 'text/plain')))
            total_file_size += os.path.getsize(file_full_path)

    return (local_files, sizeof_fmt(total_file_size))
Example #28
def get_unignored_file_paths(ignore_list=None, whitelist=None):
    """
    Given an ignore_list and a whitelist of glob patterns, returns the list of
    unignored file paths in the current directory and its subdirectories
    """
    unignored_files = []
    if ignore_list is None:
        ignore_list = []
    if whitelist is None:
        whitelist = []

    for root, dirs, files in os.walk("."):
        floyd_logger.debug("Root:%s, Dirs:%s", root, dirs)

        if ignore_path(unix_style_path(root), ignore_list, whitelist):
            # Reset dirs to avoid going further down this directory.
            # Then continue to the next iteration of os.walk, which causes
            # everything in this directory to be ignored.
            #
            # Note that whitelisted files that are within directories that are
            # ignored will not be whitelisted. This follows the expected
            # behavior established by .gitignore logic:
            # "It is not possible to re-include a file if a parent directory of
            # that file is excluded."
            # https://git-scm.com/docs/gitignore#_pattern_format
            dirs[:] = []
            floyd_logger.debug("Ignoring directory : %s", root)
            continue

        for file_name in files:
            file_path = unix_style_path(os.path.join(root, file_name))
            if ignore_path(file_path, ignore_list, whitelist):
                floyd_logger.debug("Ignoring file : %s", file_name)
                continue

            unignored_files.append(os.path.join(root, file_name))

    return unignored_files
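
unix_style_path and ignore_path are helpers referenced above but not included in these examples. A simplified sketch of the idea using fnmatch; this is a rough approximation, and the real matching logic in floyd-cli is more involved:

import fnmatch

def unix_style_path(path):
    # Normalize Windows separators so patterns can be written with forward slashes
    return path.replace('\\', '/')

def ignore_path(path, ignore_list, whitelist):
    # A path is ignored when it matches an ignore pattern and no whitelist pattern
    whitelisted = any(fnmatch.fnmatch(path, pattern) for pattern in whitelist)
    ignored = any(fnmatch.fnmatch(path, pattern) for pattern in ignore_list)
    return ignored and not whitelisted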
Example #29
def complete_upload(data_config):
    data_endpoint = data_config.data_endpoint
    data_id = data_config.data_id
    tarball_path = data_config.tarball_path

    if not data_id:
        floyd_logger.error("Corrupted upload state, please start a new one.")
        sys.exit(1)

    # check for tarball upload, upload to server if not done
    if not data_config.resource_id and (tarball_path and data_endpoint):
        floyd_logger.debug("Getting fresh upload credentials")
        creds = DataClient().new_tus_credentials(data_id)
        if not creds:
            sys.exit(1)

        file_size = os.path.getsize(tarball_path)
        # check for upload limit dimension
        if file_size > MAX_UPLOAD_SIZE:
            try:
                floyd_logger.info("Removing compressed data...")
                rmtree(os.path.dirname(tarball_path))
            except (OSError, TypeError):
                pass

            sys.exit(("Data size too large to upload, please keep it under %s.\n") %
                     (sizeof_fmt(MAX_UPLOAD_SIZE)))

        floyd_logger.info("Uploading compressed data. Total upload size: %s",
                          sizeof_fmt(file_size))
        tus_client = TusDataClient()
        if not tus_client.resume_upload(tarball_path, data_endpoint, auth=creds):
            floyd_logger.error("Failed to finish upload!")
            return

        try:
            floyd_logger.info("Removing compressed data...")
            rmtree(os.path.dirname(tarball_path))
        except (OSError, TypeError):
            pass

        floyd_logger.debug("Created data with id : %s", data_id)
        floyd_logger.info("Upload finished.")

        # Update data config
        data_config.set_tarball_path(None)
        data_config.set_data_endpoint(None)
        data_source = DataClient().get(data_id)
        data_config.set_resource_id(data_source.resource_id)
        DataConfigManager.set_config(data_config)

    # data tarball uploaded, check for server untar
    if data_config.resource_id:
        floyd_logger.info(
            "Waiting for server to unpack data.\n"
            "You can exit at any time and come back to check the status with:\n"
            "\tfloyd data upload -r")
        try:
            for i in dots(ResourceWaitIter(data_config.resource_id),
                          label='Waiting for unpack...'):
                pass
        except WaitTimeoutException:
            clint_STREAM.write('\n')
            clint_STREAM.flush()
            floyd_logger.info(
                "Looks like it is going to take longer for Floydhub to unpack "
                "your data. Please check back later.")
            sys.exit(1)
        else:
            data_config.set_resource_id(None)
            data_config.set_tarball_path(None)
            data_config.set_data_endpoint(None)
            data_config.set_resource_id(None)
            data_config.set_data_id(None)
            DataConfigManager.set_config(data_config)

    # Print output
    table_output = [["NAME"],
                    [normalize_data_name(data_config.data_name)]]
    floyd_logger.info('')
    floyd_logger.info(tabulate(table_output, headers="firstrow"))
Example #30
    def set_config(cls, experiment_config):
        floyd_logger.debug("Setting {} in the file {}".format(
            experiment_config.to_dict(), cls.CONFIG_FILE_PATH))
        with open(cls.CONFIG_FILE_PATH, "w") as config_file:
            config_file.write(json.dumps(experiment_config.to_dict()))