def _upload_chunk(self, data, offset, file_endpoint, headers=None, auth=None):
    floyd_logger.debug("Uploading {} bytes chunk from offset: {}".format(
        len(data), offset))

    h = {
        'Content-Type': 'application/offset+octet-stream',
        'Upload-Offset': str(offset),
        'Tus-Resumable': self.TUS_VERSION,
    }
    if headers:
        h.update(headers)

    response = requests.patch(file_endpoint, headers=h, data=data, auth=auth)
    self.check_response_status(response)
    return int(response.headers["Upload-Offset"])

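# A minimal sketch of how the tus protocol drives _upload_chunk: PATCH one
# chunk at a time and trust the Upload-Offset the server echoes back. The
# client instance, endpoint URL, and file name below are hypothetical
# placeholders; resume_upload() later in this section is the real driver loop.
client = TusDataClient()
endpoint = "https://upload.example.com/files/abc123"  # assumed endpoint
offset = 0
with open("data.tar.gz", "rb") as f:
    while True:
        f.seek(offset)
        chunk = f.read(client.chunk_size)
        if not chunk:
            break
        # the server returns the new offset after each successful PATCH
        offset = client._upload_chunk(chunk, offset, endpoint)
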
def get_files_in_directory(path, file_type):
    """
    Gets the list of files in the directory and subdirectories.
    Respects .floydignore file if present
    """
    local_files = []
    separator = os.path.sep
    ignore_list = FloydIgnoreManager.get_list()
    ignore_list_localized = [".{}{}".format(separator, item) for item in ignore_list]
    floyd_logger.debug("Ignoring list : {}".format(ignore_list_localized))

    for root, dirs, files in os.walk(path):
        ignore_dir = False
        for item in ignore_list_localized:
            if root.startswith(item):
                ignore_dir = True

        if ignore_dir:
            floyd_logger.debug("Ignoring directory : {}".format(root))
            continue

        for file_name in files:
            file_relative_path = os.path.join(root, file_name)
            if separator != '/':  # convert relative paths to Unix style
                file_relative_path = file_relative_path.replace(os.path.sep, '/')
            file_full_path = os.path.join(os.getcwd(), root, file_name)

            local_files.append((file_type,
                                (file_relative_path, open(file_full_path, 'rb'), 'text/plain')))
    return local_files

def download(self, url, filename, relative=False, headers=None, timeout=5):
    """
    Download the file from the given url at the current path
    """
    request_url = self.base_url + url if relative else url
    floyd_logger.debug("Downloading file from url: {}".format(request_url))

    # Auth headers if access_token is present
    request_headers = {
        "Authorization": "Bearer {}".format(
            self.access_token.token if self.access_token else None),
    }

    # Add any additional headers
    if headers:
        request_headers.update(headers)

    try:
        response = requests.get(request_url,
                                headers=request_headers,
                                timeout=timeout,
                                stream=True)
        self.check_response_status(response)

        with open(filename, 'wb') as f:
            total_length = int(response.headers.get('content-length'))
            for chunk in progress.bar(response.iter_content(chunk_size=1024),
                                      expected_size=(total_length / 1024) + 1):
                if chunk:
                    f.write(chunk)
        return filename
    except requests.exceptions.ConnectionError as exception:
        floyd_logger.debug("Exception: {}".format(exception))
        sys.exit("Cannot connect to the Floyd server. Check your internet connection.")

def create(self, module):
    try:
        upload_files, total_file_size = get_files_in_directory(path='.', file_type='code')
    except OSError:
        sys.exit("Directory contains too many files to upload. "
                 "Add unused files and directories to the .floydignore file, "
                 "or upload data separately using the floyd data command.")

    floyd_logger.info("Creating project run. Total upload size: {}".format(total_file_size))
    floyd_logger.debug("Creating module. Uploading: {} files".format(len(upload_files)))
    floyd_logger.info("Syncing code ...")

    # Add request data
    upload_files.append(("json", json.dumps(module.to_dict())))
    multipart_encoder = MultipartEncoder(fields=upload_files)

    # Attach progress bar
    progress_callback = create_progress_callback(multipart_encoder)
    multipart_encoder_monitor = MultipartEncoderMonitor(multipart_encoder, progress_callback)

    response = self.request("POST",
                            self.url,
                            data=multipart_encoder_monitor,
                            headers={"Content-Type": multipart_encoder.content_type},
                            timeout=3600)
    floyd_logger.info("Done")
    return response.json().get("id")

def upload():
    """
    Upload data in the current dir to Floyd.
    """
    data_config = DataConfigManager.get_config()
    access_token = AuthConfigManager.get_access_token()
    version = data_config.version

    # Create data object
    data_name = "{}/{}:{}".format(access_token.username, data_config.name, version)
    data = DataRequest(name=data_name, description=version, version=version)
    data_id = DataClient().create(data)
    floyd_logger.debug("Created data with id : {}".format(data_id))
    floyd_logger.info("Upload finished")

    # Update expt config including predecessor data
    data_config.increment_version()
    data_config.set_data_predecessor(data_id)
    DataConfigManager.set_config(data_config)

    # Print output
    table_output = [["DATA ID", "NAME", "VERSION"],
                    [data_id, data_name, version]]
    floyd_logger.info(tabulate(table_output, headers="firstrow"))

def check_response_status(self, response):
    """
    Check if response is successful. Else raise Exception.
    """
    if not (200 <= response.status_code < 300):
        try:
            message = response.json()["errors"]
        except Exception:
            message = None
        floyd_logger.debug("Error received : status_code: {}, message: {}".format(
            response.status_code, message or response.content))

        if response.status_code == 400:
            raise BadRequestException(response)
        elif response.status_code == 401:
            raise AuthenticationException()
        elif response.status_code == 403:
            raise AuthorizationException()
        elif response.status_code == 404:
            raise NotFoundException()
        elif response.status_code == 429:
            raise OverLimitException(response.json().get("message"))
        elif response.status_code == 502:
            raise BadGatewayException()
        elif response.status_code == 504:
            raise GatewayTimeoutException()
        elif 500 <= response.status_code < 600:
            if 'Server under maintenance' in response.content.decode():
                raise ServerException('Server under maintenance, please try again later.')
            else:
                raise ServerException()
        else:
            msg = "An error occurred. Server response: {}".format(response.status_code)
            raise FloydException(message=msg)

def init(cls):
    floyd_logger.debug("Setting default floyd ignore in the file {}".format(
        cls.CONFIG_FILE_PATH))
    with open(cls.CONFIG_FILE_PATH, "w") as config_file:
        config_file.write(DEFAULT_FLOYD_IGNORE_LIST)

def initialize_new_upload(data_config, access_token, description=None, source_dir='.'):
    # TODO: hit upload server to check for liveness before moving on

    data_config.set_tarball_path(None)
    data_config.set_data_endpoint(None)
    data_config.set_resource_id(None)

    namespace = data_config.namespace or access_token.username
    data_name = "{}/{}".format(namespace, data_config.name)

    # Create tarball of the data using the ID returned from the API
    # TODO: allow the users to change the directory used for the compression
    temp_dir = tempfile.mkdtemp()
    tarball_path = os.path.join(temp_dir, "floydhub_data.tar.gz")
    floyd_logger.debug("Creating tarfile with contents of current directory: %s",
                       tarball_path)
    floyd_logger.info("Compressing data...")

    # TODO: purge tarball on Ctrl-C
    create_tarfile(source_dir=source_dir, filename=tarball_path)

    # If starting a new upload fails for some reason down the line, we don't
    # want to re-tar, so save off the tarball path now
    data_config.set_tarball_path(tarball_path)
    DataConfigManager.set_config(data_config)

    # Create data object using API
    data = DataRequest(name=data_name,
                       description=description,
                       family_id=data_config.family_id,
                       data_type='gzip')
    data_info = DataClient().create(data)
    if not data_info:
        rmtree(temp_dir)
        sys.exit(1)

    data_config.set_data_id(data_info['id'])
    data_config.set_data_name(data_info['name'])
    DataConfigManager.set_config(data_config)

    # fetch auth token for upload server
    creds = DataClient().new_tus_credentials(data_info['id'])
    if not creds:
        # TODO: delete module from server?
        rmtree(temp_dir)
        sys.exit(1)

    data_resource_id = creds[0]
    data_endpoint = TusDataClient().initialize_upload(
        tarball_path,
        metadata={"filename": data_resource_id},
        auth=creds)
    if not data_endpoint:
        # TODO: delete module from server?
        floyd_logger.error("Failed to get upload URL from Floydhub!")
        rmtree(temp_dir)
        sys.exit(1)

    data_config.set_data_endpoint(data_endpoint)
    DataConfigManager.set_config(data_config)

def show_new_job_info(expt_client, job_name, expt_info, mode, open_notebook=True):
    if mode in ['jupyter', 'serve']:
        while True:
            # Wait for the experiment / task instances to become available
            try:
                experiment = expt_client.get(expt_info['id'])
                if experiment.task_instances:
                    break
            except Exception:
                pass
            floyd_logger.debug("Job not available yet: %s", expt_info['id'])
            sleep(3)
            continue

        # Print the path to jupyter notebook
        if mode == 'jupyter':
            jupyter_url = experiment.service_url
            if not jupyter_url:
                floyd_logger.error(
                    "Jupyter URL not available, please check job state and log for error.")
                sys.exit(1)

            print("Setting up your instance and waiting for Jupyter notebook to become available ...",
                  end='')
            if wait_for_url(jupyter_url, sleep_duration_seconds=2, iterations=900):
                sleep(3)  # HACK: sleep extra 3 seconds for traffic route sync
                floyd_logger.info("\nPath to jupyter notebook: %s", jupyter_url)
                if open_notebook:
                    webbrowser.open(jupyter_url)
            else:
                floyd_logger.info("\nPath to jupyter notebook: %s", jupyter_url)
                floyd_logger.info("Notebook is still loading. View logs to track progress")
                floyd_logger.info("    floyd logs %s", job_name)

        # Print the path to serving endpoint
        if mode == 'serve':
            floyd_logger.info("Path to service endpoint: %s", experiment.service_url)

        if experiment.timeout_seconds < 24 * 60 * 60:
            floyd_logger.info(
                "\nYour job timeout is currently set to %s seconds",
                experiment.timeout_seconds)
            floyd_logger.info(
                "This is because you are in the free plan. Paid users will have longer timeouts. "
                "See https://www.floydhub.com/pricing for details")
    else:
        floyd_logger.info("To view logs enter:")
        floyd_logger.info("    floyd logs %s", job_name)

def show_new_job_info(expt_client, job_name, expt_info, mode, open_notebook=True):
    table_output = [["JOB NAME"], [job_name]]
    floyd_logger.info('\n' + tabulate(table_output, headers="firstrow") + '\n')

    job_url = '%s/%s' % (floyd.floyd_web_host, job_name)
    floyd_logger.info("URL to job: %s", job_url)

    if mode == 'jupyter':
        floyd_logger.info(
            "\n[!] DEPRECATION NOTICE\n"
            "Jupyter mode will no longer be supported after September 15th.\n"
            "Please migrate your projects to use Workspaces: "
            "https://docs.floydhub.com/guides/workspace/.")

    if mode in ['jupyter', 'serve']:
        while True:
            # Wait for the experiment / task instances to become available
            try:
                experiment = expt_client.get(expt_info['id'])
                if experiment.task_instances:
                    break
            except Exception:
                pass
            floyd_logger.debug("Job not available yet: %s", expt_info['id'])
            sleep(3)
            continue

        # Print the url to jupyter notebook
        if mode == 'jupyter':
            if not experiment.service_url:
                floyd_logger.error(
                    "Jupyter not available, please check job state and log for error.")
                sys.exit(1)
            if open_notebook:
                webbrowser.open(job_url)

        # Print the url to serving endpoint
        if mode == 'serve':
            floyd_logger.info("URL to service endpoint: %s", experiment.service_url)

        if experiment.timeout_seconds < 24 * 60 * 60:
            floyd_logger.info(
                "\nYour job timeout is currently set to %s seconds",
                experiment.timeout_seconds)
            floyd_logger.info(
                "This is because you are in the free plan. Paid users will have longer timeouts. "
                "See https://www.floydhub.com/pricing for details")
    else:
        floyd_logger.info("\nTo view logs enter:")
        floyd_logger.info("    floyd logs %s", job_name)

def get_content(self, resource_id):
    try:
        response = self.request('GET', self.URL_PREFIX + resource_id + "?content=true")
        return response.content.decode(response.encoding)
    except FloydException as e:
        floyd_logger.debug("Resource %s: ERROR! %s", resource_id, e.message)
        return None

def set_apikey(cls, username, apikey):
    floyd_logger.debug("Setting apikey in the file %s", cls.CONFIG_FILE_PATH)
    with open(cls.CONFIG_FILE_PATH, "w") as config_file:
        config_file.write(json.dumps({
            'username': username,
            'apikey': apikey
        }))

def resume_upload(self, file_path, file_endpoint,
                  chunk_size=None, headers=None, auth=None, offset=None):
    chunk_size = chunk_size or self.chunk_size

    try:
        offset = self._get_offset(file_endpoint, headers=headers, auth=auth)
    except FloydException as e:
        floyd_logger.error(
            "Failed to fetch offset data from upload server! %s", e.message)
        return False
    except requests.exceptions.ConnectionError:
        floyd_logger.error(
            "Cannot connect to the Floyd data upload server for offset. "
            "Check your internet connection.")
        return False

    total_sent = 0
    file_size = os.path.getsize(file_path)

    with open(file_path, 'rb') as f:
        pb = ProgressBar(filled_char="=", expected_size=file_size)
        while offset < file_size:
            pb.show(offset)
            f.seek(offset)
            data = f.read(chunk_size)
            try:
                offset = self._upload_chunk(data, offset, file_endpoint,
                                            headers=headers, auth=auth)
                total_sent += len(data)
                floyd_logger.debug("%s bytes sent", total_sent)
            except FloydException as e:
                floyd_logger.error(
                    "Failed to upload chunk to upload server! %s", e.message)
                return False
            except requests.exceptions.ConnectionError:
                floyd_logger.error(
                    "Cannot connect to the Floyd data upload server. "
                    "Check your internet connection.")
                return False
        # Complete the progress bar with one more call to show()
        pb.show(offset)
        pb.done()

    return True

def init(cls):
    if os.path.isfile(cls.CONFIG_FILE_PATH):
        floyd_logger.debug("floyd ignore file already present at %s",
                           cls.CONFIG_FILE_PATH)
        return

    floyd_logger.debug("Setting default floyd ignore in the file %s",
                       cls.CONFIG_FILE_PATH)
    with open(cls.CONFIG_FILE_PATH, "w") as config_file:
        config_file.write(DEFAULT_FLOYD_IGNORE_LIST)

def create(self, module):
    upload_files = get_files_in_directory(path='.', file_type='code')
    request_data = {"json": json.dumps(module.to_dict())}
    floyd_logger.debug("Creating module. Uploading {} files ...".format(
        len(upload_files)))
    floyd_logger.info("Syncing code ...")
    response = self.request("POST",
                            self.url,
                            data=request_data,
                            files=upload_files,
                            timeout=3600)
    return response.json().get("id")

def _get_offset(self, file_endpoint, headers=None, auth=None):
    floyd_logger.debug("Getting offset")

    h = {"Tus-Resumable": self.TUS_VERSION}
    if headers:
        h.update(headers)

    response = requests.head(file_endpoint, headers=h, auth=auth)
    self.check_response_status(response)

    offset = int(response.headers["Upload-Offset"])
    floyd_logger.debug("offset: %s", offset)
    return offset

def show_new_job_info(expt_client, job_name, expt_info, mode, open_notebook=True):
    if mode in ['jupyter', 'serve']:
        while True:
            # Wait for the experiment / task instances to become available
            try:
                experiment = expt_client.get(expt_info['id'])
                if experiment.task_instances:
                    break
            except Exception:
                pass
            floyd_logger.debug("Job not available yet: %s", expt_info['id'])
            sleep(3)
            continue

        # Print the path to jupyter notebook
        if mode == 'jupyter':
            if not experiment.service_url:
                floyd_logger.error(
                    "Jupyter not available, please check job state and log for error.")
                sys.exit(1)
            jupyter_url = '%s/%s' % (floyd.floyd_web_host, job_name)
            floyd_logger.info("\nPath to jupyter notebook: %s", jupyter_url)
            if open_notebook:
                webbrowser.open(jupyter_url)

        # Print the path to serving endpoint
        if mode == 'serve':
            floyd_logger.info("Path to service endpoint: %s", experiment.service_url)

        if experiment.timeout_seconds < 24 * 60 * 60:
            floyd_logger.info(
                "\nYour job timeout is currently set to %s seconds",
                experiment.timeout_seconds)
            floyd_logger.info(
                "This is because you are in the free plan. Paid users will have longer timeouts. "
                "See https://www.floydhub.com/pricing for details")
    else:
        floyd_logger.info("To view logs enter:")
        floyd_logger.info("    floyd logs %s", job_name)

def create(self, module):
    try:
        upload_files, total_file_size = get_files_in_current_directory(file_type='code')
    except OSError:
        sys.exit(
            "Directory contains too many files to upload. If you have data files in the current directory, "
            "please upload them separately using \"floyd data\" command and remove them from here.\n"
            "See http://docs.floydhub.com/faqs/job/#i-get-too-many-open-files-error-when-i-run-my-project "
            "for more details on how to fix this.")

    if total_file_size > self.MAX_UPLOAD_SIZE:
        sys.exit(("Code size too large to sync, please keep it under %s.\n"
                  "If you have data files in the current directory, please upload them "
                  "separately using \"floyd data\" command and remove them from here.\n"
                  "You may find the following documentation useful:\n\n"
                  "\thttps://docs.floydhub.com/guides/create_and_upload_dataset/\n"
                  "\thttps://docs.floydhub.com/guides/data/mounting_data/\n"
                  "\thttps://docs.floydhub.com/guides/floyd_ignore/") % (sizeof_fmt(self.MAX_UPLOAD_SIZE)))

    floyd_logger.info("Creating project run. Total upload size: %s",
                      sizeof_fmt(total_file_size))
    floyd_logger.debug("Creating module. Uploading: %s files",
                       len(upload_files))
    floyd_logger.info("Syncing code ...")

    # Add request data
    upload_files.append(("json", json.dumps(module.to_dict())))
    multipart_encoder = MultipartEncoder(fields=upload_files)

    # Attach progress bar
    progress_callback, bar = create_progress_callback(multipart_encoder)
    multipart_encoder_monitor = MultipartEncoderMonitor(multipart_encoder, progress_callback)

    try:
        response = self.request(
            "POST", self.url,
            data=multipart_encoder_monitor,
            headers={"Content-Type": multipart_encoder.content_type},
            timeout=3600)
    finally:
        # always make sure we clear the console
        bar.done()

    return response.json().get("id")

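# Hedged sketch of what create_progress_callback (used above but not shown in
# this section) might look like: it wires a clint progress bar to a
# requests_toolbelt MultipartEncoderMonitor, which invokes the callback with
# itself after every chunk is read. The two-value return matches the usage
# above; the implementation details are assumptions.
from clint.textui.progress import Bar as ProgressBar

def create_progress_callback(encoder):
    bar = ProgressBar(filled_char="=", expected_size=encoder.len)

    def callback(monitor):
        # monitor.bytes_read is the total number of bytes read so far
        bar.show(monitor.bytes_read)

    return callback, bar
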
def create(self, data):
    try:
        upload_files, total_file_size = get_files_in_directory(path='.', file_type='data')
    except OSError:
        sys.exit("Directory contains too many files to upload. "
                 "Add unused directories to the .floydignore file, "
                 "or download data directly from the internet into FloydHub.")

    request_data = {"json": json.dumps(data.to_dict())}
    floyd_logger.info("Creating data source. Total upload size: {}".format(total_file_size))
    floyd_logger.debug("Total files: {}".format(len(upload_files)))
    floyd_logger.info("Uploading {} files ...".format(len(upload_files)))
    response = self.request("POST",
                            self.url,
                            data=request_data,
                            files=upload_files,
                            timeout=3600)
    return response.json().get("id")

def download(self, url, filename, relative=False, headers=None, timeout=5):
    """
    Download the file from the given url at the current path
    """
    request_url = self.base_url + url if relative else url
    floyd_logger.debug("Downloading file from url: {}".format(request_url))

    # Auth headers if access_token is present
    request_headers = {}
    if self.access_token:
        request_headers["Authorization"] = "Bearer " + self.access_token.token

    # Add any additional headers
    if headers:
        request_headers.update(headers)

    try:
        response = requests.get(request_url,
                                headers=request_headers,
                                timeout=timeout,
                                stream=True)
        self.check_response_status(response)

        with open(filename, 'wb') as f:
            # chunk mode response doesn't have content-length so we are
            # using a custom header here
            content_length = response.headers.get('x-floydhub-content-length')
            if not content_length:
                content_length = response.headers.get('content-length')

            if content_length:
                for chunk in progress.bar(response.iter_content(chunk_size=1024),
                                          expected_size=(int(content_length) / 1024) + 1):
                    if chunk:
                        f.write(chunk)
            else:
                for chunk in response.iter_content(chunk_size=1024):
                    if chunk:
                        f.write(chunk)
        return filename
    except requests.exceptions.ConnectionError as exception:
        floyd_logger.debug("Exception: {}".format(exception))
        sys.exit("Cannot connect to the Floyd server. Check your internet connection.")

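# Hypothetical usage of download(); `client` stands in for an instance of the
# API class above, and the resource path is a placeholder.
client.download("/resources/abc123/output.tar", filename="output.tar", relative=True)
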
def initialize_upload(self, file_path, base_url=None,
                      headers=None, metadata=None, auth=None):
    base_url = base_url or self.base_url
    floyd_logger.info("Initializing upload...")

    file_size = os.path.getsize(file_path)
    h = {
        "Tus-Resumable": self.TUS_VERSION,
        "Upload-Length": str(file_size),
    }
    if headers:
        h.update(headers)

    if metadata:
        pairs = [k + ' ' + base64.b64encode(v.encode('utf-8')).decode()
                 for k, v in metadata.items()]
        h["Upload-Metadata"] = ','.join(pairs)

    try:
        response = requests.post(base_url, headers=h, auth=auth)
        self.check_response_status(response)
        location = response.headers["Location"]
        floyd_logger.debug("Data upload endpoint: %s", location)
    except FloydException as e:
        floyd_logger.info("Data upload create: ERROR! %s", e.message)
        location = ""
    except requests.exceptions.ConnectionError:
        floyd_logger.error(
            "Cannot connect to the Floyd data upload server for upload url. "
            "Check your internet connection.")
        location = ""

    return location

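# A minimal end-to-end sketch of the tus flow, assuming a tarball on disk and
# the (resource_id, token)-style credentials used elsewhere in this code; all
# values below are placeholders.
tus_client = TusDataClient()
creds = ("resource-id", "upload-token")  # hypothetical credentials
endpoint = tus_client.initialize_upload("/tmp/floydhub_data.tar.gz",
                                        metadata={"filename": creds[0]},
                                        auth=creds)
if endpoint:
    tus_client.resume_upload("/tmp/floydhub_data.tar.gz", endpoint, auth=creds)
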
def get_files_in_current_directory(file_type):
    """
    Gets the list of files in the current directory and subdirectories.
    Respects .floydignore file if present
    """
    local_files = []
    total_file_size = 0

    ignore_list, whitelist = FloydIgnoreManager.get_lists()

    floyd_logger.debug("Ignoring: %s", ignore_list)
    floyd_logger.debug("Whitelisting: %s", whitelist)

    file_paths = get_unignored_file_paths(ignore_list, whitelist)

    for file_path in file_paths:
        local_files.append((file_type,
                            (unix_style_path(file_path), open(file_path, 'rb'), 'text/plain')))
        total_file_size += os.path.getsize(file_path)

    return (local_files, sizeof_fmt(total_file_size))

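# Hypothetical usage: each entry is (field_name, (path, file_obj, mimetype)),
# and the file handles are left open for the uploader, so close them when done.
upload_files, total_size = get_files_in_current_directory(file_type='code')
floyd_logger.info("Uploading %s across %s files", total_size, len(upload_files))
try:
    pass  # e.g. hand upload_files to MultipartEncoder(fields=upload_files)
finally:
    for _, (_, file_obj, _) in upload_files:
        file_obj.close()
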
def request(self, method, url,
            params=None, data=None, files=None, json=None,
            timeout=5, headers=None):
    """
    Execute the request using requests library
    """
    request_url = self.base_url + url
    floyd_logger.debug("Starting request to url: {} with params: {}, data: {}".format(
        request_url, params, data))

    # Auth headers if access_token is present
    request_headers = {
        "Authorization": "Bearer {}".format(
            self.access_token.token if self.access_token else None),
    }

    # Add any additional headers
    if headers:
        request_headers.update(headers)

    try:
        response = requests.request(method,
                                    request_url,
                                    params=params,
                                    data=data,
                                    json=json,
                                    headers=request_headers,
                                    files=files,
                                    timeout=timeout)
    except requests.exceptions.ConnectionError as exception:
        floyd_logger.debug("Exception: {}".format(exception))
        sys.exit("Cannot connect to the Floyd server. Check your internet connection.")

    try:
        floyd_logger.debug("Response Content: {}, Headers: {}".format(
            response.json(), response.headers))
    except Exception:
        floyd_logger.debug("Request failed. Response: {}".format(response.content))

    self.check_response_status(response)
    return response

def request(self, method, url,
            params=None, data=None, files=None, json=None,
            timeout=5, headers=None, skip_auth=False):
    """
    Execute the request using requests library
    """
    request_url = self.base_url + url
    floyd_logger.debug("Starting request to url: %s with params: %s, data: %s",
                       request_url, params, data)

    request_headers = {'x-floydhub-cli-version': get_cli_version()}

    # Auth headers if access_token is present
    if self.access_token:
        request_headers["Authorization"] = "Bearer " + self.access_token.token

    # Add any additional headers
    if headers:
        request_headers.update(headers)

    try:
        response = requests.request(method,
                                    request_url,
                                    params=params,
                                    data=data,
                                    json=json,
                                    headers=request_headers,
                                    files=files,
                                    timeout=timeout)
    except requests.exceptions.ConnectionError as exception:
        floyd_logger.debug("Exception: %s", exception, exc_info=True)
        sys.exit("Cannot connect to the Floyd server. Check your internet connection.")
    except requests.exceptions.Timeout as exception:
        floyd_logger.debug("Exception: %s", exception, exc_info=True)
        sys.exit("Connection to FloydHub server timed out. Please retry or check your internet connection.")

    floyd_logger.debug("Response Content: %s, Headers: %s",
                       response.content, response.headers)
    self.check_response_status(response)
    return response

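# Hypothetical call through request(); the endpoint path and payload are
# placeholders, and `client` is an instance of the API class above. Note that
# in the version shown here skip_auth is accepted but the Authorization header
# is still governed solely by self.access_token.
response = client.request("POST", "/experiments", json={"name": "my-job"}, timeout=10)
job_id = response.json().get("id")
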
def check_response_status(self, response):
    """
    Check if response is successful. Else raise Exception.
    """
    if not (200 <= response.status_code < 300):
        try:
            message = response.json()["errors"]
        except Exception:
            message = None
        floyd_logger.debug("Error received : status_code: {}, message: {}".format(
            response.status_code, message or response.content))

        if response.status_code == 401:
            raise AuthenticationException()
        elif response.status_code == 404:
            raise NotFoundException()
        elif response.status_code == 400:
            raise BadRequestException()
        elif response.status_code == 429:
            raise OverLimitException(response.json().get("message"))
        else:
            response.raise_for_status()

def create(self, data):
    """
    Create a temporary directory for the tar file that will be removed
    at the end of the operation.
    """
    with tempfile.TemporaryDirectory() as temp_directory:
        floyd_logger.info("Compressing data ...")
        compressed_file_path = os.path.join(temp_directory, "data.tar.gz")

        # Create tarfile
        floyd_logger.debug("Creating tarfile with contents of current directory: {}".format(
            compressed_file_path))
        create_tarfile(source_dir='.', filename=compressed_file_path)

        total_file_size = os.path.getsize(compressed_file_path)
        floyd_logger.info("Creating data source. Total upload size: {}".format(
            sizeof_fmt(total_file_size)))
        floyd_logger.info("Uploading compressed data ...")

        # Add request data
        request_data = []
        request_data.append(("data", ('data.tar', open(compressed_file_path, 'rb'), 'text/plain')))
        request_data.append(("json", json.dumps(data.to_dict())))

        multipart_encoder = MultipartEncoder(fields=request_data)

        # Attach progress bar
        progress_callback = create_progress_callback(multipart_encoder)
        multipart_encoder_monitor = MultipartEncoderMonitor(multipart_encoder, progress_callback)

        response = self.request("POST",
                                self.url,
                                data=multipart_encoder_monitor,
                                headers={"Content-Type": multipart_encoder.content_type},
                                timeout=3600)
        floyd_logger.info("Done")
        return response.json().get("id")

def get_files_in_directory(path, file_type):
    """
    Gets the list of files in the directory and subdirectories.
    Respects .floydignore file if present
    """
    local_files = []
    separator = os.path.sep

    ignore_list = FloydIgnoreManager.get_list()

    # make sure that subdirectories are also excluded
    ignore_list_expanded = ignore_list + ["{}/**".format(item) for item in ignore_list]
    floyd_logger.debug("Ignoring list : {}".format(ignore_list))

    total_file_size = 0

    for root, dirs, files in os.walk(path):
        # prune ignored directories in place so os.walk does not descend into them
        dirs[:] = [d for d in dirs if d not in ignore_list]

        ignore_dir = False
        normalized_path = normalize_path(path, root)
        for item in ignore_list_expanded:
            if PurePath(normalized_path).match(item):
                ignore_dir = True
                break

        if ignore_dir:
            floyd_logger.debug("Ignoring directory : {}".format(root))
            continue

        for file_name in files:
            ignore_file = False
            normalized_path = normalize_path(path, os.path.join(root, file_name))
            for item in ignore_list_expanded:
                if PurePath(normalized_path).match(item):
                    ignore_file = True
                    break

            if ignore_file:
                floyd_logger.debug("Ignoring file : {}".format(normalized_path))
                continue

            file_relative_path = os.path.join(root, file_name)
            if separator != '/':  # convert relative paths to Unix style
                file_relative_path = file_relative_path.replace(os.path.sep, '/')
            file_full_path = os.path.join(os.getcwd(), root, file_name)

            local_files.append((file_type,
                                (file_relative_path, open(file_full_path, 'rb'), 'text/plain')))
            total_file_size += os.path.getsize(file_full_path)

    return (local_files, sizeof_fmt(total_file_size))

def get_unignored_file_paths(ignore_list=None, whitelist=None):
    """
    Given an ignore_list and a whitelist of glob patterns, returns the list of
    unignored file paths in the current directory and its subdirectories
    """
    unignored_files = []
    if ignore_list is None:
        ignore_list = []
    if whitelist is None:
        whitelist = []

    for root, dirs, files in os.walk("."):
        floyd_logger.debug("Root:%s, Dirs:%s", root, dirs)
        if ignore_path(unix_style_path(root), ignore_list, whitelist):
            # Reset dirs to avoid going further down this directory.
            # Then continue to the next iteration of os.walk, which causes
            # everything in this directory to be ignored.
            #
            # Note that whitelisted files that are within directories that are
            # ignored will not be whitelisted. This follows the expected
            # behavior established by .gitignore logic:
            # "It is not possible to re-include a file if a parent directory of
            # that file is excluded."
            # https://git-scm.com/docs/gitignore#_pattern_format
            dirs[:] = []
            floyd_logger.debug("Ignoring directory : %s", root)
            continue

        for file_name in files:
            file_path = unix_style_path(os.path.join(root, file_name))
            if ignore_path(file_path, ignore_list, whitelist):
                floyd_logger.debug("Ignoring file : %s", file_name)
                continue
            unignored_files.append(os.path.join(root, file_name))

    return unignored_files

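# Illustration of the gitignore-style semantics described above; the patterns
# are examples, not FloydHub defaults, and assume ignore_path() does glob
# matching the way .floydignore entries are parsed elsewhere.
ignore_list = ["*.pyc", "data"]
whitelist = ["main.pyc"]
files = get_unignored_file_paths(ignore_list, whitelist)
# main.pyc survives via the whitelist, but nothing under data/ does: once the
# data directory itself is ignored, files inside it cannot be re-included.
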
def complete_upload(data_config):
    data_endpoint = data_config.data_endpoint
    data_id = data_config.data_id
    tarball_path = data_config.tarball_path

    if not data_id:
        floyd_logger.error("Corrupted upload state, please start a new one.")
        sys.exit(1)

    # check for tarball upload, upload to server if not done
    if not data_config.resource_id and (tarball_path and data_endpoint):
        floyd_logger.debug("Getting fresh upload credentials")
        creds = DataClient().new_tus_credentials(data_id)
        if not creds:
            sys.exit(1)

        file_size = os.path.getsize(tarball_path)

        # check for upload limit dimension
        if file_size > MAX_UPLOAD_SIZE:
            try:
                floyd_logger.info("Removing compressed data...")
                rmtree(os.path.dirname(tarball_path))
            except (OSError, TypeError):
                pass
            sys.exit("Data size too large to upload, please keep it under %s.\n"
                     % sizeof_fmt(MAX_UPLOAD_SIZE))

        floyd_logger.info("Uploading compressed data. Total upload size: %s",
                          sizeof_fmt(file_size))
        tus_client = TusDataClient()
        if not tus_client.resume_upload(tarball_path, data_endpoint, auth=creds):
            floyd_logger.error("Failed to finish upload!")
            return

        try:
            floyd_logger.info("Removing compressed data...")
            rmtree(os.path.dirname(tarball_path))
        except (OSError, TypeError):
            pass

        floyd_logger.debug("Created data with id : %s", data_id)
        floyd_logger.info("Upload finished.")

        # Update data config
        data_config.set_tarball_path(None)
        data_config.set_data_endpoint(None)
        data_source = DataClient().get(data_id)
        data_config.set_resource_id(data_source.resource_id)
        DataConfigManager.set_config(data_config)

    # data tarball uploaded, check for server untar
    if data_config.resource_id:
        floyd_logger.info(
            "Waiting for server to unpack data.\n"
            "You can exit at any time and come back to check the status with:\n"
            "\tfloyd data upload -r")
        try:
            for i in dots(ResourceWaitIter(data_config.resource_id),
                          label='Waiting for unpack...'):
                pass
        except WaitTimeoutException:
            clint_STREAM.write('\n')
            clint_STREAM.flush()
            floyd_logger.info(
                "Looks like it is going to take longer for Floydhub to unpack "
                "your data. Please check back later.")
            sys.exit(1)
        else:
            data_config.set_resource_id(None)
            data_config.set_tarball_path(None)
            data_config.set_data_endpoint(None)
            data_config.set_data_id(None)
            DataConfigManager.set_config(data_config)

            # Print output
            table_output = [["NAME"],
                            [normalize_data_name(data_config.data_name)]]
            floyd_logger.info('')
            floyd_logger.info(tabulate(table_output, headers="firstrow"))

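# Hedged sketch of how the two halves of the resumable data upload fit
# together; the description string is a placeholder. complete_upload() can be
# re-run to resume after an interruption, since the tarball path and endpoint
# are persisted in the data config by initialize_new_upload().
data_config = DataConfigManager.get_config()
access_token = AuthConfigManager.get_access_token()
initialize_new_upload(data_config, access_token, description="my dataset")
complete_upload(DataConfigManager.get_config())  # reload the config saved above
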
def set_config(cls, experiment_config):
    floyd_logger.debug("Setting {} in the file {}".format(
        experiment_config.to_dict(), cls.CONFIG_FILE_PATH))
    with open(cls.CONFIG_FILE_PATH, "w") as config_file:
        config_file.write(json.dumps(experiment_config.to_dict()))