def triggers_warning(self, path):
    """Check whether ``path`` must be skipped with a warning.

    Queues a warning on ``self.result_queue`` and returns ``True`` when
    the path does not exist, is a special file (character/block device,
    FIFO, or socket), or is not readable.  Files that trigger a warning
    are skipped by the caller; ``False`` means the file is fine.
    """
    warning_text = None
    if not os.path.exists(path):
        warning_text = "File does not exist."
    elif is_special_file(path):
        warning_text = ("File is character special device, "
                        "block special device, FIFO, or "
                        "socket.")
    elif not is_readable(path):
        warning_text = "File/Directory is not readable."
    if warning_text is None:
        return False
    self.result_queue.put(create_warning(path, warning_text))
    return True
def _enqueue_tasks(self, files):
    """Enqueue transfer tasks for each file and tally work done.

    For every file: warn (and skip the transfer count is still taken)
    when an upload exceeds the 5 TB S3 limit, warn and skip glacier
    incompatible objects, enqueue real multipart tasks outside of
    dryrun mode, and otherwise submit a single ``BasicTask``.

    :param files: iterable of fileinfo-like objects to transfer.
    :return: tuple ``(total_files, total_parts)``.
    """
    total_files = 0
    total_parts = 0
    for filename in files:
        num_uploads = 1
        is_multipart_task = self._is_multipart_task(filename)
        too_large = False
        # Not every fileinfo carries a size (e.g. streams), so guard.
        if hasattr(filename, 'size'):
            too_large = filename.size > MAX_UPLOAD_SIZE
        if too_large and filename.operation_name == 'upload':
            warning_message = "File exceeds s3 upload limit of 5 TB."
            warning = create_warning(relative_path(filename.src),
                                     warning_message)
            self.result_queue.put(warning)
        # Warn and skip over glacier incompatible tasks.
        elif not self.params.get('force_glacier_transfer') and \
                not filename.is_glacier_compatible():
            LOGGER.debug(
                'Encountered glacier object s3://%s. Not performing '
                '%s on object.' % (filename.src, filename.operation_name))
            if not self.params['ignore_glacier_warnings']:
                # NOTE: wording fixed from the garbled "to be able to
                # the perform operation."
                warning = create_warning(
                    's3://' + filename.src,
                    'Object is of storage class GLACIER. Unable to '
                    'perform %s operations on GLACIER objects. You must '
                    'restore the object to be able to perform the '
                    'operation.' % filename.operation_name
                )
                self.result_queue.put(warning)
            continue
        elif is_multipart_task and not self.params['dryrun']:
            # If we're in dryrun mode, then we don't need the
            # real multipart tasks. We can just use a BasicTask
            # in the else clause below, which will print out the
            # fact that it's transferring a file rather than
            # the specific part tasks required to perform the
            # transfer.
            num_uploads = self._enqueue_multipart_tasks(filename)
        else:
            task = tasks.BasicTask(
                session=self.session, filename=filename,
                parameters=self.params,
                result_queue=self.result_queue)
            self.executor.submit(task)
        total_files += 1
        total_parts += num_uploads
    return total_files, total_parts
def _enqueue_tasks(self, files):
    """Submit a transfer task per file, counting files and parts.

    Oversized uploads (> 5 TB) only produce a warning; multipart
    transfers are expanded into their part tasks unless running in
    dryrun mode, and everything else becomes a single ``BasicTask``.

    :param files: iterable of fileinfo-like objects to transfer.
    :return: tuple ``(total_files, total_parts)``.
    """
    total_files = 0
    total_parts = 0
    for fileinfo in files:
        part_count = 1
        multipart = self._is_multipart_task(fileinfo)
        oversized = (hasattr(fileinfo, 'size') and
                     fileinfo.size > MAX_UPLOAD_SIZE)
        if oversized and fileinfo.operation_name == 'upload':
            self.result_queue.put(create_warning(
                relative_path(fileinfo.src),
                message="File exceeds s3 upload limit of 5 TB."))
        elif multipart and not self.params['dryrun']:
            # Real multipart tasks are only needed outside of dryrun
            # mode; in dryrun the BasicTask branch below simply prints
            # that the file would be transferred instead of emitting
            # the individual part tasks.
            part_count = self._enqueue_multipart_tasks(fileinfo)
        else:
            self.executor.submit(tasks.BasicTask(
                session=self.session,
                filename=fileinfo,
                parameters=self.params,
                result_queue=self.result_queue))
        total_files += 1
        total_parts += part_count
    return total_files, total_parts
def test_create_warning(self):
    """create_warning() yields a skip message flagged as a warning, not an error."""
    warning = create_warning("/foo/", "There was an error")
    self.assertEqual(
        warning.message,
        "warning: Skipping file /foo/. There was an error")
    self.assertFalse(warning.error)
    self.assertTrue(warning.warning)
def _enqueue_tasks(self, files):
    """Queue one task (or multipart task set) per file.

    Uploads larger than the 5 TB S3 limit are warned about instead of
    transferred; multipart-eligible files are expanded into part tasks
    unless dryrun is active; all remaining files get a ``BasicTask``.

    :param files: iterable of fileinfo-like objects to transfer.
    :return: tuple ``(total_files, total_parts)``.
    """
    total_files = 0
    total_parts = 0
    for entry in files:
        uploads = 1
        needs_multipart = self._is_multipart_task(entry)
        exceeds_limit = False
        if hasattr(entry, "size"):
            exceeds_limit = entry.size > MAX_UPLOAD_SIZE
        if exceeds_limit and entry.operation_name == "upload":
            self.result_queue.put(create_warning(
                relative_path(entry.src),
                message="File exceeds s3 upload limit of 5 TB."))
        elif needs_multipart and not self.params["dryrun"]:
            # Outside dryrun mode the real multipart part tasks are
            # enqueued; in dryrun the BasicTask below merely reports
            # the transfer, so the expansion is unnecessary.
            uploads = self._enqueue_multipart_tasks(entry)
        else:
            basic = tasks.BasicTask(
                session=self.session,
                filename=entry,
                parameters=self.params,
                result_queue=self.result_queue
            )
            self.executor.submit(basic)
        total_files += 1
        total_parts += uploads
    return total_files, total_parts
def test_create_warning(self):
    """Verify message text and flags on the object built by create_warning()."""
    result = create_warning('/foo/', 'There was an error')
    expected = 'warning: Skipping file /foo/. There was an error'
    self.assertEqual(result.message, expected)
    self.assertFalse(result.error)
    self.assertTrue(result.warning)
def _warn_if_too_large(self, fileinfo):
    """Queue a non-skipping warning when ``fileinfo`` exceeds the S3 limit.

    :param fileinfo: transfer descriptor; may lack a ``size`` attribute
        (e.g. streamed input), in which case no warning is emitted.
    """
    # getattr() needs an explicit default: without one, a fileinfo with
    # no ``size`` attribute raises AttributeError here instead of being
    # treated as "size unknown" (sibling code guards with hasattr()).
    size = getattr(fileinfo, 'size', None)
    if size and size > MAX_UPLOAD_SIZE:
        file_path = relative_path(fileinfo.src)
        warning_message = (
            "File %s exceeds s3 upload limit of %s." % (
                file_path, human_readable_size(MAX_UPLOAD_SIZE)))
        # skip_file=False: the transfer is still attempted; this is
        # informational only.
        warning = create_warning(
            file_path, warning_message, skip_file=False)
        self._result_queue.put(warning)
def _validate_update_time(self, update_time, path):
    """Return ``update_time``, falling back to EPOCH_TIME when invalid.

    A ``None`` update time signals that an invalid timestamp was
    encountered for ``path``; a non-skipping warning is queued and the
    epoch is returned in its place.
    """
    if update_time is not None:
        return update_time
    self.result_queue.put(create_warning(
        path=path,
        error_message="File has an invalid timestamp. Passing epoch "
                      "time as timestamp.",
        skip_file=False))
    return EPOCH_TIME
def _warn_parent_reference(self, fileinfo):
    """Warn and return ``True`` when ``fileinfo`` escapes the current dir.

    ``normpath()`` emits the OS path separator, so the parent prefix is
    built with ``os.path.sep`` before testing ``compare_key``.
    """
    parent_prefix = '..' + os.path.sep
    normalized = os.path.normpath(fileinfo.compare_key)
    if not normalized.startswith(parent_prefix):
        return False
    self._result_queue.put(create_warning(
        fileinfo.compare_key,
        "File references a parent directory."))
    return True
def _on_success(self, future, **kwargs):
    """After a successful download, copy the source's mtime to the file.

    A failure to set the time is reported as a warning rather than
    raised: the download itself has already completed.
    """
    filename = future.meta.call_args.fileobj
    try:
        mod_timestamp = time.mktime(
            self._last_modified_time.timetuple())
        utils.set_file_utime(filename, int(mod_timestamp))
    except Exception as e:
        warning_message = (
            'Successfully Downloaded %s but was unable to update the '
            'last modified time. %s' % (filename, e))
        self._result_queue.put(
            utils.create_warning(filename, warning_message))
def should_ignore_file_with_decoding_warnings(self, dirname, filename):
    """Ignore (with a warning) filenames that could not be decoded.

    ``listdir(<unicode>)`` can hand back a bytestring when an entry is
    not decodable with ``sys.getfilesystemencoding()``; joining that
    bytestring with a unicode dirname could raise UnicodeDecodeError.
    Such entries are reported via a ``FileDecodingError`` warning and
    ignored.  Decodable names fall through to ``should_ignore_file``.
    """
    if isinstance(filename, six.text_type):
        return self.should_ignore_file(os.path.join(dirname, filename))
    decoding_error = FileDecodingError(dirname, filename)
    self.result_queue.put(
        create_warning(repr(filename), decoding_error.error_message))
    return True
def _warn_glacier(self, fileinfo):
    """Warn about (and signal the skip of) glacier-incompatible operations.

    :param fileinfo: transfer descriptor to inspect.
    :return: ``True`` when the operation must be skipped because the
        object is in GLACIER storage and no forced transfer was
        requested; the warning itself is suppressed when the user set
        ``ignore_glacier_warnings``.
    """
    # Guard clauses: forced transfers and compatible objects pass through.
    if self._cli_params.get('force_glacier_transfer'):
        return False
    if fileinfo.is_glacier_compatible():
        return False
    LOGGER.debug(
        'Encountered glacier object s3://%s. Not performing '
        '%s on object.' % (fileinfo.src, fileinfo.operation_name))
    if not self._cli_params.get('ignore_glacier_warnings'):
        # NOTE: wording fixed from the garbled "to be able to the
        # perform operation."
        warning = create_warning(
            's3://' + fileinfo.src,
            'Object is of storage class GLACIER. Unable to '
            'perform %s operations on GLACIER objects. You must '
            'restore the object to be able to perform the '
            'operation.' % fileinfo.operation_name)
        self._result_queue.put(warning)
    return True
def _warn_glacier(self, fileinfo):
    """Warn about (and signal the skip of) glacier-incompatible operations.

    :param fileinfo: transfer descriptor to inspect.
    :return: ``True`` when the operation must be skipped because the
        object is in GLACIER storage and no forced transfer was
        requested; ``False`` otherwise.  The warning is suppressed
        (though the skip still happens) under ``ignore_glacier_warnings``.
    """
    if not self._cli_params.get('force_glacier_transfer'):
        if not fileinfo.is_glacier_compatible():
            LOGGER.debug(
                'Encountered glacier object s3://%s. Not performing '
                '%s on object.' % (fileinfo.src, fileinfo.operation_name))
            if not self._cli_params.get('ignore_glacier_warnings'):
                # NOTE: wording fixed from the garbled "to be able to
                # the perform operation."
                warning = create_warning(
                    's3://' + fileinfo.src,
                    'Object is of storage class GLACIER. Unable to '
                    'perform %s operations on GLACIER objects. You must '
                    'restore the object to be able to perform the '
                    'operation.' % fileinfo.operation_name
                )
                self._result_queue.put(warning)
            return True
    return False