def _get_filelist_remote(remote_uri, recursive = True):
    """List remote objects under `remote_uri`, returned as a FileDict
    keyed by path relative to the listing base.

    ## If remote_uri ends with '/' then all remote files will have
    ## the remote_uri prefix removed in the relative path.
    ## If, on the other hand, the remote_uri ends with something else
    ## (probably alphanumeric symbol) we'll use the last path part
    ## in the relative path.
    ##
    ## Complicated, eh? See an example:
    ## _get_filelist_remote("s3://bckt/abc/def") may yield:
    ## { 'def/file1.jpg' : {}, 'def/xyz/blah.txt' : {} }
    ## _get_filelist_remote("s3://bckt/abc/def/") will yield:
    ## { 'file1.jpg' : {}, 'xyz/blah.txt' : {} }
    ## Furthermore a prefix-magic can restrict the return list:
    ## _get_filelist_remote("s3://bckt/abc/def/x") yields:
    ## { 'xyz/blah.txt' : {} }
    """
    info(u"Retrieving list of remote files for %s ..." % remote_uri)
    s3 = S3(Config())
    response = s3.bucket_list(remote_uri.bucket(), prefix = remote_uri.object(), recursive = recursive)
    rem_base_original = rem_base = remote_uri.object()
    remote_uri_original = remote_uri
    if rem_base != '' and rem_base[-1] != '/':
        rem_base = rem_base[:rem_base.rfind('/')+1]
        remote_uri = S3Uri("s3://%s/%s" % (remote_uri.bucket(), rem_base))
    rem_base_len = len(rem_base)
    rem_list = FileDict(ignore_case = False)
    break_now = False
    for obj in response['list']:
        if obj['Key'] == rem_base_original and obj['Key'][-1] != "/":
            ## We asked for one file and we got that file :-)
            key = os.path.basename(obj['Key'])
            object_uri_str = remote_uri_original.uri()
            break_now = True
            rem_list = FileDict(ignore_case = False)  ## Remove whatever has already been put to rem_list
        else:
            key = obj['Key'][rem_base_len:]  ## Beware - this may be '' if obj['Key']==rem_base !!
            object_uri_str = remote_uri.uri() + key
        rem_list[key] = {
            'size' : int(obj['Size']),
            'timestamp' : dateS3toUnix(obj['LastModified']), ## Sadly it's upload time, not our lastmod time :-(
            'md5' : obj['ETag'][1:-1],
            'object_key' : obj['Key'],
            'object_uri_str' : object_uri_str,
            'base_uri' : remote_uri,
            'dev' : None,
            'inode' : None,
        }
        ## BUGFIX: the original tested `md5.find("-")`, which returns -1
        ## (truthy) when '-' is ABSENT, so the expensive per-object HEAD
        ## fired for almost every object.  A '-' in the ETag marks a
        ## multipart upload, whose ETag is not a usable md5 -- only then
        ## fetch the real attributes.
        if '-' in rem_list[key]['md5']: # always get it for multipart uploads
            _get_remote_attribs(S3Uri(object_uri_str), rem_list[key])
        md5 = rem_list[key]['md5']
        rem_list.record_md5(key, md5)
        if break_now:
            break
    return rem_list
def set_accesslog(self, uri, enable, log_target_prefix_uri=None, acl_public=False):
    """Enable or disable server access logging on the bucket in `uri`.

    Builds an AccessLog XML document and sends it to the bucket's
    ?logging subresource.  If S3 rejects the request with
    InvalidTargetBucketForLogging, the log-delivery ACL is applied to
    the target bucket and the request is retried once.

    uri                   -- S3Uri of the bucket to configure.
    enable                -- True to enable logging, False to disable.
    log_target_prefix_uri -- S3Uri naming the log target bucket/prefix
                             (used when enabling, and for the ACL retry).
    acl_public            -- passed to AccessLog.setAclPublic().

    NOTE(review): `response` is assigned but never returned -- looks
    like a missing `return`; confirm against callers before changing.
    """
    request = self.create_request("BUCKET_CREATE", bucket=uri.bucket(), extra="?logging")
    accesslog = AccessLog()
    if enable:
        accesslog.enableLogging(log_target_prefix_uri)
        accesslog.setAclPublic(acl_public)
    else:
        accesslog.disableLogging()
    body = str(accesslog)
    debug(u"set_accesslog(%s): accesslog-xml: %s" % (uri, body))
    try:
        response = self.send_request(request, body)
    except S3Error, e:
        # S3 refuses the logging config until the target bucket carries
        # the log-delivery ACL; set it up and retry once.
        if e.info['Code'] == "InvalidTargetBucketForLogging":
            info("Setting up log-delivery ACL for target bucket.")
            self.set_accesslog_acl(S3Uri("s3://%s" % log_target_prefix_uri.bucket()))
            response = self.send_request(request, body)
        else:
            raise
def invalinfo(args):
    """Print status details for CloudFront invalidation requests.

    args -- cf:// URIs; those carrying a request-id are reported
            directly, the rest are expanded to every invalidation
            found on that distribution.
    """
    cf = CloudFront(Config())
    cfuris = Cmd._parse_args(args)
    requests = []
    for cfuri in cfuris:
        if cfuri.request_id():
            requests.append(str(cfuri))
        else:
            inval_list = cf.GetInvalList(cfuri)
            try:
                for i in inval_list['inval_list'].info['InvalidationSummary']:
                    requests.append("/".join(["cf:/", cfuri.dist_id(), i["Id"]]))
            except Exception:
                # BUGFIX: was a bare 'except:' which also swallowed
                # SystemExit/KeyboardInterrupt.  Presumably this guards
                # distributions with no invalidations -- they simply
                # contribute no entries.
                continue
    for req in requests:
        cfuri = S3Uri(req)
        inval_info = cf.GetInvalInfo(cfuri)
        st = inval_info['inval_status'].info
        pretty_output("URI", str(cfuri))
        pretty_output("Status", st['Status'])
        pretty_output("Created", st['CreateTime'])
        pretty_output("Nr of paths", len(st['InvalidationBatch']['Path']))
        pretty_output("Reference", st['InvalidationBatch']['CallerReference'])
        output("")
def _parse_args(args):
    """Translate every argument into its CloudFront distribution URI."""
    cf = CloudFront(Config())
    return [cf.get_dist_name_for_bucket(S3Uri(arg)) for arg in args]
def object_batch_delete(self, remote_list):
    """Delete a batch of objects with one S3 multi-object-delete request.

    remote_list -- FileDict-style mapping whose values carry an
                   'object_uri_str' entry; all entries must belong to
                   the same bucket.

    Returns the response of the BATCH_DELETE request.
    Raises ValueError for an empty batch, a non-s3 URI, a URI without
    an object part, or keys spanning multiple buckets.
    """
    def compose_batch_del_xml(bucket, key_list):
        # Build the <Delete> XML body for the batch-delete API.
        # Keys are XML-escaped; the result is UTF-8 encoded bytes.
        # (join instead of the original quadratic += concatenation)
        parts = [u"<?xml version=\"1.0\" encoding=\"UTF-8\"?><Delete>"]
        for key in key_list:
            uri = S3Uri(key)
            if uri.type != "s3":
                # BUGFIX: message read "Excpected"
                raise ValueError("Expected URI type 's3', got '%s'" % uri.type)
            if not uri.has_object():
                raise ValueError("URI '%s' has no object" % key)
            if uri.bucket() != bucket:
                raise ValueError("The batch should contain keys from the same bucket")
            parts.append(u"<Object><Key>%s</Key></Object>" % saxutils.escape(uri.object()))
        parts.append(u"</Delete>")
        return u"".join(parts).encode('utf-8')

    batch = [remote_list[item]['object_uri_str'] for item in remote_list]
    if len(batch) == 0:
        raise ValueError("Key list is empty")
    bucket = S3Uri(batch[0]).bucket()
    request_body = compose_batch_del_xml(bucket, batch)
    # The multi-object-delete API requires a Content-MD5 of the body.
    md5_hash = md5()
    md5_hash.update(request_body)
    headers = {'content-md5': base64.b64encode(md5_hash.digest())}
    request = self.create_request("BATCH_DELETE", bucket = bucket, extra = '?delete', headers = headers)
    response = self.send_request(request, request_body)
    return response
def modify(args):
    """Modify exactly one CloudFront distribution, then print its state.

    args -- list whose first element names the distribution (cf:// URI);
            any further elements are rejected.
    """
    cf = CloudFront(Config())
    cfuri = S3Uri(args.pop(0))
    if cfuri.type != 'cf':
        # BUGFIX: the original interpolated the undefined name 'arg'
        # here, raising NameError instead of the intended ParameterError.
        raise ParameterError("CloudFront URI required instead of: %s" % cfuri)
    if len(args):
        raise ParameterError("Too many parameters. Modify one Distribution at a time.")
    response = cf.ModifyDistribution(cfuri,
                                     cnames_add=Cmd.options.cf_cnames_add,
                                     cnames_remove=Cmd.options.cf_cnames_remove,
                                     comment=Cmd.options.cf_comment,
                                     enabled=Cmd.options.cf_enable)
    if response['status'] >= 400:
        error("Distribution %s could not be modified: %s" % (cfuri, response['reason']))
    output("Distribution modified: %s" % cfuri)
    # Re-fetch and display the (possibly changed) distribution config.
    response = cf.GetDistInfo(cfuri)
    d = response['distribution']
    dc = d.info['DistributionConfig']
    pretty_output("Origin", S3UriS3.httpurl_to_s3uri(dc.info['Origin']))
    pretty_output("DistId", d.uri())
    pretty_output("DomainName", d.info['DomainName'])
    pretty_output("Status", d.info['Status'])
    pretty_output("CNAMEs", ", ".join(dc.info['CNAME']))
    pretty_output("Comment", dc.info['Comment'])
    pretty_output("Enabled", dc.info['Enabled'])
    pretty_output("Etag", response['headers']['etag'])
def info(args):
    """Print CloudFront distribution details.

    With no args, lists a summary of every distribution; otherwise each
    arg must be a cf:// URI and its full config is printed.

    Raises ParameterError for non-cf URIs.
    NOTE(review): this shadows the module-level `info()` logger used
    elsewhere in the file -- callers must reach it via its class/module.
    """
    cf = CloudFront(Config())
    if not args:
        # No URIs given: show the summary listing of all distributions.
        response = cf.GetList()
        for d in response['dist_list'].dist_summs:
            pretty_output("Origin", S3UriS3.httpurl_to_s3uri(d.info['Origin']))
            pretty_output("DistId", d.uri())
            pretty_output("DomainName", d.info['DomainName'])
            pretty_output("Status", d.info['Status'])
            pretty_output("Enabled", d.info['Enabled'])
            output("")
    else:
        cfuris = []
        for arg in args:
            cfuris.append(S3Uri(arg))
            if cfuris[-1].type != 'cf':
                raise ParameterError("CloudFront URI required instead of: %s" % arg)
        for cfuri in cfuris:
            # Full per-distribution detail, including the config sub-document.
            response = cf.GetDistInfo(cfuri)
            d = response['distribution']
            dc = d.info['DistributionConfig']
            pretty_output("Origin", S3UriS3.httpurl_to_s3uri(dc.info['Origin']))
            pretty_output("DistId", d.uri())
            pretty_output("DomainName", d.info['DomainName'])
            pretty_output("Status", d.info['Status'])
            pretty_output("CNAMEs", ", ".join(dc.info['CNAME']))
            pretty_output("Comment", dc.info['Comment'])
            pretty_output("Enabled", dc.info['Enabled'])
            pretty_output("Etag", response['headers']['etag'])
def create(args):
    """Create a CloudFront distribution for every s3:// bucket URI given."""
    cf = CloudFront(Config())
    validated = []
    for candidate in args:
        bucket_uri = S3Uri(candidate)
        if bucket_uri.type != "s3":
            raise ParameterError("Bucket can only be created from a s3:// URI instead of: %s" % candidate)
        if bucket_uri.object():
            raise ParameterError("Use s3:// URI with a bucket name only instead of: %s" % candidate)
        if not bucket_uri.is_dns_compatible():
            raise ParameterError("CloudFront can only handle lowercase-named buckets.")
        validated.append(bucket_uri)
    if not validated:
        raise ParameterError("No valid bucket names found")
    for bucket_uri in validated:
        info("Creating distribution from: %s" % bucket_uri)
        response = cf.CreateDistribution(bucket_uri,
                                         cnames_add = Cmd.options.cf_cnames_add,
                                         comment = Cmd.options.cf_comment,
                                         logging = Cmd.options.cf_logging)
        dist = response['distribution']
        dist_config = dist.info['DistributionConfig']
        output("Distribution created:")
        pretty_output("Origin", S3UriS3.httpurl_to_s3uri(dist_config.info['Origin']))
        pretty_output("DistId", dist.uri())
        pretty_output("DomainName", dist.info['DomainName'])
        pretty_output("CNAMEs", ", ".join(dist_config.info['CNAME']))
        pretty_output("Comment", dist_config.info['Comment'])
        pretty_output("Status", dist.info['Status'])
        pretty_output("Enabled", dist_config.info['Enabled'])
        pretty_output("Etag", response['headers']['etag'])
def _parse_args(args):
    """Convert each argument to a cf:// URI, translating s3:// bucket
    URIs to their CloudFront distribution name where possible.

    Returns the list of cf:// S3Uri objects.
    Raises ParameterError when translation fails or a non-cf URI remains.
    """
    cfuris = []
    for arg in args:
        uri = S3Uri(arg)
        if uri.type == 's3':
            try:
                uri = Cmd._get_dist_name_for_bucket(uri)
            except Exception as e:
                debug(e)
                raise ParameterError("Unable to translate S3 URI to CloudFront distribution name: %s" % uri)
        if uri.type != 'cf':
            raise ParameterError("CloudFront URI required instead of: %s" % arg)
        cfuris.append(uri)
    # BUGFIX: the original built cfuris but fell off the end without
    # returning it, so every caller received None.
    return cfuris
def delete(args):
    """Delete every CloudFront distribution named by a cf:// URI in args."""
    cf = CloudFront(Config())
    targets = []
    for candidate in args:
        parsed = S3Uri(candidate)
        if parsed.type != 'cf':
            raise ParameterError("CloudFront URI required instead of: %s" % candidate)
        targets.append(parsed)
    for target in targets:
        result = cf.DeleteDistribution(target)
        if result['status'] >= 400:
            error("Distribution %s could not be deleted: %s" % (target, result['reason']))
        output("Distribution %s deleted" % target)
def compose_batch_del_xml(bucket, key_list):
    """Compose the <Delete> XML body for an S3 multi-object delete.

    bucket   -- bucket name every key must belong to.
    key_list -- iterable of s3:// URI strings naming the objects.

    Returns the document as UTF-8 encoded bytes.
    Raises ValueError for a non-s3 URI, a URI without an object part,
    or keys from a different bucket.
    """
    # Collect fragments and join once; the original += loop was quadratic.
    parts = [u"<?xml version=\"1.0\" encoding=\"UTF-8\"?><Delete>"]
    for key in key_list:
        uri = S3Uri(key)
        if uri.type != "s3":
            # BUGFIX: message read "Excpected"
            raise ValueError("Expected URI type 's3', got '%s'" % uri.type)
        if not uri.has_object():
            raise ValueError("URI '%s' has no object" % key)
        if uri.bucket() != bucket:
            raise ValueError("The batch should contain keys from the same bucket")
        # Keys may contain characters that must be XML-escaped.
        parts.append(u"<Object><Key>%s</Key></Object>" % saxutils.escape(uri.object()))
    parts.append(u"</Delete>")
    return u"".join(parts).encode('utf-8')
def fetch_remote_list(args, require_attribs=False, recursive=None, uri_params=None):
    """Build a FileDict of remote S3 objects for the given URI(s).

    args            -- one s3:// URI or a list/tuple/set of them;
                       '*' and '?' wildcards are honoured in the
                       non-recursive path.
    require_attribs -- HEAD each object to fill size/md5/timestamp when
                       listing single objects non-recursively.
    recursive       -- None means "use cfg.recursive".
    uri_params      -- extra parameters forwarded to bucket_list
                       (BUGFIX: was a mutable default `{}`).

    Returns (remote_list, exclude_list, total_size).
    Raises ParameterError for non-s3 URIs or a bare URI without a
    filename when not recursive.
    """
    # BUGFIX: mutable default argument replaced with the None sentinel.
    if uri_params is None:
        uri_params = {}

    def _get_remote_attribs(uri, remote_item):
        # HEAD the object and merge size/md5/timestamp into remote_item;
        # an s3cmd-attrs md5 (recorded at upload time) wins if present.
        response = S3(cfg).object_info(uri)
        if not response.get('headers'):
            return
        remote_item.update({
            'size': int(response['headers']['content-length']),
            'md5': response['headers']['etag'].strip('"\''),
            'timestamp': dateRFC822toUnix(response['headers']['last-modified'])
        })
        try:
            md5 = response['s3cmd-attrs']['md5']
            remote_item.update({'md5': md5})
            debug(u"retreived md5=%s from headers" % md5)
        except KeyError:
            pass

    def _get_filelist_remote(remote_uri, recursive=True):
        ## If remote_uri ends with '/' then all remote files will have
        ## the remote_uri prefix removed in the relative path.
        ## If, on the other hand, the remote_uri ends with something else
        ## (probably alphanumeric symbol) we'll use the last path part
        ## in the relative path.
        ##
        ## Complicated, eh? See an example:
        ## _get_filelist_remote("s3://bckt/abc/def") may yield:
        ## { 'def/file1.jpg' : {}, 'def/xyz/blah.txt' : {} }
        ## _get_filelist_remote("s3://bckt/abc/def/") will yield:
        ## { 'file1.jpg' : {}, 'xyz/blah.txt' : {} }
        ## Furthermore a prefix-magic can restrict the return list:
        ## _get_filelist_remote("s3://bckt/abc/def/x") yields:
        ## { 'xyz/blah.txt' : {} }
        info(u"Retrieving list of remote files for %s ..." % remote_uri)
        empty_fname_re = re.compile(r'\A\s*\Z')
        total_size = 0
        s3 = S3(Config())
        response = s3.bucket_list(remote_uri.bucket(), prefix=remote_uri.object(),
                                  recursive=recursive, uri_params=uri_params)
        rem_base_original = rem_base = remote_uri.object()
        remote_uri_original = remote_uri
        if rem_base != '' and rem_base[-1] != '/':
            rem_base = rem_base[:rem_base.rfind('/') + 1]
            remote_uri = S3Uri(u"s3://%s/%s" % (remote_uri.bucket(), rem_base))
        rem_base_len = len(rem_base)
        rem_list = FileDict(ignore_case=False)
        break_now = False
        for object in response['list']:
            if object['Key'] == rem_base_original and object['Key'][-1] != "/":
                ## We asked for one file and we got that file :-)
                key = unicodise(os.path.basename(deunicodise(object['Key'])))
                object_uri_str = remote_uri_original.uri()
                break_now = True
                rem_list = FileDict(ignore_case=False)  ## Remove whatever has already been put to rem_list
            else:
                key = object['Key'][rem_base_len:]  ## Beware - this may be '' if object['Key']==rem_base !!
                object_uri_str = remote_uri.uri() + key
            if empty_fname_re.match(key):
                # Objects may exist on S3 with empty names (''), which
                # don't map so well to common filesystems.
                warning(u"Empty object name on S3 found, ignoring.")
                continue
            rem_list[key] = {
                'size': int(object['Size']),
                'timestamp': dateS3toUnix(object['LastModified']),  ## Sadly it's upload time, not our lastmod time :-(
                'md5': object['ETag'].strip('"\''),
                'object_key': object['Key'],
                'object_uri_str': object_uri_str,
                'base_uri': remote_uri,
                'dev': None,
                'inode': None,
            }
            if '-' in rem_list[key]['md5']:  # always get it for multipart uploads
                _get_remote_attribs(S3Uri(object_uri_str), rem_list[key])
            md5 = rem_list[key]['md5']
            rem_list.record_md5(key, md5)
            total_size += int(object['Size'])
            if break_now:
                break
        return rem_list, total_size

    cfg = Config()
    remote_uris = []
    remote_list = FileDict(ignore_case=False)
    if type(args) not in (list, tuple, set):
        args = [args]
    if recursive == None:
        recursive = cfg.recursive
    for arg in args:
        uri = S3Uri(arg)
        if not uri.type == 's3':
            raise ParameterError("Expecting S3 URI instead of '%s'" % arg)
        remote_uris.append(uri)
    total_size = 0
    if recursive:
        for uri in remote_uris:
            objectlist, tmp_total_size = _get_filelist_remote(uri, recursive=True)
            total_size += tmp_total_size
            for key in objectlist:
                remote_list[key] = objectlist[key]
                remote_list.record_md5(key, objectlist.get_md5(key))
    else:
        for uri in remote_uris:
            uri_str = uri.uri()
            ## Wildcards used in remote URI?
            ## If yes we'll need a bucket listing...
            # FIX: raw string -- "\*" is an invalid escape sequence
            # (DeprecationWarning on modern Pythons), value unchanged.
            wildcard_split_result = re.split(r"\*|\?", uri_str, maxsplit=1)
            if len(wildcard_split_result) == 2:
                # wildcards found
                prefix, rest = wildcard_split_result
                ## Only request recursive listing if the 'rest' of the URI,
                ## i.e. the part after first wildcard, contains '/'
                need_recursion = '/' in rest
                objectlist, tmp_total_size = _get_filelist_remote(S3Uri(prefix), recursive=need_recursion)
                total_size += tmp_total_size
                for key in objectlist:
                    ## Check whether the 'key' matches the requested wildcards
                    if glob.fnmatch.fnmatch(objectlist[key]['object_uri_str'], uri_str):
                        remote_list[key] = objectlist[key]
            else:
                ## No wildcards - simply append the given URI to the list
                key = unicodise(os.path.basename(deunicodise(uri.object())))
                if not key:
                    raise ParameterError(u"Expecting S3 URI with a filename or --recursive: %s" % uri.uri())
                remote_item = {
                    'base_uri': uri,
                    'object_uri_str': uri.uri(),
                    'object_key': uri.object()
                }
                if require_attribs:
                    _get_remote_attribs(uri, remote_item)
                remote_list[key] = remote_item
                md5 = remote_item.get('md5')
                if md5:
                    remote_list.record_md5(key, md5)
                total_size += remote_item.get('size', 0)
    remote_list, exclude_list = filter_exclude_include(remote_list)
    return remote_list, exclude_list, total_size
cache.load(cfg.cache_file) except IOError: info(u"No cache file found, creating it.") local_uris = [] local_list = FileDict(ignore_case=False) single_file = False if type(args) not in (list, tuple, set): args = [args] if recursive == None: recursive = cfg.recursive for arg in args: uri = S3Uri(arg) if not uri.type == 'file': raise ParameterError( "Expecting filename or directory instead of: %s" % arg) if uri.isdir() and not recursive: raise ParameterError("Use --recursive to upload a directory: %s" % arg) local_uris.append(uri) for uri in local_uris: list_for_uri, single_file = _get_filelist_local(local_list, uri, cache) ## Single file is True if and only if the user ## specified one local URI and that URI represents ## a FILE. Ie it is False if the URI was of a DIR ## and that dir contained only one FILE. That's not
def uri(self):
    """Return this distribution's Id wrapped as a cf:// S3Uri."""
    dist_id = self.info['Id']
    return S3Uri(u"cf://%s" % dist_id)
def fetch_local_list(args, recursive = None):
    """Build a FileDict of local files for the given path argument(s).

    args      -- one file:// URI/path or a list/tuple of them; "-"
                 denotes stdin and yields a single pseudo-entry.
    recursive -- None means "use cfg.recursive"; directories require it.

    Returns (local_list, single_file) where single_file is True only
    when the user named exactly one regular file.
    Raises ParameterError for non-file URIs or an unexpanded directory.
    """
    def _get_filelist_local(loc_list, local_uri, cache):
        # Walk local_uri and add an entry per regular file into loc_list;
        # md5 values come from the stat-keyed cache when possible.
        info(u"Compiling list of local files...")
        if deunicodise(local_uri.basename()) == "-":
            # stdin pseudo-file: size/mtime are unknowable, marked -1.
            loc_list["-"] = {
                'full_name_unicode' : '-',
                'full_name' : '-',
                'size' : -1,
                'mtime' : -1,
            }
            return loc_list, True
        if local_uri.isdir():
            local_base = deunicodise(local_uri.basename())
            local_path = deunicodise(local_uri.path())
            if cfg.follow_symlinks:
                filelist = _fswalk_follow_symlinks(local_path)
            else:
                filelist = _fswalk_no_symlinks(local_path)
            single_file = False
        else:
            local_base = ""
            local_path = deunicodise(local_uri.dirname())
            # Fake a one-entry os.walk() result for the single file.
            filelist = [( local_path, [], [deunicodise(local_uri.basename())] )]
            single_file = True
        for root, dirs, files in filelist:
            rel_root = root.replace(local_path, local_base, 1)
            for f in files:
                full_name = os.path.join(root, f)
                if not os.path.isfile(full_name):
                    continue
                if os.path.islink(full_name):
                    if not cfg.follow_symlinks:
                        continue
                relative_file = unicodise(os.path.join(rel_root, f))
                if os.path.sep != "/":
                    # Convert non-unix dir separators to '/'
                    relative_file = "/".join(relative_file.split(os.path.sep))
                if cfg.urlencoding_mode == "normal":
                    relative_file = replace_nonprintables(relative_file)
                if relative_file.startswith('./'):
                    relative_file = relative_file[2:]
                sr = os.stat_result(os.lstat(full_name))
                loc_list[relative_file] = {
                    'full_name_unicode' : unicodise(full_name),
                    'full_name' : full_name,
                    'size' : sr.st_size,
                    'mtime' : sr.st_mtime,
                    'dev' : sr.st_dev,
                    'inode' : sr.st_ino,
                    'uid' : sr.st_uid,
                    'gid' : sr.st_gid,
                    'sr': sr # save it all, may need it in preserve_attrs_list
                    ## TODO: Possibly more to save here...
                }
                if 'md5' in cfg.sync_checks:
                    # Cache hit is keyed on (dev, inode, mtime, size);
                    # on miss the file is read and the cache updated.
                    md5 = cache.md5(sr.st_dev, sr.st_ino, sr.st_mtime, sr.st_size)
                    if md5 is None:
                        try:
                            md5 = loc_list.get_md5(relative_file) # this does the file I/O
                        except IOError:
                            continue
                        cache.add(sr.st_dev, sr.st_ino, sr.st_mtime, sr.st_size, md5)
                    loc_list.record_hardlink(relative_file, sr.st_dev, sr.st_ino, md5)
        return loc_list, single_file

    def _maintain_cache(cache, local_list):
        # Purge cache entries no longer backed by a listed file, then persist.
        if cfg.cache_file:
            cache.mark_all_for_purge()
            for i in local_list.keys():
                cache.unmark_for_purge(local_list[i]['dev'], local_list[i]['inode'],
                                       local_list[i]['mtime'], local_list[i]['size'])
            cache.purge()
            cache.save(cfg.cache_file)

    cfg = Config()
    cache = HashCache()
    if cfg.cache_file:
        try:
            cache.load(cfg.cache_file)
        except IOError:
            info(u"No cache file found, creating it.")
    local_uris = []
    local_list = FileDict(ignore_case = False)
    single_file = False
    if type(args) not in (list, tuple):
        args = [args]
    if recursive == None:
        recursive = cfg.recursive
    for arg in args:
        uri = S3Uri(arg)
        if not uri.type == 'file':
            raise ParameterError("Expecting filename or directory instead of: %s" % arg)
        if uri.isdir() and not recursive:
            raise ParameterError("Use --recursive to upload a directory: %s" % arg)
        local_uris.append(uri)
    for uri in local_uris:
        list_for_uri, single_file = _get_filelist_local(local_list, uri, cache)
    ## Single file is True if and only if the user
    ## specified one local URI and that URI represents
    ## a FILE. Ie it is False if the URI was of a DIR
    ## and that dir contained only one FILE. That's not
    ## a case of single_file==True.
    if len(local_list) > 1:
        single_file = False
    _maintain_cache(cache, local_list)
    return local_list, single_file
def fetch_local_list(args, recursive=None):
    """Build a SortedDict of local files for the given path argument(s).

    args      -- one file:// URI/path or a list/tuple of them.
    recursive -- None means "use cfg.recursive"; directories require it.

    Returns (local_list, single_file) where single_file is True only
    when the user named exactly one regular file.
    Raises ParameterError for non-file URIs or an unexpanded directory.
    """
    def _get_filelist_local(local_uri):
        # Walk local_uri and return (loc_list, single_file): one entry
        # per regular file, keyed by '/'-separated relative path.
        info(u"Compiling list of local files...")
        if local_uri.isdir():
            local_base = deunicodise(local_uri.basename())
            local_path = deunicodise(local_uri.path())
            filelist = _fswalk(local_path, cfg.follow_symlinks)
            single_file = False
        else:
            local_base = ""
            local_path = deunicodise(local_uri.dirname())
            # Fake a one-entry os.walk() result for the single file.
            filelist = [(local_path, [], [deunicodise(local_uri.basename())])]
            single_file = True
        loc_list = SortedDict(ignore_case=False)
        for root, dirs, files in filelist:
            rel_root = root.replace(local_path, local_base, 1)
            for f in files:
                full_name = os.path.join(root, f)
                if not os.path.isfile(full_name):
                    continue
                if os.path.islink(full_name):
                    if not cfg.follow_symlinks:
                        continue
                relative_file = unicodise(os.path.join(rel_root, f))
                if os.path.sep != "/":
                    # Convert non-unix dir separators to '/'
                    relative_file = "/".join(relative_file.split(os.path.sep))
                if cfg.urlencoding_mode == "normal":
                    relative_file = replace_nonprintables(relative_file)
                if relative_file.startswith('./'):
                    relative_file = relative_file[2:]
                # lstat: symlinks are recorded as themselves, not targets.
                sr = os.stat_result(os.lstat(full_name))
                loc_list[relative_file] = {
                    'full_name_unicode': unicodise(full_name),
                    'full_name': full_name,
                    'size': sr.st_size,
                    'mtime': sr.st_mtime,
                    ## TODO: Possibly more to save here...
                }
        return loc_list, single_file

    cfg = Config()
    local_uris = []
    local_list = SortedDict(ignore_case=False)
    single_file = False
    if type(args) not in (list, tuple):
        args = [args]
    if recursive == None:
        recursive = cfg.recursive
    for arg in args:
        uri = S3Uri(arg)
        if not uri.type == 'file':
            raise ParameterError("Expecting filename or directory instead of: %s" % arg)
        if uri.isdir() and not recursive:
            raise ParameterError("Use --recursive to upload a directory: %s" % arg)
        local_uris.append(uri)
    for uri in local_uris:
        list_for_uri, single_file = _get_filelist_local(uri)
        local_list.update(list_for_uri)
    ## Single file is True if and only if the user
    ## specified one local URI and that URI represents
    ## a FILE. Ie it is False if the URI was of a DIR
    ## and that dir contained only one FILE. That's not
    ## a case of single_file==True.
    if len(local_list) > 1:
        single_file = False
    return local_list, single_file
def fetch_remote_list(args, require_attribs = False, recursive = None):
    """Build a FileDict of remote S3 objects for the given URI(s).

    args            -- one s3:// URI or a list/tuple of them; '*' and
                       '?' wildcards are honoured non-recursively.
    require_attribs -- HEAD each single object to fill size/md5/timestamp.
    recursive       -- None means "use cfg.recursive".

    Returns the FileDict of remote items.
    Raises ParameterError for non-s3 URIs or a bare URI without a
    filename when not recursive.
    """
    def _get_filelist_remote(remote_uri, recursive = True):
        ## If remote_uri ends with '/' then all remote files will have
        ## the remote_uri prefix removed in the relative path.
        ## If, on the other hand, the remote_uri ends with something else
        ## (probably alphanumeric symbol) we'll use the last path part
        ## in the relative path.
        ##
        ## Complicated, eh? See an example:
        ## _get_filelist_remote("s3://bckt/abc/def") may yield:
        ## { 'def/file1.jpg' : {}, 'def/xyz/blah.txt' : {} }
        ## _get_filelist_remote("s3://bckt/abc/def/") will yield:
        ## { 'file1.jpg' : {}, 'xyz/blah.txt' : {} }
        ## Furthermore a prefix-magic can restrict the return list:
        ## _get_filelist_remote("s3://bckt/abc/def/x") yields:
        ## { 'xyz/blah.txt' : {} }
        info(u"Retrieving list of remote files for %s ..." % remote_uri)
        s3 = S3(Config())
        response = s3.bucket_list(remote_uri.bucket(), prefix = remote_uri.object(), recursive = recursive)
        rem_base_original = rem_base = remote_uri.object()
        remote_uri_original = remote_uri
        if rem_base != '' and rem_base[-1] != '/':
            rem_base = rem_base[:rem_base.rfind('/')+1]
            remote_uri = S3Uri("s3://%s/%s" % (remote_uri.bucket(), rem_base))
        rem_base_len = len(rem_base)
        rem_list = FileDict(ignore_case = False)
        break_now = False
        for object in response['list']:
            if object['Key'] == rem_base_original and object['Key'][-1] != "/":
                ## We asked for one file and we got that file :-)
                key = os.path.basename(object['Key'])
                object_uri_str = remote_uri_original.uri()
                break_now = True
                rem_list = FileDict(ignore_case = False)  ## Remove whatever has already been put to rem_list
            else:
                key = object['Key'][rem_base_len:]  ## Beware - this may be '' if object['Key']==rem_base !!
                object_uri_str = remote_uri.uri() + key
            rem_list[key] = {
                'size' : int(object['Size']),
                'timestamp' : dateS3toUnix(object['LastModified']), ## Sadly it's upload time, not our lastmod time :-(
                'md5' : object['ETag'][1:-1],
                'object_key' : object['Key'],
                'object_uri_str' : object_uri_str,
                'base_uri' : remote_uri,
                'dev' : None,
                'inode' : None,
            }
            md5 = object['ETag'][1:-1]
            rem_list.record_md5(key, md5)
            if break_now:
                break
        return rem_list

    cfg = Config()
    remote_uris = []
    remote_list = FileDict(ignore_case = False)
    if type(args) not in (list, tuple):
        args = [args]
    if recursive == None:
        recursive = cfg.recursive
    for arg in args:
        uri = S3Uri(arg)
        if not uri.type == 's3':
            raise ParameterError("Expecting S3 URI instead of '%s'" % arg)
        remote_uris.append(uri)
    if recursive:
        for uri in remote_uris:
            objectlist = _get_filelist_remote(uri)
            for key in objectlist:
                remote_list[key] = objectlist[key]
                remote_list.record_md5(key, objectlist.get_md5(key))
    else:
        for uri in remote_uris:
            uri_str = str(uri)
            ## Wildcards used in remote URI?
            ## If yes we'll need a bucket listing...
            if uri_str.find('*') > -1 or uri_str.find('?') > -1:
                first_wildcard = uri_str.find('*')
                first_questionmark = uri_str.find('?')
                # BUGFIX: when the URI contained only '?' (no '*'),
                # first_wildcard stayed -1 and the prefix/rest split was
                # garbage.  Take whichever wildcard occurs first.
                if first_wildcard == -1 or (first_questionmark > -1 and first_questionmark < first_wildcard):
                    first_wildcard = first_questionmark
                prefix = uri_str[:first_wildcard]
                rest = uri_str[first_wildcard+1:]
                ## Only request recursive listing if the 'rest' of the URI,
                ## i.e. the part after first wildcard, contains '/'
                need_recursion = rest.find('/') > -1
                objectlist = _get_filelist_remote(S3Uri(prefix), recursive = need_recursion)
                for key in objectlist:
                    ## Check whether the 'key' matches the requested wildcards
                    if glob.fnmatch.fnmatch(objectlist[key]['object_uri_str'], uri_str):
                        remote_list[key] = objectlist[key]
            else:
                ## No wildcards - simply append the given URI to the list
                key = os.path.basename(uri.object())
                if not key:
                    raise ParameterError(u"Expecting S3 URI with a filename or --recursive: %s" % uri.uri())
                remote_item = {
                    'base_uri': uri,
                    'object_uri_str': unicode(uri),
                    'object_key': uri.object()
                }
                if require_attribs:
                    response = S3(cfg).object_info(uri)
                    remote_item.update({
                        'size': int(response['headers']['content-length']),
                        'md5': response['headers']['etag'].strip('"\''),
                        # BUGFIX: was response['headers']['date'] -- the
                        # time of *this request*, not of the object; the
                        # newer fetch_remote_list uses 'last-modified'.
                        'timestamp' : dateRFC822toUnix(response['headers']['last-modified'])
                    })
                    # get md5 from header if it's present. We would have set that during upload
                    # (has_key() replaced with the 'in' operator)
                    if 'x-amz-meta-s3cmd-attrs' in response['headers']:
                        attrs = parse_attrs_header(response['headers']['x-amz-meta-s3cmd-attrs'])
                        if 'md5' in attrs:
                            remote_item.update({'md5': attrs['md5']})
                remote_list[key] = remote_item
    return remote_list
def fetch_local_list(args, is_src=False, recursive=None):
    """Build a FileDict of local files with stat/md5 info for syncing.

    args      -- one file:// URI/path or a list/tuple/set of them;
                 "-" denotes stdin and yields a single pseudo-entry.
    is_src    -- when True and cfg.files_from is set, the file list is
                 taken from those files instead of walking the tree.
    recursive -- None means "use cfg.recursive"; directories require it.

    Returns (local_list, single_file, exclude_list, total_size).
    Raises ParameterError for non-file URIs or an unexpanded directory.
    """
    def _fetch_local_list_info(loc_list):
        # stat() every listed file, fill size/mtime/owner fields and
        # (when md5 is among cfg.sync_checks) resolve md5 via the cache.
        # Returns the cumulative size of all files that still exist.
        len_loc_list = len(loc_list)
        total_size = 0
        info(u"Running stat() and reading/calculating MD5 values on %d files, this may take some time..." % len_loc_list)
        counter = 0
        for relative_file in loc_list:
            counter += 1
            if counter % 1000 == 0:
                info(u"[%d/%d]" % (counter, len_loc_list))
            if relative_file == '-':
                continue
            full_name = loc_list[relative_file]['full_name']
            try:
                sr = os.stat_result(os.stat(deunicodise(full_name)))
            except OSError as e:
                if e.errno == errno.ENOENT:
                    # file was removed async to us getting the list
                    continue
                else:
                    raise
            loc_list[relative_file].update({
                'size': sr.st_size,
                'mtime': sr.st_mtime,
                'dev': sr.st_dev,
                'inode': sr.st_ino,
                'uid': sr.st_uid,
                'gid': sr.st_gid,
                'sr': sr # save it all, may need it in preserve_attrs_list
                ## TODO: Possibly more to save here...
            })
            total_size += sr.st_size
            if 'md5' in cfg.sync_checks:
                # Cache hit is keyed on (dev, inode, mtime, size);
                # on miss the file is read and the cache updated.
                md5 = cache.md5(sr.st_dev, sr.st_ino, sr.st_mtime, sr.st_size)
                if md5 is None:
                    try:
                        md5 = loc_list.get_md5(relative_file) # this does the file I/O
                    except IOError:
                        continue
                    cache.add(sr.st_dev, sr.st_ino, sr.st_mtime, sr.st_size, md5)
                loc_list.record_hardlink(relative_file, sr.st_dev, sr.st_ino, md5, sr.st_size)
        return total_size

    def _get_filelist_local(loc_list, local_uri, cache):
        # Walk local_uri and add a minimal entry per regular file;
        # stat/md5 details are filled in later by _fetch_local_list_info.
        info(u"Compiling list of local files...")
        if local_uri.basename() == "-":
            # stdin pseudo-file: size/mtime are unknowable, marked -1.
            try:
                uid = os.geteuid()
                gid = os.getegid()
            except:
                uid = 0
                gid = 0
            loc_list["-"] = {
                'full_name': '-',
                'size': -1,
                'mtime': -1,
                'uid': uid,
                'gid': gid,
                'dev': 0,
                'inode': 0,
            }
            return loc_list, True
        if local_uri.isdir():
            local_base = local_uri.basename()
            local_path = local_uri.path()
            if is_src and len(cfg.files_from):
                # Explicit file list supplied via --files-from.
                filelist = _get_filelist_from_file(cfg, local_path)
                single_file = False
            else:
                if cfg.follow_symlinks:
                    filelist = _fswalk_follow_symlinks(local_path)
                else:
                    filelist = _fswalk_no_symlinks(local_path)
                single_file = False
        else:
            local_base = ""
            local_path = local_uri.dirname()
            # Fake a one-entry os.walk() result for the single file.
            filelist = [(local_path, [], [local_uri.basename()])]
            single_file = True
        for root, dirs, files in filelist:
            rel_root = root.replace(local_path, local_base, 1)
            for f in files:
                full_name = os.path.join(root, f)
                if not os.path.isfile(deunicodise(full_name)):
                    if os.path.exists(deunicodise(full_name)):
                        warning(u"Skipping over non regular file: %s" % full_name)
                    continue
                if os.path.islink(deunicodise(full_name)):
                    if not cfg.follow_symlinks:
                        warning(u"Skipping over symbolic link: %s" % full_name)
                        continue
                relative_file = os.path.join(rel_root, f)
                if os.path.sep != "/":
                    # Convert non-unix dir separators to '/'
                    relative_file = "/".join(relative_file.split(os.path.sep))
                if cfg.urlencoding_mode == "normal":
                    relative_file = replace_nonprintables(relative_file)
                if relative_file.startswith('./'):
                    relative_file = relative_file[2:]
                loc_list[relative_file] = {
                    'full_name': full_name,
                }
        return loc_list, single_file

    def _maintain_cache(cache, local_list):
        # if getting the file list from files_from, it is going to be
        # a subset of the actual tree.  We should not purge content
        # outside of that subset as we don't know if it's valid or
        # not.  Leave it to a non-files_from run to purge.
        if cfg.cache_file and len(cfg.files_from) == 0:
            cache.mark_all_for_purge()
            for i in local_list.keys():
                cache.unmark_for_purge(local_list[i]['dev'], local_list[i]['inode'],
                                       local_list[i]['mtime'], local_list[i]['size'])
            cache.purge()
            cache.save(cfg.cache_file)

    cfg = Config()
    cache = HashCache()
    if cfg.cache_file:
        try:
            cache.load(cfg.cache_file)
        except IOError:
            info(u"No cache file found, creating it.")
    local_uris = []
    local_list = FileDict(ignore_case=False)
    single_file = False
    if type(args) not in (list, tuple, set):
        args = [args]
    if recursive == None:
        recursive = cfg.recursive
    for arg in args:
        uri = S3Uri(arg)
        if not uri.type == 'file':
            raise ParameterError("Expecting filename or directory instead of: %s" % arg)
        if uri.isdir() and not recursive:
            raise ParameterError("Use --recursive to upload a directory: %s" % arg)
        local_uris.append(uri)
    for uri in local_uris:
        list_for_uri, single_file = _get_filelist_local(local_list, uri, cache)
    ## Single file is True if and only if the user
    ## specified one local URI and that URI represents
    ## a FILE. Ie it is False if the URI was of a DIR
    ## and that dir contained only one FILE. That's not
    ## a case of single_file==True.
    if len(local_list) > 1:
        single_file = False
    local_list, exclude_list = filter_exclude_include(local_list)
    total_size = _fetch_local_list_info(local_list)
    _maintain_cache(cache, local_list)
    return local_list, single_file, exclude_list, total_size