Example #1
    def _get_filelist_remote(remote_uri, recursive = True):
        ## If remote_uri ends with '/' then all remote files will have
        ## the remote_uri prefix removed in the relative path.
        ## If, on the other hand, the remote_uri ends with something else
        ## (probably alphanumeric symbol) we'll use the last path part
        ## in the relative path.
        ##
        ## Complicated, eh? See an example:
        ## _get_filelist_remote("s3://bckt/abc/def") may yield:
        ## { 'def/file1.jpg' : {}, 'def/xyz/blah.txt' : {} }
        ## _get_filelist_remote("s3://bckt/abc/def/") will yield:
        ## { 'file1.jpg' : {}, 'xyz/blah.txt' : {} }
        ## Furthermore a prefix-magic can restrict the return list:
        ## _get_filelist_remote("s3://bckt/abc/def/x") yields:
        ## { 'xyz/blah.txt' : {} }

        info(u"Retrieving list of remote files for %s ..." % remote_uri)

        s3 = S3(Config())
        response = s3.bucket_list(remote_uri.bucket(), prefix = remote_uri.object(), recursive = recursive)

        rem_base_original = rem_base = remote_uri.object()
        remote_uri_original = remote_uri
        if rem_base != '' and rem_base[-1] != '/':
            rem_base = rem_base[:rem_base.rfind('/')+1]
            remote_uri = S3Uri("s3://%s/%s" % (remote_uri.bucket(), rem_base))
        rem_base_len = len(rem_base)
        rem_list = FileDict(ignore_case = False)
        break_now = False
        for object in response['list']:
            if object['Key'] == rem_base_original and object['Key'][-1] != "/":
                ## We asked for one file and we got that file :-)
                key = os.path.basename(object['Key'])
                object_uri_str = remote_uri_original.uri()
                break_now = True
                rem_list = FileDict(ignore_case = False)   ## Remove whatever has already been put to rem_list
            else:
                key = object['Key'][rem_base_len:]      ## Beware - this may be '' if object['Key']==rem_base !!
                object_uri_str = remote_uri.uri() + key
            rem_list[key] = {
                'size' : int(object['Size']),
                'timestamp' : dateS3toUnix(object['LastModified']), ## Sadly it's upload time, not our lastmod time :-(
                'md5' : object['ETag'][1:-1],
                'object_key' : object['Key'],
                'object_uri_str' : object_uri_str,
                'base_uri' : remote_uri,
                'dev' : None,
                'inode' : None,
            }
            if '-' in rem_list[key]['md5']: # always get it for multipart uploads
                _get_remote_attribs(S3Uri(object_uri_str), rem_list[key])
            md5 = rem_list[key]['md5']
            rem_list.record_md5(key, md5)
            if break_now:
                break
        return rem_list
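
The prefix handling described in the leading comment can be reproduced in isolation. A minimal sketch, using plain strings only (no S3 calls; the hard-coded key list stands in for a bucket_list() response):

    def relative_keys(prefix, keys):
        # Mimic rem_base above: if the prefix does not end with '/',
        # keep only the part up to and including the last '/'.
        base = prefix
        if base != '' and not base.endswith('/'):
            base = base[:base.rfind('/') + 1]
        return sorted(key[len(base):] for key in keys if key.startswith(prefix))

    keys = ['abc/def/file1.jpg', 'abc/def/xyz/blah.txt']
    print(relative_keys('abc/def', keys))   # ['def/file1.jpg', 'def/xyz/blah.txt']
    print(relative_keys('abc/def/', keys))  # ['file1.jpg', 'xyz/blah.txt']
    print(relative_keys('abc/def/x', keys)) # ['xyz/blah.txt']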
Example #2
 def set_accesslog(self,
                   uri,
                   enable,
                   log_target_prefix_uri=None,
                   acl_public=False):
     request = self.create_request("BUCKET_CREATE",
                                   bucket=uri.bucket(),
                                   extra="?logging")
     accesslog = AccessLog()
     if enable:
         accesslog.enableLogging(log_target_prefix_uri)
         accesslog.setAclPublic(acl_public)
     else:
         accesslog.disableLogging()
     body = str(accesslog)
     debug(u"set_accesslog(%s): accesslog-xml: %s" % (uri, body))
     try:
         response = self.send_request(request, body)
     except S3Error, e:
         if e.info['Code'] == "InvalidTargetBucketForLogging":
             info("Setting up log-delivery ACL for target bucket.")
             self.set_accesslog_acl(
                 S3Uri("s3://%s" % log_target_prefix_uri.bucket()))
             response = self.send_request(request, body)
         else:
             raise
     return accesslog, response
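
A minimal usage sketch, assuming this module's S3, Config and S3Uri are importable and that the bucket names are placeholders:

    s3 = S3(Config())
    bucket_uri = S3Uri("s3://example-bucket")
    target_uri = S3Uri("s3://example-log-bucket/logs/")

    # Enable access logging, delivered under the target prefix; if the
    # target bucket lacks the log-delivery ACL, set_accesslog() sets it
    # up and retries, as shown above.
    s3.set_accesslog(bucket_uri, enable=True,
                     log_target_prefix_uri=target_uri, acl_public=False)

    # Disable logging again:
    s3.set_accesslog(bucket_uri, enable=False)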
Example #3
 def invalinfo(args):
     cf = CloudFront(Config())
     cfuris = Cmd._parse_args(args)
     requests = []
     for cfuri in cfuris:
         if cfuri.request_id():
             requests.append(str(cfuri))
         else:
             inval_list = cf.GetInvalList(cfuri)
             try:
                 for i in inval_list['inval_list'].info['InvalidationSummary']:
                     requests.append("/".join(["cf:/", cfuri.dist_id(), i["Id"]]))
             except Exception:  # this distribution has no invalidations listed
                 continue
     for req in requests:
         cfuri = S3Uri(req)
         inval_info = cf.GetInvalInfo(cfuri)
         st = inval_info['inval_status'].info
         pretty_output("URI", str(cfuri))
         pretty_output("Status", st['Status'])
         pretty_output("Created", st['CreateTime'])
         pretty_output("Nr of paths", len(st['InvalidationBatch']['Path']))
         pretty_output("Reference",
                       st['InvalidationBatch']['CallerReference'])
         output("")
Example #4
 def _parse_args(args):
     cf = CloudFront(Config())
     cfuris = []
     for arg in args:
         uri = cf.get_dist_name_for_bucket(S3Uri(arg))
         cfuris.append(uri)
     return cfuris
Example #5
    def object_batch_delete(self, remote_list):
        def compose_batch_del_xml(bucket, key_list):
            body = u"<?xml version=\"1.0\" encoding=\"UTF-8\"?><Delete>"
            for key in key_list:
                uri = S3Uri(key)
                if uri.type != "s3":
                    raise ValueError("Excpected URI type 's3', got '%s'" % uri.type)
                if not uri.has_object():
                    raise ValueError("URI '%s' has no object" % key)
                if uri.bucket() != bucket:
                    raise ValueError("The batch should contain keys from the same bucket")
                object = saxutils.escape(uri.object())
                body += u"<Object><Key>%s</Key></Object>" % object
            body += u"</Delete>"
            body = body.encode('utf-8')
            return body

        batch = [remote_list[item]['object_uri_str'] for item in remote_list]
        if len(batch) == 0:
            raise ValueError("Key list is empty")
        bucket = S3Uri(batch[0]).bucket()
        request_body = compose_batch_del_xml(bucket, batch)
        md5_hash = md5()
        md5_hash.update(request_body)
        headers = {'content-md5': base64.b64encode(md5_hash.digest())}
        request = self.create_request("BATCH_DELETE", bucket = bucket, extra = '?delete', headers = headers)
        response = self.send_request(request, request_body)
        return response
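
For concreteness, a sketch of the request body this builds, with hypothetical object keys (the real body is one unbroken line):

    import base64
    from hashlib import md5
    from xml.sax import saxutils

    keys = ['a.txt', 'dir/b&c.txt']   # hypothetical object keys
    body = u'<?xml version="1.0" encoding="UTF-8"?><Delete>'
    for k in keys:
        body += u'<Object><Key>%s</Key></Object>' % saxutils.escape(k)
    body += u'</Delete>'
    body = body.encode('utf-8')
    # body is now (broken up here for readability):
    #   <?xml version="1.0" encoding="UTF-8"?><Delete>
    #   <Object><Key>a.txt</Key></Object>
    #   <Object><Key>dir/b&amp;c.txt</Key></Object></Delete>

    # The Content-MD5 header is computed over exactly these bytes:
    headers = {'content-md5': base64.b64encode(md5(body).digest())}

Note that saxutils.escape() XML-escapes '&', '<' and '>' in key names before they are embedded in the document.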
Example #6
    def modify(args):
        cf = CloudFront(Config())
        cfuri = S3Uri(args.pop(0))
        if cfuri.type != 'cf':
            raise ParameterError("CloudFront URI required instead of: %s" %
                                 arg)
        if len(args):
            raise ParameterError(
                "Too many parameters. Modify one Distribution at a time.")

        response = cf.ModifyDistribution(
            cfuri,
            cnames_add=Cmd.options.cf_cnames_add,
            cnames_remove=Cmd.options.cf_cnames_remove,
            comment=Cmd.options.cf_comment,
            enabled=Cmd.options.cf_enable)
        if response['status'] >= 400:
            error("Distribution %s could not be modified: %s" %
                  (cfuri, response['reason']))
        output("Distribution modified: %s" % cfuri)
        response = cf.GetDistInfo(cfuri)
        d = response['distribution']
        dc = d.info['DistributionConfig']
        pretty_output("Origin", S3UriS3.httpurl_to_s3uri(dc.info['Origin']))
        pretty_output("DistId", d.uri())
        pretty_output("DomainName", d.info['DomainName'])
        pretty_output("Status", d.info['Status'])
        pretty_output("CNAMEs", ", ".join(dc.info['CNAME']))
        pretty_output("Comment", dc.info['Comment'])
        pretty_output("Enabled", dc.info['Enabled'])
        pretty_output("Etag", response['headers']['etag'])
Example #7
 def info(args):
     cf = CloudFront(Config())
     if not args:
         response = cf.GetList()
         for d in response['dist_list'].dist_summs:
             pretty_output("Origin",
                           S3UriS3.httpurl_to_s3uri(d.info['Origin']))
             pretty_output("DistId", d.uri())
             pretty_output("DomainName", d.info['DomainName'])
             pretty_output("Status", d.info['Status'])
             pretty_output("Enabled", d.info['Enabled'])
             output("")
     else:
         cfuris = []
         for arg in args:
             cfuris.append(S3Uri(arg))
             if cfuris[-1].type != 'cf':
                 raise ParameterError(
                     "CloudFront URI required instead of: %s" % arg)
         for cfuri in cfuris:
             response = cf.GetDistInfo(cfuri)
             d = response['distribution']
             dc = d.info['DistributionConfig']
             pretty_output("Origin",
                           S3UriS3.httpurl_to_s3uri(dc.info['Origin']))
             pretty_output("DistId", d.uri())
             pretty_output("DomainName", d.info['DomainName'])
             pretty_output("Status", d.info['Status'])
             pretty_output("CNAMEs", ", ".join(dc.info['CNAME']))
             pretty_output("Comment", dc.info['Comment'])
             pretty_output("Enabled", dc.info['Enabled'])
             pretty_output("Etag", response['headers']['etag'])
Example #8
	def create(args):
		cf = CloudFront(Config())
		buckets = []
		for arg in args:
			uri = S3Uri(arg)
			if uri.type != "s3":
				raise ParameterError("Bucket can only be created from a s3:// URI instead of: %s" % arg)
			if uri.object():
				raise ParameterError("Use s3:// URI with a bucket name only instead of: %s" % arg)
			if not uri.is_dns_compatible():
				raise ParameterError("CloudFront can only handle lowercase-named buckets.")
			buckets.append(uri)
		if not buckets:
			raise ParameterError("No valid bucket names found")
		for uri in buckets:
			info("Creating distribution from: %s" % uri)
			response = cf.CreateDistribution(uri, cnames_add = Cmd.options.cf_cnames_add, 
			                                 comment = Cmd.options.cf_comment,
			                                 logging = Cmd.options.cf_logging)
			d = response['distribution']
			dc = d.info['DistributionConfig']
			output("Distribution created:")
			pretty_output("Origin", S3UriS3.httpurl_to_s3uri(dc.info['Origin']))
			pretty_output("DistId", d.uri())
			pretty_output("DomainName", d.info['DomainName'])
			pretty_output("CNAMEs", ", ".join(dc.info['CNAME']))
			pretty_output("Comment", dc.info['Comment'])
			pretty_output("Status", d.info['Status'])
			pretty_output("Enabled", dc.info['Enabled'])
			pretty_output("Etag", response['headers']['etag'])
Example #9
	def _parse_args(args):
		cfuris = []
		for arg in args:
			uri = S3Uri(arg)
			if uri.type == 's3':
				try:
					uri = Cmd._get_dist_name_for_bucket(uri)
				except Exception, e:
					debug(e)
					raise ParameterError("Unable to translate S3 URI to CloudFront distribution name: %s" % uri)
			if uri.type != 'cf':
				raise ParameterError("CloudFront URI required instead of: %s" % arg)
			cfuris.append(uri)
		return cfuris
Example #10
 def delete(args):
     cf = CloudFront(Config())
     cfuris = []
     for arg in args:
         cfuris.append(S3Uri(arg))
         if cfuris[-1].type != 'cf':
             raise ParameterError("CloudFront URI required instead of: %s" %
                                  arg)
     for cfuri in cfuris:
         response = cf.DeleteDistribution(cfuri)
         if response['status'] >= 400:
             error("Distribution %s could not be deleted: %s" %
                   (cfuri, response['reason']))
         output("Distribution %s deleted" % cfuri)
Example #11
 def compose_batch_del_xml(bucket, key_list):
     body = u"<?xml version=\"1.0\" encoding=\"UTF-8\"?><Delete>"
     for key in key_list:
         uri = S3Uri(key)
         if uri.type != "s3":
             raise ValueError("Excpected URI type 's3', got '%s'" % uri.type)
         if not uri.has_object():
             raise ValueError("URI '%s' has no object" % key)
         if uri.bucket() != bucket:
             raise ValueError("The batch should contain keys from the same bucket")
         object = saxutils.escape(uri.object())
         body += u"<Object><Key>%s</Key></Object>" % object
     body += u"</Delete>"
     body = body.encode('utf-8')
     return body
Example #12
def fetch_remote_list(args,
                      require_attribs=False,
                      recursive=None,
                      uri_params={}):
    def _get_remote_attribs(uri, remote_item):
        response = S3(cfg).object_info(uri)
        if not response.get('headers'):
            return

        remote_item.update({
            'size': int(response['headers']['content-length']),
            'md5': response['headers']['etag'].strip('"\''),
            'timestamp': dateRFC822toUnix(response['headers']['last-modified'])
        })
        try:
            md5 = response['s3cmd-attrs']['md5']
            remote_item.update({'md5': md5})
            debug(u"retreived md5=%s from headers" % md5)
        except KeyError:
            pass

    def _get_filelist_remote(remote_uri, recursive=True):
        ## If remote_uri ends with '/' then all remote files will have
        ## the remote_uri prefix removed in the relative path.
        ## If, on the other hand, the remote_uri ends with something else
        ## (probably alphanumeric symbol) we'll use the last path part
        ## in the relative path.
        ##
        ## Complicated, eh? See an example:
        ## _get_filelist_remote("s3://bckt/abc/def") may yield:
        ## { 'def/file1.jpg' : {}, 'def/xyz/blah.txt' : {} }
        ## _get_filelist_remote("s3://bckt/abc/def/") will yield:
        ## { 'file1.jpg' : {}, 'xyz/blah.txt' : {} }
        ## Furthermore a prefix-magic can restrict the return list:
        ## _get_filelist_remote("s3://bckt/abc/def/x") yields:
        ## { 'xyz/blah.txt' : {} }

        info(u"Retrieving list of remote files for %s ..." % remote_uri)
        empty_fname_re = re.compile(r'\A\s*\Z')

        total_size = 0

        s3 = S3(Config())
        response = s3.bucket_list(remote_uri.bucket(),
                                  prefix=remote_uri.object(),
                                  recursive=recursive,
                                  uri_params=uri_params)

        rem_base_original = rem_base = remote_uri.object()
        remote_uri_original = remote_uri
        if rem_base != '' and rem_base[-1] != '/':
            rem_base = rem_base[:rem_base.rfind('/') + 1]
            remote_uri = S3Uri(u"s3://%s/%s" % (remote_uri.bucket(), rem_base))
        rem_base_len = len(rem_base)
        rem_list = FileDict(ignore_case=False)
        break_now = False
        for object in response['list']:
            if object['Key'] == rem_base_original and object['Key'][-1] != "/":
                ## We asked for one file and we got that file :-)
                key = unicodise(os.path.basename(deunicodise(object['Key'])))
                object_uri_str = remote_uri_original.uri()
                break_now = True
                rem_list = FileDict(ignore_case=False)  ## Remove whatever has already been put to rem_list
            else:
                key = object['Key'][rem_base_len:]  ## Beware - this may be '' if object['Key']==rem_base !!
                object_uri_str = remote_uri.uri() + key
            if empty_fname_re.match(key):
                # Objects may exist on S3 with empty names (''), which don't map so well to common filesystems.
                warning(u"Empty object name on S3 found, ignoring.")
                continue
            rem_list[key] = {
                'size': int(object['Size']),
                'timestamp': dateS3toUnix(object['LastModified']),  ## Sadly it's upload time, not our lastmod time :-(
                'md5': object['ETag'].strip('"\''),
                'object_key': object['Key'],
                'object_uri_str': object_uri_str,
                'base_uri': remote_uri,
                'dev': None,
                'inode': None,
            }
            if '-' in rem_list[key]['md5']:  # always get it for multipart uploads
                _get_remote_attribs(S3Uri(object_uri_str), rem_list[key])
            md5 = rem_list[key]['md5']
            rem_list.record_md5(key, md5)
            total_size += int(object['Size'])
            if break_now:
                break
        return rem_list, total_size

    cfg = Config()
    remote_uris = []
    remote_list = FileDict(ignore_case=False)

    if type(args) not in (list, tuple, set):
        args = [args]

    if recursive == None:
        recursive = cfg.recursive

    for arg in args:
        uri = S3Uri(arg)
        if not uri.type == 's3':
            raise ParameterError("Expecting S3 URI instead of '%s'" % arg)
        remote_uris.append(uri)

    total_size = 0

    if recursive:
        for uri in remote_uris:
            objectlist, tmp_total_size = _get_filelist_remote(uri,
                                                              recursive=True)
            total_size += tmp_total_size
            for key in objectlist:
                remote_list[key] = objectlist[key]
                remote_list.record_md5(key, objectlist.get_md5(key))
    else:
        for uri in remote_uris:
            uri_str = uri.uri()
            ## Wildcards used in remote URI?
            ## If yes we'll need a bucket listing...
            wildcard_split_result = re.split(r'\*|\?', uri_str, maxsplit=1)
            if len(wildcard_split_result) == 2:  # wildcards found
                prefix, rest = wildcard_split_result
                ## Only request recursive listing if the 'rest' of the URI,
                ## i.e. the part after first wildcard, contains '/'
                need_recursion = '/' in rest
                objectlist, tmp_total_size = _get_filelist_remote(
                    S3Uri(prefix), recursive=need_recursion)
                total_size += tmp_total_size
                for key in objectlist:
                    ## Check whether the 'key' matches the requested wildcards
                    if glob.fnmatch.fnmatch(objectlist[key]['object_uri_str'],
                                            uri_str):
                        remote_list[key] = objectlist[key]
            else:
                ## No wildcards - simply append the given URI to the list
                key = unicodise(os.path.basename(deunicodise(uri.object())))
                if not key:
                    raise ParameterError(
                        u"Expecting S3 URI with a filename or --recursive: %s"
                        % uri.uri())
                remote_item = {
                    'base_uri': uri,
                    'object_uri_str': uri.uri(),
                    'object_key': uri.object()
                }
                if require_attribs:
                    _get_remote_attribs(uri, remote_item)

                remote_list[key] = remote_item
                md5 = remote_item.get('md5')
                if md5:
                    remote_list.record_md5(key, md5)
                total_size += remote_item.get('size', 0)

    remote_list, exclude_list = filter_exclude_include(remote_list)
    return remote_list, exclude_list, total_size
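
The `'-' in md5` test above keys off the ETag format: for multipart uploads S3 returns `<md5-of-concatenated-part-md5s>-<part-count>` rather than the object's own MD5, so the real checksum has to be fetched from the stored s3cmd-attrs metadata. The test in isolation (example digests only):

    def is_multipart_etag(etag):
        # Single-part ETags are a plain 32-hex-digit MD5; multipart
        # ETags carry a '-<number of parts>' suffix.
        return '-' in etag.strip('"\'')

    print(is_multipart_etag('"d41d8cd98f00b204e9800998ecf8427e"'))    # False
    print(is_multipart_etag('"9b2cf535f27731c974343645a3985328-5"'))  # True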
Example #13
    cache = HashCache()
    if cfg.cache_file:
        try:
            cache.load(cfg.cache_file)
        except IOError:
            info(u"No cache file found, creating it.")

    local_uris = []
    local_list = FileDict(ignore_case=False)
    single_file = False

    if type(args) not in (list, tuple, set):
        args = [args]

    if recursive == None:
        recursive = cfg.recursive

    for arg in args:
        uri = S3Uri(arg)
        if not uri.type == 'file':
            raise ParameterError(
                "Expecting filename or directory instead of: %s" % arg)
        if uri.isdir() and not recursive:
            raise ParameterError("Use --recursive to upload a directory: %s" %
                                 arg)
        local_uris.append(uri)

    for uri in local_uris:
        list_for_uri, single_file = _get_filelist_local(local_list, uri, cache)

    ## Single file is True if and only if the user
    ## specified one local URI and that URI represents
    ## a FILE. Ie it is False if the URI was of a DIR
    ## and that dir contained only one FILE. That's not
    ## a case of single_file==True.
    if len(local_list) > 1:
        single_file = False
Example #14
 def uri(self):
     return S3Uri(u"cf://%s" % self.info['Id'])
Example #15
def fetch_local_list(args, recursive = None):
    def _get_filelist_local(loc_list, local_uri, cache):
        info(u"Compiling list of local files...")

        if deunicodise(local_uri.basename()) == "-":
            loc_list["-"] = {
                'full_name_unicode' : '-',
                'full_name' : '-',
                'size' : -1,
                'mtime' : -1,
            }
            return loc_list, True
        if local_uri.isdir():
            local_base = deunicodise(local_uri.basename())
            local_path = deunicodise(local_uri.path())
            if cfg.follow_symlinks:
                filelist = _fswalk_follow_symlinks(local_path)
            else:
                filelist = _fswalk_no_symlinks(local_path)
            single_file = False
        else:
            local_base = ""
            local_path = deunicodise(local_uri.dirname())
            filelist = [( local_path, [], [deunicodise(local_uri.basename())] )]
            single_file = True
        for root, dirs, files in filelist:
            rel_root = root.replace(local_path, local_base, 1)
            for f in files:
                full_name = os.path.join(root, f)
                if not os.path.isfile(full_name):
                    continue
                if os.path.islink(full_name):
                    if not cfg.follow_symlinks:
                        continue
                relative_file = unicodise(os.path.join(rel_root, f))
                if os.path.sep != "/":
                    # Convert non-unix dir separators to '/'
                    relative_file = "/".join(relative_file.split(os.path.sep))
                if cfg.urlencoding_mode == "normal":
                    relative_file = replace_nonprintables(relative_file)
                if relative_file.startswith('./'):
                    relative_file = relative_file[2:]
                sr = os.stat_result(os.lstat(full_name))
                loc_list[relative_file] = {
                    'full_name_unicode' : unicodise(full_name),
                    'full_name' : full_name,
                    'size' : sr.st_size,
                    'mtime' : sr.st_mtime,
                    'dev'   : sr.st_dev,
                    'inode' : sr.st_ino,
                    'uid' : sr.st_uid,
                    'gid' : sr.st_gid,
                    'sr': sr # save it all, may need it in preserve_attrs_list
                    ## TODO: Possibly more to save here...
                }
                if 'md5' in cfg.sync_checks:
                    md5 = cache.md5(sr.st_dev, sr.st_ino, sr.st_mtime, sr.st_size)
                    if md5 is None:
                        try:
                            md5 = loc_list.get_md5(relative_file) # this does the file I/O
                        except IOError:
                            continue
                        cache.add(sr.st_dev, sr.st_ino, sr.st_mtime, sr.st_size, md5)
                    loc_list.record_hardlink(relative_file, sr.st_dev, sr.st_ino, md5)
        return loc_list, single_file

    def _maintain_cache(cache, local_list):
        if cfg.cache_file:
            cache.mark_all_for_purge()
            for i in local_list.keys():
                cache.unmark_for_purge(local_list[i]['dev'], local_list[i]['inode'], local_list[i]['mtime'], local_list[i]['size'])
            cache.purge()
            cache.save(cfg.cache_file)

    cfg = Config()

    cache = HashCache()
    if cfg.cache_file:
        try:
            cache.load(cfg.cache_file)
        except IOError:
            info(u"No cache file found, creating it.")

    local_uris = []
    local_list = FileDict(ignore_case = False)
    single_file = False

    if type(args) not in (list, tuple):
        args = [args]

    if recursive == None:
        recursive = cfg.recursive

    for arg in args:
        uri = S3Uri(arg)
        if not uri.type == 'file':
            raise ParameterError("Expecting filename or directory instead of: %s" % arg)
        if uri.isdir() and not recursive:
            raise ParameterError("Use --recursive to upload a directory: %s" % arg)
        local_uris.append(uri)

    for uri in local_uris:
        list_for_uri, single_file = _get_filelist_local(local_list, uri, cache)

    ## Single file is True if and only if the user
    ## specified one local URI and that URI represents
    ## a FILE. Ie it is False if the URI was of a DIR
    ## and that dir contained only one FILE. That's not
    ## a case of single_file==True.
    if len(local_list) > 1:
        single_file = False

    _maintain_cache(cache, local_list)

    return local_list, single_file
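
The cache logic above avoids re-hashing files whose (device, inode, mtime, size) tuple is unchanged since the last run. A minimal stand-in for HashCache showing the idea (hypothetical class; the real one also supports purging and on-disk persistence):

    import hashlib
    import os

    class TinyHashCache(object):
        def __init__(self):
            self._store = {}
        def md5(self, dev, inode, mtime, size):
            return self._store.get((dev, inode, mtime, size))
        def add(self, dev, inode, mtime, size, md5):
            self._store[(dev, inode, mtime, size)] = md5

    def cached_md5(cache, path):
        st = os.lstat(path)
        digest = cache.md5(st.st_dev, st.st_ino, st.st_mtime, st.st_size)
        if digest is None:   # cache miss: read the file once
            with open(path, 'rb') as f:
                digest = hashlib.md5(f.read()).hexdigest()
            cache.add(st.st_dev, st.st_ino, st.st_mtime, st.st_size, digest)
        return digest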
Example #16
def fetch_local_list(args, recursive=None):
    def _get_filelist_local(local_uri):
        info(u"Compiling list of local files...")
        if local_uri.isdir():
            local_base = deunicodise(local_uri.basename())
            local_path = deunicodise(local_uri.path())
            filelist = _fswalk(local_path, cfg.follow_symlinks)
            single_file = False
        else:
            local_base = ""
            local_path = deunicodise(local_uri.dirname())
            filelist = [(local_path, [], [deunicodise(local_uri.basename())])]
            single_file = True
        loc_list = SortedDict(ignore_case=False)
        for root, dirs, files in filelist:
            rel_root = root.replace(local_path, local_base, 1)
            for f in files:
                full_name = os.path.join(root, f)
                if not os.path.isfile(full_name):
                    continue
                if os.path.islink(full_name):
                    if not cfg.follow_symlinks:
                        continue
                relative_file = unicodise(os.path.join(rel_root, f))
                if os.path.sep != "/":
                    # Convert non-unix dir separators to '/'
                    relative_file = "/".join(relative_file.split(os.path.sep))
                if cfg.urlencoding_mode == "normal":
                    relative_file = replace_nonprintables(relative_file)
                if relative_file.startswith('./'):
                    relative_file = relative_file[2:]
                sr = os.stat_result(os.lstat(full_name))
                loc_list[relative_file] = {
                    'full_name_unicode': unicodise(full_name),
                    'full_name': full_name,
                    'size': sr.st_size,
                    'mtime': sr.st_mtime,
                    ## TODO: Possibly more to save here...
                }
        return loc_list, single_file

    cfg = Config()
    local_uris = []
    local_list = SortedDict(ignore_case=False)
    single_file = False

    if type(args) not in (list, tuple):
        args = [args]

    if recursive == None:
        recursive = cfg.recursive

    for arg in args:
        uri = S3Uri(arg)
        if not uri.type == 'file':
            raise ParameterError(
                "Expecting filename or directory instead of: %s" % arg)
        if uri.isdir() and not recursive:
            raise ParameterError("Use --recursive to upload a directory: %s" %
                                 arg)
        local_uris.append(uri)

    for uri in local_uris:
        list_for_uri, single_file = _get_filelist_local(uri)
        local_list.update(list_for_uri)

    ## Single file is True if and only if the user
    ## specified one local URI and that URI represents
    ## a FILE. Ie it is False if the URI was of a DIR
    ## and that dir contained only one FILE. That's not
    ## a case of single_file==True.
    if len(local_list) > 1:
        single_file = False

    return local_list, single_file
Example #17
def fetch_remote_list(args, require_attribs = False, recursive = None):
    def _get_filelist_remote(remote_uri, recursive = True):
        ## If remote_uri ends with '/' then all remote files will have
        ## the remote_uri prefix removed in the relative path.
        ## If, on the other hand, the remote_uri ends with something else
        ## (probably alphanumeric symbol) we'll use the last path part
        ## in the relative path.
        ##
        ## Complicated, eh? See an example:
        ## _get_filelist_remote("s3://bckt/abc/def") may yield:
        ## { 'def/file1.jpg' : {}, 'def/xyz/blah.txt' : {} }
        ## _get_filelist_remote("s3://bckt/abc/def/") will yield:
        ## { 'file1.jpg' : {}, 'xyz/blah.txt' : {} }
        ## Furthermore a prefix-magic can restrict the return list:
        ## _get_filelist_remote("s3://bckt/abc/def/x") yields:
        ## { 'xyz/blah.txt' : {} }

        info(u"Retrieving list of remote files for %s ..." % remote_uri)

        s3 = S3(Config())
        response = s3.bucket_list(remote_uri.bucket(), prefix = remote_uri.object(), recursive = recursive)

        rem_base_original = rem_base = remote_uri.object()
        remote_uri_original = remote_uri
        if rem_base != '' and rem_base[-1] != '/':
            rem_base = rem_base[:rem_base.rfind('/')+1]
            remote_uri = S3Uri("s3://%s/%s" % (remote_uri.bucket(), rem_base))
        rem_base_len = len(rem_base)
        rem_list = FileDict(ignore_case = False)
        break_now = False
        for object in response['list']:
            if object['Key'] == rem_base_original and object['Key'][-1] != "/":
                ## We asked for one file and we got that file :-)
                key = os.path.basename(object['Key'])
                object_uri_str = remote_uri_original.uri()
                break_now = True
                rem_list = FileDict(ignore_case = False)   ## Remove whatever has already been put to rem_list
            else:
                key = object['Key'][rem_base_len:]      ## Beware - this may be '' if object['Key']==rem_base !!
                object_uri_str = remote_uri.uri() + key
            rem_list[key] = {
                'size' : int(object['Size']),
                'timestamp' : dateS3toUnix(object['LastModified']), ## Sadly it's upload time, not our lastmod time :-(
                'md5' : object['ETag'][1:-1],
                'object_key' : object['Key'],
                'object_uri_str' : object_uri_str,
                'base_uri' : remote_uri,
                'dev' : None,
                'inode' : None,
            }
            md5 = object['ETag'][1:-1]
            rem_list.record_md5(key, md5)
            if break_now:
                break
        return rem_list

    cfg = Config()
    remote_uris = []
    remote_list = FileDict(ignore_case = False)

    if type(args) not in (list, tuple):
        args = [args]

    if recursive == None:
        recursive = cfg.recursive

    for arg in args:
        uri = S3Uri(arg)
        if not uri.type == 's3':
            raise ParameterError("Expecting S3 URI instead of '%s'" % arg)
        remote_uris.append(uri)

    if recursive:
        for uri in remote_uris:
            objectlist = _get_filelist_remote(uri)
            for key in objectlist:
                remote_list[key] = objectlist[key]
                remote_list.record_md5(key, objectlist.get_md5(key))
    else:
        for uri in remote_uris:
            uri_str = str(uri)
            ## Wildcards used in remote URI?
            ## If yes we'll need a bucket listing...
            if uri_str.find('*') > -1 or uri_str.find('?') > -1:
                first_wildcard = uri_str.find('*')
                first_questionmark = uri_str.find('?')
                if first_questionmark > -1 and first_questionmark < first_wildcard:
                    first_wildcard = first_questionmark
                prefix = uri_str[:first_wildcard]
                rest = uri_str[first_wildcard+1:]
                ## Only request recursive listing if the 'rest' of the URI,
                ## i.e. the part after first wildcard, contains '/'
                need_recursion = rest.find('/') > -1
                objectlist = _get_filelist_remote(S3Uri(prefix), recursive = need_recursion)
                for key in objectlist:
                    ## Check whether the 'key' matches the requested wildcards
                    if glob.fnmatch.fnmatch(objectlist[key]['object_uri_str'], uri_str):
                        remote_list[key] = objectlist[key]
            else:
                ## No wildcards - simply append the given URI to the list
                key = os.path.basename(uri.object())
                if not key:
                    raise ParameterError(u"Expecting S3 URI with a filename or --recursive: %s" % uri.uri())
                remote_item = {
                    'base_uri': uri,
                    'object_uri_str': unicode(uri),
                    'object_key': uri.object()
                }
                if require_attribs:
                    response = S3(cfg).object_info(uri)
                    remote_item.update({
                    'size': int(response['headers']['content-length']),
                    'md5': response['headers']['etag'].strip('"\''),
                    'timestamp' : dateRFC822toUnix(response['headers']['last-modified'])
                    })
                    # get md5 from header if it's present.  We would have set that during upload
                    if response['headers'].has_key('x-amz-meta-s3cmd-attrs'):
                        attrs = parse_attrs_header(response['headers']['x-amz-meta-s3cmd-attrs'])
                        if attrs.has_key('md5'):
                            remote_item.update({'md5': attrs['md5']})

                remote_list[key] = remote_item
    return remote_list
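
The wildcard branch above reduces a pattern like s3://bckt/img/*.jpg to a plain-prefix listing followed by an fnmatch filter; Example 12 shows the same logic in re.split() form. A sketch of the splitting and filtering with made-up URIs:

    import fnmatch
    import re

    uri_str = 's3://bckt/img/2019/*.jpg'
    prefix, rest = re.split(r'[*?]', uri_str, maxsplit=1)
    need_recursion = '/' in rest   # recurse only if a '/' follows the wildcard

    print(prefix)           # s3://bckt/img/2019/
    print(need_recursion)   # False

    # Keys returned by the prefix listing are filtered against the pattern:
    listed = ['s3://bckt/img/2019/a.jpg', 's3://bckt/img/2019/notes.txt']
    print([u for u in listed if fnmatch.fnmatch(u, uri_str)])
    # ['s3://bckt/img/2019/a.jpg']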
Example #18
def fetch_local_list(args, is_src=False, recursive=None):
    def _fetch_local_list_info(loc_list):
        len_loc_list = len(loc_list)
        total_size = 0
        info(
            u"Running stat() and reading/calculating MD5 values on %d files, this may take some time..."
            % len_loc_list)
        counter = 0
        for relative_file in loc_list:
            counter += 1
            if counter % 1000 == 0:
                info(u"[%d/%d]" % (counter, len_loc_list))

            if relative_file == '-': continue

            full_name = loc_list[relative_file]['full_name']
            try:
                sr = os.stat_result(os.stat(deunicodise(full_name)))
            except OSError as e:
                if e.errno == errno.ENOENT:
                    # file was removed async to us getting the list
                    continue
                else:
                    raise
            loc_list[relative_file].update({
                'size': sr.st_size,
                'mtime': sr.st_mtime,
                'dev': sr.st_dev,
                'inode': sr.st_ino,
                'uid': sr.st_uid,
                'gid': sr.st_gid,
                'sr': sr  # save it all, may need it in preserve_attrs_list
                ## TODO: Possibly more to save here...
            })
            total_size += sr.st_size
            if 'md5' in cfg.sync_checks:
                md5 = cache.md5(sr.st_dev, sr.st_ino, sr.st_mtime, sr.st_size)
                if md5 is None:
                    try:
                        md5 = loc_list.get_md5(
                            relative_file)  # this does the file I/O
                    except IOError:
                        continue
                    cache.add(sr.st_dev, sr.st_ino, sr.st_mtime, sr.st_size,
                              md5)
                loc_list.record_hardlink(relative_file, sr.st_dev, sr.st_ino,
                                         md5, sr.st_size)
        return total_size

    def _get_filelist_local(loc_list, local_uri, cache):
        info(u"Compiling list of local files...")

        if local_uri.basename() == "-":
            try:
                uid = os.geteuid()
                gid = os.getegid()
            except Exception:  # e.g. os.geteuid() missing on this platform
                uid = 0
                gid = 0
            loc_list["-"] = {
                'full_name': '-',
                'size': -1,
                'mtime': -1,
                'uid': uid,
                'gid': gid,
                'dev': 0,
                'inode': 0,
            }
            return loc_list, True
        if local_uri.isdir():
            local_base = local_uri.basename()
            local_path = local_uri.path()
            if is_src and len(cfg.files_from):
                filelist = _get_filelist_from_file(cfg, local_path)
                single_file = False
            else:
                if cfg.follow_symlinks:
                    filelist = _fswalk_follow_symlinks(local_path)
                else:
                    filelist = _fswalk_no_symlinks(local_path)
                single_file = False
        else:
            local_base = ""
            local_path = local_uri.dirname()
            filelist = [(local_path, [], [local_uri.basename()])]
            single_file = True
        for root, dirs, files in filelist:
            rel_root = root.replace(local_path, local_base, 1)
            for f in files:
                full_name = os.path.join(root, f)
                if not os.path.isfile(deunicodise(full_name)):
                    if os.path.exists(deunicodise(full_name)):
                        warning(u"Skipping over non regular file: %s" %
                                full_name)
                    continue
                if os.path.islink(deunicodise(full_name)):
                    if not cfg.follow_symlinks:
                        warning(u"Skipping over symbolic link: %s" % full_name)
                        continue
                relative_file = os.path.join(rel_root, f)
                if os.path.sep != "/":
                    # Convert non-unix dir separators to '/'
                    relative_file = "/".join(relative_file.split(os.path.sep))
                if cfg.urlencoding_mode == "normal":
                    relative_file = replace_nonprintables(relative_file)
                if relative_file.startswith('./'):
                    relative_file = relative_file[2:]
                loc_list[relative_file] = {
                    'full_name': full_name,
                }

        return loc_list, single_file

    def _maintain_cache(cache, local_list):
        # if getting the file list from files_from, it is going to be
        # a subset of the actual tree.  We should not purge content
        # outside of that subset as we don't know if it's valid or
        # not.  Leave it to a non-files_from run to purge.
        if cfg.cache_file and len(cfg.files_from) == 0:
            cache.mark_all_for_purge()
            for i in local_list.keys():
                cache.unmark_for_purge(local_list[i]['dev'],
                                       local_list[i]['inode'],
                                       local_list[i]['mtime'],
                                       local_list[i]['size'])
            cache.purge()
            cache.save(cfg.cache_file)

    cfg = Config()

    cache = HashCache()
    if cfg.cache_file:
        try:
            cache.load(cfg.cache_file)
        except IOError:
            info(u"No cache file found, creating it.")

    local_uris = []
    local_list = FileDict(ignore_case=False)
    single_file = False

    if type(args) not in (list, tuple, set):
        args = [args]

    if recursive == None:
        recursive = cfg.recursive

    for arg in args:
        uri = S3Uri(arg)
        if not uri.type == 'file':
            raise ParameterError(
                "Expecting filename or directory instead of: %s" % arg)
        if uri.isdir() and not recursive:
            raise ParameterError("Use --recursive to upload a directory: %s" %
                                 arg)
        local_uris.append(uri)

    for uri in local_uris:
        list_for_uri, single_file = _get_filelist_local(local_list, uri, cache)

    ## Single file is True if and only if the user
    ## specified one local URI and that URI represents
    ## a FILE. Ie it is False if the URI was of a DIR
    ## and that dir contained only one FILE. That's not
    ## a case of single_file==True.
    if len(local_list) > 1:
        single_file = False

    local_list, exclude_list = filter_exclude_include(local_list)
    total_size = _fetch_local_list_info(local_list)
    _maintain_cache(cache, local_list)
    return local_list, single_file, exclude_list, total_size
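
A short illustration of the single_file semantics spelled out in the comment above. A hedged usage sketch; the paths are hypothetical and assume this module's usual context (Config initialised, include/exclude filters set up):

    local_list, single_file, exclude, size = fetch_local_list(u'photo.jpg')
    # single_file == True: one URI that names a file

    local_list, single_file, exclude, size = fetch_local_list(u'mydir/',
                                                              recursive=True)
    # single_file == False: the URI named a directory, even if mydir/
    # contains exactly one file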