def main():

    ## Args
    parser = argparse.ArgumentParser()
    parser.add_argument('--src-bucket', required=True, dest='src',
            help='Source S3 bucket')
    parser.add_argument('--dst-bucket', required=True, dest='dst',
            help='Destination S3 bucket')
    parser.add_argument('--src-endpoint',
            default=boto.s3.connection.NoHostProvided,
            help='S3 source endpoint')
    parser.add_argument('--dst-endpoint',
            default=boto.s3.connection.NoHostProvided,
            help='S3 destination endpoint')
    parser.add_argument('--src-profile',
            help='Boto profile used for source connection')
    parser.add_argument('--dst-profile',
            help='Boto profile used for destination connection')
    parser.add_argument('--config', '-c', default="./campanile.cfg",
            help='Path to config file')
    args = parser.parse_args()

    ## Config Object
    cfgfiles = campanile.cfg_file_locations()
    cfgfiles.insert(0, args.config)
    c = ConfigParser.SafeConfigParser({'ephemeral': '/tmp'})
    c.read(cfgfiles)

    ## S3 Bucket Connections
    src_bucket = S3Connection(suppress_consec_slashes=False,
            host=args.src_endpoint, is_secure=True,
            profile_name=args.src_profile).get_bucket(args.src, validate=False)
    dst_bucket = S3Connection(suppress_consec_slashes=False,
            host=args.dst_endpoint, is_secure=True,
            profile_name=args.dst_profile).get_bucket(args.dst, validate=False)

    ## Reporting Counters
    files = 0
    movedbytes = 0

    ## Select random tmpdir to distribute load across disks
    tmpdir = random.choice(c.get('DEFAULT', "ephemeral").split(','))

    start_index = campanile.stream_index()

    for line in fileinput.input("-"):
        ## Each input record describes either a whole object
        ## (mid == campanile.NULL) or a single multipart part.
        name, etag, size, mtime, mid, part, partcount, startbyte, stopbyte \
                = line.rstrip('\n').split('\t')[start_index:]

        srckey = src_bucket.get_key(name, validate=False)
        dstkey = dst_bucket.get_key(name, validate=False)

        if mid == campanile.NULL:
            headers = {}
            report_name = name
            expected_size = int(size)
        else:
            headers = {'Range': "bytes=%s-%s" % (startbyte, stopbyte)}
            report_name = "%s-%s" % (name, 'part')
            expected_size = int(stopbyte) - int(startbyte) + 1

        with tempfile.SpooledTemporaryFile(
                max_size=c.getint('DEFAULT', 'maxtmpsize'), dir=tmpdir) as fp:
            ## Download
            p = campanile.FileProgress(name, verbose=1)
            srckey.get_contents_to_file(fp, headers=headers, cb=p.progress)
            if fp.tell() != expected_size:
                raise Exception("Something bad happened for %s. "
                        "Expecting %s, but got %s" %
                        (report_name, expected_size, fp.tell()))

            campanile.counter(args.src, "OutputBytes", size)

            fp.flush()
            fp.seek(0)

            if mid == campanile.NULL:
                dstkey.cache_control = srckey.cache_control
                dstkey.content_type = srckey.content_type
                dstkey.content_encoding = srckey.content_encoding
                dstkey.content_disposition = srckey.content_disposition
                dstkey.content_language = srckey.content_language
                dstkey.metadata = srckey.metadata
                dstkey.md5 = srckey.md5
                report_name = name
            else:
                mp = boto.s3.multipart.MultiPartUpload(bucket=dst_bucket)
                mp.id = mid
                mp.key_name = name
                report_name = "%s-%s" % (name, part)

            ## Upload
            p = campanile.FileProgress(report_name, verbose=1)
            if mid == campanile.NULL:
                dstkey.set_contents_from_file(fp,
                        encrypt_key=srckey.encrypted, cb=p.progress)
                newetag = dstkey.etag.replace("\"", "")
            else:
                mpart = mp.upload_part_from_file(fp, part_num=int(part),
                        cb=p.progress)
                newetag = mpart.etag.replace("\"", "")

            if newetag != srckey.md5:
                ## Add alert
                raise Exception("Something bad happened for %s. "
                        "Expecting %s md5, but got %s" %
                        (report_name, srckey.md5, newetag))

        ## Emit the part map so the multipart upload can be completed later
        if mid != campanile.NULL:
            print "%s\t%s\t%s\t%s\t%s\t%s\t%s" % \
                    (name, etag, mid, newetag, part, startbyte, stopbyte)

        campanile.counter(args.dst, "InputBytes", expected_size)
        campanile.status("%s/%s:OK" % (args.dst, report_name))
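## For reference, a sketch of the tab-separated records this copier consumes
## (the columns after campanile.stream_index()).  The values below are
## invented for illustration, and the '-' NULL placeholder is an assumption;
## the multipart columns are appended by the multipartlist step shown later.
NULL = '-'  # assumed stand-in for campanile.NULL

## Whole-object record: the five multipart columns are NULL
whole_object = '\t'.join([
    'logs/2015/01/01.gz', '9a0364b9e99bb480dd25e1f0284c8555', '1048576',
    '2015-01-01T00:00:00.000Z', NULL, NULL, NULL, NULL, NULL])

## Part record: upload id, part number, part count, and byte range.
## Part 2 of 3 here covers Range: bytes=8388608-16777215, an 8 MiB slice
## of a 20 MiB object.
part_record = '\t'.join([
    'data/big.bin', 'c1d4a0f3b2e59687a0b1c2d3e4f5a6b7-3', '20971520',
    '2015-01-01T00:00:00.000Z', 'EXAMPLEUPLOADID', '2', '3',
    '8388608', '16777215'])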
def main():

    ## Args
    parser = argparse.ArgumentParser()
    parser.add_argument('--bucket', required=True, help='Bucket')
    parser.add_argument('--endpoint',
            default=boto.s3.connection.NoHostProvided,
            help='S3 endpoint')
    parser.add_argument('--profile',
            help='Boto profile used for connection')
    parser.add_argument('--dry-run', action="store_true",
            help='Do everything except complete multipart upload')
    args = parser.parse_args()

    ## S3 Connection
    bucket = S3Connection(suppress_consec_slashes=False,
            host=args.endpoint, is_secure=True,
            profile_name=args.profile).get_bucket(args.bucket)

    current_key = {'name': None}
    mparts = []

    ## Process input
    for line in fileinput.input("-"):
        key = {}
        key['name'], key['etag'], key['mid'], part_etag, part, startbyte, \
                stopbyte = line.rstrip('\n').split('\t')[0:]

        ## Print to save partmap
        print "%s" % line.rstrip('\n')

        ## Part object
        mpart = boto.s3.multipart.Part()
        mpart.part_number = int(part)
        mpart.etag = part_etag
        mpart.size = int(stopbyte) - int(startbyte)

        ## Keep accumulating parts while records belong to the same key
        if key['name'] == current_key['name']:
            mparts.append(mpart)
            current_key = key
            continue

        ## New key seen: complete the multipart upload for the previous key
        if mparts:
            if args.dry_run:
                print "Complete %s:%s\n%s" % (current_key['name'],
                        current_key['mid'], parts_to_xml(mparts))
            else:
                ## Added retry because partlist hard to recreate
                retry = 3
                while True:
                    try:
                        result = bucket.complete_multipart_upload(
                                current_key['name'], current_key['mid'],
                                parts_to_xml(mparts))
                        if current_key['etag'] != \
                                result.etag.replace("\"", ""):
                            ## Add alert; Maybe wrong partsize
                            pass
                        campanile.status("%s:OK" % current_key['mid'])
                        break
                    except Exception, e:
                        if retry == 0:
                            raise
                        retry -= 1
                        campanile.status("%s:FAIL" % current_key['mid'])
                        campanile.random_sleep()
                        ## Let's try a new bucket connection
                        bucket = S3Connection(suppress_consec_slashes=False,
                                host=args.endpoint, is_secure=True,
                                profile_name=args.profile).get_bucket(args.bucket)

        mparts = []
        mparts.append(mpart)
        current_key = key

    ## Complete the final key once input is exhausted
    if mparts:
        if args.dry_run:
            print "Complete %s:%s\n%s" % (current_key['name'],
                    current_key['mid'], parts_to_xml(mparts))
        else:
            ## Added retry because partlist hard to recreate
            retry = 3
            while True:
                try:
                    result = bucket.complete_multipart_upload(
                            current_key['name'], current_key['mid'],
                            parts_to_xml(mparts))
                    if current_key['etag'] != result.etag.replace("\"", ""):
                        ## Add alert; Maybe wrong partsize
                        pass
                    campanile.status("%s:OK" % current_key['mid'])
                    break
                except Exception, e:
                    if retry == 0:
                        raise
                    retry -= 1
                    campanile.status("%s:FAIL" % current_key['mid'])
                    campanile.random_sleep()
                    ## Let's try a new bucket connection
                    bucket = S3Connection(suppress_consec_slashes=False,
                            host=args.endpoint, is_secure=True,
                            profile_name=args.profile).get_bucket(args.bucket)

# -----------------------------------------------------------------------------
# Main
# -----------------------------------------------------------------------------
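## parts_to_xml() is a campanile helper used above but not shown here.
## A minimal sketch of what it needs to produce -- the standard S3
## CompleteMultipartUpload body accepted by boto's
## bucket.complete_multipart_upload() -- assuming only PartNumber and ETag
## are required per part; the real implementation may differ.
def parts_to_xml(mparts):
    xml = '<CompleteMultipartUpload>\n'
    for p in sorted(mparts, key=lambda x: x.part_number):
        xml += '  <Part>\n'
        xml += '    <PartNumber>%d</PartNumber>\n' % p.part_number
        xml += '    <ETag>%s</ETag>\n' % p.etag
        xml += '  </Part>\n'
    xml += '</CompleteMultipartUpload>'
    return xml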
def main():

    ## Args
    parser = argparse.ArgumentParser()
    parser.add_argument('--src-bucket', required=True, dest='src',
            help='Source S3 bucket')
    parser.add_argument('--dst-bucket', required=True, dest='dst',
            help='Destination S3 bucket')
    parser.add_argument('--src-endpoint',
            default=boto.s3.connection.NoHostProvided,
            help='S3 source endpoint')
    parser.add_argument('--dst-endpoint',
            default=boto.s3.connection.NoHostProvided,
            help='S3 destination endpoint')
    parser.add_argument('--src-profile',
            help='Boto profile used for source connection')
    parser.add_argument('--dst-profile',
            help='Boto profile used for destination connection')
    parser.add_argument('--dry-run', action="store_true",
            help='Auto generate multipart-uid')
    args = parser.parse_args()

    ## S3 Bucket Connections
    src_bucket = S3Connection(suppress_consec_slashes=False,
            host=args.src_endpoint, is_secure=True,
            profile_name=args.src_profile).get_bucket(args.src, validate=False)
    dst_bucket = S3Connection(suppress_consec_slashes=False,
            host=args.dst_endpoint, is_secure=True,
            profile_name=args.dst_profile).get_bucket(args.dst, validate=False)

    start_index = campanile.stream_index()

    for line in fileinput.input("-"):
        record = line.rstrip('\n').split('\t')[start_index:]
        name, etag, size = record[0:3]

        partcount = campanile.partcount(etag)
        if partcount == 0:
            print '\t'.join(record + [campanile.NULL] * 5)
            continue

        ## Find partsize
        partsize = campanile.cli_chunksize(int(size))
        if partcount != int(math.ceil(float(size) / partsize)):
            campanile.status("Can't calculate partsize for %s/%s\n" %
                    (args.src, name))
            ## Add alert
            continue

        if args.dry_run:
            mid = uuid.uuid1()
        else:
            srckey = src_bucket.get_key(name, validate=True)
            metadata = srckey.metadata
            headers = {}

            ## Set Cache and Content Values
            if srckey.cache_control is not None:
                headers['Cache-Control'] = srckey.cache_control
            if srckey.content_type is not None:
                headers['Content-Type'] = srckey.content_type
            if srckey.content_encoding is not None:
                headers['Content-Encoding'] = srckey.content_encoding
            if srckey.content_disposition is not None:
                headers['Content-Disposition'] = srckey.content_disposition
            if srckey.content_language is not None:
                headers['Content-Language'] = srckey.content_language

            ## Initiate Multipart Upload
            mid = dst_bucket.initiate_multipart_upload(name,
                    headers=headers, metadata=metadata,
                    encrypt_key=srckey.encrypted).id

        for i in range(partcount):
            offset = partsize * i
            bytes = min(partsize, int(size) - offset)
            print '\t'.join(record) + "\t%s\t%s\t%s\t%s\t%s" % \
                    (mid, (i + 1), partcount, offset, (offset + bytes - 1))
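## campanile.partcount() and campanile.cli_chunksize() are helpers from the
## campanile module that are not shown here.  Plausible sketches, assuming
## the usual "<md5>-<parts>" multipart ETag convention and an AWS CLI-style
## chunk-sizing heuristic (8 MiB chunks, doubled until the object fits in
## 10,000 parts); the real implementations may differ.
import math

def partcount(etag):
    ## Multipart ETags end in "-<number of parts>"; a plain MD5 means the
    ## object was uploaded in a single PUT.
    if '-' in etag:
        return int(etag.split('-')[-1])
    return 0

def cli_chunksize(size, default=8 * 1024 * 1024, max_parts=10000):
    ## Double the chunk size until the object fits within max_parts parts.
    chunksize = default
    while math.ceil(float(size) / chunksize) > max_parts:
        chunksize *= 2
    return chunksize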