Example #1
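This appears to be the object-copy worker from campanile, a boto-based, Hadoop-streaming-style toolkit for bucket-to-bucket S3 copies: it reads tab-separated object records on stdin, downloads each object (or the byte range of one multipart part) from the source bucket into a spooled temporary file, re-uploads it to the destination, and verifies both the byte count and the resulting ETag against the source MD5.

The campanile helper module is not shown in any of these examples. As a reading aid, here is a minimal sketch of the reporting helpers the scripts call, assuming the standard Hadoop streaming reporter protocol (counter and status lines written to stderr); the real module may differ:

import sys

def counter(group, name, amount):
    ## Hadoop streaming picks counter updates out of stderr lines of
    ## the form reporter:counter:<group>,<counter>,<amount>
    sys.stderr.write("reporter:counter:%s,%s,%s\n" % (group, name, amount))

def status(message):
    ## Status updates use reporter:status:<message>
    sys.stderr.write("reporter:status:%s\n" % message)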
def main():

    ## Args
    parser = argparse.ArgumentParser()
    parser.add_argument('--src-bucket', required=True, dest='src',
            help='Source S3 bucket')
    parser.add_argument('--dst-bucket', required=True, dest='dst',
            help='Destination S3 bucket')
    parser.add_argument('--src-endpoint', 
            default=boto.s3.connection.NoHostProvided,
            help='S3 source endpoint')
    parser.add_argument('--dst-endpoint', 
            default=boto.s3.connection.NoHostProvided,
            help='S3 destination endpoint')
    parser.add_argument('--src-profile', 
            help='Boto profile used for source connection')
    parser.add_argument('--dst-profile', 
            help='Boto profile used for destination connection')
    parser.add_argument('--config', '-c', default="./campanile.cfg",
            help='Path to config file')
    args = parser.parse_args()

    ## Config Object
    cfgfiles = campanile.cfg_file_locations()
    cfgfiles.insert(0, args.config)
    c = ConfigParser.SafeConfigParser({'ephemeral':'/tmp'})
    c.read(cfgfiles)

    ## S3 Bucket Connections
    src_bucket = S3Connection(suppress_consec_slashes=False,\
            host=args.src_endpoint,is_secure=True,
            profile_name=args.src_profile).\
            get_bucket(args.src,validate=False)

    dst_bucket = S3Connection(suppress_consec_slashes=False,\
            host=args.dst_endpoint,is_secure=True,
            profile_name=args.dst_profile).\
            get_bucket(args.dst,validate=False) 

    ## Reporting Counters
    files = 0
    movedbytes = 0

    ## Select random tmpdir to distribute load across disks
    tmpdir = random.choice(c.get('DEFAULT',"ephemeral").split(','))

    start_index = campanile.stream_index()
    for line in fileinput.input("-"):
        name, etag, size, mtime, mid, part, partcount, startbyte, stopbyte \
                = line.rstrip('\n').split('\t')[start_index:]
        
        srckey = src_bucket.get_key(name, validate=False)
        dstkey = dst_bucket.get_key(name, validate=False)

        if mid == campanile.NULL:
            headers = {}
            report_name = name
            expected_size = int(size)
        else:
            headers = {'Range': "bytes=%s-%s" % (startbyte, stopbyte)}
            report_name = "%s-%s" % (name, 'part')
            expected_size = int(stopbyte) - int(startbyte) + 1

        with tempfile.SpooledTemporaryFile(
                max_size=c.getint('DEFAULT', 'maxtmpsize'),
                dir=tmpdir) as fp:
            ## Download
            p = campanile.FileProgress(name, verbose=1)
            srckey.get_contents_to_file(fp, headers=headers, cb=p.progress)

            if fp.tell() != expected_size:
                raise Exception("Something bad happened for %s. "
                        "Expecting %s, but got %s" %
                        (report_name, expected_size, fp.tell()))

            ## Report the bytes actually fetched; a ranged part copy is
            ## smaller than the full object size
            campanile.counter(args.src, "OutputBytes", expected_size)
            fp.flush()
            fp.seek(0)

            if mid == campanile.NULL:
                dstkey.cache_control = srckey.cache_control
                dstkey.content_type = srckey.content_type
                dstkey.content_encoding = srckey.content_encoding
                dstkey.content_disposition = srckey.content_disposition
                dstkey.content_language = srckey.content_language
                dstkey.metadata = srckey.metadata
                dstkey.md5 = srckey.md5
                report_name = name
            else:
                mp = boto.s3.multipart.MultiPartUpload(bucket=dst_bucket)
                mp.id = mid
                mp.key_name = name
                report_name = "%s-%s" % (name, part)

            ## Upload
            p = campanile.FileProgress(report_name, verbose=1)
            if mid == campanile.NULL:
                dstkey.set_contents_from_file(fp,
                        encrypt_key=srckey.encrypted, cb=p.progress)
                newetag = dstkey.etag.replace("\"","")
            else:
                mpart = mp.upload_part_from_file(fp,part_num=int(part),
                    cb=p.progress)
                newetag = mpart.etag.replace("\"","")

            if newetag != srckey.md5:
                ## Add alert
                raise Exception("Something bad happened for %s. \
                        Expecting %s md5, but got %s" % \
                        (report_name, srckey.md5, newetag))

            if mid != campanile.NULL:
                print "%s\t%s\t%s\t%s\t%s\t%s\t%s" % \
                        (name, etag, mid, newetag, part, startbyte, stopbyte)
            
            campanile.counter(args.dst, "InputBytes", expected_size)
            campanile.status("%s/%s:OK" % (args.dst,report_name))
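
The next entry point is the matching completer: it consumes the part-map records printed by the copy worker, groups them by key name, and completes each multipart upload once every part of an object has been seen, retrying a few times with a fresh bucket connection when completion fails.
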
def main():
    ## Args
    parser = argparse.ArgumentParser()
    parser.add_argument('--bucket', required=True, help='Bucket')
    parser.add_argument('--endpoint', 
            default=boto.s3.connection.NoHostProvided, help='S3 endpoint')
    parser.add_argument('--profile', help='Boto profile used for connection')
    parser.add_argument('--dry-run', action="store_true",
            help='Do everything except complete multipart upload')
    args = parser.parse_args()

    ## S3 Connection
    bucket = S3Connection(suppress_consec_slashes=False,
            host=args.endpoint,is_secure=True,
            profile_name=args.profile).get_bucket(args.bucket)

    current_key = {'name': None}
    mparts = []
    ## Process input
    for line in fileinput.input("-"):
        key = {}
        key['name'], key['etag'], key['mid'], part_etag, part, startbyte, \
                stopbyte = line.rstrip('\n').split('\t')
        
        ## Echo the input line so the part map is preserved downstream
        print line.rstrip('\n')

        ## Part object
        mpart = boto.s3.multipart.Part()
        mpart.part_number = int(part)
        mpart.etag = part_etag
        mpart.size = int(stopbyte) - int(startbyte) + 1  ## byte range is inclusive

        if key['name'] == current_key['name']:
            mparts.append(mpart)
            current_key = key
            continue

        if mparts:
            if args.dry_run:
                print "Complete %s:%s\n%s" % (current_key['name'], 
                        current_key['mid'],parts_to_xml(mparts))
            else:
                ## Retry, because the part list is hard to recreate
                retry = 3
                while True:
                    try:
                        result = bucket.complete_multipart_upload(\
                                current_key['name'], current_key['mid'],
                                parts_to_xml(mparts))
                        if current_key['etag'] != \
                                result.etag.replace("\"", ""):
                            ## Add alert; Maybe wrong partsize
                            pass
                        campanile.status("%s:OK" % current_key['mid'])
                        break
                    except Exception:
                        if retry == 0:
                            raise
                        retry -= 1
                        campanile.status("%s:FAIL" % current_key['mid'])
                        campanile.random_sleep()
                        ## Let's try a new bucket connection
                        bucket = S3Connection(suppress_consec_slashes=False,
                            host=args.endpoint,is_secure=True,
                            profile_name=args.profile).get_bucket(args.bucket)

        mparts = [mpart]
        current_key = key
    if mparts:
        if args.dry_run:
            print "Complete %s:%s\n%s" % (current_key['name'], 
                    current_key['mid'],parts_to_xml(mparts))
        else:
            ## Retry, because the part list is hard to recreate
            retry = 3 
            while True:
                try:
                    result = bucket.complete_multipart_upload(\
                            current_key['name'], current_key['mid'],
                            parts_to_xml(mparts))
                    if current_key['etag'] != result.etag.replace("\"", ""):
                        ## Add alert; Maybe wrong partsize
                        pass
                    campanile.status("%s:OK" % current_key['mid'])
                    break
                except Exception:
                    if retry == 0:
                        raise
                    retry -= 1
                    campanile.status("%s:FAIL" % current_key['mid'])
                    campanile.random_sleep()
                    ## Let's try a new bucket connection
                    bucket = S3Connection(suppress_consec_slashes=False,
                        host=args.endpoint,is_secure=True,
                        profile_name=args.profile).get_bucket(args.bucket)
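
Both completion paths above call a parts_to_xml() helper that is not shown. A plausible sketch, assuming it renders the CompleteMultipartUpload XML body that boto's Bucket.complete_multipart_upload() expects from a list of boto.s3.multipart.Part objects; the real helper may differ:

def parts_to_xml(mparts):
    ## Hypothetical reconstruction: S3 completes a multipart upload
    ## from an XML document listing each part number with its ETag.
    lines = ['<CompleteMultipartUpload>']
    for p in sorted(mparts, key=lambda x: x.part_number):
        lines.append('  <Part>')
        lines.append('    <PartNumber>%d</PartNumber>' % p.part_number)
        lines.append('    <ETag>%s</ETag>' % p.etag)
        lines.append('  </Part>')
    lines.append('</CompleteMultipartUpload>')
    return '\n'.join(lines)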

Example #4
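This example repeats the copy worker from Example #1 with different source formatting; the logic is identical.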
def main():

    ## Args
    parser = argparse.ArgumentParser()
    parser.add_argument('--src-bucket',
                        required=True,
                        dest='src',
                        help='Source S3 bucket')
    parser.add_argument('--dst-bucket',
                        required=True,
                        dest='dst',
                        help='Destination S3 bucket')
    parser.add_argument('--src-endpoint',
                        default=boto.s3.connection.NoHostProvided,
                        help='S3 source endpoint')
    parser.add_argument('--dst-endpoint',
                        default=boto.s3.connection.NoHostProvided,
                        help='S3 destination endpoint')
    parser.add_argument('--src-profile',
                        help='Boto profile used for source connection')
    parser.add_argument('--dst-profile',
                        help='Boto profile used for destination connection')
    parser.add_argument('--config',
                        '-c',
                        default="./campanile.cfg",
                        help='Path to config file')
    args = parser.parse_args()

    ## Config Object
    cfgfiles = campanile.cfg_file_locations()
    cfgfiles.insert(0, args.config)
    c = ConfigParser.SafeConfigParser({'ephemeral': '/tmp'})
    c.read(cfgfiles)

    ## S3 Bucket Connections
    src_bucket = S3Connection(suppress_consec_slashes=False,\
            host=args.src_endpoint,is_secure=True,
            profile_name=args.src_profile).\
            get_bucket(args.src,validate=False)

    dst_bucket = S3Connection(suppress_consec_slashes=False,\
            host=args.dst_endpoint,is_secure=True,
            profile_name=args.dst_profile).\
            get_bucket(args.dst,validate=False)

    ## Reporting Counters
    files = 0
    movedbytes = 0

    ## Select random tmpdir to distribute load across disks
    tmpdir = random.choice(c.get('DEFAULT', "ephemeral").split(','))

    start_index = campanile.stream_index()
    for line in fileinput.input("-"):
        name, etag, size, mtime, mid, part, partcount, startbyte, stopbyte \
                = line.rstrip('\n').split('\t')[start_index:]

        srckey = src_bucket.get_key(name, validate=False)
        dstkey = dst_bucket.get_key(name, validate=False)

        if mid == campanile.NULL:
            headers = {}
            report_name = name
            expected_size = int(size)
        else:
            headers = {'Range': "bytes=%s-%s" % (startbyte, stopbyte)}
            report_name = "%s-%s" % (name, 'part')
            expected_size = int(stopbyte) - int(startbyte) + 1

        with tempfile.SpooledTemporaryFile(
                max_size=c.getint('DEFAULT', 'maxtmpsize'),
                dir=tmpdir) as fp:
            ## Download
            p = campanile.FileProgress(name, verbose=1)
            srckey.get_contents_to_file(fp, headers=headers, cb=p.progress)

            if fp.tell() != expected_size:
                raise Exception("Something bad happened for %s. "
                        "Expecting %s, but got %s" %
                        (report_name, expected_size, fp.tell()))

            ## Report the bytes actually fetched; a ranged part copy is
            ## smaller than the full object size
            campanile.counter(args.src, "OutputBytes", expected_size)
            fp.flush()
            fp.seek(0)

            if mid == campanile.NULL:
                dstkey.cache_control = srckey.cache_control
                dstkey.content_type = srckey.content_type
                dstkey.content_encoding = srckey.content_encoding
                dstkey.content_disposition = srckey.content_disposition
                dstkey.content_language = srckey.content_language
                dstkey.metadata = srckey.metadata
                dstkey.md5 = srckey.md5
                report_name = name
            else:
                mp = boto.s3.multipart.MultiPartUpload(bucket=dst_bucket)
                mp.id = mid
                mp.key_name = name
                report_name = "%s-%s" % (name, part)

            ## Upload
            p = campanile.FileProgress(report_name, verbose=1)
            if mid == campanile.NULL:
                dstkey.set_contents_from_file(fp,
                                              encrypt_key=srckey.encrypted,
                                              cb=p.progress)
                newetag = dstkey.etag.replace("\"", "")
            else:
                mpart = mp.upload_part_from_file(fp,
                                                 part_num=int(part),
                                                 cb=p.progress)
                newetag = mpart.etag.replace("\"", "")

            if newetag != srckey.md5:
                ## Add alert
                raise Exception("Something bad happened for %s. \
                        Expecting %s md5, but got %s"                                                      % \
                        (report_name, srckey.md5, newetag))

            if mid != campanile.NULL:
                print "%s\t%s\t%s\t%s\t%s\t%s\t%s" % \
                        (name, etag, mid, newetag, part, startbyte, stopbyte)

            campanile.counter(args.dst, "InputBytes", expected_size)
            campanile.status("%s/%s:OK" % (args.dst, report_name))
Example #5
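This example is the initiator that feeds the copy worker: it detects multipart objects by the part-count suffix on their ETags, initiates a matching multipart upload on the destination with the source object's headers and metadata, and emits one record per part with its byte range. Single-part objects pass through with NULL placeholder fields.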
def main():

    ## Args
    parser = argparse.ArgumentParser()
    parser.add_argument('--src-bucket', required=True, dest='src',
            help='Source S3 bucket')
    parser.add_argument('--dst-bucket', required=True, dest='dst',
            help='Destination S3 bucket')
    parser.add_argument('--src-endpoint', 
            default=boto.s3.connection.NoHostProvided,
            help='S3 source endpoint')
    parser.add_argument('--dst-endpoint', 
            default=boto.s3.connection.NoHostProvided,
            help='S3 destination endpoint')
    parser.add_argument('--src-profile', 
            help='Boto profile used for source connection')
    parser.add_argument('--dst-profile', 
            help='Boto profile used for destination connection')
    parser.add_argument('--dry-run', action="store_true",
            help='Skip initiation and auto-generate a multipart upload id')
    args = parser.parse_args()

    ## S3 Bucket Connections
    src_bucket = S3Connection(suppress_consec_slashes=False,\
            host=args.src_endpoint,is_secure=True,
            profile_name=args.src_profile).\
            get_bucket(args.src,validate=False)

    dst_bucket = S3Connection(suppress_consec_slashes=False,\
            host=args.dst_endpoint,is_secure=True,
            profile_name=args.dst_profile).\
            get_bucket(args.dst,validate=False) 

    start_index = campanile.stream_index()
    for line in fileinput.input("-"):
        record = line.rstrip('\n').split('\t')[start_index:]
        name, etag, size = record[0:3]

        partcount = campanile.partcount(etag)
        if partcount == 0:
            print '\t'.join(record + [campanile.NULL] * 5)
            continue

        ## Find partsize
        partsize = campanile.cli_chunksize(int(size))
        if partcount != int(math.ceil(float(size)/partsize)):
            campanile.status("Can't calculate partsize for %s/%s\n" %
                    (args.src, name))
            ## Add alert
            continue

        if args.dry_run:
            mid = uuid.uuid1()
        else:
            srckey = src_bucket.get_key(name, validate=True)
            metadata = srckey.metadata
            headers = {}
            
            ## Set Cache and Content Values
            if srckey.cache_control is not None:
                headers['Cache-Control'] = srckey.cache_control
            if srckey.content_type is not None:
                headers['Content-Type'] = srckey.content_type
            if srckey.content_encoding is not None:
                headers['Content-Encoding'] = srckey.content_encoding
            if srckey.content_disposition is not None:
                headers['Content-Disposition'] = srckey.content_disposition
            if srckey.content_language is not None:
                headers['Content-Language'] = srckey.content_language

            ## Initiate Multipart Upload
            mid = dst_bucket.initiate_multipart_upload(name,
                    headers=headers,
                    metadata=metadata,
                    encrypt_key=srckey.encrypted).id

        for i in range(partcount):
            offset = partsize * i
            nbytes = min(partsize, int(size) - offset)
            print '\t'.join(record) + "\t%s\t%s\t%s\t%s\t%s" % (mid,
                    (i + 1), partcount, offset, (offset + nbytes - 1))
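
The part math above leans on two more campanile helpers that are not shown. Sketches under stated assumptions: partcount() presumably parses the '-<N>' suffix that S3 puts on multipart ETags, and cli_chunksize() presumably mirrors the AWS CLI rule of doubling the part size from an 8 MiB default until the object fits within S3's 10,000-part limit. The real implementations may differ:

import math

def partcount(etag):
    ## Multipart ETags look like '<md5>-<parts>'; plain MD5 ETags
    ## mean a single-part object, reported here as 0.
    if '-' in etag:
        return int(etag.split('-')[-1])
    return 0

def cli_chunksize(size, default=8 * 1024 * 1024, max_parts=10000):
    ## Double the chunk size until the object fits in max_parts parts.
    chunksize = default
    while math.ceil(float(size) / chunksize) > max_parts:
        chunksize *= 2
    return chunksize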
Example #6
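This final example repeats the initiator from Example #5 with different source formatting; the logic is identical.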
def main():

    ## Args
    parser = argparse.ArgumentParser()
    parser.add_argument('--src-bucket',
                        required=True,
                        dest='src',
                        help='Source S3 bucket')
    parser.add_argument('--dst-bucket',
                        required=True,
                        dest='dst',
                        help='Destination S3 bucket')
    parser.add_argument('--src-endpoint',
                        default=boto.s3.connection.NoHostProvided,
                        help='S3 source endpoint')
    parser.add_argument('--dst-endpoint',
                        default=boto.s3.connection.NoHostProvided,
                        help='S3 destination endpoint')
    parser.add_argument('--src-profile',
                        help='Boto profile used for source connection')
    parser.add_argument('--dst-profile',
                        help='Boto profile used for destination connection')
    parser.add_argument('--dry-run',
                        action="store_true",
                        help='Skip initiation and auto-generate a multipart upload id')
    args = parser.parse_args()

    ## S3 Bucket Connections
    src_bucket = S3Connection(suppress_consec_slashes=False,\
            host=args.src_endpoint,is_secure=True,
            profile_name=args.src_profile).\
            get_bucket(args.src,validate=False)

    dst_bucket = S3Connection(suppress_consec_slashes=False,\
            host=args.dst_endpoint,is_secure=True,
            profile_name=args.dst_profile).\
            get_bucket(args.dst,validate=False)

    start_index = campanile.stream_index()
    for line in fileinput.input("-"):
        record = line.rstrip('\n').split('\t')[start_index:]
        name, etag, size = record[0:3]

        partcount = campanile.partcount(etag)
        if partcount == 0:
            print '\t'.join(record + [campanile.NULL] * 5)
            continue

        ## Find partsize
        partsize = campanile.cli_chunksize(int(size))
        if partcount != int(math.ceil(float(size) / partsize)):
            campanile.status("Can't calculate partsize for %s/%s\n" %
                             (args.src, name))
            ## Add alert
            continue

        if args.dry_run:
            mid = uuid.uuid1()
        else:
            srckey = src_bucket.get_key(name, validate=True)
            metadata = srckey.metadata
            headers = {}

            ## Set Cache and Content Values
            if srckey.cache_control is not None:
                headers['Cache-Control'] = srckey.cache_control
            if srckey.content_type is not None:
                headers['Content-Type'] = srckey.content_type
            if srckey.content_encoding is not None:
                headers['Content-Encoding'] = srckey.content_encoding
            if srckey.content_disposition is not None:
                headers['Content-Disposition'] = srckey.content_disposition
            if srckey.content_language is not None:
                headers['Content-Language'] = srckey.content_language

            ## Initiate Multipart Upload
            mid = dst_bucket.initiate_multipart_upload(
                name,
                headers=headers,
                metadata=metadata,
                encrypt_key=srckey.encrypted).id

        for i in range(partcount):
            offset = partsize * i
            nbytes = min(partsize, int(size) - offset)
            print '\t'.join(record) + "\t%s\t%s\t%s\t%s\t%s" % (
                    mid, (i + 1), partcount, offset, (offset + nbytes - 1))