Example #1
 def upload_trunk(self, filepath, mp, q, id):
     while not q.empty():
         chunk = q.get()
         fp = FileChunkIO(filepath, 'r', offset=chunk.offset, bytes=chunk.length)
         mp.upload_part_from_file(fp, part_num=chunk.num)
         fp.close()
         q.task_done()
Example #2
def chunks(path, chunk_size=CHUNK_SIZE):
    size = os.stat(path).st_size
    total_chunks = int(math.ceil(size / float(chunk_size)))
    for i in xrange(total_chunks):
        offset = chunk_size * i
        psize = min(chunk_size, size - offset)
        chunk = FileChunkIO(path, mode='r', offset=offset, bytes=psize)
        yield chunk
        chunk.close()
Example #3
def upload_part(settings, credentials, multipart_id, part_id, filename, offset, bytes, num_chunks):
    conn = S3Connection(credentials['aws_access_key_id'], credentials['aws_secret_access_key'])
    s3_backups = conn.get_bucket(settings['s3_bucket'])
    for mp in s3_backups.get_all_multipart_uploads():
        if mp.id == multipart_id:
            fp = FileChunkIO(filename, 'r', offset=offset, bytes=bytes)
            mp.upload_part_from_file(fp, part_id)
            fp.close()
            progress_line(filename, part_id, num_chunks)
Example #4
 def download_chrunk(self, filepath, key_name, bucket_name, q, id):
     while not q.empty():
         chrunk = q.get()
         offset = chrunk.offset
         length = chrunk.length
         bucket = self.conn.get_bucket(bucket_name)
         # HTTP Range is inclusive, so the last requested byte is offset + length - 1
         resp = bucket.connection.make_request(
             'GET', bucket_name, key_name,
             headers={'Range': "bytes=%d-%d" % (offset, offset + length - 1)})
         data = resp.read(length)
         fp = FileChunkIO(filepath, 'r+', offset=chrunk.offset, bytes=chrunk.length)
         fp.write(data)
         fp.close()
         q.task_done()
Example #5
 def __init__(self, name, mode='r', closefd=True, offset=0, bytes=None, cb=None,
              upload_id=0, part_num=0, *args, **kwargs):
     FileChunkIO.__init__(self, name, mode, closefd, offset, bytes,  *args, **kwargs)
     
     self._callback = cb
     self.upload_id = upload_id
     self.part_num = part_num
     self.received = 0
     self.lastTimestamp = time.time()
     
     self.allReceived = 0
     
     self.cancelRead = False
Example #6
 def upload(self, keyname, filename, **kwargs):
     fsize = os.stat(filename).st_size
     mp = self.bucket.initiate_multipart_upload(keyname)
     ptr = 0
     part = 1
     upload_kwargs = {}
     if kwargs.get("cb", True):
         upload_kwargs = dict(cb=self.cb, num_cb=10)
     while ptr < fsize:
         # bytes is a chunk length, capped so the last part stops at the end of the file
         f = FileChunkIO(filename, offset=ptr, bytes=min(self.chunk_size, fsize - ptr))
         ok = False
         while not ok:
             try:
                 mp.upload_part_from_file(f, part, **upload_kwargs)
                 ok = True
             except Exception:
                 # Silly retry after an exception; rewind so the part is resent from its start
                 f.seek(0)
         f.close()
         ptr += self.chunk_size
         part += 1
     # Complete the multipart upload so the key exists before setting its ACL
     mp.complete_upload()
     k = Key(self.bucket)
     k.key = keyname
     k.set_acl("private")
Example #7
 def _upload(retries_left=amount_of_retries):
     try:
         if debug == 1:
             print('Start uploading part #%d ...' % part_num)
         conn = S3Connection(aws_key, aws_secret)
         conn.debug = debug
         bucket = conn.get_bucket(bucketname)
         for mp in bucket.get_all_multipart_uploads():
             if mp.id == multipart_id:
                 with FileChunkIO(source_path, 'r', offset=offset,
                                  bytes=bytes) as fp:
                     mp.upload_part_from_file(fp=fp, part_num=part_num,
                                              cb=cb, num_cb=num_cb)
                 break
     except Exception as exc:
         if retries_left:
             _upload(retries_left=retries_left - 1)
         else:
             print('Failed uploading part #%d' % part_num)
             raise exc
     else:
         if debug == 1:
             print('... Uploaded part #%d' % part_num)
Example #8
    def save_big_files(self,
                       bucket_name,
                       key_id,
                       source_file_path,
                       chunk_sze=52428800):

        source_size = os.stat(source_file_path).st_size

        bucket = self._bucket_connection(bucket_name)

        mp = bucket.initiate_multipart_upload(key_id)

        chunk_size = chunk_sze
        chunk_count = int(math.ceil(source_size / float(chunk_size)))

        for i in range(chunk_count):
            offset = chunk_size * i
            bytes = min(chunk_size, source_size - offset)
            with FileChunkIO(source_file_path, 'r', offset=offset,
                             bytes=bytes) as fp:
                mp.upload_part_from_file(fp, part_num=i + 1)

        mp.complete_upload()
Example #9
def uploadDataS3(source_path, b):
    # Get file info
    source_size = os.stat(source_path).st_size

    # Create a multipart upload request
    mp = b.initiate_multipart_upload(os.path.basename(source_path))

    # Use a chunk size of 50 MiB (feel free to change this)
    chunk_size = 52428800
    chunk_count = int(math.ceil(source_size / float(chunk_size)))

    # Send the file parts, using FileChunkIO to create a file-like object
    # that points to a certain byte range within the original file. We
    # set bytes to never exceed the original file size.
    for i in range(chunk_count):
        offset = chunk_size * i
        bytes = min(chunk_size, source_size - offset)
        with FileChunkIO(source_path, 'r', offset=offset,
                         bytes=bytes) as fp:
            mp.upload_part_from_file(fp, part_num=i + 1)

    # Finish the upload
    mp.complete_upload()
Example #10
def upload_to_S3Bucket():
    logging.debug("Inside upload_to_S3Bucket")
    try:
        # Connect to S3
        conn = S3Connection('Secret Code',
                            'Secret Key',
                            calling_format=OrdinaryCallingFormat())
        b = conn.get_bucket('mscience-test-upload')
        # Get file info
        source_path = os.path.join(
            os.path.dirname(os.path.abspath("MDataE-FRConsumer.csv")),
            'MDataE-FRConsumer.csv')
        source_size = os.stat(source_path).st_size
        # Create a multipart upload request
        mp = b.initiate_multipart_upload(os.path.basename(source_path))
        # Use a chunk size of 50 MiB (feel free to change this)
        chunk_size = 52428800
        chunk_count = int(math.ceil(source_size / float(chunk_size)))

        # Send the file parts, using FileChunkIO to create a file-like object
        # that points to a certain byte range within the original file. We
        # set bytes to never exceed the original file size.
        for i in range(chunk_count):
            offset = chunk_size * i
            bytes = min(chunk_size, source_size - offset)
            with FileChunkIO(source_path, 'r', offset=offset,
                             bytes=bytes) as fp:
                mp.upload_part_from_file(fp, part_num=i + 1)

        # Finish the upload
        mp.complete_upload()
        print("File Uploaded")

        # if os.path.exists("MDataE-UKConsumer.csv"):
        #     os.remove("MDataE-UKConsumer.csv")
    except Exception as e:
        logging.debug(e)
        raise e
Example #11
def upload(bucket, source_file, dest_folder=None):
    """ http://boto.readthedocs.org/en/latest/s3_tut.html """
    # Get file info
    source_size = os.stat(source_file).st_size
    # Create a multipart upload request
    dest_path = os.path.basename(source_file)
    if dest_folder:
        if not dest_folder.endswith("/"):
            dest_folder += "/"
        dest_path = dest_folder + dest_path

    mp = bucket.initiate_multipart_upload(dest_path)
    start_time = time.mktime(time.localtime())

    # Use a chunk size of 8 MiB (feel free to change this)
    chunk_size = 8388608
    chunk_count = int(math.ceil(source_size / float(chunk_size)))
    speed = 0
    # Send the file parts, using FileChunkIO to create a file-like object
    # that points to a certain byte range within the original file. We
    # set bytes to never exceed the original file size.
    for i in range(chunk_count):
        offset = chunk_size * i
        bytes = min(chunk_size, source_size - offset)
        with FileChunkIO(source_file, 'r', offset=offset, bytes=bytes) as fp:
            mp.upload_part_from_file(fp, part_num=i + 1)
            end_time = time.mktime(time.localtime())
            if i > 1:
                speed = int(offset / (end_time - start_time))
            update_progress(i / float(chunk_count), speed,
                            (end_time - start_time))

    # Finish the upload
    mp.complete_upload()
    update_progress(chunk_count / float(chunk_count), speed,
                    (end_time - start_time))
Example #12
    def upload_local_file(self, file_path, obj_path):
        hasher = hashlib.md5()
        try:
            key = self.bucket.new_key(obj_path)
            mp = self.bucket.initiate_multipart_upload(obj_path)
            chunk_size = 5 * 2**20
            file_size = os.stat(file_path).st_size
            # s3 multipart upload should be larger than 5MB
            chunk_count = int(math.ceil(file_size / float(chunk_size)))
            for i in range(chunk_count):
                offset = chunk_size * i
                bytes = min(chunk_size, file_size - offset)
                with FileChunkIO(file_path, 'r', offset=offset,
                                 bytes=bytes) as fp:
                    # feed the chunk to the md5 hasher, then rewind before uploading
                    hasher.update(fp.read())
                    fp.seek(0)
                    mp.upload_part_from_file(fp, part_num=i + 1)

            mp.complete_upload()
            self.bucket.set_acl('public-read', obj_path)
            return hasher.hexdigest()
        except S3ResponseError:
            logger.exception("Could not upload key '%s' to S3", obj_path)
        except Exception, ex:
            logger.exception("Could not read source to key '%s' to S3: %s" %
                             (obj_path, ex))
Example #13
def send_backup_to_s3(bucketname='tweetstock-mongo-dump',
                      keyname='mongodump_cmp.tar.gz',
                      archive='mongodump_cmp.tar.gz'):
    """chunked upload to S3
	adapted from http://boto.readthedocs.org/en/latest/s3_tut.html
	"""

    con = get_s3_connection()
    bucket = con.get_bucket(bucketname)

    mp = bucket.initiate_multipart_upload(keyname)

    filesize = os.stat(archive).st_size
    chunksize = 52428800
    chunkcount = int(math.ceil(filesize / float(chunksize)))

    for i in range(chunkcount):
        print "Uploading part", i + 1
        offset = chunksize * i
        b = min(chunksize, filesize - offset)
        with FileChunkIO(archive, 'r', offset=offset, bytes=b) as fc:
            mp.upload_part_from_file(fc, part_num=i + 1)
    mp.complete_upload()
    print 'Upload Complete'
Example #14
    def test_multipart_upload(self):
        import math, os
        import time
        from filechunkio import FileChunkIO
        bucket = conn.get_bucket(test_bucket)

        source_path = '/root/jdk.tar.gz'
        source_size = os.stat(source_path).st_size

        mp = bucket.initiate_multipart_upload(os.path.basename(source_path), policy="public-read-write")
        print mp.id

        chunk_size = 52428800
        chunk_count = int(math.ceil(source_size / float(chunk_size)))

        for i in range(chunk_count):
            offset = chunk_size * i
            bytes = min(chunk_size, source_size - offset)
            with FileChunkIO(source_path, 'r', offset=offset,
                             bytes=bytes) as fp:
                mp.upload_part_from_file(fp, part_num=i + 1)

        mp.complete_upload()
Example #15
 def _upload(retries_left=amount_of_retries):
     try:
         logging.info('Start uploading part #%d ...' % part_num)
         conn = boto.connect_s3(
             aws_key,
             aws_secret,
             host='10.254.9.36',
             is_secure=False,
             port=7480,
             calling_format=boto.s3.connection.OrdinaryCallingFormat(),
         )
         bucket = conn.get_bucket(bucketname)
         for mp in bucket.get_all_multipart_uploads():
             if mp.id == multipart_id:
                 with FileChunkIO(source_path, 'r', offset=offset,
                     bytes=bytes) as fp:
                     mp.upload_part_from_file(fp=fp, part_num=part_num)
                 break
     except Exception, exc:
         if retries_left:
             _upload(retries_left=retries_left - 1)
         else:
             logging.info('... Failed uploading part #%d' % part_num)
             raise exc
Example #16
    def uploadImageFile(self):

        fileSize = os.stat(self.fileAbsPath).st_size
        keyName = os.path.basename(self.fileAbsPath)  # make unique id later
        mp = self.bucket.initiate_multipart_upload(keyName)

        chunkSize = 5242880
        chunkCount = int(math.ceil(fileSize / float(chunkSize)))

        self.started.emit(True, self.index)

        try:
            i = 0
            # for i in range(chunkCount):
            while self.isRunning and i < chunkCount:
                offset = chunkSize * i
                bytes = min(chunkSize, fileSize - offset)
                with FileChunkIO(self.fileAbsPath,
                                 'r',
                                 offset=offset,
                                 bytes=bytes) as fp:
                    mp.upload_part_from_file(fp, part_num=i + 1)
                i += 1
                # emit progress here
                progress = i / float(chunkCount) * 100
                self.valueChanged.emit(progress, self.index)

            if self.isRunning is True:
                mp.complete_upload()
                self.finished.emit('success', self.index)
            else:
                self.finished.emit('cancelled', self.index)

        except Exception as e:
            self.error.emit(e, self.index)
        """
Example #17
 def download_chrunk(self, filepath, key_name, bucket_name, q, id):
     while not q.empty():
         chrunk = q.get()
         offset = chrunk.offset
         length = chrunk.length
         bucket = self.conn.get_bucket(bucket_name)
         resp = bucket.connection.make_request(
             'GET',
             bucket_name,
             key_name,
             # HTTP Range is inclusive, so the last requested byte is offset + length - 1
             headers={'Range': "bytes=%d-%d" % (offset, offset + length - 1)})
         data = resp.read(length)
         fp = FileChunkIO(filepath,
                          'r+',
                          offset=chrunk.offset,
                          bytes=chrunk.length)
         fp.write(data)
         fp.close()
         q.task_done()
Example #18
    def load_file(self,
                  filename,
                  key,
                  bucket_name=None,
                  replace=False,
                  multipart_bytes=5 * (1024**3),
                  encrypt=False):
        """
        Loads a local file to S3

        :param filename: name of the file to load.
        :type filename: str
        :param key: S3 key that will point to the file
        :type key: str
        :param bucket_name: Name of the bucket in which to store the file
        :type bucket_name: str
        :param replace: A flag to decide whether or not to overwrite the key
            if it already exists. If replace is False and the key exists, an
            error will be raised.
        :type replace: bool
        :param multipart_bytes: If provided, the file is uploaded in parts of
            this size (minimum 5242880). The default value is 5GB, since S3
            cannot accept non-multipart uploads for files larger than 5GB. If
            the file is smaller than the specified limit, the option will be
            ignored.
        :type multipart_bytes: int
        :param encrypt: If True, the file will be encrypted on the server-side
            by S3 and will be stored in an encrypted form while at rest in S3.
        :type encrypt: bool
        """
        if not bucket_name:
            (bucket_name, key) = self.parse_s3_url(key)
        bucket = self.get_bucket(bucket_name)
        key_obj = bucket.get_key(key)
        if not replace and key_obj:
            raise ValueError(
                "The key {key} already exists.".format(**locals()))

        key_size = os.path.getsize(filename)
        if multipart_bytes and key_size >= multipart_bytes:
            # multipart upload
            from filechunkio import FileChunkIO
            mp = bucket.initiate_multipart_upload(key_name=key,
                                                  encrypt_key=encrypt)
            total_chunks = int(math.ceil(key_size / float(multipart_bytes)))
            sent_bytes = 0
            try:
                for chunk in range(total_chunks):
                    offset = chunk * multipart_bytes
                    bytes = min(multipart_bytes, key_size - offset)
                    with FileChunkIO(filename, 'r', offset=offset,
                                     bytes=bytes) as fp:
                        _log.info('Sending chunk {c} of {tc}...'.format(
                            c=chunk + 1, tc=total_chunks))
                        mp.upload_part_from_file(fp, part_num=chunk + 1)
            except:
                mp.cancel_upload()
                raise
            mp.complete_upload()
        else:
            # regular upload
            if not key_obj:
                key_obj = bucket.new_key(key_name=key)
            key_size = key_obj.set_contents_from_filename(filename,
                                                          replace=replace,
                                                          encrypt_key=encrypt)
        _log.info("The key {key} now contains"
                  " {key_size} bytes".format(**locals()))
Example #19
    if s3key:
        print s3_path, "exists on S3"
        if int(s3key.content_length) != int(source_size):
            print 'filesizes differ ', s3key.content_length, source_size
            do_s3_upload = True
    else:
        do_s3_upload = True

    if do_s3_upload:
        # Get file info

        # Create a multipart upload request
        mp = b.initiate_multipart_upload(s3_path)

        # Use a chunk size of roughly 145 MiB (feel free to change this)
        chunk_size = 152428800
        chunk_count = int(math.ceil(source_size / float(chunk_size)))

        # Send the file parts, using FileChunkIO to create a file-like object
        # that points to a certain byte range within the original file. We
        # set bytes to never exceed the original file size.
        for i in range(chunk_count):
            offset = chunk_size * i
            bytes = min(chunk_size, source_size - offset)
            with FileChunkIO(filename, 'r', offset=offset, bytes=bytes) as fp:
                mp.upload_part_from_file(fp, part_num=i + 1)

        # Finish the upload
        mp.complete_upload()
        print s3_path, "uploaded to S3"
Example #20
class Snapshot:
    """
    Snapshot class

    This class provides methods to compress all contents
    (first-level children) of the source directory and to
    bundle the resulting archives into a master archive
    for easy transfer.

    Additionally this class provides methods to transfer
    the master snapshot to configured destinations. Only
    locally accessible (mountable) paths and Amazon
    S3 Buckets are supported as destinations.
    """
    def __init__(self, source, destinations):
        """
        Constructor

        Attributes:
            source          absolute path to directory to backup
            destinations    list of local and remote backup destinations
            source_root     parent directory of source
            source_name     name of source directory
            temp_dir_name   name of temporary directory
            temp_dir_path   absolute path to temporary directory
            master_file     absolute path to master file when created
        """
        self.time = str(int(time.time()))
        self.source = source
        self.source_root = os.path.abspath(os.path.join(source, os.pardir))
        self.source_name = os.path.basename(os.path.normpath(source))
        self.destinations = destinations
        self.temp_dir_name = self.source_name + '-' + self.time
        self.temp_dir_path = self.source_root + '/' + self.temp_dir_name
        self.master_file = None

    def make(self):
        """Make snapshot"""
        print "\n**** BACKUP PARAMETERS ****\n"
        print "Backup source path: " + self.source
        print "Backup name: " + self.source_name
        print "Backup destinations: " + str(self.destinations)
        print "Temporary directory path: " + self.temp_dir_path
        print "\n**** RUNNING BACKUP ****\n"

        self.log_events(
            'info',
            'Backup from ' + self.source + ' to ' + str(self.destinations) +
            ' with temporary path at ' + self.temp_dir_path)
        self.log_events(
            'info',
            'Starting backup name ' + self.source_name + '-' + self.time)
        self.__make_temp_dir()
        self.__compress_source_dirs()
        self.__verify_source_archives()
        self.__make_snapshot()

    def transfer(self):
        """Transfer snapshot"""
        self.__transfer_snapshot_local()
        self.__transfer_snapshot_s3()
        self.__cleanup()

    def __purge_s3(self, bucket, keep_older=1):
        """
        Remove older archives from the S3 bucket

            bucket      S3 bucket name to purge
            keep_older  Amount of previous archives to keep
        """
        c = boto.connect_s3()
        b = c.get_bucket(bucket)
        rs = b.list()

        older_archives = []
        for item in rs:
            if self.source_name in item.name:
                sitem = item.name.split('-')
                mtime = sitem[len(sitem) - 2]
                if mtime < self.time:
                    older_archives.append(item.name)

        if len(older_archives) > keep_older:
            for o in older_archives[:-keep_older]:
                b.delete_key(o)

    def __cleanup(self):
        """Remove temporary files and directories"""
        os.remove(self.master_file)

    def __make_temp_dir(self):
        """Create temporary directory to local space"""
        os.mkdir(self.temp_dir_path)
        if not os.path.isdir(self.temp_dir_path):
            self.log_events(
                'fatal', 'Unable to create temporary directory at ' +
                self.temp_dir_path)
            raise Exception('Unable to create temporary directory at ' +
                            self.temp_dir_path + ', aborting.')

        return self.temp_dir_path

    def __compress_source_dirs(self):
        """Compress individual child directories in the source directory"""
        # CHDIR to source dir to avoid unnecessary path nesting
        os.chdir(self.source)
        pipe = subprocess.PIPE
        source_count = 0

        for item in os.walk('.').next()[1]:
            source_count += 1
            self.log_events(
                'info', 'Archiving dir #' + str(source_count) + ': ' + item)
            print('Archiving dir #' + str(source_count) + ': ' + item + '...')
            pd = subprocess.Popen(
                ['/usr/bin/zip', '-r', self.temp_dir_path + '/' + item, item],
                stdout=pipe,
                stderr=pipe)
            stdout, stderr = pd.communicate()
            print stdout

    def __make_snapshot(self):
        """Compress all archives to master archive and remove temporary"""
        # source_root is the parent dir of source
        # it acts as a temp dir for the master snapshot
        self.master_file = self.source_root + '/' + self.source_name + '-' + self.time + '-master.zip'
        self.log_events('info', 'Creating master archive')
        print "Creating master archive..."
        os.chdir(self.temp_dir_path)
        pipe = subprocess.PIPE
        pd = subprocess.Popen([
            '/usr/bin/zip', '-r', self.source_root + '/' + self.source_name +
            '-' + self.time + '-master', '.'
        ],
                              stdout=pipe,
                              stderr=pipe)
        stdout, stderr = pd.communicate()
        print stdout
        print "Removing temporary directory..."
        self.log_events('info', 'Removing temporary directory')
        shutil.rmtree(self.temp_dir_path)
        print "Master archive created successfully!"
        self.log_events('info', 'Master archive created successfully!')

    def __verify_source_archives(self):
        """Compare sources to archives"""
        sources = []
        archives = []

        for x in os.walk(self.source).next()[1]:
            sources.append(x)
            if os.path.isfile(self.temp_dir_path + '/' + x + '.zip'):
                archives.append(x)

        if len(archives) != len(sources):
            print "WARNING: Some backups failed, check log file!"
            self.log_events(
                'warning',
                'Source to archive count mismatch, some directories are missing.'
            )
            missing = self.diff(sources, archives)
            self.log_events(
                'warning',
                'The following archives are missing: ' + str(missing))
        else:
            print "All sources archived successfully!"
            self.log_events('info', 'All sources archived successfully!')

    def __transfer_snapshot_local(self):
        """Transfer master archive to local backup destinations"""
        for dest in self.destinations['local']:
            if not os.path.exists(dest):
                try:
                    os.makedirs(dest)
                except OSError as exception:
                    if exception.errno != errno.EEXIST:
                        raise
            print "Transferring master archive to destination " + dest + "..."
            self.log_events(
                'info', 'Transferring master archive to destination ' + dest)
            if self.master_file is not None:
                shutil.copy2(self.master_file, dest)

            if not os.path.isfile(dest + '/' + self.source_name + '-' +
                                  self.time + '-master.zip'):
                self.log_events(
                    'info',
                    'Error transferring master archive to destination ' + dest)
                raise Exception(
                    'Error transferring master archive to destination ' + dest)

            print "Transfer to " + dest + " completed successfully!"
            self.log_events('info',
                            'Transfer to ' + dest + ' completed successfully!')

    def __transfer_snapshot_s3(self):
        """Transfer master archive to remote backup destinations"""
        c = boto.connect_s3()
        for bucket in self.destinations['s3']:
            try:
                b = c.get_bucket(bucket)
                self.__purge_s3(bucket)
                self.log_events('info', 'Found bucket ' + bucket)
                sys.stdout.write('Found bucket ' + bucket + ', ')
                total_bytes = 0
                for key in b:
                    total_bytes += key.size
                sys.stdout.write('currently there is ' + size(total_bytes) +
                                 ' in it.\n')
            except S3ResponseError, e:
                if e.status == 404:
                    self.log_events(
                        'error',
                        'Bucket ' + bucket + ' not found, creating now')
                    print 'Bucket ' + bucket + ' not found, creating now...'
                    try:
                        b = c.create_bucket(bucket)
                    except S3CreateError, e:
                        self.log_events(
                            'fatal', "Failed creating bucket with name " +
                            bucket + ", aborting.")
                        self.log_events('fatal', e.message)
                        print "Failed creating bucket with name " + bucket + ", aborting."

            if b.get_all_multipart_uploads():
                print "This bucket contains lost files, you should remove them."

            self.log_events('info', 'Initiating remote upload')
            print "Initiating remote upload..."
            source_size = os.stat(self.master_file).st_size
            mp = b.initiate_multipart_upload(os.path.basename(
                self.master_file))
            chunk_size = 52428800
            chunk_count = int(math.ceil(source_size / float(chunk_size)))

            self.log_events(
                'info',
                'Uploading ' + size(source_size) + ' to bucket ' + bucket)
            print 'Uploading ' + size(source_size) + ' to bucket ' + bucket

            for i in range(chunk_count):
                offset = chunk_size * i
                bytes = min(chunk_size, source_size - offset)
                with FileChunkIO(self.master_file,
                                 'r',
                                 offset=offset,
                                 bytes=bytes) as fp:
                    mp.upload_part_from_file(fp, part_num=i + 1)

            mp.complete_upload()
            print 'Transfer to bucket ' + bucket + ' completed successfully!'
            self.log_events(
                'info',
                'Transfer to bucket ' + bucket + ' completed successfully!')
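
A short usage sketch for the Snapshot class above. The shape of the destinations mapping ('local' and 's3' lists) is inferred from __transfer_snapshot_local and __transfer_snapshot_s3; the paths and bucket name are placeholders, and the log_events and diff helpers referenced in the class body are assumed to be available.

destinations = {
    'local': ['/mnt/backup'],    # locally accessible backup paths
    's3': ['my-backup-bucket'],  # S3 bucket names
}
snap = Snapshot('/srv/data', destinations)
snap.make()      # archive each child of /srv/data and build the master archive
snap.transfer()  # copy the master archive to local paths and S3, then clean up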
Example #21
 def test_readlines(self):
     with FileChunkIO(self.tf, offset=1, bytes=15) as c:
         self.assertEqual(c.readlines(), [b'23456789\n', b'234567'])
Example #22
 def test_readinto(self):
     with FileChunkIO(self.tf, offset=1, bytes=2) as c:
         n = 3
         b = bytearray(n.__index__())
         c.readinto(b)
         self.assertEqual(b, b'23\x00')
Example #23
 def test_read_respects_offset_and_bytes(self):
     with FileChunkIO(self.tf, offset=1, bytes=3) as c:
         self.assertEqual(c.read(), b'234')
Example #24
 def test_tell_respects_offset(self):
     with FileChunkIO(self.tf, offset=1) as c:
         self.assertEqual(c.tell(), 0)
         self.assertEqual(c.read(1), b'2')
Example #25
    def run(self):
        try:
            tries = 0
            exception = None
            while tries < self.retries:
                if self.do_stop:
                    break
                try:
                    if self.multipart_id and self.multipart_num and self.multipart_parts:
                        mp_log_info = "s3://%s%s (multipart: %d/%d, size: %.2fmb)" % (
                            self.bucket_name, self.short_key_name(
                                self.key_name), self.multipart_num,
                            self.multipart_parts,
                            float(self.byte_count / 1024.00 / 1024.00))
                        for mp in self.bucket.get_all_multipart_uploads():
                            if mp.id == self.multipart_id:
                                logging.info("Uploading AWS S3 key: %s" %
                                             mp_log_info)
                                callback_count = 10
                                if self.target_bandwidth is not None:
                                    # request a callback every 0.5MB to allow for somewhat decent throttling
                                    callback_count = self.byte_count / 1024 / 1024 / 0.5
                                with FileChunkIO(self.file_name,
                                                 'r',
                                                 offset=self.multipart_offset,
                                                 bytes=self.byte_count) as fp:
                                    mp.upload_part_from_file(
                                        fp=fp,
                                        cb=self.status,
                                        num_cb=callback_count,
                                        part_num=self.multipart_num)
                                break
                        else:
                            raise OperationError(
                                "Missing multipart upload id %s for %s in S3 response."
                                % (self.multipart_id, mp_log_info))
                    else:
                        key = None
                        try:
                            logging.info(
                                "Uploading AWS S3 key: %s (multipart: None, size: %.2fmb)"
                                % (self.short_key_name(self.key_name),
                                   float(self.byte_count / 1024.00 / 1024.00)))
                            key = Key(bucket=self.bucket, name=self.key_name)
                            callback_count = 10
                            if self.target_bandwidth is not None:
                                # request a callback every 0.5MB to allow for somewhat decent throttling
                                callback_count = self.byte_count / 1024.00 / 1024.00 / 0.5
                            key.set_contents_from_filename(
                                self.file_name,
                                cb=self.status,
                                num_cb=callback_count)
                        finally:
                            if key:
                                key.close()
                    break
                except (httplib.HTTPException, exceptions.IOError,
                        socket.error, socket.gaierror) as e:
                    logging.error(
                        "Got exception during upload: '%s', retrying upload" %
                        e)
                    exception = e
                finally:
                    sleep(self.retry_sleep_secs)
                    tries += 1
            if tries >= self.retries and exception:
                raise exception
        except Exception as e:
            logging.fatal("AWS S3 upload failed after %i retries! Error: %s" %
                          (self.retries, e))
            raise e

        return self.file_name, self.key_name, self.multipart_num
Example #26
    def upload_to_s3(self, source_path, bucket_name):
        """

        Given a file, upload it to S3.
        Credentials should be stored in environment variables or ~/.aws/credentials (%USERPROFILE%\.aws\credentials on Windows).

        Returns True on success, false on failure.

        """

        try:
            self.s3_connection = S3Connection()
        except Exception as e:
            print(e)
            return False

        all_buckets = self.s3_connection.get_all_buckets()
        if bucket_name not in [bucket.name for bucket in all_buckets]:
            try:
                self.s3_connection.create_bucket(bucket_name)
            except Exception as e:
                print(e)
                print("Couldn't create bucket.")
                return False

        if (not os.path.isfile(source_path)
                or os.stat(source_path).st_size == 0):
            print("Source file is missing or empty: " + source_path)
            return False

        try:
            bucket = self.s3_connection.get_bucket(bucket_name)
            source_size = os.stat(source_path).st_size
            dest_path = os.path.split(source_path)[1]

            # Create a multipart upload request
            mpu = bucket.initiate_multipart_upload(dest_path)

            # Use a chunk size of 5 MiB
            chunk_size = 5242880
            chunk_count = int(math.ceil(source_size / float(chunk_size)))

            print("Uploading zip (" + str(self.human_size(source_size)) +
                  ")..")

            # Send the file parts, using FileChunkIO to create a file-like object
            # that points to a certain byte range within the original file. We
            # set bytes to never exceed the original file size.
            for i in tqdm(range(chunk_count)):
                offset = chunk_size * i
                bytes = min(chunk_size, source_size - offset)
                with FileChunkIO(source_path, 'r', offset=offset,
                                 bytes=bytes) as fp:
                    mpu.upload_part_from_file(fp, part_num=i + 1)

            # Finish the upload
            mpu.complete_upload()

        except Exception as e:
            print(e)
            return False

        return True
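
A quick usage sketch for upload_to_s3 above. The enclosing class is not shown, so the instance below is only illustrative; boto picks up credentials from the environment or ~/.aws/credentials as noted in the docstring.

uploader = Deployer()  # hypothetical instance of the class defining upload_to_s3
ok = uploader.upload_to_s3('build/artifact.zip', 'my-artifacts-bucket')
print("Upload succeeded" if ok else "Upload failed")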
Example #27
 def test_init_autosets_bytes(self):
     with FileChunkIO(self.tf) as c:
         self.assertEqual(c.bytes, 45)
Example #28
                  (CONFIG['storage'][storageSites[i]]['PROVIDER']),
                  end="")
            if (chunk == ifshare_propriedades['numchunks'] - 1):
                qtosBytes = ifshare_propriedades[
                    'tamanho'] - chunk * ifshare_propriedades['chunksize']
            else:
                qtosBytes = ifshare_propriedades['chunksize']
            totalBytes[storageSites[i]] += qtosBytes
            totalChunks[storageSites[i]] += 1
            bytesAcumulados += qtosBytes
            chunksAcumulados += 1

            if (CONFIG['DOBUCKETUPLOAD']):
                offset = ifshare_propriedades['chunksize'] * chunk
                fpParte = FileChunkIO(nomeArquivoEntrada,
                                      offset=offset,
                                      bytes=qtosBytes)
                chaveNoBucket = Key(upBucket[storageSites[i]])
                chaveNoBucket.key = ifshare_propriedades[
                    'hashgeral'] + "." + ifshare_propriedades[
                        'hashchunk' + str(chunk)] + ".ifsharechunk"
                chaveNoBucket.set_contents_from_file(fpParte)

            data_parte = (ifshare_propriedades['hashgeral'],
                          ifshare_propriedades['hashchunk' + str(chunk)],
                          chunk, storageSites[i])
            if (CONFIG['DOBDINSERT']):
                cursor.execute(add_parte, data_parte)
        print("")

        if (CONFIG['DOBDINSERT']):
Example #29
 def test_seek_cur(self):
     with FileChunkIO(self.tf, offset=20, bytes=10) as c:
         c.seek(5)
         c.seek(-3, SEEK_CUR)
         self.assertEqual(c.tell(), 2)
Example #30
 def test_seek_end(self):
     with FileChunkIO(self.tf, offset=10, bytes=10) as c:
         c.seek(-5, SEEK_END)
         self.assertEqual(c.read(3), b'789')
Example #31
 def test_init_autosets_bytes_and_respects_offset(self):
     with FileChunkIO(self.tf, offset=1) as c:
         self.assertEqual(c.bytes, 44)
Example #32
def main():
    home = os.getenv('HOME')

    parser = ArgumentParser(description=desc)
    subparsers = parser.add_subparsers(help='commands')

    # A list command
    list_parser = subparsers.add_parser('list', help='list buckets or objects')
    list_parser.set_defaults(which='list')
    list_parser.add_argument('bucketname', action='store', nargs='?', help='bucket to list')

    # A create command
    create_parser = subparsers.add_parser('create', help='create bucket')
    create_parser.set_defaults(which='create')
    create_parser.add_argument('bucketname', action='store', help='New bucket to create')

    # A delete command
    delete_parser = subparsers.add_parser('delete', help='remove bucket or object')
    delete_parser.set_defaults(which='delete')
    delete_parser.add_argument('bucketname', action='store', help='the bucket to remove')
    delete_parser.add_argument('filename', action='store', nargs='?', help='the file to remove')

    # A put command
    put_parser = subparsers.add_parser('put', help='put object to bucket')
    put_parser.set_defaults(which='put')
    put_parser.add_argument('bucketname', action='store', help='the bucket')
    put_parser.add_argument('filename', action='store', help='the file to put')

    # A get command
    get_parser = subparsers.add_parser('get', help='get object from bucket')
    get_parser.set_defaults(which='get')
    get_parser.add_argument('bucketname', action='store', help='the bucket')
    get_parser.add_argument('filename', action='store', help='the file to get')

    # A version command
    parser.add_argument('--version', action='version', version='%(prog)s 0.1')

    # A config file command
    parser.add_argument('-c', '--config-file',
                        default='%s/.s3b' % home,
                        help='Config file name. Defaults to %s/.s3b' % home,
                        metavar='FILE')

    args = vars(parser.parse_args())

    # Load the config file (honours -c/--config-file, default ~/.s3b)
    config = {}
    execfile(args['config_file'], config)

    conn = s3_connector.connect_s3(access_key=config['access_key'],
                                   secret_key=config['secret_key'],
                                   s3_host=config['s3_host'],
                                   port=config['port'],
                                   is_secure=config['is_secure'])

    if args['which'] == 'list' and args['bucketname'] is None:
        for bucket in conn.get_all_buckets():
            print "{name}\t{created}".format(
                name=bucket.name,
                created=bucket.creation_date,
            )

    if args['which'] == 'list' and args['bucketname'] is not None:
        buckets = conn.get_all_buckets()
        bucket = s3_utils.get_bucket(buckets=buckets, bucket_name=args['bucketname'])
        for item in bucket.list():
            print "{name}\t{size}\t{modified}".format(
                name=item.name,
                size=item.size,
                modified=item.last_modified,
            )

    if args['which'] == 'create':
        conn.create_bucket(args['bucketname'])

    if args['which'] == 'delete' and args['filename'] is None:
        conn.delete_bucket(args['bucketname'])

    if args['which'] == 'delete' and args['filename'] is not None:
        buckets = conn.get_all_buckets()
        bucket = s3_utils.get_bucket(buckets=buckets, bucket_name=args['bucketname'])
        bucket.delete_key(args['filename'])

    if args['which'] == 'put':
        buckets = conn.get_all_buckets()
        bucket = s3_utils.get_bucket(buckets=buckets, bucket_name=args['bucketname'])
        source_size = os.stat(args['filename']).st_size
        if config['chunk_size'] > source_size:
            key = bucket.new_key(os.path.basename(args['filename']))
            key.set_contents_from_filename(args['filename'])
        else:
            multipart_upload = bucket.initiate_multipart_upload(os.path.basename(args['filename']))
            chunk_count = int(math.ceil(source_size / float(config['chunk_size'])))
            for i in range(chunk_count):
                offset = config['chunk_size'] * i
                with FileChunkIO(args['filename'], 'r', offset=offset,
                                 bytes=min(config['chunk_size'], source_size - offset)) as fp:
                    multipart_upload.upload_part_from_file(fp, part_num=i + 1)
            multipart_upload.complete_upload()

    if args['which'] == 'get':
        buckets = conn.get_all_buckets()
        bucket = s3_utils.get_bucket(buckets=buckets, bucket_name=args['bucketname'])
        key = bucket.get_key(os.path.basename(args['filename']))
        key.get_contents_to_filename(args['filename'])
Example #33
mul_key = 'HEHE'

header = {
    'x-amz-meta-gang': 'Yang Honggang'
}


mp = b.initiate_multipart_upload(mul_key)

chunk_size = 20971520
#chunk_size = 2048
chunk_count = int(math.ceil(source_size / float(chunk_size)))

for i in range(chunk_count):
    offset = chunk_size * i
    bytes = min(chunk_size, source_size - offset)
    with FileChunkIO(source_path, 'r', offset=offset,bytes=bytes) as fp:
        print("0000000000000000(chunk_count:%d)0000000000000(tell:%d)000000000000000000" % (chunk_count, fp.tell()))
        print(type(fp))
        mp.upload_part_from_file(fp, part_num=i + 1)

print "before complete"
mp.cancel_upload()
#mp = mp.complete_upload()
#key = b.get_key('HEHE')
#print mp
#print key.size
#print key.content_type
#print key.last_modified
Example #34
 def test_init_seeks_to_offset(self):
     with FileChunkIO(self.tf, offset=1) as c:
         self.assertEqual(c.tell(), 0)
         self.assertEqual(c.read(1), b'2')
Example #35
    def run(self):
        try:
            tries = 0
            exception = None
            while tries < self.retries:
                if self.do_stop:
                    break
                try:
                    if self.multipart_id and self.multipart_num and self.multipart_parts:
                        for mp in self.bucket.get_all_multipart_uploads():
                            if mp.id == self.multipart_id:
                                logging.info(
                                    "Uploading AWS S3 key: s3://%s%s (multipart: %d/%d, size: %.2fmb)"
                                    %
                                    (self.bucket_name,
                                     self.short_key_name(self.key_name),
                                     self.multipart_num, self.multipart_parts,
                                     float(
                                         self.byte_count / 1024.00 / 1024.00)))
                                with FileChunkIO(self.file_name,
                                                 'r',
                                                 offset=self.multipart_offset,
                                                 bytes=self.byte_count) as fp:
                                    mp.upload_part_from_file(
                                        fp=fp,
                                        cb=self.status,
                                        num_cb=10,
                                        part_num=self.multipart_num)
                                break
                    else:
                        key = None
                        try:
                            logging.info(
                                "Uploading AWS S3 key: %s (multipart: None, size: %.2fmb)"
                                % (self.short_key_name(self.key_name),
                                   float(self.byte_count / 1024.00 / 1024.00)))
                            key = Key(bucket=self.bucket, name=self.key_name)
                            key.set_contents_from_filename(self.file_name,
                                                           cb=self.status,
                                                           num_cb=10)
                        finally:
                            if key:
                                key.close()
                    break
                except (httplib.HTTPException, exceptions.IOError,
                        socket.error, socket.gaierror) as e:
                    logging.error(
                        "Got exception during upload: '%s', retrying upload" %
                        e)
                    exception = e
                finally:
                    sleep(self.retry_sleep_secs)
                    tries += 1
            if tries >= self.retries and exception:
                raise exception
        except Exception as e:
            logging.fatal("AWS S3 upload failed after %i retries! Error: %s" %
                          (self.retries, e))
            raise e

        return self.file_name, self.key_name, self.multipart_num
Example #36
 def test_seek_respects_offset(self):
     with FileChunkIO(self.tf, offset=1) as c:
         c.seek(1)
         self.assertEqual(c.read(1), b'3')