Example no. 1
 def calculate_stats(self, msg):
     start_time = parse_ts(msg['Service-Read'])
     end_time = parse_ts(msg['Service-Write'])
     elapsed_time = end_time - start_time
     if elapsed_time > self.max_time:
         self.max_time = elapsed_time
     if elapsed_time < self.min_time:
         self.min_time = elapsed_time
     self.total_time += elapsed_time.seconds
     if start_time < self.earliest_time:
         self.earliest_time = start_time
     if end_time > self.latest_time:
         self.latest_time = end_time
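A note on the accumulation above: elapsed_time.seconds is only the seconds component of the timedelta and wraps past 24 hours, whereas total_seconds() is the full duration. A small illustration with made-up header values, assuming they are ISO 8601 strings of the kind boto.utils.parse_ts accepts:

from boto.utils import parse_ts

# Hypothetical message headers, spanning a bit more than one day.
msg = {'Service-Read': '2015-03-01T10:00:00.000Z',
       'Service-Write': '2015-03-02T11:30:15.000Z'}
elapsed = parse_ts(msg['Service-Write']) - parse_ts(msg['Service-Read'])
print(elapsed.seconds)          # 5415     -- seconds within the last day only
print(elapsed.total_seconds())  # 91815.0  -- the full 1 day, 1:30:15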
Example no. 2
 def calculate_stats(self, msg):
     start_time = parse_ts(msg['Service-Read'])
     end_time = parse_ts(msg['Service-Write'])
     elapsed_time = end_time - start_time
     if elapsed_time > self.max_time:
         self.max_time = elapsed_time
     if elapsed_time < self.min_time:
         self.min_time = elapsed_time
     self.total_time += elapsed_time.seconds
     if start_time < self.earliest_time:
         self.earliest_time = start_time
     if end_time > self.latest_time:
         self.latest_time = end_time
Example no. 3
def parse_ts_extended(ts):
    warnings.warn(
        "parse_ts_extended has been deprecated and will be removed in version "
        "1.3 because boto.utils.parse_ts has subsumed the old functionality.",
        PendingDeprecationWarning
    )
    return parse_ts(ts)
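For reference, parse_ts turns the timestamp strings the AWS APIs return into naive datetime objects. A minimal stand-in sketch (parse_ts_sketch is a hypothetical name, not boto's implementation), assuming the two ISO 8601 layouts S3 commonly returns:

import datetime

def parse_ts_sketch(ts):
    """Rough equivalent of boto.utils.parse_ts for ISO 8601 input."""
    ts = ts.strip()
    for fmt in ('%Y-%m-%dT%H:%M:%SZ', '%Y-%m-%dT%H:%M:%S.%fZ'):
        try:
            return datetime.datetime.strptime(ts, fmt)
        except ValueError:
            continue
    raise ValueError('Unrecognized timestamp: %r' % ts)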
Example no. 4
    def run(self):
        """
        Backup postgres database for specific `app_code`.
        """

        s3connection = S3Connection(AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY)
        s3bucket = s3connection.get_bucket(AWS_BUCKET)

        DBDATESTAMP = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')

        DBURL = APP_CODES.get(self.__app_code)

        DUMPFILE = 'postgres-{}-{}.pg_dump'.format(self.__app_code,
                                                   DBDATESTAMP)

        BACKUP_COMMAND = 'pg_dump --format=c --dbname="{}"'.format(DBURL)

        # Determine where to put this backup
        now = datetime.datetime.now()
        for directory in DIRECTORIES:
            prefix = directory['name'] + '/'
            earliest_current_date = now - datetime.timedelta(
                days=directory['days'])
            s3keys = s3bucket.list(prefix=prefix)
            large_enough_backups = filter(lambda x: x.size >= MINIMUM_SIZE,
                                          s3keys)
            young_enough_backup_found = False
            for backup in large_enough_backups:
                if parse_ts(backup.last_modified) >= earliest_current_date:
                    young_enough_backup_found = True
            if not young_enough_backup_found:
                # This directory doesn't have any current backups; stop here and use it
                # as the destination
                break

        # Perform the backup
        filename = ''.join((prefix, DUMPFILE))
        print('Backing up to "{}"...'.format(filename))
        upload = s3bucket.new_key(filename)
        chunks_done = 0
        with smart_open.smart_open(upload, 'wb') as s3backup:
            process = subprocess.Popen(BACKUP_COMMAND,
                                       shell=True,
                                       stdout=subprocess.PIPE)
            while True:
                chunk = process.stdout.read(CHUNK_SIZE)
                if not len(chunk):
                    print('Finished! Wrote {} chunks; {}'.format(
                        chunks_done,
                        humanize.naturalsize(chunks_done * CHUNK_SIZE)))
                    break
                s3backup.write(chunk)
                chunks_done += 1
                if '--hush' not in sys.argv:
                    print('Wrote {} chunks; {}'.format(
                        chunks_done,
                        humanize.naturalsize(chunks_done * CHUNK_SIZE)))

        print('Backup `{}` successfully sent to S3.'.format(filename))
        return  # Close thread
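As an aside, the read-until-EOF loop used here (and in the later backup examples) can be written more compactly with iter() and a sentinel. A brief sketch, assuming Python 3 (where process.stdout.read returns bytes) and the same process, CHUNK_SIZE and s3backup names as above:

from functools import partial

# Read fixed-size chunks from the dump's stdout until read() returns b'' at EOF.
for chunk in iter(partial(process.stdout.read, CHUNK_SIZE), b''):
    s3backup.write(chunk)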
Example no. 5
def cleanup():
    aws_lifecycle = os.environ.get("AWS_BACKUP_BUCKET_DELETION_RULE_ENABLED",
                                   "False") == "True"

    s3connection = S3Connection(AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY)
    s3bucket = s3connection.get_bucket(AWS_BUCKET)

    if not aws_lifecycle:
        # Remove old backups beyond desired retention
        for directory in DIRECTORIES:
            prefix = directory['name'] + '/'
            keeps = directory['keeps']
            s3keys = s3bucket.list(prefix=prefix)
            large_enough_backups = filter(lambda x: x.size >= MINIMUM_SIZE,
                                          s3keys)
            large_enough_backups = sorted(large_enough_backups,
                                          key=lambda x: x.last_modified,
                                          reverse=True)

            for l in large_enough_backups:
                now = datetime.datetime.now()
                delta = now - parse_ts(l.last_modified)
                if delta.days > keeps:
                    print('Deleting old backup "{}"...'.format(l.name))
                    l.delete()
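One caveat: parse_ts yields a naive datetime in UTC (S3 last_modified strings end in Z), while datetime.datetime.now() is local time, so the age computed above can be skewed by the local UTC offset. A hedged sketch of the same retention test against utcnow(), with `keeps` meaning the same as in the example:

import datetime
from boto.utils import parse_ts

def is_stale(key, keeps):
    """True when the S3 key is older than `keeps` days, measured in UTC."""
    age = datetime.datetime.utcnow() - parse_ts(key.last_modified)
    return age.days > keeps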
Example no. 6
 def modified_time(self, name):
     name = self._normalize_name(self._clean_name(name))
     entry = self.entries.get(name)
     if entry is None:
         entry = self.bucket.get_key(self._encode_name(name))
     # Parse the last_modified string to a local datetime object.
     return parse_ts(entry.last_modified)
Example no. 7
 def modified_time(self, name):
     name = self._normalize_name(self._clean_name(name))
     entry = self.entries.get(name)
     if entry is None:
         entry = self.bucket.get_key(self._encode_name(name))
     # Parse the last_modified string to a local datetime object.
     return parse_ts(entry.last_modified)
Example no. 8
def parse_ts_extended(ts):
    warnings.warn(
        "parse_ts_extended has been deprecated and will be removed in version "
        "1.3 because boto.utils.parse_ts has subsumed the old functionality.",
        PendingDeprecationWarning
    )
    return parse_ts(ts)
Example no. 9
    def set_keys(self, credentials, timeout=None):
        keys = self.stored_keys.get(credentials.path)
        if keys is not None:
            expiration = parse_ts(keys["Expiration"])
            if timeout:
                expiration = keys["LastUpdated"] + timeout
            if datetime.utcnow() > expiration:
                log.info("Keys expired, recreating them")
                keys = None

        if keys is None:
            log.info("Assuming role")
            pair = IamSaml(credentials.provider, credentials.idp_username, "")
            pair.basic_auth = self.basic_auth
            keys = pair.get_result(credentials.role).credentials.to_dict()

            _keys = {
                "Code": "Success",
                "LastUpdated": datetime.utcnow(),
                "AccessKeyId": keys["access_key"],
                "SecretAccessKey": keys["secret_key"],
                "Token": keys["session_token"],
                "Expiration": keys["expiration"]
            }
            self.stored_keys[credentials.path] = _keys
            self.stored_assertions[credentials.path] = pair.assertion
        return self.stored_keys[credentials.path], self.stored_assertions[
            credentials.path]
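The expiry check here boils down to comparing utcnow() with either the STS-reported Expiration string or LastUpdated plus a timeout. A condensed sketch under those assumptions (cached mirrors the stored dict above; timeout is an optional datetime.timedelta):

from datetime import datetime
from boto.utils import parse_ts

def keys_expired(cached, timeout=None):
    # `cached` has a datetime LastUpdated and a string Expiration,
    # matching the fields stored by the example above.
    expiration = parse_ts(cached["Expiration"])
    if timeout:
        expiration = cached["LastUpdated"] + timeout
    return datetime.utcnow() > expiration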
Example no. 10
    def retrieve_response(self, spider, request):
        response = super(S3CacheStorage, self).retrieve_response(spider, request)
        if response is None:  # not in local filesystem cache, so try copying from s3
            local_path = self._get_request_path(spider, request)
            remote_path = os.path.relpath(local_path, self.tmpcachedir).lower()
            bucket = self.conn.get_bucket(self.bucket_name, validate=False)

            def _get_key(filename):
                key_name = os.path.join(remote_path, filename)
                return bucket.get_key(key_name)

            # check if the key exists
            metadata_key = _get_key('pickled_meta')
            if metadata_key is None:
                return None  # key not found

            # check if the cache entry has expired
            mtime = parse_ts(metadata_key.last_modified)
            if 0 < self.expiration_secs < (datetime.datetime.utcnow() - mtime).total_seconds():
                return None  # expired

            # deserialise the cached response
            metadata = pickle.loads(metadata_key.get_contents_as_string())
            body = _get_key('response_body').get_contents_as_string()
            rawheaders = _get_key('response_headers').get_contents_as_string()
            url = metadata.get('response_url')
            status = metadata['status']
            headers = Headers(headers_raw_to_dict(rawheaders))
            respcls = responsetypes.from_args(headers=headers, url=url)
            response = respcls(url=url, headers=headers, status=status, body=body)

        return response
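The chained comparison above treats expiration_secs <= 0 as "never expire" and otherwise asks whether the cached copy is older than expiration_secs. A stand-alone sketch of just that test (the key attribute and setting names follow the example):

import datetime
from boto.utils import parse_ts

def is_expired(s3_key, expiration_secs):
    """True when expiration is enabled and the cached key is too old."""
    age = (datetime.datetime.utcnow() - parse_ts(s3_key.last_modified)).total_seconds()
    return 0 < expiration_secs < age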
Example no. 11
 def endElement(self, name, value, connection):
     if name == 'reservedInstancesModificationId':
         self.modification_id = value
     elif name == 'createDate':
         self.create_date = parse_ts(value)
     elif name == 'updateDate':
         self.update_date = parse_ts(value)
     elif name == 'effectiveDate':
         self.effective_date = parse_ts(value)
     elif name == 'status':
         self.status = value
     elif name == 'statusMessage':
         self.status_message = value
     elif name == 'clientToken':
         self.client_token = value
     else:
         setattr(self, name, value)
Example no. 12
 def endElement(self, name, value, connection):
     if name == 'reservedInstancesModificationId':
         self.modification_id = value
     elif name == 'createDate':
         self.create_date = parse_ts(value)
     elif name == 'updateDate':
         self.update_date = parse_ts(value)
     elif name == 'effectiveDate':
         self.effective_date = parse_ts(value)
     elif name == 'status':
         self.status = value
     elif name == 'statusMessage':
         self.status_message = value
     elif name == 'clientToken':
         self.client_token = value
     else:
         setattr(self, name, value)
Example no. 13
def parse_ts_extended(ts):
    RFC1123 = '%a, %d %b %Y %H:%M:%S %Z'
    rv = None
    try:
        rv = parse_ts(ts)
    except ValueError:
        rv = datetime.datetime.strptime(ts, RFC1123)
    return rv
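This variant falls back to the RFC 1123 layout some responses use. A usage sketch with one made-up timestamp of each shape; both calls print 2015-03-01 10:00:00:

print(parse_ts_extended('2015-03-01T10:00:00.000Z'))       # ISO 8601, handled by parse_ts
print(parse_ts_extended('Sun, 01 Mar 2015 10:00:00 GMT'))  # RFC 1123 shape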
Example no. 14
def parse_ts_extended(ts):
    RFC1123 = '%a, %d %b %Y %H:%M:%S %Z'
    rv = None
    try:
        rv = parse_ts(ts)
    except ValueError:
        rv = datetime.datetime.strptime(ts, RFC1123)
    return rv
Example no. 15
 def endElement(self, name, value, connection):
     if name == "reservedInstancesModificationId":
         self.modification_id = value
     elif name == "createDate":
         self.create_date = parse_ts(value)
     elif name == "updateDate":
         self.update_date = parse_ts(value)
     elif name == "effectiveDate":
         self.effective_date = parse_ts(value)
     elif name == "status":
         self.status = value
     elif name == "statusMessage":
         self.status_message = value
     elif name == "clientToken":
         self.client_token = value
     else:
         setattr(self, name, value)
Example no. 16
def key_dt(key):
    from boto.utils import parse_ts
    try:
        modified = time.strptime(key.last_modified, '%a, %d %b %Y %H:%M:%S %Z')
        dt = datetime.datetime.fromtimestamp(mktime(modified))
        return dt
    except:
        return parse_ts(key.last_modified)
Example no. 17
    def _save_to_s3(self, data, mime, update=False, compress=True):
        ziped_data = None
        content_encoding = None
        headers = {}
        if compress and mime == 'application/vnd.google-earth.kml+xml':
            ziped_data = self._gzip_data(data)
            content_encoding = 'gzip'
            headers['Content-Encoding'] = 'gzip'

        if not update:
            if content_encoding == 'gzip' and ziped_data is not None:
                data = ziped_data
            try:
                k = Key(bucket=self.bucket)
                k.key = self.file_id
                k.set_metadata('Content-Type', mime)
                k.content_type = mime
                k.content_encoding = content_encoding
                k.set_metadata('Content-Encoding', content_encoding)
                k.set_contents_from_string(data, replace=False)
                key = self.bucket.get_key(k.key)
                last_updated = parse_ts(key.last_modified)
            except Exception as e:
                raise exc.HTTPInternalServerError('Error while configuring S3 key (%s) %s' % (self.file_id, e))
            try:
                _save_item(self.admin_id, file_id=self.file_id, last_updated=last_updated)
            except Exception as e:
                raise exc.HTTPInternalServerError('Cannot create file on Dynamodb (%s)' % e)

        else:
            try:
                if content_encoding == 'gzip' and ziped_data is not None:
                    data = ziped_data
                # Inconsistent behaviour with metadata, see https://github.com/boto/boto/issues/2798
                self.key.content_encoding = content_encoding
                self.key.set_metadata('Content-Encoding', content_encoding)
                self.key.set_contents_from_string(data, replace=True)
                key = self.bucket.get_key(self.key.key)
                last_updated = parse_ts(key.last_modified)
            except Exception as e:
                raise exc.HTTPInternalServerError('Error while updating S3 key (%s) %s' % (self.key.key, e))
            try:
                _save_item(self.admin_id, last_updated=last_updated)
            except Exception as e:
                raise exc.HTTPInternalServerError('Cannot update file on Dynamodb (%s) %s' % (self.file_id, e))
Example no. 18
 def modified_time(self, name):
     name = self._normalize_name(self._clean_name(name))
     entry = self.entries.get(name)
     # only call self.bucket.get_key() if the key is not found
     # in the preloaded metadata.
     if entry is None:
         entry = self.bucket.get_key(self._encode_name(name))
     # Parse the last_modified string to a local datetime object.
     return parse_ts(entry.last_modified)
Example no. 19
 def modified_time(self, name):
     name = self._normalize_name(self._clean_name(name))
     entry = self.entries.get(name)
     # only call self.bucket.get_key() if the key is not found
     # in the preloaded metadata.
     if entry is None:
         entry = self.bucket.get_key(self._encode_name(name))
     # Parse the last_modified string to a local datetime object.
     return parse_ts(entry.last_modified)
Example no. 20
 def test_new_file_modified_time(self):
     self.storage.preload_metadata = True
     name = 'test_storage_save.txt'
     content = ContentFile('new content')
     utcnow = datetime.datetime.utcnow()
     with mock.patch('storages.backends.s3boto.datetime') as mock_datetime:
         mock_datetime.utcnow.return_value = utcnow
         self.storage.save(name, content)
         self.assertEqual(self.storage.modified_time(name),
                          parse_ts(utcnow.strftime(ISO8601)))
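The comparison works on a formatted-then-parsed value because a whole-second ISO 8601 round trip discards microseconds. A sketch of that round trip using boto's own ISO8601 constant (assumed to be '%Y-%m-%dT%H:%M:%SZ'):

import datetime
from boto.utils import ISO8601, parse_ts

utcnow = datetime.datetime.utcnow()
roundtrip = parse_ts(utcnow.strftime(ISO8601))
# Equal once microseconds are discarded by the whole-second format.
assert roundtrip == utcnow.replace(microsecond=0)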
Example no. 21
 def test_new_file_modified_time(self):
     self.storage.preload_metadata = True
     name = 'test_storage_save.txt'
     content = ContentFile('new content')
     utcnow = datetime.datetime.utcnow()
     with mock.patch('storages.backends.s3boto.datetime') as mock_datetime:
         mock_datetime.utcnow.return_value = utcnow
         self.storage.save(name, content)
         self.assertEqual(self.storage.modified_time(name),
                          parse_ts(utcnow.strftime(ISO8601)))
Example no. 22
def clean_s3(now):
    remove_datetime = now - timedelta(hours=24)
    bucket = getS3Bucket()
    for key in bucket.list():
        key_name = key.name.encode('utf-8')
        key_datetime = parse_ts(key.last_modified)
        if 'data' in key_name and len(key_name) == 34 and key_datetime < remove_datetime:
            print key_name, 'Remove'
            bucket.delete_key(key_name)
        else:
            print key_name, 'Skipping'
Example no. 23
    def _save_to_s3(self, data, mime, update=False, compress=True):
        data_payload = data
        content_encoding = None
        headers = {'Cache-Control': 'no-cache, must-revalidate'}
        if compress and mime == 'application/vnd.google-earth.kml+xml':
            data_payload = self._gzip_data(data)
            content_encoding = 'gzip'
        if not update:
            try:
                # Push object to bucket
                replace = False
                k = Key(bucket=self.bucket)
                _push_object_to_s3(k, self.file_id, mime, content_encoding, headers, data_payload, replace)
                key = self.bucket.get_key(k.key)
                last_updated = parse_ts(key.last_modified)
            except Exception as e:
                raise exc.HTTPInternalServerError('Error while configuring S3 key (%s) %s' % (self.file_id, e))
            try:
                # Push to dynamoDB, only one entry per object
                _save_item(self.admin_id, file_id=self.file_id, last_updated=last_updated, bucketname=self.bucket.name)
            except Exception as e:
                raise exc.HTTPInternalServerError('Cannot create file on Dynamodb (%s)' % e)

        else:
            try:
                # Inconsistent behaviour with metadata, see https://github.com/boto/boto/issues/2798
                # Push object to bucket
                replace = True
                _push_object_to_s3(self.key, self.file_id, mime, content_encoding, headers, data_payload, replace)
                key = self.bucket.get_key(self.key.key)
                last_updated = parse_ts(key.last_modified)
            except Exception as e:
                raise exc.HTTPInternalServerError('Error while updating S3 key (%s) %s' % (self.file_id, e))
            try:
                _save_item(self.admin_id, last_updated=last_updated, bucketname=self.bucket.name)
            except Exception as e:
                raise exc.HTTPInternalServerError('Cannot update file on Dynamodb (%s) %s' % (self.file_id, e))
Example no. 24
    def handle_noargs(self, **options):
        connection = S3Connection(settings.BACKUP_S3_ACCESS_KEY, settings.BACKUP_S3_SECRET_KEY)
        bucket = connection.get_bucket(settings.BACKUP_S3_BUCKET)

        count_deleted = 0
        size_deleted = 0
        for key in bucket.list():
            file_datetime = parse_ts(key.last_modified)

            # Time is apparently two hours earlier than local time
            file_date = (file_datetime + relativedelta(hours=2)).date()

            if not must_keep_file(file_date):
                count_deleted += 1
                size_deleted += key.size
                key.delete()

        print "%d files are deleted with a total size of %s" % (count_deleted, size(size_deleted))
Example no. 25
def main():

    ## Args
    parser = argparse.ArgumentParser()
    parser.add_argument('--bucket', required=True, help='Bucket')
    parser.add_argument('--endpoint',
                        default=boto.s3.connection.NoHostProvided,
                        help='S3 endpoint')
    parser.add_argument('--profile', help='Boto profile used for connection')
    args = parser.parse_args()

    ## S3 Connection
    bucket = S3Connection(suppress_consec_slashes=False,
                          host=args.endpoint,
                          is_secure=True,
                          profile_name=args.profile).get_bucket(args.bucket)

    ## Hadoop Counters
    totalsize = 0

    ## In a Stream?
    start_index = campanile.stream_index()

    ## Process input
    for line in fileinput.input("-"):
        if line.startswith('#'):
            continue

        delim, prefix = line.rstrip('\n').split('\t')[start_index].split(',')
        for key in bucket.list(prefix=prefix, delimiter=delim):

            if key.__class__.__name__ == "Prefix":
                continue

            ## Don't include glacier objects
            if key.storage_class == 'GLACIER':
                continue

            print "%s\t%s\t%s\t%s" % (key.name.encode('utf-8'),
                                      key.etag.replace("\"", ""), key.size,
                                      parse_ts(key.last_modified))

            ## Log stats
            campanile.counter(args.bucket, "Bytes", key.size)
Example no. 26
def get_last_key(keys, days_ago):
  """ Loops over the keys and finds the last key that is at least `days_ago` old. """
  last_key = None
  cur_time_in_seconds = time.mktime(time.gmtime())
  min_difference_in_days = float('inf')
  SECONDS_IN_DAY = 60 * 60 * 24

  for key in keys:
    # Get the time of last modification of the key, in seconds
    parsed_datetime = utils.parse_ts(key.last_modified)
    time_in_seconds = time.mktime(parsed_datetime.timetuple())

    difference_in_days = (cur_time_in_seconds - time_in_seconds) / float(SECONDS_IN_DAY)
    # We want the latest backup that is at least `day_ago` old
    if difference_in_days < min_difference_in_days and difference_in_days > days_ago:
      last_key = key
      min_difference_in_days = difference_in_days

  return last_key
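Both conversions above push a UTC time tuple through time.mktime, which treats its argument as local time; the offsets cancel in the subtraction, but the direct route from the UTC datetime parse_ts returns to epoch seconds is calendar.timegm. A short sketch (the key argument is any object with an S3-style last_modified string):

import calendar
import time
from boto.utils import parse_ts

def key_age_days(key):
    """Age of an S3 key in days, computed entirely in UTC epoch seconds."""
    modified = calendar.timegm(parse_ts(key.last_modified).timetuple())
    return (time.time() - modified) / 86400.0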
Example no. 27
def main():

    ## Args
    parser = argparse.ArgumentParser()
    parser.add_argument('--bucket', required=True, help='Bucket')
    parser.add_argument('--endpoint', 
            default=boto.s3.connection.NoHostProvided, help='S3 endpoint')
    parser.add_argument('--profile', help='Boto profile used for connection')
    args = parser.parse_args()

    ## S3 Connection
    bucket = S3Connection(suppress_consec_slashes=False,
            host=args.endpoint,is_secure=True,
            profile_name=args.profile).get_bucket(args.bucket)

    ## Hadoop Counters
    totalsize = 0
    
    ## In a Stream?
    start_index = campanile.stream_index()

    ## Process input
    for line in fileinput.input("-"):
        if line.startswith('#'):
            continue
        
        delim, prefix = line.rstrip('\n').split('\t')[start_index].split(',')
        for key in bucket.list(prefix=prefix,delimiter=delim):
            
            if key.__class__.__name__ == "Prefix":
                continue 

            ## Don't include glacier objects
            if key.storage_class == 'GLACIER':
                continue 

            print "%s\t%s\t%s\t%s" % (key.name.encode('utf-8'), 
                    key.etag.replace("\"", ""), 
                    key.size, parse_ts(key.last_modified))

            ## Log stats
            campanile.counter(args.bucket, "Bytes", key.size)
Example no. 28
    def keys(self):
        keys = getattr(self, "_keys", None)
        if keys is not None:
            expiration = parse_ts(keys["Expiration"])
            if datetime.utcnow() > expiration:
                log.info("Keys expired, recreating them")
                keys = None

        if keys is None:
            log.info("Assuming role")
            pair = IamSaml(self.credentials.keys.provider, self.credentials.keys.idp_username, "")
            pair.basic_auth = self.basic_auth
            keys = pair.get_result(self.credentials.keys.role).credentials.to_dict()

            self._keys = {
                  "Code": "Success"
                , "LastUpdated": datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S:00Z")
                , "AccessKeyId": keys["access_key"]
                , "SecretAccessKey": keys["secret_key"]
                , "Token": keys["session_token"]
                , "Expiration": keys["expiration"]
                }
        return self._keys
Example no. 29
    def retrieve_response(self, spider, request):
        response = super(S3CacheStorage,
                         self).retrieve_response(spider, request)
        if response is None:  # not in local filesystem cache, so try copying from s3
            local_path = self._get_request_path(spider, request)
            remote_path = os.path.relpath(local_path, self.tmpcachedir).lower()
            bucket = self.conn.get_bucket(self.bucket_name, validate=False)

            def _get_key(filename):
                key_name = os.path.join(remote_path, filename)
                return bucket.get_key(key_name)

            # check if the key exists
            metadata_key = _get_key('pickled_meta')
            if metadata_key is None:
                return None  # key not found

            # check if the cache entry has expired
            mtime = parse_ts(metadata_key.last_modified)
            if 0 < self.expiration_secs < (datetime.datetime.utcnow() -
                                           mtime).total_seconds():
                return None  # expired

            # deserialise the cached response
            metadata = pickle.loads(metadata_key.get_contents_as_string())
            body = _get_key('response_body').get_contents_as_string()
            rawheaders = _get_key('response_headers').get_contents_as_string()
            url = metadata.get('response_url')
            status = metadata['status']
            headers = Headers(headers_raw_to_dict(rawheaders))
            respcls = responsetypes.from_args(headers=headers, url=url)
            response = respcls(url=url,
                               headers=headers,
                               status=status,
                               body=body)

        return response
Example no. 30
def main():
    config = ConfigParser.ConfigParser()
    config.read(filenames=["aws-s3-restore.conf"])

    aws_access_key = config.get(section="aws", option="key")
    aws_access_secret = config.get(section="aws", option="secret")

    conn = S3Connection(aws_access_key, aws_access_secret)

    buckets = conn.get_all_buckets()

    print "The following buckets are available"
    print "\n".join(["- {}".format(bucket.name) for bucket in buckets])

    bucket = None
    while not bucket:
        print "Enter the exact name of the bucket to restore from:",
        name = raw_input().strip()
        bucket = next(
            iter([bucket for bucket in buckets if bucket.name == name]), None)
        if not bucket:
            print "Not a valid bucket"

    print "Using bucket `{bucket_name}`".format(bucket_name=bucket.name)

    restore_before = datetime.today()
    date_set = "n"
    while not date_set == "y":
        print "From how many days ago do you wish to restore? ",
        days = raw_input().strip()
        try:
            day_offset = int(days)
        except exceptions.ValueError:
            print "Error, you must supply an integer"
            continue
        restore_before = datetime.today() - timedelta(days=day_offset)
        print "Use files modified on `{date}` (or nearest preceding version) (y/N)? ".format(
            date=restore_before),
        date_set = raw_input().strip().lower()

    print
    print "Add files/folders for restoration"
    all_folders = "n"
    objects_to_restore = []
    while not all_folders == "y":
        print "Full path of file/folder to restore: ",
        add_folder = raw_input().strip()
        if add_folder[0] is not "/":
            print "Error, supplied path does not begin with a `/`; discarding"
        else:
            objects_to_restore.append(add_folder)
        print "Folders currently in restore set: "
        print "\n".join(["- {}".format(f) for f in objects_to_restore])

        print "Done adding folders (y/N)? ",
        all_folders = raw_input().strip().lower()

    print "NOTICE: Files will be restored to *this* working directory (and subdirectories)"
    print "Do you want to run the restore (y/N)? ",
    if not raw_input().strip().lower() == "y":
        sys.exit(-1)
    else:
        valid_prefixes = []
        print "Running restore from bucket `{bucket_name}`".format(
            bucket_name=bucket.name)

        for obj in objects_to_restore:
            prefix = obj[1:]  # Remove the leading slash
            keys = bucket.get_all_versions(prefix=prefix)
            if not keys:
                print "Invalid prefix: `{obj}`".format(obj=obj)
            else:
                valid_prefixes.append(prefix)

        print
        print "Restoring files modified *before* `{restore_date}` (or nearest preceding version)".format(
            restore_date=restore_before)
        print "Aggregating backupset details..."
        # Determine the available versions for this file list
        all_files = {}
        for prefix in valid_prefixes:
            for version in bucket.list_versions(prefix=prefix):
                last_modified = parse_ts(version.last_modified)
                if last_modified < restore_before:
                    # Only restore if older than specified date
                    if version.name not in all_files or version.last_modified > all_files[
                            version.name].last_modified:
                        # Add to list, or update if newer version available
                        all_files[version.name] = version

        total_file_count = len(all_files.keys())
        print "{count} file(s) to be restored".format(count=total_file_count)
        print
        print "Beginning Restore: "
        i = 0
        for file_prefix, version in all_files.iteritems():
            i = i + 1
            print "- ({number}/{total}): `{name}`".format(
                number=i, total=total_file_count, name=file_prefix)

            dirs = os.path.dirname(file_prefix)
            if not os.path.exists(dirs):
                os.makedirs(dirs)

            if isinstance(version, DeleteMarker):
                print "      WARNING: File was previously DELETED on {date}; skipping".format(
                    date=version.last_modified)
            else:
                if not os.path.exists(file_prefix):
                    # Open relative to our working path
                    fp = open(file_prefix, "w")
                    version.get_file(fp, version_id=version.version_id)
                    fp.close()
                else:
                    print "      WARNING: Already exists at restore location; skipping"
Example no. 31
 def modified_time(self, name):
     dt = tz.make_aware(parse_ts(self._get_key(name).last_modified), tz.utc)
     return tz.make_naive(dt)
Example no. 32
 def get_modified_time(self, name):
     dt = tz.make_aware(parse_ts(self._get_key(name).last_modified), tz.utc)
     return dt if setting('USE_TZ') else tz.make_naive(dt)
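Example no. 31 always hands back a naive datetime converted to local time, while this variant respects Django's USE_TZ setting. A usage sketch, assuming this backend is the configured default storage, USE_TZ is defined in settings, and 'example.txt' (a placeholder name) already exists in the bucket:

from django.core.files.storage import default_storage

mtime = default_storage.get_modified_time('example.txt')  # placeholder name
# USE_TZ = True  -> timezone-aware datetime in UTC
# USE_TZ = False -> naive datetime converted to the default TIME_ZONE
print(mtime.tzinfo)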
Example no. 33
def backup(mount_point, aws_key, aws_secret_key, lockers=[], dryrun=False, keep_old=25):
    devices = [get_device_for_mount(mount_point)]
    mapped_devices = [str.replace(device, '/dev/xvd', '/dev/sd') for device in devices]
    if devices[0].startswith("/dev/md"):
        devices = get_devices_for_raid(devices[0])

    instance_id = boto.utils.get_instance_metadata()['instance-id']
    region = boto.utils.get_instance_metadata()['placement']['availability-zone'][:-1]
    ec2 = connect_to_region(region, aws_access_key_id=aws_key, aws_secret_access_key=aws_secret_key)
    instance = ec2.get_all_instances([instance_id])[0].instances[0]
    
    all_volumes = ec2.get_all_volumes()
    volumes = []
    for v in all_volumes:
        if v.attach_data.instance_id == instance_id:
            if v.attach_data.device in devices or v.attach_data.device in mapped_devices:
                volumes.append(v)

    if not volumes:
        sys.stderr.write("No EBS volumes found for devices %s\n" % devices)
        sys.exit(1)

    logging.info("Instance ID: %s", instance_id)
    logging.info("Devices: %s", ", ".join(devices))
    logging.info("Volumes: %s", ", ".join(v.id for v in volumes))

    locker_instances = []
    for l in lockers:
        l = l.split(':')
        cls = LOCKER_CLASSES[l[0]]
        kwargs = {}
        for k, v in (x.split('=') for x in l[1:]):
            if v.lower() == "true":
                v = True
            elif v.lower() == "false":
                v = False
            kwargs[k] = v
        kwargs['dryrun'] = dryrun
        inst = cls(**kwargs)
        locker_instances.append(inst)
        if not inst.validate():
            return

    locker_instances.append(XFSLocker(mount_point, dryrun=dryrun))

    with contextlib.nested(*locker_instances):
        for v in volumes:
            name = v.tags.get('Name')
            logging.info("Snapshoting %s (%s)", v.id, name or 'NONAME')
            if not dryrun:
                snap = v.create_snapshot()
                if name:
                    snap.add_tag('Name', name)

    snapshots = ec2.get_all_snapshots(filters={'volume-id': [volume.id for volume in volumes]})
    for s in snapshots:
        start_time = parse_ts(s.start_time)
        if start_time < datetime.now() - timedelta(keep_old):
            logging.info("Deleting Snapshot %s (%s - %s) of %s from %s", s.id, s.description, s.tags, s.volume_id, s.start_time)
            if not dryrun:
                s.delete()
Example no. 34
def main():
    config = read_config()
    # Cool! Let's set up everything.
    connect_to_region(config.region, aws_access_key_id=access_key_id, aws_secret_access_key=secret_key)
    glacier = Layer2(aws_access_key_id=access_key_id, aws_secret_access_key=secret_key, region_name=config.region)
    vault = glacier.get_vault(config.vault_name)
    # workaround for UnicodeDecodeError
    # https://github.com/boto/boto/issues/3318
    vault.name = str(vault.name)
    print "Beginning job on " + vault.arn

    # Ah, we don't have a vault listing yet.
    if not config.ls_present:

        # No job yet? Initiate a job.
        if not config.inventory_job:
            config.inventory_job = vault.retrieve_inventory()
            config.write()
            print "Requested an inventory. This usually takes about four hours."
            terminate(0)

        # We have a job, but it's not finished.
        job = vault.get_job(config.inventory_job)
        if not job.completed:
            print "Waiting for an inventory. This usually takes about four hours."
            terminate(0)

        # Finished!
        try:
            data = json.loads(job.get_output().read())
        except ValueError:
            print "Something went wrong interpreting the data Amazon sent!"
            terminate(1)

        config.ls = {}
        for archive in data['ArchiveList']:
            config.ls[archive['ArchiveDescription']] = {
                'id': archive['ArchiveId'],
                'last_modified': int(float(time.mktime(parse_ts(archive['CreationDate']).timetuple()))),
                'size': int(archive['Size']),
                'hash': archive['SHA256TreeHash']
            }

        config.ls_present = '-'
        config.inventory_job = ''
        config.write()
        print "Imported a new inventory from Amazon."

    database = Database(
        host=db_host,
        port=db_port,
        username=db_username,
        password=db_password,
        name=db_name
    )
    print "Connected to database."
    # Let's upload!
    os.stat_float_times(False)
    try:
        i = 0
        transferred = 0
        time_begin = time.time()
        for dir in config.dirs:
            print "Syncing " + dir
            for file in database.files():
                path = dir + os.sep + file

                if not os.path.exists(path):
                    #print >> sys.stderr, "'%s' does not exist" % path
		    print "\n" + "'%s' does not exist" % path
                    continue

                # If it's a directory, then ignore it
                if not os.path.isfile(path):
                    continue

                last_modified = int(os.path.getmtime(path))
                size = os.path.getsize(path)
                updating = False
                if file in config.ls:

                    # Has it not been modified since?
                    if config.ls[file]['last_modified'] >= last_modified and config.ls[file]['size'] == size:
                        continue

                    # It's been changed... we should delete the old one
                    else:
                        vault.delete_archive(config.ls[file]['id'])
                        del config.ls[file]
                        updating = True
                        config.write()

                try:
                    print file + ": uploading... ",
                    id = vault.concurrent_create_archive_from_file(path, file)
                    config.ls[file] = {
                        'id': id,
                        'size': size,
                        'last_modified': last_modified
                    }

                    config.write()
                    i += 1
                    transferred += size
                    if updating:
                        print "updated."
                    else:
                        print "done."

                    database.update(file, id, vault)

                except UploadArchiveError:
                    print "FAILED TO UPLOAD."

    finally:
        database.close()
        elapsed = time.time() - time_begin
        print "\n" + str(i) + " files successfully uploaded."
        print "Transferred " + format_bytes(transferred) + " in " + format_time(elapsed) + " at rate of " + format_bytes(transferred / elapsed) + "/s."
        terminate(0)
Example no. 35
 def modified_time(self, name):
     dt = tz.make_aware(parse_ts(self._get_key(name).last_modified), tz.utc)
     return tz.make_naive(dt)
Example no. 36
 def last_modified(self):
     return mktime(parse_ts(self.key.last_modified).timetuple())
Example no. 37
AWS_BUCKET = os.environ.get('BACKUP_AWS_STORAGE_BUCKET_NAME')

s3connection = S3Connection(AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY)
s3bucket = s3connection.get_bucket(AWS_BUCKET)
###############################################################################

# Determine where to put this backup
now = datetime.datetime.now()
for directory in DIRECTORIES:
    prefix = directory['name'] + '/'
    earliest_current_date = now - datetime.timedelta(days=directory['days'])
    s3keys = s3bucket.list(prefix=prefix)
    large_enough_backups = filter(lambda x: x.size >= MINIMUM_SIZE, s3keys)
    young_enough_backup_found = False
    for backup in large_enough_backups:
        if parse_ts(backup.last_modified) >= earliest_current_date:
            young_enough_backup_found = True
    if not young_enough_backup_found:
        # This directory doesn't have any current backups; stop here and use it
        # as the destination
        break

# Perform the backup
filename = ''.join((prefix, DUMPFILE))
print('Backing up to "{}"...'.format(filename))

os.system(
    "{backup_command} && s3cmd put --multipart-chunk-size-mb={chunk_size}"
    " /srv/backups/{source} s3://{bucket}/{filename}"
    " && rm -rf /srv/backups/{source}".format(backup_command=BACKUP_COMMAND,
                                              bucket=AWS_BUCKET,
Example no. 38
    # Finished!
    try:
        data = json.loads(job.get_output().read())
    except ValueError:
        print "Something went wrong interpreting the data Amazon sent!"
        terminate(1)

    ls = {}
    for archive in data['ArchiveList']:
        ls[archive['ArchiveDescription']] = {
            'id':
            archive['ArchiveId'],
            'last_modified':
            int(
                float(
                    time.mktime(parse_ts(
                        archive['CreationDate']).timetuple()))),
            'size':
            int(archive['Size']),
            'hash':
            archive['SHA256TreeHash']
        }

    ls_present = '-'
    inventory_job = ''
    write()
    print "Imported a new inventory from Amazon."

# Let's upload!
os.stat_float_times(False)
try:
    i = 0
Example no. 39
def backup(app_code):
    """
    Backup postgres database for specific `app_code`.

    Args:
            app_code (str): `kc` or `kpi`
    """

    DBDATESTAMP = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')

    # `postgis://` isn't recognized by `pg_dump`; replace it with `postgres://`
    DBURL = re.sub(r'^postgis://', 'postgres://', APP_CODES.get(app_code))
    # Because we are running `pg_dump` within the container,
    # we need to replace the hostname ...
    DBURL = DBURL.replace(os.getenv("POSTGRES_HOST"), "127.0.0.1")
    # ... and the port for '127.0.0.1:5432'
    DBURL = re.sub(r"\:(\d+)\/", ":5432/", DBURL)

    DUMPFILE = 'postgres-{}-{}-{}-{}.pg_dump'.format(
        app_code,
        os.environ.get('PG_MAJOR'),
        os.environ.get('PUBLIC_DOMAIN_NAME'),
        DBDATESTAMP,
    )

    BACKUP_COMMAND = 'pg_dump --format=c --dbname="{}"'.format(DBURL)

    # Determine where to put this backup
    now = datetime.datetime.now()
    for directory in DIRECTORIES:
        prefix = directory['name'] + '/'
        earliest_current_date = now - datetime.timedelta(
            days=directory['days'])
        s3keys = s3bucket.list(prefix=prefix)
        large_enough_backups = filter(lambda x: x.size >= MINIMUM_SIZE, s3keys)
        young_enough_backup_found = False
        for backup in large_enough_backups:
            if parse_ts(backup.last_modified) >= earliest_current_date:
                young_enough_backup_found = True
        if not young_enough_backup_found:
            # This directory doesn't have any current backups; stop here and use it
            # as the destination
            break

    # Perform the backup
    filename = ''.join((prefix, DUMPFILE))
    print('Backing up to "{}"...'.format(filename))
    upload = s3bucket.new_key(filename)
    chunks_done = 0
    with smart_open.smart_open(upload, 'wb') as s3backup:
        process = subprocess.Popen(BACKUP_COMMAND,
                                   shell=True,
                                   stdout=subprocess.PIPE)
        while True:
            chunk = process.stdout.read(CHUNK_SIZE)
            if not len(chunk):
                print('Finished! Wrote {} chunks; {}'.format(
                    chunks_done,
                    humanize.naturalsize(chunks_done * CHUNK_SIZE)))
                break
            s3backup.write(chunk)
            chunks_done += 1
            if '--hush' not in sys.argv:
                print('Wrote {} chunks; {}'.format(
                    chunks_done,
                    humanize.naturalsize(chunks_done * CHUNK_SIZE)))

    print('Backup `{}` successfully sent to S3.'.format(filename))
Example no. 40
	if not job.completed:
		print "Waiting for an inventory. This usually takes about four hours."
		terminate(0)

	# Finished!
	try:
		data = json.loads(job.get_output().read())
	except ValueError:
		print "Something went wrong interpreting the data Amazon sent!"
		terminate(1)

	ls = {}
	for archive in data['ArchiveList']:
		ls[archive['ArchiveDescription']] = {
			'id': archive['ArchiveId'],
			'last_modified': int(float(time.mktime(parse_ts(archive['CreationDate']).timetuple()))),
			'size': int(archive['Size']),
			'hash': archive['SHA256TreeHash']
		}

	ls_present = '-'
	inventory_job = ''
	write()
	print "Imported a new inventory from Amazon."

db_connection = pymysql.connect(
	host=db_host,
	port=db_port,
	user=db_username,
	password=db_password,
	db=db_name
Example no. 41
 def get_key_timestamp(self, file_id):
     key = self.get_key(file_id)
     if key:
         last_updated = parse_ts(key.last_modified)
         return last_updated.strftime('%Y-%m-%d %X')
     return time.strftime('%Y-%m-%d %X', time.localtime())
Example no. 42
    def sync_fbo_weekly(self):
        """
        This task will sync the latest full copy of FBO's xml and any intermediary files. It will overwrite the weekly file.
        We make a personal s3 copy of the data since the FBO ftp service is unreliable and tends to get hammered
        during peak hours. Files are stored to S3 in a gzipped format.

        Working files are stored in temp_dir and can be processed in other processes.
        """

        conn = connect_s3()
        vitals_bucket = conn.get_bucket(S3_BUCKET)


        storage_path = None

        try:
            self.ftp.connect()
            self.ftp.login()

            sourceModifiedTime = self.ftp.sendcmd('MDTM datagov/FBOFullXML.xml')[4:]
            sourceModifiedDateTime = datetime.strptime(sourceModifiedTime, "%Y%m%d%H%M%S")
            sourceModifiedDateTimeStr = sourceModifiedDateTime.strftime("%Y%m%d")
            filename = 'FBOFullXML'+sourceModifiedDateTimeStr+'.xml'


            fullFBOKey = vitals_bucket.get_key(S3_EXTRACT_PREFIX+filename+S3_ARCHIVE_FORMAT)

            if not fullFBOKey or parse_ts(fullFBOKey.last_modified) < sourceModifiedDateTime:
                #Update S3 copy with latest

                print "downloading the latest full xml from repository"
                storage_path = path.join(self.temp_dir, filename)

                with open(storage_path, 'wb') as local_file:
                    # Download the file a chunk at a time using RET
                    self.ftp.retrbinary('RETR datagov/FBOFullXML.xml', local_file.write)

        finally:
            self.ftp.close()

        if not storage_path:
            return

        print "zipping the fbo full file"
        zipped_storage_path = path.join(self.temp_dir, filename+S3_ARCHIVE_FORMAT)
        with open(storage_path, 'rb') as f_in:
            with gzip.GzipFile(zipped_storage_path, 'wb') as myzip:
                myzip.writelines(f_in)

        print "uploading the latest full xml to S3"
        # Put file to S3
        source_size = os.stat(zipped_storage_path).st_size

        # Create a multipart upload request
        mp = vitals_bucket.initiate_multipart_upload(S3_EXTRACT_PREFIX+os.path.basename(zipped_storage_path))

        # Use a chunk size of 50 MiB (feel free to change this)
        chunk_size = 52428800
        chunk_count = int(math.ceil(source_size / chunk_size))

        # Send the file parts, using FileChunkIO to create a file-like object
        # that points to a certain byte range within the original file. We
        # set bytes to never exceed the original file size.
        try:
            for i in range(chunk_count + 1):
                print "uploading chunk {0} of {1}".format(i+1, chunk_count+1)
                offset = chunk_size * i
                bytes = min(chunk_size, source_size - offset)
                with FileChunkIO(zipped_storage_path, 'r', offset=offset,
                                     bytes=bytes) as fp:
                     mp.upload_part_from_file(fp, part_num=i + 1)
        finally:
            # Finish the upload
            mp.complete_upload()

            print "clearing any delta files from s3"
            keys_to_delete = vitals_bucket.list(prefix=S3_EXTRACT_PREFIX)
            for key in keys_to_delete:
                if 'FBOFeed' in key:
                    vitals_bucket.delete_key(key)
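The freshness test in this task reduces to: re-download when there is no S3 copy yet, or when the FTP server's MDTM time is newer than the S3 object's last_modified. A compact sketch of just that comparison (mdtm_reply is a raw FTP reply such as '213 20150301100000'; the helper name is hypothetical):

import datetime
from boto.utils import parse_ts

def needs_refresh(mdtm_reply, s3_key):
    """True when the FTP source is newer than our S3 copy, or no copy exists."""
    source_mtime = datetime.datetime.strptime(mdtm_reply[4:], '%Y%m%d%H%M%S')
    return s3_key is None or parse_ts(s3_key.last_modified) < source_mtime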
Example no. 43
def run():
    """
    Backup postgres database for specific `app_code`.
    """

    s3connection = S3Connection(AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY)
    s3bucket = s3connection.get_bucket(AWS_BUCKET)

    DBDATESTAMP = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')

    DUMPFILE = 'mongo-{}-{}-{}.gz'.format(
        os.environ.get('MONGO_MAJOR'),
        os.environ.get('PUBLIC_DOMAIN_NAME'),
        DBDATESTAMP,
    )

    MONGO_INITDB_ROOT_USERNAME = os.environ.get('MONGO_INITDB_ROOT_USERNAME')
    MONGO_INITDB_ROOT_PASSWORD = os.environ.get('MONGO_INITDB_ROOT_PASSWORD')

    if MONGO_INITDB_ROOT_USERNAME and MONGO_INITDB_ROOT_PASSWORD:
        BACKUP_COMMAND = 'mongodump --archive --gzip --username="******"' \
                        ' --password="******"'.format(
                            username=MONGO_INITDB_ROOT_USERNAME,
                            password=MONGO_INITDB_ROOT_PASSWORD
                        )
    else:
        BACKUP_COMMAND = "mongodump --archive --gzip"

    # Determine where to put this backup
    now = datetime.datetime.now()

    for directory in DIRECTORIES:
        prefix = directory['name'] + '/'
        earliest_current_date = now - datetime.timedelta(days=directory['days'])
        s3keys = s3bucket.list(prefix=prefix)
        large_enough_backups = filter(lambda x: x.size >= MINIMUM_SIZE, s3keys)
        young_enough_backup_found = False
        for backup in large_enough_backups:
            if parse_ts(backup.last_modified) >= earliest_current_date:
                young_enough_backup_found = True
        if not young_enough_backup_found:
            # This directory doesn't have any current backups; stop here and use it
            # as the destination
            break

    # Perform the backup
    filename = ''.join((prefix, DUMPFILE))
    print('Backing up to "{}"...'.format(filename))
    upload = s3bucket.new_key(filename)
    chunks_done = 0
    with smart_open.smart_open(upload, 'wb') as s3backup:
        process = subprocess.Popen(
            BACKUP_COMMAND, shell=True, stdout=subprocess.PIPE)
        while True:
            chunk = process.stdout.read(CHUNK_SIZE)
            if not len(chunk):
                print('Finished! Wrote {} chunks; {}'.format(
                    chunks_done,
                    humanize.naturalsize(chunks_done * CHUNK_SIZE)
                ))
                break
            s3backup.write(chunk)
            chunks_done += 1
            if '--hush' not in sys.argv:
                print('Wrote {} chunks; {}'.format(
                    chunks_done,
                    humanize.naturalsize(chunks_done * CHUNK_SIZE)
                ))

    print('Backup `{}` successfully sent to S3.'.format(filename))
Example no. 44
 def get_modified_time(self, name):
     dt = tz.make_aware(parse_ts(self._get_key(name).last_modified), tz.utc)
     return dt if setting('USE_TZ') else tz.make_naive(dt)
Example no. 45
	if not job.completed:
		print "Waiting for an inventory. This usually takes about four hours."
		terminate(0)
	
	# Finished!
	try:
		data = json.loads(job.get_output().read())
	except ValueError:
		print "Something went wrong interpreting the data Amazon sent!"
		terminate(1)
	
	ls = {}
	for archive in data['ArchiveList']:
		ls[archive['ArchiveDescription']] = {
			'id': archive['ArchiveId'],
			'last_modified': int(float(time.mktime(parse_ts(archive['CreationDate']).timetuple()))),
			'size': int(archive['Size']),
			'hash': archive['SHA256TreeHash']
		}
		
	ls_present = '-'
	inventory_job = ''
	write()
	print "Imported a new inventory from Amazon."
	
# Let's upload!
os.stat_float_times(False)
try:
	i = 0
	transferred = 0
	time_begin = time.time()
Example no. 46
 def get_key_timestamp(self, file_id):
     key = self.get_key(file_id)
     if key:
         last_updated = parse_ts(key.last_modified)
         return last_updated.strftime('%Y-%m-%d %X')
     return time.strftime('%Y-%m-%d %X', time.localtime())
Example no. 47
def main():
    config = ConfigParser.ConfigParser()
    config.read(filenames=["aws-s3-restore.conf"])

    aws_access_key = config.get(section="aws", option="key")
    aws_access_secret = config.get(section="aws", option="secret")

    conn = S3Connection(aws_access_key, aws_access_secret)

    buckets = conn.get_all_buckets()

    print "The following buckets are available"
    print "\n".join(["- {}".format(bucket.name) for bucket in buckets])

    bucket = None
    while not bucket:
        print "Enter the exact name of the bucket to restore from:",
        name = raw_input().strip()
        bucket = next(
            iter([bucket for bucket in buckets if bucket.name == name]), None)
        if not bucket:
            print "Not a valid bucket"

    print "Using bucket `{bucket_name}`".format(bucket_name=bucket.name)

    restore_before = datetime.today()
    date_set = "n"
    while not date_set == "y":
        print "From how many days ago do you wish to restore? ",
        days = raw_input().strip()
        try:
            day_offset = int(days)
        except exceptions.ValueError:
            print "Error, you must supply an integer"
            continue
        restore_before = datetime.today() - timedelta(days=day_offset)
        print "Use files modified on `{date}` (or nearest preceding version) (y/N)? ".format(date=restore_before),
        date_set = raw_input().strip().lower()

    print
    print "Add files/folders for restoration"
    all_folders = "n"
    objects_to_restore = []
    while not all_folders == "y":
        print "Full path of file/folder to restore: ",
        add_folder = raw_input().strip()
        if add_folder[0] is not "/":
            print "Error, supplied path does not begin with a `/`; discarding"
        else:
            objects_to_restore.append(add_folder)
        print "Folders currently in restore set: "
        print "\n".join(["- {}".format(f) for f in objects_to_restore])

        print "Done adding folders (y/N)? ",
        all_folders = raw_input().strip().lower()

    print "NOTICE: Files will be restored to *this* working directory (and subdirectories)"
    print "Do you want to run the restore (y/N)? ",
    if not raw_input().strip().lower() == "y":
        sys.exit(-1)
    else:
        valid_prefixes = []
        print "Running restore from bucket `{bucket_name}`".format(bucket_name=bucket.name)

        for obj in objects_to_restore:
            prefix = obj[1:]    # Remove the leading slash
            keys = bucket.get_all_versions(prefix=prefix)
            if not keys:
                print "Invalid prefix: `{obj}`".format(obj=obj)
            else:
                valid_prefixes.append(prefix)

        print
        print "Restoring files modified *before* `{restore_date}` (or nearest preceding version)".format(restore_date=restore_before)
        print "Aggregating backupset details..."
        # Determine the available versions for this file list
        all_files = {}
        for prefix in valid_prefixes:
            for version in bucket.list_versions(prefix=prefix):
                last_modified = parse_ts(version.last_modified)
                if last_modified < restore_before:
                    # Only restore if older than specified date
                    if version.name not in all_files or version.last_modified > all_files[version.name].last_modified:
                        # Add to list, or update if newer version available
                        all_files[version.name] = version

        total_file_count = len(all_files.keys())
        print "{count} file(s) to be restored".format(count=total_file_count)
        print
        print "Beginning Restore: "
        i = 0
        for file_prefix, version in all_files.iteritems():
            i = i + 1
            print "- ({number}/{total}): `{name}`".format(number=i, total=total_file_count, name=file_prefix)

            dirs = os.path.dirname(file_prefix)
            if not os.path.exists(dirs):
                os.makedirs(dirs)

            if isinstance(version, DeleteMarker):
                print "      WARNING: File was previously DELETED on {date}; skipping".format(date=version.last_modified)
            else:
                if not os.path.exists(file_prefix):
                    # Open relative to our working path
                    fp = open(file_prefix, "w")
                    version.get_file(fp, version_id=version.version_id)
                    fp.close()
                else:
                    print "      WARNING: Already exists at restore location; skipping"