Example #1
#!/usr/bin/python2.7
"""
- Author : Nag m
- Hack   : Delete all objects in a bucket
- Info   : Delete all objects in a bucket
            * 101-s3-aws
"""

import boto
from boto.s3.connection import OrdinaryCallingFormat


def deleteall(name):
    bucket = conn.get_bucket(name)
    for obj in bucket.list():
        print " Deleting ... ", obj.name, obj.delete()


if __name__ == "__main__":
    conn = boto.connect_s3(calling_format=OrdinaryCallingFormat())
    bucketname = "101-s3-aws"
    deleteall(bucketname)
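A minimal batched variant of the same idea, as a hedged sketch: boto 2.x also exposes Bucket.delete_keys(), which wraps S3's multi-object delete. The bucket name is the same placeholder as above, and chunking to 1,000 names per call is an assumption based on the S3 multi-delete limit.

import boto
from boto.s3.connection import OrdinaryCallingFormat


def delete_all_batched(name, chunk=1000):
    # Collect key names, then remove them in batches instead of issuing
    # one DELETE request per object.
    conn = boto.connect_s3(calling_format=OrdinaryCallingFormat())
    bucket = conn.get_bucket(name)
    names = [key.name for key in bucket.list()]
    for start in range(0, len(names), chunk):
        bucket.delete_keys(names[start:start + chunk])


if __name__ == "__main__":
    delete_all_batched("101-s3-aws")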
Example #2
if __name__ == "__main__":
    if 'S3_ACCESS_KEY_ID' in os.environ:
        Id = os.environ['S3_ACCESS_KEY_ID']
    if 'S3_SECRET_ACCESS_KEY' in os.environ:
        Key = os.environ['S3_SECRET_ACCESS_KEY']
    if 'S3_HOSTNAME' in os.environ:
        Host, Port = os.environ['S3_HOSTNAME'].split(':')
    Port = int(Port)

    s3 = boto.connect_s3(Id,
                         Key,
                         host=Host,
                         port=Port,
                         is_secure=False,
                         calling_format=OrdinaryCallingFormat())

    #
    # Bucket create
    #
    if args.mk is not False:
        if not args.mk:
            buckets = DEFAULT_BUCKETS
        else:
            buckets = args.mk
        create_buckets(s3, buckets)
    #
    # Bucket remove
    #
    if args.rm is not False:
        if not args.rm:
Example #3
 def make_connection(self, user):
     return S3Connection(user['key_id'], user['key_secret'], is_secure=False,
                         host=self.host, port=self.port, debug=False,
                         calling_format=OrdinaryCallingFormat() )
Example #4
def get_connection(scheme, parsed_url, storage_uri):
    try:
        from boto.s3.connection import S3Connection
        assert hasattr(S3Connection, u'lookup')

        # Newer versions of boto default to using
        # virtual hosting for buckets as a result of
        # upstream deprecation of the old-style access
        # method by Amazon S3. This change is not
        # backwards compatible (in particular with
        # respect to upper case characters in bucket
        # names); so we default to forcing use of the
        # old-style method unless the user has
        # explicitly asked us to use new-style bucket
        # access.
        #
        # Note that if the user wants to use new-style
        # buckets, we use the subdomain calling form
        # rather than given the option of both
        # subdomain and vhost. The reason being that
        # anything addressable as a vhost, is also
        # addressable as a subdomain. Seeing as the
        # latter is mostly a convenience method of
        # allowing browseable content semi-invisibly
        # being hosted on S3, the former format makes
        # a lot more sense for us to use - being
        # explicit about what is happening (the fact
        # that we are talking to S3 servers).

        try:
            from boto.s3.connection import OrdinaryCallingFormat
            from boto.s3.connection import SubdomainCallingFormat
            cfs_supported = True
            calling_format = OrdinaryCallingFormat()
        except ImportError:
            cfs_supported = False
            calling_format = None

        if globals.s3_use_new_style:
            if cfs_supported:
                calling_format = SubdomainCallingFormat()
            else:
                log.FatalError(
                    u"Use of new-style (subdomain) S3 bucket addressing was"
                    u"requested, but does not seem to be supported by the "
                    u"boto library. Either you need to upgrade your boto "
                    u"library or duplicity has failed to correctly detect "
                    u"the appropriate support.", log.ErrorCode.boto_old_style)
        else:
            if cfs_supported:
                calling_format = OrdinaryCallingFormat()
            else:
                calling_format = None

    except ImportError:
        log.FatalError(
            u"This backend (s3) requires boto library, version %s or later, "
            u"(http://code.google.com/p/boto/)." % BOTO_MIN_VERSION,
            log.ErrorCode.boto_lib_too_old)

    if not parsed_url.hostname:
        # Use the default host.
        conn = storage_uri.connect(
            is_secure=(not globals.s3_unencrypted_connection))
    else:
        assert scheme == u's3'
        conn = storage_uri.connect(
            host=parsed_url.hostname,
            port=parsed_url.port,
            is_secure=(not globals.s3_unencrypted_connection))

    if hasattr(conn, u'calling_format'):
        if calling_format is None:
            log.FatalError(
                u"It seems we previously failed to detect support for calling "
                u"formats in the boto library, yet the support is there. This is "
                u"almost certainly a duplicity bug.",
                log.ErrorCode.boto_calling_format)
        else:
            conn.calling_format = calling_format

    else:
        # Duplicity hangs if boto gets a null bucket name.
        # HC: Caught a socket error, trying to recover
        raise BackendException(u'Boto requires a bucket name.')
    return conn
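To make the long comment above concrete, here is a hedged sketch (not part of duplicity) comparing how the two calling formats build the request host; the endpoint and bucket names are placeholders, and the dotted/upper-case name is exactly the case the comment says only old-style addressing handles.

from boto.s3.connection import OrdinaryCallingFormat, SubdomainCallingFormat

ordinary = OrdinaryCallingFormat()
subdomain = SubdomainCallingFormat()

# Old-style (path) addressing: the bucket stays out of the hostname,
# so a name like "My.Bucket" is still addressable.
print ordinary.build_host('s3.amazonaws.com', 'My.Bucket')   # s3.amazonaws.com

# New-style (subdomain) addressing: the bucket becomes part of the hostname.
print subdomain.build_host('s3.amazonaws.com', 'my-bucket')  # my-bucket.s3.amazonaws.com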
Example #5
def s3_copy(aws_access_key_id, aws_secret_access_key, bucket, src, dst):
    try:
        conn = boto.connect_s3(aws_access_key_id, aws_secret_access_key, is_secure=False, calling_format=OrdinaryCallingFormat())
        bucket = conn.get_bucket(bucket, validate=True)
        bucket.copy_key(dst, bucket.name, src)
        return True
    except Exception as e:
        info(e)
        return False
Example #6
def _connect(access_id, access_key, host=None, port=None, is_secure=False):
  return boto.connect_s3(
    access_id, access_key, host=host, port=port, is_secure=is_secure,
    calling_format=OrdinaryCallingFormat()
  )
Example #7
from boto.s3.connection import S3Connection
from boto.s3.connection import OrdinaryCallingFormat
def sizeof_fmt(num):
    for x in ['bytes', 'KB', 'MB', 'GB', 'TB']:
        if num < 1024.0:
            return "%3.1f %s" % (num, x)
        num /= 1024.0


conn = S3Connection('s3key', 's3secretkey', calling_format=OrdinaryCallingFormat())
print conn

total_bytes = 0
buckets = conn.get_all_buckets()
for key in buckets:
    print key.name
    try:
        bucket = conn.get_bucket(key.name)
    except Exception:
        print "s3 exception"
        continue
    bucket_size = 0
    for key1 in bucket:
        bucket_size += key1.size
        total_bytes += key1.size
    print sizeof_fmt(bucket_size)
print "total bucket size %s" % sizeof_fmt(total_bytes)
Example #8
INSTALLED_APPS += ('gunicorn', )

# STORAGE CONFIGURATION
# ------------------------------------------------------------------------------
# Uploaded Media Files
# ------------------------
# See: http://django-storages.readthedocs.io/en/latest/index.html
INSTALLED_APPS += ('storages', )

AWS_ACCESS_KEY_ID = env('DJANGO_AWS_ACCESS_KEY_ID')
AWS_SECRET_ACCESS_KEY = env('DJANGO_AWS_SECRET_ACCESS_KEY')
AWS_STORAGE_BUCKET_NAME = env('DJANGO_AWS_STORAGE_BUCKET_NAME')
AWS_AUTO_CREATE_BUCKET = True
AWS_QUERYSTRING_AUTH = False
AWS_S3_CALLING_FORMAT = OrdinaryCallingFormat()

# AWS cache settings, don't change unless you know what you're doing:
AWS_EXPIRY = 60 * 60 * 24 * 7

# TODO See: https://github.com/jschneier/django-storages/issues/47
# Revert the following and use str after the above-mentioned bug is fixed in
# either django-storage-redux or boto
AWS_HEADERS = {
    'Cache-Control':
    six.b('max-age=%d, s-maxage=%d, must-revalidate' %
          (AWS_EXPIRY, AWS_EXPIRY))
}

# URL that handles the media served from MEDIA_ROOT, used for managing
# stored files.
Example #9
def main(argv=None):
    parser = argparse.ArgumentParser(
        description='extent stats via Nuxeo REST API')
    parser.add_argument('path', nargs=1, help="root path")
    parser.add_argument(
        'outdir',
        nargs=1,
    )
    parser.add_argument('--no-s3-check', dest='s3_check', action='store_false')
    utils.get_common_options(parser)
    if argv is None:
        argv = parser.parse_args()

    # look up all the files in S3, so we can double check that all
    # the files exist as we loop through Nuxeo
    file_check = None
    s3_bytes = s3_count = 0
    if argv.s3_check:
        from boto import s3
        from boto.s3.connection import OrdinaryCallingFormat
        file_check = {}
        conn = s3.connect_to_region('us-west-2',
                                    calling_format=OrdinaryCallingFormat())
        bucket = conn.get_bucket('data.nuxeo.cdlib.org.oregon')
        for count, key in enumerate(bucket.list()):
            file_check[key.name] = key.size
            if count % 50000 == 0:
                print('{0} s3 files memorized'.format(count), file=sys.stderr)
            s3_bytes = s3_bytes + key.size
        s3_count = len(file_check)

    nx = utils.Nuxeo(rcfile=argv.rcfile, loglevel=argv.loglevel.upper())

    campuses = nx.children(argv.path[0])

    summary_workbook = xlsxwriter.Workbook(
        os.path.join(argv.outdir[0], '{}-summary.xlsx'.format(today)))
    # cell formats
    header_format = summary_workbook.add_format({
        'bold': True,
    })
    number_format = summary_workbook.add_format()
    number_format.set_num_format('#,##0')

    summary_worksheet = summary_workbook.add_worksheet('summary')
    # headers
    summary_worksheet.write(0, 1, 'deduplicated files', header_format)
    summary_worksheet.write(0, 2, 'deduplicated bytes', header_format)
    summary_worksheet.write(0, 4, 'total files', header_format)
    summary_worksheet.write(0, 5, 'total bytes', header_format)
    if argv.s3_check:
        summary_worksheet.write(0, 7, 'files on S3', header_format)
        summary_worksheet.write(0, 8, 'bytes on S3', header_format)
    # widths
    summary_worksheet.set_column(
        0,
        1,
        10,
    )
    summary_worksheet.set_column(
        2,
        2,
        25,
    )
    summary_worksheet.set_column(
        3,
        4,
        10,
    )
    summary_worksheet.set_column(
        5,
        5,
        25,
    )
    summary_worksheet.set_column(
        6,
        7,
        10,
    )
    summary_worksheet.set_column(
        8,
        8,
        25,
    )
    summary_worksheet.set_column(
        9,
        9,
        10,
    )
    true_count = dedup_total = total_count = running_total = 0
    row = 1
    for campus in campuses:
        basename = os.path.basename(campus['path'])
        documents = nx.nxql(
            'select * from Document where ecm:path startswith"{0}"'.format(
                campus['path']))
        (this_count, this_total, dedup_count,
         dedup_bytes) = forCampus(documents, basename, file_check,
                                  argv.outdir[0])
        summary_worksheet.write(row, 0, basename)
        summary_worksheet.write(row, 1, dedup_count, number_format)
        summary_worksheet.write(row, 2, dedup_bytes, number_format)
        summary_worksheet.write(row, 3, sizeof_fmt(dedup_bytes))
        summary_worksheet.write(row, 4, this_count, number_format)
        summary_worksheet.write(row, 5, this_total, number_format)
        summary_worksheet.write(row, 6, sizeof_fmt(this_total))
        total_count = total_count + this_count  # number of files
        running_total = running_total + this_total  # number of bytes
        true_count = true_count + dedup_count
        dedup_total = dedup_total + dedup_bytes  # number of bytes
        row = row + 1
    summary_worksheet.write(row, 0, '{}'.format(today))
    summary_worksheet.write(row, 1, true_count, number_format)
    summary_worksheet.write(row, 2, dedup_total, number_format)
    summary_worksheet.write(row, 3, sizeof_fmt(dedup_total))
    summary_worksheet.write(row, 4, total_count, number_format)
    summary_worksheet.write(row, 5, running_total, number_format)
    summary_worksheet.write(row, 6, sizeof_fmt(running_total))
    if argv.s3_check:
        summary_worksheet.write(row, 7, s3_count, number_format)
        summary_worksheet.write(row, 8, s3_bytes, number_format)
        summary_worksheet.write(row, 9, sizeof_fmt(s3_bytes))
    summary_workbook.close()
Example #10
def main():

    argument_spec = ec2_argument_spec()
    argument_spec.update(
        dict(
            force=dict(required=False, default='no', type='bool'),
            policy=dict(required=False, default=None, type='json'),
            name=dict(required=True, type='str'),
            requester_pays=dict(default='no', type='bool'),
            s3_url=dict(aliases=['S3_URL'], type='str'),
            state=dict(default='present', type='str', choices=['present', 'absent']),
            tags=dict(required=False, default=None, type='dict'),
            versioning=dict(default=None, type='bool'),
            ceph=dict(default='no', type='bool')
        )
    )

    module = AnsibleModule(argument_spec=argument_spec)

    if not HAS_BOTO:
        module.fail_json(msg='boto required for this module')

    region, ec2_url, aws_connect_params = get_aws_connection_info(module)

    if region in ('us-east-1', '', None):
        # S3ism for the US Standard region
        location = Location.DEFAULT
    else:
        # Boto uses symbolic names for locations but region strings will
        # actually work fine for everything except us-east-1 (US Standard)
        location = region

    s3_url = module.params.get('s3_url')

    # allow eucarc environment variables to be used if ansible vars aren't set
    if not s3_url and 'S3_URL' in os.environ:
        s3_url = os.environ['S3_URL']

    ceph = module.params.get('ceph')

    if ceph and not s3_url:
        module.fail_json(msg='ceph flavour requires s3_url')

    flavour = 'aws'

    # bucket names with .'s in them need to use the calling_format option,
    # otherwise the connection will fail. See https://github.com/boto/boto/issues/2836
    # for more details.
    aws_connect_params['calling_format'] = OrdinaryCallingFormat()

    # Look at s3_url and tweak connection settings
    # if connecting to Walrus or fakes3
    try:
        if s3_url and ceph:
            ceph = urlparse.urlparse(s3_url)
            connection = boto.connect_s3(
                host=ceph.hostname,
                port=ceph.port,
                is_secure=ceph.scheme == 'https',
                **aws_connect_params
            )
            flavour = 'ceph'
        elif is_fakes3(s3_url):
            fakes3 = urlparse.urlparse(s3_url)
            connection = S3Connection(
                is_secure=fakes3.scheme == 'fakes3s',
                host=fakes3.hostname,
                port=fakes3.port,
                **aws_connect_params
            )
        elif is_walrus(s3_url):
            del aws_connect_params['calling_format']
            walrus = urlparse.urlparse(s3_url).hostname
            connection = boto.connect_walrus(walrus, **aws_connect_params)
        else:
            connection = boto.s3.connect_to_region(location, is_secure=True, **aws_connect_params)
            # use this as fallback because connect_to_region seems to fail in boto + non 'classic' aws accounts in some cases
            if connection is None:
                connection = boto.connect_s3(**aws_connect_params)

    except boto.exception.NoAuthHandlerFound as e:
        module.fail_json(msg='No Authentication Handler found: %s ' % str(e))
    except Exception as e:
        module.fail_json(msg='Failed to connect to S3: %s' % str(e))

    if connection is None:  # this should never happen
        module.fail_json(msg='Unknown error, failed to create s3 connection, no information from boto.')

    state = module.params.get("state")

    if state == 'present':
        create_or_update_bucket(connection, module, location, flavour=flavour)
    elif state == 'absent':
        destroy_bucket(connection, module, flavour=flavour)
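The comment in this example about bucket names containing dots is the main reason OrdinaryCallingFormat appears throughout these modules. A standalone, hedged sketch of that workaround, outside the Ansible module (connect_for_bucket and its arguments are illustrative, not part of the original code):

import boto
from boto.s3.connection import OrdinaryCallingFormat


def connect_for_bucket(bucket_name, **aws_connect_params):
    # Virtual-host addressing would turn "my.dotted.bucket" into
    # "my.dotted.bucket.s3.amazonaws.com", which breaks the wildcard TLS
    # certificate; path-style addressing avoids that (boto issue #2836).
    if '.' in bucket_name:
        aws_connect_params['calling_format'] = OrdinaryCallingFormat()
    return boto.connect_s3(**aws_connect_params)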
Example #11
def s3_save(files_to_write, capture_record):
    """
    Write a sketch, scrape, and html file to S3
    """
    db.session.add(capture_record)
    # These are the content-types for the files S3 will be serving up
    response_types = {
        'sketch': 'image/png',
        'scrape': 'text/plain',
        'html': 'text/html'
    }

    # Iterate through each file we need to write to s3
    for capture_type, file_name in files_to_write.items():
        # Connect to S3, generate Key, set path based on capture_type, write file to S3
        conn = boto.connect_s3(calling_format=OrdinaryCallingFormat())
        key = Key(conn.get_bucket(app.config.get('S3_BUCKET_PREFIX')))
        path = "sketchy/{}/{}".format(capture_type, capture_record.id)
        key.key = path
        key.set_contents_from_filename(app.config['LOCAL_STORAGE_FOLDER'] +
                                       '/' + file_name)

        # Generate a URL for downloading the files
        url = conn.generate_url(app.config.get('S3_LINK_EXPIRATION'),
                                'GET',
                                bucket=app.config.get('S3_BUCKET_PREFIX'),
                                key=key.key,
                                response_headers={
                                    'response-content-type':
                                    response_types[capture_type],
                                    'response-content-disposition':
                                    'attachment; filename=' + file_name
                                })

        # Generate appropriate url based on capture_type
        if capture_type == 'sketch':
            capture_record.sketch_url = str(url)
            #print capture_record.sketch_url
        if capture_type == 'scrape':
            capture_record.scrape_url = str(url)
            #print capture_record.scrape_url
        if capture_type == 'html':
            capture_record.html_url = str(url)
            #print capture_record.html_url

    # Remove local files if we are saving to S3
    os.remove(
        os.path.join(app.config['LOCAL_STORAGE_FOLDER'],
                     files_to_write['sketch']))
    os.remove(
        os.path.join(app.config['LOCAL_STORAGE_FOLDER'],
                     files_to_write['scrape']))
    os.remove(
        os.path.join(app.config['LOCAL_STORAGE_FOLDER'],
                     files_to_write['html']))

    # If we don't have a finisher (callback), we are done
    if capture_record.callback:
        capture_record.capture_status = 'S3_ITEMS_SAVED'
    else:
        capture_record.capture_status = 'S3_ITEMS_SAVED'
        capture_record.job_status = 'COMPLETED'
    db.session.commit()
Example #12
 def __init__(self, connection, bucket_name):
     self.connection = connection
     if bucket_name != bucket_name.lower():
         self.connection.calling_format = OrdinaryCallingFormat()
     self.bucket = self.connection.get_bucket(bucket_name, validate=False)
Example #13
def main():
    argument_spec = ec2_argument_spec()
    argument_spec.update(
        dict(
            bucket=dict(required=True),
            object=dict(),
            src=dict(),
            dest=dict(default=None),
            mode=dict(
                choices=['get', 'put', 'delete', 'create', 'geturl', 'getstr'],
                required=True),
            expiry=dict(default=600, aliases=['expiration']),
            s3_url=dict(aliases=['S3_URL']),
            overwrite=dict(aliases=['force'], default=True, type='bool'),
            metadata=dict(type='dict'),
        ), )
    module = AnsibleModule(argument_spec=argument_spec)

    if not HAS_BOTO:
        module.fail_json(msg='boto required for this module')

    bucket = module.params.get('bucket')
    obj = module.params.get('object')
    src = module.params.get('src')
    if module.params.get('dest'):
        dest = os.path.expanduser(module.params.get('dest'))
    mode = module.params.get('mode')
    expiry = int(module.params['expiry'])
    s3_url = module.params.get('s3_url')
    overwrite = module.params.get('overwrite')
    metadata = module.params.get('metadata')

    region, ec2_url, aws_connect_kwargs = get_aws_connection_info(module)

    if region in ('us-east-1', '', None):
        # S3ism for the US Standard region
        location = Location.DEFAULT
    else:
        # Boto uses symbolic names for locations but region strings will
        # actually work fine for everything except us-east-1 (US Standard)
        location = region

    if module.params.get('object'):
        obj = os.path.expanduser(module.params['object'])

    # allow eucarc environment variables to be used if ansible vars aren't set
    if not s3_url and 'S3_URL' in os.environ:
        s3_url = os.environ['S3_URL']

    # Look at s3_url and tweak connection settings
    # if connecting to Walrus or fakes3
    try:
        if is_fakes3(s3_url):
            fakes3 = urlparse.urlparse(s3_url)
            s3 = S3Connection(is_secure=fakes3.scheme == 'fakes3s',
                              host=fakes3.hostname,
                              port=fakes3.port,
                              calling_format=OrdinaryCallingFormat(),
                              **aws_connect_kwargs)
        elif is_walrus(s3_url):
            walrus = urlparse.urlparse(s3_url).hostname
            s3 = boto.connect_walrus(walrus, **aws_connect_kwargs)
        else:
            s3 = boto.s3.connect_to_region(
                location,
                is_secure=True,
                calling_format=OrdinaryCallingFormat(),
                **aws_connect_kwargs)
            # use this as fallback because connect_to_region seems to fail in boto + non 'classic' aws accounts in some cases
            if s3 is None:
                s3 = boto.connect_s3(**aws_connect_kwargs)

    except boto.exception.NoAuthHandlerFound as e:
        module.fail_json(msg='No Authentication Handler found: %s ' % str(e))
Example #14
def main():
    argument_spec = ec2_argument_spec()
    argument_spec.update(
        dict(
            bucket=dict(required=True),
            dest=dict(default=None),
            encrypt=dict(default=True, type='bool'),
            expiry=dict(default=600, aliases=['expiration']),
            headers=dict(type='dict'),
            marker=dict(default=None),
            max_keys=dict(default=1000),
            metadata=dict(type='dict'),
            mode=dict(choices=[
                'get', 'put', 'delete', 'create', 'geturl', 'getstr', 'delobj',
                'list'
            ],
                      required=True),
            object=dict(),
            permission=dict(type='list', default=['private']),
            version=dict(default=None),
            overwrite=dict(aliases=['force'], default='always'),
            prefix=dict(default=None),
            retries=dict(aliases=['retry'], type='int', default=0),
            s3_url=dict(aliases=['S3_URL']),
            src=dict(),
        ), )
    module = AnsibleModule(argument_spec=argument_spec)

    if not HAS_BOTO:
        module.fail_json(msg='boto required for this module')

    bucket = module.params.get('bucket')
    encrypt = module.params.get('encrypt')
    expiry = int(module.params['expiry'])
    if module.params.get('dest'):
        dest = os.path.expanduser(module.params.get('dest'))
    headers = module.params.get('headers')
    marker = module.params.get('marker')
    max_keys = module.params.get('max_keys')
    metadata = module.params.get('metadata')
    mode = module.params.get('mode')
    obj = module.params.get('object')
    version = module.params.get('version')
    overwrite = module.params.get('overwrite')
    prefix = module.params.get('prefix')
    retries = module.params.get('retries')
    s3_url = module.params.get('s3_url')
    src = module.params.get('src')

    for acl in module.params.get('permission'):
        if acl not in CannedACLStrings:
            module.fail_json(msg='Unknown permission specified: %s' % str(acl))

    if overwrite not in ['always', 'never', 'different']:
        if module.boolean(overwrite):
            overwrite = 'always'
        else:
            overwrite = 'never'

    region, ec2_url, aws_connect_kwargs = get_aws_connection_info(module)

    if region in ('us-east-1', '', None):
        # S3ism for the US Standard region
        location = Location.DEFAULT
    else:
        # Boto uses symbolic names for locations but region strings will
        # actually work fine for everything except us-east-1 (US Standard)
        location = region

    if module.params.get('object'):
        obj = os.path.expanduser(module.params['object'])

    # allow eucarc environment variables to be used if ansible vars aren't set
    if not s3_url and 'S3_URL' in os.environ:
        s3_url = os.environ['S3_URL']

    # bucket names with .'s in them need to use the calling_format option,
    # otherwise the connection will fail. See https://github.com/boto/boto/issues/2836
    # for more details.
    if '.' in bucket:
        aws_connect_kwargs['calling_format'] = OrdinaryCallingFormat()

    # Look at s3_url and tweak connection settings
    # if connecting to Walrus or fakes3
    try:
        if is_fakes3(s3_url):
            fakes3 = urlparse.urlparse(s3_url)
            s3 = S3Connection(is_secure=fakes3.scheme == 'fakes3s',
                              host=fakes3.hostname,
                              port=fakes3.port,
                              calling_format=OrdinaryCallingFormat(),
                              **aws_connect_kwargs)
        elif is_walrus(s3_url):
            walrus = urlparse.urlparse(s3_url).hostname
            s3 = boto.connect_walrus(walrus, **aws_connect_kwargs)
        else:
            aws_connect_kwargs['is_secure'] = True
            s3 = connect_to_aws(boto.s3, location, **aws_connect_kwargs)
            # use this as fallback because connect_to_region seems to fail in boto + non 'classic' aws accounts in some cases
            if s3 is None:
                s3 = boto.connect_s3(**aws_connect_kwargs)

    except boto.exception.NoAuthHandlerFound as e:
        module.fail_json(msg='No Authentication Handler found: %s ' % str(e))
Example #15
def main():

    argument_spec = ec2_argument_spec()
    argument_spec.update(
        dict(
            name = dict(required=True, type='str'),
            expiration_days = dict(default=None, required=False, type='int'),
            expiration_date = dict(default=None, required=False, type='str'),
            prefix = dict(default=None, required=False),
            requester_pays = dict(default='no', type='bool'),
            rule_id = dict(required=False, type='str'),
            state = dict(default='present', choices=['present', 'absent']),
            status = dict(default='enabled', choices=['enabled', 'disabled']),
            storage_class = dict(default='glacier', type='str', choices=['glacier', 'standard_ia']),
            transition_days = dict(default=None, required=False, type='int'),
            transition_date = dict(default=None, required=False, type='str')
        )
    )

    module = AnsibleModule(argument_spec=argument_spec,
                           mutually_exclusive = [
                               [ 'expiration_days', 'expiration_date' ],
                               [ 'expiration_days', 'transition_date' ],
                               [ 'transition_days', 'transition_date' ],
                               [ 'transition_days', 'expiration_date' ]
                               ]
                           )

    if not HAS_BOTO:
        module.fail_json(msg='boto required for this module')

    if not HAS_DATEUTIL:
        module.fail_json(msg='dateutil required for this module')

    region, ec2_url, aws_connect_params = get_aws_connection_info(module)

    if region in ('us-east-1', '', None):
        # S3ism for the US Standard region
        location = Location.DEFAULT
    else:
        # Boto uses symbolic names for locations but region strings will
        # actually work fine for everything except us-east-1 (US Standard)
        location = region
    try:
        connection = boto.s3.connect_to_region(location, is_secure=True, calling_format=OrdinaryCallingFormat(), **aws_connect_params)
        # use this as fallback because connect_to_region seems to fail in boto + non 'classic' aws accounts in some cases
        if connection is None:
            connection = boto.connect_s3(**aws_connect_params)
    except (boto.exception.NoAuthHandlerFound, AnsibleAWSError) as e:
        module.fail_json(msg=str(e))

    expiration_date = module.params.get("expiration_date")
    transition_date = module.params.get("transition_date")
    state = module.params.get("state")
    storage_class = module.params.get("storage_class")

    # If expiration_date set, check string is valid
    if expiration_date is not None:
        try:
            datetime.datetime.strptime(expiration_date, "%Y-%m-%dT%H:%M:%S.000Z")
        except ValueError as e:
            module.fail_json(msg="expiration_date is not a valid ISO-8601 format. The time must be midnight and a timezone of GMT must be included")

    if transition_date is not None:
        try:
            datetime.datetime.strptime(transition_date, "%Y-%m-%dT%H:%M:%S.000Z")
        except ValueError as e:
            module.fail_json(msg="expiration_date is not a valid ISO-8601 format. The time must be midnight and a timezone of GMT must be included")

    boto_required_version = (2,40,0)
    if storage_class == 'standard_ia' and tuple(map(int, (boto.__version__.split(".")))) < boto_required_version:
        module.fail_json(msg="'standard_ia' class requires boto >= 2.40.0")

    if state == 'present':
        create_lifecycle_rule(connection, module)
    elif state == 'absent':
        destroy_lifecycle_rule(connection, module)
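The two strptime() checks above only accept one very specific timestamp shape. A hedged illustration of a value that passes (the date itself is arbitrary; strptime only validates the shape, while the midnight/GMT requirement mentioned in the error message is enforced by S3 itself):

import datetime

# Literal ".000Z" suffix required by the format string used above.
datetime.datetime.strptime("2024-01-01T00:00:00.000Z", "%Y-%m-%dT%H:%M:%S.000Z")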
Example #16
def main(src,
         dest,
         num_processes=2,
         split=32,
         force=False,
         verbose=False,
         quiet=False,
         secure=True,
         max_tries=5):

    # Check that src is a valid S3 url
    split_rs = urlparse.urlsplit(src)
    if split_rs.scheme != "s3":
        raise ValueError("'%s' is not an S3 url" % src)

    # Check that dest does not exist
    if os.path.isdir(dest):
        filename = split_rs.path.split('/')[-1]
        dest = os.path.join(dest, filename)

    if os.path.exists(dest):
        if force:
            os.remove(dest)
        else:
            raise ValueError("Destination file '%s' exists, specify -f to"
                             " overwrite" % dest)

    # Split out the bucket and the key
    s3 = boto.connect_s3(calling_format=OrdinaryCallingFormat())
    s3.is_secure = secure
    logger.debug("split_rs: %s" % str(split_rs))
    bucket = s3.lookup(split_rs.netloc)
    if bucket is None:
        raise ValueError("'%s' is not a valid bucket" % split_rs.netloc)
    key = bucket.get_key(split_rs.path)
    if key is None:
        raise ValueError("'%s' does not exist." % split_rs.path)

    # Determine the total size and calculate byte ranges
    resp = s3.make_request("HEAD", bucket=bucket, key=key)
    if resp is None:
        raise ValueError("response is invalid.")

    size = int(resp.getheader("content-length"))
    logger.debug("Got headers: %s" % resp.getheaders())

    # Skipping multipart if file is less than 1mb
    if size < 1024 * 1024:
        t1 = time.time()
        key.get_contents_to_filename(dest)
        t2 = time.time() - t1
        size_mb = size / 1024 / 1024
        logger.info(
            "Finished single-part download of %0.2fM in %0.2fs (%0.2fMBps)" %
            (size_mb, t2, size_mb / t2))
    else:
        # Touch the file
        fd = os.open(dest, os.O_CREAT)
        os.close(fd)

        size_mb = size / 1024 / 1024
        num_parts = (size_mb + (-size_mb % split)) // split

        def arg_iterator(num_parts):
            for min_byte, max_byte in gen_byte_ranges(size, num_parts):
                yield (bucket.name, key.name, dest, min_byte, max_byte, split,
                       secure, max_tries, 0)

        s = size / 1024 / 1024.
        try:
            t1 = time.time()
            pool = Pool(processes=num_processes)
            pool.map_async(do_part_download,
                           arg_iterator(num_parts)).get(9999999)
            t2 = time.time() - t1
            logger.info("Finished downloading %0.2fM in %0.2fs (%0.2fMBps)" %
                        (s, t2, s / t2))
        except KeyboardInterrupt:
            logger.warning("User terminated")
        except Exception as err:
            logger.error(err)
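The snippet above depends on helpers defined elsewhere in the original script (logger, do_part_download, gen_byte_ranges). Purely as an assumption about its shape, here is a sketch of a byte-range generator compatible with how gen_byte_ranges is called above; it is not the original implementation.

def gen_byte_ranges(size, num_parts):
    # Yield inclusive (min_byte, max_byte) pairs covering `size` bytes in
    # roughly equal chunks.
    part_size = -(-size // num_parts)  # ceiling division
    for i in range(num_parts):
        min_byte = i * part_size
        if min_byte > size - 1:
            break
        max_byte = min(size - 1, (i + 1) * part_size - 1)
        yield (min_byte, max_byte)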
Example #17
    def s3_connection(self):
        """
        Connect to the Amazon S3 API.

        If the connection attempt fails because Boto can't find credentials the
        attempt is retried once with an anonymous connection.

        Called on demand by :py:attr:`s3_bucket`.

        :returns: A :py:class:`boto.s3.connection.S3Connection` object.
        :raises: :py:exc:`.CacheBackendError` when the connection to the Amazon
                 S3 API fails.
        """
        if not hasattr(self, 'cached_connection'):
            import boto
            from boto.exception import BotoClientError, BotoServerError, NoAuthHandlerFound
            from boto.s3.connection import S3Connection, SubdomainCallingFormat, OrdinaryCallingFormat
            try:
                # Configure the number of retries and the socket timeout used
                # by Boto. Based on the snippet given in the following email:
                # https://groups.google.com/d/msg/boto-users/0osmP0cUl5Y/X4NdlMGWKiEJ
                if not boto.config.has_section(BOTO_CONFIG_SECTION):
                    boto.config.add_section(BOTO_CONFIG_SECTION)
                boto.config.set(BOTO_CONFIG_SECTION,
                                BOTO_CONFIG_NUM_RETRIES_OPTION,
                                str(self.config.s3_cache_retries))
                boto.config.set(BOTO_CONFIG_SECTION,
                                BOTO_CONFIG_SOCKET_TIMEOUT_OPTION,
                                str(self.config.s3_cache_timeout))
                logger.debug("Connecting to Amazon S3 API ..")
                endpoint = urlparse(self.config.s3_cache_url)
                host, _, port = endpoint.netloc.partition(':')
                is_secure = (endpoint.scheme == 'https')
                calling_format = SubdomainCallingFormat() if host == S3Connection.DefaultHost else OrdinaryCallingFormat()
                try:
                    self.cached_connection = S3Connection(host=host,
                                                          port=int(port) if port else None,
                                                          is_secure=is_secure,
                                                          calling_format=calling_format)
                except NoAuthHandlerFound:
                    logger.debug("Amazon S3 API credentials missing, retrying with anonymous connection ..")
                    self.cached_connection = S3Connection(host=host,
                                                          port=int(port) if port else None,
                                                          is_secure=is_secure,
                                                          calling_format=calling_format,
                                                          anon=True)
            except (BotoClientError, BotoServerError):
                raise CacheBackendError("""
                    Failed to connect to the Amazon S3 API! Most likely your
                    credentials are not correctly configured. The Amazon S3
                    cache backend will be disabled for now.
                """)
        return self.cached_connection
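The anonymous fallback described in the docstring only helps when the cached objects are publicly readable. A hedged usage sketch outside this class, with placeholder bucket and key names:

from boto.s3.connection import S3Connection, OrdinaryCallingFormat

# anon=True makes boto send unsigned requests, so this only works for
# publicly readable buckets and keys.
anon_conn = S3Connection(anon=True, calling_format=OrdinaryCallingFormat())
bucket = anon_conn.get_bucket('some-public-bucket', validate=False)
print bucket.get_key('public/object.txt').get_contents_as_string()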
Example #18
def upload_to_s3(aws_access_key_id, aws_secret_access_key, file, bucket, key, callback=None, md5=None, reduced_redundancy=False, content_type=None):
    """
    Uploads the given file to the AWS S3
    bucket and key specified.

    callback is a function of the form:

    def callback(complete, total)

    The callback should accept two integer parameters,
    the first representing the number of bytes that
    have been successfully transmitted to S3 and the
    second representing the total size of the object to be
    transmitted.

    Returns boolean indicating success/failure of upload.
    """
    try:
        size = os.fstat(file.fileno()).st_size
    except Exception as e:
        info(e)
        # Not all file objects implement fileno(),
        # so we fall back on this
        file.seek(0, os.SEEK_END)
        size = file.tell()

    conn = boto.connect_s3(aws_access_key_id, aws_secret_access_key, is_secure=False, calling_format=OrdinaryCallingFormat())
    bucket = conn.get_bucket(bucket, validate=True)
    k = Key(bucket)
    k.key = key
    if content_type:
        k.set_metadata('Content-Type', content_type)
    sent = k.set_contents_from_file(file, cb=callback, md5=md5, reduced_redundancy=reduced_redundancy, rewind=True)

    # Rewind for later use
    file.seek(0)

    if sent == size:
        return True
    return False
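A hypothetical call to the helper above, mainly to show the callback(complete, total) signature the docstring describes; the credentials, bucket, key and file name are placeholders.

def print_progress(complete, total):
    # complete = bytes transmitted so far, total = size of the object.
    print "uploaded %d of %d bytes" % (complete, total)

with open('report.pdf', 'rb') as fh:
    ok = upload_to_s3('ACCESS_KEY', 'SECRET_KEY', fh, 'my-bucket',
                      'reports/report.pdf', callback=print_progress,
                      content_type='application/pdf')
    print "upload %s" % ('succeeded' if ok else 'failed')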
Example #19
# ==========
# = Celery =
# ==========

# celeryapp.autodiscover_tasks(INSTALLED_APPS)
CELERY_ACCEPT_CONTENT = ['pickle', 'json', 'msgpack', 'yaml']

# ==========
# = Assets =
# ==========

JAMMIT = jammit.JammitAssets(NEWSBLUR_DIR)

if DEBUG:
    MIDDLEWARE_CLASSES += ('utils.request_introspection_middleware.DumpRequestMiddleware',)
    MIDDLEWARE_CLASSES += ('utils.exception_middleware.ConsoleExceptionMiddleware',)

# =======
# = AWS =
# =======

S3_CONN = None
if BACKED_BY_AWS.get('pages_on_s3') or BACKED_BY_AWS.get('icons_on_s3'):
    S3_CONN = S3Connection(S3_ACCESS_KEY, S3_SECRET, calling_format=OrdinaryCallingFormat())
    # if BACKED_BY_AWS.get('pages_on_s3'):
    #     S3_PAGES_BUCKET = S3_CONN.get_bucket(S3_PAGES_BUCKET_NAME)
    # if BACKED_BY_AWS.get('icons_on_s3'):
    #     S3_ICONS_BUCKET = S3_CONN.get_bucket(S3_ICONS_BUCKET_NAME)

django.http.request.host_validation_re = re.compile(r"^([a-z0-9.-_\-]+|\[[a-f0-9]*:[a-f0-9:]+\])(:\d+)?$")
Example #20
def s3_delete(aws_access_key_id, aws_secret_access_key, bucket, key):
    try:
        conn = boto.connect_s3(aws_access_key_id, aws_secret_access_key, is_secure=False, calling_format=OrdinaryCallingFormat())
        bucket = conn.get_bucket(bucket, validate=True)
        k = Key(bucket=bucket, name=key)
        if k.exists():
            k.delete()
        if k.exists():
            return False
        else:
            return True
    except Exception as e:
        info(e)
        return False
Example #21
def main():
    argument_spec = ec2_argument_spec()
    argument_spec.update(dict(
        bucket         = dict(required=True),
        dest           = dict(default=None, type='path'),
        encrypt        = dict(default=True, type='bool'),
        expiry         = dict(default=600, aliases=['expiration']),
        headers        = dict(type='dict'),
        marker         = dict(default=None),
        max_keys       = dict(default=1000),
        metadata       = dict(type='dict'),
        mode           = dict(choices=['get', 'put', 'delete', 'create', 'geturl', 'getstr', 'delobj', 'list'], required=True),
        object         = dict(type='path'),
        permission     = dict(type='list', default=['private']),
        version        = dict(default=None),
        overwrite      = dict(aliases=['force'], default='always'),
        prefix         = dict(default=None),
        retries        = dict(aliases=['retry'], type='int', default=0),
        s3_url         = dict(aliases=['S3_URL']),
        rgw            = dict(default='no', type='bool'),
        src            = dict(),
    ),
    )
    module = AnsibleModule(argument_spec=argument_spec)

    if not HAS_BOTO:
        module.fail_json(msg='boto required for this module')

    bucket = module.params.get('bucket')
    encrypt = module.params.get('encrypt')
    expiry = int(module.params['expiry'])
    if module.params.get('dest'):
        dest = module.params.get('dest')
    headers = module.params.get('headers')
    marker = module.params.get('marker')
    max_keys = module.params.get('max_keys')
    metadata = module.params.get('metadata')
    mode = module.params.get('mode')
    obj = module.params.get('object')
    version = module.params.get('version')
    overwrite = module.params.get('overwrite')
    prefix = module.params.get('prefix')
    retries = module.params.get('retries')
    s3_url = module.params.get('s3_url')
    rgw = module.params.get('rgw')
    src = module.params.get('src')

    for acl in module.params.get('permission'):
        if acl not in CannedACLStrings:
            module.fail_json(msg='Unknown permission specified: %s' % str(acl))

    if overwrite not in ['always', 'never', 'different']:
        if module.boolean(overwrite):
            overwrite = 'always'
        else:
            overwrite = 'never'

    region, ec2_url, aws_connect_kwargs = get_aws_connection_info(module)

    if region in ('us-east-1', '', None):
        # S3ism for the US Standard region
        location = Location.DEFAULT
    else:
        # Boto uses symbolic names for locations but region strings will
        # actually work fine for everything except us-east-1 (US Standard)
        location = region

    if module.params.get('object'):
        obj = module.params['object']

    # allow eucarc environment variables to be used if ansible vars aren't set
    if not s3_url and 'S3_URL' in os.environ:
        s3_url = os.environ['S3_URL']

    # rgw requires an explicit url
    if rgw and not s3_url:
        module.fail_json(msg='rgw flavour requires s3_url')

    # bucket names with .'s in them need to use the calling_format option,
    # otherwise the connection will fail. See https://github.com/boto/boto/issues/2836
    # for more details.
    if '.' in bucket:
        aws_connect_kwargs['calling_format'] = OrdinaryCallingFormat()

    # Look at s3_url and tweak connection settings
    # if connecting to RGW, Walrus or fakes3
    try:
        s3 = get_s3_connection(aws_connect_kwargs, location, rgw, s3_url)

    except boto.exception.NoAuthHandlerFound as e:
        module.fail_json(msg='No Authentication Handler found: %s ' % str(e))
    except Exception as e:
        module.fail_json(msg='Failed to connect to S3: %s' % str(e))

    if s3 is None:  # this should never happen
        module.fail_json(msg='Unknown error, failed to create s3 connection, no information from boto.')

    # If our mode is a GET operation (download), go through the procedure as appropriate ...
    if mode == 'get':

        # First, we check to see if the bucket exists, we get "bucket" returned.
        bucketrtn = bucket_check(module, s3, bucket)
        if bucketrtn is False:
            module.fail_json(msg="Source bucket cannot be found", failed=True)

        # Next, we check to see if the key in the bucket exists. If it exists, it also returns key_matches md5sum check.
        keyrtn = key_check(module, s3, bucket, obj, version=version)
        if keyrtn is False:
            if version is not None:
                module.fail_json(msg="Key %s with version id %s does not exist."% (obj, version), failed=True)
            else:
                module.fail_json(msg="Key %s does not exist."%obj, failed=True)

        # If the destination path doesn't exist or overwrite is set to 'always', there is no need to do the md5sum etag check, so just download.
        pathrtn = path_check(dest)
        if pathrtn is False or overwrite == 'always':
            download_s3file(module, s3, bucket, obj, dest, retries, version=version)

        # Compare the remote MD5 sum of the object with the local dest md5sum, if it already exists.
        if pathrtn is True:
            md5_remote = keysum(module, s3, bucket, obj, version=version)
            md5_local = module.md5(dest)
            if md5_local == md5_remote:
                sum_matches = True
                if overwrite == 'always':
                    download_s3file(module, s3, bucket, obj, dest, retries, version=version)
                else:
                    module.exit_json(msg="Local and remote object are identical, ignoring. Use overwrite=always parameter to force.", changed=False)
            else:
                sum_matches = False

                if overwrite in ('always', 'different'):
                    download_s3file(module, s3, bucket, obj, dest, retries, version=version)
                else:
                    module.exit_json(msg="WARNING: Checksums do not match. Use overwrite parameter to force download.")

        # If the checksums match and overwrite is 'never', exit with a helpful message.
        if sum_matches is True and overwrite == 'never':
            module.exit_json(msg="Local and remote object are identical, ignoring. Use overwrite parameter to force.", changed=False)

    # if our mode is a PUT operation (upload), go through the procedure as appropriate ...
    if mode == 'put':

        # Use this snippet to debug through conditionals:
        # module.exit_json(msg="Bucket return %s"%bucketrtn)

        # Let's check the src path.
        pathrtn = path_check(src)
        if pathrtn is False:
            module.fail_json(msg="Local object for PUT does not exist", failed=True)

        # Let's check to see if the bucket exists to get ground truth.
        bucketrtn = bucket_check(module, s3, bucket)
        if bucketrtn is True:
            keyrtn = key_check(module, s3, bucket, obj)

        # Let's check the key state. If it exists, compute the etag md5sum.
        if bucketrtn is True and keyrtn is True:
            md5_remote = keysum(module, s3, bucket, obj)
            md5_local = module.md5(src)

            if md5_local == md5_remote:
                sum_matches = True
                if overwrite == 'always':
                    upload_s3file(module, s3, bucket, obj, src, expiry, metadata, encrypt, headers)
                else:
                    get_download_url(module, s3, bucket, obj, expiry, changed=False)
            else:
                sum_matches = False
                if overwrite in ('always', 'different'):
                    upload_s3file(module, s3, bucket, obj, src, expiry, metadata, encrypt, headers)
                else:
                    module.exit_json(msg="WARNING: Checksums do not match. Use overwrite parameter to force upload.")

        # If neither exist (based on bucket existence), we can create both.
        if bucketrtn is False and pathrtn is True:
            create_bucket(module, s3, bucket, location)
            upload_s3file(module, s3, bucket, obj, src, expiry, metadata, encrypt, headers)

        # If bucket exists but key doesn't, just upload.
        if bucketrtn is True and pathrtn is True and keyrtn is False:
            upload_s3file(module, s3, bucket, obj, src, expiry, metadata, encrypt, headers)

    # Delete an object from a bucket, not the entire bucket
    if mode == 'delobj':
        if obj is None:
            module.fail_json(msg="object parameter is required", failed=True)
        if bucket:
            bucketrtn = bucket_check(module, s3, bucket)
            if bucketrtn is True:
                deletertn = delete_key(module, s3, bucket, obj)
                if deletertn is True:
                    module.exit_json(msg="Object %s deleted from bucket %s." % (obj, bucket), changed=True)
            else:
                module.fail_json(msg="Bucket does not exist.", changed=False)
        else:
            module.fail_json(msg="Bucket parameter is required.", failed=True)


    # Delete an entire bucket, including all objects in the bucket
    if mode == 'delete':
        if bucket:
            bucketrtn = bucket_check(module, s3, bucket)
            if bucketrtn is True:
                deletertn = delete_bucket(module, s3, bucket)
                if deletertn is True:
                    module.exit_json(msg="Bucket %s and all keys have been deleted."%bucket, changed=True)
            else:
                module.fail_json(msg="Bucket does not exist.", changed=False)
        else:
            module.fail_json(msg="Bucket parameter is required.", failed=True)

    # Support for listing a set of keys
    if mode == 'list':
        bucket_object = get_bucket(module, s3, bucket)

        # If the bucket does not exist then bail out
        if bucket_object is None:
            module.fail_json(msg="Target bucket (%s) cannot be found"% bucket, failed=True)

        list_keys(module, bucket_object, prefix, marker, max_keys)

    # Need to research how to create directories without "populating" a key, so this should just do bucket creation for now.
    # WE SHOULD ENABLE SOME WAY OF CREATING AN EMPTY KEY TO CREATE "DIRECTORY" STRUCTURE, AWS CONSOLE DOES THIS.
    if mode == 'create':
        if bucket and not obj:
            bucketrtn = bucket_check(module, s3, bucket)
            if bucketrtn is True:
                module.exit_json(msg="Bucket already exists.", changed=False)
            else:
                module.exit_json(msg="Bucket created successfully", changed=create_bucket(module, s3, bucket, location))
        if bucket and obj:
            bucketrtn = bucket_check(module, s3, bucket)
            if obj.endswith('/'):
                dirobj = obj
            else:
                dirobj = obj + "/"
            if bucketrtn is True:
                keyrtn = key_check(module, s3, bucket, dirobj)
                if keyrtn is True:
                    module.exit_json(msg="Bucket %s and key %s already exists."% (bucket, obj), changed=False)
                else:
                    create_dirkey(module, s3, bucket, dirobj)
            if bucketrtn is False:
                created = create_bucket(module, s3, bucket, location)
                create_dirkey(module, s3, bucket, dirobj)

    # Support for grabbing the time-expired URL for an object in S3/Walrus.
    if mode == 'geturl':
        if bucket and obj:
            bucketrtn = bucket_check(module, s3, bucket)
            if bucketrtn is False:
                module.fail_json(msg="Bucket %s does not exist."%bucket, failed=True)
            else:
                keyrtn = key_check(module, s3, bucket, obj)
                if keyrtn is True:
                    get_download_url(module, s3, bucket, obj, expiry)
                else:
                    module.fail_json(msg="Key %s does not exist."%obj, failed=True)
        else:
            module.fail_json(msg="Bucket and Object parameters must be set", failed=True)

    if mode == 'getstr':
        if bucket and obj:
            bucketrtn = bucket_check(module, s3, bucket)
            if bucketrtn is False:
                module.fail_json(msg="Bucket %s does not exist."%bucket, failed=True)
            else:
                keyrtn = key_check(module, s3, bucket, obj, version=version)
                if keyrtn is True:
                    download_s3str(module, s3, bucket, obj, version=version)
                else:
                    if version is not None:
                        module.fail_json(msg="Key %s with version id %s does not exist."% (obj, version), failed=True)
                    else:
                        module.fail_json(msg="Key %s does not exist."%obj, failed=True)

    module.exit_json(failed=False)
Example #22
 def get_bucket_server(self, server, bucket):
     if not bucket:
         return OrdinaryCallingFormat.get_bucket_server(
                 self, server, bucket)
     return 's3-%s.amazonaws.com' % (AWS_STORAGE_BUCKET_REGION,)
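This method only makes sense inside a calling-format subclass; a hedged sketch of how such a class might be wired up (RegionCallingFormat, AWS_STORAGE_BUCKET_REGION and the credentials are placeholders, not taken from the original project):

from boto.s3.connection import S3Connection, OrdinaryCallingFormat

AWS_STORAGE_BUCKET_REGION = 'eu-west-1'


class RegionCallingFormat(OrdinaryCallingFormat):
    # Pin bucket requests to a region-specific endpoint instead of the
    # generic s3.amazonaws.com host.
    def get_bucket_server(self, server, bucket):
        if not bucket:
            return OrdinaryCallingFormat.get_bucket_server(self, server, bucket)
        return 's3-%s.amazonaws.com' % (AWS_STORAGE_BUCKET_REGION,)


conn = S3Connection('s3key', 's3secretkey', calling_format=RegionCallingFormat())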
Example #23
    def handle(self, args):
        s3conn = boto.connect_s3(settings.AWS_KEY,
                                 settings.AWS_SECRET,
                                 calling_format=OrdinaryCallingFormat())
        bucket = s3conn.create_bucket(settings.AWS_BUCKET)

        for abbr in args.abbrs:

            meta = db.metadata.find_one({'_id': abbr.lower()})
            if not meta:
                log.critical(
                    "'{0}' does not exist in the database.".format(abbr))
                sys.exit(1)
            else:
                log.info("Downloading photos for {0}".format(abbr))

            orig_dir = 'photos/original'
            xsmall_dir = 'photos/xsmall'
            small_dir = 'photos/small'
            large_dir = 'photos/large'
            for d in (orig_dir, xsmall_dir, small_dir, large_dir):
                if not os.path.exists(d):
                    os.makedirs(d)

            for leg in db.legislators.find(
                {
                    settings.LEVEL_FIELD: abbr,
                    'photo_url': {
                        '$exists': True
                    }
                },
                    timeout=False):

                fname = os.path.join(orig_dir, '{0}.jpg'.format(leg['_id']))

                # if fname already exists, skip this processing step
                if os.path.exists(fname):
                    continue

                # error retrieving photo, skip it
                try:
                    tmpname, resp = scraper.urlretrieve(leg['photo_url'])
                except Exception as e:
                    log.critical('error fetching %s: %s', leg['photo_url'], e)
                    continue

                try:
                    # original size, standardized filenames
                    fname = os.path.join(orig_dir,
                                         '{0}.jpg'.format(leg['_id']))
                    subprocess.check_call(['convert', tmpname, fname])
                    _upload(fname, bucket)

                    # xsmall - 50x75
                    fname = os.path.join(xsmall_dir,
                                         '{0}.jpg'.format(leg['_id']))
                    subprocess.check_call(
                        ['convert', tmpname, '-resize', '50x75', fname])
                    _upload(fname, bucket)

                    # small - 150x200
                    fname = os.path.join(small_dir,
                                         '{0}.jpg'.format(leg['_id']))
                    subprocess.check_call(
                        ['convert', tmpname, '-resize', '150x200', fname])
                    _upload(fname, bucket)
                except subprocess.CalledProcessError:
                    print('convert failed for ', fname)