Example #1
def parse_csar(csarfile, model_name=None):

    # unzip csarfile
    zip_ref = zipfile.ZipFile(csarfile, 'r')
    csardir = "/tmp/{}".format(time.time())
    zip_ref.extractall(csardir)
    zip_ref.close()
    dircontent = os.listdir(csardir)
    if len(dircontent) == 1:
        csardir = '{}/{}'.format(csardir, dircontent[0])

    # find the CSAR entry point
    entry_file = None
    if os.path.isdir(csardir):
        if 'TOSCA-Metadata' in os.listdir(csardir):
            tm_dir = os.path.join(csardir, 'TOSCA-Metadata')
            if os.path.isdir(tm_dir) and 'TOSCA.meta' in os.listdir(tm_dir):
                tm_file = os.path.join(tm_dir, 'TOSCA.meta')
                if os.path.isfile(tm_file):
                    meta = {}
                    with open(tm_file, 'r') as f:
                        for line in f:
                            tokens = line.split(':')
                            if len(tokens) == 2:
                                meta[tokens[0].strip()] = tokens[1].strip()
                    if str(meta.get('CSAR-Version')) == '1.1' and str(
                            meta.get('TOSCA-Meta-File-Version')
                    ) == '1.0' and 'Entry-Definitions' in meta.keys():
                        entry_file = meta['Entry-Definitions']
                    else:
                        print "Error in the provided TOSCA meta file inside the CSAR"
        if entry_file is None:
            # Otherwise fall back to a single YAML file at the CSAR root
            yamlfiles = [
                filename for filename in os.listdir(csardir)
                if os.path.splitext(filename)[1] in ['.yaml', '.yml']
            ]
            if len(yamlfiles) == 1:
                entry_file = yamlfiles[0]
            else:
                print "A unique YAML file has to be provided at the root of the CSAR if it does not contain a TOSCA-Metadata directory"

    # First parsing step: YAML
    toscayaml = None
    if entry_file is not None:
        toscayaml = parse_tosca("{}/{}".format(csardir, entry_file))

    # Second step: parse TOSCA types and topology (model)
    if toscayaml is not None:
        parse_declarative_workflows(toscayaml)
        if model_name is not None:
            parse_model(toscayaml, model_name)

    # Get s3 connection data
    s3_host = None
    s3_service = linda_rd('S3', categ='catalog/service')
    if isinstance(s3_service, list) and len(s3_service) > 0:
        s3_host = s3_service[0]['Address']
        s3_port = s3_service[0]['ServicePort']
        s3_key = linda_rd('s3/admin/access-key-id')
        s3_secret = linda_rd('s3/admin/secret-access-key')

        # init s3 with a bucket for the model
        if s3_host is not None:
            conn = S3Connection(s3_key,
                                s3_secret,
                                host=s3_host,
                                port=s3_port,
                                calling_format=OrdinaryCallingFormat(),
                                is_secure=False)
            model_bucket = conn.create_bucket(model_name)
            url_s3_csar = upload_s3(model_bucket, csarfile, 'application/zip',
                                    '{}.csar.zip'.format(model_name))
            print "url_s3_csar = {}".format(url_s3_csar)

        # Event to update cache for csars
        linda_out('exec_cache_csar/{}'.format(model_name), url_s3_csar)
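The TOSCA.meta lookup above only understands simple "key: value" lines and requires CSAR-Version 1.1, TOSCA-Meta-File-Version 1.0 and an Entry-Definitions entry. A minimal sketch of the metadata layout it expects, with made-up values, run through the same splitting logic:

# Hypothetical TOSCA.meta content; only the three checked entries matter here.
SAMPLE_TOSCA_META = """\
TOSCA-Meta-File-Version: 1.0
CSAR-Version: 1.1
Created-By: example
Entry-Definitions: topology.yaml
"""

meta = {}
for line in SAMPLE_TOSCA_META.splitlines():
    tokens = line.split(':')
    if len(tokens) == 2:
        meta[tokens[0].strip()] = tokens[1].strip()

print(meta['Entry-Definitions'])  # -> topology.yaml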
Example #2
    print('\nScanning snapshots...\n')

    snapshots = ec2.get_all_snapshots(owner='self')
    for snapshot in snapshots:
        process_snapshot(Snapshot(snapshot))


if __name__ == '__main__':
    access_key = os.environ['AWS_ACCESS_KEY_ID']
    secret_key = os.environ['AWS_SECRET_ACCESS_KEY']

    conn = boto.connect_ec2(access_key, secret_key)
    regions = boto.ec2.regions()

    for region in regions:
        try:
            process_region(region)
        except Exception as e:
            print('\nFailed to process region %s: %s\n' % (region.name, e))

    s3 = boto.connect_s3(access_key,
                         secret_key,
                         calling_format=OrdinaryCallingFormat())

    print('\nScanning buckets...\n')

    buckets = s3.get_all_buckets()
    for bucket in buckets:
        process_bucket(Bucket(bucket))
Example #3
def main():

    argument_spec = ec2_argument_spec()
    argument_spec.update(
        dict(name=dict(required=True, type='str'),
             expiration_days=dict(default=None, required=False, type='int'),
             expiration_date=dict(default=None, required=False, type='str'),
             prefix=dict(default=None, required=False),
             requester_pays=dict(default='no', type='bool'),
             rule_id=dict(required=False, type='str'),
             state=dict(default='present', choices=['present', 'absent']),
             status=dict(default='enabled', choices=['enabled', 'disabled']),
             storage_class=dict(default='glacier',
                                type='str',
                                choices=['glacier', 'standard_ia']),
             transition_days=dict(default=None, required=False, type='int'),
             transition_date=dict(default=None, required=False, type='str')))

    module = AnsibleModule(
        argument_spec=argument_spec,
        mutually_exclusive=[['expiration_days', 'expiration_date'],
                            ['expiration_days', 'transition_date'],
                            ['transition_days', 'transition_date'],
                            ['transition_days', 'expiration_date']])

    if not HAS_BOTO:
        module.fail_json(msg='boto required for this module')

    if not HAS_DATEUTIL:
        module.fail_json(msg='dateutil required for this module')

    region, ec2_url, aws_connect_params = get_aws_connection_info(module)

    if region in ('us-east-1', '', None):
        # S3ism for the US Standard region
        location = Location.DEFAULT
    else:
        # Boto uses symbolic names for locations but region strings will
        # actually work fine for everything except us-east-1 (US Standard)
        location = region
    try:
        connection = boto.s3.connect_to_region(
            location,
            is_secure=True,
            calling_format=OrdinaryCallingFormat(),
            **aws_connect_params)
        # use this as fallback because connect_to_region seems to fail in boto + non 'classic' aws accounts in some cases
        if connection is None:
            connection = boto.connect_s3(**aws_connect_params)
    except (boto.exception.NoAuthHandlerFound, AnsibleAWSError) as e:
        module.fail_json(msg=str(e))

    expiration_date = module.params.get("expiration_date")
    transition_date = module.params.get("transition_date")
    state = module.params.get("state")
    storage_class = module.params.get("storage_class")

    # If expiration_date set, check string is valid
    if expiration_date is not None:
        try:
            datetime.datetime.strptime(expiration_date,
                                       "%Y-%m-%dT%H:%M:%S.000Z")
        except ValueError as e:
            module.fail_json(
                msg=
                "expiration_date is not a valid ISO-8601 format. The time must be midnight and a timezone of GMT must be included"
            )

    if transition_date is not None:
        try:
            datetime.datetime.strptime(transition_date,
                                       "%Y-%m-%dT%H:%M:%S.000Z")
        except ValueError as e:
            module.fail_json(
                msg=
                "transition_date is not a valid ISO-8601 format. The time must be midnight and a timezone of GMT must be included"
            )

    boto_required_version = (2, 40, 0)
    if storage_class == 'standard_ia' and tuple(
            map(int, (boto.__version__.split(".")))) < boto_required_version:
        module.fail_json(msg="'standard_ia' class requires boto >= 2.40.0")

    if state == 'present':
        create_lifecycle_rule(connection, module)
    elif state == 'absent':
        destroy_lifecycle_rule(connection, module)
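The strptime checks above only validate the string format; a quick illustration (the dates are made up) showing that the documented midnight/GMT rule is not actually enforced by the pattern itself:

import datetime

# Accepted: midnight UTC with the literal .000Z suffix
datetime.datetime.strptime("2024-01-01T00:00:00.000Z", "%Y-%m-%dT%H:%M:%S.000Z")

# Also accepted by strptime, even though the module's documentation asks for
# midnight -- the format string alone cannot enforce that.
datetime.datetime.strptime("2024-01-01T13:30:00.000Z", "%Y-%m-%dT%H:%M:%S.000Z")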
Example #4
def do_part_upload(args):
    """
    Upload a part of a MultiPartUpload

    Open the target file and read in a chunk. Since we can't pickle
    S3Connection or MultiPartUpload objects, we have to reconnect and lookup
    the MPU object with each part upload.

    :type args: tuple of (str, str, str, int, int, int, bool, int, int)
    :param args: The actual arguments of this method. Due to lameness of
                 multiprocessing, we have to extract these outside of the
                 function definition.

                 The arguments are: S3 bucket name, MultiPartUpload id, file
                 name, the part number, part offset, part size, the secure
                 flag, the maximum number of tries, and the current try count.
    """
    # Multiprocessing args lameness
    bucket_name, mpu_id, fname, i, start, size, secure, max_tries, current_tries = args
    logger.debug("do_part_upload got args: %s" % (args, ))

    # Connect to S3, get the MultiPartUpload
    s3 = boto.connect_s3(calling_format=OrdinaryCallingFormat())
    s3.is_secure = secure
    bucket = s3.lookup(bucket_name)
    mpu = None
    for mp in bucket.list_multipart_uploads():
        if mp.id == mpu_id:
            mpu = mp
            break
    if mpu is None:
        raise Exception("Could not find MultiPartUpload %s" % mpu_id)

    # Read the chunk from the file
    fp = open(fname, 'rb')
    fp.seek(start)
    data = fp.read(size)
    fp.close()
    if not data:
        raise Exception("Unexpectedly tried to read an empty chunk")

    def progress(x, y):
        logger.debug("Part %d: %0.2f%%" % (i + 1, 100. * x / y))

    try:
        # Do the upload
        t1 = time.time()
        mpu.upload_part_from_file(StringIO(data), i + 1, cb=progress)

        # Print some timings
        t2 = time.time() - t1
        s = len(data) / 1024. / 1024.
        logger.info("Uploaded part %s (%0.2fM) in %0.2fs at %0.2fMBps" %
                    (i + 1, s, t2, s / t2))
    except Exception as err:
        logger.debug("Retry request %d of max %d times" %
                     (current_tries, max_tries))
        if current_tries > max_tries:
            logger.error(err)
        else:
            time.sleep(3)
            current_tries += 1
            # do_part_upload takes a single tuple of arguments
            do_part_upload((bucket_name, mpu_id, fname, i, start, size,
                            secure, max_tries, current_tries))
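As the docstring says, each worker reconnects to S3 on its own, so the caller only has to ship picklable tuples. A hypothetical driver for do_part_upload (the bucket name, upload id, file name and part layout below are placeholders) could look like this:

from multiprocessing import Pool

def gen_part_args(bucket_name, mpu_id, fname, part_size, num_parts,
                  secure=True, max_tries=5):
    # One picklable tuple per part: (bucket, mpu id, file, index, offset,
    # size, secure flag, max tries, current try count)
    for i in range(num_parts):
        yield (bucket_name, mpu_id, fname, i, i * part_size, part_size,
               secure, max_tries, 0)

pool = Pool(processes=4)
pool.map(do_part_upload,
         gen_part_args('example-bucket', 'example-mpu-id', 'backup.tar',
                       5 * 1024 * 1024, 4))
Example #5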
    "s3": {
        "keys": {
            "cleversafe.service.consul": {
                "access_key": os.environ.get('CLEVERSAFE_ACCESS_KEY'),
                'secret_key': os.environ.get('CLEVERSAFE_SECRET_KEY')
            },
            "localhost": {
                "access_key": os.environ.get('CLEVERSAFE_ACCESS_KEY'),
                'secret_key': os.environ.get('CLEVERSAFE_SECRET_KEY')
            },
        },
        "kwargs": {
            'cleversafe.service.consul': {
                'host': 'cleversafe.service.consul',
                "is_secure": False,
                "calling_format": OrdinaryCallingFormat()
            },
            'localhost': {
                'host': 'localhost',
                "is_secure": False,
                "calling_format": OrdinaryCallingFormat()
            },
        }
    }
}
SUBMISSION = {
    "bucket": 'test_submission',
    "host": CLEVERSAFE_HOST,
}
# Postgres
PSQLGRAPH = {
Example #6
# Get settings module
settings = sys.modules[os.environ['FLASK_SETTINGS_MODULE']]

if hasattr(settings, 'TEST_MODE') and settings.TEST_MODE:
    _mock = mock_s3()
    _mock.start()

    _conn = boto.connect_s3()
    _bucket = _conn.create_bucket(settings.AWS_STORAGE_BUCKET_NAME)

    _mock.stop()
else:
    _conn = boto.connect_s3(settings.AWS_ACCESS_KEY_ID,
                            settings.AWS_SECRET_ACCESS_KEY,
                            calling_format=OrdinaryCallingFormat())
    _bucket = _conn.get_bucket(settings.AWS_STORAGE_BUCKET_NAME)

class StorageException(Exception):
    """
    Adds 'detail' attribute to contain response body
    """
    def __init__(self, message, detail):
        super(Exception, self).__init__(message)
        self.detail = detail


def _mock_in_test_mode(f):
    @wraps(f)
    def decorated_function(*args, **kwargs):
        if hasattr(settings, 'TEST_MODE') and settings.TEST_MODE:
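The decorator above is cut off at this point. A hypothetical completion, reusing settings, wraps and mock_s3 from the snippet's own module, would simply run the wrapped storage call inside the moto mock whenever TEST_MODE is enabled:

def _mock_in_test_mode(f):
    @wraps(f)
    def decorated_function(*args, **kwargs):
        if hasattr(settings, 'TEST_MODE') and settings.TEST_MODE:
            # Serve the call from moto's in-memory S3 instead of real AWS
            mock = mock_s3()
            mock.start()
            try:
                return f(*args, **kwargs)
            finally:
                mock.stop()
        return f(*args, **kwargs)
    return decorated_function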
Example #7
"""Derived from
    https://www.toptal.com/aws/service-oriented-architecture-aws-lambda"""
from PIL import Image
import boto
from boto.s3.connection import OrdinaryCallingFormat
from resize import scale_jpeg_to_height
from io import BytesIO

sizes = {'thumbnail': 540, 's': 1024, 'm': 2048, 'l': 4096}

# How are credentials established?
conn = boto.connect_s3(calling_format=OrdinaryCallingFormat())


def _get_adjacent_key(bucket, key_name, new_file_name):
    # Assumption: return a boto Key object sitting next to the original
    # object, which is what the otherwise-unused bucket parameter suggests.
    parts = key_name.split('/')
    parts[-1] = new_file_name
    new_key_path = '/'.join(parts)
    return bucket.new_key(new_key_path)

def handle_resize(event, context):
    # Obtain the bucket name and key for the event
    bucket_name = event['Records'][0]['s3']['bucket']['name']
    key_path = event['Records'][0]['s3']['object']['key']

    bucket = conn.get_bucket(bucket_name)
    key = bucket.get_key(key_path)
    input_bytes = BytesIO()
    key.get_contents_to_file(input_bytes)
    input_bytes.seek(0)
    resized = scale_jpeg_to_height(input_bytes.getvalue(), sizes['thumbnail'])
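The question in the comment above ("How are credentials established?") is answered implicitly: boto.connect_s3() with no arguments falls back to the AWS_ACCESS_KEY_ID/AWS_SECRET_ACCESS_KEY environment variables, the boto config files, or an instance role. If explicit credentials are preferred, a sketch in the style of the other examples here:

import os

import boto
from boto.s3.connection import OrdinaryCallingFormat

# Pass credentials explicitly instead of relying on boto's implicit lookup.
conn = boto.connect_s3(os.environ.get('AWS_ACCESS_KEY_ID'),
                       os.environ.get('AWS_SECRET_ACCESS_KEY'),
                       calling_format=OrdinaryCallingFormat())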
Example #8
        def _euca_connection(_ufshost, _port, _region, _access_id, _secret_key,
                             _token, _conn_type, _dns_enabled):
            path = 'compute'
            conn_class = EC2Connection
            api_version = '2012-12-01'
            if _region != 'euca':
                # look up region endpoint
                conn = _euca_connection(_ufshost, _port, 'euca', _access_id,
                                        _secret_key, _token, 'ec2',
                                        _dns_enabled)
                regions = RegionCache(conn).get_regions(_ufshost)
                region = [
                    region.endpoint for region in regions
                    if region.name == _region
                ]
                if region:
                    endpoint = region[0]
                    parsed = urlparse(endpoint)
                    _ufshost = parsed.hostname[4:]  # remove 'ec2.' prefix
                    _port = parsed.port

            # special case since this is our own class, not boto's
            if _conn_type == 'admin':
                return EucalyptusAdmin(_ufshost, _port, _access_id,
                                       _secret_key, _token, _dns_enabled)

            # Configure based on connection type
            if _conn_type == 'autoscale':
                api_version = '2011-01-01'
                conn_class = boto.ec2.autoscale.AutoScaleConnection
                path = 'AutoScaling'
            elif _conn_type == 'cloudwatch':
                path = 'CloudWatch'
                conn_class = boto.ec2.cloudwatch.CloudWatchConnection
            elif _conn_type == 'cloudformation':
                path = 'CloudFormation'
                conn_class = boto.cloudformation.CloudFormationConnection
            elif _conn_type == 'elb':
                path = 'LoadBalancing'
                conn_class = boto.ec2.elb.ELBConnection
            elif _conn_type == 'iam':
                path = 'Euare'
                conn_class = boto.iam.IAMConnection
            elif _conn_type == 's3':
                path = 'objectstorage'
                conn_class = S3Connection
            elif _conn_type == 'vpc':
                conn_class = boto.vpc.VPCConnection

            if _dns_enabled:
                _ufshost = "{0}.{1}".format(path.lower(), _ufshost)
                path = '/'
            else:
                path = '/services/{0}/'.format(path)
            region = RegionInfo(name='eucalyptus', endpoint=_ufshost)
            # IAM and S3 connections need host instead of region info
            if _conn_type in ['iam', 's3']:
                conn = conn_class(_access_id,
                                  _secret_key,
                                  host=_ufshost,
                                  port=_port,
                                  path=path,
                                  is_secure=True,
                                  security_token=_token)
            else:
                conn = conn_class(_access_id,
                                  _secret_key,
                                  region=region,
                                  port=_port,
                                  path=path,
                                  is_secure=True,
                                  security_token=_token)
            if _conn_type == 's3':
                conn.calling_format = OrdinaryCallingFormat()

            # AutoScaling service needs additional auth info
            if _conn_type == 'autoscale':
                conn.auth_region_name = 'Eucalyptus'

            setattr(conn, 'APIVersion', api_version)
            if conn:
                conn.https_validate_certificates = validate_certs
            if certs_file is not None:
                conn.ca_certificates_file = certs_file
            conn.http_connection_kwargs['timeout'] = 30
            # uncomment to enable boto request logger. Use only for development
            # conn.set_request_hook(RequestLogger())
            return conn
Example #9
 def __init__(self, *args, **kargs):
     kargs['calling_format'] = OrdinaryCallingFormat()
     super(OrdinaryConnection, self).__init__(*args, **kargs)
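Usage is the same as for the parent connection class, with the calling format forced. A hypothetical call (the credentials are placeholders and the parent is assumed to be boto's S3Connection):

conn = OrdinaryConnection('EXAMPLE-KEY-ID', 'EXAMPLE-SECRET')
bucket = conn.lookup('example-bucket')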
Example #10
def getS3Connection():
    return S3Connection(s3_user,
                        s3_secret_key,
                        host='swift.vehicleforge.org',
                        calling_format=OrdinaryCallingFormat(),
                        is_secure=True)
Example #11
def main():

    argument_spec = ec2_argument_spec()
    argument_spec.update(
        dict(
            force=dict(required=False, default='no', type='bool'),
            policy=dict(required=False, default=None),
            name=dict(required=True),
            requester_pays=dict(default='no', type='bool'),
            s3_url=dict(aliases=['S3_URL']),
            state=dict(default='present', choices=['present', 'absent']),
            tags=dict(required=None, default={}, type='dict'),
            versioning=dict(default='no', type='bool'),
            cors_xml=dict(required=False, default=None),
        ))

    module = AnsibleModule(argument_spec=argument_spec)

    if not HAS_BOTO:
        module.fail_json(msg='boto required for this module')

    region, ec2_url, aws_connect_params = get_aws_connection_info(module)

    if region in ('us-east-1', '', None):
        # S3ism for the US Standard region
        location = Location.DEFAULT
    else:
        # Boto uses symbolic names for locations but region strings will
        # actually work fine for everything except us-east-1 (US Standard)
        location = region

    s3_url = module.params.get('s3_url')

    # allow eucarc environment variables to be used if ansible vars aren't set
    if not s3_url and 'S3_URL' in os.environ:
        s3_url = os.environ['S3_URL']

    # Look at s3_url and tweak connection settings
    # if connecting to Walrus or fakes3
    try:
        if is_fakes3(s3_url):
            fakes3 = urlparse.urlparse(s3_url)
            connection = S3Connection(is_secure=fakes3.scheme == 'fakes3s',
                                      host=fakes3.hostname,
                                      port=fakes3.port,
                                      calling_format=OrdinaryCallingFormat(),
                                      **aws_connect_params)
        elif is_walrus(s3_url):
            walrus = urlparse.urlparse(s3_url).hostname
            connection = boto.connect_walrus(walrus, **aws_connect_params)
        else:
            connection = boto.s3.connect_to_region(
                location,
                is_secure=True,
                calling_format=OrdinaryCallingFormat(),
                **aws_connect_params)
            # use this as fallback because connect_to_region seems to fail in boto + non 'classic' aws accounts in some cases
            if connection is None:
                connection = boto.connect_s3(**aws_connect_params)

    except boto.exception.NoAuthHandlerFound as e:
        module.fail_json(msg='No Authentication Handler found: %s ' % str(e))
Example #12
def create_connection(key=None, secret=None):
    return boto.connect_s3(key, secret, calling_format=OrdinaryCallingFormat())
Example #13
 def make_connection(self, user):
     return S3Connection(user['key_id'], user['key_secret'], is_secure=False,
                         host=self.host, port=self.port, debug=False,
                         calling_format=OrdinaryCallingFormat() )
Example #14
    def handle(self, *args, **kwargs):
        KEY = input("Enter the AWS Access Key ID: ")
        SECRET = input("Enter the AWS Secret Access Key: ")
        PROJECT_NAME = input("Enter the project name (e.g., aurora): ")

        iam_conn = boto.connect_iam(KEY, SECRET)
        s3_conn = boto.connect_s3(KEY,
                                  SECRET,
                                  calling_format=OrdinaryCallingFormat())
        cf_conn = boto.connect_cloudfront(KEY, SECRET)
        setting_parameters = []

        def create_group(group="Bots", path="/bots/"):
            print("Creating {} group...".format(group), end=' ')
            try:
                group = iam_conn.get_group(group)
            except boto.exception.BotoServerError as e:
                if e.error_code == "NoSuchEntity":
                    group = iam_conn.create_group(group, path=path)
                    print("success!")
            else:
                print("already exists, skipping.")

        def create_user(username, group="Bots", path="/bots/", policies={}):
            print("Creating {} user...".format(username), end=' ')
            try:
                user = iam_conn.get_user(username)
            except boto.exception.BotoServerError as e:
                if e.error_code == "NoSuchEntity":
                    user = iam_conn.create_user(username, path=path)
                    response = iam_conn.create_access_key(username)
                    print("success!")

                    access_key_id = response.create_access_key_response \
                            .create_access_key_result.access_key.access_key_id
                    secret_access_key = response.create_access_key_response \
                            .create_access_key_result.access_key.secret_access_key

                    print("AWS_ACCESS_KEY_ID = '{}'".format(access_key_id))
                    print("AWS_SECRET_ACCESS_KEY = '{}'".format(
                        secret_access_key))

                    for name, policy in policies.items():
                        iam_conn.put_user_policy(username, name, policy)
                        print("\tAttaching {}".format(name))

                    iam_conn.add_user_to_group(group, username)
                    print("\tAdding to {}".format(group))
            else:
                print("already exists, skipping.")

        def create_cloudfront_distribution(bucket_name, cname):
            origin = boto.cloudfront.origin.S3Origin( \
                    "{}.s3.amazonaws.com".format(bucket_name))
            try:
                print("Creating CloudFront distribution for {}...".format(
                    bucket_name),
                      end=' ')
                distribution = cf_conn.create_distribution(origin,
                                                           True,
                                                           cnames=[cname])
                print("success!")
                print("\tDistribution ID: {}".format(distribution.id))
                print("\tDomain name: {}".format(distribution.domain_name))
            except boto.cloudfront.CloudFrontServerError as e:
                if e.error_code == "CNAMEAlreadyExists":
                    print("already exists, skipping.")

        # Create buckets
        upload_bucket_name = "uploads-{}".format(PROJECT_NAME)
        static_bucket_name = "static-{}".format(PROJECT_NAME)

        upload_cname = "u-{}".format(PROJECT_NAME)
        static_cname = "s-{}".format(PROJECT_NAME)

        s3_conn.create_bucket(upload_bucket_name)
        s3_conn.create_bucket(static_bucket_name)

        print(
            "AWS_STATIC_STORAGE_BUCKET_NAME = '{}'".format(static_bucket_name))
        print("AWS_STORAGE_BUCKET_NAME = '{}'".format(upload_bucket_name))

        # Create CloudFront Distribution
        create_cloudfront_distribution(upload_bucket_name, upload_cname)
        create_cloudfront_distribution(static_bucket_name, static_cname)

        # Create IAM Users
        unix_time = str(int(time()))

        create_group("Bots")
        create_user("{}FileUploader".format(PROJECT_NAME.capitalize()),
                    policies={
                        'GenericUploadPolicy':
                        generic_upload_policy.format(unix_time,
                                                     static_bucket_name,
                                                     upload_bucket_name),
                        'CloudFrontInvalidationPolicy':
                        cloudfront_invalidation_policy.format(unix_time)
                    })
        create_user("{}EmailSender".format(PROJECT_NAME.capitalize()),
                    policies={
                        'EmailSendingPolicy':
                        email_sending_policy.format(unix_time)
                    })
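The policy templates referenced above (generic_upload_policy and friends) are not shown in this snippet. As an illustration only, such a template would be an IAM policy document with its JSON braces doubled so that str.format() can substitute the unix_time suffix and the two bucket names; this is a hypothetical shape, not the project's actual policy:

generic_upload_policy = """{{
  "Version": "2012-10-17",
  "Statement": [{{
    "Sid": "GenericUploadPolicy{0}",
    "Effect": "Allow",
    "Action": ["s3:PutObject", "s3:PutObjectAcl", "s3:GetObject"],
    "Resource": ["arn:aws:s3:::{1}/*", "arn:aws:s3:::{2}/*"]
  }}]
}}"""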
Example #15
def main(src,
         dest,
         num_processes=2,
         split=50,
         force=False,
         reduced_redundancy=False,
         verbose=False):
    dest_bucket_name, dest_key_name = validate_url(dest)
    src_bucket_name, src_key_name = validate_url(src)

    s3 = boto.connect_s3(calling_format=OrdinaryCallingFormat())
    dest_bucket = s3.lookup(dest_bucket_name)
    dest_key = dest_bucket.get_key(dest_key_name)

    # See if we're overwriting an existing key
    if dest_key is not None:
        if not force:
            raise ValueError(
                "'%s' already exists. Specify -f to overwrite it" % dest)

    # Determine the total size and calculate byte ranges
    src_bucket = s3.lookup(src_bucket_name)
    src_key = src_bucket.get_key(src_key_name)
    size = src_key.size

    # If file is less than 5G, copy it directly
    if size < 5 * 1024 * 1024 * 1024:
        logging.info("Source object is %0.2fM copying it directly" %
                     (size / 1024. / 1024.))
        t1 = time.time()
        src_key.copy(dest_bucket_name,
                     dest_key_name,
                     reduced_redundancy=reduced_redundancy)
        t2 = time.time() - t1
        s = size / 1024. / 1024.
        logger.info("Finished copying %0.2fM in %0.2fs (%0.2fMbps)" %
                    (s, t2, s / t2))
        return

    part_size = max(5 * 1024 * 1024, 1024 * 1024 * split)
    num_parts = int(ceil(size / float(part_size)))
    logging.info(
        "Source object is %0.2fM splitting into %d parts of size %0.2fM" %
        (size / 1024. / 1024., num_parts, part_size / 1024. / 1024.))

    # Create the multi-part upload object
    mpu = dest_bucket.initiate_multipart_upload(
        dest_key_name, reduced_redundancy=reduced_redundancy)
    logger.info("Initialized copy: %s" % mpu.id)

    # Generate arguments for invocations of do_part_copy
    def gen_args(num_parts):
        cur_pos = 0
        for i in range(num_parts):
            part_start = cur_pos
            cur_pos = cur_pos + part_size
            part_end = min(cur_pos - 1, size - 1)
            part_num = i + 1
            yield (src_bucket_name, src_key_name, dest_bucket_name, mpu.id,
                   part_num, part_start, part_end)

    # Do the thing
    try:
        # Create a pool of workers
        pool = Pool(processes=num_processes)
        t1 = time.time()
        pool.map_async(do_part_copy, gen_args(num_parts)).get(9999999)
        # Print out some timings
        t2 = time.time() - t1
        s = size / 1024. / 1024.
        # Finalize
        mpu.complete_upload()
        logger.info("Finished copying %0.2fM in %0.2fs (%0.2fMbps)" %
                    (s, t2, s / t2))
    except KeyboardInterrupt:
        logger.warn("Received KeyboardInterrupt, canceling copy")
        pool.terminate()
        mpu.cancel_upload()
    except Exception as err:
        logger.error("Encountered an error, canceling copy")
        logger.error(err)
        mpu.cancel_upload()
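A worked example of the part-size arithmetic above, with illustrative numbers:

from math import ceil

split = 50                                    # MB per part (the default above)
size = 1 * 1024 * 1024 * 1024                 # a 1 GiB source object
part_size = max(5 * 1024 * 1024, 1024 * 1024 * split)
num_parts = int(ceil(size / float(part_size)))
print(part_size // (1024 * 1024))             # 50 -> parts of 50 MB
print(num_parts)                              # 21 parts for 1 GiB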
Example #16
def main():
    argument_spec = ec2_argument_spec()
    argument_spec.update(dict(
            bucket         = dict(required=True),
            dest           = dict(default=None),
            encrypt        = dict(default=True, type='bool'),
            expiry         = dict(default=600, aliases=['expiration']),
            headers        = dict(type='dict'),
            marker         = dict(default=None),
            max_keys       = dict(default=1000),
            metadata       = dict(type='dict'),
            mode           = dict(choices=['get', 'put', 'delete', 'create', 'geturl', 'getstr', 'delobj', 'list'], required=True),
            object         = dict(),
            permission     = dict(type='list', default=['private']),
            version        = dict(default=None),
            overwrite      = dict(aliases=['force'], default='always'),
            prefix         = dict(default=None),
            retries        = dict(aliases=['retry'], type='int', default=0),
            s3_url         = dict(aliases=['S3_URL']),
            src            = dict(),
        ),
    )
    module = AnsibleModule(argument_spec=argument_spec)

    if not HAS_BOTO:
        module.fail_json(msg='boto required for this module')

    bucket = module.params.get('bucket')
    encrypt = module.params.get('encrypt')
    expiry = int(module.params['expiry'])
    if module.params.get('dest'):
        dest = os.path.expanduser(module.params.get('dest'))
    headers = module.params.get('headers')
    marker = module.params.get('marker')
    max_keys = module.params.get('max_keys')
    metadata = module.params.get('metadata')
    mode = module.params.get('mode')
    obj = module.params.get('object')
    version = module.params.get('version')
    overwrite = module.params.get('overwrite')
    prefix = module.params.get('prefix')
    retries = module.params.get('retries')
    s3_url = module.params.get('s3_url')
    src = module.params.get('src')

    for acl in module.params.get('permission'):
        if acl not in CannedACLStrings:
            module.fail_json(msg='Unknown permission specified: %s' % str(acl))

    if overwrite not in ['always', 'never', 'different']:
        if module.boolean(overwrite):
            overwrite = 'always'
        else:
            overwrite = 'never'

    region, ec2_url, aws_connect_kwargs = get_aws_connection_info(module)

    if region in ('us-east-1', '', None):
        # S3ism for the US Standard region
        location = Location.DEFAULT
    else:
        # Boto uses symbolic names for locations but region strings will
        # actually work fine for everything except us-east-1 (US Standard)
        location = region

    if module.params.get('object'):
        obj = os.path.expanduser(module.params['object'])

    # allow eucarc environment variables to be used if ansible vars aren't set
    if not s3_url and 'S3_URL' in os.environ:
        s3_url = os.environ['S3_URL']

    # bucket names with .'s in them need to use the calling_format option,
    # otherwise the connection will fail. See https://github.com/boto/boto/issues/2836
    # for more details.
    if '.' in bucket:
        aws_connect_kwargs['calling_format'] = OrdinaryCallingFormat()

    # Look at s3_url and tweak connection settings
    # if connecting to Walrus or fakes3
    try:
        if is_fakes3(s3_url):
            fakes3 = urlparse.urlparse(s3_url)
            s3 = S3Connection(
                is_secure=fakes3.scheme == 'fakes3s',
                host=fakes3.hostname,
                port=fakes3.port,
                calling_format=OrdinaryCallingFormat(),
                **aws_connect_kwargs
            )
        elif is_walrus(s3_url):
            walrus = urlparse.urlparse(s3_url).hostname
            s3 = boto.connect_walrus(walrus, **aws_connect_kwargs)
        else:
            s3 = boto.s3.connect_to_region(location, is_secure=True, **aws_connect_kwargs)
            # use this as fallback because connect_to_region seems to fail in boto + non 'classic' aws accounts in some cases
            if s3 is None:
                s3 = boto.connect_s3(**aws_connect_kwargs)

    except boto.exception.NoAuthHandlerFound as e:
        module.fail_json(msg='No Authentication Handler found: %s ' % str(e))
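The comment above about bucket names containing dots comes down to how boto builds the request host: the default subdomain (virtual-hosted) style puts the bucket into the TLS hostname, which no longer matches the *.s3.amazonaws.com wildcard certificate once the name itself contains dots, while OrdinaryCallingFormat keeps the hostname fixed and moves the bucket into the path. A small illustration (the bucket name is made up):

from boto.s3.connection import OrdinaryCallingFormat, SubdomainCallingFormat

bucket_name = 'my.dotted.bucket'
print(SubdomainCallingFormat().build_host('s3.amazonaws.com', bucket_name))
# my.dotted.bucket.s3.amazonaws.com -- breaks wildcard certificate matching
print(OrdinaryCallingFormat().build_host('s3.amazonaws.com', bucket_name))
# s3.amazonaws.com -- the bucket is addressed via the path instead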
Example #17
def main(src,
         dest,
         num_processes=2,
         split=32,
         force=False,
         verbose=False,
         quiet=False,
         secure=True,
         max_tries=5):

    # Check that src is a valid S3 url
    split_rs = urlparse.urlsplit(src)
    if split_rs.scheme != "s3":
        raise ValueError("'%s' is not an S3 url" % src)

    # If dest is a directory, append the source file name to it
    if os.path.isdir(dest):
        filename = split_rs.path.split('/')[-1]
        dest = os.path.join(dest, filename)

    if os.path.exists(dest):
        if force:
            os.remove(dest)
        else:
            raise ValueError("Destination file '%s' exists, specify -f to"
                             " overwrite" % dest)

    # Split out the bucket and the key
    s3 = boto.connect_s3(calling_format=OrdinaryCallingFormat())
    s3.is_secure = secure
    logger.debug("split_rs: %s" % str(split_rs))
    bucket = s3.lookup(split_rs.netloc)
    if bucket is None:
        raise ValueError("'%s' is not a valid bucket" % split_rs.netloc)
    key = bucket.get_key(split_rs.path)
    if key is None:
        raise ValueError("'%s' does not exist." % split_rs.path)

    # Determine the total size and calculate byte ranges
    resp = s3.make_request("HEAD", bucket=bucket, key=key)
    if resp is None:
        raise ValueError("response is invalid.")

    size = int(resp.getheader("content-length"))
    logger.debug("Got headers: %s" % resp.getheaders())

    # Skipping multipart if file is less than 1mb
    if size < 1024 * 1024:
        t1 = time.time()
        key.get_contents_to_filename(dest)
        t2 = time.time() - t1
        size_mb = size / 1024 / 1024
        logger.info(
            "Finished single-part download of %0.2fM in %0.2fs (%0.2fMBps)" %
            (size_mb, t2, size_mb / t2))
    else:
        # Touch the file
        fd = os.open(dest, os.O_CREAT)
        os.close(fd)

        size_mb = size / 1024 / 1024
        num_parts = (size_mb + (-size_mb % split)) // split

        def arg_iterator(num_parts):
            for min_byte, max_byte in gen_byte_ranges(size, num_parts):
                yield (bucket.name, key.name, dest, min_byte, max_byte, split,
                       secure, max_tries, 0)

        s = size / 1024 / 1024.
        try:
            t1 = time.time()
            pool = Pool(processes=num_processes)
            pool.map_async(do_part_download,
                           arg_iterator(num_parts)).get(9999999)
            t2 = time.time() - t1
            logger.info("Finished downloading %0.2fM in %0.2fs (%0.2fMBps)" %
                        (s, t2, s / t2))
        except KeyboardInterrupt:
            logger.warning("User terminated")
        except Exception as err:
            logger.error(err)
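The num_parts expression above is just ceiling division written with a modulo trick; a quick check with illustrative numbers:

size_mb, split = 100, 32
print((size_mb + (-size_mb % split)) // split)  # 4 parts
print(-(-size_mb // split))                     # 4 as well -- same ceiling division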
Example #18
def test_s3datasink_substitutions():
    indir = mkdtemp(prefix='-Tmp-nipype_ds_subs_in')
    outdir = mkdtemp(prefix='-Tmp-nipype_ds_subs_out')
    files = []
    for n in ['ababab.n', 'xabababyz.n']:
        f = os.path.join(indir, n)
        files.append(f)
        open(f, 'w')

    # run fakes3 server and set up bucket
    fakes3dir = op.expanduser('~/fakes3')
    try:
        proc = Popen(['fakes3', '-r', fakes3dir, '-p', '4567'],
                     stdout=open(os.devnull, 'wb'))
    except OSError as ose:
        if 'No such file or directory' in str(ose):
            return  # fakes3 not installed. OK!
        raise ose

    conn = S3Connection(anon=True,
                        is_secure=False,
                        port=4567,
                        host='localhost',
                        calling_format=OrdinaryCallingFormat())
    conn.create_bucket('test')

    ds = nio.S3DataSink(
        testing=True,
        anon=True,
        bucket='test',
        bucket_path='output/',
        parametrization=False,
        base_directory=outdir,
        substitutions=[('ababab', 'ABABAB')],
        # end anchoring ($) is used to ensure we operate on the filename
        # rather than matching possible temporary directory names.
        # Patterns should be more comprehensible in real-world usage
        # since real paths would be more sensible.
        regexp_substitutions=[(r'xABABAB(\w*)\.n$', r'a-\1-b.n'),
                              ('(.*%s)[-a]([^%s]*)$' % ((os.path.sep, ) * 2),
                               r'\1!\2')])
    setattr(ds.inputs, '@outdir', files)
    ds.run()
    yield assert_equal, \
        sorted([os.path.basename(x) for
            x in glob.glob(os.path.join(outdir, '*'))]), \
        ['!-yz-b.n', 'ABABAB.n']  # so we got re used 2nd and both patterns

    bkt = conn.get_bucket(ds.inputs.bucket)
    bkt_files = list(k for k in bkt.list())

    found = [False, False]
    failed_deletes = 0
    for k in bkt_files:
        if '!-yz-b.n' in k.key:
            found[0] = True
            try:
                bkt.delete_key(k)
            except:
                failed_deletes += 1
        elif 'ABABAB.n' in k.key:
            found[1] = True
            try:
                bkt.delete_key(k)
            except:
                failed_deletes += 1

    # ensure delete requests were successful
    yield assert_equal, failed_deletes, 0

    # ensure both keys are found in bucket
    yield assert_equal, found.count(True), 2

    proc.kill()
    shutil.rmtree(fakes3dir)
    shutil.rmtree(indir)
    shutil.rmtree(outdir)
Example #19
def s3keys(email):
    global s3
    if not s3:
        s3 = boto.connect_s3(calling_format=OrdinaryCallingFormat())
    b = s3.create_bucket(bucket_name, location=bucket_location)
    return b.list(prefix=email)
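A hypothetical caller just iterates over the returned listing; bucket_name and bucket_location are whatever the surrounding module configures:

for key in s3keys('user@example.com'):
    print(key.name)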
Example #20
def main():

    argument_spec = ec2_argument_spec()
    argument_spec.update(
        dict(
            name = dict(required=True),
            target_bucket = dict(required=False, default=None),
            target_prefix = dict(required=False, default=""),
            state = dict(required=False, default='present', choices=['present', 'absent'])
        )
    )

    module = AnsibleModule(argument_spec=argument_spec)

    if not HAS_BOTO:
        module.fail_json(msg='boto required for this module')

    region, ec2_url, aws_connect_params = get_aws_connection_info(module)

    if region in ('us-east-1', '', None):
        # S3ism for the US Standard region
        location = Location.DEFAULT
    else:
        # Boto uses symbolic names for locations but region strings will
        # actually work fine for everything except us-east-1 (US Standard)
        location = region
    try:
        connection = boto.s3.connect_to_region(location, is_secure=True, calling_format=OrdinaryCallingFormat(), **aws_connect_params)
        # use this as fallback because connect_to_region seems to fail in boto + non 'classic' aws accounts in some cases
        if connection is None:
            connection = boto.connect_s3(**aws_connect_params)
    except (boto.exception.NoAuthHandlerFound, AnsibleAWSError) as e:
        module.fail_json(msg=str(e))

    state = module.params.get("state")

    if state == 'present':
        enable_bucket_logging(connection, module)
    elif state == 'absent':
        disable_bucket_logging(connection, module)
Example #21
def main(src,
         dest,
         num_processes=2,
         split=50,
         force=False,
         reduced_redundancy=False,
         verbose=False,
         quiet=False,
         secure=True,
         max_tries=5):
    # Check that dest is a valid S3 url
    split_rs = urlparse.urlsplit(dest)
    if split_rs.scheme != "s3":
        raise ValueError("'%s' is not an S3 url" % dest)

    s3 = boto.connect_s3(calling_format=OrdinaryCallingFormat())
    s3.is_secure = secure
    bucket = s3.lookup(split_rs.netloc)
    if bucket is None:
        raise ValueError("'%s' is not a valid bucket" % split_rs.netloc)
    key = bucket.get_key(split_rs.path)
    # See if we're overwriting an existing key
    if key is not None:
        if not force:
            raise ValueError(
                "'%s' already exists. Specify -f to overwrite it" % dest)

    # Determine the splits
    part_size = max(5 * 1024 * 1024, 1024 * 1024 * split)
    src.seek(0, 2)
    size = src.tell()
    num_parts = int(ceil(size / part_size))

    # If file is less than 5M, just upload it directly
    if size < 5 * 1024 * 1024:
        src.seek(0)
        t1 = time.time()
        k = boto.s3.key.Key(bucket, split_rs.path)
        k.set_contents_from_file(src)
        t2 = time.time() - t1
        s = size / 1024. / 1024.
        logger.info("Finished uploading %0.2fM in %0.2fs (%0.2fMBps)" %
                    (s, t2, s / t2))
        return

    # Create the multi-part upload object
    mpu = bucket.initiate_multipart_upload(
        split_rs.path, reduced_redundancy=reduced_redundancy)
    logger.info("Initialized upload: %s" % mpu.id)

    # Generate arguments for invocations of do_part_upload
    def gen_args(num_parts, fold_last):
        for i in range(num_parts + 1):
            part_start = part_size * i
            if i == (num_parts - 1) and fold_last is True:
                yield (bucket.name, mpu.id, src.name, i, part_start,
                       part_size * 2, secure, max_tries, 0)
                break
            else:
                yield (bucket.name, mpu.id, src.name, i, part_start, part_size,
                       secure, max_tries, 0)

    # If the last part is less than 5M, just fold it into the previous part
    fold_last = ((size % part_size) < 5 * 1024 * 1024)

    # Do the thing
    try:
        # Create a pool of workers
        pool = Pool(processes=num_processes)
        t1 = time.time()
        pool.map_async(do_part_upload, gen_args(num_parts,
                                                fold_last)).get(9999999)
        # Print out some timings
        t2 = time.time() - t1
        s = size / 1024. / 1024.
        # Finalize
        src.close()
        mpu.complete_upload()
        logger.info("Finished uploading %0.2fM in %0.2fs (%0.2fMBps)" %
                    (s, t2, s / t2))
    except KeyboardInterrupt:
        logger.warn("Received KeyboardInterrupt, canceling upload")
        pool.terminate()
        mpu.cancel_upload()
    except Exception as err:
        logger.error("Encountered an error, canceling upload")
        logger.error(err)
        mpu.cancel_upload()
Example #22
def main():

    argument_spec = ec2_argument_spec()
    argument_spec.update(
        dict(
            force=dict(required=False, default='no', type='bool'),
            policy=dict(required=False, default=None, type='json'),
            name=dict(required=True, type='str'),
            requester_pays=dict(default='no', type='bool'),
            s3_url=dict(aliases=['S3_URL'], type='str'),
            state=dict(default='present', type='str', choices=['present', 'absent']),
            tags=dict(required=False, default=None, type='dict'),
            versioning=dict(default=None, type='bool'),
            ceph=dict(default='no', type='bool')
        )
    )

    module = AnsibleModule(argument_spec=argument_spec)

    if not HAS_BOTO:
        module.fail_json(msg='boto required for this module')

    region, ec2_url, aws_connect_params = get_aws_connection_info(module)

    if region in ('us-east-1', '', None):
        # S3ism for the US Standard region
        location = Location.DEFAULT
    else:
        # Boto uses symbolic names for locations but region strings will
        # actually work fine for everything except us-east-1 (US Standard)
        location = region

    s3_url = module.params.get('s3_url')

    # allow eucarc environment variables to be used if ansible vars aren't set
    if not s3_url and 'S3_URL' in os.environ:
        s3_url = os.environ['S3_URL']

    ceph = module.params.get('ceph')

    if ceph and not s3_url:
        module.fail_json(msg='ceph flavour requires s3_url')

    flavour = 'aws'

    # bucket names with .'s in them need to use the calling_format option,
    # otherwise the connection will fail. See https://github.com/boto/boto/issues/2836
    # for more details.
    aws_connect_params['calling_format'] = OrdinaryCallingFormat()

    # Look at s3_url and tweak connection settings
    # if connecting to Walrus or fakes3
    try:
        if s3_url and ceph:
            ceph = urlparse.urlparse(s3_url)
            connection = boto.connect_s3(
                host=ceph.hostname,
                port=ceph.port,
                is_secure=ceph.scheme == 'https',
                **aws_connect_params
            )
            flavour = 'ceph'
        elif is_fakes3(s3_url):
            fakes3 = urlparse.urlparse(s3_url)
            connection = S3Connection(
                is_secure=fakes3.scheme == 'fakes3s',
                host=fakes3.hostname,
                port=fakes3.port,
                **aws_connect_params
            )
        elif is_walrus(s3_url):
            del aws_connect_params['calling_format']
            walrus = urlparse.urlparse(s3_url).hostname
            connection = boto.connect_walrus(walrus, **aws_connect_params)
        else:
            connection = boto.s3.connect_to_region(location, is_secure=True, **aws_connect_params)
            # use this as fallback because connect_to_region seems to fail in boto + non 'classic' aws accounts in some cases
            if connection is None:
                connection = boto.connect_s3(**aws_connect_params)

    except boto.exception.NoAuthHandlerFound as e:
        module.fail_json(msg='No Authentication Handler found: %s ' % str(e))
    except Exception as e:
        module.fail_json(msg='Failed to connect to S3: %s' % str(e))

    if connection is None:  # this should never happen
        module.fail_json(msg='Unknown error, failed to create s3 connection, no information from boto.')

    state = module.params.get("state")

    if state == 'present':
        create_or_update_bucket(connection, module, location, flavour=flavour)
    elif state == 'absent':
        destroy_bucket(connection, module, flavour=flavour)
Example #23
def main():
    argument_spec = ec2_argument_spec()
    argument_spec.update(dict(
            bucket         = dict(required=True),
            dest           = dict(default=None),
            encrypt        = dict(default=True, type='bool'),
            expiry         = dict(default=600, aliases=['expiration']),
            headers        = dict(type='dict'),
            marker         = dict(default=None),
            max_keys       = dict(default=1000),
            metadata       = dict(type='dict'),
            mode           = dict(choices=['get', 'put', 'delete', 'create', 'geturl', 'getstr', 'delobj', 'list'], required=True),
            object         = dict(),
            permission     = dict(type='list', default=['private']),
            version        = dict(default=None),
            overwrite      = dict(aliases=['force'], default='always'),
            prefix         = dict(default=None),
            retries        = dict(aliases=['retry'], type='int', default=0),
            s3_url         = dict(aliases=['S3_URL']),
            rgw            = dict(default='no', type='bool'),
            src            = dict(),
        ),
    )
    module = AnsibleModule(argument_spec=argument_spec)

    if not HAS_BOTO:
        module.fail_json(msg='boto required for this module')

    bucket = module.params.get('bucket')
    encrypt = module.params.get('encrypt')
    expiry = int(module.params['expiry'])
    if module.params.get('dest'):
        dest = os.path.expanduser(module.params.get('dest'))
    headers = module.params.get('headers')
    marker = module.params.get('marker')
    max_keys = module.params.get('max_keys')
    metadata = module.params.get('metadata')
    mode = module.params.get('mode')
    obj = module.params.get('object')
    version = module.params.get('version')
    overwrite = module.params.get('overwrite')
    prefix = module.params.get('prefix')
    retries = module.params.get('retries')
    s3_url = module.params.get('s3_url')
    rgw = module.params.get('rgw')
    src = module.params.get('src')

    for acl in module.params.get('permission'):
        if acl not in CannedACLStrings:
            module.fail_json(msg='Unknown permission specified: %s' % str(acl))

    if overwrite not in ['always', 'never', 'different']:
        if module.boolean(overwrite):
            overwrite = 'always'
        else:
            overwrite = 'never'

    region, ec2_url, aws_connect_kwargs = get_aws_connection_info(module)

    if region in ('us-east-1', '', None):
        # S3ism for the US Standard region
        location = Location.DEFAULT
    else:
        # Boto uses symbolic names for locations but region strings will
        # actually work fine for everything except us-east-1 (US Standard)
        location = region

    if module.params.get('object'):
        obj = os.path.expanduser(module.params['object'])

    # allow eucarc environment variables to be used if ansible vars aren't set
    if not s3_url and 'S3_URL' in os.environ:
        s3_url = os.environ['S3_URL']

    # rgw requires an explicit url
    if rgw and not s3_url:
        module.fail_json(msg='rgw flavour requires s3_url')

    # bucket names with .'s in them need to use the calling_format option,
    # otherwise the connection will fail. See https://github.com/boto/boto/issues/2836
    # for more details.
    if '.' in bucket:
        aws_connect_kwargs['calling_format'] = OrdinaryCallingFormat()

    # Look at s3_url and tweak connection settings
    # if connecting to RGW, Walrus or fakes3
    try:
        if s3_url and rgw:
            rgw = urlparse.urlparse(s3_url)
            s3 = boto.connect_s3(
                is_secure=rgw.scheme == 'https',
                host=rgw.hostname,
                port=rgw.port,
                calling_format=OrdinaryCallingFormat(),
                **aws_connect_kwargs
            )
        elif is_fakes3(s3_url):
            fakes3 = urlparse.urlparse(s3_url)
            s3 = S3Connection(
                is_secure=fakes3.scheme == 'fakes3s',
                host=fakes3.hostname,
                port=fakes3.port,
                calling_format=OrdinaryCallingFormat(),
                **aws_connect_kwargs
            )
        elif is_walrus(s3_url):
            walrus = urlparse.urlparse(s3_url).hostname
            s3 = boto.connect_walrus(walrus, **aws_connect_kwargs)
        else:
            aws_connect_kwargs['is_secure'] = True
            try:
                s3 = connect_to_aws(boto.s3, location, **aws_connect_kwargs)
            except AnsibleAWSError:
                # use this as fallback because connect_to_region seems to fail in boto + non 'classic' aws accounts in some cases
                s3 = boto.connect_s3(**aws_connect_kwargs)

    except boto.exception.NoAuthHandlerFound as e:
        module.fail_json(msg='No Authentication Handler found: %s ' % str(e))
    except Exception as e:
        module.fail_json(msg='Failed to connect to S3: %s' % str(e))

    if s3 is None: # this should never happen
        module.fail_json(msg ='Unknown error, failed to create s3 connection, no information from boto.')

    # If our mode is a GET operation (download), go through the procedure as appropriate ...
    if mode == 'get':

        # First, we check to see if the bucket exists, we get "bucket" returned.
        bucketrtn = bucket_check(module, s3, bucket)
        if bucketrtn is False:
            module.fail_json(msg="Source bucket cannot be found", failed=True)

        # Next, we check to see if the key in the bucket exists. If it exists, it also returns key_matches md5sum check.
        keyrtn = key_check(module, s3, bucket, obj, version=version)
        if keyrtn is False:
            if version is not None:
                module.fail_json(msg="Key %s with version id %s does not exist."% (obj, version), failed=True)
            else:
                module.fail_json(msg="Key %s does not exist."%obj, failed=True)

        # If the destination path doesn't exist or overwrite is set to 'always', there is no need to do the md5sum etag check, so just download.
        pathrtn = path_check(dest)
        if pathrtn is False or overwrite == 'always':
            download_s3file(module, s3, bucket, obj, dest, retries, version=version)

        # Compare the remote MD5 sum of the object with the local dest md5sum, if it already exists.
        if pathrtn is True:
            md5_remote = keysum(module, s3, bucket, obj, version=version)
            md5_local = module.md5(dest)
            if md5_local == md5_remote:
                sum_matches = True
                if overwrite == 'always':
                    download_s3file(module, s3, bucket, obj, dest, retries, version=version)
                else:
                    module.exit_json(msg="Local and remote object are identical, ignoring. Use overwrite=always parameter to force.", changed=False)
            else:
                sum_matches = False

                if overwrite in ('always', 'different'):
                    download_s3file(module, s3, bucket, obj, dest, retries, version=version)
                else:
                    module.exit_json(msg="WARNING: Checksums do not match. Use overwrite parameter to force download.")

        # Finally, if the checksums matched and overwrite is 'never', we exit with a helpful message.
        if sum_matches is True and overwrite == 'never':
            module.exit_json(msg="Local and remote object are identical, ignoring. Use overwrite parameter to force.", changed=False)

    # if our mode is a PUT operation (upload), go through the procedure as appropriate ...
    if mode == 'put':

        # Use this snippet to debug through conditionals:
#       module.exit_json(msg="Bucket return %s"%bucketrtn)

        # Lets check the src path.
        pathrtn = path_check(src)
        if pathrtn is False:
            module.fail_json(msg="Local object for PUT does not exist", failed=True)

        # Lets check to see if bucket exists to get ground truth.
        bucketrtn = bucket_check(module, s3, bucket)
        if bucketrtn is True:
            keyrtn = key_check(module, s3, bucket, obj)

        # Lets check key state. Does it exist and if it does, compute the etag md5sum.
        if bucketrtn is True and keyrtn is True:
            md5_remote = keysum(module, s3, bucket, obj)
            md5_local = module.md5(src)

            if md5_local == md5_remote:
                sum_matches = True
                if overwrite == 'always':
                    upload_s3file(module, s3, bucket, obj, src, expiry, metadata, encrypt, headers)
                else:
                    get_download_url(module, s3, bucket, obj, expiry, changed=False)
            else:
                sum_matches = False
                if overwrite in ('always', 'different'):
                    upload_s3file(module, s3, bucket, obj, src, expiry, metadata, encrypt, headers)
                else:
                    module.exit_json(msg="WARNING: Checksums do not match. Use overwrite parameter to force upload.")

        # If neither exist (based on bucket existence), we can create both.
        if bucketrtn is False and pathrtn is True:
            create_bucket(module, s3, bucket, location)
            upload_s3file(module, s3, bucket, obj, src, expiry, metadata, encrypt, headers)

        # If bucket exists but key doesn't, just upload.
        if bucketrtn is True and pathrtn is True and keyrtn is False:
            upload_s3file(module, s3, bucket, obj, src, expiry, metadata, encrypt, headers)

    # Delete an object from a bucket, not the entire bucket
    if mode == 'delobj':
        if obj is None:
            module.fail_json(msg="object parameter is required", failed=True)
        if bucket:
            bucketrtn = bucket_check(module, s3, bucket)
            if bucketrtn is True:
                deletertn = delete_key(module, s3, bucket, obj)
                if deletertn is True:
                    module.exit_json(msg="Object %s deleted from bucket %s." % (obj, bucket), changed=True)
            else:
                module.fail_json(msg="Bucket does not exist.", changed=False)
        else:
            module.fail_json(msg="Bucket parameter is required.", failed=True)


    # Delete an entire bucket, including all objects in the bucket
    if mode == 'delete':
        if bucket:
            bucketrtn = bucket_check(module, s3, bucket)
            if bucketrtn is True:
                deletertn = delete_bucket(module, s3, bucket)
                if deletertn is True:
                    module.exit_json(msg="Bucket %s and all keys have been deleted."%bucket, changed=True)
            else:
                module.fail_json(msg="Bucket does not exist.", changed=False)
        else:
            module.fail_json(msg="Bucket parameter is required.", failed=True)

    # Support for listing a set of keys
    if mode == 'list':
        bucket_object = get_bucket(module, s3, bucket)

        # If the bucket does not exist then bail out
        if bucket_object is None:
            module.fail_json(msg="Target bucket (%s) cannot be found"% bucket, failed=True)

        list_keys(module, bucket_object, prefix, marker, max_keys)

    # Need to research how to create directories without "populating" a key, so this should just do bucket creation for now.
    # WE SHOULD ENABLE SOME WAY OF CREATING AN EMPTY KEY TO CREATE "DIRECTORY" STRUCTURE, AWS CONSOLE DOES THIS.
    if mode == 'create':
        if bucket and not obj:
            bucketrtn = bucket_check(module, s3, bucket)
            if bucketrtn is True:
                module.exit_json(msg="Bucket already exists.", changed=False)
            else:
                module.exit_json(msg="Bucket created successfully", changed=create_bucket(module, s3, bucket, location))
        if bucket and obj:
            bucketrtn = bucket_check(module, s3, bucket)
            if obj.endswith('/'):
                dirobj = obj
            else:
                dirobj = obj + "/"
            if bucketrtn is True:
                keyrtn = key_check(module, s3, bucket, dirobj)
                if keyrtn is True:
                    module.exit_json(msg="Bucket %s and key %s already exists."% (bucket, obj), changed=False)
                else:
                    create_dirkey(module, s3, bucket, dirobj)
            if bucketrtn is False:
                created = create_bucket(module, s3, bucket, location)
                create_dirkey(module, s3, bucket, dirobj)

    # Support for grabbing the time-expired URL for an object in S3/Walrus.
    if mode == 'geturl':
        if bucket and obj:
            bucketrtn = bucket_check(module, s3, bucket)
            if bucketrtn is False:
                module.fail_json(msg="Bucket %s does not exist."%bucket, failed=True)
            else:
                keyrtn = key_check(module, s3, bucket, obj)
                if keyrtn is True:
                    get_download_url(module, s3, bucket, obj, expiry)
                else:
                    module.fail_json(msg="Key %s does not exist."%obj, failed=True)
        else:
            module.fail_json(msg="Bucket and Object parameters must be set", failed=True)

    if mode == 'getstr':
        if bucket and obj:
            bucketrtn = bucket_check(module, s3, bucket)
            if bucketrtn is False:
                module.fail_json(msg="Bucket %s does not exist."%bucket, failed=True)
            else:
                keyrtn = key_check(module, s3, bucket, obj, version=version)
                if keyrtn is True:
                    download_s3str(module, s3, bucket, obj, version=version)
                else:
                    if version is not None:
                        module.fail_json(msg="Key %s with version id %s does not exist."% (obj, version), failed=True)
                    else:
                        module.fail_json(msg="Key %s does not exist."%obj, failed=True)

    module.exit_json(failed=False)
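
# A minimal sketch (not part of the module above) of how the keysum() helper used
# in the get/put branches could compare the S3 ETag against a local MD5 with boto.
# It assumes the object was uploaded in a single part; multipart uploads produce
# ETags that are not plain MD5 digests, so the comparison would not apply there.
def keysum_sketch(module, s3, bucket, obj):
    bucket_obj = s3.lookup(bucket)
    if bucket_obj is None:
        return None
    key_check = bucket_obj.get_key(obj)
    if not key_check:
        return None
    # boto wraps the ETag in double quotes; strip them so the value can be
    # compared directly against module.md5(src).
    return key_check.etag.strip('"')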
Example #24
0
    def resetConnection(self):
        self.bucket = None
        self.conn = None

        try:
            from boto.s3.connection import S3Connection
            from boto.s3.key import Key
            assert hasattr(S3Connection, 'lookup')

            # Newer versions of boto default to using
            # virtual hosting for buckets as a result of
            # upstream deprecation of the old-style access
            # method by Amazon S3. This change is not
            # backwards compatible (in particular with
            # respect to upper case characters in bucket
            # names); so we default to forcing use of the
            # old-style method unless the user has
            # explicitly asked us to use new-style bucket
            # access.
            #
            # Note that if the user wants to use new-style
            # buckets, we use the subdomain calling form
            # rather than offering a choice between
            # subdomain and vhost, since anything
            # addressable as a vhost is also addressable
            # as a subdomain. Vhost addressing is mostly a
            # convenience for hosting browsable content
            # semi-invisibly on S3, so the subdomain form
            # makes more sense for us: it is explicit
            # about what is happening (the fact that we
            # are talking to S3 servers).

            try:
                from boto.s3.connection import OrdinaryCallingFormat
                from boto.s3.connection import SubdomainCallingFormat
                cfs_supported = True
                calling_format = OrdinaryCallingFormat()
            except ImportError:
                cfs_supported = False
                calling_format = None

            if globals.s3_use_new_style:
                if cfs_supported:
                    calling_format = SubdomainCallingFormat()
                else:
                    log.FatalError("Use of new-style (subdomain) S3 bucket addressing was"
                                   "requested, but does not seem to be supported by the "
                                   "boto library. Either you need to upgrade your boto "
                                   "library or duplicity has failed to correctly detect "
                                   "the appropriate support.",
                                   log.ErrorCode.boto_old_style)
            else:
                if cfs_supported:
                    calling_format = OrdinaryCallingFormat()
                else:
                    calling_format = None

        except ImportError:
            log.FatalError("This backend (s3) requires boto library, version %s or later, "
                           "(http://code.google.com/p/boto/)." % BOTO_MIN_VERSION,
                           log.ErrorCode.boto_lib_too_old)

        if not self.parsed_url.hostname:
            # Use the default host.
            self.conn = self.storage_uri.connect(
                is_secure=(not globals.s3_unencrypted_connection))
        else:
            assert self.scheme == 's3'
            self.conn = self.storage_uri.connect(
                host=self.parsed_url.hostname,
                is_secure=(not globals.s3_unencrypted_connection))

        if hasattr(self.conn, 'calling_format'):
            if calling_format is None:
                log.FatalError("It seems we previously failed to detect support for calling "
                               "formats in the boto library, yet the support is there. This is "
                               "almost certainly a duplicity bug.",
                               log.ErrorCode.boto_calling_format)
            else:
                self.conn.calling_format = calling_format

        else:
            # Duplicity hangs if boto gets a null bucket name.
            # HC: Caught a socket error, trying to recover
            raise BackendException('Boto requires a bucket name.')

        self.bucket = self.conn.lookup(self.bucket_name)
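
# A standalone sketch (not from duplicity) contrasting the two boto calling
# formats discussed in the comment above. Creating the connection objects does
# not contact S3; boto reads credentials from the environment or its config.
from boto.s3.connection import S3Connection, OrdinaryCallingFormat, SubdomainCallingFormat

# Path-style requests, e.g. https://s3.amazonaws.com/<bucket>/<key>; tolerant of
# bucket names containing dots or upper-case characters.
path_style_conn = S3Connection(calling_format=OrdinaryCallingFormat())

# Virtual-host (subdomain) requests, e.g. https://<bucket>.s3.amazonaws.com/<key>;
# needs DNS-compliant bucket names and breaks TLS validation for dotted names.
vhost_conn = S3Connection(calling_format=SubdomainCallingFormat())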
Example #25
0
    def slurp(self):
        """
        :returns: item_list - list of S3 Buckets.
        :returns: exception_map - dict whose keys are tuples identifying where an
            exception occurred and whose values are the exceptions themselves
        """
        self.prep_for_slurp()

        item_list = []
        exception_map = {}

        from security_monkey.common.sts_connect import connect
        for account in self.accounts:

            try:
                s3conn = connect(account, 's3', calling_format=OrdinaryCallingFormat())
                all_buckets = self.wrap_aws_rate_limited_call(
                    s3conn.get_all_buckets
                )
            except Exception as e:
                exc = BotoConnectionIssue(str(e), 's3', account, None)
                self.slurp_exception((self.index, account), exc, exception_map)
                continue

            for bucket in all_buckets:
                app.logger.debug("Slurping %s (%s) from %s" % (self.i_am_singular, bucket.name, account))

                if self.check_ignore_list(bucket.name):
                    continue

                try:
                    loc = self.wrap_aws_rate_limited_call(bucket.get_location)
                    region = self.translate_location_to_region(loc)
                    if region == '':
                        s3regionconn = self.wrap_aws_rate_limited_call(
                            connect,
                            account,
                            's3',
                            calling_format=OrdinaryCallingFormat()
                        )
                        region = 'us-east-1'
                    else:
                        s3regionconn = self.wrap_aws_rate_limited_call(
                            connect,
                            account,
                            's3',
                            region=region,
                            calling_format=OrdinaryCallingFormat()
                        )

                    bhandle = self.wrap_aws_rate_limited_call(
                        s3regionconn.get_bucket,
                        bucket,
                        validate=False
                    )
                    s3regionconn.close()
                except Exception as e:
                    exc = S3PermissionsIssue(bucket.name)
                    # Unfortunately, we can't get the region, so the entire account
                    # will be skipped in find_changes, not just the bad bucket.
                    self.slurp_exception((self.index, account), exc, exception_map)
                    continue

                app.logger.debug("Slurping %s (%s) from %s/%s" % (self.i_am_singular, bucket.name, account, region))
                bucket_dict = self.conv_bucket_to_dict(bhandle, account, region, bucket.name, exception_map)

                item = S3Item(account=account, region=region, name=bucket.name, config=bucket_dict)
                item_list.append(item)

        return item_list, exception_map
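
# A hedged usage sketch (not part of security_monkey) of how a caller might
# consume the pair returned by slurp(); the watcher variable is hypothetical.
#
#   items, errors = watcher.slurp()
#   for item in items:
#       print("found bucket %s in %s/%s" % (item.name, item.account, item.region))
#   for location, exc in errors.items():
#       print("slurp failed at %r: %s" % (location, exc))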
Example #26
0
import os

from boto import connect_s3
from boto.s3.connection import OrdinaryCallingFormat
from boto.s3.key import Key
from fabric.api import abort, task
from fabric.contrib.console import confirm

from rds import scrape as rds_scrape
from render import render
from scrape import scrape

BUCKET_NAME = 'www.ec2instances.info'

# Work around https://github.com/boto/boto/issues/2836 by explicitly setting
# the calling_format.
BUCKET_CALLING_FORMAT = OrdinaryCallingFormat()

abspath = lambda filename: os.path.join(
    os.path.abspath(os.path.dirname(__file__)), filename)

FAB_HOST = os.getenv('FAB_HOST', '127.0.0.1')
FAB_PORT = os.getenv('FAB_PORT', '8080')


@task
def build():
    """Scrape AWS sources for data and build the site"""
    scrape_ec2()
    #scrape_rds()
    render_html()
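

# A hypothetical helper (not in the original fabfile) showing how
# BUCKET_CALLING_FORMAT is typically used when deploying: the dotted bucket
# name above is exactly the case the path-style calling format works around.
def _get_site_bucket():
    conn = connect_s3(calling_format=BUCKET_CALLING_FORMAT)
    return conn.get_bucket(BUCKET_NAME)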
Example #27
0
import io
import zipfile

import boto
from boto.s3.connection import OrdinaryCallingFormat


# parses 2 or 4 little-endian bytes into their corresponding integer value
def parse_int(bytes):
    val = ord(bytes[0]) + (ord(bytes[1]) << 8)
    if len(bytes) > 3:
        val += (ord(bytes[2]) << 16) + (ord(bytes[3]) << 24)
    return val


bucket = ''  # placeholder: name of the S3 bucket that holds the zip archive
key = ''     # placeholder: key of the zip archive inside the bucket
entry = ""   # placeholder (not used in the excerpt shown)

# OrdinaryCallingFormat prevents certificate errors on bucket names with dots
# https://stackoverflow.com/questions/51604689/read-zip-files-from-amazon-s3-using-boto3-and-python#51605244
_bucket = boto.connect_s3(calling_format=OrdinaryCallingFormat()).get_bucket(bucket)
_key = _bucket.get_key(key)
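
# The snippet relies on a fetch() helper that is not shown; a minimal sketch,
# assuming a ranged GET via boto's get_contents_as_string():
def fetch(key, start, length):
    end = start + length - 1
    # ask S3 for only the requested byte range instead of the whole object
    return key.get_contents_as_string(headers={'Range': 'bytes=%d-%d' % (start, end)})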

# fetch the last 22 bytes (end-of-central-directory record; assuming the comment field is empty)
size = _key.size
eocd = fetch(_key, size - 22, 22)

# start offset and size of the central directory
cd_start = parse_int(eocd[16:20])
cd_size = parse_int(eocd[12:16])

# fetch central directory, append EOCD, and open as zipfile
cd = fetch(_key, cd_start, cd_size)
zip = zipfile.ZipFile(io.BytesIO(cd + eocd))

Example #28
0
# ==========
# = Celery =
# ==========

# celeryapp.autodiscover_tasks(INSTALLED_APPS)
CELERY_ACCEPT_CONTENT = ['pickle', 'json', 'msgpack', 'yaml']

# ==========
# = Assets =
# ==========

JAMMIT = jammit.JammitAssets(NEWSBLUR_DIR)

if DEBUG:
    MIDDLEWARE_CLASSES += ('utils.request_introspection_middleware.DumpRequestMiddleware',)
    MIDDLEWARE_CLASSES += ('utils.exception_middleware.ConsoleExceptionMiddleware',)

# =======
# = AWS =
# =======

S3_CONN = None
if BACKED_BY_AWS.get('pages_on_s3') or BACKED_BY_AWS.get('icons_on_s3'):
    S3_CONN = S3Connection(S3_ACCESS_KEY, S3_SECRET, calling_format=OrdinaryCallingFormat())
    # if BACKED_BY_AWS.get('pages_on_s3'):
    #     S3_PAGES_BUCKET = S3_CONN.get_bucket(S3_PAGES_BUCKET_NAME)
    # if BACKED_BY_AWS.get('icons_on_s3'):
    #     S3_ICONS_BUCKET = S3_CONN.get_bucket(S3_ICONS_BUCKET_NAME)

django.http.request.host_validation_re = re.compile(r"^([a-z0-9.-_\-]+|\[[a-f0-9]*:[a-f0-9:]+\])(:\d+)?$")
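# The override above relaxes Django's default host validation so hostnames
# containing underscores (which Django would otherwise reject) pass the Host
# header check.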
Example #29
0
# STORAGE CONFIGURATION
# ------------------------------------------------------------------------------
# Uploaded Media Files
# ------------------------
# See: http://django-storages.readthedocs.io/en/latest/index.html
INSTALLED_APPS += (
    'storages',
)

AWS_ACCESS_KEY_ID = env('DJANGO_AWS_ACCESS_KEY_ID')
AWS_SECRET_ACCESS_KEY = env('DJANGO_AWS_SECRET_ACCESS_KEY')
AWS_STORAGE_BUCKET_NAME = env('DJANGO_AWS_STORAGE_BUCKET_NAME')
AWS_AUTO_CREATE_BUCKET = True
AWS_QUERYSTRING_AUTH = False
AWS_S3_CALLING_FORMAT = OrdinaryCallingFormat()

# AWS cache settings, don't change unless you know what you're doing:
AWS_EXPIRY = 60 * 60 * 24 * 7
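# 60 * 60 * 24 * 7 == 604800 seconds, i.e. objects are cached for one week.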

# TODO See: https://github.com/jschneier/django-storages/issues/47
# Revert the following and use str after the above-mentioned bug is fixed in
# either django-storage-redux or boto
AWS_HEADERS = {
    'Cache-Control': six.b('max-age=%d, s-maxage=%d, must-revalidate' % (
        AWS_EXPIRY, AWS_EXPIRY))
}

# URL that handles the media served from MEDIA_ROOT, used for managing
# stored files.
{% if cookiecutter.use_whitenoise == 'y' -%}
Example #30
0
class Production(Common):

    # This ensures that Django will be able to detect a secure connection
    # properly on Heroku.
    SECURE_PROXY_SSL_HEADER = ('HTTP_X_FORWARDED_PROTO', 'https')

    # INSTALLED_APPS
    INSTALLED_APPS = Common.INSTALLED_APPS
    # END INSTALLED_APPS

    # SECRET KEY
    SECRET_KEY = values.SecretValue()
    # END SECRET KEY

    # django-secure
    INSTALLED_APPS += ("djangosecure", )

    # MIDDLEWARE CONFIGURATION
    MIDDLEWARE_CLASSES = (
        # Make sure djangosecure.middleware.SecurityMiddleware is listed first
        'djangosecure.middleware.SecurityMiddleware',
    )

    MIDDLEWARE_CLASSES += Common.MIDDLEWARE_CLASSES
    # END MIDDLEWARE CONFIGURATION

    # set this to 60 seconds and then to 518400 when you can prove it works
    SECURE_HSTS_SECONDS = 60
    SECURE_HSTS_INCLUDE_SUBDOMAINS = values.BooleanValue(True)
    SECURE_FRAME_DENY = values.BooleanValue(True)
    SECURE_CONTENT_TYPE_NOSNIFF = values.BooleanValue(True)
    SECURE_BROWSER_XSS_FILTER = values.BooleanValue(True)
    SESSION_COOKIE_SECURE = values.BooleanValue(False)
    SESSION_COOKIE_HTTPONLY = values.BooleanValue(True)
    SECURE_SSL_REDIRECT = values.BooleanValue(True)
    # end django-secure

    # SITE CONFIGURATION
    # Hosts/domain names that are valid for this site
    # See https://docs.djangoproject.com/en/1.6/ref/settings/#allowed-hosts
    ALLOWED_HOSTS = ["*"]
    # END SITE CONFIGURATION

    INSTALLED_APPS += ("gunicorn", )

    # STORAGE CONFIGURATION
    # See: http://django-storages.readthedocs.org/en/latest/index.html
    INSTALLED_APPS += (
        'storages',
    )

    # See: http://django-storages.readthedocs.org/en/latest/backends/amazon-S3.html#settings
    STATICFILES_STORAGE = DEFAULT_FILE_STORAGE = 'storages.backends.s3boto.S3BotoStorage'

    # See: http://django-storages.readthedocs.org/en/latest/backends/amazon-S3.html#settings
    AWS_ACCESS_KEY_ID = values.SecretValue()
    AWS_SECRET_ACCESS_KEY = values.SecretValue()
    AWS_STORAGE_BUCKET_NAME = values.SecretValue()
    AWS_AUTO_CREATE_BUCKET = True
    AWS_QUERYSTRING_AUTH = False

    # See: https://github.com/antonagestam/collectfast
    # For Django 1.7+, 'collectfast' should come before 'django.contrib.staticfiles'
    AWS_PRELOAD_METADATA = True
    INSTALLED_APPS = ('collectfast', ) + INSTALLED_APPS

    # AWS cache settings, don't change unless you know what you're doing:
    AWS_EXPIRY = 60 * 60 * 24 * 7
    AWS_HEADERS = {
        'Cache-Control': 'max-age=%d, s-maxage=%d, must-revalidate' % (
            AWS_EXPIRY, AWS_EXPIRY)
    }
    # See: http://django-storages.readthedocs.org/en/latest/backends/amazon-S3.html
    try:
        from boto.s3.connection import OrdinaryCallingFormat
        AWS_S3_CALLING_FORMAT = OrdinaryCallingFormat()
    except ImportError:
        pass

    # See: https://docs.djangoproject.com/en/dev/ref/settings/#static-url
    STATIC_URL = 'https://s3.amazonaws.com/%s/' % AWS_STORAGE_BUCKET_NAME
    # END STORAGE CONFIGURATION

    # EMAIL
    DEFAULT_FROM_EMAIL = values.Value('{{cookiecutter.project_name}} <noreply@{{cookiecutter.domain_name}}>')
    EMAIL_HOST = values.Value('smtp.sendgrid.com')
    EMAIL_HOST_PASSWORD = values.SecretValue(environ_prefix="", environ_name="SENDGRID_PASSWORD")
    EMAIL_HOST_USER = values.SecretValue(environ_prefix="", environ_name="SENDGRID_USERNAME")
    EMAIL_PORT = values.IntegerValue(587, environ_prefix="", environ_name="EMAIL_PORT")
    EMAIL_SUBJECT_PREFIX = values.Value('[{{cookiecutter.project_name}}] ', environ_name="EMAIL_SUBJECT_PREFIX")
    EMAIL_USE_TLS = True
    SERVER_EMAIL = EMAIL_HOST_USER
    # END EMAIL

    # TEMPLATE CONFIGURATION
    # See: https://docs.djangoproject.com/en/dev/ref/settings/#template-dirs
    TEMPLATE_LOADERS = (
        ('django.template.loaders.cached.Loader', (
            'django.template.loaders.filesystem.Loader',
            'django.template.loaders.app_directories.Loader',
        )),
    )
    # END TEMPLATE CONFIGURATION

    # CACHING
    # Only do this here because, thanks to django-pylibmc-sasl and pylibmc,
    # memcacheify is painful to install on Windows.
    try:
        # See: https://github.com/rdegges/django-heroku-memcacheify
        from memcacheify import memcacheify
        CACHES = memcacheify()
    except ImportError:
        CACHES = values.CacheURLValue(default="memcached://127.0.0.1:11211")