def __init__(
    self,
    region_name: str = None,
    session_name: str = None,
):
    """
    Initialize `BotoSession`

    Parameters
    ----------
    region_name : str (optional)
        Default region when creating new connection.
    session_name : str (optional)
        An identifier for the assumed role session.
        (required when `sts_arn` is given)
    """
    self.region_name = region_name
    # Read why RoleSessionName is important:
    # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sts.html
    self.session_name = session_name or uuid4().hex
    self._role_fetcher = InstanceMetadataFetcher(
        timeout=setting("S3_CREDENTIALS_TIMEOUT", 1000), num_attempts=3)
    self.access_key = None
    self.secret_key = None
    self.security_token = None
def getCredentials(self):
    provider = InstanceMetadataProvider(
        iam_role_fetcher=InstanceMetadataFetcher(timeout=1000, num_attempts=2))
    creds = provider.load()
    access_key = creds.access_key
    return access_key
def __init__(self, dest=None, content_type=None):
    self.errors = []
    if not dest:
        dest = 'default'
    try:
        destination_settings = \
            app_settings.S3DIRECT_DESTINATIONS[dest]
    except KeyError:
        self.errors.append(
            'Provided destination is not specified in config',
        )
        return
    for key, value in destination_settings.items():
        setattr(self, key, value)
    self.access_key = \
        getattr(settings, 'AWS_S3_ACCESS_KEY_ID', None)
    self.secret_access_key = \
        getattr(settings, 'AWS_S3_SECRET_ACCESS_KEY', None)
    self.now_date = datetime.utcnow().strftime('%Y%m%dT%H%M%S000Z')
    self.raw_date = datetime.utcnow().strftime('%Y%m%d')
    if content_type:
        self.content_type = content_type
    if (content_type
            and (self.allowed and content_type not in self.allowed)
            and self.allowed != '*'):
        self.errors.append('Invalid file type.')
        return
    if not self.region or self.region == 'us-east-1':
        self.endpoint = 's3.amazonaws.com'
    else:
        self.endpoint = f's3-{self.region}.amazonaws.com'
    if self.access_key is None or self.secret_access_key is None:
        try:
            from botocore.credentials import (InstanceMetadataProvider,
                                              InstanceMetadataFetcher)
        except ImportError:
            InstanceMetadataProvider = None
            InstanceMetadataFetcher = None

        if all([InstanceMetadataProvider, InstanceMetadataFetcher]):
            provider = InstanceMetadataProvider(
                iam_role_fetcher=InstanceMetadataFetcher(timeout=1000,
                                                         num_attempts=2))
            creds = provider.load()
            self.access_key = creds.access_key
            self.secret_access_key = creds.secret_key
            self.token = creds.token
        else:
            self.errors.append(
                'Failed to access EC2 instance metadata due to '
                'missing dependency.')
            return
def delete_object(bucket, key):
    rn = "eu-west-1"
    provider = InstanceMetadataProvider(
        iam_role_fetcher=InstanceMetadataFetcher(timeout=1000, num_attempts=2))
    creds = provider.load().get_frozen_credentials()
    s3_client = boto3.client(
        "s3",
        region_name=rn,
        aws_access_key_id=creds.access_key,
        aws_secret_access_key=creds.secret_key,
        aws_session_token=creds.token,
    )
    s3_client.delete_object(Bucket=bucket, Key=key)
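# A minimal usage sketch (not from the original snippet): the bucket and key are
# hypothetical, and the call assumes the instance role attached to the EC2 host
# grants s3:DeleteObject on that bucket.
delete_object("example-bucket", "reports/stale-report.csv")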
def get_boto_session(
    force_ec2: bool = False,
    region_name: str = "eu-west-1",
):
    kwargs = {"region_name": region_name}
    if force_ec2:
        provider = InstanceMetadataProvider(
            iam_role_fetcher=InstanceMetadataFetcher(timeout=1000, num_attempts=2))
        creds = provider.load().get_frozen_credentials()
        kwargs["aws_access_key_id"] = creds.access_key
        kwargs["aws_secret_access_key"] = creds.secret_key
        kwargs["aws_session_token"] = creds.token
    return boto3.Session(**kwargs)
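# A minimal usage sketch (not from the original snippet): force credentials to
# come from the EC2 instance profile, then build an S3 client from the session.
session = get_boto_session(force_ec2=True)
s3 = session.client("s3")
print(s3.list_buckets()["Buckets"])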
def get_aws_credentials():
    access_key = getattr(settings, 'AWS_ACCESS_KEY_ID', None)
    secret_key = getattr(settings, 'AWS_SECRET_ACCESS_KEY', None)
    if access_key and secret_key:
        # AWS tokens are not created for pregenerated access keys
        return AWSCredentials(None, secret_key, access_key)
    if not InstanceMetadataProvider or not InstanceMetadataFetcher:
        # AWS credentials are not required for publicly-writable buckets
        return AWSCredentials(None, None, None)
    provider = InstanceMetadataProvider(
        iam_role_fetcher=InstanceMetadataFetcher(timeout=1000, num_attempts=2))
    creds = provider.load()
    if creds:
        return AWSCredentials(creds.token, creds.secret_key, creds.access_key)
    else:
        # Creds are incorrect
        return AWSCredentials(None, None, None)
def create_credential_resolver(session):
    """Create a default credential resolver.

    This creates a pre-configured credential resolver that includes the
    default lookup chain for credentials.
    """
    profile_name = session.get_config_variable('profile')
    credential_file = session.get_config_variable('credentials_file')
    config_file = session.get_config_variable('config_file')
    metadata_timeout = session.get_config_variable('metadata_service_timeout')
    num_attempts = session.get_config_variable('metadata_service_num_attempts')

    providers = []
    if profile_name is None:
        providers += [
            EnvProvider(),
        ]
        profile_name = 'default'
    providers += [
        # The new config file has precedence over the legacy config file.
        SharedCredentialProvider(creds_filename=credential_file,
                                 profile_name=profile_name),
        ConfigProvider(config_filename=config_file,
                       profile_name=profile_name),
        OriginalEC2Provider(),
        BotoProvider(),
        InstanceMetadataProvider(iam_role_fetcher=InstanceMetadataFetcher(
            timeout=metadata_timeout, num_attempts=num_attempts))
    ]
    resolver = CredentialResolver(providers=providers)
    return resolver
def __call__(self, host, username, password, aws_region, boto_profile):
    """
    Return the authorization header. If 'boto_profile' is passed, it'll be
    used. Otherwise it'll sign requests with the instance role.

    :param host: ElasticSearch host.
    :param username: Username used for authenticating the requests to ElasticSearch.
    :param password: Password used for authenticating the requests to ElasticSearch.
    :param aws_region: AWS Region to use. Only required when signing requests.
    :param boto_profile: Boto profile to use for connecting. Only required when signing requests.
    """
    if username and password:
        return username + ':' + password

    if not aws_region:
        return None

    if boto_profile:
        # Executing elastalert from a machine with AWS credentials
        config = configparser.ConfigParser()
        config.read(os.path.expanduser('~') + '/.aws/credentials')
        aws_access_key_id = str(config[boto_profile]['aws_access_key_id'])
        aws_secret_access_key = str(config[boto_profile]['aws_secret_access_key'])
        aws_token = None
    else:
        # Executing elastalert from a machine deployed with a specific role
        provider = InstanceMetadataProvider(
            iam_role_fetcher=InstanceMetadataFetcher(timeout=1000, num_attempts=2))
        aws_credentials = provider.load()
        aws_access_key_id = str(aws_credentials.access_key)
        aws_secret_access_key = str(aws_credentials.secret_key)
        aws_token = str(aws_credentials.token)

    return AWSRequestsAuth(aws_access_key=aws_access_key_id,
                           aws_secret_access_key=aws_secret_access_key,
                           aws_token=aws_token,
                           aws_host=host,
                           aws_region=aws_region,
                           aws_service='es')
def get_upload_params(request):
    content_type = request.POST['type']
    filename = request.POST['name']
    dest = get_s3direct_destinations().get(request.POST['dest'])
    if not dest:
        data = json.dumps({'error': 'File destination does not exist.'})
        return HttpResponse(data, content_type="application/json", status=400)

    key = dest.get('key')
    auth = dest.get('auth')
    allowed = dest.get('allowed')
    acl = dest.get('acl')
    bucket = dest.get('bucket')
    cache_control = dest.get('cache_control')
    content_disposition = dest.get('content_disposition')
    content_length_range = dest.get('content_length_range')
    server_side_encryption = dest.get('server_side_encryption')

    if not acl:
        acl = 'public-read'

    if not key:
        data = json.dumps({'error': 'Missing destination path.'})
        return HttpResponse(data, content_type="application/json", status=403)

    if auth and not auth(request.user):
        data = json.dumps({'error': 'Permission denied.'})
        return HttpResponse(data, content_type="application/json", status=403)

    if (allowed and content_type not in allowed) and allowed != '*':
        data = json.dumps({'error': 'Invalid file type (%s).' % content_type})
        return HttpResponse(data, content_type="application/json", status=400)

    if hasattr(key, '__call__'):
        key = key(filename)
    elif key == '/':
        key = '${filename}'
    else:
        # The literal string '${filename}' is an S3 field variable for key.
        # https://aws.amazon.com/articles/1434#aws-table
        key = '%s/${filename}' % key

    access_key = getattr(settings, 'AWS_ACCESS_KEY_ID', None)
    secret_access_key = getattr(settings, 'AWS_SECRET_ACCESS_KEY', None)
    token = None
    if access_key is None or secret_access_key is None:
        # Get credentials from instance profile if not defined in settings --
        # this avoids the need to put access credentials in the settings.py file.
        # Assumes we're running on EC2.
        try:
            from botocore.credentials import InstanceMetadataProvider, InstanceMetadataFetcher
        except ImportError:
            InstanceMetadataProvider = None
            InstanceMetadataFetcher = None

        if all([InstanceMetadataProvider, InstanceMetadataFetcher]):
            provider = InstanceMetadataProvider(
                iam_role_fetcher=InstanceMetadataFetcher(timeout=1000, num_attempts=2))
            creds = provider.load()
            access_key = creds.access_key
            secret_access_key = creds.secret_key
            token = creds.token
        else:
            data = json.dumps({
                'error': 'Failed to access EC2 instance metadata due to missing dependency.'
            })
            return HttpResponse(data, content_type="application/json", status=500)

    data = create_upload_data(content_type, key, acl, bucket, cache_control,
                              content_disposition, content_length_range,
                              server_side_encryption, access_key,
                              secret_access_key, token)

    return HttpResponse(json.dumps(data), content_type="application/json")
def get_upload_params(request):
    content_type = request.POST['type']
    filename = get_valid_filename(request.POST['name'])
    dest = get_s3upload_destinations().get(request.POST['dest'])
    if not dest:
        data = json.dumps({'error': 'File destination does not exist.'})
        return HttpResponse(data, content_type="application/json", status=400)

    key = dest.get('key')
    auth = dest.get('auth')
    allowed_types = dest.get('allowed_types')
    acl = dest.get('acl')
    bucket = dest.get('bucket')
    cache_control = dest.get('cache_control')
    content_disposition = dest.get('content_disposition')
    content_length_range = dest.get('content_length_range')
    allowed_extensions = dest.get('allowed_extensions')
    server_side_encryption = dest.get('server_side_encryption')

    if not acl:
        acl = 'public-read'

    if not key:
        data = json.dumps({'error': 'Missing destination path.'})
        return HttpResponse(data, content_type="application/json", status=403)

    if auth and not auth(request.user):
        data = json.dumps({'error': 'Permission denied.'})
        return HttpResponse(data, content_type="application/json", status=403)

    if (allowed_types and content_type not in allowed_types) and allowed_types != '*':
        data = json.dumps({'error': 'Invalid file type (%s).' % content_type})
        return HttpResponse(data, content_type="application/json", status=400)

    original_ext = splitext(filename)[1]
    lowercased_ext = original_ext.lower()
    if (allowed_extensions and lowercased_ext not in allowed_extensions) and allowed_extensions != '*':
        data = json.dumps(
            {'error': 'Forbidden file extension (%s).' % original_ext})
        return HttpResponse(data, content_type="application/json", status=415)

    if hasattr(key, '__call__'):
        key = key(filename)
    elif key == '/':
        key = filename
    else:
        key = '{0}/{1}'.format(key, filename)

    access_key = getattr(settings, 'AWS_ACCESS_KEY_ID', None)
    secret_access_key = getattr(settings, 'AWS_SECRET_ACCESS_KEY', None)
    token = None
    if access_key is None or secret_access_key is None:
        # Get credentials from instance profile if not defined in settings --
        # this avoids the need to put access credentials in the settings.py file.
        # Assumes we're running on EC2.
        try:
            from botocore.credentials import InstanceMetadataProvider, InstanceMetadataFetcher
        except ImportError:
            InstanceMetadataProvider = None
            InstanceMetadataFetcher = None

        if all([InstanceMetadataProvider, InstanceMetadataFetcher]):
            provider = InstanceMetadataProvider(
                iam_role_fetcher=InstanceMetadataFetcher(timeout=1000, num_attempts=2))
            creds = provider.load()
            access_key = creds.access_key
            secret_access_key = creds.secret_key
            token = creds.token
        else:
            data = json.dumps({
                'error': 'Failed to access EC2 instance metadata due to missing dependency.'
            })
            return HttpResponse(data, content_type="application/json", status=500)

    data = create_upload_data(content_type, key, acl, bucket, cache_control,
                              content_disposition, content_length_range,
                              server_side_encryption, access_key,
                              secret_access_key, token)

    url = None
    # Generate signed URL for private document access
    if acl == "private":
        url = get_signed_download_url(
            key=key.replace("${filename}", filename),
            bucket_name=bucket or settings.AWS_STORAGE_BUCKET_NAME,
            ttl=int(5 * 60),  # 5 mins
        )

    response = {
        "aws_payload": data,
        "private_access_url": url,
    }

    return HttpResponse(json.dumps(response), content_type="application/json")
def am_i_an_ec2_instance():
    return InstanceMetadataFetcher(
        timeout=1, num_attempts=1).retrieve_iam_role_credentials()
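# Usage note (added): despite the boolean-sounding name, the function returns
# whatever retrieve_iam_role_credentials() yields -- a dict of credentials on an
# EC2 host with a role attached, and an empty dict when the metadata service is
# unreachable -- so callers can simply truth-test the result.
if am_i_an_ec2_instance():
    print("Running on EC2 with an instance role available")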
def __setupSparkSession__(
    self,
    jobConf: dict,
) -> SparkSession:
    '''
    Init the Spark environment with a few default configurations and
    start the Spark session.
    '''
    conf = SparkConf()

    #
    # Setup Spark specific configurations
    #
    hmConf = {
        "spark.executor.pyspark.memory": "512m",
        "spark.debug.maxToStringFields": "5000",
        "spark.rps.askTimeout": "1200",
        "spark.network.timeout": "1200",
        "spark.maxRemoteBlockSizeFetchToMem": "512m",
        "spark.broadcast.blockSize": "16m",
        "spark.broadcast.compress": "true",
        "spark.rdd.compress": "true",
        "spark.io.compression.codec": "org.apache.spark.io.SnappyCompressionCodec",
        "spark.kryo.unsafe": "true",
        "spark.serializer": "org.apache.spark.serializer.KryoSerializer",
        "spark.kryoserializer.buffer": "10240",
        "spark.kryoserializer.buffer.max": "2040m",
        "hive.exec.dynamic.partition": "true",
        "hive.exec.dynamic.partition.mode": "nonstrict",
        "hive.warehouse.data.skiptrash": "true",
        "spark.sql.hive.metastorePartitionPruning": "true",
        "spark.sql.broadcastTimeout": "1200",
        "spark.sql.sources.partitionOverwriteMode": "dynamic",
        "spark.sql.orc.filterPushdown": "true",
        "spark.sql.orc.splits.include.file.footer": "true",
        "spark.sql.orc.cache.stripe.details.size": "1000",
        "spark.hadoop.parquet.enable.summary-metadata": "false",
        "spark.sql.parquet.mergeSchema": "false",
        "spark.sql.parquet.filterPushdown": "true",
        "spark.sql.parquet.fs.optimized.committer.optimization-enabled": "true",
        "spark.hadoop.mapreduce.fileoutputcommitter.algorithm.version": "2",
        "spark.hadoop.mapreduce.fileoutputcommitter.cleanup-failures.ignored": "true"
    }
    # Job-supplied Spark confs override the defaults above (plain dict assignment,
    # since hmConf is a dict and has no .set()).
    for (k, v) in jobConf['sparkconfs'].items():
        hmConf[k] = v
    conf.setAll(hmConf.items())

    #
    # Setup AWS specific configurations (JVM system properties and SparkConf
    # must be in place before the session is created)
    #
    if jobConf['appconfs']['runenv'].upper() == 'AWS':
        SparkContext.setSystemProperty(
            'com.amazonaws.services.s3.enableV4', 'true')
        SparkContext.setSystemProperty(
            'com.amazonaws.services.s3.enforceV4', 'true')
        conf.set(
            "spark.sql.parquet.output.committer.class",
            "com.amazon.emr.committer.EmrOptimizedSparkSqlParquetOutputCommitter"
        )

    spark = SparkSession \
        .builder \
        .config(conf=conf) \
        .appName(jobConf['name'] or 'PySparkApp') \
        .enableHiveSupport() \
        .getOrCreate()
    sc = spark.sparkContext

    #
    # Setup Hadoop specific configurations (requires a live SparkContext)
    #
    hdpCnf = sc._jsc.hadoopConfiguration()
    hdpCnf.set('io.file.buffer.size', '65536')
    hdpCnf.set('mapreduce.fileoutputcommitter.algorithm.version', '2')
    for (k, v) in jobConf['hadoopconfs'].items():
        hdpCnf.set(k, v)

    if jobConf['appconfs']['runenv'].upper() == 'AWS':
        try:
            from botocore.credentials import InstanceMetadataProvider, InstanceMetadataFetcher
            provider = InstanceMetadataProvider(
                iam_role_fetcher=InstanceMetadataFetcher(timeout=1000,
                                                         num_attempts=2))
            creds = provider.load()
            hdpCnf.set('fs.s3a.access.key', creds.access_key)
            hdpCnf.set('fs.s3a.secret.key', creds.secret_key)
        except Exception:
            pass
        # Hadoop Configuration has no setAll(); set the keys individually.
        for (k, v) in {
            'fs.s3a.server-side-encryption-algorithm': 'SSE-KMS',
            'fs.s3.enableServerSideEncryption': 'true',
            'fs.s3.impl': 'org.apache.hadoop.fs.s3a.S3AFileSystem',
            'fs.s3a.impl': 'org.apache.hadoop.fs.s3a.S3AFileSystem',
            'fs.s3a.endpoint': "s3.%s.amazonaws.com" % (jobConf['appconfs']['appdefaults'] or 'us-east-1')
        }.items():
            hdpCnf.set(k, v)

    sc.setLogLevel(jobConf['appconfs']['logging']['sparkloglevel'] or 'INFO')
    if (jobConf['appconfs']['logging']['sparkloglevel'] or 'INFO') == "DEBUG":
        msg = ""
        for k in sc._conf.getAll():
            msg += "\t%50s -> %s\n" % (k[0], k[1])
        log.debug("Initiated SparkSession with below confs,\n{}".format(msg))

    return spark
def __setupSparkSession__(self, appName: str = None):
    '''
    Init the Spark environment with a few default configurations and
    start the Spark session.
    '''
    self.__conf = SparkConf()
    hmConf = {
        "spark.rps.askTimeout": "1200",
        "spark.network.timeout": "1200",
        "spark.broadcast.blockSize": "16m",
        "spark.sql.broadcastTimeout": "1200",
        "spark.broadcast.compress": "true",
        "spark.rdd.compress": "true",
        "fs.s3.enableServerSideEncryption": "true",
        "spark.kryo.unsafe": "false",
        "spark.kryoserializer.buffer": "10240",
        "spark.kryoserializer.buffer.max": "2040m",
        "spark.io.compression.codec": "org.apache.spark.io.SnappyCompressionCodec",
        "spark.serializer": "org.apache.spark.serializer.KryoSerializer",
        "mapreduce.fileoutputcommitter.algorithm.version": "2",
        "spark.hadoop.mapreduce.fileoutputcommitter.algorithm.version": "2",
    }
    # SparkConf.setAll expects key/value pairs, so pass the dict items.
    self.__conf.setAll(hmConf.items())

    SparkContext.setSystemProperty("com.amazonaws.services.s3.enableV4", "true")
    SparkContext.setSystemProperty("com.amazonaws.services.s3.enforceV4", "true")

    self.__spark = SparkSession \
        .builder \
        .config(conf=self.__conf) \
        .appName(appName or "PySparkApp") \
        .enableHiveSupport() \
        .getOrCreate()
    self.__sc = self.__spark.sparkContext
    self.sqlC = SQLContext(self.__sc)
    self.__sc.setSystemProperty("com.amazonaws.services.s3.enableV4", "true")
    self.__sc.setSystemProperty("com.amazonaws.services.s3.enforceV4", "true")
    self.__sc.setLogLevel(self.__parms.get("--logLevel", "INFO"))

    # Hadoop Configuration is reached through the JVM gateway and has no
    # setAll(); set the keys individually.
    hdpCnf = self.__sc._jsc.hadoopConfiguration()
    for k, v in {
        "io.file.buffer.size": "65536",
        "mapreduce.fileoutputcommitter.algorithm.version": "2",
        "fs.s3a.endpoint": "%s.amazonaws.com" % (self.__parms.get("--awsRegion", "s3.us-east-1")),
    }.items():
        hdpCnf.set(k, v)

    if self.__parms.get("--runEnv", "AWS") == "AWS":
        from botocore.credentials import InstanceMetadataProvider, InstanceMetadataFetcher
        provider = InstanceMetadataProvider(
            iam_role_fetcher=InstanceMetadataFetcher(timeout=1000, num_attempts=2))
        creds = provider.load()
        for k, v in {
            "fs.s3a.access.key": creds.access_key,
            "fs.s3a.secret.key": creds.secret_key,
            "fs.s3a.server-side-encryption-algorithm": "SSE-KMS",
            "fs.s3.enableServerSideEncryption": "true",
            "fs.s3.impl": "org.apache.hadoop.fs.s3a.S3AFileSystem",
            "fs.s3a.impl": "org.apache.hadoop.fs.s3a.S3AFileSystem",
            "fs.s3a.endpoint": "s3.%s.amazonaws.com" % (self.__parms.get("--awsRegion", "us-east-1")),
        }.items():
            hdpCnf.set(k, v)
import json
import logging
import os

import boto3
import paste.translogger
import requests
from botocore.credentials import InstanceMetadataProvider, InstanceMetadataFetcher
from requests_aws4auth import AWS4Auth

secret_key = os.environ.get("SECRET_KEY")
access_key = os.environ.get("ACCESS_KEY")

if secret_key is None:
    logger = logging.getLogger("elasticsearch-service")
    logger.info("No params, so attempting to get config from the machine")
    provider = InstanceMetadataProvider(
        iam_role_fetcher=InstanceMetadataFetcher(timeout=1000, num_attempts=2))
    credentials = provider.load()
    access_key = credentials.access_key
    secret_key = credentials.secret_key

region = os.environ.get('REGION')
if region is None:
    region = "eu-central-1"


def executeSignedPost(url, body):
    service = 'es'
    awsauth = AWS4Auth(access_key, secret_key, region, service)
    r = requests.post(url, auth=awsauth, json=body)
    result = r.json()
    return result
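# A minimal usage sketch (not from the original module): the endpoint and index
# below are hypothetical placeholders for an Amazon Elasticsearch/OpenSearch domain.
result = executeSignedPost(
    "https://search-example.eu-central-1.es.amazonaws.com/logs/_search",
    {"query": {"match_all": {}}},
)
print(result)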
def get_instance_cred():
    provider = InstanceMetadataProvider(
        iam_role_fetcher=InstanceMetadataFetcher(timeout=1000, num_attempts=2))
    creds = provider.load().get_frozen_credentials()
    return creds
def create_credential_resolver(session, cache=None, region_name=None):
    """Create a default credential resolver.

    This creates a pre-configured credential resolver that includes the
    default lookup chain for credentials.
    """
    profile_name = session.get_config_variable("profile") or "default"
    metadata_timeout = session.get_config_variable("metadata_service_timeout")
    num_attempts = session.get_config_variable("metadata_service_num_attempts")
    disable_env_vars = session.instance_variables().get("profile") is not None

    if cache is None:
        cache = {}

    env_provider = EnvProvider()
    container_provider = ContainerProvider()
    instance_metadata_provider = InstanceMetadataProvider(
        iam_role_fetcher=InstanceMetadataFetcher(
            timeout=metadata_timeout,
            num_attempts=num_attempts,
            user_agent=session.user_agent(),
        ))

    profile_provider_builder = ProfileProviderBuilder(session,
                                                      cache=cache,
                                                      region_name=region_name)
    assume_role_provider = AssumeRoleProvider(
        load_config=lambda: session.full_config,
        client_creator=_get_client_creator(session, region_name),
        cache=cache,
        profile_name=profile_name,
        credential_sourcer=CanonicalNameCredentialSourcer(
            [env_provider, container_provider, instance_metadata_provider]),
        profile_provider_builder=profile_provider_builder,
    )

    pre_profile = [
        env_provider,
        assume_role_provider,
    ]
    profile_providers = profile_provider_builder.providers(
        profile_name=profile_name,
        disable_env_vars=disable_env_vars,
    )
    post_profile = [
        OriginalEC2Provider(),
        BotoProvider(),
        container_provider,
        instance_metadata_provider,
    ]
    providers = pre_profile + profile_providers + post_profile

    if disable_env_vars:
        # An explicitly provided profile will negate an EnvProvider.
        # We will defer to providers that understand the "profile"
        # concept to retrieve credentials.
        # The one edge case is if all three values are provided via
        # env vars:
        # export AWS_ACCESS_KEY_ID=foo
        # export AWS_SECRET_ACCESS_KEY=bar
        # export AWS_PROFILE=baz
        # Then, just like our client() calls, the explicit credentials
        # will take precedence.
        #
        # This precedence is enforced by leaving the EnvProvider in the chain.
        # This means that the only way a "profile" would win is if the
        # EnvProvider does not return credentials, which is what we want
        # in this scenario.
        providers.remove(env_provider)
        LOGGER.debug("Skipping environment variable credential check"
                     " because profile name was explicitly set.")

    return CredentialResolver(providers=providers)
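# A minimal usage sketch (assumes a plain botocore session; not part of the
# original function): build the resolver and walk the default provider chain.
import botocore.session

session = botocore.session.get_session()
resolver = create_credential_resolver(session)
credentials = resolver.load_credentials()  # None if no provider yields credentials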