def test_expand_role(self):
    """AwsHook.expand_role should resolve a bare role name to the full IAM ARN."""
    iam = boto3.client('iam', region_name='us-east-1')
    iam.create_role(
        RoleName='test-role',
        AssumeRolePolicyDocument='some policy',
    )
    # The ARN reported by IAM itself is the ground truth.
    expected_arn = iam.get_role(RoleName='test-role').get('Role').get('Arn')
    hook = AwsHook()
    self.assertEqual(hook.expand_role('test-role'), expected_arn)
def expand_role(self):
    """Expand the Model's ``ExecutionRoleArn`` from a role name to a full ARN.

    No-op when the config has no ``Model`` section or the section has no
    ``ExecutionRoleArn`` key.
    """
    if 'Model' not in self.config:
        return
    config = self.config['Model']
    if 'ExecutionRoleArn' in config:
        # Create the hook only when there is actually a role to expand
        # (the sibling Model variant of this method already does it this way).
        hook = AwsHook(self.aws_conn_id)
        config['ExecutionRoleArn'] = hook.expand_role(config['ExecutionRoleArn'])
def expand_role(self):
    """Resolve the Model section's ``ExecutionRoleArn`` to a full IAM ARN, if set."""
    if 'Model' not in self.config:
        return
    model_cfg = self.config['Model']
    if 'ExecutionRoleArn' not in model_cfg:
        return
    # Hook is only needed once we know there is a role to expand.
    model_cfg['ExecutionRoleArn'] = AwsHook(self.aws_conn_id).expand_role(
        model_cfg['ExecutionRoleArn'])
def execute(self, context):
    """Stage data from S3 into a Redshift table.

    Resolves the configured IAM role to a full ARN, (re)creates the target
    table, truncates it, then runs a COPY from the configured S3 path.
    """
    iam_arn = AwsHook(self.aws).expand_role(self.iam_role)
    pg_hook = PostgresHook(postgres_conn_id=self.redshift)

    self.log.info("Creating table {} if not exists".format(self.table))
    pg_hook.run(self.create_temp_table)

    self.log.info("Clearing all rows from {}".format(self.table))
    pg_hook.run("DELETE FROM {}".format(self.table))

    self.log.info("Copying data from {} to {}.".format(
        self.s3_path, self.table))
    copy_sql = SqlQueries.copy_from_s3_to_staging.format(
        self.table, self.s3_path, iam_arn, self.json_structure)
    pg_hook.run(copy_sql)
    self.log.info("{} is now populated.".format(self.table))
def expand_role(self):
    """Expand a top-level ``RoleArn`` in the config to a full IAM ARN, if present."""
    if 'RoleArn' not in self.config:
        return
    hook = AwsHook(self.aws_conn_id)
    self.config['RoleArn'] = hook.expand_role(self.config['RoleArn'])
def expand_role(self):
    """Resolve the training job definition's ``RoleArn`` to a full IAM ARN, if set."""
    if 'TrainingJobDefinition' not in self.config:
        return
    training_cfg = self.config['TrainingJobDefinition']
    if 'RoleArn' not in training_cfg:
        return
    training_cfg['RoleArn'] = AwsHook(self.aws_conn_id).expand_role(
        training_cfg['RoleArn'])
def expand_role(self):
    """Expand a top-level ``ExecutionRoleArn`` to a full IAM ARN, if present."""
    if 'ExecutionRoleArn' not in self.config:
        return
    role = self.config['ExecutionRoleArn']
    self.config['ExecutionRoleArn'] = AwsHook(self.aws_conn_id).expand_role(role)
def execute(self, context):
    """
    S3 STAGING FUNCTION

    Works for given S3 Locations and target tables (credentials to AWS and
    Redshift must be provided) by creating a Redshift COPY command from
    <S3 Location> to <target_table>.

    Based on keyword "log" the execute function will import events. If the
    execution date is before the start date, the function will just import
    the events from that specific day. Based on keyword "song" the execute
    function will import songs.
    """
    # Normalize the prefix so the 'log'/'song' membership checks below
    # cannot raise TypeError when self.prefix is None.
    prefix = self.prefix or ''

    # If <s3pref> is not assigned, just add a "/" to the S3 Bucket link;
    # otherwise concatenate it with the S3 Bucket link.
    if self.prefix is None:
        loc = '{}/'.format(self.S3_BUCKET)
    else:
        loc = '{}{}'.format(self.S3_BUCKET, self.prefix)
    logging.info('Searching in path: {}'.format(loc))

    # Create AWS and Postgres connections, get AWS role ARN.
    try:
        logging.info(
            ('Starting import from {} into table {} started.').format(
                self.S3_BUCKET, self.TABLE))
        s3 = AwsHook(aws_conn_id='aws_default')
        role_arn = s3.expand_role('dwhRole')
        logging.info(
            'AWS Hook initialized, using IAM role: {}'.format(role_arn))
        redshift_hook = PostgresHook(postgres_conn_id=self.conn_id)
        logging.info(
            'Postgres Hook {} initialized, starting COPY operation.'.format(
                self.conn_id))
    except Exception as e:
        logging.error('Error when initializing hooks: %s', e)
        # Without hooks nothing below can run; re-raise instead of falling
        # through into a NameError on role_arn / redshift_hook.
        raise

    # If the S3 string contains "log" we want to copy events.
    if 'log' in prefix:
        try:
            # Parse execution date and start date from their ISO
            # 'YYYY-MM-DD' representation.  (The previous manual slicing
            # used sdate[6:7] / sdate[9:10], which pick the wrong offsets
            # for month and day.)
            edate = datetime.strptime(self.execdate[:10], '%Y-%m-%d')
            sdate = datetime.strptime(
                self.start_date.to_date_string()[:10], '%Y-%m-%d')
            # If execution date is before start date (default) then narrow
            # <loc> to a pattern that matches only that specific day.
            if edate < sdate:
                logging.info(
                    'Execution date {} is before start date {}, switching to Single Load'
                    .format(edate, sdate))
                loc = '{}{}/{:02}/{}*.json'.format(loc, edate.year,
                                                   edate.month, edate.date())
            logging.info('Starting import from location: {}'.format(loc))
            redshift_hook.run(
                sql_queries.staging_events_copy.format(
                    self.TABLE, loc, role_arn))
            logging.info('Import job for events done')
        except Exception as e:
            logging.error('Loading files failed with error: {}'.format(e))
    # If the S3 string contains "song" then we want to copy songs.
    elif 'song' in prefix:
        try:
            logging.info('Starting import of songs from location: %s', loc)
            redshift_hook.run(
                sql_queries.staging_songs_copy.format(
                    self.TABLE, loc, role_arn))
            logging.info('Import is done.')
        except Exception as e:
            logging.error('Loading files failed with error: {}'.format(e))