Example #1
 # From Airflow's AwsHook tests; assumes a unittest.TestCase context and a
 # mocked IAM backend (e.g. moto's @mock_iam), so no real role is created.
 def test_expand_role(self):
     conn = boto3.client('iam', region_name='us-east-1')
     conn.create_role(RoleName='test-role', AssumeRolePolicyDocument='some policy')
     hook = AwsHook()
     arn = hook.expand_role('test-role')
     expect_arn = conn.get_role(RoleName='test-role').get('Role').get('Arn')
     self.assertEqual(arn, expect_arn)
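For context, here is a minimal sketch of what AwsHook.expand_role does, modeled on the Airflow 1.10-era contrib hook (treat the details as an assumption and verify against your installed version): a value that already looks like an ARN is returned unchanged, and a bare role name is resolved through IAM.

 # Sketch of AwsHook.expand_role, modeled on the Airflow 1.10-era
 # contrib hook (assumption -- verify against your installed version).
 def expand_role(self, role):
     if '/' in role:
         # Already a full ARN such as
         # arn:aws:iam::123456789012:role/test-role -- pass it through.
         return role
     # Otherwise treat it as a bare role name and resolve it via IAM.
     return self.get_client_type('iam').get_role(RoleName=role)['Role']['Arn']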
Example #2
 def expand_role(self):
     if 'Model' not in self.config:
         return
     hook = AwsHook(self.aws_conn_id)
     config = self.config['Model']
     if 'ExecutionRoleArn' in config:
         config['ExecutionRoleArn'] = hook.expand_role(config['ExecutionRoleArn'])
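To illustrate the in-place expansion (all values below are made up for this example):

 # Hypothetical config -- 'my-model-role' is an illustrative role name.
 operator.config = {'Model': {'ExecutionRoleArn': 'my-model-role'}}
 operator.expand_role()
 # operator.config['Model']['ExecutionRoleArn'] now holds the full ARN,
 # e.g. 'arn:aws:iam::123456789012:role/my-model-role'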
Example #3
 def expand_role(self):
     if 'Model' not in self.config:
         return
     config = self.config['Model']
     if 'ExecutionRoleArn' in config:
         hook = AwsHook(self.aws_conn_id)
         config['ExecutionRoleArn'] = hook.expand_role(config['ExecutionRoleArn'])
Example #4
 def test_expand_role(self):
     conn = boto3.client('iam', region_name='us-east-1')
     conn.create_role(RoleName='test-role', AssumeRolePolicyDocument='some policy')
     hook = AwsHook()
     arn = hook.expand_role('test-role')
     expect_arn = conn.get_role(RoleName='test-role').get('Role').get('Arn')
     self.assertEqual(arn, expect_arn)
Example #5
 def execute(self, context):
     aws_hook = AwsHook(self.aws)
     aws_iam_arn = aws_hook.expand_role(self.iam_role)
     redshift = PostgresHook(postgres_conn_id=self.redshift)
     self.log.info("Creating table {} if not exists".format(self.table))
     redshift.run(self.create_temp_table)
     self.log.info("Clearing all rows from {}".format(self.table))
     redshift.run("DELETE FROM {}".format(self.table))
     self.log.info("Copying data from {} to {}.".format(
         self.s3_path, self.table))
     sql = SqlQueries.copy_from_s3_to_staging.format(
         self.table, self.s3_path, aws_iam_arn, self.json_structure)
     redshift.run(sql)
     self.log.info("{} is now populated.".format(self.table))
Example #6
 def expand_role(self):
     if 'RoleArn' in self.config:
         hook = AwsHook(self.aws_conn_id)
         self.config['RoleArn'] = hook.expand_role(self.config['RoleArn'])
Example #7
 def expand_role(self):
     if 'TrainingJobDefinition' in self.config:
         config = self.config['TrainingJobDefinition']
         if 'RoleArn' in config:
             hook = AwsHook(self.aws_conn_id)
             config['RoleArn'] = hook.expand_role(config['RoleArn'])
Example #8
 def expand_role(self):
     if 'ExecutionRoleArn' in self.config:
         hook = AwsHook(self.aws_conn_id)
         self.config['ExecutionRoleArn'] = hook.expand_role(self.config['ExecutionRoleArn'])
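The three variants above all follow the same pattern: each SageMaker operator expands any bare role name in its job config before submitting it to AWS. A simplified sketch of how an operator might call it (not the exact Airflow source):

 # Simplified sketch -- the real SageMakerModelOperator.execute does more
 # (config preprocessing, logging, response checking).
 def execute(self, context):
     self.expand_role()                  # resolve role names to full ARNs
     return self.hook.create_model(self.config)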
Example #9
    def execute(self, context):
        """ S3 STAGING FUNCTION
            Builds and runs a Redshift COPY command from <S3 Location> to
            <target_table>; AWS and Redshift credentials must be provided.
            If the prefix contains the keyword "log", the function imports
            events; if the execution date is before the start date, it
            imports only the events from that specific day.
            If the prefix contains the keyword "song", the function
            imports songs.
        """
        # If <s3pref> is not assigned, just add a "/" to the S3 Bucket link.
        if self.prefix is None:
            loc = '{}/'.format(self.S3_BUCKET)
        # If there is an <s3pref> we concatenate it with the S3 Bucket link.
        else:
            loc = '{}{}'.format(self.S3_BUCKET, self.prefix)
        logging.info('Searching in path: {}'.format(loc))

        # Set up the required AWS and Postgres hooks
        try:
            # Create AWS and Postgres connections, get AWS role ARN
            logging.info(
                'Starting import from {} into table {}.'.format(
                    self.S3_BUCKET, self.TABLE))
            s3 = AwsHook(aws_conn_id='aws_default')
            #logging.info(s3.get_session())
            role_arn = s3.expand_role('dwhRole')
            logging.info(
                'AWS Hook initialized, using IAM role: {}'.format(role_arn))
            redshift_hook = PostgresHook(postgres_conn_id=self.conn_id)
            logging.info(
                'Postgres Hook {} initialized, starting COPY operation.'.
                format(self.conn_id))
        except Exception as e:
            logging.error('Error when initializing hooks: %s', e)
            # Re-raise: the steps below depend on role_arn and redshift_hook.
            raise

        # If the S3 string contains "log" we want to copy events
        if self.prefix and 'log' in self.prefix:
            try:
                # Get the execution date and start date
                edate = datetime.strptime(self.execdate[:10], '%Y-%m-%d')
                # to_date_string() returns 'YYYY-MM-DD'; slice out year,
                # month and day to build a datetime for comparison.
                sdate = self.start_date.to_date_string()
                sdate = datetime(int(sdate[:4]), int(sdate[5:7]),
                                 int(sdate[8:10]))
                # If the execution date is before the start date (default),
                # narrow <loc> to a wildcard key pattern for that specific day
                if edate < sdate:
                    logging.info(
                        'Execution date {} is before start date {}, switching to Single Load'
                        .format(edate, sdate))
                    loc = '{}{}/{:02}/{}*.json'.format(loc,
                                                       edate.year, edate.month,
                                                       edate.date())
                logging.info('Starting import from location: {}'.format(loc))
                redshift_hook.run(
                    sql_queries.staging_events_copy.format(
                        self.TABLE, loc, role_arn))
                #redshift_hook.run(sql_queries.staging_events_copy_key.format(self.TABLE, loc, self.key, self.secret))
                logging.info('Import job for events done')
            except Exception as e:
                logging.error('Loading files failed with error: {}'.format(e))

        # If the S3 string contains "song" then we want to copy songs
        elif self.prefix and 'song' in self.prefix:
            try:
                logging.info('Starting import of songs from location: %s', loc)
                redshift_hook.run(
                    sql_queries.staging_songs_copy.format(
                        self.TABLE, loc, role_arn))
                #redshift_hook.run(sql_queries.staging_songs_copy_key.format(self.TABLE, loc, self.key, self.secret))
                logging.info('Import is done.')
            except Exception as e:
                logging.error('Loading files failed with error: {}'.format(e))
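For clarity on the Single Load branch above, the narrowed <loc> resolves to a per-day wildcard key. With illustrative values (made up for this example):

    # Illustrative values only -- bucket and prefix are made up.
    from datetime import datetime
    loc = 's3://my-bucket/log_data/'
    edate = datetime(2018, 11, 1)
    loc = '{}{}/{:02}/{}*.json'.format(loc, edate.year, edate.month,
                                       edate.date())
    # loc == 's3://my-bucket/log_data/2018/11/2018-11-01*.json'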