Example #2
from airflow.contrib.hooks.aws_lambda_hook import AwsLambdaHook


def lambda1(ds, **kwargs):
    # Invoke the 'myAirflowTest' Lambda function synchronously and log
    # its response.
    hook = AwsLambdaHook('myAirflowTest',
                         region_name='',
                         log_type='None',
                         qualifier='$LATEST',
                         invocation_type='RequestResponse',
                         config=None,
                         aws_conn_id='my_lambda')
    response_1 = hook.invoke_lambda(payload='null')
    print('Response--->', response_1)
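A callable like lambda1 is typically wired into a DAG through a PythonOperator. A minimal sketch under Airflow 1.10-era imports; the DAG name, schedule, and task_id are assumptions for illustration:

from datetime import datetime

from airflow import DAG
from airflow.operators.python_operator import PythonOperator

dag = DAG('lambda_example', start_date=datetime(2020, 1, 1),
          schedule_interval=None)

invoke_lambda1 = PythonOperator(
    task_id='invoke_lambda1',  # hypothetical task id
    python_callable=lambda1,
    provide_context=True,      # supplies ds and the other context kwargs
    dag=dag,
)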
Example #3
import json

from airflow.contrib.hooks.aws_lambda_hook import AwsLambdaHook


def create_thumbnail(ds, **kwargs):
    # Forward the dag_run configuration to the thumbnail Lambda function,
    # then push the decoded response to XCom for downstream tasks.
    hook = AwsLambdaHook(
        'LAMBDA_FN_NAME',  # name of the target Lambda function
        log_type='None',
        qualifier='$LATEST',
        invocation_type='RequestResponse',
        config=None,
        aws_conn_id='aws_default')

    response_1 = hook.invoke_lambda(payload=json.dumps(kwargs['dag_run'].conf))
    payload = json.loads(response_1['Payload'].read().decode())
    kwargs['ti'].xcom_push(key="ThumbnailDetails", value=payload)
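Since create_thumbnail pushes the decoded Lambda response to XCom, a downstream task can read it back with xcom_pull. A minimal sketch; the upstream task_id 'create_thumbnail' is an assumption:

def report_thumbnail(ds, **kwargs):
    # Pull the payload pushed under the 'ThumbnailDetails' key by the
    # (assumed) upstream task 'create_thumbnail'.
    details = kwargs['ti'].xcom_pull(task_ids='create_thumbnail',
                                     key='ThumbnailDetails')
    print('ThumbnailDetails--->', details)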
Example #4
    def get_hook(self):
        """
        Initialises an AWS Lambda hook

        :return: airflow.contrib.hooks.AwsLambdaHook
        """
        return AwsLambdaHook(
            self.function_name,
            self.region_name,
            self.log_type,
            self.qualifier,
            aws_conn_id=self.aws_conn_id,
        )
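A get_hook factory like this is normally consumed by the operator's own execute method. A minimal sketch of what that pairing could look like; the payload attribute is a hypothetical field, not part of the original snippet:

    def execute(self, context):
        hook = self.get_hook()
        # self.payload is assumed to hold the JSON string to send.
        response = hook.invoke_lambda(payload=self.payload)
        self.log.info('Lambda response: %s', response)
        return response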
    def test_invoke_lambda_function(self):
        hook = AwsLambdaHook(aws_conn_id='aws_default',
                             function_name="test_function",
                             region_name="us-east-1")

        # Register a stub function first so there is something to invoke
        # (in Airflow's test suite this style of test runs under moto's
        # Lambda mock, so no real AWS calls are made).
        hook.get_conn().create_function(
            FunctionName='test_function',
            Runtime='python2.7',
            Role='test-iam-role',
            Handler='lambda_function.lambda_handler',
            Code={
                'ZipFile': self.lambda_function(),
            },
            Description='test lambda function',
            Timeout=3,
            MemorySize=128,
            Publish=True,
        )

        payload = {'hello': 'airflow'}
        response = hook.invoke_lambda(payload=json.dumps(payload))

        self.assertEqual(response["StatusCode"], 202)
    def test_get_conn_returns_a_boto3_connection(self):
        hook = AwsLambdaHook(aws_conn_id='aws_default',
                             function_name="test_function",
                             region_name="us-east-1")
        self.assertIsNotNone(hook.get_conn())

import csv
import json
import os
from collections import OrderedDict
from tempfile import NamedTemporaryFile

from airflow.contrib.hooks.aws_lambda_hook import AwsLambdaHook
from airflow.hooks.postgres_hook import PostgresHook
from airflow.hooks.S3_hook import S3Hook
from airflow.models import BaseOperator
from airflow.utils.decorators import apply_defaults


class S3QueryToLambdaOperator(BaseOperator):
    """
    Executes a .sql file from S3 in Redshift, sends results to Lambda, then stores results back in S3
    :param query_s3_bucket: reference to a specific S3 bucket to retrieve sql
    :type query_s3_bucket: string
    :param query_s3_key: reference to a specific S3 key to retrieve sql
    :type query_s3_key: string
    :param dest_s3_bucket: reference to a specific S3 bucket to put files
    :type dest_s3_bucket: string
    :param dest_s3_key: reference to a specific S3 key to put files
    :type dest_s3_key: string
    :param function_name: name of AWS Lambda function
    :type function_name: string
    :param aws_region: name of AWS region
    :type aws_region: string
    :param redshift_conn_id: reference to a specific redshift database
    :type redshift_conn_id: string
    :param aws_conn_id: reference to a specific S3 connection
    :type aws_conn_id: string
    :param batch_size: size of batches to submit to Lambda
    :type batch_size: int
    """

    template_fields = ()
    template_ext = ()
    ui_color = '#ededed'

    @apply_defaults
    def __init__(self,
                 query_s3_bucket,
                 query_s3_key,
                 dest_s3_bucket,
                 dest_s3_key,
                 function_name,
                 aws_region,
                 redshift_conn_id='redshift_default',
                 aws_conn_id='aws_default',
                 batch_size=100,
                 *args,
                 **kwargs):
        super(S3QueryToLambdaOperator, self).__init__(*args, **kwargs)
        self.query_s3_bucket = query_s3_bucket
        self.query_s3_key = query_s3_key
        self.dest_s3_bucket = dest_s3_bucket
        self.dest_s3_key = dest_s3_key
        self.function_name = function_name
        self.aws_region = aws_region
        self.redshift_conn_id = redshift_conn_id
        self.aws_conn_id = aws_conn_id
        self.batch_size = batch_size

    def execute(self, context):
        self.database = PostgresHook(postgres_conn_id=self.redshift_conn_id)
        self.s3 = S3Hook(aws_conn_id=self.aws_conn_id)
        self.awslambda = AwsLambdaHook(aws_conn_id=self.aws_conn_id,
                                       function_name=self.function_name,
                                       region_name=self.aws_region)

        s3_object = self.s3.get_key(key=self.query_s3_key,
                                    bucket_name=self.query_s3_bucket)

        self.log.info('Executing query...')
        query = s3_object.get()['Body'].read().decode('utf-8')

        engine = self.database.get_sqlalchemy_engine()
        con = engine.connect()
        result = con.execute(query)
        self.log.info(
            'Query complete, sending results to Lambda function {function_name}...'
            .format(function_name=self.function_name))

        records = []
        for row in result:
            row_list = []
            for column, value in row.items():
                # Replace +/-inf with 0 so the batch stays JSON-serialisable.
                value = 0 if value in (float('inf'), float('-inf')) else value
                row_list.append(value)
            records.append(row_list)

        full_results = []
        for batch in self.batch(iterable=records, n=self.batch_size):
            payload = json.dumps({"input": batch})
            lambda_result = self.awslambda.invoke_lambda(payload)
            # Each invocation returns a JSON list; preserve key order so the
            # CSV columns line up across batches.
            results = json.loads(
                lambda_result['Payload'].read().decode('utf-8'),
                object_pairs_hook=OrderedDict)
            full_results.extend(results)

        if full_results:
            # Write the results to a temporary pipe-delimited CSV, upload it
            # to S3, then clean up the local file.
            f_source = NamedTemporaryFile(mode='w+t',
                                          suffix='.csv',
                                          newline='',
                                          delete=False)
            fieldnames = list(full_results[0].keys())
            writer = csv.DictWriter(f_source,
                                    fieldnames=fieldnames,
                                    delimiter='|')

            writer.writeheader()
            for row in full_results:
                writer.writerow(row)

            f_source.close()
            self.s3.load_file(filename=f_source.name,
                              key=self.dest_s3_key,
                              bucket_name=self.dest_s3_bucket,
                              replace=True)
            self.log.info("File loaded to S3.")
            os.remove(f_source.name)
        else:
            self.log.info("No results to write, skipping upload to S3.")

    def batch(self, iterable, n=1):
        """Yield successive chunks of at most n items from iterable."""
        length = len(iterable)
        for ndx in range(0, length, n):
            yield iterable[ndx:min(ndx + n, length)]
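
For completeness, a minimal sketch of using the operator inside a DAG; every bucket, key, and function name below is a placeholder, not a value from the original code:

from datetime import datetime

from airflow import DAG

dag = DAG('s3_query_to_lambda_example', start_date=datetime(2020, 1, 1),
          schedule_interval=None)

score_records = S3QueryToLambdaOperator(
    task_id='score_records',
    query_s3_bucket='my-query-bucket',    # placeholder
    query_s3_key='queries/score.sql',     # placeholder
    dest_s3_bucket='my-results-bucket',   # placeholder
    dest_s3_key='results/scores.csv',     # placeholder
    function_name='my-scoring-function',  # placeholder
    aws_region='us-east-1',
    redshift_conn_id='redshift_default',
    aws_conn_id='aws_default',
    batch_size=500,
    dag=dag,
)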