class KinesisOutput():
    """Class for managing connections and operations with AWS Kinesis"""
    KINESIS_CLIENT = createAWSClient('kinesis')

    def __init__(self):
        pass

    @classmethod
    def putRecord(cls, outputObject, stream):
        """Put an event into the specific Kinesis stream"""
        logger.info('Writing results to Kinesis')

        # The default lambda function here converts all objects into dicts
        kinesisStream = json.dumps(
            outputObject,
            ensure_ascii=False,
            default=lambda x: vars(x)
        )
        try:
            cls.KINESIS_CLIENT.put_record(
                StreamName=stream,
                Data=kinesisStream,
                PartitionKey=os.environ['OUTPUT_SHARD']
            )
        except Exception as err:
            logger.error('Kinesis Write error!')
            logger.debug(err)
            raise KinesisError('Failed to write result to output stream!')
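
A minimal usage sketch for the class above, reusing the module-level logger and KinesisError from the surrounding code; the stream name and shard value are placeholders, not values from the source.

# Usage sketch (hypothetical stream name and shard value)
import os

os.environ['OUTPUT_SHARD'] = '0'  # putRecord reads its partition key from here
record = {'title': 'Example Work', 'status': 'processed'}
KinesisOutput.putRecord(record, 'sfr-output-stream')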
Example #2
class OutputManager():
    """Controls the output formats and streams from this function. Valid output
    targets are:
    Kinesis: for processing in the enhancement pipeline and epub storage
    SQS: For queuing and processing by the ElasticSearch manager"""

    KINESIS_CLIENT = createAWSClient('kinesis')

    def __init__(self):
        pass

    @classmethod
    def putKinesis(cls, data, stream, recType='work'):
        """Puts records into a Kinesis stream for processing by other parts of
        the SFR data pipeline. Takes data as an object and converts it into a
        JSON string. This is then passed to the specified stream.

        This will raise any error, as failure to pass an output should halt the
        process."""

        logger.info('Writing results to Kinesis')
        outputObject = {'status': 200, 'data': data, 'type': recType}

        # The default lambda function here converts all objects into dicts
        kinesisStream = OutputManager._convertToJSON(outputObject)

        partKey = OutputManager._createPartitionKey(data)

        try:
            cls.KINESIS_CLIENT.put_record(StreamName=stream,
                                          Data=kinesisStream,
                                          PartitionKey=partKey)

        except Exception as err:
            logger.error('Kinesis Write error!')
            logger.debug(err)
            raise OutputError('Failed to write result to output stream!')

    @staticmethod
    def _convertToJSON(obj):
        """Converts an object or dict to a JSON string.
        the DEFAULT parameter implements a lambda function to get the values
        from an object using the vars() builtin."""
        try:
            jsonStr = json.dumps(obj,
                                 ensure_ascii=False,
                                 default=lambda x: vars(x))
        except TypeError:
            jsonStr = json.dumps(obj, ensure_ascii=False)

        return jsonStr

    @staticmethod
    def _createPartitionKey(obj):
        """Derives a deterministic partition key by hashing the record's
        storedURL with MD5."""
        md5 = hashlib.md5()
        md5.update(obj['storedURL'].encode('utf-8'))
        return md5.hexdigest()
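
A minimal usage sketch for the class above; the stream name and record fields are invented for illustration. Note that _createPartitionKey reads obj['storedURL'], so a record without that field raises a KeyError before anything is written.

# Usage sketch (hypothetical stream name and record)
epubRecord = {
    'storedURL': 'https://example-bucket.s3.amazonaws.com/epubs/12345.epub',
    'size': 102400
}
OutputManager.putKinesis(epubRecord, 'sfr-epub-stream', recType='item')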
Example #3
def test_create_with_access(self, mock_boto):
    result = createAWSClient(
        'fakeService', {
            'region': 'test',
            'aws_access_key_id': 1,
            'aws_secret_access_key': 'secret'
        })
    mock_boto.assert_called_once_with('fakeService',
                                      region_name='test',
                                      aws_access_key_id=1,
                                      aws_secret_access_key='secret')
    self.assertTrue(result)
class OutputManager():
    """Class for managing connections and operations with AWS Kinesis"""
    KINESIS_CLIENT = createAWSClient('kinesis')

    def __init__(self):
        pass

    @classmethod
    def putRecord(cls, outputObject, stream, workUUID):
        """Put an event into the specific Kinesis stream"""
        logger.info("Writing results to Kinesis")

        # The default lambda function here converts all objects into dicts
        kinesisStream = OutputManager._convertToJSON(outputObject)

        try:
            cls.KINESIS_CLIENT.put_record(StreamName=stream,
                                          Data=kinesisStream,
                                          PartitionKey=workUUID)
        except Exception as err:
            logger.error('Kinesis Write error!')
            logger.debug(err)
            raise KinesisError('Failed to write result to output stream!')

    @staticmethod
    def _convertToJSON(obj):
        """Converts an object or dict to a JSON string.
        the DEFAULT parameter implements a lambda function to get the values
        from an object using the vars() builtin."""
        return json.dumps(obj, ensure_ascii=False, default=lambda x: vars(x))

    @staticmethod
    def formatResponse(status, data):
        """Creates a response block to be returned to the API client.
        Arguments:
            status {integer} -- A standard HTTP status code.
            data {dict} -- A dictionary containing either an error message or a
            set of metadata describing the agent being queried.
        Returns:
            [dict] -- A complete response object containing a status and
            relevant data.
        """
        return {
            'statusCode': status,
            'headers': {
                'req-time': time.time()
            },
            'isBase64Encoded': False,
            'body': OutputManager._convertToJSON(data)
        }
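
formatResponse mirrors the response shape API Gateway expects from a proxy-integrated Lambda; a minimal handler sketch, with the handler name and payload invented for illustration:

# Usage sketch (hypothetical handler and payload)
def handler(event, context):
    agentData = {'name': 'Jane Austen', 'roles': ['author']}
    return OutputManager.formatResponse(200, agentData)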
Example #5
def createEventMapping(runType):
    logger.info('Creating event Source mappings for Lambda')
    try:
        with open('config/event_sources_{}.json'.format(runType)) as sources:
            try:
                eventMappings = json.load(sources)
            except json.decoder.JSONDecodeError as err:
                logger.error('Unable to parse JSON file')
                raise err
    except FileNotFoundError:
        logger.info('No Event Source mapping provided')
        return
    except IOError as err:
        logger.error('Unable to open JSON file')
        raise err

    if len(eventMappings['EventSourceMappings']) < 1:
        logger.info('No event sources defined')
        return

    configDict, configLines = loadEnvFile(runType, None)

    lambdaClient = createAWSClient('lambda', configDict)

    for mapping in eventMappings['EventSourceMappings']:
        logger.debug('Adding event source mapping for function')

        createKwargs = {
            'EventSourceArn': mapping['EventSourceArn'],
            'FunctionName': configDict['function_name'],
            'Enabled': mapping['Enabled'],
            'BatchSize': mapping['BatchSize'],
        }

        if 'StartingPosition' in mapping:
            createKwargs['StartingPosition'] = mapping['StartingPosition']
            if mapping['StartingPosition'] == 'AT_TIMESTAMP':
                createKwargs['StartingPositionTimestamp'] = mapping[
                    'StartingPositionTimestamp']  # noqa: E501

        try:
            lambdaClient.create_event_source_mapping(**createKwargs)
        except lambdaClient.exceptions.ResourceConflictException as err:
            logger.info('Event mapping already exists; updating instead')
            logger.debug(err)
            updateEventMapping(lambdaClient, mapping, configDict)
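
The structure createEventMapping expects from config/event_sources_<runType>.json can be read off the key accesses above; a sketch that writes such a file, where the runType 'local', the ARN, and the batch size are placeholders:

# Sketch of the expected config file (ARN and values are placeholders)
import json

eventSources = {
    'EventSourceMappings': [{
        'EventSourceArn': 'arn:aws:kinesis:us-east-1:123456789012:stream/example',
        'Enabled': True,
        'BatchSize': 100,
        'StartingPosition': 'TRIM_HORIZON'
    }]
}

with open('config/event_sources_local.json', 'w') as sources:
    json.dump(eventSources, sources, indent=2)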
Example #6
def test_create_client(self, mock_boto):
    result = createAWSClient('fakeService', {'region': 'test'})
    mock_boto.assert_called_once_with('fakeService', region_name='test')
    self.assertTrue(result)
Example #7
def test_create_with_load_env(self, mock_boto, mock_env):
    result = createAWSClient('fakeService', None)
    mock_env.assert_called_once_with(None, None)
    mock_boto.assert_called_once_with('fakeService', region_name='test')
    self.assertTrue(result)
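
These methods read like fragments of a unittest.TestCase in which boto3.client and the env-file loader are patched in; a sketch of plausible scaffolding, where the import path helpers.clientHelpers is invented for illustration:

# Scaffolding sketch (patch target paths are hypothetical)
import unittest
from unittest import mock

from helpers.clientHelpers import createAWSClient  # hypothetical module path

class TestCreateAWSClient(unittest.TestCase):
    @mock.patch('helpers.clientHelpers.loadEnvFile',
                return_value=({'region': 'test'}, None))
    @mock.patch('helpers.clientHelpers.boto3.client')
    def test_create_with_load_env(self, mock_boto, mock_env):
        # Decorators apply bottom-up, so the boto3.client mock arrives first
        result = createAWSClient('fakeService', None)
        mock_env.assert_called_once_with(None, None)
        mock_boto.assert_called_once_with('fakeService', region_name='test')
        self.assertTrue(result)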
class OutputManager():
    """Controls the output formats and streams from this function. Valid output
    targets are:
    Kinesis: for processing in the enhancement pipeline and epub storage
    SQS: For queuing and processing by the ElasticSearch manager"""

    KINESIS_CLIENT = createAWSClient('kinesis')
    SQS_CLIENT = createAWSClient('sqs')
    AWS_REDIS = createAWSClient('elasticache')
    REDIS_CLIENT = redis.Redis(
        host='sfr-filter-query.rtovuw.0001.use1.cache.amazonaws.com',
        port=6379,
        socket_timeout=5)

    def __init__(self):
        pass

    @classmethod
    def putKinesis(cls, data, stream, recType='work'):
        """Puts records into a Kinesis stream for processing by other parts of
        the SFR data pipeline. Takes data as an object and converts it into a
        JSON string. This is then passed to the specified stream.

        This will raise any error, as failure to pass an output should halt the
        process."""

        logger.info('Writing results to Kinesis')
        outputObject = {'status': 200, 'data': data, 'type': recType}

        # The default lambda function here converts all objects into dicts
        kinesisStream = OutputManager._convertToJSON(outputObject)

        partKey = OutputManager._createPartitionKey(data)

        try:
            cls.KINESIS_CLIENT.put_record(StreamName=stream,
                                          Data=kinesisStream,
                                          PartitionKey=partKey)

        except Exception as err:
            logger.error('Kinesis Write error!')
            logger.debug(err)
            raise OutputError('Failed to write result to output stream!')

    @classmethod
    def putKinesisBatch(cls, records, stream):
        """Writes a set of records to a Kinesis stream in a single request.
        put_records accepts at most 500 records per call, so larger batches
        should be chunked by the caller."""
        streamRecords = [{
            'Data':
            OutputManager._convertToJSON({
                'status': 200,
                'data': r['data'],
                'type': r['recType']
            }),
            'PartitionKey':
            OutputManager._createPartitionKey(r['data'])
        } for r in records]

        try:
            cls.KINESIS_CLIENT.put_records(Records=streamRecords,
                                           StreamName=stream)
        except Exception as err:
            logger.error('Kinesis Batch write error')
            logger.debug(err)
            raise OutputError('Failed to write batch to Kinesis')

    @classmethod
    def putQueue(cls, data, outQueue):
        """This puts record identifiers into an SQS queue that is read for
        records to (re)index in ElasticSearch. Takes an object which is
        converted into a JSON string."""

        logger.info('Writing results to SQS')

        # The default lambda function here converts all objects into dicts
        messageData = OutputManager._convertToJSON(data)

        try:
            cls.SQS_CLIENT.send_message(QueueUrl=outQueue,
                                        MessageBody=messageData)
        except Exception as err:
            logger.error('SQS Write error!')
            logger.debug(err)
            raise OutputError('Failed to write result to output queue!')

    @classmethod
    def putQueueBatches(cls, messages, outQueue):
        """Drains the messages list into SQS in batches of 10, the maximum
        entry count send_message_batch accepts."""
        while len(messages) > 0:
            jsonMessages = []
            for i in range(10):
                try:
                    jsonMessages.append({
                        'MessageBody':
                        OutputManager._convertToJSON(messages.pop()),
                        'Id':
                        str(i)
                    })
                except IndexError:
                    break

            try:
                cls.SQS_CLIENT.send_message_batch(QueueUrl=outQueue,
                                                  Entries=jsonMessages)
            except Exception as err:
                logger.error('Failed to write messages to queue')
                logger.debug(err)
                raise OutputError('Failed to write results to queue')

    @classmethod
    def checkRecentQueries(cls, queryString):
        """Returns True if queryString was executed within the past day;
        otherwise records it in Redis with a one-week expiry and returns
        False."""
        queryTime = cls.REDIS_CLIENT.get(queryString)
        logger.debug('Checking query recency of {} at {}'.format(
            queryString, queryTime))
        recencyThreshold = datetime.utcnow() - timedelta(days=1)
        if (queryTime is not None
                and datetime.strptime(queryTime.decode('utf-8'),
                                      '%Y-%m-%dT%H:%M:%S') >= recencyThreshold):
            return True

        cls.REDIS_CLIENT.set(queryString,
                             datetime.utcnow().strftime('%Y-%m-%dT%H:%M:%S'),
                             ex=60 * 60 * 24 * 7)
        return False

    @staticmethod
    def _convertToJSON(obj):
        """Converts an object or dict to a JSON string.
        the DEFAULT parameter implements a lambda function to get the values
        from an object using the vars() builtin."""
        try:
            jsonStr = json.dumps(obj,
                                 ensure_ascii=False,
                                 default=lambda x: vars(x))
        except TypeError:
            jsonStr = json.dumps(obj, ensure_ascii=False)

        return jsonStr

    @staticmethod
    def _createPartitionKey(obj):
        """Selects a partition key for the record, preferring the primary
        identifier, then the first listed identifier, then the record id,
        and finally falling back to '0'."""
        try:
            return str(obj['primary_identifier']['identifier'])
        except KeyError:
            pass

        try:
            return str(obj['identifiers'][0]['identifier'])
        except KeyError:
            pass

        try:
            return str(obj['id'])
        except KeyError:
            pass

        return '0'
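
A sketch of how a caller might combine the Redis recency check with the batch queue writer; the query string, queue URL, and records are invented for illustration:

# Usage sketch (hypothetical query string, queue URL, and records)
if not OutputManager.checkRecentQueries('agent:jane-austen'):
    OutputManager.putQueueBatches(
        [{'type': 'work', 'identifier': str(i)} for i in range(25)],
        'https://sqs.us-east-1.amazonaws.com/123456789012/sfr-indexing-queue'
    )  # 25 messages drain in batches of 10, 10, and 5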
Example #9
def __init__(self, s3Key):
    self.s3Client = createAWSClient('s3')
    self.key = s3Key
    self.bucket = os.environ.get('COVER_BUCKET', 'sfr-instance-covers')
    self.logger = createLog('s3Client')
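
Only the initializer of this S3 wrapper appears above; a sketch of constructing it, where the enclosing class name CoverManager is assumed for illustration:

# Usage sketch (the class name CoverManager is hypothetical)
manager = CoverManager('covers/12345.jpg')
print(manager.bucket)  # 'sfr-instance-covers' unless COVER_BUCKET is set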