class KinesisOutput():
    """Class for managing connections and operations with AWS Kinesis"""

    # Shared boto3 client, created once at import time.
    KINESIS_CLIENT = createAWSClient('kinesis')

    def __init__(self):
        pass

    @classmethod
    def putRecord(cls, outputObject, stream):
        """Put an event into the specific Kinesis stream

        Arguments:
            outputObject -- object or dict to be serialized to JSON
            stream {string} -- name of the target Kinesis stream

        Raises:
            KinesisError: if the put_record call fails for any reason
        """
        logger.info('Writing results to Kinesis')

        # The default lambda function here converts all objects into dicts
        kinesisStream = json.dumps(
            outputObject,
            ensure_ascii=False,
            default=lambda x: vars(x)
        )

        try:
            cls.KINESIS_CLIENT.put_record(
                StreamName=stream,
                Data=kinesisStream,
                PartitionKey=os.environ['OUTPUT_SHARD']
            )
        except Exception as err:
            # Narrowed from a bare except, which would also have swallowed
            # SystemExit/KeyboardInterrupt; log the cause before re-raising.
            logger.error('Kinesis Write error!')
            logger.debug(err)
            raise KinesisError('Failed to write result to output stream!')
class OutputManager():
    """Controls the output formats and streams from this function. Valid
    output targets are:
    Kinesis: for processing in the enhancement pipeline and epub storage
    SQS: For queuing and processing by the ElasticSearch manager"""

    # Shared boto3 client, created once at import time.
    KINESIS_CLIENT = createAWSClient('kinesis')

    def __init__(self):
        pass

    @classmethod
    def putKinesis(cls, data, stream, recType='work'):
        """Puts records into a Kinesis stream for processing by other parts of
        the SFR data pipeline. Takes data as an object and converts it into a
        JSON string. This is then passed to the specified stream.

        This will raise any error, as failure to pass an output should halt the
        process.

        Arguments:
            data -- record payload to wrap and serialize
            stream {string} -- name of the target Kinesis stream
            recType {string} -- record type tag added to the envelope

        Raises:
            OutputError: if the put_record call fails for any reason
        """
        logger.info('Writing results to Kinesis')
        outputObject = {'status': 200, 'data': data, 'type': recType}

        # The default lambda function here converts all objects into dicts
        kinesisStream = OutputManager._convertToJSON(outputObject)

        partKey = OutputManager._createPartitionKey(data)

        try:
            cls.KINESIS_CLIENT.put_record(
                StreamName=stream,
                Data=kinesisStream,
                PartitionKey=partKey
            )
        except Exception as err:
            # Was a bare except (the old noqa tag cited E702; bare except is
            # E722); narrowed so system-exiting exceptions still propagate.
            logger.error('Kinesis Write error!')
            logger.debug(err)
            raise OutputError('Failed to write result to output stream!')

    @staticmethod
    def _convertToJSON(obj):
        """Converts an object or dict to a JSON string. the DEFAULT parameter
        implements a lambda function to get the values from an object using
        the vars() builtin. Falls back to plain serialization for values
        (e.g. strings) where vars() raises a TypeError."""
        try:
            jsonStr = json.dumps(
                obj,
                ensure_ascii=False,
                default=lambda x: vars(x)
            )
        except TypeError:
            jsonStr = json.dumps(obj, ensure_ascii=False)
        return jsonStr

    @staticmethod
    def _createPartitionKey(obj):
        """Derive a stable Kinesis partition key as the MD5 hex digest of the
        record's storedURL. Assumes obj always carries a 'storedURL' key —
        a missing key raises KeyError to the caller."""
        md5 = hashlib.md5()
        md5.update(obj['storedURL'].encode('utf-8'))
        return md5.hexdigest()
def test_create_with_access(self, mock_boto):
    """Explicit credentials in the options dict should be passed through to
    the boto client factory as keyword arguments."""
    options = {
        'region': 'test',
        'aws_access_key_id': 1,
        'aws_secret_access_key': 'secret'
    }
    client = createAWSClient('fakeService', options)
    mock_boto.assert_called_once_with(
        'fakeService',
        region_name='test',
        aws_access_key_id=1,
        aws_secret_access_key='secret'
    )
    self.assertTrue(client)
class OutputManager():
    """Class for managing connections and operations with AWS Kinesis"""

    # Shared boto3 client, created once at import time.
    KINESIS_CLIENT = createAWSClient('kinesis')

    def __init__(self):
        pass

    @classmethod
    def putRecord(cls, outputObject, stream, workUUID):
        """Put an event into the specific Kinesis stream

        Arguments:
            outputObject -- object or dict to be serialized to JSON
            stream {string} -- name of the target Kinesis stream
            workUUID {string} -- UUID used as the record's partition key

        Raises:
            KinesisError: if the put_record call fails for any reason
        """
        logger.info("Writing results to Kinesis")

        # The default lambda function here converts all objects into dicts
        kinesisStream = OutputManager._convertToJSON(outputObject)

        try:
            cls.KINESIS_CLIENT.put_record(
                StreamName=stream,
                Data=kinesisStream,
                PartitionKey=workUUID
            )
        except Exception as err:
            # Narrowed from a bare except so system-exiting exceptions
            # (SystemExit/KeyboardInterrupt) are no longer swallowed.
            logger.error('Kinesis Write error!')
            logger.debug(err)
            raise KinesisError('Failed to write result to output stream!')

    @staticmethod
    def _convertToJSON(obj):
        """Converts an object or dict to a JSON string. the DEFAULT parameter
        implements a lambda function to get the values from an object using
        the vars() builtin."""
        return json.dumps(obj, ensure_ascii=False, default=lambda x: vars(x))

    @staticmethod
    def formatResponse(status, data):
        """Creates a response block to be returned to the API client.

        Arguments:
            status {integer} -- A standard HTTP status code.
            data {dict} -- A dictionary containing either an error message or
            a set of metadata describing the agent being queried.

        Returns:
            [dict] -- A complete response object containing a status and
            relevant data.
        """
        return {
            'statusCode': status,
            'headers': {
                'req-time': time.time()
            },
            'isBase64Encoded': False,
            'body': OutputManager._convertToJSON(data)
        }
def createEventMapping(runType):
    """Register the Lambda event source mappings declared in the per-run
    config file (config/event_sources_<runType>.json). Missing config files
    are treated as 'nothing to do'; unparseable or unreadable files raise."""
    logger.info('Creating event Source mappings for Lambda')

    try:
        with open('config/event_sources_{}.json'.format(runType)) as sources:
            try:
                eventMappings = json.load(sources)
            except json.decoder.JSONDecodeError as err:
                logger.error('Unable to parse JSON file')
                raise err
    except FileNotFoundError:
        # No mapping file for this run type is a valid, empty configuration.
        logger.info('No Event Source mapping provided')
        return
    except IOError as err:
        logger.error('Unable to open JSON file')
        raise err

    definedSources = eventMappings['EventSourceMappings']
    if len(definedSources) < 1:
        logger.info('No event sources defined')
        return

    configDict, configLines = loadEnvFile(runType, None)
    lambdaClient = createAWSClient('lambda', configDict)

    for source in definedSources:
        logger.debug('Adding event source mapping for function')
        createKwargs = _buildSourceKwargs(source, configDict)
        try:
            lambdaClient.create_event_source_mapping(**createKwargs)
        except lambdaClient.exceptions.ResourceConflictException as err:
            # An existing mapping for this source is updated in place.
            logger.info('Event Mapping already exists, update')
            logger.debug(err)
            updateEventMapping(lambdaClient, source, configDict)


def _buildSourceKwargs(source, configDict):
    """Translate one mapping definition into create_event_source_mapping
    keyword arguments, carrying the optional StartingPosition fields."""
    kwargs = {
        'EventSourceArn': source['EventSourceArn'],
        'FunctionName': configDict['function_name'],
        'Enabled': source['Enabled'],
        'BatchSize': source['BatchSize'],
    }
    if 'StartingPosition' in source:
        kwargs['StartingPosition'] = source['StartingPosition']
        if source['StartingPosition'] == 'AT_TIMESTAMP':
            kwargs['StartingPositionTimestamp'] = source[
                'StartingPositionTimestamp']
    return kwargs
def test_create_client(self, mock_boto):
    """A config containing only a region should create the client with
    region_name alone."""
    options = {'region': 'test'}
    client = createAWSClient('fakeService', options)
    mock_boto.assert_called_once_with('fakeService', region_name='test')
    self.assertTrue(client)
def test_create_with_load_env(self, mock_boto, mock_env):
    """When no options are supplied, the factory should fall back to the
    environment loader for its configuration."""
    client = createAWSClient('fakeService', None)
    mock_env.assert_called_once_with(None, None)
    mock_boto.assert_called_once_with('fakeService', region_name='test')
    self.assertTrue(client)
class OutputManager():
    """Controls the output formats and streams from this function. Valid
    output targets are:
    Kinesis: for processing in the enhancement pipeline and epub storage
    SQS: For queuing and processing by the ElasticSearch manager"""

    # Shared boto3 clients, created once at import time.
    KINESIS_CLIENT = createAWSClient('kinesis')
    SQS_CLIENT = createAWSClient('sqs')
    AWS_REDIS = createAWSClient('elasticache')
    # NOTE(review): the Redis host is hard-coded here; consider sourcing it
    # from config/environment so non-production runs don't hit this node.
    REDIS_CLIENT = redis.Redis(
        host='sfr-filter-query.rtovuw.0001.use1.cache.amazonaws.com',
        port=6379,
        socket_timeout=5)

    def __init__(self):
        pass

    @classmethod
    def putKinesis(cls, data, stream, recType='work'):
        """Puts records into a Kinesis stream for processing by other parts of
        the SFR data pipeline. Takes data as an object and converts it into a
        JSON string. This is then passed to the specified stream.

        This will raise any error, as failure to pass an output should halt the
        process.

        Raises:
            OutputError: if the put_record call fails for any reason
        """
        logger.info('Writing results to Kinesis')
        outputObject = {'status': 200, 'data': data, 'type': recType}

        # The default lambda function here converts all objects into dicts
        kinesisStream = OutputManager._convertToJSON(outputObject)

        partKey = OutputManager._createPartitionKey(data)

        try:
            cls.KINESIS_CLIENT.put_record(
                StreamName=stream,
                Data=kinesisStream,
                PartitionKey=partKey
            )
        except Exception as err:
            # Narrowed from a bare except so system-exiting exceptions
            # still propagate; the cause is logged before re-raising.
            logger.error('Kinesis Write error!')
            logger.debug(err)
            raise OutputError('Failed to write result to output stream!')

    @classmethod
    def putKinesisBatch(cls, records, stream):
        """Write a batch of records to a Kinesis stream in one put_records
        call. Each record dict must carry 'data' and 'recType' keys.

        Raises:
            OutputError: if the put_records call fails for any reason
        """
        streamRecords = [{
            'Data': OutputManager._convertToJSON({
                'status': 200,
                'data': r['data'],
                'type': r['recType']
            }),
            'PartitionKey': OutputManager._createPartitionKey(r['data'])
        } for r in records]

        try:
            cls.KINESIS_CLIENT.put_records(
                Records=streamRecords,
                StreamName=stream
            )
        except Exception as err:
            logger.error('Kinesis Batch write error')
            logger.debug(err)
            raise OutputError('Failed to write batch to Kinesis')

    @classmethod
    def putQueue(cls, data, outQueue):
        """This puts record identifiers into an SQS queue that is read for
        records to (re)index in ElasticSearch.

        Takes an object which is converted into a JSON string.

        Raises:
            OutputError: if the send_message call fails for any reason
        """
        logger.info('Writing results to SQS')

        # The default lambda function here converts all objects into dicts
        messageData = OutputManager._convertToJSON(data)

        try:
            cls.SQS_CLIENT.send_message(
                QueueUrl=outQueue,
                MessageBody=messageData
            )
        except Exception as err:
            # Narrowed from a bare except; log the cause before re-raising.
            logger.error('SQS Write error!')
            logger.debug(err)
            raise OutputError('Failed to write result to output stream!')

    @classmethod
    def putQueueBatches(cls, messages, outQueue):
        """Drain the messages list into SQS in batches of up to 10 entries
        (the SQS send_message_batch maximum). NOTE: consumes the list it is
        given via pop().

        Raises:
            OutputError: if any send_message_batch call fails
        """
        while len(messages) > 0:
            jsonMessages = []
            for i in range(10):
                try:
                    jsonMessages.append({
                        'MessageBody': OutputManager._convertToJSON(
                            messages.pop()
                        ),
                        'Id': str(i)
                    })
                except IndexError:
                    # Fewer than 10 messages left; send the partial batch.
                    break

            try:
                cls.SQS_CLIENT.send_message_batch(
                    QueueUrl=outQueue,
                    Entries=jsonMessages
                )
            except Exception as err:
                logger.error('Failed to write messages to queue')
                logger.debug(err)
                raise OutputError('Failed to write results to queue')

    @classmethod
    def checkRecentQueries(cls, queryString):
        """Return True if queryString was recorded in Redis within the last
        day; otherwise stamp it with the current UTC time (1-week TTL) and
        return False."""
        queryTime = cls.REDIS_CLIENT.get(queryString)
        logger.debug('Checking query recency of {} at {}'.format(
            queryString, queryTime))
        currentTime = datetime.utcnow() - timedelta(days=1)
        if (queryTime is not None and
                datetime.strptime(
                    queryTime.decode('utf-8'), '%Y-%m-%dT%H:%M:%S'
                ) >= currentTime):
            return True

        cls.REDIS_CLIENT.set(
            queryString,
            datetime.utcnow().strftime('%Y-%m-%dT%H:%M:%S'),
            ex=60 * 60 * 24 * 7
        )
        return False

    @staticmethod
    def _convertToJSON(obj):
        """Converts an object or dict to a JSON string. the DEFAULT parameter
        implements a lambda function to get the values from an object using
        the vars() builtin. Falls back to plain serialization for values
        (e.g. strings) where vars() raises a TypeError."""
        try:
            jsonStr = json.dumps(
                obj,
                ensure_ascii=False,
                default=lambda x: vars(x)
            )
        except TypeError:
            jsonStr = json.dumps(obj, ensure_ascii=False)
        return jsonStr

    @staticmethod
    def _createPartitionKey(obj):
        """Derive a Kinesis partition key from the record's identifiers,
        trying the primary identifier, then the first entry of the
        identifiers list, then a plain id, defaulting to '0'."""
        try:
            return str(obj['primary_identifier']['identifier'])
        except KeyError:
            pass

        try:
            # IndexError added: an empty identifiers list previously
            # escaped this fallback chain and crashed the caller.
            return str(obj['identifiers'][0]['identifier'])
        except (KeyError, IndexError):
            pass

        try:
            return str(obj['id'])
        except KeyError:
            pass

        return '0'
def __init__(self, s3Key):
    """Initialize an S3 wrapper bound to a single object key.

    Arguments:
        s3Key {string} -- key of the S3 object this instance operates on
    """
    # Project helper; presumably returns a boto3 S3 client — confirm in utils.
    self.s3Client = createAWSClient('s3')
    self.key = s3Key
    # Bucket is configurable via COVER_BUCKET, defaulting to the
    # sfr-instance-covers production bucket.
    self.bucket = os.environ.get('COVER_BUCKET', 'sfr-instance-covers')
    self.logger = createLog('s3Client')