Beispiel #1
0
    def __init__(self, beaver_config, logger=None):
        """Set up a boto Kinesis connection from the ``kinesis_aws_*`` settings.

        Args:
            beaver_config: configuration object, read through its ``get`` method.
            logger: optional logger handed to the base transport.

        Raises:
            TransportException: if boto returns no connection or raises while
                connecting.
        """
        super(KinesisTransport, self).__init__(beaver_config, logger=logger)

        self._access_key = beaver_config.get('kinesis_aws_access_key')
        self._secret_key = beaver_config.get('kinesis_aws_secret_key')
        self._region = beaver_config.get('kinesis_aws_region')
        self._stream_name = beaver_config.get('kinesis_aws_stream')

        # Self-imposed max batch size to minimize the number of records in a
        # given call to Kinesis. The fallback default is the string '512000',
        # so convert to int to keep size comparisons numeric.
        self._batch_size_max = int(
            beaver_config.get('kinesis_aws_batch_size_max', '512000'))

        try:
            if self._access_key is None and self._secret_key is None:
                # No keys configured: connect without passing credentials.
                self._connection = boto.kinesis.connect_to_region(self._region)
            else:
                self._connection = boto.kinesis.connect_to_region(
                    self._region,
                    aws_access_key_id=self._access_key,
                    aws_secret_access_key=self._secret_key)

            if self._connection is None:
                message = 'Unable to connect to AWS Kinesis - check your AWS credentials'
                self._logger.warn(message)
                raise TransportException(message)

        except TransportException:
            # Already the right type and message; no need to re-wrap it.
            raise
        except Exception as e:
            # ``message`` may be missing or empty; fall back to str(e) so the
            # cause is not lost.
            raise TransportException(getattr(e, 'message', None) or str(e))
Beispiel #2
0
    def callback(self, filename, lines, **kwargs):
        """Publish each line to the configured RabbitMQ exchange.

        Args:
            filename: name of the file the lines came from; passed to ``format``.
            lines: iterable of log lines; each one is published separately.
            **kwargs: extra fields forwarded to ``get_timestamp`` and ``format``.

        Raises:
            TransportException: if publishing a line raises or emits a warning.
                ``_is_valid`` is set to False before raising.
        """
        import warnings

        timestamp = self.get_timestamp(**kwargs)
        # ``timestamp`` is passed to ``format`` positionally below, so always
        # drop the keyword copy (even a falsy one) to avoid supplying it twice.
        kwargs.pop('timestamp', None)

        for line in lines:
            try:
                with warnings.catch_warnings():
                    # Promote warnings to exceptions so the handlers below see them.
                    warnings.simplefilter('error')
                    self._channel.basic_publish(
                        exchange=self._rabbitmq_config['exchange'],
                        routing_key=self._rabbitmq_config['key'],
                        body=self.format(filename, line, timestamp, **kwargs),
                        properties=pika.BasicProperties(
                            content_type='text/json', delivery_mode=1))
            except UserWarning:
                self._is_valid = False
                raise TransportException(
                    'Connection appears to have been lost')
            except Exception as e:
                self._is_valid = False
                # Only some exceptions (e.g. OS-level errors) carry ``strerror``.
                if hasattr(e, 'strerror'):
                    raise TransportException(e.strerror)
                raise TransportException(
                    'Unspecified exception encountered'
                )  # TRAP ALL THE THINGS!
Beispiel #3
0
    def __init__(self, beaver_config, logger=None):
        """Set up a boto SNS connection from the ``sns_*`` settings.

        A configured profile name takes precedence. Otherwise the access and
        secret keys are passed unless both are unset, in which case the
        connection is made without explicit credentials.

        Args:
            beaver_config: configuration object, read through its ``get`` method.
            logger: optional logger handed to the base transport.

        Raises:
            TransportException: if boto returns no connection or raises while
                connecting.
        """
        super(SnsTransport, self).__init__(beaver_config, logger=logger)

        self._access_key = beaver_config.get('sns_aws_access_key')
        self._secret_key = beaver_config.get('sns_aws_secret_key')
        self._profile = beaver_config.get('sns_aws_profile_name')
        self._region = beaver_config.get('sns_aws_region')
        self._topic_arn = beaver_config.get('sns_topic_arn')

        try:
            if self._profile:
                self._connection = boto.sns.connect_to_region(
                    self._region, profile_name=self._profile)
            elif self._access_key is None and self._secret_key is None:
                self._connection = boto.sns.connect_to_region(self._region)
            else:
                self._connection = boto.sns.connect_to_region(
                    self._region,
                    aws_access_key_id=self._access_key,
                    aws_secret_access_key=self._secret_key)

            if self._connection is None:
                message = 'Unable to connect to AWS - check your AWS credentials'
                self._logger.warn(message)
                raise TransportException(message)

        except TransportException:
            # Already the right type and message; no need to re-wrap it.
            raise
        except Exception as e:
            # ``message`` may be missing or empty; fall back to str(e).
            raise TransportException(getattr(e, 'message', None) or str(e))
Beispiel #4
0
    def __init__(self, beaver_config, logger=None):
        """Set up a boto Kinesis connection plus batching and retry limits.

        Args:
            beaver_config: configuration object, read through its ``get`` method.
            logger: optional logger handed to the base transport.

        Raises:
            TransportException: if boto returns no connection or raises while
                connecting.
        """
        super(KinesisTransport, self).__init__(beaver_config, logger=logger)

        self._access_key = beaver_config.get('kinesis_aws_access_key')
        self._secret_key = beaver_config.get('kinesis_aws_secret_key')
        self._region = beaver_config.get('kinesis_aws_region')
        self._stream_name = beaver_config.get('kinesis_aws_stream')

        # self-imposed max batch size to minimize the number of records in a given call to Kinesis
        self._batch_size_max = int(beaver_config.get('kinesis_aws_batch_size_max', '512000'))
        self._max_retries = int(beaver_config.get('kinesis_max_retries', '3'))
        self._initial_wait_between_retries = int(beaver_config.get('kinesis_initial_backoff_millis', '10'))

        # Kinesis Limit http://docs.aws.amazon.com/kinesis/latest/APIReference/API_PutRecords.html#API_PutRecords_RequestSyntax
        self._max_records_per_batch = 500

        try:
            if self._access_key is None and self._secret_key is None:
                # No keys configured: connect without passing credentials.
                self._connection = boto.kinesis.connect_to_region(self._region)
            else:
                self._connection = boto.kinesis.connect_to_region(self._region,
                                                                  aws_access_key_id=self._access_key,
                                                                  aws_secret_access_key=self._secret_key)

            if self._connection is None:
                message = 'Unable to connect to AWS Kinesis - check your AWS credentials'
                self._logger.warn(message)
                raise TransportException(message)

        except TransportException:
            # Already the right type and message; no need to re-wrap it.
            raise
        except Exception as e:
            # ``message`` may be missing or empty; fall back to str(e).
            raise TransportException(getattr(e, 'message', None) or str(e))
    def callback(self, filename, lines, **kwargs):
        """Send every line to the configured Kafka topic, one message each.

        Args:
            filename: name of the file the lines came from; passed to ``format``.
            lines: iterable of log lines.
            **kwargs: extra fields forwarded to ``get_timestamp`` and ``format``.

        Raises:
            TransportException: if formatting or sending a line raises.
        """
        import warnings

        timestamp = self.get_timestamp(**kwargs)
        if kwargs.get('timestamp'):
            kwargs.pop('timestamp')

        for entry in lines:
            try:
                with warnings.catch_warnings():
                    warnings.simplefilter('error')
                    # With a key configured, it goes between topic and payload.
                    send_args = [self._kafka_config['topic']]
                    if self._key is not None:
                        send_args.append(self._key)
                    send_args.append(self.format(filename, entry, timestamp, **kwargs))
                    response = self._prod.send_messages(*send_args)

                    if response and response[0].error:
                        first = response[0]
                        self._logger.info('message error: {0}'.format(first.error))
                        self._logger.info('message offset: {0}'.format(first.offset))

            except Exception as err:
                try:
                    self._logger.error('Exception caught sending message/s : ' + str(err))
                    raise TransportException(err.strerror)
                except AttributeError:
                    # No ``strerror`` on this exception type.
                    raise TransportException('Unspecified exception encountered')  # TRAP ALL THE THINGS!
Beispiel #6
0
    def __init__(self, beaver_config, logger=None):
        """Create the Redis client from ``redis_url`` and verify it with a ping.

        Args:
            beaver_config: configuration object, read through its ``get`` method.
            logger: optional logger handed to the base transport.

        Raises:
            TransportException: if the ping raises or emits a ``UserWarning``.
                ``_is_valid`` is set to False before raising.
        """
        super(RedisTransport, self).__init__(beaver_config, logger=logger)

        redis_url = beaver_config.get('redis_url')
        redis_password = beaver_config.get('redis_password')
        _url = urlparse.urlparse(redis_url, scheme='redis')
        # The database number is the last path segment (e.g. ``/0``). A URL
        # without a path used to crash on int(''), so fall back to database 0.
        _, _, _db = _url.path.rpartition('/')
        db_number = int(_db) if _db else 0

        self._redis = redis.StrictRedis(host=_url.hostname, port=_url.port, password=redis_password, db=db_number, socket_timeout=10)
        self._redis_namespace = beaver_config.get('redis_namespace')

        # NOTE(review): every handler below raises, so a failed ping aborts on
        # the first attempt and the 20-iteration bound is never reached. If a
        # real retry loop was intended, the handlers need to continue instead.
        wait = 0
        while 1:
            if wait == 20:
                break

            time.sleep(1)
            wait += 1
            try:
                self._redis.ping()
                break
            except UserWarning:
                self._is_valid = False
                raise TransportException('Connection appears to have been lost')
            except Exception as e:
                self._is_valid = False
                # Only some exceptions (e.g. OS-level errors) carry ``strerror``.
                if hasattr(e, 'strerror'):
                    raise TransportException(e.strerror)
                raise TransportException('Unspecified exception encountered')
Beispiel #7
0
    def __init__(self, beaver_config, logger=None):
        """Connect to SQS via boto and look up the configured queue.

        Args:
            beaver_config: configuration object, read through its ``get`` method.
            logger: optional logger handed to the base transport.

        Raises:
            TransportException: if boto returns no connection, the queue cannot
                be found, or boto raises.
        """
        super(SqsTransport, self).__init__(beaver_config, logger=logger)

        self._access_key = beaver_config.get('sqs_aws_access_key')
        self._secret_key = beaver_config.get('sqs_aws_secret_key')
        self._region = beaver_config.get('sqs_aws_region')
        self._queue_name = beaver_config.get('sqs_aws_queue')

        try:
            if self._access_key is None and self._secret_key is None:
                # No keys configured: connect without passing credentials.
                self._connection = boto.sqs.connect_to_region(self._region)
            else:
                self._connection = boto.sqs.connect_to_region(
                    self._region,
                    aws_access_key_id=self._access_key,
                    aws_secret_access_key=self._secret_key)

            if self._connection is None:
                message = 'Unable to connect to AWS - check your AWS credentials'
                self._logger.warn(message)
                raise TransportException(message)

            self._queue = self._connection.get_queue(self._queue_name)

            if self._queue is None:
                raise TransportException(
                    'Unable to access queue with name {0}'.format(
                        self._queue_name))
        except TransportException:
            # Already the right type and message; no need to re-wrap it.
            raise
        except Exception as e:
            # ``message`` may be missing or empty; fall back to str(e).
            raise TransportException(getattr(e, 'message', None) or str(e))
Beispiel #8
0
 def stompConnect(self):
     """Open a STOMP connection to ``self.host``/``self.port`` and log in.

     The connection object is stored on ``self.conn``.

     Raises:
         TransportException: if stomp raises ``NotConnectedException``.
     """
     try:
         host_and_ports = (self.host, self.port)
         self.conn = stomp.Connection([host_and_ports])
         self.conn.start()
         self.conn.connect(self.userName, self.password)
     except stomp.exception.NotConnectedException as e:
         # Use the OS-level error text when the exception provides one.
         if hasattr(e, 'strerror'):
             raise TransportException(e.strerror)
         raise TransportException('Unspecified exception encountered')
 def _send_message_batch(self, message_batch):
     """Write a batch of messages to the SQS queue held in ``self._queue``.

     Args:
         message_batch: batch passed unchanged to the queue's ``write_batch``.

     Raises:
         TransportException: if ``write_batch`` returns a falsy result or
             raises. The failure is logged with a traceback first.
     """
     try:
         result = self._queue.write_batch(message_batch)
         if not result:
             self._logger.error(
                 'Error occurred sending messages to SQS queue {0}. result: {1}'
                 .format(self._queue_name, result))
             raise TransportException(
                 'Error occurred sending message to queue {0}'.format(
                     self._queue_name))
     except Exception as e:
         self._logger.exception(
             'Exception occurred sending batch to SQS queue')
         # ``message`` may be missing or empty; fall back to str(e).
         raise TransportException(getattr(e, 'message', None) or str(e))
Beispiel #10
0
 def _send_message_batch(self, message_batch):
     """Put a batch of records on the configured Kinesis stream.

     Args:
         message_batch: records passed unchanged to ``put_records``.

     Raises:
         TransportException: if the result reports a ``FailedRecordCount``
             above zero or the call raises. The failure is logged with a
             traceback first.
     """
     try:
         result = self._connection.put_records(
             records=message_batch, stream_name=self._stream_name)
         if result.get('FailedRecordCount', 0) > 0:
             self._logger.error(
                 'Error occurred sending records to Kinesis stream {0}. result: {1}'
                 .format(self._stream_name, result))
             raise TransportException(
                 'Error occurred sending records to stream {0}'.format(
                     self._stream_name))
     except Exception as e:
         self._logger.exception(
             'Exception occurred sending records to Kinesis stream')
         # ``message`` may be missing or empty; fall back to str(e).
         raise TransportException(getattr(e, 'message', None) or str(e))
Beispiel #11
0
    def __init__(self, beaver_config, logger=None):
        """Connect to SQS via boto and load every configured queue.

        ``sqs_aws_queue`` is a comma-separated list of queue names. The loaded
        queue objects are stored in ``self._queues`` keyed by name.

        Args:
            beaver_config: configuration object, read through its ``get`` method.
            logger: optional logger handed to the base transport.

        Raises:
            TransportException: if boto returns no connection, a queue cannot
                be found, or boto raises.
        """
        super(Transport, self).__init__(beaver_config, logger=logger)

        self._access_key = beaver_config.get('sqs_aws_access_key')
        self._secret_key = beaver_config.get('sqs_aws_secret_key')
        self._profile = beaver_config.get('sqs_aws_profile_name')
        self._region = beaver_config.get('sqs_aws_region')
        self._queue_owner_acct_id = beaver_config.get(
            'sqs_aws_queue_owner_acct_id')
        # Tolerate "a, b" and trailing commas: strip whitespace around each
        # name and drop empty entries so they are not looked up as queues.
        raw_names = beaver_config.get('sqs_aws_queue').split(',')
        self._queue = [name.strip() for name in raw_names if name.strip()]
        self._bulk_lines = beaver_config.get('sqs_bulk_lines')

        try:
            if self._profile:
                self._connection = boto.sqs.connect_to_region(
                    self._region, profile_name=self._profile)
            elif self._access_key is None and self._secret_key is None:
                self._connection = boto.sqs.connect_to_region(self._region)
            else:
                self._connection = boto.sqs.connect_to_region(
                    self._region,
                    aws_access_key_id=self._access_key,
                    aws_secret_access_key=self._secret_key)

            if self._connection is None:
                message = 'Unable to connect to AWS - check your AWS credentials'
                self._logger.warn(message)
                raise TransportException(message)

            self._queues = {}
            for queue in self._queue:
                self._logger.debug(
                    'Attempting to load SQS queue: {0}'.format(queue))
                if self._queue_owner_acct_id is None:
                    self._queues[queue] = self._connection.get_queue(queue)
                else:
                    # Queue belongs to another account: pass its owner id.
                    self._queues[queue] = self._connection.get_queue(
                        queue, owner_acct_id=self._queue_owner_acct_id)

                if self._queues[queue] is None:
                    raise TransportException(
                        'Unable to access queue with name {0}'.format(queue))

                self._logger.debug(
                    'Successfully loaded SQS queue: {0}'.format(queue))
        except TransportException:
            # Already the right type and message; no need to re-wrap it.
            raise
        except Exception as e:
            # ``message`` may be missing or empty; fall back to str(e).
            raise TransportException(getattr(e, 'message', None) or str(e))
Beispiel #12
0
    def callback(self, filename, lines, **kwargs):
        """Push the formatted lines onto a Redis list through one pipeline.

        The namespace (list key) is the per-file ``redis_namespace`` field when
        set, otherwise ``self._namespace``. The server comes from
        ``_get_next_server``.

        Args:
            filename: name of the file the lines came from.
            lines: iterable of log lines.
            **kwargs: extra fields forwarded to ``get_timestamp`` and ``format``.

        Raises:
            TransportException: if executing the pipeline raises a
                ``RedisError``.
        """
        self._logger.debug('Redis transport called')

        timestamp = self.get_timestamp(**kwargs)
        # ``timestamp`` is passed to ``format`` positionally below, so always
        # drop the keyword copy (even a falsy one) to avoid supplying it twice.
        kwargs.pop('timestamp', None)

        namespace = self._beaver_config.get_field('redis_namespace', filename)
        if not namespace:
            namespace = self._namespace
        self._logger.debug('Got namespace: ' + namespace)

        server = self._get_next_server()
        self._logger.debug('Got redis server: ' + server['url'])

        pipeline = server['redis'].pipeline(transaction=False)

        for line in lines:
            pipeline.rpush(namespace,
                           self.format(filename, line, timestamp, **kwargs))

        try:
            pipeline.execute()
        except redis.exceptions.RedisError as exception:
            self._logger.warn('Cannot push lines to redis server: ' +
                              server['url'])
            raise TransportException(exception)
    def __init__(self, beaver_config, logger=None):
        """Build one Redis client per URL in the comma-separated ``redis_url``.

        Each server entry records its client, its URL and a ``down_until``
        marker that starts at 0. After validating ``redis_data_type`` the
        connections are checked via ``_check_connections``.

        Raises:
            TransportException: if ``redis_data_type`` is neither
                ``LIST_DATA_TYPE`` nor ``CHANNEL_DATA_TYPE``.
        """
        super(RedisTransport, self).__init__(beaver_config, logger=logger)

        self._servers = [
            {
                'redis': redis.StrictRedis.from_url(server_url, socket_timeout=10),
                'url': server_url,
                'down_until': 0,
            }
            for server_url in beaver_config.get('redis_url').split(',')
        ]

        self._namespace = beaver_config.get('redis_namespace')
        self._current_server_index = 0

        self._data_type = beaver_config.get('redis_data_type')
        known_types = (self.LIST_DATA_TYPE, self.CHANNEL_DATA_TYPE)
        if self._data_type not in known_types:
            raise TransportException('Unknown Redis data type')

        self._check_connections()
    def _get_next_server(self):
        """Returns a valid redis server or raises a TransportException"""

        current_try = 0
        max_tries = len(self._servers)

        while current_try < max_tries:

            server_index = self._raise_server_index()
            server = self._servers[server_index]
            down_until = server['down_until']

            self._logger.debug('Checking server ' + str(current_try + 1) +
                               '/' + str(max_tries) + ': ' + server['url'])

            if down_until == 0:
                self._logger.debug('Elected server: ' + server['url'])
                return server

            if down_until < time.time():
                if self._is_reachable(server):
                    server['down_until'] = 0
                    self._logger.debug('Elected server: ' + server['url'])

                    return server
                else:
                    self._logger.debug('Server still unavailable: ' +
                                       server['url'])
                    server['down_until'] = time.time() + 5

            current_try += 1

        raise TransportException('Cannot reach any redis server')
Beispiel #15
0
    def callback(self, filename, lines, **kwargs):
        """Send each formatted line, newline-terminated, over ``self._sock``.

        Args:
            filename: name of the file the lines came from.
            lines: iterable of log lines.
            **kwargs: extra fields forwarded to ``get_timestamp`` and ``format``.

        Raises:
            TransportException: on any ``socket.error``. ``invalidate`` is
                called first, and a broken pipe is reported as a lost
                connection.
        """
        timestamp = self.get_timestamp(**kwargs)
        # ``timestamp`` is passed to ``format`` positionally below, so always
        # drop the keyword copy (even a falsy one) to avoid supplying it twice.
        kwargs.pop('timestamp', None)

        try:
            for line in lines:
                # NOTE(review): ``send`` may transmit only part of the data on a
                # plain socket; ``sendall`` may be the safer call here - confirm
                # what ``self._sock`` is before changing it.
                self._sock.send(self.format(filename, line, timestamp, **kwargs) + "\n")
        except socket.error as e:
            self.invalidate()

            # args[0] is the errno for OS-level socket errors.
            if e.args and e.args[0] == errno.EPIPE:
                raise TransportException('Connection appears to have been lost')

            # Interpolate the details into the message; the old code passed
            # them as a second exception argument, leaving '%s' unformatted.
            raise TransportException('Socket Error: %s' % (e.args,))
    def callback(self, filename, lines, **kwargs):
        """Publish lines one by one to ``self._topic`` via ``self._client``.

        Args:
            filename: name of the file the lines came from.
            lines: iterable of log lines.
            **kwargs: extra fields forwarded to ``get_timestamp`` and ``format``.

        Raises:
            TransportException: if publishing a line raises or emits a warning.
        """
        import warnings

        timestamp = self.get_timestamp(**kwargs)
        # ``timestamp`` is passed to ``format`` positionally below, so always
        # drop the keyword copy (even a falsy one) to avoid supplying it twice.
        kwargs.pop('timestamp', None)

        for line in lines:
            try:
                with warnings.catch_warnings():
                    # Promote warnings to exceptions so they are trapped below.
                    warnings.simplefilter('error')
                    # Third argument is presumably the QoS level - TODO confirm
                    # against the client library.
                    self._client.publish(self._topic, self.format(filename, line, timestamp, **kwargs), 0)
            except Exception as e:
                # Only some exceptions (e.g. OS-level errors) carry ``strerror``.
                if hasattr(e, 'strerror'):
                    raise TransportException(e.strerror)
                raise TransportException('Unspecified exception encountered')
Beispiel #17
0
 def _send_message(self, msg):
     """Write ``msg`` as one raw message to every queue in ``self._queues``.

     Trailing commas are stripped from ``msg`` and the result is wrapped in
     square brackets before sending.

     Args:
         msg: string holding the message text.

     Raises:
         TransportException: if a queue's ``write`` returns a falsy result or
             anything raises. The failure is logged with a traceback first.
     """
     try:
         # Wrap once, outside the loop: wrapping per queue nested the brackets
         # again for every queue after the first.
         body = '[{0}]'.format(msg.rstrip(','))
         for queue in self._queues:
             m = RawMessage()
             m.set_body(body)
             result = self._queues[queue].write(m)
             if not result:
                 # Report the queue being written, i.e. the loop variable.
                 self._logger.error(
                     'Error occurred sending message to SQS queue {0}. result: {1}'
                     .format(queue, result))
                 raise TransportException(
                     'Error occurred sending message to queue {0}'.format(
                         queue))
     except Exception as e:
         self._logger.exception(
             'Exception occurred sending message to SQS queue')
         # ``message`` may be missing or empty; fall back to str(e).
         raise TransportException(getattr(e, 'message', None) or str(e))
Beispiel #18
0
 def fulsh(self):
     """Execute every pending bulk, then reset ``_bulks`` and ``size``.

     The method name is kept as spelled because callers use it.

     Raises:
         TransportException: if executing a bulk or logging raises.
     """
     try:
         # ``items()`` works on both Python 2 and 3; ``iteritems()`` does not.
         for city, bulk in self._bulks.items():
             bulk.execute()
             self._logger.info("Mongo: Inserted bulk for city: " + city)
         self._bulks = {}
         self.size = 0
     except Exception as e:
         # ``message`` may be missing or empty; fall back to str(e).
         raise TransportException(getattr(e, 'message', None) or str(e))
 def callback(self, filename, lines, **kwargs):
     """Format each line and put it on the ``self._lines`` queue.

     Args:
         filename: name of the file the lines came from.
         lines: iterable of log lines.
         **kwargs: extra fields forwarded to ``get_timestamp`` and ``format``.

     Raises:
         TransportException: if ``_connection_ok`` is falsy, or if formatting
             or queueing a line raises or emits a warning.
     """
     import warnings

     if not self._connection_ok:
         raise TransportException('RabbitMQ: Not connected or connection not OK')

     timestamp = self.get_timestamp(**kwargs)
     if kwargs.get('timestamp'):
         kwargs.pop('timestamp')

     for entry in lines:
         try:
             with warnings.catch_warnings():
                 warnings.simplefilter('error')
                 formatted = self.format(filename, entry, timestamp, **kwargs)
                 self._lines.put(formatted)
         except UserWarning:
             raise TransportException('Connection appears to have been lost')
         except Exception as err:
             try:
                 reason = err.strerror
             except AttributeError:
                 # No ``strerror`` on this exception type.
                 raise TransportException('Unspecified exception encountered')
             raise TransportException(reason)
Beispiel #20
0
 def _send_message_batch(self, message_batch):
     for queue in self._queues:
         try:
             self._logger.debug(
                 'Attempting to push batch message to SQS queue: {0}'.
                 format(queue))
             result = self._queues[queue].write_batch(message_batch)
             if not result:
                 self._logger.error(
                     'Error occurred sending messages to SQS queue {0}. result: {1}'
                     .format(queue, result))
                 raise TransportException(
                     'Error occurred sending message to queue {0}'.format(
                         queue))
             self._logger.debug(
                 'Successfully pushed batch message to SQS queue: {0}'.
                 format(queue))
         except Exception as e:
             self._logger.exception(
                 'Exception occurred sending batch to SQS queue')
             raise TransportException(e.message)
Beispiel #21
0
 def _connect(self):
     """Connect to Mongo, select ``self.db_name`` and start a fresh ``Bulk``.

     If ``self.bulk`` is truthy, its ``fulsh`` method is called before the
     bulk is replaced.

     Raises:
         TransportException: on any failure. The message is prefixed with
             ``Mongo:`` and the value of ``datetime.now()``.
     """
     try:
         self._logger.info("Connect to Mongo")
         client = _mongo_client(self.connection_string)
         db = client[self.db_name]
         self.db = db
         if self.bulk:
             self._logger.debug("Mongo: Bulk not empty")
             self.bulk.fulsh()
         self.bulk = Bulk(self.db, self._logger)
         self._logger.debug("Mongo: Bulk created")
     except Exception as e:
         # ``message`` may be missing or empty; fall back to str(e).
         detail = getattr(e, 'message', None) or str(e)
         raise TransportException("Mongo: {} - {}".format(datetime.now(), detail))
Beispiel #22
0
    def callback(self, filename, lines, **kwargs):
        """Send lines one by one to ``self.queue`` over ``self.conn``.

        Args:
            filename: name of the file the lines came from.
            lines: iterable of log lines.
            **kwargs: extra fields forwarded to ``get_timestamp`` and ``format``.

        Raises:
            TransportException: wrapping whatever formatting, logging or
                sending a line raised. The error is logged first.
        """
        import warnings

        timestamp = self.get_timestamp(**kwargs)
        # ``timestamp`` is passed to ``format`` positionally below, so always
        # drop the keyword copy (even a falsy one) to avoid supplying it twice.
        kwargs.pop('timestamp', None)

        for line in lines:
            try:
                with warnings.catch_warnings():
                    # Promote warnings to exceptions so they are trapped below.
                    warnings.simplefilter('error')
                    m = self.format(filename, line, timestamp, **kwargs)
                    self.logger.debug("Sending message " + m)
                    self.conn.send(destination=self.queue, body=m)

            except Exception as e:
                self.logger.error(e)
                # The exception itself is wrapped and no attribute is read, so
                # the old ``except AttributeError`` fallback could never fire.
                raise TransportException(e)
Beispiel #23
0
    def callback(self, filename, lines, **kwargs):
        """Publish each formatted line to the SNS topic ``self._topic_arn``.

        Args:
            filename: name of the file the lines came from.
            lines: iterable of log lines.
            **kwargs: extra fields forwarded to ``get_timestamp`` and ``format``.

        Raises:
            TransportException: if formatting or publishing a line raises. The
                failure is logged with a traceback first.
        """
        timestamp = self.get_timestamp(**kwargs)
        # ``timestamp`` is passed to ``format`` positionally below, so always
        # drop the keyword copy (even a falsy one) to avoid supplying it twice.
        kwargs.pop('timestamp', None)

        for line in lines:
            try:
                self._connection.publish(
                    self._topic_arn,
                    self.format(filename, line, timestamp, **kwargs))
            except Exception as e:
                self._logger.exception(
                    'Exception occurred sending to SNS topic')
                # ``message`` may be missing or empty; fall back to str(e).
                raise TransportException(getattr(e, 'message', None) or str(e))
Beispiel #24
0
    def _send_message_batch(self, message_batch):
        """Put a batch of records on the Kinesis stream, retrying when throttled.

        The call is retried (via the ``retry`` decorator) when the raised
        error text mentions throttling or exceeded throughput, or when the
        result reports a ``FailedRecordCount`` above zero.

        Args:
            message_batch: records passed unchanged to ``put_records``.

        Raises:
            TransportException: if the send ultimately fails. The failure is
                logged with a traceback first.
        """
        def _is_throttled(exc):
            # ``message`` may be missing or empty; fall back to str(exc) so the
            # predicate itself cannot fail with AttributeError.
            text = getattr(exc, 'message', None) or str(exc)
            return ('ProvisionedThroughputExceededException' in text
                    or 'Throttle' in text or 'Throttling' in text)

        @retry(wait_exponential_multiplier=self._initial_wait_between_retries,
               stop_max_attempt_number=self._max_retries,
               retry_on_exception=_is_throttled,
               retry_on_result=lambda res: res.get('FailedRecordCount', 0) > 0)
        def internal_send_message_batch_with_retry():
            return self._connection.put_records(records=message_batch, stream_name=self._stream_name)

        try:
            internal_send_message_batch_with_retry()
        except Exception as e:
            self._logger.exception('Exception occurred sending records to Kinesis stream')
            # ``message`` may be missing or empty; fall back to str(e).
            raise TransportException(getattr(e, 'message', None) or str(e))
 def callback(self, filename, lines, **kwargs):
     """Format each line and put it on the ``self._lines`` queue.

     Args:
         filename: name of the file the lines came from.
         lines: iterable of log lines.
         **kwargs: extra fields forwarded to ``get_timestamp`` and ``format``.

     Raises:
         TransportException: if formatting or queueing a line raises or emits
             a warning. ``_is_valid`` is set to False before raising.
     """
     import warnings

     timestamp = self.get_timestamp(**kwargs)
     if kwargs.get('timestamp'):
         kwargs.pop('timestamp')

     for entry in lines:
         try:
             with warnings.catch_warnings():
                 warnings.simplefilter('error')
                 formatted = self.format(filename, entry, timestamp, **kwargs)
                 self._lines.put(formatted)
         except UserWarning:
             self._is_valid = False
             raise TransportException(
                 'Connection appears to have been lost')
         except Exception as err:
             self._is_valid = False
             try:
                 reason = err.strerror
             except AttributeError:
                 # No ``strerror`` on this exception type.
                 raise TransportException(
                     'Unspecified exception encountered'
                 )  # TRAP ALL THE THINGS!
             raise TransportException(reason)
Beispiel #26
0
    def callback(self, filename, lines, **kwargs):
        """Queue every formatted line on ``self._pipeline`` and execute it.

        Lines are pushed with ``rpush`` onto the key ``self._redis_namespace``.

        Args:
            filename: name of the file the lines came from.
            lines: iterable of log lines.
            **kwargs: extra fields forwarded to ``get_timestamp`` and ``format``.

        Raises:
            TransportException: if executing the pipeline raises a redis
                ``ConnectionError``. The traceback is printed first.
        """
        timestamp = self.get_timestamp(**kwargs)
        # ``timestamp`` is passed to ``format`` positionally below, so always
        # drop the keyword copy (even a falsy one) to avoid supplying it twice.
        kwargs.pop('timestamp', None)

        for line in lines:
            self._pipeline.rpush(
                self._redis_namespace,
                self.format(filename, line, timestamp, **kwargs))

        try:
            self._pipeline.execute()
        except redis.exceptions.ConnectionError as e:
            traceback.print_exc()
            raise TransportException(str(e))
Beispiel #27
0
 def send_batch(self):
     """Archive the current batch as gzip-compressed JSON and POST it.

     The batch is dumped to a ``.gz`` file in ``self._storage_folder``, the
     file content is posted to ``self._url`` and the file is removed. The
     batch is emptied only after a 200 response.

     Raises:
         TransportException: if the server answers with a status other than
             200 or anything other than an ``OSError`` is raised. ``OSError``
             is logged as a warning and swallowed.
     """
     try:
         filename = path.join(self._storage_folder,
                              '{}.gz'.format(datetime.now().time()))
         with gzip.open(filename, 'wb') as bss_arch:
             json.dump(self._batch, bss_arch)
             self._logger.info("Saved BSS batch to file " + str(filename))
         try:
             # The archive is gzip data, so read it back in binary mode.
             with open(filename, 'rb') as post_data:
                 r = requests.post(self._url,
                                   data=post_data.read(),
                                   headers=self._headers)
             if r.status_code != 200:
                 raise TransportException(
                     "Send to BSS server error: {}, {}".format(
                         r.status_code, r.content))
             self._logger.info("Sent BSS batch to file " + str(filename))
             self._batch = []
             self._logger.debug("Empty BSS batch")
         finally:
             # Remove the archive on every path, including a failed POST, and
             # do not let a cleanup error mask the send failure.
             try:
                 remove(filename)
             except OSError as e:
                 self._logger.warning(getattr(e, 'message', None) or str(e))
     except OSError as e:
         # ``message`` is empty for OSError(errno, strerror); use str(e) then.
         self._logger.warning(getattr(e, 'message', None) or str(e))
     except TransportException:
         # Already the right type and message; no need to re-wrap it.
         raise
     except Exception as e:
         raise TransportException(getattr(e, 'message', None) or str(e))
Beispiel #28
0
    def __init__(self, beaver_config, logger=None):
        """Create the Kafka client and a batching producer from ``kafka_*`` settings.

        A ``SimpleProducer`` is used when no key is configured, otherwise a
        ``KeyedProducer`` (with ``RoundRobinPartitioner`` when ``round_robin``
        is truthy).

        Args:
            beaver_config: configuration object, read through its ``get`` method.
            logger: optional logger handed to the base transport.

        Raises:
            TransportException: if creating the client or producer raises.
        """
        super(KafkaTransport, self).__init__(beaver_config, logger=logger)

        self._kafka_config = {}
        config_to_store = [
            'client_id', 'hosts', 'async', 'topic', 'key', 'ack_timeout',
            'codec', 'batch_n', 'batch_t', 'round_robin'
        ]

        for key in config_to_store:
            self._kafka_config[key] = beaver_config.get('kafka_' + key)

        try:
            self._client = KafkaClient(self._kafka_config['hosts'],
                                       self._kafka_config['client_id'])
            self._client.ensure_topic_exists(self._kafka_config['topic'])
            self._key = self._kafka_config['key']

            # Arguments shared by both producer types. ``async`` is passed via
            # a dict because it is a reserved word from Python 3.7 on and
            # cannot be written as a literal keyword argument there.
            producer_kwargs = {
                'async': self._kafka_config['async'],
                'req_acks': SimpleProducer.ACK_AFTER_LOCAL_WRITE,
                'ack_timeout': self._kafka_config['ack_timeout'],
                'codec': self._kafka_config['codec'],
                'batch_send': True,
                'batch_send_every_n': self._kafka_config['batch_n'],
                'batch_send_every_t': self._kafka_config['batch_t'],
            }

            if self._key is None:
                self._prod = SimpleProducer(self._client, **producer_kwargs)
            else:
                partitioner = None
                if self._kafka_config['round_robin']:
                    partitioner = RoundRobinPartitioner
                self._prod = KeyedProducer(self._client,
                                           partitioner=partitioner,
                                           **producer_kwargs)

            self._is_valid = True

        except Exception as e:
            # ``message`` may be missing or empty; fall back to str(e).
            raise TransportException(getattr(e, 'message', None) or str(e))
    def callback(self, filename, lines, **kwargs):
        """Queue every formatted line on ``self._pipeline`` and execute it.

        The list key is the per-file ``redis_namespace`` field when set,
        otherwise ``self._redis_namespace``.

        Args:
            filename: name of the file the lines came from.
            lines: iterable of log lines.
            **kwargs: extra fields forwarded to ``get_timestamp`` and ``format``.

        Raises:
            TransportException: if executing the pipeline raises a
                ``RedisError``. The traceback is printed first.
        """
        timestamp = self.get_timestamp(**kwargs)
        # ``timestamp`` is passed to ``format`` positionally below, so always
        # drop the keyword copy (even a falsy one) to avoid supplying it twice.
        kwargs.pop('timestamp', None)

        rn = self._beaver_config.get_field('redis_namespace', filename)
        if not rn:
            rn = self._redis_namespace
        self._logger.debug('redis_namespace: ' + rn)

        for line in lines:
            self._pipeline.rpush(
                rn, self.format(filename, line, timestamp, **kwargs))

        try:
            self._pipeline.execute()
        except redis.exceptions.RedisError as e:
            traceback.print_exc()
            raise TransportException(str(e))
Beispiel #30
0
def prepare_bss_message(raw_payload, moses_version, product):
    """Wrap a raw payload in a BSS event envelope.

    Args:
        raw_payload: raw message (a dict, per the original note); it is passed
            through ``make_payload`` before being stored.
        moses_version: stored as ``common.appVersion`` after ``str()``.
        product: stored as ``common.product`` after ``int()``.

    Returns:
        dict with the keys ``type`` (402), ``payload``, ``eventId`` (a fresh
        ``uuid1`` string), ``timestamp`` (``time()`` in milliseconds) and
        ``common``.

    Raises:
        TransportException: if building any part of the message raises.
    """
    try:
        message = {"type": 402}
        message["payload"] = make_payload(raw_payload)
        message["eventId"] = str(uuid.uuid1())
        message["timestamp"] = int(time() * 1000)
        message["common"] = {
            "formatVersion": 3,
            "appVersion": str(moses_version),
            "product": int(product),
        }
        return message
    except Exception as err:
        raise TransportException(err.message)