Beispiel #1
0
    def _check_redis_pubsub_for_restart(self):
        """
        Subscribe to Redis PubSub and flag a restart when asked to.

        Subscribes to ``self.traptor_notify_channel`` and then polls it
        forever, sleeping ``PUBSUB_CHECK_INTERVAL`` seconds (environment
        variable, default 1) between polls.  A message whose payload starts
        with ``<traptor_type>:<traptor_id>`` for this instance sets
        ``self.restart_flag`` to True.

        This method never returns; it only sets the flag.
        """
        self.logger.info("Subscribing to the Traptor notification PubSub")
        self.logger.debug("restart_flag = {}".format(self.restart_flag))

        pubsub_check_interval = float(os.getenv('PUBSUB_CHECK_INTERVAL', 1))

        p = self.pubsub_conn.pubsub()
        p.subscribe(self.traptor_notify_channel)

        while True:
            time.sleep(pubsub_check_interval)
            m = p.get_message()
            if m is None:
                continue

            t = str(m['data']).split(':')
            self.logger.debug("PubSub Message: {}".format(t))

            # A payload without a ':' separator has a single part; indexing
            # t[1] on it would raise IndexError and kill this listener.
            if len(t) < 2:
                continue

            if t[0] == self.traptor_type and t[1] == str(self.traptor_id):
                # Log the action and restart
                self.restart_flag = True
                self.logger.debug("Redis PubSub message found. Setting restart flag to True.")
                dd_monitoring.increment('restart_message_received')
Beispiel #2
0
 def kafka_failure(e):
     """
     Log and count a failed attempt to send a tweet to Kafka.

     :param e: the error handed to this callback
     """
     # NOTE(review): presumably invoked as a Kafka errback, i.e. outside an
     # ``except`` block, where traceback.format_exc() has no active
     # exception to format -- so the error passed in is logged explicitly.
     self.logger.error("Caught Kafka exception when sending a tweet to Kafka", extra={
         'error_type': 'KafkaError',
         'ex': traceback.format_exc(),
         'error': repr(e)
     })
     dd_monitoring.increment('tweet_to_kafka_failure',
                             tags=['error_type:kafka'])
Beispiel #3
0
 def _delete_rule_counters(self):
     """
     Stop and then delete the existing rule counters.

     Every counter in ``self.rule_counters`` is first deactivated; in a
     second pass each one is stopped and its key deleted.  A failure on one
     counter is logged and counted, and processing continues with the next
     counter.  The success message is logged only when no counter failed.
     """
     if not self.rule_counters:
         return

     all_succeeded = True

     for counter in self.rule_counters:
         try:
             self.rule_counters[counter].deactivate()
         except Exception:
             all_succeeded = False
             self.logger.error("Caught exception while deactivating a rule counter", extra={
                 'error_type': 'ConnectionError',
                 'ex': traceback.format_exc()
             })
             dd_monitoring.increment('redis_error',
                                     tags=['error_type:connection_error'])

     for counter in self.rule_counters:
         try:
             self.rule_counters[counter].stop()
             self.rule_counters[counter].delete_key()
         except Exception:
             all_succeeded = False
             self.logger.error("Caught exception while stopping and deleting a rule counter", extra={
                 'error_type': 'RedisConnectionError',
                 'ex': traceback.format_exc()
             })
             dd_monitoring.increment('redis_error',
                                     tags=['error_type:connection_error'])

     if all_succeeded:
         self.logger.info("Rule counters deleted successfully")
Beispiel #4
0
    def _get_redis_rules(self):
        """ Yields a traptor rule from redis.  This function
            expects that the redis keys are set up like follows:

            traptor-<traptor_type>:<traptor_id>:<rule_id>

            For example,

            traptor-follow:0:34

            traptor-track:0:5

            traptor-locations:0:2

            For 'follow' twitter streaming, each traptor may only
            follow 5000 twitter ids, as per the Twitter API.

            For 'track' twitter stream, each traptor may only
            track 400 keywords, as per the Twitter API.

            For 'locations' twitter stream, each traptor may only
            track 25 bounding boxes, as per the Twitter API.

            :returns: Yields a traptor rule from redis.
        """
        # Set up API limitation checks
        if self.traptor_type == 'follow':
            rule_max = 5000
        elif self.traptor_type == 'track':
            rule_max = 400
        elif self.traptor_type == 'locations':
            rule_max = 1
        else:
            self.logger.error('Unsupported traptor_type', extra={'traptor_type': self.traptor_type})
            dd_monitoring.increment('traptor_error_occurred',
                                    tags=['error_type:not_implemented_error'])
            raise(NotImplementedError)

        # for rule in xrange(rule_max):
        redis_key = 'traptor-{0}:{1}'.format(self.traptor_type,
                                             self.traptor_id)
        match = ':'.join([redis_key, '*'])
        try:
            self.logger.info("Getting rules from Redis")
            for idx, hashname in enumerate(self.redis_conn.scan_iter(match=match)):
                if idx < rule_max:
                    redis_rule = self.redis_conn.hgetall(hashname)
                    yield redis_rule
                    self.logger.debug('Index: {0}, Redis_rule: {1}'.format(
                                      idx, redis_rule))
        except:
            self.logger.critical("Caught exception while connecting to Redis", extra={
                'error_type': 'RedisConnectionError',
                'ex': traceback.format_exc()
            })
            dd_monitoring.increment('redis_error',
                                    tags=['error_type:connection_error'])
Beispiel #5
0
    def _enrich_tweet(self, tweet):
        """
        Enrich a Twitter stream message with additional fields, rule matching
        and stats collection.

        * Limit messages are counted, reported to monitoring and stored via
          the limit counter.
        * Tweets get the traptor object, the ISO ``created_at`` field and the
          rule-match information; the matched rule counter is incremented
          unless this is a 'locations' traptor or stats collection is
          disabled.
        * Anything else is only logged.

        If enrichment raises, the error is logged and any partial enrichment
        is discarded.

        :param dict tweet: decoded message from the Twitter stream
        :return: the enriched tweet dict when enrichment produced one,
                 otherwise ``tweet`` as it stands at that point
        """
        enriched_data = dict()

        if self._message_is_limit_message(tweet):
            # Increment counter
            dd_monitoring.increment('limit_message_received')
            # Send DD the limit message value
            limit_count = tweet.get('limit').get(self.traptor_type, None)
            dd_monitoring.gauge('limit_message_count', limit_count, [])
            # Store the limit count in Redis
            self._increment_limit_message_counter(limit_count=limit_count)
        elif self._message_is_tweet(tweet):
            try:
                # Add the initial traptor fields
                tweet = self._create_traptor_obj(tweet)

                # Add the created_at_iso field
                tweet = self._add_iso_created_at(tweet)

                # Add the rule information
                enriched_data = self._find_rule_matches(tweet)

                # Update the matched rule stats
                if self.traptor_type != 'locations' and self.enable_stats_collection ==\
                        'true':
                    self._increment_rule_counter(enriched_data)
            except Exception:
                # Pass the context via ``extra=`` like every other log call
                # here, rather than as a positional argument.
                self.logger.error("Failed to enrich tweet, skipping enhancement", extra={
                    "tweet": json.dumps(tweet),
                    "ex"   : traceback.format_exc()
                })

                # an error occurred while processing the tweet. If some information was
                # set in the dictionary when calling _find_rule_matches, clear it out
                # because it is likely invalid...
                enriched_data = {}

        else:
            self.logger.info("Twitter message is not a tweet", extra={
                'twitter_message': tweet
            })

        dd_monitoring.increment('tweet_process_success')

        if enriched_data:
            return enriched_data
        else:
            return tweet
Beispiel #6
0
    def _add_heartbeat_message_to_redis(self,
                                        heartbeat_conn):
        """
        Add an expiring heartbeat key to Redis.

        The key is ``<traptor_type>:<traptor_id>:<YYYYmmddHHMMSS>`` (local
        time), its value is ``"alive"`` and it expires after 5 seconds.

        :param heartbeat_conn: Redis connection used to write the key
        :return: whatever ``heartbeat_conn.setex`` returns
        """
        time_to_live = 5
        # %m is the month; the previous format used %M (minutes) in the
        # month position, producing a wrong timestamp.
        now = datetime.now().strftime("%Y%m%d%H%M%S")
        key_to_add = "{}:{}:{}".format(self.traptor_type,
                                       self.traptor_id,
                                       now)
        message = "alive"

        # Count the heartbeat as sent only after the write did not raise.
        result = heartbeat_conn.setex(key_to_add, time_to_live, message)
        dd_monitoring.increment('heartbeat_message_sent_success')
        return result
Beispiel #7
0
    def _increment_limit_message_counter(self, limit_count):
        """
        Increment the limit message counter

        :param limit_count: the integer value from the limit message
        """
        try:
            if self.limit_counter is not None:
                self.limit_counter.increment(limit_count=limit_count)
        except:
            self.logger.error("Caught exception while incrementing a limit counter", extra={
                'error_type': 'RedisConnectionError',
                'ex': traceback.format_exc()
            })
            dd_monitoring.increment('redis_error',
                                    tags=['error_type:connection_error'])
Beispiel #8
0
 def _setup_kafka(self):
     """ Set up a Kafka connection."""
     if self.kafka_enabled == 'true':
         self.logger.info('Setting up kafka connection')
         try:
             self._create_kafka_producer()
         except:
             self.logger.critical("Caught Kafka Unavailable Error", extra={
                 'error_type': 'KafkaUnavailableError',
                 'ex': traceback.format_exc()
             })
             dd_monitoring.increment('kafka_error',
                                     tags=['error_type:kafka_unavailable'])
             # sys.exit(3)
     else:
         self.logger.info('Skipping kafka connection setup')
         self.logger.debug('Kafka_enabled setting: {}'.format(self.kafka_enabled))
         self.kafka_conn = None
Beispiel #9
0
    def _send_heartbeat_message(self):
        """
        Write a heartbeat key to Redis every 5 seconds, forever.

        A failure to write the heartbeat is logged, counted and then
        re-raised, which ends the loop.
        """
        self.logger.info("Starting the heartbeat")
        seconds_between_beats = 5

        while True:
            try:
                self._add_heartbeat_message_to_redis(self.heartbeat_conn)
            except Exception:
                failure_details = {
                    'error_type': 'RedisConnectionError',
                    'ex': traceback.format_exc()
                }
                self.logger.error("Caught exception while adding the heartbeat message to Redis",
                                  extra=failure_details)
                dd_monitoring.increment('heartbeat_message_sent_failure',
                                        tags=['error_type:redis_connection_error'])
                raise
            else:
                # Heartbeat written; wait before sending the next one.
                time.sleep(seconds_between_beats)
Beispiel #10
0
    def _create_birdy_stream(self):
        """ Create a birdy twitter stream.
            If there is a TwitterApiError it will exit with status code 3.
            This was done to prevent services like supervisor from automatically
            restart the process causing the twitter API to get locked out.

            Creates ``self.birdy_stream``.
        """

        if self.traptor_type == 'follow':
            # Try to set up a twitter stream using twitter id list
            try:
                self._create_twitter_follow_stream()
            except TwitterApiError as e:
                self.logger.critical("Caught Twitter Api Error creating follow stream", extra = {
                    'error_type': 'TwitterAPIError',
                    'ex': traceback.format_exc()
                })
                dd_monitoring.increment('twitter_error_occurred',
                                        tags=['error_type:twitter_api_error'])
                sys.exit(3)
        elif self.traptor_type == 'track':
            # Try to set up a twitter stream using twitter term list
            try:
                self._create_twitter_track_stream()
            except TwitterApiError as e:
                self.logger.critical("Caught Twitter Api Error", extra={
                    'error_type': 'TwitterAPIError',
                    'ex': traceback.format_exc()
                })
                dd_monitoring.increment('twitter_error_occurred',
                                        tags=['error_type:twitter_api_error'])
                sys.exit(3)
        elif self.traptor_type == 'locations':
            # Try to set up a twitter stream using twitter term list
            try:
                self._create_twitter_locations_stream()
            except TwitterApiError as e:
                self.logger.critical("Caught Twitter Api Error", extra={
                    'error_type': 'TwitterAPIError',
                    'ex': traceback.format_exc()
                })
                dd_monitoring.increment('twitter_error_occurred',
                                        tags=['error_type:twitter_api_error'])
                sys.exit(3)
        else:
            self.logger.critical('Caught error creating birdy stream for Traptor type that does not exist', extra ={
                'error_type': 'NotImplementedError',
                'ex': traceback.format_exc()
            })
            dd_monitoring.increment('traptor_error_occurred',
                                    tags=['error_type:not_implemented_error'])
            sys.exit(3)
Beispiel #11
0
    def _main_loop(self):
        """
        Main loop for iterating through the twitter data.

        This method iterates through the birdy stream, does any
        pre-processing, and adds enrichments to the data.  If kafka is
        enabled it will write to the kafka topic defined when instantiating
        the Traptor class.
        """
        self.logger.info("Starting tweet processing")
        # Iterate through the twitter results
        for item in self.birdy_stream._stream_iter():
            if item:
                try:
                    tweet = json.loads(item)
                except:
                    self.logger.error("Caught exception while json loading the Twitter message", extra={
                        'ex': traceback.format_exc()
                    })
                    dd_monitoring.increment('traptor_error_occurred',
                                            tags=['error_type:json_loads_error'])
                else:
                    enriched_data = self._enrich_tweet(tweet)

                    if self.kafka_enabled == 'true':

                        try:
                            self._send_enriched_data_to_kafka(tweet, enriched_data)
                        except:
                            self.logger.error("Caught exception adding Twitter message to Kafka", extra={
                                'ex': traceback.format_exc()
                            })
                            dd_monitoring.increment('tweet_to_kafka_failure',
                                                    tags=['error_type:kafka'])
                    else:
                        self.logger.debug(json.dumps(enriched_data, indent=2))

            if self.restart_flag:
                self.logger.info("Restart flag is true; restarting myself")
                break
Beispiel #12
0
    def _increment_rule_counter(self, tweet):
        """
        Increment a rule counter.

        :param rule_value: the value of the rule to increment the counter for
        """
        rule_id = tweet.get('traptor', {}).get('rule_id', None)

        # If the counter doesn't yet exist, create it
        if self.rule_counters.get(rule_id, None) is None:
            self.rule_counters[rule_id] = self._create_rule_counter(rule_id=rule_id)

        # If a rule value exists, increment the counter
        try:
            if rule_id is not None and self.rule_counters[rule_id] is not None:
                self.rule_counters[rule_id].increment()
        except:
            self.logger.error("Caught exception while incrementing a rule counter", extra={
                'error_type': 'RedisConnectionError',
                'ex': traceback.format_exc()
            })
            dd_monitoring.increment('redis_error',
                                    tags=['error_type:connection_error'])
Beispiel #13
0
    def _main_loop(self):
        """
        Main loop for iterating through the twitter data.

        This method iterates through the birdy stream, JSON-decodes each
        non-empty item and enriches it.  If kafka is enabled the enriched
        data is sent to ``self.kafka_topic`` through ``self.kafka_conn``;
        otherwise it is written to the debug log.

        Decode and Kafka errors are logged and counted and the loop moves on
        to the next item.  The loop ends when the stream is exhausted or
        when ``self.restart_flag`` is set.
        """
        self.logger.info("Starting tweet processing.")
        # Iterate through the twitter results
        for item in self.birdy_stream._stream_iter():
            if item:
                try:
                    tweet = json.loads(item)
                except Exception:
                    # ``except Exception`` (as for the Kafka send below) so
                    # KeyboardInterrupt / SystemExit can still stop the loop.
                    dd_monitoring.increment('tweet_process_failure')
                    self.logger.error("Caught exception while json loading the Twitter message", extra={
                        'ex': traceback.format_exc()
                    })
                else:
                    dd_monitoring.increment('tweet_process_success')

                    enriched_data = self._enrich_tweet(tweet)

                    if self.kafka_enabled == 'true':
                        try:
                            self.kafka_conn.send(self.kafka_topic, enriched_data)

                            self.logger.info("Tweet sent to kafka", extra={
                                'tweet_id': tweet.get('id_str', None)
                            })

                            dd_monitoring.increment('tweet_to_kafka_success')
                        except Exception:
                            self.logger.error("Caught exception adding Twitter message to Kafka", extra={
                                'ex': traceback.format_exc()
                            })

                            dd_monitoring.increment('tweet_to_kafka_failure')
                    else:
                        self.logger.debug(json.dumps(enriched_data, indent=2))

            # Checked after every stream item, including empty ones.
            if self.restart_flag:
                self.logger.info("Restart flag is true; restarting myself.")
                break
Beispiel #14
0
    def _find_rule_matches(self, tweet_dict):
        """
        Find a rule match for the tweet and record it under ``tweet_dict['traptor']``.

        The behaviour depends on ``self.traptor_type``:

        * ``locations``: every key/value of ``self.locations_rule`` is copied
          into the ``traptor`` sub-dict.
        * ``track``: a lowercase query string is built from the tweet text,
          hashtags, screen names, retweeted/quoted status parts and URLs, and
          each rule value in ``self.redis_rules`` is searched for in it.
        * ``follow``: a lowercase query string is built from user ids, user
          mentions and the tweet text, and each rule value is searched for
          in it.
        * anything else: a warning is logged.

        This code only expects there to be one match.  If there is more than one, it will use the last one
        it finds since the first match will be overwritten.  ``self.redis_rules``
        is shuffled in place on every track/follow call, so which rule is
        "last" varies between calls.

        When no ``rule_tag`` was set, 'Not Found' placeholders are stored
        instead and a warning is logged.

        NOTE(review): the string handling (``iteritems``, concatenating the
        result of ``.encode()`` onto a ``str``) looks Python 2 specific --
        confirm the target interpreter before porting.

        :param dict tweet_dict: The dictionary twitter object.  It must
            already contain a ``traptor`` key; it is modified in place.
        :returns: a ``dict`` with the augmented data fields (the same object
            as ``tweet_dict``).
        """

        # new_dict is another name for tweet_dict, not a copy: every write
        # below also changes the caller's dict
        new_dict = tweet_dict
        self.logger.debug('Finding tweet rule matches')

        # If the Traptor is a geo traptor, return the one rule we've already set up
        if self.traptor_type == 'locations':
            for key, value in self.locations_rule.iteritems():
                new_dict['traptor'][key] = value

        # Do track Traptor enrichments...
        elif self.traptor_type == 'track':

            """
            Here's how Twitter does it, and so shall we:

            The text of the Tweet and some entity fields are considered for matches.
            Specifically, the text attribute of the Tweet, expanded_url and display_url
            for links and media, text for hashtags, and screen_name for user mentions
            are checked for matches.
            """

            # Build up the query from our tweet fields
            query = ""

            # Tweet text
            query = query + tweet_dict['text'].encode("utf-8")

            # URLs and Media: collected into url_list first and appended to
            # the query (de-duplicated) further down
            url_list = []
            if 'urls' in tweet_dict['entities']:
                for url in tweet_dict['entities']['urls']:
                    expanded_url = url.get('expanded_url', None)
                    display_url = url.get('display_url', None)

                    if expanded_url is not None:
                        url_list.append(expanded_url)
                    if display_url is not None:
                        url_list.append(display_url)

            if 'media' in tweet_dict['entities']:
                for item in tweet_dict['entities']['media']:
                    expanded_url = item.get('expanded_url', None)
                    display_url = item.get('display_url', None)

                    if expanded_url is not None:
                        url_list.append(expanded_url)
                    if display_url is not None:
                        url_list.append(display_url)

            # Hashtags
            if 'hashtags' in tweet_dict['entities']:
                for tag in tweet_dict['entities']['hashtags']:
                    query = query + " " + tag['text'].encode("utf-8")

            # Screen name of the tweeting user
            if 'screen_name' in tweet_dict['user']:
                query = query + " " + tweet_dict['user']['screen_name'].encode('utf-8')

            # Retweeted parts
            if tweet_dict.get('retweeted_status', None) is not None:
                # Status text of the retweeted tweet
                query = query + " " + tweet_dict['retweeted_status']['text'].encode("utf-8")

                # Full text of an extended tweet quoted by the retweeted tweet
                if tweet_dict['retweeted_status'].get('quoted_status', {}).get('extended_tweet', {}).get('full_text', None) is not None:
                    query = query + " " + tweet_dict['retweeted_status']['quoted_status']['extended_tweet']['full_text'].encode("utf-8")

                # URLs and Media
                if 'urls' in tweet_dict['retweeted_status']['entities']:
                    for url in tweet_dict['retweeted_status']['entities']['urls']:
                        expanded_url = url.get('expanded_url', None)
                        display_url = url.get('display_url', None)

                        if expanded_url is not None:
                            url_list.append(expanded_url)
                        if display_url is not None:
                            url_list.append(display_url)

                if 'media' in tweet_dict['retweeted_status']['entities']:
                    for item in tweet_dict['retweeted_status']['entities']['media']:
                        expanded_url = item.get('expanded_url', None)
                        display_url = item.get('display_url', None)

                        if expanded_url is not None:
                            url_list.append(expanded_url)
                        if display_url is not None:
                            url_list.append(display_url)

                # Hashtags
                if 'hashtags' in tweet_dict['retweeted_status']['entities']:
                    for tag in tweet_dict['retweeted_status']['entities']['hashtags']:
                        query = query + " " + tag['text'].encode("utf-8")

                # Names: the key can be present with a None value, hence the
                # second check before encoding
                if 'in_reply_to_screen_name' in tweet_dict['retweeted_status']:
                    in_reply_to_screen_name = tweet_dict.get('retweeted_status', {}).get('in_reply_to_screen_name', None)
                    if in_reply_to_screen_name is not None:
                        query = query + " " + tweet_dict['retweeted_status']['in_reply_to_screen_name'].encode('utf-8')

                if 'screen_name' in tweet_dict['retweeted_status']['user']:
                    screen_name = tweet_dict.get('retweeted_status', {}).get('user', {}).get('screen_name', None)
                    if screen_name is not None:
                        query = query + " " + tweet_dict['retweeted_status']['user']['screen_name'].encode('utf-8')

            # Quoted Status parts
            if tweet_dict.get('quoted_status', None) is not None:
                # Standard tweet
                if tweet_dict.get('quoted_status').get('text', None) is not None:
                    query = query + " " + tweet_dict['quoted_status']['text'].encode('utf-8')

                # Extended tweet
                if tweet_dict.get('quoted_status').get('extended_tweet', {}).get('full_text', None) is not None:
                    query = query + " " + tweet_dict['quoted_status']['extended_tweet']['full_text'].encode('utf-8')

            # De-dup urls and add to the giant query
            if len(url_list) > 0:
                url_list = set(url_list)
                for url in url_list:
                    query = query + " " + url.encode("utf-8")

            # Lowercase the entire thing
            query = query.lower()

            # Shuffle the rules in place on every call; since a later match
            # overwrites an earlier one, this varies which rule wins
            random.shuffle(self.redis_rules)

            try:
                # Check every rule against the query (no early exit on a match)
                for rule in self.redis_rules:
                    # Get the rule to search for and lowercase it
                    search_str = rule['value'].encode("utf-8").lower()

                    # Split the rule value and see if it's a multi-parter
                    part_finder = list()
                    search_str_multi = search_str.split(" ")

                    # If there is more than one part to the rule, check for each part in the query
                    if len(search_str_multi) > 1:
                        for part in search_str_multi:
                            if part in query:
                                part_finder.append(True)
                            else:
                                part_finder.append(False)

                    # Multi-part rule: matches when every part occurs
                    # somewhere in the query (substring test, any order)
                    if len(search_str_multi) > 1 and all(part_finder):
                        # These two lines kept for backwards compatibility
                        new_dict['traptor']['rule_tag'] = rule['tag']
                        new_dict['traptor']['rule_value'] = rule['value'].encode("utf-8")

                        # Pass all key/value pairs from matched rule through to Traptor
                        for key, value in rule.iteritems():
                            new_dict['traptor'][key] = value.encode("utf-8")

                        # Log that a rule was matched
                        self.logger.debug("Rule matched for tweet id: {}".format(tweet_dict['id_str']))

                    # Otherwise: the whole rule value occurs as a substring
                    elif search_str in query:
                        # These two lines kept for backwards compatibility
                        new_dict['traptor']['rule_tag'] = rule['tag']
                        new_dict['traptor']['rule_value'] = rule['value'].encode("utf-8")

                        # Pass all key/value pairs from matched rule through to Traptor
                        for key, value in rule.iteritems():
                            new_dict['traptor'][key] = value.encode("utf-8")

                        # Log that a rule was matched
                        self.logger.debug("Rule matched for tweet id: {}".format(tweet_dict['id_str']))
            except:
                # Any failure during matching is logged and counted; whatever
                # was written to new_dict before the failure stays in place
                self.logger.error("Caught exception while performing rule matching for track", extra={
                    'ex': traceback.format_exc()
                })
                dd_monitoring.increment('traptor_error_occurred',
                                        tags=['error_type:rule_matching_failure'])

        # If this is a follow Traptor, match rule values against user ids and the tweet text
        elif self.traptor_type == 'follow':
            """
            Here's how Twitter does it, and so shall we:

            Tweets created by the user.
            Tweets which are retweeted by the user.
            Replies to any Tweet created by the user.
            Retweets of any Tweet created by the user.
            Manual replies, created without pressing a reply button (e.g. “@twitterapi I agree”).
            """

            # Build up the query from our tweet fields
            query = ""

            # Tweets created by the user AND
            # Tweets which are retweeted by the user

            # Debug-log the tweet; a serialization failure is logged and
            # counted but does not stop the matching
            try:
                self.logger.debug('tweet_dict for rule match',
                                  extra={'tweet_dict': json.dumps(tweet_dict).encode("utf-8")})
            except:
                self.logger.error("Unable to dump the tweet dict to json", extra={
                    'ex': traceback.format_exc()
                })
                dd_monitoring.increment('traptor_error_occurred',
                                        tags=['error_type:json_dumps'])

            # From this user
            query = query + str(tweet_dict['user']['id_str'])

            # Replies to any Tweet created by the user.
            # (appended without a separator, directly after the previous id)
            if tweet_dict['in_reply_to_user_id'] is not None and tweet_dict['in_reply_to_user_id'] != '':
                query = query + str(tweet_dict['in_reply_to_user_id'])

            # User mentions
            if 'user_mentions' in tweet_dict['entities']:
                for tag in tweet_dict['entities']['user_mentions']:
                    id_str = tag.get('id_str')
                    if id_str:
                        query = query + " " + id_str.encode("utf-8")


            # Retweeted parts: id of the user whose tweet was retweeted
            if tweet_dict.get('retweeted_status', None) is not None:
                if tweet_dict['retweeted_status'].get('user', {}).get('id_str', None) is not None:
                    query = query + str(tweet_dict['retweeted_status']['user']['id_str'])

            # Retweets of any Tweet created by the user; AND
            # Manual replies, created without pressing a reply button (e.g. “@twitterapi I agree”).
            query = query + tweet_dict['text'].encode("utf-8")

            # Lowercase the entire thing
            query = query.lower()

            # Shuffle the rules in place on every call (see the track branch)
            random.shuffle(self.redis_rules)

            try:
                for rule in self.redis_rules:
                    # Get the rule to search for and lowercase it
                    search_str = str(rule['value']).encode("utf-8").lower()

                    self.logger.debug("Search string used for the rule match: {}".format(search_str))
                    self.logger.debug("Query for the rule match: {}".format(query))

                    # Plain substring test of the rule value against the query
                    if search_str in query:
                        # These two lines kept for backwards compatibility
                        new_dict['traptor']['rule_tag'] = rule['tag']
                        new_dict['traptor']['rule_value'] = rule['value'].encode("utf-8")

                        # Pass all key/value pairs from matched rule through to Traptor
                        for key, value in rule.iteritems():
                            new_dict['traptor'][key] = value.encode("utf-8")

                        # Log that a rule was matched
                        self.logger.debug("Rule matched for tweet id: {}".format(tweet_dict['id_str']))
            except:
                # Same handling as the track branch: log, count, carry on
                self.logger.error("Caught exception while performing rule matching for follow", extra={
                    'ex': traceback.format_exc()
                })
                dd_monitoring.increment('traptor_error_occurred',
                                        tags=['error_type:rule_matching_failure'])

        # unknown traptor type
        else:
            self.logger.warning("Ran into an unknown Traptor type...")

        # No branch above set a rule_tag: store placeholder values instead
        if 'rule_tag' not in new_dict['traptor']:
            new_dict['traptor']['rule_type'] = self.traptor_type
            new_dict['traptor']['id'] = int(self.traptor_id)
            new_dict['traptor']['rule_tag'] = 'Not Found'
            new_dict['traptor']['rule_value'] = 'Not Found'
            # Log that no rule was matched
            self.logger.warning("No rule matched for tweet", extra={
                'tweet_id': tweet_dict['id_str']
            })

        return new_dict
Beispiel #15
0
 def kafka_success(tweet, response):
     """
     Log and count a tweet that was sent to Kafka.

     :param tweet: the tweet that was sent; its ``id_str`` is logged
     :param response: unused
     """
     sent_tweet_id = tweet.get('id_str', None)
     self.logger.info("Tweet sent to kafka", extra={'tweet_id': sent_tweet_id})
     dd_monitoring.increment('tweet_to_kafka_success')
Beispiel #16
0
def main():
    """ Command line interface to run a traptor instance.

        All configuration is read from environment variables: the Redis
        connection (REDIS_HOST, REDIS_PORT, REDIS_DB, REDIS_PUBSUB_CHANNEL),
        the Twitter API keys, the traptor type and id, the Kafka settings,
        Sentry, stats collection and logging.

        Startup sleeps 30 seconds before running the traptor.  Any exception
        raised by the run is reported to Sentry (when USE_SENTRY is 'true'),
        logged, counted and then re-raised.
    """

    # Redis connections
    redis_host = os.getenv('REDIS_HOST', 'localhost')
    redis_port = int(os.getenv('REDIS_PORT', 6379))
    redis_db = int(os.getenv('REDIS_DB', 5))

    redis_conn = redis.StrictRedis(host=redis_host,
                             port=redis_port,
                             db=redis_db,
                             decode_responses=True)

    # Redis pubsub connection
    pubsub_conn = redis.StrictRedis(host=redis_host,
                              port=redis_port,
                              db=redis_db)

    # Redis heartbeat connection
    heartbeat_conn = redis.StrictRedis(host=redis_host,
                                 port=redis_port,
                                 db=redis_db)

    # Twitter api keys
    api_keys = {
        'CONSUMER_KEY': os.getenv('CONSUMER_KEY'),
        'CONSUMER_SECRET': os.getenv('CONSUMER_SECRET'),
        'ACCESS_TOKEN': os.getenv('ACCESS_TOKEN'),
        'ACCESS_TOKEN_SECRET': os.getenv('ACCESS_TOKEN_SECRET')
    }


    # Create the traptor instance
    traptor_instance = Traptor(redis_conn=redis_conn,
                               pubsub_conn=pubsub_conn,
                               heartbeat_conn=heartbeat_conn,
                               traptor_notify_channel=os.getenv('REDIS_PUBSUB_CHANNEL', 'traptor-notify'),
                               rule_check_interval=int(os.getenv('RULE_CHECK_INTERVAL', 60)),
                               traptor_type=os.getenv('TRAPTOR_TYPE', 'track'),
                               traptor_id=int(os.getenv('TRAPTOR_ID', 0)),
                               apikeys=api_keys,
                               kafka_enabled=os.getenv('KAFKA_ENABLED', 'true'),
                               kafka_hosts=os.getenv('KAFKA_HOSTS', 'localhost:9092'),
                               kafka_topic=os.getenv('KAFKA_TOPIC', 'traptor'),
                               use_sentry=os.getenv('USE_SENTRY', 'false'),
                               sentry_url=os.getenv('SENTRY_URL', None),
                               test=False,
                               enable_stats_collection=os.getenv('ENABLE_STATS_COLLECTION', 'true')
                               )

    # Logger for this main function. The traptor has its own logger

    traptor_name = 'traptor-{}-{}'.format(os.getenv('TRAPTOR_TYPE', 'track'),
                                          os.getenv('TRAPTOR_ID', 0))
    logger = LogFactory.get_instance(name=traptor_name,
                json=os.getenv('LOG_JSON', settings.LOG_JSON) == 'True',
                stdout=os.getenv('LOG_STDOUT', settings.LOG_STDOUT) == 'True',
                level=os.getenv('LOG_LEVEL', settings.LOG_LEVEL),
                dir=os.getenv('LOG_DIR', settings.LOG_DIR),
                file=os.getenv('LOG_FILE', settings.LOG_FILE))

    if settings.DW_ENABLED:
        dw_config(settings.DW_CONFIG)
        logger.register_callback('>=INFO', dw_callback)

    # Wait until all the other containers are up and going...
    time.sleep(30)

    # Run the traptor instance
    try:
        logger.info('Starting Traptor')
        logger.debug("Traptor info: {}".format(repr(traptor_instance)))
        traptor_instance.run()
    except Exception:
        if os.getenv('USE_SENTRY') == 'true':
            client = Client(os.getenv('SENTRY_URL'))
            client.captureException()

        logger.error("Caught exception when starting Traptor", extra={
            'ex': traceback.format_exc()
        })

        dd_monitoring.increment('traptor_error_occurred',
                                tags=['error_type:traptor_start'])
        # Bare ``raise`` re-raises with the original traceback intact;
        # ``raise e`` would restart the traceback here on Python 2.
        raise