def on_error(self, status_code):
     """Log a stream error and, for status 420, try to switch producer credentials.

     Every error is written to the debug log and echoed on stdout. When the
     status code is 420, ``master_producer_utils.change_prod_credentials`` is
     asked for another producer:

     * if it answers ``'producer found'`` the switch is logged and the process
       exits through ``sys.exit()``;
     * otherwise an error mail is sent, the failure is logged and the call
       sleeps for two hours before returning.

     :param status_code: numeric status code reported for the stream error.
     :return: ``True`` (per the original comment: to continue listening).
     """
     logging.debug(datetime.now().strftime("%A, %d. %B %Y %I:%M%p"))
     error_summary = (
         'There was a generic error. The status code is %d. The producer number is %d.'
         % (status_code, cmdargs))
     logging.debug(error_summary)
     print('Got an error with status code: ' + str(status_code))

     # Anything other than a 420 needs no further handling.
     if status_code != 420:
         return True  # To continue listening

     switch_outcome = master_producer_utils.change_prod_credentials(cmdargs)
     if switch_outcome != 'producer found':
         print("Producer not found")
         failure_notice = (
             'A 420 error was encountered and the stored proc either failed to execute or no available producer was found. Intervention is required. The producer number is %d'
             % cmdargs)
         twitter_utils.sendErrorMail(failure_notice)
         logging.debug(failure_notice)
         sleep(2 * 60 * 60)  # two hours, expressed in seconds
         return True  # To continue listening

     print("Producer found")
     logging.debug(
         "A 420 error was encountered but an app which was not at capacity was found and switched to."
     )
     sys.exit()
     return True  # mirrors the original fall-through after sys.exit()
 def on_timeout(self):
     """Record a listener timeout in the debug log, on stdout and by e-mail.

     The same formatted notice (including the producer number taken from the
     module-level ``cmdargs``) is logged and mailed.

     :return: ``True`` (per the original comment: to continue listening).
     """
     logging.debug(datetime.now().strftime("%A, %d. %B %Y %I:%M%p"))
     timeout_notice = (
         'There was a timeout error from the the Twitter Listener. The producer number is %d.'
         % cmdargs)
     logging.debug(timeout_notice)
     print('Timeout...')
     twitter_utils.sendErrorMail(timeout_notice)
     return True  # To continue listening
def convert_long(val):
    """Convert ``val`` to a long integer, falling back to ``0`` on failure.

    Uses the Python 2 ``long`` builtin. A failed conversion is not fatal: it
    is written to the debug log, printed, reported through
    ``twitter_utils.sendErrorMail`` and replaced by ``0``.

    :param val: value to convert (anything ``long()`` accepts).
    :return: the converted value, or ``0`` when the conversion raised.
    """
    try:
        converted = long(val)
    except Exception as e:
        logging.debug(
            'Error in long type conversion inside the creation of the avro schema:  %s'
            % e)
        # Format into one string: a parenthesised two-argument print is a
        # tuple under Python 2 and would print its repr.
        print("Error in Long type conversion : %s" % e)
        twitter_utils.sendErrorMail(
            'There was an error in long type conversion inside the avro schema creation. The error is %s.'
            % e)
        converted = 0
    # The original never returned the result, so every caller received None.
    return converted
Ejemplo n.º 4
0
    def on_status(self, message):
        """Build the Kafka payload for ``message`` and send it to ``mytopic``.

        :param message: status object handed to ``createPayload``.
        :return: ``True`` when sending raised (after the failure was printed
            and mailed); ``None`` when the send succeeded.
        """
        kafka_input = createPayload(message)
        try:
            print('inside kafka')
            producer.send(mytopic.encode('utf-8'), kafka_input)
        except Exception as e:
            print("Exception while writing in kafka topic")
            # Mail the failure before returning: the original returned first,
            # which left this call unreachable. str(e) is required because
            # concatenating a str with an exception object raises TypeError.
            twitter_utils.sendErrorMail('The error is: ' + str(e))
            return True
 def on_status(self, message):
     """Create the payload for ``message`` and print how old the status is.

     NOTE(review): this fragment appears to be a truncated copy of
     ``StdOutListener.on_status``; only the payload-creation step is present.

     :param message: status object handed to ``createPayload``.
     :return: ``True`` when payload creation or the timestamp handling
         raised; otherwise the visible code falls through and returns ``None``.
     """
     # Timestamp the log entries that follow.
     logging.debug(datetime.now().strftime("%A, %d. %B %Y %I:%M%p"))
     try:
         mls = createPayload(message)
         print mls
         # Element 11 of the payload is parsed as a '%Y-%m-%d %H:%M:%S'
         # timestamp; timevar is the gap between it and the current UTC time.
         timevar = datetime.utcnow() - datetime.strptime(
             mls[11], '%Y-%m-%d %H:%M:%S')
         print datetime.utcnow(), datetime.strptime(mls[11],
                                                    '%Y-%m-%d %H:%M:%S')
         # Whole days plus the seconds remainder, split into (minutes, seconds).
         print "Minutes and Seconds : ", divmod(
             timevar.days * 86400 + timevar.seconds, 60)
     except Exception, e:
         # Any failure above is logged, printed and mailed, then swallowed.
         logging.debug(
             'There was an error in creating the payload. The error is: %s'
             % e)
         print 'Error in Payload Creation : ', str(e)
         twitter_utils.sendErrorMail(
             'There was an error in creating the payload. The error is %s' %
             e)
         return True
class StdOutListener(tweepy.StreamListener):
    def on_status(self, message):
        logging.debug(datetime.now().strftime("%A, %d. %B %Y %I:%M%p"))
        try:
            mls = createPayload(message)
            print mls
            timevar = datetime.utcnow() - datetime.strptime(
                mls[11], '%Y-%m-%d %H:%M:%S')
            print datetime.utcnow(), datetime.strptime(mls[11],
                                                       '%Y-%m-%d %H:%M:%S')
            print "Minutes and Seconds : ", divmod(
                timevar.days * 86400 + timevar.seconds, 60)
        except Exception, e:
            logging.debug(
                'There was an error in creating the payload. The error is: %s'
                % e)
            print 'Error in Payload Creation : ', str(e)
            twitter_utils.sendErrorMail(
                'There was an error in creating the payload. The error is %s' %
                e)
            return True

        try:
            #converts the payload into the avro format in preparation for loading into hbase
            avro_schema = Util.parse_schema_from_string(
                open('/**/**/twitter.avsc').read())
            client = CachedSchemaRegistryClient(url='http://192.168.**:8081')
            schema_id = client.register('twitter_avro_schema_stream4',
                                        avro_schema)
            avro_schema = client.get_by_id(schema_id)
            schema_id, avro_schema, schema_version = client.get_latest_schema(
                'twitter_avro_schema_stream4')
            schema_version = client.get_version('twitter_avro_schema_stream4',
                                                avro_schema)
            serializer = MessageSerializer(client)
            encoded = serializer.encode_record_with_schema(
                topicname, avro_schema, {
                    "authid": mls[0],
                    "screen_name": mls[1],
                    "description": mls[2],
                    "favourites_count": convert_long(mls[3]),
                    "followers_count": convert_long(mls[4]),
                    "friends_count": convert_long(mls[5]),
                    "listed_count": convert_long(mls[6]),
                    "location": mls[7],
                    "id_str": mls[8],
                    "time_zone": mls[9],
                    "statuses_count": convert_long(mls[10]),
                    "created_at": mls[11],
                    "favorite_count": convert_long(mls[12]),
                    "tid": mls[13],
                    "in_reply_to_status_id_str": mls[14],
                    "in_reply_to_user_id_str": mls[15],
                    "lang": mls[16],
                    "possibly_sensitive": mls[17],
                    "retweet_count": convert_long(mls[18]),
                    "text": mls[19],
                    "entities_url": mls[20],
                    "entities_expanded_url": mls[21],
                    "entities_media_url": mls[22],
                    "disgust": convert_long(mls[23]),
                    "fear": convert_long(mls[24]),
                    "sadness": convert_long(mls[25]),
                    "surprise": convert_long(mls[26]),
                    "trust": convert_long(mls[27]),
                    "negative": convert_long(mls[28]),
                    "positive": convert_long(mls[29]),
                    "neutral": convert_long(mls[30]),
                    "celebrities": (mls[31]),
                    "events": (mls[32]),
                    "brands": (mls[33]),
                    "accessories": (mls[34])
                })
        except Exception, e:
            logging.debug(
                'There was an error in the generation of the avro file. The error is: %s'
                % e)
            print 'Error in avro generation : ', e
            print mls
            twitter_utils.sendErrorMail(
                'There was an error in the generation of the avro file. The error is %s. This is likely due to an error in the schema. Please check the schema file under twitter_avro_schema.avsc'
                % e)
            return True
            twitter_utils.sendErrorMail(
                'There was an error in the generation of the avro file. The error is %s. This is likely due to an error in the schema. Please check the schema file under twitter_avro_schema.avsc'
                % e)
            return True

        try:
            p.produce(topicname, encoded)
            print 'succesfully added to topic =' + str(
                topicname) + ' at: ' + str(datetime.now())
        except Exception, e:
            logging.debug(
                'There was an error in writing to the Kafka topic. The error is: %s'
                % e)
            print 'Error in  writing in kafka topic'
            twitter_utils.sendErrorMail(
                'There was an error in writing to the kafka topic. The error is %s'
                % e)
            return True

    def on_error(self, status_code):
        """Log a stream error and, for status 420, try to switch producer credentials.

        NOTE(review): this method is cut off in the visible source right after
        the "Producer found" print; the rest of the 420 handling and the
        return value cannot be seen here.

        :param status_code: numeric status code reported for the stream error.
        """
        #error handling, most importantly in the case of 420 errors
        # Timestamp the log entries that follow.
        logging.debug(datetime.now().strftime("%A, %d. %B %Y %I:%M%p"))
        logging.debug(
            'There was a generic error. The status code is %d. The producer number is %d.'
            % (status_code, cmdargs))
        print('Got an error with status code: ' + str(status_code))
        if status_code == 420:
            # Ask for different producer credentials, passing the
            # module-level cmdargs (the "producer number" in the log message).
            producer_change_result = master_producer_utils.change_prod_credentials(
                cmdargs)
            if producer_change_result == 'producer found':
                print "Producer found"
def writeToavro(p, mls):
    #converts the payload into the avro format in preparation for loading into hbase
    try:
        avro_schema = Util.parse_schema_from_string(
            open('/root/quest/twitter_avro_schema.avsc').read())
        client = CachedSchemaRegistryClient(url='http://192.168.111.12:8081')
        schema_id = client.register('twitter_avro__schema_stream4',
                                    avro_schema)
        avro_schema = client.get_by_id(schema_id)
        schema_id, avro_schema, schema_version = client.get_latest_schema(
            'twitter_avro__schema_stream4')
        schema_version = client.get_version('twitter_avro__schema_stream4',
                                            avro_schema)
        serializer = MessageSerializer(client)
        encoded = serializer.encode_record_with_schema(
            topicname, avro_schema, {
                "authid": mls[0],
                "screen_name": mls[1],
                "description": mls[2],
                "favourites_count": convert_long(mls[3]),
                "followers_count": convert_long(mls[4]),
                "friends_count": convert_long(mls[5]),
                "listed_count": convert_long(mls[6]),
                "location": mls[7],
                "id_str": mls[8],
                "time_zone": mls[9],
                "statuses_count": convert_long(mls[10]),
                "created_at": mls[11],
                "favorite_count": convert_long(mls[12]),
                "tid": mls[13],
                "in_reply_to_status_id_str": mls[14],
                "in_reply_to_user_id_str": mls[15],
                "lang": mls[16],
                "possibly_sensitive": mls[17],
                "retweet_count": convert_long(mls[18]),
                "text": mls[19],
                "entities_url": mls[20],
                "entities_expanded_url": mls[21],
                "entities_media_url": mls[22],
                "disgust": convert_long(mls[23]),
                "fear": convert_long(mls[24]),
                "sadness": convert_long(mls[25]),
                "surprise": convert_long(mls[26]),
                "trust": convert_long(mls[27]),
                "negative": convert_long(mls[28]),
                "positive": convert_long(mls[29]),
                "neutral": convert_long(mls[30]),
                "celebrities": (mls[31]),
                "events": (mls[32]),
                "brands": (mls[33]),
                "accessories": (mls[34])
            })
    except Exception, e:
        logging.debug(
            'There was an error in the generation of the avro file. The error is: %s'
            % e)
        print 'Error in avro generation : ', e
        print mls
        twitter_utils.sendErrorMail(
            'There was an error in the generation of the avro file. The error is %s'
            % e)
        return True
        print mls
        twitter_utils.sendErrorMail(
            'There was an error in the generation of the avro file. The error is %s'
            % e)
        return True
    try:
        p.produce(topicname, encoded)
        print 'succesfully added to topic =' + str(topicname) + ' at: ' + str(
            datetime.now())
    except Exception, e:
        logging.debug(
            'There was an error in writing to the Kafka topic. The error is: %s'
            % e)
        print 'Error in  writing in kafka topic'
        twitter_utils.sendErrorMail(
            'There was an error in writing to the kafka topic. The error is %s'
            % e)
        return True


if __name__ == '__main__':
    # Script entry point: create the Kafka producer and open the database
    # connection. NOTE(review): the rest of this block is not visible here.
    try:
        conf = {'bootstrap.servers': 'localhost:9092'}
        p = Producer(**conf)
    except Exception, e:
        # NOTE(review): the failure is only printed; execution continues and
        # `p` stays unbound if Producer() raised - confirm this is intended.
        print 'Error in Producer Configuration : ', e

    # presumably (host, user, password, database) - TODO confirm against the
    # MySQLdb.connect signature.
    db = MySQLdb.connect("192.168.111.10", "root", "", "quest_streaming")
    cursor = db.cursor()

    # Gets the credentials for Twitter from the SQL database (the code that
    # does so is not visible in this fragment).
Ejemplo n.º 10
0
    def on_timeout(self):
        """Report a listener timeout on stdout and by e-mail.

        :return: ``True`` (per the original comment: to continue listening).
        """
        print('Timeout...')
        timeout_notice = 'Timeout Error from Twitter Listner'
        twitter_utils.sendErrorMail(timeout_notice)
        return True  # To continue listening
Ejemplo n.º 11
0
    def on_error(self, status_code):
        """Report a stream error on stdout and by e-mail.

        :param status_code: status code reported for the stream error; it is
            converted with ``str()`` for the notice.
        :return: ``True`` (per the original comment: to continue listening).
        """
        error_notice = 'Got an error with status code: ' + str(status_code)
        print(error_notice)
        twitter_utils.sendErrorMail(error_notice)
        return True  # To continue listening
Ejemplo n.º 12
0
        # NOTE(review): this fragment is the body of a `try:` whose header is
        # not visible here; the matching `except` follows below.
        listener = StdOutListener()

        # Build the OAuth handler from the consumer key/secret held in
        # twitter_utils.
        auth = tweepy.OAuthHandler(twitter_utils.consumer_key,
                                   twitter_utils.consumer_secret)

        #search_text = json.loads(ConfigSectionMap("search_parms")['search_text'].decode('utf-8'))
        #print search_text
        # Attach the access token/secret to the handler.
        auth.set_access_token(twitter_utils.access_token,
                              twitter_utils.access_secret)

        stream = tweepy.Stream(auth, listener)

        ######################################################################
        # NOTE(review): the original banner here described a 1% random sample
        # of all tweets, but the code below calls stream.filter() with a fixed
        # list of track terms rather than a sample call.
        ######################################################################
        #track_var =  twitter_utils.streaming_track['tracks']
        #print track_var
        print 'streaming now'
        # The track terms are converted to unicode objects (Python 2) from
        # UTF-8 byte strings before being passed to the filter.
        stream.filter(track=[
            unicode("APPLE", "utf-8"),
            unicode("#CulturalEvent", "utf-8"),
            unicode("كتارا_", "utf-8")
        ])

    except Exception, e:
        # Catch-all for anything raised while setting up or running the
        # stream: print the error and send it by e-mail.
        print 'Unknown Error in main loop'
        #twitter_utils.sendErrorMail('Error in main loop is: ' + str(e))
        print Exception, e
        twitter_utils.sendErrorMail('Error in main loop is: ' + str(e))
            # NOTE(review): orphaned tail of an exception handler; the
            # enclosing method header and `try`/`except` lines are not
            # visible in this fragment.
            twitter_utils.sendErrorMail(
                'There was an error in the generation of the avro file. The error is %s. This is likely due to an error in the schema. Please check the schema file under twitter_avro_schema.avsc'
                % e)
            return True

        # Hand the encoded record to the producer `p` for `topicname`; a
        # failure is logged, printed and mailed, and True is returned.
        try:
            p.produce(topicname, encoded)
            print 'succesfully added to topic =' + str(
                topicname) + ' at: ' + str(datetime.now())
        except Exception, e:
            logging.debug(
                'There was an error in writing to the Kafka topic. The error is: %s'
                % e)
            print 'Error in  writing in kafka topic'
            twitter_utils.sendErrorMail(
                'There was an error in writing to the kafka topic. The error is %s'
                % e)
            return True

    def on_error(self, status_code):
        logging.debug(datetime.now().strftime("%A, %d. %B %Y %I:%M%p"))
        logging.debug(
            'There was a generic error. The status code is %d. The producer number is %d.'
            % (status_code, cmdargs))
        print('Got an error with status code: ' + str(status_code))
        if status_code == 420:
            producer_change_result = master_producer_utils.change_prod_credentials(
                cmdargs)
            if producer_change_result == 'producer found':
                print "Producer found"
                logging.debug(