def on_error(self, status_code):
    """Log an error status code and react specially to status 420.

    Every call logs a timestamp plus the status code together with the
    module-level ``cmdargs`` producer number, and echoes the code to stdout.

    For status 420 the method asks
    ``master_producer_utils.change_prod_credentials(cmdargs)`` for another
    producer:

    * if it answers ``'producer found'``, the switch is logged and
      ``sys.exit()`` is called (presumably so the process can be restarted
      with the new credentials -- confirm with the deployment setup);
    * for any other answer an error mail is sent, the same text is logged,
      and the call sleeps for two hours before returning.

    Args:
        status_code: Numeric status code reported to the listener.

    Returns:
        True, to continue listening.
    """
    # Error handling, most importantly in the case of 420 errors.
    logging.debug(datetime.now().strftime("%A, %d. %B %Y %I:%M%p"))
    logging.debug(
        'There was a generic error. The status code is %d. The producer number is %d.'
        % (status_code, cmdargs))
    print('Got an error with status code: ' + str(status_code))

    # Anything other than 420 needs no further handling.
    if status_code != 420:
        return True  # To continue listening

    switch_outcome = master_producer_utils.change_prod_credentials(cmdargs)
    if switch_outcome == 'producer found':
        print("Producer found")
        logging.debug(
            "A 420 error was encountered but an app which was not at capacity was found and switched to."
        )
        sys.exit()

    # Only reached when no replacement producer could be selected.
    print("Producer not found")
    intervention_note = (
        'A 420 error was encountered and the stored proc either failed to execute or no available producer was found. Intervention is required. The producer number is %d'
        % cmdargs)
    twitter_utils.sendErrorMail(intervention_note)
    logging.debug(intervention_note)
    sleep(2 * 60 * 60)  # wait two hours before handing control back
    return True  # To continue listening
def on_timeout(self):
    """Record a listener timeout and notify by mail.

    Logs a timestamp and a timeout message containing the module-level
    ``cmdargs`` producer number, prints ``Timeout...`` to stdout and sends
    the same message through ``twitter_utils.sendErrorMail``.

    Returns:
        True, to continue listening.
    """
    logging.debug(datetime.now().strftime("%A, %d. %B %Y %I:%M%p"))

    # The log entry and the mail carry exactly the same text.
    timeout_note = (
        'There was a timeout error from the the Twitter Listener. The producer number is %d.'
        % cmdargs)
    logging.debug(timeout_note)
    print('Timeout...')
    twitter_utils.sendErrorMail(timeout_note)
    return True  # To continue listening
def convert_long(val):
    """Convert *val* to a ``long``, falling back to 0 when that fails.

    A failed conversion is logged, printed to stdout and reported through
    ``twitter_utils.sendErrorMail`` before the fallback value is used.

    Args:
        val: Value to convert; anything accepted by ``long()``.

    Returns:
        ``long(val)`` on success, otherwise ``0``.
    """
    try:
        converted = long(val)
    except Exception as e:
        logging.debug(
            'Error in long type conversion inside the creation of the avro schema: %s'
            % e)
        # Format the message explicitly: with the Python 2 print statement,
        # print("...", e) would emit the repr of a tuple.
        print("Error in Long type conversion : %s" % e)
        twitter_utils.sendErrorMail(
            'There was an error in long type conversion inside the avro schema creation. The error is %s.'
            % e)
        converted = 0
    # Callers place the result directly into the record, so it must be
    # returned (previously the function fell off the end and yielded None).
    return converted
def on_status(self, message):
    """Build a payload from *message* and send it to the Kafka topic.

    The payload comes from ``createPayload(message)`` and is sent with the
    module-level ``producer`` to the UTF-8 encoded ``mytopic``. A failed
    send is printed to stdout and reported through
    ``twitter_utils.sendErrorMail``.

    Args:
        message: Status object handed to the listener.

    Returns:
        True, whether or not the send succeeded.
    """
    kafka_input = createPayload(message)
    try:
        print('inside kafka')
        producer.send(mytopic.encode('utf-8'), kafka_input)
    except Exception as e:
        print("Exception while writing in kafka topic")
        # Send the mail before returning (it used to sit after the return
        # and never ran) and stringify the exception, since concatenating
        # a str with an exception object raises TypeError.
        twitter_utils.sendErrorMail('The error is: ' + str(e))
    return True
def on_status(self, message): logging.debug(datetime.now().strftime("%A, %d. %B %Y %I:%M%p")) try: mls = createPayload(message) print mls timevar = datetime.utcnow() - datetime.strptime( mls[11], '%Y-%m-%d %H:%M:%S') print datetime.utcnow(), datetime.strptime(mls[11], '%Y-%m-%d %H:%M:%S') print "Minutes and Seconds : ", divmod( timevar.days * 86400 + timevar.seconds, 60) except Exception, e: logging.debug( 'There was an error in creating the payload. The error is: %s' % e) print 'Error in Payload Creation : ', str(e) twitter_utils.sendErrorMail( 'There was an error in creating the payload. The error is %s' % e) return True
class StdOutListener(tweepy.StreamListener): def on_status(self, message): logging.debug(datetime.now().strftime("%A, %d. %B %Y %I:%M%p")) try: mls = createPayload(message) print mls timevar = datetime.utcnow() - datetime.strptime( mls[11], '%Y-%m-%d %H:%M:%S') print datetime.utcnow(), datetime.strptime(mls[11], '%Y-%m-%d %H:%M:%S') print "Minutes and Seconds : ", divmod( timevar.days * 86400 + timevar.seconds, 60) except Exception, e: logging.debug( 'There was an error in creating the payload. The error is: %s' % e) print 'Error in Payload Creation : ', str(e) twitter_utils.sendErrorMail( 'There was an error in creating the payload. The error is %s' % e) return True try: #converts the payload into the avro format in preparation for loading into hbase avro_schema = Util.parse_schema_from_string( open('/**/**/twitter.avsc').read()) client = CachedSchemaRegistryClient(url='http://192.168.**:8081') schema_id = client.register('twitter_avro_schema_stream4', avro_schema) avro_schema = client.get_by_id(schema_id) schema_id, avro_schema, schema_version = client.get_latest_schema( 'twitter_avro_schema_stream4') schema_version = client.get_version('twitter_avro_schema_stream4', avro_schema) serializer = MessageSerializer(client) encoded = serializer.encode_record_with_schema( topicname, avro_schema, { "authid": mls[0], "screen_name": mls[1], "description": mls[2], "favourites_count": convert_long(mls[3]), "followers_count": convert_long(mls[4]), "friends_count": convert_long(mls[5]), "listed_count": convert_long(mls[6]), "location": mls[7], "id_str": mls[8], "time_zone": mls[9], "statuses_count": convert_long(mls[10]), "created_at": mls[11], "favorite_count": convert_long(mls[12]), "tid": mls[13], "in_reply_to_status_id_str": mls[14], "in_reply_to_user_id_str": mls[15], "lang": mls[16], "possibly_sensitive": mls[17], "retweet_count": convert_long(mls[18]), "text": mls[19], "entities_url": mls[20], "entities_expanded_url": mls[21], "entities_media_url": mls[22], "disgust": 
convert_long(mls[23]), "fear": convert_long(mls[24]), "sadness": convert_long(mls[25]), "surprise": convert_long(mls[26]), "trust": convert_long(mls[27]), "negative": convert_long(mls[28]), "positive": convert_long(mls[29]), "neutral": convert_long(mls[30]), "celebrities": (mls[31]), "events": (mls[32]), "brands": (mls[33]), "accessories": (mls[34]) }) except Exception, e: logging.debug( 'There was an error in the generation of the avro file. The error is: %s' % e) print 'Error in avro generation : ', e print mls twitter_utils.sendErrorMail( 'There was an error in the generation of the avro file. The error is %s. This is likely due to an error in the schema. Please check the schema file under twitter_avro_schema.avsc' % e) return True
twitter_utils.sendErrorMail( 'There was an error in the generation of the avro file. The error is %s. This is likely due to an error in the schema. Please check the schema file under twitter_avro_schema.avsc' % e) return True try: p.produce(topicname, encoded) print 'succesfully added to topic =' + str( topicname) + ' at: ' + str(datetime.now()) except Exception, e: logging.debug( 'There was an error in writing to the Kafka topic. The error is: %s' % e) print 'Error in writing in kafka topic' twitter_utils.sendErrorMail( 'There was an error in writing to the kafka topic. The error is %s' % e) return True def on_error(self, status_code): #error handling, most importantly in the case of 420 errors logging.debug(datetime.now().strftime("%A, %d. %B %Y %I:%M%p")) logging.debug( 'There was a generic error. The status code is %d. The producer number is %d.' % (status_code, cmdargs)) print('Got an error with status code: ' + str(status_code)) if status_code == 420: producer_change_result = master_producer_utils.change_prod_credentials( cmdargs) if producer_change_result == 'producer found': print "Producer found"
def writeToavro(p, mls): #converts the payload into the avro format in preparation for loading into hbase try: avro_schema = Util.parse_schema_from_string( open('/root/quest/twitter_avro_schema.avsc').read()) client = CachedSchemaRegistryClient(url='http://192.168.111.12:8081') schema_id = client.register('twitter_avro__schema_stream4', avro_schema) avro_schema = client.get_by_id(schema_id) schema_id, avro_schema, schema_version = client.get_latest_schema( 'twitter_avro__schema_stream4') schema_version = client.get_version('twitter_avro__schema_stream4', avro_schema) serializer = MessageSerializer(client) encoded = serializer.encode_record_with_schema( topicname, avro_schema, { "authid": mls[0], "screen_name": mls[1], "description": mls[2], "favourites_count": convert_long(mls[3]), "followers_count": convert_long(mls[4]), "friends_count": convert_long(mls[5]), "listed_count": convert_long(mls[6]), "location": mls[7], "id_str": mls[8], "time_zone": mls[9], "statuses_count": convert_long(mls[10]), "created_at": mls[11], "favorite_count": convert_long(mls[12]), "tid": mls[13], "in_reply_to_status_id_str": mls[14], "in_reply_to_user_id_str": mls[15], "lang": mls[16], "possibly_sensitive": mls[17], "retweet_count": convert_long(mls[18]), "text": mls[19], "entities_url": mls[20], "entities_expanded_url": mls[21], "entities_media_url": mls[22], "disgust": convert_long(mls[23]), "fear": convert_long(mls[24]), "sadness": convert_long(mls[25]), "surprise": convert_long(mls[26]), "trust": convert_long(mls[27]), "negative": convert_long(mls[28]), "positive": convert_long(mls[29]), "neutral": convert_long(mls[30]), "celebrities": (mls[31]), "events": (mls[32]), "brands": (mls[33]), "accessories": (mls[34]) }) except Exception, e: logging.debug( 'There was an error in the generation of the avro file. The error is: %s' % e) print 'Error in avro generation : ', e print mls twitter_utils.sendErrorMail( 'There was an error in the generation of the avro file. 
The error is %s' % e) return True
print mls twitter_utils.sendErrorMail( 'There was an error in the generation of the avro file. The error is %s' % e) return True try: p.produce(topicname, encoded) print 'succesfully added to topic =' + str(topicname) + ' at: ' + str( datetime.now()) except Exception, e: logging.debug( 'There was an error in writing to the Kafka topic. The error is: %s' % e) print 'Error in writing in kafka topic' twitter_utils.sendErrorMail( 'There was an error in writing to the kafka topic. The error is %s' % e) return True if __name__ == '__main__': try: conf = {'bootstrap.servers': 'localhost:9092'} p = Producer(**conf) except Exception, e: print 'Error in Producer Configuration : ', e db = MySQLdb.connect("192.168.111.10", "root", "", "quest_streaming") cursor = db.cursor() #gets credentials for twitter from the sql db
def on_timeout(self):
    """Report a listener timeout on stdout and by mail.

    Returns:
        True, to continue listening.
    """
    print('Timeout...')
    mail_text = 'Timeout Error from Twitter Listner'
    twitter_utils.sendErrorMail(mail_text)
    return True  # To continue listening
def on_error(self, status_code):
    """Report an error status code on stdout and by mail.

    Args:
        status_code: Status code reported to the listener.

    Returns:
        True, to continue listening.
    """
    # The console line and the mail carry exactly the same text.
    report = 'Got an error with status code: ' + str(status_code)
    print(report)
    twitter_utils.sendErrorMail(report)
    return True  # To continue listening
listener = StdOutListener() #sign oath cert auth = tweepy.OAuthHandler(twitter_utils.consumer_key, twitter_utils.consumer_secret) #search_text = json.loads(ConfigSectionMap("search_parms")['search_text'].decode('utf-8')) #print search_text auth.set_access_token(twitter_utils.access_token, twitter_utils.access_secret) stream = tweepy.Stream(auth, listener) ###################################################################### #Sample delivers a stream of 1% (random selection) of all tweets ###################################################################### #track_var = twitter_utils.streaming_track['tracks'] #print track_var print 'streaming now' stream.filter(track=[ unicode("APPLE", "utf-8"), unicode("#CulturalEvent", "utf-8"), unicode("كتارا_", "utf-8") ]) except Exception, e: print 'Unknown Error in main loop' #twitter_utils.sendErrorMail('Error in main loop is: ' + str(e)) print Exception, e twitter_utils.sendErrorMail('Error in main loop is: ' + str(e))
twitter_utils.sendErrorMail( 'There was an error in the generation of the avro file. The error is %s. This is likely due to an error in the schema. Please check the schema file under twitter_avro_schema.avsc' % e) return True try: p.produce(topicname, encoded) print 'succesfully added to topic =' + str( topicname) + ' at: ' + str(datetime.now()) except Exception, e: logging.debug( 'There was an error in writing to the Kafka topic. The error is: %s' % e) print 'Error in writing in kafka topic' twitter_utils.sendErrorMail( 'There was an error in writing to the kafka topic. The error is %s' % e) return True def on_error(self, status_code): logging.debug(datetime.now().strftime("%A, %d. %B %Y %I:%M%p")) logging.debug( 'There was a generic error. The status code is %d. The producer number is %d.' % (status_code, cmdargs)) print('Got an error with status code: ' + str(status_code)) if status_code == 420: producer_change_result = master_producer_utils.change_prod_credentials( cmdargs) if producer_change_result == 'producer found': print "Producer found" logging.debug(