def test_multi_register(self):
    # Registers two different schemas under one subject and checks that the
    # "latest" entry follows the newest schema, while re-registering the
    # first schema leaves the "latest" entry untouched.
    basic_schema = Util.parse_schema_from_string(data_gen.BASIC_SCHEMA)
    advanced_schema = Util.parse_schema_from_string(data_gen.ADVANCED_SCHEMA)
    subject = 'test'
    registry = self.client

    # First registration: the basic schema.
    basic_id = registry.register(subject, basic_schema)
    latest_basic = registry.get_latest_schema(subject)
    basic_version = registry.get_version(subject, basic_schema)
    self.assertLatest(latest_basic, basic_id, basic_schema, basic_version)

    # Second registration under the same subject: the advanced schema.
    advanced_id = registry.register(subject, advanced_schema)
    latest_advanced = registry.get_latest_schema(subject)
    advanced_version = registry.get_version(subject, advanced_schema)
    self.assertLatest(
        latest_advanced, advanced_id, advanced_schema, advanced_version)

    self.assertNotEqual(basic_id, advanced_id)
    self.assertNotEqual(latest_basic, latest_advanced)
    # The version (third element of the "latest" tuple) must have grown.
    older_version, newer_version = latest_basic[2], latest_advanced[2]
    self.assertTrue(older_version < newer_version)

    # Registering the basic schema again must not change the latest entry.
    registry.register(subject, basic_schema)
    latest_after_rereg = registry.get_latest_schema(subject)
    self.assertEqual(latest_advanced, latest_after_rereg)
def test_encode_record_with_schema(self):
    # Encodes every basic item for topic 'test' with the basic schema and
    # checks each message against the id registered under 'test-value'.
    topic_name = 'test'
    basic_schema = Util.parse_schema_from_string(data_gen.BASIC_SCHEMA)
    value_subject = 'test-value'
    registered_id = self.client.register(value_subject, basic_schema)

    for item in data_gen.BASIC_ITEMS:
        encoded = self.ms.encode_record_with_schema(
            topic_name, basic_schema, item)
        self.assertMessageIsSame(encoded, item, registered_id)
def test_encode_record_with_schema(self):
    # Every item in data_gen.BASIC_ITEMS is encoded by schema (not by id);
    # the resulting message is compared with the source item and with the
    # id obtained when the same schema was registered as 'test-value'.
    topic = 'test'
    parsed = Util.parse_schema_from_string(data_gen.BASIC_SCHEMA)
    expected_id = self.client.register('test-value', parsed)

    items = data_gen.BASIC_ITEMS
    for original in items:
        payload = self.ms.encode_record_with_schema(topic, parsed, original)
        self.assertMessageIsSame(payload, original, expected_id)
def test_encode_with_schema_id(self):
    # Encodes records by schema id for two schemas registered under two
    # subjects, checking every message against the id it was encoded with.
    advanced_schema = Util.parse_schema_from_string(data_gen.ADVANCED_SCHEMA)
    basic_schema = Util.parse_schema_from_string(data_gen.BASIC_SCHEMA)

    # Basic schema under subject 'test'.
    basic_id = self.client.register('test', basic_schema)
    for item in data_gen.BASIC_ITEMS:
        encoded = self.ms.encode_record_with_schema_id(basic_id, item)
        self.assertMessageIsSame(encoded, item, basic_id)

    # Advanced schema under subject 'test_adv'; it must get its own id.
    advanced_id = self.client.register('test_adv', advanced_schema)
    self.assertNotEqual(advanced_id, basic_id)
    for item in data_gen.ADVANCED_ITEMS:
        encoded = self.ms.encode_record_with_schema_id(advanced_id, item)
        self.assertMessageIsSame(encoded, item, advanced_id)
def test_multi_subject_register(self):
    # The same schema registered under two subjects must yield the same id
    # and leave exactly one entry in the client's id_to_schema mapping.
    basic_schema = Util.parse_schema_from_string(data_gen.BASIC_SCHEMA)
    registry = self.client

    first_id = registry.register('test', basic_schema)
    self.assertTrue(first_id > 0)

    # Register again, this time under a different subject.
    second_id = registry.register('other', basic_schema)
    self.assertEqual(first_id, second_id)

    known_schema_count = len(registry.id_to_schema)
    self.assertEqual(known_schema_count, 1)
def test_encode_with_schema_id(self):
    # Two rounds of encode-by-id: first the basic schema (subject 'test'),
    # then the advanced schema (subject 'test_adv'). Each encoded message
    # is checked against its source record and the id used to encode it.
    adv_parsed = Util.parse_schema_from_string(data_gen.ADVANCED_SCHEMA)
    basic_parsed = Util.parse_schema_from_string(data_gen.BASIC_SCHEMA)

    subject = 'test'
    id_for_basic = self.client.register(subject, basic_parsed)
    basic_records = data_gen.BASIC_ITEMS
    for rec in basic_records:
        msg = self.ms.encode_record_with_schema_id(id_for_basic, rec)
        self.assertMessageIsSame(msg, rec, id_for_basic)

    subject = 'test_adv'
    id_for_adv = self.client.register(subject, adv_parsed)
    # A different schema must not reuse the basic schema's id.
    self.assertNotEqual(id_for_adv, id_for_basic)
    adv_records = data_gen.ADVANCED_ITEMS
    for rec in adv_records:
        msg = self.ms.encode_record_with_schema_id(id_for_adv, rec)
        self.assertMessageIsSame(msg, rec, id_for_adv)
def _get_schema_from_body(self, req):
    """Read the JSON request body and return the schema it carries.

    The body length is taken from the 'content-length' header, the body is
    decoded as JSON and its "schema" entry is parsed with
    ``Util.parse_schema_from_string`` and passed through
    ``self._get_identity_schema``.

    Returns None when the "schema" entry is missing or empty, or when
    parsing / identity lookup raises an ordinary exception.
    """
    length = int(req.headers.getheader('content-length'))
    data = json.loads(req.rfile.read(length))
    schema = data.get("schema", None)
    if not schema:
        return None
    try:
        avro_schema = Util.parse_schema_from_string(schema)
        return self._get_identity_schema(avro_schema)
    except Exception:
        # Best effort: an unparsable schema is reported as "no schema".
        # Only Exception is caught so that KeyboardInterrupt / SystemExit
        # are no longer swallowed as they were by a bare ``except:``.
        return None
def _get_schema_from_body(self, req):
    """Return the schema found in the JSON body of *req*, or None.

    Reads exactly 'content-length' bytes from ``req.rfile``, decodes them
    as JSON and looks up the "schema" entry. A non-empty entry is parsed
    with ``Util.parse_schema_from_string`` and the result is handed to
    ``self._get_identity_schema``, whose return value is returned.

    None is returned when there is no (or an empty) "schema" entry, or
    when parsing / identity lookup fails with an ordinary exception.
    """
    length = int(req.headers.getheader('content-length'))
    raw_body = req.rfile.read(length)
    data = json.loads(raw_body)
    schema = data.get("schema", None)
    if not schema:
        return None
    try:
        avro_schema = Util.parse_schema_from_string(schema)
        return self._get_identity_schema(avro_schema)
    except Exception:
        # Deliberately best effort, but narrower than a bare ``except:``
        # so KeyboardInterrupt and SystemExit still propagate.
        return None
def test_dupe_register(self):
    # Registering the same schema twice under one subject must return the
    # same id and leave the subject's latest entry unchanged.
    basic_schema = Util.parse_schema_from_string(data_gen.BASIC_SCHEMA)
    subject = 'test'
    registry = self.client

    first_id = registry.register(subject, basic_schema)
    self.assertTrue(first_id > 0)
    latest_before = registry.get_latest_schema(subject)

    # Register again under the same subject.
    second_id = registry.register(subject, basic_schema)
    self.assertEqual(first_id, second_id)
    latest_after = registry.get_latest_schema(subject)
    self.assertEqual(latest_before, latest_after)
def test_getters(self):
    # Checks the client's getters before anything is registered (version
    # -1, no schema for id 1, an all-None latest tuple) and again after
    # registering the basic schema.
    basic_schema = Util.parse_schema_from_string(data_gen.BASIC_SCHEMA)
    registry = self.client
    subject = 'test'

    # Nothing registered yet.
    unknown_version = registry.get_version(subject, basic_schema)
    self.assertEqual(unknown_version, -1)
    unknown_schema = registry.get_by_id(1)
    self.assertEqual(unknown_schema, None)
    empty_latest = registry.get_latest_schema(subject)
    self.assertEqual(empty_latest, (None, None, None))

    # After registration the getters must agree with each other.
    registered_id = registry.register(subject, basic_schema)
    current_latest = registry.get_latest_schema(subject)
    current_version = registry.get_version(subject, basic_schema)
    self.assertLatest(
        current_latest, registered_id, basic_schema, current_version)

    by_id = registry.get_by_id(registered_id)
    self.assertEqual(by_id, basic_schema)
def test_getters(self):
    # Getter behaviour in two phases: with an empty registry for the
    # subject, then once the basic schema has been registered.
    schema_obj = Util.parse_schema_from_string(data_gen.BASIC_SCHEMA)
    client = self.client
    subject = 'test'

    # Phase 1: empty registry -> -1 / None / (None, None, None).
    self.assertEqual(client.get_version(subject, schema_obj), -1)
    self.assertEqual(client.get_by_id(1), None)
    self.assertEqual(client.get_latest_schema(subject), (None, None, None))

    # Phase 2: register, then read everything back.
    new_id = client.register(subject, schema_obj)
    latest_entry = client.get_latest_schema(subject)
    version_now = client.get_version(subject, schema_obj)
    self.assertLatest(latest_entry, new_id, schema_obj, version_now)
    self.assertEqual(client.get_by_id(new_id), schema_obj)
def parse_json(self, json):
    """Parse *json* with ``Util.parse_schema_from_string``.

    Returns None without parsing when *json* is empty or otherwise falsy.
    """
    if not json:
        return None
    return Util.parse_schema_from_string(json)
class StdOutListener(tweepy.StreamListener): def on_status(self, message): logging.debug(datetime.now().strftime("%A, %d. %B %Y %I:%M%p")) try: mls = createPayload(message) print mls timevar = datetime.utcnow() - datetime.strptime( mls[11], '%Y-%m-%d %H:%M:%S') print datetime.utcnow(), datetime.strptime(mls[11], '%Y-%m-%d %H:%M:%S') print "Minutes and Seconds : ", divmod( timevar.days * 86400 + timevar.seconds, 60) except Exception, e: logging.debug( 'There was an error in creating the payload. The error is: %s' % e) print 'Error in Payload Creation : ', str(e) twitter_utils.sendErrorMail( 'There was an error in creating the payload. The error is %s' % e) return True try: #converts the payload into the avro format in preparation for loading into hbase avro_schema = Util.parse_schema_from_string( open('/**/**/twitter.avsc').read()) client = CachedSchemaRegistryClient(url='http://192.168.**:8081') schema_id = client.register('twitter_avro_schema_stream4', avro_schema) avro_schema = client.get_by_id(schema_id) schema_id, avro_schema, schema_version = client.get_latest_schema( 'twitter_avro_schema_stream4') schema_version = client.get_version('twitter_avro_schema_stream4', avro_schema) serializer = MessageSerializer(client) encoded = serializer.encode_record_with_schema( topicname, avro_schema, { "authid": mls[0], "screen_name": mls[1], "description": mls[2], "favourites_count": convert_long(mls[3]), "followers_count": convert_long(mls[4]), "friends_count": convert_long(mls[5]), "listed_count": convert_long(mls[6]), "location": mls[7], "id_str": mls[8], "time_zone": mls[9], "statuses_count": convert_long(mls[10]), "created_at": mls[11], "favorite_count": convert_long(mls[12]), "tid": mls[13], "in_reply_to_status_id_str": mls[14], "in_reply_to_user_id_str": mls[15], "lang": mls[16], "possibly_sensitive": mls[17], "retweet_count": convert_long(mls[18]), "text": mls[19], "entities_url": mls[20], "entities_expanded_url": mls[21], "entities_media_url": mls[22], "disgust": 
convert_long(mls[23]), "fear": convert_long(mls[24]), "sadness": convert_long(mls[25]), "surprise": convert_long(mls[26]), "trust": convert_long(mls[27]), "negative": convert_long(mls[28]), "positive": convert_long(mls[29]), "neutral": convert_long(mls[30]), "celebrities": (mls[31]), "events": (mls[32]), "brands": (mls[33]), "accessories": (mls[34]) }) except Exception, e: logging.debug( 'There was an error in the generation of the avro file. The error is: %s' % e) print 'Error in avro generation : ', e print mls twitter_utils.sendErrorMail( 'There was an error in the generation of the avro file. The error is %s. This is likely due to an error in the schema. Please check the schema file under twitter_avro_schema.avsc' % e) return True
def test_schema_from_string(self):
    # Parsing the basic schema string must produce a schema.Schema instance.
    result = Util.parse_schema_from_string(data_gen.BASIC_SCHEMA)
    is_schema = isinstance(result, schema.Schema)
    self.assertTrue(is_schema)
def writeToavro(p, mls): #converts the payload into the avro format in preparation for loading into hbase try: avro_schema = Util.parse_schema_from_string( open('/root/quest/twitter_avro_schema.avsc').read()) client = CachedSchemaRegistryClient(url='http://192.168.111.12:8081') schema_id = client.register('twitter_avro__schema_stream4', avro_schema) avro_schema = client.get_by_id(schema_id) schema_id, avro_schema, schema_version = client.get_latest_schema( 'twitter_avro__schema_stream4') schema_version = client.get_version('twitter_avro__schema_stream4', avro_schema) serializer = MessageSerializer(client) encoded = serializer.encode_record_with_schema( topicname, avro_schema, { "authid": mls[0], "screen_name": mls[1], "description": mls[2], "favourites_count": convert_long(mls[3]), "followers_count": convert_long(mls[4]), "friends_count": convert_long(mls[5]), "listed_count": convert_long(mls[6]), "location": mls[7], "id_str": mls[8], "time_zone": mls[9], "statuses_count": convert_long(mls[10]), "created_at": mls[11], "favorite_count": convert_long(mls[12]), "tid": mls[13], "in_reply_to_status_id_str": mls[14], "in_reply_to_user_id_str": mls[15], "lang": mls[16], "possibly_sensitive": mls[17], "retweet_count": convert_long(mls[18]), "text": mls[19], "entities_url": mls[20], "entities_expanded_url": mls[21], "entities_media_url": mls[22], "disgust": convert_long(mls[23]), "fear": convert_long(mls[24]), "sadness": convert_long(mls[25]), "surprise": convert_long(mls[26]), "trust": convert_long(mls[27]), "negative": convert_long(mls[28]), "positive": convert_long(mls[29]), "neutral": convert_long(mls[30]), "celebrities": (mls[31]), "events": (mls[32]), "brands": (mls[33]), "accessories": (mls[34]) }) except Exception, e: logging.debug( 'There was an error in the generation of the avro file. The error is: %s' % e) print 'Error in avro generation : ', e print mls twitter_utils.sendErrorMail( 'There was an error in the generation of the avro file. 
The error is %s' % e) return True
def test_schema_from_file(self):
    # Parsing the 'adv_schema.avsc' file located through
    # data_gen.get_schema_path must produce a schema.Schema instance.
    schema_path = data_gen.get_schema_path('adv_schema.avsc')
    loaded = Util.parse_schema_from_file(schema_path)
    is_schema = isinstance(loaded, schema.Schema)
    self.assertTrue(is_schema)
def test_register(self):
    # A single registration must return a positive id and leave exactly one
    # entry in the client's id_to_schema mapping.
    basic_schema = Util.parse_schema_from_string(data_gen.BASIC_SCHEMA)
    registry = self.client

    new_id = registry.register('test', basic_schema)
    self.assertTrue(new_id > 0)

    known_schema_count = len(registry.id_to_schema)
    self.assertEqual(known_schema_count, 1)