def test_multi_register(self):
        """Register two schemas under one subject and follow the latest schema.

        The basic schema is registered first, then the advanced one; after
        each registration the latest schema must match what was just
        registered. Registering the basic schema a second time must leave the
        latest schema unchanged.
        """
        subject = 'test'
        registry = self.client
        basic_schema = Util.parse_schema_from_string(data_gen.BASIC_SCHEMA)
        advanced_schema = Util.parse_schema_from_string(
            data_gen.ADVANCED_SCHEMA)

        # First registration: the basic schema.
        basic_id = registry.register(subject, basic_schema)
        first_latest = registry.get_latest_schema(subject)
        basic_version = registry.get_version(subject, basic_schema)
        self.assertLatest(first_latest, basic_id, basic_schema, basic_version)

        # Second registration: the advanced schema under the same subject.
        advanced_id = registry.register(subject, advanced_schema)
        second_latest = registry.get_latest_schema(subject)
        advanced_version = registry.get_version(subject, advanced_schema)
        self.assertLatest(
            second_latest, advanced_id, advanced_schema, advanced_version)

        self.assertNotEqual(basic_id, advanced_id)
        self.assertNotEqual(first_latest, second_latest)
        # Element 2 of the latest-schema tuple is the version; it must grow.
        self.assertTrue(first_latest[2] < second_latest[2])

        # Re-registering an already known schema must not move "latest".
        registry.register(subject, basic_schema)
        third_latest = registry.get_latest_schema(subject)
        self.assertEqual(second_latest, third_latest)
예제 #2
0
    def test_multi_register(self):
        """Check latest-schema tracking across several registrations.

        Two different schemas are registered under the same subject, one
        after the other. Each registration must become the latest schema,
        the ids and latest tuples must differ, and the version must increase.
        A repeated registration of the first schema must change nothing.
        """
        subject = 'test'
        registry = self.client
        basic_schema = Util.parse_schema_from_string(data_gen.BASIC_SCHEMA)
        advanced_schema = Util.parse_schema_from_string(
            data_gen.ADVANCED_SCHEMA)

        # Step 1: basic schema.
        basic_id = registry.register(subject, basic_schema)
        latest_after_basic = registry.get_latest_schema(subject)
        basic_version = registry.get_version(subject, basic_schema)
        self.assertLatest(
            latest_after_basic, basic_id, basic_schema, basic_version)

        # Step 2: advanced schema, same subject.
        advanced_id = registry.register(subject, advanced_schema)
        latest_after_advanced = registry.get_latest_schema(subject)
        advanced_version = registry.get_version(subject, advanced_schema)
        self.assertLatest(
            latest_after_advanced, advanced_id, advanced_schema,
            advanced_version)

        self.assertNotEqual(basic_id, advanced_id)
        self.assertNotEqual(latest_after_basic, latest_after_advanced)
        # Index 2 of the latest-schema tuple holds the version.
        self.assertTrue(latest_after_basic[2] < latest_after_advanced[2])

        # Step 3: registering the basic schema again keeps "latest" as is.
        registry.register(subject, basic_schema)
        latest_after_repeat = registry.get_latest_schema(subject)
        self.assertEqual(latest_after_advanced, latest_after_repeat)
 def test_encode_record_with_schema(self):
     """Encode every basic item by topic and schema and check the message.

     The basic schema is registered under the subject 'test-value'; each
     encoded message must match its record and carry the registered id.
     """
     topic = 'test'
     value_subject = 'test-value'
     basic_schema = Util.parse_schema_from_string(data_gen.BASIC_SCHEMA)
     expected_id = self.client.register(value_subject, basic_schema)
     for item in data_gen.BASIC_ITEMS:
         encoded = self.ms.encode_record_with_schema(
             topic, basic_schema, item)
         self.assertMessageIsSame(encoded, item, expected_id)
 def test_encode_record_with_schema(self):
     """Check messages produced by ``encode_record_with_schema``.

     Registers the basic schema under 'test-value', encodes each item of
     ``data_gen.BASIC_ITEMS`` for topic 'test' and verifies every message
     against its source record and the registered schema id.
     """
     basic_schema = Util.parse_schema_from_string(data_gen.BASIC_SCHEMA)
     registered_id = self.client.register('test-value', basic_schema)
     for source_record in data_gen.BASIC_ITEMS:
         payload = self.ms.encode_record_with_schema(
             'test', basic_schema, source_record)
         self.assertMessageIsSame(payload, source_record, registered_id)
    def test_encode_with_schema_id(self):
        """Encode records by schema id for two separately registered schemas.

        The basic schema is registered under 'test' and the advanced schema
        under 'test_adv'; the two ids must differ. Every item encoded with an
        id must produce a message matching the item and that id.
        """
        advanced_schema = Util.parse_schema_from_string(
            data_gen.ADVANCED_SCHEMA)
        basic_schema = Util.parse_schema_from_string(data_gen.BASIC_SCHEMA)

        # Basic schema and its items.
        basic_id = self.client.register('test', basic_schema)
        for item in data_gen.BASIC_ITEMS:
            encoded = self.ms.encode_record_with_schema_id(basic_id, item)
            self.assertMessageIsSame(encoded, item, basic_id)

        # Advanced schema and its items, under a different subject.
        advanced_id = self.client.register('test_adv', advanced_schema)
        self.assertNotEqual(advanced_id, basic_id)
        for item in data_gen.ADVANCED_ITEMS:
            encoded = self.ms.encode_record_with_schema_id(advanced_id, item)
            self.assertMessageIsSame(encoded, item, advanced_id)
예제 #6
0
    def test_multi_subject_register(self):
        """Register one schema under two subjects and expect a single id.

        Both registrations must return the same positive id, and the client's
        ``id_to_schema`` mapping must hold exactly one entry afterwards.
        """
        registry = self.client
        basic_schema = Util.parse_schema_from_string(data_gen.BASIC_SCHEMA)

        first_id = registry.register('test', basic_schema)
        self.assertTrue(first_id > 0)

        # Same schema, different subject: the id is expected to be reused.
        second_id = registry.register('other', basic_schema)
        self.assertEqual(first_id, second_id)
        self.assertEqual(len(registry.id_to_schema), 1)
    def test_multi_subject_register(self):
        """Check that a schema shared by two subjects keeps one id.

        The basic schema is registered under 'test' and then under 'other'.
        The ids must be equal and ``client.id_to_schema`` must contain a
        single entry.
        """
        basic_schema = Util.parse_schema_from_string(data_gen.BASIC_SCHEMA)
        registry = self.client

        id_for_test = registry.register('test', basic_schema)
        self.assertTrue(id_for_test > 0)

        # Second registration uses another subject but the same schema.
        id_for_other = registry.register('other', basic_schema)
        self.assertEqual(id_for_test, id_for_other)
        cached_schema_count = len(registry.id_to_schema)
        self.assertEqual(cached_schema_count, 1)
    def test_encode_with_schema_id(self):
        """Check ``encode_record_with_schema_id`` for basic and advanced data.

        Each schema is registered under its own subject ('test' and
        'test_adv'). The resulting ids must differ, and every encoded message
        must match its record and the id it was encoded with.
        """
        advanced_schema = Util.parse_schema_from_string(
            data_gen.ADVANCED_SCHEMA)
        basic_schema = Util.parse_schema_from_string(data_gen.BASIC_SCHEMA)
        serializer = self.ms

        basic_id = self.client.register('test', basic_schema)
        for source_record in data_gen.BASIC_ITEMS:
            payload = serializer.encode_record_with_schema_id(
                basic_id, source_record)
            self.assertMessageIsSame(payload, source_record, basic_id)

        advanced_id = self.client.register('test_adv', advanced_schema)
        self.assertNotEqual(advanced_id, basic_id)
        for source_record in data_gen.ADVANCED_ITEMS:
            payload = serializer.encode_record_with_schema_id(
                advanced_id, source_record)
            self.assertMessageIsSame(payload, source_record, advanced_id)
예제 #9
0
 def _get_schema_from_body(self, req):
     """Extract and parse the "schema" field of a JSON request body.

     Reads ``content-length`` bytes from ``req.rfile``, decodes them as
     JSON and parses the value stored under the "schema" key.

     Returns the result of ``self._get_identity_schema`` for the parsed
     schema, or ``None`` when the key is missing or empty, or when parsing
     or the identity lookup raises.
     """
     length = int(req.headers.getheader('content-length'))
     payload = json.loads(req.rfile.read(length))
     schema_str = payload.get("schema")
     if not schema_str:
         return None
     try:
         avro_schema = Util.parse_schema_from_string(schema_str)
         return self._get_identity_schema(avro_schema)
     except Exception:
         # Previously a bare ``except:``, which also swallowed
         # KeyboardInterrupt and SystemExit. Ordinary errors are still
         # reported to the caller as "no schema".
         return None
 def _get_schema_from_body(self, req):
     """Read a JSON request body and return its parsed "schema" entry.

     The body length comes from the ``content-length`` header and the bytes
     are read from ``req.rfile``.

     Returns whatever ``self._get_identity_schema`` gives for the parsed
     schema. Returns ``None`` if the "schema" key is absent or empty, or
     if parsing or the identity lookup raises.
     """
     length = int(req.headers.getheader('content-length'))
     raw_body = req.rfile.read(length)
     body = json.loads(raw_body)
     schema_text = body.get("schema")
     if not schema_text:
         return None
     try:
         avro_schema = Util.parse_schema_from_string(schema_text)
         return self._get_identity_schema(avro_schema)
     except Exception:
         # Narrowed from a bare ``except:`` so that KeyboardInterrupt and
         # SystemExit are no longer silently turned into ``None``.
         return None
    def test_dupe_register(self):
        """Register the same schema twice under one subject.

        The second registration must return the same positive id as the
        first, and the latest schema for the subject must be unchanged.
        """
        subject = 'test'
        registry = self.client
        basic_schema = Util.parse_schema_from_string(data_gen.BASIC_SCHEMA)

        first_id = registry.register(subject, basic_schema)
        self.assertTrue(first_id > 0)
        latest_before = registry.get_latest_schema(subject)

        # Duplicate registration: same subject, same schema.
        second_id = registry.register(subject, basic_schema)
        self.assertEqual(first_id, second_id)
        latest_after = registry.get_latest_schema(subject)
        self.assertEqual(latest_before, latest_after)
예제 #12
0
    def test_dupe_register(self):
        """Check that a duplicate registration changes nothing.

        The basic schema is registered twice under the subject 'test'. Both
        calls must return the same id (greater than zero) and the latest
        schema seen after each call must be equal.
        """
        basic_schema = Util.parse_schema_from_string(data_gen.BASIC_SCHEMA)
        registry = self.client
        subject = 'test'

        original_id = registry.register(subject, basic_schema)
        self.assertTrue(original_id > 0)
        original_latest = registry.get_latest_schema(subject)

        # Register the identical schema again under the identical subject.
        repeated_id = registry.register(subject, basic_schema)
        self.assertEqual(original_id, repeated_id)
        repeated_latest = registry.get_latest_schema(subject)
        self.assertEqual(original_latest, repeated_latest)
    def test_getters(self):
        """Check the client's lookups before and after a registration.

        Before anything is registered in this test, the version lookup must
        give -1, ``get_by_id(1)`` must give ``None`` and the latest schema
        must be ``(None, None, None)``. After registering, the latest schema
        must match the new id, schema and version, and ``get_by_id`` must
        return the registered schema.
        """
        subject = 'test'
        registry = self.client
        basic_schema = Util.parse_schema_from_string(data_gen.BASIC_SCHEMA)

        # Lookups made before this test registers anything.
        self.assertEqual(registry.get_version(subject, basic_schema), -1)
        self.assertEqual(registry.get_by_id(1), None)
        self.assertEqual(
            registry.get_latest_schema(subject), (None, None, None))

        # Lookups made after registration.
        registered_id = registry.register(subject, basic_schema)
        current_latest = registry.get_latest_schema(subject)
        current_version = registry.get_version(subject, basic_schema)
        self.assertLatest(
            current_latest, registered_id, basic_schema, current_version)

        self.assertEqual(registry.get_by_id(registered_id), basic_schema)
예제 #14
0
    def test_getters(self):
        """Exercise ``get_version``, ``get_by_id`` and ``get_latest_schema``.

        The three lookups are first checked for their "not found" results
        (-1, ``None`` and ``(None, None, None)``). The basic schema is then
        registered and the same lookups must reflect it.
        """
        basic_schema = Util.parse_schema_from_string(data_gen.BASIC_SCHEMA)
        registry = self.client
        subject = 'test'

        missing_version = registry.get_version(subject, basic_schema)
        self.assertEqual(missing_version, -1)
        missing_schema = registry.get_by_id(1)
        self.assertEqual(missing_schema, None)
        missing_latest = registry.get_latest_schema(subject)
        self.assertEqual(missing_latest, (None, None, None))

        # Register the schema, then repeat the lookups.
        new_id = registry.register(subject, basic_schema)
        found_latest = registry.get_latest_schema(subject)
        found_version = registry.get_version(subject, basic_schema)
        self.assertLatest(found_latest, new_id, basic_schema, found_version)

        found_schema = registry.get_by_id(new_id)
        self.assertEqual(found_schema, basic_schema)
예제 #15
0
 def parse_json(self, json):
     """Parse a schema string, returning ``None`` for empty input.

     ``json`` is handed to ``Util.parse_schema_from_string`` only when it
     is truthy; ``None`` and the empty string are answered with ``None``.
     """
     if not json:
         return None
     return Util.parse_schema_from_string(json)
class StdOutListener(tweepy.StreamListener):
    def on_status(self, message):
        logging.debug(datetime.now().strftime("%A, %d. %B %Y %I:%M%p"))
        try:
            mls = createPayload(message)
            print mls
            timevar = datetime.utcnow() - datetime.strptime(
                mls[11], '%Y-%m-%d %H:%M:%S')
            print datetime.utcnow(), datetime.strptime(mls[11],
                                                       '%Y-%m-%d %H:%M:%S')
            print "Minutes and Seconds : ", divmod(
                timevar.days * 86400 + timevar.seconds, 60)
        except Exception, e:
            logging.debug(
                'There was an error in creating the payload. The error is: %s'
                % e)
            print 'Error in Payload Creation : ', str(e)
            twitter_utils.sendErrorMail(
                'There was an error in creating the payload. The error is %s' %
                e)
            return True

        try:
            #converts the payload into the avro format in preparation for loading into hbase
            avro_schema = Util.parse_schema_from_string(
                open('/**/**/twitter.avsc').read())
            client = CachedSchemaRegistryClient(url='http://192.168.**:8081')
            schema_id = client.register('twitter_avro_schema_stream4',
                                        avro_schema)
            avro_schema = client.get_by_id(schema_id)
            schema_id, avro_schema, schema_version = client.get_latest_schema(
                'twitter_avro_schema_stream4')
            schema_version = client.get_version('twitter_avro_schema_stream4',
                                                avro_schema)
            serializer = MessageSerializer(client)
            encoded = serializer.encode_record_with_schema(
                topicname, avro_schema, {
                    "authid": mls[0],
                    "screen_name": mls[1],
                    "description": mls[2],
                    "favourites_count": convert_long(mls[3]),
                    "followers_count": convert_long(mls[4]),
                    "friends_count": convert_long(mls[5]),
                    "listed_count": convert_long(mls[6]),
                    "location": mls[7],
                    "id_str": mls[8],
                    "time_zone": mls[9],
                    "statuses_count": convert_long(mls[10]),
                    "created_at": mls[11],
                    "favorite_count": convert_long(mls[12]),
                    "tid": mls[13],
                    "in_reply_to_status_id_str": mls[14],
                    "in_reply_to_user_id_str": mls[15],
                    "lang": mls[16],
                    "possibly_sensitive": mls[17],
                    "retweet_count": convert_long(mls[18]),
                    "text": mls[19],
                    "entities_url": mls[20],
                    "entities_expanded_url": mls[21],
                    "entities_media_url": mls[22],
                    "disgust": convert_long(mls[23]),
                    "fear": convert_long(mls[24]),
                    "sadness": convert_long(mls[25]),
                    "surprise": convert_long(mls[26]),
                    "trust": convert_long(mls[27]),
                    "negative": convert_long(mls[28]),
                    "positive": convert_long(mls[29]),
                    "neutral": convert_long(mls[30]),
                    "celebrities": (mls[31]),
                    "events": (mls[32]),
                    "brands": (mls[33]),
                    "accessories": (mls[34])
                })
        except Exception, e:
            logging.debug(
                'There was an error in the generation of the avro file. The error is: %s'
                % e)
            print 'Error in avro generation : ', e
            print mls
            twitter_utils.sendErrorMail(
                'There was an error in the generation of the avro file. The error is %s. This is likely due to an error in the schema. Please check the schema file under twitter_avro_schema.avsc'
                % e)
            return True
예제 #17
0
 def test_schema_from_string(self):
     """Parsing the basic schema string must give a ``schema.Schema``."""
     result = Util.parse_schema_from_string(data_gen.BASIC_SCHEMA)
     is_schema = isinstance(result, schema.Schema)
     self.assertTrue(is_schema)
def writeToavro(p, mls):
    #converts the payload into the avro format in preparation for loading into hbase
    try:
        avro_schema = Util.parse_schema_from_string(
            open('/root/quest/twitter_avro_schema.avsc').read())
        client = CachedSchemaRegistryClient(url='http://192.168.111.12:8081')
        schema_id = client.register('twitter_avro__schema_stream4',
                                    avro_schema)
        avro_schema = client.get_by_id(schema_id)
        schema_id, avro_schema, schema_version = client.get_latest_schema(
            'twitter_avro__schema_stream4')
        schema_version = client.get_version('twitter_avro__schema_stream4',
                                            avro_schema)
        serializer = MessageSerializer(client)
        encoded = serializer.encode_record_with_schema(
            topicname, avro_schema, {
                "authid": mls[0],
                "screen_name": mls[1],
                "description": mls[2],
                "favourites_count": convert_long(mls[3]),
                "followers_count": convert_long(mls[4]),
                "friends_count": convert_long(mls[5]),
                "listed_count": convert_long(mls[6]),
                "location": mls[7],
                "id_str": mls[8],
                "time_zone": mls[9],
                "statuses_count": convert_long(mls[10]),
                "created_at": mls[11],
                "favorite_count": convert_long(mls[12]),
                "tid": mls[13],
                "in_reply_to_status_id_str": mls[14],
                "in_reply_to_user_id_str": mls[15],
                "lang": mls[16],
                "possibly_sensitive": mls[17],
                "retweet_count": convert_long(mls[18]),
                "text": mls[19],
                "entities_url": mls[20],
                "entities_expanded_url": mls[21],
                "entities_media_url": mls[22],
                "disgust": convert_long(mls[23]),
                "fear": convert_long(mls[24]),
                "sadness": convert_long(mls[25]),
                "surprise": convert_long(mls[26]),
                "trust": convert_long(mls[27]),
                "negative": convert_long(mls[28]),
                "positive": convert_long(mls[29]),
                "neutral": convert_long(mls[30]),
                "celebrities": (mls[31]),
                "events": (mls[32]),
                "brands": (mls[33]),
                "accessories": (mls[34])
            })
    except Exception, e:
        logging.debug(
            'There was an error in the generation of the avro file. The error is: %s'
            % e)
        print 'Error in avro generation : ', e
        print mls
        twitter_utils.sendErrorMail(
            'There was an error in the generation of the avro file. The error is %s'
            % e)
        return True
 def test_schema_from_file(self):
     """Parsing 'adv_schema.avsc' from disk must give a ``schema.Schema``."""
     schema_path = data_gen.get_schema_path('adv_schema.avsc')
     loaded = Util.parse_schema_from_file(schema_path)
     self.assertTrue(isinstance(loaded, schema.Schema))
 def test_schema_from_string(self):
     """Check the type returned by ``Util.parse_schema_from_string``.

     The basic schema string must parse into a ``schema.Schema`` instance.
     """
     basic_schema = Util.parse_schema_from_string(data_gen.BASIC_SCHEMA)
     parsed_is_schema = isinstance(basic_schema, schema.Schema)
     self.assertTrue(parsed_is_schema)
 def test_register(self):
     """Register the basic schema once and check the id and the cache size.

     The returned id must be greater than zero and the client's
     ``id_to_schema`` mapping must hold exactly one entry.
     """
     registry = self.client
     basic_schema = Util.parse_schema_from_string(data_gen.BASIC_SCHEMA)
     new_id = registry.register('test', basic_schema)
     self.assertTrue(new_id > 0)
     self.assertEqual(len(registry.id_to_schema), 1)
예제 #22
0
 def parse_json(self, json):
     """Return the schema parsed from ``json``, or ``None`` if it is empty.

     A falsy ``json`` (``None`` or the empty string) is never passed to
     ``Util.parse_schema_from_string``.
     """
     has_content = bool(json)
     if has_content:
         parsed = Util.parse_schema_from_string(json)
         return parsed
     return None
예제 #23
0
 def test_register(self):
     """Check a single registration of the basic schema.

     Registering under the subject 'test' must return a positive id and
     leave one entry in ``client.id_to_schema``.
     """
     basic_schema = Util.parse_schema_from_string(data_gen.BASIC_SCHEMA)
     registry = self.client
     registered_id = registry.register('test', basic_schema)
     self.assertTrue(registered_id > 0)
     cached_schema_count = len(registry.id_to_schema)
     self.assertEqual(cached_schema_count, 1)
예제 #24
0
 def test_schema_from_file(self):
     """Check the type returned by ``Util.parse_schema_from_file``.

     The file is located with ``data_gen.get_schema_path('adv_schema.avsc')``
     and must parse into a ``schema.Schema`` instance.
     """
     advanced_path = data_gen.get_schema_path('adv_schema.avsc')
     advanced_schema = Util.parse_schema_from_file(advanced_path)
     parsed_is_schema = isinstance(advanced_schema, schema.Schema)
     self.assertTrue(parsed_is_schema)