コード例 #1
0
def test_select(cluster):
    # type: (ClickHouseCluster) -> None
    """Round-trip three AvroConfluent records through the ``dummy`` instance.

    Encodes the values 0, 1 and 2 against a one-field ``long`` record schema
    using the cluster's schema registry client, inserts the concatenated
    messages into a freshly created ``avro_data`` table, and asserts that a
    ``select *`` returns exactly those three rows in order.
    """
    registry_client = cluster.schema_registry_client
    encoder = MessageSerializer(registry_client)

    record_schema = avro.schema.make_avsc_object({
        'name': 'test_record',
        'type': 'record',
        'fields': [{'name': 'value', 'type': 'long'}],
    })

    # All encoded messages are sent as one contiguous payload.
    data = b"".join(
        encoder.encode_record_with_schema('test_subject', record_schema,
                                          {'value': number})
        for number in range(3))

    node = cluster.instances["dummy"]  # type: ClickHouseInstance
    registry_url = "http://{}:{}".format(cluster.schema_registry_host,
                                         cluster.schema_registry_port)

    run_query(node, "create table avro_data(value Int64) engine = Memory()")
    # The insert is pointed at the schema registry through this setting.
    run_query(node, "insert into avro_data format AvroConfluent", data,
              {'format_avro_schema_registry_url': registry_url})

    stdout = run_query(node, "select * from avro_data")
    rows = [str.split(line) for line in stdout.splitlines()]
    assert rows == [["0"], ["1"], ["2"]]
コード例 #2
0
class StdOutListener(tweepy.StreamListener):
    """Stream listener that converts each received status into an Avro record."""

    def on_status(self, message):
        """Build a payload from ``message`` and Avro-encode it.

        The work happens in two guarded stages:

        1. ``createPayload(message)`` produces the field list ``mls``; the age
           of the status (now minus ``mls[11]``) is printed for diagnostics.
        2. The Avro schema is read from disk, registered with the schema
           registry, and ``mls`` is encoded against it.

        If either stage raises, the error is logged at debug level, printed,
        mailed through ``twitter_utils.sendErrorMail`` and ``True`` is
        returned. On success the method falls through and returns ``None``.
        """
        logging.debug(datetime.now().strftime("%A, %d. %B %Y %I:%M%p"))
        try:
            mls = createPayload(message)
            print(mls)
            # Parse the creation time once and reuse it for both the age
            # computation and the diagnostic print.
            now = datetime.utcnow()
            created = datetime.strptime(mls[11], '%Y-%m-%d %H:%M:%S')
            timevar = now - created
            print("%s %s" % (now, created))
            print("Minutes and Seconds :  %s" % (divmod(
                timevar.days * 86400 + timevar.seconds, 60), ))
        except Exception as e:
            logging.debug(
                'There was an error in creating the payload. The error is: %s'
                % e)
            print('Error in Payload Creation :  %s' % (e, ))
            twitter_utils.sendErrorMail(
                'There was an error in creating the payload. The error is %s' %
                e)
            # NOTE(review): presumably True is returned so the stream keeps
            # running after a bad status -- confirm against tweepy's contract.
            return True

        try:
            # Converts the payload into the Avro format in preparation for
            # loading into HBase. The schema file is opened with a context
            # manager so the handle is closed even when parsing fails.
            with open('/**/**/twitter.avsc') as schema_file:
                avro_schema = Util.parse_schema_from_string(
                    schema_file.read())
            client = CachedSchemaRegistryClient(url='http://192.168.**:8081')
            schema_id = client.register('twitter_avro_schema_stream4',
                                        avro_schema)
            # NOTE(review): the lookups below re-fetch the schema that was
            # just registered and schema_version is not used afterwards in
            # this block; the calls are kept because each one talks to the
            # registry and may raise.
            avro_schema = client.get_by_id(schema_id)
            schema_id, avro_schema, schema_version = client.get_latest_schema(
                'twitter_avro_schema_stream4')
            schema_version = client.get_version('twitter_avro_schema_stream4',
                                                avro_schema)
            serializer = MessageSerializer(client)
            # NOTE(review): ``encoded`` is not consumed within this block.
            encoded = serializer.encode_record_with_schema(
                topicname, avro_schema, {
                    "authid": mls[0],
                    "screen_name": mls[1],
                    "description": mls[2],
                    "favourites_count": convert_long(mls[3]),
                    "followers_count": convert_long(mls[4]),
                    "friends_count": convert_long(mls[5]),
                    "listed_count": convert_long(mls[6]),
                    "location": mls[7],
                    "id_str": mls[8],
                    "time_zone": mls[9],
                    "statuses_count": convert_long(mls[10]),
                    "created_at": mls[11],
                    "favorite_count": convert_long(mls[12]),
                    "tid": mls[13],
                    "in_reply_to_status_id_str": mls[14],
                    "in_reply_to_user_id_str": mls[15],
                    "lang": mls[16],
                    "possibly_sensitive": mls[17],
                    "retweet_count": convert_long(mls[18]),
                    "text": mls[19],
                    "entities_url": mls[20],
                    "entities_expanded_url": mls[21],
                    "entities_media_url": mls[22],
                    "disgust": convert_long(mls[23]),
                    "fear": convert_long(mls[24]),
                    "sadness": convert_long(mls[25]),
                    "surprise": convert_long(mls[26]),
                    "trust": convert_long(mls[27]),
                    "negative": convert_long(mls[28]),
                    "positive": convert_long(mls[29]),
                    "neutral": convert_long(mls[30]),
                    "celebrities": (mls[31]),
                    "events": (mls[32]),
                    "brands": (mls[33]),
                    "accessories": (mls[34])
                })
        except Exception as e:
            logging.debug(
                'There was an error in the generation of the avro file. The error is: %s'
                % e)
            print('Error in avro generation :  %s' % (e, ))
            print(mls)
            twitter_utils.sendErrorMail(
                'There was an error in the generation of the avro file. The error is %s. This is likely due to an error in the schema. Please check the schema file under twitter_avro_schema.avsc'
                % e)
            return True
 def setUp(self):
     """Create a mock schema registry client and a serializer bound to it."""
     registry = MockSchemaRegistryClient()
     self.client = registry
     # The serializer under test is backed by the mock registry client.
     self.ms = MessageSerializer(registry)
コード例 #4
0
ファイル: old_consumer.py プロジェクト: revpoint/jangl-utils
 def get_message_serializer(self):
     """Return a ``MessageSerializer`` backed by a cached registry client.

     The registry URL comes from ``self.get_schema_registry_url()`` and is
     logged at debug level before the client is created.
     """
     registry_url = self.get_schema_registry_url()
     logger.debug('loading schema registry: ' + registry_url)
     return MessageSerializer(CachedSchemaRegistryClient(url=registry_url))
コード例 #5
0
from pyspark import SparkContext
from pyspark.streaming import StreamingContext
from pyspark.streaming.kafka import KafkaUtils
from sys import argv
from config import KAFKA_URL, KAFKA_BROKER_LIST

# Command line: <topic> <schema-registry-url> [reset]
topic = argv[1]  # topic name, first positional argument
schema_registry_url = argv[2]  # e.g. http://localhost:8081

# An optional third argument of exactly 'reset' selects 'smallest';
# anything else (or no third argument) selects 'largest'.
# NOTE(review): presumably used as the Kafka offset-reset policy -- the
# consumer of this value is not visible here.
auto_offset_reset = 'smallest' if argv[3:4] == ['reset'] else 'largest'

# Serializer shared by the decoding code, backed by the schema registry.
schema_registry_client = CachedSchemaRegistryClient(url=schema_registry_url)
serializer = MessageSerializer(schema_registry_client)


# Simple decoder intended to replace Kafka streaming's built-in UTF-8
# decoding with schema-registry-aware decoding.
def decoder(s):
    """Decode the raw message ``s`` with the module-level ``serializer``.

    Returns whatever ``serializer.decode_message`` returns for ``s``.
    """
    return serializer.decode_message(s)


# Spark Streaming from Kafka: a local Spark context plus a streaming
# context with a batch duration of 60.
master = 'local[2]'
app_name = 'kafka_consumer'
sc = SparkContext(master=master, appName=app_name)
ssc = StreamingContext(sc, batchDuration=60)
kvs = KafkaUtils.createDirectStream(ssc, [topic], {
    "metadata.broker.list": KAFKA_BROKER_LIST,
コード例 #6
0
def writeToavro(p, mls):
    """Avro-encode the payload ``mls`` against the registered Twitter schema.

    The schema is read from ``/root/quest/twitter_avro_schema.avsc``,
    registered with the schema registry, and ``mls`` (indexed 0..34) is
    mapped onto the record fields and encoded.

    Args:
        p: Not used by this function; kept for caller compatibility.
        mls: Indexable payload holding at least 35 positional values.

    Returns:
        ``True`` if any step raised (after the error has been logged at debug
        level, printed and mailed via ``twitter_utils.sendErrorMail``);
        ``None`` when encoding succeeds.
    """
    # Converts the payload into the Avro format in preparation for loading
    # into HBase.
    try:
        # Context manager so the schema file handle is closed even when
        # parsing fails.
        with open('/root/quest/twitter_avro_schema.avsc') as schema_file:
            avro_schema = Util.parse_schema_from_string(schema_file.read())
        client = CachedSchemaRegistryClient(url='http://192.168.111.12:8081')
        schema_id = client.register('twitter_avro__schema_stream4',
                                    avro_schema)
        # NOTE(review): the lookups below re-fetch the schema that was just
        # registered and schema_version is not used afterwards in this block;
        # the calls are kept because each one talks to the registry and may
        # raise.
        avro_schema = client.get_by_id(schema_id)
        schema_id, avro_schema, schema_version = client.get_latest_schema(
            'twitter_avro__schema_stream4')
        schema_version = client.get_version('twitter_avro__schema_stream4',
                                            avro_schema)
        serializer = MessageSerializer(client)
        # NOTE(review): ``encoded`` is not consumed within this block.
        encoded = serializer.encode_record_with_schema(
            topicname, avro_schema, {
                "authid": mls[0],
                "screen_name": mls[1],
                "description": mls[2],
                "favourites_count": convert_long(mls[3]),
                "followers_count": convert_long(mls[4]),
                "friends_count": convert_long(mls[5]),
                "listed_count": convert_long(mls[6]),
                "location": mls[7],
                "id_str": mls[8],
                "time_zone": mls[9],
                "statuses_count": convert_long(mls[10]),
                "created_at": mls[11],
                "favorite_count": convert_long(mls[12]),
                "tid": mls[13],
                "in_reply_to_status_id_str": mls[14],
                "in_reply_to_user_id_str": mls[15],
                "lang": mls[16],
                "possibly_sensitive": mls[17],
                "retweet_count": convert_long(mls[18]),
                "text": mls[19],
                "entities_url": mls[20],
                "entities_expanded_url": mls[21],
                "entities_media_url": mls[22],
                "disgust": convert_long(mls[23]),
                "fear": convert_long(mls[24]),
                "sadness": convert_long(mls[25]),
                "surprise": convert_long(mls[26]),
                "trust": convert_long(mls[27]),
                "negative": convert_long(mls[28]),
                "positive": convert_long(mls[29]),
                "neutral": convert_long(mls[30]),
                "celebrities": (mls[31]),
                "events": (mls[32]),
                "brands": (mls[33]),
                "accessories": (mls[34])
            })
    except Exception as e:
        logging.debug(
            'There was an error in the generation of the avro file. The error is: %s'
            % e)
        print('Error in avro generation :  %s' % (e, ))
        print(mls)
        twitter_utils.sendErrorMail(
            'There was an error in the generation of the avro file. The error is %s'
            % e)
        return True