예제 #1
0
def main():
    """Dump an Avro file's schema or records as JSON, per command-line options.

    Modes (chosen from the parsed options):
      * ``--schema`` without ``--destination``: print only the embedded schema.
      * neither: print the first ``--number`` records as JSON.
      * ``--destination``: copy ``--number`` records into the destination file.

    Exits with status 2 when no input file is given.
    """
    usage = "%prog [Options]"
    parser = OptionParser(usage=usage, version="%prog v0.1")
    parser.add_option("-f", "--file", dest="avro", help="an Avro file to read from", metavar="some/file.avro")
    parser.add_option("-s", "--schema", dest="schema", default=None, action="store_true",
                      help="only extract and return the avro-schema in JSON")
    parser.add_option("-i", "--pretty", dest="pretty", default=None, action="store_true",
                      help="indent [and keysort, if schema] any JSON on the output")
    parser.add_option("-n", "--number", dest="num", type="int", default=5, help="integer number of lines to put out")
    parser.add_option("-d", "--destination", dest="dest", help="optional destination file to write to")
    (opts, _args) = parser.parse_args()  # positional args are not used

    if not opts.avro:
        # An input file is mandatory: show usage and bail out.
        print("We at least need a file, ok?")
        parser.print_help()
        sys.exit(2)

    if not opts.dest and opts.schema:
        # Schema-only mode: print the schema to stdout.
        schema = get_schema(opts.avro)
        if not opts.pretty:
            print(schema.to_json())
        else:
            # opts.pretty is the bool True here; json.dumps treats it as indent=1.
            print(json.dumps(schema.to_json(), sort_keys=True, indent=opts.pretty))
    elif not opts.dest and not opts.schema:
        # Head mode: dump the first opts.num records.
        print(json.dumps(head_avro(opts.avro, opts.num), indent=opts.pretty))
    else:
        # A destination was given: write opts.num records there.
        write_avro(opts.dest, opts.num, opts.avro)
예제 #2
0
def combine_schemas(schema_files):
    """Combine multiple nested schemas into a single schema.

    Parameters
    ----------
    schema_files : list
        Files containing schemas. If nested, the most internal schema
        must come first so later files can resolve its named types.

    Returns
    -------
    dict
        JSON form of the last schema parsed (which, by the ordering
        contract above, embeds all the earlier ones).

    Raises
    ------
    ValueError
        If ``schema_files`` is empty.
    """
    if not schema_files:
        # Without this guard an empty list would crash below with a
        # confusing NameError on the never-assigned `schema`.
        raise ValueError("schema_files must contain at least one schema file")

    # Names() accumulates every named type parsed so far, letting later
    # schema files reference types defined in earlier ones.
    known_schemas = avro.schema.Names()

    schema = None
    for s in schema_files:
        schema = load_single_avsc(s, known_schemas)
    return schema.to_json()
예제 #3
0
def main():
    """Register a sample-log schema and stream device readings to Kafka forever.

    Parses a hard-coded Avro schema, registers it with a local Confluent
    schema registry, then loops indefinitely producing one Avro-encoded
    message per device per timestep to the ``demo_9`` topic.
    """
    # Create Avro schema
    test_schema = '''
    {
    "namespace": "example.avro",
     "type": "record",
     "name": "SampleLog",
     "fields": [
         {"name": "name", "type": "string"},
         {"name": "value",  "type": "float"},
         {"name": "time", "type": "float"},
         {"name": "datetime", "type": "string"}
     ]
    }
    '''
    schema = avro.schema.parse(test_schema)
    writer = avro.io.DatumWriter(schema)

    # Create producer
    conf = {'bootstrap.servers': 'localhost'}
    p = Producer(**conf)

    print('Schema:')
    # The registry expects the schema as a compact JSON string, so strip
    # all whitespace from the serialized form.
    schema_string = json.dumps(schema.to_json()).encode('utf-8')
    schema_string = "".join(schema_string.split())
    print(schema_string)

    # Topic
    topic_name = 'demo_9'

    # Register schema with the Confluent schema registry.
    schema_reg_url = 'http://localhost:8081'
    r = requests.post(
        schema_reg_url + '/subjects/' + topic_name + '-value/versions',
        data=json.dumps({
            'schema': schema_string
        }).encode('utf-8'),
        headers={'Content-Type': 'application/vnd.schemaregistry.v1+json'})
    # Parse the registry response once and reuse it (it was parsed twice).
    schema_id = json.loads(r.text)["id"]
    print('Schema id: ' + str(schema_id))

    # Create devices
    devices = [device.SampleTemperature(), device.Oscillator()]

    # Time step loop: one message per device every `timestep` seconds.
    timestep = 1  # seconds
    current_time = 0
    while True:
        time.sleep(timestep)
        current_time += timestep
        print(current_time)
        for dev in devices:
            p.produce(topic_name,
                      value=create_avro_message(dev, writer, schema_id))
            dev.update(timestep)
        p.flush()
예제 #4
0
def combineSchemas(schema_files):
    """Combine multiple nested schemas into a single schema.
    Parameters
    ----------
    schema_files : `list`
        List of files containing schemas.
        If nested, most internal schema must be first.
    Returns
    -------
    `dict`
        Avro schema
    Raises
    ------
    `ValueError`
        If `schema_files` is empty.
    """
    if not schema_files:
        # An empty list would otherwise crash below with a confusing
        # NameError on the never-assigned `schema`.
        raise ValueError("schema_files must contain at least one schema file")

    # Names() collects every named type parsed so far, so later schema
    # files can reference types defined in earlier ones.
    known_schemas = avro.schema.Names()

    schema = None
    for s in schema_files:
        schema = _loadSingleAvsc(s, known_schemas)

    return schema.to_json()
예제 #5
0
 def get_schema(self, schema_dumper):
     """Read the schema text captured on *schema_dumper*'s stdout and
     return it parsed into Avro's JSON representation."""
     raw_schema = schema_dumper.stdout.getvalue()
     return avro.schema.parse(raw_schema).to_json()
예제 #6
0
 def get_schema(self, schema_dumper):
     """Return the JSON form of the Avro schema found on *schema_dumper*'s stdout."""
     parsed = avro.schema.parse(schema_dumper.stdout.getvalue())
     return parsed.to_json()

# Convert an NDJSON document into an Avro data file; on failure, dump the
# offending record and schema for offline debugging.
outdir = "avro-data"
if not os.path.exists(outdir):
    os.makedirs(outdir)

# Load the Avro schema for this document type.
with open(f"avro/{document}.schema.json", "r") as f:
    schema_data = f.read()
schema = avro.schema.Parse(schema_data)

# DataFileWriter takes ownership of outfile; writer.close() below closes it.
outfile = open(f"{outdir}/{document}.avro", "wb")
writer = avro.datafile.DataFileWriter(outfile, avro.io.DatumWriter(), schema)

# Read the newline-delimited JSON source records.
with open(f"data/{document}.ndjson", "r") as f:
    data = f.readlines()

try:
    orig = None
    out = None  # keep bound for the diagnostic path even if json.loads raises first
    for line in data:
        orig = json.loads(line)
        out = convert(orig, schema)
        writer.append(out)
except Exception:  # was a bare except:, which also swallowed SystemExit/KeyboardInterrupt
    # Dump the last record and the schema so the failure can be inspected,
    # then re-run validation to surface a precise error message.
    with open("test.json", "w") as f:
        json.dump(orig, f)
    with open("test-schema.json", "w") as f:
        json.dump(schema.to_json(), f, indent=2)
    validation.validate(out, parse_schema(schema.to_json()))

writer.close()
예제 #8
0
from kafka import SimpleProducer, KafkaProducer
from kafka import KafkaClient
from time import time



# To send messages synchronously
# NOTE(review): this snippet also uses avro.schema, DatumWriter, io, and
# avro.io below but never imports them — presumably trimmed; confirm upstream.
producer = KafkaProducer(bootstrap_servers = "localhost:9092", compression_type = "gzip")

# Kafka topic
topic = "tnx"

# Path to user.avsc avro schema
schema_path = "/home/cloudera/workspace/kafka-clients-python/transactions.avsc"
schema = avro.schema.Parse(open(schema_path).read())
print("Schema", schema.to_json())

# Serialization machinery: DatumWriter encodes records against the schema
# into the in-memory binary buffer via the encoder.
writer = DatumWriter(schema)
bytes_writer = io.BytesIO()
encoder = avro.io.BinaryEncoder(bytes_writer)
def get_record():
    """Build one sample transaction record (fields match the Avro schema)."""
    return {
        "id": "123",
        "merchant_id": "m123",
        "customer_id": "c345",
        "amount": 100.1,
        "category": "pos",
        "timestamp": int(time()),
    }


for i in range(10):
예제 #9
0
from avro.io import DatumWriter
from kafka import SimpleProducer, KafkaProducer
from kafka import KafkaClient
from time import time

# To send messages synchronously
# NOTE(review): this snippet also uses avro.schema, io, and avro.io below
# but only imports DatumWriter and kafka — presumably trimmed; confirm upstream.
producer = KafkaProducer(bootstrap_servers="localhost:9092",
                         compression_type="gzip")

# Kafka topic
topic = "tnx"

# Path to user.avsc avro schema
schema_path = "/home/cloudera/workspace/kafka-clients-python/transactions.avsc"
schema = avro.schema.Parse(open(schema_path).read())
print("Schema", schema.to_json())

# Serialization machinery: DatumWriter encodes records against the schema
# into the in-memory binary buffer via the encoder.
writer = DatumWriter(schema)
bytes_writer = io.BytesIO()
encoder = avro.io.BinaryEncoder(bytes_writer)


def get_record():
    """Return a single hard-coded transaction record with a live timestamp."""
    record = dict(
        id="123",
        merchant_id="m123",
        customer_id="c345",
        amount=100.1,
        category="pos",
        timestamp=int(time()),
    )
    return record