Esempio n. 1
0
    f.write(text)

# Convert airlines.csv (already loaded into `text`) to airlines.avro.
# Read the schema inside a `with` block so the .avsc handle is closed right away.
# NOTE(review): `schema` is presumably the avro schema module here; as in the
# original code, this assignment rebinds the name to the parsed schema object.
with open("airlines.avsc", "rb") as schema_file:
    schema = schema.parse(schema_file.read())

writer = DataFileWriter(open("airlines.avro", "wb"), DatumWriter(), schema)
try:
    for line in text.splitlines():
        if not line.strip():
            # A blank line carries no record; int("") below would raise ValueError.
            continue
        print(line)
        # NOTE(review): a plain split(',') does not honour quoted commas —
        # confirm the input never contains them.
        fields = line.split(',')
        writer.append({
            "field0": int(fields[0]),
            "field1": fields[1],
            "field2": fields[2],
            "field3": fields[3],
            "field4": fields[4],
            "field5": fields[5],
            "field6": fields[6],
            "field7": fields[7]
        })
finally:
    # Always close the writer, even when a row fails to convert.
    writer.close()

# reader = DataFileReader(open("airlines.avro", "rb"), DatumReader())
# for user in reader:
#     print(user)
# reader.close()

# Upload the local airlines.avro file to HDFS.
# The HDFS writer is opened first and the local file second, as a single
# `with` statement; both are released when the block exits.
with client.write(hdfs_path='/user/student/airlines/airlines.avro') as hdfs_writer, \
        open('./airlines.avro', mode='rb') as local_avro:
    avro_bytes = local_avro.read()
    hdfs_writer.write(avro_bytes)
Esempio n. 2
0
# In[35]:

# Notebook cell: evaluating this bare expression displays the value when run in
# a notebook; executed as a plain script statement it has no visible effect.
# NOTE(review): `r` is not defined in the visible code — presumably set by an
# earlier cell; confirm.
r["productMap"]

# ## Generate Avro Schemaless

# In[36]:

# Encode a single record with a DatumWriter/BinaryEncoder pair into an
# in-memory buffer (no DataFileWriter container is used here), save the raw
# bytes to disk, then decode them again with the same schema.
writer = avro.io.DatumWriter(schema)

bytes_writer = io.BytesIO()
encoder = avro.io.BinaryEncoder(bytes_writer)

# Write data using DatumWriter
writer.write(
    {
        "modelId": model_id,
        "tensorFlowModel": model_file_binary,
        "productMap": productMapping,
        "customerMap": customerMapping
    }, encoder)
raw_bytes = bytes_writer.getvalue()

# Persist the encoded bytes. The `with` block closes (and flushes) the file
# deterministically instead of leaving the handle to the garbage collector.
with open(model_path + "recommender-no-schema.avro", 'wb') as out_file:
    out_file.write(raw_bytes)

# Round trip: decode the bytes that were just produced, using the same schema.
bytes_reader = io.BytesIO(raw_bytes)
decoder = avro.io.BinaryDecoder(bytes_reader)
reader = avro.io.DatumReader(schema)
r = reader.read(decoder)
# Bare expression: shown as the cell output when run in a notebook.
r["productMap"]