f.write(text)  # NOTE(review): `f` and `text` are defined above this chunk

# Convert airlines.csv to airlines.avro.
# Read the schema through a context manager so the .avsc handle is closed
# as soon as parsing is done instead of being left to the garbage collector.
with open("airlines.avsc", "rb") as schema_file:
    schema = schema.parse(schema_file.read())

writer = DataFileWriter(open("airlines.avro", "wb"), DatumWriter(), schema)
try:
    for line in text.splitlines():
        if not line.strip():
            # A blank line has no id column; int('') would raise ValueError.
            continue
        print(line)
        # NOTE(review): a plain comma split does not understand CSV quoting,
        # so a quoted value containing a comma would shift the columns.
        # Confirm the input never contains such values, or parse with `csv`.
        fields = line.split(',')
        writer.append({
            "field0": int(fields[0]),  # first column is stored as an integer
            "field1": fields[1],
            "field2": fields[2],
            "field3": fields[3],
            "field4": fields[4],
            "field5": fields[5],
            "field6": fields[6],
            "field7": fields[7],
        })
finally:
    # Close even when a row fails so the output file is flushed and released.
    writer.close()

# Optional sanity check: read the records back.
# reader = DataFileReader(open("airlines.avro", "rb"), DatumReader())
# for user in reader:
#     print(user)
# reader.close()

# Write airlines.avro to HDFS.
with client.write(hdfs_path='/user/student/airlines/airlines.avro') as writer:
    with open('./airlines.avro', mode='rb') as f:
        writer.write(f.read())
# In[35]:

r["productMap"]

# ## Generate Avro Schemaless

# In[36]:

# Encode a single record with DatumWriter + BinaryEncoder into an in-memory
# buffer; only the encoded datum is produced, so a reader must be given the
# same schema to decode it.
writer = avro.io.DatumWriter(schema)
bytes_writer = io.BytesIO()
encoder = avro.io.BinaryEncoder(bytes_writer)

# Write data using DatumWriter
writer.write(
    {
        "modelId": model_id,
        "tensorFlowModel": model_file_binary,
        "productMap": productMapping,
        "customerMap": customerMapping,
    },
    encoder,
)
raw_bytes = bytes_writer.getvalue()

# Persist the encoded bytes; the context manager guarantees the file is
# flushed and closed rather than relying on garbage collection.
with open(model_path + "recommender-no-schema.avro", 'wb') as out_file:
    out_file.write(raw_bytes)

# Round-trip check: decode the bytes back with the same schema.
bytes_reader = io.BytesIO(raw_bytes)
decoder = avro.io.BinaryDecoder(bytes_reader)
reader = avro.io.DatumReader(schema)
r = reader.read(decoder)

r["productMap"]