Example #1
0
 def f(index, iterator):
     """Put one partition's records into plasma and yield where they landed.

     Written as a closure: ``random``, ``seed`` and ``object_store_address``
     are read from the enclosing scope.

     Args:
         index: Partition index; its string form is appended to ``seed``.
         iterator: Iterator over the partition's records. It is fully
             materialized into a list before being stored.

     Yields:
         A single ``(object_id, node_ip)`` tuple, where ``object_id`` is the
         value returned by ``client.put`` and ``node_ip`` is the result of
         ``get_node_ip()`` (presumably the IP of the node running this task).
     """
     import pyarrow.plasma as plasma
     from zoo.orca.data.utils import get_node_ip
     # Every partition may start from the same random seed, so mix the
     # partition index into the seed; the intent is to avoid identical
     # object_ids being generated in plasma for different partitions.
     # NOTE(review): `seed` must be a str for `seed + str(index)` to work --
     # confirm against the enclosing scope.
     random.seed(seed + str(index))
     res = list(iterator)
     client = plasma.connect(object_store_address)
     try:
         object_id = client.put(res)
     finally:
         # Always release the store connection, even when `put` fails, so
         # tasks do not leak plasma client connections.
         client.disconnect()
     yield object_id, get_node_ip()
Example #2
0
 def f(index, iterator):
     """Put one partition's records into plasma under a pre-assigned id.

     Written as a closure: ``ids`` and ``object_store_address`` are read
     from the enclosing scope.

     Args:
         index: Partition index, used to look up this partition's target
             ObjectID in ``ids``.
         iterator: Iterator over the partition's records. It is fully
             materialized into a list before any store access.

     Yields:
         A single ``(target_id, node_ip)`` tuple, where ``node_ip`` is the
         result of ``get_node_ip()`` (presumably the IP of the node running
         this task).

     Raises:
         AssertionError: If ``client.put`` returns an id other than the
             requested ``target_id``.
     """
     import pyarrow.plasma as plasma
     from zoo.orca.data.utils import get_node_ip
     res = list(iterator)
     client = plasma.connect(object_store_address)
     try:
         target_id = ids[index]
         # If the ObjectID exists in plasma, we assume a task trial
         # succeeds and the data is already in the object store.
         if not client.contains(target_id):
             object_id = client.put(res, target_id)
             assert object_id == target_id, \
                 "Errors occurred when putting data into plasma object store"
     finally:
         # Disconnect on every path (including a failed lookup, put or
         # assertion) so the store connection is never leaked.
         client.disconnect()
     yield target_id, get_node_ip()