コード例 #1
0
 def _get_rank(cluster_info):
     """Return the index in ``cluster_info`` that belongs to the current barrier task.

     Task placement may differ between two jobs, so ``cluster_info`` cannot be
     indexed directly with the partition id. Instead:

     1. Compute a *local rank*: the number of tasks in this job that run on
        the same host as the current task and have a lower partition id.
     2. Walk ``cluster_info`` and pick the ``(local rank + 1)``-th entry whose
        host equals the current host; its position is the global rank.

     Hosts are compared by equality on the text before the first ``":"``.
     A prefix comparison would be wrong here: ``"10.0.0.1"`` is a prefix of
     ``"10.0.0.10:4000"``, which would make distinct hosts look identical.

     Args:
         cluster_info: Iterable of node address strings. Only the part before
             the first ``":"`` is used for matching (presumably entries are
             ``"ip:port"`` -- confirm against the caller).

     Returns:
         int: Zero-based position of the matching entry. If ``cluster_info``
         does not contain enough entries for this host, the index of the last
         entry is returned (``-1`` when ``cluster_info`` is empty).
     """

     def _host_of(address):
         # Strip the port so that hosts are compared exactly, not by prefix.
         return address.split(":")[0]

     tc = BarrierTaskContext.get()
     infos = tc.getTaskInfos()
     idx = tc.partitionId()
     local_ip = _host_of(infos[idx].address)

     # Tasks on the same host with a lower partition id come before this one.
     local_rank = sum(
         1 for info in infos[:idx] if _host_of(info.address) == local_ip
     )

     global_rank = -1
     local_count = 0
     for global_rank, node in enumerate(cluster_info):
         if _host_of(node) == local_ip:
             local_count += 1
             if local_count == local_rank + 1:
                 break
     return global_rank
コード例 #2
0
def find_ip_and_port(pre_iter):
    """Reserve a free TCP port on this task's host and report it as ``host:port``.

    The socket is bound to an OS-assigned port and kept open until the barrier
    has been passed, then closed when the ``with`` block exits.

    Args:
        pre_iter: Unused by this function.

    Returns:
        list[str]: A single-element list containing ``"<host>:<port>"``, where
        the host is taken from this task's entry in the barrier task infos.
    """
    context = BarrierTaskContext().get()
    task_infos = context.getTaskInfos()
    own_info = task_infos[context.partitionId()]
    host, _, _ = own_info.address.partition(":")

    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    with closing(sock):
        # Port 0 asks the OS to choose any free port.
        sock.bind(("", 0))
        sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        # Keep the port held until the barrier has been passed.
        context.barrier()
        port = sock.getsockname()[1]
        result = [f"{host}:{port}"]
    return result