vocab = Vocabulary() vocab.read_from_precomputed(name_dir_out) size_window = 2 d = collections.deque(maxlen=size_window) for i in range(size_window): d.append(-1) #matrix=dok_matrix((vocab.cnt_words, vocab.cnt_words), dtype=np.int64) #matrix=lil_matrix((vocab.cnt_words, vocab.cnt_words), dtype=np.int64) #matrix=dok_matrix((vocab.cnt_words, vocab.cnt_words), dtype=np.int64) cnt_workers = 2 m = ArrayOfTrees(vocab.cnt_words) def get_start_i(N, cnt_workers, id_worker): if N < cnt_workers: return min(N, id_worker) length_of_range = ((N + 1) // cnt_workers) start = length_of_range * id_worker if id_worker < N % cnt_workers: start += id_worker else: start += N % cnt_workers return start def get_interval(N, cnt_workers, id_worker):
# process_file(os.path.join(root,items)) lst_files=rndrobin_list(lst_files,cnt_mappers,id_worker) #print("I'm mapper {}, processing files in ".format(id_mapper),lst_files) for f in lst_files: process_file(f) #send unfinished buffers for id_dest in range(cnt_reducers): for pos_buf in range(pos_bufers[id_dest],size_buffer): buffers[id_dest][pos_buf][0]=-1 comm.Send(buffers[id_dest], dest=id_dest+cnt_mappers, tag=1) print ("m{} finished! sppend {:.2f} s on waiting Send".format(id_worker,timings["wait_send"])) #print (lst_files) else: #this is reducer timings["collecting"]-=timer() rstart,rend=get_interval(cnt_words,cnt_reducers,id_reducer) m=ArrayOfTrees(rend-rstart) print ("I'm reducer {} of {} running on {}, my ownership range is from {} to {}".format(id_reducer,cnt_reducers,MPI.Get_processor_name(),rstart,rend)) buffer = np.empty((size_buffer,2), dtype=np.int64) cnt_mappers_finished=0 has_work=True while has_work: #print("r{}: waiting rcv".format(id_reducer)) sys.stdout.flush() comm.Recv(buffer, source=MPI.ANY_SOURCE, tag=1) #print ("r{} recvd {}".format(id_reducer,buffer.shape)) for i in range(size_buffer): if buffer[i,0]>=0: m.accumulate(int(buffer[i][0]-rstart),int(buffer[i][1]))#todo mapping for aot else: cnt_mappers_finished+=1 #print("r{}: one mapper finished".format(id_reducer))
lst_files = rndrobin_list(lst_files, cnt_mappers, id_worker) #print("I'm mapper {}, processing files in ".format(id_mapper),lst_files) for f in lst_files: process_file(f) #send unfinished buffers for id_dest in range(cnt_reducers): for pos_buf in range(pos_bufers[id_dest], size_buffer): buffers[id_dest][pos_buf][0] = -1 comm.Send(buffers[id_dest], dest=id_dest + cnt_mappers, tag=1) print("m{} finished! sppend {:.2f} s on waiting Send".format( id_worker, timings["wait_send"])) #print (lst_files) else: #this is reducer timings["collecting"] -= timer() rstart, rend = get_interval(cnt_words, cnt_reducers, id_reducer) m = ArrayOfTrees(rend - rstart) print( "I'm reducer {} of {} running on {}, my ownership range is from {} to {}" .format(id_reducer, cnt_reducers, MPI.Get_processor_name(), rstart, rend)) buffer = np.empty((size_buffer, 2), dtype=np.int64) cnt_mappers_finished = 0 has_work = True while has_work: #print("r{}: waiting rcv".format(id_reducer)) sys.stdout.flush() comm.Recv(buffer, source=MPI.ANY_SOURCE, tag=1) #print ("r{} recvd {}".format(id_reducer,buffer.shape)) for i in range(size_buffer): if buffer[i, 0] >= 0: m.accumulate(int(buffer[i][0] - rstart),
vocab=Vocabulary() vocab.read_from_precomputed(name_dir_out) size_window=2 d = collections.deque(maxlen=size_window) for i in range(size_window): d.append(-1) #matrix=dok_matrix((vocab.cnt_words, vocab.cnt_words), dtype=np.int64) #matrix=lil_matrix((vocab.cnt_words, vocab.cnt_words), dtype=np.int64) #matrix=dok_matrix((vocab.cnt_words, vocab.cnt_words), dtype=np.int64) cnt_workers=2 m=ArrayOfTrees(vocab.cnt_words) def get_start_i(N,cnt_workers,id_worker): if N<cnt_workers: return min(N,id_worker) length_of_range=((N+1)//cnt_workers) start = length_of_range*id_worker if id_worker<N%cnt_workers: start+=id_worker else: start+=N%cnt_workers return start def get_interval(N,cnt_workers,id_worker): return (get_start_i(N,cnt_workers,id_worker),get_start_i(N,cnt_workers,id_worker+1)) def get_worker_id(N,cnt_workers,v):