Esempio n. 1
0
# Module-level setup: load the vocabulary, prepare the sliding window and
# allocate the accumulator that the rest of the script fills in.

# Vocabulary is populated from the directory named by name_dir_out.
# NOTE(review): presumably this directory was written by an earlier
# vocabulary-building step -- confirm against the caller.
vocab = Vocabulary()
vocab.read_from_precomputed(name_dir_out)

# Bounded deque of length size_window, pre-filled with -1 so that it is
# already "full" before the first real element is pushed.
# NOTE(review): -1 looks like a "no word" sentinel -- confirm where d is read.
size_window = 2
d = collections.deque(maxlen=size_window)
for i in range(size_window):
    d.append(-1)

# Disabled alternatives for the counts container: square
# (cnt_words x cnt_words) int64 dok_matrix / lil_matrix. The dok_matrix line
# appears twice. Kept for reference only; none of them is executed.
#matrix=dok_matrix((vocab.cnt_words, vocab.cnt_words), dtype=np.int64)
#matrix=lil_matrix((vocab.cnt_words, vocab.cnt_words), dtype=np.int64)
#matrix=dok_matrix((vocab.cnt_words, vocab.cnt_words), dtype=np.int64)

# Hard-coded worker count used by this script.
cnt_workers = 2

# Active accumulator, constructed with one slot per vocabulary word
# (vocab.cnt_words). ArrayOfTrees itself is defined outside this view.
m = ArrayOfTrees(vocab.cnt_words)


def get_start_i(N, cnt_workers, id_worker):
    """Return the first index owned by worker *id_worker*.

    The indices ``0 .. N-1`` are split into consecutive slices, one per
    worker. Calling this with ``id_worker + 1`` gives the exclusive end of
    the slice that starts at ``get_start_i(N, cnt_workers, id_worker)``.

    Args:
        N: number of items being distributed.
        cnt_workers: number of workers sharing the items.
        id_worker: zero-based worker index (``cnt_workers`` itself is a
            valid argument and yields the end boundary of the last slice).

    Returns:
        The start offset as an int. When ``N < cnt_workers`` this is
        ``min(N, id_worker)``, so boundaries advance by one item per worker
        until they reach ``N``.
    """
    if N < cnt_workers:
        return min(N, id_worker)

    # NOTE(review): the chunk size is computed from N + 1, not N. For some
    # inputs the boundary for id_worker == cnt_workers therefore lies past N
    # (e.g. N=3, cnt_workers=2 -> 5). Left untouched because code outside
    # this block may depend on this exact partition.
    chunk = (N + 1) // cnt_workers
    leftover = N % cnt_workers

    # Each of the first `leftover` workers shifts the boundary by one extra
    # position; later workers are shifted by the full leftover.
    return chunk * id_worker + min(id_worker, leftover)


def get_interval(N, cnt_workers, id_worker):
#			process_file(os.path.join(root,items))
	lst_files=rndrobin_list(lst_files,cnt_mappers,id_worker)
	#print("I'm mapper {}, processing files in ".format(id_mapper),lst_files)
	for f in lst_files:
		process_file(f)
	#send unfinished buffers
	for id_dest in range(cnt_reducers):
		for pos_buf in range(pos_bufers[id_dest],size_buffer):
			buffers[id_dest][pos_buf][0]=-1
		comm.Send(buffers[id_dest], dest=id_dest+cnt_mappers, tag=1)
	print ("m{} finished! sppend {:.2f} s on waiting Send".format(id_worker,timings["wait_send"]))
	#print (lst_files)
else:	#this is reducer
	timings["collecting"]-=timer()
	rstart,rend=get_interval(cnt_words,cnt_reducers,id_reducer)
	m=ArrayOfTrees(rend-rstart)
	print ("I'm reducer {} of {} running on {}, my ownership range is from {} to {}".format(id_reducer,cnt_reducers,MPI.Get_processor_name(),rstart,rend))
	buffer = np.empty((size_buffer,2), dtype=np.int64)
	cnt_mappers_finished=0
	has_work=True
	while has_work:
		#print("r{}: waiting rcv".format(id_reducer))
		sys.stdout.flush()
		comm.Recv(buffer, source=MPI.ANY_SOURCE, tag=1)
		#print ("r{} recvd {}".format(id_reducer,buffer.shape))
		for i in range(size_buffer):
			if buffer[i,0]>=0:
				m.accumulate(int(buffer[i][0]-rstart),int(buffer[i][1]))#todo mapping for aot
			else:
				cnt_mappers_finished+=1
				#print("r{}: one mapper finished".format(id_reducer))
    lst_files = rndrobin_list(lst_files, cnt_mappers, id_worker)
    #print("I'm mapper {}, processing files in ".format(id_mapper),lst_files)
    for f in lst_files:
        process_file(f)
    #send unfinished buffers
    for id_dest in range(cnt_reducers):
        for pos_buf in range(pos_bufers[id_dest], size_buffer):
            buffers[id_dest][pos_buf][0] = -1
        comm.Send(buffers[id_dest], dest=id_dest + cnt_mappers, tag=1)
    print("m{} finished! sppend {:.2f} s on waiting Send".format(
        id_worker, timings["wait_send"]))
    #print (lst_files)
else:  #this is reducer
    timings["collecting"] -= timer()
    rstart, rend = get_interval(cnt_words, cnt_reducers, id_reducer)
    m = ArrayOfTrees(rend - rstart)
    print(
        "I'm reducer {} of {} running on {}, my ownership range is from {} to {}"
        .format(id_reducer, cnt_reducers, MPI.Get_processor_name(), rstart,
                rend))
    buffer = np.empty((size_buffer, 2), dtype=np.int64)
    cnt_mappers_finished = 0
    has_work = True
    while has_work:
        #print("r{}: waiting rcv".format(id_reducer))
        sys.stdout.flush()
        comm.Recv(buffer, source=MPI.ANY_SOURCE, tag=1)
        #print ("r{} recvd {}".format(id_reducer,buffer.shape))
        for i in range(size_buffer):
            if buffer[i, 0] >= 0:
                m.accumulate(int(buffer[i][0] - rstart),
Esempio n. 4
0
# Module-level setup: load the vocabulary, prepare the sliding window and
# allocate the accumulator that the rest of the script fills in.

# Vocabulary is populated from the directory named by name_dir_out.
# NOTE(review): presumably this directory was written by an earlier
# vocabulary-building step -- confirm against the caller.
vocab=Vocabulary()
vocab.read_from_precomputed(name_dir_out)

# Bounded deque of length size_window, pre-filled with -1 so that it is
# already "full" before the first real element is pushed.
# NOTE(review): -1 looks like a "no word" sentinel -- confirm where d is read.
size_window=2
d = collections.deque(maxlen=size_window)
for i in range(size_window):
	d.append(-1)

# Disabled alternatives for the counts container: square
# (cnt_words x cnt_words) int64 dok_matrix / lil_matrix. The dok_matrix line
# appears twice. Kept for reference only; none of them is executed.
#matrix=dok_matrix((vocab.cnt_words, vocab.cnt_words), dtype=np.int64)
#matrix=lil_matrix((vocab.cnt_words, vocab.cnt_words), dtype=np.int64)
#matrix=dok_matrix((vocab.cnt_words, vocab.cnt_words), dtype=np.int64)

# Hard-coded worker count used by this script.
cnt_workers=2

# Active accumulator, constructed with one slot per vocabulary word
# (vocab.cnt_words). ArrayOfTrees itself is defined outside this view.
m=ArrayOfTrees(vocab.cnt_words)


def get_start_i(N, cnt_workers, id_worker):
    """Return the first index owned by worker *id_worker*.

    The indices ``0 .. N-1`` are split into consecutive slices, one per
    worker. Calling this with ``id_worker + 1`` gives the exclusive end of
    the slice that starts at ``get_start_i(N, cnt_workers, id_worker)``.

    Args:
        N: number of items being distributed.
        cnt_workers: number of workers sharing the items.
        id_worker: zero-based worker index (``cnt_workers`` itself is a
            valid argument and yields the end boundary of the last slice).

    Returns:
        The start offset as an int. When ``N < cnt_workers`` this is
        ``min(N, id_worker)``, so boundaries advance by one item per worker
        until they reach ``N``.
    """
    if N < cnt_workers:
        return min(N, id_worker)

    # NOTE(review): the chunk size is computed from N + 1, not N. For some
    # inputs the boundary for id_worker == cnt_workers therefore lies past N
    # (e.g. N=3, cnt_workers=2 -> 5). Left untouched because code outside
    # this block may depend on this exact partition.
    chunk = (N + 1) // cnt_workers
    leftover = N % cnt_workers

    # Each of the first `leftover` workers shifts the boundary by one extra
    # position; later workers are shifted by the full leftover.
    return chunk * id_worker + min(id_worker, leftover)
def get_interval(N, cnt_workers, id_worker):
    """Return the ``(start, end)`` index pair assigned to *id_worker*.

    ``start`` is this worker's boundary from ``get_start_i`` and ``end`` is
    the boundary of the next worker, so the pair describes a half-open
    range ``[start, end)``.
    """
    begin = get_start_i(N, cnt_workers, id_worker)
    stop = get_start_i(N, cnt_workers, id_worker + 1)
    return (begin, stop)

def get_worker_id(N,cnt_workers,v):