def my_map_function(vec):
    cos = COSBackend()
    resX = []
    vec = numpy.array(vec)
    for act in range(0, len(vec)):
        actual = vec[act]
        i = actual[0]
        j = actual[1]
        # load the row of the first matrix
        nameRow = 'A' + str(i)
        serialized1 = cos.get_object('cuc-bucket', nameRow)
        memfile = io.BytesIO()
        memfile.write(json.loads(serialized1).encode('latin-1'))
        memfile.seek(0)
        row = numpy.load(memfile)
        # load the column of the second matrix
        nameColumn = 'B' + str(j)
        serialized2 = cos.get_object('cuc-bucket', nameColumn)
        memfile = io.BytesIO()
        memfile.write(json.loads(serialized2).encode('latin-1'))
        memfile.seek(0)
        col = numpy.load(memfile)
        # calculation: row * column
        x = numpy.dot(row, col)
        res = [x, i, j]
        resX.append(res)
    return resX
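# Illustrative only: my_map_function expects a list of [i, j] pairs naming the row
# and column objects ('A<i>', 'B<j>') to multiply. A minimal sketch of building that
# work list for a dim1 x dim3 result and splitting it among workers; dim1, dim3 and
# n_workers are assumptions, not values taken from the code above.
def build_index_pairs(dim1, dim3, n_workers):
    pairs = [[i, j] for i in range(dim1) for j in range(dim3)]
    chunk = -(-len(pairs) // n_workers)  # ceiling division
    return [pairs[k:k + chunk] for k in range(0, len(pairs), chunk)]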
def main(args):
    # initialize cos wrapper
    cb = COSBackend(args['cos']['service_endpoint'], args['cos']['secret_key'],
                    args['cos']['access_key'])

    # fetch the assigned range of bytes and parse that chunk into words, then count
    # the occurrences of each word. (This must be done in a single expression so the
    # object returned by cb.get_object is freed by the garbage collector ASAP and
    # reserved memory doesn't stack up.)
    words = re.findall(
        r'\w+',
        cb.get_object(args['target_bucket'], args['target_fname'],
                      extra_get_args={'Range': args['Range']}).decode('UTF-8', errors='ignore'))

    result = {}
    for word in words:
        adapted_word = word.lower()  # unidecode.unidecode(word).lower()
        if adapted_word in result:
            result[adapted_word] += 1
        else:
            result[adapted_word] = 1

    # commit result on the cloud
    result_tag = '{}/CW-result-{}'.format(args['target_fname'], args['index'])
    cb.put_object(args['target_bucket'], result_tag, json.dumps(result))

    # notify via queue, message = result file name on the cloud
    pika_params = pika.URLParameters(args['rabbitamqp_url'])
    connection = pika.BlockingConnection(pika_params)
    channel = connection.channel()
    channel.basic_publish(exchange='', routing_key=args['qid'], body=result_tag)
    connection.close()
def map_function(i, j):
    obj2 = COSBackend(dic)
    # Get submatrices
    m1 = pickle.loads(obj2.get_object('prac1', 'A' + str(i) + '.mtx'))
    m2 = pickle.loads(obj2.get_object('prac1', 'B' + str(j) + '.mtx'))
    # Calculate multiplication
    result = m1.dot(m2)
    return result
def map_count_words(file, args):
    cos_params = args.get('cos_params')
    num_partition = args.get('num_partition')
    bucket_name = args.get('bucket_name')
    file_name = args.get('file_name')

    cos = COSBackend(cos_params)
    num_words = len(file.split())
    file_to_create = "cw_" + file_name + str(num_partition)
    cos.put_object(bucket_name, file_to_create, str(num_words))
    return {'finish': "OK"}
def mult(array):
    result = []
    cos = COSBackend()
    # array holds alternating A/B block names; process them in pairs
    for i in range(0, len(array), 2):
        matrix1 = pickle.loads(cos.get_object('____', array[i]))
        matrix2 = pickle.loads(cos.get_object('_____', array[i + 1]))
        result = np.append(result, np.dot(matrix1, matrix2))
    return result
def toRows(mat):
    cos = COSBackend()
    # store the rows of matrix A (AxB) in the bucket
    for x in range(0, dim1):
        row = mat[x, :]
        memfile = io.BytesIO()
        numpy.save(memfile, row)
        memfile.seek(0)
        serialized = json.dumps(memfile.read().decode('latin-1'))
        cos.put_object('cuc-bucket', 'A' + str(x), serialized)
def toColumns(mat):
    cos = COSBackend()
    # store the columns of matrix B (AxB) in the bucket
    for x in range(0, dim3):
        column = mat[:, x]
        memfile = io.BytesIO()
        numpy.save(memfile, column)
        memfile.seek(0)
        serialized = json.dumps(memfile.read().decode('latin-1'))
        cos.put_object('cuc-bucket', 'B' + str(x), serialized)
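# The save/load pattern above round-trips a numpy array through a JSON string: the
# array is dumped with numpy.save into a BytesIO buffer, decoded as latin-1 (which is
# lossless for all 256 byte values) so it survives json.dumps, and reversed on the
# way back. A self-contained sketch of both directions; the helper names are
# illustrative, not taken from the code above.
import io
import json
import numpy

def serialize_array(arr):
    memfile = io.BytesIO()
    numpy.save(memfile, arr)
    memfile.seek(0)
    return json.dumps(memfile.read().decode('latin-1'))

def deserialize_array(serialized):
    memfile = io.BytesIO()
    memfile.write(json.loads(serialized).encode('latin-1'))
    memfile.seek(0)
    return numpy.load(memfile)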
def matrix_mult_paquetes(x):
    cos = COSBackend()
    # Sequential computation
    if WORKERS == 1:
        A = p.loads(cos.get_object(BUCKET, '/secuencial/A'))
        B = p.loads(cos.get_object(BUCKET, '/secuencial/B'))
        results = np.dot(A, B)
    # Parallel computation: each worker handles its assigned part
    else:
        x = str(x).split('|')
        results = []
        worker = int(x[0])
        # download this worker's packages
        A = p.loads(cos.get_object(BUCKET, '/paralelo/A' + str(worker)))
        B = p.loads(cos.get_object(BUCKET, '/paralelo/B' + str(worker)))
        op_ini = x[1].split(',')
        op_ini[0] = int(op_ini[0])
        op_ini[1] = int(op_ini[1])
        op_fi = x[2].split(',')
        op_fi[0] = int(op_fi[0])
        op_fi[1] = int(op_fi[1])
        f = 0
        if (M * L / WORKERS) >= L:  # the downloaded B package includes all of B
            # worker's computation with the whole B
            while op_ini <= op_fi:
                results.append(A[f].dot(B[:, op_ini[1]]))
                op_ini[1] = op_ini[1] + 1
                if (op_ini[1] >= L):
                    op_ini[0] = op_ini[0] + 1
                    f = f + 1
                    op_ini[1] = 0
        else:
            c = 0
            # worker's computation following the column order in Bw
            while op_ini <= op_fi:
                results.append(A[f].dot(B[:, c]))
                op_ini[1] = op_ini[1] + 1
                c = c + 1
                if (op_ini[1] >= L):
                    op_ini[0] = op_ini[0] + 1
                    f = f + 1
                    op_ini[1] = 0
    return results
def ensamblar(results):
    global n, l, work
    cos = COSBackend()
    if (n % work != 0 or l % work != 0) and (work < l or work < n):
        array = []
        for result in results:
            array = np.append(array, result)
        final = np.reshape(array, (n, l))
    else:
        final = np.reshape(results, (n, l))
    cos.put_object('____', 'matrizFinal',
                   pickle.dumps(final, pickle.HIGHEST_PROTOCOL))
    return final
def matrix_ini(x, n, m, l, iterdata):
    cos = COSBackend()
    np.random.seed()
    A = np.random.randint(2 * x, size=(m, n)) - x
    B = np.random.randint(2 * x, size=(n, l)) - x
    # Sequential upload
    if WORKERS == 1:
        cos.put_object(BUCKET, '/secuencial/A', p.dumps(A, p.HIGHEST_PROTOCOL))
        cos.put_object(BUCKET, '/secuencial/B', p.dumps(B, p.HIGHEST_PROTOCOL))
    # Parallel upload
    else:
        # Split matrix A into packages according to the number of workers
        for i in iterdata:
            i = str(i).split('|')
            # Get the worker's start position
            op_ini = i[1].split(',')
            op_ini[0] = int(op_ini[0])
            # Get the worker's end position
            op_fi = i[2].split(',')
            op_fi[0] = int(op_fi[0]) + 1
            cos.put_object(BUCKET, '/paralelo/f' + i[0],
                           p.dumps(A[op_ini[0]:op_fi[0], :], p.HIGHEST_PROTOCOL))
        # Upload the whole matrix B
        cos.put_object(BUCKET, '/secuencial/B', p.dumps(B, p.HIGHEST_PROTOCOL))
def multiplication_reduce(results):
    cos = COSBackend()
    matrixC = []
    # once this loop finishes, every case has been handled
    for indexWorkerFila in range(nWorkersA):
        for numeroFila in range(len(results[indexWorkerFila * nWorkersB])):
            fila = []
            for indexWorkerColumna in range(nWorkersB):
                contadorWorker = indexWorkerFila * nWorkersB + indexWorkerColumna
                for valor in results[contadorWorker][numeroFila]:
                    fila.append(valor)
            matrixC.append(fila)
    cos.put_object('practica-sd-mp', 'matrixC.txt', pickle.dumps(matrixC))
def main(args):
    # get arguments
    s1 = json.dumps(args)
    args = json.loads(s1)
    res = args["res"]
    url = res["rabbitmq"]["url"]
    topRange = int(args["topRange"])
    bottomRange = int(args["bottomRange"])

    # configure COS library
    odb = COSBackend(res["ibm_cos"])

    # rabbitmq configuration
    params = pika.URLParameters(url)
    connection = pika.BlockingConnection(params)
    channel = connection.channel()
    channel.queue_declare(queue="CountingWords")

    # Adjust the range so that it doesn't cut any word:
    # if functionNumber == -1, this is the last mapper, so it analyses until the end
    # if functionNumber == 0, this is the first mapper, so it can't search before the start
    if args["functionNumber"] != "-1":
        topRange = selectRange(args["fileName"], topRange, res)
    if args["functionNumber"] != '0':
        bottomRange = selectRange(args["fileName"], bottomRange, res)

    # download the required part of the file
    fileFromServer = odb.get_object(
        res["ibm_cos"]["bucket"], args["fileName"],
        extra_get_args={
            "Range": "bytes={0}-{1}".format(bottomRange, topRange)
        }).decode('UTF-8', errors='ignore')

    # delete unwanted characters
    stringFiltered = re.sub('[^A-Za-z \n]+', '', fileFromServer)
    # split the string
    stringSplitted = re.split(r"\ |\n", stringFiltered)
    # delete "" entries in the array
    stringSplitted = list(filter(None, stringSplitted))

    # create a json: {'words': numberWords}
    body = json.dumps({"words": len(stringSplitted)})

    # send a msg to the reduce function
    channel.basic_publish(exchange='', routing_key='CountingWords', body=body)

    # close connection
    connection.close()
    return {}
def selectRange(fileName, rang, res):
    odb = COSBackend(res['ibm_cos'])
    # read the 20 bytes ending at the given offset
    fileFromServer = odb.get_object(
        res['ibm_cos']["bucket"], fileName,
        extra_get_args={
            'Range': 'bytes={0}-{1}'.format(rang - 20, rang)
        }).decode('UTF-8', errors='ignore')
    # move the boundary back until it lands on a space
    while (fileFromServer[-1] != " "):
        fileFromServer = fileFromServer[:-1]
        rang = rang - 1
    return rang
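# Hypothetical usage of selectRange: pull each partition boundary back to the nearest
# space so that no word is split between two mappers (chunk_size, worker_id and res
# are assumptions for illustration, not values from the code above):
#   bottomRange = selectRange(fileName, worker_id * chunk_size, res)
#   topRange = selectRange(fileName, (worker_id + 1) * chunk_size, res)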
def funcio_normal_fila(minim):  # minim is the first row this worker processes
    cos = COSBackend()
    maxim = int(minim) + porcio_basica_fil
    while minim < maxim:  # minim is the row currently being processed
        dada = ""
        for j in range(y):  # j is the column currently being processed
            dada = dada + str(mat1[int(minim)][int(j)]) + ","
        dada = dada[:-1]
        dada = dada.encode()
        cos.put_object('sd-ori-un-buen-cubo',
                       'fila' + str(int(minim) + 1) + '.txt', dada)
        minim += 1
def main(args):
    cos = COSBackend(args.get('cos_params'))
    space = args.get('space')
    byte_range = "bytes=" + str(int(space[0])) + "-" + str(int(space[1]))
    file = cos.get_object(args.get('bucket_name'), args.get('file_name'),
                          extra_get_args={'Range': byte_range}).decode('iso8859-15').lower()
    # strip punctuation (hyphen listed last so it is not read as a character range)
    clean_file = re.sub('[.,;:_*+"(\'){!}@#%&?¿¡-]', ' ', file)
    if int(args.get('program')) == 1:
        return map_count_words(clean_file, args)
    else:
        return map_word_count(clean_file, args)
def matrizMultCloud(casilla_ini, num_casillas):
    cos = COSBackend(config_os)
    res = 0
    resultados = []
    while (num_casillas > 0):
        fila_num, col_num = CalcPosMatrix(casilla_ini, M, L)
        fila = pickle.loads(
            cos.get_object('sistemasdistribuidos2', 'fila' + str(fila_num)))
        columna = pickle.loads(
            cos.get_object('sistemasdistribuidos2', 'colum' + str(col_num)))
        # dot product of the row and the column
        for n in range(N):
            res += fila[n] * columna[n]
        resultados.append([fila_num, col_num, res])
        num_casillas -= 1
        casilla_ini += 1
        res = 0
    return resultados
def my_reduce_function(results):
    cos = COSBackend()
    matrix = numpy.zeros((dim1, dim3))
    # generate the final matrix from partial results
    for xResult in results:
        for map_result in xResult:
            matrix[map_result[1], map_result[2]] = map_result[0]
    # put the final matrix to the bucket
    memfile = io.BytesIO()
    numpy.save(memfile, matrix)
    memfile.seek(0)
    serialized = json.dumps(memfile.read().decode('latin-1'))
    cos.put_object('cuc-bucket', 'matriu_result', serialized)
    return matrix
def generateMatrix(name, pos, dimf, dimc):
    numpy.random.seed()
    cos = COSBackend()
    # generate random matrix
    mat_original = numpy.random.randint(MAX_RANDOM, size=(dimf, dimc))
    # upload to cloud
    memfile = io.BytesIO()
    numpy.save(memfile, mat_original)
    memfile.seek(0)
    serialized = json.dumps(memfile.read().decode('latin-1'))
    cos.put_object('cuc-bucket', name, serialized)
    if pos == 'A':
        toRows(mat_original)
    else:
        toColumns(mat_original)
def matrix_multiplication(data):
    cos = COSBackend()
    valuesWorker = pickle.loads(cos.get_object('practica-sd-mp', f'{data}'))
    worker = data.split("w")
    i = int(worker[0])
    j = int(worker[1])
    # now that we have the rows and columns to compute, multiply them
    resultats = []
    for lineA in valuesWorker[0]:
        resultatsFila = []
        for columnB in valuesWorker[1]:
            total = 0
            for x in range(n):
                total += lineA[x] * columnB[x]
            resultatsFila.append(total)
        resultats.append(resultatsFila)
    return resultats
def funcio_residu_col(minim):  # minim is the first column this worker processes
    cos = COSBackend()
    minim = 0
    maxim = residu_col + 1
    while minim < maxim:  # minim is the column currently being processed
        dada = ""
        for j in range(y):  # j is the row currently being processed
            dada = dada + str(mat2[int(j)][int(minim)]) + ","
        dada = dada[:-1]
        dada = dada.encode()
        cos.put_object('sd-ori-un-buen-cubo',
                       'col' + str(int(minim) + 1) + '.txt', dada)
        minim += 1
def slave(id, x, ibm_cos):
    obj = COSBackend(config=ibm_cos)
    # announce that this slave wants to write
    obj.put_object('practise2', "p_write_{" + str(id) + "}", b"")
    my_turn = 0
    # poll until the master grants the write token for this id
    while (not my_turn):
        time.sleep(X)
        if (obj.list_objects('practise2', 'write_{' + str(id) + '}')):
            my_turn = 1
    # append this id to the shared result file
    result_file = json.loads(obj.get_object('practise2', 'result.json'))
    result_file.append(id)
    obj.put_object('practise2', 'result.json', json.dumps(result_file))
def clean():
    cos = COSBackend()
    print('Cleaning...', end='')
    if WORKERS != 1:
        cos.delete_object(BUCKET, '/secuencial/B')
        for i in range(0, WORKERS):
            print('.', end='')
            cos.delete_object(BUCKET, '/paralelo/f' + str(i))
    else:
        cos.delete_object(BUCKET, '/secuencial/A')
        cos.delete_object(BUCKET, '/secuencial/B')
    print('.', end='\n')
def funcio_reduce(results):
    cos = COSBackend()
    mat_result = np.zeros(shape=(x, z))
    for m in range(len(results)):
        valor = cos.get_object('sd-ori-un-buen-cubo',
                               'worker' + results[m] + '.txt')
        valor = valor.decode()
        cont = 0
        valor = valor.split(" ")
        # each worker file holds space-separated (i, j, value) triplets
        for n in range(len(valor) // 3):
            i = int(valor[cont])
            j = int(valor[cont + 1])
            res = valor[cont + 2]
            cont += 3
            mat_result[i][j] = res
    return (mat_result)
def map_word_count(file, args):
    cos_params = args.get('cos_params')
    num_partition = args.get('num_partition')
    bucket_name = args.get('bucket_name')
    file_name = args.get('file_name')

    cos = COSBackend(cos_params)
    split_file = file.split()
    new_dict = {}
    for word in split_file:
        paraula = str(word)
        if paraula not in new_dict.keys():
            new_dict[paraula] = 1
        else:
            new_dict[paraula] += 1
    cos.put_object(bucket_name, str("wc_" + file_name + str(num_partition)),
                   json.dumps(new_dict))
    return {'finish': "OK"}
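# None of the code above shows the reducer for map_word_count, so here is a sketch of
# how its per-partition JSON dictionaries could be merged, assuming partitions are
# numbered 0..n_partitions-1 and that cos/json are available as in the maps above:
def reduce_word_count(cos, bucket_name, file_name, n_partitions):
    totals = {}
    for p in range(n_partitions):
        partial = json.loads(cos.get_object(bucket_name, "wc_" + file_name + str(p)))
        for word, count in partial.items():
            totals[word] = totals.get(word, 0) + count
    return totals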
def main(args):
    start_time = time.time()
    args.update(args['chunk'])
    parameters = SimpleNamespace(**args['parameters'])
    cos = COSBackend(
        aws_access_key_id=args['cos']['aws_access_key_id'],
        aws_secret_access_key=args['cos']['aws_secret_access_key'],
        endpoint_url=args['cos']['private_endpoint'])

    mdt_key = args['mdt_key']
    mdt = cos.get_object(key=mdt_key, bucket=parameters.BUCKET)
    siam_stream = cos.get_object(key='siam_out.csv', bucket=parameters.BUCKET)

    out = map_interpolation(siam_stream=siam_stream,
                            mdt=mdt,
                            block_x=args['block_x'],
                            block_y=args['block_y'],
                            splits=parameters.SPLITS,
                            area_of_influence=parameters.AREA_OF_INFLUENCE)

    result_key = '/'.join([
        'tmp', 'WIND',
        os.path.basename(mdt_key).rsplit('.')[0],
        str(args['block_x']) + '_' + str(args['block_y']) + '.tif'
    ])
    cos.upload_file(filename=out, bucket=parameters.BUCKET, key=result_key)

    end_time = time.time()
    return {
        'result': result_key,
        'start_time': start_time,
        'end_time': end_time
    }
def main(args):
    start_time = time.time()
    parameters = SimpleNamespace(**args['parameters'])
    cos = COSBackend(
        aws_access_key_id=args['cos']['aws_access_key_id'],
        aws_secret_access_key=args['cos']['aws_secret_access_key'],
        endpoint_url=args['cos']['private_endpoint'])
    tile = args['tile']

    # Download shapefile in 200 MiB chunks
    shapefile = cos.get_object(bucket=parameters.BUCKET, key='shapefile.zip')
    with open('shape.zip', 'wb') as shapf:
        for chunk in iter(partial(shapefile.read, 200 * 1024 * 1024), b''):
            if not chunk:
                break
            shapf.write(chunk)

    rasters = {}
    for raster_type in ['TEMPERATURE', 'HUMIDITY', 'WIND', 'EXTRAD', 'RADIANCE']:
        key = '/'.join(['tmp', raster_type, tile, 'merged.tif'])
        rasters[raster_type.lower()] = cos.get_object(bucket=parameters.BUCKET,
                                                      key=key)

    filename = combine_calculations(tile=tile, **rasters)

    result_key = '/'.join(['tmp', 'ETC', args['tile'] + '.tif'])
    cos.upload_file(filename=filename, bucket=parameters.BUCKET, key=result_key)

    end_time = time.time()
    return {'result': filename, 'start_time': start_time, 'end_time': end_time}
def main(args):
    start_time = time.time()
    parameters = SimpleNamespace(**args['parameters'])
    mdt_key = args['mdt_key']
    mdt_filename = os.path.basename(mdt_key)
    cos = COSBackend(
        aws_access_key_id=args['cos']['aws_access_key_id'],
        aws_secret_access_key=args['cos']['aws_secret_access_key'],
        endpoint_url=args['cos']['private_endpoint'])
    cos.download_file(parameters.BUCKET, mdt_key, mdt_filename)
    tiff_file = os.path.splitext(mdt_filename)[0] + '.tif'

    with rasterio.open(mdt_filename) as src:
        profile = src.profile
        # Cloud optimized GeoTiff parameters (rio_cogeo is not needed)
        profile.update(driver='GTiff')
        profile.update(blockxsize=256)
        profile.update(blockysize=256)
        profile.update(tiled=True)
        profile.update(compress='deflate')
        profile.update(interleave='band')
        with rasterio.open(tiff_file, "w", **profile) as dest:
            dest.write(src.read())

    cos.upload_file(filename=tiff_file, bucket=parameters.BUCKET,
                    key='tiff/{}'.format(tiff_file))
    end_time = time.time()
    return {
        'result': tiff_file,
        'start_time': start_time,
        'end_time': end_time
    }
def main(args):
    # initialize cos wrapper
    cb = COSBackend(args['cos']['service_endpoint'], args['cos']['secret_key'],
                    args['cos']['access_key'])

    # initialize queue system for the mappers' queue
    pika_params = pika.URLParameters(args['rabbitamqp_url'])
    connection = pika.BlockingConnection(pika_params)
    channel = connection.channel()
    channel.queue_declare(queue=args['mapper_qid'])

    # check what we are reducing
    if 'reduce_WordCount' in args and args['reduce_WordCount'] == 'yes':
        callback = ReduceCallback(cb, args['target_bucket'], args['nthreads'])  # create a callback
        channel.basic_consume(callback, queue=args['mapper_qid'])  # set a callback
        channel.start_consuming()
        cb.put_object(args['target_bucket'],
                      '{}/WC-result'.format(args['target_fname']),
                      json.dumps(callback.result))  # commit result
    if 'reduce_CountingWords' in args and args['reduce_CountingWords'] == 'yes':
        callback = ReduceCallback(cb, args['target_bucket'], args['nthreads'])
        channel.basic_consume(callback, queue=args['mapper_qid'])
        channel.start_consuming()
        cb.put_object(args['target_bucket'],
                      '{}/CW-result'.format(args['target_fname']),
                      json.dumps(callback.result))

    # tell the orchestrator the job is done
    channel.basic_publish(exchange='', routing_key=args['reducer_qid'], body='OK')
    connection.close()
def generatex(x, y, z, a):
    cos = COSBackend()
    matrixA = []
    matrixB = []
    for m_value in range(x):
        valors = []
        for n_value in range(y):
            valors.append(random.randint(0, 10))
        matrixA.append(valors)
    for n_value in range(y):
        valors = []
        for l_value in range(z):
            valors.append(random.randint(0, 10))
        matrixB.append(valors)
    cos.put_object('practica-sd-mp', 'matrixA.txt', pickle.dumps(matrixA))
    cos.put_object('practica-sd-mp', 'matrixB.txt', pickle.dumps(matrixB))
    for i in range(nWorkersA):
        if (mImpar != 0 and i == nWorkersA - 1):
            filesA = matrixA[i * a:]
        else:
            filesA = matrixA[i * a:i * a + a]
        for j in range(nWorkersB):
            columnesB = []
            if (lImpar != 0 and j == nWorkersB - 1):
                columnesTotals = lImpar
            else:
                columnesTotals = a
            for k in range(columnesTotals):
                columna = [item[j * a + k] for item in matrixB]
                columnesB.append(columna)
            # now we have both the rows and the columns for this worker
            infoWorkers = []
            infoWorkers.append(filesA)
            infoWorkers.append(columnesB)
            cos.put_object('practica-sd-mp', f'{i}w{j}', pickle.dumps(infoWorkers))
def matrix_mult(x):
    cos = COSBackend()
    x = str(x).split('|')
    # Sequential computation
    if WORKERS == 1:
        A = p.loads(cos.get_object(BUCKET, '/secuencial/A'))
        B = p.loads(cos.get_object(BUCKET, '/secuencial/B'))
        results = np.dot(A, B)
    # Parallel computation: each worker handles its assigned part
    else:
        results = []
        op_ini = x[1].split(',')
        op_ini[0] = int(op_ini[0])
        op_ini[1] = int(op_ini[1])
        op_fi = x[2].split(',')
        op_fi[0] = int(op_fi[0])
        op_fi[1] = int(op_fi[1])
        A = p.loads(cos.get_object(BUCKET, '/paralelo/f' + x[0]))
        B = p.loads(cos.get_object(BUCKET, '/secuencial/B'))
        rango = op_ini[0]
        while op_ini <= op_fi:
            # compute position C[f_act - f_ini, c_act]
            results.append(A[op_ini[0] - rango].dot(B[:, op_ini[1]]))
            op_ini[1] = op_ini[1] + 1
            # jump to the next row of C
            if (op_ini[1] >= L):
                op_ini[0] = op_ini[0] + 1
                op_ini[1] = 0
    return results
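# The '|'-separated descriptors parsed by matrix_mult and matrix_ini have the shape
# 'worker|start_row,start_col|end_row,end_col' over an M x L result. A sketch of how
# such an iterdata list could be built by dealing the M*L result cells out among
# WORKERS in row-major order (this builder is an assumption; it does not appear in
# the code above):
def build_iterdata(m, l, workers):
    per_worker = (m * l) // workers
    iterdata = []
    for w in range(workers):
        first = w * per_worker
        last = (w + 1) * per_worker - 1 if w < workers - 1 else m * l - 1
        iterdata.append('{}|{},{}|{},{}'.format(w, first // l, first % l,
                                                last // l, last % l))
    return iterdata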