def main(args):
    start_time = time.time()
    parameters = SimpleNamespace(**args['parameters'])
    cos = COSBackend(
        aws_access_key_id=args['cos']['aws_access_key_id'],
        aws_secret_access_key=args['cos']['aws_secret_access_key'],
        endpoint_url=args['cos']['private_endpoint'])
    tile = args['tile']

    # Download the shapefile in 200 MiB chunks
    shapefile = cos.get_object(bucket=parameters.BUCKET, key='shapefile.zip')
    with open('shape.zip', 'wb') as shapf:
        for chunk in iter(partial(shapefile.read, 200 * 1024 * 1024), b''):
            if not chunk:
                break
            shapf.write(chunk)

    # Download the merged raster of every variable for this tile
    rasters = {}
    for raster_type in ['TEMPERATURE', 'HUMIDITY', 'WIND', 'EXTRAD', 'RADIANCE']:
        key = '/'.join(['tmp', raster_type, tile, 'merged.tif'])
        rasters[raster_type.lower()] = cos.get_object(bucket=parameters.BUCKET, key=key)

    filename = combine_calculations(tile=tile, **rasters)

    result_key = '/'.join(['tmp', 'ETC', args['tile'] + '.tif'])
    cos.upload_file(filename=filename, bucket=parameters.BUCKET, key=result_key)

    end_time = time.time()
    return {'result': filename, 'start_time': start_time, 'end_time': end_time}
def main(args):
    start_time = time.time()
    args.update(args['chunk'])
    parameters = SimpleNamespace(**args['parameters'])
    cos = COSBackend(
        aws_access_key_id=args['cos']['aws_access_key_id'],
        aws_secret_access_key=args['cos']['aws_secret_access_key'],
        endpoint_url=args['cos']['private_endpoint'])

    mdt_key = args['mdt_key']
    mdt = cos.get_object(key=mdt_key, bucket=parameters.BUCKET)
    siam_stream = cos.get_object(key='siam_out.csv', bucket=parameters.BUCKET)

    out = map_interpolation(siam_stream=siam_stream,
                            mdt=mdt,
                            block_x=args['block_x'],
                            block_y=args['block_y'],
                            splits=parameters.SPLITS,
                            area_of_influence=parameters.AREA_OF_INFLUENCE)

    result_key = '/'.join([
        'tmp', 'WIND',
        os.path.basename(mdt_key).rsplit('.')[0],
        str(args['block_x']) + '_' + str(args['block_y']) + '.tif'
    ])
    cos.upload_file(filename=out, bucket=parameters.BUCKET, key=result_key)

    end_time = time.time()
    return {'result': result_key, 'start_time': start_time, 'end_time': end_time}
def my_map_function(vec):
    cos = COSBackend()
    resX = []
    vec = numpy.array(vec)
    for act in range(0, len(vec)):
        actual = vec[act]
        i = actual[0]
        j = actual[1]

        # load the row of the first matrix
        nameRow = 'A' + str(i)
        serialized1 = cos.get_object('cuc-bucket', nameRow)
        memfile = io.BytesIO()
        memfile.write(json.loads(serialized1).encode('latin-1'))
        memfile.seek(0)
        row = numpy.load(memfile)

        # load the column of the second matrix
        nameColumn = 'B' + str(j)
        serialized2 = cos.get_object('cuc-bucket', nameColumn)
        memfile = io.BytesIO()
        memfile.write(json.loads(serialized2).encode('latin-1'))
        memfile.seek(0)
        col = numpy.load(memfile)

        # calculation row * column
        x = numpy.dot(row, col)
        res = [x, i, j]
        resX.append(res)
    return resX
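# A minimal sketch (an assumption, not part of the original snippets) of the upload
# side that my_map_function expects: each row of A (and, analogously, each column
# of B) is numpy.save'd into a BytesIO buffer, latin-1-decoded and JSON-encoded,
# mirroring the decode path above. `upload_row` and its parameters are
# illustrative names only.
def upload_row(cos, i, row):
    memfile = io.BytesIO()
    numpy.save(memfile, row)
    memfile.seek(0)
    serialized = json.dumps(memfile.read().decode('latin-1'))
    cos.put_object('cuc-bucket', 'A' + str(i), serialized)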
def mult(array):
    result = []
    cos = COSBackend()
    # array holds alternating object names: even index = left matrix, odd index = right matrix
    for i in range(len(array)):
        if (i % 2) != 0:
            continue
        matrix1 = cos.get_object('____', array[i])        # bucket name left as a placeholder
        matrix1 = pickle.loads(matrix1)
        matrix2 = cos.get_object('_____', array[i + 1])   # bucket name left as a placeholder
        matrix2 = pickle.loads(matrix2)
        result = np.append(result, np.dot(matrix1, matrix2))
    return result
def matrix_mult_paquetes(x):
    cos = COSBackend()
    # Sequential computation
    if WORKERS == 1:
        A = p.loads(cos.get_object(BUCKET, '/secuencial/A'))
        B = p.loads(cos.get_object(BUCKET, '/secuencial/B'))
        results = np.dot(A, B)
    # Parallel computation: each worker handles its assigned part
    else:
        x = str(x).split('|')
        results = []
        worker = int(x[0])
        A = p.loads(cos.get_object(BUCKET, '/paralelo/A' + str(worker)))  # download this worker's packages
        B = p.loads(cos.get_object(BUCKET, '/paralelo/B' + str(worker)))
        op_ini = x[1].split(',')
        op_ini[0] = int(op_ini[0])
        op_ini[1] = int(op_ini[1])
        op_fi = x[2].split(',')
        op_fi[0] = int(op_fi[0])
        op_fi[1] = int(op_fi[1])
        f = 0
        if (M * L / WORKERS) >= L:  # the downloaded package of B contains the whole of B
            while op_ini <= op_fi:  # worker computation using the whole B
                results.append(A[f].dot(B[:, op_ini[1]]))
                op_ini[1] = op_ini[1] + 1
                if (op_ini[1] >= L):
                    op_ini[0] = op_ini[0] + 1
                    f = f + 1
                    op_ini[1] = 0
        else:
            c = 0
            while op_ini <= op_fi:  # worker computation following the column order in Bw
                results.append(A[f].dot(B[:, c]))
                op_ini[1] = op_ini[1] + 1
                c = c + 1
                if (op_ini[1] >= L):
                    op_ini[0] = op_ini[0] + 1
                    f = f + 1
                    op_ini[1] = 0
    return results
def main(args):
    # initialize cos wrapper
    cb = COSBackend(args['cos']['service_endpoint'], args['cos']['secret_key'],
                    args['cos']['access_key'])

    # fetch the assigned range of bytes and parse that chunk into words to then count the number of occurrences of each word
    # ( by the way, this must be done in one line (as a r-value) so that the object returned by the cb.get_object method gets
    # free'd by the garbage collector ASAP, therefore reserved memory doesn't stack up too much )
    words = re.findall(
        r'\w+',
        cb.get_object(args['target_bucket'],
                      args['target_fname'],
                      extra_get_args={
                          'Range': args['Range']
                      }).decode('UTF-8', errors='ignore'))

    result = {}
    for word in words:
        adapted_word = word.lower()  # unidecode.unidecode(word).lower()
        if adapted_word in result:
            result[adapted_word] += 1
        else:
            result[adapted_word] = 1

    # commit result on the cloud
    result_tag = '{}/CW-result-{}'.format(args['target_fname'], args['index'])
    cb.put_object(args['target_bucket'], result_tag, json.dumps(result))

    # notify via queue, message = result file name on the cloud
    pika_params = pika.URLParameters(args['rabbitamqp_url'])
    connection = pika.BlockingConnection(pika_params)
    channel = connection.channel()
    channel.basic_publish(exchange='', routing_key=args['qid'], body=result_tag)
    connection.close()
def matrizMultCloud(casilla_ini, num_casillas):
    cos = COSBackend(config_os)
    res = 0
    resultados = []
    while (num_casillas > 0):
        fila_num, col_num = CalcPosMatrix(casilla_ini, M, L)
        fila = pickle.loads(
            cos.get_object('sistemasdistribuidos2', 'fila' + str(fila_num)))
        columna = pickle.loads(
            cos.get_object('sistemasdistribuidos2', 'colum' + str(col_num)))
        for n in range(N):
            res += fila[n] * columna[n]
        resultados.append([fila_num, col_num, res])
        num_casillas -= 1
        casilla_ini += 1
        res = 0
    return resultados
def map_function(i, j):
    obj2 = COSBackend(dic)

    # Get submatrices
    m1 = pickle.loads(obj2.get_object('prac1', 'A' + str(i) + '.mtx'))
    m2 = pickle.loads(obj2.get_object('prac1', 'B' + str(j) + '.mtx'))

    # Calculate multiplication
    result = m1.dot(m2)
    return result
def main(args):
    start_time = time.time()
    parameters = SimpleNamespace(**args['parameters'])
    cos = COSBackend(aws_access_key_id=args['cos']['aws_access_key_id'],
                     aws_secret_access_key=args['cos']['aws_secret_access_key'],
                     endpoint_url=args['cos']['private_endpoint'])

    keys = cos.list_keys_prefix(bucket=parameters.BUCKET,
                                prefix='tmp/{}/{}'.format(args['type'], args['tile']))
    chunk = cos.get_object(key=keys[0], bucket=parameters.BUCKET)
    profile = obtain_meta(chunk, parameters.SPLITS)

    tiles = ((cos.get_object(bucket=parameters.BUCKET, key=key),
              tuple(os.path.basename(key)[:3].split('_'))) for key in keys)
    out = gather_blocks(tiles, profile)

    result_key = '/'.join(['tmp', args['type'], args['tile'], 'merged.tif'])
    cos.upload_file(filename=out, bucket=parameters.BUCKET, key=result_key)

    end_time = time.time()
    return {'result': result_key, 'start_time': start_time, 'end_time': end_time}
def slave(id, x, ibm_cos):
    obj = COSBackend(config=ibm_cos)
    # announce this slave, then poll every X seconds until the master grants its write turn
    obj.put_object('practise2', "p_write_{" + str(id) + "}", b"")
    my_turn = 0
    while (not my_turn):
        time.sleep(X)
        if (obj.list_objects('practise2', 'write_{' + str(id) + '}')):
            my_turn = 1
    # append this slave's id to the shared result file
    result_file = json.loads(obj.get_object('practise2', 'result.json'))
    result_file.append(id)
    obj.put_object('practise2', 'result.json', json.dumps(result_file))
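# A hedged master-side sketch (not taken from the original code) of the token
# scheme the slave above polls on: wait for each 'p_write_{id}' announcement,
# grant the 'write_{id}' token to one slave at a time, and wait for result.json
# to grow before granting the next turn. Granting in id order is a simplification
# made here for illustration.
def master(n_slaves, ibm_cos):
    obj = COSBackend(config=ibm_cos)
    obj.put_object('practise2', 'result.json', json.dumps([]))
    for turn in range(n_slaves):
        # wait until slave `turn` has announced itself
        while not obj.list_objects('practise2', 'p_write_{' + str(turn) + '}'):
            time.sleep(1)
        # grant the write token this slave is polling for
        obj.put_object('practise2', 'write_{' + str(turn) + '}', b"")
        # wait until the slave has appended its id to result.json
        while len(json.loads(obj.get_object('practise2', 'result.json'))) <= turn:
            time.sleep(1)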
def funcio_map(k):
    cos = COSBackend()
    # k is a string of "row col" pairs followed by the worker index
    k = k.split(" ")
    cont = 0
    dada = ''
    for a in range(len(k) // 2):
        i = k[cont]
        j = k[cont + 1]
        cont += 2
        fil = 'fila' + str(int(i) + 1) + '.txt'
        col = 'col' + str(int(j) + 1) + '.txt'
        fila = cos.get_object('sd-ori-un-buen-cubo', fil)
        columna = cos.get_object('sd-ori-un-buen-cubo', col)
        fila = fila.decode()
        columna = columna.decode()
        fila = fila.split(",")
        columna = columna.split(",")
        acum = 0
        for b in range(len(fila)):
            acum += int(fila[b]) * int(columna[b])
        dada += str(i) + " " + str(j) + ' ' + str(acum) + ' '
    dada = dada[:-1]
    dada = dada.encode()
    cos.put_object('sd-ori-un-buen-cubo', 'worker' + k[len(k) - 1] + '.txt', dada)
    return (k[len(k) - 1])
def matrix_mult(x):
    cos = COSBackend()
    x = str(x).split('|')
    # Sequential computation
    if WORKERS == 1:
        A = p.loads(cos.get_object(BUCKET, '/secuencial/A'))
        B = p.loads(cos.get_object(BUCKET, '/secuencial/B'))
        results = np.dot(A, B)
    # Parallel computation: each worker handles its assigned part
    else:
        results = []
        op_ini = x[1].split(',')
        op_ini[0] = int(op_ini[0])
        op_ini[1] = int(op_ini[1])
        op_fi = x[2].split(',')
        op_fi[0] = int(op_fi[0])
        op_fi[1] = int(op_fi[1])
        A = p.loads(cos.get_object(BUCKET, '/paralelo/f' + x[0]))
        B = p.loads(cos.get_object(BUCKET, '/secuencial/B'))
        rango = op_ini[0]
        while op_ini <= op_fi:
            # Compute position C[f_act - f_ini, c_act]
            results.append(A[op_ini[0] - rango].dot(B[:, op_ini[1]]))
            op_ini[1] = op_ini[1] + 1
            # Move on to the next row of C
            if (op_ini[1] >= L):
                op_ini[0] = op_ini[0] + 1
                op_ini[1] = 0
    return results
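# A hedged sketch (not from the original code) of how the
# 'worker|row_ini,col_ini|row_fi,col_fi' strings parsed by matrix_mult above
# could be generated, assuming the M*L cells of C are dealt out to WORKERS
# workers contiguously in row-major order.
def build_iterdata(M, L, WORKERS):
    iterdata = []
    cells = M * L
    per_worker = cells // WORKERS
    for w in range(WORKERS):
        first = w * per_worker
        # the last worker also takes any remainder cells
        last = cells - 1 if w == WORKERS - 1 else (w + 1) * per_worker - 1
        iterdata.append('{}|{},{}|{},{}'.format(w, first // L, first % L,
                                                last // L, last % L))
    return iterdata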
def main(args):
    # get arguments
    s1 = json.dumps(args)
    args = json.loads(s1)
    res = args["res"]
    url = res["rabbitmq"]["url"]
    topRange = int(args["topRange"])
    bottomRange = int(args["bottomRange"])

    # configure COS library
    odb = COSBackend(res["ibm_cos"])

    # rabbitmq configuration
    params = pika.URLParameters(url)
    connection = pika.BlockingConnection(params)
    channel = connection.channel()
    channel.queue_declare(queue="CountingWords")

    # Calculate a range which doesn't cut any word
    # if functionNumber = -1 it means this is the last one, so it has to analyse until the end
    # if functionNumber = 0 it means this is the first one, so it can't search before its start
    if args["functionNumber"] != "-1":
        topRange = selectRange(args["fileName"], topRange, res)
    if args["functionNumber"] != '0':
        bottomRange = selectRange(args["fileName"], bottomRange, res)

    # download the part of the file that is needed
    fileFromServer = odb.get_object(res["ibm_cos"]["bucket"],
                                    args["fileName"],
                                    extra_get_args={
                                        "Range": "bytes={0}-{1}".format(bottomRange, topRange)
                                    }).decode('UTF-8', errors='ignore')

    # Delete unwanted characters
    stringFiltered = re.sub('[^A-Za-z \n]+', '', fileFromServer)
    # Split the string
    stringSplitted = re.split(r" |\n", stringFiltered)
    # Delete "" in array
    stringSplitted = list(filter(None, stringSplitted))

    # create a json:
    # {'words' : numberWords}
    body = json.dumps({"words": len(stringSplitted)})

    # send a msg to the reduce function
    channel.basic_publish(exchange='', routing_key='CountingWords', body=body)

    # close connection
    connection.close()
    return {}
def selectRange(fileName, rang, res):
    odb = COSBackend(res['ibm_cos'])
    # read the 20 bytes that end at the requested offset
    fileFromServer = odb.get_object(res['ibm_cos']["bucket"],
                                    fileName,
                                    extra_get_args={
                                        'Range': 'bytes={0}-{1}'.format(rang - 20, rang)
                                    }).decode('UTF-8', errors='ignore')
    # move the cut point back until it lands on a space
    while (fileFromServer[-1] != " "):
        fileFromServer = fileFromServer[:-1]
        rang = rang - 1
    return rang
def main(args):
    cos = COSBackend(args.get('cos_params'))
    space = args.get('space')
    byte_range = "bytes=" + str(int(space[0])) + "-" + str(int(space[1]))
    file = cos.get_object(args.get('bucket_name'),
                          args.get('file_name'),
                          extra_get_args={
                              'Range': byte_range
                          }).decode('iso8859-15').lower()
    clean_file = re.sub('[.,;:-_*+"(\'){!}@#%&?¿¡]', ' ', file)
    if int(args.get('program')) == 1:
        return map_count_words(clean_file, args)
    else:
        return map_word_count(clean_file, args)
def matrix_multiplication(data):
    cos = COSBackend()
    valuesWorker = pickle.loads(cos.get_object('practica-sd-mp', f'{data}'))
    worker = data.split("w")
    i = int(worker[0])
    j = int(worker[1])
    # now that we have the rows and columns to compute, calculate them
    resultats = []
    for lineA in valuesWorker[0]:
        resultatsFila = []
        for columnB in valuesWorker[1]:
            total = 0
            for x in range(n):
                total += lineA[x] * columnB[x]
            resultatsFila.append(total)
        resultats.append(resultatsFila)
    return resultats
def reduce_word_count(args):
    file_name = args.get('file_name')
    num_partitions = args.get('num_partitions')
    cos = COSBackend(args.get('cos_params'))
    bucket_name = args.get('bucket_name')
    result_dict = {}
    for i in range(num_partitions):
        file = "wc_" + file_name + str(i)
        file_dict = json.loads(cos.get_object(bucket_name, file))
        cos.delete_object(bucket_name, file)
        result_dict = {
            key: result_dict.get(key, 0) + file_dict.get(key, 0)
            for key in set(result_dict) | set(file_dict)
        }
    cos.put_object(bucket_name, "final_" + file_name, json.dumps(result_dict))
    return {'finish': "OK"}
def reduce_count_words(args):
    file_name = args.get('file_name')
    num_partitions = args.get('num_partitions')
    cos = COSBackend(args.get('cos_params'))
    bucket_name = args.get('bucket_name')
    total_words = 0
    for i in range(num_partitions):
        file = "cw_" + file_name + str(i)
        total_words += int(cos.get_object(bucket_name, file))
        cos.delete_object(bucket_name, file)
    cos.put_object(bucket_name, "final_" + file_name, str(total_words))
    return {'finish': "OK"}
def funcio_reduce(results):
    cos = COSBackend()
    mat_result = np.zeros(shape=(x, z))
    for m in range(len(results)):
        valor = cos.get_object('sd-ori-un-buen-cubo', 'worker' + results[m] + '.txt')
        valor = valor.decode()
        cont = 0
        valor = valor.split(" ")
        for n in range(len(valor) // 3):
            i = int(valor[cont])
            j = int(valor[cont + 1])
            res = valor[cont + 2]
            cont += 3
            mat_result[i][j] = res
    return (mat_result)
def main(args):
    start_time = time.time()
    args.update(args['chunk'])
    parameters = SimpleNamespace(**args['parameters'])
    cos = COSBackend(
        aws_access_key_id=args['cos']['aws_access_key_id'],
        aws_secret_access_key=args['cos']['aws_secret_access_key'],
        endpoint_url=args['cos']['private_endpoint'])

    mdt_key = args['mdt_key']
    mdt = cos.get_object(key=mdt_key, bucket=parameters.BUCKET)
    filename = map_interpolation(mdt, parameters.DAY_OF_YEAR, args['block_x'],
                                 args['block_y'], parameters.SPLITS)

    result_key = '/'.join([
        'tmp', 'EXTRAD',
        os.path.basename(mdt_key).rsplit('.')[0],
        str(args['block_x']) + '_' + str(args['block_y']) + '.tif'
    ])
    cos.upload_file(filename=filename, bucket=parameters.BUCKET, key=result_key)

    result_key = '/'.join([
        'tmp', 'RADIANCE',
        os.path.basename(mdt_key).rsplit('.')[0],
        str(args['block_x']) + '_' + str(args['block_y']) + '.tif'
    ])
    cos.upload_file(filename='output', bucket=parameters.BUCKET, key=result_key)

    end_time = time.time()
    return {'result': result_key, 'start_time': start_time, 'end_time': end_time}
class Orchestrator:
    def __init__(self, target_bucket, target_fname, upload=False):
        self.target_fname = target_fname
        self.target_bucket = target_bucket
        self.ini_error = False
        format_str = "cloudfunctions:\n 'endpoint': ''\n 'namespace': ''\n 'api_key': ''\nrabbitamqp:\n 'url': ''\ncos:\n service_endpoint: ''\n secret_key: ''\n access_key: ''"
        try:
            # load keys securely
            with open('secret.yaml', 'r') as f:
                secret = yaml.safe_load(f)

            # initialize the remote storage wrapper, and upload the target file
            self.cb = COSBackend(secret['cos']['service_endpoint'],
                                 secret['cos']['secret_key'],
                                 secret['cos']['access_key'])
            if upload:
                with open(self.target_fname, "rb") as target_file:
                    self.cb.put_object(target_bucket, target_fname, target_file.read())

            # retrieve file length, ensure file has been uploaded
            try:
                self.fsize = int(
                    self.cb.head_object(self.target_bucket,
                                        self.target_fname)['content-length'])
            except:
                print('File \'{}\' was not found in this bucket \'{}\'. Upload it and retry.'
                      .format(self.target_fname, self.target_bucket))
                self.ini_error = True
                return None

            # initialize the function wrapper
            config = {}
            config['endpoint'] = secret['cloudfunctions']['endpoint']
            config['namespace'] = secret['cloudfunctions']['namespace']
            config['api_key'] = secret['cloudfunctions']['api_key']
            self.cf = CloudFunctions(config)

            # initialize the queue system
            self.pika_params = pika.URLParameters(secret['rabbitamqp']['url'])
        except KeyError:
            print('Wrong yaml document format. Please use the following one:')
            print(format_str)
            self.ini_error = True
            return None
        except FileNotFoundError as e:
            print('File \'{}\' not found.'.format(e.filename))
            self.ini_error = True
            return None

        # set the common args stub
        self.comargs = {}
        self.comargs['cos'] = secret['cos']
        self.comargs['rabbitamqp_url'] = secret['rabbitamqp']['url']
        self.comargs['target_bucket'] = self.target_bucket
        self.comargs['target_fname'] = self.target_fname

        # two separate queues, the reducer waits for the mappers and the orchestrator waits for the reducer
        self.mapper_qid = 'mapperQueue'
        self.reducer_qid = 'reducerQueue'

    def run(self, mapper, nthreads):
        # check if initialization was good
        if self.ini_error:
            return -4

        # validation of parameters
        if nthreads < 1:
            print('Minimum number of partitions or threads must be 1. \nExiting...')
            return -1
        if mapper != 'CountingWords' and mapper != 'WordCount':
            print('{} is not supported as a mapper yet. Supported mappers: CountingWords, WordCount. \nExiting...'
                  .format(mapper))
            return -2

        # prepare arguments for the mapper (mapper args)
        chunk_size = int(self.fsize / nthreads)
        mapargs = self.comargs.copy()
        mapargs['qid'] = self.mapper_qid

        # start a connection with the queue system
        connection = pika.BlockingConnection(self.pika_params)
        channel = connection.channel()
        channel.queue_declare(queue=self.mapper_qid)
        channel.queue_purge(queue=self.mapper_qid)  # ensure no message was left

        # measure time
        start_t = time.time()

        # dispatch mappers except the last one
        for i in range(0, nthreads - 1):
            mapargs['index'] = str(i)
            mapargs['Range'] = 'bytes={}-{}'.format(chunk_size * i, chunk_size * (i + 1))
            self.cf.invoke(mapper, mapargs)
            #print('[{}]'.format(mapargs['index']), chunk_size*i, 'to', chunk_size*(i+1))

        # dispatch the last mapper, so that it takes the rest of the file
        mapargs['index'] = nthreads - 1
        mapargs['Range'] = 'bytes={}-{}'.format(chunk_size * (nthreads - 1), self.fsize)
        self.cf.invoke(mapper, mapargs)
        #print('[{}]'.format(mapargs['index']), chunk_size*(nthreads-1), 'to', self.fsize)

        # prepare arguments for the reducer (reducer args)
        redargs = self.comargs.copy()
        redargs['reduce_{}'.format(mapper)] = 'yes'
        redargs['nthreads'] = nthreads
        redargs['mapper_qid'] = self.mapper_qid
        redargs['reducer_qid'] = self.reducer_qid
        channel.queue_declare(queue=self.reducer_qid)
        channel.queue_purge(queue=self.reducer_qid)  # ensure no message was left
        self.cf.invoke('Reducer', redargs)

        # wait for the reducer to finish
        channel.basic_consume(queue=self.reducer_qid, on_message_callback=SingleCallback())
        channel.start_consuming()

        # measure time
        end_t = time.time()
        connection.close()
        print('Done.\nExecution time: {0:.5g}s'.format(end_t - start_t))

    def claimFile(self, result_type, result_fname):
        # check if initialization was good
        if self.ini_error:
            return -4
        try:
            with open(result_fname, "w") as result_file:
                cos_result = self.cb.get_object(
                    self.target_bucket,
                    '{}/{}-result'.format(self.target_fname, result_type))
                result_file.write(cos_result.decode('utf-8'))
        except:
            print('Something went wrong, could not download result file for: {}, action: {}'
                  .format(self.target_fname, result_type))
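# A minimal usage sketch for the Orchestrator above. The bucket name, file name,
# thread count and result_type value are placeholders, not taken from the original
# code; result_type must match whatever key suffix the reducer stores under.
if __name__ == '__main__':
    orch = Orchestrator('my-bucket', 'book.txt', upload=True)
    # run the WordCount mapper over 8 chunks and wait for the reducer to finish
    orch.run('WordCount', 8)
    # download '<target_fname>/<result_type>-result' from COS into a local file
    orch.claimFile('WordCount', 'wordcount_result.txt')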
cos = COSBackend()
ibcmf = pywren.ibm_cf_executor()
start_time = time.time()
ibcmf.wait(ibcmf.call_async(generatex, [m, n, l, a]))
ibcmf.clean()
iterdata = []
for i in range(nWorkersA):
    for j in range(nWorkersB):
        iterdata.append(f'{i}w{j}')
ibcmf.wait(ibcmf.map_reduce(matrix_multiplication, iterdata,
                            multiplication_reduce, reducer_wait_local=True))
elapsed_time = time.time() - start_time
for i in iterdata:
    cos.delete_object('practica-sd-mp', i)
matrixA = pickle.loads(cos.get_object('practica-sd-mp', 'matrixA.txt'))
matrixB = pickle.loads(cos.get_object('practica-sd-mp', 'matrixB.txt'))
matrixC = pickle.loads(cos.get_object('practica-sd-mp', 'matrixC.txt'))
print(f'Matrix A ({m} x {n}):')
for filaA in matrixA:
    print(filaA)
print(f'Matrix B ({n} x {l}):')
for filaB in matrixB:
    print(filaB)
print(f'Matrix C ({m} x {l}):')
for filaC in matrixC:
    print(filaC)
print(f'Value of m: {m}\nValue of n: {n}\nValue of l: {l}\nValue of a: {a}')
print(f'Total number of workers: {w}.\nElapsed time in seconds: {elapsed_time} s')
# configure COS library
odb = COSBackend(res['ibm_cos'])
fileSize = int(odb.head_object(res['ibm_cos']["bucket"], fileName)["content-length"])

# check if there are enough workers
print(fileSize / nFunctions)
if (fileSize / nFunctions) > 110000000:  # this number is an approximation, we don't know the limit
    print("more workers are required for this file")
    exit(-1)

# invoke the functions, measuring the time
start = time.time()
invokeFunctions('wordCount', nFunctions, fileSize, fileName, res)
end1 = time.time()
invokeFunctions('countingWords', nFunctions, fileSize, fileName, res)
end2 = time.time()
print("wordCount function's time: {0}".format(end1 - start))
print("CountingWords function's time: {0}".format(end2 - end1))

# download the generated files
fileFromServer = odb.get_object(res['ibm_cos']["bucket"],
                                fileName[:-4] + 'CountingWordResult.txt')
newFile = open(fileName[:-4] + 'CountingWordResult.txt', "wb")
newFile.write(fileFromServer)
newFile.close()
print(fileName[:-4] + 'CountingWordResult.txt downloaded')

fileFromServer = odb.get_object(res['ibm_cos']["bucket"],
                                fileName[:-4] + 'WordCountResult.txt')
newFile = open(fileName[:-4] + 'WordCountResult.txt', "wb")
newFile.write(fileFromServer)
newFile.close()
print(fileName[:-4] + 'WordCountResult.txt downloaded')
def main(args):
    # get arguments
    s1 = json.dumps(args)
    args = json.loads(s1)
    res = args["res"]
    url = res["rabbitmq"]["url"]
    topRange = int(args["topRange"])
    bottomRange = int(args["bottomRange"])

    # configure COS library
    odb = COSBackend(res["ibm_cos"])
    counts = Counter()

    # pika configuration
    params = pika.URLParameters(url)
    connection = pika.BlockingConnection(params)
    channel = connection.channel()
    channel.queue_declare(queue='WordCount')

    # Calculate a range which doesn't cut any word
    # if functionNumber = -1 it means this is the last one, so it has to analyse until the end
    # if functionNumber = 0 it means this is the first one, so it can't search before its start
    if args["functionNumber"] != "-1":
        topRange = selectRange(args["fileName"], topRange, res)
    if args["functionNumber"] != '0':
        bottomRange = selectRange(args["fileName"], bottomRange, res)

    # get the part of the file that is needed in this function
    fileFromServer = odb.get_object(res["ibm_cos"]["bucket"],
                                    args["fileName"],
                                    extra_get_args={
                                        "Range": "bytes={0}-{1}".format(bottomRange, topRange)
                                    }).decode('UTF-8', errors='ignore')

    # Delete unwanted characters
    stringSplitted = re.sub('[^A-Za-z \n]+', '', fileFromServer)
    # Split the string
    stringSplitted = re.split(r" |\n", stringSplitted)
    # Delete "" in array
    stringSplitted = list(filter(None, stringSplitted))

    # convert the array to a counter:
    # {word1: numberWord1, word2: numberWord2, ..., wordN: numberWordN}
    counts.update(word.strip('.,?!"\'').lower() for word in stringSplitted)

    # counter to dict
    diccionary = dict(counts)
    # dict to json
    dumped_json_string = json.dumps(diccionary)

    # upload the file with the result:
    # nameFile -> book + numberFunction
    # body -> json(dict(count))
    odb.put_object(res["ibm_cos"]["bucket"],
                   args["fileName"] + args["functionNumber"],
                   dumped_json_string)

    # send a msg to reduce with the file name as body
    channel.basic_publish(exchange='',
                          routing_key='WordCount',
                          body=args["fileName"] + args["functionNumber"])

    # close the connection
    connection.close()
    return {}
if program == 0 or program == 1:  # assumed enclosing option check; its exact form is not part of this excerpt
    for i in range(int(sys.argv[2])):
        params['num_partition'] = i
        params['space'] = (i * partition_size, (i + 1) * partition_size)
        tasks.append(loop.create_task(perform_cloud('map', params.copy())))

    # Wait until the tasks running in the cloud finish.
    loop.run_until_complete(asyncio.gather(*tasks))

    # Tasks finished:
    params['num_partitions'] = int(sys.argv[2])
    result = ibm_cf.invoke_with_result('reduce', params)
    time_diff = datetime.now() - initial_time

    if result.get('finish') == "OK":
        if program == 1:
            print("\nCounting Words of file " + file)
            result = int(cos_backend.get_object(bucket_name, 'final_' + file))
            print("Result: the file contains " + str(result) + " words.")
        else:
            print("\nWord Count of file " + file)
            result = cos_backend.get_object(bucket_name, 'final_' + file)
            print("Result:")
            print(result)
    else:
        print(result)

    print("\nExecution time: " + str(time_diff.total_seconds()) + "\n")
else:
    print("Error: you must select 0 or 1 for the option.")