def main():
    """Run the "counting words" and "word count" jobs over one COS object.

    Command line: ``orchestrator.py INPUT_FILE PARTITIONS``.

    Steps, in order:
      1. Load the IBM Cloud settings from ``.ibm_cloud_config`` (YAML).
      2. Validate the command-line arguments.
      3. Build the ``data`` payload (config, partition count, file name,
         bucket, file size, per-partition size and remainder).
      4. For each job: call its mapper launcher, invoke the ``reduce`` action
         through ``invoke_with_result``, print the elapsed wall-clock time and
         delete the ``<INPUT_FILE>.reduce`` object from the bucket.

    The bucket name is hard-coded to ``'sdtp1'``.

    Always terminates through ``sys.exit`` with one of these codes:
      0 -- both jobs finished
      1 -- configuration file missing, unreadable or not valid YAML
      2 -- wrong number of command-line arguments
      3 -- PARTITIONS is not an integer
      4 -- PARTITIONS is smaller than 1
    """
    config_path = '.ibm_cloud_config'
    usage = "Usage:\n\torchestrator.py INPUT_FILE PARTITIONS\n"

    # Catch only the failures that really mean "config unusable"; a bare
    # ``except`` would also hide KeyboardInterrupt and programming errors.
    try:
        with open(config_path, 'r') as config_file:
            config = yaml.safe_load(config_file)
    except OSError:
        print("Configuration file '{}' not found or unreadable.\n".format(
            config_path))
        sys.exit(1)
    except yaml.YAMLError as err:
        print("Configuration file '{}' is not valid YAML: {}\n".format(
            config_path, err))
        sys.exit(1)

    if len(sys.argv) != 3:
        print(usage)
        sys.exit(2)

    # Parse PARTITIONS once and reuse the value.
    try:
        partitions = int(sys.argv[2])
    except ValueError:
        print("Wrong parameter format.\n" + usage)
        sys.exit(3)

    if partitions < 1:
        print("The number of partitions must be greater than 0.\n")
        sys.exit(4)

    cos = cos_backend.COSBackend(config['ibm_cos'])
    cf = ibm_cf_connector.CloudFunctions(config['ibm_cf'])

    file_name = sys.argv[1]
    bucket = 'sdtp1'
    file_size = int(cos.head_object(bucket, file_name))

    # Integer division/modulo keep the sizes exact even for very large files
    # (float division loses precision above 2**53 bytes).
    data = {
        'config': config,
        'n_partitions': partitions,
        'file_name': file_name,
        'bucket': bucket,
        'file_size': file_size,
        'part_size': file_size // partitions,
        'final_size': file_size % partitions,
    }

    # Both jobs share the same timing / reduce / cleanup sequence.
    jobs = (
        ('Counting Words', invoke_countingword),
        ('Word Count', invoke_wordcount),
    )
    for label, invoke_mappers in jobs:
        start_time = time.time()
        invoke_mappers(data, cf)
        cf.invoke_with_result('reduce', data)
        elapsed = time.time() - start_time
        print('{} execution time: {:.3f}'.format(label, elapsed))
        # Remove the reducer output so the next job starts clean.
        cos.delete_object(data['bucket'], data['file_name'] + '.reduce')

    sys.exit(0)
def inicialitzacions(self, dataset, chunks, conf):
    """Initialise the orchestrator state and deploy the three actions.

    Args:
        dataset: stored as-is on ``self.dataset``.
        chunks: converted with ``int()`` and stored on ``self.chunks``.
        conf: configuration mapping; ``conf['ibm_cf']`` and ``conf['ibm_cos']``
            are handed to the Cloud Functions and COS clients, and
            ``conf['ibm_cos']['bucket']`` becomes ``self.bucket``.

    After the attributes are set, calls ``uploadDataset``, ``zipFile`` for the
    three action sources and ``createFunction`` for the three archives.
    """
    self.functions = ibm_cf_connector.CloudFunctions(conf['ibm_cf'])
    self.cos = COSBackend(conf['ibm_cos'])
    print("\nInicialitzant IBM COS\n")

    self.dataset = dataset
    self.bucket = conf['ibm_cos']['bucket']
    print(self.bucket)
    self.chunks = int(chunks)
    print("\nInicialitzant COS Backend\n")

    self.credentials = conf
    # Identifier derived from the current timestamp with the dot removed.
    timestamp_text = str(datetime.now().timestamp())
    self.id = "".join(timestamp_text.split("."))
    self.size_file = 0

    self.uploadDataset()
    for source_name in ('wordCount', 'countingWords', 'reduce'):
        self.zipFile(source_name)

    # Create the functions from the archives.
    deployments = (
        ("wordCount.zip", "word_count"),
        ("countingWords.zip", "counting_words"),
        ("reduce.zip", "reduce"),
    )
    for archive_name, action_name in deployments:
        self.createFunction(self.functions, archive_name, action_name)
# NOTE(review): this loop is the tail of a definition that starts outside the
# visible text; `i`, `ibm_cos`, `bucket` and `res_file` are bound elsewhere.
# It repeats the list_objects call for as long as the result compares equal
# to 0 (presumably polling until the result object exists -- confirm).
while (i == 0):
    i = ibm_cos.list_objects(bucket, res_file)

# Script entry point: parse the command line, load the YAML configuration,
# create the COS / Cloud Functions clients and prepare the shared parameters.
if __name__ == '__main__':
    # Read arguments
    ap = argparse.ArgumentParser()
    ap.add_argument("-f", "--file", required=True, help="name of the file")
    ap.add_argument("-c", "--chunks", required=True, help="number of chunks")
    ap.add_argument("-b", "--bucket", required=True, help="name of the bucket")
    # vars() turns the argparse Namespace into a plain dict keyed by the
    # long option names.
    args = vars(ap.parse_args())

    with open('ibm_cloud_config.yaml', 'r') as config_file:
        res = yaml.safe_load(config_file)

    ibm_cos = cb.COSbackend(res['ibm_cos'])
    ibm_cf = cf.CloudFunctions(res['ibm_cf'])

    bucket = args["bucket"]
    chunks = int(args["chunks"])
    src_file = args["file"]

    # head_object's result is indexed by 'content-length' to get the size.
    file_size = ibm_cos.head_object(bucket, src_file)
    file_size = int(file_size['content-length'])
    # Truncating division: the remainder (file_size % chunks) is not handled
    # in the visible code.
    chunk_size = int(file_size / chunks)

    # Parameters that will be passed to mapCountWord, mapWordCount and reducer
    params = {}
    params["config"] = res['ibm_cos']
    params["file"] = src_file
    params["bucket"] = bucket
    params["chunks"] = chunks
import json

# Load the IBM Cloud settings (YAML) used to build the COS and Cloud
# Functions clients below.
with open('.ibm-cloud_config', 'r') as config_file:
    configu = yaml.safe_load(config_file)

# Expected command line: <script> FILE_NAME N_PARTITIONS (extra arguments are
# tolerated because the check is ">= 3").
if (len(sys.argv) >= 3):
    # Ask on stdin which job to run: 1 = wordCount, 2 = countingWords.
    print(
        "Escolleix si vols fer el wordCount o el countingWords: (wordCount(1)/countingWords(2))"
    )
    opcio = int(input())
    if (opcio == 1 or opcio == 2):
        file_name = sys.argv[1]
        n_particions = int(sys.argv[2])
        cos = cos_backend.cos_backend(configu['ibm_cos'])
        # The bucket name 'joanuni' is hard-coded here.
        tamany_fitxer = int(cos.head_object('joanuni', file_name))
        provemFunc = ibm_cf_connector.CloudFunctions(configu['ibm_cf'])

        # Parameter dictionary (presumably the payload sent to the cloud
        # functions -- its use is outside the visible text).
        diccionari = {}
        diccionari["config"] = configu['ibm_cos']
        diccionari["file_name"] = file_name
        diccionari["particions"] = n_particions

        # Byte-range bookkeeping: `actual`/`seguent` delimit the current
        # partition, `aux` is the truncated partition size and `modul` the
        # leftover bytes.
        actual = 0
        aux = int(tamany_fitxer / n_particions)
        seguent = aux
        modul = tamany_fitxer % n_particions
        i = 0
        fitxer = 0
        instanteInicial = time()
        while (i < n_particions):
            # String of the form "bytes=<actual>-<seguent>".
            tamany_agafar = "bytes=" + str(actual) + "-" + str(seguent)
            # Last partition: extend the end offset by the remainder.
            # NOTE(review): the range string above was built before this
            # adjustment; the rest of the loop is not visible, so confirm the
            # string is rebuilt afterwards.
            if (n_particions - i == 1):
                seguent += modul
import ibm_cf_connector
import sys
import yaml
import cos_backend
import re
import os
from time import time

# Load the IBM Cloud settings (YAML) used to build the clients below.
with open("ibm_cloud_config.yaml", "r") as config_file:
    res = yaml.safe_load(config_file)

# Expected command line: <script> FILE_NAME NUM_CHUNKS (exactly two
# arguments); nothing below runs otherwise.
if (len(sys.argv) == 3):
    cos = cos_backend.cos_backend(res['ibm_cos'])
    cf = ibm_cf_connector.CloudFunctions(res['ibm_cf'])

    # The bucket name is read interactively from stdin.
    print("Nombre del bucket: ")
    bucket_name = input()

    file_name = sys.argv[1]
    num_chunks = int(sys.argv[2])
    size_file = int(cos.head_object(bucket_name, file_name))

    # Parameter dictionary (presumably the payload sent to the cloud
    # functions -- its use is outside the visible text).
    diccionari = {}
    diccionari["config"] = res['ibm_cos']
    diccionari["num_chunks"] = num_chunks
    diccionari["file_name"] = file_name
    diccionari["bucket_name"] = bucket_name

    # Truncating division: the remainder (size_file % num_chunks) is not
    # handled in the visible code.
    chunk_size = int(size_file / num_chunks)
    llistaRang = []