Example #1
import sys
import time

import yaml

import cos_backend
import ibm_cf_connector


def main():
    try:
        with open('.ibm_cloud_config', 'r') as config_file:
            config = yaml.safe_load(config_file)
    except FileNotFoundError:
        print("Configuration file '.ibm_cloud_config' not found.\n")
        exit(1)

    if len(sys.argv) != 3:
        print("Usage:\n\torchestrator.py INPUT_FILE PARTITIONS\n")
        exit(2)

    try:
        int(sys.argv[2])
    except ValueError:
        print(
            "Wrong parameter format.\nUsage:\n\torchestrator.py INPUT_FILE PARTITIONS\n"
        )
        exit(3)

    if int(sys.argv[2]) < 1:
        print("The number of partitions must be greater than 0.\n")
        exit(4)

    cos = cos_backend.COSBackend(config['ibm_cos'])
    cf = ibm_cf_connector.CloudFunctions(config['ibm_cf'])

    file_name = sys.argv[1]
    partitions = int(sys.argv[2])

    data = {}
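    # Parameters handed to the map (counting words / word count) and reduce
    # actions; part_size is the per-partition byte range and final_size the
    # leftover bytes of the object.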
    data['config'] = config
    data['n_partitions'] = partitions
    data['file_name'] = file_name
    data['bucket'] = 'sdtp1'
    data['file_size'] = int(cos.head_object(data['bucket'], file_name))
    data['part_size'] = int(data['file_size'] / data['n_partitions'])
    data['final_size'] = int(data['file_size'] % data['n_partitions'])

    start_time = time.time()
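    # Run the counting-words map phase, then wait for the shared 'reduce'
    # action; its result object is removed from COS after timing the run.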
    invoke_countingword(data, cf)
    cf.invoke_with_result('reduce', data)
    final_time = time.time()

    print('Counting Words execution time: ' +
          '{:.3f}'.format(final_time - start_time))
    cos.delete_object(data['bucket'], data['file_name'] + '.reduce')

    start_time = time.time()
    invoke_wordcount(data, cf)
    cf.invoke_with_result('reduce', data)
    final_time = time.time()

    print('Word Count execution time: ' +
          '{:.3f}'.format(final_time - start_time))
    cos.delete_object(data['bucket'], data['file_name'] + '.reduce')

    exit(0)
Example #2
    def inicialitzacions(self, dataset, chunks, conf):
        self.functions = ibm_cf_connector.CloudFunctions(conf['ibm_cf'])
        self.cos = COSBackend(conf['ibm_cos'])
        print("\nInicialitzant IBM COS\n")
        self.dataset = dataset
        self.bucket = conf['ibm_cos']['bucket']
        print(self.bucket)
        self.chunks = int(chunks)
        print("\nInicialitzant COS Backend\n")
        self.credentials = conf
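        # Unique run identifier derived from the current timestamp.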
        self.id = str(datetime.now().timestamp()).replace(".", "")
        self.size_file = 0
        self.uploadDataset()
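        # Package each action's code as a zip archive before registering it
        # with IBM Cloud Functions.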
        self.zipFile('wordCount')
        self.zipFile('countingWords')
        self.zipFile('reduce')

        # create the functions
        self.createFunction(self.functions, "wordCount.zip", "word_count")
        self.createFunction(self.functions, "countingWords.zip",
                            "counting_words")
        self.createFunction(self.functions, "reduce.zip", "reduce")
Example #3
    # Poll COS until the result object (res_file) appears in the bucket.
    while i == 0:
        i = ibm_cos.list_objects(bucket, res_file)


if __name__ == '__main__':
    # Read arguments
    ap = argparse.ArgumentParser()
    ap.add_argument("-f", "--file", required=True, help="name of the file")
    ap.add_argument("-c", "--chunks", required=True, help="number of chunks")
    ap.add_argument("-b", "--bucket", required=True, help="name of the bucket")
    args = vars(ap.parse_args())

    with open('ibm_cloud_config.yaml', 'r') as config_file:
        res = yaml.safe_load(config_file)
    ibm_cos = cb.COSbackend(res['ibm_cos'])
    ibm_cf = cf.CloudFunctions(res['ibm_cf'])

    bucket = args["bucket"]
    chunks = int(args["chunks"])
    src_file = args["file"]
    file_size = ibm_cos.head_object(bucket, src_file)
    file_size = int(file_size['content-length'])
    chunk_size = int(file_size / chunks)

    # Parameters that will be passed to mapCountWord, mapWordCount and reducer
    params = {}
    params["config"] = res['ibm_cos']
    params["file"] = src_file
    params["bucket"] = bucket
    params["chunks"] = chunks
Example #4

import json
import sys
from time import time

import yaml

import cos_backend
import ibm_cf_connector

with open('.ibm-cloud_config', 'r') as config_file:
    configu = yaml.safe_load(config_file)

if (len(sys.argv) >= 3):
    print(
        "Escolleix si vols fer el wordCount o el countingWords: (wordCount(1)/countingWords(2))"
    )
    opcio = int(input())
    if (opcio == 1 or opcio == 2):
        file_name = sys.argv[1]
        n_particions = int(sys.argv[2])
        cos = cos_backend.cos_backend(configu['ibm_cos'])
        tamany_fitxer = int(cos.head_object('joanuni', file_name))
        provemFunc = ibm_cf_connector.CloudFunctions(configu['ibm_cf'])
        diccionari = {}
        diccionari["config"] = configu['ibm_cos']
        diccionari["file_name"] = file_name
        diccionari["particions"] = n_particions
        actual = 0
        aux = int(tamany_fitxer / n_particions)
        seguent = aux
        modul = tamany_fitxer % n_particions
        i = 0
        fitxer = 0
        instanteInicial = time()
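        # Build the byte range for each partition; the last partition also
        # takes the remainder bytes (modul) so the whole file is covered.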
        while (i < n_particions):
            tamany_agafar = "bytes=" + str(actual) + "-" + str(seguent)
            if (n_particions - i == 1):
                seguent += modul
Example #5
import ibm_cf_connector
import sys
import yaml
import cos_backend
import re
import os
from time import time

with open("ibm_cloud_config.yaml", "r") as config_file:
    res = yaml.safe_load(config_file)

if (len(sys.argv) == 3):

    cos = cos_backend.cos_backend(res['ibm_cos'])
    cf = ibm_cf_connector.CloudFunctions(res['ibm_cf'])
    print("Nombre del bucket: ")
    bucket_name = input()

    file_name = sys.argv[1]
    num_chunks = int(sys.argv[2])
    size_file = int(cos.head_object(bucket_name, file_name))

    diccionari = {}
    diccionari["config"] = res['ibm_cos']
    diccionari["num_chunks"] = num_chunks
    diccionari["file_name"] = file_name
    diccionari["bucket_name"] = bucket_name

    chunk_size = int(size_file / num_chunks)
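    # llistaRang will collect the byte range assigned to each chunk.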
    llistaRang = []