Example #1
def parse_digests(job_output):
    """
    return a list of digests
    """

    maj = majority(medusa_settings.faults)
    # list of dicts
    nset_digests = []
    for job in job_output:
        ndigests = {}
        for digest in job.digests:
            ndigests.update(digest)  # merge the per-file digest dicts

        nset_digests.append(ndigests)  # one merged dict per job

    # the output-file keys, taken from the first job's digests
    keys = []
    for _d in job_output[0].digests:
        keys += _d.keys()

    digests_matrix = []
    for k in keys:
        temp_val = []
        for dset in nset_digests:
            temp_val.append(dset[k])

        value, count = Counter(temp_val).most_common(1)[0]

        if count >= maj:
            digests_matrix.append((True, value))
        else:
            return False, None

    return True, digests_matrix
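# A minimal, self-contained sketch of the per-key majority check that
# parse_digests performs, assuming majority(f) = f + 1 (f faulty clusters are
# outvoted by f + 1 matching digests). The names below are illustrative
# stand-ins, not the real medusa helpers.
from collections import Counter


def _demo_digest_majority(faults=1):
    # digests reported by three clusters for the same output file
    reported = ["sha-aaa", "sha-aaa", "sha-bbb"]
    value, count = Counter(reported).most_common(1)[0]
    # the digest is accepted only if a majority of clusters agree on it
    if count >= faults + 1:  # hypothetical majority(faults)
        return True, value   # -> (True, 'sha-aaa')
    return False, None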
Example #2
def needs_more_copies(subset, pinput, faults):
    """
    check how many clusters contains all the inputs
    """

    # count how many clusters already hold every input path
    entries = collections.Counter(
        {my_obj.cluster: len(my_obj.paths) for my_obj in subset})
    count = sum(1 for entry in entries.items() if entry[1] >= len(pinput))

    return count < majority(faults)
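# Illustrative run of the same counting idea with a two-path input set and an
# assumed majority(faults) of 3; _FakeEntry is a hypothetical stand-in for the
# real subset items.
import collections


def _demo_needs_more_copies():
    class _FakeEntry(object):
        def __init__(self, cluster, paths):
            self.cluster = cluster
            self.paths = paths

    pinput = ["/in/a", "/in/b"]
    subset = [_FakeEntry("c1", pinput),
              _FakeEntry("c2", pinput),
              _FakeEntry("c3", ["/in/a"])]
    entries = collections.Counter(
        {entry.cluster: len(entry.paths) for entry in subset})
    complete = sum(1 for _, n in entries.items() if n >= len(pinput))
    # only two clusters hold every input, so more copies are needed
    return complete < 3  # hypothetical majority(faults) == 3 -> True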
def pick_up_clusters(step, force=False):
    """
    Get a set of running clusters that will be used to run the job

    :param step(int) step of execution
    :param force (boolean) force to execute the if clause
    :return list of clusters
    """

    if step == 0 or force:  # if it is the first time that we are getting a set of clusters
        n = majority(medusa_settings.faults)
        clusters = simplecache.SimpleCache.get_pick_up_clusters()[0:n]
    else:
        clusters = simplecache.SimpleCache.get_pick_up_clusters()

    return clusters
def vote(digests, aggregation, faults=0):
    """
    from a matrix get a majority of values

    E.g.str(data).splitlines()
    {gid:[[list digests 1], [list digests 2]}
    """
    if faults == 0:
        return None, False

    value = Counter(tuple(item) for item in digests).most_common(1)

    if medusa_settings.digests_fault and not aggregation and medusa_settings.faults_left > 0:
        decrease_faults()
        return None, False

    nr_produced_equal_digests = value[0][1]
    if nr_produced_equal_digests < majority(faults):
        return None, False

    digests_selected = value[0][0]
    return digests_selected, True
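# Sketch of the row-wise voting that vote() performs, assuming
# majority(f) = f + 1; the digest matrix below is made up for illustration.
from collections import Counter


def _demo_vote(faults=1):
    digests = [
        ["d1", "d2"],  # digests reported by cluster A
        ["d1", "d2"],  # digests reported by cluster B
        ["d1", "dX"],  # cluster C disagrees on the second file
    ]
    (winner, count), = Counter(tuple(row) for row in digests).most_common(1)
    if count < faults + 1:   # hypothetical majority(faults)
        return None, False
    return winner, True      # -> (('d1', 'd2'), True)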
Example #5
def run_execution_threads(faults, jobs, aggregation, reference_digests):
    """
     Execute jobs in serial

    :param faults: (int) Number of faults to tolerate
    :param jobs: (list) list of Job structures
    :param aggregation: (boolean) is it the aggregation phase or not
    :param reference_digests:
    :return:
    """

    group_jobs = []
    if not jobs:
        return group_jobs

    logging.info(" Running scheduling: %s" % medusa_settings.ranking_scheduler)

    job_args = []

    # Setup a list of processes that we want to run
    output = mp.Queue()
    processes = [
        Thread(target=_copy_and_aggregate,
               args=(job, reference_digests, aggregation, output))
        for job in jobs
    ]

    # Start the worker threads and wait for them to finish
    for p in processes:
        p.start()
    for p in processes:
        p.join()

    # Get process results from the output queue
    list_clusters = [output.get() for _ in processes]

    for clusters_to_launch_job, job in zip(list_clusters, jobs):
        logging.debug("Clusters included %s" % clusters_to_launch_job)
        job_args.append(
            ExecutionJob(job.id, clusters_to_launch_job, job.command,
                         job.output_path + '/part*', majority(faults)))

    # if medusa_settings.relaunch_job_other_cluster and not aggregation:
    #     logging.warn("Please shut one cluster down... Execution will resume in 10 secs.")
    #     time.sleep(10)

    logging.info("Running %s jobs..." % (len(job_args)))
    seffective_job_runtime = time.time()

    processes = []
    for execution_parameters in job_args:
        # Each thread executes a job in the respective clusters
        processes.append(
            Thread(target=run_job, args=(
                execution_parameters,
                output,
            )))

    # Run processes
    for p in processes:
        p.start()
    for p in processes:
        p.join()

    logging.info("Run_job took %s" % str(seffective_job_runtime - time.time()))

    job_output_list = []
    _output_list = [output.get() for _ in processes]
    _job_output = list(_output_list[0])
    for _output in _job_output:
        job_output_list.append(parse_data(_output))

    digests_matrix = []
    while True:
        successful, digests = run_verification(job_output_list, aggregation)
        if not successful:
            if medusa_settings.relaunch_job_same_cluster:
                # relaunch job in the same cloud
                path_to_remove = os.path.dirname(
                    execution_parameters.output_path)
                _relaunch_job_same_cluster(execution_parameters,
                                           path_to_remove)
            else:
                logging.debug("Re-launching job %s" %
                              execution_parameters.command)
                save_reexecute_another_cloud(True)
                execution_parameters = _relaunch_job_other_cluster(
                    execution_parameters, jobs, reference_digests, aggregation)

            _job_output = run_job(execution_parameters)
            for _output in _job_output:
                job_output_list.append(parse_data(_output))
        else:
            digests_matrix.append(digests)
            break

    # save progress of the job
    filename = settings.get_temp_dir() + "/job_progress_log.json"
    step = 2 if not aggregation else 5
    update_json_file(filename, step)

    eeffective_job_runtime = time.time()
    span = str(eeffective_job_runtime - seffective_job_runtime)
    """ The total time that it took to execute all jobs """
    logging.info("Effective job run-time: %s" % span)

    return digests_matrix
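# Minimal sketch of the fan-out/fan-in pattern used above: one worker thread
# per job pushes its result onto a shared queue, and the caller drains one
# result per thread after joining. queue.Queue is used here for brevity; the
# function above uses multiprocessing.Queue, which serves the same purpose.
from queue import Queue
from threading import Thread


def _demo_fan_out(job_ids):
    output = Queue()

    def _worker(job_id, out):
        # stand-in for _copy_and_aggregate / run_job
        out.put("result-for-%s" % job_id)

    workers = [Thread(target=_worker, args=(jid, output)) for jid in job_ids]
    for w in workers:
        w.start()
    for w in workers:
        w.join()
    # one result per worker; completion order is not guaranteed
    return [output.get() for _ in workers]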
Example #6
def run_execution_serial(faults, jobs, aggregation, reference_digests):
    """
     Execute jobs in serial

    :param faults: (int) Number of faults to tolerate
    :param jobs: (list) list of Job structures
    :param aggregation: (boolean) is it the aggregation phase or not
    :param reference_digests: (RefDigests) digests of reference
    :return: list with the result of the selected digest. Ex: (True, {u'/aggregate-output/part-r-00000': u'e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855'})
    """

    group_jobs = []
    if not jobs:
        return group_jobs

    logging.info(" Running scheduling: %s" % medusa_settings.ranking_scheduler)

    job_args = []
    for job in jobs:
        clusters_to_launch_job = _copy_and_aggregate(job, reference_digests,
                                                     aggregation)

        logging.debug("Clusters included %s" % clusters_to_launch_job)
        job_args.append(
            ExecutionJob(job.id, clusters_to_launch_job, job.command,
                         job.output_path + '/part*', majority(faults)))

    # if medusa_settings.relaunch_job_other_cluster and not aggregation:
    #     logging.warn("Please shut one cluster down... Execution will resume in 10 secs.")
    #     time.sleep(10)

    logging.info("Running %s jobs..." % (len(job_args)))
    seffective_job_runtime = time.time()

    digests_matrix = []
    for execution_parameters in job_args:
        _job_output_list = []
        while True:
            _job_output = run_job(
                execution_parameters)  # run job in the set of clusters
            for _output in _job_output:
                _job_output_list.append(parse_data(_output))

            successful, digests = run_verification(_job_output_list,
                                                   aggregation)
            if not successful:
                if medusa_settings.relaunch_job_same_cluster:
                    # relaunch job in the same cloud
                    path_to_remove = os.path.dirname(
                        execution_parameters.output_path)
                    _relaunch_job_same_cluster(execution_parameters,
                                               path_to_remove)
                else:
                    logging.debug("Re-launching job %s" %
                                  execution_parameters.command)
                    save_reexecute_another_cloud(True)
                    execution_parameters = _relaunch_job_other_cluster(
                        execution_parameters, jobs, reference_digests,
                        aggregation)
            else:
                digests_matrix.append(digests)
                break

    # save progress of the job
    filename = settings.get_temp_dir() + "/job_progress_log.json"
    step = 2 if not aggregation else 5
    update_json_file(filename, step)

    eeffective_job_runtime = time.time()
    span = str(eeffective_job_runtime - seffective_job_runtime)
    """ The total time that it took to execute all jobs """
    logging.info("Effective job run-time: %s" % span)

    return digests_matrix
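# Compact sketch of the run/verify/retry loop that the serial and threaded
# variants share: keep accumulating job outputs and re-running the job (on the
# same or another cluster) until digest verification succeeds. run_once,
# verify and relaunch are hypothetical callables, not medusa functions.
def _demo_retry_until_verified(run_once, verify, relaunch):
    outputs = []
    while True:
        outputs.extend(run_once())             # one more round of job outputs
        successful, digests = verify(outputs)  # majority vote over all outputs
        if successful:
            return digests
        relaunch()                             # e.g. move the job to another cluster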
def run_execution_threads(faults, jobs):
    """
     Execute jobs in serial

    :param faults: (int) Number of faults to tolerate
    :param jobs: (list) list of Job structures
    :param reference_digests:
    :return:
    """

    group_jobs = []
    if not jobs:
        return group_jobs

    logging.info(" Running scheduling: %s" % medusa_settings.ranking_scheduler)

    # Setup a list of processes that we want to run
    output = mp.Queue()

    # Get process results from the output queue
    clusters_to_launch_job = pick_up_clusters(0)

    job_args = []
    for job in jobs:
        job_args.append(
            ExecutionJob(job.id, clusters_to_launch_job, job.command, job.output_path + '/part*', majority(faults)))

    logging.info("Running %s jobs..." % (len(job_args)))
    seffective_job_runtime = time.time()

    processes = []
    for execution_parameters in job_args:
        # Each thread executes a job in the respective clusters
        processes.append(Thread(target=run_job, args=(execution_parameters, output,)))

    # Run processes
    for p in processes:
        p.start()
    for p in processes:
        p.join()

    _output_list = output.get()
    logging.info("Run_job took %s" % str(time.time() - seffective_job_runtime))

    spart = time.time()
    _job_output = []
    for _output in _output_list:
        _job_output += _output

    job_output_list = [parse_data(_joutput) for _joutput in _job_output]

    logging.info("Parse_data took %s" % str(time.time() - spart))

    srverification = time.time()
    digests_matrix = []
    while True:
        successful, digests = run_verification(job_output_list)
        if not successful:
            if medusa_settings.relaunch_job_same_cluster:
                # relaunch job in the same cloud
                path_to_remove = os.path.dirname(execution_parameters.output_path)
                _relaunch_job_same_cluster(execution_parameters, path_to_remove)
            else:
                logging.debug("Re-launching job %s" % execution_parameters.command)
                execution_parameters = _relaunch_job_other_cluster(execution_parameters, jobs)

            _job_output = run_job(execution_parameters)
            for _output in _job_output:
                job_output_list.append(parse_data(_output[0]))
        else:
            digests_matrix.append(digests)
            break
    logging.info("Run_verification took %s" % str(time.time() - srverification))

    # save progress of the job
    filename = settings.get_temp_dir() + "/job_progress_log.json"
    step = 2
    update_json_file(filename, step)

    eeffective_job_runtime = time.time()
    span = str(eeffective_job_runtime - seffective_job_runtime)

    """ The total time that it took to execute all jobs """
    logging.info("Effective job run-time: %s" % span)

    return digests_matrix
def run_execution(faults, jobs, aggregation):
    """
    faults is the number of faults to tolerate
    jobs is the
    """
    group_jobs = []
    if not jobs:
        return group_jobs

    print " Running scheduling: %s" % medusa_settings.ranking_scheduler

    seffective_job_runtime = time.time()
    pool = Pool(processes=4)

    history_rank = defaultdict(lambda: 1)

    args = []
    for job in jobs:
        gid = str(int(time.time()))
        if not aggregation:
            args.append((gid, job, faults, history_rank))
        else:
            args.append((gid, job, faults))

    outputs = []
    if not aggregation:
        outputs = pool.map(mergeDirs, args)
    else:
        outputs = pool.map(aggregationMergeDirs, args)

    for output in outputs:
        new_included, new_command, new_poutput = output
        print "Clusters included %s" % new_included

        params = (new_command, new_poutput + '/part*')
        pparams = (new_included, params, majority(faults))
        job_result = run_job_test(pparams)
        # job_args.append((new_included, params, majority(faults)))

    # outputs=pool2.map(run_job, job_args)
    # seffective_job_runtime = time.time()
    # print "Running jobs (%s)" %(new_command)
    # gjobs = run_job(new_included, params, majority(faults))
    # group_jobs.append({gid: gjobs})

    group_data = []
    for waiter in group_jobs:
        dlist = []
        key, value = list(waiter.items())[0]
        for v in value:
            cluster, xmlfile = v.get()
            # print xmlfile
            print "Job finished at %s" % cluster
            data = parseJobOutputMetrics(xmlfile)

            logline = "%s:%s:%s:%s:%s:%s:%s:%s" % (cluster,
                                                   data[
                                                       'currentqueuecapacity'],
                                                   data['hdfsbytesread'],
                                                   data['hdfsbyteswritten'],
                                                   data['jobsrunning'],
                                                   data['maps'],
                                                   data['reduces'],
                                                   data['time'])

            command = writeJobRunning(logline)
            s1 = executeCommand.apply_async(queue=cluster, args=(command,))
            s1.get()

            dlist.append([data['digests']])

        group_data.append({key: dlist})

    eeffective_job_runtime = time.time()

    span = str(eeffective_job_runtime - seffective_job_runtime)

    """ The total time that it took to execute all jobs """
    print "Effective job run-time: %s" % span

    return group_data
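# Illustrative use of multiprocessing.Pool.map, as in run_execution: each
# argument tuple is handed to a merge function and the pool returns the outputs
# in order. _merge is a hypothetical stand-in for mergeDirs/aggregationMergeDirs.
from multiprocessing import Pool


def _merge(arg):
    gid, job, faults = arg
    return "merged-%s" % gid


def _demo_pool_map():
    args = [("g1", "job-1", 1), ("g2", "job-2", 1)]
    with Pool(processes=2) as pool:
        return pool.map(_merge, args)  # ['merged-g1', 'merged-g2']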