Example #1
0
def _copy_and_aggregate_other_cluster(job,
                                      reference_digests,
                                      aggregation=False,
                                      output=None):
    """
    merge dirs in aggregation and in a normal merge
    :param job (Job) job structure
    :param reference_digests
    :param aggregation (boolean) check if it should aggregate data

    :return (list) list of clusters that contain the data
    """

    filename = settings.get_temp_dir() + "/job_progress_log.json"
    if not aggregation:
        new_clusters = mergeDirs_reexecution([job.output_path])

        # save progress of the job
        update_json_file(filename, 1)
    else:
        new_clusters = aggregationMergeDirs(job, reference_digests)

        # save progress of the job
        update_json_file(filename, 4)

    if output is not None:
        output.put(new_clusters)
        return

    return new_clusters
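
For illustration only, here is a minimal sketch (not from the original source) of how the optional output queue could be used to collect results when each job runs in its own worker thread, mirroring the thread setup of Example #12; the jobs and reference_digests arguments are placeholders.

import multiprocessing as mp
from threading import Thread

def collect_new_clusters(jobs, reference_digests):
    # hypothetical helper: one worker thread per job; each thread puts its
    # cluster list into the shared queue instead of returning it
    output = mp.Queue()
    threads = [
        Thread(target=_copy_and_aggregate_other_cluster,
               args=(job, reference_digests, False, output))
        for job in jobs
    ]
    for t in threads:
        t.start()
    for t in threads:
        t.join()
    # one result per thread, in completion order
    return [output.get() for _ in threads]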
Example #2
0
def run_verification(job_output, aggregation):
    """
    Check the digests of the set of jobs that have run

    :param job_output (list) list of output of the jobs (json)
    :param aggregation (Boolean) is it for aggregation?

    :return (tuple) the result of the validation (True|False) and the selected digest (None when validation fails)
    """
    result, selected_digest = parse_digests(job_output)

    if settings.medusa_settings.faults_left > 0:
        result = False
        settings.medusa_settings.faults_left -= 1

    if result:
        filename = settings.get_temp_dir() + "/job_progress_log.json"
        step = 3 if not aggregation else 6
        update_json_file(filename, step)

        # clear the flag used when the job has to be re-executed in another cloud
        save_reexecute_another_cloud(False)

        return result, selected_digest

    return False, None
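
run_verification relies on parse_digests, which is not shown in these examples. Purely as an assumption for illustration, it could select the digest map reported by a strict majority of the clusters:

import json
from collections import Counter

def parse_digests(job_output):
    # hypothetical sketch: job_output is a list of per-cluster digest maps
    # (output path -> sha256); accept a map only if a strict majority reports it
    if not job_output:
        return False, None
    counts = Counter(json.dumps(digests, sort_keys=True) for digests in job_output)
    serialized, votes = counts.most_common(1)[0]
    if votes > len(job_output) // 2:
        return True, json.loads(serialized)
    return False, None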
Example #3
0
def save_prediction(job_params):
    """
    Save prediction values of the job into a file
    :param job_params (string) data to be saved
    """

    prediction_file = "%s/prediction.json" % settings.get_temp_dir()
    write_data(prediction_file, job_params)
Example #4
0
def read_remote_job_data():
    """
    Read job data

    :return: output of the command
    """
    command = lcat(settings.get_temp_dir() + "/" + "job_log.json")
    output = medusa.execution.local_execute_command(command)
    return output
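
lcat and local_execute_command are not defined in these snippets. As an assumption for illustration only, lcat could simply build a local `cat` command string that local_execute_command then runs in a shell:

import subprocess

def lcat(path):
    # assumed helper: shell command that prints the local file
    return "cat %s" % path

def local_execute_command(command):
    # assumed helper: run the command locally and return its output
    return subprocess.check_output(command, shell=True)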
Example #5
0
def writeJobRunning(job_output):
    """
    Append job execution results to the job log file
    :param job_output (string) job output data (JSON)
    """
    path = settings.get_temp_dir() + "/job_log.json"
    job_remote_dataset = json.loads(read_data_oneline(path))
    job_remote_dataset["data"].append(json.loads(job_output))
    write_data(path, json.dumps(job_remote_dataset, indent=2))
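
writeJobRunning assumes the log file already contains a JSON object with a "data" list and that read_data_oneline returns the whole file as a single string. A minimal sketch under those assumptions:

import json

def read_data_oneline(path):
    # assumed helper: return the file content as a single string without newlines
    with open(path) as f:
        return f.read().replace("\n", "")

def init_job_log(path):
    # hypothetical initializer so the first append in writeJobRunning succeeds
    with open(path, 'w') as f:
        f.write(json.dumps({"data": []}, indent=2))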
Example #6
0
def load_penalization():
    """ Load penalization values """
    prediction_file = "%s/penalization.json" % settings.get_temp_dir()
    data = read_data(prediction_file)[0]

    print "----"
    print data
    print "----"

    return data
Example #7
0
def load_prediction():
    """
    Load prediction values of the job from a file
    :return: (string) prediction data
    """

    prediction_file = "%s/prediction.json" % settings.get_temp_dir()
    data = read_data(prediction_file)[0]

    return data
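
Several snippets rely on read_data and write_data, which are not shown. A plausible sketch, assuming read_data returns the file as a list of lines (consistent with the [0] indexing above and the ''.join(content) calls in the tests below) and write_data overwrites the file with a string:

def read_data(path):
    # assumed helper: return the file content as a list of lines
    with open(path) as f:
        return f.readlines()

def write_data(path, data):
    # assumed helper: overwrite the file with the given string
    with open(path, 'w') as f:
        f.write(data)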
Example #8
0
def read_remote_network_data(from_cluster, to_cluster, packet_size):
    """
    Read network data
    :param from_cluster: (string) from cluster
    :param to_cluster: (string) to cluster
    :param packet_size: (string) size of the packet
    :return: output of the command
    """
    command = lcat(settings.get_temp_dir() + "/" + from_cluster + "-" +
                   to_cluster + "-" + packet_size + ".json")
    output = local_execute_command(command)

    return output
Example #9
0
    def test_update_json(self):
        path = settings.get_temp_dir() + "/job_log.json"
        # read the job log and normalize it into a single JSON string
        content = read_data(path)
        content = ''.join(content)
        content = content.replace("\n", "")
        content = content.strip()
        content = json.loads(content)

        # update the step of the job with the matching gid
        gid = "3"
        for x in content["jobs"]["job"]:
            if x["gid"] == gid:
                x["step"] = "20"

        write_data(path, json.dumps(content))
Example #10
0
    def test_add_entry_json(self):
        path = settings.get_temp_dir() + "/job_log.json"
        # read the job log and normalize it into a single JSON string
        content = read_data(path)
        content = ''.join(content)
        content = content.replace("\n", "")
        content = content.strip()
        content = json.loads(content)

        # update the step of the job with the matching gid, if it already exists
        gid = "40"
        for x in content["jobs"]["job"]:
            if x["gid"] == gid:
                x["step"] = "20"

        # append a new job entry
        content["jobs"]["job"].append({"gid": "40", "command": "test", "step": "1"})
        write_data(path, json.dumps(content))
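
Both tests assume a job_log.json shaped roughly like the sketch below; the keys are inferred from the code above and the concrete values are placeholders.

# hypothetical sample of job_log.json
sample_job_log = {
    "jobs": {          # consumed by test_update_json and test_add_entry_json
        "job": [
            {"gid": "3", "command": "example-command", "step": "1"}
        ]
    },
    "data": []         # consumed by writeJobRunning (Example #5)
}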
Example #11
0
def run_verification(job_output):
    """
    Check the digests of the set of jobs that have run

    :param job_output (list) list of output of the jobs (json)

    :return (tuple) the result of the validation (True|False) and the selected digest (None when validation fails)
    """
    result, selected_digest = parse_digests(job_output)

    if settings.medusa_settings.faults_left > 0:
        result = False
        settings.medusa_settings.faults_left -= 1

    if result:
        filename = settings.get_temp_dir() + "/job_progress_log.json"
        step = 3
        update_json_file(filename, step)

        return result, selected_digest

    return False, None
Example #12
0
def run_execution_threads(faults, jobs, aggregation, reference_digests):
    """
     Execute jobs in serial

    :param faults: (int) Number of faults to tolerate
    :param jobs: (list) list of Job structures
    :param aggregation: (boolean) is it the aggregation phase or not
    :param reference_digests:
    :return:
    """

    group_jobs = []
    if not jobs:
        return group_jobs

    logging.info(" Running scheduling: %s" % medusa_settings.ranking_scheduler)

    job_args = []

    # Setup a list of processes that we want to run
    output = mp.Queue()
    processes = [
        Thread(target=_copy_and_aggregate,
               args=(job, reference_digests, aggregation, output))
        for job in jobs
    ]

    # Run and exit processes
    [p.start() for p in processes]
    [p.join() for p in processes]

    # Get process results from the output queue
    list_clusters = [output.get() for _ in processes]

    for clusters_to_launch_job, job in zip(list_clusters, jobs):
        logging.debug("Clusters included %s" % clusters_to_launch_job)
        job_args.append(
            ExecutionJob(job.id, clusters_to_launch_job, job.command,
                         job.output_path + '/part*', majority(faults)))

    # if medusa_settings.relaunch_job_other_cluster and not aggregation:
    #     logging.warn("Please shut one cluster down... Execution will resume in 10 secs.")
    #     time.sleep(10)

    logging.info("Running %s jobs..." % (len(job_args)))
    seffective_job_runtime = time.time()

    processes = []
    for execution_parameters in job_args:
        # Each thread executes a job in the respective clusters
        processes.append(
            Thread(target=run_job, args=(
                execution_parameters,
                output,
            )))

    # Run processes
    [p.start() for p in processes]
    [p.join() for p in processes]

    logging.info("Run_job took %s" % str(seffective_job_runtime - time.time()))

    _output_list = [output.get() for _ in processes]
    # NOTE: only the first result retrieved from the queue is parsed and verified here
    job_output_list = [parse_data(_output) for _output in _output_list[0]]

    digests_matrix = []
    while True:
        successful, digests = run_verification(job_output_list, aggregation)
        if not successful:
            if medusa_settings.relaunch_job_same_cluster:
                # relaunch job in the same cloud
                path_to_remove = os.path.dirname(
                    execution_parameters.output_path)
                _relaunch_job_same_cluster(execution_parameters,
                                           path_to_remove)
            else:
                logging.debug("Re-launching job %s" %
                              execution_parameters.command)
                save_reexecute_another_cloud(True)
                execution_parameters = _relaunch_job_other_cluster(
                    execution_parameters, jobs, reference_digests, aggregation)

            _job_output = run_job(execution_parameters)
            for _output in _job_output:
                job_output_list.append(parse_data(_output))
        else:
            digests_matrix.append(digests)
            break

    # save progress of the job
    filename = settings.get_temp_dir() + "/job_progress_log.json"
    step = 2 if not aggregation else 5
    update_json_file(filename, step)

    eeffective_job_runtime = time.time()
    span = str(eeffective_job_runtime - seffective_job_runtime)
    """ The total time that it took to execute all jobs """
    logging.info("Effective job run-time: %s" % span)

    return digests_matrix
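
The progress log written through update_json_file is not defined in these examples. Purely as an assumption for illustration, it could persist the last completed pipeline step (1 to 6) in job_progress_log.json:

import json

def update_json_file(filename, step):
    # assumed helper: record the last completed pipeline step (1..6)
    with open(filename, 'w') as f:
        json.dump({"step": step}, f)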
Example #13
0
def run_execution_serial(faults, jobs, aggregation, reference_digests):
    """
    Execute jobs in serial

    :param faults: (int) Number of faults to tolerate
    :param jobs: (list) list of Job structures
    :param aggregation: (boolean) is it the aggregation phase or not
    :param reference_digests: (RefDigests) digests of reference
    :return: list with the result of the selected digest. Ex: (True, {u'/aggregate-output/part-r-00000': u'e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855'})
    """

    group_jobs = []
    if not jobs:
        return group_jobs

    logging.info(" Running scheduling: %s" % medusa_settings.ranking_scheduler)

    job_args = []
    for job in jobs:
        clusters_to_launch_job = _copy_and_aggregate(job, reference_digests,
                                                     aggregation)

        logging.debug("Clusters included %s" % clusters_to_launch_job)
        job_args.append(
            ExecutionJob(job.id, clusters_to_launch_job, job.command,
                         job.output_path + '/part*', majority(faults)))

    # if medusa_settings.relaunch_job_other_cluster and not aggregation:
    #     logging.warn("Please shut one cluster down... Execution will resume in 10 secs.")
    #     time.sleep(10)

    logging.info("Running %s jobs..." % (len(job_args)))
    seffective_job_runtime = time.time()

    digests_matrix = []
    for execution_parameters in job_args:
        _job_output_list = []
        while True:
            _job_output = run_job(
                execution_parameters)  # run job in the set of clusters
            for _output in _job_output:
                _job_output_list.append(parse_data(_output))

            successful, digests = run_verification(_job_output_list,
                                                   aggregation)
            if not successful:
                if medusa_settings.relaunch_job_same_cluster:
                    # relaunch job in the same cloud
                    path_to_remove = os.path.dirname(
                        execution_parameters.output_path)
                    _relaunch_job_same_cluster(execution_parameters,
                                               path_to_remove)
                else:
                    logging.debug("Re-launching job %s" %
                                  execution_parameters.command)
                    save_reexecute_another_cloud(True)
                    execution_parameters = _relaunch_job_other_cluster(
                        execution_parameters, jobs, reference_digests,
                        aggregation)
            else:
                digests_matrix.append(digests)
                break

    # save progress of the job
    filename = settings.get_temp_dir() + "/job_progress_log.json"
    step = 2 if not aggregation else 5
    update_json_file(filename, step)

    eeffective_job_runtime = time.time()
    span = str(eeffective_job_runtime - seffective_job_runtime)
    """ The total time that it took to execute all jobs """
    logging.info("Effective job run-time: %s" % span)

    return digests_matrix
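
majority(faults) is used above but not shown. As a loose assumption (the project may define it differently), a common choice is to require faults + 1 matching results so that up to `faults` faulty clusters can be outvoted:

def majority(faults):
    # assumption: number of matching outputs required to tolerate `faults` faulty clusters
    return faults + 1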
Example #14
0
def save_penalization(penalization_values):
    """ Save penalization values """
    prediction_file = "%s/penalization.json" % settings.get_temp_dir()

    with open(prediction_file, 'w') as the_file:
        the_file.write(penalization_values)
Example #15
0
def run_execution_threads(faults, jobs):
    """
     Execute jobs in serial

    :param faults: (int) Number of faults to tolerate
    :param jobs: (list) list of Job structures
    :param reference_digests:
    :return:
    """

    group_jobs = []
    if not jobs:
        return group_jobs

    logging.info(" Running scheduling: %s" % medusa_settings.ranking_scheduler)

    # Setup a list of processes that we want to run
    output = mp.Queue()

    # Get process results from the output queue
    clusters_to_launch_job = pick_up_clusters(0)

    job_args = []
    for job in jobs:
        job_args.append(
            ExecutionJob(job.id, clusters_to_launch_job, job.command, job.output_path + '/part*', majority(faults)))

    logging.info("Running %s jobs..." % (len(job_args)))
    seffective_job_runtime = time.time()

    processes = []
    for execution_parameters in job_args:
        # Each thread executes a job in the respective clusters
        processes.append(Thread(target=run_job, args=(execution_parameters, output,)))

    # Run processes
    [p.start() for p in processes]
    [p.join() for p in processes]

    # NOTE: only one result is retrieved from the output queue here
    _output_list = output.get()
    logging.info("Run_job took %s" % str(time.time() - seffective_job_runtime))

    spart = time.time()
    _job_output = []
    for _output in _output_list:
        _job_output += _output

    job_output_list = [parse_data(_joutput) for _joutput in _job_output]

    logging.info("Parse_data took %s" % str(time.time() - spart))

    srverification = time.time()
    digests_matrix = []
    while True:
        successful, digests = run_verification(job_output_list)
        if not successful:
            if medusa_settings.relaunch_job_same_cluster:
                # relaunch job in the same cloud
                path_to_remove = os.path.dirname(execution_parameters.output_path)
                _relaunch_job_same_cluster(execution_parameters, path_to_remove)
            else:
                logging.debug("Re-launching job %s" % execution_parameters.command)
                execution_parameters = _relaunch_job_other_cluster(execution_parameters, jobs)

            _job_output = run_job(execution_parameters)
            for _output in _job_output:
                job_output_list.append(parse_data(_output[0]))
        else:
            digests_matrix.append(digests)
            break
    logging.info("Run_verification took %s" % str(time.time() - srverification))

    # save progress of the job
    filename = settings.get_temp_dir() + "/job_progress_log.json"
    step = 2
    update_json_file(filename, step)

    eeffective_job_runtime = time.time()
    span = str(eeffective_job_runtime - seffective_job_runtime)

    """ The total time that it took to execute all jobs """
    logging.info("Effective job run-time: %s" % span)

    return digests_matrix