Exemple #1
0
def run_prepare(job_id, job, no_new_learn=0):
    """
    Parse step's parameter and predict the resources needed by the step
    :param job_id: int, jod id
    :param job: dict, job dict
    :param no_new_learn: int, 1 means refusing creating new training item
    :return:
    """
    learning = 0

    if job['status'] == -1 and job['resume'] != -1:
        # skip and resume
        OUTPUT_DICT[job_id] = baseDriver.load_output_dict(job_id)

    if (job['resume'] + 1) == len(job['steps']):
        return None
    elif job['status'] > 0:
        return 'running'
    else:
        step = job['steps'][job['resume'] + 1]['parameter']

    step = step.replace('{Job}', str(job_id))

    if job_id in LAST_OUTPUT_STRING.keys():
        step = step.replace('{LastOutput}', LAST_OUTPUT_STRING[job_id])
    if job_id in OUTPUTS.keys():
        step = step.replace('{AllOutputBefore}', ' '.join(OUTPUTS[job_id]))
    if job_id in NEW_FILES.keys():
        step = parameterParser.last_output_map(step, NEW_FILES[job_id])
    if job_id in JOB_PARAMETERS.keys():
        step = parameterParser.special_parameter_map(step,
                                                     JOB_PARAMETERS[job_id])
    if job_id in OUTPUT_DICT.keys():
        step = parameterParser.output_file_map(step, OUTPUT_DICT[job_id])
    if job_id in JOB_INPUT_FILES.keys():
        step, outside_size = parameterParser.input_file_map(
            step, JOB_INPUT_FILES[job_id], job['user_folder'])
    if job_id in LAST_OUTPUT_SUFFIX.keys(
    ) and job_id in OUTPUT_DICT_SUFFIX.keys():
        step = parameterParser.suffix_map(step, OUTPUT_DICT_SUFFIX[job_id],
                                          LAST_OUTPUT_SUFFIX[job_id])
    step, outside_size_upload = parameterParser.upload_file_map(
        step, job['user_folder'])
    outside_size += outside_size_upload
    step = step.replace('{Workspace}', job['job_folder'])
    step = step.replace('{ThreadN}', str(settings['env']['cpu']))
    JOB_COMMAND[job_id] = parameterParser.parameter_string_to_list(step)
    LAST_OUTPUT[job_id] = baseDriver.get_folder_content(job['job_folder'])
    training_num = get_training_items(job['steps'][job['resume'] + 1]['hash'])
    if training_num < 10:
        learning = 1

    if INPUT_SIZE[job_id] == 0:
        INPUT_SIZE[job_id] = baseDriver.get_folder_size(job['job_folder'])
    else:
        if job_id in OUTPUT_SIZE.keys():
            INPUT_SIZE[job_id] = OUTPUT_SIZE[job_id]
        else:
            INPUT_SIZE[job_id] = 0
    FOLDER_SIZE_BEFORE[job_id] = baseDriver.get_folder_size(job['job_folder'])
    INPUT_SIZE[job_id] += outside_size

    resource_needed = checkPoint.predict_resource_needed(
        job['steps'][job['resume'] + 1]['hash'], INPUT_SIZE[job_id],
        training_num)

    if learning == 1 and no_new_learn == 0:
        trace_id = create_machine_learning_item(
            job['steps'][job['resume'] + 1]['hash'], INPUT_SIZE[job_id])
        resource_needed['trace'] = trace_id

    return resource_needed
Exemple #2
0
def run_prepare(job_id, job, no_new_learn=0):
    """
    Parse step's parameter and predict the resources needed by the step
    :param job_id: int, jod id
    :param job: dict, job dict
    :param no_new_learn: int, 1 means refusing creating new training item
    :return:
    """
    global LAST_OUTPUT_STRING, OUTPUTS, OUTPUT_DICT, OUTPUT_DICT_SUFFIX, NEW_FILES, LAST_OUTPUT, LAST_OUTPUT_STRING
    learning = 0
    outside_size = 0

    if job['status'] == -1 and job['resume'] != -1:
        # skip and resume
        tmp_dict = baseDriver.load_output_dict(job_id)
        if 'LAST_OUTPUT_STRING' in tmp_dict.keys():
            LAST_OUTPUT_STRING[job_id] = tmp_dict['LAST_OUTPUT_STRING']
        if 'OUTPUTS' in tmp_dict.keys():
            OUTPUTS[job_id] = tmp_dict['OUTPUTS']
        if 'OUTPUT_DICT' in tmp_dict.keys():
            OUTPUT_DICT[job_id] = tmp_dict['OUTPUT_DICT']
        if 'OUTPUT_DICT_SUFFIX' in tmp_dict.keys():
            OUTPUT_DICT_SUFFIX[job_id] = tmp_dict['OUTPUT_DICT_SUFFIX']
        if 'NEW_FILES' in tmp_dict.keys():
            NEW_FILES[job_id] = tmp_dict['NEW_FILES']
        if 'LAST_OUTPUT' in tmp_dict.keys():
            LAST_OUTPUT[job_id] = tmp_dict['LAST_OUTPUT']
        if 'LAST_OUTPUT_SUFFIX' in tmp_dict.keys():
            LAST_OUTPUT_SUFFIX[job_id] = tmp_dict['LAST_OUTPUT_SUFFIX']

    if (job['resume'] + 1) == len(job['steps']):
        return None
    elif job['status'] > 0:
        return 'running'
    else:
        step = job['steps'][job['resume'] + 1]['parameter']

    step = step.replace('{Job}', str(job_id))
    step = step.replace('{JobName}', str(JOB_TABLE[job_id]['name']))

    if job_id in LAST_OUTPUT_STRING.keys():
        step = step.replace('{LastOutput}', LAST_OUTPUT_STRING[job_id])
    if job_id in OUTPUTS.keys():
        step = step.replace('{AllOutputBefore}', ' '.join(OUTPUTS[job_id]))
    if job_id in NEW_FILES.keys():
        step = parameterParser.last_output_map(step, NEW_FILES[job_id])
    if job_id in JOB_PARAMETERS.keys():
        step = parameterParser.special_parameter_map(step,
                                                     JOB_PARAMETERS[job_id])
    if job_id in OUTPUT_DICT.keys():
        step = parameterParser.output_file_map(step, OUTPUT_DICT[job_id])
    if job_id in JOB_INPUT_FILES.keys():
        step, outside_size = parameterParser.input_file_map(
            step, JOB_INPUT_FILES[job_id], job['user_folder'])
    if job_id in LAST_OUTPUT_SUFFIX.keys(
    ) and job_id in OUTPUT_DICT_SUFFIX.keys():
        step = parameterParser.suffix_map(step, OUTPUT_DICT_SUFFIX[job_id],
                                          LAST_OUTPUT_SUFFIX[job_id])
    step = parameterParser.history_map(step, job['user_id'],
                                       job['user_folder'], Queue)

    step, outside_size_upload = parameterParser.upload_file_map(
        step, job['user_folder'])
    outside_size += outside_size_upload
    step = step.replace('{Workspace}', job['job_folder'])
    user_bin_dir = os.path.join(
        os.path.join(settings['env']['workspace'], job['user_id'], 'bin'))
    if not os.path.exists(user_bin_dir):
        try:
            os.makedirs(user_bin_dir)
        except:
            pass
    step = step.replace('{UserBin}', user_bin_dir)
    if settings['cluster']['type']:
        if 'cpu' in settings['cluster'].keys() and settings['cluster']['cpu']:
            step = step.replace('{ThreadN}', str(settings['cluster']['cpu']))
        else:
            step = step.replace('{ThreadN}', str(settings['env']['cpu']))
    else:
        step = step.replace('{ThreadN}', str(settings['env']['cpu']))
    JOB_COMMAND[job_id] = parameterParser.parameter_string_to_list(step)
    LAST_OUTPUT[job_id] = baseDriver.get_folder_content(job['job_folder'])
    training_num = get_training_items(job['steps'][job['resume'] + 1]['hash'])
    if training_num < 10:
        learning = 1

    if INPUT_SIZE[job_id] == 0:
        INPUT_SIZE[job_id] = baseDriver.get_folder_size(job['job_folder'])
    else:
        if job_id in OUTPUT_SIZE.keys():
            INPUT_SIZE[job_id] = OUTPUT_SIZE[job_id]
        else:
            INPUT_SIZE[job_id] = 0
    FOLDER_SIZE_BEFORE[job_id] = baseDriver.get_folder_size(job['job_folder'])
    INPUT_SIZE[job_id] += outside_size

    resource_needed = checkPoint.predict_resource_needed(
        job['steps'][job['resume'] + 1]['hash'], INPUT_SIZE[job_id],
        training_num)
    if resource_needed['cpu'] > int(settings['env']['cpu']) * 100:
        resource_needed['cpu'] = int(settings['env']['cpu']) * 95

    # if resource_needed['mem'] >
    if learning == 1 and no_new_learn == 0:
        trace_id = create_machine_learning_item(
            job['steps'][job['resume'] + 1]['hash'], INPUT_SIZE[job_id])
        resource_needed['trace'] = trace_id

    return resource_needed