def run_prepare(job_id, job, no_new_learn=0):
    """
    Parse the step's parameters and predict the resources needed by the step
    :param job_id: int, job id
    :param job: dict, job dict
    :param no_new_learn: int, 1 means refuse to create a new training item
    :return: None if all steps have finished, 'running' if a step is still
             executing, otherwise a dict of predicted resources
    """
    global LAST_OUTPUT_STRING, OUTPUTS, OUTPUT_DICT, OUTPUT_DICT_SUFFIX, \
        NEW_FILES, LAST_OUTPUT, LAST_OUTPUT_SUFFIX
    learning = 0
    outside_size = 0
    if job['status'] == -1 and job['resume'] != -1:
        # skip and resume: restore the per-job caches saved by a previous run
        tmp_dict = baseDriver.load_output_dict(job_id)
        if 'LAST_OUTPUT_STRING' in tmp_dict.keys():
            LAST_OUTPUT_STRING[job_id] = tmp_dict['LAST_OUTPUT_STRING']
        if 'OUTPUTS' in tmp_dict.keys():
            OUTPUTS[job_id] = tmp_dict['OUTPUTS']
        if 'OUTPUT_DICT' in tmp_dict.keys():
            OUTPUT_DICT[job_id] = tmp_dict['OUTPUT_DICT']
        if 'OUTPUT_DICT_SUFFIX' in tmp_dict.keys():
            OUTPUT_DICT_SUFFIX[job_id] = tmp_dict['OUTPUT_DICT_SUFFIX']
        if 'NEW_FILES' in tmp_dict.keys():
            NEW_FILES[job_id] = tmp_dict['NEW_FILES']
        if 'LAST_OUTPUT' in tmp_dict.keys():
            LAST_OUTPUT[job_id] = tmp_dict['LAST_OUTPUT']
        if 'LAST_OUTPUT_SUFFIX' in tmp_dict.keys():
            LAST_OUTPUT_SUFFIX[job_id] = tmp_dict['LAST_OUTPUT_SUFFIX']
    if (job['resume'] + 1) == len(job['steps']):
        return None
    elif job['status'] > 0:
        return 'running'
    else:
        step = job['steps'][job['resume'] + 1]['parameter']
        # replace the built-in wildcards with their concrete values
        step = step.replace('{Job}', str(job_id))
        step = step.replace('{JobName}', str(JOB_TABLE[job_id]['name']))
        if job_id in LAST_OUTPUT_STRING.keys():
            step = step.replace('{LastOutput}', LAST_OUTPUT_STRING[job_id])
        if job_id in OUTPUTS.keys():
            step = step.replace('{AllOutputBefore}', ' '.join(OUTPUTS[job_id]))
        if job_id in NEW_FILES.keys():
            step = parameterParser.last_output_map(step, NEW_FILES[job_id])
        if job_id in JOB_PARAMETERS.keys():
            step = parameterParser.special_parameter_map(step, JOB_PARAMETERS[job_id])
        if job_id in OUTPUT_DICT.keys():
            step = parameterParser.output_file_map(step, OUTPUT_DICT[job_id])
        if job_id in JOB_INPUT_FILES.keys():
            step, outside_size = parameterParser.input_file_map(
                step, JOB_INPUT_FILES[job_id], job['user_folder'])
        if job_id in LAST_OUTPUT_SUFFIX.keys() and job_id in OUTPUT_DICT_SUFFIX.keys():
            step = parameterParser.suffix_map(step, OUTPUT_DICT_SUFFIX[job_id],
                                              LAST_OUTPUT_SUFFIX[job_id])
        step = parameterParser.history_map(step, job['user_id'],
                                           job['user_folder'], Queue)
        step, outside_size_upload = parameterParser.upload_file_map(
            step, job['user_folder'])
        outside_size += outside_size_upload
        step = step.replace('{Workspace}', job['job_folder'])
        user_bin_dir = os.path.join(settings['env']['workspace'],
                                    job['user_id'], 'bin')
        if not os.path.exists(user_bin_dir):
            try:
                os.makedirs(user_bin_dir)
            except OSError:
                # the folder may have been created by a concurrent worker
                pass
        step = step.replace('{UserBin}', user_bin_dir)
        # prefer the cluster's CPU setting when one is configured
        if settings['cluster']['type'] and settings['cluster'].get('cpu'):
            step = step.replace('{ThreadN}', str(settings['cluster']['cpu']))
        else:
            step = step.replace('{ThreadN}', str(settings['env']['cpu']))
        JOB_COMMAND[job_id] = parameterParser.parameter_string_to_list(step)
        LAST_OUTPUT[job_id] = baseDriver.get_folder_content(job['job_folder'])

        training_num = get_training_items(job['steps'][job['resume'] + 1]['hash'])
        if training_num < 10:
            # fewer than 10 samples: keep collecting training data for this step
            learning = 1
        if INPUT_SIZE[job_id] == 0:
            INPUT_SIZE[job_id] = baseDriver.get_folder_size(job['job_folder'])
        else:
            if job_id in OUTPUT_SIZE.keys():
                INPUT_SIZE[job_id] = OUTPUT_SIZE[job_id]
            else:
                INPUT_SIZE[job_id] = 0
        FOLDER_SIZE_BEFORE[job_id] = baseDriver.get_folder_size(job['job_folder'])
        INPUT_SIZE[job_id] += outside_size
        resource_needed = checkPoint.predict_resource_needed(
            job['steps'][job['resume'] + 1]['hash'], INPUT_SIZE[job_id],
            training_num)
        # cap the predicted CPU load at the machine's capacity
        # (percent units: 100 per core)
        if resource_needed['cpu'] > int(settings['env']['cpu']) * 100:
            resource_needed['cpu'] = int(settings['env']['cpu']) * 95
        # if resource_needed['mem'] >
        if learning == 1 and no_new_learn == 0:
            trace_id = create_machine_learning_item(
                job['steps'][job['resume'] + 1]['hash'], INPUT_SIZE[job_id])
            resource_needed['trace'] = trace_id
        return resource_needed
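

# --- Illustrative usage (an assumption, not part of the original module) ---
# run_prepare() has three return shapes: None when every step of the job has
# finished, the string 'running' while a step is still executing, and
# otherwise a resource dict from checkPoint.predict_resource_needed() (with a
# 'trace' key added while the step still needs training samples). The minimal
# sketch below shows how a dispatch loop might consume those shapes;
# `pending_jobs` and `start_step` are hypothetical names used for illustration.
def dispatch_pending_jobs(pending_jobs, start_step):
    """Sketch only: pending_jobs maps job_id -> job dict; start_step is a
    hypothetical callback that launches the step under the predicted limits."""
    for job_id, job in pending_jobs.items():
        resource_needed = run_prepare(job_id, job)
        if resource_needed is None:
            continue  # all steps done, nothing left to schedule
        if resource_needed == 'running':
            continue  # a step of this job is still executing
        start_step(job_id, resource_needed)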