def create_inversion_dict_stage(cmt_file_db, param_path, task_counter):
    """Creates stage for the creation of the inversion files. This stage is
    tiny, but required before the actual inversion.

    :param cmt_file_db: cmtfile in the database
    :param param_path: path to parameter file directory
    :param task_counter: total task count up until now in pipeline
    :return: EnTK Stage
    """

    # Get database parameter path
    databaseparam_path = os.path.join(param_path,
                                      "Database/DatabaseParameters.yml")

    # Load Parameters
    DB_params = read_yaml_file(databaseparam_path)

    # Earthquake specific database parameters: Dir and Cid
    Cdir, Cid = get_Centry_path(DB_params["databasedir"], cmt_file_db)

    # Function
    inv_dict_func = os.path.join(bin_path, "write_inversion_dicts.py")

    # Create a Stage object
    inv_dict_stage = Stage()
    inv_dict_stage.name = "Creating"

    # Create Task
    inv_dict_task = Task()

    # This way the task gets the name of the path file
    inv_dict_task.name = "Inversion-Dictionaries"

    inv_dict_task.pre_exec = [  # Conda activate
        DB_params["conda-activate"]]

    inv_dict_task.executable = [DB_params["bin-python"]]  # Assign exec
    #                                                       to the task

    inv_dict_task.arguments = [inv_dict_func,
                               "-f", cmt_file_db,
                               "-p", param_path]

    # In the future maybe to database dir as a total log?
    inv_dict_task.stdout = os.path.join(
        "%s" % Cdir, "logs",
        "stdout.pipeline_%s.task_%s.%s" % (
            Cid, str(task_counter).zfill(4), inv_dict_task.name))

    inv_dict_task.stderr = os.path.join(
        "%s" % Cdir, "logs",
        "stderr.pipeline_%s.task_%s.%s" % (
            Cid, str(task_counter).zfill(4), inv_dict_task.name))

    inv_dict_stage.add_tasks(inv_dict_task)

    task_counter += 1

    return inv_dict_stage, task_counter
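# Note on configuration: every stage builder in this module reads
# Database/DatabaseParameters.yml through read_yaml_file(). The sketch below
# shows roughly what that dictionary is assumed to look like; the key names
# are taken from the lookups in this module, but the values are placeholders,
# not the project's actual configuration.
EXAMPLE_DB_PARAMS = {
    "databasedir": "/path/to/database",             # root of the CMT database
    "conda-activate": "conda activate myenv",       # pre_exec activation line
    "bin-python": "/path/to/conda/env/bin/python",  # interpreter used as task executable
    "verbose": True,                                # appends "-v" to task arguments
    "npar": 9,                                      # number of perturbed CMT parameters
}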
def create_inversion_stage(cmt_file_db, param_path, task_counter):
    """Creates inversion stage.

    :param cmt_file_db: cmtfile in the database
    :param param_path: path to parameter file directory
    :param task_counter: total task count up until now in pipeline
    :return: EnTK Stage
    """

    # Get database parameter path
    databaseparam_path = os.path.join(param_path,
                                      "Database/DatabaseParameters.yml")

    # Load Parameters
    DB_params = read_yaml_file(databaseparam_path)

    # Earthquake specific database parameters: Dir and Cid
    Cdir, Cid = get_Centry_path(DB_params["databasedir"], cmt_file_db)

    # Function
    inversion_func = os.path.join(bin_path, "inversion.py")

    # Create a Stage object
    inversion_stage = Stage()
    inversion_stage.name = "CMT3D"

    # Create Task
    inversion_task = Task()

    # This way the task gets the name of the path file
    inversion_task.name = "Inversion"

    inversion_task.pre_exec = [  # Conda activate
        DB_params["conda-activate"]]

    inversion_task.executable = DB_params["bin-python"]  # Assign exec
    #                                                      to the task

    inversion_task.arguments = [inversion_func,
                                "-f", cmt_file_db,
                                "-p", param_path]

    # In the future maybe to database dir as a total log?
    inversion_task.stdout = os.path.join(
        "%s" % Cdir, "logs",
        "stdout.pipeline_%s.task_%s.%s" % (
            Cid, str(task_counter).zfill(4), inversion_task.name))

    inversion_task.stderr = os.path.join(
        "%s" % Cdir, "logs",
        "stderr.pipeline_%s.task_%s.%s" % (
            Cid, str(task_counter).zfill(4), inversion_task.name))

    inversion_stage.add_tasks(inversion_task)

    return inversion_stage
def create_process_path_files(cmt_file_db, param_path, task_counter):
    """This function creates the path files used for processing both
    synthetic and observed data in ASDF format, as well as the following
    windowing procedure.

    :param cmt_file_db: cmtfile in the database
    :param param_path: path to parameter file directory
    :param task_counter: total task count up until now in pipeline
    :return: EnTK Stage
    """

    # Get database parameter path
    databaseparam_path = os.path.join(param_path,
                                      "Database/DatabaseParameters.yml")

    # Load Parameters
    DB_params = read_yaml_file(databaseparam_path)

    # Earthquake specific database parameters: Dir and Cid
    Cdir, Cid = get_Centry_path(DB_params["databasedir"], cmt_file_db)

    # Process path function
    create_process_path_bin = os.path.join(bin_path, "create_path_files.py")

    # Create Process Paths Stage (CPP)
    # Create a Stage object
    cpp = Stage()
    cpp.name = "CreateProcessPaths"

    # Create Task
    cpp_t = Task()
    cpp_t.name = "CPP-Task"

    cpp_t.pre_exec = [  # Conda activate
        DB_params["conda-activate"]]

    cpp_t.executable = DB_params["bin-python"]  # Assign executable
    #                                             to the task

    cpp_t.arguments = [create_process_path_bin, cmt_file_db]

    # In the future maybe to database dir as a total log?
    cpp_t.stdout = os.path.join(
        "%s" % Cdir, "logs",
        "stdout.pipeline_%s.task_%s.%s" % (
            Cid, str(task_counter).zfill(4), cpp_t.name))

    cpp_t.stderr = os.path.join(
        "%s" % Cdir, "logs",
        "stderr.pipeline_%s.task_%s.%s" % (
            Cid, str(task_counter).zfill(4), cpp_t.name))

    task_counter += 1

    cpp.add_tasks(cpp_t)

    return cpp, task_counter
def write_sources(cmt_file_db, param_path, task_counter):
    """This function creates a stage that modifies the CMTSOLUTION files
    before the simulations are run.

    :param cmt_file_db: cmtfile in the database
    :param param_path: path to parameter file directory
    :param task_counter: total task count up until now in pipeline
    :return: EnTK Stage
    """

    # Get Database parameters
    databaseparam_path = os.path.join(param_path,
                                      "Database/DatabaseParameters.yml")
    DB_params = read_yaml_file(databaseparam_path)

    # Earthquake specific database parameters: Dir and Cid
    Cdir, Cid = get_Centry_path(DB_params["databasedir"], cmt_file_db)

    # Path to function
    write_source_func = os.path.join(bin_path, "write_sources.py")

    # Create a Stage object
    w_sources = Stage()
    w_sources.name = "Write-Sources"

    # Create Task for stage
    w_sources_t = Task()
    w_sources_t.name = "Task-Sources"

    w_sources_t.pre_exec = [  # Conda activate
        DB_params["conda-activate"]]

    w_sources_t.executable = DB_params["bin-python"]  # Assign executable
    #                                                   to the task

    w_sources_t.arguments = [write_source_func, cmt_file_db]

    # In the future maybe to database dir as a total log?
    w_sources_t.stdout = os.path.join(
        "%s" % Cdir, "logs",
        "stdout.pipeline_%s.task_%s.%s" % (
            Cid, str(task_counter).zfill(4), w_sources_t.name))

    w_sources_t.stderr = os.path.join(
        "%s" % Cdir, "logs",
        "stderr.pipeline_%s.task_%s.%s" % (
            Cid, str(task_counter).zfill(4), w_sources_t.name))

    # Add Task to the Stage
    w_sources.add_tasks(w_sources_t)

    task_counter += 1

    return w_sources, task_counter
def specfem_clean_up(cmt_file_db, param_path, task_counter):
    """Cleaning up the simulation directories since we don't need all the
    files for the future.

    :param cmt_file_db: cmtfile in the database
    :param param_path: path to parameter file directory
    :param task_counter: total task count up until now in pipeline
    :return: EnTK Stage
    """

    # Get Database parameters
    databaseparam_path = os.path.join(param_path,
                                      "Database/DatabaseParameters.yml")

    # Database parameters.
    DB_params = read_yaml_file(databaseparam_path)

    # Earthquake specific database parameters: Dir and Cid
    Cdir, Cid = get_Centry_path(DB_params["databasedir"], cmt_file_db)

    # Path to function
    clean_up_func = os.path.join(bin_path, "clean_up_simdirs.py")

    # Create a Stage object
    clean_up = Stage()
    clean_up.name = "Clean-Up"

    # Create Task for stage
    clean_up_t = Task()
    clean_up_t.name = "Task-Clean-Up"

    clean_up_t.pre_exec = [  # Conda activate
        DB_params["conda-activate"]]

    clean_up_t.executable = DB_params["bin-python"]  # Assign executable
    #                                                  to the task

    clean_up_t.arguments = [clean_up_func, cmt_file_db]

    # In the future maybe to database dir as a total log?
    clean_up_t.stdout = os.path.join(
        "%s" % Cdir, "logs",
        "stdout.pipeline_%s.task_%s.%s" % (
            Cid, str(task_counter).zfill(4), clean_up_t.name))

    clean_up_t.stderr = os.path.join(
        "%s" % Cdir, "logs",
        "stderr.pipeline_%s.task_%s.%s" % (
            Cid, str(task_counter).zfill(4), clean_up_t.name))

    # Add Task to the Stage
    clean_up.add_tasks(clean_up_t)

    return clean_up, task_counter
def data_request(cmt_file_db, param_path, task_counter):
    """This function creates the request for the observed data and returns
    it as an EnTK Stage.

    :param cmt_file_db: cmt_file in the database
    :param param_path: path to parameter file directory
    :param task_counter: total task count up until now in pipeline
    :return: EnTK Stage
    """

    # Get Database parameters
    databaseparam_path = os.path.join(param_path,
                                      "Database/DatabaseParameters.yml")
    DB_params = read_yaml_file(databaseparam_path)

    # Earthquake specific database parameters: Dir and Cid
    Cdir, Cid = get_Centry_path(DB_params["databasedir"], cmt_file_db)

    # Path to function
    request_data_func = os.path.join(bin_path, "request_data.py")

    # Create a Stage object
    datarequest = Stage()

    datarequest_t = Task()
    datarequest_t.name = "data-request"

    datarequest_t.pre_exec = [  # Conda activate
        DB_params["conda-activate"]]

    datarequest_t.executable = DB_params["bin-python"]  # Assign executable
    #                                                     to the task

    datarequest_t.arguments = [request_data_func, cmt_file_db]

    # In the future maybe to database dir as a total log?
    datarequest_t.stdout = os.path.join(
        "%s" % Cdir, "logs",
        "stdout.pipeline_%s.task_%s.%s" % (
            Cid, str(task_counter).zfill(4), datarequest_t.name))

    datarequest_t.stderr = os.path.join(
        "%s" % Cdir, "logs",
        "stderr.pipeline_%s.task_%s.%s" % (
            Cid, str(task_counter).zfill(4), datarequest_t.name))

    # Add Task to the Stage
    datarequest.add_tasks(datarequest_t)

    # Increase task-counter
    task_counter += 1

    return datarequest, task_counter
def create_entry(cmt_filename, param_path, task_counter):
    """This function creates the EnTK stage for creation of a database entry.

    :param cmt_filename: path to the original CMTSOLUTION file
    :param param_path: parameter directory
    :param task_counter: total task count up until now in pipeline
    :return: EnTK Stage
    """

    # Get Database parameters
    databaseparam_path = os.path.join(param_path,
                                      "Database/DatabaseParameters.yml")
    DB_params = read_yaml_file(databaseparam_path)

    # Earthquake specific database parameters: Dir and Cid
    Cdir, Cid = get_Centry_path(DB_params["databasedir"], cmt_filename)

    # Create a Stage object
    database_entry = Stage()

    t1 = Task()
    t1.name = "database-entry"
    t1.pre_exec = PRE_EXECS
    t1.executable = 'create-entry'  # Assign executable to the task
    t1.arguments = ['-f %s' % cmt_filename, '-p %s' % param_path]

    # In the future maybe to database dir as a total log?
    t1.stdout = os.path.join(
        "%s" % Cdir, "logs",
        "stdout.pipeline_%s.task_%s.%s" % (
            Cid, str(task_counter).zfill(4), t1.name))

    t1.stderr = os.path.join(
        "%s" % Cdir, "logs",
        "stderr.pipeline_%s.task_%s.%s" % (
            Cid, str(task_counter).zfill(4), t1.name))

    # Increase task-counter
    task_counter += 1

    # Add Task to the Stage
    database_entry.add_tasks(t1)

    return database_entry, task_counter
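# Hypothetical assembly sketch (not part of the original module): the stage
# builders above all follow the same pattern,
#     stage, task_counter = builder(..., param_path, task_counter)
# so a caller could chain them into an EnTK Pipeline roughly as shown below.
# The ordering is an assumption; only the call signatures are taken from the
# functions defined above.
def build_entry_pipeline(cmt_filename, cmt_file_db, param_path):
    p = Pipeline()
    task_counter = 0

    # The database entry is created from the original CMTSOLUTION file ...
    entry_stage, task_counter = create_entry(cmt_filename, param_path,
                                             task_counter)
    p.add_stages(entry_stage)

    # ... while all later stages operate on the copy inside the database.
    request_stage, task_counter = data_request(cmt_file_db, param_path,
                                               task_counter)
    p.add_stages(request_stage)

    sources_stage, task_counter = write_sources(cmt_file_db, param_path,
                                                task_counter)
    p.add_stages(sources_stage)

    return p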
def test_task_exceptions(s, l, i, b):
    """
    **Purpose**: Test if all attribute assignments raise exceptions for
    invalid values
    """

    t = Task()

    data_type = [s, l, i, b]

    for data in data_type:

        if not isinstance(data, str):
            with pytest.raises(TypeError):
                t.name = data

            with pytest.raises(TypeError):
                t.path = data

            with pytest.raises(TypeError):
                t.parent_stage = data

            with pytest.raises(TypeError):
                t.parent_pipeline = data

            with pytest.raises(TypeError):
                t.stdout = data

            with pytest.raises(TypeError):
                t.stderr = data

        if not isinstance(data, list):
            with pytest.raises(TypeError):
                t.pre_exec = data

            with pytest.raises(TypeError):
                t.executable = data

            with pytest.raises(TypeError):
                t.arguments = data

            with pytest.raises(TypeError):
                t.post_exec = data

            with pytest.raises(TypeError):
                t.upload_input_data = data

            with pytest.raises(TypeError):
                t.copy_input_data = data

            with pytest.raises(TypeError):
                t.link_input_data = data

            with pytest.raises(TypeError):
                t.move_input_data = data

            with pytest.raises(TypeError):
                t.copy_output_data = data

            with pytest.raises(TypeError):
                t.download_output_data = data

            with pytest.raises(TypeError):
                t.move_output_data = data

        if not isinstance(data, str) and not isinstance(data, unicode):
            with pytest.raises(ValueError):
                t.cpu_reqs = {'processes': 1, 'process_type': data,
                              'threads_per_process': 1, 'thread_type': None}
                t.cpu_reqs = {'processes': 1, 'process_type': None,
                              'threads_per_process': 1, 'thread_type': data}
                t.gpu_reqs = {'processes': 1, 'process_type': data,
                              'threads_per_process': 1, 'thread_type': None}
                t.gpu_reqs = {'processes': 1, 'process_type': None,
                              'threads_per_process': 1, 'thread_type': data}

        if not isinstance(data, int):
            with pytest.raises(TypeError):
                t.cpu_reqs = {'processes': data, 'process_type': None,
                              'threads_per_process': 1, 'thread_type': None}
                t.cpu_reqs = {'processes': 1, 'process_type': None,
                              'threads_per_process': data, 'thread_type': None}
                t.gpu_reqs = {'processes': data, 'process_type': None,
                              'threads_per_process': 1, 'thread_type': None}
                t.gpu_reqs = {'processes': 1, 'process_type': None,
                              'threads_per_process': data, 'thread_type': None}
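# Hypothetical driver (not part of the original suite): test_task_exceptions
# expects one value of each basic type so that every type-check branch above
# is exercised for the non-matching values. How the real suite supplies
# (s, l, i, b) is not shown here; one minimal way would be a pytest
# parametrization like the following.
import pytest

@pytest.mark.parametrize("s, l, i, b", [
    ("some-string", ["a", "list"], 42, True),
])
def test_task_exceptions_typed_inputs(s, l, i, b):
    test_task_exceptions(s, l, i, b)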
def test_task_to_dict():
    """
    **Purpose**: Test if the 'to_dict' function of Task class converts all
    expected attributes of the Task into a dictionary
    """

    t = Task()
    d = t.to_dict()

    assert d == {'uid': None,
                 'name': None,
                 'state': states.INITIAL,
                 'state_history': [states.INITIAL],
                 'pre_exec': [],
                 'executable': [],
                 'arguments': [],
                 'post_exec': [],
                 'cpu_reqs': {'processes': 1,
                              'process_type': None,
                              'threads_per_process': 1,
                              'thread_type': None},
                 'gpu_reqs': {'processes': 0,
                              'process_type': None,
                              'threads_per_process': 0,
                              'thread_type': None},
                 'lfs_per_process': 0,
                 'upload_input_data': [],
                 'copy_input_data': [],
                 'link_input_data': [],
                 'move_input_data': [],
                 'copy_output_data': [],
                 'move_output_data': [],
                 'download_output_data': [],
                 'stdout': None,
                 'stderr': None,
                 'exit_code': None,
                 'path': None,
                 'tag': None,
                 'parent_stage': {'uid': None, 'name': None},
                 'parent_pipeline': {'uid': None, 'name': None}}

    t = Task()
    t.uid = 'test.0000'
    t.name = 'new'
    t.pre_exec = ['module load abc']
    t.executable = ['sleep']
    t.arguments = ['10']
    t.cpu_reqs['processes'] = 10
    t.cpu_reqs['threads_per_process'] = 2
    t.gpu_reqs['processes'] = 5
    t.gpu_reqs['threads_per_process'] = 3
    t.lfs_per_process = 1024
    t.upload_input_data = ['test1']
    t.copy_input_data = ['test2']
    t.link_input_data = ['test3']
    t.move_input_data = ['test4']
    t.copy_output_data = ['test5']
    t.move_output_data = ['test6']
    t.download_output_data = ['test7']
    t.stdout = 'out'
    t.stderr = 'err'
    t.exit_code = 1
    t.path = 'a/b/c'
    t.tag = 'task.0010'
    t.parent_stage = {'uid': 's1', 'name': 'stage1'}
    t.parent_pipeline = {'uid': 'p1', 'name': 'pipeline1'}

    d = t.to_dict()

    assert d == {'uid': 'test.0000',
                 'name': 'new',
                 'state': states.INITIAL,
                 'state_history': [states.INITIAL],
                 'pre_exec': ['module load abc'],
                 'executable': ['sleep'],
                 'arguments': ['10'],
                 'post_exec': [],
                 'cpu_reqs': {'processes': 10,
                              'process_type': None,
                              'threads_per_process': 2,
                              'thread_type': None},
                 'gpu_reqs': {'processes': 5,
                              'process_type': None,
                              'threads_per_process': 3,
                              'thread_type': None},
                 'lfs_per_process': 1024,
                 'upload_input_data': ['test1'],
                 'copy_input_data': ['test2'],
                 'link_input_data': ['test3'],
                 'move_input_data': ['test4'],
                 'copy_output_data': ['test5'],
                 'move_output_data': ['test6'],
                 'download_output_data': ['test7'],
                 'stdout': 'out',
                 'stderr': 'err',
                 'exit_code': 1,
                 'path': 'a/b/c',
                 'tag': 'task.0010',
                 'parent_stage': {'uid': 's1', 'name': 'stage1'},
                 'parent_pipeline': {'uid': 'p1', 'name': 'pipeline1'}}
def main(cmt_filename):
    """Runs the full pipeline for one earthquake.

    Args:
        cmt_filename: str containing the path to the cmt solution that is
                      supposed to be inverted for

    Usage:
        From the commandline:
            python pipeline <path/to/cmtsolution>
    """

    # Path to pipeline file
    pipelinepath = os.path.abspath(__file__)
    pipelinedir = os.path.dirname(pipelinepath)

    # Define parameter directory
    param_path = os.path.join(os.path.dirname(pipelinedir), "params")
    databaseparam_path = os.path.join(param_path,
                                      "Database/DatabaseParameters.yml")
    DB_params = read_yaml_file(databaseparam_path)
    print(DB_params)

    # Earthquake specific database parameters: Dir and eq_id
    eq_dir, eq_id = get_eq_entry_path(DB_params["databasedir"], cmt_filename)

    # Earthquake file in the database
    cmt_file_db = os.path.join(eq_dir, "eq_" + eq_id + ".cmt")

    # Create a Pipeline object
    p = Pipeline()

    # ---- DATABASE ENTRY TASK ---------------------------------------------- #

    # Path to function
    create_database_func = os.path.join(pipelinedir,
                                        "01_Create_Database_Entry.py")

    # Create a Stage object
    database_entry = Stage()

    t1 = Task()
    t1.name = 'database-entry'
    t1.pre_exec = [  # Conda activate
        DB_params["conda-activate"]]
    t1.executable = [DB_params['bin-python']]  # Assign executable to the task
    t1.arguments = [create_database_func, os.path.abspath(cmt_filename)]

    # In the future maybe to database dir as a total log?
    t1.stdout = os.path.join(pipelinedir,
                             "database-entry." + eq_id + ".stdout")
    t1.stderr = os.path.join(pipelinedir,
                             "database-entry." + eq_id + ".stderr")

    # Add Task to the Stage
    database_entry.add_tasks(t1)

    # Add Stage to the Pipeline
    p.add_stages(database_entry)

    # # ---- REQUEST DATA ----------------------------------------------------- #
    #
    # # Path to function
    # request_data_func = os.path.join(pipelinedir, "02_Request_Data.py")
    #
    # # Create a Stage object
    # datarequest = Stage()
    #
    # datarequest_t = Task()
    # datarequest_t.name = 'data-request'
    # datarequest_t.pre_exec = [  # Conda activate
    #     DB_params["conda-activate"]]
    # datarequest_t.executable = [DB_params['bin-python']]  # Assign executable
    # #                                                       to the task
    # datarequest_t.arguments = [request_data_func, cmt_file_db]
    #
    # # In the future maybe to database dir as a total log?
    # datarequest_t.stdout = os.path.join(pipelinedir,
    #                                     "datarequest." + eq_id + ".stdout")
    # datarequest_t.stderr = os.path.join(pipelinedir,
    #                                     "datarequest." + eq_id + ".stderr")
    #
    # # Add Task to the Stage
    # datarequest.add_tasks(datarequest_t)
    #
    # # Add Stage to the Pipeline
    # p.add_stages(datarequest)

    # ---- Write Sources ---------------------------------------------------- #

    # Path to function
    write_source_func = os.path.join(pipelinedir, "03_Write_Sources.py")

    # Create a Stage object
    w_sources = Stage()
    w_sources.name = 'Write-Sources'

    # Create Task for stage
    w_sources_t = Task()
    w_sources_t.name = 'Write-Sources'
    w_sources_t.pre_exec = [  # Conda activate
        DB_params["conda-activate"]]
    w_sources_t.executable = [DB_params['bin-python']]  # Assign executable
    #                                                     to the task
    w_sources_t.arguments = [write_source_func, cmt_file_db]

    # In the future maybe to database dir as a total log?
    w_sources_t.stdout = os.path.join(pipelinedir,
                                      "write_sources." + eq_id + ".stdout")
    w_sources_t.stderr = os.path.join(pipelinedir,
                                      "write_sources." + eq_id + ".stderr")

    # Add Task to the Stage
    w_sources.add_tasks(w_sources_t)

    # Add Stage to the Pipeline
    p.add_stages(w_sources)

    # ---- Run Specfem ------------------------------------------------------ #

    specfemspec_path = os.path.join(param_path,
                                    "SpecfemParams/SpecfemParams.yml")
    comp_and_modules_path = os.path.join(param_path,
                                         "SpecfemParams/"
                                         "CompilersAndModules.yml")

    # Load Parameters
    specfemspecs = read_yaml_file(specfemspec_path)
    cm_dict = read_yaml_file(comp_and_modules_path)

    attr = ["CMT", "CMT_rr", "CMT_tt", "CMT_pp", "CMT_rt", "CMT_rp",
            "CMT_tp", "CMT_depth", "CMT_lat", "CMT_lon"]

    simdir = os.path.join(eq_dir, "CMT_SIMs")

    # Create a Stage object
    runSF3d = Stage()
    runSF3d.name = 'Simulation'

    for at in attr[0:1]:  # only the first (unperturbed) CMT simulation
        sf_t = Task()
        sf_t.name = 'run-' + at

        # Module Loading
        sf_t.pre_exec = [  # Get rid of existing modules
            'module purge']
        for module in cm_dict["modulelist"]:
            sf_t.pre_exec.append("module load %s" % module)
        sf_t.pre_exec.append("module load %s" % cm_dict["gpu_module"])

        # Change directory to specfem directories
        sf_t.pre_exec.append(  # Change directory
            "cd %s" % os.path.join(simdir, at))

        sf_t.executable = ['./bin/xspecfem3D']  # Assign executable

        # In the future maybe to database dir as a total log?
        sf_t.stdout = os.path.join(pipelinedir,
                                   "run_specfem." + eq_id + ".stdout")
        sf_t.stderr = os.path.join(pipelinedir,
                                   "run_specfem." + eq_id + ".stderr")

        sf_t.gpu_reqs = {'processes': 6,
                         'process_type': 'MPI',
                         'threads_per_process': 1,
                         'thread_type': 'OpenMP'}

        # Add Task to the Stage
        runSF3d.add_tasks(sf_t)

    # Add Simulation stage to the Pipeline
    p.add_stages(runSF3d)

    # Create Application Manager
    appman = AppManager(hostname=hostname, port=port)

    # Create a dictionary describing the resource request; resource,
    # walltime, and cpus are mandatory keys. 'local.localhost' as the
    # resource would execute locally.
    res_dict = {
        'resource': 'princeton.tiger_gpu',
        'project': 'geo',
        'queue': 'gpu',
        'schema': 'local',
        'walltime': 300,
        'cpus': 2,
        'gpus': 6
    }

    # Assign resource request description to the Application Manager
    appman.resource_desc = res_dict

    # Assign the workflow as a set or list of Pipelines to the Application
    # Manager. Note: the list order is not guaranteed to be preserved.
    appman.workflow = set([p])

    # Run the Application Manager
    appman.run()
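# Hypothetical entry point (not in the original snippet) matching the usage
# given in main()'s docstring: python pipeline.py <path/to/cmtsolution>
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(
        description="Run the CMT inversion pipeline for one CMTSOLUTION file.")
    parser.add_argument("cmt_filename",
                        help="Path to the CMTSOLUTION file to be inverted.")
    args = parser.parse_args()

    main(args.cmt_filename)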
def create_windowing_stage(cmt_file_db, param_path, task_counter):
    """This function creates the ASDF windowing stage.

    :param cmt_file_db: cmtfile in the database
    :param param_path: path to parameter file directory
    :param task_counter: total task count up until now in pipeline
    :return: list of EnTK Stages
    """

    # Get database parameter path
    databaseparam_path = os.path.join(param_path,
                                      "Database/DatabaseParameters.yml")

    # Load Parameters
    DB_params = read_yaml_file(databaseparam_path)

    # Earthquake specific database parameters: Dir and Cid
    Cdir, Cid = get_Centry_path(DB_params["databasedir"], cmt_file_db)

    # Windowing parameter file directory
    window_process_dir = os.path.join(param_path, "CreateWindows")

    # Window path list
    # Important step! This creates a windowing list prior to having created
    # the actual window path files. It is tested so it definitely works!
    # This way the windowing processes can be distributed for each ASDF file
    # pair on one processor (No MPI support!)
    window_path_list, _ = get_windowing_list(cmt_file_db, window_process_dir,
                                             verbose=False)

    # Process path function
    window_func = os.path.join(bin_path, "window_selection_asdf.py")

    # The following little workaround helps get around the fact that
    # multiple tasks cannot read the same file.
    # Create two stages: one for body waves (or general entries) and one
    # for surface waves.
    bodywave_list = []
    surfacewave_list = []
    for file in window_path_list:
        name = os.path.basename(file)
        if "surface" in name:
            surfacewave_list.append(file)
        else:
            bodywave_list.append(file)

    stage_list = []
    if len(bodywave_list) > 0:
        stage_list.append(bodywave_list)
    if len(surfacewave_list) > 0:
        stage_list.append(surfacewave_list)

    # List of stages
    stages = []

    for window_list in stage_list:

        # Create a Stage object
        window_stage = Stage()
        window_stage.name = "Windowing"

        # Loop over window path files
        for window_path in window_list:

            # Create Task
            window_task = Task()

            # This way the task gets the name of the path file
            window_task.name = os.path.basename(window_path)

            window_task.pre_exec = [  # Conda activate
                DB_params["conda-activate"]]

            window_task.executable = [DB_params["bin-python"]]  # Assign exec
            #                                                     to the task

            # Create Argument list
            arguments = [window_func, "-f", window_path]
            if DB_params["verbose"]:
                arguments.append("-v")

            window_task.arguments = arguments

            # In the future maybe to database dir as a total log?
            window_task.stdout = os.path.join(
                "%s" % Cdir, "logs",
                "stdout.pipeline_%s.task_%s.%s" % (
                    Cid, str(task_counter).zfill(4), window_task.name))

            window_task.stderr = os.path.join(
                "%s" % Cdir, "logs",
                "stderr.pipeline_%s.task_%s.%s" % (
                    Cid, str(task_counter).zfill(4), window_task.name))

            window_stage.add_tasks(window_task)

            task_counter += 1

        stages.append(window_stage)

    return stages, task_counter
def create_processing_stage(cmt_file_db, param_path, task_counter):
    """This function creates the ASDF processing stage.

    :param cmt_file_db: cmtfile in the database
    :param param_path: path to parameter file directory
    :param task_counter: total task count up until now in pipeline
    :return: list of EnTK Stages
    """

    # Get database parameter path
    databaseparam_path = os.path.join(param_path,
                                      "Database/DatabaseParameters.yml")

    # Load Parameters
    DB_params = read_yaml_file(databaseparam_path)

    # Earthquake specific database parameters: Dir and Cid
    Cdir, Cid = get_Centry_path(DB_params["databasedir"], cmt_file_db)

    # Processing param dir
    process_obs_param_dir = os.path.join(param_path, "ProcessObserved")
    process_syn_param_dir = os.path.join(param_path, "ProcessSynthetic")

    # Process path list
    # Important step! This creates a processing list prior to having created
    # the actual process path files. It is tested so it definitely works!
    # This way the processes can be distributed for each ASDF file on one
    # processor or more (MPI enabled!)
    processing_list, _, _ = get_processing_list(cmt_file_db,
                                                process_obs_param_dir,
                                                process_syn_param_dir,
                                                verbose=True)

    # The following little workaround helps get around the fact that
    # multiple tasks cannot read the same file.

    # Get all available bands
    bands = []
    for file in processing_list:
        bands.append(os.path.basename(file).split(".")[-2])
    bands = list(set(bands))

    # List of stages
    stages = []

    for band in bands:

        # Processing sublist
        sub_list = [x for x in processing_list if band in x]

        # Process path function
        process_func = os.path.join(bin_path, "process_asdf.py")

        # Create a Stage object
        process_stage = Stage()
        process_stage.name = "Processing"

        # Number of Processes:
        N = len(processing_list)

        # Loop over process path files
        for process_path in sub_list:

            # Create Task
            processing_task = Task()

            # This way the task gets the name of the path file
            processing_task.name = "Processing-" \
                + os.path.basename(process_path)

            processing_task.pre_exec = [  # Conda activate
                DB_params["conda-activate"]]

            processing_task.executable = DB_params["bin-python"]  # Assign exec
            #                                                       to the task

            # Create Argument list
            arguments = [process_func, "-f", process_path]
            if DB_params["verbose"]:
                arguments.append("-v")

            processing_task.arguments = arguments

            # In the future maybe to database dir as a total log?
            processing_task.stdout = os.path.join(
                "%s" % Cdir, "logs",
                "stdout.pipeline_%s.task_%s.%s" % (
                    Cid, str(task_counter).zfill(4), processing_task.name))

            processing_task.stderr = os.path.join(
                "%s" % Cdir, "logs",
                "stderr.pipeline_%s.task_%s.%s" % (
                    Cid, str(task_counter).zfill(4), processing_task.name))

            processing_task.cpu_reqs = {
                "processes": 1,
                "process_type": "MPI",
                "threads_per_process": 1,
                "thread_type": "OpenMP"
            }

            task_counter += 1

            process_stage.add_tasks(processing_task)

        stages.append(process_stage)

    return stages, task_counter
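# Illustration of the band grouping used in create_processing_stage, with
# made-up file names (the real names come from get_processing_list); only
# the basename.split(".")[-2] convention is taken from the code above.
example_processing_list = [
    "/db/entry/process_paths/process_observed.040_100.yml",
    "/db/entry/process_paths/process_synthetic.CMT.040_100.yml",
    "/db/entry/process_paths/process_observed.090_250.yml",
]

example_bands = sorted({os.path.basename(f).split(".")[-2]
                        for f in example_processing_list})
# -> ['040_100', '090_250']: one Stage per band, whose tasks may run
# concurrently without reading the same path file.

example_sub_lists = {band: [f for f in example_processing_list if band in f]
                     for band in example_bands}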
def convert_traces(cmt_file_db, param_path, task_counter):
    """This function creates the to-ASDF conversion stage. Meaning, in this
    stage, both synthetic and observed traces are converted to ASDF files.

    :param cmt_file_db: cmtfile in the database
    :param param_path: path to parameter file directory
    :param task_counter: total task count up until now in pipeline
    :return: EnTK Stage
    """

    databaseparam_path = os.path.join(param_path,
                                      "Database/DatabaseParameters.yml")

    # Load Parameters
    DB_params = read_yaml_file(databaseparam_path)

    # Earthquake specific database parameters: Dir and Cid
    Cdir, Cid = get_Centry_path(DB_params["databasedir"], cmt_file_db)

    # File and directory
    cmt_dir = os.path.dirname(cmt_file_db)
    sim_dir = os.path.join(cmt_dir, "CMT_SIMs")

    # Create a Stage object
    conversion_stage = Stage()
    conversion_stage.name = "Convert"

    # Conversion binary
    conversion_bin = os.path.join(bin_path, "convert_to_asdf.py")

    attr = ["CMT", "CMT_rr", "CMT_tt", "CMT_pp", "CMT_rt", "CMT_rp",
            "CMT_tp", "CMT_depth", "CMT_lat", "CMT_lon"]

    # ---- Converting the synthetic data ---- #
    if DB_params["verbose"]:
        print("\nConverting synthetic traces to ASDF ... \n")

    for _i, at in enumerate(attr[:DB_params["npar"] + 1]):

        # Path file
        syn_path_file = os.path.join(sim_dir, at, at + ".yml")

        # Create Task for stage
        c_task = Task()
        c_task.name = at

        c_task.pre_exec = [DB_params["conda-activate"]]
        c_task.executable = DB_params["bin-python"]  # Assign executable
        #                                              to the task

        arguments = [conversion_bin, "-f", syn_path_file]
        if DB_params["verbose"]:
            arguments.append("-v")

        c_task.arguments = arguments

        # In the future maybe to database dir as a total log?
        c_task.stdout = os.path.join(
            "%s" % Cdir, "logs",
            "stdout.pipeline_%s.task_%s.%s" % (
                Cid, str(task_counter).zfill(4), c_task.name))

        c_task.stderr = os.path.join(
            "%s" % Cdir, "logs",
            "stderr.pipeline_%s.task_%s.%s" % (
                Cid, str(task_counter).zfill(4), c_task.name))

        # Increase Task counter
        task_counter += 1

        conversion_stage.add_tasks(c_task)

    # ---- Converting the observed data ---- #
    if DB_params["verbose"]:
        print("\nConverting observed traces to ASDF ... \n")

    obs_path_file = os.path.join(cmt_dir, "seismograms", "obs",
                                 "observed.yml")

    # Create Task for stage
    c_task = Task()
    c_task.name = "Observed"

    c_task.pre_exec = [DB_params["conda-activate"]]
    c_task.executable = DB_params["bin-python"]  # Assign executable
    #                                              to the task

    # Create Argument list
    arguments = [conversion_bin, "-f", obs_path_file]
    if DB_params["verbose"]:
        arguments.append("-v")

    c_task.arguments = arguments

    # In the future maybe to database dir as a total log?
    c_task.stdout = os.path.join(
        "%s" % Cdir, "logs",
        "stdout.pipeline_%s.task_%s.%s" % (
            Cid, str(task_counter).zfill(4), c_task.name))

    c_task.stderr = os.path.join(
        "%s" % Cdir, "logs",
        "stderr.pipeline_%s.task_%s.%s" % (
            Cid, str(task_counter).zfill(4), c_task.name))

    # Increase Task counter
    task_counter += 1

    conversion_stage.add_tasks(c_task)

    return conversion_stage, task_counter
def run_specfem(cmt_file_db, param_path, task_counter):
    """This function runs the necessary Specfem simulations.

    :param cmt_file_db: cmtfile in the database
    :param param_path: path to parameter file directory
    :param task_counter: total task count up until now in pipeline
    :return: EnTK Stage
    """

    # Get Database parameters
    databaseparam_path = os.path.join(param_path,
                                      "Database/DatabaseParameters.yml")

    # Database parameters.
    DB_params = read_yaml_file(databaseparam_path)

    # Earthquake specific database parameters: Dir and Cid
    Cdir, Cid = get_Centry_path(DB_params["databasedir"], cmt_file_db)

    specfemspec_path = os.path.join(param_path,
                                    "SpecfemParams/SpecfemParams.yml")
    comp_and_modules_path = os.path.join(param_path,
                                         "SpecfemParams/"
                                         "CompilersAndModules.yml")

    # Load Parameters
    specfemspecs = read_yaml_file(specfemspec_path)
    cm_dict = read_yaml_file(comp_and_modules_path)

    # Simulations to be run
    attr = ["CMT", "CMT_rr", "CMT_tt", "CMT_pp", "CMT_rt", "CMT_rp",
            "CMT_tp", "CMT_depth", "CMT_lat", "CMT_lon"]

    # Simulation directory
    simdir = os.path.join(os.path.dirname(cmt_file_db), "CMT_SIMs")

    # Create a Stage object
    runSF3d = Stage()
    runSF3d.name = "Simulation"

    for at in attr:
        sf_t = Task()
        sf_t.name = "run-" + at

        # Module Loading
        sf_t.pre_exec = [  # Get rid of existing modules
            "module purge"]

        # Append to pre_execution module list.
        for module in cm_dict["modulelist"]:
            sf_t.pre_exec.append("module load %s" % module)

        if specfemspecs["GPU_MODE"] is True:
            sf_t.pre_exec.append("module load %s" % cm_dict["gpu_module"])

        # Change directory to specfem directories
        sf_t.pre_exec.append(  # Change directory
            "cd %s" % os.path.join(simdir, at))

        sf_t.executable = "./bin/xspecfem3D"  # Assign executable

        # In the future maybe to database dir as a total log?
        sf_t.stdout = os.path.join(
            "%s" % Cdir, "logs",
            "stdout.pipeline_%s.task_%s.%s" % (
                Cid, str(task_counter).zfill(4), sf_t.name))

        sf_t.stderr = os.path.join(
            "%s" % Cdir, "logs",
            "stderr.pipeline_%s.task_%s.%s" % (
                Cid, str(task_counter).zfill(4), sf_t.name))

        print(sf_t.cpu_reqs)

        # Increase Task counter
        task_counter += 1

        # Add Task to the Stage
        runSF3d.add_tasks(sf_t)

    return runSF3d, task_counter
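# Sketch of the pre_exec list that run_specfem assembles for a single task,
# assuming a hypothetical CompilersAndModules.yml with
# modulelist: [intel, openmpi] and gpu_module: cudatoolkit, and GPU_MODE
# enabled in SpecfemParams.yml. Module names and the path are placeholders.
example_specfem_pre_exec = [
    "module purge",
    "module load intel",
    "module load openmpi",
    "module load cudatoolkit",
    "cd /path/to/database/Cxxxxxxxxxxxx/CMT_SIMs/CMT_rr",
]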
def test_task_exceptions(s, l, i, b):
    """
    **Purpose**: Test if all attribute assignments raise exceptions for
    invalid values
    """

    t = Task()

    data_type = [s, l, i, b]

    for data in data_type:

        if not isinstance(data, str):
            with pytest.raises(TypeError):
                t.name = data

            with pytest.raises(TypeError):
                t.path = data

            with pytest.raises(TypeError):
                t.parent_stage = data

            with pytest.raises(TypeError):
                t.parent_pipeline = data

            with pytest.raises(TypeError):
                t.stdout = data

            with pytest.raises(TypeError):
                t.stderr = data

        if not isinstance(data, list):
            with pytest.raises(TypeError):
                t.pre_exec = data

            with pytest.raises(TypeError):
                t.arguments = data

            with pytest.raises(TypeError):
                t.post_exec = data

            with pytest.raises(TypeError):
                t.upload_input_data = data

            with pytest.raises(TypeError):
                t.copy_input_data = data

            with pytest.raises(TypeError):
                t.link_input_data = data

            with pytest.raises(TypeError):
                t.move_input_data = data

            with pytest.raises(TypeError):
                t.copy_output_data = data

            with pytest.raises(TypeError):
                t.download_output_data = data

            with pytest.raises(TypeError):
                t.move_output_data = data

        if not isinstance(data, str) and not isinstance(data, list):
            with pytest.raises(TypeError):
                t.executable = data

        if not isinstance(data, str) and not isinstance(data, unicode):
            with pytest.raises(ValueError):
                t.cpu_reqs = {'processes': 1, 'process_type': data,
                              'threads_per_process': 1, 'thread_type': None}
                t.cpu_reqs = {'processes': 1, 'process_type': None,
                              'threads_per_process': 1, 'thread_type': data}
                t.gpu_reqs = {'processes': 1, 'process_type': data,
                              'threads_per_process': 1, 'thread_type': None}
                t.gpu_reqs = {'processes': 1, 'process_type': None,
                              'threads_per_process': 1, 'thread_type': data}

        if not isinstance(data, int):
            with pytest.raises(TypeError):
                t.cpu_reqs = {'processes': data, 'process_type': None,
                              'threads_per_process': 1, 'thread_type': None}
                t.cpu_reqs = {'processes': 1, 'process_type': None,
                              'threads_per_process': data, 'thread_type': None}
                t.gpu_reqs = {'processes': data, 'process_type': None,
                              'threads_per_process': 1, 'thread_type': None}
                t.gpu_reqs = {'processes': 1, 'process_type': None,
                              'threads_per_process': data, 'thread_type': None}
def test_task_to_dict():
    """
    **Purpose**: Test if the 'to_dict' function of Task class converts all
    expected attributes of the Task into a dictionary
    """

    t = Task()
    d = t.to_dict()

    assert d == {'uid': None,
                 'name': None,
                 'state': states.INITIAL,
                 'state_history': [states.INITIAL],
                 'pre_exec': [],
                 'executable': str(),
                 'arguments': [],
                 'post_exec': [],
                 'cpu_reqs': {'processes': 1,
                              'process_type': None,
                              'threads_per_process': 1,
                              'thread_type': None},
                 'gpu_reqs': {'processes': 0,
                              'process_type': None,
                              'threads_per_process': 0,
                              'thread_type': None},
                 'lfs_per_process': 0,
                 'upload_input_data': [],
                 'copy_input_data': [],
                 'link_input_data': [],
                 'move_input_data': [],
                 'copy_output_data': [],
                 'move_output_data': [],
                 'download_output_data': [],
                 'stdout': None,
                 'stderr': None,
                 'exit_code': None,
                 'path': None,
                 'tag': None,
                 'parent_stage': {'uid': None, 'name': None},
                 'parent_pipeline': {'uid': None, 'name': None}}

    t = Task()
    t.uid = 'test.0000'
    t.name = 'new'
    t.pre_exec = ['module load abc']
    t.executable = ['sleep']
    t.arguments = ['10']
    t.cpu_reqs['processes'] = 10
    t.cpu_reqs['threads_per_process'] = 2
    t.gpu_reqs['processes'] = 5
    t.gpu_reqs['threads_per_process'] = 3
    t.lfs_per_process = 1024
    t.upload_input_data = ['test1']
    t.copy_input_data = ['test2']
    t.link_input_data = ['test3']
    t.move_input_data = ['test4']
    t.copy_output_data = ['test5']
    t.move_output_data = ['test6']
    t.download_output_data = ['test7']
    t.stdout = 'out'
    t.stderr = 'err'
    t.exit_code = 1
    t.path = 'a/b/c'
    t.tag = 'task.0010'
    t.parent_stage = {'uid': 's1', 'name': 'stage1'}
    t.parent_pipeline = {'uid': 'p1', 'name': 'pipeline1'}

    d = t.to_dict()

    expected = {'uid': 'test.0000',
                'name': 'new',
                'state': states.INITIAL,
                'state_history': [states.INITIAL],
                'pre_exec': ['module load abc'],
                'executable': 'sleep',
                'arguments': ['10'],
                'post_exec': [],
                'cpu_reqs': {'processes': 10,
                             'process_type': None,
                             'threads_per_process': 2,
                             'thread_type': None},
                'gpu_reqs': {'processes': 5,
                             'process_type': None,
                             'threads_per_process': 3,
                             'thread_type': None},
                'lfs_per_process': 1024,
                'upload_input_data': ['test1'],
                'copy_input_data': ['test2'],
                'link_input_data': ['test3'],
                'move_input_data': ['test4'],
                'copy_output_data': ['test5'],
                'move_output_data': ['test6'],
                'download_output_data': ['test7'],
                'stdout': 'out',
                'stderr': 'err',
                'exit_code': 1,
                'path': 'a/b/c',
                'tag': 'task.0010',
                'parent_stage': {'uid': 's1', 'name': 'stage1'},
                'parent_pipeline': {'uid': 'p1', 'name': 'pipeline1'}}

    assert d == expected

    # Assigning the executable as a plain string yields the same dictionary.
    t.executable = 'sleep'
    d = t.to_dict()

    assert d == expected