Example 1
def create_inversion_dict_stage(cmt_file_db, param_path, task_counter):
    """Creates stage for the creation of the inversion files. This stage is
    tiny, but required before the actual inversion.

    :param cmt_file_db:
    :param param_path:
    :param task_counter:
    :return:
    """

    # Get database parameter path
    databaseparam_path = os.path.join(param_path,
                                      "Database/DatabaseParameters.yml")

    # Load Parameters
    DB_params = read_yaml_file(databaseparam_path)

    # Earthquake specific database parameters: Dir and Cid
    Cdir, Cid = get_Centry_path(DB_params["databasedir"], cmt_file_db)

    # Function
    inv_dict_func = os.path.join(bin_path, "write_inversion_dicts.py")

    # Create a Stage object for the inversion dictionaries
    inv_dict_stage = Stage()
    inv_dict_stage.name = "Creating"

    # Create Task
    inv_dict_task = Task()

    # Name the task
    inv_dict_task.name = "Inversion-Dictionaries"

    inv_dict_task.pre_exec = [  # Conda activate
        DB_params["conda-activate"]
    ]

    # Assign the executable to the task
    inv_dict_task.executable = [DB_params["bin-python"]]

    inv_dict_task.arguments = [
        inv_dict_func, "-f", cmt_file_db, "-p", param_path
    ]

    # In the future maybe to database dir as a total log?
    inv_dict_task.stdout = os.path.join(
        "%s" % Cdir, "logs", "stdout.pipeline_%s.task_%s.%s" %
        (Cid, str(task_counter).zfill(4), inv_dict_task.name))

    inv_dict_task.stderr = os.path.join(
        "%s" % Cdir, "logs", "stderr.pipeline_%s.task_%s.%s" %
        (Cid, str(task_counter).zfill(4), inv_dict_task.name))

    inv_dict_stage.add_tasks(inv_dict_task)

    task_counter += 1

    return inv_dict_stage, task_counter
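
The stage builders in this collection are meant to be chained: most return an EnTK Stage together with the running task counter, which the next builder consumes for its zero-padded log-file index. Below is a minimal sketch of that wiring; the build_inversion_pipeline wrapper is hypothetical, Pipeline comes from radical.entk, and the stage builders are the ones shown in these examples.

from radical.entk import Pipeline


def build_inversion_pipeline(cmt_file_db, param_path):
    """Hypothetical wrapper: chain the stage builders and thread the shared
    task_counter through so every task gets a unique log index."""
    p = Pipeline()
    task_counter = 0

    # Each builder returns (Stage, updated counter) ...
    inv_dict_stage, task_counter = create_inversion_dict_stage(
        cmt_file_db, param_path, task_counter)
    p.add_stages(inv_dict_stage)

    # ... except create_inversion_stage (Example 2), which returns only the Stage.
    p.add_stages(create_inversion_stage(cmt_file_db, param_path, task_counter))

    return p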
Example 2
def create_inversion_stage(cmt_file_db, param_path, task_counter):
    """Creates inversion stage.

    :param cmt_file_db:
    :param param_path:
    :return:
    """

    # Get database parameter path
    databaseparam_path = os.path.join(param_path,
                                      "Database/DatabaseParameters.yml")

    # Load Parameters
    DB_params = read_yaml_file(databaseparam_path)

    # Earthquake specific database parameters: Dir and Cid
    Cdir, Cid = get_Centry_path(DB_params["databasedir"], cmt_file_db)

    # Function
    inversion_func = os.path.join(bin_path, "inversion.py")

    # Create a Stage object
    inversion_stage = Stage()
    inversion_stage.name = "CMT3D"

    # Create Task
    inversion_task = Task()

    # Name the task
    inversion_task.name = "Inversion"

    inversion_task.pre_exec = [  # Conda activate
        DB_params["conda-activate"]
    ]

    # Assign the executable to the task
    inversion_task.executable = DB_params["bin-python"]

    inversion_task.arguments = [
        inversion_func, "-f", cmt_file_db, "-p", param_path
    ]

    # In the future maybe to database dir as a total log?
    inversion_task.stdout = os.path.join(
        "%s" % Cdir, "logs", "stdout.pipeline_%s.task_%s.%s" %
        (Cid, str(task_counter).zfill(4), inversion_task.name))

    inversion_task.stderr = os.path.join(
        "%s" % Cdir, "logs", "stderr.pipeline_%s.task_%s.%s" %
        (Cid, str(task_counter).zfill(4), inversion_task.name))

    inversion_stage.add_tasks(inversion_task)

    return inversion_stage
Example 3
def create_process_path_files(cmt_file_db, param_path, task_counter):
    """This function creates the path files used for processing both
    synthetic and observed data in ASDF format, as well as the following
    windowing procedure.

    :param cmt_file_db: cmtfile in the database
    :param param_path: path to parameter file directory
    :param task_counter: total task count up until now in pipeline
    :return: EnTK Stage and updated task counter

    """

    # Get database parameter path
    databaseparam_path = os.path.join(param_path,
                                      "Database/DatabaseParameters.yml")

    # Load Parameters
    DB_params = read_yaml_file(databaseparam_path)

    # Earthquake specific database parameters: Dir and Cid
    Cdir, Cid = get_Centry_path(DB_params["databasedir"], cmt_file_db)

    # Process path function
    create_process_path_bin = os.path.join(bin_path, "create_path_files.py")

    # Create Process Paths Stage (CPP)
    # Create a Stage object
    cpp = Stage()
    cpp.name = "CreateProcessPaths"

    # Create Task
    cpp_t = Task()
    cpp_t.name = "CPP-Task"
    cpp_t.pre_exec = [  # Conda activate
        DB_params["conda-activate"]
    ]
    # Assign the executable to the task
    cpp_t.executable = DB_params["bin-python"]
    cpp_t.arguments = [create_process_path_bin, cmt_file_db]

    # In the future maybe to database dir as a total log?
    cpp_t.stdout = os.path.join(
        "%s" % Cdir, "logs", "stdout.pipeline_%s.task_%s.%s" %
        (Cid, str(task_counter).zfill(4), cpp_t.name))

    cpp_t.stderr = os.path.join(
        "%s" % Cdir, "logs", "stderr.pipeline_%s.task_%s.%s" %
        (Cid, str(task_counter).zfill(4), cpp_t.name))

    task_counter += 1

    cpp.add_tasks(cpp_t)

    return cpp, task_counter
Example 4
def write_sources(cmt_file_db, param_path, task_counter):
    """ This function creates a stage that modifies the CMTSOLUTION files
    before the simulations are run.

    :param cmt_file_db: cmtfile in the database
    :param param_path: path to parameter file directory
    :param task_counter: total task count up until now in pipeline
    :return: EnTK Stage and updated task counter

    """

    # Get Database parameters
    databaseparam_path = os.path.join(param_path,
                                      "Database/DatabaseParameters.yml")

    DB_params = read_yaml_file(databaseparam_path)

    # Earthquake specific database parameters: Dir and Cid
    Cdir, Cid = get_Centry_path(DB_params["databasedir"], cmt_file_db)

    # Path to function
    write_source_func = os.path.join(bin_path, "write_sources.py")

    # Create a Stage object
    w_sources = Stage()

    w_sources.name = "Write-Sources"

    # Create Task for stage
    w_sources_t = Task()
    w_sources_t.name = "Task-Sources"
    w_sources_t.pre_exec = [  # Conda activate
        DB_params["conda-activate"]
    ]
    # Assign the executable to the task
    w_sources_t.executable = DB_params["bin-python"]
    w_sources_t.arguments = [write_source_func, cmt_file_db]

    # In the future maybe to database dir as a total log?
    w_sources_t.stdout = os.path.join(
        "%s" % Cdir, "logs", "stdout.pipeline_%s.task_%s.%s" %
        (Cid, str(task_counter).zfill(4), w_sources_t.name))

    w_sources_t.stderr = os.path.join(
        "%s" % Cdir, "logs", "stderr.pipeline_%s.task_%s.%s" %
        (Cid, str(task_counter).zfill(4), w_sources_t.name))

    # Add Task to the Stage
    w_sources.add_tasks(w_sources_t)

    task_counter += 1

    return w_sources, task_counter
Example 5
def specfem_clean_up(cmt_file_db, param_path, task_counter):
    """ Cleaning up the simulation directories since we don"t need all the
    files for the future.

    :param cmt_file_db: cmtfile in the database
    :param param_path: path to parameter file directory
    :param pipelinedir: path to pipeline directory
    :return: EnTK Stage

    """

    # Get Database parameters
    databaseparam_path = os.path.join(param_path,
                                      "Database/DatabaseParameters.yml")
    # Database parameters.
    DB_params = read_yaml_file(databaseparam_path)

    # Earthquake specific database parameters: Dir and Cid
    Cdir, Cid = get_Centry_path(DB_params["databasedir"], cmt_file_db)

    # Path to function
    clean_up_func = os.path.join(bin_path, "clean_up_simdirs.py")

    # Create a Stage object
    clean_up = Stage()
    clean_up.name = "Clean-Up"

    # Create Task for stage
    clean_up_t = Task()
    clean_up_t.name = "Task-Clean-Up"
    clean_up_t.pre_exec = [  # Conda activate
        DB_params["conda-activate"]
    ]
    # Assign the executable to the task
    clean_up_t.executable = DB_params["bin-python"]
    clean_up_t.arguments = [clean_up_func, cmt_file_db]

    # In the future maybe to database dir as a total log?
    clean_up_t.stdout = os.path.join(
        "%s" % Cdir, "logs", "stdout.pipeline_%s.task_%s.%s" %
        (Cid, str(task_counter).zfill(4), clean_up_t.name))

    clean_up_t.stderr = os.path.join(
        "%s" % Cdir, "logs", "stderr.pipeline_%s.task_%s.%s" %
        (Cid, str(task_counter).zfill(4), clean_up_t.name))

    # Add Task to the Stage
    clean_up.add_tasks(clean_up_t)

    return clean_up, task_counter
Example 6
def data_request(cmt_file_db, param_path, task_counter):
    """ This function creates the request for the observed data and returns
    it as an EnTK Stage

    :param cmt_file_db: cmt_file in the database
    :param param_path: path to parameter file directory
    :param task_counter: total task count up until now in pipeline
    :return: EnTK Stage and updated task counter

    """

    # Get Database parameters
    databaseparam_path = os.path.join(param_path,
                                      "Database/DatabaseParameters.yml")
    DB_params = read_yaml_file(databaseparam_path)

    # Earthquake specific database parameters: Dir and Cid
    Cdir, Cid = get_Centry_path(DB_params["databasedir"], cmt_file_db)

    # Path to function
    request_data_func = os.path.join(bin_path, "request_data.py")

    # Create a Stage object
    datarequest = Stage()

    datarequest_t = Task()
    datarequest_t.name = "data-request"
    datarequest_t.pre_exec = [  # Conda activate
        DB_params["conda-activate"]
    ]
    # Assign the executable to the task
    datarequest_t.executable = DB_params["bin-python"]
    datarequest_t.arguments = [request_data_func, cmt_file_db]

    # In the future maybe to database dir as a total log?
    datarequest_t.stdout = os.path.join(
        "%s" % Cdir, "logs", "stdout.pipeline_%s.task_%s.%s" %
        (Cid, str(task_counter).zfill(4), datarequest_t.name))

    datarequest_t.stderr = os.path.join(
        "%s" % Cdir, "logs", "stderr.pipeline_%s.task_%s.%s" %
        (Cid, str(task_counter).zfill(4), datarequest_t.name))

    # Add Task to the Stage
    datarequest.add_tasks(datarequest_t)

    # Increase task-counter
    task_counter += 1

    return datarequest, task_counter
Example 7
def create_entry(cmt_filename, param_path, task_counter):
    """This function creates the Entk stage for creation of a database entry.

    :param cmt_filename: cmt_filename
    :param param_path: parameter directory
    :param pipelinedir: Directory of the pipeline
    :return: EnTK Stage
    """

    # Get Database parameters
    databaseparam_path = os.path.join(param_path,
                                      "Database/DatabaseParameters.yml")
    DB_params = read_yaml_file(databaseparam_path)

    # Earthquake specific database parameters: Dir and Cid
    Cdir, Cid = get_Centry_path(DB_params["databasedir"], cmt_filename)

    # Create a Stage object
    database_entry = Stage()

    t1 = Task()
    t1.name = "database-entry"
    t1.pre_exec = PRE_EXECS
    # Assign the executable to the task
    t1.executable = 'create-entry'
    t1.arguments = ['-f %s' % cmt_filename, '-p %s' % param_path]

    # In the future maybe to database dir as a total log?
    t1.stdout = os.path.join(
        "%s" % Cdir, "logs", "stdout.pipeline_%s.task_%s.%s" %
        (Cid, str(task_counter).zfill(4), t1.name))

    t1.stderr = os.path.join(
        "%s" % Cdir, "logs", "stderr.pipeline_%s.task_%s.%s" %
        (Cid, str(task_counter).zfill(4), t1.name))

    # Increase task-counter
    task_counter += 1

    # Add Task to the Stage
    database_entry.add_tasks(t1)

    return database_entry, task_counter
Example 8
def test_task_exceptions(s, l, i, b):
    """
    **Purpose**: Test if all attribute assignments raise exceptions for invalid values
    """

    t = Task()

    data_type = [s, l, i, b]

    for data in data_type:

        if not isinstance(data, str):
            with pytest.raises(TypeError):
                t.name = data

            with pytest.raises(TypeError):
                t.path = data

            with pytest.raises(TypeError):
                t.parent_stage = data

            with pytest.raises(TypeError):
                t.parent_pipeline = data

            with pytest.raises(TypeError):
                t.stdout = data

            with pytest.raises(TypeError):
                t.stderr = data

        if not isinstance(data, list):

            with pytest.raises(TypeError):
                t.pre_exec = data

            with pytest.raises(TypeError):
                t.executable = data

            with pytest.raises(TypeError):
                t.arguments = data

            with pytest.raises(TypeError):
                t.post_exec = data

            with pytest.raises(TypeError):
                t.upload_input_data = data

            with pytest.raises(TypeError):
                t.copy_input_data = data

            with pytest.raises(TypeError):
                t.link_input_data = data

            with pytest.raises(TypeError):
                t.move_input_data = data

            with pytest.raises(TypeError):
                t.copy_output_data = data

            with pytest.raises(TypeError):
                t.download_output_data = data

            with pytest.raises(TypeError):
                t.move_output_data = data

        if not isinstance(data, str) and not isinstance(data, unicode):

            with pytest.raises(ValueError):
                t.cpu_reqs = {
                    'processes': 1,
                    'process_type': data,
                    'threads_per_process': 1,
                    'thread_type': None
                }
                t.cpu_reqs = {
                    'processes': 1,
                    'process_type': None,
                    'threads_per_process': 1,
                    'thread_type': data
                }
                t.gpu_reqs = {
                    'processes': 1,
                    'process_type': data,
                    'threads_per_process': 1,
                    'thread_type': None
                }
                t.gpu_reqs = {
                    'processes': 1,
                    'process_type': None,
                    'threads_per_process': 1,
                    'thread_type': data
                }

        if not isinstance(data, int):

            with pytest.raises(TypeError):
                t.cpu_reqs = {
                    'processes': data,
                    'process_type': None,
                    'threads_per_process': 1,
                    'thread_type': None
                }
                t.cpu_reqs = {
                    'processes': 1,
                    'process_type': None,
                    'threads_per_process': data,
                    'thread_type': None
                }
                t.gpu_reqs = {
                    'processes': data,
                    'process_type': None,
                    'threads_per_process': 1,
                    'thread_type': None
                }
                t.gpu_reqs = {
                    'processes': 1,
                    'process_type': None,
                    'threads_per_process': data,
                    'thread_type': None
                }
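
test_task_exceptions takes a string, a list, an integer, and a boolean, which suggests it is driven by a property-based test runner. Below is a hedged sketch of how it could be wired up with hypothesis; the strategies chosen here are an assumption, not the project's actual test configuration.

from hypothesis import given, strategies as st


@given(s=st.text(), l=st.lists(st.text()), i=st.integers(), b=st.booleans())
def test_task_exceptions_driven(s, l, i, b):
    # Delegate to the checks defined above for each generated value set.
    test_task_exceptions(s, l, i, b)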
Example 9
def test_task_to_dict():
    """
    **Purpose**: Test if the 'to_dict' function of Task class converts all expected attributes of the Task into a
    dictionary
    """

    t = Task()
    d = t.to_dict()

    assert d == {
        'uid': None,
        'name': None,
        'state': states.INITIAL,
        'state_history': [states.INITIAL],
        'pre_exec': [],
        'executable': [],
        'arguments': [],
        'post_exec': [],
        'cpu_reqs': {
            'processes': 1,
            'process_type': None,
            'threads_per_process': 1,
            'thread_type': None
        },
        'gpu_reqs': {
            'processes': 0,
            'process_type': None,
            'threads_per_process': 0,
            'thread_type': None
        },
        'lfs_per_process': 0,
        'upload_input_data': [],
        'copy_input_data': [],
        'link_input_data': [],
        'move_input_data': [],
        'copy_output_data': [],
        'move_output_data': [],
        'download_output_data': [],
        'stdout': None,
        'stderr': None,
        'exit_code': None,
        'path': None,
        'tag': None,
        'parent_stage': {
            'uid': None,
            'name': None
        },
        'parent_pipeline': {
            'uid': None,
            'name': None
        }
    }

    t = Task()
    t.uid = 'test.0000'
    t.name = 'new'
    t.pre_exec = ['module load abc']
    t.executable = ['sleep']
    t.arguments = ['10']
    t.cpu_reqs['processes'] = 10
    t.cpu_reqs['threads_per_process'] = 2
    t.gpu_reqs['processes'] = 5
    t.gpu_reqs['threads_per_process'] = 3
    t.lfs_per_process = 1024
    t.upload_input_data = ['test1']
    t.copy_input_data = ['test2']
    t.link_input_data = ['test3']
    t.move_input_data = ['test4']
    t.copy_output_data = ['test5']
    t.move_output_data = ['test6']
    t.download_output_data = ['test7']
    t.stdout = 'out'
    t.stderr = 'err'
    t.exit_code = 1
    t.path = 'a/b/c'
    t.tag = 'task.0010'
    t.parent_stage = {'uid': 's1', 'name': 'stage1'}
    t.parent_pipeline = {'uid': 'p1', 'name': 'pipeline1'}

    d = t.to_dict()

    assert d == {
        'uid': 'test.0000',
        'name': 'new',
        'state': states.INITIAL,
        'state_history': [states.INITIAL],
        'pre_exec': ['module load abc'],
        'executable': ['sleep'],
        'arguments': ['10'],
        'post_exec': [],
        'cpu_reqs': {
            'processes': 10,
            'process_type': None,
            'threads_per_process': 2,
            'thread_type': None
        },
        'gpu_reqs': {
            'processes': 5,
            'process_type': None,
            'threads_per_process': 3,
            'thread_type': None
        },
        'lfs_per_process': 1024,
        'upload_input_data': ['test1'],
        'copy_input_data': ['test2'],
        'link_input_data': ['test3'],
        'move_input_data': ['test4'],
        'copy_output_data': ['test5'],
        'move_output_data': ['test6'],
        'download_output_data': ['test7'],
        'stdout': 'out',
        'stderr': 'err',
        'exit_code': 1,
        'path': 'a/b/c',
        'tag': 'task.0010',
        'parent_stage': {
            'uid': 's1',
            'name': 'stage1'
        },
        'parent_pipeline': {
            'uid': 'p1',
            'name': 'pipeline1'
        }
    }
Example 10
def main(cmt_filename):
    '''This function runs the full pipeline for a single CMT solution.

    Args:
        cmt_filename: str containing the path to the cmt solution that is
                      to be inverted

    Usage:
        From the commandline:
            python pipeline <path/to/cmtsolution>

    '''

    # Path to pipeline file
    pipelinepath = os.path.abspath(__file__)
    pipelinedir = os.path.dirname(pipelinepath)

    # Define parameter directory
    param_path = os.path.join(os.path.dirname(pipelinedir), "params")
    databaseparam_path = os.path.join(param_path,
                                      "Database/DatabaseParameters.yml")
    DB_params = read_yaml_file(databaseparam_path)
    print(DB_params)

    # Earthquake specific database parameters
    # Dir and eq_id
    eq_dir, eq_id = get_eq_entry_path(DB_params["databasedir"], cmt_filename)
    # Earthquake file in the database
    cmt_file_db = os.path.join(eq_dir, "eq_" + eq_id + ".cmt")

    # Create a Pipeline object
    p = Pipeline()

    # ---- DATABASE ENTRY TASK ---------------------------------------------- #

    # Path to function
    create_database_func = os.path.join(pipelinedir,
                                        "01_Create_Database_Entry.py")

    # Create a Stage object
    database_entry = Stage()

    t1 = Task()
    t1.name = 'database-entry'
    t1.pre_exec = [  # Conda activate
        DB_params["conda-activate"]
    ]
    t1.executable = [DB_params['bin-python']]  # Assign executable to the task
    t1.arguments = [create_database_func, os.path.abspath(cmt_filename)]

    # In the future maybe to database dir as a total log?
    t1.stdout = os.path.join(pipelinedir,
                             "database-entry." + eq_id + ".stdout")
    t1.stderr = os.path.join(pipelinedir,
                             "database-entry." + eq_id + ".stderr")

    # Add Task to the Stage
    database_entry.add_tasks(t1)

    # Add Stage to the Pipeline
    p.add_stages(database_entry)

    # # ---- REQUEST DATA ----------------------------------------------------- #
    #
    # # Path to function
    # request_data_func = os.path.join(pipelinedir, "02_Request_Data.py")
    #
    # # Create a Stage object
    # datarequest = Stage()
    #
    # datarequest_t = Task()
    # datarequest_t.name = 'data-request'
    # datarequest_t.pre_exec = [  # Conda activate
    #     DB_params["conda-activate"]]
    # datarequest_t.executable = [DB_params['bin-python']]  # Assign executable
    #                                                       # to the task
    # datarequest_t.arguments = [request_data_func, cmt_file_db]
    #
    # # In the future maybe to database dir as a total log?
    # datarequest_t.stdout = os.path.join(pipelinedir,
    #                                   "datarequest." + eq_id + ".stdout")
    # datarequest_t.stderr = os.path.join(pipelinedir,
    #                                   "datarequest." + eq_id + ".stderr")
    #
    # # Add Task to the Stage
    # datarequest.add_tasks(datarequest_t)
    #
    # # Add Stage to the Pipeline
    # p.add_stages(datarequest)

    # ---- Write Sources ---------------------------------------------------- #

    # Path to function
    write_source_func = os.path.join(pipelinedir, "03_Write_Sources.py")

    # Create a Stage object
    w_sources = Stage()
    w_sources.name = 'Write-Sources'

    # Create Task for stage
    w_sources_t = Task()
    w_sources_t.name = 'Write-Sources'
    w_sources_t.pre_exec = [  # Conda activate
        DB_params["conda-activate"]
    ]
    # Assign the executable to the task
    w_sources_t.executable = [DB_params['bin-python']]
    w_sources_t.arguments = [write_source_func, cmt_file_db]

    # In the future maybe to database dir as a total log?
    w_sources_t.stdout = os.path.join(pipelinedir,
                                      "write_sources." + eq_id + ".stdout")
    w_sources_t.stderr = os.path.join(pipelinedir,
                                      "write_sources." + eq_id + ".stderr")

    # Add Task to the Stage
    w_sources.add_tasks(w_sources_t)

    # Add Stage to the Pipeline
    p.add_stages(w_sources)

    # ---- Run Specfem ----------------------------------------------------- #

    specfemspec_path = os.path.join(param_path,
                                    "SpecfemParams/SpecfemParams.yml")
    comp_and_modules_path = os.path.join(
        param_path, "SpecfemParams/"
        "CompilersAndModules.yml")

    # Load Parameters
    specfemspecs = read_yaml_file(specfemspec_path)
    cm_dict = read_yaml_file(comp_and_modules_path)

    attr = [
        "CMT", "CMT_rr", "CMT_tt", "CMT_pp", "CMT_rt", "CMT_rp", "CMT_tp",
        "CMT_depth", "CMT_lat", "CMT_lon"
    ]

    simdir = os.path.join(eq_dir, "CMT_SIMs")

    # Create a Stage object
    runSF3d = Stage()
    runSF3d.name = 'Simulation'

    for at in attr[:1]:  # only the unperturbed "CMT" simulation for now
        sf_t = Task()
        sf_t.name = 'run-' + at

        # Module Loading
        sf_t.pre_exec = [  # Get rid of existing modules
            'module purge'
        ]
        for module in cm_dict["modulelist"]:
            sf_t.pre_exec.append("module load %s" % module)
        sf_t.pre_exec.append("module load %s" % cm_dict["gpu_module"])

        # Change directory to specfem directories
        sf_t.pre_exec.append(  # Change directory
            "cd %s" % os.path.join(simdir, at))

        sf_t.executable = ['./bin/xspecfem3D']  # Assign executable

        # In the future maybe to database dir as a total log?
        sf_t.stdout = os.path.join(pipelinedir,
                                   "run_specfem." + eq_id + ".stdout")
        sf_t.stderr = os.path.join(pipelinedir,
                                   "run_specfem." + eq_id + ".stderr")

        sf_t.gpu_reqs = {
            'processes': 6,
            'process_type': 'MPI',
            'threads_per_process': 1,
            'thread_type': 'OpenMP'
        }

        # Add Task to the Stage
        runSF3d.add_tasks(sf_t)

    # Add Simulation stage to the Pipeline
    p.add_stages(runSF3d)

    # Create Application Manager
    appman = AppManager(hostname=hostname, port=port)

    # Create a resource-request dictionary. The mandatory keys are
    # resource, walltime, and cpus; project, queue, schema, and gpus
    # describe the target machine and its GPU allocation.
    res_dict = {
        'resource': 'princeton.tiger_gpu',
        'project': 'geo',
        'queue': 'gpu',
        'schema': 'local',
        'walltime': 300,
        'cpus': 2,
        'gpus': 6
    }

    # Assign resource request description to the Application Manager
    appman.resource_desc = res_dict

    # Assign the workflow as a set or list of Pipelines to the Application Manager
    # Note: The list order is not guaranteed to be preserved
    appman.workflow = set([p])

    # Run the Application Manager
    appman.run()
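
main() itself is only the driver; the usage line in its docstring implies a thin command-line wrapper around it. A minimal sketch follows (the module name pipeline.py and the argument check are assumptions).

if __name__ == "__main__":
    import sys

    # Expect exactly one argument: the path to the CMTSOLUTION file.
    if len(sys.argv) != 2:
        sys.exit("usage: python pipeline.py <path/to/cmtsolution>")

    main(sys.argv[1])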
Example 11
def create_windowing_stage(cmt_file_db, param_path, task_counter):
    """This function creates the ASDF windowing stage.

    :param cmt_file_db: cmtfile in the database
    :param param_path: path to parameter file directory
    :param task_counter: total task count up until now in pipeline
    :return: list of EnTK Stages and updated task counter

    """

    # Get database parameter path
    databaseparam_path = os.path.join(param_path,
                                      "Database/DatabaseParameters.yml")

    # Load Parameters
    DB_params = read_yaml_file(databaseparam_path)

    # Earthquake specific database parameters: Dir and Cid
    Cdir, Cid = get_Centry_path(DB_params["databasedir"], cmt_file_db)

    # Windowing parameter file directory
    window_process_dir = os.path.join(param_path, "CreateWindows")

    # Window path list
    # Important step: this creates the windowing list before the actual
    # window path files have been created, so the windowing work can be
    # distributed with one task per ASDF file pair on a single processor
    # (no MPI support).

    window_path_list, _ = get_windowing_list(cmt_file_db,
                                             window_process_dir,
                                             verbose=False)

    # Process path function
    window_func = os.path.join(bin_path, "window_selection_asdf.py")

    # The following workaround avoids the restriction that multiple tasks
    # cannot read the same file: create two stages, one for body waves (and
    # general entries) and one for surface waves.
    bodywave_list = []
    surfacewave_list = []
    for file in window_path_list:
        name = os.path.basename(file)
        if "surface" in name:
            surfacewave_list.append(file)
        else:
            bodywave_list.append(file)

    stage_list = []
    if len(bodywave_list) > 0:
        stage_list.append(bodywave_list)
    if len(surfacewave_list) > 0:
        stage_list.append(surfacewave_list)

    # List of stages
    stages = []

    for window_list in stage_list:
        # Create a Stage object for the windowing tasks
        window_stage = Stage()
        window_stage.name = "Windowing"

        # Loop over process path files
        for window_path in window_list:

            # Create Task
            window_task = Task()

            # This way the task gets the name of the path file
            window_task.name = os.path.basename(window_path)

            window_task.pre_exec = [  # Conda activate
                DB_params["conda-activate"]
            ]

            # Assign the executable to the task
            window_task.executable = [DB_params["bin-python"]]

            # Create Argument list
            arguments = [window_func, "-f", window_path]
            if DB_params["verbose"]:
                arguments.append("-v")

            window_task.arguments = arguments

            # In the future maybe to database dir as a total log?
            window_task.stdout = os.path.join(
                "%s" % Cdir, "logs", "stdout.pipeline_%s.task_%s.%s" %
                (Cid, str(task_counter).zfill(4), window_task.name))

            window_task.stderr = os.path.join(
                "%s" % Cdir, "logs", "stderr.pipeline_%s.task_%s.%s" %
                (Cid, str(task_counter).zfill(4), window_task.name))

            window_stage.add_tasks(window_task)

            task_counter += 1

        stages.append(window_stage)

    return stages, task_counter
Example 12
def create_processing_stage(cmt_file_db, param_path, task_counter):
    """This function creates the ASDF processing stage.

    :param cmt_file_db: cmtfile in the database
    :param param_path: path to parameter file directory
    :param task_counter: total task count up until now in pipeline
    :return: list of EnTK Stages and updated task counter

    """

    # Get database parameter path
    databaseparam_path = os.path.join(param_path,
                                      "Database/DatabaseParameters.yml")

    # Load Parameters
    DB_params = read_yaml_file(databaseparam_path)

    # Earthquake specific database parameters: Dir and Cid
    Cdir, Cid = get_Centry_path(DB_params["databasedir"], cmt_file_db)

    # Processing param dir
    process_obs_param_dir = os.path.join(param_path, "ProcessObserved")
    process_syn_param_dir = os.path.join(param_path, "ProcessSynthetic")

    # Process path list
    # Important step: this creates the processing list before the actual
    # process path files have been created, so the processing work can be
    # distributed with one task per ASDF file on one or more processors
    # (MPI enabled).
    processing_list, _, _ = get_processing_list(cmt_file_db,
                                                process_obs_param_dir,
                                                process_syn_param_dir,
                                                verbose=True)

    # The following workaround avoids the restriction that multiple tasks
    # cannot read the same file.
    # Get all available bands
    bands = []
    for file in processing_list:
        bands.append(os.path.basename(file).split(".")[-2])

    bands = list(set(bands))

    # List of stages
    stages = []

    for band in bands:

        # Processing sublist
        sub_list = [x for x in processing_list if band in x]

        # Process path function
        process_func = os.path.join(bin_path, "process_asdf.py")

        # Create a Stage object for this band's processing tasks
        process_stage = Stage()
        process_stage.name = "Processing"

        # Number of Processes:
        N = len(processing_list)

        # Loop over process path files
        for process_path in sub_list:

            # Create Task
            processing_task = Task()

            # This way the task gets the name of the path file
            processing_task.name = "Processing-" \
                                   + os.path.basename(process_path)

            processing_task.pre_exec = [  # Conda activate
                DB_params["conda-activate"]
            ]

            # Assign the executable to the task
            processing_task.executable = DB_params["bin-python"]

            # Create Argument list
            arguments = [process_func, "-f", process_path]
            if DB_params["verbose"]:
                arguments.append("-v")

            processing_task.arguments = arguments

            # In the future maybe to database dir as a total log?
            processing_task.stdout = os.path.join(
                "%s" % Cdir, "logs", "stdout.pipeline_%s.task_%s.%s" %
                (Cid, str(task_counter).zfill(4), processing_task.name))

            processing_task.stderr = os.path.join(
                "%s" % Cdir, "logs", "stderr.pipeline_%s.task_%s.%s" %
                (Cid, str(task_counter).zfill(4), processing_task.name))

            processing_task.cpu_reqs = {
                "processes": 1,
                "process_type": "MPI",
                "threads_per_process": 1,
                "thread_type": "OpenMP"
            }

            task_counter += 1

            process_stage.add_tasks(processing_task)

        stages.append(process_stage)

    return stages, task_counter
Example 13
def convert_traces(cmt_file_db, param_path, task_counter):
    """This function creates the to-ASDF conversion stage. Meaning, in this
    stage, both synthetic and observed traces are converted to ASDF files.

    :param cmt_file_db: cmtfile in the database
    :param param_path: path to parameter file directory
    :param task_counter: total task count up until now in pipeline
    :return: EnTK Stage and updated task counter

    """

    databaseparam_path = os.path.join(param_path,
                                      "Database/DatabaseParameters.yml")

    # Load Parameters
    DB_params = read_yaml_file(databaseparam_path)

    # Earthquake specific database parameters: Dir and Cid
    Cdir, Cid = get_Centry_path(DB_params["databasedir"], cmt_file_db)

    # File and directory
    cmt_dir = os.path.dirname(cmt_file_db)
    sim_dir = os.path.join(cmt_dir, "CMT_SIMs")

    # Create a Stage object
    conversion_stage = Stage()
    conversion_stage.name = "Convert"

    # Conversion binary
    conversion_bin = os.path.join(bin_path, "convert_to_asdf.py")

    attr = [
        "CMT", "CMT_rr", "CMT_tt", "CMT_pp", "CMT_rt", "CMT_rp", "CMT_tp",
        "CMT_depth", "CMT_lat", "CMT_lon"
    ]

    ##### Converting the synthetic data
    if DB_params["verbose"]:
        print("\nConverting synthetic traces to ASDF ... \n")

    for _i, at in enumerate(attr[:DB_params["npar"] + 1]):

        # Path file
        syn_path_file = os.path.join(sim_dir, at, at + ".yml")

        # Create Task for stage
        c_task = Task()
        c_task.name = at

        c_task.pre_exec = [DB_params["conda-activate"]]
        # Assign the executable to the task
        c_task.executable = DB_params["bin-python"]

        arguments = [conversion_bin, "-f", syn_path_file]
        if DB_params["verbose"]:
            arguments.append("-v")

        c_task.arguments = arguments

        # In the future maybe to database dir as a total log?
        c_task.stdout = os.path.join(
            "%s" % Cdir, "logs", "stdout.pipeline_%s.task_%s.%s" %
            (Cid, str(task_counter).zfill(4), c_task.name))

        c_task.stderr = os.path.join(
            "%s" % Cdir, "logs", "stderr.pipeline_%s.task_%s.%s" %
            (Cid, str(task_counter).zfill(4), c_task.name))

        # Increase Task counter
        task_counter += 1

        conversion_stage.add_tasks(c_task)

    ##### Converting the observed data
    if DB_params["verbose"]:
        print("\nConverting observed traces to ASDF ... \n")

    obs_path_file = os.path.join(cmt_dir, "seismograms", "obs", "observed.yml")

    # Create Task for stage
    c_task = Task()
    c_task.name = "Observed"

    c_task.pre_exec = [DB_params["conda-activate"]]
    # Assign the executable to the task
    c_task.executable = DB_params["bin-python"]

    # Create Argument list
    arguments = [conversion_bin, "-f", obs_path_file]
    if DB_params["verbose"]:
        arguments.append("-v")

    c_task.arguments = arguments

    # In the future maybe to database dir as a total log?
    c_task.stdout = os.path.join(
        "%s" % Cdir, "logs", "stdout.pipeline_%s.task_%s.%s" %
        (Cid, str(task_counter).zfill(4), c_task.name))

    c_task.stderr = os.path.join(
        "%s" % Cdir, "logs", "stderr.pipeline_%s.task_%s.%s" %
        (Cid, str(task_counter).zfill(4), c_task.name))
    # Increase Task counter
    task_counter += 1

    conversion_stage.add_tasks(c_task)

    return conversion_stage, task_counter
Example 14
def run_specfem(cmt_file_db, param_path, task_counter):
    """ This function runs the necessary Specfem simulations.

    :param cmt_file_db: cmtfile in the database
    :param param_path: path to parameter file directory
    :param task_counter: total task count up until now in pipeline
    :return: EnTK Stage and updated task counter

    """

    # Get Database parameters
    databaseparam_path = os.path.join(param_path,
                                      "Database/DatabaseParameters.yml")
    # Database parameters.
    DB_params = read_yaml_file(databaseparam_path)

    # Earthquake specific database parameters: Dir and Cid
    Cdir, Cid = get_Centry_path(DB_params["databasedir"], cmt_file_db)

    specfemspec_path = os.path.join(param_path,
                                    "SpecfemParams/SpecfemParams.yml")
    comp_and_modules_path = os.path.join(
        param_path, "SpecfemParams/"
        "CompilersAndModules.yml")

    # Load Parameters
    specfemspecs = read_yaml_file(specfemspec_path)
    cm_dict = read_yaml_file(comp_and_modules_path)

    # Simulations to be run
    attr = [
        "CMT", "CMT_rr", "CMT_tt", "CMT_pp", "CMT_rt", "CMT_rp", "CMT_tp",
        "CMT_depth", "CMT_lat", "CMT_lon"
    ]

    # Simulation directory
    simdir = os.path.join(os.path.dirname(cmt_file_db), "CMT_SIMs")

    # Create a Stage object
    runSF3d = Stage()
    runSF3d.name = "Simulation"

    for at in attr:
        sf_t = Task()
        sf_t.name = "run-" + at

        # Module Loading
        sf_t.pre_exec = [  # Get rid of existing modules
            "module purge"
        ]
        # Append to pre_execution module list.
        for module in cm_dict["modulelist"]:
            sf_t.pre_exec.append("module load %s" % module)

        if specfemspecs["GPU_MODE"] is True:
            sf_t.pre_exec.append("module load %s" % cm_dict["gpu_module"])

        # Change directory to specfem directories
        sf_t.pre_exec.append(  # Change directory
            "cd %s" % os.path.join(simdir, at))

        sf_t.executable = "./bin/xspecfem3D"  # Assigned executable

        # In the future maybe to database dir as a total log?
        sf_t.stdout = os.path.join(
            "%s" % Cdir, "logs", "stdout.pipeline_%s.task_%s.%s" %
            (Cid, str(task_counter).zfill(4), sf_t.name))

        sf_t.stderr = os.path.join(
            "%s" % Cdir, "logs", "stderr.pipeline_%s.task_%s.%s" %
            (Cid, str(task_counter).zfill(4), sf_t.name))

        print(sf_t.cpu_reqs)

        # Increase Task counter
        task_counter += 1

        # Add Task to the Stage
        runSF3d.add_tasks(sf_t)

    return runSF3d, task_counter
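
Note that run_specfem only prints each task's default cpu_reqs and never assigns resource requirements, whereas the simulation loop in Example 10 sets gpu_reqs explicitly. Below is a hedged sketch of a helper that could be called inside the "for at in attr:" loop before add_tasks(); the "cpus" key looked up in specfemspecs is an assumption, and the GPU values mirror Example 10.

def assign_specfem_reqs(sf_t, specfemspecs):
    """Hypothetical helper: set resource requirements on a Specfem task."""
    if specfemspecs["GPU_MODE"] is True:
        # Mirrors the gpu_reqs used in Example 10's simulation stage.
        sf_t.gpu_reqs = {
            "processes": 6,
            "process_type": "MPI",
            "threads_per_process": 1,
            "thread_type": "OpenMP"
        }
    else:
        sf_t.cpu_reqs = {
            "processes": specfemspecs.get("cpus", 1),  # assumed key name
            "process_type": "MPI",
            "threads_per_process": 1,
            "thread_type": "OpenMP"
        }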
def test_task_exceptions(s, l, i, b):
    """
    **Purpose**: Test if all attribute assignments raise exceptions for invalid values
    """

    t = Task()

    data_type = [s, l, i, b]

    for data in data_type:

        if not isinstance(data, str):
            with pytest.raises(TypeError):
                t.name = data

            with pytest.raises(TypeError):
                t.path = data

            with pytest.raises(TypeError):
                t.parent_stage = data

            with pytest.raises(TypeError):
                t.parent_pipeline = data

            with pytest.raises(TypeError):
                t.stdout = data

            with pytest.raises(TypeError):
                t.stderr = data

        if not isinstance(data, list):

            with pytest.raises(TypeError):
                t.pre_exec = data

            with pytest.raises(TypeError):
                t.arguments = data

            with pytest.raises(TypeError):
                t.post_exec = data

            with pytest.raises(TypeError):
                t.upload_input_data = data

            with pytest.raises(TypeError):
                t.copy_input_data = data

            with pytest.raises(TypeError):
                t.link_input_data = data

            with pytest.raises(TypeError):
                t.move_input_data = data

            with pytest.raises(TypeError):
                t.copy_output_data = data

            with pytest.raises(TypeError):
                t.download_output_data = data

            with pytest.raises(TypeError):
                t.move_output_data = data

        if not isinstance(data, str) and not isinstance(data, list):

            with pytest.raises(TypeError):
                t.executable = data

        if not isinstance(data, str) and not isinstance(data, unicode):

            with pytest.raises(ValueError):
                t.cpu_reqs = {
                                'processes': 1,
                                'process_type': data,
                                'threads_per_process': 1,
                                'thread_type': None
                            }
                t.cpu_reqs = {
                                'processes': 1,
                                'process_type': None,
                                'threads_per_process': 1,
                                'thread_type': data
                            }
                t.gpu_reqs = {
                                'processes': 1,
                                'process_type': data,
                                'threads_per_process': 1,
                                'thread_type': None
                            }
                t.gpu_reqs = {
                                'processes': 1,
                                'process_type': None,
                                'threads_per_process': 1,
                                'thread_type': data
                            }

        if not isinstance(data, int):

            with pytest.raises(TypeError):
                t.cpu_reqs = {
                                'processes': data,
                                'process_type': None,
                                'threads_per_process': 1,
                                'thread_type': None
                            }
                t.cpu_reqs = {
                                'processes': 1,
                                'process_type': None,
                                'threads_per_process': data,
                                'thread_type': None
                            }
                t.gpu_reqs = {
                                'processes': data,
                                'process_type': None,
                                'threads_per_process': 1,
                                'thread_type': None
                            }
                t.gpu_reqs = {
                                'processes': 1,
                                'process_type': None,
                                'threads_per_process': data,
                                'thread_type': None
                            }
def test_task_to_dict():
    """
    **Purpose**: Test if the 'to_dict' function of Task class converts all expected attributes of the Task into a
    dictionary
    """

    t = Task()
    d = t.to_dict()

    assert d == {   'uid': None,
                    'name': None,
                    'state': states.INITIAL,
                    'state_history': [states.INITIAL],
                    'pre_exec': [],
                    'executable': str(),
                    'arguments': [],
                    'post_exec': [],
                    'cpu_reqs': { 'processes': 1,
                                'process_type': None,
                                'threads_per_process': 1,
                                'thread_type': None
                                },
                    'gpu_reqs': { 'processes': 0,
                                'process_type': None,
                                'threads_per_process': 0,
                                'thread_type': None
                                },
                    'lfs_per_process': 0,
                    'upload_input_data': [],
                    'copy_input_data': [],
                    'link_input_data': [],
                    'move_input_data': [],
                    'copy_output_data': [],
                    'move_output_data': [],
                    'download_output_data': [],
                    'stdout': None,
                    'stderr': None,
                    'exit_code': None,
                    'path': None,
                    'tag': None,
                    'parent_stage': {'uid':None, 'name': None},
                    'parent_pipeline': {'uid':None, 'name': None}}


    t = Task()
    t.uid = 'test.0000'
    t.name = 'new'
    t.pre_exec = ['module load abc']
    t.executable = ['sleep']
    t.arguments = ['10']
    t.cpu_reqs['processes'] = 10
    t.cpu_reqs['threads_per_process'] = 2
    t.gpu_reqs['processes'] = 5
    t.gpu_reqs['threads_per_process'] = 3
    t.lfs_per_process = 1024
    t.upload_input_data = ['test1']
    t.copy_input_data = ['test2']
    t.link_input_data = ['test3']
    t.move_input_data = ['test4']
    t.copy_output_data = ['test5']
    t.move_output_data = ['test6']
    t.download_output_data = ['test7']
    t.stdout = 'out'
    t.stderr = 'err'
    t.exit_code = 1
    t.path = 'a/b/c'
    t.tag = 'task.0010'
    t.parent_stage = {'uid': 's1', 'name': 'stage1'}
    t.parent_pipeline = {'uid': 'p1', 'name': 'pipeline1'}

    d = t.to_dict()

    assert d == {   'uid': 'test.0000',
                    'name': 'new',
                    'state': states.INITIAL,
                    'state_history': [states.INITIAL],
                    'pre_exec': ['module load abc'],
                    'executable': 'sleep',
                    'arguments': ['10'],
                    'post_exec': [],
                    'cpu_reqs': { 'processes': 10,
                                'process_type': None,
                                'threads_per_process': 2,
                                'thread_type': None
                                },
                    'gpu_reqs': { 'processes': 5,
                                'process_type': None,
                                'threads_per_process': 3,
                                'thread_type': None
                                },
                    'lfs_per_process': 1024,
                    'upload_input_data': ['test1'],
                    'copy_input_data': ['test2'],
                    'link_input_data': ['test3'],
                    'move_input_data': ['test4'],
                    'copy_output_data': ['test5'],
                    'move_output_data': ['test6'],
                    'download_output_data': ['test7'],
                    'stdout': 'out',
                    'stderr': 'err',
                    'exit_code': 1,
                    'path': 'a/b/c',
                    'tag': 'task.0010',
                    'parent_stage': {'uid': 's1', 'name': 'stage1'},
                    'parent_pipeline': {'uid': 'p1', 'name': 'pipeline1'}}


    t.executable = 'sleep'
    d = t.to_dict()

    assert d == {   'uid': 'test.0000',
                    'name': 'new',
                    'state': states.INITIAL,
                    'state_history': [states.INITIAL],
                    'pre_exec': ['module load abc'],
                    'executable': 'sleep',
                    'arguments': ['10'],
                    'post_exec': [],
                    'cpu_reqs': { 'processes': 10,
                                'process_type': None,
                                'threads_per_process': 2,
                                'thread_type': None
                                },
                    'gpu_reqs': { 'processes': 5,
                                'process_type': None,
                                'threads_per_process': 3,
                                'thread_type': None
                                },
                    'lfs_per_process': 1024,
                    'upload_input_data': ['test1'],
                    'copy_input_data': ['test2'],
                    'link_input_data': ['test3'],
                    'move_input_data': ['test4'],
                    'copy_output_data': ['test5'],
                    'move_output_data': ['test6'],
                    'download_output_data': ['test7'],
                    'stdout': 'out',
                    'stderr': 'err',
                    'exit_code': 1,
                    'path': 'a/b/c',
                    'tag': 'task.0010',
                    'parent_stage': {'uid': 's1', 'name': 'stage1'},
                    'parent_pipeline': {'uid': 'p1', 'name': 'pipeline1'}}