def __dispatch_raw_gradient_interpolation(self, event: str, verbose=False):
    """
    Take the gradient out of the adjoint simulations and
    interpolate them to the inversion grid.

    Nothing is dispatched when the "gradient_interp" job of the event is
    already marked as submitted.

    :param event: Name of event
    :type event: str
    :param verbose: Print a message when the interpolation was already
        submitted, defaults to False
    :type verbose: bool, optional
    """
    submitted, _ = self.__submitted_retrieved(event, "gradient_interp")
    if submitted:
        if verbose:
            self.print(f"Interpolation for gradient {event} "
                       "has already been submitted")
        return

    hpc_cluster = get_site(self.comm.project.interpolation_site)
    # The folder with the interpolation weights is built the same way for
    # every site type, so no distinction between local and remote sites
    # is needed here.
    interp_folder = os.path.join(
        self.comm.project.remote_inversionson_dir,
        "INTERPOLATION_WEIGHTS",
        "GRADIENTS",
        event,
    )
    if not hpc_cluster.remote_exists(interp_folder):
        hpc_cluster.remote_mkdir(interp_folder)

    # TODO: make sure that the correct layers are interpolated. This could
    # be done by specifying the layers explicitly (e.g. in the config file)
    # rather than saying nocore.
    self.comm.multi_mesh.interpolate_gradient_to_model(
        event, smooth=False, interp_folder=interp_folder)
    def __prepare_forward(self, event: str):
        """
        Interpolate model to a simulation mesh

        Returns immediately when the "prepare_forward" job of the event is
        already marked as submitted. Otherwise the folder for the model
        interpolation weights is created on the interpolation site (if
        missing), the forward preparation is started and the iteration toml
        is updated.

        :param event: Name of event
        :type event: str
        """
        submitted, _retrieved = self.__submitted_retrieved(
            event, sim_type="prepare_forward")
        if submitted:
            return

        site = get_site(self.comm.project.interpolation_site)
        # Remote folder holding the interpolation weights of this event
        weights_dir = os.path.join(
            self.comm.project.remote_inversionson_dir,
            "INTERPOLATION_WEIGHTS", "MODELS", event)
        weights_dir_missing = not site.remote_exists(weights_dir)
        if weights_dir_missing:
            site.remote_mkdir(weights_dir)

        self.comm.multi_mesh.prepare_forward(event=event)
        self.comm.project.update_iteration_toml()
Esempio n. 3
0
    def construct_simulation_from_dict(self, event: str):
        """
        Build a local simulation object from a simulation dictionary that
        lives on the remote site.

        The dictionary is downloaded from the output folder of the
        "prepare_forward" job unless a local copy already exists. The
        filenames of the "mesh", "model" and "geometry" entries of its
        "domain" section are then pointed to the master model, so none of
        the event specific data has to be available locally.
        Only used to submit a job to the remote without having to store
        anything locally.

        :param event: Name of event
        :type event: str
        :return: Whatever ``self.simulation_from_dict`` creates from the
            dictionary and the master mesh
        """
        # Each event always uses the same local folder for its dictionary
        salvus_files = self.comm.lasif.lasif_comm.project.paths["salvus_files"]
        local_toml = (salvus_files / "SIMULATION_DICTS" / event /
                      "simulation_dict.toml")
        if not os.path.exists(local_toml.parent):
            os.makedirs(local_toml.parent)

        # Only fetch the dictionary when there is no local copy yet
        if not os.path.exists(local_toml):
            site = sapi.get_site(self.comm.project.site_name)
            prepare_job = self.get_job(event, sim_type="prepare_forward")
            remote_toml = (prepare_job.stdout_path.parent / "output" /
                           "simulation_dict.toml")
            site.remote_get(remotepath=remote_toml, localpath=local_toml)

        simulation_dict = toml.load(local_toml)

        master_model_path = self.comm.lasif.get_master_model()
        master_mesh = self.comm.lasif.get_master_mesh()
        for section in ("mesh", "model", "geometry"):
            simulation_dict["domain"][section]["filename"] = master_model_path

        return self.simulation_from_dict(simulation_dict, master_mesh)
Esempio n. 4
0
    def upload_stf(self, iteration: str, hpc_cluster=None):
        """
        Upload the source time function to the remote machine

        The local source time function is written first when it does not
        exist. On the remote side the iteration folder and the file
        "stf.h5" are only created when they are not there yet.

        :param iteration: Name of iteration
        :type iteration: str
        :param hpc_cluster: Site object to upload to. When None, the site
            named by the project's ``site_name`` is used, defaults to None
        """
        stf_file = self.find_stf(iteration=iteration)
        if not os.path.exists(stf_file):
            write_custom_stf(stf_path=stf_file, comm=self.lasif_comm)

        if hpc_cluster is None:
            hpc_cluster = get_site(self.comm.project.site_name)

        # Remote layout: <remote dir>/SOURCE_TIME_FUNCTIONS/<iteration>/stf.h5
        remote_stf_dir = (self.comm.project.remote_inversionson_dir /
                          "SOURCE_TIME_FUNCTIONS" / iteration)
        remote_stf_file = remote_stf_dir / "stf.h5"

        if not hpc_cluster.remote_exists(remote_stf_dir):
            hpc_cluster.remote_mkdir(remote_stf_dir)
        if not hpc_cluster.remote_exists(remote_stf_file):
            hpc_cluster.remote_put(stf_file, remote_stf_file)
    def __cut_and_clip_gradient(self, event, verbose=False):
        """
        Cut the source region from the gradient and clip the gradient to
        some percentile before summing or potential smoothing.
        This can all be configured in information toml.

        Nothing is done when the source cut radius is 0.0 and the clipping
        percentile is 1.0. Otherwise the processing script (uploaded once)
        and a toml file describing the task are put on the remote site and
        the script is executed there through ssh.

        :param event: name of the event
        :param verbose: Not used by this method, defaults to False
        :raises Exception: When the output of the remote script does not
            contain the success message. The exception carries the stdout
            and stderr of the remote command.
        """
        if self.comm.project.cut_source_radius == 0.0 and \
                self.comm.project.clip_gradient == 1.0:
            return

        job = self.comm.salvus_flow.get_job(event, "adjoint")
        output_files = job.get_output_files()
        gradient_path = output_files[0][("adjoint", "gradient",
                                         "output_filename")]
        # Connect to the remote site
        hpc_cluster = get_site(self.comm.project.site_name)

        remote_inversionson_dir = os.path.join(
            self.comm.project.remote_inversionson_dir, "GRADIENT_PROCESSING")

        if not hpc_cluster.remote_exists(remote_inversionson_dir):
            hpc_cluster.remote_mkdir(remote_inversionson_dir)

        # copy processing script to hpc
        remote_script = os.path.join(remote_inversionson_dir,
                                     "cut_and_clip.py")
        if not hpc_cluster.remote_exists(remote_script):
            hpc_cluster.remote_put(CUT_SOURCE_SCRIPT_PATH, remote_script)

        info = {
            "filename": str(gradient_path),
            "cutout_radius_in_km": self.comm.project.cut_source_radius,
            "source_location": self.comm.lasif.get_source(event_name=event),
            "clipping_percentile": self.comm.project.clip_gradient,
            "parameters": self.comm.project.inversion_params,
        }

        # The toml is written to the current working directory, uploaded and
        # removed again. The removal is done in a finally clause so that the
        # file does not stay behind when writing or uploading fails.
        toml_filename = f"{event}_gradient_process.toml"
        remote_toml = os.path.join(remote_inversionson_dir, toml_filename)
        try:
            with open(toml_filename, "w") as fh:
                toml.dump(info, fh)
            hpc_cluster.remote_put(toml_filename, remote_toml)
        finally:
            if os.path.exists(toml_filename):
                os.remove(toml_filename)

        # Call script
        exit_code, stdout, stderr = hpc_cluster.run_ssh_command(
            f"python {remote_script} {remote_toml}")
        # Success is detected from the message printed by the script
        if "Remote source cut completed successfully" in stdout:
            self.print(f"Source cut and clip completed for {event}.",
                       emoji_alias=":scissors:")
        else:
            print(
                "Something went wrong in cutting and clipping on the remote.")
            raise Exception(stdout, stderr)
Esempio n. 6
0
    def __get_custom_job(self, event: str, sim_type: str):
        """
        A get_job function which handles job types which are not of type
        salvus.flow.sites.salvus_job.SalvusJob

        The job name is read from the project's bookkeeping for the given
        simulation type, the matching entry is fetched from the job
        database and a job object is rebuilt from it without initializing
        it on the site.

        :param event: Name of event
        :type event: str
        :param sim_type: Type of simulation, one of "prepare_forward",
            "hpc_processing" or "gradient_interp"
        :type sim_type: str
        :raises InversionsonError: When the simulation type is not one of
            the supported ones or when the job has not been submitted
        :return: The job object rebuilt from the database entry
        """
        # sim_type -> (project attribute holding the job bookkeeping,
        #              description used in the error message)
        custom_jobs = {
            "prepare_forward": ("prepare_forward_job", "Model interpolation"),
            "hpc_processing": ("hpc_processing_job", "HPC processing"),
            "gradient_interp": ("gradient_interp_job",
                                "Gradient interpolation"),
        }
        if sim_type not in custom_jobs:
            # Without this check the job name would be undefined below
            raise InversionsonError(
                f"Simulation type {sim_type} is not a custom job type. "
                f"Supported types are: {', '.join(custom_jobs)}")

        attribute, description = custom_jobs[sim_type]
        job_record = getattr(self.comm.project, attribute)[event]
        if not job_record["submitted"]:
            raise InversionsonError(
                f"{description} job for event: {event} "
                "has not been submitted")
        job_name = job_record["name"]
        gradient = sim_type == "gradient_interp"

        site_name = self.comm.project.interpolation_site
        db_job = sapi._get_config()["db"].get_jobs(
            limit=1,
            site_name=site_name,
            job_name=job_name,
        )[0]

        # NOTE(review): the commands are obtained from get_interp_commands
        # for every simulation type, including "hpc_processing" - confirm
        # that this is intended.
        job = s_job.Job(
            site=sapi.get_site(site_name=db_job.site.site_name),
            commands=self.comm.multi_mesh.get_interp_commands(event, gradient),
            job_type=db_job.job_type,
            job_info=db_job.info,
            jobname=db_job.job_name,
            job_description=db_job.description,
            wall_time_in_seconds=db_job.wall_time_in_seconds,
            working_dir=pathlib.Path(db_job.working_directory),
            tmpdir_root=pathlib.Path(db_job.temp_directory_root)
            if db_job.temp_directory_root else None,
            rundir_root=pathlib.Path(db_job.run_directory_root)
            if db_job.run_directory_root else None,
            job_groups=[i.group_name for i in db_job.groups],
            # The job already exists on the site, only the object is rebuilt
            initialize_on_site=False,
        )
        return job
Esempio n. 7
0
    def get_simulation_time_step(self, event=None):
        """
        Store the simulation time step in ``self.simulation_time_step``.

        The time step is read from "simulation_timestep.toml" in the MISC
        folder of the documentation directory when that file exists.
        Otherwise, if an event is passed, the stdout of the forward job of
        that event is downloaded and the value following the "(CFL)" token
        is parsed. A value which passes a basic plausibility check is
        written to the toml file and stored on the object.

        ``self.simulation_time_step`` is set to False whenever no time
        step could be determined. The method itself returns nothing.

        :param event: Name of an event with a forward job to read the time
            step from, defaults to None
        :type event: str, optional
        """
        documentation_dir = self.comm.project.paths["documentation"]
        misc_folder = os.path.join(documentation_dir, "MISC")
        if not os.path.exists(misc_folder):
            os.mkdir(misc_folder)

        timestep_file = os.path.join(misc_folder, "simulation_timestep.toml")

        if os.path.exists(timestep_file):
            self.simulation_time_step = toml.load(timestep_file)["time_step"]
            return

        # Nothing is cached yet: the attribute stays False unless a
        # plausible time step is parsed below.
        self.simulation_time_step = False
        if event is None:
            return

        local_stdout = os.path.join(documentation_dir,
                                    "stdout_for_timestep_test")
        hpc_cluster = sapi.get_site(self.comm.project.site_name)
        forward_job = self.comm.salvus_flow.get_job(event=event,
                                                    sim_type="forward")
        hpc_cluster.remote_get(forward_job.path / "stdout", local_stdout)

        # The downloaded stdout is only needed for parsing, so it is removed
        # again even when reading it fails.
        try:
            with open(local_stdout, "r") as fh:
                stdout_tokens = fh.read().split()
        finally:
            if os.path.exists(local_stdout):
                os.remove(local_stdout)

        if "(CFL)" not in stdout_tokens:
            return

        # Best effort: problems are printed rather than raised.
        try:
            time_step = float(stdout_tokens[stdout_tokens.index("(CFL)") + 1])
            # basic check to see if timestep makes some sense
            if 0.00001 < time_step < 1000:
                with open(timestep_file, "w") as fh:
                    toml.dump(dict(time_step=time_step), fh)
                self.simulation_time_step = time_step
            simulation_dict_folder = (
                self.comm.lasif.lasif_comm.project.paths["salvus_files"] /
                "SIMULATION_DICTS")
            # Clear cache of simulation dicts with the old checkpointing
            # settings.
            shutil.rmtree(simulation_dict_folder)
        except Exception as e:
            print(e)
Esempio n. 8
0
    def _move_model_to_cluster(
        self,
        hpc_cluster=None,
        overwrite: bool = False,
        validation: bool = False,
    ):
        """
        The model is moved to a dedicated directory on cluster

        The model of the optimizer's current iteration is uploaded. For a
        validation model the iteration name is prefixed with "validation_"
        and the model file is looked up through the multi mesh component.

        :param hpc_cluster: A Salvus site object, defaults to None
        :type hpc_cluster: salvus.flow.Site, optional
        :param overwrite: Overwrite mesh already there?, defaults to False
        :type overwrite: bool, optional
        :param validation: Move the validation model?, defaults to False
        :type validation: bool, optional
        """
        if hpc_cluster is None:
            hpc_cluster = get_site(self.comm.project.interpolation_site)

        optimizer = self.comm.project.get_optimizer()
        iteration = optimizer.iteration_name
        if not validation:
            model_file = optimizer.model_path
        else:
            print("It's validation!")
            iteration = f"validation_{iteration}"
            model_file = self.comm.multi_mesh.find_model_file(iteration)

        on_cluster, remote_model = self.has_remote_mesh(
            event=None,
            interpolate_to=False,
            gradient=False,
            hpc_cluster=hpc_cluster,
            iteration=iteration,
            validation=validation,
        )

        # Model is already there and must not be replaced
        if on_cluster and not overwrite:
            self.print(
                f"Model for iteration {iteration} already on cluster",
                emoji_alias=":white_check_mark:",
            )
            return

        # Model is already there but should be replaced
        if on_cluster:
            hpc_cluster.remote_put(model_file, remote_model)
            return

        # Model is not on the cluster yet
        remote_folder = remote_model.parent
        if not hpc_cluster.remote_exists(remote_folder):
            self.print("Making the directory")
            self.print(f"Directory is: {remote_folder}")
            hpc_cluster.remote_mkdir(remote_folder)
        self.print(f"Path to mesh is: {remote_model}")
        hpc_cluster.remote_put(model_file, remote_model)
        self.print("Did it")
Esempio n. 9
0
def preprocess_remote_gradient(comm, gradient_path: str, event: str):
    """
    Cut the source region from the gradient and clip the gradient to some
    percentile before smoothing.
    This can all be configured in information toml.

    The processing script (uploaded once) and a toml file describing the
    task are put into the folder "/scratch/snx3000/<username>/smoothing_info"
    on the remote site and the script is executed there through ssh. The
    result of the ssh command is printed.

    :param comm: inversionson communicator
    :param gradient_path: gradient path on remote
    :type gradient_path: str
    :param event: name of the event
    :type event: str
    :raises InversionsonError: When a receiver cut radius larger than 0.0
        is configured, as that is not implemented remotely
    """

    # Connect to daint
    daint = get_site(comm.project.site_name)
    username = daint.config["ssh_settings"]["username"]

    remote_inversionson_dir = os.path.join("/scratch/snx3000", username,
                                           "smoothing_info")

    if not daint.remote_exists(remote_inversionson_dir):
        daint.remote_mkdir(remote_inversionson_dir)

    # copy processing script to daint
    remote_script = os.path.join(remote_inversionson_dir, "cut_and_clip.py")
    if not daint.remote_exists(remote_script):
        daint.remote_put(CUT_SOURCE_SCRIPT_PATH, remote_script)

    if comm.project.cut_receiver_radius > 0.0:
        raise InversionsonError("Remote receiver cutting not implemented yet.")

    info = {
        "filename": str(gradient_path),
        "cutout_radius_in_km": comm.project.cut_source_radius,
        "source_location": comm.lasif.get_source(event_name=event),
        "clipping_percentile": comm.project.clip_gradient,
        "parameters": comm.project.inversion_params,
    }

    # The toml is written to the current working directory, uploaded and
    # removed again. The removal is done in a finally clause so that the
    # file does not stay behind when writing or uploading fails.
    toml_filename = f"{event}_gradient_process.toml"
    remote_toml = os.path.join(remote_inversionson_dir, toml_filename)
    try:
        with open(toml_filename, "w") as fh:
            toml.dump(info, fh)
        daint.remote_put(toml_filename, remote_toml)
    finally:
        if os.path.exists(toml_filename):
            os.remove(toml_filename)

    # Call script
    print(daint.run_ssh_command(f"python {remote_script} {remote_toml}"))
Esempio n. 10
0
    def get_remote_field_moving_script_path(self):
        """
        Return the remote path of the field moving script.

        The folder "/users/<username>/scripts" and the script itself are
        created on the interpolation site when they do not exist yet.

        :return: Path of "move_fields.py" on the interpolation site
        :rtype: str
        """
        hpc_cluster = get_site(self.comm.project.interpolation_site)
        user = hpc_cluster.config["ssh_settings"]["username"]

        scripts_folder = os.path.join("/users", user, "scripts")
        if not hpc_cluster.remote_exists(scripts_folder):
            hpc_cluster.remote_mkdir(scripts_folder)

        remote_script = os.path.join(scripts_folder, "move_fields.py")
        if hpc_cluster.remote_exists(remote_script):
            return remote_script

        # Script is not on the site yet, so upload it.
        # NOTE(review): the local file uploaded here is "cut_and_clip.py"
        # while the remote name is "move_fields.py" - this looks like a
        # copy-paste slip, confirm which local script is intended.
        local_script = os.path.join(REMOTE_SCRIPT_PATHS, "cut_and_clip.py")
        hpc_cluster.remote_put(local_script, remote_script)
        return remote_script
Esempio n. 11
0
    def _move_mesh_to_cluster(self,
                              event: str,
                              gradient=False,
                              hpc_cluster=None):
        """
        Move the mesh to the cluster for interpolation

        Without an event either the example gradient (``gradient=True``)
        or the model is moved. With an event, the mesh of that event is
        uploaded unless it is already on the cluster.

        :param event: Name of event, None to move the model or the
            example gradient instead
        :type event: str
        :param gradient: Move the example gradient rather than the model?
            Only looked at when event is None, defaults to False
        :type gradient: bool, optional
        :param hpc_cluster: A site object. When None, the interpolation
            site of the project is used, defaults to None
        :raises InversionsonError: When there is no mesh for the event
        """
        if event is None:
            if not gradient:
                # This happens when we want to move the model to the cluster
                self.print("Moving model to cluster", emoji_alias=":package:")
                self._move_model_to_cluster(hpc_cluster)
            else:
                self.print("Moving example gradient to cluster",
                           emoji_alias=":package:")
                self.move_gradient_to_cluster(hpc_cluster)
            return

        mesh_found, local_mesh = lapi.find_event_mesh(self.lasif_comm, event)
        if not mesh_found:
            raise InversionsonError(f"Mesh for event {event} does not exist.")

        # Get remote connection
        if hpc_cluster is None:
            hpc_cluster = get_site(self.comm.project.interpolation_site)

        remote_mesh = self.find_remote_mesh(
            event=event,
            interpolate_to=True,
            check_if_exists=False,
            hpc_cluster=hpc_cluster,
        )
        remote_folder = remote_mesh.parent
        if not hpc_cluster.remote_exists(remote_folder):
            hpc_cluster.remote_mkdir(remote_folder)

        # Nothing to upload when the mesh is already on the cluster
        if hpc_cluster.remote_exists(remote_mesh):
            return
        self.print(f"Moving mesh for event {event} to cluster",
                   emoji_alias=":package:")
        hpc_cluster.remote_put(local_mesh, remote_mesh)
Esempio n. 12
0
    def process_data(self, event: str):
        """
        Process the data for the periods specified in Lasif project.

        Events which are already processed are skipped. With remote data
        processing enabled, the processed file is downloaded from the
        remote site when it exists there. In all other cases the data is
        processed locally.

        :param event: Name of event to be processed
        :type event: str
        """
        if self._already_processed(event):
            return

        project = self.comm.project
        if project.remote_data_processing:
            # Local location of the processed data
            lasif_root = project.lasif_root
            proc_filename = (
                f"preprocessed_{int(project.min_period)}s_"
                f"to_{int(project.max_period)}s.h5")
            local_folder = os.path.join(lasif_root, "PROCESSED_DATA",
                                        "EARTHQUAKES", event)
            local_file = os.path.join(local_folder, proc_filename)
            if not os.path.exists(local_folder):
                os.mkdir(local_folder)

            # Remote location of the processed data
            site = get_site(project.site_name)
            remote_file = os.path.join(
                os.path.join(project.remote_inversionson_dir,
                             "PROCESSED_DATA"),
                f"{event}_{proc_filename}")

            if site.remote_exists(remote_file):
                # Download to a temporary name and rename afterwards, so the
                # final filename only appears once the download is done.
                download_target = local_file + "_tmp"
                site.remote_get(remote_file, download_target)
                os.rename(download_target, local_file)
                return  # Return if it got it and got it there.

        lapi.process_data(self.lasif_comm, events=[event])
Esempio n. 13
0
    def move_toml_to_hpc(
        self, toml_filename: pathlib.Path, event: str, hpc_cluster=None
    ):
        """
        Move information file to HPC so that it can perform mesh generation
        and interpolation

        The file is put into the event's folder inside the project's
        ``fast_mesh_dir``, keeping its filename. The folder is created when
        it does not exist.

        :param toml_filename: path to local toml
        :type toml_filename: pathlib.Path
        :param event: name of event
        :type event: str
        :param hpc_cluster: the cluster site object, defaults to None
        :type hpc_cluster: Salvus.site, optional
        :return: Path of the toml file on the cluster
        :rtype: str
        """
        site = hpc_cluster
        if site is None:
            site = sapi.get_site(self.comm.project.interpolation_site)

        mesh_root = pathlib.Path(self.comm.project.fast_mesh_dir)
        remote_toml = mesh_root / event / toml_filename.name
        remote_folder = remote_toml.parent
        if not site.remote_exists(remote_folder):
            site.remote_mkdir(remote_folder)
        site.remote_put(toml_filename, remote_toml)
        return str(remote_toml)
Esempio n. 14
0
    def has_remote_mesh(
        self,
        event: str,
        gradient: bool,
        interpolate_to: bool = True,
        hpc_cluster=None,
        iteration: str = None,
        validation: bool = False,
    ):
        """
        Just to check if remote mesh exists

        :param event: Name of event
        :type event: str
        :param gradient: Is it a gradient?
        :type gradient: bool
        :param interpolate_to: Mesh to interpolate to?, defaults to True
        :type interpolate_to: bool, optional
        :param hpc_cluster: you can pass the site object. Defaults to None
        :type hpc_cluster: salvus.flow.Site, optional
        :param iteration: Name of an iteration, defaults to None
        :type iteration: str, optional
        :param validation: Passed on to the lookup of the remote mesh,
            defaults to False
        :type validation: bool, optional
        :return: Whether the mesh exists on the site, and the remote path
            of the mesh
        :rtype: tuple
        """
        site = hpc_cluster
        if site is None:
            site = get_site(self.comm.project.interpolation_site)

        # Only ask where the mesh would be, existence is checked below
        remote_mesh = self.find_remote_mesh(
            event=event,
            hpc_cluster=site,
            check_if_exists=False,
            iteration=iteration,
            validation=validation,
            interpolate_to=interpolate_to,
            gradient=gradient,
        )
        mesh_exists = site.remote_exists(remote_mesh)
        return mesh_exists, remote_mesh
Esempio n. 15
0
    def move_gradient_to_cluster(self,
                                 hpc_cluster=None,
                                 overwrite: bool = False):
        """
        Empty gradient moved to a dedicated directory on cluster

        The empty gradient is a copy of the master model whose inversion
        parameters are filled with zeroes. It is created locally and then
        uploaded.

        :param hpc_cluster: A Salvus site object, defaults to None
        :type hpc_cluster: salvus.flow.Site, optional
        :param overwrite: Upload the gradient even when it is already on
            the cluster?, defaults to False
        :type overwrite: bool, optional
        """
        site = hpc_cluster
        if site is None:
            site = get_site(self.comm.project.interpolation_site)

        on_cluster, remote_gradient = self.has_remote_mesh(
            event=None,
            interpolate_to=True,
            gradient=True,
            hpc_cluster=site,
            iteration=None,
            validation=False,
        )
        if on_cluster and not overwrite:
            self.print("Empty gradient already on cluster",
                       emoji_alias=":white_check_mark:")
            return

        # Build the empty gradient locally from the master model
        models_folder = self.lasif_comm.project.paths["models"]
        local_gradient = models_folder / "GRADIENT" / "mesh.h5"
        if not os.path.exists(local_gradient.parent):
            os.makedirs(local_gradient.parent)
        master_model = self.get_master_model()
        shutil.copy(master_model, local_gradient)
        self.comm.salvus_mesher.fill_inversion_params_with_zeroes(
            local_gradient)

        # Upload it
        remote_folder = remote_gradient.parent
        if not site.remote_exists(remote_folder):
            site.remote_mkdir(remote_folder)
        site.remote_put(local_gradient, remote_gradient)
    def _launch_hpc_processing_job(self, event):
        """
        Here, we launch a job to select windows and get adjoint sources
        for an event.

        Returns immediately when the "hpc_processing" job of the event is
        already marked as submitted. Otherwise the processed data, an info
        toml and the processing script are uploaded (when needed), the job
        is launched and its name and submission status are stored in the
        project.

        :param event: Name of event
        :type event: str
        :raises ValueError: When the inversion parameters contain neither
            "VPV" nor "VP", as no parameterization can be chosen then
        """
        submitted, _ = self.__submitted_retrieved(event, "hpc_processing")
        if submitted:
            return

        if not self.comm.project.remote_data_processing:
            self.__process_data(event)

        iteration = self.comm.project.current_iteration
        forward_job = sapi.get_job(
            site_name=self.comm.project.site_name,
            job_name=self.comm.salvus_flow.get_job_name(event=event,
                                                        sim_type="forward"),
        )

        # Get forward paths
        remote_syn_path = str(forward_job.output_path / "receivers.h5")
        forward_meta_json_filename = str(forward_job.output_path / "meta.json")

        # Get local proc filename
        lasif_root = self.comm.project.lasif_root
        proc_filename = (f"preprocessed_{int(self.comm.project.min_period)}s_"
                         f"to_{int(self.comm.project.max_period)}s.h5")
        local_proc_file = os.path.join(lasif_root, "PROCESSED_DATA",
                                       "EARTHQUAKES", event, proc_filename)

        remote_proc_file_name = f"{event}_{proc_filename}"
        hpc_cluster = get_site(self.comm.project.site_name)

        remote_processed_dir = os.path.join(
            self.comm.project.remote_inversionson_dir, "PROCESSED_DATA")
        remote_adj_dir = os.path.join(
            self.comm.project.remote_inversionson_dir, "ADJOINT_SOURCES")
        remote_receiver_dir = os.path.join(
            self.comm.project.remote_inversionson_dir, "RECEIVERS")
        # Create each of the remote folders that is missing
        for dir_name in (
                remote_processed_dir, remote_adj_dir, remote_receiver_dir
        ):
            if not hpc_cluster.remote_exists(dir_name):
                hpc_cluster.remote_mkdir(dir_name)

        remote_proc_path = os.path.join(remote_processed_dir,
                                        remote_proc_file_name)
        # Upload under a temporary name and rename afterwards, so the final
        # filename only appears on the remote once the upload is done.
        tmp_remote_path = remote_proc_path + "_tmp"
        if not hpc_cluster.remote_exists(remote_proc_path):
            hpc_cluster.remote_put(local_proc_file, tmp_remote_path)
            hpc_cluster.run_ssh_command(
                f"mv {tmp_remote_path} {remote_proc_path}")

        inversion_params = self.comm.project.inversion_params
        if "VPV" in inversion_params:
            parameterization = "tti"
        elif "VP" in inversion_params:
            parameterization = "rho-vp-vs"
        else:
            raise ValueError(
                "Can not determine the parameterization: the inversion "
                f"parameters {inversion_params} contain neither 'VPV' "
                "nor 'VP'")

        info = dict(
            processed_filename=remote_proc_path,
            synthetic_filename=remote_syn_path,
            forward_meta_json_filename=forward_meta_json_filename,
            parameterization=parameterization,
            event_name=event,
            delta=self.comm.project.simulation_dict["time_step"],
            npts=self.comm.project.simulation_dict["number_of_time_steps"],
            iteration_name=iteration,
            minimum_period=self.comm.project.min_period,
            maximum_period=self.comm.project.max_period,
            start_time_in_s=self.comm.project.simulation_dict["start_time"],
            receiver_json_path=os.path.join(remote_receiver_dir,
                                            f"{event}_receivers.json"),
            ad_src_type=self.comm.project.ad_src_type,
        )

        # The info toml is written to the current working directory,
        # uploaded and removed again. The removal is done in a finally
        # clause so that the file does not stay behind when writing or
        # uploading fails.
        toml_filename = f"{iteration}_{event}_adj_info.toml"
        remote_toml = os.path.join(remote_adj_dir, toml_filename)
        try:
            with open(toml_filename, "w") as fh:
                toml.dump(info, fh)
            hpc_cluster.remote_put(toml_filename, remote_toml)
        finally:
            if os.path.exists(toml_filename):
                os.remove(toml_filename)

        # Copy processing script to hpc
        remote_script = os.path.join(remote_adj_dir,
                                     "window_and_calc_adj_src.py")
        if not hpc_cluster.remote_exists(remote_script):
            hpc_cluster.remote_put(PROCESS_OUTPUT_SCRIPT_PATH, remote_script)

        # Now submit the job
        description = f"HPC processing of {event} for iteration {iteration}"
        wall_time = self.comm.project.hpc_processing_wall_time
        from salvus.flow.sites import job, remote_io_site

        commands = [
            remote_io_site.site_utils.RemoteCommand(command="mkdir output",
                                                    execute_with_mpi=False),
            remote_io_site.site_utils.RemoteCommand(
                command=f"python {remote_script} {remote_toml}",
                execute_with_mpi=False),
        ]
        # Allow to set conda environment first
        if self.comm.project.remote_conda_env:
            conda_command = [
                remote_io_site.site_utils.RemoteCommand(
                    command=
                    f"conda activate {self.comm.project.remote_conda_env}",
                    execute_with_mpi=False,
                )
            ]
            commands = conda_command + commands

        # NOTE(review): the files above are uploaded to the site named by
        # ``site_name`` while the job runs on ``interpolation_site`` -
        # confirm that both always see the same file system.
        hpc_job = job.Job(
            site=sapi.get_site(self.comm.project.interpolation_site),
            commands=commands,
            job_type="hpc_processing",
            job_description=description,
            job_info={},
            wall_time_in_seconds=wall_time,
            no_db=False,
        )

        # The name is stored before launching, the submission status after
        self.comm.project.change_attribute(
            attribute=f'hpc_processing_job["{event}"]["name"]',
            new_value=hpc_job.job_name,
        )
        hpc_job.launch()
        self.comm.project.change_attribute(
            attribute=f'hpc_processing_job["{event}"]["submitted"]',
            new_value=True,
        )
        self.print(f"HPC Processing job for event {event} submitted")
        self.comm.project.update_iteration_toml()
Esempio n. 17
0
    def get_interp_commands(
        self,
        event: str,
        gradient: bool,
    ) -> list:
        """
        Assemble the ordered list of remote commands for an interpolation job.

        For a model interpolation (``gradient`` is False) the mesh to
        interpolate from is the optimizer's remote model; the averaged model
        is requested for validation events when model averaging is enabled
        and the current iteration name does not contain "00000". For a
        gradient interpolation the mesh is looked up via
        ``self.comm.lasif.find_remote_mesh``.

        The interpolation toml is prepared locally and moved to the
        interpolation site as part of building the command list.

        :param event: Name of event
        :type event: str
        :param gradient: True to interpolate a gradient, False for a model
        :type gradient: bool
        :return: Remote commands in execution order
        :rtype: list
        """
        project = self.comm.project

        def shell(command: str):
            # All commands in this job are plain (non-MPI) shell commands.
            return remote_io_site.site_utils.RemoteCommand(
                command=command, execute_with_mpi=False
            )

        # Validation events may be simulated through the averaged model.
        average_model = bool(
            project.is_validation_event(event)
            and project.use_model_averaging
            and "00000" not in project.current_iteration
        )

        optimizer = project.get_optimizer()
        if gradient:
            mesh_to_interpolate_from = self.comm.lasif.find_remote_mesh(
                event=event,
                gradient=True,
                interpolate_to=False,
                validation=False,
            )
        else:
            mesh_to_interpolate_from = optimizer.get_remote_model_path(
                model_average=average_model
            )

        interpolation_script = self.find_interpolation_script()
        interpolation_cluster = sapi.get_site(project.interpolation_site)
        local_toml = self.prepare_interpolation_toml(
            gradient=gradient, event=event, hpc_cluster=interpolation_cluster
        )
        remote_toml = self.move_toml_to_hpc(
            toml_filename=local_toml,
            event=event,
            hpc_cluster=interpolation_cluster,
        )

        # Core of the job: stage the inputs in the run directory, then run
        # the interpolation script on them.
        commands = [
            shell(f"cp {remote_toml} ./interp_info.toml"),
            shell(f"cp {mesh_to_interpolate_from} ./from_mesh.h5"),
            shell(f"cp {interpolation_script} ./interpolate.py"),
            shell("mkdir output"),
            shell("python interpolate.py ./interp_info.toml"),
        ]

        if project.remote_data_processing and not gradient:
            # Stage the raw data only when the processed file is not yet
            # present on the simulation site.
            simulation_cluster = get_site(project.site_name)
            remote_processed_dir = os.path.join(
                project.remote_inversionson_dir, "PROCESSED_DATA"
            )
            proc_filename = (
                f"preprocessed_{int(project.min_period)}s"
                f"_to_{int(project.max_period)}s.h5"
            )
            remote_proc_path = os.path.join(
                remote_processed_dir, f"{event}_{proc_filename}"
            )

            if not simulation_cluster.remote_exists(remote_proc_path):
                raw_file = os.path.join(
                    project.remote_raw_data_dir, f"{event}.h5"
                )
                commands.insert(0, shell(f"cp {raw_file} raw_event_data.h5"))

        conda_env = project.remote_conda_env
        if conda_env:
            # Environment setup has to run before everything else; the
            # optional `source` line goes in front of `conda activate`.
            commands.insert(0, shell(f"conda activate {conda_env}"))
            source_location = project.remote_conda_source_location
            if source_location:
                commands.insert(0, shell(f"source {source_location}"))

        return commands
Esempio n. 18
0
    def prepare_interpolation_toml(self, gradient, event, hpc_cluster=None):
        """
        Write the local toml file describing a remote interpolation job.

        The file is stored under ``<inversion_root>/INTERPOLATION/<event>/``
        as ``gradient_interp.toml`` or ``prepare_forward.toml``. If it already
        exists it is loaded first and the entries set below are overwritten,
        so entries that are not rewritten keep their previous values.

        For a model interpolation this also makes sure the remote
        ``PROCESSED_DATA`` and ``RECEIVERS`` directories exist.

        :param gradient: True for a gradient interpolation, False for a
            model interpolation (prepare forward)
        :param event: Name of event
        :param hpc_cluster: Site object used for the remote directory
            checks. When None, the interpolation site is looked up. Only
            used when ``gradient`` is False.
        :return: Path of the written toml file
        """
        project = self.comm.project
        lasif = self.comm.lasif

        if gradient:
            toml_name, tag = "gradient_interp.toml", "GRADIENTS"
        else:
            toml_name, tag = "prepare_forward.toml", "MODELS"

        toml_filename = (
            project.inversion_root / "INTERPOLATION" / event / toml_name
        )
        if not os.path.exists(toml_filename.parent):
            os.makedirs(toml_filename.parent)

        remote_weights_path = os.path.join(
            project.remote_inversionson_dir,
            "INTERPOLATION_WEIGHTS",
            tag,
            event,
        )

        # Start from the existing file when there is one: the important
        # parameters get refreshed below and the rest is carried over.
        if os.path.exists(toml_filename):
            information = toml.load(toml_filename)
        else:
            information = {}

        information["gradient"] = gradient
        information["mesh_info"] = {
            "event_name": event,
            "mesh_folder": str(project.fast_mesh_dir),
            "long_term_mesh_folder": str(project.remote_mesh_dir),
            "min_period": project.min_period,
            "elems_per_quarter": project.elem_per_quarter,
            "interpolation_weights": remote_weights_path,
            "elems_per_wavelength": project.elem_per_wavelength,
        }
        information["data_processing"] = bool(
            not gradient and project.remote_data_processing
        )
        information["multi-mesh"] = project.meshes == "multi-mesh"

        if gradient:
            # Information needed for cutting around the source and clipping.
            information["cutout_radius_in_km"] = project.cut_source_radius
            information["source_location"] = lasif.get_source(
                event_name=event
            )
            information["clipping_percentile"] = project.clip_gradient
            information["parameters"] = project.inversion_params
        else:
            proc_filename = (
                f"preprocessed_{int(project.min_period)}s"
                f"_to_{int(project.max_period)}s.h5"
            )
            if hpc_cluster is None:
                hpc_cluster = sapi.get_site(project.interpolation_site)
            remote_processed_dir = os.path.join(
                project.remote_inversionson_dir, "PROCESSED_DATA"
            )
            remote_proc_path = os.path.join(
                remote_processed_dir, f"{event}_{proc_filename}"
            )
            if not hpc_cluster.remote_exists(remote_processed_dir):
                hpc_cluster.remote_mkdir(remote_processed_dir)

            information["processing_info"] = {
                "minimum_period": project.min_period,
                "maximum_period": project.max_period,
                "npts": project.simulation_dict["number_of_time_steps"],
                "dt": project.time_step,
                "start_time_in_s": project.start_time,
                "asdf_input_filename": "raw_event_data.h5",
                "asdf_output_filename": remote_proc_path,
                "preprocessing_tag": lasif.lasif_comm.waveforms.preprocessing_tag,
            }

            remote_receiver_dir = os.path.join(
                project.remote_inversionson_dir, "RECEIVERS"
            )
            if not hpc_cluster.remote_exists(remote_receiver_dir):
                hpc_cluster.remote_mkdir(remote_receiver_dir)
            information["receiver_json_path"] = os.path.join(
                remote_receiver_dir, f"{event}_receivers.json"
            )

            # The interpolation job only has to create the simulation dict
            # when there is no local one for this event yet.
            local_simulation_dict = (
                lasif.lasif_comm.project.paths["salvus_files"]
                / "SIMULATION_DICTS"
                / event
                / "simulation_dict.toml"
            )
            information["create_simulation_dict"] = not os.path.exists(
                local_simulation_dict
            )

            if project.ellipticity:
                information["ellipticity"] = 0.0033528106647474805
            if project.topography["use"]:
                information["mesh_info"]["topography"] = project.topography
            if project.ocean_loading["use"]:
                information["mesh_info"][
                    "ocean_loading"
                ] = project.ocean_loading

            source_info = lasif.get_source(event_name=event)
            if isinstance(source_info, list):
                source_info = source_info[0]
            use_ocean_side_set = (
                project.ocean_loading["use"]
                and not project.meshes == "multi-mesh"
            )
            source_info["side_set"] = "r1_ol" if use_ocean_side_set else "r1"
            source_info["stf"] = str(
                project.remote_inversionson_dir
                / "SOURCE_TIME_FUNCTIONS"
                / project.current_iteration
                / "stf.h5"
            )
            information["source_info"] = source_info

            # Fetching receivers is slow: only do it for a fresh toml file
            # and when the data is not processed remotely.
            if not (
                os.path.exists(toml_filename)
                or project.remote_data_processing
            ):
                information["receiver_info"] = lasif.get_receivers(
                    event_name=event
                )

            if not project.absorbing_boundaries:
                side_sets = []
            elif (
                "inner_boundary"
                in lasif.lasif_comm.project.domain.get_side_set_names()
            ):
                side_sets = ["inner_boundary"]
            else:
                side_sets = ["r0", "t0", "t1", "p0", "p1"]

            information["simulation_info"] = {
                "end_time": project.end_time,
                "time_step": project.time_step,
                "start_time": project.start_time,
                "minimum_period": lasif.lasif_comm.project.simulation_settings[
                    "minimum_period_in_s"
                ],
                "simulation_time_step": project.simulation_time_step,
                "attenuation": project.attenuation,
                "absorbing_boundaries": project.absorbing_boundaries,
                "side_sets": side_sets,
                "absorbing_boundary_length": project.abs_bound_length
                * 1000.0,
            }

        with open(toml_filename, "w") as fh:
            toml.dump(information, fh)
        return toml_filename
Esempio n. 19
0
    def construct_remote_interpolation_job(self, event: str, gradient=False):
        """
        Build the custom job that interpolates a model or a gradient remotely.

        For a model job with remote data processing enabled, the job is
        skipped when the processed data already exists on the remote, the
        local simulation dict exists, and the project does not use
        multi-mesh. In that case the ``prepare_forward_job`` entry of the
        event is marked as submitted and retrieved and None is returned.

        :param event: Name of event
        :type event: str
        :param gradient: Are we interpolating the gradient?, defaults to False
        :type gradient: bool, optional
        :return: The constructed job, or None when no job is needed
        """
        project = self.comm.project

        kind = "gradient " if gradient else "model "
        description = f"Interpolation of {kind}for event {event}"

        wall_time = 0.0
        if project.meshes == "multi-mesh":
            wall_time += project.model_interp_wall_time

        if project.remote_data_processing and not gradient:
            simulation_cluster = get_site(project.site_name)
            proc_filename = (
                f"preprocessed_{int(project.min_period)}s"
                f"_to_{int(project.max_period)}s.h5"
            )
            remote_proc_path = os.path.join(
                project.remote_inversionson_dir,
                "PROCESSED_DATA",
                f"{event}_{proc_filename}",
            )
            forward_simulation_dict = (
                self.comm.lasif.lasif_comm.project.paths["salvus_files"]
                / "SIMULATION_DICTS"
                / event
                / "simulation_dict.toml"
            )

            # A job is required when either the processed data is missing on
            # the remote or the simulation dict is missing locally.
            if not (
                simulation_cluster.remote_exists(remote_proc_path)
                and os.path.exists(forward_simulation_dict)
            ):
                wall_time += project.remote_data_proc_wall_time
            elif project.meshes != "multi-mesh":
                # Nothing left to do for this event: flag the job as done.
                for flag in ("submitted", "retrieved"):
                    project.change_attribute(
                        attribute=f'prepare_forward_job["{event}"]["{flag}"]',
                        new_value=True,
                    )
                return None

        if gradient:
            # Gradient jobs use their own wall time, replacing the sum above.
            wall_time = project.grad_interp_wall_time

        return job.Job(
            site=sapi.get_site(project.interpolation_site),
            commands=self.get_interp_commands(event=event, gradient=gradient),
            job_type="interpolation",
            job_description=description,
            job_info={},
            wall_time_in_seconds=wall_time,
            no_db=False,
        )
Esempio n. 20
0
    def get_adjoint_source_object(self, event_name: str) -> object:
        """
        Generate the adjoint source object for the respective event

        Only receivers that have an adjoint source are used. Without HPC
        processing the adjoint source names are the top-level keys of the
        local adjoint source file and receiver metadata comes from the local
        ``meta.json`` of the synthetics. With HPC processing, ``meta.json``
        and ``misfit_dict.toml`` are downloaded into the current working
        directory, read, and removed again; the source time functions then
        point at the remote ``stf.h5`` of the processing job.

        :param event_name: Name of event
        :type event_name: str
        :return: Adjoint source object for salvus
        :rtype: object
        :raises KeyError: If a receiver with an adjoint source has no entry
            in the receiver list of ``meta.json``
        """
        import h5py

        project = self.comm.project
        iteration = project.current_iteration
        receivers = self.comm.lasif.get_receivers(event_name)
        remote_processing = project.hpc_processing

        if not remote_processing:
            adjoint_filename = self.comm.lasif.get_adjoint_source_file(
                event=event_name, iteration=iteration)
            # Context manager so the file handle is released even when
            # reading the keys fails.
            with h5py.File(adjoint_filename, "r") as adjoint_file:
                adjoint_recs = set(adjoint_file.keys())

            meta_json_filename = os.path.join(
                project.lasif_root,
                "SYNTHETICS",
                "EARTHQUAKES",
                f"ITERATION_{iteration}",
                event_name,
                "meta.json",
            )
        else:
            hpc_cluster = get_site(project.site_name)

            # Receiver metadata of the remote synthetics.
            forward_job = self.get_job(event_name, sim_type="forward")
            remote_meta_path = forward_job.output_path / "meta.json"
            meta_json_filename = "meta.json"
            if os.path.exists(meta_json_filename):
                os.remove(meta_json_filename)
            hpc_cluster.remote_get(remote_meta_path, meta_json_filename)

            # Outputs of the remote processing job.
            proc_job = self.get_job(event_name, sim_type="hpc_processing")
            remote_output_dir = proc_job.stdout_path.parent / "output"
            adjoint_filename = "REMOTE:" + str(remote_output_dir / "stf.h5")

            local_misfit_dict = "misfit_dict.toml"
            if os.path.exists(local_misfit_dict):
                os.remove(local_misfit_dict)
            hpc_cluster.remote_get(
                str(remote_output_dir / "misfit_dict.toml"), local_misfit_dict)
            try:
                misfits = toml.load(local_misfit_dict)
            finally:
                # The downloaded copy is temporary; remove it even when
                # parsing fails.
                if os.path.exists(local_misfit_dict):
                    os.remove(local_misfit_dict)
            adjoint_recs = set(misfits[event_name].keys())

        def has_adjoint_source(network: str, station: str) -> bool:
            # Adjoint source names may be written as NET_STA or NET.STA.
            return (network + "_" + station in adjoint_recs
                    or network + "." + station in adjoint_recs)

        # Need to make sure I only take receivers with an adjoint source
        adjoint_sources = [
            rec for rec in receivers
            if has_adjoint_source(rec["network-code"], rec["station-code"])
        ]

        # meta.json holds the receiver positions and rotations of the
        # forward run; they are reused for the adjoint sources.
        try:
            with open(meta_json_filename) as json_file:
                data = json.load(json_file)
        finally:
            # Only the downloaded copy is temporary; the local synthetics
            # meta.json must stay in place.
            if remote_processing and os.path.exists(meta_json_filename):
                os.remove(meta_json_filename)

        meta_recs = data["forward_run_input"]["output"]["point_data"][
            "receiver"]
        meta_info_dict = {}
        for rec in meta_recs:
            if not has_adjoint_source(rec["network_code"],
                                      rec["station_code"]):
                continue
            rec_name = rec["network_code"] + "_" + rec["station_code"]
            meta_info_dict[rec_name] = {
                # this is the rotation from XYZ to ZNE,
                # we still need to transpose to get ZNE -> XYZ
                "rotation_on_input": {
                    "matrix":
                    np.array(rec["rotation_on_output"]["matrix"]).T.tolist()
                },
                "location": rec["location"],
            }

        adj_src = []
        for rec in adjoint_sources:
            rec_name = rec["network-code"] + "_" + rec["station-code"]
            rec_info = meta_info_dict[rec_name]
            location = rec_info["location"]
            adj_src.append(
                source.cartesian.VectorPoint3D(
                    x=location[0],
                    y=location[1],
                    z=location[2],
                    fx=1.0,
                    fy=1.0,
                    fz=1.0,
                    source_time_function=stf.Custom(
                        filename=adjoint_filename,
                        dataset_name="/" + rec_name,
                    ),
                    rotation_on_input=rec_info["rotation_on_input"],
                ))

        return adj_src
Esempio n. 21
0
    def get_sims_for_smoothing_task(
        self,
        reference_model,
        model_to_smooth,
        smoothing_lengths,
        smoothing_parameters,
    ):
        """
        Writes diffusion models based on a reference model and smoothing
        lengths, uploads them to the remote cluster if they don't exist there
        yet, and returns a list of simulations that can then be submitted
        as usual.

        One diffusion simulation is created per entry in
        ``smoothing_parameters``. Parameters whose name starts with "V" use
        themselves as reference velocity; all other parameters use VPV, or
        VP if VPV is not among the inversion parameters.

        :param reference_model: Mesh file with the velocities on which
            smoothing lengths are based. This file should be locally present.
        :type reference_model: str
        :param model_to_smooth: Mesh file with the fields that require
            smoothing. This may either be a file that is currently located
            on the HPC already or a file that still needs to be uploaded.
            If it is located on the remote already, please pass a path that
            contains the (case-sensitive) prefix "REMOTE:"
        :type model_to_smooth: str
        :param smoothing_lengths: List of floats that specify the smoothing
            lengths
        :type smoothing_lengths: list
        :param smoothing_parameters: List of strings that specify which
            parameters need smoothing
        :type smoothing_parameters: list
        :return: List of simulation objects
        :rtype: list
        :raises NotImplementedError: If a non-velocity parameter is smoothed
            while neither VPV nor VP is an inversion parameter
        """
        import salvus.flow.simple_config as sc
        from salvus.opt import smoothing
        from salvus.flow.api import get_site

        project = self.comm.project

        # File name of the reference model without its last extension.
        ref_model_name = ".".join(
            reference_model.split("/")[-1].split(".")[:-1])
        freq = 1.0 / project.min_period

        hpc_cluster = get_site(project.site_name)
        remote_diff_dir = project.remote_diff_model_dir
        local_diff_model_dir = "DIFFUSION_MODELS"

        if not os.path.exists(local_diff_model_dir):
            os.mkdir(local_diff_model_dir)

        if not hpc_cluster.remote_exists(remote_diff_dir):
            hpc_cluster.remote_mkdir(remote_diff_dir)

        if "REMOTE:" not in model_to_smooth:
            print(f"Uploading initial values from: {model_to_smooth} "
                  f"for smoothing.")
            file_name = model_to_smooth.split("/")[-1]
            remote_file_path = os.path.join(remote_diff_dir, file_name)
            # Upload under a temporary name and rename afterwards, so the
            # final path only appears once the upload has finished.
            tmp_remote_file_path = remote_file_path + "_tmp"
            hpc_cluster.remote_put(model_to_smooth, tmp_remote_file_path)
            hpc_cluster.run_ssh_command(
                f"mv {tmp_remote_file_path} {remote_file_path}")
            model_to_smooth = "REMOTE:" + remote_file_path

        # Identifies the smoothing setup in the diffusion model file names;
        # it does not depend on the parameter, so compute it once.
        unique_id = (
            "_".join([str(i).replace(".", "") for i in smoothing_lengths])
            + "_" + str(project.min_period))

        sims = []
        for param in smoothing_parameters:
            if param.startswith("V"):
                reference_velocity = param
            # If it is not some velocity, use P velocities
            elif "VPV" in project.inversion_params:
                reference_velocity = "VPV"
            elif "VP" in project.inversion_params:
                reference_velocity = "VP"
            else:
                raise NotImplementedError(
                    "Inversionson always expects "
                    "to get models with at least VP")

            diff_model_name = (
                unique_id + f"diff_model_{ref_model_name}_{param}.h5")
            remote_diff_model = os.path.join(remote_diff_dir, diff_model_name)
            diff_model_file = os.path.join(local_diff_model_dir,
                                           diff_model_name)

            # Diffusion models are reused: only create the local file and
            # upload it when it is missing.
            if not os.path.exists(diff_model_file):
                smooth = smoothing.AnisotropicModelDependent(
                    reference_frequency_in_hertz=freq,
                    smoothing_lengths_in_wavelengths=smoothing_lengths,
                    reference_model=reference_model,
                    reference_velocity=reference_velocity,
                )
                diff_model = smooth.get_diffusion_model(reference_model)
                diff_model.write_h5(diff_model_file)

            if not hpc_cluster.remote_exists(remote_diff_model):
                tmp_remote_diff_model = remote_diff_model + "_tmp"
                hpc_cluster.remote_put(diff_model_file, tmp_remote_diff_model)
                hpc_cluster.run_ssh_command(
                    f"mv {tmp_remote_diff_model} {remote_diff_model}")

            sim = sc.simulation.Diffusion(mesh=diff_model_file)
            sim.domain.polynomial_order = project.smoothing_tensor_order

            # With "auto" no explicit time step is set on the simulation.
            if not project.smoothing_timestep == "auto":
                sim.physics.diffusion_equation.time_step_in_seconds = (
                    project.smoothing_timestep)
            sim.physics.diffusion_equation.courant_number = 0.06

            sim.physics.diffusion_equation.initial_values.filename = (
                model_to_smooth)
            sim.physics.diffusion_equation.initial_values.format = "hdf5"
            sim.physics.diffusion_equation.initial_values.field = f"{param}"
            sim.physics.diffusion_equation.final_values.filename = f"{param}.h5"

            # The simulation was built from the local file, but at run time
            # it reads mesh, model and geometry from the remote copy.
            sim.domain.mesh.filename = "REMOTE:" + remote_diff_model
            sim.domain.model.filename = "REMOTE:" + remote_diff_model
            sim.domain.geometry.filename = "REMOTE:" + remote_diff_model
            sim.validate()

            sims.append(sim)

        return sims
    def __run_forward_simulation(self, event: str, verbose=False):
        """
        Submit the forward simulation of an event, unless already submitted.

        :param event: Name of event
        :type event: str
        :param verbose: Print a header and the event name before building
            the simulation, defaults to False
        :type verbose: bool, optional
        """
        project = self.comm.project

        # Nothing to do when the forward job is already out.
        submitted, _ = self.__submitted_retrieved(event, sim_type="forward")
        if submitted:
            return

        # In the case of a prepare_forward job, assume dict is created remotely
        simulation_created_remotely = bool(project.prepare_forward)

        if verbose:
            self.print("Run forward simulation",
                       line_above=True,
                       emoji_alias=":rocket:")
            self.print(f"Event: {event}")

        if not simulation_created_remotely:
            receivers = self.comm.salvus_flow.get_receivers(event)
            source = self.comm.salvus_flow.get_source_object(event)
            w = self.comm.salvus_flow.construct_simulation(
                event, source, receivers)
        else:
            w = self.comm.salvus_flow.construct_simulation_from_dict(event)

        already_interpolated = project.meshes == "multi-mesh"

        # Validation events use the average model when model averaging is
        # enabled and the iteration name does not contain "00000".
        validation = bool(
            project.is_validation_event(event)
            and project.use_model_averaging
            and "00000" not in project.current_iteration
        )

        interpolation_cluster = get_site(project.interpolation_site)
        remote_mesh = self.comm.lasif.find_remote_mesh(
            event=event,
            gradient=False,
            interpolate_to=False,
            hpc_cluster=interpolation_cluster,
            validation=validation,
            already_interpolated=already_interpolated,
        )
        # Point the simulation at the mesh that lives on the remote site.
        w.set_mesh("REMOTE:" + str(remote_mesh))

        self.comm.salvus_flow.submit_job(
            event=event,
            simulation=w,
            sim_type="forward",
            site=project.site_name,
            ranks=project.ranks,
        )

        self.print(f"Submitted forward job for event: {event}")
Esempio n. 23
0
    def submit_job(
        self,
        event: str,
        simulation: object,
        sim_type: str,
        site="daint",
        ranks=1024,
    ):
        """
        Submit a simulation asynchronously and record it in the project.

        For "forward" and "adjoint" simulations the job name and a
        submitted flag are stored for the event; the iteration toml is
        updated in every case. When the project's site is of type "local",
        this call blocks until the job has finished.

        :param event: Name of event
        :type event: str
        :param simulation: Simulation object constructed beforehand
        :type simulation: object
        :param sim_type: Type of simulation, forward or adjoint
        :type sim_type: str
        :param site: Name of site in salvus flow config file, defaults
        to "daint"
        :type site: str, optional
        :param ranks: How many cores to run on. (A multiple of 12 on daint),
        defaults to 1024
        :type ranks: int, optional
        """
        project = self.comm.project

        # Adjoint simulations take longer and are less predictable, so they
        # get 1.5 times the configured wall time.
        wall_time = project.wall_time
        if sim_type == "adjoint":
            wall_time = wall_time * 1.5

        submit_started = time.time()
        submitted_job = sapi.run_async(
            site_name=site,
            input_file=simulation,
            ranks=ranks,
            wall_time_in_seconds=wall_time,
        )
        elapsed = time.time() - submit_started
        self.print(
            f"Submitting took {elapsed:.3f} seconds",
            emoji_alias=":hourglass:",
            color="magenta",
        )
        hpc_cluster = sapi.get_site(project.site_name)

        if sim_type in ("forward", "adjoint"):
            # Both job types are tracked the same way, under
            # <sim_type>_job["<event>"].
            job_entry = f'{sim_type}_job["{event}"]'
            project.change_attribute(
                f'{job_entry}["name"]', submitted_job.job_name)
            project.change_attribute(f'{job_entry}["submitted"]', True)
        project.update_iteration_toml()

        if hpc_cluster.config["site_type"] == "local":
            self.print(f"Running {sim_type} simulation...")
            submitted_job.wait(
                poll_interval_in_seconds=project.sleep_time_in_s)
Esempio n. 24
0
    def run_remote_smoother(
        self,
        event: str,
    ):
        """
        Run the Smoother, the settings are specified in inversion toml. Make
        sure that the smoothing config has already been generated

        One diffusion simulation is set up per inversion parameter, using
        the remote gradient of the event's adjoint job as initial values.
        All simulations are submitted together as one job array, whose name
        is stored in the project (per event in mini-batch mode).

        Parameters whose name starts with "V" use themselves as reference
        velocity; any other parameter uses VP, or VPV if VP is not among the
        inversion parameters.

        :param event: Name of event
        :type event: str
        :raises NotImplementedError: If a non-velocity parameter is smoothed
            while neither VP nor VPV is an inversion parameter
        """
        from salvus.opt import smoothing
        import salvus.flow.simple_config as sc
        from salvus.flow.api import get_site
        from salvus.flow import api as sapi

        project = self.comm.project
        multi_mesh = project.meshes == "multi-mesh"

        if multi_mesh:
            mesh = self.comm.lasif.find_event_mesh(event)
        else:
            mesh = self.comm.lasif.get_simulation_mesh(event)
        freq = 1.0 / project.min_period
        smoothing_lengths = project.smoothing_lengths

        # get remote gradient filename
        adjoint_job = self.comm.salvus_flow.get_job(event, "adjoint")
        output_files = adjoint_job.get_output_files()
        remote_grad = str(output_files[0][("adjoint", "gradient",
                                           "output_filename")])

        # make site stuff (hardcoded for now)
        hpc_cluster = get_site(project.site_name)
        username = hpc_cluster.config["ssh_settings"]["username"]
        remote_diff_dir = os.path.join("/scratch/snx3000", username,
                                       "diff_models")
        local_diff_model_dir = "DIFF_MODELS"

        if not os.path.exists(local_diff_model_dir):
            os.mkdir(local_diff_model_dir)

        if not hpc_cluster.remote_exists(remote_diff_dir):
            hpc_cluster.remote_mkdir(remote_diff_dir)

        # Identifies the smoothing setup in the diffusion model file names;
        # it does not depend on the parameter, so compute it once.
        unique_id = (
            "_".join([str(i).replace(".", "") for i in smoothing_lengths])
            + "_" + str(project.min_period))

        # Multi-mesh runs use a higher polynomial order.
        tensor_order = 4 if multi_mesh else 2

        sims = []
        for param in project.inversion_params:
            # Resolve the reference velocity for every parameter on its own,
            # so a value from a previous iteration can never leak through.
            if param.startswith("V"):
                reference_velocity = param
            elif "VP" in project.inversion_params:
                reference_velocity = "VP"
            elif "VPV" in project.inversion_params:
                reference_velocity = "VPV"
            else:
                raise NotImplementedError(
                    f"Cannot smooth parameter {param}: no reference "
                    "velocity available, expected VP or VPV among the "
                    "inversion parameters")

            diff_model_name = unique_id + f"diff_model_{param}.h5"
            if multi_mesh:
                # Every event has its own mesh, hence its own diffusion model.
                diff_model_name = event + "_" + diff_model_name

            remote_diff_model = os.path.join(remote_diff_dir, diff_model_name)
            diff_model_file = os.path.join(local_diff_model_dir,
                                           diff_model_name)

            # Diffusion models are reused: only create the local file and
            # upload it when it is missing.
            if not os.path.exists(diff_model_file):
                smooth = smoothing.AnisotropicModelDependent(
                    reference_frequency_in_hertz=freq,
                    smoothing_lengths_in_wavelengths=smoothing_lengths,
                    reference_model=mesh,
                    reference_velocity=reference_velocity,
                )
                diff_model = smooth.get_diffusion_model(mesh)
                diff_model.write_h5(diff_model_file)

            if not hpc_cluster.remote_exists(remote_diff_model):
                hpc_cluster.remote_put(diff_model_file, remote_diff_model)

            sim = sc.simulation.Diffusion(mesh=diff_model_file)

            sim.domain.polynomial_order = tensor_order
            sim.physics.diffusion_equation.time_step_in_seconds = (
                project.smoothing_timestep)
            sim.physics.diffusion_equation.courant_number = 0.06

            sim.physics.diffusion_equation.initial_values.filename = (
                "REMOTE:" + remote_grad)
            sim.physics.diffusion_equation.initial_values.format = "hdf5"
            sim.physics.diffusion_equation.initial_values.field = f"{param}"
            sim.physics.diffusion_equation.final_values.filename = (
                f"{param}.h5")

            # The simulation was built from the local file, but at run time
            # it reads mesh, model and geometry from the remote copy.
            sim.domain.mesh.filename = "REMOTE:" + remote_diff_model
            sim.domain.model.filename = "REMOTE:" + remote_diff_model
            sim.domain.geometry.filename = "REMOTE:" + remote_diff_model

            sim.validate()

            sims.append(sim)

        smoothing_job = sapi.run_many_async(
            input_files=sims,
            site_name=project.smoothing_site_name,
            ranks_per_job=project.smoothing_ranks,
            wall_time_in_seconds_per_job=project.smoothing_wall_time,
        )
        if project.inversion_mode == "mini-batch":
            # Mini-batch inversions track one smoothing job per event.
            project.change_attribute(
                f'smoothing_job["{event}"]["name"]',
                smoothing_job.job_array_name)
            project.change_attribute(
                f'smoothing_job["{event}"]["submitted"]', True)
        else:
            project.change_attribute('smoothing_job["name"]',
                                     smoothing_job.job_array_name)
            project.change_attribute('smoothing_job["submitted"]', True)
Esempio n. 25
0
    def find_remote_mesh(
        self,
        event: str,
        gradient: bool = False,
        interpolate_to: bool = True,
        check_if_exists: bool = False,
        hpc_cluster=None,
        iteration: str = None,
        already_interpolated: bool = False,
        validation: bool = False,
    ) -> pathlib.Path:
        """
        Find the path to the relevant mesh on the hpc cluster

        :param event: Name of event
        :type event: str
        :param gradient: Is it a gradient? If not, it's a model,
            defaults to False
        :type gradient: bool, optional
        :param interpolate_to: Mesh to interpolate to?, defaults to True
        :type interpolate_to: bool, optional
        :param check_if_exists: Check if the file exists?, defaults to False
        :type check_if_exists: bool, optional
        :param hpc_cluster: you can pass the site object. Defaults to None,
            in which case the project's interpolation site is looked up
        :type hpc_cluster: salvus.flow.Site, optional
        :param iteration: Name of an iteration, defaults to None, which
            means the project's current iteration
        :type iteration: str, optional
        :param already_interpolated: If mesh has been interpolated,
            we find it in the interpolation job folder, defaults to False
        :type already_interpolated: bool, optional
        :param validation: Only used for a model that is interpolated
            from (gradient, already_interpolated and interpolate_to all
            False): pick the mesh in AVERAGE_MODELS instead of the one in
            MODELS, defaults to False
        :type validation: bool, optional
        :raises InversionsonError: If check_if_exists is set and the mesh
            is not on the cluster (for the standard gradient mesh: still
            not there after an upload attempt)
        :return: The path to the correct mesh
        :rtype: pathlib.Path
        """
        if hpc_cluster is None:
            hpc_cluster = get_site(self.comm.project.interpolation_site)
        remote_mesh_dir = pathlib.Path(self.comm.project.remote_mesh_dir)
        fast_dir = pathlib.Path(self.comm.project.remote_inversionson_dir)
        if iteration is None:
            iteration = self.comm.project.current_iteration

        if gradient:
            if interpolate_to:
                # Build from the normalised fast_dir so this also works
                # when the project attribute is a plain string.
                mesh = fast_dir / "MESHES" / "standard_gradient" / "mesh.h5"
            else:
                # NOTE(review): the value comes straight from the job file
                # paths; presumably a path-like object - confirm.
                output = self.comm.salvus_flow.get_job_file_paths(
                    event=event, sim_type="adjoint")
                mesh = output[0][("adjoint", "gradient", "output_filename")]
        elif already_interpolated:
            job = self.comm.salvus_flow.get_job(
                event=event,
                sim_type="prepare_forward",
                iteration=iteration,
            )
            mesh = job.stdout_path.parent / "output" / "mesh.h5"
        elif interpolate_to:
            mesh = remote_mesh_dir / "meshes" / event / "mesh.h5"
        elif validation:
            mesh = fast_dir / "AVERAGE_MODELS" / iteration / "mesh.h5"
        else:
            mesh = fast_dir / "MODELS" / iteration / "mesh.h5"

        if check_if_exists and not hpc_cluster.remote_exists(mesh):
            if gradient and interpolate_to:
                # The standard gradient mesh can be uploaded on demand.
                # Only give up if it is still missing afterwards, rather
                # than raising right after a successful upload.
                self._move_mesh_to_cluster(event=None,
                                           gradient=gradient,
                                           hpc_cluster=hpc_cluster)
                if hpc_cluster.remote_exists(mesh):
                    return mesh
            raise InversionsonError(
                f"Mesh for event {event} does not exist")
        return mesh
Esempio n. 26
0
    def prepare_iteration(
        self,
        it_name: str,
        events: List[str] = None,
    ):
        """
        Prepare iteration.

        Registers the iteration in the project and in lasif, uploads the
        optimizer's current model to the hpc cluster and, when a validation
        is due and model averaging is enabled, also writes and uploads the
        averaged model. Finally uploads the source time function.

        :param it_name: Name of iteration
        :type it_name: str
        :param events: Pass a list of events if you want them to be
            predefined, defaults to None
        :type events: List[str], optional
        :raises InversionsonError: If lasif already has an iteration
            called it_name
        """
        self.comm.project.change_attribute("current_iteration", it_name)
        print("Preparing iteration for", it_name)
        if self.comm.lasif.has_iteration(it_name):
            raise InversionsonError(f"Iteration {it_name} already exists")

        self.comm.lasif.set_up_iteration(it_name, events)
        self.comm.project.create_iteration_toml(it_name)
        self.comm.project.get_iteration_attributes()

        optimizer = self.comm.project.get_optimizer()
        model = optimizer.model_path

        remote_root = self.comm.project.remote_inversionson_dir
        remote_mesh_file = remote_root / "MODELS" / it_name / "mesh.h5"
        hpc_cluster = get_site(self.comm.project.site_name)
        if not hpc_cluster.remote_exists(remote_mesh_file.parent):
            # Create the parents of the directory the model is actually
            # uploaded to (remote_root / "MODELS"). The mesh directory is
            # still ensured here because this method always did so.
            for directory in (
                self.comm.project.remote_mesh_dir,
                remote_root,
                remote_root / "MODELS",
            ):
                if not hpc_cluster.remote_exists(directory):
                    hpc_cluster.remote_mkdir(directory)
            hpc_cluster.remote_mkdir(remote_mesh_file.parent)
        self.print(
            f"Moving mesh to {self.comm.project.interpolation_site}",
            emoji_alias=":package:",
        )
        hpc_cluster.remote_put(model, remote_mesh_file)

        if (
            self.time_for_validation()
            and self.comm.project.use_model_averaging
            and self.iteration_number > 0
        ):
            # Upload next to MODELS in the inversionson directory, which is
            # where the validation branch of find_remote_mesh looks for the
            # averaged mesh.
            remote_avg_mesh_file = (
                remote_root / "AVERAGE_MODELS" / it_name / "mesh.h5"
            )
            print("writing average validation model")
            # First iteration of the averaging window, e.g. with
            # val_it_interval = 5 and iteration_number = 4: 4 - 5 + 1 = 0
            starting_it_number = (
                self.iteration_number
                - self.comm.project.val_it_interval
                + 1
            )
            self.write_average_model(starting_it_number,
                                     self.iteration_number)
            self.print(
                f"Moving average_model to {self.comm.project.interpolation_site}",
                emoji_alias=":package:",
            )
            for directory in (
                remote_avg_mesh_file.parent.parent,
                remote_avg_mesh_file.parent,
            ):
                if not hpc_cluster.remote_exists(directory):
                    hpc_cluster.remote_mkdir(directory)
            hpc_cluster.remote_put(
                self.get_average_model_name(starting_it_number,
                                            self.iteration_number),
                remote_avg_mesh_file,
            )

        self.comm.lasif.upload_stf(iteration=it_name)
    def sum_gradients(
        self,
        events,
        output_location,
        batch_average=True,
        sum_vpv_vph=True,
        store_norms=True,
    ):
        """
        Sum gradients on the HPC.

        The remote gradient files of all events are collected, a summing
        script and its toml configuration are uploaded to the cluster, the
        script is run over ssh and the summed gradient is downloaded.

        :param events: List of events to be summed.
        :type events: list
        :param output_location: local file path for the end result
        :type output_location: str
        :param batch_average: Average the summed gradients
        :type batch_average: bool
        :param sum_vpv_vph: sum vpv and vph
        :type sum_vpv_vph: bool
        :param store_norms: Store the gradient norms that are computed while
            summing.
        :type store_norms: bool
        """
        gradient_paths = []
        iteration = self.comm.project.current_iteration

        for event in events:
            if self.comm.project.meshes == "multi-mesh":
                # Interpolated gradients live in the output folder of the
                # gradient interpolation job.
                job = self.comm.salvus_flow.get_job(event, "gradient_interp")
                gradient_path = os.path.join(
                    str(job.stderr_path.parent), "output/mesh.h5"
                )
            else:
                job = self.comm.salvus_flow.get_job(event, "adjoint")
                output_files = job.get_output_files()
                gradient_path = output_files[0][
                    ("adjoint", "gradient", "output_filename")
                ]
            gradient_paths.append(str(gradient_path))

        hpc_cluster = get_site(self.comm.project.site_name)

        remote_sum_dir = os.path.join(
            self.comm.project.remote_inversionson_dir, "SUMMING"
        )
        if not hpc_cluster.remote_exists(remote_sum_dir):
            hpc_cluster.remote_mkdir(remote_sum_dir)

        remote_output_path = os.path.join(remote_sum_dir, "summed_gradient.h5")
        remote_norms_path = os.path.join(
            remote_sum_dir, f"{iteration}_gradient_norms.toml"
        )

        # copy summing script to hpc (only if it is not there already)
        remote_script = os.path.join(remote_sum_dir, "gradient_summing.py")
        if not hpc_cluster.remote_exists(remote_script):
            hpc_cluster.remote_put(SUM_GRADIENTS_SCRIPT_PATH, remote_script)

        info = dict(
            filenames=gradient_paths,
            parameters=self.comm.project.inversion_params,
            output_gradient=remote_output_path,
            events_list=events,
            gradient_norms_path=remote_norms_path,
            batch_average=batch_average,
        )

        # Write the configuration locally, copy it to the HPC and always
        # remove the local copy, also when writing or uploading fails.
        toml_filename = "gradient_sum.toml"
        remote_toml = os.path.join(remote_sum_dir, toml_filename)
        try:
            with open(toml_filename, "w") as fh:
                toml.dump(info, fh)
            hpc_cluster.remote_put(toml_filename, remote_toml)
        finally:
            if os.path.exists(toml_filename):
                os.remove(toml_filename)

        # Call script
        self.print("Remote summing of gradients started...")
        hpc_cluster.run_ssh_command(f"python {remote_script} {remote_toml}")
        self.print("Remote summing completed...")

        if store_norms:
            norm_dict_toml = self.optimizer.gradient_norm_path

            hpc_cluster.remote_get(remote_norms_path, norm_dict_toml)
            all_norms_path = os.path.join(
                self.optimizer.gradient_norm_dir, "all_norms.toml"
            )
            # Merge the norms of this iteration into the running collection
            # of norms; an event seen before is overwritten.
            if os.path.exists(all_norms_path):
                norm_dict = toml.load(all_norms_path)
            else:
                norm_dict = {}

            norm_iter_dict = toml.load(norm_dict_toml)
            for event, norm in norm_iter_dict.items():
                norm_dict[event] = float(norm)

            with open(all_norms_path, "w") as fh:
                toml.dump(norm_dict, fh)

        hpc_cluster.remote_get(remote_output_path, output_location)

        # Only sum the raw gradient in AdamOpt, not the update
        if sum_vpv_vph:
            sum_two_parameters_h5(output_location, ["VPV", "VPH"])
Esempio n. 28
0
    def move_files_to_cluster(self):
        """
        Put everything the remote side of the workflow needs on the hpc.

        Creates the remote inversionson directory and its standard
        sub-directories, uploads the bathymetry file (multi-mesh with ocean
        loading only) and the topography file (if used) unless they are
        already on the cluster, uploads the interpolation script and, for
        multi-mesh projects, lets lasif move the gradient to the cluster.
        """
        project = self.comm.project
        hpc_cluster = get_site(project.site_name)

        def upload_unless_present(settings, already_uploaded_message):
            # Upload settings["file"] to settings["remote_path"], creating
            # the remote parent directory when needed; only report when the
            # remote file is already there.
            remote_path = settings["remote_path"]
            if hpc_cluster.remote_exists(remote_path):
                self.print(
                    already_uploaded_message,
                    emoji_alias=":white_check_mark:",
                )
                return
            remote_parent = Path(remote_path).parent
            if not hpc_cluster.remote_exists(remote_parent):
                hpc_cluster.remote_mkdir(remote_parent)
            hpc_cluster.remote_put(settings["file"], remote_path)

        remote_root = project.remote_inversionson_dir
        if not hpc_cluster.remote_exists(remote_root):
            hpc_cluster.remote_mkdir(remote_root)

        sub_directories = (
            "DIFFUSION_MODELS",
            "SOURCE_TIME_FUNCTIONS",
            "INTERPOLATION_WEIGHTS",
            "MESHES",
            "ADJOINT_SOURCES",
            "PROCESSED_DATA",
            "SCRIPTS",
        )
        for name in sub_directories:
            remote_sub_dir = remote_root / name
            if not hpc_cluster.remote_exists(remote_sub_dir):
                hpc_cluster.remote_mkdir(remote_sub_dir)

        # Bathymetry is only uploaded for multi-mesh runs with ocean loading
        if project.ocean_loading["use"] and project.meshes == "multi-mesh":
            upload_unless_present(
                project.ocean_loading,
                "Remote Bathymetry file is already uploaded",
            )

        if project.topography["use"]:
            upload_unless_present(
                project.topography,
                "Remote Topography file is already uploaded",
            )

        # The interpolation script is uploaded on every call
        remote_interp_path = self.comm.multi_mesh.find_interpolation_script()
        hpc_cluster.remote_put(INTERPOLATION_SCRIPT_PATH, remote_interp_path)

        if project.meshes == "multi-mesh":
            self.comm.lasif.move_gradient_to_cluster(
                hpc_cluster=hpc_cluster, overwrite=False
            )