Example #1
    def spawn_processes(self, start_idx, steps_from_start):
        """
        Spawns processes, each dedicated to an instance of CxSystem.
        """
        print(
            " -  The following configurations are going to be simulated with "
            "%d processes using %s device (names printed without underscores): "
            "\n %s" % (self.number_of_process, self.device,
                       str(self.final_namings).replace('_', '')))
        manager = multiprocessing.Manager()
        jobs = []
        working = manager.Value('i', 0)
        paths = manager.dict()
        # number_of_runs = len(self.final_messages) * self.trials_per_config
        self.final_metadata_df = self.final_metadata_df.loc[np.repeat(
            self.final_metadata_df.index.values,
            self.trials_per_config)].reset_index(drop=True)
        assert len(self.final_namings) < 1000, ' -  The array run is trying to run more than 1000 simulations; this is not allowed unless you' \
                                                ' REALLY want it, and if you REALLY want it you should know what to do.'
        # while len(jobs) < number_of_runs:
        while len(jobs) < steps_from_start:
            time.sleep(1.5)
            if working.value < self.number_of_process:
                idx = start_idx + len(jobs)
                p = multiprocessing.Process(target=self.run_parameter_search,
                                            args=(idx, working, paths,
                                                  self.array_run_stdout_file))
                jobs.append(p)
                p.start()
        for j in jobs:
            j.join()

        for item in list(paths.keys()):
            self.final_metadata_df.loc[item, 'Full path'] = paths[item]
        write_to_file(
            os.path.join(os.path.dirname(paths[list(paths.keys())[0]]),
                         self.metadata_filename), self.final_metadata_df)
        print(" -  Array run metadata saved at: %s" %
              os.path.join(os.path.dirname(paths[list(paths.keys())[0]]),
                           self.metadata_filename))

        if self._is_running_locally():
            workspace_key = 'workspace_path'
        elif self._is_running_in_cluster():
            workspace_key = 'cluster_workspace'
        else:
            return
        tmp_folder_path = Path(
            parameter_finder(self.anatomy_df,
                             workspace_key)).expanduser().joinpath(
                                 '.tmp' + self.suffix).as_posix()
        print("cleaning tmp folder " + tmp_folder_path)
        shutil.rmtree(tmp_folder_path)
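A note on the throttling pattern above: the loop only starts a new process while the shared `working` counter is below `number_of_process`, and because each child increments the counter itself, the `time.sleep(1.5)` is what keeps the parent from overshooting the limit before children report in. Below is a minimal, self-contained sketch of the same pattern; the `busy_work` function and the limits are hypothetical, not CxSystem code.

import multiprocessing
import time

def busy_work(idx, working, results):
    working.value += 1            # mark this worker as running
    time.sleep(2)                 # stand-in for a real simulation
    results[idx] = idx ** 2
    working.value -= 1            # free a slot for the spawning loop

if __name__ == '__main__':
    manager = multiprocessing.Manager()
    working = manager.Value('i', 0)   # shared count of live workers
    results = manager.dict()          # shared index -> result mapping
    jobs, max_workers, total_jobs = [], 2, 6
    while len(jobs) < total_jobs:
        time.sleep(0.1)
        if working.value < max_workers:   # throttle on the shared counter
            p = multiprocessing.Process(target=busy_work,
                                        args=(len(jobs), working, results))
            jobs.append(p)
            p.start()
    for j in jobs:
        j.join()
    print(dict(results))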
Example #2
 def _save_tmp_anat_phys_to_downloads(self, tmp_folder_path):
     # The metadata master file was created earlier by the ClusterRun call;
     # read it here to get the downloads folder address
     local_workspace = Path(
         parameter_finder(self.anatomy_df, 'workspace_path')).expanduser()
     local_cluster_folder = local_workspace.joinpath('cluster_run' +
                                                     self.suffix)
     metadata_pkl_fullfile = Path(
         local_cluster_folder.joinpath('cluster_metadata{}.pkl'.format(
             self.suffix)))
     metadata_dict = load_from_file(metadata_pkl_fullfile)
     downloads_folder = metadata_dict['local_cluster_run_download_folder']
     # Create downloads folder
     Path(downloads_folder).mkdir(parents=True, exist_ok=True)
     # Move anat and phys files to download folder
     tmp_folder_contents_list = os.listdir(tmp_folder_path)
     fullfile_source_list = []
     fullfile_target_list = []
     for this_file in tmp_folder_contents_list:
         fullfile_source_list.append(
             os.path.join(tmp_folder_path, this_file))
         fullfile_target_list.append(
             os.path.join(downloads_folder, this_file))
     # move only the anatomy and physiology files
     for s, t in zip(fullfile_source_list, fullfile_target_list):
         if 'anat' in s or 'phys' in s:
             Path(s).replace(t)
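One caveat when reusing the move step above: `Path.replace` maps to `os.replace`, an atomic rename that raises `OSError` when source and target live on different filesystems, whereas `shutil.move` falls back to copy-and-delete. A sketch with hypothetical paths:

import shutil
from pathlib import Path

src = Path('/tmp/example_anat_config.csv')             # hypothetical source
dst = Path('/data/downloads/example_anat_config.csv')  # hypothetical target
try:
    src.replace(dst)  # atomic rename; same filesystem only
except OSError:
    shutil.move(str(src), str(dst))  # copies across filesystems, then deletes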
Example #3
 def _get_cluster_job_file_path(self):
     cluster_job_file_path = None
     try:
         cluster_job_file_path = parameter_finder(self.anatomy_df,
                                                  'cluster_job_file_path')
     except (TypeError, NameError):
         pass
     return cluster_job_file_path
Example #4
 def _get_run_in_cluster_flag(self):
     run_in_cluster = 0
     try:
         run_in_cluster = int(
             eval(parameter_finder(self.anatomy_df, 'run_in_cluster')))
     except (TypeError, NameError):
         pass
     return run_in_cluster
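The `int(eval(...))` idiom above turns a config cell holding a literal such as `1`, `0`, `True`, or `False` into a 0/1 flag. When reusing the pattern, `ast.literal_eval` parses the same literals without executing arbitrary code; the `parse_flag` helper below is a hypothetical sketch, not CxSystem API.

import ast

def parse_flag(raw):
    # Normalize '1', '0', 'True' or 'False' (str or already-parsed) to 0/1
    return int(ast.literal_eval(str(raw)))

assert parse_flag('True') == 1
assert parse_flag(0) == 0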
Example #5
 def _get_benchmark_flag(self):
     benchmark = 0
     try:
         benchmark = int(
             eval(parameter_finder(self.anatomy_df, 'benchmark')))
     except (TypeError, NameError):
         pass
     return benchmark
Example #6
 def _get_device(self):
     device = 'Python'
     try:
         device = parameter_finder(self.anatomy_df, 'device')
     except TypeError:
         print(
             " -    device is not defined in the configuration file, the default device is 'Python'"
         )
     return device
Example #7
 def _get_trials_per_config(self):
     trials_per_config = 1
     try:
         trials_per_config = int(
             parameter_finder(self.anatomy_df, 'trials_per_config'))
     except TypeError:
         print(" - trials_per_config is not defined in the configuration "
               "file, the default value is 1")
     return trials_per_config
Example #8
 def _get_multidim_array_run_flag(self):
     multi_dim_flag = 0
     try:
         multi_dim_flag = int(
             eval(
                 parameter_finder(self.anatomy_df,
                                  'multidimension_array_run')))
     except TypeError:
         pass
     return multi_dim_flag
Example #9
 def _get_num_of_process(self):
     number_of_process = int(multiprocessing.cpu_count() * 3 / 4)
     try:
         number_of_process = int(
             parameter_finder(self.anatomy_df, 'number_of_process'))
     except TypeError:
         print(
             " -  number_of_process is not defined in the configuration file; "
             "the default is 3/4 of the CPU cores: %d processes" %
             number_of_process)
     return number_of_process
Example #10
 def _get_cluster_number_of_nodes(self):
     cluster_number_of_nodes = 1
     try:
         cluster_number_of_nodes = int(
             parameter_finder(self.anatomy_df, 'cluster_number_of_nodes'))
     except (TypeError, NameError):
         pass
     if cluster_number_of_nodes > 40:
         raise Exception(
             ' -  Number of nodes cannot be higher than 40 for your own safety.'
         )
     return cluster_number_of_nodes
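Examples #3 through #10 all share one shape: look a key up with `parameter_finder` and fall back to a default when it is missing. If more getters of this kind are needed, the pattern can be factored into a single method; the sketch below is hypothetical, not part of CxSystem.

def _get_param_or_default(self, key, default, convert=lambda v: v):
    # Look `key` up in the anatomy config; fall back to `default` when absent
    try:
        return convert(parameter_finder(self.anatomy_df, key))
    except (TypeError, NameError):
        return default

With that helper, `_get_benchmark_flag` would reduce to `self._get_param_or_default('benchmark', 0, lambda v: int(eval(v)))`.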
Example #11
    def run_parameter_search(self, idx, working, paths, stdout_file):
        """
        The function that each spawned process runs and parallel instances of CxSystems are created here.

        :param idx: index of the requested parallel CxSystem.
        :param working: the index of the process that is being currently performed. This is to keep track of running processes
                to prevent spawning more than required processes.
        :param paths: The path for saving the output of the current instance of CxSystem.
        """
        if stdout_file:
            sys.stdout = open(stdout_file, "a+")
        orig_idx = idx
        working.value += 1
        np.random.seed(idx)
        tr = idx % self.trials_per_config
        idx = int(idx / self.trials_per_config)
        device = parameter_finder(self.list_of_anatomy_dfs[idx], 'device')
        if self.number_of_process == 1 and self.benchmark == 1 and device == 'Python':
            # clear the weave cache before benchmarking; otherwise stale
            # compiled code would distort the results
            if sys.platform == 'win32':
                shutil.rmtree(
                    os.path.join(os.environ['USERPROFILE'], 'AppData', 'Local',
                                 'Temp', os.environ['USERNAME'],
                                 'python27_compiled'))
            else:
                shutil.rmtree(os.path.join(os.environ['HOME'], '.cache/scipy'))
            print(" -  scipy cache deleted to prevent benchmarking issues.")
        print(
            "################### Trial %d/%d started running for "
            "simulation number %d: %s ##########################" %
            (tr + 1, self.trials_per_config, idx, self.final_namings[idx][1:]))
        tr_suffix = ''
        if self.trials_per_config > 1:
            tr_suffix = '_' + str(tr).zfill(3)
        # self.list_of_anatomy_dfs[idx] and self.list_of_physio_dfs[idx] hold the
        # dataframes with the final (single) parameter values for this simulation.
        cm = cx.CxSystem(self.list_of_anatomy_dfs[idx],
                         self.list_of_physio_dfs[idx],
                         output_file_suffix=self.final_namings[idx] +
                         tr_suffix,
                         instantiated_from_array_run=1,
                         array_run_in_cluster=self.array_run_is_in_cluster)
        cm.run()
        paths[orig_idx] = cm.workspace.get_results_export_path()
        working.value -= 1
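The two lines `tr = idx % self.trials_per_config` and `idx = int(idx / self.trials_per_config)` decompose the flat job index into a (configuration, trial) pair; this matches the earlier `np.repeat` over the metadata index, which lays jobs out configuration-major. A quick worked check with three trials per configuration:

trials_per_config = 3
for flat_idx in range(6):
    tr = flat_idx % trials_per_config       # trial within the configuration
    config = flat_idx // trials_per_config  # which configuration
    print(flat_idx, '->', (config, tr))
# prints 0 -> (0, 0), 1 -> (0, 1), 2 -> (0, 2),
#        3 -> (1, 0), 4 -> (1, 1), 5 -> (1, 2)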
Example #12
    def __init__(self,
                 anatomy_dataframe,
                 physiology_dataframe,
                 job_suffix,
                 cluster_start_idx,
                 cluster_step,
                 anatomy_file_path,
                 physio_file_path,
                 array_run_is_in_cluster=0,
                 array_run_stdout_file=None):
        """
        Initialize the ArrayRun for running several instances of CxSystem in parallel.

        :param  anatomy_dataframe: The dataframe containing the anatomical and system configurations that has an instance for ArrayRun in it.
        :param physiology_dataframe: The dataframe containing the physiology configurations that has an instance for ArrayRun in it.
        :param job_suffix: The job_suffix for the metadata file containing the filename and changing parameters in each of the simulations.
        """
        self.suffix = job_suffix
        self.cluster_start_idx = int(cluster_start_idx)
        self.cluster_step = int(cluster_step)
        self.array_run_is_in_cluster = array_run_is_in_cluster
        self.array_run_stdout_file = None if array_run_stdout_file == 'None' else array_run_stdout_file

        self.metadata_filename = self._get_metadata_filename(
            self.cluster_start_idx, self.cluster_step, job_suffix)

        # these two are the original config files containing the array_run info:
        self.anatomy_df = pd.read_csv(
            anatomy_dataframe, header=None) if isinstance(
                anatomy_dataframe, str) else anatomy_dataframe
        self.physiology_df = pd.read_csv(physiology_dataframe) if isinstance(
            physiology_dataframe, str) else physiology_dataframe

        # finding array-run related parameters:
        self.multidimension_array_run = self._get_multidim_array_run_flag()
        self.number_of_process = self._get_num_of_process()
        self.benchmark = self._get_benchmark_flag()
        self.trials_per_config = self._get_trials_per_config()
        self.device = self._get_device()
        self.run_in_cluster = self._get_run_in_cluster_flag()
        self.cluster_job_file_path = self._get_cluster_job_file_path()
        self.cluster_number_of_nodes = self._get_cluster_number_of_nodes()

        # get indices of pandas cells containing the arrayruns
        self.anatomy_arrun_cell_indices = self._get_arrun_cell_indices_from_df(
            self.anatomy_df)
        self.physio_arrun_cell_indices = self._get_arrun_cell_indices_from_df(
            self.physiology_df)

        # lists containing final dataframes and experiment names
        self.list_of_anatomy_dfs = []
        self.list_of_physio_dfs = []
        self.final_namings = []

        self.param_search_num_of_params = len(
            self.anatomy_arrun_cell_indices) + len(
                self.physio_arrun_cell_indices)
        self.default_anatomy_df = self.default_df_extractor(self.anatomy_df)
        self.default_physio_df = self.default_df_extractor(self.physiology_df)

        # create a list of names of parameters with corresponding value in each experiment
        self.anat_titles = []
        self.physio_titles = []
        self.metadata_dict = {}

        # self.anat_titles and self.physio_titles are both set inside message_finder() which is called by the preparer functions
        if self.multidimension_array_run:
            self._prepare_multi_dim_arrun_metadata()
        else:
            self._prepare_one_dim_arrun_metadata()

        print(
            " -  arrays of dataframes for the anatomical and physiological "
            "configurations are ready")

        if self._should_submit_to_cluster():
            self.total_configs = len(
                self.list_of_anatomy_dfs) * self.trials_per_config
            self.config_per_node = math.ceil(self.total_configs /
                                             self.cluster_number_of_nodes)
            self.clipping_indices = np.arange(0, self.total_configs,
                                              self.config_per_node)
            ClusterRun(self, Path(anatomy_file_path), Path(physio_file_path),
                       self.suffix)

            print(" -  removing .tmp folder")
            tmp_folder_path = Path(
                parameter_finder(self.anatomy_df,
                                 'workspace_path')).expanduser().joinpath(
                                     '.tmp' + self.suffix).as_posix()
            print("cleaning tmp folders " + tmp_folder_path)
            shutil.rmtree(tmp_folder_path)

        elif self._is_running_in_cluster():
            self.spawn_processes(self.cluster_start_idx, self.cluster_step)

        elif self._is_running_locally():
            self.spawn_processes(
                0,
                len(self.final_namings) *
                self.trials_per_config)  # this runs when not in cluster
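`clipping_indices` marks where the flat list of jobs gets cut into per-node chunks; each chunk's start index and length later become `cluster_run_start_idx` and `cluster_run_step` in the generated slurm files (see Example #13). A worked example with hypothetical numbers, 10 configurations on 3 nodes:

import math

import numpy as np

total_configs = 10
cluster_number_of_nodes = 3
config_per_node = math.ceil(total_configs / cluster_number_of_nodes)  # 4
clipping_indices = np.arange(0, total_configs, config_per_node)
print(clipping_indices)  # [0 4 8] -> nodes run jobs 0-3, 4-7 and 8-9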
Example #13
    def __init__(self,
                 array_run_obj,
                 anat_file_path,
                 physio_file_path,
                 suffix=""):

        try:
            self.cluster_workspace = PurePosixPath(
                parameter_finder(array_run_obj.anatomy_df,
                                 'cluster_workspace'))
        except NameError:
            raise ParameterNotFoundError(
                "cluster_workspace is not defined for running CxSystem on cluster"
            )
        assert self.cluster_workspace.is_absolute(), \
            "cluster_workspace {} must be an absolute path with explicit [remote] home directory path".format(self.cluster_workspace.as_posix())

        try:
            self.cluster_address = parameter_finder(array_run_obj.anatomy_df,
                                                    'cluster_address')
        except NameError:
            raise ParameterNotFoundError(
                "cluster_address is not defined for running CxSystem on cluster"
            )

        try:
            self.cluster_login_node = parameter_finder(
                array_run_obj.anatomy_df, 'cluster_login_node')
        except NameError:
            print(
                " -  No cluster login node found. Connecting directly to cluster address {}"
                .format(self.cluster_address))
            self.cluster_login_node = '--'

        # # the following call will check if the cluster is available or not, but it needs root access
        # self.ping_cluster()

        try:
            self.cluster_username = parameter_finder(array_run_obj.anatomy_df,
                                                     'cluster_username')
            assert self.cluster_username != 'username', \
                "Cluster username must be changed in the configuration file, currently it is the default value 'username'"
            print(" -  Loggin in with user '%s'" % self.cluster_username)
        except NameError:
            self.cluster_username = input(' -  Enter cluster username: ')
        try:
            self.password = parameter_finder(array_run_obj.anatomy_df,
                                             'password')
        except NameError:
            if 'CLUSTERPASS' in os.environ.keys():
                self.password = os.environ['CLUSTERPASS']
            else:
                self.password = getpass.getpass(
                    ' -  Enter password for user {}: '
                    ''.format(self.cluster_username))

        self.suffix = suffix
        print(" -  temp file suffix is %s" % self.suffix)
        self.client = paramiko.SSHClient()
        self.client.load_system_host_keys()
        self.client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
        if self.cluster_login_node != '--':
            print(" -  Connecting to login node {}".format(
                self.cluster_login_node))
            sock = paramiko.ProxyCommand("ssh {}@{} nc {} 22".format(
                self.cluster_username, self.cluster_login_node,
                self.cluster_address))
            sock.settimeout(30)
            self.client.connect(self.cluster_address,
                                port=22,
                                username=self.cluster_username,
                                password=self.password,
                                sock=sock)
        else:
            self.client.connect(self.cluster_address,
                                port=22,
                                username=self.cluster_username,
                                password=self.password)

        print(" -  Connected to %s" % self.cluster_address)
        print(" -  Creating workspace folder if not exists")
        self.ssh_commander('mkdir -p {}'.format(
            self.cluster_workspace.as_posix()))
        scp = SCPClient(self.client.get_transport())

        if 'json' in anat_file_path.suffix.lower():
            converter = fileconverter.ConfigConverter(
                anat_file_path.as_posix())
            anat_file_path = Path(converter.save_as_csv(overwrite=True))
        if 'json' in physio_file_path.suffix.lower():
            converter = fileconverter.ConfigConverter(
                physio_file_path.as_posix())
            physio_file_path = Path(converter.save_as_csv(overwrite=True))

        print(" -  Transferring configuration files ...")

        self.remote_anat_filename = '_tmp_anat_config{}.csv'.format(
            self.suffix)
        self.remote_phys_filename = '_tmp_physio_config{}.csv'.format(
            self.suffix)
        self.local_workspace_unexpanded = Path(
            parameter_finder(array_run_obj.anatomy_df, 'workspace_path'))
        self.local_workspace = Path(
            parameter_finder(array_run_obj.anatomy_df,
                             'workspace_path')).expanduser()
        self.local_cluster_folder = self.local_workspace.joinpath(
            'cluster_run' + self.suffix)
        if not self.local_cluster_folder.is_dir():
            os.mkdir(self.local_cluster_folder.as_posix())
        try:
            imported_connections_file = Path(
                parameter_finder(array_run_obj.anatomy_df,
                                 'import_connections_from'))
            if imported_connections_file.is_file():
                scp.put(imported_connections_file.as_posix(),
                        self.cluster_workspace.as_posix())
                new_path = Path('./').joinpath(
                    imported_connections_file.name).as_posix()
                change_parameter_value_in_file(
                    anat_file_path.as_posix(),
                    self.local_cluster_folder.joinpath(anat_file_path.name),
                    'import_connections_from', new_path)
                anat_file_path = self.local_cluster_folder.joinpath(
                    anat_file_path.name)
        except TypeError:  # this is when the value is # or -- for instance
            pass
        scp.put(
            anat_file_path.as_posix(),
            self.cluster_workspace.joinpath(
                self.remote_anat_filename).as_posix())
        scp.put(
            physio_file_path.as_posix(),
            self.cluster_workspace.joinpath(
                self.remote_phys_filename).as_posix())

        # ask user to set the number of nodes, time and memory:
        print(
            " -  Please check the default csc_puhti.job file and set the time and memory; uncomment and enter an email address if you wish."
            "\nNote that the number of nodes in the default slurm file should always be set to 1;"
            " instead, enter the number of nodes in the CxSystem network config file."
            "\nAlso, the default number of CPUs (16) rarely needs to be changed.")

        self.slurm_file_path = Path(
            parameter_finder(array_run_obj.anatomy_df,
                             'cluster_job_file_path')).expanduser()
        if not self.slurm_file_path.is_file():
            if not self.slurm_file_path.is_absolute():
                raise RelativePathError(
                    "\nSlurm file {} not found in local workspace. Make sure the path to the file is "
                    "absolute".format(self.slurm_file_path.as_posix()))
            else:
                raise FileNotFoundError("\nSlurm file {} not found".format(
                    self.slurm_file_path.as_posix()))

        # updating remote cxsystem2
        self.update_remote_cxsystem2(self.slurm_file_path,
                                     self.cluster_workspace)

        # building slurm :
        for item_idx, item in enumerate(array_run_obj.clipping_indices):
            with open(self.slurm_file_path.as_posix(), 'r') as sl1:
                remote_slurm_filename = "_tmp_slurm{}_part{}.job".format(
                    self.suffix, item_idx)
                with open(
                        self.local_cluster_folder.joinpath(
                            remote_slurm_filename).as_posix(),
                        'w') as sl2:  # wb -> w
                    for line in sl1:
                        sl2.write(line)
                    # the step is the distance to the next clipping index; for
                    # the last chunk the lookup raises IndexError and the
                    # remainder (total_configs - start) is used instead
                    try:
                        sl2.write(
                            "python -c "
                            "\"from cxsystem2.core.cxsystem import CxSystem as cxs; "
                            " cx = cxs('{anatomy}','{physio}', cluster_run_start_idx={cluster_start},cluster_run_step={cluster_step}); "
                            "cx.run()\"".format(
                                anatomy=self.remote_anat_filename,
                                physio=self.remote_phys_filename,
                                cluster_start=item,
                                cluster_step=array_run_obj.clipping_indices[
                                    item_idx + 1] -
                                array_run_obj.clipping_indices[item_idx]))
                    except IndexError:
                        sl2.write(
                            "python -c "
                            "\"from cxsystem2.core.cxsystem import CxSystem as cxs; "
                            " cx = cxs('{anatomy}','{physio}', cluster_run_start_idx={cluster_start},cluster_run_step={cluster_step}); "
                            "cx.run()\"".format(
                                anatomy=self.remote_anat_filename,
                                physio=self.remote_phys_filename,
                                cluster_start=item,
                                cluster_step=array_run_obj.total_configs -
                                array_run_obj.clipping_indices[item_idx]))

            scp.put(
                self.local_cluster_folder.joinpath(
                    remote_slurm_filename).as_posix(),
                self.cluster_workspace.joinpath(
                    remote_slurm_filename).as_posix())
        print(" -  Slurm file generated and copied to cluster")
        self.channel = self.client.invoke_shell()
        for item_idx, item in enumerate(array_run_obj.clipping_indices):
            remote_slurm_filename = "_tmp_slurm{}_part{}.job".format(
                self.suffix, item_idx)
            if platform == 'win32':
                print(" -  Converting the file using dos2unix")
                self.channel.send('cd {} && dos2unix {}\n'.format(
                    self.cluster_workspace.as_posix(), remote_slurm_filename))
                time.sleep(1)
            self.channel.send('cd {} && sbatch {}\n'.format(
                self.cluster_workspace.as_posix(), remote_slurm_filename))
            print(" -  Job file {} submitted".format(remote_slurm_filename))
            time.sleep(1)
        cluster_metadata = \
            {'cluster_address': self.cluster_address,
             'cluster_login_node': self.cluster_login_node,
             'cluster_username': self.cluster_username,
             'local_workspace_unexpanded': self.local_workspace_unexpanded.as_posix(),
             'local_workspace': self.local_workspace.as_posix(),
             'local_cluster_run_folder': self.local_cluster_folder.as_posix(),
             'local_cluster_run_download_folder': self.local_cluster_folder.joinpath('downloads').as_posix(),
             'cluster_workspace': self.cluster_workspace.as_posix(),
             'cluster_simulation_folder': self.cluster_workspace.joinpath(parameter_finder(array_run_obj.anatomy_df, 'simulation_title')).as_posix(),
             'suffix': self.suffix,
             'timestamp': datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')[:-7]}
        with open(
                self.local_cluster_folder.joinpath(
                    'cluster_metadata{}.pkl'.format(self.suffix)), 'wb') as ff:
            pickle.dump(cluster_metadata, ff)
        print(
            " -  Cluster metadata saved. To download the result and clean the environments after getting the email,"
            " run the following command in the terminal:\n")
        print("cxcluster " + self.local_cluster_folder.joinpath(
            'cluster_metadata{}.pkl'.format(self.suffix)).as_posix())
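Stripped of the CxSystem plumbing, the login-node hop above boils down to the `paramiko.ProxyCommand` pattern sketched below; the hostnames, credentials, and the availability of `nc` on the login node are all assumptions.

import paramiko

client = paramiko.SSHClient()
client.load_system_host_keys()
client.set_missing_host_key_policy(paramiko.AutoAddPolicy())

# Tunnel the SSH TCP stream through the login node with an outer ssh + nc
sock = paramiko.ProxyCommand(
    "ssh user@login.example.org nc cluster.example.org 22")
sock.settimeout(30)
client.connect('cluster.example.org', port=22, username='user',
               password='secret', sock=sock)
stdin, stdout, stderr = client.exec_command('hostname')
print(stdout.read().decode())
client.close()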