def output(self):
    project_explorer = ProjectFileExplorer(self.project_folder)
    dataset_explorer = project_explorer.get_dataset_file_explorer(
        self.timestep, self.dataset_id)
    if self.save_format == 'hdf5':
        use_hdf5 = True
    elif self.save_format == 'tfrecord':
        raise ValueError(
            f'save_format {self.save_format} is not supported yet.')
    else:
        raise ValueError(
            f'save_format parameter not recognized: {self.save_format}, '
            f'should be `hdf5` or `tfrecord`')
    return [
        luigi.LocalTarget(dataset_explorer.train_fp
                          if use_hdf5 else dataset_explorer.train_records_fp),
        luigi.LocalTarget(dataset_explorer.test_fp
                          if use_hdf5 else dataset_explorer.test_records_fp),
        luigi.LocalTarget(dataset_explorer.train_rescaled_fp if use_hdf5 else
                          dataset_explorer.train_records_rescaled_fp),
        luigi.LocalTarget(dataset_explorer.test_rescaled_fp if use_hdf5 else
                          dataset_explorer.test_records_rescaled_fp),
        luigi.LocalTarget(dataset_explorer.scaler_fp),
        luigi.LocalTarget(dataset_explorer.log_fp)
    ]
def output(self):
    project_explorer = ProjectFileExplorer(self.project_folder)
    dataset_explorer = project_explorer.get_dataset_file_explorer(
        self.timestep, self.dataset_id)
    return [
        luigi.LocalTarget(dataset_explorer.settings_fp),
        luigi.LocalTarget(dataset_explorer.dataset_fp),
        luigi.LocalTarget(dataset_explorer.log_fp)
    ]
Example #3
def get_gillespy_histogram_data(
        project_folder,
        timestep,
        dataset_id,
):
    project_explorer = ProjectFileExplorer(project_folder)
    dataset_explorer = project_explorer.get_dataset_file_explorer(timestep, dataset_id)
    histogram_data = np.load(dataset_explorer.histogram_dataset_fp)
    return histogram_data
def output(self):
    project_explorer = ProjectFileExplorer(self.project_folder)
    model_explorer = project_explorer.get_model_file_explorer(
        self.timestep, self.model_id)
    return [
        luigi.LocalTarget(model_explorer.mixture_config_fp),
        luigi.LocalTarget(model_explorer.body_config_fp),
        luigi.LocalTarget(model_explorer.scaler_fp),
        luigi.LocalTarget(model_explorer.frozen_graph_fp),
        luigi.LocalTarget(model_explorer.graph_keys_fp),
    ]
def output(self):
    project_explorer = ProjectFileExplorer(self.project_folder)
    dataset_explorer = project_explorer.get_dataset_file_explorer(
        self.timestep, self.dataset_id)
    return [
        luigi.LocalTarget(dataset_explorer.train_fp),
        luigi.LocalTarget(dataset_explorer.test_fp),
        luigi.LocalTarget(dataset_explorer.train_rescaled_fp),
        luigi.LocalTarget(dataset_explorer.test_rescaled_fp),
        luigi.LocalTarget(dataset_explorer.scaler_fp),
        luigi.LocalTarget(dataset_explorer.log_fp)
    ]
Example #6
def get_nn_histogram_data(
        project_folder,
        timestep,
        dataset_id,
        model_id,
        nb_past_timesteps,
        nb_features,
        nb_randomized_params,
        n_steps,
        n_traces_per_setting,
        path_to_save_generated_data=None,
        add_timestamps=True,
        keep_params=False,
):
    nn = StochNet(
        nb_past_timesteps=nb_past_timesteps,
        nb_features=nb_features,
        nb_randomized_params=nb_randomized_params,
        project_folder=project_folder,
        timestep=timestep,
        dataset_id=dataset_id,
        model_id=model_id,
        mode='inference'
    )

    project_explorer = ProjectFileExplorer(project_folder)
    dataset_explorer = project_explorer.get_dataset_file_explorer(timestep, dataset_id)
    histogram_data = np.load(dataset_explorer.histogram_dataset_fp)
    initial_settings = histogram_data[:, 0, 0:nb_past_timesteps, -(nb_features+nb_randomized_params):]

    LOGGER.info("Start generating NN traces")
    cnt = 0
    while True:
        if cnt > 10:
            LOGGER.error(f"Failed to generate NN traces after {cnt} attempts...")
            break
        try:
            traces = nn.generate_traces(
                initial_settings,
                n_steps=n_steps,
                n_traces=n_traces_per_setting,
                curr_state_rescaled=False,
                round_result=True,
                add_timestamps=add_timestamps,
                keep_params=keep_params,
            )
            LOGGER.info(f"Done. generated data shape: {traces.shape}")
            if path_to_save_generated_data:
                np.save(path_to_save_generated_data, traces)
            return traces
        except Exception as e:
            LOGGER.warning(f"Generating NN traces failed ({e}), trying again...")
            cnt += 1
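
For reference, a minimal sketch (illustrative values, not taken from the project) of how the `initial_settings` slice above lines up with the histogram-dataset layout, assuming timestamps are stored as the 0-th feature:

import numpy as np

# Illustrative shapes only: [n_settings, n_traces, n_steps, 1 (timestamp) + nb_features + nb_randomized_params]
n_settings, n_traces, n_steps = 4, 10, 50
nb_features, nb_randomized_params, nb_past_timesteps = 3, 2, 1

histogram_data = np.random.rand(
    n_settings, n_traces, n_steps, 1 + nb_features + nb_randomized_params)

# Same slice as in get_nn_histogram_data: first trace, first nb_past_timesteps
# steps, and only the species + randomized-parameter columns.
initial_settings = histogram_data[
    :, 0, 0:nb_past_timesteps, -(nb_features + nb_randomized_params):]

assert initial_settings.shape == (
    n_settings, nb_past_timesteps, nb_features + nb_randomized_params)
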
Example #8
class StochNet:
    """
    Main class containing Mixture Density Network (MDN) neural network.
    After being trained by Trainer, the model can be re-initialized for predicting trajectories.
    """
    def __init__(
        self,
        nb_past_timesteps,
        nb_features,
        nb_randomized_params,
        project_folder,
        timestep,
        dataset_id,
        model_id,
        body_config_path=None,
        mixture_config_path=None,
        ckpt_path=None,
        mode='normal',
    ):
        """
        Initialize model.
        Model can be initialized in three modes:
            * normal - build MDN for training
            * inference - load a trained model (from a frozen graph). In this mode it cannot be trained.
            * inference_ckpt - load the model from a training checkpoint. It can be trained further
              and produce predictions; however, the graph created in this mode is trainable and
              therefore not optimised for predictions, i.e. it has many redundancies.

        Parameters
        ----------
        nb_past_timesteps : number of past time-steps the model observes to make a prediction.
            This number is reflected in the input shape: (bs, nb_past_timesteps, nb_features + nb_randomized_params).
        nb_features : number of CRN model features (i.e. species).
        nb_randomized_params : number of randomized CRN model parameters. The MDN takes parameter values
            as additional inputs: for an input of shape (bs, nb_past_timesteps, nb_features + nb_randomized_params)
            it samples outputs of shape (bs, nb_past_timesteps, nb_features).
        project_folder : root folder of the current project (CRN model).
        timestep : time-step between two consecutive states of the CRN (which are, respectively, the input
            and the ground-truth output of the MDN). Used to find dataset-related files such as the scaler,
            and to save/find model-related files.
        dataset_id : ID number of the training dataset. Used to find dataset-related files such as the scaler.
        model_id : ID number of the model (self), used to save/find its related files.
        body_config_path : path to a .json configuration file, defining body-part of MDN
            (body_fn_name, block_name, n_blocks, hidden_size, use_batch_norm, activation, constraints and regularisers).
            This config will be copied to the model folder during initialization.
            If None, then it will try to find it in the model folder.
        mixture_config_path : path to a .json configuration file, defining mixture-part of MDN
            (number and types of components, their hidden_size, activation functions, constraints and regularisers).
            This config will be copied to the model folder during initialization.
            If None, then it will try to find it in the model folder.
        ckpt_path : path to a checkpoint file to initialize model parameters (in 'normal' mode),
            or re-create model graph and initialize parameters (in 'inference_ckpt' mode).
        mode : mode to build the model.
        """
        self.nb_past_timesteps = nb_past_timesteps
        self.nb_features = nb_features
        self.nb_randomized_params = nb_randomized_params
        self.timestep = timestep

        self.project_explorer = ProjectFileExplorer(project_folder)
        self.dataset_explorer = self.project_explorer.get_dataset_file_explorer(
            self.timestep, dataset_id)
        self.model_explorer = self.project_explorer.get_model_file_explorer(
            self.timestep, model_id)

        self._input_placeholder = None
        self._input_placeholder_name = None
        self._pred_tensor = None
        self._pred_tensor_name = None
        self._pred_placeholder = None
        self._pred_placeholder_name = None
        self._sample_shape_placeholder = None
        self._sample_shape_placeholder_name = None
        self._sample_tensor = None
        self._sample_tensor_name = None
        self._description_graphkeys = None
        self.restored = False

        self.graph = tf.compat.v1.Graph()

        with self.graph.as_default():

            self.session = tf.compat.v1.Session()

            if mode == 'normal':
                self._init_normal(body_config_path, mixture_config_path,
                                  ckpt_path)
                self._copy_dataset_scaler()

            elif mode == 'inference':
                self._load_model_from_frozen_graph()

            elif mode == 'inference_ckpt':
                self._load_model_from_checkpoint(ckpt_path)

            else:
                raise ValueError(
                    "Unknown keyword for `mode` parameter. Use 'normal', 'inference' or 'inference_ckpt'"
                )

            LOGGER.info(f"Model created in {mode} mode.")

        self.scaler = self._load_scaler()

    def _init_normal(
        self,
        body_config_path,
        mixture_config_path,
        ckpt_path,
    ):
        if body_config_path is None:
            body_config = self._read_config(self.model_explorer.body_config_fp)
        else:
            body_config = self._read_config(
                body_config_path,
                os.path.basename(self.model_explorer.body_config_fp))
        if mixture_config_path is None:
            mixture_config = self._read_config(
                self.model_explorer.mixture_config_fp)
        else:
            mixture_config = self._read_config(
                mixture_config_path,
                os.path.basename(self.model_explorer.mixture_config_fp))
        body_fn = self._get_body_fn(body_config)
        self._build_main_graph(body_fn, mixture_config)
        self._build_sampling_graph()
        self._save_graph_keys()
        if ckpt_path:
            self.restore_from_checkpoint(ckpt_path)

    def _read_config(self, file_path, model_dir_save_name=None):
        if not os.path.exists(file_path):
            raise FileNotFoundError(f"Config file not found: {file_path}")

        with open(file_path, 'r') as f:
            config = json.load(f)

        if model_dir_save_name:
            save_path = os.path.join(self.model_explorer.model_folder,
                                     model_dir_save_name)
            with open(save_path, 'w') as f:
                json.dump(config, f, indent='\t')

        return config

    def _get_body_fn(self, body_config):
        return partial(nn_bodies.body_main, **body_config)

    def _build_main_graph(self, body_fn, mixture_config):
        self.input_placeholder = tf.compat.v1.placeholder(
            tf.float32, (None, self.nb_past_timesteps,
                         self.nb_features + self.nb_randomized_params),
            name="input")
        self.rv_output_ph = tf.compat.v1.placeholder(
            tf.float32, (None, self.nb_features),
            name="random_variable_output")
        body = body_fn(self.input_placeholder)
        self.top_layer_obj = _get_mixture(
            mixture_config, sample_space_dimension=self.nb_features)
        self.pred_tensor = self.top_layer_obj.add_layer_on_top(body)
        self.loss = self.top_layer_obj.loss_function(self.rv_output_ph,
                                                     self.pred_tensor)

    def _build_sampling_graph(self):
        if self.sample_tensor is not None:
            return
        self.top_layer_obj.build_sampling_graph(graph=self.graph)
        self.pred_placeholder = self.top_layer_obj.pred_placeholder
        self.sample_shape_placeholder = self.top_layer_obj.sample_shape_placeholder
        self.sample_tensor = self.top_layer_obj.sample_tensor
        self.description_graphkeys = self.top_layer_obj.description_graphkeys

    def restore_from_checkpoint(self, ckpt_path):
        """Restore model parameters from a training checkpoint.

        Parameters
        ----------
        ckpt_path : path to a .ckpt file. TensorFlow checkpoints typically consist of three files,
            so the last part of the checkpoint file-name ('.index', '.meta', '.XXXX-of-XXXX') should be omitted.
        """
        with self.graph.as_default():
            variables = tf.compat.v1.get_collection(
                tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES)
            saver = tf.compat.v1.train.Saver(var_list=variables)
            saver.restore(self.session, ckpt_path)
        self.restored = True

    def save(self):
        """Save model and all related information.
        Saves the model's frozen graph and its important graph keys:
            input_placeholder : name of input tensor
            pred_tensor : name of body-part output tensor
            pred_placeholder : name of mixture-part input tensor
            sample_shape_placeholder : name of the input tensor specifying sample_shape (i.e. number of samples)
            sample_tensor : name of mixture-part output tensor (samples of mixture distribution)
            description_graphkeys : tensors corresponding to the parameters of the mixture components.
                Used to produce distribution description.

        Returns
        -------

        """
        if not self.restored:
            raise NotRestoredVariables()
        self._save_frozen_graph()
        self._save_graph_keys()

    def _save_frozen_graph(self):
        frozen_graph_def = self._freeze_graph()
        tf.compat.v1.train.write_graph(
            frozen_graph_def,
            logdir=self.model_explorer.model_folder,
            name=os.path.basename(self.model_explorer.frozen_graph_fp),
            as_text=False,
        )
        LOGGER.info(
            f"Model's frozen graph saved in {self.model_explorer.model_folder}"
        )

    def _freeze_graph(self):
        frozen_graph_def = tf.compat.v1.graph_util.convert_variables_to_constants(
            sess=self.session,
            input_graph_def=self.graph.as_graph_def(),
            output_node_names=self.dest_nodes,
        )
        return frozen_graph_def

    def _save_graph_keys(self):
        graph_keys_dict = {
            'input_placeholder': self._input_placeholder_name,
            'pred_tensor': self._pred_tensor_name,
            'pred_placeholder': self._pred_placeholder_name,
            'sample_shape_placeholder': self._sample_shape_placeholder_name,
            'sample_tensor': self._sample_tensor_name,
            'description_graphkeys': self.description_graphkeys
        }

        with open(self.model_explorer.graph_keys_fp, 'w') as f:
            json.dump(graph_keys_dict, f, indent='\t')
        LOGGER.info(
            f"Model's graph keys saved at {self.model_explorer.graph_keys_fp}")

    def _load_model_from_frozen_graph(self):
        graph_path = self.model_explorer.frozen_graph_fp
        if not os.path.exists(graph_path):
            raise FileNotFoundError(
                f"Could not find model's frozen graph file: {graph_path}. Did you save the model?"
            )
        graph_def = tf.compat.v1.GraphDef()
        with open(graph_path, 'rb') as f:
            graph_def.ParseFromString(f.read())
        tf.import_graph_def(graph_def, name='')

        self._load_graph_keys()
        self.restored = True

    def _load_model_from_checkpoint(self, ckpt_path):
        if ckpt_path is None:
            raise ValueError("Should provide `ckpt_path` to build model")
        meta_ckpt_path = ckpt_path + '.meta'
        if not os.path.exists(meta_ckpt_path):
            raise FileNotFoundError(
                f"Could not find model's checkpoint: {meta_ckpt_path}.")
        saver = tf.compat.v1.train.import_meta_graph(meta_ckpt_path)
        saver.restore(self.session, ckpt_path)
        self._load_graph_keys()
        self.restored = True

    def _load_graph_keys(self):
        graph_keys_path = self.model_explorer.graph_keys_fp
        if not os.path.exists(graph_keys_path):
            raise FileNotFoundError(
                f"Could not find model's graph keys file: {graph_keys_path}. Did you save the model?"
            )
        with open(graph_keys_path, 'r') as f:
            graph_keys = json.load(f)

        self.input_placeholder = self.graph.get_tensor_by_name(
            graph_keys['input_placeholder'])
        self.pred_tensor = self.graph.get_tensor_by_name(
            graph_keys['pred_tensor'])
        self.pred_placeholder = self.graph.get_tensor_by_name(
            graph_keys['pred_placeholder'])
        self.sample_shape_placeholder = self.graph.get_tensor_by_name(
            graph_keys['sample_shape_placeholder'])
        self.sample_tensor = self.graph.get_tensor_by_name(
            graph_keys['sample_tensor'])
        self.description_graphkeys = graph_keys['description_graphkeys']

    def get_description(
        self,
        nn_prediction_val=None,
        current_state_val=None,
        current_state_rescaled=False,
        visualize=False,
    ):
        """
        Create description of the mixture distribution.
        For every input (current_state), the model predicts parameters specifying
        the mixture distribution (nn_prediction), and then samples from this distribution.
        So one can either pass already computed nn_prediction values, or current_state values
        (in which case nn_prediction will be computed by the model).

        Parameters
        ----------
        nn_prediction_val : values of body-part outputs.
        current_state_val : input values (CRN model state).
        current_state_rescaled : boolean, whether or not inputs are already rescaled with the dataset scaler.
            If False, inputs will be rescaled first: as the model is usually trained on rescaled data,
            inputs should also be rescaled.
        visualize : boolean, if True, will create figures (which can be automatically displayed in jupyter).

        Returns
        -------
        Description dictionary containing parameters of mixture distribution components.

        """
        if nn_prediction_val is None:
            if current_state_val is None:
                raise ValueError(
                    "Should provide either current_state_val or nn_prediction_val"
                )
            if not current_state_rescaled:
                current_state_val = self.rescale(current_state_val)
            nn_prediction_val = self.session.run(
                self.pred_tensor,
                feed_dict={self.input_placeholder: current_state_val})
        description = self.session.run(self.description_graphkeys,
                                       feed_dict={
                                           self.pred_placeholder:
                                           nn_prediction_val,
                                           self.sample_shape_placeholder: 1
                                       })

        description = postprocess_description_dict(description)

        if visualize:
            visualize_description(description)

        return description

    @property
    def input_placeholder(self):
        return self._input_placeholder

    @input_placeholder.setter
    def input_placeholder(self, tensor):
        self._input_placeholder = tensor
        self._input_placeholder_name = tensor.name

    @property
    def pred_tensor(self):
        return self._pred_tensor

    @pred_tensor.setter
    def pred_tensor(self, tensor):
        self._pred_tensor = tensor
        self._pred_tensor_name = tensor.name

    @property
    def pred_placeholder(self):
        return self._pred_placeholder

    @pred_placeholder.setter
    def pred_placeholder(self, tensor):
        self._pred_placeholder = tensor
        self._pred_placeholder_name = tensor.name

    @property
    def sample_shape_placeholder(self):
        return self._sample_shape_placeholder

    @sample_shape_placeholder.setter
    def sample_shape_placeholder(self, tensor):
        self._sample_shape_placeholder = tensor
        self._sample_shape_placeholder_name = tensor.name

    @property
    def sample_tensor(self):
        return self._sample_tensor

    @sample_tensor.setter
    def sample_tensor(self, tensor):
        self._sample_tensor = tensor
        self._sample_tensor_name = tensor.name

    @property
    def dest_nodes(self):
        return [
            t.split(':')[0]
            for t in [self._sample_tensor_name, self._pred_tensor_name]
        ]

    @property
    def description_graphkeys(self):
        return self._description_graphkeys

    @description_graphkeys.setter
    def description_graphkeys(self, graphkeys):
        self._description_graphkeys = graphkeys

    def _copy_dataset_scaler(self):
        scaler_fp = os.path.join(self.model_explorer.model_folder,
                                 'scaler.pickle')
        shutil.copy2(
            self.dataset_explorer.scaler_fp,
            scaler_fp,
        )

    def _load_scaler(self):
        scaler_fp = os.path.join(self.model_explorer.model_folder,
                                 'scaler.pickle')
        with open(scaler_fp, 'rb') as file:
            scaler = pickle.load(file)
        return scaler

    # def load_scaler(self):
    #     with open(self.dataset_explorer.scaler_fp, 'rb') as file:
    #         scaler = pickle.load(file)
    #     return scaler

    # def _save_scaler(self):
    #     scaler_fp = os.path.join(self.model_explorer.model_folder, 'scaler.pickle')
    #     with open(scaler_fp, 'wb') as file:
    #         pickle.dump(self.scaler, file)

    def rescale(self, data):
        """Apply scaler to data."""
        if isinstance(self.scaler, StandardScaler):
            try:
                data = (data - self.scaler.mean_) / self.scaler.scale_
            except ValueError:
                data = (data - self.scaler.mean_[:self.nb_features]) \
                       / self.scaler.scale_[:self.nb_features]
        elif isinstance(self.scaler, MinMaxScaler):
            try:
                data = (data * self.scaler.scale_) + self.scaler.min_
            except ValueError:
                data = (data * self.scaler.scale_[:self.nb_features]) \
                       + self.scaler.min_[:self.nb_features]
        return data

    def scale_back(self, data):
        """Apply scaler inverse transform to data."""
        if isinstance(self.scaler, StandardScaler):
            try:
                data = data * self.scaler.scale_ + self.scaler.mean_
            except ValueError:
                data = data * self.scaler.scale_[:self.nb_features] \
                       + self.scaler.mean_[:self.nb_features]
        elif isinstance(self.scaler, MinMaxScaler):
            try:
                data = (data - self.scaler.min_) / self.scaler.scale_
            except ValueError:
                data = (data - self.scaler.min_[:self.nb_features]) \
                       / self.scaler.scale_[:self.nb_features]
        return data

    def predict(self, curr_state_values):
        """
        Return prediction values for mixture components.
        These values can then be forwarded to the `sample` method for sampling the next state.
        Input values should be rescaled first.

        Parameters
        ----------
        curr_state_values : input values (CRN model state). Should be rescaled first.

        Returns
        -------
        prediction_values : array of (concatenated) prediction values for mixture components.
        """

        if not self.restored:
            raise NotRestoredVariables()

        prediction_values = self.session.run(
            self._pred_tensor_name,
            feed_dict={self._input_placeholder_name: curr_state_values})
        return prediction_values

    def sample(self, prediction_values, sample_shape=()):
        """
        Sample from mixture distribution, defined by input prediction_values.

        Parameters
        ----------
        prediction_values : prediction values for mixture components (returned by the `predict` method).
        sample_shape : shape defining the number of samples: sample_shape=(n_samples,)

        Returns
        -------
        sample : array of samples from mixture distribution
        """
        if self.sample_tensor is None:
            self._build_sampling_graph()

        sample = self.session.run(self._sample_tensor_name,
                                  feed_dict={
                                      self._pred_placeholder_name:
                                      prediction_values,
                                      self._sample_shape_placeholder_name:
                                      sample_shape,
                                  })
        sample = np.expand_dims(sample, -2)
        return sample

    def next_state(
        self,
        curr_state_values,
        curr_state_rescaled=False,
        scale_back_result=True,
        round_result=False,
        n_samples=1,
    ):
        """
        Sample the next state given values of the current state.
        The current state should have shape [n_settings, nb_past_timesteps, nb_features + nb_randomized_params].

        Parameters
        ----------
        curr_state_values : input values (CRN model state).
        curr_state_rescaled : whether or not values are already rescaled. If False, will apply scaler first.
        scale_back_result : whether or not returned values should be scaled back to the original scale.
        round_result : whether or not to round returned values (only if scaled back) to imitate
            discrete population dynamics.
        n_samples : number of samples to produce.

        Returns
        -------
        next_state : array of samples of shape [n_samples, n_settings, nb_past_timesteps, nb_features]
        """
        # curr_state_values ~ [n_settings, 1, nb_features]
        if not curr_state_rescaled:
            curr_state_values = self.rescale(curr_state_values)

        nn_prediction_values = self.predict(curr_state_values)
        next_state = self.sample(nn_prediction_values,
                                 sample_shape=(n_samples, ))

        if scale_back_result:
            next_state = self.scale_back(next_state)
            if round_result:
                next_state = np.around(next_state)

        # next_state ~ [n_samples, n_settings, 1, nb_features]
        return next_state

    def generate_traces(
        self,
        curr_state_values,
        n_steps,
        n_traces=1,
        curr_state_rescaled=False,
        scale_back_result=True,
        round_result=False,
        add_timestamps=False,
        keep_params=False,
        batch_size=150,
    ):
        """
        Generate trajectories of the model.
        Trajectories are simulated by consecutively sampling the next state n_steps times.
        The current state should have shape [n_settings, nb_past_timesteps, nb_features + nb_randomized_params].

        Parameters
        ----------
        curr_state_values : input values (CRN model state).
        n_steps : length of trajectories to simulate.
        n_traces : number of trajectories starting from every initial state.
        curr_state_rescaled : whether or not values are already rescaled. If False, will apply scaler first.
        scale_back_result : whether or not returned values should be scaled back to the original scale.
        round_result : whether or not to round returned values (only if scaled back) to imitate
            discrete population dynamics.
        add_timestamps : if True, time-step indexes will be added (as the 0-th feature).
        keep_params : whether or not to keep randomized parameters in trajectories.
            If False, returned traces will have shape [n_settings, n_traces, n_steps, nb_features],
            otherwise [n_settings, n_traces, n_steps, nb_features + nb_randomized_params].
        batch_size : batch size to use for simulations. For a large number of simulations, it is more
            efficient to feed the neural network with reasonably-sized chunks of data.
        Returns
        -------

        traces : array of shape [n_settings, n_traces, n_steps, nb_features]
            or [n_settings, n_traces, n_steps, nb_features + nb_randomized_params],
            depending on the `keep_params` parameter.
        """
        n_settings, *state_shape = curr_state_values.shape

        traces_final_shape = (n_steps + 1, n_traces, n_settings, *state_shape)
        traces_tmp_shape = (n_steps + 1, n_settings * n_traces, *state_shape)

        traces = np.zeros(traces_tmp_shape, dtype=np.float32)

        if not curr_state_rescaled:
            curr_state_values = self.rescale(curr_state_values)

        curr_state_values = np.tile(
            curr_state_values,
            [n_traces, 1, 1])  # (n_settings * n_traces, *state_shape)
        traces[0] = curr_state_values

        zero_level = self.rescale(
            np.zeros(traces[0, ..., :self.nb_features].shape))

        n_batches = n_settings * n_traces // batch_size
        remainder = n_settings * n_traces % batch_size != 0

        for step_num in tqdm(range(n_steps)):
            for n in range(n_batches):
                next_state = self.next_state(
                    traces[step_num, n * batch_size:(n + 1) * batch_size],
                    curr_state_rescaled=True,
                    scale_back_result=False,
                    round_result=False,
                    n_samples=1)
                params = np.expand_dims(
                    traces[step_num, n * batch_size:(n + 1) * batch_size, ...,
                           self.nb_features:], 0)

                traces[step_num + 1, n * batch_size: (n + 1) * batch_size] = \
                    np.concatenate([next_state, params], -1)

            if remainder:
                next_state = self.next_state(
                    traces[step_num, n_batches * batch_size:],
                    curr_state_rescaled=True,
                    scale_back_result=False,
                    round_result=False,
                    n_samples=1,
                )
                params = np.expand_dims(
                    traces[step_num, n_batches * batch_size:, ...,
                           self.nb_features:], 0)

                traces[step_num + 1, n_batches * batch_size:] =  \
                    np.concatenate([next_state, params], -1)

            traces[step_num + 1, ..., :self.nb_features] = \
                np.maximum(traces[step_num + 1, ..., :self.nb_features], zero_level)

        traces = np.reshape(traces, traces_final_shape)
        traces = np.squeeze(traces, axis=-2)

        if scale_back_result:
            traces = self.scale_back(traces)
            if round_result:
                traces[..., :self.nb_features] = np.around(
                    traces[..., :self.nb_features])

        # [n_steps, n_traces, n_settings, nb_features] -> [n_settings, n_traces, n_steps, nb_features]
        traces = np.transpose(traces, (2, 1, 0, 3))

        if not keep_params:
            traces = traces[..., :self.nb_features]

        if add_timestamps:
            timespan = np.arange(0, (n_steps + 1) * self.timestep,
                                 self.timestep)[:n_steps + 1]
            timespan = np.tile(timespan, reps=(n_settings, n_traces, 1))
            timespan = timespan[..., np.newaxis]
            traces = np.concatenate([timespan, traces], axis=-1)

        return traces
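
A minimal usage sketch of the class above (paths, IDs and dimensions are illustrative; it assumes a model has already been trained and saved, so the frozen graph exists):

import numpy as np

nn = StochNet(
    nb_past_timesteps=1,
    nb_features=3,               # illustrative: 3 species
    nb_randomized_params=2,      # illustrative: 2 randomized parameters
    project_folder='/path/to/project',  # illustrative path
    timestep=0.5,
    dataset_id=1,
    model_id=1,
    mode='inference',            # load the saved frozen graph
)

# Initial states: [n_settings, nb_past_timesteps, nb_features + nb_randomized_params]
initial_states = np.random.rand(8, 1, 3 + 2).astype(np.float32)

traces = nn.generate_traces(
    initial_states,
    n_steps=20,
    n_traces=10,
    curr_state_rescaled=False,
    round_result=True,
    add_timestamps=True,
)
# traces ~ [n_settings, n_traces, n_steps + 1, 1 (timestamp) + nb_features]
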
Example #9
def evaluate(
        model_name,
        project_folder,
        timestep,
        dataset_id,
        model_id,
        nb_randomized_params,
        nb_past_timesteps=1,
        n_bins=100,
        distance_kind='iou',
        with_timestamps=True,
        save_histograms=True,
        time_lag_range=None,
        target_species_names=None,
        path_to_save_nn_traces=None,
        settings_idxs_to_save_histograms=None
):
    project_explorer = ProjectFileExplorer(project_folder)
    dataset_explorer = project_explorer.get_dataset_file_explorer(timestep, dataset_id)

    histogram_data = np.load(dataset_explorer.histogram_dataset_fp)
    histogram_data = histogram_data[..., :-nb_randomized_params]

    n_settings, n_traces, n_steps, n_species = histogram_data.shape
    if with_timestamps:
        n_species = n_species - 1

    CRN_module = import_module("stochnet_v2.CRN_models." + model_name)
    CRN_class = getattr(CRN_module, model_name)
    all_species_names = CRN_class.get_species_names()

    if len(all_species_names) != n_species:
        raise ValueError(
            f"Histogram data has {histogram_data.shape[-1]} species "
            f"({'with' if with_timestamps else 'without'} timesteps), "
            f"but CRN class {CRN_class.__name__} has {len(all_species_names)}."
        )

    target_species_names = target_species_names or all_species_names
    target_species_idxs = [all_species_names.index(name) for name in target_species_names]

    start = time()
    traces = get_nn_histogram_data(
        project_folder,
        timestep,
        dataset_id,
        model_id=model_id,
        nb_past_timesteps=nb_past_timesteps,
        nb_features=n_species,
        nb_randomized_params=nb_randomized_params,
        n_steps=n_steps-1,
        n_traces_per_setting=n_traces,
        path_to_save_generated_data=path_to_save_nn_traces,
        add_timestamps=with_timestamps,
        keep_params=False,
    )
    end = time()
    LOGGER.info(f"Took {end - start:.1f} seconds")
    with open(dataset_explorer.log_fp, 'a') as file:
        file.write(
            f"Simulating NN {n_traces} {model_name}, model_id={model_id} histogram trajectories "
            f"for {n_settings} different settings until {int(timestep * n_steps)}({n_steps} steps) "
            f"took {end - start:.1f} seconds.\n"
        )

    count = (multiprocessing.cpu_count() // 4) * 3 + 1
    pool = multiprocessing.Pool(processes=count)

    task = partial(
        get_distance,
        data_1=histogram_data,
        data_2=traces,
        n_bins=n_bins,
        with_timestamps=with_timestamps,
        target_species_idxs=target_species_idxs,
        histogram_bounds=None,
        kind=distance_kind,
        return_histograms=False,
    )
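    # pool.map feeds each entry of `time_lags` as the remaining positional
    # argument of get_distance (i.e. the time lag); all other arguments are
    # fixed by the partial above.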

    LOGGER.info(f"Start calculating distances for different time-lags, using {count} CPU cores for multiprocessing")
    start = time()
    time_lags = list(range(n_steps - 1))

    species_distances = pool.map(task, time_lags)
    end = time()
    LOGGER.info(f"Took {end - start:.1f} seconds")

    mean_distances = [np.mean(dist_i) for dist_i in species_distances]
    species_distances = np.array(species_distances)

    histogram_explorer = dataset_explorer.get_histogram_file_explorer(model_id=model_id, nb_steps=0)
    mean_dist_fig_path = os.path.join(histogram_explorer.histogram_folder, os.path.pardir, f'mean_{distance_kind}')
    spec_dist_fig_path = os.path.join(histogram_explorer.histogram_folder, os.path.pardir, f'spec_{distance_kind}')

    fig = plt.figure(figsize=(12, 8))
    plt.title(f"Mean {distance_kind} distance (averaged over all species and {n_settings} settings)")
    plt.plot(mean_distances)
    plt.xlabel('time lag')
    plt.ylabel('distance')
    plt.savefig(mean_dist_fig_path)
    plt.close(fig)

    fig = plt.figure(figsize=(12, 8))
    plt.title(f"Mean {distance_kind} distances (averaged over {n_settings} settings)")
    for i in range(species_distances.shape[-1]):
        plt.plot(species_distances[:, i], label=target_species_names[i])
    plt.xlabel('time lag')
    plt.ylabel('distance')
    plt.legend()
    plt.savefig(spec_dist_fig_path)
    plt.close(fig)

    if save_histograms:

        if settings_idxs_to_save_histograms is None:
            settings_idxs_to_save_histograms = [0]

        distance_fn = _histogram_distance if distance_kind == 'l1' else _iou_distance
        time_lag_range = time_lag_range or list(range(5, n_steps - 1, 10))

        LOGGER.info(
            f"Start building histograms for different settings: {settings_idxs_to_save_histograms}\n"
            f"and time-lags: {time_lag_range}"
        )
        start = time()

        for time_lag in tqdm(time_lag_range):

            histogram_explorer = dataset_explorer.get_histogram_file_explorer(model_id=model_id, nb_steps=time_lag)

            self_dist = get_distance(
                data_1=histogram_data[:, :n_traces // 2, ...],
                data_2=histogram_data[:, -n_traces // 2:, ...],
                time_lag=time_lag,
                n_bins=n_bins,
                with_timestamps=with_timestamps,
                target_species_idxs=target_species_idxs,
                histogram_bounds=None,
                kind=distance_kind,
                return_histograms=False,
            )

            species_distances, histograms_1, histograms_2 = get_distance(
                data_1=histogram_data,
                data_2=traces,
                time_lag=time_lag,
                n_bins=n_bins,
                with_timestamps=with_timestamps,
                target_species_idxs=target_species_idxs,
                histogram_bounds=None,
                kind=distance_kind,
                return_histograms=True,
            )
            self_dist_dict = {
                name: self_dist[idx]
                for idx, name in enumerate(target_species_names)
            }
            dist_dict = {
                name: species_distances[idx]
                for idx, name in enumerate(target_species_names)
            }
            with open(histogram_explorer.log_fp, 'w') as f:
                f.write(
                    f"Dataset mean self-distance ({distance_kind}): {np.mean(self_dist):.4f}\n"
                    f"Mean histogram distance ({distance_kind}): {np.mean(species_distances):.4f}\n"
                )

                f.write(f"\nDataset self-distances ({distance_kind}):\n")
                for k, v in self_dist_dict.items():
                    f.write(f"\t{k}: {v:.4f}\n")

                f.write(f"\nSpecies histogram distances ({distance_kind}):\n")
                for k, v in dist_dict.items():
                    f.write(f"\t{k}: {v:.4f}\n")

            for setting_idx in settings_idxs_to_save_histograms:

                for species_idx in range(len(target_species_idxs)):

                    curr_setting_distance = distance_fn(
                        histograms_1[setting_idx:setting_idx + 1, species_idx:species_idx + 1],
                        histograms_2[setting_idx:setting_idx + 1, species_idx:species_idx + 1]
                    )

                    save_path = os.path.join(
                        histogram_explorer.histogram_folder,
                        f'setting_{setting_idx}',
                        f'{target_species_names[species_idx]}'
                    )
                    maybe_create_dir(os.path.dirname(save_path))

                    fig = plt.figure(figsize=(12, 7))
                    plt.title(
                        f"{target_species_names[species_idx]}: "
                        f"{distance_kind}: {curr_setting_distance}, "
                        f"mean ({n_settings} settings): {species_distances[species_idx]:.4f}"
                    )
                    plt.plot(*histograms_1[setting_idx, species_idx], '-', label='gillespy')
                    plt.plot(*histograms_2[setting_idx, species_idx], '-', label='NN')
                    # plt.bar(
                    #     list(range(histograms_1.shape[-1])),
                    #     histograms_1[setting_idx, species_idx, 1],
                    #     label='gillespy', alpha=0.7
                    # )
                    # plt.bar(
                    #     list(range(histograms_2.shape[-1])),
                    #     histograms_2[setting_idx, species_idx, 1],
                    #     label='NN', alpha=0.7
                    # )
                    plt.legend()
                    plt.savefig(save_path)
                    plt.close(fig)

        end = time()
        LOGGER.info(f"Took {end - start:.1f} seconds")

    LOGGER.info("All done.")
Example #10
def main():
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"

    parser = argparse.ArgumentParser()
    parser.add_argument('--project_folder', type=str, required=True)
    parser.add_argument('--timestep', type=float, required=True)
    parser.add_argument('--dataset_id', type=int, required=True)
    parser.add_argument('--model_id', type=int, required=True)
    parser.add_argument('--model_name', type=str, required=True)
    parser.add_argument('--nb_past_timesteps', type=int, required=True)
    parser.add_argument('--nb_randomized_params', type=int, required=True)
    parser.add_argument('--distance_kind',
                        type=str,
                        default='dist',
                        choices=['dist', 'iou'])
    parser.add_argument('--target_species_names', default='')
    parser.add_argument('--time_lag_range', default='10')
    parser.add_argument('--settings_idxs_to_save_histograms', default='0 1')
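    # List-like arguments are passed as single space-separated strings and split
    # below, e.g. --target_species_names "S1 S2" (species names here are
    # illustrative) or --time_lag_range "10 20 30".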

    args = parser.parse_args()

    project_folder = args.project_folder
    timestep = args.timestep
    dataset_id = args.dataset_id
    model_id = args.model_id
    model_name = args.model_name
    nb_past_timesteps = args.nb_past_timesteps
    nb_randomized_params = args.nb_randomized_params
    distance_kind = args.distance_kind
    target_species_names = args.target_species_names.split(' ')
    target_species_names = target_species_names if target_species_names != [''] else []
    time_lag_range = args.time_lag_range
    time_lag_range = list(map(int, time_lag_range.split(' ')))
    settings_idxs_to_save_histograms = list(
        map(int, args.settings_idxs_to_save_histograms.split(' ')))

    project_explorer = ProjectFileExplorer(project_folder)
    dataset_explorer = project_explorer.get_dataset_file_explorer(
        timestep, dataset_id)
    histogram_explorer = dataset_explorer.get_histogram_file_explorer(
        model_id, 0)
    nn_histogram_data_fp = os.path.join(
        histogram_explorer.model_histogram_folder, 'nn_histogram_data.npy')

    evaluate(
        model_name=model_name,
        project_folder=project_folder,
        timestep=timestep,
        dataset_id=dataset_id,
        model_id=model_id,
        nb_randomized_params=nb_randomized_params,
        nb_past_timesteps=nb_past_timesteps,
        n_bins=200,
        distance_kind=distance_kind,
        with_timestamps=True,
        save_histograms=True,
        time_lag_range=time_lag_range,
        target_species_names=target_species_names,
        path_to_save_nn_traces=nn_histogram_data_fp,
        settings_idxs_to_save_histograms=settings_idxs_to_save_histograms,
    )
Example #11
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--project_folder', type=str, required=True)
    parser.add_argument('--timestep', type=float, required=True)
    parser.add_argument('--dataset_id', type=int, required=True)
    parser.add_argument('--nb_past_timesteps', type=int, default=1)
    parser.add_argument('--nb_randomized_params', type=int, required=True)
    parser.add_argument('--positivity', type=str, default='true')
    parser.add_argument('--test_fraction', type=float, default=0.125)
    parser.add_argument('--save_format',
                        type=str,
                        default='hdf5',
                        choices=['hdf5', 'tfrecord'])
    parser.add_argument('--random_seed', type=int, default=23)
    args = parser.parse_args()

    project_folder = args.project_folder
    timestep = args.timestep
    dataset_id = args.dataset_id
    nb_past_timesteps = args.nb_past_timesteps
    nb_randomized_params = args.nb_randomized_params
    positivity = args.positivity
    test_fraction = args.test_fraction
    save_format = args.save_format
    random_seed = args.random_seed

    np.random.seed(random_seed)

    project_explorer = ProjectFileExplorer(project_folder)
    dataset_explorer = project_explorer.get_dataset_file_explorer(
        timestep, dataset_id)

    dt = DataTransformer(dataset_explorer.dataset_fp,
                         with_timestamps=True,
                         nb_randomized_params=nb_randomized_params)

    if save_format == 'hdf5':
        save_fn = dt.save_data_for_ml_hdf5
    # elif save_format == 'tfrecord':
    #     save_fn = dt.save_data_for_ml_tfrecord
    else:
        raise ValueError(
            f"save_format `{save_format}` not recognized. Use 'hdf5'.")

    positivity = str_to_bool(positivity)

    start = time()

    save_fn(dataset_folder=dataset_explorer.dataset_folder,
            nb_past_timesteps=nb_past_timesteps,
            test_fraction=test_fraction,
            keep_timestamps=False,
            rescale=False,
            positivity=positivity,
            shuffle=True,
            slice_size=100,
            force_rewrite=True)
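
    # Second pass: same formatting, but with rescale=True, writing the rescaled
    # variant of the train/test files alongside the raw one produced above.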

    save_fn(dataset_folder=dataset_explorer.dataset_folder,
            nb_past_timesteps=nb_past_timesteps,
            test_fraction=test_fraction,
            keep_timestamps=False,
            rescale=True,
            positivity=positivity,
            shuffle=True,
            slice_size=100,
            force_rewrite=True)

    end = time()
    execution_time = end - start
    msg = f"\n\nFormatting dataset into {save_format} files took {execution_time} seconds.\n" \
          f"\tnb_past_timesteps={nb_past_timesteps},\n" \
          f"\ttest_fraction={test_fraction},\n" \
          f"\tpositivity={positivity},\n" \

    with open(dataset_explorer.log_fp, 'a') as f:
        f.write(msg)
Example #12
def main():
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"

    parser = argparse.ArgumentParser()
    parser.add_argument('--project_folder', type=str, required=True)
    parser.add_argument('--timestep', type=float, required=True)
    parser.add_argument('--dataset_id', type=int, required=True)
    parser.add_argument('--model_id', type=int, required=True)
    parser.add_argument('--nb_features', type=int, required=True)
    parser.add_argument('--nb_past_timesteps', type=int, required=True)
    parser.add_argument('--nb_randomized_params', type=int, required=True)

    parser.add_argument('--body_config_path', type=str, required=True)
    parser.add_argument('--mixture_config_path', type=str, required=True)

    parser.add_argument('--n_epochs_heat_up', type=int, default=20)
    parser.add_argument('--n_epochs_main', type=int, default=100)
    parser.add_argument('--n_epochs_arch', type=int, default=5)
    parser.add_argument('--n_epochs_interval', type=int, default=5)
    parser.add_argument('--n_epochs_finetune', type=int, default=40)
    parser.add_argument('--batch_size', type=int, default=512)
    parser.add_argument('--add_noise',
                        type=str,
                        default='false',
                        choices=['true', 'false', 'True', 'False'])
    parser.add_argument('--stddev', type=float, default=0.01)
    parser.add_argument('--dataset_kind', type=str, default='hdf5')

    args = parser.parse_args()

    project_folder = args.project_folder
    timestep = args.timestep
    dataset_id = args.dataset_id
    model_id = args.model_id
    nb_features = args.nb_features
    nb_past_timesteps = args.nb_past_timesteps
    nb_randomized_params = args.nb_randomized_params

    body_config_path = args.body_config_path
    mixture_config_path = args.mixture_config_path

    n_epochs_heat_up = args.n_epochs_heat_up
    n_epochs_main = args.n_epochs_main
    n_epochs_arch = args.n_epochs_arch
    n_epochs_interval = args.n_epochs_interval
    n_epochs_finetune = args.n_epochs_finetune
    batch_size = args.batch_size
    add_noise = str_to_bool(args.add_noise)
    stddev = args.stddev

    dataset_kind = args.dataset_kind
    learning_strategy_main = None
    learning_strategy_arch = None
    learning_strategy_finetune = None
    ckpt_path = None

    project_explorer = ProjectFileExplorer(project_folder)
    dataset_explorer = project_explorer.get_dataset_file_explorer(
        timestep, dataset_id)
    model_explorer = project_explorer.get_model_file_explorer(
        timestep, model_id)

    start = time()

    nn = NASStochNet(
        nb_past_timesteps=nb_past_timesteps,
        nb_features=nb_features,
        nb_randomized_params=nb_randomized_params,
        project_folder=project_folder,
        timestep=timestep,
        dataset_id=dataset_id,
        model_id=model_id,
        body_config_path=body_config_path,
        mixture_config_path=mixture_config_path,
    )

    best_ckpt_path = Trainer().train(
        nn,
        n_epochs_main=n_epochs_main,
        n_epochs_heat_up=n_epochs_heat_up,
        n_epochs_arch=n_epochs_arch,
        n_epochs_interval=n_epochs_interval,
        n_epochs_finetune=n_epochs_finetune,
        batch_size=batch_size,
        learning_strategy_main=learning_strategy_main,
        learning_strategy_arch=learning_strategy_arch,
        learning_strategy_finetune=learning_strategy_finetune,
        ckpt_path=ckpt_path,
        dataset_kind=dataset_kind,
        add_noise=add_noise,
        stddev=stddev,
        mode=['search', 'finetune'])

    end = time()
    execution_time = end - start
    msg = f"\n\nTraining model search, model_id={model_id} on dataset {dataset_id}" \
          f" took {execution_time // 60} minutes.\n" \
          f"\tmodel restored from {best_ckpt_path} saved as {model_explorer.frozen_graph_fp}\n"

    with open(dataset_explorer.log_fp, 'a') as f:
        f.write(msg)

    with open(model_explorer.log_fp, 'a') as f:
        f.write(msg)
Example #13
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--project_folder', type=str, required=True)
    parser.add_argument('--timestep', type=float, required=True)
    parser.add_argument('--dataset_id', type=int, required=True)
    parser.add_argument('--nb_settings', type=int, required=True)
    parser.add_argument('--nb_trajectories', type=int, required=True)
    parser.add_argument('--endtime', type=float, required=True)
    parser.add_argument('--model_name', type=str, required=True)
    parser.add_argument('--params_to_randomize', required=True, default='')
    parser.add_argument('--random_seed', type=int, default=23)
    args = parser.parse_args()

    project_folder = args.project_folder
    timestep = args.timestep
    dataset_id = args.dataset_id
    nb_settings = args.nb_settings
    nb_trajectories = args.nb_trajectories
    endtime = args.endtime
    model_name = args.model_name
    params_to_randomize = args.params_to_randomize.split(' ')
    params_to_randomize = params_to_randomize if params_to_randomize != [''] else []
    random_seed = args.random_seed

    start = time()

    np.random.seed(random_seed)

    project_explorer = ProjectFileExplorer(project_folder)
    dataset_explorer = project_explorer.get_dataset_file_explorer(
        timestep, dataset_id)

    # settings = get_histogram_settings(
    #     nb_settings,
    #     # dataset_explorer.train_fp,
    #     dataset_explorer.test_fp,
    # )

    crn_module = import_module("stochnet_v2.CRN_models." + model_name)
    crn_class = getattr(crn_module, model_name)
    settings = crn_class.get_initial_settings(nb_settings)

    np.save(dataset_explorer.histogram_settings_fp, settings)

    histogram_dataset = build_simulation_dataset(
        model_name,
        nb_settings,
        nb_trajectories,
        timestep,
        endtime,
        dataset_explorer.dataset_folder,
        params_to_randomize=params_to_randomize,
        prefix='histogram_partial_',
        how='stack',
        settings_filename=os.path.basename(
            dataset_explorer.histogram_settings_fp),
    )
    np.save(dataset_explorer.histogram_dataset_fp, histogram_dataset)

    end = time()
    execution_time = end - start

    with open(dataset_explorer.log_fp, 'a') as file:
        file.write(
            f"\n\nSimulating {nb_trajectories} {model_name} histogram trajectories "
            f"for {nb_settings} different settings until {endtime} "
            f"took {execution_time} seconds.\n")
Example #14
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--project_folder', type=str, required=True)
    parser.add_argument('--timestep', type=float, required=True)
    parser.add_argument('--dataset_id', type=int, required=True)
    parser.add_argument('--nb_settings', type=int, required=True)
    parser.add_argument('--nb_trajectories', type=int, required=True)
    parser.add_argument('--endtime', type=float, required=True)
    parser.add_argument('--model_name', type=str, required=True)
    parser.add_argument('--params_to_randomize', required=True, default='')
    parser.add_argument('--random_seed', type=int, default=23)
    args = parser.parse_args()

    project_folder = args.project_folder
    timestep = args.timestep
    dataset_id = args.dataset_id
    nb_settings = args.nb_settings
    nb_trajectories = args.nb_trajectories
    endtime = args.endtime
    model_name = args.model_name
    random_seed = args.random_seed
    params_to_randomize = args.params_to_randomize.split(' ')
    params_to_randomize = params_to_randomize if params_to_randomize != [''] else []

    LOGGER.info(">>> START")
    start = time()

    np.random.seed(random_seed)

    project_explorer = ProjectFileExplorer(project_folder)
    dataset_explorer = project_explorer.get_dataset_file_explorer(timestep, dataset_id)

    crn_module = import_module("stochnet_v2.CRN_models." + model_name)
    crn_class = getattr(crn_module, model_name)
    settings = crn_class.get_initial_settings(nb_settings)
    np.save(dataset_explorer.settings_fp, settings)

    LOGGER.info(f"Dataset folder: {dataset_explorer.dataset_folder}")

    dataset = build_simulation_dataset(
        model_name,
        nb_settings,
        nb_trajectories,
        timestep,
        endtime,
        dataset_explorer.dataset_folder,
        params_to_randomize=params_to_randomize,
        how='concat'
    )
    np.save(dataset_explorer.dataset_fp, dataset)

    LOGGER.info(">>> DONE.")

    end = time()
    execution_time = end - start
    msg = f"\n\nSimulating {nb_trajectories} {model_name} " \
          f"trajectories for {nb_settings} different settings " \
          f"with endtime {endtime} took {execution_time} seconds.\n"\

    with open(dataset_explorer.log_fp, 'a') as f:
        f.write(msg)

    LOGGER.info(msg)

def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--project_folder', type=str, required=True)
    parser.add_argument('--timestep', type=float, required=True)
    parser.add_argument('--dataset_id', type=int, required=True)
    parser.add_argument('--var_list', type=str, required=True,
                        help='string of space-separated variable names to randomize')
    parser.add_argument('--nb_settings', type=int, required=True)
    parser.add_argument('--nb_trajectories', type=int, required=True)
    parser.add_argument('--endtime', type=float, required=True)
    parser.add_argument('--model_name', type=str, required=True)
    parser.add_argument('--random_seed', type=int, default=23)
    args = parser.parse_args()

    project_folder = args.project_folder
    timestep = args.timestep
    dataset_id = args.dataset_id
    var_list = args.var_list.split(' ')
    nb_settings = args.nb_settings
    nb_trajectories = args.nb_trajectories
    endtime = args.endtime
    model_name = args.model_name
    random_seed = args.random_seed

    LOGGER.info(">>> START")
    start = time()

    np.random.seed(random_seed)

    project_explorer = ProjectFileExplorer(project_folder)
    dataset_explorer = project_explorer.get_dataset_file_explorer(timestep, dataset_id)

    settings_filename = 'settings.pickle'
    model_fp = os.path.join(project_folder, f'{model_name}.ka')

    LOGGER.info(f"Dataset folder: {dataset_explorer.dataset_folder}")

    dataset = build_simulation_dataset(
        model_fp,
        nb_settings,
        nb_trajectories,
        timestep,
        endtime,
        dataset_explorer.dataset_folder,
        var_list,
        prefix='partial_',
        how='concat',
        settings_filename=settings_filename,
    )
    np.save(dataset_explorer.dataset_fp, dataset)

    LOGGER.info(">>> DONE.")

    end = time()
    execution_time = end - start
    msg = f"Simulating {nb_trajectories} {model_name} " \
          f"trajectories for {nb_settings} different settings " \
          f"with endtime {endtime} took {execution_time} seconds.\n"\

    with open(dataset_explorer.log_fp, 'a') as f:
        f.write(msg)

    LOGGER.info(msg)