예제 #1
0
    def __init__(self,
                 env,
                 policy,
                 max_search_depth=100,
                 n_search_episodes=1000,
                 discount_factor=0.99,
                 value_weight=1.0,
                 optimizer=Adam(),
                 model_dir=None):
        """Set up a tree-search based optimizer for a policy.

        Parameters
        ----------
        env: Environment
          the Environment to interact with.  A deep copy of it is stored.
        policy: Policy
          the Policy to optimize.  Its create_layers() method must return a
          dict containing the keys 'action_prob' and 'value', corresponding to
          the action probabilities and value estimate
        max_search_depth: int
          the maximum depth of the tree search, measured in steps
        n_search_episodes: int
          the number of episodes to simulate (up to max_search_depth, if they
          do not terminate first) for each tree search
        discount_factor: float
          the discount factor to use when computing rewards
        value_weight: float
          a scale factor for the value loss term in the loss function
        optimizer: Optimizer
          the optimizer to use.  If None, an Adam optimizer with
          learning_rate=0.001, beta1=0.9 and beta2=0.999 is created.
        model_dir: str
          the directory in which the model will be saved.  If None, a
          temporary directory will be created.
        """
        # Keep a private copy so the caller's environment object is not shared.
        self._env = copy.deepcopy(env)
        self._policy = policy

        # Search and loss hyperparameters.
        self.max_search_depth = max_search_depth
        self.n_search_episodes = n_search_episodes
        self.discount_factor = discount_factor
        self.value_weight = value_weight

        # Remember whether the first entry of the state shape is itself a
        # sequence.
        first_shape_entry = env.state_shape[0]
        self._state_is_list = isinstance(first_shape_entry, SequenceCollection)

        self._optimizer = (Adam(learning_rate=0.001, beta1=0.9, beta2=0.999)
                           if optimizer is None else optimizer)

        # Build the 'global' graph and keep handles to its pieces.
        graph_parts = self._build_graph(None, 'global', model_dir)
        (self._graph, self._features, self._pred_prob, self._pred_value,
         self._search_prob, self._search_value) = graph_parts

        tf_graph = self._graph._get_tf("Graph")
        with tf_graph.as_default():
            with tf.variable_scope('global'):
                checkpoint = self._checkpoint = tf.train.Checkpoint()
                # Touching the attribute ensures the variable has been created.
                checkpoint.save_counter
            global_variables = tf.get_collection(
                tf.GraphKeys.GLOBAL_VARIABLES, scope='global')
            checkpoint.listed = global_variables
            self._graph.session.run(checkpoint.save_counter.initializer)
예제 #2
0
  def test_multitask_regression_overfit(self):
    """Test that a multitask regressor can overfit a tiny dataset.

    Builds 10 samples with 3 random features and all-zero labels for 10
    tasks, trains a MultitaskRegressor for 50 epochs, and checks that the
    mean squared error on the training data falls below 0.1.
    """
    n_tasks = 10
    n_samples = 10
    n_features = 3

    # Generate dummy dataset (seeded so the run is reproducible).
    np.random.seed(123)
    ids = np.arange(n_samples)
    X = np.random.rand(n_samples, n_features)
    y = np.zeros((n_samples, n_tasks))
    w = np.ones((n_samples, n_tasks))

    dataset = dc.data.NumpyDataset(X, y, w, ids)

    regression_metric = dc.metrics.Metric(
        dc.metrics.mean_squared_error, task_averager=np.mean, mode="regression")
    model = dc.models.MultitaskRegressor(
        n_tasks,
        n_features,
        dropouts=[0.],
        weight_init_stddevs=[.1],
        batch_size=n_samples,
        optimizer=Adam(learning_rate=0.0003, beta1=0.9, beta2=0.999))

    # Fit the model on the training data.
    model.fit(dataset, nb_epoch=50)

    # Evaluate on the same data the model was trained on.
    scores = model.evaluate(dataset, [regression_metric])
    assert scores[regression_metric.name] < .1
예제 #3
0
    def test_save_load(self):
        """Check that a saved TensorGraph gives the same predictions after reload.

        A small softmax model (with an extra submodel) is trained for one
        epoch, saved, moved to a fresh directory, loaded again, and its
        predictions compared with the ones made before saving.
        """
        n_data_points = 20
        n_features = 2
        X = np.random.rand(n_data_points, n_features)
        y = [[0, 1] for _ in range(n_data_points)]
        dataset = NumpyDataset(X, y)

        # Network: features -> dense -> softmax, trained with cross entropy.
        features = Feature(shape=(None, n_features))
        dense = Dense(out_channels=2, in_layers=[features])
        output = SoftMax(in_layers=[dense])
        label = Label(shape=(None, 2))
        smce = SoftMaxCrossEntropy(in_layers=[label, dense])
        loss = ReduceMean(in_layers=[smce])

        tg = dc.models.TensorGraph(learning_rate=0.01)
        tg.add_output(output)
        tg.set_loss(loss)

        # Attach a submodel with its own loss and optimizer.
        submodel_loss = ReduceSum(in_layers=smce)
        submodel_opt = Adam(learning_rate=0.002)
        submodel = tg.create_submodel(layers=[dense],
                                      loss=submodel_loss,
                                      optimizer=submodel_opt)

        tg.fit(dataset, nb_epoch=1)
        prediction = np.squeeze(tg.predict_on_batch(X))
        tg.save()

        # Reserve a unique path, remove the empty directory, then move the
        # saved model directory there.
        dirpath = tempfile.mkdtemp()
        shutil.rmtree(dirpath)
        shutil.move(tg.model_dir, dirpath)

        tg1 = TensorGraph.load_from_dir(dirpath)
        prediction2 = np.squeeze(tg1.predict_on_batch(X))
        predictions_match = np.isclose(prediction, prediction2, atol=0.01)
        assert_true(np.all(predictions_match))
예제 #4
0
    def test_fittransform_regression_overfit(self):
        """Check that MultitaskFitTransformRegressor overfits a tiny regression set.

        Trains for 100 epochs on 10 samples of random 3x3 inputs with
        all-zero labels, and requires a training mean squared error below 0.1.
        """
        n_samples = 10
        n_features = 3
        n_tasks = 1

        # Generate dummy dataset (seeded so the run is reproducible).
        np.random.seed(123)
        sample_ids = np.arange(n_samples)
        inputs = np.random.rand(n_samples, n_features, n_features)
        labels = np.zeros((n_samples, n_tasks))
        weights = np.ones((n_samples, n_tasks))
        dataset = dc.data.NumpyDataset(inputs, labels, weights, sample_ids)

        fit_transformers = [dc.trans.CoulombFitTransformer(dataset)]
        regression_metric = dc.metrics.Metric(dc.metrics.mean_squared_error)
        adam = Adam(learning_rate=0.003, beta1=0.9, beta2=0.999)
        init_stddev = np.sqrt(6) / np.sqrt(1000)
        model = dc.models.MultitaskFitTransformRegressor(
            n_tasks, [n_features, n_features],
            dropouts=[0.01],
            weight_init_stddevs=[init_stddev],
            batch_size=n_samples,
            fit_transformers=fit_transformers,
            n_evals=1,
            optimizer=adam)

        # Fit the model on the training data.
        model.fit(dataset, nb_epoch=100)

        # Evaluate on the same data the model was trained on.
        scores = model.evaluate(dataset, [regression_metric])
        assert scores[regression_metric.name] < .1
예제 #5
0
    def test_multitask_classification_overfit(self):
        """Test that MultitaskClassifier can overfit a tiny dataset.

        Builds 10 samples with 3 random features and all-zero labels for 10
        tasks, trains a MultitaskClassifier with the default number of
        epochs, and requires a training accuracy above 0.9.
        """
        n_tasks = 10
        n_samples = 10
        n_features = 3

        # Generate dummy dataset (seeded so the run is reproducible).
        np.random.seed(123)
        ids = np.arange(n_samples)
        X = np.random.rand(n_samples, n_features)
        y = np.zeros((n_samples, n_tasks))
        w = np.ones((n_samples, n_tasks))
        dataset = dc.data.NumpyDataset(X, y, w, ids)

        classification_metric = dc.metrics.Metric(dc.metrics.accuracy_score,
                                                  task_averager=np.mean)
        model = dc.models.MultitaskClassifier(n_tasks,
                                              n_features,
                                              dropouts=[0.],
                                              weight_init_stddevs=[.1],
                                              batch_size=n_samples,
                                              optimizer=Adam(
                                                  learning_rate=0.0003,
                                                  beta1=0.9,
                                                  beta2=0.999))

        # Fit the model on the training data.
        model.fit(dataset)

        # Evaluate on the same data the model was trained on.
        scores = model.evaluate(dataset, [classification_metric])
        assert scores[classification_metric.name] > .9
예제 #6
0
    def __init__(self,
                 learner,
                 learning_rate=0.001,
                 optimization_steps=1,
                 meta_batch_size=10,
                 optimizer=Adam(),
                 model_dir=None):
        """Set up an object that performs meta-optimization.

        Parameters
        ----------
        learner: MetaLearner
          defines the meta-learning problem
        learning_rate: float or Tensor
          the learning rate to use for optimizing each task (not to be
          confused with the one used for meta-learning).  This can optionally
          be made a variable (represented as a Tensor), in which case the
          learning rate will itself be learnable.
        optimization_steps: int
          the number of steps of gradient descent to perform for each task
        meta_batch_size: int
          the number of tasks to use for each step of meta-learning
        optimizer: Optimizer
          the optimizer to use for meta-learning (not to be confused with the
          gradient descent optimization performed for each task)
        model_dir: str
          the directory in which the model will be saved.  If None, a
          temporary directory will be created.
        """
        # Remember the configuration.
        self.learner = learner
        self.learning_rate = learning_rate
        self.optimization_steps = optimization_steps
        self.meta_batch_size = meta_batch_size
        self.optimizer = optimizer

        # Resolve the output directory: use a temporary one when none is
        # given, otherwise create the requested one if it does not exist yet.
        self._model_dir_is_temp = False
        if model_dir is None:
            model_dir = tempfile.mkdtemp()
            self._model_dir_is_temp = True
        elif not os.path.exists(model_dir):
            os.makedirs(model_dir)
        self.model_dir = model_dir
        self.save_file = "%s/%s" % (self.model_dir, "model")

        # Both the meta-optimizer and the per-task gradient descent optimizer
        # are created from the same global step variable.
        step = tf.Variable(0, trainable=False)
        self._global_step = step
        self._tf_optimizer = optimizer._create_tf_optimizer(step)
        task_optimizer = GradientDescent(learning_rate=self.learning_rate)
        self._tf_task_optimizer = task_optimizer._create_tf_optimizer(step)

        # The checkpoint lists the learner's variables for saving.
        checkpoint = tf.train.Checkpoint()
        checkpoint.listed = learner.variables
        self._checkpoint = checkpoint
예제 #7
0
def test_ppo_reload():
    """Check that a PPO model restored from disk reproduces its predictions."""
    env = RouletteEnvironment()
    policy = TestPolicy(env)
    state = [[0]]

    # Train a model and record what it predicts for a fixed state.
    trained = dc.rl.PPO(env,
                        policy,
                        max_rollout_length=20,
                        optimization_epochs=8,
                        optimizer=Adam(learning_rate=0.003))
    trained.fit(1000)
    action_prob, value = trained.predict(state)

    # A second model pointed at the same directory must match after restore().
    saved_dir = trained._model.model_dir
    reloaded = dc.rl.PPO(env, policy, model_dir=saved_dir)
    reloaded.restore()
    action_prob2, value2 = reloaded.predict(state)

    assert np.all(action_prob == action_prob2)
    assert value == value2
예제 #8
0
def test_a2c_reload():
    """Check that an A2C model restored from disk reproduces its predictions."""
    env = RouletteEnvironment()
    policy = TestPolicy(env)
    state = [[0]]

    # Train a model and record what it predicts for a fixed state.
    trained = dc.rl.A2C(env,
                        policy,
                        max_rollout_length=20,
                        optimizer=Adam(learning_rate=0.001))
    trained.fit(1000)
    action_prob, value = trained.predict(state)

    # A second model pointed at the same directory must match after restore().
    saved_dir = trained._model.model_dir
    reloaded = dc.rl.A2C(env, policy, model_dir=saved_dir)
    reloaded.restore()
    action_prob2, value2 = reloaded.predict(state)

    assert np.all(action_prob == action_prob2)
    assert value == value2
예제 #9
0
def eval_tic_tac_toe(value_weight,
                     num_epoch_rounds=1,
                     games=10**4,
                     rollouts=10**5):
    """Train an A3C agent on tic-tac-toe and measure its average reward.

    Any existing model directory at /tmp/tictactoe is removed first.  Each
    round then creates an A3C model on that directory, tries to restore it
    from a checkpoint, trains it, and plays a number of evaluation games.

    Parameters
    ----------
    value_weight: float
      passed to A3C as its value_weight argument
    num_epoch_rounds: int
      the number of train-then-evaluate rounds to run
    games: int
      the number of evaluation games to play in each round
    rollouts: int
      the value passed to A3C.fit() in each round

    Returns
    -------
    a list with one single-entry dict per round, mapping
    (round index + 1) * rollouts to the mean over all evaluation games of the
    reward returned by the last step of each game
    """
    env = deepchem.rl.envs.tictactoe.TicTacToeEnvironment()
    policy = TicTacToePolicy()
    model_dir = "/tmp/tictactoe"
    # Best-effort cleanup: a missing directory (or a failed removal) is not
    # fatal, but unlike a bare except this lets KeyboardInterrupt through.
    shutil.rmtree(model_dir, ignore_errors=True)

    avg_rewards = []
    for j in range(num_epoch_rounds):
        a3c = dc.rl.A3C(env,
                        policy,
                        entropy_weight=0.01,
                        value_weight=value_weight,
                        model_dir=model_dir,
                        optimizer=Adam(learning_rate=0.001))
        # Restoring is optional (there is no checkpoint on the first round),
        # so failures are reported and training starts from the current state.
        try:
            a3c.restore()
        except Exception:
            print("unable to restore")
        a3c.fit(rollouts)
        rewards = []
        for _ in range(games):
            env.reset()
            # Stays at -inf if the game is already terminated after reset().
            reward = -float('inf')
            while not env._terminated:
                action = a3c.select_action(env._state)
                reward = env.step(action)
            rewards.append(reward)
        avg_rewards.append({(j + 1) * rollouts: np.mean(rewards)})
    return avg_rewards
예제 #10
0
class KerasModel(Model):
    """This is a DeepChem model implemented by a Keras model.

  This class provides several advantages over using the Keras
  model's fitting and prediction methods directly.

  1. It provides better integration with the rest of DeepChem,
     such as direct support for Datasets and Transformers.

  2. It defines the loss in a more flexible way.  In particular,
     Keras does not support multidimensional weight matrices,
     which makes it impossible to implement most multitask
     models with Keras.

  3. It provides various additional features not found in the
     Keras Model class, such as uncertainty prediction and
     saliency mapping.

  The loss function for a model can be defined in two different
  ways.  For models that have only a single output and use a
  standard loss function, you can simply provide a
  dc.models.losses.Loss object.  This defines the loss for each
  sample or sample/task pair.  The result is automatically
  multiplied by the weights and averaged over the batch.  Any
  additional losses computed by model layers, such as weight
  decay penalties, are also added.

  For more complicated cases, you can instead provide a function
  that directly computes the total loss.  It must be of the form
  f(outputs, labels, weights), taking the list of outputs from
  the model, the expected values, and any weight matrices.  It
  should return a scalar equal to the value of the loss function
  for the batch.  No additional processing is done to the
  result; it is up to you to do any weighting, averaging, adding
  of penalty terms, etc.

  You can optionally provide an output_types argument, which
  describes how to interpret the model's outputs.  This should
  be a list of strings, one for each output. You can use an
  arbitrary output_type for an output, but some output_types are
  special and will undergo extra processing:

  - 'prediction': This is a normal output, and will be returned by predict().
    If output types are not specified, all outputs are assumed
    to be of this type.

  - 'loss': This output will be used in place of the normal
    outputs for computing the loss function.  For example,
    models that output probability distributions usually do it
    by computing unbounded numbers (the logits), then passing
    them through a softmax function to turn them into
    probabilities.  When computing the cross entropy, it is more
    numerically stable to use the logits directly rather than
    the probabilities.  You can do this by having the model
    produce both probabilities and logits as outputs, then
    specifying output_types=['prediction', 'loss'].  When
    predict() is called, only the first output (the
    probabilities) will be returned.  But during training, it is
    the second output (the logits) that will be passed to the
    loss function.

  - 'variance': This output is used for estimating the
    uncertainty in another output.  To create a model that can
    estimate uncertainty, there must be the same number of
    'prediction' and 'variance' outputs.  Each variance output
    must have the same shape as the corresponding prediction
    output, and each element is an estimate of the variance in
    the corresponding prediction.  Also be aware that if a model
    supports uncertainty, it MUST use dropout on every layer,
    and dropout must be enabled during uncertainty prediction.
    Otherwise, the uncertainties it computes will be inaccurate.
    
  - other: Arbitrary output_types can be used to extract outputs
    produced by the model, but will have no additional
    processing performed.
  """
    def __init__(self,
                 model,
                 loss,
                 output_types=None,
                 batch_size=100,
                 model_dir=None,
                 learning_rate=0.001,
                 optimizer=None,
                 tensorboard=False,
                 tensorboard_log_frequency=100,
                 **kwargs):
        """Wrap a Keras model as a DeepChem model.

        Parameters
        ----------
        model: tf.keras.Model
          the Keras model implementing the calculation
        loss: dc.models.losses.Loss or function
          a Loss or function defining how to compute the training loss for
          each batch.  A Loss object is wrapped in a _StandardLoss; anything
          else is stored and used as given.
        output_types: list of strings
          the type of each output from the model ('prediction', 'loss',
          'variance', or any other string).  If no 'loss' outputs are given,
          the 'prediction' outputs are used for computing the loss.
        batch_size: int
          default batch size for training and evaluating
        model_dir: str
          the directory on disk where the model will be stored.  If this is
          None, a temporary directory is created.
        learning_rate: float or LearningRateSchedule
          the learning rate to use for fitting.  If optimizer is specified,
          this is ignored.
        optimizer: Optimizer
          the optimizer to use for fitting.  If this is specified,
          learning_rate is ignored.  If None, Adam is used.
        tensorboard: bool
          whether to log progress to TensorBoard during training
        tensorboard_log_frequency: int
          the frequency at which to log data to TensorBoard, measured in
          batches
        """
        super(KerasModel, self).__init__(model_instance=model,
                                         model_dir=model_dir,
                                         **kwargs)
        self.model = model
        self._loss_fn = (_StandardLoss(model, loss)
                         if isinstance(loss, Loss) else loss)
        self.batch_size = batch_size
        self.optimizer = (Adam(learning_rate=learning_rate)
                          if optimizer is None else optimizer)

        # The summary writer is only created when TensorBoard logging is on.
        self.tensorboard = tensorboard
        self.tensorboard_log_frequency = tensorboard_log_frequency
        if self.tensorboard:
            self._summary_writer = tf.summary.create_file_writer(
                self.model_dir)

        # Sort the output indices into one list per output type.
        if output_types is None:
            self._prediction_outputs = None
            self._loss_outputs = None
            self._variance_outputs = None
            self._other_outputs = None
        else:
            prediction_idx = []
            loss_idx = []
            variance_idx = []
            other_idx = []
            for index, output_type in enumerate(output_types):
                if output_type == 'prediction':
                    prediction_idx.append(index)
                elif output_type == 'loss':
                    loss_idx.append(index)
                elif output_type == 'variance':
                    variance_idx.append(index)
                else:
                    other_idx.append(index)
            self._prediction_outputs = prediction_idx
            # With no dedicated loss outputs, the loss is computed from the
            # prediction outputs (the very same list object).
            self._loss_outputs = loss_idx if loss_idx else prediction_idx
            self._variance_outputs = variance_idx
            self._other_outputs = other_idx

        # Flags and caches for the lazily created pieces.
        self._built = False
        self._inputs_built = False
        self._training_ops_built = False
        self._output_functions = {}
        self._gradient_fn_for_vars = {}

    def _ensure_built(self):
        """The first time this is called, create internal data structures."""
        if self._built:
            return
        self._built = True
        self._global_step = tf.Variable(0, trainable=False)
        self._tf_optimizer = self.optimizer._create_optimizer(
            self._global_step)
        self._checkpoint = tf.train.Checkpoint(optimizer=self._tf_optimizer,
                                               model=self.model)

    def _create_inputs(self, example_inputs):
        """The first time this is called, create tensors representing the inputs and outputs."""
        if self._inputs_built:
            return
        self._ensure_built()
        self._inputs_built = True
        if (self.model.inputs is not None) and len(self.model.inputs) > 0:
            self._input_shapes = [t.shape for t in self.model.inputs]
            self._input_dtypes = [
                t.dtype.as_numpy_dtype for t in self.model.inputs
            ]
        else:
            self._input_shapes = [(None, ) + i.shape[1:]
                                  for i in example_inputs]
            self._input_dtypes = [
                np.float32 if x.dtype == np.float64 else x.dtype
                for x in example_inputs
            ]

    def _create_training_ops(self, example_batch):
        """The first time this is called, create tensors used in optimization."""
        if self._training_ops_built:
            return
        self._create_inputs(example_batch[0])
        self._training_ops_built = True
        self._label_dtypes = [
            np.float32 if x.dtype == np.float64 else x.dtype
            for x in example_batch[1]
        ]
        self._weights_dtypes = [
            np.float32 if x.dtype == np.float64 else x.dtype
            for x in example_batch[2]
        ]

    def fit(self,
            dataset,
            nb_epoch=10,
            max_checkpoints_to_keep=5,
            checkpoint_interval=1000,
            deterministic=False,
            restore=False,
            variables=None,
            loss=None,
            callbacks=[]):
        """Fit the model to a dataset.

        The dataset is turned into a batch generator with default_generator()
        and the actual training is delegated to fit_generator().

        Parameters
        ----------
        dataset: Dataset
          the Dataset to train on
        nb_epoch: int
          the number of epochs to train for
        max_checkpoints_to_keep: int
          the maximum number of checkpoints to keep.  Older checkpoints are
          discarded.
        checkpoint_interval: int
          the frequency at which to write checkpoints, measured in training
          steps.  Set this to 0 to disable automatic checkpointing.
        deterministic: bool
          if True, the samples are processed in order.  If False, a different
          random order is used for each epoch.
        restore: bool
          if True, restore the model from the most recent checkpoint and
          continue training from there.  If False, retrain the model from
          scratch.
        variables: list of tf.Variable
          the variables to train.  If None (the default), all trainable
          variables in the model are used.
        loss: function
          a function of the form f(outputs, labels, weights) that computes the
          loss for each batch.  If None (the default), the model's standard
          loss function is used.
        callbacks: function or list of functions
          one or more functions of the form f(model, step) that will be
          invoked after every step.  This can be used to perform validation,
          logging, etc.

        Returns
        -------
        the value returned by fit_generator()
        """
        batches = self.default_generator(dataset,
                                         epochs=nb_epoch,
                                         deterministic=deterministic)
        return self.fit_generator(batches, max_checkpoints_to_keep,
                                  checkpoint_interval, restore, variables,
                                  loss, callbacks)

    def fit_generator(self,
                      generator,
                      max_checkpoints_to_keep=5,
                      checkpoint_interval=1000,
                      restore=False,
                      variables=None,
                      loss=None,
                      callbacks=[]):
        """Train this model on data from a generator.

        Parameters
        ----------
        generator: generator
          this should generate batches, each represented as a tuple of the
          form (inputs, labels, weights).
        max_checkpoints_to_keep: int
          the maximum number of checkpoints to keep.  Older checkpoints are
          discarded.
        checkpoint_interval: int
          the frequency at which to write checkpoints, measured in training
          steps.  Set this to 0 to disable automatic checkpointing.
        restore: bool
          if True, restore the model from the most recent checkpoint and
          continue training from there.  If False, retrain the model from
          scratch.
        variables: list of tf.Variable
          the variables to train.  If None (the default), all trainable
          variables in the model are used.
        loss: function
          a function of the form f(outputs, labels, weights) that computes the
          loss for each batch.  If None (the default), the model's standard
          loss function is used.
        callbacks: function or list of functions
          one or more functions of the form f(model, step) that will be
          invoked after every step.  This can be used to perform validation,
          logging, etc.

        Returns
        -------
        the most recently reported average loss: the average over the batches
        processed since the previous progress report, where reports happen
        every tensorboard_log_frequency steps and once more at the end of
        training if any batches remain unreported.  0.0 if the generator
        produced no batches.
        """
        if not isinstance(callbacks, Sequence):
            callbacks = [callbacks]
        self._ensure_built()
        if checkpoint_interval > 0:
            manager = tf.train.CheckpointManager(self._checkpoint,
                                                 self.model_dir,
                                                 max_checkpoints_to_keep)
        avg_loss = 0.0
        # The running sum is reset after every report, so the reported value
        # is remembered separately; otherwise a run whose last batch falls on
        # a reporting step would return 0.0.
        last_avg_loss = 0.0
        averaged_batches = 0
        if loss is None:
            loss = self._loss_fn
        var_key = None
        if variables is not None:
            var_key = tuple(v.ref() for v in variables)

            # The optimizer creates internal variables the first time apply_gradients()
            # is called for a new set of variables.  If that happens inside a function
            # annotated with tf.function it throws an exception, so call it once here.

            zero_grads = [tf.zeros(v.shape) for v in variables]
            self._tf_optimizer.apply_gradients(zip(zero_grads, variables))
        # One gradient function is cached per distinct set of variables.
        if var_key not in self._gradient_fn_for_vars:
            self._gradient_fn_for_vars[var_key] = self._create_gradient_fn(
                variables)
        apply_gradient_for_batch = self._gradient_fn_for_vars[var_key]
        start_time = time.time()

        # Main training loop.

        for batch in generator:
            self._create_training_ops(batch)
            # Restoring is done once, after the first batch has been seen.
            if restore:
                self.restore()
                restore = False
            inputs, labels, weights = self._prepare_batch(batch)

            # Execute the loss function, accumulating the gradients.

            if len(inputs) == 1:
                inputs = inputs[0]

            batch_loss = apply_gradient_for_batch(inputs, labels, weights,
                                                  loss)
            current_step = self._global_step.numpy()

            avg_loss += batch_loss

            # Report progress and write checkpoints.
            averaged_batches += 1
            should_log = (current_step % self.tensorboard_log_frequency == 0)
            if should_log:
                avg_loss = float(avg_loss) / averaged_batches
                logger.info('Ending global_step %d: Average loss %g' %
                            (current_step, avg_loss))
                last_avg_loss = avg_loss
                avg_loss = 0.0
                averaged_batches = 0

            if checkpoint_interval > 0 and current_step % checkpoint_interval == checkpoint_interval - 1:
                manager.save()
            for c in callbacks:
                c(self, current_step)
            if self.tensorboard and should_log:
                with self._summary_writer.as_default():
                    tf.summary.scalar('loss', batch_loss, current_step)

        # Report final results.
        if averaged_batches > 0:
            avg_loss = float(avg_loss) / averaged_batches
            logger.info('Ending global_step %d: Average loss %g' %
                        (current_step, avg_loss))
            last_avg_loss = avg_loss

        if checkpoint_interval > 0:
            manager.save()

        end_time = time.time()
        logger.info("TIMING: model fitting took %0.3f s" %
                    (end_time - start_time))
        return last_avg_loss

    def _create_gradient_fn(self, variables):
        """Create a function that computes gradients and applies them to the model.

        Because of the way TensorFlow function tracing works, we need to
        create a separate function for each new set of variables.

        Parameters
        ----------
        variables: list of tf.Variable or None
          the variables to update.  If None, the model's trainable variables
          are used.

        Returns
        -------
        a function f(inputs, labels, weights, loss) that performs one
        optimization step and returns the loss for the batch
        """
        @tf.function(experimental_relax_shapes=True)
        def apply_gradient_for_batch(inputs, labels, weights, loss):
            # Run the forward pass under the tape so gradients can be taken.
            with tf.GradientTape() as tape:
                outputs = self.model(inputs, training=True)
                # Normalize a single output tensor to a list.
                if isinstance(outputs, tf.Tensor):
                    outputs = [outputs]
                # Keep only the outputs designated for the loss, if any were.
                if self._loss_outputs is not None:
                    outputs = [outputs[i] for i in self._loss_outputs]
                batch_loss = loss(outputs, labels, weights)
            if variables is None:
                vars = self.model.trainable_variables
            else:
                vars = variables
            grads = tape.gradient(batch_loss, vars)
            self._tf_optimizer.apply_gradients(zip(grads, vars))
            # Count this optimization step.
            self._global_step.assign_add(1)
            return batch_loss

        return apply_gradient_for_batch

    def fit_on_batch(self, X, y, w, variables=None, loss=None, callbacks=[]):
        """Perform a single step of training.

        The batch is wrapped in a NumpyDataset and passed to fit() for one
        epoch.

        Parameters
        ----------
        X: ndarray
          the inputs for the batch
        y: ndarray
          the labels for the batch
        w: ndarray
          the weights for the batch
        variables: list of tf.Variable
          the variables to train.  If None (the default), all trainable
          variables in the model are used.
        loss: function
          a function of the form f(outputs, labels, weights) that computes the
          loss for each batch.  If None (the default), the model's standard
          loss function is used.
        callbacks: function or list of functions
          one or more functions of the form f(model, step) that will be
          invoked after every step.  This can be used to perform validation,
          logging, etc.

        Returns
        -------
        the value returned by fit()
        """
        # This class tracks construction through _built/_ensure_built(); it
        # defines no `built` attribute or `build()` method.
        self._ensure_built()
        dataset = NumpyDataset(X, y, w)
        return self.fit(dataset,
                        nb_epoch=1,
                        variables=variables,
                        loss=loss,
                        callbacks=callbacks)

    def _predict(self, generator, transformers, outputs, uncertainty,
                 other_output_types):
        """
    Predict outputs for data provided by a generator.

    This is the private implementation of prediction.  Do not
    call it directly.  Instead call one of the public prediction
    methods.

    Parameters
    ----------
    generator: generator
      this should generate batches, each represented as a tuple of the form
      (inputs, labels, weights).
    transformers: list of dc.trans.Transformers
      Transformers that the input data has been transformed by.  The output
      is passed through these transformers to undo the transformations.
    outputs: Tensor or list of Tensors
      The outputs to return.  If this is None, the model's standard prediction
      outputs will be returned.  Alternatively one or more Tensors within the
      model may be specified, in which case the output of those Tensors will be
      returned.
    uncertainty: bool
      specifies whether this is being called as part of estimating uncertainty.
      If True, it sets the training flag so that dropout will be enabled, and
      returns the values of the uncertainty outputs.
    other_output_types: list, optional
      Provides a list of other output_types (strings) to predict from model.
    Returns:
      a NumPy array of the model produces a single output, or a list of arrays
      if it produces multiple outputs
    """
        results = None
        variances = None
        if (outputs is not None) and (other_output_types is not None):
            raise ValueError(
                'This model cannot compute outputs and other output_types simultaneously. Please invoke one at a time.'
            )
        if uncertainty and (other_output_types is not None):
            raise ValueError(
                'This model cannot compute uncertainties and other output types simultaneously. Please invoke one at a time.'
            )
        if uncertainty:
            assert outputs is None
            if self._variance_outputs is None or len(
                    self._variance_outputs) == 0:
                raise ValueError('This model cannot compute uncertainties')
            if len(self._variance_outputs) != len(self._prediction_outputs):
                raise ValueError(
                    'The number of variances must exactly match the number of outputs'
                )
        if other_output_types:
            assert outputs is None
            if self._other_outputs is None or len(self._other_outputs) == 0:
                raise ValueError(
                    'This model cannot compute other outputs since no other output_types were specified.'
                )
        if (outputs is not None and self.model.inputs is not None
                and len(self.model.inputs) == 0):
            raise ValueError(
                "Cannot use 'outputs' argument with a model that does not specify its inputs. Note models defined in imperative subclassing style cannot specify outputs"
            )
        if isinstance(outputs, tf.Tensor):
            outputs = [outputs]
        for batch in generator:
            inputs, labels, weights = batch
            self._create_inputs(inputs)
            inputs, _, _ = self._prepare_batch((inputs, None, None))

            # Invoke the model.
            if len(inputs) == 1:
                inputs = inputs[0]
            if outputs is not None:
                outputs = tuple(outputs)
                key = tuple(t.ref() for t in outputs)
                if key not in self._output_functions:
                    self._output_functions[key] = tf.keras.backend.function(
                        self.model.inputs, outputs)
                output_values = self._output_functions[key](inputs)
            else:
                output_values = self._compute_model(inputs)
                if isinstance(output_values, tf.Tensor):
                    output_values = [output_values]
                output_values = [t.numpy() for t in output_values]

            # Apply tranformers and record results.
            if uncertainty:
                var = [output_values[i] for i in self._variance_outputs]
                if variances is None:
                    variances = [var]
                else:
                    for i, t in enumerate(var):
                        variances[i].append(t)
            access_values = []
            if other_output_types:
                access_values += self._other_outputs
            elif self._prediction_outputs is not None:
                access_values += self._prediction_outputs

            if len(access_values) > 0:
                output_values = [output_values[i] for i in access_values]

            if len(transformers) > 0:
                if len(output_values) > 1:
                    raise ValueError(
                        "predict() does not support Transformers for models with multiple outputs."
                    )
                elif len(output_values) == 1:
                    output_values = [
                        undo_transforms(output_values[0], transformers)
                    ]
            if results is None:
                results = [[] for i in range(len(output_values))]
            for i, t in enumerate(output_values):
                results[i].append(t)

        # Concatenate arrays to create the final results.
        final_results = []
        final_variances = []
        for r in results:
            final_results.append(np.concatenate(r, axis=0))
        if uncertainty:
            for v in variances:
                final_variances.append(np.concatenate(v, axis=0))
            return zip(final_results, final_variances)
        if len(final_results) == 1:
            return final_results[0]
        else:
            return final_results

    @tf.function(experimental_relax_shapes=True)
    def _compute_model(self, inputs):
        """Run a forward pass of the wrapped Keras model with training disabled.

    Parameters
    ----------
    inputs:
      the input(s) handed unchanged to `self.model`

    Returns
    -------
    whatever `self.model` returns when called with `training=False`
    """
        predictions = self.model(inputs, training=False)
        return predictions

    def predict_on_generator(self,
                             generator,
                             transformers=[],
                             outputs=None,
                             output_types=None):
        """Compute predictions for every batch produced by a generator.

    This is a thin public wrapper around `_predict()` that always runs it
    with uncertainty estimation switched off.

    Parameters
    ----------
    generator: generator
      yields batches, each represented as a tuple of the form
      (inputs, labels, weights).
    transformers: list of dc.trans.Transformers
      Transformers that the input data has been transformed by.  The output
      is passed through these transformers to undo the transformations.
    outputs: Tensor or list of Tensors
      The outputs to return.  If this is None, the model's standard
      prediction outputs will be returned.  Alternatively one or more
      Tensors within the model may be specified, in which case the output
      of those Tensors will be returned.  If outputs is specified,
      output_types must be None.
    output_types: String or list of Strings
      If specified, all outputs of this type will be retrieved from the
      model.  If output_types is specified, outputs must be None.

    Returns
    -------
    a NumPy array if the model produces a single output, or a list of arrays
    if it produces multiple outputs
    """
        # Fourth positional argument of _predict(): no uncertainty estimation.
        uncertainty = False
        return self._predict(generator, transformers, outputs, uncertainty,
                             output_types)

    def predict_on_batch(self, X, transformers=[], outputs=None):
        """Generate predictions for one in-memory batch of samples.

    The array is wrapped in a label-free NumpyDataset and handed to
    `predict()`.

    Parameters
    ----------
    X: ndarray
      the input data, as a Numpy array.
    transformers: list of dc.trans.Transformers
      Transformers that the input data has been transformed by.  The output
      is passed through these transformers to undo the transformations.
    outputs: Tensor or list of Tensors
      The outputs to return.  If this is None, the model's standard prediction
      outputs will be returned.  Alternatively one or more Tensors within the
      model may be specified, in which case the output of those Tensors will be
      returned.

    Returns
    -------
    a NumPy array if the model produces a single output, or a list of arrays
    if it produces multiple outputs
    """
        return self.predict(NumpyDataset(X=X, y=None), transformers, outputs)

    def predict_uncertainty_on_batch(self, X, masks=50):
        """Predict outputs and their uncertainties for one in-memory batch.

    The array is wrapped in a label-free NumpyDataset and handed to
    `predict_uncertainty()`.

    The uncertainty is computed as described in https://arxiv.org/abs/1703.04977.
    It involves repeating the prediction many times with different dropout masks.
    The prediction is computed as the average over all the predictions.  The
    uncertainty includes both the variation among the predicted values (epistemic
    uncertainty) and the model's own estimates for how well it fits the data
    (aleatoric uncertainty).  Not all models support uncertainty prediction.

    Parameters
    ----------
    X: ndarray
      the input data, as a Numpy array.
    masks: int
      the number of dropout masks to average over

    Returns
    -------
    for each output, a tuple (y_pred, y_std) where y_pred is the predicted
    value of the output, and each element of y_std estimates the standard
    deviation of the corresponding element of y_pred
    """
        return self.predict_uncertainty(NumpyDataset(X=X, y=None), masks)

    def predict(self,
                dataset,
                transformers=[],
                outputs=None,
                output_types=None):
        """Make predictions for every sample of a Dataset.

    Batches are drawn from `default_generator()` in 'predict' mode without
    padding and forwarded to `predict_on_generator()`.

    Parameters
    ----------
    dataset: dc.data.Dataset
      Dataset to make prediction on
    transformers: list of dc.trans.Transformers
      Transformers that the input data has been transformed by.  The output
      is passed through these transformers to undo the transformations.
    outputs: Tensor or list of Tensors
      The outputs to return.  If this is None, the model's standard prediction
      outputs will be returned.  Alternatively one or more Tensors within the
      model may be specified, in which case the output of those Tensors will be
      returned.
    output_types: list of Strings
      The output types to return. Will retrieve all outputs of these types
      from the model.

    Returns
    -------
    a NumPy array if the model produces a single output, or a list of arrays
    if it produces multiple outputs
    """
        batches = self.default_generator(dataset,
                                         mode='predict',
                                         pad_batches=False)
        options = dict(transformers=transformers,
                       outputs=outputs,
                       output_types=output_types)
        return self.predict_on_generator(batches, **options)

    def predict_embedding(self, dataset):
        """Return the embeddings the underlying model produces for a Dataset.

    Only outputs declared with an `output_type` of `'embedding'` in the
    model definition are retrieved.  No transformers are undone.

    Parameters
    ----------
    dataset: dc.data.Dataset
      Dataset to make prediction on

    Returns
    -------
    a NumPy array of the embeddings the model produces, or a list
    of arrays if it produces multiple embeddings
    """
        batches = self.default_generator(dataset,
                                         mode='predict',
                                         pad_batches=False)
        wanted_types = ['embedding']
        return self._predict(batches, [], None, False, wanted_types)

    def predict_uncertainty(self, dataset, masks=50):
        """Predict the model's outputs, along with the uncertainty in each one.

    The uncertainty is computed as described in https://arxiv.org/abs/1703.04977.
    It involves repeating the prediction many times with different dropout masks.
    The prediction is computed as the average over all the predictions.  The
    uncertainty includes both the variation among the predicted values (epistemic
    uncertainty) and the model's own estimates for how well it fits the data
    (aleatoric uncertainty).  Not all models support uncertainty prediction.

    Parameters
    ----------
    dataset: dc.data.Dataset
      Dataset to make prediction on
    masks: int
      the number of dropout masks to average over

    Returns
    -------
    for each output, a tuple (y_pred, y_std) where y_pred is the predicted
    value of the output, and each element of y_std estimates the standard
    deviation of the corresponding element of y_pred.  A model with a single
    output yields one (y_pred, y_std) tuple; otherwise an iterator of such
    tuples is returned.
    """
        # Running per-output sums over all dropout masks.
        sum_pred = []
        sum_sq_pred = []
        sum_var = []
        for _ in range(masks):
            generator = self.default_generator(dataset,
                                               mode='uncertainty',
                                               pad_batches=False)
            results = self._predict(generator, [], None, True, None)
            if not sum_pred:
                # First pass: the freshly produced arrays become the
                # accumulators that later passes add into in place.
                for p, v in results:
                    sum_pred.append(p)
                    sum_sq_pred.append(p * p)
                    sum_var.append(v)
            else:
                for j, (p, v) in enumerate(results):
                    sum_pred[j] += p
                    sum_sq_pred[j] += p * p
                    sum_var[j] += v
        output = []
        std = []
        for total, total_sq, total_var in zip(sum_pred, sum_sq_pred, sum_var):
            mean = total / masks
            # Epistemic part (E[p^2] - E[p]^2) plus the mean aleatoric variance.
            variance = total_sq / masks - mean * mean + total_var / masks
            # Rounding can push E[p^2] - E[p]^2 slightly below zero when the
            # passes agree, which would make sqrt return NaN; clamp at zero.
            # np.maximum still propagates genuine NaNs from the model.
            std.append(np.sqrt(np.maximum(variance, 0)))
            output.append(mean)
        if len(output) == 1:
            return (output[0], std[0])
        else:
            return zip(output, std)

    def evaluate_generator(self,
                           generator,
                           metrics,
                           transformers=[],
                           per_task_metrics=False):
        """Score this model on the data produced by a generator.

    The work is delegated to a GeneratorEvaluator built from this model,
    the generator and the transformers.

    Parameters
    ----------
    generator: generator
      this should generate batches, each represented as a tuple of the form
      (inputs, labels, weights).
    metrics: deepchem.metrics.Metric
      Evaluation metric(s), passed through to
      `GeneratorEvaluator.compute_model_performance()`.
    transformers: list of dc.trans.Transformers
      Transformers that the input data has been transformed by.  The output
      is passed through these transformers to undo the transformations.
    per_task_metrics: bool
      If True, return per-task scores.

    Returns
    -------
    dict
      Maps tasks to scores under metric.
    """
        return GeneratorEvaluator(
            self, generator,
            transformers).compute_model_performance(metrics,
                                                    per_task_metrics)

    def compute_saliency(self, X):
        """Compute the saliency map for an input sample.

    This computes the Jacobian matrix with the derivative of each output element
    with respect to each input element.  More precisely,

    - If this model has a single output, it returns a matrix of shape
      (output_shape, input_shape) with the derivatives.
    - If this model has multiple outputs, it returns a list of matrices, one
      for each output.

    This method cannot be used on models that take multiple inputs.

    Parameters
    ----------
    X: ndarray
      the input data for a single sample

    Returns
    -------
    the Jacobian matrix, or a list of matrices
    """
        # Remember the per-sample shape, then add a leading batch axis of
        # size 1 so the sample can be fed through the model.
        input_shape = X.shape
        X = np.reshape(X, [1] + list(X.shape))
        # NOTE(review): _create_inputs() is defined elsewhere; presumably it
        # records the input shapes/dtypes that _prepare_batch() relies on.
        self._create_inputs([X])
        X, _, _ = self._prepare_batch(([X], None, None))

        # Use a GradientTape to compute gradients.  The tape is persistent
        # because gradient() is called once per output element, and only X is
        # watched (watch_accessed_variables=False), so gradients are taken
        # with respect to the input rather than the model's variables.

        X = tf.constant(X[0])
        with tf.GradientTape(persistent=True,
                             watch_accessed_variables=False) as tape:
            tape.watch(X)
            outputs = self._compute_model(X)
            # Normalize a single-output model to a one-element list.
            if isinstance(outputs, tf.Tensor):
                outputs = [outputs]
            final_result = []
            for output in outputs:
                # Output shape without its leading (batch) dimension.
                output_shape = tuple(output.shape.as_list()[1:])
                # Flatten so each scalar output element can be differentiated
                # separately.
                output = tf.reshape(output, [-1])
                result = []
                for i in range(output.shape[0]):
                    result.append(tape.gradient(output[i], X))
                # Stack the per-element gradients and fold them back into
                # (output dims..., input dims...).
                final_result.append(
                    tf.reshape(tf.stack(result),
                               output_shape + input_shape).numpy())
        # A single-output model returns the bare matrix, not a list.
        if len(final_result) == 1:
            return final_result[0]
        return final_result

    def _prepare_batch(self, batch):
        inputs, labels, weights = batch
        inputs = [
            x if x.dtype == t else x.astype(t)
            for x, t in zip(inputs, self._input_dtypes)
        ]
        if labels is not None:
            labels = [
                x if x.dtype == t else x.astype(t)
                for x, t in zip(labels, self._label_dtypes)
            ]
        if weights is not None:
            weights = [
                x if x.dtype == t else x.astype(t)
                for x, t in zip(weights, self._weights_dtypes)
            ]
        for i in range(len(inputs)):
            shape = inputs[i].shape
            dims = len(shape)
            expected_dims = len(self._input_shapes[i])
            if dims < expected_dims:
                inputs[i] = inputs[i].reshape(shape + (1, ) *
                                              (expected_dims - dims))
            elif dims > expected_dims and all(d == 1
                                              for d in shape[expected_dims:]):
                inputs[i] = inputs[i].reshape(shape[:expected_dims])
        return (inputs, labels, weights)

    def default_generator(self,
                          dataset,
                          epochs=1,
                          mode='fit',
                          deterministic=True,
                          pad_batches=True):
        """Yield the batches of a dataset in the form the model consumes.

    Subclasses may override this method to customize how model inputs are
    generated from the data.  This base implementation does not look at
    `mode`.

    Parameters
    ----------
    dataset: Dataset
      the data to iterate
    epochs: int
      the number of times to iterate over the full dataset
    mode: str
      allowed values are 'fit' (called during training), 'predict' (called
      during prediction), and 'uncertainty' (called during uncertainty
      prediction)
    deterministic: bool
      whether to iterate over the dataset in order, or randomly shuffle the
      data for each epoch
    pad_batches: bool
      whether to pad each batch up to this model's preferred batch size

    Returns
    -------
    a generator that iterates batches, each represented as a tuple of lists:
    ([inputs], [outputs], [weights])
    """
        for _ in range(epochs):
            batch_iter = dataset.iterbatches(batch_size=self.batch_size,
                                             deterministic=deterministic,
                                             pad_batches=pad_batches)
            # The sample ids that iterbatches() also yields are not needed.
            for features, targets, sample_weights, _ids in batch_iter:
                yield ([features], [targets], [sample_weights])

    def save_checkpoint(self, max_checkpoints_to_keep=5, model_dir=None):
        """Write a checkpoint of the model to disk.

    Usually you do not need to call this method, since fit() saves checkpoints
    automatically.  If you have disabled automatic checkpointing during fitting,
    this can be called to manually write checkpoints.

    Parameters
    ----------
    max_checkpoints_to_keep: int
      the maximum number of checkpoints to keep.  Older checkpoints are discarded.
    model_dir: str, default None
      Model directory to save checkpoint to. If None, revert to self.model_dir
    """
        self._ensure_built()
        target_dir = self.model_dir if model_dir is None else model_dir
        # Create the directory on first use.
        if not os.path.exists(target_dir):
            os.makedirs(target_dir)
        tf.train.CheckpointManager(self._checkpoint, target_dir,
                                   max_checkpoints_to_keep).save()

    def get_checkpoints(self, model_dir=None):
        """Get a list of all available checkpoint files.

    Parameters
    ----------
    model_dir: str, default None
      Directory to get list of checkpoints from. Reverts to self.model_dir if None

    Returns
    -------
    the checkpoint paths recorded in the directory's checkpoint state, or an
    empty list if the directory holds no checkpoint state
    """
        if model_dir is None:
            model_dir = self.model_dir
        state = tf.train.get_checkpoint_state(model_dir)
        if state is None:
            # get_checkpoint_state() returns None when no checkpoint state is
            # available; report "no checkpoints" instead of failing with an
            # AttributeError on None.
            return []
        return state.all_model_checkpoint_paths

    def restore(self, checkpoint=None, model_dir=None, session=None):
        """Reload the values of all variables from a checkpoint file.

    Parameters
    ----------
    checkpoint: str
      the path to the checkpoint file to load.  If this is None, the most recent
      checkpoint will be chosen automatically.  Call get_checkpoints() to get a
      list of all available checkpoints.
    model_dir: str, default None
      Directory to restore checkpoint from. If None, use self.model_dir.
    session: default None
      accepted for interface compatibility; this method does not use it.

    Raises
    ------
    ValueError
      if no checkpoint was given and none can be found in the directory
    """
        self._ensure_built()
        directory = self.model_dir if model_dir is None else model_dir
        path = checkpoint
        if path is None:
            # Fall back to the newest checkpoint in the directory.
            path = tf.train.latest_checkpoint(directory)
            if path is None:
                raise ValueError('No checkpoint found')
        self._checkpoint.restore(path)

    def get_global_step(self):
        """Return the number of fitting steps performed so far, as an int."""
        step = self._global_step
        return int(step)

    def _create_assignment_map(self, source_model, include_top=True, **kwargs):
        """
    Creates a default assignment map between variables of source and current model.
    This is used only when a custom assignment map is missing. This assumes the
    model is made of different layers followed by a dense layer for mapping to
    output tasks. include_top is used to control whether or not the final dense
    layer is used. The default assignment map is useful in cases where the type
    of task is different (classification vs regression) and/or number of tasks.

    Parameters
    ----------
    source_model: dc.models.KerasModel
        Source model to copy variable values from.
    include_top: bool, default True
        if true, copies the last dense layer
    """
        assignment_map = {}
        source_vars = source_model.model.trainable_variables
        dest_vars = self.model.trainable_variables

        if not include_top:
            source_vars = source_vars[:-2]
            dest_vars = dest_vars[:-2]

        for source_var, dest_var in zip(source_vars, dest_vars):
            assignment_map[source_var.ref()] = dest_var

        return assignment_map

    def _create_value_map(self, source_model, **kwargs):
        """
    Creates a value map between variables in the source model and their
    current values. This is used only when a custom value map is missing, and
    assumes the restore method has been called under self.session.

    Parameters
    ----------
    source_model: dc.models.KerasModel
        Source model to create value map from
    """
        value_map = {}
        source_vars = source_model.model.trainable_variables

        for source_var in source_vars:
            value_map[source_var.ref()] = source_var.numpy()

        return value_map

    def load_from_pretrained(self,
                             source_model,
                             assignment_map=None,
                             value_map=None,
                             checkpoint=None,
                             model_dir=None,
                             include_top=True,
                             inputs=None,
                             **kwargs):
        """Copy variable values from a pretrained model into this model.

    `source_model` can either be a pretrained model or a model with the same
    architecture. `value_map` is a variable-value dictionary. If no
    `value_map` is provided, the variable values are restored to the
    `source_model` from a checkpoint and a default `value_map` is created.
    `assignment_map` is a dictionary mapping variables from the
    `source_model` to the current model. If no `assignment_map` is provided,
    one is made from scratch and assumes the model is composed of several
    different layers, with the final one being a dense layer. include_top is
    used to control whether or not the final dense layer is used. The default
    assignment map is useful in cases where the type of task is different
    (classification vs regression) and/or number of tasks in the setting.

    Parameters
    ----------
    source_model: dc.KerasModel, required
      source_model can either be the pretrained model or a dc.KerasModel with
      the same architecture as the pretrained model. It is used to restore from
      a checkpoint, if value_map is None and to create a default assignment map
      if assignment_map is None
    assignment_map: Dict, default None
      Dictionary mapping the source_model variables and current model variables
    value_map: Dict, default None
      Dictionary containing source_model trainable variables mapped to numpy
      arrays. If value_map is None, the values are restored and a default
      variable map is created using the restored values
    checkpoint: str, default None
      the path to the checkpoint file to load.  If this is None, the most recent
      checkpoint will be chosen automatically.  Call get_checkpoints() to get a
      list of all available checkpoints
    model_dir: str, default None
      Restore model from custom model directory if needed
    include_top: bool, default True
        if True, copies the weights and bias associated with the final dense
        layer. Used only when assignment map is None
    inputs: List, input tensors for model
        if not None, then the weights are built for both the source and self.
        This option is useful only for models that are built by
        subclassing tf.keras.Model, and not using the functional API by tf.keras
    """
        if inputs is not None:
            # Calling each model once makes sure its weights exist.
            for keras_model in (source_model.model, self.model):
                keras_model(inputs)

        self._ensure_built()

        if value_map is None:
            logger.info(
                "No value map provided. Creating default value map from restored model."
            )
            source_model.restore(model_dir=model_dir, checkpoint=checkpoint)
            value_map = self._create_value_map(source_model=source_model)

        if assignment_map is None:
            logger.info(
                "No assignment map provided. Creating custom assignment map.")
            assignment_map = self._create_assignment_map(
                source_model=source_model, include_top=include_top)

        for source_ref, target_var in assignment_map.items():
            # Source and destination variables must agree in shape.
            assert source_ref.deref().shape == target_var.shape
            target_var.assign(value_map[source_ref])
예제 #11
0
    def __init__(self,
                 learner,
                 learning_rate=0.001,
                 optimization_steps=1,
                 meta_batch_size=10,
                 optimizer=Adam(),
                 model_dir=None):
        """Create an object for performing meta-optimization.

    Building the object constructs the whole TensorFlow graph (task loss,
    meta-loss, gradient accumulators and both training ops), opens a
    session and initializes all global variables.

    Parameters
    ----------
    learner: MetaLearner
      defines the meta-learning problem
    learning_rate: float or Tensor
      the learning rate to use for optimizing each task (not to be confused with the one used
      for meta-learning).  This can optionally be made a variable (represented as a
      Tensor), in which case the learning rate will itself be learnable.
    optimization_steps: int
      the number of steps of gradient descent to perform for each task
    meta_batch_size: int
      the number of tasks to use for each step of meta-learning
    optimizer: Optimizer
      the optimizer to use for meta-learning (not to be confused with the gradient descent
      optimization performed for each task)
    model_dir: str
      the directory in which the model will be saved.  If None, a temporary directory will be created.
    """
        # NOTE(review): the default `optimizer=Adam()` is evaluated once, when
        # the function is defined, so every instance created without an
        # explicit optimizer shares the same Adam object.

        # Record inputs.  `optimization_steps` is not stored; it is only used
        # below while building the graph.

        self.learner = learner
        self._learning_rate = learning_rate
        self.meta_batch_size = meta_batch_size
        self.optimizer = optimizer

        # Create the output directory if necessary.  A temporary directory is
        # flagged via _model_dir_is_temp.

        self._model_dir_is_temp = False
        if model_dir is not None:
            if not os.path.exists(model_dir):
                os.makedirs(model_dir)
        else:
            model_dir = tempfile.mkdtemp()
            self._model_dir_is_temp = True
        self.model_dir = model_dir
        self.save_file = "%s/%s" % (self.model_dir, "model")

        # Draw one example batch to learn the shapes and dtypes of the inputs.
        # The leading dimension is replaced by None so the placeholders accept
        # any batch size.

        learner.select_task()
        example_inputs = learner.get_batch()
        self._input_shapes = [(None, ) + i.shape[1:] for i in example_inputs]
        self._input_dtypes = [x.dtype for x in example_inputs]
        # Two parallel sets of placeholders with identical shapes and dtypes:
        # one for the task-optimization batch, one for the batch the final
        # meta-loss is evaluated on.
        self._input_placeholders = [
            tf.placeholder(dtype=tf.as_dtype(t), shape=s)
            for s, t in zip(self._input_shapes, self._input_dtypes)
        ]
        self._meta_placeholders = [
            tf.placeholder(dtype=tf.as_dtype(t), shape=s)
            for s, t in zip(self._input_shapes, self._input_dtypes)
        ]
        variables = learner.variables
        # NOTE(review): the third argument of compute_model() is presumably a
        # training flag (False for the stored loss/outputs, True for the loss
        # that gets differentiated) -- confirm against MetaLearner.
        self._loss, self._outputs = learner.compute_model(
            self._input_placeholders, variables, False)
        loss, _ = learner.compute_model(self._input_placeholders, variables,
                                        True)

        # Build the meta-learning model: unroll `optimization_steps` gradient
        # descent steps symbolically.  Variables that receive no gradient are
        # carried over unchanged.

        updated_variables = variables
        for i in range(optimization_steps):
            gradients = tf.gradients(loss, updated_variables)
            updated_variables = [
                v if g is None else v - self._learning_rate * g
                for v, g in zip(updated_variables, gradients)
            ]
            if i == optimization_steps - 1:
                # In the final loss, use different placeholders for all inputs so the loss will be
                # computed from a different batch.

                inputs = self._meta_placeholders
            else:
                inputs = self._input_placeholders
            loss, outputs = learner.compute_model(inputs, updated_variables,
                                                  True)
        self._meta_loss = loss

        # Create variables for accumulating the gradients.  Variables the
        # meta-loss does not depend on (gradient is None) are dropped; the
        # indices are walked in reverse so deletions do not shift the entries
        # still to be visited.

        variables = list(learner.variables)
        gradients = tf.gradients(self._meta_loss, variables)
        for i in reversed(range(len(variables))):
            if gradients[i] is None:
                del variables[i]
                del gradients[i]
        zero_gradients = [tf.zeros(g.shape, g.dtype) for g in gradients]
        summed_gradients = [
            tf.Variable(z, trainable=False) for z in zero_gradients
        ]
        # Op that resets every accumulator to zero.
        self._clear_gradients = tf.group(
            *[s.assign(z) for s, z in zip(summed_gradients, zero_gradients)])
        # Op that adds the current meta-gradients to the accumulators.
        self._add_gradients = tf.group(
            *[s.assign_add(g) for s, g in zip(summed_gradients, gradients)])

        # Create the optimizers for meta-optimization and task optimization.
        # The global step is a scalar int32 placeholder, so its value must be
        # fed in whenever these ops are run.

        self._global_step = tf.placeholder(tf.int32, [])
        grads_and_vars = list(zip(summed_gradients, variables))
        # The meta-optimizer is applied to the accumulated gradients.
        self._meta_train_op = optimizer._create_optimizer(
            self._global_step).apply_gradients(grads_and_vars)
        # Task optimization is plain gradient descent on the task loss.
        task_optimizer = GradientDescent(learning_rate=self._learning_rate)
        self._task_train_op = task_optimizer._create_optimizer(
            self._global_step).minimize(self._loss)
        self._session = tf.Session()
        self._session.run(tf.global_variables_initializer())

        # Create a Checkpoint for saving.  The learner's variables are attached
        # as the attribute `listed`.

        self._checkpoint = tf.train.Checkpoint()
        self._checkpoint.listed = learner.variables
예제 #12
0
  def __init__(self,
               model: tf.keras.Model,
               loss: Union[Loss, LossFn],
               output_types: Optional[List[str]] = None,
               batch_size: int = 100,
               model_dir: Optional[str] = None,
               learning_rate: Union[float, LearningRateSchedule] = 0.001,
               optimizer: Optional[Optimizer] = None,
               tensorboard: bool = False,
               wandb: bool = False,
               log_frequency: int = 100,
               wandb_logger: Optional[WandbLogger] = None,
               **kwargs) -> None:
    """Create a new KerasModel.

    Parameters
    ----------
    model: tf.keras.Model
      the Keras model implementing the calculation
    loss: dc.models.losses.Loss or function
      a Loss or function defining how to compute the training loss for each
      batch, as described above
    output_types: list of strings
      the type of each output from the model, as described above
    batch_size: int
      default batch size for training and evaluating
    model_dir: str
      the directory on disk where the model will be stored.  If this is None,
      a temporary directory is created.
    learning_rate: float or LearningRateSchedule
      the learning rate to use for fitting.  If optimizer is specified, this is
      ignored.
    optimizer: Optimizer
      the optimizer to use for fitting.  If this is specified, learning_rate is
      ignored.
    tensorboard: bool
      whether to log progress to TensorBoard during training
    wandb: bool
      whether to log progress to Weights & Biases during training (deprecated)
    log_frequency: int
      The frequency at which to log data. Data is logged using
      `logging` by default. If `tensorboard` is set, data is also
      logged to TensorBoard. If `wandb` is set, data is also logged
      to Weights & Biases. Logging happens at global steps. Roughly,
      a global step corresponds to one batch of training. If you'd
      like a printout every 10 batch steps, you'd set
      `log_frequency=10` for example.
    wandb_logger: WandbLogger
      the Weights & Biases logger object used to log data and metrics
    """
    super(KerasModel, self).__init__(model=model, model_dir=model_dir, **kwargs)
    self.loss = loss  # not used
    self.learning_rate = learning_rate  # not used
    self.output_types = output_types  # not used
    if isinstance(loss, Loss):
      self._loss_fn: LossFn = _StandardLoss(model, loss)
    else:
      self._loss_fn = loss
    self.batch_size = batch_size
    if optimizer is None:
      self.optimizer: Optimizer = Adam(learning_rate=learning_rate)
    else:
      self.optimizer = optimizer
    self.tensorboard = tensorboard

    # W&B flag support (DEPRECATED)
    if wandb:
      logger.warning(
          "`wandb` argument is deprecated. Please use `wandb_logger` instead. "
          "This argument will be removed in a future release of DeepChem.")
    if wandb and not _has_wandb:
      logger.warning(
          "You set wandb to True but W&B is not installed. To use wandb logging, "
          "run `pip install wandb; wandb login`")
    self.wandb = wandb and _has_wandb

    self.wandb_logger = wandb_logger
    # If `wandb=True` and no logger is provided, initialize default logger
    if self.wandb and (self.wandb_logger is None):
      self.wandb_logger = WandbLogger()

    # Setup and initialize W&B logging
    if (self.wandb_logger is not None) and (not self.wandb_logger.initialized):
      self.wandb_logger.setup()

    # Update config with KerasModel params
    wandb_logger_config = dict(
        loss=loss,
        output_types=output_types,
        batch_size=batch_size,
        model_dir=model_dir,
        learning_rate=learning_rate,
        optimizer=optimizer,
        tensorboard=tensorboard,
        log_frequency=log_frequency)
    wandb_logger_config.update(**kwargs)

    if self.wandb_logger is not None:
      self.wandb_logger.update_config(wandb_logger_config)

    # Backwards compatibility
    if "tensorboard_log_frequency" in kwargs:
      logger.warning(
          "tensorboard_log_frequency is deprecated. Please use log_frequency instead. This argument will be removed in a future release of DeepChem."
      )
      self.log_frequency = kwargs["tensorboard_log_frequency"]
    else:
      self.log_frequency = log_frequency
    if self.tensorboard:
      self._summary_writer = tf.summary.create_file_writer(self.model_dir)
    if output_types is None:
      self._prediction_outputs = None
      self._loss_outputs = None
      self._variance_outputs = None
      self._other_outputs = None
    else:
      self._prediction_outputs = []
      self._loss_outputs = []
      self._variance_outputs = []
      self._other_outputs = []
      for i, type in enumerate(output_types):
        if type == 'prediction':
          self._prediction_outputs.append(i)
        elif type == 'loss':
          self._loss_outputs.append(i)
        elif type == 'variance':
          self._variance_outputs.append(i)
        else:
          self._other_outputs.append(i)
      if len(self._loss_outputs) == 0:
        self._loss_outputs = self._prediction_outputs
    self._built = False
    self._inputs_built = False
    self._training_ops_built = False
    self._output_functions: Dict[Any, Any] = {}
    self._gradient_fn_for_vars: Dict[Any, Any] = {}
예제 #13
0
    def __init__(self,
                 model: torch.nn.Module,
                 loss: Union[Loss, LossFn],
                 output_types: Optional[List[str]] = None,
                 batch_size: int = 100,
                 model_dir: Optional[str] = None,
                 learning_rate: Union[float, LearningRateSchedule] = 0.001,
                 optimizer: Optional[Optimizer] = None,
                 tensorboard: bool = False,
                 wandb: bool = False,
                 log_frequency: int = 100,
                 device: Optional[torch.device] = None,
                 **kwargs) -> None:
        """Wrap a PyTorch module as a TorchModel.

        Parameters
        ----------
        model: torch.nn.Module
          the PyTorch model implementing the calculation
        loss: dc.models.losses.Loss or function
          a Loss or function defining how to compute the training loss for
          each batch.  A Loss instance is wrapped in a _StandardLoss; anything
          else is stored and used as given.
        output_types: list of strings
          the type of each output from the model.  'prediction', 'loss' and
          'variance' are recognized; any other value is recorded as an
          "other" output.  If no output is tagged 'loss', the prediction
          outputs double as the loss outputs.
        batch_size: int
          default batch size for training and evaluating
        model_dir: str
          the directory on disk where the model will be stored.  If this is
          None, a temporary directory is created.
        learning_rate: float or LearningRateSchedule
          the learning rate to use for fitting.  If optimizer is specified,
          this is ignored.
        optimizer: Optimizer
          the optimizer to use for fitting.  If this is specified,
          learning_rate is ignored.  If None, Adam is used.
        tensorboard: bool
          whether to log progress to TensorBoard during training
        wandb: bool
          whether to log progress to Weights & Biases during training.  If
          W&B is not installed a warning is logged and the flag is disabled.
        log_frequency: int
          The frequency at which to log data, measured in global steps
          (roughly, one global step per training batch).  For a printout
          every 10 batches, set `log_frequency=10`.
        device: torch.device
          the device on which to run computations.  If None, CUDA is used
          when available and the CPU otherwise.
        """
        super(TorchModel, self).__init__(model_instance=model,
                                         model_dir=model_dir,
                                         **kwargs)
        self.model = model
        self._loss_fn: LossFn = (_StandardLoss(model, loss) if isinstance(
            loss, Loss) else loss)
        self.batch_size = batch_size
        self.optimizer: Optimizer = (Adam(learning_rate=learning_rate)
                                     if optimizer is None else optimizer)
        self.tensorboard = tensorboard

        # Pick a device automatically when the caller did not supply one,
        # then move the module's parameters onto it.
        if device is None:
            device = torch.device(
                'cuda' if torch.cuda.is_available() else 'cpu')
        self.device = device
        model.to(device)

        # W&B logging is only enabled when the package is importable.
        if wandb:
            if not is_wandb_available():
                logger.warning(
                    "You set wandb to True but W&B is not installed. To use wandb logging, "
                    "run `pip install wandb; wandb login` see https://docs.wandb.com/huggingface."
                )
        self.wandb = wandb and is_wandb_available()

        self.log_frequency = log_frequency
        if self.tensorboard:
            self._summary_writer = torch.utils.tensorboard.SummaryWriter(
                self.model_dir)

        # Sort the indices of the model outputs by their declared role.
        if output_types is None:
            prediction_idx = loss_idx = variance_idx = other_idx = None
        else:
            prediction_idx, loss_idx, variance_idx, other_idx = [], [], [], []
            for position, output_type in enumerate(output_types):
                if output_type == 'prediction':
                    bucket = prediction_idx
                elif output_type == 'loss':
                    bucket = loss_idx
                elif output_type == 'variance':
                    bucket = variance_idx
                else:
                    bucket = other_idx
                bucket.append(position)
            if not loss_idx:
                # No dedicated loss outputs: the loss is computed from the
                # prediction outputs (the same list object is shared).
                loss_idx = prediction_idx
        self._prediction_outputs = prediction_idx
        self._loss_outputs = loss_idx
        self._variance_outputs = variance_idx
        self._other_outputs = other_idx

        self._built = False
        self._output_functions: Dict[Any, Any] = {}
        self._optimizer_for_vars: Dict[Any, Any] = {}
예제 #14
0
    def __init__(self,
                 model,
                 loss,
                 output_types=None,
                 batch_size=100,
                 model_dir=None,
                 learning_rate=0.001,
                 optimizer=None,
                 tensorboard=False,
                 tensorboard_log_frequency=100,
                 **kwargs):
        """Wrap a Keras model as a KerasModel.

        Parameters
        ----------
        model: tf.keras.Model
          the Keras model implementing the calculation
        loss: dc.models.losses.Loss or function
          a Loss or function defining how to compute the training loss for
          each batch.  A Loss instance is wrapped in a _StandardLoss; anything
          else is stored and used as given.
        output_types: list of strings
          the type of each output from the model.  'prediction', 'loss' and
          'variance' are recognized; any other value is recorded as an
          "other" output.  If no output is tagged 'loss', the prediction
          outputs double as the loss outputs.
        batch_size: int
          default batch size for training and evaluating
        model_dir: str
          the directory on disk where the model will be stored.  If this is
          None, a temporary directory is created.
        learning_rate: float or LearningRateSchedule
          the learning rate to use for fitting.  If optimizer is specified,
          this is ignored.
        optimizer: Optimizer
          the optimizer to use for fitting.  If this is specified,
          learning_rate is ignored.  If None, Adam is used.
        tensorboard: bool
          whether to log progress to TensorBoard during training
        tensorboard_log_frequency: int
          the frequency at which to log data to TensorBoard, measured in
          batches
        """
        super(KerasModel, self).__init__(model_instance=model,
                                         model_dir=model_dir,
                                         **kwargs)
        self.model = model
        self.batch_size = batch_size
        self._loss_fn = (_StandardLoss(model, loss)
                         if isinstance(loss, Loss) else loss)
        self.optimizer = (Adam(learning_rate=learning_rate)
                          if optimizer is None else optimizer)

        self.tensorboard = tensorboard
        self.tensorboard_log_frequency = tensorboard_log_frequency
        if self.tensorboard:
            self._summary_writer = tf.summary.create_file_writer(
                self.model_dir)

        # Sort the indices of the model outputs by their declared role.
        if output_types is None:
            predictions = losses = variances = others = None
        else:
            predictions, losses, variances, others = [], [], [], []
            for position, output_type in enumerate(output_types):
                if output_type == 'prediction':
                    destination = predictions
                elif output_type == 'loss':
                    destination = losses
                elif output_type == 'variance':
                    destination = variances
                else:
                    destination = others
                destination.append(position)
            if not losses:
                # Without explicit loss outputs the predictions feed the loss
                # (the same list object is shared).
                losses = predictions
        self._prediction_outputs = predictions
        self._loss_outputs = losses
        self._variance_outputs = variances
        self._other_outputs = others

        # Nothing is built yet; these caches are empty until first use.
        self._built = False
        self._inputs_built = False
        self._training_ops_built = False
        self._output_functions = {}
        self._gradient_fn_for_vars = {}
예제 #15
0
파일: ppo.py 프로젝트: ComMedX/deepchem-1
    def __init__(self,
                 env,
                 policy,
                 max_rollout_length=20,
                 optimization_rollouts=8,
                 optimization_epochs=4,
                 batch_size=64,
                 clipping_width=0.2,
                 discount_factor=0.99,
                 advantage_lambda=0.98,
                 value_weight=1.0,
                 entropy_weight=0.01,
                 optimizer=None,
                 model_dir=None,
                 use_hindsight=False):
        """Create an object for optimizing a policy.

        Parameters
        ----------
        env: Environment
          the Environment to interact with
        policy: Policy
          the Policy to optimize.  It must have outputs with the names
          'action_prob' and 'value', corresponding to the action probabilities
          and value estimate.  Outputs named 'rnn_state' are collected as the
          final recurrent states.
        max_rollout_length: int
          the maximum length of rollouts to generate
        optimization_rollouts: int
          the number of rollouts to generate for each iteration of optimization
        optimization_epochs: int
          the number of epochs of optimization to perform within each iteration
        batch_size: int
          the batch size to use during optimization.  If this is 0, each
          rollout will be used as a separate batch.
        clipping_width: float
          in computing the PPO loss function, the probability ratio is clipped
          to the range (1-clipping_width, 1+clipping_width)
        discount_factor: float
          the discount factor to use when computing rewards
        advantage_lambda: float
          the parameter for trading bias vs. variance in Generalized Advantage
          Estimation
        value_weight: float
          a scale factor for the value loss term in the loss function
        entropy_weight: float
          a scale factor for the entropy term in the loss function
        optimizer: Optimizer
          the optimizer to use.  If None, Adam with learning_rate=0.001,
          beta1=0.9 and beta2=0.999 is used.
        model_dir: str
          the directory in which the model will be saved.  If None, a
          temporary directory will be created.
        use_hindsight: bool
          if True, use Hindsight Experience Replay

        Raises
        ------
        ValueError
          if the policy has recurrent initial states and batch_size is not 0
        """
        # The ABC aliases in the top-level `collections` module
        # (collections.Sequence etc.) were removed in Python 3.10; the
        # supported location is collections.abc.  Imported locally so this
        # method does not depend on the file's import block.
        from collections.abc import Sequence

        self._env = env
        self._policy = policy
        self.max_rollout_length = max_rollout_length
        self.optimization_rollouts = optimization_rollouts
        self.optimization_epochs = optimization_epochs
        self.batch_size = batch_size
        self.clipping_width = clipping_width
        self.discount_factor = discount_factor
        self.advantage_lambda = advantage_lambda
        self.value_weight = value_weight
        self.entropy_weight = entropy_weight
        self.use_hindsight = use_hindsight
        # If the first entry of state_shape is itself a sequence, the state
        # is treated as a list of arrays rather than a single array.
        self._state_is_list = isinstance(env.state_shape[0], Sequence)
        if optimizer is None:
            self._optimizer = Adam(learning_rate=0.001, beta1=0.9, beta2=0.999)
        else:
            self._optimizer = optimizer
        self._model = self._build_model(model_dir)

        # Locate the tensors for each named policy output.
        output_names = policy.output_names
        output_tensors = self._model._output_tensors
        self._value = output_tensors[output_names.index('value')]
        self._action_prob = output_tensors[output_names.index('action_prob')]
        rnn_outputs = [
            i for i, n in enumerate(output_names) if n == 'rnn_state'
        ]
        self._rnn_final_states = [output_tensors[i] for i in rnn_outputs]

        self._session = tf.Session()
        self._train_op = self._model._tf_optimizer.minimize(
            self._model._loss_tensor)
        self._rnn_states = policy.rnn_initial_states
        if len(self._rnn_states) > 0 and batch_size != 0:
            raise ValueError(
                'Cannot batch rollouts when the policy contains a recurrent layer.  Set batch_size to 0.'
            )

        # Checkpoint tracking the model's trainable variables.
        self._checkpoint = tf.train.Checkpoint()
        self._checkpoint.save_counter  # Ensure the variable has been created
        self._checkpoint.listed = self._model.model.trainable_variables
        self._session.run(self._checkpoint.save_counter.initializer)
예제 #16
0
    def test_roulette(self):
        """Train a PPO policy on a roulette game and check it learns to quit."""

        # The game mirrors Roulette-v0 from OpenAI Gym: actions 0..36 are bets
        # on a number and action 37 walks away, ending the game.  Every bet has
        # a slightly negative expected reward, so walking away is optimal.

        class RouletteEnvironment(dc.rl.Environment):
            def __init__(self):
                super(RouletteEnvironment, self).__init__([(1, )], 38)
                self._state = [np.array([0])]

            def step(self, action):
                if action == 37:
                    # Walk away.
                    self._terminated = True
                    return 0.0
                wheel = np.random.randint(37)
                if wheel == 0:
                    return 35.0 if action == 0 else -1.0
                won = action != 0 and wheel % 2 == action % 2
                return 1.0 if won else -1.0

            def reset(self):
                self._terminated = False

        env = RouletteEnvironment()

        # The policy is deliberately trivial: one learned constant per action
        # for the probabilities and one learned constant for the value.

        class TestPolicy(dc.rl.Policy):
            def __init__(self):
                super(TestPolicy, self).__init__(['action_prob', 'value'])

            def create_model(self, **kwargs):
                class TestModel(tf.keras.Model):
                    def __init__(self):
                        super(TestModel, self).__init__(**kwargs)
                        self.action = tf.Variable(
                            np.ones(env.n_actions, np.float32))
                        self.value = tf.Variable([0.0], tf.float32)

                    def call(self, inputs, **kwargs):
                        logits = tf.reshape(self.action, (-1, env.n_actions))
                        return (tf.nn.softmax(logits), self.value)

                return TestModel()

        # Train.

        trainer = dc.rl.PPO(env,
                            TestPolicy(),
                            max_rollout_length=20,
                            optimization_epochs=8,
                            optimizer=Adam(learning_rate=0.003))
        trainer.fit(100000)

        # The learned value should be near zero and walking away should be
        # the preferred action.  To keep the test fast, "one of the top two
        # actions" is accepted.

        start_state = [[0]]
        action_prob, value = trainer.predict(start_state)
        assert -0.8 < value[0] < 0.5
        top_two = np.argsort(action_prob.flatten())[-2:]
        assert 37 in top_two
        chosen = trainer.select_action([[0]], deterministic=True)
        assert chosen == np.argmax(action_prob)

        # A fresh PPO object pointed at the same model directory must
        # reproduce the value after an explicit restore().

        saved_dir = trainer._model.model_dir
        restored = dc.rl.PPO(env, TestPolicy(), model_dir=saved_dir)
        restored.restore()
        _, value2 = restored.predict([[0]])
        assert value2 == value

        # Same check, but restoring through fit()'s "restore" argument.

        refit = dc.rl.PPO(env, TestPolicy(), model_dir=trainer._model.model_dir)
        refit.fit(0, restore=True)
        _, value2 = refit.predict([[0]])
        assert value2 == value
예제 #17
0
def test_sine_x():
    """Fit a PINN to the ODE f'(x) = -sin(x) and compare it against cos(x).

    Boundary values are supplied at the five points
    np.linspace(-np.pi, np.pi, 5); the solution is then approximated over the
    domain (-np.pi, np.pi).
    """

    # PINNModel is handed two factories:
    #   * `create_eval_fn` builds the function used at inference time, and
    #   * `gradient_fn` builds the loss that is differentiated during
    #     training, including the regulariser derived from the differential
    #     equation.
    def create_eval_fn(forward_fn, params):
        """Build the jitted function that evaluates the network at x."""

        @jax.jit
        def eval_model(x, rng=None):
            return jnp.squeeze(forward_fn(params, rng, x))

        return eval_model

    def gradient_fn(forward_fn, loss_outputs, initial_data):
        """Build the jitted loss: boundary error plus ODE residual."""
        x_boundary = initial_data['X0']
        u_boundary = initial_data['u0']

        @jax.jit
        def model_loss(params, target, weights, rng, x_train):

            @functools.partial(jax.vmap, in_axes=(None, 0))
            def periodic_loss(params, x):
                """Residual of the differential equation at one point.

                The equation is grad(f(x)) = -sin(x), so the quantity driven
                to zero is grad(f(x)) + sin(x).
                """
                point = jnp.expand_dims(x, 0)
                slope = jacrev(forward_fn, argnums=2)(params, rng, point)
                return slope + jnp.sin(point)

            # Mean squared error against the supplied boundary values.
            boundary_error = forward_fn(params, rng, x_boundary) - u_boundary
            loss_boundary = jnp.mean(boundary_error**2)

            # Mean squared ODE residual over the training points.
            residual = periodic_loss(params, x_train)
            loss_residual = jnp.mean(residual**2)

            return loss_boundary + loss_residual

        return model_loss

    # The Haiku network: a small MLP with softplus activations.
    def network(x):
        mlp = hk.nets.MLP(output_sizes=[256, 128, 1],
                          activation=jax.nn.softplus)
        return mlp(x)

    init_params, forward_fn = hk.transform(network)
    rng = jax.random.PRNGKey(500)
    params = init_params(rng, np.random.rand(1000, 1))

    opt = Adam(learning_rate=1e-2)

    # Boundary condition: cos(x) at 5 points in [-pi, pi], used in the L2 term.
    boundary_x = np.linspace(-1 * np.pi, 1 * np.pi, 5)
    boundary_u = np.cos(boundary_x)
    initial_data = {
        'X0': jnp.expand_dims(boundary_x, 1),
        'u0': jnp.expand_dims(boundary_u, 1)
    }

    j_m = PINNModel(forward_fn=forward_fn,
                    params=params,
                    initial_data=initial_data,
                    batch_size=1000,
                    optimizer=opt,
                    grad_fn=gradient_fn,
                    eval_fn=create_eval_fn,
                    deterministic=True,
                    log_frequency=1000)

    # Training data: 100 unlabeled points in [-pi, pi]; they only feed the
    # differential (regulariser) part of the loss.
    collocation = np.linspace(-1 * np.pi, 1 * np.pi, 100)
    dataset = NumpyDataset(np.expand_dims(collocation, 1))
    j_m.fit(dataset, nb_epochs=1000)

    # The learned function should stay close to cos(x) on a dense grid.
    test = np.expand_dims(np.linspace(-1 * np.pi, 1 * np.pi, 1000), 1)
    ans = j_m.predict(NumpyDataset(test))
    expected = np.cos(test).squeeze()

    assert np.allclose(expected, ans, atol=1e-01)
예제 #18
0
    def test_continuous(self):
        """Test A2C on an environment with a continuous action space."""

        # Each state is a pair (current value, target value).  The reward is
        # how much closer the action moved to the target, so the policy only
        # has to learn to output the target (or at least approach it).

        class TestEnvironment(dc.rl.Environment):
            def __init__(self):
                super(TestEnvironment, self).__init__((2, ),
                                                      action_shape=(1, ))

            def reset(self):
                target = np.random.uniform(-50, 50)
                self._state = np.array([0, target], dtype=np.float32)
                self._terminated = False
                self.count = 0

            def step(self, action):
                previous, target = self._state[0], self._state[1]
                proposed = action[0]
                distance_before = np.abs(target - previous)
                distance_after = np.abs(target - proposed)
                self._state = np.array([proposed, target], dtype=np.float32)
                self.count += 1
                # Episodes last exactly ten steps.
                self._terminated = self.count == 10
                return distance_before - distance_after

        # The policy has no hidden layers: a linear mean, a fixed standard
        # deviation, and a linear value estimate.

        class TestPolicy(dc.rl.Policy):
            def __init__(self):
                super(TestPolicy,
                      self).__init__(['action_mean', 'action_std', 'value'])

            def create_model(self, **kwargs):
                class TestModel(tf.keras.Model):
                    def __init__(self):
                        super(TestModel, self).__init__(**kwargs)
                        self.mean = Dense(1, kernel_initializer='zeros')
                        self.std = tf.constant([10.0])
                        self.value = Dense(1)

                    def call(self, inputs, **kwargs):
                        state = inputs[0]
                        return (self.mean(state), self.std, self.value(state))

                return TestModel()

        # Train with a decaying learning rate.

        env = TestEnvironment()
        schedule = PolynomialDecay(initial_rate=0.005,
                                   final_rate=0.0005,
                                   decay_steps=25000)
        a2c = dc.rl.A2C(env,
                        TestPolicy(),
                        discount_factor=0,
                        optimizer=Adam(learning_rate=schedule))
        a2c.fit(25000)

        # Roll out one deterministic episode and check where it ends up.

        env.reset()
        while not env.terminated:
            chosen = a2c.select_action(env.state, deterministic=True)
            env.step(chosen)
        final_value, target = env.state[0], env.state[1]
        distance = np.abs(final_value - target)
        tolerance = max(1.0, 0.1 * np.abs(target))
        assert distance < tolerance
예제 #19
0
    def __init__(self,
                 forward_fn: hk.State,
                 params: hk.Params,
                 loss: Optional[Union[Loss, LossFn]],
                 output_types: Optional[List[str]] = None,
                 batch_size: int = 100,
                 learning_rate: float = 0.001,
                 optimizer: Optional[Union[optax.GradientTransformation,
                                           Optimizer]] = None,
                 grad_fn: Callable = create_default_gradient_fn,
                 update_fn: Callable = create_default_update_fn,
                 eval_fn: Callable = create_default_eval_fn,
                 rng=jax.random.PRNGKey(1),
                 log_frequency: int = 100,
                 **kwargs):
        """
        Create a new JaxModel

        Parameters
        ----------
        forward_fn: hk.State or Function
          The function that computes the network.  Currently only haiku
          models are supported.
        params: hk.Params
          The parameter of the Jax based networks
        loss: dc.models.losses.Loss or function
          a Loss or function defining how to compute the training loss for
          each batch.  It is stored as given.
        output_types: list of strings, optional (default None)
          the type of each output from the model.  'prediction', 'loss' and
          'variance' are recognized; any other value is recorded as an
          "other" output.  If no output is tagged 'loss', the prediction
          outputs double as the loss outputs.
        batch_size: int, optional (default 100)
          default batch size for training and evaluating
        learning_rate: float or LearningRateSchedule, optional (default 0.001)
          the learning rate to use for fitting.  It configures the default
          Adam optimizer; if optimizer is specified, this is ignored.
        optimizer: optax.GradientTransformation or Optimizer, optional
          The optimizer to use for fitting.  A DeepChem Optimizer is converted
          with its `_create_jax_optimizer()` method; an optax object is used
          as given.  If None, Adam with `learning_rate` is used.
        grad_fn: Callable
          factory stored as `self._create_gradient_fn`
        update_fn: Callable
          factory stored as `self._create_update_fn`
        eval_fn: Callable
          factory stored as `self._create_eval_fn`
        rng: jax.random.PRNGKey, optional (default 1)
          A default global PRNG key to use for drawing random numbers.
        log_frequency: int, optional (default 100)
          The frequency at which to log data. Data is logged using
          `logging` by default.


        Miscellaneous Parameters Yet To Add
        -----------------------------------
        model_dir: str, optional (default None)
          Will be added along with the save & load method
        tensorboard: bool, optional (default False)
          whether to log progress to TensorBoard during training
        wandb: bool, optional (default False)
          whether to log progress to Weights & Biases during training


        Work in Progress
        ----------------
        [1] Integrate the optax losses, optimizers, schedulers with Deepchem
        [2] Support for saving & loading the model.
        """
        super(JaxModel, self).__init__(model=(forward_fn, params), **kwargs)
        warnings.warn(
            'JaxModel is still in active development and all features may not yet be implemented'
        )
        self._loss_fn = loss  # lambda pred, tar: jnp.mean(optax.l2_loss(pred, tar))
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        if optimizer is None:
            # Honor the caller's learning_rate instead of a hard-coded 1e-3;
            # with the default learning_rate the result is unchanged.
            optimizer = Adam(learning_rate=learning_rate)

        # DeepChem optimizers are converted to their optax equivalent; optax
        # transformations are used directly.
        if not isinstance(optimizer, optax.GradientTransformation):
            self.optimizer = optimizer._create_jax_optimizer()
        else:
            self.optimizer = optimizer
        self.forward_fn = forward_fn
        self.params = params
        self._built = False
        self.log_frequency = log_frequency
        self.rng = rng
        self._create_gradient_fn = grad_fn
        self._create_update_fn = update_fn
        self._create_eval_fn = eval_fn

        # Sort the indices of the model outputs by their declared role.
        if output_types is None:
            self._prediction_outputs = None
            self._loss_outputs = None
            self._variance_outputs = None
            self._other_outputs = None
        else:
            self._prediction_outputs = []
            self._loss_outputs = []
            self._variance_outputs = []
            self._other_outputs = []
            for i, output_type in enumerate(output_types):
                if output_type == 'prediction':
                    self._prediction_outputs.append(i)
                elif output_type == 'loss':
                    self._loss_outputs.append(i)
                elif output_type == 'variance':
                    self._variance_outputs.append(i)
                else:
                    self._other_outputs.append(i)
            if not self._loss_outputs:
                # Without explicit loss outputs the predictions feed the loss.
                self._loss_outputs = self._prediction_outputs
예제 #20
0
    def __init__(self,
                 model,
                 loss,
                 output_types=None,
                 batch_size=100,
                 model_dir=None,
                 learning_rate=0.001,
                 optimizer=None,
                 tensorboard=False,
                 log_frequency=100,
                 **kwargs):
        """Wrap a Keras model as a KerasModel.

        Parameters
        ----------
        model: tf.keras.Model
          the Keras model implementing the calculation
        loss: dc.models.losses.Loss or function
          a Loss or function defining how to compute the training loss for
          each batch.  A Loss instance is wrapped in a _StandardLoss; anything
          else is stored and used as given.
        output_types: list of strings
          the type of each output from the model.  'prediction', 'loss' and
          'variance' are recognized; any other value is recorded as an
          "other" output.  If no output is tagged 'loss', the prediction
          outputs double as the loss outputs.
        batch_size: int
          default batch size for training and evaluating
        model_dir: str
          the directory on disk where the model will be stored.  If this is
          None, a temporary directory is created.
        learning_rate: float or LearningRateSchedule
          the learning rate to use for fitting.  If optimizer is specified,
          this is ignored.
        optimizer: Optimizer
          the optimizer to use for fitting.  If this is specified,
          learning_rate is ignored.  If None, Adam is used.
        tensorboard: bool
          whether to log progress to TensorBoard during training
        log_frequency: int
          The frequency at which to log data, measured in global steps
          (roughly, one global step per training batch).  For a printout
          every 10 batches, set `log_frequency=10`.  The deprecated keyword
          `tensorboard_log_frequency`, if passed, overrides this value and
          triggers a warning.
        """
        super(KerasModel, self).__init__(model_instance=model,
                                         model_dir=model_dir,
                                         **kwargs)
        self.model = model
        self._loss_fn = (_StandardLoss(model, loss)
                         if isinstance(loss, Loss) else loss)
        self.batch_size = batch_size
        self.optimizer = (Adam(learning_rate=learning_rate)
                          if optimizer is None else optimizer)
        self.tensorboard = tensorboard

        # Backwards compatibility: the old keyword still wins when supplied.
        if "tensorboard_log_frequency" not in kwargs:
            self.log_frequency = log_frequency
        else:
            logger.warning(
                "tensorboard_log_frequency is deprecated. Please use log_frequency instead. This argument will be removed in a future release of DeepChem."
            )
            self.log_frequency = kwargs["tensorboard_log_frequency"]

        if self.tensorboard:
            self._summary_writer = tf.summary.create_file_writer(
                self.model_dir)

        # Sort the indices of the model outputs by their declared role.
        if output_types is None:
            prediction_idx = loss_idx = variance_idx = other_idx = None
        else:
            prediction_idx, loss_idx, variance_idx, other_idx = [], [], [], []
            for position, output_type in enumerate(output_types):
                if output_type == 'prediction':
                    prediction_idx.append(position)
                elif output_type == 'loss':
                    loss_idx.append(position)
                elif output_type == 'variance':
                    variance_idx.append(position)
                else:
                    other_idx.append(position)
            if not loss_idx:
                # Without explicit loss outputs the predictions feed the loss
                # (the same list object is shared).
                loss_idx = prediction_idx
        self._prediction_outputs = prediction_idx
        self._loss_outputs = loss_idx
        self._variance_outputs = variance_idx
        self._other_outputs = other_idx

        # Nothing is built yet; these caches are empty until first use.
        self._built = False
        self._inputs_built = False
        self._training_ops_built = False
        self._output_functions = {}
        self._gradient_fn_for_vars = {}
예제 #21
0
파일: a2c.py 프로젝트: yoonaeko082/deepchem
    def __init__(self,
                 env,
                 policy,
                 max_rollout_length=20,
                 discount_factor=0.99,
                 advantage_lambda=0.98,
                 value_weight=1.0,
                 entropy_weight=0.01,
                 optimizer=None,
                 model_dir=None,
                 use_hindsight=False):
        """Set up an optimizer for a policy acting in an environment.

        Parameters
        ----------
        env: Environment
          the Environment to interact with
        policy: Policy
          the Policy to optimize.  Its ``output_names`` must contain 'value'
          plus either 'action_prob' (discrete action spaces) or both
          'action_mean' and 'action_std' (continuous action spaces).  Any
          outputs named 'rnn_state' are recorded as recurrent final states.
        max_rollout_length: int
          the maximum length of rollouts to generate
        discount_factor: float
          the discount factor to use when computing rewards
        advantage_lambda: float
          the parameter for trading bias vs. variance in Generalized
          Advantage Estimation
        value_weight: float
          a scale factor for the value loss term in the loss function
        entropy_weight: float
          a scale factor for the entropy term in the loss function
        optimizer: Optimizer
          the optimizer to use.  If None, Adam with learning_rate=0.001,
          beta1=0.9 and beta2=0.999 is used.
        model_dir: str
          the directory in which the model will be saved.  It is forwarded
          unchanged to ``_build_model``.
        use_hindsight: bool
          if True, use Hindsight Experience Replay
        """
        self._env = env
        self._policy = policy

        # Plain hyperparameters, stored verbatim.
        self.max_rollout_length = max_rollout_length
        self.discount_factor = discount_factor
        self.advantage_lambda = advantage_lambda
        self.value_weight = value_weight
        self.entropy_weight = entropy_weight
        self.use_hindsight = use_hindsight

        # True when the first entry of the state shape is itself a sequence.
        first_shape_entry = env.state_shape[0]
        self._state_is_list = isinstance(first_shape_entry, SequenceCollection)

        self._optimizer = (Adam(learning_rate=0.001, beta1=0.9, beta2=0.999)
                           if optimizer is None else optimizer)

        # Record where each required output sits in the policy's output list.
        names = policy.output_names
        self.continuous = ('action_mean' in names)
        self._value_index = names.index('value')
        if not self.continuous:
            self._action_prob_index = names.index('action_prob')
        else:
            self._action_mean_index = names.index('action_mean')
            self._action_std_index = names.index('action_std')
        self._rnn_final_state_indices = [
            position for position, name in enumerate(names)
            if name == 'rnn_state'
        ]
        self._rnn_states = policy.rnn_initial_states

        # The model is built last because it is created after all of the
        # settings above have been stored on the instance.
        self._model = self._build_model(model_dir)

        checkpoint = tf.train.Checkpoint()
        checkpoint.save_counter  # Touch it so the counter variable exists
        # Attach the trainable variables to the checkpoint as `listed`.
        checkpoint.listed = self._model.model.trainable_variables
        self._checkpoint = checkpoint
예제 #22
0
    def __init__(self,
                 model: torch.nn.Module,
                 loss: Union[Loss, LossFn],
                 output_types: Optional[List[str]] = None,
                 batch_size: int = 100,
                 model_dir: Optional[str] = None,
                 learning_rate: Union[float, LearningRateSchedule] = 0.001,
                 optimizer: Optional[Optimizer] = None,
                 tensorboard: bool = False,
                 wandb: bool = False,
                 log_frequency: int = 100,
                 device: Optional[torch.device] = None,
                 regularization_loss: Optional[Callable] = None,
                 wandb_logger: Optional[WandbLogger] = None,
                 **kwargs) -> None:
        """Create a new TorchModel.

        Parameters
        ----------
        model: torch.nn.Module
          the PyTorch model implementing the calculation.  It is moved to the
          selected device.
        loss: dc.models.losses.Loss or function
          a Loss or function defining how to compute the training loss for
          each batch.  A Loss instance is wrapped in ``_StandardLoss``; any
          other value is used directly as the loss function.
        output_types: list of strings, optional (default None)
          the type of each output from the model.  The recognised values are
          'prediction', 'loss' and 'variance'; every other value is recorded
          as an "other" output.  If no output is marked 'loss', the
          prediction outputs are used as the loss outputs.
        batch_size: int, optional (default 100)
          default batch size for training and evaluating
        model_dir: str, optional (default None)
          the directory on disk where the model will be stored.  It is
          forwarded to the parent constructor.
        learning_rate: float or LearningRateSchedule, optional (default 0.001)
          the learning rate to use for fitting.  If optimizer is specified,
          this is ignored.
        optimizer: Optimizer, optional (default None)
          the optimizer to use for fitting.  If None, Adam is created with
          ``learning_rate``.
        tensorboard: bool, optional (default False)
          whether to log progress to TensorBoard during training.  Raises
          ImportError if TensorBoard support is not available.
        wandb: bool, optional (default False)
          deprecated switch for Weights & Biases logging; use
          ``wandb_logger`` instead.
        log_frequency: int, optional (default 100)
          the frequency, in global steps, at which to log data.  Roughly, a
          global step corresponds to one batch of training, so
          ``log_frequency=10`` gives a printout every 10 batch steps.
        device: torch.device, optional (default None)
          the device on which to run computations.  If None, CUDA is used
          when ``torch.cuda.is_available()`` is true, otherwise the CPU.
        regularization_loss: Callable, optional
          a function that takes no arguments, and returns an extra
          contribution to add to the loss function
        wandb_logger: WandbLogger
          the Weights & Biases logger object used to log data and metrics
        """
        super(TorchModel, self).__init__(model=model,
                                         model_dir=model_dir,
                                         **kwargs)

        # Kept as plain attributes; this constructor never reads them back.
        self.loss = loss
        self.learning_rate = learning_rate
        self.output_types = output_types

        self._loss_fn: LossFn = (_StandardLoss(self, loss) if isinstance(
            loss, Loss) else loss)
        self.batch_size = batch_size
        self.optimizer: Optimizer = (Adam(learning_rate=learning_rate)
                                     if optimizer is None else optimizer)
        self.tensorboard = tensorboard
        self.regularization_loss = regularization_loss

        # Pick a device when the caller did not supply one.
        if device is None:
            device = torch.device(
                'cuda' if torch.cuda.is_available() else 'cpu')
        self.device = device
        self.model = model.to(device)

        # Legacy `wandb` flag: warn about the deprecation, and additionally
        # warn when the package backing it is missing.
        if wandb:
            logger.warning(
                "`wandb` argument is deprecated. Please use `wandb_logger` instead. "
                "This argument will be removed in a future release of DeepChem."
            )
            if not _has_wandb:
                logger.warning(
                    "You set wandb to True but W&B is not installed. To use wandb logging, "
                    "run `pip install wandb; wandb login` see https://docs.wandb.com/huggingface."
                )
        self.wandb = wandb and _has_wandb

        # With the legacy flag active and no logger given, use a default one.
        active_logger = wandb_logger
        if self.wandb and active_logger is None:
            active_logger = WandbLogger()
        self.wandb_logger = active_logger

        if active_logger is not None:
            if not active_logger.initialized:
                active_logger.setup()
            # Push the TorchModel constructor arguments into the logger
            # config; extra keyword arguments override same-named entries.
            wandb_logger_config = {
                'loss': loss,
                'output_types': output_types,
                'batch_size': batch_size,
                'model_dir': model_dir,
                'learning_rate': learning_rate,
                'optimizer': optimizer,
                'tensorboard': tensorboard,
                'log_frequency': log_frequency,
                'regularization_loss': regularization_loss,
            }
            wandb_logger_config.update(kwargs)
            active_logger.update_config(wandb_logger_config)

        self.log_frequency = log_frequency
        if self.tensorboard:
            if not _has_tensorboard:
                raise ImportError(
                    "This class requires tensorboard to be installed.")
            self._summary_writer = torch.utils.tensorboard.SummaryWriter(
                self.model_dir)

        # Bucket the output indices by their declared type.
        if output_types is not None:
            indexed_kinds = list(enumerate(output_types))
            known_kinds = ('prediction', 'loss', 'variance')
            self._prediction_outputs = [
                idx for idx, kind in indexed_kinds if kind == 'prediction'
            ]
            self._loss_outputs = [
                idx for idx, kind in indexed_kinds if kind == 'loss'
            ]
            self._variance_outputs = [
                idx for idx, kind in indexed_kinds if kind == 'variance'
            ]
            self._other_outputs = [
                idx for idx, kind in indexed_kinds if kind not in known_kinds
            ]
            # Without explicit loss outputs, the predictions feed the loss.
            if not self._loss_outputs:
                self._loss_outputs = self._prediction_outputs
        else:
            self._prediction_outputs = None
            self._loss_outputs = None
            self._variance_outputs = None
            self._other_outputs = None

        self._built = False
        self._output_functions: Dict[Any, Any] = {}
        self._optimizer_for_vars: Dict[Any, Any] = {}
예제 #23
0
    def test_hindsight(self):
        """Train PPO with Hindsight Experience Replay on a sparse-reward task."""

        # The agent walks on a plane, one unit step at a time, towards a goal
        # placed at random.  The only reward comes on reaching the goal, so
        # standard methods may wait a very long time for any feedback at all.
        # Hindsight makes the problem much easier.

        class TestEnvironment(dc.rl.Environment):
            def __init__(self):
                super(TestEnvironment, self).__init__((4, ), 4)
                # One (dx, dy) displacement per action index.
                self.moves = [(-1, 0), (1, 0), (0, -1), (0, 1)]

            def reset(self):
                # State layout: agent position (starts at origin), then goal.
                goal = np.random.randint(-50, 50, 2)
                self._state = np.concatenate([[0, 0], goal])
                self._terminated = False
                self.count = 0

            def step(self, action):
                moved = self._state.copy()
                moved[:2] += self.moves[action]
                self._state = moved
                self.count += 1
                if np.array_equal(moved[:2], moved[2:]):
                    # Goal reached: finish with the only non-zero reward.
                    self._terminated = True
                    return 1
                if self.count == 1000:
                    # Give up after 1000 steps without any reward.
                    self._terminated = True
                return 0

            def apply_hindsight(self, states, actions, goal):
                goal_pos = goal[:2]
                relabelled_states = []
                rewards = []
                for state, action in zip(states, actions):
                    # Rewrite the goal part of the state to the new goal.
                    relabelled = state.copy()
                    relabelled[2:] = goal_pos
                    relabelled_states.append(relabelled)
                    reached = np.array_equal(
                        relabelled[:2] + self.moves[action], goal_pos)
                    rewards.append(1 if reached else 0)
                    if reached:
                        # Stop at the step that lands on the goal.
                        break
                return relabelled_states, rewards

        # A simple policy with two hidden layers.

        class TestPolicy(dc.rl.Policy):
            def __init__(self):
                super(TestPolicy, self).__init__(['action_prob', 'value'])

            def create_model(self, **kwargs):
                state_input = Input(shape=(4, ))
                hidden1 = Dense(8, activation=tf.nn.relu)(state_input)
                hidden2 = Dense(8, activation=tf.nn.relu)(hidden1)
                action_prob = Dense(4,
                                    activation=tf.nn.softmax,
                                    use_bias=False)(hidden2)
                value = Dense(1)(hidden2)
                return tf.keras.Model(inputs=state_input,
                                      outputs=[action_prob, value])

        # Optimize it.

        env = TestEnvironment()
        ppo = dc.rl.PPO(env,
                        TestPolicy(),
                        use_hindsight=True,
                        optimization_epochs=1,
                        batch_size=0,
                        optimizer=Adam(learning_rate=0.001))
        ppo.fit(1500000)

        # Run five episodes and require the goal to be reached in at least
        # three of them.

        pass_count = 0
        for _ in range(5):
            env.reset()
            while not env.terminated:
                env.step(ppo.select_action(env.state))
            final_state = env.state
            pass_count += int(np.array_equal(final_state[:2],
                                             final_state[2:]))
        assert pass_count >= 3
예제 #24
0
    def test_roulette(self):
        """Train an MCTS policy on a roulette environment and reload it."""

        # This is modeled after the Roulette-v0 environment from OpenAI Gym.
        # The player can bet on any number from 0 to 36, or walk away (which
        # ends the game).  The average reward for any bet is slightly
        # negative, so the best strategy is to walk away.

        class RouletteEnvironment(dc.rl.Environment):
            def __init__(self):
                super(RouletteEnvironment, self).__init__([(1, )], 38)
                self._state = [np.array([0])]

            def reset(self):
                self._terminated = False

            def step(self, action):
                if action == 37:
                    # Walk away.
                    self._terminated = True
                    return 0.0
                wheel = np.random.randint(37)
                if wheel == 0:
                    # Zero only pays out for a bet placed on zero.
                    return 35.0 if action == 0 else -1.0
                # A non-zero bet wins when its parity matches the wheel's.
                parity_win = action != 0 and wheel % 2 == action % 2
                return 1.0 if parity_win else -1.0

        env = RouletteEnvironment()

        # This policy just learns a constant probability for each action, and
        # a constant for the value.

        class TestPolicy(dc.rl.Policy):
            def create_layers(self, state, **kwargs):
                logits = Variable(np.ones(env.n_actions))
                reshaped = Reshape(in_layers=[logits],
                                   shape=(-1, env.n_actions))
                action_prob = SoftMax(in_layers=[reshaped])
                value = Variable([0.0])
                return {'action_prob': action_prob, 'value': value}

        # Optimize it.

        mcts = dc.rl.MCTS(env,
                          TestPolicy(),
                          max_search_depth=5,
                          n_search_episodes=200,
                          optimizer=Adam(learning_rate=0.005))
        mcts.fit(10, steps_per_iteration=50, epochs_per_iteration=50)

        # It should have learned that the expected value is very close to
        # zero, and that the best action is to walk away.

        start_state = [[0]]
        action_prob, value = mcts.predict(start_state)
        assert -0.5 < value[0] < 0.5
        assert action_prob.argmax() == 37
        assert mcts.select_action([[0]], deterministic=True) == 37

        # Verify that a new MCTS object pointed at the same model directory
        # gives the same result after an explicit restore().

        saved_dir = mcts._graph.model_dir
        restored = dc.rl.MCTS(env, TestPolicy(), model_dir=saved_dir)
        restored.restore()
        _, value2 = restored.predict([[0]])
        assert value2 == value

        # Do the same thing, only using the "restore" argument to fit().

        refitted = dc.rl.MCTS(env,
                              TestPolicy(),
                              model_dir=mcts._graph.model_dir)
        refitted.fit(0, restore=True)
        _, value2 = refitted.predict([[0]])
        assert value2 == value