Example #1
    def test_regression_overfit(self):
        """Test that TensorGraph models can overfit simple regression datasets."""
        n_samples = 10
        n_features = 3
        n_tasks = 1

        # Generate dummy dataset
        np.random.seed(123)
        ids = np.arange(n_samples)
        X = np.random.rand(n_samples, n_features)
        y = np.zeros((n_samples, n_tasks))
        w = np.ones((n_samples, n_tasks))
        dataset = dc.data.NumpyDataset(X, y, w, ids)

        regression_metric = dc.metrics.Metric(dc.metrics.mean_squared_error)
        # TODO(rbharath): This breaks with optimizer="momentum". Why?
        model = dc.models.MultitaskRegressor(
            n_tasks,
            n_features,
            dropouts=[0.],
            weight_init_stddevs=[np.sqrt(6) / np.sqrt(1000)],
            batch_size=n_samples)
        model.set_optimizer(Adam(learning_rate=0.003, beta1=0.9, beta2=0.999))

        # Fit trained model
        model.fit(dataset, nb_epoch=100)
        model.save()

        # Eval model on train
        scores = model.evaluate(dataset, [regression_metric])
        assert scores[regression_metric.name] < .1
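For reference, the Adam(learning_rate=0.003, beta1=0.9, beta2=0.999) object configured above exposes the standard Adam hyperparameters (Kingma & Ba). A minimal NumPy sketch of one Adam update step, shown only to clarify what those hyperparameters control (this is not DeepChem's implementation):

import numpy as np

def adam_step(param, grad, m, v, t,
              learning_rate=0.003, beta1=0.9, beta2=0.999, epsilon=1e-7):
    # Exponential moving averages of the gradient and its elementwise square.
    m = beta1 * m + (1 - beta1) * grad
    v = beta2 * v + (1 - beta2) * grad ** 2
    # Bias correction for the zero-initialized moments (t counts steps from 1).
    m_hat = m / (1 - beta1 ** t)
    v_hat = v / (1 - beta2 ** t)
    # Scaled parameter update.
    param = param - learning_rate * m_hat / (np.sqrt(v_hat) + epsilon)
    return param, m, v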
Example #2
    def test_fittransform_regression_overfit(self):
        """Test that TensorGraph FitTransform models can overfit simple regression datasets."""
        n_samples = 10
        n_features = 3
        n_tasks = 1

        # Generate dummy dataset
        np.random.seed(123)
        ids = np.arange(n_samples)
        X = np.random.rand(n_samples, n_features, n_features)
        y = np.zeros((n_samples, n_tasks))
        w = np.ones((n_samples, n_tasks))
        dataset = dc.data.NumpyDataset(X, y, w, ids)

        fit_transformers = [dc.trans.CoulombFitTransformer(dataset)]
        regression_metric = dc.metrics.Metric(dc.metrics.mean_squared_error)
        model = dc.models.MultitaskFitTransformRegressor(
            n_tasks, [n_features, n_features],
            dropouts=[0.],
            weight_init_stddevs=[np.sqrt(6) / np.sqrt(1000)],
            batch_size=n_samples,
            fit_transformers=fit_transformers,
            n_evals=1)
        model.set_optimizer(Adam(learning_rate=0.003, beta1=0.9, beta2=0.999))

        # Fit trained model
        model.fit(dataset, nb_epoch=100)
        model.save()

        # Eval model on train
        scores = model.evaluate(dataset, [regression_metric])
        assert scores[regression_metric.name] < .1
Example #3
    def test_skewed_classification_overfit(self):
        """Test TensorGraph models can overfit 0/1 datasets with few actives."""
        n_samples = 100
        n_features = 3
        n_tasks = 1
        n_classes = 2

        # Generate dummy dataset
        np.random.seed(123)
        p = .05
        ids = np.arange(n_samples)
        X = np.random.rand(n_samples, n_features)
        y = np.random.binomial(1, p, size=(n_samples, n_tasks))
        w = np.ones((n_samples, n_tasks))

        dataset = dc.data.NumpyDataset(X, y, w, ids)

        classification_metric = dc.metrics.Metric(dc.metrics.roc_auc_score)
        model = dc.models.MultitaskClassifier(n_tasks,
                                              n_features,
                                              dropouts=[0.],
                                              weight_init_stddevs=[.1],
                                              batch_size=n_samples)
        model.set_optimizer(Adam(learning_rate=0.003, beta1=0.9, beta2=0.999))

        # Fit trained model
        model.fit(dataset, nb_epoch=100)
        model.save()

        # Eval model on train
        scores = model.evaluate(dataset, [classification_metric])
        assert scores[classification_metric.name] > .75
Example #4
    def test_multitask_regression_overfit(self):
        """Test TensorGraph multitask overfits tiny data."""
        n_tasks = 10
        n_samples = 10
        n_features = 3
        n_classes = 2

        # Generate dummy dataset
        np.random.seed(123)
        ids = np.arange(n_samples)
        X = np.random.rand(n_samples, n_features)
        y = np.zeros((n_samples, n_tasks))
        w = np.ones((n_samples, n_tasks))

        dataset = dc.data.NumpyDataset(X, y, w, ids)

        regression_metric = dc.metrics.Metric(dc.metrics.mean_squared_error,
                                              task_averager=np.mean,
                                              mode="regression")
        model = dc.models.MultitaskRegressor(n_tasks,
                                             n_features,
                                             dropouts=[0.],
                                             weight_init_stddevs=[.1],
                                             batch_size=n_samples)
        model.set_optimizer(Adam(learning_rate=0.0003, beta1=0.9, beta2=0.999))

        # Fit trained model
        model.fit(dataset, nb_epoch=50)
        model.save()

        # Eval model on train
        scores = model.evaluate(dataset, [regression_metric])
        assert scores[regression_metric.name] < .1
Example #5
    def test_classification_overfit(self):
        """Test that TensorGraph models can overfit simple classification datasets."""
        n_samples = 10
        n_features = 3
        n_tasks = 1
        n_classes = 2

        # Generate dummy dataset
        np.random.seed(123)
        ids = np.arange(n_samples)
        X = np.random.rand(n_samples, n_features)
        y = np.zeros((n_samples, n_tasks))
        w = np.ones((n_samples, n_tasks))
        dataset = dc.data.NumpyDataset(X, y, w, ids)

        classification_metric = dc.metrics.Metric(dc.metrics.accuracy_score)
        model = dc.models.MultitaskClassifier(n_tasks,
                                              n_features,
                                              dropouts=[0.],
                                              weight_init_stddevs=[.1],
                                              batch_size=n_samples)
        model.set_optimizer(Adam(learning_rate=0.0003, beta1=0.9, beta2=0.999))

        # Fit trained model
        model.fit(dataset, nb_epoch=100)
        model.save()

        # Eval model on train
        scores = model.evaluate(dataset, [classification_metric])
        assert scores[classification_metric.name] > .9
Example #6
    def test_save_load(self):
        n_data_points = 20
        n_features = 2
        X = np.random.rand(n_data_points, n_features)
        y = [[0, 1] for x in range(n_data_points)]
        dataset = NumpyDataset(X, y)
        features = Feature(shape=(None, n_features))
        dense = Dense(out_channels=2, in_layers=[features])
        output = SoftMax(in_layers=[dense])
        label = Label(shape=(None, 2))
        smce = SoftMaxCrossEntropy(in_layers=[label, dense])
        loss = ReduceMean(in_layers=[smce])
        tg = dc.models.TensorGraph(learning_rate=0.01)
        tg.add_output(output)
        tg.set_loss(loss)
        submodel_loss = ReduceSum(in_layers=smce)
        submodel_opt = Adam(learning_rate=0.002)
        submodel = tg.create_submodel(layers=[dense],
                                      loss=submodel_loss,
                                      optimizer=submodel_opt)
        tg.fit(dataset, nb_epoch=1)
        prediction = np.squeeze(tg.predict_on_batch(X))
        tg.save()

        dirpath = tempfile.mkdtemp()
        shutil.rmtree(dirpath)
        shutil.move(tg.model_dir, dirpath)

        tg1 = TensorGraph.load_from_dir(dirpath)
        prediction2 = np.squeeze(tg1.predict_on_batch(X))
        assert_true(np.all(np.isclose(prediction, prediction2, atol=0.01)))
Example #7
    def __init__(self,
                 env,
                 policy,
                 max_search_depth=100,
                 n_search_episodes=1000,
                 discount_factor=0.99,
                 value_weight=1.0,
                 optimizer=Adam(),
                 model_dir=None):
        """Create an object for optimizing a policy.

    Parameters
    ----------
    env: Environment
      the Environment to interact with
    policy: Policy
      the Policy to optimize.  Its create_layers() method must return a dict containing the
      keys 'action_prob' and 'value', corresponding to the action probabilities and value estimate
    max_search_depth: int
      the maximum depth of the tree search, measured in steps
    n_search_episodes: int
      the number of episodes to simulate (up to max_search_depth, if they do not
      terminate first) for each tree search
    discount_factor: float
      the discount factor to use when computing rewards
    value_weight: float
      a scale factor for the value loss term in the loss function
    optimizer: Optimizer
      the optimizer to use
    model_dir: str
      the directory in which the model will be saved.  If None, a temporary directory will be created.
    """
        self._env = copy.deepcopy(env)
        self._policy = policy
        self.max_search_depth = max_search_depth
        self.n_search_episodes = n_search_episodes
        self.discount_factor = discount_factor
        self.value_weight = value_weight
        self._state_is_list = isinstance(env.state_shape[0],
                                         collections.Sequence)
        if optimizer is None:
            self._optimizer = Adam(learning_rate=0.001, beta1=0.9, beta2=0.999)
        else:
            self._optimizer = optimizer
        (self._graph, self._features, self._pred_prob, self._pred_value,
         self._search_prob,
         self._search_value) = self._build_graph(None, 'global', model_dir)
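The discount_factor argument documented above controls how rewards collected later in a search episode are weighted relative to immediate rewards. A generic sketch of discounted-return computation, included only as an illustration (it is not this class's internal code):

def discounted_returns(rewards, discount_factor=0.99):
    # Walk the episode backwards so each step's return accumulates all
    # discounted future rewards.
    returns = []
    g = 0.0
    for r in reversed(rewards):
        g = r + discount_factor * g
        returns.append(g)
    return list(reversed(returns))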
Example #8
    def __init__(self,
                 env,
                 policy,
                 max_rollout_length=20,
                 discount_factor=0.99,
                 advantage_lambda=0.98,
                 value_weight=1.0,
                 entropy_weight=0.01,
                 optimizer=None,
                 model_dir=None,
                 use_hindsight=False):
        """Create an object for optimizing a policy.

    Parameters
    ----------
    env: Environment
      the Environment to interact with
    policy: Policy
      the Policy to optimize.  Its create_layers() method must return a dict containing the
      keys 'action_prob' and 'value', corresponding to the action probabilities and value estimate
    max_rollout_length: int
      the maximum length of rollouts to generate
    discount_factor: float
      the discount factor to use when computing rewards
    advantage_lambda: float
      the parameter for trading bias vs. variance in Generalized Advantage Estimation
    value_weight: float
      a scale factor for the value loss term in the loss function
    entropy_weight: float
      a scale factor for the entropy term in the loss function
    optimizer: Optimizer
      the optimizer to use.  If None, a default optimizer is used.
    model_dir: str
      the directory in which the model will be saved.  If None, a temporary directory will be created.
    use_hindsight: bool
      if True, use Hindsight Experience Replay
    """
        self._env = env
        self._policy = policy
        self.max_rollout_length = max_rollout_length
        self.discount_factor = discount_factor
        self.advantage_lambda = advantage_lambda
        self.value_weight = value_weight
        self.entropy_weight = entropy_weight
        self.use_hindsight = use_hindsight
        self._state_is_list = isinstance(env.state_shape[0],
                                         collections.Sequence)
        if optimizer is None:
            self._optimizer = Adam(learning_rate=0.001, beta1=0.9, beta2=0.999)
        else:
            self._optimizer = optimizer
        (self._graph, self._features, self._rewards, self._actions,
         self._action_prob, self._value,
         self._advantages) = self._build_graph(None, 'global', model_dir)
        with self._graph._get_tf("Graph").as_default():
            self._session = tf.Session()
        self._rnn_states = self._graph.rnn_zero_states
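advantage_lambda above is the lambda of Generalized Advantage Estimation, trading bias against variance in the advantage targets. A minimal sketch of the GAE recursion for a single rollout (a hypothetical helper, assuming values contains one extra entry for the state reached after the final step):

def gae_advantages(rewards, values, discount_factor=0.99, advantage_lambda=0.98):
    advantages = []
    gae = 0.0
    for t in reversed(range(len(rewards))):
        # Temporal-difference error at step t.
        delta = rewards[t] + discount_factor * values[t + 1] - values[t]
        # Exponentially weighted sum of future TD errors.
        gae = delta + discount_factor * advantage_lambda * gae
        advantages.append(gae)
    return list(reversed(advantages))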
Example #9
    def _get_tf(self, obj):
        """Fetches underlying TensorFlow primitives.

    Parameters
    ----------
    obj: str
      If "Graph", returns tf.Graph instance. If "FileWriter", returns
      tf.summary.FileWriter. If "Optimizer", returns the optimizer. If
      "train_op", returns the train operation. If "summary_op", returns the
      merged summary. If "GlobalStep" returns the global step.
    Returns
    -------
    TensorFlow Object

    """

        if obj in self.tensor_objects and self.tensor_objects[obj] is not None:
            return self.tensor_objects[obj]
        if obj == "Graph":
            self.tensor_objects['Graph'] = tf.Graph()
        elif obj == "FileWriter":
            self.tensor_objects['FileWriter'] = tf.summary.FileWriter(
                self.model_dir)
        elif obj == 'Optimizer':
            if self.optimizer is None:
                self.optimizer = Adam(learning_rate=self.learning_rate,
                                      beta1=0.9,
                                      beta2=0.999,
                                      epsilon=1e-7)
            self.tensor_objects[
                'Optimizer'] = self.optimizer._create_optimizer(
                    self._get_tf('GlobalStep'))
        elif obj == 'train_op':
            self.tensor_objects['train_op'] = self._get_tf(
                'Optimizer').minimize(self.loss.out_tensor,
                                      global_step=self._get_tf('GlobalStep'))
        elif obj == 'summary_op':
            self.tensor_objects['summary_op'] = tf.summary.merge_all(
                key=tf.GraphKeys.SUMMARIES)
        elif obj == 'GlobalStep':
            with self._get_tf("Graph").as_default():
                self.tensor_objects['GlobalStep'] = tf.Variable(
                    0, trainable=False)
        return self._get_tf(obj)
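_get_tf builds each TensorFlow primitive the first time it is requested, stores it in tensor_objects, and returns the cached object on later calls. The caching pattern in miniature, as a generic sketch (not DeepChem code):

class LazyCache(object):

    def __init__(self):
        self.objects = {}

    def get(self, key, factory):
        # Construct on first access, then reuse the cached object afterwards.
        if key not in self.objects or self.objects[key] is None:
            self.objects[key] = factory()
        return self.objects[key]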
Example #10
  def test_skewed_missing_classification_overfit(self):
    """TG, skewed data, few actives

    Test TensorGraph models overfit 0/1 datasets with missing data and few
    actives. This is intended to be as close to singletask MUV datasets as
    possible.
    """
    n_samples = 5120
    n_features = 6
    n_tasks = 1
    n_classes = 2

    # Generate dummy dataset
    np.random.seed(123)
    p = .002
    ids = np.arange(n_samples)
    X = np.random.rand(n_samples, n_features)
    y = np.random.binomial(1, p, size=(n_samples, n_tasks))
    w = np.ones((n_samples, n_tasks))
    y_flat, w_flat = np.squeeze(y), np.squeeze(w)
    y_nonzero = y_flat[w_flat != 0]
    num_nonzero = np.count_nonzero(y_nonzero)
    weight_nonzero = len(y_nonzero) / num_nonzero
    w_flat[y_flat != 0] = weight_nonzero
    w = np.reshape(w_flat, (n_samples, n_tasks))

    dataset = dc.data.DiskDataset.from_numpy(X, y, w, ids)

    classification_metric = dc.metrics.Metric(dc.metrics.roc_auc_score)
    model = dc.models.MultitaskClassifier(
        n_tasks,
        n_features,
        dropouts=[0.],
        weight_init_stddevs=[1.],
        batch_size=n_samples)
    model.set_optimizer(Adam(learning_rate=0.003, beta1=0.9, beta2=0.999))

    # Fit trained model
    model.fit(dataset, nb_epoch=100)
    model.save()

    # Eval model on train
    scores = model.evaluate(dataset, [classification_metric])
    assert scores[classification_metric.name] > .7
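The reweighting above multiplies the weight of the rare positives by len(y_nonzero) / num_nonzero so that actives and inactives contribute comparable total weight to the loss. A toy illustration with made-up labels:

import numpy as np

y_flat = np.array([0, 0, 0, 0, 1, 0, 0, 1, 0, 0])  # 2 actives out of 10 samples
w_flat = np.ones(len(y_flat))
w_flat[y_flat != 0] = len(y_flat) / np.count_nonzero(y_flat)
print(w_flat)  # [1. 1. 1. 1. 5. 1. 1. 5. 1. 1.]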
Example #11
def eval_tic_tac_toe(value_weight,
                     num_epoch_rounds=1,
                     games=10**4,
                     rollouts=10**5):
    """
    Returns the average reward over 1k games after 100k rollouts
    :param value_weight:
    :return:
    """
    env = deepchem.rl.envs.tictactoe.TicTacToeEnvironment()
    policy = TicTacToePolicy()
    model_dir = "/tmp/tictactoe"
    try:
        shutil.rmtree(model_dir)
    except OSError:
        pass

    avg_rewards = []
    for j in range(num_epoch_rounds):
        a3c = dc.rl.A3C(env,
                        policy,
                        entropy_weight=0.01,
                        value_weight=value_weight,
                        model_dir=model_dir,
                        optimizer=Adam(learning_rate=0.001))
        try:
            a3c.restore()
        except Exception:
            print("unable to restore")
        a3c.fit(rollouts)
        rewards = []
        for i in range(games):
            env.reset()
            reward = -float('inf')
            while not env._terminated:
                action = a3c.select_action(env._state)
                reward = env.step(action)
            rewards.append(reward)
        avg_rewards.append({(j + 1) * rollouts: np.mean(rewards)})
    return avg_rewards
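A hypothetical way to call the helper above, sweeping the value-loss weight (the small game and rollout counts here are chosen only to keep the run short):

for vw in [0.5, 1.0, 2.0]:
    print(vw, eval_tic_tac_toe(value_weight=vw, games=100, rollouts=1000))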
Example #12
    def __init__(self,
                 env,
                 policy,
                 max_rollout_length=20,
                 discount_factor=0.99,
                 advantage_lambda=0.98,
                 value_weight=1.0,
                 entropy_weight=0.01,
                 optimizer=None,
                 model_dir=None,
                 use_hindsight=False,
                 worker_count=multiprocessing.cpu_count(),
                 zero_terminal=True,
                 callbacks=[]):
        """Create an object for optimizing a policy.

    Parameters
    ----------
    env: Environment
      the Environment to interact with
    policy: Policy
      the Policy to optimize.  Its create_layers() method must return a dict containing the
      keys 'action_prob' and 'value' (for discrete action spaces) or 'action_mean', 'action_std',
      and 'value' (for continuous action spaces)
    max_rollout_length: int
      the maximum length of rollouts to generate
    discount_factor: float
      the discount factor to use when computing rewards
    advantage_lambda: float
      the parameter for trading bias vs. variance in Generalized Advantage Estimation
    value_weight: float
      a scale factor for the value loss term in the loss function
    entropy_weight: float
      a scale factor for the entropy term in the loss function
    optimizer: Optimizer
      the optimizer to use.  If None, a default optimizer is used.
    model_dir: str
      the directory in which the model will be saved.  If None, a temporary directory will be created.
    use_hindsight: bool
      if True, use Hindsight Experience Replay
    zero_terminal: bool
      whether terminal states should be assigned a value of zero (the default); if
      False, the environment is assumed to terminate at any state due to external conditions.
    callbacks: list
      each rollout is passed to the on_callback method of each callback
    """
        self._env = env
        self._policy = policy
        self.max_rollout_length = max_rollout_length
        self.discount_factor = discount_factor
        self.advantage_lambda = advantage_lambda
        self.value_weight = value_weight
        self.entropy_weight = entropy_weight
        self.use_hindsight = use_hindsight
        self.worker_count = worker_count
        self.zero_terminal = zero_terminal
        self.callbacks = callbacks
        self._state_is_list = isinstance(env.state_shape[0],
                                         collections.Sequence)
        if optimizer is None:
            self._optimizer = Adam(learning_rate=0.001, beta1=0.9, beta2=0.999)
        else:
            self._optimizer = optimizer
        fields = self._build_graph(None, 'global', model_dir)
        if self.continuous:
            (self._graph, self._features, self._rewards, self._actions,
             self._action_mean, self._action_std, self._value,
             self._advantages, self._loss_components) = fields
        else:
            (self._graph, self._features, self._rewards, self._actions,
             self._action_prob, self._value, self._advantages,
             self._loss_components) = fields
        with self._graph._get_tf("Graph").as_default():
            self._session = tf.Session()
        self._rnn_states = self._graph.rnn_zero_states
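The callbacks argument above expects objects exposing an on_callback method that receives each finished rollout. A minimal hypothetical callback (what exactly is passed as the rollout depends on the implementation):

class RolloutCounter(object):

    def __init__(self):
        self.count = 0

    def on_callback(self, rollout):
        # Invoked once per generated rollout.
        self.count += 1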
Example #13
    def __init__(self,
                 env,
                 policy,
                 max_rollout_length=20,
                 optimization_rollouts=8,
                 optimization_epochs=4,
                 batch_size=64,
                 clipping_width=0.2,
                 discount_factor=0.99,
                 advantage_lambda=0.98,
                 value_weight=1.0,
                 entropy_weight=0.01,
                 optimizer=None,
                 model_dir=None,
                 use_hindsight=False):
        """Create an object for optimizing a policy.

    Parameters
    ----------
    env: Environment
      the Environment to interact with
    policy: Policy
      the Policy to optimize.  It must have outputs with the names 'action_prob'
      and 'value', corresponding to the action probabilities and value estimate
    max_rollout_length: int
      the maximum length of rollouts to generate
    optimization_rollouts: int
      the number of rollouts to generate for each iteration of optimization
    optimization_epochs: int
      the number of epochs of optimization to perform within each iteration
    batch_size: int
      the batch size to use during optimization.  If this is 0, each rollout will be used as a
      separate batch.
    clipping_width: float
      in computing the PPO loss function, the probability ratio is clipped to the range
      (1-clipping_width, 1+clipping_width)
    discount_factor: float
      the discount factor to use when computing rewards
    advantage_lambda: float
      the parameter for trading bias vs. variance in Generalized Advantage Estimation
    value_weight: float
      a scale factor for the value loss term in the loss function
    entropy_weight: float
      a scale factor for the entropy term in the loss function
    optimizer: Optimizer
      the optimizer to use.  If None, a default optimizer is used.
    model_dir: str
      the directory in which the model will be saved.  If None, a temporary directory will be created.
    use_hindsight: bool
      if True, use Hindsight Experience Replay
    """
        self._env = env
        self._policy = policy
        self.max_rollout_length = max_rollout_length
        self.optimization_rollouts = optimization_rollouts
        self.optimization_epochs = optimization_epochs
        self.batch_size = batch_size
        self.clipping_width = clipping_width
        self.discount_factor = discount_factor
        self.advantage_lambda = advantage_lambda
        self.value_weight = value_weight
        self.entropy_weight = entropy_weight
        self.use_hindsight = use_hindsight
        self._state_is_list = isinstance(env.state_shape[0],
                                         collections.Sequence)
        if optimizer is None:
            self._optimizer = Adam(learning_rate=0.001, beta1=0.9, beta2=0.999)
        else:
            self._optimizer = optimizer
        self._model = self._build_model(model_dir)
        output_names = policy.output_names
        output_tensors = self._model._output_tensors
        self._value = output_tensors[output_names.index('value')]
        self._action_prob = output_tensors[output_names.index('action_prob')]
        rnn_outputs = [
            i for i, n in enumerate(output_names) if n == 'rnn_state'
        ]
        self._rnn_final_states = [output_tensors[i] for i in rnn_outputs]
        self._session = tf.Session()
        self._train_op = self._model._tf_optimizer.minimize(
            self._model._loss_tensor)
        self._rnn_states = policy.rnn_initial_states
        if len(self._rnn_states) > 0 and batch_size != 0:
            raise ValueError(
                'Cannot batch rollouts when the policy contains a recurrent layer.  Set batch_size to 0.'
            )
        self._checkpoint = tf.train.Checkpoint()
        self._checkpoint.save_counter  # Ensure the variable has been created
        self._checkpoint.listed = self._model.model.trainable_variables
        self._session.run(self._checkpoint.save_counter.initializer)
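clipping_width above parameterizes PPO's clipped surrogate objective, in which the probability ratio is clipped to (1 - clipping_width, 1 + clipping_width). A minimal NumPy sketch of that term, shown only for clarity (it is not the class's actual loss code):

import numpy as np

def clipped_surrogate(prob, old_prob, advantage, clipping_width=0.2):
    ratio = prob / old_prob
    clipped = np.clip(ratio, 1 - clipping_width, 1 + clipping_width)
    # PPO maximizes the elementwise minimum of the unclipped and clipped terms.
    return np.minimum(ratio * advantage, clipped * advantage)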
Example #14
    def test_continuous(self):
        """Test A3C on an environment with a continous action space."""

        # The state consists of two numbers: a current value and a target value.
        # The policy just needs to learn to output the target value (or at least
        # move toward it).

        class TestEnvironment(dc.rl.Environment):
            def __init__(self):
                super(TestEnvironment, self).__init__((2, ),
                                                      action_shape=(1, ))

            def reset(self):
                target = np.random.uniform(-50, 50)
                self._state = np.array([0, target])
                self._terminated = False
                self.count = 0

            def step(self, action):
                target = self._state[1]
                dist = np.abs(target - action[0])
                old_dist = np.abs(target - self._state[0])
                new_state = np.array([action[0], target])
                self._state = new_state
                self.count += 1
                reward = old_dist - dist
                self._terminated = (self.count == 10)
                return reward

        # A simple policy with no hidden layers.

        class TestPolicy(dc.rl.Policy):
            def create_layers(self, state, **kwargs):
                action_mean = Dense(1,
                                    in_layers=state,
                                    weights_initializer=tf.zeros_initializer)
                action_std = Constant([10.0])
                value = Dense(1, in_layers=state)
                return {
                    'action_mean': action_mean,
                    'action_std': action_std,
                    'value': value
                }

        # Optimize it.

        env = TestEnvironment()
        learning_rate = PolynomialDecay(initial_rate=0.005,
                                        final_rate=0.0005,
                                        decay_steps=25000)
        a3c = dc.rl.A3C(env,
                        TestPolicy(),
                        discount_factor=0,
                        optimizer=Adam(learning_rate=learning_rate))
        a3c.fit(25000)

        # Try running it and see if it reaches the target

        env.reset()
        while not env.terminated:
            env.step(a3c.select_action(env.state, deterministic=True))
        distance = np.abs(env.state[0] - env.state[1])
        tolerance = max(1.0, 0.1 * np.abs(env.state[1]))
        assert distance < tolerance
Example #15
class TensorGraph(Model):

  def __init__(self,
               tensorboard=False,
               tensorboard_log_frequency=100,
               batch_size=100,
               random_seed=None,
               use_queue=True,
               mode="regression",
               graph=None,
               learning_rate=0.001,
               **kwargs):
    """
    TODO(LESWING) allow a model to change its learning rate
    Parameters
    ----------
    tensorboard: bool
      Should we log to model_dir data for tensorboard?
    tensorboard_log_frequency: int
      How many training batches before logging tensorboard?
    batch_size: int
      default batch size for training and evaluating
    use_queue: boolean
      if True when building we will create a tf.FIFO queue, which will hold
      all features, weights, and labels.  We will feed the inputs into this
      queue in batches of self.batch_size in a separate thread from the
      thread training the model.  You cannot use a queue when
      batches are not of consistent size
    mode: str
      "regression" or "classification".  "classification" models on
      predict will do an argmax(axis=2) to determine the class of the
      prediction.
    graph: tensorflow.Graph
      the Graph in which to create Tensorflow objects.  If None, a new Graph
      is created.
    learning_rate: float or LearningRateSchedule
      the learning rate to use for optimization
    kwargs
    """

    # Layer Management
    self.nxgraph = nx.DiGraph()
    self.layers = dict()
    self.features = list()
    self.labels = list()
    self.outputs = list()
    self.task_weights = list()
    self.loss = None
    self.built = False
    self.queue_installed = False
    self.optimizer = Adam(
        learning_rate=learning_rate, beta1=0.9, beta2=0.999, epsilon=1e-7)

    # Singular place to hold Tensor objects which don't serialize
    # These have to be reconstructed on restoring from pickle
    # See TensorGraph._get_tf() for more details on lazy construction
    self.tensor_objects = {
        "FileWriter": None,
        "Graph": graph,
        "train_op": None,
        "summary_op": None,
    }
    self.tensorboard = tensorboard
    self.tensorboard_log_frequency = tensorboard_log_frequency
    self.tensorboard_step = 0
    self.mode = mode
    self.global_step = 0
    self.last_checkpoint = None
    self.use_queue = use_queue

    self.batch_size = batch_size
    self.random_seed = random_seed
    super(TensorGraph, self).__init__(**kwargs)
    self.save_file = "%s/%s" % (self.model_dir, "model")
    self.model_class = None

    self.rnn_initial_states = []
    self.rnn_final_states = []
    self.rnn_zero_states = []

  def _add_layer(self, layer):
    if layer.name is None:
      layer.name = "%s_%s" % (layer.__class__.__name__, len(self.layers) + 1)
    if layer.name in self.layers:
      return
    if isinstance(layer, Feature):
      self.features.append(layer)
    if isinstance(layer, Label):
      self.labels.append(layer)
    if isinstance(layer, Weights):
      self.task_weights.append(layer)
    self.nxgraph.add_node(layer.name)
    self.layers[layer.name] = layer
    for in_layer in layer.in_layers:
      self._add_layer(in_layer)
      self.nxgraph.add_edge(in_layer.name, layer.name)

  def fit(self,
          dataset,
          nb_epoch=10,
          max_checkpoints_to_keep=5,
          checkpoint_interval=1000):
    return self.fit_generator(
        self.default_generator(dataset, epochs=nb_epoch),
        max_checkpoints_to_keep, checkpoint_interval)

  def fit_generator(self,
                    feed_dict_generator,
                    max_checkpoints_to_keep=5,
                    checkpoint_interval=1000):

    def create_feed_dict():
      if self.use_queue:
        while True:
          yield {self._training_placeholder: 1.0}
      for d in feed_dict_generator:
        feed_dict = {k.out_tensor: v for k, v in six.iteritems(d)}
        feed_dict[self._training_placeholder] = 1.0
        yield feed_dict

    if not self.built:
      self.build()
    with self._get_tf("Graph").as_default():
      time1 = time.time()
      train_op = self._get_tf('train_op')
      saver = tf.train.Saver(max_to_keep=max_checkpoints_to_keep)
      with tf.Session() as sess:
        self._initialize_weights(sess, saver)
        avg_loss, n_batches = 0.0, 0.0
        coord = tf.train.Coordinator()
        n_samples = 0
        if self.use_queue:
          enqueue_thread = threading.Thread(
              target=_enqueue_batch,
              args=(self, feed_dict_generator, self._get_tf("Graph"), sess,
                    coord))
          enqueue_thread.start()
        output_tensors = [x.out_tensor for x in self.outputs]
        fetches = output_tensors + [train_op, self.loss.out_tensor]
        for feed_dict in create_feed_dict():
          try:
            fetched_values = sess.run(fetches, feed_dict=feed_dict)
            loss = fetched_values[-1]
            avg_loss += loss
            n_batches += 1
            self.global_step += 1
            n_samples += 1
            if self.tensorboard and n_samples % self.tensorboard_log_frequency == 0:
              summary = sess.run(
                  self._get_tf("summary_op"), feed_dict=feed_dict)
              self._log_tensorboard(summary)
          except OutOfRangeError:
            break
          if self.global_step % checkpoint_interval == checkpoint_interval - 1:
            saver.save(sess, self.save_file, global_step=self.global_step)
            self.last_checkpoint = saver.last_checkpoints[-1]
            avg_loss = float(avg_loss) / n_batches
            print('Ending global_step %d: Average loss %g' % (self.global_step,
                                                              avg_loss))
            avg_loss, n_batches = 0.0, 0.0
        avg_loss = float(avg_loss) / n_batches
        print('Ending global_step %d: Average loss %g' % (self.global_step,
                                                          avg_loss))
        saver.save(sess, self.save_file, global_step=self.global_step)
        self.last_checkpoint = saver.last_checkpoints[-1]
      ############################################################## TIMING
      time2 = time.time()
      print("TIMING: model fitting took %0.3f s" % (time2 - time1))
      ############################################################## TIMING

  def _log_tensorboard(self, summary):
    """
    TODO(LESWING) set epoch
    Parameters
    ----------
    Returns
    -------
    """
    global_step = int(self.global_step)
    writer = self._get_tf("FileWriter")
    writer.reopen()
    writer.add_summary(summary, global_step=global_step)
    writer.close()

  def fit_on_batch(self, X, y, w):
    if not self.built:
      self.build()
    dataset = NumpyDataset(X, y)
    return self.fit(dataset, nb_epoch=1)

  def default_generator(self,
                        dataset,
                        epochs=1,
                        predict=False,
                        pad_batches=True):
    if len(self.features) > 1:
      raise ValueError("More than one Feature, must use generator")
    if len(self.labels) > 1:
      raise ValueError("More than one Label, must use generator")
    if len(self.task_weights) > 1:
      raise ValueError("More than one Weights, must use generator")
    for epoch in range(epochs):
      for (X_b, y_b, w_b, ids_b) in dataset.iterbatches(
          batch_size=self.batch_size,
          deterministic=True,
          pad_batches=pad_batches):
        feed_dict = dict()
        if len(self.labels) == 1 and y_b is not None and not predict:
          feed_dict[self.labels[0]] = y_b
        if len(self.features) == 1 and X_b is not None:
          feed_dict[self.features[0]] = X_b
        if len(self.task_weights) == 1 and w_b is not None and not predict:
          feed_dict[self.task_weights[0]] = w_b
        for (initial_state, zero_state) in zip(self.rnn_initial_states,
                                               self.rnn_zero_states):
          feed_dict[initial_state] = zero_state
        yield feed_dict

  def predict_on_generator(self, generator, transformers=[]):
    """Generates output predictions for the input samples,
      processing the samples in a batched way.

    # Arguments
        generator: a generator yielding feed dicts, e.g. from default_generator().
        transformers: list of transformers to undo on the raw predictions.

    # Returns
        A Numpy array of predictions.
    """
    retval = self.predict_proba_on_generator(generator, transformers)
    if self.mode == 'classification':
      retval = np.expand_dims(from_one_hot(retval, axis=2), axis=1)
    return retval

  def predict_proba_on_generator(self, generator, transformers=[]):
    """
    Returns:
      y_pred: numpy ndarray of shape (n_samples, n_classes*n_tasks)
    """
    if not self.built:
      self.build()
    with self._get_tf("Graph").as_default():
      with tf.Session() as sess:
        saver = tf.train.Saver()
        self._initialize_weights(sess, saver)
        out_tensors = [x.out_tensor for x in self.outputs]
        results = []
        for feed_dict in generator:
          feed_dict = {
              self.layers[k.name].out_tensor: v
              for k, v in six.iteritems(feed_dict)
          }
          feed_dict[self._training_placeholder] = 0.0
          result = np.array(sess.run(out_tensors, feed_dict=feed_dict))
          if len(result.shape) == 3:
            result = np.transpose(result, axes=[1, 0, 2])
          result = undo_transforms(result, transformers)
          results.append(result)
        return np.concatenate(results, axis=0)

  def bayesian_predict_on_batch(self, X, transformers=[], n_passes=4):
    """
    Returns:
      mu: numpy ndarray of shape (n_samples, n_tasks)
      sigma: numpy ndarray of shape (n_samples, n_tasks)
    """
    dataset = NumpyDataset(X=X, y=None, n_tasks=len(self.outputs))
    y_ = []
    for i in range(n_passes):
      generator = self.default_generator(
          dataset, predict=True, pad_batches=True)
      y_.append(self.predict_on_generator(generator, transformers))

    y_ = np.concatenate(y_, axis=2)
    mu = np.mean(y_, axis=2)
    sigma = np.std(y_, axis=2)

    return mu, sigma

  def predict_on_smiles_batch(self,
                              smiles,
                              featurizer,
                              n_tasks,
                              transformers=[]):
    """
    # Returns:
      A numpy ndarray of shape (n_samples, n_tasks)
    """
    convmols = featurize_smiles_np(smiles, featurizer)

    dataset = NumpyDataset(X=convmols, y=None, n_tasks=len(self.outputs))
    generator = self.default_generator(dataset, predict=True, pad_batches=True)
    return self.predict_on_generator(generator, transformers)

  def predict_on_batch(self, X, sess=None, transformers=[]):
    """Generates output predictions for the input samples,
      processing the samples in a batched way.

    # Arguments
        X: the input data, as a Numpy array.
        sess: unused by this implementation.
        transformers: list of transformers to undo on the raw predictions.

    # Returns
        A Numpy array of predictions.
    """
    dataset = NumpyDataset(X=X, y=None)
    generator = self.default_generator(dataset, predict=True, pad_batches=False)
    return self.predict_on_generator(generator, transformers)

  def predict_proba_on_batch(self, X, sess=None, transformers=[]):
    dataset = NumpyDataset(X=X, y=None)
    generator = self.default_generator(dataset, predict=True, pad_batches=False)
    return self.predict_proba_on_generator(generator, transformers)

  def predict(self, dataset, transformers=[], batch_size=None):
    """
    Uses self to make predictions on provided Dataset object.

    Returns:
      y_pred: numpy ndarray of shape (n_samples,)
    """
    generator = self.default_generator(dataset, predict=True, pad_batches=False)
    return self.predict_on_generator(generator, transformers)

  def predict_proba(self, dataset, transformers=[], batch_size=None):
    """
    TODO: Do transformers even make sense here?

    Returns:
      y_pred: numpy ndarray of shape (n_samples, n_classes*n_tasks)
    """
    generator = self.default_generator(dataset, predict=True, pad_batches=False)
    return self.predict_proba_on_generator(generator, transformers)

  def topsort(self):
    return nx.topological_sort(self.nxgraph)

  def build(self):
    if self.built:
      return
    with self._get_tf("Graph").as_default():
      self._training_placeholder = tf.placeholder(dtype=tf.float32, shape=())
      if self.random_seed is not None:
        tf.set_random_seed(self.random_seed)
      self._install_queue()
      order = self.topsort()
      for node in order:
        with tf.name_scope(node):
          node_layer = self.layers[node]
          node_layer.create_tensor(training=self._training_placeholder)
          self.rnn_initial_states += node_layer.rnn_initial_states
          self.rnn_final_states += node_layer.rnn_final_states
          self.rnn_zero_states += node_layer.rnn_zero_states
          node_layer.add_summary_to_tg()

      self.built = True

    for layer in self.layers.values():
      if layer.tensorboard:
        self.tensorboard = True
    tf.summary.scalar("loss", self.loss.out_tensor)
    for layer in self.layers.values():
      if layer.tensorboard:
        tf.summary.tensor_summary(layer.name, layer.out_tensor)
    if self.tensorboard:
      writer = self._get_tf("FileWriter")
      writer.add_graph(self._get_tf("Graph"))
      writer.close()

    # As a sanity check, make sure all tensors have the correct shape.

    for layer in self.layers.values():
      try:
        assert list(layer.shape) == layer.out_tensor.get_shape().as_list(
        ), '%s: Expected shape %s does not match actual shape %s' % (
            layer.name, layer.shape, layer.out_tensor.get_shape().as_list())
      except NotImplementedError:
        pass

  def _install_queue(self):
    """
    """
    if not self.use_queue or self.queue_installed:
      for layer in self.features + self.labels + self.task_weights:
        layer.pre_queue = True
      return
    names = []
    shapes = []
    pre_q_inputs = []
    q = InputFifoQueue(shapes, names, in_layers=pre_q_inputs)
    q.name = "%s_%s" % (q.__class__.__name__, len(self.layers) + 1)

    for layer in self.features + self.labels + self.task_weights:
      pre_q_input = layer.create_pre_q(self.batch_size)
      shapes.append(pre_q_input.shape)
      names.append(pre_q_input.name)
      pre_q_inputs.append(pre_q_input)

      layer.in_layers.append(q)
      self.nxgraph.add_edge(q.name, layer.name)

    self._add_layer(q)
    self.input_queue = q
    self.queue_installed = True

  def set_loss(self, layer):
    self._add_layer(layer)
    self.loss = layer

  def add_output(self, layer):
    self._add_layer(layer)
    self.outputs.append(layer)

  def set_optimizer(self, optimizer):
    """Set the optimizer to use for fitting."""
    self.optimizer = optimizer

  def get_pickling_errors(self, obj, seen=None):
    if seen is None:
      seen = []
    try:
      state = obj.__getstate__()
    except AttributeError:
      return
    if state is None:
      return
    if isinstance(state, tuple):
      if not isinstance(state[0], dict):
        state = state[1]
      else:
        state = state[0].update(state[1])
    result = {}
    for i in state:
      try:
        pickle.dumps(state[i], protocol=2)
      except pickle.PicklingError:
        if not state[i] in seen:
          seen.append(state[i])
          result[i] = self.get_pickling_errors(state[i], seen)
    return result

  def save(self):
    # Remove out_tensor from the object to be pickled
    must_restore = False
    tensor_objects = self.tensor_objects
    rnn_initial_states = self.rnn_initial_states
    rnn_final_states = self.rnn_final_states
    rnn_zero_states = self.rnn_zero_states
    self.tensor_objects = {}
    self.rnn_initial_states = []
    self.rnn_final_states = []
    self.rnn_zero_states = []
    out_tensors = []
    if self.built:
      must_restore = True
      for node in self.topsort():
        node_layer = self.layers[node]
        out_tensors.append(node_layer.none_tensors())
      optimizer = self.optimizer
      self.optimizer = None
      training_placeholder = self._training_placeholder
      self._training_placeholder = None
      self.built = False

    # Pickle itself
    pickle_name = os.path.join(self.model_dir, "model.pickle")
    with open(pickle_name, 'wb') as fout:
      try:
        pickle.dump(self, fout)
      except Exception as e:
        print(self.get_pickling_errors(self))
        raise e

    # add out_tensor back to everyone
    if must_restore:
      for index, node in enumerate(self.topsort()):
        node_layer = self.layers[node]
        node_layer.set_tensors(out_tensors[index])
      self._training_placeholder = training_placeholder
      self.optimizer = optimizer
      self.built = True
    self.tensor_objects = tensor_objects
    self.rnn_initial_states = rnn_initial_states
    self.rnn_final_states = rnn_final_states
    self.rnn_zero_states = rnn_zero_states

  def evaluate_generator(self,
                         feed_dict_generator,
                         metrics,
                         transformers=[],
                         labels=None,
                         outputs=None,
                         weights=[],
                         per_task_metrics=False):

    if labels is None:
      raise ValueError
    n_tasks = len(self.outputs)
    n_classes = self.outputs[0].out_tensor.get_shape()[-1].value
    evaluator = GeneratorEvaluator(
        self,
        feed_dict_generator,
        transformers,
        labels=labels,
        outputs=outputs,
        weights=weights,
        n_tasks=n_tasks,
        n_classes=n_classes)
    if not per_task_metrics:
      scores = evaluator.compute_model_performance(metrics)
      return scores
    else:
      scores, per_task_scores = evaluator.compute_model_performance(
          metrics, per_task_metrics=per_task_metrics)
      return scores, per_task_scores

  def get_layer_variables(self, layer):
    """Get the list of trainable variables in a layer of the graph."""
    if not self.built:
      self.build()
    with self._get_tf("Graph").as_default():
      return tf.get_collection(
          tf.GraphKeys.GLOBAL_VARIABLES, scope=layer.variable_scope)

  def get_global_step(self):
    return self._get_tf("GlobalStep")

  def _get_tf(self, obj):
    """
    TODO(LESWING) REALLY NEED TO DOCUMENT THIS
    Parameters
    ----------
    obj

    Returns
    -------
    TensorFlow Object

    """

    if obj in self.tensor_objects and self.tensor_objects[obj] is not None:
      return self.tensor_objects[obj]
    if obj == "Graph":
      self.tensor_objects['Graph'] = tf.Graph()
    elif obj == "FileWriter":
      self.tensor_objects['FileWriter'] = tf.summary.FileWriter(self.model_dir)
    elif obj == 'Optimizer':
      self.tensor_objects['Optimizer'] = self.optimizer._create_optimizer(
          self._get_tf('GlobalStep'))
    elif obj == 'train_op':
      self.tensor_objects['train_op'] = self._get_tf('Optimizer').minimize(
          self.loss.out_tensor, global_step=self._get_tf('GlobalStep'))
    elif obj == 'summary_op':
      self.tensor_objects['summary_op'] = tf.summary.merge_all(
          key=tf.GraphKeys.SUMMARIES)
    elif obj == 'GlobalStep':
      with self._get_tf("Graph").as_default():
        self.tensor_objects['GlobalStep'] = tf.Variable(0, trainable=False)
    return self._get_tf(obj)

  def _initialize_weights(self, sess, saver):
    """
    Parameters
    ----------
    sess: tf.Session
      The Session must be open
    saver: tf.train.Saver
      A saver object to save/restore checkpoints

    Returns
    -------

    """
    if self.last_checkpoint is None:
      sess.run(tf.global_variables_initializer())
      saver.save(sess, self.save_file, global_step=self.global_step)
      self.last_checkpoint = saver.last_checkpoints[-1]
    else:
      saver.restore(sess, self.last_checkpoint)

  def get_num_tasks(self):
    return len(self.outputs)

  def get_pre_q_input(self, input_layer):
    layer_name = input_layer.name
    pre_q_name = "%s_pre_q" % layer_name
    return self.layers[pre_q_name]

  @staticmethod
  def load_from_dir(model_dir):
    pickle_name = os.path.join(model_dir, "model.pickle")
    with open(pickle_name, 'rb') as fin:
      tensorgraph = pickle.load(fin)
      tensorgraph.built = False
      return tensorgraph

  def __del__(self):
    pass
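fit_generator consumes dicts that map the graph's Feature, Label and Weights layers to numpy batches, which is exactly the shape default_generator yields. A minimal custom generator in the same shape, as a sketch (assuming tg is a TensorGraph with exactly one feature, label and weights layer):

def my_generator(dataset, tg, epochs=1):
    for epoch in range(epochs):
        for X_b, y_b, w_b, ids_b in dataset.iterbatches(batch_size=tg.batch_size,
                                                        pad_batches=True):
            yield {tg.features[0]: X_b,
                   tg.labels[0]: y_b,
                   tg.task_weights[0]: w_b}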
Example #16
    def test_roulette(self):
        """Test training a policy for the roulette environment."""

        # This is modeled after the Roulette-v0 environment from OpenAI Gym.
        # The player can bet on any number from 0 to 36, or walk away (which ends the
        # game).  The average reward for any bet is slightly negative, so the best
        # strategy is to walk away.

        class RouletteEnvironment(dc.rl.Environment):
            def __init__(self):
                super(RouletteEnvironment, self).__init__([(1, )], 38)
                self._state = [np.array([0])]

            def step(self, action):
                if action == 37:
                    self._terminated = True  # Walk away.
                    return 0.0
                wheel = np.random.randint(37)
                if wheel == 0:
                    if action == 0:
                        return 35.0
                    return -1.0
                if action != 0 and wheel % 2 == action % 2:
                    return 1.0
                return -1.0

            def reset(self):
                self._terminated = False

        env = RouletteEnvironment()

        # This policy just learns a constant probability for each action, and a constant for the value.

        class TestPolicy(dc.rl.Policy):
            def create_layers(self, state, **kwargs):
                action = Variable(np.ones(env.n_actions))
                output = SoftMax(in_layers=[
                    Reshape(in_layers=[action], shape=(-1, env.n_actions))
                ])
                value = Variable([0.0])
                return {'action_prob': output, 'value': value}

        # Optimize it.

        ppo = dc.rl.PPO(env,
                        TestPolicy(),
                        max_rollout_length=20,
                        optimizer=Adam(learning_rate=0.001))
        ppo.fit(30000)

        # It should have learned that the expected value is very close to zero, and that the best
        # action is to walk away.

        action_prob, value = ppo.predict([[0]])
        assert -0.5 < value[0] < 0.5
        assert action_prob.argmax() == 37
        assert ppo.select_action([[0]], deterministic=True) == 37

        # Verify that we can create a new PPO object, reload the parameters from the first one, and
        # get the same result.

        new_ppo = dc.rl.PPO(env, TestPolicy(), model_dir=ppo._graph.model_dir)
        new_ppo.restore()
        action_prob2, value2 = new_ppo.predict([[0]])
        assert value2 == value

        # Do the same thing, only using the "restore" argument to fit().

        new_ppo = dc.rl.PPO(env, TestPolicy(), model_dir=ppo._graph.model_dir)
        new_ppo.fit(0, restore=True)
        action_prob2, value2 = new_ppo.predict([[0]])
        assert value2 == value
Example #17
tokens = sorted(list(tokens))

print(tokens[0:5])

max_length = max(len(s) for s in train_smiles)
model = dc.models.SeqToSeq(tokens,
                           tokens,
                           max_length,
                           encoder_layers=2,
                           decoder_layers=2,
                           embedding_dimension=256,
                           model_dir='fingerprint')
batches_per_epoch = len(train_smiles) / model.batch_size

model.set_optimizer(
    Adam(learning_rate=ExponentialDecay(0.004, 0.9, batches_per_epoch)))


def generate_sequences(epochs):
    for i in range(epochs):
        for s in train_smiles:
            yield (s, s)


model.fit_sequences(generate_sequences(40))

predicted = model.predict_from_sequences(valid_smiles[:500])
count = 0
for s, p in zip(valid_smiles[:500], predicted):
    if ''.join(p) == s:
        count += 1
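ExponentialDecay(0.004, 0.9, batches_per_epoch) above shrinks the learning rate by a factor of 0.9 for every batches_per_epoch optimizer steps. The schedule it describes, as a minimal sketch (the exact staircase behavior in DeepChem may differ):

def exponential_decay(initial_rate, decay_rate, decay_steps, step):
    # Learning rate after `step` optimizer updates.
    return initial_rate * decay_rate ** (step / decay_steps)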
Example #18
    def __init__(self,
                 env,
                 policy,
                 max_rollout_length=20,
                 optimization_rollouts=8,
                 optimization_epochs=4,
                 batch_size=64,
                 clipping_width=0.2,
                 discount_factor=0.99,
                 advantage_lambda=0.98,
                 value_weight=1.0,
                 entropy_weight=0.01,
                 optimizer=None,
                 model_dir=None,
                 use_hindsight=False):
        """Create an object for optimizing a policy.

    Parameters
    ----------
    env: Environment
      the Environment to interact with
    policy: Policy
      the Policy to optimize.  Its create_layers() method must return a map containing the
      keys 'action_prob' and 'value', corresponding to the action probabilities and value estimate
    max_rollout_length: int
      the maximum length of rollouts to generate
    optimization_rollouts: int
      the number of rollouts to generate for each iteration of optimization
    optimization_epochs: int
      the number of epochs of optimization to perform within each iteration
    batch_size: int
      the batch size to use during optimization.  If this is 0, each rollout will be used as a
      separate batch.
    clipping_width: float
      in computing the PPO loss function, the probability ratio is clipped to the range
      (1-clipping_width, 1+clipping_width)
    discount_factor: float
      the discount factor to use when computing rewards
    advantage_lambda: float
      the parameter for trading bias vs. variance in Generalized Advantage Estimation
    value_weight: float
      a scale factor for the value loss term in the loss function
    entropy_weight: float
      a scale factor for the entropy term in the loss function
    optimizer: Optimizer
      the optimizer to use.  If None, a default optimizer is used.
    model_dir: str
      the directory in which the model will be saved.  If None, a temporary directory will be created.
    use_hindsight: bool
      if True, use Hindsight Experience Replay
    """
        self._env = env
        self._policy = policy
        self.max_rollout_length = max_rollout_length
        self.optimization_rollouts = optimization_rollouts
        self.optimization_epochs = optimization_epochs
        self.batch_size = batch_size
        self.clipping_width = clipping_width
        self.discount_factor = discount_factor
        self.advantage_lambda = advantage_lambda
        self.value_weight = value_weight
        self.entropy_weight = entropy_weight
        self.use_hindsight = use_hindsight
        self._state_is_list = isinstance(env.state_shape[0],
                                         collections.Sequence)
        if optimizer is None:
            self._optimizer = Adam(learning_rate=0.001, beta1=0.9, beta2=0.999)
        else:
            self._optimizer = optimizer
        (self._graph, self._features, self._rewards, self._actions,
         self._action_prob, self._value, self._advantages,
         self._old_action_prob) = self._build_graph(None, 'global', model_dir)
        with self._graph._get_tf("Graph").as_default():
            self._session = tf.Session()
            self._train_op = self._graph._get_tf('Optimizer').minimize(
                self._graph.loss.out_tensor)
        self._rnn_states = self._graph.rnn_zero_states
        if len(self._rnn_states) > 0 and batch_size != 0:
            raise ValueError(
                'Cannot batch rollouts when the policy contains a recurrent layer.  Set batch_size to 0.'
            )
Example #19
  tokens = tokens.union(set(c for c in s))
tokens = sorted(list(tokens))

print(tokens[0:5])

max_length = max(len(s) for s in train_smiles)
model = dc.models.SeqToSeq(tokens,
                           tokens,
                           max_length,
                           encoder_layers=2,
                           decoder_layers=2,
                           embedding_dimension=256,
                           model_dir='fingerprint')
batches_per_epoch = len(train_smiles)/model.batch_size

model.set_optimizer(Adam(learning_rate=ExponentialDecay(0.004, 0.9, batches_per_epoch)))

def generate_sequences(epochs):
  for i in range(epochs):
    for s in train_smiles:
      yield (s, s)

model.fit_sequences(generate_sequences(40))


predicted = model.predict_from_sequences(valid_smiles[:500])
count = 0
for s,p in zip(valid_smiles[:500], predicted):
  if ''.join(p) == s:
    count += 1
print('reproduced', count, 'of 500 validation SMILES strings')
Example #20
class TensorGraph(Model):

  def __init__(self,
               tensorboard=False,
               tensorboard_log_frequency=100,
               batch_size=100,
               random_seed=None,
               use_queue=True,
               graph=None,
               learning_rate=0.001,
               configproto=None,
               **kwargs):
    """
    Parameters
    ----------
    tensorboard: bool
      Should we log to model_dir data for tensorboard?
    tensorboard_log_frequency: int
      How many training batches before logging tensorboard?
    batch_size: int
      default batch size for training and evaluating
    use_queue: boolean
      if True when building we will create a tf.FIFO queue, which will hold
      all features, weights, and labels.  We will feed the inputs into this
      queue in batches of self.batch_size in a separate thread from the
      thread training the model.  You cannot use a queue when
      batches are not of consistent size
    graph: tensorflow.Graph
      the Graph in which to create Tensorflow objects.  If None, a new Graph
      is created.
    learning_rate: float or LearningRateSchedule
      the learning rate to use for optimization
    configproto: tf.ConfigProto
      a ConfigProto object used when creating the tf.Session()
    """

    # Layer Management
    self.layers = dict()
    self.features = list()
    self.labels = list()
    self.outputs = list()
    self.variances = list()
    self.task_weights = list()
    self.submodels = list()
    self.loss = Constant(0)
    self.built = False
    self.queue_installed = False
    self.optimizer = Adam(
        learning_rate=learning_rate, beta1=0.9, beta2=0.999, epsilon=1e-7)
    self.configproto = configproto

    # Singular place to hold Tensor objects which don't serialize
    # These have to be reconstructed on restoring from pickle
    # See TensorGraph._get_tf() for more details on lazy construction
    self.tensor_objects = {
        "FileWriter": None,
        "Graph": graph,
        "train_op": None,
        "summary_op": None,
    }
    self.tensorboard = tensorboard
    self.tensorboard_log_frequency = tensorboard_log_frequency
    self.tensorboard_step = 0
    self.global_step = 0
    self.use_queue = use_queue

    self.batch_size = batch_size
    self.random_seed = random_seed
    super(TensorGraph, self).__init__(**kwargs)
    self.save_file = "%s/%s" % (self.model_dir, "model")
    self.model_class = None

    self.rnn_initial_states = []
    self.rnn_final_states = []
    self.rnn_zero_states = []
    if self.use_queue and self.tensorboard:
      raise ValueError(
          "Currently TensorGraph cannot both use_queue and tensorboard at the same time"
      )

  def _add_layer(self, layer):
    if layer.name is None:
      layer.name = "%s_%s" % (layer.__class__.__name__, len(self.layers) + 1)
    if layer.name in self.layers:
      return
    if isinstance(layer, Feature):
      self.features.append(layer)
    if isinstance(layer, Label):
      self.labels.append(layer)
    if isinstance(layer, Weights):
      self.task_weights.append(layer)
    self.layers[layer.name] = layer
    for in_layer in layer.in_layers:
      self._add_layer(in_layer)

  def fit(self,
          dataset,
          nb_epoch=10,
          max_checkpoints_to_keep=5,
          checkpoint_interval=1000,
          deterministic=False,
          restore=False,
          submodel=None,
          **kwargs):
    """Train this model on a dataset.

    Parameters
    ----------
    dataset: Dataset
      the Dataset to train on
    nb_epoch: int
      the number of epochs to train for
    max_checkpoints_to_keep: int
      the maximum number of checkpoints to keep.  Older checkpoints are discarded.
    checkpoint_interval: int
      the frequency at which to write checkpoints, measured in training steps.
      Set this to 0 to disable automatic checkpointing.
    deterministic: bool
      if True, the samples are processed in order.  If False, a different random
      order is used for each epoch.
    restore: bool
      if True, restore the model from the most recent checkpoint and continue training
      from there.  If False, retrain the model from scratch.
    submodel: Submodel
      an alternate training objective to use.  This should have been created by
      calling create_submodel().
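
    Examples
    --------
    A sketch, assuming `dataset` is a dc.data.Dataset whose features and labels
    match this graph's Feature and Label layers::

      model.fit(dataset, nb_epoch=20, checkpoint_interval=0)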
    """
    return self.fit_generator(
        self.default_generator(
            dataset, epochs=nb_epoch, deterministic=deterministic),
        max_checkpoints_to_keep, checkpoint_interval, restore, submodel)

  def fit_generator(self,
                    feed_dict_generator,
                    max_checkpoints_to_keep=5,
                    checkpoint_interval=1000,
                    restore=False,
                    submodel=None):
    """Train this model on data from a generator.

    Parameters
    ----------
    feed_dict_generator: generator
      this should generate batches, each represented as a dict that maps
      Layers to values.
    max_checkpoints_to_keep: int
      the maximum number of checkpoints to keep.  Older checkpoints are discarded.
    checkpoint_interval: int
      the frequency at which to write checkpoints, measured in training steps.
      Set this to 0 to disable automatic checkpointing.
    restore: bool
      if True, restore the model from the most recent checkpoint and continue training
      from there.  If False, retrain the model from scratch.
    submodel: Submodel
      an alternate training objective to use.  This should have been created by
      calling create_submodel().

    Returns
    -------
    the average loss over the most recent checkpoint interval
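
    Examples
    --------
    A sketch that trains for one epoch using this model's own default generator::

      loss = model.fit_generator(model.default_generator(dataset, epochs=1))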
    """
    if not self.built:
      self.build()
    with self._get_tf("Graph").as_default():
      time1 = time.time()
      loss = self.loss
      if submodel is not None and submodel.loss is not None:
        loss = submodel.loss
      if tfe.in_eager_mode():
        # In eager mode we want an optimizer and a function to compute the
        # gradient of the loss.

        submodel_vars = None
        if submodel is None:
          optimizer = self._get_tf("Optimizer")
        else:
          optimizer = submodel.create_optimizer()
          if submodel.layers is not None:
            submodel_vars = set()
            for layer in submodel.layers:
              for var in layer.variables:
                submodel_vars.add(var)
        val_grad_fn = tfe.implicit_value_and_gradients(
            lambda x: self._run_graph([loss], x, True)[0])
      else:
        # In graph mode we want a training operation.

        if submodel is None:
          train_op = self._get_tf('train_op')
        else:
          train_op = submodel.get_train_op()
      if checkpoint_interval > 0:
        saver = tf.train.Saver(
            self.get_variables(),
            max_to_keep=max_checkpoints_to_keep,
            save_relative_paths=True)
      if restore:
        self.restore()
      avg_loss, n_averaged_batches = 0.0, 0.0
      n_samples = 0
      n_enqueued = [0]
      final_sample = [None]
      if self.queue_installed:
        enqueue_thread = threading.Thread(
            target=_enqueue_batch,
            args=(self, feed_dict_generator, self._get_tf("Graph"),
                  self.session, n_enqueued, final_sample))
        enqueue_thread.start()
      for feed_dict in self._create_feed_dicts(feed_dict_generator, True):
        if self.queue_installed:
          # Don't let this thread get ahead of the enqueue thread, since if
          # we try to read more batches than the total number that get queued,
          # this thread will hang indefinitely.
          while n_enqueued[0] <= n_samples:
            if n_samples == final_sample[0]:
              break
            time.sleep(0)
          if n_samples == final_sample[0]:
            break
        n_samples += 1
        should_log = (self.tensorboard and
                      n_samples % self.tensorboard_log_frequency == 0)
        if tfe.in_eager_mode():
          value, grads_and_vars = val_grad_fn(feed_dict)
          if submodel_vars is not None:
            grads_and_vars = [
                x for x in grads_and_vars if x[1] in submodel_vars
            ]
          optimizer.apply_gradients(grads_and_vars)
          avg_loss += value
        else:
          fetches = [train_op, loss.out_tensor]
          if should_log:
            fetches.append(self._get_tf("summary_op"))
          fetched_values = self.session.run(fetches, feed_dict=feed_dict)
          if should_log:
            self._log_tensorboard(fetched_values[2])
          avg_loss += fetched_values[1]
        n_averaged_batches += 1
        self.global_step += 1
        if checkpoint_interval > 0 and self.global_step % checkpoint_interval == checkpoint_interval - 1:
          saver.save(self.session, self.save_file, global_step=self.global_step)
          avg_loss = float(avg_loss) / n_averaged_batches
          logger.info('Ending global_step %d: Average loss %g' %
                      (self.global_step, avg_loss))
          avg_loss, n_averaged_batches = 0.0, 0.0
      if n_averaged_batches > 0:
        avg_loss = float(avg_loss) / n_averaged_batches
      if checkpoint_interval > 0:
        if n_averaged_batches > 0:
          logger.info('Ending global_step %d: Average loss %g' %
                      (self.global_step, avg_loss))
        saver.save(self.session, self.save_file, global_step=self.global_step)
        time2 = time.time()
        logger.info("TIMING: model fitting took %0.3f s" % (time2 - time1))
    return avg_loss

  def _log_tensorboard(self, summary):
    """
    TODO(LESWING) set epoch
    Parameters
    ----------
    Returns
    -------
    """
    global_step = int(self.global_step)
    writer = self._get_tf("FileWriter")
    writer.reopen()
    writer.add_summary(summary, global_step=global_step)
    writer.close()

  def fit_on_batch(self, X, y, w, submodel=None):
    if not self.built:
      self.build()
    dataset = NumpyDataset(X, y)
    return self.fit(dataset, nb_epoch=1, submodel=submodel)

  def default_generator(self,
                        dataset,
                        epochs=1,
                        predict=False,
                        deterministic=True,
                        pad_batches=True):
    if len(self.features) > 1:
      raise ValueError("More than one Feature, must use generator")
    if len(self.labels) > 1:
      raise ValueError("More than one Label, must use generator")
    if len(self.task_weights) > 1:
      raise ValueError("More than one Weights, must use generator")
    for epoch in range(epochs):
      for (X_b, y_b, w_b, ids_b) in dataset.iterbatches(
          batch_size=self.batch_size,
          deterministic=deterministic,
          pad_batches=pad_batches):
        feed_dict = dict()
        if len(self.labels) == 1 and y_b is not None and not predict:
          feed_dict[self.labels[0]] = y_b
        if len(self.features) == 1 and X_b is not None:
          feed_dict[self.features[0]] = X_b
        if len(self.task_weights) == 1 and w_b is not None and not predict:
          feed_dict[self.task_weights[0]] = w_b
        for (initial_state, zero_state) in zip(self.rnn_initial_states,
                                               self.rnn_zero_states):
          feed_dict[initial_state] = zero_state
        yield feed_dict

  def __call__(self, *inputs, **kwargs):
    """Execute the model in eager mode to compute outputs as a function of inputs.

    This is very similar to predict_on_batch(), except that it returns the outputs
    as tensors rather than numpy arrays.  That means you can compute the graph's
    outputs, then do additional calculations based on them, and gradients will
    be tracked correctly through the whole process.

    Parameters
    ----------
    inputs: tensors
      the values to use for the model's features.  The number of inputs must
      exactly match the length of the model's `features` property.  The values
      may be tensors, numpy arrays, or anything else that can be converted to
      tensors of the correct shape.
    outputs: list of Layers
      the output layers to compute.  If this is omitted, self.outputs is used
      (that is, all outputs that have been added by calling add_output()).

    Returns
    -------
    The output tensors, or a list of tensors if multiple outputs were requested.
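
    Examples
    --------
    A sketch for a model with a single Feature layer, running in eager mode
    (`x_batch` is a placeholder for an array of the matching shape)::

      output = model(x_batch)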
    """
    if len(inputs) != len(self.features):
      raise ValueError('Expected %d inputs, received %d' %
                       (len(self.features), len(inputs)))
    # TODO Once we drop Python 2 support, turn outputs into a proper keyword arg
    # instead of using the **kwargs hack.
    if 'outputs' in kwargs:
      outputs = kwargs['outputs']
    else:
      outputs = self.outputs
    feed_dict = dict(zip(self.features, inputs))
    results = self._run_graph(outputs, feed_dict, False)
    if len(results) == 1:
      return results[0]
    return results

  def _predict(self, generator, transformers, outputs, uncertainty):
    """
    Predict outputs for data provided by a generator.

    This is the private implementation of prediction.  Do not call it directly.
    Instead call one of the public prediction methods.

    Parameters
    ----------
    generator: Generator
      Generator that constructs feed dictionaries for TensorGraph.
    transformers: list
      List of dc.trans.Transformers.
    outputs: object
      If outputs is None, then will assume outputs = self.outputs.
      If outputs is a Layer/Tensor, then will evaluate and return as a
      single ndarray. If outputs is a list of Layers/Tensors, will return a list
      of ndarrays.
    uncertainty: bool
      specifies whether this is being called as part of estimating uncertainty.
      If True, it sets the training flag so that dropout will be enabled, and
      returns the values of the uncertainty outputs.

    Returns
    -------
    y_pred: numpy ndarray of shape (n_samples, n_classes*n_tasks)
    """
    if not self.built:
      self.build()
    if outputs is None:
      outputs = self.outputs
    elif not isinstance(outputs, collections.Sequence):
      outputs = [outputs]
    if uncertainty:
      if len(self.variances) == 0:
        raise ValueError('This model cannot compute uncertainties')
      if len(self.variances) != len(outputs):
        raise ValueError(
            'The number of variances must exactly match the number of outputs')
      tensors = outputs + self.variances
    else:
      tensors = outputs

    with self._get_tf("Graph").as_default():
      # Gather results for each output
      results = [[] for out in tensors]
      n_samples = 0
      n_enqueued = [0]
      final_sample = [None]
      if self.queue_installed:
        enqueue_thread = threading.Thread(
            target=_enqueue_batch,
            args=(self, generator, self._get_tf("Graph"), self.session,
                  n_enqueued, final_sample))
        enqueue_thread.start()
      for feed_dict in self._create_feed_dicts(generator, uncertainty):
        if self.queue_installed:
          # Don't let this thread get ahead of the enqueue thread, since if
          # we try to read more batches than the total number that get queued,
          # this thread will hang indefinitely.
          while n_enqueued[0] <= n_samples:
            if n_samples == final_sample[0]:
              break
            time.sleep(0)
          if n_samples == final_sample[0]:
            break
        n_samples += 1
        feed_results = self._run_graph(tensors, feed_dict, uncertainty)
        if tfe.in_eager_mode():
          feed_results = [f.numpy() for f in feed_results]
        if len(feed_results) > 1:
          if len(transformers):
            raise ValueError("Does not support transformations "
                             "for multiple outputs.")
        elif len(feed_results) == 1:
          result = undo_transforms(feed_results[0], transformers)
          feed_results = [result]
        for ind, result in enumerate(feed_results):
          results[ind].append(result)

      final_results = []
      for result_list in results:
        final_results.append(np.concatenate(result_list, axis=0))
      # If only one output, just return array
      if len(final_results) == 1:
        return final_results[0]
      elif uncertainty:
        return zip(final_results[:len(outputs)], final_results[len(outputs):])
      else:
        return final_results

  def predict_on_generator(self, generator, transformers=[], outputs=None):
    """
    Parameters
    ----------
    generator: Generator
      Generator that constructs feed dictionaries for TensorGraph.
    transformers: list
      List of dc.trans.Transformers.
    outputs: object
      If outputs is None, then will assume outputs = self.outputs.
      If outputs is a Layer/Tensor, then will evaluate and return as a
      single ndarray. If outputs is a list of Layers/Tensors, will return a list
      of ndarrays.

    Returns
    -------
    y_pred: numpy ndarray of shape (n_samples, n_classes*n_tasks)
    """
    return self._predict(generator, transformers, outputs, False)

  def predict_on_batch(self, X, transformers=[], outputs=None):
    """Generates predictions for input samples, processing samples in a batch.

    Parameters
    ----------
    X: ndarray
      the input data, as a Numpy array.
    transformers: List
      List of dc.trans.Transformers

    Returns
    -------
    A Numpy array of predictions.
    """
    dataset = NumpyDataset(X=X, y=None)
    generator = self.default_generator(dataset, predict=True, pad_batches=False)
    return self.predict_on_generator(generator, transformers, outputs)

  def predict_uncertainty_on_batch(self, X, masks=50):
    """
    Predict the model's outputs, along with the uncertainty in each one.

    The uncertainty is computed as described in https://arxiv.org/abs/1703.04977.
    It involves repeating the prediction many times with different dropout masks.
    The prediction is computed as the average over all the predictions.  The
    uncertainty includes both the variation among the predicted values (epistemic
    uncertainty) and the model's own estimates for how well it fits the data
    (aleatoric uncertainty).  Not all models support uncertainty prediction.

    Parameters
    ----------
    X: ndarray
      the input data, as a Numpy array.
    masks: int
      the number of dropout masks to average over

    Returns
    -------
    for each output, a tuple (y_pred, y_std) where y_pred is the predicted
    value of the output, and each element of y_std estimates the standard
    deviation of the corresponding element of y_pred
    """
    dataset = NumpyDataset(X=X, y=None)
    return self.predict_uncertainty(dataset, masks)

  def predict(self, dataset, transformers=[], outputs=None):
    """
    Uses self to make predictions on provided Dataset object.

    Parameters
    ----------
    dataset: dc.data.Dataset
      Dataset to make prediction on
    transformers: list
      List of dc.trans.Transformers.
    outputs: object
      If outputs is None, then will assume outputs=self.outputs. If outputs is
      a Layer/Tensor, then will evaluate and return as a single ndarray. If
      outputs is a list of Layers/Tensors, will return a list of ndarrays.

    Returns
    -------
    results: numpy ndarray or list of numpy ndarrays
    """
    generator = self.default_generator(dataset, predict=True, pad_batches=False)
    return self.predict_on_generator(generator, transformers, outputs)

  def predict_uncertainty(self, dataset, masks=50):
    """
    Predict the model's outputs, along with the uncertainty in each one.

    The uncertainty is computed as described in https://arxiv.org/abs/1703.04977.
    It involves repeating the prediction many times with different dropout masks.
    The prediction is computed as the average over all the predictions.  The
    uncertainty includes both the variation among the predicted values (epistemic
    uncertainty) and the model's own estimates for how well it fits the data
    (aleatoric uncertainty).  Not all models support uncertainty prediction.

    Parameters
    ----------
    dataset: dc.data.Dataset
      Dataset to make prediction on
    masks: int
      the number of dropout masks to average over

    Returns
    -------
    for each output, a tuple (y_pred, y_std) where y_pred is the predicted
    value of the output, and each element of y_std estimates the standard
    deviation of the corresponding element of y_pred
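
    Examples
    --------
    A sketch, assuming the graph was built with matching add_output() and
    add_variance() calls and uses dropout on every layer::

      y_pred, y_std = model.predict_uncertainty(dataset, masks=50)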
    """
    sum_pred = []
    sum_sq_pred = []
    sum_var = []
    for i in range(masks):
      generator = self.default_generator(
          dataset, predict=True, pad_batches=False)
      results = self._predict(generator, [], self.outputs, True)
      if len(sum_pred) == 0:
        for p, v in results:
          sum_pred.append(p)
          sum_sq_pred.append(p * p)
          sum_var.append(v)
      else:
        for j, (p, v) in enumerate(results):
          sum_pred[j] += p
          sum_sq_pred[j] += p * p
          sum_var[j] += v
    output = []
    std = []
    for i in range(len(sum_pred)):
      p = sum_pred[i] / masks
      output.append(p)
      std.append(np.sqrt(sum_sq_pred[i] / masks - p * p + sum_var[i] / masks))
    if len(output) == 1:
      return (output[0], std[0])
    else:
      return zip(output, std)

  def topsort(self):

    def add_layers_to_list(layer, sorted_layers):
      if layer in sorted_layers:
        return
      for in_layer in layer.in_layers:
        add_layers_to_list(in_layer, sorted_layers)
      sorted_layers.append(layer)

    sorted_layers = []
    for l in self.features + self.labels + self.task_weights + self.outputs + self.variances:
      add_layers_to_list(l, sorted_layers)
    add_layers_to_list(self.loss, sorted_layers)
    for submodel in self.submodels:
      if submodel.loss is not None:
        add_layers_to_list(submodel.loss, sorted_layers)
    return sorted_layers

  def build(self):
    if self.built:
      return
    if tfe.in_eager_mode():
      # In eager mode, we need to execute every layer once to ensure its variables
      # have been created.

      def build_layers(layer, tensors):
        if layer in tensors:
          return tensors[layer]
        inputs = [build_layers(input, tensors) for input in layer.in_layers]
        if isinstance(layer, Input):
          # We can't execute Input layers in eager mode, since they would try
          # to create placeholders.  Instead create a tensor of the correct
          # size and type.
          shape = [1 if s is None else s for s in layer.shape]
          tensor = tf.zeros(shape, layer.dtype)
        else:
          with tf.name_scope(layer.name):
            tensor = layer.create_tensor(in_layers=inputs, set_tensors=False)
        tensors[layer] = tensor
        return tensor

      tensors = {}
      with self._get_tf("Graph").as_default():
        # Build the layers.

        build_layers(self.loss, tensors)
        for output in self.outputs:
          build_layers(output, tensors)
        for variance in self.variances:
          build_layers(variance, tensors)
        for submodel in self.submodels:
          build_layers(submodel.loss, tensors)

        # Initialize variables.

        for layer in self.layers.values():
          if layer.variable_values is not None:
            for var, val in zip(layer.variables, layer.variable_values):
              var.assign(val)
      self.session = None
      self._training_placeholder = None
      self.built = True
      return

    # In graph mode we need to create the computation graph.

    with self._get_tf("Graph").as_default():
      self._training_placeholder = tf.placeholder(dtype=tf.float32, shape=())
      if self.random_seed is not None:
        tf.set_random_seed(self.random_seed)
      self._install_queue()
      for layer in self.topsort():
        with tf.name_scope(layer.name):
          layer.create_tensor(training=self._training_placeholder)
          self.rnn_initial_states += layer.rnn_initial_states
          self.rnn_final_states += layer.rnn_final_states
          self.rnn_zero_states += layer.rnn_zero_states
          layer.add_summary_to_tg()
      self.session = tf.Session(config=self.configproto)
      self.built = True

      # Ensure all training operators have been created.

      self._get_tf('train_op')
      for submodel in self.submodels:
        train_op = submodel.get_train_op()

      # Initialize variables.

      self.session.run(tf.global_variables_initializer())
      for layer in self.layers.values():
        if layer.variable_values is not None:
          variables = self.get_layer_variables(layer)
          for var, val in zip(variables, layer.variable_values):
            self.session.run(var.assign(val))

    for layer in self.layers.values():
      if layer.tensorboard:
        self.tensorboard = True
    tf.summary.scalar("loss", self.loss.out_tensor)
    for layer in self.layers.values():
      if layer.tensorboard:
        tf.summary.tensor_summary(layer.name, layer.out_tensor)
    if self.tensorboard:
      writer = self._get_tf("FileWriter")
      writer.add_graph(self._get_tf("Graph"))
      writer.close()

    # As a sanity check, make sure all tensors have the correct shape.

    for layer in self.layers.values():
      try:
        assert list(layer.shape) == layer.out_tensor.get_shape().as_list(
        ), '%s: Expected shape %s does not match actual shape %s' % (
            layer.name, layer.shape, layer.out_tensor.get_shape().as_list())
      except NotImplementedError:
        pass

  def _install_queue(self):
    """
    """
    if not self.use_queue or self.queue_installed:
      for layer in self.features + self.labels + self.task_weights:
        layer.pre_queue = True
      return
    inputs = self.features + self.labels + self.task_weights
    if len(inputs) == 0:
      return
    names = []
    shapes = []
    pre_q_inputs = []
    q = InputFifoQueue(shapes, names, in_layers=pre_q_inputs)
    q.name = "%s_%s" % (q.__class__.__name__, len(self.layers) + 1)

    for layer in inputs:
      pre_q_input = layer.create_pre_q()
      shapes.append(pre_q_input.shape)
      names.append(pre_q_input.name)
      pre_q_inputs.append(pre_q_input)

      layer.in_layers.append(q)

    self._add_layer(q)
    self.input_queue = q
    self.queue_installed = True

  def set_loss(self, layer):
    self._add_layer(layer)
    self.loss = layer

  def add_output(self, layer):
    """Add an output layer that can be computed by predict()"""
    self._add_layer(layer)
    self.outputs.append(layer)

  def add_variance(self, layer):
    """Add a layer that computes the variance in an output.

    If a model supports uncertainty, it must call add_variance() once for every
    output.  Each variance layer has the same shape as the corresponding output,
    and each element computes an estimate of the variance from aleatoric
    uncertainty in the corresponding element of the output.

    In addition, if a model supports uncertainty it MUST use dropout on every
    layer.  Otherwise, the uncertainties it computes will be inaccurate.
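
    Examples
    --------
    A sketch pairing a prediction output with its variance estimate (the layer
    names are placeholders)::

      model.add_output(mean_layer)
      model.add_variance(variance_layer)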
    """
    self._add_layer(layer)
    self.variances.append(layer)

  def set_optimizer(self, optimizer):
    """Set the optimizer to use for fitting."""
    self.optimizer = optimizer

  def create_submodel(self, layers=None, loss=None, optimizer=None):
    """Create an alternate objective for training one piece of a TensorGraph.

    A TensorGraph consists of a set of layers, and specifies a loss function and
    optimizer to use for training those layers.  Usually this is sufficient, but
    there are cases where you want to train different parts of a model separately.
    For example, a GAN consists of a generator and a discriminator.  They are
    trained separately, and they use different loss functions.

    A submodel defines an alternate objective to use in cases like this.  It may
    optionally specify any of the following: a subset of layers in the model to
    train; a different loss function; and a different optimizer to use.  This
    method creates a submodel, which you can then pass to fit() to use it for
    training.

    Parameters
    ----------
    layers: list
      the list of layers to train.  If None, all layers in the model will be
      trained.
    loss: Layer
      the loss function to optimize.  If None, the model's main loss function
      will be used.
    optimizer: Optimizer
      the optimizer to use for training.  If None, the model's main optimizer
      will be used.

    Returns
    -------
    the newly created submodel, which can be passed to any of the fitting
    methods.
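
    Examples
    --------
    A GAN-style sketch in which the generator and discriminator are trained
    against separate losses (the layer and loss names are placeholders)::

      gen = model.create_submodel(layers=generator_layers, loss=gen_loss)
      disc = model.create_submodel(layers=discriminator_layers, loss=disc_loss)
      model.fit(dataset, submodel=gen)
      model.fit(dataset, submodel=disc)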
    """
    if self.built:
      raise ValueError('Submodels must be created before build() is called.')
    submodel = Submodel(self, layers, loss, optimizer)
    self.submodels.append(submodel)
    if loss is not None:
      self._add_layer(loss)
    return submodel

  def get_pickling_errors(self, obj, seen=None):
    if seen is None:
      seen = []
    try:
      state = obj.__getstate__()
    except AttributeError:
      return
    if state is None:
      return
    if isinstance(state, tuple):
      if not isinstance(state[0], dict):
        state = state[1]
      else:
        # dict.update() returns None, so merge into a copy instead.
        merged = dict(state[0])
        merged.update(state[1])
        state = merged
    result = {}
    for i in state:
      try:
        pickle.dumps(state[i], protocol=2)
      except pickle.PicklingError:
        if not state[i] in seen:
          seen.append(state[i])
          result[i] = self.get_pickling_errors(state[i], seen)
    return result

  def save(self):
    # Remove out_tensor from the object to be pickled
    must_restore = False
    tensor_objects = self.tensor_objects
    rnn_initial_states = self.rnn_initial_states
    rnn_final_states = self.rnn_final_states
    rnn_zero_states = self.rnn_zero_states
    session = self.session
    self.tensor_objects = {}
    self.rnn_initial_states = []
    self.rnn_final_states = []
    self.rnn_zero_states = []
    self.session = None
    out_tensors = []
    submodel_ops = []
    if self.built:
      must_restore = True
      for layer in self.topsort():
        out_tensors.append(layer.none_tensors())
      for submodel in self.submodels:
        submodel_ops.append(submodel._train_op)
        submodel._train_op = None
      training_placeholder = self._training_placeholder
      self._training_placeholder = None
      self.built = False

    # Pickle itself
    pickle_name = os.path.join(self.model_dir, "model.pickle")

    with open(pickle_name, 'wb') as fout:
      try:
        pickle.dump(self, fout)
      except Exception as e:
        logger.info(self.get_pickling_errors(self))
        raise e

    # add out_tensor back to everyone
    if must_restore:
      for index, layer in enumerate(self.topsort()):
        layer.set_tensors(out_tensors[index])
      for submodel, op in zip(self.submodels, submodel_ops):
        submodel._train_op = op
      self._training_placeholder = training_placeholder
      self.built = True
    self.tensor_objects = tensor_objects
    self.rnn_initial_states = rnn_initial_states
    self.rnn_final_states = rnn_final_states
    self.rnn_zero_states = rnn_zero_states
    self.session = session

  def evaluate_generator(self,
                         feed_dict_generator,
                         metrics,
                         transformers=[],
                         labels=None,
                         outputs=None,
                         weights=[],
                         per_task_metrics=False):

    if labels is None:
      raise ValueError('labels must be specified')
    n_tasks = len(self.outputs)
    n_classes = self.outputs[0].out_tensor.get_shape()[-1].value
    evaluator = GeneratorEvaluator(
        self,
        feed_dict_generator,
        transformers,
        labels=labels,
        outputs=outputs,
        weights=weights,
        n_tasks=n_tasks,
        n_classes=n_classes)
    if not per_task_metrics:
      scores = evaluator.compute_model_performance(metrics)
      return scores
    else:
      scores, per_task_scores = evaluator.compute_model_performance(
          metrics, per_task_metrics=per_task_metrics)
      return scores, per_task_scores

  def get_layer_variables(self, layer):
    """Get the list of trainable variables in a layer of the graph."""
    if tfe.in_eager_mode():
      return layer.variables
    if not self.built:
      self.build()
    with self._get_tf("Graph").as_default():
      if layer.variable_scope == '':
        return []
      return tf.get_collection(
          tf.GraphKeys.TRAINABLE_VARIABLES, scope=layer.variable_scope)

  def get_variables(self):
    """Get the list of all trainable variables in the graph."""
    if not self.built:
      self.build()
    if tfe.in_eager_mode():
      variables = []
      for layer in self.layers.values():
        variables += layer.variables
      return variables
    else:
      with self._get_tf("Graph").as_default():
        return tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)

  def get_global_step(self):
    return self._get_tf("GlobalStep")

  def _get_tf(self, obj):
    """Fetches underlying TensorFlow primitives.

    Parameters
    ----------
    obj: str
      If "Graph", returns tf.Graph instance. If "FileWriter", returns
      tf.summary.FileWriter. If "Optimizer", returns the optimizer. If
      "train_op", returns the train operation. If "summary_op", returns the
      merged summary. If "GlobalStep" returns the global step.
    Returns
    -------
    TensorFlow Object

    """

    if obj in self.tensor_objects and self.tensor_objects[obj] is not None:
      return self.tensor_objects[obj]
    if obj == "Graph":
      self.tensor_objects['Graph'] = tf.Graph()
    elif obj == "FileWriter":
      self.tensor_objects['FileWriter'] = tf.summary.FileWriter(self.model_dir)
    elif obj == 'Optimizer':
      self.tensor_objects['Optimizer'] = self.optimizer._create_optimizer(
          self._get_tf('GlobalStep'))
    elif obj == 'train_op':
      opt = self._get_tf('Optimizer')
      global_step = self._get_tf('GlobalStep')
      try:
        self.tensor_objects['train_op'] = opt.minimize(
            self.loss.out_tensor, global_step=global_step)
      except ValueError:
        # The loss doesn't depend on any variables.
        self.tensor_objects['train_op'] = 0
    elif obj == 'summary_op':
      self.tensor_objects['summary_op'] = tf.summary.merge_all(
          key=tf.GraphKeys.SUMMARIES)
    elif obj == 'GlobalStep':
      with self._get_tf("Graph").as_default():
        self.tensor_objects['GlobalStep'] = create_variable(0, trainable=False)
    return self._get_tf(obj)

  def save_checkpoint(self, max_checkpoints_to_keep=5):
    """Save a checkpoint to disk.

    Usually you do not need to call this method, since fit() saves checkpoints
    automatically.  If you have disabled automatic checkpointing during fitting,
    this can be called to manually write checkpoints.

    Parameters
    ----------
    max_checkpoints_to_keep: int
      the maximum number of checkpoints to keep.  Older checkpoints are discarded.
    """
    saver = tf.train.Saver(
        self.get_variables(), max_to_keep=max_checkpoints_to_keep)
    saver.save(self.session, self.save_file, global_step=self.global_step)

  def get_checkpoints(self):
    """Get a list of all available checkpoint files."""
    return tf.train.get_checkpoint_state(
        self.model_dir).all_model_checkpoint_paths

  def restore(self, checkpoint=None):
    """Reload the values of all variables from a checkpoint file.

    Parameters
    ----------
    checkpoint: str
      the path to the checkpoint file to load.  If this is None, the most recent
      checkpoint will be chosen automatically.  Call get_checkpoints() to get a
      list of all available checkpoints.
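
    Examples
    --------
    A sketch that reloads either the most recent checkpoint or an explicit one::

      model.restore()
      model.restore(checkpoint=model.get_checkpoints()[-1])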
    """
    if not self.built:
      self.build()
    if checkpoint is None:
      checkpoint = tf.train.latest_checkpoint(self.model_dir)
    if checkpoint is None:
      raise ValueError('No checkpoint found')
    with self._get_tf("Graph").as_default():
      reader = NewCheckpointReader(checkpoint)
      var_names = set([x for x in reader.get_variable_to_shape_map()])
      var_list = []
      for var in self.get_variables():
        name = var.name
        if ':' in name:
          name = name[:name.rfind(':')]
        if name in var_names:
          var_list.append(var)
      saver = tf.train.Saver(var_list=var_list)
      saver.restore(self.session, checkpoint)

  def get_num_tasks(self):
    return len(self.outputs)

  def get_pre_q_input(self, input_layer):
    layer_name = input_layer.name
    pre_q_name = "%s_pre_q" % layer_name
    return self.layers[pre_q_name]

  @staticmethod
  def load_from_dir(model_dir, restore=True):
    pickle_name = os.path.join(model_dir, "model.pickle")
    with open(pickle_name, 'rb') as fin:
      tensorgraph = pickle.load(fin)
      tensorgraph.built = False
      tensorgraph.model_dir = model_dir
      if restore:
        try:
          tensorgraph.restore()
        except ValueError:
          pass  # No checkpoint to load
      return tensorgraph

  def __del__(self):
    pass

  def _create_feed_dicts(self, generator, training):
    """Create feed dicts for use in fitting or prediction.

    Parameters
    ----------
    generator: Generator
      the feed dict generator that was passed to fit_generator() or predict_on_generator()
    training: bool
      True during training, False during prediction
    """
    train_value = 1.0 if training else 0.0
    if self.queue_installed:
      while True:
        yield {self._training_placeholder: train_value}
    else:
      for d in generator:
        feed_dict = {}
        for key, value in d.items():
          if isinstance(key, Input):
            value = _ensure_value_shape(value, key)
            if tfe.in_eager_mode():
              value = tf.cast(value, key.dtype)
            feed_dict[key] = value
          else:
            feed_dict[key] = value
        if not tfe.in_eager_mode():
          feed_dict[self._training_placeholder] = train_value
        yield feed_dict

  def _run_graph(self, outputs, feed_dict, training):
    """Run the calculations in the graph to compute some outputs.

    In graph mode, this just calls session.run().  In eager mode, it executes
    all required layers to compute the output.

    Parameters
    ----------
    outputs: list of Layers
      the output layers to compute
    feed_dict: dict
      maps input layers to values
    training: bool
      whether this is being executed in training mode
    """
    if not tfe.in_eager_mode():
      return self.session.run(outputs, feed_dict)

    def run_layers(layer, tensors):
      if layer in tensors:
        return tensors[layer]
      inputs = [run_layers(input, tensors) for input in layer.in_layers]
      tensor = layer.create_tensor(
          in_layers=inputs, set_tensors=False, training=training)
      tensors[layer] = tensor
      return tensor

    tensors = feed_dict.copy()
    return [run_layers(o, tensors) for o in outputs]

  def make_estimator(self,
                     feature_columns,
                     weight_column=None,
                     metrics={},
                     model_dir=None,
                     config=None):
    """Construct a Tensorflow Estimator from this model.

    tf.estimator.Estimator is the standard Tensorflow API for representing models.
    This method provides interoperability between DeepChem and other Tensorflow
  based tools by allowing any model to be used as an Estimator.

    Once this method returns, the Estimator it created is independent of the model
    it was created from.  They do not share tensors, variables, save files, or any
    other resources.  The Estimator is a self contained object with its own methods
    for training, evaluation, prediction, checkpointing, etc.

    Parameters
    ----------
    feature_columns: list of tf.feature_column objects
      this describes the input features to the models.  There must be one entry
      for each Feature layer in this model's features field.
    weight_column: tf.feature_column or None
      if this model includes a Weights layer, this describes the input weights.
      Otherwise, this should be None.
    metrics: map
      metrics that should be computed in calls to evaluate().  For each entry,
      the key is the name to report for the metric, and the value is a function
      of the form f(labels, predictions, weights) that returns the tensors for
      computing the metric.  Any of the functions in tf.metrics can be used, as
      can other functions that satisfy the same interface.
    model_dir: str
      the directory in which the Estimator should save files.  If None, this
      defaults to the model's model_dir.
    config: RunConfig
      configuration options for the Estimator
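
    Examples
    --------
    A sketch for a model with a single Feature layer of width 1024 (the column
    name 'x' is a placeholder and must match the input_fn used with the
    Estimator)::

      import tensorflow as tf
      x_col = tf.feature_column.numeric_column('x', shape=(1024,))
      estimator = model.make_estimator(feature_columns=[x_col])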
    """
    # Check the inputs.

    if tfe.in_eager_mode():
      raise ValueError('make_estimator() is not supported in eager mode')
    if len(feature_columns) != len(self.features):
      raise ValueError(
          'This model requires %d feature column(s)' % len(self.features))
    if len(self.labels) != 1:
      raise ValueError(
          'Can only create an Estimator from a model with exactly one Label input'
      )
    if len(self.task_weights) > 1:
      raise ValueError(
          'Cannot create an Estimator from a model with multiple Weight inputs')
    if weight_column is None:
      if len(self.task_weights) > 0:
        raise ValueError('This model requires a weight column')
    else:
      if len(self.task_weights) == 0:
        raise ValueError(
            'Cannot specify weight_column for a model with no Weight inputs')
    if model_dir is None:
      model_dir = self.model_dir

    # Define a function that recursively creates tensors from layers.

    def create_tensors(layer, tensors, training):
      if layer in tensors:
        return tensors[layer]
      inputs = [
          create_tensors(in_layer, tensors, training)
          for in_layer in layer.in_layers
      ]
      tensor = layer.create_tensor(
          in_layers=inputs, set_tensors=False, training=training)
      tensors[layer] = tensor
      layer.add_summary_to_tg(tensor)
      return tensor

    # Define the model function.

    def model_fn(features, labels, mode):
      # Define the inputs.

      tensors = self.create_estimator_inputs(feature_columns, weight_column,
                                             features, labels, mode)
      for layer, tensor in tensors.items():
        layer.add_summary_to_tg(tensor)

      # Create the correct outputs, based on the mode.

      if mode == tf.estimator.ModeKeys.PREDICT:
        predictions = {}
        for i, output in enumerate(self.outputs):
          predictions[i] = create_tensors(output, tensors, 0)
        return tf.estimator.EstimatorSpec(mode, predictions=predictions)
      if mode == tf.estimator.ModeKeys.EVAL:
        loss = create_tensors(self.loss, tensors, 0)
        predictions = create_tensors(self.outputs[0], tensors, 0)
        if len(self.task_weights) == 0:
          weights = None
        else:
          weights = tensors[self.task_weights[0]]
        eval_metric_ops = {}
        for name, function in metrics.items():
          eval_metric_ops[name] = function(tensors[self.labels[0]], predictions,
                                           weights)
        return tf.estimator.EstimatorSpec(
            mode, loss=loss, eval_metric_ops=eval_metric_ops)
      if mode == tf.estimator.ModeKeys.TRAIN:
        loss = create_tensors(self.loss, tensors, 1)
        global_step = tf.train.get_global_step()
        optimizer = self.optimizer._create_optimizer(global_step)
        train_op = optimizer.minimize(loss, global_step=global_step)
        return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)
      raise ValueError('Unknown mode')

    # Create the Estimator.

    return tf.estimator.Estimator(
        model_fn=model_fn, model_dir=model_dir, config=config)

  def create_estimator_inputs(self, feature_columns, weight_column, features,
                              labels, mode):
    """This is called by make_estimator() to create tensors for the inputs.

    feature_columns and weight_column are the arguments passed to
    make_estimator().  features, labels, and mode are the arguments passed to
    the estimator's model function.  This method creates and returns a dict with
    one entry for every Feature, Label, or Weights layer in the graph.  The keys
    are the layers, and the values are the tensors that correspond to them.

    Any subclass that overrides default_generator() must also override this
    method.
    """
    if self.__class__.default_generator != TensorGraph.default_generator:
      raise ValueError(
          "Class overrides default_generator() but not create_estimator_inputs()"
      )
    tensors = {}
    for layer, column in zip(self.features, feature_columns):
      tensors[layer] = tf.feature_column.input_layer(features, [column])
    if weight_column is not None:
      tensors[self.task_weights[0]] = tf.feature_column.input_layer(
          features, [weight_column])
    if labels is not None:
      tensors[self.labels[0]] = tf.cast(labels, self.labels[0].dtype)
    return tensors
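
# A minimal usage sketch for the TensorGraph class above: wire Feature and Label
# layers into a Dense regression head, set an L2 loss, then fit and predict on a
# toy NumpyDataset.  The layer classes are assumed to come from
# deepchem.models.tensorgraph.layers, matching the DeepChem 2.x API.
import numpy as np
import deepchem as dc
from deepchem.models.tensorgraph.layers import Feature, Label, Dense, L2Loss, ReduceMean

tg = dc.models.TensorGraph(use_queue=False, batch_size=10)
feature = Feature(shape=(None, 3))
label = Label(shape=(None, 1))
dense = Dense(out_channels=1, in_layers=[feature])
tg.add_output(dense)
tg.set_loss(ReduceMean(in_layers=[L2Loss(in_layers=[label, dense])]))

X = np.random.rand(10, 3)
y = np.random.rand(10, 1)
dataset = dc.data.NumpyDataset(X, y)
tg.fit(dataset, nb_epoch=10, checkpoint_interval=0)
predictions = tg.predict(dataset)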
Example #21
0
    def __init__(self,
                 learner,
                 learning_rate=0.001,
                 optimization_steps=1,
                 meta_batch_size=10,
                 optimizer=Adam(),
                 model_dir=None):
        """Create an object for performing meta-optimization.

    Parameters
    ----------
    learner: MetaLearner
      defines the meta-learning problem
    learning_rate: float, Layer, or Tensor
      the learning rate to use for optimizing each task (not to be confused with the one used
      for meta-learning).  This can optionally be made a variable (represented as a Layer or
      Tensor), in which case the learning rate will itself be learnable.
    optimization_steps: int
      the number of steps of gradient descent to perform for each task
    meta_batch_size: int
      the number of tasks to use for each step of meta-learning
    optimizer: Optimizer
      the optimizer to use for meta-learning (not to be confused with the gradient descent
      optimization performed for each task)
    model_dir: str
      the directory in which the model will be saved.  If None, a temporary directory will be created.
    """
        # Record inputs.

        self.learner = learner
        if isinstance(learner.loss, Layer):
            self._loss = learner.loss.out_tensor
        else:
            self._loss = learner.loss
        if isinstance(learning_rate, Layer):
            self._learning_rate = learning_rate.out_tensor
        else:
            self._learning_rate = learning_rate
        self.meta_batch_size = meta_batch_size
        self.optimizer = optimizer
        self._graph = self._loss.graph

        # Create the output directory if necessary.

        self._model_dir_is_temp = False
        if model_dir is not None:
            if not os.path.exists(model_dir):
                os.makedirs(model_dir)
        else:
            model_dir = tempfile.mkdtemp()
            self._model_dir_is_temp = True
        self.model_dir = model_dir
        self.save_file = "%s/%s" % (self.model_dir, "model")
        with self._graph.as_default():
            # Create duplicate placeholders for meta-optimization.

            learner.select_task()
            self._meta_placeholders = {}
            for p in learner.get_batch().keys():
                name = 'meta/' + p.name.split(':')[0]
                self._meta_placeholders[p] = tf.placeholder(
                    p.dtype, p.shape, name)

            # Create the loss function for meta-optimization.

            updated_loss = self._loss
            updated_variables = learner.variables
            for i in range(optimization_steps):
                gradients = tf.gradients(updated_loss, updated_variables)
                updated_variables = [
                    v if g is None else v - self._learning_rate * g
                    for v, g in zip(updated_variables, gradients)
                ]
                replacements = dict(
                    (tf.convert_to_tensor(v1), v2)
                    for v1, v2 in zip(learner.variables, updated_variables))
                if i == optimization_steps - 1:
                    # In the final loss, use different placeholders for all inputs so the loss will be
                    # computed from a different batch.

                    for p in self._meta_placeholders:
                        replacements[p] = self._meta_placeholders[p]
                updated_loss = tf.contrib.graph_editor.graph_replace(
                    self._loss, replacements)
            self._meta_loss = updated_loss

            # Create variables for accumulating the gradients.

            variables = list(learner.variables)
            gradients = tf.gradients(self._meta_loss, variables)
            for i in reversed(range(len(variables))):
                if gradients[i] is None:
                    del variables[i]
                    del gradients[i]
            zero_gradients = [tf.zeros(g.shape, g.dtype) for g in gradients]
            summed_gradients = [
                tf.Variable(z, trainable=False) for z in zero_gradients
            ]
            self._clear_gradients = tf.group(*[
                s.assign(z) for s, z in zip(summed_gradients, zero_gradients)
            ])
            self._add_gradients = tf.group(
                *[s.assign_add(g) for s, g in zip(summed_gradients, gradients)])

            # Create the optimizers for meta-optimization and task optimization.

            self._global_step = tf.placeholder(tf.int32, [])
            grads_and_vars = list(zip(summed_gradients, variables))
            self._meta_train_op = optimizer._create_optimizer(
                self._global_step).apply_gradients(grads_and_vars)
            task_optimizer = GradientDescent(learning_rate=self._learning_rate)
            self._task_train_op = task_optimizer._create_optimizer(
                self._global_step).minimize(self._loss)
            self._session = tf.Session()

            # Create a Checkpoint for saving.

            self._checkpoint = tf.train.Checkpoint()
            self._checkpoint.listed = learner.variables
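
# A self-contained toy sketch (NumPy only, names are illustrative) of the
# meta-learning update that the constructor above wires into the TensorFlow
# graph: adapt to each sampled task with a few steps of gradient descent, then
# accumulate a meta-gradient evaluated at the adapted parameters.  This uses the
# first-order approximation; the class above additionally differentiates through
# the inner steps via tf.gradients() and graph_replace().
import numpy as np


def task_loss(w, a):
    # Each task is fitting a single scalar target a with squared error.
    return (w - a)**2


def task_grad(w, a):
    return 2.0 * (w - a)


np.random.seed(0)
w = 0.0  # meta-parameter shared across tasks
inner_lr, meta_lr = 0.1, 0.01
optimization_steps, meta_batch_size = 1, 10

for meta_step in range(1000):
    meta_grad = 0.0
    for _ in range(meta_batch_size):
        a = np.random.uniform(-1.0, 1.0)  # sample a task
        w_task = w
        for _ in range(optimization_steps):  # task-level gradient descent
            w_task -= inner_lr * task_grad(w_task, a)
        # Gradient at the adapted parameters (first-order meta-gradient).
        meta_grad += task_grad(w_task, a)
    w -= meta_lr * meta_grad / meta_batch_size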
Example #22
0
class KerasModel(Model):
  """This is a DeepChem model implemented by a Keras model.

  This class provides several advantages over using the Keras model's fitting
  and prediction methods directly.

  1. It provides better integration with the rest of DeepChem, such as direct
     support for Datasets and Transformers.

  2. It defines the loss in a more flexible way.  In particular, Keras does not
     support multidimensional weight matrices, which makes it impossible to
     implement most multitask models with Keras.

  3. It provides various additional features not found in the Keras Model class,
     such as uncertainty prediction and saliency mapping.

  The loss function for a model can be defined in two different ways.  For
  models that have only a single output and use a standard loss function, you
  can simply provide a dc.models.losses.Loss object.  This defines the loss for
  each sample or sample/task pair.  The result is automatically multiplied by
  the weights and averaged over the batch.  Any additional losses computed by
  model layers, such as weight decay penalties, are also added.

  For more complicated cases, you can instead provide a function that directly
  computes the total loss.  It must be of the form f(outputs, labels, weights),
  taking the list of outputs from the model, the expected values, and any weight
  matrices.  It should return a scalar equal to the value of the loss function
  for the batch.  No additional processing is done to the result; it is up to
  you to do any weighting, averaging, adding of penalty terms, etc.

  You can optionally provide an output_types argument, which describes how to
  interpret the model's outputs.  This should be a list of strings, one for each
  output.  Each entry must have one of the following values:

  - 'prediction': This is a normal output, and will be returned by predict().
    If output types are not specified, all outputs are assumed to be of this
    type.

  - 'loss': This output will be used in place of the normal outputs for
    computing the loss function.  For example, models that output probability
    distributions usually do it by computing unbounded numbers (the logits),
    then passing them through a softmax function to turn them into
    probabilities.  When computing the cross entropy, it is more numerically
    stable to use the logits directly rather than the probabilities.  You can
    do this by having the model produce both probabilities and logits as
    outputs, then specifying output_types=['prediction', 'loss'].  When
    predict() is called, only the first output (the probabilities) will be
    returned.  But during training, it is the second output (the logits) that
    will be passed to the loss function.

  - 'variance': This output is used for estimating the uncertainty in another
    output.  To create a model that can estimate uncertainty, there must be the
    same number of 'prediction' and 'variance' outputs.  Each variance output
    must have the same shape as the corresponding prediction output, and each
    element is an estimate of the variance in the corresponding prediction.
    Also be aware that if a model supports uncertainty, it MUST use dropout on
    every layer.  Otherwise, the uncertainties it computes will be inaccurate.
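
  Examples
  --------
  A minimal sketch wrapping a small Keras regression network (`dataset` is
  assumed to be a dc.data.Dataset with a single task)::

    import tensorflow as tf
    import deepchem as dc
    keras_model = tf.keras.Sequential([
        tf.keras.layers.Dense(50, activation='relu'),
        tf.keras.layers.Dense(1)
    ])
    model = dc.models.KerasModel(keras_model, dc.models.losses.L2Loss())
    model.fit(dataset, nb_epoch=10)
    y_pred = model.predict(dataset)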
  """

  def __init__(self,
               model,
               loss,
               output_types=None,
               batch_size=100,
               model_dir=None,
               learning_rate=0.001,
               optimizer=None,
               tensorboard=False,
               tensorboard_log_frequency=100,
               **kwargs):
    """Create a new KerasModel.

    Parameters
    ----------
    model: tf.keras.Model
      the Keras model implementing the calculation
    loss: dc.models.losses.Loss or function
      a Loss or function defining how to compute the training loss for each
      batch, as described above
    output_types: list of strings
      the type of each output from the model, as described above
    batch_size: int
      default batch size for training and evaluating
    model_dir: str
      the directory on disk where the model will be stored.  If this is None,
      a temporary directory is created.
    learning_rate: float or LearningRateSchedule
      the learning rate to use for fitting.  If optimizer is specified, this is
      ignored.
    optimizer: Optimizer
      the optimizer to use for fitting.  If this is specified, learning_rate is
      ignored.
    tensorboard: bool
      whether to log progress to TensorBoard during training
    tensorboard_log_frequency: int
      the frequency at which to log data to TensorBoard, measured in batches
    """
    super(KerasModel, self).__init__(
        model_instance=model, model_dir=model_dir, **kwargs)
    self.model = model
    if isinstance(loss, Loss):
      self._loss_fn = _StandardLoss(model, loss)
    else:
      self._loss_fn = loss
    self.batch_size = batch_size
    if optimizer is None:
      self.optimizer = Adam(learning_rate=learning_rate)
    else:
      self.optimizer = optimizer
    self.tensorboard = tensorboard
    self.tensorboard_log_frequency = tensorboard_log_frequency
    self._tensorboard_step = 0
    if tensorboard and tf.executing_eagerly():
      raise ValueError(
          "Logging to TensorBoard is not currently supported in eager mode")
    if output_types is None:
      self._prediction_outputs = None
      self._loss_outputs = None
      self._variance_outputs = None
    else:
      self._prediction_outputs = []
      self._loss_outputs = []
      self._variance_outputs = []
      for i, output_type in enumerate(output_types):
        if output_type == 'prediction':
          self._prediction_outputs.append(i)
        elif output_type == 'loss':
          self._loss_outputs.append(i)
        elif output_type == 'variance':
          self._variance_outputs.append(i)
        else:
          raise ValueError('Unknown output type "%s"' % output_type)
    self._built = False
    self._inputs_built = False
    self._training_ops_built = False
    self._initialized_vars = set()

  def _ensure_built(self):
    """The first time this is called, create internal data structures."""
    if self._built:
      return
    self._built = True
    if not tf.executing_eagerly():
      self.session = tf.Session()
    self._global_step = tf.Variable(0, trainable=False)
    self._tf_optimizer = self.optimizer._create_optimizer(self._global_step)
    self._checkpoint = tf.train.Checkpoint(
        optimizer=self._tf_optimizer, model=self.model)
    self._init_new_vars()

  def _create_inputs(self, example_inputs):
    """The first time this is called, create tensors representing the inputs and outputs."""
    if self._inputs_built:
      return
    self._ensure_built()
    self._inputs_built = True
    if len(self.model.inputs) > 0:
      self._input_dtypes = [t.dtype.as_numpy_dtype for t in self.model.inputs]
    else:
      self._input_dtypes = [
          np.float32 if x.dtype == np.float64 else x.dtype
          for x in example_inputs
      ]
    if tf.executing_eagerly():
      return
    if len(self.model.inputs) > 0:
      self._input_placeholders = self.model.inputs
    else:
      # The model doesn't specify inputs, so guess the input shapes based on the
      # example batch.
      input_shapes = [(None,) + i.shape[1:] for i in example_inputs]
      self._input_placeholders = [
          tf.placeholder(dtype=tf.as_dtype(t), shape=s)
          for s, t in zip(input_shapes, self._input_dtypes)
      ]
      if len(input_shapes) == 1:
        self.model.build(input_shapes[0])
      else:
        self.model.build(input_shapes)
    if len(self._input_placeholders) == 1:
      self._output_tensors = self.model(
          self._input_placeholders[0], training=False)
      self._uncertainty_tensors = self.model(
          self._input_placeholders[0], training=True)
    else:
      self._output_tensors = self.model(
          self._input_placeholders, training=False)
      self._uncertainty_tensors = self.model(
          self._input_placeholders, training=True)
    if isinstance(self._output_tensors, tf.Tensor):
      self._output_tensors = [self._output_tensors]
    if self._prediction_outputs is None:
      self._prediction_outputs = list(range(len(self._output_tensors)))
      self._loss_outputs = list(range(len(self._output_tensors)))
    self._init_new_vars()

  def _create_training_ops(self, example_batch):
    """The first time this is called, create tensors used in optimization."""
    if self._training_ops_built:
      return
    self._create_inputs(example_batch[0])
    self._training_ops_built = True
    self._label_dtypes = [
        np.float32 if x.dtype == np.float64 else x.dtype
        for x in example_batch[1]
    ]
    self._weights_dtypes = [
        np.float32 if x.dtype == np.float64 else x.dtype
        for x in example_batch[2]
    ]
    if tf.executing_eagerly():
      return
    self._label_placeholders = [
        tf.placeholder(dtype=tf.as_dtype(t), shape=x.shape)
        for x, t in zip(example_batch[1], self._label_dtypes)
    ]
    self._weights_placeholders = [
        tf.placeholder(dtype=tf.as_dtype(t), shape=x.shape)
        for x, t in zip(example_batch[2], self._weights_dtypes)
    ]
    self._loss_tensor = self._loss_fn(
        [self._output_tensors[i] for i in self._loss_outputs],
        self._label_placeholders, self._weights_placeholders)
    try:
      self._train_op = self._tf_optimizer.minimize(
          self._loss_tensor, global_step=self._global_step)
    except ValueError:
      # The loss doesn't depend on any variables.
      self._train_op = 0
    if self.tensorboard:
      self._summary_ops = tf.summary.scalar('loss', self._loss_tensor)
      self._summary_writer = tf.summary.FileWriter(self.model_dir)
    self._init_new_vars()

  def _init_new_vars(self):
    """Initialize any new variables created since the last call to this method."""
    if not tf.executing_eagerly():
      vars = set(tf.global_variables())
      new_vars = vars.difference(self._initialized_vars)
      self.session.run(tf.variables_initializer(new_vars))
      self._initialized_vars = vars

  def fit(self,
          dataset,
          nb_epoch=10,
          max_checkpoints_to_keep=5,
          checkpoint_interval=1000,
          deterministic=False,
          restore=False):
    """Train this model on a dataset.

    Parameters
    ----------
    dataset: Dataset
      the Dataset to train on
    nb_epoch: int
      the number of epochs to train for
    max_checkpoints_to_keep: int
      the maximum number of checkpoints to keep.  Older checkpoints are discarded.
    checkpoint_interval: int
      the frequency at which to write checkpoints, measured in training steps.
      Set this to 0 to disable automatic checkpointing.
    deterministic: bool
      if True, the samples are processed in order.  If False, a different random
      order is used for each epoch.
    restore: bool
      if True, restore the model from the most recent checkpoint and continue training
      from there.  If False, retrain the model from scratch.
   """
    return self.fit_generator(
        self.default_generator(
            dataset, epochs=nb_epoch, deterministic=deterministic),
        max_checkpoints_to_keep, checkpoint_interval, restore)

  def fit_generator(self,
                    generator,
                    max_checkpoints_to_keep=5,
                    checkpoint_interval=1000,
                    restore=False):
    """Train this model on data from a generator.

    Parameters
    ----------
    generator: generator
      this should generate batches, each represented as a tuple of the form
      (inputs, labels, weights).
    max_checkpoints_to_keep: int
      the maximum number of checkpoints to keep.  Older checkpoints are discarded.
    checkpoint_interval: int
      the frequency at which to write checkpoints, measured in training steps.
      Set this to 0 to disable automatic checkpointing.
    restore: bool
      if True, restore the model from the most recent checkpoint and continue training
      from there.  If False, retrain the model from scratch.

    Returns
    -------
    the average loss over the most recent checkpoint interval
    """
    self._ensure_built()
    if restore:
      self.restore()
    if checkpoint_interval > 0:
      manager = tf.train.CheckpointManager(self._checkpoint, self.model_dir,
                                           max_checkpoints_to_keep)
    avg_loss = 0.0
    averaged_batches = 0
    time1 = time.time()

    # Main training loop.

    for batch in generator:
      self._create_training_ops(batch)
      inputs, labels, weights = self._prepare_batch(batch)
      self._tensorboard_step += 1
      should_log = (
          self.tensorboard and
          self._tensorboard_step % self.tensorboard_log_frequency == 0)
      if tf.executing_eagerly():

        # In eager mode we execute the loss function, accumulating the gradients.

        with tf.GradientTape() as tape:
          outputs = self.model(inputs[0])
          if isinstance(outputs, tf.Tensor):
            outputs = [outputs]
          if self._loss_outputs is not None:
            outputs = [outputs[i] for i in self._loss_outputs]
          loss = self._loss_fn(outputs, labels, weights)
        avg_loss += loss
        grads = tape.gradient(loss, self.model.trainable_variables)
        self._tf_optimizer.apply_gradients(
            zip(grads, self.model.trainable_variables))
        tf.assign_add(self._global_step, 1)
        current_step = self._global_step.numpy()
      else:

        # In graph mode we execute the training op.

        fetches = [self._train_op, self._loss_tensor, self._global_step]
        if should_log:
          fetches.append(self._summary_ops)
        feed_dict = dict(zip(self._input_placeholders, inputs))
        feed_dict.update(dict(zip(self._label_placeholders, labels)))
        feed_dict.update(dict(zip(self._weights_placeholders, weights)))
        fetched_values = self.session.run(fetches, feed_dict=feed_dict)
        avg_loss += fetched_values[1]
        current_step = fetched_values[2]
        if should_log:
          self._summary_writer.reopen()
          self._summary_writer.add_summary(
              fetched_values[3], global_step=current_step)
          self._summary_writer.close()

      # Report progress and write checkpoints.

      averaged_batches += 1
      if checkpoint_interval > 0 and current_step % checkpoint_interval == checkpoint_interval - 1:
        self._exec_with_session(lambda: manager.save())
        avg_loss = float(avg_loss) / averaged_batches
        print(
            'Ending global_step %d: Average loss %g' % (current_step, avg_loss))
        avg_loss = 0.0
        averaged_batches = 0

    # Report final results.

    if checkpoint_interval > 0:
      if averaged_batches > 0:
        avg_loss = float(avg_loss) / averaged_batches
        print(
            'Ending global_step %d: Average loss %g' % (current_step, avg_loss))
      self._exec_with_session(lambda: manager.save())
    time2 = time.time()
    print("TIMING: model fitting took %0.3f s" % (time2 - time1))
    return avg_loss

  def fit_on_batch(self, X, y, w):
    """Perform a single step of training.

    Parameters
    ----------
    X: ndarray
      the inputs for the batch
    y: ndarray
      the labels for the batch
    w: ndarray
      the weights for the batch
   """
    self._ensure_built()
    dataset = NumpyDataset(X, y, w)
    return self.fit(dataset, nb_epoch=1)

  def _predict(self, generator, transformers, uncertainty):
    """
    Predict outputs for data provided by a generator.

    This is the private implementation of prediction.  Do not call it directly.
    Instead call one of the public prediction methods.

    Parameters
    ----------
    generator: generator
      this should generate batches, each represented as a tuple of the form
      (inputs, labels, weights).
    transformers: list of dc.trans.Transformers
      Transformers that the input data has been transformed by.  The output
      is passed through these transformers to undo the transformations.
    uncertainty: bool
      specifies whether this is being called as part of estimating uncertainty.
      If True, it sets the training flag so that dropout will be enabled, and
      returns the values of the uncertainty outputs.
    Returns
    -------
    a NumPy array if the model produces a single output, or a list of arrays
    if it produces multiple outputs
    """
    results = None
    variances = None
    if uncertainty:
      if self._variance_outputs is None or len(self._variance_outputs) == 0:
        raise ValueError('This model cannot compute uncertainties')
      if len(self._variance_outputs) != len(self._prediction_outputs):
        raise ValueError(
            'The number of variances must exactly match the number of outputs')
    for batch in generator:
      inputs, labels, weights = batch
      self._create_inputs(inputs)
      inputs, _, _ = self._prepare_batch((inputs, None, None))
      if tf.executing_eagerly():

        # In eager mode we invoke the model directly.

        if len(inputs) == 1:
          inputs = inputs[0]
        outputs = self.model(inputs, training=uncertainty)
        if isinstance(outputs, tf.Tensor):
          outputs = [outputs]
        outputs = [t.numpy() for t in outputs]
      else:

        # In graph mode we execute the output tensors.

        if uncertainty:
          fetches = self._uncertainty_tensors
        else:
          fetches = self._output_tensors
        feed_dict = dict(zip(self._input_placeholders, inputs))
        outputs = self.session.run(fetches, feed_dict=feed_dict)

      # Apply transformers and record results.

      if uncertainty:
        var = [outputs[i] for i in self._variance_outputs]
        if variances is None:
          variances = [[v] for v in var]
        else:
          for i, t in enumerate(var):
            variances[i].append(t)
      if self._prediction_outputs is not None:
        outputs = [outputs[i] for i in self._prediction_outputs]
      if len(transformers) > 0:
        if len(outputs) > 1:
          raise ValueError(
              "predict() does not support Transformers for models with multiple outputs."
          )
        elif len(outputs) == 1:
          outputs = [undo_transforms(outputs[0], transformers)]
      if results is None:
        results = [[o] for o in outputs]
      else:
        for i, t in enumerate(outputs):
          results[i].append(t)

    # Concatenate arrays to create the final results.

    final_results = []
    final_variances = []
    for r in results:
      final_results.append(np.concatenate(r, axis=0))
    if uncertainty:
      for v in variances:
        final_variances.append(np.concatenate(v, axis=0))
      return zip(final_results, final_variances)
    # If only one output, just return array
    if len(final_results) == 1:
      return final_results[0]
    else:
      return final_results

  def predict_on_generator(self, generator, transformers=[]):
    """
    Parameters
    ----------
    generator: generator
      this should generate batches, each represented as a tuple of the form
      (inputs, labels, weights).
    transformers: list of dc.trans.Transformers
      Transformers that the input data has been transformed by.  The output
      is passed through these transformers to undo the transformations.
    Returns
    -------
    a NumPy array if the model produces a single output, or a list of arrays
    if it produces multiple outputs
    """
    return self._predict(generator, transformers, False)

  def predict_on_batch(self, X, transformers=[]):
    """Generates predictions for input samples, processing samples in a batch.

    Parameters
    ----------
    X: ndarray
      the input data, as a Numpy array.
    transformers: list of dc.trans.Transformers
      Transformers that the input data has been transformed by.  The output
      is passed through these transformers to undo the transformations.

    Returns
    -------
    a NumPy array if the model produces a single output, or a list of arrays
    if it produces multiple outputs
    """
    dataset = NumpyDataset(X=X, y=None)
    return self.predict(dataset, transformers)

  def predict_uncertainty_on_batch(self, X, masks=50):
    """
    Predict the model's outputs, along with the uncertainty in each one.

    The uncertainty is computed as described in https://arxiv.org/abs/1703.04977.
    It involves repeating the prediction many times with different dropout masks.
    The prediction is computed as the average over all the predictions.  The
    uncertainty includes both the variation among the predicted values (epistemic
    uncertainty) and the model's own estimates for how well it fits the data
    (aleatoric uncertainty).  Not all models support uncertainty prediction.

    Parameters
    ----------
    X: ndarray
      the input data, as a Numpy array.
    masks: int
      the number of dropout masks to average over

    Returns
    -------
    for each output, a tuple (y_pred, y_std) where y_pred is the predicted
    value of the output, and each element of y_std estimates the standard
    deviation of the corresponding element of y_pred
    """
    dataset = NumpyDataset(X=X, y=None)
    return self.predict_uncertainty(dataset, masks)

  def predict(self, dataset, transformers=[]):
    """
    Uses self to make predictions on provided Dataset object.

    Parameters
    ----------
    dataset: dc.data.Dataset
      Dataset to make prediction on
    transformers: list of dc.trans.Transformers
      Transformers that the input data has been transformed by.  The output
      is passed through these transformers to undo the transformations.

    Returns
    -------
    a NumPy array if the model produces a single output, or a list of arrays
    if it produces multiple outputs
    """
    generator = self.default_generator(dataset, predict=True, pad_batches=False)
    return self.predict_on_generator(generator, transformers)

  def predict_uncertainty(self, dataset, masks=50):
    """
    Predict the model's outputs, along with the uncertainty in each one.

    The uncertainty is computed as described in https://arxiv.org/abs/1703.04977.
    It involves repeating the prediction many times with different dropout masks.
    The prediction is computed as the average over all the predictions.  The
    uncertainty includes both the variation among the predicted values (epistemic
    uncertainty) and the model's own estimates for how well it fits the data
    (aleatoric uncertainty).  Not all models support uncertainty prediction.

    Parameters
    ----------
    dataset: dc.data.Dataset
      Dataset to make prediction on
    masks: int
      the number of dropout masks to average over

    Returns
    -------
    for each output, a tuple (y_pred, y_std) where y_pred is the predicted
    value of the output, and each element of y_std estimates the standard
    deviation of the corresponding element of y_pred
    """
    sum_pred = []
    sum_sq_pred = []
    sum_var = []
    for i in range(masks):
      generator = self.default_generator(
          dataset, predict=True, pad_batches=False)
      results = self._predict(generator, [], True)
      if len(sum_pred) == 0:
        for p, v in results:
          sum_pred.append(p)
          sum_sq_pred.append(p * p)
          sum_var.append(v)
      else:
        for j, (p, v) in enumerate(results):
          sum_pred[j] += p
          sum_sq_pred[j] += p * p
          sum_var[j] += v
    output = []
    std = []
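    # The reported standard deviation combines epistemic uncertainty (the spread
    # of the mean prediction across dropout masks, E[p^2] - E[p]^2) with
    # aleatoric uncertainty (the average of the model's predicted variances).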
    for i in range(len(sum_pred)):
      p = sum_pred[i] / masks
      output.append(p)
      std.append(np.sqrt(sum_sq_pred[i] / masks - p * p + sum_var[i] / masks))
    if len(output) == 1:
      return (output[0], std[0])
    else:
      return zip(output, std)

  def evaluate_generator(self,
                         generator,
                         metrics,
                         transformers=[],
                         per_task_metrics=False):
    """Evaluate the performance of this model on the data produced by a generator.

    Parameters
    ----------
    generator: generator
      this should generate batches, each represented as a tuple of the form
      (inputs, labels, weights).
    metrics: list of deepchem.metrics.Metric
      Evaluation metrics to compute
    transformers: list of dc.trans.Transformers
      Transformers that the input data has been transformed by.  The output
      is passed through these transformers to undo the transformations.
    per_task_metrics: bool
      If True, return per-task scores.

    Returns
    -------
    dict
      Maps tasks to scores under metric.
    """
    evaluator = GeneratorEvaluator(self, generator, transformers)
    return evaluator.compute_model_performance(metrics, per_task_metrics)

  def compute_saliency(self, X):
    """Compute the saliency map for an input sample.

    This computes the Jacobian matrix with the derivative of each output element
    with respect to each input element.  More precisely,

    - If this model has a single output, it returns a matrix of shape
      (output_shape, input_shape) with the derivatives.
    - If this model has multiple outputs, it returns a list of matrices, one
      for each output.

    This method cannot be used on models that take multiple inputs.

    Parameters
    ----------
    X: ndarray
      the input data for a single sample

    Returns
    -------
    the Jacobian matrix, or a list of matrices
    """
    input_shape = X.shape
    X = np.reshape(X, [1] + list(X.shape))
    self._create_inputs([X])
    X, _, _ = self._prepare_batch((X, None, None))
    if tf.executing_eagerly():
      # In eager mode we use a GradientTape to compute gradients.

      X = tf.constant(X)
      with tf.GradientTape(
          persistent=True, watch_accessed_variables=False) as tape:
        tape.watch(X)
        outputs = self.model(X)
        if isinstance(outputs, tf.Tensor):
          outputs = [outputs]
        final_result = []
        for output in outputs:
          output_shape = tuple(output.shape.as_list()[1:])
          output = tf.reshape(output, [-1])
          result = []
          for i in range(output.shape[0]):
            result.append(tape.gradient(output[i], X))
          final_result.append(
              tf.reshape(tf.stack(result), output_shape + input_shape).numpy())
    else:
      # In graph mode we use tf.gradients().

      def jacobian(y, x):
        # Adapted from https://github.com/tensorflow/tensorflow/issues/675#issuecomment-319891923.
        y = tf.reshape(tf.convert_to_tensor(y)[0], [-1])
        n = y.shape[0]
        loop_vars = [
            tf.constant(0, tf.int32),
            tf.TensorArray(tf.float32, size=n)
        ]
        _, jacobian = tf.while_loop(
            lambda j, _: j < n,
            lambda j, result: (j + 1, result.write(j, tf.gradients(y[j], x))),
            loop_vars)
        return jacobian.stack()

      grads = [
          jacobian(self._output_tensors[i], self._input_placeholders[0])
          for i in self._prediction_outputs
      ]
      feed_dict = {self._input_placeholders[0]: X}
      result = self.session.run(grads, feed_dict=feed_dict)
      output_shapes = [
          tuple(o.shape.as_list()[1:]) for o in self._output_tensors
      ]
      final_result = [
          x.reshape(s + input_shape) for x, s in zip(result, output_shapes)
      ]
    if len(final_result) == 1:
      return final_result[0]
    return final_result

  def _prepare_batch(self, batch):
    inputs, labels, weights = batch
    inputs = [
        x if x.dtype == t else x.astype(t)
        for x, t in zip(inputs, self._input_dtypes)
    ]
    if labels is not None:
      labels = [
          x if x.dtype == t else x.astype(t)
          for x, t in zip(labels, self._label_dtypes)
      ]
    if weights is not None:
      weights = [
          x if x.dtype == t else x.astype(t)
          for x, t in zip(weights, self._weights_dtypes)
      ]
    return (inputs, labels, weights)

  def default_generator(self,
                        dataset,
                        epochs=1,
                        predict=False,
                        deterministic=True,
                        pad_batches=True):
    for epoch in range(epochs):
      for (X_b, y_b, w_b, ids_b) in dataset.iterbatches(
          batch_size=self.batch_size,
          deterministic=deterministic,
          pad_batches=pad_batches):
        yield ([X_b], [y_b], [w_b])

  def save_checkpoint(self, max_checkpoints_to_keep=5):
    """Save a checkpoint to disk.

    Usually you do not need to call this method, since fit() saves checkpoints
    automatically.  If you have disabled automatic checkpointing during fitting,
    this can be called to manually write checkpoints.

    Parameters
    ----------
    max_checkpoints_to_keep: int
      the maximum number of checkpoints to keep.  Older checkpoints are discarded.
    """
    self._ensure_built()
    manager = tf.train.CheckpointManager(self._checkpoint, self.model_dir,
                                         max_checkpoints_to_keep)
    self._exec_with_session(lambda: manager.save())

  def _exec_with_session(self, f):
    if tf.executing_eagerly():
      f()
    else:
      with self.session.as_default():
        f()

  def get_checkpoints(self):
    """Get a list of all available checkpoint files."""
    return tf.train.get_checkpoint_state(
        self.model_dir).all_model_checkpoint_paths

  def restore(self, checkpoint=None):
    """Reload the values of all variables from a checkpoint file.

    Parameters
    ----------
    checkpoint: str
      the path to the checkpoint file to load.  If this is None, the most recent
      checkpoint will be chosen automatically.  Call get_checkpoints() to get a
      list of all available checkpoints.
    """
    if checkpoint is None:
      checkpoint = tf.train.latest_checkpoint(self.model_dir)
    if checkpoint is None:
      raise ValueError('No checkpoint found')
    if tf.executing_eagerly():
      self._checkpoint.restore(checkpoint)
    else:
      self._checkpoint.restore(checkpoint).run_restore_ops(self.session)
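
A minimal usage sketch for the KerasModel wrapper above; the wrapped tf.keras network, the random dataset, and the hyperparameters below are illustrative assumptions rather than part of the original snippet.

import numpy as np
import tensorflow as tf
import deepchem as dc

# Wrap a plain Keras network; L2Loss gives a standard regression objective.
keras_model = tf.keras.Sequential([
    tf.keras.layers.Dense(50, activation='relu'),
    tf.keras.layers.Dense(1)
])
model = dc.models.KerasModel(keras_model, dc.models.losses.L2Loss(), batch_size=16)

# Fit on a small random dataset, then predict on the same data.
X = np.random.rand(100, 10)
y = np.random.rand(100, 1)
dataset = dc.data.NumpyDataset(X, y)
model.fit(dataset, nb_epoch=5)
y_pred = model.predict(dataset)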
Example #24
0
class TensorGraph(Model):
    def __init__(self,
                 tensorboard=False,
                 tensorboard_log_frequency=100,
                 batch_size=100,
                 random_seed=None,
                 use_queue=True,
                 graph=None,
                 learning_rate=0.001,
                 **kwargs):
        """
    Parameters
    ----------
    tensorboard: bool
      Should we log to model_dir data for tensorboard?
    tensorboard_log_frequency: int
      How many training batches before logging tensorboard?
    batch_size: int
      default batch size for training and evaluating
    use_queue: boolean
      if True when building we will create a tf.FIFO queue, which will hold
      all features, weights, and labels.  We will feed the inputs into this
      queue in batches of self.batch_size in a separate thread from the
      thread training the model.  You cannot use a queue when
      batches are not of consistent size
    graph: tensorflow.Graph
      the Graph in which to create Tensorflow objects.  If None, a new Graph
      is created.
    learning_rate: float or LearningRateSchedule
      the learning rate to use for optimization
    kwargs
    """

        # Layer Management
        self.layers = dict()
        self.features = list()
        self.labels = list()
        self.outputs = list()
        self.task_weights = list()
        self.submodels = list()
        self.loss = Constant(0)
        self.built = False
        self.queue_installed = False
        self.optimizer = Adam(learning_rate=learning_rate,
                              beta1=0.9,
                              beta2=0.999,
                              epsilon=1e-7)

        # Singular place to hold Tensor objects which don't serialize
        # These have to be reconstructed on restoring from pickle
        # See TensorGraph._get_tf() for more details on lazy construction
        self.tensor_objects = {
            "FileWriter": None,
            "Graph": graph,
            "train_op": None,
            "summary_op": None,
        }
        self.tensorboard = tensorboard
        self.tensorboard_log_frequency = tensorboard_log_frequency
        self.tensorboard_step = 0
        self.global_step = 0
        self.use_queue = use_queue

        self.batch_size = batch_size
        self.random_seed = random_seed
        super(TensorGraph, self).__init__(**kwargs)
        self.save_file = "%s/%s" % (self.model_dir, "model")
        self.model_class = None

        self.rnn_initial_states = []
        self.rnn_final_states = []
        self.rnn_zero_states = []
        if self.use_queue and self.tensorboard:
            raise ValueError(
                "Currently TensorGraph cannot both use_queue and tensorboard at the same time"
            )

    def _add_layer(self, layer):
        if layer.name is None:
            layer.name = "%s_%s" % (layer.__class__.__name__,
                                    len(self.layers) + 1)
        if layer.name in self.layers:
            return
        if isinstance(layer, Feature):
            self.features.append(layer)
        if isinstance(layer, Label):
            self.labels.append(layer)
        if isinstance(layer, Weights):
            self.task_weights.append(layer)
        self.layers[layer.name] = layer
        for in_layer in layer.in_layers:
            self._add_layer(in_layer)

    def fit(self,
            dataset,
            nb_epoch=10,
            max_checkpoints_to_keep=5,
            checkpoint_interval=1000,
            deterministic=False,
            restore=False,
            submodel=None):
        """Train this model on a dataset.

    Parameters
    ----------
    dataset: Dataset
      the Dataset to train on
    nb_epoch: int
      the number of epochs to train for
    max_checkpoints_to_keep: int
      the maximum number of checkpoints to keep.  Older checkpoints are discarded.
    checkpoint_interval: int
      the frequency at which to write checkpoints, measured in training steps.
      Set this to 0 to disable automatic checkpointing.
    deterministic: bool
      if True, the samples are processed in order.  If False, a different random
      order is used for each epoch.
    restore: bool
      if True, restore the model from the most recent checkpoint and continue training
      from there.  If False, retrain the model from scratch.
    submodel: Submodel
      an alternate training objective to use.  This should have been created by
      calling create_submodel().
    """
        return self.fit_generator(
            self.default_generator(dataset,
                                   epochs=nb_epoch,
                                   deterministic=deterministic),
            max_checkpoints_to_keep, checkpoint_interval, restore, submodel)

    def fit_generator(self,
                      feed_dict_generator,
                      max_checkpoints_to_keep=5,
                      checkpoint_interval=1000,
                      restore=False,
                      submodel=None):
        """Train this model on data from a generator.

    Parameters
    ----------
    feed_dict_generator: generator
      this should generate batches, each represented as a dict that maps
      Layers to values.
    max_checkpoints_to_keep: int
      the maximum number of checkpoints to keep.  Older checkpoints are discarded.
    checkpoint_interval: int
      the frequency at which to write checkpoints, measured in training steps.
      Set this to 0 to disable automatic checkpointing.
    restore: bool
      if True, restore the model from the most recent checkpoint and continue training
      from there.  If False, retrain the model from scratch.
    submodel: Submodel
      an alternate training objective to use.  This should have been created by
      calling create_submodel().

    Returns
    -------
    the average loss over the most recent checkpoint interval
    """
        def create_feed_dict():
            if self.use_queue:
                while True:
                    yield {self._training_placeholder: 1.0}
            for d in feed_dict_generator:
                feed_dict = dict(d)
                feed_dict[self._training_placeholder] = 1.0
                yield feed_dict

        if not self.built:
            self.build()
        with self._get_tf("Graph").as_default():
            time1 = time.time()
            loss = self.loss
            if submodel is None:
                train_op = self._get_tf('train_op')
            else:
                train_op = submodel.get_train_op()
                if submodel.loss is not None:
                    loss = submodel.loss
            if checkpoint_interval > 0:
                saver = tf.train.Saver(max_to_keep=max_checkpoints_to_keep)
            if restore:
                self.restore()
            avg_loss, n_averaged_batches = 0.0, 0.0
            n_samples = 0
            n_enqueued = [0]
            final_sample = [None]
            if self.use_queue:
                enqueue_thread = threading.Thread(
                    target=_enqueue_batch,
                    args=(self, feed_dict_generator, self._get_tf("Graph"),
                          self.session, n_enqueued, final_sample))
                enqueue_thread.start()
            for feed_dict in create_feed_dict():
                if self.use_queue:
                    # Don't let this thread get ahead of the enqueue thread, since if
                    # we try to read more batches than the total number that get queued,
                    # this thread will hang indefinitely.
                    while n_enqueued[0] <= n_samples:
                        if n_samples == final_sample[0]:
                            break
                        time.sleep(0)
                    if n_samples == final_sample[0]:
                        break
                n_samples += 1
                should_log = (self.tensorboard and
                              n_samples % self.tensorboard_log_frequency == 0)
                fetches = [train_op, loss.out_tensor]
                if should_log:
                    fetches.append(self._get_tf("summary_op"))
                fetched_values = self.session.run(fetches, feed_dict=feed_dict)
                if should_log:
                    self._log_tensorboard(fetched_values[2])
                avg_loss += fetched_values[1]
                n_averaged_batches += 1
                self.global_step += 1
                if checkpoint_interval > 0 and self.global_step % checkpoint_interval == checkpoint_interval - 1:
                    saver.save(self.session,
                               self.save_file,
                               global_step=self.global_step)
                    avg_loss = float(avg_loss) / n_averaged_batches
                    print('Ending global_step %d: Average loss %g' %
                          (self.global_step, avg_loss))
                    avg_loss, n_averaged_batches = 0.0, 0.0
            if n_averaged_batches > 0:
                avg_loss = float(avg_loss) / n_averaged_batches
            if checkpoint_interval > 0:
                if n_averaged_batches > 0:
                    print('Ending global_step %d: Average loss %g' %
                          (self.global_step, avg_loss))
                saver.save(self.session,
                           self.save_file,
                           global_step=self.global_step)
                time2 = time.time()
                print("TIMING: model fitting took %0.3f s" % (time2 - time1))
        return avg_loss

    def _log_tensorboard(self, summary):
        """
    TODO(LESWING) set epoch
    Parameters
    ----------
    Returns
    -------
    """
        global_step = int(self.global_step)
        writer = self._get_tf("FileWriter")
        writer.reopen()
        writer.add_summary(summary, global_step=global_step)
        writer.close()

    def fit_on_batch(self, X, y, w, submodel=None):
        if not self.built:
            self.build()
        dataset = NumpyDataset(X, y, w)
        return self.fit(dataset, nb_epoch=1, submodel=submodel)

    def default_generator(self,
                          dataset,
                          epochs=1,
                          predict=False,
                          deterministic=True,
                          pad_batches=True):
        if len(self.features) > 1:
            raise ValueError("More than one Feature, must use generator")
        if len(self.labels) > 1:
            raise ValueError("More than one Label, must use generator")
        if len(self.task_weights) > 1:
            raise ValueError("More than one Weights, must use generator")
        for epoch in range(epochs):
            for (X_b, y_b, w_b,
                 ids_b) in dataset.iterbatches(batch_size=self.batch_size,
                                               deterministic=deterministic,
                                               pad_batches=pad_batches):
                feed_dict = dict()
                if len(self.labels) == 1 and y_b is not None and not predict:
                    feed_dict[self.labels[0]] = y_b
                if len(self.features) == 1 and X_b is not None:
                    feed_dict[self.features[0]] = X_b
                if len(self.task_weights) == 1 and w_b is not None and not predict:
                    feed_dict[self.task_weights[0]] = w_b
                for (initial_state, zero_state) in zip(self.rnn_initial_states,
                                                       self.rnn_zero_states):
                    feed_dict[initial_state] = zero_state
                yield feed_dict

    def predict_on_generator(self, generator, transformers=[], outputs=None):
        """
    Parameters
    ----------
    generator: Generator
      Generator that constructs feed dictionaries for TensorGraph.
    transformers: list
      List of dc.trans.Transformers.
    outputs: object
      If outputs is None, then will assume outputs = self.outputs.
      If outputs is a Layer/Tensor, then will evaluate and return as a
      single ndarray. If outputs is a list of Layers/Tensors, will return a list
      of ndarrays.
    Returns:
      y_pred: numpy ndarray of shape (n_samples, n_classes*n_tasks)
    """
        if not self.built:
            self.build()
        if outputs is None:
            outputs = self.outputs
        elif not isinstance(outputs, collections.Sequence):
            outputs = [outputs]
        with self._get_tf("Graph").as_default():
            # Gather results for each output
            results = [[] for out in outputs]
            for feed_dict in generator:
                feed_dict = {
                    self.layers[k.name].out_tensor: v
                    for k, v in six.iteritems(feed_dict)
                }
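                # A training value of 0.0 runs the graph in inference mode, so
                # layers such as Dropout are disabled during prediction.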
                feed_dict[self._training_placeholder] = 0.0
                feed_results = self.session.run(outputs, feed_dict=feed_dict)
                if len(feed_results) > 1:
                    if len(transformers):
                        raise ValueError("Does not support transformations "
                                         "for multiple outputs.")
                elif len(feed_results) == 1:
                    result = undo_transforms(feed_results[0], transformers)
                    feed_results = [result]
                for ind, result in enumerate(feed_results):
                    results[ind].append(result)

            final_results = []
            for result_list in results:
                final_results.append(np.concatenate(result_list, axis=0))
            # If only one output, just return array
            if len(final_results) == 1:
                return final_results[0]
            else:
                return final_results

    def predict_proba_on_generator(self,
                                   generator,
                                   transformers=[],
                                   outputs=None):
        """
    Returns:
      y_pred: numpy ndarray of shape (n_samples, n_classes*n_tasks)
    """
        return self.predict_on_generator(generator, transformers, outputs)

    def predict_on_batch(self, X, transformers=[], outputs=None):
        """Generates predictions for input samples, processing samples in a batch.

    Parameters
    ----------
    X: ndarray
      the input data, as a Numpy array.
    transformers: List
      List of dc.trans.Transformers

    Returns
    -------
    A Numpy array of predictions.
    """
        dataset = NumpyDataset(X=X, y=None)
        generator = self.default_generator(dataset,
                                           predict=True,
                                           pad_batches=False)
        return self.predict_on_generator(generator, transformers, outputs)

    def predict_proba_on_batch(self, X, transformers=[], outputs=None):
        """Generates predictions for input samples, processing samples in a batch.

    Parameters
    ----------
    X: ndarray
      the input data, as a Numpy array.
    transformers: List
      List of dc.trans.Transformers

    Returns
    -------
    A Numpy array of predictions.
    """
        return self.predict_on_batch(X, transformers, outputs)

    def predict(self, dataset, transformers=[], outputs=None):
        """
    Uses self to make predictions on provided Dataset object.

    Parameters
    ----------
    dataset: dc.data.Dataset
      Dataset to make prediction on
    transformers: list
      List of dc.trans.Transformers.
    outputs: object
      If outputs is None, then will assume outputs = self.outputs[0] (single
      output). If outputs is a Layer/Tensor, then will evaluate and return as a
      single ndarray. If outputs is a list of Layers/Tensors, will return a list
      of ndarrays.

    Returns
    -------
    results: numpy ndarray or list of numpy ndarrays
    """
        generator = self.default_generator(dataset,
                                           predict=True,
                                           pad_batches=False)
        return self.predict_on_generator(generator, transformers, outputs)

    def predict_proba(self, dataset, transformers=[], outputs=None):
        """
    Parameters
    ----------
    dataset: dc.data.Dataset
      Dataset to make prediction on
    transformers: list
      List of dc.trans.Transformers.
    outputs: object
      If outputs is None, then will assume outputs = self.outputs[0] (single
      output). If outputs is a Layer/Tensor, then will evaluate and return as a
      single ndarray. If outputs is a list of Layers/Tensors, will return a list
      of ndarrays.

    Returns
    -------
    y_pred: numpy ndarray or list of numpy ndarrays
    """
        generator = self.default_generator(dataset,
                                           predict=True,
                                           pad_batches=False)
        return self.predict_proba_on_generator(generator, transformers,
                                               outputs)

    def topsort(self):
        def add_layers_to_list(layer, sorted_layers):
            if layer in sorted_layers:
                return
            for in_layer in layer.in_layers:
                add_layers_to_list(in_layer, sorted_layers)
            sorted_layers.append(layer)

        sorted_layers = []
        for l in self.features + self.labels + self.task_weights + self.outputs:
            add_layers_to_list(l, sorted_layers)
        add_layers_to_list(self.loss, sorted_layers)
        for submodel in self.submodels:
            if submodel.loss is not None:
                add_layers_to_list(submodel.loss, sorted_layers)
        return sorted_layers

    def build(self):
        if self.built:
            return
        with self._get_tf("Graph").as_default():
            self._training_placeholder = tf.placeholder(dtype=tf.float32,
                                                        shape=())
            if self.random_seed is not None:
                tf.set_random_seed(self.random_seed)
            self._install_queue()
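            # Create tensors in topological order so that every layer's input
            # tensors already exist when create_tensor() is called.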
            for layer in self.topsort():
                with tf.name_scope(layer.name):
                    layer.create_tensor(training=self._training_placeholder)
                    self.rnn_initial_states += layer.rnn_initial_states
                    self.rnn_final_states += layer.rnn_final_states
                    self.rnn_zero_states += layer.rnn_zero_states
                    layer.add_summary_to_tg()
            self.session = tf.Session()
            self.built = True

            # Ensure all training operators have been created.

            self._get_tf('train_op')
            for submodel in self.submodels:
                train_op = submodel.get_train_op()

            # Initialize variables.

            self.session.run(tf.global_variables_initializer())
            for layer in self.layers.values():
                if layer.variable_values is not None:
                    variables = self.get_layer_variables(layer)
                    for var, val in zip(variables, layer.variable_values):
                        self.session.run(var.assign(val))

        for layer in self.layers.values():
            if layer.tensorboard:
                self.tensorboard = True
        tf.summary.scalar("loss", self.loss.out_tensor)
        for layer in self.layers.values():
            if layer.tensorboard:
                tf.summary.tensor_summary(layer.name, layer.out_tensor)
        if self.tensorboard:
            writer = self._get_tf("FileWriter")
            writer.add_graph(self._get_tf("Graph"))
            writer.close()

        # As a sanity check, make sure all tensors have the correct shape.

        for layer in self.layers.values():
            try:
                actual_shape = layer.out_tensor.get_shape().as_list()
                assert list(layer.shape) == actual_shape, (
                    '%s: Expected shape %s does not match actual shape %s' %
                    (layer.name, layer.shape, actual_shape))
            except NotImplementedError:
                pass

    def _install_queue(self):
        """
    """
        if not self.use_queue or self.queue_installed:
            for layer in self.features + self.labels + self.task_weights:
                layer.pre_queue = True
            return
        names = []
        shapes = []
        pre_q_inputs = []
        q = InputFifoQueue(shapes, names, in_layers=pre_q_inputs)
        q.name = "%s_%s" % (q.__class__.__name__, len(self.layers) + 1)

        for layer in self.features + self.labels + self.task_weights:
            pre_q_input = layer.create_pre_q(self.batch_size)
            shapes.append(pre_q_input.shape)
            names.append(pre_q_input.name)
            pre_q_inputs.append(pre_q_input)

            layer.in_layers.append(q)

        self._add_layer(q)
        self.input_queue = q
        self.queue_installed = True

    def set_loss(self, layer):
        self._add_layer(layer)
        self.loss = layer

    def add_output(self, layer):
        self._add_layer(layer)
        self.outputs.append(layer)

    def set_optimizer(self, optimizer):
        """Set the optimizer to use for fitting."""
        self.optimizer = optimizer

    def create_submodel(self, layers=None, loss=None, optimizer=None):
        """Create an alternate objective for training one piece of a TensorGraph.

    A TensorGraph consists of a set of layers, and specifies a loss function and
    optimizer to use for training those layers.  Usually this is sufficient, but
    there are cases where you want to train different parts of a model separately.
    For example, a GAN consists of a generator and a discriminator.  They are
    trained separately, and they use different loss functions.

    A submodel defines an alternate objective to use in cases like this.  It may
    optionally specify any of the following: a subset of layers in the model to
    train; a different loss function; and a different optimizer to use.  This
    method creates a submodel, which you can then pass to fit() to use it for
    training.

    Parameters
    ----------
    layers: list
      the list of layers to train.  If None, all layers in the model will be
      trained.
    loss: Layer
      the loss function to optimize.  If None, the model's main loss function
      will be used.
    optimizer: Optimizer
      the optimizer to use for training.  If None, the model's main optimizer
      will be used.

    Returns
    -------
    the newly created submodel, which can be passed to any of the fitting
    methods.
    """
        if self.built:
            raise ValueError(
                'Submodels must be created before build() is called.')
        submodel = Submodel(self, layers, loss, optimizer)
        self.submodels.append(submodel)
        if loss is not None:
            self._add_layer(loss)
        return submodel

    def get_pickling_errors(self, obj, seen=None):
        if seen is None:
            seen = []
        try:
            state = obj.__getstate__()
        except AttributeError:
            return
        if state is None:
            return
        if isinstance(state, tuple):
            if not isinstance(state[0], dict):
                state = state[1]
            else:
                state[0].update(state[1])
                state = state[0]
        result = {}
        for i in state:
            try:
                pickle.dumps(state[i], protocol=2)
            except pickle.PicklingError:
                if not state[i] in seen:
                    seen.append(state[i])
                    result[i] = self.get_pickling_errors(state[i], seen)
        return result

    def save(self):
        # Remove out_tensor from the object to be pickled
        must_restore = False
        tensor_objects = self.tensor_objects
        rnn_initial_states = self.rnn_initial_states
        rnn_final_states = self.rnn_final_states
        rnn_zero_states = self.rnn_zero_states
        session = self.session
        self.tensor_objects = {}
        self.rnn_initial_states = []
        self.rnn_final_states = []
        self.rnn_zero_states = []
        self.session = None
        out_tensors = []
        if self.built:
            must_restore = True
            for layer in self.topsort():
                out_tensors.append(layer.none_tensors())
            training_placeholder = self._training_placeholder
            self._training_placeholder = None
            self.built = False

        # Pickle itself
        pickle_name = os.path.join(self.model_dir, "model.pickle")

        with open(pickle_name, 'wb') as fout:
            try:
                pickle.dump(self, fout)
            except Exception as e:
                print(self.get_pickling_errors(self))
                raise e

        # add out_tensor back to everyone
        if must_restore:
            for index, layer in enumerate(self.topsort()):
                layer.set_tensors(out_tensors[index])
            self._training_placeholder = training_placeholder
            self.built = True
        self.tensor_objects = tensor_objects
        self.rnn_initial_states = rnn_initial_states
        self.rnn_final_states = rnn_final_states
        self.rnn_zero_states = rnn_zero_states
        self.session = session

    def evaluate_generator(self,
                           feed_dict_generator,
                           metrics,
                           transformers=[],
                           labels=None,
                           outputs=None,
                           weights=[],
                           per_task_metrics=False):

        if labels is None:
            raise ValueError('Labels must be provided to evaluate_generator')
        n_tasks = len(self.outputs)
        n_classes = self.outputs[0].out_tensor.get_shape()[-1].value
        evaluator = GeneratorEvaluator(self,
                                       feed_dict_generator,
                                       transformers,
                                       labels=labels,
                                       outputs=outputs,
                                       weights=weights,
                                       n_tasks=n_tasks,
                                       n_classes=n_classes)
        if not per_task_metrics:
            scores = evaluator.compute_model_performance(metrics)
            return scores
        else:
            scores, per_task_scores = evaluator.compute_model_performance(
                metrics, per_task_metrics=per_task_metrics)
            return scores, per_task_scores

    def get_layer_variables(self, layer):
        """Get the list of trainable variables in a layer of the graph."""
        if not self.built:
            self.build()
        with self._get_tf("Graph").as_default():
            if layer.variable_scope == '':
                return []
            return tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                     scope=layer.variable_scope)

    def get_global_step(self):
        return self._get_tf("GlobalStep")

    def _get_tf(self, obj):
        """Fetches underlying TensorFlow primitives.

    Parameters
    ----------
    obj: str
      If "Graph", returns tf.Graph instance. If "FileWriter", returns
      tf.summary.FileWriter. If "Optimizer", returns the optimizer. If
      "train_op", returns the train operation. If "summary_op", returns the
      merged summary. If "GlobalStep" returns the global step.
    Returns
    -------
    TensorFlow Object

    """

        if obj in self.tensor_objects and self.tensor_objects[obj] is not None:
            return self.tensor_objects[obj]
        if obj == "Graph":
            self.tensor_objects['Graph'] = tf.Graph()
        elif obj == "FileWriter":
            self.tensor_objects['FileWriter'] = tf.summary.FileWriter(
                self.model_dir)
        elif obj == 'Optimizer':
            self.tensor_objects[
                'Optimizer'] = self.optimizer._create_optimizer(
                    self._get_tf('GlobalStep'))
        elif obj == 'train_op':
            opt = self._get_tf('Optimizer')
            global_step = self._get_tf('GlobalStep')
            try:
                self.tensor_objects['train_op'] = opt.minimize(
                    self.loss.out_tensor, global_step=global_step)
            except ValueError:
                # The loss doesn't depend on any variables.
                self.tensor_objects['train_op'] = 0
        elif obj == 'summary_op':
            self.tensor_objects['summary_op'] = tf.summary.merge_all(
                key=tf.GraphKeys.SUMMARIES)
        elif obj == 'GlobalStep':
            with self._get_tf("Graph").as_default():
                self.tensor_objects['GlobalStep'] = tf.Variable(
                    0, trainable=False)
        return self._get_tf(obj)

    def save_checkpoint(self, max_checkpoints_to_keep=5):
        """Save a checkpoint to disk.

    Usually you do not need to call this method, since fit() saves checkpoints
    automatically.  If you have disabled automatic checkpointing during fitting,
    this can be called to manually write checkpoints.

    Parameters
    ----------
    max_checkpoints_to_keep: int
      the maximum number of checkpoints to keep.  Older checkpoints are discarded.
    """
        saver = tf.train.Saver(max_to_keep=max_checkpoints_to_keep)
        saver.save(self.session, self.save_file, global_step=self.global_step)

    def restore(self):
        """Reload the values of all variables from the most recent checkpoint file."""
        if not self.built:
            self.build()
        last_checkpoint = tf.train.latest_checkpoint(self.model_dir)
        if last_checkpoint is None:
            raise ValueError('No checkpoint found')
        with self._get_tf("Graph").as_default():
            saver = tf.train.Saver()
            saver.restore(self.session, last_checkpoint)

    def get_num_tasks(self):
        return len(self.outputs)

    def get_pre_q_input(self, input_layer):
        layer_name = input_layer.name
        pre_q_name = "%s_pre_q" % layer_name
        return self.layers[pre_q_name]

    @staticmethod
    def load_from_dir(model_dir):
        pickle_name = os.path.join(model_dir, "model.pickle")
        with open(pickle_name, 'rb') as fout:
            tensorgraph = pickle.load(fout)
            tensorgraph.built = False
            tensorgraph.model_dir = model_dir
            try:
                tensorgraph.restore()
            except ValueError:
                pass  # No checkpoint to load
            return tensorgraph

    def __del__(self):
        pass
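
A minimal sketch of wiring up the TensorGraph class above, assuming the layer classes exported by deepchem.models.tensorgraph.layers; the toy network, data, and sizes are illustrative.

import numpy as np
import deepchem as dc
from deepchem.models.tensorgraph.tensor_graph import TensorGraph
from deepchem.models.tensorgraph.layers import Feature, Label, Dense, L2Loss, ReduceMean

tg = TensorGraph(batch_size=16, use_queue=False)

# Declare the inputs, a single dense output layer, and a mean squared error loss.
features = Feature(shape=(None, 10))
labels = Label(shape=(None, 1))
output = Dense(out_channels=1, in_layers=[features])
tg.add_output(output)
tg.set_loss(ReduceMean(in_layers=[L2Loss(in_layers=[labels, output])]))

# Fit on random data, then predict.
dataset = dc.data.NumpyDataset(np.random.rand(100, 10), np.random.rand(100, 1))
tg.fit(dataset, nb_epoch=5)
y_pred = tg.predict(dataset)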
Example #25
0
tokens = set()
for s in train_smiles:
  tokens = tokens.union(set(s))
tokens = sorted(list(tokens))
max_length = max(len(s) for s in train_smiles)

#training
from deepchem.models.tensorgraph.optimizers import Adam, ExponentialDecay
from deepchem.models.tensorgraph.models.seqtoseq import AspuruGuzikAutoEncoder
#the encoder is a CNN and the decoder is a GRU
model = AspuruGuzikAutoEncoder(tokens, max_length, model_dir='vae')

batches_per_epoch = len(train_smiles)/model.batch_size
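# ExponentialDecay multiplies the rate by 0.95 after each epoch's worth of batches.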
learning_rate = ExponentialDecay(0.001, 0.95, batches_per_epoch)
model.set_optimizer(Adam(learning_rate=learning_rate))

def generate_sequences(epochs): 
  for i in range(epochs):
    for s in train_smiles: 
      yield (s, s)
model.summary()
model.fit_sequences(generate_sequences(1))

#check that the molecules are valid
import numpy as np
from rdkit import Chem
predictions = model.predict_from_embeddings(np.random.normal(size=(1000,196))) 
molecules = []
for p in predictions:
  smiles = ''.join(p)
  mol = Chem.MolFromSmiles(smiles)
  if mol is not None:
    molecules.append(mol)
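
#illustrative follow-up (not part of the original example): summarize how many of
#the decoded molecules are valid and how many are unique
valid_smiles = set(Chem.MolToSmiles(m) for m in molecules)
print('valid: %d / %d, unique: %d' % (len(molecules), len(predictions), len(valid_smiles)))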
Example #26
0
  def __init__(self,
               tensorboard=False,
               tensorboard_log_frequency=100,
               batch_size=100,
               random_seed=None,
               use_queue=True,
               graph=None,
               learning_rate=0.001,
               configproto=None,
               **kwargs):
    """
    Parameters
    ----------
    tensorboard: bool
      Should we log to model_dir data for tensorboard?
    tensorboard_log_frequency: int
      How many training batches before logging tensorboard?
    batch_size: int
      default batch size for training and evaluating
    use_queue: boolean
      if True when building we will create a tf.FIFO queue, which will hold
      all features, weights, and labels.  We will feed the inputs into this
      queue in batches of self.batch_size in a separate thread from the
      thread training the model.  You cannot use a queue when
      batches are not of consistent size
    graph: tensorflow.Graph
      the Graph in which to create Tensorflow objects.  If None, a new Graph
      is created.
    learning_rate: float or LearningRateSchedule
      the learning rate to use for optimization
    configproto: a tf.ConfigProto() object used to create tf.Session()
    """

    # Layer Management
    self.layers = dict()
    self.features = list()
    self.labels = list()
    self.outputs = list()
    self.variances = list()
    self.task_weights = list()
    self.submodels = list()
    self.loss = Constant(0)
    self.built = False
    self.queue_installed = False
    self.optimizer = Adam(
        learning_rate=learning_rate, beta1=0.9, beta2=0.999, epsilon=1e-7)
    self.configproto = configproto

    # Singular place to hold Tensor objects which don't serialize
    # These have to be reconstructed on restoring from pickle
    # See TensorGraph._get_tf() for more details on lazy construction
    self.tensor_objects = {
        "FileWriter": None,
        "Graph": graph,
        "train_op": None,
        "summary_op": None,
    }
    self.tensorboard = tensorboard
    self.tensorboard_log_frequency = tensorboard_log_frequency
    self.tensorboard_step = 0
    self.global_step = 0
    self.use_queue = use_queue

    self.batch_size = batch_size
    self.random_seed = random_seed
    super(TensorGraph, self).__init__(**kwargs)
    self.save_file = "%s/%s" % (self.model_dir, "model")
    self.model_class = None

    self.rnn_initial_states = []
    self.rnn_final_states = []
    self.rnn_zero_states = []
    if self.use_queue and self.tensorboard:
      raise ValueError(
          "Currently TensorGraph cannot both use_queue and tensorboard at the same time"
      )
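
# Illustrative sketch (not one of the listed examples): wiring a minimal TensorGraph
# with the constructor arguments documented above, assuming the legacy
# deepchem.models.tensorgraph API (Feature/Label/Dense/L2Loss/ReduceMean layers).
import numpy as np
import deepchem as dc
from deepchem.models.tensorgraph.tensor_graph import TensorGraph
from deepchem.models.tensorgraph.layers import Feature, Label, Dense, L2Loss, ReduceMean

tg = TensorGraph(batch_size=16, learning_rate=0.001, use_queue=False)
feature = Feature(shape=(None, 3))
label = Label(shape=(None, 1))
dense = Dense(out_channels=1, in_layers=[feature])
tg.add_output(dense)
tg.set_loss(ReduceMean(in_layers=[L2Loss(in_layers=[label, dense])]))

X = np.random.rand(16, 3)
y = np.random.rand(16, 1)
tg.fit(dc.data.NumpyDataset(X, y), nb_epoch=1)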
Example #27
0
    def test_hindsight(self):
        """Test Hindsight Experience Replay."""

        # The environment is a plane in which the agent moves by steps until it reaches a randomly
        # positioned goal.  No reward is given until it reaches the goal.  That makes it very hard
        # to learn by standard methods, since it may take a very long time to receive any feedback
        # at all.  Using hindsight makes it much easier.

        class TestEnvironment(dc.rl.Environment):
            def __init__(self):
                super(TestEnvironment, self).__init__((4, ), 4)
                self.moves = [(-1, 0), (1, 0), (0, -1), (0, 1)]

            def reset(self):
                self._state = np.concatenate([[0, 0],
                                              np.random.randint(-50, 50, 2)])
                self._terminated = False
                self.count = 0

            def step(self, action):
                new_state = self._state.copy()
                new_state[:2] += self.moves[action]
                self._state = new_state
                self.count += 1
                reward = 0
                if np.array_equal(new_state[:2], new_state[2:]):
                    self._terminated = True
                    reward = 1
                elif self.count == 1000:
                    self._terminated = True
                return reward

            def apply_hindsight(self, states, actions, goal):
                new_states = []
                rewards = []
                goal_pos = goal[:2]
                for state, action in zip(states, actions):
                    new_state = state.copy()
                    new_state[2:] = goal_pos
                    new_states.append(new_state)
                    pos_after_action = new_state[:2] + self.moves[action]
                    if np.array_equal(pos_after_action, goal_pos):
                        rewards.append(1)
                    else:
                        rewards.append(0)
                return new_states, rewards

        # A simple policy with two hidden layers.

        class TestPolicy(dc.rl.Policy):
            def create_layers(self, state, **kwargs):

                dense1 = Dense(6, activation_fn=tf.nn.relu, in_layers=state)
                dense2 = Dense(6, activation_fn=tf.nn.relu, in_layers=dense1)
                output = Dense(4,
                               activation_fn=tf.nn.softmax,
                               biases_initializer=None,
                               in_layers=dense2)
                value = Dense(1, in_layers=dense2)
                return {'action_prob': output, 'value': value}

        # Optimize it.

        env = TestEnvironment()
        learning_rate = PolynomialDecay(initial_rate=0.0001,
                                        final_rate=0.00005,
                                        decay_steps=1500000)
        ppo = dc.rl.PPO(env,
                        TestPolicy(),
                        use_hindsight=True,
                        optimization_epochs=8,
                        optimizer=Adam(learning_rate=learning_rate))
        ppo.fit(1500000)

        # Try running it a few times and see if it succeeds.

        pass_count = 0
        for i in range(5):
            env.reset()
            while not env.terminated:
                env.step(ppo.select_action(env.state))
            if np.array_equal(env.state[:2], env.state[2:]):
                pass_count += 1
        assert pass_count >= 3
Example #28
0
    def test_roulette(self):
        """Test training a policy for the roulette environment."""

        # This is modeled after the Roulette-v0 environment from OpenAI Gym.
        # The player can bet on any number from 0 to 36, or walk away (which ends the
        # game).  The average reward for any bet is slightly negative, so the best
        # strategy is to walk away.

        class RouletteEnvironment(dc.rl.Environment):
            def __init__(self):
                super(RouletteEnvironment, self).__init__([(1, )], 38)
                self._state = [np.array([0])]

            def step(self, action):
                if action == 37:
                    self._terminated = True  # Walk away.
                    return 0.0
                wheel = np.random.randint(37)
                if wheel == 0:
                    if action == 0:
                        return 35.0
                    return -1.0
                if action != 0 and wheel % 2 == action % 2:
                    return 1.0
                return -1.0

            def reset(self):
                self._terminated = False

        env = RouletteEnvironment()

        # This policy just learns a constant probability for each action, and a constant for the value.

        class TestPolicy(dc.rl.Policy):
            def __init__(self):
                super(TestPolicy, self).__init__(['action_prob', 'value'])

            def create_model(self, **kwargs):
                class TestModel(tf.keras.Model):
                    def __init__(self):
                        super(TestModel, self).__init__(**kwargs)
                        self.action = tf.Variable(
                            np.ones(env.n_actions, np.float32))
                        self.value = tf.Variable([0.0], tf.float32)

                    def call(self, inputs, **kwargs):
                        prob = tf.nn.softmax(
                            tf.reshape(self.action, (-1, env.n_actions)))
                        return (prob, self.value)

                return TestModel()

        # Optimize it.

        a3c = dc.rl.A3C(env,
                        TestPolicy(),
                        max_rollout_length=20,
                        optimizer=Adam(learning_rate=0.001))
        a3c.fit(100000)

        # It should have learned that the expected value is very close to zero, and that the best
        # action is to walk away.

        action_prob, value = a3c.predict([[0]])
        assert -0.5 < value[0] < 0.5
        assert action_prob.argmax() == 37
        assert a3c.select_action([[0]], deterministic=True) == 37

        # Verify that we can create a new A3C object, reload the parameters from the first one, and
        # get the same result.

        new_a3c = dc.rl.A3C(env, TestPolicy(), model_dir=a3c._model.model_dir)
        new_a3c.restore()
        action_prob2, value2 = new_a3c.predict([[0]])
        assert value2 == value

        # Do the same thing, only using the "restore" argument to fit().

        new_a3c = dc.rl.A3C(env, TestPolicy(), model_dir=a3c._model.model_dir)
        new_a3c.fit(0, restore=True)
        action_prob2, value2 = new_a3c.predict([[0]])
        assert value2 == value
    n_observations = genv.observation_space.shape[0]
    print("n_products={} n_observations={} n_actions={}".format(
        n_products, n_observations, n_actions))

    callbacks = [LogCallback(genv.unprocess_observation)]
    if args.method == "ppo":
        agent = deepchem.rl.PPO(
            denv, MyPolicy(n_products, n_observations, args.single_layer),
            max_rollout_length=10000,
            optimization_rollouts=(8 if args.parallel else 1),
            #optimization_epochs=4,
            discount_factor=gamma,
            advantage_lambda=0.98,
            entropy_weight=0.0,
            value_weight=1.0e-4,
            optimizer=Adam(learning_rate=1e-4),
            model_dir="data.ppo."+args.id,
            zero_terminal=False,
            callbacks=callbacks)
    elif args.method == "a3c":
        agent = deepchem.rl.A3C(
            denv, MyPolicy(n_products, n_observations, args.single_layer),
            max_rollout_length=10000,
            discount_factor=gamma,
            advantage_lambda=0.98,
            entropy_weight=0.0,
            value_weight=1.0e-4,
            optimizer=Adam(learning_rate=1e-4),
            model_dir="data.a3c."+args.id,
            worker_count=16,
            zero_terminal=False,
Example #30
0
    def __init__(self,
                 env,
                 policy,
                 max_rollout_length=20,
                 discount_factor=0.99,
                 advantage_lambda=0.98,
                 value_weight=1.0,
                 entropy_weight=0.01,
                 optimizer=None,
                 model_dir=None,
                 use_hindsight=False):
        """Create an object for optimizing a policy.

    Parameters
    ----------
    env: Environment
      the Environment to interact with
    policy: Policy
      the Policy to optimize.  It must have outputs with the names 'action_prob'
      and 'value' (for discrete action spaces) or 'action_mean', 'action_std',
      and 'value' (for continuous action spaces)
    max_rollout_length: int
      the maximum length of rollouts to generate
    discount_factor: float
      the discount factor to use when computing rewards
    advantage_lambda: float
      the parameter for trading bias vs. variance in Generalized Advantage Estimation
    value_weight: float
      a scale factor for the value loss term in the loss function
    entropy_weight: float
      a scale factor for the entropy term in the loss function
    optimizer: Optimizer
      the optimizer to use.  If None, a default optimizer is used.
    model_dir: str
      the directory in which the model will be saved.  If None, a temporary directory will be created.
    use_hindsight: bool
      if True, use Hindsight Experience Replay
    """
        self._env = env
        self._policy = policy
        self.max_rollout_length = max_rollout_length
        self.discount_factor = discount_factor
        self.advantage_lambda = advantage_lambda
        self.value_weight = value_weight
        self.entropy_weight = entropy_weight
        self.use_hindsight = use_hindsight
        self._state_is_list = isinstance(env.state_shape[0],
                                         collections.Sequence)
        if optimizer is None:
            self._optimizer = Adam(learning_rate=0.001, beta1=0.9, beta2=0.999)
        else:
            self._optimizer = optimizer
        self._model = self._build_model(model_dir)
        output_names = policy.output_names
        output_tensors = self._model._output_tensors
        self._value = output_tensors[output_names.index('value')]
        if self.continuous:
            self._action_mean = output_tensors[output_names.index(
                'action_mean')]
            self._action_std = output_tensors[output_names.index('action_std')]
        else:
            self._action_prob = output_tensors[output_names.index(
                'action_prob')]
        rnn_outputs = [
            i for i, n in enumerate(output_names) if n == 'rnn_state'
        ]
        self._rnn_final_states = [output_tensors[i] for i in rnn_outputs]
        self._session = self._model.session
        self._rnn_states = policy.rnn_initial_states
        self._checkpoint = tf.train.Checkpoint()
        self._checkpoint.save_counter  # Ensure the variable has been created
        self._checkpoint.listed = self._model.model.trainable_variables
        self._session.run(self._checkpoint.save_counter.initializer)
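
# Illustrative sketch (not one of the listed examples): how the discount_factor
# (gamma) and advantage_lambda (lam) documented above combine in Generalized
# Advantage Estimation.  All names here are hypothetical.
import numpy as np

def gae_advantages(rewards, values, gamma=0.99, lam=0.98):
    # values holds one estimate per step plus a final bootstrap value
    advantages = np.zeros(len(rewards))
    running = 0.0
    for t in reversed(range(len(rewards))):
        delta = rewards[t] + gamma * values[t + 1] - values[t]
        running = delta + gamma * lam * running
        advantages[t] = running
    return advantages

# a three-step rollout that only pays off at the end, with a zero bootstrap value
print(gae_advantages([0.0, 0.0, 1.0], [0.1, 0.2, 0.5, 0.0]))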
Example #31
0
    def __init__(self,
                 tensorboard=False,
                 tensorboard_log_frequency=100,
                 batch_size=100,
                 random_seed=None,
                 use_queue=True,
                 graph=None,
                 learning_rate=0.001,
                 **kwargs):
        """
    Parameters
    ----------
    tensorboard: bool
      Should we log to model_dir data for tensorboard?
    tensorboard_log_frequency: int
      How many training batches before logging tensorboard?
    batch_size: int
      default batch size for training and evaluating
    use_queue: boolean
      if True when building we will create a tf.FIFO queue, which will hold
      all features, weights, and labels.  We will feed the inputs into this
      queue in batches of self.batch_size in a separate thread from the
      thread training the model.  You cannot use a queue when
      batches are not of consistent size
    graph: tensorflow.Graph
      the Graph in which to create Tensorflow objects.  If None, a new Graph
      is created.
    learning_rate: float or LearningRateSchedule
      the learning rate to use for optimization
    kwargs
    """

        # Layer Management
        self.layers = dict()
        self.features = list()
        self.labels = list()
        self.outputs = list()
        self.task_weights = list()
        self.submodels = list()
        self.loss = Constant(0)
        self.built = False
        self.queue_installed = False
        self.optimizer = Adam(learning_rate=learning_rate,
                              beta1=0.9,
                              beta2=0.999,
                              epsilon=1e-7)

        # Singular place to hold Tensor objects which don't serialize
        # These have to be reconstructed on restoring from pickle
        # See TensorGraph._get_tf() for more details on lazy construction
        self.tensor_objects = {
            "FileWriter": None,
            "Graph": graph,
            "train_op": None,
            "summary_op": None,
        }
        self.tensorboard = tensorboard
        self.tensorboard_log_frequency = tensorboard_log_frequency
        self.tensorboard_step = 0
        self.global_step = 0
        self.use_queue = use_queue

        self.batch_size = batch_size
        self.random_seed = random_seed
        super(TensorGraph, self).__init__(**kwargs)
        self.save_file = "%s/%s" % (self.model_dir, "model")
        self.model_class = None

        self.rnn_initial_states = []
        self.rnn_final_states = []
        self.rnn_zero_states = []
        if self.use_queue and self.tensorboard:
            raise ValueError(
                "Currently TensorGraph cannot both use_queue and tensorboard at the same time"
            )
Example #32
0
class TensorGraph(Model):

  def __init__(self,
               tensorboard=False,
               tensorboard_log_frequency=100,
               batch_size=100,
               random_seed=None,
               use_queue=True,
               graph=None,
               learning_rate=0.001,
               configproto=None,
               **kwargs):
    """
    Parameters
    ----------
    tensorboard: bool
      Should we log to model_dir data for tensorboard?
    tensorboard_log_frequency: int
      How many training batches before logging tensorboard?
    batch_size: int
      default batch size for training and evaluating
    use_queue: boolean
      if True when building we will create a tf.FIFO queue, which will hold
      all features, weights, and labels.  We will feed the inputs into this
      queue in batches of self.batch_size in a separate thread from the
      thread training the model.  You cannot use a queue when
      batches are not of consistent size
    graph: tensorflow.Graph
      the Graph in which to create Tensorflow objects.  If None, a new Graph
      is created.
    learning_rate: float or LearningRateSchedule
      the learning rate to use for optimization
    configproto: a tf.ConfigProto() object used to create tf.Session()
    """

    # Layer Management
    self.layers = dict()
    self.features = list()
    self.labels = list()
    self.outputs = list()
    self.variances = list()
    self.task_weights = list()
    self.submodels = list()
    self.loss = Constant(0)
    self.built = False
    self.queue_installed = False
    self.optimizer = Adam(
        learning_rate=learning_rate, beta1=0.9, beta2=0.999, epsilon=1e-7)
    self.configproto = configproto

    # Singular place to hold Tensor objects which don't serialize
    # These have to be reconstructed on restoring from pickle
    # See TensorGraph._get_tf() for more details on lazy construction
    self.tensor_objects = {
        "FileWriter": None,
        "Graph": graph,
        "train_op": None,
        "summary_op": None,
    }
    self.tensorboard = tensorboard
    self.tensorboard_log_frequency = tensorboard_log_frequency
    self.tensorboard_step = 0
    self.global_step = 0
    self.use_queue = use_queue

    self.batch_size = batch_size
    self.random_seed = random_seed
    super(TensorGraph, self).__init__(**kwargs)
    self.save_file = "%s/%s" % (self.model_dir, "model")
    self.model_class = None

    self.rnn_initial_states = []
    self.rnn_final_states = []
    self.rnn_zero_states = []
    if self.use_queue and self.tensorboard:
      raise ValueError(
          "Currently TensorGraph cannot both use_queue and tensorboard at the same time"
      )

  def _add_layer(self, layer):
    if layer.name is None:
      layer.name = "%s_%s" % (layer.__class__.__name__, len(self.layers) + 1)
    if layer.name in self.layers:
      return
    if isinstance(layer, Feature):
      self.features.append(layer)
    if isinstance(layer, Label):
      self.labels.append(layer)
    if isinstance(layer, Weights):
      self.task_weights.append(layer)
    self.layers[layer.name] = layer
    for in_layer in layer.in_layers:
      self._add_layer(in_layer)

  def fit(self,
          dataset,
          nb_epoch=10,
          max_checkpoints_to_keep=5,
          checkpoint_interval=1000,
          deterministic=False,
          restore=False,
          submodel=None,
          **kwargs):
    """Train this model on a dataset.

    Parameters
    ----------
    dataset: Dataset
      the Dataset to train on
    nb_epoch: int
      the number of epochs to train for
    max_checkpoints_to_keep: int
      the maximum number of checkpoints to keep.  Older checkpoints are discarded.
    checkpoint_interval: int
      the frequency at which to write checkpoints, measured in training steps.
      Set this to 0 to disable automatic checkpointing.
    deterministic: bool
      if True, the samples are processed in order.  If False, a different random
      order is used for each epoch.
    restore: bool
      if True, restore the model from the most recent checkpoint and continue training
      from there.  If False, retrain the model from scratch.
    submodel: Submodel
      an alternate training objective to use.  This should have been created by
      calling create_submodel().
    """
    return self.fit_generator(
        self.default_generator(
            dataset, epochs=nb_epoch, deterministic=deterministic),
        max_checkpoints_to_keep, checkpoint_interval, restore, submodel)

  def fit_generator(self,
                    feed_dict_generator,
                    max_checkpoints_to_keep=5,
                    checkpoint_interval=1000,
                    restore=False,
                    submodel=None):
    """Train this model on data from a generator.

    Parameters
    ----------
    feed_dict_generator: generator
      this should generate batches, each represented as a dict that maps
      Layers to values.
    max_checkpoints_to_keep: int
      the maximum number of checkpoints to keep.  Older checkpoints are discarded.
    checkpoint_interval: int
      the frequency at which to write checkpoints, measured in training steps.
      Set this to 0 to disable automatic checkpointing.
    restore: bool
      if True, restore the model from the most recent checkpoint and continue training
      from there.  If False, retrain the model from scratch.
    submodel: Submodel
      an alternate training objective to use.  This should have been created by
      calling create_submodel().

    Returns
    -------
    the average loss over the most recent checkpoint interval
    """
    if not self.built:
      self.build()
    with self._get_tf("Graph").as_default():
      time1 = time.time()
      loss = self.loss
      if submodel is not None and submodel.loss is not None:
        loss = submodel.loss
      if tfe.in_eager_mode():
        # In eager mode we want an optimizer and a function to compute the
        # gradient of the loss.

        submodel_vars = None
        if submodel is None:
          optimizer = self._get_tf("Optimizer")
        else:
          optimizer = submodel.create_optimizer()
          if submodel.layers is not None:
            submodel_vars = set()
            for layer in submodel.layers:
              for var in layer.variables:
                submodel_vars.add(var)
        val_grad_fn = tfe.implicit_value_and_gradients(
            lambda x: self._run_graph([loss], x, True)[0])
      else:
        # In graph mode we want a training operation.

        if submodel is None:
          train_op = self._get_tf('train_op')
        else:
          train_op = submodel.get_train_op()
      if checkpoint_interval > 0:
        saver = tf.train.Saver(
            self.get_variables(),
            max_to_keep=max_checkpoints_to_keep,
            save_relative_paths=True)
      if restore:
        self.restore()
      avg_loss, n_averaged_batches = 0.0, 0.0
      n_samples = 0
      n_enqueued = [0]
      final_sample = [None]
      if self.queue_installed:
        enqueue_thread = threading.Thread(
            target=_enqueue_batch,
            args=(self, feed_dict_generator, self._get_tf("Graph"),
                  self.session, n_enqueued, final_sample))
        enqueue_thread.start()
      for feed_dict in self._create_feed_dicts(feed_dict_generator, True):
        if self.queue_installed:
          # Don't let this thread get ahead of the enqueue thread, since if
          # we try to read more batches than the total number that get queued,
          # this thread will hang indefinitely.
          while n_enqueued[0] <= n_samples:
            if n_samples == final_sample[0]:
              break
            time.sleep(0)
          if n_samples == final_sample[0]:
            break
        n_samples += 1
        should_log = (self.tensorboard and
                      n_samples % self.tensorboard_log_frequency == 0)
        if tfe.in_eager_mode():
          value, grads_and_vars = val_grad_fn(feed_dict)
          if submodel_vars is not None:
            grads_and_vars = [
                x for x in grads_and_vars if x[1] in submodel_vars
            ]
          optimizer.apply_gradients(grads_and_vars)
          avg_loss += value
        else:
          fetches = [train_op, loss.out_tensor]
          if should_log:
            fetches.append(self._get_tf("summary_op"))
          fetched_values = self.session.run(fetches, feed_dict=feed_dict)
          if should_log:
            self._log_tensorboard(fetched_values[2])
          avg_loss += fetched_values[1]
        n_averaged_batches += 1
        self.global_step += 1
        if checkpoint_interval > 0 and self.global_step % checkpoint_interval == checkpoint_interval - 1:
          saver.save(self.session, self.save_file, global_step=self.global_step)
          avg_loss = float(avg_loss) / n_averaged_batches
          logger.info('Ending global_step %d: Average loss %g' %
                      (self.global_step, avg_loss))
          avg_loss, n_averaged_batches = 0.0, 0.0
      if n_averaged_batches > 0:
        avg_loss = float(avg_loss) / n_averaged_batches
      if checkpoint_interval > 0:
        if n_averaged_batches > 0:
          logger.info('Ending global_step %d: Average loss %g' %
                      (self.global_step, avg_loss))
        saver.save(self.session, self.save_file, global_step=self.global_step)
        time2 = time.time()
        logger.info("TIMING: model fitting took %0.3f s" % (time2 - time1))
    return avg_loss

  def _log_tensorboard(self, summary):
    """
    TODO(LESWING) set epoch
    Parameters
    ----------
    Returns
    -------
    """
    global_step = int(self.global_step)
    writer = self._get_tf("FileWriter")
    writer.reopen()
    writer.add_summary(summary, global_step=global_step)
    writer.close()

  def fit_on_batch(self, X, y, w, submodel=None):
    if not self.built:
      self.build()
    dataset = NumpyDataset(X, y)
    return self.fit(dataset, nb_epoch=1, submodel=submodel)

  def default_generator(self,
                        dataset,
                        epochs=1,
                        predict=False,
                        deterministic=True,
                        pad_batches=True):
    if len(self.features) > 1:
      raise ValueError("More than one Feature, must use generator")
    if len(self.labels) > 1:
      raise ValueError("More than one Label, must use generator")
    if len(self.task_weights) > 1:
      raise ValueError("More than one Weights, must use generator")
    for epoch in range(epochs):
      for (X_b, y_b, w_b, ids_b) in dataset.iterbatches(
          batch_size=self.batch_size,
          deterministic=deterministic,
          pad_batches=pad_batches):
        feed_dict = dict()
        if len(self.labels) == 1 and y_b is not None and not predict:
          feed_dict[self.labels[0]] = y_b
        if len(self.features) == 1 and X_b is not None:
          feed_dict[self.features[0]] = X_b
        if len(self.task_weights) == 1 and w_b is not None and not predict:
          feed_dict[self.task_weights[0]] = w_b
        for (initial_state, zero_state) in zip(self.rnn_initial_states,
                                               self.rnn_zero_states):
          feed_dict[initial_state] = zero_state
        yield feed_dict

  def __call__(self, *inputs, **kwargs):
    """Execute the model in eager mode to compute outputs as a function of inputs.

    This is very similar to predict_on_batch(), except that it returns the outputs
    as tensors rather than numpy arrays.  That means you can compute the graph's
    outputs, then do additional calculations based on them, and gradients will
    be tracked correctly through the whole process.

    Parameters
    ----------
    inputs: tensors
      the values to use for the model's features.  The number of inputs must
      exactly match the length of the model's `features` property.  The values
      may be tensors, numpy arrays, or anything else that can be converted to
      tensors of the correct shape.
    outputs: list of Layers
      the output layers to compute.  If this is omitted, self.outputs is used
      (that is, all outputs that have been added by calling add_output()).

    Returns
    -------
    The output tensors, or a list of tensors if multiple outputs were requested.
    """
    if len(inputs) != len(self.features):
      raise ValueError('Expected %d inputs, received %d' %
                       (len(self.features), len(inputs)))
    # TODO Once we drop Python 2 support, turn outputs into a proper keyword arg
    # instead of using the **kwargs hack.
    if 'outputs' in kwargs:
      outputs = kwargs['outputs']
    else:
      outputs = self.outputs
    feed_dict = dict(zip(self.features, inputs))
    results = self._run_graph(outputs, feed_dict, False)
    if len(results) == 1:
      return results[0]
    return results
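
  # Illustrative (hypothetical) eager-mode use of __call__, assuming a built model
  # `model` whose features list holds one Feature layer of shape (None, 3) and
  # whose outputs list holds one layer:
  #
  #   batch = np.random.rand(5, 3).astype(np.float32)
  #   out_tensor = model(batch)   # a tf.Tensor, not a numpy array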

  def _predict(self, generator, transformers, outputs, uncertainty):
    """
    Predict outputs for data provided by a generator.

    This is the private implementation of prediction.  Do not call it directly.
    Instead call one of the public prediction methods.

    Parameters
    ----------
    generator: Generator
      Generator that constructs feed dictionaries for TensorGraph.
    transformers: list
      List of dc.trans.Transformers.
    outputs: object
      If outputs is None, then will assume outputs = self.outputs.
      If outputs is a Layer/Tensor, then will evaluate and return as a
      single ndarray. If outputs is a list of Layers/Tensors, will return a list
      of ndarrays.
    uncertainty: bool
      specifies whether this is being called as part of estimating uncertainty.
      If True, it sets the training flag so that dropout will be enabled, and
      returns the values of the uncertainty outputs.
    Returns:
      y_pred: numpy ndarray of shape (n_samples, n_classes*n_tasks)
    """
    if not self.built:
      self.build()
    if outputs is None:
      outputs = self.outputs
    elif not isinstance(outputs, collections.Sequence):
      outputs = [outputs]
    if uncertainty:
      if len(self.variances) == 0:
        raise ValueError('This model cannot compute uncertainties')
      if len(self.variances) != len(outputs):
        raise ValueError(
            'The number of variances must exactly match the number of outputs')
      tensors = outputs + self.variances
    else:
      tensors = outputs

    with self._get_tf("Graph").as_default():
      # Gather results for each output
      results = [[] for out in tensors]
      n_samples = 0
      n_enqueued = [0]
      final_sample = [None]
      if self.queue_installed:
        enqueue_thread = threading.Thread(
            target=_enqueue_batch,
            args=(self, generator, self._get_tf("Graph"), self.session,
                  n_enqueued, final_sample))
        enqueue_thread.start()
      for feed_dict in self._create_feed_dicts(generator, uncertainty):
        if self.queue_installed:
          # Don't let this thread get ahead of the enqueue thread, since if
          # we try to read more batches than the total number that get queued,
          # this thread will hang indefinitely.
          while n_enqueued[0] <= n_samples:
            if n_samples == final_sample[0]:
              break
            time.sleep(0)
          if n_samples == final_sample[0]:
            break
        n_samples += 1
        feed_results = self._run_graph(tensors, feed_dict, uncertainty)
        if tfe.in_eager_mode():
          feed_results = [f.numpy() for f in feed_results]
        if len(feed_results) > 1:
          if len(transformers):
            raise ValueError("Does not support transformations "
                             "for multiple outputs.")
        elif len(feed_results) == 1:
          result = undo_transforms(feed_results[0], transformers)
          feed_results = [result]
        for ind, result in enumerate(feed_results):
          results[ind].append(result)

      final_results = []
      for result_list in results:
        final_results.append(np.concatenate(result_list, axis=0))
      # If only one output, just return array
      if len(final_results) == 1:
        return final_results[0]
      elif uncertainty:
        return zip(final_results[:len(outputs)], final_results[len(outputs):])
      else:
        return final_results

  def predict_on_generator(self, generator, transformers=[], outputs=None):
    """
    Parameters
    ----------
    generator: Generator
      Generator that constructs feed dictionaries for TensorGraph.
    transformers: list
      List of dc.trans.Transformers.
    outputs: object
      If outputs is None, then will assume outputs = self.outputs.
      If outputs is a Layer/Tensor, then will evaluate and return as a
      single ndarray. If outputs is a list of Layers/Tensors, will return a list
      of ndarrays.
    Returns:
      y_pred: numpy ndarray of shape (n_samples, n_classes*n_tasks)
    """
    return self._predict(generator, transformers, outputs, False)

  def predict_on_batch(self, X, transformers=[], outputs=None):
    """Generates predictions for input samples, processing samples in a batch.

    Parameters
    ----------
    X: ndarray
      the input data, as a Numpy array.
    transformers: List
      List of dc.trans.Transformers

    Returns
    -------
    A Numpy array of predictions.
    """
    dataset = NumpyDataset(X=X, y=None)
    generator = self.default_generator(dataset, predict=True, pad_batches=False)
    return self.predict_on_generator(generator, transformers, outputs)

  def predict_uncertainty_on_batch(self, X, masks=50):
    """
    Predict the model's outputs, along with the uncertainty in each one.

    The uncertainty is computed as described in https://arxiv.org/abs/1703.04977.
    It involves repeating the prediction many times with different dropout masks.
    The prediction is computed as the average over all the predictions.  The
    uncertainty includes both the variation among the predicted values (epistemic
    uncertainty) and the model's own estimates for how well it fits the data
    (aleatoric uncertainty).  Not all models support uncertainty prediction.

    Parameters
    ----------
    X: ndarray
      the input data, as a Numpy array.
    masks: int
      the number of dropout masks to average over

    Returns
    -------
    for each output, a tuple (y_pred, y_std) where y_pred is the predicted
    value of the output, and each element of y_std estimates the standard
    deviation of the corresponding element of y_pred
    """
    dataset = NumpyDataset(X=X, y=None)
    return self.predict_uncertainty(dataset, masks)

  def predict(self, dataset, transformers=[], outputs=None):
    """
    Uses self to make predictions on provided Dataset object.

    Parameters
    ----------
    dataset: dc.data.Dataset
      Dataset to make prediction on
    transformers: list
      List of dc.trans.Transformers.
    outputs: object
      If outputs is None, then will assume outputs=self.outputs. If outputs is
      a Layer/Tensor, then will evaluate and return as a single ndarray. If
      outputs is a list of Layers/Tensors, will return a list of ndarrays.

    Returns
    -------
    results: numpy ndarray or list of numpy ndarrays
    """
    generator = self.default_generator(dataset, predict=True, pad_batches=False)
    return self.predict_on_generator(generator, transformers, outputs)

  def predict_uncertainty(self, dataset, masks=50):
    """
    Predict the model's outputs, along with the uncertainty in each one.

    The uncertainty is computed as described in https://arxiv.org/abs/1703.04977.
    It involves repeating the prediction many times with different dropout masks.
    The prediction is computed as the average over all the predictions.  The
    uncertainty includes both the variation among the predicted values (epistemic
    uncertainty) and the model's own estimates for how well it fits the data
    (aleatoric uncertainty).  Not all models support uncertainty prediction.

    Parameters
    ----------
    dataset: dc.data.Dataset
      Dataset to make prediction on
    masks: int
      the number of dropout masks to average over

    Returns
    -------
    for each output, a tuple (y_pred, y_std) where y_pred is the predicted
    value of the output, and each element of y_std estimates the standard
    deviation of the corresponding element of y_pred
    """
    sum_pred = []
    sum_sq_pred = []
    sum_var = []
    for i in range(masks):
      generator = self.default_generator(
          dataset, predict=True, pad_batches=False)
      results = self._predict(generator, [], self.outputs, True)
      if len(sum_pred) == 0:
        for p, v in results:
          sum_pred.append(p)
          sum_sq_pred.append(p * p)
          sum_var.append(v)
      else:
        for j, (p, v) in enumerate(results):
          sum_pred[j] += p
          sum_sq_pred[j] += p * p
          sum_var[j] += v
    output = []
    std = []
    for i in range(len(sum_pred)):
      p = sum_pred[i] / masks
      output.append(p)
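      # total predictive variance = epistemic term (spread of the per-mask means,
      # E[p^2] - E[p]^2) + aleatoric term (average predicted variance)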
      std.append(np.sqrt(sum_sq_pred[i] / masks - p * p + sum_var[i] / masks))
    if len(output) == 1:
      return (output[0], std[0])
    else:
      return zip(output, std)

  def topsort(self):

    def add_layers_to_list(layer, sorted_layers):
      if layer in sorted_layers:
        return
      for in_layer in layer.in_layers:
        add_layers_to_list(in_layer, sorted_layers)
      sorted_layers.append(layer)

    sorted_layers = []
    for l in self.features + self.labels + self.task_weights + self.outputs + self.variances:
      add_layers_to_list(l, sorted_layers)
    add_layers_to_list(self.loss, sorted_layers)
    for submodel in self.submodels:
      if submodel.loss is not None:
        add_layers_to_list(submodel.loss, sorted_layers)
    return sorted_layers

  def build(self):
    if self.built:
      return
    if tfe.in_eager_mode():
      # In eager mode, we need to execute every layer once to ensure its variables
      # have been created.

      def build_layers(layer, tensors):
        if layer in tensors:
          return tensors[layer]
        inputs = [build_layers(input, tensors) for input in layer.in_layers]
        if isinstance(layer, Input):
          # We can't execute Input layers in eager mode, since they would try
          # to create placeholders.  Instead create a tensor of the correct
          # size and type.
          shape = [1 if s is None else s for s in layer.shape]
          tensor = tf.zeros(shape, layer.dtype)
        else:
          with tf.name_scope(layer.name):
            tensor = layer.create_tensor(in_layers=inputs, set_tensors=False)
        tensors[layer] = tensor
        return tensor

      tensors = {}
      with self._get_tf("Graph").as_default():
        # Build the layers.

        build_layers(self.loss, tensors)
        for output in self.outputs:
          build_layers(output, tensors)
        for variance in self.variances:
          build_layers(variance, tensors)
        for submodel in self.submodels:
          build_layers(submodel.loss, tensors)

        # Initialize variables.

        for layer in self.layers.values():
          if layer.variable_values is not None:
            for var, val in zip(layer.variables, layer.variable_values):
              var.assign(val)
      self.session = None
      self._training_placeholder = None
      self.built = True
      return

    # In graph mode we need to create the computation graph.

    with self._get_tf("Graph").as_default():
      self._training_placeholder = tf.placeholder(dtype=tf.float32, shape=())
      if self.random_seed is not None:
        tf.set_random_seed(self.random_seed)
      self._install_queue()
      self.built = True
      for layer in self.topsort():
        with tf.name_scope(layer.name):
          layer.create_tensor(training=self._training_placeholder)
          self.rnn_initial_states += layer.rnn_initial_states
          self.rnn_final_states += layer.rnn_final_states
          self.rnn_zero_states += layer.rnn_zero_states
          layer.add_summary_to_tg(layer.out_tensor,
                                  self.get_layer_variables(layer))
      self.session = tf.Session(config=self.configproto)

      # Ensure all training operators have been created.

      self._get_tf('train_op')
      for submodel in self.submodels:
        train_op = submodel.get_train_op()

      # Initialize variables.

      self.session.run(tf.global_variables_initializer())
      for layer in self.layers.values():
        if layer.variable_values is not None:
          variables = self.get_layer_variables(layer)
          for var, val in zip(variables, layer.variable_values):
            self.session.run(var.assign(val))

    for layer in self.layers.values():
      if layer.tensorboard:
        self.tensorboard = True
    tf.summary.scalar("loss", self.loss.out_tensor)
    for layer in self.layers.values():
      if layer.tensorboard:
        tf.summary.tensor_summary(layer.name, layer.out_tensor)
    if self.tensorboard:
      writer = self._get_tf("FileWriter")
      writer.add_graph(self._get_tf("Graph"))
      writer.close()

    # As a sanity check, make sure all tensors have the correct shape.

    for layer in self.layers.values():
      try:
        assert list(layer.shape) == layer.out_tensor.get_shape().as_list(
        ), '%s: Expected shape %s does not match actual shape %s' % (
            layer.name, layer.shape, layer.out_tensor.get_shape().as_list())
      except NotImplementedError:
        pass

  def _install_queue(self):
    """
    """
    if not self.use_queue or self.queue_installed:
      for layer in self.features + self.labels + self.task_weights:
        layer.pre_queue = True
      return
    inputs = self.features + self.labels + self.task_weights
    if len(inputs) == 0:
      return
    names = []
    shapes = []
    pre_q_inputs = []
    q = InputFifoQueue(shapes, names, in_layers=pre_q_inputs)
    q.name = "%s_%s" % (q.__class__.__name__, len(self.layers) + 1)

    for layer in inputs:
      pre_q_input = layer.create_pre_q()
      shapes.append(pre_q_input.shape)
      names.append(pre_q_input.name)
      pre_q_inputs.append(pre_q_input)

      layer.in_layers.append(q)

    self._add_layer(q)
    self.input_queue = q
    self.queue_installed = True

  def set_loss(self, layer):
    self._add_layer(layer)
    self.loss = layer

  def add_output(self, layer):
    """Add an output layer that can be computed by predict()"""
    self._add_layer(layer)
    self.outputs.append(layer)

  def add_variance(self, layer):
    """Add a layer that computes the variance in an output.

    If a model supports uncertainty, it must call add_variance() once for every
    output.  Each variance layer has the same shape as the corresponding output,
    and each element computes an estimate of the variance from aleatoric
    uncertainty in the corresponding element of the output.

    In addition, if a model supports uncertainty it MUST use dropout on every
    layer.  Otherwise, the uncertainties it computes will be inaccurate.
    """
    self._add_layer(layer)
    self.variances.append(layer)

  def set_optimizer(self, optimizer):
    """Set the optimizer to use for fitting."""
    self.optimizer = optimizer

  def create_submodel(self, layers=None, loss=None, optimizer=None):
    """Create an alternate objective for training one piece of a TensorGraph.

    A TensorGraph consists of a set of layers, and specifies a loss function and
    optimizer to use for training those layers.  Usually this is sufficient, but
    there are cases where you want to train different parts of a model separately.
    For example, a GAN consists of a generator and a discriminator.  They are
    trained separately, and they use different loss functions.

    A submodel defines an alternate objective to use in cases like this.  It may
    optionally specify any of the following: a subset of layers in the model to
    train; a different loss function; and a different optimizer to use.  This
    method creates a submodel, which you can then pass to fit() to use it for
    training.

    Parameters
    ----------
    layers: list
      the list of layers to train.  If None, all layers in the model will be
      trained.
    loss: Layer
      the loss function to optimize.  If None, the model's main loss function
      will be used.
    optimizer: Optimizer
      the optimizer to use for training.  If None, the model's main optimizer
      will be used.

    Returns
    -------
    the newly created submodel, which can be passed to any of the fitting
    methods.
    """
    if self.built:
      raise ValueError('Submodels must be created before build() is called.')
    submodel = Submodel(self, layers, loss, optimizer)
    self.submodels.append(submodel)
    if loss is not None:
      self._add_layer(loss)
    return submodel
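
  # Illustrative (hypothetical) GAN-style use of create_submodel(), assuming layer
  # lists gen_layers/disc_layers and loss layers gen_loss/disc_loss were added to
  # this graph before build() was called:
  #
  #   gen_sub = model.create_submodel(layers=gen_layers, loss=gen_loss)
  #   disc_sub = model.create_submodel(layers=disc_layers, loss=disc_loss)
  #   model.fit(dataset, nb_epoch=1, submodel=disc_sub)
  #   model.fit(dataset, nb_epoch=1, submodel=gen_sub)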

  def get_pickling_errors(self, obj, seen=None):
    if seen is None:
      seen = []
    try:
      state = obj.__getstate__()
    except AttributeError:
      return
    if state is None:
      return
    if isinstance(state, tuple):
      if not isinstance(state[0], dict):
        state = state[1]
      else:
        # dict.update() returns None, so merge in place and keep the dict
        state[0].update(state[1])
        state = state[0]
    result = {}
    for i in state:
      try:
        pickle.dumps(state[i], protocol=2)
      except pickle.PicklingError:
        if not state[i] in seen:
          seen.append(state[i])
          result[i] = self.get_pickling_errors(state[i], seen)
    return result

  def save(self):
    # Remove out_tensor from the object to be pickled
    must_restore = False
    tensor_objects = self.tensor_objects
    rnn_initial_states = self.rnn_initial_states
    rnn_final_states = self.rnn_final_states
    rnn_zero_states = self.rnn_zero_states
    session = self.session
    self.tensor_objects = {}
    self.rnn_initial_states = []
    self.rnn_final_states = []
    self.rnn_zero_states = []
    self.session = None
    out_tensors = []
    submodel_ops = []
    if self.built:
      must_restore = True
      for layer in self.topsort():
        out_tensors.append(layer.none_tensors())
      for submodel in self.submodels:
        submodel_ops.append(submodel._train_op)
        submodel._train_op = None
      training_placeholder = self._training_placeholder
      self._training_placeholder = None
      self.built = False

    # Pickle itself
    pickle_name = os.path.join(self.model_dir, "model.pickle")

    with open(pickle_name, 'wb') as fout:
      try:
        pickle.dump(self, fout)
      except Exception as e:
        logger.info(self.get_pickling_errors(self))
        raise e

    # add out_tensor back to everyone
    if must_restore:
      for index, layer in enumerate(self.topsort()):
        layer.set_tensors(out_tensors[index])
      for submodel, op in zip(self.submodels, submodel_ops):
        submodel._train_op = op
      self._training_placeholder = training_placeholder
      self.built = True
    self.tensor_objects = tensor_objects
    self.rnn_initial_states = rnn_initial_states
    self.rnn_final_states = rnn_final_states
    self.rnn_zero_states = rnn_zero_states
    self.session = session

  def evaluate_generator(self,
                         feed_dict_generator,
                         metrics,
                         transformers=[],
                         labels=None,
                         outputs=None,
                         weights=[],
                         per_task_metrics=False):

    if labels is None:
      raise ValueError('labels must be specified')
    n_tasks = len(self.outputs)
    n_classes = self.outputs[0].out_tensor.get_shape()[-1].value
    evaluator = GeneratorEvaluator(
        self,
        feed_dict_generator,
        transformers,
        labels=labels,
        outputs=outputs,
        weights=weights,
        n_tasks=n_tasks,
        n_classes=n_classes)
    if not per_task_metrics:
      scores = evaluator.compute_model_performance(metrics)
      return scores
    else:
      scores, per_task_scores = evaluator.compute_model_performance(
          metrics, per_task_metrics=per_task_metrics)
      return scores, per_task_scores

  def get_layer_variables(self, layer):
    """Get the list of trainable variables in a layer of the graph."""
    if tfe.in_eager_mode():
      return layer.variables
    if not self.built:
      self.build()
    with self._get_tf("Graph").as_default():
      if layer.variable_scope == '':
        return []
      return tf.get_collection(
          tf.GraphKeys.TRAINABLE_VARIABLES, scope=layer.variable_scope)

  def get_variables(self):
    """Get the list of all trainable variables in the graph."""
    if not self.built:
      self.build()
    if tfe.in_eager_mode():
      variables = []
      for layer in self.layers.values():
        variables += layer.variables
      return variables
    else:
      with self._get_tf("Graph").as_default():
        return tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)

  def get_global_step(self):
    return self._get_tf("GlobalStep")

  def _get_tf(self, obj):
    """Fetches underlying TensorFlow primitives.

    Parameters
    ----------
    obj: str
      If "Graph", returns tf.Graph instance. If "FileWriter", returns
      tf.summary.FileWriter. If "Optimizer", returns the optimizer. If
      "train_op", returns the train operation. If "summary_op", returns the
      merged summary. If "GlobalStep" returns the global step.
    Returns
    -------
    TensorFlow Object

    """

    if obj in self.tensor_objects and self.tensor_objects[obj] is not None:
      return self.tensor_objects[obj]
    if obj == "Graph":
      self.tensor_objects['Graph'] = tf.Graph()
    elif obj == "FileWriter":
      self.tensor_objects['FileWriter'] = tf.summary.FileWriter(self.model_dir)
    elif obj == 'Optimizer':
      self.tensor_objects['Optimizer'] = self.optimizer._create_optimizer(
          self._get_tf('GlobalStep'))
    elif obj == 'train_op':
      opt = self._get_tf('Optimizer')
      global_step = self._get_tf('GlobalStep')
      try:
        self.tensor_objects['train_op'] = opt.minimize(
            self.loss.out_tensor, global_step=global_step)
      except ValueError:
        # The loss doesn't depend on any variables.
        self.tensor_objects['train_op'] = 0
    elif obj == 'summary_op':
      self.tensor_objects['summary_op'] = tf.summary.merge_all(
          key=tf.GraphKeys.SUMMARIES)
    elif obj == 'GlobalStep':
      with self._get_tf("Graph").as_default():
        self.tensor_objects['GlobalStep'] = create_variable(0, trainable=False)
    return self._get_tf(obj)

  def save_checkpoint(self, max_checkpoints_to_keep=5):
    """Save a checkpoint to disk.

    Usually you do not need to call this method, since fit() saves checkpoints
    automatically.  If you have disabled automatic checkpointing during fitting,
    this can be called to manually write checkpoints.

    Parameters
    ----------
    max_checkpoints_to_keep: int
      the maximum number of checkpoints to keep.  Older checkpoints are discarded.
    """
    saver = tf.train.Saver(
        self.get_variables(), max_to_keep=max_checkpoints_to_keep)
    saver.save(self.session, self.save_file, global_step=self.global_step)

  def get_checkpoints(self):
    """Get a list of all available checkpoint files."""
    return tf.train.get_checkpoint_state(
        self.model_dir).all_model_checkpoint_paths

  def restore(self, checkpoint=None):
    """Reload the values of all variables from a checkpoint file.

    Parameters
    ----------
    checkpoint: str
      the path to the checkpoint file to load.  If this is None, the most recent
      checkpoint will be chosen automatically.  Call get_checkpoints() to get a
      list of all available checkpoints.
    """
    if not self.built:
      self.build()
    if checkpoint is None:
      checkpoint = tf.train.latest_checkpoint(self.model_dir)
    if checkpoint is None:
      raise ValueError('No checkpoint found')
    with self._get_tf("Graph").as_default():
      reader = NewCheckpointReader(checkpoint)
      var_names = set([x for x in reader.get_variable_to_shape_map()])
      var_list = []
      for var in self.get_variables():
        name = var.name
        if ':' in name:
          name = name[:name.rfind(':')]
        if name in var_names:
          var_list.append(var)
      saver = tf.train.Saver(var_list=var_list)
      saver.restore(self.session, checkpoint)

  def get_num_tasks(self):
    return len(self.outputs)

  def get_pre_q_input(self, input_layer):
    layer_name = input_layer.name
    pre_q_name = "%s_pre_q" % layer_name
    return self.layers[pre_q_name]

  @staticmethod
  def load_from_dir(model_dir, restore=True):
    pickle_name = os.path.join(model_dir, "model.pickle")
    with open(pickle_name, 'rb') as fin:
      tensorgraph = pickle.load(fin)
      tensorgraph.built = False
      tensorgraph.model_dir = model_dir
      if restore:
        try:
          tensorgraph.restore()
        except ValueError:
          pass  # No checkpoint to load
      return tensorgraph

  def __del__(self):
    pass

  def _create_feed_dicts(self, generator, training):
    """Create feed dicts for use in fitting or prediction.

    Parameters
    ----------
    generator: Generator
      the feed dict generator that was passed to fit_generator() or predict_on_generator()
    training: bool
      True during training, False during prediction
    """
    train_value = 1.0 if training else 0.0
    if self.queue_installed:
      while True:
        yield {self._training_placeholder: train_value}
    else:
      for d in generator:
        feed_dict = {}
        for key, value in d.items():
          if isinstance(key, Input):
            value = _ensure_value_shape(value, key)
            if tfe.in_eager_mode():
              value = tf.cast(value, key.dtype)
            feed_dict[key] = value
          else:
            feed_dict[key] = value
        if not tfe.in_eager_mode():
          feed_dict[self._training_placeholder] = train_value
        yield feed_dict

  def _run_graph(self, outputs, feed_dict, training):
    """Run the calculations in the graph to compute some outputs.

    In graph mode, this just calls session.run().  In eager mode, it executes
    all required layers to compute the output.

    Parameters
    ----------
    outputs: list of Layers
      the output layers to compute
    feed_dict: dict
      maps input layers to values
    training: bool
      whether this is being executed in training mode
    """
    if not tfe.in_eager_mode():
      return self.session.run(outputs, feed_dict)

    def run_layers(layer, tensors):
      if layer in tensors:
        return tensors[layer]
      inputs = [run_layers(input, tensors) for input in layer.in_layers]
      tensor = layer.create_tensor(
          in_layers=inputs, set_tensors=False, training=training)
      tensors[layer] = tensor
      return tensor

    tensors = feed_dict.copy()
    return [run_layers(o, tensors) for o in outputs]

  def make_estimator(self,
                     feature_columns,
                     weight_column=None,
                     metrics={},
                     model_dir=None,
                     config=None):
    """Construct a Tensorflow Estimator from this model.

    tf.estimator.Estimator is the standard Tensorflow API for representing models.
    This method provides interoperability between DeepChem and other Tensorflow
    based tools by allowing any model to be used an Estimator.

    Once this method returns, the Estimator it created is independent of the model
    it was created from.  They do not share tensors, variables, save files, or any
    other resources.  The Estimator is a self contained object with its own methods
    for training, evaluation, prediction, checkpointing, etc.

    Parameters
    ----------
    feature_columns: list of tf.feature_column objects
      this describes the input features to the model.  There must be one entry
      for each Feature layer in this model's features field.
    weight_column: tf.feature_column or None
      if this model includes a Weights layer, this describes the input weights.
      Otherwise, this should be None.
    metrics: map
      metrics that should be computed in calls to evaluate().  For each entry,
      the key is the name to report for the metric, and the value is a function
      of the form f(labels, predictions, weights) that returns the tensors for
      computing the metric.  Any of the functions in tf.metrics can be used, as
      can other functions that satisfy the same interface.
    model_dir: str
      the directory in which the Estimator should save files.  If None, this
      defaults to the model's model_dir.
    config: RunConfig
      configuration options for the Estimator
    """
    # Check the inputs.

    if tfe.in_eager_mode():
      raise ValueError('make_estimator() is not supported in eager mode')
    if len(feature_columns) != len(self.features):
      raise ValueError(
          'This model requires %d feature column(s)' % len(self.features))
    if len(self.labels) != 1:
      raise ValueError(
          'Can only create an Estimator from a model with exactly one Label input'
      )
    if len(self.task_weights) > 1:
      raise ValueError(
          'Cannot create an Estimator from a model with multiple Weight inputs')
    if weight_column is None:
      if len(self.task_weights) > 0:
        raise ValueError('This model requires a weight column')
    else:
      if len(self.task_weights) == 0:
        raise ValueError(
            'Cannot specify weight_column for a model with no Weight inputs')
    if model_dir is None:
      model_dir = self.model_dir

    # Define a function that recursively creates tensors from layers.

    def create_tensors(layer, tensors, training):
      if layer in tensors:
        return tensors[layer]
      inputs = [
          create_tensors(in_layer, tensors, training)
          for in_layer in layer.in_layers
      ]
      tensor = layer.create_tensor(
          in_layers=inputs, set_tensors=False, training=training)
      tensors[layer] = tensor
      variables = tf.get_collection(
          tf.GraphKeys.TRAINABLE_VARIABLES, scope=layer.name)
      layer.add_summary_to_tg(tensor, variables)
      return tensor

    # Define the model function.

    def model_fn(features, labels, mode):
      # Define the inputs.

      tensors = self.create_estimator_inputs(feature_columns, weight_column,
                                             features, labels, mode)
      for layer, tensor in tensors.items():
        layer.add_summary_to_tg(tensor, [])

      # Create the correct outputs, based on the mode.

      if mode == tf.estimator.ModeKeys.PREDICT:
        predictions = {}
        for i, output in enumerate(self.outputs):
          predictions[i] = create_tensors(output, tensors, 0)
        return tf.estimator.EstimatorSpec(mode, predictions=predictions)
      if mode == tf.estimator.ModeKeys.EVAL:
        loss = create_tensors(self.loss, tensors, 0)
        predictions = create_tensors(self.outputs[0], tensors, 0)
        if len(self.task_weights) == 0:
          weights = None
        else:
          weights = tensors[self.task_weights[0]]
        eval_metric_ops = {}
        for name, function in metrics.items():
          eval_metric_ops[name] = function(tensors[self.labels[0]], predictions,
                                           weights)
        return tf.estimator.EstimatorSpec(
            mode, loss=loss, eval_metric_ops=eval_metric_ops)
      if mode == tf.estimator.ModeKeys.TRAIN:
        loss = create_tensors(self.loss, tensors, 1)
        global_step = tf.train.get_global_step()
        optimizer = self.optimizer._create_optimizer(global_step)
        train_op = optimizer.minimize(loss, global_step=global_step)
        return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)
      raise ValueError('Unknown mode')

    # Create the Estimator.

    return tf.estimator.Estimator(
        model_fn=model_fn, model_dir=model_dir, config=config)

  def create_estimator_inputs(self, feature_columns, weight_column, features,
                              labels, mode):
    """This is called by make_estimator() to create tensors for the inputs.

    feature_columns and weight_column are the arguments passed to
    make_estimator().  features, labels, and mode are the arguments passed to
    the estimator's model function.  This method creates and returns a dict with
    one entry for every Feature, Label, or Weights layer in the graph.  The keys
    are the layers, and the values are the tensors that correspond to them.

    Any subclass that overrides default_generator() must also override this
    method.
    """
    if self.__class__.default_generator != TensorGraph.default_generator:
      raise ValueError(
          "Class overrides default_generator() but not create_estimator_inputs()"
      )
    tensors = {}
    for layer, column in zip(self.features, feature_columns):
      tensors[layer] = tf.feature_column.input_layer(features, [column])
    if weight_column is not None:
      tensors[self.task_weights[0]] = tf.feature_column.input_layer(
          features, [weight_column])
    if labels is not None:
      tensors[self.labels[0]] = tf.cast(labels, self.labels[0].dtype)
    return tensors