def test_regression_overfit(self): """Test that TensorGraph models can overfit simple regression datasets.""" n_samples = 10 n_features = 3 n_tasks = 1 # Generate dummy dataset np.random.seed(123) ids = np.arange(n_samples) X = np.random.rand(n_samples, n_features) y = np.zeros((n_samples, n_tasks)) w = np.ones((n_samples, n_tasks)) dataset = dc.data.NumpyDataset(X, y, w, ids) regression_metric = dc.metrics.Metric(dc.metrics.mean_squared_error) # TODO(rbharath): This breaks with optimizer="momentum". Why? model = dc.models.MultiTaskRegressor( n_tasks, n_features, dropouts=[0.], weight_init_stddevs=[np.sqrt(6) / np.sqrt(1000)], batch_size=n_samples) model.set_optimizer(Adam(learning_rate=0.003, beta1=0.9, beta2=0.999)) # Fit trained model model.fit(dataset, nb_epoch=100) model.save() # Eval model on train scores = model.evaluate(dataset, [regression_metric]) assert scores[regression_metric.name] < .1
def test_fittransform_regression_overfit(self): """Test that TensorGraph FitTransform models can overfit simple regression datasets.""" n_samples = 10 n_features = 3 n_tasks = 1 # Generate dummy dataset np.random.seed(123) ids = np.arange(n_samples) X = np.random.rand(n_samples, n_features, n_features) y = np.zeros((n_samples, n_tasks)) w = np.ones((n_samples, n_tasks)) dataset = dc.data.NumpyDataset(X, y, w, ids) fit_transformers = [dc.trans.CoulombFitTransformer(dataset)] regression_metric = dc.metrics.Metric(dc.metrics.mean_squared_error) model = dc.models.MultiTaskFitTransformRegressor( n_tasks, [n_features, n_features], dropouts=[0.], weight_init_stddevs=[np.sqrt(6) / np.sqrt(1000)], batch_size=n_samples, fit_transformers=fit_transformers, n_evals=1) model.set_optimizer(Adam(learning_rate=0.003, beta1=0.9, beta2=0.999)) # Fit trained model model.fit(dataset, nb_epoch=100) model.save() # Eval model on train scores = model.evaluate(dataset, [regression_metric]) assert scores[regression_metric.name] < .1
def test_skewed_classification_overfit(self): """Test TensorGraph models can overfit 0/1 datasets with few actives.""" #n_samples = 100 n_samples = 100 n_features = 3 n_tasks = 1 n_classes = 2 # Generate dummy dataset np.random.seed(123) p = .05 ids = np.arange(n_samples) X = np.random.rand(n_samples, n_features) y = np.random.binomial(1, p, size=(n_samples, n_tasks)) w = np.ones((n_samples, n_tasks)) dataset = dc.data.NumpyDataset(X, y, w, ids) classification_metric = dc.metrics.Metric(dc.metrics.roc_auc_score) model = dc.models.MultiTaskClassifier(n_tasks, n_features, dropouts=[0.], weight_init_stddevs=[.1], batch_size=n_samples) model.set_optimizer(Adam(learning_rate=0.003, beta1=0.9, beta2=0.999)) # Fit trained model model.fit(dataset, nb_epoch=100) model.save() # Eval model on train scores = model.evaluate(dataset, [classification_metric]) assert scores[classification_metric.name] > .75
def test_multitask_regression_overfit(self): """Test TensorGraph multitask overfits tiny data.""" n_tasks = 10 n_samples = 10 n_features = 3 n_classes = 2 # Generate dummy dataset np.random.seed(123) ids = np.arange(n_samples) X = np.random.rand(n_samples, n_features) y = np.zeros((n_samples, n_tasks)) w = np.ones((n_samples, n_tasks)) dataset = dc.data.NumpyDataset(X, y, w, ids) regression_metric = dc.metrics.Metric(dc.metrics.mean_squared_error, task_averager=np.mean, mode="regression") model = dc.models.MultiTaskRegressor(n_tasks, n_features, dropouts=[0.], weight_init_stddevs=[.1], batch_size=n_samples) model.set_optimizer(Adam(learning_rate=0.0003, beta1=0.9, beta2=0.999)) # Fit trained model model.fit(dataset, nb_epoch=50) model.save() # Eval model on train scores = model.evaluate(dataset, [regression_metric]) assert scores[regression_metric.name] < .1
def test_classification_overfit(self): """Test that TensorGraph models can overfit simple classification datasets.""" n_samples = 10 n_features = 3 n_tasks = 1 n_classes = 2 # Generate dummy dataset np.random.seed(123) ids = np.arange(n_samples) X = np.random.rand(n_samples, n_features) y = np.zeros((n_samples, n_tasks)) w = np.ones((n_samples, n_tasks)) dataset = dc.data.NumpyDataset(X, y, w, ids) classification_metric = dc.metrics.Metric(dc.metrics.accuracy_score) model = dc.models.MultitaskClassifier(n_tasks, n_features, dropouts=[0.], weight_init_stddevs=[.1], batch_size=n_samples) model.set_optimizer(Adam(learning_rate=0.0003, beta1=0.9, beta2=0.999)) # Fit trained model model.fit(dataset, nb_epoch=100) model.save() # Eval model on train scores = model.evaluate(dataset, [classification_metric]) assert scores[classification_metric.name] > .9
def test_save_load(self): n_data_points = 20 n_features = 2 X = np.random.rand(n_data_points, n_features) y = [[0, 1] for x in range(n_data_points)] dataset = NumpyDataset(X, y) features = Feature(shape=(None, n_features)) dense = Dense(out_channels=2, in_layers=[features]) output = SoftMax(in_layers=[dense]) label = Label(shape=(None, 2)) smce = SoftMaxCrossEntropy(in_layers=[label, dense]) loss = ReduceMean(in_layers=[smce]) tg = dc.models.TensorGraph(learning_rate=0.01) tg.add_output(output) tg.set_loss(loss) submodel_loss = ReduceSum(in_layers=smce) submodel_opt = Adam(learning_rate=0.002) submodel = tg.create_submodel(layers=[dense], loss=submodel_loss, optimizer=submodel_opt) tg.fit(dataset, nb_epoch=1) prediction = np.squeeze(tg.predict_on_batch(X)) tg.save() dirpath = tempfile.mkdtemp() shutil.rmtree(dirpath) shutil.move(tg.model_dir, dirpath) tg1 = TensorGraph.load_from_dir(dirpath) prediction2 = np.squeeze(tg1.predict_on_batch(X)) assert_true(np.all(np.isclose(prediction, prediction2, atol=0.01)))
def __init__(self, env, policy, max_search_depth=100, n_search_episodes=1000, discount_factor=0.99, value_weight=1.0, optimizer=Adam(), model_dir=None): """Create an object for optimizing a policy. Parameters ---------- env: Environment the Environment to interact with policy: Policy the Policy to optimize. Its create_layers() method must return a dict containing the keys 'action_prob' and 'value', corresponding to the action probabilities and value estimate max_search_depth: int the maximum depth of the tree search, measured in steps n_search_episodes: int the number of episodes to simulate (up to max_search_depth, if they do not terminate first) for each tree search discount_factor: float the discount factor to use when computing rewards value_weight: float a scale factor for the value loss term in the loss function optimizer: Optimizer the optimizer to use model_dir: str the directory in which the model will be saved. If None, a temporary directory will be created. """ self._env = copy.deepcopy(env) self._policy = policy self.max_search_depth = max_search_depth self.n_search_episodes = n_search_episodes self.discount_factor = discount_factor self.value_weight = value_weight self._state_is_list = isinstance(env.state_shape[0], collections.Sequence) if optimizer is None: self._optimizer = Adam(learning_rate=0.001, beta1=0.9, beta2=0.999) else: self._optimizer = optimizer (self._graph, self._features, self._pred_prob, self._pred_value, self._search_prob, self._search_value) = self._build_graph(None, 'global', model_dir)
def __init__(self, env, policy, max_rollout_length=20, discount_factor=0.99, advantage_lambda=0.98, value_weight=1.0, entropy_weight=0.01, optimizer=None, model_dir=None, use_hindsight=False): """Create an object for optimizing a policy. Parameters ---------- env: Environment the Environment to interact with policy: Policy the Policy to optimize. Its create_layers() method must return a dict containing the keys 'action_prob' and 'value', corresponding to the action probabilities and value estimate max_rollout_length: int the maximum length of rollouts to generate discount_factor: float the discount factor to use when computing rewards advantage_lambda: float the parameter for trading bias vs. variance in Generalized Advantage Estimation value_weight: float a scale factor for the value loss term in the loss function entropy_weight: float a scale factor for the entropy term in the loss function optimizer: Optimizer the optimizer to use. If None, a default optimizer is used. model_dir: str the directory in which the model will be saved. If None, a temporary directory will be created. use_hindsight: bool if True, use Hindsight Experience Replay """ self._env = env self._policy = policy self.max_rollout_length = max_rollout_length self.discount_factor = discount_factor self.advantage_lambda = advantage_lambda self.value_weight = value_weight self.entropy_weight = entropy_weight self.use_hindsight = use_hindsight self._state_is_list = isinstance(env.state_shape[0], collections.Sequence) if optimizer is None: self._optimizer = Adam(learning_rate=0.001, beta1=0.9, beta2=0.999) else: self._optimizer = optimizer (self._graph, self._features, self._rewards, self._actions, self._action_prob, self._value, self._advantages) = self._build_graph(None, 'global', model_dir) with self._graph._get_tf("Graph").as_default(): self._session = tf.Session() self._rnn_states = self._graph.rnn_zero_states
def _get_tf(self, obj): """Fetches underlying TensorFlow primitives. Parameters ---------- obj: str If "Graph", returns tf.Graph instance. If "FileWriter", returns tf.summary.FileWriter. If "Optimizer", returns the optimizer. If "train_op", returns the train operation. If "summary_op", returns the merged summary. If "GlobalStep" returns the global step. Returns ------- TensorFlow Object """ if obj in self.tensor_objects and self.tensor_objects[obj] is not None: return self.tensor_objects[obj] if obj == "Graph": self.tensor_objects['Graph'] = tf.Graph() elif obj == "FileWriter": self.tensor_objects['FileWriter'] = tf.summary.FileWriter( self.model_dir) elif obj == 'Optimizer': if self.optimizer is None: self.optimizer = Adam(learning_rate=self.learning_rate, beta1=0.9, beta2=0.999, epsilon=1e-7) self.tensor_objects[ 'Optimizer'] = self.optimizer._create_optimizer( self._get_tf('GlobalStep')) elif obj == 'train_op': self.tensor_objects['train_op'] = self._get_tf( 'Optimizer').minimize(self.loss.out_tensor, global_step=self._get_tf('GlobalStep')) elif obj == 'summary_op': self.tensor_objects['summary_op'] = tf.summary.merge_all( key=tf.GraphKeys.SUMMARIES) elif obj == 'GlobalStep': with self._get_tf("Graph").as_default(): self.tensor_objects['GlobalStep'] = tf.Variable( 0, trainable=False) return self._get_tf(obj)
def test_skewed_missing_classification_overfit(self): """TG, skewed data, few actives Test TensorGraph models overfit 0/1 datasets with missing data and few actives. This is intended to be as close to singletask MUV datasets as possible. """ n_samples = 5120 n_features = 6 n_tasks = 1 n_classes = 2 # Generate dummy dataset np.random.seed(123) p = .002 ids = np.arange(n_samples) X = np.random.rand(n_samples, n_features) y = np.random.binomial(1, p, size=(n_samples, n_tasks)) w = np.ones((n_samples, n_tasks)) y_flat, w_flat = np.squeeze(y), np.squeeze(w) y_nonzero = y_flat[w_flat != 0] num_nonzero = np.count_nonzero(y_nonzero) weight_nonzero = len(y_nonzero) / num_nonzero w_flat[y_flat != 0] = weight_nonzero w = np.reshape(w_flat, (n_samples, n_tasks)) dataset = dc.data.DiskDataset.from_numpy(X, y, w, ids) classification_metric = dc.metrics.Metric(dc.metrics.roc_auc_score) model = dc.models.MultiTaskClassifier( n_tasks, n_features, dropouts=[0.], weight_init_stddevs=[1.], batch_size=n_samples) model.set_optimizer(Adam(learning_rate=0.003, beta1=0.9, beta2=0.999)) # Fit trained model model.fit(dataset, nb_epoch=100) model.save() # Eval model on train scores = model.evaluate(dataset, [classification_metric]) assert scores[classification_metric.name] > .7
def eval_tic_tac_toe(value_weight, num_epoch_rounds=1, games=10**4, rollouts=10**5): """ Returns the average reward over 1k games after 100k rollouts :param value_weight: :return: """ env = deepchem.rl.envs.tictactoe.TicTacToeEnvironment() policy = TicTacToePolicy() model_dir = "/tmp/tictactoe" try: shutil.rmtree(model_dir) except: pass avg_rewards = [] for j in range(num_epoch_rounds): a3c = dc.rl.A3C(env, policy, entropy_weight=0.01, value_weight=value_weight, model_dir=model_dir, optimizer=Adam(learning_rate=0.001)) try: a3c.restore() except: print("unable to restore") pass a3c.fit(rollouts) rewards = [] for i in range(games): env.reset() reward = -float('inf') while not env._terminated: action = a3c.select_action(env._state) reward = env.step(action) rewards.append(reward) avg_rewards.append({(j + 1) * rollouts: np.mean(rewards)}) return avg_rewards
def __init__(self, env, policy, max_rollout_length=20, discount_factor=0.99, advantage_lambda=0.98, value_weight=1.0, entropy_weight=0.01, optimizer=None, model_dir=None, use_hindsight=False, worker_count=multiprocessing.cpu_count(), zero_terminal=True, callbacks=[]): """Create an object for optimizing a policy. Parameters ---------- env: Environment the Environment to interact with policy: Policy the Policy to optimize. Its create_layers() method must return a dict containing the keys 'action_prob' and 'value' (for discrete action spaces) or 'action_mean', 'action_std', and 'value' (for continuous action spaces) max_rollout_length: int the maximum length of rollouts to generate discount_factor: float the discount factor to use when computing rewards advantage_lambda: float the parameter for trading bias vs. variance in Generalized Advantage Estimation value_weight: float a scale factor for the value loss term in the loss function entropy_weight: float a scale factor for the entropy term in the loss function optimizer: Optimizer the optimizer to use. If None, a default optimizer is used. model_dir: str the directory in which the model will be saved. If None, a temporary directory will be created. use_hindsight: bool if True, use Hindsight Experience Replay zero_terminal: bool whether terminal states should be at zero value (default); if False, the environment is assumed to terminate at any state on external conditions. callbacks: list each rollout is passed to the on_callback method of each callback """ self._env = env self._policy = policy self.max_rollout_length = max_rollout_length self.discount_factor = discount_factor self.advantage_lambda = advantage_lambda self.value_weight = value_weight self.entropy_weight = entropy_weight self.use_hindsight = use_hindsight self.worker_count = worker_count self.zero_terminal = zero_terminal self.callbacks = callbacks self._state_is_list = isinstance(env.state_shape[0], collections.Sequence) if optimizer is None: self._optimizer = Adam(learning_rate=0.001, beta1=0.9, beta2=0.999) else: self._optimizer = optimizer fields = self._build_graph(None, 'global', model_dir) if self.continuous: (self._graph, self._features, self._rewards, self._actions, self._action_mean, self._action_std, self._value, self._advantages, self._loss_components) = fields else: (self._graph, self._features, self._rewards, self._actions, self._action_prob, self._value, self._advantages, self._loss_components) = fields with self._graph._get_tf("Graph").as_default(): self._session = tf.Session() self._rnn_states = self._graph.rnn_zero_states
def __init__(self, env, policy, max_rollout_length=20, optimization_rollouts=8, optimization_epochs=4, batch_size=64, clipping_width=0.2, discount_factor=0.99, advantage_lambda=0.98, value_weight=1.0, entropy_weight=0.01, optimizer=None, model_dir=None, use_hindsight=False): """Create an object for optimizing a policy. Parameters ---------- env: Environment the Environment to interact with policy: Policy the Policy to optimize. It must have outputs with the names 'action_prob' and 'value', corresponding to the action probabilities and value estimate max_rollout_length: int the maximum length of rollouts to generate optimization_rollouts: int the number of rollouts to generate for each iteration of optimization optimization_epochs: int the number of epochs of optimization to perform within each iteration batch_size: int the batch size to use during optimization. If this is 0, each rollout will be used as a separate batch. clipping_width: float in computing the PPO loss function, the probability ratio is clipped to the range (1-clipping_width, 1+clipping_width) discount_factor: float the discount factor to use when computing rewards advantage_lambda: float the parameter for trading bias vs. variance in Generalized Advantage Estimation value_weight: float a scale factor for the value loss term in the loss function entropy_weight: float a scale factor for the entropy term in the loss function optimizer: Optimizer the optimizer to use. If None, a default optimizer is used. model_dir: str the directory in which the model will be saved. If None, a temporary directory will be created. use_hindsight: bool if True, use Hindsight Experience Replay """ self._env = env self._policy = policy self.max_rollout_length = max_rollout_length self.optimization_rollouts = optimization_rollouts self.optimization_epochs = optimization_epochs self.batch_size = batch_size self.clipping_width = clipping_width self.discount_factor = discount_factor self.advantage_lambda = advantage_lambda self.value_weight = value_weight self.entropy_weight = entropy_weight self.use_hindsight = use_hindsight self._state_is_list = isinstance(env.state_shape[0], collections.Sequence) if optimizer is None: self._optimizer = Adam(learning_rate=0.001, beta1=0.9, beta2=0.999) else: self._optimizer = optimizer self._model = self._build_model(model_dir) output_names = policy.output_names output_tensors = self._model._output_tensors self._value = output_tensors[output_names.index('value')] self._action_prob = output_tensors[output_names.index('action_prob')] rnn_outputs = [ i for i, n in enumerate(output_names) if n == 'rnn_state' ] self._rnn_final_states = [output_tensors[i] for i in rnn_outputs] self._session = tf.Session() self._train_op = self._model._tf_optimizer.minimize( self._model._loss_tensor) self._rnn_states = policy.rnn_initial_states if len(self._rnn_states) > 0 and batch_size != 0: raise ValueError( 'Cannot batch rollouts when the policy contains a recurrent layer. Set batch_size to 0.' ) self._checkpoint = tf.train.Checkpoint() self._checkpoint.save_counter # Ensure the variable has been created self._checkpoint.listed = self._model.model.trainable_variables self._session.run(self._checkpoint.save_counter.initializer)
def test_continuous(self): """Test A3C on an environment with a continous action space.""" # The state consists of two numbers: a current value and a target value. # The policy just needs to learn to output the target value (or at least # move toward it). class TestEnvironment(dc.rl.Environment): def __init__(self): super(TestEnvironment, self).__init__((2, ), action_shape=(1, )) def reset(self): target = np.random.uniform(-50, 50) self._state = np.array([0, target]) self._terminated = False self.count = 0 def step(self, action): target = self._state[1] dist = np.abs(target - action[0]) old_dist = np.abs(target - self._state[0]) new_state = np.array([action[0], target]) self._state = new_state self.count += 1 reward = old_dist - dist self._terminated = (self.count == 10) return reward # A simple policy with no hidden layers. class TestPolicy(dc.rl.Policy): def create_layers(self, state, **kwargs): action_mean = Dense(1, in_layers=state, weights_initializer=tf.zeros_initializer) action_std = Constant([10.0]) value = Dense(1, in_layers=state) return { 'action_mean': action_mean, 'action_std': action_std, 'value': value } # Optimize it. env = TestEnvironment() learning_rate = PolynomialDecay(initial_rate=0.005, final_rate=0.0005, decay_steps=25000) a3c = dc.rl.A3C(env, TestPolicy(), discount_factor=0, optimizer=Adam(learning_rate=learning_rate)) a3c.fit(25000) # Try running it and see if it reaches the target env.reset() while not env.terminated: env.step(a3c.select_action(env.state, deterministic=True)) distance = np.abs(env.state[0] - env.state[1]) tolerance = max(1.0, 0.1 * np.abs(env.state[1])) assert distance < tolerance
class TensorGraph(Model): def __init__(self, tensorboard=False, tensorboard_log_frequency=100, batch_size=100, random_seed=None, use_queue=True, mode="regression", graph=None, learning_rate=0.001, **kwargs): """ TODO(LESWING) allow a model to change its learning rate Parameters ---------- tensorboard: bool Should we log to model_dir data for tensorboard? tensorboard_log_frequency: int How many training batches before logging tensorboard? batch_size: int default batch size for training and evaluating use_queue: boolean if True when building we will create a tf.FIFO queue, which will hold all features, weights, and labels. We will feed the inputs into this queue in batches of self.batch_size in a separate thread from the thread training the model. You cannot use a queue when batches are not of consistent size mode: str "regression" or "classification". "classification" models on predict will do an argmax(axis=2) to determine the class of the prediction. graph: tensorflow.Graph the Graph in which to create Tensorflow objects. If None, a new Graph is created. learning_rate: float or LearningRateSchedule the learning rate to use for optimization kwargs """ # Layer Management self.nxgraph = nx.DiGraph() self.layers = dict() self.features = list() self.labels = list() self.outputs = list() self.task_weights = list() self.loss = None self.built = False self.queue_installed = False self.optimizer = Adam( learning_rate=learning_rate, beta1=0.9, beta2=0.999, epsilon=1e-7) # Singular place to hold Tensor objects which don't serialize # These have to be reconstructed on restoring from pickle # See TensorGraph._get_tf() for more details on lazy construction self.tensor_objects = { "FileWriter": None, "Graph": graph, "train_op": None, "summary_op": None, } self.tensorboard = tensorboard self.tensorboard_log_frequency = tensorboard_log_frequency self.tensorboard_step = 0 self.mode = mode self.global_step = 0 self.last_checkpoint = None self.use_queue = use_queue self.batch_size = batch_size self.random_seed = random_seed super(TensorGraph, self).__init__(**kwargs) self.save_file = "%s/%s" % (self.model_dir, "model") self.model_class = None self.rnn_initial_states = [] self.rnn_final_states = [] self.rnn_zero_states = [] def _add_layer(self, layer): if layer.name is None: layer.name = "%s_%s" % (layer.__class__.__name__, len(self.layers) + 1) if layer.name in self.layers: return if isinstance(layer, Feature): self.features.append(layer) if isinstance(layer, Label): self.labels.append(layer) if isinstance(layer, Weights): self.task_weights.append(layer) self.nxgraph.add_node(layer.name) self.layers[layer.name] = layer for in_layer in layer.in_layers: self._add_layer(in_layer) self.nxgraph.add_edge(in_layer.name, layer.name) def fit(self, dataset, nb_epoch=10, max_checkpoints_to_keep=5, checkpoint_interval=1000): return self.fit_generator( self.default_generator(dataset, epochs=nb_epoch), max_checkpoints_to_keep, checkpoint_interval) def fit_generator(self, feed_dict_generator, max_checkpoints_to_keep=5, checkpoint_interval=1000): def create_feed_dict(): if self.use_queue: while True: yield {self._training_placeholder: 1.0} for d in feed_dict_generator: feed_dict = {k.out_tensor: v for k, v in six.iteritems(d)} feed_dict[self._training_placeholder] = 1.0 yield feed_dict if not self.built: self.build() with self._get_tf("Graph").as_default(): time1 = time.time() train_op = self._get_tf('train_op') saver = tf.train.Saver(max_to_keep=max_checkpoints_to_keep) with tf.Session() as sess: self._initialize_weights(sess, saver) avg_loss, n_batches = 0.0, 0.0 coord = tf.train.Coordinator() n_samples = 0 if self.use_queue: enqueue_thread = threading.Thread( target=_enqueue_batch, args=(self, feed_dict_generator, self._get_tf("Graph"), sess, coord)) enqueue_thread.start() output_tensors = [x.out_tensor for x in self.outputs] fetches = output_tensors + [train_op, self.loss.out_tensor] for feed_dict in create_feed_dict(): try: fetched_values = sess.run(fetches, feed_dict=feed_dict) loss = fetched_values[-1] avg_loss += loss n_batches += 1 self.global_step += 1 n_samples += 1 if self.tensorboard and n_samples % self.tensorboard_log_frequency == 0: summary = sess.run( self._get_tf("summary_op"), feed_dict=feed_dict) self._log_tensorboard(summary) except OutOfRangeError: break if self.global_step % checkpoint_interval == checkpoint_interval - 1: saver.save(sess, self.save_file, global_step=self.global_step) self.last_checkpoint = saver.last_checkpoints[-1] avg_loss = float(avg_loss) / n_batches print('Ending global_step %d: Average loss %g' % (self.global_step, avg_loss)) avg_loss, n_batches = 0.0, 0.0 avg_loss = float(avg_loss) / n_batches print('Ending global_step %d: Average loss %g' % (self.global_step, avg_loss)) saver.save(sess, self.save_file, global_step=self.global_step) self.last_checkpoint = saver.last_checkpoints[-1] ############################################################## TIMING time2 = time.time() print("TIMING: model fitting took %0.3f s" % (time2 - time1)) ############################################################## TIMING def _log_tensorboard(self, summary): """ TODO(LESWING) set epoch Parameters ---------- Returns ------- """ global_step = int(self.global_step) writer = self._get_tf("FileWriter") writer.reopen() writer.add_summary(summary, global_step=global_step) writer.close() def fit_on_batch(self, X, y, w): if not self.built: self.build() dataset = NumpyDataset(X, y) return self.fit(dataset, nb_epoch=1) def default_generator(self, dataset, epochs=1, predict=False, pad_batches=True): if len(self.features) > 1: raise ValueError("More than one Feature, must use generator") if len(self.labels) > 1: raise ValueError("More than one Label, must use generator") if len(self.task_weights) > 1: raise ValueError("More than one Weights, must use generator") for epoch in range(epochs): for (X_b, y_b, w_b, ids_b) in dataset.iterbatches( batch_size=self.batch_size, deterministic=True, pad_batches=pad_batches): feed_dict = dict() if len(self.labels) == 1 and y_b is not None and not predict: feed_dict[self.labels[0]] = y_b if len(self.features) == 1 and X_b is not None: feed_dict[self.features[0]] = X_b if len(self.task_weights) == 1 and w_b is not None and not predict: feed_dict[self.task_weights[0]] = w_b for (initial_state, zero_state) in zip(self.rnn_initial_states, self.rnn_zero_states): feed_dict[initial_state] = zero_state yield feed_dict def predict_on_generator(self, generator, transformers=[]): """Generates output predictions for the input samples, processing the samples in a batched way. # Arguments x: the input data, as a Numpy array. batch_size: integer. verbose: verbosity mode, 0 or 1. # Returns A Numpy array of predictions. """ retval = self.predict_proba_on_generator(generator, transformers) if self.mode == 'classification': retval = np.expand_dims(from_one_hot(retval, axis=2), axis=1) return retval def predict_proba_on_generator(self, generator, transformers=[]): """ Returns: y_pred: numpy ndarray of shape (n_samples, n_classes*n_tasks) """ if not self.built: self.build() with self._get_tf("Graph").as_default(): with tf.Session() as sess: saver = tf.train.Saver() self._initialize_weights(sess, saver) out_tensors = [x.out_tensor for x in self.outputs] results = [] for feed_dict in generator: feed_dict = { self.layers[k.name].out_tensor: v for k, v in six.iteritems(feed_dict) } feed_dict[self._training_placeholder] = 0.0 result = np.array(sess.run(out_tensors, feed_dict=feed_dict)) if len(result.shape) == 3: result = np.transpose(result, axes=[1, 0, 2]) result = undo_transforms(result, transformers) results.append(result) return np.concatenate(results, axis=0) def bayesian_predict_on_batch(self, X, transformers=[], n_passes=4): """ Returns: mu: numpy ndarray of shape (n_samples, n_tasks) sigma: numpy ndarray of shape (n_samples, n_tasks) """ dataset = NumpyDataset(X=X, y=None, n_tasks=len(self.outputs)) y_ = [] for i in range(n_passes): generator = self.default_generator( dataset, predict=True, pad_batches=True) y_.append(self.predict_on_generator(generator, transformers)) y_ = np.concatenate(y_, axis=2) mu = np.mean(y_, axis=2) sigma = np.std(y_, axis=2) return mu, sigma def predict_on_smiles_batch(self, smiles, featurizer, n_tasks, transformers=[]): """ # Returns: A numpy ndarray of shape (n_samples, n_tasks) """ convmols = featurize_smiles_np(smiles, featurizer) dataset = NumpyDataset(X=convmols, y=None, n_tasks=len(self.outputs)) generator = self.default_generator(dataset, predict=True, pad_batches=True) return self.predict_on_generator(generator, transformers) def predict_on_batch(self, X, sess=None, transformers=[]): """Generates output predictions for the input samples, processing the samples in a batched way. # Arguments x: the input data, as a Numpy array. batch_size: integer. verbose: verbosity mode, 0 or 1. # Returns A Numpy array of predictions. """ dataset = NumpyDataset(X=X, y=None) generator = self.default_generator(dataset, predict=True, pad_batches=False) return self.predict_on_generator(generator, transformers) def predict_proba_on_batch(self, X, sess=None, transformers=[]): dataset = NumpyDataset(X=X, y=None) generator = self.default_generator(dataset, predict=True, pad_batches=False) return self.predict_proba_on_generator(generator, transformers) def predict(self, dataset, transformers=[], batch_size=None): """ Uses self to make predictions on provided Dataset object. Returns: y_pred: numpy ndarray of shape (n_samples,) """ generator = self.default_generator(dataset, predict=True, pad_batches=False) return self.predict_on_generator(generator, transformers) def predict_proba(self, dataset, transformers=[], batch_size=None): """ TODO: Do transformers even make sense here? Returns: y_pred: numpy ndarray of shape (n_samples, n_classes*n_tasks) """ generator = self.default_generator(dataset, predict=True, pad_batches=False) return self.predict_proba_on_generator(generator, transformers) def topsort(self): return nx.topological_sort(self.nxgraph) def build(self): if self.built: return with self._get_tf("Graph").as_default(): self._training_placeholder = tf.placeholder(dtype=tf.float32, shape=()) if self.random_seed is not None: tf.set_random_seed(self.random_seed) self._install_queue() order = self.topsort() for node in order: with tf.name_scope(node): node_layer = self.layers[node] node_layer.create_tensor(training=self._training_placeholder) self.rnn_initial_states += node_layer.rnn_initial_states self.rnn_final_states += node_layer.rnn_final_states self.rnn_zero_states += node_layer.rnn_zero_states node_layer.add_summary_to_tg() self.built = True for layer in self.layers.values(): if layer.tensorboard: self.tensorboard = True tf.summary.scalar("loss", self.loss.out_tensor) for layer in self.layers.values(): if layer.tensorboard: tf.summary.tensor_summary(layer.name, layer.out_tensor) if self.tensorboard: writer = self._get_tf("FileWriter") writer.add_graph(self._get_tf("Graph")) writer.close() # As a sanity check, make sure all tensors have the correct shape. for layer in self.layers.values(): try: assert list(layer.shape) == layer.out_tensor.get_shape().as_list( ), '%s: Expected shape %s does not match actual shape %s' % ( layer.name, layer.shape, layer.out_tensor.get_shape().as_list()) except NotImplementedError: pass def _install_queue(self): """ """ if not self.use_queue or self.queue_installed: for layer in self.features + self.labels + self.task_weights: layer.pre_queue = True return names = [] shapes = [] pre_q_inputs = [] q = InputFifoQueue(shapes, names, in_layers=pre_q_inputs) q.name = "%s_%s" % (q.__class__.__name__, len(self.layers) + 1) for layer in self.features + self.labels + self.task_weights: pre_q_input = layer.create_pre_q(self.batch_size) shapes.append(pre_q_input.shape) names.append(pre_q_input.name) pre_q_inputs.append(pre_q_input) layer.in_layers.append(q) self.nxgraph.add_edge(q.name, layer.name) self._add_layer(q) self.input_queue = q self.queue_installed = True def set_loss(self, layer): self._add_layer(layer) self.loss = layer def add_output(self, layer): self._add_layer(layer) self.outputs.append(layer) def set_optimizer(self, optimizer): """Set the optimizer to use for fitting.""" self.optimizer = optimizer def get_pickling_errors(self, obj, seen=None): if seen == None: seen = [] try: state = obj.__getstate__() except AttributeError: return if state == None: return if isinstance(state, tuple): if not isinstance(state[0], dict): state = state[1] else: state = state[0].update(state[1]) result = {} for i in state: try: pickle.dumps(state[i], protocol=2) except pickle.PicklingError: if not state[i] in seen: seen.append(state[i]) result[i] = self.get_pickling_errors(state[i], seen) return result def save(self): # Remove out_tensor from the object to be pickled must_restore = False tensor_objects = self.tensor_objects rnn_initial_states = self.rnn_initial_states rnn_final_states = self.rnn_final_states rnn_zero_states = self.rnn_zero_states self.tensor_objects = {} self.rnn_initial_states = [] self.rnn_final_states = [] self.rnn_zero_states = [] out_tensors = [] if self.built: must_restore = True for node in self.topsort(): node_layer = self.layers[node] out_tensors.append(node_layer.none_tensors()) optimizer = self.optimizer self.optimizer = None training_placeholder = self._training_placeholder self._training_placeholder = None self.built = False # Pickle itself pickle_name = os.path.join(self.model_dir, "model.pickle") with open(pickle_name, 'wb') as fout: try: pickle.dump(self, fout) except Exception as e: print(self.get_pickling_errors(self)) raise e # add out_tensor back to everyone if must_restore: for index, node in enumerate(self.topsort()): node_layer = self.layers[node] node_layer.set_tensors(out_tensors[index]) self._training_placeholder = training_placeholder self.optimizer = optimizer self.built = True self.tensor_objects = tensor_objects self.rnn_initial_states = rnn_initial_states self.rnn_final_states = rnn_final_states self.rnn_zero_states = rnn_zero_states def evaluate_generator(self, feed_dict_generator, metrics, transformers=[], labels=None, outputs=None, weights=[], per_task_metrics=False): if labels is None: raise ValueError n_tasks = len(self.outputs) n_classes = self.outputs[0].out_tensor.get_shape()[-1].value evaluator = GeneratorEvaluator( self, feed_dict_generator, transformers, labels=labels, outputs=outputs, weights=weights, n_tasks=n_tasks, n_classes=n_classes) if not per_task_metrics: scores = evaluator.compute_model_performance(metrics) return scores else: scores, per_task_scores = evaluator.compute_model_performance( metrics, per_task_metrics=per_task_metrics) return scores, per_task_scores def get_layer_variables(self, layer): """Get the list of trainable variables in a layer of the graph.""" if not self.built: self.build() with self._get_tf("Graph").as_default(): return tf.get_collection( tf.GraphKeys.GLOBAL_VARIABLES, scope=layer.variable_scope) def get_global_step(self): return self._get_tf("GlobalStep") def _get_tf(self, obj): """ TODO(LESWING) REALLY NEED TO DOCUMENT THIS Parameters ---------- obj Returns ------- TensorFlow Object """ if obj in self.tensor_objects and self.tensor_objects[obj] is not None: return self.tensor_objects[obj] if obj == "Graph": self.tensor_objects['Graph'] = tf.Graph() elif obj == "FileWriter": self.tensor_objects['FileWriter'] = tf.summary.FileWriter(self.model_dir) elif obj == 'Optimizer': self.tensor_objects['Optimizer'] = self.optimizer._create_optimizer( self._get_tf('GlobalStep')) elif obj == 'train_op': self.tensor_objects['train_op'] = self._get_tf('Optimizer').minimize( self.loss.out_tensor, global_step=self._get_tf('GlobalStep')) elif obj == 'summary_op': self.tensor_objects['summary_op'] = tf.summary.merge_all( key=tf.GraphKeys.SUMMARIES) elif obj == 'GlobalStep': with self._get_tf("Graph").as_default(): self.tensor_objects['GlobalStep'] = tf.Variable(0, trainable=False) return self._get_tf(obj) def _initialize_weights(self, sess, saver): """ Parameters ---------- sess: tf.Session The Session must be open saver: tf.train.Saver A saver object to save/restore checkpoints Returns ------- """ if self.last_checkpoint is None: sess.run(tf.global_variables_initializer()) saver.save(sess, self.save_file, global_step=self.global_step) self.last_checkpoint = saver.last_checkpoints[-1] else: saver.restore(sess, self.last_checkpoint) def get_num_tasks(self): return len(self.outputs) def get_pre_q_input(self, input_layer): layer_name = input_layer.name pre_q_name = "%s_pre_q" % layer_name return self.layers[pre_q_name] @staticmethod def load_from_dir(model_dir): pickle_name = os.path.join(model_dir, "model.pickle") with open(pickle_name, 'rb') as fout: tensorgraph = pickle.load(fout) tensorgraph.built = False return tensorgraph def __del__(self): pass
def test_roulette(self): """Test training a policy for the roulette environment.""" # This is modeled after the Roulette-v0 environment from OpenAI Gym. # The player can bet on any number from 0 to 36, or walk away (which ends the # game). The average reward for any bet is slightly negative, so the best # strategy is to walk away. class RouletteEnvironment(dc.rl.Environment): def __init__(self): super(RouletteEnvironment, self).__init__([(1, )], 38) self._state = [np.array([0])] def step(self, action): if action == 37: self._terminated = True # Walk away. return 0.0 wheel = np.random.randint(37) if wheel == 0: if action == 0: return 35.0 return -1.0 if action != 0 and wheel % 2 == action % 2: return 1.0 return -1.0 def reset(self): self._terminated = False env = RouletteEnvironment() # This policy just learns a constant probability for each action, and a constant for the value. class TestPolicy(dc.rl.Policy): def create_layers(self, state, **kwargs): action = Variable(np.ones(env.n_actions)) output = SoftMax(in_layers=[ Reshape(in_layers=[action], shape=(-1, env.n_actions)) ]) value = Variable([0.0]) return {'action_prob': output, 'value': value} # Optimize it. ppo = dc.rl.PPO(env, TestPolicy(), max_rollout_length=20, optimizer=Adam(learning_rate=0.001)) ppo.fit(30000) # It should have learned that the expected value is very close to zero, and that the best # action is to walk away. action_prob, value = ppo.predict([[0]]) assert -0.5 < value[0] < 0.5 assert action_prob.argmax() == 37 assert ppo.select_action([[0]], deterministic=True) == 37 # Verify that we can create a new PPO object, reload the parameters from the first one, and # get the same result. new_ppo = dc.rl.PPO(env, TestPolicy(), model_dir=ppo._graph.model_dir) new_ppo.restore() action_prob2, value2 = new_ppo.predict([[0]]) assert value2 == value # Do the same thing, only using the "restore" argument to fit(). new_ppo = dc.rl.PPO(env, TestPolicy(), model_dir=ppo._graph.model_dir) new_ppo.fit(0, restore=True) action_prob2, value2 = new_ppo.predict([[0]]) assert value2 == value
tokens = sorted(list(tokens)) print(tokens[0:5]) max_length = max(len(s) for s in train_smiles) model = dc.models.SeqToSeq(tokens, tokens, max_length, encoder_layers=2, decoder_layers=2, embedding_dimension=256, model_dir='fingerprint') batches_per_epoch = len(train_smiles) / model.batch_size model.set_optimizer( Adam(learning_rate=ExponentialDecay(0.004, 0.9, batches_per_epoch))) def generate_sequences(epochs): for i in range(epochs): for s in train_smiles: yield (s, s) model.fit_sequences(generate_sequences(40)) predicted = model.predict_from_sequences(valid_smiles[:500]) count = 0 for s, p in zip(valid_smiles[:500], predicted): if ''.join(p) == s: count += 1
def __init__(self, env, policy, max_rollout_length=20, optimization_rollouts=8, optimization_epochs=4, batch_size=64, clipping_width=0.2, discount_factor=0.99, advantage_lambda=0.98, value_weight=1.0, entropy_weight=0.01, optimizer=None, model_dir=None, use_hindsight=False): """Create an object for optimizing a policy. Parameters ---------- env: Environment the Environment to interact with policy: Policy the Policy to optimize. Its create_layers() method must return a map containing the keys 'action_prob' and 'value', corresponding to the action probabilities and value estimate max_rollout_length: int the maximum length of rollouts to generate optimization_rollouts: int the number of rollouts to generate for each iteration of optimization optimization_epochs: int the number of epochs of optimization to perform within each iteration batch_size: int the batch size to use during optimization. If this is 0, each rollout will be used as a separate batch. clipping_width: float in computing the PPO loss function, the probability ratio is clipped to the range (1-clipping_width, 1+clipping_width) discount_factor: float the discount factor to use when computing rewards advantage_lambda: float the parameter for trading bias vs. variance in Generalized Advantage Estimation value_weight: float a scale factor for the value loss term in the loss function entropy_weight: float a scale factor for the entropy term in the loss function optimizer: Optimizer the optimizer to use. If None, a default optimizer is used. model_dir: str the directory in which the model will be saved. If None, a temporary directory will be created. use_hindsight: bool if True, use Hindsight Experience Replay """ self._env = env self._policy = policy self.max_rollout_length = max_rollout_length self.optimization_rollouts = optimization_rollouts self.optimization_epochs = optimization_epochs self.batch_size = batch_size self.clipping_width = clipping_width self.discount_factor = discount_factor self.advantage_lambda = advantage_lambda self.value_weight = value_weight self.entropy_weight = entropy_weight self.use_hindsight = use_hindsight self._state_is_list = isinstance(env.state_shape[0], collections.Sequence) if optimizer is None: self._optimizer = Adam(learning_rate=0.001, beta1=0.9, beta2=0.999) else: self._optimizer = optimizer (self._graph, self._features, self._rewards, self._actions, self._action_prob, self._value, self._advantages, self._old_action_prob) = self._build_graph(None, 'global', model_dir) with self._graph._get_tf("Graph").as_default(): self._session = tf.Session() self._train_op = self._graph._get_tf('Optimizer').minimize( self._graph.loss.out_tensor) self._rnn_states = self._graph.rnn_zero_states if len(self._rnn_states) > 0 and batch_size != 0: raise ValueError( 'Cannot batch rollouts when the policy contains a recurrent layer. Set batch_size to 0.' )
tokens = tokens.union(set(c for c in s)) tokens = sorted(list(tokens)) print(tokens[0:5]) max_length = max(len(s) for s in train_smiles) model = dc.models.SeqToSeq(tokens, tokens, max_length, encoder_layers=2, decoder_layers=2, embedding_dimension=256, model_dir='fingerprint') batches_per_epoch = len(train_smiles)/model.batch_size model.set_optimizer(Adam(learning_rate=ExponentialDecay(0.004, 0.9, batches_per_epoch))) def generate_sequences(epochs): for i in range(epochs): for s in train_smiles: yield (s, s) model.fit_sequences(generate_sequences(40)) predicted = model.predict_from_sequences(valid_smiles[:500]) count = 0 for s,p in zip(valid_smiles[:500], predicted): if ''.join(p) == s: count += 1 print('reproduced', count, 'of 500 validation SMILES strings')
class TensorGraph(Model): def __init__(self, tensorboard=False, tensorboard_log_frequency=100, batch_size=100, random_seed=None, use_queue=True, graph=None, learning_rate=0.001, configproto=None, **kwargs): """ Parameters ---------- tensorboard: bool Should we log to model_dir data for tensorboard? tensorboard_log_frequency: int How many training batches before logging tensorboard? batch_size: int default batch size for training and evaluating use_queue: boolean if True when building we will create a tf.FIFO queue, which will hold all features, weights, and labels. We will feed the inputs into this queue in batches of self.batch_size in a separate thread from the thread training the model. You cannot use a queue when batches are not of consistent size graph: tensorflow.Graph the Graph in which to create Tensorflow objects. If None, a new Graph is created. learning_rate: float or LearningRateSchedule the learning rate to use for optimization configproto: a tf.ConfigProto() object used to create tf.Session() """ # Layer Management self.layers = dict() self.features = list() self.labels = list() self.outputs = list() self.variances = list() self.task_weights = list() self.submodels = list() self.loss = Constant(0) self.built = False self.queue_installed = False self.optimizer = Adam( learning_rate=learning_rate, beta1=0.9, beta2=0.999, epsilon=1e-7) self.configproto = configproto # Singular place to hold Tensor objects which don't serialize # These have to be reconstructed on restoring from pickle # See TensorGraph._get_tf() for more details on lazy construction self.tensor_objects = { "FileWriter": None, "Graph": graph, "train_op": None, "summary_op": None, } self.tensorboard = tensorboard self.tensorboard_log_frequency = tensorboard_log_frequency self.tensorboard_step = 0 self.global_step = 0 self.use_queue = use_queue self.batch_size = batch_size self.random_seed = random_seed super(TensorGraph, self).__init__(**kwargs) self.save_file = "%s/%s" % (self.model_dir, "model") self.model_class = None self.rnn_initial_states = [] self.rnn_final_states = [] self.rnn_zero_states = [] if self.use_queue and self.tensorboard: raise ValueError( "Currently TensorGraph cannot both use_queue and tensorboard at the same time" ) def _add_layer(self, layer): if layer.name is None: layer.name = "%s_%s" % (layer.__class__.__name__, len(self.layers) + 1) if layer.name in self.layers: return if isinstance(layer, Feature): self.features.append(layer) if isinstance(layer, Label): self.labels.append(layer) if isinstance(layer, Weights): self.task_weights.append(layer) self.layers[layer.name] = layer for in_layer in layer.in_layers: self._add_layer(in_layer) def fit(self, dataset, nb_epoch=10, max_checkpoints_to_keep=5, checkpoint_interval=1000, deterministic=False, restore=False, submodel=None, **kwargs): """Train this model on a dataset. Parameters ---------- dataset: Dataset the Dataset to train on nb_epoch: int the number of epochs to train for max_checkpoints_to_keep: int the maximum number of checkpoints to keep. Older checkpoints are discarded. checkpoint_interval: int the frequency at which to write checkpoints, measured in training steps. Set this to 0 to disable automatic checkpointing. deterministic: bool if True, the samples are processed in order. If False, a different random order is used for each epoch. restore: bool if True, restore the model from the most recent checkpoint and continue training from there. If False, retrain the model from scratch. submodel: Submodel an alternate training objective to use. This should have been created by calling create_submodel(). """ return self.fit_generator( self.default_generator( dataset, epochs=nb_epoch, deterministic=deterministic), max_checkpoints_to_keep, checkpoint_interval, restore, submodel) def fit_generator(self, feed_dict_generator, max_checkpoints_to_keep=5, checkpoint_interval=1000, restore=False, submodel=None): """Train this model on data from a generator. Parameters ---------- feed_dict_generator: generator this should generate batches, each represented as a dict that maps Layers to values. max_checkpoints_to_keep: int the maximum number of checkpoints to keep. Older checkpoints are discarded. checkpoint_interval: int the frequency at which to write checkpoints, measured in training steps. Set this to 0 to disable automatic checkpointing. restore: bool if True, restore the model from the most recent checkpoint and continue training from there. If False, retrain the model from scratch. submodel: Submodel an alternate training objective to use. This should have been created by calling create_submodel(). Returns ------- the average loss over the most recent checkpoint interval """ if not self.built: self.build() with self._get_tf("Graph").as_default(): time1 = time.time() loss = self.loss if submodel is not None and submodel.loss is not None: loss = submodel.loss if tfe.in_eager_mode(): # In eager mode we want an optimizer and a function to compute the # gradient of the loss. submodel_vars = None if submodel is None: optimizer = self._get_tf("Optimizer") else: optimizer = submodel.create_optimizer() if submodel.layers is not None: submodel_vars = set() for layer in submodel.layers: for var in layer.variables: submodel_vars.add(var) val_grad_fn = tfe.implicit_value_and_gradients( lambda x: self._run_graph([loss], x, True)[0]) else: # In graph mode we want a training operation. if submodel is None: train_op = self._get_tf('train_op') else: train_op = submodel.get_train_op() if checkpoint_interval > 0: saver = tf.train.Saver( self.get_variables(), max_to_keep=max_checkpoints_to_keep, save_relative_paths=True) if restore: self.restore() avg_loss, n_averaged_batches = 0.0, 0.0 n_samples = 0 n_enqueued = [0] final_sample = [None] if self.queue_installed: enqueue_thread = threading.Thread( target=_enqueue_batch, args=(self, feed_dict_generator, self._get_tf("Graph"), self.session, n_enqueued, final_sample)) enqueue_thread.start() for feed_dict in self._create_feed_dicts(feed_dict_generator, True): if self.queue_installed: # Don't let this thread get ahead of the enqueue thread, since if # we try to read more batches than the total number that get queued, # this thread will hang indefinitely. while n_enqueued[0] <= n_samples: if n_samples == final_sample[0]: break time.sleep(0) if n_samples == final_sample[0]: break n_samples += 1 should_log = (self.tensorboard and n_samples % self.tensorboard_log_frequency == 0) if tfe.in_eager_mode(): value, grads_and_vars = val_grad_fn(feed_dict) if submodel_vars is not None: grads_and_vars = [ x for x in grads_and_vars if x[1] in submodel_vars ] optimizer.apply_gradients(grads_and_vars) avg_loss += value else: fetches = [train_op, loss.out_tensor] if should_log: fetches.append(self._get_tf("summary_op")) fetched_values = self.session.run(fetches, feed_dict=feed_dict) if should_log: self._log_tensorboard(fetched_values[2]) avg_loss += fetched_values[1] n_averaged_batches += 1 self.global_step += 1 if checkpoint_interval > 0 and self.global_step % checkpoint_interval == checkpoint_interval - 1: saver.save(self.session, self.save_file, global_step=self.global_step) avg_loss = float(avg_loss) / n_averaged_batches logger.info('Ending global_step %d: Average loss %g' % (self.global_step, avg_loss)) avg_loss, n_averaged_batches = 0.0, 0.0 if n_averaged_batches > 0: avg_loss = float(avg_loss) / n_averaged_batches if checkpoint_interval > 0: if n_averaged_batches > 0: logger.info('Ending global_step %d: Average loss %g' % (self.global_step, avg_loss)) saver.save(self.session, self.save_file, global_step=self.global_step) time2 = time.time() logger.info("TIMING: model fitting took %0.3f s" % (time2 - time1)) return avg_loss def _log_tensorboard(self, summary): """ TODO(LESWING) set epoch Parameters ---------- Returns ------- """ global_step = int(self.global_step) writer = self._get_tf("FileWriter") writer.reopen() writer.add_summary(summary, global_step=global_step) writer.close() def fit_on_batch(self, X, y, w, submodel=None): if not self.built: self.build() dataset = NumpyDataset(X, y) return self.fit(dataset, nb_epoch=1, submodel=submodel) def default_generator(self, dataset, epochs=1, predict=False, deterministic=True, pad_batches=True): if len(self.features) > 1: raise ValueError("More than one Feature, must use generator") if len(self.labels) > 1: raise ValueError("More than one Label, must use generator") if len(self.task_weights) > 1: raise ValueError("More than one Weights, must use generator") for epoch in range(epochs): for (X_b, y_b, w_b, ids_b) in dataset.iterbatches( batch_size=self.batch_size, deterministic=deterministic, pad_batches=pad_batches): feed_dict = dict() if len(self.labels) == 1 and y_b is not None and not predict: feed_dict[self.labels[0]] = y_b if len(self.features) == 1 and X_b is not None: feed_dict[self.features[0]] = X_b if len(self.task_weights) == 1 and w_b is not None and not predict: feed_dict[self.task_weights[0]] = w_b for (initial_state, zero_state) in zip(self.rnn_initial_states, self.rnn_zero_states): feed_dict[initial_state] = zero_state yield feed_dict def __call__(self, *inputs, **kwargs): """Execute the model in eager mode to compute outputs as a function of inputs. This is very similar to predict_on_batch(), except that it returns the outputs as tensors rather than numpy arrays. That means you can compute the graph's outputs, then do additional calculations based on them, and gradients will be tracked correctly through the whole process. Parameters ---------- inputs: tensors the values to use for the model's features. The number of inputs must exactly match the length of the model's `features` property. The values may be tensors, numpy arrays, or anything else that can be converted to tensors of the correct shape. outputs: list of Layers the output layers to compute. If this is omitted, self.outputs is used (that is, all outputs that have been added by calling add_output()). Returns ------- The output tensors, or a list of tensors if multiple outputs were requested. """ if len(inputs) != len(self.features): raise ValueError('Expected %d inputs, received %d' % len(self.features), len(inputs)) # TODO Once we drop Python 2 support, turn outputs into a proper keyword arg # instead of using the **kwargs hack. if 'outputs' in kwargs: outputs = kwargs['outputs'] else: outputs = self.outputs feed_dict = dict(zip(self.features, inputs)) results = self._run_graph(outputs, feed_dict, False) if len(results) == 1: return results[0] return results def _predict(self, generator, transformers, outputs, uncertainty): """ Predict outputs for data provided by a generator. This is the private implementation of prediction. Do not call it directly. Instead call one of the public prediction methods. Parameters ---------- generator: Generator Generator that constructs feed dictionaries for TensorGraph. transformers: list List of dc.trans.Transformers. outputs: object If outputs is None, then will assume outputs = self.outputs. If outputs is a Layer/Tensor, then will evaluate and return as a single ndarray. If outputs is a list of Layers/Tensors, will return a list of ndarrays. uncertainty: bool specifies whether this is being called as part of estimating uncertainty. If True, it sets the training flag so that dropout will be enabled, and returns the values of the uncertainty outputs. Returns: y_pred: numpy ndarray of shape (n_samples, n_classes*n_tasks) """ if not self.built: self.build() if outputs is None: outputs = self.outputs elif not isinstance(outputs, collections.Sequence): outputs = [outputs] if uncertainty: if len(self.variances) == 0: raise ValueError('This model cannot compute uncertainties') if len(self.variances) != len(outputs): raise ValueError( 'The number of variances must exactly match the number of outputs') tensors = outputs + self.variances else: tensors = outputs with self._get_tf("Graph").as_default(): # Gather results for each output results = [[] for out in tensors] n_samples = 0 n_enqueued = [0] final_sample = [None] if self.queue_installed: enqueue_thread = threading.Thread( target=_enqueue_batch, args=(self, generator, self._get_tf("Graph"), self.session, n_enqueued, final_sample)) enqueue_thread.start() for feed_dict in self._create_feed_dicts(generator, uncertainty): if self.queue_installed: # Don't let this thread get ahead of the enqueue thread, since if # we try to read more batches than the total number that get queued, # this thread will hang indefinitely. while n_enqueued[0] <= n_samples: if n_samples == final_sample[0]: break time.sleep(0) if n_samples == final_sample[0]: break n_samples += 1 feed_results = self._run_graph(tensors, feed_dict, uncertainty) if tfe.in_eager_mode(): feed_results = [f.numpy() for f in feed_results] if len(feed_results) > 1: if len(transformers): raise ValueError("Does not support transformations " "for multiple outputs.") elif len(feed_results) == 1: result = undo_transforms(feed_results[0], transformers) feed_results = [result] for ind, result in enumerate(feed_results): results[ind].append(result) final_results = [] for result_list in results: final_results.append(np.concatenate(result_list, axis=0)) # If only one output, just return array if len(final_results) == 1: return final_results[0] elif uncertainty: return zip(final_results[:len(outputs)], final_results[len(outputs):]) else: return final_results def predict_on_generator(self, generator, transformers=[], outputs=None): """ Parameters ---------- generator: Generator Generator that constructs feed dictionaries for TensorGraph. transformers: list List of dc.trans.Transformers. outputs: object If outputs is None, then will assume outputs = self.outputs. If outputs is a Layer/Tensor, then will evaluate and return as a single ndarray. If outputs is a list of Layers/Tensors, will return a list of ndarrays. Returns: y_pred: numpy ndarray of shape (n_samples, n_classes*n_tasks) """ return self._predict(generator, transformers, outputs, False) def predict_on_batch(self, X, transformers=[], outputs=None): """Generates predictions for input samples, processing samples in a batch. Parameters ---------- X: ndarray the input data, as a Numpy array. transformers: List List of dc.trans.Transformers Returns ------- A Numpy array of predictions. """ dataset = NumpyDataset(X=X, y=None) generator = self.default_generator(dataset, predict=True, pad_batches=False) return self.predict_on_generator(generator, transformers, outputs) def predict_uncertainty_on_batch(self, X, masks=50): """ Predict the model's outputs, along with the uncertainty in each one. The uncertainty is computed as described in https://arxiv.org/abs/1703.04977. It involves repeating the prediction many times with different dropout masks. The prediction is computed as the average over all the predictions. The uncertainty includes both the variation among the predicted values (epistemic uncertainty) and the model's own estimates for how well it fits the data (aleatoric uncertainty). Not all models support uncertainty prediction. Parameters ---------- X: ndarray the input data, as a Numpy array. masks: int the number of dropout masks to average over Returns ------- for each output, a tuple (y_pred, y_std) where y_pred is the predicted value of the output, and each element of y_std estimates the standard deviation of the corresponding element of y_pred """ dataset = NumpyDataset(X=X, y=None) return self.predict_uncertainty(dataset, masks) def predict(self, dataset, transformers=[], outputs=None): """ Uses self to make predictions on provided Dataset object. Parameters ---------- dataset: dc.data.Dataset Dataset to make prediction on transformers: list List of dc.trans.Transformers. outputs: object If outputs is None, then will assume outputs=self.outputs. If outputs is a Layer/Tensor, then will evaluate and return as a single ndarray. If outputs is a list of Layers/Tensors, will return a list of ndarrays. Returns ------- results: numpy ndarray or list of numpy ndarrays """ generator = self.default_generator(dataset, predict=True, pad_batches=False) return self.predict_on_generator(generator, transformers, outputs) def predict_uncertainty(self, dataset, masks=50): """ Predict the model's outputs, along with the uncertainty in each one. The uncertainty is computed as described in https://arxiv.org/abs/1703.04977. It involves repeating the prediction many times with different dropout masks. The prediction is computed as the average over all the predictions. The uncertainty includes both the variation among the predicted values (epistemic uncertainty) and the model's own estimates for how well it fits the data (aleatoric uncertainty). Not all models support uncertainty prediction. Parameters ---------- dataset: dc.data.Dataset Dataset to make prediction on masks: int the number of dropout masks to average over Returns ------- for each output, a tuple (y_pred, y_std) where y_pred is the predicted value of the output, and each element of y_std estimates the standard deviation of the corresponding element of y_pred """ sum_pred = [] sum_sq_pred = [] sum_var = [] for i in range(masks): generator = self.default_generator( dataset, predict=True, pad_batches=False) results = self._predict(generator, [], self.outputs, True) if len(sum_pred) == 0: for p, v in results: sum_pred.append(p) sum_sq_pred.append(p * p) sum_var.append(v) else: for j, (p, v) in enumerate(results): sum_pred[j] += p sum_sq_pred[j] += p * p sum_var[j] += v output = [] std = [] for i in range(len(sum_pred)): p = sum_pred[i] / masks output.append(p) std.append(np.sqrt(sum_sq_pred[i] / masks - p * p + sum_var[i] / masks)) if len(output) == 1: return (output[0], std[0]) else: return zip(output, std) def topsort(self): def add_layers_to_list(layer, sorted_layers): if layer in sorted_layers: return for in_layer in layer.in_layers: add_layers_to_list(in_layer, sorted_layers) sorted_layers.append(layer) sorted_layers = [] for l in self.features + self.labels + self.task_weights + self.outputs + self.variances: add_layers_to_list(l, sorted_layers) add_layers_to_list(self.loss, sorted_layers) for submodel in self.submodels: if submodel.loss is not None: add_layers_to_list(submodel.loss, sorted_layers) return sorted_layers def build(self): if self.built: return if tfe.in_eager_mode(): # In eager mode, we need to execute every layer once to ensure its variables # have been created. def build_layers(layer, tensors): if layer in tensors: return tensors[layer] inputs = [build_layers(input, tensors) for input in layer.in_layers] if isinstance(layer, Input): # We can't execute Input layers in eager mode, since they would try # to create placeholders. Instead create a tensor of the correct # size and type. shape = [1 if s is None else s for s in layer.shape] tensor = tf.zeros(shape, layer.dtype) else: with tf.name_scope(layer.name): tensor = layer.create_tensor(in_layers=inputs, set_tensors=False) tensors[layer] = tensor return tensor tensors = {} with self._get_tf("Graph").as_default(): # Build the layers. build_layers(self.loss, tensors) for output in self.outputs: build_layers(output, tensors) for variance in self.variances: build_layers(variance, tensors) for submodel in self.submodels: build_layers(submodel.loss, tensors) # Initialize variables. for layer in self.layers.values(): if layer.variable_values is not None: for var, val in zip(layer.variables, layer.variable_values): var.assign(val) self.session = None self._training_placeholder = None self.built = True return # In graph mode we need to create the computation graph. with self._get_tf("Graph").as_default(): self._training_placeholder = tf.placeholder(dtype=tf.float32, shape=()) if self.random_seed is not None: tf.set_random_seed(self.random_seed) self._install_queue() for layer in self.topsort(): with tf.name_scope(layer.name): layer.create_tensor(training=self._training_placeholder) self.rnn_initial_states += layer.rnn_initial_states self.rnn_final_states += layer.rnn_final_states self.rnn_zero_states += layer.rnn_zero_states layer.add_summary_to_tg() self.session = tf.Session(config=self.configproto) self.built = True # Ensure all training operators have been created. self._get_tf('train_op') for submodel in self.submodels: train_op = submodel.get_train_op() # Initialize variables. self.session.run(tf.global_variables_initializer()) for layer in self.layers.values(): if layer.variable_values is not None: variables = self.get_layer_variables(layer) for var, val in zip(variables, layer.variable_values): self.session.run(var.assign(val)) for layer in self.layers.values(): if layer.tensorboard: self.tensorboard = True tf.summary.scalar("loss", self.loss.out_tensor) for layer in self.layers.values(): if layer.tensorboard: tf.summary.tensor_summary(layer.name, layer.out_tensor) if self.tensorboard: writer = self._get_tf("FileWriter") writer.add_graph(self._get_tf("Graph")) writer.close() # As a sanity check, make sure all tensors have the correct shape. for layer in self.layers.values(): try: assert list(layer.shape) == layer.out_tensor.get_shape().as_list( ), '%s: Expected shape %s does not match actual shape %s' % ( layer.name, layer.shape, layer.out_tensor.get_shape().as_list()) except NotImplementedError: pass def _install_queue(self): """ """ if not self.use_queue or self.queue_installed: for layer in self.features + self.labels + self.task_weights: layer.pre_queue = True return inputs = self.features + self.labels + self.task_weights if len(inputs) == 0: return names = [] shapes = [] pre_q_inputs = [] q = InputFifoQueue(shapes, names, in_layers=pre_q_inputs) q.name = "%s_%s" % (q.__class__.__name__, len(self.layers) + 1) for layer in inputs: pre_q_input = layer.create_pre_q() shapes.append(pre_q_input.shape) names.append(pre_q_input.name) pre_q_inputs.append(pre_q_input) layer.in_layers.append(q) self._add_layer(q) self.input_queue = q self.queue_installed = True def set_loss(self, layer): self._add_layer(layer) self.loss = layer def add_output(self, layer): """Add an output layer that can be computed by predict()""" self._add_layer(layer) self.outputs.append(layer) def add_variance(self, layer): """Add a layer that computes the variance in an output. If a model supports uncertainty, it must call add_variance() once for every output. Each variance layer has the same shape as the corresponding output, and each element computes an estimate of the variance from aleatoric uncertainty in the corresponding element of the output. In addition, if a model supports uncertainty it MUST use dropout on every layer. Otherwise, the uncertainties it computes will be inaccurate. """ self._add_layer(layer) self.variances.append(layer) def set_optimizer(self, optimizer): """Set the optimizer to use for fitting.""" self.optimizer = optimizer def create_submodel(self, layers=None, loss=None, optimizer=None): """Create an alternate objective for training one piece of a TensorGraph. A TensorGraph consists of a set of layers, and specifies a loss function and optimizer to use for training those layers. Usually this is sufficient, but there are cases where you want to train different parts of a model separately. For example, a GAN consists of a generator and a discriminator. They are trained separately, and they use different loss functions. A submodel defines an alternate objective to use in cases like this. It may optionally specify any of the following: a subset of layers in the model to train; a different loss function; and a different optimizer to use. This method creates a submodel, which you can then pass to fit() to use it for training. Parameters ---------- layers: list the list of layers to train. If None, all layers in the model will be trained. loss: Layer the loss function to optimize. If None, the model's main loss function will be used. optimizer: Optimizer the optimizer to use for training. If None, the model's main optimizer will be used. Returns ------- the newly created submodel, which can be passed to any of the fitting methods. """ if self.built: raise ValueError('Submodels must be created before build() is called.') submodel = Submodel(self, layers, loss, optimizer) self.submodels.append(submodel) if loss is not None: self._add_layer(loss) return submodel def get_pickling_errors(self, obj, seen=None): if seen == None: seen = [] try: state = obj.__getstate__() except AttributeError: return if state == None: return if isinstance(state, tuple): if not isinstance(state[0], dict): state = state[1] else: state = state[0].update(state[1]) result = {} for i in state: try: pickle.dumps(state[i], protocol=2) except pickle.PicklingError: if not state[i] in seen: seen.append(state[i]) result[i] = self.get_pickling_errors(state[i], seen) return result def save(self): # Remove out_tensor from the object to be pickled must_restore = False tensor_objects = self.tensor_objects rnn_initial_states = self.rnn_initial_states rnn_final_states = self.rnn_final_states rnn_zero_states = self.rnn_zero_states session = self.session self.tensor_objects = {} self.rnn_initial_states = [] self.rnn_final_states = [] self.rnn_zero_states = [] self.session = None out_tensors = [] submodel_ops = [] if self.built: must_restore = True for layer in self.topsort(): out_tensors.append(layer.none_tensors()) for submodel in self.submodels: submodel_ops.append(submodel._train_op) submodel._train_op = None training_placeholder = self._training_placeholder self._training_placeholder = None self.built = False # Pickle itself pickle_name = os.path.join(self.model_dir, "model.pickle") with open(pickle_name, 'wb') as fout: try: pickle.dump(self, fout) except Exception as e: logger.info(self.get_pickling_errors(self)) raise e # add out_tensor back to everyone if must_restore: for index, layer in enumerate(self.topsort()): layer.set_tensors(out_tensors[index]) for submodel, op in zip(self.submodels, submodel_ops): submodel._train_op = op self._training_placeholder = training_placeholder self.built = True self.tensor_objects = tensor_objects self.rnn_initial_states = rnn_initial_states self.rnn_final_states = rnn_final_states self.rnn_zero_states = rnn_zero_states self.session = session def evaluate_generator(self, feed_dict_generator, metrics, transformers=[], labels=None, outputs=None, weights=[], per_task_metrics=False): if labels is None: raise ValueError n_tasks = len(self.outputs) n_classes = self.outputs[0].out_tensor.get_shape()[-1].value evaluator = GeneratorEvaluator( self, feed_dict_generator, transformers, labels=labels, outputs=outputs, weights=weights, n_tasks=n_tasks, n_classes=n_classes) if not per_task_metrics: scores = evaluator.compute_model_performance(metrics) return scores else: scores, per_task_scores = evaluator.compute_model_performance( metrics, per_task_metrics=per_task_metrics) return scores, per_task_scores def get_layer_variables(self, layer): """Get the list of trainable variables in a layer of the graph.""" if tfe.in_eager_mode(): return layer.variables if not self.built: self.build() with self._get_tf("Graph").as_default(): if layer.variable_scope == '': return [] return tf.get_collection( tf.GraphKeys.TRAINABLE_VARIABLES, scope=layer.variable_scope) def get_variables(self): """Get the list of all trainable variables in the graph.""" if not self.built: self.build() if tfe.in_eager_mode(): variables = [] for layer in self.layers.values(): variables += layer.variables return variables else: with self._get_tf("Graph").as_default(): return tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES) def get_global_step(self): return self._get_tf("GlobalStep") def _get_tf(self, obj): """Fetches underlying TensorFlow primitives. Parameters ---------- obj: str If "Graph", returns tf.Graph instance. If "FileWriter", returns tf.summary.FileWriter. If "Optimizer", returns the optimizer. If "train_op", returns the train operation. If "summary_op", returns the merged summary. If "GlobalStep" returns the global step. Returns ------- TensorFlow Object """ if obj in self.tensor_objects and self.tensor_objects[obj] is not None: return self.tensor_objects[obj] if obj == "Graph": self.tensor_objects['Graph'] = tf.Graph() elif obj == "FileWriter": self.tensor_objects['FileWriter'] = tf.summary.FileWriter(self.model_dir) elif obj == 'Optimizer': self.tensor_objects['Optimizer'] = self.optimizer._create_optimizer( self._get_tf('GlobalStep')) elif obj == 'train_op': opt = self._get_tf('Optimizer') global_step = self._get_tf('GlobalStep') try: self.tensor_objects['train_op'] = opt.minimize( self.loss.out_tensor, global_step=global_step) except ValueError: # The loss doesn't depend on any variables. self.tensor_objects['train_op'] = 0 elif obj == 'summary_op': self.tensor_objects['summary_op'] = tf.summary.merge_all( key=tf.GraphKeys.SUMMARIES) elif obj == 'GlobalStep': with self._get_tf("Graph").as_default(): self.tensor_objects['GlobalStep'] = create_variable(0, trainable=False) return self._get_tf(obj) def save_checkpoint(self, max_checkpoints_to_keep=5): """Save a checkpoint to disk. Usually you do not need to call this method, since fit() saves checkpoints automatically. If you have disabled automatic checkpointing during fitting, this can be called to manually write checkpoints. Parameters ---------- max_checkpoints_to_keep: int the maximum number of checkpoints to keep. Older checkpoints are discarded. """ saver = tf.train.Saver( self.get_variables(), max_to_keep=max_checkpoints_to_keep) saver.save(self.session, self.save_file, global_step=self.global_step) def get_checkpoints(self): """Get a list of all available checkpoint files.""" return tf.train.get_checkpoint_state( self.model_dir).all_model_checkpoint_paths def restore(self, checkpoint=None): """Reload the values of all variables from a checkpoint file. Parameters ---------- checkpoint: str the path to the checkpoint file to load. If this is None, the most recent checkpoint will be chosen automatically. Call get_checkpoints() to get a list of all available checkpoints. """ if not self.built: self.build() if checkpoint is None: checkpoint = tf.train.latest_checkpoint(self.model_dir) if checkpoint is None: raise ValueError('No checkpoint found') with self._get_tf("Graph").as_default(): reader = NewCheckpointReader(checkpoint) var_names = set([x for x in reader.get_variable_to_shape_map()]) var_list = [] for var in self.get_variables(): name = var.name if ':' in name: name = name[:name.rfind(':')] if name in var_names: var_list.append(var) saver = tf.train.Saver(var_list=var_list) saver.restore(self.session, checkpoint) def get_num_tasks(self): return len(self.outputs) def get_pre_q_input(self, input_layer): layer_name = input_layer.name pre_q_name = "%s_pre_q" % layer_name return self.layers[pre_q_name] @staticmethod def load_from_dir(model_dir, restore=True): pickle_name = os.path.join(model_dir, "model.pickle") with open(pickle_name, 'rb') as fout: tensorgraph = pickle.load(fout) tensorgraph.built = False tensorgraph.model_dir = model_dir if restore: try: tensorgraph.restore() except ValueError: pass # No checkpoint to load return tensorgraph def __del__(self): pass def _create_feed_dicts(self, generator, training): """Create feed dicts for use in fitting or prediction. Parameters ---------- generator: Generator the feed dict generator that was passed to fit_generator() or predict_on_generator() training: bool True during training, False during prediction """ train_value = 1.0 if training else 0.0 if self.queue_installed: while True: yield {self._training_placeholder: train_value} else: for d in generator: feed_dict = {} for key, value in d.items(): if isinstance(key, Input): value = _ensure_value_shape(value, key) if tfe.in_eager_mode(): value = tf.cast(value, key.dtype) feed_dict[key] = value else: feed_dict[key] = value if not tfe.in_eager_mode(): feed_dict[self._training_placeholder] = train_value yield feed_dict def _run_graph(self, outputs, feed_dict, training): """Run the calculations in the graph to compute some outputs. In graph mode, this just calls session.run(). In eager mode, it executes all required layers to compute the output. Parameters ---------- outputs: list of Layers the output layers to compute feed_dict: dict maps input layers to values training: bool whether this is being executed in training mode """ if not tfe.in_eager_mode(): return self.session.run(outputs, feed_dict) def run_layers(layer, tensors): if layer in tensors: return tensors[layer] inputs = [run_layers(input, tensors) for input in layer.in_layers] tensor = layer.create_tensor( in_layers=inputs, set_tensors=False, training=training) tensors[layer] = tensor return tensor tensors = feed_dict.copy() return [run_layers(o, tensors) for o in outputs] def make_estimator(self, feature_columns, weight_column=None, metrics={}, model_dir=None, config=None): """Construct a Tensorflow Estimator from this model. tf.estimator.Estimator is the standard Tensorflow API for representing models. This method provides interoperability between DeepChem and other Tensorflow based tools by allowing any model to be used an Estimator. Once this method returns, the Estimator it created is independent of the model it was created from. They do not share tensors, variables, save files, or any other resources. The Estimator is a self contained object with its own methods for training, evaluation, prediction, checkpointing, etc. Parameters ---------- feature_columns: list of tf.feature_column objects this describes the input features to the models. There must be one entry for each Feature layer in this model's features field. weight_column: tf.feature_column or None if this model includes a Weights layer, this describes the input weights. Otherwise, this should be None. metrics: map metrics that should be computed in calls to evaluate(). For each entry, the key is the name to report for the metric, and the value is a function of the form f(labels, predictions, weights) that returns the tensors for computing the metric. Any of the functions in tf.metrics can be used, as can other functions that satisfy the same interface. model_dir: str the directory in which the Estimator should save files. If None, this defaults to the model's model_dir. config: RunConfig configuration options for the Estimator """ # Check the inputs. if tfe.in_eager_mode(): raise ValueError('make_estimator() is not supported in eager mode') if len(feature_columns) != len(self.features): raise ValueError( 'This model requires %d feature column(s)' % len(self.features)) if len(self.labels) != 1: raise ValueError( 'Can only create an Estimator from a model with exactly one Label input' ) if len(self.task_weights) > 1: raise ValueError( 'Cannot create an Estimator from a model with multiple Weight inputs') if weight_column is None: if len(self.task_weights) > 0: raise ValueError('This model requires a weight column') else: if len(self.task_weights) == 0: raise ValueError( 'Cannot specify weight_column for a model with no Weight inputs') if model_dir is None: model_dir = self.model_dir # Define a function that recursively creates tensors from layers. def create_tensors(layer, tensors, training): if layer in tensors: return tensors[layer] inputs = [ create_tensors(in_layer, tensors, training) for in_layer in layer.in_layers ] tensor = layer.create_tensor( in_layers=inputs, set_tensors=False, training=training) tensors[layer] = tensor layer.add_summary_to_tg(tensor) return tensor # Define the model function. def model_fn(features, labels, mode): # Define the inputs. tensors = self.create_estimator_inputs(feature_columns, weight_column, features, labels, mode) for layer, tensor in tensors.items(): layer.add_summary_to_tg(tensor) # Create the correct outputs, based on the mode. if mode == tf.estimator.ModeKeys.PREDICT: predictions = {} for i, output in enumerate(self.outputs): predictions[i] = create_tensors(output, tensors, 0) return tf.estimator.EstimatorSpec(mode, predictions=predictions) if mode == tf.estimator.ModeKeys.EVAL: loss = create_tensors(self.loss, tensors, 0) predictions = create_tensors(self.outputs[0], tensors, 0) if len(self.task_weights) == 0: weights = None else: weights = tensors[self.task_weights[0]] eval_metric_ops = {} for name, function in metrics.items(): eval_metric_ops[name] = function(tensors[self.labels[0]], predictions, weights) return tf.estimator.EstimatorSpec( mode, loss=loss, eval_metric_ops=eval_metric_ops) if mode == tf.estimator.ModeKeys.TRAIN: loss = create_tensors(self.loss, tensors, 1) global_step = tf.train.get_global_step() optimizer = self.optimizer._create_optimizer(global_step) train_op = optimizer.minimize(loss, global_step=global_step) return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op) raise ValueError('Unknown mode') # Create the Estimator. return tf.estimator.Estimator( model_fn=model_fn, model_dir=model_dir, config=config) def create_estimator_inputs(self, feature_columns, weight_column, features, labels, mode): """This is called by make_estimator() to create tensors for the inputs. feature_columns and weight_column are the arguments passed to make_estimator(). features, labels, and mode are the arguments passed to the estimator's model function. This method creates and returns a dict with one entry for every Feature, Label, or Weights layer in the graph. The keys are the layers, and the values are the tensors that correspond to them. Any subclass that overrides default_generator() must also override this method. """ if self.__class__.default_generator != TensorGraph.default_generator: raise ValueError( "Class overrides default_generator() but not create_estimator_inputs()" ) tensors = {} for layer, column in zip(self.features, feature_columns): tensors[layer] = tf.feature_column.input_layer(features, [column]) if weight_column is not None: tensors[self.task_weights[0]] = tf.feature_column.input_layer( features, [weight_column]) if labels is not None: tensors[self.labels[0]] = tf.cast(labels, self.labels[0].dtype) return tensors
def __init__(self, learner, learning_rate=0.001, optimization_steps=1, meta_batch_size=10, optimizer=Adam(), model_dir=None): """Create an object for performing meta-optimization. Parameters ---------- learner: MetaLearner defines the meta-learning problem learning_rate: float, Layer, or Tensor the learning rate to use for optimizing each task (not to be confused with the one used for meta-learning). This can optionally be made a variable (represented as a Layer or Tensor), in which case the learning rate will itself be learnable. optimization_steps: int the number of steps of gradient descent to perform for each task meta_batch_size: int the number of tasks to use for each step of meta-learning optimizer: Optimizer the optimizer to use for meta-learning (not to be confused with the gradient descent optimization performed for each task) model_dir: str the directory in which the model will be saved. If None, a temporary directory will be created. """ # Record inputs. self.learner = learner if isinstance(learner.loss, Layer): self._loss = learner.loss.out_tensor else: self._loss = learner.loss if isinstance(learning_rate, Layer): self._learning_rate = learning_rate.out_tensor else: self._learning_rate = learning_rate self.meta_batch_size = meta_batch_size self.optimizer = optimizer self._graph = self._loss.graph # Create the output directory if necessary. self._model_dir_is_temp = False if model_dir is not None: if not os.path.exists(model_dir): os.makedirs(model_dir) else: model_dir = tempfile.mkdtemp() self._model_dir_is_temp = True self.model_dir = model_dir self.save_file = "%s/%s" % (self.model_dir, "model") with self._graph.as_default(): # Create duplicate placeholders for meta-optimization. learner.select_task() self._meta_placeholders = {} for p in learner.get_batch().keys(): name = 'meta/' + p.name.split(':')[0] self._meta_placeholders[p] = tf.placeholder( p.dtype, p.shape, name) # Create the loss function for meta-optimization. updated_loss = self._loss updated_variables = learner.variables for i in range(optimization_steps): gradients = tf.gradients(updated_loss, updated_variables) updated_variables = [ v if g is None else v - self._learning_rate * g for v, g in zip(updated_variables, gradients) ] replacements = dict( (tf.convert_to_tensor(v1), v2) for v1, v2 in zip(learner.variables, updated_variables)) if i == optimization_steps - 1: # In the final loss, use different placeholders for all inputs so the loss will be # computed from a different batch. for p in self._meta_placeholders: replacements[p] = self._meta_placeholders[p] updated_loss = tf.contrib.graph_editor.graph_replace( self._loss, replacements) self._meta_loss = updated_loss # Create variables for accumulating the gradients. variables = list(learner.variables) gradients = tf.gradients(self._meta_loss, variables) for i in reversed(range(len(variables))): if gradients[i] is None: del variables[i] del gradients[i] zero_gradients = [tf.zeros(g.shape, g.dtype) for g in gradients] summed_gradients = [ tf.Variable(z, trainable=False) for z in zero_gradients ] self._clear_gradients = tf.group(*[ s.assign(z) for s, z in zip(summed_gradients, zero_gradients) ]) self._add_gradients = tf.group( * [s.assign_add(g) for s, g in zip(summed_gradients, gradients)]) # Create the optimizers for meta-optimization and task optimization. self._global_step = tf.placeholder(tf.int32, []) grads_and_vars = list(zip(summed_gradients, variables)) self._meta_train_op = optimizer._create_optimizer( self._global_step).apply_gradients(grads_and_vars) task_optimizer = GradientDescent(learning_rate=self._learning_rate) self._task_train_op = task_optimizer._create_optimizer( self._global_step).minimize(self._loss) self._session = tf.Session() # Create a Checkpoint for saving. self._checkpoint = tf.train.Checkpoint() self._checkpoint.listed = learner.variables
class KerasModel(Model): """This is a DeepChem model implemented by a Keras model. This class provides several advantages over using the Keras model's fitting and prediction methods directly. 1. It provides better integration with the rest of DeepChem, such as direct support for Datasets and Transformers. 2. It defines the loss in a more flexible way. In particular, Keras does not support multidimensional weight matrices, which makes it impossible to implement most multitask models with Keras. 3. It provides various additional features not found in the Keras Model class, such as uncertainty prediction and saliency mapping. The loss function for a model can be defined in two different ways. For models that have only a single output and use a standard loss function, you can simply provide a dc.models.losses.Loss object. This defines the loss for each sample or sample/task pair. The result is automatically multiplied by the weights and averaged over the batch. Any additional losses computed by model layers, such as weight decay penalties, are also added. For more complicated cases, you can instead provide a function that directly computes the total loss. It must be of the form f(outputs, labels, weights), taking the list of outputs from the model, the expected values, and any weight matrices. It should return a scalar equal to the value of the loss function for the batch. No additional processing is done to the result; it is up to you to do any weighting, averaging, adding of penalty terms, etc. You can optionally provide an output_types argument, which describes how to interpret the model's outputs. This should be a list of strings, one for each output. Each entry must have one of the following values: - 'prediction': This is a normal output, and will be returned by predict(). If output types are not specified, all outputs are assumed to be of this type. - 'loss': This output will be used in place of the normal outputs for computing the loss function. For example, models that output probability distributions usually do it by computing unbounded numbers (the logits), then passing them through a softmax function to turn them into probabilities. When computing the cross entropy, it is more numerically stable to use the logits directly rather than the probabilities. You can do this by having the model produce both probabilities and logits as outputs, then specifying output_types=['prediction', 'loss']. When predict() is called, only the first output (the probabilities) will be returned. But during training, it is the second output (the logits) that will be passed to the loss function. - 'variance': This output is used for estimating the uncertainty in another output. To create a model that can estimate uncertainty, there must be the same number of 'prediction' and 'variance' outputs. Each variance output must have the same shape as the corresponding prediction output, and each element is an estimate of the variance in the corresponding prediction. Also be aware that if a model supports uncertainty, it MUST use dropout on every layer. Otherwise, the uncertainties it computes will be inaccurate. """ def __init__(self, model, loss, output_types=None, batch_size=100, model_dir=None, learning_rate=0.001, optimizer=None, tensorboard=False, tensorboard_log_frequency=100, **kwargs): """Create a new KerasModel. Parameters ---------- model: tf.keras.Model the Keras model implementing the calculation loss: dc.models.losses.Loss or function a Loss or function defining how to compute the training loss for each batch, as described above output_types: list of strings the type of each output from the model, as described above batch_size: int default batch size for training and evaluating model_dir: str the directory on disk where the model will be stored. If this is None, a temporary directory is created. learning_rate: float or LearningRateSchedule the learning rate to use for fitting. If optimizer is specified, this is ignored. optimizer: Optimizer the optimizer to use for fitting. If this is specified, learning_rate is ignored. tensorboard: bool whether to log progress to TensorBoard during training tensorboard_log_frequency: int the frequency at which to log data to TensorBoard, measured in batches """ super(KerasModel, self).__init__( model_instance=model, model_dir=model_dir, **kwargs) self.model = model if isinstance(loss, Loss): self._loss_fn = _StandardLoss(model, loss) else: self._loss_fn = loss self.batch_size = batch_size if optimizer is None: self.optimizer = Adam(learning_rate=learning_rate) else: self.optimizer = optimizer self.tensorboard = tensorboard self.tensorboard_log_frequency = tensorboard_log_frequency self._tensorboard_step = 0 if tensorboard and tf.executing_eagerly(): raise ValueError( "Logging to TensorBoard is not currently supported in eager mode") if output_types is None: self._prediction_outputs = None self._loss_outputs = None self._variance_outputs = None else: self._prediction_outputs = [] self._loss_outputs = [] self._variance_outputs = [] for i, type in enumerate(output_types): if type == 'prediction': self._prediction_outputs.append(i) elif type == 'loss': self._loss_outputs.append(i) elif type == 'variance': self._variance_outputs.append(i) else: raise ValueError('Unknown output type "%s"' % type) self._built = False self._inputs_built = False self._training_ops_built = False self._initialized_vars = set() def _ensure_built(self): """The first time this is called, create internal data structures.""" if self._built: return self._built = True if not tf.executing_eagerly(): self.session = tf.Session() self._global_step = tf.Variable(0, trainable=False) self._tf_optimizer = self.optimizer._create_optimizer(self._global_step) self._checkpoint = tf.train.Checkpoint( optimizer=self._tf_optimizer, model=self.model) self._init_new_vars() def _create_inputs(self, example_inputs): """The first time this is called, create tensors representing the inputs and outputs.""" if self._inputs_built: return self._ensure_built() self._inputs_built = True if len(self.model.inputs) > 0: self._input_dtypes = [t.dtype.as_numpy_dtype for t in self.model.inputs] else: self._input_dtypes = [ np.float32 if x.dtype == np.float64 else x.dtype for x in example_inputs ] if tf.executing_eagerly(): return if len(self.model.inputs) > 0: self._input_placeholders = self.model.inputs else: # The model doesn't specify inputs, so guess the input shapes based on the # example batch. input_shapes = [(None,) + i.shape[1:] for i in example_inputs] self._input_placeholders = [ tf.placeholder(dtype=tf.as_dtype(t), shape=s) for s, t in zip(input_shapes, self._input_dtypes) ] if len(input_shapes) == 1: self.model.build(input_shapes[0]) else: self.model.build(input_shapes) if len(self._input_placeholders) == 1: self._output_tensors = self.model( self._input_placeholders[0], training=False) self._uncertainty_tensors = self.model( self._input_placeholders[0], training=True) else: self._output_tensors = self.model( self._input_placeholders, training=False) self._uncertainty_tensors = self.model( self._input_placeholders, training=True) if isinstance(self._output_tensors, tf.Tensor): self._output_tensors = [self._output_tensors] if self._prediction_outputs is None: self._prediction_outputs = list(range(len(self._output_tensors))) self._loss_outputs = list(range(len(self._output_tensors))) self._init_new_vars() def _create_training_ops(self, example_batch): """The first time this is called, create tensors used in optimization.""" if self._training_ops_built: return self._create_inputs(example_batch[0]) self._training_ops_built = True self._label_dtypes = [ np.float32 if x.dtype == np.float64 else x.dtype for x in example_batch[1] ] self._weights_dtypes = [ np.float32 if x.dtype == np.float64 else x.dtype for x in example_batch[2] ] if tf.executing_eagerly(): return self._label_placeholders = [ tf.placeholder(dtype=tf.as_dtype(t), shape=x.shape) for x, t in zip(example_batch[1], self._label_dtypes) ] self._weights_placeholders = [ tf.placeholder(dtype=tf.as_dtype(t), shape=x.shape) for x, t in zip(example_batch[2], self._weights_dtypes) ] self._loss_tensor = self._loss_fn( [self._output_tensors[i] for i in self._loss_outputs], self._label_placeholders, self._weights_placeholders) try: self._train_op = self._tf_optimizer.minimize( self._loss_tensor, global_step=self._global_step) except ValueError: # The loss doesn't depend on any variables. self._train_op = 0 if self.tensorboard: self._summary_ops = tf.summary.scalar('loss', self._loss_tensor) self._summary_writer = tf.summary.FileWriter(self.model_dir) self._init_new_vars() def _init_new_vars(self): """Initialize any new variables created since the last call to this method.""" if not tf.executing_eagerly(): vars = set(tf.global_variables()) new_vars = vars.difference(self._initialized_vars) self.session.run(tf.variables_initializer(new_vars)) self._initialized_vars = vars def fit(self, dataset, nb_epoch=10, max_checkpoints_to_keep=5, checkpoint_interval=1000, deterministic=False, restore=False): """Train this model on a dataset. Parameters ---------- dataset: Dataset the Dataset to train on nb_epoch: int the number of epochs to train for max_checkpoints_to_keep: int the maximum number of checkpoints to keep. Older checkpoints are discarded. checkpoint_interval: int the frequency at which to write checkpoints, measured in training steps. Set this to 0 to disable automatic checkpointing. deterministic: bool if True, the samples are processed in order. If False, a different random order is used for each epoch. restore: bool if True, restore the model from the most recent checkpoint and continue training from there. If False, retrain the model from scratch. """ return self.fit_generator( self.default_generator( dataset, epochs=nb_epoch, deterministic=deterministic), max_checkpoints_to_keep, checkpoint_interval, restore) def fit_generator(self, generator, max_checkpoints_to_keep=5, checkpoint_interval=1000, restore=False): """Train this model on data from a generator. Parameters ---------- generator: generator this should generate batches, each represented as a tuple of the form (inputs, labels, weights). max_checkpoints_to_keep: int the maximum number of checkpoints to keep. Older checkpoints are discarded. checkpoint_interval: int the frequency at which to write checkpoints, measured in training steps. Set this to 0 to disable automatic checkpointing. restore: bool if True, restore the model from the most recent checkpoint and continue training from there. If False, retrain the model from scratch. Returns ------- the average loss over the most recent checkpoint interval """ self._ensure_built() if restore: self.restore() if checkpoint_interval > 0: manager = tf.train.CheckpointManager(self._checkpoint, self.model_dir, max_checkpoints_to_keep) avg_loss = 0.0 averaged_batches = 0 time1 = time.time() # Main training loop. for batch in generator: self._create_training_ops(batch) inputs, labels, weights = self._prepare_batch(batch) self._tensorboard_step += 1 should_log = ( self.tensorboard and self._tensorboard_step % self.tensorboard_log_frequency == 0) if tf.executing_eagerly(): # In eager mode we execute the loss function, accumulating the gradients. with tf.GradientTape() as tape: outputs = self.model(inputs[0]) if isinstance(outputs, tf.Tensor): outputs = [outputs] if self._loss_outputs is not None: outputs = [outputs[i] for i in self._loss_outputs] loss = self._loss_fn(outputs, labels, weights) avg_loss += loss grads = tape.gradient(loss, self.model.trainable_variables) self._tf_optimizer.apply_gradients( zip(grads, self.model.trainable_variables)) tf.assign_add(self._global_step, 1) current_step = self._global_step.numpy() else: # In graph mode we execute the training op. fetches = [self._train_op, self._loss_tensor, self._global_step] if should_log: fetches.append(self._summary_ops) feed_dict = dict(zip(self._input_placeholders, inputs)) feed_dict.update(dict(zip(self._label_placeholders, labels))) feed_dict.update(dict(zip(self._weights_placeholders, weights))) fetched_values = self.session.run(fetches, feed_dict=feed_dict) avg_loss += fetched_values[1] current_step = fetched_values[2] if should_log: self._summary_writer.reopen() self._summary_writer.add_summary( fetched_values[3], global_step=current_step) self._summary_writer.close() # Report progress and write checkpoints. averaged_batches += 1 if checkpoint_interval > 0 and current_step % checkpoint_interval == checkpoint_interval - 1: self._exec_with_session(lambda: manager.save()) avg_loss = float(avg_loss) / averaged_batches print( 'Ending global_step %d: Average loss %g' % (current_step, avg_loss)) avg_loss = 0.0 averaged_batches = 0 # Report final results. if checkpoint_interval > 0: if averaged_batches > 0: avg_loss = float(avg_loss) / averaged_batches print( 'Ending global_step %d: Average loss %g' % (current_step, avg_loss)) self._exec_with_session(lambda: manager.save()) time2 = time.time() print("TIMING: model fitting took %0.3f s" % (time2 - time1)) return avg_loss def fit_on_batch(self, X, y, w): """Perform a single step of training. Parameters ---------- X: ndarray the inputs for the batch y: ndarray the labels for the batch w: ndarray the weights for the batch """ if not self.built: self.build() dataset = NumpyDataset(X, y, w) return self.fit(dataset, nb_epoch=1) def _predict(self, generator, transformers, uncertainty): """ Predict outputs for data provided by a generator. This is the private implementation of prediction. Do not call it directly. Instead call one of the public prediction methods. Parameters ---------- generator: generator this should generate batches, each represented as a tuple of the form (inputs, labels, weights). transformers: list of dc.trans.Transformers Transformers that the input data has been transformed by. The output is passed through these transformers to undo the transformations. uncertainty: bool specifies whether this is being called as part of estimating uncertainty. If True, it sets the training flag so that dropout will be enabled, and returns the values of the uncertainty outputs. Returns: a NumPy array of the model produces a single output, or a list of arrays if it produces multiple outputs """ results = None variances = None if uncertainty: if self._variance_outputs is None or len(self._variance_outputs) == 0: raise ValueError('This model cannot compute uncertainties') if len(self._variance_outputs) != len(self._prediction_outputs): raise ValueError( 'The number of variances must exactly match the number of outputs') for batch in generator: inputs, labels, weights = batch self._create_inputs(inputs) inputs, _, _ = self._prepare_batch((inputs, None, None)) if tf.executing_eagerly(): # In eager mode we invoke the model directly. if len(inputs) == 1: inputs = inputs[0] outputs = self.model(inputs, training=uncertainty) outputs = [t.numpy() for t in outputs] else: # In graph mode we execute the output tensors. if uncertainty: fetches = self._uncertainty_tensors else: fetches = self._output_tensors feed_dict = dict(zip(self._input_placeholders, inputs)) outputs = self.session.run(fetches, feed_dict=feed_dict) # Apply tranformers and record results. if uncertainty: var = [outputs[i] for i in self._variance_outputs] if variances is None: variances = var else: for i, t in enumerate(var): variances[i].append(t) if self._prediction_outputs is not None: outputs = [outputs[i] for i in self._prediction_outputs] if len(transformers) > 0: if len(outputs) > 1: raise ValueError( "predict() does not support Transformers for models with multiple outputs." ) elif len(outputs) == 1: outputs = [undo_transforms(outputs[0], transformers)] if results is None: results = [outputs] else: for i, t in enumerate(outputs): results[i].append(t) # Concatenate arrays to create the final results. final_results = [] final_variances = [] for r in results: final_results.append(np.concatenate(r, axis=0)) if uncertainty: for v in variances: final_variances.append(np.concatenate(v, axis=0)) return zip(final_results, final_variances) # If only one output, just return array if len(final_results) == 1: return final_results[0] else: return final_results def predict_on_generator(self, generator, transformers=[]): """ Parameters ---------- generator: generator this should generate batches, each represented as a tuple of the form (inputs, labels, weights). transformers: list of dc.trans.Transformers Transformers that the input data has been transformed by. The output is passed through these transformers to undo the transformations. Returns: a NumPy array of the model produces a single output, or a list of arrays if it produces multiple outputs """ return self._predict(generator, transformers, False) def predict_on_batch(self, X, transformers=[]): """Generates predictions for input samples, processing samples in a batch. Parameters ---------- X: ndarray the input data, as a Numpy array. transformers: list of dc.trans.Transformers Transformers that the input data has been transformed by. The output is passed through these transformers to undo the transformations. Returns ------- a NumPy array of the model produces a single output, or a list of arrays if it produces multiple outputs """ dataset = NumpyDataset(X=X, y=None) return self.predict(dataset, transformers) def predict_uncertainty_on_batch(self, X, masks=50): """ Predict the model's outputs, along with the uncertainty in each one. The uncertainty is computed as described in https://arxiv.org/abs/1703.04977. It involves repeating the prediction many times with different dropout masks. The prediction is computed as the average over all the predictions. The uncertainty includes both the variation among the predicted values (epistemic uncertainty) and the model's own estimates for how well it fits the data (aleatoric uncertainty). Not all models support uncertainty prediction. Parameters ---------- X: ndarray the input data, as a Numpy array. masks: int the number of dropout masks to average over Returns ------- for each output, a tuple (y_pred, y_std) where y_pred is the predicted value of the output, and each element of y_std estimates the standard deviation of the corresponding element of y_pred """ dataset = NumpyDataset(X=X, y=None) return self.predict_uncertainty(dataset, masks) def predict(self, dataset, transformers=[]): """ Uses self to make predictions on provided Dataset object. Parameters ---------- dataset: dc.data.Dataset Dataset to make prediction on transformers: list of dc.trans.Transformers Transformers that the input data has been transformed by. The output is passed through these transformers to undo the transformations. Returns ------- a NumPy array of the model produces a single output, or a list of arrays if it produces multiple outputs """ generator = self.default_generator(dataset, predict=True, pad_batches=False) return self.predict_on_generator(generator, transformers) def predict_uncertainty(self, dataset, masks=50): """ Predict the model's outputs, along with the uncertainty in each one. The uncertainty is computed as described in https://arxiv.org/abs/1703.04977. It involves repeating the prediction many times with different dropout masks. The prediction is computed as the average over all the predictions. The uncertainty includes both the variation among the predicted values (epistemic uncertainty) and the model's own estimates for how well it fits the data (aleatoric uncertainty). Not all models support uncertainty prediction. Parameters ---------- dataset: dc.data.Dataset Dataset to make prediction on masks: int the number of dropout masks to average over Returns ------- for each output, a tuple (y_pred, y_std) where y_pred is the predicted value of the output, and each element of y_std estimates the standard deviation of the corresponding element of y_pred """ sum_pred = [] sum_sq_pred = [] sum_var = [] for i in range(masks): generator = self.default_generator( dataset, predict=True, pad_batches=False) results = self._predict(generator, [], True) if len(sum_pred) == 0: for p, v in results: sum_pred.append(p) sum_sq_pred.append(p * p) sum_var.append(v) else: for j, (p, v) in enumerate(results): sum_pred[j] += p sum_sq_pred[j] += p * p sum_var[j] += v output = [] std = [] for i in range(len(sum_pred)): p = sum_pred[i] / masks output.append(p) std.append(np.sqrt(sum_sq_pred[i] / masks - p * p + sum_var[i] / masks)) if len(output) == 1: return (output[0], std[0]) else: return zip(output, std) def evaluate_generator(self, generator, metrics, transformers=[], per_task_metrics=False): """Evaluate the performance of this model on the data produced by a generator. Parameters ---------- generator: generator this should generate batches, each represented as a tuple of the form (inputs, labels, weights). metric: deepchem.metrics.Metric Evaluation metric transformers: list of dc.trans.Transformers Transformers that the input data has been transformed by. The output is passed through these transformers to undo the transformations. per_task_metrics: bool If True, return per-task scores. Returns ------- dict Maps tasks to scores under metric. """ evaluator = GeneratorEvaluator(self, generator, transformers) return evaluator.compute_model_performance(metrics, per_task_metrics) def compute_saliency(self, X): """Compute the saliency map for an input sample. This computes the Jacobian matrix with the derivative of each output element with respect to each input element. More precisely, - If this model has a single output, it returns a matrix of shape (output_shape, input_shape) with the derivatives. - If this model has multiple outputs, it returns a list of matrices, one for each output. This method cannot be used on models that take multiple inputs. Parameters ---------- X: ndarray the input data for a single sample Returns ------- the Jacobian matrix, or a list of matrices """ input_shape = X.shape X = np.reshape(X, [1] + list(X.shape)) self._create_inputs([X]) X, _, _ = self._prepare_batch((X, None, None)) if tf.executing_eagerly(): # In eager mode we use a GradientTape to compute gradients. X = tf.constant(X) with tf.GradientTape( persistent=True, watch_accessed_variables=False) as tape: tape.watch(X) outputs = self.model(X) if isinstance(outputs, tf.Tensor): outputs = [outputs] final_result = [] for output in outputs: output_shape = tuple(output.shape.as_list()[1:]) output = tf.reshape(output, [-1]) result = [] for i in range(output.shape[0]): result.append(tape.gradient(output[i], X)) final_result.append( tf.reshape(tf.stack(result), output_shape + input_shape).numpy()) else: # In graph mode we use tf.gradients(). def jacobian(y, x): # Adapted from https://github.com/tensorflow/tensorflow/issues/675#issuecomment-319891923. y = tf.reshape(tf.convert_to_tensor(y)[0], [-1]) n = y.shape[0] loop_vars = [ tf.constant(0, tf.int32), tf.TensorArray(tf.float32, size=n) ] _, jacobian = tf.while_loop( lambda j, _: j < n, lambda j, result: (j + 1, result.write(j, tf.gradients(y[j], x))), loop_vars) return jacobian.stack() grads = [ jacobian(self._output_tensors[i], self._input_placeholders[0]) for i in self._prediction_outputs ] feed_dict = {self._input_placeholders[0]: X} result = self.session.run(grads, feed_dict=feed_dict) output_shapes = [ tuple(o.shape.as_list()[1:]) for o in self._output_tensors ] final_result = [ x.reshape(s + input_shape) for x, s in zip(result, output_shapes) ] if len(final_result) == 1: return final_result[0] return final_result def _prepare_batch(self, batch): inputs, labels, weights = batch inputs = [ x if x.dtype == t else x.astype(t) for x, t in zip(inputs, self._input_dtypes) ] if labels is not None: labels = [ x if x.dtype == t else x.astype(t) for x, t in zip(labels, self._label_dtypes) ] if weights is not None: weights = [ x if x.dtype == t else x.astype(t) for x, t in zip(weights, self._weights_dtypes) ] return (inputs, labels, weights) def default_generator(self, dataset, epochs=1, predict=False, deterministic=True, pad_batches=True): for epoch in range(epochs): for (X_b, y_b, w_b, ids_b) in dataset.iterbatches( batch_size=self.batch_size, deterministic=deterministic, pad_batches=pad_batches): yield ([X_b], [y_b], [w_b]) def save_checkpoint(self, max_checkpoints_to_keep=5): """Save a checkpoint to disk. Usually you do not need to call this method, since fit() saves checkpoints automatically. If you have disabled automatic checkpointing during fitting, this can be called to manually write checkpoints. Parameters ---------- max_checkpoints_to_keep: int the maximum number of checkpoints to keep. Older checkpoints are discarded. """ self._ensure_built() manager = tf.train.CheckpointManager(self._checkpoint, self.model_dir, max_checkpoints_to_keep) self._exec_with_session(lambda: manager.save()) def _exec_with_session(self, f): if tf.executing_eagerly(): f() else: with self.session.as_default(): f() def get_checkpoints(self): """Get a list of all available checkpoint files.""" return tf.train.get_checkpoint_state( self.model_dir).all_model_checkpoint_paths def restore(self, checkpoint=None): """Reload the values of all variables from a checkpoint file. Parameters ---------- checkpoint: str the path to the checkpoint file to load. If this is None, the most recent checkpoint will be chosen automatically. Call get_checkpoints() to get a list of all available checkpoints. """ if checkpoint is None: checkpoint = tf.train.latest_checkpoint(self.model_dir) if checkpoint is None: raise ValueError('No checkpoint found') if tf.executing_eagerly(): self._checkpoint.restore(checkpoint) else: self._checkpoint.restore(checkpoint).run_restore_ops(self.session)
def __init__(self, model, loss, output_types=None, batch_size=100, model_dir=None, learning_rate=0.001, optimizer=None, tensorboard=False, tensorboard_log_frequency=100, **kwargs): """Create a new KerasModel. Parameters ---------- model: tf.keras.Model the Keras model implementing the calculation loss: dc.models.losses.Loss or function a Loss or function defining how to compute the training loss for each batch, as described above output_types: list of strings the type of each output from the model, as described above batch_size: int default batch size for training and evaluating model_dir: str the directory on disk where the model will be stored. If this is None, a temporary directory is created. learning_rate: float or LearningRateSchedule the learning rate to use for fitting. If optimizer is specified, this is ignored. optimizer: Optimizer the optimizer to use for fitting. If this is specified, learning_rate is ignored. tensorboard: bool whether to log progress to TensorBoard during training tensorboard_log_frequency: int the frequency at which to log data to TensorBoard, measured in batches """ super(KerasModel, self).__init__( model_instance=model, model_dir=model_dir, **kwargs) self.model = model if isinstance(loss, Loss): self._loss_fn = _StandardLoss(model, loss) else: self._loss_fn = loss self.batch_size = batch_size if optimizer is None: self.optimizer = Adam(learning_rate=learning_rate) else: self.optimizer = optimizer self.tensorboard = tensorboard self.tensorboard_log_frequency = tensorboard_log_frequency self._tensorboard_step = 0 if tensorboard and tf.executing_eagerly(): raise ValueError( "Logging to TensorBoard is not currently supported in eager mode") if output_types is None: self._prediction_outputs = None self._loss_outputs = None self._variance_outputs = None else: self._prediction_outputs = [] self._loss_outputs = [] self._variance_outputs = [] for i, type in enumerate(output_types): if type == 'prediction': self._prediction_outputs.append(i) elif type == 'loss': self._loss_outputs.append(i) elif type == 'variance': self._variance_outputs.append(i) else: raise ValueError('Unknown output type "%s"' % type) self._built = False self._inputs_built = False self._training_ops_built = False self._initialized_vars = set()
class TensorGraph(Model): def __init__(self, tensorboard=False, tensorboard_log_frequency=100, batch_size=100, random_seed=None, use_queue=True, graph=None, learning_rate=0.001, **kwargs): """ Parameters ---------- tensorboard: bool Should we log to model_dir data for tensorboard? tensorboard_log_frequency: int How many training batches before logging tensorboard? batch_size: int default batch size for training and evaluating use_queue: boolean if True when building we will create a tf.FIFO queue, which will hold all features, weights, and labels. We will feed the inputs into this queue in batches of self.batch_size in a separate thread from the thread training the model. You cannot use a queue when batches are not of consistent size graph: tensorflow.Graph the Graph in which to create Tensorflow objects. If None, a new Graph is created. learning_rate: float or LearningRateSchedule the learning rate to use for optimization kwargs """ # Layer Management self.layers = dict() self.features = list() self.labels = list() self.outputs = list() self.task_weights = list() self.submodels = list() self.loss = Constant(0) self.built = False self.queue_installed = False self.optimizer = Adam(learning_rate=learning_rate, beta1=0.9, beta2=0.999, epsilon=1e-7) # Singular place to hold Tensor objects which don't serialize # These have to be reconstructed on restoring from pickle # See TensorGraph._get_tf() for more details on lazy construction self.tensor_objects = { "FileWriter": None, "Graph": graph, "train_op": None, "summary_op": None, } self.tensorboard = tensorboard self.tensorboard_log_frequency = tensorboard_log_frequency self.tensorboard_step = 0 self.global_step = 0 self.use_queue = use_queue self.batch_size = batch_size self.random_seed = random_seed super(TensorGraph, self).__init__(**kwargs) self.save_file = "%s/%s" % (self.model_dir, "model") self.model_class = None self.rnn_initial_states = [] self.rnn_final_states = [] self.rnn_zero_states = [] if self.use_queue and self.tensorboard: raise ValueError( "Currently TensorGraph cannot both use_queue and tensorboard at the same time" ) def _add_layer(self, layer): if layer.name is None: layer.name = "%s_%s" % (layer.__class__.__name__, len(self.layers) + 1) if layer.name in self.layers: return if isinstance(layer, Feature): self.features.append(layer) if isinstance(layer, Label): self.labels.append(layer) if isinstance(layer, Weights): self.task_weights.append(layer) self.layers[layer.name] = layer for in_layer in layer.in_layers: self._add_layer(in_layer) def fit(self, dataset, nb_epoch=10, max_checkpoints_to_keep=5, checkpoint_interval=1000, deterministic=False, restore=False, submodel=None): """Train this model on a dataset. Parameters ---------- dataset: Dataset the Dataset to train on nb_epoch: int the number of epochs to train for max_checkpoints_to_keep: int the maximum number of checkpoints to keep. Older checkpoints are discarded. checkpoint_interval: int the frequency at which to write checkpoints, measured in training steps. Set this to 0 to disable automatic checkpointing. deterministic: bool if True, the samples are processed in order. If False, a different random order is used for each epoch. restore: bool if True, restore the model from the most recent checkpoint and continue training from there. If False, retrain the model from scratch. submodel: Submodel an alternate training objective to use. This should have been created by calling create_submodel(). """ return self.fit_generator( self.default_generator(dataset, epochs=nb_epoch, deterministic=deterministic), max_checkpoints_to_keep, checkpoint_interval, restore, submodel) def fit_generator(self, feed_dict_generator, max_checkpoints_to_keep=5, checkpoint_interval=1000, restore=False, submodel=None): """Train this model on data from a generator. Parameters ---------- feed_dict_generator: generator this should generate batches, each represented as a dict that maps Layers to values. max_checkpoints_to_keep: int the maximum number of checkpoints to keep. Older checkpoints are discarded. checkpoint_interval: int the frequency at which to write checkpoints, measured in training steps. Set this to 0 to disable automatic checkpointing. restore: bool if True, restore the model from the most recent checkpoint and continue training from there. If False, retrain the model from scratch. submodel: Submodel an alternate training objective to use. This should have been created by calling create_submodel(). Returns ------- the average loss over the most recent checkpoint interval """ def create_feed_dict(): if self.use_queue: while True: yield {self._training_placeholder: 1.0} for d in feed_dict_generator: feed_dict = dict(d) feed_dict[self._training_placeholder] = 1.0 yield feed_dict if not self.built: self.build() with self._get_tf("Graph").as_default(): time1 = time.time() loss = self.loss if submodel is None: train_op = self._get_tf('train_op') else: train_op = submodel.get_train_op() if submodel.loss is not None: loss = submodel.loss if checkpoint_interval > 0: saver = tf.train.Saver(max_to_keep=max_checkpoints_to_keep) if restore: self.restore() avg_loss, n_averaged_batches = 0.0, 0.0 n_samples = 0 n_enqueued = [0] final_sample = [None] if self.use_queue: enqueue_thread = threading.Thread( target=_enqueue_batch, args=(self, feed_dict_generator, self._get_tf("Graph"), self.session, n_enqueued, final_sample)) enqueue_thread.start() for feed_dict in create_feed_dict(): if self.use_queue: # Don't let this thread get ahead of the enqueue thread, since if # we try to read more batches than the total number that get queued, # this thread will hang indefinitely. while n_enqueued[0] <= n_samples: if n_samples == final_sample[0]: break time.sleep(0) if n_samples == final_sample[0]: break n_samples += 1 should_log = (self.tensorboard and n_samples % self.tensorboard_log_frequency == 0) fetches = [train_op, loss.out_tensor] if should_log: fetches.append(self._get_tf("summary_op")) fetched_values = self.session.run(fetches, feed_dict=feed_dict) if should_log: self._log_tensorboard(fetches[2]) avg_loss += fetched_values[1] n_averaged_batches += 1 self.global_step += 1 if checkpoint_interval > 0 and self.global_step % checkpoint_interval == checkpoint_interval - 1: saver.save(self.session, self.save_file, global_step=self.global_step) avg_loss = float(avg_loss) / n_averaged_batches print('Ending global_step %d: Average loss %g' % (self.global_step, avg_loss)) avg_loss, n_averaged_batches = 0.0, 0.0 if n_averaged_batches > 0: avg_loss = float(avg_loss) / n_averaged_batches if checkpoint_interval > 0: if n_averaged_batches > 0: print('Ending global_step %d: Average loss %g' % (self.global_step, avg_loss)) saver.save(self.session, self.save_file, global_step=self.global_step) time2 = time.time() print("TIMING: model fitting took %0.3f s" % (time2 - time1)) return avg_loss def _log_tensorboard(self, summary): """ TODO(LESWING) set epoch Parameters ---------- Returns ------- """ global_step = int(self.global_step) writer = self._get_tf("FileWriter") writer.reopen() writer.add_summary(summary, global_step=global_step) writer.close() def fit_on_batch(self, X, y, w, submodel=None): if not self.built: self.build() dataset = NumpyDataset(X, y) return self.fit(dataset, nb_epoch=1, submodel=submodel) def default_generator(self, dataset, epochs=1, predict=False, deterministic=True, pad_batches=True): if len(self.features) > 1: raise ValueError("More than one Feature, must use generator") if len(self.labels) > 1: raise ValueError("More than one Label, must use generator") if len(self.task_weights) > 1: raise ValueError("More than one Weights, must use generator") for epoch in range(epochs): for (X_b, y_b, w_b, ids_b) in dataset.iterbatches(batch_size=self.batch_size, deterministic=deterministic, pad_batches=pad_batches): feed_dict = dict() if len(self.labels) == 1 and y_b is not None and not predict: feed_dict[self.labels[0]] = y_b if len(self.features) == 1 and X_b is not None: feed_dict[self.features[0]] = X_b if len(self.task_weights ) == 1 and w_b is not None and not predict: feed_dict[self.task_weights[0]] = w_b for (initial_state, zero_state) in zip(self.rnn_initial_states, self.rnn_zero_states): feed_dict[initial_state] = zero_state yield feed_dict def predict_on_generator(self, generator, transformers=[], outputs=None): """ Parameters ---------- generator: Generator Generator that constructs feed dictionaries for TensorGraph. transformers: list List of dc.trans.Transformers. outputs: object If outputs is None, then will assume outputs = self.outputs. If outputs is a Layer/Tensor, then will evaluate and return as a single ndarray. If outputs is a list of Layers/Tensors, will return a list of ndarrays. Returns: y_pred: numpy ndarray of shape (n_samples, n_classes*n_tasks) """ if not self.built: self.build() if outputs is None: outputs = self.outputs elif not isinstance(outputs, collections.Sequence): outputs = [outputs] with self._get_tf("Graph").as_default(): # Gather results for each output results = [[] for out in outputs] for feed_dict in generator: feed_dict = { self.layers[k.name].out_tensor: v for k, v in six.iteritems(feed_dict) } feed_dict[self._training_placeholder] = 0.0 feed_results = self.session.run(outputs, feed_dict=feed_dict) if len(feed_results) > 1: if len(transformers): raise ValueError("Does not support transformations " "for multiple outputs.") elif len(feed_results) == 1: result = undo_transforms(feed_results[0], transformers) feed_results = [result] for ind, result in enumerate(feed_results): results[ind].append(result) final_results = [] for result_list in results: final_results.append(np.concatenate(result_list, axis=0)) # If only one output, just return array if len(final_results) == 1: return final_results[0] else: return final_results def predict_proba_on_generator(self, generator, transformers=[], outputs=None): """ Returns: y_pred: numpy ndarray of shape (n_samples, n_classes*n_tasks) """ return self.predict_on_generator(generator, transformers, outputs) def predict_on_batch(self, X, transformers=[], outputs=None): """Generates predictions for input samples, processing samples in a batch. Parameters ---------- X: ndarray the input data, as a Numpy array. transformers: List List of dc.trans.Transformers Returns ------- A Numpy array of predictions. """ dataset = NumpyDataset(X=X, y=None) generator = self.default_generator(dataset, predict=True, pad_batches=False) return self.predict_on_generator(generator, transformers, outputs) def predict_proba_on_batch(self, X, transformers=[], outputs=None): """Generates predictions for input samples, processing samples in a batch. Parameters ---------- X: ndarray the input data, as a Numpy array. transformers: List List of dc.trans.Transformers Returns ------- A Numpy array of predictions. """ return self.predict_on_batch(X, transformers, outputs) def predict(self, dataset, transformers=[], outputs=None): """ Uses self to make predictions on provided Dataset object. Parameters ---------- dataset: dc.data.Dataset Dataset to make prediction on transformers: list List of dc.trans.Transformers. outputs: object If outputs is None, then will assume outputs = self.outputs[0] (single output). If outputs is a Layer/Tensor, then will evaluate and return as a single ndarray. If outputs is a list of Layers/Tensors, will return a list of ndarrays. Returns ------- results: numpy ndarray or list of numpy ndarrays """ generator = self.default_generator(dataset, predict=True, pad_batches=False) return self.predict_on_generator(generator, transformers, outputs) def predict_proba(self, dataset, transformers=[], outputs=None): """ Parameters ---------- dataset: dc.data.Dataset Dataset to make prediction on transformers: list List of dc.trans.Transformers. outputs: object If outputs is None, then will assume outputs = self.outputs[0] (single output). If outputs is a Layer/Tensor, then will evaluate and return as a single ndarray. If outputs is a list of Layers/Tensors, will return a list of ndarrays. Returns ------- y_pred: numpy ndarray or list of numpy ndarrays """ generator = self.default_generator(dataset, predict=True, pad_batches=False) return self.predict_proba_on_generator(generator, transformers, outputs) def topsort(self): def add_layers_to_list(layer, sorted_layers): if layer in sorted_layers: return for in_layer in layer.in_layers: add_layers_to_list(in_layer, sorted_layers) sorted_layers.append(layer) sorted_layers = [] for l in self.features + self.labels + self.task_weights + self.outputs: add_layers_to_list(l, sorted_layers) add_layers_to_list(self.loss, sorted_layers) for submodel in self.submodels: if submodel.loss is not None: add_layers_to_list(submodel.loss, sorted_layers) return sorted_layers def build(self): if self.built: return with self._get_tf("Graph").as_default(): self._training_placeholder = tf.placeholder(dtype=tf.float32, shape=()) if self.random_seed is not None: tf.set_random_seed(self.random_seed) self._install_queue() for layer in self.topsort(): with tf.name_scope(layer.name): layer.create_tensor(training=self._training_placeholder) self.rnn_initial_states += layer.rnn_initial_states self.rnn_final_states += layer.rnn_final_states self.rnn_zero_states += layer.rnn_zero_states layer.add_summary_to_tg() self.session = tf.Session() self.built = True # Ensure all training operators have been created. self._get_tf('train_op') for submodel in self.submodels: train_op = submodel.get_train_op() # Initialize variables. self.session.run(tf.global_variables_initializer()) for layer in self.layers.values(): if layer.variable_values is not None: variables = self.get_layer_variables(layer) for var, val in zip(variables, layer.variable_values): self.session.run(var.assign(val)) for layer in self.layers.values(): if layer.tensorboard: self.tensorboard = True tf.summary.scalar("loss", self.loss.out_tensor) for layer in self.layers.values(): if layer.tensorboard: tf.summary.tensor_summary(layer.name, layer.out_tensor) if self.tensorboard: writer = self._get_tf("FileWriter") writer.add_graph(self._get_tf("Graph")) writer.close() # As a sanity check, make sure all tensors have the correct shape. for layer in self.layers.values(): try: assert list(layer.shape) == layer.out_tensor.get_shape( ).as_list( ), '%s: Expected shape %s does not match actual shape %s' % ( layer.name, layer.shape, layer.out_tensor.get_shape().as_list()) except NotImplementedError: pass def _install_queue(self): """ """ if not self.use_queue or self.queue_installed: for layer in self.features + self.labels + self.task_weights: layer.pre_queue = True return names = [] shapes = [] pre_q_inputs = [] q = InputFifoQueue(shapes, names, in_layers=pre_q_inputs) q.name = "%s_%s" % (q.__class__.__name__, len(self.layers) + 1) for layer in self.features + self.labels + self.task_weights: pre_q_input = layer.create_pre_q(self.batch_size) shapes.append(pre_q_input.shape) names.append(pre_q_input.name) pre_q_inputs.append(pre_q_input) layer.in_layers.append(q) self._add_layer(q) self.input_queue = q self.queue_installed = True def set_loss(self, layer): self._add_layer(layer) self.loss = layer def add_output(self, layer): self._add_layer(layer) self.outputs.append(layer) def set_optimizer(self, optimizer): """Set the optimizer to use for fitting.""" self.optimizer = optimizer def create_submodel(self, layers=None, loss=None, optimizer=None): """Create an alternate objective for training one piece of a TensorGraph. A TensorGraph consists of a set of layers, and specifies a loss function and optimizer to use for training those layers. Usually this is sufficient, but there are cases where you want to train different parts of a model separately. For example, a GAN consists of a generator and a discriminator. They are trained separately, and they use different loss functions. A submodel defines an alternate objective to use in cases like this. It may optionally specify any of the following: a subset of layers in the model to train; a different loss function; and a different optimizer to use. This method creates a submodel, which you can then pass to fit() to use it for training. Parameters ---------- layers: list the list of layers to train. If None, all layers in the model will be trained. loss: Layer the loss function to optimize. If None, the model's main loss function will be used. optimizer: Optimizer the optimizer to use for training. If None, the model's main optimizer will be used. Returns ------- the newly created submodel, which can be passed to any of the fitting methods. """ if self.built: raise ValueError( 'Submodels must be created before build() is called.') submodel = Submodel(self, layers, loss, optimizer) self.submodels.append(submodel) if loss is not None: self._add_layer(loss) return submodel def get_pickling_errors(self, obj, seen=None): if seen == None: seen = [] try: state = obj.__getstate__() except AttributeError: return if state == None: return if isinstance(state, tuple): if not isinstance(state[0], dict): state = state[1] else: state = state[0].update(state[1]) result = {} for i in state: try: pickle.dumps(state[i], protocol=2) except pickle.PicklingError: if not state[i] in seen: seen.append(state[i]) result[i] = self.get_pickling_errors(state[i], seen) return result def save(self): # Remove out_tensor from the object to be pickled must_restore = False tensor_objects = self.tensor_objects rnn_initial_states = self.rnn_initial_states rnn_final_states = self.rnn_final_states rnn_zero_states = self.rnn_zero_states session = self.session self.tensor_objects = {} self.rnn_initial_states = [] self.rnn_final_states = [] self.rnn_zero_states = [] self.session = None out_tensors = [] if self.built: must_restore = True for layer in self.topsort(): out_tensors.append(layer.none_tensors()) training_placeholder = self._training_placeholder self._training_placeholder = None self.built = False # Pickle itself pickle_name = os.path.join(self.model_dir, "model.pickle") with open(pickle_name, 'wb') as fout: try: pickle.dump(self, fout) except Exception as e: print(self.get_pickling_errors(self)) raise e # add out_tensor back to everyone if must_restore: for index, layer in enumerate(self.topsort()): layer.set_tensors(out_tensors[index]) self._training_placeholder = training_placeholder self.built = True self.tensor_objects = tensor_objects self.rnn_initial_states = rnn_initial_states self.rnn_final_states = rnn_final_states self.rnn_zero_states = rnn_zero_states self.session = session def evaluate_generator(self, feed_dict_generator, metrics, transformers=[], labels=None, outputs=None, weights=[], per_task_metrics=False): if labels is None: raise ValueError n_tasks = len(self.outputs) n_classes = self.outputs[0].out_tensor.get_shape()[-1].value evaluator = GeneratorEvaluator(self, feed_dict_generator, transformers, labels=labels, outputs=outputs, weights=weights, n_tasks=n_tasks, n_classes=n_classes) if not per_task_metrics: scores = evaluator.compute_model_performance(metrics) return scores else: scores, per_task_scores = evaluator.compute_model_performance( metrics, per_task_metrics=per_task_metrics) return scores, per_task_scores def get_layer_variables(self, layer): """Get the list of trainable variables in a layer of the graph.""" if not self.built: self.build() with self._get_tf("Graph").as_default(): if layer.variable_scope == '': return [] return tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=layer.variable_scope) def get_global_step(self): return self._get_tf("GlobalStep") def _get_tf(self, obj): """Fetches underlying TensorFlow primitives. Parameters ---------- obj: str If "Graph", returns tf.Graph instance. If "FileWriter", returns tf.summary.FileWriter. If "Optimizer", returns the optimizer. If "train_op", returns the train operation. If "summary_op", returns the merged summary. If "GlobalStep" returns the global step. Returns ------- TensorFlow Object """ if obj in self.tensor_objects and self.tensor_objects[obj] is not None: return self.tensor_objects[obj] if obj == "Graph": self.tensor_objects['Graph'] = tf.Graph() elif obj == "FileWriter": self.tensor_objects['FileWriter'] = tf.summary.FileWriter( self.model_dir) elif obj == 'Optimizer': self.tensor_objects[ 'Optimizer'] = self.optimizer._create_optimizer( self._get_tf('GlobalStep')) elif obj == 'train_op': opt = self._get_tf('Optimizer') global_step = self._get_tf('GlobalStep') try: self.tensor_objects['train_op'] = opt.minimize( self.loss.out_tensor, global_step=global_step) except ValueError: # The loss doesn't depend on any variables. self.tensor_objects['train_op'] = 0 elif obj == 'summary_op': self.tensor_objects['summary_op'] = tf.summary.merge_all( key=tf.GraphKeys.SUMMARIES) elif obj == 'GlobalStep': with self._get_tf("Graph").as_default(): self.tensor_objects['GlobalStep'] = tf.Variable( 0, trainable=False) return self._get_tf(obj) def save_checkpoint(self, max_checkpoints_to_keep=5): """Save a checkpoint to disk. Usually you do not need to call this method, since fit() saves checkpoints automatically. If you have disabled automatic checkpointing during fitting, this can be called to manually write checkpoints. Parameters ---------- max_checkpoints_to_keep: int the maximum number of checkpoints to keep. Older checkpoints are discarded. """ saver = tf.train.Saver(max_to_keep=max_checkpoints_to_keep) saver.save(self.session, self.save_file, global_step=self.global_step) def restore(self): """Reload the values of all variables from the most recent checkpoint file.""" if not self.built: self.build() last_checkpoint = tf.train.latest_checkpoint(self.model_dir) if last_checkpoint is None: raise ValueError('No checkpoint found') with self._get_tf("Graph").as_default(): saver = tf.train.Saver() saver.restore(self.session, last_checkpoint) def get_num_tasks(self): return len(self.outputs) def get_pre_q_input(self, input_layer): layer_name = input_layer.name pre_q_name = "%s_pre_q" % layer_name return self.layers[pre_q_name] @staticmethod def load_from_dir(model_dir): pickle_name = os.path.join(model_dir, "model.pickle") with open(pickle_name, 'rb') as fout: tensorgraph = pickle.load(fout) tensorgraph.built = False tensorgraph.model_dir = model_dir try: tensorgraph.restore() except ValueError: pass # No checkpoint to load return tensorgraph def __del__(self): pass
tokens = set() for s in train_smiles: tokens = tokens.union(set(s)) tokens = sorted(list(tokens)) max_length = max(len(s) for s in train_smiles) #training from deepchem.models.tensorgraph.optimizers import Adam, ExponentialDecay from deepchem.models.tensorgraph.models.seqtoseq import AspuruGuzikAutoEncoder #the encoder is a CNN and the decoder is a GRU model = AspuruGuzikAutoEncoder(tokens, max_length, model_dir='vae') batches_per_epoch = len(train_smiles)/model.batch_size learning_rate = ExponentialDecay(0.001, 0.95, batches_per_epoch) model.set_optimizer(Adam(learning_rate=learning_rate)) def generate_sequences(epochs): for i in range(epochs): for s in train_smiles: yield (s, s) model.summary() model.fit_sequences(generate_sequences(1)) #check that the molecules are valid import numpy as np from rdkit import Chem predictions = model.predict_from_embeddings(np.random.normal(size=(1000,196))) molecules = [] for p in predictions: smiles = ''.join(p) morning
def __init__(self, tensorboard=False, tensorboard_log_frequency=100, batch_size=100, random_seed=None, use_queue=True, graph=None, learning_rate=0.001, configproto=None, **kwargs): """ Parameters ---------- tensorboard: bool Should we log to model_dir data for tensorboard? tensorboard_log_frequency: int How many training batches before logging tensorboard? batch_size: int default batch size for training and evaluating use_queue: boolean if True when building we will create a tf.FIFO queue, which will hold all features, weights, and labels. We will feed the inputs into this queue in batches of self.batch_size in a separate thread from the thread training the model. You cannot use a queue when batches are not of consistent size graph: tensorflow.Graph the Graph in which to create Tensorflow objects. If None, a new Graph is created. learning_rate: float or LearningRateSchedule the learning rate to use for optimization configproto: a tf.ConfigProto() object used to create tf.Session() """ # Layer Management self.layers = dict() self.features = list() self.labels = list() self.outputs = list() self.variances = list() self.task_weights = list() self.submodels = list() self.loss = Constant(0) self.built = False self.queue_installed = False self.optimizer = Adam( learning_rate=learning_rate, beta1=0.9, beta2=0.999, epsilon=1e-7) self.configproto = configproto # Singular place to hold Tensor objects which don't serialize # These have to be reconstructed on restoring from pickle # See TensorGraph._get_tf() for more details on lazy construction self.tensor_objects = { "FileWriter": None, "Graph": graph, "train_op": None, "summary_op": None, } self.tensorboard = tensorboard self.tensorboard_log_frequency = tensorboard_log_frequency self.tensorboard_step = 0 self.global_step = 0 self.use_queue = use_queue self.batch_size = batch_size self.random_seed = random_seed super(TensorGraph, self).__init__(**kwargs) self.save_file = "%s/%s" % (self.model_dir, "model") self.model_class = None self.rnn_initial_states = [] self.rnn_final_states = [] self.rnn_zero_states = [] if self.use_queue and self.tensorboard: raise ValueError( "Currently TensorGraph cannot both use_queue and tensorboard at the same time" )
def test_hindsight(self): """Test Hindsight Experience Replay.""" # The environment is a plane in which the agent moves by steps until it reaches a randomly # positioned goal. No reward is given until it reaches the goal. That makes it very hard # to learn by standard methods, since it may take a very long time to receive any feedback # at all. Using hindsight makes it much easier. class TestEnvironment(dc.rl.Environment): def __init__(self): super(TestEnvironment, self).__init__((4, ), 4) self.moves = [(-1, 0), (1, 0), (0, -1), (0, 1)] def reset(self): self._state = np.concatenate([[0, 0], np.random.randint(-50, 50, 2)]) self._terminated = False self.count = 0 def step(self, action): new_state = self._state.copy() new_state[:2] += self.moves[action] self._state = new_state self.count += 1 reward = 0 if np.array_equal(new_state[:2], new_state[2:]): self._terminated = True reward = 1 elif self.count == 1000: self._terminated = True return reward def apply_hindsight(self, states, actions, goal): new_states = [] rewards = [] goal_pos = goal[:2] for state, action in zip(states, actions): new_state = state.copy() new_state[2:] = goal_pos new_states.append(new_state) pos_after_action = new_state[:2] + self.moves[action] if np.array_equal(pos_after_action, goal_pos): rewards.append(1) else: rewards.append(0) return new_states, rewards # A simple policy with two hidden layers. class TestPolicy(dc.rl.Policy): def create_layers(self, state, **kwargs): dense1 = Dense(6, activation_fn=tf.nn.relu, in_layers=state) dense2 = Dense(6, activation_fn=tf.nn.relu, in_layers=dense1) output = Dense(4, activation_fn=tf.nn.softmax, biases_initializer=None, in_layers=dense2) value = Dense(1, in_layers=dense2) return {'action_prob': output, 'value': value} # Optimize it. env = TestEnvironment() learning_rate = PolynomialDecay(initial_rate=0.0001, final_rate=0.00005, decay_steps=1500000) ppo = dc.rl.PPO(env, TestPolicy(), use_hindsight=True, optimization_epochs=8, optimizer=Adam(learning_rate=learning_rate)) ppo.fit(1500000) # Try running it a few times and see if it succeeds. pass_count = 0 for i in range(5): env.reset() while not env.terminated: env.step(ppo.select_action(env.state)) if np.array_equal(env.state[:2], env.state[2:]): pass_count += 1 assert pass_count >= 3
def test_roulette(self): """Test training a policy for the roulette environment.""" # This is modeled after the Roulette-v0 environment from OpenAI Gym. # The player can bet on any number from 0 to 36, or walk away (which ends the # game). The average reward for any bet is slightly negative, so the best # strategy is to walk away. class RouletteEnvironment(dc.rl.Environment): def __init__(self): super(RouletteEnvironment, self).__init__([(1, )], 38) self._state = [np.array([0])] def step(self, action): if action == 37: self._terminated = True # Walk away. return 0.0 wheel = np.random.randint(37) if wheel == 0: if action == 0: return 35.0 return -1.0 if action != 0 and wheel % 2 == action % 2: return 1.0 return -1.0 def reset(self): self._terminated = False env = RouletteEnvironment() # This policy just learns a constant probability for each action, and a constant for the value. class TestPolicy(dc.rl.Policy): def __init__(self): super(TestPolicy, self).__init__(['action_prob', 'value']) def create_model(self, **kwargs): class TestModel(tf.keras.Model): def __init__(self): super(TestModel, self).__init__(**kwargs) self.action = tf.Variable( np.ones(env.n_actions, np.float32)) self.value = tf.Variable([0.0], tf.float32) def call(self, inputs, **kwargs): prob = tf.nn.softmax( tf.reshape(self.action, (-1, env.n_actions))) return (prob, self.value) return TestModel() # Optimize it. a3c = dc.rl.A3C(env, TestPolicy(), max_rollout_length=20, optimizer=Adam(learning_rate=0.001)) a3c.fit(100000) # It should have learned that the expected value is very close to zero, and that the best # action is to walk away. action_prob, value = a3c.predict([[0]]) assert -0.5 < value[0] < 0.5 assert action_prob.argmax() == 37 assert a3c.select_action([[0]], deterministic=True) == 37 # Verify that we can create a new A3C object, reload the parameters from the first one, and # get the same result. new_a3c = dc.rl.A3C(env, TestPolicy(), model_dir=a3c._model.model_dir) new_a3c.restore() action_prob2, value2 = new_a3c.predict([[0]]) assert value2 == value # Do the same thing, only using the "restore" argument to fit(). new_a3c = dc.rl.A3C(env, TestPolicy(), model_dir=a3c._model.model_dir) new_a3c.fit(0, restore=True) action_prob2, value2 = new_a3c.predict([[0]]) assert value2 == value
n_observations = genv.observation_space.shape[0] print("n_products={} n_observations={} n_actions={}".format( n_products, n_observations, n_actions)) callbacks = [LogCallback(genv.unprocess_observation)] if args.method == "ppo": agent = deepchem.rl.PPO( denv, MyPolicy(n_products, n_observations, args.single_layer), max_rollout_length=10000, optimization_rollouts=(8 if args.parallel else 1), #optimization_epochs=4, discount_factor=gamma, advantage_lambda=0.98, entropy_weight=0.0, value_weight=1.0e-4, optimizer=Adam(learning_rate=1e-4), model_dir="data.ppo."+args.id, zero_terminal=False, callbacks=callbacks) elif args.method == "a3c": agent = deepchem.rl.A3C( denv, MyPolicy(n_products, n_observations, args.single_layer), max_rollout_length=10000, discount_factor=gamma, advantage_lambda=0.98, entropy_weight=0.0, value_weight=1.0e-4, optimizer=Adam(learning_rate=1e-4), model_dir="data.a3c."+args.id, worker_count=16, zero_terminal=False,
def __init__(self, env, policy, max_rollout_length=20, discount_factor=0.99, advantage_lambda=0.98, value_weight=1.0, entropy_weight=0.01, optimizer=None, model_dir=None, use_hindsight=False): """Create an object for optimizing a policy. Parameters ---------- env: Environment the Environment to interact with policy: Policy the Policy to optimize. It must have outputs with the names 'action_prob' and 'value' (for discrete action spaces) or 'action_mean', 'action_std', and 'value' (for continuous action spaces) max_rollout_length: int the maximum length of rollouts to generate discount_factor: float the discount factor to use when computing rewards advantage_lambda: float the parameter for trading bias vs. variance in Generalized Advantage Estimation value_weight: float a scale factor for the value loss term in the loss function entropy_weight: float a scale factor for the entropy term in the loss function optimizer: Optimizer the optimizer to use. If None, a default optimizer is used. model_dir: str the directory in which the model will be saved. If None, a temporary directory will be created. use_hindsight: bool if True, use Hindsight Experience Replay """ self._env = env self._policy = policy self.max_rollout_length = max_rollout_length self.discount_factor = discount_factor self.advantage_lambda = advantage_lambda self.value_weight = value_weight self.entropy_weight = entropy_weight self.use_hindsight = use_hindsight self._state_is_list = isinstance(env.state_shape[0], collections.Sequence) if optimizer is None: self._optimizer = Adam(learning_rate=0.001, beta1=0.9, beta2=0.999) else: self._optimizer = optimizer self._model = self._build_model(model_dir) output_names = policy.output_names output_tensors = self._model._output_tensors self._value = output_tensors[output_names.index('value')] if self.continuous: self._action_mean = output_tensors[output_names.index( 'action_mean')] self._action_std = output_tensors[output_names.index('action_std')] else: self._action_prob = output_tensors[output_names.index( 'action_prob')] rnn_outputs = [ i for i, n in enumerate(output_names) if n == 'rnn_state' ] self._rnn_final_states = [output_tensors[i] for i in rnn_outputs] self._session = self._model.session self._rnn_states = policy.rnn_initial_states self._checkpoint = tf.train.Checkpoint() self._checkpoint.save_counter # Ensure the variable has been created self._checkpoint.listed = self._model.model.trainable_variables self._session.run(self._checkpoint.save_counter.initializer)
def __init__(self, tensorboard=False, tensorboard_log_frequency=100, batch_size=100, random_seed=None, use_queue=True, graph=None, learning_rate=0.001, **kwargs): """ Parameters ---------- tensorboard: bool Should we log to model_dir data for tensorboard? tensorboard_log_frequency: int How many training batches before logging tensorboard? batch_size: int default batch size for training and evaluating use_queue: boolean if True when building we will create a tf.FIFO queue, which will hold all features, weights, and labels. We will feed the inputs into this queue in batches of self.batch_size in a separate thread from the thread training the model. You cannot use a queue when batches are not of consistent size graph: tensorflow.Graph the Graph in which to create Tensorflow objects. If None, a new Graph is created. learning_rate: float or LearningRateSchedule the learning rate to use for optimization kwargs """ # Layer Management self.layers = dict() self.features = list() self.labels = list() self.outputs = list() self.task_weights = list() self.submodels = list() self.loss = Constant(0) self.built = False self.queue_installed = False self.optimizer = Adam(learning_rate=learning_rate, beta1=0.9, beta2=0.999, epsilon=1e-7) # Singular place to hold Tensor objects which don't serialize # These have to be reconstructed on restoring from pickle # See TensorGraph._get_tf() for more details on lazy construction self.tensor_objects = { "FileWriter": None, "Graph": graph, "train_op": None, "summary_op": None, } self.tensorboard = tensorboard self.tensorboard_log_frequency = tensorboard_log_frequency self.tensorboard_step = 0 self.global_step = 0 self.use_queue = use_queue self.batch_size = batch_size self.random_seed = random_seed super(TensorGraph, self).__init__(**kwargs) self.save_file = "%s/%s" % (self.model_dir, "model") self.model_class = None self.rnn_initial_states = [] self.rnn_final_states = [] self.rnn_zero_states = [] if self.use_queue and self.tensorboard: raise ValueError( "Currently TensorGraph cannot both use_queue and tensorboard at the same time" )
class TensorGraph(Model): def __init__(self, tensorboard=False, tensorboard_log_frequency=100, batch_size=100, random_seed=None, use_queue=True, graph=None, learning_rate=0.001, configproto=None, **kwargs): """ Parameters ---------- tensorboard: bool Should we log to model_dir data for tensorboard? tensorboard_log_frequency: int How many training batches before logging tensorboard? batch_size: int default batch size for training and evaluating use_queue: boolean if True when building we will create a tf.FIFO queue, which will hold all features, weights, and labels. We will feed the inputs into this queue in batches of self.batch_size in a separate thread from the thread training the model. You cannot use a queue when batches are not of consistent size graph: tensorflow.Graph the Graph in which to create Tensorflow objects. If None, a new Graph is created. learning_rate: float or LearningRateSchedule the learning rate to use for optimization configproto: a tf.ConfigProto() object used to create tf.Session() """ # Layer Management self.layers = dict() self.features = list() self.labels = list() self.outputs = list() self.variances = list() self.task_weights = list() self.submodels = list() self.loss = Constant(0) self.built = False self.queue_installed = False self.optimizer = Adam( learning_rate=learning_rate, beta1=0.9, beta2=0.999, epsilon=1e-7) self.configproto = configproto # Singular place to hold Tensor objects which don't serialize # These have to be reconstructed on restoring from pickle # See TensorGraph._get_tf() for more details on lazy construction self.tensor_objects = { "FileWriter": None, "Graph": graph, "train_op": None, "summary_op": None, } self.tensorboard = tensorboard self.tensorboard_log_frequency = tensorboard_log_frequency self.tensorboard_step = 0 self.global_step = 0 self.use_queue = use_queue self.batch_size = batch_size self.random_seed = random_seed super(TensorGraph, self).__init__(**kwargs) self.save_file = "%s/%s" % (self.model_dir, "model") self.model_class = None self.rnn_initial_states = [] self.rnn_final_states = [] self.rnn_zero_states = [] if self.use_queue and self.tensorboard: raise ValueError( "Currently TensorGraph cannot both use_queue and tensorboard at the same time" ) def _add_layer(self, layer): if layer.name is None: layer.name = "%s_%s" % (layer.__class__.__name__, len(self.layers) + 1) if layer.name in self.layers: return if isinstance(layer, Feature): self.features.append(layer) if isinstance(layer, Label): self.labels.append(layer) if isinstance(layer, Weights): self.task_weights.append(layer) self.layers[layer.name] = layer for in_layer in layer.in_layers: self._add_layer(in_layer) def fit(self, dataset, nb_epoch=10, max_checkpoints_to_keep=5, checkpoint_interval=1000, deterministic=False, restore=False, submodel=None, **kwargs): """Train this model on a dataset. Parameters ---------- dataset: Dataset the Dataset to train on nb_epoch: int the number of epochs to train for max_checkpoints_to_keep: int the maximum number of checkpoints to keep. Older checkpoints are discarded. checkpoint_interval: int the frequency at which to write checkpoints, measured in training steps. Set this to 0 to disable automatic checkpointing. deterministic: bool if True, the samples are processed in order. If False, a different random order is used for each epoch. restore: bool if True, restore the model from the most recent checkpoint and continue training from there. If False, retrain the model from scratch. submodel: Submodel an alternate training objective to use. This should have been created by calling create_submodel(). """ return self.fit_generator( self.default_generator( dataset, epochs=nb_epoch, deterministic=deterministic), max_checkpoints_to_keep, checkpoint_interval, restore, submodel) def fit_generator(self, feed_dict_generator, max_checkpoints_to_keep=5, checkpoint_interval=1000, restore=False, submodel=None): """Train this model on data from a generator. Parameters ---------- feed_dict_generator: generator this should generate batches, each represented as a dict that maps Layers to values. max_checkpoints_to_keep: int the maximum number of checkpoints to keep. Older checkpoints are discarded. checkpoint_interval: int the frequency at which to write checkpoints, measured in training steps. Set this to 0 to disable automatic checkpointing. restore: bool if True, restore the model from the most recent checkpoint and continue training from there. If False, retrain the model from scratch. submodel: Submodel an alternate training objective to use. This should have been created by calling create_submodel(). Returns ------- the average loss over the most recent checkpoint interval """ if not self.built: self.build() with self._get_tf("Graph").as_default(): time1 = time.time() loss = self.loss if submodel is not None and submodel.loss is not None: loss = submodel.loss if tfe.in_eager_mode(): # In eager mode we want an optimizer and a function to compute the # gradient of the loss. submodel_vars = None if submodel is None: optimizer = self._get_tf("Optimizer") else: optimizer = submodel.create_optimizer() if submodel.layers is not None: submodel_vars = set() for layer in submodel.layers: for var in layer.variables: submodel_vars.add(var) val_grad_fn = tfe.implicit_value_and_gradients( lambda x: self._run_graph([loss], x, True)[0]) else: # In graph mode we want a training operation. if submodel is None: train_op = self._get_tf('train_op') else: train_op = submodel.get_train_op() if checkpoint_interval > 0: saver = tf.train.Saver( self.get_variables(), max_to_keep=max_checkpoints_to_keep, save_relative_paths=True) if restore: self.restore() avg_loss, n_averaged_batches = 0.0, 0.0 n_samples = 0 n_enqueued = [0] final_sample = [None] if self.queue_installed: enqueue_thread = threading.Thread( target=_enqueue_batch, args=(self, feed_dict_generator, self._get_tf("Graph"), self.session, n_enqueued, final_sample)) enqueue_thread.start() for feed_dict in self._create_feed_dicts(feed_dict_generator, True): if self.queue_installed: # Don't let this thread get ahead of the enqueue thread, since if # we try to read more batches than the total number that get queued, # this thread will hang indefinitely. while n_enqueued[0] <= n_samples: if n_samples == final_sample[0]: break time.sleep(0) if n_samples == final_sample[0]: break n_samples += 1 should_log = (self.tensorboard and n_samples % self.tensorboard_log_frequency == 0) if tfe.in_eager_mode(): value, grads_and_vars = val_grad_fn(feed_dict) if submodel_vars is not None: grads_and_vars = [ x for x in grads_and_vars if x[1] in submodel_vars ] optimizer.apply_gradients(grads_and_vars) avg_loss += value else: fetches = [train_op, loss.out_tensor] if should_log: fetches.append(self._get_tf("summary_op")) fetched_values = self.session.run(fetches, feed_dict=feed_dict) if should_log: self._log_tensorboard(fetched_values[2]) avg_loss += fetched_values[1] n_averaged_batches += 1 self.global_step += 1 if checkpoint_interval > 0 and self.global_step % checkpoint_interval == checkpoint_interval - 1: saver.save(self.session, self.save_file, global_step=self.global_step) avg_loss = float(avg_loss) / n_averaged_batches logger.info('Ending global_step %d: Average loss %g' % (self.global_step, avg_loss)) avg_loss, n_averaged_batches = 0.0, 0.0 if n_averaged_batches > 0: avg_loss = float(avg_loss) / n_averaged_batches if checkpoint_interval > 0: if n_averaged_batches > 0: logger.info('Ending global_step %d: Average loss %g' % (self.global_step, avg_loss)) saver.save(self.session, self.save_file, global_step=self.global_step) time2 = time.time() logger.info("TIMING: model fitting took %0.3f s" % (time2 - time1)) return avg_loss def _log_tensorboard(self, summary): """ TODO(LESWING) set epoch Parameters ---------- Returns ------- """ global_step = int(self.global_step) writer = self._get_tf("FileWriter") writer.reopen() writer.add_summary(summary, global_step=global_step) writer.close() def fit_on_batch(self, X, y, w, submodel=None): if not self.built: self.build() dataset = NumpyDataset(X, y) return self.fit(dataset, nb_epoch=1, submodel=submodel) def default_generator(self, dataset, epochs=1, predict=False, deterministic=True, pad_batches=True): if len(self.features) > 1: raise ValueError("More than one Feature, must use generator") if len(self.labels) > 1: raise ValueError("More than one Label, must use generator") if len(self.task_weights) > 1: raise ValueError("More than one Weights, must use generator") for epoch in range(epochs): for (X_b, y_b, w_b, ids_b) in dataset.iterbatches( batch_size=self.batch_size, deterministic=deterministic, pad_batches=pad_batches): feed_dict = dict() if len(self.labels) == 1 and y_b is not None and not predict: feed_dict[self.labels[0]] = y_b if len(self.features) == 1 and X_b is not None: feed_dict[self.features[0]] = X_b if len(self.task_weights) == 1 and w_b is not None and not predict: feed_dict[self.task_weights[0]] = w_b for (initial_state, zero_state) in zip(self.rnn_initial_states, self.rnn_zero_states): feed_dict[initial_state] = zero_state yield feed_dict def __call__(self, *inputs, **kwargs): """Execute the model in eager mode to compute outputs as a function of inputs. This is very similar to predict_on_batch(), except that it returns the outputs as tensors rather than numpy arrays. That means you can compute the graph's outputs, then do additional calculations based on them, and gradients will be tracked correctly through the whole process. Parameters ---------- inputs: tensors the values to use for the model's features. The number of inputs must exactly match the length of the model's `features` property. The values may be tensors, numpy arrays, or anything else that can be converted to tensors of the correct shape. outputs: list of Layers the output layers to compute. If this is omitted, self.outputs is used (that is, all outputs that have been added by calling add_output()). Returns ------- The output tensors, or a list of tensors if multiple outputs were requested. """ if len(inputs) != len(self.features): raise ValueError('Expected %d inputs, received %d' % len(self.features), len(inputs)) # TODO Once we drop Python 2 support, turn outputs into a proper keyword arg # instead of using the **kwargs hack. if 'outputs' in kwargs: outputs = kwargs['outputs'] else: outputs = self.outputs feed_dict = dict(zip(self.features, inputs)) results = self._run_graph(outputs, feed_dict, False) if len(results) == 1: return results[0] return results def _predict(self, generator, transformers, outputs, uncertainty): """ Predict outputs for data provided by a generator. This is the private implementation of prediction. Do not call it directly. Instead call one of the public prediction methods. Parameters ---------- generator: Generator Generator that constructs feed dictionaries for TensorGraph. transformers: list List of dc.trans.Transformers. outputs: object If outputs is None, then will assume outputs = self.outputs. If outputs is a Layer/Tensor, then will evaluate and return as a single ndarray. If outputs is a list of Layers/Tensors, will return a list of ndarrays. uncertainty: bool specifies whether this is being called as part of estimating uncertainty. If True, it sets the training flag so that dropout will be enabled, and returns the values of the uncertainty outputs. Returns: y_pred: numpy ndarray of shape (n_samples, n_classes*n_tasks) """ if not self.built: self.build() if outputs is None: outputs = self.outputs elif not isinstance(outputs, collections.Sequence): outputs = [outputs] if uncertainty: if len(self.variances) == 0: raise ValueError('This model cannot compute uncertainties') if len(self.variances) != len(outputs): raise ValueError( 'The number of variances must exactly match the number of outputs') tensors = outputs + self.variances else: tensors = outputs with self._get_tf("Graph").as_default(): # Gather results for each output results = [[] for out in tensors] n_samples = 0 n_enqueued = [0] final_sample = [None] if self.queue_installed: enqueue_thread = threading.Thread( target=_enqueue_batch, args=(self, generator, self._get_tf("Graph"), self.session, n_enqueued, final_sample)) enqueue_thread.start() for feed_dict in self._create_feed_dicts(generator, uncertainty): if self.queue_installed: # Don't let this thread get ahead of the enqueue thread, since if # we try to read more batches than the total number that get queued, # this thread will hang indefinitely. while n_enqueued[0] <= n_samples: if n_samples == final_sample[0]: break time.sleep(0) if n_samples == final_sample[0]: break n_samples += 1 feed_results = self._run_graph(tensors, feed_dict, uncertainty) if tfe.in_eager_mode(): feed_results = [f.numpy() for f in feed_results] if len(feed_results) > 1: if len(transformers): raise ValueError("Does not support transformations " "for multiple outputs.") elif len(feed_results) == 1: result = undo_transforms(feed_results[0], transformers) feed_results = [result] for ind, result in enumerate(feed_results): results[ind].append(result) final_results = [] for result_list in results: final_results.append(np.concatenate(result_list, axis=0)) # If only one output, just return array if len(final_results) == 1: return final_results[0] elif uncertainty: return zip(final_results[:len(outputs)], final_results[len(outputs):]) else: return final_results def predict_on_generator(self, generator, transformers=[], outputs=None): """ Parameters ---------- generator: Generator Generator that constructs feed dictionaries for TensorGraph. transformers: list List of dc.trans.Transformers. outputs: object If outputs is None, then will assume outputs = self.outputs. If outputs is a Layer/Tensor, then will evaluate and return as a single ndarray. If outputs is a list of Layers/Tensors, will return a list of ndarrays. Returns: y_pred: numpy ndarray of shape (n_samples, n_classes*n_tasks) """ return self._predict(generator, transformers, outputs, False) def predict_on_batch(self, X, transformers=[], outputs=None): """Generates predictions for input samples, processing samples in a batch. Parameters ---------- X: ndarray the input data, as a Numpy array. transformers: List List of dc.trans.Transformers Returns ------- A Numpy array of predictions. """ dataset = NumpyDataset(X=X, y=None) generator = self.default_generator(dataset, predict=True, pad_batches=False) return self.predict_on_generator(generator, transformers, outputs) def predict_uncertainty_on_batch(self, X, masks=50): """ Predict the model's outputs, along with the uncertainty in each one. The uncertainty is computed as described in https://arxiv.org/abs/1703.04977. It involves repeating the prediction many times with different dropout masks. The prediction is computed as the average over all the predictions. The uncertainty includes both the variation among the predicted values (epistemic uncertainty) and the model's own estimates for how well it fits the data (aleatoric uncertainty). Not all models support uncertainty prediction. Parameters ---------- X: ndarray the input data, as a Numpy array. masks: int the number of dropout masks to average over Returns ------- for each output, a tuple (y_pred, y_std) where y_pred is the predicted value of the output, and each element of y_std estimates the standard deviation of the corresponding element of y_pred """ dataset = NumpyDataset(X=X, y=None) return self.predict_uncertainty(dataset, masks) def predict(self, dataset, transformers=[], outputs=None): """ Uses self to make predictions on provided Dataset object. Parameters ---------- dataset: dc.data.Dataset Dataset to make prediction on transformers: list List of dc.trans.Transformers. outputs: object If outputs is None, then will assume outputs=self.outputs. If outputs is a Layer/Tensor, then will evaluate and return as a single ndarray. If outputs is a list of Layers/Tensors, will return a list of ndarrays. Returns ------- results: numpy ndarray or list of numpy ndarrays """ generator = self.default_generator(dataset, predict=True, pad_batches=False) return self.predict_on_generator(generator, transformers, outputs) def predict_uncertainty(self, dataset, masks=50): """ Predict the model's outputs, along with the uncertainty in each one. The uncertainty is computed as described in https://arxiv.org/abs/1703.04977. It involves repeating the prediction many times with different dropout masks. The prediction is computed as the average over all the predictions. The uncertainty includes both the variation among the predicted values (epistemic uncertainty) and the model's own estimates for how well it fits the data (aleatoric uncertainty). Not all models support uncertainty prediction. Parameters ---------- dataset: dc.data.Dataset Dataset to make prediction on masks: int the number of dropout masks to average over Returns ------- for each output, a tuple (y_pred, y_std) where y_pred is the predicted value of the output, and each element of y_std estimates the standard deviation of the corresponding element of y_pred """ sum_pred = [] sum_sq_pred = [] sum_var = [] for i in range(masks): generator = self.default_generator( dataset, predict=True, pad_batches=False) results = self._predict(generator, [], self.outputs, True) if len(sum_pred) == 0: for p, v in results: sum_pred.append(p) sum_sq_pred.append(p * p) sum_var.append(v) else: for j, (p, v) in enumerate(results): sum_pred[j] += p sum_sq_pred[j] += p * p sum_var[j] += v output = [] std = [] for i in range(len(sum_pred)): p = sum_pred[i] / masks output.append(p) std.append(np.sqrt(sum_sq_pred[i] / masks - p * p + sum_var[i] / masks)) if len(output) == 1: return (output[0], std[0]) else: return zip(output, std) def topsort(self): def add_layers_to_list(layer, sorted_layers): if layer in sorted_layers: return for in_layer in layer.in_layers: add_layers_to_list(in_layer, sorted_layers) sorted_layers.append(layer) sorted_layers = [] for l in self.features + self.labels + self.task_weights + self.outputs + self.variances: add_layers_to_list(l, sorted_layers) add_layers_to_list(self.loss, sorted_layers) for submodel in self.submodels: if submodel.loss is not None: add_layers_to_list(submodel.loss, sorted_layers) return sorted_layers def build(self): if self.built: return if tfe.in_eager_mode(): # In eager mode, we need to execute every layer once to ensure its variables # have been created. def build_layers(layer, tensors): if layer in tensors: return tensors[layer] inputs = [build_layers(input, tensors) for input in layer.in_layers] if isinstance(layer, Input): # We can't execute Input layers in eager mode, since they would try # to create placeholders. Instead create a tensor of the correct # size and type. shape = [1 if s is None else s for s in layer.shape] tensor = tf.zeros(shape, layer.dtype) else: with tf.name_scope(layer.name): tensor = layer.create_tensor(in_layers=inputs, set_tensors=False) tensors[layer] = tensor return tensor tensors = {} with self._get_tf("Graph").as_default(): # Build the layers. build_layers(self.loss, tensors) for output in self.outputs: build_layers(output, tensors) for variance in self.variances: build_layers(variance, tensors) for submodel in self.submodels: build_layers(submodel.loss, tensors) # Initialize variables. for layer in self.layers.values(): if layer.variable_values is not None: for var, val in zip(layer.variables, layer.variable_values): var.assign(val) self.session = None self._training_placeholder = None self.built = True return # In graph mode we need to create the computation graph. with self._get_tf("Graph").as_default(): self._training_placeholder = tf.placeholder(dtype=tf.float32, shape=()) if self.random_seed is not None: tf.set_random_seed(self.random_seed) self._install_queue() self.built = True for layer in self.topsort(): with tf.name_scope(layer.name): layer.create_tensor(training=self._training_placeholder) self.rnn_initial_states += layer.rnn_initial_states self.rnn_final_states += layer.rnn_final_states self.rnn_zero_states += layer.rnn_zero_states layer.add_summary_to_tg(layer.out_tensor, self.get_layer_variables(layer)) self.session = tf.Session(config=self.configproto) # Ensure all training operators have been created. self._get_tf('train_op') for submodel in self.submodels: train_op = submodel.get_train_op() # Initialize variables. self.session.run(tf.global_variables_initializer()) for layer in self.layers.values(): if layer.variable_values is not None: variables = self.get_layer_variables(layer) for var, val in zip(variables, layer.variable_values): self.session.run(var.assign(val)) for layer in self.layers.values(): if layer.tensorboard: self.tensorboard = True tf.summary.scalar("loss", self.loss.out_tensor) for layer in self.layers.values(): if layer.tensorboard: tf.summary.tensor_summary(layer.name, layer.out_tensor) if self.tensorboard: writer = self._get_tf("FileWriter") writer.add_graph(self._get_tf("Graph")) writer.close() # As a sanity check, make sure all tensors have the correct shape. for layer in self.layers.values(): try: assert list(layer.shape) == layer.out_tensor.get_shape().as_list( ), '%s: Expected shape %s does not match actual shape %s' % ( layer.name, layer.shape, layer.out_tensor.get_shape().as_list()) except NotImplementedError: pass def _install_queue(self): """ """ if not self.use_queue or self.queue_installed: for layer in self.features + self.labels + self.task_weights: layer.pre_queue = True return inputs = self.features + self.labels + self.task_weights if len(inputs) == 0: return names = [] shapes = [] pre_q_inputs = [] q = InputFifoQueue(shapes, names, in_layers=pre_q_inputs) q.name = "%s_%s" % (q.__class__.__name__, len(self.layers) + 1) for layer in inputs: pre_q_input = layer.create_pre_q() shapes.append(pre_q_input.shape) names.append(pre_q_input.name) pre_q_inputs.append(pre_q_input) layer.in_layers.append(q) self._add_layer(q) self.input_queue = q self.queue_installed = True def set_loss(self, layer): self._add_layer(layer) self.loss = layer def add_output(self, layer): """Add an output layer that can be computed by predict()""" self._add_layer(layer) self.outputs.append(layer) def add_variance(self, layer): """Add a layer that computes the variance in an output. If a model supports uncertainty, it must call add_variance() once for every output. Each variance layer has the same shape as the corresponding output, and each element computes an estimate of the variance from aleatoric uncertainty in the corresponding element of the output. In addition, if a model supports uncertainty it MUST use dropout on every layer. Otherwise, the uncertainties it computes will be inaccurate. """ self._add_layer(layer) self.variances.append(layer) def set_optimizer(self, optimizer): """Set the optimizer to use for fitting.""" self.optimizer = optimizer def create_submodel(self, layers=None, loss=None, optimizer=None): """Create an alternate objective for training one piece of a TensorGraph. A TensorGraph consists of a set of layers, and specifies a loss function and optimizer to use for training those layers. Usually this is sufficient, but there are cases where you want to train different parts of a model separately. For example, a GAN consists of a generator and a discriminator. They are trained separately, and they use different loss functions. A submodel defines an alternate objective to use in cases like this. It may optionally specify any of the following: a subset of layers in the model to train; a different loss function; and a different optimizer to use. This method creates a submodel, which you can then pass to fit() to use it for training. Parameters ---------- layers: list the list of layers to train. If None, all layers in the model will be trained. loss: Layer the loss function to optimize. If None, the model's main loss function will be used. optimizer: Optimizer the optimizer to use for training. If None, the model's main optimizer will be used. Returns ------- the newly created submodel, which can be passed to any of the fitting methods. """ if self.built: raise ValueError('Submodels must be created before build() is called.') submodel = Submodel(self, layers, loss, optimizer) self.submodels.append(submodel) if loss is not None: self._add_layer(loss) return submodel def get_pickling_errors(self, obj, seen=None): if seen == None: seen = [] try: state = obj.__getstate__() except AttributeError: return if state == None: return if isinstance(state, tuple): if not isinstance(state[0], dict): state = state[1] else: state = state[0].update(state[1]) result = {} for i in state: try: pickle.dumps(state[i], protocol=2) except pickle.PicklingError: if not state[i] in seen: seen.append(state[i]) result[i] = self.get_pickling_errors(state[i], seen) return result def save(self): # Remove out_tensor from the object to be pickled must_restore = False tensor_objects = self.tensor_objects rnn_initial_states = self.rnn_initial_states rnn_final_states = self.rnn_final_states rnn_zero_states = self.rnn_zero_states session = self.session self.tensor_objects = {} self.rnn_initial_states = [] self.rnn_final_states = [] self.rnn_zero_states = [] self.session = None out_tensors = [] submodel_ops = [] if self.built: must_restore = True for layer in self.topsort(): out_tensors.append(layer.none_tensors()) for submodel in self.submodels: submodel_ops.append(submodel._train_op) submodel._train_op = None training_placeholder = self._training_placeholder self._training_placeholder = None self.built = False # Pickle itself pickle_name = os.path.join(self.model_dir, "model.pickle") with open(pickle_name, 'wb') as fout: try: pickle.dump(self, fout) except Exception as e: logger.info(self.get_pickling_errors(self)) raise e # add out_tensor back to everyone if must_restore: for index, layer in enumerate(self.topsort()): layer.set_tensors(out_tensors[index]) for submodel, op in zip(self.submodels, submodel_ops): submodel._train_op = op self._training_placeholder = training_placeholder self.built = True self.tensor_objects = tensor_objects self.rnn_initial_states = rnn_initial_states self.rnn_final_states = rnn_final_states self.rnn_zero_states = rnn_zero_states self.session = session def evaluate_generator(self, feed_dict_generator, metrics, transformers=[], labels=None, outputs=None, weights=[], per_task_metrics=False): if labels is None: raise ValueError n_tasks = len(self.outputs) n_classes = self.outputs[0].out_tensor.get_shape()[-1].value evaluator = GeneratorEvaluator( self, feed_dict_generator, transformers, labels=labels, outputs=outputs, weights=weights, n_tasks=n_tasks, n_classes=n_classes) if not per_task_metrics: scores = evaluator.compute_model_performance(metrics) return scores else: scores, per_task_scores = evaluator.compute_model_performance( metrics, per_task_metrics=per_task_metrics) return scores, per_task_scores def get_layer_variables(self, layer): """Get the list of trainable variables in a layer of the graph.""" if tfe.in_eager_mode(): return layer.variables if not self.built: self.build() with self._get_tf("Graph").as_default(): if layer.variable_scope == '': return [] return tf.get_collection( tf.GraphKeys.TRAINABLE_VARIABLES, scope=layer.variable_scope) def get_variables(self): """Get the list of all trainable variables in the graph.""" if not self.built: self.build() if tfe.in_eager_mode(): variables = [] for layer in self.layers.values(): variables += layer.variables return variables else: with self._get_tf("Graph").as_default(): return tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES) def get_global_step(self): return self._get_tf("GlobalStep") def _get_tf(self, obj): """Fetches underlying TensorFlow primitives. Parameters ---------- obj: str If "Graph", returns tf.Graph instance. If "FileWriter", returns tf.summary.FileWriter. If "Optimizer", returns the optimizer. If "train_op", returns the train operation. If "summary_op", returns the merged summary. If "GlobalStep" returns the global step. Returns ------- TensorFlow Object """ if obj in self.tensor_objects and self.tensor_objects[obj] is not None: return self.tensor_objects[obj] if obj == "Graph": self.tensor_objects['Graph'] = tf.Graph() elif obj == "FileWriter": self.tensor_objects['FileWriter'] = tf.summary.FileWriter(self.model_dir) elif obj == 'Optimizer': self.tensor_objects['Optimizer'] = self.optimizer._create_optimizer( self._get_tf('GlobalStep')) elif obj == 'train_op': opt = self._get_tf('Optimizer') global_step = self._get_tf('GlobalStep') try: self.tensor_objects['train_op'] = opt.minimize( self.loss.out_tensor, global_step=global_step) except ValueError: # The loss doesn't depend on any variables. self.tensor_objects['train_op'] = 0 elif obj == 'summary_op': self.tensor_objects['summary_op'] = tf.summary.merge_all( key=tf.GraphKeys.SUMMARIES) elif obj == 'GlobalStep': with self._get_tf("Graph").as_default(): self.tensor_objects['GlobalStep'] = create_variable(0, trainable=False) return self._get_tf(obj) def save_checkpoint(self, max_checkpoints_to_keep=5): """Save a checkpoint to disk. Usually you do not need to call this method, since fit() saves checkpoints automatically. If you have disabled automatic checkpointing during fitting, this can be called to manually write checkpoints. Parameters ---------- max_checkpoints_to_keep: int the maximum number of checkpoints to keep. Older checkpoints are discarded. """ saver = tf.train.Saver( self.get_variables(), max_to_keep=max_checkpoints_to_keep) saver.save(self.session, self.save_file, global_step=self.global_step) def get_checkpoints(self): """Get a list of all available checkpoint files.""" return tf.train.get_checkpoint_state( self.model_dir).all_model_checkpoint_paths def restore(self, checkpoint=None): """Reload the values of all variables from a checkpoint file. Parameters ---------- checkpoint: str the path to the checkpoint file to load. If this is None, the most recent checkpoint will be chosen automatically. Call get_checkpoints() to get a list of all available checkpoints. """ if not self.built: self.build() if checkpoint is None: checkpoint = tf.train.latest_checkpoint(self.model_dir) if checkpoint is None: raise ValueError('No checkpoint found') with self._get_tf("Graph").as_default(): reader = NewCheckpointReader(checkpoint) var_names = set([x for x in reader.get_variable_to_shape_map()]) var_list = [] for var in self.get_variables(): name = var.name if ':' in name: name = name[:name.rfind(':')] if name in var_names: var_list.append(var) saver = tf.train.Saver(var_list=var_list) saver.restore(self.session, checkpoint) def get_num_tasks(self): return len(self.outputs) def get_pre_q_input(self, input_layer): layer_name = input_layer.name pre_q_name = "%s_pre_q" % layer_name return self.layers[pre_q_name] @staticmethod def load_from_dir(model_dir, restore=True): pickle_name = os.path.join(model_dir, "model.pickle") with open(pickle_name, 'rb') as fout: tensorgraph = pickle.load(fout) tensorgraph.built = False tensorgraph.model_dir = model_dir if restore: try: tensorgraph.restore() except ValueError: pass # No checkpoint to load return tensorgraph def __del__(self): pass def _create_feed_dicts(self, generator, training): """Create feed dicts for use in fitting or prediction. Parameters ---------- generator: Generator the feed dict generator that was passed to fit_generator() or predict_on_generator() training: bool True during training, False during prediction """ train_value = 1.0 if training else 0.0 if self.queue_installed: while True: yield {self._training_placeholder: train_value} else: for d in generator: feed_dict = {} for key, value in d.items(): if isinstance(key, Input): value = _ensure_value_shape(value, key) if tfe.in_eager_mode(): value = tf.cast(value, key.dtype) feed_dict[key] = value else: feed_dict[key] = value if not tfe.in_eager_mode(): feed_dict[self._training_placeholder] = train_value yield feed_dict def _run_graph(self, outputs, feed_dict, training): """Run the calculations in the graph to compute some outputs. In graph mode, this just calls session.run(). In eager mode, it executes all required layers to compute the output. Parameters ---------- outputs: list of Layers the output layers to compute feed_dict: dict maps input layers to values training: bool whether this is being executed in training mode """ if not tfe.in_eager_mode(): return self.session.run(outputs, feed_dict) def run_layers(layer, tensors): if layer in tensors: return tensors[layer] inputs = [run_layers(input, tensors) for input in layer.in_layers] tensor = layer.create_tensor( in_layers=inputs, set_tensors=False, training=training) tensors[layer] = tensor return tensor tensors = feed_dict.copy() return [run_layers(o, tensors) for o in outputs] def make_estimator(self, feature_columns, weight_column=None, metrics={}, model_dir=None, config=None): """Construct a Tensorflow Estimator from this model. tf.estimator.Estimator is the standard Tensorflow API for representing models. This method provides interoperability between DeepChem and other Tensorflow based tools by allowing any model to be used an Estimator. Once this method returns, the Estimator it created is independent of the model it was created from. They do not share tensors, variables, save files, or any other resources. The Estimator is a self contained object with its own methods for training, evaluation, prediction, checkpointing, etc. Parameters ---------- feature_columns: list of tf.feature_column objects this describes the input features to the models. There must be one entry for each Feature layer in this model's features field. weight_column: tf.feature_column or None if this model includes a Weights layer, this describes the input weights. Otherwise, this should be None. metrics: map metrics that should be computed in calls to evaluate(). For each entry, the key is the name to report for the metric, and the value is a function of the form f(labels, predictions, weights) that returns the tensors for computing the metric. Any of the functions in tf.metrics can be used, as can other functions that satisfy the same interface. model_dir: str the directory in which the Estimator should save files. If None, this defaults to the model's model_dir. config: RunConfig configuration options for the Estimator """ # Check the inputs. if tfe.in_eager_mode(): raise ValueError('make_estimator() is not supported in eager mode') if len(feature_columns) != len(self.features): raise ValueError( 'This model requires %d feature column(s)' % len(self.features)) if len(self.labels) != 1: raise ValueError( 'Can only create an Estimator from a model with exactly one Label input' ) if len(self.task_weights) > 1: raise ValueError( 'Cannot create an Estimator from a model with multiple Weight inputs') if weight_column is None: if len(self.task_weights) > 0: raise ValueError('This model requires a weight column') else: if len(self.task_weights) == 0: raise ValueError( 'Cannot specify weight_column for a model with no Weight inputs') if model_dir is None: model_dir = self.model_dir # Define a function that recursively creates tensors from layers. def create_tensors(layer, tensors, training): if layer in tensors: return tensors[layer] inputs = [ create_tensors(in_layer, tensors, training) for in_layer in layer.in_layers ] tensor = layer.create_tensor( in_layers=inputs, set_tensors=False, training=training) tensors[layer] = tensor vars = tf.get_collection( tf.GraphKeys.TRAINABLE_VARIABLES, scope=layer.name) layer.add_summary_to_tg(tensor, vars) return tensor # Define the model function. def model_fn(features, labels, mode): # Define the inputs. tensors = self.create_estimator_inputs(feature_columns, weight_column, features, labels, mode) for layer, tensor in tensors.items(): layer.add_summary_to_tg(tensor, []) # Create the correct outputs, based on the mode. if mode == tf.estimator.ModeKeys.PREDICT: predictions = {} for i, output in enumerate(self.outputs): predictions[i] = create_tensors(output, tensors, 0) return tf.estimator.EstimatorSpec(mode, predictions=predictions) if mode == tf.estimator.ModeKeys.EVAL: loss = create_tensors(self.loss, tensors, 0) predictions = create_tensors(self.outputs[0], tensors, 0) if len(self.task_weights) == 0: weights = None else: weights = tensors[self.task_weights[0]] eval_metric_ops = {} for name, function in metrics.items(): eval_metric_ops[name] = function(tensors[self.labels[0]], predictions, weights) return tf.estimator.EstimatorSpec( mode, loss=loss, eval_metric_ops=eval_metric_ops) if mode == tf.estimator.ModeKeys.TRAIN: loss = create_tensors(self.loss, tensors, 1) global_step = tf.train.get_global_step() optimizer = self.optimizer._create_optimizer(global_step) train_op = optimizer.minimize(loss, global_step=global_step) return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op) raise ValueError('Unknown mode') # Create the Estimator. return tf.estimator.Estimator( model_fn=model_fn, model_dir=model_dir, config=config) def create_estimator_inputs(self, feature_columns, weight_column, features, labels, mode): """This is called by make_estimator() to create tensors for the inputs. feature_columns and weight_column are the arguments passed to make_estimator(). features, labels, and mode are the arguments passed to the estimator's model function. This method creates and returns a dict with one entry for every Feature, Label, or Weights layer in the graph. The keys are the layers, and the values are the tensors that correspond to them. Any subclass that overrides default_generator() must also override this method. """ if self.__class__.default_generator != TensorGraph.default_generator: raise ValueError( "Class overrides default_generator() but not create_estimator_inputs()" ) tensors = {} for layer, column in zip(self.features, feature_columns): tensors[layer] = tf.feature_column.input_layer(features, [column]) if weight_column is not None: tensors[self.task_weights[0]] = tf.feature_column.input_layer( features, [weight_column]) if labels is not None: tensors[self.labels[0]] = tf.cast(labels, self.labels[0].dtype) return tensors