def test_tensorboard(self):
    df = TensorBoard.list()
    if not df.empty:
        for pid in df['pid']:
            TensorBoard.stop(int(pid))
    TensorBoard.start('./a')
    TensorBoard.start('./b')
    df = TensorBoard.list()
    self.assertEqual(2, len(df))
    self.assertEqual(set(df['logdir']), {'./a', './b'})
    for pid in df['pid']:
        TensorBoard.stop(pid)
train_spec = tf.estimator.TrainSpec(
    # 2: Call read_dataset passing in the training CSV file and the appropriate mode
    input_fn=read_dataset("train.csv", mode=tf.estimator.ModeKeys.TRAIN),
    max_steps=TRAIN_STEPS,
)
exporter = tf.estimator.LatestExporter("exporter", serving_input_fn)
eval_spec = tf.estimator.EvalSpec(
    # 3: Call read_dataset passing in the evaluation CSV file and the appropriate mode
    input_fn=read_dataset("eval.csv", mode=tf.estimator.ModeKeys.EVAL),
    steps=None,
    start_delay_secs=60,          # start evaluating after N seconds
    throttle_secs=EVAL_INTERVAL,  # evaluate every N seconds
    exporters=exporter,
)
tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)

# Run the model
shutil.rmtree("babyweight_trained", ignore_errors=True)  # start fresh each time
tf.summary.FileWriterCache.clear()  # ensure filewriter cache is clear for TensorBoard events file
train_and_evaluate("babyweight_trained")

# Launch TensorBoard against the training output directory
from google.datalab.ml import TensorBoard
TensorBoard().start("./babyweight_trained")

# Shut down any running TensorBoard instances
for pid in TensorBoard.list()["pid"]:
    TensorBoard().stop(pid)
    print("Stopped TensorBoard with pid {}".format(pid))
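# Note: read_dataset() and serving_input_fn are assumed to be defined earlier in the
# notebook and are not part of this excerpt. As a reference point only, a minimal sketch of
# a CSV input function in the same style is shown below; the column names, defaults, and
# batch size are illustrative assumptions, not the notebook's actual schema.

import tensorflow as tf

CSV_COLUMNS = ["weight_pounds", "is_male", "mother_age", "plurality", "gestation_weeks"]  # assumed schema
CSV_DEFAULTS = [[0.0], ["null"], [0.0], ["null"], [0.0]]                                  # assumed defaults
LABEL_COLUMN = "weight_pounds"

def read_dataset_sketch(filename, mode, batch_size=512):
    """Returns an input_fn that turns CSV lines into (features, label) batches."""
    def _input_fn():
        def decode_csv(line):
            columns = tf.decode_csv(line, record_defaults=CSV_DEFAULTS)
            features = dict(zip(CSV_COLUMNS, columns))
            label = features.pop(LABEL_COLUMN)
            return features, label

        dataset = tf.data.TextLineDataset(filename).map(decode_csv)
        if mode == tf.estimator.ModeKeys.TRAIN:
            # Shuffle and repeat indefinitely during training; evaluation reads a single pass.
            dataset = dataset.shuffle(buffer_size=10 * batch_size).repeat()
        return dataset.batch(batch_size)
    return _input_fn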
# ## Monitoring training with TensorBoard
#
# Use this cell to launch TensorBoard

# In[10]:

from google.datalab.ml import TensorBoard
TensorBoard().start('gs://{}/mnist/trained_{}'.format(BUCKET, MODEL_TYPE))

# In[11]:

for pid in TensorBoard.list()['pid']:
    TensorBoard().stop(pid)
    print('Stopped TensorBoard with pid {}'.format(pid))

# Here are my results:
#
# Model | Accuracy | Time taken | Model description | Run time parameters
# --- | :---: | --- | --- | ---
# linear | 91.53 | 3 min | linear | 100 steps, LR=0.01, Batch=512
# linear | 92.73 | 8 min | linear | 1000 steps, LR=0.01, Batch=512
# linear | 92.29 | 18 min | linear | 10000 steps, LR=0.01, Batch=512
# dnn | 98.14 | 15 min | 300-100-30 nodes fully connected | 10000 steps, LR=0.01, Batch=512
# dnn | 97.99 | 48 min | 300-100-30 nodes fully connected | 100000 steps, LR=0.01, Batch=512
# dnn_dropout | 97.84 | 29 min | 300-100-30-DL(0.1) nodes | 20000 steps, LR=0.01, Batch=512
# cnn | 98.97 | 35 min | maxpool(10 5x5 cnn, 2)-maxpool(20 5x5 cnn, 2)-300-DL(0.25) | 20000 steps, LR=0.01, Batch=512
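# The next section's cells call train_and_evaluate(OUTDIR, num_train_steps=2000); that
# wrapper is defined earlier in its own notebook and is not part of this excerpt. Below is
# a minimal, self-contained sketch of such a wrapper around tf.estimator.train_and_evaluate;
# the LinearRegressor, the toy in-memory data, and EVAL_INTERVAL are illustrative
# assumptions, not the notebook's actual model or input pipeline.

import numpy as np
import tensorflow as tf

EVAL_INTERVAL = 30  # assumed throttle between evaluations, in seconds

def train_and_evaluate_sketch(output_dir, num_train_steps):
    """Sketch of a train_and_evaluate(output_dir, num_train_steps) wrapper (assumed shape)."""
    feature_cols = [tf.feature_column.numeric_column("x")]
    estimator = tf.estimator.LinearRegressor(model_dir=output_dir, feature_columns=feature_cols)

    # Toy in-memory data so the sketch runs on its own; the real notebook reads CSV files.
    x = np.arange(1000, dtype=np.float32)
    y = 2.0 * x + 1.0
    train_input_fn = tf.estimator.inputs.numpy_input_fn(
        {"x": x}, y, batch_size=128, num_epochs=None, shuffle=True)
    eval_input_fn = tf.estimator.inputs.numpy_input_fn(
        {"x": x}, y, batch_size=128, num_epochs=1, shuffle=False)

    def serving_input_fn():
        inputs = {"x": tf.placeholder(tf.float32, [None])}
        return tf.estimator.export.ServingInputReceiver(inputs, inputs)

    train_spec = tf.estimator.TrainSpec(input_fn=train_input_fn, max_steps=num_train_steps)
    exporter = tf.estimator.LatestExporter("exporter", serving_input_fn)
    eval_spec = tf.estimator.EvalSpec(
        input_fn=eval_input_fn,
        steps=None,
        start_delay_secs=1,           # start evaluating almost immediately
        throttle_secs=EVAL_INTERVAL,  # evaluate no more often than every N seconds
        exporters=exporter)
    tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)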
TensorBoard().start(OUTDIR)

# <h2>Run training</h2>

# In[ ]:

# Run training
shutil.rmtree(OUTDIR, ignore_errors=True)  # start fresh each time
train_and_evaluate(OUTDIR, num_train_steps=2000)

# <h4>You can now shut TensorBoard down</h4>

# In[ ]:

# List the running TensorBoard instances
TensorBoard().list()

# In[ ]:

# Stop all running TensorBoard instances
pids_df = TensorBoard.list()
if not pids_df.empty:
    for pid in pids_df['pid']:
        TensorBoard().stop(pid)
        print('Stopped TensorBoard with pid {}'.format(pid))

# ## Challenge Exercise
#
# Modify your solution to the challenge exercise in c_dataset.ipynb appropriately.

# Copyright 2017 Google Inc. Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. You may obtain a copy of
# the License at http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software distributed under the
# License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
# either express or implied. See the License for the specific language governing permissions
# and limitations under the License.