def setUpClass(cls):
     cls.tf_data = Data({
         'x': tf.random.normal(shape=(1, 28, 28, 3)),
         'y': tf.random.uniform(shape=(1, ), maxval=10, dtype=tf.int32),
         'images': tf.random.normal(shape=(1, 28, 28, 3)),
         'embed': np.ones(shape=(1, 3, 3, 3)),
         'embed_images': np.ones(shape=(1, 3, 3, 3))
     })
     cls.torch_data = Data({
         'x': torch.rand(size=(1, 1, 28, 28)),
         'y': torch.rand(size=(3, )),
         'images': torch.rand(size=(1, 3, 28, 28)),
         'embed': np.ones(shape=(1, 3, 3, 3)),
         'embed_images': np.ones(shape=(1, 3, 3, 3))
     })
     cls.log_dir = os.path.join(tempfile.gettempdir(), 'tensorboard')
     cls.train_path = os.path.join(cls.log_dir, 'train')
     cls.embed_path = os.path.join(cls.log_dir, 'train', '00001', 'embed')
     cls.on_begin_msg = "FastEstimator-Tensorboard: writing logs to {}".format(
         cls.log_dir)
Example #2
 def on_batch_end(self, data: Data) -> None:
     if self.system.mode == "train" and isinstance(self.lr_fn, ARC):
         self.lr_fn.accumulate_single_train_loss(data[min(self.model.loss_name)].numpy())
     if self.system.mode == "train" and self.system.log_steps and (self.system.global_step % self.system.log_steps
                                                                   == 0 or self.system.global_step == 1):
         current_lr = np.float32(get_lr(self.model))
         data.write_with_log(self.outputs[0], current_lr)
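For context, a minimal sketch of wiring a scheduling trace like this into a run (module path as in recent FastEstimator releases; the cosine schedule and the pipeline/network objects are illustrative assumptions, not part of the snippet above):

import numpy as np
import fastestimator as fe
from fastestimator.trace.adapt import LRScheduler

def cosine_lr(step):
    # Hypothetical schedule; LRScheduler invokes lr_fn with the current training step
    return 1e-3 * 0.5 * (1 + np.cos(np.pi * min(step, 10000) / 10000))

model = fe.build(model_fn=one_layer_tf_model, optimizer_fn='adam')  # model_fn borrowed from the tests below
estimator = fe.Estimator(pipeline=pipeline,  # assumes an existing fe.Pipeline
                         network=network,    # assumes an existing fe.Network that updates `model`
                         epochs=2,
                         traces=[LRScheduler(model=model, lr_fn=cosine_lr)])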
Example #3
 def on_batch_end(self, data: Data) -> None:
     if self.system.ds_id != '':
         self.fe_per_ds_trace.on_batch_end(DSData(self.system.ds_id, data))
         # Block the main process from writing per-instance info since we already have the more detailed key
         data.per_instance_enabled = False
     super().on_batch_end(data)
     data.per_instance_enabled = True
Example #4
    def test_max_to_keep_tf_architecture(self):
        save_dir = tempfile.mkdtemp()
        model = fe.build(model_fn=one_layer_tf_model, optimizer_fn='adam')
        model_saver = ModelSaver(model=model, save_dir=save_dir, max_to_keep=2, save_architecture=True)
        model_saver.system = sample_system_object()
        model_saver.on_epoch_end(data=Data())
        model_saver.system.epoch_idx += 1
        model_saver.on_epoch_end(data=Data())
        model_saver.system.epoch_idx += 1
        model_saver.on_epoch_end(data=Data())
        model_name = "{}_epoch_{}".format(model_saver.model.model_name, model_saver.system.epoch_idx)
        tf_model_path1 = os.path.join(save_dir, model_name + '.h5')
        tf_architecture_path1 = os.path.join(save_dir, model_name)

        model_saver.system.epoch_idx += 1
        model_saver.on_epoch_end(data=Data())
        model_name = "{}_epoch_{}".format(model_saver.model.model_name, model_saver.system.epoch_idx)
        tf_model_path2 = os.path.join(save_dir, model_name + '.h5')
        tf_architecture_path2 = os.path.join(save_dir, model_name)

        with self.subTest('Check only four files are kept'):
            self.assertEqual(len(os.listdir(save_dir)), 4)

        with self.subTest('Check two latest models are kept'):
            self.assertTrue(os.path.exists(tf_model_path1))
            self.assertTrue(os.path.exists(tf_model_path2))
            self.assertTrue(os.path.exists(tf_architecture_path1))
            self.assertTrue(os.path.isdir(tf_architecture_path1))
            self.assertTrue(os.path.exists(tf_architecture_path2))
            self.assertTrue(os.path.isdir(tf_architecture_path2))
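The test above drives ModelSaver by hand; in normal use the trace is simply passed to the Estimator. A minimal sketch, assuming `pipeline` and `network` already exist:

import tempfile
import fastestimator as fe
from fastestimator.trace.io import ModelSaver

model = fe.build(model_fn=one_layer_tf_model, optimizer_fn='adam')
estimator = fe.Estimator(pipeline=pipeline,  # assumed fe.Pipeline
                         network=network,    # assumed fe.Network that trains `model`
                         epochs=4,
                         traces=[ModelSaver(model=model, save_dir=tempfile.mkdtemp(), max_to_keep=2)])
estimator.fit()  # only the two most recent epoch checkpoints remain in save_dir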
Example #5
    def test_tf_traceability(self):
        if os.path.exists(self.tf_dir) and os.path.isdir(self.tf_dir):
            shutil.rmtree(self.tf_dir)

        trace = Traceability(save_path=self.tf_dir)
        est = _build_estimator(
            fe.build(model_fn=LeNet, optimizer_fn="adam",
                     model_name='tfLeNet'), trace)

        trace.system = est.system
        trace.system.epoch_idx = 1
        trace.system.summary.name = "TF Test"

        trace.on_begin(Data())
        trace.on_end(Data())

        crawler = os.walk(self.tf_dir)
        root = next(crawler)
        self.assertIn('resources', root[1],
                      "A resources subdirectory should have been generated")
        self.assertIn('tf_test.tex', root[2],
                      "The tex file should have been generated")
        # Might be a pdf and/or a .ds_store file depending on system, but shouldn't be more than that
        self.assertLessEqual(len(root[2]), 3,
                             "Extra files should not have been generated")
        figs = next(crawler)
        self.assertIn('tf_test_tfLeNet.pdf', figs[2],
                      "A figure for the model should have been generated")
        self.assertIn('tf_test_logs.png', figs[2],
                      "A log image should have been generated")
        self.assertIn('tf_test.txt', figs[2],
                      "A raw log file should have been generated")
Example #6
 def on_batch_end(self, data: Data) -> None:
     if self.write_graph and self.system.network.epoch_models.symmetric_difference(self.painted_graphs):
         self.writer.write_epoch_models(mode=self.system.mode)
         self.painted_graphs = self.system.network.epoch_models
     if self.system.mode != 'train':
         return
     if self.histogram_freq.freq and self.histogram_freq.is_step and \
             self.system.global_step % self.histogram_freq.freq == 0:
         self.writer.write_weights(mode=self.system.mode,
                                   models=self.system.network.models,
                                   step=self.system.global_step,
                                   visualize=self.paint_weights)
     if self.update_freq.freq and self.update_freq.is_step and self.system.global_step % self.update_freq.freq == 0:
         self.writer.write_scalars(mode=self.system.mode,
                                   step=self.system.global_step,
                                   scalars=filter(lambda x: is_number(x[1]), data.items()))
         self.writer.write_images(mode=self.system.mode,
                                  step=self.system.global_step,
                                  images=filter(lambda x: x[1] is not None,
                                                map(lambda y: (y, data.get(y)), self.write_images)))
         # Each embedding entry is (key, embedding, label, image); drop entries whose embedding is missing
         self.writer.write_embeddings(mode=self.system.mode,
                                      step=self.system.global_step,
                                      embeddings=filter(lambda x: x[1] is not None,
                                                        map(lambda t: (t[0], data.get(t[0]), data.get(t[1]), data.get(t[2])),
                                                            self.write_embeddings)))
Example #7
 def on_epoch_end(self, data: Data) -> None:
     if self.system.mode == 'train' and self.histogram_freq.freq and not self.histogram_freq.is_step and \
             self.system.epoch_idx % self.histogram_freq.freq == 0:
         self.writer.write_weights(mode=self.system.mode,
                                   models=self.system.network.models,
                                   step=self.system.global_step,
                                   visualize=self.paint_weights)
     if self.update_freq.freq and (self.update_freq.is_step
                                   or self.system.epoch_idx % self.update_freq.freq == 0):
         self.writer.write_scalars(mode=self.system.mode,
                                   step=self.system.global_step,
                                   scalars=filter(lambda x: is_number(x[1]), data.items()))
         self.writer.write_images(mode=self.system.mode,
                                  step=self.system.global_step,
                                  images=filter(lambda x: x[1] is not None,
                                                map(lambda y: (y, data.get(y)), self.write_images)))
         self.writer.write_embeddings(mode=self.system.mode,
                                      step=self.system.global_step,
                                      embeddings=filter(lambda x: x[1] is not None,
                                                        map(lambda t: (t[0], data.get(t[0]), data.get(t[1]), data.get(t[2])),
                                                            self.write_embeddings)))
Example #8
 def test_on_batch_end(self):
     self.pbm_calibrator.y_true = []
     self.pbm_calibrator.y_pred = []
     batch1 = {
         'y': np.array([0, 0, 1, 1]),
         'y_pred': np.array([[1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0]])
     }
     self.pbm_calibrator.on_batch_end(data=Data(batch1))
     with self.subTest('Check true values'):
         self.assertTrue(
             is_equal(self.pbm_calibrator.y_true, list(batch1['y'])))
     with self.subTest('Check pred values'):
         self.assertTrue(
             is_equal(self.pbm_calibrator.y_pred, list(batch1['y_pred'])))
     batch2 = {
         'y': np.array([1, 1, 0, 0]),
         'y_pred': np.array([[0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0]])
     }
     self.pbm_calibrator.on_batch_end(data=Data(batch2))
     with self.subTest('Check true values (2 batches)'):
         self.assertTrue(
             is_equal(self.pbm_calibrator.y_true,
                      list(batch1['y']) + list(batch2['y'])))
     with self.subTest('Check pred values (2 batches)'):
         self.assertTrue(
             is_equal(self.pbm_calibrator.y_pred,
                      list(batch1['y_pred']) + list(batch2['y_pred'])))
Example #9
 def on_end(self, data: Data) -> None:
     data.write_with_log("total_time", "{} sec".format(round(time.perf_counter() - self.train_start, 2)))
     for model in self.system.network.models:
         if hasattr(model, "current_optimizer"):
             data.write_with_log(model.model_name + "_lr", get_lr(model))
Example #10
 def on_begin(self, data: Data) -> None:
     if fe.fe_deterministic_seed is not None:
         raise RuntimeError("You cannot use RestoreWizard while in deterministic training mode since a restored"
                            " training can't guarantee that all prngs will be reset to exactly the same position")
     if not self.should_restore():
         self._cleanup(self.dirs)  # Remove any partially completed checkpoints
         print("FastEstimator-RestoreWizard: Backing up to {}".format(self.directory))
     else:
         self._load_key()
         directory = self.dirs[self.dir_idx]
         self.system.load_state(directory)
         data.write_with_log("epoch", self.system.epoch_idx)
         print("FastEstimator-RestoreWizard: Restoring from {}, resume training".format(directory))
         self.dir_idx = int(not self.dir_idx)  # Flip the idx so that the next save goes to the other dir
         self._cleanup(self.dirs[self.dir_idx])  # Clean out the other dir in case it had a partial save
Example #11
 def on_batch_end(self, data: Data) -> None:
     y_pred, y_true = to_number(data['pred']), to_number(data['target_real'])
     if y_true.shape[-1] > 1 and y_true.ndim > 2:
         y_true = np.argmax(y_true, axis=-1)
     if y_pred.shape[-1] > 1 and y_pred.ndim > 2:
         y_pred = np.argmax(y_pred, axis=-1)
     sentence_level_scores = self.batch_precision_parameters(y_true, y_pred)
     data.write_per_instance_log(self.outputs[0], sentence_level_scores)
Example #12
 def on_epoch_end(self, data: Data) -> None:
     for key, ds_vals in self.test_results.items():
         for ds_id, vals in ds_vals.items():
             if ds_id != '':
                 d = DSData(ds_id, data)
                 d.write_with_log(key, np.mean(np.array(vals), axis=0))
         data.write_with_log(key, np.mean(np.array([e for x in ds_vals.values() for e in x]), axis=0))
Example #13
 def on_batch_end(self, data: Data) -> None:
     if self.system.log_steps and (self.system.global_step % self.system.log_steps == 0
                                   or self.system.global_step == 1):
         for key in self.inputs:
             if key in data:
                 data.write_with_log(key, data[key])
         if self.system.global_step > 1:
             self.elapse_times.append(time.perf_counter() - self.step_start)
             data.write_with_log("steps/sec", round(self.system.log_steps / np.sum(self.elapse_times), 2))
         self.elapse_times = []
         self.step_start = time.perf_counter()
Example #14
 def on_epoch_end(self, data: Data) -> None:
     if self.monitor_op(data[self.inputs[0]], self.best):
         self.best = data[self.inputs[0]]
         self.wait = 0
     else:
         self.wait += 1
         if self.wait >= self.patience:
             new_lr = max(self.min_lr, np.float32(self.factor * get_lr(self.model)))
             set_lr(self.model, new_lr)
             self.wait = 0
             data.write_with_log(self.outputs[0], new_lr)
             print("FastEstimator-ReduceLROnPlateau: learning rate reduced to {}".format(new_lr))
Example #15
 def on_begin(self, data: Data) -> None:
     if not os.path.exists(self.directory) or not os.listdir(self.directory):
         print("FastEstimator-RestoreWizard: Backing up in {}".format(self.directory))
     else:
         self._scan_files()
         self._load_files()
         data.write_with_log("epoch", self.system.epoch_idx)
         print("FastEstimator-RestoreWizard: Restoring from {}, resume training".format(self.directory))
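RestoreWizard needs only a backup directory; pointing a rerun at the same directory resumes training. A minimal sketch, assuming `pipeline` and `network` already exist:

import fastestimator as fe
from fastestimator.trace.io import RestoreWizard

estimator = fe.Estimator(pipeline=pipeline,
                         network=network,
                         epochs=10,
                         traces=[RestoreWizard(directory='/tmp/restore')])
estimator.fit()  # if interrupted, running the same script again restores state and resumes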
Example #16
 def on_epoch_end(self, data: Data) -> None:
     if self.binary_classification:
         score = f1_score(self.y_true, self.y_pred, average='binary', **self.kwargs)
     else:
         score = f1_score(self.y_true, self.y_pred, average=None, **self.kwargs)
     data.write_with_log(self.outputs[0], score)
Example #17
 def on_epoch_end(self, data: Data) -> None:
     if self.monitor_op(data[self.metric], self.best):
         self.best = data[self.metric]
         self.since_best = 0
         if self.save_dir:
             self.model_path = save_model(self.model, self.save_dir, self.model_name)
             print("FastEstimator-BestModelSaver: Saved model to {}".format(self.model_path))
     else:
         self.since_best += 1
     data.write_with_log(self.outputs[0], self.since_best)
     data.write_with_log(self.outputs[1], self.best)
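A minimal sketch of constructing the BestModelSaver whose on_epoch_end is shown above; the metric name and the save_best_mode argument are assumptions:

import tempfile
import fastestimator as fe
from fastestimator.trace.io import BestModelSaver

model = fe.build(model_fn=one_layer_tf_model, optimizer_fn='adam')
# Keep only the weights that achieved the lowest 'ce' seen so far
trace = BestModelSaver(model=model, save_dir=tempfile.mkdtemp(), metric='ce', save_best_mode='min')
estimator = fe.Estimator(pipeline=pipeline, network=network, epochs=10, traces=[trace])  # pipeline/network assumed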
Example #18
 def on_end(self, data: Data) -> None:
     index_summaries = DefaultKeyDict(default=lambda x: Summary(name=x))
     for mode in self.mode:
         final_scores = sorted([(idx, elem[-1][1]) for idx, elem in self.index_history[mode].items()],
                               key=lambda x: x[1])
         max_idx_list = {elem[0] for elem in final_scores[-1:-self.n_max_to_keep - 1:-1]}
         min_idx_list = {elem[0] for elem in final_scores[:self.n_min_to_keep]}
         target_idx_list = Set.union(min_idx_list, max_idx_list, self.idx_to_keep)
         for idx in target_idx_list:
             for step, score in self.index_history[mode][idx]:
                 index_summaries[idx].history[mode][self.metric_key][step] = score
     self.system.add_graph(self.outputs[0], list(index_summaries.values()))  # So traceability can draw it
     data.write_without_log(self.outputs[0], list(index_summaries.values()))
Example #19
 def on_batch_end(self, data: Data) -> None:
     y_true, y_pred = to_number(data[self.true_key]), to_number(data[self.pred_key])
     if y_true.shape[-1] > 1 and y_true.ndim > 1:
         y_true = np.argmax(y_true, axis=-1)
     if y_pred.shape[-1] > 1 and y_pred.ndim > 1:
         y_pred = np.argmax(y_pred, axis=-1)
     else:  # binary classification (pred shape is [batch, 1])
         if self.from_logits:
             y_pred = 1 / (1 + np.exp(-y_pred))
         y_pred = np.round(y_pred)
     assert y_pred.size == y_true.size
     self.correct += np.sum(y_pred.ravel() == y_true.ravel())
     self.total += len(y_pred.ravel())
     data.write_per_instance_log(self.outputs[0], np.array(y_pred.ravel() == y_true.ravel(), dtype=np.int8))
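The Accuracy trace above accumulates per-batch matches into a running total; hooking it up only requires the key names (the same constructor call appears in the fixture of Example #26 below):

import fastestimator as fe
from fastestimator.trace.metric import Accuracy

estimator = fe.Estimator(pipeline=pipeline,  # assumed fe.Pipeline producing 'y'
                         network=network,    # assumed fe.Network producing 'y_pred'
                         epochs=2,
                         traces=[Accuracy(true_key="y", pred_key="y_pred", output_name="accuracy")])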
Example #20
 def on_epoch_end(self, data: Data) -> None:
     if self.system.mode == 'train' and self.histogram_freq.freq and not self.histogram_freq.is_step and \
             self.system.epoch_idx % self.histogram_freq.freq == 0:
         self.writer.write_weights(mode=self.system.mode,
                                   models=self.system.network.models,
                                   step=self.system.global_step,
                                   visualize=self.paint_weights)
     # Write out any embeddings which were aggregated over batches
     for name, val_list in self.collected_embeddings.items():
         embeddings = None if any(x[0] is None for x in val_list) else concat([x[0] for x in val_list])
         labels = None if any(x[1] is None for x in val_list) else concat([x[1] for x in val_list])
         imgs = None if any(x[2] is None for x in val_list) else concat([x[2] for x in val_list])
         self.writer.write_embeddings(mode=self.system.mode,
                                      step=self.system.global_step,
                                      embeddings=[(name, embeddings, labels, imgs)])
     self.collected_embeddings.clear()
     # Get any embeddings which were generated externally on epoch end
     if self.embedding_freq.freq and (self.embedding_freq.is_step
                                      or self.system.epoch_idx % self.embedding_freq.freq == 0):
         self.writer.write_embeddings(mode=self.system.mode,
                                      step=self.system.global_step,
                                      embeddings=filter(lambda x: x[1] is not None,
                                                        map(lambda t: (t[0], data.get(t[0]), data.get(t[1]), data.get(t[2])),
                                                            self.write_embeddings)))
     if self.update_freq.freq and (self.update_freq.is_step
                                   or self.system.epoch_idx % self.update_freq.freq == 0):
         self.writer.write_scalars(mode=self.system.mode,
                                   step=self.system.global_step,
                                   scalars=filter(lambda x: is_number(x[1]), data.items()))
         self.writer.write_images(mode=self.system.mode,
                                  step=self.system.global_step,
                                  images=filter(lambda x: x[1] is not None,
                                                map(lambda y: (y, data.get(y)), self.write_images)))
Example #21
 def test_on_epoch_end(self):
     data = Data({})
     eval_essential = EvalEssential(monitor_names='loss')
     eval_essential.system = sample_system_object()
     eval_essential.eval_results = {'loss': [10, 20]}
     eval_essential.on_epoch_end(data=data)
     self.assertEqual(data['loss'], 15.0)
Example #22
 def test_on_epoch_end(self):
     data = Data({})
     test_essential = TestEssential(monitor_names={'loss'})
     test_essential.system = sample_system_object()
     test_essential.test_results['loss'][''].extend([10, 20])
     test_essential.on_epoch_end(data=data)
     self.assertEqual(data['loss'], 15.0)
Example #23
    def _print_message(self, header: str, data: Data, log_epoch: bool = False) -> None:
        """Print a log message to the screen, and record the `data` into the `system` summary.

        Args:
            header: The prefix for the log message.
            data: A collection of data to be recorded.
            log_epoch: Whether epoch information should be included in the log message.
        """
        log_message = header
        if log_epoch:
            log_message += "epoch: {}; ".format(self.system.epoch_idx)
            self.system.write_summary('epoch', self.system.epoch_idx)
        deferred = []
        for key, val in humansorted(data.read_logs().items(), key=lambda x: x[0]):
            if isinstance(val, ValWithError):
                log_message += "{}: {}; ".format(key, str(val))
            else:
                val = to_number(val)
                if val.size > 1:
                    deferred.append("\n{}:\n{};".format(key, np.array2string(val, separator=',')))
                else:
                    log_message += "{}: {}; ".format(key, str(val))
            self.system.write_summary(key, val)
        log_message = log_message.strip()
        for elem in deferred:
            log_message += elem
        print(log_message)
Example #24
 def setUpClass(cls):
     cls.data = Data({'loss': 10})
     cls.train_essential = TrainEssential(monitor_names='loss')
     cls.train_essential.system = sample_system_object()
     cls.train_essential.system.log_steps = 5
     cls.train_essential.system.global_step = 10
     cls.train_essential.epoch_start = time.perf_counter() - 500
     cls.train_essential.step_start = time.perf_counter() - 300
Example #25
 def on_epoch_end(self, data: Data) -> None:
     self.y_true = np.squeeze(np.stack(self.y_true))
     self.y_pred = np.stack(self.y_pred)
     calibrator = cal.PlattBinnerMarginalCalibrator(num_calibration=len(self.y_true), num_bins=10)
     calibrator.train_calibration(probs=self.y_pred, labels=self.y_true)
     if self.save_path:
         if not self.save_key or (self.save_key and to_number(data[self.save_key]) == 0):
             with open(self.save_path, 'wb') as f:
                 dill.dump(calibrator.calibrate, file=f)
             print(f"FastEstimator-PBMCalibrator: Calibrator written to {self.save_path}")
     data.write_without_log(self.outputs[0], calibrator.calibrate(self.y_pred))
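A minimal sketch of constructing the PBMCalibrator exercised above; the constructor arguments and module path are assumptions inferred from the attributes the method uses (true/pred keys, save_path, save_key):

from fastestimator.trace.io import PBMCalibrator  # module path assumed

# Hypothetical wiring: collect 'y'/'y_pred' during testing and dump the fitted
# calibration function to disk at the end of the epoch
pbm_calibrator = PBMCalibrator(true_key='y', pred_key='y_pred', save_path='/tmp/calibrator.pkl', mode='test')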
Example #26
    def setUpClass(cls):
        cls.data_np = Data({'loss': np.nan})
        cls.data_tf = Data({'loss': tf.constant(np.nan)})
        cls.data_torch = Data({'loss': torch.tensor(np.nan)})
        cls.expected_msg = "FastEstimator-TerminateOnNaN: NaN Detected in: loss"
        cls.expected_loss_keys = {"ce"}
        cls.expected_all_keys = {"ce", "accuracy", "f1_score"}

        tf_model = fe.build(model_fn=one_layer_tf_model, optimizer_fn='adam')
        cls.network = fe.Network(ops=[
            ModelOp(model=tf_model, inputs="x", outputs="y"),
            CrossEntropy(inputs=("y_pred", "y"), outputs="ce"),
            UpdateOp(model=tf_model, loss_name="ce")
        ])
        cls.traces = [
            Accuracy(true_key="y", pred_key="y_pred", output_name="accuracy"),
            F1Score(true_key="y", pred_key="y_pred", output_name="f1_score")
        ]
Example #27
    def _run_traces_on_end(traces: Iterable[Trace]) -> None:
        """Invoke the on_end methods of given traces.

        Args:
            traces: List of traces.
        """
        data = Data()
        for trace in traces:
            trace.on_end(data)
Example #28
 def setUpClass(cls):
     cls.data = Data({})
     cls.on_begin_global_step_msg = "FastEstimator-Start: step: 2;"
     cls.on_begin_msg = "FastEstimator-Start: step: 1;"
     cls.on_batch_end_msg = "FastEstimator-Train: step: 1;"
     cls.on_epoch_end_train_msg = "FastEstimator-Train: step: 2; epoch: 0;"
     cls.on_epoch_end_eval_msg = "FastEstimator-Eval: step: 2; epoch: 0;"
     cls.on_epoch_end_test_msg = "FastEstimator-Test: step: 2; epoch: 0;"
     cls.on_end_msg = "FastEstimator-Finish: step: 2;"
Example #29
    def _run_traces_on_ds_begin(self, traces: Iterable[PerDSTrace]) -> None:
        """Invoke the on_ds_begin methods of given traces.

        Args:
            traces: List of traces.
        """
        data = Data()
        for trace in traces:
            trace.on_ds_begin(data)
        self._check_early_exit()
Example #30
    def _run_traces_on_epoch_end(self, traces: Iterable[Trace]) -> None:
        """Invoke the on_epoch_end methods of of given traces.

        Args:
            traces: List of traces.
        """
        data = Data()
        for trace in traces:
            trace.on_epoch_end(data)
        self._check_early_exit()