def test_multiple_trains(self, *args):
    """
    Callback TrainLineage and EvalLineage for multiple times.

    Write TrainLineage and EvalLineage in different files under same
    directory. EvalLineage log file end with '_lineage'.
    """
    args[0].return_value = 10
    for i in range(2):
        # One summary record per phase; create_time offsets keep the
        # underlying log file names distinct between iterations.
        train_record = SummaryRecord(
            SUMMARY_DIR_2, create_time=int(time.time()) + i)
        eval_record = SummaryRecord(
            SUMMARY_DIR_2, create_time=int(time.time() + 10) + i)

        args[1].return_value = os.path.join(
            SUMMARY_DIR_2,
            f'train_out.events.summary.{str(int(time.time()) + 2*i)}.ubuntu_lineage')
        train_callback = TrainLineage(train_record, True)
        train_callback.begin(RunContext(self.run_context))
        train_callback.end(RunContext(self.run_context))

        args[1].return_value = os.path.join(
            SUMMARY_DIR_2,
            f'eval_out.events.summary.{str(int(time.time())+ 2*i + 1)}.ubuntu_lineage')
        eval_callback = EvalLineage(eval_record, True)
        eval_run_context = self.run_context
        eval_run_context['metrics'] = {'accuracy': 0.78 + i + 1}
        eval_run_context['valid_dataset'] = self.run_context['train_dataset']
        eval_run_context['step_num'] = 32
        eval_callback.end(RunContext(eval_run_context))

    # Two iterations x (train + eval) log files, plus their lineage files.
    dir_entries = os.listdir(SUMMARY_DIR_2)
    assert len(dir_entries) == 8
def train_lenet():
    """Train LeNet-5 on MNIST and record lineage into the summary log."""
    context.set_context(mode=context.GRAPH_MODE, save_graphs=True,
                        device_target="CPU")
    sink_mode = False

    # Fetch the MNIST dataset before training starts.
    download_dataset()

    # Hyper-parameters.
    learning_rate = 0.01
    momentum_value = 0.9
    epoch_size = 1
    repeat_size = epoch_size
    mnist_path = "../MNIST_Data"

    # Loss function, network and optimizer.
    net_loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True,
                                             reduction="mean")
    network = LeNet5()
    net_opt = nn.Momentum(network.trainable_params(), learning_rate,
                          momentum_value)

    # Periodically checkpoint the model for subsequent fine-tuning.
    config_ck = CheckpointConfig(save_checkpoint_steps=1875,
                                 keep_checkpoint_max=10)
    ckpoint_cb = ModelCheckpoint(prefix="checkpoint_lenet", config=config_ck)

    # Group layers into an object with training and evaluation features.
    model = Model(network, net_loss, net_opt,
                  metrics={"Accuracy": Accuracy()})

    summary_writer = SummaryRecord(log_dir="../../summary", network=network)
    summary_callback = SummaryStep(summary_writer, flush_step=10)
    # TrainLineage records the training information alongside the summaries.
    train_callback = TrainLineage(summary_writer)

    train_net(model, epoch_size, mnist_path, repeat_size, ckpoint_cb,
              sink_mode, callbacks=[summary_callback, train_callback])
    test_net(network, model, mnist_path)
    summary_writer.close()
def setup_class(cls):
    """Setup method: build a fake RunContext payload shared by the tests."""
    cls.optimizer = Momentum(Tensor(0.12))
    cls.loss_fn = SoftmaxCrossEntropyWithLogits()
    cls.net = ResNet()

    # Values a real training loop would place into the RunContext.
    cls.run_context = {
        'train_network': cls.net,
        'loss_fn': cls.loss_fn,
        'net_outputs': Tensor(np.array([0.03])),
        'optimizer': cls.optimizer,
        'train_dataset': MindDataset(dataset_size=32),
        'epoch_num': 10,
        'cur_step_num': 320,
        'parallel_mode': "stand_alone",
        'device_number': 2,
        'batch_num': 32,
    }

    cls.summary_record = SummaryRecord(SUMMARY_DIR)
    callbacks = [
        ModelCheckpoint(directory=SUMMARY_DIR),
        SummaryStep(cls.summary_record),
        TrainLineage(cls.summary_record),
    ]
    cls.run_context['list_callback'] = _ListCallback(callbacks)
    cls.user_defined_info = {"info": "info1", "version": "v1"}
def test_raise_exception_non_lineage_file(self):
    """Test exception when lineage summary file cannot be found."""
    # NOTE(review): despite the docstring, this test only asserts that a
    # summary file with a non-'_lineage' suffix is created — confirm intent.
    log_dir = os.path.join(BASE_SUMMARY_DIR, 'run4')
    # Start from a clean directory.
    if os.path.exists(log_dir):
        shutil.rmtree(log_dir)
    record = SummaryRecord(log_dir, file_suffix='_MS_lineage_none')
    log_path = record.full_file_name
    assert log_path.endswith('_lineage_none')
    assert os.path.isfile(log_path)
def test_raise_exception_init(self):
    """Test exception when error happened during the initialization process."""
    if os.path.exists(SUMMARY_DIR_3):
        shutil.rmtree(SUMMARY_DIR_3)
    summary_record = SummaryRecord(SUMMARY_DIR_3)

    # A bogus summary record with raise_exception=False: the callbacks
    # must not propagate the initialization error.
    train_callback = TrainLineage('fake_summary_record', False)
    eval_callback = EvalLineage('fake_summary_record', False)
    train_callback.begin(RunContext(self.run_context))
    eval_callback.end(RunContext(self.run_context))

    entries = os.listdir(SUMMARY_DIR_3)
    log_path = summary_record.full_file_name
    # Only the plain summary file exists; no lineage file was written.
    assert len(entries) == 1
    assert os.path.isfile(log_path + "_lineage") is False
def test_raise_exception(self):
    """Test exception when raise_exception is set True."""
    summary_record = SummaryRecord(SUMMARY_DIR_3)
    log_path = summary_record.full_file_name
    assert os.path.isfile(log_path) is True
    assert os.path.isfile(log_path + "_lineage") is False

    train_callback = TrainLineage(summary_record, True)
    eval_callback = EvalLineage(summary_record, False)
    # Passing a plain dict instead of a RunContext must be rejected.
    with self.assertRaises(LineageParamRunContextError):
        train_callback.begin(self.run_context)
        # NOTE(review): unreachable if the line above raises as expected,
        # so the eval path is not actually exercised here — confirm intent.
        eval_callback.end(self.run_context)

    entries = os.listdir(SUMMARY_DIR_3)
    assert len(entries) == 1
    assert os.path.isfile(log_path + "_lineage") is False
def test_graph_summary_sample():
    """Train a small model while a SummaryRecord captures its graph."""
    log.debug("begin test_graph_summary_sample")
    dataset = get_dataset()
    net = Net()
    loss = nn.SoftmaxCrossEntropyWithLogits()
    optim = Momentum(net.trainable_params(), 0.1, 0.9)
    context.set_context(mode=context.GRAPH_MODE)
    model = Model(net, loss_fn=loss, optimizer=optim, metrics=None)
    with SummaryRecord(SUMMARY_DIR, file_suffix="_MS_GRAPH",
                       network=model._train_network) as test_writer:
        model.train(2, dataset)
        # Record a few extra steps after training completes.
        for step in range(1, 5):
            test_writer.record(step)
def test_raise_exception_create_file(self):
    """Test exception when error happened after creating file."""
    if os.path.exists(SUMMARY_DIR_3):
        shutil.rmtree(SUMMARY_DIR_3)
    summary_record = SummaryRecord(SUMMARY_DIR_3)
    eval_callback = EvalLineage(summary_record, False)
    lineage_path = summary_record.full_file_name + "_lineage"

    eval_run_context = self.run_context
    eval_run_context['metrics'] = {'accuracy': 0.78}
    eval_run_context['step_num'] = 32
    eval_run_context['valid_dataset'] = self.run_context['train_dataset']

    # Pre-create the lineage file, then route every open() through a mock
    # whose write() raises IOError; the callback must swallow the failure.
    with open(lineage_path, 'ab'):
        with mock.patch('builtins.open') as mock_handler:
            mock_handler.return_value.__enter__.return_value.write.side_effect = IOError
            eval_callback.end(RunContext(eval_run_context))

    # File exists but nothing was ever successfully written to it.
    assert os.path.isfile(lineage_path) is True
    assert os.path.getsize(lineage_path) == 0
def test_raise_exception_record_trainlineage(self, *args):
    """Test exception when error happened after recording training infos."""
    if os.path.exists(SUMMARY_DIR_3):
        shutil.rmtree(SUMMARY_DIR_3)
    # Make the patched collaborator fail with a RunContext error.
    args[1].side_effect = MindInsightException(
        error=LineageErrors.PARAM_RUN_CONTEXT_ERROR,
        message="RunContext error.")
    summary_record = SummaryRecord(SUMMARY_DIR_3)

    train_callback = TrainLineage(summary_record, True)
    train_callback.begin(RunContext(self.run_context))
    lineage_path = train_callback.lineage_summary.lineage_log_path
    size_after_begin = os.path.getsize(lineage_path)

    train_callback.end(RunContext(self.run_context))
    size_after_train_end = os.path.getsize(lineage_path)
    # Training end still appends to the lineage log.
    assert size_after_train_end > size_after_begin

    eval_callback = EvalLineage(summary_record, False)
    eval_callback.end(RunContext(self.run_context))
    size_after_eval_end = os.path.getsize(lineage_path)
    # With raise_exception=False the eval error is swallowed and the
    # log file size stays unchanged.
    assert size_after_eval_end == size_after_train_end
def test_eval_only(self):
    """Test record evaluation event only.

    Runs an EvalLineage callback with no preceding TrainLineage and checks
    that only the metric (plus an empty dataset graph) is recorded.
    """
    summary_dir = os.path.join(BASE_SUMMARY_DIR, 'eval_only_dir')
    summary_record = SummaryRecord(summary_dir)

    # FIX: copy the shared class-level fixture instead of mutating it in
    # place — the original wrote 'metrics'/'valid_dataset'/'step_num'
    # straight into cls.run_context, leaking state into other test methods.
    eval_run_context = dict(self.run_context)
    eval_run_context['metrics'] = {'accuracy': 0.58}
    eval_run_context['valid_dataset'] = self.run_context['train_dataset']
    eval_run_context['step_num'] = 32

    eval_only_callback = EvalLineage(summary_record)
    eval_only_callback.end(RunContext(eval_run_context))

    res = get_summary_lineage(summary_dir, ['metric', 'dataset_graph'])
    expect_res = {
        'summary_dir': summary_dir,
        'dataset_graph': {},
        'metric': {
            'accuracy': 0.58
        }
    }
    assert res == expect_res
    shutil.rmtree(summary_dir)