class DummyTrainer(Trainer):
    """Trainer that obtains every batch through ``get_from_master``.

    Instead of reading batches itself, each step of ``train`` and
    ``get_test_error`` asks ``get_from_master`` for the next batch and feeds
    it to ``self.net``.
    """

    # NOTE(review): DummyTrainer is defined a second time later in this file;
    # that later definition rebinds the name. Consider removing one copy.

    def _finish_init(self):
        """Create the distributed network from this trainer's settings."""
        self.net = DistFastNet(
            self.learning_rate,
            self.image_shape,
            self.n_out,
            self.init_model,
        )

    def get_from_master(self, data):
        """Return the value broadcast from root 0.

        Args:
            data: Value handed to ``comm.bcast``; the callers in this class
                always pass ``None``.

        Returns:
            Whatever ``comm.bcast(data, root=0)`` returns.
        """
        # NOTE(review): comm is presumably an mpi4py communicator -- confirm.
        data = comm.bcast(data, root=0)
        comm.barrier()
        return data

    def train(self):
        """Train on batches from ``get_from_master`` until told to stop.

        After each batch the batch counter is advanced, a test pass is run
        when ``check_test_data()`` is true, and the learning rate is adjusted
        when ``factor`` is not 1.0 and ``check_adjust_lr()`` is true.
        """
        util.log('rank %d starting training...' % rank)
        while self.should_continue_training():
            train_data = self.get_from_master(None)
            # Take the epoch from the batch just received: this class never
            # assigns self.train_data, so reading it here would fail or
            # return stale state.
            self.curr_epoch = train_data.epoch
            batch, label = train_data.data, train_data.labels
            self.net.train_batch(batch, label)
            self.curr_batch += 1

            if self.check_test_data():
                self.get_test_error()

            if self.factor != 1.0 and self.check_adjust_lr():
                self.adjust_lr()

    def get_test_error(self):
        """Fetch one test batch via ``get_from_master`` and run it in TEST mode."""
        test_data = self.get_from_master(None)
        batch, label = test_data.data, test_data.labels
        self.net.train_batch(batch, label, TEST)

    def save_checkpoint(self):
        """Call ``net.get_dumped_layers()`` and discard the result.

        NOTE(review): presumably the call is kept so this process takes part
        in a collective layer dump driven elsewhere -- confirm.
        """
        self.net.get_dumped_layers()
class DummyTrainer(Trainer):
    """Trainer whose batches all arrive through ``get_from_master``.

    ``train`` and ``get_test_error`` never load data directly; they request
    each batch from ``get_from_master`` and pass it to ``self.net``.
    """

    # NOTE(review): an earlier DummyTrainer definition appears in this file;
    # this later one is the binding that remains. Consider removing one copy.

    def _finish_init(self):
        """Instantiate the distributed network used by this trainer."""
        self.net = DistFastNet(
            self.learning_rate,
            self.image_shape,
            self.n_out,
            self.init_model,
        )

    def get_from_master(self, data):
        """Broadcast ``data`` from root 0 and return the broadcast result.

        Args:
            data: Value passed to ``comm.bcast``; this class only ever
                passes ``None``.

        Returns:
            The return value of ``comm.bcast(data, root=0)``.
        """
        # NOTE(review): comm looks like an mpi4py communicator -- confirm.
        received = comm.bcast(data, root=0)
        comm.barrier()
        return received

    def train(self):
        """Loop over batches from ``get_from_master`` while training continues.

        Each iteration trains on one batch, increments ``curr_batch``, runs a
        test pass if ``check_test_data()`` says so, and adjusts the learning
        rate when ``factor != 1.0`` and ``check_adjust_lr()`` is true.
        """
        util.log('rank %d starting training...' % rank)
        while self.should_continue_training():
            train_data = self.get_from_master(None)
            # The epoch must come from the batch just received; nothing in
            # this class sets self.train_data, so it cannot be relied on.
            self.curr_epoch = train_data.epoch
            self.net.train_batch(train_data.data, train_data.labels)
            self.curr_batch += 1

            if self.check_test_data():
                self.get_test_error()

            if self.factor != 1.0 and self.check_adjust_lr():
                self.adjust_lr()

    def get_test_error(self):
        """Run one batch obtained from ``get_from_master`` through the net in TEST mode."""
        test_data = self.get_from_master(None)
        self.net.train_batch(test_data.data, test_data.labels, TEST)

    def save_checkpoint(self):
        """Invoke ``net.get_dumped_layers()``; the returned value is ignored.

        NOTE(review): likely retained so this process participates in a
        collective dump coordinated elsewhere -- confirm.
        """
        self.net.get_dumped_layers()