def _test(self):
    """Evaluate the encoder on ``self.test_loader`` and log the averages.

    For every test batch the loss and the macro-averaged F1 score are
    computed; their means over all batches are stored in the logger under
    the keys ``test_loss`` and ``accuracy`` and then written out.

    NOTE(review): the value stored under ``accuracy`` is a macro F1 score,
    not a plain accuracy; the key is kept because other code may rely on it.
    """
    self.encoder.eval()
    with torch.no_grad():
        macro_f1s = []
        test_losses = []
        for input_tensor, target_tensor in logger.iterate(
                "Test", self.test_loader):
            # A fresh hidden state is created for every batch.
            encoder_hidden = self.encoder.init_hidden(
                self.device).double().to(self.device)
            input_tensor = input_tensor.to(self.device).unsqueeze(1)
            target_tensor = target_tensor.to(self.device).double()

            encoder_output, encoder_hidden = self.encoder(
                input_tensor, encoder_hidden)
            test_loss = self.loss(encoder_output, target_tensor)

            # Predictions are cast to int32 before scoring, as in the
            # original code (the cast truncates the raw outputs).
            macro_f1 = f1_score(
                y_true=target_tensor.cpu().detach().numpy().ravel(),
                y_pred=encoder_output.cpu().detach().to(
                    torch.int32).numpy().ravel(),
                average='macro')

            # Keep a Python float, not the tensor: np.mean cannot convert
            # tensors that live on a CUDA device, and the training loop
            # stores `.item()` values as well.
            test_losses.append(test_loss.item())
            macro_f1s.append(macro_f1)

        logger.store(test_loss=np.mean(test_losses))
        logger.store(accuracy=np.mean(macro_f1s))
        logger.write()
def loop_section():
    """Run ten monitored iterations, each made of two timed sections.

    Every iteration enters the sections "Step" and "Step2" one after the
    other, sleeping inside each, and then writes the logger output. A new
    line is requested once the loop has finished.
    """
    # (section name, seconds slept inside it), in execution order
    timed_sections = (("Step", 0.5), ("Step2", 0.1))

    for _ in logger.loop(range(10)):
        for title, pause in timed_sections:
            with logger.section(title):
                time.sleep(pause)
        logger.write()

    logger.new_line()
def loop():
    """Demo loop that writes a decaying loss and a growing reward.

    Registers a ``Queue`` indicator named "reward", then for 100 steps
    writes ``loss`` and ``reward`` values. On every second step it also
    writes a ``valid`` value and starts a new output line. Each step ends
    with a 0.3 second pause.
    """
    logger.info(a=2, b=1)
    logger.add_indicator(Queue("reward", 10, True))

    for step in range(100):
        count = step + 1
        logger.write(step, loss=100 / count, reward=math.pow(2, count))

        every_second_step = count % 2 == 0
        if every_second_step:
            logger.write(valid=step ** 10)
            logger.new_line()

        time.sleep(0.3)
def loop():
    """Demo loop that stores the same value under ``loss`` and ``loss_ma``.

    ``loss_ma`` is registered as a queue-type indicator with a queue size
    of 10. Each of the ten steps advances the global step by one, stores
    the values, writes them, and pauses for two seconds; every second step
    also starts a new output line.
    """
    logger.info(a=2, b=1)
    logger.add_indicator('loss_ma',
                         IndicatorType.queue,
                         IndicatorOptions(queue_size=10))

    for step in range(10):
        count = step + 1
        value = 100 / count

        logger.add_global_step(1)
        logger.store(loss=value, loss_ma=value)
        logger.write()

        if count % 2 == 0:
            logger.new_line()

        time.sleep(2)
def _train(self):
    """Train ``self.model`` for one pass over ``self.train_loader``.

    For each batch: move the tensors to ``self.device``, run a forward
    pass, compute the cross-entropy loss, back-propagate and take an
    optimizer step. The loss is stored in the logger under ``train_loss``,
    the global step is advanced, and the logger output is written every
    ``self.train_log_interval`` batches.
    """
    self.model.train()
    for i, (data, target) in logger.enum("Train", self.train_loader):
        data, target = data.to(self.device), target.to(self.device)

        self.optimizer.zero_grad()
        output = self.model(data)
        loss = F.cross_entropy(output, target)
        loss.backward()
        self.optimizer.step()

        # Store a plain Python float rather than the tensor itself, so the
        # logger does not hold on to device tensors; this also matches the
        # other training loops, which store `loss.item()`.
        logger.store(train_loss=loss.item())
        logger.add_global_step()

        if i % self.train_log_interval == 0:
            logger.write()
def _train(self):
    """Run one training pass over ``self.train_loader``.

    Each batch is moved to ``self.device``, scored with the negative
    log-likelihood loss, and used for one optimizer step. The scalar loss
    is handed to the logger under ``train_loss`` and the global step is
    advanced after every batch.
    """
    self.model.train()

    monitored_batches = logger.enumerator("Train", self.train_loader)
    for step, (inputs, labels) in monitored_batches:
        inputs = inputs.to(self.device)
        labels = labels.to(self.device)

        self.optimizer.zero_grad()
        batch_loss = F.nll_loss(self.model(inputs), labels)
        batch_loss.backward()
        self.optimizer.step()

        # Hand the scalar loss to the logger (per the original comment,
        # the logger queues the values and outputs their mean).
        logger.store(train_loss=batch_loss.item())
        logger.add_global_step()

        # Only print the indicators on every `train_log_interval`-th batch.
        if step % self.train_log_interval != 0:
            continue
        logger.write()
def loop(self):
    """Run the train/test cycle for the configured number of epochs.

    Every epoch trains, tests, logs the model parameters and writes the
    logger output. A new console line is started every
    ``__log_new_line_interval`` epochs, and a checkpoint is saved after
    each epoch when ``__is_save_models`` is set.
    """
    epochs = range(self.__epochs)

    # Iterate through the logger so that the loop is monitored.
    for epoch in logger.loop(epochs):
        self._train()
        self._test()
        self.__log_model_params()

        # Clear the current line and print the indicators.
        logger.write()

        # Move on to a fresh console line at the configured interval.
        epochs_done = epoch + 1
        if epochs_done % self.__log_new_line_interval == 0:
            logger.new_line()

        if self.__is_save_models:
            logger.save_checkpoint()
def train(args, session: tf.Session, loss_value, train_op, batches, epoch):
    """Run training steps in ``session`` until the input is exhausted.

    Args:
        args: Parsed arguments; ``args.log_interval`` controls how often
            the indicators are written.
        session: TensorFlow session used to run the ops.
        loss_value: Tensor evaluated together with ``train_op`` to obtain
            the batch loss.
        train_op: Operation that performs one optimization step.
        batches: Number of batches per epoch; used as the section's total
            step count and to derive the global step.
        epoch: Index of the current epoch.
    """
    with logger.section("Train", total_steps=batches):
        batch_idx = 0
        while True:
            try:
                batch_loss, _ = session.run([loss_value, train_op])
            except tf.errors.OutOfRangeError:
                # Raised by `session.run` when no more batches are
                # available; this ends the epoch.
                break

            # Hand the loss to the logger (per the original comment, the
            # logger queues the values and outputs their mean).
            logger.store(train_loss=batch_loss)
            logger.progress(batch_idx + 1)
            logger.set_global_step(epoch * batches + batch_idx)

            # Print the indicators to the console at the log interval.
            if batch_idx % args.log_interval == 0:
                logger.write()

            batch_idx += 1
def train(args, model, device, train_loader, optimizer, epoch):
    """Train ``model`` for one epoch over ``train_loader``.

    Args:
        args: Parsed arguments; ``args.log_interval`` controls how often
            the indicators are written.
        model: Module to train; it is switched to training mode.
        device: Device the batches are moved to.
        train_loader: Iterable of ``(data, target)`` batches with a length.
        optimizer: Optimizer that updates the model parameters.
        epoch: Index of the current epoch, used to derive the global step.
    """
    steps_per_epoch = len(train_loader)

    with logger.section("Train", total_steps=steps_per_epoch):
        model.train()

        for batch_idx, (inputs, labels) in enumerate(train_loader):
            inputs = inputs.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            batch_loss = F.nll_loss(model(inputs), labels)
            batch_loss.backward()
            optimizer.step()

            # Hand the scalar loss to the logger (per the original comment,
            # the logger queues the values and outputs their mean).
            logger.store(train_loss=batch_loss.item())
            logger.progress(batch_idx + 1)
            logger.set_global_step(epoch * steps_per_epoch + batch_idx)

            # Print the indicators to the console at the log interval.
            if batch_idx % args.log_interval != 0:
                continue
            logger.write()
def _train(self):
    """Train ``self.encoder`` for one pass over ``self.train_loader``.

    For each batch a fresh hidden state is created, the encoder is run,
    the loss is back-propagated and the optimizer takes a step. The scalar
    loss is stored in the logger under ``loss``, the global step is
    advanced and the logger output is written.
    """
    # The evaluation routine switches the encoder to eval mode; switch it
    # back here so that layers such as dropout or batch norm behave as
    # training layers in every epoch, not just the first one.
    self.encoder.train()

    for _, (input_tensor, target_tensor) in logger.enum(
            "Train", self.train_loader):
        encoder_hidden = self.encoder.init_hidden(self.device).double().to(
            self.device)
        input_tensor = input_tensor.to(self.device).unsqueeze(1)
        target_tensor = target_tensor.to(self.device).double()

        self.optimizer.zero_grad()
        encoder_output, encoder_hidden = self.encoder(
            input_tensor, encoder_hidden)
        train_loss = self.loss(encoder_output, target_tensor)
        train_loss.backward()
        self.optimizer.step()

        logger.store(loss=train_loss.item())
        logger.add_global_step()
        logger.write()
def main():
    """Train and evaluate the MNIST ``Net`` model with logger monitoring.

    Parses the arguments, builds the MNIST data loaders, creates the model
    and an SGD optimizer, registers the logger indicators, and then runs
    the train/test cycle for ``args.epochs`` epochs. A keyboard interrupt
    is handled at the end of the current epoch and stops the loop.
    """
    args = parse_args()

    # CUDA availability is only queried when it has not been disabled.
    if args.no_cuda:
        use_cuda = False
    else:
        use_cuda = torch.cuda.is_available()

    torch.manual_seed(args.seed)

    if use_cuda:
        device_name = "cuda"
        loader_kwargs = {'num_workers': 1, 'pin_memory': True}
    else:
        device_name = "cpu"
        loader_kwargs = {}
    device = torch.device(device_name)

    def mnist_transform():
        # Tensor conversion followed by normalization with the constants
        # used for both the training and the test set.
        return transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307, ), (0.3081, )),
        ])

    # Loading data
    with logger.section("Loading data"):
        train_set = datasets.MNIST('./data',
                                   train=True,
                                   download=True,
                                   transform=mnist_transform())
        train_loader = torch.utils.data.DataLoader(
            train_set,
            batch_size=args.batch_size,
            shuffle=True,
            **loader_kwargs)

        test_set = datasets.MNIST('./data',
                                  train=False,
                                  transform=mnist_transform())
        test_loader = torch.utils.data.DataLoader(
            test_set,
            batch_size=args.test_batch_size,
            shuffle=True,
            **loader_kwargs)

    # Model creation
    with logger.section("Create model"):
        model = Net().to(device)
        optimizer = optim.SGD(model.parameters(),
                              lr=args.lr,
                              momentum=args.momentum)

    # Specify indicators: scalars that are printed ...
    logger.add_indicator("train_loss", queue_limit=10, is_print=True)
    for scalar_name in ("test_loss", "accuracy"):
        logger.add_indicator(scalar_name, is_histogram=False, is_print=True)

    # ... and histograms for each trainable parameter and its gradient.
    for name, param in model.named_parameters():
        if not param.requires_grad:
            continue
        logger.add_indicator(name, is_histogram=True, is_print=False)
        logger.add_indicator(f"{name}_grad",
                             is_histogram=True,
                             is_print=False)

    # Start the experiment
    EXPERIMENT.start_train()

    # Loop through the monitored iterator
    for epoch in logger.loop(range(args.epochs)):
        # The interrupt is delayed so that (per the original comment) the
        # loop is not stopped in the middle of an epoch; it is handled in
        # the `except` clause below.
        try:
            with logger.delayed_keyboard_interrupt():
                # Training and testing
                train(args, model, device, train_loader, optimizer, epoch)
                test(model, device, test_loader)

                # Store parameter values and gradients for the histograms
                for name, param in model.named_parameters():
                    if not param.requires_grad:
                        continue
                    logger.store(name, param.data.cpu().numpy())
                    logger.store(f"{name}_grad", param.grad.cpu().numpy())

                # Clear line and output to console
                logger.write()

                # Per the original comment, this outputs the progress
                # summaries to `trial.yaml` and the python file header.
                logger.save_progress()

                # Start a new console line at the end of each epoch
                logger.new_line()
        except KeyboardInterrupt:
            # Handle the delayed interrupt
            logger.finish_loop()
            logger.new_line()
            logger.log("\nKilling loop...")
            break
with logger.section("process_samples", is_silent=True): time.sleep(0.5) # A third section with an inner loop with logger.section("train", total_steps=100): # Let it run for multiple iterations. # We'll track the progress of that too for i in range(100): time.sleep(0.01) # Progress is tracked manually unlike in the top level iterator. # The progress updates do not have to be sequential. logger.progress(i + 1) # Log stored values. # This will output to the console and write TensorBoard summaries. logger.write() # Store progress in the trials file and in the python code as a comment if (global_step + 1) % 10 == 0: logger.save_progress() # By default we will overwrite the same console line. # `new_line` makes it go to the next line. # This helps keep the console output concise. if (global_step + 1) % 10 == 0: logger.new_line() except KeyboardInterrupt: logger.finish_loop() logger.new_line() logger.log( f"Stopping the training at {global_step} and saving checkpoints"
def main():
    """Train and evaluate a Keras MNIST classifier in a TensorFlow session.

    Loads and rescales MNIST, builds the model and the train/test ops,
    registers the logger indicators, and then runs the train/test cycle
    for ``args.epochs`` epochs inside a ``tf.Session``. A keyboard
    interrupt is handled at the end of the current epoch and stops the
    loop.
    """
    args = parse_args()

    # Loading data
    with logger.section("Load data"):
        (x_train, y_train), (x_test, y_test) = \
            tf.keras.datasets.mnist.load_data()
        # Divide the image arrays by 255.0, as in the original code.
        x_train = x_train / 255.0
        x_test = x_test / 255.0

        train_dataset = create_mnist_dataset(x_train, y_train,
                                             args.batch_size)
        test_dataset = create_mnist_dataset(x_test, y_test, args.batch_size)

    # Model creation
    with logger.section("Create model"):
        layers = [
            tf.keras.layers.Flatten(input_shape=(28, 28)),
            tf.keras.layers.Dense(512, activation=tf.nn.relu),
            tf.keras.layers.Dropout(0.2),
            tf.keras.layers.Dense(10, activation=tf.nn.softmax),
        ]
        model = tf.keras.models.Sequential(layers)

    # Creation of the trainer
    with logger.section("Create trainer"):
        optimizer = tf.train.AdamOptimizer(learning_rate=args.lr)

        train_iterator = train_dataset.make_initializable_iterator()
        train_data, train_target = train_iterator.get_next()
        train_loss = loss(model, train_data, train_target)
        train_op = optimizer.minimize(train_loss)

        test_iterator = test_dataset.make_initializable_iterator()
        test_data, test_target = test_iterator.get_next()
        test_loss = loss(model, test_data, test_target)
        test_accuracy = accuracy(model, test_data, test_target)

    logger.add_indicator("train_loss", queue_limit=10, is_print=True)
    for scalar_name in ("test_loss", "accuracy"):
        logger.add_indicator(scalar_name, is_histogram=False, is_print=True)

    # Number of full batches per epoch for training and testing
    batches = len(x_train) // args.batch_size
    test_batches = len(x_test) // args.batch_size

    with tf.Session() as session:
        EXPERIMENT.start_train(session)

        # Loop through the monitored iterator
        for epoch in logger.loop(range(args.epochs)):
            # The interrupt is delayed so that (per the original comment)
            # the loop is not stopped in the middle of an epoch; it is
            # handled in the `except` clause below.
            try:
                with logger.delayed_keyboard_interrupt():
                    # Training and testing; each iterator is re-initialized
                    # before it is consumed.
                    session.run(train_iterator.initializer)
                    train(args, session, train_loss, train_op, batches,
                          epoch)
                    session.run(test_iterator.initializer)
                    test(session, test_loss, test_accuracy, test_batches)

                    # Clear line and output to console
                    logger.write()

                    # Per the original comment, this outputs the progress
                    # summaries to `trial.yaml` and the python file header.
                    logger.save_progress()

                    # Start a new console line at the end of each epoch
                    logger.new_line()
            except KeyboardInterrupt:
                # Handle the delayed interrupt
                logger.finish_loop()
                logger.new_line()
                logger.log("\nKilling loop...")
                break
def loop_partial_section():
    """Run ten monitored iterations that report fractional progress.

    Each iteration enters the section "Step" with ``is_partial=True``,
    sleeps for half a second and reports a progress value that cycles
    through 0.2, 0.4, 0.6, 0.8 and 1.0 before the output is written.
    """
    for step in logger.loop(range(10)):
        with logger.section("Step", is_partial=True):
            time.sleep(0.5)
            fraction_done = (step % 5 + 1) / 5
            logger.progress(fraction_done)
        logger.write()