def __print_info_and_check_repo(self):
    """
    ## 🖨 Print the experiment info and check git repo status
    """
    logger.new_line()
    logger.log([
        (self.name, Text.title),
        ': ',
        (str(self.run.index), Text.meta)
    ])

    if self.run.comment != '':
        logger.log(['\t', (self.run.comment, Text.highlight)])

    # Parenthesize the conditional so the tab prefix applies to
    # both the dirty and the clean case
    logger.log([
        "\t" + ("[dirty]" if self.run.is_dirty else "[clean]"),
        ": ",
        (f"\"{self.run.commit_message.strip()}\"", Text.highlight)
    ])

    # Exit if the git repository is dirty
    if self.check_repo_dirty and self.run.is_dirty:
        logger.log([("[FAIL]", Text.danger),
                    " Cannot run an experiment with uncommitted changes."])
        exit(1)
import time


def loop_section():
    for step in logger.loop(range(0, 10)):
        with logger.section("Step"):
            time.sleep(0.5)
        with logger.section("Step2"):
            time.sleep(0.1)
        logger.write()
        logger.new_line()
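# A hedged companion sketch: `logger.progress` (used in the training-loop
# fragment further below) reports manual, fractional progress; this
# combines it with `logger.section` under the assumption that manual
# progress updates also work inside a section.
def section_with_progress():
    with logger.section("Process"):
        for i in range(100):
            time.sleep(0.01)
            # Manual progress updates; they need not be sequential
            logger.progress(i + 1)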
def calc_configs(self,
                 configs: Optional[Configs],
                 configs_dict: Optional[Dict[str, Any]] = None,
                 run_order: Optional[List[Union[List[str], str]]] = None):
    if configs_dict is None:
        configs_dict = {}

    self.configs_processor = ConfigProcessor(configs, configs_dict)
    self.configs_processor(run_order)

    logger.new_line()
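# A hypothetical call site for `calc_configs`, sketched for illustration:
# `experiment`, `MyConfigs`, and the dictionary keys are stand-ins, not
# names from this project. The values in `configs_dict` presumably
# override defaults on the configs object, and `run_order` constrains
# the order in which configs are computed.
experiment.calc_configs(MyConfigs(),
                        configs_dict={'batch_size': 32,
                                      'learning_rate': 1e-3},
                        run_order=['model', 'optimizer'])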
import math
import time


def loop():
    logger.info(a=2, b=1)

    logger.add_indicator(Queue("reward", 10, True))

    for i in range(100):
        logger.write(i, loss=100 / (i + 1), reward=math.pow(2, (i + 1)))
        if (i + 1) % 2 == 0:
            logger.write(valid=i ** 10)
            logger.new_line()
        time.sleep(0.3)
import time


def loop():
    logger.info(a=2, b=1)

    logger.add_indicator('loss_ma',
                         IndicatorType.queue,
                         IndicatorOptions(queue_size=10))

    for i in range(10):
        logger.add_global_step(1)
        logger.store(loss=100 / (i + 1), loss_ma=100 / (i + 1))
        logger.write()
        if (i + 1) % 2 == 0:
            logger.new_line()
        time.sleep(2)
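# An equivalence sketch based on the two loop variants above: passing
# values to `logger.write` as keyword arguments (first variant) appears
# to be shorthand for `logger.store(...)` followed by a bare
# `logger.write()` (second variant). This is an assumption drawn from
# the examples, not a documented guarantee.
for i in range(10):
    logger.add_global_step(1)
    logger.write(loss=100 / (i + 1))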
def loop(self):
    # Loop through the monitored iterator
    for epoch in logger.loop(range(0, self.__epochs)):
        self._train()
        self._test()
        self.__log_model_params()

        # Clear line and output to console
        logger.write()

        # Clear line and go to the next line;
        # that is, we add a new line to the output
        # at the end of each epoch
        if (epoch + 1) % self.__log_new_line_interval == 0:
            logger.new_line()

        if self.__is_save_models:
            logger.save_checkpoint()
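# A hedged sketch of the `__log_model_params` method referenced above,
# assuming a PyTorch model stored on `self.model` (an assumption) and
# following the parameter-logging pattern of the MNIST example below:
def __log_model_params(self):
    for name, param in self.model.named_parameters():
        if param.requires_grad:
            logger.store(name, param.data.cpu().numpy())
            logger.store(f"{name}_grad", param.grad.cpu().numpy())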
def print(self):
    order = self.calculator.topological_order.copy()
    added = set(order)
    ignored = set()

    # Append parsed configs that were never computed,
    # and mark them as ignored
    for k in self.parser.types:
        if k not in added:
            added.add(k)
            order.append(k)
            ignored.add(k)

    logger.log("Configs:", Text.heading)

    for k in order:
        computed = getattr(self.calculator.configs, k, None)

        if k in ignored:
            parts = self.__print_config(k, is_ignored=True)
        elif k in self.parser.list_appends:
            parts = self.__print_config(k, value=computed, is_list=True)
        elif k in self.parser.options:
            v = self.parser.values[k]
            opts = self.parser.options[k]
            lst = list(opts.keys())
            if v in opts:
                lst.remove(v)
            else:
                v = None
            parts = self.__print_config(k,
                                        value=computed,
                                        option=v,
                                        other_options=lst)
        else:
            parts = self.__print_config(k, value=computed)

        logger.log(parts)

    logger.new_line()
# Project-local names (`Net`, `train`, `test`, `parse_args`, `logger`,
# `EXPERIMENT`) are assumed to be defined elsewhere in this example.
import torch
import torch.optim as optim
from torchvision import datasets, transforms


def main():
    args = parse_args()

    use_cuda = not args.no_cuda and torch.cuda.is_available()
    torch.manual_seed(args.seed)
    device = torch.device("cuda" if use_cuda else "cpu")
    kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

    # Loading data
    with logger.section("Loading data"):
        train_loader = torch.utils.data.DataLoader(
            datasets.MNIST('./data', train=True, download=True,
                           transform=transforms.Compose([
                               transforms.ToTensor(),
                               transforms.Normalize((0.1307,), (0.3081,))
                           ])),
            batch_size=args.batch_size, shuffle=True, **kwargs)
        test_loader = torch.utils.data.DataLoader(
            datasets.MNIST('./data', train=False,
                           transform=transforms.Compose([
                               transforms.ToTensor(),
                               transforms.Normalize((0.1307,), (0.3081,))
                           ])),
            batch_size=args.test_batch_size, shuffle=True, **kwargs)

    # Model creation
    with logger.section("Create model"):
        model = Net().to(device)
        optimizer = optim.SGD(model.parameters(),
                              lr=args.lr,
                              momentum=args.momentum)

    # Specify indicators
    logger.add_indicator("train_loss", queue_limit=10, is_print=True)
    logger.add_indicator("test_loss", is_histogram=False, is_print=True)
    logger.add_indicator("accuracy", is_histogram=False, is_print=True)
    for name, param in model.named_parameters():
        if param.requires_grad:
            logger.add_indicator(name, is_histogram=True, is_print=False)
            logger.add_indicator(f"{name}_grad",
                                 is_histogram=True,
                                 is_print=False)

    # Start the experiment
    EXPERIMENT.start_train()

    # Loop through the monitored iterator
    for epoch in logger.loop(range(0, args.epochs)):
        # Delayed keyboard interrupt handling to use
        # keyboard interrupts to end the loop.
        # This will capture interrupts and finish
        # the loop at the end of processing the iteration;
        # i.e. the loop won't stop in the middle of an epoch.
        try:
            with logger.delayed_keyboard_interrupt():
                # Training and testing
                train(args, model, device, train_loader, optimizer, epoch)
                test(model, device, test_loader)

                # Add histograms with model parameter values and gradients
                for name, param in model.named_parameters():
                    if param.requires_grad:
                        logger.store(name, param.data.cpu().numpy())
                        logger.store(f"{name}_grad",
                                     param.grad.cpu().numpy())

                # Clear line and output to console
                logger.write()

                # Output the progress summaries to `trial.yaml` and
                # to the python file header
                logger.save_progress()

                # Clear line and go to the next line;
                # that is, we add a new line to the output
                # at the end of each epoch
                logger.new_line()

        # Handle the delayed interrupt
        except KeyboardInterrupt:
            logger.finish_loop()
            logger.new_line()
            logger.log("\nKilling loop...")
            break
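# A minimal sketch of the `parse_args` helper assumed by `main` above;
# it must supply every attribute the example reads. The defaults here
# are illustrative, not the project's actual values.
import argparse


def parse_args():
    parser = argparse.ArgumentParser(description='MNIST example')
    parser.add_argument('--batch-size', type=int, default=64)
    parser.add_argument('--test-batch-size', type=int, default=1000)
    parser.add_argument('--epochs', type=int, default=10)
    parser.add_argument('--lr', type=float, default=0.01)
    parser.add_argument('--momentum', type=float, default=0.5)
    parser.add_argument('--no-cuda', action='store_true', default=False)
    parser.add_argument('--seed', type=int, default=1)
    return parser.parse_args()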
# This fragment runs inside the main training loop; the enclosing loop
# and `try` are reconstructed here so the `except KeyboardInterrupt`
# below has its pair (`total_steps` is assumed).
for global_step in logger.loop(range(0, total_steps)):
    try:
        # We'll track the progress of that too
        for i in range(100):
            time.sleep(0.01)
            # Progress is tracked manually, unlike in the top-level
            # iterator. The progress updates do not have to be sequential.
            logger.progress(i + 1)

        # Log stored values.
        # This will output to the console and write TensorBoard summaries.
        logger.write()

        if (global_step + 1) % 10 == 0:
            # Store progress in the trials file and in the python code
            # as a comment
            logger.save_progress()

            # By default we overwrite the same console line.
            # `new_line` makes it go to the next line.
            # This helps keep the console output concise.
            logger.new_line()
    except KeyboardInterrupt:
        logger.finish_loop()
        logger.new_line()
        logger.log(
            f"Stopping the training at {global_step} and saving checkpoints")
        break

with logger.section("Cleaning up"):
    time.sleep(0.5)
# Project-local names (`parse_args`, `create_mnist_dataset`, `loss`,
# `accuracy`, `train`, `test`, `logger`, `EXPERIMENT`) are assumed to be
# defined elsewhere in this example.
import tensorflow as tf


def main():
    args = parse_args()

    # Loading data
    with logger.section("Load data"):
        mnist = tf.keras.datasets.mnist

        (x_train, y_train), (x_test, y_test) = mnist.load_data()
        x_train, x_test = x_train / 255.0, x_test / 255.0

        train_dataset = create_mnist_dataset(x_train, y_train,
                                             args.batch_size)
        test_dataset = create_mnist_dataset(x_test, y_test, args.batch_size)

    # Model creation
    with logger.section("Create model"):
        model = tf.keras.models.Sequential([
            tf.keras.layers.Flatten(input_shape=(28, 28)),
            tf.keras.layers.Dense(512, activation=tf.nn.relu),
            tf.keras.layers.Dropout(0.2),
            tf.keras.layers.Dense(10, activation=tf.nn.softmax)
        ])

    # Creation of the trainer
    with logger.section("Create trainer"):
        optimizer = tf.train.AdamOptimizer(learning_rate=args.lr)

        train_iterator = train_dataset.make_initializable_iterator()
        data, target = train_iterator.get_next()
        train_loss = loss(model, data, target)
        train_op = optimizer.minimize(train_loss)

        test_iterator = test_dataset.make_initializable_iterator()
        data, target = test_iterator.get_next()
        test_loss = loss(model, data, target)
        test_accuracy = accuracy(model, data, target)

    logger.add_indicator("train_loss", queue_limit=10, is_print=True)
    logger.add_indicator("test_loss", is_histogram=False, is_print=True)
    logger.add_indicator("accuracy", is_histogram=False, is_print=True)

    # Number of training batches per epoch; this was commented out in
    # the original, but `train` below needs it
    batches = len(x_train) // args.batch_size

    with tf.Session() as session:
        EXPERIMENT.start_train(session)

        # Loop through the monitored iterator
        for epoch in logger.loop(range(0, args.epochs)):
            # Delayed keyboard interrupt handling to use
            # keyboard interrupts to end the loop.
            # This will capture interrupts and finish
            # the loop at the end of processing the iteration;
            # i.e. the loop won't stop in the middle of an epoch.
            try:
                with logger.delayed_keyboard_interrupt():
                    # Training and testing
                    session.run(train_iterator.initializer)
                    train(args, session, train_loss, train_op, batches, epoch)
                    session.run(test_iterator.initializer)
                    test(session, test_loss, test_accuracy,
                         len(x_test) // args.batch_size)

                    # Clear line and output to console
                    logger.write()

                    # Output the progress summaries to `trial.yaml` and
                    # to the python file header
                    logger.save_progress()

                    # Clear line and go to the next line;
                    # that is, we add a new line to the output
                    # at the end of each epoch
                    logger.new_line()

            # Handle the delayed interrupt
            except KeyboardInterrupt:
                logger.finish_loop()
                logger.new_line()
                logger.log("\nKilling loop...")
                break
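# Hedged sketches of the `loss` and `accuracy` helpers the TensorFlow
# example assumes; their real definitions are not shown in the source.
# Since the model ends in a softmax, cross-entropy is computed on
# probabilities (`from_logits=False`, the Keras default).
def loss(model, data, target):
    probs = model(data)
    return tf.reduce_mean(
        tf.keras.losses.sparse_categorical_crossentropy(target, probs))


def accuracy(model, data, target):
    probs = model(data)
    predictions = tf.argmax(probs, axis=1, output_type=tf.int64)
    correct = tf.equal(predictions, tf.cast(target, tf.int64))
    return tf.reduce_mean(tf.cast(correct, tf.float32))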