Ejemplo n.º 1
0
    def __next__(self):
        """
        Advance the wrapped loop by one step and run the periodic hooks.

        If an interrupt signal was recorded earlier, the loop is finished
        and `StopIteration("SIGINT")` is raised instead of advancing.
        When the wrapped iterator is exhausted, `__finish` runs and the
        `StopIteration` is propagated.

        Returns the global step produced by the wrapped iterator.
        """
        interrupted = self.__signal_received is not None
        if interrupted:
            logger.log('\nKilling Loop.', color=Text.danger)
            loop.finish_loop()
            self.__finish()
            raise StopIteration("SIGINT")

        try:
            step = next(self.__loop)
        except StopIteration as stop:
            # Clean up before letting the caller see the end of iteration.
            self.__finish()
            raise stop

        loop.set_global_step(step)

        # Periodic tracker flush.
        if self.is_interval(self.__log_write_interval, step):
            tracker.save()
        # Periodic empty log call.
        if self.is_interval(self.__log_new_line_interval, step):
            logger.log()

        # Checkpointing is only attempted when model saving is enabled.
        checkpoint_due = (
            self.__is_save_models
            and self.is_interval(self.__save_models_interval, step)
        )
        if checkpoint_due:
            experiment.save_checkpoint()

        return step
Ejemplo n.º 2
0
    def __print_info_and_check_repo(self):
        """
        ## 🖨 Print the experiment info and check git repo status

        Logs the experiment name and run UUID, the run comment (when it is
        not empty), the repository state with the commit message, and the
        run this one was loaded from (when set).

        If `check_repo_dirty` is set and the run is dirty, a failure message
        is logged and the process exits with status 1.
        """

        logger.new_line()
        logger.log([(self.name, Text.title), ': ',
                    (str(self.run.uuid), Text.meta)])

        if self.run.comment != '':
            logger.log(['\t', (self.run.comment, Text.highlight)])

        # The tab must be its own list element. Written as adjacent literals
        # (`"\t" "[dirty]" if ... else "[clean]"`) the tab is concatenated
        # only onto "[dirty]", leaving the "[clean]" line unindented.
        logger.log([
            "\t",
            "[dirty]" if self.run.is_dirty else "[clean]",
            ": ",
            (f"\"{self.run.commit_message.strip()}\"", Text.highlight)
        ])

        if self.run.load_run is not None:
            logger.log([
                "\tloaded from",
                ": ",
                (f"{self.run.load_run}", Text.meta2),
            ])

        # Exit if git repository is dirty
        if self.check_repo_dirty and self.run.is_dirty:
            logger.log([("[FAIL]", Text.danger),
                        " Cannot trial an experiment with uncommitted changes."
                        ])
            exit(1)
Ejemplo n.º 3
0
def main():
    """
    Command-line entry point for launching TensorBoard.

    `-l` lists the available experiments. `-e NAME [NAME ...]` lists the
    last trials of the given experiments and then runs the TensorBoard
    command through the shell. With neither option, the usage line is
    printed.
    """
    lab = Lab(os.getcwd())

    parser = argparse.ArgumentParser(description='Run TensorBoard')
    parser.add_argument(
        "-l",
        action='store_true',
        dest='list',
        help='List all available experiments',
    )
    parser.add_argument(
        '-e',
        required=False,
        type=str,
        nargs='+',
        dest='experiments',
        help='List of experiments',
    )
    options = parser.parse_args()

    if options.list:
        utils.list_experiments(lab, logger)
        return

    if not options.experiments:
        parser.print_usage()
        return

    # Show the selected trials first; this fails if an experiment is missing.
    trials = utils.get_last_trials(lab, options.experiments)
    utils.list_trials(trials, logger)

    # Build the TensorBoard command line and hand it to the shell.
    tensorboard_cmd = utils.get_tensorboard_cmd(lab, options.experiments)
    logger.log("Starting TensorBoard", color=colors.Style.bold)
    os.system(tensorboard_cmd)
Ejemplo n.º 4
0
    def calc_configs(self,
                     configs: Optional[Configs],
                     configs_dict: Dict[str, any],
                     run_order: Optional[List[Union[List[str], str]]]):
        """
        Build the config processor, run it, and emit an empty log call.

        A `ConfigProcessor` is created from `configs` and `configs_dict`,
        stored on `self.configs_processor`, and then called with `run_order`.
        """
        self.configs_processor = ConfigProcessor(configs, configs_dict)
        # The processor instance is callable; invoking it takes the run order.
        self.configs_processor(run_order)

        logger.log()
Ejemplo n.º 5
0
    def handler(self, sig, frame):
        """
        Signal handler that holds back the first interrupt.

        The first signal is stored on `self.signal_received` and a notice is
        logged. Any later signal calls the previous handler with the stored
        signal and frame.
        """
        if self.signal_received is None:
            # First interrupt: remember it for later.
            self.signal_received = (sig, frame)
            logger.log([('\nSIGINT received. Delaying KeyboardInterrupt.',
                         Text.danger)])
            return

        # Second interrupt: pass the stored one on without delaying.
        self.old_handler(*self.signal_received)
Ejemplo n.º 6
0
def _open_dashboard():
    """
    Start the dashboard server if `lab_dashboard` can be imported.

    When the import fails, an installation hint is logged and nothing else
    happens.
    """
    try:
        import lab_dashboard
    except ImportError:
        # ModuleNotFoundError is a subclass of ImportError, so this covers both.
        logger.log("Cannot import ", ('lab_dashboard', Text.highlight), '.')
        logger.log('Install with ',
                   ('pip install machine_learning_lab_dashboard', Text.value))
    else:
        lab_dashboard.start_server()
Ejemplo n.º 7
0
 def __finish(self):
     """
     Restore the previous SIGINT handler and flush state.

     Saves the tracker, emits an empty log call, and saves a checkpoint when
     model saving is enabled.
     """
     try:
         signal.signal(signal.SIGINT, self.old_handler)
     except ValueError:
         # Deliberately ignored: the handler could not be restored here.
         pass

     tracker.save()
     logger.log()

     if not self.__is_save_models:
         return

     logger.log("Saving model...")
     experiment.save_checkpoint()
Ejemplo n.º 8
0
def get_device(use_cuda: bool, cuda_device: int):
    """
    Pick the torch device to run on.

    Returns the CPU device when CUDA is not requested or not available.
    Otherwise returns the requested CUDA device, or, if the index is not
    below the device count, logs a warning and returns the last CUDA device.
    """
    if not (use_cuda and torch.cuda.is_available()):
        return torch.device('cpu')

    if cuda_device < torch.cuda.device_count():
        return torch.device('cuda', cuda_device)

    # Requested index is out of range: warn and use the highest index.
    logger.log(f"Cuda device index {cuda_device} higher than "
               f"device count {torch.cuda.device_count()}", Text.warning)
    return torch.device('cuda', torch.cuda.device_count() - 1)
Ejemplo n.º 9
0
    def _print_artifacts_list(self, table: Dict[str, int],
                              artifacts: Dict[str, Artifact]):
        """
        Log one `name: value` line per (key, artifact) pair.

        The artifact names come from the keys of `table`; nothing is logged
        when `table` is empty. For every key found in any of those artifacts,
        each artifact's string for that key is logged next to its name.
        """
        names = list(table.keys())
        if not names:
            return

        # Union of the keys across all listed artifacts.
        all_keys = {key for name in names for key in artifacts[name].keys()}

        for key in all_keys:
            for name in names:
                text = artifacts[name].get_string(key, artifacts)
                logger.log([(name, Text.key), ": ", (text, Text.value)])
Ejemplo n.º 10
0
 def print_all(self, others: Dict[str, Artifact]):
     """
     Show all stored images in a three-column matplotlib grid.

     If matplotlib is unavailable (`plt` is None), a notice is logged and
     nothing is displayed. Nothing is displayed either when there are no
     stored values.

     `others` is accepted for interface compatibility and is not used here.
     """
     if plt is None:
         logger.log(('matplotlib', logger.Text.highlight),
                    ' not found. So cannot display images')
         # Without matplotlib there is nothing to draw with.
         return

     images = [_to_numpy(v) for v in self._values.values()]
     if not images:
         # A grid with zero rows cannot be created.
         return

     cols = 3
     rows = (len(images) + cols - 1) // cols
     # squeeze=False keeps `axs` two-dimensional even for a single row,
     # so the [row, col] indexing below works for any image count.
     fig, axs = plt.subplots(rows,
                             cols,
                             sharex='all',
                             sharey='all',
                             figsize=(8, 10),
                             squeeze=False)
     fig.suptitle(self.name)
     for i, img in enumerate(images):
         axs[i // cols, i % cols].imshow(img)
     plt.show()
Ejemplo n.º 11
0
    def print_info_and_check_repo(self):
        """
        ## 🖨 Print the experiment info and check git repo status

        Logs the experiment name, the trial comment, and the repository
        state with the commit message. If `check_repo_dirty` is set and the
        trial is dirty, a failure message is logged and the process exits
        with status 1.
        """
        title_parts = [(self.info.name, colors.Style.bold)]
        logger.log_color(title_parts)

        comment_parts = [("\t", None),
                         (self.trial.comment, colors.BrightColor.cyan)]
        logger.log_color(comment_parts)

        state_label = "[dirty]" if self.trial.is_dirty else "[clean]"
        quoted_commit = f"\"{self.trial.commit_message.strip()}\""
        logger.log_color([
            ("\t", None),
            (state_label, None),
            (": ", None),
            (quoted_commit, colors.BrightColor.orange),
        ])

        # Nothing more to do unless the dirty check is enabled and fails.
        if not (self.check_repo_dirty and self.trial.is_dirty):
            return

        logger.log("Cannot trial an experiment with uncommitted changes. ",
                   new_line=False)
        logger.log("[FAIL]", color=colors.BrightColor.red)
        exit(1)
Ejemplo n.º 12
0
    def print(self):
        """
        Log a "Configs:" heading followed by one line per config key.

        Keys are taken from the calculator's order first; keys that appear
        only in the parser's types are appended afterwards and printed as
        ignored. List-append keys and option keys get their own formatting.
        """
        order = self.calculator.topological_order.copy()
        seen = set(order)
        ignored = set()

        # Append parser-only keys, remembering that they were not ordered.
        for key in self.parser.types:
            if key in seen:
                continue
            seen.add(key)
            order.append(key)
            ignored.add(key)

        logger.log("Configs:", Text.heading)

        for key in order:
            computed = getattr(self.calculator.configs, key, None)

            if key in ignored:
                parts = self.__print_config(key, is_ignored=True)
            elif key in self.parser.list_appends:
                parts = self.__print_config(key, value=computed, is_list=True)
            elif key in self.parser.options:
                selected = self.parser.values[key]
                available = self.parser.options[key]
                alternatives = list(available.keys())
                if selected in available:
                    # Show the chosen option separately from the rest.
                    alternatives.remove(selected)
                else:
                    selected = None

                parts = self.__print_config(key,
                                            value=computed,
                                            option=selected,
                                            other_options=alternatives)
            else:
                parts = self.__print_config(key, value=computed)

            logger.log(parts)

        logger.new_line()
Ejemplo n.º 13
0
def get_last_run_checkpoint(experiment_path: PurePath,
                            run_uuid: str,
                            checkpoint: int = -1):
    """
    Resolve a run's checkpoint and return its path.

    The checkpoint is looked up with `get_run_checkpoint`. If that returns
    None, a message is logged and `(None, None)` is returned. Otherwise the
    selection is logged and the result is
    `(experiment_path / run_uuid / "checkpoints" / checkpoint, checkpoint)`.
    """
    resolved = get_run_checkpoint(experiment_path, run_uuid, checkpoint)

    if resolved is None:
        logger.log("Couldn't find a previous run/checkpoint")
        return None, None

    selection = [
        "Selected ",
        ("run", Text.key),
        " = ",
        (run_uuid, Text.value),
        " ",
        ("checkpoint", Text.key),
        " = ",
        (resolved, Text.value),
    ]
    logger.log(selection)

    checkpoints_dir = experiment_path / str(run_uuid) / "checkpoints"
    return checkpoints_dir / str(resolved), resolved
Ejemplo n.º 14
0
    def _print_artifacts_table(self, table: Dict[str, int],
                               artifacts: Dict[str, Artifact]):
        """
        Log the artifacts as `|`-separated rows.

        The artifact names come from the keys of `table`; nothing is logged
        when `table` is empty. The first row holds the formatted names. Each
        following row holds, for one key, every artifact's formatted string
        for that key. Keys are listed in first-seen order.
        """
        names = list(table.keys())
        if not names:
            return

        # dict.fromkeys drops repeated keys while keeping first-seen order.
        keys = list(dict.fromkeys(
            key for name in names for key in artifacts[name].keys()))

        header = '|'.join(
            [self.__format_artifact(table[name], name) for name in names])
        logger.log(header, Text.heading)

        for key in keys:
            cells = [
                self.__format_artifact(
                    table[name], artifacts[name].get_string(key, artifacts))
                for name in names
            ]
            logger.log('|'.join(cells), Text.value)
Ejemplo n.º 15
0
    def __handler(self, sig, frame):
        """
        SIGINT handler for the monitored loop.

        A second interrupt is passed to the previous handler using the
        stored signal. On the first interrupt, the signal is stored when
        `__is_loop_on_interrupt` is set; otherwise `__finish` runs and the
        previous handler is called right away.
        """
        if self.__signal_received is not None:
            # Pass second interrupt without delaying
            logger.log('\nSIGINT received twice. Stopping...',
                       color=Text.danger)
            self.old_handler(*self.__signal_received)
            return

        if not self.__is_loop_on_interrupt:
            self.__finish()
            logger.log('Killing loop...', Text.danger)
            self.old_handler(sig, frame)
            return

        # Store the interrupt signal for later
        self.__signal_received = (sig, frame)
        logger.log('\nSIGINT received. Delaying KeyboardInterrupt.',
                   color=Text.danger)
Ejemplo n.º 16
0
                    # We'll track the progress of that too
                    for i in range(100):
                        time.sleep(0.01)
                        # Progress is tracked manually unlike in the top level iterator.
                        # The progress updates do not have to be sequential.
                        logger.progress(i + 1)

                # Log stored values.
                # This will output to the console and write TensorBoard summaries.
                logger.write()

                # Store progress in the trials file and in the python code as a comment
                if (global_step + 1) % 10 == 0:
                    logger.save_progress()

                # By default we will overwrite the same console line.
                # `new_line` makes it go to the next line.
                # This helps keep the console output concise.
                if (global_step + 1) % 10 == 0:
                    logger.new_line()
        except KeyboardInterrupt:
            logger.finish_loop()
            logger.new_line()
            logger.log(
                f"Stopping the training at {global_step} and saving checkpoints"
            )
            break

# Run the final half-second pause inside a logger section named "Cleaning up".
with logger.section("Cleaning up"):
    time.sleep(0.5)
Ejemplo n.º 17
0
from lab import logger
from lab.logger.colors import Text, Color

if __name__ == '__main__':
    # Demo script: log one sample line per text style and per color.
    logger.log("Colors are missing when views on github", Text.highlight)

    text_styles = [
        ('Styles\n', Text.heading),
        ('Danger\n', Text.danger),
        ('Warning\n', Text.warning),
        ('Meta\n', Text.meta),
        ('Key\n', Text.key),
        ('Meta2\n', Text.meta2),
        ('Title\n', Text.title),
        ('Heading\n', Text.heading),
        ('Value\n', Text.value),
        ('Highlight\n', Text.highlight),
        ('Subtle\n', Text.subtle),
    ]
    logger.log(text_styles)

    color_samples = [
        ('Colors\n', Text.heading),
        ('Red\n', Color.red),
        ('Black\n', Color.black),
        ('Blue\n', Color.blue),
        ('Cyan\n', Color.cyan),
        ('Green\n', Color.green),
        ('Orange\n', Color.orange),
        # A list of styles combines a color with a text style.
        ('Purple Heading\n', [Color.purple, Text.heading]),
        ('White\n', Color.white),
    ]
    logger.log(color_samples)
Ejemplo n.º 18
0
def main():
    """
    Train and test the MNIST model while reporting through the logger.

    Parses the arguments, builds the data loaders, model and optimizer,
    registers the indicators, starts the experiment, and then runs one
    train/test pass per epoch. A keyboard interrupt is delayed until the
    current epoch's work is done and then ends the loop.
    """
    args = parse_args()
    use_cuda = not args.no_cuda and torch.cuda.is_available()

    torch.manual_seed(args.seed)

    device = torch.device("cuda" if use_cuda else "cpu")

    if use_cuda:
        loader_kwargs = {'num_workers': 1, 'pin_memory': True}
    else:
        loader_kwargs = {}

    def mnist_transform():
        # Same tensor conversion and normalization for both splits.
        return transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307, ), (0.3081, ))
        ])

    # Loading data
    with logger.section("Loading data"):
        train_set = datasets.MNIST('./data',
                                   train=True,
                                   download=True,
                                   transform=mnist_transform())
        train_loader = torch.utils.data.DataLoader(
            train_set,
            batch_size=args.batch_size,
            shuffle=True,
            **loader_kwargs)

        test_set = datasets.MNIST('./data',
                                  train=False,
                                  transform=mnist_transform())
        test_loader = torch.utils.data.DataLoader(
            test_set,
            batch_size=args.test_batch_size,
            shuffle=True,
            **loader_kwargs)

    # Model creation
    with logger.section("Create model"):
        model = Net().to(device)
        optimizer = optim.SGD(model.parameters(),
                              lr=args.lr,
                              momentum=args.momentum)

    # Scalar indicators that are printed to the console
    logger.add_indicator("train_loss", queue_limit=10, is_print=True)
    logger.add_indicator("test_loss", is_histogram=False, is_print=True)
    logger.add_indicator("accuracy", is_histogram=False, is_print=True)

    # Histogram indicators for every trainable parameter and its gradient
    for name, param in model.named_parameters():
        if not param.requires_grad:
            continue
        logger.add_indicator(name, is_histogram=True, is_print=False)
        logger.add_indicator(f"{name}_grad",
                             is_histogram=True,
                             is_print=False)

    # Start the experiment
    EXPERIMENT.start_train()

    # Loop through the monitored iterator
    for epoch in logger.loop(range(args.epochs)):
        # Interrupts are captured inside the `with` block and raised once
        # the epoch's work is finished, so an epoch is never cut short.
        try:
            with logger.delayed_keyboard_interrupt():

                # Training and testing
                train(args, model, device, train_loader, optimizer, epoch)
                test(model, device, test_loader)

                # Store parameter values and gradients for the histograms
                for name, param in model.named_parameters():
                    if not param.requires_grad:
                        continue
                    logger.store(name, param.data.cpu().numpy())
                    logger.store(f"{name}_grad", param.grad.cpu().numpy())

                # Clear line and output to console
                logger.write()

                # Output the progress summaries to `trial.yaml` and
                # to the python file header
                logger.save_progress()

                # Move to the next console line at the end of each epoch
                logger.new_line()

        # Handle the delayed interrupt
        except KeyboardInterrupt:
            logger.finish_loop()
            logger.new_line()
            logger.log("\nKilling loop...")
            break
Ejemplo n.º 19
0
 def print_all(self, others: Dict[str, Artifact]):
     """
     Log the artifact name as a heading, then each stored value.

     `others` is accepted for interface compatibility and is not used here.
     """
     logger.log(self.name, TextStyle.heading)
     for stored in self._values.values():
         logger.log(stored, TextStyle.value)
Ejemplo n.º 20
0
def main():
    """
    Train and test the Keras MNIST model inside a TensorFlow session.

    Parses the arguments, loads and scales the data, builds the model and
    the train/test ops, registers the indicators, and then runs one
    train/test pass per epoch. A keyboard interrupt is delayed until the
    current epoch's work is done and then ends the loop.
    """
    args = parse_args()

    # Loading data
    with logger.section("Load data"):
        mnist = tf.keras.datasets.mnist

        (x_train, y_train), (x_test, y_test) = mnist.load_data()
        # Scale the inputs by 1/255
        x_train = x_train / 255.0
        x_test = x_test / 255.0

        train_dataset = create_mnist_dataset(x_train, y_train, args.batch_size)
        test_dataset = create_mnist_dataset(x_test, y_test, args.batch_size)

    # Model creation
    with logger.section("Create model"):
        layers = [
            tf.keras.layers.Flatten(input_shape=(28, 28)),
            tf.keras.layers.Dense(512, activation=tf.nn.relu),
            tf.keras.layers.Dropout(0.2),
            tf.keras.layers.Dense(10, activation=tf.nn.softmax)
        ]
        model = tf.keras.models.Sequential(layers)

    # Creation of the trainer
    with logger.section("Create trainer"):
        optimizer = tf.train.AdamOptimizer(learning_rate=args.lr)

        train_iterator = train_dataset.make_initializable_iterator()
        train_data, train_target = train_iterator.get_next()
        train_loss = loss(model, train_data, train_target)
        train_op = optimizer.minimize(train_loss)

        test_iterator = test_dataset.make_initializable_iterator()
        test_data, test_target = test_iterator.get_next()
        test_loss = loss(model, test_data, test_target)
        test_accuracy = accuracy(model, test_data, test_target)

    # Scalar indicators that are printed to the console
    logger.add_indicator("train_loss", queue_limit=10, is_print=True)
    logger.add_indicator("test_loss", is_histogram=False, is_print=True)
    logger.add_indicator("accuracy", is_histogram=False, is_print=True)

    # Number of full training batches per epoch
    batches = len(x_train) // args.batch_size

    with tf.Session() as session:
        EXPERIMENT.start_train(session)

        # Loop through the monitored iterator
        for epoch in logger.loop(range(args.epochs)):
            # Interrupts are captured inside the `with` block and raised once
            # the epoch's work is finished, so an epoch is never cut short.
            try:
                with logger.delayed_keyboard_interrupt():

                    # Training
                    session.run(train_iterator.initializer)
                    train(args, session, train_loss, train_op, batches, epoch)

                    # Testing
                    session.run(test_iterator.initializer)
                    test_batches = len(x_test) // args.batch_size
                    test(session, test_loss, test_accuracy, test_batches)

                    # Clear line and output to console
                    logger.write()

                    # Output the progress summaries to `trial.yaml` and
                    # to the python file header
                    logger.save_progress()

                    # Move to the next console line at the end of each epoch
                    logger.new_line()

            # Handle the delayed interrupt
            except KeyboardInterrupt:
                logger.finish_loop()
                logger.new_line()
                logger.log("\nKilling loop...")
                break