Example #1
    def test_create_from_path(self):
        file = File("some/path.ext")
        self.assertEqual("some/path.ext", file.path)
        self.assertEqual(None, file.content)
        self.assertEqual("ext", file.extension)

        file = File("some/path.txt.ext")
        self.assertEqual("some/path.txt.ext", file.path)
        self.assertEqual(None, file.content)
        self.assertEqual("ext", file.extension)

        file = File("so.me/path")
        self.assertEqual("so.me/path", file.path)
        self.assertEqual(None, file.content)
        self.assertEqual("", file.extension)

        file = File("some/path.ext", extension="txt")
        self.assertEqual("some/path.ext", file.path)
        self.assertEqual(None, file.content)
        self.assertEqual("txt", file.extension)
Example #2
    def test_assign_raise_not_image(self):
        # given
        path = self._random_path()
        op_processor = MagicMock()
        exp = self._create_run(processor=op_processor)
        attr = FileSeries(exp, path)

        file = File.from_content("some text")
        with create_file(file.content, binary_mode=True) as tmp_filename:
            saved_file = File(tmp_filename)

            # when
            with self.assertRaises(OperationNotSupported):
                attr.assign([file])
            with self.assertRaises(OperationNotSupported):
                attr.assign([saved_file])
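FileSeries holds a series of images, so assigning text files raises OperationNotSupported whether the File carries in-memory content or was read back from disk. For contrast, a sketch of an assignment the attribute should accept, reusing attr from the test above and the image constructor from Example #3:

    import numpy

    # An in-memory image is valid FileSeries content
    image = File.as_image(numpy.random.rand(10, 10) * 255)
    attr.assign([image])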
Example #3
    def test_log_path(self):
        # given
        wait = self._random_wait()
        path = self._random_path()
        op_processor = MagicMock()
        exp = self._create_run(processor=op_processor)
        attr = FileSeries(exp, path)

        file = File.as_image(numpy.random.rand(10, 10) * 255)
        with create_file(file.content, binary_mode=True) as tmp_filename:
            saved_file = File(tmp_filename)

            # when
            attr.log(
                saved_file,
                step=3,
                timestamp=self._now(),
                wait=wait,
                description="something",
            )

            # then
            op_processor.enqueue_operation.assert_called_once_with(
                LogImages(
                    path,
                    [
                        LogImages.ValueType(
                            ImageValue(base64_encode(file.content), None,
                                       "something"),
                            3,
                            self._now(),
                        )
                    ],
                ),
                wait,
            )
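The assertion spells out the wire format: the image bytes are base64-encoded, wrapped in an ImageValue together with a name (None here) and a description, and enqueued as a single LogImages operation. A sketch of the encoding step, assuming base64_encode is a thin wrapper over the standard library (an assumption; the real helper lives in Neptune's internals):

    import base64

    def base64_encode(data: bytes) -> str:
        # Raw image bytes -> base64 text that can travel in the operation payload
        return base64.b64encode(data).decode("utf-8")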
Example #4
    def test_raise_exception_in_constructor(self):
        with self.assertRaises(ValueError):
            File(path="path", content=b"some_content")
        with self.assertRaises(ValueError):
            File()
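The constructor enforces exactly one source: either a path or in-memory content, never both and never neither. An illustrative sketch of that validation (not Neptune's actual implementation):

    class File:
        def __init__(self, path=None, content=None, extension=None):
            # Exactly one of path/content must be provided
            if (path is None) == (content is None):
                raise ValueError("Exactly one of path or content must be set")
            self.path = path
            self.content = content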
Example #5
    def train(self, train_loader):

        scaler = GradScaler(enabled=self.args.fp16_precision)

        # save config file
        save_config_file(self.writer.log_dir, self.args)

        n_iter = 0
        logging.info(f"Start SimCLR training for {self.args.epochs} epochs.")
        logging.info(f"Training with gpu: {self.args.disable_cuda}.")

        model_name = f"{type(self.model).__name__}"
        with open(f"./{model_name}_arch.txt", "w") as f:
            f.write(str(self.model))
        self.run[f"config/model/{model_name}_arch"].upload(
            f"./{model_name}_arch.txt")
        self.run["config/optimizer"] = type(self.optimizer).__name__

        for epoch_counter in range(self.args.epochs):
            for images, _ in tqdm(train_loader):
                images = torch.cat(images, dim=0)

                images = images.to(self.args.device)

                with autocast(enabled=self.args.fp16_precision):
                    features = self.model(images)
                    logits, labels = self.info_nce_loss(features)
                    loss = self.criterion(logits, labels)

                self.optimizer.zero_grad()

                scaler.scale(loss).backward()

                scaler.step(self.optimizer)
                scaler.update()

                if n_iter % self.args.log_every_n_steps == 0:
                    top1, top5 = accuracy(logits, labels, topk=(1, 5))
                    self.writer.add_scalar("loss", loss, global_step=n_iter)
                    self.writer.add_scalar("acc/top1",
                                           top1[0],
                                           global_step=n_iter)
                    self.writer.add_scalar("acc/top5",
                                           top5[0],
                                           global_step=n_iter)
                    self.writer.add_scalar("learning_rate",
                                           self.scheduler.get_lr()[0],
                                           global_step=n_iter)

                    self.run["metrics/acc"].log(top1[0])
                    self.run["metrics/loss"].log(loss)
                    self.run["metrics/lr"].log(self.scheduler.get_lr()[0])

                n_iter += 1

            # warmup: skip scheduler stepping for the first 10 epochs
            if epoch_counter >= 10:
                self.scheduler.step()
            logging.debug(
                f"Epoch: {epoch_counter}\tLoss: {loss}\tTop1 accuracy: {top1[0]}"
            )

        logging.info("Training has finished.")
        # save model checkpoints
        checkpoint_name = "checkpoint_{:04d}.pth.tar".format(self.args.epochs)
        save_checkpoint(
            {
                "epoch": self.args.epochs,
                "arch": self.args.arch,
                "state_dict": self.model.state_dict(),
                "optimizer": self.optimizer.state_dict(),
            },
            is_best=False,
            filename=os.path.join(self.writer.log_dir, checkpoint_name),
        )
        logging.info(
            f"Model checkpoint and metadata have been saved at {self.writer.log_dir}."
        )

        self.run["checkpoints"].upload(
            File(os.path.join(self.writer.log_dir, checkpoint_name)))
        self.run.stop()
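The inner loop follows the standard torch.cuda.amp mixed-precision recipe: run the forward pass under autocast, scale the loss before backward, then step through the scaler so gradients are unscaled and steps with inf/NaN gradients are skipped. A self-contained sketch of the pattern with a toy model and data (requires a CUDA device):

    import torch
    from torch.cuda.amp import GradScaler, autocast

    model = torch.nn.Linear(10, 2).cuda()
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
    scaler = GradScaler(enabled=True)

    for _ in range(3):
        x = torch.randn(8, 10, device="cuda")
        y = torch.randint(0, 2, (8,), device="cuda")
        optimizer.zero_grad()
        with autocast(enabled=True):
            loss = torch.nn.functional.cross_entropy(model(x), y)
        scaler.scale(loss).backward()  # scale up to avoid fp16 gradient underflow
        scaler.step(optimizer)         # unscale grads; skip the step on inf/NaN
        scaler.update()                # adjust the scale factor for the next step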