Example #1
    def load(self, f=None):
        if f is None:
            f = self.ckpt

        if isinstance(self.net, nn.DataParallel):
            self.net.module.load_state_dict(
                torch.load(f, map_location=self.device))
        else:
            self.net.load_state_dict(torch.load(f, map_location=self.device))

        if os.path.exists(f + ".config"):
            with open(f + ".config", "r") as fp:
                dic = json.loads(fp.read())
            self.config = defaultdict(float, dic)
            print("Loaded,", self.config)

        if os.path.exists(f + ".optimizer"):
            self.optimizer.load_state_dict(torch.load(f + ".optimizer"))

        if os.path.exists(f + ".scheduler") and self.lr_scheduler is not None:
            self.lr_scheduler.load_state_dict(torch.load(f + ".scheduler"))

        if os.path.exists(f + ".csv"):
            self.dataframe = pd.read_csv(f + ".csv")

        if self.config["tensorboard_dir"] is not None:
            self.tensorboard = SummaryWriter(self.config["tensorboard_dir"])
        else:
            self.tensorboard = None
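
A minimal usage sketch for this `load` method, assuming a `trainer` built as in Example #4 and the side-car files (`.config`, `.optimizer`, `.scheduler`, `.csv`) written by the matching `save` shown in Example #7:

# Hypothetical resume sketch; `trainer` and the checkpoint path are assumptions.
trainer.save("./ckpt/ckpt.pth")   # writes ckpt.pth plus .optimizer/.scheduler/.config/.csv
# ... later, possibly in a fresh process with the same architecture ...
trainer.load("./ckpt/ckpt.pth")   # restores weights, optimizer, scheduler, config and history
trainer.train(epoch=5)            # continue training from the restored state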
Example #2
 def test_write_scalar(self):
     summary_writer = SummaryWriter(self._log_dir)
     tag_name = "learning_rate"
     learning_rate = torch.tensor(.01)
     for i in range(10):
         summary_writer.add_scalar(tag_name, learning_rate, i)
         learning_rate -= 0.005
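
A hedged sketch of how such a test could read the written events back and assert on them, assuming the `tensorboard` package is installed and the writer has been flushed first (the `EventAccumulator` API below is the standard one from that package):

from tensorboard.backend.event_processing import event_accumulator

def read_scalars(log_dir, tag):
    # Parse the event files under log_dir and return (step, value) pairs for one tag.
    ea = event_accumulator.EventAccumulator(log_dir)
    ea.Reload()
    return [(e.step, e.value) for e in ea.Scalars(tag)]

# e.g. after summary_writer.flush(), read_scalars(self._log_dir, "learning_rate")
# should yield ten (step, value) pairs with decreasing values.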
Example #3
 def on_train_begin(self, logs=None):
     self.train_writer = SummaryWriter(
         os.path.join(self.logdir, "train"), purge_step=self.purge_step
     )
     self.val_writer = SummaryWriter(
         os.path.join(self.logdir, "val"), purge_step=self.purge_step
     )
     self.steps = self.params["steps"]
     self.global_step = 0
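
`purge_step` here is the standard `SummaryWriter` argument: events previously written with a `global_step` greater than or equal to it are discarded, so a crashed-and-restarted run does not leave duplicate points. A minimal sketch, with a hypothetical `resume_step`:

from torch.utils.tensorboard import SummaryWriter

resume_step = 1200  # hypothetical step at which the previous run crashed
writer = SummaryWriter("runs/train", purge_step=resume_step)
for step in range(resume_step, resume_step + 100):
    writer.add_scalar("loss", 0.0, step)  # placeholder value
writer.close()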
Example #4
    def __init__(
        self,
        net,
        criterion=None,
        metric=cal_accuracy,
        train_dataloader=None,
        val_dataloader=None,
        test_dataloader=None,
        optimizer=None,
        lr_scheduler=None,
        tensorboard_dir="./pinkblack_tb/",
        ckpt="./ckpt/ckpt.pth",
        experiment_id=None,
        clip_gradient_norm=False,
        is_data_dict=False,
    ):
        """
        :param net: nn.Module Network
        :param criterion: loss function. __call__(prediction, *batch_y)
        :param metric: metric function __call__(prediction, *batch_y).
                        *note* : bigger is better. (When early stopping, the larger metric value is selected.)

        :param train_dataloader:
        :param val_dataloader:
        :param test_dataloader:

        :param optimizer: torch.optim
        :param lr_scheduler:
        :param tensorboard_dir: tensorboard log directory
        :param ckpt: checkpoint (weight) path
        :param experiment_id: identifier shown on tensorboard
        :param clip_gradient_norm: False, or a scalar value (if a number is given, gradients are clipped to that norm.)
        :param is_data_dict: whether the dataloaders yield a dict
        (if not, each batch is given as an (x, y) pair tuple.)
        """

        self.net = net
        self.criterion = nn.CrossEntropyLoss(
        ) if criterion is None else criterion
        self.metric = metric

        self.dataloader = dict()
        if train_dataloader is not None:
            self.dataloader["train"] = train_dataloader
        if val_dataloader is not None:
            self.dataloader["val"] = val_dataloader
        if test_dataloader is not None:
            self.dataloader["test"] = test_dataloader

        if train_dataloader is None or val_dataloader is None:
            logging.warning("Init Trainer :: Two dataloaders are needed!")

        self.optimizer = (Adam(
            filter(lambda p: p.requires_grad, self.net.parameters()))
                          if optimizer is None else optimizer)
        self.lr_scheduler = lr_scheduler

        self.ckpt = ckpt

        self.config = defaultdict(float)
        self.config["max_train_metric"] = -1e8
        self.config["max_val_metric"] = -1e8
        self.config["max_test_metric"] = -1e8
        self.config["tensorboard_dir"] = tensorboard_dir
        self.config["timestamp"] = datetime.now().strftime("%Y%m%d_%H%M%S")
        self.config["clip_gradient_norm"] = clip_gradient_norm
        self.config["is_data_dict"] = is_data_dict

        if experiment_id is None:
            self.config["experiment_id"] = self.config["timestamp"]
        else:
            self.config["experiment_id"] = experiment_id

        self.dataframe = pd.DataFrame()

        self.device = Trainer.get_model_device(self.net)
        if self.device == torch.device("cpu"):
            logging.warning(
                "Init Trainer :: Do you really want to train the network on CPU instead of GPU?"
            )

        if self.config["tensorboard_dir"] is not None:
            self.tensorboard = SummaryWriter(self.config["tensorboard_dir"])
        else:
            self.tensorboard = None

        self.callbacks = defaultdict(list)
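
A minimal construction sketch for the `Trainer` above, reusing the example's imports; `MyNet`, `train_loader`, and `val_loader` are hypothetical placeholders rather than part of the original example:

net = MyNet().to("cuda")          # assumed model; Trainer reads the device from its parameters
trainer = Trainer(
    net,
    criterion=nn.CrossEntropyLoss(),
    train_dataloader=train_loader,
    val_dataloader=val_loader,
    tensorboard_dir="./pinkblack_tb/",
    ckpt="./ckpt/ckpt.pth",
    experiment_id="baseline",
)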
Example #5
class TensorBoard(Callback):
    """Callback that streams epoch results to tensorboard events folder.

Supports all values that can be represented as a string,
including 1D iterables such as `np.ndarray`.

Example:

    ```python
    tensorboard_logger = TensorBoard('runs')
    model.fit(X_train, Y_train, callbacks=[tensorboard_logger])
    ```
  """
    def __init__(self,
                 logdir: Optional[str] = None,
                 update_freq: Union[str, int] = "epoch",
                 **kwargs) -> None:
        """
        Arguments:
            logdir: Save directory location. Default is
                runs/**CURRENT_DATETIME_HOSTNAME**, which changes after each run.
                Use hierarchical folder structure to compare
                between runs easily. e.g. pass in 'runs/exp1', 'runs/exp2', etc.
                for each new experiment to compare across them.
            update_freq: `'batch'`, `'epoch'`, or an integer. When using `'batch'`,
                writes the losses and metrics to TensorBoard after each batch; with
                `'epoch'`, they are written at the end of each epoch. An integer N
                writes them every N batches.
            **kwargs: Options to pass to `SummaryWriter` object
        """
        self.logdir = logdir
        self.writer = None
        self.keys = None
        if update_freq == "batch":
            self.update_freq = 1
        else:
            self.update_freq = update_freq
        self._open_args = kwargs if kwargs else {}
        super(TensorBoard, self).__init__()

    def on_train_begin(self, logs=None):
        self.writer = SummaryWriter(self.logdir, **self._open_args)

    def on_train_batch_end(self, batch: int, logs):
        if self.update_freq == "epoch":
            return
        logs = logs or {}

        def handle_value(k):
            is_zero_dim_ndarray = isinstance(k, np.ndarray) and k.ndim == 0
            if isinstance(k, six.string_types):
                return k
            elif isinstance(k, tp.Iterable) and not is_zero_dim_ndarray:
                return '"[%s]"' % (", ".join(map(str, k)))
            else:
                return k

        if self.update_freq != "epoch" and batch % self.update_freq == 0:
            if self.keys is None:
                self.keys = sorted(logs.keys())
            row_dict = collections.OrderedDict({"batch": batch})
            row_dict.update(
                (key + "batch", handle_value(logs[key])) for key in self.keys)

            for key, value in row_dict.items():
                self.writer.add_scalar(key, value, batch)

    def on_epoch_end(self, epoch, logs=None):
        logs = logs or {}

        def handle_value(k):
            is_zero_dim_ndarray = isinstance(k, np.ndarray) and k.ndim == 0
            if isinstance(k, six.string_types):
                return k
            elif isinstance(k, tp.Iterable) and not is_zero_dim_ndarray:
                return '"[%s]"' % (", ".join(map(str, k)))
            else:
                return k

        if self.keys is None:
            self.keys = sorted(logs.keys())

        row_dict = collections.OrderedDict({"epoch": epoch})
        row_dict.update((key, handle_value(logs[key])) for key in self.keys)

        for key, value in row_dict.items():
            self.writer.add_scalar(key, value, epoch)

    def on_train_end(self, logs=None):
        self.writer.close()
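
A short usage sketch for this callback, assuming an elegy-style `model.fit`; an integer `update_freq` logs every N batches instead of once per epoch, as implemented in `on_train_batch_end` above:

# Placeholders: `model`, `X_train`, `y_train` come from the surrounding training script.
tensorboard_logger = TensorBoard("runs/exp1", update_freq=50)  # write scalars every 50 batches
model.fit(X_train, y_train, callbacks=[tensorboard_logger])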
Example #6
def main(
    steps_per_epoch: int = 200,
    epochs: int = 50,
    debug: bool = False,
    eager: bool = False,
    logdir: str = "runs",
):

    if debug:
        import debugpy

        print("Waiting for debugger...")
        debugpy.listen(5678)
        debugpy.wait_for_client()

    current_time = datetime.now().strftime("%b%d_%H-%M-%S")
    logdir = os.path.join(logdir, current_time)

    X_train, _1, X_test, _2 = dataget.image.mnist(global_cache=True).get()
    # Now binarize data
    X_train = (X_train > 0).astype(jnp.float32)
    X_test = (X_test > 0).astype(jnp.float32)

    print("X_train:", X_train.shape, X_train.dtype)
    print("X_test:", X_test.shape, X_test.dtype)

    vae = VAE(latent_size=LATENT_SIZE)

    # model = VariationalAutoEncoder(latent_size=LATENT_SIZE, optimizer=optax.adam(1e-3))

    def loss(x, y_pred):
        logits, mean, stddev = y_pred
        ce_loss = elegy.losses.binary_crossentropy(x, logits,
                                                   from_logits=True).mean()
        kl_loss = 2e-1 * kl_divergence(mean, stddev)
        return ce_loss + kl_loss

    model = elegy.Model(
        module=vae,
        loss=loss,
        optimizer=optax.adam(1e-3),
        run_eagerly=eager,
    )

    # Fit with datasets in memory
    history = model.fit(
        x=X_train,
        epochs=epochs,
        batch_size=64,
        steps_per_epoch=steps_per_epoch,
        validation_data=(X_test, ),
        shuffle=True,
        callbacks=[TensorBoard(logdir)],
    )

    print(
        "\n\n\nMetrics and images can be explored using tensorboard using:",
        f"\n \t\t\t tensorboard --logdir {logdir}",
    )

    elegy.utils.plot_history(history)

    # get random samples
    idxs = np.random.randint(0, len(X_test), size=(5, ))
    x_sample = X_test[idxs]

    # get predictions
    logits, mean, stddev = model.predict(x=x_sample)
    y_pred = jax.nn.sigmoid(logits)

    # plot and save results
    with SummaryWriter(os.path.join(logdir, "val")) as tbwriter:
        figure = plt.figure(figsize=(12, 12))
        for i in range(5):
            plt.subplot(2, 5, i + 1)
            plt.imshow(x_sample[i], cmap="gray")
            plt.subplot(2, 5, 5 + i + 1)
            plt.imshow(y_pred[i], cmap="gray")
        # # tbwriter.add_figure("VAE Example", figure, epochs)

    plt.show()
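
The `kl_divergence` helper used in `loss` above is not shown in this example; a common definition for a diagonal Gaussian posterior against a standard normal prior, sketched here with jax.numpy as an assumption about what it computes, is:

import jax.numpy as jnp

def kl_divergence(mean, stddev):
    # Assumed definition: KL( N(mean, stddev^2) || N(0, 1) ), averaged over all dimensions.
    return jnp.mean(
        0.5 * (jnp.square(stddev) + jnp.square(mean) - 1.0 - 2.0 * jnp.log(stddev))
    )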
Example #7
class Trainer:
    experiment_name = None

    def __init__(
        self,
        net,
        criterion=None,
        metric=cal_accuracy,
        train_dataloader=None,
        val_dataloader=None,
        test_dataloader=None,
        optimizer=None,
        lr_scheduler=None,
        tensorboard_dir="./pinkblack_tb/",
        ckpt="./ckpt/ckpt.pth",
        experiment_id=None,
        clip_gradient_norm=False,
        is_data_dict=False,
    ):
        """
        :param net: nn.Module Network
        :param criterion: loss function. __call__(prediction, *batch_y)
        :param metric: metric function __call__(prediction, *batch_y).
                        *note* : bigger is better. (When early stopping, the larger metric value is selected.)

        :param train_dataloader:
        :param val_dataloader:
        :param test_dataloader:

        :param optimizer: torch.optim
        :param lr_scheduler:
        :param tensorboard_dir: tensorboard log directory
        :param ckpt: checkpoint (weight) path
        :param experiment_id: identifier shown on tensorboard
        :param clip_gradient_norm: False, or a scalar value (if a number is given, gradients are clipped to that norm.)
        :param is_data_dict: whether the dataloaders yield a dict
        (if not, each batch is given as an (x, y) pair tuple.)
        """

        self.net = net
        self.criterion = nn.CrossEntropyLoss(
        ) if criterion is None else criterion
        self.metric = metric

        self.dataloader = dict()
        if train_dataloader is not None:
            self.dataloader["train"] = train_dataloader
        if val_dataloader is not None:
            self.dataloader["val"] = val_dataloader
        if test_dataloader is not None:
            self.dataloader["test"] = test_dataloader

        if train_dataloader is None or val_dataloader is None:
            logging.warning("Init Trainer :: Two dataloaders are needed!")

        self.optimizer = (Adam(
            filter(lambda p: p.requires_grad, self.net.parameters()))
                          if optimizer is None else optimizer)
        self.lr_scheduler = lr_scheduler

        self.ckpt = ckpt

        self.config = defaultdict(float)
        self.config["max_train_metric"] = -1e8
        self.config["max_val_metric"] = -1e8
        self.config["max_test_metric"] = -1e8
        self.config["tensorboard_dir"] = tensorboard_dir
        self.config["timestamp"] = datetime.now().strftime("%Y%m%d_%H%M%S")
        self.config["clip_gradient_norm"] = clip_gradient_norm
        self.config["is_data_dict"] = is_data_dict

        if experiment_id is None:
            self.config["experiment_id"] = self.config["timestamp"]
        else:
            self.config["experiment_id"] = experiment_id

        self.dataframe = pd.DataFrame()

        self.device = Trainer.get_model_device(self.net)
        if self.device == torch.device("cpu"):
            logging.warning(
                "Init Trainer :: Do you really want to train the network on CPU instead of GPU?"
            )

        if self.config["tensorboard_dir"] is not None:
            self.tensorboard = SummaryWriter(self.config["tensorboard_dir"])
        else:
            self.tensorboard = None

        self.callbacks = defaultdict(list)

    def register_callback(self, func, phase="val"):
        self.callbacks[phase].append(func)

    def save(self, f=None):
        if f is None:
            f = self.ckpt
        os.makedirs(os.path.dirname(f), exist_ok=True)
        if isinstance(self.net, nn.DataParallel):
            state_dict = self.net.module.state_dict()
        else:
            state_dict = self.net.state_dict()
        torch.save(state_dict, f)
        torch.save(self.optimizer.state_dict(), f + ".optimizer")

        if self.lr_scheduler is not None:
            torch.save(self.lr_scheduler.state_dict(), f + ".scheduler")

        with open(f + ".config", "w") as fp:
            json.dump(self.config, fp)

        self.dataframe.to_csv(f + ".csv", float_format="%.6f", index=False)

    def load(self, f=None):
        if f is None:
            f = self.ckpt

        if isinstance(self.net, nn.DataParallel):
            self.net.module.load_state_dict(
                torch.load(f, map_location=self.device))
        else:
            self.net.load_state_dict(torch.load(f, map_location=self.device))

        if os.path.exists(f + ".config"):
            with open(f + ".config", "r") as fp:
                dic = json.loads(fp.read())
            self.config = defaultdict(float, dic)
            print("Loaded,", self.config)

        if os.path.exists(f + ".optimizer"):
            self.optimizer.load_state_dict(torch.load(f + ".optimizer"))

        if os.path.exists(f + ".scheduler") and self.lr_scheduler is not None:
            self.lr_scheduler.load_state_dict(torch.load(f + ".scheduler"))

        if os.path.exists(f + ".csv"):
            self.dataframe = pd.read_csv(f + ".csv")

        if self.config["tensorboard_dir"] is not None:
            self.tensorboard = SummaryWriter(self.config["tensorboard_dir"])
        else:
            self.tensorboard = None

    def train(self,
              epoch=None,
              phases=None,
              step=None,
              validation_interval=1,
              save_every_validation=False):
        """
        :param epoch: number of passes over the train dataloader
        :param phases: any phase not needed can be dropped from ['train', 'val', 'test'].
        >> trainer.train(1, phases=['val'])

        :param step: total number of steps when training in units of steps rather than epochs.
        :param validation_interval: interval between validations
        :param save_every_validation: if True, a checkpoint is saved at every validation.
        :return: None
        """
        if phases is None:
            phases = list(self.dataloader.keys())

        if epoch is None and step is None:
            raise ValueError(
                "PinkBlack.trainer :: epoch or step should be specified.")

        train_unit = "epoch" if step is None else "step"
        self.config[train_unit] = int(self.config[train_unit])

        num_unit = epoch if step is None else step
        validation_interval = 1 if validation_interval <= 0 else validation_interval

        kwarg_list = [train_unit]
        for phase in phases:
            kwarg_list += [f"{phase}_loss", f"{phase}_metric"]
        kwarg_list += ["lr", "time"]

        print_row(kwarg_list=[""] * len(kwarg_list), pad="-")
        print_row(kwarg_list=kwarg_list, pad=" ")
        print_row(kwarg_list=[""] * len(kwarg_list), pad="-")

        start = self.config[train_unit]

        for i in range(start, start + num_unit, validation_interval):
            start_time = time()
            if train_unit == "epoch":
                for phase in phases:
                    self.config[f"{phase}_loss"], self.config[
                        f"{phase}_metric"] = self._train(
                            phase, num_steps=len(self.dataloader[phase]))
                    for func in self.callbacks[phase]:
                        func()
                self.config[train_unit] += 1
            elif train_unit == "step":
                for phase in phases:
                    if phase == "train":
                        # when num_unit is not evenly divisible by validation_interval
                        num_steps = min((start + num_unit - i),
                                        validation_interval)
                        self.config[train_unit] += num_steps
                    else:
                        num_steps = len(self.dataloader[phase])
                    self.config[f"{phase}_loss"], self.config[
                        f"{phase}_metric"] = self._train(phase,
                                                         num_steps=num_steps)
                    for func in self.callbacks[phase]:
                        func()
            else:
                raise NotImplementedError

            if self.lr_scheduler is not None:
                if isinstance(self.lr_scheduler, ReduceLROnPlateau):
                    self.lr_scheduler.step(self.config["val_metric"])
                else:
                    self.lr_scheduler.step()

            i_str = str(self.config[train_unit])
            is_best = self.config["max_val_metric"] < self.config["val_metric"]
            if is_best:
                for phase in phases:
                    self.config[f"max_{phase}_metric"] = max(
                        self.config[f"max_{phase}_metric"],
                        self.config[f"{phase}_metric"])
                i_str = (str(self.config[train_unit])) + "-best"

            elapsed_time = time() - start_time
            if self.tensorboard is not None:
                _loss, _metric = {}, {}
                for phase in phases:
                    _loss[phase] = self.config[f"{phase}_loss"]
                    _metric[phase] = self.config[f"{phase}_metric"]

                self.tensorboard.add_scalars(
                    f"{self.config['experiment_id']}/loss", _loss,
                    self.config[train_unit])
                self.tensorboard.add_scalars(
                    f"{self.config['experiment_id']}/metric", _metric,
                    self.config[train_unit])
                self.tensorboard.add_scalar(
                    f"{self.config['experiment_id']}/time", elapsed_time,
                    self.config[train_unit])
                self.tensorboard.add_scalar(
                    f"{self.config['experiment_id']}/lr",
                    self.optimizer.param_groups[0]["lr"],
                    self.config[train_unit],
                )

            print_kwarg = [i_str]
            for phase in phases:
                print_kwarg += [
                    self.config[f"{phase}_loss"],
                    self.config[f"{phase}_metric"]
                ]
            print_kwarg += [self.optimizer.param_groups[0]["lr"], elapsed_time]

            print_row(kwarg_list=print_kwarg, pad=" ")
            print_row(kwarg_list=[""] * len(kwarg_list), pad="-")
            self.dataframe = self.dataframe.append(dict(
                zip(kwarg_list, print_kwarg)),
                                                   ignore_index=True)

            if is_best:
                self.save(self.ckpt)
                if Trainer.experiment_name is not None:
                    self.update_experiment()

            if save_every_validation:
                self.save(self.ckpt + f"-{self.config[train_unit]}")

    def _step(self, phase, iterator, only_inference=False):

        if self.config["is_data_dict"]:
            batch_dict = next(iterator)
            batch_size = batch_dict[list(batch_dict.keys())[0]].size(0)
            for k, v in batch_dict.items():
                batch_dict[k] = v.to(self.device)
        else:
            batch_x, batch_y = next(iterator)
            if isinstance(batch_x, list):
                batch_x = [x.to(self.device) for x in batch_x]
            else:
                batch_x = [batch_x.to(self.device)]

            if isinstance(batch_y, list):
                batch_y = [y.to(self.device) for y in batch_y]
            else:
                batch_y = [batch_y.to(self.device)]

            batch_size = batch_x[0].size(0)

        self.optimizer.zero_grad()
        with torch.set_grad_enabled(phase == "train"):
            if self.config["is_data_dict"]:
                outputs = self.net(batch_dict)
                if not only_inference:
                    loss = self.criterion(outputs, batch_dict)
            else:
                outputs = self.net(*batch_x)
                if not only_inference:
                    loss = self.criterion(outputs, *batch_y)

            if only_inference:
                return outputs

            if phase == "train":
                loss.backward()
                if self.config["clip_gradient_norm"]:
                    clip_grad_norm_(self.net.parameters(),
                                    self.config["clip_gradient_norm"])
                self.optimizer.step()

        with torch.no_grad():
            if self.config["is_data_dict"]:
                metric = self.metric(outputs, batch_dict)
            else:
                metric = self.metric(outputs, *batch_y)

        return {
            "loss": loss.item(),
            "batch_size": batch_size,
            "metric": metric.item()
        }

    def _train(self, phase, num_steps=0):
        running_loss = AverageMeter()
        running_metric = AverageMeter()

        if phase == "train":
            self.net.train()
        else:
            self.net.eval()

        dataloader = self.dataloader[phase]
        step_iterator = iter(dataloader)
        tq = tqdm(range(num_steps), leave=False)
        for st in tq:
            if (st + 1) % len(dataloader) == 0:
                step_iterator = iter(dataloader)
            results = self._step(phase=phase, iterator=step_iterator)
            tq.set_description(
                f"Loss:{results['loss']:.4f}, Metric:{results['metric']:.4f}")
            running_loss.update(results["loss"], results["batch_size"])
            running_metric.update(results["metric"], results["batch_size"])

        return running_loss.avg, running_metric.avg

    def eval(self, dataloader=None):
        self.net.eval()
        if dataloader is None:
            dataloader = self.dataloader["val"]
            phase = "val"

        output_list = []
        step_iterator = iter(dataloader)
        num_steps = len(dataloader)
        for st in tqdm(range(num_steps), leave=False):
            results = self._step(phase="val",
                                 iterator=step_iterator,
                                 only_inference=True)
            output_list.append(results)

        output_cat = torch.cat(output_list)
        return output_cat

    def add_external_config(self, args):
        """
        args : a dict-like object which contains key-value configurations.
        """
        new_d = defaultdict(float)
        for k, v in args.items():
            new_d[f"config_{k}"] = v
        self.config.update(new_d)

    def update_experiment(self):
        """
        Update experiment statistics by its name (csv file).
        """
        assert Trainer.experiment_name is not None
        df_config = pd.DataFrame(pd.Series(
            self.config)).T.set_index("experiment_id")
        if os.path.exists(Trainer.experiment_name + ".csv"):
            df_ex = pd.read_csv(Trainer.experiment_name + ".csv", index_col=0)
            if self.config["experiment_id"] in df_ex.index:
                df_ex = df_ex.drop(self.config["experiment_id"])
            df_ex = df_ex.append(df_config, sort=False)
        else:
            df_ex = df_config
        df_ex.to_csv(Trainer.experiment_name + ".csv")
        return df_ex

    def swa_apply(self, bn_update=True):
        assert hasattr(self.optimizer, "swap_swa_sgd")
        self.optimizer.swap_swa_sgd()
        if bn_update:
            self.swa_bn_update()

    def swa_bn_update(self):
        r"""Updates BatchNorm running_mean, running_var buffers in the model.
        It performs one pass over data in `loader` to estimate the activation
        statistics for BatchNorm layers in the model.
        original source is from : torchcontrib
        """
        if not check_bn(self.net):
            return
        was_training = self.net.training
        self.net.train()
        momenta = {}
        self.net.apply(reset_bn)
        self.net.apply(lambda module: get_momenta(module, momenta))
        n = 0
        for input in self.dataloader['train']:
            if isinstance(input, (list, tuple)):
                input = input[0]
                b = input.size(0)
                input = input.to(self.device)
            elif self.config['is_data_dict']:
                b = input[list(input.keys())[0]].size(0)
                for k, v in input.items():
                    input[k] = v.to(self.device)
            else:
                b = input.size(0)
                input = input.to(self.device)

            momentum = b / float(n + b)
            for module in momenta.keys():
                module.momentum = momentum

            self.net(input)
            n += b

        self.net.apply(lambda module: set_momenta(module, momenta))
        self.net.train(was_training)

    @staticmethod
    def get_model_device(net):
        device = torch.device("cpu")
        for param in net.parameters():
            device = param.device
            break
        return device

    @staticmethod
    def set_experiment_name(name):
        Trainer.experiment_name = name
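
A hedged end-to-end sketch for the `Trainer` class above; the network and dataloaders are hypothetical placeholders:

Trainer.set_experiment_name("experiments")            # optional: aggregate runs into experiments.csv
trainer = Trainer(net, train_dataloader=train_loader, val_dataloader=val_loader)
trainer.train(epoch=10, validation_interval=1)        # epoch-based training
# or: trainer.train(step=10_000, validation_interval=500)   # step-based training
trainer.load()                                        # reload the best checkpoint saved during training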
Example #8
def train(agents, params, num_processes):
    """Training Loop for value-based RL methods.
    Params
    ======
        agent (object) --- the agent to train
        params (dict) --- the dictionary of parameters
    """
    n_episodes = params['episodes']
    maxlen = params['maxlen']
    name = params['agent_params']['name']
    brain_name = params['brain_name']
    env = params['environment']
    add_noise = params['agent_params']['add_noise']
    pretrain = params['pretrain']
    pretrain_length = params['pretrain_length']
    num_agents = num_processes
    scores = np.zeros(num_agents)  # per-agent scores for the current episode
    scores_window = deque(maxlen=maxlen)  # last N scores
    scores_episode = []
    writer = SummaryWriter(log_dir=params['log_dir'] + name)

    env_info = env.reset(train_mode=True)[brain_name]
    tic = time.time()
    timesteps = 0
    achievement_length = 0

    episode_start = 1
    if params['load_agent']:
        episode_start, timesteps = agents.load_agent()

    for i_episode in range(episode_start, n_episodes + 1):
        tic = time.time()
        states = env_info.vector_observations
        scores = np.zeros(num_agents)
        env.reset()

        while True:
            states = torch.tensor(states)

            if pretrain and pretrain_length < len(agents.memory.memory):
                pretrain = False

            actions, noise_epsilon = agents.act(states,
                                                add_noise,
                                                pretrain=pretrain)

            env_info = env.step(actions)[
                brain_name]  # send the action to the environment
            next_states = env_info.vector_observations  # get the next state
            rewards = env_info.rewards  # get the reward
            dones = env_info.local_done  # see if episode has finished
            adjusted_rewards = np.array(env_info.rewards)

            if params['hack_rewards']:
                if adjusted_rewards[0] != 0:
                    adjusted_rewards[1] = adjusted_rewards[0] * params[
                        'alternative_reward_scalar']
                elif adjusted_rewards[1] != 0:
                    adjusted_rewards[0] = adjusted_rewards[1] * params[
                        'alternative_reward_scalar']

            actor_loss, critic_loss = agents.step(states,
                                                  actions,
                                                  adjusted_rewards,
                                                  next_states,
                                                  dones,
                                                  pretrain=pretrain)
            if actor_loss is not None and critic_loss is not None:

                if params['agent_params']['schedule_lr']:
                    actor_lr, critic_lr = agents.get_lr()
                else:
                    actor_lr, critic_lr = params['agent_params'][
                        'actor_params']['lr'], params['agent_params'][
                            'critic_params']['lr']

                writer.add_scalar('noise_epsilon', noise_epsilon, timesteps)
                writer.add_scalar('actor_loss', actor_loss, timesteps)
                writer.add_scalar('critic_loss', critic_loss, timesteps)
                writer.add_scalar('actor_lr', actor_lr, timesteps)
                writer.add_scalar('critic_lr', critic_lr, timesteps)

            print('\rTimestep {}\tMax: {:.2f}'.format(timesteps,
                                                      np.max(scores)),
                  end="")

            scores += rewards  # update the scores
            states = next_states  # roll over the state to next time step
            if np.any(dones):  # exit loop if episode finished
                break

            timesteps += 1

            # Fills the buffer with experiences resulting from random actions
            # to encourage exploration
            if timesteps % params['random_fill_every'] == 0:
                pretrain = True
                # keep random-action pretraining active for the next pretrain_length experiences
                pretrain_length = len(agents.memory.memory) + params['pretrain_length']

        score = np.mean(scores)
        scores_episode.append(score)
        scores_window.append(score)  # save most recent score

        print('\rEpisode {}\tMax: {:.2f} \t Time: {:.2f}'.format(
            i_episode, np.max(scores),
            time.time() - tic),
              end="\n")

        if i_episode % params['save_every'] == 0:
            agents.save_agent(np.mean(scores_window),
                              i_episode,
                              timesteps,
                              save_history=True)
        else:
            agents.save_agent(np.mean(scores_window),
                              i_episode,
                              timesteps,
                              save_history=False)

        writer.add_scalars('scores', {
            'mean': np.mean(scores),
            'min': np.min(scores),
            'max': np.max(scores)
        }, timesteps)

        update_csv(name, i_episode, np.mean(scores), np.mean(scores))

        agents.step_lr(np.mean(scores))

        if np.mean(scores) > params['achievement']:
            achievement_length += 1
            if achievement_length > params['achievement_length']:
                toc = time.time()
                print(
                    "\n\n Congratulations! The agent has managed to solve the environment in {} episodes with {} training time\n\n"
                    .format(i_episode, toc - tic))
                writer.close()
                return scores
        else:
            achievement_length = 0

    writer.close()
    return scores
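
The `update_csv` helper called in the loop above is not shown; a minimal sketch of what it might look like (the file name and column layout are assumptions) is:

import csv
import os

def update_csv(name, episode, mean_score, window_score):
    # Assumed helper: append one row of per-episode statistics to <name>.csv.
    path = name + ".csv"
    new_file = not os.path.exists(path)
    with open(path, "a", newline="") as f:
        writer = csv.writer(f)
        if new_file:
            writer.writerow(["episode", "mean_score", "window_score"])
        writer.writerow([episode, mean_score, window_score])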
Example #9
    def train(self, load_model=False, model_path=None):
        if load_model:
            if model_path is not None:
                self.load_weights(model_path)
        ## Training utterances
        all_input_ids, all_input_len, all_label_ids = convert_examples_to_features(
            self.train_examples, self.label_list, args.max_seq_length,
            self.tokenizer, args.max_turn_length)

        num_train_batches = all_input_ids.size(0)
        num_train_steps = int(num_train_batches / args.train_batch_size /
                              args.gradient_accumulation_steps *
                              args.num_train_epochs)

        logger.info("***** training *****")
        logger.info("  Num examples = %d", len(self.train_examples))
        logger.info("  Batch size = %d", args.train_batch_size)
        logger.info("  Num steps = %d", num_train_steps)

        all_input_ids, all_input_len, all_label_ids = all_input_ids.to(
            DEVICE), all_input_len.to(DEVICE), all_label_ids.to(DEVICE)

        train_data = TensorDataset(all_input_ids, all_input_len, all_label_ids)
        train_sampler = RandomSampler(train_data)
        train_dataloader = DataLoader(train_data,
                                      sampler=train_sampler,
                                      batch_size=args.train_batch_size)

        all_input_ids_dev, all_input_len_dev, all_label_ids_dev = convert_examples_to_features(
            self.dev_examples, self.label_list, args.max_seq_length,
            self.tokenizer, args.max_turn_length)

        logger.info("***** validation *****")
        logger.info("  Num examples = %d", len(self.dev_examples))
        logger.info("  Batch size = %d", args.dev_batch_size)

        all_input_ids_dev, all_input_len_dev, all_label_ids_dev = \
            all_input_ids_dev.to(DEVICE), all_input_len_dev.to(DEVICE), all_label_ids_dev.to(DEVICE)

        dev_data = TensorDataset(all_input_ids_dev, all_input_len_dev,
                                 all_label_ids_dev)
        dev_sampler = SequentialSampler(dev_data)
        dev_dataloader = DataLoader(dev_data,
                                    sampler=dev_sampler,
                                    batch_size=args.dev_batch_size)

        logger.info("Loaded data!")

        if args.fp16:
            self.sumbt_model.half()
        self.sumbt_model.to(DEVICE)

        ## Get domain-slot-type embeddings
        slot_token_ids, slot_len = \
            get_label_embedding(self.processor.target_slot, args.max_label_length, self.tokenizer, DEVICE)

        # for slot_idx, slot_str in zip(slot_token_ids, self.processor.target_slot):
        #     self.idx2slot[slot_idx] = slot_str

        ## Get slot-value embeddings
        label_token_ids, label_len = [], []
        for slot_idx, labels in zip(slot_token_ids, self.label_list):
            # self.idx2value[slot_idx] = {}
            token_ids, lens = get_label_embedding(labels,
                                                  args.max_label_length,
                                                  self.tokenizer, DEVICE)
            label_token_ids.append(token_ids)
            label_len.append(lens)
            # for label, token_id in zip(labels, token_ids):
            #     self.idx2value[slot_idx][token_id] = label

        logger.info('embeddings prepared')

        if USE_CUDA and N_GPU > 1:
            self.sumbt_model.module.initialize_slot_value_lookup(
                label_token_ids, slot_token_ids)
        else:
            self.sumbt_model.initialize_slot_value_lookup(
                label_token_ids, slot_token_ids)

        def get_optimizer_grouped_parameters(model):
            param_optimizer = [(n, p) for n, p in model.named_parameters()
                               if p.requires_grad]
            no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
            optimizer_grouped_parameters = [
                {
                    'params': [
                        p for n, p in param_optimizer
                        if not any(nd in n for nd in no_decay)
                    ],
                    'weight_decay':
                    0.01,
                    'lr':
                    args.learning_rate
                },
                {
                    'params': [
                        p for n, p in param_optimizer
                        if any(nd in n for nd in no_decay)
                    ],
                    'weight_decay':
                    0.0,
                    'lr':
                    args.learning_rate
                },
            ]
            return optimizer_grouped_parameters

        if not USE_CUDA or N_GPU == 1:
            optimizer_grouped_parameters = get_optimizer_grouped_parameters(
                self.sumbt_model)
        else:
            optimizer_grouped_parameters = get_optimizer_grouped_parameters(
                self.sumbt_model.module)

        t_total = num_train_steps

        scheduler = None
        if args.fp16:
            try:
                from apex.optimizers import FP16_Optimizer
                from apex.optimizers import FusedAdam
            except ImportError:
                raise ImportError(
                    "Please install apex from https://www.github.com/nvidia/apex to use distributed and fp16 training."
                )

            optimizer = FusedAdam(optimizer_grouped_parameters,
                                  lr=args.learning_rate,
                                  bias_correction=False,
                                  max_grad_norm=1.0)
            if args.fp16_loss_scale == 0:
                optimizer = FP16_Optimizer(optimizer, dynamic_loss_scale=True)
            else:
                optimizer = FP16_Optimizer(
                    optimizer, static_loss_scale=args.fp16_loss_scale)

        else:
            optimizer = AdamW(optimizer_grouped_parameters,
                              lr=args.learning_rate,
                              correct_bias=False)
            scheduler = get_linear_schedule_with_warmup(
                optimizer,
                num_warmup_steps=args.warmup_proportion * t_total,
                num_training_steps=t_total)
        logger.info(optimizer)

        # Training code
        ###############################################################################

        logger.info("Training...")

        global_step = 0
        last_update = None
        best_loss = None
        model = self.sumbt_model
        if args.do_not_use_tensorboard:
            summary_writer = None
        else:
            summary_writer = SummaryWriter("./tensorboard_summary/logs_1214/")

        for epoch in trange(int(args.num_train_epochs), desc="Epoch"):
            # Train
            model.train()
            tr_loss = 0
            nb_tr_examples = 0
            nb_tr_steps = 0

            for step, batch in enumerate(tqdm(train_dataloader)):
                batch = tuple(t.to(DEVICE) for t in batch)
                input_ids, input_len, label_ids = batch

                # Forward
                if N_GPU == 1:
                    loss, loss_slot, acc, acc_slot, _ = model(
                        input_ids, input_len, label_ids, N_GPU)
                else:
                    loss, _, acc, acc_slot, _ = model(input_ids, input_len,
                                                      label_ids, N_GPU)

                    # average to multi-gpus
                    loss = loss.mean()
                    acc = acc.mean()
                    acc_slot = acc_slot.mean(0)

                if args.gradient_accumulation_steps > 1:
                    loss = loss / args.gradient_accumulation_steps

                # Backward
                if args.fp16:
                    optimizer.backward(loss)
                else:
                    loss.backward()

                # tensorboard logging
                if summary_writer is not None:
                    summary_writer.add_scalar("Epoch", epoch, global_step)
                    summary_writer.add_scalar("Train/Loss", loss, global_step)
                    summary_writer.add_scalar("Train/JointAcc", acc,
                                              global_step)
                    if N_GPU == 1:
                        for i, slot in enumerate(self.processor.target_slot):
                            summary_writer.add_scalar(
                                "Train/Loss_%s" % slot.replace(' ', '_'),
                                loss_slot[i], global_step)
                            summary_writer.add_scalar(
                                "Train/Acc_%s" % slot.replace(' ', '_'),
                                acc_slot[i], global_step)

                tr_loss += loss.item()
                nb_tr_examples += input_ids.size(0)
                nb_tr_steps += 1
                if (step + 1) % args.gradient_accumulation_steps == 0:
                    # modify learning rate with the special warm-up BERT uses
                    lr_this_step = args.learning_rate * warmup_linear(
                        global_step / t_total, args.warmup_proportion)
                    if summary_writer is not None:
                        summary_writer.add_scalar("Train/LearningRate",
                                                  lr_this_step, global_step)
                    for param_group in optimizer.param_groups:
                        param_group['lr'] = lr_this_step
                    if scheduler is not None:
                        torch.nn.utils.clip_grad_norm_(
                            model.parameters(), 1.0)
                    optimizer.step()
                    if scheduler is not None:
                        scheduler.step()
                    optimizer.zero_grad()
                    global_step += 1

            # Perform evaluation on validation dataset
            model.eval()
            dev_loss = 0
            dev_acc = 0
            dev_loss_slot, dev_acc_slot = None, None
            nb_dev_examples, nb_dev_steps = 0, 0

            for step, batch in enumerate(
                    tqdm(dev_dataloader, desc="Validation")):
                batch = tuple(t.to(DEVICE) for t in batch)
                input_ids, input_len, label_ids = batch
                if input_ids.dim() == 2:
                    input_ids = input_ids.unsqueeze(0)
                    input_len = input_len.unsqueeze(0)
                    label_ids = label_ids.unsqueeze(0)

                with torch.no_grad():
                    if N_GPU == 1:
                        loss, loss_slot, acc, acc_slot, _ = model(
                            input_ids, input_len, label_ids, N_GPU)
                    else:
                        loss, _, acc, acc_slot, _ = model(
                            input_ids, input_len, label_ids, N_GPU)

                        # average to multi-gpus
                        loss = loss.mean()
                        acc = acc.mean()
                        acc_slot = acc_slot.mean(0)

                num_valid_turn = torch.sum(label_ids[:, :, 0].view(-1) > -1,
                                           0).item()
                dev_loss += loss.item() * num_valid_turn
                dev_acc += acc.item() * num_valid_turn

                if N_GPU == 1:
                    if dev_loss_slot is None:
                        dev_loss_slot = [l * num_valid_turn for l in loss_slot]
                        dev_acc_slot = acc_slot * num_valid_turn
                    else:
                        for i, l in enumerate(loss_slot):
                            dev_loss_slot[
                                i] = dev_loss_slot[i] + l * num_valid_turn
                        dev_acc_slot += acc_slot * num_valid_turn

                nb_dev_examples += num_valid_turn

            dev_loss = dev_loss / nb_dev_examples
            dev_acc = dev_acc / nb_dev_examples

            if N_GPU == 1:
                dev_acc_slot = dev_acc_slot / nb_dev_examples

            # tensorboard logging
            if summary_writer is not None:
                summary_writer.add_scalar("Validate/Loss", dev_loss,
                                          global_step)
                summary_writer.add_scalar("Validate/Acc", dev_acc, global_step)
                if N_GPU == 1:
                    for i, slot in enumerate(self.processor.target_slot):
                        summary_writer.add_scalar(
                            "Validate/Loss_%s" % slot.replace(' ', '_'),
                            dev_loss_slot[i] / nb_dev_examples, global_step)
                        summary_writer.add_scalar(
                            "Validate/Acc_%s" % slot.replace(' ', '_'),
                            dev_acc_slot[i], global_step)

            dev_loss = round(dev_loss, 6)

            output_model_file = os.path.join(
                os.path.join(SUMBT_PATH, args.output_dir), "pytorch_model.bin")

            if last_update is None or dev_loss < best_loss:

                if not USE_CUDA or N_GPU == 1:
                    torch.save(model.state_dict(), output_model_file)
                else:
                    torch.save(model.module.state_dict(), output_model_file)

                last_update = epoch
                best_loss = dev_loss
                best_acc = dev_acc

                logger.info(
                    "*** Model Updated: Epoch=%d, Validation Loss=%.6f, Validation Acc=%.6f, global_step=%d ***"
                    % (last_update, best_loss, best_acc, global_step))
            else:
                logger.info(
                    "*** Model NOT Updated: Epoch=%d, Validation Loss=%.6f, Validation Acc=%.6f, global_step=%d  ***"
                    % (epoch, dev_loss, dev_acc, global_step))

            if last_update + args.patience <= epoch:
                break
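
`warmup_linear` used in the gradient-accumulation branch above is not defined in this example; a common linear warm-up/decay schedule (assumed here, since the example does not show the real helper) would be:

def warmup_linear(x, warmup=0.002):
    # x is the fraction of training completed, i.e. global_step / t_total (assumed definition).
    if x < warmup:
        return x / warmup
    return max(0.0, (1.0 - x) / (1.0 - warmup))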
Example #10
        d_iter_count += 1
        if d_iter_count == config.max_d_iters:
            d_iter_count = 0
            mode = 'G'


iter_no = 0
max_iters = 100

mode = 'G'
g_iter_count = 0
d_iter_count = 0

batch_size = config.batch_size

train_writer = SummaryWriter(log_dir='../logs/train')
val_writer = SummaryWriter(log_dir='../logs/val')

with tqdm(total=max_iters) as pbar:
    for iter_no in range(max_iters):
        train_batch = train_loader.next_batch()

        gan.train()
        train_step(iter_no, train_batch)

        if iter_no % config.validation_interval == 0:
            val_batch = val_loader.next_batch()
            gan.eval()
            validate(val_batch, iter_no)
        pbar.update(1)
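
The fragment at the top of this example only shows the discriminator-side counter; a symmetric sketch of the generator-side switch, assuming a `config.max_g_iters` analogous to `config.max_d_iters`, would be:

# Assumed counterpart to the discriminator counter shown above.
if mode == 'G':
    g_iter_count += 1
    if g_iter_count == config.max_g_iters:
        g_iter_count = 0
        mode = 'D'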
Example #11
def main(
    debug: bool = False,
    eager: bool = False,
    logdir: str = "runs",
    steps_per_epoch: int = 200,
    epochs: int = 100,
    batch_size: int = 64,
):

    if debug:
        import debugpy

        print("Waiting for debugger...")
        debugpy.listen(5678)
        debugpy.wait_for_client()

    current_time = datetime.now().strftime("%b%d_%H-%M-%S")
    logdir = os.path.join(logdir, current_time)

    train_dataset = MNIST(training=True)
    test_dataset = MNIST(training=False)
    train_loader = eg.data.DataLoader(train_dataset,
                                      batch_size=batch_size,
                                      shuffle=True)
    test_loader = eg.data.DataLoader(test_dataset,
                                     batch_size=batch_size,
                                     shuffle=True)

    print("X_train:", train_dataset.x.shape, train_dataset.x.dtype)
    print("y_train:", train_dataset.y.shape, train_dataset.y.dtype)
    print("X_test:", test_dataset.x.shape, test_dataset.x.dtype)
    print("y_test:", test_dataset.y.shape, test_dataset.y.dtype)

    @dataclass(unsafe_hash=True, repr=False)
    class MLP(eg.Module):
        """Standard LeNet-300-100 MLP network."""

        n1: int = 300
        n2: int = 100

        @eg.compact
        def __call__(self, x: jnp.ndarray):
            x = x.astype(jnp.float32) / 255.0

            x = eg.Flatten()(x)
            x = eg.Linear(self.n1)(x)
            x = jax.nn.relu(x)
            x = eg.Linear(self.n2)(x)
            x = jax.nn.relu(x)
            x = eg.Linear(10)(x)

            return x

    model = eg.Model(
        module=MLP(n1=300, n2=100),
        loss=[
            eg.losses.Crossentropy(),
            eg.regularizers.L2(l=1e-4),
        ],
        metrics=eg.metrics.Accuracy(),
        optimizer=optax.adamw(1e-3),
        eager=eager,
    )

    x_sample, y_sample = next(iter(train_loader))
    model.summary(x_sample)

    history = model.fit(
        inputs=train_loader,
        epochs=epochs,
        steps_per_epoch=steps_per_epoch,
        validation_data=test_loader,
        shuffle=True,
        callbacks=[eg.callbacks.TensorBoard(logdir=logdir)],
    )

    eg.utils.plot_history(history)

    # get random samples
    idxs = np.random.randint(0, 10000, size=(9, ))
    x_sample, y_sample = next(iter(test_loader))

    # get predictions
    y_pred = model.predict(x=x_sample)

    # plot and save results
    def make_plot():
        plt.figure(figsize=(12, 12))
        for i in range(3):
            for j in range(3):
                k = 3 * i + j
                plt.subplot(3, 3, k + 1)
                plt.title(f"{np.argmax(y_pred[k])}")
                plt.imshow(x_sample[k], cmap="gray")

    with SummaryWriter(os.path.join(logdir, "val")) as tbwriter:
        make_plot()
        # tbwriter.add_figure("Predictions", plt.gcf(), 100)

    make_plot()
    plt.show()

    print(
        "\n\n\nMetrics and images can be explored using tensorboard using:",
        f"\n \t\t\t tensorboard --logdir {logdir}",
    )
Example #12
def main(debug: bool = False, eager: bool = False, logdir: str = "runs"):

    if debug:
        import debugpy

        print("Waiting for debugger...")
        debugpy.listen(5678)
        debugpy.wait_for_client()

    current_time = datetime.now().strftime("%b%d_%H-%M-%S")
    logdir = os.path.join(logdir, current_time)

    X_train, y_train, X_test, y_test = dataget.image.mnist(
        global_cache=True).get()

    print("X_train:", X_train.shape, X_train.dtype)
    print("y_train:", y_train.shape, y_train.dtype)
    print("X_test:", X_test.shape, X_test.dtype)
    print("y_test:", y_test.shape, y_test.dtype)

    class Lambda(elegy.Module):
        def __init__(self, f):
            super().__init__()
            self.f = f

        def call(self, x):
            return self.f(x)

    class MLP(elegy.Module):
        """Standard LeNet-300-100 MLP network."""
        def __init__(self, n1: int = 300, n2: int = 100, **kwargs):
            super().__init__(**kwargs)
            self.n1 = n1
            self.n2 = n2

        def call(self, image: jnp.ndarray):
            image = image.astype(jnp.float32) / 255.0

            mlp = elegy.nn.sequential(
                elegy.nn.Flatten(),
                elegy.nn.Linear(self.n1),
                jax.nn.relu,
                elegy.nn.Linear(self.n2),
                jax.nn.relu,
                elegy.nn.Linear(10),
            )

            return mlp(image)

    model = elegy.Model(
        module=MLP(n1=300, n2=100),
        loss=[
            elegy.losses.SparseCategoricalCrossentropy(from_logits=True),
            elegy.regularizers.GlobalL2(l=1e-4),
        ],
        metrics=elegy.metrics.SparseCategoricalAccuracy(),
        optimizer=optax.adamw(1e-3),
        run_eagerly=eager,
    )

    model.summary(X_train[:64])

    history = model.fit(
        x=X_train,
        y=y_train,
        epochs=100,
        steps_per_epoch=200,
        batch_size=64,
        validation_data=(X_test, y_test),
        shuffle=True,
        callbacks=[elegy.callbacks.TensorBoard(logdir=logdir)],
    )

    print(model.module.submodules)

    plot_history(history)

    # get random samples
    idxs = np.random.randint(0, 10000, size=(9, ))
    x_sample = X_test[idxs]

    # get predictions
    y_pred = model.predict(x=x_sample)

    # plot and save results
    with SummaryWriter(os.path.join(logdir, "val")) as tbwriter:
        figure = plt.figure(figsize=(12, 12))
        for i in range(3):
            for j in range(3):
                k = 3 * i + j
                plt.subplot(3, 3, k + 1)
                plt.title(f"{np.argmax(y_pred[k])}")
                plt.imshow(x_sample[k], cmap="gray")
        tbwriter.add_figure("Predictions", figure, 100)

    plt.show()

    print(
        "\n\n\nMetrics and images can be explored using tensorboard using:",
        f"\n \t\t\t tensorboard --logdir {logdir}",
    )
Example #13
def main(
    steps_per_epoch: int = 200,
    batch_size: int = 64,
    epochs: int = 50,
    debug: bool = False,
    eager: bool = False,
    logdir: str = "runs",
):

    if debug:
        import debugpy

        print("Waiting for debugger...")
        debugpy.listen(5678)
        debugpy.wait_for_client()

    current_time = datetime.now().strftime("%b%d_%H-%M-%S")
    logdir = os.path.join(logdir, current_time)

    dataset = load_dataset("mnist")
    X_train = np.array(np.stack(dataset["train"]["image"]), dtype=np.uint8)
    X_test = np.array(np.stack(dataset["test"]["image"]), dtype=np.uint8)
    # Now binarize data
    X_train = (X_train > 0).astype(jnp.float32)
    X_test = (X_test > 0).astype(jnp.float32)

    print("X_train:", X_train.shape, X_train.dtype)
    print("X_test:", X_test.shape, X_test.dtype)

    model = eg.Model(
        module=VAE(latent_size=LATENT_SIZE),
        loss=[
            BinaryCrossEntropy(on="logits"),
            KL(weight=0.1),
        ],
        optimizer=optax.adam(1e-3),
        eager=eager,
    )

    model.summary(X_train[:batch_size])

    # Fit with datasets in memory
    history = model.fit(
        inputs=X_train,
        epochs=epochs,
        batch_size=batch_size,
        steps_per_epoch=steps_per_epoch,
        validation_data=(X_test, ),
        shuffle=True,
        callbacks=[eg.callbacks.TensorBoard(logdir)],
    )

    print(
        "\n\n\nMetrics and images can be explored using tensorboard using:",
        f"\n \t\t\t tensorboard --logdir {logdir}",
    )

    eg.utils.plot_history(history)

    # get random samples
    idxs = np.random.randint(0, len(X_test), size=(5, ))
    x_sample = X_test[idxs]

    # get predictions
    preds = model.predict(x=x_sample)
    y_pred = jax.nn.sigmoid(preds["logits"])

    # plot and save results
    with SummaryWriter(os.path.join(logdir, "val")) as tbwriter:
        figure = plt.figure(figsize=(12, 12))
        for i in range(5):
            plt.subplot(2, 5, i + 1)
            plt.imshow(x_sample[i], cmap="gray")
            plt.subplot(2, 5, 5 + i + 1)
            plt.imshow(y_pred[i], cmap="gray")
        # # tbwriter.add_figure("VAE Example", figure, epochs)

    plt.show()
Example #14
def main(
    debug: bool = False,
    eager: bool = False,
    logdir: str = "runs",
    steps_per_epoch: int = 200,
    epochs: int = 100,
):

    if debug:
        import debugpy

        print("Waiting for debugger...")
        debugpy.listen(5678)
        debugpy.wait_for_client()

    current_time = datetime.now().strftime("%b%d_%H-%M-%S")
    logdir = os.path.join(logdir, current_time)

    train_dataset = MNIST(training=True)
    test_dataset = MNIST(training=False)
    train_loader = elegy.data.DataLoader(train_dataset,
                                         batch_size=64,
                                         shuffle=True)
    test_loader = elegy.data.DataLoader(test_dataset,
                                        batch_size=64,
                                        shuffle=True)

    print("X_train:", train_dataset.x.shape, train_dataset.x.dtype)
    print("y_train:", train_dataset.y.shape, train_dataset.y.dtype)
    print("X_test:", test_dataset.x.shape, test_dataset.x.dtype)
    print("y_test:", test_dataset.y.shape, test_dataset.y.dtype)

    class MLP(elegy.Module):
        """Standard LeNet-300-100 MLP network."""
        def __init__(self, n1: int = 300, n2: int = 100, **kwargs):
            super().__init__(**kwargs)
            self.n1 = n1
            self.n2 = n2

        def call(self, image: jnp.ndarray):
            image = image.astype(jnp.float32) / 255.0

            mlp = elegy.nn.sequential(
                elegy.nn.Flatten(),
                elegy.nn.Linear(self.n1),
                jax.nn.relu,
                elegy.nn.Linear(self.n2),
                jax.nn.relu,
                elegy.nn.Linear(10),
            )

            return mlp(image)

    model = elegy.Model(
        module=MLP(n1=300, n2=100),
        loss=[
            elegy.losses.SparseCategoricalCrossentropy(from_logits=True),
            elegy.regularizers.GlobalL2(l=1e-4),
        ],
        metrics=elegy.metrics.SparseCategoricalAccuracy(),
        optimizer=optax.adamw(1e-3),
        run_eagerly=eager,
    )

    x_sample, y_sample = next(iter(train_loader))
    model.summary(x_sample)

    history = model.fit(
        x=train_loader,
        epochs=epochs,
        steps_per_epoch=steps_per_epoch,
        validation_data=test_loader,
        shuffle=True,
        callbacks=[elegy.callbacks.TensorBoard(logdir=logdir)],
    )

    elegy.utils.plot_history(history)

    # get a random batch of samples (test_loader is shuffled)
    x_sample, y_sample = next(iter(test_loader))

    # get predictions
    y_pred = model.predict(x=x_sample)

    # plot and save results
    def make_plot():
        plt.figure(figsize=(12, 12))
        for i in range(3):
            for j in range(3):
                k = 3 * i + j
                plt.subplot(3, 3, k + 1)
                plt.title(f"{np.argmax(y_pred[k])}")
                plt.imshow(x_sample[k], cmap="gray")

    with SummaryWriter(os.path.join(logdir, "val")) as tbwriter:
        make_plot()
        # tbwriter.add_figure("Predictions", plt.gcf(), 100)

    make_plot()
    plt.show()

    print(
        "\n\n\nMetrics and images can be explored using tensorboard using:",
        f"\n \t\t\t tensorboard --logdir {logdir}",
    )
Example #15
def main(
    debug: bool = False,
    eager: bool = False,
    logdir: str = "runs",
    steps_per_epoch: int = 200,
    epochs: int = 100,
    batch_size: int = 64,
):

    if debug:
        import debugpy

        print("Waiting for debugger...")
        debugpy.listen(5678)
        debugpy.wait_for_client()

    current_time = datetime.now().strftime("%b%d_%H-%M-%S")
    logdir = os.path.join(logdir, current_time)

    dataset = load_dataset("mnist")
    dataset.set_format("np")
    X_train = np.stack(dataset["train"]["image"])[..., None]
    y_train = dataset["train"]["label"]
    X_test = np.stack(dataset["test"]["image"])[..., None]
    y_test = dataset["test"]["label"]

    print("X_train:", X_train.shape, X_train.dtype)
    print("y_train:", y_train.shape, y_train.dtype)
    print("X_test:", X_test.shape, X_test.dtype)
    print("y_test:", y_test.shape, y_test.dtype)

    model = eg.Model(
        module=CNN(),
        loss=eg.losses.Crossentropy(),
        metrics=eg.metrics.Accuracy(),
        optimizer=optax.adam(1e-3),
        eager=eager,
    )

    # show summary
    model.summary(X_train[:64])

    train_dataset = TensorDataset(torch.from_numpy(X_train),
                                  torch.from_numpy(y_train))
    train_dataloader = DataLoader(train_dataset,
                                  batch_size=batch_size,
                                  shuffle=True)
    test_dataset = TensorDataset(torch.from_numpy(X_test),
                                 torch.from_numpy(y_test))
    test_dataloader = DataLoader(test_dataset, batch_size=batch_size)

    history = model.fit(
        train_dataloader,
        epochs=epochs,
        steps_per_epoch=steps_per_epoch,
        validation_data=test_dataloader,
        callbacks=[eg.callbacks.TensorBoard(logdir=logdir)],
    )

    eg.utils.plot_history(history)

    model.save("models/conv")

    model = eg.load("models/conv")

    print(model.evaluate(x=X_test, y=y_test))

    # get random samples
    idxs = np.random.randint(0, 10000, size=(9, ))
    x_sample = X_test[idxs]

    # get predictions
    y_pred = model.predict(x=x_sample)

    # plot results
    with SummaryWriter(os.path.join(logdir, "val")) as tbwriter:
        figure = plt.figure(figsize=(12, 12))
        for i in range(3):
            for j in range(3):
                k = 3 * i + j
                plt.subplot(3, 3, k + 1)

                plt.title(f"{np.argmax(y_pred[k])}")
                plt.imshow(x_sample[k], cmap="gray")
        # tbwriter.add_figure("Conv classifier", figure, 100)

    plt.show()
Example #16
def main(
    debug: bool = False,
    eager: bool = False,
    logdir: str = "runs",
    steps_per_epoch: int = 200,
    epochs: int = 100,
    batch_size: int = 64,
):

    if debug:
        import debugpy

        print("Waiting for debugger...")
        debugpy.listen(5678)
        debugpy.wait_for_client()

    current_time = datetime.now().strftime("%b%d_%H-%M-%S")
    logdir = os.path.join(logdir, current_time)

    dataset = load_dataset("mnist")
    dataset.set_format("np")
    X_train = np.stack(dataset["train"]["image"])
    X_test = np.stack(dataset["test"]["image"])

    print("X_train:", X_train.shape, X_train.dtype)
    print("X_test:", X_test.shape, X_test.dtype)

    model = eg.Model(
        module=MLP(n1=256, n2=64),
        loss=MeanSquaredError(),
        optimizer=optax.rmsprop(0.001),
        eager=eager,
    )

    model.summary(X_train[:64])

    # Notice we are not passing `y`
    history = model.fit(
        inputs=X_train,
        epochs=epochs,
        steps_per_epoch=steps_per_epoch,
        batch_size=batch_size,
        validation_data=(X_test, ),
        shuffle=True,
        callbacks=[eg.callbacks.TensorBoard(logdir=logdir, update_freq=300)],
    )

    eg.utils.plot_history(history)

    # get random samples
    idxs = np.random.randint(0, 10000, size=(5, ))
    x_sample = X_test[idxs]

    # get predictions
    y_pred = model.predict(x=x_sample)

    # plot and save results
    with SummaryWriter(os.path.join(logdir, "val")) as tbwriter:

        figure = plt.figure(figsize=(12, 12))
        for i in range(5):
            plt.subplot(2, 5, i + 1)
            plt.imshow(x_sample[i], cmap="gray")
            plt.subplot(2, 5, 5 + i + 1)
            plt.imshow(y_pred[i], cmap="gray")

    plt.show()
Example #17
    def on_train_begin(self, logs=None):
        self.writer = SummaryWriter(self.logdir, **self._open_args)
Example #18
def main(
    steps_per_epoch: int = 200,
    batch_size: int = 64,
    epochs: int = 50,
    debug: bool = False,
    eager: bool = False,
    logdir: str = "runs",
):

    if debug:
        import debugpy

        print("Waiting for debugger...")
        debugpy.listen(5678)
        debugpy.wait_for_client()

    current_time = datetime.now().strftime("%b%d_%H-%M-%S")
    logdir = os.path.join(logdir, current_time)

    X_train, _1, X_test, _2 = dataget.image.mnist(global_cache=True).get()
    # Now binarize data
    X_train = (X_train > 0).astype(jnp.float32)
    X_test = (X_test > 0).astype(jnp.float32)

    print("X_train:", X_train.shape, X_train.dtype)
    print("X_test:", X_test.shape, X_test.dtype)

    vae = VariationalAutoEncoder(latent_size=LATENT_SIZE)

    model = elegy.Model(
        module=vae,
        loss=[BinaryCrossEntropy(from_logits=True, on="logits")],
        optimizer=optax.adam(1e-3),
        run_eagerly=eager,
    )

    model.summary(X_train[:64])

    # Fit with datasets in memory
    history = model.fit(
        x=X_train,
        epochs=epochs,
        batch_size=batch_size,
        steps_per_epoch=steps_per_epoch,
        validation_data=(X_test, ),
        shuffle=True,
        callbacks=[TensorBoard(logdir)],
    )

    print(
        "\n\n\nMetrics and images can be explored using tensorboard using:",
        f"\n \t\t\t tensorboard --logdir {logdir}",
    )

    elegy.utils.plot_history(history)

    # get random samples
    idxs = np.random.randint(0, len(X_test), size=(5, ))
    x_sample = X_test[idxs]

    # get predictions
    y_pred = model.predict(x=x_sample)

    # plot and save results
    with SummaryWriter(os.path.join(logdir, "val")) as tbwriter:
        figure = plt.figure(figsize=(12, 12))
        for i in range(5):
            plt.subplot(2, 5, i + 1)
            plt.imshow(x_sample[i], cmap="gray")
            plt.subplot(2, 5, 5 + i + 1)
            plt.imshow(y_pred["det_image"][i], cmap="gray")
        # tbwriter.add_figure("VAE Example", figure, epochs)

    plt.show()

    # call update_modules to enable parameter transfer
    # for now only Elegy Modules support this
    model.update_modules()

    # sample
    model_decoder = elegy.Model(vae.decoder)

    z_samples = np.random.normal(size=(12, LATENT_SIZE))
    samples = model_decoder.predict(z_samples, initialize=True)
    samples = jax.nn.sigmoid(samples)

    # plot and save results
    # with SummaryWriter(os.path.join(logdir, "val")) as tbwriter:
    figure = plt.figure(figsize=(5, 12))
    plt.title("Generative Samples")
    for i in range(5):
        plt.subplot(2, 5, 2 * i + 1)
        plt.imshow(samples[2 * i], cmap="gray")
        plt.subplot(2, 5, 2 * i + 2)
        plt.imshow(samples[2 * i + 1], cmap="gray")
    # tbwriter.add_figure("VAE Generative Example", figure, epochs)

    plt.show()
Example #19
from sklearn.decomposition import PCA
from sklearn.metrics import confusion_matrix
import tf_robustify
import vgg
import carlini_wagner_attack

os.system("taskset -p 0xffffffff %d" % os.getpid())

import sh
sh.rm('-rf', 'logs')

import logging
logging.basicConfig(level=logging.INFO, stream=sys.stdout)

from tensorboardX.writer import SummaryWriter
swriter = SummaryWriter('logs')
add_scalar_old = swriter.add_scalar


def add_scalar_and_log(key, value, global_step=0):
    logging.info('{}:{}: {}'.format(global_step, key, value))
    add_scalar_old(key, value, global_step)


swriter.add_scalar = add_scalar_and_log


def str2bool(x):
    return x.lower() == 'true'
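The snippet above monkey-patches swriter.add_scalar so every scalar is both printed via logging and written to the 'logs' event directory. A quick usage sketch (hypothetical tag, value and step):

# hypothetical call: prints "100:eval/accuracy: 0.93" and records the scalar under ./logs
swriter.add_scalar('eval/accuracy', 0.93, 100)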

Example #20
class TensorBoard(Callback):
    """
    Callback that streams epoch results to a TensorBoard events folder.

    Scalar losses and metrics are written to separate `train` and `val`
    event files so that both splits can be compared in TensorBoard.


    ```python
    tensorboard_logger = TensorBoard('runs')
    model.fit(X_train, Y_train, callbacks=[tensorboard_logger])
    ```
    """

    def __init__(
        self,
        logdir: Optional[str] = None,
        *,
        update_freq: Union[str, int] = "epoch",
        purge_step: Optional[int] = None,
        comment: str = "",
    ) -> None:
        """
        Arguments:
            logdir: Save directory location. Default is
                runs/**CURRENT_DATETIME_HOSTNAME**/{train, val}, which changes after each run.
                Use hierarchical folder structure to compare
                between runs easily. e.g. pass in 'runs/exp1', 'runs/exp2', etc.
                for each new experiment to compare across them.
            update_freq: `'batch'` or `'epoch'` or integer. When using `'batch'`,
                writes the losses and metrics to TensorBoard after each batch. The same
                applies for `'epoch'`. If using an integer, let's say `1000`, the
                callback will write the metrics and losses to TensorBoard every 1000
                batches. Note that writing too frequently to TensorBoard can slow down
                your training.
            purge_step (int):
                When logging crashes at step :math:`T+X` and restarts at step :math:`T`,
                any events whose global_step is greater than or equal to :math:`T` will be
                purged and hidden from TensorBoard.
                Note that crashed and resumed experiments should have the same ``logdir``.
            comment (string): Comment logdir suffix appended to the default
                ``logdir``. If ``logdir`` is assigned, this argument has no effect.
        """
        if not logdir:
            import socket
            from datetime import datetime

            current_time = datetime.now().strftime("%b%d_%H-%M-%S")
            self.logdir = os.path.join(
                "runs", current_time + "_" + socket.gethostname() + comment
            )
        else:
            self.logdir = logdir
        self.train_writer = None
        self.val_writer = None
        self.keys = None
        self.write_per_batch = True
        try:
            self.update_freq = int(update_freq)
        except ValueError as e:
            self.update_freq = 1
            if update_freq == "batch":
                self.write_per_batch = True
            elif update_freq == "epoch":
                self.write_per_batch = False
            else:
                raise e
        self.purge_step = purge_step

        super(TensorBoard, self).__init__()

    def on_train_begin(self, logs=None):
        self.train_writer = SummaryWriter(
            os.path.join(self.logdir, "train"), purge_step=self.purge_step
        )
        self.val_writer = SummaryWriter(
            os.path.join(self.logdir, "val"), purge_step=self.purge_step
        )
        self.steps = self.params["steps"]
        self.global_step = 0

    def on_train_batch_end(self, batch: int, logs=None):
        if not self.write_per_batch:
            return
        logs = logs or {}
        self.global_step = batch + self.current_epoch * (self.steps)
        if self.global_step % self.update_freq == 0:
            if self.keys is None:
                self.keys = logs.keys()
            for key in self.keys:
                self.train_writer.add_scalar(key, logs[key], self.global_step)

    def on_epoch_begin(self, epoch: int, logs=None):
        self.current_epoch = epoch

    def on_epoch_end(self, epoch, logs=None):
        logs = logs or {}

        if self.keys is None:
            self.keys = logs.keys()

        # logs passed to on_{train, test}_batch_end do not include val metrics
        if self.write_per_batch:
            for key in logs:
                if "val" in key:
                    self.val_writer.add_scalar(
                        key.replace("val_", ""), logs[key], self.global_step
                    )
            return

        elif epoch % self.update_freq == 0:

            for key in self.keys:
                if "val" in key:
                    self.val_writer.add_scalar(
                        key.replace("val_", ""), logs[key], epoch
                    )
                else:
                    self.train_writer.add_scalar(key, logs[key], epoch)

    def on_train_end(self, logs=None):
        self.train_writer.close()
        self.val_writer.close()
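A minimal usage sketch for the callback above, assuming an elegy-style model and MNIST arrays as in the surrounding examples (model, X_train, y_train, X_test, y_test are placeholders): an integer update_freq writes train scalars every that many batches, while validation scalars are still written once per epoch.

# hypothetical usage of the TensorBoard callback defined above
tb_cb = TensorBoard(logdir="runs/exp1", update_freq=100)
model.fit(
    x=X_train,
    y=y_train,
    epochs=10,
    validation_data=(X_test, y_test),
    callbacks=[tb_cb],
)
# then inspect with: tensorboard --logdir runs/exp1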
Example #21
def main(
    debug: bool = False,
    eager: bool = False,
    logdir: str = "runs",
    steps_per_epoch: int = 200,
    epochs: int = 100,
    batch_size: int = 64,
):

    if debug:
        import debugpy

        print("Waiting for debugger...")
        debugpy.listen(5678)
        debugpy.wait_for_client()

    current_time = datetime.now().strftime("%b%d_%H-%M-%S")
    logdir = os.path.join(logdir, current_time)

    X_train, y_train, X_test, y_test = dataget.image.mnist(
        global_cache=True).get()

    X_train = X_train[..., None]
    X_test = X_test[..., None]

    print("X_train:", X_train.shape, X_train.dtype)
    print("y_train:", y_train.shape, y_train.dtype)
    print("X_test:", X_test.shape, X_test.dtype)
    print("y_test:", y_test.shape, y_test.dtype)

    class CNN(elegy.Module):
        def call(self, image: jnp.ndarray, training: bool):
            @elegy.to_module
            def ConvBlock(x, units, kernel, stride=1):
                x = elegy.nn.Conv2D(units,
                                    kernel,
                                    stride=stride,
                                    padding="same")(x)
                x = elegy.nn.BatchNormalization()(x, training)
                x = elegy.nn.Dropout(0.2)(x, training)
                return jax.nn.relu(x)

            x: np.ndarray = image.astype(jnp.float32) / 255.0

            # base
            x = ConvBlock()(x, 32, [3, 3])
            x = ConvBlock()(x, 64, [3, 3], stride=2)
            x = ConvBlock()(x, 64, [3, 3], stride=2)
            x = ConvBlock()(x, 128, [3, 3], stride=2)

            # GlobalAveragePooling2D
            x = jnp.mean(x, axis=[1, 2])

            # classification head
            x = elegy.nn.Linear(10)(x)

            return x

    model = elegy.Model(
        module=CNN(),
        loss=elegy.losses.SparseCategoricalCrossentropy(from_logits=True),
        metrics=elegy.metrics.SparseCategoricalAccuracy(),
        optimizer=optax.adam(1e-3),
        run_eagerly=eager,
    )

    # show model summary
    model.summary(X_train[:64], depth=1)

    history = model.fit(
        x=X_train,
        y=y_train,
        epochs=epochs,
        steps_per_epoch=steps_per_epoch,
        batch_size=batch_size,
        validation_data=(X_test, y_test),
        shuffle=True,
        callbacks=[TensorBoard(logdir=logdir)],
    )

    elegy.utils.plot_history(history)

    model.save("models/conv")

    model = elegy.load("models/conv")

    print(model.evaluate(x=X_test, y=y_test))

    # get random samples
    idxs = np.random.randint(0, 10000, size=(9, ))
    x_sample = X_test[idxs]

    # get predictions
    y_pred = model.predict(x=x_sample)

    # plot results
    with SummaryWriter(os.path.join(logdir, "val")) as tbwriter:
        figure = plt.figure(figsize=(12, 12))
        for i in range(3):
            for j in range(3):
                k = 3 * i + j
                plt.subplot(3, 3, k + 1)

                plt.title(f"{np.argmax(y_pred[k])}")
                plt.imshow(x_sample[k], cmap="gray")
        # tbwriter.add_figure("Conv classifier", figure, 100)

    plt.show()
Example #22
def main(
    debug: bool = False,
    eager: bool = False,
    logdir: str = "runs",
    steps_per_epoch: int = 200,
    batch_size: int = 64,
    epochs: int = 100,
    size: int = 32,
    num_layers: int = 3,
    num_heads: int = 8,
    dropout: float = 0.0,
):

    if debug:
        import debugpy

        print("Waiting for debugger...")
        debugpy.listen(5678)
        debugpy.wait_for_client()

    current_time = datetime.now().strftime("%b%d_%H-%M-%S")
    logdir = os.path.join(logdir, current_time)

    X_train, y_train, X_test, y_test = dataget.image.mnist(
        global_cache=True).get()

    print("X_train:", X_train.shape, X_train.dtype)
    print("y_train:", y_train.shape, y_train.dtype)
    print("X_test:", X_test.shape, X_test.dtype)
    print("y_test:", y_test.shape, y_test.dtype)

    model = elegy.Model(
        module=ViT(
            size=size,
            num_layers=num_layers,
            num_heads=num_heads,
            dropout=dropout,
        ),
        loss=[
            elegy.losses.SparseCategoricalCrossentropy(from_logits=True),
            # elegy.regularizers.GlobalL2(l=1e-4),
        ],
        metrics=elegy.metrics.SparseCategoricalAccuracy(),
        optimizer=optax.adamw(1e-3),
        run_eagerly=eager,
    )

    model.init(X_train, y_train)

    model.summary(X_train[:64])

    history = model.fit(
        x=X_train,
        y=y_train,
        epochs=epochs,
        steps_per_epoch=steps_per_epoch,
        batch_size=batch_size,
        validation_data=(X_test, y_test),
        shuffle=True,
        callbacks=[elegy.callbacks.TensorBoard(logdir=logdir)],
    )

    elegy.utils.plot_history(history)

    # get random samples
    idxs = np.random.randint(0, 10000, size=(9, ))
    x_sample = X_test[idxs]

    # get predictions
    y_pred = model.predict(x=x_sample)

    # plot and save results
    with SummaryWriter(os.path.join(logdir, "val")) as tbwriter:
        figure = plt.figure(figsize=(12, 12))
        for i in range(3):
            for j in range(3):
                k = 3 * i + j
                plt.subplot(3, 3, k + 1)
                plt.title(f"{np.argmax(y_pred[k])}")
                plt.imshow(x_sample[k], cmap="gray")
        # tbwriter.add_figure("Predictions", figure, 100)

    plt.show()

    print(
        "\n\n\nMetrics and images can be explored using tensorboard using:",
        f"\n \t\t\t tensorboard --logdir {logdir}",
    )
Example #23
def main(debug: bool = False, eager: bool = False, logdir: str = "runs"):

    if debug:
        import debugpy

        print("Waiting for debugger...")
        debugpy.listen(5678)
        debugpy.wait_for_client()

    current_time = datetime.now().strftime("%b%d_%H-%M-%S")
    logdir = os.path.join(logdir, current_time)

    X_train, _1, X_test, _2 = dataget.image.mnist(global_cache=True).get()

    print("X_train:", X_train.shape, X_train.dtype)
    print("X_test:", X_test.shape, X_test.dtype)

    class MLP(elegy.Module):
        """Standard LeNet-300-100 MLP network."""

        def __init__(self, n1: int = 300, n2: int = 100, **kwargs):
            super().__init__(**kwargs)
            self.n1 = n1
            self.n2 = n2

        def call(self, image: jnp.ndarray):
            image = image.astype(jnp.float32) / 255.0
            x = elegy.nn.Flatten()(image)
            x = elegy.nn.sequential(
                elegy.nn.Linear(self.n1),
                jax.nn.relu,
                elegy.nn.Linear(self.n2),
                jax.nn.relu,
                elegy.nn.Linear(self.n1),
                jax.nn.relu,
                elegy.nn.Linear(x.shape[-1]),
                jax.nn.sigmoid,
            )(x)
            return x.reshape(image.shape) * 255

    class MeanSquaredError(elegy.losses.MeanSquaredError):
        # we request `x` instead of `y_true` since we don't require labels in autoencoders
        def call(self, x, y_pred):
            return super().call(x, y_pred)

    model = elegy.Model(
        module=MLP(n1=256, n2=64),
        loss=MeanSquaredError(),
        optimizer=optax.rmsprop(0.001),
        run_eagerly=eager,
    )

    model.summary(X_train[:64])

    # Notice we are not passing `y`
    history = model.fit(
        x=X_train,
        epochs=20,
        batch_size=64,
        validation_data=(X_test,),
        shuffle=True,
        callbacks=[elegy.callbacks.TensorBoard(logdir=logdir, update_freq=300)],
    )

    plot_history(history)

    # get random samples
    idxs = np.random.randint(0, 10000, size=(5,))
    x_sample = X_test[idxs]

    # get predictions
    y_pred = model.predict(x=x_sample)

    # plot and save results
    with SummaryWriter(os.path.join(logdir, "val")) as tbwriter:

        figure = plt.figure(figsize=(12, 12))
        for i in range(5):
            plt.subplot(2, 5, i + 1)
            plt.imshow(x_sample[i], cmap="gray")
            plt.subplot(2, 5, 5 + i + 1)
            plt.imshow(y_pred[i], cmap="gray")

        # tbwriter.add_figure("AutoEncoder images", figure, 20)

    plt.show()

    print(
        "\n\n\nMetrics and images can be explored using tensorboard using:",
        f"\n \t\t\t tensorboard --logdir {logdir}",
    )
Example #24
def main(
    debug: bool = False,
    eager: bool = False,
    logdir: str = "runs",
    steps_per_epoch: int = 200,
    batch_size: int = 64,
    epochs: int = 100,
):

    if debug:
        import debugpy

        print("Waiting for debugger...")
        debugpy.listen(5678)
        debugpy.wait_for_client()

    current_time = datetime.now().strftime("%b%d_%H-%M-%S")
    logdir = os.path.join(logdir, current_time)

    dataset = load_dataset("mnist")
    dataset.set_format("np")
    X_train = np.stack(dataset["train"]["image"])
    y_train = dataset["train"]["label"]
    X_test = np.stack(dataset["test"]["image"])
    y_test = dataset["test"]["label"]

    print("X_train:", X_train.shape, X_train.dtype)
    print("y_train:", y_train.shape, y_train.dtype)
    print("X_test:", X_test.shape, X_test.dtype)
    print("y_test:", y_test.shape, y_test.dtype)

    model = eg.Model(
        module=MLP(n1=300, n2=100),
        loss=[
            eg.losses.Crossentropy(),
            eg.regularizers.L2(l=1e-4),
        ],
        metrics=eg.metrics.Accuracy(),
        optimizer=optax.adamw(1e-3),
        eager=eager,
    )

    model.summary(X_train[:64])

    history = model.fit(
        inputs=X_train,
        labels=y_train,
        epochs=epochs,
        steps_per_epoch=steps_per_epoch,
        batch_size=batch_size,
        validation_data=(X_test, y_test),
        shuffle=True,
        callbacks=[eg.callbacks.TensorBoard(logdir=logdir)],
    )

    eg.utils.plot_history(history)

    # get random samples
    idxs = np.random.randint(0, 10000, size=(9,))
    x_sample = X_test[idxs]

    # get predictions
    y_pred = model.predict(x=x_sample)

    # plot and save results
    with SummaryWriter(os.path.join(logdir, "val")) as tbwriter:
        figure = plt.figure(figsize=(12, 12))
        for i in range(3):
            for j in range(3):
                k = 3 * i + j
                plt.subplot(3, 3, k + 1)
                plt.title(f"{np.argmax(y_pred[k])}")
                plt.imshow(x_sample[k], cmap="gray")
        # tbwriter.add_figure("Predictions", figure, 100)

    plt.show()

    print(
        "\n\n\nMetrics and images can be explored using tensorboard using:",
        f"\n \t\t\t tensorboard --logdir {logdir}",
    )
Example #25
def main(
    steps_per_epoch: tp.Optional[int] = None,
    batch_size: int = 32,
    epochs: int = 50,
    debug: bool = False,
    eager: bool = False,
    logdir: str = "runs",
):

    if debug:
        import debugpy

        print("Waiting for debugger...")
        debugpy.listen(5678)
        debugpy.wait_for_client()

    current_time = datetime.now().strftime("%b%d_%H-%M-%S")
    logdir = os.path.join(logdir, current_time)

    dataset = load_dataset("mnist")
    dataset.set_format("np")
    X_train = np.array(np.stack(dataset["train"]["image"]), dtype=np.uint8)
    X_test = np.array(np.stack(dataset["test"]["image"]), dtype=np.uint8)

    # Normalize pixel values to [0, 1]
    X_train = (X_train / 255.0).astype(jnp.float32)
    X_test = (X_test / 255.0).astype(jnp.float32)

    print("X_train:", X_train.shape, X_train.dtype)
    print("X_test:", X_test.shape, X_test.dtype)

    model = eg.Model(
        module=VariationalAutoEncoder(latent_size=LATENT_SIZE),
        loss=[BinaryCrossEntropy(from_logits=True, on="logits")],
        optimizer=optax.adam(1e-3),
        eager=eager,
    )
    assert model.module is not None

    model.summary(X_train[:64])

    # Fit with datasets in memory
    history = model.fit(
        inputs=X_train,
        epochs=epochs,
        batch_size=batch_size,
        steps_per_epoch=steps_per_epoch,
        validation_data=(X_test, ),
        shuffle=True,
        callbacks=[eg.callbacks.TensorBoard(logdir)],
    )

    print(
        "\n\n\nMetrics and images can be explored using tensorboard using:",
        f"\n \t\t\t tensorboard --logdir {logdir}",
    )

    eg.utils.plot_history(history)

    # get random samples
    idxs = np.random.randint(0, len(X_test), size=(5, ))
    x_sample = X_test[idxs]

    # get predictions
    y_pred = model.predict(x=x_sample)

    # plot and save results
    with SummaryWriter(os.path.join(logdir, "val")) as tbwriter:
        figure = plt.figure(figsize=(12, 12))
        for i in range(5):
            plt.subplot(2, 5, i + 1)
            plt.imshow(x_sample[i], cmap="gray")
            plt.subplot(2, 5, 5 + i + 1)
            plt.imshow(y_pred["det_image"][i], cmap="gray")
        # tbwriter.add_figure("VAE Example", figure, epochs)

    # sample
    model_decoder = eg.Model(model.module.decoder)

    z_samples = np.random.normal(size=(12, LATENT_SIZE))
    samples = model_decoder.predict(z_samples)
    samples = jax.nn.sigmoid(samples)

    # plot and save results
    # with SummaryWriter(os.path.join(logdir, "val")) as tbwriter:
    figure = plt.figure(figsize=(5, 12))
    plt.title("Generative Samples")
    for i in range(5):
        plt.subplot(2, 5, 2 * i + 1)
        plt.imshow(samples[2 * i], cmap="gray")
        plt.subplot(2, 5, 2 * i + 2)
        plt.imshow(samples[2 * i + 1], cmap="gray")
    # tbwriter.add_figure("VAE Generative Example", figure, epochs)

    plt.show()
Example #26
import torch.nn.parallel
import torch.optim
import torch.utils.data
import torch.backends.cudnn as cudnn  # needed for cudnn.benchmark below
from progress.bar import Bar
from tensorboardX.writer import SummaryWriter
from termcolor import cprint

from model import shape_net
from datasets import SIK1M
from losses import shape_loss
# select proper device to run
from utils import misc
from utils.eval.evalutils import AverageMeter
import numpy as np

writer = SummaryWriter('log')
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
cudnn.benchmark = True
steps = 0



def print_args(args):
    opts = vars(args)
    cprint("{:>30}  Options  {}".format("=" * 15, "=" * 15), 'yellow')
    for k, v in sorted(opts.items()):
        print("{:>30}  :  {}".format(k, v))
    cprint("{:>30}  Options  {}".format("=" * 15, "=" * 15), 'yellow')


def main(args):