def train(args):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    from os import path
    model = Detector().to(device)
    train_logger, valid_logger = None, None
    if args.log_dir is not None:
        train_logger = tb.SummaryWriter(path.join(args.log_dir, 'train'),
                                        flush_secs=1)
        valid_logger = tb.SummaryWriter(path.join(args.log_dir, 'valid'),
                                        flush_secs=1)

    # Loading the training data and one fixed validation batch.
    training_data = load_detection_data(TRAINING_DATA_PATH,
                                        batch_size=BATCH_SIZE)
    val_data, val_labels, _ = next(
        iter(load_detection_data(TEST_DATA_PATH, batch_size=16)))
    val_data, val_labels = val_data.to(device), val_labels.to(device)

    # Optimizer & Loss
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=5e-4,
                                 weight_decay=1e-6)
    loss_func = torch.nn.BCEWithLogitsLoss(reduction='none')

    # Logger
    logger = tb.SummaryWriter('/logs/train/1')

    # Algorithm
    for epoch in range(EPOCHS):
        print(epoch)
        model.train()
        for data, labels, extra in training_data:
            # Performing Prediction
            data = data.to(device)
            labels = labels.to(device)
            results = model(data)

            # Calculating Loss: BCEWithLogitsLoss(reduction='none') gives
            # -log(p_t) per element, so exp(-BCE) recovers p_t and
            # (1 - p_t)^2 * BCE is the focal loss with gamma = 2.
            BCE = loss_func(results, labels)
            Pt = torch.exp(-BCE)
            focal_loss = ((1 - Pt)**2 * BCE).mean()

            # Updating Weights
            optimizer.zero_grad()
            focal_loss.backward()
            optimizer.step()

        # Logging Results
        model.eval()
        with torch.no_grad():
            results = model(val_data)
        log(logger, val_data, val_labels, results, epoch)

        if epoch == 100:
            save_model(model, name='det1.th')

        if epoch == 125:
            save_model(model, name='det2.th')

    save_model(model)
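A quick way to check the focal-loss identity used in the loop above (this sketch is not part of the original example): BCEWithLogitsLoss(reduction='none') returns -log(p_t) per element, so exp(-BCE) recovers the probability of the true class, and (1 - p_t)**2 * BCE is focal loss with gamma = 2.

# Standalone sanity check of the focal-loss identity (illustrative only).
import torch

torch.manual_seed(0)
logits = torch.randn(4, 5)                      # arbitrary logits
targets = torch.randint(0, 2, (4, 5)).float()   # binary targets

bce = torch.nn.BCEWithLogitsLoss(reduction='none')(logits, targets)
p_t = torch.exp(-bce)                           # probability of the true class
focal_loss = ((1 - p_t) ** 2 * bce).mean()      # focal loss with gamma = 2

# p_t computed directly from the sigmoid probabilities for comparison
probs = torch.sigmoid(logits)
p_t_direct = torch.where(targets > 0.5, probs, 1 - probs)
assert torch.allclose(p_t, p_t_direct, atol=1e-5)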
Example 2
    def logger(self, epoch, x_train, train_loss, val_loss, k):
        """
        Write to TensorBoard
        """
        #Writing to be done in the first epoch
        print('Epoch in TensorBoard:', epoch)
        if epoch == 0:
            tb_path = './runs/' + self.model_name_save_dir
            print('tb_path', tb_path)

            self.writer['train'] = tb.SummaryWriter(log_dir=tb_path +
                                                    '/train' + str(k))
            self.writer['val'] = tb.SummaryWriter(log_dir=tb_path + '/val' +
                                                  str(k))
            # First batch of inputs: [batch_size x seq_length x embedding_dim]
            sample_data = next(iter(self.trainloader))[0]
            self.writer['train'].add_graph(self.model,
                                           sample_data.to(self.device))
            self.writer['train'].add_text('Model:', str(self.model))
            self.writer['train'].add_text('Input shape:', str(x_train.shape))
            self.writer['train'].add_text('Data Preprocessing:',
                                          'None, One-hot')
            self.writer['train'].add_text('Optimiser', str(self.optimizer))
            self.writer['train'].add_text(
                'Batch Size:', str(self.config['DATA']['BATCH_SIZE']))
            self.writer['train'].add_text(
                'Epochs:', str(self.config['TRAINER']['epochs']))

        for measure, value in self.metrics['train'].items():
            self.writer['train'].add_scalar('Train/' + measure, value, epoch)
        self.writer['train'].add_scalar('Loss', train_loss, epoch)
        for measure, value in self.metrics['val'].items():
            self.writer['val'].add_scalar('Val/' + measure, value, epoch)
        self.writer['val'].add_scalar('Loss', val_loss, epoch)
Example 3
def create_summary_writers(net, device, log_dir):
    train_path = log_dir / "train"
    val_path = log_dir / "validation"

    train_writer = tensorboard.SummaryWriter(train_path, flush_secs=60)
    val_writer = tensorboard.SummaryWriter(val_path, flush_secs=60)

    return train_writer, val_writer
Example 4
def get_loggers(params):
    if on_colab:
        train_logger = tb.SummaryWriter(
            os.path.join(params.log_dir, 'train/%s' % params.run_name))
        valid_logger = tb.SummaryWriter(
            os.path.join(params.log_dir, 'valid/%s' % params.run_name))
    else:
        train_logger = None
        valid_logger = None
    return train_logger, valid_logger
Example 5
def train(args):
    from os import path
    model = Detector()
    train_logger, valid_logger = None, None
    if args.log_dir is not None:
        train_logger = tb.SummaryWriter(path.join(args.log_dir, 'train'),
                                        flush_secs=1)
        valid_logger = tb.SummaryWriter(path.join(args.log_dir, 'valid'),
                                        flush_secs=1)
    """
    Your code here, modify your HW3 code
    """
    device = torch.device(
        'cuda') if torch.cuda.is_available() else torch.device('cpu')
    model = Detector().to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
    loss = torch.nn.BCEWithLogitsLoss()
    num_epoch = 50

    train_data = load_detection_data('dense_data/train')
    valid_data = load_detection_data('dense_data/valid')

    global_step = 0
    best_vacc = 0
    best_vloss = float('inf')
    for epoch in range(num_epoch):
        print(epoch)
        model.train()
        acc_vals = []
        loss_vals = []
        for img, label, ec in train_data:
            img, label = img.to(device), label.to(device)

            logit = model(img)
            loss_val = loss(logit, label)
            loss_vals.append(loss_val.item())
            acc_val = accuracy(logit, label)

            if train_logger is not None:
                train_logger.add_scalar('loss', loss_val, global_step)
            acc_vals.append(acc_val)

            optimizer.zero_grad()
            loss_val.backward()
            # print(loss_val.item())
            optimizer.step()
            global_step += 1
        avg_loss = sum(loss_vals) / len(loss_vals)
        if (avg_loss < best_vloss):
            print("saving!")
            best_vloss = avg_loss
            save_model(model)
Example 6
def train(args):
    from os import path
    import torch.utils.tensorboard as tb
    model = TCN()
    train_logger, valid_logger = None, None
    if args.log_dir is not None:
        train_logger = tb.SummaryWriter(path.join(args.log_dir, 'train'),
                                        flush_secs=1)
        valid_logger = tb.SummaryWriter(path.join(args.log_dir, 'valid'),
                                        flush_secs=1)

    import torch

    device = torch.device(
        'cuda') if torch.cuda.is_available() else torch.device('cpu')
    model = model.to(device)

    optimizer = torch.optim.SGD(model.parameters(),
                                lr=args.learning_rate,
                                momentum=0.9,
                                weight_decay=1e-5)
    loss = torch.nn.CrossEntropyLoss()

    train_data = SpeechDataset('data/train.txt',
                               transform=one_hot,
                               max_len=args.sequence_length)
    valid_data = SpeechDataset('data/valid.txt',
                               transform=one_hot,
                               max_len=args.sequence_length)

    model.train()
    for iterations in range(args.iteration_num):
        batch = make_batch(args.batch_size, train_data)
        batch_data = batch[:, :, :-1]
        batch_data = batch_data.to(device)
        batch_label = batch.argmax(dim=1)
        batch_label = batch_label.to(device)

        o = model(batch_data)
        loss_val = loss(o, batch_label)

        if args.log_dir is not None:
            train_logger.add_scalar('train/loss',
                                    loss_val,
                                    global_step=iterations)

        optimizer.zero_grad()
        loss_val.backward()
        optimizer.step()

    save_model(model)
Example 7
def train(args):
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

    from os import path
    model = FCN().to(device)
    train_logger, valid_logger = None, None
    if args.log_dir is not None:
        train_logger = tb.SummaryWriter(path.join(args.log_dir, 'train'), flush_secs=1)
        valid_logger = tb.SummaryWriter(path.join(args.log_dir, 'valid'), flush_secs=1)

    # Loading the training and testing data.
    training_data = load_dense_data(TRAINING_DATA_PATH, batch_size = BATCH_SIZE)
    testing_data = load_dense_data(TEST_DATA_PATH, batch_size = BATCH_SIZE)
    val_data, val_labels = next(
        iter(load_dense_data(TEST_DATA_PATH, batch_size = 1)))
    val_data, val_labels = val_data.to(device), val_labels.to(device)

    # Optimizer & Loss 
    optimizer = torch.optim.Adam(model.parameters(), lr = 1e-4, weight_decay = 1e-1)
    DENSE_WEIGHTS = (1.0 / torch.FloatTensor(DENSE_CLASS_DISTRIBUTION)).to(device)
    loss_func = torch.nn.CrossEntropyLoss(DENSE_WEIGHTS)

    converter = dense_transforms.ToTensor()
    result_tracker = ConfusionMatrix()

    for epoch in range(EPOCHS):
        model.train()
        print(epoch)
        # Iterates through the batched data.
        for data, labels in training_data:
          # Adds the batch to the GPU
          data = data.to(device)
          labels = labels.long().to(device)

          # Determines loss based on the results of the model.
          results = model(data)
          loss = loss_func(results, labels)

          # Updates the parameters based on the loss and gradients.
          optimizer.zero_grad()
          loss.backward()
          optimizer.step()

        # Logging Results
        model.eval()
        with torch.no_grad():
            results = model(val_data)
        result_tracker.add(results.argmax(1), val_labels)
        print(result_tracker.iou, result_tracker.global_accuracy)

    save_model(model)
Example 8
def train(args):
    from os import path
    model = Detector()
    train_logger, valid_logger = None, None
    if args.log_dir is not None:
        train_logger = tb.SummaryWriter(path.join(args.log_dir, 'train'),
                                        flush_secs=1)
        valid_logger = tb.SummaryWriter(path.join(args.log_dir, 'valid'),
                                        flush_secs=1)
    """
    Your code here, modify your HW3 code
    """
    raise NotImplementedError('train')
    save_model(model)
Example 9
    def __init__(self, model, resume, config, iters_per_epoch, val_logger=None, train_logger=None):
        self.model = model
        self.config = config

        self.val_logger = val_logger
        self.train_logger = train_logger
        self.logger = logging.getLogger(self.__class__.__name__)
        self.do_validation = self.config['trainer']['val']
        self.start_epoch = 1
        self.improved = False

        # SETTING THE DEVICE
        self.device, available_gpus = self._get_available_devices(self.config['n_gpu'])
        self.model = torch.nn.DataParallel(self.model, device_ids=available_gpus)
        self.model.to(self.device)

        # CONFIGS
        cfg_trainer = self.config['trainer']
        self.epochs = cfg_trainer['epochs']
        self.save_period = cfg_trainer['save_period']

        # OPTIMIZER
        trainable_params = [{'params': filter(lambda p:p.requires_grad, self.model.module.get_other_params())},
                            {'params': filter(lambda p:p.requires_grad, self.model.module.get_backbone_params()), 
                            'lr': config['optimizer']['args']['lr'] / 10}]

        self.optimizer = get_instance(torch.optim, 'optimizer', config, trainable_params)
        model_params = sum([i.shape.numel() for i in list(model.parameters())])
        opt_params = sum([i.shape.numel() for j in self.optimizer.param_groups for i in j['params']])
        assert opt_params == model_params, 'some params are missing in the opt'

        self.lr_scheduler = getattr(utils.lr_scheduler, config['lr_scheduler'])(optimizer=self.optimizer, num_epochs=self.epochs, 
                                        iters_per_epoch=iters_per_epoch)

        # MONITORING
        self.monitor = cfg_trainer.get('monitor', 'off')
        if self.monitor == 'off':
            self.mnt_mode = 'off'
            self.mnt_best = 0
        else:
            self.mnt_mode, self.mnt_metric = self.monitor.split()
            assert self.mnt_mode in ['min', 'max']
            self.mnt_best = -math.inf if self.mnt_mode == 'max' else math.inf
            self.early_stoping = cfg_trainer.get('early_stop', math.inf)

        # CHECKPOINTS & TENSORBOARD
        date_time = datetime.datetime.now().strftime('%m-%d_%H-%M')
        run_name = config['experim_name']
        self.checkpoint_dir = os.path.join(cfg_trainer['save_dir'], run_name)
        helpers.dir_exists(self.checkpoint_dir)
        config_save_path = os.path.join(self.checkpoint_dir, 'config.json')
        with open(config_save_path, 'w') as handle:
            json.dump(self.config, handle, indent=4, sort_keys=True)
         
        writer_dir = os.path.join(cfg_trainer['log_dir'], run_name)
        self.writer = tensorboard.SummaryWriter(writer_dir)
        self.html_results = HTML(web_dir=config['trainer']['save_dir'], exp_name=config['experim_name'],
                            save_name=config['experim_name'], config=config, resume=resume)

        if resume: self._resume_checkpoint(resume)
Example 10
def train(epochs):

    print("Train start")
    writer = tensorboard.SummaryWriter(log_dir='./log', comment='Train loop')
    for ep in range(1, epochs + 1):
        epoch_loss, epoch_accuracy, epoch_precision = 0, 0, 0
        epoch_f1, idx = 0, 0
        for idx, (inp, label) in enumerate(train_loader):
            optimizer.zero_grad()
            op = model(inp)
            loss = criterion(op, label)
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()
            epoch_accuracy += accuracy(op, label)
            epoch_precision += precision(op, label)
            epoch_f1 += f1(op, label)
        # enumerate starts at 0, so the number of batches seen is idx + 1
        n_batches = idx + 1
        writer.add_scalars(
            'Training', {
                'Accuracy': epoch_accuracy / n_batches,
                'Precision': epoch_precision / n_batches,
                'F1': epoch_f1 / n_batches
            }, ep)
        writer.add_scalars('Loss', {'Training': epoch_loss / n_batches}, ep)
    writer.close()
    torch.save(model.state_dict(), PATH)
    print("Done training")
Example 11
 def __init__(
     self,
     job_dir,
     num_examples,
     learning_rate,
     batch_size,
     epochs,
     num_workers,
     seed,
 ):
     super(PyTorchModel, self).__init__(job_dir=job_dir, seed=seed)
     self.num_examples = num_examples
     self.learning_rate = learning_rate
     self.batch_size = batch_size
     self.epochs = epochs
     self.summary_writer = tensorboard.SummaryWriter(log_dir=self.job_dir)
     self.logger = utils.setup_logger(name=__name__ + "." +
                                      self.__class__.__name__,
                                      distributed_rank=0)
     self.trainer = engine.Engine(self.train_step)
     self.evaluator = engine.Engine(self.tune_step)
     self._network = None
     self._optimizer = None
     self._metrics = None
     self.num_workers = num_workers
     self.device = distributed.device()
     self.best_state = None
     self.counter = 0
Example 12
def main():
    # Random seed initialization
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)

    # Define your dataloader here
    loader_train = None
    loader_eval = None

    writer = tensorboard.SummaryWriter(
        log_dir=path.join('..', 'experiment', cfg.save)
    )

    if torch.cuda.is_available():
        device = torch.device('cuda')
        torch.cuda.manual_seed_all(seed)
    else:
        device = torch.device('cpu')

    # Make a CNN
    net = simple.Simple()
    net = net.to(device)
    # Will be supported later...
    '''
    writer.add_graph(
        net,
        input_to_model=torch.randn(1, 3, 64, 64).to(device),
    )
    '''

    # Set up an optimizer
    params = [p for p in net.parameters() if p.requires_grad]
    optimizer = optim.Adam(params, lr=1e-4)

    # Set up a learning rate scheduler
    scheduler = lr_scheduler.MultiStepLR(
        optimizer,
        milestones=[int(0.5 * cfg.epochs), int(0.75 * cfg.epochs)],
        gamma=0.5,
    )

    def do_train(epoch: int):
        net.train()
        for batch, (x, t) in enumerate(loader_train):
            x = x.to(device)
            t = t.to(device)
            # Define your training loop here

    def do_eval(epoch: int):
        net.eval()
        for x, t in loader_eval:
            x = x.to(device)
            t = t.to(device)
            # Define your evaluation loop here

    # Outer loop
    for i in tqdm.trange(cfg.epochs):
        do_train(i + 1)
        do_eval(i + 1)
Example 13
def perform_experiment(store_dir=None, test_model=False, test_dir=None):
    model = FullyConnectedNN([84, 42, 21], 4).to(device)
    optimizer = optim.Adagrad(model.parameters(),
                              lr=0.0005)  #, weight_decay=0.1
    scheduler = {
        "scheduler": lambda o: optim.lr_scheduler.MultiStepLR(o, [20, 30], gamma=.1),
        "epoch": 35,
    }

    tensorboard = tb.SummaryWriter(
        os.path.join(os.path.dirname(os.path.realpath(__file__)), "..",
                     "tb_logs", "nn"))

    runner = NeuralNetworkRunner(
        model,
        optimizer=optimizer,
        tensorboard=tensorboard,
        loss_fn=nn.CrossEntropyLoss(weight=weight_vector))
    runner.train(lr_setup=scheduler)
    runner.get_metrics().plot_confusion_matrix(
        tensorboard=tensorboard,
        labels=["normal", "bacteria", "virus", "covid"],
        tag="nn")
    if store_dir is not None:
        store_path = os.path.join(store_dir, "nn.torch")
        model.save(store_path)
    if test_model:
        output_path = os.path.join(test_dir, "nn.txt")
        runner_output_test(runner, output_path)
Example 14
def main():
    writer = tensorboard.SummaryWriter(log_dir='./logs')
    device = torch.device(
        'cuda:0') if torch.cuda.is_available() else torch.device('cpu')
    vggloss = VGGLosses(device=device).to(device)

    dataset = Dataset(root='dataset/Shinkai',
                      style_transform=transform,
                      smooth_transform=transform)

    dataloader = DataLoader(dataset, batch_size=16, shuffle=True)

    G = Generator().to(device)
    D = PatchDiscriminator().to(device)

    G.apply(weights_init)
    D.apply(weights_init)

    optimizer_G = optim.Adam(G.parameters(), lr=0.0001)  # Based on paper
    optimizer_D = optim.Adam(D.parameters(), lr=0.0004)  # Based on paper

    init_train(20, lr=0.1, con_weight=1.0)
    train(epoch=10, con_weight=1.2, gra_weight=2., col_weight=10.)


if __name__ == '__main__':
    main()
Example 15
 def __init__(self,
              max_epoch,
              batch_size,
              step,
              model,
              metric='val_kappa',
              current_epoch=1,
              optimizer=None,
              warmup_scheduler=None,
              lr_scheduler=None,
              weighted_sampler=None):
     self.callbacks = []
     self.max_epoch = max_epoch
     self.epoch = current_epoch
     self.batch_size = batch_size
     self.step = step
     self.losses = {}
     self.args = args
     self.model_path = args.model_path
     self.model = model
     self.ckpt_path = args.checkpoint
     self.writer = tsb.SummaryWriter(self.ckpt_path)
     self.warmup_scheduler = warmup_scheduler
     self.lr_scheduler = lr_scheduler
     self.optimizer = optimizer
     self.metric = metric
     self.history = 0  # Record for acc
     self.weighted_sampler = weighted_sampler
     # Compile metric opt
     if 'loss' in metric:
         self.opt = np.less
         self.best = np.inf
     else:
         self.opt = np.greater
         self.best = -np.inf
Example 16
    def __init__(
        self,
        config,  # type: ignore
        output_folder: str,
        model: nn.Module,
        loss_fn: nn.Module,
        train_loader: DataLoader,  # type: ignore
        val_loader: DataLoader,  # type: ignore
        test_loader: DataLoader,  # type: ignore
        early_stopping_patience: int = 5,
    ) -> None:
        super().__init__()
        self.optim = Adam(model.parameters(), config["lr"])  # type: ignore
        self.train_loader: DataLoader = train_loader  # type: ignore
        self.val_loader: DataLoader = val_loader  # type: ignore
        self.model = model
        self.loss_fn = loss_fn
        self.current_epoch: int = 0
        self.epochs: int = config["epochs"]
        self.output_folder = output_folder
        self.writer = tensorboard.SummaryWriter(output_folder)
        self.best_val_loss = float('inf')
        self.early_stopping_patience = early_stopping_patience
        self.patience = early_stopping_patience

        self.is_training: bool = True
Example 17
def train(args):
    from os import path
    model = Planner()
    train_logger, valid_logger = None, None
    if args.log_dir is not None:
        train_logger = tb.SummaryWriter(path.join(args.log_dir, 'train'),
                                        flush_secs=1)
    """
    Your code here, modify your HW1 / HW2 code
    
    """
    import torch

    device = torch.device(
        'cuda') if torch.cuda.is_available() else torch.device('cpu')

    model = Planner().to(device)
    if args.continue_training:
        model.load_state_dict(
            torch.load(
                path.join(path.dirname(path.abspath(__file__)), 'cnn.th')))

    #optimizer = torch.optim.SGD(model.parameters(), lr=args.learning_rate, momentum=0.9, weight_decay=1e-5)
    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)
    loss = torch.nn.BCEWithLogitsLoss()

    import inspect
    transform = eval(
        args.transform, {
            k: v
            for k, v in inspect.getmembers(dense_transforms)
            if inspect.isclass(v)
        })
    train_data = load_data('2x2x1250z', transform=transform, num_workers=4)
    global_step = 0
    for epoch in range(args.num_epoch):
        acc = []
        losses = []
        for img, label in train_data:
            img, label = img.to(device), label.to(device)

            logit = model(img)
            loss_val = loss(logit, label)
            accuracy = ((logit > 0).long() == label).detach().cpu().numpy()
            acc.extend(accuracy)
            if train_logger is not None:
                train_logger.add_scalar('loss', loss_val, global_step)

            optimizer.zero_grad()
            loss_val.backward()
            losses.append(loss_val.detach().cpu().numpy())
            optimizer.step()
            global_step += 1

        if train_logger:
            train_logger.add_scalar('accuracy', np.mean(acc), global_step)
        print('epoch %-3d \t loss = %0.3f \t acc = %0.3f' %
              (epoch, np.mean(losses), np.mean(acc)))
        save_model(model)
    save_model(model)
Example 18
 def __init__(self, save_dir: Optional[str] = None) -> None:
     if is_master():
         from torch.utils import tensorboard
         self._save_dir = Path(save_dir or ".")
         self._save_dir.mkdir(exist_ok=True, parents=True)
         self.writer = tensorboard.SummaryWriter(str(self._save_dir))
         self.writer.add_text("exec", ' '.join(get_args()))
Example 19
    def restore_checkpoint(self, epoch=None):
        """Restores the Trainer's state using self.log_dir.

        Args:
            epoch: Epoch from which to restore the Trainer's state. If None, uses the
                latest available epoch.
        """
        epoch = epoch or self._find_latest_epoch()
        checkpoint = torch.load(self._path(f"trainer_state_{epoch}.ckpt"))

        self.model.load_state_dict(checkpoint["model"])
        self.optimizer.load_state_dict(checkpoint["optimizer"])
        self._step = checkpoint["step"]
        self._epoch = checkpoint["epoch"]
        self._examples_processed = checkpoint["examples_processed"]
        self._time_taken = checkpoint["time_taken"]
        if self.lr_scheduler is not None:
            self.lr_scheduler.load_state_dict(checkpoint["lr_scheduler"])

        # NOTE(eugenhotaj): We need to replace the SummaryWriter and ensure any
        # logs written after the last saved checkpoint are purged.
        self._summary_writer.close()
        self._summary_writer = tensorboard.SummaryWriter(self.log_dir,
                                                         max_queue=100,
                                                         purge_step=self._step)
Example 20
def train(args):
    from os import path
    import torch.utils.tensorboard as tb
    model = TCN()
    train_logger, valid_logger = None, None
    if args.log_dir is not None:
        train_logger = tb.SummaryWriter(path.join(args.log_dir, 'train'),
                                        flush_secs=1)
        valid_logger = tb.SummaryWriter(path.join(args.log_dir, 'valid'),
                                        flush_secs=1)
    """
    Your code here, modify your code from prior assignments
    Hint: SGD might need a fairly high learning rate to work well here

    """
    raise NotImplementedError('train')
    save_model(model)
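Example 6 above fills in this same template; as a minimal illustration of the docstring hint, the optimizer setup might look like the sketch below (the 0.1 learning rate is only an assumption about what counts as "fairly high").

import torch

# Assumed values for illustration only; see Example 6 for a complete training loop.
model = TCN()
optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=1e-5)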
Example 21
 def _report_results(self, tensorboard_log_dir, checkpoint_path, results):
     step = self._get_step_from_checkpoint(checkpoint_path)
     with tensorboard.SummaryWriter(tensorboard_log_dir) as writer:
         for sampler, sampler_results in results.items():
             for metric, value in sampler_results.items():
                 writer.add_scalar(f'{sampler}__{metric}',
                                   value,
                                   global_step=step)
Example 22
    def _get_writer(self):
        """Get writer and initialize if possible."""
        if (self._writer is None and self._logdir is not None
                and self._global_tag is not None and self.name is not None):
            self._writer = tb.SummaryWriter(
                os.path.join(self._logdir, self.name))

        return self._writer
Example 23
def train(args):
    from os import path
    model = FCN()
    train_logger, valid_logger = None, None
    if args.log_dir is not None:
        train_logger = tb.SummaryWriter(path.join(args.log_dir, 'train'),
                                        flush_secs=1)
        valid_logger = tb.SummaryWriter(path.join(args.log_dir, 'valid'),
                                        flush_secs=1)
    """
    Your code here, modify your HW1 / HW2 code
    Hint: Use ConfusionMatrix, ConfusionMatrix.add(logit.argmax(1), label), ConfusionMatrix.iou to compute
          the overall IoU, where label holds the batch labels and logit holds the logits of your classifier.
    Hint: If you found good data augmentation parameters for the CNN, use them here too. Use dense_transforms
    Hint: Use the log function below to debug and visualize your model
    """
    save_model(model)
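Following the hints, a rough sketch of a validation pass that accumulates the IoU with ConfusionMatrix (assuming ConfusionMatrix, FCN and load_dense_data behave as in Example 7; the data path and batch size are placeholders):

import torch

# Sketch only: helper names and the 'dense_data/valid' path are assumptions.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = FCN().to(device)
valid_data = load_dense_data('dense_data/valid', batch_size=32)

confusion = ConfusionMatrix()
model.eval()
with torch.no_grad():
    for img, label in valid_data:
        img, label = img.to(device), label.to(device).long()
        logit = model(img)
        confusion.add(logit.argmax(1), label)
print(confusion.iou, confusion.global_accuracy)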
Example 24
 def __init__(self, model, data, optimizer_cls, loss_fn_cls, log_name: str):
   self.model = model
   self.data = data
   self.optimizer = optimizer_cls(model.parameters())
   self.loss_fn = loss_fn_cls(ignore_index=self.data.tag_pad_idx)
   self.writer = tensorboard.SummaryWriter(log_name)
   self.train_global = 0
   self.test_global = 0
Example 25
    def __init__(self, args, cfg):
        super(Tester, self).__init__(args, cfg)

        args = self.args

        if self.batch_size != 1:
            self.logger.info(
                "batch size in the testing mode should be set to one.")
            self.logger.info("setting batch size (batch-size = 1).")
            self.batch_size = 1

        if self.seq_size != 1:
            self.logger.info("setting sequence size (s=1)")
            raise ValueError("Sequence size mus tbe equal 1 in test mode.")

        # create the folder for saving training checkpoints
        self.checkpoint_dir = self.out_dir
        Path(self.checkpoint_dir).mkdir(parents=True, exist_ok=True)

        # prepare the dataset and dataloaders
        transform = None

        self.model = nets.get_model(input_shape=(self.n_channels,
                                                 self.im_height_model,
                                                 self.im_width_model),
                                    cfg=self.cfg,
                                    device=self.device)
        self.criterion = get_loss_function(self.cfg, args.device)

        self.has_lidar = self.model.lidar_feat_net is not None
        self.has_imu = self.model.imu_feat_net is not None

        self.test_dataset = ds.Kitti(config=self.cfg,
                                     transform=transform,
                                     ds_type='test',
                                     has_imu=self.has_imu,
                                     has_lidar=self.has_lidar)

        self.test_dataloader = torch.utils.data.DataLoader(
            self.test_dataset,
            batch_size=self.batch_size,
            num_workers=self.num_workers,
            shuffle=False,
            worker_init_fn=worker_init_fn,
            collate_fn=ds.deeplio_collate)

        self.data_permuter = DataCombiCreater(combinations=self.combinations,
                                              device=self.device)

        self.tensor_writer = tensorboard.SummaryWriter(log_dir=self.runs_dir)

        # debugging and visualizing
        self.logger.print("System Training Configurations:")
        self.logger.print("args: {}".format(self.args))

        self.logger.print(yaml.dump(self.cfg))
        self.logger.print(self.test_dataset)
Example 26
def train(env,
          agent,
          n_episodes: int = 1000,
          score_threshold: float = 32) -> list:
    """
    Params
    ======
        n_episodes (int): maximum number of training episodes
    """
    scores = []
    scores_window: Deque[float] = deque(maxlen=100)
    best_score = float("-inf")
    writer = tensorboard.SummaryWriter(f"runs/{int(time())}")
    for i_episode in range(1, n_episodes + 1):
        state = env.reset()
        score = 0

        start = time()
        while True:
            action = agent.act(state)
            next_state, reward, done, _ = env.step(action)
            agent.step(state, action, reward, next_state, done)
            state = next_state
            score += np.mean(reward)
            if np.any(done):
                break

        time_for_episode = time() - start
        writer.add_scalar("train/time", time_for_episode, i_episode)
        scores_window.append(score)
        scores.append(score)
        window_score = np.mean(scores_window)

        writer.add_scalar("train/reward", score, i_episode)
        writer.add_scalar("train/window", window_score, i_episode)
        writer.add_scalar("train/memory_size", len(agent.memory), i_episode)

        print(
            f'\rEpisode {i_episode}\tAverage Score: {window_score:.2f}\tTime: {time_for_episode:.2f}',
            end="")

        if i_episode % 100 == 0:
            print(f'\rEpisode {i_episode}\tAverage Score: {window_score:.2f}')

        if window_score >= score_threshold and best_score < score_threshold:
            print(
                f'\nEnvironment solved in {i_episode:d} episodes!\tAverage Score: {window_score:.2f}'
            )

        if window_score > best_score and window_score >= score_threshold:
            best_score = window_score
            torch.save(agent.actor_local.state_dict(), 'checkpoint_actor.pt')
            torch.save(agent.critic_local.state_dict(), 'checkpoint_critic.pt')

    print(f"Best average score: {best_score}")
    writer.close()
    return scores
Example 27
def initialize(parameters, action_encoder, distributed_config=None):
    task = parameters.get('task', 'train')

    if task == 'train' and _train_utils.is_leader():
        savedir = _train_utils.get_save_dir(parameters)
        # Save parameters
        with open(os.path.join(savedir, 'params.json'), 'w') as fp:
            json.dump(parameters, fp, sort_keys=True, indent=4)

    Chem.disable_log('rdApp.*')

    batch_size = parameters.get('batch_size', 128)

    if distributed_config:
        batch_size = batch_size // distributed_config.world_size

    config = joint_network.JointClassificationNetworkConfiguration(
        action_encoder.get_num_atom_insert_locations(),
        action_encoder.num_insert_bond_locations,
        hidden_size=384,
        depth=parameters.get('message_depth', 5))

    model = joint_network.JointClassificationNetwork(batch_size, config)

    model_path = parameters.get('model_path')
    if model_path:
        model.load_state_dict(torch.load(model_path, map_location='cpu'))

    if distributed_config:
        print("Creating model on GPU {0}".format(
            distributed_config.local_rank))
        gpu_id = distributed_config.local_rank
        model = modules.SingleDeviceDistributedParallel(
            model.cuda(gpu_id), gpu_id)
    else:
        model = model.cuda()

    if task == 'train':
        model.train()
    else:
        model.eval()

    def save_model(ep, it):
        if task == 'train' and _train_utils.is_leader():
            model_filename = os.path.join(
                savedir, "joint_model_ep_{0}_it_{1:04d}.pth".format(ep, it))
            torch.save(model.state_dict(), model_filename)

    if _train_utils.is_leader() and (task == 'train'):
        from torch.utils import tensorboard
        summary_dir = os.path.join(savedir, 'summary')
        writer = tensorboard.SummaryWriter(log_dir=summary_dir)
    else:
        writer = None

    return model, save_model, writer
Example 28
    def __init__(self):
        self.writer = tensorboard.SummaryWriter("checkpoint/tensorboard")

        self.rect_color = (0,255,255)
        self.landmarks_color  = (0,255,0)
        self.rect_width = 3
        self.landmarks_radius = 1
        self.winname = "image"
        self.crop_resize_shape = (400, 400)
        self.user_press = None
Example 29
    def init_saver(self):

        trainercore.init_saver(self)

        if self.args.training and self.args.test_file is not None:
            self._aux_saver = tensorboard.SummaryWriter(
                self.args.log_directory + "/test/")

        else:
            self._aux_saver = None
Example 30
 def __init__(self, config: Config, rank_print=0):
     """
     only work at specific local_rank process
     """
     super().__init__()
     self.config = config
     self.isWork = (rank_print == self.config.local_rank)
     # ANCHOR tensorboard init
     if self.isWork:
         self.tx_writer = tensorboard.SummaryWriter(config.log_file)