Example #1
def test(model, dataloader, params):
    val_data = tqdm(dataloader.data_iterator(data_type='test',
                                             batch_size=params.batch_size),
                    total=(dataloader.size()[0] // params.batch_size))
    metrics = Metrics()
    loss_avg = RunningAverage()
    with torch.no_grad():
        for data, labels in val_data:
            model.eval()
            data = torch.tensor(data, dtype=torch.long).to(params.device)
            labels = torch.tensor(labels, dtype=torch.long).to(params.device)

            batch_masks = data != 0

            loss, logits = model(data,
                                 attention_mask=batch_masks,
                                 labels=labels)

            predicted = logits.max(2)[1]
            metrics.update(batch_pred=predicted.cpu().numpy(),
                           batch_true=labels.cpu().numpy(),
                           batch_mask=batch_masks.cpu().numpy())
            loss_avg.update(torch.mean(loss).item())
            val_data.set_postfix(type='VAL',
                                 loss='{:05.3f}'.format(loss_avg()))
    metrics.loss = loss_avg()
    return metrics
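Every example in this collection relies on a RunningAverage helper whose definition is not shown. A minimal sketch consistent with the update(value) / loss_avg() usage above follows; the internals are an assumption, not the original implementation.

class RunningAverage:
    """Keeps the running mean of the scalar values passed to update()."""

    def __init__(self):
        self.total = 0.0
        self.count = 0

    def reset(self):
        self.total, self.count = 0.0, 0

    def update(self, value):
        self.total += value
        self.count += 1

    def __call__(self):
        # Mean of everything recorded so far; 0 if nothing has been recorded yet.
        return self.total / self.count if self.count else 0.0

Calling the instance, e.g. loss_avg(), then returns the average of all values recorded so far, which is how the progress bars in these examples report their loss.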
Example #2
    def run_train(self, dataset, args):
        model, tokenizer = self.bert, self.tokenizer
        batch_size = args.batch_size
        model.train()
        train_examples = dataset.train_dataloader

        # Initialize Optimizer
        num_train_iters = args.epochs * len(
            train_examples) / batch_size / args.gradient_accumulation_steps
        self.init_optimizer(args, num_train_iters)

        train_avg_loss = RunningAverage()
        for epoch in range(args.epochs):
            print('Epoch {}'.format(epoch))
            train_bar = tqdm(
                enumerate(train_examples),
                total=len(train_examples),
                desc="Training",
            )
            for step, batch in train_bar:
                inputs = {k: v.to('cuda') for k, v in batch.items()}
                loss = model(inputs['input_ids'],
                             inputs['token_type_ids'],
                             labels=inputs['labels'])
                if args.n_gpus > 1:
                    loss = loss.mean()
                if args.gradient_accumulation_steps > 1:
                    loss = loss / args.gradient_accumulation_steps
                loss.backward()
                train_avg_loss.update(loss.item())
Example #3
def validate(model, val_set, params):
    val_data = tqdm(DataLoader(val_set,
                               batch_size=params.batch_size,
                               collate_fn=KeyphraseData.collate_fn),
                    total=(len(val_set) // params.batch_size))
    metrics = Metrics()
    loss_avg = RunningAverage()
    with torch.no_grad():
        model.eval()
        for data, labels, mask in val_data:

            data = data.to(params.device)
            labels = labels.to(params.device)
            mask = mask.to(params.device)

            loss, logits = model(data, attention_mask=mask, labels=labels)

            predicted = logits.max(2)[1]
            metrics.update(batch_pred=predicted.cpu().numpy(),
                           batch_true=labels.cpu().numpy(),
                           batch_mask=mask.cpu().numpy())
            loss_avg.update(torch.mean(loss).item())
            val_data.set_postfix(type='VAL',
                                 loss='{:05.3f}'.format(loss_avg()))

    metrics.loss = loss_avg()
    return metrics
Example #4
def train(model, dataloader, optimizer, scheduler, params):
    print("Starting training...")
    best_val_loss = 100
    #print(params.save_dir, params.tag)
    stats = Stats(params.save_dir, params.tag)
    for epoch in range(params.epoch_num):
        loss_avg = RunningAverage()
        train_data = tqdm(dataloader.data_iterator(data_type='train',
                                                   batch_size=params.batch_size),
                          total=(dataloader.size()[0] // params.batch_size))
        optimizer.zero_grad()
        model.zero_grad()
        for data, labels in train_data:
            model.train()
            data = torch.tensor(data, dtype=torch.long).to(params.device)
            labels = torch.tensor(labels, dtype=torch.long).to(params.device)

            batch_masks = (data != 0)
            output = model(data, attention_mask=batch_masks, labels=labels)

            loss = torch.mean(output[0])
            loss.backward()

            torch.nn.utils.clip_grad_norm_(model.parameters(), params.max_grad_norm)  # Gradient clipping is not in AdamW anymore (so you can use amp without issue)

            optimizer.step()
            scheduler.step()
            model.zero_grad()
            optimizer.zero_grad()
            # update the average loss
            loss_avg.update(loss.item())
            train_data.set_postfix(type='TRAIN', epoch=epoch, loss='{:05.3f}'.format(loss_avg()))

        metrics = validate(model, dataloader, params)
        print('After {} epochs: F1={}, Loss={}'.format(epoch, metrics.f1(), metrics.loss))
        stats.update(metrics, epoch, loss_avg())
        stats.save()
        if epoch % params.save_freq == 0 and params.save_checkpoints:
            save_checkpoint(
                {'epoch': epoch,
                 'state_dict': model.state_dict(),
                 'optim_dict': optimizer.state_dict()},
                is_best=False,
                tag=params.tag,
                epoch=epoch,
                score=metrics.f1(),
                checkpoint=params.save_dir)
        if metrics.loss < best_val_loss:
            best_val_loss = metrics.loss
            save_checkpoint(
                {'epoch': epoch,
                 'state_dict': model.state_dict(),
                 'optim_dict': optimizer.state_dict()},
                is_best=True,
                tag=params.tag,
                epoch='generic',
                score='epic',
                checkpoint=params.save_dir)
Example #5
    def one_epoch(self, mode, epoch_num):
        if mode not in ['train', 'test']:
            raise ValueError("Unknown value {} for mode".format(mode))
        print("{}ing... epoch: {}".format(mode, epoch_num))

        if mode == 'train':
            self.model.train()
            dl = self.train_data
            one_iter_function = self.one_train_iteration
        else:
            self.model.eval()
            dl = self.test_data
            one_iter_function = self.one_test_iteration

        acc_avg = RunningAverage()
        loss_avg = RunningAverage()
        with tqdm(total=len(dl)) as t:
            for n, (data, label) in enumerate(dl):
                if self.train_params['use_gpu']:
                    data, label = data.cuda(
                        self.train_params['gpu_id']), label.cuda(
                            self.train_params['gpu_id'])
                data, label = Variable(data), Variable(label)
                data = data.float()
                loss, acc = one_iter_function(data, label)
                loss_avg.update(loss)
                acc_avg.update(acc)
                t.set_postfix(
                    run_param="Epoch{} Loss:{:.2f} Acc:{:.2f}".format(
                        epoch_num, loss_avg(), acc_avg()))
                t.update()

        return acc_avg, loss_avg
Example #6
def evaluate():
    """Calculates loss and prediction accuracy given torch dataloader"""
    # Turn on evaluation mode which disables dropout.
    md.eval()
    avg_loss = RunningAverage()
    avg_acc = RunningAverage()

    with torch.no_grad():
        pbar = tqdm(test_dl, ascii=True, leave=False)
        for batch in pbar:
            # run model
            inp, target = batch
            inp, target = inp.to(device), target.to(device)
            out = md(inp.t())

            # calculate loss
            loss = criterion(out.view(-1), target.float())
            avg_loss.update(loss.item())

            # calculate accuracy
            pred = out.view(-1) > 0.5
            correct = pred == target.byte()
            avg_acc.update(torch.sum(correct).item() / len(correct))

            pbar.set_postfix(loss=f'{avg_loss():05.3f}',
                             acc=f'{avg_acc():05.2f}')

    return avg_loss(), avg_acc()
Example #7
def val(dataset, model, args, mode):
    model.eval()
    loader = DataLoader(dataset, batch_size=args.batch_size)
    dataloader_iter = iter(loader)
    state_h, state_c = model.init_state(args.sequence_length)
    loss_avg = RunningAverage()
    acc_avg = RunningAverage()
    while True:
        try:
            X, y = next(dataloader_iter)
        except RuntimeError:
            continue
        except StopIteration:
            break

        y_pred, (state_h,
                 state_c) = model(X.to(device),
                                  (state_h.to(device), state_c.to(device)))
        loss = criterion(y_pred.transpose(1, 2), y.long().to(device))
        loss_avg.update(loss.item())

        acc = accuracy(y_pred.transpose(1, 2), y.long().to(device))
        acc_avg.update(acc)

    print({
        'epoch': epoch,
        'val_loss': '{:05.4f}'.format(loss_avg()),
        'accuracy': '{:05.3f}'.format(acc_avg())
    })
Example #8
def train():
    # Turn on training mode which enables dropout.
    md.train()
    avg_loss = RunningAverage()
    avg_acc = RunningAverage()
    avg_prec = RunningAverage()
    avg_recall = RunningAverage()
    sparsity = 0.0
    info = {
        'loss': None,
        'acc': None,
    }

    pbar = tqdm(train_dl, ascii=True, leave=False)

    for batch in pbar:
        inp, target = batch
        inp, target = inp.to(device), target.to(device)
        # run model
        md.zero_grad()
        out = md(inp.t())
        loss = criterion(out.view(-1), target.float())
        loss.backward()

        torch.nn.utils.clip_grad_norm_(md.parameters(), args.clip)
        optimizer.step()
        if args.prune:
            pruner.step()

        # update stats
        avg_loss.update(loss.item())
        pred = out.view(-1) > 0.5

        correct = pred == target.byte()
        avg_acc.update(torch.sum(correct).item() / len(correct))

        #         avg_prec.update(t_p/(t_p+f_p))
        #         avg_recall.update(t_p/(t_p+f_n))
        info['loss'] = f'{avg_loss():05.3f}'
        info['acc'] = f'{avg_acc():05.2f}'
        #         info['prec'] = f'{avg_prec():05.2f}'
        #         info['recall'] = f'{avg_recall():05.2f}'
        if args.prune:
            sparsity = pruner.log()
            info['spar'] = f'{sparsity:.2f}'

        pbar.set_postfix(**info)

    return avg_loss(), avg_acc(), sparsity
Example #9
def train_one_epoch(model, datagen, loss_fn, optimizer):
    model.train()
    loss_avg = RunningAverage()
    with tqdm(total=len(datagen)) as t:
        for imgsA, imgsB, labels in datagen:
            imgsA, imgsB, labels = imgsA.to(DEVICE), imgsB.to(
                DEVICE), labels.to(DEVICE)
            optimizer.zero_grad()
            out = model(imgsA, imgsB)
            loss = loss_fn(out, labels)
            loss.backward()
            optimizer.step()

            t.set_postfix(loss=loss.cpu().item())
            t.update()
            loss_avg.update(loss.cpu().item())
    return loss_avg()
Example #10
def evaluate():
    """Calculates loss and prediction accuracy given torch dataloader"""
    # Turn on evaluation mode which disables dropout.
    md.eval()
    avg_loss = RunningAverage()
    avg_acc = RunningAverage()
    T_P = 0
    F_P = 0
    T_N = 0
    F_N = 0
    F__P = 0
    with torch.no_grad():
        pbar = tqdm(test_dl, ascii=True, leave=False)
        for batch in pbar:
            # run model
            inp, target = batch
            inp, target = inp.to(device), target.to(device)
            out = md(inp.t())

            # calculate loss
            loss = criterion(out.view(-1), target.float())
            avg_loss.update(loss.item())

            # calculate accuracy
            pred = out.view(-1) > 0.5
            correct = pred == target.byte()
            t_p, f_p, t_n, f_n, f__p = confusion(pred, target.byte())
            T_P += t_p
            F_P += f_p
            T_N += t_n
            F_N += f_n
            F__P += f__p
            avg_acc.update(torch.sum(correct).item() / len(correct))

            pbar.set_postfix(loss=f'{avg_loss():05.3f}',
                             acc=f'{avg_acc():05.2f}')


    # print('False_Positive', F_P)
    # print('True_Positive', T_P)
    # print('False_Neg', F_N)
    # print('True_Neg', T_N)
    # print('Check', F__P)
    if (T_P == 0):
        avg_prec = 0.0
    else:
        avg_prec = T_P / (T_P + F_P)
    if (T_P == 0):
        avg_recall = 0.0
    else:
        avg_recall = T_P / (T_P + F_N)
    if (avg_prec + avg_recall == 0.0):
        f1_score = 0.0
    else:
        f1_score = 2 * (avg_prec * avg_recall) / (avg_prec + avg_recall)
    return avg_loss(), avg_acc(), avg_prec, avg_recall, f1_score
Example #11
def main():
    args = get_args()
    wandb.init()
    wandb.config.update(args)

    seed = 42
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    loaded_model = False

    [train_loader, valid_loader, model,
     optimizer] = initialize(args, loaded_model)
    scaler = torch.cuda.amp.GradScaler()

    wandb.watch(model)
    best_acc = 0
    run_avg = RunningAverage()

    # scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min')
    # scheduler = torch.optim.lr_scheduler.CyclicLR(optimizer, base_lr=0.001, max_lr=0.1, cycle_momentum=False)

    for epoch in range(1, args.epochs_number + 1):
        run_avg.reset_train()
        run_avg.reset_val()

        train(args, model, train_loader, epoch, optimizer, scaler, run_avg)
        val_acc = evaluation(args, model, valid_loader, epoch, run_avg)

        # scheduler.step()
        if best_acc < val_acc:
            best_acc = val_acc
            save_checkpoint(model, optimizer, args, epoch)
Example #12
    def train(self):
        set_logger(os.path.join(self.log_dir, 'train.log'), terminal=False)

        epochs = self.hps.num_epochs
        print_every = self.hps.print_every
        log_every = self.hps.log_summary_every
        lr = self.hps.learning_rate

        loss_avg = RunningAverage()
        summary_writer = SummaryWriter(log_dir=self.summ_dir)
        current_best_loss = 1e3

        encoder_optimizer = optim.Adam(self.encoder.parameters(), lr=lr)
        decoder_optimizer = optim.Adam(self.decoder.parameters(), lr=lr)

        training_pairs = self.dl

        criterion = nn.NLLLoss(reduction='none')  # per-element loss; the old reduce=False flag is deprecated

        if self.hps.resume:
            log('- load ckpts...')
            self.load_state_dict()

        for epoch in trange(epochs, desc='epochs'):
            loss_avg.reset()
            with tqdm(total=len(training_pairs)) as progress_bar:
                for language_pair, mask_pair in training_pairs:
                    language_pair, mask_pair = language_pair.to(
                        self.device), mask_pair.to(self.device)
                    loss = self.train_single(language_pair, mask_pair,
                                             encoder_optimizer,
                                             decoder_optimizer, criterion)
                    loss_avg.update(loss.item())
                    self.global_step += 1
                    if self.global_step % log_every == 0:
                        summary_writer.add_scalar('loss_value',
                                                  loss,
                                                  global_step=self.global_step)
                    if self.global_step % print_every == 0:
                        log('global step: {}, loss average: {:.3f}'.format(
                            self.global_step, loss_avg()))

                    progress_bar.set_postfix(loss_avg=loss_avg())
                    progress_bar.update()
            if loss_avg() < current_best_loss:
                log('new best loss average found, saving modules...')
                current_best_loss = loss_avg()
                state = {
                    'encoder': self.encoder.state_dict(),
                    'decoder': self.decoder.state_dict(),
                    'global_step': self.global_step,
                    'epoch': epoch,
                    'loss_avg': loss_avg()
                }
                torch.save(state, os.path.join(self.ckpt_dir, 'best.pth.tar'))
Example #13
    def run_train(self, dataset, ontology, args):
        model, tokenizer = self.bert, self.tokenizer
        batch_size = args.batch_size
        self.train()

        # Generate training examples
        turns = list(dataset['train'].iter_turns())
        train_examples = [
            turn_to_examples(t, ontology, tokenizer) for t in turns
        ]
        train_examples = list(itertools.chain.from_iterable(train_examples))
        print('Generated training examples')

        # Random Oversampling
        # Note that: Most of the constructed examples are negative
        if args.random_oversampling:
            negative_examples, positive_examples = [], []
            for example in train_examples:
                if example[-1] == 0: negative_examples.append(example)
                if example[-1] == 1: positive_examples.append(example)
            nb_negatives, nb_positives = len(negative_examples), len(
                positive_examples)
            sampled_positive_examples = random.choices(positive_examples,
                                                       k=int(nb_negatives / 8))
            train_examples = sampled_positive_examples + negative_examples
            print('Did Random Oversampling')
            print('Number of positive examples increased from {} to {}'.format(
                nb_positives, len(sampled_positive_examples)))

        # Initialize Optimizer
        num_train_iters = args.epochs * len(
            train_examples) / batch_size / args.gradient_accumulation_steps
        self.init_optimizer(args, num_train_iters)

        # Main training loop
        iterations = 0
        best_dev_joint_goal = 0.0
        train_avg_loss = RunningAverage()
        for epoch in range(args.epochs):
            print('Epoch {}'.format(epoch))

            random.shuffle(train_examples)
            pbar = tqdm(range(0, len(train_examples), batch_size))
            for i in pbar:
                iterations += 1

                # Next training batch
                batch = train_examples[i:i + batch_size]
                _, _, input_ids, token_type_ids, labels = list(zip(*batch))

                # Padding and Convert to Torch Tensors
                input_ids, input_masks = pad(input_ids, args.device)
                token_type_ids = pad(token_type_ids, args.device)[0]
                labels = torch.LongTensor(labels).to(args.device)

                # Calculate loss
                loss = model(input_ids,
                             token_type_ids,
                             input_masks,
                             labels=labels)
                if args.n_gpus > 1:
                    loss = loss.mean()
                if args.gradient_accumulation_steps > 1:
                    loss = loss / args.gradient_accumulation_steps
                loss.backward()
                train_avg_loss.update(loss.item())

                # Update pbar postfix (iterating over `pbar` already advances the bar)
                pbar.set_postfix_str(f'Train Loss: {train_avg_loss()}')

                # parameters update
                if iterations % args.gradient_accumulation_steps == 0:
                    self.optimizer.step()
                    self.optimizer.zero_grad()

            # Evaluate on the dev set and the test set
            dev_results = self.run_dev(dataset, ontology, args)
            test_results = self.run_test(dataset, ontology, args)

            print('Evaluations after epoch {}'.format(epoch))
            print(dev_results)
            print(test_results)
            if dev_results['joint_goal'] > best_dev_joint_goal:
                best_dev_joint_goal = dev_results['joint_goal']
                self.save(args.output_dir)
                print('Saved the model')
Example #14
def validate(args,
             model,
             test_loader,
             criterion_ueff,
             epoch,
             epochs,
             device='cpu'):
    with torch.no_grad():
        val_si = RunningAverage()
        # val_bins = RunningAverage()
        metrics = utils.RunningAverageDict()
        for batch in tqdm(test_loader,
                          desc=f"Epoch: {epoch + 1}/{epochs}. Loop: Validation"
                          ) if is_rank_zero(args) else test_loader:
            img = batch['image'].to(device)
            depth = batch['depth'].to(device)
            if 'has_valid_depth' in batch:
                if not batch['has_valid_depth']:
                    continue
            depth = depth.squeeze().unsqueeze(0).unsqueeze(0)
            bins, pred = model(img)

            mask = depth > args.min_depth
            l_dense = criterion_ueff(pred,
                                     depth,
                                     mask=mask.to(torch.bool),
                                     interpolate=True)
            val_si.append(l_dense.item())

            pred = nn.functional.interpolate(pred,
                                             depth.shape[-2:],
                                             mode='bilinear',
                                             align_corners=True)

            pred = pred.squeeze().cpu().numpy()
            pred[pred < args.min_depth_eval] = args.min_depth_eval
            pred[pred > args.max_depth_eval] = args.max_depth_eval
            pred[np.isinf(pred)] = args.max_depth_eval
            pred[np.isnan(pred)] = args.min_depth_eval

            gt_depth = depth.squeeze().cpu().numpy()
            valid_mask = np.logical_and(gt_depth > args.min_depth_eval,
                                        gt_depth < args.max_depth_eval)
            if args.garg_crop or args.eigen_crop:
                gt_height, gt_width = gt_depth.shape
                eval_mask = np.zeros(valid_mask.shape)

                if args.garg_crop:
                    eval_mask[int(0.40810811 * gt_height):int(0.99189189 *
                                                              gt_height),
                              int(0.03594771 * gt_width):int(0.96405229 *
                                                             gt_width)] = 1

                elif args.eigen_crop:
                    if args.dataset == 'kitti':
                        eval_mask[int(0.3324324 * gt_height):int(0.91351351 *
                                                                 gt_height),
                                  int(0.0359477 * gt_width):int(0.96405229 *
                                                                gt_width)] = 1
                    else:
                        eval_mask[45:471, 41:601] = 1
                # apply the crop mask only when a crop flag is set (eval_mask is undefined otherwise)
                valid_mask = np.logical_and(valid_mask, eval_mask)
            metrics.update(
                utils.compute_errors(gt_depth[valid_mask], pred[valid_mask]))

        return metrics.get_value(), val_si
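Example #14 additionally uses utils.RunningAverageDict, which keeps one running average per entry of a per-batch metrics dict, and its RunningAverage is fed through append() rather than update(), so the helpers are not interchangeable across examples. A rough sketch matching only the update(dict) / get_value() calls shown above (the real utils module may differ) could be:

class RunningAverageDict:
    """One running average per key of the dicts passed to update()."""

    def __init__(self):
        self._store = {}  # key -> (running sum, count)

    def update(self, new_dict):
        for key, value in new_dict.items():
            total, count = self._store.get(key, (0.0, 0))
            self._store[key] = (total + value, count + 1)

    def get_value(self):
        # Per-key mean over all batches seen so far.
        return {key: total / count for key, (total, count) in self._store.items()}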
Example #15
 def race(self, driver):
     """
     Let a driver race in a preconfigured quickrace
     :param driver: a driver object that generates actions based on sensors
     :return: driver fitness value after race
     """
     if not self.connect():
         raise IOError("could not connect to TORCS")
     start_time = timeit.default_timer()
     try:
         print "Start racing..."
         s = None
         lap_times = []
         cur_lap_time = -10.0
         timeout_reached = False
         recovery_lock = 0
         max_speed = 0.0
         avg_speed = RunningAverage()
         driver.prepare()
         while True:
             data = self.sock.recv(2048)
             if data.strip().startswith("("):
                 s = SensorModel(string=data)
                 action = driver.get_action(sensors=s)
                 # save maximum speed for fitness function
                 max_speed = max(max_speed, s['speedX'])
                 avg_speed.add_value(float(s['speedX']))
                 # AUTORECOVERY: if off track, go backwards until back on track and then some more
                 if self.auto_recover and (s.is_off_track() or recovery_lock > 0):
                     action.gear = -1
                     action.accel = 0.4
                     action.clutch = 0.0
                     action.steering = s['angle'] / -2.0
                     if s.is_off_track():
                         recovery_lock = RECOVERY_LOCK
                     else:
                         recovery_lock -= 1
                 self.sock.sendto(str(action), self.server_address)
                 if s['curLapTime'][0] < cur_lap_time:
                     lap_times.append(cur_lap_time)
                     print "lap %i: %0.2f seconds" % (len(lap_times), cur_lap_time)
                 cur_lap_time = s['curLapTime'][0]
             else:
                 if data.startswith("***shutdown***"):
                     if s['curLapTime'][0] > 1:
                         lap_times.append(s['curLapTime'][0])
                     print "--- END OF RACE --- finished at position %i, avg/max speed: %0.2f/%0.2f km/h" % (
                         int(s['racePos']), avg_speed.avg, max_speed)
                     break
             if self.timeout is not None and s['curLapTime'] > self.timeout:
                 print "--- RACE TIMEOUT REACHED ---"
                 timeout_reached = True
                 break
         if s is not None:
             print "lap times:", lap_times
             # print "distance raced:", s['distRaced']
             return driver.compute_fitness(last_sensor=s, lap_times=lap_times, max_speed=max_speed,
                                           average_speed=avg_speed.avg, timeout_reached=timeout_reached)
         else:
             return 0.0
     except KeyboardInterrupt:
         print "Exit client"
     except Exception as e:
         print "Client Error:", e
     finally:
         #print "race call took %0.1f seconds." % (timeit.default_timer() - start_time)
         self.close()
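Example #15 comes from a different codebase again: its RunningAverage is fed with add_value() and read through an avg attribute. A small sketch matching just that usage (assumed, and written in Python 3 even though the example itself is Python 2) could be:

class RunningAverage:
    """Running mean exposed as an .avg attribute and fed via add_value()."""

    def __init__(self):
        self.avg = 0.0
        self._count = 0

    def add_value(self, value):
        self._count += 1
        # Incremental mean update avoids storing every sample.
        self.avg += (value - self.avg) / self._count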
Example #16
def test_running_average():
    train_losses = [1, 0.5, 0.3]
    train_accuracies = [0.3, 0.5, 0.1]
    running_average = RunningAverage()
    for i in range(len(train_losses)):
        running_average.update_train_loss_avg(train_losses[i], 1)
        running_average.update_train_acc_avg(train_accuracies[i], 1)

    assert running_average.train_loss_run_avg == 0.6
    assert running_average.train_acc_run_avg == 0.3

    running_average.update_train_loss_avg(0.2, 1)
    running_average.update_train_acc_avg(0.1, 1)

    assert running_average.train_loss_run_avg == 0.5
    assert running_average.train_acc_run_avg == 0.25

    val_losses = [1, 0.7, 1.3]
    val_accuracies = [0.3, 0.4, 8.3]

    for i in range(len(val_losses)):
        running_average.update_val_loss_avg(val_losses[i], 1)
        running_average.update_val_acc_avg(val_accuracies[i], 1)

    assert running_average.val_loss_run_avg == 1
    assert running_average.val_acc_run_avg == 3

    running_average.update_val_loss_avg(3, 1)
    running_average.update_val_acc_avg(7, 1)

    assert running_average.val_loss_run_avg == 1.5
    assert running_average.val_acc_run_avg == 4

    running_average.reset_train()
    running_average.reset_val()

    assert running_average.sum_train_loss == 0
    assert running_average.sum_train_acc == 0
    assert running_average.train_loss_counter == 0
    assert running_average.train_acc_counter == 0
    assert running_average.train_loss_run_avg == 0
    assert running_average.train_acc_run_avg == 0

    assert running_average.sum_val_loss == 0
    assert running_average.sum_val_acc == 0
    assert running_average.val_loss_counter == 0
    assert running_average.val_acc_counter == 0
    assert running_average.val_loss_run_avg == 0
    assert running_average.val_acc_run_avg == 0
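The assertions above describe a richer, count-weighted RunningAverage with separate train/val loss and accuracy tracks. A sketch consistent with those assertions (attribute and method names are taken from the test; everything else is assumed) might be:

class RunningAverage:
    """Count-weighted running averages with separate train/val loss and accuracy tracks."""

    def __init__(self):
        self.reset_train()
        self.reset_val()

    def reset_train(self):
        self.sum_train_loss = 0
        self.sum_train_acc = 0
        self.train_loss_counter = 0
        self.train_acc_counter = 0
        self.train_loss_run_avg = 0
        self.train_acc_run_avg = 0

    def reset_val(self):
        self.sum_val_loss = 0
        self.sum_val_acc = 0
        self.val_loss_counter = 0
        self.val_acc_counter = 0
        self.val_loss_run_avg = 0
        self.val_acc_run_avg = 0

    def update_train_loss_avg(self, value, count):
        self.sum_train_loss += value * count
        self.train_loss_counter += count
        self.train_loss_run_avg = self.sum_train_loss / self.train_loss_counter

    def update_train_acc_avg(self, value, count):
        self.sum_train_acc += value * count
        self.train_acc_counter += count
        self.train_acc_run_avg = self.sum_train_acc / self.train_acc_counter

    def update_val_loss_avg(self, value, count):
        self.sum_val_loss += value * count
        self.val_loss_counter += count
        self.val_loss_run_avg = self.sum_val_loss / self.val_loss_counter

    def update_val_acc_avg(self, value, count):
        self.sum_val_acc += value * count
        self.val_acc_counter += count
        self.val_acc_run_avg = self.sum_val_acc / self.val_acc_counter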
Example #17
def main(args):
    if args.use_gpu == 1:
        os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_num
        print("Using GPU", args.gpu_num)
    else:
        print("Not using GPU")

    train_dir = "train"
    val_dir = "val"
    long_dtype, float_dtype = get_dtypes(args)

    print("Initializing train dataset")
    train_dset, train_loader = data_loader(args.dataset_folder, train_dir,
                                           args.batch_size)
    print("Initializing val dataset")
    val_dset, val_loader = data_loader(args.dataset_folder, val_dir,
                                       args.batch_size)
    print("Training for %d" % args.num_epochs)
    print("Arguments", args.__dict__)

    model = PredictFromNightBaseline()
    # model = PredictFromDayBaseline()
    # model = PredictBaseline()
    model.type(float_dtype)
    print(model)

    optimizer = optim.Adam(filter(lambda p: p.requires_grad,
                                  model.parameters()),
                           lr=args.learning_rate)
    # criterion = nn.BCELoss()
    criterion = nn.CrossEntropyLoss()

    max_val_acc = 0.0

    for epoch in range(args.num_epochs):
        gc.collect()

        # Train epoch
        model.train()
        loss_avg = RunningAverage()
        acc_avg = RunningAverage()
        with tqdm(total=len(train_loader)) as t:
            for i, train_batch in enumerate(train_loader):
                train_batch = [
                    tensor.cuda() if args.use_gpu else tensor
                    for tensor in train_batch
                ]
                X_day, X_night, Y = train_batch

                out = model(X_day, X_night)
                loss = criterion(out, Y)

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                acc_avg.update_step(calc_accuracy(out, Y), Y.shape[0])
                loss_avg.update_step(loss.item(), Y.shape[0])
                t.set_postfix(loss='{:05.3f}'.format(loss_avg()),
                              acc='{:05.3f}'.format(acc_avg()))
                t.update()


        # Val metrics
        model.eval()
        val_loss = RunningAverage()
        val_acc = RunningAverage()
        for i, val_batch in enumerate(val_loader):
            val_batch = [
                tensor.cuda() if args.use_gpu else tensor
                for tensor in val_batch
            ]
            X_day, X_night, Y = val_batch

            out = model(X_day, X_night)
            loss = criterion(out, Y)

            val_loss.update_step(loss.item(), Y.shape[0])
            val_acc.update_step(calc_accuracy(out, Y), Y.shape[0])

        metrics_string = "Loss: {:05.3f} ; Acc: {:05.3f}".format(
            loss_avg(), acc_avg())
        val_metrics = "Loss: {:05.3f} ; Acc: {:05.3f}".format(
            val_loss(), val_acc())
        print("Epoch [%d/%d] - Train -" % (epoch + 1, args.num_epochs),
              metrics_string, "- Val -", val_metrics)

        if val_acc() > max_val_acc and args.save_model_weights:
            torch.save(model.state_dict(),
                       os.path.join(model_path, str(epoch)))
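Example #17 relies on yet another interface, update_step(value, n), which weights each batch statistic by the number of samples it covers before the average is read back with loss_avg() or acc_avg(). A possible sketch of that variant, assumed from the calls shown here, is:

class RunningAverage:
    """Running average in which each update is weighted by the number of samples it represents."""

    def __init__(self):
        self.total = 0.0
        self.num_samples = 0

    def update_step(self, value, n_samples):
        # `value` is the mean over a batch of `n_samples` items, so weight it accordingly.
        self.total += value * n_samples
        self.num_samples += n_samples

    def __call__(self):
        return self.total / self.num_samples if self.num_samples else 0.0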
Example #18
def train(config_name, gene_variant=None):
    # Prepare tokenizer, dataset, and model
    configs = get_configs(config_name, verbose=False)
    if configs['use_gene_features']:
        assert gene_variant is not None
        configs['gene_variant'] = gene_variant
    tokenizer = BertTokenizer.from_pretrained(configs['transformer'], do_basic_tokenize=False)
    train_set, dev_set, test_set = load_oneie_dataset(configs['base_dataset_path'], tokenizer)
    model = BasicCorefModel(configs)

    # Initialize the optimizer
    num_train_docs = len(train_set)
    epoch_steps = int(math.ceil(num_train_docs / configs['batch_size']))
    num_train_steps = int(epoch_steps * configs['epochs'])
    num_warmup_steps = int(num_train_steps * 0.1)
    optimizer = model.get_optimizer(num_warmup_steps, num_train_steps)
    print('Initialized optimizer')

    # Main training loop
    best_dev_score, iters, batch_loss = 0.0, 0, 0
    for epoch in range(configs['epochs']):
        #print('Epoch: {}'.format(epoch))
        print('\n')
        progress = tqdm.tqdm(total=epoch_steps, ncols=80,
                             desc='Train {}'.format(epoch))
        accumulated_loss = RunningAverage()

        train_indices = list(range(num_train_docs))
        random.shuffle(train_indices)
        for train_idx in train_indices:
            iters += 1
            inst = train_set[train_idx]
            iter_loss = model(inst, is_training=True)[0]
            iter_loss /= configs['batch_size']
            iter_loss.backward()
            batch_loss += iter_loss.data.item()
            if iters % configs['batch_size'] == 0:
                accumulated_loss.update(batch_loss)
                torch.nn.utils.clip_grad_norm_(model.parameters(), configs['max_grad_norm'])
                optimizer.step()
                optimizer.zero_grad()
                batch_loss = 0
                # Update progress bar
                progress.update(1)
                progress.set_postfix_str('Average Train Loss: {}'.format(accumulated_loss()))
        progress.close()

        # Evaluation after each epoch
        print('Evaluation on the dev set', flush=True)
        dev_score = evaluate(model, dev_set, configs)['avg']

        # Save model if it has better dev score
        if dev_score > best_dev_score:
            best_dev_score = dev_score
            # Evaluation on the test set
            print('Evaluation on the test set', flush=True)
            evaluate(model, test_set, configs)
            # Save the model
            save_path = os.path.join(configs['saved_path'], 'model.pt')
            torch.save({'model_state_dict': model.state_dict()}, save_path)
            print('Saved the model', flush=True)
Example #19
val_loader = data.DataLoader(Dataset(fnms=val_fnms, argumentation=None),
                             batch_size=BATCH_SIZE,
                             num_workers=4)

model = bbox_model()
model.to(device)
loss_fn = torch.nn.MSELoss(reduction='mean')
optimizer = torch.optim.Adam(model.parameters(), lr=LR)

OnPlateau = 0
EarlyStopping = 0
best_loss = float('inf')
for i in range(EPOCHS):
    log.info('epoch {}'.format(i))
    model.train()
    loss_trn_avg = RunningAverage()
    with tqdm(total=len(trn_loader)) as t:
        for imgs, labels in trn_loader:
            imgs, labels = imgs.to(device), labels.to(device)
            out = model(imgs)
            loss = loss_fn(out, labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            t.set_postfix(loss=loss.cpu().item())
            t.update()
            loss_trn_avg.update(loss.cpu().item())

    model.eval()
    loss_val_avg = RunningAverage()