Example #1
def do_evaluate(net, test_dataset, batch_size, augment=[]):

    test_loader = DataLoader(
        test_dataset,
        sampler     = SequentialSampler(test_dataset),
        batch_size  = batch_size,
        drop_last   = False,
        num_workers = 4,
        pin_memory  = True,
        collate_fn  = null_collate
    )
    #----
    start_timer = timer()

    test_num  = 0
    test_id   = []
    test_probability = [[],[],[]]
    test_truth = [[],[],[]]

    start_timer = timer()
    for t, (input, truth, infor) in enumerate(test_loader):

        batch_size,C,H,W = input.shape
        input = input.cuda()

        with torch.no_grad():
            net.eval()

            # accumulate predictions over the requested test-time augmentations
            num_augment = 0
            probability = [0, 0, 0]
            if 'null' in augment:  # identity (no augmentation)
                logit =  net(input)
                prob  = logit_to_probability(logit)

                probability = [p+q**0.5 for p,q in zip(probability,prob)]
                num_augment += 1

            probability = [p/num_augment for p in probability]

        batch_size  = len(infor)
        for i in range(NUM_TASK):
            test_probability[i].append(probability[i].data.cpu().numpy())
            test_truth[i].append(truth[i].data.cpu().numpy())

        test_id.extend([i.image_id for i in infor])
        test_num += batch_size

        print('\r %4d / %4d  %s'%(
             test_num, len(test_loader.dataset), time_to_str((timer() - start_timer),'min')
        ),end='',flush=True)

    assert(test_num == len(test_loader.dataset))
    print('')

    for i in range(NUM_TASK):
        test_probability[i] = np.concatenate(test_probability[i])
        test_truth[i] = np.concatenate(test_truth[i])

    print(time_to_str((timer() - start_timer),'sec'))
    return test_id, test_truth, test_probability
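Every snippet in this collection is a usage example of a project-local time_to_str helper (usually imported from the repo's own utils module), so its exact implementation differs from example to example: most of the PyTorch training loops call it as time_to_str(seconds, 'min'), while Examples #8 and #13 pass a single argument. A minimal sketch of the two-argument variant, assuming the input is an elapsed time in seconds, could look like this:

def time_to_str(t, mode='min'):
    # Sketch only -- the real helper in each repo may format differently.
    # t: elapsed time in seconds, e.g. timer() - start_timer
    if mode == 'min':
        hr, mn = divmod(int(t) // 60, 60)
        return '%2d hr %02d min' % (hr, mn)
    elif mode == 'sec':
        mn, sc = divmod(int(t), 60)
        return '%2d min %02d sec' % (mn, sc)
    else:
        raise NotImplementedError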
Example #2
def train_one_epoch(train_loader,
                    model,
                    criterions,
                    optimizer,
                    epoch,
                    meters,
                    since,
                    log=None):
    losses = AverageMeter()
    f1 = AverageMeter()
    model.train()
    if len(meters['f1']):
        previous_loss = meters['loss'][-1]
        previous_f1 = meters['f1'][-1]
        best_f1_epoch = np.argmax(meters['f1'])
        best_f1_score = meters['f1'][best_f1_epoch]
        best_loss_epoch = np.argmin(meters['loss'])
        best_loss = meters['loss'][best_loss_epoch]
    else:
        best_f1_epoch = 0
        best_f1_score = 0
        best_loss_epoch = 0
        best_loss = 0
        previous_loss = 0
        previous_f1 = 0

    for batch_id, (images, target) in enumerate(train_loader):
        batch_x = images.cuda(non_blocking=True)
        target = torch.Tensor(np.array(target)).float().cuda(non_blocking=True)
        output = model(batch_x)
        bce_criterion = criterions[0]
        balance_criterion = criterions[1]
        bce_loss = bce_criterion(output, target)
        balance_loss = balance_criterion(output, target)
        total_loss = bce_loss + 8.0 * balance_loss
        losses.update(bce_loss.item(), batch_x.size(0))
        # f1_score needs CPU arrays, so move the target off the GPU first
        f1_batch = f1_score(target.cpu(),
                            output.sigmoid().cpu() > 0.15,
                            average='macro')
        f1.update(f1_batch, batch_x.size(0))
        optimizer.zero_grad()
        total_loss.backward()
        # gradient clipping
        if cfg.grident_clip:
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.)
        optimizer.step()
        print('Epoch %3d\t' % epoch,
              'Batch %3d|%3d\t' % (batch_id, len(train_loader)),
              'Loss: %10.5f\t' % losses.avg,
              'Metrics|F1 Score: %10.5f\t' % f1.avg,
              'Previous Loss: %10.5f\t' % previous_loss,
              'Previous F1 Score: %10.5f\t' % previous_f1,
              'Best loss:%10.5f Epoch %3d\t' % (best_loss, best_loss_epoch),
              'Best F1:%10.5f Epoch %3d\t' % (best_f1_score, best_f1_epoch),
              'Time: %s' % time_to_str((timer() - since), 'min'),
              file=log)

    meters['loss'].append(losses.avg)
    meters['f1'].append(f1.avg)
    return meters
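Most of the PyTorch snippets in this collection track their metrics with an AverageMeter imported from a local utils module; the class itself is not reproduced here. A minimal sketch of the running-average interface these examples assume (update(value, n) plus .val and .avg attributes):

class AverageMeter(object):
    # Sketch of the usual running-average helper; the per-repo versions may differ slightly.
    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0.0
        self.sum = 0.0
        self.count = 0
        self.avg = 0.0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count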
Example #3
def train(train_loader, model, criterion, optimizer, epoch, valid_metrics,
          best_results, start):
    losses = utils.AverageMeter()
    f1 = utils.AverageMeter()
    acc = utils.AverageMeter()
    model.train()
    for i, (images, target) in enumerate(train_loader):
        images = images.to(device)
        indx_target = target.clone()
        target = torch.from_numpy(np.array(target)).long().to(device)
        # compute output
        output = model(images)
        loss = criterion(output, target)
        losses.update(loss.item(), images.size(0))
        f1_batch = f1_score(target.cpu().data.numpy(),
                            np.argmax(F.softmax(output, dim=1).cpu().data.numpy(),
                                      axis=1),
                            average='macro')
        acc_score = accuracy_score(
            target.cpu().data.numpy(),
            np.argmax(F.softmax(output, dim=1).cpu().data.numpy(), axis=1))
        f1.update(f1_batch, images.size(0))
        acc.update(acc_score, images.size(0))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        print('\r', end='', flush=True)
        message = '%s %5.1f %6.1f	  |   %0.3f  %0.3f  %0.3f  | %0.3f  %0.3f  %0.4f   | %s  %s  %s |   %s' % (\
          "train", i/len(train_loader) + epoch, epoch,
          acc.avg, losses.avg, f1.avg,
          valid_metrics[0], valid_metrics[1],valid_metrics[2],
          str(best_results[0])[:8],str(best_results[1])[:8],str(best_results[2])[:8],
          utils.time_to_str((timer() - start),'min'))
        print(message, end='', flush=True)
    return [acc.avg, losses.avg, f1.avg]
Example #4
def train(train_loader, model, loss_fn, optimizer, epoch, valid_loss, start):
    losses = utils.AverageMeter()
    model.train()

    for i, (x_batch, y_batch) in enumerate(train_loader):
        y_pred = model(x_batch)
        loss = loss_fn(y_pred, y_batch)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        losses.update(loss.item(),x_batch.shape[0])

        print('\r', end='', flush=True)
        message = '%s %5.1f %6.1f           |       %0.3f        |       %0.3f       | %s' % ( \
            "train", i / len(train_loader) + epoch, epoch+1,
            losses.avg,
            valid_loss,
            utils.time_to_str((timer() - start), 'min'))
        print(message, end='', flush=True)

    log.write("\n")
    log.write(message)
    log.write("\n")
    return losses.avg
Example #5
def evaluate(val_loader, model, criterion, epoch, train_loss, best_results, start):
    losses = AverageMeter()
    f1 = F1Meter()
    model.cuda()
    model.eval()
    with torch.no_grad():
        for i, (images, target) in enumerate(val_loader):
            images_var = images.cuda(non_blocking=True)
            in_images = images_var[:, :config.in_channels, :, :]
            target = torch.from_numpy(np.array(target)).float().cuda(non_blocking=True)

            classifier_output = model(in_images)
            classifier_loss = criterion(classifier_output, target)

            if config.with_mse_loss:
                out_images = images_var[:, -config.out_channels:, :, :]
                reconstruct_output = model.reconstruct_layer(model.features(in_images))
                reconstruct_loss = nn.MSELoss().cuda()(reconstruct_output, out_images)
                loss = classifier_loss + reconstruct_loss
            else:
                loss = classifier_loss

            losses.update(loss.item(), images_var.size(0))
            f1.update(classifier_output.sigmoid().cpu() > config.thresholds, target)

            if i % config.logging_every_n_steps == 0:
                message = logging_pattern % (
                    "val", i / len(val_loader) + epoch, epoch,
                    train_loss[0], train_loss[1],
                    losses.avg, f1.f1,
                    str(best_results[0])[:8], str(best_results[1])[:8],
                    time_to_str((timer() - start), 'min'))
                print(message, end='\n', flush=True)

    return [losses.avg, f1.f1]
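F1Meter, config and logging_pattern in Example #5 come from the surrounding project and are not shown. A minimal stand-in for F1Meter that matches the calls above (update(pred_bool, target) and a .f1 attribute), assuming a micro-averaged F1 accumulated over batches:

import torch

class F1Meter(object):
    # Hypothetical sketch; the original may use a different averaging scheme.
    def __init__(self):
        self.tp = self.fp = self.fn = 0.0
        self.f1 = 0.0

    def update(self, preds, targets):
        preds = preds.float()
        targets = targets.cpu().float()
        self.tp += (preds * targets).sum().item()
        self.fp += (preds * (1 - targets)).sum().item()
        self.fn += ((1 - preds) * targets).sum().item()
        denom = 2 * self.tp + self.fp + self.fn
        self.f1 = (2 * self.tp / denom) if denom > 0 else 0.0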
Example #6
def evaluate(val_loader, model, criterions, epoch, meters, start, log=None):
    losses = AverageMeter()
    f1 = AverageMeter()
    if len(meters['val_f1']):
        best_f1_epoch = np.argmax(meters['val_f1'])
        best_f1_score = meters['val_f1'][best_f1_epoch]
        best_loss_epoch = np.argmin(meters['val_loss'])
        best_loss = meters['val_loss'][best_loss_epoch]
    else:
        best_f1_epoch = 0
        best_f1_score = 0
        best_loss_epoch = 0
        best_loss = 0
    model.cuda()
    model.eval()
    preds = []
    targets = []
    with torch.no_grad():
        for batch_id, (images, target) in enumerate(val_loader):
            batch_x = images.cuda(non_blocking=True)
            batch_y = torch.Tensor(
                np.array(target)).float().cuda(non_blocking=True)
            output = model(batch_x)
            bce_criterion = criterions[0]
            balance_criterion = criterions[1]
            bce_loss = bce_criterion(output, batch_y)
            balance_loss = balance_criterion(output, batch_y)
            total_loss = bce_loss + balance_loss
            losses.update(bce_loss.item(), batch_x.size(0))
            pred_y = output.sigmoid().cpu().data.numpy()
            preds.append(pred_y)
            targets.append(target)
            f1_batch = f1_score(target, pred_y > 0.15, average='macro')
            f1.update(f1_batch, batch_x.size(0))
            print('Validate Epoch %3d\t' % epoch,
                  'Batch %4d|%4d\t' % (batch_id, len(val_loader)),
                  'Aver Loss: %6.5f\t' % losses.avg,
                  'Aver F1 Score: %6.5f' % f1.avg,
                  'Best Val loss:%10.5f, Epoch: %3d\t' %
                  (best_loss, best_loss_epoch),
                  'Best Val F1:%10.5f, Epoch: %3d\t' %
                  (best_f1_score, best_f1_epoch),
                  'Time: %s' % time_to_str((timer() - start), 'min'),
                  file=log)
    preds = np.concatenate(preds)
    targets = np.concatenate(targets)
    threshold, best_score, std_score = eval_f1score(preds,
                                                    targets,
                                                    cfg.label_names,
                                                    log=log)
    print("Average F1 Score is ", f1.avg, file=log)
    meters['val_loss'].append(losses.avg)
    meters['val_aver_f1'].append(f1.avg)
    meters['val_std_f1'].append(std_score)
    meters['val_f1'].append(best_score)
    meters['threshold'].append(threshold)

    return meters
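eval_f1score in Example #6 is another project-local helper; judging from its return values (threshold, best_score, std_score) it appears to search a decision threshold per class. A hypothetical illustration of such a search (search_thresholds and its candidate grid are not part of the original code):

import numpy as np
from sklearn.metrics import f1_score

def search_thresholds(preds, targets, candidates=np.arange(0.05, 0.95, 0.05)):
    # Sketch only: per-class threshold search maximising F1, then macro-averaged.
    # preds, targets: arrays of shape (num_samples, num_classes)
    thresholds, scores = [], []
    for c in range(targets.shape[1]):
        best_t, best_s = 0.5, 0.0
        for t in candidates:
            s = f1_score(targets[:, c], preds[:, c] > t)
            if s > best_s:
                best_t, best_s = t, s
        thresholds.append(best_t)
        scores.append(best_s)
    return np.array(thresholds), float(np.mean(scores)), float(np.std(scores))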
Example #7
    def tieResponse(self):
        rtv_string = "File Hash " + self.filehash + " Reputation\n\n"
        # Format a String Response
        i = 1
        for key in self.content:
            rtv_string = rtv_string + "Provider: " + key['provider'] + "\n"
            rtv_string = rtv_string + "Creation Date: " + utils.time_to_str(
                key['createDate']) + "\n"
            rtv_string = rtv_string + "Reputation: " + key['reputation'] + "\n"
            rtv_string += "\n"
            i += 1

        return rtv_string
Example #8
    def update(self):
        self.time = (pygame.time.get_ticks() - self.game.start_time) / 1000

        self.chrono = self.font.render(time_to_str(self.time), True, pygame.Color("white"), pygame.Color("black"))

        self.fps_info = self.font.render("FPS : " + float_to_str(self.game.clock.get_fps()), True,
            pygame.Color("red"), pygame.Color("black"))

        self.player_coins = self.font.render(str(self.game.player.coins), True,
            pygame.Color("yellow"), pygame.Color("black"))

        self.player_oxygen_bottle = self.font.render(float_to_str(self.game.player.oxygen_bottle), True,
            pygame.Color("lightblue"), pygame.Color("black"))
Example #9
def evaluate(val_loader, model, criterion, epoch, train_metrics, best_results,
             start):
    # track running loss, accuracy and f1 score
    losses = utils.AverageMeter()
    f1 = utils.AverageMeter()
    acc = utils.AverageMeter()

    valid_losses = []
    valid_f1s = []
    # switch mode for evaluation
    model.to(device)
    model.eval()
    with torch.no_grad():
        for i, (images, target) in enumerate(val_loader):
            images_var = images.to(device)
            indx_target = target.clone()
            target = torch.from_numpy(np.array(target)).long().to(device)
            output = model(images_var)
            loss = criterion(output, target)
            losses.update(loss.item(), images_var.size(0))
            valid_losses.append(loss.item())
            f1_batch = f1_score(target.cpu().data.numpy(),
                                np.argmax(F.softmax(output, dim=1).cpu().data.numpy(),
                                          axis=1),
                                average='macro')
            acc_score = accuracy_score(
                target.cpu().data.numpy(),
                np.argmax(F.softmax(output, dim=1).cpu().data.numpy(), axis=1))
            f1.update(f1_batch, images.size(0))
            valid_f1s.append(f1_batch.item() * -1)  # negate: f1 is bigger-is-better, early stopping expects smaller-is-better
            acc.update(acc_score, images.size(0))
            print('\r', end='', flush=True)
            message = '%s   %5.1f %6.1f	 |	 %0.3f  %0.3f   %0.3f	| %0.3f  %0.3f  %0.4f  | %s  %s  %s  |  %s' % (\
              "val", i/len(val_loader) + epoch, epoch,
              acc.avg,losses.avg,f1.avg,
              train_metrics[0], train_metrics[1],train_metrics[2],
              str(best_results[0])[:8],str(best_results[1])[:8],str(best_results[2])[:8],
              utils.time_to_str((timer() - start),'min'))
            print(message, end='', flush=True)
    valid_loss = np.average(valid_losses)
    valid_f1 = np.average(valid_f1s)

    print("\n")
    early_stopping_f1(valid_f1, model)
    early_stopping(valid_loss, model)

    early_stop = early_stopping.early_stop and early_stopping_f1.early_stop

    return [acc.avg, losses.avg, f1.avg], early_stop
Example #10
def train(task_name,
          model,
          optimizer,
          criterion,
          scheduler,
          train_loader,
          val_loader,
          mix_loder=None,
          log=None):
    meters = defaultdict(list)
    start = timer()
    for epoch in range(0, cfg.epochs):
        scheduler.step(epoch)
        cur_lr = get_learning_rate(optimizer)
        print('Learning rate is ', cur_lr, file=log)
        if mix_loder:
            meters = train_one_epoch_mixup(train_loader, mix_loder, model,
                                           criterion, optimizer, epoch, meters,
                                           start)
        else:
            meters = train_one_epoch(train_loader, model, criterion, optimizer,
                                     epoch, meters, start)
        meters = evaluate(val_loader, model, criterion, epoch, meters, start)
        is_best_loss = np.argmin(meters['val_loss']) == epoch
        is_best_f1 = np.argmax(meters['val_f1']) == epoch
        state = {
            "state_dict": model.state_dict(),
            "epoch": epoch,
            "optimizer": optimizer.state_dict(),
            "meters": meters.copy()
        }
        save_checkpoint(state, task_name, is_best_loss, is_best_f1)
        print('Task Name: %s\t' % task_name,
              'Validate Epoch %3d\t' % epoch,
              'Train Loss: %6.5f\t' % meters['loss'][-1],
              'Train F1 Score: %6.5f\t' % meters['f1'][-1],
              'Val Loss: %6.5f\t' % meters['val_loss'][-1],
              'Val F1 Score: %6.5f\t' % meters['val_f1'][-1],
              'Val Std F1 Score: %6.5f\t' % meters['val_std_f1'][-1],
              'Val Aver F1 Score: %6.5f\t' % meters['val_aver_f1'][-1],
              'Best Val loss:%6.5f, Epoch: %3d\t' %
              (np.min(meters['val_loss']), np.argmin(meters['val_loss'])),
              'Best Val F1: %6.5f, Epoch: %3d\t' %
              (np.max(meters['val_f1']), np.argmax(meters['val_f1'])),
              'Time: %s' % time_to_str((timer() - start), 'min'),
              file=log)
        time.sleep(0.01)

    return model
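get_learning_rate, called at the start of each epoch in Example #10, is also defined in the surrounding project rather than in the snippet; its return value is only printed. A minimal sketch, assuming a standard torch.optim optimizer:

def get_learning_rate(optimizer):
    # Sketch only: one entry per parameter group (most of these examples use a single group).
    return [param_group['lr'] for param_group in optimizer.param_groups]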
Example #11
    def message(rate, iter, epoch, kaggle, valid_loss, train_loss, batch_loss, mode='print'):
        if mode == 'print':
            asterisk = ' '
            loss = batch_loss
        if mode == 'log':
            asterisk = '*' if iter in iter_save else ' '
            loss = train_loss

        text = \
            '%0.5f %5.1f%s %4.1f | '%(rate, iter/1000, asterisk, epoch,) +\
            '%0.4f : %0.4f %0.4f %0.4f | '%(kaggle[1],*kaggle[0]) +\
            '%4.4f, %4.4f, %4.4f : %4.4f, %4.4f, %4.4f | '%(*valid_loss,) +\
            '%4.4f, %4.4f, %4.4f |'%(*loss,) +\
            '%s' % (time_to_str((timer() - start_timer),'min'))

        return text
Example #12
def evaluate(val_loader, model, loss_fn, epoch, train_loss, start_time):
    losses = utils.AverageMeter()
    # switch mode for evaluation
    model.cuda()
    model.eval()

    with torch.no_grad():
        for i, (x_batch, y_batch) in enumerate(val_loader):
            y_pred = model(x_batch)
            loss = loss_fn(y_pred, y_batch)
            losses.update(loss.item(),x_batch.shape[0])

            print('\r', end='', flush=True)
            message = '%s   %5.1f %6.1f           |       %0.3f        |       %0.3f       | %s' % ( \
                "val", i / len(val_loader) + epoch, epoch+1,
                train_loss,
                losses.avg,
                utils.time_to_str((timer() - start_time), 'min'))
            print(message, end='', flush=True)

            # Concatenate the outputs from every batch
            if i == 0:
                total_output = y_pred
                total_target = y_batch
            else:
                total_output = torch.cat([total_output, y_pred], 0)
                total_target = torch.cat([total_target, y_batch], 0)

        # compute loss for the entire evaluation dataset
        # print("total_output:", total_output.shape)
        # print("total_target:", total_target.shape)
        log.write("\n")
        log.write(message)
        log.write("\n")

    return losses.avg, total_output
Example #13
def train(args):
    logs_temp_file = os.path.join(
        args.logs_dir,
        '_'.join(['steps_log', args.args_in.time_code]) + '.csv')
    epochs_temp_file = os.path.join(
        args.logs_dir,
        '_'.join(['epochs_log', args.args_in.time_code]) + '.csv')
    CHECKPOINT_PATH = os.path.join(args.logs_dir,
                                   args.args_in.model_name)  # 'checkpoint.pt')
    # rank = args.nr * args.gpus + gpu
    # print(rank)
    # dist.init_process_group(backend='nccl', init_method='env://', world_size=args.world_size, rank=rank)
    # torch.manual_seed(0)
    # print(f'initialied gpu {gpu}')
    model = args.args_in.model
    num_params = model.num_params
    # torch.cuda.set_device(gpu)
    # model.cuda(gpu)
    model = model.cuda()
    batch_size = opts.batch_size  # 100
    # criterion = args.args_in.criterion.cuda(gpu)
    criterion = args.args_in.criterion.cuda()
    optimizer = args.args_in.optimizer  # torch.optim.SGD(model.parameters(), 1e-4)
    # # Wrap the model
    # model = nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)

    # load model if already exist
    if (os.path.exists(args.args_in.model_path)
            or os.path.exists(args.args_in.ckpt_path)):
        '''load the existing model accurately'''
        print('load the existing model accurately')
        if os.path.exists(args.args_in.model_path):
            ckpt = torch.load(args.args_in.model_path)
        else:
            ckpt = torch.load(args.args_in.ckpt_path)
        if 'state_dict' in ckpt:
            state_dict = ckpt['state_dict']
            optim_dict = ckpt['optimizer']

            model.load_state_dict(state_dict)
            optimizer.load_state_dict(optim_dict)
            del ckpt, state_dict, optim_dict
            torch.cuda.empty_cache()
        else:
            state_dict = ckpt
            model.load_state_dict(state_dict)
            del ckpt, state_dict
            torch.cuda.empty_cache()
    else:
        print('Model does not exist in directory')

    start = datetime.now()

    verbose = args.args_in.verbose

    training_datagen = args.args_in.training_datagen
    validation_datagen = args.args_in.validation_datagen

    steps_per_epoch = (args.args_in.steps_per_epoch
                       if args.args_in.steps_per_epoch else len(training_datagen))
    steps_per_validation = (args.args_in.steps_per_validation
                            if args.args_in.steps_per_validation else len(validation_datagen))

    # gpus = args.gpus  # torch.cuda.device_count()
    # steps_per_epoch //= gpus
    # steps_per_validation //= gpus

    callbacks = args.args_in.callbacks
    metrics = args.args_in.metrics
    # metrics_name = [x.__class__.__name__.lower()[:-4] for x in metrics]
    metrics_name = [metric.name for metric in metrics]

    logs = {'loss': 0}
    logs.update({x: 0 for x in metrics_name})
    train_dict = logs.copy()
    validation_dict = {f'val_{key}': 0 for key in logs}
    logs.update(validation_dict)
    logs.update({'time': 0, 'lr': 0, 'epoch': 0})

    logs_df = pd.DataFrame(columns=logs.keys())

    epoch_str_width = len(str(opts.epochs))
    best_loss = np.inf
    # group = dist.new_group([rank_i for rank_i in range(args.world_size)])
    # print(group)
    for epoch in range(args.args_in.initial_epoch, opts.epochs):
        training_datagen.on_epoch_end()
        logs['lr'] = optimizer.param_groups[0]['lr']
        train_df = pd.DataFrame(columns=list(train_dict.keys()))
        validation_df = pd.DataFrame(columns=list(validation_dict.keys()))
        model.train()  # model.train(mode=True)
        start_time = time.time()
        """###### Training  ######## """
        for step in range(steps_per_epoch):
            # x, y = training_datagen.__getitem__(gpus * step + gpu)
            x, y = training_datagen.__getitem__(step)
            if not opts.normalize_data:
                x = [x_ * 255. for x_ in x]
                y *= 255.
            x = [x_.cuda(non_blocking=True) for x_ in x]
            y = y.cuda(non_blocking=True)

            # Forward pass
            optimizer.zero_grad()
            output = model(x)
            if not opts.normalize_data:
                output = output * 255.
            loss = criterion(output, y)

            # Backward and optimize
            # optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            with torch.no_grad():
                for metric in metrics:
                    temp = metric(output, y)
                    train_dict.update({metric.name: temp.item()})
                del x, y, output, temp
                torch.cuda.empty_cache()
                # for idx, metric in enumerate(metrics):
                #     train_dict[metrics_name[idx]] = metric(output, y).item()
            train_dict['loss'] = loss.item()
            train_df = train_df.append(train_dict, ignore_index=True)

            time_so_far = (time.time() - start_time)
            step_time = time_so_far / (step + 1)
            if verbose >= 1:
                time_spent_str = time_to_str(time_so_far)
                time_str = time_to_str(step_time * (steps_per_epoch - step))
                other_str = ' - '.join([
                    f"{key}: {value:0.5f}"
                    for key, value in train_dict.items()
                ])
                print(
                    f'Epoch [{epoch+1}/{opts.epochs}] - Step [{step + 1}/{steps_per_epoch}] - ETA: '
                    f'[{time_spent_str}<{time_str}] - {other_str}',
                    end='\r')

            logs_temp = {'Epoch': epoch + 1, 'Step': step + 1}
            logs_temp.update(train_dict)
            logs_temp['city'] = opts.city.lower()
            logs_temp_df = pd.DataFrame([logs_temp])
            if os.path.exists(logs_temp_file):
                logs_temp_df.to_csv(logs_temp_file,
                                    mode='a',
                                    index=False,
                                    header=False)
            else:
                logs_temp_df.to_csv(logs_temp_file, mode='a', index=False)
        # del x, y, output, temp
        # torch.cuda.empty_cache()

        epoch_time = (time.time() - start_time)
        train_dict = train_df.mean(axis=0).to_dict()
        for key, value in train_dict.items():
            logs[key] = value
        logs['time'] = epoch_time
        logs['epoch'] = epoch + 1
        """##### Validation #####"""
        model.eval()  # model.train(mode=False)
        val_start_time = time.time()
        for step in range(steps_per_validation):
            step_time = time.time()
            # x, y = validation_datagen.__getitem__(gpus * step + gpu)
            x, y = validation_datagen.__getitem__(step)
            if not opts.normalize_data:
                x = [x_ * 255. for x_ in x]
                y *= 255.
            x = [x_.cuda(non_blocking=True) for x_ in x]
            y = y.cuda(non_blocking=True)

            with torch.no_grad():
                output = model(torch.cat(x, dim=1)) if isinstance(
                    args.args_in.model, UNet_3Plus) else model(x)
                if not opts.normalize_data:
                    output = output * 255.
                val_loss = criterion(output, y)
                for metric in metrics:
                    temp = metric(output, y)
                    # dist.all_reduce(temp, op=dist.ReduceOp.SUM, group=group)  # only needed when DDP is active
                    validation_dict.update({f'val_{metric.name}': temp.item()})
                # for idx, metric in enumerate(metrics):
                #     validation_dict[f"val_{metrics_name[idx]}"] = metric(output, y).item()
                del x, y, output, temp
                torch.cuda.empty_cache()
            validation_dict['val_loss'] = val_loss.item()
            validation_df = validation_df.append(validation_dict,
                                                 ignore_index=True)

        # del x, y, output, temp, val_loss
        # torch.cuda.empty_cache()

        validation_dict = validation_df.mean(axis=0).to_dict()
        for key, value in validation_dict.items():
            logs[key] = value
        logs['val_time'] = (time.time() - val_start_time)
        logs['city'] = opts.city.lower()
        logs_df = logs_df.append(logs, ignore_index=True)

        # scheduler.step(epoch_val_loss)
        if callbacks:  # i.e. callbacks is not None
            for callback in callbacks:
                callback.step(logs['val_loss'])

        other_str = ' - '.join([
            f"{key}: {value:0.6f}" for key, value in logs.items()
            if not isinstance(value, str)
        ])
        print(f'epoch {epoch + 1:0{epoch_str_width}d}/{opts.epochs} -- {other_str}')
        """Updating the epoch log state"""
        epochs_temp_df = pd.DataFrame([logs])
        if os.path.exists(epochs_temp_file):
            epochs_temp_df.to_csv(epochs_temp_file,
                                  mode='a',
                                  index=False,
                                  header=False)
        else:
            epochs_temp_df.to_csv(epochs_temp_file, mode='a', index=False)
        """Saving the ckpts"""
        checkpoint = {
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict()
        }
        torch.save(checkpoint,
                   args.args_in.ckpt_path.replace('.', f'_ckpt{epoch + 1}.'))
        """Saving the present best model"""
        present_best_loss = logs['val_mse'] if 'val_mse' in logs else logs[
            'val_loss']
        if present_best_loss < best_loss:
            # saving the best checkpoint
            checkpoint = {
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict()
            }

            torch.save(checkpoint, args.args_in.model_path)
            # torch.save(checkpoint, os.paths.join(args.best, args.args_in.model_name + '.pt')) #also accepted
            print(
                f'The model improves from {best_loss:0.6f} to {present_best_loss:0.6f} and has been saved in'
                f' {args.args_in.model_path}')
            best_loss = present_best_loss
        else:
            print(f'The model does not improve from {best_loss:0.6f}')

    # Save the logs
    if os.path.exists(args.args_in.model_path[:-3] + '.csv'):
        logs_df.to_csv(args.args_in.model_path[:-3] + '.csv',
                       mode='a',
                       index=False,
                       header=False)
    else:
        logs_df.to_csv(args.args_in.model_path[:-3] + '.csv',
                       mode='a',
                       index=False)
    # Save the last state of the model
    checkpoint = {
        'state_dict': model.state_dict(),
        'optimizer': optimizer.state_dict()
    }
    torch.save(checkpoint, CHECKPOINT_PATH)
    # save model parameters used
    readme_file = os.path.join(save_dir, 'SedanionScaledReadMe.csv')
    opts_dict = vars(
        argparse.Namespace(
            **{
                'filename': args.args_in.model_name[:-3],
                'num_params': num_params,
                'val_mse': best_loss
            }, **vars(opts)))
    # opts_dict = vars(argparse.Namespace(**{'filename': model_name[:-3], 'num_params': num_params,
    #                                        'val_loss': best_loss}, **vars(opts)))
    opts_df = pd.DataFrame([opts_dict])
    if os.path.exists(readme_file):
        opts_df.to_csv(readme_file, mode='a', index=False, header=False)
    else:
        opts_df.to_csv(readme_file, mode='a', index=False)
    print("Training complete in: " + str(datetime.now() - start))
Example #14
def training(model, fold, args):
    # restore from the last checkpoint:
    # all model weights are restored, but not the learning rate.
    if os.path.exists(os.path.join(config.weights, config.model_name, str(fold), "checkpoint.pth.tar")):
        best_model = torch.load(os.path.join(config.weights, config.model_name, str(fold), "checkpoint.pth.tar"))
        model.load_state_dict(best_model["state_dict"])

    # logging issues
    log = Logger()
    log.open(os.path.join(config.logs_dir, "%s_log_train.txt" % config.model_name), mode="a")
    log.write(
        "\n---------------------------- [START %s] %s\n\n" % (datetime.now().strftime('%Y-%m-%d %H:%M:%S'), '-' * 20))

    log.write(
        '----------------------|--------- Train ---------|-------- Valid ---------|-------Best '
        'Results-------|----------|\n')
    log.write(
        'mode   iter   epoch   |      loss   f1_macro    |      loss   f1_macro   |       loss   f1_macro    | time   '
        '  |\n')
    log.write(
        '----------------------------------------------------------------------------------------------------------'
        '----\n')

    # training params
    optimizer = optim.SGD(model.parameters(),
                          lr=config.learning_rate_start,
                          momentum=0.9,
                          weight_decay=config.weight_decay)
    if config.loss_name == 'ce':
        criterion = nn.BCEWithLogitsLoss().cuda()
    elif config.loss_name == 'focal':
        criterion = FocalLoss().cuda()
    elif config.loss_name == 'f1':
        criterion = F1Loss().cuda()
    else:
        raise ValueError('unknown loss name {}'.format(config.loss_name))
    best_results = [np.inf, 0]
    val_metrics = [np.inf, 0]
    scheduler = lr_scheduler.StepLR(optimizer,
                                    step_size=config.learning_rate_decay_epochs,
                                    gamma=config.learning_rate_decay_rate)
    start = timer()

    # load dataset
    all_files = pd.read_csv(config.train_csv)

    image_names = all_files['Id']
    labels_strs = all_files['Target']
    image_labels = []
    for cur_label_str in labels_strs:
        cur_label = np.eye(config.num_classes, dtype=float)[np.array(list(map(int, cur_label_str.split(' '))))].sum(axis=0)
        image_labels.append(cur_label)
    image_labels = np.stack(image_labels, axis=0)

    msss = MultilabelStratifiedShuffleSplit(n_splits=1, test_size=config.val_percent, random_state=0)
    for train_index, val_index in msss.split(image_names, image_labels):
        train_image_names = image_names[train_index]
        train_image_labels = image_labels[train_index]
        val_image_names = image_names[val_index]
        val_image_labels = image_labels[val_index]

    train_gen = HumanDataset(train_image_names, train_image_labels, config.train_dir, mode="train")
    sampler = WeightedRandomSampler(weights=get_sample_weights()[train_index], num_samples=int(len(all_files)*(1-config.val_percent)))
    train_loader = DataLoader(train_gen, batch_size=config.batch_size, pin_memory=True, num_workers=4, sampler=sampler)
    # train_loader = DataLoader(train_gen, batch_size=config.batch_size, shuffle=True, pin_memory=True, num_workers=4)
    val_gen = HumanDataset(val_image_names, val_image_labels, config.train_dir, augument=False, mode="train")
    val_loader = DataLoader(val_gen, batch_size=config.batch_size, shuffle=False, pin_memory=True, num_workers=4)

    # train
    for epoch in range(0, config.epochs):
        # training & evaluating
        scheduler.step(epoch)
        get_learning_rate(optimizer)
        train_metrics = train(train_loader, model, criterion, optimizer, epoch, val_metrics, best_results, start)
        val_metrics = evaluate(val_loader, model, criterion, epoch, train_metrics, best_results, start)

        # check results
        is_best_loss = val_metrics[0] < best_results[0]
        best_results[0] = min(val_metrics[0], best_results[0])
        is_best_f1 = val_metrics[1] > best_results[1]
        best_results[1] = max(val_metrics[1], best_results[1])

        # save model
        save_checkpoint({
            "epoch": epoch + 1,
            "model_name": config.model_name,
            "state_dict": model.state_dict(),
            "best_loss": best_results[0],
            "optimizer": optimizer.state_dict(),
            "fold": fold,
            "best_f1": best_results[1],
        }, is_best_loss, is_best_f1, fold)

        # print logs
        print('\r', end='', flush=True)
        log.write(
            logging_pattern % (
                "best", epoch, epoch,
                train_metrics[0], train_metrics[1],
                val_metrics[0], val_metrics[1],
                str(best_results[0])[:8], str(best_results[1])[:8],
                time_to_str((timer() - start), 'min')
            )
        )
        log.write("\n")
        time.sleep(0.01)
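The log object used in Examples #14 and #15 is a Logger from the project's utils with an open/write interface; the class itself is not shown. A minimal stand-in that tees messages to stdout and to the opened file, assuming only the calls visible above:

import sys

class Logger(object):
    # Sketch only; the original Logger may support extra arguments.
    def __init__(self):
        self.terminal = sys.stdout
        self.file = None

    def open(self, path, mode='a'):
        self.file = open(path, mode)

    def write(self, message):
        self.terminal.write(message)
        self.terminal.flush()
        if self.file is not None:
            self.file.write(message)
            self.file.flush()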
Example #15
def training(model, fold, log, train_image_names, train_image_labels, val_image_names, val_image_labels):
    # logging issues
    log.write(
        "\n---------------------------- [START %s] %s\n\n" % (datetime.now().strftime('%Y-%m-%d %H:%M:%S'), '-' * 20))

    log.write(
        '----------------------|--------- Train ---------|-------- Valid ---------|-------Best '
        'Results-------|----------|\n')
    log.write(
        'mode   iter   epoch   |      loss   f1_macro    |      loss   f1_macro   |       loss   f1_macro    | time   '
        '  |\n')
    log.write(
        '----------------------------------------------------------------------------------------------------------'
        '----\n')

    # training params
    optimizer = optim.SGD(model.parameters(),
                          lr=config.learning_rate_start,
                          momentum=0.9,
                          weight_decay=config.weight_decay)
    if config.loss_name == 'ce':
        criterion = nn.BCEWithLogitsLoss().cuda()
    elif config.loss_name == 'focal':
        criterion = FocalLoss().cuda()
    elif config.loss_name == 'f1':
        criterion = F1Loss().cuda()
    else:
        raise ValueError('unknown loss name {}'.format(config.loss_name))
    best_results = [np.inf, 0]
    val_metrics = [np.inf, 0]
    scheduler = lr_scheduler.StepLR(optimizer,
                                    step_size=config.learning_rate_decay_epochs,
                                    gamma=config.learning_rate_decay_rate)
    start = timer()

    train_gen = HumanDataset(train_image_names, train_image_labels, config.train_dir, mode="train")
    train_loader = DataLoader(train_gen, batch_size=config.batch_size, shuffle=True, pin_memory=True, num_workers=4)
    val_gen = HumanDataset(val_image_names, val_image_labels, config.train_dir, augument=False, mode="train")
    val_loader = DataLoader(val_gen, batch_size=config.batch_size, shuffle=False, pin_memory=True, num_workers=4)

    # train
    for epoch in range(0, config.epochs):
        # training & evaluating
        scheduler.step(epoch)
        get_learning_rate(optimizer)
        train_metrics = train(train_loader, model, criterion, optimizer, epoch, val_metrics, best_results, start)
        val_metrics = evaluate(val_loader, model, criterion, epoch, train_metrics, best_results, start)

        # check results
        is_best_loss = val_metrics[0] < best_results[0]
        best_results[0] = min(val_metrics[0], best_results[0])
        is_best_f1 = val_metrics[1] > best_results[1]
        best_results[1] = max(val_metrics[1], best_results[1])

        # save model
        save_checkpoint({
            "epoch": epoch + 1,
            "model_name": config.model_name,
            "state_dict": model.state_dict(),
            "best_loss": best_results[0],
            "optimizer": optimizer.state_dict(),
            "fold": fold,
            "best_f1": best_results[1],
        }, is_best_loss, is_best_f1, fold)

        # print logs
        print('\r', end='', flush=True)
        log.write(
            logging_pattern % (
                "best", epoch, epoch,
                train_metrics[0], train_metrics[1],
                val_metrics[0], val_metrics[1],
                str(best_results[0])[:8], str(best_results[1])[:8],
                time_to_str((timer() - start), 'min')
            )
        )
        log.write("\n")
        time.sleep(0.01)
Example #16
def main():

    input_images, content_input_images = utils.load_pictures_for_feed(
        "\\batch",
        recursive=True,
        gen_res=conf.INPUT_RESOLUTION,
        content_res=conf.VGG_INPUT_RESOLUTION)

    print("Shuffle inputs")
    random.seed(conf.SEED)
    random.shuffle(input_images)
    random.seed(conf.SEED)
    random.shuffle(content_input_images)
    print("Done")

    style_red, avg_style_red = utils.load_image("\\styles\\rain_princess.jpg",
                                                between_01=True,
                                                substract_mean=False)

    pre_style_grams, pre_content_tensor = precompute_style_gram(
        style_red, content_input_images)

    gen_graph, input_image, variables_gen_filter, variables_gen_bias, variables_scalars = gn.build_gen_graph_deep(
        tf, input_pictures=conf.BATCH_SIZE, width_res=conf.INPUT_RESOLUTION)
    gen_image = gen_graph['output']

    pre_content_tensor_shape = np.shape(pre_content_tensor)
    content_layer = tf.placeholder('float32', [
        conf.BATCH_SIZE, pre_content_tensor_shape[1],
        pre_content_tensor_shape[2], pre_content_tensor_shape[3]
    ],
                                   name="content_layer")

    #gen_shape = utils.tensorshape_to_int_array(gen_image.get_shape())
    #cut_1 = int((gen_shape[1] - conf.VGG_INPUT_RESOLUTION) / 2)
    #cut_2 = int((gen_shape[2] - conf.VGG_INPUT_RESOLUTION) / 2)
    #batch = tf.slice(gen_image, [0, cut_1, cut_2, 0], [gen_shape[0], conf.VGG_INPUT_RESOLUTION, conf.VGG_INPUT_RESOLUTION, gen_shape[3]])

    batch = gen_image / 255.0
    print(utils.tensorshape_to_int_array(batch.get_shape()))

    graph = vn.load_vgg_input(tf, batch)

    content_loss = conf.CONTENT_WEIGHT * calc_content_loss(
        graph, content_layer)
    style_loss = conf.STYLE_WEIGHT * calc_style_loss_64(graph, pre_style_grams)
    tv_loss = conf.TV_WEIGHT * calc_tv_loss(gen_image)
    loss = content_loss + style_loss + tv_loss

    learning_rate = conf.LEARNING_RATE
    var_learning_rate = tf.placeholder("float32")

    image_counter = 0
    assert len(input_images) >= conf.BATCH_SIZE

    feed = {}
    feed[input_image] = input_images[image_counter:image_counter +
                                     conf.BATCH_SIZE]
    #feed[content_input] = content_input_images[image_counter : image_counter + BATCH_SIZE]
    feed[content_layer] = pre_content_tensor[image_counter:image_counter +
                                             conf.BATCH_SIZE]
    # feed[style_image] = style_red.reshape(1, 224, 224,3)
    feed[var_learning_rate] = learning_rate

    image_counter = (image_counter + conf.BATCH_SIZE) % len(input_images)
    if image_counter + conf.BATCH_SIZE > len(input_images):
        image_counter = 0

    with tf.Session() as sess:

        # set log directory
        #summary_writer = tf.train.SummaryWriter(conf.project_path + conf.log_train, graph_def=sess.graph_def)

        #optimizer = tf.train.MomentumOptimizer(learning_rate=var_learning_rate, momentum=0.9)
        optimizer = tf.train.AdamOptimizer(learning_rate=var_learning_rate)
        #optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.5)
        variables = variables_gen_filter + variables_gen_bias + variables_scalars
        train_step = optimizer.minimize(loss, var_list=variables)

        print('number of variables : ' + str(len(tf.trainable_variables())))

        init = tf.global_variables_initializer()
        sess.run(init, feed)

        loading_directory = "\\version_61_k"
        saving_directory = "\\version_61_k"
        starting_pic_num = 0

        saver = nio.create_saver(tf, sess)
        nio.load_gen_last_checkpoint(tf, sess, saver, path=loading_directory)

        i = 0
        last_l = sess.run(loss, feed_dict=feed)
        last_cl = sess.run(content_loss, feed_dict=feed)
        last_sl = sess.run(style_loss, feed_dict=feed)
        last_tvl = sess.run(tv_loss, feed_dict=feed)
        #last_wl = sess.run(weight_loss, feed_dict=feed)

        start_training_time = time.time()
        last_training_checkpoint_time = start_training_time

        neg_loss_counter = 0
        avoid_save_loss = -1.0

        restore = False
        last_saved_iteration = 0
        for i in range(40000):
            if (i % 10 == 0):
                print(i)

            if i % 250 == 0:
                l = sess.run(loss, feed_dict=feed)

                if (last_l - l) < 0 and i != 0:
                    avoid_save_loss = last_l
                    neg_loss_counter += 1
                    print('neg loss -> counter increase :' +
                          str(neg_loss_counter))
                    if neg_loss_counter == 5:
                        learning_rate /= 10.0
                        neg_loss_counter = 0
                        restore = True
                        print('neg loss -> reset counters to 0')
                        print("new learning rate : " + str(learning_rate))
                else:
                    if avoid_save_loss != -1.0:
                        if l < avoid_save_loss:
                            avoid_save_loss = -1.0
                            neg_loss_counter = 0
                            print("loss reached best result again")
                            print("reset counter to 0")
                        else:
                            print(
                                "avoid saving until loss becomes smaller again:"
                                + str(l - avoid_save_loss))

                print('learning rate : ' + str(learning_rate))

                print('loss : ' + str(l))
                print('loss_improvement : ' + str((last_l - l) / last_l))
                last_l = l

                cl = sess.run(content_loss, feed_dict=feed)
                print('content_loss : ' + str(cl))
                print('content_loss_improvement : ' +
                      str((last_cl - cl) / last_cl))
                last_cl = cl

                sl = sess.run(style_loss, feed_dict=feed)
                print('style_loss : ' + str(sl))
                print('style_loss_improvement : ' +
                      str((last_sl - sl) / last_sl))
                last_sl = sl

                tvl = sess.run(tv_loss, feed_dict=feed)
                print('tv_loss : ' + str(tvl))
                print('tv_loss_improvement : ' +
                      str((last_tvl - tvl) / last_tvl))
                last_tvl = tvl

                t = time.time()
                print('training time: ' +
                      utils.time_to_str(t - start_training_time))
                print('training time since last checkpoint: ' +
                      utils.time_to_str(t - last_training_checkpoint_time))
                last_training_checkpoint_time = t

                utils.save_image(saving_directory,
                                 '\\im' + str(i + starting_pic_num),
                                 sess.run(gen_image, feed_dict=feed),
                                 to255=False)

                if not restore:
                    if avoid_save_loss == -1:
                        nio.save_gen_checkpoint(sess,
                                                saver,
                                                path=saving_directory)
                        last_saved_iteration = i
                else:
                    print("Restoring last checkpoint -> iteration : " +
                          str(last_saved_iteration))
                    nio.load_gen_last_checkpoint(tf,
                                                 sess,
                                                 saver,
                                                 path=saving_directory)
                    restore = False

            sess.run(train_step, feed_dict=feed)

            feed[input_image] = input_images[image_counter:image_counter +
                                             conf.BATCH_SIZE]
            feed[content_layer] = pre_content_tensor[
                image_counter:image_counter + conf.BATCH_SIZE]

            image_counter = (image_counter +
                             conf.BATCH_SIZE) % len(input_images)
            if image_counter + conf.BATCH_SIZE > len(input_images):
                image_counter = 0

        utils.save_image(saving_directory,
                         '\\im' + str(i + starting_pic_num + 1),
                         sess.run(gen_image, feed_dict=feed),
                         to255=False)
        print(sess.run(loss, feed_dict=feed))
        if avoid_save_loss == -1:
            nio.save_gen_checkpoint(sess, saver, path=saving_directory)
            ai.export_gen_graph(tf, sess, variables_gen_filter,
                                variables_gen_bias, variables_scalars,
                                saving_directory)
        else:
            print("Restoring last checkpoint -> iteration : " +
                  str(last_saved_iteration))
            nio.load_gen_last_checkpoint(tf,
                                         sess,
                                         saver,
                                         path=saving_directory)
            print("export pb-File")
            ai.export_gen_graph(tf, sess, variables_gen_filter,
                                variables_gen_bias, variables_scalars,
                                saving_directory)
Example #17
def train_one_epoch_mixup(train_loader,
                          mix_loader,
                          model,
                          criterion,
                          optimizer,
                          epoch,
                          meters,
                          since,
                          alpha=0.4,
                          log=None):
    losses = AverageMeter()
    f1 = AverageMeter()
    model.train()
    if len(meters['f1']):
        previous_loss = meters['loss'][-1]
        previous_f1 = meters['f1'][-1]
        best_f1_epoch = np.argmax(meters['f1'])
        best_f1_score = meters['f1'][best_f1_epoch]
        best_loss_epoch = np.argmin(meters['loss'])
        best_loss = meters['loss'][best_loss_epoch]
    else:
        best_f1_epoch = 0
        best_f1_score = 0
        best_loss_epoch = 0
        best_loss = 0
        previous_loss = 0
        previous_f1 = 0

    for batch_id, ((x1, y1), (x2,
                              y2)) in enumerate(zip(train_loader, mix_loader)):
        batch_x1 = x1.cuda(non_blocking=True)
        batch_x2 = x2.cuda(non_blocking=True)
        lam = np.random.beta(alpha, alpha)
        batch_x = lam * batch_x1 + (1.0 - lam) * batch_x2
        batch_y1 = torch.Tensor(np.array(y1)).float().cuda(non_blocking=True)
        batch_y2 = torch.Tensor(np.array(y2)).float().cuda(non_blocking=True)
        batch_y = lam * batch_y1 + (1.0 - lam) * batch_y2
        output = model(batch_x)
        loss = criterion(output, batch_y)
        losses.update(loss.item(), batch_x.size(0))
        f1_batch = f1_score(batch_y.cpu() > 0.5,
                            output.sigmoid().cpu() > 0.15,
                            average='macro')
        f1.update(f1_batch, batch_x.size(0))
        optimizer.zero_grad()
        loss.backward()
        # gradient clipping
        if cfg.grident_clip:
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.)
        optimizer.step()
        print('Epoch %3d\t' % epoch,
              'Batch %3d|%3d\t' % (batch_id, len(train_loader)),
              'Loss: %10.5f\t' % losses.avg,
              'Metrics|F1 Score: %10.5f\t' % f1.avg,
              'Previous Loss: %10.5f\t' % previous_loss,
              'Previous F1 Score: %10.5f\t' % previous_f1,
              'Best loss:%10.5f Epoch %3d\t' % (best_loss, best_loss_epoch),
              'Best F1:%10.5f Epoch %3d\t' % (best_f1_score, best_f1_epoch),
              'Time: %s' % time_to_str((timer() - since), 'min'),
              file=log)

    meters['loss'].append(losses.avg)
    meters['f1'].append(f1.avg)

    return meters
Example #18
    def draw(self):
        super().draw()

        time = self.normal_font.render("Votre temps était : " + time_to_str(self.game.score.time), 1, self.color)
        self.game.window.blit(time, (self.game.window.get_width() / 2 - time.get_rect().centerx, 250))
Example #19
def run(model, net, datapath, labelpath, csvpath, winsize=48):
    fold = 0
    # 4.1 mkdirs
    if not os.path.exists(config.weights + config.model_name + os.sep +
                          str(fold)):
        os.makedirs(config.weights + config.model_name + os.sep + str(fold))
    if not os.path.exists(config.best_models):
        os.mkdir(config.best_models)
    if not os.path.exists(config.results):
        os.mkdir(config.results)
    #4.3 optim & criterion
    optimizer = optim.SGD(model.parameters(),
                          lr=config.lr,
                          momentum=0.9,
                          weight_decay=1e-4)

    criterion = nn.CrossEntropyLoss().to(device)
    start_epoch = 0
    best_acc = 0
    best_loss = np.inf
    best_f1 = 0
    best_results = [0, np.inf, 0]
    val_metrics = [0, np.inf, 0]

    model.to(device)
    train_lst = pd.read_csv(csvpath + "train.csv")
    train_gen = SARDataset(train_lst, datapath, labelpath, winsize, net)
    train_loader = DataLoader(
        train_gen,
        batch_size=config.batch_size,
        shuffle=True,
        pin_memory=True,
        num_workers=0)  #num_worker is limited by shared memory in Docker!

    val_lst = pd.read_csv(csvpath + "val.csv")
    val_gen = SARDataset(val_lst, datapath, labelpath, winsize, net)
    val_loader = DataLoader(val_gen,
                            batch_size=config.batch_size,
                            shuffle=False,
                            pin_memory=True,
                            num_workers=0)

    start = timer()
    #train
    for epoch in range(0, config.epochs):  #config.epochs
        # train
        train_metrics = train(train_loader, model, criterion, optimizer, epoch,
                              val_metrics, best_results, start)
        # val
        val_metrics, early_stop = evaluate(val_loader, model, criterion, epoch,
                                           train_metrics, best_results, start)
        if early_stop:
            print("Early stopping")
            break
        # check results
        is_best_acc = val_metrics[0] > best_results[0]
        best_results[0] = max(val_metrics[0], best_results[0])
        is_best_loss = val_metrics[1] < best_results[1]
        best_results[1] = min(val_metrics[1], best_results[1])
        is_best_f1 = val_metrics[2] > best_results[2]
        best_results[2] = max(val_metrics[2], best_results[2])
        # save model
        utils.save_checkpoint(
            {
                "epoch": epoch + 1,
                "model_name": config.model_name,
                "state_dict": model.state_dict(),
                "best_acc": best_results[0],
                "best_loss": best_results[1],
                "optimizer": optimizer.state_dict(),
                "fold": fold,
                "best_f1": best_results[2],
            }, is_best_acc, is_best_loss, is_best_f1, fold)
        print('\r', end='', flush=True)
        print('%s  %5.1f %6.1f	  |   %0.3f   %0.3f   %0.3f	 |  %0.3f   %0.3f	%0.3f	|   %s  %s  %s | %s' % (\
          "best", epoch, epoch,
          train_metrics[0], train_metrics[1],train_metrics[2],
          val_metrics[0],val_metrics[1],val_metrics[2],
          str(best_results[0])[:8],str(best_results[1])[:8],str(best_results[2])[:8],
          utils.time_to_str((timer() - start),'min'))
         )
    test_lst = pd.read_csv(csvpath + "test.csv")
    test_gen = SARDataset(test_lst, datapath, labelpath, winsize, net)
    test_loader = DataLoader(test_gen,
                             500,
                             shuffle=False,
                             pin_memory=True,
                             num_workers=0)
    for point in ['loss']:  #'loss','acc','f1'
        best_model = torch.load(
            "%s/%s_fold_%s_model_best_%s.pth.tar" %
            (config.best_models, config.model_name, str(fold), point))
        model.load_state_dict(best_model["state_dict"])
        plabel = test(test_loader, model)
        np.save(config.results + config.model_name + str(point) + '.npy',
                plabel)
Example #20
        if not opts.normalize_data:
            x_test = [x_ * 255. for x_ in x_test]

        # y_pred = model(x_test)
        y_pred = []
        for i in range(x_test[0].shape[0]):
            if use_cuda:
                x_now = [x_[i:i + 1].cuda() for x_ in x_test]
            else:
                x_now = [x_[i:i + 1] for x_ in x_test]
            y_pred.append(model(x_now))
        y_pred = torch.cat(y_pred, dim=0)

        # model.cpu()  # move model to CPU
        # x_test = [x_.cpu() for x_ in x_test]
        y_out = testing_datagen.process_output(y_pred)
        testing_datagen.write_data(y_out, file_path)

        del x_test, y_pred
        # assert len(test_slots[filename.split('_')[0]]) == batch_size

        time_so_far = (time.time() - start_time)
        step_time = time_so_far / (step + 1)
        time_spent_str = time_to_str(time_so_far)
        time_str = time_to_str(step_time * (steps_per_testing - step))
        print(
            f'[{itr+1}/{n_models}]: {model_name} : ETA [{time_spent_str}<{time_str}]: done - '
            f'[{step + 1}/{steps_per_testing}]',
            end='\r')
Example #21
def main():

    # 4.1 mkdirs
    if not os.path.exists(config.submit):
        os.makedirs(config.submit)
    if not os.path.exists(config.weights + config.model_name + os.sep + 'fold_'+str(config.fold)):
        os.makedirs(config.weights + config.model_name + os.sep + 'fold_'+ str(config.fold))
    if not os.path.exists(config.best_models):
        os.mkdir(config.best_models)
    if not os.path.exists(config.logs):
        os.mkdir(config.logs)
    if not os.path.exists(config.best_models + config.model_name ):
        os.mkdir(config.best_models + config.model_name)
    if not os.path.exists(config.best_models + config.model_name + os.sep + 'fold_'+str(config.fold)):
        os.mkdir(config.best_models + config.model_name + os.sep + 'fold_'+str(config.fold))
    tqdm.pandas()

    start_time = time.time()
    train_X, test_X, train_y, word_index = utils.load_and_prec(config)

    print("Start embedding matrix............")
    embedding_matrix_1 = utils.load_glove(word_index, config.embedding_dir, config.max_features)
    embedding_matrix_2 = utils.load_para(word_index, config.embedding_dir, config.max_features)
    embedding_matrix_3 = utils.load_fasttext(word_index, config.embedding_dir, config.max_features)

    total_time = (time.time() - start_time) / 60
    print("Took {:.2f} minutes".format(total_time))

    if config.embed_method == "mean":
        embedding_matrix = np.mean([embedding_matrix_1, embedding_matrix_2, embedding_matrix_3], axis=0)
    elif config.embed_method =="concat":
        embedding_matrix = np.concatenate((embedding_matrix_1, embedding_matrix_2, embedding_matrix_3), axis=1)
    print(np.shape(embedding_matrix))
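    # 'mean' keeps the original per-token width, while 'concat' stacks the three
    # embeddings side by side (triple the width); the model's embedding size must match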
    #
    # del embedding_matrix_1, embedding_matrix_2
    # del embedding_matrix_1

    # -------------------------------------------------------
    # training
    # -------------------------------------------------------
    train_preds = np.zeros((len(train_X)))
    test_preds = np.zeros((len(test_X)))

    x_test_cuda = torch.tensor(test_X, dtype=torch.long).cuda()
    test_dataset = torch.utils.data.TensorDataset(x_test_cuda)
    test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=config.batch_size, shuffle=False)

    splits = list(StratifiedKFold(n_splits=5, shuffle=True, random_state=SEED).split(train_X, train_y))
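    # StratifiedKFold keeps the positive/negative ratio of train_y roughly constant
    # across the 5 folds; the fixed random_state makes the split reproducible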

    sigmoid = nn.Sigmoid()
    loss_fn = torch.nn.BCEWithLogitsLoss(reduction="mean")
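    # BCEWithLogitsLoss works on raw logits, so the model returns un-squashed scores;
    # sigmoid is applied separately only when converting outputs to probabilities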



    # k-fold
    for fold, (train_idx, valid_idx) in enumerate(splits):
        print(f'Fold {fold + 1}')

        # tflogger
        tflogger = utils.TFLogger(os.path.join('../results', 'TFlogs',
                                         config.model_name + "_fold{0}_{1}".format(config.fold, fold)))
        # initialize the early_stopping object
        early_stopping = utils.EarlyStopping(patience=7, verbose=True)

        x_train_fold = torch.tensor(train_X[train_idx], dtype=torch.long).cuda()
        y_train_fold = torch.tensor(train_y[train_idx, np.newaxis], dtype=torch.float32).cuda()
        x_val_fold = torch.tensor(train_X[valid_idx], dtype=torch.long).cuda()
        y_val_fold = torch.tensor(train_y[valid_idx, np.newaxis], dtype=torch.float32).cuda()

        if config.model == "baseline_bidir_LSTM_GRU":
            model = baseline_bidir_LSTM_GRU.NeuralNet(config, embedding_matrix)
        elif config.model == "baseline_pytorch":
            model = baseline_pytorch.NeuralNet(config, embedding_matrix)
        elif config.model == "baseline_lstm_gru_attention":
            model = baseline_lstm_gru_attention.NeuralNet(config, embedding_matrix)
        elif config.model == "baseline_lstm_lstm_attention":
            model = baseline_lstm_lstm_attention.NeuralNet(config, embedding_matrix)
            
        model.cuda()

        optimizer = torch.optim.Adam(model.parameters(), lr=config.lr)

        # scheduler
        scheduler = lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)
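        # StepLR decays the learning rate by gamma=0.1 every step_size=5 epochs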

        train_dataset = torch.utils.data.TensorDataset(x_train_fold, y_train_fold)
        valid_dataset = torch.utils.data.TensorDataset(x_val_fold, y_val_fold)

        train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=config.batch_size, shuffle=True)
        valid_loader = torch.utils.data.DataLoader(valid_dataset, batch_size=config.batch_size, shuffle=False)

        valid_loss = np.inf

        # initialize best loss
        best_loss = np.inf
        start_time = timer()
        for epoch in range(config.epochs):
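            # note: passing the epoch to scheduler.step() and stepping before the optimizer
            # is the older PyTorch idiom; newer releases warn and expect scheduler.step()
            # to be called once per epoch after the optimizer updates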
            scheduler.step(epoch)
            # train
            lr = utils.get_learning_rate(optimizer)
            train_loss = train(train_loader=train_loader,model=model,loss_fn=loss_fn, optimizer=optimizer,
                               epoch=epoch,valid_loss=valid_loss,start=start_time)

            # validate
            valid_loss, valid_output = evaluate(val_loader=valid_loader, model=model, loss_fn=loss_fn, epoch=epoch,
                                                train_loss=train_loss, start_time=start_time)
            test_preds_fold = np.zeros(len(test_X))

            # check results
            is_best_loss = valid_loss < best_loss
            if is_best_loss:
                best_epoch = epoch
                best_train_loss = train_loss
            # update best loss
            best_loss = min(valid_loss, best_loss)

            # save NeuralNet
            utils.save_checkpoint({
                "epoch": epoch,
                "model_name": config.model_name,
                "state_dict": model.state_dict(),
                "optimizer": optimizer.state_dict(),
                "fold": config.fold,
                "kfold": config.fold,
            },is_best_loss, config.fold, fold, config)
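            # utils.save_checkpoint is assumed to write this fold's snapshot and, when
            # is_best_loss is True, copy it to the fold_*_model_best_loss.pth.tar file
            # that is reloaded for testing below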
            # print logs
            print('\r', end='', flush=True)

            message = '%s  %5.1f %6.1f  %.2E |       %0.3f        |       %0.3f       | %s' % ( \
                "best", best_epoch, best_epoch, Decimal(lr),
                best_train_loss,
                best_loss,
                utils.time_to_str((timer() - start_time), 'min'))
            log.write(message)

            log.write("\n")
            time.sleep(0.01)

            # ================================================================== #
            #                        Tensorboard Logging                         #
            # ================================================================== #

            # 1. Log scalar values (scalar summary)
            info = {'Train_loss': train_loss,
                    'Valid_loss': valid_loss,
                    'Learning_rate': lr}

            for tag, value in info.items():
                tflogger.scalar_summary(tag, value, epoch)

            # 2. Log values and gradients of the parameters (histogram summary)
            for tag, value in model.named_parameters():
                tag = tag.replace('.', '/')
                tflogger.histo_summary(tag, value.data.cpu().numpy(), epoch)
                if value.grad is not None:
                    tflogger.histo_summary(tag + '/grad', value.grad.data.cpu().numpy(), epoch)
            # -------------------------------------
            # end tflogger

            # ================================================================== #
            #                          Early stopping                            #
            # ================================================================== #
            # early_stopping needs the validation loss to check if it has decreased,
            # and if it has, it will make a checkpoint of the current model
            early_stopping(valid_loss, model)

            if early_stopping.early_stop:
                print("Early stopping")
                break

        # end looping all epochs
        train_preds[valid_idx] = sigmoid(valid_output).cpu().data.numpy()[:, 0]

        # test
        checkpoint_path = os.path.join("{0}{1}/fold_{2}/fold_{3}_model_best_loss.pth.tar".
                        format(config.best_models, config.model_name, str(config.fold), fold))

        best_model = torch.load(checkpoint_path)
        print("Test on epoch:", best_model['epoch'])
        model.load_state_dict(best_model["state_dict"])
        test_preds_fold = test(test_loader=test_loader, model=model)
        test_preds += test_preds_fold / len(splits)
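        # each fold contributes 1/len(splits) of the final score, so test_preds ends up
        # as the mean probability over the five best-loss checkpoints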

    # end k-fold
    search_result = threshold_search(train_y, train_preds)
    print(search_result)
    log.write("Threshold:{0},    f1:{1}".format(search_result['threshold'], search_result['f1']))

    sub = pd.read_csv('../input/sample_submission.csv')
    sub.prediction = test_preds > search_result['threshold']
    sub.to_csv("submission_{0}.csv".format(config.model_name), index=False)

    print('Test successful!')
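# Neither test() nor threshold_search() is defined in this snippet; the sketches below
# show one plausible shape for each, assuming the torch/numpy imports of the surrounding
# script: test() is assumed to return per-sample sigmoid probabilities aligned with
# test_X, and threshold_search() a dict with 'threshold' and 'f1' keys, as main() expects.
def test(test_loader, model):
    # run the loaded best checkpoint over the test loader and collect probabilities
    model.eval()
    preds = []
    with torch.no_grad():
        for (x_batch,) in test_loader:
            logits = model(x_batch)
            preds.append(torch.sigmoid(logits).cpu().numpy()[:, 0])
    return np.concatenate(preds)

def threshold_search(y_true, y_proba):
    # scan candidate cut-offs and keep the one with the best binary F1
    from sklearn.metrics import f1_score
    best = {'threshold': 0.5, 'f1': 0.0}
    for t in np.arange(0.10, 0.90, 0.01):
        score = f1_score(y_true, (y_proba > t).astype(int))
        if score > best['f1']:
            best = {'threshold': float(t), 'f1': float(score)}
    return best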