Example #1
              "Valid CE: {:.4f} ".format(val_bce) +
              "Valid LWLRAP: {:.4f} ".format(val_lwlrap) +
              "sec: {:.1f}".format(endtime))

        # save log and weights
        train_log_epoch = pd.DataFrame(
            [[epoch + 1, bce, lwlrap, bce_noisy, lwlrap_noisy, val_bce,
              val_lwlrap, endtime]],
            columns=log_columns)
        train_log = pd.concat([train_log, train_log_epoch])
        train_log.to_csv(
            "{}/train_log_fold{}.csv".format(OUTPUT_DIR, fold + 1),
            index=False)

        # despite its name, min_val_lwlrap tracks the best score seen so far
        if val_lwlrap > min_val_lwlrap:
            min_val_lwlrap = val_lwlrap
            trigger = 0
            torch.save(
                model.state_dict(),
                "{}/weight_fold_{}_best.pth".format(OUTPUT_DIR, fold + 1))

        if (epoch + 1) % NUM_CYCLE == 0:
            torch.save(
                model.state_dict(), "{}/weight_fold_{}_epoch_{}.pth".format(
                    OUTPUT_DIR, fold + 1, epoch + 1))

        # stop if validation LwLRAP has not improved for more than 100 epochs
        if trigger > 100:
            break
        trigger += 1
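
The fragment above combines best-checkpoint saving with a patience counter. A minimal self-contained sketch of the same pattern, assuming a caller-supplied evaluate callable; the names fit, patience, best_score, and stale are illustrative, not from the source:

import torch

def fit(model, epochs, evaluate, patience=100):
    """Patience-based early stopping, mirroring the `trigger` counter above."""
    best_score, stale = -float("inf"), 0
    for epoch in range(epochs):
        score = evaluate(model)        # e.g. validation LwLRAP
        if score > best_score:         # improvement: save weights, reset counter
            best_score, stale = score, 0
            torch.save(model.state_dict(), "best.pth")
        elif stale > patience:         # too long without improvement
            break
        stale += 1
    return best_score
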
def main():
    # load table data
    df_train = pd.read_csv("../input/train_curated.csv")
    df_noisy = pd.read_csv("../input/train_noisy.csv")
    df_test = pd.read_csv("../input/sample_submission.csv")
    labels = df_test.columns[1:].tolist()
    for label in labels:
        df_train[label] = df_train['labels'].apply(lambda x: label in x)
        df_noisy[label] = df_noisy['labels'].apply(lambda x: label in x)

    df_train['path'] = "../input/mel128/train/" + df_train['fname']
    df_test['path'] = "../input/mel128/test/" + df_test['fname']
    df_noisy['path'] = "../input/mel128/noisy/" + df_noisy['fname']

    # fold splitting
    folds = list(KFold(n_splits=NUM_FOLD, shuffle=True, random_state=SEED).split(np.arange(len(df_train))))

    # Training
    log_columns = ['epoch', 'bce', 'lwlrap', 'bce_noisy', 'lwlrap_noisy', 'val_bce', 'val_lwlrap', 'time']
    for fold, (ids_train_split, ids_valid_split) in enumerate(folds):
        if fold+1 not in FOLD_LIST: continue
        print("fold: {}".format(fold + 1))
        train_log = pd.DataFrame(columns=log_columns)

        # build model
        model = ResNet(NUM_CLASS).cuda()

        # prepare data loaders
        df_train_fold = df_train.iloc[ids_train_split].reset_index(drop=True)
        dataset_train = MelDataset(df_train_fold['path'], df_train_fold[labels].values,
                                    crop=CROP_LENGTH, crop_mode='random',
                                    mixup=True, freqmask=True, gain=True,
                                    )
        train_loader = DataLoader(dataset_train, batch_size=BATCH_SIZE,
                                  shuffle=True, num_workers=1, pin_memory=True,
                                  )

        df_valid = df_train.iloc[ids_valid_split].reset_index(drop=True)
        dataset_valid = MelDataset(df_valid['path'], df_valid[labels].values,)
        valid_loader = DataLoader(dataset_valid, batch_size=1,
                                  shuffle=False, num_workers=1, pin_memory=True,
                                  )

        dataset_noisy = MelDataset(df_noisy['path'], df_noisy[labels].values,
                                    crop=CROP_LENGTH, crop_mode='random',
                                    mixup=True, freqmask=True, gain=True,
                                   )
        noisy_loader = DataLoader(dataset_noisy, batch_size=BATCH_SIZE,
                                  shuffle=True, num_workers=1, pin_memory=True,
                                  )
        noisy_itr = cycle(noisy_loader)

        # set optimizer and loss
        optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=LR[0])
        scheduler = CosineLR(optimizer, step_size_min=LR[1], t0=len(train_loader) * NUM_CYCLE, tmult=1)

        # training
        for epoch in range(NUM_EPOCH):
            # train for one epoch
            bce, lwlrap, bce_noisy, lwlrap_noisy = train((train_loader, noisy_itr), model, optimizer, scheduler, epoch)

            # evaluate on validation set
            val_bce, val_lwlrap = validate(valid_loader, model)

            # print log
            endtime = time.time() - starttime
            print("Epoch: {}/{} ".format(epoch + 1, NUM_EPOCH)
                  + "CE: {:.4f} ".format(bce)
                  + "LwLRAP: {:.4f} ".format(lwlrap)
                  + "Noisy CE: {:.4f} ".format(bce_noisy)
                  + "Noisy LWLRAP: {:.4f} ".format(lwlrap_noisy)
                  + "Valid CE: {:.4f} ".format(val_bce)
                  + "Valid LWLRAP: {:.4f} ".format(val_lwlrap)
                  + "sec: {:.1f}".format(endtime)
                  )

            # save log and weights
            train_log_epoch = pd.DataFrame(
                [[epoch+1, bce, lwlrap, bce_noisy, lwlrap_noisy, val_bce, val_lwlrap, endtime]],
                columns=log_columns)
            train_log = pd.concat([train_log, train_log_epoch])
            train_log.to_csv("{}/train_log_fold{}.csv".format(OUTPUT_DIR, fold+1), index=False)
            if (epoch+1)%NUM_CYCLE==0:
                torch.save(model.state_dict(), "{}/weight_fold_{}_epoch_{}.pth".format(OUTPUT_DIR, fold+1, epoch+1))
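
CosineLR here is a project-specific cosine-annealing scheduler with warm restarts, and a snapshot is saved at the end of every NUM_CYCLE-epoch cycle (the usual snapshot-ensembling setup). A rough stand-in sketch using PyTorch's built-in CosineAnnealingWarmRestarts, assuming t0 counts optimizer steps and step_size_min maps to eta_min; the concrete numbers are illustrative only:

import torch
from torch import nn, optim

steps_per_epoch, NUM_CYCLE = 100, 64                   # illustrative values
model = nn.Linear(8, 2)
optimizer = optim.Adam(model.parameters(), lr=1e-3)    # plays the role of LR[0]
scheduler = optim.lr_scheduler.CosineAnnealingWarmRestarts(
    optimizer, T_0=steps_per_epoch * NUM_CYCLE, T_mult=1, eta_min=1e-5)

for _ in range(3):                     # one scheduler step per batch, as above
    loss = model(torch.randn(4, 8)).sum()
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    scheduler.step()
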
Example #3
    # optimizer = torch.optim.SGD(
    #         params=model.parameters(),
    #         lr=0.1,
    #         momentum=0.9,
    #         weight_decay=5e-4,
    #         nesterov=True
    #     )

    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer=optimizer,
                                                        milestones=[6, 12, 16],
                                                        gamma=0.2)

    print("Start training")
    start_time = time.time()
    for epoch in range(epochs):
        train_one_epoch(model, criterion, optimizer, train_loader, device,
                        epoch)
        lr_scheduler.step()
        evaluate(model, criterion, valid_loader, device=device)
        checkpoint = {
            'model': model.state_dict(),
            'optimizer': optimizer.state_dict(),
            'lr_scheduler': lr_scheduler.state_dict(),
            'epoch': epoch
        }
        torch.save(checkpoint,
                   os.path.join(output_dir, 'model_{}.pth'.format(epoch)))
        torch.save(checkpoint, os.path.join(output_dir, 'checkpoint.pth'))

    total_time = time.time() - start_time
    total_time_str = str(datetime.timedelta(seconds=int(total_time)))
    print('Training time {}'.format(total_time_str))
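
Since checkpoint.pth is rewritten every epoch, an interrupted run can be resumed. A minimal sketch assuming the same dict keys as above; nn.Linear stands in for the real model, and the scheduler mirrors the MultiStepLR settings used here:

import torch
from torch import nn, optim

model = nn.Linear(4, 2)                        # stands in for the real model
optimizer = optim.SGD(model.parameters(), lr=0.1)
lr_scheduler = optim.lr_scheduler.MultiStepLR(
    optimizer, milestones=[6, 12, 16], gamma=0.2)

checkpoint = torch.load('checkpoint.pth', map_location='cpu')
model.load_state_dict(checkpoint['model'])
optimizer.load_state_dict(checkpoint['optimizer'])
lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
start_epoch = checkpoint['epoch'] + 1          # continue after the saved epoch
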
Example #4
        oo = outputs.data.cpu().numpy()           # logits as a numpy array
        oc = np.argmax(oo, axis=1)                # predicted class per sample
        acc = np.sum(oc == label) / batch_size    # batch accuracy
        acc_record += acc
        ''' Show loss and accuracy, save best model. '''
        if it > 0 and (it + 1) % args.show_iters == 0:
            loss_record /= args.show_iters
            acc_record /= args.show_iters
            print(
                '\rExtractor | iter %05d, average batch loss: %.5f, average batch accuracy: %.3f'
                % (it + 1, loss_record, acc_record))

            if acc_record > best_acc:
                best_acc = acc_record
                torch.save(
                    model.state_dict(),
                    osp.join(args.model_dir,
                             'best_extractor_%d.pth' % (hash_bit)))
            loss_record = 0.
            acc_record = 0.

            if best_acc > 0.98:
                print('Extractor | Early Stop Training.')
                break

    del criterion
    del optimizer
    ''' ==================== Triplet Similarity Learning ==================== '''
    model.norm = True
    model.zero_grad()
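
The stage that begins here trains the now-normalised embeddings (model.norm = True) with a triplet objective. A minimal sketch of such an objective using nn.TripletMarginLoss; the random tensors and the anchor/positive/negative batching are illustrative assumptions, not the source's sampling strategy:

import torch
from torch import nn

criterion = nn.TripletMarginLoss(margin=1.0, p=2)
anchor = torch.randn(32, 64, requires_grad=True)   # embeddings from the model
positive = torch.randn(32, 64)                     # same class as the anchor
negative = torch.randn(32, 64)                     # a different class
loss = criterion(anchor, positive, negative)       # pull the positive close, push the negative away
loss.backward()
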
Example #5
def train(k, epochs):

    model = ResNet(k=k)
    opt = torch.optim.Adam(model.parameters(), lr=1e-4)
    criterion = nn.CrossEntropyLoss()

    if use_gpu:
        model.to('cuda')
        if use_horovod:
            # broadcast parameters and optimizer state from root device to other devices
            hvd.broadcast_parameters(model.state_dict(), root_rank=0)
            hvd.broadcast_optimizer_state(opt, root_rank=0)

            # Wrap the optimizer for multi-GPU operation
            opt = hvd.DistributedOptimizer(
                opt, named_parameters=model.named_parameters(), op=hvd.Adasum)

    loss_dict = {'epoch': [], 'train': [], 'val': []}

    for epoch in range(epochs):
        train_loss = 0
        val_loss = 0

        # train block
        for img_batch, labels_batch in train_loader:
            if use_gpu:
                img_batch = img_batch.to('cuda')
                labels_batch = labels_batch.to('cuda')

            pred = model(img_batch)

            opt.zero_grad()
            loss = criterion(pred, labels_batch)
            loss.backward()
            opt.step()
            train_loss += loss.item()

        # validation block: eval mode freezes BatchNorm/Dropout statistics
        model.eval()
        with torch.no_grad():
            for img_batch, labels_batch in val_loader:
                if use_gpu:
                    img_batch = img_batch.to('cuda')
                    labels_batch = labels_batch.to('cuda')

                pred = model(img_batch)
                loss = criterion(pred, labels_batch)
                val_loss += loss.item()
        model.train()

        if use_horovod:
            train_loss = average_loss(train_loss, 'avg_train_loss')
            val_loss = average_loss(val_loss, 'avg_val_loss')

        loss_dict['epoch'].append(epoch + 1)
        loss_dict['train'].append(train_loss)
        loss_dict['val'].append(val_loss)

        print(",".join([
            "{}:{:.2f}".format(key, val[epoch])
            for key, val in loss_dict.items()
        ]))

    torch.save(model.state_dict(),
               "models/modelsdata/ResNet18_Cifar10_d{}.ckpt".format(k))
    save_obj(loss_dict,
             "models/modelsdata/losses/ResNet18_Cifar10_d{}".format(k))
    return loss_dict
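
average_loss is not defined in this snippet. A plausible minimal implementation, assuming hvd.init() has been called: hvd.allreduce averages a tensor across all ranks by default, and the name string merely tags the collective op:

import torch
import horovod.torch as hvd

def average_loss(value, name):
    # allreduce with the default op returns the mean of `value` over all ranks
    return hvd.allreduce(torch.tensor(value), name=name).item()
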
Example #6
def main():
    if not sys.warnoptions:
        warnings.simplefilter("ignore")

    # --- hyper parameters --- #
    BATCH_SIZE = 256
    LR = 1e-3
    WEIGHT_DECAY = 1e-4
    N_layer = 18
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    # --- data process --- #
    # info
    src_path = './data/'
    target_path = './saved/ResNet18/'
    model_path = target_path + 'pkls/'
    pred_path = target_path + 'preds/'

    if not os.path.exists(model_path):
        os.makedirs(model_path)
    if not os.path.exists(pred_path):
        os.makedirs(pred_path)

    # evaluation: num of classify labels & image size
    # output testing id csv
    label2num_dict, num2label_dict = data_evaluation(src_path)

    # load
    train_data = dataLoader(src_path, 'train', label2num_dict)
    train_len = len(train_data)
    test_data = dataLoader(src_path, 'test')

    train_loader = Data.DataLoader(
        dataset=train_data,
        batch_size=BATCH_SIZE,
        shuffle=True,
        num_workers=12,
    )
    test_loader = Data.DataLoader(
        dataset=test_data,
        batch_size=BATCH_SIZE,
        shuffle=False,
        num_workers=12,
    )

    # --- model training --- #
    # fp: for storing data
    fp_train_acc = open(target_path + 'train_acc.txt', 'w')
    fp_time = open(target_path + 'time.txt', 'w')

    # train
    highest_acc, train_acc_seq = 0, []
    loss_funct = nn.CrossEntropyLoss()
    net = ResNet(N_layer).to(device)
    optimizer = torch.optim.Adam(net.parameters(),
                                 lr=LR,
                                 weight_decay=WEIGHT_DECAY)
    print(net)

    # itertools.count(1) iterates forever, so training runs until interrupted;
    # the file handles below are only closed if the loop ever exits
    for epoch_i in count(1):
        right_count = 0

        # print('\nTraining epoch {}...'.format(epoch_i))
        # for batch_x, batch_y in tqdm(train_loader):
        for batch_x, batch_y in train_loader:
            batch_x = batch_x.to(device)
            batch_y = batch_y.to(device)

            # clear gradient
            optimizer.zero_grad()

            # forward & backward
            output = net(batch_x.float())  # call the module, not .forward(), so hooks run
            highest_out = torch.max(output, 1)[1]
            right_count += sum(batch_y == highest_out).item()

            loss = loss_funct(output, batch_y)
            loss.backward()

            # update parameters
            optimizer.step()

        # calculate accuracy
        train_acc = right_count / train_len
        train_acc_seq.append(train_acc * 100)

        if train_acc > highest_acc:
            highest_acc = train_acc

        # save model
        torch.save(
            net.state_dict(),
            '{}{}_{}_{}.pkl'.format(model_path,
                                    target_path.split('/')[2],
                                    round(train_acc * 1000), epoch_i))

        # write data
        fp_train_acc.write(str(train_acc * 100) + '\n')
        fp_time.write(
            str(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())) + '\n')
        print('\n{} Epoch {}, Training accuracy: {}'.format(
            time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), epoch_i,
            train_acc))

        # test
        net.eval()
        test_df = pd.read_csv(src_path + 'testing_data/testing_labels.csv')
        with torch.no_grad():
            for i, (batch_x, _) in enumerate(test_loader):
                batch_x = batch_x.to(device)
                output = net(batch_x.float())
                highest_out = torch.max(output, 1)[1].cpu()
                labels = [
                    num2label_dict[out_j.item()] for out_j in highest_out
                ]
                test_df.iloc[i * BATCH_SIZE:(i + 1) * BATCH_SIZE,
                             test_df.columns.get_loc('label')] = labels
        test_df.to_csv('{}{}_{}_{}.csv'.format(pred_path,
                                               target_path.split('/')[2],
                                               round(train_acc * 1000),
                                               epoch_i),
                       index=False)
        net.train()

        lr_decay(optimizer)

    fp_train_acc.close()
    fp_time.close()
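
lr_decay is likewise not defined in this snippet. A plausible minimal version that shrinks every parameter group once per epoch; the 0.99 factor is an illustrative assumption:

def lr_decay(optimizer, factor=0.99):
    # decay the learning rate of each parameter group in place
    for group in optimizer.param_groups:
        group['lr'] *= factor
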
def main():
    # load table data
    df_train = pd.read_csv("../input/train_curated.csv")
    df_noisy = pd.read_csv("../input/train_noisy.csv")
    df_test = pd.read_csv("../input/sample_submission.csv")
    labels = df_test.columns[1:].tolist()
    for label in labels:
        df_train[label] = df_train['labels'].apply(lambda x: label in x)
        df_noisy[label] = df_noisy['labels'].apply(lambda x: label in x)

    df_train['path'] = "../input/mel128/train/" + df_train['fname']
    df_test['path'] = "../input/mel128/test/" + df_test['fname']
    df_noisy['path'] = "../input/mel128/noisy/" + df_noisy['fname']

    # calc sampling weight
    df_train['weight'] = 1
    df_noisy['weight'] = len(df_train) / len(df_noisy)

    # generate pseudo label with sharpening
    tmp = np.load("../input/pseudo_label/preds_noisy.npy").mean(axis=(0, 1))
    tmp = tmp**TEMPERATURE
    tmp = tmp / tmp.sum(axis=1)[:, np.newaxis]
    df_noisy_pseudo = df_noisy.copy()
    df_noisy_pseudo[labels] = tmp

    # fold splitting
    folds = list(
        KFold(n_splits=NUM_FOLD, shuffle=True,
              random_state=SEED).split(np.arange(len(df_train))))
    folds_noisy = list(
        KFold(n_splits=NUM_FOLD, shuffle=True,
              random_state=SEED).split(np.arange(len(df_noisy))))

    # Training
    log_columns = [
        'epoch', 'bce', 'lwlrap', 'bce_noisy', 'lwlrap_noisy', 'semi_mse',
        'val_bce', 'val_lwlrap', 'time'
    ]
    for fold, (ids_train_split, ids_valid_split) in enumerate(folds):
        if fold + 1 not in FOLD_LIST: continue
        print("fold: {}".format(fold + 1))
        train_log = pd.DataFrame(columns=log_columns)

        # build model
        model = ResNet(NUM_CLASS).cuda()
        model.load_state_dict(
            torch.load("{}/weight_fold_{}_epoch_512.pth".format(
                LOAD_DIR, fold + 1)))

        # prepare data loaders
        df_train_fold = df_train.iloc[ids_train_split].reset_index(drop=True)
        dataset_train = MelDataset(
            df_train_fold['path'],
            df_train_fold[labels].values,
            crop=CROP_LENGTH,
            crop_mode='additional',
            crop_rate=CROP_RATE,
            mixup=True,
            freqmask=True,
            gain=True,
        )
        train_loader = DataLoader(
            dataset_train,
            batch_size=BATCH_SIZE,
            shuffle=True,
            num_workers=1,
            pin_memory=True,
        )

        df_valid = df_train.iloc[ids_valid_split].reset_index(drop=True)
        dataset_valid = MelDataset(
            df_valid['path'],
            df_valid[labels].values,
        )
        valid_loader = DataLoader(
            dataset_valid,
            batch_size=1,
            shuffle=False,
            num_workers=1,
            pin_memory=True,
        )

        dataset_noisy = MelDataset(
            df_noisy['path'],
            df_noisy[labels].values,
            crop=CROP_LENGTH,
            crop_mode='additional',
            crop_rate=CROP_RATE,
            mixup=True,
            freqmask=True,
            gain=True,
        )
        noisy_loader = DataLoader(
            dataset_noisy,
            batch_size=BATCH_SIZE,
            shuffle=True,
            num_workers=1,
            pin_memory=True,
        )
        noisy_itr = cycle(noisy_loader)

        df_semi = pd.concat([
            df_train.iloc[ids_train_split],
            df_noisy_pseudo.iloc[folds_noisy[fold][0]]
        ]).reset_index(drop=True)
        semi_sampler = torch.utils.data.sampler.WeightedRandomSampler(
            df_semi['weight'].values, len(df_semi))
        dataset_semi = MelDataset(
            df_semi['path'],
            df_semi[labels].values,
            crop=CROP_LENGTH,
            crop_mode='additional',
            crop_rate=CROP_RATE,
            mixup=True,
            freqmask=True,
            gain=True,
        )
        semi_loader = DataLoader(
            dataset_semi,
            batch_size=BATCH_SIZE,
            shuffle=False,
            num_workers=1,
            pin_memory=True,
            sampler=semi_sampler,
        )
        semi_itr = cycle(semi_loader)

        # set optimizer and loss
        optimizer = optim.Adam(filter(lambda p: p.requires_grad,
                                      model.parameters()),
                               lr=LR[0])
        scheduler = CosineLR(optimizer,
                             step_size_min=LR[1],
                             t0=len(train_loader) * NUM_CYCLE,
                             tmult=1)

        # training
        for epoch in range(NUM_EPOCH):
            # train for one epoch
            bce, lwlrap, bce_noisy, lwlrap_noisy, mse_semi = train(
                (train_loader, noisy_itr, semi_itr), model, optimizer,
                scheduler, epoch)

            # evaluate on validation set
            val_bce, val_lwlrap = validate(valid_loader, model)

            # print log
            endtime = time.time() - starttime
            print("Epoch: {}/{} ".format(epoch + 1, NUM_EPOCH) +
                  "CE: {:.4f} ".format(bce) +
                  "LwLRAP: {:.4f} ".format(lwlrap) +
                  "Noisy CE: {:.4f} ".format(bce_noisy) +
                  "Noisy LWLRAP: {:.4f} ".format(lwlrap_noisy) +
                  "Semi MSE: {:.4f} ".format(mse_semi) +
                  "Valid CE: {:.4f} ".format(val_bce) +
                  "Valid LWLRAP: {:.4f} ".format(val_lwlrap) +
                  "sec: {:.1f}".format(endtime))

            # save log and weights
            train_log_epoch = pd.DataFrame(
                [[epoch + 1, bce, lwlrap, bce_noisy, lwlrap_noisy, mse_semi,
                  val_bce, val_lwlrap, endtime]],
                columns=log_columns)
            train_log = pd.concat([train_log, train_log_epoch])
            train_log.to_csv(
                "{}/train_log_fold{}.csv".format(OUTPUT_DIR, fold + 1),
                index=False)
            if (epoch + 1) % NUM_CYCLE == 0:
                torch.save(
                    model.state_dict(),
                    "{}/weight_fold_{}_epoch_{}.pth".format(
                        OUTPUT_DIR, fold + 1, epoch + 1))
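
The pseudo-label sharpening step above raises the averaged predictions to the power TEMPERATURE and renormalises each row, which concentrates probability mass on the top class. A standalone worked example with an illustrative exponent of 2 (not necessarily the source's setting):

import numpy as np

preds = np.array([[0.6, 0.3, 0.1]])                # averaged model predictions
sharpened = preds ** 2                             # TEMPERATURE = 2, for illustration
sharpened /= sharpened.sum(axis=1, keepdims=True)  # renormalise each row
print(sharpened)                                   # ~[[0.783 0.196 0.022]]
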