Example no. 1
0
def train(train_loaders, model, optimizer, scheduler, epoch):
    """Run one training epoch over the curated and noisy loaders (BCE variant).

    Args:
        train_loaders: (curated DataLoader, iterator over noisy batches).
        model: network exposing a main forward and a ``noisy`` head.
        optimizer / scheduler: stepped once per batch.
        epoch: unused here; kept for a caller-compatible signature.

    Returns:
        (mean curated BCE, curated lwlrap, mean noisy BCE, noisy lwlrap).
    """
    train_loader, noisy_itr = train_loaders
    bce_avr = AverageMeter()
    bce_noisy_avr = AverageMeter()
    criterion_bce = nn.BCEWithLogitsLoss()
    sigmoid = nn.Sigmoid()

    # switch to train mode
    model.train()

    # 80x80 identity fed as an auxiliary input to the model.
    # torch.eye replaces the original hand-rolled loop; moved to device once
    # instead of on every batch.
    inp = torch.eye(80, dtype=torch.float32).to(device)

    # training
    preds = np.zeros([0, NUM_CLASS], np.float32)
    y_true = np.zeros([0, NUM_CLASS], np.float32)
    preds_noisy = np.zeros([0, NUM_CLASS], np.float32)
    y_true_noisy = np.zeros([0, NUM_CLASS], np.float32)
    for i, (input, target) in enumerate(train_loader):
        # get batches (torch.autograd.Variable is a deprecated no-op; plain
        # tensors behave identically on modern PyTorch)
        input = input.to(device)
        target = target.to(device)
        input_noisy, target_noisy = next(noisy_itr)
        input_noisy = input_noisy.to(device)
        target_noisy = target_noisy.to(device)

        # compute output
        output = model(input, inp)
        bce = criterion_bce(output, target)
        output_noisy = model.noisy(input_noisy, inp)
        # BUGFIX: BCEWithLogitsLoss applies sigmoid internally; the original
        # passed sigmoid(output_noisy), double-applying the sigmoid and
        # flattening the gradients for the noisy head.
        bce_noisy = criterion_bce(output_noisy, target_noisy)
        loss = bce + bce_noisy
        pred = sigmoid(output).data.cpu().numpy()
        pred_noisy = sigmoid(output_noisy).data.cpu().numpy()

        # backprop
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        scheduler.step()

        # record log
        bce_avr.update(bce.data, input.size(0))
        bce_noisy_avr.update(bce_noisy.data, input.size(0))
        preds = np.concatenate([preds, pred])
        y_true = np.concatenate([y_true, target.data.cpu().numpy()])
        preds_noisy = np.concatenate([preds_noisy, pred_noisy])
        y_true_noisy = np.concatenate([y_true_noisy, target_noisy.data.cpu().numpy()])

    # calc metric: label-weighted label-ranking average precision per stream
    per_class_lwlrap, weight_per_class = calculate_per_class_lwlrap(y_true, preds)
    lwlrap = np.sum(per_class_lwlrap * weight_per_class)
    per_class_lwlrap, weight_per_class = calculate_per_class_lwlrap(y_true_noisy, preds_noisy)
    lwlrap_noisy = np.sum(per_class_lwlrap * weight_per_class)

    return bce_avr.avg.item(), lwlrap, bce_noisy_avr.avg.item(), lwlrap_noisy
Example no. 2
0
def train(train_loaders, model, optimizer, scheduler, epoch):
    """Run one training epoch with soft targets (KL-divergence variant).

    Args:
        train_loaders: (curated DataLoader, iterator over noisy batches).
        model: network exposing a main forward and a ``noisy`` head.
        optimizer / scheduler: stepped once per batch.
        epoch: unused; kept for a caller-compatible signature.

    Returns:
        (mean curated KL, curated lwlrap, mean noisy KL, noisy lwlrap).
    """
    loader, noisy_itr = train_loaders
    kl_avr = AverageMeter()
    kl_noisy_avr = AverageMeter()
    lsigmoid = nn.LogSigmoid().cuda()
    lsoftmax = nn.LogSoftmax(dim=1).cuda()
    softmax = nn.Softmax(dim=1).cuda()
    criterion_kl = nn.KLDivLoss().cuda()

    # enable dropout / batch-norm updates
    model.train()

    # epoch-level accumulators for metric computation
    preds = np.zeros([0, NUM_CLASS], np.float32)
    y_true = np.zeros([0, NUM_CLASS], np.float32)
    preds_noisy = np.zeros([0, NUM_CLASS], np.float32)
    y_true_noisy = np.zeros([0, NUM_CLASS], np.float32)

    for step, (x, y) in enumerate(loader):
        # move the curated batch, then pull a matching noisy batch
        x = torch.autograd.Variable(x.cuda())
        y = torch.autograd.Variable(y.cuda())

        x_noisy, y_noisy = next(noisy_itr)
        x_noisy = torch.autograd.Variable(x_noisy.cuda())
        y_noisy = torch.autograd.Variable(y_noisy.cuda())

        # forward passes: curated head and noisy head share the same loss form
        logits = model(x)
        kl = criterion_kl(lsoftmax(logits), y)
        logits_noisy = model.noisy(x_noisy)
        kl_noisy = criterion_kl(lsoftmax(logits_noisy), y_noisy)
        loss = kl + kl_noisy

        prob = softmax(logits).data.cpu().numpy()
        prob_noisy = softmax(logits_noisy).data.cpu().numpy()

        # backprop + per-batch scheduler step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        scheduler.step()

        # bookkeeping for the running averages and the epoch metric
        kl_avr.update(kl.data, x.size(0))
        kl_noisy_avr.update(kl_noisy.data, x.size(0))
        preds = np.concatenate([preds, prob])
        y_true = np.concatenate([y_true, y.data.cpu().numpy()])
        preds_noisy = np.concatenate([preds_noisy, prob_noisy])
        y_true_noisy = np.concatenate([y_true_noisy, y_noisy.data.cpu().numpy()])

    # label-weighted label-ranking average precision for both streams
    per_class_lwlrap, weight_per_class = calculate_per_class_lwlrap(y_true, preds)
    lwlrap = np.sum(per_class_lwlrap * weight_per_class)
    per_class_lwlrap, weight_per_class = calculate_per_class_lwlrap(y_true_noisy, preds_noisy)
    lwlrap_noisy = np.sum(per_class_lwlrap * weight_per_class)

    return kl_avr.avg.item(), lwlrap, kl_noisy_avr.avg.item(), lwlrap_noisy
def validate(val_loader, model):
    """Evaluate the model on the validation loader (BCE variant).

    Returns:
        (mean BCE loss, lwlrap) over the whole validation set.
    """
    bce_avr = AverageMeter()
    sigmoid = torch.nn.Sigmoid().cuda()
    criterion_bce = nn.BCEWithLogitsLoss().cuda()

    # disable dropout and freeze batch-norm statistics
    model.eval()

    # accumulate probabilities and ground truth across batches
    preds = np.zeros([0, NUM_CLASS], np.float32)
    y_true = np.zeros([0, NUM_CLASS], np.float32)
    for step, (batch_x, batch_y) in enumerate(val_loader):
        batch_x = torch.autograd.Variable(batch_x.cuda())
        batch_y = torch.autograd.Variable(batch_y.cuda())

        # forward pass without building the autograd graph
        with torch.no_grad():
            logits = model(batch_x)
            bce = criterion_bce(logits, batch_y)
            prob = sigmoid(logits).data.cpu().numpy()

        bce_avr.update(bce.data, batch_x.size(0))
        preds = np.concatenate([preds, prob])
        y_true = np.concatenate([y_true, batch_y.data.cpu().numpy()])

    # label-weighted label-ranking average precision over the split
    per_class_lwlrap, weight_per_class = calculate_per_class_lwlrap(y_true, preds)
    lwlrap = np.sum(per_class_lwlrap * weight_per_class)

    return bce_avr.avg.item(), lwlrap
Example no. 4
0
def validate(val_loader, model):
    """Evaluate the model on the validation loader (KL-divergence variant).

    Returns:
        (mean KL loss, lwlrap) over the whole validation set.
    """
    kl_avr = AverageMeter()
    lsoftmax = nn.LogSoftmax(dim=1).cuda()
    softmax = torch.nn.Softmax(dim=1).cuda()
    criterion_kl = nn.KLDivLoss().cuda()

    # disable dropout and freeze batch-norm statistics
    model.eval()

    # accumulate probabilities and ground truth across batches
    preds = np.zeros([0, NUM_CLASS], np.float32)
    y_true = np.zeros([0, NUM_CLASS], np.float32)
    for step, (batch_x, batch_y) in enumerate(val_loader):
        batch_x = torch.autograd.Variable(batch_x.cuda())
        batch_y = torch.autograd.Variable(batch_y.cuda())

        # forward pass without building the autograd graph
        with torch.no_grad():
            logits = model(batch_x)
            kl = criterion_kl(lsoftmax(logits), batch_y)
            prob = softmax(logits).data.cpu().numpy()

        kl_avr.update(kl.data, batch_x.size(0))
        preds = np.concatenate([preds, prob])
        y_true = np.concatenate([y_true, batch_y.data.cpu().numpy()])

    # label-weighted label-ranking average precision over the split
    per_class_lwlrap, weight_per_class = calculate_per_class_lwlrap(y_true, preds)
    lwlrap = np.sum(per_class_lwlrap * weight_per_class)

    return kl_avr.avg.item(), lwlrap
def test(traindatadir,
         testdatadir,
         traindatacsv,
         testdatacsv,
         device,
         model_path=''):
    """
    test data is loaded in same format as validation data

    Loads a trained CNNModelv2 checkpoint from model_path, runs it over the
    test split and prints/logs the resulting lwlrap.
    """
    print('initialize dataset...')
    voiceDataset = FATDataset(traindatadir,
                              testdatadir,
                              traindatacsv,
                              testdatacsv,
                              batch_size=8)
    print('create model ... ')

    n_classes = voiceDataset.get_class_num()
    cnnmodel = models.CNNModelv2(n_classes).to(device)
    # restore trained weights
    print('loading model from {}...'.format(model_path))
    cnnmodel.load_state_dict(torch.load(model_path))

    cnnmodel.eval()

    # collect per-sample predictions and ground truth for every test batch
    n_batches = voiceDataset.get_numof_batch(False)
    all_preds = np.array([]).reshape(0, n_classes)
    all_labels = np.array([]).reshape(0, n_classes)
    for batch_idx in tqdm(range(n_batches)):
        batch_data, sample_counts, batch_labels = voiceDataset.get_data(
            batch_idx, False)

        # aggregate the per-clip outputs back to one prediction per sample
        batch_pred = oneSampleOutput(
            cnnmodel(batch_data.to(device)).detach(),
            sample_counts).to(device)
        all_preds = np.vstack((all_preds, batch_pred.cpu().numpy()))
        all_labels = np.vstack((all_labels, batch_labels.cpu().numpy()))

    score, weight = utils.calculate_per_class_lwlrap(all_labels, all_preds)
    lwlrap = (score * weight).sum()
    msg = '[TESTING]  lwlrap:{}'.format(lwlrap)
    print(msg)
    logger.info(msg)
def finetune(traindatadir,
             valdatadir,
             traindatacsv,
             valdatacsv,
             device,
             model_path,
             cfg_path,
             save_model_filename,
             frorm_scratch=True):
    """Fine-tune (or train from scratch) a pruned CNN on the FAT dataset.

    The architecture is rebuilt from the pruning config pickled at cfg_path;
    the concrete class is chosen by the 'model_arch' config option. Trains
    with BCEWithLogitsLoss, validates every ``eval_steps`` batches using the
    lwlrap metric, and checkpoints to ``checkpoint_dir`` (module-level name).

    Args:
        traindatadir, valdatadir: data directories for the train/val splits.
        traindatacsv, valdatacsv: CSV label files for the two splits.
        device: torch device for the model and batches.
        model_path: checkpoint to resume from (used only when not frorm_scratch).
        cfg_path: path to the pickled per-layer config produced by pruning.
        save_model_filename: filename of the saved checkpoint.
        frorm_scratch: [sic — typo kept for caller compatibility] when True,
            skip loading model_path and start from random initialization.
    """
    # Fixed hyper-parameters for this fine-tuning run.
    EPOCH = 100
    printout_steps = 50  # log training loss every N batches
    eval_steps = 200     # run validation every N batches
    lr_steps = 300       # step the LR scheduler every N batches
    lr = 5e-4
    t_max = 200          # CosineAnnealingLR half-period (in scheduler steps)
    eta_min = 3e-6       # CosineAnnealingLR minimum learning rate

    print('initialize dataset...')
    voiceDataset = FATDataset(traindatadir,
                              valdatadir,
                              traindatacsv,
                              valdatacsv,
                              batch_size=8)
    # print (voiceDataset.get_class_num())
    print('create model ... ')

    # cnnmodel = models.CNNModelv2(voiceDataset.get_class_num()).to(device)
    print('cfg_path', cfg_path)
    cfg = pkl.load(open(cfg_path, 'rb'))
    # NOTE(review): if 'model_arch' is neither 'basic' nor 'poolrevised',
    # cnnmodel is never bound and the print below raises NameError.
    if config.get('Parameters', 'model_arch').lower() == 'basic':
        cnnmodel = model4prune.CNNModelBasic(voiceDataset.get_class_num(),
                                             cfg).to(device)
    elif config.get('Parameters', 'model_arch').lower() == 'poolrevised':
        cnnmodel = model4prune.CNNModelPoolingRevised(
            voiceDataset.get_class_num(), cfg).to(device)
    print(cnnmodel)
    if not frorm_scratch:
        print('loading model from {}...'.format(model_path))
        cnnmodel.load_state_dict(torch.load(model_path))
    optimizer = torch.optim.Adam(cnnmodel.parameters(), lr)
    scheduler = CosineAnnealingLR(optimizer, T_max=t_max, eta_min=eta_min)
    criterion = nn.BCEWithLogitsLoss()
    bestlwlrap = -1
    for e in range(EPOCH):
        voiceDataset.shuffle_trainingdata()
        num_step_per_epoch = voiceDataset.get_numof_batch(istraindata=True)
        # print (num_step_per_epoch)
        for bidx in tqdm(range(num_step_per_epoch)):
            # print ('get fingerprint...')
            batch_data, samplenumbatch, label_batch = voiceDataset.get_data(
                bidx, True)  #[M, 128, duration, 3]
            # print ('fingerprint got...')
            # Skip size-<=1 batches (presumably to protect batch-norm from
            # single-sample batches — confirm with the model definition).
            if batch_data.shape[0] <= 1:
                continue
            # print (batch_data.shape,label_batch.shape)
            bx = batch_data.to(device)

            output = cnnmodel(bx)
            # Collapse per-clip outputs to one prediction per sample.
            output = oneSampleOutput(output, samplenumbatch).to(device)
            # print (output.shape,label_batch.shape)
            by = label_batch.to(device)
            #             by = autograd.Variable(label_batch,requires_grad = True)
            loss = criterion(output, by)
            #             loss = autograd.Variable(loss, requires_grad = True)
            if bidx % printout_steps == 0:
                msg = '[TRAINING] Epoch:{}, step:{}/{}, loss:{}'.format(
                    e, bidx, num_step_per_epoch, loss)
                print(msg)
                logger.info(msg)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if (bidx + 1) % eval_steps == 0:
                # doing validation over the whole validation split
                cnnmodel.eval()
                val_batches_num = voiceDataset.get_numof_batch(False)
                val_preds = np.array([]).reshape(0,
                                                 voiceDataset.get_class_num())
                val_labels = np.array([]).reshape(0,
                                                  voiceDataset.get_class_num())
                val_loss = 0.
                for vbidx in tqdm(range(val_batches_num)):
                    # print ('generating validation fingerprint ... ')
                    val_data, val_samplenumbatch, val_label = voiceDataset.get_data(
                        vbidx, False)
                    # print ('val_data shape:',val_data.shape)
                    pred = oneSampleOutput(
                        cnnmodel(val_data.to(device)).detach(),
                        val_samplenumbatch).to(device)
                    val_preds = np.vstack((val_preds, pred.cpu().numpy()))
                    # print (pred.shape)
                    # print (criterion(pred,val_label.to(device)))
                    # NOTE(review): BCEWithLogitsLoss is applied to the
                    # aggregated `pred` here — whether that is still a logit
                    # depends on oneSampleOutput; verify against its source.
                    val_loss += criterion(
                        pred, val_label.to(device)).item() / val_label.shape[0]
                    val_labels = np.vstack(
                        (val_labels, val_label.cpu().numpy()))
                score, weight = utils.calculate_per_class_lwlrap(
                    val_labels, val_preds)
                lwlrap = (score * weight).sum()
                msg = '[VALIDATION] Epoch:{}, step:{}:/{}, loss:{}, lwlrap:{}'.format(
                    e, bidx, num_step_per_epoch, val_loss, lwlrap)
                print(msg)
                logger.info(msg)
                # NOTE(review): the second condition forces a save at the last
                # batch of an epoch and overwrites bestlwlrap with the current
                # (possibly lower) score.
                if lwlrap > bestlwlrap or bidx == num_step_per_epoch - 1:
                    bestlwlrap = lwlrap
                    #save model
                    save_model_path = os.path.join(checkpoint_dir,
                                                   save_model_filename)
                    torch.save(cnnmodel.state_dict(), save_model_path)
                    msg = 'save model to: {}'.format(save_model_path)
                    print(msg)
                    logger.info(msg)

                cnnmodel.train()
            # Scheduler is advanced on a batch cadence, not per epoch.
            if bidx % lr_steps == 0:
                scheduler.step()
def train(device, model_path='', loadModel=False):
    """Stage-2 semi-supervised training of CNNModelv2 on curated + noisy data.

    Each step fetches the same batch twice (Z1/Z2); per the inline note these
    are expected to differ only in data augmentation, and `loss_func` combines
    the two outputs with the curated labels (consistency-style objective —
    see loss_func for the exact formulation). Validates every ``eval_steps``
    batches and checkpoints the best-lwlrap model to the configured path.

    Args:
        device: torch device for the model and batches.
        model_path: checkpoint to load when ``loadModel`` is True.
        loadModel: when True, resume from ``model_path``.
    """
    # Fixed hyper-parameters for this training stage.
    EPOCH = 50
    printout_steps = 50  # log training loss every N batches
    eval_steps = 200     # run validation every N batches
    lr_steps = 300       # step the LR scheduler every N batches
    lr = 3e-3
    t_max = 200          # CosineAnnealingLR half-period (in scheduler steps)
    eta_min = 3e-6       # CosineAnnealingLR minimum learning rate

    print('initialize dataset...')
    # Paths for the curated/noisy training data and the validation split,
    # all resolved from the project config file.
    curated_data_path = os.path.join(config.get('DataPath', 'split_dir'),
                                     'mels_train_curated_split.pkl')
    curated_csv = os.path.join(config.get('DataPath', 'split_dir'),
                               'train_curated_split.csv')
    noisy_data_path = config.get('DataPath', 'noisy_data_path')
    noisy_csv = config.get('DataPath', 'filtered_noisy_csv_path')

    val_datapath = os.path.join(config.get('DataPath', 'split_dir'),
                                'mels_val_curated_split.pkl')
    val_csv_path = os.path.join(config.get('DataPath', 'split_dir'),
                                'val_curated_split.csv')

    voiceDataset = SemiSupervisedDataset(curated_data_path,curated_csv,\
                                            noisy_data_path,noisy_csv,\
                                            val_datapath,val_csv_path,\
                                            batch_size=4)
    # print (voiceDataset.get_class_num())
    print('create model ... ')

    cnnmodel = models.CNNModelv2(voiceDataset.get_class_num()).to(device)
    if loadModel:
        print('loading model from {}...'.format(model_path))
        cnnmodel.load_state_dict(torch.load(model_path))

    optimizer = torch.optim.Adam(cnnmodel.parameters(), lr)
    scheduler = CosineAnnealingLR(optimizer, T_max=t_max, eta_min=eta_min)
    criterion = nn.BCEWithLogitsLoss()
    bestlwlrap = -1
    for e in range(EPOCH):
        voiceDataset.shuffle_and_cut()
        num_step_per_epoch = voiceDataset.get_numof_batch(True)
        # print (num_step_per_epoch)
        for bidx in tqdm(range(num_step_per_epoch)):
            # Z1
            batch_data1, samplenumbatch1, label_batch1 = voiceDataset.get_data(
                bidx, True)  #[M, 128, duration, 3]
            # Z2
            # Z1 and Z2 should be from two exactly  same batch, but use different data augmentation
            batch_data2, samplenumbatch2, label_batch2 = voiceDataset.get_data(
                bidx, True)  #[M, 128, duration, 3]

            # print ('fingerprint got...')
            # Skip size-<=1 batches (presumably to protect batch-norm from
            # single-sample batches — confirm with the model definition).
            if batch_data1.shape[0] <= 1:
                continue
            # print (batch_data.shape,label_batch.shape)
            bx1 = batch_data1.to(device)
            bx2 = batch_data2.to(device)

            # Two forward passes over the two augmented views of the batch.
            output1 = cnnmodel(bx1)
            output2 = cnnmodel(bx2)
            Z1 = oneSampleOutput(output1, samplenumbatch1).to(device)
            Z2 = oneSampleOutput(output2, samplenumbatch2).to(device)

            # print (Z1.shape,Z2.shape,label_batch1.shape)
            by = label_batch1.to(
                device)  # label_batch1 should be exactly same as label_batch2
            #
            loss = loss_func(criterion, Z1, Z2, by)
            #             loss = autograd.Variable(loss, requires_grad = True)
            if bidx % printout_steps == 0:
                msg = '[TRAINING] Epoch:{}, step:{}/{}, loss:{}'.format(
                    e, bidx, num_step_per_epoch, loss)
                print(msg)
                logger.info(msg)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if (bidx + 1) % eval_steps == 0:
                # doing validation over the whole validation split
                cnnmodel.eval()
                val_batches_num = voiceDataset.get_numof_batch(False)
                val_preds = np.array([]).reshape(0,
                                                 voiceDataset.get_class_num())
                val_labels = np.array([]).reshape(0,
                                                  voiceDataset.get_class_num())
                # val_loss = 0.
                for vbidx in tqdm(range(val_batches_num)):

                    #
                    val_data, val_samplenumbatch, val_label = voiceDataset.get_data(
                        vbidx, False)
                    # print ('val_data shape:',val_data.shape)
                    pred = oneSampleOutput(
                        cnnmodel(val_data.to(device)).detach(),
                        val_samplenumbatch).to(device)
                    val_preds = np.vstack((val_preds, pred.cpu().numpy()))
                    # print (pred.shape)
                    # print (criterion(pred,val_label.to(device)))
                    # val_loss += criterion(pred,val_label.to(device)).item()/val_label.shape[0]
                    val_labels = np.vstack(
                        (val_labels, val_label.cpu().numpy()))
                score, weight = utils.calculate_per_class_lwlrap(
                    val_labels, val_preds)
                lwlrap = (score * weight).sum()
                msg = '[VALIDATION] Epoch:{}, step:{}:/{},  lwlrap:{}'.format(
                    e, bidx, num_step_per_epoch, lwlrap)
                print(msg)
                logger.info(msg)
                if lwlrap > bestlwlrap:
                    bestlwlrap = lwlrap
                    #save model
                    save_model_filename = config.get('SaveModel',
                                                     'stage2_model')
                    save_model_path = os.path.join(
                        config.get('DataPath', 'checkpoint_dir'),
                        save_model_filename)
                    torch.save(cnnmodel.state_dict(), save_model_path)
                    msg = 'save model to: {}'.format(save_model_path)
                    print(msg)
                    logger.info(msg)

                cnnmodel.train()
            # Scheduler is advanced on a batch cadence, not per epoch.
            if bidx % lr_steps == 0:
                scheduler.step()
Example no. 8
0
def test(traindatadir,
         testdatadir,
         traindatacsv,
         testdatacsv,
         device,
         model_path=''):
    """
    test data is loaded in same format as validation data

    Builds the pruned-model architecture selected by the 'model_arch' config
    option, loads the checkpoint at model_path, then reports lwlrap together
    with FLOPs, parameter count and wall-clock evaluation time.
    """
    print('initialize dataset...')
    voiceDataset = FATDataset(traindatadir,
                              testdatadir,
                              traindatacsv,
                              testdatacsv,
                              batch_size=8)
    print('create model ... ')

    # FIX: the original constructed a default CNNModelBasic and then usually
    # replaced it immediately in the if/elif — a throwaway allocation. Build
    # exactly one model, keeping the same fallback semantics: 'poolrevised'
    # selects the pooling-revised net, everything else (including unknown
    # values) gets the basic one.
    model_arch = config.get('Parameters', 'model_arch').lower()
    if model_arch == 'poolrevised':
        cnnmodel = model4prune.CNNModelPoolingRevised(
            voiceDataset.get_class_num()).to(device)
    else:
        cnnmodel = model4prune.CNNModelBasic(
            voiceDataset.get_class_num()).to(device)

    # loading trained model
    print('loading model from {}...'.format(model_path))
    cnnmodel.load_state_dict(torch.load(model_path))

    # testing
    cnnmodel.eval()
    test_batches_num = voiceDataset.get_numof_batch(False)
    test_preds = np.array([]).reshape(0, voiceDataset.get_class_num())
    test_labels = np.array([]).reshape(0, voiceDataset.get_class_num())

    # calculate flops and params on a single sample from the first batch
    flop_test_data, _, __ = voiceDataset.get_data(0, False)
    flop_test_data = flop_test_data[0:1, :, :, :].to(device)
    flops, params = profile(cnnmodel, inputs=(flop_test_data, ))

    eval_start = timeit.default_timer()
    for tbidx in tqdm(range(test_batches_num)):
        test_data, test_samplenumbatch, test_label = voiceDataset.get_data(
            tbidx, False)

        # aggregate per-clip outputs back to one prediction per sample
        pred = oneSampleOutput(
            cnnmodel(test_data.to(device)).detach(),
            test_samplenumbatch).to(device)
        test_preds = np.vstack((test_preds, pred.cpu().numpy()))
        test_labels = np.vstack((test_labels, test_label.cpu().numpy()))
    eval_stop = timeit.default_timer()

    score, weight = utils.calculate_per_class_lwlrap(test_labels, test_preds)
    lwlrap = (score * weight).sum()
    msg = '[TESTING]  lwlrap:{}, flops:{}, params:{}, running time:{}'.format(
        lwlrap, flops, params, eval_stop - eval_start)

    print(msg)
    logger.info(msg)