コード例 #1
0
ファイル: main.py プロジェクト: Yaminbens/NLP-hw2
def main():
    """Train the perceptron tagger, tag the test set and log the outcome.

    Builds features from the training sentences, runs ``jdx`` training
    passes of the perceptron, tags ``utils.TEST`` into ``utils.TEST_R`` and
    evaluates the tagging.  Progress, elapsed time and the evaluation result
    are written to the file ``result`` in the current directory.
    """
    # Run configuration: ``idxs`` is passed to CFeatures and logged as the
    # "threshold"; ``jdx`` is the number of training iterations.
    idxs = 3
    jdx = 20

    # The context manager guarantees the log is flushed and closed even if
    # training or inference raises.
    with open("result", "w") as results:
        d = Parser(utils.TRAIN)
        f = CFeatures(d.sentences, idxs)
        w = np.zeros(f.f_len)
        tt = time()
        results.write("Training model for {} iterations with threshold {}...\n".format(jdx,idxs))
        perc = Perceptron(d.sentences, w, f, utils.MODE)
        for _ in range(jdx):
            perc.train()
        w = perc.getW()
        results.write("time in seconds: {}\n".format(time() - tt))

        # Tag the test set with the learned weights and score the result.
        t = Parser(utils.TEST)
        inf = Inference(w, t.sentences, f,utils.MODE)
        inf.tag_text(utils.TEST_R)
        res = evaluate(utils.TEST, utils.TEST_R)
        results.write("correct: {}\n\n\n".format(res))
コード例 #2
0
    # net_A2.load_state_dict(p1)  # update net1's parameters to the averaged parameters
    # net_B2.load_state_dict(p1)  # update net2's parameters to the averaged parameters

    # Average the parameters of net_A3 and net_B3 and load the result into both.
    p1 = net_A3.state_dict()  # get net1's parameter dict
    p2 = net_B3.state_dict()  # get net2's parameter dict
    for key, value in p2.items():  # p1 becomes the average of the two dicts
        p1[key] = (p1[key] + value) / 2

    net_A3.load_state_dict(p1)  # update net1's parameters to the averaged parameters
    net_B3.load_state_dict(p1)  # update net2's parameters to the averaged parameters

    # The reported loss is the mean of the two losses.
    loss = (loss1 + loss2) / 2

    g1.ndata['h'] = features1
    g2.ndata['h'] = features2

    upload_en(g1, list_up1)  # re-assign the initial features to the dictionary on the server
    upload_en(g2, list_up2)

    # Evaluate on the test mask and keep the accuracy for later export.
    acc1 = evaluate(net_A1, net_A2, net_A3, g, features, labels, test_mask)
    acc.append(acc1)
    # print("Epoch {:05d} | Loss {:.4f} | Time(s) {:.4f}".format(
    #     epoch, loss.item(), np.mean(dur)))

    print(
        "Epoch {:05d} | Loss {:.4f} | Test Acc {:.4f} | Time(s) {:.4f}".format(
            epoch, loss.item(), acc1, np.mean(dur)))
import pandas as pd

# Convert the collected accuracies to a DataFrame (rebinding `acc`) and
# write them to ./test2.csv.
acc = pd.DataFrame(data=acc)
acc.to_csv('./test2.csv')
コード例 #3
0
def validate(val_loader, net, criterion, optimizer, epoch, iter_num,
             train_args, visualize):
    """Run sliced validation, checkpoint the model and log the metrics.

    Every item of ``val_loader`` is processed slice by slice: the network
    output of each slice is accumulated into a full-size score map, the map
    is divided by the per-pixel coverage count and arg-maxed over classes to
    obtain the prediction.  Afterwards the metrics are computed, the best
    record in ``train_args`` is updated, network and optimizer state are
    saved, and scalars/images are written through the module-level
    ``writer``.

    Relies on the module-level names ``args``, ``cityscapes``,
    ``ckpt_path``, ``exp_name`` and ``writer``.

    Args:
        val_loader: iterable yielding ``(input, gt, slices_info)`` with 5-D,
            4-D and 3-D tensors respectively (enforced by assertions).
        net: the network under evaluation; switched to eval mode here and
            back to train mode before returning.
        criterion: loss callable applied to ``(output_slice, gt_slice)``.
        optimizer: optimizer whose state is saved and whose second param
            group's learning rate is embedded in the snapshot name.
        epoch: current epoch, used for logging and naming.
        iter_num: current iteration, used for logging and naming.
        train_args: dict holding ``'best_record'`` and
            ``'val_save_to_img_file'``.
        visualize: callable turning an RGB PIL image into a tensor for the
            image grid.

    Returns:
        The average validation loss (``val_loss.avg``).
    """
    net.eval()

    val_loss = AverageMeter()

    # Size of the full (un-sliced) label/prediction maps.
    full_h = int(args['longer_size'] / 2)
    full_w = int(args['longer_size'])

    gts_all = np.zeros((len(val_loader), full_h, full_w), dtype=int)
    predictions_all = np.zeros((len(val_loader), full_h, full_w), dtype=int)
    for vi, data in enumerate(val_loader):
        inputs, gt, slices_info = data
        assert len(inputs.size()) == 5 and len(gt.size()) == 4 and len(
            slices_info.size()) == 3
        # Bring the slice dimension to the front so the tensors can be
        # iterated slice by slice.
        inputs.transpose_(0, 1)
        gt.transpose_(0, 1)
        slices_info.squeeze_(0)
        assert inputs.size()[3:] == gt.size()[2:]

        count = torch.zeros(full_h, full_w)  # .cuda()
        output = torch.zeros(cityscapes.num_classes, full_h,
                             full_w)  # .cuda()

        slice_batch_pixel_size = inputs.size(1) * inputs.size(3) * inputs.size(4)

        for input_slice, gt_slice, info in zip(inputs, gt, slices_info):
            input_slice = Variable(input_slice)  # .cuda()
            gt_slice = Variable(gt_slice)  # .cuda()

            output_slice = net(input_slice)
            assert output_slice.size()[2:] == gt_slice.size()[1:]
            assert output_slice.size()[1] == cityscapes.num_classes
            # info[0]:info[1] / info[2]:info[3] select the target rows and
            # columns in the full map; info[4], info[5] bound the part of
            # the slice that is copied.
            output[:, info[0]:info[1], info[2]:info[3]] += output_slice[
                0, :, :info[4], :info[5]].data
            gts_all[vi, info[0]:info[1], info[2]:info[3]] += gt_slice[
                0, :info[4], :info[5]].data.cpu().numpy()

            count[info[0]:info[1], info[2]:info[3]] += 1

            # NOTE(review): `.data[0]` is the pre-0.4 PyTorch way of reading
            # a scalar loss; confirm the targeted PyTorch version.
            val_loss.update(
                criterion(output_slice, gt_slice).data[0],
                slice_batch_pixel_size)

        # Overlapping slices were summed; divide by the coverage count.
        output /= count
        # Integer array: use floor division. In-place true division (`/=`)
        # cannot be cast back to an int array under Python 3.
        gts_all[vi, :, :] //= count.cpu().numpy().astype(int)
        predictions_all[vi, :, :] = output.max(0)[1].squeeze_(0).cpu().numpy()

        print('validating: %d / %d' % (vi + 1, len(val_loader)))

    acc, acc_cls, mean_iu, fwavacc = evaluate(predictions_all, gts_all,
                                              cityscapes.num_classes)
    best_record = train_args['best_record']
    if val_loss.avg < best_record['val_loss']:
        best_record['val_loss'] = val_loss.avg
        best_record['epoch'] = epoch
        best_record['iter'] = iter_num
        best_record['acc'] = acc
        best_record['acc_cls'] = acc_cls
        best_record['mean_iu'] = mean_iu
        best_record['fwavacc'] = fwavacc
    snapshot_name = 'epoch_%d_iter_%d_loss_%.5f_acc_%.5f_acc-cls_%.5f_mean-iu_%.5f_fwavacc_%.5f_lr_%.10f' % (
        epoch, iter_num, val_loss.avg, acc, acc_cls, mean_iu, fwavacc,
        optimizer.param_groups[1]['lr'])
    torch.save(net.state_dict(),
               os.path.join(ckpt_path, exp_name, snapshot_name + '.pth'))
    torch.save(
        optimizer.state_dict(),
        os.path.join(ckpt_path, exp_name, 'opt_' + snapshot_name + '.pth'))

    if train_args['val_save_to_img_file']:
        to_save_dir = os.path.join(ckpt_path, exp_name,
                                   '%d_%d' % (epoch, iter_num))
        check_mkdir(to_save_dir)

    val_visual = []
    for idx, data in enumerate(zip(gts_all, predictions_all)):
        gt_pil = cityscapes.colorize_mask(data[0])
        predictions_pil = cityscapes.colorize_mask(data[1])
        if train_args['val_save_to_img_file']:
            predictions_pil.save(
                os.path.join(to_save_dir, '%d_prediction.png' % idx))
            gt_pil.save(os.path.join(to_save_dir, '%d_gt.png' % idx))
            val_visual.extend([
                visualize(gt_pil.convert('RGB')),
                visualize(predictions_pil.convert('RGB'))
            ])
    # Nothing is collected when image saving is disabled (or the loader is
    # empty); torch.stack rejects an empty list, so skip the image grid then.
    if val_visual:
        val_visual = torch.stack(val_visual, 0)
        val_visual = torchvision.utils.make_grid(val_visual, nrow=2, padding=5)
        writer.add_image(snapshot_name, val_visual)

    print(
        '-----------------------------------------------------------------------------------------------------------'
    )
    print(
        '[epoch %d], [iter %d], [val loss %.5f], [acc %.5f], [acc_cls %.5f], [mean_iu %.5f], [fwavacc %.5f]'
        % (epoch, iter_num, val_loss.avg, acc, acc_cls, mean_iu, fwavacc))

    print(
        'best record: [val loss %.5f], [acc %.5f], [acc_cls %.5f], [mean_iu %.5f], [fwavacc %.5f], [epoch %d], '
        '[iter %d]' % (train_args['best_record']['val_loss'],
                       train_args['best_record']['acc'],
                       train_args['best_record']['acc_cls'],
                       train_args['best_record']['mean_iu'],
                       train_args['best_record']['fwavacc'],
                       train_args['best_record']['epoch'],
                       train_args['best_record']['iter']))

    print(
        '-----------------------------------------------------------------------------------------------------------'
    )

    writer.add_scalar('val_loss', val_loss.avg, epoch)
    writer.add_scalar('acc', acc, epoch)
    writer.add_scalar('acc_cls', acc_cls, epoch)
    writer.add_scalar('mean_iu', mean_iu, epoch)
    writer.add_scalar('fwavacc', fwavacc, epoch)

    net.train()
    return val_loss.avg
コード例 #4
0
# Input-branch combinations that can be selected as the dataset layout.
sampleDatasets_cpf_sv = ["db", "cpf", "sv"]
sampleDatasets_sv = ["db", "sv"]

# Select the model and the evaluation helpers.
from models.DeepJet_models_final import conv_model_final as trainingModel
from DeepJetCore.training.training_base import training_base
from funcs import loadModel, evaluate

# Run configuration, taken from `opts`.
inputDataset = sampleDatasets_pf_cpf_sv
trainDir = opts.d
inputTrainDataCollection = opts.t
inputTestDataCollection = opts.i
LoadModel = True
removedVars = None

# Load the trained model that is going to be evaluated.
evalModel = loadModel(
    trainDir,
    inputTrainDataCollection,
    trainingModel,
    LoadModel,
    inputDataset,
    removedVars,
)
evalDir = opts.o

# Read the test data collection.
from DeepJetCore.DataCollection import DataCollection
testd = DataCollection()
testd.readFromFile(inputTestDataCollection)

# Refuse to write into an already existing output directory.
if os.path.isdir(evalDir):
    raise Exception('output directory: %s must not exists yet' % evalDir)
os.mkdir(evalDir)

df = evaluate(testd, evalModel, evalDir)
    
コード例 #5
0
ファイル: infer.py プロジェクト: Yaminbens/NLP-hw2
from Inference import *
from main import *
from funcs import evaluate
import utils

# Inference on the TEST set: features are built from the training
# sentences, the weight vector is loaded from disk, the test sentences are
# tagged into utils.TEST_R and the tagging is evaluated.
t = Parser(utils.TEST)
d = Parser(utils.TRAIN)
# d = Parser("dum")
f = Features(d.sentences)
# w = np.zeros(f.f_len)
# Close the weights file deterministically instead of leaking the handle.
# NOTE: pickle can execute arbitrary code; only load weight files you trust.
with open(utils.W_VEC, 'rb') as w_file:
    w = pickle.load(w_file)
inf = Inference(w, t.sentences, f)
inf.tag_text(utils.TEST_R)
evaluate(utils.TEST, utils.TEST_R)

#
# #inf TRAIN
# t = Parser(utils.TEST)
# d = Parser(utils.TRAIN)
# # d = Parser("dum")
# f = Features(d.sentences)
# # w = np.zeros(f.f_len)
# w = pickle.load(open(utils.W_VEC, 'rb'))
# inf = Inference(w,d.sentences,f)
# inf.tag_text(utils.TRAIN_R)
# evaluate(utils.TRAIN, utils.TRAIN_R)
コード例 #6
0
    # `sample` is a (users, items) pair, as reported in the message below.
    i, j = sample
    subset = F.sample(ratings, user_counts, movie_counts, i, j)

    # Use a smaller upper bound for k when the sample has 100 items.
    if j == 100:
        max_k = 25
    else:
        max_k = 45

    k_s = range(5, max_k, 5)
    train, test = train_test(subset)

    print "Running Baseline, KNN on the dataset with {} users and {} items".format(
        i, j)

    # Baseline model: train, evaluate and tag the result with its origin.
    base, base_test = F.train_baseline(subset)
    base_eval = F.evaluate(base, subset, base_test)
    base_eval['name'] = 'baseline'
    base_eval['sample'] = sample

    all_results.append(base_eval)

    for k in k_s:

        # The value handed to F.train_matrix follows k but is capped at 25.
        f = k
        if f > 25:
            f = 25

        # MF model
        t_0 = time.time()

        mf, mf_test = F.train_matrix(subset, f, 5)
コード例 #7
0
# Parameter grids and the accumulator for every evaluation result.
k_s = range(5, 60, 5)
factor_sizes = range(5, 60, 5)
top_k = 5
all_results = []

# Rating counts per user and per movie, passed to F.sample below.
user_value_counts = ratings['UserId'].value_counts()
movie_value_counts = ratings['MovieId'].value_counts()

for sample in samples:
    # `sample` is a (users, items) pair, as reported in the message below.
    i, j = sample
    _dataset = F.sample(ratings, user_value_counts, movie_value_counts, i, j)
    print "Running Baseline, MF, KNN on the dataset with {} users and {} items".format(i, j)

    # Baseline model: train, evaluate and tag the result with its origin.
    base, base_test = F.train_baseline(_dataset)
    base_eval = F.evaluate(base, _dataset, base_test)
    base_eval['name'] = 'baseline'
    base_eval['sample'] = sample

    all_results.append(base_eval)

    # One matrix-factorisation run per value in factor_sizes.
    for f in factor_sizes:
        t_0 = time.time()

        mf, mf_test = F.train_matrix(_dataset, f, 5)

        print "Running MF with F of {}".format(f)
        results = F.evaluate(mf, _dataset, mf_test, top_k)
        # record the sample and the factor size alongside the results
        results['sample'] = sample
        results['f'] = f
# When true, the projected class distributions are plotted per dataset.
plot = False
#####################################################

# For each of the first two datasets: build a linear discriminant from the
# class means and the summed class covariances, classify, and evaluate.
for i in range(2):
    dataset_name = datasets[i]

    # The same frame is used both as training and as test data.
    xtrain = x = pd.read_csv("../Data/{}.csv".format(dataset_name))
    ytrain = xtrain["y"].to_numpy()
    ytest = xtrain["y"].to_numpy()
    xpos, xneg, x, xtest = funcs.generate_two_classes(xtrain, xtrain)

    # Per-class covariances, summed into the pooled matrix and inverted.
    cov_pos = funcs.covariance_matrix(xpos)
    cov_neg = funcs.covariance_matrix(xneg)
    pooled_covariance = cov_pos + cov_neg
    inverse_pooled_covariance = np.linalg.inv(pooled_covariance)

    # Projection direction: inverse pooled covariance times the mean gap.
    mean_diff = xpos.mean(0) - xneg.mean(0)
    classifier = inverse_pooled_covariance.dot(mean_diff)

    # Project both classes onto the discriminant direction.
    xpos_transform = xpos.dot(classifier)
    xneg_transform = xneg.dot(classifier)

    predictions = funcs.classify(xtest, classifier, thresholds[i],
                                 dataset_name)

    print('Results for ' + dataset_name)
    funcs.evaluate(predictions, ytest, dataset_name)
    print('\n\n\n')
    if plot:
        funcs.plot_normal(xpos_transform, xneg_transform, dataset_name)