def test():
    print("=== Test ===")
    args = get_args()
    print(args)

    data_dir = f"./../../asset/{args.dataset}/"
    if args.train:
        test_labels, test_texts = read_train_data(data_dir)
    else:
        test_labels, test_texts = read_test_data(data_dir)
    # test_texts = list(test_texts)[:100]
    # test_labels = list(test_labels)[:100]
    test_texts = list(test_texts)
    test_labels = list(test_labels)

    model_name = args.model
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    test_encodings = tokenizer(test_texts, truncation=True, padding=True,
                               max_length=512)
    test_dataset = CustomDataset(test_encodings, test_labels)

    checkpoint_dir = f"./models/{args.task}/{args.model}/"
    best_checkpoint = find_best_checkpoint(checkpoint_dir)
    model = AutoModelForSequenceClassification.from_pretrained(best_checkpoint)
    test_trainer = Trainer(model)
    test_loader = DataLoader(test_dataset, batch_size=args.batch_size,
                             shuffle=False)
    raw_pred, _, _ = test_trainer.prediction_loop(test_loader,
                                                  description="prediction")

    # Preprocess raw predictions
    y_pred = np.argmax(raw_pred, axis=1)
    metrics = compute_metrics(y_pred, test_labels)
    print(metrics)

    if args.train:
        fpath = os.path.join(data_dir, f"train-predictions/{args.model}.pkl")
    else:
        fpath = os.path.join(data_dir, f"predictions/{args.model}.pkl")
    parent_dir = os.path.dirname(str(fpath))
    if not os.path.exists(parent_dir):
        os.makedirs(parent_dir)
    with open(fpath, 'wb') as f:
        pickle.dump(y_pred, f)
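# --- Illustrative sketch (not from the original source): one plausible
# implementation of the find_best_checkpoint helper used above, assuming
# Hugging Face checkpoint directories named "checkpoint-<global_step>".
# This version simply returns the checkpoint with the highest step count.
def find_best_checkpoint_sketch(checkpoint_dir):
    checkpoints = [
        d for d in os.listdir(checkpoint_dir)
        if d.startswith("checkpoint-") and d.split("-")[-1].isdigit()
    ]
    if not checkpoints:
        raise FileNotFoundError(f"No checkpoints found in {checkpoint_dir}")
    latest = max(checkpoints, key=lambda d: int(d.split("-")[-1]))
    return os.path.join(checkpoint_dir, latest)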
def cls_test(model, task_name):
    data = read_test_data(dir="evaluation/outputs/{}".format(task_name))
    x = data["test_x"]
    y = data["test_y"]
    x = [sent for sent in x]
    pred = np.argmax(model(x).cpu().data.numpy(), axis=1)
    # Loop variable renamed from `y` to `t` to avoid shadowing the label list.
    acc = sum([1 if p == t else 0 for p, t in zip(pred, y)]) / len(pred)
    return acc
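# Side note (illustration only, not part of the original code): the
# element-wise accuracy loop in cls_test is equivalent to one vectorized
# NumPy comparison over the prediction and label arrays.
def accuracy_sketch(pred, y):
    return float(np.mean(np.asarray(pred) == np.asarray(y)))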
def get_datas():
    train_data = read_train_data()
    comment = train_data[0]
    result = train_data[1]
    test_data = read_test_data()

    lab = []
    classes_name, classes_count = np.unique(result, return_counts=True)
    for i in range(len(result)):
        lab.append(np.where(classes_name == result[i])[0][0])
    lab = np.asarray(lab)
    return comment, lab, test_data, classes_name
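# Equivalent vectorized mapping (illustration only): np.unique with
# return_inverse=True yields exactly the integer labels that the loop in
# get_datas builds one element at a time.
def encode_labels_sketch(result):
    classes_name, lab = np.unique(result, return_inverse=True)
    return classes_name, lab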
def test_acc(model):
    data = read_test_data(dir="evaluation/outputs/yelp")
    x = data["test_x"]
    y = data["test_y"]
    model.eval()
    x = [sent for sent in x]
    pred = np.argmax(model(x).cpu().data.numpy(), axis=1)
    # Loop variable renamed from `y` to `t` to avoid shadowing the label list.
    acc = sum([1 if p == t else 0 for p, t in zip(pred, y)]) / len(pred)
    return acc
def main():
    # set model
    model = getattr(models, args.model)(args)
    if args.data == 'cifar10':
        image_size = 32
        args.num_classes = 10
    elif args.data == 'cifar100':
        image_size = 32
        args.num_classes = 100
    elif args.data == 'imagenet':
        image_size = 224
        args.num_classes = 1000
    else:
        raise NotImplementedError

    n_flops, n_params = measure_model(model, image_size, image_size)
    print('FLOPs: %.2fM, Params: %.2fM' % (n_flops / 1e6, n_params / 1e6))

    if torch.cuda.device_count():
        model = torch.nn.DataParallel(model)  # for multi-GPU training
    if torch.cuda.is_available():
        model.cuda()
    print(model)

    if args.mode == 'train':
        # get the training loader and validation loader
        train_set, val_set = read_train_data(datadir=args.data_dir, data=args.data)
        # set the start epoch value
        if args.resume:
            start_epoch = None
        else:
            start_epoch = args.start_epoch
        train(startepoch=start_epoch, epochs=args.epochs, model=model,
              train_set=train_set, val_set=val_set, resume=args.resume)
    elif args.mode == 'test':
        test_set = read_test_data(datadir=args.data_dir, data=args.data, mode='test')
        test(model=model, test_set=test_set)
    else:
        raise NotImplementedError
def main(opt):
    if torch.cuda.is_available():
        device = torch.device('cuda')
        torch.cuda.set_device(opt.gpu_id)
    else:
        device = torch.device('cpu')

    if opt.network == 'resnet':
        model = resnet(opt.classes, opt.layers)
    elif opt.network == 'resnext':
        model = resnext(opt.classes, opt.layers)
    elif opt.network == 'resnext_wsl':
        # resnext_wsl must specify the opt.battleneck_width parameter
        opt.network = 'resnext_wsl_32x' + str(opt.battleneck_width) + 'd'
        model = resnext_wsl(opt.classes, opt.battleneck_width)
    elif opt.network == 'vgg':
        model = vgg_bn(opt.classes, opt.layers)
    elif opt.network == 'densenet':
        model = densenet(opt.classes, opt.layers)
    elif opt.network == 'inception_v3':
        model = inception_v3(opt.classes, opt.layers)
    elif opt.network == 'dpn':
        model = dpn(opt.classes, opt.layers)
    elif opt.network == 'effnet':
        model = effnet(opt.classes, opt.layers)
    # elif opt.network == 'pnasnet_m':
    #     model = pnasnet_m(opt.classes, opt.layers, opt.pretrained)

    # model = nn.DataParallel(model, device_ids=[4])
    # model = nn.DataParallel(model, device_ids=[0, 1, 2, 3])
    model = nn.DataParallel(model, device_ids=[opt.gpu_id, opt.gpu_id + 1])
    # model = convert_model(model)
    model = model.to(device)

    images, names = utils.read_test_data(os.path.join(opt.root_dir, opt.test_dir))
    dict_ = {}
    for crop_size in [opt.crop_size]:
        if opt.tta:
            transforms = test_transform(crop_size)
        else:
            transforms = my_transform(False, crop_size)
        dataset = TestDataset(images, names, transforms)
        loader = torch.utils.data.DataLoader(dataset, batch_size=opt.batch_size,
                                             shuffle=False, num_workers=4)

        state_dict = torch.load(opt.model_dir + '/' + opt.network + '-' +
                                str(opt.layers) + '-' + str(crop_size) + '_model.ckpt')
        if opt.network == 'densenet':
            # Rewrite legacy DenseNet keys ('norm.1.weight' -> 'norm1.weight')
            # so the checkpoint matches the current torchvision naming scheme.
            pattern = re.compile(
                r'^(.*denselayer\d+\.(?:norm|relu|conv))\.((?:[12])\.(?:weight|bias|running_mean|running_var))$'
            )
            for key in list(state_dict.keys()):
                res = pattern.match(key)
                if res:
                    new_key = res.group(1) + res.group(2)
                    state_dict[new_key] = state_dict[key]
                    del state_dict[key]
        model.load_state_dict(state_dict)

        if opt.vote:
            if opt.tta:
                im_names, labels = eval_model_tta(loader, model, device=device)
            else:
                im_names, labels = eval_model(loader, model, device=device)
        else:
            if opt.tta:
                im_names, labels = eval_logits_tta(loader, model, device=device)
            else:
                im_names, labels = eval_logits(loader, model, device)

        im_labels = []
        # print(im_names)
        for name, label in zip(im_names, labels):
            if name in dict_:
                dict_[name].append(label)
            else:
                dict_[name] = [label]

    header = ['filename', 'type']
    utils.mkdir(opt.results_dir)
    result = opt.network + '-' + str(opt.layers) + '-' + str(opt.crop_size) + '_result.csv'
    filename = os.path.join(opt.results_dir, result)
    with open(filename, 'w', encoding='utf-8') as f:
        f_csv = csv.writer(f)
        f_csv.writerow(header)
        for key in dict_.keys():
            # Sum the collected logits per image and shift to 1-based labels.
            v = np.argmax(np.sum(np.array(dict_[key]), axis=0)) + 1
            # v = list(np.sum(np.array(dict_[key]), axis=0))
            f_csv.writerow([key, v])
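# Quick illustration (not from the original script) of the DenseNet key fix
# above: legacy checkpoints store 'norm.1.weight' style keys, while newer
# torchvision models expect 'norm1.weight'. The two capture groups are
# concatenated without the intervening dot.
_pat = re.compile(
    r'^(.*denselayer\d+\.(?:norm|relu|conv))\.((?:[12])\.(?:weight|bias|running_mean|running_var))$'
)
_m = _pat.match('features.denseblock1.denselayer1.norm.1.weight')
assert _m.group(1) + _m.group(2) == 'features.denseblock1.denselayer1.norm1.weight'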
    # (Fragment: the lines below are the tail of a scoring method; the
    # signature, counter, and loop are reconstructed from context.)
    def score(self, predictions, result):
        count = 0
        for i in range(len(predictions)):
            if predictions[i]['Category'] == result[i]:
                count += 1
        return count / len(predictions)

    def define_alpha(self, validation_comments, validation_result):
        """ Helper function to find a good value for hyperparameter alpha """
        alpha = [0.00001, 0.00005, 0.0001, 0.0005, 0.001, 0.005, 0.01]
        result = np.zeros(len(alpha))
        for i in range(len(alpha)):
            print('Alpha ', i + 1, '/', len(alpha), ' : ', alpha[i])
            # Use self rather than the module-level instance the original
            # referenced, so the method works on any classifier object.
            predict = self.predict(validation_comments, alpha[i])
            result[i] = self.score(predict, validation_result)
            print(result[i])
        print(result)
        print(alpha[np.argmax(result)])
        return alpha[np.argmax(result)]


if __name__ == "__main__":
    train_data = read_train_data()
    test_data = read_test_data()
    comment = train_data[0]
    result = train_data[1]

    bayes_classifier = BayesClassifier()
    alpha_star = 0.01
    bayes_classifier.train(comment, result)
    predictions = bayes_classifier.predict(test_data, alpha_star)
    convert_to_csv(predictions)
# Read the test images and run the HDNet
test_files = get_test_data(data_main_path)
# Read the HUMBI test images and run the HDNet
# test_files = get_HUMBI_data(os.path.join(data_main_path, 'HUMBI_example.pkl'), 'test')

for f in range(len(test_files)):
    # for f in range(len(test_files[0])):
    #     time_stamp = test_files[3][f]
    #     data_name = str(test_files[6][f])
    data_name = str(test_files[f])
    # print('Processing time stamp: ', time_stamp)
    print('Processing file: ', data_name)
    print('\n')

    # X, Z, Z3, DP = read_HUMBI_data(test_files, f, IMAGE_HEIGHT, IMAGE_WIDTH)
    X, Z, Z3, _, _, _, _, _, _, _, _, DP = read_test_data(
        data_main_path, data_name, IMAGE_HEIGHT, IMAGE_WIDTH)

    # First network: predict a normal map, normalize it, and mask it with Z3.
    prediction1n = sess2.run([out2_normal], feed_dict={x1_n: X})
    normal_pred_raw = np.asarray(prediction1n)[0, ...]
    normal_pred = nmap_normalization(normal_pred_raw)
    normal_pred = np.where(Z3, normal_pred, np.zeros_like(normal_pred))

    # Stack RGB and the predicted normals into a 9-channel input tensor.
    X_1 = np.zeros((1, IMAGE_HEIGHT, IMAGE_WIDTH, 9), dtype='f')
    X_1[..., 0] = X[..., 0]
    X_1[..., 1] = X[..., 1]
    X_1[..., 2] = X[..., 2]
    X_1[..., 3] = normal_pred[..., 0]
    X_1[..., 4] = normal_pred[..., 1]
import numpy as np
from utils import read_train_data, read_test_data

# read training data
train_imgs, train_gts = read_train_data('train_data')

# remove duplicate training imgs
idx_to_rmv = []
for i in range(len(train_imgs) - 1):
    for j in range(i + 1, len(train_imgs)):
        if np.all(train_imgs[i] == train_imgs[j]):
            idx_to_rmv.append(i)
            if train_gts[i] != train_gts[j]:
                idx_to_rmv.append(j)

idx = [i for i in range(len(train_imgs)) if not (i in idx_to_rmv)]
print('unique train imgs:', len(idx))

# save unique training imgs
np.save('unique_train_imgs_rot_fixed', np.array(train_imgs)[idx])
np.save('unique_train_gts_rot_fixed', np.array(train_gts)[idx])

# read test data
test_imgs, test_gts, ids = read_test_data('test_data')

# save test data
np.save('test_imgs_rot_fixed', np.array(test_imgs))
np.save('test_gts', np.array(test_gts))
np.save('ids', np.array(ids))
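# Faster alternative (sketch, not from the original script): the pairwise
# duplicate scan above is O(n^2) in the number of images; hashing each
# image's raw bytes finds the same unique set in one pass. (Unlike the
# original, this keeps the first copy even when its label conflicts with a
# later duplicate.)
def unique_indices_sketch(imgs):
    seen = {}
    keep = []
    for i, img in enumerate(imgs):
        key = np.asarray(img).tobytes()
        if key not in seen:
            seen[key] = i
            keep.append(i)
    return keep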
@author: aminghazanfari
"""
# After running, if you get the "using TensorFlow backend" message, please run again.
import utils
import keras
import numpy as np
from keras.preprocessing import sequence

training_data = list(utils.read_training_data())
print('Number of sentences in the training data: {}'.format(len(training_data)))
development_data = list(utils.read_development_data())
print('Number of sentences in the development data: {}'.format(len(development_data)))
test_data = list(utils.read_test_data())
print('Number of sentences in the test data: {}'.format(len(test_data)))

# Construct a simple index for words
w2i = dict()
tag2i = dict()
for tagged_sentence in training_data:
    for word, tag in tagged_sentence:
        # print('The content of tag {}'.format(tag))
        # print('The content of word {}'.format(word))
        if word not in w2i:
            w2i[word] = len(w2i) + 2  # next free index; 0 and 1 are reserved
        if tag not in tag2i:
            tag2i[tag] = len(tag2i) + 1  # assumed continuation, mirroring w2i
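# Illustration (assumed usage, not in the original snippet): with w2i built,
# each sentence can be encoded and padded via the already-imported
# keras.preprocessing.sequence helper. Index 0 stays free for padding and
# index 1 for unknown words, matching the +2 offset used above.
def encode_sentences_sketch(tagged_sentences, w2i, maxlen=50):
    encoded = [[w2i.get(word, 1) for word, _tag in sent]  # 1 = unknown word
               for sent in tagged_sentences]
    return sequence.pad_sequences(encoded, maxlen=maxlen, padding='post', value=0)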
# read training data
train_imgs, train_gts = read_train_data(args.train_data_path)

# remove duplicate training imgs
idx_to_rmv = []
for i in range(len(train_imgs) - 1):
    for j in range(i + 1, len(train_imgs)):
        if np.all(train_imgs[i] == train_imgs[j]):
            idx_to_rmv.append(i)
            if train_gts[i] != train_gts[j]:
                idx_to_rmv.append(j)

idx = [i for i in range(len(train_imgs)) if not (i in idx_to_rmv)]
print('unique train imgs:', len(idx))

# save unique training imgs
np.save(os.path.join(args.save_path, 'unique_train_imgs_rot_fixed'), np.array(train_imgs)[idx])
np.save(os.path.join(args.save_path, 'unique_train_gts_rot_fixed'), np.array(train_gts)[idx])

# read test data
test_imgs, test_gts, ids = read_test_data(args.test_data_path)

# save test data
np.save(os.path.join(args.save_path, 'test_imgs_rot_fixed'), np.array(test_imgs))
np.save(os.path.join(args.save_path, 'test_gts'), np.array(test_gts))
np.save(os.path.join(args.save_path, 'ids'), np.array(ids))
# (Fragment: the body below belongs to the classify_2 function called in
# __main__; the def line is reconstructed from context.)
def classify_2(data):
    np.random.seed(0)
    random_ints = np.random.randint(0, 20, len(data))
    print(np.bincount(random_ints))
    predictions = []
    for i in range(len(data)):
        result = np.zeros(20)
        subText = data[i].split()
        for j in range(len(subText)):
            if subText[j] in subreddits:
                result[subreddits.index(subText[j])] += 1
        if np.any(result):
            # At least one subreddit name occurred: pick the most frequent.
            predictions.append({
                'Id': i,
                'Category': subreddits[np.argmax(result)]
            })
        else:
            # No keyword hit: fall back to a seeded random subreddit.
            predictions.append({
                'Id': i,
                'Category': subreddits[random_ints[i]]
            })
    return predictions


if __name__ == "__main__":
    X_test = read_test_data()
    predictions = classify_2(X_test)
    convert_to_csv(predictions)
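# Hedged alternative (illustration only): the keyword tally in classify_2 can
# also be written with collections.Counter. Ties may resolve differently than
# np.argmax (first-seen token vs. lowest subreddit index), so this is a
# sketch rather than a drop-in replacement.
from collections import Counter

def classify_2_counter_sketch(data, subreddits, random_ints):
    predictions = []
    for i, doc in enumerate(data):
        counts = Counter(tok for tok in doc.split() if tok in subreddits)
        if counts:
            category = counts.most_common(1)[0][0]
        else:
            category = subreddits[random_ints[i]]
        predictions.append({'Id': i, 'Category': category})
    return predictions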
from keras.callbacks import EarlyStopping, ModelCheckpoint
from skimage.transform import resize
from skimage import io
from skimage.util import random_noise
from matplotlib import pyplot as plt
import random
import sys

model_name = 'model-dsbowl-2018.h5'
antialias_flag = False

# get train data
X_train, Y_train = enhance_images()
# get test data
X_test, test_sizes = read_test_data()

if os.path.isfile(model_name):
    model = load_model(model_name, custom_objects={'mean_iou': mean_iou})
    # model = load_model(model_name, custom_objects={'dice_coef': dice_coef})
else:
    # get u-net model
    model = build_unet()
    # train model
    print("\nTraining ...")
    earlystopper = EarlyStopping(patience=5, verbose=1)
    checkpointer = ModelCheckpoint(model_name, verbose=1, save_best_only=True)
    results = model.fit(X_train, Y_train, validation_split=0.1, batch_size=4,
                        # original call truncated here; callbacks inferred
                        # from the two lines above
                        callbacks=[earlystopper, checkpointer])
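# Assumed continuation (illustration only): a typical DSB-2018 flow predicts
# test masks once the model is available and resizes each prediction back to
# the original image size recorded in test_sizes (assumed (height, width)).
preds_test = model.predict(X_test, verbose=1)
preds_test_upsampled = [
    resize(p[:, :, 0], size, mode='constant', preserve_range=True)
    for p, size in zip(preds_test, test_sizes)
]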
import tensorflow as tf
import utils
import ops
from tensorflow.contrib import slim
import mcnn
import cv2
from utils import load, show_density_map

file = "IMG_23"
checkpoint_dir = "checkpoint"
test_img_path = "G:/ShanghaiTech/part_B/test_data/images/" + file + ".jpg"
test_dmp_path = "G:/ShanghaiTech/part_B/test_data/ground-truth/GT_" + file + ".mat"

img, gt_dmp, gt_count = utils.read_test_data(test_img_path, test_dmp_path, scale=4)

test = tf.placeholder(tf.float32, shape=[None, None, None, 3])
estimate = mcnn.multi_column_cnn(test)

saver = tf.train.Saver()
init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    could_load, checkpoint_counter = load(checkpoint_dir, sess, saver)
    if could_load:
        print(" [*] Load SUCCESS")
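    # Assumed continuation (illustration only, not in the original snippet):
    # feed the loaded image through the network (read_test_data is assumed to
    # return a batched image), then sum the density map to get a crowd count.
    # The show_density_map call is a hypothetical usage of the imported helper.
    pred_dmp = sess.run(estimate, feed_dict={test: img})
    print('gt count: {}, estimated count: {:.1f}'.format(gt_count, pred_dmp.sum()))
    show_density_map(pred_dmp[0, :, :, 0])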
def predict():
    print("=== Predict ===")
    args = get_args()
    print(args)

    if args.bias_type != "":
        data_dir = f"./../../data/{args.mutation_tool}/{args.bias_type}/{args.mutant}/"
    else:
        data_dir = f"./../../data/{args.mutation_tool}/{args.mutant}/"

    if args.type == "mutant":
        test_labels, test_texts = read_test_data(data_dir)
    elif args.type == "original":
        generate_original_data(data_dir, mutation_tool=args.mutation_tool)
        test_labels, test_texts = read_original_data(data_dir)
    else:
        raise ValueError("Unknown type that needs to be tested")
    # test_texts = list(test_texts)[:100]
    # test_labels = list(test_labels)[:100]
    test_texts = list(test_texts)
    test_labels = list(test_labels)

    model_name = args.model
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    if args.task == "imdb" and args.type == "mutant" and (
            args.bias_type == "occupation" or args.bias_type == "country"):
        test_encodings = batch_tokenizer(tokenizer, test_texts, batch_size=10000)
    else:
        test_encodings = tokenizer(test_texts, truncation=True, padding=True,
                                   max_length=512)
    test_dataset = CustomDataset(test_encodings, test_labels)

    checkpoint_dir = f"./models/{args.task}/{args.model}/"
    best_checkpoint = find_best_checkpoint(checkpoint_dir)
    model = AutoModelForSequenceClassification.from_pretrained(best_checkpoint)
    test_trainer = Trainer(model)
    test_loader = DataLoader(test_dataset, batch_size=args.batch_size, shuffle=False)
    raw_pred, _, _ = test_trainer.prediction_loop(test_loader, description="prediction")

    # Preprocess raw predictions
    y_pred = np.argmax(raw_pred, axis=1)

    fpath = os.path.join(data_dir, f"{args.type}-predictions/{args.model}.pkl")
    parent_dir = os.path.dirname(str(fpath))
    if not os.path.exists(parent_dir):
        os.makedirs(parent_dir)
    with open(fpath, 'wb') as f:
        pickle.dump(y_pred, f)
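# --- Illustrative sketch (assumption, not the original implementation) of
# the batch_tokenizer helper used above: tokenize in fixed-size chunks to cap
# peak memory, then merge the per-chunk encodings into one dict of lists.
# padding="max_length" keeps sequence lengths consistent across chunks.
def batch_tokenizer_sketch(tokenizer, texts, batch_size=10000, max_length=512):
    merged = {}
    for start in range(0, len(texts), batch_size):
        chunk = tokenizer(texts[start:start + batch_size], truncation=True,
                          padding="max_length", max_length=max_length)
        for key, values in chunk.items():
            merged.setdefault(key, []).extend(values)
    return merged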