def test():
    print("=== Test ===")
    args = get_args()
    print(args)

    data_dir = f"./../../asset/{args.dataset}/"
    if args.train:
        test_labels, test_texts = read_train_data(data_dir)
    else:
        test_labels, test_texts = read_test_data(data_dir)
    # test_texts = list(test_texts)[:100]
    # test_labels = list(test_labels)[:100]
    test_texts = list(test_texts)
    test_labels = list(test_labels)

    model_name = args.model
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    test_encodings = tokenizer(test_texts, truncation=True, padding=True,
                               max_length=512)
    test_dataset = CustomDataset(test_encodings, test_labels)

    checkpoint_dir = f"./models/{args.task}/{args.model}/"
    best_checkpoint = find_best_checkpoint(checkpoint_dir)
    model = AutoModelForSequenceClassification.from_pretrained(best_checkpoint)
    test_trainer = Trainer(model)
    test_loader = DataLoader(test_dataset, batch_size=args.batch_size,
                             shuffle=False)
    raw_pred, _, _ = test_trainer.prediction_loop(test_loader,
                                                  description="prediction")

    # Preprocess raw predictions
    y_pred = np.argmax(raw_pred, axis=1)
    metrics = compute_metrics(y_pred, test_labels)
    print(metrics)

    if args.train:
        fpath = os.path.join(data_dir, f"train-predictions/{args.model}.pkl")
    else:
        fpath = os.path.join(data_dir, f"predictions/{args.model}.pkl")
    parent_dir = os.path.dirname(str(fpath))
    if not os.path.exists(parent_dir):
        os.makedirs(parent_dir)
    with open(fpath, 'wb') as f:
        pickle.dump(y_pred, f)
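# --- Illustrative sketch (not from the original source): one plausible
# implementation of the find_best_checkpoint helper used above, assuming
# Hugging Face checkpoint directories named "checkpoint-<global_step>".
# This version simply returns the checkpoint with the highest step count.
def find_best_checkpoint_sketch(checkpoint_dir):
    checkpoints = [
        d for d in os.listdir(checkpoint_dir)
        if d.startswith("checkpoint-") and d.split("-")[-1].isdigit()
    ]
    if not checkpoints:
        raise FileNotFoundError(f"No checkpoints found in {checkpoint_dir}")
    latest = max(checkpoints, key=lambda d: int(d.split("-")[-1]))
    return os.path.join(checkpoint_dir, latest)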
def cls_test(model, task_name):
    data = read_test_data(dir="evaluation/outputs/{}".format(task_name))
    x = data["test_x"]
    y = data["test_y"]
    x = [sent for sent in x]
    pred = np.argmax(model(x).cpu().data.numpy(), axis=1)
    # Loop variable renamed from `y` to `t` to avoid shadowing the label list.
    acc = sum([1 if p == t else 0 for p, t in zip(pred, y)]) / len(pred)
    return acc
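# Side note (illustration only, not part of the original code): the
# element-wise accuracy loop in cls_test is equivalent to one vectorized
# NumPy comparison over the prediction and label arrays.
def accuracy_sketch(pred, y):
    return float(np.mean(np.asarray(pred) == np.asarray(y)))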
def get_datas():
    train_data = read_train_data()
    comment = train_data[0]
    result = train_data[1]
    test_data = read_test_data()

    lab = []
    classes_name, classes_count = np.unique(result, return_counts=True)
    for i in range(len(result)):
        lab.append(np.where(classes_name == result[i])[0][0])
    lab = np.asarray(lab)
    return comment, lab, test_data, classes_name
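# Equivalent vectorized mapping (illustration only): np.unique with
# return_inverse=True yields exactly the integer labels that the loop in
# get_datas builds one element at a time.
def encode_labels_sketch(result):
    classes_name, lab = np.unique(result, return_inverse=True)
    return classes_name, lab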
def test_acc(model):
    data = read_test_data(dir="evaluation/outputs/yelp")
    x = data["test_x"]
    y = data["test_y"]
    model.eval()
    x = [sent for sent in x]
    pred = np.argmax(model(x).cpu().data.numpy(), axis=1)
    # Loop variable renamed from `y` to `t` to avoid shadowing the label list.
    acc = sum([1 if p == t else 0 for p, t in zip(pred, y)]) / len(pred)
    return acc
def main():
    # set model
    model = getattr(models, args.model)(args)
    if args.data == 'cifar10':
        image_size = 32
        args.num_classes = 10
    elif args.data == 'cifar100':
        image_size = 32
        args.num_classes = 100
    elif args.data == 'imagenet':
        image_size = 224
        args.num_classes = 1000
    else:
        raise NotImplementedError

    n_flops, n_params = measure_model(model, image_size, image_size)
    print('FLOPs: %.2fM, Params: %.2fM' % (n_flops / 1e6, n_params / 1e6))

    if torch.cuda.device_count():
        model = torch.nn.DataParallel(model)  # for multi-GPU training
    if torch.cuda.is_available():
        model.cuda()
    print(model)

    if args.mode == 'train':
        # get the training loader and validation loader
        train_set, val_set = read_train_data(datadir=args.data_dir, data=args.data)
        # set the start epoch value
        if args.resume:
            start_epoch = None
        else:
            start_epoch = args.start_epoch
        train(startepoch=start_epoch, epochs=args.epochs, model=model,
              train_set=train_set, val_set=val_set, resume=args.resume)
    elif args.mode == 'test':
        test_set = read_test_data(datadir=args.data_dir, data=args.data, mode='test')
        test(model=model, test_set=test_set)
    else:
        raise NotImplementedError
def main(opt):
    if torch.cuda.is_available():
        device = torch.device('cuda')
        torch.cuda.set_device(opt.gpu_id)
    else:
        device = torch.device('cpu')

    if opt.network == 'resnet':
        model = resnet(opt.classes, opt.layers)
    elif opt.network == 'resnext':
        model = resnext(opt.classes, opt.layers)
    elif opt.network == 'resnext_wsl':
        # resnext_wsl must specify the opt.battleneck_width parameter
        opt.network = 'resnext_wsl_32x' + str(opt.battleneck_width) + 'd'
        model = resnext_wsl(opt.classes, opt.battleneck_width)
    elif opt.network == 'vgg':
        model = vgg_bn(opt.classes, opt.layers)
    elif opt.network == 'densenet':
        model = densenet(opt.classes, opt.layers)
    elif opt.network == 'inception_v3':
        model = inception_v3(opt.classes, opt.layers)
    elif opt.network == 'dpn':
        model = dpn(opt.classes, opt.layers)
    elif opt.network == 'effnet':
        model = effnet(opt.classes, opt.layers)
    # elif opt.network == 'pnasnet_m':
    #     model = pnasnet_m(opt.classes, opt.layers, opt.pretrained)

    # model = nn.DataParallel(model, device_ids=[4])
    # model = nn.DataParallel(model, device_ids=[0, 1, 2, 3])
    model = nn.DataParallel(model, device_ids=[opt.gpu_id, opt.gpu_id + 1])
    # model = convert_model(model)
    model = model.to(device)

    images, names = utils.read_test_data(os.path.join(opt.root_dir, opt.test_dir))
    dict_ = {}
    for crop_size in [opt.crop_size]:
        if opt.tta:
            transforms = test_transform(crop_size)
        else:
            transforms = my_transform(False, crop_size)
        dataset = TestDataset(images, names, transforms)
        loader = torch.utils.data.DataLoader(dataset, batch_size=opt.batch_size,
                                             shuffle=False, num_workers=4)

        state_dict = torch.load(opt.model_dir + '/' + opt.network + '-' +
                                str(opt.layers) + '-' + str(crop_size) + '_model.ckpt')
        if opt.network == 'densenet':
            # Rewrite legacy DenseNet keys ('norm.1.weight' -> 'norm1.weight')
            # so the checkpoint matches the current torchvision naming scheme.
            pattern = re.compile(
                r'^(.*denselayer\d+\.(?:norm|relu|conv))\.((?:[12])\.(?:weight|bias|running_mean|running_var))$'
            )
            for key in list(state_dict.keys()):
                res = pattern.match(key)
                if res:
                    new_key = res.group(1) + res.group(2)
                    state_dict[new_key] = state_dict[key]
                    del state_dict[key]
        model.load_state_dict(state_dict)

        if opt.vote:
            if opt.tta:
                im_names, labels = eval_model_tta(loader, model, device=device)
            else:
                im_names, labels = eval_model(loader, model, device=device)
        else:
            if opt.tta:
                im_names, labels = eval_logits_tta(loader, model, device=device)
            else:
                im_names, labels = eval_logits(loader, model, device)

        im_labels = []
        # print(im_names)
        for name, label in zip(im_names, labels):
            if name in dict_:
                dict_[name].append(label)
            else:
                dict_[name] = [label]

    header = ['filename', 'type']
    utils.mkdir(opt.results_dir)
    result = opt.network + '-' + str(opt.layers) + '-' + str(opt.crop_size) + '_result.csv'
    filename = os.path.join(opt.results_dir, result)
    with open(filename, 'w', encoding='utf-8') as f:
        f_csv = csv.writer(f)
        f_csv.writerow(header)
        for key in dict_.keys():
            # Sum the collected logits per image and shift to 1-based labels.
            v = np.argmax(np.sum(np.array(dict_[key]), axis=0)) + 1
            # v = list(np.sum(np.array(dict_[key]), axis=0))
            f_csv.writerow([key, v])
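# Quick illustration (not from the original script) of the DenseNet key fix
# above: legacy checkpoints store 'norm.1.weight' style keys, while newer
# torchvision models expect 'norm1.weight'. The two capture groups are
# concatenated without the intervening dot.
_pat = re.compile(
    r'^(.*denselayer\d+\.(?:norm|relu|conv))\.((?:[12])\.(?:weight|bias|running_mean|running_var))$'
)
_m = _pat.match('features.denseblock1.denselayer1.norm.1.weight')
assert _m.group(1) + _m.group(2) == 'features.denseblock1.denselayer1.norm1.weight'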
    # (Fragment: the lines below are the tail of a scoring method; the
    # signature, counter, and loop are reconstructed from context.)
    def score(self, predictions, result):
        count = 0
        for i in range(len(predictions)):
            if predictions[i]['Category'] == result[i]:
                count += 1
        return count / len(predictions)

    def define_alpha(self, validation_comments, validation_result):
        """ Helper function to find a good value for hyperparameter alpha """
        alpha = [0.00001, 0.00005, 0.0001, 0.0005, 0.001, 0.005, 0.01]
        result = np.zeros(len(alpha))
        for i in range(len(alpha)):
            print('Alpha ', i + 1, '/', len(alpha), ' : ', alpha[i])
            # Use self rather than the module-level instance the original
            # referenced, so the method works on any classifier object.
            predict = self.predict(validation_comments, alpha[i])
            result[i] = self.score(predict, validation_result)
            print(result[i])
        print(result)
        print(alpha[np.argmax(result)])
        return alpha[np.argmax(result)]


if __name__ == "__main__":
    train_data = read_train_data()
    test_data = read_test_data()
    comment = train_data[0]
    result = train_data[1]

    bayes_classifier = BayesClassifier()
    alpha_star = 0.01
    bayes_classifier.train(comment, result)
    predictions = bayes_classifier.predict(test_data, alpha_star)
    convert_to_csv(predictions)
# Read the test images and run the HDNet
test_files = get_test_data(data_main_path)
# Read the HUMBI test images and run the HDNet
# test_files = get_HUMBI_data(os.path.join(data_main_path, 'HUMBI_example.pkl'), 'test')

for f in range(len(test_files)):
    # for f in range(len(test_files[0])):
    #     time_stamp = test_files[3][f]
    #     data_name = str(test_files[6][f])
    data_name = str(test_files[f])
    # print('Processing time stamp: ', time_stamp)
    print('Processing file: ', data_name)
    print('\n')

    # X, Z, Z3, DP = read_HUMBI_data(test_files, f, IMAGE_HEIGHT, IMAGE_WIDTH)
    X, Z, Z3, _, _, _, _, _, _, _, _, DP = read_test_data(
        data_main_path, data_name, IMAGE_HEIGHT, IMAGE_WIDTH)

    # First network: predict a normal map, normalize it, and mask it with Z3.
    prediction1n = sess2.run([out2_normal], feed_dict={x1_n: X})
    normal_pred_raw = np.asarray(prediction1n)[0, ...]
    normal_pred = nmap_normalization(normal_pred_raw)
    normal_pred = np.where(Z3, normal_pred, np.zeros_like(normal_pred))

    # Stack RGB and the predicted normals into a 9-channel input tensor.
    X_1 = np.zeros((1, IMAGE_HEIGHT, IMAGE_WIDTH, 9), dtype='f')
    X_1[..., 0] = X[..., 0]
    X_1[..., 1] = X[..., 1]
    X_1[..., 2] = X[..., 2]
    X_1[..., 3] = normal_pred[..., 0]
    X_1[..., 4] = normal_pred[..., 1]
import numpy as np
from utils import read_train_data, read_test_data

# read training data
train_imgs, train_gts = read_train_data('train_data')

# remove duplicate training imgs
idx_to_rmv = []
for i in range(len(train_imgs) - 1):
    for j in range(i + 1, len(train_imgs)):
        if np.all(train_imgs[i] == train_imgs[j]):
            idx_to_rmv.append(i)
            if train_gts[i] != train_gts[j]:
                idx_to_rmv.append(j)

idx = [i for i in range(len(train_imgs)) if not (i in idx_to_rmv)]
print('unique train imgs:', len(idx))

# save unique training imgs
np.save('unique_train_imgs_rot_fixed', np.array(train_imgs)[idx])
np.save('unique_train_gts_rot_fixed', np.array(train_gts)[idx])

# read test data
test_imgs, test_gts, ids = read_test_data('test_data')

# save test data
np.save('test_imgs_rot_fixed', np.array(test_imgs))
np.save('test_gts', np.array(test_gts))
np.save('ids', np.array(ids))
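# Faster alternative (sketch, not from the original script): the pairwise
# duplicate scan above is O(n^2) in the number of images; hashing each
# image's raw bytes finds the same unique set in one pass. (Unlike the
# original, this keeps the first copy even when its label conflicts with a
# later duplicate.)
def unique_indices_sketch(imgs):
    seen = {}
    keep = []
    for i, img in enumerate(imgs):
        key = np.asarray(img).tobytes()
        if key not in seen:
            seen[key] = i
            keep.append(i)
    return keep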
@author: aminghazanfari
"""
# After running, if you get the "using TensorFlow backend" message, please run again.
import utils
import keras
import numpy as np
from keras.preprocessing import sequence

training_data = list(utils.read_training_data())
print('Number of sentences in the training data: {}'.format(len(training_data)))
development_data = list(utils.read_development_data())
print('Number of sentences in the development data: {}'.format(len(development_data)))
test_data = list(utils.read_test_data())
print('Number of sentences in the test data: {}'.format(len(test_data)))

# Construct a simple index for words
w2i = dict()
tag2i = dict()
for tagged_sentence in training_data:
    for word, tag in tagged_sentence:
        # print('The content of tag {}'.format(tag))
        # print('The content of word {}'.format(word))
        if word not in w2i:
            w2i[word] = len(w2i) + 2  # next free index; 0 and 1 are reserved
        if tag not in tag2i:
            tag2i[tag] = len(tag2i) + 1  # assumed continuation, mirroring w2i
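# Illustration (assumed usage, not in the original snippet): with w2i built,
# each sentence can be encoded and padded via the already-imported
# keras.preprocessing.sequence helper. Index 0 stays free for padding and
# index 1 for unknown words, matching the +2 offset used above.
def encode_sentences_sketch(tagged_sentences, w2i, maxlen=50):
    encoded = [[w2i.get(word, 1) for word, _tag in sent]  # 1 = unknown word
               for sent in tagged_sentences]
    return sequence.pad_sequences(encoded, maxlen=maxlen, padding='post', value=0)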
# read training data
train_imgs, train_gts = read_train_data(args.train_data_path)

# remove duplicate training imgs
idx_to_rmv = []
for i in range(len(train_imgs) - 1):
    for j in range(i + 1, len(train_imgs)):
        if np.all(train_imgs[i] == train_imgs[j]):
            idx_to_rmv.append(i)
            if train_gts[i] != train_gts[j]:
                idx_to_rmv.append(j)

idx = [i for i in range(len(train_imgs)) if not (i in idx_to_rmv)]
print('unique train imgs:', len(idx))

# save unique training imgs
np.save(os.path.join(args.save_path, 'unique_train_imgs_rot_fixed'), np.array(train_imgs)[idx])
np.save(os.path.join(args.save_path, 'unique_train_gts_rot_fixed'), np.array(train_gts)[idx])

# read test data
test_imgs, test_gts, ids = read_test_data(args.test_data_path)

# save test data
np.save(os.path.join(args.save_path, 'test_imgs_rot_fixed'), np.array(test_imgs))
np.save(os.path.join(args.save_path, 'test_gts'), np.array(test_gts))
np.save(os.path.join(args.save_path, 'ids'), np.array(ids))
# (Fragment: the body below belongs to the classify_2 function called in
# __main__; the def line is reconstructed from context.)
def classify_2(data):
    np.random.seed(0)
    random_ints = np.random.randint(0, 20, len(data))
    print(np.bincount(random_ints))
    predictions = []
    for i in range(len(data)):
        result = np.zeros(20)
        subText = data[i].split()
        for j in range(len(subText)):
            if subText[j] in subreddits:
                result[subreddits.index(subText[j])] += 1
        if np.any(result):
            # At least one subreddit name occurred: pick the most frequent.
            predictions.append({
                'Id': i,
                'Category': subreddits[np.argmax(result)]
            })
        else:
            # No keyword hit: fall back to a seeded random subreddit.
            predictions.append({
                'Id': i,
                'Category': subreddits[random_ints[i]]
            })
    return predictions


if __name__ == "__main__":
    X_test = read_test_data()
    predictions = classify_2(X_test)
    convert_to_csv(predictions)
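# Hedged alternative (illustration only): the keyword tally in classify_2 can
# also be written with collections.Counter. Ties may resolve differently than
# np.argmax (first-seen token vs. lowest subreddit index), so this is a
# sketch rather than a drop-in replacement.
from collections import Counter

def classify_2_counter_sketch(data, subreddits, random_ints):
    predictions = []
    for i, doc in enumerate(data):
        counts = Counter(tok for tok in doc.split() if tok in subreddits)
        if counts:
            category = counts.most_common(1)[0][0]
        else:
            category = subreddits[random_ints[i]]
        predictions.append({'Id': i, 'Category': category})
    return predictions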
from keras.callbacks import EarlyStopping, ModelCheckpoint
from skimage.transform import resize
from skimage import io
from skimage.util import random_noise
from matplotlib import pyplot as plt
import random
import sys

model_name = 'model-dsbowl-2018.h5'
antialias_flag = False

# get train data
X_train, Y_train = enhance_images()
# get test data
X_test, test_sizes = read_test_data()

if os.path.isfile(model_name):
    model = load_model(model_name, custom_objects={'mean_iou': mean_iou})
    # model = load_model(model_name, custom_objects={'dice_coef': dice_coef})
else:
    # get u-net model
    model = build_unet()
    # train model
    print("\nTraining ...")
    earlystopper = EarlyStopping(patience=5, verbose=1)
    checkpointer = ModelCheckpoint(model_name, verbose=1, save_best_only=True)
    results = model.fit(X_train, Y_train, validation_split=0.1, batch_size=4,
                        # original call truncated here; callbacks inferred
                        # from the two lines above
                        callbacks=[earlystopper, checkpointer])
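# Assumed continuation (illustration only): a typical DSB-2018 flow predicts
# test masks once the model is available and resizes each prediction back to
# the original image size recorded in test_sizes (assumed (height, width)).
preds_test = model.predict(X_test, verbose=1)
preds_test_upsampled = [
    resize(p[:, :, 0], size, mode='constant', preserve_range=True)
    for p, size in zip(preds_test, test_sizes)
]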
import tensorflow as tf
import utils
import ops
from tensorflow.contrib import slim
import mcnn
import cv2
from utils import load, show_density_map

file = "IMG_23"
checkpoint_dir = "checkpoint"
test_img_path = "G:/ShanghaiTech/part_B/test_data/images/" + file + ".jpg"
test_dmp_path = "G:/ShanghaiTech/part_B/test_data/ground-truth/GT_" + file + ".mat"

img, gt_dmp, gt_count = utils.read_test_data(test_img_path, test_dmp_path, scale=4)

test = tf.placeholder(tf.float32, shape=[None, None, None, 3])
estimate = mcnn.multi_column_cnn(test)

saver = tf.train.Saver()
init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    could_load, checkpoint_counter = load(checkpoint_dir, sess, saver)
    if could_load:
        print(" [*] Load SUCCESS")
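    # Assumed continuation (illustration only, not in the original snippet):
    # feed the loaded image through the network (read_test_data is assumed to
    # return a batched image), then sum the density map to get a crowd count.
    # The show_density_map call is a hypothetical usage of the imported helper.
    pred_dmp = sess.run(estimate, feed_dict={test: img})
    print('gt count: {}, estimated count: {:.1f}'.format(gt_count, pred_dmp.sum()))
    show_density_map(pred_dmp[0, :, :, 0])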
def predict():
    print("=== Predict ===")
    args = get_args()
    print(args)

    if args.bias_type != "":
        data_dir = f"./../../data/{args.mutation_tool}/{args.bias_type}/{args.mutant}/"
    else:
        data_dir = f"./../../data/{args.mutation_tool}/{args.mutant}/"

    if args.type == "mutant":
        test_labels, test_texts = read_test_data(data_dir)
    elif args.type == "original":
        generate_original_data(data_dir, mutation_tool=args.mutation_tool)
        test_labels, test_texts = read_original_data(data_dir)
    else:
        raise ValueError("Unknown type that needs to be tested")
    # test_texts = list(test_texts)[:100]
    # test_labels = list(test_labels)[:100]
    test_texts = list(test_texts)
    test_labels = list(test_labels)

    model_name = args.model
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    if args.task == "imdb" and args.type == "mutant" and (
            args.bias_type == "occupation" or args.bias_type == "country"):
        test_encodings = batch_tokenizer(tokenizer, test_texts, batch_size=10000)
    else:
        test_encodings = tokenizer(test_texts, truncation=True, padding=True,
                                   max_length=512)
    test_dataset = CustomDataset(test_encodings, test_labels)

    checkpoint_dir = f"./models/{args.task}/{args.model}/"
    best_checkpoint = find_best_checkpoint(checkpoint_dir)
    model = AutoModelForSequenceClassification.from_pretrained(best_checkpoint)
    test_trainer = Trainer(model)
    test_loader = DataLoader(test_dataset, batch_size=args.batch_size, shuffle=False)
    raw_pred, _, _ = test_trainer.prediction_loop(test_loader, description="prediction")

    # Preprocess raw predictions
    y_pred = np.argmax(raw_pred, axis=1)

    fpath = os.path.join(data_dir, f"{args.type}-predictions/{args.model}.pkl")
    parent_dir = os.path.dirname(str(fpath))
    if not os.path.exists(parent_dir):
        os.makedirs(parent_dir)
    with open(fpath, 'wb') as f:
        pickle.dump(y_pred, f)
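# --- Illustrative sketch (assumption, not the original implementation) of
# the batch_tokenizer helper used above: tokenize in fixed-size chunks to cap
# peak memory, then merge the per-chunk encodings into one dict of lists.
# padding="max_length" keeps sequence lengths consistent across chunks.
def batch_tokenizer_sketch(tokenizer, texts, batch_size=10000, max_length=512):
    merged = {}
    for start in range(0, len(texts), batch_size):
        chunk = tokenizer(texts[start:start + batch_size], truncation=True,
                          padding="max_length", max_length=max_length)
        for key, values in chunk.items():
            merged.setdefault(key, []).extend(values)
    return merged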