def predict():
    """Flask endpoint: run the model over the JSON body's 'comment' field.

    Returns:
        JSON-serialized predictions on success, or a ("message", 400) tuple
        when the request body is not JSON or lacks the 'comment' key.
    """
    try:
        # request.json is None for non-JSON bodies (subscripting raises
        # TypeError); KeyError when 'comment' is absent. The original used a
        # bare `except:` which also swallowed real model failures.
        input_data = flask.request.json['comment']
    except (TypeError, KeyError):
        return "JSON containing 'comment' key must be passed", 400
    preds = get_predictions(input_data)
    return flask.jsonify(preds)
def lossandaccuracy(loader, model, factor):
    """Evaluate `model` over `loader`; return (mean combined loss, mean IoU).

    The combined loss is a weighted cross-entropy (pixel weights from
    `spatialWeights`), plus a factor-weighted blend of a dice loss and a
    surface loss. Relies on module-level `criterion`, `criterion_DICE`,
    `criterion_SL`, `device`, `get_predictions` and `mIoU`.

    Args:
        loader: yields (img, labels, index, spatialWeights, maxDist) batches.
        model: segmentation network mapping images to per-pixel logits.
        factor: blend weight; loss = (1-factor)*surface + factor*dice + weighted CE.
    """
    epoch_loss = []
    ious = []
    model.eval()
    with torch.no_grad():
        for i, batchdata in enumerate(loader):
            # print (len(batchdata))
            img, labels, index, spatialWeights, maxDist = batchdata
            data = img.to(device)
            target = labels.to(device).long()
            output = model(data)
            ## loss from cross entropy is weighted sum of pixel wise loss and Canny edge loss *20
            # Per-pixel CE scaled by (1 + spatialWeights): boundary pixels weigh more.
            CE_loss = criterion(output, target)
            loss = CE_loss * (torch.from_numpy(np.ones(
                spatialWeights.shape)).to(torch.float32).to(device) +
                (spatialWeights).to(torch.float32).to(device))
            loss = torch.mean(loss).to(torch.float32).to(device)
            loss_dice = criterion_DICE(output, target)
            loss_sl = torch.mean(
                criterion_SL(output.to(device), (maxDist).to(device)))
            ##total loss is the weighted sum of suface loss and dice loss plus the boundary weighted cross entropy loss
            loss = (1 - factor) * loss_sl + factor * (loss_dice) + loss
            epoch_loss.append(loss.item())
            predict = get_predictions(output)
            iou = mIoU(predict, labels)
            ious.append(iou)
    return np.average(epoch_loss), np.average(ious)
def main():
    """Resume a trained FCDenseNet57 and save predicted masks for the val split.

    Loads fixed checkpoint weights via `experiment.Experiment.resume`, then
    writes one mask image per input under --SAVE_DIR.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--EXP_NAME', type=str, default='segment')
    parser.add_argument('--EXP_DIR', type=str, default='/home/yangle/result/TrainNet/')
    parser.add_argument('--DATASET_PATH', type=str, default='/home/yangle/BasicDataset/dataset/MSRA10K/')
    parser.add_argument('--SAVE_DIR', type=str, default='/home/yangle/result/mask/MSRA10K/')
    parser.add_argument('--LEARNING_RATE', type=float, default=1e-4)
    parser.add_argument('--WEIGHT_DECAY', type=float, default=0.0001)
    args = parser.parse_args()
    if not os.path.exists(args.SAVE_DIR):
        os.makedirs(args.SAVE_DIR)
    normalize = transforms.Normalize(mean=saliency.mean, std=saliency.std)
    # NOTE(review): built but never used below — joint_transform=None is passed instead.
    test_joint_transformer = transforms.Compose(
        [joint_transforms.JointResize(224)])
    test_dset = saliency.TestImage(
        args.DATASET_PATH, 'val', joint_transform=None,
        transform=transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ]))
    test_loader = torch.utils.data.DataLoader(test_dset, batch_size=1, shuffle=False)
    model = tiramisu.FCDenseNet57(in_channels=3, n_classes=2)
    # model = model.cuda()
    model = torch.nn.DataParallel(model).cuda()
    # Optimizer is only needed so the checkpoint's optimizer state can be restored.
    optimizer = optim.RMSprop(model.parameters(), lr=args.LEARNING_RATE,
                              weight_decay=args.WEIGHT_DECAY)
    exper = experiment.Experiment(args.EXP_NAME, args.EXP_DIR)
    # exper.resume(model, optimizer)
    base_path = args.EXP_DIR + args.EXP_NAME + '/weights/'
    weights_fpath = base_path + 'segment-weights-132-0.109-4.278-0.120-4.493.pth'
    optim_path = base_path + 'segment-optim-132.pth'
    exper.resume(model, optimizer, weights_fpath, optim_path)
    # count = 1
    for count, (img, name) in enumerate(test_loader):
        # for img, name in test_loader:
        # NOTE(review): `volatile=True` is a no-op/removed in modern PyTorch;
        # torch.no_grad() is the current equivalent.
        data = Variable(img.cuda(), volatile=True)
        output = model(data)
        pred = utils.get_predictions(output)
        pred = pred[0]
        img_name = name[0]
        # img_name = str(name)
        # img_name = img_name.replace('tif', 'png')
        save_path = args.SAVE_DIR + img_name
        torchvision.utils.save_image(pred, save_path)
        print(count)
def train(model, trainloader, vaildloader, device=device, EPOCHS=100, lr=1e-5, debug=False):
    """Fine-tune a BERT-style classifier; checkpoint the best model by val accuracy.

    Args:
        model: HuggingFace-style model returning (loss, ...) when `labels` is given.
        trainloader: yields (tokens, segments, masks, labels) tensor tuples.
        vaildloader: validation loader consumed by `get_predictions`.
        device: target device (defaults to the module-level `device`).
        EPOCHS: number of passes over `trainloader`.
        lr: Adam learning rate.
        debug: when truthy, stop after one batch per epoch (smoke test).

    Side effects: prints per-epoch metrics and dumps the best model to
    'checkpoint/bert_model.pkl' via joblib.
    """
    model.train()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    best_acc = -np.inf
    for epoch in range(EPOCHS):
        running_loss = 0.0
        for data in tqdm(trainloader):
            tokens_tensors, segments_tensors, masks_tensors, labels = [
                t.to(device) for t in data
            ]
            optimizer.zero_grad()
            outputs = model(input_ids=tokens_tensors,
                            token_type_ids=segments_tensors,
                            attention_mask=masks_tensors,
                            labels=labels)
            loss = outputs[0]  # HF models return the loss first when labels are passed
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            if debug:  # idiom fix: was `debug == True`
                break
        _, _, acc = get_predictions(model, vaildloader, compute_acc=True)
        print('[epoch %d] loss: %.3f, acc: %.3f' %
              (epoch + 1, running_loss, acc))
        if acc >= best_acc:
            best_acc = acc
            print('model is improve so dump model')
            joblib.dump(model, 'checkpoint/bert_model.pkl')
def calc_entropy(data):
    """Return the Shannon entropy (in bits) of the label distribution of `data`."""
    probs = get_predictions(get_counts(data, LABEL_LOCATION))
    # H = -sum(p * log2 p); math.log(p, 2) kept to match the original's rounding.
    return -sum(probs[label] * math.log(probs[label], 2) for label in probs)
def mixup(model, s_x, s_y, t_x, alpha, num_class, temperature):
    """Mixup augmentation between a labelled source batch and a pseudo-labelled target batch.

    Returns (mixed_images, mixed_labels) interpolated with per-sample Beta(alpha, alpha)
    coefficients, clamped so the source sample always dominates.
    """
    # Pseudo-labels for the target batch come from the model itself.
    t_y = get_predictions(model, t_x, num_class, temperature)
    lam = torch.distributions.Beta(alpha, alpha).sample([s_x.shape[0]]).to('cuda')
    lam = torch.max(lam, 1 - lam)  # source-dominant mixing coefficient
    lam_feat = lam.view(lam.shape[0], 1, 1, 1)  # broadcast over (C, H, W)
    lam_lab = lam.view(lam.shape[0], 1)         # broadcast over label dim
    mixed_x = lam_feat * s_x + (1 - lam_feat) * t_x
    mixed_y = lam_lab * s_y + (1 - lam_lab) * t_y
    return mixed_x, mixed_y
def cutmix(model, s_x, s_y, t_x, alpha, num_class):
    """CutMix augmentation: paste a Beta-sampled box from the target batch into the source batch.

    Returns (augmented images, box-area-weighted labels), both moved to CUDA.
    """
    x_aug = s_x.clone()
    temp_tx = t_x.clone()
    # NOTE(review): `temperature` is not a parameter here (unlike mixup above) —
    # it must exist at module scope, otherwise this line raises NameError. Confirm.
    t_y = get_predictions(model, t_x, num_class, temperature)
    mu = np.random.beta(alpha, alpha)
    lmbd = np.maximum(mu, 1 - mu)  # source-dominant mixing coefficient
    lmbd = torch.tensor(lmbd)
    bbx1, bby1, bbx2, bby2 = generate_bbox(s_x.size(), lmbd)
    # Replace the box region of the source images with the target pixels.
    x_aug[:, :, bbx1:bbx2, bby1:bby2] = temp_tx[:, :, bbx1:bbx2, bby1:bby2]
    y_aug = (lmbd * s_y.float()) + ((1 - lmbd) * t_y.float())
    return x_aug.cuda(), y_aug.cuda()
def evaluate(ckpt, image_folder, gpu):
    """Run model evaluation

    Running this evaluation script will output a csv file, `result.csv`
    at the working directory (plus `confidence.csv` with per-class softmax
    scores). Returns the DataFrame of softmax logits.
    """
    model_info = torch.load(ckpt, map_location='cpu')
    model = Model_(ckpt=ckpt, gpu=gpu).eval()
    if gpu:
        model = model.cuda()
    # dataset
    dataset = TestDataset(image_folder=image_folder,
                          shape=model_info['input_size'])
    dataloader = DataLoader(dataset, batch_size=1)
    filenames = []
    logits = []
    predicted_indices, predicted_strings = [], []
    for filename, each_image in dataloader:
        print(filename[0])
        if gpu:
            each_image = each_image.cuda()
        with torch.no_grad():
            logit, _, _ = model(each_image)
        logit = logit.squeeze()
        logit = torch.softmax(logit, dim=0)  # per-class probabilities
        logit = logit.cpu().data.numpy()
        predicted_idx, predicted_str = get_predictions(logit)
        logits.append(logit)
        filenames.append(filename)
        predicted_indices.append(predicted_idx)
        predicted_strings.append(predicted_str)
    # write out the result
    filenames = pd.DataFrame(np.stack(filenames, axis=0))
    logits = pd.DataFrame(np.stack(logits, axis=0))
    predicted_indices = pd.DataFrame(np.stack(predicted_indices, axis=0))
    predicted_strings = pd.DataFrame(np.stack(predicted_strings, axis=0))
    # save the confidence and result
    confidence = pd.concat([filenames, logits], axis=1)
    confidence.to_csv('./data/confidence.csv', index=None, header=False)
    result = pd.concat([filenames, predicted_indices, predicted_strings], axis=1)
    result.to_csv('./data/result.csv', index=None, header=False)
    return logits
def get_mask_from_cv2_image(image, model, useGpu=True, pupilOnly=False):
    """Run `model` on an image tensor and return a float segmentation mask.

    The raw class map (values in 0..3) is remapped to 1 - label/3. When
    `pupilOnly` is set, the mask is collapsed to {0, 0.5, 1} levels.
    """
    device = torch.device("cuda" if useGpu else "cpu")
    batch = image.unsqueeze(1).to(device)  # add a channel dimension
    scores = model(batch)
    labels = get_predictions(scores)
    mask = 1 - labels[0].cpu().numpy() / 3.0
    return np.ceil(mask) * 0.5 if pupilOnly else mask
def evaluate(validation_generator):
    """Evaluate the saved CNN on the cats/dogs validation set.

    Prints loss/accuracy and saves a row-normalized confusion-matrix plot.
    Ground truth is inferred from directory listing order: cats (0) then dogs (1).
    """
    n_cats = len(os.listdir('../data/validation/cats/'))
    n_dogs = len(os.listdir('../data/validation/dogs/'))
    y_true = np.array([0] * n_cats + [1] * n_dogs)
    model = load_model('../CNN/cnn.h5')
    print(model.summary())
    scores = model.evaluate_generator(validation_generator)
    print('val_loss: {}, val_acc: {}'.format(scores[0], scores[1]))
    y_pred = get_predictions(model, validation_generator)
    cm = confusion_matrix(y_true, y_pred)
    # Normalize each row so cells read as per-class percentages.
    row_totals = cm.sum(axis=1)[:, np.newaxis]
    cm_percent = cm.astype('float') / row_totals
    # plot percentage confusion matrix
    plot_confusion_matrix(cm_percent, class_names=['Cat', 'Dog'])
    plt.savefig('../MLP/cm_percent_val.png', format='png')
    plt.show()
def get_mask_from_path(path: str, model, useGpu=True):
    """Load a grayscale image from `path`, enhance it, and return the model's prediction.

    Pipeline: gamma adjustment (LUT, gamma 0.8) -> CLAHE -> (1, 1, H, W)
    float tensor -> model -> get_predictions.

    NOTE(review): feeds raw 0-255 intensities to the model; confirm whether the
    network expects the normalization used elsewhere in this project.
    """
    if useGpu:
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")
    clahe = cv2.createCLAHE(clipLimit=1.5, tileGridSize=(8, 8))
    pilimg = Image.open(path).convert("L")
    table = 255.0 * (np.linspace(0, 1, 256) ** 0.8)
    pilimg = cv2.LUT(np.array(pilimg), table)
    img = clahe.apply(np.array(np.uint8(pilimg)))
    # BUG FIX: the original wrapped `img` back into a PIL Image and then called
    # .unsqueeze()/.to() on it, which PIL images do not have (AttributeError).
    # Convert the numpy array to a float tensor with batch+channel dims instead.
    data = torch.from_numpy(img).to(torch.float32).unsqueeze(0).unsqueeze(0).to(device)
    output = model(data)
    predict = get_predictions(output)
    return predict
def main():
    """Generate saliency masks for the DUTS val split with a trained FCDenseNet57."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--DATASET_PATH', type=str, default='/home/zhangdong/database/DUTS/')
    parser.add_argument('--SAVE_DIR', type=str, default='/home/yangle/DAVIS/result/DUTS/')
    args = parser.parse_args()
    normalize = transforms.Normalize(mean=saliency.mean, std=saliency.std)
    # test_joint_transforms = transforms.Compose([joint_transforms.JointResize(224)])
    test_dset = saliency.TestImage(
        args.DATASET_PATH, 'val', joint_transform=None,
        transform=transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            normalize
        ]))
    model = tiramisu.FCDenseNet57(n_classes=2)
    # model = model.cuda()
    # Weights were saved from a DataParallel model, so wrap before loading,
    # then unwrap back to the plain module for inference.
    model = torch.nn.DataParallel(model).cuda()
    weight_path = '/home/yangle/DAVIS/result/TrainNet/tiramisu/weights/latest_weights.pth'
    state = torch.load(weight_path)
    model.load_state_dict(state['state_dict'])
    model = model.module
    test_loader = torch.utils.data.DataLoader(test_dset, batch_size=1, shuffle=False)
    count = 1
    for data, name in test_loader:
        # NOTE(review): `volatile=True` is removed in modern PyTorch;
        # torch.no_grad() is the current equivalent.
        data = Variable(data.cuda(), volatile=True)
        _, _, hei, wid = data.size()
        output = model(data)
        pred = utils.get_predictions(output)
        pred = pred[0]
        # transforms_size = torchvision.transforms.Resize((hei, wid))
        # mask = transforms_size([pred])
        name = name[0]
        img_name = str(name)
        save_path = args.SAVE_DIR + img_name
        torchvision.utils.save_image(pred, save_path)
        print(count)
        count += 1
def main():
    """Run an 8-channel FcDnSubtle model over the cat_128 val split and save masks.

    Each batch concatenates image, co-mask, context image and fore-mask into an
    8-channel input; predictions are written as numbered PNGs under --SAVE_DIR.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--DATASET_PATH', type=str, default='/disk5/yangle/DAVIS/dataset/cat_128/process/')
    parser.add_argument('--SAVE_DIR', type=str, default='/disk5/yangle/DAVIS/result/tiramisu/tiramisu57-8chs/')
    parser.add_argument('--LEARNING_RATE', type=float, default=1e-4)
    parser.add_argument('--WEIGHT_DECAY', type=float, default=0.0001)
    args = parser.parse_args()
    if not os.path.exists(args.SAVE_DIR):
        os.makedirs(args.SAVE_DIR)
    normalize = transforms.Normalize(mean=saliency.mean, std=saliency.std)
    test_dset = saliency.Saliency(
        args.DATASET_PATH, 'val', joint_transform=None,
        transform=transforms.Compose([transforms.ToTensor(), normalize, ]))
    test_loader = torch.utils.data.DataLoader(test_dset, batch_size=1, shuffle=False)
    # model = tiramisu.FCDenseNet67(in_channels=8, n_classes=N_CLASSES)
    model = tiramisu.FcDnSubtle(in_channels=8, n_classes=N_CLASSES)
    # model = model.cuda()
    model = torch.nn.DataParallel(model).cuda()
    weights_fpath = 'cat_8ch-111.pth'
    state = torch.load(weights_fpath)
    model.load_state_dict(state['state_dict'])
    model.eval()
    count = 1
    for iord, (img, target, img_cont, fomask, comask) in enumerate(test_loader):
        # Build the 8-channel input expected by FcDnSubtle.
        inputs = torch.cat((img, comask, img_cont, fomask), 1)
        # NOTE(review): `volatile=True` is removed in modern PyTorch;
        # torch.no_grad() is the current equivalent.
        inputs = Variable(inputs.cuda(), volatile=True)
        output = model(inputs)
        pred = utils.get_predictions(output)
        pred = pred[0]
        img_name = str(iord) + '.png'
        save_path = args.SAVE_DIR + img_name
        torchvision.utils.save_image(pred, save_path)
        print(count)
        count += 1
import joblib  # BUG FIX: joblib.load is called below but joblib was never imported
import pandas as pd
from torch.utils.data import DataLoader
from transformers import BertTokenizer

from utils import FakeNewsDataset, get_predictions, create_mini_batch

# load trained model
tokenizer = BertTokenizer.from_pretrained("bert-base-chinese")
model = joblib.load('checkpoint/bert_model.pkl')
model.eval()

# load testset
testset = FakeNewsDataset("test", tokenizer=tokenizer)
testloader = DataLoader(testset, batch_size=256, collate_fn=create_mini_batch)

# get predictions
predictions, confidence = get_predictions(model, testloader)

# Map predicted label ids back to label strings.
index_map = {v: k for k, v in testset.label_map.items()}

# Build the Kaggle submission dataframe.
df = pd.DataFrame({"Category": predictions.tolist()})
df['Category'] = df.Category.apply(lambda x: index_map[x])
df['Confidence'] = confidence.detach().cpu().numpy()
df_pred = pd.concat([testset.df.loc[:, ["Id"]],
                     df.loc[:, ['Category', 'Confidence']]], axis=1)
print(df_pred.head())

# Kaggle upload does not need the Confidence column.
df_pred = df_pred.drop('Confidence', axis=1)
df_pred.to_csv('output/bert_1_prec_training_samples.csv', index=False)
print('save done!')
def __init__(self, rows):
    """Build label counts for `rows` and the prediction distribution derived from them."""
    counts = get_counts(rows, LABEL_LOCATION)
    self.counts = counts
    self.predictions = get_predictions(counts)
    # Resolved elsewhere; starts unset.
    self.data_class = None
def get_pupil_ellipse_from_cv2_image(image, model, useGpu=True, predict=None,
                                     isEllseg=False, ellsegPrecision=None,
                                     ellsegEllipse=False, debugWindowName=None):
    """Return pupil ellipse parameters fitted from a segmentation of `image`.

    OUTPUT FORMAT
    {
      0: center x,
      1: center y,
      2: ellipse major axis radius,
      3: ellipse minor axis radius,
      4: ellipse angle
    }

    When `predict` is given, segmentation is skipped and it is used directly.
    When `isEllseg`, the EllSeg encoder/decoder path is used; with
    `ellsegEllipse` the ellipse comes straight from the regression head
    instead of from `get_pupil_parameters`.
    """
    if useGpu:
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")
    if predict is None:
        if not isEllseg:
            img = image.unsqueeze(1)
            data = img.to(device)
            output = model(data)
            predict = get_predictions(output)
            pred_img = predict[0].numpy()
        else:
            # w:320 h:240
            # Standardize the grayscale image before feeding EllSeg.
            img = np.array(transforms.ToPILImage()(image).convert("L"))
            img = (img - img.mean()) / img.std()
            img = torch.from_numpy(img).unsqueeze(0).to(
                ellsegPrecision)  # Adds a singleton for channels
            img = img.unsqueeze(0)
            img = img.to(device).to(ellsegPrecision)
            x4, x3, x2, x1, x = model.enc(img)
            op = model.dec(x4, x3, x2, x1, x)
            if ellsegEllipse:
                # Option to get ellipse directly from ellseg output
                ellpred = model.elReg(x, 0).view(-1)
                _, _, H, W = img.shape
                # Homography from normalized coords back to pixel space.
                H_mat = np.array([[W / 2, 0, W / 2], [0, H / 2, H / 2], [0, 0, 1]])
                # ellpred[5:] holds the pupil ellipse; [:5] would be the iris.
                p_cx, p_cy, p_a, p_b, p_theta, _ = my_ellipse(
                    ellpred[5:].tolist()).transform(H_mat)[0]
                return [p_cx, p_cy, p_a, p_b, p_theta]  # [centerX, centerY, axis1, axis2, angle]
            #elOut = model.elReg(x, 0) # Linear regression to ellipse parameters
            #print(elOut.shape)
            predict = get_predictions(op)
            pred_img = predict[0].numpy()
            # cv2.imshow("ELLIPSE", pred_img/2)
    else:
        pred_img = predict[0].numpy()
    if debugWindowName is not None:
        # Normalize for display only.
        outIm = pred_img / np.max(pred_img)
        cv2.imshow(debugWindowName, outIm)
    return get_pupil_parameters(pred_img)
# ---- min_price model ----
dummy_vars = ['touchscreen', 'detachable_keyboard', 'discrete_gpu']
target_vars = ['min_price', 'max_price']
target = 'min_price'
# NOTE(review): `df` and `cat_vars` come from earlier in the script (not shown here).
num_vars = [col for col in df.columns if col not in cat_vars + dummy_vars + target_vars]
variable_lists = [cat_vars, dummy_vars, target_vars, num_vars]
df = utils.imputation(df)
utils.drop_columns(df, ['name', 'base_name', 'pixels_y'], variable_lists)
# utils.decrease_cat_size_handling(df, cat_vars, target)
# df = utils.one_hot_encoding(df, cat_vars)
utils.smooth_handling(df, cat_vars, target)
estimator = xgb.XGBRegressor(n_estimators=200, max_depth=4, gamma=0.3,
                             colsample_bytree=0.6, subsample=1, min_child_weight=15)
df_min = utils.fit_predict(df, estimator, target, 'id', 'MIN')
df_complete_predictions = utils.get_predictions(df, estimator, target, 'id', 'min_price_pred')

##### max_price
# Rebuild the merged train/test frame for the max_price target.
train_min = pd.read_csv("train.csv")
train_min.drop(columns=['min_price'], inplace=True)
test_min = pd.read_csv("test.csv")
df = utils.merge_train_test(train_min, test_min, 'min_price')
cat_vars = ['name', 'brand', 'base_name', 'cpu', 'cpu_details', 'gpu', 'os',
            'os_details', 'screen_surface']
dummy_vars = ['touchscreen', 'detachable_keyboard', 'discrete_gpu']
target_vars = ['min_price', 'max_price']
target = 'max_price'
num_vars = [col for col in df.columns if col not in cat_vars + dummy_vars + target_vars]
variable_lists = [cat_vars, dummy_vars, target_vars, num_vars]
while infinite: camlinks_closed_count = 0 # print(infinite) for ix, cam in enumerate(cams): # print(ix) # if cam.grabbed: if cam.more(): frame_time, frame = cam.read() face_locations = detector.detect_faces(frame) if len(face_locations) > 0: detected_faces = detector.extract_faces(frame, face_locations) embeddings = facenet.get_embeddings(detected_faces) if embeddings is not None: predictions = u.get_predictions(embeddings, annoy_object, labels, DEFAULT_THRESH) for pred in predictions: stuID = pred[0] dist = pred[1] if stuID != "Unknown": log = f"{frame_time} -- {stuID} - {dist}" print(log) # day = frame_time.date().strftime("%d/%m/%Y") # tm = frame_time.time().strftime("%H:%M:%S") if stuID not in logs.keys(): logs[stuID] = [{"dt":frame_time, "dist":dist}] else: lastLog = logs[stuID][-1] minutes = int((frame_time - lastLog["dt"]).total_seconds()//60)
def get_mask_from_cv2_image(image, model, useGpu=True, pupilOnly=False,
                            includeRawPredict=False, channels=3,
                            trim_pupil=False, isEllseg=False,
                            ellsegPrecision=None, useEllsegEllipseAsMask=False):
    """Segment an eye image and return a float mask (optionally plus the raw labels).

    Two backends: the default model(data) path, or the EllSeg encoder/decoder
    (`isEllseg`), which can alternatively rasterize the regressed iris/pupil
    ellipses directly (`useEllsegEllipseAsMask`). Class labels are remapped to
    1 - (label+1)/channels. `trim_pupil` keeps only the largest non-background
    connected component. Returns (mask, raw_labels) when `includeRawPredict`.
    """
    if useGpu:
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")
    if not isEllseg:
        img = image.unsqueeze(1)
        data = img.to(device)
        output = model(data)
        rawpredict = get_predictions(output)
        predict = rawpredict + 1
        # print(np.unique(predict[0].cpu().numpy()))
        pred_img = 1 - predict[0].cpu().numpy() / channels
    else:
        # Standardize the grayscale image before feeding EllSeg.
        img = np.array(Image.fromarray(image).convert("L"))
        img = (img - img.mean()) / img.std()
        img = torch.from_numpy(img).unsqueeze(0).to(
            ellsegPrecision)  # Adds a singleton for channels
        img = img.unsqueeze(0)
        img = img.to(device).to(ellsegPrecision)
        x4, x3, x2, x1, x = model.enc(img)
        op = model.dec(x4, x3, x2, x1, x)
        rawpredict = get_predictions(op)
        # NOTE(review): debug-looking plot left in the hot path — confirm intentional.
        plt.imshow(rawpredict[0], cmap="BrBG", alpha=0.3)
        if useEllsegEllipseAsMask:
            ellpred = model.elReg(x, 0).view(-1)
            #i1, i2, i3, i4, i5, p1, p2, p3, p4, p5 = ellpred[0].cpu().detach().numpy()
            _, _, H, W = img.shape
            # Homography from normalized coords back to pixel space.
            H_mat = np.array([[W / 2, 0, W / 2], [0, H / 2, H / 2], [0, 0, 1]])
            #import pdb
            #pdb.set_trace()
            # ellpred[:5] = iris ellipse, ellpred[5:] = pupil ellipse.
            i_cx, i_cy, i_a, i_b, i_theta, _ = my_ellipse(
                ellpred[:5].tolist()).transform(H_mat)[0]
            p_cx, p_cy, p_a, p_b, p_theta, _ = my_ellipse(
                ellpred[5:].tolist()).transform(H_mat)[0]
            # Paint background, then iris, then pupil at fixed gray levels.
            ellimage = np.full((int(H), int(W)), 2 / 3)
            startAngle = 0
            endAngle = 360
            iris_color = 1 / 3
            pupil_color = 0.0
            pred_img = draw_ellipse(ellimage, (i_cx, i_cy), (i_a, i_b), i_theta,
                                    startAngle, endAngle, iris_color, -1)
            pred_img = draw_ellipse(ellimage, (p_cx, p_cy), (p_a, p_b), p_theta,
                                    startAngle, endAngle, pupil_color, -1)
        else:
            predict = rawpredict + 1
            pred_img = 1 - predict[0].cpu().numpy() / channels
    #print(pred_img)
    # trim pupil if asked to
    if trim_pupil:
        # Keep only the largest connected non-background component.
        newimg = np.invert(pred_img > 0)
        labeled_img = measure.label(newimg)
        labels = np.unique(labeled_img)
        newimg = np.zeros((newimg.shape[0], newimg.shape[1]))
        old_sum = 0
        old_label = None
        for label in (y for y in labels if y != 0):
            if np.sum(labeled_img == label) > old_sum:
                old_sum = np.sum(labeled_img == label)
                old_label = label
        if old_label is not None:
            newimg = newimg + (labeled_img == old_label)
        # Invert the kept-component mask: 0 -> 1, 1 -> 0.
        newimg[newimg == 0] = 2
        newimg[newimg == 1] = 0
        newimg[newimg == 2] = 1
        pred_img[pred_img == 0] = 1 - (1 / channels)
        pred_img[newimg == 0] = 0
    #print(np.unique(pred_img))
    if pupilOnly:
        pred_img = np.ceil(pred_img) * 0.5
    if includeRawPredict:
        return pred_img, rawpredict
    return pred_img
def predict():
    """Flask endpoint: return the model's predictions for the incoming request as JSON."""
    return jsonify(get_predictions(request))
def main(args):
    """Dispatch on CLI flags: delay plot, PR thresholds, metrics table, or prediction plot.

    Writes thresholds.csv / metrics.csv (with --compute_metrics) and
    time-analysis.pdf (with --plot_predictions) under args.run_dir.
    """
    if args.delay:
        delay_plot(args)
        return
    if args.pr:
        targets, predictions, _ = get_predictions(args.run_dir, stream=False)
        pr_fname = os.path.join(args.run_dir, 'pr.npz')
        _ = find_thresholds(targets, predictions, stream=False, pr=pr_fname)
        return
    if args.compute_metrics or args.plot_predictions:
        run_info, model, loader = load_run(args.run_dir, data=args.data)
        params = run_info[-1]
        labels = np.array([
            a.replace('hdm05_', '')
            for a in loader[1].dataset.action_descriptions
        ])
        if args.compute_metrics:
            rows = []
            thr_tab = pd.DataFrame(
                index=labels,
                columns=pd.MultiIndex.from_product([['fair', 'unfair'],
                                                    ['stream', 'sequences']]))
            # Evaluate all four (stream, fair) combinations; 'fair' means
            # thresholds are fitted on train-set predictions.
            for stream, fair in itertools.product((False, True), repeat=2):
                targets, predictions, annot_time = get_predictions(
                    args.run_dir, stream=stream, force=args.force)
                thr_targets, thr_predictions = targets, predictions
                if fair:
                    thr_targets, thr_predictions, _ = get_predictions(
                        args.run_dir, train=True, stream=stream, force=args.force)
                    train_targets = thr_targets
                print('Stream: {} Fair: {}'.format(stream, fair))
                thrs = find_thresholds(thr_targets, thr_predictions, stream=stream)
                thr_tab[(('fair' if fair else 'unfair'),
                         ('stream' if stream else 'sequences'))] = thrs[1]
                metrics = compute_metrics(targets, predictions, thrs, stream=stream)
                row = (stream, fair) + metrics + (annot_time, )
                rows.append(row)
            # NOTE(review): `train_targets` is only bound on fair iterations —
            # relies on the product loop ending with fair=True. Confirm.
            thr_tab['train_support'] = train_targets.sum(axis=0)
            thr_tab['test_support'] = targets.sum(axis=0)
            thresholds_file = os.path.join(args.run_dir, 'thresholds.csv')
            thr_tab.to_csv(thresholds_file)
            columns = ('Stream', 'Fair', 'microAP', 'macroAP', 'microF1',
                       'macroF1', 'catMicroF1', 'catMacroF1', 'AnnotTime')
            metrics = pd.DataFrame.from_records(rows, columns=columns)
            metrics_file = os.path.join(args.run_dir, 'metrics.csv')
            metrics.to_csv(metrics_file)
            print(metrics)
        if args.plot_predictions:
            stream, fair = False, False
            targets, predictions, annot_time = get_predictions(args.run_dir,
                                                               stream=stream,
                                                               force=args.force)
            thr_targets, thr_predictions = targets, predictions
            # `fair` is hard-coded False here, so this branch never runs.
            if fair:
                thr_targets, thr_predictions, _ = get_predictions(args.run_dir,
                                                                  train=True,
                                                                  stream=stream,
                                                                  force=args.force)
                train_targets = thr_targets
            thrs = find_thresholds(thr_targets, thr_predictions, stream=stream)
            global_thr, multiple_thrs = thrs
            out = os.path.join(args.run_dir, 'time-analysis.pdf')
            seq_ids = [
                int(loader[1].dataset.data[i]['seq_id'])
                for i in range(len(targets))
            ]
            # NOTE(review): function name is spelled `plot_preditctions` at its
            # definition site, presumably — confirm before renaming.
            plot_preditctions(targets, predictions, seq_ids, labels, global_thr, out)
    return
batch_size=args.bs, shuffle=False, num_workers=0) # windows下num_workers改成0,源代码为2 counter = 0 os.makedirs('test/labels/', exist_ok=True) os.makedirs('test/output/', exist_ok=True) os.makedirs('test/mask/', exist_ok=True) with torch.no_grad(): for i, batchdata in tqdm(enumerate(testloader), total=len(testloader)): img, labels, index, x, y = batchdata data = img.to(device) output = model(data) predict = get_predictions(output) for j in range(len(index)): np.save('test/labels/{}.npy'.format(index[j]), predict[j].cpu().numpy()) try: plt.imsave('test/output/{}.jpg'.format(index[j]), 255 * labels[j].cpu().numpy()) except: pass pred_img = predict[j].cpu().numpy() / 3.0 inp = img[j].squeeze() * 0.5 + 0.5 img_orig = np.clip(inp, 0, 1) img_orig = np.array(img_orig) combine = np.hstack([img_orig, pred_img]) plt.imsave('test/mask/{}.jpg'.format(index[j]), combine)
def main_script(): args = parse_args() print("save = {}".format(args.save)) print("csv = {}".format(args.csv)) if args.MWE not in [0, 1, 2, 3, 4]: sys.exit('MWE must be 0,1,2,3 or 4') if args.num_threads <= 0: sys.exit('num_threads must be a positive integer') if args.num_epochs <= 0: sys.exit('num_epochs must be a positive integer') if args.dim <= 0: sys.exit('dim must be a positive integer') if args.neg_samples <= 0: sys.exit('neg_samples must be a positive integer') if args.report_schedule <= 0: sys.exit('report_schedule must be a positive integer') # set report schedule based on MWE case #if args.MWE == 1: # report_schedule = 1000 #else: # report_schedule = 100 # max_list_length = 6203 ######################### LOAD DATA ########################################### if args.MWE == 1: filename = 'war_and_peace.txt' #with open(filename, 'r') as file: #data = tokenizer_MWE1(file.read().replace('\n', ' ')) #print(data) lst = [] f = open(filename, "r") for line in f: for word in line.split(" "): if word == "\n": continue elif "\n" in word: lst.append(word[:-1]) else: lst.append(word) data_string = listToString(lst, args.MWE) print("STRING CREATED") text_file = open("w_and_p.txt", "w") text_file.write(data_string) text_file.close() print("STRING WRITTEN TO TEXT FILE") data = tokenizer_MWE1(data_string) print("STRING TOKENIZED") #print(data) elif args.MWE == 3: if os.path.exists("wire_3match.gz"): print("loading from gzip files") file = "wire_3match.gz" data_list = list(_open_file(file))[0] #print(data_list) else: wire_vocab = set() df_wire = pd.read_csv(validation_path) for _, record in df_wire.iterrows(): wire_vocab.add(record["srcWikiTitle"]) wire_vocab.add(record["dstWikiTitle"]) wire_vocab = list(wire_vocab) print("WiRe vocab loaded successfully") files = [] for _, _, fs in os.walk("data/", topdown=False): files += [f for f in fs if f.endswith(".gz")] files = [ os.path.join("data/page_dist_training_data/", f) for f in files ] data_list = [] for i, file in 
tqdm(enumerate(files)): sentences = list(_open_file(file)) data_list += sentences original_data_length = len(data_list) new_list = [] for i, page in enumerate(data_list): if i % 10000 == 0: print("{}/{}".format(i, original_data_length)) c = sum(item in page for item in wire_vocab) # only include Wikipedia pages that mention at least 2 WiRe elements if c >= 3: decoded_page = [x.encode('ascii', 'ignore') for x in page] new_list.append(decoded_page) data_list = new_list print("Original data length = {}".format(original_data_length)) print("Reduced data length = {}".format(len(new_list))) with gzip.open("wire_3match.gz", "w") as tfz: tfz.write(json.dumps(new_list)) tfz.close() print("WRITING DATA") lst = [] for entities in tqdm(data_list): lst.append(listToString(entities, args.MWE)) lst.append("\n") #print(lst) data_string = listToString(lst, args.MWE) #print(data_string) print("STRING CREATED") text_file = open("wire.txt", "w") text_file.write(data_string) text_file.close() print("STRING WRITTEN TO TEXT FILE") data = tokenizer_MWE0(data_string) print("STRING TOKENIZED") elif args.MWE == 4: if os.path.exists("wire_video_3match.gz"): print("loading from gzip files") file = "wire_video_3match.gz" data_list = list(_open_file(file))[0] #print(data_list) else: # load wire vocab if os.path.exists("wire_video_vocab.pkl"): wire_video_vocab = pkl.load(open('wire_video_vocab.pkl', 'rb')) print( "WiRe + Video vocab loaded from pickle file successfully") else: wire_vocab = set() df_wire = pd.read_csv(validation_path) for _, record in df_wire.iterrows(): wire_vocab.add(record["srcWikiTitle"]) wire_vocab.add(record["dstWikiTitle"]) wire_vocab = list(wire_vocab) print("WiRe vocab loaded successfully. Length = {} entities". format(len(wire_vocab))) # load video vocab video_vocab = set() path = 'data/wikipedias' for i, file in enumerate( glob.glob(os.path.join( path, '*.json'))): #only process .JSON files in folder. 
with open(file, encoding='utf-8', mode='r') as f: json_object = json.load(f) for dic in json_object: if dic['lang'] == 'en': # add only English entities entity = dic['name'] video_vocab.add(entity) video_vocab = list(video_vocab) print("Video vocab loaded successfully. Length = {} entities". format(len(video_vocab))) # create combined vocab wire_video_vocab = wire_vocab + video_vocab pkl.dump(wire_video_vocab, open('wire_video_vocab.pkl', 'wb'), protocol=pkl.HIGHEST_PROTOCOL) print( "Combined vocab loaded successfully. Length = {} entities". format(len(wire_video_vocab))) # filter wikipedia files with combined vocab files = [] for _, _, fs in os.walk("data/", topdown=False): files += [f for f in fs if f.endswith(".gz")] files = [ os.path.join("data/page_dist_training_data/", f) for f in files ] data_list = [] for i, file in tqdm(enumerate(files)): sentences = list(_open_file(file)) data_list += sentences original_data_length = len(data_list) new_list = [] for i, page in enumerate(data_list): if i % 10000 == 0: print("{}/{}".format(i, original_data_length)) c = sum(item in page for item in wire_video_vocab) # only include Wikipedia pages that mention at least 2 WiRe elements if c >= 3: decoded_page = [x.encode('ascii', 'ignore') for x in page] new_list.append(decoded_page) data_list = new_list print("Original data length = {}".format(original_data_length)) print("Reduced data length = {}".format(len(new_list))) with gzip.open("wire_video_3match.gz", "w") as tfz: tfz.write(json.dumps(new_list)) tfz.close() else: print("\n\n----------- LOADING DATA ----------") if os.path.exists("data_list.pkl"): # start = time.time() # print("loading from existing pickle") # pickle_in = open("data_list.pkl","rb") # data_list = pkl.load(pickle_in) # end = time.time() # print("loaded in {} secs".format(round(end - start,2))) # else: print("loading from gzip files") files = [] for _, _, fs in os.walk("data/", topdown=False): if args.MWE == 2: files += [f for f in fs if 
f.endswith("00000.gz")] else: files += [f for f in fs if f.endswith(".gz")] files = [ os.path.join("data/page_dist_training_data/", f) for f in files ] data_list = [] with gzip.open(files[0], 'rt') as f: for line in f: print(line) for i, file in tqdm(enumerate(files)): sentences = list(_open_file(file)) data_list += sentences # pickle_out = open("data_list.pkl","wb") # pkl.dump(data_list, pickle_out) # pickle_out.close() #if args.MWE == 2: #data_list = data_list[20] print("WRITING DATA") lst = [] for entities in tqdm(data_list): lst.append(listToString(entities, args.MWE)) lst.append("\n") data_string = listToString(lst, args.MWE) print(data_string) print("STRING CREATED") text_file = open("wikipedia.txt", "w") text_file.write(data_string) text_file.close() print("STRING WRITTEN TO TEXT FILE") data = tokenizer_MWE0(data_string) print("STRING TOKENIZED") #print(data[:2]) #print(corpus) #print(data) ################################################################################ entity_2_idx = defaultdict(lambda: len(entity_2_idx)) counter = Counter() dataset = [] print("WRITING ENTITY2IDX DICT") for entity in tqdm(data): entity_2_idx[entity] counter[entity_2_idx[entity]] += 1 dataset.append(entity_2_idx[entity]) # print(entity_2_idx) num_tokens = len(entity_2_idx) print("num_tokens = {}".format(num_tokens)) #print(entity_2_idx) #print("\n\n") #print(counter) #print("\n\n") #print(dataset[:2]) dataset_length = len(dataset) print(dataset) print("Dataset length = {}".format(dataset_length)) # load the vocabulary if args.MWE == 1: vocab = Vocabulary(entity_2_idx, tokenizer_MWE1) else: vocab = Vocabulary(entity_2_idx, tokenizer_MWE0) ############################################################################ # create the embedding to train # use 100 dimensional spherical Gaussian with KL-divergence as energy function # embed = GaussianEmbedding(num_tokens, dimension, # covariance_type=cov_type, energy_type=E_type) embed = GaussianEmbedding(N=num_tokens, 
neg_samples=args.neg_samples, size=args.dim, covariance_type=cov_type, energy_type=E_type, mu_max=mu_max, sigma_min=sigma_min, sigma_max=sigma_max, init_params={ 'mu0': mu0, 'sigma_mean0': sigma_mean0, 'sigma_std0': sigma_std0 }, eta=args.eta, Closs=args.Closs, verbose_loss=args.verbose_loss, verbose_gradients=args.verbose_gradients, grad_weight_by_Npairs=args.grad_weight_by_Npairs) ########################################################################### # open the corpus and train with 8 threads # the corpus is just an iterator of documents, here a new line separated # gzip file for example if print_init_embeddings: print("---------- INITIAL EMBEDDING MEANS ----------") print(embed.mu) print("---------- INITIAL EMBEDDING COVS ----------") print(embed.sigma) epoch_losses = [] epoch_fwd_KL_pears = [] epoch_fwd_KL_spears = [] epoch_rev_KL_pears = [] epoch_rev_KL_spears = [] epoch_fisher_pears = [] epoch_fisher_spears = [] epoch_cos_pears = [] epoch_cos_spears = [] epoch_times = [] train_time_start = time.time() for e in range(args.num_epochs): epoch_start = time.time() print("---------- EPOCH {} ----------".format(e + 1)) #print(embed.mu[:10]) #print(embed.sigma[:10]) if args.MWE == 1: with open('w_and_p.txt', 'r') as corpus: total_num_examples = len(open('w_and_p.txt').readlines()) if args.dynamic_window_size: epoch_losses.append( embed.train_dynamic( iter_pairs( corpus, vocab, dynamic_window_size=args.dynamic_window_size, batch_size=batch_size, nsamples=args.neg_samples, window=window), n_workers=args.num_threads, total_num_examples=total_num_examples, verbose_pairs=args.verbose_pairs, report_interval=args.report_schedule)) else: epoch_losses.append( embed.train(iter_pairs( corpus, vocab, dynamic_window_size=args.dynamic_window_size, batch_size=batch_size, nsamples=args.neg_samples, window=window), n_workers=args.num_threads, verbose_pairs=args.verbose_pairs, report_interval=args.report_schedule)) elif args.MWE == 3: with open('wire.txt', 'r') as corpus: 
total_num_examples = len(open('wire.txt').readlines()) if args.dynamic_window_size: epoch_losses.append( embed.train_dynamic( iter_pairs( corpus, vocab, dynamic_window_size=args.dynamic_window_size, batch_size=batch_size, nsamples=args.neg_samples, window=window), n_workers=args.num_threads, total_num_examples=total_num_examples, verbose_pairs=args.verbose_pairs, report_interval=args.report_schedule)) else: epoch_losses.append( embed.train(iter_pairs( corpus, vocab, dynamic_window_size=args.dynamic_window_size, batch_size=batch_size, nsamples=args.neg_samples, window=window), n_workers=args.num_threads, verbose_pairs=args.verbose_pairs, report_interval=args.report_schedule)) else: with open('wikipedia.txt', 'r') as corpus: total_num_examples = len(open('wikipedia.txt').readlines()) if args.dynamic_window_size: epoch_losses.append( embed.train_dynamic( iter_pairs( corpus, vocab, dynamic_window_size=args.dynamic_window_size, batch_size=batch_size, nsamples=args.neg_samples, window=window), n_workers=args.num_threads, total_num_examples=total_num_examples, verbose_pairs=args.verbose_pairs, report_interval=args.report_schedule)) else: epoch_losses.append( embed.train(iter_pairs( corpus, vocab, dynamic_window_size=args.dynamic_window_size, batch_size=batch_size, nsamples=args.neg_samples, window=window), n_workers=args.num_threads, verbose_pairs=args.verbose_pairs, report_interval=args.report_schedule)) epoch_end = time.time() epoch_times.append(round(epoch_end - epoch_start, 2)) if args.save == True: print("Epoch {} complete. 
Saving model.".format(e + 1)) os.chdir("Models/") embed.save( 'model_MWE={}_d={}_e={}_neg={}_eta={}_C={}_epoch={}'.format( args.MWE, args.dim, args.num_epochs, args.neg_samples, args.eta, args.Closs, e + 1), vocab=vocab.id2word, full=True) os.chdir('..') print("MEASURING EMBEDDING PERFORMANCE ON VALIDATION DATA") actual, pred_KL_fwd, pred_KL_rev, pred_fisher, pred_cos = get_predictions( validation_path, e, embed, vocab, is_round=False) ### forward KL predictions ### pear_r_fwd, _ = pearsonr(actual, pred_KL_fwd) spear_r_fwd, _ = spearmanr(actual, pred_KL_fwd) print("------ Epoch: {} FORWARD KL SIMILARITY KL[src||dst] ------". format(e + 1)) print("Pearson R: {}, Spearman R: {}".format(pear_r_fwd, spear_r_fwd)) ### reverse KL predictions ### pear_r_rev, _ = pearsonr(actual, pred_KL_rev) spear_r_rev, _ = spearmanr(actual, pred_KL_rev) print("------ Epoch: {} REVERSE KL SIMILARITY KL[dst||src] ------". format(e + 1)) print("Pearson R: {}, Spearman R: {}".format(pear_r_rev, spear_r_rev)) ### fisher predictions ### pear_fisher, _ = pearsonr(actual, pred_fisher) spear_fisher, _ = spearmanr(actual, pred_fisher) print("------ Epoch: {} FISHER DISTANCE ------".format(e + 1)) print("Pearson R: {}, Spearman R: {}".format(pear_fisher, spear_fisher)) ### cosine predictions ### pear_r_cos, _ = pearsonr(actual, pred_cos) spear_r_cos, _ = spearmanr(actual, pred_cos) print("------ Epoch: {} COSINE SIMILARITY OF MEANS ------".format(e + 1)) print("Pearson R: {}, Spearman R: {}".format(pear_r_cos, spear_r_cos)) epoch_fwd_KL_pears.append(pear_r_fwd) epoch_fwd_KL_spears.append(spear_r_fwd) epoch_rev_KL_pears.append(pear_r_rev) epoch_rev_KL_spears.append(spear_r_rev) epoch_fisher_pears.append(pear_fisher) epoch_fisher_spears.append(spear_fisher) epoch_cos_pears.append(pear_r_cos) epoch_cos_spears.append(spear_r_cos) train_time_end = time.time() training_time = round(train_time_end - train_time_start, 2) print("\n\n\nEPOCH LOSSES : {}".format(epoch_losses)) print("EPOCH fwd KL Pearson R : 
{}".format(epoch_fwd_KL_pears)) print("EPOCH fwd KL Spearman R : {}".format(epoch_fwd_KL_spears)) print("EPOCH rev KL Pearson R : {}".format(epoch_rev_KL_pears)) print("EPOCH rev KL Spearman R : {}".format(epoch_rev_KL_spears)) print("EPOCH Fisher Pearson R : {}".format(epoch_fisher_pears)) print("EPOCH Fisher Spearman R : {}".format(epoch_fisher_spears)) print("EPOCH cosine Pearson R : {}".format(epoch_cos_pears)) print("EPOCH cosine Spearman R : {}".format(epoch_cos_spears)) print("TOTAL TRAININT TIME = {} secs".format(training_time)) if print_final_embeddings: print("---------- FINAL EMBEDDING MEANS ----------") print(embed.mu) print("---------- FINAL EMBEDDING COVS ----------") print(embed.sigma) ############################################################################ if calc_general_and_specific: sigma_norms = np.linalg.norm(embed.sigma, axis=1) most_general_indices = np.split(sigma_norms, 2)[0].argsort()[-10:][::-1] most_specific_indices = np.split(sigma_norms, 2)[0].argsort()[:10] idx_2_entity = {v: k for k, v in entity_2_idx.items()} print("MOST GENERAL ENTITIES") for idx in most_general_indices: print(idx_2_entity[idx]) print("MOST SPECIFIC ENTITIES") for idx in most_specific_indices: print(idx_2_entity[idx]) ########################################################################### # print("LOADING MODEL") # test = GaussianEmbedding(N=num_tokens, size=dimension, # covariance_type=cov_type, energy_type=E_type, # mu_max=mu_max, sigma_min=sigma_min, sigma_max=sigma_max, # init_params={'mu0': mu0, # 'sigma_mean0': sigma_mean0, # 'sigma_std0': sigma_std0}, # eta=eta, Closs=Closs) # # test.load('model_file_location') ########################################################################### if calc_similarity_example: print("TESTING KL SIMILARITY") entity1 = 'Copenhagen' entity2 = 'Denmark' idx1 = vocab.word2id(entity1) idx2 = vocab.word2id(entity2) mu1 = embed.mu[idx1] Sigma1 = np.diag(embed.sigma[idx1]) mu2 = embed.mu[idx2] Sigma2 = 
np.diag(embed.sigma[idx2]) print("ENTITY 1 : {}".format(entity1)) #print("mu1 = {}".format(mu1)) #print("Sigma1 = {}".format(Sigma1)) print("ENTITY 2 : {}".format(entity2)) #print("mu2 = {}".format(mu2)) #print("Sigma2 = {}".format(Sigma2)) forward_KL_similarity = KL_Multivariate_Gaussians( mu1, Sigma1, mu2, Sigma2) reverse_KL_similarity = KL_Multivariate_Gaussians( mu2, Sigma2, mu1, Sigma1) print("KL[entity1 || entity2] similarity = {}".format( round(forward_KL_similarity, 4))) print("KL[entity2 || entity1] similarity = {}".format( round(reverse_KL_similarity, 4))) print("cosine similarity = {}".format( round(cosine_between_vecs(mu1, mu2), 4))) ############################################################################ if calc_nearest_neighbours: print("\n\n") print("FINDING NEAREST NEIGHBOURS") target = "war" metric = cosine num = 10 target_idx = entity_2_idx[target] neighbours = embed.nearest_neighbors(target=target_idx, metric=metric, num=num + 1, vocab=vocab, sort_order='similarity') print("Target = {}".format(target)) print("Similarity metric = {}".format(metric)) for i in range(1, num + 1): print("{}: {}".format(i, neighbours[i])) # print("rank {}: word = {}, sigma = {}, id = {}, similarity = {}".format(i,neighbours[i][word],neighbours[i][sigma],neighbours[i][id],neighbours[i][similarity])) ############################################################################ if args.csv: f_results = 'grid_search_results_MWE={}_threads={}_epochs={}_d={}_neg={}_eta={}_C={}_mu0={}_sig0={}_sigstd0={}_mumax={}_sigmax={}_sigmin={}_windowweighting={}.csv'.format( args.MWE, args.num_threads, args.num_epochs, args.dim, args.neg_samples, args.eta, args.Closs, mu0, sigma_mean0, sigma_std0, mu_max, sigma_max, sigma_min, args.grad_weight_by_Npairs) hyperparameter_list = [ "Threads", "Dimension", "Neg samples", "Eta", "Closs" ] epoch_list = [ 'Epoch {} Loss'.format(i + 1) for i in range(args.num_epochs) ] pear_r_fwd_list = [ 'Epoch {} fwd KL Pearson R'.format(i + 1) for i in 
range(args.num_epochs) ] spear_r_fwd_list = [ 'Epoch {} fwd KL Spearman R'.format(i + 1) for i in range(args.num_epochs) ] pear_r_rev_list = [ 'Epoch {} rev KL Pearson R'.format(i + 1) for i in range(args.num_epochs) ] spear_r_rev_list = [ 'Epoch {} rev KL Spearman R'.format(i + 1) for i in range(args.num_epochs) ] pear_r_cos_list = [ 'Epoch {} cosine Pearson R'.format(i + 1) for i in range(args.num_epochs) ] spear_r_cos_list = [ 'Epoch {} cosine Spearman R'.format(i + 1) for i in range(args.num_epochs) ] time_list = [ 'Epoch {} Time'.format(i + 1) for i in range(args.num_epochs) ] header_list = hyperparameter_list + epoch_list + pear_r_fwd_list + spear_r_fwd_list + pear_r_rev_list + spear_r_rev_list + pear_r_cos_list + spear_r_cos_list + time_list if os.path.exists(f_results): append_write = 'a' # append if already exists else: # write header with open(f_results, 'w') as file: writer = csv.writer(file) writer.writerow(header_list) append_write = 'a' # make a new file if not with open(f_results, append_write) as file: writer = csv.writer(file) hyperparameter_values = [ args.num_threads, args.dim, args.neg_samples, args.eta, args.Closs ] values_list = hyperparameter_values + epoch_losses + epoch_fwd_KL_pears + epoch_fwd_KL_spears + epoch_rev_KL_pears + epoch_rev_KL_spears + epoch_cos_pears + epoch_cos_spears + epoch_times writer.writerow(values_list)
def train(cfg):
    """Train a DNC and an LSTM baseline side-by-side on the bAbI dataset.

    Both networks are driven by the same batches, losses and metrics so their
    curves are directly comparable in TensorBoard.  Checkpoints for both models
    are written once per epoch, keeping only the 10 most recent pairs.

    Args:
        cfg: nested dict-like config with "hyperparameters", "paths", "dnc"
            and "logging" sections (schema inferred from the keys read below —
            confirm against the config loader).
    """
    gpu_id = cfg["hyperparameters"]["gpu_id"]
    # Use GPU if available; gpu_id >= 0 selects a CUDA device, -1 means CPU.
    if gpu_id >= 0:
        assert torch.cuda.is_available()
        device = torch.device("cuda:" + str(gpu_id))
        logging.info("Using GPU {} | {}".format(
            gpu_id, torch.cuda.get_device_name(gpu_id)))
    elif gpu_id == -1:
        device = torch.device("cpu")
        logging.info("Using the CPU")
    else:
        raise NotImplementedError(
            "Device ID {} not recognized. gpu_id = 0, 1, 2 etc. Use -1 for CPU"
            .format(gpu_id))
    # Re-seed before each model construction so both nets get identical
    # random initial conditions where their parameter shapes coincide.
    _set_seed(cfg["hyperparameters"]["seed"])
    net = bAbI_dnc(cfg)
    _set_seed(cfg["hyperparameters"]["seed"])
    net_lstm = bAbI_lstm(cfg)
    if cfg["hyperparameters"]["distributed_tr"]:
        net = nn.DataParallel(net).to(device)
        net_lstm = nn.DataParallel(net_lstm).to(device)
    else:
        net = net.to(device)
        net_lstm = net_lstm.to(device)
    logging.info("Network successfully constructed")
    # Separate optimizers so the two models train independently.
    optimizer = optim.RMSprop(net.parameters(),
                              lr=cfg["hyperparameters"]["lr"],
                              momentum=cfg["hyperparameters"]["momentum"],
                              eps=1e-10)
    optimizer_lstm = optim.RMSprop(net_lstm.parameters(),
                                   lr=cfg["hyperparameters"]["lr"],
                                   momentum=cfg["hyperparameters"]["momentum"],
                                   eps=1e-10)
    # NOTE(review): only the DNC optimizer gets an LR schedule; the LSTM's LR
    # stays fixed — confirm this asymmetry is intentional.
    lr_scheduler = optim.lr_scheduler.StepLR(optimizer, 10, gamma=0.8)
    start_epoch = 0
    batch_iter_tr = 0
    batch_iter_val = 0
    # Prepare train and validation datasets
    logging.info("Loading the data ...")
    tr_dataset = bAbI_Dataset(cfg["paths"]["train_data"],
                              cfg["paths"]["lexicon"],
                              cfg["hyperparameters"]["use_one_hot"])
    val_dataset = bAbI_Dataset(cfg["paths"]["val_data"],
                               cfg["paths"]["lexicon"],
                               cfg["hyperparameters"]["use_one_hot"])
    # Prepare the dataloaders
    tr_dataloader = DataLoader(tr_dataset,
                               batch_size=cfg["hyperparameters"]["batch_size"],
                               shuffle=True)
    val_dataloader = DataLoader(
        val_dataset,
        batch_size=cfg["hyperparameters"]["batch_size"],
        shuffle=True)
    loss_weight = tr_dataset.loss_weight.to(device)
    num_classes = tr_dataset.len_lexicon
    modes = ["TRAIN", "VAL"]
    datasets = {"TRAIN": tr_dataset, "VAL": val_dataset}
    dataloaders = {"TRAIN": tr_dataloader, "VAL": val_dataloader}
    # Define tensorBoard logger
    summary_writer = SummaryWriter(cfg["logging"]["tensorboard_dir"])
    idx_to_word = pickle.load(open(cfg["paths"]["idx_to_word"], "rb"))
    num_epochs = cfg["hyperparameters"]["num_epochs"]
    lr = 0
    # DNC recurrent state: controller hidden state, memory state, read vectors.
    # mhx is carried across batches (detached below) so memory persists.
    (chx, mhx, rv) = (None, None, None)
    for epoch in range(start_epoch, num_epochs):
        for mode in modes:
            if mode == "TRAIN":
                net.train()
                net_lstm.train()
            else:
                net.eval()
                net_lstm.eval()
            loss_epoch = 0
            acc_epoch = 0
            loss_epoch_lstm = 0
            acc_epoch_lstm = 0
            dataset = datasets[mode]
            dataloader = dataloaders[mode]
            prog_bar = tqdm(dataloader)
            prog_bar.set_description("{} | Epoch {}/{}".format(
                mode, epoch, num_epochs))
            for batch in prog_bar:
                # Send the data to the appropriate device
                for k in batch.keys():
                    if k in ["input", "output", "mask_answer"]:
                        batch[k] = batch[k].to(device)
                optimizer.zero_grad()
                optimizer_lstm.zero_grad()
                # Gradients only tracked in TRAIN mode.
                with torch.set_grad_enabled(mode == "TRAIN"):
                    # Skip ragged final batches: the persistent DNC memory
                    # state mhx is sized for a fixed batch size.
                    if batch["input"].size(
                            0) != cfg["hyperparameters"]["batch_size"]:
                        continue
                    if cfg["dnc"]["debug"]:
                        # Debug mode also returns internals `v` (unused here).
                        logits, (chx, mhx, rv), v = net(
                            batch["input"],
                            None,
                            mhx,
                            None,
                            reset_experience=cfg["dnc"]["reset_experience"],
                            pass_through_memory=cfg["dnc"]["pass_through_mem"])
                    else:
                        logits, (chx, mhx, rv) = net(
                            batch["input"],
                            None,
                            mhx,
                            None,
                            reset_experience=cfg["dnc"]["reset_experience"],
                            pass_through_memory=cfg["dnc"]["pass_through_mem"])
                    logits_lstm = net_lstm(batch["input"])
                    # Flatten (batch, time, classes) -> (batch*time, classes)
                    # to apply the masked token-level cross entropy.
                    logits_ = logits.view(-1, num_classes)
                    logits_lstm_ = logits_lstm.view(-1, num_classes)
                    output = batch["output"].view(-1)
                    mask = batch["mask_answer"].view(-1)
                    loss = MaskedCrossEntropy(logits_, output, mask)
                    loss_lstm = MaskedCrossEntropy(logits_lstm_, output, mask)
                    if mode == "TRAIN":
                        # Back propagation with anomaly detection -> Makes it easier to locate the faulty parts of the net
                        # if some undesirable phenomena happen, e.g. if some layers produce NaN of Inf values.
                        # with torch.autograd.detect_anomaly():
                        loss.backward()
                        loss_lstm.backward()
                        # clip gradients to avoid explosion
                        nn.utils.clip_grad_norm_(net.parameters(), 10.0)
                        nn.utils.clip_grad_norm_(net_lstm.parameters(), 10.0)
                        optimizer.step()
                        optimizer_lstm.step()
                        lr = optimizer.param_groups[0]["lr"]
                        summary_writer.add_scalar("lr_DNC",
                                                  lr,
                                                  global_step=batch_iter_tr)
                    # Detach the persistent memory state so the graph from
                    # this batch is not kept alive into the next one.
                    mhx = {
                        k: (v.detach() if isinstance(v, var) else v)
                        for k, v in mhx.items()
                    }
                    loss_dict = {"DNC": loss.item(), "LSTM": loss_lstm.item()}
                    summary_writer.add_scalars("Loss_" + mode + "_Batch",
                                               loss_dict,
                                               global_step=batch_iter_tr
                                               if mode == "TRAIN" else
                                               batch_iter_val)
                    # accumulate batch losses
                    loss_epoch += loss.item()
                    loss_epoch_lstm += loss_lstm.item()
                    # Regroup flat answer tokens back into per-question lists
                    # before computing accuracy.
                    answers_gt = regroup(batch["answers_gt"],
                                         batch["length_answers"])
                    predictions_dnc = get_predictions(logits,
                                                      batch["answers_idx"])
                    predictions_dnc = regroup(predictions_dnc,
                                              batch["length_answers"])
                    acc, acc_list = accuracy(answers_gt, predictions_dnc)
                    predictions_lstm = get_predictions(logits_lstm,
                                                       batch["answers_idx"])
                    predictions_lstm = regroup(predictions_lstm,
                                               batch["length_answers"])
                    acc_lstm, acc_list_lstm = accuracy(answers_gt,
                                                       predictions_lstm)
                    acc_dict = {"DNC": acc, "LSTM": acc_lstm}
                    # Human-readable sample outputs for the TensorBoard text tab.
                    tb_text_dnc = get_tb_text_babi(answers_gt, predictions_dnc,
                                                   acc_list, idx_to_word, 8)
                    tb_text_lstm = get_tb_text_babi(answers_gt,
                                                    predictions_lstm,
                                                    acc_list_lstm, idx_to_word,
                                                    8)
                    summary_writer.add_scalars("Accuracy_" + mode + "_Batch",
                                               acc_dict,
                                               global_step=batch_iter_tr
                                               if mode == "TRAIN" else
                                               batch_iter_val)
                    summary_writer.add_text("Output_DNC_" + mode + "_Batch",
                                            tb_text_dnc,
                                            global_step=batch_iter_tr
                                            if mode == "TRAIN" else
                                            batch_iter_val)
                    summary_writer.add_text("Output_LSTM_" + mode + "_Batch",
                                            tb_text_lstm,
                                            global_step=batch_iter_tr
                                            if mode == "TRAIN" else
                                            batch_iter_val)
                    # Accumulate accuracy
                    acc_epoch += acc
                    acc_epoch_lstm += acc_lstm
                # update the learning rate using the cyclic scheme
                # and log in into tensorboard.
                # if mode == "TRAIN":
                #     if cfg["hyperparameters"]["lr_schedule"] == "cyclic":
                #         lr = scheduler.get_lr()[0]
                #         scheduler.step()
                #     else:
                #         lr = optimizer.param_groups[0]["lr"]
                #     summary_writer.add_scalar("lr", lr, global_step=batch_iter_tr)
                # Release GPU memory cache
                torch.cuda.empty_cache()
                if mode == "TRAIN":
                    batch_iter_tr += 1
                else:
                    batch_iter_val += 1
            # Compute average epoch and loss accuracies.  Ragged final batches
            # were skipped above, so divide by the number of full batches only.
            if len(dataset) % cfg["hyperparameters"]["batch_size"] == 0:
                total = len(dataloader)
            else:
                total = len(dataloader) - 1
            loss_epoch /= total
            loss_epoch_lstm /= total
            acc_epoch /= total
            acc_epoch_lstm /= total
            loss_dict = {"DNC": loss_epoch, "LSTM": loss_epoch_lstm}
            acc_dict = {"DNC": acc_epoch, "LSTM": acc_epoch_lstm}
            summary_writer.add_scalars("Loss_" + mode + "_Epoch",
                                       loss_dict,
                                       global_step=epoch)
            summary_writer.add_scalars("Accuracy_" + mode + "_Epoch",
                                       acc_dict,
                                       global_step=epoch)
            if mode == "TRAIN":
                _save_checkpoint("dnc", net, optimizer, epoch, batch_iter_tr,
                                 batch_iter_val, lr, cfg)
                _save_checkpoint("lstm", net_lstm, optimizer_lstm, epoch,
                                 batch_iter_tr, batch_iter_val, lr, cfg)
                lr_scheduler.step()
                # Delete the oldest checkpoint if the number of checkpoints exceeds 10 to save disk space.
                checkpoints = [
                    ckpt
                    for ckpt in os.listdir(cfg["logging"]["checkpoints_dir"])
                    if ckpt.endswith("pth")
                ]
                checkpoints = [
                    os.path.join(cfg["logging"]["checkpoints_dir"], checkpoint)
                    for checkpoint in checkpoints
                ]
                if len(checkpoints) > 10:
                    # Oldest = smallest trailing integer in the filename
                    # (assumes names like "<model>_<iter>.pth" — TODO confirm
                    # against _save_checkpoint's naming scheme).
                    oldest_checkpoint_pth = min(
                        checkpoints,
                        key=lambda s: int(
                            s.split("/")[-1].split("_")[-1].split(".")[0]))
                    os.remove(oldest_checkpoint_pth)
                    # Remove the matching checkpoint of the sibling model so
                    # DNC/LSTM checkpoints are always deleted in pairs.
                    if "dnc" in oldest_checkpoint_pth:
                        oldest_checkpoint_pth = oldest_checkpoint_pth.replace(
                            "dnc", "lstm")
                        os.remove(oldest_checkpoint_pth)
                    elif "lstm" in oldest_checkpoint_pth:
                        oldest_checkpoint_pth = oldest_checkpoint_pth.replace(
                            "lstm", "dnc")
                        os.remove(oldest_checkpoint_pth)
def validate_video(self, loader, base_model, logits_model, criterion, epoch,
                   args):
    """Run video-level validation on the Charades test set.

    For every video, frame-level subject/object/verb scores are max-pooled
    over the time dimension into a single video-level prediction, which is
    then scored against the ground-truth lookup with
    ``eval_visual_relation``.

    Args:
        loader: iterable yielding (input, s_target, o_target, v_target, meta)
            batches; ``meta['id']`` holds the video id.
        base_model: feature extractor applied to the input clip.
        logits_model: head producing the 15-tuple of S/O/V logit tensors.
        criterion: loss module that also returns per-frame S/O/V outputs.
        epoch: current epoch number (stored into ``meta`` for the criterion).
        args: namespace providing ``print_freq`` and ``groundtruth_lookup``.

    Returns:
        (sov_mAP, sov_rec_at_n, sov_mprec_at_n) as computed by
        ``eval_visual_relation``.
    """
    with torch.no_grad():
        batch_time = AverageMeter()
        ids = []
        sov_prediction = dict()
        # switch to evaluate mode
        base_model.eval()
        logits_model.eval()
        criterion.eval()
        end = time.time()
        for i, (input, s_target, o_target, v_target,
                meta) in enumerate(loader):
            gc.collect()
            meta['epoch'] = epoch
            # BUGFIX: `.cuda(async=True)` is a SyntaxError on Python >= 3.7
            # (`async` became a reserved keyword); the supported spelling of
            # the asynchronous-copy flag is `non_blocking=True`.
            s_target = s_target.long().cuda(non_blocking=True)
            o_target = o_target.long().cuda(non_blocking=True)
            v_target = v_target.long().cuda(non_blocking=True)
            input_var = torch.autograd.Variable(input.cuda())
            s_target_var = torch.autograd.Variable(s_target)
            o_target_var = torch.autograd.Variable(o_target)
            v_target_var = torch.autograd.Variable(v_target)
            feat = base_model(input_var)
            s, o, v, so, ov, vs, ss, oo, vv, so_t, ov_t, vs_t, os_t, vo_t, sv_t = logits_model(
                feat)
            s_output, o_output, v_output, loss = criterion(
                *((s, o, v, so, ov, vs, ss, oo, vv, so_t, ov_t, vs_t, os_t,
                   vo_t, sv_t) +
                  (s_target_var, o_target_var, v_target_var, meta)),
                synchronous=True)
            # store predictions: max-pool frame scores over time (dim 0) to
            # obtain one video-level score vector per branch.
            s_output_video = s_output.max(dim=0)[0]
            o_output_video = o_output.max(dim=0)[0]
            v_output_video = v_output.max(dim=0)[0]
            sov_prediction[meta['id'][0]] = get_predictions(
                s_output_video.data.cpu().numpy(),
                o_output_video.data.cpu().numpy(),
                v_output_video.data.cpu().numpy())
            ids.append(meta['id'][0])
            batch_time.update(time.time() - end)
            end = time.time()
            if i % args.print_freq == 0:
                print('Test2: [{0}/{1}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})'.
                      format(i, len(loader), batch_time=batch_time))
        sov_mAP, sov_rec_at_n, sov_mprec_at_n = eval_visual_relation(
            prediction=sov_prediction,
            groundtruth_path=args.groundtruth_lookup)
        print(' * sov_mAP {:.3f}'.format(sov_mAP))
        print(' * sov_rec_at_n', sov_rec_at_n)
        print(' * sov_mprec_at_n', sov_mprec_at_n)
        return sov_mAP, sov_rec_at_n, sov_mprec_at_n
def evaluate_ellseg_on_image(frame, model):
    """Run EllSeg on a single eye image and extract pupil/iris ellipses.

    Depending on the global ``args.ellseg_ellipses`` flag, ellipses are taken
    from the network's own regression output (1), fitted to the segmentation
    mask with ElliFit/RANSAC (0), or skipped entirely (-1, sentinel output).

    Args:
        frame: input tensor of shape [1, 1, H, W] (enforced by the assert).
        model: EllSeg-style model exposing ``enc``, ``elReg`` and ``dec``.

    Returns:
        (seg_map, latent, pupil_ellipse, iris_ellipse) — segmentation map and
        latent code as numpy arrays, plus two 5-element ellipse parameter
        arrays (all -1 when no valid fit is available).
    """
    assert len(frame.shape) == 4, 'Frame must be [1,1,H,W]'
    with torch.no_grad():
        # Encoder -> latent summary (mean over spatial positions),
        # ellipse regressor, and segmentation decoder.
        x4, x3, x2, x1, x = model.enc(frame)
        latent = torch.mean(x.flatten(start_dim=2), -1)
        elOut = model.elReg(x, 0)
        seg_out = model.dec(x4, x3, x2, x1, x)
        seg_out, elOut, latent = seg_out.cpu(), elOut.squeeze().cpu(
        ), latent.squeeze().cpu()
        seg_map = get_predictions(seg_out).squeeze().numpy()
        # Exactly one of these three flags is set by args.ellseg_ellipses.
        ellipse_from_network = 1 if args.ellseg_ellipses == 1 else 0
        ellipse_from_output = 1 if args.ellseg_ellipses == 0 else 0
        no_ellipse = 1 if args.ellseg_ellipses == -1 else 0
        if ellipse_from_network:
            # Get EllSeg proposed ellipse predictions
            # Ellipse Centers -> derived from segmentation output
            # Ellipse axes and orientation -> Derived from latent space
            _, norm_pupil_center = get_seg2ptLoss(seg_out[:, 2, ...],
                                                  torch.zeros(2, ),
                                                  temperature=4)
            _, norm_iris_center = get_seg2ptLoss(-seg_out[:, 0, ...],
                                                 torch.zeros(2, ),
                                                 temperature=4)
            norm_pupil_ellipse = torch.cat([norm_pupil_center, elOut[7:10]])
            norm_iris_ellipse = torch.cat([norm_iris_center, elOut[2:5]])
            # Transformation function H: maps normalized [-1, 1] coordinates
            # back to pixel space.  Note the scalar H from frame.shape is
            # consumed on the right-hand side before being rebound to the
            # 3x3 matrix of the same name.
            _, _, H, W = frame.shape
            H = np.array([[W / 2, 0, W / 2], [0, H / 2, H / 2], [0, 0, 1]])
            pupil_ellipse = my_ellipse(
                norm_pupil_ellipse.numpy()).transform(H)[0][:-1]
            iris_ellipse = my_ellipse(
                norm_iris_ellipse.numpy()).transform(H)[0][:-1]
        if ellipse_from_output:
            # Get ElliFit derived ellipse fits from segmentation mask
            seg_map_temp = copy.deepcopy(seg_map)
            seg_map_temp[seg_map_temp == 2] += 1  # Pupil by PartSeg standard is 3
            seg_map_temp[seg_map_temp == 1] += 1  # Iris by PartSeg standard is 2
            pupilPts, irisPts = getValidPoints(seg_map_temp, isPartSeg=False)
            # Fit only when there are enough pupil pixels and the point
            # extraction did not fall back to an (empty) list.
            if np.sum(seg_map_temp == 3) > 50 and type(pupilPts) is not list:
                if args.skip_ransac:
                    model_pupil = ElliFit(**{'data': pupilPts})
                else:
                    model_pupil = ransac(pupilPts, ElliFit, 15, 40, 5e-3,
                                         15).loop()
            else:
                print('Not enough pupil points')
                # Stand-in object carrying the -1 sentinel fit.
                model_pupil = type('model', (object, ), {})
                model_pupil.model = np.array([-1, -1, -1, -1, -1])
            if np.sum(seg_map_temp == 2) > 50 and type(irisPts) is not list:
                if args.skip_ransac:
                    model_iris = ElliFit(**{'data': irisPts})
                else:
                    model_iris = ransac(irisPts, ElliFit, 15, 40, 5e-3,
                                        15).loop()
            else:
                print('Not enough iris points')
                model_iris = type('model', (object, ), {})
                model_iris.model = np.array([-1, -1, -1, -1, -1])
                # NOTE(review): Phi is only set on the iris fallback, not the
                # pupil one — confirm nothing downstream reads model_pupil.Phi.
                model_iris.Phi = np.array([-1, -1, -1, -1, -1])
            # iris_fit_error = np.inf
            pupil_ellipse = np.array(model_pupil.model)
            iris_ellipse = np.array(model_iris.model)
        if no_ellipse:
            pupil_ellipse = np.array([-1, -1, -1, -1, -1])
            iris_ellipse = np.array([-1, -1, -1, -1, -1])
    return seg_map, latent.cpu().numpy(), pupil_ellipse, iris_ellipse
average='micro', sample_weight=None)) # Tag unlabelled content untagged_raw = pd.read_csv(os.path.join(DATADIR, 'untagged_content.csv.gz'), dtype=object, compression='gzip') new_texts = untagged_raw['combined_text'] pred_untagged = get_predictions(new_texts=new_texts, df=untagged_raw, model=model, labels_index=labels_index, tokenizer=tokenizer, logger=logger, max_sequence_length=MAX_SEQUENCE_LENGTH, p_threshold=P_THRESHOLD, level1taxon=False) logger.debug('Number of unique content items: %s', pred_untagged.content_id.nunique()) logger.debug( 'Number of content items tagged to taxons with more than p_threshold: %s', pred_untagged.shape) # TODO set 0.65 and 0.85 as environment vars pred_untagged.loc[(pred_untagged['probability'] > 0.65) & (pred_untagged['probability'] < 0.85)].sort_values(
import torch
import pandas as pd
from dataset import create_data_loader
from model import multimodal
from utils import get_predictions
from zipfile import ZipFile

# Inference script: load a trained ViT+BERT multimodal model, unpack the test
# images, and run prediction over the test-caption CSV.

LOAD_MODEL = True
device = 'cuda'
model = multimodal()
model = model.to(device)
if LOAD_MODEL:
    # Weights checkpoint produced by a prior training run.
    model.load_state_dict(torch.load('vit-bert-1.0val.bin'))
df_test = pd.read_csv('test_captions.csv')
# Drop the stray index column that pandas round-tripping leaves behind.
df_test.drop('Unnamed: 0', axis=1, inplace=True)
extract_path = 'test_img.zip'
with ZipFile(extract_path, 'r') as zipObj:
    zipObj.extractall()
# NOTE(review): `tokenizer`, `MAX_LEN`, `BATCH_SIZE` and `my_trans` are not
# defined anywhere in this script — presumably they are meant to be imported
# from the training module; verify before running, this raises NameError as-is.
test_data_loader = create_data_loader(df_test, tokenizer, MAX_LEN, BATCH_SIZE,
                                      my_trans, 'test_img', False)
submission_preds = get_predictions(model, test_data_loader, device)
def main(output_dir, n_attentions, image_shape, batch_size, learning_rate,
         gpu):
    """Perform model training.

    Trains a 196-class attention-based classifier with three passes per batch:
    the raw image, an attention-guided crop, and an attention-drop image
    (WS-DAN-style augmentation — presumably; confirm against the Model class).
    Validation and a checkpoint dump happen at the end of every epoch.

    Args:
        output_dir: directory where per-epoch ``.ckpt`` files are written.
        n_attentions: number of attention maps produced by the model.
        image_shape: spatial size used for datasets and interpolation.
        batch_size: mini-batch size for both loaders.
        learning_rate: Adam learning rate / SuperConvergence max_lr.
        gpu: truthy to move model, data and feature centers to CUDA.
    """
    # initialize the dataset
    train_set = TrainDataset(phase='train', shape=image_shape)
    val_set = TrainDataset(phase='val', shape=image_shape)
    train_loader = DataLoader(train_set,
                              batch_size=batch_size,
                              shuffle=True,
                              num_workers=8,
                              pin_memory=True)
    val_loader = DataLoader(val_set,
                            batch_size=batch_size,
                            shuffle=True,
                            num_workers=8,
                            pin_memory=True)
    # initialize the model
    model = Model(n_classes=196,
                  input_size=image_shape,
                  n_attentions=n_attentions,
                  gpu=gpu)
    if gpu:
        model = model.cuda()
    # initialize related optimization methods
    criterion = nn.CrossEntropyLoss()
    criterion_attention = nn.MSELoss()
    optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)
    # Per-class running centers of the attention feature matrices; pulled
    # toward each batch's features below (center loss style).
    feature_center = torch.zeros(196, n_attentions * 2208)
    scheduler = SuperConvergence(optimizer,
                                 max_lr=learning_rate,
                                 stepsize=5000,
                                 better_as_larger=False,
                                 last_epoch=-1)
    if gpu:
        feature_center = feature_center.cuda()
    # initialize other hyperparameters
    crop_threshold = 0.5
    drop_threshold = 0.5
    focal_weight = 0.4
    # perform the training
    # NOTE(review): this loop has no break; termination apparently relies on
    # SuperConvergence.step(stop=...) at epoch 10 — confirm that behavior.
    epoch = 0
    while True:
        print('Starting epoch {:03d}'.format(epoch))
        # statistic tracking
        train_loss_tracker = Tracker()
        train_accuracy_tracker = Tracker()
        model = model.train()
        for idx, (X, y) in enumerate(train_loader):
            if gpu:
                X = X.cuda()
                y = y.cuda()
            mini_batch = X.size(0)
            # Pass 1: raw images — classification loss plus attraction of the
            # batch feature matrices to their class centers.
            logits, feature_matrix, sampled_attentions = model(X)
            loss = (criterion(logits, y) +
                    criterion_attention(feature_matrix, feature_center[y]))
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # EMA-style update of the class centers toward the new features.
            feature_center[y] = feature_center[y] + (
                focal_weight * (feature_matrix.detach() - feature_center[y]))
            preds, _ = get_predictions(logits.squeeze().cpu().data.numpy())
            preds = np.array(preds) == y.cpu().squeeze().data.numpy()
            accuracy = np.mean(preds)
            train_loss_tracker.step(loss.item() * mini_batch, mini_batch)
            train_accuracy_tracker.step(accuracy * mini_batch, mini_batch)
            # perform data cropping: upsample attentions, threshold them, and
            # crop each image to the attention's bounding box.
            with torch.no_grad():
                crop_attentions = F.interpolate(
                    sampled_attentions.unsqueeze(1),
                    size=image_shape,
                    mode='bilinear',
                    align_corners=False)
                crop_attentions = crop_attentions > crop_threshold
                cropped_images = []
                for _idx in range(crop_attentions.size(0)):
                    # Bounding box of above-threshold attention pixels
                    # (dims: [channel, row, col] after indexing the batch).
                    positive_indices = torch.nonzero(crop_attentions[_idx])
                    x_min = torch.min(positive_indices[:, 2])
                    y_min = torch.min(positive_indices[:, 1])
                    x_max = torch.max(positive_indices[:, 2])
                    y_max = torch.max(positive_indices[:, 1])
                    cropped_image = F.interpolate(
                        crop_attentions[_idx, :, y_min:y_max + 1,
                                        x_min:x_max + 1].float().unsqueeze(0)
                        * X[_idx, :, y_min:y_max + 1,
                            x_min:x_max + 1].unsqueeze(0),
                        size=image_shape,
                        mode='bilinear',
                        align_corners=False)
                    cropped_images.append(cropped_image)
                cropped_images = torch.cat(cropped_images, dim=0)
            # Pass 2: train on the attention-cropped images.
            logits, _, _ = model(cropped_images)
            loss = criterion(logits, y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # perform attention dropping: mask out high-attention regions to
            # force the model to use complementary evidence.
            with torch.no_grad():
                drop_attentions = F.interpolate(
                    sampled_attentions.unsqueeze(1),
                    size=image_shape,
                    mode='bilinear',
                    align_corners=False)
                drop_attentions = (drop_attentions < drop_threshold).float()
                dropped_images = drop_attentions * X
            # Pass 3: train on the attention-dropped images.
            logits, _, _ = model(dropped_images)
            loss = criterion(logits, y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            stop = (epoch == 10)
            scheduler.step(epoch=None,
                           metrics=train_loss_tracker.get_average(),
                           stop=stop)
            if idx % 100 == 0:
                _temp_lr = optimizer.param_groups[0]['lr']
                print('Batch {}, average loss {} - average accuracy {}, lr {}'.
                      format(idx, train_loss_tracker.get_average(),
                             train_accuracy_tracker.get_average(), _temp_lr))
        # do validation pass
        val_loss_tracker = Tracker()
        val_accuracy_tracker = Tracker()
        model = model.eval()
        for X_val, y_val in val_loader:
            if gpu:
                X_val = X_val.cuda()
                y_val = y_val.cuda()
            mini_batch = X_val.size(0)
            with torch.no_grad():
                logits, _, _ = model(X_val)
                val_loss = criterion(logits, y_val)
            preds, _ = get_predictions(logits.squeeze().cpu().data.numpy())
            preds = np.array(preds) == y_val.cpu().squeeze().data.numpy()
            accuracy = np.mean(preds)
            val_loss_tracker.step(val_loss.item() * mini_batch, mini_batch)
            val_accuracy_tracker.step(accuracy * mini_batch, mini_batch)
        # Persist a self-describing checkpoint for this epoch.
        state_dict = {
            'n_classes': 196,
            'input_size': image_shape,
            'n_attentions': n_attentions,
            'state_dict': model.state_dict()
        }
        torch.save(state_dict,
                   os.path.join(output_dir, '{:03d}.ckpt'.format(epoch)))
        print('Validation - loss {}, accuracy {}'.format(
            val_loss_tracker.get_average(),
            val_accuracy_tracker.get_average()))
        epoch += 1
def delay_plot(args):
    """Plot detection quality vs. average detection delay over thresholds.

    Sweeps 500 decision thresholds over per-frame predictions, computing
    precision/recall/F1 plus — per threshold — the mean delay (in seconds,
    assuming 120 fps per the /120.0 below — TODO confirm) between each
    ground-truth event start and its nearest matching predicted event
    (match requires IoU >= 0.5 on the event intervals).  Saves
    ``delay-<metric>.pdf`` curves and a ``delay-iou.pdf`` scatter.

    Args:
        args: namespace with ``run_dir`` pointing at stored predictions.
    """
    n_points = 100
    # targets/predictions: lists of per-sequence (frames x classes) arrays.
    targets, predictions, _ = get_predictions(args.run_dir)
    targets = np.concatenate(targets, axis=0)
    predictions = np.concatenate(predictions, axis=0)
    t = np.linspace(0, 1, 500, endpoint=False)
    # Frame-level precision/recall/F1 for every threshold.
    p, r, f1 = [], [], []
    for thr in tqdm(t):
        pp, rr, ff1, _ = precision_recall_fscore_support(
            targets.ravel(), predictions.ravel() > thr, average='binary')
        p.append(pp)
        r.append(rr)
        f1.append(ff1)
    p, r, f1 = map(np.array, (p, r, f1))
    # keep = p > 0.75
    # t = t[keep]
    # f1 = f1[keep]
    # offset = len(f1) // n_points
    # f1 = f1[::offset]
    # t = t[::offset]
    print('Num. Thresholds:', len(t))

    def find_annotations(curve):
        # Yield (start, end, duration) for each run of 1s in a binary curve.
        start = 0
        for value, sublist in itertools.groupby(curve):
            duration = len(list(sublist))
            if value == 1:  # skip 0s
                end = start + duration - 1
                yield (start, end, duration)
            start += duration

    def iou(annot, ground):
        # Interval IoU between row-aligned (start, end, duration) arrays.
        # min of ends - max of starts + 1
        intersection = np.minimum(annot[:, 1], ground[:, 1]) - np.maximum(
            annot[:, 0], ground[:, 0]) + 1
        intersection = np.maximum(intersection, 0)
        # union = sum of durations - intersection
        union = annot[:, 2] + ground[:, 2] - intersection
        return intersection / union

    n_classes = targets.shape[1]
    # find all annotations of groundtruth
    ground = []
    n_annotations = 0
    for i in range(n_classes):
        annot = find_annotations(targets[:, i])
        annot = np.array(list(annot))
        ground.append(annot)
        n_annotations += annot.shape[0]
    # iterate over thresholds
    global_ious = []
    global_delays = []
    global_thr = []
    mean_delays = []
    accuracies = []
    for thr in tqdm(t):
        hard_predictions = predictions > thr
        delays = []
        accuracy = 0
        for i in range(n_classes):
            annot = find_annotations(hard_predictions[:, i])
            annot = np.array(list(annot))
            if annot.size:
                # For each ground-truth start, search the nearest start of an annotation:
                # - compute start distances between all (prediction, gt) pair
                all_delays = annot[:, 0].reshape(
                    1, -1) - ground[i][:, 0].reshape(-1, 1)
                # discard negative delays
                # all_delays = all_delays.astype(np.float32)
                # all_delays[all_delays < 0] = np.inf
                # - find the nearest annotations in terms of start frame
                nearest_annot_idx = np.argmin(np.absolute(all_delays), axis=1)
                nearest_annot = annot[nearest_annot_idx]
                nearest_delays = all_delays[np.arange(all_delays.shape[0]),
                                            nearest_annot_idx]
                # - keep only valid annotations (IoU > 0.5)
                annot_ious = iou(nearest_annot, ground[i])
                valid = annot_ious >= 0.5
                valid_delays = nearest_delays[valid]
                global_ious.append(annot_ious[valid])
                global_delays.append(valid_delays)
                global_thr.append(np.ones_like(valid_delays) * thr)
                # save delays and number of valid annotations
                delays.append(valid_delays)
                accuracy += len(valid_delays)
        if delays:
            mean_delay = np.concatenate(delays).mean()
            accuracy /= n_annotations
            # Frames -> seconds at an assumed 120 fps (TODO confirm rate).
            mean_delays.append(mean_delay / 120.0)
            accuracies.append(accuracy)
        else:
            print(thr, 'no valid predictions')
    metrics = (p, r, f1)
    names = ('Precision', 'Recall', 'F1')
    # fig, axes = plt.subplots(2, 2, figsize=(10, 8))
    # for i, (y, ylabel, ax) in enumerate(zip(metrics, names, axes.ravel()[:3])):
    # One figure per metric: metric value against average delay, colored by
    # the threshold that produced each point.
    for i, (y, ylabel) in enumerate(zip(metrics, names)):
        fig = plt.figure(figsize=(5, 4))
        ax = plt.gca()
        ax.plot(mean_delays, y, c='k', linewidth='0.5', zorder=1)
        im = ax.scatter(mean_delays, y, marker='.', c=t, zorder=2)
        fig.colorbar(im,
                     ax=ax,
                     use_gridspec=True,
                     ticks=[0, 0.2, 0.4, 0.6, 0.8, 1.0])
        '''
        plt.minorticks_on()
        plt.grid(b=True, which='minor', linestyle='--', linewidth=0.5)
        xticks = plt.gca().get_xticks()
        labels = ['\\textrm{{{:g}}}\n\\textrm{{({:g})}}'.format(x, round(x*120)) for x in xticks]
        plt.gca().set_xticklabels(labels)
        '''
        ax.set_xlabel(r'\textrm{Average Delay [$s$]}')
        ax.set_ylabel('\\textrm{{{}}}'.format(ylabel))
        # n_thr_points = 10
        # skip = len(y) // n_thr_points
        #
        # show_d = mean_delays[::skip] if skip else mean_delays
        # show_y = y[::skip] if skip else y
        # show_t = t[::skip] if skip else t
        #
        # for d, _y, thr in zip(show_d, show_y, show_t):
        #     # if thr < 0.01: continue
        #     txt = 'T={:3.2f}'.format(thr)
        #     txt = r'\textrm{' + txt + '}'
        #     ax.annotate(txt, xy=(d,_y), fontsize=6)
        ax.set_title('\\textrm{{{} vs Average Delay}}'.format(ylabel))
        plt.tight_layout()
        plt.savefig('delay-{}.pdf'.format(ylabel.lower()))
        plt.close()
    # Last ax
    # ax = axes[1, 1]
    # Scatter of every matched event: delay vs. IoU, colored by threshold.
    fig = plt.figure(dpi=600, figsize=(5, 4))
    ax = plt.gca()
    global_delays = np.concatenate(global_delays)
    global_ious = np.concatenate(global_ious)
    global_thr = np.concatenate(global_thr)
    ax.set_title(r'\textrm{Delay vs IoU}')
    ax.set_xlabel(r'\textrm{IoU}')
    ax.set_ylabel(r'\textrm{Delay (frames)}')
    ax.set_xlim([0.48, 1])
    ax.set_ylim([-500, 300])
    im = ax.scatter(global_ious,
                    global_delays,
                    1,
                    c=global_thr,
                    rasterized=True)
    fig.colorbar(im,
                 ax=ax,
                 use_gridspec=True,
                 ticks=[0, 0.2, 0.4, 0.6, 0.8, 1.0])
    fig.tight_layout()
    plt.savefig('delay-iou.pdf')
    plt.close()