def predict():
    try:
        input_data = flask.request.json['comment']
        preds = get_predictions(input_data)
        return flask.jsonify(preds)
    except (TypeError, KeyError):
        return "JSON containing 'comment' key must be passed", 400
Example #2
def lossandaccuracy(loader, model, factor):
    epoch_loss = []
    ious = []
    model.eval()
    with torch.no_grad():
        for i, batchdata in enumerate(loader):
            img, labels, index, spatialWeights, maxDist = batchdata
            data = img.to(device)

            target = labels.to(device).long()
            output = model(data)

            ## cross-entropy is computed per pixel and weighted by (1 + spatialWeights), the Canny-edge emphasis
            CE_loss = criterion(output, target)
            loss = CE_loss * (1.0 + spatialWeights.to(torch.float32).to(device))
            loss = torch.mean(loss)
            loss_dice = criterion_DICE(output, target)
            loss_sl = torch.mean(
                criterion_SL(output.to(device), (maxDist).to(device)))

            ## total loss is the weighted sum of the surface loss and dice loss, plus the boundary-weighted cross-entropy loss
            loss = (1 - factor) * loss_sl + factor * (loss_dice) + loss

            epoch_loss.append(loss.item())
            predict = get_predictions(output)
            iou = mIoU(predict, labels)
            ious.append(iou)
    return np.average(epoch_loss), np.average(ious)
Example #3
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--EXP_NAME', type=str, default='segment')
    parser.add_argument('--EXP_DIR',
                        type=str,
                        default='/home/yangle/result/TrainNet/')
    parser.add_argument('--DATASET_PATH',
                        type=str,
                        default='/home/yangle/BasicDataset/dataset/MSRA10K/')
    parser.add_argument('--SAVE_DIR',
                        type=str,
                        default='/home/yangle/result/mask/MSRA10K/')
    parser.add_argument('--LEARNING_RATE', type=float, default=1e-4)
    parser.add_argument('--WEIGHT_DECAY', type=float, default=0.0001)
    args = parser.parse_args()

    if not os.path.exists(args.SAVE_DIR):
        os.makedirs(args.SAVE_DIR)

    normalize = transforms.Normalize(mean=saliency.mean, std=saliency.std)
    test_joint_transformer = transforms.Compose(
        [joint_transforms.JointResize(224)])
    test_dset = saliency.TestImage(args.DATASET_PATH,
                                   'val',
                                   joint_transform=None,
                                   transform=transforms.Compose([
                                       transforms.ToTensor(),
                                       normalize,
                                   ]))
    test_loader = torch.utils.data.DataLoader(test_dset,
                                              batch_size=1,
                                              shuffle=False)

    model = tiramisu.FCDenseNet57(in_channels=3, n_classes=2)
    # model = model.cuda()
    model = torch.nn.DataParallel(model).cuda()
    optimizer = optim.RMSprop(model.parameters(),
                              lr=args.LEARNING_RATE,
                              weight_decay=args.WEIGHT_DECAY)

    exper = experiment.Experiment(args.EXP_NAME, args.EXP_DIR)
    # exper.resume(model, optimizer)
    base_path = args.EXP_DIR + args.EXP_NAME + '/weights/'
    weights_fpath = base_path + 'segment-weights-132-0.109-4.278-0.120-4.493.pth'
    optim_path = base_path + 'segment-optim-132.pth'
    exper.resume(model, optimizer, weights_fpath, optim_path)

    # count = 1
    for count, (img, name) in enumerate(test_loader):
        # volatile=True was removed in modern PyTorch; run inference under torch.no_grad()
        with torch.no_grad():
            data = img.cuda()
            output = model(data)
        pred = utils.get_predictions(output)
        pred = pred[0]
        img_name = name[0]
        # img_name = str(name)
        # img_name = img_name.replace('tif', 'png')
        save_path = args.SAVE_DIR + img_name
        torchvision.utils.save_image(pred, save_path)
        print(count)
Example #4
def train(model,
          trainloader,
          validloader,
          device=device,
          EPOCHS=100,
          lr=1e-5,
          debug=False):
    model.train()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    best_acc = -np.inf
    for epoch in range(EPOCHS):
        running_loss = 0.0
        for data in tqdm(trainloader):
            tokens_tensors, segments_tensors, masks_tensors, labels = [
                t.to(device) for t in data
            ]
            optimizer.zero_grad()
            outputs = model(input_ids=tokens_tensors,
                            token_type_ids=segments_tensors,
                            attention_mask=masks_tensors,
                            labels=labels)
            loss = outputs[0]
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            if debug:
                break
        _, _, acc = get_predictions(model, validloader, compute_acc=True)
        print('[epoch %d] loss: %.3f, acc: %.3f' %
              (epoch + 1, running_loss, acc))
        if acc >= best_acc:
            best_acc = acc
            print('accuracy improved, saving model checkpoint')
            joblib.dump(model, 'checkpoint/bert_model.pkl')
Example #5
def calc_entropy(data):
    entropy = 0
    predictions = get_predictions(get_counts(data, LABEL_LOCATION))

    for label in predictions:
        entropy -= predictions[label] * math.log(predictions[label], 2)

    return entropy
def mixup(model, s_x, s_y, t_x, alpha, num_class, temperature):
    t_y = get_predictions(model, t_x, num_class, temperature)
    beta_distribution = torch.distributions.Beta(alpha, alpha)
    lambdas_mu = beta_distribution.sample([s_x.shape[0]]).to('cuda')
    lambdas_mu = torch.max(lambdas_mu, 1 - lambdas_mu)
    lambdas_mu_features = lambdas_mu.view(lambdas_mu.shape[0], 1, 1, 1)
    lambdas_mu_labels = lambdas_mu.view(lambdas_mu.shape[0], 1)
    x_aug = s_x * lambdas_mu_features + t_x * (1 - lambdas_mu_features)
    y_aug = s_y * lambdas_mu_labels + t_y * (1 - lambdas_mu_labels)
    return x_aug, y_aug
def cutmix(model, s_x, s_y, t_x, alpha, num_class, temperature):
    x_aug = s_x.clone()
    temp_tx = t_x.clone()
    t_y = get_predictions(model, t_x, num_class, temperature)
    mu = np.random.beta(alpha, alpha)
    lmbd = np.maximum(mu, 1 - mu)
    lmbd = torch.tensor(lmbd)
    bbx1, bby1, bbx2, bby2 = generate_bbox(s_x.size(), lmbd)
    x_aug[:, :, bbx1:bbx2, bby1:bby2] = temp_tx[:, :, bbx1:bbx2, bby1:bby2]
    y_aug = (lmbd * s_y.float()) + ((1 - lmbd) * t_y.float())
    return x_aug.cuda(), y_aug.cuda()
Example #8
def evaluate(ckpt, image_folder, gpu):
    """Run model evaluation
    
    Running this evaluation script will output a csv file, `result.csv` at the
    working directory.
    """
    model_info = torch.load(ckpt, map_location='cpu')
    model = Model_(ckpt=ckpt, gpu=gpu).eval()
    if gpu:
        model = model.cuda()

    # dataset
    dataset = TestDataset(image_folder=image_folder,
                          shape=model_info['input_size'])
    dataloader = DataLoader(dataset, batch_size=1)

    filenames = []
    logits = []
    predicted_indices, predicted_strings = [], []
    for filename, each_image in dataloader:
        print(filename[0])
        if gpu:
            each_image = each_image.cuda()

        with torch.no_grad():
            logit, _, _ = model(each_image)
            logit = logit.squeeze()
            logit = torch.softmax(logit, dim=0)
            logit = logit.cpu().data.numpy()
            predicted_idx, predicted_str = get_predictions(logit)

            logits.append(logit)
            filenames.append(filename)
            predicted_indices.append(predicted_idx)
            predicted_strings.append(predicted_str)

    # write out the result
    filenames = pd.DataFrame(np.stack(filenames, axis=0))
    logits = pd.DataFrame(np.stack(logits, axis=0))
    predicted_indices = pd.DataFrame(np.stack(predicted_indices, axis=0))
    predicted_strings = pd.DataFrame(np.stack(predicted_strings, axis=0))

    # save the confidence and result
    confidence = pd.concat([filenames, logits], axis=1)
    confidence.to_csv('./data/confidence.csv', index=False, header=False)

    result = pd.concat([filenames, predicted_indices, predicted_strings],
                       axis=1)
    result.to_csv('./data/result.csv', index=False, header=False)

    return logits
Example #9
def get_mask_from_cv2_image(image, model, useGpu=True, pupilOnly=False):
    if useGpu:
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")

    img = image.unsqueeze(1)
    data = img.to(device)
    output = model(data)
    predict = get_predictions(output)
    pred_img = 1 - predict[0].cpu().numpy()/3.0
    if pupilOnly:
        pred_img = np.ceil(pred_img) * 0.5
    return pred_img
def evaluate(validation_generator):
    # evaluate the model with validation set
    y_true = np.array([0] * len(os.listdir('../data/validation/cats/')) + [1] * len(os.listdir('../data/validation/dogs/')))
    model = load_model('../CNN/cnn.h5')
    print(model.summary())
    scores = model.evaluate_generator(validation_generator)
    print('val_loss: {}, val_acc: {}'.format(scores[0], scores[1]))
    y_pred = get_predictions(model, validation_generator)
    cm = confusion_matrix(y_true, y_pred)
    cm_percent = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
    # plot percentage confusion matrix
    plot_confusion_matrix(cm_percent, class_names=['Cat', 'Dog'])
    plt.savefig('../MLP/cm_percent_val.png', format='png')
    plt.show()
Example #11
def get_mask_from_path(path: str, model, useGpu=True):
    if useGpu:
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")
    clahe = cv2.createCLAHE(clipLimit=1.5, tileGridSize=(8, 8))

    pilimg = Image.open(path).convert("L")
    table = 255.0 * (np.linspace(0, 1, 256) ** 0.8)
    pilimg = cv2.LUT(np.array(pilimg), table)
    img = clahe.apply(np.array(np.uint8(pilimg)))
    # the original converted back to a PIL Image and called unsqueeze() on it,
    # which would fail; build the tensor directly instead (scaling to [0, 1]
    # here is an assumption about the expected normalization)
    img = torch.from_numpy(img.astype(np.float32) / 255.0).unsqueeze(0)
    img = img.unsqueeze(1)
    data = img.to(device)
    output = model(data)
    predict = get_predictions(output)
    return predict
Example #12
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--DATASET_PATH',
                        type=str,
                        default='/home/zhangdong/database/DUTS/')
    parser.add_argument('--SAVE_DIR',
                        type=str,
                        default='/home/yangle/DAVIS/result/DUTS/')
    args = parser.parse_args()

    normalize = transforms.Normalize(mean=saliency.mean, std=saliency.std)
    # test_joint_transforms = transforms.Compose([joint_transforms.JointResize(224)])
    test_dset = saliency.TestImage(args.DATASET_PATH,
                                   'val',
                                   joint_transform=None,
                                   transform=transforms.Compose([
                                       transforms.Resize((224, 224)),
                                       transforms.ToTensor(), normalize
                                   ]))

    model = tiramisu.FCDenseNet57(n_classes=2)
    # model = model.cuda()
    model = torch.nn.DataParallel(model).cuda()
    weight_path = '/home/yangle/DAVIS/result/TrainNet/tiramisu/weights/latest_weights.pth'
    state = torch.load(weight_path)
    model.load_state_dict(state['state_dict'])
    model = model.module

    test_loader = torch.utils.data.DataLoader(test_dset,
                                              batch_size=1,
                                              shuffle=False)
    count = 1
    for data, name in test_loader:
        # volatile=True was removed in modern PyTorch; run inference under torch.no_grad()
        with torch.no_grad():
            data = data.cuda()
            _, _, hei, wid = data.size()
            output = model(data)
        pred = utils.get_predictions(output)
        pred = pred[0]
        # transforms_size = torchvision.transforms.Resize((hei, wid))
        # mask = transforms_size([pred])
        name = name[0]
        img_name = str(name)
        save_path = args.SAVE_DIR + img_name
        torchvision.utils.save_image(pred, save_path)
        print(count)
        count += 1
Example #13
def main():
	parser = argparse.ArgumentParser()
	parser.add_argument('--DATASET_PATH', type=str, default='/disk5/yangle/DAVIS/dataset/cat_128/process/')
	parser.add_argument('--SAVE_DIR', type=str, default='/disk5/yangle/DAVIS/result/tiramisu/tiramisu57-8chs/')
	parser.add_argument('--LEARNING_RATE', type=float, default=1e-4)
	parser.add_argument('--WEIGHT_DECAY', type=float, default=0.0001)
	args = parser.parse_args()

	if not os.path.exists(args.SAVE_DIR):
		os.makedirs(args.SAVE_DIR)

	normalize = transforms.Normalize(mean=saliency.mean, std=saliency.std)
	test_dset = saliency.Saliency(
		args.DATASET_PATH, 'val', joint_transform=None,
		transform=transforms.Compose([transforms.ToTensor(), normalize, ]))
	test_loader = torch.utils.data.DataLoader(test_dset, batch_size=1, shuffle=False)

	# model = tiramisu.FCDenseNet67(in_channels=8, n_classes=N_CLASSES)
	model = tiramisu.FcDnSubtle(in_channels=8, n_classes=N_CLASSES)

	# model = model.cuda()
	model = torch.nn.DataParallel(model).cuda()
	weights_fpath = 'cat_8ch-111.pth'
	state = torch.load(weights_fpath)
	model.load_state_dict(state['state_dict'])
	model.eval()

	count = 1
	for iord, (img, target, img_cont, fomask, comask) in enumerate(test_loader):
		inputs = torch.cat((img, comask, img_cont, fomask), 1)
		# volatile=True was removed in modern PyTorch; run inference under torch.no_grad()
		with torch.no_grad():
			inputs = inputs.cuda()
			output = model(inputs)
		pred = utils.get_predictions(output)
		pred = pred[0]
		img_name = str(iord) + '.png'
		save_path = args.SAVE_DIR + img_name
		torchvision.utils.save_image(pred, save_path)
		print(count)
		count += 1
Example #14
from utils import FakeNewsDataset, get_predictions, create_mini_batch
from torch.utils.data import DataLoader
from transformers import BertTokenizer
import joblib
import pandas as pd

# load trained model
tokenizer = BertTokenizer.from_pretrained("bert-base-chinese")
model = joblib.load('checkpoint/bert_model.pkl')
model.eval()

# load testset
testset = FakeNewsDataset("test", tokenizer=tokenizer)
testloader = DataLoader(testset, batch_size=256, collate_fn=create_mini_batch)

# get predictions
predictions, confidence = get_predictions(model, testloader)

# map the predicted label ids back to label strings
index_map = {v: k for k, v in testset.label_map.items()}

# build the Kaggle submission file
df = pd.DataFrame({"Category": predictions.tolist()})
df['Category'] = df.Category.apply(lambda x: index_map[x])
df['Confidence'] = confidence.detach().cpu().numpy()
df_pred = pd.concat([testset.df.loc[:, ["Id"]], df.loc[:, ['Category','Confidence']]], axis=1)
print(df_pred.head())

# the Confidence column is not needed for the Kaggle upload
df_pred = df_pred.drop('Confidence',axis=1)
df_pred.to_csv('output/bert_1_prec_training_samples.csv', index=False)
print('save done!')
Example #15
    def __init__(self, rows):
        self.counts = get_counts(rows, LABEL_LOCATION)
        self.predictions = get_predictions(self.counts)
        self.data_class = None
Example #16
def get_pupil_ellipse_from_cv2_image(image,
                                     model,
                                     useGpu=True,
                                     predict=None,
                                     isEllseg=False,
                                     ellsegPrecision=None,
                                     ellsegEllipse=False,
                                     debugWindowName=None):
    """
    OUTPUT FORMAT
    {
        0: center x,
        1: center y,
        2: ellipse major axis radius,
        3: ellipse minor axis radius,
        4: ellipse angle
    }
    """
    if useGpu:
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")
    if predict is None:
        if not isEllseg:
            img = image.unsqueeze(1)
            data = img.to(device)
            output = model(data)
            predict = get_predictions(output)
            pred_img = predict[0].numpy()
        else:  # w:320 h:240
            img = np.array(transforms.ToPILImage()(image).convert("L"))
            img = (img - img.mean()) / img.std()
            img = torch.from_numpy(img).unsqueeze(0).to(
                ellsegPrecision)  # Adds a singleton for channels
            img = img.unsqueeze(0)
            img = img.to(device).to(ellsegPrecision)
            x4, x3, x2, x1, x = model.enc(img)
            op = model.dec(x4, x3, x2, x1, x)

            if ellsegEllipse:  # Option to get ellipse directly from ellseg output
                ellpred = model.elReg(x, 0).view(-1)
                _, _, H, W = img.shape
                H_mat = np.array([[W / 2, 0, W / 2], [0, H / 2, H / 2],
                                  [0, 0, 1]])
                p_cx, p_cy, p_a, p_b, p_theta, _ = my_ellipse(
                    ellpred[5:].tolist()).transform(H_mat)[0]
                return [p_cx, p_cy, p_a, p_b, p_theta]
                # [centerX, centerY, axis1, axis2, angle]

            #elOut = model.elReg(x, 0) # Linear regression to ellipse parameters

            #print(elOut.shape)

            predict = get_predictions(op)
            pred_img = predict[0].numpy()

            # cv2.imshow("ELLIPSE", pred_img/2)
    else:
        pred_img = predict[0].numpy()

    if debugWindowName is not None:
        outIm = pred_img / np.max(pred_img)
        cv2.imshow(debugWindowName, outIm)
    return get_pupil_parameters(pred_img)
dummy_vars = ['touchscreen', 'detachable_keyboard', 'discrete_gpu']
target_vars = ['min_price', 'max_price']
target = 'min_price'
num_vars = [col for col in df.columns if col not in cat_vars + dummy_vars + target_vars]
variable_lists = [cat_vars, dummy_vars, target_vars, num_vars]

df = utils.imputation(df)
utils.drop_columns(df, ['name', 'base_name', 'pixels_y'], variable_lists)
# utils.decrease_cat_size_handling(df, cat_vars, target)
# df = utils.one_hot_encoding(df, cat_vars)
utils.smooth_handling(df, cat_vars, target)

estimator = xgb.XGBRegressor(n_estimators=200, max_depth=4, gamma=0.3, colsample_bytree=0.6, subsample=1, min_child_weight=15)

df_min = utils.fit_predict(df, estimator, target, 'id', 'MIN')
df_complete_predictions = utils.get_predictions(df, estimator, target, 'id', 'min_price_pred')


##### max_price
train_min = pd.read_csv("train.csv")
train_min.drop(columns=['min_price'], inplace=True)
test_min = pd.read_csv("test.csv")
df = utils.merge_train_test(train_min, test_min, 'min_price')

cat_vars = ['name', 'brand', 'base_name', 'cpu', 'cpu_details', 'gpu', 'os', 'os_details', 'screen_surface']
dummy_vars = ['touchscreen', 'detachable_keyboard', 'discrete_gpu']
target_vars = ['min_price', 'max_price']
target = 'max_price'
num_vars = [col for col in df.columns if col not in cat_vars + dummy_vars + target_vars]
variable_lists = [cat_vars, dummy_vars, target_vars, num_vars]
Example #18
    while infinite:
        camlinks_closed_count = 0
        # print(infinite)
        for ix, cam in enumerate(cams):
            # print(ix)
            
            # if cam.grabbed:
            if cam.more():
                frame_time, frame = cam.read()
                face_locations = detector.detect_faces(frame)

                if len(face_locations) > 0:
                    detected_faces = detector.extract_faces(frame, face_locations)
                    embeddings = facenet.get_embeddings(detected_faces)
                    if embeddings is not None:
                        predictions = u.get_predictions(embeddings, annoy_object, labels, DEFAULT_THRESH)
                    
                        for pred in predictions:
                            stuID = pred[0]
                            dist = pred[1]
                            if stuID != "Unknown":
                                log = f"{frame_time} -- {stuID} - {dist}"
                                print(log)
                                # day = frame_time.date().strftime("%d/%m/%Y")
                                # tm = frame_time.time().strftime("%H:%M:%S")
                                if stuID not in logs:
                                    logs[stuID] = [{"dt":frame_time, "dist":dist}]
                                else:
                                    lastLog = logs[stuID][-1]
                                    minutes = int((frame_time - lastLog["dt"]).total_seconds()//60)
                                    
Example #19
def get_mask_from_cv2_image(image,
                            model,
                            useGpu=True,
                            pupilOnly=False,
                            includeRawPredict=False,
                            channels=3,
                            trim_pupil=False,
                            isEllseg=False,
                            ellsegPrecision=None,
                            useEllsegEllipseAsMask=False):
    if useGpu:
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")

    if not isEllseg:
        img = image.unsqueeze(1)
        data = img.to(device)
        output = model(data)
        rawpredict = get_predictions(output)
        predict = rawpredict + 1
        # print(np.unique(predict[0].cpu().numpy()))
        pred_img = 1 - predict[0].cpu().numpy() / channels
    else:
        img = np.array(Image.fromarray(image).convert("L"))
        img = (img - img.mean()) / img.std()
        img = torch.from_numpy(img).unsqueeze(0).to(
            ellsegPrecision)  # Adds a singleton for channels
        img = img.unsqueeze(0)
        img = img.to(device).to(ellsegPrecision)
        x4, x3, x2, x1, x = model.enc(img)
        op = model.dec(x4, x3, x2, x1, x)
        rawpredict = get_predictions(op)
        plt.imshow(rawpredict[0], cmap="BrBG", alpha=0.3)
        if useEllsegEllipseAsMask:
            ellpred = model.elReg(x, 0).view(-1)
            #i1, i2, i3, i4, i5, p1, p2, p3, p4, p5 = ellpred[0].cpu().detach().numpy()
            _, _, H, W = img.shape
            H_mat = np.array([[W / 2, 0, W / 2], [0, H / 2, H / 2], [0, 0, 1]])

            #import pdb
            #pdb.set_trace()
            i_cx, i_cy, i_a, i_b, i_theta, _ = my_ellipse(
                ellpred[:5].tolist()).transform(H_mat)[0]
            p_cx, p_cy, p_a, p_b, p_theta, _ = my_ellipse(
                ellpred[5:].tolist()).transform(H_mat)[0]

            ellimage = np.full((int(H), int(W)), 2 / 3)
            startAngle = 0
            endAngle = 360
            iris_color = 1 / 3
            pupil_color = 0.0
            pred_img = draw_ellipse(ellimage, (i_cx, i_cy), (i_a, i_b),
                                    i_theta, startAngle, endAngle, iris_color,
                                    -1)
            pred_img = draw_ellipse(ellimage, (p_cx, p_cy), (p_a, p_b),
                                    p_theta, startAngle, endAngle, pupil_color,
                                    -1)
        else:
            predict = rawpredict + 1
            pred_img = 1 - predict[0].cpu().numpy() / channels

    #print(pred_img)
    # trim pupil if asked to
    if trim_pupil:
        newimg = np.invert(pred_img > 0)
        labeled_img = measure.label(newimg)
        labels = np.unique(labeled_img)
        newimg = np.zeros((newimg.shape[0], newimg.shape[1]))
        old_sum = 0
        old_label = None
        for label in (y for y in labels if y != 0):
            if np.sum(labeled_img == label) > old_sum:
                old_sum = np.sum(labeled_img == label)
                old_label = label
        if old_label is not None:
            newimg = newimg + (labeled_img == old_label)
        newimg[newimg == 0] = 2
        newimg[newimg == 1] = 0
        newimg[newimg == 2] = 1
        pred_img[pred_img == 0] = 1 - (1 / channels)
        pred_img[newimg == 0] = 0

    #print(np.unique(pred_img))
    if pupilOnly:
        pred_img = np.ceil(pred_img) * 0.5
    if includeRawPredict:
        return pred_img, rawpredict
    return pred_img
Example #20
def predict():
    predictions = get_predictions(request)

    return jsonify(predictions)
Example #21
def main(args):

    if args.delay:
        delay_plot(args)
        return

    if args.pr:
        targets, predictions, _ = get_predictions(args.run_dir, stream=False)
        pr_fname = os.path.join(args.run_dir, 'pr.npz')
        _ = find_thresholds(targets, predictions, stream=False, pr=pr_fname)
        return

    if args.compute_metrics or args.plot_predictions:
        run_info, model, loader = load_run(args.run_dir, data=args.data)
        params = run_info[-1]
        labels = np.array([
            a.replace('hdm05_', '')
            for a in loader[1].dataset.action_descriptions
        ])

    if args.compute_metrics:
        rows = []
        thr_tab = pd.DataFrame(
            index=labels,
            columns=pd.MultiIndex.from_product([['fair', 'unfair'],
                                                ['stream', 'sequences']]))
        for stream, fair in itertools.product((False, True), repeat=2):

            targets, predictions, annot_time = get_predictions(
                args.run_dir, stream=stream, force=args.force)
            thr_targets, thr_predictions = targets, predictions
            if fair:
                thr_targets, thr_predictions, _ = get_predictions(
                    args.run_dir, train=True, stream=stream, force=args.force)
                train_targets = thr_targets

            print('Stream: {} Fair: {}'.format(stream, fair))
            thrs = find_thresholds(thr_targets, thr_predictions, stream=stream)

            thr_tab[(('fair' if fair else 'unfair'),
                     ('stream' if stream else 'sequences'))] = thrs[1]
            metrics = compute_metrics(targets,
                                      predictions,
                                      thrs,
                                      stream=stream)
            row = (stream, fair) + metrics + (annot_time, )
            rows.append(row)

        thr_tab['train_support'] = train_targets.sum(axis=0)
        thr_tab['test_support'] = targets.sum(axis=0)
        thresholds_file = os.path.join(args.run_dir, 'thresholds.csv')
        thr_tab.to_csv(thresholds_file)

        columns = ('Stream', 'Fair', 'microAP', 'macroAP', 'microF1',
                   'macroF1', 'catMicroF1', 'catMacroF1', 'AnnotTime')

        metrics = pd.DataFrame.from_records(rows, columns=columns)
        metrics_file = os.path.join(args.run_dir, 'metrics.csv')
        metrics.to_csv(metrics_file)
        print(metrics)

    if args.plot_predictions:
        stream, fair = False, False
        targets, predictions, annot_time = get_predictions(args.run_dir,
                                                           stream=stream,
                                                           force=args.force)
        thr_targets, thr_predictions = targets, predictions
        if fair:
            thr_targets, thr_predictions, _ = get_predictions(args.run_dir,
                                                              train=True,
                                                              stream=stream,
                                                              force=args.force)
            train_targets = thr_targets

        thrs = find_thresholds(thr_targets, thr_predictions, stream=stream)

        global_thr, multiple_thrs = thrs
        out = os.path.join(args.run_dir, 'time-analysis.pdf')
        seq_ids = [
            int(loader[1].dataset.data[i]['seq_id'])
            for i in range(len(targets))
        ]

        plot_preditctions(targets, predictions, seq_ids, labels, global_thr,
                          out)
        return
Example #22
                            batch_size=args.bs,
                            shuffle=False,
                            num_workers=0)
    # on Windows, set num_workers to 0 (the original code used 2)
    counter = 0

    os.makedirs('test/labels/', exist_ok=True)
    os.makedirs('test/output/', exist_ok=True)
    os.makedirs('test/mask/', exist_ok=True)

    with torch.no_grad():
        for i, batchdata in tqdm(enumerate(testloader), total=len(testloader)):
            img, labels, index, x, y = batchdata
            data = img.to(device)
            output = model(data)
            predict = get_predictions(output)
            for j in range(len(index)):
                np.save('test/labels/{}.npy'.format(index[j]),
                        predict[j].cpu().numpy())
                try:
                    plt.imsave('test/output/{}.jpg'.format(index[j]),
                               255 * labels[j].cpu().numpy())
                except Exception:
                    # skip labels that cannot be rendered as an image
                    pass

                pred_img = predict[j].cpu().numpy() / 3.0
                inp = img[j].squeeze() * 0.5 + 0.5
                img_orig = np.clip(inp, 0, 1)
                img_orig = np.array(img_orig)
                combine = np.hstack([img_orig, pred_img])
                plt.imsave('test/mask/{}.jpg'.format(index[j]), combine)
Example #23
def main_script():
    args = parse_args()

    print("save = {}".format(args.save))
    print("csv = {}".format(args.csv))

    if args.MWE not in [0, 1, 2, 3, 4]:
        sys.exit('MWE must be 0,1,2,3 or 4')
    if args.num_threads <= 0:
        sys.exit('num_threads must be a positive integer')
    if args.num_epochs <= 0:
        sys.exit('num_epochs must be a positive integer')
    if args.dim <= 0:
        sys.exit('dim must be a positive integer')
    if args.neg_samples <= 0:
        sys.exit('neg_samples must be a positive integer')
    if args.report_schedule <= 0:
        sys.exit('report_schedule must be a positive integer')

    # set report schedule based on MWE case
    #if args.MWE == 1:
    #    report_schedule = 1000
    #else:
    #    report_schedule = 100
    #    max_list_length = 6203

    ######################### LOAD DATA ###########################################
    if args.MWE == 1:
        filename = 'war_and_peace.txt'
        #with open(filename, 'r') as file:
        #data = tokenizer_MWE1(file.read().replace('\n', ' '))
        #print(data)
        lst = []
        with open(filename, "r") as f:
            for line in f:
                for word in line.split(" "):
                    if word == "\n":
                        continue
                    elif "\n" in word:
                        lst.append(word[:-1])
                    else:
                        lst.append(word)
        data_string = listToString(lst, args.MWE)
        print("STRING CREATED")
        text_file = open("w_and_p.txt", "w")
        text_file.write(data_string)
        text_file.close()
        print("STRING WRITTEN TO TEXT FILE")
        data = tokenizer_MWE1(data_string)
        print("STRING TOKENIZED")
        #print(data)

    elif args.MWE == 3:
        if os.path.exists("wire_3match.gz"):
            print("loading from gzip files")
            file = "wire_3match.gz"
            data_list = list(_open_file(file))[0]
            #print(data_list)
        else:
            wire_vocab = set()
            df_wire = pd.read_csv(validation_path)
            for _, record in df_wire.iterrows():
                wire_vocab.add(record["srcWikiTitle"])
                wire_vocab.add(record["dstWikiTitle"])
            wire_vocab = list(wire_vocab)
            print("WiRe vocab loaded successfully")

            files = []
            for _, _, fs in os.walk("data/", topdown=False):
                files += [f for f in fs if f.endswith(".gz")]

            files = [
                os.path.join("data/page_dist_training_data/", f) for f in files
            ]
            data_list = []
            for i, file in tqdm(enumerate(files)):
                sentences = list(_open_file(file))
                data_list += sentences

            original_data_length = len(data_list)

            new_list = []
            for i, page in enumerate(data_list):
                if i % 10000 == 0:
                    print("{}/{}".format(i, original_data_length))
                c = sum(item in page for item in wire_vocab)
                # only include Wikipedia pages that mention at least 3 WiRe elements
                if c >= 3:
                    decoded_page = [
                        x.encode('ascii', 'ignore').decode('ascii')
                        for x in page
                    ]
                    new_list.append(decoded_page)

            data_list = new_list

            print("Original data length = {}".format(original_data_length))
            print("Reduced data length = {}".format(len(new_list)))

            with gzip.open("wire_3match.gz", "w") as tfz:
                tfz.write(json.dumps(new_list))
            tfz.close()

        print("WRITING DATA")
        lst = []
        for entities in tqdm(data_list):
            lst.append(listToString(entities, args.MWE))
            lst.append("\n")
        #print(lst)
        data_string = listToString(lst, args.MWE)
        #print(data_string)
        print("STRING CREATED")
        text_file = open("wire.txt", "w")
        text_file.write(data_string)
        text_file.close()
        print("STRING WRITTEN TO TEXT FILE")
        data = tokenizer_MWE0(data_string)
        print("STRING TOKENIZED")

    elif args.MWE == 4:
        if os.path.exists("wire_video_3match.gz"):
            print("loading from gzip files")
            file = "wire_video_3match.gz"
            data_list = list(_open_file(file))[0]
            #print(data_list)
        else:
            # load wire vocab
            if os.path.exists("wire_video_vocab.pkl"):
                wire_video_vocab = pkl.load(open('wire_video_vocab.pkl', 'rb'))
                print(
                    "WiRe + Video vocab loaded from pickle file successfully")
            else:
                wire_vocab = set()
                df_wire = pd.read_csv(validation_path)
                for _, record in df_wire.iterrows():
                    wire_vocab.add(record["srcWikiTitle"])
                    wire_vocab.add(record["dstWikiTitle"])
                wire_vocab = list(wire_vocab)
                print("WiRe vocab loaded successfully. Length = {} entities".
                      format(len(wire_vocab)))
                # load video vocab
                video_vocab = set()
                path = 'data/wikipedias'
                for i, file in enumerate(
                        glob.glob(os.path.join(
                            path,
                            '*.json'))):  #only process .JSON files in folder.
                    with open(file, encoding='utf-8', mode='r') as f:
                        json_object = json.load(f)
                        for dic in json_object:
                            if dic['lang'] == 'en':  # add only English entities
                                entity = dic['name']
                                video_vocab.add(entity)
                video_vocab = list(video_vocab)
                print("Video vocab loaded successfully. Length = {} entities".
                      format(len(video_vocab)))
                # create combined vocab
                wire_video_vocab = wire_vocab + video_vocab
                pkl.dump(wire_video_vocab,
                         open('wire_video_vocab.pkl', 'wb'),
                         protocol=pkl.HIGHEST_PROTOCOL)
                print(
                    "Combined vocab loaded successfully. Length = {} entities".
                    format(len(wire_video_vocab)))

            # filter wikipedia files with combined vocab
            files = []
            for _, _, fs in os.walk("data/", topdown=False):
                files += [f for f in fs if f.endswith(".gz")]

            files = [
                os.path.join("data/page_dist_training_data/", f) for f in files
            ]
            data_list = []
            for i, file in tqdm(enumerate(files)):
                sentences = list(_open_file(file))
                data_list += sentences

            original_data_length = len(data_list)

            new_list = []
            for i, page in enumerate(data_list):
                if i % 10000 == 0:
                    print("{}/{}".format(i, original_data_length))
                c = sum(item in page for item in wire_video_vocab)
                # only include Wikipedia pages that mention at least 3 combined-vocabulary entities
                if c >= 3:
                    decoded_page = [
                        x.encode('ascii', 'ignore').decode('ascii')
                        for x in page
                    ]
                    new_list.append(decoded_page)

            data_list = new_list

            print("Original data length = {}".format(original_data_length))
            print("Reduced data length = {}".format(len(new_list)))

            with gzip.open("wire_video_3match.gz", "w") as tfz:
                tfz.write(json.dumps(new_list))
            tfz.close()

    else:
        print("\n\n----------- LOADING DATA ----------")
        if os.path.exists("data_list.pkl"):
            #     start = time.time()
            #     print("loading from existing pickle")
            #     pickle_in = open("data_list.pkl","rb")
            #     data_list = pkl.load(pickle_in)
            #     end = time.time()
            #     print("loaded in {} secs".format(round(end - start,2)))
            # else:
            print("loading from gzip files")
            files = []
            for _, _, fs in os.walk("data/", topdown=False):
                if args.MWE == 2:
                    files += [f for f in fs if f.endswith("00000.gz")]
                else:
                    files += [f for f in fs if f.endswith(".gz")]

            files = [
                os.path.join("data/page_dist_training_data/", f) for f in files
            ]
            data_list = []
            with gzip.open(files[0], 'rt') as f:
                for line in f:
                    print(line)
            for i, file in tqdm(enumerate(files)):
                sentences = list(_open_file(file))
                data_list += sentences
            # pickle_out = open("data_list.pkl","wb")
            # pkl.dump(data_list, pickle_out)
            # pickle_out.close()

        #if args.MWE == 2:
        #data_list = data_list[20]

        print("WRITING DATA")
        lst = []
        for entities in tqdm(data_list):
            lst.append(listToString(entities, args.MWE))
            lst.append("\n")
        data_string = listToString(lst, args.MWE)
        print(data_string)
        print("STRING CREATED")
        text_file = open("wikipedia.txt", "w")
        text_file.write(data_string)
        text_file.close()
        print("STRING WRITTEN TO TEXT FILE")
        data = tokenizer_MWE0(data_string)
        print("STRING TOKENIZED")
        #print(data[:2])

    #print(corpus)
    #print(data)

    ################################################################################

    entity_2_idx = defaultdict(lambda: len(entity_2_idx))
    counter = Counter()
    dataset = []

    print("WRITING ENTITY2IDX DICT")
    for entity in tqdm(data):
        entity_2_idx[entity]  # touching the key assigns it the next consecutive id
        counter[entity_2_idx[entity]] += 1
        dataset.append(entity_2_idx[entity])

    # print(entity_2_idx)
    num_tokens = len(entity_2_idx)
    print("num_tokens = {}".format(num_tokens))

    #print(entity_2_idx)
    #print("\n\n")
    #print(counter)
    #print("\n\n")
    #print(dataset[:2])
    dataset_length = len(dataset)
    print(dataset)
    print("Dataset length = {}".format(dataset_length))

    # load the vocabulary
    if args.MWE == 1:
        vocab = Vocabulary(entity_2_idx, tokenizer_MWE1)
    else:
        vocab = Vocabulary(entity_2_idx, tokenizer_MWE0)

    ############################################################################

    # create the embedding to train
    # use 100 dimensional spherical Gaussian with KL-divergence as energy function

    # embed = GaussianEmbedding(num_tokens, dimension,
    #     covariance_type=cov_type, energy_type=E_type)

    embed = GaussianEmbedding(N=num_tokens,
                              neg_samples=args.neg_samples,
                              size=args.dim,
                              covariance_type=cov_type,
                              energy_type=E_type,
                              mu_max=mu_max,
                              sigma_min=sigma_min,
                              sigma_max=sigma_max,
                              init_params={
                                  'mu0': mu0,
                                  'sigma_mean0': sigma_mean0,
                                  'sigma_std0': sigma_std0
                              },
                              eta=args.eta,
                              Closs=args.Closs,
                              verbose_loss=args.verbose_loss,
                              verbose_gradients=args.verbose_gradients,
                              grad_weight_by_Npairs=args.grad_weight_by_Npairs)

    ###########################################################################

    # open the corpus and train with args.num_threads workers;
    # the corpus is just an iterator of documents, e.g. a newline-separated
    # gzip file

    if print_init_embeddings:
        print("---------- INITIAL EMBEDDING MEANS ----------")
        print(embed.mu)
        print("---------- INITIAL EMBEDDING COVS ----------")
        print(embed.sigma)

    epoch_losses = []
    epoch_fwd_KL_pears = []
    epoch_fwd_KL_spears = []
    epoch_rev_KL_pears = []
    epoch_rev_KL_spears = []
    epoch_fisher_pears = []
    epoch_fisher_spears = []
    epoch_cos_pears = []
    epoch_cos_spears = []
    epoch_times = []

    train_time_start = time.time()

    for e in range(args.num_epochs):
        epoch_start = time.time()
        print("---------- EPOCH {} ----------".format(e + 1))
        #print(embed.mu[:10])
        #print(embed.sigma[:10])
        if args.MWE == 1:
            with open('w_and_p.txt', 'r') as corpus:
                total_num_examples = len(open('w_and_p.txt').readlines())
                if args.dynamic_window_size:
                    epoch_losses.append(
                        embed.train_dynamic(
                            iter_pairs(
                                corpus,
                                vocab,
                                dynamic_window_size=args.dynamic_window_size,
                                batch_size=batch_size,
                                nsamples=args.neg_samples,
                                window=window),
                            n_workers=args.num_threads,
                            total_num_examples=total_num_examples,
                            verbose_pairs=args.verbose_pairs,
                            report_interval=args.report_schedule))
                else:
                    epoch_losses.append(
                        embed.train(iter_pairs(
                            corpus,
                            vocab,
                            dynamic_window_size=args.dynamic_window_size,
                            batch_size=batch_size,
                            nsamples=args.neg_samples,
                            window=window),
                                    n_workers=args.num_threads,
                                    verbose_pairs=args.verbose_pairs,
                                    report_interval=args.report_schedule))

        elif args.MWE == 3:
            with open('wire.txt', 'r') as corpus:
                total_num_examples = len(open('wire.txt').readlines())
                if args.dynamic_window_size:
                    epoch_losses.append(
                        embed.train_dynamic(
                            iter_pairs(
                                corpus,
                                vocab,
                                dynamic_window_size=args.dynamic_window_size,
                                batch_size=batch_size,
                                nsamples=args.neg_samples,
                                window=window),
                            n_workers=args.num_threads,
                            total_num_examples=total_num_examples,
                            verbose_pairs=args.verbose_pairs,
                            report_interval=args.report_schedule))
                else:
                    epoch_losses.append(
                        embed.train(iter_pairs(
                            corpus,
                            vocab,
                            dynamic_window_size=args.dynamic_window_size,
                            batch_size=batch_size,
                            nsamples=args.neg_samples,
                            window=window),
                                    n_workers=args.num_threads,
                                    verbose_pairs=args.verbose_pairs,
                                    report_interval=args.report_schedule))

        else:
            with open('wikipedia.txt', 'r') as corpus:
                total_num_examples = len(open('wikipedia.txt').readlines())
                if args.dynamic_window_size:
                    epoch_losses.append(
                        embed.train_dynamic(
                            iter_pairs(
                                corpus,
                                vocab,
                                dynamic_window_size=args.dynamic_window_size,
                                batch_size=batch_size,
                                nsamples=args.neg_samples,
                                window=window),
                            n_workers=args.num_threads,
                            total_num_examples=total_num_examples,
                            verbose_pairs=args.verbose_pairs,
                            report_interval=args.report_schedule))
                else:
                    epoch_losses.append(
                        embed.train(iter_pairs(
                            corpus,
                            vocab,
                            dynamic_window_size=args.dynamic_window_size,
                            batch_size=batch_size,
                            nsamples=args.neg_samples,
                            window=window),
                                    n_workers=args.num_threads,
                                    verbose_pairs=args.verbose_pairs,
                                    report_interval=args.report_schedule))

        epoch_end = time.time()
        epoch_times.append(round(epoch_end - epoch_start, 2))

        if args.save:
            print("Epoch {} complete. Saving model.".format(e + 1))
            os.chdir("Models/")
            embed.save(
                'model_MWE={}_d={}_e={}_neg={}_eta={}_C={}_epoch={}'.format(
                    args.MWE, args.dim, args.num_epochs, args.neg_samples,
                    args.eta, args.Closs, e + 1),
                vocab=vocab.id2word,
                full=True)
            os.chdir('..')

        print("MEASURING EMBEDDING PERFORMANCE ON VALIDATION DATA")
        actual, pred_KL_fwd, pred_KL_rev, pred_fisher, pred_cos = get_predictions(
            validation_path, e, embed, vocab, is_round=False)

        ### forward KL predictions ###
        pear_r_fwd, _ = pearsonr(actual, pred_KL_fwd)
        spear_r_fwd, _ = spearmanr(actual, pred_KL_fwd)
        print("------ Epoch: {} FORWARD KL SIMILARITY KL[src||dst] ------".
              format(e + 1))
        print("Pearson R: {},  Spearman R: {}".format(pear_r_fwd, spear_r_fwd))

        ### reverse KL predictions ###
        pear_r_rev, _ = pearsonr(actual, pred_KL_rev)
        spear_r_rev, _ = spearmanr(actual, pred_KL_rev)
        print("------ Epoch: {} REVERSE KL SIMILARITY KL[dst||src] ------".
              format(e + 1))
        print("Pearson R: {},  Spearman R: {}".format(pear_r_rev, spear_r_rev))

        ### fisher predictions ###
        pear_fisher, _ = pearsonr(actual, pred_fisher)
        spear_fisher, _ = spearmanr(actual, pred_fisher)
        print("------ Epoch: {} FISHER DISTANCE ------".format(e + 1))
        print("Pearson R: {},  Spearman R: {}".format(pear_fisher,
                                                      spear_fisher))

        ### cosine predictions ###
        pear_r_cos, _ = pearsonr(actual, pred_cos)
        spear_r_cos, _ = spearmanr(actual, pred_cos)
        print("------ Epoch: {} COSINE SIMILARITY OF MEANS ------".format(e +
                                                                          1))
        print("Pearson R: {},  Spearman R: {}".format(pear_r_cos, spear_r_cos))

        epoch_fwd_KL_pears.append(pear_r_fwd)
        epoch_fwd_KL_spears.append(spear_r_fwd)
        epoch_rev_KL_pears.append(pear_r_rev)
        epoch_rev_KL_spears.append(spear_r_rev)
        epoch_fisher_pears.append(pear_fisher)
        epoch_fisher_spears.append(spear_fisher)
        epoch_cos_pears.append(pear_r_cos)
        epoch_cos_spears.append(spear_r_cos)

    train_time_end = time.time()
    training_time = round(train_time_end - train_time_start, 2)

    print("\n\n\nEPOCH LOSSES : {}".format(epoch_losses))
    print("EPOCH fwd KL Pearson R : {}".format(epoch_fwd_KL_pears))
    print("EPOCH fwd KL Spearman R : {}".format(epoch_fwd_KL_spears))
    print("EPOCH rev KL Pearson R : {}".format(epoch_rev_KL_pears))
    print("EPOCH rev KL Spearman R : {}".format(epoch_rev_KL_spears))
    print("EPOCH Fisher Pearson R : {}".format(epoch_fisher_pears))
    print("EPOCH Fisher Spearman R : {}".format(epoch_fisher_spears))
    print("EPOCH cosine Pearson R : {}".format(epoch_cos_pears))
    print("EPOCH cosine Spearman R : {}".format(epoch_cos_spears))
    print("TOTAL TRAININT TIME = {} secs".format(training_time))
    if print_final_embeddings:
        print("---------- FINAL EMBEDDING MEANS ----------")
        print(embed.mu)
        print("---------- FINAL EMBEDDING COVS ----------")
        print(embed.sigma)

    ############################################################################

    if calc_general_and_specific:
        sigma_norms = np.linalg.norm(embed.sigma, axis=1)
        most_general_indices = np.split(sigma_norms,
                                        2)[0].argsort()[-10:][::-1]
        most_specific_indices = np.split(sigma_norms, 2)[0].argsort()[:10]

        idx_2_entity = {v: k for k, v in entity_2_idx.items()}

        print("MOST GENERAL ENTITIES")
        for idx in most_general_indices:
            print(idx_2_entity[idx])

        print("MOST SPECIFIC ENTITIES")
        for idx in most_specific_indices:
            print(idx_2_entity[idx])

    ###########################################################################

    # print("LOADING MODEL")
    # test = GaussianEmbedding(N=num_tokens, size=dimension,
    #           covariance_type=cov_type, energy_type=E_type,
    #           mu_max=mu_max, sigma_min=sigma_min, sigma_max=sigma_max,
    #           init_params={'mu0': mu0,
    #               'sigma_mean0': sigma_mean0,
    #               'sigma_std0': sigma_std0},
    #           eta=eta, Closs=Closs)
    #
    # test.load('model_file_location')

    ###########################################################################

    if calc_similarity_example:
        print("TESTING KL SIMILARITY")
        entity1 = 'Copenhagen'
        entity2 = 'Denmark'
        idx1 = vocab.word2id(entity1)
        idx2 = vocab.word2id(entity2)
        mu1 = embed.mu[idx1]
        Sigma1 = np.diag(embed.sigma[idx1])
        mu2 = embed.mu[idx2]
        Sigma2 = np.diag(embed.sigma[idx2])
        print("ENTITY 1 : {}".format(entity1))
        #print("mu1 = {}".format(mu1))
        #print("Sigma1 = {}".format(Sigma1))
        print("ENTITY 2 : {}".format(entity2))
        #print("mu2 = {}".format(mu2))
        #print("Sigma2 = {}".format(Sigma2))
        forward_KL_similarity = KL_Multivariate_Gaussians(
            mu1, Sigma1, mu2, Sigma2)
        reverse_KL_similarity = KL_Multivariate_Gaussians(
            mu2, Sigma2, mu1, Sigma1)
        print("KL[entity1 || entity2] similarity = {}".format(
            round(forward_KL_similarity, 4)))
        print("KL[entity2 || entity1] similarity = {}".format(
            round(reverse_KL_similarity, 4)))
        print("cosine similarity = {}".format(
            round(cosine_between_vecs(mu1, mu2), 4)))

    ############################################################################

    if calc_nearest_neighbours:
        print("\n\n")
        print("FINDING NEAREST NEIGHBOURS")

        target = "war"
        metric = cosine
        num = 10

        target_idx = entity_2_idx[target]
        neighbours = embed.nearest_neighbors(target=target_idx,
                                             metric=metric,
                                             num=num + 1,
                                             vocab=vocab,
                                             sort_order='similarity')

        print("Target = {}".format(target))
        print("Similarity metric = {}".format(metric))
        for i in range(1, num + 1):
            print("{}: {}".format(i, neighbours[i]))
            # print("rank {}: word = {}, sigma = {}, id = {}, similarity = {}".format(i,neighbours[i][word],neighbours[i][sigma],neighbours[i][id],neighbours[i][similarity]))

    ############################################################################

    if args.csv:
        f_results = 'grid_search_results_MWE={}_threads={}_epochs={}_d={}_neg={}_eta={}_C={}_mu0={}_sig0={}_sigstd0={}_mumax={}_sigmax={}_sigmin={}_windowweighting={}.csv'.format(
            args.MWE, args.num_threads, args.num_epochs, args.dim,
            args.neg_samples, args.eta, args.Closs, mu0, sigma_mean0,
            sigma_std0, mu_max, sigma_max, sigma_min,
            args.grad_weight_by_Npairs)

        hyperparameter_list = [
            "Threads", "Dimension", "Neg samples", "Eta", "Closs"
        ]
        epoch_list = [
            'Epoch {} Loss'.format(i + 1) for i in range(args.num_epochs)
        ]
        pear_r_fwd_list = [
            'Epoch {} fwd KL Pearson R'.format(i + 1)
            for i in range(args.num_epochs)
        ]
        spear_r_fwd_list = [
            'Epoch {} fwd KL Spearman R'.format(i + 1)
            for i in range(args.num_epochs)
        ]
        pear_r_rev_list = [
            'Epoch {} rev KL Pearson R'.format(i + 1)
            for i in range(args.num_epochs)
        ]
        spear_r_rev_list = [
            'Epoch {} rev KL Spearman R'.format(i + 1)
            for i in range(args.num_epochs)
        ]
        pear_r_cos_list = [
            'Epoch {} cosine Pearson R'.format(i + 1)
            for i in range(args.num_epochs)
        ]
        spear_r_cos_list = [
            'Epoch {} cosine Spearman R'.format(i + 1)
            for i in range(args.num_epochs)
        ]
        time_list = [
            'Epoch {} Time'.format(i + 1) for i in range(args.num_epochs)
        ]

        header_list = hyperparameter_list + epoch_list + pear_r_fwd_list + spear_r_fwd_list + pear_r_rev_list + spear_r_rev_list + pear_r_cos_list + spear_r_cos_list + time_list

        if os.path.exists(f_results):
            append_write = 'a'  # append if already exists
        else:
            # write header
            with open(f_results, 'w') as file:
                writer = csv.writer(file)
                writer.writerow(header_list)
            append_write = 'a'  # make a new file if not

        with open(f_results, append_write) as file:
            writer = csv.writer(file)
            hyperparameter_values = [
                args.num_threads, args.dim, args.neg_samples, args.eta,
                args.Closs
            ]
            values_list = hyperparameter_values + epoch_losses + epoch_fwd_KL_pears + epoch_fwd_KL_spears + epoch_rev_KL_pears + epoch_rev_KL_spears + epoch_cos_pears + epoch_cos_spears + epoch_times
            writer.writerow(values_list)
Example #24
def train(cfg):
    gpu_id = cfg["hyperparameters"]["gpu_id"]
    # Use GPU if available
    if gpu_id >= 0:
        assert torch.cuda.is_available()
        device = torch.device("cuda:" + str(gpu_id))
        logging.info("Using GPU {} | {}".format(
            gpu_id, torch.cuda.get_device_name(gpu_id)))
    elif gpu_id == -1:
        device = torch.device("cpu")
        logging.info("Using the CPU")
    else:
        raise NotImplementedError(
            "Device ID {} not recognized. gpu_id = 0, 1, 2 etc. Use -1 for CPU"
            .format(gpu_id))
    _set_seed(cfg["hyperparameters"]["seed"])
    net = bAbI_dnc(cfg)
    _set_seed(cfg["hyperparameters"]["seed"])
    net_lstm = bAbI_lstm(cfg)

    if cfg["hyperparameters"]["distributed_tr"]:
        net = nn.DataParallel(net).to(device)
        net_lstm = nn.DataParallel(net_lstm).to(device)
    else:
        net = net.to(device)
        net_lstm = net_lstm.to(device)
    logging.info("Network successfully constructed")
    optimizer = optim.RMSprop(net.parameters(),
                              lr=cfg["hyperparameters"]["lr"],
                              momentum=cfg["hyperparameters"]["momentum"],
                              eps=1e-10)
    optimizer_lstm = optim.RMSprop(net_lstm.parameters(),
                                   lr=cfg["hyperparameters"]["lr"],
                                   momentum=cfg["hyperparameters"]["momentum"],
                                   eps=1e-10)
    lr_scheduler = optim.lr_scheduler.StepLR(optimizer, 10, gamma=0.8)
    start_epoch = 0
    batch_iter_tr = 0
    batch_iter_val = 0

    # Prepare train and validation datasets
    logging.info("Loading the data ...")
    tr_dataset = bAbI_Dataset(cfg["paths"]["train_data"],
                              cfg["paths"]["lexicon"],
                              cfg["hyperparameters"]["use_one_hot"])
    val_dataset = bAbI_Dataset(cfg["paths"]["val_data"],
                               cfg["paths"]["lexicon"],
                               cfg["hyperparameters"]["use_one_hot"])

    # Prepare the dataloaders
    tr_dataloader = DataLoader(tr_dataset,
                               batch_size=cfg["hyperparameters"]["batch_size"],
                               shuffle=True)
    val_dataloader = DataLoader(
        val_dataset,
        batch_size=cfg["hyperparameters"]["batch_size"],
        shuffle=True)
    loss_weight = tr_dataset.loss_weight.to(device)
    num_classes = tr_dataset.len_lexicon
    modes = ["TRAIN", "VAL"]
    datasets = {"TRAIN": tr_dataset, "VAL": val_dataset}
    dataloaders = {"TRAIN": tr_dataloader, "VAL": val_dataloader}

    # Define tensorBoard logger
    summary_writer = SummaryWriter(cfg["logging"]["tensorboard_dir"])
    idx_to_word = pickle.load(open(cfg["paths"]["idx_to_word"], "rb"))
    num_epochs = cfg["hyperparameters"]["num_epochs"]
    lr = 0
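    # DNC state carried across batches: controller hidden state (chx), memory
    # state (mhx) and read vectors (rv); None means "start fresh"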
    (chx, mhx, rv) = (None, None, None)
    for epoch in range(start_epoch, num_epochs):
        for mode in modes:
            if mode == "TRAIN":
                net.train()
                net_lstm.train()
            else:
                net.eval()
                net_lstm.eval()
            loss_epoch = 0
            acc_epoch = 0
            loss_epoch_lstm = 0
            acc_epoch_lstm = 0
            dataset = datasets[mode]
            dataloader = dataloaders[mode]
            prog_bar = tqdm(dataloader)
            prog_bar.set_description("{} | Epoch {}/{}".format(
                mode, epoch, num_epochs))
            for batch in prog_bar:
                # Send the data to the appropriate device
                for k in batch.keys():
                    if k in ["input", "output", "mask_answer"]:
                        batch[k] = batch[k].to(device)

                optimizer.zero_grad()
                optimizer_lstm.zero_grad()
                with torch.set_grad_enabled(mode == "TRAIN"):
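                    # skip ragged batches; the recurrent DNC state (mhx) is
                    # sized for a fixed batch size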
                    if batch["input"].size(
                            0) != cfg["hyperparameters"]["batch_size"]:
                        continue
                    if cfg["dnc"]["debug"]:
                        logits, (chx, mhx, rv), v = net(
                            batch["input"],
                            None,
                            mhx,
                            None,
                            reset_experience=cfg["dnc"]["reset_experience"],
                            pass_through_memory=cfg["dnc"]["pass_through_mem"])
                    else:
                        logits, (chx, mhx, rv) = net(
                            batch["input"],
                            None,
                            mhx,
                            None,
                            reset_experience=cfg["dnc"]["reset_experience"],
                            pass_through_memory=cfg["dnc"]["pass_through_mem"])

                    logits_lstm = net_lstm(batch["input"])
                    logits_ = logits.view(-1, num_classes)
                    logits_lstm_ = logits_lstm.view(-1, num_classes)
                    output = batch["output"].view(-1)
                    mask = batch["mask_answer"].view(-1)
                    loss = MaskedCrossEntropy(logits_, output, mask)
                    loss_lstm = MaskedCrossEntropy(logits_lstm_, output, mask)

                    if mode == "TRAIN":
                        # Backpropagation with anomaly detection makes it easier to locate the faulty
                        # parts of the net when something undesirable happens, e.g. a layer producing
                        # NaN or Inf values:
                        # with torch.autograd.detect_anomaly():
                        loss.backward()
                        loss_lstm.backward()
                        # clip gradients to avoid explosion
                        nn.utils.clip_grad_norm_(net.parameters(), 10.0)
                        nn.utils.clip_grad_norm_(net_lstm.parameters(), 10.0)

                        optimizer.step()
                        optimizer_lstm.step()
                        lr = optimizer.param_groups[0]["lr"]
                        summary_writer.add_scalar("lr_DNC",
                                                  lr,
                                                  global_step=batch_iter_tr)

                # detach the carried memory state so gradients do not flow
                # across batch boundaries (a truncated-backprop-style cut)
                mhx = {
                    k: (v.detach() if isinstance(v, var) else v)
                    for k, v in mhx.items()
                }
                loss_dict = {"DNC": loss.item(), "LSTM": loss_lstm.item()}
                summary_writer.add_scalars("Loss_" + mode + "_Batch",
                                           loss_dict,
                                           global_step=batch_iter_tr if mode
                                           == "TRAIN" else batch_iter_val)
                # accumulate batch losses
                loss_epoch += loss.item()
                loss_epoch_lstm += loss_lstm.item()
                answers_gt = regroup(batch["answers_gt"],
                                     batch["length_answers"])

                predictions_dnc = get_predictions(logits, batch["answers_idx"])
                predictions_dnc = regroup(predictions_dnc,
                                          batch["length_answers"])
                acc, acc_list = accuracy(answers_gt, predictions_dnc)

                predictions_lstm = get_predictions(logits_lstm,
                                                   batch["answers_idx"])
                predictions_lstm = regroup(predictions_lstm,
                                           batch["length_answers"])
                acc_lstm, acc_list_lstm = accuracy(answers_gt,
                                                   predictions_lstm)

                acc_dict = {"DNC": acc, "LSTM": acc_lstm}
                tb_text_dnc = get_tb_text_babi(answers_gt, predictions_dnc,
                                               acc_list, idx_to_word, 8)
                tb_text_lstm = get_tb_text_babi(answers_gt, predictions_lstm,
                                                acc_list_lstm, idx_to_word, 8)
                summary_writer.add_scalars("Accuracy_" + mode + "_Batch",
                                           acc_dict,
                                           global_step=batch_iter_tr if mode
                                           == "TRAIN" else batch_iter_val)

                summary_writer.add_text("Output_DNC_" + mode + "_Batch",
                                        tb_text_dnc,
                                        global_step=batch_iter_tr
                                        if mode == "TRAIN" else batch_iter_val)

                summary_writer.add_text("Output_LSTM_" + mode + "_Batch",
                                        tb_text_lstm,
                                        global_step=batch_iter_tr
                                        if mode == "TRAIN" else batch_iter_val)

                # Accumulate accuracy
                acc_epoch += acc
                acc_epoch_lstm += acc_lstm

                # update the learning rate using the cyclic scheme
                # and log it to tensorboard.
                # if mode == "TRAIN":
                #     if cfg["hyperparameters"]["lr_schedule"] == "cyclic":
                #         lr = scheduler.get_lr()[0]
                #         scheduler.step()
                #     else:
                #         lr = optimizer.param_groups[0]["lr"]
                #     summary_writer.add_scalar("lr", lr, global_step=batch_iter_tr)

                # Release GPU memory cache
                torch.cuda.empty_cache()
                if mode == "TRAIN":
                    batch_iter_tr += 1
                else:
                    batch_iter_val += 1

            # Compute average epoch losses and accuracies
            if len(dataset) % cfg["hyperparameters"]["batch_size"] == 0:
                total = len(dataloader)
            else:
                total = len(dataloader) - 1
            loss_epoch /= total
            loss_epoch_lstm /= total
            acc_epoch /= total
            acc_epoch_lstm /= total

            loss_dict = {"DNC": loss_epoch, "LSTM": loss_epoch_lstm}
            acc_dict = {"DNC": acc_epoch, "LSTM": acc_epoch_lstm}

            summary_writer.add_scalars("Loss_" + mode + "_Epoch",
                                       loss_dict,
                                       global_step=epoch)
            summary_writer.add_scalars("Accuracy_" + mode + "_Epoch",
                                       acc_dict,
                                       global_step=epoch)

            if mode == "TRAIN":
                _save_checkpoint("dnc", net, optimizer, epoch, batch_iter_tr,
                                 batch_iter_val, lr, cfg)
                _save_checkpoint("lstm", net_lstm, optimizer_lstm, epoch,
                                 batch_iter_tr, batch_iter_val, lr, cfg)
                lr_scheduler.step()
                # Delete the oldest checkpoint if the number of checkpoints exceeds 10 to save disk space.
                checkpoints = [
                    ckpt
                    for ckpt in os.listdir(cfg["logging"]["checkpoints_dir"])
                    if ckpt.endswith("pth")
                ]
                checkpoints = [
                    os.path.join(cfg["logging"]["checkpoints_dir"], checkpoint)
                    for checkpoint in checkpoints
                ]
                if len(checkpoints) > 10:
                    oldest_checkpoint_pth = min(
                        checkpoints,
                        key=lambda s: int(
                            s.split("/")[-1].split("_")[-1].split(".")[0]))
                    os.remove(oldest_checkpoint_pth)
                    if "dnc" in oldest_checkpoint_pth:
                        oldest_checkpoint_pth = oldest_checkpoint_pth.replace(
                            "dnc", "lstm")
                        os.remove(oldest_checkpoint_pth)
                    elif "lstm" in oldest_checkpoint_pth:
                        oldest_checkpoint_pth = oldest_checkpoint_pth.replace(
                            "lstm", "dnc")
                        os.remove(oldest_checkpoint_pth)
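# The MaskedCrossEntropy helper called in the loop above is not shown on this
# page. A minimal sketch of what such a helper typically does, assuming logits
# of shape [N, C], integer targets [N] and a 0/1 mask [N] (the real
# implementation may differ):
import torch
import torch.nn.functional as F

def masked_cross_entropy(logits, targets, mask):
    # per-position loss, keeping only the positions selected by the mask
    loss = F.cross_entropy(logits, targets, reduction='none')
    mask = mask.float()
    return (loss * mask).sum() / mask.sum().clamp(min=1.0)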
Exemple #25
0
    def validate_video(self, loader, base_model, logits_model, criterion,
                       epoch, args):
        """ Run video-level validation on the Charades test set"""
        with torch.no_grad():
            batch_time = AverageMeter()
            ids = []

            sov_prediction = dict()

            # switch to evaluate mode
            base_model.eval()
            logits_model.eval()
            criterion.eval()

            end = time.time()
            for i, (input, s_target, o_target, v_target,
                    meta) in enumerate(loader):
                gc.collect()
                meta['epoch'] = epoch
                # 'async' is a reserved word on Python 3.7+; the kwarg is non_blocking
                s_target = s_target.long().cuda(non_blocking=True)
                o_target = o_target.long().cuda(non_blocking=True)
                v_target = v_target.long().cuda(non_blocking=True)
                input_var = torch.autograd.Variable(input.cuda())
                s_target_var = torch.autograd.Variable(s_target)
                o_target_var = torch.autograd.Variable(o_target)
                v_target_var = torch.autograd.Variable(v_target)

                feat = base_model(input_var)
                s, o, v, so, ov, vs, ss, oo, vv, so_t, ov_t, vs_t, os_t, vo_t, sv_t = logits_model(
                    feat)

                s_output, o_output, v_output, loss = criterion(
                    *((s, o, v, so, ov, vs, ss, oo, vv, so_t, ov_t, vs_t, os_t,
                       vo_t, sv_t) +
                      (s_target_var, o_target_var, v_target_var, meta)),
                    synchronous=True)

                # store predictions
                s_output_video = s_output.max(dim=0)[0]
                o_output_video = o_output.max(dim=0)[0]
                v_output_video = v_output.max(dim=0)[0]

                sov_prediction[meta['id'][0]] = get_predictions(
                    s_output_video.data.cpu().numpy(),
                    o_output_video.data.cpu().numpy(),
                    v_output_video.data.cpu().numpy())

                ids.append(meta['id'][0])
                batch_time.update(time.time() - end)
                end = time.time()

                if i % args.print_freq == 0:
                    print('Test2: [{0}/{1}]\t'
                          'Time {batch_time.val:.3f} ({batch_time.avg:.3f})'.
                          format(i, len(loader), batch_time=batch_time))

            sov_mAP, sov_rec_at_n, sov_mprec_at_n = eval_visual_relation(
                prediction=sov_prediction,
                groundtruth_path=args.groundtruth_lookup)
            print(' * sov_mAP {:.3f}'.format(sov_mAP))
            print(' * sov_rec_at_n', sov_rec_at_n)
            print(' * sov_mprec_at_n', sov_mprec_at_n)
            return sov_mAP, sov_rec_at_n, sov_mprec_at_n
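# AverageMeter above is the usual running-average tracker from the PyTorch
# examples; a minimal sketch matching the .update()/.val/.avg interface used here:
class AverageMeter:
    def __init__(self):
        self.val, self.sum, self.count, self.avg = 0.0, 0.0, 0, 0.0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count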
Exemple #26
0
def evaluate_ellseg_on_image(frame, model):

    assert len(frame.shape) == 4, 'Frame must be [1,1,H,W]'

    with torch.no_grad():
        x4, x3, x2, x1, x = model.enc(frame)
        latent = torch.mean(x.flatten(start_dim=2), -1)
        elOut = model.elReg(x, 0)
        seg_out = model.dec(x4, x3, x2, x1, x)

    seg_out = seg_out.cpu()
    elOut = elOut.squeeze().cpu()
    latent = latent.squeeze().cpu()

    seg_map = get_predictions(seg_out).squeeze().numpy()

    ellipse_from_network = 1 if args.ellseg_ellipses == 1 else 0
    ellipse_from_output = 1 if args.ellseg_ellipses == 0 else 0
    no_ellipse = 1 if args.ellseg_ellipses == -1 else 0

    if ellipse_from_network:
        # Get EllSeg proposed ellipse predictions
        # Ellipse Centers -> derived from segmentation output
        # Ellipse axes and orientation -> Derived from latent space

        _, norm_pupil_center = get_seg2ptLoss(seg_out[:, 2, ...],
                                              torch.zeros(2, ),
                                              temperature=4)
        _, norm_iris_center = get_seg2ptLoss(-seg_out[:, 0, ...],
                                             torch.zeros(2, ),
                                             temperature=4)

        norm_pupil_ellipse = torch.cat([norm_pupil_center, elOut[7:10]])
        norm_iris_ellipse = torch.cat([norm_iris_center, elOut[2:5]])

        # Transformation function H
        _, _, H, W = frame.shape
        H = np.array([[W / 2, 0, W / 2], [0, H / 2, H / 2], [0, 0, 1]])

        pupil_ellipse = my_ellipse(
            norm_pupil_ellipse.numpy()).transform(H)[0][:-1]
        iris_ellipse = my_ellipse(
            norm_iris_ellipse.numpy()).transform(H)[0][:-1]

    if ellipse_from_output:
        # Get ElliFit derived ellipse fits from segmentation mask

        seg_map_temp = copy.deepcopy(seg_map)
        seg_map_temp[seg_map_temp == 2] += 1  # Pupil by PartSeg standard is 3
        seg_map_temp[seg_map_temp == 1] += 1  # Iris by PartSeg standard is 2

        pupilPts, irisPts = getValidPoints(seg_map_temp, isPartSeg=False)

        if np.sum(seg_map_temp == 3) > 50 and type(pupilPts) is not list:
            if args.skip_ransac:
                model_pupil = ElliFit(**{'data': pupilPts})
            else:
                model_pupil = ransac(pupilPts, ElliFit, 15, 40, 5e-3,
                                     15).loop()
        else:
            print('Not enough pupil points')
            model_pupil = type('model', (object, ), {})
            model_pupil.model = np.array([-1, -1, -1, -1, -1])

        if np.sum(seg_map_temp == 2) > 50 and type(irisPts) is not list:
            if args.skip_ransac:
                model_iris = ElliFit(**{'data': irisPts})
            else:
                model_iris = ransac(irisPts, ElliFit, 15, 40, 5e-3, 15).loop()
        else:
            print('Not enough iris points')
            model_iris = type('model', (object, ), {})
            model_iris.model = np.array([-1, -1, -1, -1, -1])
            model_iris.Phi = np.array([-1, -1, -1, -1, -1])
            # iris_fit_error = np.inf

        pupil_ellipse = np.array(model_pupil.model)
        iris_ellipse = np.array(model_iris.model)

    if no_ellipse:
        pupil_ellipse = np.array([-1, -1, -1, -1, -1])
        iris_ellipse = np.array([-1, -1, -1, -1, -1])

    return seg_map, latent.cpu().numpy(), pupil_ellipse, iris_ellipse
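# The 3x3 matrix H above maps normalized ellipse coordinates in [-1, 1] to
# pixel coordinates. A quick standalone check of that mapping (the frame size
# here is hypothetical):
import numpy as np

W_px, H_px = 640, 480
H_mat = np.array([[W_px / 2, 0, W_px / 2],
                  [0, H_px / 2, H_px / 2],
                  [0, 0, 1]])
center = np.array([0.0, 0.0, 1.0])  # normalized image center, homogeneous coords
print(H_mat @ center)               # -> [320. 240.   1.], the pixel-space center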
Exemple #27
0
                                    average='micro',
                                    sample_weight=None))

# Tag unlabelled content

untagged_raw = pd.read_csv(os.path.join(DATADIR, 'untagged_content.csv.gz'),
                           dtype=object,
                           compression='gzip')

new_texts = untagged_raw['combined_text']

pred_untagged = get_predictions(new_texts=new_texts,
                                df=untagged_raw,
                                model=model,
                                labels_index=labels_index,
                                tokenizer=tokenizer,
                                logger=logger,
                                max_sequence_length=MAX_SEQUENCE_LENGTH,
                                p_threshold=P_THRESHOLD,
                                level1taxon=False)

logger.debug('Number of unique content items: %s',
             pred_untagged.content_id.nunique())
logger.debug(
    'Number of content items tagged to taxons with more than p_threshold: %s',
    pred_untagged.shape)

# TODO set 0.65 and 0.85 as environment vars

pred_untagged.loc[(pred_untagged['probability'] > 0.65)
                  & (pred_untagged['probability'] < 0.85)].sort_values(
Exemple #28
0
import torch
import pandas as pd
from dataset import create_data_loader
from model import multimodal
from utils import get_predictions
from zipfile import ZipFile

LOAD_MODEL = True
device = 'cuda'

model = multimodal()
model = model.to(device)

if LOAD_MODEL:
    model.load_state_dict(torch.load('vit-bert-1.0val.bin'))

df_test = pd.read_csv('test_captions.csv')
df_test.drop('Unnamed: 0', axis=1, inplace=True)
extract_path = 'test_img.zip'
with ZipFile(extract_path, 'r') as zipObj:
    zipObj.extractall()

# NOTE: tokenizer, MAX_LEN, BATCH_SIZE and my_trans are not defined in this
# snippet; they are assumed to be defined earlier in the project.
test_data_loader = create_data_loader(df_test, tokenizer, MAX_LEN, BATCH_SIZE,
                                      my_trans, 'test_img', False)
submission_preds = get_predictions(model, test_data_loader, device)
Exemple #29
0
def main(output_dir, n_attentions, image_shape, batch_size, learning_rate,
         gpu):
    """Perform model training"""

    # initialize the dataset
    train_set = TrainDataset(phase='train', shape=image_shape)
    val_set = TrainDataset(phase='val', shape=image_shape)
    train_loader = DataLoader(train_set,
                              batch_size=batch_size,
                              shuffle=True,
                              num_workers=8,
                              pin_memory=True)
    val_loader = DataLoader(val_set,
                            batch_size=batch_size,
                            shuffle=True,
                            num_workers=8,
                            pin_memory=True)

    # initialize the model
    model = Model(n_classes=196,
                  input_size=image_shape,
                  n_attentions=n_attentions,
                  gpu=gpu)
    if gpu:
        model = model.cuda()

    # initialize related optimization methods
    criterion = nn.CrossEntropyLoss()
    criterion_attention = nn.MSELoss()
    optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)
    feature_center = torch.zeros(196, n_attentions * 2208)
    scheduler = SuperConvergence(optimizer,
                                 max_lr=learning_rate,
                                 stepsize=5000,
                                 better_as_larger=False,
                                 last_epoch=-1)
    if gpu:
        feature_center = feature_center.cuda()

    # initialize other hyperparameters
    crop_threshold = 0.5
    drop_threshold = 0.5
    focal_weight = 0.4

    # perform the training
    epoch = 0
    while True:
        print('Starting epoch {:03d}'.format(epoch))

        # statistic tracking
        train_loss_tracker = Tracker()
        train_accuracy_tracker = Tracker()

        model = model.train()
        for idx, (X, y) in enumerate(train_loader):
            if gpu:
                X = X.cuda()
                y = y.cuda()

            mini_batch = X.size(0)
            logits, feature_matrix, sampled_attentions = model(X)

            loss = (criterion(logits, y) +
                    criterion_attention(feature_matrix, feature_center[y]))

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            feature_center[y] = feature_center[y] + (
                focal_weight * (feature_matrix.detach() - feature_center[y]))

            preds, _ = get_predictions(logits.squeeze().cpu().data.numpy())
            preds = np.array(preds) == y.cpu().squeeze().data.numpy()
            accuracy = np.mean(preds)

            train_loss_tracker.step(loss.item() * mini_batch, mini_batch)
            train_accuracy_tracker.step(accuracy * mini_batch, mini_batch)

            # perform data cropping
            with torch.no_grad():
                crop_attentions = F.interpolate(
                    sampled_attentions.unsqueeze(1),
                    size=image_shape,
                    mode='bilinear',
                    align_corners=False)
                crop_attentions = crop_attentions > crop_threshold
                cropped_images = []
                for _idx in range(crop_attentions.size(0)):
                    positive_indices = torch.nonzero(crop_attentions[_idx])
                    x_min = torch.min(positive_indices[:, 2])
                    y_min = torch.min(positive_indices[:, 1])
                    x_max = torch.max(positive_indices[:, 2])
                    y_max = torch.max(positive_indices[:, 1])
                    cropped_image = F.interpolate(
                        crop_attentions[_idx, :, y_min:y_max + 1,
                                        x_min:x_max + 1].float().unsqueeze(0) *
                        X[_idx, :, y_min:y_max + 1,
                          x_min:x_max + 1].unsqueeze(0),
                        size=image_shape,
                        mode='bilinear',
                        align_corners=False)
                    cropped_images.append(cropped_image)
                cropped_images = torch.cat(cropped_images, dim=0)

            logits, _, _ = model(cropped_images)
            loss = criterion(logits, y)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # perform attention dropping
            with torch.no_grad():
                drop_attentions = F.interpolate(
                    sampled_attentions.unsqueeze(1),
                    size=image_shape,
                    mode='bilinear',
                    align_corners=False)
                drop_attentions = (drop_attentions < drop_threshold).float()
                dropped_images = drop_attentions * X

            logits, _, _ = model(dropped_images)
            loss = criterion(logits, y)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            stop = (epoch == 10)
            scheduler.step(epoch=None,
                           metrics=train_loss_tracker.get_average(),
                           stop=stop)

            if idx % 100 == 0:
                _temp_lr = optimizer.param_groups[0]['lr']
                print('Batch {}, average loss {} - average accuracy {}, lr {}'.
                      format(idx, train_loss_tracker.get_average(),
                             train_accuracy_tracker.get_average(), _temp_lr))

        # do validation pass
        val_loss_tracker = Tracker()
        val_accuracy_tracker = Tracker()

        model = model.eval()
        for X_val, y_val in val_loader:
            if gpu:
                X_val = X_val.cuda()
                y_val = y_val.cuda()

            mini_batch = X_val.size(0)

            with torch.no_grad():
                logits, _, _ = model(X_val)
                val_loss = criterion(logits, y_val)

                preds, _ = get_predictions(logits.squeeze().cpu().data.numpy())
                preds = np.array(preds) == y_val.cpu().squeeze().data.numpy()
                accuracy = np.mean(preds)

                val_loss_tracker.step(val_loss.item() * mini_batch, mini_batch)
                val_accuracy_tracker.step(accuracy * mini_batch, mini_batch)

        state_dict = {
            'n_classes': 196,
            'input_size': image_shape,
            'n_attentions': n_attentions,
            'state_dict': model.state_dict()
        }
        torch.save(state_dict,
                   os.path.join(output_dir, '{:03d}.ckpt'.format(epoch)))
        print('Validation - loss {}, accuracy {}'.format(
            val_loss_tracker.get_average(),
            val_accuracy_tracker.get_average()))
        epoch += 1
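# The feature_center update in the training loop above is an exponential moving
# average that pulls each class center toward the batch features. The same step
# in isolation, with hypothetical shapes (196 classes, toy feature dim 32):
import torch

focal_weight = 0.4
centers = torch.zeros(196, 32)            # one center per class
feats = torch.randn(8, 32)                # pooled features for a batch
labels = torch.randint(0, 196, (8,))
centers[labels] = centers[labels] + focal_weight * (feats - centers[labels])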
Exemple #30
0
def delay_plot(args):
    n_points = 100

    targets, predictions, _ = get_predictions(args.run_dir)
    targets = np.concatenate(targets, axis=0)
    predictions = np.concatenate(predictions, axis=0)

    t = np.linspace(0, 1, 500, endpoint=False)
    p, r, f1 = [], [], []
    for thr in tqdm(t):
        pp, rr, ff1, _ = precision_recall_fscore_support(
            targets.ravel(), predictions.ravel() > thr, average='binary')
        p.append(pp)
        r.append(rr)
        f1.append(ff1)

    p, r, f1 = map(np.array, (p, r, f1))

    # keep = p > 0.75
    # t = t[keep]
    # f1 = f1[keep]

    # offset = len(f1) // n_points
    # f1 = f1[::offset]
    # t = t[::offset]

    print('Num. Thresholds:', len(t))

    def find_annotations(curve):
        start = 0
        for value, sublist in itertools.groupby(curve):
            duration = len(list(sublist))
            if value == 1:  # skip 0s
                end = start + duration - 1
                yield (start, end, duration)
            start += duration

    def iou(annot, ground):
        # min of ends - max of starts + 1
        intersection = np.minimum(annot[:, 1], ground[:, 1]) - np.maximum(
            annot[:, 0], ground[:, 0]) + 1
        intersection = np.maximum(intersection, 0)
        # union = sum of durations - intersection
        union = annot[:, 2] + ground[:, 2] - intersection
        return intersection / union

    n_classes = targets.shape[1]

    # find all annotations of groundtruth
    ground = []
    n_annotations = 0
    for i in range(n_classes):
        annot = find_annotations(targets[:, i])
        annot = np.array(list(annot))
        ground.append(annot)
        n_annotations += annot.shape[0]

    # iterate over thresholds
    global_ious = []
    global_delays = []
    global_thr = []
    mean_delays = []
    accuracies = []

    for thr in tqdm(t):
        hard_predictions = predictions > thr

        delays = []
        accuracy = 0
        for i in range(n_classes):
            annot = find_annotations(hard_predictions[:, i])
            annot = np.array(list(annot))
            if annot.size:
                # For each ground-truth start, find the nearest predicted start:
                # - compute start-frame distances for all (prediction, gt) pairs
                all_delays = annot[:, 0].reshape(
                    1, -1) - ground[i][:, 0].reshape(-1, 1)

                # discard negative delays
                # all_delays = all_delays.astype(np.float32)
                # all_delays[all_delays < 0] = np.inf

                # - find the nearest annotations in terms of start frame
                nearest_annot_idx = np.argmin(np.absolute(all_delays), axis=1)
                nearest_annot = annot[nearest_annot_idx]
                nearest_delays = all_delays[np.arange(all_delays.shape[0]),
                                            nearest_annot_idx]
                # - keep only valid annotations (IoU > 0.5)
                annot_ious = iou(nearest_annot, ground[i])
                valid = annot_ious >= 0.5
                valid_delays = nearest_delays[valid]

                global_ious.append(annot_ious[valid])
                global_delays.append(valid_delays)
                global_thr.append(np.ones_like(valid_delays) * thr)

                # save delays and number of valid annotations
                delays.append(valid_delays)
                accuracy += len(valid_delays)

        if delays:
            mean_delay = np.concatenate(delays).mean()
            accuracy /= n_annotations

            mean_delays.append(mean_delay / 120.0)
            accuracies.append(accuracy)
        else:
            print(thr, 'no valid predictions')

    metrics = (p, r, f1)
    names = ('Precision', 'Recall', 'F1')

    #    fig, axes = plt.subplots(2, 2, figsize=(10, 8))
    #    for i, (y, ylabel, ax) in enumerate(zip(metrics, names, axes.ravel()[:3])):

    for i, (y, ylabel) in enumerate(zip(metrics, names)):
        fig = plt.figure(figsize=(5, 4))
        ax = plt.gca()

        ax.plot(mean_delays, y, c='k', linewidth='0.5', zorder=1)
        im = ax.scatter(mean_delays, y, marker='.', c=t, zorder=2)
        fig.colorbar(im,
                     ax=ax,
                     use_gridspec=True,
                     ticks=[0, 0.2, 0.4, 0.6, 0.8, 1.0])
        '''
        plt.minorticks_on()
        plt.grid(b=True, which='minor', linestyle='--', linewidth=0.5)
        
        xticks = plt.gca().get_xticks()
        labels = ['\\textrm{{{:g}}}\n\\textrm{{({:g})}}'.format(x, round(x*120)) for x in xticks]
        plt.gca().set_xticklabels(labels)
        '''

        ax.set_xlabel(r'\textrm{Average Delay [$s$]}')
        ax.set_ylabel('\\textrm{{{}}}'.format(ylabel))

        # n_thr_points = 10
        # skip = len(y) // n_thr_points
        #
        # show_d = mean_delays[::skip] if skip else mean_delays
        # show_y = y[::skip] if skip else y
        # show_t = t[::skip] if skip else t
        #
        # for d, _y, thr in zip(show_d, show_y, show_t):
        #     # if thr < 0.01: continue
        #     txt = 'T={:3.2f}'.format(thr)
        #     txt = r'\textrm{' + txt + '}'
        #     ax.annotate(txt, xy=(d,_y), fontsize=6)

        ax.set_title('\\textrm{{{} vs Average Delay}}'.format(ylabel))
        plt.tight_layout()
        plt.savefig('delay-{}.pdf'.format(ylabel.lower()))
        plt.close()

    # Last ax
    # ax = axes[1, 1]
    fig = plt.figure(dpi=600, figsize=(5, 4))
    ax = plt.gca()

    global_delays = np.concatenate(global_delays)
    global_ious = np.concatenate(global_ious)
    global_thr = np.concatenate(global_thr)

    ax.set_title(r'\textrm{Delay vs IoU}')
    ax.set_xlabel(r'\textrm{IoU}')
    ax.set_ylabel(r'\textrm{Delay (frames)}')
    ax.set_xlim([0.48, 1])
    ax.set_ylim([-500, 300])
    im = ax.scatter(global_ious,
                    global_delays,
                    1,
                    c=global_thr,
                    rasterized=True)

    fig.colorbar(im,
                 ax=ax,
                 use_gridspec=True,
                 ticks=[0, 0.2, 0.4, 0.6, 0.8, 1.0])

    fig.tight_layout()
    plt.savefig('delay-iou.pdf')
    plt.close()
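# A quick numeric check of the segment IoU used above. Segments are
# (start, end, duration) rows; e.g. a prediction covering frames 2..5 against a
# ground-truth annotation covering frames 4..9 (values chosen for illustration):
import numpy as np

pred = np.array([[2, 5, 4]])
gt = np.array([[4, 9, 6]])
# intersection = min(ends) - max(starts) + 1 = 5 - 4 + 1 = 2
intersection = np.maximum(np.minimum(pred[:, 1], gt[:, 1]) -
                          np.maximum(pred[:, 0], gt[:, 0]) + 1, 0)
# union = sum of durations - intersection = 4 + 6 - 2 = 8
union = pred[:, 2] + gt[:, 2] - intersection
print(intersection / union)   # -> [0.25], matching iou(pred, gt)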