Example #1
def test(path):
    model = Model()
    model.to("cuda:0")
    model.eval()
    checkpoint = torch.load("./model.pth", map_location="cuda:0")
    model.load_state_dict(checkpoint["model"])
    img = np.array(Image.open(path).convert("RGB").resize([448, 448]))[np.newaxis]
    img = np.transpose(img, axes=[0, 3, 1, 2]) / 255
    img = torch.tensor(img, dtype=torch.float32).to("cuda:0")
    preds = model(img).cpu().detach().numpy()
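    # Layout assumed by the slicing below: per grid cell the prediction holds
    # B boxes of (x, y, w, h) in channels [0:4*B], the B box confidences in
    # [8:10], and the 20 class scores in [10:]; x/y are cell-relative offsets.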
    cell_h, cell_w = IMG_H / S, IMG_W / S
    x, y = np.meshgrid(range(S), range(S))
    preds_xywhs = []
    for i in range(B):
        preds_x = (preds[0, :, :, i * 4] + x) * cell_w
        preds_y = (preds[0, :, :, i * 4 + 1] + y) * cell_h
        preds_w = preds[0, :, :, i * 4 + 2] * IMG_W
        preds_h = preds[0, :, :, i * 4 + 3] * IMG_H
        preds_xywh = np.dstack((preds_x, preds_y, preds_w, preds_h))
        preds_xywhs.append(preds_xywh)
    preds_xywhs = np.dstack(preds_xywhs)
    preds_xywhs = np.reshape(preds_xywhs, [-1, 4])
    preds_class = preds[0, :, :, 10:]
    preds_class = np.reshape(preds_class, [-1, 20])
    preds_c = preds[0, :, :, 8:10]
    preds_c = np.reshape(preds_c, [-1, 1])
    max_arg = int(np.argmax(preds_c))
    print("max confidence: %f" % preds_c[max_arg, 0])
    max_arg_ = np.argmax(preds_class[max_arg // 2])
    print("class confidence: %f" % preds_class[max_arg // 2, max_arg_])
    print("class category: %s" % CLASSES[int(max_arg_)])
    Image.fromarray(
        np.uint8(
            draw_bboxes(np.array(Image.open(path).convert("RGB").resize([448, 448])),
                        preds_xywhs[max_arg:max_arg + 1]))).show()
Example #2
def ini_model_train(opt):
    X_ini, y_ini, X_test, y_test, X_train_All, y_train_All = ini_model(opt)
    mod = Model().to(device)
    optimizer = optim.SGD(mod.parameters(), lr=opt.ini_lr)
    criterion = nn.CrossEntropyLoss()
    num_batches_train = X_ini.shape[0] // opt.ini_batch_size
    mod.train()
    for i in range(opt.ini_epoch):
        loss = 0
        for j in range(num_batches_train):
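            # get_slice (defined elsewhere in this project) presumably returns
            # the index range covering mini-batch j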
            slce = get_slice(j, opt.ini_batch_size)
            X_tra = torch.from_numpy(X_ini[slce]).float().to(device)
            Y_tra = torch.from_numpy(y_ini[slce]).long().to(device)
            optimizer.zero_grad()
            out = mod(X_tra)
            batch_loss = criterion(out, Y_tra)
            batch_loss.backward()
            optimizer.step()
            loss += batch_loss.detach()  # detach so the graph is not kept across batches
        mod.eval()
        acc = test_without_dropout(X_test, y_test, mod, device)
        mod.train()  # back to training mode for the next epoch
        print('\n[{}/{} epoch], training loss:{:.4f}, test accuracy is:{} \n'.
              format(i, opt.ini_epoch,
                     loss.item() / num_batches_train, acc))
        if i + 1 == opt.ini_epoch:
            for d in range(opt.num_dev):
                torch.save(
                    {
                        'epoch': i,
                        'model_state_dict': mod.state_dict(),
                        'optimizer_state_dict': optimizer.state_dict(),
                        'loss': loss.item()
                    },
                    os.path.join(opt.ini_model_path, 'device' + str(d),
                                 "ini.model.pth.tar"))
            torch.save(
                {
                    'epoch': i,
                    'model_state_dict': mod.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'loss': loss.item()
                }, os.path.join(opt.ini_model_path, "ini.model.pth.tar"))
    return X_test, y_test, X_train_All, y_train_All
Example #3
def ini_train(X_ini, y_ini, X_te, y_te, epochs, paths, device, batch_size, lr,
              momentum, arr_drop):
    mod = Model(arr_drop).to(device)
    optimizer = optim.SGD(mod.parameters(), lr=lr, momentum=momentum)
    criterion = nn.CrossEntropyLoss()
    #batch_size = 200
    num_batches_train = X_ini.shape[0] // batch_size
    print("number of batch ", num_batches_train)
    mod.train()
    for i in range(epochs):
        loss = 0
        for j in range(num_batches_train):
            slce = get_slice(j, batch_size)
            X_tra = torch.from_numpy(X_ini[slce]).float().to(device)
            Y_tra = torch.from_numpy(y_ini[slce]).long().to(device)
            optimizer.zero_grad()
            out = mod(X_tra)
            batch_loss = criterion(out, Y_tra)
            batch_loss.backward()
            optimizer.step()
            loss += batch_loss.detach()  # detach so the graph is not kept across batches
        mod.eval()
        with torch.no_grad():
            X_va = torch.from_numpy(X_te).float().to(device)
            Y_va = torch.from_numpy(y_te).long().to(device)
            output = mod(X_va)
            preds = torch.max(output, 1)[1]
            acc = accuracy_score(Y_va.cpu().numpy(), preds.cpu().numpy())
        mod.train()  # back to training mode for the next epoch
        print('\n[{}/{} epoch], training loss:{:.4f}, test accuracy is:{} \n'.
              format(i, epochs,
                     loss.item() / num_batches_train, acc))
        if i + 1 == epochs:
            for path in paths:
                torch.save(
                    {
                        'epoch': i,
                        'model_state_dict': mod.state_dict(),
                        'optimizer_state_dict': optimizer.state_dict(),
                        'loss': loss.item()
                    }, os.path.join(path, "ini.model.pth.tar"))
    return mod
Example #4
class TestNetwork(unittest.TestCase):
    class RandomData(Dataset):
        def __init__(self, n_images, n_classes, input_transform=None):
            self.n_images = n_images
            self.n_classes = n_classes
            self.input_transform = input_transform

        def __getitem__(self, index):
            images = torch.rand(3, 300, 300)
            labels = random.randint(0, self.n_classes - 1)
            if self.input_transform:
                images = self.input_transform(images)
            return images, labels

        def __len__(self):
            return self.n_images

    def test_network(self):
        input_transform = transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize((300, 300)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ])
        self.realset = TestNetwork.RandomData(n_images=10,
                                              n_classes=7,
                                              input_transform=input_transform)
        self.model = Model(n_classes=7)
        self.model.eval()
        all_features, all_outputs, all_preds, all_labels = predict(
            self.model, self.realset, batch_size=4, n_classes=7, GPUs=None)
        accuracy = np.sum(all_preds == all_labels) / float(len(self.realset))  # fraction of correct predictions
        ap = AP(all_outputs, all_labels)
        mean_ap = meanAP(all_outputs, all_labels)
        self.assertGreaterEqual(mean_ap, 0)
        self.assertLessEqual(mean_ap, 1)
Example #5
def model(dataset, model_name=None, device=None, train=True):
    """加载模型"""
    device = device or torch.device(
        "cuda" if torch.cuda.is_available() else "cpu")
    net = Model(vocab_size=dataset.vocab_size,
                embedding_dim=config.embedding_dim,
                output_size=dataset.target_vocab_size,
                encoder_hidden_size=config.encoder_hidden_size,
                decoder_hidden_size=config.decoder_hidden_size,
                encoder_layers=config.encoder_layers,
                decoder_layers=config.decoder_layers,
                dropout=config.dropout,
                embedding_weights=dataset.vector_weights,
                device=device)
    if model_name:  # if a model name was given, load the corresponding checkpoint
        pre_trained_state_dict = torch.load(FILE_PATH + config.model_path +
                                            model_name,
                                            map_location=device)
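        # merge the pre-trained weights into the model's own state dict so a
        # partial checkpoint can be loaded without missing-key errors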
        state_dict = net.state_dict()
        state_dict.update(pre_trained_state_dict)
        net.load_state_dict(state_dict)
    net.train() if train else net.eval()
    return net
Example #6
val_indices = indices[:split]

# check if dataset load order is correct
# for ind in val_indices:
#     print(ind)
#     data = my_dataset[ind]
#     img = data['image']
#     plt.figure()
#     plt.imshow(img.permute(1,2,0))
#     plt.show()

# load model
model = Model().to(device=device)
model.load_state_dict(torch.load('model_saved.pth', map_location=device))
model = model.float()
model.eval()

for ind in val_indices:
    data = my_dataset[ind]
    img = data['image']
    img = img.to(device=device)
    img = img.unsqueeze(dim=0)
    position_map, feature_maps = model(img)

    position_map = position_map.squeeze()  # must be (128,128)
    feature_maps = feature_maps.squeeze()  # should be (16,128,128)
    feature_maps = feature_maps.permute(1, 2, 0)  # should be (128,128,16)

    position_map = position_map.detach().cpu().numpy()
    feature_maps = feature_maps.detach().cpu().numpy()
Example #7
def main(args: argparse.Namespace):
    # Load input data
    with open(args.train_metadata, 'r') as f:
        train_posts = json.load(f)

    with open(args.val_metadata, 'r') as f:
        val_posts = json.load(f)

    # Load labels
    labels = {}
    with open(args.label_intent, 'r') as f:
        intent_labels = json.load(f)
        labels['intent'] = {}
        for label in intent_labels:
            labels['intent'][label] = len(labels['intent'])

    with open(args.label_semiotic, 'r') as f:
        semiotic_labels = json.load(f)
        labels['semiotic'] = {}
        for label in semiotic_labels:
            labels['semiotic'][label] = len(labels['semiotic'])

    with open(args.label_contextual, 'r') as f:
        contextual_labels = json.load(f)
        labels['contextual'] = {}
        for label in contextual_labels:
            labels['contextual'][label] = len(labels['contextual'])

    # Build dictionary from training set
    train_captions = []
    for post in train_posts:
        train_captions.append(post['orig_caption'])
    dictionary = Dictionary(tokenizer_method="TreebankWordTokenizer")
    dictionary.build_dictionary_from_captions(train_captions)

    # Set up torch device
    if 'cuda' in args.device and torch.cuda.is_available():
        device = torch.device(args.device)
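        # pinned host memory speeds up host-to-GPU transfers in the DataLoader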
        kwargs = {'pin_memory': True}
    else:
        device = torch.device('cpu')
        kwargs = {}

    # Set up number of workers
    num_workers = min(multiprocessing.cpu_count(), args.num_workers)

    # Set up data loaders differently based on the task
    # TODO: Extend to ELMo + word2vec etc.
    if args.type == 'image_only':
        train_dataset = ImageOnlyDataset(train_posts, labels)
        val_dataset = ImageOnlyDataset(val_posts, labels)
        train_data_loader = torch.utils.data.DataLoader(train_dataset,
                                                        batch_size=args.batch_size,
                                                        shuffle=args.shuffle,
                                                        num_workers=num_workers,
                                                        collate_fn=collate_fn_pad_image_only,
                                                        **kwargs)
        val_data_loader = torch.utils.data.DataLoader(val_dataset,
                                                    batch_size=1,
                                                    num_workers=num_workers,
                                                    collate_fn=collate_fn_pad_image_only,
                                                    **kwargs)
    elif args.type == 'image_text':
        train_dataset = ImageTextDataset(train_posts, labels, dictionary)
        val_dataset = ImageTextDataset(val_posts, labels, dictionary)
        train_data_loader = torch.utils.data.DataLoader(train_dataset,
                                                        batch_size=args.batch_size,
                                                        shuffle=args.shuffle,
                                                        num_workers=num_workers,
                                                        collate_fn=collate_fn_pad_image_text,
                                                        **kwargs)
        val_data_loader = torch.utils.data.DataLoader(val_dataset,
                                                    batch_size=1,
                                                    num_workers=num_workers,
                                                    collate_fn=collate_fn_pad_image_text,
                                                    **kwargs)
    elif args.type == 'text_only':
        train_dataset = TextOnlyDataset(train_posts, labels, dictionary)
        val_dataset = TextOnlyDataset(val_posts, labels, dictionary)
        train_data_loader = torch.utils.data.DataLoader(train_dataset,
                                                        batch_size=args.batch_size,
                                                        shuffle=args.shuffle,
                                                        num_workers=num_workers,
                                                        collate_fn=collate_fn_pad_text_only,
                                                        **kwargs)
        val_data_loader = torch.utils.data.DataLoader(val_dataset,
                                                    batch_size=1,
                                                    num_workers=num_workers,
                                                    collate_fn=collate_fn_pad_text_only,
                                                    **kwargs)
    else:
        raise ValueError("args.type doesn't exist.")

    # Set up the model
    model = Model(vocab_size=dictionary.size()).to(device)

    # Set up an optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=args.lr_scheduler_step_size, gamma=args.lr_scheduler_gamma)  # decay the LR by lr_scheduler_gamma every lr_scheduler_step_size epochs

    # Set up loss function
    loss_fn = torch.nn.CrossEntropyLoss()

    # Set up TensorBoard
    if args.tensorboard:
        writer = tensorboard.SummaryWriter(log_dir=args.log_dir + "/" + args.name, flush_secs=1)
    else:
        writer = None

    # Training loop
    if args.classification == 'intent':
        keys = ['intent']
    elif args.classification == 'semiotic':
        keys = ['semiotic']
    elif args.classification == 'contextual':
        keys = ['contextual']
    elif args.classification == 'all':
        keys = ['intent', 'semiotic', 'contextual']
    else:
        raise ValueError("args.classification doesn't exist.")
    best_auc_ovr = 0.0
    best_auc_ovo = 0.0
    best_acc = 0.0
    best_model = None
    best_optimizer = None
    best_scheduler = None
    for epoch in range(args.epochs):
        for mode in ["train", "eval"]:
            # Set up a progress bar
            if mode == "train":
                pbar = tqdm.tqdm(enumerate(train_data_loader), total=len(train_data_loader))
                model.train()
            else:
                pbar = tqdm.tqdm(enumerate(val_data_loader), total=len(val_data_loader))
                model.eval()

            total_loss = 0
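            # accumulate ground-truth labels and per-class scores across the
            # epoch so AUC and accuracy can be computed after the loop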
            label = dict.fromkeys(keys, np.array([], dtype=int))
            pred = dict.fromkeys(keys, None)
            for _, batch in pbar:
                if 'caption' not in batch:
                    caption_data = None
                else:
                    caption_data = batch['caption'].to(device)
                if 'image' not in batch:
                    image_data = None
                else:
                    image_data = batch['image'].to(device)
                label_batch = {}
                for key in keys:
                    label_batch[key] = batch['label'][key].to(device)
                    
                if mode == "train":
                    model.zero_grad()

                pred_batch = model(image_data, caption_data)
                
                for key in keys:
                    label[key] = np.concatenate((label[key], batch['label'][key].cpu().numpy()))
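                    # numerically stable softmax over the logits to obtain
                    # per-class probability scores for the AUC computation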
                    x = pred_batch[key].detach().cpu().numpy()
                    x_max = np.max(x, axis=1).reshape(-1, 1)
                    z = np.exp(x - x_max)
                    prediction_scores = z / np.sum(z, axis=1).reshape(-1, 1)
                    if pred[key] is not None:
                        pred[key] = np.vstack((pred[key], prediction_scores))
                    else:
                        pred[key] = prediction_scores
                       
                loss_batch = {}
                loss = None
                for key in keys:
                    loss_batch[key] = loss_fn(pred_batch[key], label_batch[key])
                    if loss is None:
                        loss = loss_batch[key]
                    else:
                        loss += loss_batch[key]

                total_loss += loss.item()

                if mode == "train":
                    loss.backward()
                    optimizer.step()

            # Terminate the progress bar
            pbar.close()
            
            # Update lr scheduler
            if mode == "train":
                scheduler.step()

            for key in keys:
                auc_score_ovr = roc_auc_score(label[key], pred[key], multi_class='ovr') # pylint: disable-all
                auc_score_ovo = roc_auc_score(label[key], pred[key], multi_class='ovo') # pylint: disable-all
                accuracy = accuracy_score(label[key], np.argmax(pred[key], axis=1))
                print("[{} - {}] [AUC-OVR={:.3f}, AUC-OVO={:.3f}, ACC={:.3f}]".format(mode, key, auc_score_ovr, auc_score_ovo, accuracy))
                
                if mode == "eval":
                    best_auc_ovr = max(best_auc_ovr, auc_score_ovr)
                    best_auc_ovo = max(best_auc_ovo, auc_score_ovo)
                    best_acc = max(best_acc, accuracy)
                    best_model = model
                    best_optimizer = optimizer
                    best_scheduler = scheduler
                
                if writer:
                    writer.add_scalar('AUC-OVR/{}-{}'.format(mode, key), auc_score_ovr, epoch)
                    writer.add_scalar('AUC-OVO/{}-{}'.format(mode, key), auc_score_ovo, epoch)
                    writer.add_scalar('ACC/{}-{}'.format(mode, key), accuracy, epoch)
                    writer.flush()

            if writer:
                writer.add_scalar('Loss/{}'.format(mode), total_loss, epoch)
                writer.flush()

            print("[{}] Epoch {}: Loss = {}".format(mode, epoch, total_loss))

    hparam_dict = {
        'train_split': args.train_metadata,
        'val_split': args.val_metadata,
        'lr': args.lr,
        'epochs': args.epochs,
        'batch_size': args.batch_size,
        'num_workers': args.num_workers,
        'shuffle': args.shuffle,
        'lr_scheduler_gamma': args.lr_scheduler_gamma,
        'lr_scheduler_step_size': args.lr_scheduler_step_size,
    }
    metric_dict = {
        'AUC-OVR': best_auc_ovr,
        'AUC-OVO': best_auc_ovo,
        'ACC': best_acc
    }

    if writer:
        writer.add_hparams(hparam_dict=hparam_dict, metric_dict=metric_dict)
        writer.flush()
    
    Path(args.output_dir).mkdir(parents=True, exist_ok=True)
    torch.save({
        'hparam_dict': hparam_dict,
        'metric_dict': metric_dict,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'scheduler_state_dict': scheduler.state_dict(),
    }, Path(args.output_dir) / '{}.pt'.format(args.name))