Example #1
def valid(epoch):
    clevr = CLEVR(sys.argv[1], 'val', transform=None)
    valid_set = DataLoader(clevr,
                           batch_size=batch_size,
                           num_workers=4,
                           collate_fn=collate_data)
    dataset = iter(valid_set)

    net_running.train(False)
    family_correct = Counter()
    family_total = Counter()
    with torch.no_grad():
        for image, question, q_len, answer, family in tqdm(dataset):
            image, question = image.to(device), question.to(device)

            output = net_running(image, question, q_len)
            correct = output.detach().argmax(1) == answer.to(device)
            for c, fam in zip(correct, family):
                if c:
                    family_correct[fam] += 1
                family_total[fam] += 1

    with open('log/log_{}.txt'.format(str(epoch + 1).zfill(2)), 'w') as w:
        for k, v in family_total.items():
            w.write('{}: {:.5f}\n'.format(k, family_correct[k] / v))

    print('Avg Acc: {:.5f}'.format(
        sum(family_correct.values()) / sum(family_total.values())))

    clevr.close()
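Every example below hands the DataLoader a collate_fn=collate_data that none of the snippets define. A minimal sketch of what such a collate function could look like, assuming each sample is an (image, question, answer, family) tuple and that questions are padded with index 0 (some loaders further down unpack an extra field per batch, which this sketch does not cover):

import torch


def collate_data(batch):
    # Sort by question length (longest first) so a packed-sequence encoder can be used.
    batch = sorted(batch, key=lambda sample: len(sample[1]), reverse=True)
    max_len = len(batch[0][1])

    images, lengths, answers, families = [], [], [], []
    questions = torch.zeros(len(batch), max_len, dtype=torch.long)  # 0 assumed to be the pad index

    for i, (image, question, answer, family) in enumerate(batch):
        images.append(image)
        question = torch.as_tensor(question, dtype=torch.long)
        questions[i, :len(question)] = question
        lengths.append(len(question))
        answers.append(answer)
        families.append(family)

    return (torch.stack(images), questions, torch.tensor(lengths),
            torch.tensor(answers), families)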
Example #2
def valid(accum_net, clevr_dir, epoch):
    clevr = CLEVR(clevr_dir, "val", transform=None)
    valid_set = DataLoader(clevr,
                           batch_size=batch_size,
                           num_workers=4,
                           collate_fn=collate_data)
    dataset = iter(valid_set)

    accum_net.train(False)
    family_correct = Counter()
    family_total = Counter()
    with torch.no_grad():
        for image, question, q_len, answer, family in tqdm(dataset):
            image, question = image.to(device), question.to(device)

            output = accum_net(image, question, q_len)
            correct = output.detach().argmax(1) == answer.to(device)
            for c, fam in zip(correct, family):
                if c:
                    family_correct[fam] += 1
                family_total[fam] += 1

    with open("log/log_{}.txt".format(str(epoch + 1).zfill(2)), "w") as w:
        for k, v in family_total.items():
            w.write("{}: {:.5f}\n".format(k, family_correct[k] / v))

    print("Avg Acc: {:.5f}".format(
        sum(family_correct.values()) / sum(family_total.values())))

    clevr.close()
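The CLEVR class is likewise external to these snippets. A rough sketch of the interface they rely on (indexable samples plus a close() method), with placeholder file names and loading logic rather than the real implementation:

import os
import pickle

from torch.utils.data import Dataset


class CLEVR(Dataset):
    # Interface sketch: yields (image_features, question, answer, family) samples.

    def __init__(self, root, split='train', transform=None):
        self.transform = transform
        # Placeholder loading step; the real class reads precomputed image features
        # and tokenized questions from disk.
        with open(os.path.join(root, '{}_data.pkl'.format(split)), 'rb') as f:
            self.data = pickle.load(f)

    def __getitem__(self, index):
        image, question, answer, family = self.data[index]
        if self.transform is not None:
            image = self.transform(image)
        return image, question, answer, family

    def __len__(self):
        return len(self.data)

    def close(self):
        # The examples call close(); the real class presumably releases an open
        # feature file (e.g. HDF5) here. Nothing to release in this sketch.
        pass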
Example #3
def valid(epoch):
    clevr = CLEVR(cfg.DATALOADER.FEATURES_PATH, 'val', transform=None)
    valid_set = DataLoader(
        clevr,
        batch_size=cfg.DATALOADER.BATCH_SIZE,
        num_workers=cfg.DATALOADER.NUM_WORKERS,
        collate_fn=collate_data,
        drop_last=True,
    )
    dataset = iter(valid_set)

    net_running.train(False)
    with torch.no_grad():
        all_corrects = 0

        for image, question, q_len, answer, _, _ in tqdm(dataset):
            image, question = image.to(device), question.to(device)

            output = net_running(image, question, q_len)
            correct = output.detach().argmax(1) == answer.to(device)
            
            all_corrects += correct.float().mean().item()
        
        if scheduler:
            scheduler.step(all_corrects / len(dataset))

        print('Avg Acc: {:.5f}'.format(all_corrects / len(dataset)))

    clevr.close()
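The scheduler.step(...) call above receives the epoch's validation accuracy, which points to a metric-driven scheduler such as ReduceLROnPlateau. A plausible setup, with an illustrative placeholder model and hyperparameters:

import torch
import torch.nn as nn

net_running = nn.Linear(10, 28)  # placeholder standing in for the actual network
optimizer = torch.optim.Adam(net_running.parameters(), lr=1e-4)

# mode='max' because valid() passes an accuracy (higher is better), not a loss.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode='max', factor=0.5, patience=2)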
Example #4
def train(epoch, dataset_type):
    root = args.root
    if dataset_type == "CLEVR":
        dataset_object = CLEVR(root, transform=transform)
    else:
        dataset_object = GQA(root, transform=transform)

    train_set = DataLoader(dataset_object,
                           batch_size=BATCH_SIZE,
                           num_workers=multiprocessing.cpu_count(),
                           collate_fn=collate_data)

    dataset = iter(train_set)
    pbar = tqdm(dataset)
    running_loss = 0
    correct_counts = 0
    total_counts = 0

    net.train()
    for image, question, q_len, answer in pbar:
        image, question, answer = (
            image.to(DEVICE),
            question.to(DEVICE),
            answer.to(DEVICE),
        )
        net.zero_grad()
        output = net(image, question, q_len)
        loss = criterion(output, answer)
        loss.backward()
        optimizer.step()

        correct = output.detach().argmax(1) == answer
        correct_counts += correct.sum().item()
        total_counts += image.size(0)

        correct = correct.float().sum().item() / BATCH_SIZE
        running_loss += loss.item() / BATCH_SIZE

        pbar.set_description(
            '[Training] Epoch: {}; Loss: {:.8f}; Acc: {:.5f}'.format(
                epoch + 1, loss.item(), correct))

    print('[Training] loss: {:8f}, accuracy: {:5f}'.format(
        running_loss / len(train_set.dataset), correct_counts / total_counts))
    dataset_object.close()
    return running_loss / len(train_set.dataset), correct_counts / total_counts
Example #5
def train(net, accum_net, optimizer, criterion, clevr_dir, epoch):
    clevr = CLEVR(clevr_dir, transform=transform)
    train_set = DataLoader(clevr,
                           batch_size=batch_size,
                           num_workers=4,
                           collate_fn=collate_data)

    dataset = iter(train_set)
    pbar = tqdm(dataset)
    moving_loss = 0

    net.train(True)
    for i, (image, question, q_len, answer, _) in enumerate(pbar):
        image, question, answer = (image.to(device), question.to(device),
                                   answer.to(device))

        net.zero_grad()
        output = net(image, question, q_len)
        loss = criterion(output, answer)
        loss.backward()

        # if wrapped in a DataParallel, the actual net is at DataParallel.module
        m = net.module if isinstance(net, nn.DataParallel) else net
        torch.nn.utils.clip_grad_norm_(m.mac.read.parameters(), 1)
        # torch.nn.utils.clip_grad_value_(net.parameters(), 0.05)

        # if i % 1000 == 0:
        #     plot_grad_flow(net.named_parameters())

        optimizer.step()
        correct = output.detach().argmax(1) == answer
        correct = correct.float().sum().item() / batch_size

        # despite its name, moving_loss here tracks a moving average of batch accuracy
        if moving_loss == 0:
            moving_loss = correct

        else:
            moving_loss = moving_loss * 0.99 + correct * 0.01

        pbar.set_description("Epoch: {}; Loss: {:.5f}; Acc: {:.5f}".format(
            epoch + 1, loss.item(), moving_loss))

        accumulate(accum_net, net)

    clevr.close()
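Several examples (#2, #5, #6, #8) keep a second copy of the network (accum_net / net_running) updated through accumulate(), which is not shown here. The usual pattern behind such a call is an exponential moving average of the parameters; a sketch under that assumption, with an illustrative decay value:

import torch


def accumulate(accum_net, net, decay=0.999):
    # Update accum_net's parameters as an exponential moving average of net's.
    # Assumes both modules share the same architecture.
    accum_params = dict(accum_net.named_parameters())
    params = dict(net.named_parameters())

    with torch.no_grad():
        for name, param in params.items():
            accum_params[name].mul_(decay).add_(param.detach(), alpha=1 - decay)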
Example #6
def test(accum_net, clevr_dir):
    print("Starting tests!")
    print(accum_net)
    clevr = CLEVR(clevr_dir, "val", transform=None)
    test_set = DataLoader(clevr,
                          batch_size=batch_size,
                          num_workers=4,
                          collate_fn=collate_data)
    dataset = iter(test_set)

    accum_net.train(False)
    family_correct = Counter()
    family_total = Counter()
    with torch.no_grad():
        for image, question, q_len, answer, family in tqdm(dataset):
            image, question = image.to(device), question.to(device)

            output = accum_net(image, question, q_len)

            # if wrapped in a DataParallel, the actual net is at DataParallel.module
            m = accum_net.module if isinstance(accum_net,
                                               nn.DataParallel) else accum_net
            # [{read, write}, n_steps, batch_size, {??????, n_memories}]
            attentions = m.saved_attns
            for i, step in enumerate(attentions):
                print(f"Step {i}")
                print("Read attn shape:", torch.tensor(step["read"][0]).shape)
                print(image.shape)

            sys.exit()  # early debugging exit: the accuracy bookkeeping below never runs
            correct = output.detach().argmax(1) == answer.to(device)
            for c, fam in zip(correct, family):
                if c:
                    family_correct[fam] += 1
                family_total[fam] += 1

    with open("log/test_log.txt", "w") as w:
        for k, v in family_total.items():
            w.write("{}: {:.5f}\n".format(k, family_correct[k] / v))

    print("Avg Acc: {:.5f}".format(
        sum(family_correct.values()) / sum(family_total.values())))

    clevr.close()
Example #7
def valid(epoch, dataset_type):
    root = args.root
    if dataset_type == "CLEVR":
        dataset_object = CLEVR(root, 'val', transform=None)
    else:
        dataset_object = GQA(root, 'val', transform=None)

    valid_set = DataLoader(dataset_object,
                           batch_size=BATCH_SIZE,
                           num_workers=multiprocessing.cpu_count(),
                           collate_fn=collate_data)
    dataset = iter(valid_set)

    net.eval()
    correct_counts = 0
    total_counts = 0
    running_loss = 0.0
    with torch.no_grad():
        pbar = tqdm(dataset)
        for image, question, q_len, answer in pbar:
            image, question, answer = (
                image.to(DEVICE),
                question.to(DEVICE),
                answer.to(DEVICE),
            )

            output = net(image, question, q_len)
            loss = criterion(output, answer)
            correct = output.detach().argmax(1) == answer
            correct_counts += correct.sum().item()
            total_counts += image.size(0)
            running_loss += loss.item() / BATCH_SIZE

            pbar.set_description(
                '[Val] Epoch: {}; Loss: {:.8f}; Acc: {:.5f}'.format(
                    epoch + 1, loss.item(), correct_counts / total_counts))

    print('[Val] loss: {:8f}, accuracy: {:5f}'.format(
        running_loss / len(valid_set.dataset), correct_counts / total_counts))

    dataset_object.close()
    return running_loss / len(valid_set.dataset), correct_counts / total_counts
Example #8
def train(epoch):
    clevr = CLEVR(sys.argv[1], transform=transform)
    train_set = DataLoader(
        clevr, batch_size=batch_size, num_workers=4, collate_fn=collate_data
    )

    dataset = iter(train_set)
    pbar = tqdm(dataset)
    moving_loss = 0

    net.train(True)
    for image, question, q_len, answer, _ in pbar:
        image, question, answer = (
            image.to(device),
            question.to(device),
            answer.to(device),
        )

        net.zero_grad()
        output = net(image, question, q_len)
        loss = criterion(output, answer)
        loss.backward()
        optimizer.step()
        correct = output.detach().argmax(1) == answer
        correct = correct.float().sum().item() / batch_size

        if moving_loss == 0:
            moving_loss = correct

        else:
            moving_loss = moving_loss * 0.99 + correct * 0.01

        pbar.set_description(
            'Epoch: {}; Loss: {:.5f}; Acc: {:.5f}'.format(
                epoch + 1, loss.item(), moving_loss
            )
        )

        accumulate(net_running, net)

    clevr.close()
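A driver that ties the pieces together, alternating the train/valid functions above and checkpointing the averaged copy once per epoch, might look like the following. It assumes the module-level net, train, and valid from Examples #1 and #8; the epoch count and file paths are placeholders:

import copy

import torch

if __name__ == '__main__':
    net_running = copy.deepcopy(net)  # the EMA copy that accumulate() keeps updated

    for epoch in range(20):  # illustrative epoch count
        train(epoch)
        valid(epoch)

        with open('checkpoint/checkpoint_{}.model'.format(str(epoch + 1).zfill(2)), 'wb') as f:
            torch.save(net_running.state_dict(), f)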
Example #9
def train(epoch):
    clevr = CLEVR(cfg.DATALOADER.FEATURES_PATH, transform=transform)
    train_set = DataLoader(
        clevr,
        batch_size=cfg.DATALOADER.BATCH_SIZE,
        num_workers=cfg.DATALOADER.NUM_WORKERS,
        collate_fn=collate_data,
        drop_last=True,
    )

    dataset = iter(train_set)
    pbar = tqdm(dataset)
    moving_loss = 0

    net.train(True)
    for image, question, q_len, answer, _, _ in pbar:
        image, question, answer = (
            image.to(device),
            question.to(device),
            answer.to(device),
        )

        net.zero_grad()
        output = net(image, question, q_len)
        loss = criterion(output, answer)
        loss.backward()
        if cfg.SOLVER.GRAD_CLIP:
            nn.utils.clip_grad_norm_(net.parameters(), cfg.SOLVER.GRAD_CLIP)
        optimizer.step()
        correct = output.detach().argmax(1) == answer
        accuracy = correct.float().mean().item()

        if moving_loss == 0:
            moving_loss = accuracy
        else:
            moving_loss = moving_loss * 0.99 + accuracy * 0.01

        pbar.set_description(
            'Epoch: {}; Loss: {:.5f}; Acc: {:.5f}'.format(
                epoch, loss.item(), moving_loss
            )
        )
        accumulate(net_running, net)

    clevr.close()
Example #10
def valid(epoch, dataset_type):
    if dataset_type == "CLEVR":
        dataset_object = CLEVR('data/CLEVR_v1.0', 'val', transform=None)
    else:
        dataset_object = GQA('data/gqa', 'val', transform=None)

    valid_set = DataLoader(dataset_object,
                           batch_size=BATCH_SIZE,
                           num_workers=multiprocessing.cpu_count(),
                           collate_fn=collate_data)
    dataset = iter(valid_set)

    net.eval()
    correct_counts = 0
    total_counts = 0
    running_loss = 0.0
    with torch.no_grad():
        pbar = tqdm(dataset)
        for image, question, q_len, answer in pbar:
            image, question, answer = (
                image.to(DEVICE),
                question.to(DEVICE),
                answer.to(DEVICE),
            )

            output = net(image, question, q_len)
            loss = criterion(output, answer)
            correct = output.detach().argmax(1) == answer
            correct_counts += correct.sum().item()
            total_counts += image.size(0)
            running_loss += loss.item() / BATCH_SIZE

            pbar.set_description(
                'Epoch: {}; Loss: {:.8f}; Acc: {:.5f}'.format(epoch + 1, loss.item(), correct_counts / total_counts))

    with open('log/log_{}.txt'.format(str(epoch + 1).zfill(2)), 'w') as w:
        w.write('{:.5f}\n'.format(correct_counts / total_counts))

    print('Validation loss: {:8f}, accuracy: {:5f}'.format(running_loss / len(valid_set.dataset),
                                                            correct_counts / total_counts))

    dataset_object.close()