# Common imports assumed by the snippets below.
import csv

import numpy
import torch
import torch.nn.functional as F
import torch.utils.data as tud

import acquisition_functions
import customcifar


def write_dataset_info(ds, active_indices, normal_indices, filename):
    # Per-class sample counts (CIFAR10: 10 classes) for the actively-selected
    # and the randomly-selected index sets.
    active_els = [0] * 10
    normal_els = [0] * 10

    dataloader_1 = tud.DataLoader(ds._train_val_set,
                                  batch_size=1,
                                  shuffle=False,
                                  num_workers=2,
                                  sampler=customcifar.CustomSampler(
                                      list(active_indices)))
    dataloader_2 = tud.DataLoader(ds._train_val_set,
                                  batch_size=1,
                                  shuffle=False,
                                  num_workers=2,
                                  sampler=customcifar.CustomSampler(
                                      list(normal_indices)))

    with torch.no_grad():
        for _, target, _ in dataloader_1:
            active_els[target.item()] += 1
        for _, target, _ in dataloader_2:
            normal_els[target.item()] += 1

    # One row: active counts, a blank separator column, normal counts.
    with open(filename + "_datainfo.csv", "a") as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(active_els + [""] + normal_els)
Example #2
def generate_weak_labels(net, cds, indices, howmany, train_indices, n=5):
    net.eval()

    # [per-sample disagreement scores, matching dataset indices]
    normalized_confidence = [torch.Tensor().to("cuda:0"), torch.Tensor().long()]

    randomized_list = numpy.random.choice(list(indices), len(indices), replace=False)
    # n loaders over the same shuffled indices, one per stochastic pass.
    # Note: the DataLoader needs the dataset itself (cds._train_val_set),
    # not cds.train_indices as the original snippet passed.
    dataloaders = [tud.DataLoader(cds._train_val_set, batch_size=500, shuffle=False, num_workers=4,
                                  sampler=customcifar.CustomSampler(randomized_list)) for i in range(n)]

    with torch.no_grad():
        for batch_index, element in enumerate(zip(*dataloaders)):  # unlabelled samples
            # element[0][2] holds the dataset indices of this batch.
            normalized_confidence[1] = torch.cat((normalized_confidence[1], element[0][2]), 0)

            els = list(element)
            predictions = torch.Tensor().long()

            # One column of predicted labels per stochastic forward pass.
            for input in els:
                input[0], input[1] = input[0].to("cuda:0"), input[1].to("cuda:0")
                output = net(input[0])
                predictions = torch.cat((predictions, output[0].max(1)[1].reshape(len(output[0]), 1).cpu()), 1)

            # 1 - (majority vote count / n): disagreement across the n passes.
            normalized_confidence[0] = torch.cat((normalized_confidence[0].cpu(), 1 - torch.Tensor(
                acquisition_functions.confidence(predictions.transpose(0, 1), details=True)).cpu() / n), 0).cpu()

    return normalized_confidence
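
The weak label itself would be the per-sample majority vote over the n prediction columns collected above. A minimal sketch on synthetic predictions, assuming acquisition_functions.confidence counts majority agreement (which the /n normalization suggests):

import torch

n = 5
# One row per sample, one column per stochastic forward pass, mirroring
# the shape accumulated in `predictions` above.
predictions = torch.randint(0, 10, (8, n))

weak_labels = torch.mode(predictions, dim=1).values           # majority vote
agreement = (predictions == weak_labels.unsqueeze(1)).float().mean(dim=1)
print(weak_labels, agreement)  # agreement plays the role of the confidence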
Example #3
    def entropy(self, ds, indices, howmany):
        tots = len(indices)
        self.net.eval()
        list_of_errors = []

        randomized_list = numpy.random.choice(list(indices), len(indices), replace=False)

        dataloaders = [tud.DataLoader(ds._train_val_set, batch_size=100, shuffle=False,
             num_workers=2, sampler=customcifar.CustomSampler(randomized_list)) for i in range(5)]

        with torch.no_grad():
            for batch_index, element in enumerate(zip(*dataloaders)):
                els = list(element)

                for input in els:
                    input[0], input[1] = input[0].to("cuda:0"), input[1].to("cuda:0")
                # One output per stochastic forward pass over the same batch.
                outputs = [self.net(i[0]) for i in els]

                confidence = acquisition_functions.avg_entropy(outputs)
                for x in range(len(confidence)):
                    # els[0][2][x] is the dataset index of sample x.
                    list_of_errors.append([confidence[x], els[0][2][x].item()])
                print("\r Checked: {0} / {1}".format(len(list_of_errors), tots), end='')
            # Highest average entropy first: most uncertain samples come first.
            sorlist = sorted(list_of_errors, key=lambda xp: xp[0], reverse=True)

            return [el[1] for el in sorlist[:howmany]]
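
avg_entropy lives in acquisition_functions, which is not shown here; a common reading is the entropy of the mean softmax over the five passes. A self-contained sketch under that assumption, on synthetic logits:

import torch
import torch.nn.functional as F

n, batch, classes = 5, 8, 10
outputs = [torch.randn(batch, classes) for _ in range(n)]  # synthetic logits

# Mean predictive distribution across the n passes, then its entropy;
# high entropy = uncertain sample = ranked first by the sort above.
probs = torch.stack([F.softmax(o, dim=1) for o in outputs]).mean(dim=0)
entropy = -(probs * probs.clamp_min(1e-12).log()).sum(dim=1)
print(entropy)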
Example #4
    def greedy_k_centers(self, ds, indices, howmany, _train_loader, n=5):

        # Note: the result of this call is discarded as written.
        self.kl_divergence(ds, indices, howmany, _train_loader)

        self.net.eval()
        N = torch.Tensor().to("cuda:0")  # labelled features
        S = torch.Tensor().to("cuda:0")  # unlabelled features

        randomized_list = numpy.random.choice(list(indices), len(indices), replace=False)

        dataloader = tud.DataLoader(ds._train_val_set, batch_size=500, shuffle=False, num_workers=2,
                                      sampler=customcifar.CustomSampler(randomized_list))
        with torch.no_grad():
            for batch_index, (inputs, targets, index) in enumerate(_train_loader):
                inputs, targets = inputs.to("cuda:0"), targets.to("cuda:0")
                outputs = self.net(inputs)[0]
                N = torch.cat((N, outputs), 0)

            for batch_index, (inputs, targets, index) in enumerate(dataloader):
                # x = arg max(i in S/N) min(j in N) d(X_i, X_j)
                inputs, targets = inputs.to("cuda:0"), targets.to("cuda:0")
                outputs = self.net(inputs)[0]
                S = torch.cat((S, outputs), 0)

        # Full pairwise Euclidean distances between unlabelled (S) and
        # labelled (N) features.
        differences = S.to("cpu").unsqueeze(1) - N.to("cpu").unsqueeze(0)
        dist_m = torch.sum(differences * differences, -1).pow(.5)

        # Rank candidates by distance to their nearest labelled point, farthest first.
        mindist = [x for x in zip(randomized_list, torch.min(dist_m.to("cuda:0"), 1)[0].to("cpu").data)]
        sorlist = sorted(mindist, key=lambda xp: xp[1].item(), reverse=True)
        return [x[0] for x in sorlist[:howmany]]
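
Note that the return ranks by the static min-distance, while the in-loop comment ("x = arg max(i in S/N) min(j in N) d(X_i, X_j)") describes the fully greedy rule, which refreshes distances after every pick. A minimal sketch of that greedy variant on random features:

import torch

torch.manual_seed(0)
N = torch.randn(20, 512)   # labelled features
S = torch.randn(100, 512)  # unlabelled candidate features
howmany = 5

mindist = torch.cdist(S, N).min(dim=1).values  # distance to nearest centre
chosen = []
for _ in range(howmany):
    idx = torch.argmax(mindist).item()         # farthest-first selection
    chosen.append(idx)
    # Treat the pick as a new centre: refresh every candidate's min distance.
    mindist = torch.minimum(mindist, torch.cdist(S, S[idx:idx + 1]).squeeze(1))
    mindist[idx] = float("-inf")               # never pick it again
print(chosen)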
Example #5
    def all_train(self, otherDS=None, excluded=[]):
        # Loader over every training index not in `excluded`; uses `otherDS`
        # when provided, otherwise the internal train/val set.
        dataset = self._train_val_set if otherDS is None else otherDS
        return tud.DataLoader(dataset,
                              batch_size=1,
                              shuffle=False,
                              num_workers=2,
                              sampler=customcifar.CustomSampler([
                                  x for x in self.train_indices
                                  if x not in excluded
                              ]))
Example #6
    def __init__(self,
                 transform=None,
                 first_time_multiplier=1,
                 name=None,
                 joking=False):
        if joking:
            # Leave the object empty; fields are filled later via restore().
            return

        self._train_val_set = customcifar.UnbalancedCIFAR10(
            root="./cifar",
            train=True,
            download=True,
            transform=transform,
            filename=name,
            percentage=.1)

        self._test_set = customcifar.UnbalancedCIFAR10(
            root="./cifar", train=False, download=True,
            transform=transform)  # 10000

        self.validation_indices = self._train_val_set._val_indices
        self.train_indices = [
            x for x in self._train_val_set.indices
            if x not in self.validation_indices
        ]
        # tslp and train_batch_size are module-level settings.
        self.already_selected_indices = numpy.random.choice(
            self.train_indices,
            size=tslp * first_time_multiplier,
            replace=False).tolist()
        self._train = tud.DataLoader(self._train_val_set,
                                     batch_size=train_batch_size,
                                     shuffle=False,
                                     num_workers=2,
                                     sampler=customcifar.CustomRandomSampler(
                                         self.already_selected_indices))

        self._v = tud.DataLoader(self._train_val_set,
                                 batch_size=100,
                                 shuffle=False,
                                 num_workers=2,
                                 sampler=customcifar.CustomRandomSampler(
                                     self.validation_indices))
        self._t = torch.utils.data.DataLoader(
            self._test_set,
            batch_size=100,
            shuffle=False,
            num_workers=2,
            sampler=customcifar.CustomSampler(
                list(range(len(self._test_set)))))
Example #7
    def bestofn(self, ds, indices, howmany, n=5):
        self.net.eval()
        total_normalized_confidence = 0
        total = 0
        list_of_errors = []

        errors_by_class = [0 for x in range(10)]
        printiter = 0

        randomized_list = numpy.random.choice(list(indices), len(indices), replace=False)
        dataloaders = [tud.DataLoader(ds._train_val_set, batch_size=500, shuffle=False,
             num_workers=2, sampler=customcifar.CustomSampler(randomized_list)) for i in range(n)]

        with torch.no_grad():
            for batch_index, element in enumerate(zip(*dataloaders)):
                els = list(element)
                for input in els:
                    input[0], input[1] = input[0].to("cuda:0"), input[1].to("cuda:0")
                res_net = [self.net(i[0]) for i in els]

                outputs = [i[0] for i in res_net]
                # Intermediate features from the first of the n passes only.
                intrep = res_net[0][1]

                predictions = [out.max(1)[1] for out in outputs]

                # Fraction of the n passes that agree with the majority vote.
                normalized_confidence = [float(c / n) for c in acquisition_functions.confidence(predictions)]
                # Pairwise Euclidean distances between feature vectors in the batch.
                differences = intrep.unsqueeze(1) - intrep.unsqueeze(0)
                dist_m = torch.sum(differences * differences, -1).pow(.5)
                for x in range(len(normalized_confidence)):
                    sbregio = [dist_m[x][y].item() for y in range(len(dist_m[x])) if y != x]
                    mindist = min(sbregio)

                    # Score: disagreement weighted by isolation within the batch.
                    list_of_errors.append([(1 - normalized_confidence[x]) * mindist, els[0][2][x].item()])
                    if normalized_confidence[x] < 1:
                        errors_by_class[els[0][1][x].item()] += 1
                    total_normalized_confidence += normalized_confidence[x]
                    total += 1
                    if printiter % 50 == 0:
                        print("\r Avg confidence: {0:.2f}% ({1:.1f}/{2})  {3}".format((total_normalized_confidence / total)*100, total_normalized_confidence, total, ""), end='')
                    printiter += 1

            # this part still needs changing
            sorlist = sorted(list_of_errors, key=lambda xp: xp[0], reverse=True)
            print("\n Errors by class:  {0}".format(["{0}: {1}".format(i, errors_by_class[i]) for i in range(10)]))
            return [el[1] for el in sorlist[:howmany]]
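
bestofn's score combines disagreement across the n passes with isolation inside the batch; a compact, self-contained sketch of the same scoring on synthetic data (the agreement-fraction confidence is an assumption about acquisition_functions.confidence):

import torch

torch.manual_seed(0)
n, batch = 5, 8
predictions = torch.randint(0, 3, (batch, n))  # class votes from n passes
features = torch.randn(batch, 512)             # intermediate representations

votes = torch.mode(predictions, dim=1).values
confidence = (predictions == votes.unsqueeze(1)).float().mean(dim=1)

dist = torch.cdist(features, features)
dist.fill_diagonal_(float("inf"))              # ignore self-distance
mindist = dist.min(dim=1).values

score = (1 - confidence) * mindist             # disagreement * isolation
print(score.sort(descending=True).indices)     # acquisition order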
Example #8
    def restore(self, all_indices, selected, validation, transform=None, name=None):
        # Rebuild the dataset wrapper from previously saved index lists.
        self._train_val_set = customcifar.UnbalancedCIFAR10(
            root="./cifar",
            train=True,
            download=True,
            transform=transform,
            filename=name,
            percentage=.1,
            provided_indices=(all_indices, validation))
        self._test_set = customcifar.UnbalancedCIFAR10(
            root="./cifar", train=False, download=True,
            transform=transform)  # 10000 samples
        self.validation_indices = validation

        self.train_indices = [
            x for x in all_indices if x not in self.validation_indices
        ]
        self.already_selected_indices = selected
        self._train = tud.DataLoader(self._train_val_set,
                                     batch_size=train_batch_size,
                                     shuffle=False,
                                     num_workers=2,
                                     sampler=customcifar.CustomRandomSampler(
                                         self.already_selected_indices))
        self._v = tud.DataLoader(self._train_val_set,
                                 batch_size=100,
                                 shuffle=False,
                                 num_workers=2,
                                 sampler=customcifar.CustomRandomSampler(
                                     self.validation_indices))
        self._t = torch.utils.data.DataLoader(
            self._test_set,
            batch_size=100,
            shuffle=False,
            num_workers=2,
            sampler=customcifar.CustomSampler(
                list(range(len(self._test_set)))))
        return self
Example #9
    def __init__(self,
                 transform=None,
                 first_time_multiplier=1,
                 name=None,
                 unbal=True):
        self._train_val_set = customcifar.UnbalancedCIFAR10(
            root="./cifar",
            train=True,
            download=True,
            transform=transform,
            filename=name,
            percentage=.1)

        self._test_set = customcifar.UnbalancedCIFAR10(
            root="./cifar", train=False, download=True,
            transform=transform)  # 10000

        self.validation_indices = self._train_val_set._val_indices

        self.train_indices = [
            x for x in self._train_val_set.indices
            if x not in self.validation_indices
        ]

        # Per-class counts of the available training indices.
        print([
            len([
                x for x in self.train_indices
                if x in self._train_val_set.el_for_class[i]
            ]) for i in range(10)
        ])

        if unbal:
            self.already_selected_indices = numpy.random.choice(
                self.train_indices,
                size=tslp * first_time_multiplier,
                replace=False).tolist()
        else:
            # Balanced initial selection: split tslp as evenly as possible
            # across the 10 classes.
            lenel = [
                int(tslp / 10) + (1 if i < tslp % int(tslp / 10) else 0)
                for i in range(10)
            ]
            self.already_selected_indices = [
                x for i in range(10)
                for x in numpy.random.choice([
                    xx for xx in self._train_val_set.el_for_class[i]
                    if xx not in self.validation_indices
                ],
                                             size=lenel[i],
                                             replace=False).tolist()
            ]

        print("Selected: {}".format([
            len([
                x for x in self.already_selected_indices
                if x in self._train_val_set.el_for_class[i]
            ]) for i in range(10)
        ]))

        self._train = tud.DataLoader(self._train_val_set,
                                     batch_size=train_batch_size,
                                     shuffle=False,
                                     num_workers=2,
                                     sampler=customcifar.CustomRandomSampler(
                                         self.already_selected_indices))

        self._v = tud.DataLoader(self._train_val_set,
                                 batch_size=100,
                                 shuffle=False,
                                 num_workers=2,
                                 sampler=customcifar.CustomRandomSampler(
                                     self.validation_indices))
        self._t = torch.utils.data.DataLoader(
            self._test_set,
            batch_size=100,
            shuffle=False,
            num_workers=2,
            sampler=customcifar.CustomSampler(
                list(range(len(self._test_set)))))
Example #10
    def distance_and_varratio(self, ds, indices, howmany, train_indices, n=5):
        # Relative weights of the distance and variation-ratio terms.
        distance_weight = 1e-5
        varratio_weight = 1

        self.net.eval()
        N = torch.Tensor().to("cuda:0")  # labelled
        S = torch.Tensor().to("cuda:0")  # unlabelled
        normalized_confidence = [torch.Tensor().to("cuda:0"), torch.Tensor().long()]

        randomized_list = numpy.random.choice(list(indices), len(indices), replace=False)

        trainloaders = [tud.DataLoader(ds._train_val_set, batch_size=500, shuffle=False, num_workers=4,
                                       sampler=customcifar.CustomRandomSampler(train_indices)) for i in range(n)]
        dataloaders = [tud.DataLoader(ds._train_val_set, batch_size=500, shuffle=False, num_workers=4,
                                      sampler=customcifar.CustomSampler(randomized_list)) for i in range(n)]
        with torch.no_grad():
            for batch_index, element in enumerate(zip(*trainloaders)):  # labelled samples
                els = list(element)
                o = torch.Tensor().to("cuda:0")
                for input in els:
                    input[0], input[1] = input[0].to("cuda:0"), input[1].to("cuda:0")
                    o = torch.cat((o, self.net(input[0])[1].reshape(len(input[0]), 512, 1)), 2)
                N = torch.cat((N, o), 0)
                print("\r N: {0} ".format(N.size()), end="")
            print("")

            for batch_index, element in enumerate(zip(*dataloaders)):  # unlabelled samples
                normalized_confidence[1] = torch.cat((normalized_confidence[1], element[0][2]), 0)

                els = list(element)
                o = torch.Tensor().to("cuda:0")
                predictions = torch.Tensor().long()

                for input in els:
                    input[0], input[1] = input[0].to("cuda:0"), input[1].to("cuda:0")
                    output = self.net(input[0])
                    out = output[1].reshape(len(input[0]), 512, 1)

                    o = torch.cat((o, out), 2)
                    predictions = torch.cat((predictions, output[0].max(1)[1].reshape(len(output[0]), 1).cpu()), 1)

                normalized_confidence[0] = torch.cat((normalized_confidence[0].cpu(), 1 - torch.Tensor(
                    acquisition_functions.confidence(predictions.transpose(0,1))).cpu() / n), 0).cpu()

                S = torch.cat((S, o), 0)
                print("\r S: {0} ".format(S.size()), end="")
            print("")
            # Average the n stochastic 512-d feature vectors per sample.
            S = (torch.sum(S, 2)) / n
            N = (torch.sum(N, 2)) / n

            # Chunk S so the pairwise-difference tensor stays small.
            S_batches = torch.split(S, 25, dim=0)
            dist_S_N = torch.Tensor()
            for el in S_batches:
                partial_dist = el.unsqueeze(1) - N.unsqueeze(0)
                partial_dist = torch.sum(partial_dist * partial_dist, -1)
                partial_dist = torch.sqrt(partial_dist)
                dist_S_N = torch.cat((dist_S_N, partial_dist.cpu()), 0)

            mindist = torch.min(dist_S_N, 1)[0].to("cuda:0")

            normalizing_factor = torch.max(mindist, -1)[0]
            print("NF : " + str(normalizing_factor))

            mindist_confidence = (distance_weight * (mindist / normalizing_factor)) + (varratio_weight * normalized_confidence[0].to("cuda:0"))  # the confidence still has to be computed here

            erlist_indexes = normalized_confidence[1]
            new_N = []

            for i in range(howmany):
                # Greedily take the current best combined score ...
                maxx = torch.max(mindist_confidence, -1)[1]
                print("Max: {0:.3f} = ({1:.3f} * {3}) + ({2:.3f} * {4})".format(mindist_confidence[maxx], mindist[maxx]/normalizing_factor, normalized_confidence[0][maxx], distance_weight, varratio_weight))

                if erlist_indexes[maxx].item() in new_N:
                    print("Error: Duplicate")

                new_N.append(erlist_indexes[maxx].item())
                mindist[maxx] = float("-inf")
                mindist_confidence[maxx] = float("-inf")

                # ... then refresh the min-distances against the newly chosen sample.
                newdists = S - S[maxx].reshape(1, len(S[maxx]))
                newdists = torch.sum(newdists * newdists, -1)
                newdists = torch.sqrt(newdists)
                mindist = torch.min(mindist, newdists)
                mindist_confidence = (distance_weight*(mindist / normalizing_factor)) + (varratio_weight * normalized_confidence[0].to("cuda:0"))
            return new_N
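
The torch.split loop above exists to bound memory: materializing the full (|S|, |N|, 512) difference tensor at once can be huge, so min-distances are computed chunk by chunk. The same pattern in isolation, on random features:

import torch

torch.manual_seed(0)
S = torch.randn(1000, 512)  # unlabelled features
N = torch.randn(200, 512)   # labelled features

# Chunking S bounds the size of each intermediate distance block,
# mirroring the torch.split(S, 25, dim=0) loop above.
mindist = torch.cat([
    torch.cdist(chunk, N).min(dim=1).values
    for chunk in torch.split(S, 25, dim=0)
])
print(mindist.shape)  # torch.Size([1000])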
Example #11
    def kl_divergence(self, ds, indices, howmany, train_indices, n=5):
        self.net.eval()
        N = torch.Tensor().to("cuda:0")  # labelled
        S = torch.Tensor().to("cuda:0")  # unlabelled
        normalized_confidence = [torch.Tensor().to("cuda:0"), torch.Tensor().long()]

        randomized_list = numpy.random.choice(list(indices), len(indices), replace=False)

        trainloaders = [tud.DataLoader(ds._train_val_set, batch_size=500, shuffle=False, num_workers=4,
                                    sampler=customcifar.CustomRandomSampler(train_indices)) for i in range(n)]
        dataloaders = [tud.DataLoader(ds._train_val_set, batch_size=500, shuffle=False, num_workers=4,
                                      sampler=customcifar.CustomSampler(randomized_list)) for i in range(n)]
        with torch.no_grad():
            for batch_index, element in enumerate(zip(*trainloaders)):  # labelled samples
                els = list(element)
                o = torch.Tensor().to("cuda:0")
                for input in els:
                    input[0], input[1] = input[0].to("cuda:0"), input[1].to("cuda:0")
                    # Stack the 10-way logits of each pass along dim 2.
                    o = torch.cat((o, self.net(input[0])[0].reshape(len(input[0]), 10, 1)), 2)
                N = torch.cat((N, o), 0)
                print("\r N: {0} ".format(N.size()), end="")
            print("")

            for batch_index, element in enumerate(zip(*dataloaders)):  # unlabelled samples
                normalized_confidence[1] = torch.cat((normalized_confidence[1], element[0][2]), 0)

                els = list(element)
                o = torch.Tensor().to("cuda:0")
                predictions = torch.Tensor().long().to("cuda:0")
                for input in els:
                    input[0], input[1] = input[0].to("cuda:0"), input[1].to("cuda:0")
                    out = self.net(input[0])[0].reshape(len(input[0]), 10, 1)
                    o = torch.cat((o, out), 2)
                    predictions = torch.cat((predictions, out.max(1)[1]), 1).to("cuda:0")
                normalized_confidence[0] = torch.cat((normalized_confidence[0].cpu(), 1.1 - torch.Tensor(acquisition_functions.confidence(predictions.transpose(1, 0))).cpu() / n), 0).cpu()

                S = torch.cat((S, o), 0)
                print("\r S: {0} ".format(S.size()), end="")
            print("")

            # KL divergence: average the n softmaxed passes into one
            # predictive distribution per sample.
            S = (torch.sum(F.softmax(S, dim=1), 2)) / n
            N = (torch.sum(F.softmax(N, dim=1), 2)) / n

            # S_i / N_j for every unlabelled/labelled pair; broadcasting
            # gives shape (len(S), len(N), 10) directly.
            S_on_N = S.to("cpu").unsqueeze(1) / N.to("cpu").unsqueeze(0)
            ln_S_on_N = torch.log2(S_on_N)

            ln_S_on_N_batches = torch.split(ln_S_on_N, 300, dim=0)
            S_batches = torch.split(S, 300, dim=0)

            # KL(S_i || N_j) = sum_c S_i[c] * log(S_i[c] / N_j[c]), computed as
            # a batched (len(N), 10) x (10, 1) matrix product per sample.
            kldiv = torch.Tensor()
            for i in range(len(ln_S_on_N_batches)):
                partial_kldiv = torch.bmm(ln_S_on_N_batches[i].to("cuda:0"), S_batches[i].reshape(len(S_batches[i]), 10, 1)).cpu()
                kldiv = torch.cat((kldiv, partial_kldiv), 0)  # append, preserving batch order
            kldiv = kldiv.reshape(len(S), len(N))

            # Weight each sample's minimum divergence from the labelled set
            # by its disagreement score, then take the highest.
            mindiv = torch.min(kldiv, 1)[0] * normalized_confidence[0]
            errorlist = [[mindiv[i].item(), normalized_confidence[1][i].item()] for i in range(len(normalized_confidence[0]))]
            sorlist = sorted(errorlist, key=lambda xp: xp[0], reverse=True)

            return [x[1] for x in sorlist[:howmany]]
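
For reference, the pairwise divergence the bmm loop computes is KL(S_i || N_j) = sum_c S_i[c] * log(S_i[c] / N_j[c]); the code uses log2, which only rescales it by a constant and leaves the ranking unchanged. A direct broadcast version on small synthetic distributions:

import torch
import torch.nn.functional as F

torch.manual_seed(0)
S = F.softmax(torch.randn(6, 10), dim=1)  # unlabelled predictive distributions
N = F.softmax(torch.randn(4, 10), dim=1)  # labelled predictive distributions

# (6, 1, 10) / (1, 4, 10) broadcasts over every (i, j) pair of rows.
kl = (S.unsqueeze(1) * (S.unsqueeze(1) / N.unsqueeze(0)).log()).sum(dim=2)
print(kl.shape)                # torch.Size([6, 4])
print(kl.min(dim=1).values)    # min divergence from the labelled set, per sample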