Example #1
 def __init__(self, channels, classes, imagesize, **kwargs):
     super(ModelCnn, self).__init__()
     self.layers = Cnn.get_layers(channels, classes, imagesize)
     self.distills = torch.nn.ModuleList([
         models.GlobalSumPool(
             h=models.DenseNet(headsize=32, layers=1, dropout=0.2),
             c=models.Classifier(32,
                                 classes + 1,
                                 useprototype=1,
                                 usenorm=0,
                                 p=2),
         ),
         models.GlobalSumPool(
             h=models.DenseNet(headsize=64, layers=1, dropout=0.2),
             c=models.Classifier(64,
                                 classes + 1,
                                 useprototype=1,
                                 usenorm=0,
                                 p=2),
         ),
         models.GlobalSumPool(
             h=models.DenseNet(headsize=64, layers=1, dropout=0.2),
             c=models.Classifier(64,
                                 classes + 1,
                                 useprototype=1,
                                 usenorm=0,
                                 p=2),
         )
     ])
Example #2
 def make_distillpools(self, classes):
     return [
         models.GlobalSumPool(
             h=models.DenseNet(headsize=64,
                               bodysize=256,
                               tailsize=self.squash[3],
                               layers=self.layers,
                               dropout=0.2,
                               activation=self.act,
                               bias=self.usebias),
             c=models.Classifier(self.squash[3],
                                 classes + self.optout,
                                 useprototype=self.useprototype,
                                 usenorm=self.usenorm,
                                 p=self.p),
         ),
         models.GlobalSumPool(
             h=models.DenseNet(headsize=128,
                               bodysize=256,
                               tailsize=self.squash[5],
                               layers=self.layers,
                               dropout=0.2,
                               activation=self.act,
                               bias=self.usebias),
             c=models.Classifier(self.squash[5],
                                 classes + self.optout,
                                 useprototype=self.useprototype,
                                 usenorm=self.usenorm,
                                 p=self.p),
         ),
         models.GlobalSumPool(
             h=models.DenseNet(headsize=256,
                               bodysize=1024,
                               tailsize=self.squash[7],
                               layers=self.layers,
                               dropout=0.2,
                               activation=self.act,
                               bias=self.usebias),
             c=models.Classifier(self.squash[7],
                                 classes + self.optout,
                                 useprototype=self.useprototype,
                                 usenorm=self.usenorm,
                                 p=self.p),
         )
     ]
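
Examples #1 and #2 both stack several `GlobalSumPool` heads, but the module itself is not shown on this page. As a point of reference, here is a minimal sketch of what such a head could look like, assuming it sums the feature map over its spatial dimensions before applying the dense head `h` and the classifier `c` (the actual `models.GlobalSumPool` may differ):

import torch


class GlobalSumPool(torch.nn.Module):
    """Hypothetical sketch: sum a conv feature map over its spatial
    dimensions, then apply a dense head h and a classifier c."""

    def __init__(self, h, c):
        super().__init__()
        self.h = h
        self.c = c

    def forward(self, x):           # x: (N, C, H, W)
        x = x.sum(dim=(-2, -1))     # global sum pooling -> (N, C)
        return self.c(self.h(x))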
Example #3
def test(in_dataset, out_dataset, wide, epsilon, temperature):

    testsetout = torchvision.datasets.ImageFolder(os.path.expanduser(
        "./data/{}".format(out_dataset)),
                                                  transform=transform)
    testloaderOut = torch.utils.data.DataLoader(testsetout,
                                                batch_size=100,
                                                shuffle=False,
                                                num_workers=2)

    if in_dataset == "cifar100":
        testset = torchvision.datasets.CIFAR100(root='./data',
                                                train=False,
                                                download=True,
                                                transform=transform)
        testloaderIn = torch.utils.data.DataLoader(testset,
                                                   batch_size=100,
                                                   shuffle=False,
                                                   num_workers=2)

    elif in_dataset == "cifar10":
        testset = torchvision.datasets.CIFAR10(root='./data',
                                               train=False,
                                               download=True,
                                               transform=transform)
        testloaderIn = torch.utils.data.DataLoader(testset,
                                                   batch_size=100,
                                                   shuffle=False,
                                                   num_workers=2)

    for fold in range(1, 6):
        print(f"Processing fold {fold}")

        nclasses = int(in_dataset[5:])  # "cifar10" -> 10, "cifar100" -> 100
        if wide:
            net = models.WideResNet(int(nclasses * 4 / 5))
            ck = torch.load(
                f"./checkpoints/{in_dataset}_fold_{fold}_wide_checkpoint/model_best.pth.tar"
            )
        else:
            net = models.DenseNet(int(nclasses * 4 / 5))
            ck = torch.load(
                f"./checkpoints/{in_dataset}_fold_{fold}_dense_checkpoint/model_best.pth.tar"
            )

        net.load_state_dict(ck['state_dict'])

        net.cuda()
        net.eval()

        d.testData(net, criterion, testloaderIn, testloaderOut, in_dataset,
                   out_dataset, epsilon, temperature, fold)

    m.test(in_dataset, out_dataset, plot=True)
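
Example #3 threads `epsilon` and `temperature` through to `d.testData`, which matches the ODIN out-of-distribution detection procedure (Liang et al., 2018). Since `d.testData` is not shown here, the following is a hedged sketch of the core ODIN score only: temperature-scaled softmax plus a small gradient-based input perturbation. The function name and defaults are illustrative, not taken from this repo.

import torch
import torch.nn.functional as F


def odin_score(net, images, epsilon=0.0014, temperature=1000.0):
    """Sketch of the ODIN confidence score; higher suggests the input
    is more in-distribution."""
    images = images.clone().detach().requires_grad_(True)
    logits = net(images) / temperature
    # perturb the input to increase the predicted class's confidence
    loss = F.cross_entropy(logits, logits.argmax(dim=1))
    loss.backward()
    perturbed = images - epsilon * images.grad.sign()
    with torch.no_grad():
        probs = F.softmax(net(perturbed) / temperature, dim=1)
    return probs.max(dim=1).values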
Example #4
 def set_model(self):
     if args.wide:
         self.g = models.WideResNet().cuda()
         self.c1 = models.Classifier(self.g.nChannels,
                                     self.args.num_classes).cuda()
         self.c2 = models.Classifier(self.g.nChannels,
                                     self.args.num_classes).cuda()
     else:
         self.g = models.DenseNet().cuda()
         self.c1 = models.Classifier(self.g.in_planes,
                                     self.args.num_classes).cuda()
         self.c2 = models.Classifier(self.g.in_planes,
                                     self.args.num_classes).cuda()
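
The shared feature extractor `g` with two classifiers `c1`/`c2` in Example #4 is the setup used by discrepancy-based domain adaptation methods such as Maximum Classifier Discrepancy; whether this repo follows that recipe is an assumption. A minimal sketch of the discrepancy term such methods work with, assuming both classifiers emit logits:

import torch.nn.functional as F


def classifier_discrepancy(logits1, logits2):
    """Mean L1 distance between the two classifiers' softmax outputs."""
    return (F.softmax(logits1, dim=1) - F.softmax(logits2, dim=1)).abs().mean()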
Example #5
def main():
    logger = setup_logger(filename='log.txt')

    train_epoch_number = 10
    batch_size = 100

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # dataset = np.loadtxt(models.Config.dataset_path, delimiter=',')
    # np.random.shuffle(dataset[:5000])

    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])

    # trainset = models.DataSet(dataset[:5000])
    trainset = torchvision.datasets.CIFAR10(root='./data/cifar10',
                                            train=True,
                                            download=False,
                                            transform=transform)
    trainloader = DataLoader(dataset=trainset,
                             batch_size=batch_size,
                             shuffle=True,
                             num_workers=24)

    # testset = models.DataSet(dataset[5000:])
    testset = torchvision.datasets.CIFAR10(root='./data/cifar10',
                                           train=False,
                                           download=False,
                                           transform=transform)
    testloader = DataLoader(dataset=testset,
                            batch_size=1,
                            shuffle=False,
                            num_workers=24)

    prototypes = {}

    # net = models.CNNNet(device=device)
    net = models.DenseNet(device=device,
                          number_layers=8,
                          growth_rate=12,
                          drop_rate=0.0)
    logger.info("DenseNet Channels: %d", net.channels)

    gcpl = models.GCPLLoss(threshold=models.Config.threshold,
                           gamma=models.Config.gamma,
                           b=models.Config.threshold,
                           tao=1.0,
                           beta=0.5,
                           lambda_=0.001)
    sgd = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

    if not os.path.exists("pkl"):
        os.mkdir("pkl")

    if os.path.exists(models.Config.pkl_path):
        state_dict = torch.load(models.Config.pkl_path)
        try:
            net.load_state_dict(state_dict)
            logger.info("Load state from file %s.", models.Config.pkl_path)
        except RuntimeError:
            logger.error("Loading state from file %s failed.",
                         models.Config.pkl_path)

    for epoch in range(train_epoch_number):
        logger.info("Trainset size: %d, Epoch number: %d", len(trainset),
                    epoch + 1)

        running_loss = 0.0

        for i, (features, labels) in enumerate(trainloader):
            features = features.to(net.device)
            sgd.zero_grad()
            features = net(features).view(batch_size, 1, -1)
            loss = gcpl(features, labels, prototypes)
            loss.backward()
            sgd.step()

            running_loss += loss.item() / batch_size

            logger.debug("[%3d, %5d] loss: %7.4f", epoch + 1, i + 1,
                         loss.item() / batch_size)

        torch.save(net.state_dict(), models.Config.pkl_path)

        prototype_count = 0

        for c in prototypes:
            prototype_count += len(prototypes[c])

        logger.info("Prototypes Count: %d", prototype_count)

        # if (epoch + 1) % 5 == 0:
        distance_sum = 0.0
        correct = 0

        for i, (feature, label) in enumerate(testloader):
            feature = net(feature.to(net.device)).view(1, -1)
            predicted_label, probability, min_distance = models.predict(
                feature, prototypes)

            if label == predicted_label:
                correct += 1

            distance_sum += min_distance

            logger.debug(
                "%5d: Label: %d, Prediction: %d, Probability: %7.4f, Distance: %7.4f, Accuracy: %7.4f",
                i + 1, label, predicted_label, probability, min_distance,
                correct / (i + 1))

        logger.info("Distance Average: %7.4f", distance_sum / len(testloader))
        logger.info("Accuracy: %7.4f\n", correct / len(testloader))
Example #6
 def set_model(self):
     if args.wide:
         self.model = models.WideResNet(int(args.num_classes * 4 / 5),
                                        dropRate=0.3).cuda()
     else:
         self.model = models.DenseNet(int(args.num_classes * 4 / 5)).cuda()
Example #7
def stream(config, trainset, streamset):
    logger = logging.getLogger(__name__)

    net = models.DenseNet(device=torch.device(config.device),
                          tensor_view=trainset.tensor_view,
                          number_layers=config.number_layers,
                          growth_rate=config.growth_rate,
                          drop_rate=config.drop_rate)
    logger.info("densenet channel: %d", net.channels)

    try:
        net.load(config.net_path)
    except FileNotFoundError:
        pass
    else:
        logger.info("Load model from file '%s'.", config.net_path)

    criterion = models.CPELoss(gamma=config.gamma,
                               tao=config.tao,
                               b=config.b,
                               beta=config.beta,
                               lambda_=config.lambda_)
    optimizer = optim.SGD(net.parameters(),
                          lr=config.learning_rate,
                          momentum=0.9)

    prototypes = models.Prototypes(threshold=config.threshold)
    # load saved prototypes
    try:
        prototypes.load(config.prototypes_path)
    except FileNotFoundError:
        pass
    else:
        logger.info("load prototypes from file '%s'.", config.prototypes_path)
    logger.info("original prototype count: %d", len(prototypes))

    detector = None

    def train(train_dataset):
        logger.info('---------------- train ----------------')
        dataloader = DataLoader(dataset=train_dataset,
                                batch_size=1,
                                shuffle=True)
        for epoch in range(config.epoch_number):
            logger.info('---------------- epoch: %d ----------------',
                        epoch + 1)
            logger.info("threshold: %.4f, gamma: %.4f, tao: %.4f, b: %.4f",
                        config.threshold, config.gamma, config.tao, config.b)
            logger.info("prototypes count before training: %d",
                        len(prototypes))

            net.train()
            for i, (feature, label) in enumerate(dataloader):
                feature, label = feature.to(net.device), label.to(net.device)
                optimizer.zero_grad()
                feature, out = net(feature)
                loss, distance = criterion(feature, out, label, prototypes)
                loss.backward()
                optimizer.step()

                logger.debug("[train %d, %d] %7.4f %7.4f", epoch + 1, i + 1,
                             loss.item(), distance)

            logger.info("prototypes count after training: %d", len(prototypes))
            prototypes.update()
            logger.info("prototypes count after update: %d", len(prototypes))

        net.save(config.net_path)
        logger.info("net has been saved.")
        prototypes.save(config.prototypes_path)
        logger.info("prototypes has been saved.")

        intra_distances = []
        with torch.no_grad():
            net.eval()
            for i, (feature, label) in enumerate(dataloader):
                feature, label = feature.to(net.device), label.item()
                feature, out = net(feature)
                closest_prototype, distance = prototypes.closest(
                    feature, label)
                intra_distances.append((label, distance))

        novelty_detector = models.Detector(intra_distances,
                                           train_dataset.label_set,
                                           config.std_coefficient)
        logger.info("distance average: %s", novelty_detector.average_distances)
        logger.info("distance std: %s", novelty_detector.std_distances)
        logger.info("detector threshold: %s", novelty_detector.thresholds)
        novelty_detector.save(config.detector_path)
        logger.info("detector has been saved.")

        return novelty_detector

    def test(test_dataset, novelty_detector):
        logger.info('---------------- test ----------------')
        dataloader = DataLoader(dataset=test_dataset,
                                batch_size=1,
                                shuffle=False)

        logger.info("known labels: %s", novelty_detector.known_labels)
        logger.info("distance average: %s", novelty_detector.average_distances)
        logger.info("distance std: %s", novelty_detector.std_distances)
        logger.info("detector threshold: %s", novelty_detector.thresholds)

        detection_results = []

        with torch.no_grad():
            net.eval()
            for i, (feature, label) in enumerate(dataloader):
                feature, label = feature.to(net.device), label.item()
                feature, out = net(feature)
                predicted_label, distance = models.predict(feature, prototypes)
                prob = models.probability(feature,
                                          predicted_label,
                                          prototypes,
                                          gamma=config.gamma)
                detected_novelty = novelty_detector(predicted_label, distance)
                real_novelty = label not in novelty_detector.known_labels

                detection_results.append(
                    (label, predicted_label, real_novelty, detected_novelty))

                logger.debug("[test %5d]: %d, %d, %7.4f, %7.4f, %5s, %5s",
                             i + 1, label, predicted_label, prob, distance,
                             real_novelty, detected_novelty)

        tp, fp, fn, tn, cm, acc, acc_all = novelty_detector.evaluate(
            detection_results)
        # the +1 in the denominators guards against division by zero
        precision = tp / (tp + fp + 1)
        recall = tp / (tp + fn + 1)

        logger.info("accuracy of known labels: %.4f", acc)
        logger.info("accuracy of all labels: %.4f", acc_all)
        logger.info("true positive: %d", tp)
        logger.info("false positive: %d", fp)
        logger.info("false negative: %d", fn)
        logger.info("true negative: %d", tn)
        logger.info("precision: %7.4f", precision)
        logger.info("recall: %7.4f", recall)
        logger.info("confusion matrix: \n%s", cm)

    def stream_train(train_dataset, stream_dataset):
        logger.info('---------------- stream train ----------------')

        logger.info('---------------- initial train ----------------')
        novelty_detector = train(train_dataset)
        logger.info('---------------- initial test ----------------')
        test(stream_dataset, novelty_detector)

        novelty_dataset = dataset.NoveltyDataset(train_dataset)
        iter_streamloader = enumerate(
            DataLoader(dataset=stream_dataset, batch_size=1, shuffle=True))
        buffer = []

        for i, (feature, label) in iter_streamloader:
            sample = (feature.squeeze(dim=0), label.squeeze(dim=0))
            with torch.no_grad():
                net.eval()
                feature, label = feature.to(net.device), label.item()
                feature, out = net(feature)
                predicted_label, distance = models.predict(feature, prototypes)
                prob = models.probability(feature,
                                          predicted_label,
                                          prototypes,
                                          gamma=config.gamma)
                detected_novelty = novelty_detector(predicted_label, distance)
                real_novelty = label not in novelty_detector.known_labels

            if detected_novelty:
                buffer.append(sample)

            logger.debug("[stream %5d]: %d, %d, %7.4f, %7.4f, %5s, %5s, %4d",
                         i + 1, label, predicted_label, prob, distance,
                         real_novelty, detected_novelty, len(buffer))

            if len(buffer) == 1000:
                logger.info("novelty dataset size before extending: %d",
                            len(novelty_dataset))
                novelty_dataset.extend(buffer,
                                       config.novelty_buffer_sample_rate)
                logger.info("novelty dataset size after extending: %d",
                            len(novelty_dataset))
                logger.info(
                    '---------------- incremental train ----------------')
                novelty_detector = train(novelty_dataset)
                buffer.clear()

        return novelty_detector

    if config.train:
        for period in range(config.period):
            logger.info('---------------- period: %d ----------------',
                        period + 1)
            detector = stream_train(trainset, streamset)
            test(streamset, detector)
    else:
        test(streamset, detector)
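
`models.Detector` in Example #7 is built from per-sample (label, distance) pairs and then called with a predicted label and a distance, and its logs report per-class averages, stds, and thresholds. That suggests a per-class rule of mean + std_coefficient * std over the intra-class distances. A hedged sketch of that rule follows; the argument order matches Example #7 (Example #15 passes the same arguments in a different order), and the real class also implements evaluate() and save(), omitted here.

import numpy as np


class Detector:
    """Sketch of the per-class novelty rule implied by the logs above:
    threshold(label) = mean distance + std_coefficient * std."""

    def __init__(self, intra_distances, known_labels, std_coefficient):
        self.known_labels = set(known_labels)
        per_label = {l: [d for ll, d in intra_distances if ll == l]
                     for l in self.known_labels}
        self.average_distances = {l: float(np.mean(v))
                                  for l, v in per_label.items() if v}
        self.std_distances = {l: float(np.std(v))
                              for l, v in per_label.items() if v}
        self.thresholds = {l: m + std_coefficient * self.std_distances[l]
                           for l, m in self.average_distances.items()}

    def __call__(self, predicted_label, distance):
        # novel if farther from the predicted class than its threshold;
        # labels the detector has never seen are treated as novel (an
        # assumption)
        return distance > self.thresholds.get(predicted_label, 0.0)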
Example #8
print(args)
args.cuda = not args.no_cuda and torch.cuda.is_available()
print("Random Seed: ", args.seed)
torch.manual_seed(args.seed)

if args.cuda:
    torch.cuda.manual_seed(args.seed)

kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {}

print('Load model')
if args.model == 'resnet':
    model = models.resnet18(num_classes=args.num_classes)
elif args.model == 'densenet':
    model = models.DenseNet(growth_rate=48,
                            drop_rate=0.2,
                            block_config=(6, 6, 6),
                            num_init_features=96)
else:
    raise Exception('invalid model selected')
model.load_state_dict(torch.load(args.pre_trained_net))

print('load target data: ', args.dataset)
_, test_loader = data_loader.getTargetDataSet(args.dataset, args.batch_size,
                                              args.imageSize, args.dataroot)

print('load non target data: ', args.out_dataset)
nt_test_loader = data_loader.getNonTargetDataSet(args.out_dataset,
                                                 args.batch_size,
                                                 args.imageSize, args.dataroot)

if args.cuda:
Example #9
 def set_model(self):
     self.model = models.DenseNet(self.args.num_classes).cuda()
Example #10
    if configuration.down_samp == 'Convolution':
        down_samp = False
    else:
        down_samp = True

    if configuration.up_samp == 'UpConvolution':
        up_samp = False
    else:
        up_samp = True

    if configuration.model_type == 'DenseNet':
        network = models.DenseNet(configuration.input_channels,
                                  configuration.n_classes,
                                  ignore_class=configuration.mask_class,
                                  k0=32,
                                  Theta=0.5,
                                  Dropout=0.2,
                                  Growth_rate=configuration.gr)
    elif configuration.model_type == 'DenseUNet':
        network = models.DenseUNet(configuration.input_channels,
                                   configuration.n_classes,
                                   ignore_class=configuration.mask_class,
                                   k0=32,
                                   Theta=0.5,
                                   Dropout=0.2,
                                   Growth_rate=configuration.gr)
    else:
        network = models.DenseNetStack(configuration.input_channels,
                                       configuration.n_classes,
                                       ignore_class=configuration.mask_class,
Example #11
 def set_model(self):
     self.g = models.DenseNet().cuda()
     self.c1 = models.Classifier(self.g.in_planes, self.args.num_classes).cuda()
     self.c2 = models.Classifier(self.g.in_planes, self.args.num_classes).cuda()
Example #12
def fit(max_epochs, patience, batch_size):
    """fitting the model"""

    #defining the split for cross-validation
    #random_state is set so that the split is reproducible
    skf = KFold(n_splits=5, shuffle=True, random_state=47)

    #list for saving the output logs
    logs = []

    #training for each of the above defined folds
    #note: plain KFold ignores the y and groups arguments here;
    #StratifiedKFold or GroupKFold would be needed to honor them
    for fold, (train_idx, val_idx) in enumerate(
            skf.split(X=np.zeros(len(train_df)),
                      y=train_df['target'],
                      groups=train_df['patient_id'].tolist()), 1):
        print("FOLD: " + str(fold))
        logs.append("FOLD: " + str(fold))

        #creating the chosen model
        model = models.DenseNet(32, [6, 12, 24, 16],
                                len(meta_features)).to(device)
        print("model initialized!")

        #setting the optimizer and learning rate scheduler
        optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
        lr_scheduler = ReduceLROnPlateau(optimizer=optimizer,
                                         mode='min',
                                         patience=1,
                                         verbose=True,
                                         factor=0.2)

        #getting the indices for the respective train-validation split
        train_data = train_df.iloc[train_idx].reset_index(drop=True)
        valid_data = train_df.iloc[val_idx].reset_index(drop=True)

        #creating the dataset with the respective train-validation splits
        train_set = MelanomaDataset(df=train_data,
                                    imfolder='train',
                                    train=True,
                                    transforms=train_transform,
                                    meta_features=meta_features)

        val_set = MelanomaDataset(df=valid_data,
                                  imfolder='train',
                                  train=True,
                                  transforms=test_transform,
                                  meta_features=meta_features)

        #initializing the dataloaders
        train_loader = DataLoader(dataset=train_set,
                                  batch_size=batch_size,
                                  shuffle=True,
                                  num_workers=4)
        val_loader = DataLoader(dataset=val_set,
                                batch_size=batch_size,
                                shuffle=False,
                                num_workers=4)

        #variables to save the best loss and best ROC AUC (Area Under the Receiver Operating Characteristic curve)
        best_loss = 100000000
        best_roc = 0.0

        #setting the loss functions; BCEWithLogitsLoss combines a sigmoid layer with the binary cross-entropy loss
        criterion = torch.nn.BCEWithLogitsLoss()
        criterion_val = torch.nn.BCEWithLogitsLoss()

        for epoch in range(max_epochs):

            print(f"Epoch {epoch + 1}/{max_epochs}")
            # Training one epoch
            print("")
            print("training...")
            epoch_loss, train_acc, train_roc = run_epoch(
                model,
                optimizer,
                train_loader,
                train=True,
                loss_fnc=criterion,
                length_dataset=len(train_set),
                batch_size=batch_size)

            print(f"Train loss: {epoch_loss}")
            print(f"ACC on training set: {train_acc}")
            print(f"ROC on training set: {train_roc}")
            print("")

            #saving the logs to the list
            logs.append(("Fold: " + str(fold) + ", epoch: " + str(epoch) +
                         ": train loss: " + str(epoch_loss) + ", train acc: " +
                         str(train_acc) + ", train roc: " + str(train_roc)))

            # Validating one epoch
            print("validating...")

            val_loss, val_acc, val_roc = run_epoch(model,
                                                   None,
                                                   val_loader,
                                                   train=False,
                                                   loss_fnc=criterion_val,
                                                   length_dataset=len(val_set),
                                                   batch_size=batch_size)

            print(f"Test loss: {val_loss}")
            print(f"Val Acc: {val_acc}")
            print(f"Val Roc: {val_roc}")
            print("")

            #saving the logs to the list
            logs.append(("Fold: " + str(fold) + ", epoch: " + str(epoch) +
                         ": val loss: " + str(val_loss) + ", val acc: " +
                         str(val_acc) + ", val roc: " + str(val_roc)))

            #learning rate scheduler step, reducing the learning rate when the training loss does not decrease for an epoch
            lr_scheduler.step(epoch_loss)

            # saving best weights
            if val_roc >= best_roc:
                best_epoch = epoch
                best_loss = val_loss
                best_acc = val_acc
                best_roc = val_roc
                best_model_weights = copy.deepcopy(model.state_dict())
                torch.save(model.state_dict(), "best_model" + str(fold))

            #saving the logs to a .json file (done before the
            #early-stopping check so the final epoch's entries are kept)
            with open("results" + str(fold) + ".json", "w") as file:
                json.dump(logs, file)

            # Early stopping
            if epoch - best_epoch >= patience:
                break
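
`run_epoch` is not included in Example #12. Below is a minimal sketch under the assumptions that the loader yields (inputs, targets) pairs, the model emits one logit per sample, and `loss_fnc` is the BCEWithLogitsLoss set up above; the real helper presumably also feeds the meta features into the model.

import torch
from sklearn.metrics import roc_auc_score


def run_epoch(model, optimizer, loader, train, loss_fnc,
              length_dataset, batch_size):
    """Hedged sketch of the helper used above: one pass over `loader`,
    returning (mean loss, accuracy, ROC AUC). batch_size is kept only
    to match the call sites in Example #12."""
    model.train(train)
    total_loss, correct, scores, targets = 0.0, 0, [], []
    with torch.set_grad_enabled(train):
        for inputs, labels in loader:
            logits = model(inputs).squeeze(1)
            loss = loss_fnc(logits, labels.float())
            if train:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
            total_loss += loss.item() * labels.size(0)
            probs = torch.sigmoid(logits)
            correct += ((probs > 0.5).long() == labels).sum().item()
            scores += probs.detach().cpu().tolist()
            targets += labels.cpu().tolist()
    return (total_loss / length_dataset, correct / length_dataset,
            roc_auc_score(targets, scores))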
Example #13
def visualization(config, trainset):
    logger = logging.getLogger(__name__)

    trainloader = DataLoader(dataset=trainset,
                             batch_size=1,
                             shuffle=False,
                             num_workers=4)

    # net = models.CNNNet(device=device)
    net = models.DenseNet(device=torch.device(config.device),
                          in_channels=config.in_channels,
                          number_layers=config.number_layers,
                          growth_rate=config.growth_rate,
                          drop_rate=0.0)
    logger.info("DenseNet Channels: %d", net.channels)

    if config.loss_type == 'gcpl':
        criterion = models.GCPLLoss(threshold=config.threshold,
                                    gamma=config.gamma,
                                    lambda_=config.lambda_)
    elif config.loss_type == 'pdce':
        criterion = models.PairwiseDCELoss(threshold=config.threshold,
                                           tao=config.tao,
                                           b=config.b,
                                           lambda_=config.lambda_)
    else:
        raise RuntimeError('Cannot find "{}" loss type.'.format(
            config.loss_type))

    # load saved model state dict
    if os.path.exists(config.model_path):
        state_dict = torch.load(config.model_path)
        try:
            net.load_state_dict(state_dict)
            logger.info("Load model from file '%s'.", config.model_path)
        except RuntimeError:
            logger.error("Loading model from file '%s' failed.",
                         config.model_path)

    # load saved prototypes
    if os.path.exists(config.prototypes_path):
        try:
            criterion.load_prototypes(config.prototypes_path)
            logger.info("Load prototypes from file '%s'.",
                        config.prototypes_path)
        except RuntimeError:
            logger.error("Loading prototypes from file '%s' failed.",
                         config.prototypes_path)

    # original_features = []
    features = []
    labels = []

    for i, (feature, label) in enumerate(trainloader):
        feature, label = net(feature.to(net.device)).view(-1), label.item()

        features.append(feature.data.cpu().numpy())
        labels.append(label)

    features = np.array(features)
    labels = np.array(labels)

    feature_tsne = TSNE(n_components=2, random_state=30)
    # TSNE.fit_transform ignores y; labels are only used for plotting below
    features = feature_tsne.fit_transform(features, labels)
    # original_features = feature_tsne.fit_transform(original_features, labels)

    plt.figure(figsize=(6, 4))
    colors = 'r', 'g', 'b', 'c', 'm', 'y', 'k', 'gray', 'orange', 'purple'
    for c, label in zip(colors, sorted(list(trainset.label_set))):
        # print(c, label)
        plt.scatter(features[labels == label, 0],
                    features[labels == label, 1],
                    c=c,
                    label=label)
    plt.legend()
    plt.show()
Example #14
def run_cel(config, trainset, testset):
    logger = logging.getLogger(__name__)

    trainloader = DataLoader(dataset=trainset,
                             batch_size=1,
                             shuffle=True,
                             num_workers=0)
    testloader = DataLoader(dataset=testset,
                            batch_size=1,
                            shuffle=False,
                            num_workers=0)

    device = torch.device(config.device)

    net = models.DenseNet(device=device,
                          in_channels=config.in_channels,
                          number_layers=config.number_layers,
                          growth_rate=12,
                          drop_rate=0.0)
    logger.info("DenseNet Channels: %d", net.channels)
    fc_net = models.LinearNet(device=device,
                              in_features=net.channels *
                              (config.tensor_view[1] // 8) *
                              (config.tensor_view[2] // 8))

    cel = nn.CrossEntropyLoss()
    # include fc_net's parameters so the linear head is trained as well
    sgd = optim.SGD(list(net.parameters()) + list(fc_net.parameters()),
                    lr=config.learning_rate,
                    momentum=0.9)

    # load saved model state dict
    if os.path.exists(config.model_path):
        state_dict = torch.load(config.model_path)
        try:
            net.load_state_dict(state_dict)
            logger.info("Load state from file '%s'.", config.model_path)
        except RuntimeError:
            logger.error("Loading state from file '%s' failed.",
                         config.model_path)

    for epoch in range(config.epoch_number):
        logger.info("Epoch number: %d", epoch + 1)

        probs = []

        # train
        if not config.testonly:
            running_loss = 0.0

            for i, (feature, label) in enumerate(trainloader):
                feature, label = feature.to(net.device), label.to(net.device)
                sgd.zero_grad()
                feature = net(feature).view(1, -1)
                feature = fc_net(feature)
                loss = cel(feature, label)
                loss.backward()
                sgd.step()

                feature = feature.data.squeeze()

                running_loss += loss.item()

                probs.append((label.item(), feature[label.item()]))
                logger.debug("[%d, %d] %7.4f", epoch + 1, i + 1, loss.item())

            torch.save(net.state_dict(), config.model_path)
            torch.save(probs, config.probs_path)
        # end train

        # load saved probs
        if config.testonly:
            if os.path.exists(config.probs_path):
                try:
                    probs = torch.load(config.probs_path)
                except RuntimeError:
                    logger.error("Loading probs from file '%s' failed.",
                                 config.probs_path)
                else:
                    logger.info("Load probs from file '%s'.",
                                config.probs_path)

        # test
        detector = models.SoftmaxDetector(probs, config.std_coefficient,
                                          trainset.label_set)

        if (epoch + 1) % config.testfreq == 0 or config.testonly:

            detection_results = []
            with torch.no_grad():
                for i, (feature, label) in enumerate(testloader):
                    feature, label = feature.to(net.device), label.item()
                    feature = net(feature).view(1, -1)
                    probability, predicted_label = fc_net(feature).max(dim=1)
                    detected_novelty = detector(predicted_label, probability)
                    real_novelty = label not in trainset.label_set

                    detection_results.append(
                        (label, predicted_label.item(), probability,
                         real_novelty, detected_novelty))
                    logger.debug("%5d: %d, %d, %7.4f, %s, %s", i + 1, label,
                                 predicted_label, probability, real_novelty,
                                 detected_novelty)

            true_positive, false_positive, false_negative = detector.evaluate(
                detection_results)
            precision = true_positive / (true_positive + false_positive + 1)
            recall = true_positive / (true_positive + false_negative + 1)
            cm = confusion_matrix(detector.results['true_label'],
                                  detector.results['predicted_label'],
                                  labels=sorted(list(testset.label_set)))
            results = detector.results[np.isin(detector.results['true_label'],
                                               list(trainset.label_set))]

            logger.info(
                "Accuracy: %7.4f",
                accuracy_score(results['true_label'],
                               results['predicted_label']))
            logger.info("True Positive: %d", true_positive)
            logger.info("False Positive: %d", false_positive)
            logger.info("False Negative: %d", false_negative)
            logger.info("Precision: %7.4f", precision)
            logger.info("Recall: %7.4f", recall)
            logger.info("Confusion Matrix: \n%s", cm)
Example #15
def run(config, trainset, testset):
    logger = logging.getLogger(__name__)

    trainloader = DataLoader(dataset=trainset,
                             batch_size=1,
                             shuffle=True,
                             num_workers=4)
    testloader = DataLoader(dataset=testset,
                            batch_size=1,
                            shuffle=False,
                            num_workers=4)

    # net = models.CNNNet(device=device)
    net = models.DenseNet(device=torch.device(config.device),
                          in_channels=config.in_channels,
                          number_layers=config.number_layers,
                          growth_rate=config.growth_rate,
                          drop_rate=0.0)
    logger.info("DenseNet Channels: %d", net.channels)

    if config.loss_type == 'gcpl':
        criterion = models.GCPLLoss(threshold=config.threshold,
                                    gamma=config.gamma,
                                    lambda_=config.lambda_)
    elif config.loss_type == 'pdce':
        criterion = models.PairwiseDCELoss(threshold=config.threshold,
                                           tao=config.tao,
                                           b=config.b,
                                           lambda_=config.lambda_)
    else:
        raise RuntimeError('Cannot find "{}" loss type.'.format(
            config.loss_type))

    sgd = optim.SGD(net.parameters(), lr=config.learning_rate, momentum=0.9)
    # adam = optim.Adam(net.parameters(), lr=config.learning_rate)

    # load saved optim
    # if os.path.exists(config.model_path):
    #     state_dict = torch.load(config.optim_path)
    #     try:
    #         net.load_state_dict(state_dict)
    #         logger.info("Load optim from file '%s'.", config.optim_path)
    #     except RuntimeError:
    #         logger.error("Loading optim from file '%s' failed.", config.optim_path)

    # load saved model state dict
    if os.path.exists(config.model_path):
        state_dict = torch.load(config.model_path)
        try:
            net.load_state_dict(state_dict)
            logger.info("Load model from file '%s'.", config.model_path)
        except RuntimeError:
            logger.error("Loading model from file '%s' failed.",
                         config.model_path)

    # load saved prototypes
    if os.path.exists(config.prototypes_path):
        try:
            criterion.load_prototypes(config.prototypes_path)
            logger.info("Load prototypes from file '%s'.",
                        config.prototypes_path)
        except RuntimeError:
            logger.error("Loading prototypes from file '%s' failed.",
                         config.prototypes_path)

    for epoch in range(config.epoch_number):

        intra_class_distances = []

        # train
        if not config.testonly:
            logger.info("Epoch number: %d", epoch + 1)
            logger.info("threshold: %.4f, gamma: %.4f, tao: %.4f, b: %.4f",
                        config.threshold, config.gamma, config.tao, config.b)

            running_loss = 0.0
            distance_sum = 0.0

            if len(criterion.prototypes) > len(trainset.label_set):
                criterion.clear_prototypes()
            else:
                criterion.upgrade_prototypes()

            for i, (feature, label) in enumerate(trainloader):
                feature, label = feature.to(net.device), label.item()
                sgd.zero_grad()
                feature = net(feature).view(1, -1)
                loss, distance = criterion(feature, label)
                loss.backward()
                sgd.step()

                running_loss += loss.item()

                distance_sum += distance
                intra_class_distances.append((label, distance))

                logger.debug("[%d, %d] %7.4f, %7.4f", epoch + 1, i + 1,
                             loss.item(), distance)

            distances = np.array(intra_class_distances,
                                 dtype=[('label', np.int32),
                                        ('distance', np.float32)])
            average_distance = np.average(distances['distance']).item()
            std_distance = distances['distance'].std().item()

            config.threshold = (average_distance + 3 * std_distance)
            config.gamma = 2 / average_distance
            config.tao = average_distance + std_distance
            config.b = std_distance

            criterion.set_threshold(config.threshold)
            criterion.set_gamma(config.gamma)
            criterion.set_tao(config.tao)
            criterion.set_b(config.b)

            torch.save(net.state_dict(), config.model_path)
            criterion.save_prototypes(config.prototypes_path)
            torch.save(intra_class_distances,
                       config.intra_class_distances_path)

            logger.info("Prototypes Count: %d", len(criterion.prototypes))
        # end train

        # test
        if config.testonly:
            # load saved intra class distances
            if os.path.exists(config.intra_class_distances_path):
                try:
                    intra_class_distances = torch.load(
                        config.intra_class_distances_path)
                    logger.info("Load intra class distances from file '%s'.",
                                config.intra_class_distances_path)
                except RuntimeError:
                    logger.error(
                        "Loading intra class distances from file '%s' failed.",
                        config.intra_class_distances_path)

        detector = models.Detector(intra_class_distances,
                                   config.std_coefficient, trainset.label_set)
        logger.info("Distance Average: %s", detector.average_distances)
        logger.info("Distance Std: %s", detector.std_distances)
        logger.info("Distance Threshold: %s", detector.thresholds)

        if (epoch + 1) % config.testfreq == 0 or config.testonly:

            detection_results = []

            for i, (feature, label) in enumerate(testloader):
                feature, label = net(feature.to(net.device)).view(
                    1, -1), label.item()
                predicted_label, probability, distance = criterion.predict(
                    feature)
                detected_novelty = detector(predicted_label, probability,
                                            distance)
                real_novelty = label not in trainset.label_set

                detection_results.append(
                    (label, predicted_label, probability, distance,
                     real_novelty, detected_novelty))

                logger.debug("%5d: %d, %d, %7.4f, %7.4f, %s, %s", i + 1, label,
                             predicted_label, probability, distance,
                             real_novelty, detected_novelty)

            true_positive, false_positive, false_negative = detector.evaluate(
                detection_results)

            precision = true_positive / (true_positive + false_positive + 1)
            recall = true_positive / (true_positive + false_negative + 1)
            cm = confusion_matrix(detector.results['true_label'],
                                  detector.results['predicted_label'],
                                  labels=sorted(list(testset.label_set)))
            results = detector.results[np.isin(detector.results['true_label'],
                                               list(trainset.label_set))]

            logger.info(
                "Accuracy: %7.4f",
                accuracy_score(results['true_label'],
                               results['predicted_label']))
            # logger.info("Accuracy: %7.4f", accuracy_score(detector.results['true_label'], detector.results['predicted_label']))
            logger.info("True Positive: %d", true_positive)
            logger.info("False Positive: %d", false_positive)
            logger.info("False Negative: %d", false_negative)
            logger.info("Precision: %7.4f", precision)
            logger.info("Recall: %7.4f", recall)
            logger.info("Confusion Matrix: \n%s", cm)
Example #16
def main():
    logger = setup_logger(level=logging.DEBUG, filename='log.txt')

    train_epoch_number = 200

    batch_size = 1

    pairwise = False

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    dataset = np.loadtxt(models.Config.dataset_path, delimiter=',')
    np.random.shuffle(dataset[:5000])

    trainset = models.DataSet(dataset[:5000], pairwise=pairwise)
    trainloader = DataLoader(dataset=trainset,
                             batch_size=batch_size,
                             shuffle=True,
                             num_workers=24)

    testset = models.DataSet(dataset[5000:])
    testloader = DataLoader(dataset=testset,
                            batch_size=1,
                            shuffle=False,
                            num_workers=2)

    # net = models.CNNNet(device=device)
    net = models.DenseNet(device=device,
                          number_layers=8,
                          growth_rate=12,
                          drop_rate=0.0)
    logger.info("DenseNet Channels: %d", net.channels)

    # cel = torch.nn.CrossEntropyLoss()
    gcpl = functions.GCPLLoss(threshold=models.Config.threshold,
                              gamma=models.Config.gamma,
                              lambda_=0.01)
    pwl = functions.PairwiseLoss(tao=10.0, b=2.0, beta=0.5)
    sgd = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

    if not os.path.exists("pkl"):
        os.mkdir("pkl")

    if os.path.exists(models.Config.pkl_path):
        state_dict = torch.load(models.Config.pkl_path)
        try:
            net.load_state_dict(state_dict)
            logger.info("Load state from file %s.", models.Config.pkl_path)
        except RuntimeError:
            logger.error("Loading state from file %s failed.",
                         models.Config.pkl_path)

    for epoch in range(train_epoch_number):
        logger.info("Trainset size: %d, Epoch number: %d", len(trainset),
                    epoch + 1)

        # CPL train
        prototypes = train(net,
                           trainloader, (gcpl, pwl),
                           sgd,
                           pairwise=pairwise)
        torch.save(net.state_dict(), models.Config.pkl_path)

        prototype_count = 0

        for c in prototypes:
            prototype_count += len(prototypes[c])

        logger.info("Prototype Count: %d", prototype_count)

        accuracy, average_distance = test(net, testloader, prototypes,
                                          gcpl.gamma)

        models.Config.threshold = average_distance * 1.5

        logger.info("Distance Average: %7.4f", average_distance)
        logger.info("Accuracy: %7.4f\n", accuracy)