def main():
    cluster_range = range(0, 21)  # n_clusters = i + 2 below, i.e. k runs from 2 to 22

    print("Start k-means clustering!")

    # k-means inertia sweep over the coded MNIST data (nothing is plotted here,
    # only the error scores are printed)
    # note: mnist_root is prepared but only the preset loaders below are used
    mnist_root = os.path.join(dr(dr(dr(abspath(__file__)))), 'data',
                              'Dataset_MNIST_n')
    # dataset, coded MNIST
    data_train, target_train = datasets_preset.provide_reduced_mnist(
        train=True)

    normalized_vectors = preprocessing.normalize(data_train)
    scores = [
        KMeans(n_clusters=i + 2).fit(normalized_vectors).inertia_
        for i in cluster_range
    ]
    print("Error scores for coded MNIST:")
    print([i + 2 for i in cluster_range])  # the actual n_clusters values used
    print(*[str(a).replace(".", ",") for a in scores], sep="\n")  # decimal comma, e.g. for spreadsheet import
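    # (sketch, not part of the original script): the inertia values above are typically
    # inspected as an elbow curve, e.g.
    #   plt.plot([i + 2 for i in cluster_range], scores, marker='o')
    # assuming matplotlib.pyplot is imported as plt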

    # dataset, unreduced MNIST
    data_train, target_train = datasets_preset.provide_unreduced_mnist(
        train=True)

    normalized_vectors = preprocessing.normalize(data_train)
    scores = [
        KMeans(n_clusters=i + 2).fit(normalized_vectors).inertia_
        for i in cluster_range
    ]
    print("Error scores for unreduced MNIST:")
    print([i + 2 for i in cluster_range])  # the actual n_clusters values used
    print(*[str(a).replace(".", ",") for a in scores], sep="\n")

    # dataset, coded F-MNIST
    data_train, target_train = datasets_preset.provide_reduced_f_mnist(
        train=True)

    normalized_vectors = preprocessing.normalize(data_train)
    scores = [
        KMeans(n_clusters=i + 2).fit(normalized_vectors).inertia_
        for i in cluster_range
    ]
    print("Error scores for coded F-MNIST:")
    print([i + 2 for i in cluster_range])  # the actual n_clusters values used
    print(*[str(a).replace(".", ",") for a in scores], sep="\n")

    # dataset, unreduced F-MNIST
    data_train, target_train = datasets_preset.provide_unreduced_f_mnist(
        train=True)

    normalized_vectors = preprocessing.normalize(data_train)
    scores = [
        KMeans(n_clusters=i + 2).fit(normalized_vectors).inertia_
        for i in cluster_range
    ]
    print("Error scores for unreduced F-MNIST:")
    print([i + 2 for i in cluster_range])  # the actual n_clusters values used
    print(*[str(a).replace(".", ",") for a in scores], sep="\n")
Example #2
def main():
    global args
    argv = sys.argv[1:]

    os.environ['CITYSCAPES_RESULTS'] = os.path.join(dr(dr(dr(abspath(__file__)))), 'data', 'cityscapes', 'inference')
    os.environ['CITYSCAPES_EXPORT_DIR'] = os.path.join(dr(dr(dr(abspath(__file__)))), 'data', 'cityscapes')
    args.groundTruthSearch = os.path.join(dr(dr(dr(abspath(__file__)))), 'data', 'cityscapes',
                                          'gtFine', 'val', '*', '*_gtFine_labelIds.png')
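    # the search pattern matches every '*_gtFine_labelIds.png' ground-truth file in every
    # city folder of the Cityscapes val split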

    predictionImgList = []
    groundTruthImgList = []

    # the image lists can either be provided as arguments: paths containing 'gt' or
    # 'groundtruth' go to the ground-truth list, paths containing 'pred' to the prediction list
    if (len(argv) > 3):
        for arg in argv:
            if ("gt" in arg or "groundtruth" in arg):
                groundTruthImgList.append(arg)
            elif ("pred" in arg):
                predictionImgList.append(arg)
    # however, the no-argument way is preferred
    elif len(argv) == 0:
        # use the ground truth search string specified above
        groundTruthImgList = glob.glob(args.groundTruthSearch)
        if not groundTruthImgList:
            printError("Cannot find any ground truth images to use for evaluation. Searched for: {}".format(args.groundTruthSearch))
        # get the corresponding prediction for each ground truth image
        for gt in groundTruthImgList:
            predictionImgList.append(getPrediction(args, gt))

    # evaluate
    all_result_dict = evaluateImgLists(predictionImgList, groundTruthImgList, args)

    return all_result_dict
Example #3
def plot_unlabeled_lots():
    dpi = 100
    color_palette = ("tab:blue", "tab:orange", "tab:green", "tab:red", "tab:purple", "tab:brown")
    plot_path = os.path.join(dr(dr(dr(abspath(__file__)))), 'results', 'plots', 'for_ppt')
    plt.interactive(True)

    # prepare figure
    figure = plt.figure(figsize=(25, 15),  dpi=dpi)
    random_all = np.random.rand(2, int(200*7.5))
    plt.scatter(x=random_all[0, :], y=random_all[1, :], c='#606060', marker='x')
    plt.tick_params(axis='both', which='both', bottom=False, left=False, labelbottom=False, labelleft=False)
    a = 1  # no-op; presumably left so a breakpoint can be set after plotting
Example #4
def plot_ugly(output_list_train, data_train, target_train):
    # sample indices with high vote entropy from a committee that already has high accuracy
    idx = [
        500, 10116, 15434, 20773, 25562, 25678, 26560, 28620, 36104, 39184,
        41594, 42566, 45352, 47034, 50329, 51248, 51794, 52086
    ]

    for i_data in range(len(idx)):

        # plot titleless
        plt.imshow(data_train.reshape(len(data_train), 28, 28)[idx[i_data]],
                   cmap='gray')
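        # committee prediction: np.bincount(...).argmax() is the majority (plurality) vote
        # across the models' outputs for this sample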
        pred = np.bincount(output_list_train[:, idx[i_data]]).argmax()
        print(target_train[idx[i_data]])
        print(pred)
        name = 'titleless_' + str(i_data)
        plt.title('')
        plot_path = os.path.join(dr(dr(dr(abspath(__file__)))), 'results',
                                 'plots', 'malicious')
        file_name = os.path.join(plot_path, (name + '.png'))
        plt.tick_params(axis='both',
                        which='both',
                        bottom=False,
                        left=False,
                        labelbottom=False,
                        labelleft=False)
        plt.tight_layout()
        plt.savefig(file_name, format='png', dpi=300)

        # plot with title
        plt.clf()
        plt.imshow(data_train.reshape(len(data_train), 28, 28)[idx[i_data]],
                   cmap='gray')
        pred = np.bincount(output_list_train[:, idx[i_data]]).argmax()
        print(target_train[idx[i_data]])
        print(pred)
        name = 'title_' + str(i_data)
        plt.title('Data: ' + str(idx[i_data]) + '; Prediction: ' + str(pred) +
                  ', GT: ' + str(target_train[idx[i_data]]),
                  fontsize=16)
        plt.tick_params(axis='both',
                        which='both',
                        bottom=False,
                        left=False,
                        labelbottom=False,
                        labelleft=False)
        plt.tight_layout()
        file_name = os.path.join(plot_path, (name + '.png'))
        plt.savefig(file_name, format='png', dpi=300)
Example #5
def coded_mnist_plot():
    batch_example = 10
    model_path = os.path.join(dr(dr(dr(dr(abspath(__file__))))), 'results',
                              'Autoencoder_fmnist.pt')
    mnist_root = os.path.join(dr(dr(dr(dr(abspath(__file__))))), 'data')
    # about datasets
    transform = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize((0.1307, ), (0.3081, ))])

    test_dataset = Dataset_F_MNIST_n(root=mnist_root,
                                     train=False,
                                     download=True,
                                     transform=transform,
                                     n=100)
    test_loader = DataLoader(test_dataset,
                             batch_size=batch_example,
                             shuffle=True)

    model = Autoencoder()
    saved_weights = torch.load(model_path, map_location=torch.device('cpu'))

    model.load_state_dict(saved_weights)

    data_test, _, _ = next(iter(test_loader))
    data_test = data_test.view(-1, 784)
    model.eval()
    output = model(data_test)
Example #6
def get_entropy_acc(entropy, output_list, target):
    csv_path = os.path.join(dr(dr(dr(abspath(__file__)))), 'results', 've.csv')
    rand_idx = np.random.randint(low=0, high=60000, size=1000)
    train_text = 0
    rev_entropy = np.around(entropy[rand_idx], decimals=3)

    # calculate accuracy
    rev_output = output_list[:, rand_idx]
    rev_target = target[rand_idx]
    target_stack = np.array([rev_target] * output_list.shape[0])
    rev_acc = np.sum(rev_output == target_stack, axis=0) / output_list.shape[0]
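    # rev_acc[i] is the fraction of committee members whose prediction matches the target
    # for the i-th sampled point; it is logged below next to the (rounded) vote entropy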

    # np.savetxt(csv_path, rev_acc, delimiter=";")
    with open(csv_path, mode='a+') as test_file:
        test_writer = csv.writer(test_file, delimiter=';')
        test_writer.writerow(rev_acc)
        test_writer.writerow(rev_entropy)
Example #7
def plot_diversity():

    dpi = 100
    color_palette = ("tab:blue", "tab:orange", "tab:green", "tab:red", "tab:purple", "tab:brown")
    plot_path = os.path.join(dr(dr(dr(abspath(__file__)))), 'results', 'plots', 'for_ppt')
    plt.interactive(True)

    # prepare figure
    figure = plt.figure(figsize=(12, 7), dpi=dpi)

    # prepare random data
    rand1_x = np.random.normal(loc=2.0, scale=0.5, size=200)
    rand1_y = np.random.normal(loc=5.0, scale=0.35, size=200)
    rand1 = np.array([rand1_x, rand1_y])
    rand2_x = np.random.normal(loc=5.0, scale=0.25, size=200)
    rand2_y = np.random.normal(loc=5.0, scale=0.5, size=200)
    rand2 = np.array([rand2_x, rand2_y])
    rand3_x = np.random.normal(loc=9.0, scale=0.5, size=200)
    rand3_y = np.random.normal(loc=6.0, scale=0.45, size=200)
    rand3 = np.array([rand3_x, rand3_y])
    rand4_x = np.random.normal(loc=3.0, scale=0.5, size=200)
    rand4_y = np.random.normal(loc=9.0, scale=0.5, size=200)
    rand4 = np.array([rand4_x, rand4_y])
    rand5_x = np.random.normal(loc=6.5, scale=0.75, size=200)
    rand5_y = np.random.normal(loc=7.5, scale=0.5, size=200)
    rand5 = np.array([rand5_x, rand5_y])

    plt.scatter(x=rand1[0, :], y=rand1[1, :], c=color_palette[0], marker='o')
    plt.scatter(x=rand2[0, :], y=rand2[1, :], c=color_palette[1], marker='o')
    plt.scatter(x=rand3[0, :], y=rand3[1, :], c=color_palette[2], marker='o')
    plt.scatter(x=rand4[0, :], y=rand4[1, :], c=color_palette[3], marker='o')
    plt.scatter(x=rand5[0, :], y=rand5[1, :], c=color_palette[4], marker='o')

    # make it pretty
    plt.tick_params(axis='both', which='both', labelbottom=False, labelleft=False)
    plt.tick_params(axis='both', which='both', direction='in')
    plt.tight_layout()

    # save fig
    name = 'qbc_w_diversity'
    file_name = os.path.join(plot_path, (name + '.png'))
    plt.savefig(file_name, format='png', dpi=300)
Example #8
def plot_qbc_confusion():
    dpi = 100
    color_palette = ("tab:blue", "tab:orange", "tab:green", "tab:red", "tab:purple", "tab:brown")
    plot_path = os.path.join(dr(dr(dr(abspath(__file__)))), 'results', 'plots', 'for_ppt')
    plt.interactive(True)

    # prepare figure
    figure = plt.figure(figsize=(9, 3), dpi=dpi)

    # random points, [0, :] for x; [1, :] for y, sorted for x
    random_all_1 = np.random.rand(2, 60)
    random_all_1[0, :] = np.sort(random_all_1[0, :])*0.43
    random_all_2 = np.random.rand(2, 60)
    random_all_2[0, :] = (np.sort(random_all_2[0, :])*0.43)+0.57
    random_all_conf = np.random.rand(2, 20)
    random_all_conf[0, :] = (np.sort(random_all_conf[0, :])*0.1)+0.45
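    # two synthetic classes occupy x in [0, 0.43] and [0.57, 1.0]; the '?' points in the
    # x = 0.45-0.55 gap mark the ambiguous region the candidate decision boundaries below run through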

    # plot all labelled
    plt.scatter(x=random_all_1[0, :], y=random_all_1[1, :], c=color_palette[0], marker="o")
    plt.scatter(x=random_all_2[0, :], y=random_all_2[1, :], c=color_palette[1], marker="s")
    plt.scatter(x=random_all_conf[0, :], y=random_all_conf[1, :], c='k', marker="$?$")

    # plot boundary lines
    plt.autoscale(False)
    plt.plot([0.45, 0.53], [-0.12, 1.06], c=color_palette[2])
    plt.plot([0.42, 0.49], [-0.1, 1.2], c=color_palette[2])
    plt.plot([0.58, 0.42], [-0.1, 1.2], c=color_palette[2])
    plt.plot([0.58, 0.54], [-0.1, 1.2], c=color_palette[2])
    plt.plot([0.5, 0.5], [-0.1, 1.2], c=color_palette[2])

    # make it pretty
    plt.tick_params(axis='both', which='both', bottom=False, left=False, labelbottom=False, labelleft=False)
    # plt.title('QBC: Versions within committee', fontsize=16)
    plt.tight_layout()

    # save fig
    name = 'qbc_confusion'
    file_name = os.path.join(plot_path, (name + '.png'))
    plt.savefig(file_name, format='png', dpi=300)
def main():
    """
    :param n_model: number of models for the comittee
    :param n_train: number of training data to be used, this decides how long the training process will be
    :param batch_train_size: batch size for training process, keep it under 20
    :param idx_ratio: ratio of high entropy:ratio of random
    :return:
    """

    # paths
    save_path = os.path.join(dr(dr(dr(abspath(__file__)))), 'results',
                             've_test')
    csv_name_train = 'train.csv'
    csv_name_test = 'test.csv'
    csv_name_index = 'index.csv'
    dir_name = 'vote_90_5_70_'
    save_weights_flag = True
    cityscape_path = os.path.join(dr(dr(dr(abspath(__file__)))), 'data',
                                  'cityscapes')
    cityscape_loss_weight_path = os.path.join(dr(dr(dr(abspath(__file__)))),
                                              'data', 'cityscapes',
                                              'class_weights.pkl')
    cityscape_pretrain_path = os.path.join(dr(dr(dr(abspath(__file__)))),
                                           'data', 'cityscape_pretrain')
    inference_path = os.path.join(dr(dr(dr(abspath(__file__)))), 'data',
                                  'cityscapes', 'inference')
    color_path = os.path.join(dr(dr(dr(abspath(__file__)))), 'data',
                              'cityscapes', 'color')
    print('cityscape_path: ' + cityscape_path)
    print(dir_name)

    # arguments
    n_train = 2880  # divisible by 8 (a batch size) and by 10 (10% increments when growing the training data)
    n_pretrain = 0
    n_test = 500
    n_epoch = 40
    n_model = 10
    test_factor = 3  # committee only tested every test_factor-th batch
    batch_train_size = 3 * max(torch.cuda.device_count(), 1)
    batch_test_size = 25 * max(torch.cuda.device_count(), 1)
    lr = 0.0001
    loss_print = 2
    continue_flag = False
    poly_exp = 1.0
    feature_extract = True
    dropout_rate = 0.9
    idx_ratio = [1.0, 0.0]
    data_limit = 0.7
    manual_seed = 1
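    # idx_ratio = [1.0, 0.0] means new samples are picked purely by vote entropy (no random
    # share); data_limit = 0.7 caps the labelled pool at 70% of n_train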

    # report qbc semseg to user in terminal
    text = (('n_model(dropout): ' + str(n_model)) +
            (', n_train: ' + str(n_train)) +
            (', batch_train_size: ' + str(batch_train_size)) +
            (', idx_ratio: ' + str(idx_ratio)) +
            (', test_factor: ' + str(test_factor)))
    print(text)

    # CUDA
    cuda_flag = torch.cuda.is_available()
    device = torch.device("cuda" if cuda_flag else "cpu")
    device_cpu = torch.device("cpu")
    dataloader_kwargs = {'pin_memory': True} if cuda_flag else {}
    print(torch.cuda.device_count(), "GPUs detected")
    torch.manual_seed(manual_seed)
    # print("Max memory allocated:" + str(np.round(torch.cuda.max_memory_allocated(device) / 1e9, 3)) + ' Gb')

    # get data and index library
    mean_std = ([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    transform = T.Compose([
        T.Resize((800, 800), Image.BICUBIC),
        T.ToTensor(),
        T.Normalize(*mean_std)
    ])
    train_dataset = dataset_preset.Dataset_Cityscapes_n(
        root=cityscape_path,
        split='train',
        mode='fine',
        target_type='semantic',
        transform=transform,
        target_transform=segmen_preset.label_id2label,
        n=n_train)
    train_dataset_idx = dataset_preset.Dataset_Cityscapes_n_i(
        root=cityscape_path,
        split='train',
        mode='fine',
        target_type='semantic',
        transform=transform,
        target_transform=segmen_preset.label_id2label,
        n=n_train)  # also get index of data
    test_dataset = dataset_preset.Dataset_Cityscapes_n_i(
        root=cityscape_path,
        split='val',
        mode='fine',
        target_type='semantic',
        transform=transform,
        target_transform=segmen_preset.label_id2label,
        n=n_test)
    test_dataloader = DataLoader(test_dataset,
                                 batch_size=batch_test_size,
                                 shuffle=True,
                                 num_workers=3 *
                                 max(torch.cuda.device_count(), 1),
                                 drop_last=False)
    print("Datasets loaded!")

    # create models, optimizers, scheduler, criterion, the model
    fcn_model = torchvision.models.segmentation.deeplabv3_resnet101(
        pretrained=False,
        progress=True,
        num_classes=segmen_preset.n_labels_valid,
        aux_loss=True)
    fcn_model = fcn_model.cuda()
    fcn_model = nn.DataParallel(fcn_model)

    # the optimizers
    optimizer = torch.optim.Adam(
        [{
            'params': fcn_model.module.classifier.parameters()
        }, {
            'params':
            list(fcn_model.module.backbone.parameters()) +
            list(fcn_model.module.aux_classifier.parameters())
        }],
        lr=lr)
    lambda1 = lambda epoch: math.pow(1 - (epoch / n_epoch), poly_exp)
    scheduler = optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda1)
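    # with poly_exp = 1.0 the LambdaLR factor (1 - epoch / n_epoch) decays the learning
    # rate roughly linearly from lr towards zero over the n_epoch epochs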

    with open(cityscape_loss_weight_path,
              "rb") as file:  # (needed for python3)
        class_weights = np.array(pickle.load(file))
    class_weights = torch.from_numpy(class_weights)
    class_weights = Variable(class_weights.type(torch.FloatTensor)).cuda()
    criterion = torch.nn.CrossEntropyLoss(weight=class_weights).cuda()

    # report everything
    print(str(n_model) + " fcn models created")
    text = ('n_model: ' + str(n_model)) + (', n_train: ' + str(n_train)) + (', n_epoch: ' + str(n_epoch)) +\
           (', batch_train_size: ' + str(batch_train_size)) + (', idx_ratio: ' + str(idx_ratio))
    print(text)

    # to document training process, create directory, etc
    train_text = [str(x) for x in range(1, n_epoch + 1)]
    test_text = [str(x) for x in range(1, n_epoch + 1)]
    train_index_text = [str(x) for x in range(1, 8)]
    train_index_docu = 0
    train_index = []
    test_text_index = 0

    # write text to csv
    dir_number = 1
    while os.path.exists(
            os.path.join(save_path, (dir_name + '{:03d}'.format(dir_number)))):
        dir_number += 1
    run_path = os.path.join(save_path,
                            (dir_name + '{:03d}'.format(dir_number)))
    os.makedirs(run_path)  # make run_* dir
    f = open(os.path.join(run_path, 'info.txt'), 'w+')  # write .txt file
    f.write(text)
    f.close()
    copy(__file__, os.path.join(run_path, os.path.basename(__file__)))

    # write training progress
    csv_path_train = os.path.join(run_path, csv_name_train)
    title = [
        "Training progress for n_model = " + str(n_model) + ", idx_ratio:  " +
        str(idx_ratio) + ', for multiple epoch'
    ]
    with open(csv_path_train, mode='a+', newline='') as test_file:
        test_writer = csv.writer(test_file, delimiter=',')
        test_writer.writerow(title)

    # write test progress
    csv_path_test = os.path.join(run_path, csv_name_test)
    title = [
        "Test progress for n_model = " + str(1) + ", idx_ratio:  " +
        str(idx_ratio) + ', for multiple epoch, torch seed: ' +
        str(manual_seed) + ', run_path: ' + run_path
    ]
    with open(csv_path_test, mode='a+', newline='') as test_file:
        test_writer = csv.writer(test_file, delimiter=',')
        test_writer.writerow(title)

    # write index and train progress
    csv_path_index = os.path.join(run_path, csv_name_index)
    title = [
        "Index progress for n_model = " + str(n_model) + ", idx_ratio:  " +
        str(idx_ratio) + ', for multiple epoch'
    ]
    with open(csv_path_index, mode='a+', newline='') as test_file:
        test_writer = csv.writer(test_file, delimiter=',')
        test_writer.writerow(title)

    # training start
    for i_epoch in range(n_epoch):
        # initialize with random
        if len(train_index) == 0:
            train_index = np.array(
                random.sample(range(n_train), k=int(n_train / 10)))
            train_index_text[train_index_docu] = train_index_text[train_index_docu] + ': ' \
                                                 + str([x for x in train_index]).strip('[]')

            # update train and index documentation
            text = train_index_text[train_index_docu].split(";")
            with open(csv_path_index, mode='a+', newline='') as test_file:
                test_writer = csv.writer(test_file, delimiter=';')
                test_writer.writerow(text)
            print(train_index_text)
            train_index_docu = train_index_docu + 1
        # append with vote entropy
        elif (len(train_index) < int(data_limit * n_train)) and (i_epoch % 5 == 0):
            t = Timer()
            t.start()
            # perform vote entropy on entire dataset
            indices, fcn_model = vote_entropy_dropout(fcn_model,
                                                      train_dataset_idx,
                                                      train_index,
                                                      idx_ratio,
                                                      batch_test_size,
                                                      device,
                                                      n_model,
                                                      dropout_rate,
                                                      i_epoch,
                                                      n_data=int(n_train / 10))
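            # each selection round adds another ~10% of the pool (n_data = n_train / 10),
            # chosen by the dropout committee's vote entropy, until the data_limit cap is reached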
            train_index = np.append(train_index, indices)
            train_index_text[train_index_docu] = train_index_text[train_index_docu] + ': ' + \
                                                 str([x for x in train_index]).strip('[]') +\
                                                 ";{:.4f}".format(np.array(t.stop()).mean())

            # update train and index documentation
            text = train_index_text[train_index_docu].split(";")
            with open(csv_path_index, mode='a+', newline='') as test_file:
                test_writer = csv.writer(test_file, delimiter=';')
                test_writer.writerow(text)
            print(train_index_text)
            train_index_docu = train_index_docu + 1

        # retrain with selected data
        print(train_index)
        print('length: ' + str(len(train_index)))
        train_subset = Subset(train_dataset_idx, train_index)
        train_dataloader = DataLoader(train_subset,
                                      batch_size=batch_train_size,
                                      shuffle=True)
        loss_epoch = []
        time_epoch = []
        for i_batch, (data_train, target_train, index,
                      _) in enumerate(train_dataloader):
            # train batch
            t = Timer()
            t.start()
            output, loss, iou, fcn_model, optimizer = train_batch(
                fcn_model, data_train, target_train, optimizer, device,
                criterion)
            print('Epoch: ' + str(i_epoch) + '\t Batch: ' + str(i_batch) +
                  '/' + str(len(train_dataloader)) + '; model ' + str(0) +
                  '; train loss avg: ' + "{:.3f}".format(loss) +
                  '; train iou avg: ' + "{:.3f}".format(iou.mean()))
            for param_group in optimizer.param_groups:
                print(param_group['lr'])
            loss_epoch.append(loss)
            time_epoch.append(t.stop())

        # document train result
        train_text[i_epoch] = train_text[i_epoch] + ";{:.4f}".format(np.array(loss_epoch).mean()) + \
                              ";{:.7f}".format(np.array(optimizer.param_groups[0]['lr']))\
                              + ';' + str(len(train_index)) + ";{:.4f}".format(np.array(time_epoch).mean())
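        # train.csv row format: epoch; mean loss; current lr; len(train_index); mean batch time (Timer units)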

        # update train documentation
        text = train_text[i_epoch].split(";")
        with open(csv_path_train, mode='a+', newline='') as test_file:
            test_writer = csv.writer(test_file, delimiter=';')
            test_writer.writerow(text)

        # save temporary model and perform test
        if i_epoch % 10 == 0 or (i_epoch + 1) == n_epoch:
            print('Save and Test Model')
            fcn_model.train()
            torch.save(
                fcn_model.state_dict(),
                os.path.join(run_path, ('model_weight_epoch_train' +
                                        '{:03d}'.format(i_epoch) + '.pt')))
            fcn_model.eval()
            torch.save(
                fcn_model.state_dict(),
                os.path.join(run_path, ('model_weight_epoch_' +
                                        '{:03d}'.format(i_epoch) + '.pt')))

        # perform test
        test_idx = test_text_index
        create_pred_img(fcn_model, test_dataloader, inference_path, color_path)
        all_result_dict = cityscapes_eval()

        # document test result
        test_text[test_idx] = test_text[test_idx] + ";{:.4f}".format(all_result_dict['averageScoreClasses']) +\
                              ";{:.7f}".format(np.array(optimizer.param_groups[0]['lr']))\
                              + ';' + str(len(train_index))

        # update test documentation
        text = test_text[test_idx].split(";")
        with open(csv_path_test, mode='a+', newline='') as test_file:
            test_writer = csv.writer(test_file, delimiter=';')
            test_writer.writerow(text)

        test_text_index = test_text_index + 1

        # one epoch ends here
        scheduler.step()
        print(optimizer)
def qbc(n_model, n_train, batch_size, idx_ratio, dataset):
    # parameters (n_model, n_train, batch_size and idx_ratio come in as function arguments)
    n_cluster = 20
    dataset = dataset.lower()  # 'reduced_f_mnist', 'reduced_mnist','unreduced_f_mnist','unreduced_mnist',
    text = (('n_model: ' + str(n_model)) + (', n_train: ' + str(n_train)) + (', batch_size: ' + str(batch_size))
            + (', idx_ratio: ' + str(idx_ratio)) + (', n_cluster: ' + str(n_cluster)) + (', dataset: ' + dataset))
    print(text)

    # paths
    model_path = os.path.join(dr(dr(abspath(__file__))), 'results', dataset)
    csv_path = os.path.join(model_path, 'xgb_qbc.csv')

    # CUDA
    cuda_flag = torch.cuda.is_available()
    device = torch.device("cuda" if cuda_flag else "cpu")
    device_cpu = torch.device("cpu")
    dataloader_kwargs = {'pin_memory': True} if cuda_flag else {}
    print("Let's use", torch.cuda.device_count(), "GPUs!")

    # load dataset
    if dataset == 'reduced_f_mnist':
        data_train, target_train = datasets_preset.provide_reduced_f_mnist(train=True)
        data_test, target_test = datasets_preset.provide_reduced_f_mnist(train=False)
    elif dataset == 'reduced_mnist':
        data_train, target_train = datasets_preset.provide_reduced_mnist(train=True)
        data_test, target_test = datasets_preset.provide_reduced_mnist(train=False)
    elif dataset == 'unreduced_f_mnist':
        data_train, target_train = datasets_preset.provide_unreduced_f_mnist(train=True)
        data_test, target_test = datasets_preset.provide_unreduced_f_mnist(train=False)
    elif dataset == 'unreduced_mnist':
        data_train, target_train = datasets_preset.provide_unreduced_mnist(train=True)
        data_test, target_test = datasets_preset.provide_unreduced_mnist(train=False)

    # execute kmeans-clustering for entire training dataset
    cluster_index, cluster_centers = kmeans(X=torch.from_numpy(data_train),
                                            num_clusters=n_cluster, distance='cosine', device=device)
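    # kmeans here appears to be the kmeans_pytorch-style helper; with cosine distance the cluster
    # assignment depends on the direction of the feature vectors rather than their magnitude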
    # show clustering result, document data per cluster
    n_data_cr = np.zeros(n_cluster, dtype=int)
    idx_data_cr = []
    for i_cluster in range(n_cluster):
        n_data_cr[i_cluster] = np.sum(cluster_index.numpy() == i_cluster)
        idx_data_cr.append(np.argwhere(cluster_index == i_cluster).numpy())
        print("Cluster " + str(i_cluster) + ": " + str(n_data_cr[i_cluster])
              + " data, or " + "{:.4f}".format(n_data_cr[i_cluster] / cluster_index.__len__() * 100) + "%")
    print("Cluster data size variance: " + "{:.4f}".format(n_data_cr.var() ** 0.5) + ", (smaller is better)")

    # to document training process, create directory, etc
    train_text = [str(x) for x in range(batch_size, n_train + 1, batch_size)]
    dir_name = 'run_'
    dir_number = 1
    while os.path.exists(os.path.join(model_path, (dir_name + '{:03d}'.format(dir_number)))):
        dir_number += 1
    run_path = os.path.join(model_path, (dir_name + '{:03d}'.format(dir_number)))
    os.makedirs(run_path)  # make run_* dir
    f = open(os.path.join(run_path, 'info.txt'), 'w+')  # write .txt file
    f.write(text)
    f.close()

    # create models and index library
    models = []
    tree_method = "auto"  # "gpu_hist" if cuda_flag else "auto"
    print('Tree creation method: ' + tree_method)
    idx_library = [np.array([]).astype(int) for x in range(n_model)]
    for i_model in range(n_model):
        xgbc = XGBClassifier(max_depth=8, objective='multi:softmax', n_estimators=1, n_jobs=32,
                             reg_lambda=1, gamma=2, learning_rate=1, num_class=10, tree_method=tree_method)
        models.append(xgbc)
    print(str(n_model) + " xgboost models created")

    # training and test process, 1st batch
    output_list_test = np.zeros((n_model, len(data_test))).astype(int)  # n_model x n_data
    for i_model in range(n_model):
        random_index = np.array(random.sample(range(data_train.__len__()), k=batch_size))
        idx_library[i_model] = np.append(idx_library[i_model], random_index)
        models[i_model].fit(data_train[random_index], target_train[random_index])
        output_list_test[i_model, :] = models[i_model].predict(data_test)

    # Document first batch
    acc_models = qbc_preset.each_model_acc(output_list_test, target_test)
    acc_committee = qbc_preset.committee_vote(output_list_test, target_test)  # committee vote
    train_text[0] = train_text[0] + ' '.join([";" + "{:.4f}".format(elem) for elem in acc_models])
    train_text[0] = train_text[0] + '; ' + "{:.3f}".format(acc_committee * 100) + '%'  # committee vote
    print("First batch added!")
    print("Batch " + str(0) + ": average acc of models is " + "{:.3f}".format(acc_models.mean() * 100) + "%")
    print("Batch " + str(0) + ": acc of committee is " + "{:.3f}".format(acc_committee * 100) + "%")
    print("Library sizes, after first batch:" + str([np.unique(idx_library[i_model]).shape for x in range(n_model)]))
    pickle.dump(models, open(os.path.join(run_path, ('models_batch_' + "{0:0=3d}".format(0) + '.pkl')), 'wb'))
    pickle.dump(idx_library, open(os.path.join(run_path, ('indices_batch_' + "{0:0=3d}".format(0) + '.pkl')), 'wb'))

    # training process, n-th batch
    for i_batch in range(1, train_text.__len__()):
        print("Starting Batch " + str(i_batch))
        output_list_train = np.zeros((n_model, data_train.__len__())).astype(int)

        # calculate entropy & acc of current data
        for i_model in range(n_model):
            output_list_train[i_model, :] = models[i_model].predict(data_train)
        acc_models = qbc_preset.each_model_acc(output_list_train, target_train)
        acc_target = qbc_preset.each_target_acc(output_list_train, target_train)
        entropy = qbc_preset.vote_entropy_xgb(output_list_train, target_train)
        # qbc_preset.get_entropy_acc(entropy, output_list_train, target_train)
        # show entropy, show committee acc, 3 highest guess, entropy value, show 8 of it?
        # qbc_preset.show_entropy_result(acc_models, entropy, output_list, data_train, target_train)
        # qbc_preset.plot_ugly(output_list_train, data_train, target_train)
        print("Library sizes:" + str([np.unique(idx_library[i_model]).shape for x in range(n_model)])) 
        index_1 = np.random.choice(range(n_model))
        index_2 = np.random.choice(np.setdiff1d(range(0, n_model), index_1))
        print("Overlap size:" + str(np.intersect1d(idx_library[index_1], idx_library[index_2]).__len__()) +
              ", overlap ideal: " + str(int((idx_library[index_2].__len__() - batch_size)
                                            * (idx_ratio[0] + idx_ratio[1]))) +
              ", library size: " + str(idx_library[index_2].__len__()) + ", dataset: " + dataset
              + ", idx_ratio: " + str(idx_ratio))

        # train and test for each model and each batch
        for i_model in range(n_model):
            # indexes
            idx_library[i_model] = \
                qbc_preset.get_next_indices(idx_library[i_model], entropy, idx_data_cr, batch_size,
                                            idx_ratio, data_train.__len__())
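            # get_next_indices grows this model's labelled pool by batch_size indices, split between
            # high-vote-entropy samples and (presumably cluster-balanced) random picks according to idx_ratio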
            # train model
            models[i_model].fit(data_train[idx_library[i_model]], target_train[idx_library[i_model]])
            # test model
            output_list_test[i_model, :] = models[i_model].predict(data_test)
            print('Model ' + str(i_model))

        # check committee vote
        acc_models = qbc_preset.each_model_acc(output_list_test, target_test)
        acc_committee = qbc_preset.committee_vote(output_list_test, target_test)  # committee vote method
        print("Batch " + str(i_batch) + ": average acc of models is " + "{:.3f}".format(acc_models.mean() * 100) + "%")
        print("Batch " + str(i_batch) + ": acc of committee is " + "{:.3f}".format(acc_committee * 100) + "%")

        # Document training progress
        train_text[i_batch] = train_text[i_batch] + ' '.join([";" + "{:.4f}".format(elem) for elem in acc_models])
        train_text[i_batch] = train_text[i_batch] + '; ' + "{:.3f}".format(
            acc_committee * 100) + '%'  # committee vote method
        # save models and indices
        pickle.dump(models, open(os.path.join(run_path, ('models_batch_' + "{0:0=3d}".format(i_batch) + '.pkl')), 'wb'))
        pickle.dump(idx_library,
                    open(os.path.join(run_path, ('indices_batch_' + "{0:0=3d}".format(i_batch) + '.pkl')), 'wb'))

    # write text to csv
    title = ["New Vote, Results for n_model = " + str(n_model) + ", idx_ratio:  " + str(idx_ratio)
             + ", n_cluster: " + str(n_cluster) + ", with highest entropy, avg and var documented"]
    with open(csv_path, mode='a+') as test_file:
        test_writer = csv.writer(test_file, delimiter=',')
        test_writer.writerow(title)
    # loop through train_text
    for i_text in range(0, len(train_text)):
        text = train_text[i_text].split(";")
        mean = statistics.mean([float(i) for i in text[1:-2]])
        var = statistics.variance([float(i) for i in text[1:-2]]) ** 0.5  # standard deviation of the per-model accuracies
        text.append("{:.3f}".format(mean * 100) + "%")
        text.append("{:.3f}".format(var * 100) + "%")
        with open(csv_path, mode='a+') as test_file:
            test_writer = csv.writer(test_file, delimiter=';')
            test_writer.writerow(text)
Example #11
def main():
    # training param
    batch_train = 64
    batch_test = 2000
    lr = 1e-3
    epochs = 4
    momentum = 0.9
    torch.manual_seed(1)  # reset random for reproducibility
    save_model = True
    model_path = os.path.join(dr(dr(dr(dr(abspath(__file__))))), 'results', 'mnist', 'Autoencoder_mnist.pt')
    output_path = os.path.join(dr(dr(dr(dr(abspath(__file__))))), 'results', 'mnist', 'AE_output_mnist.pt')
    input_path = os.path.join(dr(dr(dr(dr(abspath(__file__))))), 'results', 'mnist', 'AE_input_mnist.pt')
    mnist_root = os.path.join(dr(dr(dr(dr(abspath(__file__))))), 'data')

    # CUDA
    cuda_flag = torch.cuda.is_available()
    device = torch.device("cuda" if cuda_flag else "cpu")
    device_cpu = torch.device("cpu")
    dataloader_kwargs = {'pin_memory': True} if cuda_flag else {}
    print("Let's use", torch.cuda.device_count(), "GPUs!")

    # about datasets
    transform = transforms.Compose([transforms.ToTensor(),
                                    transforms.Normalize((0.1307,), (0.3081,))])
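    # (0.1307, 0.3081) are the commonly used MNIST mean/std; note that the autoencoder below
    # therefore reconstructs normalized values, not raw [0, 1] pixels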

    train_dataset = Dataset_MNIST_n(root=mnist_root, train=True, download=True, transform=transform, n=60000)
    test_dataset = Dataset_MNIST_n(root=mnist_root, train=False, download=True, transform=transform, n=10000)

    train_loader = DataLoader(train_dataset, batch_size=batch_train, shuffle=True, **dataloader_kwargs)
    test_loader = DataLoader(test_dataset, batch_size=batch_test, shuffle=True, **dataloader_kwargs)

    model = Autoencoder().to(device) # define model

    optimizer = optim.Adam(model.parameters(), lr=lr)
    criterion = nn.MSELoss()

    # train autoencoder
    for i_epoch in range(epochs):
        print("Epoch: " + str(i_epoch))
        model.train()
        for _, (data, _, _) in enumerate(train_loader):
            data = data.view(-1, 784).to(device)
            optimizer.zero_grad()
            outputs = model(data)
            loss = criterion(outputs, data)
            loss.backward()
            optimizer.step()
            print('Train autoencoder, loss = ' + str(loss.item()))

    # store some output of trained AE, to view the result
    data_test, _, _ = next(iter(test_loader))
    data_test = data_test.view(-1, 784).to(device)
    model.eval()
    output = model(data_test)

    # all to cpu
    data_test = data_test.to(device_cpu)
    model = model.to(device_cpu)
    output = output.to(device_cpu)

    # save output
    if save_model:
        torch.save(model.state_dict(), model_path)
        torch.save(output, output_path)
        torch.save(data_test, input_path)
def main(n_train, batch_train_size, n_test, batch_test_size):
    """
        :param n_model: number of models for the comittee
        :param n_train: number of training data to be used, this decides how long the training process will be
        :param batch_train_size: batch size for training process, keep it under 20
        :param idx_ratio: ratio of high entropy:ratio of random
        :return:
        """

    # paths
    img_path = os.path.join(dr(dr(dr(abspath(__file__)))), 'data', 've_test',
                            'example.png')
    save_path = os.path.join(dr(dr(dr(abspath(__file__)))), 'results',
                             've_test')
    csv_name_train = 'train.csv'
    csv_name_test = 'test.csv'
    csv_name_index = 'index.csv'
    dir_name = 'vote_bulk_40_from_90_005_'
    index_path_name = 'vote_90_5_005'
    save_weights_flag = True
    cityscape_path = os.path.join(dr(dr(dr(abspath(__file__)))), 'data',
                                  'cityscapes')
    cityscape_loss_weight_path = os.path.join(dr(dr(dr(abspath(__file__)))),
                                              'data', 'cityscapes',
                                              'class_weights.pkl')
    cityscape_pretrain_path = os.path.join(dr(dr(dr(abspath(__file__)))),
                                           'data', 'cityscape_pretrain')
    inference_path = os.path.join(dr(dr(dr(abspath(__file__)))), 'data',
                                  'cityscapes', 'inference')
    color_path = os.path.join(dr(dr(dr(abspath(__file__)))), 'data',
                              'cityscapes', 'color')
    print('cityscape_path: ' + cityscape_path)
    print(dir_name)
    print(index_path_name)

    # arguments (these hard-coded values override the function arguments above)
    n_train = 2880
    n_pretrain = 0
    n_test = 500
    n_epoch = 40
    test_factor = 3  # committee only tested every test_factor-th batch
    batch_train_size = 3 * max(torch.cuda.device_count(), 1)
    batch_train_size_pretrain = 4
    batch_test_size = 25 * max(torch.cuda.device_count(), 1)
    lr = 0.0001
    loss_print = 2
    idx_ratio = [0.0, 1.0]  # proportion to qbc:random
    continue_flag = False
    poly_exp = 1.0
    feature_extract = True
    manual_seed = 2
    np.random.seed(manual_seed)

    # CUDA
    cuda_flag = torch.cuda.is_available()
    device = torch.device("cuda" if cuda_flag else "cpu")
    device_cpu = torch.device("cpu")
    dataloader_kwargs = {'pin_memory': True} if cuda_flag else {}
    print(torch.cuda.device_count(), "GPUs detected")
    torch.manual_seed(manual_seed)
    # print("Max memory allocated:" + str(np.round(torch.cuda.max_memory_allocated(device) / 1e9, 3)) + ' Gb')

    # get data and index library
    mean_std = ([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    transform = T.Compose([
        T.Resize((800, 800), Image.BICUBIC),
        T.ToTensor(),
        T.Normalize(*mean_std)
    ])
    train_dataset = dataset_preset.Dataset_Cityscapes_n(
        root=cityscape_path,
        split='train',
        mode='fine',
        target_type='semantic',
        transform=transform,
        target_transform=segmen_preset.label_id2label,
        n=n_train)
    # read used index
    csv_path_index_source = os.path.join(save_path, index_path_name,
                                         csv_name_index)
    with open(csv_path_index_source) as csv_file:
        data = csv_file.readlines()
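        # the last line of index.csv holds the final training-index list; the slice below strips
        # the leading row label and the trailing timing field appended after the ';'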
        train_index = np.array(
            list(
                map(
                    int, data[-1][3:data[-1].find(';', (len(data[-1]) -
                                                        20))].split(','))))
        print(len(train_index))
        # np.random.shuffle(train_index)
        train_index = train_index[int(n_train * 0.1):int(n_train * 0.5)]  # keep the 10%-50% slice, i.e. 40% of the pool
    print(len(train_index))
    train_dataset = Subset(train_dataset, indices=train_index)
    test_dataset = dataset_preset.Dataset_Cityscapes_n_i(
        root=cityscape_path,
        split='val',
        mode='fine',
        target_type='semantic',
        transform=transform,
        target_transform=segmen_preset.label_id2label,
        n=n_test)
    # only test on part of data
    train_dataloader = DataLoader(train_dataset,
                                  batch_size=batch_train_size,
                                  shuffle=True,
                                  num_workers=3 *
                                  max(torch.cuda.device_count(), 1),
                                  drop_last=True)
    test_dataloader = DataLoader(test_dataset,
                                 batch_size=batch_test_size,
                                 shuffle=True,
                                 num_workers=3 *
                                 max(torch.cuda.device_count(), 1),
                                 drop_last=True)
    print("Datasets loaded!")

    # create models, optimizers, scheduler, criterion
    # the models
    fcn_model = torchvision.models.segmentation.deeplabv3_resnet101(
        pretrained=False,
        progress=True,
        num_classes=segmen_preset.n_labels_valid,
        aux_loss=True)
    fcn_model = fcn_model.cuda()
    fcn_model = nn.DataParallel(fcn_model)

    # the optimizers
    params_to_update = fcn_model.parameters()
    print("Params to learn:")
    if feature_extract:
        params_to_update = []
        for name, param in fcn_model.named_parameters():
            if param.requires_grad == True:
                params_to_update.append(param)
                print("\t", name)
    else:
        for name, param in fcn_model.named_parameters():
            if param.requires_grad == True:
                print("\t", name)
    params = add_weight_decay(fcn_model, l2_value=0.0001)
    '''optimizer = torch.optim.SGD([{'params': fcn_model.module.classifier.parameters()},
                                  {'params': list(fcn_model.module.backbone.parameters()) +
                                             list(fcn_model.module.aux_classifier.parameters())}
                                  ], lr=lr, momentum=0.9)'''

    optimizer = torch.optim.Adam(
        [{
            'params': fcn_model.module.classifier.parameters()
        }, {
            'params':
            list(fcn_model.module.backbone.parameters()) +
            list(fcn_model.module.aux_classifier.parameters())
        }],
        lr=lr,
        weight_decay=0.0001)
    lambda1 = lambda epoch: math.pow(1 - (epoch / n_epoch), poly_exp)
    scheduler = optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda1)

    with open(cityscape_loss_weight_path,
              "rb") as file:  # (needed for python3)
        class_weights = np.array(pickle.load(file))
    class_weights = torch.from_numpy(class_weights)
    class_weights = Variable(class_weights.type(torch.FloatTensor)).cuda()
    criterion = torch.nn.CrossEntropyLoss(weight=class_weights).cuda()

    # report everything
    text = ('Model created' + (', n_train: ' + str(n_train)) +
            (', n_epoch: ' + str(n_epoch)) +
            (', batch_train_size: ' + str(batch_train_size)) +
            (', idx_ratio: ' + str(idx_ratio)) + (', n_test: ' + str(n_test)) +
            (', batch_test_size: ' + str(batch_test_size)) +
            (', test_factor: ' + str(test_factor)) +
            (', optimizer: ' + str(optimizer)) +
            (', model: ' + str(fcn_model)))
    print(text)

    # for documentation
    train_text = [str(x) for x in range(1, n_epoch + 1)]
    test_text = [str(x) for x in range(1, n_epoch + 1)]
    test_text_index = 0

    # write text to csv
    dir_number = 1
    while os.path.exists(
            os.path.join(save_path, (dir_name + '{:03d}'.format(dir_number)))):
        dir_number += 1
    run_path = os.path.join(save_path,
                            (dir_name + '{:03d}'.format(dir_number)))
    os.makedirs(run_path)  # make run_* dir
    f = open(os.path.join(run_path, 'info.txt'), 'w+')  # write .txt file
    f.write(text)
    f.close()
    copy(__file__, os.path.join(run_path, os.path.basename(__file__)))

    # write training progress
    csv_path_train = os.path.join(run_path, csv_name_train)
    title = [
        "Training progress for n_model = " + str(1) + ", idx_ratio:  " +
        str(idx_ratio) + ', for multiple epoch, torch seed: ' +
        str(manual_seed)
    ]
    with open(csv_path_train, mode='a+', newline='') as test_file:
        test_writer = csv.writer(test_file, delimiter=',')
        test_writer.writerow(title)

    # write test progress
    csv_path_test = os.path.join(run_path, csv_name_test)
    title = [
        "Test progress for n_model = " + str(1) + ", idx_ratio:  " +
        str(idx_ratio) + ', for multiple epoch, torch seed: ' +
        str(manual_seed) + ', run_path: ' + run_path + ', index_from: ' +
        index_path_name
    ]
    with open(csv_path_test, mode='a+', newline='') as test_file:
        test_writer = csv.writer(test_file, delimiter=',')
        test_writer.writerow(title)

    # load from previous run if requested
    if continue_flag:
        fcn_model.load_state_dict(
            torch.load(
                'C:\\Users\\steve\\Desktop\\projects\\al_kitti\\results\\first_test\\adam_run_005\\model_weight_epoch_10.pt'
            ))
        print('weight loaded')

    # training process, n-th batch
    for i_epoch in range(n_epoch):
        loss_epoch = []
        iou_epoch = []
        time_epoch = []
        for i_batch, (data_train, target_train) in enumerate(train_dataloader):

            t = Timer()
            t.start()
            # train batch
            output, loss, iou, fcn_model, optimizer = train_batch(
                fcn_model, data_train, target_train, optimizer, device,
                criterion)
            print('Epoch: ' + str(i_epoch) + '\t Batch: ' + str(i_batch) +
                  '/' + str(len(train_dataloader)) + '; model ' + str(0) +
                  '; train loss avg: ' + "{:.3f}".format(loss) +
                  '; train iou avg: ' + "{:.3f}".format(iou.mean()))
            for param_group in optimizer.param_groups:
                print(param_group['lr'])
            loss_epoch.append(loss)
            iou_epoch.append(iou.mean())
            time_epoch.append(t.stop())

        # document train result
        train_text[i_epoch] = train_text[i_epoch] + ";{:.4f}".format(np.array(loss_epoch).mean()) + \
                              ";{:.4f}".format(np.array(iou_epoch).mean()) + \
                              ";{:.7f}".format(np.array(optimizer.param_groups[0]['lr'])) + ';' + str(len(train_index))

        # update train documentation
        text = train_text[i_epoch].split(";")
        with open(csv_path_train, mode='a+', newline='') as test_file:
            test_writer = csv.writer(test_file, delimiter=';')
            test_writer.writerow(text)

        # one epoch ends here
        scheduler.step()
        print(optimizer)
        # save temporary model
        if i_epoch % 10 == 0 or (i_epoch + 1) == n_epoch:
            fcn_model.train()
            torch.save(
                fcn_model.state_dict(),
                os.path.join(run_path, ('model_weight_epoch_train' +
                                        '{:03d}'.format(i_epoch) + '.pt')))
            fcn_model.eval()
            torch.save(
                fcn_model.state_dict(),
                os.path.join(run_path, ('model_weight_epoch_' +
                                        '{:03d}'.format(i_epoch) + '.pt')))

        # perform test
        create_pred_img(fcn_model, test_dataloader, inference_path, color_path)
        all_result_dict = cityscapes_eval()

        # average training time
        mean_time = np.array(time_epoch).mean()

        # document test result
        test_text[test_text_index] = test_text[test_text_index] + \
                                     ";{:.4f}".format(all_result_dict['averageScoreClasses']) + \
                                     ";{:.7f}".format(np.array(optimizer.param_groups[0]['lr'])) \
                                     + ";{:.4f}".format(mean_time) + ';' + str(len(train_index))

        # update test documentation
        text = test_text[test_text_index].split(";")
        with open(csv_path_test, mode='a+', newline='') as test_file:
            test_writer = csv.writer(test_file, delimiter=';')
            test_writer.writerow(text)

        test_text_index = test_text_index + 1
Example #13
def main():
    # load dataset
    datasets = []  # reduced_MNIST, reduced_F_MNIST, Dataset_MNIST_n, Dataset_F_MNIST_n

    # dataset, reduced mnist
    data_train_all, target_train_all = datasets_preset.provide_reduced_mnist(train=True)
    datasets.append((data_train_all, target_train_all))
    print('Reduced MNIST loaded')

    '''# dataset, unreduced mnist
    data_train_all, target_train_all = datasets_preset.provide_unreduced_mnist(train=True)
    datasets.append((data_train_all, target_train_all))
    print('Unreduced MNIST loaded')'''

    # dataset, reduced f-mnist
    data_train_all, target_train_all = datasets_preset.provide_reduced_f_mnist(train=True)
    datasets.append((data_train_all, target_train_all))
    print('Reduced F-MNIST loaded')

    '''# dataset, unreduced f-mnist
    data_train_all, target_train_all = datasets_preset.provide_unreduced_f_mnist(train=True)
    datasets.append((data_train_all, target_train_all))
    print('Unreduced F-MNIST loaded')'''

    # training run directories (full set of possible result folders)
    results_dir = [os.path.join(dr(dr(abspath(__file__))), 'results', 'reduced_mnist'),
                   os.path.join(dr(dr(abspath(__file__))), 'results', 'unreduced_mnist'),
                   os.path.join(dr(dr(abspath(__file__))), 'results', 'reduced_f_mnist'),
                   os.path.join(dr(dr(abspath(__file__))), 'results', 'unreduced_f_mnist')]

    # only the reduced datasets are evaluated here, matching the datasets loaded above
    results_dir = [os.path.join(dr(dr(abspath(__file__))), 'results', 'reduced_mnist'),
                   os.path.join(dr(dr(abspath(__file__))), 'results', 'reduced_f_mnist')]

    # loop through datasets
    for i_dataset in range(len(datasets)):
        models_dirs = [os.path.join(results_dir[i_dataset], run_dir)
                       for run_dir in sorted(os.listdir(results_dir[i_dataset])) if 'run_' in run_dir]
        print(str(models_dirs))
        csv_path = os.path.join(results_dir[i_dataset], 'xgb_qbc.csv')
        csv_save_path = os.path.join(results_dir[i_dataset], 'xgb_qbc_train.csv')
        csv_data = csv_train_reader(csv_path, '')

        train_data = datasets[i_dataset][0]
        target_data = datasets[i_dataset][1]
        # loop through training runs
        for i_data_size in range(len(csv_data['batch_size_list'])):
            # to document training process
            train_text = [str(x) for x in csv_data['batch_size_list'][i_data_size]]
            run_path = [os.path.join(models_dirs[i_data_size], path)
                        for path in sorted(os.listdir(models_dirs[i_data_size])) if 'models_batch_' in path]
            # loop through batch in run, get committee
            for i_batch in range(len(run_path)):
                print('Load from: ' + run_path[i_batch])
                committee_list = pickle.load(open(run_path[i_batch], "rb"))
                output_list_train = np.zeros((len(committee_list), train_data.shape[0]))
                # loop through committee, prediction for each model
                for i_model in range(len(committee_list)):
                    output_list_train[i_model, :] = committee_list[i_model].predict(train_data)
                # check committee vote
                acc_models = qbc_preset.each_model_acc(output_list_train, target_data)
                acc_committee = qbc_preset.committee_vote(output_list_train, target_data)  # committee vote method
                print("Batch " + str(i_batch) + ": average acc of models is " + "{:.3f}".format(acc_models.mean() * 100) + "%")
                print("Batch " + str(i_batch) + ": acc of committee is " + "{:.3f}".format(acc_committee * 100) + "%")

                # Document training progress
                train_text[i_batch] = train_text[i_batch] + ' '.join([";" + "{:.4f}".format(elem) for elem in acc_models])
                train_text[i_batch] = train_text[i_batch] + '; ' + "{:.3f}".format(acc_committee * 100) + '%'  # committee vote method

            # write text to csv
            title = [csv_data["title_list"][i_data_size]]
            with open(csv_save_path, mode='a+') as test_file:
                test_writer = csv.writer(test_file, delimiter=',')
                test_writer.writerow(title)
            # loop through train_text
            for i_text in range(0, len(train_text)):
                text = train_text[i_text].split(";")
                mean = statistics.mean([float(i) for i in text[1:-2]])
                var = statistics.variance([float(i) for i in text[1:-2]]) ** 0.5  # standard deviation of the per-model accuracies
                text.append("{:.3f}".format(mean * 100) + "%")
                text.append("{:.3f}".format(var * 100) + "%")
                with open(csv_save_path, mode='a+') as test_file:
                    test_writer = csv.writer(test_file, delimiter=';')
                    test_writer.writerow(text)
            print("Saved to csv! Data size: " + str(csv_data['batch_size_list'][i_data_size]) + ", Dataset:" +
                  str(i_dataset))
def qbc(n_model, n_train, qbc_batch_size, batch_size, idx_ratio, dataset):
    # parameters (n_model, n_train, qbc_batch_size, batch_size and idx_ratio come in as function arguments)
    dataset = dataset.lower()  # 'reduced_f_mnist', 'reduced_mnist', 'unreduced_f_mnist', 'unreduced_mnist'
    batch_train_size = batch_size
    batch_test_size = 10000
    lr = 0.0001
    test_factor = 5  # committee only tested every test_factor-th batch
    poly_exp = 1.0
    n_epoch = 20
    dropout_rate = 0.25

    # paths
    result_path = os.path.join(dr(dr(dr(abspath(__file__)))), 'results',
                               dataset, 'incremental_with_step', 'cnn_qbc')
    csv_path = os.path.join(result_path, 'cnn_qbc.csv')
    csv_name_train = 'train.csv'
    csv_name_test = 'test.csv'
    csv_name_index = 'index.csv'
    csv_name_index_compare = 'index_ce.csv'
    mnist_path = os.path.join(dr(dr(dr(abspath(__file__)))), 'data')
    index_path = os.path.join(dr(dr(dr(abspath(__file__)))), 'results',
                              dataset, 'incremental_with_step', 'cnn_qbc')

    # CUDA
    cuda_flag = torch.cuda.is_available()
    device = torch.device("cuda" if cuda_flag else "cpu")
    device_cpu = torch.device("cpu")
    dataloader_kwargs = {'pin_memory': True} if cuda_flag else {}
    print("Let's use", torch.cuda.device_count(), "GPUs!")
    torch.manual_seed(0)
    torch.cuda.empty_cache()
    kwargs = {'num_workers': 4, 'pin_memory': True, 'shuffle': True}

    transform = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize((0.1307, ), (0.3081, ))])
    train_dataset = datasets_preset.Dataset_MNIST_n(root=mnist_path,
                                                    train=True,
                                                    transform=transform,
                                                    n=n_train)
    test_dataset = datasets_preset.Dataset_MNIST_n(root=mnist_path,
                                                   train=False,
                                                   transform=transform,
                                                   n=10000)
    test_dataloader = torch.utils.data.DataLoader(test_dataset,
                                                  batch_size=batch_test_size,
                                                  **kwargs)
    data_test, target_test, index = next(iter(test_dataloader))

    # to document training process, create directory, etc
    text = (('CE_batchless: n_model: ' + str(n_model)) +
            (', n_train: ' + str(n_train)) +
            (', batch_size: ' + str(batch_size)) +
            (', idx_ratio: ' + str(idx_ratio)) + (', dataset: ' + dataset))
    print(text)
    dir_name = 'cnn_'
    dir_number = 1
    while os.path.exists(
            os.path.join(result_path,
                         (dir_name + '{:03d}'.format(dir_number)))):
        dir_number += 1
    run_path = os.path.join(result_path,
                            (dir_name + '{:03d}'.format(dir_number)))
    os.makedirs(run_path)  # make run_* dir
    f = open(os.path.join(run_path, 'info.txt'), 'w+')  # write .txt file
    f.write(text)
    f.close()

    # model
    model = Cnn_model()
    print("CNN model created")

    model = model.to(device)  # falls back to CPU when CUDA is unavailable
    model = nn.DataParallel(model)

    # the optimizers
    optimizer = torch.optim.Adam(model.module.parameters(), lr=lr)
    lambda1 = lambda epoch: math.pow(1 - epoch / n_epoch, poly_exp)
    scheduler = optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda1)
    criterion = F.nll_loss

    # report everything
    print("Committee of " + str(n_model) + " dropout forward passes will be used")
    text = ('CE_batchless: n_model: ' + str(n_model)) + (', n_train: ' + str(n_train)) + (', n_epoch: ' + str(n_epoch)) + \
           (', batch_train_size: ' + str(batch_train_size)) + (', idx_ratio: ' + str(idx_ratio))
    print(text)

    # to document training process, create directory, etc
    train_text = [str(x) for x in range(1, n_epoch + 1)]
    test_text = [str(x) for x in range(1, n_epoch + 1)]
    train_index = np.array([]).astype(np.int16)
    run_path = os.path.join(dr(dr(dr(abspath(__file__)))), 'results', dataset,
                            'incremental_with_step', 'cnn_qbc')

    # write training progress
    csv_path_train = os.path.join(run_path, csv_name_train)
    title = [
        "CE_batchless: Training progress for n_model = " + str(n_model) +
        ", idx_ratio:  " + str(idx_ratio) + ', for multiple epoch'
    ]
    with open(csv_path_train, mode='a+', newline='') as test_file:
        test_writer = csv.writer(test_file, delimiter=',')
        test_writer.writerow(title)

    # write test progress
    csv_path_test = os.path.join(run_path, csv_name_test)
    title = [
        "CE: Test progress for n_model = " + str(n_model) + ", idx_ratio:  " +
        str(idx_ratio) + ', for multiple epoch'
    ]
    with open(csv_path_test, mode='a+', newline='') as test_file:
        test_writer = csv.writer(test_file, delimiter=',')
        test_writer.writerow(title)

    # training start
    for i_epoch in range(n_epoch):
        # initialize with random
        if len(train_index) == 0:
            '''train_index = np.array(random.sample(range(n_train), qbc_batch_size))
            trained_index = train_index'''
            with open(os.path.join(index_path, csv_name_index),
                      'r') as csv_file:
                data = csv_file.readlines()
            train_index = np.array(list(map(int, data[-1][3:-1].split(
                ','))))  # take last index list (last line in csv)
            trained_index = train_index
            index_text = ['One_experiment']
            csv_path_index = os.path.join(index_path, csv_name_index_compare)
            with open(csv_path_index, mode='a+', newline='') as test_file:
                test_writer = csv.writer(test_file, delimiter=';')
                test_writer.writerow(index_text)
            train_index, ce = entropy_dropout_mnist(model,
                                                    train_dataset,
                                                    trained_index,
                                                    device,
                                                    n_model,
                                                    dropout_rate,
                                                    batch_test_size,
                                                    i_epoch,
                                                    n_data=n_train -
                                                    qbc_batch_size)

        # append with vote entropy
        elif len(train_index) < int(n_train):
            # perform vote entropy on entire dataset
            trained_index = np.append(
                trained_index,
                train_index[i_epoch * qbc_batch_size:(i_epoch + 1) *
                            qbc_batch_size])

            index_text = str(ce[train_index[0]]) + ': ' + str(
                [x for x in train_index]).strip('[]')
            index_text = index_text.split(';')
            csv_path_index = os.path.join(index_path, csv_name_index_compare)
            with open(csv_path_index, mode='a+', newline='') as test_file:
                test_writer = csv.writer(test_file, delimiter=';')
                test_writer.writerow(index_text)
        # retrain with selected data
        print(train_index)
        print('length: ' + str(len(train_index)))
        train_subset = Subset(train_dataset, trained_index)
        train_dataloader = DataLoader(train_subset,
                                      batch_size=batch_train_size,
                                      shuffle=True)
        loss_epoch = []

        for i_batch, (data_train, target_train,
                      index) in enumerate(train_dataloader):
            # train batch
            loss, model, optimizer = train_batch(model, data_train,
                                                 target_train, optimizer,
                                                 device, criterion)
            print('Epoch: ' + str(i_epoch) + '\t Batch: ' + str(i_batch) +
                  '/' + str(len(train_dataloader)) + '; model ' + str(0) +
                  '; train loss avg: ' + "{:.3f}".format(loss))
            for param_group in optimizer.param_groups:
                print(param_group['lr'])
            loss_epoch.append(loss)

        # document train result
        train_text[i_epoch] = train_text[i_epoch] + ";{:.4f}".format(np.array(loss_epoch).mean()) + \
                              ";{:.7f}".format(np.array(optimizer.param_groups[0]['lr'])) \
                              + ';' + str(len(trained_index))
        # update train documentation
        text = train_text[i_epoch].split(";")
        with open(csv_path_train, mode='a+', newline='') as test_file:
            test_writer = csv.writer(test_file, delimiter=';')
            test_writer.writerow(text)

        # save temporary model and perform test
        print('Save and Test Model')
        '''model.train()
        torch.save(model.state_dict(), os.path.join(run_path, ('model_weight_epoch_train' +
                                                                   '{:03d}'.format(i_epoch) + '.pt')))
        model.eval()
        torch.save(model.state_dict(), os.path.join(run_path, ('model_weight_epoch_' +
                                                                   '{:03d}'.format(i_epoch) + '.pt')))'''

        # test
        acc = model_test(model, data_test, target_test)

        # document test result
        test_text[i_epoch] = test_text[i_epoch] + ";{:.4f}".format(acc) + \
                              ";{:.7f}".format(np.array(optimizer.param_groups[0]['lr'])) \
                              + ';' + str(len(trained_index))
        print("Acc: " + "{:.4f}".format(acc))

        # update test documentation
        text = test_text[i_epoch].split(";")
        with open(csv_path_test, mode='a+', newline='') as test_file:
            test_writer = csv.writer(test_file, delimiter=';')
            test_writer.writerow(text)

        # one epoch ends here
        scheduler.step()
        print(optimizer)
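
# entropy_dropout_mnist is called above but its implementation is not shown here. As a rough,
# hedged sketch of the idea it presumably follows (a Monte-Carlo dropout committee, with samples
# ranked by predictive entropy), assuming a generic classifier that returns raw logits; the name
# and signature below are illustrative, not the project's actual API.
import torch
import torch.nn.functional as F


def mc_dropout_entropy_ranking(model, data, n_passes=20):
    """Illustrative sketch: rank samples by predictive entropy of a Monte-Carlo dropout committee."""
    model.train()  # keep dropout layers active so each forward pass acts as a different committee member
    with torch.no_grad():
        # average class probabilities over n_passes stochastic forward passes
        probs = torch.stack(
            [F.softmax(model(data), dim=1) for _ in range(n_passes)]).mean(dim=0)
    entropy = -(probs * torch.log2(probs + 1e-12)).sum(dim=1)  # per-sample entropy in bits
    return torch.argsort(entropy, descending=True)  # most uncertain samples first
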
def pretrained_70():

    color_palette = ('tab:blue', 'tab:orange', 'tab:green', 'tab:red', 'tab:purple',
                     'tab:brown', 'tab:pink', 'tab:gray', 'tab:olive', 'tab:cyan',
                     'b', 'g', 'r', 'c', 'm', 'y')
    my_dpi = 120

    # Pretrained or increment
    csv_paths = [
        os.path.join(dr(dr(dr(abspath(__file__)))), 'results', 'consensus_test', 'consensus_90_5_70_001', 'test.csv'),
        os.path.join(dr(dr(dr(abspath(__file__)))), 'results', 'consensus_test', 'consensus_90_5_70_002', 'test.csv'),
        os.path.join(dr(dr(dr(abspath(__file__)))), 'results', 'consensus_test', 'consensus_90_5_70_003', 'test.csv'),
        os.path.join(dr(dr(dr(abspath(__file__)))), 'results', 'consensus_test', 'consensus_90_5_70_004', 'test.csv'),
        os.path.join(dr(dr(dr(abspath(__file__)))), 'results', 'consensus_test', 'consensus_90_5_70_005', 'test.csv'),
        os.path.join(dr(dr(dr(abspath(__file__)))), 'results', 've_test', 'vote_90_5_70_001', 'test.csv'),
        os.path.join(dr(dr(dr(abspath(__file__)))), 'results', 've_test', 'vote_90_5_70_002', 'test.csv'),
        os.path.join(dr(dr(dr(abspath(__file__)))), 'results', 've_test', 'vote_90_5_70_003', 'test.csv'),
        os.path.join(dr(dr(dr(abspath(__file__)))), 'results', 've_test', 'vote_90_5_70_004', 'test.csv'),
        os.path.join(dr(dr(dr(abspath(__file__)))), 'results', 've_test', 'vote_90_5_70_005', 'test.csv'),
        os.path.join(dr(dr(dr(abspath(__file__)))), 'results', 'random', 'random_5_70_001', 'test.csv'),
        os.path.join(dr(dr(dr(abspath(__file__)))), 'results', 'random', 'random_5_70_002', 'test.csv'),
        os.path.join(dr(dr(dr(abspath(__file__)))), 'results', 'random', 'random_5_70_003', 'test.csv'),
        os.path.join(dr(dr(dr(abspath(__file__)))), 'results', 'random', 'random_5_70_004', 'test.csv'),
        os.path.join(dr(dr(dr(abspath(__file__)))), 'results', 'random', 'random_5_70_005', 'test.csv'),
        ]

    titles = ['consensus_5_70_DR90_001',
              'consensus_5_70_DR90_002',
              'consensus_5_70_DR90_003',
              'consensus_5_70_DR90_004',
              'consensus_5_70_DR90_005',
              'vote_5_70_DR90_001',
              'vote_5_70_DR90_002',
              'vote_5_70_DR90_003',
              'vote_5_70_DR90_004',
              'vote_5_70_DR90_005',
              'random_5_70_DR90_001',
              'random_5_70_DR90_002',
              'random_5_70_DR90_003',
              'random_5_70_DR90_004',
              'random_5_70_DR90_005',
              ]
    fig_title = "Pretrained setting, 50 epochs, increase every 5 epoch until 50% Cityscapes"

    data_all = []

    for i_data in range(len(csv_paths)):
        data_all.append(csv_scanner(csv_paths[i_data], titles[i_data]))

    # plot_single(data_all, color_palette, my_dpi, fig_title)

    data_consensus = []
    data_vote = []
    data_random = []

    for i_data in range(5):
        data_consensus.append(csv_scanner(csv_paths[i_data], titles[i_data]))

    for i_data in range(5, 10):
        data_vote.append(csv_scanner(csv_paths[i_data], titles[i_data]))

    for i_data in range(10, 15):
        data_random.append(csv_scanner(csv_paths[i_data], titles[i_data]))

    plot_shaded_std_amount([data_consensus, data_vote, data_random], color_palette, my_dpi, "70%")

    a = 1  # no-op; convenient spot for a debugger breakpoint
Example #16
def plot_qbc_example():
    dpi = 100
    color_palette = ("tab:blue", "tab:orange", "tab:green", "tab:red", "tab:purple", "tab:brown")
    plot_path = os.path.join(dr(dr(dr(abspath(__file__)))), 'results', 'plots', 'for_ppt')
    plt.interactive(True)

    # prepare figure
    figure = plt.subplots(nrows=2, ncols=2, figsize=(12, 6),  dpi=dpi)
    ax_all = figure[0].axes[0]
    ax_gt = figure[0].axes[1]
    ax_random = figure[0].axes[2]
    ax_active = figure[0].axes[3]

    # random points, [0, :] for x; [1, :] for y, sorted for x
    random_all = np.random.rand(2, 200)
    random_all[0, :] = np.sort(random_all[0, :])
    random_all[1, :] = random_all[1, np.argsort(random_all[0, :])]

    # plot random unlabelled
    ax_all.scatter(x=random_all[0, :], y=random_all[1, :], c='#606060', marker='x')
    ax_all.set_title('Unlabeled data', fontsize=16)
    ax_all.set_autoscale_on(False)
    # make it pretty

    # plot all labelled
    idx_cat1 = np.argwhere(random_all[0, :] < 0.5)
    idx_cat2 = np.argwhere(random_all[0, :] > 0.5)
    # give overlap
    idx_temp = idx_cat1[-6:-1].copy()
    idx_cat1[-6:-1] = idx_cat2[0:5]
    idx_cat2[0:5] = idx_temp.copy()
    # plot
    ax_gt.scatter(x=random_all[0, idx_cat1], y=random_all[1, idx_cat1], c=color_palette[0], marker="o")
    ax_gt.scatter(x=random_all[0, idx_cat2], y=random_all[1, idx_cat2], c=color_palette[1], marker="s")
    # plot boundary
    ax_gt.set_autoscale_on(False)
    # ax_gt.axvline(x=0.5, c=color_palette[2])
    ax_gt.set_title('If All Data Labeled', fontsize=16)
    # make it pretty

    # plot random training, skew towards cat 1
    skewer_cat1 = idx_cat1[np.intersect1d(np.argwhere(random_all[0, idx_cat1] > 0.35),
                                          np.argwhere(random_all[1, idx_cat1] < 0.25))].squeeze()
    skewer_cat2 = idx_cat2[np.intersect1d(np.argwhere(random_all[0, idx_cat2] < 0.65),
                                          np.argwhere(random_all[1, idx_cat2] > 0.75))].squeeze()
    idx_cat1_random = np.random.randint(low=0, high=idx_cat1.max(), size=30-min(skewer_cat1.size, 8))
    idx_cat2_random = np.random.randint(low=idx_cat2.min(), high=idx_cat2.max(), size=30-min(skewer_cat2.size, 8))
    idx_cat1_rand = np.concatenate([idx_cat1_random, skewer_cat1[0:(min(skewer_cat1.size, 8))]])  # up to 8 skewer points
    idx_cat2_rand = np.concatenate([idx_cat2_random, skewer_cat2[0:(min(skewer_cat2.size, 8))]])  # up to 8 skewer points
    # plot unlabeled
    ax_random.scatter(x=random_all[0, np.setdiff1d(range(200), [idx_cat1_rand, idx_cat2_rand])],
                      y=random_all[1, np.setdiff1d(range(200), [idx_cat1_rand, idx_cat2_rand])],
                      c='#606060', marker='x')
    # plot labeled
    ax_random.scatter(x=random_all[0, idx_cat1_rand], y=random_all[1, idx_cat1_rand],
                      c=color_palette[0], marker="o")
    ax_random.scatter(x=random_all[0, idx_cat2_rand], y=random_all[1, idx_cat2_rand],
                      c=color_palette[1], marker="s")
    # plot boundary
    ax_random.set_autoscale_on(False)
    # ax_random.plot([0.55, 0.45], [-0.1, 1.1], c=color_palette[2])
    ax_random.set_title('Trained with random', fontsize=16)
    # make it pretty

    # plot active learning
    idx_al1 = idx_cat1[-30:].squeeze()
    idx_al2 = idx_cat2[0:30].squeeze()
    # plot unlabeled
    ax_active.scatter(x=random_all[0, np.setdiff1d(range(200), [idx_al1, idx_al2])],
                      y=random_all[1, np.setdiff1d(range(200), [idx_al1, idx_al2])], c='#606060', marker='x')
    # plot labeled
    ax_active.scatter(x=random_all[0, idx_al1], y=random_all[1, idx_al1], c=color_palette[0], marker="o")
    ax_active.scatter(x=random_all[0, idx_al2], y=random_all[1, idx_al2], c=color_palette[1], marker="s")
    # plot boundary
    ax_active.set_autoscale_on(False)
    # ax_active.plot([0.49, 0.51], [-0.1, 1.1], c=color_palette[2])
    ax_active.set_title('Trained with active learning', fontsize=16)

    # make it pretty
    plt.tight_layout()
    ax_all.tick_params(axis='both', which='both', bottom=False, left=False, labelbottom=False, labelleft=False)
    ax_gt.tick_params(axis='both', which='both', bottom=False, left=False, labelbottom=False, labelleft=False)
    ax_random.tick_params(axis='both', which='both', bottom=False, left=False, labelbottom=False, labelleft=False)
    ax_active.tick_params(axis='both', which='both', bottom=False, left=False, labelbottom=False, labelleft=False)

    # save fig
    name = 'random_vs_al_example'
    file_name = os.path.join(plot_path, (name + '.png'))
    plt.savefig(file_name, format='png', dpi=300)
Example #17
def cart_2_cnn(dataset):
    # parameters
    al_batch = 3000
    dataset = dataset.lower()  # 'reduced_f_mnist', 'reduced_mnist', 'unreduced_f_mnist' or 'unreduced_mnist'
    batch_train_size = 60
    batch_test_size = 10000
    n_train = 60000
    lr = 0.0001
    test_factor = 5  # committee only tested every test_factor-th batch
    poly_exp = 1.0
    n_epoch = 20
    dropout_rate = 0.25

    # seed
    seed = 5
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)

    # paths
    result_path = os.path.join(dr(dr(dr(abspath(__file__)))), 'results',
                               dataset, 'from_cart', 'cnn_qbc')
    csv_path = os.path.join(result_path, 'cnn_qbc.csv')
    csv_name_train = 'train.csv'
    csv_name_test = 'test.csv'
    csv_name_index = 'index.csv'
    mnist_path = os.path.join(dr(dr(dr(abspath(__file__)))), 'data')

    # CUDA
    cuda_flag = torch.cuda.is_available()
    device = torch.device("cuda" if cuda_flag else "cpu")
    device_cpu = torch.device("cpu")
    dataloader_kwargs = {'pin_memory': True} if cuda_flag else {}
    print("Let's use", torch.cuda.device_count(), "GPUs!")
    torch.cuda.empty_cache()
    kwargs = {'num_workers': 4, 'pin_memory': True, 'shuffle': True}

    transform = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize((0.1307, ), (0.3081, ))])
    train_dataset = datasets_preset.Dataset_MNIST_n(root=mnist_path,
                                                    train=True,
                                                    transform=transform,
                                                    n=n_train)
    test_dataset = datasets_preset.Dataset_MNIST_n(root=mnist_path,
                                                   train=False,
                                                   transform=transform,
                                                   n=10000)
    test_dataloader = torch.utils.data.DataLoader(test_dataset,
                                                  batch_size=batch_test_size,
                                                  **kwargs)
    data_test, target_test, index = next(iter(test_dataloader))

    # to document training process, create directory, etc
    dir_name = 'cnn_from_cart_'
    dir_number = 1
    while os.path.exists(
            os.path.join(result_path,
                         (dir_name + '{:03d}'.format(dir_number)))):
        dir_number += 1
    run_path = os.path.join(result_path,
                            (dir_name + '{:03d}'.format(dir_number)))
    os.makedirs(run_path)  # make run_* dir
    f = open(os.path.join(run_path, 'info.txt'), 'w+')  # write .txt file
    f.close()

    # to document training process, create directory, etc
    train_text = [str(x) for x in range(1, n_epoch + 1)]
    test_text = [str(x) for x in range(1, n_epoch + 1)]

    # write training progress
    csv_path_train = os.path.join(run_path, csv_name_train)
    title = [
        "Random: Training progress for n_model = " + str(1) +
        ", idx_ratio:  " + str('none') + ', for multiple epoch'
    ]
    with open(csv_path_train, mode='a+', newline='') as test_file:
        test_writer = csv.writer(test_file, delimiter=',')
        test_writer.writerow(title)

    # write test progress
    csv_path_test = os.path.join(run_path, csv_name_test)
    title = [
        "Random: Test progress for n_model = " + str(1) + ", idx_ratio:  " +
        str('none') + ', for multiple epoch'
    ]
    with open(csv_path_test, mode='a+', newline='') as test_file:
        test_writer = csv.writer(test_file, delimiter=',')
        test_writer.writerow(title)

    # read indices
    index_library = pickle.load(
        open(
            "C:\\Users\\steve\\Desktop\\projects_software\\active-learning-prototypes\\results\\unreduced_mnist\\run_013\\indices_batch_019.pkl",
            "rb"))
    index_library = index_library[0]

    for i_al_batch in range(int(len(index_library) / al_batch)):
        index_batch = index_library[0:(i_al_batch + 1) * al_batch]
        train_subset = Subset(train_dataset, index_batch)
        train_dataloader = DataLoader(train_subset,
                                      batch_size=batch_train_size,
                                      shuffle=True)
        loss_epoch = []

        # model
        model = Cnn_model()
        model = model.to(device)  # falls back to CPU when CUDA is unavailable
        model = nn.DataParallel(model)
        print("CNN model created")

        # the optimizers
        optimizer = torch.optim.Adam(model.module.parameters(), lr=lr)
        lambda1 = lambda epoch: math.pow(1 - epoch / n_epoch, poly_exp)
        scheduler = optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda1)
        criterion = F.nll_loss
        for i_batch, (data_train, target_train,
                      index) in enumerate(train_dataloader):
            # train batch
            loss, model, optimizer = train_batch(model, data_train,
                                                 target_train, optimizer,
                                                 device, criterion)
            print('AL batch: ' + str(i_al_batch) + '\t Batch: ' + str(i_batch) +
                  '/' + str(len(train_dataloader)) + '; model ' + str(0) +
                  '; train loss avg: ' + "{:.3f}".format(loss))
            for param_group in optimizer.param_groups:
                print(param_group['lr'])
            loss_epoch.append(loss)

        # document train result
        train_text[i_al_batch] = train_text[i_al_batch] + ";{:.4f}".format(np.array(loss_epoch).mean()) + \
                              ";{:.7f}".format(np.array(optimizer.param_groups[0]['lr'])) \
                              + ';' + str(len(index_batch))
        # update train documentation
        text = train_text[i_al_batch].split(";")
        with open(csv_path_train, mode='a+', newline='') as test_file:
            test_writer = csv.writer(test_file, delimiter=';')
            test_writer.writerow(text)

        # test
        acc = model_test(model, data_test, target_test)

        # document test result
        test_text[i_al_batch] = test_text[i_al_batch] + ";{:.4f}".format(acc) + \
                              ";{:.7f}".format(np.array(optimizer.param_groups[0]['lr'])) \
                              + ';' + str(len(index_batch))
        print("Acc: " + "{:.4f}".format(acc))

        # update test documentation
        text = test_text[i_al_batch].split(";")
        with open(csv_path_test, mode='a+', newline='') as test_file:
            test_writer = csv.writer(test_file, delimiter=';')
            test_writer.writerow(text)

        # one epoch ends here
        # scheduler.step()
        print(optimizer)
        del model
def bulk_70():

    color_palette = ('tab:blue', 'tab:orange', 'tab:green', 'tab:red', 'tab:purple',
                     'tab:brown', 'tab:pink', 'tab:gray', 'tab:olive', 'tab:cyan',
                     'b', 'g', 'r', 'c', 'm', 'y')
    my_dpi = 100

    # Pretrained or increment
    csv_paths_consensus = [
        os.path.join(dr(dr(dr(abspath(__file__)))), 'results', 'consensus_test', 'consensus_bulk_70_from_90_001_001', 'test.csv'),
        os.path.join(dr(dr(dr(abspath(__file__)))), 'results', 'consensus_test', 'consensus_bulk_70_from_90_002_001', 'test.csv'),
        os.path.join(dr(dr(dr(abspath(__file__)))), 'results', 'consensus_test', 'consensus_bulk_70_from_90_003_001', 'test.csv'),
        os.path.join(dr(dr(dr(abspath(__file__)))), 'results', 'consensus_test', 'consensus_bulk_70_from_90_004_001', 'test.csv'),
        os.path.join(dr(dr(dr(abspath(__file__)))), 'results', 'consensus_test', 'consensus_bulk_70_from_90_005_001', 'test.csv'),
    ]
    csv_paths_ve = [
        os.path.join(dr(dr(dr(abspath(__file__)))), 'results', 've_test', 'vote_bulk_70_from_90_001_001', 'test.csv'),
        os.path.join(dr(dr(dr(abspath(__file__)))), 'results', 've_test', 'vote_bulk_70_from_90_002_001', 'test.csv'),
        os.path.join(dr(dr(dr(abspath(__file__)))), 'results', 've_test', 'vote_bulk_70_from_90_003_001', 'test.csv'),
        os.path.join(dr(dr(dr(abspath(__file__)))), 'results', 've_test', 'vote_bulk_70_from_90_004_001', 'test.csv'),
        os.path.join(dr(dr(dr(abspath(__file__)))), 'results', 've_test', 'vote_bulk_70_from_90_005_001', 'test.csv'),
    ]
    csv_paths_random = [
        os.path.join(dr(dr(dr(abspath(__file__)))), 'results', 'random', 'random_bulk_70_001', 'test.csv'),
        os.path.join(dr(dr(dr(abspath(__file__)))), 'results', 'random', 'random_bulk_70_002', 'test.csv'),
        os.path.join(dr(dr(dr(abspath(__file__)))), 'results', 'random', 'random_bulk_70_003', 'test.csv'),
        os.path.join(dr(dr(dr(abspath(__file__)))), 'results', 'random', 'random_bulk_70_004', 'test.csv'),
        os.path.join(dr(dr(dr(abspath(__file__)))), 'results', 'random', 'random_bulk_70_005', 'test.csv'),
        ]

    """titles = ['consensus_5_DR90_001',
              'consensus_5_DR90_002',
              'consensus_5_DR90_003',
              'consensus_5_DR90_004',
              'consensus_5_DR90_005',
              'vote_5_DR90_001',
              'vote_5_DR90_002',
              'vote_5_DR90_003',
              'vote_5_DR90_004',
              'vote_5_DR90_005',
              'random_5_DR90_001',
              'random_5_DR90_002',
              'random_5_DR90_003',
              'random_5_DR90_004',
              'random_5_DR90_005',
              ]
    fig_title = "Pretrained setting, 50 epochs, increase every 5 epoch until 50% Cityscapes"

    data_all = []

    for i_data in range(len(csv_paths)):
        data_all.append(csv_scanner(csv_paths[i_data], titles[i_data]))
"""
    # plot_single(data_all, color_palette, my_dpi, fig_title)

    data_consensus = []
    data_vote = []
    data_random = []

    for i_data in range(len(csv_paths_consensus)):
        data_consensus.append(csv_scanner(csv_paths_consensus[i_data], "-"))

    for i_data in range(len(csv_paths_ve)):
        data_vote.append(csv_scanner(csv_paths_ve[i_data], "-"))

    for i_data in range(len(csv_paths_random)):
        data_random.append(csv_scanner(csv_paths_random[i_data], "-"))

    plot_shaded_std([data_consensus, data_vote, data_random], color_palette, my_dpi, '70%')

    a = 1  # no-op; convenient spot for a debugger breakpoint

    return [data_consensus, data_vote, data_random]
import os
from os.path import dirname as dr, abspath
from al_ma_thesis_tjong.mini import bulk_70_mini_getter as getter_mini


if __name__ == '__main__':

    source_path_input = os.path.join(dr(dr(dr(abspath(__file__)))), 'results', 've_test')
    dir_name_input = 'bulk_70_mini_getter_from_vote_90_001_'
    index_path_name = 'vote_90_5_70_001'

    getter_mini.main(manual_seed_input=5, source_path_input=source_path_input,
                     dir_name_input=dir_name_input, index_path_name=index_path_name)

    source_path_input = os.path.join(dr(dr(dr(abspath(__file__)))), 'results', 've_test')
    dir_name_input = 'bulk_70_mini_getter_from_vote_90_002_'
    index_path_name = 'vote_90_5_70_002'

    getter_mini.main(manual_seed_input=6, source_path_input=source_path_input,
                     dir_name_input=dir_name_input, index_path_name=index_path_name)

    source_path_input = os.path.join(dr(dr(dr(abspath(__file__)))), 'results', 've_test')
    dir_name_input = 'bulk_70_mini_getter_from_vote_90_003_'
    index_path_name = 'vote_90_5_70_003'

    getter_mini.main(manual_seed_input=7, source_path_input=source_path_input,
                     dir_name_input=dir_name_input, index_path_name=index_path_name)

    source_path_input = os.path.join(dr(dr(dr(abspath(__file__)))), 'results', 've_test')
    dir_name_input = 'bulk_70_mini_getter_from_vote_90_004_'
    index_path_name = 'vote_90_5_70_004'

    # seed assumed to follow the 5, 6, 7, ... pattern of the calls above
    getter_mini.main(manual_seed_input=8, source_path_input=source_path_input,
                     dir_name_input=dir_name_input, index_path_name=index_path_name)
Example #20
def qbc(dataset):
    # parameters
    n_model = 20
    dataset = dataset.lower()

    # paths
    model_path = os.path.join(dr(dr(abspath(__file__))), 'results', dataset)
    csv_path = os.path.join(model_path, 'xgb_qbc.csv')

    # CUDA
    cuda_flag = torch.cuda.is_available()
    device = torch.device("cuda" if cuda_flag else "cpu")
    device_cpu = torch.device("cpu")
    dataloader_kwargs = {'pin_memory': True} if cuda_flag else {}
    print("Let's use", torch.cuda.device_count(), "GPUs!")

    # load dataset
    if dataset == 'reduced_f_mnist':
        data_train, target_train = datasets_preset.provide_reduced_f_mnist(
            train=True)
        data_test, target_test = datasets_preset.provide_reduced_f_mnist(
            train=False)
    elif dataset == 'reduced_mnist':
        data_train, target_train = datasets_preset.provide_reduced_mnist(
            train=True)
        data_test, target_test = datasets_preset.provide_reduced_mnist(
            train=False)
    elif dataset == 'unreduced_f_mnist':
        data_train, target_train = datasets_preset.provide_unreduced_f_mnist(
            train=True)
        data_test, target_test = datasets_preset.provide_unreduced_f_mnist(
            train=False)
    elif dataset == 'unreduced_mnist':
        data_train, target_train = datasets_preset.provide_unreduced_mnist(
            train=True)
        data_test, target_test = datasets_preset.provide_unreduced_mnist(
            train=False)

    # load index
    train_index = pickle.load(
        open(
            'C:\\Users\\steve\\Desktop\\projects\\active-learning-prototypes\\results\\'
            'unreduced_mnist\\run_001\\indices_batch_019.pkl', 'rb'))
    get_from = -30001
    train_index = train_index[1][get_from:-1]
    random_index = np.random.randint(0, 60000, len(train_index))

    models = []
    tree_method = "auto"  # "gpu_hist" if cuda_flag else "auto"
    print('Tree creation method: ' + tree_method)
    xgbc = XGBClassifier(max_depth=8,
                         objective='multi:softmax',
                         n_estimators=1,
                         n_jobs=32,
                         reg_lambda=1,
                         gamma=2,
                         learning_rate=1,
                         num_class=10,
                         tree_method=tree_method)
    xgbc.fit(data_train[train_index], target_train[train_index])
    models.append(xgbc)

    xgbc = XGBClassifier(max_depth=8,
                         objective='multi:softmax',
                         n_estimators=1,
                         n_jobs=32,
                         reg_lambda=1,
                         gamma=2,
                         learning_rate=1,
                         num_class=10,
                         tree_method=tree_method)
    xgbc.fit(data_train[random_index], target_train[random_index])
    models.append(xgbc)

    # training and test process, 1st batch
    output_list_test = np.zeros(
        (2, len(data_test))).astype(int)  # n_models x n_data
    for i_model in range(2):
        output_list_test[i_model, :] = models[i_model].predict(data_test)

    # Document first batch
    acc_models = qbc_preset.each_model_acc(output_list_test, target_test)
    print(acc_models)
    a = 1  # no-op; convenient spot for a debugger breakpoint
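
# qbc_preset.each_model_acc and the vote-entropy ranking itself are not shown in this excerpt.
# As a small, hedged illustration of the vote-entropy quantity these QBC experiments revolve
# around (not the project's own helper), given an (n_models, n_data) array of hard predictions:
import numpy as np


def vote_entropy(predictions, n_classes=10):
    """Illustrative sketch: per-sample vote entropy for an (n_models, n_data) array of predicted labels."""
    n_models, n_data = predictions.shape
    entropy = np.zeros(n_data)
    for c in range(n_classes):
        vote_ratio = (predictions == c).sum(axis=0) / n_models  # fraction of committee voting for class c
        nonzero = vote_ratio > 0
        entropy[nonzero] -= vote_ratio[nonzero] * np.log2(vote_ratio[nonzero])
    return entropy  # 0 = full agreement, log2(n_classes) = maximal disagreement

# For the two-model comparison above, vote_entropy(output_list_test) could be used to measure
# per-sample disagreement between the AL-selected and the randomly trained classifier.
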
def plot_shaded_std(train_idx, csv_dict, color_palette, my_dpi):
    plt.interactive(True)
    name = 'entire_data'
    max_val = True
    legend_loc = 7
    x_fontsize = 10
    plot_max = False

    figure = plt.figure(figsize=(1200 / my_dpi, 1200 / my_dpi), dpi=my_dpi)
    shaded_std_line(train_idx[2], csv_dict, figure, 'QBC entropy + diversity',
                    color_palette[0])
    shaded_std_line(train_idx[0], csv_dict, figure, 'QBC: Vote',
                    color_palette[1])
    shaded_std_line(train_idx[1], csv_dict, figure, 'without AL',
                    color_palette[2])
    '''# draw line for max acc
    if max_val:
        color_list = [1, 2, 0]
        for i_line in range(3):
            y_lim = figure.axes[0].get_ylim()
            # get x position of max value
            max_acc = csv_dict["committee_vote_list"][train_idx[i_line]].max()
            x_pos = csv_dict["batch_size_list"][train_idx[i_line]][csv_dict["committee_vote_list"][train_idx[i_line]].argmax()]
            plt.axvline(x=x_pos, c=color_palette[color_list[i_line]], ls='dashed')
            # prevent text goes outside axes
            if x_pos < (figure.axes[0].get_xlim()[1]*0.6):
                h_align = 'right'
                x_text = x_pos * 0.98
            else:
                h_align = 'right'
                x_text = x_pos*0.98
            # text
            t = plt.text(x=x_text, y=y_lim[0] + (y_lim[1] - y_lim[0])*(0.25 - i_line*0.1),
                         s=("{:.2f}".format(max_acc) + ' %; data = ' + str(x_pos)), c=color_palette[color_list[i_line]],
                         ha=h_align)
            t.set_bbox(dict(facecolor='w', alpha=0.8, edgecolor='w'))'''

    if plot_max:
        acc_all_data = csv_dict['committee_vote_list'][8][-1]
        plt.axhline(y=acc_all_data, color='r', linestyle='dashed', alpha=0.6)
        x_lim = figure.axes[0].get_xlim()
        text = ('Acc, entire dataset: ')
        plt.text(x=x_lim[0] + (x_lim[1] - x_lim[0]) * 0.05,
                 y=acc_all_data,
                 s=(text + "{:.2f}".format(acc_all_data) + ' %'),
                 c='r',
                 ha='left')

    n_plots = 10
    # for x_ticks
    indices = [0] + list(
        range(
            int(csv_dict["batch_size_list"][train_idx[1]].shape[0] / n_plots) -
            1, csv_dict["batch_size_list"][train_idx[1]].shape[0],
            int(csv_dict["batch_size_list"][train_idx[1]].shape[0] / n_plots)))
    plt.title(
        name.replace("_", " ").capitalize() + ' size, ' +
        csv_dict["data_name"])
    plt.grid(which='both', axis='both')
    plt.xlabel('Number of training samples')
    plt.legend(loc=legend_loc)
    plt.ylabel('Test data acc [%]')

    # save fig; the file name encodes whether the first batch size is 1000 or 3000 samples
    if csv_dict["batch_size_list"][train_idx[1]][indices][0] == 1000:
        file_name = name + '_' + str(1000)
    elif csv_dict["batch_size_list"][train_idx[1]][indices][0] == 3000:
        file_name = name + '_' + str(3000)
    else:
        file_name = name  # fallback so file_name is always defined

    plt.xticks(ticks=csv_dict["batch_size_list"][train_idx[1]][indices],
               fontsize=x_fontsize,
               rotation=45)
    plt.tight_layout()
    result_dir = os.path.join(dr(dr(dr(abspath(__file__)))), 'results',
                              'plots', csv_dict["data_name"].replace(" ", "_"))
    if not os.path.isdir(result_dir):
        os.makedirs(result_dir)
    file_name = os.path.join(result_dir, (file_name + '.png'))
    plt.savefig(file_name, format='png', dpi=300)
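
# shaded_std_line is used above but not shown in this excerpt. A minimal, hedged sketch of what
# such a helper typically does (mean curve over repeated runs with a +/- 1 std band); the function
# name and signature below are illustrative, not the project's actual API.
import numpy as np
import matplotlib.pyplot as plt


def shaded_std_sketch(x, runs, label, color):
    """Illustrative sketch: plot the mean of several runs with a shaded +/- 1 std band."""
    runs = np.asarray(runs)          # shape: (n_runs, n_points)
    mean = runs.mean(axis=0)
    std = runs.std(axis=0)
    plt.plot(x, mean, color=color, label=label)
    plt.fill_between(x, mean - std, mean + std, color=color, alpha=0.3)
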
def main():
    """SMALL_SIZE = 12
    MEDIUM_SIZE = 16
    BIGGER_SIZE = 20

    plt.rc('font', size=SMALL_SIZE)  # controls default text sizes
    plt.rc('axes', titlesize=BIGGER_SIZE)  # fontsize of the axes title
    plt.rc('axes', labelsize=MEDIUM_SIZE)  # fontsize of the x and y labels
    plt.rc('xtick', labelsize=SMALL_SIZE)  # fontsize of the tick labels
    plt.rc('ytick', labelsize=SMALL_SIZE)  # fontsize of the tick labels
    plt.rc('legend', fontsize=SMALL_SIZE)  # legend fontsize
    plt.rc('figure', titlesize=BIGGER_SIZE)  # fontsize of the figure title"""

    plot_path = os.path.join(dr(dr(dr(abspath(__file__)))), 'results', 'plots')
    csv_mnist_reduced = os.path.join(dr(dr(dr(abspath(__file__)))), 'results',
                                     'reduced_mnist', 'xgb_qbc.csv')
    csv_mnist_unreduced = os.path.join(dr(dr(dr(abspath(__file__)))),
                                       'results', 'unreduced_mnist',
                                       'xgb_qbc.csv')
    csv_f_mnist_reduced = os.path.join(dr(dr(dr(abspath(__file__)))),
                                       'results', 'reduced_f_mnist',
                                       'xgb_qbc.csv')
    csv_f_mnist_unreduced = os.path.join(dr(dr(dr(abspath(__file__)))),
                                         'results', 'unreduced_f_mnist',
                                         'xgb_qbc.csv')
    csv_entropy = os.path.join(dr(dr(dr(abspath(__file__)))), 'results',
                               've.csv')
    csv_cluster_size = os.path.join(dr(dr(dr(abspath(__file__)))), 'results',
                                    'cluster_size.csv')
    csv_normal = os.path.join(dr(dr(dr(abspath(__file__)))), 'results',
                              'normal_dist_committee.csv')

    mnist_reduced = csv_train_reader(csv_mnist_reduced, 'encoded MNIST')
    mnist_unreduced = csv_train_reader(csv_mnist_unreduced, 'MNIST')
    f_mnist_reduced = csv_train_reader(csv_f_mnist_reduced, 'reduced_F-MNIST')
    # f_mnist_unreduced = csv_train_reader(csv_f_mnist_unreduced, 'unreduced_F-MNIST')
    '''# For testing on training data
    csv_mnist_reduced = os.path.join(dr(dr(dr(abspath(__file__)))), 'results', 'reduced_mnist', 'xgb_qbc_train.csv')
    # csv_mnist_unreduced = os.path.join(dr(dr(dr(abspath(__file__)))), 'results', 'unreduced_mnist', 'xgb_qbc_train.csv')
    csv_f_mnist_reduced = os.path.join(dr(dr(dr(abspath(__file__)))), 'results', 'reduced_f_mnist', 'xgb_qbc_train.csv')
    # csv_f_mnist_unreduced = os.path.join(dr(dr(dr(abspath(__file__)))), 'results', 'unreduced_f_mnist', 'xgb_qbc_train.csv')

    mnist_reduced = csv_train_reader(csv_mnist_reduced, 'reduced_MNIST_train')
    # mnist_unreduced = csv_train_reader(csv_mnist_unreduced, 'unreduced_MNIST_train')
    # f_mnist_reduced = csv_train_reader(csv_f_mnist_reduced, 'reduced_F-MNIST_train')
    # f_mnist_unreduced = csv_train_reader(csv_f_mnist_unreduced, 'unreduced_F-MNIST_train')'''

    avg_mnist_reduced_1000 = avg_runs_1000(mnist_reduced)
    # avg_f_mnist_reduced_1000 = avg_runs_1000(f_mnist_reduced)
    avg_mnist_reduced_3000 = avg_runs_3000(mnist_reduced)
    avg_f_mnist_reduced_3000 = avg_runs_3000(f_mnist_reduced)
    avg_mnist_unreduced_3000 = avg_runs_3000(mnist_unreduced)

    color_palette = ("tab:blue", "tab:orange", "tab:green", "tab:red",
                     "tab:purple", "tab:brown")
    my_dpi = 180

    # plot vote entropy
    # entropy_plotter(csv_entropy, color_palette, my_dpi)
    # cluster_size(csv_cluster_size, color_palette, my_dpi)
    # normal_dist(csv_normal, color_palette, my_dpi)

    # plot_shaded_std([0, 1, 2], avg_mnist_reduced_1000, color_palette, my_dpi)
    # plot_shaded_std([0, 1, 2], avg_f_mnist_reduced_1000, color_palette, my_dpi)

    # plot_shaded_std([0, 1, 2], avg_mnist_reduced_3000, color_palette, my_dpi)
    # plot_shaded_std([0, 1, 2], avg_f_mnist_reduced_3000, color_palette, my_dpi)
    plot_shaded_std([0, 1, 2], avg_mnist_unreduced_3000, color_palette, 200)

    a = 1  # no-op; convenient spot for a debugger breakpoint
def entropy_plotter(csv_path, color_palette, my_dpi):

    plt.interactive(True)
    title_list = [
        'vote entropy, acc=30%', 'vote entropy, acc=50%',
        'vote entropy, acc=80%'
    ]
    plot_path = os.path.join(dr(dr(dr(abspath(__file__)))), 'results', 'plots',
                             'for_ppt')

    entropy_list = []
    acc_list = []
    acc_flag = True

    # open and iterate through .csv
    with open(csv_path) as csv_file:
        csv_reader = csv.reader(csv_file, delimiter=';')
        for row in csv_reader:
            if acc_flag:
                acc = np.array(row).astype(float)
                acc_list.append(acc)
                acc_flag = False
            else:
                en = np.array(row).astype(float)
                entropy_list.append(en)
                acc_flag = True

    # plot it
    fig = plt.figure(figsize=(1000 / my_dpi, 600 / my_dpi), dpi=my_dpi)
    gs = gridspec.GridSpec(nrows=2,
                           ncols=2,
                           width_ratios=[1, 6],
                           height_ratios=[6, 1])
    ax_en = plt.subplot(gs[0])
    ax_data = plt.subplot(gs[1])
    ax_acc = plt.subplot(gs[3])
    axes = [ax_en, ax_data, ax_acc]

    for i_en in range(3):
        ax_data.scatter(x=acc_list[i_en][0:500] * 100,
                        y=entropy_list[i_en][0:500],
                        c=color_palette[i_en],
                        alpha=0.4,
                        label=title_list[i_en])
        # draw data distribution, acc
        sns.kdeplot(acc_list[i_en][0:500] * 100,
                    color=color_palette[i_en],
                    ax=ax_acc,
                    shade=True)
        # draw data distribution, entropy
        sns.kdeplot(entropy_list[i_en][0:500],
                    color=color_palette[i_en],
                    ax=ax_en,
                    vertical=True,
                    shade=True)

    # plot the maximal possible vote entropy: with 10 classes and a uniform vote
    # distribution, -sum_c (1/10) * log2(1/10) = log2(10), roughly 3.32 bits
    max_en = -1 * (10 * 0.1 * math.log(0.1) / math.log(2))
    ax_data.axhline(y=max_en, color='r', linestyle='dashed')

    # plot ideal line entropy value possible
    ax_data.plot([0, 10, 100], [(math.log(9) / math.log(2)), max_en, 0],
                 color='#003399',
                 linestyle='dashed')

    # make data looks pretty
    ax_data.legend(loc=(0.61, 0.71))
    ax_data.grid(which='both')
    ax_data.minorticks_on()
    ax_data.set_title(
        'Vote Entropy on 500 random MNIST data, Committee: 20 models',
        fontsize=15)
    ax_data.set_xlabel('Model with correct prediction in committee [%]',
                       fontsize=14)
    ax_data.set_ylabel('Vote Entropy []', fontsize=14)

    # make entropy dist look pretty
    ax_en.tick_params(axis='x', which='both', bottom=False, labelbottom=False)
    ax_en.set_ylim(ax_data.get_ylim())
    ax_en.grid(which='both', axis='y')

    # make acc dist look pretty
    ax_acc.tick_params(axis='y', which='both', left=False, labelleft=False)
    ax_acc.set_xlim(ax_data.get_xlim())
    ax_acc.grid(which='both', axis='x')

    plt.tight_layout()
    # save fig
    name = 'vote_entropy_fig'
    file_name = os.path.join(plot_path, (name + '.png'))
    plt.savefig(file_name, format='png', dpi=300)
Example #24
def main(dropout):
    """
    Rank the Cityscapes training data by vote entropy of a Monte-Carlo dropout
    committee built from a pretrained segmentation model.

    :param dropout: dropout rate used when sampling the committee members
    :return: None; the ranked indices and their entropies are saved to the run directory
    """

    # paths
    save_path = os.path.join(dr(dr(dr(abspath(__file__)))), 'results',
                             'random')
    csv_name_train = 'train.csv'
    csv_name_test = 'test.csv'
    csv_name_index = 'index.csv'
    dir_name = 've_ranker_'
    index_path_name = 'random_bulk_40_003'
    save_weights_flag = True
    cityscape_path = os.path.join(dr(dr(dr(abspath(__file__)))), 'data',
                                  'cityscapes')
    cityscape_loss_weight_path = os.path.join(dr(dr(dr(abspath(__file__)))),
                                              'data', 'cityscapes',
                                              'class_weights.pkl')
    cityscape_pretrain_path = os.path.join(dr(dr(dr(abspath(__file__)))),
                                           'data', 'cityscape_pretrain')
    inference_path = os.path.join(dr(dr(dr(abspath(__file__)))), 'data',
                                  'cityscapes', 'inference')
    color_path = os.path.join(dr(dr(dr(abspath(__file__)))), 'data',
                              'cityscapes', 'color')
    print('cityscape_path: ' + cityscape_path)

    # arguments
    n_train = 2880  # divisible by 8: batch size and 10: 10% increment of training data increase
    n_pretrain = 0
    n_test = 500
    n_epoch = 40
    n_model = 10
    test_factor = 3  # committee only tested every test_factor-th batch
    batch_train_size = 3 * max(torch.cuda.device_count(), 1)
    batch_test_size = 25 * max(torch.cuda.device_count(), 1)
    lr = 0.0001
    loss_print = 2
    continue_flag = False
    poly_exp = 1.0
    feature_extract = True
    dropout_rate = dropout
    idx_ratio = [1.0, 0.0]
    data_limit = 0.7

    # report qbc semseg to user in terminal
    text = (('n_model(dropout): ' + str(n_model)) +
            (', n_train: ' + str(n_train)) +
            (', batch_train_size: ' + str(batch_train_size)) +
            (', idx_ratio: ' + str(idx_ratio)) +
            (', test_factor: ' + str(test_factor)))
    print(text)

    # CUDA
    cuda_flag = torch.cuda.is_available()
    device = torch.device("cuda" if cuda_flag else "cpu")
    device_cpu = torch.device("cpu")
    dataloader_kwargs = {'pin_memory': True} if cuda_flag else {}
    print(torch.cuda.device_count(), "GPUs detected")
    # print("Max memory allocated:" + str(np.round(torch.cuda.max_memory_allocated(device) / 1e9, 3)) + ' Gb')

    # get data and index library
    mean_std = ([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    transform = T.Compose([
        T.Resize((800, 800), Image.BICUBIC),
        T.ToTensor(),
        T.Normalize(*mean_std)
    ])
    train_dataset = dataset_preset.Dataset_Cityscapes_n(
        root=cityscape_path,
        split='train',
        mode='fine',
        target_type='semantic',
        transform=transform,
        target_transform=segmen_preset.label_id2label,
        n=n_train)
    train_dataset_idx = dataset_preset.Dataset_Cityscapes_n_i(
        root=cityscape_path,
        split='train',
        mode='fine',
        target_type='semantic',
        transform=transform,
        target_transform=segmen_preset.label_id2label,
        n=n_train)  # also get index of data
    test_dataset = dataset_preset.Dataset_Cityscapes_n_i(
        root=cityscape_path,
        split='val',
        mode='fine',
        target_type='semantic',
        transform=transform,
        target_transform=segmen_preset.label_id2label,
        n=n_test)
    test_dataloader = DataLoader(test_dataset,
                                 batch_size=batch_test_size,
                                 shuffle=True,
                                 num_workers=3 *
                                 max(torch.cuda.device_count(), 1),
                                 drop_last=False)
    print("Datasets loaded!")

    # create models, optimizers, scheduler, criterion, the model
    fcn_model = torchvision.models.segmentation.deeplabv3_resnet101(
        pretrained=False,
        progress=True,
        num_classes=segmen_preset.n_labels_valid,
        aux_loss=True)
    fcn_model = fcn_model.cuda()
    fcn_model = nn.DataParallel(fcn_model)
    fcn_model.load_state_dict(
        torch.load(
            os.path.join(save_path, index_path_name,
                         'model_weight_epoch_train039.pt')))

    # the optimizers
    optimizer = torch.optim.Adam(
        [{
            'params': fcn_model.module.classifier.parameters()
        }, {
            'params':
            list(fcn_model.module.backbone.parameters()) +
            list(fcn_model.module.aux_classifier.parameters())
        }],
        lr=lr)
    lambda1 = lambda epoch: math.pow(1 - (epoch / n_epoch), poly_exp)
    scheduler = optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda1)

    with open(cityscape_loss_weight_path,
              "rb") as file:  # (needed for python3)
        class_weights = np.array(pickle.load(file))
    class_weights = torch.from_numpy(class_weights)
    class_weights = Variable(class_weights.type(torch.FloatTensor)).cuda()
    criterion = torch.nn.CrossEntropyLoss(weight=class_weights).cuda()

    # report everything
    print(str(n_model) + " fcn models created")
    text = ('n_model: ' + str(n_model)) + (', n_train: ' + str(n_train)) + (', n_epoch: ' + str(n_epoch)) +\
           (', batch_train_size: ' + str(batch_train_size)) + (', idx_ratio: ' + str(idx_ratio))
    print(text)

    # to document training process, create directory, etc
    train_text = [str(x) for x in range(1, n_epoch + 1)]
    test_text = [str(x) for x in range(1, n_epoch + 1)]
    train_index_text = [str(x) for x in range(1, 8)]
    train_index_docu = 0
    train_index = np.array([])
    test_text_index = 0

    # write text to csv
    dir_number = 1
    while os.path.exists(
            os.path.join(save_path, (dir_name + '{:03d}'.format(dir_number)))):
        dir_number += 1
    run_path = os.path.join(save_path,
                            (dir_name + '{:03d}'.format(dir_number)))
    os.makedirs(run_path)  # make run_* dir
    f = open(os.path.join(run_path, 'info.txt'), 'w+')  # write .txt file
    f.write(text)
    f.close()
    copy(__file__, os.path.join(run_path, os.path.basename(__file__)))

    # write training progress
    csv_path_train = os.path.join(run_path, csv_name_train)
    title = [
        "Training progress for n_model = " + str(n_model) + ", idx_ratio:  " +
        str(idx_ratio) + ', for multiple epoch'
    ]
    with open(csv_path_train, mode='a+', newline='') as test_file:
        test_writer = csv.writer(test_file, delimiter=',')
        test_writer.writerow(title)

    # write test progress
    csv_path_test = os.path.join(run_path, csv_name_test)
    title = [
        "Test progress for n_model = " + str(1) + ", idx_ratio:  " +
        str(idx_ratio) + ', for multiple epoch, torch seed: ' + str(None) +
        'run_path: ' + run_path + 'index_from: ' + index_path_name
    ]
    with open(csv_path_test, mode='a+', newline='') as test_file:
        test_writer = csv.writer(test_file, delimiter=',')
        test_writer.writerow(title)

    # write index and train progress
    csv_path_index = os.path.join(run_path, csv_name_index)
    title = [
        "Index progress for n_model = " + str(n_model) + ", idx_ratio:  " +
        str(idx_ratio) + ', for multiple epoch'
    ]
    with open(csv_path_index, mode='a+', newline='') as test_file:
        test_writer = csv.writer(test_file, delimiter=',')
        test_writer.writerow(title)

    # append with vote entropy
    t = Timer()
    t.start()
    # perform vote entropy on entire dataset
    indices, entropy, fcn_model = vote_entropy_dropout(fcn_model,
                                                       train_dataset_idx,
                                                       train_index,
                                                       idx_ratio,
                                                       batch_test_size,
                                                       device,
                                                       n_model,
                                                       dropout_rate,
                                                       1,
                                                       n_data=int(n_train))
    # save ranked indices
    np.save(os.path.join(run_path, 'indices.npy'), indices)
    np.save(os.path.join(run_path, 'entropy.npy'), entropy)

    # write index and train progress
    csv_path_index = os.path.join(run_path, csv_name_index)
    string_txt = "1: " + " ".join([str(x) + ',' for x in indices])
    with open(csv_path_index, mode='a+', newline='') as test_file:
        test_writer = csv.writer(test_file, delimiter=',')
        test_writer.writerow([string_txt])  # one field per row; a bare string would be split into characters
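
# data_limit = 0.7 is set above but not used in this snippet; presumably a downstream step loads
# the ranked indices and keeps only the top fraction. A minimal, hedged sketch of that step
# (run_path and the most-uncertain-first ordering are assumptions):
import os

import numpy as np

run_path = 'path/to/ve_ranker_001'                         # assumed run directory written by main() above
indices = np.load(os.path.join(run_path, 'indices.npy'))   # assumed ranked, most uncertain first
selected = indices[:int(len(indices) * 0.7)]
print('Selected', len(selected), 'of', len(indices), 'training samples')
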
Example #25
def main():

    torch.manual_seed(1)  # reset random for reproducibility
    model_path = os.path.join(dr(dr(dr(dr(abspath(__file__))))), 'results',
                              'f_mnist', 'Autoencoder_f_mnist.pt')
    train_path = os.path.join(dr(dr(dr(dr(abspath(__file__))))), 'data',
                              'Dataset_F_MNIST_n', 'coded_f_mnist_train.pt')
    test_path = os.path.join(dr(dr(dr(dr(abspath(__file__))))), 'data',
                             'Dataset_F_MNIST_n', 'coded_f_mnist_test.pt')
    mnist_root = os.path.join(dr(dr(dr(dr(abspath(__file__))))), 'data')

    # CUDA
    cuda_flag = torch.cuda.is_available()
    device = torch.device("cuda" if cuda_flag else "cpu")
    device_cpu = torch.device("cpu")
    dataloader_kwargs = {'pin_memory': True} if cuda_flag else {}
    print("Let's use", torch.cuda.device_count(), "GPUs!")

    transform = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize((0.1307, ), (0.3081, ))])

    train_dataset = Dataset_F_MNIST_n(root=mnist_root,
                                      train=True,
                                      download=True,
                                      transform=transform,
                                      n=60000)
    test_dataset = Dataset_F_MNIST_n(root=mnist_root,
                                     train=False,
                                     download=True,
                                     transform=transform,
                                     n=10000)
    train_loader = DataLoader(train_dataset,
                              batch_size=len(train_dataset),
                              shuffle=False)
    test_loader = DataLoader(test_dataset,
                             batch_size=len(test_dataset),
                             shuffle=False)

    # model
    saved_weights = torch.load(
        model_path, map_location=torch.device('cpu'))  # load saved weights
    model_weights = {
        "encoder_in.weight": saved_weights[
            "encoder_in.weight"],  # load weight only from encoder part
        "encoder_hidden.weight": saved_weights["encoder_hidden.weight"],
        "encoder_in.bias": saved_weights["encoder_in.bias"],
        "encoder_hidden.bias": saved_weights["encoder_hidden.bias"]
    }
    model = Encoder()
    model.load_state_dict(model_weights)
    model = nn.DataParallel(
        model.to(device))  # DataParallel allow using multiple GPU

    # code training and test data
    with torch.no_grad():  # stop tracking var gradient to reduce comp cost
        # convert train MNIST
        data, target, index = next(iter(train_loader))
        data = data.view(-1, 784).to(device)
        output = model(data)
        output = output.to(device_cpu)
        print("Training data converted")
        print(index)
        torch.save((output, target, index), train_path)  # save converted data

        # convert test MNIST
        data, target, index = next(iter(test_loader))
        data = data.view(-1, 784).to(device)
        output = model(data)
        output = output.to(device_cpu)
        print("Test data converted")
        print(index)
        torch.save((output, target, index), test_path)  # save converted data
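
# The tuples saved above can presumably be read back the same way they were written. A minimal,
# hedged sketch of reloading the coded F-MNIST training tensors (paths mirror those defined in
# main() above; variable names are illustrative):
import os
from os.path import dirname as dr, abspath

import torch

train_path = os.path.join(dr(dr(dr(dr(abspath(__file__))))), 'data',
                          'Dataset_F_MNIST_n', 'coded_f_mnist_train.pt')
coded_data, coded_target, coded_index = torch.load(train_path)
print(coded_data.shape, coded_target.shape)  # coded features and labels for all 60000 training images
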