from lib.experiment import Experiment
from lib import scheduler, utils

experiment = Experiment(N=1000,
                        M=5000,
                        t_max=10000,
                        beta_scheduler=scheduler.ConstantBetaScheduler(0.5),
                        algorithm="Metropolis",
                        batch_size=None,
                        use_gpu=False)
errors, energies, x = experiment.run()
utils.plot_errors_energies(errors, energies)
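
The scheduler module is not shown in this snippet; ConstantBetaScheduler(0.5) presumably just hands the Experiment a fixed inverse temperature. A minimal sketch, assuming a hypothetical beta(t) accessor (the real interface may differ):

class ConstantBetaScheduler:
    """Return the same inverse temperature beta at every time step."""

    def __init__(self, beta):
        self._beta = beta

    def beta(self, t):
        # a constant schedule ignores the current step t
        return self._beta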
Example #2
# assumed context: yaml, numpy as np, matplotlib.pyplot as plt and
# FormatStrFormatter from matplotlib.ticker, plus the repo's Experiment,
# Solver, generate_synthetic_data and the COLORS/LABELS/Y_LABELS constants
def peharz_experiment():
    '''
    Runs the Peharz experiment and generates its plots.
    '''
    # load experiment config file
    with open('./config/dev.yml') as f:
        config = yaml.safe_load(f)
    config['clip'] = False  # otherwise methods diverge?
    with open('./experiments/peharz.yml') as f:
        experiment_config = yaml.safe_load(f)
    name = 'peharz'
    solvers = experiment_config['solver_list']
    # generate data
    n = experiment_config['n']
    m = experiment_config['m']
    r = experiment_config['r']
    l0 = np.array([0.4, 0.5, 0.6, 0.7, 0.8, 0.9])
    X, W, H = generate_synthetic_data(n, m, r, l0)
    l0_axis = np.array(
        [Solver.get_nonzeros(H[:, :, i]) for i in range(len(l0))])
    print('Data generated, rank of X: ', np.linalg.matrix_rank(X[:, :, 0]))
    accuracy = np.zeros((len(l0), len(solvers)))
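    # one (n_solvers, 0) array per plotted feature; each pass through the
    # l0 loop below appends a column of per-solver values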
    total = [
        np.zeros((len(solvers), 0))
        for _ in experiment_config['plot']
    ]
    for i in range(len(l0)):
        # generate experiment object
        config['project_l0'] = l0_axis[i]
        experiment = Experiment(config, X[:, :, i], experiment_config)
        #print([solver.name for solver in experiment.solvers])
        experiment.run()
        summary = experiment.summary
        #summary = experiment.get_summary()
        # use k here so the outer loop's index i is not shadowed
        for k, feature in enumerate(experiment_config['plot']):
            a = summary[feature]
            a = np.array(a).reshape((len(a), 1))
            total[k] = np.hstack((total[k], a))
    print(total)
    # plotting
    for i, feature in enumerate(experiment_config['plot']):
        fig = plt.figure(figsize=(4, 4))
        ax0 = fig.add_subplot(111)
        #color = ['r', 'g', 'b', 'cyan', 'k']
        ax0.set_xlabel(r'$\ell_0(H_o)$')
        for j in range(total[i].shape[0]):
            ax0.plot(l0_axis,
                     total[i][j, :],
                     color=COLORS[j],
                     label=LABELS[solvers[j]],
                     linestyle='--',
                     markersize=15,
                     marker='.')
        ax0.yaxis.set_major_formatter(FormatStrFormatter('%g'))
        ax0.xaxis.set_major_formatter(FormatStrFormatter('%g'))
        ax0.get_yaxis().set_tick_params(which='both', direction='in')
        ax0.get_xaxis().set_tick_params(which='both', direction='in')
        ax0.grid()
        ax0.set_ylabel(Y_LABELS[feature])
        #ax0.legend()
        #ax0.set_xscale('log')
        #ax0.set_yscale('log')
        s = '_' + str(n) + '_' + str(m) + '_' + str(r)
        fig.savefig('./experiments/' + name + '/' + feature + s + '.pgf',
                    bbox_inches='tight')
        fig.savefig('./experiments/' + name + '/' + feature + s + '.pdf',
                    bbox_inches='tight')
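
generate_synthetic_data is defined elsewhere in the repo. A plausible sketch, inferred only from the call sites above (one rank-r product X = W @ H per entry of l0, with H sparsified to roughly that nonzero fraction; all names and shapes here are assumptions):

import numpy as np

def generate_synthetic_data(n, m, r, l0):
    # hypothetical sketch, not the repo's implementation
    rng = np.random.default_rng()
    W = rng.random((n, r))
    X = np.zeros((n, m, len(l0)))
    H = np.zeros((r, m, len(l0)))
    for i, density in enumerate(l0):
        # zero out entries of H so that roughly `density` of them survive
        H[:, :, i] = rng.random((r, m)) * (rng.random((r, m)) < density)
        X[:, :, i] = W @ H[:, :, i]
    return X, W, H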
Example #3
import argparse
import yaml
from lib.experiment import Experiment


def parse_args():

    parser = argparse.ArgumentParser()
    parser.add_argument("--config", required=True)
    args = parser.parse_args()

    return args


def read_yaml(path):

    with open(path, "r") as f:
        config = yaml.safe_load(f)  # yaml.load without a Loader is unsafe and deprecated

    return config


if __name__ == "__main__":

    args = parse_args()
    config = read_yaml(args.config)

    exper = Experiment(config)
    exper.run()
Example #4
# same assumed context as peharz_experiment above
def complexity_experiment():
    '''
    compares how many iterations each solver needs to reach a target
    relative error, as a function of problem size
    '''
    # load experiment config file
    with open('./config/dev.yml') as f:
        config = yaml.safe_load(f)
    config['clip'] = False  # otherwise methods diverge?
    with open('./experiments/complexity.yml') as f:
        experiment_config = yaml.safe_load(f)
    name = 'complexity'
    solvers = experiment_config['solver_list']
    # generate data
    n = np.arange(190, 290, 10)
    m = np.arange(190, 290, 10)
    r = [5, 10, 15]
    l0 = [0.7]
    threshold = 0.2
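    # iterations[j, i, k]: steps solver k needs at rank r[j] and size
    # n[i] x m[i] before rel_error first drops below threshold (the last
    # recorded iteration is kept if it never does)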
    iterations = np.zeros((len(r), len(n), len(solvers)))
    for i in range(len(n)):
        for j in range(len(r)):
            X, W, H = generate_synthetic_data(n[i], m[i], r[j], l0)
            print('Data generated, rank of X: ',
                  np.linalg.matrix_rank(X[:, :, 0]))
            # l0 has a single entry, so X has exactly one slice along its third
            # axis; X[:, :, i] would go out of bounds for i >= 1
            experiment = Experiment(config, X[:, :, 0], experiment_config)
            experiment.run()
            for k, solver in enumerate(experiment.solvers):
                iterations_ = solver.output['iteration']
                rel_error = solver.output['rel_error']
                index_list = np.where(np.array(rel_error) < threshold)[0]
                if len(index_list) > 0:
                    index = index_list[0]
                    iterations[j, i, k] = iterations_[index]
                else:
                    iterations[j, i, k] = iterations_[-1]

    fig = plt.figure(figsize=(6, 6))
    ax0 = fig.add_subplot(111)
    #color = ['r', 'g', 'b', 'cyan', 'k']
    ax0.set_xlabel('Size of $X$')
    ax0.set_ylabel('Iterations until relative error $< %g$' % threshold)
    for i in range(len(r)):
        for j in range(len(solvers)):
            ax0.plot(n * m,
                     iterations[i, :, j],
                     color=COLORS[j],
                     label=solvers[j],
                     linestyle='--',
                     markersize=15,
                     marker='.')
    ax0.yaxis.set_major_formatter(FormatStrFormatter('%g'))
    ax0.xaxis.set_major_formatter(FormatStrFormatter('%g'))
    ax0.get_yaxis().set_tick_params(which='both', direction='in')
    ax0.get_xaxis().set_tick_params(which='both', direction='in')
    ax0.grid()
    #ax0.set_ylabel(Y_LABELS[feature])
    ax0.legend()
    #ax0.set_xscale('log')
    #ax0.set_yscale('log')
    #s = '_' + str(n) + '_' + str(m) + '_' + str(r)
    fig.savefig('./experiments/' + name + '/' + 'graph.pgf',
                bbox_inches='tight')
    fig.savefig('./experiments/' + name + '/' + 'graph.pdf',
                bbox_inches='tight')
Example #5

# assumed context: pandas as pd, numpy as np, torch (with nn, optim and the
# DataLoader/sampler classes), albumentations with ToTensorV2, cv2, sklearn's
# StratifiedKFold and train_test_split, plus the repo's ImageDataset,
# EfficientNet, Experiment, seed_everything, calc_sampler_weights, pytorch_f1
# and qw_kappa helpers
def main(args):
    sid = args.sid
    RND_STATE = 1234
    BATCH_SIZE = 48
    IMG_SIZE = 280
    n_classes = 1
    learning_rate = 2e-4

    efficientnet_b = 0  # EfficientNet-B0
    cv_folds = 5

    seed_everything(RND_STATE + sid)

    IMG_PATH_2019_TRAIN = r"input/2019_train"
    DF_PATH_2019_TRAIN = r"input/trainLabels19_unique.csv"

    IMG_PATH_2015_TRAIN = r"input/2015_train"
    DF_PATH_2015_TRAIN = r"input/trainLabels15.csv"

    IMG_PATH_2015_TEST = r"input/2015_test"
    DF_PATH_2015_TEST = r"input/testLabels15.csv"

    IMG_PATH_MESSIDOR = r"input/messidor1_jpg"
    DF_PATH_MESSIDOR = r"input/messidor1_labels_adjudicated.csv"

    df_train = pd.read_csv(DF_PATH_2019_TRAIN)
    X_2019_train = df_train.id_code.values
    X_2019_train = IMG_PATH_2019_TRAIN + "/" + X_2019_train + ".jpg"
    y_2019_train = df_train.diagnosis.values.astype(np.float32)

    df_train_2015_train = pd.read_csv(DF_PATH_2015_TRAIN)
    X_2015_train = df_train_2015_train.image.values
    X_2015_train = IMG_PATH_2015_TRAIN + "/" + X_2015_train + ".jpg"
    y_2015_train = df_train_2015_train.level.values.astype(np.float32)

    df_train_2015_test = pd.read_csv(DF_PATH_2015_TEST)

    X_2015_test = df_train_2015_test.image.values
    X_2015_test = IMG_PATH_2015_TEST + "/" + X_2015_test + ".jpg"
    y_2015_test = df_train_2015_test.level.values.astype(np.float32)

    # df_messidor = pd.read_csv(DF_PATH_MESSIDOR)
    # df_messidor = df_messidor[df_messidor.adjudicated_dr_grade > -1]
    # X_messidor = df_messidor.image.values
    # X_messidor = IMG_PATH_MESSIDOR + "/" + X_messidor + ".jpg"
    # y_messidor = df_messidor.adjudicated_dr_grade.values.astype(np.float32)

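    # per-channel RGB mean (first row) and std (second row), unpacked below as
    # the mean/std arguments of albumentations.Normalize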
    normalize = [[0.43823998, 0.29557559, 0.20054542],
                 [0.27235733, 0.19562355, 0.16674458]]

    img_size = (IMG_SIZE, IMG_SIZE)
    transform_train = albumentations.Compose([
        albumentations.RandomCrop(*img_size),
        albumentations.HueSaturationValue(hue_shift_limit=7),
        albumentations.RandomBrightnessContrast(),
        albumentations.ShiftScaleRotate(shift_limit=0,
                                        scale_limit=(-0.05, 0.15),
                                        interpolation=cv2.INTER_CUBIC),
        albumentations.HorizontalFlip(),
        albumentations.VerticalFlip(),
        albumentations.Blur(),
        albumentations.Normalize(*normalize, p=1),
        ToTensorV2(),
    ])

    transform_validation = albumentations.Compose([
        albumentations.CenterCrop(*img_size),
        albumentations.Normalize(*normalize, p=1),
        ToTensorV2(),
    ])

    skf9 = StratifiedKFold(n_splits=cv_folds,
                           random_state=RND_STATE,
                           shuffle=True)

    for split_id, (tra9,
                   tes9) in enumerate(skf9.split(X_2019_train, y_2019_train)):

        if split_id != sid:
            continue
        X_aptos_train, X_aptos_valid = X_2019_train[tra9], X_2019_train[tes9]
        y_aptos_train, y_aptos_valid = y_2019_train[tra9], y_2019_train[tes9]

        X_train = np.concatenate([
            X_aptos_train,
            # X_messidor,
            X_2015_train,
            X_2015_test,
        ])
        y_train = np.concatenate([
            y_aptos_train,
            # y_messidor,
            y_2015_train,
            y_2015_test,
        ])

        X_valid = np.concatenate([
            X_aptos_valid,
        ])
        y_valid = np.concatenate([
            y_aptos_valid,
        ])

        print("train/validation set size: {}/{}".format(
            len(y_train), len(y_valid)))

        dataset_train = ImageDataset(
            files=X_train,
            labels=y_train,
            transform=transform_train,
            # lower buffer_size if an out-of-memory error is thrown
            buffer_size=100,
            image_size=IMG_SIZE)

        dataset_valid = ImageDataset(files=X_valid,
                                     labels=y_valid,
                                     transform=transform_validation,
                                     buffer_size=0,
                                     image_size=IMG_SIZE,
                                     size_is_min=True)

        # per-class sampling weights for the inputs
        weights = np.array([1, 5, 5, 10, 10])
        weights = calc_sampler_weights(y_train, weights)
        # make APTOS 2019 train images five times as likely to be drawn
        weights[:y_aptos_train.shape[0]] *= 5
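        # (calc_sampler_weights above is defined elsewhere in the repo; it
        # presumably expands the per-class weights into one weight per training
        # sample for WeightedRandomSampler)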

        dataloader_train = DataLoader(
            dataset_train,
            batch_size=BATCH_SIZE,
            num_workers=4,
            # shuffle=True,
            sampler=WeightedRandomSampler(weights, 45000, True),
            pin_memory=True,
            drop_last=True,
        )
        dataloader_valid = DataLoader(
            dataset_valid,
            batch_size=BATCH_SIZE,
            num_workers=4,
            shuffle=False,
            pin_memory=True,
            drop_last=False,
        )

        _, train_val_ids = train_test_split(list(range(len(X_train))),
                                            test_size=0.1,
                                            stratify=y_train,
                                            random_state=RND_STATE)

        train_val_sampler = SubsetRandomSampler(train_val_ids)
        dataloader_train_eval = DataLoader(
            dataset_train,
            batch_size=BATCH_SIZE,
            num_workers=4,
            sampler=train_val_sampler,
            pin_memory=True,
            drop_last=False,
        )

        model = EfficientNet(b=efficientnet_b,
                             in_channels=3,
                             in_spatial_shape=IMG_SIZE,
                             n_classes=n_classes,
                             activation=nn.LeakyReLU(0.001),
                             bias=False,
                             drop_connect_rate=0.2,
                             dropout_rate=None,
                             bn_epsilon=1e-3,
                             bn_momentum=0.01,
                             pretrained=True,
                             progress=False)

        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        print("device ? : ", device)
        model.to(device)
        optimizer = optim.Adam(model.parameters(),
                               lr=learning_rate,
                               weight_decay=0.1 * learning_rate)
        # optimizer = optim.RMSprop(model.parameters(),
        #                        lr=learning_rate,
        #                        momentum=0.9,
        #                        alpha=0.9,
        #                        weight_decay=0.1 * learning_rate)
        # criterion = nn.CrossEntropyLoss()
        criterion = nn.SmoothL1Loss()
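        # each entry below is a (name, callable, kwargs) triple; Experiment
        # presumably evaluates each metric on both loaders, filling the
        # *_train / *_valid fields of the format string further down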
        eval_metrics = [
            ("loss", criterion, {}),
            ("f1_score", pytorch_f1, {
                "average": "macro"
            }),
            # ("classwise_f1", pytorch_f1, {"average": None}),
            ("qwk", qw_kappa, {})
        ]

        scheduler = None

        s = (
            "{epoch}:{step}/{max_epoch} | {loss_train:.4f} / {loss_valid:.4f}"
            " | {f1_score_train:.4f} / {f1_score_valid:.4f}"
            # " | {classwise_f1_train}/{classwise_f1_valid}"
            " | {qwk_train:.4f} / {qwk_valid:.4f} | {time_delta}")
        exp = Experiment(dl_train=dataloader_train,
                         dl_train_val=dataloader_train_eval,
                         dl_validation=dataloader_valid,
                         model=model,
                         optimizer=optimizer,
                         criterion=criterion,
                         device=device,
                         max_epoch=20,
                         metrics=eval_metrics,
                         target_metric="qwk",
                         format_str=s,
                         scheduler=scheduler,
                         load_path=None,
                         save_path="save/b%d_%dpx/%d" %
                         (efficientnet_b, IMG_SIZE, split_id),
                         evaluate_freq=3)

        exp.run()
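
The snippet calls main(args) but omits the entry point. A minimal sketch, assuming only the cross-validation fold index is needed (the argument name is taken from args.sid above):

import argparse

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--sid", type=int, default=0,
                        help="index of the CV fold to train (0 .. cv_folds-1)")
    main(parser.parse_args())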