from lib.experiment import Experiment
from lib import scheduler, utils

experiment = Experiment(N=1000,
                        M=5000,
                        t_max=10000,
                        beta_scheduler=scheduler.ConstantBetaScheduler(0.5),
                        algorithm="Metropolis",
                        batch_size=None,
                        use_gpu=False)
errors, energies, x = experiment.run()
utils.plot_errors_energies(errors, energies)
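# ConstantBetaScheduler keeps the inverse temperature fixed for the whole
# run. A hypothetical annealing variant could follow the same pattern; the
# __call__(t) interface below is an assumption for illustration, not part
# of lib.scheduler:
class LinearBetaScheduler:
    def __init__(self, beta_start, beta_end, t_max):
        self.beta_start = beta_start
        self.beta_end = beta_end
        self.t_max = t_max

    def __call__(self, t):
        # linearly interpolate beta from beta_start to beta_end over t_max steps
        frac = min(t / self.t_max, 1.0)
        return self.beta_start + frac * (self.beta_end - self.beta_start)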
def peharz_experiment():
    '''Run the Peharz experiment and generate its plots.'''
    # load experiment config file
    with open('./config/dev.yml') as f:
        config = yaml.safe_load(f)
    config['clip'] = False  # otherwise methods diverge?
    with open('./experiments/peharz.yml') as f:
        experiment_config = yaml.safe_load(f)
    name = 'peharz'
    solvers = experiment_config['solver_list']

    # generate data
    n = experiment_config['n']
    m = experiment_config['m']
    r = experiment_config['r']
    l0 = np.array([0.4, 0.5, 0.6, 0.7, 0.8, 0.9])
    X, W, H = generate_synthetic_data(n, m, r, l0)
    l0_axis = np.array(
        [Solver.get_nonzeros(H[:, :, i]) for i in range(len(l0))])
    print('Data generated, rank of X: ', np.linalg.matrix_rank(X[:, :, 0]))

    accuracy = np.zeros((len(l0), len(solvers)))  # currently unused
    # one (n_solvers, 0) array per plotted feature; a column is appended
    # below for each l0 value
    total = [
        np.zeros((len(solvers), 0))
        for feature in experiment_config['plot']
    ]
    for i in range(len(l0)):
        # generate experiment object
        config['project_l0'] = l0_axis[i]
        experiment = Experiment(config, X[:, :, i], experiment_config)
        experiment.run()
        summary = experiment.summary
        # use a fresh index here: reusing i would shadow the outer loop
        # variable
        for k, feature in enumerate(experiment_config['plot']):
            a = summary[feature]
            a = np.array(a).reshape((len(a), 1))
            total[k] = np.hstack((total[k], a))
        print(total)

    # plotting
    for i, feature in enumerate(experiment_config['plot']):
        fig = plt.figure(figsize=(4, 4))
        ax0 = fig.add_subplot(111)
        ax0.set_xlabel(r'$\ell_0 (H_o)$')
        for j in range(total[i].shape[0]):
            ax0.plot(l0_axis,
                     total[i][j, :],
                     color=COLORS[j],
                     label=LABELS[solvers[j]],
                     linestyle='--',
                     markersize=15,
                     marker='.')
        ax0.yaxis.set_major_formatter(FormatStrFormatter('%g'))
        ax0.xaxis.set_major_formatter(FormatStrFormatter('%g'))
        ax0.get_yaxis().set_tick_params(which='both', direction='in')
        ax0.get_xaxis().set_tick_params(which='both', direction='in')
        ax0.grid()
        ax0.set_ylabel(Y_LABELS[feature])
        # ax0.legend()
        # ax0.set_xscale('log')
        # ax0.set_yscale('log')
        s = '_' + str(n) + '_' + str(m) + '_' + str(r)
        fig.savefig('./experiments/' + name + '/' + feature + s + '.pgf',
                    bbox_inches='tight')
        fig.savefig('./experiments/' + name + '/' + feature + s + '.pdf',
                    bbox_inches='tight')
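# The experiments in this file rely on generate_synthetic_data(n, m, r, l0),
# which is defined elsewhere in the repo. The sketch below is an assumption
# about its behaviour inferred from the call sites: one (n, m) slice of X
# per requested l0 value, a shared nonnegative factor W, and an H slice
# keeping roughly an l0 fraction of nonzero entries.
import numpy as np


def generate_synthetic_data_sketch(n, m, r, l0, seed=None):
    rng = np.random.default_rng(seed)
    W = np.abs(rng.standard_normal((n, r)))
    H = np.zeros((r, m, len(l0)))
    X = np.zeros((n, m, len(l0)))
    for i, fraction in enumerate(l0):
        # dense nonnegative H, then zero out entries so that only about
        # `fraction` of them remain nonzero
        H_i = np.abs(rng.standard_normal((r, m)))
        mask = rng.random((r, m)) < fraction
        H[:, :, i] = H_i * mask
        X[:, :, i] = W @ H[:, :, i]
    return X, W, H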
import argparse

import yaml

from lib.experiment import Experiment


def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument("--config", required=True)
    args = parser.parse_args()
    return args


def read_yaml(path):
    with open(path, "r") as f:
        # yaml.load without an explicit Loader is unsafe and deprecated;
        # safe_load is the right call for plain config files
        config = yaml.safe_load(f)
    return config


if __name__ == "__main__":
    args = parse_args()
    config = read_yaml(args.config)
    exper = Experiment(config)
    exper.run()
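# Usage sketch for the entry point above; the script name run.py and the
# config path are illustrative assumptions, not fixed by this repo:
#
#     python run.py --config ./config/dev.yml
#
# The same thing can be done programmatically, bypassing argparse:
#
#     config = read_yaml("./config/dev.yml")
#     Experiment(config).run()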
def complexity_experiment():
    '''Compare how many iterations each solver needs to reach a fixed
    relative error as the problem size grows.'''
    # load experiment config file
    with open('./config/dev.yml') as f:
        config = yaml.safe_load(f)
    config['clip'] = False  # otherwise methods diverge?
    with open('./experiments/complexity.yml') as f:
        experiment_config = yaml.safe_load(f)
    name = 'complexity'
    solvers = experiment_config['solver_list']

    # generate data
    n = np.arange(190, 290, 10)
    m = np.arange(190, 290, 10)
    r = [5, 10, 15]
    l0 = [0.7]
    threshold = 0.2
    iterations = np.zeros((len(r), len(n), len(solvers)))
    for i in range(len(n)):
        for j in range(len(r)):
            X, W, H = generate_synthetic_data(n[i], m[i], r[j], l0)
            print('Data generated, rank of X: ',
                  np.linalg.matrix_rank(X[:, :, 0]))
            # l0 has a single entry, so X holds exactly one slice;
            # indexing it with i would run out of bounds
            experiment = Experiment(config, X[:, :, 0], experiment_config)
            experiment.run()
            for k, solver in enumerate(experiment.solvers):
                iterations_ = solver.output['iteration']
                rel_error = solver.output['rel_error']
                # first iteration at which the relative error drops below
                # the threshold; fall back to the last recorded iteration
                index_list = np.where(np.array(rel_error) < threshold)[0]
                if len(index_list) > 0:
                    index = index_list[0]
                    iterations[j, i, k] = iterations_[index]
                else:
                    iterations[j, i, k] = iterations_[-1]

    fig = plt.figure(figsize=(6, 6))
    ax0 = fig.add_subplot(111)
    ax0.set_xlabel('Size of $X$')
    ax0.set_ylabel('Iterations until relative error $< 0.2$')
    for i in range(len(r)):
        for j in range(len(solvers)):
            ax0.plot(n * m,
                     iterations[i, :, j],
                     color=COLORS[j],
                     label=solvers[j],
                     linestyle='--',
                     markersize=15,
                     marker='.')
    ax0.yaxis.set_major_formatter(FormatStrFormatter('%g'))
    ax0.xaxis.set_major_formatter(FormatStrFormatter('%g'))
    ax0.get_yaxis().set_tick_params(which='both', direction='in')
    ax0.get_xaxis().set_tick_params(which='both', direction='in')
    ax0.grid()
    ax0.legend()
    # ax0.set_xscale('log')
    # ax0.set_yscale('log')
    fig.savefig('./experiments/' + name + '/graph.pgf', bbox_inches='tight')
    fig.savefig('./experiments/' + name + '/graph.pdf', bbox_inches='tight')
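# The threshold search inside complexity_experiment can be factored into a
# small helper; this is an equivalent sketch for clarity, not code the
# experiment currently calls:
import numpy as np


def iterations_until(rel_error, iterations, threshold):
    """Return the first iteration count at which rel_error drops below
    threshold, falling back to the last recorded iteration."""
    below = np.flatnonzero(np.asarray(rel_error) < threshold)
    return iterations[below[0]] if below.size else iterations[-1]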
def main(args):
    sid = args.sid

    RND_STATE = 1234
    BATCH_SIZE = 48
    IMG_SIZE = 280
    n_classes = 1
    learning_rate = 2e-4
    efficientnet_b = 0
    cv_folds = 5

    seed_everything(RND_STATE + sid)

    IMG_PATH_2019_TRAIN = r"input/2019_train"
    DF_PATH_2019_TRAIN = r"input/trainLabels19_unique.csv"
    IMG_PATH_2015_TRAIN = r"input/2015_train"
    DF_PATH_2015_TRAIN = r"input/trainLabels15.csv"
    IMG_PATH_2015_TEST = r"input/2015_test"
    DF_PATH_2015_TEST = r"input/testLabels15.csv"
    IMG_PATH_MESSIDOR = r"input/messidor1_jpg"
    DF_PATH_MESSIDOR = r"input/messidor1_labels_adjudicated.csv"

    df_train = pd.read_csv(DF_PATH_2019_TRAIN)
    X_2019_train = df_train.id_code.values
    X_2019_train = IMG_PATH_2019_TRAIN + "/" + X_2019_train + ".jpg"
    y_2019_train = df_train.diagnosis.values.astype(np.float32)

    df_train_2015_train = pd.read_csv(DF_PATH_2015_TRAIN)
    X_2015_train = df_train_2015_train.image.values
    X_2015_train = IMG_PATH_2015_TRAIN + "/" + X_2015_train + ".jpg"
    y_2015_train = df_train_2015_train.level.values.astype(np.float32)

    df_train_2015_test = pd.read_csv(DF_PATH_2015_TEST)
    X_2015_test = df_train_2015_test.image.values
    X_2015_test = IMG_PATH_2015_TEST + "/" + X_2015_test + ".jpg"
    y_2015_test = df_train_2015_test.level.values.astype(np.float32)

    # df_messidor = pd.read_csv(DF_PATH_MESSIDOR)
    # df_messidor = df_messidor[df_messidor.adjudicated_dr_grade > -1]
    # X_messidor = df_messidor.image.values
    # X_messidor = IMG_PATH_MESSIDOR + "/" + X_messidor + ".jpg"
    # y_messidor = df_messidor.adjudicated_dr_grade.values.astype(np.float32)

    # channel-wise mean and std used for normalization
    normalize = [[0.43823998, 0.29557559, 0.20054542],
                 [0.27235733, 0.19562355, 0.16674458]]

    img_size = (IMG_SIZE, IMG_SIZE)
    transform_train = albumentations.Compose([
        albumentations.RandomCrop(*img_size),
        albumentations.HueSaturationValue(hue_shift_limit=7),
        albumentations.RandomBrightnessContrast(),
        albumentations.ShiftScaleRotate(shift_limit=0,
                                        scale_limit=(-0.05, 0.15),
                                        interpolation=cv2.INTER_CUBIC),
        albumentations.HorizontalFlip(),
        albumentations.VerticalFlip(),
        albumentations.Blur(),
        albumentations.Normalize(*normalize, p=1),
        ToTensorV2(),
    ])
    transform_validation = albumentations.Compose([
        albumentations.CenterCrop(*img_size),
        albumentations.Normalize(*normalize, p=1),
        ToTensorV2(),
    ])

    skf9 = StratifiedKFold(n_splits=cv_folds,
                           random_state=RND_STATE,
                           shuffle=True)
    for split_id, (tra9, tes9) in enumerate(
            skf9.split(X_2019_train, y_2019_train)):
        # only train the fold selected on the command line
        if split_id != sid:
            continue
        X_aptos_train, X_aptos_valid = X_2019_train[tra9], X_2019_train[tes9]
        y_aptos_train, y_aptos_valid = y_2019_train[tra9], y_2019_train[tes9]

        X_train = np.concatenate([
            X_aptos_train,
            # X_messidor,
            X_2015_train,
            X_2015_test,
        ])
        y_train = np.concatenate([
            y_aptos_train,
            # y_messidor,
            y_2015_train,
            y_2015_test,
        ])
        X_valid = np.concatenate([
            X_aptos_valid,
        ])
        y_valid = np.concatenate([
            y_aptos_valid,
        ])

        print("train/validation set size: {}/{}".format(
            len(y_train), len(y_valid)))

        dataset_train = ImageDataset(
            files=X_train,
            labels=y_train,
            transform=transform_train,
            # lower this value if an out-of-memory error is thrown
            buffer_size=100,
            image_size=IMG_SIZE)
        dataset_valid = ImageDataset(files=X_valid,
                                     labels=y_valid,
                                     transform=transform_validation,
                                     buffer_size=0,
                                     image_size=IMG_SIZE,
                                     size_is_min=True)

        # sampling weight for inputs of each class
        weights = np.array([1, 5, 5, 10, 10])
        weights = calc_sampler_weights(y_train, weights)
        # increase the probability of selecting APTOS 2019 train images
        # five-fold
        weights[:y_aptos_train.shape[0]] *= 5

        dataloader_train = DataLoader(
            dataset_train,
            batch_size=BATCH_SIZE,
            num_workers=4,
            # shuffle must stay off when a sampler is supplied; DataLoader
            # raises a ValueError if both are set
            sampler=WeightedRandomSampler(weights, 45000, True),
            pin_memory=True,
            drop_last=True,
        )
        dataloader_valid = DataLoader(
            dataset_valid,
            batch_size=BATCH_SIZE,
            num_workers=4,
            shuffle=False,
            pin_memory=True,
            drop_last=False,
        )

        # evaluate training metrics on a stratified 10% subset of the
        # training data
        _, train_val_ids = train_test_split(list(range(len(X_train))),
                                            test_size=0.1,
                                            stratify=y_train,
                                            random_state=RND_STATE)
        train_val_sampler = SubsetRandomSampler(train_val_ids)
        dataloader_train_eval = DataLoader(
            dataset_train,
            batch_size=BATCH_SIZE,
            num_workers=4,
            sampler=train_val_sampler,
            pin_memory=True,
            drop_last=False,
        )

        model = EfficientNet(b=efficientnet_b,
                             in_channels=3,
                             in_spatial_shape=IMG_SIZE,
                             n_classes=n_classes,
                             activation=nn.LeakyReLU(0.001),
                             bias=False,
                             drop_connect_rate=0.2,
                             dropout_rate=None,
                             bn_epsilon=1e-3,
                             bn_momentum=0.01,
                             pretrained=True,
                             progress=False)

        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        print("device:", device)
        model.to(device)

        optimizer = optim.Adam(model.parameters(),
                               lr=learning_rate,
                               weight_decay=0.1 * learning_rate)
        # optimizer = optim.RMSprop(model.parameters(),
        #                           lr=learning_rate,
        #                           momentum=0.9,
        #                           alpha=0.9,
        #                           weight_decay=0.1 * learning_rate)

        # the single-output model is trained as a regressor
        # criterion = nn.CrossEntropyLoss()
        criterion = nn.SmoothL1Loss()

        eval_metrics = [
            ("loss", criterion, {}),
            ("f1_score", pytorch_f1, {"average": "macro"}),
            # ("classwise_f1", pytorch_f1, {"average": None}),
            ("qwk", qw_kappa, {}),
        ]

        scheduler = None

        s = ("{epoch}:{step}/{max_epoch} | {loss_train:.4f} / {loss_valid:.4f}"
             " | {f1_score_train:.4f} / {f1_score_valid:.4f}"
             # " | {classwise_f1_train}/{classwise_f1_valid}"
             " | {qwk_train:.4f} / {qwk_valid:.4f} | {time_delta}")

        exp = Experiment(dl_train=dataloader_train,
                         dl_train_val=dataloader_train_eval,
                         dl_validation=dataloader_valid,
                         model=model,
                         optimizer=optimizer,
                         criterion=criterion,
                         device=device,
                         max_epoch=20,
                         metrics=eval_metrics,
                         target_metric="qwk",
                         format_str=s,
                         scheduler=scheduler,
                         load_path=None,
                         save_path="save/b%d_%dpx/%d" %
                         (efficientnet_b, IMG_SIZE, split_id),
                         evaluate_freq=3)
        exp.run()
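# calc_sampler_weights is imported from elsewhere in the repo; the sketch
# below is an assumption about its behaviour inferred from the call site:
# it maps each sample's integer label to the corresponding per-class weight,
# yielding one sampling weight per training example for the
# WeightedRandomSampler above.
import numpy as np


def calc_sampler_weights_sketch(labels, class_weights):
    # labels arrive as float32 diagnosis grades; cast to int to index the
    # per-class weight table
    labels = np.asarray(labels).astype(np.int64)
    return np.asarray(class_weights, dtype=np.float64)[labels]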