def duplicate_exp(regs, n_cpus=None, n_bootstrap=30):
    """Grid-search regularization strengths on Mimic2 with duplicated features.

    Each regularizer in *regs* is paired with every alpha in a fixed
    log-spaced grid, then a bootstrap parameter search is launched.

    NOTE(review): a second ``duplicate_exp`` with a different signature appears
    later in this file; at import time the later definition shadows this one.
    """
    model = Mimic2(mode='total', duplicate=1)
    search = ParamSearch(model, n_cpus)
    for regularizer in regs:
        for alpha in (0.1, 0.01, 0.001, 0.0001, 0.00001):
            # experiment tag encodes regularizer and strength
            search.add_param(f'{regularizer.__name__}_dup_^{alpha}',
                             regularizer, alpha)
    search.run(n_bootstrap)
def reg_exp(p):
    """Run a regularizer/alpha grid search on Mimic2.

    *p* is a parsed-arguments namespace; this function reads ``p.seed``,
    ``p.regs`` and ``p.n_bootstrap`` (ParamSearch may read more from it).
    """
    search = ParamSearch(Mimic2(mode='total', seed=p.seed), p)
    grid = (0.1, 0.01, 0.001, 0.0001, 0.00001)
    for regularizer in p.regs:
        for alpha in grid:
            search.add_param(f'{regularizer.__name__}^{alpha}',
                             regularizer, alpha)
    search.run(p.n_bootstrap)
def expert_feature_only_exp(n_cpus=None, n_bootstrap=30):
    """Ridge grid search on Mimic2 restricted to expert features only.

    NOTE(review): a second ``expert_feature_only_exp`` with a different
    signature appears later in this file and shadows this one at import time.
    """
    model = Mimic2(mode='total', expert_feature_only=True)
    search = ParamSearch(model, n_cpus)
    for alpha in (0.1, 0.01, 0.001, 0.0001, 0.00001):
        search.add_param(f'expert_only_ridge^{alpha}', ridge, alpha)
    search.run(n_bootstrap)
def expert_feature_only_exp(p):
    """Ridge grid search on Mimic2 restricted to expert features only.

    *p* is a parsed-arguments namespace; reads ``p.seed`` and
    ``p.n_bootstrap``.
    """
    model = Mimic2(mode='total', expert_feature_only=True, seed=p.seed)
    search = ParamSearch(model, p)
    for alpha in (0.1, 0.01, 0.001, 0.0001, 0.00001):
        search.add_param(f'expert_only_ridge^{alpha}', ridge, alpha)
    search.run(p.n_bootstrap)
def duplicate_exp(p):
    """Grid search on Mimic2 with duplicated, noise-perturbed features.

    *p* is a parsed-arguments namespace; reads ``p.dup``, ``p.noise``,
    ``p.seed``, ``p.regs`` and ``p.n_bootstrap``.
    """
    model = Mimic2(mode='total', duplicate=p.dup, noise=p.noise, seed=p.seed)
    search = ParamSearch(model, p)
    for regularizer in p.regs:
        for alpha in (0.1, 0.01, 0.001, 0.0001, 0.00001):
            # tag records duplication factor, noise level and strength
            tag = f'{regularizer.__name__}_dup{p.dup}_{p.noise}^{alpha}'
            search.add_param(tag, regularizer, alpha)
    search.run(p.n_bootstrap)
def random_risk_exp(p):
    """eye_loss grid search on Mimic2 with randomized risk labels.

    *p* is an argparse namespace; reads ``p.seed`` and ``p.n_bootstrap``.
    """
    model = Mimic2(mode='total', random_risk=True, seed=p.seed)
    search = ParamSearch(model, p)
    for alpha in (0.1, 0.01, 0.001, 0.0001, 0.00001):
        search.add_param(f'random_risk_eye^{alpha}', eye_loss, alpha)
    search.run(p.n_bootstrap)
def random_risk_exp(regs, n_cpus=None, n_bootstrap=30):
    """Grid-search each regularizer in *regs* on Mimic2 with random risk labels.

    Fix: the original assigned ``reg = eye_loss`` and then immediately
    shadowed it with the loop variable, so the assignment was dead code;
    it has been removed (no behavior change).

    NOTE(review): another ``random_risk_exp(p)`` overload exists in this file;
    the later definition wins at import time.
    """
    m = Mimic2(mode='total', random_risk=True)
    ps = ParamSearch(m, n_cpus)
    alphas = [0.1, 0.01, 0.001, 0.0001, 0.00001]
    for reg in regs:
        for alpha in alphas:
            name = 'random_risk_' + reg.__name__ + '^' + str(alpha)
            ps.add_param(name, reg, alpha)
    ps.run(n_bootstrap)
def two_stage_exp(threshold=0.90, n_cpus=None, n_bootstrap=30):
    '''
    remove features by setting a threshold on correlation, then apply
    l2 regularization on the remaining features
    '''
    model = Mimic2(mode='total', two_stage=True, threshold=float(threshold))
    search = ParamSearch(model, n_cpus)
    for alpha in (0.1, 0.01, 0.001, 0.0001, 0.00001):
        # the tag embeds the threshold argument as passed by the caller
        search.add_param(f'two_stage_ridge_{threshold}^{alpha}', ridge, alpha)
    search.run(n_bootstrap)
def two_stage_exp(p):
    '''
    remove features by setting a threshold on correlation, then apply
    l2 regularization on the remaining features

    *p* is a parsed-arguments namespace; reads ``p.threshold``, ``p.seed``
    and ``p.n_bootstrap``.
    '''
    model = Mimic2(mode='total', two_stage=True, threshold=p.threshold,
                   seed=p.seed)
    search = ParamSearch(model, p)
    for alpha in (0.1, 0.01, 0.001, 0.0001, 0.00001):
        search.add_param(f'two_stage_ridge_{p.threshold}^{alpha}',
                         ridge, alpha)
    search.run(p.n_bootstrap)
def loadData(dataname, get_test=False, pin_memory=True, batch_size=1000,
             num_workers=0):
    """Build DataLoaders for either the mimic2 dataset or a pickled dataset.

    Parameters
    ----------
    dataname : str
        'mimic2', or the basename of a pickle at ``data/<dataname>.pkl``.
    get_test : bool
        If True, fold the validation split into training (mimic2 branch) and
        return the held-out test loader instead of the train/val pair.
    pin_memory, batch_size, num_workers
        Passed through to ``DataLoader``.

    Returns
    -------
    get_test=True  -> (test_loader, test_theta_or_None, ndim, n_islands_or_None)
    get_test=False -> (train_loader, val_loader, Theta_or_None,
                       val_theta_or_None, ndim, n_islands_or_None)

    Fixes vs. original: removed the unused local ``d = m.r.size(0)``, and the
    mimic2 get_test path no longer builds a training DataLoader that was
    immediately discarded.
    """
    if dataname == 'mimic2':
        m = Mimic2(mode='total')
        ndim = m.xtrain.shape[1]
        if get_test:
            # fold the validation split into training; evaluate on the test set
            xtrain = np.vstack([m.xtrain, m.xval])
            xval = m.xte
            ytrain = np.hstack([m.ytrain, m.yval])
            yval = m.yte
        else:
            xtrain, xval = m.xtrain, m.xval
            ytrain, yval = m.ytrain, m.yval
        # make y in {-1, 1}
        ytrain = ytrain * 2 - 1
        yval = yval * 2 - 1
        valdata = DataLoader(TensorDataset(*np2tensor(xval, yval)),
                             batch_size=batch_size, num_workers=num_workers,
                             pin_memory=pin_memory)
        if get_test:
            # here 'valdata' wraps the test split (see branch above)
            return valdata, None, ndim, None
        data = DataLoader(TensorDataset(*np2tensor(xtrain, ytrain)),
                          shuffle=True, batch_size=batch_size,
                          num_workers=num_workers, pin_memory=pin_memory)
        print('train shape:', xtrain.shape, 'n_islands:', None, 'ndim:', ndim)
        print('done loading data')
        return data, valdata, None, None, ndim, None

    # note: data are generated in heterogeneous groups ipython notebook
    X, Y, Theta, \
        Xval, Yval, val_theta, \
        Xtest, Ytest, test_theta, \
        ndim, n_islands = joblib.load('data/%s.pkl' % dataname)

    if get_test:
        xtest = torch.from_numpy(Xtest).float()
        ytest = torch.from_numpy(Ytest).float()
        test_data = DataLoader(TensorDataset(xtest, ytest),
                               batch_size=batch_size, num_workers=num_workers,
                               pin_memory=pin_memory)
        return test_data, test_theta, ndim, n_islands

    x = torch.from_numpy(X).float()
    y = torch.from_numpy(Y).float()
    train_data = DataLoader(TensorDataset(x, y), batch_size=batch_size,
                            shuffle=True, num_workers=num_workers,
                            pin_memory=pin_memory)
    xval = torch.from_numpy(Xval).float()
    yval = torch.from_numpy(Yval).float()
    val_data = DataLoader(TensorDataset(xval, yval), batch_size=batch_size,
                          num_workers=num_workers, pin_memory=pin_memory)
    print('train shape:', X.shape, 'n_islands:', n_islands, 'ndim:', ndim)
    print('done loading data')
    return train_data, val_data, Theta, val_theta, ndim, n_islands