import torch
from torchtext import data
from torchtext.data import TabularDataset

from params_fit import p  # get parameters
from params_save import S  # class to save objects

dataset_path = args.dataset_path

p.which_adversarial = args.which_adversarial
p.out_dir = '../../models/SST/'
p.num_iters = 100
p.signal_strength = args.signal_strength
p.bias = "bias"
p.seed = args.seed
max_patience = 5
patience = 0
decoy_strength = args.decoy_strength

seed(p)
s = S(p)

out_name = str(args.which_adversarial) + p._str(p)
torch.cuda.set_device(args.gpu)

inputs = data.Field(lower=True)
answers = data.Field(sequential=False, unk_token=None)
tv_datafields = [("text", inputs), ("label", answers)]
train, dev, test = TabularDataset.splits(
    path=dataset_path,  # the root directory where the data lies
    train='train_bias_SST.csv',
    validation="dev_bias_SST.csv",
    test="test_bias_SST.csv",
    format='csv',
    skip_header=False,
    fields=tv_datafields)
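
# A minimal sketch of the usual next step with these torchtext fields:
# build vocabularies and batch iterators (the batch size and device below
# are illustrative assumptions, not part of the original snippet)
inputs.build_vocab(train, dev, test)
answers.build_vocab(train)
train_iter, dev_iter, test_iter = data.BucketIterator.splits(
    (train, dev, test),
    batch_size=50,
    sort_key=lambda ex: len(ex.text),  # batch examples of similar length
    device=torch.device('cuda' if torch.cuda.is_available() else 'cpu'))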
Example #2
import argparse

import numpy as np
import torch

from params_save import S  # class to save objects

# the parser setup above '--log_interval' was truncated in the original;
# the flag names and defaults here are assumptions
parser = argparse.ArgumentParser()
parser.add_argument('--epochs', type=int, default=10, metavar='N')
parser.add_argument('--no_cuda', action='store_true', default=False)
parser.add_argument('--seed', type=int, default=1, metavar='S')
parser.add_argument(
    '--log_interval',
    type=int,
    default=10,
    metavar='N',
    help='how many batches to wait before logging training status')
parser.add_argument(
    '--regularizer_rate',
    type=float,
    default=0.0,
    metavar='N',
    help='how heavily to regularize lower-order interactions (AKA color)')
parser.add_argument('--grad_method',
                    type=int,
                    default=0,
                    metavar='N',
                    help='which gradient method is used - Grad or CD')

args = parser.parse_args()
s = S(args.epochs)
use_cuda = not args.no_cuda and torch.cuda.is_available()
regularizer_rate = args.regularizer_rate
s.regularizer_rate = regularizer_rate
num_blobs = 8
s.num_blobs = num_blobs
s.seed = args.seed

device = torch.device("cuda" if use_cuda else "cpu")

kwargs = {
    'num_workers': 0,
    'pin_memory': True,
    # worker_init_fn must be a callable taking a worker id; calling
    # np.random.seed(12) inline would store None, so wrap it in a lambda
    'worker_init_fn': lambda worker_id: np.random.seed(12)
} if use_cuda else {}
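
# A minimal usage sketch: the kwargs above are forwarded to DataLoader so
# CUDA runs pin host memory and seed loader workers (the MNIST dataset and
# batch size here are illustrative assumptions)
from torchvision import datasets, transforms
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=True, download=True,
                   transform=transforms.ToTensor()),
    batch_size=64,
    shuffle=True,
    **kwargs)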
Example #3
import numpy as np
import numpy.linalg as npl
import pmlb
from scipy.optimize import minimize
from sklearn import metrics
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import Lasso, LinearRegression, Ridge, RidgeCV
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle

# seed, save, S_save, data, and regression_dsets_large_names are
# project-local helpers (not shown in this snippet)


def fit(p):
    out_name = p._str(p)  # generate random fname str before saving
    seed(p.seed)
    s = S_save(p)

    #################################################################### DATA ##############################################################

    # testing data should always be generated with the same seed
    if p.dset == 'gaussian':
        p.n_train = int(p.n_train_over_num_features * p.num_features)

        # warning - this reseeds!
        X_train, y_train, X_test, y_test, s.betastar = \
            data.get_data_train_test(n_train=p.n_train, n_test=p.n_test, p=p.num_features,
                                noise_std=p.noise_std, noise_distr=p.noise_distr, iid=p.iid, # parameters to be determined
                                beta_type=p.beta_type, beta_norm=p.beta_norm,
                                seed_for_training_data=p.seed, cov_param=p.cov_param)
    elif p.dset == 'pmlb':
        s.dset_name = regression_dsets_large_names[p.dset_num]
        seed(703858704)
        X, y = pmlb.fetch_data(s.dset_name, return_X_y=True)
        X_train, X_test, y_train, y_test = train_test_split(X,
                                                            y)  # get test set
        seed(p.seed)
        X_train, y_train = shuffle(X_train, y_train)
        p.num_features = X_train.shape[1]
        p.n_train = int(p.n_train_over_num_features * p.num_features)
        '''
        while p.n_train <= X_train.shape[0]:
            X_train = np.vstack((X_train, 
                                 1e-3 * np.random.randn(X_train.shape[0], X_train.shape[1])))
            y_train = np.vstack((y_train, y_train))
        '''
        if p.n_train > X_train.shape[0]:
            print('this value of n too large')
            exit(0)
        elif p.n_train <= 1:
            print('this value of n too small')
            exit(0)
        else:
            X_train = X_train[:p.n_train]
            y_train = y_train[:p.n_train]

    #################################################################### FITTING ##############################################################

    if p.model_type != 'rf':

        # fit model
        if p.model_type == 'linear_sta':
            s.w = X_train.T @ y_train / X_train.shape[0]
        elif 'mdl' in p.model_type:
            if p.model_type == 'mdl_orig':
                U, sv, Vh = npl.svd(X_train / np.sqrt(p.n_train))
                a = U.T @ y_train  # / (np.sqrt(p.n_train) * p.noise_std)
                a = a[:sv.size]

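                # in the SVD basis each component pays a data-fit cost
                # a_i^2 / (1 + s_i^2 / l) plus a complexity cost
                # log(1 + s_i^2 / l); minimizing over l picks the ridge
                # penalty with the shortest MDL-style description length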
                def mdl_loss(l):
                    return np.sum(
                        np.square(a) / (1 + np.square(sv) / l) +
                        np.log(1 + np.square(sv) / l))

                opt_solved = minimize(mdl_loss, x0=1e-10)
                s.lambda_opt = opt_solved.x
                s.loss_val = opt_solved.fun
                inv = npl.pinv(X_train.T @ X_train / p.n_train +
                               s.lambda_opt * np.eye(p.num_features))
                s.w = inv @ X_train.T @ y_train / p.n_train
            elif p.model_type == 'mdl_m1':
                # X.T @ X is symmetric PSD, so use eigvalsh (plain eig can
                # return spurious complex parts); eigenvectors aren't needed
                eigenvals = npl.eigvalsh(X_train.T @ X_train)
                var = p.noise_std**2

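                # mdl1_loss is a penalized negative log-likelihood: squared
                # error and an L2 penalty on thetahat, both scaled by 2*var,
                # plus 0.5 * sum(log((eig_i + l) / l)) for coding the
                # coefficients under a ridge prior with penalty l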
                def mdl1_loss(l):
                    inv = npl.pinv(X_train.T @ X_train +
                                   l * np.eye(p.num_features))
                    thetahat = inv @ X_train.T @ y_train
                    mse_norm = npl.norm(y_train -
                                        X_train @ thetahat)**2 / (2 * var)
                    theta_norm = npl.norm(thetahat)**2 / (2 * var)
                    eigensum = 0.5 * np.sum(np.log((eigenvals + l) / l))
                    return mse_norm + theta_norm + eigensum

                opt_solved = minimize(mdl1_loss, x0=1e-10)
                s.lambda_opt = opt_solved.x
                s.loss_val = opt_solved.fun
                inv = npl.pinv(X_train.T @ X_train +
                               s.lambda_opt * np.eye(p.num_features))
                s.w = inv @ X_train.T @ y_train
        else:
            if p.model_type == 'ols':
                m = LinearRegression(fit_intercept=False)
            elif p.model_type == 'lasso':
                m = Lasso(fit_intercept=False, alpha=p.reg_param)
            elif p.model_type == 'ridge':
                if p.reg_param == -1:
                    m = RidgeCV(fit_intercept=False,
                                alphas=np.logspace(-3, 3, num=10, base=10))
                else:
                    m = Ridge(fit_intercept=False, alpha=p.reg_param)

            m.fit(X_train, y_train)
            if p.reg_param == -1:
                s.lambda_opt = m.alpha_
            s.w = m.coef_

        # save effective degrees of freedom via the ridge hat matrix
        if p.model_type == 'ridge':
            # use the CV-selected penalty when reg_param == -1 (RidgeCV case)
            lam = s.lambda_opt if p.reg_param == -1 else p.reg_param
            H = X_train @ np.linalg.pinv(X_train.T @ X_train + lam *
                                         np.eye(X_train.shape[1])) @ X_train.T
            s.df1 = np.trace(H @ H.T)
            s.df2 = np.trace(2 * H - H.T @ H)
            s.df3 = np.trace(H)
        else:
            s.df1 = min(p.n_train, p.num_features)
            s.df2 = s.df1
            s.df3 = s.df1

        # store predictions and things about w
        # s.H_trace = np.trace(H)
        s.wnorm = np.linalg.norm(s.w)
        s.num_nonzero = np.count_nonzero(s.w)
        s.preds_train = X_train @ s.w
        s.preds_test = X_test @ s.w

    elif p.model_type == 'rf':
        rf = RandomForestRegressor(n_estimators=p.num_trees,
                                   max_depth=p.max_depth)
        rf.fit(X_train, y_train)
        s.preds_train = rf.predict(X_train)
        s.preds_test = rf.predict(X_test)

    # record train and test error
    s.train_mse = metrics.mean_squared_error(y_train, s.preds_train)
    s.test_mse = metrics.mean_squared_error(y_test, s.preds_test)

    save(out_name, p, s)
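
# A self-contained sketch of the 'mdl_orig' penalty selection above on
# synthetic data (sizes, seeds, and the bounded optimizer are illustrative
# assumptions; bounds keep the penalty positive so the log stays defined)
import numpy as np
import numpy.linalg as npl
from scipy.optimize import minimize

n, d = 100, 20
rng = np.random.RandomState(0)
X = rng.randn(n, d)
y = X @ rng.randn(d) + 0.1 * rng.randn(n)

U, sv, _ = npl.svd(X / np.sqrt(n))
a = (U.T @ y)[:sv.size]  # project y onto the left singular vectors

def mdl_loss(l):
    # data-fit term shrunk by the penalty plus a log complexity term
    return np.sum(np.square(a) / (1 + np.square(sv) / l) +
                  np.log(1 + np.square(sv) / l))

lam = minimize(mdl_loss, x0=np.array([1e-2]),
               bounds=[(1e-8, None)]).x.item()
w = npl.pinv(X.T @ X / n + lam * np.eye(d)) @ X.T @ y / n  # ridge solution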
Example #4
import sys
from copy import deepcopy
from os.path import join as oj

import torch
import torch.nn as nn
from torch.autograd import Variable
from tqdm import tqdm

# data, init, optimization, models, stats, S, save, seed, reduce_model, and
# the singular-value helpers are project-local (not shown in this snippet)


def fit_vision(p):
    out_name = p._str(p)  # generate random fname str before saving
    seed(p)
    use_cuda = torch.cuda.is_available()
    device = 'cuda' if use_cuda else 'cpu'

    # pick dataset and model
    print('loading dset...')
    train_loader, test_loader = data.get_data_loaders(p)
    X_train, Y_train_onehot = data.get_XY(train_loader)
    model = data.get_model(p, X_train, Y_train_onehot)
    init.initialize_weights(p, X_train, Y_train_onehot, model)

    # set up optimizer and freeze appropriate layers
    model, optimizer = optimization.freeze_and_set_lr(p, model, it=0)

    def reg_init(p):
        if p.lambda_reg == 0:
            return None

        # load the gan
        gan_dir = '/accounts/projects/vision/chandan/gan/mnist_dcgan'
        sys.path.insert(1, gan_dir)
        from dcgan import Discriminator
        D = Discriminator(
            ngpu=1 if torch.cuda.is_available() else 0).to(device)
        D.load_state_dict(
            torch.load(oj(gan_dir, 'weights/netD_epoch_99.pth'),
                       map_location=device))
        D = D.eval()
        return D

    def reg(p, it, model, D, device):
        if p.lambda_reg == 0:
            return 0

        exs = model.exs.reshape(model.exs.shape[0], 1, 28,
                                28)  # mnist-specific
        outputs = D(exs)

        # discriminator outputs 1 for real, 0 for fake
        loss = p.lambda_reg * torch.sum(1 - outputs)
        return loss

    model = model.to(device)
    criterion = nn.CrossEntropyLoss()
    if 'linear' in p.dset:
        criterion = nn.MSELoss()
    reg_model = reg_init(p)

    # things to record
    s = S(p)
    s.weight_names = models.get_weight_names(model)
    if p.siamese:
        s.exs = model.exs.data.cpu().numpy()

    # run
    print('training...')
    for it in tqdm(range(p.num_iters)):

        # calc stats and record
        (s.losses_train[it], s.accs_train[it], s.confidence_unn_train[it],
         s.confidence_norm_train[it], s.margin_unn_train[it],
         s.margin_norm_train[it]) = stats.calc_loss_acc_margins(
             train_loader, p.batch_size, use_cuda, model, criterion, p.dset)
        (s.losses_test[it], s.accs_test[it], s.confidence_unn_test[it],
         s.confidence_norm_test[it], s.margin_unn_test[it],
         s.margin_norm_test[it]) = stats.calc_loss_acc_margins(
             test_loader, p.batch_size, use_cuda, model, criterion, p.dset,
             print_loss=True)

        # record weights
        weight_dict = deepcopy(
            {x[0]: x[1].data.cpu().numpy()
             for x in model.named_parameters()})
        s.weights_first10[p.its[it]] = deepcopy(
            model.state_dict()[s.weight_names[0]][:20].cpu().numpy())
        s.weight_norms[p.its[it]] = stats.layer_norms(model.state_dict())
        if it % p.save_all_weights_freq == 0 or it == p.num_iters - 1 or it == 0 or (
                it < p.num_iters_small
                and it % 2 == 0):  # save first, last, jumps
            s.weights[p.its[it]] = weight_dict
            if not p.use_conv:
                s.mean_max_corrs[p.its[it]] = stats.calc_max_corr_input(
                    X_train, Y_train_onehot, model)

        if p.save_singular_vals:
            # weight singular vals
            s.singular_val_dicts.append(
                get_singular_vals_from_weight_dict(weight_dict))
            s.singular_val_dicts_cosine.append(
                get_singular_vals_kernels(weight_dict, 'cosine'))
            s.singular_val_dicts_rbf.append(
                get_singular_vals_kernels(weight_dict, 'rbf'))
            s.singular_val_dicts_lap.append(
                get_singular_vals_kernels(weight_dict, 'laplacian'))

            # activations singular vals
            act_var_dicts = calc_activation_dims(
                use_cuda,
                model,
                train_loader.dataset,
                test_loader.dataset,
                calc_activations=p.calc_activations)
            s.act_singular_val_dicts_train.append(
                act_var_dicts['train']['pca'])
            s.act_singular_val_dicts_test.append(act_var_dicts['test']['pca'])
            s.act_singular_val_dicts_train_rbf.append(
                act_var_dicts['train']['rbf'])
            s.act_singular_val_dicts_test_rbf.append(
                act_var_dicts['test']['rbf'])

        # reduced model
        if p.save_reduce:
            model_r = reduce_model(model)
            s.losses_train_r[it], s.accs_train_r[
                it] = stats.calc_loss_acc_margins(train_loader, p.batch_size,
                                                  use_cuda, model_r, criterion,
                                                  p.dset)[:2]
            s.losses_test_r[it], s.accs_test_r[
                it] = stats.calc_loss_acc_margins(test_loader, p.batch_size,
                                                  use_cuda, model_r, criterion,
                                                  p.dset)[:2]

        # training
        for batch_idx, (x, target) in enumerate(train_loader):
            optimizer.zero_grad()
            x = x.to(device)
            target = target.to(device)
            x, target = Variable(x), Variable(target)
            out = model(x)
            loss = criterion(out, target) + reg(p, it, model, reg_model,
                                                device)
            loss.backward()
            optimizer.step()

            # don't go through whole dataset
            if (batch_idx > len(train_loader) / p.saves_per_iter
                    and it <= p.saves_per_iter * p.saves_per_iter_end + 1):
                break

        # set lr / freeze
        if it - p.num_iters_small in p.lr_ticks:
            model, optimizer = optimization.freeze_and_set_lr(p, model, it)

        if it % p.save_all_freq == 0:
            save(out_name, p, s)

        # check for need to flip dset
        if 'flip' in p.dset and it == p.num_iters // 2:
            print('flipped dset')
            s.flip_iter = p.num_iters // 2  # flip_iter tells when dset flipped
            train_loader, test_loader = data.get_data_loaders(p,
                                                              it=s.flip_iter)
            X_train, Y_train_onehot = data.get_XY(train_loader)
            if p.flip_freeze:
                p.freeze = 'last'
                model, optimizer = optimization.freeze_and_set_lr(p, model, it)
        elif 'permute' in p.dset and it > 0 and p.its[it] % p.change_freq == 0:
            s.permute_rng.append(int(p.its[it]))
            train_loader, test_loader = data.get_data_loaders(
                p, it=s.permute_rng[-1])
            X_train, Y_train_onehot = data.get_XY(train_loader)

    save(out_name, p, s)
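
# A hypothetical invocation mirroring how the other snippets configure runs
# (params_fit and its p object are project-local; this is an assumption)
if __name__ == '__main__':
    from params_fit import p
    fit_vision(p)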