Example #1
# weight_decay = 1000 / len(lr_data_sets.train.labels)
batch_size = 100
initial_learning_rate = 0.001
keep_probs = None
decay_epochs = [1000, 10000]
max_lbfgs_iter = 1000

tf.reset_default_graph()

tf_model = BinaryLogisticRegressionWithLBFGS(
    input_dim=input_dim,
    weight_decay=weight_decay,
    max_lbfgs_iter=max_lbfgs_iter,
    num_classes=num_classes,
    batch_size=batch_size,
    data_sets=data_sets,
    initial_learning_rate=initial_learning_rate,
    keep_probs=keep_probs,
    decay_epochs=decay_epochs,
    mini_batch=False,
    train_dir='output',
    log_dir='log',
    model_name='spam_logreg_lbfgs')

tf_model.train()

test_idx = 8
actual_loss_diffs, predicted_loss_diffs_cg, indices_to_remove = experiments.test_retraining(
    tf_model,
    test_idx,
    iter_to_load=0,
    force_refresh=False)
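
test_retraining compares the influence-function predictions against actual leave-one-out retraining, so the two returned arrays should agree closely. A minimal, illustrative check of that agreement (assuming scipy is installed; not part of the original example):

from scipy.stats import pearsonr

# A good influence approximation should track actual retraining closely.
corr, _ = pearsonr(actual_loss_diffs, predicted_loss_diffs_cg)
print('Pearson correlation (actual vs. predicted loss diffs): %.3f' % corr)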
Example #2
batch_size = 1000
initial_learning_rate = 0.001
keep_probs = None
decay_epochs = [1000, 10000]
max_lbfgs_iter = 1000
num_classes = 2

tf.reset_default_graph()

inception_model = BinaryLogisticRegressionWithLBFGS(
    input_dim=input_dim,
    weight_decay=weight_decay,
    max_lbfgs_iter=max_lbfgs_iter,
    num_classes=num_classes,
    batch_size=batch_size,
    data_sets=data_sets,
    initial_learning_rate=initial_learning_rate,
    keep_probs=keep_probs,
    decay_epochs=decay_epochs,
    mini_batch=False,
    train_dir='output',
    log_dir='log',
    model_name='%s_inception_onlytop' % dataset_name)

inception_model.train()

inception_predicted_loss_diffs = inception_model.get_influence_on_test_loss(
    [test_idx],
    np.arange(len(inception_model.data_sets.train.labels)),
    force_refresh=True)

x_test = X_test[test_idx, :]
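
inception_predicted_loss_diffs holds one estimate per training example. As an illustrative follow-up (not in the original snippet), the training points can be ranked by their predicted effect on the test loss:

import numpy as np

# Largest predicted loss increases upon removal come last in an ascending sort.
sorted_indices = np.argsort(inception_predicted_loss_diffs)
print('Most influential training indices:', sorted_indices[-5:][::-1])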
Example #3
inception_data_sets = base.Datasets(train=train,
                                    validation=validation,
                                    test=test)

print('*** Top:')
with top_graph.as_default():
    top_model_name = '%s_inception_onlytop_wd-%s' % (dataset_name,
                                                     weight_decay)
    input_dim = 2048
    top_model = BinaryLogisticRegressionWithLBFGS(
        input_dim=input_dim,
        weight_decay=weight_decay,
        max_lbfgs_iter=max_lbfgs_iter,
        num_classes=num_classes,
        batch_size=batch_size,
        data_sets=inception_data_sets,
        initial_learning_rate=initial_learning_rate,
        keep_probs=keep_probs,
        decay_epochs=decay_epochs,
        mini_batch=False,
        train_dir='output',
        log_dir='log',
        model_name=top_model_name)
    top_model.train()
    weights = top_model.sess.run(top_model.weights)
    orig_weight_path = 'output/inception_weights_%s.npy' % top_model_name
    np.save(orig_weight_path, weights)

with full_graph.as_default():
    full_model.load_weights_from_disk(orig_weight_path,
                                      do_save=False,
                                      do_check=True)
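
This snippet trains only the 2048-dimensional top layer on precomputed Inception features, saves those weights, and then loads them into the full end-to-end graph (with do_check=True verifying the copy), so both graphs end up sharing the same classifier.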
Example #4
batch_size = 100
initial_learning_rate = 0.001
keep_probs = None
max_lbfgs_iter = 1000  # try adjusting this to get the lowest validation error
decay_epochs = [1000, 10000]

tf.reset_default_graph()

tf_model = BinaryLogisticRegressionWithLBFGS(
    input_dim=input_dim,
    weight_decay=weight_decay,
    max_lbfgs_iter=max_lbfgs_iter,
    num_classes=num_classes,
    batch_size=batch_size,
    data_sets=data_sets,
    initial_learning_rate=initial_learning_rate,
    # damping=1e-2,  # try changing?
    keep_probs=keep_probs,
    decay_epochs=decay_epochs,
    mini_batch=False,
    train_dir='output',
    log_dir='log',
    model_name='mnist_small_binlogreg_lbfgs_2_classes_' + str(digit1) + '_' +
    str(digit2))

tf_model.train()

X_train = np.copy(tf_model.data_sets.train.x)
print('X_train.shape', X_train.shape)
Y_train = np.copy(tf_model.data_sets.train.labels)
X_test = np.copy(tf_model.data_sets.test.x)
Y_test = np.copy(tf_model.data_sets.test.labels)
Example #5
    # (this snippet begins inside the binary branch of an if/else; the matching
    # 'else' below handles the multiclass case)
    initial_learning_rate = 0.001
    keep_probs = None
    decay_epochs = [1000, 10000]
    max_lbfgs_iter = 1000

    num_params = 784

    tf.reset_default_graph()

    tf_model = BinaryLogisticRegressionWithLBFGS(
        input_dim=input_dim,
        weight_decay=weight_decay,
        max_lbfgs_iter=max_lbfgs_iter,
        num_classes=num_classes, 
        batch_size=batch_size,
        data_sets=lr_data_sets,
        initial_learning_rate=initial_learning_rate,
        keep_probs=keep_probs,
        decay_epochs=decay_epochs,
        mini_batch=False,
        train_dir='output',
        log_dir='log',
        model_name='mnist-%dvs%d-logreg'%(pos_class, neg_class))

    tf_model.train()
    #tf_model.load_checkpoint(0)

else:
    print('Performing multiclass setting')
    num_classes = 10
    X_train = data_sets.train.x
    Y_train = data_sets.train.labels
    lr_train = DataSet(X_train, np.array((Y_train + 1) / 2, dtype=int))
    lr_validation = None
    lr_test = DataSet(X_test, np.array((Y_test + 1) / 2, dtype=int))
    lr_data_sets = base.Datasets(train=lr_train,
                                 validation=lr_validation,
                                 test=lr_test)

    tf.reset_default_graph()

    tf_model = BinaryLogisticRegressionWithLBFGS(
        input_dim=input_dim,
        weight_decay=weight_decay,
        max_lbfgs_iter=max_lbfgs_iter,
        num_classes=num_classes,
        batch_size=batch_size,
        data_sets=lr_data_sets,
        initial_learning_rate=initial_learning_rate,
        keep_probs=keep_probs,
        decay_epochs=decay_epochs,
        mini_batch=False,
        train_dir='output',
        log_dir='log',
        model_name='mnist-17_logreg')

    tf_model.train()

    class_wise_7 = X_test[Y_test == -1]
    class_wise_1 = X_test[Y_test == 1]

    numClusters = 1
    randomState = 0
    # The source cuts this call off mid-line; it is completed here with the
    # locally defined random state and a fit on the class-7 points (assumed).
    fittedKmeans_7 = KMeans(n_clusters=numClusters,
                            random_state=randomState).fit(class_wise_7)
Example #6
# weight_decay = 1000 / len(lr_data_sets.train.labels)
batch_size = 100
initial_learning_rate = 0.001
keep_probs = None
decay_epochs = [1000, 10000]
max_lbfgs_iter = 1000

tf.reset_default_graph()

tf_model = BinaryLogisticRegressionWithLBFGS(
    input_dim=input_dim,
    weight_decay=weight_decay,
    max_lbfgs_iter=max_lbfgs_iter,
    num_classes=num_classes,
    batch_size=batch_size,
    data_sets=data_sets,
    initial_learning_rate=initial_learning_rate,
    keep_probs=keep_probs,
    decay_epochs=decay_epochs,
    mini_batch=False,
    train_dir='output',
    log_dir='log',
    model_name='spam_logreg_generic_loo_influence')

tf_model.train()

X_train = np.copy(tf_model.data_sets.train.x)
Y_train = np.copy(tf_model.data_sets.train.labels)
X_test = np.copy(tf_model.data_sets.test.x)
Y_test = np.copy(tf_model.data_sets.test.labels)

num_train_examples = Y_train.shape[0]
Example #7
def run_spam(ex_to_leave_out=None, num_examples=None):
    """
    If ex_to_leave_out is None, don't leave any example out; otherwise, leave
    out the example at the specified index. If num_examples is None, use all
    the examples.
    """
    data_sets = load_spam(ex_to_leave_out=ex_to_leave_out,
                          num_examples=num_examples)
    # "Spam" and "Ham"
    num_classes = 2

    input_dim = data_sets.train.x.shape[1]
    weight_decay = 0.0001
    # weight_decay = 1000 / len(lr_data_sets.train.labels)
    batch_size = 100
    initial_learning_rate = 0.001
    keep_probs = None
    decay_epochs = [1000, 10000]
    max_lbfgs_iter = 1000

    tf.reset_default_graph()

    tf_model = BinaryLogisticRegressionWithLBFGS(
        input_dim=input_dim,
        weight_decay=weight_decay,
        max_lbfgs_iter=max_lbfgs_iter,
        num_classes=num_classes,
        batch_size=batch_size,
        data_sets=data_sets,
        initial_learning_rate=initial_learning_rate,
        keep_probs=keep_probs,
        decay_epochs=decay_epochs,
        mini_batch=False,
        train_dir='output',
        log_dir='log',
        model_name='spam_logreg')

    tf_model.train()

    # NMV 7/26: appears to be unused right now.
    # X_train = np.copy(tf_model.data_sets.train.x)
    # Y_train = np.copy(tf_model.data_sets.train.labels)
    # X_test = np.copy(tf_model.data_sets.test.x)
    # Y_test = np.copy(tf_model.data_sets.test.labels)

    # num_train_examples = Y_train.shape[0]
    # num_flip_vals = 6
    # num_check_vals = 6
    # num_random_seeds = 40

    # dims = (num_flip_vals, num_check_vals, num_random_seeds, 3)
    # fixed_influence_loo_results = np.zeros(dims)
    # fixed_loss_results = np.zeros(dims)
    # fixed_random_results = np.zeros(dims)

    #flipped_results = np.zeros((num_flip_vals, num_random_seeds, 3))

    orig_results = tf_model.sess.run(
        [tf_model.loss_no_reg, tf_model.accuracy_op],
        feed_dict=tf_model.all_test_feed_dict)
    #print('Orig loss: %.5f. Accuracy: %.3f' % (orig_results[0], orig_results[1]))
    result = [tf_model, orig_results]
    return result
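
Because run_spam returns the trained model together with its test loss and accuracy, a single leave-one-out comparison can be sketched as follows (illustrative usage, assuming load_spam behaves as its name suggests):

# Train on the full data, then retrain with training example 0 held out,
# and report the resulting change in test loss.
full_model, (full_loss, full_acc) = run_spam()
loo_model, (loo_loss, loo_acc) = run_spam(ex_to_leave_out=0)
print('Leave-one-out change in test loss: %.5f' % (loo_loss - full_loss))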
Example #9
# weight_decay = 1000 / len(lr_data_sets.train.labels)
batch_size = 100
initial_learning_rate = 0.001
keep_probs = None
decay_epochs = [1000, 10000]
max_lbfgs_iter = 1000

tf.reset_default_graph()

tf_model = BinaryLogisticRegressionWithLBFGS(
    input_dim=input_dim,
    weight_decay=weight_decay,
    max_lbfgs_iter=max_lbfgs_iter,
    num_classes=num_classes,
    batch_size=batch_size,
    data_sets=data_sets,
    initial_learning_rate=initial_learning_rate,
    keep_probs=keep_probs,
    decay_epochs=decay_epochs,
    mini_batch=False,
    train_dir='output',
    log_dir='log',
    model_name='spam_logreg')

tf_model.train()

X_train = np.copy(tf_model.data_sets.train.x)
Y_train = np.copy(tf_model.data_sets.train.labels)
print("len(Y_train)", len(Y_train))  #4137
X_test = np.copy(tf_model.data_sets.test.x)
Y_test = np.copy(tf_model.data_sets.test.labels)
print("len(Y_test)", len(Y_test))  #1035
Example #10
    def init_model(self):
        """
        Initialize a tf model based on model_name and datasets
        """

        # TODO: make it easier to use non-default hyperparams?

        # we can always infer the number of classes from the training data
        num_classes = len(set(self.data_sets.train.labels))
        model_name = self.task + '_' + self.model_name
        print('Num classes', num_classes)
        if self.model_name == 'binary_logistic':
            #num_classes = 2
            assert num_classes == 2
            weight_decay = 0.0001
            batch_size = 100
            initial_learning_rate = 0.001
            keep_probs = None
            decay_epochs = [1000, 10000]
            max_lbfgs_iter = 1000

            self.model = BinaryLogisticRegressionWithLBFGS(
                input_dim=self.input_dim,
                weight_decay=weight_decay,
                max_lbfgs_iter=max_lbfgs_iter,
                num_classes=num_classes,
                batch_size=batch_size,
                data_sets=self.data_sets,
                initial_learning_rate=initial_learning_rate,
                keep_probs=keep_probs,
                decay_epochs=decay_epochs,
                mini_batch=False,
                train_dir='output',
                log_dir='log',
                model_name=model_name
            )
        elif self.model_name == 'multi_logistic':
            #num_classes = 10
            weight_decay = 0.01
            batch_size = 1400
            initial_learning_rate = 0.001 
            keep_probs = None
            max_lbfgs_iter = 1000
            decay_epochs = [1000, 10000]

            self.model = LogisticRegressionWithLBFGS(
                input_dim=self.input_dim,
                weight_decay=weight_decay,
                max_lbfgs_iter=max_lbfgs_iter,
                num_classes=num_classes, 
                batch_size=batch_size,
                data_sets=self.data_sets,
                initial_learning_rate=initial_learning_rate,
                keep_probs=keep_probs,
                decay_epochs=decay_epochs,
                mini_batch=False,
                train_dir='output',
                log_dir='log',
                model_name=model_name)

        elif self.model_name == 'cnn':
            assert num_classes == 10
            weight_decay = 0.001
            batch_size = 500

            initial_learning_rate = 0.0001
            decay_epochs = [10000, 20000]
            hidden1_units = 8
            hidden2_units = 8
            hidden3_units = 8
            conv_patch_size = 3
            keep_probs = [1.0, 1.0]

            self.model = All_CNN_C(
                input_side=self.input_side,
                input_channels=self.input_channels,
                conv_patch_size=conv_patch_size,
                hidden1_units=hidden1_units,
                hidden2_units=hidden2_units,
                hidden3_units=hidden3_units,
                weight_decay=weight_decay,
                num_classes=num_classes,
                batch_size=batch_size,
                data_sets=self.data_sets,
                initial_learning_rate=initial_learning_rate,
                damping=1e-2,
                decay_epochs=decay_epochs,
                mini_batch=True,
                train_dir='output',
                log_dir='log',
                model_name=model_name
            )
        elif self.task == 'income':
            num_classes = 2
            input_dim = self.data_sets.train.x.shape[1]
            weight_decay = 0.0001
            # weight_decay = 1000 / len(lr_data_sets.train.labels)
            batch_size = 10
            initial_learning_rate = 0.001
            keep_probs = None
            decay_epochs = [1000, 10000]
            max_lbfgs_iter = 1000

            self.model = BinaryLogisticRegressionWithLBFGS(
                input_dim=input_dim,
                weight_decay=weight_decay,
                max_lbfgs_iter=max_lbfgs_iter,
                num_classes=num_classes,
                batch_size=batch_size,
                data_sets=self.data_sets,
                initial_learning_rate=initial_learning_rate,
                keep_probs=keep_probs,
                decay_epochs=decay_epochs,
                mini_batch=False,
                train_dir='output',
                log_dir='log',
                model_name='income_logreg'
            )
        elif self.model_name == 'hinge_svm':
            #num_classes = 2
            weight_decay = 0.01
            use_bias = False
            batch_size = 100
            initial_learning_rate = 0.001 
            keep_probs = None
            decay_epochs = [1000, 10000]

            temps = [0, 0.001, 0.1]
            num_temps = len(temps)

            num_params = 784

            temp = 0
            self.model = SmoothHinge(
                use_bias=use_bias,
                temp=temp,
                input_dim=self.input_dim,
                weight_decay=weight_decay,
                num_classes=num_classes,
                batch_size=batch_size,
                data_sets=self.data_sets,
                initial_learning_rate=initial_learning_rate,
                keep_probs=keep_probs,
                decay_epochs=decay_epochs,
                mini_batch=False,
                train_dir='output',
                log_dir='log',
                model_name='smooth_hinge_17_t-%s' % temp)
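
The dispatch above relies on num_classes being inferred from the training labels via len(set(...)). A tiny self-contained illustration with made-up labels (not from the source):

import numpy as np

labels = np.array([0, 1, 1, 0, 1])  # toy binary labels
num_classes = len(set(labels))      # -> 2
assert num_classes == 2             # the check the 'binary_logistic' branch makes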
Example #11
initial_learning_rate = 0.001 
keep_probs = None
decay_epochs = [1000, 10000]
max_lbfgs_iter = 1000

num_params = 784

tf.reset_default_graph()

tf_model = BinaryLogisticRegressionWithLBFGS(
    input_dim=input_dim,
    weight_decay=weight_decay,
    max_lbfgs_iter=max_lbfgs_iter,
    num_classes=num_classes, 
    batch_size=batch_size,
    data_sets=lr_data_sets,
    initial_learning_rate=initial_learning_rate,
    keep_probs=keep_probs,
    decay_epochs=decay_epochs,
    mini_batch=False,
    train_dir='output',
    log_dir='log',
    model_name='mnist-17_logreg')

tf_model.train()

num_train = len(tf_model.data_sets.train.labels)

influences = tf_model.get_influence_on_test_loss(
    [test_idx], 
    np.arange(len(tf_model.data_sets.train.labels)),
    force_refresh=True) * num_train
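
Here the raw influence values are rescaled by num_train. As an illustrative follow-up (not in the source), ranking the training points by influence magnitude picks out the ones that matter most for this test example:

import numpy as np

# One influence value per training point; largest magnitudes first.
flat = np.ravel(np.abs(influences))
top_influencers = np.argsort(flat)[::-1][:10]
print('Top-10 most influential training indices:', top_influencers)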