Example #1
0
def sample(args):
    """Restore the newest checkpoint of a saved model and print a sample.

    Loads the pickled parameter object from
    ``./save/<args.params>.model_param``, rebuilds the inference graph and
    restores, among the files matching ``./save/model_<params.id>.ckpt-*``,
    the one with the largest numeric suffix. On success the restored path
    and the result of ``model.sample(sess, args.n, args.prime)`` are
    printed; when nothing matches, a notice is printed instead.

    Parameters:
        args: object exposing the attributes ``params``, ``n`` and ``prime``.

    Raises:
        ValueError: if a matching file name does not end in ``-<integer>``.
    """
    param_path = "./save/{0}.model_param".format(args.params)
    # NOTE: unpickling runs code embedded in the file; only load param files
    # produced by a trusted training run.
    # Binary mode is what pickle expects, and the context manager closes the
    # handle that used to be left open.
    with open(param_path, "rb") as param_file:
        params = pickle.load(param_file)

    model = RegressionModel(params, infer=True)
    model.inference(model.data_placeholder)

    def _iteration_of(path):
        # The step count follows the LAST dash; splitting from the right
        # keeps this correct when the model id itself contains a dash.
        return int(path.rsplit("-", 1)[1])

    with tf.Session() as sess:
        # Initialise first so every variable exists before restore overwrites it.
        tf.initialize_all_variables().run()
        saver = tf.train.Saver(tf.all_variables())
        checkpoints = glob.glob("./save/model_{0}.ckpt-*".format(params.id))
        if not checkpoints:
            print("Unable to restore - no checkpoints found")
            return

        # NOTE(review): the glob also matches any sidecar file sharing the
        # prefix (e.g. a "*.meta" file, if the saver writes one); such a name
        # would raise ValueError in _iteration_of - confirm what is on disk.
        restore_path = max(checkpoints, key=_iteration_of)
        saver.restore(sess, restore_path)
        print("restoring {0}".format(restore_path))
        print(model.sample(sess, args.n, args.prime))
Example #2
0
def fit():
    """Fit a RegressionModel on the JSON request body and persist the results.

    Expects a JSON payload with the keys ``'X'`` and ``'y'``. The fitted
    model, the pickled training arrays and the request URL are each written
    to their DynamoDB table, in that order.

    Returns:
        ``"NO JSON!"`` when the request is not JSON, otherwise ``'ok'``.
    """
    # Guard clause: nothing to do without a JSON body.
    if not request.is_json:
        return "NO JSON!"

    payload = request.json
    features = np.array(payload['X'])
    targets = np.array(payload['y'])

    regressor = RegressionModel()
    regressor.fit(features, targets)

    # Store the fitted model first ...
    serialized_model = pickle.dumps(regressor)
    model_ddb.add_model_to_db(dynamodb, models_table_name, serialized_model)

    # ... then the data it was fitted on ...
    serialized_features = pickle.dumps(features)
    serialized_targets = pickle.dumps(targets)
    data_ddb.add_fit_data_to_db(dynamodb, data_table_name,
                                serialized_features, serialized_targets)

    # ... and finally a record of the request itself.
    request_ddb.add_request_to_db(dynamodb, requests_table_name, request.url)

    return 'ok'
Example #3
0
def main():
    """Train a RegressionModel on one CSV dataset with a random train/test split.

    Reads its configuration from the module-level ``args`` (``num_inducing``,
    ``fname``, ``prop``, ``layers``), loads ``./datasets/<fname>.csv`` without
    a header row, and looks up per-dataset optimiser settings in
    ``settings-uci.pkl`` under the key ``"<fname>-<layers>"``. When that entry
    is missing or too short, built-in defaults are used. Inputs and targets
    are standardised with statistics of the training split only, then passed
    to ``RegressionModel.fit`` together with the unscaled target std.
    """
    num_inducing = args.num_inducing
    data_name = args.fname
    prop = args.prop
    n_layers = args.layers

    with open("settings-uci.pkl", "rb") as settings_file:
        settings = pickle.load(settings_file)

    data = pd.read_csv('./datasets/{}.csv'.format(data_name),
                       header=None).values
    key = data_name + "-{}".format(n_layers)
    try:
        entry = settings[key]
        adam_lr = [entry[0], entry[1], entry[2]]
        max_iter = entry[3]
    except (KeyError, IndexError, TypeError):
        # Only "no usable entry for this dataset" falls back to defaults;
        # anything else (e.g. KeyboardInterrupt) now propagates.
        adam_lr = [0.005, 0.0001, 0.0025]
        max_iter = 20000

    if data_name == "energy":
        # For "energy" the target is the second-to-last column and the last
        # column is left out of both inputs and targets.
        X_full = data[:, :-2]
        Y_full = data[:, -2:-1]
    else:
        X_full = data[:, :-1]
        Y_full = data[:, -1:]

    N = X_full.shape[0]
    n = int(N * prop)

    # Fixed seed so the split is the same on every run.
    np.random.seed(0)
    ind = np.arange(N)
    np.random.shuffle(ind)
    train_ind = ind[:n]
    test_ind = ind[n:]

    X = X_full[train_ind]
    Xs = X_full[test_ind]
    Y = Y_full[train_ind]
    Ys = Y_full[test_ind]

    # Standardise with training-split statistics only.
    X_mean = np.mean(X, 0)
    X_std = np.std(X, 0)
    # A feature that is constant over the training split has zero std;
    # dividing by it would turn the column into NaN/inf, so scale by 1.
    X_std = np.where(X_std == 0, 1.0, X_std)
    Y_mean = np.mean(Y, 0)
    Y_std = np.std(Y, 0)

    X = (X - X_mean) / X_std
    Xs = (Xs - X_mean) / X_std
    Y = (Y - Y_mean) / Y_std
    Ys = (Ys - Y_mean) / Y_std

    model = RegressionModel(adam_lr, max_iter, n_layers, num_inducing)
    model.fit(X, Y, Xs, Ys, Y_std)
Example #4
0
def main(X_filename='data/X_train.json',
         y_filename='data/y_train.json',
         model_filename='regression_model.pkl'):
    """
    Init RegressionModel object, call its `fit` method on the data loaded from
    the given JSON files, and save the fitted model as a `pickle`.

    **Parameters**:

    - `X_filename`: filename where object-feature matrix is saved
    - `y_filename`: filename of `y` values from `f(X) = y`
    - `model_filename`: name of file to save model
    """

    # Context managers close the input files, which were previously left open.
    with codecs.open(X_filename, 'r', encoding='utf-8') as x_file:
        X = np.array(json.load(x_file))
    with codecs.open(y_filename, 'r', encoding='utf-8') as y_file:
        y = np.array(json.load(y_file))

    reg_model = RegressionModel()
    reg_model.fit(X, y)

    with open(model_filename, 'wb') as f:
        pickle.dump(reg_model, f)
Example #5
0
def sample(args):
    """Restore the newest checkpoint of a saved model and print a sample.

    Loads the pickled parameter object from
    ``./save/<args.params>.model_param``, rebuilds the inference graph and
    restores, among the files matching ``./save/model_<params.id>.ckpt-*``,
    the one with the largest numeric suffix. On success the restored path
    and the result of ``model.sample(sess, args.n, args.prime)`` are
    printed; when nothing matches, a notice is printed instead.

    Parameters:
        args: object exposing the attributes ``params``, ``n`` and ``prime``.

    Raises:
        ValueError: if a matching file name does not end in ``-<integer>``.
    """
    param_path = './save/{0}.model_param'.format(args.params)
    # NOTE: unpickling runs code embedded in the file; only load param files
    # produced by a trusted training run.
    # Binary mode is what pickle expects, and the context manager closes the
    # handle that used to be left open.
    with open(param_path, 'rb') as param_file:
        params = pickle.load(param_file)

    model = RegressionModel(params, infer=True)
    model.inference(model.data_placeholder)

    def _iteration_of(path):
        # The step count follows the LAST dash; splitting from the right
        # keeps this correct when the model id itself contains a dash.
        return int(path.rsplit('-', 1)[1])

    with tf.Session() as sess:
        # Initialise first so every variable exists before restore overwrites it.
        tf.initialize_all_variables().run()
        saver = tf.train.Saver(tf.all_variables())
        checkpoints = glob.glob("./save/model_{0}.ckpt-*".format(params.id))
        if not checkpoints:
            print("Unable to restore - no checkpoints found")
            return

        # NOTE(review): the glob also matches any sidecar file sharing the
        # prefix (e.g. a "*.meta" file, if the saver writes one); such a name
        # would raise ValueError in _iteration_of - confirm what is on disk.
        restore_path = max(checkpoints, key=_iteration_of)
        saver.restore(sess, restore_path)
        print("restoring {0}".format(restore_path))
        print(model.sample(sess, args.n, args.prime))
Example #6
0
def eval_regression(task):
    """Evaluate a saved regression model and dump per-sample errors to CSV.

    Loads the state dict stored at ``./model/<task>.model`` into a freshly
    built ``RegressionModel``, runs it over the test loader obtained from
    ``train_regression.get_MOSEI_loaders('./data/regression.pkl', ...)`` and
    collects label/prediction pairs for the entries whose mask equals 1.
    Prints the mean absolute error and the agreement rate of the
    ``discretize``-d labels and predictions, and writes the table, sorted by
    absolute error (largest first), to ``./analysis/<task>.csv``.

    Parameters:
        task: name used to build both the model path and the output CSV path.
    """
    model_path = './model/{}.model'.format(task)

    D_m_text, D_m_audio, D_m_video, D_m_context = 300, 384, 35, 300
    D_g, D_p, D_e, D_h, D_a = 150, 150, 100, 100, 100

    cuda = torch.cuda.is_available()

    print('Loading model...')
    model = RegressionModel(D_m_text,
                            D_m_audio,
                            D_m_video,
                            D_m_context,
                            D_g,
                            D_p,
                            D_e,
                            D_h,
                            dropout_rec=0.1,
                            dropout=0.25)
    if cuda:
        model.cuda()
    # Without a GPU, remap the stored tensors to the CPU so that a checkpoint
    # saved from a CUDA model can still be deserialised. With a GPU the
    # default (restore to the saved device) is kept.
    map_location = None if cuda else 'cpu'
    model.load_state_dict(torch.load(model_path, map_location=map_location))

    loss_function = MaskedMSELoss()

    print('Evaluating model...')
    _, _, test_loader = train_regression.get_MOSEI_loaders(
        './data/regression.pkl', valid=0.0, batch_size=128, num_workers=0)

    # No optimizer is passed (None), so this call only evaluates.
    _, mae, _, labels, preds, masks, sample_ids = train_regression.train_or_eval_model(
        model, loss_function, test_loader, None, cuda)

    # gather labels and predictions, keeping only unmasked entries
    df = pd.DataFrame([(sample_id, label, pred)
                       for label, pred, mask, sample_id in zip(
                           labels, preds, masks, sample_ids) if mask == 1],
                      columns=['sample_id', 'label', 'pred'])
    df['diff'] = (df.label - df.pred).abs()
    df['label_class'] = df.label.apply(discretize)
    df['pred_class'] = df.pred.apply(discretize)
    df = df.sort_values(by='diff', ascending=False)

    if_correct = df.label_class == df.pred_class

    print('mae =', mean_absolute_error(df.label, df.pred))
    print('acc =', if_correct.sum() / len(if_correct))
    df.to_csv('./analysis/{}.csv'.format(task), index=False)
Example #7
0
def main():

    params = Params()

    model = RegressionModel(params)

    # Use functions of the model to build the graph

    out, states = model.inference(model.data_placeholder)
    loss = model.loss(out, model.labels_placeholder)
    train_op = model.train(loss, params.step_size)

    # Create a session for running Ops on the Graph.
    sess = tf.Session()

    # Run the Op to initialize the variables.
    init = tf.initialize_all_variables()
    sess.run(init)
    saver = tf.train.Saver(tf.all_variables())

    for i in range(params.train_steps + 1):
        data, labels = get_batch(params.batch_size, params.sequence_length,
                                 params.input_channels)
        feed_dict = {
            model.data_placeholder: data,
            model.labels_placeholder: labels
        }

        # Run one step of the model.  The return values are the activations
        # from the `train_op` (which is discarded) and the `loss` Op.  To
        # inspect the values of your Ops or variables, you may include them
        # in the list passed to sess.run() and the value tensors will be
        # returned in the tuple from the call.
        _, loss_value = sess.run([train_op, loss], feed_dict=feed_dict)
        if i % params.print_every == 0:
            print i, loss_value
        if i % params.save_every == 0:
            name = "model_{0}.ckpt".format(params.get_id)
            checkpoint_path = os.path.join('./save', name)
            # TODO: If we restore a model for further training, we should
            # add the number of training steps it had completed to our global step here
            saver.save(sess, checkpoint_path, global_step=i)
            print "model saved to {0}-{1}".format(checkpoint_path, i)
            with open('./save/{0}.model_param'.format(params.get_id),
                      'w') as f:
                pickle.dump(
                    params,
                    f,
                    protocol=2  # pickle.HIGHEST_PROTOCOL as of writing
                )

    data, labels = get_batch(params.batch_size, params.sequence_length,
                             params.input_channels)
    feed_dict = {
        model.data_placeholder: data,
        model.labels_placeholder: labels
    }

    vars = sess.run(out + states, feed_dict)
    out_ = vars[0:len(out)]
    states_ = vars[len(out) + 1:]

    d = data[0, 0, :]
    o = np.array(out_)[:, 0, 0]
    l = labels[0, 0, :]

    x1 = range(d.shape[0])
    x2 = range(1, d.shape[0] + 1)
    # TODO: output graph every 100 steps
    plt.scatter(x1, d, c='r')
    plt.scatter(x2, o, c='g')
    plt.scatter(x2, l, c='b', alpha=0.5)
    plt.show()

    print "data third dim", d

    print "out", o
    # print "states", np.array(states_)

    print "labels third dim", l
    args.cuda = torch.cuda.is_available() and not args.no_cuda
    if args.cuda:
        print('Running on GPU')
    else:
        print('Running on CPU')
    print("Tensorboard logs in " + args.log_dir)

    batch_size = args.batch_size
    n_classes  = 6
    cuda       = args.cuda
    n_epochs   = args.epochs
    D_m_text, D_m_audio, D_m_video, D_m_context = 300, 384, 35, 300
    D_g, D_p, D_e, D_h, D_a = 150, 150, 100, 100, 100

    # Instantiate model
    model = RegressionModel(D_m_text, D_m_audio, D_m_video, D_m_context, D_g, D_p, D_e, D_h, dropout_rec=args.rec_dropout, dropout=args.dropout)

    if cuda:
        model.cuda()
    loss_function = MaskedMSELoss()

    # Get optimizer and relevant dataloaders
    optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.l2)
    train_loader, valid_loader, test_loader = get_MOSEI_loaders('./data/regression.pkl', valid=0.0, batch_size=batch_size, num_workers=0)
    best_loss, best_label, best_pred, best_mask, best_pear = None, None, None, None, None

    # Training loop
    for e in tqdm(range(n_epochs), desc = 'MOSEI Regression'):
        train_loss, train_mae, train_pear,_,_,_ = train_or_eval_model(model, loss_function, train_loader, e, optimizer, True)
        test_loss, test_mae, test_pear, test_label, test_pred, test_mask = train_or_eval_model(model, loss_function, test_loader, e)
        writer.add_scalar("Train Loss - MOSEI Regression", train_loss, e)
Example #9
0
def train_toy_example(args):
    """Train the two-task toy regression problem, optionally with GradNorm.

    Builds a ``RegressionDataset`` from two noise scales (``1.0`` and
    ``args.sigma``), wraps ``RegressionModel`` in ``RegressionTrain`` and
    optimises it with Adam for ``args.n_iter`` passes over the data loader.
    When ``args.mode == 'grad_norm'`` the gradient of the task weights is
    replaced by the gradient of the GradNorm loss (exponent ``args.alpha``);
    for any other mode the task-weight gradient stays zeroed. After every
    pass the task weights are renormalised to sum to the number of tasks and
    the per-task losses, loss ratios, weights and GradNorm loss are recorded;
    the recorded curves are plotted at the end.

    Parameters:
        args: object exposing ``sigma``, ``n_iter``, ``mode`` and ``alpha``.
    """

    # set the random seeds for reproducibility
    np.random.seed(123)
    torch.cuda.manual_seed_all(123)
    torch.manual_seed(123)

    use_cuda = torch.cuda.is_available()

    def to_numpy(tensor):
        # `.cpu()` is a no-op for CPU tensors, so one code path serves both
        # devices. The explicit copy matters on the CPU, where `.numpy()`
        # shares memory with the tensor: without it, recorded weights would
        # be overwritten by later in-place optimizer updates.
        return tensor.data.cpu().numpy().copy()

    # define the sigmas, the number of tasks and the epsilons
    # for the toy example
    sigmas = [1.0, float(args.sigma)]
    print('Training toy example with sigmas={}'.format(sigmas))
    n_tasks = len(sigmas)
    epsilons = np.random.normal(scale=3.5,
                                size=(n_tasks, 100, 250)).astype(np.float32)

    # initialize the data loader
    dataset = RegressionDataset(sigmas, epsilons)
    data_loader = data.DataLoader(dataset,
                                  batch_size=200,
                                  num_workers=4,
                                  shuffle=False)

    # initialize the model and use CUDA if available
    model = RegressionTrain(RegressionModel(n_tasks))
    if use_cuda:
        model.cuda()

    # initialize the optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

    n_iterations = int(args.n_iter)
    weights = []
    task_losses = []
    loss_ratios = []
    grad_norm_losses = []

    # Recorded as 0 when GradNorm is not the active mode; previously the
    # name was unbound in that case and the bookkeeping below raised
    # NameError.
    grad_norm_loss = torch.zeros(())

    # run n_iter iterations of training
    for t in range(n_iterations):

        for batch in data_loader:
            # get the X and the targets values
            X = batch[0]
            ts = batch[1]
            if use_cuda:
                X = X.cuda()
                ts = ts.cuda()

            # evaluate each task loss L_i(t): a forward pass through the
            # model that also evaluates the per-task losses
            task_loss = model(X, ts)
            # compute the weighted loss w_i(t) * L_i(t)
            weighted_task_loss = torch.mul(model.weights, task_loss)
            # initialize the initial loss L(0) if t=0
            if t == 0:
                initial_task_loss = to_numpy(task_loss)

            # get the total loss
            loss = torch.sum(weighted_task_loss)
            # clear the gradients
            optimizer.zero_grad()
            # do the backward pass to compute the gradients for the whole set of weights
            # This is equivalent to compute each \nabla_W L_i(t)
            # (the graph is retained because it is differentiated again below)
            loss.backward(retain_graph=True)

            # set the gradients of w_i(t) to zero because these gradients have to be updated using the GradNorm loss
            model.weights.grad.data = model.weights.grad.data * 0.0

            # switch for each weighting algorithm:
            # --> grad norm
            if args.mode == 'grad_norm':

                # get layer of shared weights
                W = model.get_last_shared_layer()

                # get the gradient norms for each of the tasks
                # G^{(i)}_w(t)
                norms = []
                for i in range(len(task_loss)):
                    # get the gradient of this task loss with respect to the shared parameters
                    gygw = torch.autograd.grad(task_loss[i],
                                               W.parameters(),
                                               retain_graph=True)
                    # compute the norm
                    norms.append(
                        torch.norm(torch.mul(model.weights[i], gygw[0])))
                norms = torch.stack(norms)

                # compute the inverse training rate r_i(t)
                # \curl{L}_i
                loss_ratio = to_numpy(task_loss) / initial_task_loss
                # r_i(t)
                inverse_train_rate = loss_ratio / np.mean(loss_ratio)

                # compute the mean norm \tilde{G}_w(t)
                mean_norm = np.mean(to_numpy(norms))

                # compute the GradNorm loss
                # this term has to remain constant, hence it is built from
                # numpy values and carries no gradient
                constant_term = torch.tensor(mean_norm *
                                             (inverse_train_rate**args.alpha),
                                             requires_grad=False)
                if use_cuda:
                    constant_term = constant_term.cuda()
                # this is the GradNorm loss itself. It must stay attached to
                # the graph: wrapping it in torch.tensor(...) copy-constructs
                # a detached leaf, which autograd.grad below cannot
                # differentiate with respect to model.weights.
                grad_norm_loss = torch.sum(torch.abs(norms - constant_term))

                # compute the gradient for the weights
                model.weights.grad = torch.autograd.grad(
                    grad_norm_loss, model.weights)[0]

            # do a step with the optimizer
            optimizer.step()

        # renormalize so the task weights sum to n_tasks
        normalize_coeff = n_tasks / torch.sum(model.weights.data, dim=0)
        model.weights.data = model.weights.data * normalize_coeff

        # record (values of the last batch of this pass)
        task_losses.append(to_numpy(task_loss))
        loss_ratios.append(np.sum(task_losses[-1] / task_losses[0]))
        weights.append(to_numpy(model.weights))
        grad_norm_losses.append(to_numpy(grad_norm_loss))

        if t % 100 == 0:
            print(
                '{}/{}: loss_ratio={}, weights={}, task_loss={}, grad_norm_loss={}'
                .format(t, args.n_iter, loss_ratios[-1],
                        to_numpy(model.weights),
                        to_numpy(task_loss),
                        to_numpy(grad_norm_loss)))

    task_losses = np.array(task_losses)
    weights = np.array(weights)

    plt.rc('text', usetex=True)
    plt.rc('font', family='serif')
    fig = plt.figure()
    ax1 = fig.add_subplot(2, 3, 1)
    ax1.set_title(r'Loss (scale $\sigma_0=1.0$)')
    ax2 = fig.add_subplot(2, 3, 2)
    ax2.set_title(r'Loss (scale $\sigma_1={})$'.format(sigmas[1]))
    ax3 = fig.add_subplot(2, 3, 3)
    ax3.set_title(r"$\sum_i L_i(t) / L_i(0)$")
    ax4 = fig.add_subplot(2, 3, 4)
    ax4.set_title(r'$L_{\text{grad}}$')

    ax5 = fig.add_subplot(2, 3, 5)
    ax5.set_title(r'Change of weights $w_i$ over time')

    ax1.plot(task_losses[:, 0])
    ax2.plot(task_losses[:, 1])
    ax3.plot(loss_ratios)
    ax4.plot(grad_norm_losses)
    ax5.plot(weights[:, 0])
    ax5.plot(weights[:, 1])
    plt.show()
Example #10
0
def main():

    params = Params()

    model = RegressionModel(params)

    # Use functions of the model to build the graph

    out, states = model.inference(model.data_placeholder)
    loss = model.loss(out, model.labels_placeholder)
    train_op = model.train(loss, params.step_size)

    # Create a session for running Ops on the Graph.
    sess = tf.Session()

    # Run the Op to initialize the variables.
    init = tf.initialize_all_variables()
    sess.run(init)
    saver = tf.train.Saver(tf.all_variables())

    for i in range(params.train_steps + 1):
        data, labels = get_batch(params.batch_size, params.sequence_length, params.input_channels)
        feed_dict = {
            model.data_placeholder: data,
            model.labels_placeholder: labels
        }

        # Run one step of the model.  The return values are the activations
        # from the `train_op` (which is discarded) and the `loss` Op.  To
        # inspect the values of your Ops or variables, you may include them
        # in the list passed to sess.run() and the value tensors will be
        # returned in the tuple from the call.
        _, loss_value = sess.run([train_op, loss], feed_dict=feed_dict)
        if i % params.print_every == 0:
            print i, loss_value
        if i % params.save_every == 0:
            name = "model_{0}.ckpt".format(params.get_id)
            checkpoint_path = os.path.join('./save', name)
            # TODO: If we restore a model for further training, we should
            # add the number of training steps it had completed to our global step here
            saver.save(sess, checkpoint_path, global_step=i)
            print "model saved to {0}-{1}".format(checkpoint_path, i)
            with open('./save/{0}.model_param'.format(params.get_id), 'w') as f:
                pickle.dump(params,
                            f,
                            protocol=2 # pickle.HIGHEST_PROTOCOL as of writing
                            )

    data, labels = get_batch(params.batch_size, params.sequence_length, params.input_channels)
    feed_dict = {
            model.data_placeholder: data,
            model.labels_placeholder: labels
    }

    vars = sess.run(out + states, feed_dict)
    out_ = vars[0:len(out)]
    states_ = vars[len(out)+1:]

    d = data[0,0,:]
    o = np.array(out_)[:, 0, 0]
    l = labels[0,0,:]

    x1 = range(d.shape[0])
    x2 = range(1, d.shape[0] + 1)
    # TODO: output graph every 100 steps
    plt.scatter(x1, d, c='r')
    plt.scatter(x2, o, c='g')
    plt.scatter(x2, l, c='b', alpha=0.5)
    plt.show()

    print "data third dim", d

    print "out", o
    # print "states", np.array(states_)

    print "labels third dim", l
def main():
    """Sweep noise multipliers and log accuracy/fairness metrics for each.

    Parses the command line, then for every value ``s`` in ``--sigma``:

    * loads the dataset through ``data_loader(args, s)``;
    * starts a wandb run and builds a fresh ``RegressionModel`` with an SGD
      optimizer and ``BCELoss``;
    * attaches a ``PrivacyEngine`` with ``noise_multiplier=s`` unless
      ``--disable-dp`` is given or ``s`` is 0;
    * when ``--num-teachers`` is 0 or ``s`` is 0, trains the model directly
      for ``--epochs`` epochs and evaluates it with ``test``; otherwise
      trains teacher models, aggregates their predictions and evaluates a
      student via ``test_student``;
    * appends a formatted report to ``out//all_results.<run_name>``, prints
      it, and logs the metrics to wandb.
    """
    # Training settings
    parser = argparse.ArgumentParser(
        description="Measuring Privacy and Fairness Trade-offs")
    parser.add_argument(
        "-rn",
        "--run-name",
        required=True,
        type=str,
        help="Define run name for logging",
    )
    parser.add_argument(
        "-b",
        "--batch-size",
        type=int,
        default=128,
        metavar="B",
        help="Input batch size for training (default: 128)",
    )
    parser.add_argument(
        "--test-batch-size",
        type=int,
        default=4119,
        metavar="TB",
        help="Input batch size for testing (default: 4119)",
    )
    parser.add_argument(
        "-n",
        "--epochs",
        type=int,
        default=20,
        metavar="N",
        help="Number of epochs to train (default: 20)",
    )
    parser.add_argument(
        "-r",
        "--n-runs",
        type=int,
        default=1,
        help="Number of runs to average on (default: 1)",
    )
    parser.add_argument(
        "--lr",
        type=float,
        default=.1,
        metavar="LR",
        help="Learning rate (default: .1)",
    )
    # `type=list` would split the raw argument string into single characters
    # ("3.0" -> ['3', '.', '0']); parse one or more floats instead.
    parser.add_argument(
        "--sigma",
        type=float,
        nargs="+",
        default=[
            0, 3.0, 2.85, 2.6, 2.45, 2.3, 2.15, 2.0, 1.85, 1.6, 1.45, 1.3,
            1.15, 1.0, 0.85, 0.6, 0.45, 0.3, 0.15
        ],
        metavar="S",
        help="Noise multiplier(s) to sweep over, space-separated "
        "(default: 0 followed by 3.0 down to 0.15)",
    )
    parser.add_argument(
        "-c",
        "--max-per-sample-grad_norm",
        type=float,
        default=1.0,
        metavar="C",
        help="Clip per-sample gradients to this norm (default 1.0)",
    )
    parser.add_argument(
        "--delta",
        type=float,
        default=1e-5,
        metavar="D",
        help="Target delta (default: 1e-5)",
    )
    parser.add_argument(
        "--device",
        type=str,
        default="cuda",
        help="GPU ID for this process (default: 'cuda')",
    )
    parser.add_argument(
        "--save-model",
        action="store_true",
        default=False,
        help="Save the trained model (default: false)",
    )
    parser.add_argument(
        "--disable-dp",
        action="store_true",
        default=False,
        help="Disable privacy training and just train with vanilla SGD",
    )

    parser.add_argument(
        "--dataset",
        type=str,
        required=True,
        help=
        "Specify the dataset you want to test on. (bank: bank marketing, adult: adult census)",
    )
    parser.add_argument(
        "--train-data-path",
        type=str,
        default="./bank-data/bank-additional-full.csv",
        help="Path to train data",
    )
    parser.add_argument(
        "--test-data-path",
        type=str,
        default="./bank-data/bank-additional.csv",
        help="Path to test data",
    )
    parser.add_argument(
        "--num-teachers",
        type=int,
        default=0,
        help="Number of PATE teachers (default: 0)",
    )
    parser.add_argument(
        "--sensitive",
        type=str,
        required=True,
        help="Name of sensitive column",
    )
    args = parser.parse_args()
    device = torch.device(args.device)

    for i, s in enumerate(args.sigma):
        if args.num_teachers == 0 or s == 0:
            # Direct training path: a single train/test pair.
            dataset = data_loader(args, s)
            train_data, test_data = dataset.__getitem__()
            cat_emb_size, num_conts = dataset.get_input_properties()
            train_size, test_size = dataset.__len__()
            sensitive_cat_keys = dataset.getkeys()
            sensitive_idx = dataset.get_sensitive_idx()
            print(sensitive_cat_keys)
        else:
            # Teacher/student path: per-teacher loaders plus student data.
            dataset = data_loader(args, s)
            train_size, test_size = dataset.__len__()
            teacher_loaders = dataset.train_teachers()
            student_train_loader, student_test_loader = dataset.student_data()
            cat_emb_size, num_conts = dataset.get_input_properties()
            sensitive_cat_keys = dataset.getkeys()
            sensitive_idx = dataset.get_sensitive_idx()
            print(sensitive_cat_keys)

            print("!!!!!! DATA LOADED")

        wandb.init(project="project3",
                   name=args.run_name,
                   config={
                       "run_name": args.run_name,
                       "architecture": 'RegressionModel',
                       "dataset": args.dataset,
                       "batch_size": args.batch_size,
                       "n_epoch": args.epochs,
                       "learning_rate": args.lr,
                       "sigma(noise)": s,
                       "disable_dp": args.disable_dp,
                   })

        model = RegressionModel(emb_szs=cat_emb_size,
                                n_cont=num_conts,
                                emb_drop=0.04,
                                out_sz=1,
                                szs=[1000, 500, 250],
                                drops=[0.001, 0.01, 0.01],
                                y_range=(0, 1)).to(device)

        # Re-initialise every direct child that supports it.
        for layer in model.children():
            if hasattr(layer, 'reset_parameters'):
                layer.reset_parameters()

        criterion = nn.BCELoss()
        optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=0)

        # s == 0 means "no noise", so the privacy engine is skipped then.
        if not args.disable_dp and s > 0:
            privacy_engine = PrivacyEngine(
                model,
                batch_size=args.batch_size,
                sample_size=train_size,
                alphas=[1 + x / 10.0
                        for x in range(1, 100)] + list(range(12, 64)),
                noise_multiplier=s,
                max_grad_norm=args.max_per_sample_grad_norm,
                secure_rng=False,
            )
            privacy_engine.attach(optimizer)

        if args.num_teachers == 0 or s == 0:
            if i == 0:  # print model properties
                print(model, '\n')

            print(
                "\n=== RUN # {} ====================================\n".format(
                    i))

            for epoch in range(1, args.epochs + 1):
                train(args, model, device, train_data, criterion, optimizer,
                      epoch, s)

            accuracy, avg_loss, avg_precision, avg_recall, avg_eq_odds, avg_tpr, avg_dem_par, cm, sub_cm, overall_results = test(
                args, model, device, test_data, test_size, sensitive_idx)
        else:  # PATE MODEL
            print("!!!!!! ENTERED HERE")

            teacher_models = train_models(args, model, teacher_loaders,
                                          criterion, optimizer, device)
            preds, student_labels = aggregated_teacher(teacher_models,
                                                       student_train_loader, s,
                                                       device)

            accuracy, avg_loss, avg_precision, avg_recall, avg_eq_odds, avg_tpr, avg_dem_par, cm, sub_cm, overall_results = test_student(
                args, student_train_loader, student_labels,
                student_test_loader, test_size, cat_emb_size, num_conts,
                device, sensitive_idx)

        result = """
===================
Test set: {}

accuracy: {:.4f}
average loss: {:.4f}
precision: {:.4f}
recall: {:.4f}
sub_pre_rec:
{}
cm:
{}
sub_cm:
{}
avg_eq_odds: {:.4f}
avg_tpr: {:.4f}
avg_dem_par: {:.4f}
""".format(args.run_name, accuracy, avg_loss, avg_precision, avg_recall,
           overall_results, cm, sub_cm, avg_eq_odds, avg_tpr, avg_dem_par)

        # append run result; the context manager closes the file even if
        # the write fails
        file_path = 'out//all_results.' + args.run_name
        with open(file_path, 'a+') as file_object:
            file_object.write(result)
        print(result)
        # The four entries of `cm` are logged as tn, fp, fn, tp in that order.
        log_dict = {
            "accuracy": accuracy,
            "avg_loss": avg_loss,
            "precision": avg_precision,
            "recall": avg_recall,
            "avg_eq_odds": avg_eq_odds,
            "avg_tpr": avg_tpr,
            "avg_dem_par": avg_dem_par,
            "tn": cm[0],
            "fp": cm[1],
            "fn": cm[2],
            "tp": cm[3]
        }
        print(log_dict)
        wandb.log(log_dict)
def test_student(args, student_train_loader, student_labels, student_test_loader, test_size, cat_emb_size, num_conts, device, sensitive_idx):
    """Train a fresh student model, then evaluate it once on the test loader.

    The model is trained for ``args.epochs`` passes over the loader built by
    ``student_loader(student_train_loader, student_labels)``. Evaluation runs
    only after training has finished; the previous version evaluated and
    returned after the very first mini-batch.

    Args:
        args: Run configuration; ``lr``, ``epochs`` and ``run_name`` are read.
        student_train_loader: Training data handed to ``student_loader``.
        student_labels: Labels handed to ``student_loader`` alongside the data.
        student_test_loader: Iterable yielding ``(cats, conts, target)`` batches.
        test_size: Divisor used for the accuracy and the reported loss.
        cat_emb_size: Forwarded to ``RegressionModel`` as ``emb_szs``.
        num_conts: Forwarded to ``RegressionModel`` as ``n_cont``.
        device: Device the student model is moved to.
        sensitive_idx: Position of the sensitive attribute inside each row of
            ``cats``.

    Returns:
        A 10-tuple ``(accuracy, avg_loss, avg_precision, avg_recall,
        avg_eq_odds, avg_tpr, avg_dem_par, cm, sub_cm, overall_results)``.
        ``cm`` holds the normalised ``(tn, fp, fn, tp)`` over all test
        batches, ``sub_cm`` the per-group normalised confusion tuples of the
        last test batch, and ``overall_results`` one per-group
        precision/recall dict per batch. The ``avg_*`` fairness and
        precision/recall values are means over the test batches.
    """
    student_model = RegressionModel(
        emb_szs=cat_emb_size,
        n_cont=num_conts,
        emb_drop=0.04,
        out_sz=1,
        szs=[1000, 500, 250],
        drops=[0.001, 0.01, 0.01],
        y_range=(0, 1),
    ).to(device)

    criterion = nn.BCELoss()
    optimizer = optim.SGD(student_model.parameters(), lr=args.lr, momentum=0)
    print("========== Testing Student Model ==========")

    # ---- Training: run every epoch to completion before evaluating. ----
    for _epoch in range(args.epochs):
        student_model.train()
        train_loader = student_loader(student_train_loader, student_labels)
        for (cats, conts), labels in train_loader:
            optimizer.zero_grad()
            output = student_model(cats, conts).view(-1)
            # BCELoss needs floating-point targets.
            loss = criterion(output, labels.to(torch.float32))
            loss.backward()
            optimizer.step()

    # ---- Evaluation ----
    student_model.eval()
    test_loss = 0.0
    correct = 0
    n_batches = 0

    sum_precision = 0.0
    sum_recall = 0.0
    sum_eq_odds = 0.0
    sum_dem_par = 0.0
    sum_tpr = 0.0
    sum_tn = 0
    sum_fp = 0
    sum_fn = 0
    sum_tp = 0
    overall_results = []
    # Defined up front so the return value is valid even with no test batches.
    sub_cm = []

    with torch.no_grad():
        for batch_idx, (cats, conts, target) in enumerate(student_test_loader):
            print("target\n", sum(target))
            n_batches += 1
            output = student_model(cats, conts)
            # Match the model output's shape and dtype, mirroring the
            # training path, so BCELoss and the equality check line up.
            target_like = target.view_as(output).to(torch.float32)
            # Fresh test loss; do not accumulate onto the last training loss.
            loss = criterion(output, target_like)
            test_loss = test_loss + ((1 / (batch_idx + 1)) * (loss.item() - test_loss))
            pred = (output > 0.5).float()
            print("pred\n", sum(pred))
            correct += pred.eq(target_like).sum().item()

            # NOTE(review): the file name only has minute resolution, so
            # batches evaluated within the same minute overwrite each other.
            now = datetime.now()
            pred_np = pred.cpu().numpy()
            pd.DataFrame(pred_np).to_csv(f"pred_results/{args.run_name}_{now.hour}-{now.minute}.csv")

            # Flat CPU arrays for the scikit-learn / fairlearn metrics.
            y_true = target.cpu().numpy().ravel()
            y_pred = pred_np.ravel()

            # Confusion matrix. labels=[0, 1] forces a 2x2 result even when
            # the batch happens to contain a single class.
            tn, fp, fn, tp = confusion_matrix(y_true, y_pred, labels=[0, 1]).ravel()
            sum_tn += tn
            sum_fp += fp
            sum_fn += fn
            sum_tp += tp

            # Sensitive-attribute value of every sample in the batch.
            sensitive = [row[sensitive_idx].item() for row in cats]
            cat_len = max(sensitive)

            # Per-group normalised confusion matrix (reset for every batch,
            # so the returned value describes the last batch only).
            sub_cm = []
            for j in range(cat_len + 1):
                idx = [k for k, value in enumerate(sensitive) if value == j]
                if not idx:
                    # Group absent from this batch: keep the slot so that
                    # positions still correspond to group ids.
                    sub_cm.append((0.0, 0.0, 0.0, 0.0))
                    continue
                sub_tn, sub_fp, sub_fn, sub_tp = confusion_matrix(
                    y_true[idx], y_pred[idx], labels=[0, 1]).ravel()
                total = mysum(sub_tn, sub_fp, sub_fn, sub_tp)
                print("??", total)
                sub_cm.append((sub_tn / total, sub_fp / total, sub_fn / total, sub_tp / total))

            # Fairness metrics
            group_metrics = MetricFrame({'precision': skm.precision_score, 'recall': skm.recall_score},
                                        y_true, y_pred,
                                        sensitive_features=sensitive)

            demographic_parity = flm.demographic_parity_difference(y_true, y_pred,
                                                                   sensitive_features=sensitive)

            eq_odds = flm.equalized_odds_difference(y_true, y_pred,
                                                    sensitive_features=sensitive)

            tpr = MetricFrame(true_positive_rate,
                              y_true, y_pred,
                              sensitive_features=sensitive)
            tpr_gap = tpr.difference(method='between_groups')

            sub_results = group_metrics.overall.to_dict()
            sub_results_by_group = group_metrics.by_group.to_dict()

            sum_precision += sub_results['precision']
            sum_recall += sub_results['recall']
            print("pre_rec", sub_results)
            overall_results.append(sub_results_by_group)
            sum_eq_odds += eq_odds
            print("eqo", eq_odds)
            sum_dem_par += demographic_parity
            print("dempar", demographic_parity)
            sum_tpr += tpr_gap
            print("tpr", tpr_gap)

    # Turn the per-batch sums into genuine averages (identical to the sums
    # when the test loader yields a single batch).
    divisor = n_batches if n_batches else 1
    avg_precision = sum_precision / divisor
    avg_recall = sum_recall / divisor
    avg_eq_odds = sum_eq_odds / divisor
    avg_dem_par = sum_dem_par / divisor
    avg_tpr = sum_tpr / divisor

    total = mysum(sum_tn, sum_fp, sum_fn, sum_tp)
    print("!!", total)
    if total:
        cm = (sum_tn / total, sum_fp / total, sum_fn / total, sum_tp / total)
    else:
        # No test samples were seen; avoid dividing by zero.
        cm = (0.0, 0.0, 0.0, 0.0)

    # NOTE(review): test_loss is already a running mean of the per-batch
    # losses; the extra division by test_size is kept from the original but
    # looks like double normalisation - confirm the intended definition.
    test_loss /= test_size
    accuracy = correct / test_size
    avg_loss = test_loss

    return accuracy, avg_loss, avg_precision, avg_recall, avg_eq_odds, avg_tpr, avg_dem_par, cm, sub_cm, overall_results
Example #13
0
        print('Running on CPU')
    print("Tensorboard logs in " + args.log_dir)

    batch_size = args.batch_size
    n_classes = 6
    cuda = args.cuda
    n_epochs = args.epochs
    D_m_text, D_m_audio, D_m_video, D_m_context = 300, 384, 35, 300
    D_g, D_p, D_e, D_h, D_a = 150, 150, 100, 100, 100

    # Instantiate model
    model = RegressionModel(D_m_text,
                            D_m_audio,
                            D_m_video,
                            D_m_context,
                            D_g,
                            D_p,
                            D_e,
                            D_h,
                            dropout_rec=args.rec_dropout,
                            dropout=args.dropout)

    if cuda:
        model.cuda()
    loss_function = MaskedMSELoss()

    # Get optimizer and relevant dataloaders
    optimizer = optim.Adam(model.parameters(),
                           lr=args.lr,
                           weight_decay=args.l2)
    train_loader, valid_loader, test_loader = get_MOSEI_loaders(
        './data/regression.pkl',