def __init__(self):  # constructor
    # the model inherits from the Module class
    Module.__init__(self)
    # the model is made of two linear layers with activation layers
    self.l1 = Sequential(Linear(2, 16), ReLu(), Linear(16, 92))
    self.s1 = TanhS()
    self.l2 = Linear(92, 2)

def __init__(self):
    self.num_games = 0  # number of games played
    self.epsilon = 0  # randomness
    self.gamma = 0.9  # discount rate
    self.memory = deque(maxlen=MAX_MEMORY)  # pops from the left if the memory limit is exceeded
    self.model = Linear(11, 256, 3)
    self.trainer = Trainer(self.model, lr=LEARNING_RATE, gamma=self.gamma)

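# A minimal sketch, assuming the agent fields above, of how epsilon and the model are
# typically combined for epsilon-greedy action selection; the decay schedule
# (80 - num_games) and the 3-way one-hot action encoding are illustrative assumptions,
# not taken from the source.
import random
import torch

def get_action(agent, state):
    # explore more in early games, exploit more as num_games grows
    agent.epsilon = max(0, 80 - agent.num_games)
    final_move = [0, 0, 0]
    if random.randint(0, 200) < agent.epsilon:
        final_move[random.randint(0, 2)] = 1          # random exploratory move
    else:
        state0 = torch.tensor(state, dtype=torch.float)
        prediction = agent.model(state0)              # predicted Q-values for the 11-dim state
        final_move[torch.argmax(prediction).item()] = 1
    return final_move
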
def make_basic_cnn(nb_filters=64, nb_classes=10, input_shape=(None, 28, 28, 1)):
    layers = [Conv2D(nb_filters, (8, 8), (2, 2), "SAME"),
              ReLU(),
              Conv2D(nb_filters * 2, (6, 6), (2, 2), "VALID"),
              ReLU(),
              Conv2D(nb_filters * 2, (5, 5), (1, 1), "VALID"),
              ReLU(),
              Flatten(),
              Linear(nb_classes),
              Softmax()]
    model = MLP(nb_classes, layers, input_shape)
    return model

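# Hedged usage sketch, assuming a CleverHans-style, TF1-era Model interface in which the
# returned MLP exposes get_probs() on a symbolic input tensor; this is an assumption about
# the project-local MLP class, not confirmed by the snippet above.
import tensorflow as tf

x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
model = make_basic_cnn(nb_filters=64, nb_classes=10)
preds = model.get_probs(x)  # softmax probabilities over the 10 classes
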
def main(argv):
    cuda = FLAGS.cuda and torch.cuda.is_available()
    device = torch.device("cuda" if cuda and not FLAGS.convex else "cpu")
    log_freq = 20
    total_steps = max(FLAGS.max_steps, int(FLAGS.epoches / FLAGS.q))  # total steps

    if FLAGS.dataset == 'mnist':
        train_tuple = MNIST_data().train()
        test_tuple = MNIST_data().test()
        if FLAGS.convex:
            train_tuple = (train_tuple[0].reshape(-1, 784), train_tuple[1])
            test_tuple = (test_tuple[0].reshape(-1, 784), test_tuple[1])
            model = Logistic(FLAGS.dataset).to(device)
        else:
            model = ConvNet().to(device)
    elif FLAGS.dataset == 'covertype':
        train_tuple = Covertype_data.train()
        test_tuple = Covertype_data.test()
        if FLAGS.convex:
            model = Logistic(FLAGS.dataset).to(device)
        else:
            model = Linear().to(device)

    if FLAGS.momentum == 0:
        optimizer = optim.SGD(model.parameters(), lr=FLAGS.lr, momentum=0)
    elif 0 < FLAGS.momentum <= 1:
        if FLAGS.momentum == 1:
            FLAGS.momentum = 0.5
        optimizer = optim.SGD(model.parameters(), lr=FLAGS.lr, momentum=FLAGS.momentum)
    else:
        optimizer = optim.Adam(model.parameters())

    if FLAGS.delta == -1:
        FLAGS.delta = 1. / (train_tuple[0].shape[0] ** 2)

    diff = 0
    if FLAGS.delta != 0 and FLAGS.epoches != -1:
        if FLAGS.auto_sigma == 0:
            FLAGS.sigma = get_sigma(FLAGS.q, FLAGS.epoches, FLAGS.epsilon, FLAGS.delta)
        elif FLAGS.auto_sigma == 1:
            FLAGS.SGN = 0
            FLAGS.sigma = get_sigma(FLAGS.q, FLAGS.epoches, FLAGS.epsilon, FLAGS.delta)
            FLAGS.sigma *= 2

    # record the settings of this experiment instance
    experiment_info = ('Dataset: %r \nSampling probability: %r \nDelta: %r \nConvex: %r \n'
                       'Clip_bound: %r \nSigma: %r\nMomentum: %r\nAuto_sigma: %d\nSGN: %d \n'
                       'Epoches: %d \nEpsilon: %r \n') % (
        FLAGS.dataset, FLAGS.q, FLAGS.delta, FLAGS.convex, FLAGS.clip_bound,
        FLAGS.sigma, FLAGS.momentum, FLAGS.auto_sigma, FLAGS.SGN,
        FLAGS.epoches, FLAGS.epsilon)
    logging(experiment_info, 'w')

    # track the RDP at 128 orders; these can be converted to (epsilon, delta)-differential privacy
    total_privacy_l = [0.] * 128
    epsilons = [0.5, 1., 2.0]
    deltas = [0., 0., 2.0]  # one delta per epsilon
    log_array = []
    norm_list = []

    for t in range(1, total_steps + 1):
        # compute the gradients; note that optimizer.step() is run outside the train function
        total_privacy_l = train(model, device, train_tuple, optimizer, diff,
                                total_privacy_l, t, norm_list)
        if 0 < FLAGS.delta < 1:  # training privately
            all_failed = True
            for i, eps in enumerate(epsilons):
                if deltas[i] > FLAGS.delta:  # skip epsilons that have already failed
                    continue
                # use the RDP accountant to get delta for the given epsilon
                if_update_delta, order = _compute_delta(range(2, 2 + 128), total_privacy_l, eps)
                if if_update_delta > FLAGS.delta:
                    # record that the current model satisfies (eps, deltas[i])-differential privacy
                    accuracy = test(model, device, test_tuple, t)
                    info = 'For epsilon %r, delta %r we get accuracy: %r%% at step %r\n' % (
                        eps, deltas[i], accuracy, t)
                    deltas[i] = 1.  # abort this epsilon
                    logging(info)
                    print(info)
                else:
                    deltas[i] = if_update_delta  # update delta
                    all_failed = False  # at least one epsilon has not failed yet
            if not all_failed:
                optimizer.step()
            else:
                info = 'failed at all given epsilons, exiting\n'
                print(info)
                logging(info)
                exit()
        else:  # training non-privately
            optimizer.step()

        if t % log_freq == 0:
            accuracy = test(model, device, test_tuple, t)
            log_array.append(copy.deepcopy([t, accuracy, epsilons, deltas]))
            np.save('logs/log%d.npy' % FLAGS.experiment_id,
                    np.array(log_array, dtype=object))

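# A minimal sketch (not the repository's train() function) of the DP-SGD step the loop
# above relies on: clip each per-example gradient to clip_bound, add Gaussian noise with
# standard deviation sigma * clip_bound, and leave optimizer.step() to the caller.
# The names clip_bound and sigma mirror the FLAGS above; everything else is assumed.
import torch

def dp_sgd_grads(model, loss_fn, xb, yb, clip_bound, sigma):
    summed = [torch.zeros_like(p) for p in model.parameters()]
    for x, y in zip(xb, yb):  # per-example gradients
        model.zero_grad()
        loss_fn(model(x.unsqueeze(0)), y.unsqueeze(0)).backward()
        grads = [p.grad.detach().clone() for p in model.parameters()]
        norm = torch.sqrt(sum(g.pow(2).sum() for g in grads))
        scale = min(1.0, clip_bound / (norm.item() + 1e-12))  # clip the gradient norm
        for s, g in zip(summed, grads):
            s.add_(g * scale)
    batch = len(xb)
    for p, s in zip(model.parameters(), summed):
        noise = torch.randn_like(s) * sigma * clip_bound  # Gaussian mechanism
        p.grad = (s + noise) / batch  # the caller then runs optimizer.step()
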
import time
import pickle

import numpy as np

from layers import Dense, Flatten, Conv2D, MaxPooling2D
from loss import binary_crossentropy
from model import Linear
from activation import Sigmoid
from optimizer import gradient_descent

start_time = time.time()

image = np.random.rand(64, 64)

model = Linear()
model.add(Conv2D(64, input_shape=(64, 64)))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(128))
model.add(MaxPooling2D((2, 2)))
model.add(Flatten())
model.add(Dense(128))
model.add(Dense(64))
model.add(Dense(1, activation=Sigmoid, normalize_signal=False))
model.summary()
model.eval(image)

# load the training data and scale pixel values to [0, 1]
with open("input_data.pickle", "rb") as f:
    input_data = np.array(pickle.load(f)) / 255.0
with open("label.pickle", "rb") as f:
    label = np.expand_dims(np.array(pickle.load(f)), axis=1)

model.compile(optimizer=gradient_descent, loss=binary_crossentropy)
model.fit(input_data, label, epochs=20, batch_size=16)

def linear(X, y):
    model = Linear()
    model.train(X, y, 1e-4)
    model.draw(X, y)

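# Hedged usage sketch: Linear(), train() and draw() above are project-specific, so this only
# shows the kind of toy data such a helper is usually called with; the data shape is an assumption.
import numpy as np

rng = np.random.default_rng(0)
X = rng.uniform(-1.0, 1.0, size=(200, 1))
y = 3.0 * X[:, 0] + 0.5 + rng.normal(scale=0.1, size=200)  # noisy line y = 3x + 0.5
linear(X, y)  # fits with learning rate 1e-4 and plots the result
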
# device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# data
HOME = os.environ['HOME']
dataset = MNIST(os.path.join(HOME, 'datasets/MNIST/numpy'), device)

# preconditioner Q (full Hessian) and its inverse
Q = np.load('HessFull.npy')
Q_inv = np.load('HessFull_inv.npy')
Q = torch.FloatTensor(Q).to(device)
Q_inv = torch.FloatTensor(Q_inv).to(device)

# model
model = Linear().to(device)

# generate the optimization paths
w = PSGDPath(model, dataset, eta, alpha0, num_iters, bs, Q, Q_inv)
np.save(w_name, w)
what = PSGDPath(model, dataset, gamma, alpha0 + l2regu, num_iters, bs, Q, Q_inv)
np.save(what_name, what)
# w = np.load(w_name)
# what = np.load(what_name)

# generate the averaging weights and the weighted average of the path
p = genWeight(len(w), gamma / eta)
wtilde = averagePath(w, p)

# ----------------------------------------------------------------------
# Construct the model
models = {}
optimizers = {}
if mode == 'train':
    word_embedding = Embedding(len(token_vocab),
                               embedding_dim,
                               padding_idx=0,
                               sparse=True,
                               pretrain=embedding_file,
                               vocab=token_vocab,
                               trainable=True)
    for target_label in labels:
        lstm = LSTM(embedding_dim, hidden_size, batch_first=True, forget_bias=1.0)
        linears = [Linear(i, o) for i, o
                   in zip([hidden_size] + linear_sizes, linear_sizes + [2])]
        model = MoralClassifier(word_embedding, lstm, linears)
        if use_gpu:
            model.cuda()
        optimizer = torch.optim.SGD(
            filter(lambda p: p.requires_grad, model.parameters()),
            lr=learning_rate,
            momentum=.9)
        models[target_label] = model
        optimizers[target_label] = optimizer
else:
    # Recover the model
    word_embedding = Embedding(len(token_vocab),
                               saved_state['embedding_dim'],
                               padding_idx=0,
                               sparse=True,

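# A small worked example (illustrative values only, not taken from the script above) of how the
# zip([hidden_size] + linear_sizes, linear_sizes + [2]) pattern chains the layer dimensions
# from the LSTM hidden size down to the 2-way output:
hidden_size = 100
linear_sizes = [64, 32]
dims = list(zip([hidden_size] + linear_sizes, linear_sizes + [2]))
# dims == [(100, 64), (64, 32), (32, 2)]
# i.e. Linear(100, 64) -> Linear(64, 32) -> Linear(32, 2)
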
    'num_layers': 2,
    'size': char_cnn.output_size,
    'activation': 'selu'
}))
lstm = LSTM(Config({
    'input_size': word_embed.output_size + char_cnn.output_size,
    'hidden_size': args.lstm_hidden_size,
    'forget_bias': 1.0,
    'batch_first': True,
    'bidirectional': True
}))
crf = CRF(Config({
    'label_vocab': label_vocab
}))
output_linear = Linear(Config({
    'in_features': lstm.output_size,
    'out_features': len(label_vocab)
}))

# LSTM-CRF model
lstm_crf = LstmCrf(
    token_vocab=token_vocab,
    label_vocab=label_vocab,
    char_vocab=char_vocab,
    word_embedding=word_embed,
    char_embedding=char_cnn,
    crf=crf,
    lstm=lstm,
    univ_fc_layer=output_linear,
    embed_dropout_prob=args.embed_dropout,
    lstm_dropout_prob=args.lstm_dropout,
    linear_dropout_prob=args.linear_dropout,

            nb_data_errors = nb_data_errors + 1
    return nb_data_errors


if __name__ == "__main__":
    # The seed is set to 0 for the model definitions. The weights of the linear layers are randomly
    # initialized, and some initializations do not let the models converge (whether ours or the PyTorch
    # one, since both share the same architecture). In a real use case the user would simply relaunch
    # the run, but since this script is used for evaluation we prefer a predictable result.
    # In a nutshell, we initialize the weights with deterministic values.
    torch.manual_seed(0)

    # Model definitions
    model = Sequential(Linear(2, 25), ReLu(), Linear(25, 25), ReLu(), Linear(25, 25), ReLu(),
                       Dropout(0.2), Linear(25, 2), ReLu())
    model_torch = nn.Sequential(nn.Linear(2, 25), nn.ReLU(), nn.Linear(25, 25), nn.ReLU(),
                                nn.Linear(25, 25), nn.ReLU(), nn.Dropout(0.2), nn.Linear(25, 2),
                                nn.ReLU())

    # Creating toy data
    # Set the seed to a random value, this time to generate the data randomly (and for the dropout layers)
    torch.manual_seed(random.randint(0, 2**32 - 1))
    train_input, train_target, label = generate_data(10000)
    test_input, test_target, test_label = generate_data(200)

    # Training the models
    train_model(model, train_input, train_target, 500)

# ----------------------------------------------------------------------
# Construct the model
models = {}
optimizers = {}
if mode == 'train':
    word_embedding = Embedding(len(token_vocab),
                               embedding_dim,
                               padding_idx=0,
                               sparse=True,
                               pretrain=embedding_file,
                               vocab=token_vocab,
                               trainable=True)
    for target_label in labels:
        lstm = LSTM(embedding_dim, hidden_size, batch_first=True, forget_bias=1.0)
        linears = [Linear(i, o) for i, o
                   in zip([hidden_size + el_linear_sizes[-1] + mfd_linear_sizes[-1]] + linear_sizes,
                          linear_sizes + [2])]
        el_linears = [Linear(i, o) for i, o
                      in zip([el_embedding_dim] + el_linear_sizes[:-1], el_linear_sizes)]
        mfd_linears = [Linear(i, o) for i, o
                       in zip([11] + mfd_linear_sizes[:-1], mfd_linear_sizes)]
        model = MoralClassifierMfdBk(word_embedding, lstm, linears, el_linears, mfd_linears)
        if use_gpu:
            model.cuda()
        optimizer = torch.optim.SGD(
            filter(lambda p: p.requires_grad, model.parameters()),
            lr=learning_rate,
            momentum=.9)
        models[target_label] = model

char_embed = CharCNN(len(char_vocab),
                     args.char_embed_dim,
                     filters=charcnn_filters)
char_hw = Highway(char_embed.output_size,
                  layer_num=args.charhw_layer,
                  activation=args.charhw_func)
feat_dim = args.word_embed_dim + char_embed.output_size

lstm = LSTM(feat_dim,
            args.lstm_hidden_size,
            batch_first=True,
            bidirectional=True,
            forget_bias=args.lstm_forget_bias)
crf_1 = CRF(label_size=len(label_vocab_1) + 2)
crf_2 = CRF(label_size=len(label_vocab_2) + 2)

# Linear layers for task 1
shared_linear_1 = Linear(in_features=lstm.output_size, out_features=len(label_vocab_1))
spec_linear_1_1 = Linear(in_features=lstm.output_size, out_features=len(label_vocab_1))
spec_linear_1_2 = Linear(in_features=lstm.output_size, out_features=len(label_vocab_1))

# Linear layers for task 2
shared_linear_2 = Linear(in_features=lstm.output_size, out_features=len(label_vocab_2))
spec_linear_2_1 = Linear(in_features=lstm.output_size, out_features=len(label_vocab_2))
spec_linear_2_2 = Linear(in_features=lstm.output_size, out_features=len(label_vocab_2))

lstm_crf_tgt = LstmCrf(token_vocab_1,
                       label_vocab_1,
                       char_vocab,

                  sparse=True,
                  padding_idx=C.PAD_INDEX)
char_embed = CharCNN(len(char_vocab),
                     train_args['char_embed_dim'],
                     filters=charcnn_filters)
char_hw = Highway(char_embed.output_size,
                  layer_num=train_args['charhw_layer'],
                  activation=train_args['charhw_func'])
feat_dim = word_embed.embedding_dim + char_embed.output_size
lstm = LSTM(feat_dim,
            train_args['lstm_hidden_size'],
            batch_first=True,
            bidirectional=True,
            forget_bias=train_args['lstm_forget_bias'])
crf = CRF(label_size=len(label_vocab) + 2)
linear = Linear(in_features=lstm.output_size, out_features=len(label_vocab))
lstm_crf = LstmCrf(token_vocab,
                   label_vocab,
                   char_vocab,
                   word_embedding=word_embed,
                   char_embedding=char_embed,
                   crf=crf,
                   lstm=lstm,
                   univ_fc_layer=linear,
                   embed_dropout_prob=train_args['feat_dropout'],
                   lstm_dropout_prob=train_args['lstm_dropout'],
                   char_highway=char_hw if train_args['use_highway'] else None)

# restore the trained weights
word_embed.load_state_dict(state['model']['word_embed'])
char_embed.load_state_dict(state['model']['char_embed'])
char_hw.load_state_dict(state['model']['char_hw'])

optimizers = {}
if mode == 'train':
    word_embedding = Embedding(len(token_vocab),
                               embedding_dim,
                               padding_idx=0,
                               sparse=True,
                               pretrain=embedding_file,
                               vocab=token_vocab,
                               trainable=True)
    for target_label in labels:
        lstm = LSTM(embedding_dim, hidden_size, batch_first=True, forget_bias=1.0)
        linears = [
            Linear(i, o) for i, o
            in zip([hidden_size + mfd_linear_sizes[-1]] + linear_sizes, linear_sizes + [2])
        ]
        mfd_linears = [
            Linear(i, o) for i, o
            in zip([11] + mfd_linear_sizes[:-1], mfd_linear_sizes)
        ]
        model = MoralClassifierExt(word_embedding, lstm, linears, mfd_linears)
        if use_gpu:
            model.cuda()
        optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad, model.parameters()),
                                    lr=learning_rate,
                                    momentum=.9)
        models[target_label] = model
        optimizers[target_label] = optimizer

lstm = LSTM(Config({
    'input_size': word_embed_1.output_size + char_cnn.output_size,
    'hidden_size': args.lstm_hidden_size,
    'forget_bias': 1.0,
    'batch_first': True,
    'bidirectional': True
}))

# CRF layer for task 1
crf_1 = CRF(Config({'label_vocab': label_vocab_1}))
# CRF layer for task 2
crf_2 = CRF(Config({'label_vocab': label_vocab_2}))

# Linear layers for task 1
shared_output_linear_1 = Linear(Config({
    'in_features': lstm.output_size,
    'out_features': len(label_vocab_1)
}))
spec_output_linear_1_1 = Linear(Config({
    'in_features': lstm.output_size,
    'out_features': len(label_vocab_1)
}))
spec_output_linear_1_2 = Linear(Config({
    'in_features': lstm.output_size,
    'out_features': len(label_vocab_1)
}))

# Linear layers for task 2
shared_output_linear_2 = Linear(Config({