def train():
    lr = 0.002
    batch_size = 16
    epoch = 80
    start = time()

    sgd = SGD(lr=lr, decay=1e-6, momentum=0.9, nesterov=True)
    model = cnn_model()
    model.compile(loss='categorical_crossentropy',
                  optimizer=sgd,
                  metrics=['accuracy'])

    X_train, y_train, X_val, y_val, X_test, y_test = prepare_data()

    datagen = ImageDataGenerator(featurewise_center=False,
                                 featurewise_std_normalization=False,
                                 width_shift_range=0.2,
                                 height_shift_range=0.2,
                                 zoom_range=0.3,
                                 shear_range=0.15,
                                 rotation_range=30.)
    datagen.fit(X_train)

    history = model.fit_generator(
        datagen.flow(X_train, y_train, batch_size=batch_size),
        steps_per_epoch=2000,
        epochs=epoch,
        validation_data=(X_val, y_val),
        callbacks=[ReduceLROnPlateau('val_loss', factor=0.2, patience=20,
                                     verbose=1, mode='auto'),
                   ModelCheckpoint('model.h5', save_best_only=True)])

    end = time()
    print(end - start)
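
# train() assumes a cnn_model() factory that is not shown here. Below is a
# minimal sketch of what such a model could look like for the 43-class GTSRB
# traffic-sign task (12630 test images, as in test() further down). The layer
# sizes, input shape, and architecture are assumptions for illustration, not
# the original author's network:
def cnn_model(input_shape=(48, 48, 3), n_classes=43):
    from keras.models import Sequential
    from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

    model = Sequential([
        Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
        MaxPooling2D((2, 2)),
        Conv2D(64, (3, 3), activation='relu'),
        MaxPooling2D((2, 2)),
        Flatten(),
        Dense(256, activation='relu'),
        Dropout(0.5),
        # One softmax output per traffic-sign class.
        Dense(n_classes, activation='softmax'),
    ])
    return model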
def pred_probs(self, f_pred_prob, prepare_data, data, iterator, verbose=False):
    """If you want to use a trained model, this is useful to compute
    the probabilities of new examples.
    """
    n_samples = len(data[0])
    probs = numpy.zeros((n_samples, 2)).astype(config.floatX)

    n_done = 0
    for _, valid_index in iterator:
        x, mask, y = prepare_data([data[0][t] for t in valid_index],
                                  numpy.array(data[1])[valid_index],
                                  maxlen=None)
        pred_probas = f_pred_prob(x, mask)
        probs[valid_index, :] = pred_probas

        n_done += len(valid_index)
        if verbose:
            print('%d/%d samples classified' % (n_done, n_samples))

    return probs
def test():
    _, _, _, _, X_test, y_test = prepare_data()
    signs = get_traffic_sign_config()

    start = time()
    model = load_model('model.h5')
    pred = model.predict(X_test, batch_size=1000)
    end = time()

    # Take the most probable class for every test image.
    y_pred = np.argmax(pred, axis=1)
    for i in range(len(y_test)):
        if y_pred[i] != y_test[i]:
            print("**************************************************************************")
            print("the model prediction is %s, correct label is %s"
                  % (signs[y_pred[i]], signs[y_test[i]]))

    acc = np.mean(y_pred == y_test)
    print("Test accuracy = {}".format(acc))
    print(end - start)
def pred_error(self, f_pred, prepare_data, data, iterator, verbose=False):
    """Just compute the error.

    f_pred: Theano fct computing the prediction
    prepare_data: usual prepare_data for that dataset.
    """
    valid_err = 0
    for _, valid_index in iterator:
        x, mask, y = prepare_data([data[0][t] for t in valid_index],
                                  numpy.array(data[1])[valid_index],
                                  maxlen=None)
        preds = f_pred(x, mask)
        targets = numpy.array(data[1])[valid_index]
        valid_err += (preds == targets).sum()
    valid_err = 1. - numpy_floatX(valid_err) / len(data[0])

    return valid_err
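
# pred_probs() and pred_error() both expect `iterator` to yield
# (minibatch_index, list_of_example_indices) pairs, and pred_error() relies
# on a numpy_floatX helper. Sketches of both follow, based on the Theano LSTM
# tutorial conventions this code appears to build on; in the original they
# live on the model class (called as self.get_minibatches_idx), so treat the
# free-function form here as an assumption:
def numpy_floatX(data):
    # Cast to whatever float width Theano is configured for.
    return numpy.asarray(data, dtype=config.floatX)


def get_minibatches_idx(n, minibatch_size, shuffle=False):
    """Cut a dataset of n examples into minibatches of indices."""
    idx_list = numpy.arange(n, dtype="int32")
    if shuffle:
        numpy.random.shuffle(idx_list)
    minibatches = [idx_list[i:i + minibatch_size]
                   for i in range(0, n, minibatch_size)]
    return list(zip(range(len(minibatches)), minibatches))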
def main(args):
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    REVIEW, SCORE, train_data, valid_data, test_data = prepare_data(train=False)
    test_iterator = data.Iterator(test_data,
                                  batch_size=args.batch_size,
                                  device=device,
                                  sort_within_batch=True,
                                  sort_key=lambda x: len(x.review))
    print('Finished loading data.')

    # The trained model was pickled whole; load it and move it to the device.
    with open(args.model_path, 'rb') as f:
        model = pickle.load(f)
    criterion = nn.CrossEntropyLoss()
    model = model.to(device)
    criterion = criterion.to(device)

    loss, acc = evaluate(model, test_iterator, criterion)
    print('Testing data:')
    print(f'Loss: {loss:.3f}')
    print(f'Acc: {acc:.2f}')
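
# evaluate() is not defined in this excerpt. A minimal sketch that fits the
# torchtext-style iterator above; that batch.review carries (tokens, lengths)
# and that the model takes (text, lengths) are assumptions based on how the
# BiLSTM_RNN script below builds its fields:
def evaluate(model, iterator, criterion):
    model.eval()
    total_loss, total_correct, total_n = 0.0, 0, 0
    with torch.no_grad():
        for batch in iterator:
            text, lengths = batch.review
            logits = model(text, lengths)
            loss = criterion(logits, batch.score)
            # Accumulate per-example loss and accuracy.
            total_loss += loss.item() * batch.score.size(0)
            total_correct += (logits.argmax(dim=1) == batch.score).sum().item()
            total_n += batch.score.size(0)
    return total_loss / total_n, total_correct / total_n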
def main():
    args, unparsed = FLAGS.parse_known_args()
    if len(unparsed) != 0:
        raise NameError("Argument {} not recognized".format(unparsed))

    if args.seed is None:
        args.seed = random.randint(0, 1e3)
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)

    if args.cpu:
        args.dev = torch.device('cpu')
    else:
        if not torch.cuda.is_available():
            raise RuntimeError("GPU unavailable.")
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False
        args.dev = torch.device('cuda')

    #logger = GOATLogger(args)
    use_qrnn = True

    # Get data
    train_loader, val_loader, test_loader = prepare_data(args)

    # Set up learner, meta-learner
    learner_w_grad = Learner(args.image_size, args.bn_eps, args.bn_momentum,
                             args.n_class).to(args.dev)
    learner_wo_grad = copy.deepcopy(learner_w_grad)
    metalearner = MetaLearner(args.input_size, args.hidden_size,
                              learner_w_grad.get_flat_params().size(0),
                              use_qrnn).to(args.dev)
    metalearner.metalstm.init_cI(learner_w_grad.get_flat_params())

    # Set up loss, optimizer, learning rate scheduler
    optim = torch.optim.Adam(metalearner.parameters(), args.lr)

    if args.resume:
        #logger.loginfo("Initialized from: {}".format(args.resume))
        last_eps, metalearner, optim = resume_ckpt(metalearner, optim,
                                                   args.resume, args.dev)

    if args.mode == 'test':
        #_ = meta_test(last_eps, test_loader, learner_w_grad, learner_wo_grad,
        #              metalearner, args, logger)
        return

    best_acc = 0.0
    print("Starting training...")
    print("Shots: ", args.n_shot)
    print("Classes: ", args.n_class)
    start_time = datetime.now()

    # Meta-training
    for eps, (episode_x, episode_y) in enumerate(train_loader):
        # episode_x.shape = [n_class, n_shot + n_eval, c, h, w]
        # episode_y.shape = [n_class, n_shot + n_eval] --> NEVER USED
        train_input = episode_x[:, :args.n_shot].reshape(
            -1, *episode_x.shape[-3:]).to(args.dev)  # [n_class * n_shot, :]
        train_target = torch.LongTensor(
            np.repeat(range(args.n_class),
                      args.n_shot)).to(args.dev)  # [n_class * n_shot]
        test_input = episode_x[:, args.n_shot:].reshape(
            -1, *episode_x.shape[-3:]).to(args.dev)  # [n_class * n_eval, :]
        test_target = torch.LongTensor(
            np.repeat(range(args.n_class),
                      args.n_eval)).to(args.dev)  # [n_class * n_eval]

        # Train learner with metalearner
        learner_w_grad.reset_batch_stats()
        learner_wo_grad.reset_batch_stats()
        learner_w_grad.train()
        learner_wo_grad.train()
        cI = train_learner(learner_w_grad, metalearner, train_input,
                           train_target, args)

        # Train meta-learner with validation loss
        learner_wo_grad.transfer_params(learner_w_grad, cI)
        output = learner_wo_grad(test_input)
        loss = learner_wo_grad.criterion(output, test_target)
        acc = accuracy(output, test_target)

        optim.zero_grad()
        loss.backward()
        nn.utils.clip_grad_norm_(metalearner.parameters(), args.grad_clip)
        optim.step()

        if (eps + 1) % 250 == 0 or eps == 0:
            print(eps + 1, "/", args.episode, " Loss: ", loss.item(),
                  " Acc:", acc)
            #logger.batch_info(eps=eps, totaleps=args.episode,
            #                  loss=loss.item(), acc=acc, phase='train')

        # Meta-validation
        if ((eps + 1) % args.val_freq == 0 and eps != 0) or eps + 1 == args.episode:
            #save_ckpt(eps, metalearner, optim, args.save)
            acc = meta_test(eps, val_loader, learner_w_grad, learner_wo_grad,
                            metalearner, args)
            print("Meta validation: ", eps + 1, " Acc: ", acc)
            if acc > best_acc:
                best_acc = acc
                print(" New best: ", acc)
                #logger.loginfo("* Best accuracy so far *\n")

    end_time = datetime.now()
    print("Time to execute: ", end_time - start_time)
    print("Average per iteration", (end_time - start_time) / args.episode)
    torch.cuda.empty_cache()
    #acc = meta_test(eps, val_loader, learner_w_grad, learner_wo_grad,
    #                metalearner, args)
    print("Training complete, best acc: ", best_acc)
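
# accuracy() is called by the meta-training loops above and below but never
# defined in this excerpt. A minimal sketch, where the name and behaviour are
# assumptions inferred from how it is called on (logits, integer targets):
def accuracy(output, target):
    # Fraction of rows where the argmax logit matches the target label.
    preds = output.argmax(dim=1)
    return (preds == target).float().mean().item()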
    return mcc, report


if __name__ == "__main__":
    args = parser.parse_args()
    print(args.model)

    print('Loading SciBERT tokenizer...')
    tokenizer = AutoTokenizer.from_pretrained('allenai/scibert_scivocab_uncased')
    batch_size = int(HYPERPARAMS["BATCH_SIZE"])
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    train_sentences, train_labels = prepare_data(
        input_dir=PATHS["TRAIN_DATA_PATH"], oversample=True)
    trial_sentences, trial_labels = prepare_data(
        input_dir=PATHS["VALIDATION_DATA_PATH"], oversample=True)
    train_sentences = train_sentences + trial_sentences
    train_labels = train_labels + trial_labels
    train_dataloader = create_dataloader(train_sentences, train_labels, tokenizer)

    test_sentences, test_labels = prepare_data(
        input_dir=PATHS["TEST_DATA_PATH"], oversample=False)
    test_dataloader = create_dataloader(test_sentences, test_labels, tokenizer)
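
# create_dataloader() is defined elsewhere in this project. A plausible
# minimal sketch using the standard transformers tokenizer call; the sequence
# length, shuffling, and TensorDataset layout are assumptions (in the script
# above the batch size presumably comes from HYPERPARAMS["BATCH_SIZE"]):
def create_dataloader(sentences, labels, tokenizer, batch_size=16):
    from torch.utils.data import DataLoader, TensorDataset

    # Tokenize the whole list at once, padding to the longest sequence.
    encodings = tokenizer(sentences, padding=True, truncation=True,
                          max_length=128, return_tensors='pt')
    dataset = TensorDataset(encodings['input_ids'],
                            encodings['attention_mask'],
                            torch.tensor(labels))
    return DataLoader(dataset, batch_size=batch_size, shuffle=True)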
def main():
    args, unparsed = FLAGS.parse_known_args()
    args = brandos_load(args)
    if len(unparsed) != 0:
        raise NameError("Argument {} not recognized".format(unparsed))

    if args.seed is None:
        args.seed = random.randint(0, 1e3)
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)

    #args.dev = torch.device('cpu')
    if args.cpu:
        args.dev = torch.device('cpu')
        args.gpu_name = args.dev
    else:
        if not torch.cuda.is_available():
            raise RuntimeError("GPU unavailable.")
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False
        args.dev = torch.device('cuda')
        try:
            args.gpu_name = torch.cuda.get_device_name(0)
        except Exception:
            args.gpu_name = args.dev
    print(f'device {args.dev}')

    logger = GOATLogger(args)

    # Get data
    train_loader, val_loader, test_loader = prepare_data(args)

    # Set up learner, meta-learner
    learner_w_grad = Learner(args.image_size, args.bn_eps, args.bn_momentum,
                             args.n_class).to(args.dev)
    learner_wo_grad = copy.deepcopy(learner_w_grad)
    metalearner = MetaLearner(args.input_size, args.hidden_size,
                              learner_w_grad.get_flat_params().size(0)).to(args.dev)
    metalearner.metalstm.init_cI(learner_w_grad.get_flat_params())

    # Set up loss, optimizer, learning rate scheduler
    optim = torch.optim.Adam(metalearner.parameters(), args.lr)

    if args.resume:
        logger.loginfo("Initialized from: {}".format(args.resume))
        last_eps, metalearner, optim = resume_ckpt(metalearner, optim,
                                                   args.resume, args.dev)

    if args.mode == 'test':
        _ = meta_test(last_eps, test_loader, learner_w_grad, learner_wo_grad,
                      metalearner, args, logger)
        return

    best_acc = 0.0
    logger.loginfo("---> Start training")

    # Meta-training: each episode_x is a data-set split D = (D^{train}, D^{test})
    for eps, (episode_x, episode_y) in enumerate(train_loader):
        print(f'episode = {eps}')
        #print(f'episode_y = {episode_y}')
        # print(f'episode_x.size() = {episode_x.size()}')
        # episode_x.size() = torch.Size([5, 20, 3, 84, 84]), i.e. N classes
        # for a K-shot task with K_eval query examples
        # print(f'episode_x.mean() = {episode_x.mean()}')
        # episode_x.shape = [n_class, n_shot + n_eval, c, h, w]
        # episode_y.shape = [n_class, n_shot + n_eval] --> NEVER USED
        train_input = episode_x[:, :args.n_shot].reshape(
            -1, *episode_x.shape[-3:]).to(args.dev)  # [n_class * n_shot, :]
        train_target = torch.LongTensor(
            np.repeat(range(args.n_class),
                      args.n_shot)).to(args.dev)  # [n_class * n_shot]
        test_input = episode_x[:, args.n_shot:].reshape(
            -1, *episode_x.shape[-3:]).to(args.dev)  # [n_class * n_eval, :]
        test_target = torch.LongTensor(
            np.repeat(range(args.n_class),
                      args.n_eval)).to(args.dev)  # [n_class * n_eval]

        # Train learner with metalearner
        learner_w_grad.reset_batch_stats()
        learner_wo_grad.reset_batch_stats()
        learner_w_grad.train()
        learner_wo_grad.train()
        cI = train_learner(learner_w_grad, metalearner, train_input,
                           train_target, args)

        # Train meta-learner with validation loss
        learner_wo_grad.transfer_params(learner_w_grad, cI)
        output = learner_wo_grad(test_input)
        loss = learner_wo_grad.criterion(output, test_target)
        acc = accuracy(output, test_target)

        optim.zero_grad()
        loss.backward()
        nn.utils.clip_grad_norm_(metalearner.parameters(), args.grad_clip)
        optim.step()

        logger.batch_info(eps=eps, totaleps=args.episode, loss=loss.item(),
                          acc=acc, phase='train')

        # Meta-validation
        if eps % args.val_freq == 0 and eps != 0:
            save_ckpt(eps, metalearner, optim, args.save)
            acc = meta_test(eps, val_loader, learner_w_grad, learner_wo_grad,
                            metalearner, args, logger)
            if acc > best_acc:
                best_acc = acc
                logger.loginfo(f"* Best accuracy so far {acc}*\n")
            logger.loginfo(f'acc: {acc}')

    logger.loginfo(f"* Best accuracy so far {best_acc}*\n")
    logger.loginfo("Done")
def main(args):
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    REVIEW, SCORE, train_data, valid_data, test_data = prepare_data()
    train_iterator, valid_iterator = data.BucketIterator.splits(
        (train_data, valid_data),
        batch_size=args.batch_size,
        device=device,
        sort_within_batch=True,
        sort_key=lambda x: len(x.review))
    print('Finished loading data.')

    vocab_size = len(REVIEW.vocab)
    embedding_dim = 100
    hidden_dim = args.hidden_dim
    output_dim = 5
    num_layers = args.num_layers
    dropout = args.dropout
    padding_index = REVIEW.vocab.stoi['<pad>']
    unknown_index = REVIEW.vocab.stoi['<unk>']

    model = BiLSTM_RNN(vocab_size, embedding_dim, hidden_dim, output_dim,
                       num_layers, dropout, padding_index)

    # Load pretrained embeddings
    pretrained_embeddings = REVIEW.vocab.vectors
    model.embedding.weight.data.copy_(pretrained_embeddings)

    # Reset unknown and padding vectors
    model.embedding.weight.data[unknown_index] = torch.zeros(embedding_dim,
                                                             device=device)
    model.embedding.weight.data[padding_index] = torch.zeros(embedding_dim,
                                                             device=device)

    optimizer = optim.Adam(model.parameters())
    criterion = nn.CrossEntropyLoss()
    model = model.to(device)
    criterion = criterion.to(device)

    epochs = args.num_epochs
    best_loss = np.inf
    if not os.path.exists("./model"):
        os.mkdir("./model")

    for epoch in np.arange(epochs):
        start = time.time()
        train_loss, train_acc = train(model, train_iterator, optimizer,
                                      criterion)
        valid_loss, valid_acc = evaluate(model, valid_iterator, criterion)
        end = time.time()
        duration = time.strftime('%H:%M:%S', time.gmtime(end - start))

        if valid_loss < best_loss:
            best_loss = valid_loss
            with open('./model/model.pkl', 'wb') as f:
                pickle.dump(model, f)

        print(f'\nEpoch {epoch + 1} at {duration}')
        print(f'Train Loss: {train_loss:.3f} - Validation Loss: {valid_loss:.3f}')
        print(f'Train Acc: {train_acc:.2f} - Validation Acc: {valid_acc:.2f}')
import torch

from dataloader import prepare_data
from model import Encoder, Attention, Decoder, Seq2Seq, init_weights
from trainer import Trainer
from config import *

""" load data """
train_loader, val_loader, test_loader, m_dh = prepare_data(
    TRAIN_PATH, VAL_PATH, TEST_PATH, DH_PATH, LOAD_FROM_DUMP, BATCH_SIZE)

""" model setup """
INPUT_DIM, OUTPUT_DIM = len(m_dh.de_vocab), len(m_dh.en_vocab)
enc = Encoder(INPUT_DIM, ENC_EMB_DIM, ENC_HID_DIM, DEC_HID_DIM, ENC_DROPOUT)
attn = Attention(ENC_HID_DIM, DEC_HID_DIM, ATTN_DIM)
dec = Decoder(OUTPUT_DIM, DEC_EMB_DIM, ENC_HID_DIM, DEC_HID_DIM, DEC_DROPOUT,
              attn)
model = Seq2Seq(enc, dec)
model.apply(init_weights)

""" training setup """
optimizer = torch.optim.Adam(model.parameters(), lr=LR)
criterion = torch.nn.CrossEntropyLoss(ignore_index=1)

trainer = Trainer(model, optimizer, criterion, train_loader, val_loader,
                  val_best_path=VAL_BEST_PATH)
trainer.load('ckpts/best.pt')
def create_and_save_adversarial_examples(
        self,
        saved_model_fpath,
        n_examples=100,
        dataset="data/imdb.pkl",
        saveto="output/adversarial_examples.npz",
):
    """Recreates the model from saved parameters, then finds adversarial
    examples. Right now, not especially modular :(

    Allen's note: n_examples is not used

    :param string saved_model_fpath: the name of the file where the model
        has been stored.
    """
    # The model must have been trained, and trained non-adversarially, so
    # that the examples are demonstrative of adversarialness.
    assert self.model_has_been_trained
    assert not self.adversarial

    (_, x_sym, mask_sym, y_sym) = self.build_model(self.model_options)

    # Fast-gradient-sign perturbation of the word embeddings.
    grad_wrt_emb = tensor.grad(self.cost, wrt=self.emb)[0]
    anti_example = tensor.sgn(grad_wrt_emb)
    adv_example = self.emb + self.adv_epsilon * anti_example

    f_adv_example = theano.function([x_sym, mask_sym, y_sym], adv_example,
                                    name='f_adv_example')
    f_identity = theano.function([x_sym], self.emb, name='f_identity')

    # 1. get the data
    print('Loading data')
    #TODO: remove magic 10000!!!
    train, valid, test = load_data(n_words=10000, valid_portion=0.05,
                                   maxlen=self.maxlen, path=dataset)
    corpus = valid

    # make a datastructure in which to store them
    print(len(corpus[1]))
    sentences_and_adversaries = {
        'original_sentences': None,
        'adversarial_sentences': None,
        'saved_model_fpath': saved_model_fpath,  # metadata
        'n_sentences': len(corpus[1]),
        'adversarial_parameters': {
            'alpha': self.adv_alpha,
            'epsilon': self.adv_epsilon,
        },
    }

    x_itf, mask_itf, y_itf = prepare_data(corpus[0], corpus[1])
    # print(f_adv_example(x_itf, mask_itf, y_itf))
    # print(f_adv_example(x_itf, mask_itf, y_itf).shape)
    sentences_and_adversaries['adversarial_sentences'] = f_adv_example(
        x_itf, mask_itf, y_itf)
    sentences_and_adversaries['original_sentences'] = f_identity(x_itf)

    numpy.savez(saveto, sentences_and_adversaries)
def train_lstm(
        self,
        saveto,  # The best model will be saved there
        dataset,
        #--------------------------------------------------------------
        # algorithmic hyperparameters
        encoder='lstm',  # TODO: can be removed, must be lstm.
        l2_reg_U=0.,  # Weight decay for the classifier, applied to the U weights.
        lrate=0.0001,  # Learning rate for sgd (not used for adadelta and rmsprop)
        optimizer="adadelta",  # sgd, adadelta and rmsprop available; sgd is very
                               # hard to use, not recommended (probably needs
                               # momentum and a decaying learning rate).
        batch_size=16,  # The batch size during training.
        wemb_init='word2vec',
        #--------------------------------------------------------------
        # parameters related to convergence, saving, and similar
        max_epochs=5000,  # The maximum number of epochs to run
        patience=10,  # Number of epochs to wait before early stop if no progress
        dispFreq=10,  # Display the training progress every N updates
        n_words=10000,  # Vocabulary size
        validFreq=370,  # Compute the validation error after this many updates.
        saveFreq=1110,  # Save the parameters after every saveFreq updates
        valid_batch_size=64,  # The batch size used for the validation/test sets.
        #--------------------------------------------------------------
        # parameters for extra options
        noise_std=0.,
        use_dropout=True,  # if False, slightly faster but worse test error;
                           # this frequently needs a bigger model.
        reload_model=None,  # Path to a saved model we want to start from.
        return_after_reloading=False,  # Stop right after reloading the model.
        test_size=-1,  # If >0, we keep only this number of test examples.
):
    optimizer = OPTIMIZERS[optimizer]

    # Model options
    self.model_options = locals().copy()

    if reload_model:
        self.faulty_load_params(reload_model)
        # self.init_tparams()
        _, self.wdim = self.params['Wemb'].shape
        self.hdim, ydim = self.params['U'].shape
        self.model_options['ydim'] = ydim
        print(_, self.wdim, self.hdim, ydim)

    self.model_options['hdim'] = self.hdim
    self.model_options['wdim'] = self.wdim
    self.model_options['grad_clip_thresh'] = self.grad_clip_thresh
    print("model options", self.model_options)

    # load_data, prepare_data = get_dataset(dataset)
    print('Loading data')
    # Each of the below is a tuple of
    # (list of sentences, where each is a list of word indices,
    #  list of integer labels)
    if not reload_model:
        train, valid, test = load_data(n_words=n_words, valid_portion=0.05,
                                       maxlen=self.maxlen, path=dataset)
        if test_size > 0:
            # The test set is sorted by size, but we want to keep examples of
            # random sizes, so we take a random selection.
            idx = numpy.arange(len(test[0]))
            numpy.random.shuffle(idx)
            idx = idx[:test_size]
            test = ([test[0][n] for n in idx], [test[1][n] for n in idx])

        ydim = numpy.max(train[1]) + 1
        self.model_options['ydim'] = ydim

    print('Building model')
    if not reload_model:
        # Initialize the word embedding matrix and the parameters of the
        # model (U and b) randomly.
        # self.params is a dict mapping name (string) -> numpy ndarray
        self.init_params(self.model_options)

    # This creates Theano Shared Variables from the parameters.
    # Dict name (string) -> Theano Tensor Shared Variable
    # self.params and self.tparams hold different copies of the weights.
    self.init_tparams()

    # use_noise is for dropout
    (use_noise, x, mask, y) = self.build_model(self.model_options)
    # f_pred_prob, self.f_pred, cost)

    if l2_reg_U > 0.:
        l2_reg_U = theano.shared(numpy_floatX(l2_reg_U), name='l2_reg_U')
        weight_decay = 0.
        weight_decay += (self.tparams['U'] ** 2).sum()
        weight_decay *= l2_reg_U
        self.cost += weight_decay

    f_cost = theano.function([x, mask, y], self.cost, name='f_cost')

    grads = tensor.grad(self.cost, wrt=list(self.tparams.values()))
    f_grad = theano.function([x, mask, y], grads, name='f_grad')

    lr = tensor.scalar(name='lr')
    f_grad_shared, f_update = optimizer(lr, self.tparams, grads,
                                        x, mask, y, self.cost)

    if self.debug:
        util.colorprint("Following is the graph of the shared gradient "
                        "function (f_grad_shared):", "blue")
        theano.printing.debugprint(f_grad_shared.maker.fgraph.outputs[0])

    if return_after_reloading:
        self.model_has_been_trained = True
        return

    print('Optimization')

    kf_valid = self.get_minibatches_idx(len(valid[0]), valid_batch_size)
    kf_test = self.get_minibatches_idx(len(test[0]), valid_batch_size)

    print("%d train examples" % len(train[0]))
    print("%d valid examples" % len(valid[0]))
    print("%d test examples" % len(test[0]))

    history_errs = []
    best_p = None
    bad_counter = 0

    if validFreq == -1:
        validFreq = len(train[0]) // batch_size
    if saveFreq == -1:
        saveFreq = len(train[0]) // batch_size

    uidx = 0  # the number of updates done
    estop = False  # early stop
    start_time = time.time()
    try:
        for epoch in range(max_epochs):
            sys.stdout.flush()
            n_samples = 0

            # Get new shuffled indices for the training set.
            minibatches = self.get_minibatches_idx(len(train[0]), batch_size,
                                                   shuffle=True)

            for _, train_index_list in minibatches:
                uidx += 1
                use_noise.set_value(1.)

                # Select the random examples for this minibatch
                y = [train[1][t] for t in train_index_list]
                x = [train[0][t] for t in train_index_list]

                # Get the data in numpy.ndarray format.
                # This swaps the axes!
                # Returns something of shape (minibatch maxlen, n samples).
                x, mask, y = prepare_data(x, y)
                n_samples += x.shape[1]

                cur_cost_val = f_grad_shared(x, mask, y)
                f_update(lrate)

                if numpy.isnan(cur_cost_val) or numpy.isinf(cur_cost_val):
                    print('NaN detected')
                    return 1., 1., 1.

                if numpy.mod(uidx, dispFreq) == 0:
                    print('Epoch', epoch, 'Update', uidx, 'Cost', cur_cost_val)

                if saveto and numpy.mod(uidx, saveFreq) == 0:
                    print('Saving...', end=' ')
                    if best_p is not None:
                        self.params = best_p
                    else:
                        self.params = self.unzip(self.tparams)
                    numpy.savez(saveto, history_errs=history_errs,
                                **self.params)
                    pkl.dump(self.model_options,
                             open('%s.pkl' % saveto, 'wb'), -1)
                    print('Done')

                if numpy.mod(uidx, validFreq) == 0:
                    use_noise.set_value(0.)
                    train_err = self.pred_error(self.f_pred, prepare_data,
                                                train, minibatches)
                    valid_err = self.pred_error(self.f_pred, prepare_data,
                                                valid, kf_valid)
                    test_err = self.pred_error(self.f_pred, prepare_data,
                                               test, kf_test)

                    history_errs.append([valid_err, test_err])

                    if (uidx == 0 or
                            valid_err <= numpy.array(history_errs)[:, 0].min()):
                        best_p = self.unzip(self.tparams)
                        bad_counter = 0

                    print('Train', train_err, 'Valid', valid_err,
                          'Test', test_err)

                    if (len(history_errs) > patience and valid_err >=
                            numpy.array(history_errs)[:-patience, 0].min()):
                        bad_counter += 1
                        if bad_counter > patience:
                            print('Early Stop!')
                            estop = True
                            break

            print('Seen %d samples' % n_samples)

            if estop:
                break

    except KeyboardInterrupt:
        print("Training interrupted")

    end_time = time.time()

    if best_p is not None:
        self.zipp(best_p, self.tparams)
    else:
        best_p = self.unzip(self.tparams)

    use_noise.set_value(0.)
    kf_train_sorted = self.get_minibatches_idx(len(train[0]), batch_size)
    train_err = self.pred_error(self.f_pred, prepare_data, train,
                                kf_train_sorted)
    valid_err = self.pred_error(self.f_pred, prepare_data, valid, kf_valid)
    test_err = self.pred_error(self.f_pred, prepare_data, test, kf_test)

    print('Train', train_err, 'Valid', valid_err, 'Test', test_err)

    if saveto:
        numpy.savez(saveto, train_err=train_err, valid_err=valid_err,
                    test_err=test_err, history_errs=history_errs, **best_p)

    print('The code ran for %d epochs, with %f sec/epoch' % (
        (epoch + 1), (end_time - start_time) / (1. * (epoch + 1))))
    print('Training took %.1fs' % (end_time - start_time), file=sys.stderr)

    self.model_has_been_trained = True
    return train_err, valid_err, test_err
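
# OPTIMIZERS maps the string name passed to train_lstm() onto an optimizer
# function with the (lr, tparams, grads, x, mask, y, cost) signature used
# above. A sketch, assuming the sgd/adadelta/rmsprop functions from the
# Theano LSTM tutorial are in scope (the dict itself is not shown in this
# excerpt):
OPTIMIZERS = {
    'sgd': sgd,
    'adadelta': adadelta,
    'rmsprop': rmsprop,
}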
random.seed(args.seed)
torch.manual_seed(args.seed)
print(f'lr:{args.lr},step:{args.step_size},gamma:{args.gamma}')

for data_name in data_list:
    print(data_name)
    vocab_dict_path = args.word_embed_file
    file_path = args.data_path + data_name + '.json'
    glove_data = 'data/' + data_name + '_.glove_data.pkl'
    glove_matrix = 'data/' + data_name + '_glove_matrix.pkl'
    glove_data, matrix, review_len = dataloader.word_to_id(
        glove_data, glove_matrix, vocab_dict_path, file_path)
    train_data, test_data, user_dict, item_dict, u_max, i_max, num_users, num_items = dataloader.prepare_data(
        glove_data)
    batch = dataloader.Batch(train_data, test_data, user_dict, item_dict,
                             u_max, i_max, batch_size, review_len,
                             train=True)  # review_len is the length of one review

    if args.base_model == 'NARRE':
        mainmodel = kbs_model.NARRE(num_users, num_items, matrix, review_len,
                                    args)
    elif args.base_model == 'PMF':
import torch

from dataloader import prepare_data
from KEflow.model import prepare_classifier
from KEflow.model.utils import weights_init
from KEflow.trainer import Trainer
from KEflow.config import TYPE_DATA, TYPE_CLS
from KEflow.config import CLS_CONFIG as Ccfg

""" dataloader """
trainset, devset = prepare_data("./data", TYPE_DATA)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=Ccfg["BATCH_SIZE"])
devloader = torch.utils.data.DataLoader(devset, batch_size=Ccfg["BATCH_SIZE"])

""" define model """
model = prepare_classifier(TYPE_CLS, Ccfg["NC"], Ccfg["N_CLASS"])
# model.apply(weights_init)

optimizer = torch.optim.Adam(model.parameters(), lr=Ccfg["LR"],
                             weight_decay=Ccfg["WD"])

""" criterion define """
criterion = torch.nn.CrossEntropyLoss()

""" train """
trainer = Trainer(model, optimizer, criterion, trainloader, devloader,
                  best_save_path="ckpts/")
# trainer.load("ckpts/best.pt")
trainer.train(Ccfg["EPOCHS"], Ccfg["PRINT_FREQ"], Ccfg["VAL_FREQ"])

""" save model """
trainer.save("ckpts/classifier.pt")
import os

from torchvision.utils import save_image

from dataloader import prepare_data
from KEflow.config import TYPE_DATA

_, dataset = prepare_data('./data', TYPE_DATA)

if not os.path.exists(f'aided_sample/{TYPE_DATA}'):
    os.makedirs(f'aided_sample/{TYPE_DATA}')

for i in range(500):
    x, label = dataset[i]
    save_image(x, os.path.join("aided_sample", TYPE_DATA,
                               f"{i}_image{label}.png"), normalize=True)
def main():
    args, unparsed = FLAGS.parse_known_args()
    if len(unparsed) != 0:
        raise NameError("Argument {} not recognized".format(unparsed))

    if args.seed is None:
        args.seed = random.randint(0, 1e3)
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)

    args.dev = torch.device('cpu')

    logger = GOATLogger(args)

    # Get data
    train_loader, val_loader, test_loader = prepare_data(args)

    # Set up learner, meta-learner
    learner_w_grad = Learner(args.image_size, args.bn_eps, args.bn_momentum,
                             args.n_class).to(args.dev)
    learner_wo_grad = copy.deepcopy(learner_w_grad)
    metalearner = MetaLearner(args.input_size, args.hidden_size,
                              learner_w_grad.get_flat_params().size(0)).to(args.dev)
    metalearner.init_cI(learner_w_grad.get_flat_params())

    # Set up loss, optimizer, learning rate scheduler
    optim = torch.optim.Adam(metalearner.parameters(), args.lr)

    if args.resume:
        logger.loginfo("Initialized from: {}".format(args.resume))
        last_eps, metalearner, optim = resume_ckpt(metalearner, optim,
                                                   args.resume, args.dev)

    if args.mode == 'test':
        _ = meta_test(last_eps, test_loader, learner_w_grad, learner_wo_grad,
                      metalearner, args, logger)
        return

    best_acc = 0.0
    logger.loginfo("Start training")

    # Meta-training
    for eps, (episode_x, episode_y) in enumerate(train_loader):
        # episode_x.shape = [n_class, n_shot + n_eval, c, h, w]
        # episode_y.shape = [n_class, n_shot + n_eval] --> NEVER USED
        train_input = episode_x[:, :args.n_shot].reshape(
            -1, *episode_x.shape[-3:]).to(args.dev)  # [n_class * n_shot, :]
        train_target = torch.LongTensor(
            np.repeat(range(args.n_class),
                      args.n_shot)).to(args.dev)  # [n_class * n_shot]
        test_input = episode_x[:, args.n_shot:].reshape(
            -1, *episode_x.shape[-3:]).to(args.dev)  # [n_class * n_eval, :]
        test_target = torch.LongTensor(
            np.repeat(range(args.n_class),
                      args.n_eval)).to(args.dev)  # [n_class * n_eval]

        # Train learner with metalearner
        learner_w_grad.reset_batch_stats()
        learner_wo_grad.reset_batch_stats()
        learner_w_grad.train()
        learner_wo_grad.train()
        cI = train_learner(learner_w_grad, metalearner, train_input,
                           train_target, args)

        # Train meta-learner with validation loss
        learner_wo_grad.transfer_params(learner_w_grad, cI)
        output = learner_wo_grad(test_input)
        loss = learner_wo_grad.criterion(output, test_target)
        acc = accuracy(output, test_target)

        optim.zero_grad()
        loss.backward()
        nn.utils.clip_grad_norm_(metalearner.parameters(), args.grad_clip)
        optim.step()

        logger.batch_info(eps=eps, totaleps=args.episode, loss=loss.item(),
                          acc=acc, phase='train')

        # Meta-validation
        if eps % args.val_freq == 0 and eps != 0:
            print('start eval')
            save_ckpt(eps, metalearner, optim, args.save)
            acc = meta_test(eps, val_loader, learner_w_grad, learner_wo_grad,
                            metalearner, args, logger)
            if acc > best_acc:
                best_acc = acc
                logger.loginfo("* Best accuracy so far *\n")

    logger.loginfo("Done")