def __init__(self, channels, classes, imagesize, **kwargs):
    """Build the CNN layers and the three distillation pools.

    Args:
        channels: Forwarded to ``Cnn.get_layers``.
        classes: Forwarded to ``Cnn.get_layers``; each distillation
            classifier is built with ``classes + 1`` as its second argument.
        imagesize: Forwarded to ``Cnn.get_layers``.
        **kwargs: Accepted but not used by this constructor.
    """
    super(ModelCnn, self).__init__()
    self.layers = Cnn.get_layers(channels, classes, imagesize)

    # The same width feeds both the DenseNet head and the classifier of a pool.
    pools = []
    for width in (32, 64, 64):
        dense_head = models.DenseNet(headsize=width, layers=1, dropout=0.2)
        prototype_clf = models.Classifier(width, classes + 1, useprototype=1, usenorm=0, p=2)
        pools.append(models.GlobalSumPool(h=dense_head, c=prototype_clf))
    self.distills = torch.nn.ModuleList(pools)
def set_model(self):
    """Create the backbone ``self.g`` and the two heads ``self.c1`` / ``self.c2`` on the GPU.

    The backbone is a ``models.WideResNet`` when ``self.args.wide`` is truthy
    and a ``models.DenseNet`` otherwise. Both classifiers are built from the
    backbone's width attribute and ``self.args.num_classes``.
    """
    # Read the flag from self.args, like num_classes below, instead of
    # depending on a module-level ``args`` that happens to be in scope.
    if self.args.wide:
        self.g = models.WideResNet().cuda()
        feature_width = self.g.nChannels
    else:
        self.g = models.DenseNet().cuda()
        feature_width = self.g.in_planes

    num_classes = self.args.num_classes
    self.c1 = models.Classifier(feature_width, num_classes).cuda()
    self.c2 = models.Classifier(feature_width, num_classes).cuda()
def train(args):
    """Fit a ``models.Classifier`` with a PyTorch Lightning trainer.

    Args:
        args: Namespace providing ``debug``, ``encoder``, ``max_epochs`` and
            ``progress_bar``; it is also passed to ``models.Classifier``.
    """
    # Debug runs iterate over the test split in place of the training split.
    training_split = test_data if args.debug else train_data
    train_dataloader = torchtext.data.BucketIterator(training_split, batch_size=64, train=True)
    valid_dataloader = torchtext.data.BucketIterator(valid_data, batch_size=64, train=False)

    classifier = models.Classifier(args)

    logger = pl.loggers.TensorBoardLogger('logs', args.encoder)
    lr_monitor = pl.callbacks.LearningRateMonitor('epoch')
    checkpoint_callback = pl.callbacks.ModelCheckpoint(
        monitor='valid_loss',
        dirpath=f"checkpoints/{args.encoder}",
        filename=f"version_{logger.version}",
    )
    callbacks = [lr_monitor, checkpoint_callback]

    trainer = pl.Trainer(
        max_epochs=args.max_epochs,
        logger=[logger],
        callbacks=callbacks,
        gpus=torch.cuda.device_count(),
        progress_bar_refresh_rate=args.progress_bar,
        weights_summary=None,
    )

    print("Starting training...")
    trainer.fit(classifier, train_dataloader, valid_dataloader)
    print("Done training!")
def eval_bert_plus_lstm():
    """Evaluate the BERT + LSTM + classifier pipeline on the by-article test loader.

    Loads the fine-tuned BERT weights and the fold-3 LSTM/classifier
    checkpoints from ``bert_model/``, puts every module in eval mode (on the
    GPU when ``constant.USE_CUDA`` is set) and calls ``eval_bert``.

    The values returned by ``eval_bert`` are not returned to the caller.
    """
    data_loader_test = prepare_byarticle_data(aug_count=constant.aug_count,
                                              batch_size=constant.batch_size,
                                              test_whole=True)

    # Checkpoints saved from a GPU cannot be deserialised on a CPU-only host
    # unless their storages are remapped, so load onto the CPU when CUDA is off.
    map_location = None if constant.USE_CUDA else "cpu"

    # load model
    # bert model
    from pytorch_pretrained_bert import BertTokenizer, BertModel
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    bert_model = BertModel.from_pretrained('bert-base-uncased')
    state = torch.load("bert_model/pytorch_model.bin", map_location=map_location)
    bert_model.load_state_dict(state)
    # Article and title share one BERT instance.
    article_model = bert_model
    title_model = bert_model

    # lstm model and classifier
    lstm_article = nn.LSTM(input_size=768, hidden_size=constant.hidden_dim,
                           num_layers=constant.n_layers, bidirectional=False,
                           batch_first=True)
    lstm_title = nn.LSTM(input_size=768, hidden_size=constant.hidden_dim_tit,
                         num_layers=constant.n_layers, bidirectional=False,
                         batch_first=True)
    classifier = models.Classifier(hidden_dim1=constant.hidden_dim,
                                   hidden_dim2=constant.hidden_dim_tit)

    lstm_article.load_state_dict(torch.load(
        "bert_model/9folds_large/fold_3_lstm_article_0.9709711056544115.bin",
        map_location=map_location))
    lstm_title.load_state_dict(torch.load(
        "bert_model/9folds_large/fold_3_lstm_title_0.9709711056544115.bin",
        map_location=map_location))
    classifier.load_state_dict(torch.load(
        "bert_model/9folds_large/fold_3_classifier_0.9709711056544115.bin",
        map_location=map_location))

    modules = (article_model, title_model, lstm_article, lstm_title, classifier)
    if constant.USE_CUDA:
        for module in modules:
            module.cuda()
    for module in modules:
        module.eval()

    accuracy, pred, id_ = eval_bert(article_model, title_model, classifier,
                                    data_loader_test, tokenizer, lstm_article,
                                    lstm_title, False, None, 1, True)
def __init__(self, opt, num_classes, source_train_ds, source_test_ds):
    """Set up the mixer, the classifier, the loss and one Adam optimizer per network.

    Args:
        opt: Options object; ``gpu``, ``lr`` and ``beta1`` are read here and the
            object is passed to both network constructors.
        num_classes: Stored and passed to ``models.Classifier``.
        source_train_ds: Stored as ``self.source_train_ds``.
        source_test_ds: Stored as ``self.source_test_ds``.
    """
    self.source_train_ds, self.source_test_ds = source_train_ds, source_test_ds
    self.opt = opt
    self.best_val = 0
    self.num_classes = num_classes

    # Networks, with the project's weight initialisation applied to each.
    self.mixer = models.Mixer(opt)
    self.classifier = models.Classifier(opt, num_classes)
    for network in (self.mixer, self.classifier):
        network.apply(utils.weights_init)

    # Loss criterion.
    self.criterion = nn.CrossEntropyLoss()

    # A non-negative gpu index moves networks and loss to CUDA.
    if opt.gpu >= 0:
        for module in (self.mixer, self.classifier, self.criterion):
            module.cuda()

    # Optimizers share the learning rate and betas.
    adam_betas = (opt.beta1, 0.999)
    self.optimizer_mixer = optim.Adam(self.mixer.parameters(), lr=opt.lr, betas=adam_betas)
    self.optimizer_classifier = optim.Adam(self.classifier.parameters(), lr=opt.lr, betas=adam_betas)
def load_models(self):
    """
    Load models.

    Builds ``self.model`` from the network arguments, restores its weights
    from ``self.args.classifier_file`` and switches it to eval mode.
    """

    self.N_class = numpy.max(self.test_codes) + 1
    network_units = [int(unit) for unit in self.args.network_units.split(',')]

    image_shape = self.test_images.shape
    log('[Testing] using %d input channels' % image_shape[3])
    # Resolution is passed with axis 3 of the image array first.
    resolution = (image_shape[3], image_shape[1], image_shape[2])
    self.model = models.Classifier(
        self.N_class,
        resolution=resolution,
        architecture=self.args.network_architecture,
        activation=self.args.network_activation,
        batch_normalization=not self.args.network_no_batch_normalization,
        start_channels=self.args.network_channels,
        dropout=self.args.network_dropout,
        units=network_units)

    classifier_file = self.args.classifier_file
    assert os.path.exists(classifier_file), 'state file %s not found' % classifier_file
    state = State.load(classifier_file)
    log('[Testing] read %s' % classifier_file)

    self.model.load_state_dict(state.model)
    needs_cuda = self.args.use_gpu and not cuda.is_cuda(self.model)
    if needs_cuda:
        log('[Testing] classifier is not CUDA')
        self.model = self.model.cuda()
    log('[Testing] loaded classifier')

    # !
    self.model.eval()
    log('[Testing] set classifier to eval')
def make_distillpools(self, classes):
    """Return the three distillation pools as a list of ``models.GlobalSumPool``.

    Each pool pairs a ``models.DenseNet`` head with a ``models.Classifier``
    whose first argument is the matching ``self.squash`` entry and whose
    second argument is ``classes + self.optout``.
    """
    # (headsize, bodysize, index into self.squash) for each pool, in order.
    pool_specs = (
        (64, 256, 3),
        (128, 256, 5),
        (256, 1024, 7),
    )
    pools = []
    for headsize, bodysize, squash_index in pool_specs:
        width = self.squash[squash_index]
        dense_head = models.DenseNet(
            headsize=headsize,
            bodysize=bodysize,
            tailsize=width,
            layers=self.layers,
            dropout=0.2,
            activation=self.act,
            bias=self.usebias,
        )
        pool_classifier = models.Classifier(
            width,
            classes + self.optout,
            useprototype=self.useprototype,
            usenorm=self.usenorm,
            p=self.p,
        )
        pools.append(models.GlobalSumPool(h=dense_head, c=pool_classifier))
    return pools
def __init__(self, config):
    """Create data loaders, networks, optimizers and schedulers, then try to resume.

    Args:
        config: Experiment configuration. Read here: ``exp``,
            ``anomaly_class`` (only when ``exp == 'openset'``), ``lr``,
            ``lrC``, ``momentum`` and ``logdir``. It is also passed to
            ``datasets.form_visda_datasets`` and ``models.form_models``.

    If ``<logdir>/checkpoint.pth`` cannot be restored, training starts from
    iteration 0 and the reason is printed.
    """
    self.config = config

    # Create dataloader
    source_loader, target_loader, nclasses = datasets.form_visda_datasets(
        config=config, ignore_anomaly=False)
    self.source_loader = source_loader
    self.target_loader = target_loader
    self.nclasses = nclasses

    # Create model
    self.netF, self.nemb = models.form_models(config)
    print(self.netF)
    self.netC = models.Classifier(self.nemb, self.nclasses, nlayers=1)
    utils.weights_init(self.netC)
    print(self.netC)

    if self.config.exp == 'openset':
        self.ano_class_id = self.source_loader.dataset.class_to_idx[
            self.config.anomaly_class]

    self.netF = torch.nn.DataParallel(self.netF).cuda()
    self.netC = torch.nn.DataParallel(self.netC).cuda()

    # Create optimizer
    self.optimizerF = optim.SGD(self.netF.parameters(),
                                lr=self.config.lr,
                                momentum=config.momentum,
                                weight_decay=0.0005)
    self.optimizerC = optim.SGD(self.netC.parameters(),
                                lr=self.config.lrC,
                                momentum=config.momentum,
                                weight_decay=0.0005)
    self.lr_scheduler_F = optim.lr_scheduler.StepLR(self.optimizerF,
                                                    step_size=7000,
                                                    gamma=0.1)
    self.lr_scheduler_C = optim.lr_scheduler.StepLR(self.optimizerC,
                                                    step_size=7000,
                                                    gamma=0.1)

    # restoring checkpoint
    print('Restoring checkpoint ...')
    try:
        ckpt_data = torch.load(
            os.path.join(config.logdir, 'checkpoint.pth'))
        self.start_iter = ckpt_data['iter']
        self.netF.load_state_dict(ckpt_data['F_dict'])
        self.netC.load_state_dict(ckpt_data['C_dict'])
    except Exception as err:
        # Still best-effort, but Ctrl-C / SystemExit are no longer swallowed
        # and the real cause (missing file, bad key, shape mismatch) is shown.
        print('Checkpoint not found. Training from scratch ...')
        print('Reason: %r' % (err,))
        self.start_iter = 0

    # Other vars
    self.criterion = nn.CrossEntropyLoss().cuda()
def build_classifier(args,tasker):
    """Build the ``mls.Classifier`` for the configured task and move it to ``args.device``.

    The input width is a base feature count taken from
    ``args.gcn_parameters`` (chosen by ``args.model``), multiplied by 1 for
    the two node-classification tasks and by 2 for every other task.
    """
    is_node_task = args.task in ('node_cls', 'static_node_cls')
    mult = 1 if is_node_task else 2

    gcn_params = args.gcn_parameters
    if 'gru' in args.model or 'lstm' in args.model:
        base_feats = gcn_params['lstm_l2_feats']
    elif args.model in ('skipfeatsgcn', 'skipfeatsegcn_h'):
        base_feats = gcn_params['layer_2_feats'] + gcn_params['feats_per_node']
    else:
        base_feats = gcn_params['layer_2_feats']
    in_feats = base_feats * mult

    classifier = mls.Classifier(args, in_features=in_feats, out_features=tasker.num_classes)
    return classifier.to(args.device)
def main(self):
    """
    Main which should be overwritten.

    Reads the test images and codes, builds the classifier, restores it
    from ``self.args.state_file``, switches it to eval mode and calls
    ``self.test()``.
    """

    self.test_images = utils.read_hdf5(self.args.test_images_file).astype(
        numpy.float32)
    log('[Testing] read %s' % self.args.test_images_file)

    # For handling both color and gray images.
    if len(self.test_images.shape) < 4:
        self.test_images = numpy.expand_dims(self.test_images, axis=3)
        log('[Testing] no color images, adjusted size')
    self.resolution = self.test_images.shape[2]
    log('[Testing] resolution %d' % self.resolution)

    # numpy.int was only an alias of the builtin int and was removed in
    # NumPy 1.24; the builtin gives the same dtype on every NumPy version.
    self.test_codes = utils.read_hdf5(self.args.test_codes_file).astype(int)
    self.test_codes = self.test_codes[:, self.args.label_index]
    log('[Testing] read %s' % self.args.test_codes_file)

    N_class = numpy.max(self.test_codes) + 1
    network_units = list(map(int, self.args.network_units.split(',')))
    log('[Testing] using %d input channels' % self.test_images.shape[3])
    self.model = models.Classifier(
        N_class,
        resolution=(self.test_images.shape[3], self.test_images.shape[1],
                    self.test_images.shape[2]),
        architecture=self.args.network_architecture,
        activation=self.args.network_activation,
        batch_normalization=not self.args.network_no_batch_normalization,
        start_channels=self.args.network_channels,
        dropout=self.args.network_dropout,
        units=network_units)

    assert os.path.exists(
        self.args.state_file
    ), 'state file %s not found' % self.args.state_file
    state = State.load(self.args.state_file)
    log('[Testing] read %s' % self.args.state_file)

    self.model.load_state_dict(state.model)
    if self.args.use_gpu and not cuda.is_cuda(self.model):
        log('[Testing] model is not CUDA')
        self.model = self.model.cuda()
    log('[Testing] loaded model')

    self.model.eval()
    log('[Testing] set classifier to eval')

    self.test()
def load_model(self):
    """
    Load model.

    Builds a one-hot decoder from the font database and a classifier
    restored from ``self.args.classifier_file``, both in eval mode, and
    stores them as ``self.model = models.DecoderClassifier(decoder, classifier)``.
    Also sets ``self.N_font`` and ``self.N_class`` from the first two axes
    of the database array.
    """

    database = utils.read_hdf5(self.args.database_file).astype(numpy.float32)
    # The format was '%sd', which appended a stray 'd' to the file name.
    log('[Attack] read %s' % self.args.database_file)

    self.N_font = database.shape[0]
    self.N_class = database.shape[1]
    resolution = database.shape[2]

    # Merge the first two axes so each database entry is indexed by one axis.
    database = database.reshape((database.shape[0] * database.shape[1],
                                 database.shape[2], database.shape[3]))
    database = torch.from_numpy(database)
    if self.args.use_gpu:
        database = database.cuda()
    database = torch.autograd.Variable(database, False)

    N_theta = self.test_theta.shape[1]
    log('[Attack] using %d N_theta' % N_theta)
    decoder = models.AlternativeOneHotDecoder(database, self.N_font,
                                              self.N_class, N_theta)
    decoder.eval()

    image_channels = 1 if N_theta <= 7 else 3
    network_units = list(map(int, self.args.network_units.split(',')))
    log('[Attack] using %d input channels' % image_channels)
    classifier = models.Classifier(
        self.N_class,
        resolution=(image_channels, resolution, resolution),
        architecture=self.args.network_architecture,
        activation=self.args.network_activation,
        batch_normalization=not self.args.network_no_batch_normalization,
        start_channels=self.args.network_channels,
        dropout=self.args.network_dropout,
        units=network_units)

    assert os.path.exists(self.args.classifier_file), \
        'state file %s not found' % self.args.classifier_file
    state = State.load(self.args.classifier_file)
    log('[Attack] read %s' % self.args.classifier_file)

    classifier.load_state_dict(state.model)
    if self.args.use_gpu and not cuda.is_cuda(classifier):
        log('[Attack] classifier is not CUDA')
        classifier = classifier.cuda()
    log('[Attack] loaded classifier')

    # !
    classifier.eval()
    log('[Attack] set classifier to eval')

    self.model = models.DecoderClassifier(decoder, classifier)
def predict():
    """Load the article/title models and the final classifier, then run prediction.

    NOTE(review): this function is named ``predict`` and takes no
    parameters, yet its last statement calls ``predict(...)`` with several
    arguments. Unless that name is rebound to another callable elsewhere in
    the module, the call resolves to this function and raises ``TypeError``
    -- confirm which helper was intended.

    NOTE(review): the ``else:`` after the BERT branch is commented out, so
    the LSTM models and ``models.LR`` below appear to overwrite whatever the
    BERT branch assigned. The block structure was reconstructed from
    flattened source; confirm against the original file.
    """
    # prepare data_loader and vocab
    use_by_article = False  # hard-coded switch between the two data sources
    if use_by_article:
        _, data_loader_test, vocab = prepare_byarticle_data()
    else:
        _, _, data_loader_test, vocab = prepare_data('./data_new/preprocessed_new_{}', constant.batch_size)

    if constant.use_bert:
        from pytorch_pretrained_bert import BertTokenizer, BertModel
        tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
        bert_model = BertModel.from_pretrained('bert-base-uncased')
        state = torch.load("bert_model/pytorch_model.bin")
        bert_model.load_state_dict(state)
        # Article and title share the same BERT instance.
        article_model = bert_model
        title_model = bert_model
        # print("finish bert model loading")
        LR = models.Classifier(hidden_dim1=768, hidden_dim2=768)
        classifer_state = torch.load("bert_model/classifier.bin")
        LR.load_state_dict(classifer_state)
    # else:
    # for basic LSTM model
    article_model = models.LSTM(vocab=vocab,
                                embedding_size=constant.emb_dim,
                                hidden_size=constant.hidden_dim,
                                num_layers=constant.n_layers,
                                pretrain_emb=constant.pretrain_emb
                                )
    title_model = models.LSTM(vocab=vocab,
                              embedding_size=constant.emb_dim,
                              hidden_size=constant.hidden_dim_tit,
                              num_layers=constant.n_layers,
                              pretrain_emb=constant.pretrain_emb
                              )
    LR = models.LR(hidden_dim1=constant.hidden_dim, hidden_dim2=constant.hidden_dim_tit)

    # load parameters
    article_model = load_model(article_model, model_name="article_model")
    title_model = load_model(title_model, model_name="title_model")
    LR = load_model(LR, model_name="LR")

    if constant.USE_CUDA:
        article_model.cuda()
        title_model.cuda()
        LR.cuda()

    # predict and save result in result folder
    predict(article_model, title_model, LR, data_loader_test, name="bypublisher", print_pred=True)
def load_model(args):
    """Return a ``models.Classifier`` restored from ``args.resume`` and moved to CUDA.

    Args:
        args: Namespace with ``resume`` (checkpoint directory) and ``gpu``
            (device index used to build the ``map_location`` string).
    """
    checkpoint_path = os.path.join(args.resume, 'model_best_class.pth.tar')
    target_device = "cuda:" + str(args.gpu)

    classifier = models.Classifier()
    weights = torch.load(checkpoint_path, map_location=target_device)
    classifier.load_state_dict(weights)
    return classifier.cuda()
def load_models(self):
    """
    Init models.

    Creates ``self.encoder``, ``self.decoder`` and ``self.classifier`` from
    the network and classifier arguments and logs each of them.
    """

    image_shape = self.train_images.shape
    log('[Training] using %d input channels' % image_shape[3])
    # All three networks receive the same resolution, axis 3 first.
    resolution = (image_shape[3], image_shape[1], image_shape[2])

    network_units = [int(unit) for unit in self.args.network_units.split(',')]
    use_batch_norm = not self.args.network_no_batch_normalization

    self.encoder = models.LearnedVariationalEncoder(
        self.args.latent_space_size,
        0,
        resolution=resolution,
        architecture=self.args.network_architecture,
        start_channels=self.args.network_channels,
        activation=self.args.network_activation,
        batch_normalization=use_batch_norm,
        units=network_units)
    self.decoder = models.LearnedDecoder(
        self.args.latent_space_size,
        resolution=resolution,
        architecture=self.args.network_architecture,
        start_channels=self.args.network_channels,
        activation=self.args.network_activation,
        batch_normalization=use_batch_norm,
        units=network_units)
    log(self.encoder)
    log(self.decoder)

    classifier_units = [int(unit) for unit in self.args.classifier_units.split(',')]
    self.classifier = models.Classifier(
        1,
        resolution=resolution,
        architecture=self.args.classifier_architecture,
        activation=self.args.classifier_activation,
        batch_normalization=not self.args.classifier_no_batch_normalization,
        start_channels=self.args.classifier_channels,
        dropout=self.args.classifier_dropout,
        units=classifier_units,
        kernel_size=6)
    log(self.classifier)
def load_model(args):
    """Restore the feature stractor and the classifier and move both to CUDA.

    Args:
        args: Namespace with ``resume`` (root checkpoint directory) and
            ``gpu`` (device index used to build the ``map_location`` string).

    Returns:
        The tuple ``(stract, classifier)``.
    """
    classifier = models.Classifier()
    stract = models.Stractor()

    target_device = "cuda:" + str(args.gpu)
    classifier_ckpt = os.path.join(args.resume, 'Classifier', 'model_1_class.pth.tar')
    stractor_ckpt = os.path.join(args.resume, 'featureStractor', 'model_1_feaStr.pth.tar')

    classifier.load_state_dict(torch.load(classifier_ckpt, map_location=target_device))
    classifier = classifier.cuda()

    stract.load_state_dict(torch.load(stractor_ckpt, map_location=target_device))
    stract = stract.cuda()

    return stract, classifier
def __init__(self):
    """Create the Slack clients, helper services and the initial conversation state."""

    def _blank_state():
        # A fresh dict on every call, so state_in and state_out never share
        # the nested 'query_params' dict.
        return {
            'intent': None,
            'command': None,
            'state': 'normal',
            'reply': None,
            'query_params': {},
        }

    # Slack connectivity
    self.slack_token = token
    self.rtmclient = slack.RTMClient(token=self.slack_token)
    self.webclient = slack.WebClient(token=self.slack_token)

    # Fields of the message currently being handled
    self.channels = None
    self.thread_ts = None
    self.user = None
    self.text_in = None
    self.text_out = None

    # Schedule sheet and the responses read from it
    self.schedule = schedule.Read_google_sheet_schedule()
    self.responses_df = self.schedule.run(command='get_responses')

    self.commands = [
        'send_info',
        'show_weather',
        'get_schedule',
        'tell_joke',
        'send_email',
        'who_is',
    ]
    self.classifier = models.Classifier()

    self.state_in = _blank_state()
    self.state_out = _blank_state()

    self.send_email = email.Send_email()
    self.weather_descr = pd.read_csv(
        './data/Multilingual_Weather_Conditions.csv',
        sep=',',
        encoding='utf-8',
    )
    self.sleep_cnt = 0
    self.time_state = 'normal'
def train_cnn(train_dataset,
              validation_dataset,
              batch_size,
              num_filters,
              filter_sizes,
              use_elmo=False,
              epochs=15,
              patience=None,
              learning_rate=3e-4,
              num_classes=2,
              use_gpu=False):
    """
    Trains CNN on train_dataset; optionally, perform early stopping based on validation loss.
    Initialises word embeddings with pre-trained GloVe (OR) uses pre-trained ELMo model to
    dynamically compute embeddings.
    The CNN has one convolution layer for each ngram filter size.

    Parameters
    ----------
    train_dataset: List[Instance]
        Instances for training set
    validation_dataset: List[Instance]
        Instances for validation set
    batch_size: int
        number of Instances to process in a batch
    num_filters: int
        output dim for each convolutional layer, which is the number of 'filters'
        learned by that layer
    filter_sizes: Tuple[int]
        specifies the number of convolutional layers and their sizes
    use_elmo: bool
        use ELMo embeddings if True | GloVe embeddings if False
    epochs: int
        total number of epochs to train on (default=15)
    patience: int or None
        early stopping - number of epochs to wait for validation loss to improve;
        'None' to disable early stopping
    learning_rate: float
        learning rate for Adam Optimizer
    num_classes: int
        default=2 for binary classification
    use_gpu: bool
        True to use the GPU

    Returns
    -------
    Trained Model, Vocabulary, Number of actual training epochs
    """
    if use_elmo:
        vocab = Vocabulary()
        word_embeddings: TextFieldEmbedder = load_elmo_embeddings()
    else:
        vocab = Vocabulary.from_instances(train_dataset + validation_dataset)
        word_embeddings: TextFieldEmbedder = load_glove_embeddings(vocab)

    iterator = BucketIterator(batch_size=batch_size,
                              sorting_keys=[("tokens", "num_tokens")])
    iterator.index_with(vocab)  # numericalize the data

    # CNN encoder
    encoder: Seq2VecEncoder = CnnEncoder(
        embedding_dim=word_embeddings.get_output_dim(),
        num_filters=num_filters,
        ngram_filter_sizes=filter_sizes)

    # Feedforward: a single linear projection onto the classes.
    classifier_feedforward = nn.Linear(encoder.get_output_dim(), num_classes)

    model = models.Classifier(vocab=vocab,
                              word_embeddings=word_embeddings,
                              encoder=encoder,
                              classifier_feedforward=classifier_feedforward)
    if use_gpu:
        model.cuda()

    optimizer = optim.Adam(model.parameters(), learning_rate)

    # Arguments common to both training modes.
    trainer_kwargs = dict(model=model,
                          optimizer=optimizer,
                          iterator=iterator,
                          cuda_device=0 if use_gpu else -1,
                          num_epochs=epochs)
    if patience is None:
        # No early stopping: train on train + validation data together.
        trainer = Trainer(train_dataset=train_dataset + validation_dataset,
                          **trainer_kwargs)
    else:
        # Stop if loss does not improve for 'patience' epochs.
        trainer = Trainer(train_dataset=train_dataset,
                          validation_dataset=validation_dataset,
                          patience=patience,
                          **trainer_kwargs)

    metrics = trainer.train()
    # print(metrics)
    return model, vocab, metrics['training_epochs']
def main(cfg, model_cfg):
    """Train and/or evaluate a classifier with optional UDA consistency loss.

    Args:
        cfg: Passed to ``configuration.params.from_json``; the result
            replaces ``cfg`` and supplies ``seed``, ``uda_mode``, ``mode``,
            ``tsa``, ``total_steps``, ``uda_confidence_thresh``,
            ``uda_softmax_temp``, ``uda_coeff``, ``model_file`` and
            ``pretrain_file``.
        model_cfg: Passed to ``configuration.model.from_json``; the result
            is handed to ``models.Classifier``.

    ``cfg.mode`` selects the action: ``'train'``, ``'train_eval'`` or
    ``'eval'`` (the latter prints the mean accuracy).
    """
    # Load Configuration
    cfg = configuration.params.from_json(cfg)                   # Train or Eval cfg
    model_cfg = configuration.model.from_json(model_cfg)        # BERT_cfg
    set_seeds(cfg.seed)

    # Load Data & Create Criterion
    data = load_data(cfg)
    if cfg.uda_mode:
        unsup_criterion = nn.KLDivLoss(reduction='none')
        data_iter = [data.sup_data_iter(), data.unsup_data_iter()] if cfg.mode=='train' \
            else [data.sup_data_iter(), data.unsup_data_iter(), data.eval_data_iter()]  # train_eval
    else:
        data_iter = [data.sup_data_iter()]
    # Per-example losses are kept (reduction='none') so they can be masked below.
    sup_criterion = nn.CrossEntropyLoss(reduction='none')

    # Load Model
    model = models.Classifier(model_cfg, len(data.TaskDataset.labels))

    # Create trainer
    trainer = train.Trainer(cfg, model, data_iter, optim.optim4GPU(cfg, model), get_device())

    # Training
    def get_loss(model, sup_batch, unsup_batch, global_step):
        """Return ``(final_loss, sup_loss, unsup_loss)``.

        Without an unsupervised batch the result is ``(sup_loss, None, None)``.
        """
        # logits -> prob(softmax) -> log_prob(log_softmax)

        # batch
        input_ids, segment_ids, input_mask, label_ids = sup_batch
        if unsup_batch:
            ori_input_ids, ori_segment_ids, ori_input_mask, \
            aug_input_ids, aug_segment_ids, aug_input_mask = unsup_batch

            # Supervised and augmented examples go through one forward pass;
            # the first sup_size rows of the logits are the supervised ones.
            input_ids = torch.cat((input_ids, aug_input_ids), dim=0)
            segment_ids = torch.cat((segment_ids, aug_segment_ids), dim=0)
            input_mask = torch.cat((input_mask, aug_input_mask), dim=0)

        # logits
        logits = model(input_ids, segment_ids, input_mask)

        # sup loss
        sup_size = label_ids.shape[0]
        sup_loss = sup_criterion(logits[:sup_size], label_ids)  # shape : train_batch_size
        if cfg.tsa:
            # Training signal annealing: examples whose correct-class
            # probability already exceeds the threshold are masked out.
            tsa_thresh = get_tsa_thresh(cfg.tsa, global_step, cfg.total_steps, start=1./logits.shape[-1], end=1)
            larger_than_threshold = torch.exp(-sup_loss) > tsa_thresh   # prob = exp(log_prob), prob > tsa_threshold
            # larger_than_threshold = torch.sum( F.softmax(pred[:sup_size]) * torch.eye(num_labels)[sup_label_ids] , dim=-1) > tsa_threshold
            loss_mask = torch.ones_like(label_ids, dtype=torch.float32) * (1 - larger_than_threshold.type(torch.float32))
            # torch.max(..., torch_device_one()) presumably guards against a
            # zero denominator when every example is masked -- TODO confirm.
            sup_loss = torch.sum(sup_loss * loss_mask, dim=-1) / torch.max(torch.sum(loss_mask, dim=-1), torch_device_one())
        else:
            sup_loss = torch.mean(sup_loss)

        # unsup loss
        if unsup_batch:
            # ori
            with torch.no_grad():
                ori_logits = model(ori_input_ids, ori_segment_ids, ori_input_mask)
                ori_prob = F.softmax(ori_logits, dim=-1)    # KLdiv target
                # ori_log_prob = F.log_softmax(ori_logits, dim=-1)

                # confidence-based masking
                if cfg.uda_confidence_thresh != -1:
                    unsup_loss_mask = torch.max(ori_prob, dim=-1)[0] > cfg.uda_confidence_thresh
                    unsup_loss_mask = unsup_loss_mask.type(torch.float32)
                else:
                    unsup_loss_mask = torch.ones(len(logits) - sup_size, dtype=torch.float32)
                unsup_loss_mask = unsup_loss_mask.to(_get_device())

            # aug
            # softmax temperature controlling
            uda_softmax_temp = cfg.uda_softmax_temp if cfg.uda_softmax_temp > 0 else 1.
            aug_log_prob = F.log_softmax(logits[sup_size:] / uda_softmax_temp, dim=-1)

            # KLdiv loss
            """ nn.KLDivLoss (kl_div) input : log_prob (log_softmax) target : prob (softmax) https://pytorch.org/docs/stable/nn.html unsup_loss is divied by number of unsup_loss_mask it is different from the google UDA official The official unsup_loss is divided by total https://github.com/google-research/uda/blob/master/text/uda.py#L175 """
            unsup_loss = torch.sum(unsup_criterion(aug_log_prob, ori_prob), dim=-1)
            unsup_loss = torch.sum(unsup_loss * unsup_loss_mask, dim=-1) / torch.max(torch.sum(unsup_loss_mask, dim=-1), torch_device_one())
            final_loss = sup_loss + cfg.uda_coeff*unsup_loss

            return final_loss, sup_loss, unsup_loss
        return sup_loss, None, None

    # evaluation
    def get_acc(model, batch):
        """Return ``(accuracy, result)``: the batch mean and the per-example 0/1 correctness."""
        # input_ids, segment_ids, input_mask, label_id, sentence = batch
        input_ids, segment_ids, input_mask, label_id = batch
        logits = model(input_ids, segment_ids, input_mask)
        _, label_pred = logits.max(1)

        result = (label_pred == label_id).float()
        accuracy = result.mean()
        # output_dump.logs(sentence, label_pred, label_id)    # output dump

        return accuracy, result

    if cfg.mode == 'train':
        trainer.train(get_loss, None, cfg.model_file, cfg.pretrain_file)

    if cfg.mode == 'train_eval':
        trainer.train(get_loss, get_acc, cfg.model_file, cfg.pretrain_file)

    if cfg.mode == 'eval':
        results = trainer.eval(get_acc, cfg.model_file, None)
        total_accuracy = torch.cat(results).mean().item()
        print('Accuracy :' , total_accuracy)
def main():
    """Build data loaders, model, optimizer and trainer, then run the mode in ``cfg.mode``.

    Reads the module-level ``cfg`` object throughout.

    NOTE(review): the ``'train'`` branch passes ``get_loss`` and the
    mixmatch branch passes ``get_mixmatch_loss_short``; neither is defined
    inside this function, so both must exist at module scope or these
    branches raise ``NameError`` -- confirm.
    """
    # Load Configuration
    model_cfg = configuration.model.from_json(cfg.model_cfg)        # BERT_cfg
    set_seeds(cfg.seed)

    # Load Data & Create Criterion
    #data = load_data(cfg)
    #if cfg.uda_mode or cfg.mixmatch_mode:
    #    data_iter = [data.sup_data_iter(), data.unsup_data_iter()] if cfg.mode=='train' \
    #        else [data.sup_data_iter(), data.unsup_data_iter(), data.eval_data_iter()]  # train_eval
    #else:
    #    data_iter = [data.sup_data_iter()]

    # my own implementation
    dataset = DataSet(cfg)
    train_dataset, val_dataset, unsup_dataset = dataset.get_dataset()

    # Create the DataLoaders for our training and validation sets.
    train_dataloader = DataLoader(
        train_dataset,  # The training samples.
        sampler = RandomSampler(train_dataset), # Select batches randomly
        batch_size = cfg.train_batch_size # Trains with this batch size.
    )

    validation_dataloader = DataLoader(
        val_dataset, # The validation samples.
        sampler = SequentialSampler(val_dataset), # Pull out batches sequentially.
        batch_size = cfg.eval_batch_size # Evaluate with this batch size.
    )

    # The unsupervised loader stays None when there is no unsupervised dataset.
    unsup_dataloader = None
    if unsup_dataset:
        unsup_dataloader = DataLoader(
            unsup_dataset,
            sampler = RandomSampler(unsup_dataset),
            batch_size = cfg.train_batch_size
        )

    if cfg.uda_mode or cfg.mixmatch_mode:
        data_iter = [train_dataloader, unsup_dataloader, validation_dataloader]
    else:
        data_iter = [train_dataloader, validation_dataloader]

    ema_optimizer = None
    ema_model = None

    # NOTE(review): `model` is only bound for cfg.model in {"custom", "bert"}.
    if cfg.model == "custom":
        model = models.Classifier(model_cfg, NUM_LABELS[cfg.task])
    elif cfg.model == "bert":
        model = BertForSequenceClassificationCustom.from_pretrained(
            "bert-base-uncased", # Use the 12-layer BERT model, with an uncased vocab.
            num_labels = NUM_LABELS[cfg.task],
            output_attentions = False, # Whether the model returns attentions weights.
            output_hidden_states = False, # Whether the model returns all hidden-states.
        )

    if cfg.uda_mode:
        if cfg.unsup_criterion == 'KL':
            unsup_criterion = nn.KLDivLoss(reduction='none')
        else:
            unsup_criterion = nn.MSELoss(reduction='none')
        sup_criterion = nn.CrossEntropyLoss(reduction='none')
        optimizer = optim.optim4GPU(cfg, model)
    elif cfg.mixmatch_mode:
        train_criterion = SemiLoss()
        criterion = nn.CrossEntropyLoss()
        optimizer = torch.optim.Adam(model.parameters(), lr=cfg.lr)
        # EMA copy of the model; its parameters are detached in place here
        # and handed to WeightEMA together with the live model.
        ema_model = models.Classifier(model_cfg, NUM_LABELS[cfg.task])
        for param in ema_model.parameters():
            param.detach_()
        ema_optimizer= WeightEMA(cfg, model, ema_model, alpha=cfg.ema_decay)
    else:
        sup_criterion = nn.CrossEntropyLoss(reduction='none')
        optimizer = optim.optim4GPU(cfg, model)

    # Create trainer
    trainer = train.Trainer(cfg, model, data_iter, optimizer, get_device(), ema_model, ema_optimizer)

    # loss functions
    def get_sup_loss(model, sup_batch, unsup_batch, global_step):
        """Supervised-only loss with optional mixup and TSA masking.

        Returns the same scalar four times; ``unsup_batch`` is not used.
        """
        # batch
        input_ids, segment_ids, input_mask, og_label_ids, num_tokens = sup_batch

        # convert label ids to hot vectors
        # NOTE(review): the one-hot width is hard-coded to 2 while the model
        # is built with NUM_LABELS[cfg.task] -- confirm they always agree.
        sup_size = input_ids.size(0)
        label_ids = torch.zeros(sup_size, 2).scatter_(1, og_label_ids.cpu().view(-1,1), 1)
        label_ids = label_ids.cuda(non_blocking=True)

        # sup mixup
        sup_l = np.random.beta(cfg.alpha, cfg.alpha)
        sup_l = max(sup_l, 1-sup_l)   # keep the mixing weight at or above 0.5
        sup_idx = torch.randperm(sup_size)

        if cfg.sup_mixup and 'word' in cfg.sup_mixup:
            if cfg.simple_pad:
                simple_pad(input_ids, input_mask, num_tokens)
                c_input_ids = None
            else:
                input_ids, c_input_ids = pad_for_word_mixup(
                    input_ids, input_mask, num_tokens, sup_idx
                )
        else:
            c_input_ids = None

        # sup loss
        # Two-stage forward: hidden representation first, then logits from it.
        hidden = model(
            input_ids=input_ids,
            segment_ids=segment_ids,
            input_mask=input_mask,
            output_h=True,
            mixup=cfg.sup_mixup,
            shuffle_idx=sup_idx,
            clone_ids=c_input_ids,
            l=sup_l,
            manifold_mixup=cfg.manifold_mixup,
            simple_pad=cfg.simple_pad,
            no_grad_clone=cfg.no_grad_clone
        )
        logits = model(input_h=hidden)

        if cfg.sup_mixup:
            label_ids = mixup_op(label_ids, sup_l, sup_idx)

        # Cross-entropy against (possibly mixed) soft labels, one value per example.
        sup_loss = -torch.sum(F.log_softmax(logits, dim=1) * label_ids, dim=1)

        if cfg.tsa and cfg.tsa != "none":
            tsa_thresh = get_tsa_thresh(cfg.tsa, global_step, cfg.total_steps, start=1./logits.shape[-1], end=1)
            larger_than_threshold = torch.exp(-sup_loss) > tsa_thresh   # prob = exp(log_prob), prob > tsa_threshold
            # larger_than_threshold = torch.sum( F.softmax(pred[:sup_size]) * torch.eye(num_labels)[sup_label_ids] , dim=-1) > tsa_threshold
            loss_mask = torch.ones_like(og_label_ids, dtype=torch.float32) * (1 - larger_than_threshold.type(torch.float32))
            sup_loss = torch.sum(sup_loss * loss_mask, dim=-1) / torch.max(torch.sum(loss_mask, dim=-1), torch_device_one())
        else:
            sup_loss = torch.mean(sup_loss)

        return sup_loss, sup_loss, sup_loss, sup_loss

    def get_loss_ict(model, sup_batch, unsup_batch, global_step):
        """Supervised loss plus a mixup consistency (MSE) loss on the unsupervised batch.

        Returns ``(final_loss, sup_loss, unsup_loss, w*unsup_loss)``, or the
        supervised loss four times when ``cfg.no_unsup_loss`` is set.
        """
        # batch
        input_ids, segment_ids, input_mask, og_label_ids, num_tokens = sup_batch
        ori_input_ids, ori_segment_ids, ori_input_mask, \
        aug_input_ids, aug_segment_ids, aug_input_mask, \
        ori_num_tokens, aug_num_tokens = unsup_batch

        # convert label ids to hot vectors
        sup_size = input_ids.size(0)
        label_ids = torch.zeros(sup_size, 2).scatter_(1, og_label_ids.cpu().view(-1,1), 1)
        label_ids = label_ids.cuda(non_blocking=True)

        # sup mixup
        sup_l = np.random.beta(cfg.alpha, cfg.alpha)
        sup_l = max(sup_l, 1-sup_l)
        sup_idx = torch.randperm(sup_size)

        if cfg.sup_mixup and 'word' in cfg.sup_mixup:
            if cfg.simple_pad:
                simple_pad(input_ids, input_mask, num_tokens)
                c_input_ids = None
            else:
                input_ids, c_input_ids = pad_for_word_mixup(
                    input_ids, input_mask, num_tokens, sup_idx
                )
        else:
            c_input_ids = None

        # sup loss
        if cfg.model == "bert":
            logits = model(
                input_ids=input_ids,
                c_input_ids=c_input_ids,
                attention_mask=input_mask,
                mixup=cfg.sup_mixup,
                shuffle_idx=sup_idx,
                l=sup_l,
                manifold_mixup = cfg.manifold_mixup,
                no_pretrained_pool=cfg.no_pretrained_pool
            )
        else:
            hidden = model(
                input_ids=input_ids,
                segment_ids=segment_ids,
                input_mask=input_mask,
                output_h=True,
                mixup=cfg.sup_mixup,
                shuffle_idx=sup_idx,
                clone_ids=c_input_ids,
                l=sup_l,
                manifold_mixup=cfg.manifold_mixup,
                simple_pad=cfg.simple_pad,
                no_grad_clone=cfg.no_grad_clone
            )
            logits = model(input_h=hidden)

        if cfg.sup_mixup:
            label_ids = mixup_op(label_ids, sup_l, sup_idx)

        sup_loss = -torch.sum(F.log_softmax(logits, dim=1) * label_ids, dim=1)

        if cfg.tsa and cfg.tsa != "none":
            tsa_thresh = get_tsa_thresh(cfg.tsa, global_step, cfg.total_steps, start=1./logits.shape[-1], end=1)
            larger_than_threshold = torch.exp(-sup_loss) > tsa_thresh   # prob = exp(log_prob), prob > tsa_threshold
            # larger_than_threshold = torch.sum( F.softmax(pred[:sup_size]) * torch.eye(num_labels)[sup_label_ids] , dim=-1) > tsa_threshold
            loss_mask = torch.ones_like(og_label_ids, dtype=torch.float32) * (1 - larger_than_threshold.type(torch.float32))
            sup_loss = torch.sum(sup_loss * loss_mask, dim=-1) / torch.max(torch.sum(loss_mask, dim=-1), torch_device_one())
        else:
            sup_loss = torch.mean(sup_loss)

        if cfg.no_unsup_loss:
            return sup_loss, sup_loss, sup_loss, sup_loss

        # unsup loss
        # Targets come from the unmixed original inputs, computed without gradients.
        with torch.no_grad():
            if cfg.model == "bert":
                ori_logits = model(
                    input_ids = ori_input_ids,
                    attention_mask = ori_input_mask,
                    no_pretrained_pool=cfg.no_pretrained_pool
                )
            else:
                ori_logits = model(ori_input_ids, ori_segment_ids, ori_input_mask)
            ori_prob = F.softmax(ori_logits, dim=-1)    # KLdiv target

        # mixup
        l = np.random.beta(cfg.alpha, cfg.alpha)
        l = max(l, 1-l)
        # NOTE(review): `hidden` is only assigned on the non-bert path above,
        # so this line raises UnboundLocalError when cfg.model == "bert".
        # It is also the hidden state of the *supervised* batch, so the
        # permutation length equals the unsupervised batch size only when
        # both batches have the same size -- confirm.
        idx = torch.randperm(hidden.size(0))

        if cfg.mixup and 'word' in cfg.mixup:
            ori_input_ids, c_ori_input_ids = pad_for_word_mixup(
                ori_input_ids, ori_input_mask, ori_num_tokens, idx
            )
        else:
            c_ori_input_ids = None

        #for i in range(0, batch_size):
        #    new_mask = ori_input_mask[i]
        #    new_ids = ori_input_ids[i]
        #    old_ids = c_ori_input_ids[i]
        #    pdb.set_trace()
        if cfg.model == "bert":
            logits = model(
                input_ids=ori_input_ids,
                c_input_ids=c_ori_input_ids,
                attention_mask=ori_input_mask,
                mixup=cfg.mixup,
                shuffle_idx=idx,
                l=l,
                manifold_mixup = cfg.manifold_mixup,
                no_pretrained_pool=cfg.no_pretrained_pool
            )
        else:
            hidden = model(
                input_ids=ori_input_ids,
                segment_ids=ori_segment_ids,
                input_mask=ori_input_mask,
                output_h=True,
                mixup=cfg.mixup,
                shuffle_idx=idx,
                clone_ids=c_ori_input_ids,
                l=l,
                manifold_mixup=cfg.manifold_mixup,
                simple_pad=cfg.simple_pad,
                no_grad_clone=cfg.no_grad_clone
            )
            logits = model(input_h=hidden)

        if cfg.mixup:
            ori_prob = mixup_op(ori_prob, l, idx)

        probs_u = torch.softmax(logits, dim=1)
        unsup_loss = torch.mean((probs_u - ori_prob)**2)

        # Consistency weight: uda_coeff scaled by the sigmoid ramp-up value.
        w = cfg.uda_coeff * sigmoid_rampup(global_step, cfg.consistency_rampup_ends - cfg.consistency_rampup_starts)
        final_loss = sup_loss + w*unsup_loss
        return final_loss, sup_loss, unsup_loss, w*unsup_loss

    # evaluation
    def get_acc(model, batch):
        """Return ``(accuracy, result)``: the batch mean and the per-example 0/1 correctness."""
        # input_ids, segment_ids, input_mask, label_id, sentence = batch
        input_ids, segment_ids, input_mask, label_id = batch
        logits = model(input_ids, segment_ids, input_mask)
        _, label_pred = logits.max(1)

        result = (label_pred == label_id).float()
        accuracy = result.mean()
        # output_dump.logs(sentence, label_pred, label_id)    # output dump

        return accuracy, result

    if cfg.mode == 'train':
        trainer.train(get_loss, None, cfg.model_file, cfg.pretrain_file)

    if cfg.mode == 'train_eval':
        if cfg.mixmatch_mode:
            trainer.train(get_mixmatch_loss_short, get_acc, cfg.model_file, cfg.pretrain_file)
        elif cfg.uda_test_mode:
            trainer.train(get_sup_loss, get_acc, cfg.model_file, cfg.pretrain_file)
        elif cfg.uda_test_mode_two:
            trainer.train(get_loss_ict, get_acc, cfg.model_file, cfg.pretrain_file)
        else:
            trainer.train(get_sup_loss, get_acc, cfg.model_file, cfg.pretrain_file)

    if cfg.mode == 'eval':
        results = trainer.eval(get_acc, cfg.model_file, None)
        total_accuracy = torch.cat(results).mean().item()
        print('Accuracy :' , total_accuracy)
def create_model(args):
    """Build the network selected by ``args.attn`` together with its optimizer.

    The vocabulary and the pre-trained vectors are unpickled from
    ``<args.data_path>/dict&vectors.pkl``.  The vectors are loaded into the
    model's ``word_embedding`` layer, whose weight is then frozen.  The model
    (moved to the module-level ``device``) and an Adam optimizer over its
    parameters are published through the module globals ``model`` and
    ``optimizer``.

    Parameters
    ----------
    args:
        Namespace read for ``data_path``, ``attn``, ``hidden_size``,
        ``max_length``, ``drop_p`` and ``lr_rate``.

    Returns
    -------
    The ``(word2idx, vectors)`` pair read from the pickle file.
    """
    global model, optimizer

    print("> Create model.")

    # NOTE(review): pickle.load can execute arbitrary code; this file must
    # come from a trusted source.
    pickle_path = os.path.join(args.data_path, "dict&vectors.pkl")
    with open(pickle_path, "rb") as handle:
        word2idx, vectors = pickle.load(handle)

    variant = args.attn
    if variant == 1:
        # Two stacked encoders joined by an attention module.
        width = args.hidden_size
        first_encoder = models.Encoder(hidden_size=width, nlayers=1)
        second_encoder = models.Encoder(input_size=width * 2 * 4,
                                        hidden_size=width,
                                        nlayers=1)
        attn_dim = 128
        attn_module = models.Attention(attn_dim, attn_dim, attn_dim)
        model = models.Classifier(first_encoder,
                                  second_encoder,
                                  attn_module,
                                  hidden_size=width,
                                  rec_len=rec_len,
                                  rep_len=rep_len,
                                  num_of_words=len(word2idx),
                                  drop_p=args.drop_p)
    else:
        # Keyword arguments shared by every remaining architecture.
        shared = dict(window_size=args.max_length,
                      hidden_size=args.hidden_size,
                      drop_p=args.drop_p,
                      num_of_words=len(word2idx))
        if variant == 2:
            model = models.BiDAF(**shared)
        elif variant == 3:
            model = models.RNNatt(**shared, rec_len=rec_len, rep_len=rep_len)
        elif variant == 4:
            model = models.RNNatt_weight(**shared,
                                         rec_len=rec_len,
                                         rep_len=rep_len)
        else:
            # Any other value (nominally args.attn == 0) gets the baseline.
            model = models.RNNbase(**shared)

    # Initialise the embedding layer from the pre-trained vectors and freeze it.
    pretrained = {'weight': vectors.to(torch.float32)}
    model.word_embedding.load_state_dict(pretrained)
    model.word_embedding.weight.requires_grad = False

    model = model.to(device)
    print(model)

    # Adam with default betas and no weight decay.
    optimizer = optim.Adam(model.parameters(), lr=args.lr_rate)
    return word2idx, vectors
def main(args):
    """Train and evaluate a classifier, logging to TensorBoard and checkpointing.

    For every epoch the model is trained on the training loader, evaluated on
    the test loader, and a per-class summary is printed.  A checkpoint is
    written to ``<run_dir>/checkpoint`` on epochs that are a multiple of
    ``args.checkpoint_freq`` and the weights with the best average score so
    far are written to ``<run_dir>/best``.

    Parameters
    ----------
    args:
        Namespace read for ``seed``, ``dataset_name``, ``model_name``,
        ``run_id``, ``dataset_train_len``, ``dataset_test_len``, ``dataroot``,
        ``batchsize``, ``model_type``, ``model_encoder``, optional
        ``model_stride``, ``device``, ``load``, ``optim_class``, ``epochs``
        and ``checkpoint_freq`` (it is also handed to ``config.get_optim``).

    Returns
    -------
    tuple
        ``(ave_precision, best_precision, train_accuracy, test_accuracy)``
        as of the last completed epoch.
    """

    def precision(confusion):
        """Return the per-class and the overall fraction of correct predictions.

        ``confusion[p, t]`` counts samples predicted as ``p`` whose target is
        ``t``; the sums run over dimension 0, so each per-class ratio is taken
        over one target class.  A class without samples yields NaN.
        """
        correct = confusion * torch.eye(confusion.shape[0])
        incorrect = confusion - correct
        correct = correct.sum(0)
        incorrect = incorrect.sum(0)
        per_class = correct / (correct + incorrect)
        total_correct = correct.sum().item()
        total_incorrect = incorrect.sum().item()
        total = total_correct + total_incorrect
        # An empty confusion matrix (no test batches) must not crash the run.
        percent_correct = total_correct / total if total else 0.0
        return per_class, percent_correct

    def get_lr(optimizer):
        """Return the learning rate of the first parameter group (or None)."""
        for param_group in optimizer.param_groups:
            return param_group['lr']

    class Batch:
        """Progress-bar wrapper around a loader that keeps running metrics."""

        def __init__(self, phase, loader, dataset):
            self.type = phase
            self.loader = loader
            self.batch = tqdm(loader, total=len(dataset) // args.batchsize)
            self.ll = 0
            self.confusion = torch.zeros(datapack.num_classes,
                                         datapack.num_classes)
            self.total = 0
            self.correct = 0
            self.batch_step = 0

        def __iter__(self):
            return iter(self.batch)

        def log_step(self):
            """Fold the current ``loss``/``y``/``target`` into the metrics.

            Reads the loop variables of the enclosing ``main`` and returns the
            running accuracy in percent.
            """
            # The step counter lives in main(), not at module level, so it
            # has to be declared nonlocal; `global` raised NameError here.
            nonlocal global_step
            self.batch_step += 1
            self.ll += loss.detach().item()
            _, predicted = y.detach().max(1)
            self.total += target.size(0)
            self.correct += predicted.eq(target).sum().item()
            running_loss = self.ll / self.batch_step
            accuracy = 100.0 * self.correct / self.total
            self.batch.set_description(
                f'Epoch: {epoch} {args.optim_class} LR: {get_lr(optim)} '
                f'{self.type} Loss: {running_loss:.4f} '
                f'Accuracy {accuracy:.4f}% {self.correct}/{self.total}')
            if self.type == 'test':
                for p, t in zip(predicted, target):
                    self.confusion[p, t] += 1
            writer.add_scalar(f'{self.type}_loss', loss.item(), global_step)
            writer.add_scalar(f'{self.type}_accuracy', accuracy, global_step)
            global_step += 1
            return accuracy

    def log_epoch(confusion, best_precision, test_accuracy, train_accuracy):
        """Print the epoch summary; return ``(average, best so far)``."""
        precis, ave_precis = precision(confusion)
        print('')
        print(
            f'{Fore.CYAN}RESULTS FOR EPOCH {Fore.LIGHTYELLOW_EX}{epoch}{Style.RESET_ALL}'
        )
        for i, cls in enumerate(datapack.class_list):
            print(
                f'{Fore.LIGHTMAGENTA_EX}{cls} : {precis[i].item()}{Style.RESET_ALL}'
            )
        best_precision = max(ave_precis, best_precision)
        print(
            f'{Fore.GREEN}ave precision : {ave_precis} best: {best_precision} test accuracy {test_accuracy} '
            f'train accuracy {train_accuracy}{Style.RESET_ALL}')
        return ave_precis, best_precision

    def nop(args, x, target):
        """Move the batch to the target device unchanged."""
        return x.to(args.device), target.to(args.device)

    def flatten(args, x, target):
        """Flatten everything after the batch dimension, then move to device."""
        return x.flatten(start_dim=1).to(args.device), target.to(args.device)

    # --- reproducibility ---
    if args.seed is not None:
        torch.manual_seed(args.seed)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False
        np.random.seed(args.seed)

    # --- variables ---
    run_dir = f'data/models/classifiers/{args.dataset_name}/{args.model_name}/run_{args.run_id}'
    writer = SummaryWriter(log_dir=run_dir)
    global_step = 0  # TensorBoard step, advanced once per logged batch
    ave_precision = 0.0
    best_precision = 0.0
    train_accuracy = 0.0
    test_accuracy = 0.0

    # --- data ---
    datapack = package.datasets[args.dataset_name]
    trainset, testset = datapack.make(args.dataset_train_len,
                                      args.dataset_test_len,
                                      data_root=args.dataroot)
    train = DataLoader(trainset,
                       batch_size=args.batchsize,
                       shuffle=True,
                       drop_last=True,
                       pin_memory=True)
    test = DataLoader(testset,
                      batch_size=args.batchsize,
                      shuffle=True,
                      drop_last=True,
                      pin_memory=True)
    augment = flatten if args.model_type == 'fc' else nop

    # --- model ---
    if 'model_stride' in args:
        encoder, shapes = make_layers(args.model_type,
                                      args.model_encoder,
                                      datapack.shape,
                                      stride=args.model_stride)
    else:
        encoder, shapes = make_layers(args.model_type, args.model_encoder,
                                      datapack.shape)
    classifier = models.Classifier(
        encoder, shapes[-1],
        num_classes=datapack.num_classes).to(args.device)
    if args.load is not None:
        classifier.load_state_dict(torch.load(args.load))

    # --- optimizer ---
    optim, scheduler = config.get_optim(args, classifier.parameters())

    # --- loss function ---
    criterion = nn.CrossEntropyLoss()

    # --- training/test loop ---
    for epoch in range(args.epochs):
        # Re-enter training mode: the previous epoch ended in eval mode.
        classifier.train()
        batch = Batch('train', train, trainset)
        for x, target in batch:
            x, target = augment(args, x, target)
            optim.zero_grad()
            y = classifier(x)
            loss = criterion(y, target)
            loss.backward()
            optim.step()
            train_accuracy = batch.log_step()

        # NOTE(review): the collapsed source does not show whether this save
        # sat inside the batch loop; epoch level is assumed — confirm.
        if epoch % args.checkpoint_freq == 0:
            torch.save(classifier.state_dict(), run_dir + '/checkpoint')

        # Evaluate without dropout/batch-norm updates and without building
        # an autograd graph that is never used.
        classifier.eval()
        batch = Batch('test', test, testset)
        with torch.no_grad():
            for x, target in batch:
                x, target = augment(args, x, target)
                y = classifier(x)
                loss = criterion(y, target)
                test_accuracy = batch.log_step()

        ave_precision, best_precision = log_epoch(batch.confusion,
                                                  best_precision,
                                                  test_accuracy,
                                                  train_accuracy)
        scheduler.step()

        # log_epoch already folded this epoch into best_precision, so the
        # comparison holds exactly when this epoch matches the best so far.
        if ave_precision >= best_precision:
            torch.save(classifier.state_dict(), run_dir + '/best')

    return ave_precision, best_precision, train_accuracy, test_accuracy
def getmodel(self):
    """Instantiate a ``models.Classifier`` from this object's hyper-parameters.

    The attributes ``size``, ``d_model``, ``d_ff``, ``dropout`` and
    ``n_outputs`` are forwarded as keyword arguments of the same name.
    """
    forwarded = ("size", "d_model", "d_ff", "dropout", "n_outputs")
    hyper_params = {name: getattr(self, name) for name in forwarded}
    return models.Classifier(**hyper_params)
def load_model_and_scheduler(self):
    """
    Build the classifier and its scheduler, resuming from the state file if any.

    A fresh ``models.Classifier`` is always constructed from the network
    options in ``self.args``.  When ``self.args.state_file`` exists, the model
    and optimizer states are restored from it, training continues at the epoch
    after the stored one, and the training/testing statistics files are read
    back (both must exist).  Otherwise a new scheduler is initialized for the
    untrained model and ``self.epoch`` stays 0.
    """
    args = self.args
    params = {
        'lr': args.lr,
        'lr_decay': args.lr_decay,
        'lr_min': 0.0000001,
        'weight_decay': args.weight_decay,
    }

    def ensure_cuda():
        # Move the model to the GPU when requested and not already there.
        if args.use_gpu and not cuda.is_cuda(self.model):
            self.model = self.model.cuda()
            log('[Training] model is not CUDA')

    image_shape = self.train_images.shape
    log('[Training] using %d input channels' % image_shape[3])
    network_units = [int(unit) for unit in args.network_units.split(',')]

    self.model = models.Classifier(
        self.N_class,
        resolution=(image_shape[3], image_shape[1], image_shape[2]),
        architecture=args.network_architecture,
        activation=args.network_activation,
        batch_normalization=not args.network_no_batch_normalization,
        start_channels=args.network_channels,
        dropout=args.network_dropout,
        units=network_units)
    self.epoch = 0

    if not os.path.exists(args.state_file):
        ensure_cuda()
        log('[Training] did not load model, using new one')

        self.scheduler = ADAMScheduler(self.model.parameters(), **params)
        self.scheduler.initialize()
    else:
        state = State.load(args.state_file)
        log('[Training] loaded %s' % args.state_file)
        self.model.load_state_dict(state.model)

        # The model must be on its final device before the optimizer is
        # constructed from its parameters.
        ensure_cuda()
        log('[Training] loaded model')

        restored_optimizer = torch.optim.Adam(self.model.parameters(),
                                              params['lr'])
        restored_optimizer.load_state_dict(state.optimizer)
        self.scheduler = ADAMScheduler(restored_optimizer, **params)

        # Continue with the epoch after the one stored in the state file.
        self.epoch = state.epoch + 1
        self.scheduler.update(self.epoch)

        assert os.path.exists(args.training_file) and os.path.exists(
            args.testing_file)
        self.train_statistics = utils.read_hdf5(args.training_file)
        log('[Training] read %s' % args.training_file)
        self.test_statistics = utils.read_hdf5(args.testing_file)
        log('[Training] read %s' % args.testing_file)

        if utils.display():
            self.plot()

    log(self.model)
def train_cnn(train_dataset,
              batch_size,
              num_filters,
              filter_sizes,
              use_elmo=False,
              epochs=15,
              learning_rate=3e-4,
              num_classes=2,
              use_gpu=False):
    """
    Train a CNN text classifier on ``train_dataset``.

    Word embeddings come either from pre-trained GloVe vectors or from a
    pre-trained ELMo model.  The encoder is a ``CnnEncoder`` built from
    ``num_filters`` and ``filter_sizes``, followed by a single linear layer.

    Parameters
    ----------
    train_dataset: List[Instance]
        Instances for training set
    batch_size: int
        number of Instances to process in a batch
    num_filters: int
        number of filters, passed to the encoder as ``num_filters``
    filter_sizes: Tuple[int]
        n-gram sizes, passed to the encoder as ``ngram_filter_sizes``
    use_elmo: bool
        use ELMo embeddings if True | GloVe if False
    epochs: int
        total number of epochs to train on (default=15)
    learning_rate: float
        learning rate for Adam Optimizer
    num_classes: int
        default=2 for binary classification
    use_gpu: bool
        True to use the GPU

    Returns
    -------
    Trained Model, Vocabulary, Number of actual training epochs
    """
    if use_elmo:
        # No vocabulary is built from the data here, so the label namespace
        # is populated by hand.
        vocab = Vocabulary()
        vocab.add_tokens_to_namespace(tokens=['fic', 'non'],
                                      namespace="labels")
        word_embeddings = load_elmo_embeddings()
    else:
        vocab = Vocabulary.from_instances(train_dataset)
        word_embeddings = load_glove_embeddings(vocab)

    iterator = BucketIterator(batch_size=batch_size,
                              sorting_keys=[("tokens", "num_tokens")])
    iterator.index_with(vocab)  # numericalize the data

    # Label index 0 must be 'fic' and index 1 must be 'non'.
    for label_index, expected_label in enumerate(('fic', 'non')):
        assert vocab.get_token_from_index(
            index=label_index, namespace='labels') == expected_label
    print("\n\nThe ordering of labels is ['fic', 'non']\n\n")

    encoder = CnnEncoder(embedding_dim=word_embeddings.get_output_dim(),
                         num_filters=num_filters,
                         ngram_filter_sizes=filter_sizes)
    output_layer = nn.Linear(encoder.get_output_dim(), num_classes)
    model = models.Classifier(vocab=vocab,
                              word_embeddings=word_embeddings,
                              encoder=encoder,
                              classifier_feedforward=output_layer)
    if use_gpu:
        model.cuda()

    adam = optim.Adam(model.parameters(), learning_rate)
    device_index = 0 if use_gpu else -1
    trainer = Trainer(model=model,
                      optimizer=adam,
                      iterator=iterator,
                      train_dataset=train_dataset,
                      cuda_device=device_index,
                      num_epochs=epochs)

    metrics = trainer.train()
    print(metrics)
    return model, vocab, metrics['training_epochs']
def train_lstm(train_dataset,
               batch_size,
               num_layers,
               use_elmo=False,
               epochs=15,
               bidirectional=True,
               learning_rate=3e-4,
               hidden_size=64,
               num_classes=2,
               use_gpu=False):
    """
    Train an LSTM text classifier (vanilla, bidirectional or stacked).

    Word embeddings come either from pre-trained GloVe vectors or from a
    pre-trained ELMo model.  The encoder is an ``nn.LSTM`` wrapped in
    ``PytorchSeq2VecWrapper``, followed by a single linear layer.

    Parameters
    ----------
    train_dataset: List[Instance]
        Instances for training set
    batch_size: int
        number of Instances to process in a batch
    num_layers: int
        number of LSTM layers: 2 or higher for stacked LSTMs
    use_elmo: bool
        use ELMo embeddings if True | GloVe if False
    epochs: int
        total number of epochs to train on (default=15)
    bidirectional: bool
        True for a bidirectional LSTM
    learning_rate: float
        learning rate for Adam Optimizer
    hidden_size: int
        size of the hidden layer in the encoder
    num_classes: int
        default=2 for binary classification
    use_gpu: bool
        True to use the GPU

    Returns
    -------
    Trained Model, Vocabulary, Number of actual training epochs
    """
    if use_elmo:
        # No vocabulary is built from the data here, so the label namespace
        # is populated by hand.
        vocab = Vocabulary()
        vocab.add_tokens_to_namespace(tokens=['fic', 'non'],
                                      namespace="labels")
        word_embeddings = load_elmo_embeddings()
    else:
        vocab = Vocabulary.from_instances(train_dataset)
        word_embeddings = load_glove_embeddings(vocab)

    iterator = BucketIterator(batch_size=batch_size,
                              sorting_keys=[("tokens", "num_tokens")])
    iterator.index_with(vocab)  # numericalize the data

    # Label index 0 must be 'fic' and index 1 must be 'non'.
    for label_index, expected_label in enumerate(('fic', 'non')):
        assert vocab.get_token_from_index(
            index=label_index, namespace='labels') == expected_label
    print("\n\nThe ordering of labels is ['fic', 'non']\n\n")

    recurrent_core = nn.LSTM(word_embeddings.get_output_dim(),
                             hidden_size,
                             num_layers=num_layers,
                             bidirectional=bidirectional,
                             batch_first=True)
    encoder = PytorchSeq2VecWrapper(recurrent_core)
    output_layer = nn.Linear(encoder.get_output_dim(), num_classes)
    model = models.Classifier(vocab=vocab,
                              word_embeddings=word_embeddings,
                              encoder=encoder,
                              classifier_feedforward=output_layer)
    if use_gpu:
        model.cuda()

    adam = optim.Adam(model.parameters(), learning_rate)
    device_index = 0 if use_gpu else -1
    trainer = Trainer(model=model,
                      optimizer=adam,
                      iterator=iterator,
                      train_dataset=train_dataset,
                      cuda_device=device_index,
                      num_epochs=epochs)

    metrics = trainer.train()
    print(metrics)
    return model, vocab, metrics['training_epochs']
def load_data_and_model(self):
    """
    Load data and model.

    Reads the font database, test images, perturbations, success flags,
    accuracy, test theta and test codes from the HDF5 files named in
    ``self.args``, then builds the decoder and the classifier and restores
    the classifier weights from ``self.args.classifier_file``.

    Sets ``self.test_images``, ``self.perturbations``, ``self.success``,
    ``self.accuracy``, ``self.test_theta``, ``self.test_codes``,
    ``self.classifier`` and ``self.decoder``; both networks are left in
    eval mode.

    Raises ``AssertionError`` when the classifier state file is missing.
    """
    database = utils.read_hdf5(self.args.database_file).astype(
        numpy.float32)
    log('[Visualization] read %s' % self.args.database_file)

    # The first three axes are read as font count, class count, resolution.
    N_font = database.shape[0]
    N_class = database.shape[1]
    resolution = database.shape[2]

    # Merge the first two axes into one.
    database = database.reshape((database.shape[0] * database.shape[1],
                                 database.shape[2], database.shape[3]))
    database = torch.from_numpy(database)
    if self.args.use_gpu:
        database = database.cuda()
    # Second positional argument is requires_grad=False.
    database = torch.autograd.Variable(database, False)

    self.test_images = utils.read_hdf5(self.args.test_images_file).astype(
        numpy.float32)
    if len(self.test_images.shape) < 4:
        # Append a trailing axis so the images are always 4-D.
        self.test_images = numpy.expand_dims(self.test_images, axis=3)

    self.perturbations = utils.read_hdf5(
        self.args.perturbations_file).astype(numpy.float32)
    self.perturbations = numpy.swapaxes(self.perturbations, 0, 1)
    log('[Visualization] read %s' % self.args.perturbations_file)

    self.success = utils.read_hdf5(self.args.success_file)
    self.success = numpy.swapaxes(self.success, 0, 1)
    log('[Visualization] read %s' % self.args.success_file)

    self.accuracy = utils.read_hdf5(self.args.accuracy_file)
    # Report the file that was actually read; this used to log success_file.
    log('[Visualization] read %s' % self.args.accuracy_file)

    self.test_theta = utils.read_hdf5(self.args.test_theta_file).astype(
        numpy.float32)
    # Keep only as many rows as there are perturbation rows.
    self.test_theta = self.test_theta[:self.perturbations.shape[0]]
    N_theta = self.test_theta.shape[1]
    log('[Visualization] using %d N_theta' % N_theta)
    log('[Visualization] read %s' % self.args.test_theta_file)

    # numpy.int was only an alias of the builtin int and was removed in
    # NumPy 1.24; the builtin gives the same dtype.
    self.test_codes = utils.read_hdf5(
        self.args.test_codes_file).astype(int)
    self.test_codes = self.test_codes[:self.perturbations.shape[0]]
    # Columns 1 and 2 are used below as font index and class index.
    self.test_codes = self.test_codes[:, 1:3]
    self.test_codes = numpy.concatenate(
        (common.numpy.one_hot(self.test_codes[:, 0], N_font),
         common.numpy.one_hot(self.test_codes[:, 1], N_class)),
        axis=1).astype(numpy.float32)
    log('[Attack] read %s' % self.args.test_codes_file)

    image_channels = 1 if N_theta <= 7 else 3
    network_units = list(map(int, self.args.network_units.split(',')))
    log('[Visualization] using %d input channels' % image_channels)
    self.classifier = models.Classifier(
        N_class,
        resolution=(image_channels, resolution, resolution),
        architecture=self.args.network_architecture,
        activation=self.args.network_activation,
        batch_normalization=not self.args.network_no_batch_normalization,
        start_channels=self.args.network_channels,
        dropout=self.args.network_dropout,
        units=network_units)
    self.decoder = models.AlternativeOneHotDecoder(database, N_font,
                                                   N_class, N_theta)
    self.decoder.eval()

    assert os.path.exists(
        self.args.classifier_file
    ), 'state file %s not found' % self.args.classifier_file
    state = State.load(self.args.classifier_file)
    log('[Visualization] read %s' % self.args.classifier_file)

    self.classifier.load_state_dict(state.model)
    if self.args.use_gpu and not cuda.is_cuda(self.classifier):
        log('[Visualization] classifier is not CUDA')
        self.classifier = self.classifier.cuda()
    log('[Visualization] loaded classifier')

    self.classifier.eval()
    log('[Visualization] set classifier to eval')
shuffle=True) val_loader = torch.utils.data.DataLoader(data.DATA(args, mode='valid'), batch_size=args.train_batch, num_workers=args.workers, shuffle=False) ''' load model ''' print('===> prepare model ...') feature_stractor = models.Stractor() feature_stractor.cuda() # load model to gpu params_to_update = feature_stractor.parameters() params_to_update_str = [] for name, param in feature_stractor.named_parameters(): if param.requires_grad == True: params_to_update_str.append(param) classifier = models.Classifier() classifier = classifier.cuda() params_to_update_class = [] for name, param in classifier.named_parameters(): if param.requires_grad == True: params_to_update_class.append(param) ''' define loss ''' criterion = nn.CrossEntropyLoss() ''' setup optimizer ''' optimizer = torch.optim.Adam(params_to_update_class + params_to_update_str, lr=args.lr, weight_decay=args.weight_decay) ''' setup tensorboard ''' writer = SummaryWriter(os.path.join(args.save_dir, 'train_info')) iters = 0 best_acc = 0
# Run configuration: the class count is fixed here; every other setting is
# copied from the parsed command-line arguments.
NUM_CLASS = 10
HIDDEN_SIZE = args.HIDDEN_SIZE
NUM_STACK = args.NUM_STACK
DROPOUT = args.DROPOUT
USE_CMVN = args.USE_CMVN
MAX_ITERATION = args.MAX_ITERATION
MAX_EPOCH = args.MAX_EPOCH
BATCH_SIZE = args.BATCH_SIZE
MFCC_ROOT = args.MFCC_ROOT
TRAIN_LIST = args.TRAIN_LIST
VALID_LIST = args.VALID_LIST
SAVE_FILE = args.SAVE_FILE

# Build the model and the batch generators.
# Use the GPU when one is available, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# IN_SIZE is not assigned in this span; it must already be in scope.
model = models.Classifier(IN_SIZE, NUM_CLASS, HIDDEN_SIZE, NUM_STACK,
                          DROPOUT).to(device)
# Cross-entropy is the training objective.
loss_fun = nn.CrossEntropyLoss()
# Adam with its default settings; other optimizers can be swapped in here.
optimizer = torch.optim.Adam(model.parameters())
# One batch generator per file list, sharing the MFCC root and batch size.
batch_train = utils.Batch_generator(MFCC_ROOT, TRAIN_LIST, BATCH_SIZE)
batch_valid = utils.Batch_generator(MFCC_ROOT, VALID_LIST, BATCH_SIZE)

# Log the settings used for this run.
logging.info('Batch_size: {}'.format(BATCH_SIZE))
logging.info('Max epoch: {}'.format(MAX_EPOCH))
logging.info('Max iteration: {}'.format(MAX_ITERATION))
logging.info('Hidden size: {}'.format(HIDDEN_SIZE))
logging.info('Num stack: {}'.format(NUM_STACK))
logging.info('Use cmvn: {}'.format(USE_CMVN))
def train_bert(train_dataset,
               validation_dataset,
               batch_size,
               pretrained_model,
               epochs=15,
               patience=None,
               learning_rate=3e-4,
               num_classes=2,
               use_gpu=False):
    """
    Trains BERT on train_dataset; with optional early stopping on
    validation_dataset.

    Parameters
    ----------
    train_dataset: List[Instance]
        Instances for training set
    validation_dataset: List[Instance]
        Instances for validation set
    batch_size: int
        number of Instances to process in a batch
    pretrained_model: str
        pretrained BERT model to use
    epochs: int
        total number of epochs to train on (default=15)
    patience: int or None
        early stopping - number of epochs to wait for validation loss to
        improve; 'None' to disable early stopping, in which case the
        validation instances are added to the training data
    learning_rate: float
        learning rate for Adam Optimizer
    num_classes: int
        default=2 for binary classification
    use_gpu: bool
        True to use the GPU

    Returns
    -------
    Trained Model, Vocabulary, Number of actual training epochs
    """
    vocab = Vocabulary()
    iterator = BucketIterator(batch_size=batch_size,
                              sorting_keys=[("tokens", "num_tokens")])
    iterator.index_with(vocab)  # numericalize the data

    word_embeddings = load_bert_embeddings(pretrained_model)
    encoder = BertSentencePooler(vocab)

    # Feedforward: a single linear layer on top of the pooled encoding.
    classifier_feedforward = nn.Linear(encoder.get_output_dim(), num_classes)
    model = models.Classifier(vocab=vocab,
                              word_embeddings=word_embeddings,
                              encoder=encoder,
                              classifier_feedforward=classifier_feedforward)
    if use_gpu:
        model.cuda()

    optimizer = optim.Adam(model.parameters(), learning_rate)

    # Arguments common to both training set-ups.
    trainer_options = dict(model=model,
                           optimizer=optimizer,
                           iterator=iterator,
                           cuda_device=0 if use_gpu else -1,
                           num_epochs=epochs)
    if patience is None:
        # No early stopping: train on both train+validation dataset
        trainer = Trainer(train_dataset=train_dataset + validation_dataset,
                          **trainer_options)
    else:
        # stop if loss does not improve for 'patience' epochs
        trainer = Trainer(train_dataset=train_dataset,
                          validation_dataset=validation_dataset,
                          patience=patience,
                          **trainer_options)

    metrics = trainer.train()
    return model, vocab, metrics['training_epochs']
def train_lstm(train_dataset,
               validation_dataset,
               batch_size,
               num_layers,
               use_elmo=False,
               epochs=15,
               patience=None,
               bidirectional=True,
               learning_rate=3e-4,
               hidden_size=64,
               num_classes=2,
               use_gpu=False):
    """
    Trains a LSTM and its variants (Vanilla, Bi-Directional, Stacked BiLSTM)
    on train_dataset; optionally, perform early stopping based on validation
    loss. Initialises word embeddings with pre-trained GloVe (OR) uses a
    pre-trained ELMo model to dynamically compute embeddings.

    Parameters
    ----------
    train_dataset: List[Instance]
        Instances for training set
    validation_dataset: List[Instance]
        Instances for validation set
    batch_size: int
        number of Instances to process in a batch
    num_layers: int
        number of BiLSTM layers: 2 or higher for Stacked BiLSTMs
    use_elmo: bool
        use ELMo embeddings if True | GloVe embeddings if False
    epochs: int
        total number of epochs to train for (default=15)
    patience: int or None
        early stopping - number of epochs to wait for validation loss to
        improve; 'None' to disable early stopping, in which case the
        validation instances are added to the training data
    bidirectional: bool
        True for a bidirectional LSTM
    learning_rate: float
        learning rate for Adam Optimizer
    hidden_size: int
        size of the hidden layer in the encoder
    num_classes: int
        default=2 for binary classification
    use_gpu: bool
        True to use the GPU

    Returns
    -------
    Trained Model, Vocabulary, Number of actual training epochs
    """
    if use_elmo:
        vocab = Vocabulary()
        word_embeddings = load_elmo_embeddings()
    else:
        # The GloVe vocabulary is built from both datasets.
        vocab = Vocabulary.from_instances(train_dataset + validation_dataset)
        word_embeddings = load_glove_embeddings(vocab)

    iterator = BucketIterator(batch_size=batch_size,
                              sorting_keys=[("tokens", "num_tokens")])
    iterator.index_with(vocab)  # numericalize the data

    # BiLSTM encoder
    encoder = PytorchSeq2VecWrapper(
        nn.LSTM(word_embeddings.get_output_dim(),
                hidden_size,
                num_layers=num_layers,
                bidirectional=bidirectional,
                batch_first=True))

    # Feedforward: a single linear layer on top of the encoder output.
    classifier_feedforward = nn.Linear(encoder.get_output_dim(), num_classes)
    model = models.Classifier(vocab=vocab,
                              word_embeddings=word_embeddings,
                              encoder=encoder,
                              classifier_feedforward=classifier_feedforward)
    if use_gpu:
        model.cuda()

    optimizer = optim.Adam(model.parameters(), learning_rate)

    # Arguments common to both training set-ups.
    trainer_options = dict(model=model,
                           optimizer=optimizer,
                           iterator=iterator,
                           cuda_device=0 if use_gpu else -1,
                           num_epochs=epochs)
    if patience is None:
        # No early stopping: train on both train+validation dataset
        trainer = Trainer(train_dataset=train_dataset + validation_dataset,
                          **trainer_options)
    else:
        # stop if loss does not improve for 'patience' epochs
        trainer = Trainer(train_dataset=train_dataset,
                          validation_dataset=validation_dataset,
                          patience=patience,
                          **trainer_options)

    metrics = trainer.train()
    return model, vocab, metrics['training_epochs']