def test_saveClassifier_loadClassifier(self):
    temp = Classifier.train(self.infos)
    c = temp["classifier"]
    Classifier.saveClassifier(c, "whentest")
    c2 = Classifier.loadClassifier("whentest")
    assert isinstance(c2, nltk.classify.naivebayes.NaiveBayesClassifier)
    os.remove("whentest.classifier")
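# A minimal sketch of what save/load helpers like the ones exercised above
# typically do for an NLTK classifier: pickle the object to disk. These
# function names are illustrative, not the repo's actual Classifier helpers;
# only the '.classifier' extension is taken from the test itself.
import pickle

def save_classifier(classifier, name):
    # serialize the trained classifier to '<name>.classifier'
    with open(name + '.classifier', 'wb') as f:
        pickle.dump(classifier, f)

def load_classifier(name):
    # deserialize a previously saved classifier
    with open(name + '.classifier', 'rb') as f:
        return pickle.load(f)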
def train_epochs(self, train_loader, test_loader):
    # define cross entropy loss (requires logits as outputs)
    loss_fn = torch.nn.CrossEntropyLoss()

    # initialize an optimizer
    optimizer = torch.optim.Adam(
        self.model.parameters(),
        lr=self.config['learning_rate'],
        weight_decay=self.config['weight_decay'])

    # initialize guide model (for making adversarial samples)
    guide_model = Classifier(
        self.config['input_dimensions'],
        self.config['output_dimension'],
        hid_act=self.config['hidden_activation'],
        norm=self.config['normalization'])

    # must be able to load guide model to proceed
    guide_model.load_state_dict(
        torch.load(self.config['guide_model_file'],
                   map_location=self.device))
    print('[INFO]: loaded guide model from \'{}\''
          .format(self.config['guide_model_file']))

    # move guide model to the training device
    guide_model.to(self.device)

    # initialize tensorboard writer
    writer = SummaryWriter('{}runs/{}/'.format(
        self.config['output_directory'], self.config['model_name']))

    print('[INFO]: training...')

    # train through all epochs
    for e in range(self.config['number_epochs']):
        # get epoch start time
        epoch_start = time.time()

        # reset accumulators
        train_epoch_loss = 0.0
        train_num_correct = 0
        test_epoch_loss = 0.0
        test_num_correct = 0

        # run through epoch of train data
        for i, batch in enumerate(train_loader):
            # parse batch and move to training device
            input_batch = batch['image'].to(self.device)
            label_batch = batch['label'].to(self.device)

            # require gradient for input data (need to do this to compute
            # the gradients for inputs during backward() call)
            input_batch.requires_grad = True

            # make adversarial samples from the input batch: FGSM-style
            # step along the sign of the guide loss's input gradient,
            # with a random magnitude epsilon in [-0.5, 0.5]
            adv_logits_batch = guide_model(input_batch)
            adv_loss = loss_fn(adv_logits_batch, label_batch)
            adv_loss.backward()
            adv_grads = input_batch.grad
            epsilon = (0.5 * ((2 * torch.rand(1)) - 1)).to(self.device)
            # detach so the later backward pass does not flow through
            # the perturbation step
            adv_input_batch = (input_batch
                               + (epsilon * torch.sign(adv_grads))).detach()

            # rescale pixel values of adv batch into [-1, 1]
            new_min, new_max = -1., 1.
            old_min = torch.min(adv_input_batch)
            old_max = torch.max(adv_input_batch)
            adv_input_batch = (((adv_input_batch - old_min) / (
                old_max - old_min)) * (new_max - new_min)) + new_min

            # compute output batch logits and predictions
            logits_batch = self.model(input_batch)
            adv_logits_batch = self.model(adv_input_batch)
            pred_batch = torch.argmax(logits_batch, dim=1)

            # compute combined normal/adversarial loss
            loss = loss_fn(logits_batch, label_batch)
            adv_loss = loss_fn(adv_logits_batch, label_batch)
            alpha = 0.5
            total_loss = (alpha * loss) + ((1 - alpha) * adv_loss)

            # accumulate loss
            train_epoch_loss += loss.item()

            # accumulate number correct (comparison already yields a
            # tensor, so no extra torch.tensor() wrapper is needed)
            train_num_correct += torch.sum(
                pred_batch == label_batch).item()

            # zero out gradient attributes for all trainable params
            optimizer.zero_grad()

            # compute gradients w.r.t loss (repopulate gradient
            # attribute for all trainable params)
            total_loss.backward()

            # update params with current gradients
            optimizer.step()

        # compute epoch average loss and accuracy metrics
        # (enumerate starts at 0, so the batch count is i + 1)
        train_loss = train_epoch_loss / (i + 1)
        train_acc = 100.0 * train_num_correct / self.config['number_train']

        # run through epoch of test data
        for i, batch in enumerate(test_loader):
            # parse batch and move to training device
            input_batch = batch['image'].to(self.device)
            label_batch = batch['label'].to(self.device)

            # compute output batch logits and predictions
            logits_batch = self.model(input_batch)
            pred_batch = torch.argmax(logits_batch, dim=1)

            # compute loss
            loss = loss_fn(logits_batch, label_batch)

            # accumulate loss
            test_epoch_loss += loss.item()

            # accumulate number correct
            test_num_correct += torch.sum(
                pred_batch == label_batch).item()

        # compute epoch average loss and accuracy metrics
        test_loss = test_epoch_loss / (i + 1)
        test_acc = 100.0 * test_num_correct / self.config['number_test']

        # compute epoch time
        epoch_time = time.time() - epoch_start

        # save model
        torch.save(self.model.state_dict(), '{}{}.pt'.format(
            self.config['output_directory'], self.config['model_name']))

        # add metrics to tensorboard
        writer.add_scalar('Loss/Train', train_loss, e + 1)
        writer.add_scalar('Accuracy/Train', train_acc, e + 1)
        writer.add_scalar('Loss/Test', test_loss, e + 1)
        writer.add_scalar('Accuracy/Test', test_acc, e + 1)

        # print epoch metrics
        template = '[INFO]: Epoch {}, Epoch Time {:.2f}s, ' \
                   'Train Loss: {:.2f}, Train Accuracy: {:.2f}, ' \
                   'Test Loss: {:.2f}, Test Accuracy: {:.2f}'
        print(template.format(e + 1, epoch_time, train_loss,
                              train_acc, test_loss, test_acc))
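# A minimal standalone sketch of the FGSM-style perturbation step used in
# train_epochs above, isolated from the training loop. The toy model, data,
# and fixed epsilon here are assumptions for illustration; the training code
# above draws a random epsilon and uses a separately loaded guide model.
import torch

def fgsm_perturb(model, loss_fn, x, y, epsilon=0.1):
    # enable input gradients on a detached copy of the batch
    x = x.clone().detach().requires_grad_(True)
    loss = loss_fn(model(x), y)
    loss.backward()
    # step along the sign of the input gradient of the loss
    return (x + epsilon * x.grad.sign()).detach()

toy_model = torch.nn.Linear(4, 3)
x = torch.randn(8, 4)
y = torch.randint(0, 3, (8,))
x_adv = fgsm_perturb(toy_model, torch.nn.CrossEntropyLoss(), x, y)
print((x_adv - x).abs().max())  # perturbation magnitude bounded by epsilon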
def getWhen(text):
    ctokens = Classifier.getClassifiedTokens("when", text)
    # keep only the tokens whose classification flag is True
    ctokenstrue = [ctoken[1] for ctoken in ctokens if ctoken[0]]
    return ctokenstrue
class VanillaClassifier():
    def __init__(self, config):
        # training device - try to find a gpu, if not just use cpu
        self.device = torch.device(
            'cuda:0' if torch.cuda.is_available() else 'cpu')
        print('[INFO]: using \'{}\' device'.format(self.device))

        # initialize model
        self.model = Classifier(
            config['input_dimensions'],
            config['output_dimension'],
            hid_act=config['hidden_activation'],
            norm=config['normalization'])

        # if model file provided, load pretrained params
        if config['model_file']:
            self.load(config['model_file'])

        # move the model to the training device
        self.model.to(self.device)

        self.config = config

    def load(self, model_file):
        self.model.load_state_dict(
            torch.load(model_file, map_location=self.device))
        print('[INFO]: loaded model from \'{}\''.format(model_file))

    def logits(self, x):
        return self.model(x)

    def predict(self, x):
        return torch.argmax(self.model(x), dim=1)

    def train_epochs(self, train_loader, test_loader):
        # define cross entropy loss (requires logits as outputs)
        loss_fn = torch.nn.CrossEntropyLoss()

        # initialize an optimizer
        optimizer = torch.optim.Adam(
            self.model.parameters(),
            lr=self.config['learning_rate'],
            weight_decay=self.config['weight_decay'])

        # initialize tensorboard writer
        writer = SummaryWriter('{}runs/{}/'.format(
            self.config['output_directory'], self.config['model_name']))

        print('[INFO]: training...')

        # train through all epochs
        for e in range(self.config['number_epochs']):
            # get epoch start time
            epoch_start = time.time()

            # reset accumulators
            train_epoch_loss = 0.0
            train_num_correct = 0
            test_epoch_loss = 0.0
            test_num_correct = 0

            # run through epoch of train data
            for i, batch in enumerate(train_loader):
                # parse batch and move to training device
                input_batch = batch['image'].to(self.device)
                label_batch = batch['label'].to(self.device)

                # compute output batch logits and predictions
                logits_batch = self.model(input_batch)
                pred_batch = torch.argmax(logits_batch, dim=1)

                # compute loss
                loss = loss_fn(logits_batch, label_batch)

                # accumulate loss
                train_epoch_loss += loss.item()

                # accumulate number correct (comparison already yields a
                # tensor, so no extra torch.tensor() wrapper is needed)
                train_num_correct += torch.sum(
                    pred_batch == label_batch).item()

                # zero out gradient attributes for all trainable params
                optimizer.zero_grad()

                # compute gradients w.r.t loss (repopulate gradient
                # attribute for all trainable params)
                loss.backward()

                # update params with current gradients
                optimizer.step()

            # compute epoch average loss and accuracy metrics
            # (enumerate starts at 0, so the batch count is i + 1)
            train_loss = train_epoch_loss / (i + 1)
            train_acc = 100.0 * train_num_correct / self.config['number_train']

            # run through epoch of test data
            for i, batch in enumerate(test_loader):
                # parse batch and move to training device
                input_batch = batch['image'].to(self.device)
                label_batch = batch['label'].to(self.device)

                # compute output batch logits and predictions
                logits_batch = self.model(input_batch)
                pred_batch = torch.argmax(logits_batch, dim=1)

                # compute loss
                loss = loss_fn(logits_batch, label_batch)

                # accumulate loss
                test_epoch_loss += loss.item()

                # accumulate number correct
                test_num_correct += torch.sum(
                    pred_batch == label_batch).item()

            # compute epoch average loss and accuracy metrics
            test_loss = test_epoch_loss / (i + 1)
            test_acc = 100.0 * test_num_correct / self.config['number_test']

            # compute epoch time
            epoch_time = time.time() - epoch_start

            # save model
            torch.save(
                self.model.state_dict(),
                '{}{}.pt'.format(self.config['output_directory'],
                                 self.config['model_name']))

            # add metrics to tensorboard
            writer.add_scalar('Loss/Train', train_loss, e + 1)
            writer.add_scalar('Accuracy/Train', train_acc, e + 1)
            writer.add_scalar('Loss/Test', test_loss, e + 1)
            writer.add_scalar('Accuracy/Test', test_acc, e + 1)

            # print epoch metrics
            template = '[INFO]: Epoch {}, Epoch Time {:.2f}s, ' \
                       'Train Loss: {:.2f}, Train Accuracy: {:.2f}, ' \
                       'Test Loss: {:.2f}, Test Accuracy: {:.2f}'
            print(template.format(e + 1, epoch_time, train_loss,
                                  train_acc, test_loss, test_acc))
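# A hedged usage sketch for VanillaClassifier. The config keys mirror the
# ones the class reads above; the concrete values (dimensions, hyperparams,
# paths) are illustrative assumptions, not values taken from the repo, and
# the loaders are assumed to yield dicts with 'image' and 'label' keys as
# the training loop expects.
config = {
    'input_dimensions': (28, 28, 1),  # assumed (H, W, C) per the indexing above
    'output_dimension': 10,
    'hidden_activation': 'relu',      # assumed value accepted by Classifier
    'normalization': None,            # assumed value accepted by Classifier
    'model_file': None,               # or a path to pretrained weights
    'learning_rate': 1e-3,
    'weight_decay': 1e-5,
    'number_epochs': 10,
    'number_train': 60000,
    'number_test': 10000,
    'output_directory': './out/',
    'model_name': 'vanilla',
}
clf = VanillaClassifier(config)
# clf.train_epochs(train_loader, test_loader)
# preds = clf.predict(some_image_batch)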
class SmoothClassifier():
    def __init__(self, config):
        # training device - try to find a gpu, if not just use cpu
        self.device = torch.device(
            'cuda:0' if torch.cuda.is_available() else 'cpu')
        print('[INFO]: using \'{}\' device'.format(self.device))

        # initialize model
        self.model = Classifier(
            config['input_dimensions'],
            config['output_dimension'],
            hid_act=config['hidden_activation'],
            norm=config['normalization'])

        # initialize a random input distribution
        self.n_samples = 32
        mean = 0.0
        stdev = 0.25
        self.input_dist = torch.distributions.Normal(
            mean * torch.ones(
                config['batch_size'], self.n_samples,
                config['input_dimensions'][-1],
                config['input_dimensions'][0],
                config['input_dimensions'][1]),
            stdev * torch.ones(
                config['batch_size'], self.n_samples,
                config['input_dimensions'][-1],
                config['input_dimensions'][0],
                config['input_dimensions'][1]))

        # if model file provided, load pretrained params
        if config['model_file']:
            self.load(config['model_file'])

        # move the model to the training device
        self.model.to(self.device)

        self.config = config

    def load(self, model_file):
        self.model.load_state_dict(
            torch.load(model_file, map_location=self.device))
        print('[INFO]: loaded model from \'{}\''.format(model_file))

    def logits(self, x):
        return self.model(x)

    def predict(self, x):
        # get number of samples in batch
        bs = x.shape[0]

        # add noise to input batch
        x = x.unsqueeze(1) + self.input_dist.sample()[:bs].to(self.device)

        # reshape input batch by stacking samples into batch dimension
        x = x.view((
            bs * self.n_samples,
            self.config['input_dimensions'][-1],
            self.config['input_dimensions'][0],
            self.config['input_dimensions'][1]))

        # compute output batch logits and predictions
        logits = self.model(x)
        pred = torch.argmax(logits, dim=1)

        # reshape predictions to unstack samples from batch dimension
        pred = pred.view((bs, self.n_samples))

        # take mode along sample dim to get final prediction
        pred = torch.mode(pred, dim=1)[0]

        return pred

    def train_epochs(self, train_loader, test_loader):
        # define cross entropy loss (requires logits as outputs)
        loss_fn = torch.nn.CrossEntropyLoss()

        # initialize an optimizer
        optimizer = torch.optim.Adam(
            self.model.parameters(),
            lr=self.config['learning_rate'],
            weight_decay=self.config['weight_decay'])

        # initialize tensorboard writer
        writer = SummaryWriter('{}runs/{}/'.format(
            self.config['output_directory'], self.config['model_name']))

        print('[INFO]: training...')

        # train through all epochs
        for e in range(self.config['number_epochs']):
            # get epoch start time
            epoch_start = time.time()

            # reset accumulators
            train_epoch_loss = 0.0
            train_num_correct = 0
            test_epoch_loss = 0.0
            test_num_correct = 0

            # run through epoch of train data
            for i, batch in enumerate(train_loader):
                # parse batch and move to training device
                input_batch = batch['image'].to(self.device)
                label_batch = batch['label'].to(self.device)

                # get number of samples in batch
                bs = input_batch.shape[0]

                # add noise to input batch
                input_batch = input_batch.unsqueeze(1) \
                    + self.input_dist.sample()[:bs].to(self.device)

                # reshape input batch by stacking samples into batch dimension
                input_batch = input_batch.view((
                    bs * self.n_samples,
                    self.config['input_dimensions'][-1],
                    self.config['input_dimensions'][0],
                    self.config['input_dimensions'][1]))

                # rescale pixel values of batch into [-1, 1]
                new_min, new_max = -1., 1.
                old_min = torch.min(input_batch)
                old_max = torch.max(input_batch)
                input_batch = (((input_batch - old_min) / (
                    old_max - old_min)) * (new_max - new_min)) + new_min

                # repeat and interleave label batch to repeat labels for each
                # sample stacked into the batch dimension
                label_batch = label_batch.repeat_interleave(self.n_samples)

                # compute output batch logits and predictions
                logits_batch = self.model(input_batch)
                pred_batch = torch.argmax(logits_batch, dim=1)

                # compute loss
                loss = loss_fn(logits_batch, label_batch)

                # accumulate loss
                train_epoch_loss += loss.item()

                # accumulate number correct (comparison already yields a
                # tensor, so no extra torch.tensor() wrapper is needed)
                train_num_correct += torch.sum(
                    pred_batch == label_batch).item()

                # zero out gradient attributes for all trainable params
                optimizer.zero_grad()

                # compute gradients w.r.t loss (repopulate gradient
                # attribute for all trainable params)
                loss.backward()

                # update params with current gradients
                optimizer.step()

            # compute epoch average loss and accuracy metrics
            # (enumerate starts at 0, so the batch count is i + 1)
            train_loss = train_epoch_loss / (i + 1)
            train_acc = 100.0 * train_num_correct \
                / (self.config['number_train'] * self.n_samples)

            # run through epoch of test data
            for i, batch in enumerate(test_loader):
                # parse batch and move to training device
                input_batch = batch['image'].to(self.device)
                label_batch = batch['label'].to(self.device)

                # compute output batch logits and predictions
                logits_batch = self.model(input_batch)
                pred_batch = torch.argmax(logits_batch, dim=1)

                # compute loss
                loss = loss_fn(logits_batch, label_batch)

                # accumulate loss
                test_epoch_loss += loss.item()

                # accumulate number correct
                test_num_correct += torch.sum(
                    pred_batch == label_batch).item()

            # compute epoch average loss and accuracy metrics
            test_loss = test_epoch_loss / (i + 1)
            test_acc = 100.0 * test_num_correct / self.config['number_test']

            # compute epoch time
            epoch_time = time.time() - epoch_start

            # save model
            torch.save(self.model.state_dict(), '{}{}.pt'.format(
                self.config['output_directory'], self.config['model_name']))

            # add metrics to tensorboard
            writer.add_scalar('Loss/Train', train_loss, e + 1)
            writer.add_scalar('Accuracy/Train', train_acc, e + 1)
            writer.add_scalar('Loss/Test', test_loss, e + 1)
            writer.add_scalar('Accuracy/Test', test_acc, e + 1)

            # print epoch metrics
            template = '[INFO]: Epoch {}, Epoch Time {:.2f}s, ' \
                       'Train Loss: {:.2f}, Train Accuracy: {:.2f}, ' \
                       'Test Loss: {:.2f}, Test Accuracy: {:.2f}'
            print(template.format(e + 1, epoch_time, train_loss,
                                  train_acc, test_loss, test_acc))
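# A minimal standalone sketch of the smoothed prediction performed by
# SmoothClassifier.predict above: classify several noisy copies of each
# input and take the per-example mode of the predictions. The toy model,
# shapes, and noise parameters are assumptions for illustration.
import torch

def smoothed_predict(model, x, n_samples=32, stdev=0.25):
    bs = x.shape[0]
    # broadcast-add Gaussian noise: (bs, 1, C, H, W) + (bs, n, C, H, W)
    noisy = x.unsqueeze(1) + stdev * torch.randn(bs, n_samples, *x.shape[1:])
    # stack the noisy copies into the batch dimension and classify
    logits = model(noisy.view(bs * n_samples, *x.shape[1:]))
    # unstack predictions and vote via the mode along the sample dimension
    preds = torch.argmax(logits, dim=1).view(bs, n_samples)
    return torch.mode(preds, dim=1)[0]

toy_model = torch.nn.Sequential(torch.nn.Flatten(), torch.nn.Linear(16, 3))
x = torch.randn(4, 1, 4, 4)
print(smoothed_predict(toy_model, x))  # one voted class index per input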
def test_getClassifiedTokens(self):
    text = MDP.get5w1h([6])[0].text
    ctokens = Classifier.getClassifiedTokens("when", text)
    assert len(ctokens) > 1

# def test_temp(self):
#     infos = MDP.get5w1h([6])
#     infos2 = []
#     for x in [2, 4, 6, 7, 9, 10, 13, 16, 17, 18, 23, 24, 27]:
#         infos2.append(infos[x - 1])
#     infos = infos2
#     temp = Classifier.train(infos)
#     c = temp["classifier"]
#     fiturs = temp["fiturs"]
#     fiturs0 = [x for x in fiturs if (x[1] == "other")]
#     fiturs1 = [x for x in fiturs if (x[1] == "beg_what")]
#     fiturs2 = [x for x in fiturs if (x[1] == "beg_who")]
#     fiturs3 = [x for x in fiturs if (x[1] == "beg_when")]
#     fiturs4 = [x for x in fiturs if (x[1] == "beg_where")]
#     fiturs5 = [x for x in fiturs if (x[1] == "beg_why")]
#     fiturs6 = [x for x in fiturs if (x[1] == "beg_how")]
#     fiturs1b = [x for x in fiturs if (x[1] == "in_what")]
#     fiturs2b = [x for x in fiturs if (x[1] == "in_who")]
#     fiturs3b = [x for x in fiturs if (x[1] == "in_when")]
#     fiturs4b = [x for x in fiturs if (x[1] == "in_where")]
#     fiturs5b = [x for x in fiturs if (x[1] == "in_why")]
#     fiturs6b = [x for x in fiturs if (x[1] == "in_how")]
#     # for x in fiturst:
#     #     print(x)
#     print("")
#     print("All =\t %d" % len(fiturs))
#     # print("True features set =\t %d" % len(fiturst))
#     # print("False features set =\t %d" % len(fitursf))
#     # print("True detected as true \t\t %s" % (nltk.classify.accuracy(c, fiturst)))
#     # print("False detected as False \t %s" % (nltk.classify.accuracy(c, fitursf)))
#     print('')
#     print("beg_what detected as beg_what \t %s" % (nltk.classify.accuracy(c, fiturs1)))
#     print("beg_who detected as beg_who \t %s" % (nltk.classify.accuracy(c, fiturs2)))
#     print("beg_when detected as beg_when \t %s" % (nltk.classify.accuracy(c, fiturs3)))
#     print("beg_where detected as beg_wh.. \t %s" % (nltk.classify.accuracy(c, fiturs4)))
#     print("beg_why detected as beg_why \t %s" % (nltk.classify.accuracy(c, fiturs5)))
#     print("beg_how detected as beg_how \t %s" % (nltk.classify.accuracy(c, fiturs6)))
#     print('')
#     print("in_what detected as in_what \t %s" % (nltk.classify.accuracy(c, fiturs1b)))
#     print("in_who detected as in_who \t %s" % (nltk.classify.accuracy(c, fiturs2b)))
#     print("in_when detected as in_when \t %s" % (nltk.classify.accuracy(c, fiturs3b)))
#     print("in_where detected as in_where \t %s" % (nltk.classify.accuracy(c, fiturs4b)))
#     print("in_why detected as in_why \t %s" % (nltk.classify.accuracy(c, fiturs5b)))
#     print("in_how detected as in_how \t %s" % (nltk.classify.accuracy(c, fiturs6b)))
#     print('')
#     print("other detected as other \t %s" % (nltk.classify.accuracy(c, fiturs0)))
#     print('')
#     print("All \t\t\t\t %s" % (nltk.classify.accuracy(c, fiturs)))
#     assert "classifier" in temp
#     assert "fiturs" in temp
#     assert 1 == 2
def test_train(self):
    temp = Classifier.train(self.infos)
    assert "classifier" in temp
    assert "fiturs" in temp
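# A hedged sketch of the core NLTK call that Classifier.train presumably
# wraps (the tests assert it returns a NaiveBayesClassifier):
# nltk.NaiveBayesClassifier.train takes a list of (feature-dict, label)
# pairs. The toy features and labels below are illustrative assumptions;
# only the label names mirror those used in the commented-out test above.
import nltk

train_set = [
    ({'word': 'yesterday'}, 'beg_when'),
    ({'word': 'london'}, 'beg_where'),
    ({'word': 'because'}, 'beg_why'),
    ({'word': 'the'}, 'other'),
]
classifier = nltk.NaiveBayesClassifier.train(train_set)
print(classifier.classify({'word': 'yesterday'}))  # -> 'beg_when'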