def collectArticles(myfeeds=getFeedDict()):
    # Primary source of feeds https://blog.feedspot.com/world_news_rss_feeds/
    # TODO put feed data in a separate configurable dictionary
    allFeeds = {}  # The critical collection of all articles
    # TODO tries=collections.defaultdict(lambda : None)
    allEntries = {}
    # bar = trange(len(myfeeds.items()))
    # for feedName, feedURL in myfeeds.items():
    for feedName, feedURL in tqdm(myfeeds.items(), desc="Loading feed data"):
        feed = feedparser.parse(feedURL)
        allFeeds[feedURL] = feed
        feed.entries = enhanceEntries(feed.entries, feed.href, feedName)
        addEntries(feed.entries, allEntries)
        # if hasattr(feed, "entries") and hasattr(feed.entries[0], "content"):
        #     print(f"{feedName: >30}Content Loaded in: {toc - tic:0.4f} seconds")
        # else:
        #     tqdm.write(f"{feedName: >30}Summary Loaded in: {toc - tic:0.4f} seconds")
        tqdm.write(feedName)
    # populates collatedContents and removes any RSS-Entries with no contents
    # or summary detail or spurious content
    collateDocContents(allEntries)
    return savePickle(allEntries)
def train_loop(model, optimizer, feats, pred, target_exps, era):
    for i in range(1000000):
        loss, grads = train_loop_body(model, feats, pred, target_exps)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))
        if loss < 1e-7:
            break
        if i % 10000 == 0:
            tqdm.write(f'era: {era[3:]} loss: {loss:0.7f}', end='\r')
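# NOTE: train_loop_body (and its exposures helper) is not defined in this snippet.
# A minimal, hypothetical sketch, assuming a TensorFlow model whose output is
# subtracted from `pred` to push feature exposures toward `target_exps`:
import tensorflow as tf

def exposures(x, y):
    # assumed helper: column-wise correlations between features x and predictions y
    x = x - tf.math.reduce_mean(x, axis=0)
    x = x / tf.norm(x, axis=0)
    y = y - tf.math.reduce_mean(y, axis=0)
    y = y / tf.norm(y, axis=0)
    return tf.matmul(x, y, transpose_a=True)

@tf.function
def train_loop_body(model, feats, pred, target_exps):
    with tf.GradientTape() as tape:
        # penalize only the exposure that exceeds the target, in either direction
        exps = exposures(feats, pred[:, None] - model(feats, training=True))
        loss = tf.reduce_sum(
            tf.nn.relu(tf.nn.relu(exps) - tf.nn.relu(target_exps)) +
            tf.nn.relu(tf.nn.relu(-exps) - tf.nn.relu(-target_exps)))
    return loss, tape.gradient(loss, model.trainable_variables)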
def print_profile(self, prefix):
    process = psutil.Process(os.getpid())
    # psutil.virtual_memory() returns extra platform-specific fields, so read the
    # named attributes instead of unpacking the whole tuple
    vmem = psutil.virtual_memory()
    total, available, used, free = (vmem.total / MEGA, vmem.available / MEGA,
                                    vmem.used / MEGA, vmem.free / MEGA)
    percent = vmem.percent
    proc = process.memory_info()[1] / MEGA
    tqdm.write(
        'process = %.2f total = %.2f available = %.2f used = %.2f free = %.2f percent = %.2f'
        % (proc, total, available, used, free, percent))
def fit(self, epochs: int, train_dl: DataLoader, test_dl: DataLoader,
        criterion: torch.nn, optimizer: torch.optim,
        scheduler: torch.optim.lr_scheduler = None):
    train_losses = []
    eval_losses = []
    for epoch in tqdm(range(epochs), desc="Epochs"):
        # train
        self.train()
        batch_losses = []
        batches = len(train_dl)
        for batch_input in tqdm(train_dl, total=batches, desc="- Remaining batches"):
            batch_input = [x.to(self.device) for x in batch_input]
            input_ids, att_masks, labels = batch_input
            # zero the parameter gradients
            optimizer.zero_grad()
            # forward + backward + optimize
            outputs = self(input_ids, att_masks)
            loss = criterion(outputs.squeeze(), labels)
            loss.backward()
            optimizer.step()
            if scheduler is not None:
                scheduler.step()
            batch_losses.append(loss.item())
        train_loss = np.mean(batch_losses)
        self.last_train_loss = train_loss
        # evaluate
        tqdm.write(f"Epoch: {epoch+1}")
        _, eval_loss = self.evaluate(test_dl, criterion)
        train_losses.append(train_loss)
        eval_losses.append(eval_loss)
    return train_losses, eval_losses
def on_batch_end(self, session, schedule, cb_dict, *args, **kwargs):
    end = time.time()
    elapsed = end - self.start_time
    if elapsed > self.interval:
        self.start_time = end
        session.checkpoint(self.ckpt_file)
        if "print-width" in cb_dict:
            half_width = (cb_dict["print-width"] - 12) / 2
            left = "+" + ("-" * (math.floor(half_width) - 1))
            right = ("-" * (math.ceil(half_width) - 1)) + "+"
            tqdm.write(left + " CHECKPOINT " + right)
        else:
            tqdm.write("--- CHECKPOINT ---")
def on_epoch_end(self, session, schedule, cb_dict, *args, **kwargs):
    if not self.printed_header:
        self.print_header()
        self.printed_header = True
    columns = [schedule.epoch + 1] + [
        cb_dict[key] if key in cb_dict and cb_dict[key] is not None else "None"
        for key in self.metrics
    ]
    metrics_string = self.format_column(columns)
    tqdm.write(metrics_string)
    # print(self.divider())
    session.append_meta("Training Log", "\n" + metrics_string)
def reduce_all_exposures(df,
                         column=["prediction"],
                         neutralizers=None,
                         lm_cache_file=None,
                         normalize=True,
                         gaussianize=True,
                         era_col="era",
                         max_exp=0.1):  # <----- SELECT YOUR MAXIMUM FEATURE EXPOSURE HERE
    if neutralizers is None:
        neutralizers = [x for x in df.columns if x.startswith("feature")]
    neutralized = []

    if lm_cache_file.is_file():
        cache = joblib.load(lm_cache_file)
        # Remove weights for eraX if we'd accidentally saved it in the past.
        cache.pop("eraX", None)
    else:
        cache = {}

    for era in tqdm(df[era_col].unique()):
        tqdm.write(era, end='\r')
        df_era = df[df[era_col] == era]
        scores = df_era[column].values
        exposure_values = df_era[neutralizers].values

        if normalize:
            scores2 = []
            for x in scores.T:
                x = (scipy.stats.rankdata(x, method='ordinal') - .5) / len(x)
                if gaussianize:
                    x = scipy.stats.norm.ppf(x)
                scores2.append(x)
            scores = np.array(scores2)[0]

        scores, weights = reduce_exposure(scores, exposure_values, max_exp,
                                          era, cache.get(era))
        if era not in cache and era != "eraX":
            cache[era] = weights
            joblib.dump(cache, lm_cache_file)

        scores /= tf.math.reduce_std(scores)
        scores -= tf.reduce_min(scores)
        scores /= tf.reduce_max(scores)
        neutralized.append(scores.numpy())

    predictions = pd.DataFrame(np.concatenate(neutralized),
                               columns=column, index=df.index)
    return predictions
def forward(self):
    if exists(self.start_image):
        tqdm.write('Preparing with initial image...')
        optim = DiffGrad(self.model.parameters(), lr=self.start_image_lr)
        pbar = trange(self.start_image_train_iters, desc='iteration')
        for _ in pbar:
            loss = self.model.model(self.start_image)
            loss.backward()
            pbar.set_description(f'loss: {loss.item():.2f}')
            optim.step()
            optim.zero_grad()
            if terminate:
                print('interrupted by keyboard, gracefully exiting')
                return exit()
        del self.start_image
        del optim

    tqdm.write(f'Imagining "{self.textpath}" from the depths of my weights...')

    with torch.no_grad():
        self.model(self.clip_encoding, dry_run=True)  # do one warmup step due to potential issue with CLIP and CUDA

    if self.open_folder:
        open_folder('./')
        self.open_folder = False

    for epoch in trange(self.epochs, desc='epochs'):
        pbar = trange(self.iterations, desc='iteration')
        for i in pbar:
            _, loss = self.train_step(epoch, i)
            pbar.set_description(f'loss: {loss.item():.2f}')
            if terminate:
                print('interrupted by keyboard, gracefully exiting')
                return

        # Update clip_encoding per epoch if we are creating a story
        if self.create_story:
            self.clip_encoding = self.update_story_encoding(epoch, i)

    self.save_image(epoch, i)  # one final save at end
def save_model(model: nn.Module, stats: Dict, model_save_path: str):
    """Save model in provided path."""
    tqdm.write('Saving model...')
    try:
        torch.save(model, model_save_path)
        tqdm.write('Saved successfully')
    except FileNotFoundError:
        tqdm.write('Error during saving!')
def run(experiment, memory_limit):
    # if there is a memory limit, set it on the config
    if memory_limit:
        experiment.cfg.train.memory_limit = memory_limit

    # load the data
    train_loader, dev_loader, test_loader = experiment.dataloaders_fn(
        collate=experiment.collate,
        train_batch_size=experiment.cfg.train.train_batch_size,
        tune_batch_size=experiment.cfg.train.tune_batch_size)

    # do grid search if required
    if experiment.grid_space:
        # TODO: the search
        pass

    results = new_or_load_results(experiment)
    for run_no in range(1, experiment.cfg.n_runs + 1):
        seed = util.new_random_seed()
        util.set_random_seed(seed)

        # init model and train
        model = experiment.model_cls(**experiment.cfg.model)
        model = training.TrainableModel(model, experiment.cfg)
        model.train(train_loader, dev_loader)

        # obtain evaluation results and predictions
        train_metric, train_preds = model.evaluate(train_loader)
        dev_metric, dev_preds = model.evaluate(dev_loader)
        test_metric, test_preds = model.evaluate(test_loader)

        # save results
        results.report_metrics(train_metric, dev_metric, test_metric)
        results.report_preds(train_preds, dev_preds, test_preds)

    # report results
    tqdm.write(results.summarize())
def evaluate(self, eval_dl: DataLoader, criterion: torch.nn):
    # evaluate
    self.eval()
    with torch.no_grad():
        preds = []
        real = []
        batch_losses = []
        for input_batch in eval_dl:
            input_batch = [x.to(self.device) for x in input_batch]
            input_ids, att_masks, labels = input_batch
            outputs = self(input_ids, att_masks)
            loss = criterion(outputs.squeeze(), labels)
            outputs = F.softmax(outputs, dim=1)
            outputs = outputs.argmax(axis=1)
            preds.extend(outputs.tolist())
            real.extend(labels.tolist())
            batch_losses.append(loss.item())

    results = {}
    for metric_name, metric in self.metrics.items():
        results[metric_name] = metric(real, preds)
    mean_loss = np.mean(batch_losses)
    tqdm.write(
        f"\ttrain_loss: {self.last_train_loss} // test_loss: {mean_loss} // metrics: {str(results)}\n"
    )
    return preds, mean_loss
        loss = outputs[0]
        loss_train_total += loss.item()
        loss.backward()

        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)

        optimizer.step()
        scheduler.step()

        progress_bar.set_postfix(
            {'training_loss': '{:.3f}'.format(loss.item() / len(batch))})

    torch.save(model.state_dict(), f'finetuned_BERT_epoch_{epoch}.model')

    tqdm.write(f'\nEpoch {epoch}')

    loss_train_avg = loss_train_total / len(dataloader_train)
    tqdm.write(f'Training loss: {loss_train_avg}')

    val_loss, predictions, true_vals = evaluate(dataloader_validation)
    val_f1 = f1_score_func(predictions, true_vals)
    tqdm.write(f'Validation loss: {val_loss}')
    tqdm.write(f'F1 Score (Weighted): {val_f1}')

model = BertForSequenceClassification.from_pretrained(
    "bert-base-uncased",
    num_labels=len(label_dict),
    output_attentions=False,
    output_hidden_states=False)
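# f1_score_func is referenced here (and in the other BERT snippets below) but not
# shown; a minimal sketch of the weighted-F1 helper it is assumed to be, where
# preds are logits and true_vals are integer labels:
import numpy as np
from sklearn.metrics import f1_score

def f1_score_func(preds, labels):
    preds_flat = np.argmax(preds, axis=1).flatten()
    labels_flat = labels.flatten()
    return f1_score(labels_flat, preds_flat, average='weighted')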
def train_model(model, criterion, optimizer, scheduler, n_epochs=20):
    writer = SummaryWriter()
    start = time.time()
    best_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.
    early_stopping = EarlyStopping(patience=3, verbose=True, delta=0.01)

    for epoch in range(n_epochs):
        print(68 * '-')
        print(f'Epoch: {epoch+1}/{n_epochs}')
        print(11 * '-')
        train_loss, train_acc = 0.0, 0.0
        val_loss, val_acc = 0.0, 0.0

        for pidx, phase in enumerate(['train', 'valid']):
            if phase == 'train':
                model.train()
                optimizer.step()
                scheduler.step()
            else:
                model.eval()

            running_total = 0
            running_loss = 0.0
            running_correct = 0
            n = 0

            progress = tqdm(dataloaders[phase], leave=False)
            for images, labels in progress:
                images, labels = images.to(device), labels.to(device)
                optimizer.zero_grad()

                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(images)
                    _, preds = torch.max(outputs, dim=1)
                    loss = criterion(outputs, labels)
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                running_loss += loss.mean() * images.size(0)
                running_correct += preds.eq(labels.data).sum()
                running_total += labels.size(0)
                n += len(labels)

                progress.set_postfix(loss=loss.item(),
                                     correct=running_correct.item(),
                                     acc=(running_correct.double() / running_total).item())

            if phase == 'train':
                writer.add_scalar('train_loss', running_loss,
                                  epoch * len(dataloaders[phase]) + pidx)

            epoch_loss = running_loss / float(n)
            epoch_acc = running_correct.double() / float(n)
            tqdm.write(f'{phase} Loss: {epoch_loss:.4f}, Acc: {epoch_acc:.4f}\n')

            if phase == 'valid':
                early_stopping(epoch_loss, model)
                if epoch_acc > best_acc:
                    best_acc = epoch_acc
                    best_wts = copy.deepcopy(model.state_dict())

        if early_stopping.early_stop:
            print('Early stopping')
            break
        if early_stopping.counter == 3:
            print('Early Stopped!')
            break

    time_elapsed = time.time() - start
    print(f'Training Complete in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s')
    print(f'Best Validation Acc: {best_acc:.4f}')

    model.load_state_dict(best_wts)
    writer.close()
    return model
import os

epochs = 50
batch_size = 64
verbose = 1
losses = []
val_losses = []

# Added an option not to shuffle, so the last 20% of the recording time is used as
# the validation set -- in future we might want to reduce the proportion of the val set
generator = Generator(config, memory_tuple=memory, base_path='../',
                      batch_size=batch_size, column_mode='all', shuffle_data=False)
# frame_shape, numeric_shape, diff_shape = generator.get_shapes()
frame_shape, diff_shape = generator.get_shapes()

tqdm.write('Target shape: {}'.format(diff_shape))  # tqdm is a package that tracks the progress of operations
tqdm.write('Input shapes: {}'.format(frame_shape))

models = []

# ARDI's comment: Nvidia is the model we want to use (resnet might be good too?)
generator = Generator(config, memory_tuple=memory, base_path='../',
                      batch_size=batch_size, column_mode='all', shuffle_data=True)
models.append((create_standalone_nvidia_cnn(activation='linear',
                                             input_shape=(50, 180, 3),
                                             output_shape=2),
               generator.generate))
# # "steer and throttle"
# generator = Generator(config, memory_tuple=memory, base_path='../', batch_size=batch_size, column_mode='all', shuffle_data=False)
# models.append((create_small_cnn(activation='linear', input_shape=(50, 180, 3), output_shape=2), generator.generate))

callbacks = [tf.keras.callbacks.LearningRateScheduler(scheduler)]
def print_profile(self):
    stats = torch.cuda.memory_stats()
    tqdm.write(
        f"{stats['allocated_bytes.all.current']:<30,} "
        f"{stats['allocated_bytes.all.peak']:<30,} "
        f"{torch.cuda.get_device_properties('cuda').total_memory:<30,}"
    )
def on_epoch_begin(self, *args, **kwargs):
    tqdm.write(f"{'current': <30} {'peak': <30} {'total': <30}")
def on_train_end(self, *args, **kwargs):
    tqdm.write(self.divider())
def train_model(model,
                dataloaders,
                lossfun,
                optimizer,
                wts_path,
                save_as=None,
                save=False,
                epochs=25,
                load_wts=False):
    """
    Train a model with the given params.

    Args:
        model: model, extends torch.nn
        dataloaders: dataloader dictionary of the form
            {"train": dataloader_train_data,
             "val": dataloader_val_data}
        lossfun: loss function
        optimizer: optimization func.
        wts_path: path to torch.nn.Module.load_state_dict for "model"
        save_as: filename prefix used when saving a checkpoint
        save: bool, save a checkpoint after the final epoch
        epochs: number of epochs to train the model
        load_wts: bool, True if loading a state dict, False otherwise

    Return:
        Tuple: model with trained weights and validation training
        statistics (epoch loss, accuracy)
    """
    # instantiate validation history, base model weights and loss
    val_history = []
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    # load model weights
    if load_wts:
        print("loading from: " + wts_path)
        checkpoint = torch.load(wts_path)
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

    # train model
    print("num training points : {}".format(len(dataloaders["train"].dataset)))
    print("num validation points: {}".format(len(dataloaders["val"].dataset)))

    for epoch in tqdm(range(epochs), desc='epoch', leave=False):
        # import pdb; pdb.set_trace()
        since = time.time()

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()  # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            for batch in tqdm(dataloaders[phase], desc='batch', leave=False):
                # send inputs and labels to device
                inputs = batch[0].to(device)
                labels = batch[1].to(device)

                # clear gradients from previous batch
                optimizer.zero_grad()

                # forward
                # track history only in train
                with torch.set_grad_enabled(phase == 'train'):
                    # Get model outputs and calculate loss for train
                    if phase == 'train':
                        preds = model(inputs)
                        # print(preds)
                        # print(labels)
                        loss = lossfun(preds, labels)
                    # Get model outputs and calculate loss for val
                    else:
                        preds = model(inputs)
                        loss = lossfun(preds, labels)

                    # get predictions
                    _, preds = torch.max(preds, 1)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        # back propagate loss
                        loss.backward()
                        # update weights
                        optimizer.step()

                # running statistics
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            time_elapsed = time.time() - since

            # update epoch loss and acc
            epoch_loss = running_loss / len(dataloaders[phase].dataset)
            epoch_acc = running_corrects.double() / len(dataloaders[phase].dataset)

            # track validation loss and acc
            tqdm.write('{}: {} epoch_loss: {:.10f} epoch_acc: {:.4f} time: {:.4f}'.format(
                epoch, phase, epoch_loss, epoch_acc, time_elapsed))

            # update training history
            if phase == 'val':
                val_history.append(epoch_loss)

            # update best weights
            if phase == 'val' and best_acc < epoch_acc:
                print("best model updated")
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
                best_optim = copy.deepcopy(optimizer.state_dict())

            # save model
            if epoch == epochs - 1 and save:
                torch.save(
                    {
                        'best_acc': best_acc,
                        'model_state_dict': best_model_wts,
                        'optimizer_state_dict': best_optim,
                    }, save_as + "_ep={}.tar".format(epoch))

    # If running in a Jupyter notebook you may want to print to the console or
    # refresh the cell with each print. Printing too many lines in a cell will
    # corrupt the notebook.
    model.load_state_dict(best_model_wts)
    return model, val_history
class Imagine(nn.Module):
    def __init__(
            self,
            *,
            text=None,
            img=None,
            clip_encoding=None,
            lr=1e-5,
            batch_size=4,
            gradient_accumulate_every=4,
            save_every=100,
            image_width=512,
            num_layers=16,
            epochs=20,
            iterations=1050,
            save_progress=True,
            seed=None,
            open_folder=True,
            save_date_time=False,
            start_image_path=None,
            start_image_train_iters=10,
            start_image_lr=3e-4,
            theta_initial=None,
            theta_hidden=None,
            lower_bound_cutout=0.1,  # should be smaller than 0.8
            upper_bound_cutout=1.0,
            saturate_bound=False,
            create_story=False,
            story_start_words=5,
            story_words_per_epoch=5,
    ):
        super().__init__()

        if exists(seed):
            tqdm.write(f'setting seed: {seed}')
            torch.manual_seed(seed)
            torch.cuda.manual_seed(seed)
            random.seed(seed)
            torch.backends.cudnn.deterministic = True

        # fields for story creation:
        self.create_story = create_story
        self.words = None
        self.all_words = text.split(" ") if text is not None else None
        self.num_start_words = story_start_words
        self.words_per_epoch = story_words_per_epoch
        if create_story:
            assert text is not None, "We need text input to create a story..."
            # overwrite epochs to match story length
            num_words = len(self.all_words)
            self.epochs = 1 + (num_words - self.num_start_words) / self.words_per_epoch
            # add one epoch if not divisible
            self.epochs = int(self.epochs) if int(self.epochs) == self.epochs else int(self.epochs) + 1
            print("Running for ", self.epochs, "epochs")
        else:
            self.epochs = epochs

        self.iterations = iterations
        self.image_width = image_width
        total_batches = self.epochs * self.iterations * batch_size * gradient_accumulate_every
        model = DeepDaze(
            total_batches=total_batches,
            batch_size=batch_size,
            image_width=image_width,
            num_layers=num_layers,
            theta_initial=theta_initial,
            theta_hidden=theta_hidden,
            lower_bound_cutout=lower_bound_cutout,
            upper_bound_cutout=upper_bound_cutout,
            saturate_bound=saturate_bound,
        ).cuda()
        self.model = model
        self.scaler = GradScaler()
        self.optimizer = AdamP(model.parameters(), lr)
        self.gradient_accumulate_every = gradient_accumulate_every
        self.save_every = save_every
        self.save_date_time = save_date_time
        self.open_folder = open_folder
        self.save_progress = save_progress
        self.text = text
        self.image = img
        self.textpath = create_text_path(text=text, img=img, encoding=clip_encoding)
        self.filename = self.image_output_path()

        # create coding to optimize for
        self.clip_img_transform = create_clip_img_transform(perceptor.input_resolution.item())
        self.clip_encoding = self.create_clip_encoding(text=text, img=img, encoding=clip_encoding)

        self.start_image = None
        self.start_image_train_iters = start_image_train_iters
        self.start_image_lr = start_image_lr
        if exists(start_image_path):
            file = Path(start_image_path)
            assert file.exists(), f'file does not exist at given starting image path {start_image_path}'
            image = Image.open(str(file))
            image_tensor = self.clip_img_transform(image)[None, ...].cuda()
            self.start_image = image_tensor
def main(num_epoch, max_length, batch_size, model_name):
    """
    Main function to train and evaluate the BERT model.

    :param num_epoch: number of epochs for training the model
    :param max_length: max length of the input string for training
    :param batch_size: batch size for training the model
    :param model_name: the name of the BERT model
    :return: None
    """
    # check whether a GPU is available or not
    check_gpu()

    # print model info
    print('Start training BERT model.')
    print('Number of epochs: ', num_epoch)
    print('Max input length: ', max_length)
    print('Batch size: ', batch_size)

    # read in data
    df = pd.read_csv("s3://msia490project/processed_video_reviews.csv").head(500000)
    df['reviewText'] = df['reviewText'].astype(str)
    df.head()

    # Encode the classes for BERT. We'll keep using the 3 labels we made earlier.
    encoder = LabelEncoder()
    df['score'] = encoder.fit_transform(df['score'])

    # Set X and y.
    X = df['reviewText']
    y = df['score']

    # Split data into training and test sets.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

    tokenizer = transformers.BertTokenizer.from_pretrained('bert-base-uncased',
                                                           do_lower_case=True)

    # Encoding the words in the training data into vectors.
    max_length = int(max_length)
    encoded_data_train = tokenizer.batch_encode_plus(X_train,
                                                     truncation=True,
                                                     add_special_tokens=True,
                                                     return_attention_mask=True,
                                                     pad_to_max_length=True,
                                                     max_length=max_length,
                                                     return_tensors='pt')

    # Encoding the words in the test data into vectors.
    encoded_data_test = tokenizer.batch_encode_plus(X_test,
                                                    truncation=True,
                                                    add_special_tokens=True,
                                                    return_attention_mask=True,
                                                    pad_to_max_length=True,
                                                    max_length=max_length,
                                                    return_tensors='pt')

    # Get inputs and attention masks from previously encoded data.
    input_ids_train = encoded_data_train['input_ids']
    attention_masks_train = encoded_data_train['attention_mask']
    labels_train = torch.tensor(y_train.values)
    input_ids_test = encoded_data_test['input_ids']
    attention_masks_test = encoded_data_test['attention_mask']
    labels_test = torch.tensor(y_test.values)

    # Instantiate TensorDataset
    dataset_train = TensorDataset(input_ids_train, attention_masks_train, labels_train)
    dataset_test = TensorDataset(input_ids_test, attention_masks_test, labels_test)

    # Initialize the model.
    model = transformers.BertForSequenceClassification.from_pretrained(
        "bert-base-uncased",
        num_labels=5,
        output_attentions=False,
        output_hidden_states=False)

    # DataLoaders for running the model
    dataloader_train = DataLoader(dataset_train,
                                  sampler=RandomSampler(dataset_train),
                                  batch_size=int(batch_size))
    dataloader_test = DataLoader(dataset_test,
                                 sampler=SequentialSampler(dataset_test),
                                 batch_size=int(batch_size))

    # Setting hyper-parameters
    optimizer = AdamW(model.parameters(), lr=1e-5, eps=1e-8)
    epochs = int(num_epoch)
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=0,
        num_training_steps=len(dataloader_train) * epochs)
    seed_val = 15
    random.seed(seed_val)
    np.random.seed(seed_val)
    torch.manual_seed(seed_val)
    torch.cuda.manual_seed_all(seed_val)
    device = torch.device('cuda')

    # train the model
    model.to(device)
    for epoch in tqdm(range(1, epochs + 1)):
        model.train()
        loss_train_total = 0
        progress_bar = tqdm(dataloader_train,
                            desc='Epoch {:1d}'.format(epoch),
                            leave=False,
                            disable=False)
        for batch in progress_bar:
            model.zero_grad()
            batch = tuple(b.to(device) for b in batch)
            inputs = {
                'input_ids': batch[0].to(device),
                'attention_mask': batch[1].to(device),
                'labels': batch[2].to(device),
            }
            outputs = model(**inputs)
            loss = outputs[0]
            loss_train_total += loss.item()
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()
            scheduler.step()
            # progress bar
            progress_bar.set_postfix(
                {'training_loss': '{:.3f}'.format(loss.item() / len(batch))})

        tqdm.write(f'\nEpoch {epoch}')
        loss_train_avg = loss_train_total / len(dataloader_train)
        tqdm.write(f'Training loss: {loss_train_avg}')

        # evaluate the model
        run_evaluation(dataloader_test, model, device, encoder, epoch, model_name)

    # save the model for future use/retrain
    torch.save(
        {
            'epoch': num_epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
        }, model_name + '.tar')
def main(args):
    """
    Main function to fine-tune the BERT classification model.

    :param args: (argparse) user-input configuration file
    """
    try:
        config_path = project_path + "/" + args.config
        input_data_path = project_path + "/" + args.input
        model_path = project_path + "/" + args.model
        evaluation_path = project_path + "/" + args.evaluation
        config = load_config(config_path)

        # load data
        df = read_csv(input_data_path)
        # # -- debug
        # df = df[:100]

        # Encode the classes for BERT.
        encoder = preprocessing.LabelEncoder()
        df['label'] = encoder.fit_transform(df['label'])

        # Split data into training and test sets.
        X_train, X_test, y_train, y_test = training_test_split(
            df, **config['bert']['training_test_split'])

        # BERT tokenization
        logger.info("Tokenizing...")
        tokenizer = transformers.BertTokenizer.from_pretrained(
            'bert-base-uncased', do_lower_case=True)
        if not args.max_length:
            max_length = config['bert']['max_length']
        else:
            max_length = int(args.max_length)

        # DataLoaders for running the model
        if not args.batch_size:
            batch_size = config['bert']['batch_size']
        else:
            batch_size = int(args.batch_size)
        dataloader_train = pro_pipline(X_train, tokenizer, max_length,
                                       config['bert']['tokenize'], batch_size,
                                       y_train)
        dataloader_test = pro_pipline(X_test, tokenizer, max_length,
                                      config['bert']['tokenize'], batch_size,
                                      y_test)

        # Initialize the model.
        model = transformers.BertForSequenceClassification.from_pretrained(
            "bert-base-uncased",
            num_labels=df['label'].nunique(),
            output_attentions=False,
            output_hidden_states=False)

        # Setting optimizer
        optimizer = AdamW(model.parameters(), **config['bert']['optimizer'])

        # Setting epochs
        if not args.num_epoch:
            epochs = config['bert']['num_epoch']
        else:
            epochs = int(args.num_epoch)
        scheduler = get_linear_schedule_with_warmup(
            optimizer,
            num_warmup_steps=0,
            num_training_steps=len(dataloader_train) * epochs)

        # Setting seeds
        seed = config['bert']['seed']
        random.seed(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)

        # Write prints to .txt
        model_name = 'max_length' + str(max_length) + 'batch_size' + str(
            batch_size) + 'num_epoch' + str(epochs)
        e_dir = evaluation_path + "/" + model_name
        if not os.path.exists(e_dir):
            os.makedirs(e_dir)
        sys.stdout = open(e_dir + "/" + model_name + '.txt', 'w')
        logger.info("Training... and evaluations will be saved into %s", e_dir)
        device = torch.device('cuda')
        # device = torch.device('cpu')
        model.to(device)

        complete_epoch, training_loss, test_accuracy = [], [], []
        for epoch in tqdm(range(1, epochs + 1)):
            model.train()
            loss_train_total = 0
            progress_bar = tqdm(dataloader_train,
                                desc='Epoch {:1d}'.format(epoch),
                                leave=False,
                                disable=False)
            for batch in progress_bar:
                model.zero_grad()
                batch = tuple(b.to(device) for b in batch)
                inputs = {
                    'input_ids': batch[0].to(device),
                    'attention_mask': batch[1].to(device),
                    'labels': batch[2].to(device),
                }
                outputs = model(**inputs)
                loss = outputs[0]
                loss_train_total += loss.item()
                loss.backward()
                torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
                optimizer.step()
                scheduler.step()
                progress_bar.set_postfix({
                    'training_loss': '{:.3f}'.format(loss.item() / len(batch))
                })

            # training loss
            tqdm.write(f'\nEpoch {epoch}')
            loss_train_avg = loss_train_total / len(dataloader_train)
            training_loss.append(loss_train_avg)
            tqdm.write(f'Training loss: {loss_train_avg}')

            # evaluate the model
            plt, val_accuracy = run_evaluation(dataloader_test, model, device, encoder)
            plt.savefig(e_dir + "/" + model_name + '-' + str(epoch) + '.png')
            test_accuracy.append(val_accuracy)
            complete_epoch.append(epoch)

        loss_plt = plot_loss(complete_epoch, training_loss, test_accuracy)
        loss_plt.savefig(e_dir + "/" + model_name + '_loss' + '.png')

        # save the model for future use/retrain
        output_dir = model_path + '/' + model_name + "/"
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)
        logging.info("Saving model to %s" % output_dir)
        model_to_save = model.module if hasattr(
            model, 'module') else model  # Take care of distributed/parallel training
        model_to_save.save_pretrained(output_dir)
        tokenizer.save_pretrained(output_dir)

    except KeyError as e3:
        logger.error("KeyError: " + str(e3))
    except Exception as e:
        logger.error("Unexpected error occurred when training with Bert: " + str(e))
def main():
    # Only read in data for replaced
    df = pd.read_csv(config.PATH_CLEAN_DATA_REPLACED, encoding="utf_8")
    df, label_dict = prep_data(df)

    # Split train vs. val
    X_train, X_val, y_train, y_val = train_test_split(
        df.index.values,
        df.label.values,
        train_size=80000,
        test_size=20000,
        random_state=414,
        stratify=df.label.values,
    )

    # Label train vs. val
    df["data_type"] = ["not_set"] * df.shape[0]
    df.loc[X_train, "data_type"] = "train"
    df.loc[X_val, "data_type"] = "val"

    # Encode data
    tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
    encoded_data_train = tokenizer.batch_encode_plus(
        df[df.data_type == "train"].text.values,
        add_special_tokens=True,
        return_attention_mask=True,
        pad_to_max_length=True,
        max_length=25,
        return_tensors="pt",
    )
    encoded_data_val = tokenizer.batch_encode_plus(
        df[df.data_type == "val"].text.values,
        add_special_tokens=True,
        return_attention_mask=True,
        pad_to_max_length=True,
        max_length=25,
        return_tensors="pt",
    )

    input_ids_train = encoded_data_train["input_ids"]
    attention_masks_train = encoded_data_train["attention_mask"]
    labels_train = torch.tensor(df[df.data_type == "train"].label.values)

    input_ids_val = encoded_data_val["input_ids"]
    attention_masks_val = encoded_data_val["attention_mask"]
    labels_val = torch.tensor(df[df.data_type == "val"].label.values)

    dataset_train = TensorDataset(input_ids_train, attention_masks_train, labels_train)
    dataset_val = TensorDataset(input_ids_val, attention_masks_val, labels_val)

    # Load pretrained BERT model
    model = BertForSequenceClassification.from_pretrained(
        "bert-base-uncased",
        num_labels=len(label_dict),
        output_attentions=False,
        output_hidden_states=False,
    )

    # Train and validation data loader
    dataloader_train = DataLoader(dataset_train,
                                  sampler=RandomSampler(dataset_train),
                                  batch_size=batch_size)
    dataloader_validation = DataLoader(dataset_val,
                                       sampler=SequentialSampler(dataset_val),
                                       batch_size=batch_size)

    # Define optimizer
    optimizer = AdamW(model.parameters(), lr=1e-5, eps=1e-8)
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=0,
        num_training_steps=len(dataloader_train) * epochs)

    # Train
    for epoch in tqdm(range(1, epochs + 1)):
        model.to(device)
        model.train()
        loss_train_total = 0
        progress_bar = tqdm(
            dataloader_train,
            desc="Epoch {:1d}".format(epoch),
            leave=False,
            disable=False,
        )
        for batch in progress_bar:
            model.zero_grad()
            batch = tuple(b.to(device) for b in batch)
            inputs = {
                "input_ids": batch[0],
                "attention_mask": batch[1],
                "labels": batch[2],
            }
            outputs = model(**inputs)
            loss = outputs[0]
            loss_train_total += loss.item()
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()
            scheduler.step()
            progress_bar.set_postfix(
                {"training_loss": "{:.3f}".format(loss.item() / len(batch))})

        torch.save(
            model.state_dict(),
            f"./models/finetuned_BERT_epoch_{epoch}.model",
        )
        tqdm.write(f"\nEpoch {epoch}")
        loss_train_avg = loss_train_total / len(dataloader_train)
        tqdm.write(f"Training loss: {loss_train_avg}")

        val_loss, predictions, true_vals = evaluate(model, dataloader_validation)
        val_f1 = f1_score_func(predictions, true_vals)
        tqdm.write(f"Validation loss: {val_loss}")
        tqdm.write(f"F1 Score (Weighted): {val_f1}")
            outputs = model(**inputs)  # run our model, unpacking the dictionary of inputs
            loss = outputs[0]
            loss_train_total += loss.item()  # accumulate the loss
            loss.backward()  # back-propagate to compute gradients
            # Clip all parameter gradients to a norm of 1.0, preventing them from
            # becoming exceptionally small or too big; helps promote generalization.
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()
            scheduler.step()
            progress_bar.set_postfix(
                {'training_loss': '{:.3f}'.format(loss.item() / len(batch))})  # show it in the progress bar

        # save the model every epoch, named as the BERT fine-tuned (ft) model
        torch.save(model.state_dict(), f'Models/BERT_ft_epoch{epoch}.model')
        tqdm.write(f'\nEpoch {epoch}')  # report which epoch we are on

        loss_train_avg = loss_train_total / len(dataloader_train)
        tqdm.write(f'Training loss: {loss_train_avg}')

        # Use the evaluate function to get the validation loss. It is similar to
        # training except we do not change any gradients and do no backpropagation.
        # We want to know whether the model is overfitting, which occurs when
        # training loss keeps going down while validation loss goes up, i.e. the
        # model no longer generalizes beyond the training data.
        val_loss, predictions, true_vals = evaluate(dataloader_val)
        val_f1 = f1_score_func(predictions, true_vals)
        tqdm.write(f'Validation loss: {val_loss}')
        tqdm.write(f'F1 Score (weighted): {val_f1}')
# Train original model.
c_orig = make_adult_classifier()
c_orig.compile(loss='binary_crossentropy', optimizer='sgd', metrics=['accuracy'])
c_orig.fit(X, y, epochs=2, batch_size=512)
y_pred_orig = (c_orig.predict(X) > 0.5) * 1

tqdm.write('Original Model\n'
           '--------------\n'
           '\n'
           'Accuracy: {:.4f} (baseline: {:0.4f})\n'
           '\n'
           ' Group 0\tGroup 1\n'
           'Demographic Parity: {:.4f}\t{:.4f}\n'
           'Equal Opportunity: {:.4f}\t{:.4f}\n'.format(
               *(((y_pred_orig == y).mean(), baseline_accuracy) +
                 evaluate_dem_parity(y_pred_orig, y, z) +
                 evaluate_eq_op(y_pred_orig, y, z))))

# Train model with demographic parity.
c_dem_par = AdversarialFairModel(make_adult_classifier())
c_dem_par.train_dem_parity(X, y, z, epochs=2, batch_size=512)
y_pred_dem_par = (c_dem_par.predict(X) > 0.5) * 1

print('Demographic Parity\n'
      '------------------\n'
      '\n'
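# evaluate_dem_parity and evaluate_eq_op are not shown in this snippet; a minimal,
# hypothetical sketch of what they are assumed to compute, given a binary protected
# attribute z and 1-D prediction/label arrays (both helpers return per-group tuples
# so they can be concatenated into the format call above):
import numpy as np

def evaluate_dem_parity(y_pred, y, z):
    # positive prediction rate in each group
    return (y_pred[z == 0].mean(), y_pred[z == 1].mean())

def evaluate_eq_op(y_pred, y, z):
    # true positive rate in each group
    return (y_pred[(z == 0) & (y == 1)].mean(),
            y_pred[(z == 1) & (y == 1)].mean())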
def __call__(self, **kwargs) -> None:
    progress = tqdm(list(range(self.args.start_epoch, self.args.total_epochs + 1)),
                    miniters=1,
                    ncols=100,
                    unit='epoch',
                    desc='Overall Progress',
                    leave=True,
                    position=0)
    OFFSET = 1
    best_err = self.args.best_err
    best_epoch = self.args.start_epoch

    for epoch in progress:
        self.experiment.log_current_epoch(epoch)
        for key in self.data_loader.keys():
            if bool(re.search('train', key)):
                # Training
                loss = self.perform_epoch(loader_key=key, epoch=epoch, offset=OFFSET)
                OFFSET += 1
            elif bool(re.search('val', key)) and ((epoch - 1) % self.args.validation_frequency) == 0:
                # Validation
                loss = self.perform_epoch(loader_key=key, epoch=epoch, offset=OFFSET)
                OFFSET += 1

                is_best = loss < best_err
                if is_best:
                    best_err = loss
                    best_epoch = int(epoch)
                    self.save_model(epoch, best_err, OFFSET, is_best, filename=None)
                    OFFSET += 1
            else:
                raise ValueError(
                    f'Unknown data_loader key is found! unknown_key = {key}')

            # LOGGER
            log_name = ('_').join([key, self.loss_label])
            self.experiment.log_metric(log_name, loss, step=epoch, epoch=epoch)
            self.experiment.log_metric('best_epoch', best_epoch)

        # Epoch update
        if self.lr_scheduler is not None:
            self.lr_scheduler.step()
            self.experiment.log_metric('current_lr',
                                       self.lr_scheduler.get_lr()[0],
                                       step=epoch,
                                       epoch=epoch)

        if ((epoch - 1) % self.args.backup_frequency) == 0:
            self.save_model(epoch, best_err, OFFSET, False,
                            filename=f'backup_{epoch}.pth.tar')

        tqdm.write("\n")
def print_header(self):
    tqdm.write(self.divider())
    tqdm.write(self.format_column(["Epoch"] + self.metrics))
    tqdm.write(self.divider("="))
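# divider() and format_column() are used by print_header/on_epoch_end above but
# are not shown in these snippets. A minimal, hypothetical sketch of such helpers
# for a fixed-width text table (the class name and 12-character column width are
# assumptions, not the original implementation):
class MetricTablePrinter:
    def __init__(self, metrics, col_width=12):
        self.metrics = metrics      # list of metric names, e.g. ["loss", "val_loss"]
        self.col_width = col_width

    def divider(self, char="-"):
        # one cell per metric column plus the leading "Epoch" column
        return char * (self.col_width * (len(self.metrics) + 1))

    def format_column(self, columns):
        # left-align every value in a fixed-width cell
        return "".join(f"{str(col): <{self.col_width}}" for col in columns)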
                'attention_mask': batch[1],
                'labels': batch[2]
            }

            outputs = model(**inputs)
            loss = outputs[0]
            loss_train_total += loss.item()
            loss.backward()

            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)

            optimizer.step()
            scheduler.step()

            progress_bar.set_postfix(
                {'training_loss': '{:.3f}'.format(loss.item() / len(batch))})

        torch.save(model.state_dict(), f'Models/BERT_ft_epoch{epoch}.model')

        tqdm.write(f'\nEpoch {epoch}')
        loss_train_avg = loss_train_total / len(dataloader)
        tqdm.write(f'Training Loss: {loss_train_avg}')

        val_loss, predictions, true_vals = evaluate(dataloader_val)
        val_f1 = f1_score_func(predictions, true_vals)
        tqdm.write(f'Validation Loss: {val_loss}')
        tqdm.write(f'F1 score: {val_f1}')


# # Loading and Evaluating our Model

# In[57]:

model = BertForSequenceClassification.from_pretrained(
    'bert-base-uncased',
    num_labels=len(label_dict),
hist = model.fit(frames, commands, batch_size=64, epochs=12, validation_split=0.2)

losses = []
val_losses = []
current_loss = hist.history['loss']
current_val_loss = hist.history['val_loss']
losses.append(current_loss)
print(val_losses)
val_losses.append(current_val_loss)
tqdm.write("Loss per epoch: {}".format(current_loss))
tqdm.write("Validation loss per epoch: {}".format(current_val_loss))
gc.collect()

print(val_losses)
print(hist.history['val_loss'])

loss_data = []
val_loss_data = []
val_loss_data = {
    'data': hist.history['val_loss'],
    'label': 'Validation loss',
    'alpha': 1.0
}
plot_stuff('Nvidia cnn standalone validation loss',
def on_train_begin(self, session, *args, **kwargs):
    if os.path.exists(self.ckpt_file) and not self.reset:
        tqdm.write("--- LOADING CHECKPOINT ---")
        session.load(self.ckpt_file)
    self.start_time = time.time()