def test_find_learning_rate(results_base_path, tasks_base_path):
    corpus = NLPTaskDataFetcher.load_corpus(NLPTask.FASHION, base_path=tasks_base_path)
    tag_dictionary = corpus.make_tag_dictionary('ner')

    embeddings = WordEmbeddings('glove')
    tagger = SequenceTagger(
        hidden_size=64,
        embeddings=embeddings,
        tag_dictionary=tag_dictionary,
        tag_type='ner',
        use_crf=False,
    )

    # pass the optimizer class (not an instance); the trainer instantiates it
    optimizer = SGD
    trainer = ModelTrainer(tagger, corpus, optimizer=optimizer)
    trainer.find_learning_rate(results_base_path, iterations=5)

    shutil.rmtree(results_base_path)
def main(config, name, args):
    from flair.trainers import ModelTrainer
    from flair.visual.training_curves import Plotter
    from math import ceil
    from torch.optim import Adam
    from torch import manual_seed
    from pickle import load
    from discodop.lexgrammar import SupertagGrammar

    cp = corpusparam(**config["Corpus"], **config["Grammar"])
    corpus = SupertagParseCorpus(cp.filename)
    with open(f"{cp.filename}.grammar", "rb") as grammar_file:
        grammar = load(grammar_file)

    tc = FindlrParameters(
        **config["Training"],
        **config["Eval-common"],
        **config["Eval-Development"],
        language=cp.language,
    )
    model = Supertagger.from_corpus(corpus, grammar, tc)
    model.set_eval_param(tc)

    if args.downsample:
        corpus = corpus.downsample(args.downsample)
    # default to roughly five epochs' worth of optimizer steps
    if args.iterations is None:
        epoch = ceil(len(corpus.train) / tc.batchsize)
        args.iterations = epoch * 5

    trainer = ModelTrainer(model, corpus)
    learning_rate_tsv = trainer.find_learning_rate(
        name,
        start_learning_rate=args.min_lr,
        end_learning_rate=args.max_lr,
        iterations=args.iterations,
    )
    plotter = Plotter()
    plotter.plot_learning_rate(learning_rate_tsv)
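# Hypothetical CLI wiring for main() above: the flag names mirror the
# attributes read from `args` (downsample, iterations, min_lr, max_lr),
# and the defaults are assumptions, not the project's actual interface.
from argparse import ArgumentParser

parser = ArgumentParser(description="Run the learning rate finder for the supertagger")
parser.add_argument("--downsample", type=float, default=None,
                    help="optional fraction of the corpus to keep")
parser.add_argument("--iterations", type=int, default=None,
                    help="optimizer steps; main() falls back to ~5 epochs if omitted")
parser.add_argument("--min-lr", type=float, default=1e-7,
                    help="start_learning_rate passed to find_learning_rate (assumed default)")
parser.add_argument("--max-lr", type=float, default=10.0,
                    help="end_learning_rate passed to find_learning_rate (assumed default)")
args = parser.parse_args()
# main(config, name, args) would then be called with a parsed config mapping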
def optimize_lr():
    corpus, label_dictionary = load_corpus()

    embeddings = [
        WordEmbeddings('glove'),
        FlairEmbeddings('news-forward'),
        FlairEmbeddings('news-backward'),
    ]
    document_embeddings = DocumentRNNEmbeddings(
        embeddings,
        hidden_size=512,
        reproject_words=True,
        reproject_words_dimension=256,
        bidirectional=True,
    )
    classifier = TextClassifier(
        document_embeddings,
        label_dictionary=label_dictionary,
        multi_label=False,
    )
    trainer = ModelTrainer(classifier, corpus)

    # 7. find learning rate
    learning_rate_tsv = trainer.find_learning_rate('resources/classifiers/', 'learning_rate.tsv')

    # 8. plot the learning rate finder curve
    from flair.visual.training_curves import Plotter
    plotter = Plotter()
    plotter.plot_learning_rate(learning_rate_tsv)
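# A minimal sketch of picking a learning rate from the .tsv written above,
# assuming (as in the parsing code later in this section) that the learning
# rate and loss are the last two columns. Dividing the loss-minimising rate
# by 10 is a common rule of thumb, not part of the library.
import csv

def pick_learning_rate(tsv_path, divisor=10.0):
    with open(tsv_path) as f:
        rows = list(csv.reader(f, delimiter='\t'))[1:]  # skip the header row
    # column layout: ..., LEARNING_RATE, TRAIN_LOSS
    pairs = [(float(r[-2]), float(r[-1])) for r in rows]
    best_lr, _ = min(pairs, key=lambda p: p[1])  # lr at the lowest recorded loss
    return best_lr / divisor

# lr = pick_learning_rate('resources/classifiers/learning_rate.tsv')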
def test_find_learning_rate(results_base_path, tasks_base_path):
    corpus = flair.datasets.ColumnCorpus(
        data_folder=tasks_base_path / 'fashion',
        column_format={0: 'text', 2: 'ner'},
    )
    tag_dictionary = corpus.make_tag_dictionary('ner')

    embeddings = WordEmbeddings('turian')
    tagger = SequenceTagger(
        hidden_size=64,
        embeddings=embeddings,
        tag_dictionary=tag_dictionary,
        tag_type='ner',
        use_crf=False,
    )

    optimizer = SGD
    trainer = ModelTrainer(tagger, corpus, optimizer=optimizer)
    trainer.find_learning_rate(results_base_path, iterations=5)

    shutil.rmtree(results_base_path)
class SequenceClassifierTrainer:
    """Sequence Classifier Trainer

    Usage:

    ```python
    >>> sc_trainer = SequenceClassifierTrainer(corpus="/Path/to/data/dir")
    ```

    **Parameters:**

    * **corpus** - A flair corpus data model or a `Path`/string to a directory with train.csv/test.csv/dev.csv
    * **encoder** - An `EasyDocumentEmbeddings` object if training with a flair prediction head, or a `Path`/string if training with Transformers' prediction models
    * **column_name_map** - Required if corpus is not a `Corpus` object; a dictionary specifying the indices of the text and label columns of the csv, e.g. {1: "text", 2: "label"}
    * **corpus_in_memory** - Boolean for whether to store corpus embeddings in memory
    * **predictive_head** - For now, either "flair" or "transformers" for the prediction head
    * ****kwargs** - Keyword arguments for Flair's `TextClassifier` model class
    """

    def __init__(
        self,
        corpus: Union[Corpus, Path, str],
        encoder: Union[EasyDocumentEmbeddings, Path, str],
        column_name_map: dict = None,
        corpus_in_memory: bool = True,
        predictive_head: str = "flair",
        **kwargs,
    ):
        if isinstance(corpus, Corpus):
            self.corpus = corpus
        else:
            if isinstance(corpus, str):
                corpus = Path(corpus)
            if not column_name_map:
                raise ValueError(
                    "If not instantiating with a `Corpus` object, the `column_name_map` argument must be passed to specify text/label indices"
                )
            self.corpus = CSVClassificationCorpus(
                corpus,
                column_name_map,
                skip_header=True,
                delimiter=",",
                in_memory=corpus_in_memory,
            )

        # Verify the predictive head is among the available heads
        self.available_predictive_head = ["flair", "transformers"]
        if predictive_head not in self.available_predictive_head:
            raise ValueError(
                f"predictive_head param must be one of the following: {self.available_predictive_head}"
            )
        self.predictive_head = predictive_head

        # Verify the correct corresponding encoder is used with the predictive head
        # (this could be structured with a better design in the future)
        if isinstance(encoder, EasyDocumentEmbeddings):
            if predictive_head == "transformers":
                raise ValueError(
                    "If using the `transformers` predictive head, pass in the path to the transformer's model"
                )
            else:
                self.encoder = encoder
        else:
            if isinstance(encoder, str):
                encoder = Path(encoder)
            self.encoder = encoder

        # Create the label dictionary on init (stored to avoid regenerating it)
        # TODO: should we use the dev/test sets instead, assuming all labels are provided?
        self.label_dict = self.corpus.make_label_dictionary()

        # Save trainer kwargs dict for reinitializations
        self.trainer_kwargs = kwargs

        # Load trainer with initial setup
        self._initial_setup(self.label_dict, **kwargs)

    def _initial_setup(self, label_dict: Dict, **kwargs):
        if self.predictive_head == "flair":
            # Get document embeddings from the encoder
            document_embeddings: DocumentRNNEmbeddings = self.encoder.rnn_embeddings

            # Create the text classifier
            classifier = TextClassifier(
                document_embeddings,
                label_dictionary=label_dict,
                **kwargs,
            )

            # Initialize the text classifier trainer
            self.trainer = ModelTrainer(classifier, self.corpus)

        # TODO: in an internal transformers package, create ****ForSequenceClassification adaptations
        elif self.predictive_head == "transformers":
            with open(self.encoder / "config.json") as config_f:
                configs = json.load(config_f)
            model_name = configs["architectures"][-1]
            if model_name == "BertForMaskedLM":
                pass

    def train(
        self,
        output_dir: Union[Path, str],
        learning_rate: float = 0.07,
        mini_batch_size: int = 32,
        anneal_factor: float = 0.5,
        patience: int = 5,
        max_epochs: int = 150,
        plot_weights: bool = False,
        **kwargs,
    ) -> None:
        """
        Train the Sequence Classifier

        * **output_dir** - The output directory where the model predictions and checkpoints will be written
        * **learning_rate** - The initial learning rate
        * **mini_batch_size** - Batch size for the dataloader
        * **anneal_factor** - The factor by which the learning rate is annealed
        * **patience** - The number of epochs with no improvement the trainer waits before annealing the learning rate
        * **max_epochs** - Maximum number of epochs to train; training terminates once this number is reached
        * **plot_weights** - Whether to plot the weights or not
        * **kwargs** - Keyword arguments for the rest of Flair's `Trainer.train()` hyperparameters
        """
        if isinstance(output_dir, str):
            output_dir = Path(output_dir)

        # Start the training
        self.trainer.train(
            output_dir,
            learning_rate=learning_rate,
            mini_batch_size=mini_batch_size,
            anneal_factor=anneal_factor,
            patience=patience,
            max_epochs=max_epochs,
            **kwargs,
        )

        # Plot weight traces
        if plot_weights:
            plotter = Plotter()
            plotter.plot_weights(output_dir / "weights.txt")

    def find_learning_rate(
        self,
        output_dir: Union[Path, str],
        file_name: str = "learning_rate.tsv",
        start_learning_rate: float = 1e-8,
        end_learning_rate: float = 10,
        iterations: int = 100,
        mini_batch_size: int = 32,
        stop_early: bool = True,
        smoothing_factor: float = 0.7,
        plot_learning_rate: bool = True,
        **kwargs,
    ) -> float:
        """
        Uses Leslie Smith's cyclical learning rate finding method to generate and save the loss vs. learning rate plot

        This method returns a suggested learning rate via the static method
        `SequenceClassifierTrainer.suggested_learning_rate()`, which is run implicitly by this method.

        * **output_dir** - Path to the directory where the learning rate file will be saved
        * **file_name** - Name of the learning rate .tsv file
        * **start_learning_rate** - Initial learning rate for the cyclical learning rate finder
        * **end_learning_rate** - Learning rate at which the exponential increase stops
        * **iterations** - Number of optimizer iterations for the ExpAnnealLR scheduler
        * **mini_batch_size** - Batch size for the dataloader
        * **stop_early** - Whether to stop early once the loss diverges
        * **smoothing_factor** - Smoothing factor on the moving average of losses
        * **kwargs** - Additional keyword arguments passed through to `suggested_learning_rate()`

        **return** - Learning rate as a float
        """
        # Find the learning rate
        learning_rate_tsv = self.trainer.find_learning_rate(
            base_path=output_dir,
            file_name=file_name,
            start_learning_rate=start_learning_rate,
            end_learning_rate=end_learning_rate,
            iterations=iterations,
            mini_batch_size=mini_batch_size,
            stop_early=stop_early,
            smoothing_factor=smoothing_factor,
        )

        # Reinitialize the optimizer and parameters by reinitializing the trainer
        self._initial_setup(self.label_dict, **self.trainer_kwargs)

        if plot_learning_rate:
            plotter = Plotter()
            plotter.plot_learning_rate(learning_rate_tsv)

        # Use the automated learning rate finder
        with open(learning_rate_tsv) as lr_f:
            lr_tsv = list(csv.reader(lr_f, delimiter="\t"))
        losses = np.array([float(row[-1]) for row in lr_tsv[1:]])
        lrs = np.array([float(row[-2]) for row in lr_tsv[1:]])
        lr_to_use = self.suggested_learning_rate(losses, lrs, **kwargs)
        print(f"Recommended Learning Rate {lr_to_use}")
        return lr_to_use

    @staticmethod
    def suggested_learning_rate(
        losses: np.ndarray,
        lrs: np.ndarray,
        lr_diff: int = 15,
        loss_threshold: float = 0.2,
        adjust_value: float = 1,
    ) -> float:
        # This seems redundant unless it can be configured per trainer/finetuner
        """
        Attempts to find the optimal learning rate using an interval slide rule approach on the cyclical learning rate curve

        * **losses** - Numpy array of losses
        * **lrs** - Numpy array of exponentially increasing learning rates (must match the dim of `losses`)
        * **lr_diff** - Learning rate interval of the slide rule
        * **loss_threshold** - Threshold of the loss difference on the interval at which the sliding stops
        * **adjust_value** - Coefficient for adjustment

        **return** - The optimal learning rate as a float
        """
        # Get the loss gradients; each index corresponds to an lr value
        assert lr_diff < len(losses)
        loss_grad = np.gradient(losses)

        # Search for the index in the gradients where the loss is lowest before the loss spike.
        # Initialize the right and left indices using lr_diff as a spacing unit,
        # then slide the interval leftward while the gradient gap exceeds the threshold.
        r_idx = -1
        l_idx = r_idx - lr_diff
        local_min_lr = lrs[l_idx]
        while (l_idx >= -len(losses)) and (
            abs(loss_grad[r_idx] - loss_grad[l_idx]) > loss_threshold
        ):
            local_min_lr = lrs[l_idx]
            r_idx -= 1
            l_idx -= 1

        lr_to_use = local_min_lr * adjust_value
        return lr_to_use
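# A minimal usage sketch for the SequenceClassifierTrainer above. The data
# directory, the column indices, and the EasyDocumentEmbeddings constructor
# arguments are hypothetical placeholders, not values from the source.
embeddings = EasyDocumentEmbeddings("bert-base-cased")  # assumed constructor usage

sc_trainer = SequenceClassifierTrainer(
    corpus="/Path/to/data/dir",               # dir containing train.csv/test.csv/dev.csv
    encoder=embeddings,
    column_name_map={1: "text", 2: "label"},  # text/label column indices in the csv
)

# Run the learning rate finder, then train with the suggested rate
lr = sc_trainer.find_learning_rate(output_dir="resources/classifiers/")
sc_trainer.train(output_dir="resources/classifiers/", learning_rate=lr)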
# shuffle, then split 80/10/10 into train/test/dev
labelDf = labelDf.sample(frac=1)
labelDf.iloc[0:int(len(labelDf) * 0.8)].to_csv(
    'data/train.csv', sep='\t', index=False, header=False)
labelDf.iloc[int(len(labelDf) * 0.8):int(len(labelDf) * 0.9)].to_csv(
    'data/test.csv', sep='\t', index=False, header=False)
labelDf.iloc[int(len(labelDf) * 0.9):].to_csv(
    'data/dev.csv', sep='\t', index=False, header=False)

corpus = ClassificationCorpus(
    Path('data/'),
    test_file='test.csv',
    dev_file='dev.csv',
    train_file='train.csv',
)

word_embeddings = [
    WordEmbeddings('glove'),
    FlairEmbeddings('news-forward-fast'),
    FlairEmbeddings('news-backward-fast'),
]
document_embeddings = DocumentRNNEmbeddings(
    word_embeddings,
    hidden_size=512,
    reproject_words=True,
    reproject_words_dimension=256,
)
classifier = TextClassifier(
    document_embeddings,
    label_dictionary=corpus.make_label_dictionary(),
    multi_label=False,
)
trainer = ModelTrainer(classifier, corpus)
trainer.find_learning_rate('model/', 'learning_rate.tsv')
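# Follow-up sketch: plot the curve that find_learning_rate wrote above. The
# path is inferred from the base path ('model/') and file name passed in.
from flair.visual.training_curves import Plotter

plotter = Plotter()
plotter.plot_learning_rate('model/learning_rate.tsv')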
                          multi_label=False)
trainer = ModelTrainer(classifier, corpus)

output_path = './' + prefix_model_output_dir + '_' + sLang + prefix + '/'
trainer.train(output_path,
              learning_rate=cmd_args.lr,
              mini_batch_size=16,
              anneal_factor=0.5,
              patience=1,
              evaluation_metric=EvaluationMetric.MICRO_F1_SCORE,
              max_epochs=cmd_args.iters)

plotter = Plotter()
plotter.plot_training_curves(output_path + 'loss.tsv')
plotter.plot_weights(output_path + 'weights.txt')

# 7. find learning rate
# NOTE: find_learning_rate expects a base directory plus a file name,
# not the full path to the .tsv file
learning_rate_tsv = trainer.find_learning_rate(output_path, 'learning_rate.tsv')
plotter = Plotter()
plotter.plot_learning_rate(learning_rate_tsv)

del classifier
del trainer

classifier = TextClassifier.load(output_path + 'best-model.pt')

dev_data = read_file(data_path + sLang + "/intertass_" + sLang + "_dev.xml")
print("Writing " + output_dir + sLang + "_dev_" + prefix_model_output_dir + prefix + ".tsv")
with open(data_path + sLang + '/' + dev_filename) as f_in, \
        open(output_dir + sLang + "_dev_" + prefix_model_output_dir + prefix + ".tsv", 'w', newline='') as out_file, \
        open(output_dir + sLang + "_dev_" + prefix_model_output_dir + prefix + ".csv", 'w', newline='') as out_csv_file:
    tsv_writer = csv.writer(out_file, delimiter='\t')