trainer = Trainer( optimizer=optimizer, data_silo=data_silo, epochs=n_epochs, n_gpu=n_gpu, warmup_linear=warmup_linear, evaluate_every=evaluate_every, device=device, ) # 7. Let it grow model = trainer.train(model) # 8. Hooray! You have a model. Store it: save_dir = "saved_models/bert-german-ner-tutorial" model.save(save_dir) processor.save(save_dir) # 9. Load it & harvest your fruits (Inference) basic_texts = [ { "text": "Schartau sagte dem Tagesspiegel, dass Fischer ein Idiot sei" }, { "text": "Martin Müller spielt Handball in Berlin" }, ] model = Inferencer.load(save_dir) result = model.run_inference(dicts=basic_texts) print(result)
def ner():
    """Fine-tune ``bert-base-german-cased`` for NER, save it and run a demo inference.

    Training data is read from ``../data/conll03-de``. The trained model and
    its processor are written to ``saved_models/bert-german-ner-tutorial``,
    reloaded through an ``Inferencer`` and applied to two sample sentences
    whose predictions are printed.
    """
    logging.basicConfig(
        level=logging.INFO,
        datefmt="%m/%d/%Y %H:%M:%S",
        format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
    )

    tracker = MLFlowLogger(tracking_uri="https://public-mlflow.deepset.ai/")
    tracker.init_experiment(experiment_name="Public_FARM", run_name="Run_ner")

    # ---- Settings ----
    set_all_seeds(seed=42)
    device, n_gpu = initialize_device_settings(use_cuda=True)
    base_model = "bert-base-german-cased"
    epochs = 4
    labels = [
        "[PAD]", "X", "O",
        "B-MISC", "I-MISC",
        "B-PER", "I-PER",
        "B-ORG", "I-ORG",
        "B-LOC", "I-LOC",
        "B-OTH", "I-OTH",
    ]

    # Step 1: tokenizer for the chosen language model.
    tokenizer = Tokenizer.load(
        pretrained_model_name_or_path=base_model,
        do_lower_case=False,
    )

    # Step 2: processor that converts the raw text files into a PyTorch dataset.
    # test/sample/ner/train-sample.txt shows the expected input format.
    processor = NERProcessor(
        tokenizer=tokenizer,
        max_seq_len=128,
        data_dir=Path("../data/conll03-de"),
        delimiter=" ",
        metric="seq_f1",
        label_list=labels,
    )

    # Step 3: data silo holding the train/dev/test loaders.
    data_silo = DataSilo(processor=processor, batch_size=32)

    # Step 4: pretrained language model plus a token-level classification head.
    encoder = LanguageModel.load(base_model)
    head = TokenClassificationHead(num_labels=len(labels))
    model = AdaptiveModel(
        language_model=encoder,
        prediction_heads=[head],
        embeds_dropout_prob=0.1,
        lm_output_types=["per_token"],
        device=device,
    )

    # Step 5: optimizer and learning-rate schedule.
    train_batches = len(data_silo.loaders["train"])
    model, optimizer, lr_schedule = initialize_optimizer(
        model=model,
        learning_rate=1e-5,
        n_batches=train_batches,
        n_epochs=epochs,
        device=device,
    )

    # Step 6: trainer that runs the epochs and evaluates every 400 steps.
    trainer = Trainer(
        model=model,
        optimizer=optimizer,
        data_silo=data_silo,
        epochs=epochs,
        n_gpu=n_gpu,
        lr_schedule=lr_schedule,
        evaluate_every=400,
        device=device,
    )

    # Step 7: train.
    trainer.train()

    # Step 8: persist model and processor side by side.
    output_dir = "saved_models/bert-german-ner-tutorial"
    model.save(output_dir)
    processor.save(output_dir)

    # Step 9: reload for inference and print the predictions.
    samples = [
        {"text": "Schartau sagte dem Tagesspiegel, dass Fischer ein Idiot sei"},
        {"text": "Martin Müller spielt Handball in Berlin"},
    ]
    inferencer = Inferencer.load(output_dir)
    print(inferencer.inference_from_dicts(dicts=samples))
def test_ner_amp(caplog):
    """Train NER for one epoch (with AMP level ``O1`` when available) and check inference.

    The model is trained on the sample files under ``samples/ner``, saved to
    ``testsave/ner``, reloaded on GPU and asked to tag a single sentence.
    """
    caplog.set_level(logging.CRITICAL)
    set_all_seeds(seed=42)
    device, n_gpu = initialize_device_settings(use_cuda=True)

    epochs = 1
    base_model = "bert-base-german-cased"
    # Mixed precision is only requested when the AMP backend was detected.
    amp_level = 'O1' if AMP_AVAILABLE else None

    tokenizer = Tokenizer.load(
        pretrained_model_name_or_path=base_model,
        do_lower_case=False,
    )
    labels = [
        "[PAD]", "X", "O",
        "B-MISC", "I-MISC",
        "B-PER", "I-PER",
        "B-ORG", "I-ORG",
        "B-LOC", "I-LOC",
        "B-OTH", "I-OTH",
    ]
    processor = NERProcessor(
        tokenizer=tokenizer,
        max_seq_len=8,
        data_dir=Path("samples/ner"),
        train_filename=Path("train-sample.txt"),
        dev_filename=Path("dev-sample.txt"),
        test_filename=None,
        delimiter=" ",
        label_list=labels,
        metric="seq_f1",
    )
    data_silo = DataSilo(processor=processor, batch_size=2, max_processes=1)

    encoder = LanguageModel.load(base_model)
    # One output per label (13 in total).
    head = TokenClassificationHead(num_labels=len(labels))
    model = AdaptiveModel(
        language_model=encoder,
        prediction_heads=[head],
        embeds_dropout_prob=0.1,
        lm_output_types=["per_token"],
        device=device,
    )

    train_batches = len(data_silo.loaders["train"])
    model, optimizer, lr_schedule = initialize_optimizer(
        model=model,
        learning_rate=2e-05,
        schedule_opts=None,
        n_batches=train_batches,
        n_epochs=epochs,
        device=device,
        use_amp=amp_level,
    )
    trainer = Trainer(
        model=model,
        optimizer=optimizer,
        data_silo=data_silo,
        epochs=epochs,
        n_gpu=n_gpu,
        lr_schedule=lr_schedule,
        evaluate_every=1,
        device=device,
    )

    output_dir = Path("testsave/ner")
    trainer.train()
    model.save(output_dir)
    processor.save(output_dir)

    samples = [{"text": "1980 kam der Crown von Toyota"}]
    inferencer = Inferencer.load(output_dir, gpu=True)
    result = inferencer.inference_from_dicts(dicts=samples, max_processes=1)

    first_prediction = result[0]["predictions"][0]
    assert first_prediction["context"] == "Crown"
    assert isinstance(first_prediction["probability"], np.float32)
def execML(self, job):
    """Execute one ML job: NER inference (``analyse``) or NER fine-tuning (``train``).

    ``analyse`` downloads the text sample and the model, runs inference on
    every line of the sample and persists the result. ``train`` downloads the
    dataset and the base model, fine-tunes a token classification model and
    persists it as a ZIP under a freshly generated id. The job status is
    updated along the way through ``self.updateJobStatus``.

    Args:
        job: Job description. This method reads ``job.task``, ``job.id``,
            ``job.model['id']`` and, depending on the task,
            ``job.data_sample`` or ``job.data_source``.

    Returns:
        dict: Always ``{'status': True, 'code': 'ok', 'msg': 'success'}``.
        A ``job.task`` other than ``'analyse'`` or ``'train'`` does no work
        and still returns this value.
    """
    start_time = time.time()

    if job.task == 'analyse':
        # Will download and store dataset...
        sample = self.downloadAndConvertText(job, job.data_sample)
        # Splitting the encoded bytes (not the str) breaks only at ASCII line
        # boundaries; one input dict is built per line.
        basic_texts = [
            {'text': line.decode('utf-8')}
            for line in sample.encode('utf-8').splitlines()
        ]

        # Will download and store model...
        self.downloadAndStoreZIPModel(job, job.model)
        self.updateJobStatus(job, 'analysing')

        save_dir = 'tmp/' + job.model['id']
        model = Inferencer.load(save_dir)
        try:
            result = model.inference_from_dicts(dicts=basic_texts)
            self.persistResult(job, result)
        finally:
            # Release the worker pool even when inference or persisting fails.
            model.close_multiprocessing_pool()
        self.updateJobStatus(job, 'completed')

    elif job.task == 'train':
        self.updateJobStatus(job, 'training')
        # Will download and store dataset...
        self.downloadAndStoreZIPDataset(job, job.data_source)
        # Will download and store model...
        self.downloadAndStoreZIPModel(job, job.model)

        set_all_seeds(seed=42)
        device, n_gpu = initialize_device_settings(use_cuda=True)
        n_epochs = 4
        evaluate_every = 400
        do_lower_case = False
        batch_size = 32
        # The base model is loaded from the local download directory.
        lang_model = os.path.join(Path.cwd(), 'tmp', job.model['id'])
        ner_labels = [
            "[PAD]", "X", "O", "B-MISC", "I-MISC", "B-PER", "I-PER",
            "B-ORG", "I-ORG", "B-LOC", "I-LOC", "B-OTH", "I-OTH"
        ]

        # 1. Create a tokenizer
        tokenizer = Tokenizer.load(
            pretrained_model_name_or_path=lang_model,
            do_lower_case=do_lower_case,
            tokenizer_class='BertTokenizer'
        )

        # 2. Create a DataProcessor that handles all the conversion from raw
        #    text into a pytorch Dataset
        processor = NERProcessor(
            tokenizer=tokenizer,
            max_seq_len=128,
            data_dir=str(os.path.join(Path.cwd(), 'tmp', job.data_source['id'])),
            delimiter=' ',
            metric='seq_f1',
            label_list=ner_labels
        )

        # 3. Create a DataSilo that loads the datasets (train/dev/test) and
        #    provides DataLoaders for them
        data_silo = DataSilo(processor=processor,
                             batch_size=batch_size,
                             max_processes=1)

        # 4. Create an AdaptiveModel
        # 4.1 which consists of a pretrained language model as a basis
        language_model = LanguageModel.load(lang_model)
        # 4.2 and a prediction head on top that is suited for our task => NER
        prediction_head = TokenClassificationHead(num_labels=len(ner_labels))
        model = AdaptiveModel(
            language_model=language_model,
            prediction_heads=[prediction_head],
            embeds_dropout_prob=0.1,
            lm_output_types=['per_token'],
            device=device,
        )

        # 5. Create an optimizer
        model, optimizer, lr_schedule = initialize_optimizer(
            model=model,
            learning_rate=1e-5,
            n_batches=len(data_silo.loaders["train"]),
            n_epochs=n_epochs,
            device=device,
        )

        # 6. Feed everything to the Trainer, which runs the training loop and
        #    evaluates the model from time to time
        trainer = Trainer(
            model=model,
            optimizer=optimizer,
            data_silo=data_silo,
            epochs=n_epochs,
            n_gpu=n_gpu,
            lr_schedule=lr_schedule,
            evaluate_every=evaluate_every,
            device=device,
        )

        # 7. Train
        trainer.train()

        # 8. Store the trained model under a new id
        newModelId = str(uuid.uuid4())
        save_dir = 'tmp/' + newModelId
        model.save(save_dir)
        processor.save(save_dir)
        # NOTE(review): here `model` is the trained model returned by
        # initialize_optimizer, not an Inferencer — confirm that it really
        # exposes close_multiprocessing_pool().
        model.close_multiprocessing_pool()
        self.persistZIPModel(newModelId, job)
        self.updateJobStatus(job, 'completed')

    elapsed_time = time.time() - start_time
    print('Execution time max: ', elapsed_time, 'for job.id:', job.id, flush=True)
    return {'status': True, 'code': 'ok', 'msg': 'success'}
def test_ner(caplog):
    """Train NER for one epoch on CPU, save, reload and check the first prediction.

    Uses the sample files under ``samples/ner`` and writes the trained model
    to ``testsave/ner``.
    """
    caplog.set_level(logging.CRITICAL)
    set_all_seeds(seed=42)
    device, n_gpu = initialize_device_settings(use_cuda=False)

    epochs = 1
    base_model = "bert-base-german-cased"

    tokenizer = Tokenizer.load(
        pretrained_model_name_or_path=base_model,
        do_lower_case=False,
    )
    labels = [
        "[PAD]", "X", "O",
        "B-MISC", "I-MISC",
        "B-PER", "I-PER",
        "B-ORG", "I-ORG",
        "B-LOC", "I-LOC",
        "B-OTH", "I-OTH",
    ]
    processor = NERProcessor(
        tokenizer=tokenizer,
        max_seq_len=8,
        data_dir="samples/ner",
        train_filename="train-sample.txt",
        dev_filename="dev-sample.txt",
        test_filename=None,
        delimiter=" ",
        label_list=labels,
        metric="seq_f1",
    )
    data_silo = DataSilo(processor=processor, batch_size=2)

    encoder = LanguageModel.load(base_model)
    # The head maps the 768-wide encoder output to one logit per label.
    head = TokenClassificationHead(layer_dims=[768, len(labels)])
    model = AdaptiveModel(
        language_model=encoder,
        prediction_heads=[head],
        embeds_dropout_prob=0.1,
        lm_output_types=["per_token"],
        device=device,
    )

    train_batches = len(data_silo.loaders["train"])
    optimizer, warmup_linear = initialize_optimizer(
        model=model,
        learning_rate=2e-5,
        warmup_proportion=0.1,
        n_batches=train_batches,
        n_epochs=epochs,
    )
    trainer = Trainer(
        optimizer=optimizer,
        data_silo=data_silo,
        epochs=epochs,
        n_gpu=n_gpu,
        warmup_linear=warmup_linear,
        evaluate_every=1,
        device=device,
    )

    output_dir = "testsave/ner"
    model = trainer.train(model)
    model.save(output_dir)
    processor.save(output_dir)

    samples = [
        {"text": "Schartau sagte dem Tagesspiegel, dass Fischer ein Idiot sei"},
    ]
    model = Inferencer.load(output_dir)
    result = model.inference_from_dicts(dicts=samples)

    first_prediction = result[0]["predictions"][0]
    assert first_prediction["context"] == "sagte"
    assert isinstance(first_prediction["probability"], np.float32)
def test_ner(caplog):
    """Train a small NER model, then check inference with a pretrained English model.

    Training uses the sample files under ``samples/ner`` and saves to
    ``testsave/ner``. The inference check itself loads
    ``dbmdz/bert-base-cased-finetuned-conll03-english`` rather than the model
    trained here.
    """
    if caplog:
        caplog.set_level(logging.CRITICAL)
    set_all_seeds(seed=42)
    device, n_gpu = initialize_device_settings(use_cuda=False)

    epochs = 3
    base_model = "distilbert-base-german-cased"

    tokenizer = Tokenizer.load(
        pretrained_model_name_or_path=base_model,
        do_lower_case=False,
    )
    labels = [
        "[PAD]", "X", "O",
        "B-MISC", "I-MISC",
        "B-PER", "I-PER",
        "B-ORG", "I-ORG",
        "B-LOC", "I-LOC",
        "B-OTH", "I-OTH",
    ]
    processor = NERProcessor(
        tokenizer=tokenizer,
        max_seq_len=8,
        data_dir=Path("samples/ner"),
        train_filename="train-sample.txt",
        dev_filename="dev-sample.txt",
        test_filename=None,
        delimiter=" ",
        label_list=labels,
        metric="seq_f1",
    )
    data_silo = DataSilo(processor=processor, batch_size=2, max_processes=1)

    encoder = LanguageModel.load(base_model)
    head = TokenClassificationHead(num_labels=13)
    model = AdaptiveModel(
        language_model=encoder,
        prediction_heads=[head],
        embeds_dropout_prob=0.1,
        lm_output_types=["per_token"],
        device=device,
    )

    # NOTE(review): the schedule is built for 1 epoch while the trainer below
    # runs `epochs` (3) — confirm this mismatch is intended.
    warmup = {'name': 'LinearWarmup', 'warmup_proportion': 0.1}
    train_batches = len(data_silo.loaders["train"])
    model, optimizer, lr_schedule = initialize_optimizer(
        model=model,
        learning_rate=2e-5,
        n_batches=train_batches,
        n_epochs=1,
        device=device,
        schedule_opts=warmup,
    )
    trainer = Trainer(
        model=model,
        optimizer=optimizer,
        data_silo=data_silo,
        epochs=epochs,
        n_gpu=n_gpu,
        lr_schedule=lr_schedule,
        evaluate_every=1,
        device=device,
    )

    output_dir = Path("testsave/ner")
    trained = trainer.train()
    trained.save(output_dir)
    processor.save(output_dir)

    samples = [{"text": "Paris is a town in France."}]
    inferencer = Inferencer.load(
        model_name_or_path="dbmdz/bert-base-cased-finetuned-conll03-english",
        num_processes=0,
        task_type="ner",
    )
    # Labels coming from transformers are not inserted correctly; they show up
    # as LABEL_1 ... LABEL_N. The inference result only contains predictions
    # for IOB-style NER labels, so the last one is overwritten.
    inferencer.processor.tasks["ner"]["label_list"][-1] = "B-LOC"
    result = inferencer.inference_from_dicts(dicts=samples)

    first_prediction = result[0]["predictions"][0]
    assert first_prediction["context"] == "Paris"
    assert isinstance(first_prediction["probability"], np.float32)
def test_ner(caplog):
    """Train a small NER model and check that two inference calls agree.

    Training uses the sample files under ``samples/ner`` and saves to
    ``testsave/ner``. The saved model is reloaded and queried twice, once
    with ``max_processes=1`` and once with ``rest_api_schema=True``; both
    results must be equal.
    """
    caplog.set_level(logging.CRITICAL)
    set_all_seeds(seed=42)
    device, n_gpu = initialize_device_settings(use_cuda=False)

    epochs = 5
    base_model = "distilbert-base-german-cased"

    tokenizer = Tokenizer.load(
        pretrained_model_name_or_path=base_model,
        do_lower_case=False,
    )
    labels = [
        "[PAD]", "X", "O",
        "B-MISC", "I-MISC",
        "B-PER", "I-PER",
        "B-ORG", "I-ORG",
        "B-LOC", "I-LOC",
        "B-OTH", "I-OTH",
    ]
    processor = NERProcessor(
        tokenizer=tokenizer,
        max_seq_len=8,
        data_dir=Path("samples/ner"),
        train_filename="train-sample.txt",
        dev_filename="dev-sample.txt",
        test_filename=None,
        delimiter=" ",
        label_list=labels,
        metric="seq_f1",
    )
    data_silo = DataSilo(processor=processor, batch_size=2, max_processes=1)

    encoder = LanguageModel.load(base_model)
    head = TokenClassificationHead(num_labels=13)
    model = AdaptiveModel(
        language_model=encoder,
        prediction_heads=[head],
        embeds_dropout_prob=0.1,
        lm_output_types=["per_token"],
        device=device,
    )

    # NOTE(review): the schedule is built for 1 epoch while the trainer below
    # runs `epochs` (5) — confirm this mismatch is intended.
    warmup = {'name': 'LinearWarmup', 'warmup_proportion': 0.1}
    train_batches = len(data_silo.loaders["train"])
    model, optimizer, lr_schedule = initialize_optimizer(
        model=model,
        learning_rate=2e-5,
        n_batches=train_batches,
        n_epochs=1,
        device=device,
        schedule_opts=warmup,
    )
    trainer = Trainer(
        model=model,
        optimizer=optimizer,
        data_silo=data_silo,
        epochs=epochs,
        n_gpu=n_gpu,
        lr_schedule=lr_schedule,
        evaluate_every=1,
        device=device,
    )

    output_dir = Path("testsave/ner")
    trained = trainer.train()
    trained.save(output_dir)
    processor.save(output_dir)

    samples = [{"text": "Albrecht Lehman ist eine Person"}]
    inferencer = Inferencer.load(output_dir)
    plain_result = inferencer.inference_from_dicts(dicts=samples, max_processes=1)
    schema_result = inferencer.inference_from_dicts(dicts=samples, rest_api_schema=True)
    assert plain_result == schema_result
def ner(self, task, model_type, n_epochs, batch_size, evaluate_every, use_cude):
    """Fine-tune a language model for NER on the data of ``task`` and save it.

    Args:
        task: Task identifier; ``cu.tasks[str(task)]['type']`` must be ``'ner'``.
        model_type: Passed to ``he.get_farm_model`` together with the
            configured language to pick the pretrained model.
        n_epochs: Number of training epochs.
        batch_size: Batch size used by the data silo.
        evaluate_every: Evaluation interval passed to the trainer.
        use_cude: Currently not read by this method (see note in the body).

    Raises:
        Exception: If the task is not configured as a NER task.
    """
    # Local import: the file's import block is not visible from this block.
    import logging

    aml_run = he.get_context()

    # Check task
    if cu.tasks.get(str(task)).get('type') != 'ner':
        raise Exception('NOT A NER TASK')
    language = cu.params.get('language')

    # Data
    dt_task = dt.Data(task=task)

    set_all_seeds(seed=42)
    # NOTE(review): `use_cude` is accepted but never used; CUDA is always
    # requested here — confirm whether the parameter should be honored.
    device, n_gpu = initialize_device_settings(use_cuda=True)
    lang_model = he.get_farm_model(model_type, language)
    save_dir = dt_task.get_path('model_dir')
    ner_labels = [
        "[PAD]", "X", "O", "B-MISC", "I-MISC", "B-PER", "I-PER",
        "B-ORG", "I-ORG", "B-LOC", "I-LOC", "B-OTH", "I-OTH"
    ]

    # AML log: best effort, training must not fail because run logging does.
    try:
        aml_run.log('task', task)
        aml_run.log('language', language)
        aml_run.log('n_epochs', n_epochs)
        aml_run.log('batch_size', batch_size)
        aml_run.log('lang_model', lang_model)
        # Previously referenced an undefined `label_list`; the resulting
        # NameError was swallowed and the labels were never logged.
        aml_run.log_list('label_list', ner_labels)
    except Exception:
        logging.getLogger(__name__).warning(
            "Could not log run parameters to AML", exc_info=True)

    # 1. Create a tokenizer
    tokenizer = Tokenizer.load(pretrained_model_name_or_path=lang_model,
                               do_lower_case=False)

    # 2. Create a DataProcessor that handles all the conversion from raw text
    #    into a pytorch Dataset
    processor = NERProcessor(tokenizer=tokenizer,
                             max_seq_len=128,
                             data_dir=dt_task.data_dir,
                             metric="seq_f1",
                             label_list=ner_labels)

    # 3. Create a DataSilo that loads the datasets (train/dev/test) and
    #    provides DataLoaders for them
    data_silo = DataSilo(processor=processor, batch_size=batch_size)

    # 4. Create an AdaptiveModel
    # a) which consists of a pretrained language model as a basis
    language_model = LanguageModel.load(lang_model)
    # b) and a prediction head on top that is suited for our task => NER
    prediction_head = TokenClassificationHead(num_labels=len(ner_labels))
    model = AdaptiveModel(
        language_model=language_model,
        prediction_heads=[prediction_head],
        embeds_dropout_prob=0.1,
        lm_output_types=["per_token"],
        device=device,
    )

    # 5. Create an optimizer
    model, optimizer, lr_schedule = initialize_optimizer(
        model=model,
        learning_rate=1e-5,
        n_batches=len(data_silo.loaders["train"]),
        n_epochs=n_epochs,
        device=device,
    )

    # 6. Feed everything to the Trainer, which runs the training loop and
    #    evaluates the model from time to time
    trainer = Trainer(
        model=model,
        optimizer=optimizer,
        data_silo=data_silo,
        epochs=n_epochs,
        n_gpu=n_gpu,
        lr_schedule=lr_schedule,
        evaluate_every=evaluate_every,
        device=device,
    )

    # 7. Train
    trainer.train()

    # 8. Store model and processor
    model.save(save_dir)
    processor.save(save_dir)
def ner(task: str, lm: str):
    """Fine-tune a language model for speech-tagging NER and save it.

    ``lm`` selects the pretrained model (unknown values fall back to
    ``bert-base-german-cased``) and ``task`` selects the label set and data
    sub-directory (any value other than ``direct``/``indirect``/``reported``
    uses the combined label set). The output directory is ``MODEL_DIR`` plus
    a suffix for the chosen model and, unless ``task`` is ``'all'``, the task.
    """
    logging.basicConfig(
        level=logging.INFO,
        datefmt="%m/%d/%Y %H:%M:%S",
        format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
    )

    # ---- Settings ----
    set_all_seeds(seed=42, deterministic_cudnn=use_cuda)
    device, n_gpu = initialize_device_settings(use_cuda=use_cuda, use_amp=None)
    epochs = 10

    # Known language models: name -> (model location, output directory suffix).
    lm_catalog = {
        'bert-hgcrw': (
            "redewiedergabe/bert-base-historical-german-rw-cased",
            '_bert-hgcrw',
        ),
        'lmgot01': (
            Path("/home/stud/wangsadirdja/pyfarmbert/models/lm/lmgot_01"),
            '_lmgot01',
        ),
        'lmgot02': (
            Path("/home/stud/wangsadirdja/pyfarmbert/models/lm/lmgot_02"),
            '_lmgot02',
        ),
    }
    model_dir = MODEL_DIR
    catalog_entry = lm_catalog.get(lm)
    if catalog_entry is None:
        lang_model = "bert-base-german-cased"
    else:
        lang_model, dir_suffix = catalog_entry
        model_dir += dir_suffix

    single_task = task != 'all'
    if single_task:
        model_dir += '_' + task

    # Step 1: tokenizer.
    tokenizer = Tokenizer.load(
        pretrained_model_name_or_path=lang_model,
        do_lower_case=False,
    )

    # Step 2: processor that converts the raw text files into a PyTorch dataset.
    # test/sample/ner/train-sample.txt shows the expected input format.
    task_labels = {
        'direct': ["B-DIR", "I-DIR"],
        'indirect': ["B-IND", "I-IND"],
        'reported': ["B-REP", "I-REP"],
    }
    all_labels = ["B-DIR", "I-DIR", "B-IND", "I-IND", "B-REP", "I-REP"]
    ner_labels = ["[PAD]", "X", "O"] + task_labels.get(task, all_labels)

    data_dir = DATA_DIR
    if single_task:
        data_dir += task + '/'
    processor = NERProcessor(
        tokenizer=tokenizer,
        max_seq_len=64,
        data_dir=Path(data_dir),
        delimiter="\t",
        metric="seq_f1",
        label_list=ner_labels,
    )

    # Step 3: data silo holding the train/dev/test loaders.
    data_silo = DataSilo(processor=processor, batch_size=32, max_processes=1)

    # Step 4: pretrained language model plus a token-level classification head.
    encoder = LanguageModel.load(lang_model)
    head = TokenClassificationHead(num_labels=len(ner_labels))
    model = AdaptiveModel(
        language_model=encoder,
        prediction_heads=[head],
        embeds_dropout_prob=0.1,
        lm_output_types=["per_token"],
        device=device,
    )

    # Step 5: optimizer and learning-rate schedule.
    train_batches = len(data_silo.loaders["train"])
    model, optimizer, lr_schedule = initialize_optimizer(
        model=model,
        learning_rate=1e-5,
        n_batches=train_batches,
        n_epochs=epochs,
        device=device,
    )

    # Step 6: trainer that runs the epochs and evaluates every 1000 steps.
    trainer = Trainer(
        model=model,
        optimizer=optimizer,
        data_silo=data_silo,
        epochs=epochs,
        n_gpu=n_gpu,
        lr_schedule=lr_schedule,
        evaluate_every=1000,
        device=device,
    )

    # Step 7: train.
    trainer.train()

    # Step 8: persist model and processor side by side.
    model.save(model_dir)
    processor.save(model_dir)
def ner():
    """Fine-tune ``bert-base-cased`` for NER on ``DATA_DIR`` and save to ``MODEL_DIR``."""
    logging.basicConfig(
        level=logging.INFO,
        datefmt="%m/%d/%Y %H:%M:%S",
        format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
    )

    # ---- Settings ----
    set_all_seeds(seed=42, deterministic_cudnn=True)
    device, n_gpu = initialize_device_settings(use_cuda=True, use_amp=None)
    epochs = 4
    base_model = "bert-base-cased"
    labels = [
        "[PAD]", "X", "O",
        "B-MISC", "I-MISC",
        "B-PER", "I-PER",
        "B-ORG", "I-ORG",
        "B-LOC", "I-LOC",
        "B-OTH", "I-OTH",
    ]

    # Step 1: tokenizer.
    tokenizer = Tokenizer.load(
        pretrained_model_name_or_path=base_model,
        do_lower_case=False,
    )

    # Step 2: processor that converts the raw text files into a PyTorch dataset.
    # test/sample/ner/train-sample.txt shows the expected input format.
    processor = NERProcessor(
        tokenizer=tokenizer,
        max_seq_len=128,
        data_dir=Path(DATA_DIR),
        delimiter=" ",
        metric="seq_f1",
        label_list=labels,
    )

    # Step 3: data silo holding the train/dev/test loaders.
    data_silo = DataSilo(processor=processor, batch_size=32, max_processes=15)

    # Step 4: pretrained language model plus a token-level classification head.
    encoder = LanguageModel.load(base_model)
    head = TokenClassificationHead(num_labels=len(labels))
    model = AdaptiveModel(
        language_model=encoder,
        prediction_heads=[head],
        embeds_dropout_prob=0.1,
        lm_output_types=["per_token"],
        device=device,
    )

    # Step 5: optimizer and learning-rate schedule.
    train_batches = len(data_silo.loaders["train"])
    model, optimizer, lr_schedule = initialize_optimizer(
        model=model,
        learning_rate=1e-5,
        n_batches=train_batches,
        n_epochs=epochs,
        device=device,
    )

    # Step 6: trainer that runs the epochs and evaluates every 400 steps.
    trainer = Trainer(
        model=model,
        optimizer=optimizer,
        data_silo=data_silo,
        epochs=epochs,
        n_gpu=n_gpu,
        lr_schedule=lr_schedule,
        evaluate_every=400,
        device=device,
    )

    # Step 7: train.
    trainer.train()

    # Step 8: persist model and processor side by side.
    model.save(MODEL_DIR)
    processor.save(MODEL_DIR)