Example no. 1
0
 def __validate_one_epoch(self, valid_bundle_list, valid_dt_list, valid_tasks, weighted_instance_loss):
     """Run one validation pass over the validation datasets and check early stopping.

     Accumulates loss/label statistics per task via __process_loss, then averages
     the loss and computes F1 for each task. If early stopping is enabled (and the
     classifier has no hooks), restores the best model of the task whose learning
     state says training should stop.

     :param valid_bundle_list: only used as an enable/disable gate here — validation
         runs iff it is not None (presumably the validation data bundles; confirm with caller)
     :param valid_dt_list: list of validation datasets, batched one at a time
     :param valid_tasks: dict of validation task objects (metric accumulators)
     :param weighted_instance_loss: forwarded to __process_loss
     :return: the (key, task) item that triggered early stopping, or None
     """
     stopping_valid_task = None
     if valid_bundle_list is not None:
         self.bert_classifier.eval()
         ## clear the per-epoch metric accumulators before the pass
         for cur_task in valid_tasks.values():
             cur_task.reset()
         for dt_ind, cur_dt in enumerate(valid_dt_list):
             batches = self.generate_batches([cur_dt], self.config, False, False, self.current_train_epoch,
                                             EInputListMode.sequential)
             for ba_ind, cur_batch in enumerate(batches):
                 outcome = self.bert_classifier(cur_batch, False)
                 self.__process_loss(outcome, cur_batch, valid_tasks, False, weighted_instance_loss)
                 self.delete_batch_from_gpu(cur_batch, EInputListMode.sequential)
                 del cur_batch, outcome
         ## average the metrics; guard against empty tasks (size == 0) the same way
         ## __train_one_epoch does, so an unused task cannot raise ZeroDivisionError
         for cur_task in valid_tasks.values():
             if cur_task.size > 0:
                 cur_task.loss /= cur_task.size
                 cur_task.f1 = ELib.calculate_f1(cur_task.lbl_true, cur_task.lbl_pred)
         ################ checks early stopping only if the model does not have hooks
         ## deepcopy() cannot copy hooks! fix it later...
         if self.config.check_early_stopping and len(self.bert_classifier.logs) == 0:
             ## iterate items() here: the full (key, task) tuple is returned to the caller
             for cur_task in valid_tasks.items():
                 if cur_task[1].learning_state.should_stop(
                         cur_task[1].loss, self.bert_classifier, self.config.device):
                     ## move the current model off the GPU and fall back to the best snapshot
                     self.bert_classifier.cpu()
                     self.bert_classifier = cur_task[1].learning_state.best_model
                     stopping_valid_task = cur_task
                     break
     return stopping_valid_task
Example no. 2
0
 def __train_one_epoch(self, train_dt_list, train_tasks, input_mode, weighted_instance_loss,
                       report_number_of_intervals, train_shuffle, train_drop_last, balance_batch_mode_list):
     """Run a single training epoch over the given datasets.

     For every batch: forward pass + loss processing (repeated while the optimizer
     is delayed), progress printing, GPU cleanup, and optional cross-model loss
     synchronization. Afterwards averages loss and computes F1 per task.

     :param train_dt_list: training datasets handed to generate_batches
     :param train_tasks: dict of training task objects (metric accumulators)
     :param input_mode: EInputListMode value used for batching and GPU cleanup
     :param weighted_instance_loss: forwarded to __process_loss
     :param report_number_of_intervals: granularity of the progress printout
     :param train_shuffle: whether generate_batches shuffles
     :param train_drop_last: whether generate_batches drops the last partial batch
     :param balance_batch_mode_list: forwarded to generate_batches
     """
     batches = self.generate_batches(train_dt_list, self.config, train_shuffle, train_drop_last,
                                     self.current_train_epoch, input_mode, balance_batch_mode_list)
     ## clear the per-epoch metric accumulators
     for task in train_tasks.values():
         task.reset()
     for batch_index, batch in enumerate(batches):
         self.bert_classifier.train_step += 1  # overall step count tracked on the classifier
         ## repeat the forward/loss pass for this batch while the optimizer is delayed
         ## (presumably __process_loss clears self.delay_optimizer — confirm)
         keep_processing = True
         while keep_processing:
             outcome = self.bert_classifier(batch, False)
             self.__process_loss(outcome, batch, train_tasks, True, weighted_instance_loss)
             keep_processing = self.delay_optimizer
         if ELib.progress_made(batch_index, batch['batch_count'], report_number_of_intervals):
             print(ELib.progress_percent(batch_index, batch['batch_count']), end=' ', flush=True)
         self.delete_batch_from_gpu(batch, input_mode)
         del batch, outcome
         ## In case there are multiple models and their losses are heavy (memory-wise),
         ## 'self.custom_train_loss_func()' may call 'self.sync_obj.lock_loss_calculation.acquire()'
         ## so that losses are computed one at a time; the models are then re-synched here.
         if self.sync_obj is not None and self.sync_obj.lock_loss_calculation.locked():
             ## wait for the other models to arrive
             if self.sync_obj.sync_counter == self.sync_obj.model_count:
                 self.sync_obj.reset()
             self.sync_obj.sync_counter += 1
             self.sync_obj.lock_loss_calculation.release()
             while self.sync_obj.sync_counter < self.sync_obj.model_count:
                 self.sleep()
     ## terminate the progress line once; with multiple models only model 0 prints it
     if self.sync_obj is None or self.model_id == 0:
         print()
     ## average the metrics collected during the epoch (skip tasks that saw no data)
     for task in train_tasks.values():
         if task.size > 0:
             task.loss /= task.size
             task.f1 = ELib.calculate_f1(task.lbl_true, task.lbl_pred)
     ELib.PASS()