def load(self, dir_path='data/models/sequenceLabelling/'):
    self.model_config = ModelConfig.load(
        os.path.join(dir_path, self.model_config.model_name, self.config_file))
    self.p = WordPreprocessor.load(
        os.path.join(dir_path, self.model_config.model_name, self.preprocessor_file))

    if self.model_config.model_type.lower().find("bert") != -1:
        self.model = get_model(self.model_config,
                               self.p,
                               ntags=len(self.p.vocab_tag),
                               dir_path=dir_path)
        self.model.load_model()
        return

    # load embeddings
    # Do not use cache in 'production' mode
    self.embeddings = Embeddings(self.model_config.embeddings_name,
                                 use_ELMo=self.model_config.use_ELMo,
                                 use_BERT=self.model_config.use_BERT,
                                 use_cache=False)
    self.model_config.word_embedding_size = self.embeddings.embed_size

    self.model = get_model(self.model_config, self.p, ntags=len(self.p.vocab_tag))
    self.model.load(filepath=os.path.join(
        dir_path, self.model_config.model_name, self.weight_file))
def load(self, dir_path='data/models/sequenceLabelling/', weight_file=DEFAULT_WEIGHT_FILE_NAME):
    model_path = os.path.join(dir_path, self.model_config.model_name)
    self.model_config = ModelConfig.load(
        os.path.join(model_path, CONFIG_FILE_NAME))

    if self.model_config.embeddings_name is not None:
        # load embeddings
        # Do not use cache in 'prediction/production' mode
        self.embeddings = Embeddings(self.model_config.embeddings_name,
                                     resource_registry=self.registry,
                                     use_ELMo=self.model_config.use_ELMo,
                                     use_cache=False)
        self.model_config.word_embedding_size = self.embeddings.embed_size
    else:
        self.embeddings = None
        self.model_config.word_embedding_size = 0

    self.p = Preprocessor.load(
        os.path.join(dir_path, self.model_config.model_name, PROCESSOR_FILE_NAME))

    self.model = get_model(self.model_config,
                           self.p,
                           ntags=len(self.p.vocab_tag),
                           load_pretrained_weights=False,
                           local_path=os.path.join(dir_path, self.model_config.model_name))

    print("load weights from",
          os.path.join(dir_path, self.model_config.model_name, weight_file))
    self.model.load(filepath=os.path.join(
        dir_path, self.model_config.model_name, weight_file))
    self.model.print_summary()
def load(self, dir_path='data/models/sequenceLabelling/'):
    self.p = WordPreprocessor.load(
        os.path.join(dir_path, self.model_config.model_name, self.preprocessor_file))
    self.model_config = ModelConfig.load(
        os.path.join(dir_path, self.model_config.model_name, self.config_file))

    # load embeddings
    self.embeddings = Embeddings(self.model_config.embeddings_name,
                                 use_ELMo=self.model_config.use_ELMo)
    self.model_config.word_embedding_size = self.embeddings.embed_size

    self.model = get_model(self.model_config, self.p, ntags=len(self.p.vocab_tag))
    self.model.load(filepath=os.path.join(
        dir_path, self.model_config.model_name, self.weight_file))
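# Illustrative usage sketch for the load() variants above (not part of the original
# source). It assumes a DeLFT-style `Sequence` wrapper exposing load() and tag();
# the model name 'ner-example' and the input sentence are hypothetical placeholders.
# A trained model is expected under <dir_path>/<model_name>/ containing the config,
# preprocessor and weight files referenced above.
if __name__ == '__main__':
    from delft.sequenceLabelling import Sequence   # assumed wrapper class

    model = Sequence('ner-example')                         # hypothetical model name
    model.load(dir_path='data/models/sequenceLabelling/')   # default location used above
    print(model.tag(['John lives in Paris .'], 'json'))     # tag() assumed from the same wrapper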
def eval_nfold(self, x_test, y_test, features=None):
    if self.models is not None:
        total_f1 = 0
        best_f1 = 0
        best_index = 0
        worst_f1 = 1
        worst_index = 0
        reports = []
        reports_as_map = []
        total_precision = 0
        total_recall = 0
        for i in range(self.model_config.fold_number):
            print('\n------------------------ fold ' + str(i) + ' --------------------------------------')

            if 'bert' not in self.model_config.model_type.lower():
                # Prepare test data (steps, generator)
                test_generator = DataGenerator(
                    x_test, y_test,
                    batch_size=self.model_config.batch_size,
                    preprocessor=self.p,
                    char_embed_size=self.model_config.char_embedding_size,
                    max_sequence_length=self.model_config.max_sequence_length,
                    embeddings=self.embeddings,
                    shuffle=False,
                    features=features)

                # Build the evaluator and evaluate the model
                scorer = Scorer(test_generator, self.p, evaluation=True)
                scorer.model = self.models[i]
                scorer.on_epoch_end(epoch=-1)
                f1 = scorer.f1
                precision = scorer.precision
                recall = scorer.recall
                reports.append(scorer.report)
                reports_as_map.append(scorer.report_as_map)
            else:
                # BERT architecture model
                dir_path = 'data/models/sequenceLabelling/'
                self.model_config = ModelConfig.load(
                    os.path.join(dir_path, self.model_config.model_name, self.config_file))
                self.p = WordPreprocessor.load(
                    os.path.join(dir_path, self.model_config.model_name, self.preprocessor_file))
                self.model = get_model(self.model_config, self.p, ntags=len(self.p.vocab_tag))
                self.model.load_model(i)

                y_pred = self.model.predict(x_test, fold_id=i)

                nb_alignment_issues = 0
                for j in range(len(y_test)):
                    if len(y_test[j]) != len(y_pred[j]):
                        nb_alignment_issues += 1
                        # The BERT tokenizer appears to introduce some additional tokens
                        # without ## prefix, but this is normally handled when predicting.
                        # To be very conservative, the following ensures the numbers of
                        # tokens always match, but it should never be needed in practice.
                        if len(y_test[j]) < len(y_pred[j]):
                            y_test[j] = y_test[j] + ["O"] * (len(y_pred[j]) - len(y_test[j]))
                        if len(y_test[j]) > len(y_pred[j]):
                            y_pred[j] = y_pred[j] + ["O"] * (len(y_test[j]) - len(y_pred[j]))

                if nb_alignment_issues > 0:
                    print("number of alignment issues with test set:", nb_alignment_issues)

                f1 = f1_score(y_test, y_pred)
                precision = precision_score(y_test, y_pred)
                recall = recall_score(y_test, y_pred)

                print("\tf1: {:04.2f}".format(f1 * 100))
                print("\tprecision: {:04.2f}".format(precision * 100))
                print("\trecall: {:04.2f}".format(recall * 100))

                report, report_as_map = classification_report(y_test, y_pred, digits=4)
                reports.append(report)
                reports_as_map.append(report_as_map)

            if best_f1 < f1:
                best_f1 = f1
                best_index = i
            if worst_f1 > f1:
                worst_f1 = f1
                worst_index = i
            total_f1 += f1
            total_precision += precision
            total_recall += recall

        fold_average_evaluation = {'labels': {}, 'micro': {}, 'macro': {}}

        micro_f1 = total_f1 / self.model_config.fold_number
        micro_precision = total_precision / self.model_config.fold_number
        micro_recall = total_recall / self.model_config.fold_number

        micro_eval_block = {'f1': micro_f1,
                            'precision': micro_precision,
                            'recall': micro_recall}
        fold_average_evaluation['micro'] = micro_eval_block

        # field-level average over the n folds
        labels = []
        for label in sorted(self.p.vocab_tag):
            if label == 'O' or label == '<PAD>':
                continue
            if label.startswith("B-") or label.startswith("S-") or \
               label.startswith("I-") or label.startswith("E-"):
                label = label[2:]

            if label in labels:
                continue
            labels.append(label)

            sum_p = 0
            sum_r = 0
            sum_f1 = 0
            sum_support = 0
            for j in range(0, self.model_config.fold_number):
                if label not in reports_as_map[j]['labels']:
                    continue
                report_as_map = reports_as_map[j]['labels'][label]
                sum_p += report_as_map["precision"]
                sum_r += report_as_map["recall"]
                sum_f1 += report_as_map["f1"]
                sum_support += report_as_map["support"]

            avg_p = sum_p / self.model_config.fold_number
            avg_r = sum_r / self.model_config.fold_number
            avg_f1 = sum_f1 / self.model_config.fold_number
            avg_support = sum_support / self.model_config.fold_number
            avg_support_dec = str(avg_support - int(avg_support))[1:]
            if avg_support_dec != '0':
                avg_support = math.floor(avg_support)

            block_label = {'precision': avg_p,
                           'recall': avg_r,
                           'support': avg_support,
                           'f1': avg_f1}
            fold_average_evaluation['labels'][label] = block_label

        print("----------------------------------------------------------------------")
        print("\n** Worst ** model scores - run", str(worst_index))
        print(reports[worst_index])

        print("\n** Best ** model scores - run", str(best_index))
        print(reports[best_index])

        if 'bert' not in self.model_config.model_type.lower():
            self.model = self.models[best_index]
        else:
            # copy the best BERT model fold directory
            best_model_dir = 'data/models/sequenceLabelling/' + self.model_config.model_name + str(best_index)
            new_model_dir = 'data/models/sequenceLabelling/' + self.model_config.model_name
            # update new_model_dir if it already exists, keeping its existing config content
            merge_folders(best_model_dir, new_model_dir)
            # clean up the other fold directories
            for i in range(self.model_config.fold_number):
                shutil.rmtree('data/models/sequenceLabelling/' + self.model_config.model_name + str(i))

        print("----------------------------------------------------------------------")
        print("\nAverage over", self.model_config.fold_number, "folds")
        print(get_report(fold_average_evaluation, digits=4, include_avgs=['micro']))
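# Minimal, self-contained sketch (an illustration, not part of the original source) of
# the fold averaging performed at the end of eval_nfold(): the per-fold f1, precision
# and recall are averaged arithmetically into the 'micro' block of the evaluation
# structure. The helper name `average_folds` is hypothetical.
def average_folds(fold_scores):
    # fold_scores: one dict per fold with 'f1', 'precision' and 'recall' entries
    n = len(fold_scores)
    return {
        'labels': {},
        'macro': {},
        'micro': {
            'f1': sum(s['f1'] for s in fold_scores) / n,
            'precision': sum(s['precision'] for s in fold_scores) / n,
            'recall': sum(s['recall'] for s in fold_scores) / n,
        },
    }

# Example with three folds of slightly different scores:
# average_folds([{'f1': 0.91, 'precision': 0.90, 'recall': 0.92},
#                {'f1': 0.89, 'precision': 0.88, 'recall': 0.90},
#                {'f1': 0.90, 'precision': 0.91, 'recall': 0.89}])
# -> micro f1 = 0.90, precision ~= 0.8967, recall ~= 0.9033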