Example #1
    def __init__(self,
                 model_name,
                 model_type="BidLSTM_CRF",
                 embeddings_name=None,
                 char_emb_size=25,
                 max_char_length=30,
                 char_lstm_units=25,
                 word_lstm_units=100,
                 dropout=0.5,
                 recurrent_dropout=0.25,
                 use_char_feature=True,
                 use_crf=True,
                 batch_size=20,
                 optimizer='adam',
                 learning_rate=0.001,
                 lr_decay=0.9,
                 clip_gradients=5.0,
                 max_epoch=50,
                 early_stop=True,
                 patience=5,
                 max_checkpoints_to_keep=5,
                 log_dir=None,
                 use_ELMo=True,
                 fold_number=1):

        self.model = None
        self.models = None
        self.p = None
        self.log_dir = log_dir
        self.embeddings_name = embeddings_name

        word_emb_size = 0
        if embeddings_name is not None:
            self.embeddings = Embeddings(embeddings_name, use_ELMo=use_ELMo)
            word_emb_size = self.embeddings.embed_size

        self.model_config = ModelConfig(model_name=model_name,
                                        model_type=model_type,
                                        embeddings_name=embeddings_name,
                                        word_embedding_size=word_emb_size,
                                        char_emb_size=char_emb_size,
                                        char_lstm_units=char_lstm_units,
                                        max_char_length=max_char_length,
                                        word_lstm_units=word_lstm_units,
                                        dropout=dropout,
                                        recurrent_dropout=recurrent_dropout,
                                        use_char_feature=use_char_feature,
                                        use_crf=use_crf,
                                        fold_number=fold_number,
                                        batch_size=batch_size,
                                        use_ELMo=use_ELMo)

        self.training_config = TrainingConfig(batch_size, optimizer,
                                              learning_rate, lr_decay,
                                              clip_gradients, max_epoch,
                                              early_stop, patience,
                                              max_checkpoints_to_keep)
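
A minimal construction sketch for the signature above (the enclosing class is shown as Sequence in Example #5). The model name "ner-example" and the embeddings name "glove-840B" are illustrative placeholders, not values taken from the source:

model = Sequence("ner-example",
                 model_type="BidLSTM_CRF",
                 embeddings_name="glove-840B",
                 use_ELMo=False,
                 batch_size=20,
                 max_epoch=50)
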
Example #2
    def load(self, dir_path='data/models/sequenceLabelling/'):
        self.p = WordPreprocessor.load(os.path.join(dir_path, self.model_config.model_name, self.preprocessor_file))
        
        self.model_config = ModelConfig.load(os.path.join(dir_path, self.model_config.model_name, self.config_file))

        # load embeddings
        self.embeddings = Embeddings(self.model_config.embeddings_name, use_ELMo=self.model_config.use_ELMo) 
        self.model_config.word_embedding_size = self.embeddings.embed_size

        self.model = get_model(self.model_config, self.p, ntags=len(self.p.vocab_tag))
        self.model.load(filepath=os.path.join(dir_path, self.model_config.model_name, self.weight_file))
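
A hedged usage sketch for this load method (the model name is a placeholder): the wrapper is first constructed with the name of a previously saved model so that self.model_config.model_name points at the right subfolder, then load() restores the preprocessor, the config, the embeddings and the weights:

model = Sequence("ner-example")
model.load(dir_path='data/models/sequenceLabelling/')
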
Example #3
    def load(self, dir_path='data/models/textClassification/'):
        self.model_config = ModelConfig.load(
            os.path.join(dir_path, self.model_config.model_name,
                         self.config_file))

        # load embeddings
        self.embeddings = Embeddings(self.model_config.embeddings_name)
        self.model_config.word_embedding_size = self.embeddings.embed_size

        self.model = getModel(self.model_config, self.training_config)
        if self.model_config.fold_number == 1:
            self.model.load_weights(
                os.path.join(
                    dir_path, self.model_config.model_name,
                    self.model_config.model_type + "." + self.weight_file))
        else:
            self.models = []
            for i in range(0, self.model_config.fold_number):
                local_model = getModel(self.model_config, self.training_config)
                local_model.load_weights(
                    os.path.join(
                        dir_path, self.model_config.model_name,
                        self.model_config.model_type +
                        ".model{0}_weights.hdf5".format(i)))
                self.models.append(local_model)
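
The fold handling above can be driven as in this sketch, assuming the enclosing class is the text-classification wrapper (named Classifier here purely for illustration) and that weights were saved beforehand; the model name is a placeholder:

classifier = Classifier("toxic-example")
classifier.load(dir_path='data/models/textClassification/')
# whether a single "<model_type>." + weight_file is loaded or one
# "<model_type>.model{i}_weights.hdf5" file per fold depends on the
# fold_number stored in the reloaded config
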
Example #4
    def __init__(self,
                 model_name="",
                 model_type="gru",
                 embeddings_name=None,
                 list_classes=[],
                 char_emb_size=25,
                 dropout=0.5,
                 recurrent_dropout=0.25,
                 use_char_feature=False,
                 batch_size=256,
                 optimizer='adam',
                 learning_rate=0.001,
                 lr_decay=0.9,
                 clip_gradients=5.0,
                 max_epoch=50,
                 patience=5,
                 log_dir=None,
                 maxlen=300,
                 fold_number=1,
                 use_roc_auc=True,
                 embeddings=()):

        self.model = None
        self.models = None
        self.log_dir = log_dir
        self.embeddings_name = embeddings_name

        word_emb_size = 0
        if embeddings_name is not None:
            self.embeddings = Embeddings(embeddings_name)
            word_emb_size = self.embeddings.embed_size

        self.model_config = ModelConfig(model_name=model_name,
                                        model_type=model_type,
                                        embeddings_name=embeddings_name,
                                        list_classes=list_classes,
                                        char_emb_size=char_emb_size,
                                        word_emb_size=word_emb_size,
                                        dropout=dropout,
                                        recurrent_dropout=recurrent_dropout,
                                        use_char_feature=use_char_feature,
                                        maxlen=maxlen,
                                        fold_number=fold_number,
                                        batch_size=batch_size)

        self.training_config = TrainingConfig(batch_size, optimizer,
                                              learning_rate, lr_decay,
                                              clip_gradients, max_epoch,
                                              patience, use_roc_auc)
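
A construction sketch for this classifier signature; the model name, the class labels and the embeddings name are placeholders, not values from the source:

classifier = Classifier("toxic-example",
                        model_type="gru",
                        embeddings_name="glove-840B",
                        list_classes=["toxic", "non_toxic"],
                        batch_size=256,
                        maxlen=300,
                        fold_number=1)
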
Example #5
class Sequence(object):

    config_file = 'config.json'
    weight_file = 'model_weights.hdf5'
    preprocessor_file = 'preprocessor.pkl'

    # number of parallel workers for the data generator when not using ELMo
    nb_workers = 6

    def __init__(self,
                 model_name,
                 model_type="BidLSTM_CRF",
                 embeddings_name=None,
                 char_emb_size=25,
                 max_char_length=30,
                 char_lstm_units=25,
                 word_lstm_units=100,
                 dropout=0.5,
                 recurrent_dropout=0.25,
                 use_char_feature=True,
                 use_crf=True,
                 batch_size=20,
                 optimizer='adam',
                 learning_rate=0.001,
                 lr_decay=0.9,
                 clip_gradients=5.0,
                 max_epoch=50,
                 early_stop=True,
                 patience=5,
                 max_checkpoints_to_keep=5,
                 log_dir=None,
                 use_ELMo=True,
                 fold_number=1):

        self.model = None
        self.models = None
        self.p = None
        self.log_dir = log_dir
        self.embeddings_name = embeddings_name

        word_emb_size = 0
        if embeddings_name is not None:
            self.embeddings = Embeddings(embeddings_name, use_ELMo=use_ELMo)
            word_emb_size = self.embeddings.embed_size

        self.model_config = ModelConfig(model_name=model_name,
                                        model_type=model_type,
                                        embeddings_name=embeddings_name,
                                        word_embedding_size=word_emb_size,
                                        char_emb_size=char_emb_size,
                                        char_lstm_units=char_lstm_units,
                                        max_char_length=max_char_length,
                                        word_lstm_units=word_lstm_units,
                                        dropout=dropout,
                                        recurrent_dropout=recurrent_dropout,
                                        use_char_feature=use_char_feature,
                                        use_crf=use_crf,
                                        fold_number=fold_number,
                                        batch_size=batch_size,
                                        use_ELMo=use_ELMo)

        self.training_config = TrainingConfig(batch_size, optimizer,
                                              learning_rate, lr_decay,
                                              clip_gradients, max_epoch,
                                              early_stop, patience,
                                              max_checkpoints_to_keep)

    def train(self, x_train, y_train, x_valid=None, y_valid=None):
        # TBD if valid is None, segment train to get one
        x_all = np.concatenate((x_train, x_valid), axis=0)
        y_all = np.concatenate((y_train, y_valid), axis=0)
        self.p = prepare_preprocessor(x_all, y_all, self.model_config)
        self.model_config.char_vocab_size = len(self.p.vocab_char)
        self.model_config.case_vocab_size = len(self.p.vocab_case)
        """
        if self.embeddings.use_ELMo:
            # dump token context independent data for the train set, done once for the training
            x_train_local = x_train
            if not self.training_config.early_stop:
                # in case we want to train with the validation set too, we dump also
                # the ELMo embeddings for the token of the valid set
                x_train_local = np.concatenate((x_train, x_valid), axis=0)
            self.embeddings.dump_ELMo_token_embeddings(x_train_local)
        """
        self.model = get_model(self.model_config, self.p,
                               len(self.p.vocab_tag))
        trainer = Trainer(self.model,
                          self.models,
                          self.embeddings,
                          self.model_config,
                          self.training_config,
                          checkpoint_path=self.log_dir,
                          preprocessor=self.p)
        trainer.train(x_train, y_train, x_valid, y_valid)
        if self.embeddings.use_ELMo:
            self.embeddings.clean_ELMo_cache()

    def train_nfold(self,
                    x_train,
                    y_train,
                    x_valid=None,
                    y_valid=None,
                    fold_number=10):
        if x_valid is not None and y_valid is not None:
            x_all = np.concatenate((x_train, x_valid), axis=0)
            y_all = np.concatenate((y_train, y_valid), axis=0)
            self.p = prepare_preprocessor(x_all, y_all, self.model_config)
        else:
            self.p = prepare_preprocessor(x_train, y_train, self.model_config)
        self.model_config.char_vocab_size = len(self.p.vocab_char)
        self.model_config.case_vocab_size = len(self.p.vocab_case)
        self.p.return_lengths = True

        #self.model = get_model(self.model_config, self.p, len(self.p.vocab_tag))
        self.models = []

        for k in range(0, fold_number):
            model = get_model(self.model_config, self.p, len(self.p.vocab_tag))
            self.models.append(model)

        trainer = Trainer(self.model,
                          self.models,
                          self.embeddings,
                          self.model_config,
                          self.training_config,
                          checkpoint_path=self.log_dir,
                          preprocessor=self.p)
        trainer.train_nfold(x_train, y_train, x_valid, y_valid)
        if self.embeddings.use_ELMo:
            self.embeddings.clean_ELMo_cache()

    def eval(self, x_test, y_test):
        if self.model_config.fold_number > 1 and self.models and len(
                self.models) == self.model_config.fold_number:
            self.eval_nfold(x_test, y_test)
        else:
            self.eval_single(x_test, y_test)

    def eval_single(self, x_test, y_test):
        if self.model:
            # Prepare test data (steps, generator)
            test_generator = DataGenerator(
                x_test,
                y_test,
                batch_size=self.training_config.batch_size,
                preprocessor=self.p,
                char_embed_size=self.model_config.char_embedding_size,
                embeddings=self.embeddings,
                shuffle=False)

            # Build the evaluator and evaluate the model
            scorer = Scorer(test_generator, self.p, evaluation=True)
            scorer.model = self.model
            scorer.on_epoch_end(epoch=-1)
        else:
            raise OSError('Could not find a model.')

    def eval_nfold(self, x_test, y_test):
        if self.models is not None:
            total_f1 = 0
            best_f1 = 0
            best_index = 0
            worst_f1 = 1
            worst_index = 0
            reports = []
            total_precision = 0
            total_recall = 0
            for i in range(0, self.model_config.fold_number):
                print('\n------------------------ fold ' + str(i) +
                      '--------------------------------------')

                # Prepare test data (steps, generator)
                test_generator = DataGenerator(
                    x_test,
                    y_test,
                    batch_size=self.training_config.batch_size,
                    preprocessor=self.p,
                    char_embed_size=self.model_config.char_embedding_size,
                    embeddings=self.embeddings,
                    shuffle=False)

                # Build the evaluator and evaluate the model
                scorer = Scorer(test_generator, self.p, evaluation=True)
                scorer.model = self.models[i]
                scorer.on_epoch_end(epoch=-1)
                f1 = scorer.f1
                precision = scorer.precision
                recall = scorer.recall
                reports.append(scorer.report)

                if best_f1 < f1:
                    best_f1 = f1
                    best_index = i
                if worst_f1 > f1:
                    worst_f1 = f1
                    worst_index = i
                total_f1 += f1
                total_precision += precision
                total_recall += recall

            macro_f1 = total_f1 / self.model_config.fold_number
            macro_precision = total_precision / self.model_config.fold_number
            macro_recall = total_recall / self.model_config.fold_number

            print("\naverage over", self.model_config.fold_number, "folds")
            print("\tmacro f1 =", macro_f1)
            print("\tmacro precision =", macro_precision)
            print("\tmacro recall =", macro_recall, "\n")

            print("\n** Worst ** model scores - \n")
            print(reports[worst_index])

            self.model = self.models[best_index]
            print("\n** Best ** model scores - \n")
            print(reports[best_index])

    def tag(self, texts, output_format):
        # annotate a list of sentences, return the list of annotations in the
        # specified output_format
        if self.model:
            tagger = Tagger(self.model,
                            self.model_config,
                            self.embeddings,
                            preprocessor=self.p)
            start_time = time.time()
            annotations = tagger.tag(texts, output_format)
            runtime = round(time.time() - start_time, 3)
            if output_format == 'json':
                annotations["runtime"] = runtime
            else:
                print("runtime: %s seconds " % (runtime))
            return annotations
        else:
            raise OSError('Could not find a model.')

    def tag_file(self, file_in, output_format, file_out):
        # Annotate a text file containing one sentence per line. The annotations are
        # written to the output file if it is not None, otherwise to standard output.
        # Processing is streamed in batches so that huge files can be processed without
        # memory issues.
        if self.model:
            tagger = Tagger(self.model,
                            self.model_config,
                            self.embeddings,
                            preprocessor=self.p)
            start_time = time.time()
            if file_out is not None:
                out = open(file_out, 'w')
            first = True
            with open(file_in, 'r') as f:
                texts = None
                while texts is None or len(texts) == self.model_config.batch_size * self.nb_workers:

                    texts = next_n_lines(
                        f, self.model_config.batch_size * self.nb_workers)
                    annotations = tagger.tag(texts, output_format)
                    # if the following is true, we just output the JSON returned by the tagger without any modification
                    directDump = False
                    if first:
                        first = False
                        if len(texts) < self.model_config.batch_size * self.nb_workers:
                            runtime = round(time.time() - start_time, 3)
                            annotations['runtime'] = runtime
                            jsonString = json.dumps(annotations,
                                                    sort_keys=False,
                                                    indent=4,
                                                    ensure_ascii=False)
                            if file_out is None:
                                print(jsonString)
                            else:
                                out.write(jsonString)
                            directDump = True
                        else:
                            # we need to tweak the JSON produced by the tagger to glue the different batches together
                            # output the general information attributes
                            jsonString = '{\n    "software": ' + json.dumps(
                                annotations["software"],
                                ensure_ascii=False) + ",\n"
                            jsonString += '    "date": ' + json.dumps(
                                annotations["date"],
                                ensure_ascii=False) + ",\n"
                            jsonString += '    "model": ' + json.dumps(
                                annotations["model"],
                                ensure_ascii=False) + ",\n"
                            jsonString += '    "texts": ['
                            if file_out is None:
                                print(jsonString, end='', flush=True)
                            else:
                                out.write(jsonString)
                            first = True
                            for jsonStr in annotations["texts"]:
                                jsonString = json.dumps(jsonStr,
                                                        sort_keys=False,
                                                        indent=4,
                                                        ensure_ascii=False)
                                #jsonString = jsonString.replace('\n', '\n\t\t')
                                jsonString = re.sub('\n', '\n        ',
                                                    jsonString)
                                if file_out is None:
                                    if not first:
                                        print(',\n        ' + jsonString,
                                              end='',
                                              flush=True)
                                    else:
                                        first = False
                                        print('\n        ' + jsonString,
                                              end='',
                                              flush=True)
                                else:
                                    if not first:
                                        out.write(',\n        ')
                                        out.write(jsonString)
                                    else:
                                        first = False
                                        out.write('\n        ')
                                        out.write(jsonString)
                    else:
                        for jsonStr in annotations["texts"]:
                            jsonString = json.dumps(jsonStr,
                                                    sort_keys=False,
                                                    indent=4,
                                                    ensure_ascii=False)
                            jsonString = re.sub('\n', '\n        ', jsonString)
                            if file_out is None:
                                print(',\n        ' + jsonString,
                                      end='',
                                      flush=True)
                            else:
                                out.write(',\n        ')
                                out.write(jsonString)

            runtime = round(time.time() - start_time, 3)
            if not directDump:
                jsonString = "\n    ],\n"
                jsonString += '    "runtime": ' + str(runtime)
                jsonString += "\n}\n"
                if file_out is None:
                    print(jsonString)
                else:
                    out.write(jsonString)

            if file_out is not None:
                out.close()
            #print("runtime: %s seconds " % (runtime))
        else:
            raise OSError('Could not find a model.')

    def save(self, dir_path='data/models/sequenceLabelling/'):
        # create a subfolder for the model if it does not already exist
        directory = os.path.join(dir_path, self.model_config.model_name)
        if not os.path.exists(directory):
            os.makedirs(directory)

        self.p.save(os.path.join(directory, self.preprocessor_file))
        print('preprocessor saved')

        self.model_config.save(os.path.join(directory, self.config_file))
        print('model config file saved')

        self.model.save(os.path.join(directory, self.weight_file))
        print('model saved')

    def load(self, dir_path='data/models/sequenceLabelling/'):
        self.p = WordPreprocessor.load(
            os.path.join(dir_path, self.model_config.model_name,
                         self.preprocessor_file))

        self.model_config = ModelConfig.load(
            os.path.join(dir_path, self.model_config.model_name,
                         self.config_file))

        # load embeddings
        self.embeddings = Embeddings(self.model_config.embeddings_name,
                                     use_ELMo=self.model_config.use_ELMo)
        self.model_config.word_embedding_size = self.embeddings.embed_size

        self.model = get_model(self.model_config,
                               self.p,
                               ntags=len(self.p.vocab_tag))
        self.model.load(filepath=os.path.join(
            dir_path, self.model_config.model_name, self.weight_file))
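
An end-to-end sketch of the API shown in this example: construct, train with a validation set, evaluate, save, reload and tag. The model name, the embeddings name and the data variables (x_train, y_train, x_valid, y_valid, x_test, y_test) are placeholders, and loading the actual data is out of scope here:

model = Sequence("ner-example",
                 embeddings_name="glove-840B",
                 use_ELMo=False)
model.train(x_train, y_train, x_valid, y_valid)
model.eval(x_test, y_test)
model.save()    # writes config.json, preprocessor.pkl and model_weights.hdf5

model = Sequence("ner-example")
model.load()
annotations = model.tag(["John Smith works at Inria in Paris."], "json")
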
Example #6
class Sequence(object):

    config_file = 'config.json'
    weight_file = 'model_weights.hdf5'
    preprocessor_file = 'preprocessor.pkl'

    def __init__(self, 
                 model_name,
                 model_type="BidLSTM_CRF",
                 embeddings_name=None,
                 char_emb_size=25, 
                 max_char_length=30,
                 char_lstm_units=25,
                 word_lstm_units=100, 
                 dropout=0.5, 
                 recurrent_dropout=0.25,
                 use_char_feature=True, 
                 use_crf=True,
                 batch_size=20, 
                 optimizer='adam', 
                 learning_rate=0.001, 
                 lr_decay=0.9,
                 clip_gradients=5.0, 
                 max_epoch=50, 
                 early_stop=True,
                 patience=5,
                 max_checkpoints_to_keep=5, 
                 log_dir=None,
                 use_ELMo=True,
                 fold_number=1):

        self.model = None
        self.models = None
        self.p = None
        self.log_dir = log_dir
        self.embeddings_name = embeddings_name

        word_emb_size = 0
        if embeddings_name is not None:
            self.embeddings = Embeddings(embeddings_name, use_ELMo=use_ELMo) 
            word_emb_size = self.embeddings.embed_size

        self.model_config = ModelConfig(model_name=model_name, 
                                        model_type=model_type, 
                                        embeddings_name=embeddings_name, 
                                        word_embedding_size=word_emb_size, 
                                        char_emb_size=char_emb_size, 
                                        char_lstm_units=char_lstm_units, 
                                        max_char_length=max_char_length,
                                        word_lstm_units=word_lstm_units, 
                                        dropout=dropout, 
                                        recurrent_dropout=recurrent_dropout, 
                                        use_char_feature=use_char_feature, 
                                        use_crf=use_crf, 
                                        fold_number=fold_number, 
                                        batch_size=batch_size,
                                        use_ELMo=use_ELMo)

        self.training_config = TrainingConfig(batch_size, optimizer, learning_rate,
                                              lr_decay, clip_gradients, max_epoch,
                                              early_stop, patience, 
                                              max_checkpoints_to_keep)


    def train(self, x_train, y_train, x_valid=None, y_valid=None):
        # TBD if valid is None, segment train to get one
        x_all = np.concatenate((x_train, x_valid), axis=0)
        y_all = np.concatenate((y_train, y_valid), axis=0)
        self.p = prepare_preprocessor(x_all, y_all, self.model_config)
        self.model_config.char_vocab_size = len(self.p.vocab_char)
        self.model_config.case_vocab_size = len(self.p.vocab_case)

        """
        if self.embeddings.use_ELMo:
            # dump token context independent data for the train set, done once for the training
            x_train_local = x_train
            if not self.training_config.early_stop:
                # in case we want to train with the validation set too, we dump also
                # the ELMo embeddings for the token of the valid set
                x_train_local = np.concatenate((x_train, x_valid), axis=0)
            self.embeddings.dump_ELMo_token_embeddings(x_train_local)
        """
        self.model = get_model(self.model_config, self.p, len(self.p.vocab_tag))
        trainer = Trainer(self.model, 
                          self.models,
                          self.embeddings,
                          self.model_config,
                          self.training_config,
                          checkpoint_path=self.log_dir,
                          preprocessor=self.p
                          )
        trainer.train(x_train, y_train, x_valid, y_valid)
        if self.embeddings.use_ELMo:
            self.embeddings.clean_ELMo_cache()

    def train_nfold(self, x_train, y_train, x_valid=None, y_valid=None, fold_number=10):
        if x_valid is not None and y_valid is not None:
            x_all = np.concatenate((x_train, x_valid), axis=0)
            y_all = np.concatenate((y_train, y_valid), axis=0)
            self.p = prepare_preprocessor(x_all, y_all, self.model_config)
        else:
            self.p = prepare_preprocessor(x_train, y_train, self.model_config)
        self.model_config.char_vocab_size = len(self.p.vocab_char)
        self.model_config.case_vocab_size = len(self.p.vocab_case)
        self.p.return_lengths = True
        
        #self.model = get_model(self.model_config, self.p, len(self.p.vocab_tag))
        self.models = []

        for k in range(0, fold_number):
            model = get_model(self.model_config, self.p, len(self.p.vocab_tag))
            self.models.append(model)

        trainer = Trainer(self.model, 
                          self.models,
                          self.embeddings,
                          self.model_config,
                          self.training_config,
                          checkpoint_path=self.log_dir,
                          preprocessor=self.p
                          )
        trainer.train_nfold(x_train, y_train, x_valid, y_valid)
        if self.embeddings.use_ELMo:
            self.embeddings.clean_ELMo_cache()

    def eval(self, x_test, y_test):
        if self.model_config.fold_number > 1 and self.models and len(self.models) == self.model_config.fold_number:
            self.eval_nfold(x_test, y_test)
        else:
            self.eval_single(x_test, y_test)


    def eval_single(self, x_test, y_test):   
        if self.model:
            # Prepare test data (steps, generator)
            test_generator = DataGenerator(x_test, y_test, 
              batch_size=self.training_config.batch_size, preprocessor=self.p, 
              char_embed_size=self.model_config.char_embedding_size, 
              embeddings=self.embeddings, shuffle=False)

            # Build the evaluator and evaluate the model
            scorer = Scorer(test_generator, self.p, evaluation=True)
            scorer.model = self.model
            scorer.on_epoch_end(epoch=-1) 
        else:
            raise OSError('Could not find a model.')


    def eval_nfold(self, x_test, y_test):
        if self.models is not None:
            total_f1 = 0
            best_f1 = 0
            best_index = 0
            worst_f1 = 1
            worst_index = 0
            reports = []
            total_precision = 0
            total_recall = 0
            for i in range(0, self.model_config.fold_number):
                print('\n------------------------ fold ' + str(i) + '--------------------------------------')

                # Prepare test data (steps, generator)
                test_generator = DataGenerator(x_test, y_test, 
                  batch_size=self.training_config.batch_size, preprocessor=self.p, 
                  char_embed_size=self.model_config.char_embedding_size, 
                  embeddings=self.embeddings, shuffle=False)

                # Build the evaluator and evaluate the model
                scorer = Scorer(test_generator, self.p, evaluation=True)
                scorer.model = self.models[i]
                scorer.on_epoch_end(epoch=-1) 
                f1 = scorer.f1
                precision = scorer.precision
                recall = scorer.recall
                reports.append(scorer.report)
                
                if best_f1 < f1:
                    best_f1 = f1
                    best_index = i
                if worst_f1 > f1:
                    worst_f1 = f1
                    worst_index = i
                total_f1 += f1
                total_precision += precision
                total_recall += recall

            macro_f1 = total_f1 / self.model_config.fold_number
            macro_precision = total_precision / self.model_config.fold_number
            macro_recall = total_recall / self.model_config.fold_number

            print("\naverage over", self.model_config.fold_number, "folds")
            print("\tmacro f1 =", macro_f1)
            print("\tmacro precision =", macro_precision)
            print("\tmacro recall =", macro_recall, "\n")

            print("\n** Worst ** model scores - \n")
            print(reports[worst_index])

            self.model = self.models[best_index]
            print("\n** Best ** model scores - \n")
            print(reports[best_index])
        

    def tag(self, texts, output_format):
        if self.model:
            tagger = Tagger(self.model, self.model_config, self.embeddings, preprocessor=self.p)
            return tagger.tag(texts, output_format)
        else:
            raise OSError('Could not find a model.')


    def save(self, dir_path='data/models/sequenceLabelling/'):
        
        # create a subfolder for the model if it does not already exist
        directory = os.path.join(dir_path, self.model_config.model_name)
        if not os.path.exists(directory):
            os.makedirs(directory)

        self.p.save(os.path.join(directory, self.preprocessor_file))
        print('preprocessor saved')
        
        self.model_config.save(os.path.join(directory, self.config_file))
        print('model config file saved')
        
        self.model.save(os.path.join(directory, self.weight_file))
        print('model saved')


    def load(self, dir_path='data/models/sequenceLabelling/'):
        self.p = WordPreprocessor.load(os.path.join(dir_path, self.model_config.model_name, self.preprocessor_file))
        
        self.model_config = ModelConfig.load(os.path.join(dir_path, self.model_config.model_name, self.config_file))

        # load embeddings
        self.embeddings = Embeddings(self.model_config.embeddings_name, use_ELMo=self.model_config.use_ELMo) 
        self.model_config.word_embedding_size = self.embeddings.embed_size

        self.model = get_model(self.model_config, self.p, ntags=len(self.p.vocab_tag))
        self.model.load(filepath=os.path.join(dir_path, self.model_config.model_name, self.weight_file))
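
For the n-fold path in this variant, a hedged sketch (the fold count, model name and data variables are again placeholders): train_nfold() builds one model per fold, and eval() dispatches to eval_nfold() when the number of trained models matches the configured fold_number, keeping the best fold as self.model afterwards:

model = Sequence("ner-example",
                 embeddings_name="glove-840B",
                 use_ELMo=False,
                 fold_number=10)
model.train_nfold(x_train, y_train, x_valid, y_valid, fold_number=10)
model.eval(x_test, y_test)    # runs eval_nfold() over the 10 fold models
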