Code example #1
    def predict(self, text):
        self.prediction_count += 1

        if self.prediction_count == 1:
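            # First call: point the dataset folder at a temp location and create the stats/output folder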
            self.parameters['dataset_text_folder'] = os.path.join('..', 'data', 'temp')
            self.stats_graph_folder, _ = self._create_stats_graph_folder(self.parameters)

        # Update the deploy folder, file, and dataset
        dataset_type = 'deploy'
        ### Delete all deployment data
        for filepath in glob.glob(os.path.join(self.parameters['dataset_text_folder'], '{0}*'.format(dataset_type))):
            if os.path.isdir(filepath):
                shutil.rmtree(filepath)
            else:
                os.remove(filepath)
        ### Create brat folder and file
        dataset_brat_deploy_folder = os.path.join(self.parameters['dataset_text_folder'], dataset_type)
        utils.create_folder_if_not_exists(dataset_brat_deploy_folder)
        dataset_brat_deploy_filepath = os.path.join(dataset_brat_deploy_folder,
                                        'temp_{0}.txt'.format(str(self.prediction_count).zfill(5)))
        with codecs.open(dataset_brat_deploy_filepath, 'w', 'UTF-8') as f:
            f.write(text)
        ### Update deploy filepaths
        dataset_filepaths, dataset_brat_folders = self._get_valid_dataset_filepaths(self.parameters,
            dataset_types=[dataset_type])
        self.dataset_filepaths.update(dataset_filepaths)
        self.dataset_brat_folders.update(dataset_brat_folders)
        ### Update the dataset for the new deploy set
        self.dataset.update_dataset(self.dataset_filepaths, [dataset_type])

        # Predict labels and output brat
        output_filepaths = {}
        prediction_output = train.prediction_step(self.sess, self.dataset, dataset_type, self.model,
                     self.transition_params_trained, self.stats_graph_folder, self.prediction_count,
                     self.parameters, self.dataset_filepaths)
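        # prediction_step returns three values; only the output filepath is used here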
        _, _, output_filepaths[dataset_type] = prediction_output
        conll_to_brat.output_brat(output_filepaths, self.dataset_brat_folders, self.stats_graph_folder,
                overwrite=True)

        # Print and output result
        text_filepath = os.path.join(self.stats_graph_folder, 'brat', 'deploy',
            os.path.basename(dataset_brat_deploy_filepath))
        annotation_filepath = os.path.join(self.stats_graph_folder, 'brat', 'deploy',
            '{0}.ann'.format(utils.get_basename_without_extension(dataset_brat_deploy_filepath)))
        text2, entities = brat_to_conll.get_entities_from_brat(text_filepath, annotation_filepath,
                verbose=True)
        assert text == text2  # sanity check: the brat round-trip must preserve the input text
        return entities
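A minimal usage sketch for this predict method (hedged: the constructor call below is hypothetical, and NeuroNER's actual initialization arguments may differ):

# Sketch: assumes a NeuroNER-like object that loads a pretrained model on construction.
nn = NeuroNER(parameters_filepath='./parameters.ini')  # hypothetical constructor argument
entities = nn.predict('John Smith works at Acme Corp.')
for entity in entities:
    print(entity)  # entity dicts as returned by brat_to_conll.get_entities_from_brat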
Code example #2
File: neuroner.py Project: saikswaroop/NeuroNER
    def fit(self):
        parameters = self.parameters
        conf_parameters = self.conf_parameters
        dataset_filepaths = self.dataset_filepaths
        dataset = self.dataset
        dataset_brat_folders = self.dataset_brat_folders
        sess = self.sess
        model = self.model
        transition_params_trained = self.transition_params_trained
        stats_graph_folder, experiment_timestamp = self._create_stats_graph_folder(parameters)

        # Initialize and save execution details
        start_time = time.time()
        results = {}
        results['epoch'] = {}
        results['execution_details'] = {}
        results['execution_details']['train_start'] = start_time
        results['execution_details']['time_stamp'] = experiment_timestamp
        results['execution_details']['early_stop'] = False
        results['execution_details']['keyboard_interrupt'] = False
        results['execution_details']['num_epochs'] = 0
        results['model_options'] = copy.copy(parameters)

        model_folder = os.path.join(stats_graph_folder, 'model')
        utils.create_folder_if_not_exists(model_folder)
        with open(os.path.join(model_folder, 'parameters.ini'), 'w') as parameters_file:
            conf_parameters.write(parameters_file)
        pickle.dump(dataset, open(os.path.join(model_folder, 'dataset.pickle'), 'wb'))
            
        tensorboard_log_folder = os.path.join(stats_graph_folder, 'tensorboard_logs')
        utils.create_folder_if_not_exists(tensorboard_log_folder)
        tensorboard_log_folders = {}
        for dataset_type in dataset_filepaths.keys():
            tensorboard_log_folders[dataset_type] = os.path.join(stats_graph_folder, 'tensorboard_logs', dataset_type)
            utils.create_folder_if_not_exists(tensorboard_log_folders[dataset_type])
                
        # Instantiate the writers for TensorBoard
        writers = {}
        for dataset_type in dataset_filepaths.keys():
            writers[dataset_type] = tf.summary.FileWriter(tensorboard_log_folders[dataset_type], graph=sess.graph)
        embedding_writer = tf.summary.FileWriter(model_folder) # embedding_writer has to write in model_folder, otherwise TensorBoard won't be able to view embeddings

        embeddings_projector_config = projector.ProjectorConfig()
        tensorboard_token_embeddings = embeddings_projector_config.embeddings.add()
        tensorboard_token_embeddings.tensor_name = model.token_embedding_weights.name
        token_list_file_path = os.path.join(model_folder, 'tensorboard_metadata_tokens.tsv')
        tensorboard_token_embeddings.metadata_path = os.path.relpath(token_list_file_path, '..')

        tensorboard_character_embeddings = embeddings_projector_config.embeddings.add()
        tensorboard_character_embeddings.tensor_name = model.character_embedding_weights.name
        character_list_file_path = os.path.join(model_folder, 'tensorboard_metadata_characters.tsv')
        tensorboard_character_embeddings.metadata_path = os.path.relpath(character_list_file_path, '..')

        projector.visualize_embeddings(embedding_writer, embeddings_projector_config)

        # Write metadata for TensorBoard embeddings
        token_list_file = codecs.open(token_list_file_path,'w', 'UTF-8')
        for token_index in range(dataset.vocabulary_size):
            token_list_file.write('{0}\n'.format(dataset.index_to_token[token_index]))
        token_list_file.close()

        character_list_file = codecs.open(character_list_file_path,'w', 'UTF-8')
        for character_index in range(dataset.alphabet_size):
            if character_index == dataset.PADDING_CHARACTER_INDEX:
                character_list_file.write('PADDING\n')
            else:
                character_list_file.write('{0}\n'.format(dataset.index_to_character[character_index]))
        character_list_file.close()


        # Start training + evaluation loop. Each iteration corresponds to 1 epoch.
        bad_counter = 0 # number of epochs with no improvement on the validation set in terms of F1-score
        previous_best_valid_f1_score = 0
        epoch_number = -1
        try:
            while True:
                step = 0
                epoch_number += 1
                print('\nStarting epoch {0}'.format(epoch_number))

                epoch_start_time = time.time()

                if epoch_number != 0:
                    # Train model: loop over all sequences of training set with shuffling
                    sequence_numbers = list(range(len(dataset.token_indices['train'])))
                    random.shuffle(sequence_numbers)
                    for sequence_number in sequence_numbers:
                        transition_params_trained = train.train_step(sess, dataset, sequence_number, model, parameters)
                        step += 1
                        if step % 10 == 0:
                            print('Training {0:.2f}% done'.format(step/len(sequence_numbers)*100), end='\r', flush=True)

                epoch_elapsed_training_time = time.time() - epoch_start_time
                print('Training completed in {0:.2f} seconds'.format(epoch_elapsed_training_time), flush=True)

                y_pred, y_true, output_filepaths = train.predict_labels(sess, model, transition_params_trained, parameters, dataset, epoch_number, stats_graph_folder, dataset_filepaths)

                # Evaluate model: save and plot results
                evaluate.evaluate_model(results, dataset, y_pred, y_true, stats_graph_folder, epoch_number, epoch_start_time, output_filepaths, parameters)

                if parameters['use_pretrained_model'] and not parameters['train_model']:
                    conll_to_brat.output_brat(output_filepaths, dataset_brat_folders, stats_graph_folder)
                    break

                # Save model
                model.saver.save(sess, os.path.join(model_folder, 'model_{0:05d}.ckpt'.format(epoch_number)))

                # Save TensorBoard logs
                summary = sess.run(model.summary_op, feed_dict=None)
                writers['train'].add_summary(summary, epoch_number)
                writers['train'].flush()
                utils.copytree(writers['train'].get_logdir(), model_folder)


                # Early stop
                valid_f1_score = results['epoch'][epoch_number][0]['valid']['f1_score']['micro']
                if  valid_f1_score > previous_best_valid_f1_score:
                    bad_counter = 0
                    previous_best_valid_f1_score = valid_f1_score
                    conll_to_brat.output_brat(output_filepaths, dataset_brat_folders, stats_graph_folder, overwrite=True)
                    self.transition_params_trained = transition_params_trained
                else:
                    bad_counter += 1
                print("The last {0} epochs have not shown improvements on the validation set.".format(bad_counter))

                if bad_counter >= parameters['patience']:
                    print('Early Stop!')
                    results['execution_details']['early_stop'] = True
                    break

                if epoch_number >= parameters['maximum_number_of_epochs']: break


        except KeyboardInterrupt:
            results['execution_details']['keyboard_interrupt'] = True
            print('Training interrupted')

        print('Finishing the experiment')
        end_time = time.time()
        results['execution_details']['train_duration'] = end_time - start_time
        results['execution_details']['train_end'] = end_time
        evaluate.save_results(results, stats_graph_folder)
        for dataset_type in dataset_filepaths.keys():
            writers[dataset_type].close()
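The early-stopping logic in the loop above reduces to a simple reusable pattern; here is a self-contained sketch, where validate() is a hypothetical stand-in for the train-plus-evaluate step:

# Minimal early-stopping skeleton mirroring the loop above (illustrative only).
def train_with_early_stopping(validate, max_epochs, patience):
    best_score, bad_counter = 0.0, 0
    for epoch in range(max_epochs + 1):
        score = validate(epoch)  # stand-in for train_step + evaluate_model + F1 lookup
        if score > best_score:
            best_score, bad_counter = score, 0  # improvement: reset the counter
        else:
            bad_counter += 1  # one more epoch without improvement
        if bad_counter >= patience:
            return 'early_stop', best_score
    return 'max_epochs', best_score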
Code example #3
File: main.py Project: Franck-Dernoncourt/testing
def main():

    parameters, conf_parameters = load_parameters()
    dataset_filepaths, dataset_brat_folders = get_valid_dataset_filepaths(parameters)
    check_parameter_compatiblity(parameters, dataset_filepaths)

    # Load dataset
    dataset = ds.Dataset(verbose=parameters['verbose'], debug=parameters['debug'])
    dataset.load_dataset(dataset_filepaths, parameters)

    # Create graph and session
    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
            intra_op_parallelism_threads=parameters['number_of_cpu_threads'],
            inter_op_parallelism_threads=parameters['number_of_cpu_threads'],
            device_count={'CPU': 1, 'GPU': parameters['number_of_gpus']},
            allow_soft_placement=True, # automatically choose an existing and supported device to run the operations in case the specified one doesn't exist
            log_device_placement=False
            )

        sess = tf.Session(config=session_conf)

        with sess.as_default():
            # Initialize and save execution details
            start_time = time.time()
            experiment_timestamp = utils.get_current_time_in_miliseconds()
            results = {}
            results['epoch'] = {}
            results['execution_details'] = {}
            results['execution_details']['train_start'] = start_time
            results['execution_details']['time_stamp'] = experiment_timestamp
            results['execution_details']['early_stop'] = False
            results['execution_details']['keyboard_interrupt'] = False
            results['execution_details']['num_epochs'] = 0
            results['model_options'] = copy.copy(parameters)

            dataset_name = utils.get_basename_without_extension(parameters['dataset_text_folder'])
            model_name = '{0}_{1}'.format(dataset_name, results['execution_details']['time_stamp'])

            output_folder=os.path.join('..', 'output')
            utils.create_folder_if_not_exists(output_folder)
            stats_graph_folder=os.path.join(output_folder, model_name) # Folder where to save graphs
            utils.create_folder_if_not_exists(stats_graph_folder)
            model_folder = os.path.join(stats_graph_folder, 'model')
            utils.create_folder_if_not_exists(model_folder)
            with open(os.path.join(model_folder, 'parameters.ini'), 'w') as parameters_file:
                conf_parameters.write(parameters_file)
            tensorboard_log_folder = os.path.join(stats_graph_folder, 'tensorboard_logs')
            utils.create_folder_if_not_exists(tensorboard_log_folder)
            tensorboard_log_folders = {}
            for dataset_type in dataset_filepaths.keys():
                tensorboard_log_folders[dataset_type] = os.path.join(stats_graph_folder, 'tensorboard_logs', dataset_type)
                utils.create_folder_if_not_exists(tensorboard_log_folders[dataset_type])
            pickle.dump(dataset, open(os.path.join(model_folder, 'dataset.pickle'), 'wb'))

            # Instantiate the model
            # graph initialization should be before FileWriter, otherwise the graph will not appear in TensorBoard
            model = EntityLSTM(dataset, parameters)

            # Instantiate the writers for TensorBoard
            writers = {}
            for dataset_type in dataset_filepaths.keys():
                writers[dataset_type] = tf.summary.FileWriter(tensorboard_log_folders[dataset_type], graph=sess.graph)
            embedding_writer = tf.summary.FileWriter(model_folder) # embedding_writer has to write in model_folder, otherwise TensorBoard won't be able to view embeddings

            embeddings_projector_config = projector.ProjectorConfig()
            tensorboard_token_embeddings = embeddings_projector_config.embeddings.add()
            tensorboard_token_embeddings.tensor_name = model.token_embedding_weights.name
            token_list_file_path = os.path.join(model_folder, 'tensorboard_metadata_tokens.tsv')
            tensorboard_token_embeddings.metadata_path = os.path.relpath(token_list_file_path, '..')

            tensorboard_character_embeddings = embeddings_projector_config.embeddings.add()
            tensorboard_character_embeddings.tensor_name = model.character_embedding_weights.name
            character_list_file_path = os.path.join(model_folder, 'tensorboard_metadata_characters.tsv')
            tensorboard_character_embeddings.metadata_path = os.path.relpath(character_list_file_path, '..')

            projector.visualize_embeddings(embedding_writer, embeddings_projector_config)

            # Write metadata for TensorBoard embeddings
            token_list_file = codecs.open(token_list_file_path,'w', 'UTF-8')
            for token_index in range(dataset.vocabulary_size):
                token_list_file.write('{0}\n'.format(dataset.index_to_token[token_index]))
            token_list_file.close()

            character_list_file = codecs.open(character_list_file_path,'w', 'UTF-8')
            for character_index in range(dataset.alphabet_size):
                if character_index == dataset.PADDING_CHARACTER_INDEX:
                    character_list_file.write('PADDING\n')
                else:
                    character_list_file.write('{0}\n'.format(dataset.index_to_character[character_index]))
            character_list_file.close()


            # Initialize the model
            sess.run(tf.global_variables_initializer())
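            # Load word embeddings from file only when training from scratch; a pretrained model already contains them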
            if not parameters['use_pretrained_model']:
                model.load_pretrained_token_embeddings(sess, dataset, parameters)

            # Start training + evaluation loop. Each iteration corresponds to 1 epoch.
            bad_counter = 0 # number of epochs with no improvement on the validation set in terms of F1-score
            previous_best_valid_f1_score = 0
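            # Randomly initialize the CRF transition scores; the +2 accounts for the start and end states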
            transition_params_trained = np.random.rand(len(dataset.unique_labels)+2,len(dataset.unique_labels)+2)
            model_saver = tf.train.Saver(max_to_keep=parameters['maximum_number_of_epochs'])  # defaults to saving all variables
            epoch_number = -1
            try:
                while True:
                    step = 0
                    epoch_number += 1
                    print('\nStarting epoch {0}'.format(epoch_number))

                    epoch_start_time = time.time()

                    if parameters['use_pretrained_model'] and epoch_number == 0:
                        # Restore pretrained model parameters
                        transition_params_trained = train.restore_model_parameters_from_pretrained_model(parameters, dataset, sess, model, model_saver)
                    elif epoch_number != 0:
                        # Train model: loop over all sequences of training set with shuffling
                        sequence_numbers = list(range(len(dataset.token_indices['train'])))
                        random.shuffle(sequence_numbers)
                        for sequence_number in sequence_numbers:
                            transition_params_trained = train.train_step(sess, dataset, sequence_number, model, transition_params_trained, parameters)
                            step += 1
                            if step % 10 == 0:
                                print('Training {0:.2f}% done'.format(step/len(sequence_numbers)*100), end='\r', flush=True)

                    epoch_elapsed_training_time = time.time() - epoch_start_time
                    print('Training completed in {0:.2f} seconds'.format(epoch_elapsed_training_time), flush=True)

                    y_pred, y_true, output_filepaths = train.predict_labels(sess, model, transition_params_trained, parameters, dataset, epoch_number, stats_graph_folder, dataset_filepaths)

                    # Evaluate model: save and plot results
                    evaluate.evaluate_model(results, dataset, y_pred, y_true, stats_graph_folder, epoch_number, epoch_start_time, output_filepaths, parameters)

                    if parameters['use_pretrained_model'] and not parameters['train_model']:
                        conll_to_brat.output_brat(output_filepaths, dataset_brat_folders, stats_graph_folder)
                        break

                    # Save model
                    model_saver.save(sess, os.path.join(model_folder, 'model_{0:05d}.ckpt'.format(epoch_number)))

                    # Save TensorBoard logs
                    summary = sess.run(model.summary_op, feed_dict=None)
                    writers['train'].add_summary(summary, epoch_number)
                    writers['train'].flush()
                    utils.copytree(writers['train'].get_logdir(), model_folder)


                    # Early stop
                    valid_f1_score = results['epoch'][epoch_number][0]['valid']['f1_score']['micro']
                    if  valid_f1_score > previous_best_valid_f1_score:
                        bad_counter = 0
                        previous_best_valid_f1_score = valid_f1_score
                        conll_to_brat.output_brat(output_filepaths, dataset_brat_folders, stats_graph_folder, overwrite=True)
                    else:
                        bad_counter += 1
                    print("The last {0} epochs have not shown improvements on the validation set.".format(bad_counter))

                    if bad_counter >= parameters['patience']:
                        print('Early Stop!')
                        results['execution_details']['early_stop'] = True
                        break

                    if epoch_number >= parameters['maximum_number_of_epochs']: break


            except KeyboardInterrupt:
                results['execution_details']['keyboard_interrupt'] = True
                print('Training interrupted')

            print('Finishing the experiment')
            end_time = time.time()
            results['execution_details']['train_duration'] = end_time - start_time
            results['execution_details']['train_end'] = end_time
            evaluate.save_results(results, stats_graph_folder)
        #sess.close() # release the session's resources
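To reload a checkpoint that main() saves, the graph has to be rebuilt before restoring; a hedged sketch using the same TF 1.x API as the code above (the epoch number in the checkpoint name is illustrative):

# Sketch: restore a saved checkpoint; assumes dataset, parameters, and model_folder are set up as above.
with tf.Graph().as_default():
    model = EntityLSTM(dataset, parameters)  # rebuild the same graph first
    saver = tf.train.Saver()
    with tf.Session() as sess:
        saver.restore(sess, os.path.join(model_folder, 'model_00005.ckpt'))  # illustrative epoch number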
Code example #4
File: neuroner.py Project: hmchuong/NeuroNER
    def fit(self):
        '''
        Train the model on the dataset.
        '''
        parameters = self.parameters
        conf_parameters = self.conf_parameters
        dataset_filepaths = self.dataset_filepaths
        dataset = self.dataset
        dataset_brat_folders = self.dataset_brat_folders
        sess = self.sess
        model = self.model
        transition_params_trained = self.transition_params_trained
        stats_graph_folder, experiment_timestamp = self._create_stats_graph_folder(parameters)

        # Initialize and save execution details
        start_time = time.time()
        results = {}
        results['epoch'] = {}
        '''
        In machine learning, an epoch is one full pass of the learning algorithm over the entire training set.
        Ex:
        The MNIST training set contains 55,000 samples. Once the algorithm has processed all 55,000 samples, one epoch has passed.
        '''
        results['execution_details'] = {}
        results['execution_details']['train_start'] = start_time                # Start time of the run
        results['execution_details']['time_stamp'] = experiment_timestamp       # Timestamp label
        results['execution_details']['early_stop'] = False                      # Whether training was stopped early
        results['execution_details']['keyboard_interrupt'] = False              # Whether training was interrupted from the keyboard
        results['execution_details']['num_epochs'] = 0                          # Number of epochs run
        results['model_options'] = copy.copy(parameters)                        # The run's parameters

        model_folder = os.path.join(stats_graph_folder, 'model')                # output/en.../model
        utils.create_folder_if_not_exists(model_folder)
        # Save the parameter values to parameters.ini
        with open(os.path.join(model_folder, 'parameters.ini'), 'w') as parameters_file:
            conf_parameters.write(parameters_file)                                          # Log the parameters to file
        pickle.dump(dataset, open(os.path.join(model_folder, 'dataset.pickle'), 'wb'))      # Pickle the dataset so later runs can reuse it

        # Create the tensorboard_logs folder, used later for plotting
        tensorboard_log_folder = os.path.join(stats_graph_folder, 'tensorboard_logs')       # folder holding the TensorBoard log files used to plot the charts
        utils.create_folder_if_not_exists(tensorboard_log_folder)
        tensorboard_log_folders = {}
        for dataset_type in dataset_filepaths.keys():
            tensorboard_log_folders[dataset_type] = os.path.join(stats_graph_folder, 'tensorboard_logs', dataset_type)
            utils.create_folder_if_not_exists(tensorboard_log_folders[dataset_type])

        # Instantiate the writers for TensorBoard
        writers = {} # At most 4 writers: train, test, valid, deploy
        for dataset_type in dataset_filepaths.keys():
            writers[dataset_type] = tf.summary.FileWriter(tensorboard_log_folders[dataset_type], graph=sess.graph)
        embedding_writer = tf.summary.FileWriter(model_folder) # embedding_writer has to write in model_folder, otherwise TensorBoard won't be able to view embeddings

        # Used for visualizing the embeddings with TensorBoard
        embeddings_projector_config = projector.ProjectorConfig()
        tensorboard_token_embeddings = embeddings_projector_config.embeddings.add()
        tensorboard_token_embeddings.tensor_name = model.token_embedding_weights.name
        token_list_file_path = os.path.join(model_folder, 'tensorboard_metadata_tokens.tsv')
        tensorboard_token_embeddings.metadata_path = 'tensorboard_metadata_tokens.tsv'  # os.path.relpath(token_list_file_path, '..')

        tensorboard_character_embeddings = embeddings_projector_config.embeddings.add()
        tensorboard_character_embeddings.tensor_name = model.character_embedding_weights.name
        character_list_file_path = os.path.join(model_folder, 'tensorboard_metadata_characters.tsv')
        tensorboard_character_embeddings.metadata_path = 'tensorboard_metadata_characters.tsv'  # os.path.relpath(character_list_file_path, '..')

        # Saves a configuration file that TensorBoard will read during startup.
        projector.visualize_embeddings(embedding_writer, embeddings_projector_config)

        # Write the tokens to a tsv file used as metadata for the embeddings
        token_list_file = codecs.open(token_list_file_path,'w', 'UTF-8')
        for token_index in range(dataset.vocabulary_size):
            token_list_file.write('{0}\n'.format(dataset.index_to_token[token_index]))
        token_list_file.close()

        # Write the characters to a tsv file used as metadata for the embeddings
        character_list_file = codecs.open(character_list_file_path,'w', 'UTF-8')
        for character_index in range(dataset.alphabet_size):
            if character_index == dataset.PADDING_CHARACTER_INDEX:
                character_list_file.write('PADDING\n')
            else:
                character_list_file.write('{0}\n'.format(dataset.index_to_character[character_index]))
        character_list_file.close()


        # Start training + evaluation loop. Each iteration corresponds to 1 epoch.
        bad_counter = 0 # number of epochs with no improvement on the validation set in terms of F1-score
        previous_best_valid_f1_score = 0 # best F1 score from earlier epochs
        epoch_number = -1
        try:
            while True:
                step = 0
                epoch_number += 1
                print('\nStarting epoch {0}'.format(epoch_number))

                epoch_start_time = time.time()

                if epoch_number != 0:
                    # Train model: loop over all sequences of training set with shuffling
                    sequence_numbers = list(range(len(dataset.token_indices['train'])))
                    random.shuffle(sequence_numbers)
                    # Run the training updates
                    for sequence_number in sequence_numbers:
                        transition_params_trained = train.train_step(sess, dataset, sequence_number, model, parameters)
                        step += 1
                        if step % 10 == 0:
                            print('Training {0:.2f}% done'.format(step/len(sequence_numbers)*100), end='\r', flush=True)

                # Compute the time taken by this epoch
                epoch_elapsed_training_time = time.time() - epoch_start_time
                print('Training completed in {0:.2f} seconds'.format(epoch_elapsed_training_time), flush=True)

                y_pred, y_true, output_filepaths = train.predict_labels(sess, model, transition_params_trained, parameters, dataset, epoch_number, stats_graph_folder, dataset_filepaths)

                # Evaluate model: save and plot results
                evaluate.evaluate_model(results, dataset, y_pred, y_true, stats_graph_folder, epoch_number, epoch_start_time, output_filepaths, parameters)

                if parameters['use_pretrained_model'] and not parameters['train_model']:
                    conll_to_brat.output_brat(output_filepaths, dataset_brat_folders, stats_graph_folder)
                    break

                # Save model
                model.saver.save(sess, os.path.join(model_folder, 'model_{0:05d}.ckpt'.format(epoch_number)))

                # Save TensorBoard logs
                summary = sess.run(model.summary_op, feed_dict=None)
                writers['train'].add_summary(summary, epoch_number)
                writers['train'].flush()
                utils.copytree(writers['train'].get_logdir(), model_folder)


                # Early stop
                valid_f1_score = results['epoch'][epoch_number][0]['valid']['f1_score']['micro']
                # If this epoch's validation score beats the previous best
                if  valid_f1_score > previous_best_valid_f1_score:
                    bad_counter = 0
                    previous_best_valid_f1_score = valid_f1_score
                    conll_to_brat.output_brat(output_filepaths, dataset_brat_folders, stats_graph_folder, overwrite=True)
                    self.transition_params_trained = transition_params_trained
                else:
                    bad_counter += 1
                print("The last {0} epochs have not shown improvements on the validation set.".format(bad_counter))

                # If bad_counter reaches the patience limit (parameters['patience'], initialized to 10), stop training
                if bad_counter >= parameters['patience']:
                    print('Early Stop!')
                    results['execution_details']['early_stop'] = True
                    break

                # If the epoch count has reached the configured maximum, stop training
                if epoch_number >= parameters['maximum_number_of_epochs']: break


        except KeyboardInterrupt:
            results['execution_details']['keyboard_interrupt'] = True
            print('Training interrupted')

        # Training is over: save the timing information and the results
        print('Finishing the experiment')
        end_time = time.time()
        results['execution_details']['train_duration'] = end_time - start_time
        results['execution_details']['train_end'] = end_time
        evaluate.save_results(results, stats_graph_folder)
        for dataset_type in dataset_filepaths.keys():
            writers[dataset_type].close()
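The early-stop lookup results['epoch'][epoch_number][0]['valid']['f1_score']['micro'] implies a particular nesting for the results dict; a sketch of that shape, inferred from the lookup expression alone (the score value is illustrative):

# Assumed shape of results, inferred from the lookup used in the training loop above.
results = {'epoch': {0: [{'valid': {'f1_score': {'micro': 0.87}}}]}}  # illustrative value
print(results['epoch'][0][0]['valid']['f1_score']['micro'])  # -> 0.87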