Code Example #1
File: neuromodel.py  Project: UofM-DSP/WebDI
import configparser

import utils  # project-local helpers (provides convert_configparser_to_dictionary)


def _get_config_param(param_filepath=None):
    """
    Get the parameters from the config file.

    Returns a flat parameter dictionary together with the ConfigParser
    it was read from (None if no file was specified).
    """
    param = {}
    param_file_txt = None  # avoid a NameError when no file is given

    # If a parameter file is specified, load it
    if param_filepath:
        param_file_txt = configparser.ConfigParser()
        param_file_txt.read(param_filepath, encoding="UTF-8")
        # Flatten the {section: {key: value}} mapping into a single dict
        nested_parameters = utils.convert_configparser_to_dictionary(param_file_txt)
        for k, v in nested_parameters.items():
            param.update(v)

    return param, param_file_txt
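The helper utils.convert_configparser_to_dictionary is used above but not shown. Below is a minimal sketch of what such a helper might look like, together with an illustrative run; the helper body, the model.ini filename, and its contents are assumptions for illustration, not taken from the original project.

import configparser

def convert_configparser_to_dictionary(config):
    # Illustrative sketch of the project's utils helper: map each
    # section name to its own {key: value} dict.
    return {section: dict(config.items(section))
            for section in config.sections()}

# Hypothetical usage; the ini file name and contents are made up.
with open("model.ini", "w", encoding="UTF-8") as f:
    f.write("[training]\nlearning_rate = 0.005\npatience = 10\n")

config = configparser.ConfigParser()
config.read("model.ini", encoding="UTF-8")
nested = convert_configparser_to_dictionary(config)
print(nested)  # {'training': {'learning_rate': '0.005', 'patience': '10'}}

# Flattening across sections, as _get_config_param does:
param = {}
for _, section_values in nested.items():
    param.update(section_values)
print(param)   # {'learning_rate': '0.005', 'patience': '10'}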
Code Example #2
    def _load_parameters(self,
                         parameters_filepath,
                         arguments=None,
                         verbose=True):
        '''
        Load parameters from the ini file if specified, take any command
        line arguments into account, and ensure that each parameter is
        cast to the correct type. Command line arguments take precedence
        over parameters specified in the parameter file.
        '''
        # Avoid sharing a mutable default dict across calls
        if arguments is None:
            arguments = {}
        parameters = {
            'pretrained_model_folder': '../trained_models/conll_2003_en',
            'dataset_text_folder': '../data/conll2003/en',
            'character_embedding_dimension': 25,
            'character_lstm_hidden_state_dimension': 25,
            'check_for_digits_replaced_with_zeros': True,
            'check_for_lowercase': True,
            'debug': False,
            'dropout_rate': 0.5,
            'experiment_name': 'test',
            'freeze_token_embeddings': False,
            'gradient_clipping_value': 5.0,
            'learning_rate': 0.005,
            'load_only_pretrained_token_embeddings': False,
            'load_all_pretrained_token_embeddings': False,
            'main_evaluation_mode': 'conll',
            'maximum_number_of_epochs': 100,
            'number_of_cpu_threads': 8,
            'number_of_gpus': 0,
            'optimizer': 'sgd',
            'output_folder': '../output',
            'patience': 10,
            'plot_format': 'pdf',
            'reload_character_embeddings': True,
            'reload_character_lstm': True,
            'reload_crf': True,
            'reload_feedforward': True,
            'reload_token_embeddings': True,
            'reload_token_lstm': True,
            'remap_unknown_tokens_to_unk': True,
            'spacylanguage': 'en',
            'tagging_format': 'bioes',
            'token_embedding_dimension': 100,
            'token_lstm_hidden_state_dimension': 100,
            'token_pretrained_embedding_filepath':
            '../data/word_vectors/glove.6B.100d.txt',
            'tokenizer': 'spacy',
            'train_model': True,
            'use_character_lstm': True,
            'use_crf': True,
            'use_pretrained_model': False,
            'verbose': False
        }
        # If a parameter file is specified, load it
        if len(parameters_filepath) > 0:
            conf_parameters = configparser.ConfigParser()
            conf_parameters.read(parameters_filepath)
            nested_parameters = utils.convert_configparser_to_dictionary(
                conf_parameters)
            for k, v in nested_parameters.items():
                parameters.update(v)
        # Ensure that any arguments specified on the command line overwrite parameters specified in the parameter file
        for k, v in arguments.items():
            if arguments[k] != arguments['argument_default_value']:
                parameters[k] = v
        for k, v in parameters.items():
            v = str(v)
            # If the value is a list delimited with a comma, choose one element at random.
            if ',' in v:
                v = random.choice(v.split(','))
                parameters[k] = v
            # Ensure that each parameter is cast to the correct type
            if k in [
                    'character_embedding_dimension',
                    'character_lstm_hidden_state_dimension',
                    'token_embedding_dimension',
                    'token_lstm_hidden_state_dimension', 'patience',
                    'maximum_number_of_epochs', 'maximum_training_time',
                    'number_of_cpu_threads', 'number_of_gpus'
            ]:
                parameters[k] = int(v)
            elif k in [
                    'dropout_rate', 'learning_rate', 'gradient_clipping_value'
            ]:
                parameters[k] = float(v)
            elif k in [
                    'remap_unknown_tokens_to_unk', 'use_character_lstm',
                    'use_crf', 'train_model', 'use_pretrained_model', 'debug',
                    'verbose', 'reload_character_embeddings',
                    'reload_character_lstm', 'reload_token_embeddings',
                    'reload_token_lstm', 'reload_feedforward', 'reload_crf',
                    'check_for_lowercase',
                    'check_for_digits_replaced_with_zeros',
                    'freeze_token_embeddings',
                    'load_only_pretrained_token_embeddings',
                    'load_all_pretrained_token_embeddings'
            ]:
                parameters[k] = util.strtobool(v)  # util is distutils.util; strtobool maps 'true'/'false'-style strings to 1/0
        # If loading pretrained model, set the model hyperparameters according to the pretraining parameters
        if parameters['use_pretrained_model']:
            pretraining_parameters = self._load_parameters(
                parameters_filepath=os.path.join(
                    parameters['pretrained_model_folder'], 'parameters.ini'),
                verbose=False)[0]
            for name in [
                    'use_character_lstm', 'character_embedding_dimension',
                    'character_lstm_hidden_state_dimension',
                    'token_embedding_dimension',
                    'token_lstm_hidden_state_dimension', 'use_crf'
            ]:
                if parameters[name] != pretraining_parameters[name]:
                    print(
                        'WARNING: parameter {0} was overwritten from {1} to {2} to be consistent with the pretrained model'
                        .format(name, parameters[name],
                                pretraining_parameters[name]))
                    parameters[name] = pretraining_parameters[name]
        if verbose: pprint(parameters)
        # Update conf_parameters to reflect final parameter values
        conf_parameters = configparser.ConfigParser()
        conf_parameters.read(
            os.path.join(os.path.dirname(__file__),
                         'test-parameters-training.ini'))
        parameter_to_section = utils.get_parameter_to_section_of_configparser(
            conf_parameters)
        for k, v in parameters.items():
            conf_parameters.set(parameter_to_section[k], k, str(v))

        return parameters, conf_parameters
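To make the flow above concrete, here is a self-contained sketch of its three core behaviors: ini-file values overriding defaults, command-line values (checked against a sentinel) overriding file values, and string values being cast back to their proper types, with comma-separated values sampled at random for hyperparameter search. The file name demo.ini, the sentinel string, and the parameter subset are illustrative assumptions, not the project's actual values.

import configparser
import random

def strtobool(s):
    # Minimal stand-in for distutils.util.strtobool (removed from
    # recent Python versions): treat common truthy strings as True.
    return s.strip().lower() in ('1', 'true', 'yes', 'on', 'y', 't')

# Defaults (a small illustrative subset of the full dictionary above)
parameters = {'learning_rate': 0.005, 'patience': 10, 'train_model': True}

# 1. Values from the ini file override the defaults
with open('demo.ini', 'w') as f:
    f.write('[training]\nlearning_rate = 0.01\npatience = 5\n')
conf = configparser.ConfigParser()
conf.read('demo.ini')
for section in conf.sections():
    parameters.update(dict(conf.items(section)))

# 2. Command-line values override file values; a sentinel marks
#    arguments the user did not actually pass
arguments = {'argument_default_value': 'MISSING',
             'patience': '3', 'train_model': 'MISSING'}
for k, v in arguments.items():
    if v != arguments['argument_default_value']:
        parameters[k] = v

# 3. Cast everything back to its proper type; a comma-separated value
#    would be replaced by one element chosen at random
for k, v in parameters.items():
    v = str(v)
    if ',' in v:
        v = random.choice(v.split(','))
    if k == 'patience':
        parameters[k] = int(v)
    elif k == 'learning_rate':
        parameters[k] = float(v)
    elif k == 'train_model':
        parameters[k] = strtobool(v)

print(parameters)  # {'learning_rate': 0.01, 'patience': 3, 'train_model': True}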