def _get_config_param(param_filepath=None): """ Get the parameters from the config file. """ param = {} # If a parameter file is specified, load it if param_filepath: param_file_txt = configparser.ConfigParser() param_file_txt.read(param_filepath, encoding="UTF-8") nested_parameters = utils.convert_configparser_to_dictionary(param_file_txt) for k, v in nested_parameters.items(): param.update(v) return param, param_file_txt
def _load_parameters(self, parameters_filepath, arguments=None, verbose=True):
    '''
    Load parameters from the ini file if specified, take into account any
    command line argument, and ensure that each parameter is cast to the
    correct type.

    Command line arguments take precedence over parameters specified in the
    parameter file.

    Args:
        parameters_filepath: path to the .ini parameter file; an empty
            string skips file loading and uses the built-in defaults.
        arguments: optional mapping of command-line arguments. Entries whose
            value equals arguments['argument_default_value'] are treated as
            "not provided" and ignored.
        verbose: if True, pretty-print the final parameter dict.

    Returns:
        (parameters, conf_parameters): the fully-cast parameter dict and a
        ConfigParser updated to reflect the final parameter values.
    '''
    # Use a None sentinel instead of a mutable default argument, which would
    # be shared across calls.
    if arguments is None:
        arguments = {}
    parameters = {
        'pretrained_model_folder': '../trained_models/conll_2003_en',
        'dataset_text_folder': '../data/conll2003/en',
        'character_embedding_dimension': 25,
        'character_lstm_hidden_state_dimension': 25,
        'check_for_digits_replaced_with_zeros': True,
        'check_for_lowercase': True,
        'debug': False,
        'dropout_rate': 0.5,
        'experiment_name': 'test',
        'freeze_token_embeddings': False,
        'gradient_clipping_value': 5.0,
        'learning_rate': 0.005,
        'load_only_pretrained_token_embeddings': False,
        'load_all_pretrained_token_embeddings': False,
        'main_evaluation_mode': 'conll',
        'maximum_number_of_epochs': 100,
        'number_of_cpu_threads': 8,
        'number_of_gpus': 0,
        'optimizer': 'sgd',
        'output_folder': '../output',
        'patience': 10,
        'plot_format': 'pdf',
        'reload_character_embeddings': True,
        'reload_character_lstm': True,
        'reload_crf': True,
        'reload_feedforward': True,
        'reload_token_embeddings': True,
        'reload_token_lstm': True,
        'remap_unknown_tokens_to_unk': True,
        'spacylanguage': 'en',
        'tagging_format': 'bioes',
        'token_embedding_dimension': 100,
        'token_lstm_hidden_state_dimension': 100,
        'token_pretrained_embedding_filepath': '../data/word_vectors/glove.6B.100d.txt',
        'tokenizer': 'spacy',
        'train_model': True,
        'use_character_lstm': True,
        'use_crf': True,
        'use_pretrained_model': False,
        'verbose': False
    }
    # If a parameter file is specified, load it
    if len(parameters_filepath) > 0:
        conf_parameters = configparser.ConfigParser()
        conf_parameters.read(parameters_filepath)
        nested_parameters = utils.convert_configparser_to_dictionary(
            conf_parameters)
        # Flatten section -> {key: value} nesting into the parameter dict.
        for k, v in nested_parameters.items():
            parameters.update(v)
    # Ensure that any arguments specified on the command line overwrite
    # parameters specified in the parameter file.
    for k, v in arguments.items():
        if arguments[k] != arguments['argument_default_value']:
            parameters[k] = v
    for k, v in parameters.items():
        v = str(v)
        # If the value is a list delimited with a comma, choose one element
        # at random (supports simple hyperparameter sampling).
        if ',' in v:
            v = random.choice(v.split(','))
            parameters[k] = v
        # Ensure that each parameter is cast to the correct type
        if k in [
                'character_embedding_dimension',
                'character_lstm_hidden_state_dimension',
                'token_embedding_dimension',
                'token_lstm_hidden_state_dimension', 'patience',
                'maximum_number_of_epochs', 'maximum_training_time',
                'number_of_cpu_threads', 'number_of_gpus'
        ]:
            parameters[k] = int(v)
        elif k in [
                'dropout_rate', 'learning_rate', 'gradient_clipping_value'
        ]:
            parameters[k] = float(v)
        elif k in [
                'remap_unknown_tokens_to_unk', 'use_character_lstm',
                'use_crf', 'train_model', 'use_pretrained_model', 'debug',
                'verbose', 'reload_character_embeddings',
                'reload_character_lstm', 'reload_token_embeddings',
                'reload_token_lstm', 'reload_feedforward', 'reload_crf',
                'check_for_lowercase',
                'check_for_digits_replaced_with_zeros',
                'freeze_token_embeddings',
                'load_only_pretrained_token_embeddings',
                'load_all_pretrained_token_embeddings'
        ]:
            parameters[k] = util.strtobool(v)
    # If loading pretrained model, set the model hyperparameters according
    # to the pretraining parameters.
    if parameters['use_pretrained_model']:
        pretraining_parameters = self._load_parameters(
            parameters_filepath=os.path.join(
                parameters['pretrained_model_folder'], 'parameters.ini'),
            verbose=False)[0]
        for name in [
                'use_character_lstm', 'character_embedding_dimension',
                'character_lstm_hidden_state_dimension',
                'token_embedding_dimension',
                'token_lstm_hidden_state_dimension', 'use_crf'
        ]:
            # Model architecture must match the pretrained weights, so the
            # pretraining value wins over the user-supplied one.
            if parameters[name] != pretraining_parameters[name]:
                print(
                    'WARNING: parameter {0} was overwritten from {1} to {2} to be consistent with the pretrained model'
                    .format(name, parameters[name],
                            pretraining_parameters[name]))
                parameters[name] = pretraining_parameters[name]
    if verbose:
        pprint(parameters)
    # Update conf_parameters to reflect final parameter values
    conf_parameters = configparser.ConfigParser()
    conf_parameters.read(
        os.path.join(os.path.dirname(__file__),
                     'test-parameters-training.ini'))
    parameter_to_section = utils.get_parameter_to_section_of_configparser(
        conf_parameters)
    for k, v in parameters.items():
        conf_parameters.set(parameter_to_section[k], k, str(v))
    return parameters, conf_parameters