Example 1
    def _load_parameters(self, parameters_filepath, arguments={}, verbose=True):
        '''
        Load parameters from the ini file if specified, take into account any command line argument, and ensure that each parameter is cast to the correct type.
        Command line arguments take precedence over parameters specified in the parameter file.
        '''
        parameters = {'pretrained_model_folder':'../trained_models/conll_2003_en',
                      'dataset_text_folder':'../data/conll2003/en',
                      'character_embedding_dimension':25,
                      'character_lstm_hidden_state_dimension':25,
                      'check_for_digits_replaced_with_zeros':True,
                      'check_for_lowercase':True,
                      'debug':False,
                      'dropout_rate':0.5,
                      'experiment_name':'test',
                      'freeze_token_embeddings':False,
                      'gradient_clipping_value':5.0,
                      'learning_rate':0.005,
                      'load_only_pretrained_token_embeddings':False,
                      'load_all_pretrained_token_embeddings':False,
                      'main_evaluation_mode':'conll',
                      'maximum_number_of_epochs':100,
                      'number_of_cpu_threads':8,
                      'number_of_gpus':0,
                      'optimizer':'sgd',
                      'output_folder':'../output',
                      'patience':10,
                      'plot_format':'pdf',
                      'reload_character_embeddings':True,
                      'reload_character_lstm':True,
                      'reload_crf':True,
                      'reload_feedforward':True,
                      'reload_token_embeddings':True,
                      'reload_token_lstm':True,
                      'remap_unknown_tokens_to_unk':True,
                      'spacylanguage':'en',
                      'tagging_format':'bioes',
                      'token_embedding_dimension':100,
                      'token_lstm_hidden_state_dimension':100,
                      'token_pretrained_embedding_filepath':'../data/word_vectors/glove.6B.100d.txt',
                      'tokenizer':'spacy',
                      'train_model':True,
                      'use_character_lstm':True,
                      'use_crf':True,
                      'use_pretrained_model':False,
                      'verbose':False,
                      # new arguments
                      'num_layers':2,
                      'use_deep_lstm': False}
        # If a parameter file is specified, load it
        if len(parameters_filepath) > 0:
            conf_parameters = configparser.ConfigParser()
            conf_parameters.read(parameters_filepath)
            nested_parameters = utils.convert_configparser_to_dictionary(conf_parameters)
            for k,v in nested_parameters.items():
                parameters.update(v)
        # Ensure that any arguments specified on the command line overwrite parameters specified in the parameter file
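        # (arguments still equal to the 'argument_default_value' sentinel are treated as not set on the command line)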
        for k,v in arguments.items():
            if arguments[k] != arguments['argument_default_value']:
                parameters[k] = v
        for k,v in parameters.items():
            v = str(v)
            # If the value is a list delimited with a comma, choose one element at random.
            if ',' in v:
                v = random.choice(v.split(','))
                parameters[k] = v
            # Ensure that each parameter is cast to the correct type
            if k in ['character_embedding_dimension','character_lstm_hidden_state_dimension','token_embedding_dimension',
                     'token_lstm_hidden_state_dimension','patience',
                     'maximum_number_of_epochs','maximum_training_time','number_of_cpu_threads','number_of_gpus',
                     'num_layers']:
                parameters[k] = int(v)
            elif k in ['dropout_rate', 'learning_rate', 'gradient_clipping_value']:
                parameters[k] = float(v)
            elif k in ['remap_unknown_tokens_to_unk', 'use_character_lstm', 'use_crf', 'train_model', 'use_pretrained_model', 'debug', 'verbose',
                     'reload_character_embeddings', 'reload_character_lstm', 'reload_token_embeddings', 'reload_token_lstm', 'reload_feedforward', 'reload_crf',
                     'check_for_lowercase', 'check_for_digits_replaced_with_zeros', 'freeze_token_embeddings', 'load_only_pretrained_token_embeddings', 'load_all_pretrained_token_embeddings']:
                parameters[k] = distutils.util.strtobool(v)
        # If loading a pretrained model, set the model hyperparameters according to the pretraining parameters
        if parameters['use_pretrained_model']:
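            # _load_parameters returns (parameters, conf_parameters); [0] keeps only the parameters dict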
            pretraining_parameters = self._load_parameters(parameters_filepath=os.path.join(parameters['pretrained_model_folder'], 'parameters.ini'), verbose=False)[0]
            for name in ['use_character_lstm', 'character_embedding_dimension', 'character_lstm_hidden_state_dimension', 'token_embedding_dimension', 'token_lstm_hidden_state_dimension', 'use_crf']:
                if parameters[name] != pretraining_parameters[name]:
                    print('WARNING: parameter {0} was overwritten from {1} to {2} to be consistent with the pretrained model'.format(name, parameters[name], pretraining_parameters[name]))
                    parameters[name] = pretraining_parameters[name]
        if verbose: pprint(parameters)
        # Update conf_parameters to reflect final parameter values
        conf_parameters = configparser.ConfigParser()
        conf_parameters.read(os.path.join('test', 'test-parameters-training.ini'))
        parameter_to_section = utils.get_parameter_to_section_of_configparser(conf_parameters)
        for k, v in parameters.items():
            conf_parameters.set(parameter_to_section[k], k, str(v))

        return parameters, conf_parameters    
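
The precedence rule described in the docstring (command-line arguments > parameter file > built-in defaults) and the final casting step can be illustrated outside the class. The sketch below is a minimal, self-contained approximation of that flow, not NeuroNER's own code: the `[training]` section name, the parameters kept in it, and the `argument_default_value` sentinel are illustrative assumptions, `utils.convert_configparser_to_dictionary` is replaced by a plain loop over sections, and `distutils.util.strtobool` by a simple membership test.

import configparser

# Built-in defaults (lowest precedence).
parameters = {'learning_rate': 0.005, 'patience': 10, 'use_crf': True}

# 1. The parameter file overrides the defaults; configparser returns strings.
conf = configparser.ConfigParser()
conf.read_string('[training]\nlearning_rate = 0.01\npatience = 20\n')
for section in conf.sections():
    parameters.update(conf[section])

# 2. Command-line arguments override the file; values still equal to the
#    sentinel are considered "not set by the user" and are skipped.
arguments = {'argument_default_value': 'argument_default_value',
             'patience': '5',                            # set by the user
             'learning_rate': 'argument_default_value'}  # not set
for k, v in arguments.items():
    if k != 'argument_default_value' and v != arguments['argument_default_value']:
        parameters[k] = v

# 3. Cast each parameter back to its expected type, mirroring the
#    int/float/bool branches of _load_parameters.
for k, v in parameters.items():
    v = str(v)
    if k == 'patience':
        parameters[k] = int(v)
    elif k == 'learning_rate':
        parameters[k] = float(v)
    elif k == 'use_crf':
        parameters[k] = v.lower() in ('true', 'yes', '1')

print(parameters)   # {'learning_rate': 0.01, 'patience': 5, 'use_crf': True}
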
Example 2
def load_parameters(**kwargs):
    '''
    Load parameters from the ini file if specified, take into account any
    command line argument, and ensure that each parameter is cast to the
    correct type.

    Command line arguments take precedence over parameters specified in the
    parameter file.
    '''
    param = {}
    param_default = _get_default_param()

    # Use the parameter path if provided, otherwise fall back to the default
    parameters_filepath = (kwargs.get('parameters_filepath')
                           or param_default['parameters_filepath'])

    param_config, param_file_txt = _get_config_param(parameters_filepath)

    # Parameter file settings should overwrite default settings
    for k, v in param_config.items():
        param[k] = v

    # Command line args should overwrite settings in the parameter file
    for k, v in kwargs.items():
        param[k] = v

    # Any missing args can be set to default
    for k, v in param_default.items():
        if k not in param:
            param[k] = param_default[k]

    # Cast each parameter to its expected type
    param = _clean_param_dtypes(param)

    # if loading a pretrained model, set to pretrain hyperparameters
    if param['use_pretrained_model']:

        pretrain_path = os.path.join(param['pretrained_model_folder'],
                                     'parameters.ini')

        if os.path.isfile(pretrain_path):
            pretrain_param, _ = _get_config_param(pretrain_path)
            pretrain_param = _clean_param_dtypes(pretrain_param)

            pretrain_list = [
                'use_character_lstm', 'character_embedding_dimension',
                'character_lstm_hidden_state_dimension',
                'token_embedding_dimension',
                'token_lstm_hidden_state_dimension', 'use_crf'
            ]

            for name in pretrain_list:
                if param[name] != pretrain_param[name]:
                    msg = """WARNING: parameter '{0}' was overwritten from '{1}' to '{2}'
                        for consistency with the pretrained model""".format(
                        name, param[name], pretrain_param[name])
                    # print(msg)
                    param[name] = pretrain_param[name]
        else:
            msg = """Warning: pretraining parameter file not found."""
            # print(msg)

    # Update param_file_txt to reflect the final, overridden parameter values
    param_to_section = utils.get_parameter_to_section_of_configparser(
        param_file_txt)
    for k, v in param.items():
        try:
            param_file_txt.set(param_to_section[k], k, str(v))
        except KeyError:
            # parameter does not appear in the .ini file, so there is no section to update
            pass

    if param['verbose']:
        pprint(param)

    return param, param_file_txt
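
A typical call site looks like the sketch below. The import path `neuroner.neuromodel` and the file name `parameters.ini` are assumptions for illustration; any keyword argument passed here overrides both the parameter file and the defaults, and the returned dictionary already contains correctly typed values.

from neuroner import neuromodel   # hypothetical import path; adjust to wherever load_parameters lives

param, param_file_txt = neuromodel.load_parameters(
    parameters_filepath='parameters.ini',   # optional .ini file with one section per parameter group
    maximum_number_of_epochs=5,             # command-line style override
    verbose=True)                           # pprint the final parameter dictionary

print(param['maximum_number_of_epochs'])    # 5, cast to int by _clean_param_dtypes
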