def test_all(self):
        """Check that ``get_config()`` exposes the expected sections and values.

        Verifies that the config is a dict containing the ``zh_dictionary``,
        ``eng_dictionary`` and ``dataset`` sections, and that selected entries
        hold the expected paths / flag string.
        """
        config = get_config()
        self.assertIsInstance(config, dict)

        self.assertIn('zh_dictionary', config)
        zh_dictionary = config['zh_dictionary']
        self.assertIsInstance(zh_dictionary, dict)
        # assertEqual rather than assertTrue: assertTrue(value, expected)
        # treats ``expected`` as the failure message and passes for any
        # truthy value, so it never compared the two.
        self.assertEqual(
            zh_dictionary['path'],
            '/Users/duanshangfu/PycharmProjects/named_entity_recognition/data/tmp/word2vec/zh'
        )
        # The flag is compared as the string 'True'.
        self.assertEqual(zh_dictionary['flag'], 'True')

        self.assertIn('eng_dictionary', config)
        eng_dictionary = config['eng_dictionary']
        self.assertEqual(
            eng_dictionary['path'],
            '/Users/duanshangfu/PycharmProjects/named_entity_recognition/data/tmp/word2vec/eng'
        )

        self.assertIn('dataset', config)
        dataset = config['dataset']
        self.assertEqual(
            dataset['zh_train'],
            '/Users/duanshangfu/PycharmProjects/named_entity_recognition/data/zh/zh.train'
        )
def clean_models(model_name='crf'):
    """Delete the file stored under ``train_<model_name>`` in the ``model`` config.

    Args:
        model_name: Suffix appended to ``'train_'`` to form the key looked up
            in the ``model`` section returned by ``get_config()``.

    Returns:
        None. Nothing is removed when the configured path does not exist.
    """
    model_section = get_config()['model']
    target_path = model_section['train_' + model_name]
    if not os.path.exists(target_path):
        return
    os.remove(target_path)
Example #3
0
    def test_keys(self):
        """Check ``self.dic.keys()`` before and after loading ``zh_dictionary``.

        Before ``load_dictionary`` is called the key list must be empty;
        afterwards it must contain at least one key.
        """
        keys = list(self.dic.keys())
        # assertEqual/assertGreater report the actual length on failure,
        # unlike assertTrue on a pre-evaluated comparison.
        self.assertEqual(len(keys), 0)

        config = get_config()
        zh_config = config['zh_dictionary']
        # The config stores the flag as a string; convert it to a bool here.
        self.dic.load_dictionary(zh_config['path'], zh_config['flag'] == 'True')

        keys = list(self.dic.keys())
        self.assertGreater(len(keys), 0)
Example #4
0
    def test_load(self):
        """Check ``vector_size`` after loading each configured dictionary.

        Loads the ``zh_dictionary`` entry and expects a vector size of 100,
        then loads the ``eng_dictionary`` entry into the same object and
        expects a vector size of 300.
        """
        config = get_config()

        zh_config = config['zh_dictionary']
        # The config stores the flag as a string; convert it to a bool here.
        self.dic.load_dictionary(zh_config['path'], zh_config['flag'] == 'True')
        # assertEqual shows the actual size on failure, unlike
        # assertTrue(a == b).
        self.assertEqual(self.dic.vector_size, 100)

        eng_config = config['eng_dictionary']
        self.dic.load_dictionary(eng_config['path'], eng_config['flag'] == 'True')
        self.assertEqual(self.dic.vector_size, 300)
def get_crf_data_set(path=None, data=None, language='zh', train=True):
    """Construct a ``crf_DataSet``, defaulting the path from the config.

    Args:
        path: Data file path. When both ``path`` and ``data`` are falsy, the
            path is taken from the ``dataset`` section of ``get_config()``.
        data: Passed through to ``crf_DataSet`` unchanged.
        language: ``'zh'`` selects the ``zh_*`` dataset keys; any other value
            selects the ``eng_*`` keys.
        train: Choose the train path when true, the test path otherwise
            (only relevant when the path is defaulted from the config).

    Returns:
        The ``crf_DataSet`` built from ``path``, ``data`` and ``language``.
    """
    dataset_section = get_config()['dataset']

    if not (path or data):
        if language == 'zh':
            train_key, test_key = 'zh_train', 'zh_test'
        else:
            train_key, test_key = 'eng_train', 'eng_test'
        # Both entries are read, whichever one ends up being used.
        train_path = dataset_section[train_key]
        test_path = dataset_section[test_key]
        if train:
            path = train_path
        else:
            path = test_path

    return crf_DataSet(path=path, data=data, language=language)
def clean_logs():
    """Delete the CRF and MLP train/test log files named in the ``log`` config.

    The four paths are read from the ``log`` section returned by
    ``get_config()``; any path that does not exist is skipped.
    """
    config = get_config()
    # Resolve every path before deleting anything, so a missing config key
    # fails before any file is touched.
    log_paths = [
        config['log'][key]
        for key in (
            'train_crf_log',
            'test_crf_log',
            'train_mlp_log',
            'test_mlp_log',
        )
    ]
    for log_path in log_paths:
        if os.path.exists(log_path):
            os.remove(log_path)
Example #7
0
    def test_zh_get_vec(self):
        """Exercise ``get_vector`` after loading the ``zh_dictionary`` entry.

        Checks that a default vector of length 10 is rejected with
        ``ValueError``, that looking up '中' with a length-100 default returns
        a vector of the expected length, and that looking up 'nimh' returns
        the default vector itself.
        """
        config = get_config()
        zh_config = config['zh_dictionary']
        # The config stores the flag as a string; convert it to a bool here.
        self.dic.load_dictionary(zh_config['path'], zh_config['flag'] == 'True')

        default = [0] * 10
        self.assertRaises(ValueError, self.dic.get_vector, '中', default_vec=default)

        default = [0] * 100
        vec = self.dic.get_vector('中', default_vec=default)
        # The original ``assertTrue(len(vec), 300)`` passed for any non-empty
        # vector because 300 was taken as the failure message.
        # NOTE(review): 100 is used because a length-100 default is accepted
        # while a length-10 one raises -- confirm 100 is the zh vector size.
        self.assertEqual(len(vec), 100)

        vec = self.dic.get_vector('nimh', default_vec=default)
        self.assertEqual(vec, default)
def get_window_data_set(path=None, data=None, language='zh', train=True):
    """Construct a ``w2v_DataSet`` using paths and settings from the config.

    Args:
        path: Data file path. When both ``path`` and ``data`` are falsy, the
            path is taken from the ``dataset`` section of ``get_config()``.
        data: Passed through to ``w2v_DataSet`` unchanged.
        language: ``'zh'`` selects the ``zh_*`` dataset keys; any other value
            selects the ``eng_*`` keys. The dictionary section is always
            looked up as ``language + '_dictionary'``.
        train: Choose the train path when true, the test path otherwise
            (only relevant when the path is defaulted from the config).

    Returns:
        The constructed ``w2v_DataSet``.
    """
    config = get_config()

    data_config = config['dataset']
    if not path and not data:
        path_train = data_config[
            'zh_train'] if language == 'zh' else data_config['eng_train']
        path_test = data_config[
            'zh_test'] if language == 'zh' else data_config['eng_test']
        path = path_train if train else path_test

    dictionary = config[language + '_dictionary']

    # The former ``if config != -1:`` guard was removed: ``config`` has
    # already been subscripted above, so it could never be -1 at this point
    # and the guard only obscured the single return path.
    return w2v_DataSet(path=path,
                       data=data,
                       window_size=int(data_config['window_size']),
                       dictionary_path=dictionary['path'],
                       # The flag is stored as a string; convert it to a bool.
                       isWord2Vec=dictionary['flag'] == 'True')
 def test_init(self):
     config = get_config()
     est = MultiLayerPerceptron()