def test_all(self):
    """Verify that get_config() returns a dict containing the expected
    dictionary/dataset sections with the expected path and flag values."""
    config = get_config()
    self.assertIsInstance(config, dict)
    self.assertIn('zh_dictionary', config)
    zh_dictionary = config['zh_dictionary']
    self.assertIsInstance(zh_dictionary, dict)
    # BUG FIX: the original used assertTrue(a, b) — the second argument is
    # the failure *message*, so the assertion passed whenever `a` was truthy
    # and never compared the two values. assertEqual performs the comparison.
    self.assertEqual(
        zh_dictionary['path'],
        '/Users/duanshangfu/PycharmProjects/named_entity_recognition/data/tmp/word2vec/zh'
    )
    self.assertEqual(zh_dictionary['flag'], 'True')
    self.assertIn('eng_dictionary', config)
    eng_dictionary = config['eng_dictionary']
    self.assertEqual(
        eng_dictionary['path'],
        '/Users/duanshangfu/PycharmProjects/named_entity_recognition/data/tmp/word2vec/eng'
    )
    self.assertIn('dataset', config)
    dataset = config['dataset']
    self.assertEqual(
        dataset['zh_train'],
        '/Users/duanshangfu/PycharmProjects/named_entity_recognition/data/zh/zh.train'
    )
def clean_models(model_name='crf'):
    """Delete the trained model file for *model_name* ('crf' by default)
    if it exists on disk; a missing file is silently ignored."""
    model_paths = get_config()['model']
    target = model_paths['train_' + model_name]
    if os.path.exists(target):
        os.remove(target)
def test_keys(self):
    """keys() yields nothing before load_dictionary and at least one entry afterwards."""
    before = list(self.dic.keys())
    self.assertTrue(len(before) == 0)
    zh_cfg = get_config()['zh_dictionary']
    self.dic.load_dictionary(zh_cfg['path'], zh_cfg['flag'] == 'True')
    after = list(self.dic.keys())
    self.assertTrue(len(after) > 0)
def test_load(self):
    """Loading each dictionary sets the expected embedding dimensionality
    (100 for Chinese, 300 for English), in that order."""
    cfg = get_config()
    for section, expected_dim in (('zh_dictionary', 100), ('eng_dictionary', 300)):
        dic_cfg = cfg[section]
        self.dic.load_dictionary(dic_cfg['path'], dic_cfg['flag'] == 'True')
        self.assertTrue(self.dic.vector_size == expected_dim)
def get_crf_data_set(path=None, data=None, language='zh', train=True):
    """Build a crf_DataSet for *language* ('zh' or 'eng').

    When neither *path* nor *data* is given, the train or test file path
    (per *train*) is resolved from the 'dataset' section of the config.
    """
    config = get_config()
    dataset_cfg = config['dataset']
    if not path and not data:
        if language == 'zh':
            train_path, test_path = dataset_cfg['zh_train'], dataset_cfg['zh_test']
        else:
            train_path, test_path = dataset_cfg['eng_train'], dataset_cfg['eng_test']
        path = train_path if train else test_path
    return crf_DataSet(path=path, data=data, language=language)
def clean_logs():
    """Remove the crf/mlp train and test log files listed in the config.

    Best-effort cleanup: a log that was never written is silently skipped.
    """
    log_config = get_config()['log']
    # IDIOM: the original repeated the exists/remove stanza four times;
    # iterating the key names removes the duplication without changing behavior.
    for key in ('train_crf_log', 'test_crf_log', 'train_mlp_log', 'test_mlp_log'):
        log_path = log_config[key]
        if os.path.exists(log_path):
            os.remove(log_path)
def test_zh_get_vec(self):
    """get_vector on the Chinese dictionary: rejects a wrong-sized default
    vector, returns a 100-dim vector for a known token, and falls back to
    the default vector for an out-of-vocabulary token."""
    config = get_config()
    zh_config = config['zh_dictionary']
    self.dic.load_dictionary(zh_config['path'], zh_config['flag'] == 'True')
    # A default vector of the wrong length (10 vs the model's 100) must be rejected.
    default = [0] * 10
    self.assertRaises(ValueError, self.dic.get_vector, '中', default_vec=default)
    default = [0] * 100
    vec = self.dic.get_vector('中', default_vec=default)
    # BUG FIX: assertTrue(len(vec), 300) could never fail — 300 was the
    # failure-message argument. The zh embeddings are 100-dimensional
    # (a [0]*100 default is accepted above, and test_load expects
    # vector_size == 100), so compare the length to 100 with assertEqual.
    self.assertEqual(len(vec), 100)
    # Unknown token: get_vector returns the supplied default vector.
    vec = self.dic.get_vector('nimh', default_vec=default)
    self.assertEqual(vec, default)
def get_window_data_set(path=None, data=None, language='zh', train=True):
    """Build a windowed w2v_DataSet for *language* ('zh' or 'eng').

    When neither *path* nor *data* is given, the train or test file path
    (per *train*) is resolved from the 'dataset' section of the config.
    The window size and the per-language dictionary path/flag also come
    from the config.
    """
    config = get_config()
    data_config = config['dataset']
    if not path and not data:
        path_train = data_config['zh_train'] if language == 'zh' else data_config['eng_train']
        path_test = data_config['zh_test'] if language == 'zh' else data_config['eng_test']
        path = path_train if train else path_test
    dictionary = config[language + '_dictionary']
    # BUG FIX: the original guarded construction with `if config != -1:`.
    # config has already been subscripted above, so it can never be -1 here;
    # and had the guard failed, `return ww` would have raised
    # UnboundLocalError. The dead check is removed and the dataset returned
    # directly.
    return w2v_DataSet(
        path=path,
        data=data,
        window_size=int(data_config['window_size']),
        dictionary_path=dictionary['path'],
        isWord2Vec=dictionary['flag'] == 'True',
    )
def test_init(self):
    """Smoke test: the config loads and a MultiLayerPerceptron can be constructed."""
    cfg = get_config()
    perceptron = MultiLayerPerceptron()