def test_query_word_pos(self):
    """Report words whose recorded tags differ in their leading element.

    Reads the objects saved as 'word.pos' and 'word.count', keeps every
    word whose entries yield more than one distinct ``entry[0]`` value,
    and prints those words in descending count order, stopping at the
    first word whose count is below 5.  Summary totals and the entry
    stored for one sample word are printed at the end.
    """
    self.assertTrue(True)
    word_tags = read_obj('word.pos')
    word_counts = read_obj('word.count')

    # Words whose tag entries start with more than one distinct value.
    multiples = defaultdict(set)
    for word, tags in word_tags.items():
        leading = {tag[0] for tag in tags}
        if len(leading) > 1:
            multiples[word] |= tags

    ranked = sorted(
        ((word, tags, word_counts[word]) for word, tags in multiples.items()),
        key=lambda entry: entry[2],
        reverse=True,
    )
    for word, tags, count in ranked:
        # Sorted by descending count, so everything after this is rarer too.
        if count < 5:
            break
        print(word, count, tags)

    print('total:', len(word_counts))
    print('multiple:', len(multiples))
    print('percent:', 1.0 * len(multiples) / len(word_counts))
    print(word_tags['真'])
def test_frequent_pruned(self):
    """Print the type of each entry stored in 'mobile.itemsets.pruned.1'."""
    self.assertTrue(True)
    pruned_path = os.path.join(RESOURCE_DIR, 'mobile.itemsets.pruned.1')
    for entry in utils.read_obj(pruned_path):
        print(type(entry))
def test_show_opinion_counter(self):
    """Print every opinion with its count, as ordered by ``most_common()``.

    The object loaded from 'dp/opinion.counter' must provide
    ``most_common()`` (presumably a ``collections.Counter``).
    """
    self.assertTrue(True)
    counter_path = os.path.join(RESOURCE_DIR, 'dp', 'opinion.counter')
    opinion_counts = read_obj(counter_path)
    for opinion, count in opinion_counts.most_common():
        print(opinion, count)
def test_show_feature_counter(self):
    """Print every feature with its count, as ordered by ``most_common()``.

    The object loaded from 'dp/feature.counter' must provide
    ``most_common()`` (presumably a ``collections.Counter``).
    """
    self.assertTrue(True)
    counter_path = os.path.join(RESOURCE_DIR, 'dp', 'feature.counter')
    feature_counts = read_obj(counter_path)
    for feature, count in feature_counts.most_common():
        print(feature, count)
def test_extract(self):
    """Run double propagation on the cached parsed data and save the results.

    Loads 'dp/dp.R', flattens the ``'sentences'`` list of every entry into
    one list, runs ``double_propagation.extract`` with a small seed set of
    opinion words, and writes the returned features and expanded opinions
    to 'dp/dp.features' and 'dp/dp.opinions'.
    """
    self.assertTrue(True)

    # NOTE: 'dp.R' was built by a now-disabled one-off snippet (Python 2
    # syntax) that concatenated the objects found under RESOURCE_DIR/parsed3
    # until at least 50000 entries were collected, then saved them with
    # save_obj().

    dp_dir = os.path.join(RESOURCE_DIR, 'dp')
    cached = read_obj(os.path.join(RESOURCE_DIR, 'dp', 'dp.R'))

    # Each cached entry is a pair whose second item holds a 'sentences' list.
    sentences = []
    for _, parsed in cached:
        sentences.extend(parsed['sentences'])
    print('单句总数:', len(sentences))

    seed_opinions = {'不错', '漂亮', '流畅', '方便', '高', '持久'}
    features, expanded_opinions = double_propagation.extract(
        seed_opinions, sentences, parsed=True)

    write_file(os.path.join(dp_dir, 'dp.features'), features)
    write_file(os.path.join(dp_dir, 'dp.opinions'), expanded_opinions)
def test_frequent_support(self):
    """Print each (itemset, support) pair from 'mobile.itemsets', then the total."""
    self.assertTrue(True)
    itemsets_path = os.path.join(RESOURCE_DIR, 'mobile.itemsets')
    supported = utils.read_obj(itemsets_path)
    for items, support in supported:
        print(items, support)
    print(len(supported))
def test_prune_xx(self):
    """Prune the extracted features in two passes and save the survivors.

    Loads the feature/opinion collections and their counters from the
    'dp' resource directory, applies
    ``double_propagation.prune_by_threshold`` followed by
    ``double_propagation.prune_order_features``, prints the feature count
    before and after each pass, and saves the result as 'dp/dp.F.pruned'.
    """
    self.assertTrue(True)

    def dp_path(name):
        # All inputs and the output live in the same 'dp' directory.
        return os.path.join(RESOURCE_DIR, 'dp', name)

    features = read_obj(dp_path('dp.F'))
    opinions = read_obj(dp_path('dp.O'))
    feature_counts = read_obj(dp_path('dp.fcounter'))
    opinion_counts = read_obj(dp_path('dp.ocounter'))
    print('len1: ', len(features))

    features, opinions = double_propagation.prune_by_threshold(
        features, opinions, feature_counts, opinion_counts)
    print('len2: ', len(features))

    features = double_propagation.prune_order_features(features, feature_counts)
    print('len3: ', len(features))

    save_obj(features, dp_path('dp.F.pruned'))
def test_show_count(self):
    """Show the top relations per pair kind and dump their samples to files.

    For each of the 'ff', 'oo' and 'fo' kinds this loads a counter and a
    dict from 'mobile/count', prints the 20 most common counter entries
    under a banner, and writes the dict value of every relation to
    'mobile/count/samples/<kind>_<relation>.txt'.
    """
    self.assertTrue(True)

    # (banner label, counter file, dict file, sample file-name template)
    kinds = (
        ('ff', 'ff.counter', 'ff.dict', 'ff_{}.txt'),
        ('oo', 'oo.counter', 'oo.dict', 'oo_{}.txt'),
        ('fo', 'fo.counter', 'fo.dict', 'fo_{}.txt'),
    )

    def count_path(*parts):
        return os.path.join(RESOURCE_DIR, 'mobile', 'count', *parts)

    # Load every counter first, then every dict, before producing any output.
    counters = [utils.read_obj(count_path(counter_file))
                for _, counter_file, _, _ in kinds]
    sample_dicts = [utils.read_obj(count_path(dict_file))
                    for _, _, dict_file, _ in kinds]

    for (label, _, _, _), counter in zip(kinds, counters):
        print('-' * 10 + label + '-' * 10)
        for relation, count in counter.most_common(20):
            print(relation, count)

    for (_, _, _, template), samples in zip(kinds, sample_dicts):
        for relation in samples:
            utils.write_file(
                count_path('samples', template.format(relation)),
                samples[relation])
def load(cls, model_file, keras_model_file):
    """Restore a saved model object and attach its keras model.

    :param model_file: file in which the Model object was saved
    :param keras_model_file: file in which the keras model was saved
    :return: the object read from ``model_file`` with its ``_model``
        attribute set to the loaded keras model
    :rtype: BaseModel
    """
    # The keras model is loaded first, then the wrapper object.
    keras_model = keras.models.load_model(keras_model_file)
    restored = read_obj(model_file)
    restored._model = keras_model
    return restored
def load(cls, keras_model_file=None):
    """Restore the saved SBD model and attach its keras model.

    :param keras_model_file: optional path of the keras model to load;
        when ``None`` the restored object's ``_keras_model_file`` is used
    :return: the restored object with ``_model`` set to the keras model
    :rtype: SBDModel
    """
    # A throwaway instance is built only to learn where the model was saved.
    placeholder = SBDModel(None)
    logger.info('loading model...')
    restored = read_obj(placeholder._model_file)

    keras_path = (restored._keras_model_file
                  if keras_model_file is None
                  else keras_model_file)
    logger.info('loading keras model...')
    restored._model = keras.models.load_model(keras_path)
    return restored
def get_features():
    """Return the contents of 'dp/dp.F' as a list.

    NOTE: an earlier, disabled variant instead returned the entries of
    'dp/feature.counter' whose count was greater than 1.
    """
    feature_path = os.path.join(RESOURCE_DIR, 'dp', 'dp.F')
    return list(utils.read_obj(feature_path))
def load():
    """Read the saved model from ``HomoModel.model_file``.

    :return: the object stored in the model file
    :rtype: HomoModel
    """
    model_path = HomoModel.model_file
    return read_obj(model_path)