コード例 #1
0
ファイル: database.py プロジェクト: pvt2345/KMS_IDRec
    def search(self, key_search):
        """Rank stored records against ``key_search`` and return the best hits.

        Records are read with ``self.select('content')``. A record is scored
        only if its ``content`` has at least 4 non-blank characters and
        contains at least one whitespace-separated term of ``key_search``
        (case-insensitive, matched literally).

        The score of a record is the sum of:

        * for each ``'|'``-separated part of ``tieu_de`` (title), its
          ``LCS4Sentence`` score times ``2 * (position + 1) / number_of_parts``,
          so later parts weigh more;
        * twice the best ``LCS4Sentence`` score among the ``'.'``-separated
          sentences of ``content``.

        Records scoring below 0.3 are dropped.

        Args:
            key_search: Query string; split on whitespace to obtain the
                terms used by the keyword pre-filter, and passed whole to
                ``SearchEngine``.

        Returns:
            A list of at most 20 dicts sorted by descending ``score``, each
            with the keys ``reference``, ``title`` (last title part),
            ``content``, ``score`` and ``index``. ``index`` is a
            ``';'``-joined string of ``'{0}-{1}'`` pairs taken from
            ``LCS4Sentence`` on the whole content (presumably match
            start/end offsets -- confirm against ``SearchEngine``).
        """
        data = self.select('content')
        searcher = SearchEngine(key_search)

        # Escape every term so that regex metacharacters typed by the user
        # ("c++", "a.b", "(") are matched literally instead of raising
        # re.error or acting as wildcards. Compiled once, not per record.
        # An empty query yields an empty pattern, which matches any content.
        terms = [re.escape(term) for term in key_search.lower().split()]
        keyword_pattern = re.compile('|'.join(terms))

        result = []
        for d in data:
            content = d.get('content', ' ')
            if content is None:
                # A stored None is handled like a missing field.
                content = ' '
            if len(content.strip()) < 4:
                continue
            if keyword_pattern.search(content.lower()) is None:
                continue

            raw_titles = d.get('tieu_de', ' ')
            if raw_titles is None:
                raw_titles = ' '
            titles = raw_titles.split('|')

            try:
                score = 0
                for i, title in enumerate(titles):
                    score_tieu_de, _ = searcher.LCS4Sentence(u'' + title)
                    score += (i + 1) / len(titles) * score_tieu_de * 2

                # Only the single best-matching sentence contributes.
                best_sentence_score = 0
                for sentence in content.split('.'):
                    s_content, _ = searcher.LCS4Sentence(u'' + sentence)
                    if s_content > best_sentence_score:
                        best_sentence_score = s_content

                # The index pairs are taken from the whole content,
                # not from individual sentences.
                _, index = searcher.LCS4Sentence(u'' + content)
                score += 2 * best_sentence_score
                if score < 0.3:
                    continue

                result.append({
                    'reference': d.get('reference'),
                    'title': titles[-1],
                    'content': content,
                    'score': score,
                    'index': ';'.join(
                        ['{0}-{1}'.format(s, e) for s, e in index]),
                })
            except Exception as e:
                # Best effort: one bad record must not abort the search.
                # Report it, skip the record, and continue with a fresh
                # searcher for the remaining records.
                print(e)
                searcher = SearchEngine(key_search)
                print('error')

        # Kept from the original: looser than the 0.3 gate above, so it is
        # effectively a no-op for ordinary numeric scores.
        result = [r for r in result if r['score'] > 0.05]
        result = sorted(result, key=lambda r: r['score'], reverse=True)
        return result[:20]