def get_document_type(self, dictname):
    """Return the document type of the text behind a dictionary name.

    A fresh ``LawDocument`` analyzes ``../dictionary/text/<dictname>.txt``
    (the path is relative to the current working directory) and its
    ``document_type`` attribute is returned unchanged.

    Args:
        dictname: Base name of the text file, without the ``.txt`` extension.

    Returns:
        The ``document_type`` attribute of the analyzed document.
    """
    source_path = "../dictionary/text/" + dictname + ".txt"
    parsed = LawDocument()
    parsed.analyze(filename=source_path)
    return parsed.document_type
def get_document(self, dictname):
    """Return the title lines of the text behind a dictionary name.

    A fresh ``LawDocument`` analyzes ``../dictionary/text/<dictname>.txt``
    (the path is relative to the current working directory); the entries of
    its ``document_title`` attribute are joined with newlines.

    Args:
        dictname: Base name of the text file, without the ``.txt`` extension.

    Returns:
        A single string with one ``document_title`` entry per line.
    """
    source_path = "../dictionary/text/" + dictname + ".txt"
    parsed = LawDocument()
    parsed.analyze(filename=source_path)
    title_lines = parsed.document_title
    return "\n".join(title_lines)
def get_document_chapiter(self, sims, dictname):
    """Format the best-matching chapters of a dictionary's source text.

    A fresh ``LawDocument`` analyzes ``../dictionary/text/<dictname>.txt``.
    ``sims`` is then walked in the order given: each entry is indexed as
    ``entry[0]`` (chapter number) and ``entry[1]`` (similarity). Walking stops
    at the first entry whose similarity is not above 0.4, or once two
    chapters have been emitted.

    Args:
        sims: Sequence of indexable entries ``(doc_no, similarity, ...)``.
            Presumably sorted by descending similarity, since the scan stops
            at the first low score -- verify against the caller.
        dictname: Base name of the text file, without the ``.txt`` extension.

    Returns:
        The concatenated sections, each a numbered ``********`` banner line
        followed by the chapter text and a newline; ``""`` when nothing
        qualifies.
    """
    document = LawDocument()
    document.analyze(filename="../dictionary/text/" + dictname + ".txt")

    sections = []
    for hit in sims:
        doc_no = hit[0]
        score = hit[1]
        # The first entry at or below the threshold ends the scan.
        if not score > 0.4:
            break
        banner = "******** " + str(len(sections) + 1) + " ********\n"
        sections.append(banner + document.get_document_chapiter(doc_no) + "\n")
        # At most two chapters are reported.
        if len(sections) >= 2:
            break
    return "".join(sections)
def search_document(self, textpath, filename):
    """Return the document type of the best-scoring dictionary for a file.

    The file is analyzed with ``LawDocument``. For every ``*.dict`` entry in
    ``../dictionary/dict`` a score is accumulated by running
    ``self.text_search_lsi`` on the second element of each
    ``table_contents`` row that has more than one element and summing
    ``self.get_similarity_value`` over the results. The dictionary with the
    highest total wins; ties go to the one listed first by ``os.listdir``.

    Args:
        textpath: Passed through unchanged to ``self.text_search_lsi``.
        filename: Path of the document to analyze.

    Returns:
        Whatever ``self.get_document_type`` returns for the winning
        dictionary name.

    Raises:
        IndexError: If the dictionary directory contains no ``.dict`` files.
        OSError: If the dictionary directory cannot be listed.
    """
    dict_dir = "../dictionary/dict"
    suffix = ".dict"

    query_doc = LawDocument()
    query_doc.analyze(filename=filename)

    scores = []
    for entry in os.listdir(dict_dir):
        if not entry.endswith(suffix):
            continue
        # Strip only the trailing extension: str.replace would also eat a
        # ".dict" inside the name (e.g. "law.dictionary.dict").
        dictname = entry[:-len(suffix)]

        # NOTE(review): nothing in this score depends on dictname, so every
        # dictionary appears to receive the same total unless the helpers
        # keep per-call state -- confirm the intended search target.
        total = 0.0
        for sentence in query_doc.table_contents:
            if len(sentence) > 1:
                sims = self.text_search_lsi(textpath, sentence[1])
                total += self.get_similarity_value(sims)
        scores.append([dictname, total])

    if not scores:
        # Same exception type the bare doc_tab[0] lookup used to raise,
        # but with a message that names the actual problem.
        raise IndexError("no " + suffix + " files found in " + dict_dir)

    # max() returns the first maximal item, matching what a stable
    # descending sort followed by [0] would select.
    best = max(scores, key=lambda item: item[1])
    return self.get_document_type(best[0])