コード例 #1
0
    def __init__(self,
                 words: Dict[str, str] = None,
                 dictionary: str = None,
                 normalization: LyricsNormalizationParams = None):
        """Build the word -> hyphenation lookup table.

        Args:
            words: Optional mapping from a word to its hyphenated form.
                A non-empty mapping is stored as-is (not copied), so
                entries read from ``dictionary`` are added to that same
                object. Entries passed here are not normalized.
            dictionary: Optional path of a text file holding one
                whitespace-separated ``word hyphenation`` pair per line.
                Blank lines are ignored.
            normalization: Optional normalization parameters. They are
                copied, the copy's ``lyrics_normalization`` is set to
                ``LyricsNormalization.SYLLABLES``, and the resulting
                processor is applied to both columns of every line read
                from ``dictionary``.

        Raises:
            ValueError: If a non-blank dictionary line does not split into
                exactly two fields.
            Exception: If no entries are available after loading.
        """
        super().__init__()
        self.words = words if words else {}

        # Bind the processor unconditionally: it is tested inside the file
        # loop even when no normalization was requested.
        p = None
        if normalization:
            # Work on a copy so the caller's parameter object is not modified.
            normalization = LyricsNormalizationParams(
                **normalization.to_dict())
            normalization.lyrics_normalization = LyricsNormalization.SYLLABLES
            p = LyricsNormalizationProcessor(normalization)

        if dictionary:
            with open(dictionary) as f:
                for line in f:
                    if not line.strip():
                        # Tolerate empty / trailing blank lines.
                        continue
                    word, hyphen = line.split()
                    if p:
                        word = p.apply(word)
                        hyphen = p.apply(hyphen)
                    self.words[word] = hyphen

        if not self.words:
            raise Exception(
                "Empty dictionary for hyphenation. Either pass the hyphenation directly or as a file"
            )
コード例 #2
0
class Predictor(AlgorithmPredictor):
    """Predictor that looks up stored texts matching one document's words.

    The document is selected by ``settings.params.documentId``; its text is
    normalized with a ``LyricsNormalization.WORDS`` processor before being
    handed to a ``SimilarDocumentChecker``.
    """

    @staticmethod
    def meta():
        """Return the ``Meta`` class associated with this predictor."""
        return Meta

    def __init__(self, settings: AlgorithmPredictorSettings):
        """Store the target document id and create the helper objects."""
        super().__init__(settings)
        self.document_id = settings.params.documentId
        self.document_similar_tester = SimilarDocumentChecker()
        word_params = LyricsNormalizationParams(LyricsNormalization.WORDS)
        self.text_normalizer = LyricsNormalizationProcessor(word_params)

    @classmethod
    def unprocessed(cls, page: DatabasePage) -> bool:
        """Report every page as unprocessed (always ``True``)."""
        return True

    def predict(
        self,
        pages: List[DatabasePage],
        callback: Optional[PredictionCallback] = None
    ) -> AlgorithmPredictionResultGenerator:
        """Yield a single ``Result`` holding up to five matching texts.

        The book is taken from the first entry of ``pages``; ``callback``
        is accepted but not used here.
        """
        book = pages[0].book
        book_documents = DatabaseBookDocuments().load(book)
        document: Document = (
            book_documents.database_documents.get_document_by_id(
                self.document_id))

        normalized_text = self.text_normalizer.apply(
            document.get_text_of_document(book))
        # Split on single spaces only, exactly as the checker is fed today.
        word_list = normalized_text.split(' ')

        ranking = self.document_similar_tester.check_word_based_similarity(
            word_list)
        # Keep only the keys of the five top-ranked entries and resolve each
        # one to its stored text.
        texts = [
            self.document_similar_tester.document_dict[key].get_text()
            for key, _ in ranking.most_common(5)
        ]

        yield Result(texts)