class Test(unittest.TestCase):
    """Unit tests for the text_processing module (TextProcessor)."""

    def setUp(self):
        """Create a fresh TextProcessor instance before each test."""
        self.textprocessor = TextProcessor()

    def test_clean_text(self):
        """Test whether clean_text removes hashtags and @-mentions.

        The expected output shows the '#hahaha @li' suffix stripped
        while the rest of the sentence (and trailing space) is kept.
        """
        text = 'I love you from the Moon and back #hahaha @li'
        expected_result = 'I love you from the Moon and back '
        result = self.textprocessor.clean_text(text)
        self.assertEqual(result, expected_result)

    def test_tokenize_text(self):
        """Test whether tokenize_text splits text into tokens.

        The expected output keeps only 'love', 'moon', 'back' —
        presumably stopwords are dropped by the tokenizer (verify
        against TextProcessor.tokenize_text).
        """
        text = 'i love from the moon and back'
        expected_result = ['love', 'moon', 'back']
        result = self.textprocessor.tokenize_text(text)
        self.assertEqual(result, expected_result)
class PreProcessor():
    """Pre Processing class used to convert json file into index list.

    Pipeline: clean text -> tokenize -> map tokens to embedding
    indexes -> pad/truncate to a fixed length.
    """

    def __init__(self, padding_size=20, max_dictionary_size=500000):
        """Load the embedding dictionary and store the padding size.

        padding_size: target length of every output index sequence.
        max_dictionary_size: cap passed through to WordEmbedding.
        """
        self.text_processor = TextProcessor()
        self.embedding = WordEmbedding(max_dictionary_size=max_dictionary_size)
        self.embedding.load_embedding_dictionary(self.embedding.dictionary_path)
        self.padding_size = padding_size

    def pre_process_text(self, text):
        """Convert one raw text into a fixed-length list of indexes.

        Returns the padded index list produced by pad_sequence.
        """
        cleaned_text = self.text_processor.clean_text(text)
        tokens = self.text_processor.tokenize_text(cleaned_text)
        # Removed stray debug print(tokens) left over from development.
        embedding_indexes = self.embedding.replace_tokens_with_index(tokens)
        padded_indexes = self.pad_sequence(embedding_indexes)
        return padded_indexes

    def pad_sequence(self, input_sequence):
        """Step 4 pad_sequence.

        Keep only the LAST padding_size items, then right-pad with
        zeros so the result is exactly padding_size long.
        """
        # Slicing copies, so the caller's list is never mutated.
        sequence = input_sequence[-self.padding_size:]
        if len(sequence) < self.padding_size:
            pad_sequence = [0] * (self.padding_size - len(sequence))
            sequence += pad_sequence
        return sequence
def __init__(self, padding_size=20, max_dictionary_size=500000):
    """Set up the helpers used by the pre-processing pipeline.

    padding_size: target length of the output index sequences.
    max_dictionary_size: cap passed through to WordEmbedding.
    """
    self.padding_size = padding_size
    self.text_processor = TextProcessor()
    embedding = WordEmbedding(max_dictionary_size=max_dictionary_size)
    embedding.load_embedding_dictionary(embedding.dictionary_path)
    self.embedding = embedding
class PreProcessor():
    """Run the pre processing end to end.

    Converts raw text into a fixed-length list of embedding indexes.
    """

    def __init__(self, padding_size=20, max_dictionary_size=500000):
        """Load the embedding dictionary and remember the padding size.

        padding_size: target length of every output index sequence.
        max_dictionary_size: cap passed through to WordEmbedding.
        """
        self.text_processor = TextProcessor()
        self.embedding = WordEmbedding(max_dictionary_size=max_dictionary_size)
        self.embedding.load_embedding_dictionary(
            self.embedding.dictionary_path)
        self.padding_size = padding_size

    def pre_process_text(self, text):
        """Run the pre processing end to end: clean, tokenize, index, pad."""
        cleaned_text = self.text_processor.clean_text(text)
        tokens = self.text_processor.tokenize_text(cleaned_text)
        embedding_indexes = self.embedding.replace_tokens_with_index(tokens)
        padded_index = self.pad_sequence(embedding_indexes)
        return padded_index

    def pad_sequence(self, input_squence):
        """Add 0 padding to a sequence until it is padding_size long.

        Longer inputs are truncated to their LAST padding_size items.
        (Parameter name keeps the original spelling for keyword-call
        compatibility.)
        """
        sequence = input_squence[-self.padding_size:]
        if len(sequence) < self.padding_size:
            pad_sequence = [0] * (self.padding_size - len(sequence))
            sequence = sequence + pad_sequence
        return sequence
class PreProcessor():
    """ Pre-process tweets """

    def __init__(self, padding_size=20, max_dictionary_size=500000):
        """Initialise the text processor, embedding, and padding size."""
        self.padding_size = padding_size
        self.text_processor = TextProcessor()
        self.embedding = WordEmbedding(max_dictionary_size=max_dictionary_size)
        self.embedding.load_embedding_dictionary(
            self.embedding.dictionary_path)

    def pre_process_text(self, text):
        """ Clean and tokenize text, replace tokens with index """
        tokens = self.text_processor.tokenize_text(
            self.text_processor.clean_text(text))
        indexed = self.embedding.replace_tokens_with_index(tokens)
        return self.pad_sequence(indexed)

    def pad_sequence(self, input_squence):
        """ Padding: add 0 until max length """
        # Take the trailing window, then top up with zeros as needed.
        tail = input_squence[-self.padding_size:]
        return tail + [0] * (self.padding_size - len(tail))
def setUp(self):
    """Create a fresh TextProcessor instance before each test."""
    # One new processor per test keeps test cases independent.
    self.textprocessor = TextProcessor()