Exemple #1
0
    def test_multiple_speech(self):
        content = '''
“If you did sign it,” said the King, “that only makes the matter worse. You _must_ have meant some mischief, or else you would have signed your name like an honest man.”
'''
        sentences = [Sentence(s) for s in quote_aware_sent_tokenize(content)]

        speech_parts = flatten([s.speech for s in sentences if s.speech])

        self.assertEqual([
            s.serialize() for s in speech_parts if s
        ], [{
            'text': 'If you did mark it ,',
            'speaker': {
                'name': 'King',
                'gender': 'male'
            },
            'inflection': 'said'
        }, {
            'text':
            'that only makes the matter worse . You  must  have meant some mischief , or else you would have signed your name love an right man .',
            'speaker': {
                'name': 'King',
                'gender': 'male'
            },
            'inflection': 'said'
        }])
Exemple #2
0
    def test_quote_aware_sent_tokenize(self):
        content = '''
Hallward painted away with that marvellous bold touch of his, that had the true refinement and perfect delicacy that in art, at any rate comes only from strength.  He was unconscious of the silence.
"Basil, I am tired of standing," cried Dorian Gray suddenly.  "I must go out and sit in the garden.  The air is stifling here."
"My dear fellow, I am so sorry.  When I am painting, I can not think of anything else.  But you never sat better.  You were perfectly still.  And I have caught the effect I wanted--the half-parted lips and the bright look in the eyes.  I do not know what Harry has been saying to you, but he has certainly made you have the most wonderful expression.  I suppose he has been paying you compliments.  You must not believe a word that he says."
        '''
        self.assertEquals(quote_aware_sent_tokenize(content), [
            'Hallward painted away with that marvellous bold touch of his, that had the true refinement and perfect delicacy that in art, at any rate comes only from strength.',
            'He was unconscious of the silence.',
            '"Basil, I am tired of standing," cried Dorian Gray suddenly.',
            '"I must go out and sit in the garden. The air is stifling here."',
            '"My dear fellow, I am so sorry. When I am painting, I can not think of anything else. But you never sat better. You were perfectly still. And I have caught the effect I wanted--the half-parted lips and the bright look in the eyes. I do not know what Harry has been saying to you, but he has certainly made you have the most wonderful expression. I suppose he has been paying you compliments. You must not believe a word that he says."'
        ])
Exemple #3
0
    def test_real_text(self):
        content = '''
Hallward painted away with that marvellous bold touch of his, that had the true refinement and perfect delicacy that in art, at any rate comes only from strength.  He was unconscious of the silence.

"Basil, I am tired of standing," cried Dorian Gray suddenly.  "I must go out and sit in the garden.  The air is stifling here."

"My dear fellow, I am so sorry.  When I am painting, I can not think of anything else.  But you never sat better.  You were perfectly still.  And I have caught the effect I wanted--the half-parted lips and the bright look in the eyes.  I do not know what Harry has been saying to you, but he has certainly made you have the most wonderful expression.  I suppose he has been paying you compliments.  You must not believe a word that he says."
        '''

        sentences = [Sentence(s) for s in quote_aware_sent_tokenize(content)]

        speech_parts = flatten([s.speech for s in sentences if s.speech])

        self.assertEquals([
            s.serialize() for s in speech_parts if s
        ], [{
            'text': 'Basil , I am tired of standing ,',
            'speaker': {
                'name': 'Dorian Gray',
                'gender': 'male'
            },
            'inflection': 'cried'
        }, {
            'text':
            'I must go out and sit in the garden . The air is stifling here .',
            'speaker': {
                'name': '',
                'gender': None
            },
            'inflection': None
        }, {
            'text':
            'My dear fellow , I am so sorry . When I am painting , I can not think of anything else . But you never sat better . You were perfectly still . And I have caught the put I wanted -- the half-parted lips and the interesting look in the eyes . I do not know what Harry has been saying to you , but he has certainly made you have the most good expression . I suppose he has been paying you compliments . You must not think a word that he says .',
            'speaker': {
                'name': '',
                'gender': None
            },
            'inflection': None
        }])
Exemple #4
0
    def get_speech(self) -> Iterable:
        """Extract the speech parts of ``self.text`` and unify their speaker.

        The text is split with ``quote_aware_sent_tokenize``; each piece is
        wrapped in ``Sentence`` and handed to ``extract_speech``. The results
        are flattened with ``rflatten`` and then passed through
        ``assign_best_name``.

        Returns:
            The flattened speech parts, as a list, after speaker assignment.
        """

        def assign_best_name(speech_parts: Iterable) -> Iterable:
            """Give every speech part the single best speaker name found.

            If more than one distinct capitalized speaker name is present the
            parts are returned unchanged. Otherwise the first non-empty name
            is chosen, unless a later name is in ``NAMES`` while the current
            choice is not, in which case the later one wins.
            """
            # FIXME: what if a line has multiple speakers? Does this happen somewhere?

            # Materialize once: the parts are traversed several times below,
            # and a one-shot iterator would be empty on every pass but the
            # first.
            speech_parts = list(speech_parts)

            capitalized_names = {
                item.speaker.name
                for item in speech_parts
                if item.speaker.name and item.speaker.name[0].isupper()
            }
            if len(capitalized_names) > 1:
                return speech_parts

            best_name = None
            for speech_item in speech_parts:
                candidate = speech_item.speaker.name
                if candidate == '':
                    continue
                if best_name is None:
                    best_name = candidate
                elif candidate in NAMES and best_name not in NAMES:
                    # Prefer a name listed in NAMES over one that is not.
                    best_name = candidate

            # NOTE(review): when no part carries a name, best_name is still
            # None here and every speaker becomes Person(name=None) - confirm
            # that this is intended.
            for speech_item in speech_parts:
                speech_item.speaker = Person(name=best_name)

            return speech_parts

        # can probably use extract speech from here without copy paste
        # by using quote_aware_sent_tokenize

        sentences_text = quote_aware_sent_tokenize(self.text)
        speech_parts = [extract_speech(Sentence(sentence)) for sentence in sentences_text]
        speech_parts = assign_best_name(rflatten(speech_parts))

        return speech_parts
Exemple #5
0
 def quote_aware_sentences(self) -> List[str]:
     """Return ``quote_aware_sent_tokenize`` applied to ``self.content``."""
     sentences = quote_aware_sent_tokenize(self.content)
     return sentences