def test_parse_sentences_with_dash_and_space():
    """Dash-prefixed utterances in one cue are expected as separate sentences.

    The expected sentences carry no leading dash and are both stamped with
    a one-minute timestamp.
    """
    # NOTE(review): the fixture literal is kept exactly as found; its
    # internal line breaks look collapsed into spaces -- confirm upstream.
    subtitles = '''\ 1 00:01:00,000 --> 00:01:03,000 - I hope to see my friend. - And shake his hand. '''

    parsed = Parser().parse(subtitles)

    expected = [
        Sentence('I hope to see my friend.', timedelta(minutes=1)),
        Sentence('And shake his hand.', timedelta(minutes=1)),
    ]
    assert parsed == expected
def test_parse_two_sentences():
    """A cue holding two sentences is expected to parse into two sentences.

    Both expected sentences share the one-minute timestamp, and the
    trailing ``!`` of the second one is kept.
    """
    # NOTE(review): the fixture literal is kept exactly as found; its
    # internal line breaks look collapsed into spaces -- confirm upstream.
    subtitles = '''\ 1 00:01:00,000 --> 00:01:03,000 I hope to see my friend. And shake his hand! '''

    parsed = Parser().parse(subtitles)

    expected = [
        Sentence('I hope to see my friend.', timedelta(minutes=1)),
        Sentence('And shake his hand!', timedelta(minutes=1)),
    ]
    assert parsed == expected
def test_exerpt_nearby_sentences():
    """An excerpt around a mid-list match keeps only the nearby sentences.

    The match is the sentence at index 3 (32s).  The expected excerpt spans
    indices 1 through 7 (30s to 36s) and leaves out the first (25s) and
    last (40s) sentences.
    """
    dialogue = [
        Sentence("Duty calls.", td(seconds=25)),
        Sentence("Gentlemen.", td(seconds=30)),
        Sentence("I'll never understand.", td(seconds=31)),
        Sentence("All these books, a world of knowledge, and what do you do?", td(seconds=32)),
        Sentence("Play poker all night.", td(seconds=33)),
        Sentence("We got culture.", td(seconds=34)),
        Sentence("We got culture coming out our asses.", td(seconds=35)),
        Sentence("How's this for culture?", td(seconds=36)),
        Sentence("Phone.", td(seconds=40)),
    ]

    # Presumably the timedelta is the maximum distance from the match;
    # confirm against Excerptor.
    result = Excerptor(td(seconds=5)).excerpt(dialogue, 3, 'knowledge')

    # Indices 1..7 inclusive, i.e. everything except the two outliers.
    assert result == Excerpt(dialogue[1:8], 'knowledge')
def test_parse_two_sentences_across_multiple_lines():
    """Two separate cues are expected to give one sentence per cue.

    The first expected sentence is stamped at one minute and the second at
    two minutes, matching the start times of cues 1 and 2 in the fixture.
    """
    # NOTE(review): the fixture literal is kept exactly as found; its
    # internal line breaks look collapsed into spaces -- confirm upstream.
    subtitles = '''\ 1 00:01:00,000 --> 00:01:03,000 I hope to see my friend. 2 00:02:00,000 --> 00:02:03,000 And shake his hand. '''

    parsed = Parser().parse(subtitles)

    expected = [
        Sentence('I hope to see my friend.', timedelta(minutes=1)),
        Sentence('And shake his hand.', timedelta(minutes=2)),
    ]
    assert parsed == expected
def test_exerpt_nearby_sentences_at_end():
    """An excerpt for a match on the last sentence stays inside the list.

    Both sentences share the 90-minute timestamp and the match is at
    index 1; the expected excerpt contains both of them.
    """
    dialogue = [
        Sentence("Roads?", td(minutes=90)),
        Sentence("Where we're going we don't need roads.", td(minutes=90)),
    ]

    result = Excerptor(td(seconds=5)).excerpt(dialogue, 1, 'roads')

    assert result == Excerpt(dialogue[0:2], 'roads')
def test_exerpt_nearby_sentences_at_start():
    """An excerpt for a match on the first sentence stays inside the list.

    The match is at index 0 (0s) and the following sentence is one second
    later; the expected excerpt contains both of them.
    """
    dialogue = [
        Sentence("All right, there are five billion people on earth.", td(seconds=0)),
        Sentence("When I was a kid there were three.", td(seconds=1)),
    ]

    result = Excerptor(td(seconds=5)).excerpt(dialogue, 0, 'billion')

    assert result == Excerpt(dialogue[0:2], 'billion')
def test_parse_simple_multiline_sentence():
    """A single cue with one sentence is expected to parse to one sentence.

    The expected sentence holds the full text and a one-minute timestamp.
    """
    # NOTE(review): the test name suggests the cue text originally spanned
    # several lines; the literal is kept exactly as found, with any line
    # breaks apparently collapsed into spaces -- confirm upstream.
    subtitles = '''\ 1 00:01:00,000 --> 00:01:03,000 I hope to see my friend and shake his hand. '''

    parsed = Parser().parse(subtitles)

    expected = [
        Sentence('I hope to see my friend and shake his hand.', timedelta(minutes=1)),
    ]
    assert parsed == expected
def test_parse_sentence_connected_with_ellipses():
    """Cues joined by trailing/leading ellipses are expected to merge.

    The first cue ends with ``...`` and the second starts with ``...``; the
    single expected sentence has the ellipses removed and carries the
    one-minute timestamp.
    """
    # NOTE(review): the fixture literal is kept exactly as found; its
    # internal line breaks look collapsed into spaces -- confirm upstream.
    subtitles = '''\ 1 00:01:00,000 --> 00:01:03,000 I hope to see my friend... 2 00:01:00,000 --> 00:01:03,000 ...and shake his hand. '''

    parsed = Parser().parse(subtitles)

    expected = [
        Sentence('I hope to see my friend and shake his hand.', timedelta(minutes=1)),
    ]
    assert parsed == expected
def test_analysis_yields_excerpts():
    """Analysis is expected to map every word to the excerpts containing it.

    The fixture holds two sentences in one cue.  Every expected excerpt
    consists of both sentences; words occurring in both sentences ("I",
    "hoped") are expected to have two excerpts, the others one.
    """
    # NOTE(review): the fixture literal is kept exactly as found; its
    # internal line breaks look collapsed into spaces -- confirm upstream.
    text = '''\ 1 02:11:39,473 --> 02:11:42,375 I hoped to see my friend. I hoped. '''

    _, analysis = cached_analyse(text)

    # 7899 s + 473000 us is 02:11:39.473, the start time of the cue.
    first = Sentence('I hoped to see my friend.', timedelta(seconds=7899, microseconds=473000))
    second = Sentence('I hoped.', timedelta(seconds=7899, microseconds=473000))

    def around(match):
        # A fresh list per excerpt, exactly as if each one were spelled out.
        return Excerpt([first, second], match)

    expected = {
        Word('I', PartOfSpeach.OTHER): [around('I'), around('I')],
        Word('hop', PartOfSpeach.VERB): [around('hoped'), around('hoped')],
        Word('to', PartOfSpeach.OTHER): [around('to')],
        Word('see', PartOfSpeach.VERB): [around('see')],
        Word('my', PartOfSpeach.OTHER): [around('my')],
        Word('friend', PartOfSpeach.NOUN): [around('friend')],
    }
    assert dict(analysis.word_with_excerpts) == expected
def test_parse_sentences_with_html():
    """Markup tags around the cue text are expected to be dropped.

    The fixture wraps the sentence in an italic tag pair; the expected
    sentence holds only the plain text and a one-minute timestamp.
    """
    # NOTE(review): the fixture literal is kept exactly as found; its
    # internal line breaks look collapsed into spaces -- confirm upstream.
    subtitles = '''\ 1 00:01:00,000 --> 00:01:03,000 <i>I hope to see my friend.</i> '''

    parsed = Parser().parse(subtitles)

    expected = [Sentence('I hope to see my friend.', timedelta(minutes=1))]
    assert parsed == expected