def test_only_tagging(
    self, run_tokenization_mock, run_tagging_mock, parse_mock, *args
):
    """Tagged-only run: each sentence yields a plain token line followed
    by one token<TAB>SUC-tag line per token.

    NOTE(review): line breaks inside the expected literal were
    reconstructed from a whitespace-mangled source; confirm whether
    blank lines separate sentence blocks in swe_pipeline's output.
    """
    options = self._default_options(tagged=True)
    models = self._default_models()
    run_tokenization_mock.return_value = [
        ["Hej", "mitt", "namn", "är"],
        ["Hej", "mitt", "namn", "är", "Slim", "Shady"],
    ]
    # Per-sentence tuples; only the third slot (SUC tags) is populated
    # for a tagged-only run — presumably (lemmas, ud_tags, suc_tags,
    # ner_tags), matching the NER test's data layout.
    run_tagging_mock.side_effect = [
        (
            [],
            [],
            ["IN", "PS|NEU|SIN|DEF", "NN|NEU|SIN|IND|NOM", "VB|PRS|AKT"],
            [],
        ),
        (
            [],
            [],
            [
                "IN", "PS|NEU|SIN|DEF", "NN|NEU|SIN|IND|NOM",
                "VB|PRS|AKT", "PM|NOM", "PM|NOM"
            ],
            [],
        ),
    ]
    # Silence stderr during the call, then restore the real sys.stderr.
    # The previous `with open(...) as sys.stderr:` left sys.stderr bound
    # to a *closed* file after the block exited.
    with open(os.devnull, "w") as devnull:
        saved_stderr, sys.stderr = sys.stderr, devnull
        try:
            open_mock = mock_open()
            with patch("swe_pipeline.open", open_mock, create=True):
                process_file(options, "file.txt", "", models)
        finally:
            sys.stderr = saved_stderr
    written_to_file = "".join(
        call[0][0] for call in open_mock().write.call_args_list
    )
    self.assertEqual(
        written_to_file,
        dedent("""
        Hej mitt namn är
        Hej\tIN
        mitt\tPS|NEU|SIN|DEF
        namn\tNN|NEU|SIN|IND|NOM
        är\tVB|PRS|AKT
        Hej mitt namn är Slim Shady
        Hej\tIN
        mitt\tPS|NEU|SIN|DEF
        namn\tNN|NEU|SIN|IND|NOM
        är\tVB|PRS|AKT
        Slim\tPM|NOM
        Shady\tPM|NOM
        """).lstrip("\n"),
    )
def test_only_tokenization(
    self, run_tokenization_mock, run_tagging_mock, parse_mock, *args
):
    """Default options: only the tokenized sentences are written out.

    NOTE(review): line breaks inside the expected literal were
    reconstructed from a whitespace-mangled source — TODO confirm.
    """
    options = self._default_options()
    models = self._default_models()
    run_tokenization_mock.return_value = [
        ["Hej", "mitt", "namn", "är"],
        ["Hej", "mitt", "namn", "är", "Slim", "Shady"],
    ]
    # Silence stderr during the call, then restore the real sys.stderr.
    # The previous `with open(...) as sys.stderr:` left sys.stderr bound
    # to a *closed* file after the block exited.
    with open(os.devnull, "w") as devnull:
        saved_stderr, sys.stderr = sys.stderr, devnull
        try:
            open_mock = mock_open()
            with patch("swe_pipeline.open", open_mock, create=True):
                process_file(options, "file.txt", "", models)
        finally:
            sys.stderr = saved_stderr
    written_to_file = "".join(
        call[0][0] for call in open_mock().write.call_args_list
    )
    self.assertEqual(
        written_to_file,
        dedent("""
        Hej mitt namn är
        Hej mitt namn är Slim Shady
        """).lstrip("\n"),
    )
def test_only_tokenization(
    self, run_tokenization_mock, run_tagging_mock, parse_mock, *args
):
    """Default options: only the tokenized sentences are written out.

    NOTE(review): line breaks inside the expected literal were
    reconstructed from a whitespace-mangled source — TODO confirm.
    """
    options = self._default_options()
    models = self._default_models()
    run_tokenization_mock.return_value = [
        ["Hej", "mitt", "namn", "är"],
        ["Hej", "mitt", "namn", "är", "Slim", "Shady"],
    ]
    # Silence stderr during the call, then restore the real sys.stderr.
    # The previous `with open(...) as sys.stderr:` left sys.stderr bound
    # to a *closed* file after the block exited.
    with open(os.devnull, "w") as devnull:
        saved_stderr, sys.stderr = sys.stderr, devnull
        try:
            open_mock = mock_open()
            with patch("swe_pipeline.open", open_mock, create=True):
                process_file(options, "file.txt", "", models)
        finally:
            sys.stderr = saved_stderr
    written_to_file = "".join(
        call[0][0] for call in open_mock().write.call_args_list)
    self.assertEqual(
        written_to_file,
        dedent("""
        Hej mitt namn är
        Hej mitt namn är Slim Shady
        """).lstrip("\n"))
def test_empty_options(
    self, run_tokenization_mock, run_tagging_mock, parse_mock,
    open_mock, *args
):
    """With untouched mocks (run_tokenization yields nothing), no output
    is ever written."""
    options = self._default_options()
    models = self._default_models()
    # Silence stderr during the call, then restore the real sys.stderr.
    # The previous `with open(...) as sys.stderr:` left sys.stderr bound
    # to a *closed* file after the block exited.
    with open(os.devnull, "w") as devnull:
        saved_stderr, sys.stderr = sys.stderr, devnull
        try:
            process_file(options, "file.txt", "", models)
        finally:
            sys.stderr = saved_stderr
    self.assertEqual(open_mock().write.call_count, 0)
def test_empty_options(
    self, run_tokenization_mock, run_tagging_mock, parse_mock,
    open_mock, *args
):
    """With untouched mocks (run_tokenization yields nothing), no output
    is ever written."""
    options = self._default_options()
    models = self._default_models()
    # Silence stderr during the call, then restore the real sys.stderr.
    # The previous `with open(...) as sys.stderr:` left sys.stderr bound
    # to a *closed* file after the block exited.
    with open(os.devnull, "w") as devnull:
        saved_stderr, sys.stderr = sys.stderr, devnull
        try:
            process_file(options, "file.txt", "", models)
        finally:
            sys.stderr = saved_stderr
    self.assertEqual(open_mock().write.call_count, 0)
def test_parsing(
    self, run_tokenization_mock, run_tagging_mock, parse_mock, *args
):
    """parsed=True: tagging runs once per sentence, parsing once per file."""
    options = self._default_options(parsed=True)
    models = self._default_models()
    run_tokenization_mock.return_value = [
        ["Hej", "mitt", "namn", "är"],
        ["Hej", "mitt", "namn", "är", "Slim", "Shady"],
    ]
    # Per-sentence (lemmas, ud_tags, suc_tags, ner_tags) tuples; the
    # parser consumes the first three.
    run_tagging_mock.side_effect = [
        (
            ["hej", "min", "namn", "vara"],
            [
                "INTJ|_",
                "DET|Definite=Def|Gender=Neut|Number=Sing|Poss=Yes",
                "NOUN|Case=Nom|Definite=Ind|Gender=Neut|Number=Sing",
                "AUX|Mood=Ind|Tense=Pres|VerbForm=Fin|Voice=Act",
            ],
            ["IN", "PS|NEU|SIN|DEF", "NN|NEU|SIN|IND|NOM", "VB|PRS|AKT"],
            [],
        ),
        (
            ["hej", "min", "namn", "vara", "Slim", "Shady"],
            [
                "INTJ|_",
                "DET|Definite=Def|Gender=Neut|Number=Sing|Poss=Yes",
                "NOUN|Case=Nom|Definite=Ind|Gender=Neut|Number=Sing",
                "AUX|Mood=Ind|Tense=Pres|VerbForm=Fin|Voice=Act",
                "PROPN|Case=Nom",
                "PROPN|Case=Nom",
            ],
            [
                "IN", "PS|NEU|SIN|DEF", "NN|NEU|SIN|IND|NOM",
                "VB|PRS|AKT", "PM|NOM", "PM|NOM"
            ],
            [],
        ),
    ]
    # Silence stderr during the call, then restore the real sys.stderr.
    # The previous `with open(...) as sys.stderr:` left sys.stderr bound
    # to a *closed* file after the block exited.
    with open(os.devnull, "w") as devnull:
        saved_stderr, sys.stderr = sys.stderr, devnull
        try:
            open_mock = mock_open()
            with patch("swe_pipeline.open", open_mock, create=True):
                process_file(options, "file.txt", "", models)
        finally:
            sys.stderr = saved_stderr
    self.assertEqual(run_tagging_mock.call_count, 2)
    self.assertEqual(parse_mock.call_count, 1)
def test_parsing(
    self, run_tokenization_mock, run_tagging_mock, parse_mock, *args
):
    """parsed=True: tagging runs once per sentence, parsing once per file."""
    options = self._default_options(parsed=True)
    models = self._default_models()
    run_tokenization_mock.return_value = [
        ["Hej", "mitt", "namn", "är"],
        ["Hej", "mitt", "namn", "är", "Slim", "Shady"],
    ]
    # Per-sentence (lemmas, ud_tags, suc_tags, ner_tags) tuples; the
    # parser consumes the first three.
    run_tagging_mock.side_effect = [
        (
            ["hej", "min", "namn", "vara"],
            [
                "INTJ|_",
                "DET|Definite=Def|Gender=Neut|Number=Sing|Poss=Yes",
                "NOUN|Case=Nom|Definite=Ind|Gender=Neut|Number=Sing",
                "AUX|Mood=Ind|Tense=Pres|VerbForm=Fin|Voice=Act",
            ],
            ["IN", "PS|NEU|SIN|DEF", "NN|NEU|SIN|IND|NOM", "VB|PRS|AKT"],
            [],
        ),
        (
            ["hej", "min", "namn", "vara", "Slim", "Shady"],
            [
                "INTJ|_",
                "DET|Definite=Def|Gender=Neut|Number=Sing|Poss=Yes",
                "NOUN|Case=Nom|Definite=Ind|Gender=Neut|Number=Sing",
                "AUX|Mood=Ind|Tense=Pres|VerbForm=Fin|Voice=Act",
                "PROPN|Case=Nom",
                "PROPN|Case=Nom",
            ],
            [
                "IN", "PS|NEU|SIN|DEF", "NN|NEU|SIN|IND|NOM",
                "VB|PRS|AKT", "PM|NOM", "PM|NOM"
            ],
            [],
        ),
    ]
    # Silence stderr during the call, then restore the real sys.stderr.
    # The previous `with open(...) as sys.stderr:` left sys.stderr bound
    # to a *closed* file after the block exited.
    with open(os.devnull, "w") as devnull:
        saved_stderr, sys.stderr = sys.stderr, devnull
        try:
            open_mock = mock_open()
            with patch("swe_pipeline.open", open_mock, create=True):
                process_file(options, "file.txt", "", models)
        finally:
            sys.stderr = saved_stderr
    self.assertEqual(run_tagging_mock.call_count, 2)
    self.assertEqual(parse_mock.call_count, 1)
def test_tagging_and_lemmatization_and_ner(
    self, run_tokenization_mock, run_tagging_mock, parse_mock, *args
):
    """tagged+lemmatized+ner: writes token/SUC/UD-prefix/lemma lines and
    token/NER lines for each sentence.

    NOTE(review): line breaks inside the expected literal were
    reconstructed from a whitespace-mangled source; confirm whether
    blank lines separate sentence blocks in swe_pipeline's output.
    """
    options = self._default_options(tagged=True, lemmatized=True, ner=True)
    models = self._default_models()
    run_tokenization_mock.return_value = [
        ["Hej", "mitt", "namn", "är"],
        ["Hej", "mitt", "namn", "är", "Slim", "Shady"],
    ]
    # Per-sentence (lemmas, ud_tags, suc_tags, ner_tags) tuples.
    run_tagging_mock.side_effect = [
        (
            ["hej", "min", "namn", "vara"],
            [
                "INTJ|_",
                "DET|Definite=Def|Gender=Neut|Number=Sing|Poss=Yes",
                "NOUN|Case=Nom|Definite=Ind|Gender=Neut|Number=Sing",
                "AUX|Mood=Ind|Tense=Pres|VerbForm=Fin|Voice=Act",
            ],
            ["IN", "PS|NEU|SIN|DEF", "NN|NEU|SIN|IND|NOM", "VB|PRS|AKT"],
            ["O", "O", "O", "O"],
        ),
        (
            ["hej", "min", "namn", "vara", "Slim", "Shady"],
            [
                "INTJ|_",
                "DET|Definite=Def|Gender=Neut|Number=Sing|Poss=Yes",
                "NOUN|Case=Nom|Definite=Ind|Gender=Neut|Number=Sing",
                "AUX|Mood=Ind|Tense=Pres|VerbForm=Fin|Voice=Act",
                "PROPN|Case=Nom",
                "PROPN|Case=Nom",
            ],
            [
                "IN", "PS|NEU|SIN|DEF", "NN|NEU|SIN|IND|NOM",
                "VB|PRS|AKT", "PM|NOM", "PM|NOM"
            ],
            ["O", "O", "O", "O", "B-person", "I-person"],
        ),
    ]
    # Silence stderr during the call, then restore the real sys.stderr.
    # The previous `with open(...) as sys.stderr:` left sys.stderr bound
    # to a *closed* file after the block exited.
    with open(os.devnull, "w") as devnull:
        saved_stderr, sys.stderr = sys.stderr, devnull
        try:
            open_mock = mock_open()
            with patch("swe_pipeline.open", open_mock, create=True):
                process_file(options, "file.txt", "", models)
        finally:
            sys.stderr = saved_stderr
    written_to_file = "".join(
        call[0][0] for call in open_mock().write.call_args_list
    )
    self.assertEqual(
        written_to_file,
        dedent("""
        Hej mitt namn är
        Hej\tIN\tINTJ\thej
        mitt\tPS|NEU|SIN|DEF\tDET\tmin
        namn\tNN|NEU|SIN|IND|NOM\tNOUN\tnamn
        är\tVB|PRS|AKT\tAUX\tvara
        Hej\tO
        mitt\tO
        namn\tO
        är\tO
        Hej mitt namn är Slim Shady
        Hej\tIN\tINTJ\thej
        mitt\tPS|NEU|SIN|DEF\tDET\tmin
        namn\tNN|NEU|SIN|IND|NOM\tNOUN\tnamn
        är\tVB|PRS|AKT\tAUX\tvara
        Slim\tPM|NOM\tPROPN\tSlim
        Shady\tPM|NOM\tPROPN\tShady
        Hej\tO
        mitt\tO
        namn\tO
        är\tO
        Slim\tB-person
        Shady\tI-person
        """).lstrip("\n"),
    )
def test_only_tagging(
    self, run_tokenization_mock, run_tagging_mock, parse_mock, *args
):
    """Tagged-only run: each sentence yields a plain token line followed
    by one token<TAB>SUC-tag line per token.

    NOTE(review): line breaks inside the expected literal were
    reconstructed from a whitespace-mangled source; confirm whether
    blank lines separate sentence blocks in swe_pipeline's output.
    """
    options = self._default_options(tagged=True)
    models = self._default_models()
    run_tokenization_mock.return_value = [
        ["Hej", "mitt", "namn", "är"],
        ["Hej", "mitt", "namn", "är", "Slim", "Shady"],
    ]
    # Per-sentence tuples; only the third slot (SUC tags) is populated
    # for a tagged-only run — presumably (lemmas, ud_tags, suc_tags,
    # ner_tags), matching the NER test's data layout.
    run_tagging_mock.side_effect = [
        (
            [],
            [],
            ["IN", "PS|NEU|SIN|DEF", "NN|NEU|SIN|IND|NOM", "VB|PRS|AKT"],
            [],
        ),
        (
            [],
            [],
            [
                "IN", "PS|NEU|SIN|DEF", "NN|NEU|SIN|IND|NOM",
                "VB|PRS|AKT", "PM|NOM", "PM|NOM"
            ],
            [],
        ),
    ]
    # Silence stderr during the call, then restore the real sys.stderr.
    # The previous `with open(...) as sys.stderr:` left sys.stderr bound
    # to a *closed* file after the block exited.
    with open(os.devnull, "w") as devnull:
        saved_stderr, sys.stderr = sys.stderr, devnull
        try:
            open_mock = mock_open()
            with patch("swe_pipeline.open", open_mock, create=True):
                process_file(options, "file.txt", "", models)
        finally:
            sys.stderr = saved_stderr
    written_to_file = "".join(
        call[0][0] for call in open_mock().write.call_args_list
    )
    self.assertEqual(
        written_to_file,
        dedent("""
        Hej mitt namn är
        Hej\tIN
        mitt\tPS|NEU|SIN|DEF
        namn\tNN|NEU|SIN|IND|NOM
        är\tVB|PRS|AKT
        Hej mitt namn är Slim Shady
        Hej\tIN
        mitt\tPS|NEU|SIN|DEF
        namn\tNN|NEU|SIN|IND|NOM
        är\tVB|PRS|AKT
        Slim\tPM|NOM
        Shady\tPM|NOM
        """).lstrip("\n"),
    )
def test_tagging_and_lemmatization_and_ner(
    self, run_tokenization_mock, run_tagging_mock, parse_mock, *args
):
    """tagged+lemmatized+ner: writes token/SUC/UD-prefix/lemma lines and
    token/NER lines for each sentence.

    NOTE(review): line breaks inside the expected literal were
    reconstructed from a whitespace-mangled source; confirm whether
    blank lines separate sentence blocks in swe_pipeline's output.
    """
    options = self._default_options(tagged=True, lemmatized=True, ner=True)
    models = self._default_models()
    run_tokenization_mock.return_value = [
        ["Hej", "mitt", "namn", "är"],
        ["Hej", "mitt", "namn", "är", "Slim", "Shady"],
    ]
    # Per-sentence (lemmas, ud_tags, suc_tags, ner_tags) tuples.
    run_tagging_mock.side_effect = [
        (
            ["hej", "min", "namn", "vara"],
            [
                "INTJ|_",
                "DET|Definite=Def|Gender=Neut|Number=Sing|Poss=Yes",
                "NOUN|Case=Nom|Definite=Ind|Gender=Neut|Number=Sing",
                "AUX|Mood=Ind|Tense=Pres|VerbForm=Fin|Voice=Act",
            ],
            ["IN", "PS|NEU|SIN|DEF", "NN|NEU|SIN|IND|NOM", "VB|PRS|AKT"],
            ["O", "O", "O", "O"],
        ),
        (
            ["hej", "min", "namn", "vara", "Slim", "Shady"],
            [
                "INTJ|_",
                "DET|Definite=Def|Gender=Neut|Number=Sing|Poss=Yes",
                "NOUN|Case=Nom|Definite=Ind|Gender=Neut|Number=Sing",
                "AUX|Mood=Ind|Tense=Pres|VerbForm=Fin|Voice=Act",
                "PROPN|Case=Nom",
                "PROPN|Case=Nom",
            ],
            [
                "IN", "PS|NEU|SIN|DEF", "NN|NEU|SIN|IND|NOM",
                "VB|PRS|AKT", "PM|NOM", "PM|NOM"
            ],
            ["O", "O", "O", "O", "B-person", "I-person"],
        ),
    ]
    # Silence stderr during the call, then restore the real sys.stderr.
    # The previous `with open(...) as sys.stderr:` left sys.stderr bound
    # to a *closed* file after the block exited.
    with open(os.devnull, "w") as devnull:
        saved_stderr, sys.stderr = sys.stderr, devnull
        try:
            open_mock = mock_open()
            with patch("swe_pipeline.open", open_mock, create=True):
                process_file(options, "file.txt", "", models)
        finally:
            sys.stderr = saved_stderr
    written_to_file = "".join(
        call[0][0] for call in open_mock().write.call_args_list
    )
    self.assertEqual(
        written_to_file,
        dedent("""
        Hej mitt namn är
        Hej\tIN\tINTJ\thej
        mitt\tPS|NEU|SIN|DEF\tDET\tmin
        namn\tNN|NEU|SIN|IND|NOM\tNOUN\tnamn
        är\tVB|PRS|AKT\tAUX\tvara
        Hej\tO
        mitt\tO
        namn\tO
        är\tO
        Hej mitt namn är Slim Shady
        Hej\tIN\tINTJ\thej
        mitt\tPS|NEU|SIN|DEF\tDET\tmin
        namn\tNN|NEU|SIN|IND|NOM\tNOUN\tnamn
        är\tVB|PRS|AKT\tAUX\tvara
        Slim\tPM|NOM\tPROPN\tSlim
        Shady\tPM|NOM\tPROPN\tShady
        Hej\tO
        mitt\tO
        namn\tO
        är\tO
        Slim\tB-person
        Shady\tI-person
        """).lstrip("\n"),
    )