def test_get_all_sentences(self, samp: Sample) -> None:
    """Test `Summarizer.summarize`

    Arguments:
        samp {Sample} -- sample data
    """
    summarizer = Summarizer()
    ordered = sorted(samp.sentences, key=lambda sent: sent.index)
    results = summarizer.get_all_sentences(samp.body, samp.title)

    assert len(results) == len(ordered), assert_ex(
        'summary result count',
        len(results),
        len(ordered),
        hint=samp.name)

    for position, (result, wanted) in enumerate(zip(results, ordered)):
        assert wanted.equals(result), assert_ex(
            'summary',
            result,
            wanted,
            hint='{!r}: {!r}'.format(position, snip(result.text)))
def test_summarize(sample_name: str, limit: int) -> None:
    """Test `summarize` in text subpackage

    Arguments:
        sample_name {str} -- name of data source
        limit {int} -- number of sentences to return
    """
    samp = Sample(TEXT_PATH, sample_name)
    title = samp.title
    text = samp.body

    expecteds = _get_expected_sentences(samp, limit)
    receiveds = _get_received_sentences(title, text, limit)

    # BUG FIX: the failure message previously reported `limit` rather than
    # the actual expected count, which differs whenever the sample yields
    # fewer sentences than `limit` — the comparison below uses
    # len(expecteds), so the message must too.
    assert (len(receiveds) == len(expecteds)), assert_ex(
        'summary sentence count', len(receiveds), len(expecteds))

    for i, received in enumerate(receiveds):
        expected = expecteds[i]

        assert (received == expected), assert_ex(
            'summary [text at index]',
            received,
            expected,
            hint=[snip(received), i])
def test_get_sentence(
        self, sample: Sample, sentence: SampleSentence) -> None:
    """Test `Summarizer.get_sentence`

    Arguments:
        sample {Sample} -- sample data
        sentence {SampleSentence} -- individual sentence from sample
    """
    summarizer = Summarizer()
    top_kws = self._get_top_keywords(sample.keywords)
    kw_words = pluck_keyword_words(top_kws)

    expected = sentence.total_score
    received = summarizer.get_sentence(
        sentence.text,
        sentence.index,
        len(sample.sentences),
        sample.title_words,
        top_kws,
        kw_words).total_score

    assert kinda.eq(received, expected), assert_ex(
        'sentence score',
        received,
        expected,
        hint='{}: {!r}'.format(sample.name, snip(sentence.text)))
def test_parse_config(
        path_dict: typing.Dict[str, typing.Any],
        expected: typing.Any) -> None:
    """Parse a nominal configuration

    Arguments:
        path_dict {typing.Dict[str, typing.Any]} --
            arguments for Parser initialization
        expected {typing.Any} -- idiom config or exception
    """
    # overlay the supplied path arguments on top of the defaults
    kwargs = dict({'root': BUILTIN, 'idiom': DEFAULT_IDIOM}, **path_dict)
    cfg_path = get_config_path(**kwargs)

    try:
        ideal, language, stop_words = parse_config(cfg_path)
        received = (ideal, language, len(stop_words))

    except Exception as err:  # pylint: disable=broad-except
        received = check_exception(err, expected)

    assert received == expected, assert_ex(
        'parse config', received, expected)
def test_score_frequency(sample: Sample, sentence: SampleSentence) -> None:
    """Test `Summarizer` sentence scoring by keyword frequency

    Arguments:
        sample {Sample} -- sample data
        sentence {SampleSentence} -- individual sentence from sample
    """
    summarizer = Summarizer()
    stems = summarizer.parser.get_all_stems(sentence.text)
    top_kws = summarizer.get_top_keywords(sample.body)
    kw_words = pluck_keyword_words(top_kws)

    cases = (
        ('density score',
         sentence.dbs_score,
         score_by_dbs(stems, top_kws, kw_words)),
        ('summation score',
         sentence.sbs_score,
         score_by_sbs(stems, top_kws, kw_words)),
    )

    for label, expected, received in cases:
        assert kinda.eq(received, expected, COMPOSITE_TOLERANCE), assert_ex(
            label, received, expected)
def test_get_stop_words(
        spec: typing.Dict[str, typing.Any],
        expected: int) -> None:
    """Test `get_stop_words` for ParserConfig

    Arguments:
        spec {typing.Dict[str, typing.Any]} -- nominal configuration
        expected {int} -- number of expected words (-ish)
    """
    user_words = spec.get('user', [])
    received = list(get_stop_words({'stop_words': spec}, 'english'))

    # every user-supplied word must appear in the result
    test = all(word in received for word in user_words)

    if test:
        if expected < 0:
            # negative expectation: result count must exceed abs(expected)
            test = len(received) > abs(expected)

        else:
            test = len(received) == expected

    assert test, assert_ex(
        'get stop words', received, expected)
def test__float_len(item_list: typing.Sized, expected: float) -> None:
    """Test `_float_len` in summarizer subpackage

    CONSISTENCY FIX: the `-> None` return annotation was missing; every
    other test in this file declares it.

    Arguments:
        item_list {typing.Sized} -- a sized iterable
        expected {float} -- expected return
    """
    received = _float_len(item_list)

    assert (received == expected), assert_ex(
        'float length', received, expected)
def test_get_top_keyword_threshold(
        keywords: SampleKeywordList,
        expected: float) -> None:
    """Test `Summarizer.get_top_keyword_threshold`

    Arguments:
        keywords {SampleKeywordList} -- sample keywords
        expected {float} -- minimum count
    """
    threshold = get_top_keyword_threshold(keywords)

    assert threshold == expected, assert_ex(
        'top keyword frequency >=', threshold, expected)
def test_get_all_words(self, samp: Sample) -> None:
    """Test `Parser.get_all_words`

    Arguments:
        samp {Sample} -- sample data
    """
    vocabulary = samp.compare_words

    # every word the parser yields must be in the sample's word list
    for word in Parser().get_all_words(samp.body):
        assert word in vocabulary, assert_ex(
            'all words', word, None)
def test_get_key_stems(self, samp: Sample) -> None:
    """Test `Parser.get_key_stems`

    Arguments:
        samp {Sample} -- sample data
    """
    parser = Parser(idiom=samp.idiom)
    expected = sorted(self._get_expected_keywords(samp.keywords))
    received = sorted(parser.get_key_stems(samp.body))

    # BUG FIX: assert_ex arguments were swapped (expected, received);
    # every other test in this file passes (received, expected), so a
    # failure here would have reported the values backwards.
    assert (received == expected), assert_ex(
        'keyword list', received, expected)
def test_remove_punctuations(samp: Sample) -> None:
    """Test `remove_punctuations` for Parser

    Arguments:
        samp {Sample} -- sample data
    """
    stripped = remove_punctuations(samp.body)
    wanted = samp.remove_punctuations

    assert stripped == wanted, assert_ex(
        'punctuation removal',
        repr(stripped),
        repr(wanted))
def test_get_top_keywords(self, samp: Sample) -> None:
    """Test `Summarizer.get_top_keywords`

    Arguments:
        samp {Sample} -- sample data
    """
    wanted = sorted(self._get_top_keywords(samp.keywords))
    found = sorted(Summarizer().get_top_keywords(samp.body))

    assert len(found) == len(wanted), assert_ex(
        'top keywords count', len(found), len(wanted))

    # counts match, so a pairwise walk covers both lists completely
    for found_kw, wanted_kw in zip(found, wanted):
        assert found_kw == wanted_kw, assert_ex(
            'top keyword', found_kw, wanted_kw)
def test_get_keywords(self, samp: Sample) -> None:
    """Test `Parser.get_keywords`

    Arguments:
        samp {Sample} -- sample data
    """
    exp_words, exp_scores = self._get_sample_keyword_data(samp)
    rcv_words, rcv_scores = self._get_keyword_result(samp.body)

    # the union of both word lists must be covered by each side
    for word in set(exp_words + rcv_words):
        in_both = (word in exp_words) and (word in rcv_words)
        assert in_both, assert_ex(
            'word list mismatch', rcv_words, exp_words)

        assert kinda.eq(rcv_scores[word], exp_scores[word]), assert_ex(
            'bad keyword score',
            rcv_scores[word],
            exp_scores[word],
            hint=word)
def test_get_config_path(root: str, idiom: str, expected: Path) -> None:
    """Test `get_config_path` for ParserConfig

    Arguments:
        root {str} -- root directory of idiom config
        idiom {str} -- basename of idiom config
        expected {Path} -- self explanatory
    """
    path = get_config_path(root, idiom)

    assert path == expected, assert_ex(
        'config path', path, expected)
def test_score_body_sentences(samp: Sample) -> None:
    """Test `score_body_sentences` for text subpackage

    Arguments:
        samp {Sample} -- sample data
    """
    scored = score_body_sentences(samp.body, samp.title)

    # scored sentences come back in sample order; compare pairwise
    for idx, scored_sent in enumerate(scored):
        wanted = samp.sentences[idx].total_score
        got = scored_sent.total_score

        assert kinda.eq(got, wanted), assert_ex(
            'sentence score', got, wanted, hint=snip(scored_sent.text))
def test_score_by_length(self, samp: Sample) -> None:
    """Test `Summarizer.score_by_length`

    Arguments:
        samp {Sample} -- sample data
    """
    summarizer = Summarizer(idiom=samp.idiom)
    first_words = summarizer.parser.get_all_words(samp.sentences[0].text)

    score = summarizer.score_by_length(first_words)

    assert kinda.eq(score, samp.length_score), assert_ex(
        'sentence score',
        score,
        samp.length_score,
        hint=' '.join(first_words))
def test_split_words(self, samp: Sample) -> None:
    """Test `Parser.split_words`

    Arguments:
        samp {Sample} -- sample data
    """
    parser = Parser()
    text = samp.body

    expected = samp.split_words
    received = parser.split_words(text)

    # BUG FIX: assert_ex arguments were swapped (expected, received);
    # every other test in this file passes (received, expected), so a
    # failure here would have reported the values backwards.
    assert (received == expected), assert_ex(
        'word split', received, expected, hint=samp.name)
def test_score_by_title(samp: Sample) -> None:
    """Test `score_by_title` sentence scoring

    Arguments:
        samp {Sample} -- sample data
    """
    summarizer = Summarizer(idiom=samp.idiom)
    title_words = summarizer.parser.get_key_words(samp.title)
    sentence_words = summarizer.parser.get_all_words(
        samp.sentences[0].text)

    received = score_by_title(title_words, sentence_words)

    assert kinda.eq(received, samp.title_score), assert_ex(
        'title score',
        received,
        samp.title_score,
        hint='\n'.join(['', repr(title_words), repr(sentence_words)]))
def test_load_idiom(
        kwargs: typing.Dict[str, typing.Any],
        expected: typing.Tuple[int, str, int]) -> None:
    """Test `load_idiom` for ParserConfig

    Arguments:
        kwargs {typing.Dict[str, typing.Any]} -- kwargs passed to Parser
        expected {typing.Tuple[int, str, int]} -- expected data
    """
    # BUG FIX: `received` was unbound when load_idiom() raised, so a
    # failed exception check crashed with NameError while building the
    # assert message instead of reporting the mismatch.
    received = None
    test = False

    try:
        received = load_idiom(**kwargs)
        test = compare_loaded_idiom(received, expected)

    # NOTE: the original wrapped this narrow except in broad-except
    # pylint pragmas, which were unnecessary and have been removed.
    except (PermissionError, FileNotFoundError, ValueError) as err:
        test = check_exception(err, expected) is not None

    assert test, assert_ex('config', received, expected)
def test_split_sentences(self, samp: Sample) -> None:
    """Test `Parser.split_sentences`

    Arguments:
        samp {Sample} -- sample data
    """
    parser = Parser(idiom=samp.idiom)

    try:
        wanted = samp.split_sentences

    except AttributeError:
        # sample has no explicit split listing; fall back to the raw
        # text of each sample sentence
        wanted = [sent.text for sent in samp.sentences]

    found = parser.split_sentences(samp.body)

    assert found == wanted, assert_ex(
        'sentence split', found, wanted)
def test_get_slice_length(
        nominal: typing.Any,
        total: int,
        expected: typing.Any) -> None:
    """Test `get_slice_length` in text subpackage

    Arguments:
        nominal {float} -- exact number (int) or percentage
            (0 < nominal < 1)
        total {int} -- number of items to slice from
        expected {typing.Any} -- expected Exception/number of items
    """
    try:
        outcome = get_slice_length(nominal, total)

    except ValueError as err:
        # a raised ValueError is itself a valid expectation
        outcome = check_exception(err, expected)

    assert expected == outcome, assert_ex(
        'slice length',
        outcome,
        expected,
        hint='nominal: {!r}'.format(nominal))
def test___init__(
        self,
        root: str,
        idiom: str,
        expected: typing.Tuple[int, str, int]) -> None:
    """Test `ParserConfig` initialization

    Arguments:
        root {str} -- root directory of idiom config
        idiom {str} -- basename of idiom config
        expected {typing.Tuple[int, str, int]} --
            [ideal words, NLTK language, stop word count]
    """
    cfg = ParserConfig(root, idiom)
    state = (
        cfg.ideal_sentence_length,
        cfg.language,
        len(cfg.stop_words))

    assert state == expected, assert_ex(
        'parser config', state, expected)