def test_run(self):
    """Tokenized corpora run through the op yield the expected term scores."""
    corpora = [
        Corpus("0", "hello", "hello world"),
        Corpus("1", "goodbye", "goodbye world"),
    ]
    corpora = WordTokenizeWhitespacePunct().run(corpora)
    expected = round_json_floats([
        {"term": "hello", "importance": 0.0, "corpus_id": "0"},
        {"term": "world", "importance": -0.4054651081081644, "corpus_id": "0"},
        {"term": "goodbye", "importance": 0.0, "corpus_id": "1"},
        {"term": "world", "importance": -0.4054651081081644, "corpus_id": "1"},
    ])
    actual = round_json_floats(self.op.run(corpora))
    for entry in actual:
        self.assertTrue(entry in expected)
def test_run(self):
    """Tfidf scores corpus-unique terms above terms shared by every corpus."""
    self.op = Tfidf()
    self.test_data = [
        Corpus("0", "hello", "hello world"),
        Corpus("1", "goodbye", "goodbye world"),
    ]
    expected = [
        {"term": "hello", "importance": 0.4054651081081644, "corpus_id": "0"},
        {"term": "world", "importance": 0.0, "corpus_id": "0"},
        {"term": "goodbye", "importance": 0.4054651081081644, "corpus_id": "1"},
        {"term": "world", "importance": 0.0, "corpus_id": "1"},
    ]
    for result in self.op.run(self.test_data):
        self.assertTrue(result in expected)
def test_run_treebank(self):
    """The Treebank tokenizer splits each corpus into its word tokens."""
    self.op = WordTokenizeTreebank()
    corpora = [
        Corpus("0", "hello", "hello world"),
        Corpus("1", "goodbye", "goodbye world"),
    ]
    expected = {"0": ["hello", "world"], "1": ["goodbye", "world"]}
    tokenized = self.op.run(corpora)
    self.assertIsNotNone(tokenized)
    for corpus in tokenized:
        self.assertEqual(corpus.tokenized_contents, expected[corpus.id])
def test_run(self):
    """Space tokenizer smoke test.

    NOTE(review): the original test built ``desired_results`` but never
    executed the operation and made no assertions, so it always passed.
    It now at least runs the op and checks a result is produced; TODO
    confirm the op's output shape matches ``desired_results`` and tighten
    the assertion accordingly.
    """
    self.op = WordTokenizeSpaces()
    self.test_data = [
        Corpus("0", "hello", "hello world"),
        Corpus("1", "goodbye", "goodbye world"),
    ]
    desired_results = [
        {"corpus_id": "0", "tokenized_content": ["hello", "world"]},
        {"corpus_id": "1", "tokenized_content": ["goodbye", "world"]},
    ]
    results = self.op.run(self.test_data)
    self.assertIsNotNone(results)
def parse_json(self, json_data):
    """Parse a transaction request and load its corpora from the database.

    Populates ``transaction_id``, ``operation``, ``library`` and
    ``corpora_ids`` (all required), plus the optional ``user_id``,
    ``cleanups`` and ``tokenizer`` fields, then fetches each corpus by
    ObjectId into ``self.corpora``.

    Args:
        json_data: raw request bytes containing a JSON object.

    Raises:
        TransactionException: if a required property is missing, the JSON
            cannot be parsed, or a corpus id is invalid / not found.
    """
    try:
        input_data = json.loads(json_data.decode())
        # Dropped leftover debug print(input_data) from the original.
        self.transaction_id = input_data['transaction_id']
        self.operation = input_data['operation']
        self.library = input_data['library']
        # Optional fields: absence is not an error.
        if 'user_id' in input_data:
            self.user_id = input_data['user_id']
        if 'cleanup' in input_data:
            self.cleanups = input_data['cleanup']
        self.corpora_ids = input_data['corpora_ids']
        if 'tokenizer' in input_data:
            self.tokenizer = input_data['tokenizer']
    except KeyError:
        # Only the required lookups above can raise KeyError; the original
        # message wrongly listed the optional 'tokenizer' as required.
        raise TransactionException(
            'Missing property transaction_id, operation, library or corpora_ids.'
        )
    except ValueError:
        raise TransactionException('Could not parse JSON.')
    try:
        # Load the referenced corpora from the database.
        corpora = DatabaseAdapter.getDB().corpus
        for id in self.corpora_ids:
            corpus = corpora.find_one({"_id": ObjectId(id)})
            self.corpora.append(
                Corpus(id, corpus["title"], corpus["contents"], corpus["tags"]))
    except (TypeError, InvalidId):
        raise TransactionException('Could not find corpus.')
def test_run_pos_frequencies(self):
    """POS tagging reports both the words under each tag and per-tag counts."""
    self.op = SplatPOSFrequencies()
    self.test_data = [
        Corpus(
            "0", "Test",
            "The very quick brown fox jumped over the lazy dog.\nI saw it happen."
        )
    ]
    expected = [{
        "corpus_id": "0",
        "pos_tags": {
            "PRP": ["I", "it"],
            "VB": ["happen"],
            "RB": ["very"],
            ".": ["."],
            "VBD": ["jumped", "saw"],
            "DT": ["The", "the"],
            "IN": ["over"],
            "JJ": ["quick", "lazy"],
            "NN": ["brown", "fox", "dog"],
        },
        "pos_counts": {
            "PRP": 2, "VB": 1, "RB": 1, ".": 2, "VBD": 2,
            "DT": 2, "IN": 1, "JJ": 2, "NN": 3,
        },
    }]
    self.assertEqual(json.loads(self.op.run(self.test_data)), expected)
def test_run(self):
    """Hashtag symbols are stripped while the tag text itself is kept."""
    corpora = [Corpus("0", "", "This tweet is great! #Hashtags")]
    expected = {"0": "This tweet is great! Hashtags"}
    cleaned = self.op.run(corpora)
    self.assertIsNotNone(cleaned)
    for corpus in cleaned:
        self.assertEqual(corpus.contents, expected[corpus.id])
def test_run(self):
    """Per-word timing JSON is summarised into a transcript, base speaking
    stats and the longest-duration tokens.

    Input times are in milliseconds; the expected stats appear to be in
    seconds/minutes derived from them — NOTE(review): confirm the unit
    conversion against the op's implementation.
    """
    # Nine timed words; the "<sil>" entries are flagged as fillers.
    test_data_contents = '[{"start":10,"filler":false,"end":90,"word":"i"},' \
        '{"start":100,"filler":false,"end":360,"word":"know"},' \
        '{"start":370,"filler":false,"end":470,"word":"i"},' \
        '{"start":480,"filler":false,"end":730,"word":"justice"},' \
        '{"start":740,"filler":false,"end":950,"word":"as"},' \
        '{"start":960,"filler":true,"end":980,"word":"<sil>"},' \
        '{"start":990,"filler":false,"end":1070,"word":"you"},' \
        '{"start":1080,"filler":false,"end":1320,"word":"this"},' \
        '{"start":1490,"filler":true,"end":1600,"word":"<sil>"}]'
    test_data = [Corpus('0', '', test_data_contents)]
    results = self.op.run(test_data)
    # Fillers appear as "[SIL]" in the transcript and are excluded from
    # num_words / word_time; longest_tokens is sorted by duration.
    desired_results = [{'transcript': 'i know i justice as [SIL] you this [SIL]',
                        'base_stats': {'num_fillers': 2,
                                       'num_words': 7,
                                       'filler_time': 0.13,
                                       'word_time': 1.22,
                                       'total_time': 1.6,
                                       'words_per_minute': 262.5,
                                       'syllables_per_minute': 300.0},
                        'longest_tokens': [{'word': 'know', 'length': 0.26},
                                           {'word': 'justice', 'length': 0.25},
                                           {'word': 'this', 'length': 0.24},
                                           {'word': 'as', 'length': 0.21},
                                           {'word': 'i', 'length': 0.1},
                                           {'word': 'i', 'length': 0.08},
                                           {'word': 'you', 'length': 0.08}]}]
    # Floats are rounded on both sides to avoid brittle exact comparison.
    self.assertEqual(round_json_floats(results), round_json_floats(desired_results))
def test_run(self):
    """Term frequencies are reported in descending order of count."""
    corpora = [Corpus("0", "hello", "hello world hello hello world test")]
    WordTokenizeWhitespacePunct().run(corpora)
    expected = [
        {"term": "hello", "frequency": 3},
        {"term": "world", "frequency": 2},
        {"term": "test", "frequency": 1},
    ]
    self.assertEqual(self.op.run(corpora)["sentences"], expected)
def test_run_pronouns(self):
    """Pronoun analysis counts every English pronoun, grouped by person,
    with [count, person, case, number] entries, and echoes the sentence split."""
    self.op = SplatPronouns()
    self.test_data = [
        Corpus(
            "0", "Test",
            "He and she jumped over my fence.\nI saw them do so, and I told you."
        )
    ]
    results = json.loads(self.op.run(self.test_data))
    print(results)
    # The full pronoun inventory is always present; pronouns absent from the
    # text simply have a zero count.
    desired_results = [{
        'corpus_id': '0',
        'first-person': {
            'MYSELF': [0, '1st-Person', 'Reflexive', 'Singular'],
            'OURSELVES': [0, '1st-Person', 'Reflexive', 'Plural'],
            'WE': [0, '1st-Person', 'Personal', 'Plural'],
            'ME': [0, '1st-Person', 'Personal', 'Singular'],
            'OUR': [0, '1st-Person', 'Possessive', 'Plural'],
            'MY': [1, '1st-Person', 'Possessive', 'Singular'],
            'MINE': [0, '1st-Person', 'Possessive', 'Singular'],
            'US': [0, '1st-Person', 'Personal', 'Plural'],
            'I': [2, '1st-Person', 'Personal', 'Singular'],
            'OURS': [0, '1st-Person', 'Possessive', 'Plural']
        },
        'second-person': {
            'YOU': [1, '2nd-Person', 'Personal', 'Singular/Plural'],
            'YOURSELVES': [0, '2nd-Person', 'Reflexive', 'Plural'],
            'YOURS': [0, '2nd-Person', 'Possessive', 'Singular/Plural'],
            'YOUR': [0, '2nd-Person', 'Possessive', 'Singular/Plural'],
            'YOURSELF': [0, '2nd-Person', 'Reflexive', 'Singular']
        },
        'third-person': {
            'THEY': [0, '3rd-Person', 'Personal', 'Plural'],
            'ITSELF': [0, '3rd-Person', 'Reflexive', 'Singular'],
            'HERS': [0, '3rd-Person', 'Possessive', 'Singular'],
            'HIM': [0, '3rd-Person', 'Personal', 'Singular'],
            'SHE': [1, '3rd-Person', 'Personal', 'Singular'],
            'HERSELF': [0, '3rd-Person', 'Reflexive', 'Singular'],
            'ITS': [0, '3rd-Person', 'Possessive', 'Singular'],
            'HIMSELF': [0, '3rd-Person', 'Reflexive', 'Singular'],
            'THEIRS': [0, '3rd-Person', 'Possessive', 'Plural'],
            'THEIR': [0, '3rd-Person', 'Possessive', 'Plural'],
            'HIS': [0, '3rd-Person', 'Possessive', 'Singular'],
            'IT': [0, '3rd-Person', 'Personal', 'Singular'],
            'HE': [1, '3rd-Person', 'Personal', 'Singular'],
            'HER': [0, '3rd-Person', 'Personal/Possessive', 'Singular/Plural'],
            'THEMSELVES': [0, '3rd-Person', 'Reflexive', 'Plural'],
            'THEM': [1, '3rd-Person', 'Personal', 'Plural']
        },
        'sentences': [
            'He and she jumped over my fence.', 'I saw them do so, and I told you.'
        ]
    }]
    self.assertEqual(results, desired_results)
def test_run(self):
    """Word cloud smoke test.

    NOTE(review): the original test built ``desired_results`` but never
    ran the operation and made no assertions, so it always passed. It now
    executes the op and checks a result is produced; TODO confirm the
    output shape and assert equality with ``desired_results``.
    """
    self.op = WordCloudOp()
    self.test_data = [
        Corpus("0", "hello", "hello world hello hello world test")
    ]
    desired_results = [
        {"term": "hello", "frequency": 3},
        {"term": "world", "frequency": 2},
        {"term": "test", "frequency": 1},
    ]
    results = self.op.run(self.test_data)
    self.assertIsNotNone(results)
def test_run(self):
    """Signature/stem extraction on the Brown corpus yields non-empty
    affix signatures, each with at most 15 roots."""
    # Use a context manager so the fixture handle is closed deterministically
    # (the original leaked it via open(...).read()).
    with open('brown.txt', 'r') as fixture:
        test_data = [Corpus("0", "", fixture.read())]
    results = self.op.run(test_data)
    self.assertIsNotNone(results)
    for result in results:
        self.assertTrue(result)
        for sigs_stems in result:
            self.assertTrue(sigs_stems['affixes'])
            self.assertLessEqual(len(sigs_stems['roots']), 15)
def test_run(self):
    """Stopwords are removed from the tokenized corpus contents."""
    corpora = WordTokenizeWhitespacePunct().run([
        Corpus("0", "", "the quick brown fox jumps over the lazy dog")
    ])
    expected = {"0": "quick brown fox jumps lazy dog"}
    results = self.op.run(corpora)
    self.assertIsNotNone(results)
    for corpus in results:
        self.assertEqual(corpus.contents, expected[corpus.id])
def read_corpora(self, corpora_ids):
    """Load the corpora with the given ids from the database into self.corpora.

    Args:
        corpora_ids: iterable of corpus ObjectId strings to fetch.

    Raises:
        TransactionException: if an id is invalid or no matching corpus exists.
    """
    try:
        # Load corpora from the database.
        corpora = DatabaseAdapter.getDB().corpus
        # BUG FIX: the original iterated self.corpora_ids, silently ignoring
        # the corpora_ids argument passed by the caller.
        for id in corpora_ids:
            corpus = corpora.find_one({"_id": ObjectId(id)})
            self.corpora.append(
                Corpus(id, corpus["title"], corpus["contents"], corpus["tags"]))
    except (TypeError, InvalidId):
        raise TransactionException('Could not find corpus.')
def test_run(self):
    """Disfluency markers such as "{sl}" are stripped from the contents."""
    corpora = [
        Corpus("0", "", "The quick brown fox {sl} jumped over the lazy dog.\n")
    ]
    expected = {"0": "The quick brown fox jumped over the lazy dog.\n"}
    results = self.op.run(corpora)
    self.assertIsNotNone(results)
    for corpus in results:
        self.assertEqual(corpus.contents, expected[corpus.id])
def run(self, data):
    """Split each corpus into blank-line-delimited chunks, strip punctuation,
    and execute the operation on the resulting per-chunk corpora."""
    corpora = []
    for source in data:
        # Normalise line endings, then treat blank lines as chunk boundaries.
        chunks = "\n".join(source.contents.splitlines()).split('\n\n')
        for index, sentence in enumerate(chunks):
            if sentence:
                corpora.append(Corpus(str(index), "", sentence.strip()))
    print('Num corpora = {}'.format(len(corpora)))
    corpora = RemovePunct().run(corpora)
    return self.execute(corpora)
def test_run(self):
    """Topic modelling over 100 Brown-corpus lines yields ``num_topics``
    topics of 10 words each."""
    # Use a context manager so the fixture file is closed (the original
    # iterated an open() handle and leaked it).
    with open('brown.txt', 'r') as fixture:
        test_data = [
            Corpus(str(num), "", line)
            for num, line in enumerate(fixture)
        ]
    test_data = test_data[:100]
    results = self.op.run(test_data)
    self.assertIsNotNone(results)  # Result is returned
    self.assertEqual(len(results), self.num_topics)  # Correct number of topics returned
    for topic in results.values():
        self.assertEqual(len(topic), 10)  # Each topic has 10 words
def test_run_disfluency(self):
    """Disfluency analysis counts fillers ({sl} silent pauses, "uh", etc.)
    per sentence and in total, plus the per-sentence average.

    NOTE(review): the expected split merges "dog." with the following "I"
    ('...lazy dog.I') — this documents the splitter's current behavior,
    which may itself be worth fixing upstream.
    """
    self.op = SplatDisfluency()
    self.test_data = [
        Corpus(
            "0", "Test",
            "The quick brown fox {sl} jumped over the lazy dog.\nI uh saw it happen."
        )
    ]
    results = json.loads(self.op.run(self.test_data))
    print(results)
    desired_results = [{
        'corpus_id': '0',
        'sentences': {
            'uh saw it happen.': {
                'SILENT PAUSE': 0, 'HM': 0, 'BREAK': 0, 'UH': 1,
                'UM': 0, 'AH': 0, 'REPETITION': 0, 'ER': 0
            },
            'The quick brown fox {sl} jumped over the lazy dog.I': {
                'SILENT PAUSE': 1, 'HM': 0, 'BREAK': 0, 'UH': 0,
                'UM': 0, 'AH': 0, 'REPETITION': 0, 'ER': 0
            }
        },
        'average_disfluencies_per_sentence': 1.0,
        # TOTAL aggregates every disfluency category across all sentences.
        'total_disfluencies': {
            'SILENT PAUSE': 1, 'HM': 0, 'BREAK': 0, 'TOTAL': 2,
            'UM': 0, 'AH': 0, 'UH': 1, 'REPETITION': 0, 'ER': 0
        }
    }]
    self.assertEqual(results, desired_results)
def test_run_has_data(self):
    """Running the op returns the corpus fields as a plain dict."""
    self.test_data = [
        Corpus("0", "Test", "The quick brown fox jumped over the lazy dog.\n")
    ]
    results = self.op.run(self.test_data)
    print(results)
    expected = {
        'title': 'Test',
        'tags': [],
        'id': '0',
        'contents': 'The quick brown fox jumped over the lazy dog.\n',
        'tokenized_contents': None,
    }
    self.assertEqual(results, expected)
def test_run(self):
    """Word-timing JSON is reduced to a plain-text transcript, with filler
    ("<sil>") entries dropped entirely."""
    # Nine timed words (times in ms); the "<sil>" entries are fillers.
    test_data_contents = '[{"start":10,"filler":false,"end":90,"word":"i"},' \
        '{"start":100,"filler":false,"end":360,"word":"know"},' \
        '{"start":370,"filler":false,"end":470,"word":"i"},' \
        '{"start":480,"filler":false,"end":730,"word":"just"},' \
        '{"start":740,"filler":false,"end":950,"word":"as"},' \
        '{"start":960,"filler":true,"end":980,"word":"<sil>"},' \
        '{"start":990,"filler":false,"end":1070,"word":"you"},' \
        '{"start":1080,"filler":false,"end":1320,"word":"this"},' \
        '{"start":1490,"filler":true,"end":1600,"word":"<sil>"}]'
    test_data = [Corpus('0', '', test_data_contents)]
    desired_results = {'0': 'i know i just as you this'}
    results = self.op.run(test_data)
    self.assertIsNotNone(results)
    for corpus in results:
        self.assertEqual(corpus.contents, desired_results[corpus.id])
def test_run(self):
    """Lemmatization maps each token to its dictionary form."""
    corpora = WordTokenizeWhitespacePunct().run([
        Corpus(
            "0", "",
            "strange women lying in ponds distributing swords is no basis for a system of government"
        )
    ])
    expected = {
        "0": "strange woman lie in pond distribute sword be no basis for a system of government"
    }
    results = self.op.run(corpora)
    self.assertIsNotNone(results)
    for corpus in results:
        self.assertEqual(corpus.contents, expected[corpus.id])
def test_run_preserve_nnp(self):
    """Lowercasing keeps proper nouns (NNP, e.g. "Bill Clinton") capitalized."""
    self.op = RemoveCapsPreserveNNP()
    corpora = [
        Corpus(
            "0", "",
            "Removes all non-proper-noun capitals from a given text. Removes capital letters from text, even for Bill Clinton. Accepts as input a non-tokenized string."
        )
    ]
    expected = {
        "0": "removes all non-proper-noun capitals from a given text. removes capital letters from text, even for Bill Clinton. accepts as input a non-tokenized string."
    }
    results = self.op.run(corpora)
    self.assertIsNotNone(results)
    for corpus in results:
        self.assertEqual(corpus.contents, expected[corpus.id])
def test_run(self):
    """Sentence tokenization splits contents on sentence boundaries."""
    corpora = [
        Corpus(
            "0", "",
            "hello world. Will you say goodbye, world? I'll say hello.")
    ]
    expected = {
        "0": [
            "hello world.",
            "Will you say goodbye, world?",
            "I'll say hello.",
        ]
    }
    results = self.op.run(corpora)
    self.assertIsNotNone(results)
    for result in results:
        self.assertEqual(result['sentences'], expected[result['corpus_id']])
def test_porter(self):
    """The Porter stemmer reduces each token to its stem."""
    words = ['strange', 'women', 'lying', 'ponds', 'distributing',
             'swords', 'no', 'basis', 'system', 'government']
    corpora = WordTokenizeWhitespacePunct().run(
        [Corpus("0", "", ' '.join(words))])
    expected = {
        "0": ['strang', 'women', 'lie', 'pond', 'distribut',
              'sword', 'no', 'basi', 'system', 'govern']
    }
    results = self.op.run(corpora)
    self.assertIsNotNone(results)
    for corpus in results:
        self.assertEqual(corpus.tokenized_contents, expected[corpus.id])
def test_run_syllables(self):
    """Words are grouped under their syllable count."""
    self.op = SplatSyllables()
    self.test_data = [
        Corpus(
            "0", "Test",
            "The very quick brown fox jumped over the lazy dog.\nI saw it happen."
        )
    ]
    expected = [{
        'corpus_id': '0',
        'syllables': {
            '1': ['the', 'quick', 'brown', 'fox', 'jumped',
                  'dog', 'i', 'saw', 'it'],
            '2': ['very', 'over', 'lazy', 'happen'],
        },
    }]
    self.assertEqual(json.loads(self.op.run(self.test_data)), expected)
def test_run_complexity(self):
    """Complexity metrics (densities, readability scores, type/token ratio)
    are computed for the corpus."""
    self.op = SplatComplexity()
    self.test_data = [
        Corpus(
            "0", "Test",
            "The quick brown fox jumped over the lazy dog.\nI saw it happen."
        )
    ]
    expected = [{
        'corpus_id': '0',
        'content_density': [2.0, 2.0, 2.0],
        'idea_density': 0.5,
        'flesch_score': 96.1,
        'kincaid_score': 1.5,
        'types': 12,
        'tokens': 13,
        'type_token_ratio': 0.9230769230769231,
    }]
    actual = json.loads(self.op.run(self.test_data))
    # Round floats on both sides to avoid brittle exact comparison.
    self.assertEqual(round_json_floats(actual), round_json_floats(expected))
def test_run_ngrams(self):
    """Word uni-/bi-/trigram counts are computed over the lowercased,
    punctuation-free token stream."""
    self.op = SplatNGrams()
    self.test_data = [
        Corpus("0", "Test", "The quick brown fox jumped over the lazy dog.\n")
    ]
    results = json.loads(self.op.run(self.test_data))
    desired_results = [{
        "corpus_id": "0",
        "unigrams": {
            "dog": 1, "the": 2, "fox": 1, "jumped": 1,
            "over": 1, "lazy": 1, "brown": 1, "quick": 1
        },
        "bigrams": {
            "the quick": 1, "quick brown": 1, "lazy dog": 1, "brown fox": 1,
            "fox jumped": 1, "jumped over": 1, "over the": 1, "the lazy": 1
        },
        "trigrams": {
            "the quick brown": 1, "quick brown fox": 1, "the lazy dog": 1,
            "jumped over the": 1, "over the lazy": 1, "brown fox jumped": 1,
            "fox jumped over": 1
        }
    }]
    self.assertEqual(results, desired_results)
def test_run(self):
    """Character uni-/bi-/trigram counts are computed over the contents,
    with "_" standing in for whitespace/word boundaries."""
    self.test_data = [
        Corpus("0", "Test", "The quick brown fox jumped over the lazy dog.\n")
    ]
    results = self.op.run(self.test_data)
    desired_results = [{
        "corpus_id": "0",
        "unigrams": {
            "_": 8, "a": 1, "b": 1, "c": 1, "d": 2, "e": 4, "f": 1, "g": 1,
            "h": 2, "i": 1, "j": 1, "k": 1, "l": 1, "m": 1, "n": 1, "o": 4,
            "p": 1, "q": 1, "r": 2, "t": 2, "u": 2, "v": 1, "w": 1, "x": 1,
            "y": 1, "z": 1
        },
        "bigrams": {
            "_b": 1, "_d": 1, "_f": 1, "_j": 1, "_l": 1, "_o": 1, "_q": 1,
            "_t": 1, "az": 1, "br": 1, "ck": 1, "d_": 1, "do": 1, "e_": 2,
            "ed": 1, "er": 1, "fo": 1, "he": 2, "ic": 1, "ju": 1, "k_": 1,
            "la": 1, "mp": 1, "n_": 1, "og": 1, "ov": 1, "ow": 1, "ox": 1,
            "pe": 1, "qu": 1, "r_": 1, "ro": 1, "th": 2, "ui": 1, "um": 1,
            "ve": 1, "wn": 1, "x_": 1, "y_": 1, "zy": 1
        },
        "trigrams": {
            "_br": 1, "_do": 1, "_fo": 1, "_ju": 1, "_la": 1, "_ov": 1,
            "_qu": 1, "_th": 1, "azy": 1, "bro": 1, "ck_": 1, "d_o": 1,
            "dog": 1, "e_l": 1, "e_q": 1, "ed_": 1, "er_": 1, "fox": 1,
            "he_": 2, "ick": 1, "jum": 1, "k_b": 1, "laz": 1, "mpe": 1,
            "n_f": 1, "ove": 1, "own": 1, "ox_": 1, "ped": 1, "qui": 1,
            "r_t": 1, "row": 1, "the": 2, "uic": 1, "ump": 1, "ver": 1,
            "wn_": 1, "x_j": 1, "y_d": 1, "zy_": 1
        }
    }]
    self.assertEqual(results, desired_results)
def test_run(self):
    """The character-transition matrix counts each ordered character bigram:
    row key = preceding char, column key = following char, with "_" standing
    for whitespace/boundaries. "chars" lists the alphabet actually present."""
    self.test_data = [Corpus("0", "Test", "The quick brown fox jumped over the lazy dog.\n")]
    results = self.op.run(self.test_data)
    desired_results = [
        {"chars": ["_", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "t", "u", "v", "w", "x", "y", "z"],
         "array": {
            # One row per character; e.g. "t" -> {"h": 2, ...} because "th"
            # occurs twice ("The", "the").
            "p": {"p": 0, "b": 0, "d": 0, "j": 0, "r": 0, "x": 0, "f": 0, "t": 0, "q": 0, "e": 1, "g": 0, "a": 0, "o": 0, "u": 0, "w": 0, "h": 0, "k": 0, "y": 0, "v": 0, "z": 0, "i": 0, "c": 0, "m": 0, "_": 0, "l": 0, "n": 0},
            "b": {"p": 0, "b": 0, "d": 0, "j": 0, "r": 1, "x": 0, "f": 0, "t": 0, "q": 0, "e": 0, "g": 0, "a": 0, "o": 0, "u": 0, "w": 0, "h": 0, "k": 0, "y": 0, "v": 0, "z": 0, "i": 0, "c": 0, "m": 0, "_": 0, "l": 0, "n": 0},
            "d": {"p": 0, "b": 0, "d": 0, "j": 0, "r": 0, "x": 0, "f": 0, "t": 0, "q": 0, "e": 0, "g": 0, "a": 0, "o": 1, "u": 0, "w": 0, "h": 0, "k": 0, "y": 0, "v": 0, "z": 0, "i": 0, "c": 0, "m": 0, "_": 1, "l": 0, "n": 0},
            "j": {"p": 0, "b": 0, "d": 0, "j": 0, "r": 0, "x": 0, "f": 0, "t": 0, "q": 0, "e": 0, "g": 0, "a": 0, "o": 0, "u": 1, "w": 0, "h": 0, "k": 0, "y": 0, "v": 0, "z": 0, "i": 0, "c": 0, "m": 0, "_": 0, "l": 0, "n": 0},
            "r": {"p": 0, "b": 0, "d": 0, "j": 0, "r": 0, "x": 0, "f": 0, "t": 0, "q": 0, "e": 0, "g": 0, "a": 0, "o": 1, "u": 0, "w": 0, "h": 0, "k": 0, "y": 0, "v": 0, "z": 0, "i": 0, "c": 0, "m": 0, "_": 1, "l": 0, "n": 0},
            "x": {"p": 0, "b": 0, "d": 0, "j": 0, "r": 0, "x": 0, "f": 0, "t": 0, "q": 0, "e": 0, "g": 0, "a": 0, "o": 0, "u": 0, "w": 0, "h": 0, "k": 0, "y": 0, "v": 0, "z": 0, "i": 0, "c": 0, "m": 0, "_": 1, "l": 0, "n": 0},
            "f": {"p": 0, "b": 0, "d": 0, "j": 0, "r": 0, "x": 0, "f": 0, "t": 0, "q": 0, "e": 0, "g": 0, "a": 0, "o": 1, "u": 0, "w": 0, "h": 0, "k": 0, "y": 0, "v": 0, "z": 0, "i": 0, "c": 0, "m": 0, "_": 0, "l": 0, "n": 0},
            "t": {"p": 0, "b": 0, "d": 0, "j": 0, "r": 0, "x": 0, "f": 0, "t": 0, "q": 0, "e": 0, "g": 0, "a": 0, "o": 0, "u": 0, "w": 0, "h": 2, "k": 0, "y": 0, "v": 0, "z": 0, "i": 0, "c": 0, "m": 0, "_": 0, "l": 0, "n": 0},
            "q": {"p": 0, "b": 0, "d": 0, "j": 0, "r": 0, "x": 0, "f": 0, "t": 0, "q": 0, "e": 0, "g": 0, "a": 0, "o": 0, "u": 1, "w": 0, "h": 0, "k": 0, "y": 0, "v": 0, "z": 0, "i": 0, "c": 0, "m": 0, "_": 0, "l": 0, "n": 0},
            "e": {"p": 0, "b": 0, "d": 1, "j": 0, "r": 1, "x": 0, "f": 0, "t": 0, "q": 0, "e": 0, "g": 0, "a": 0, "o": 0, "u": 0, "w": 0, "h": 0, "k": 0, "y": 0, "v": 0, "z": 0, "i": 0, "c": 0, "m": 0, "_": 2, "l": 0, "n": 0},
            "g": {"p": 0, "b": 0, "d": 0, "j": 0, "r": 0, "x": 0, "f": 0, "t": 0, "q": 0, "e": 0, "g": 0, "a": 0, "o": 0, "u": 0, "w": 0, "h": 0, "k": 0, "y": 0, "v": 0, "z": 0, "i": 0, "c": 0, "m": 0, "_": 0, "l": 0, "n": 0},
            "a": {"p": 0, "b": 0, "d": 0, "j": 0, "r": 0, "x": 0, "f": 0, "t": 0, "q": 0, "e": 0, "g": 0, "a": 0, "o": 0, "u": 0, "w": 0, "h": 0, "k": 0, "y": 0, "v": 0, "z": 1, "i": 0, "c": 0, "m": 0, "_": 0, "l": 0, "n": 0},
            "o": {"p": 0, "b": 0, "d": 0, "j": 0, "r": 0, "x": 1, "f": 0, "t": 0, "q": 0, "e": 0, "g": 1, "a": 0, "o": 0, "u": 0, "w": 1, "h": 0, "k": 0, "y": 0, "v": 1, "z": 0, "i": 0, "c": 0, "m": 0, "_": 0, "l": 0, "n": 0},
            "u": {"p": 0, "b": 0, "d": 0, "j": 0, "r": 0, "x": 0, "f": 0, "t": 0, "q": 0, "e": 0, "g": 0, "a": 0, "o": 0, "u": 0, "w": 0, "h": 0, "k": 0, "y": 0, "v": 0, "z": 0, "i": 1, "c": 0, "m": 1, "_": 0, "l": 0, "n": 0},
            "w": {"p": 0, "b": 0, "d": 0, "j": 0, "r": 0, "x": 0, "f": 0, "t": 0, "q": 0, "e": 0, "g": 0, "a": 0, "o": 0, "u": 0, "w": 0, "h": 0, "k": 0, "y": 0, "v": 0, "z": 0, "i": 0, "c": 0, "m": 0, "_": 0, "l": 0, "n": 1},
            "h": {"p": 0, "b": 0, "d": 0, "j": 0, "r": 0, "x": 0, "f": 0, "t": 0, "q": 0, "e": 2, "g": 0, "a": 0, "o": 0, "u": 0, "w": 0, "h": 0, "k": 0, "y": 0, "v": 0, "z": 0, "i": 0, "c": 0, "m": 0, "_": 0, "l": 0, "n": 0},
            "k": {"p": 0, "b": 0, "d": 0, "j": 0, "r": 0, "x": 0, "f": 0, "t": 0, "q": 0, "e": 0, "g": 0, "a": 0, "o": 0, "u": 0, "w": 0, "h": 0, "k": 0, "y": 0, "v": 0, "z": 0, "i": 0, "c": 0, "m": 0, "_": 1, "l": 0, "n": 0},
            "y": {"p": 0, "b": 0, "d": 0, "j": 0, "r": 0, "x": 0, "f": 0, "t": 0, "q": 0, "e": 0, "g": 0, "a": 0, "o": 0, "u": 0, "w": 0, "h": 0, "k": 0, "y": 0, "v": 0, "z": 0, "i": 0, "c": 0, "m": 0, "_": 1, "l": 0, "n": 0},
            "v": {"p": 0, "b": 0, "d": 0, "j": 0, "r": 0, "x": 0, "f": 0, "t": 0, "q": 0, "e": 1, "g": 0, "a": 0, "o": 0, "u": 0, "w": 0, "h": 0, "k": 0, "y": 0, "v": 0, "z": 0, "i": 0, "c": 0, "m": 0, "_": 0, "l": 0, "n": 0},
            "z": {"p": 0, "b": 0, "d": 0, "j": 0, "r": 0, "x": 0, "f": 0, "t": 0, "q": 0, "e": 0, "g": 0, "a": 0, "o": 0, "u": 0, "w": 0, "h": 0, "k": 0, "y": 1, "v": 0, "z": 0, "i": 0, "c": 0, "m": 0, "_": 0, "l": 0, "n": 0},
            "i": {"p": 0, "b": 0, "d": 0, "j": 0, "r": 0, "x": 0, "f": 0, "t": 0, "q": 0, "e": 0, "g": 0, "a": 0, "o": 0, "u": 0, "w": 0, "h": 0, "k": 0, "y": 0, "v": 0, "z": 0, "i": 0, "c": 1, "m": 0, "_": 0, "l": 0, "n": 0},
            "c": {"p": 0, "b": 0, "d": 0, "j": 0, "r": 0, "x": 0, "f": 0, "t": 0, "q": 0, "e": 0, "g": 0, "a": 0, "o": 0, "u": 0, "w": 0, "h": 0, "k": 1, "y": 0, "v": 0, "z": 0, "i": 0, "c": 0, "m": 0, "_": 0, "l": 0, "n": 0},
            "m": {"p": 1, "b": 0, "d": 0, "j": 0, "r": 0, "x": 0, "f": 0, "t": 0, "q": 0, "e": 0, "g": 0, "a": 0, "o": 0, "u": 0, "w": 0, "h": 0, "k": 0, "y": 0, "v": 0, "z": 0, "i": 0, "c": 0, "m": 0, "_": 0, "l": 0, "n": 0},
            "_": {"p": 0, "b": 1, "d": 1, "j": 1, "r": 0, "x": 0, "f": 1, "t": 1, "q": 1, "e": 0, "g": 0, "a": 0, "o": 1, "u": 0, "w": 0, "h": 0, "k": 0, "y": 0, "v": 0, "z": 0, "i": 0, "c": 0, "m": 0, "_": 0, "l": 1, "n": 0},
            "l": {"p": 0, "b": 0, "d": 0, "j": 0, "r": 0, "x": 0, "f": 0, "t": 0, "q": 0, "e": 0, "g": 0, "a": 1, "o": 0, "u": 0, "w": 0, "h": 0, "k": 0, "y": 0, "v": 0, "z": 0, "i": 0, "c": 0, "m": 0, "_": 0, "l": 0, "n": 0},
            "n": {"p": 0, "b": 0, "d": 0, "j": 0, "r": 0, "x": 0, "f": 0, "t": 0, "q": 0, "e": 0, "g": 0, "a": 0, "o": 0, "u": 0, "w": 0, "h": 0, "k": 0, "y": 0, "v": 0, "z": 0, "i": 0, "c": 0, "m": 0, "_": 1, "l": 0, "n": 0}}}]
    self.assertEqual(results, desired_results)
def setUp(self):
    """Create the CoreNLP op and a single-sentence test corpus."""
    self.op = StanfordCoreNLP([])
    self.test_data = [
        Corpus('0', 'Test', 'The quick brown fox jumped over the lazy dog.\n')
    ]