def test_run(self):
     """Check per-term importance records for two tokenized corpora.

     The corpora are tokenized with ``WordTokenizeWhitespacePunct`` and then
     passed to ``self.op`` (configured outside this method). Expected and
     actual records both go through ``round_json_floats`` before comparison.
     """
     test_data = WordTokenizeWhitespacePunct().run([
         Corpus("0", "hello", "hello world"),
         Corpus("1", "goodbye", "goodbye world"),
     ])
     desired_results = round_json_floats([
         {"term": "hello", "importance": 0.0, "corpus_id": "0"},
         {"term": "world", "importance": -0.4054651081081644, "corpus_id": "0"},
         {"term": "goodbye", "importance": 0.0, "corpus_id": "1"},
         {"term": "world", "importance": -0.4054651081081644, "corpus_id": "1"},
     ])
     results = round_json_floats(self.op.run(test_data))
     # NOTE(review): this only verifies that results is a subset of the
     # expected records; an empty result would still pass.
     for result in results:
         # assertIn names the offending record on failure, whereas
         # assertTrue(x in y) only reports "False is not true".
         self.assertIn(result, desired_results)
Example #2
0
 def test_run(self):
     """Check the records ``Tfidf`` produces for two two-word corpora.

     Every record returned by the operation must be one of the expected
     ``{"term", "importance", "corpus_id"}`` dictionaries.
     """
     self.op = Tfidf()
     self.test_data = [
         Corpus("0", "hello", "hello world"),
         Corpus("1", "goodbye", "goodbye world"),
     ]
     desired_results = [
         {"term": "hello", "importance": 0.4054651081081644, "corpus_id": "0"},
         {"term": "world", "importance": 0.0, "corpus_id": "0"},
         {"term": "goodbye", "importance": 0.4054651081081644, "corpus_id": "1"},
         {"term": "world", "importance": 0.0, "corpus_id": "1"},
     ]
     results = self.op.run(self.test_data)
     # NOTE(review): subset check only; an empty result would still pass.
     for result in results:
         # assertIn names the offending record on failure, whereas
         # assertTrue(x in y) only reports "False is not true".
         self.assertIn(result, desired_results)
Example #3
0
 def test_run_treebank(self):
     """``WordTokenizeTreebank`` must attach the expected token list to each corpus."""
     self.op = WordTokenizeTreebank()
     expected_tokens = {
         "0": ["hello", "world"],
         "1": ["goodbye", "world"],
     }
     corpora = [
         Corpus("0", "hello", "hello world"),
         Corpus("1", "goodbye", "goodbye world"),
     ]
     tokenized = self.op.run(corpora)
     self.assertIsNotNone(tokenized)
     # Results are looked up by corpus id, so their order does not matter.
     for item in tokenized:
         self.assertEqual(item.tokenized_contents, expected_tokens[item.id])
 def test_run(self):
     """Set up the ``WordTokenizeSpaces`` operation, its input and the expected output.

     NOTE(review): as written, the operation is never run and nothing is
     asserted -- the expected records are built and then discarded. Confirm
     whether the assertion was lost.
     """
     self.op = WordTokenizeSpaces()
     self.test_data = [
         Corpus("0", "hello", "hello world"),
         Corpus("1", "goodbye", "goodbye world"),
     ]
     desired_results = [
         {"corpus_id": "0", "tokenized_content": ["hello", "world"]},
         {"corpus_id": "1", "tokenized_content": ["goodbye", "world"]},
     ]
Example #5
0
 def parse_json(self, json_data):
     """Populate this transaction from an encoded JSON request and load its corpora.

     Required keys: ``transaction_id``, ``operation``, ``library`` and
     ``corpora_ids``. Optional keys: ``user_id``, ``cleanup`` (stored as
     ``self.cleanups``) and ``tokenizer``. Each id in ``corpora_ids`` is then
     looked up in the ``corpus`` collection and appended to ``self.corpora``.

     Args:
         json_data: Encoded JSON document; ``.decode()`` is called on it.

     Raises:
         TransactionException: If the payload is not a JSON object, a
             required key is missing, or a corpus cannot be loaded.
     """
     try:
         input_data = json.loads(json_data.decode())
         # NOTE(review): debug dump of the whole request to stdout; consider
         # switching to logging.
         print(input_data)
         if not isinstance(input_data, dict):
             # Valid JSON that is not an object (list, string, number) would
             # otherwise escape as a raw TypeError from the lookups below.
             raise TransactionException('Could not parse JSON.')
         self.transaction_id = input_data['transaction_id']
         self.operation = input_data['operation']
         self.library = input_data['library']
         if 'user_id' in input_data:
             self.user_id = input_data['user_id']
         if 'cleanup' in input_data:
             self.cleanups = input_data['cleanup']
         self.corpora_ids = input_data['corpora_ids']
         if 'tokenizer' in input_data:
             self.tokenizer = input_data['tokenizer']
     except KeyError as err:
         raise TransactionException(
             'Missing property transaction_id, operation, library, tokenizer or corpora_ids.'
         ) from err
     except ValueError as err:
         raise TransactionException('Could not parse JSON.') from err
     try:
         # load corpora from database
         collection = DatabaseAdapter.getDB().corpus
         for corpus_id in self.corpora_ids:
             document = collection.find_one({"_id": ObjectId(corpus_id)})
             # Presumably find_one returns None for an unknown id, in which
             # case the subscripts below raise the TypeError handled here.
             self.corpora.append(
                 Corpus(corpus_id, document["title"], document["contents"],
                        document["tags"]))
     except (TypeError, InvalidId) as err:
         raise TransactionException('Could not find corpus.') from err
 def test_run_pos_frequencies(self):
     """``SplatPOSFrequencies`` must return the expected tags and counts as JSON."""
     self.op = SplatPOSFrequencies()
     self.test_data = [
         Corpus(
             "0",
             "Test",
             "The very quick brown fox jumped over the lazy dog.\nI saw it happen.",
         )
     ]
     expected_tags = {
         "PRP": ["I", "it"],
         "VB": ["happen"],
         "RB": ["very"],
         ".": ["."],
         "VBD": ["jumped", "saw"],
         "DT": ["The", "the"],
         "IN": ["over"],
         "JJ": ["quick", "lazy"],
         "NN": ["brown", "fox", "dog"],
     }
     expected_counts = {
         "PRP": 2, "VB": 1, "RB": 1, ".": 2, "VBD": 2,
         "DT": 2, "IN": 1, "JJ": 2, "NN": 3,
     }
     expected = [{
         "corpus_id": "0",
         "pos_tags": expected_tags,
         "pos_counts": expected_counts,
     }]
     # The operation returns a JSON string; decode it before comparing.
     actual = json.loads(self.op.run(self.test_data))
     self.assertEqual(actual, expected)
Example #7
0
 def test_run(self):
     """``self.op`` must turn ``#Hashtags`` into ``Hashtags`` in the corpus contents."""
     expected_contents = {"0": "This tweet is great! Hashtags"}
     cleaned = self.op.run([Corpus("0", "", "This tweet is great! #Hashtags")])
     self.assertIsNotNone(cleaned)
     for item in cleaned:
         self.assertEqual(item.contents, expected_contents[item.id])
Example #8
0
 def test_run(self):
     """Check transcript, timing statistics and longest tokens for a timed word list.

     The corpus contents are a JSON array of ``start``/``end``/``filler``/
     ``word`` records. Both sides are passed through ``round_json_floats``
     before comparison.
     """
     # One JSON record per segment; joined they form a single JSON array.
     segments = (
         '[{"start":10,"filler":false,"end":90,"word":"i"},',
         '{"start":100,"filler":false,"end":360,"word":"know"},',
         '{"start":370,"filler":false,"end":470,"word":"i"},',
         '{"start":480,"filler":false,"end":730,"word":"justice"},',
         '{"start":740,"filler":false,"end":950,"word":"as"},',
         '{"start":960,"filler":true,"end":980,"word":"<sil>"},',
         '{"start":990,"filler":false,"end":1070,"word":"you"},',
         '{"start":1080,"filler":false,"end":1320,"word":"this"},',
         '{"start":1490,"filler":true,"end":1600,"word":"<sil>"}]',
     )
     actual = self.op.run([Corpus('0', '', ''.join(segments))])
     expected_stats = {
         'num_fillers': 2,
         'num_words': 7,
         'filler_time': 0.13,
         'word_time': 1.22,
         'total_time': 1.6,
         'words_per_minute': 262.5,
         'syllables_per_minute': 300.0,
     }
     expected_longest = [
         {'word': 'know', 'length': 0.26},
         {'word': 'justice', 'length': 0.25},
         {'word': 'this', 'length': 0.24},
         {'word': 'as', 'length': 0.21},
         {'word': 'i', 'length': 0.1},
         {'word': 'i', 'length': 0.08},
         {'word': 'you', 'length': 0.08},
     ]
     expected = [{
         'transcript': 'i know i justice as [SIL] you this [SIL]',
         'base_stats': expected_stats,
         'longest_tokens': expected_longest,
     }]
     self.assertEqual(round_json_floats(actual), round_json_floats(expected))
 def test_run(self):
     """The ``"sentences"`` entry of the result must list term frequencies in this order."""
     corpora = [Corpus("0", "hello", "hello world hello hello world test")]
     # The tokenizer's return value is discarded here; presumably it updates
     # the corpora in place -- TODO confirm.
     WordTokenizeWhitespacePunct().run(corpora)
     expected = [
         {"term": "hello", "frequency": 3},
         {"term": "world", "frequency": 2},
         {"term": "test", "frequency": 1},
     ]
     outcome = self.op.run(corpora)
     self.assertEqual(outcome["sentences"], expected)
Example #10
0
 def test_run_pronouns(self):
     """``SplatPronouns`` must report the expected count and metadata per pronoun.

     Each pronoun maps to ``[count, person, type, number]``; the result also
     carries the list of sentences.
     """
     self.op = SplatPronouns()
     self.test_data = [
         Corpus(
             "0",
             "Test",
             "He and she jumped over my fence.\nI saw them do so, and I told you.",
         )
     ]
     results = json.loads(self.op.run(self.test_data))
     print(results)

     def rows(person, table):
         # Expand {pronoun: (count, type, number)} into the 4-item list form.
         return {
             pronoun: [count, person, kind, number]
             for pronoun, (count, kind, number) in table.items()
         }

     first_person = rows('1st-Person', {
         'MYSELF': (0, 'Reflexive', 'Singular'),
         'OURSELVES': (0, 'Reflexive', 'Plural'),
         'WE': (0, 'Personal', 'Plural'),
         'ME': (0, 'Personal', 'Singular'),
         'OUR': (0, 'Possessive', 'Plural'),
         'MY': (1, 'Possessive', 'Singular'),
         'MINE': (0, 'Possessive', 'Singular'),
         'US': (0, 'Personal', 'Plural'),
         'I': (2, 'Personal', 'Singular'),
         'OURS': (0, 'Possessive', 'Plural'),
     })
     second_person = rows('2nd-Person', {
         'YOU': (1, 'Personal', 'Singular/Plural'),
         'YOURSELVES': (0, 'Reflexive', 'Plural'),
         'YOURS': (0, 'Possessive', 'Singular/Plural'),
         'YOUR': (0, 'Possessive', 'Singular/Plural'),
         'YOURSELF': (0, 'Reflexive', 'Singular'),
     })
     third_person = rows('3rd-Person', {
         'THEY': (0, 'Personal', 'Plural'),
         'ITSELF': (0, 'Reflexive', 'Singular'),
         'HERS': (0, 'Possessive', 'Singular'),
         'HIM': (0, 'Personal', 'Singular'),
         'SHE': (1, 'Personal', 'Singular'),
         'HERSELF': (0, 'Reflexive', 'Singular'),
         'ITS': (0, 'Possessive', 'Singular'),
         'HIMSELF': (0, 'Reflexive', 'Singular'),
         'THEIRS': (0, 'Possessive', 'Plural'),
         'THEIR': (0, 'Possessive', 'Plural'),
         'HIS': (0, 'Possessive', 'Singular'),
         'IT': (0, 'Personal', 'Singular'),
         'HE': (1, 'Personal', 'Singular'),
         'HER': (0, 'Personal/Possessive', 'Singular/Plural'),
         'THEMSELVES': (0, 'Reflexive', 'Plural'),
         'THEM': (1, 'Personal', 'Plural'),
     })
     expected = [{
         'corpus_id': '0',
         'first-person': first_person,
         'second-person': second_person,
         'third-person': third_person,
         'sentences': [
             'He and she jumped over my fence.',
             'I saw them do so, and I told you.',
         ],
     }]
     self.assertEqual(results, expected)
Example #11
0
 def test_run(self):
     """Set up ``WordCloudOp``, its input corpus and the expected term frequencies.

     NOTE(review): as written, the operation is never run and nothing is
     asserted -- confirm whether the assertion was lost.
     """
     self.op = WordCloudOp()
     self.test_data = [
         Corpus("0", "hello", "hello world hello hello world test")
     ]
     desired_results = [
         {"term": "hello", "frequency": 3},
         {"term": "world", "frequency": 2},
         {"term": "test", "frequency": 1},
     ]
Example #12
0
 def test_run(self):
     """Run ``self.op`` on the whole of ``brown.txt`` and sanity-check its output.

     Every result must be non-empty, and each entry in it must have a truthy
     ``'affixes'`` value and at most 15 ``'roots'``.
     """
     # Use a context manager so the file handle is closed deterministically
     # instead of being left to the garbage collector.
     with open('brown.txt', 'r') as handle:
         test_data = [Corpus("0", "", handle.read())]
     results = self.op.run(test_data)
     self.assertIsNotNone(results)
     for result in results:
         self.assertTrue(result)
         for sigs_stems in result:
             self.assertTrue(sigs_stems['affixes'])
             self.assertLessEqual(len(sigs_stems['roots']), 15)
Example #13
0
 def test_run(self):
     """After tokenizing, ``self.op`` must drop both occurrences of "the" and "over"."""
     expected_contents = {"0": "quick brown fox jumps lazy dog"}
     tokenized = WordTokenizeWhitespacePunct().run(
         [Corpus("0", "", "the quick brown fox jumps over the lazy dog")]
     )
     filtered = self.op.run(tokenized)
     self.assertIsNotNone(filtered)
     for item in filtered:
         self.assertEqual(item.contents, expected_contents[item.id])
 def read_corpora(self, corpora_ids):
     """Load corpora from the database and append them to ``self.corpora``.

     Args:
         corpora_ids: Iterable of corpus ids to look up in the ``corpus``
             collection. The previous implementation ignored this argument
             and always read ``self.corpora_ids``; passing ``None`` keeps
             that behaviour.

     Raises:
         TransactionException: If an id is invalid or a corpus cannot be
             loaded.
     """
     if corpora_ids is None:
         corpora_ids = self.corpora_ids
     try:
         # load corpora from database
         collection = DatabaseAdapter.getDB().corpus
         for corpus_id in corpora_ids:
             document = collection.find_one({"_id": ObjectId(corpus_id)})
             # Presumably find_one returns None for an unknown id, in which
             # case the subscripts below raise the TypeError handled here.
             self.corpora.append(
                 Corpus(corpus_id, document["title"], document["contents"],
                        document["tags"]))
     except (TypeError, InvalidId) as err:
         raise TransactionException('Could not find corpus.') from err
 def test_run(self):
     """``self.op`` must remove the ``{sl}`` marker and one adjacent space from the contents."""
     expected_contents = {
         "0": "The quick brown fox jumped over the lazy dog.\n",
     }
     source = Corpus(
         "0", "", "The quick brown fox {sl} jumped over the lazy dog.\n")
     cleaned = self.op.run([source])
     self.assertIsNotNone(cleaned)
     for item in cleaned:
         self.assertEqual(item.contents, expected_contents[item.id])
Example #16
0
    def run(self, data):
        """Split each corpus into blank-line-separated chunks and execute on them.

        Every non-empty chunk becomes its own ``Corpus`` with an empty title
        and stripped contents. The chunks then go through ``RemovePunct``
        before being handed to ``self.execute``.

        NOTE(review): chunk ids restart at "0" for every input corpus, so ids
        repeat when ``data`` holds more than one corpus -- confirm intended.
        """
        chunks = []
        for source in data:
            # splitlines() + "\n".join rewrites every line break as "\n"
            # before splitting on blank lines.
            normalised = "\n".join(source.contents.splitlines())
            for index, piece in enumerate(normalised.split('\n\n')):
                if piece:
                    chunks.append(Corpus(str(index), "", piece.strip()))

        print('Num corpora = {}'.format(len(chunks)))
        return self.execute(RemovePunct().run(chunks))
Example #17
0
 def test_run(self):
     """Run ``self.op`` on the first 100 lines of ``brown.txt``, one corpus per line.

     The result must contain ``self.num_topics`` topics of 10 words each.
     """
     # The context manager closes the file deterministically. zip() with
     # range(100) stops after 100 lines, so the rest of the file is never
     # read or turned into Corpus objects.
     with open('brown.txt', 'r') as handle:
         test_data = [
             Corpus(str(num), "", line)
             for num, line in zip(range(100), handle)
         ]
     results = self.op.run(test_data)
     self.assertIsNotNone(results)  # Result is returned
     self.assertEqual(len(results),
                      self.num_topics)  # Correct number of topics returned
     for topic in results.values():
         self.assertEqual(len(topic), 10)  # Each topic has 10 words
Example #18
0
    def test_run_disfluency(self):
        """``SplatDisfluency`` must report per-sentence and total disfluency counts."""
        self.op = SplatDisfluency()
        self.test_data = [
            Corpus(
                "0",
                "Test",
                "The quick brown fox {sl} jumped over the lazy dog.\nI uh saw it happen.",
            )
        ]
        results = json.loads(self.op.run(self.test_data))
        print(results)

        second_sentence = {
            'SILENT PAUSE': 0, 'HM': 0, 'BREAK': 0, 'UH': 1,
            'UM': 0, 'AH': 0, 'REPETITION': 0, 'ER': 0,
        }
        first_sentence = {
            'SILENT PAUSE': 1, 'HM': 0, 'BREAK': 0, 'UH': 0,
            'UM': 0, 'AH': 0, 'REPETITION': 0, 'ER': 0,
        }
        totals = {
            'SILENT PAUSE': 1, 'HM': 0, 'BREAK': 0, 'TOTAL': 2, 'UM': 0,
            'AH': 0, 'UH': 1, 'REPETITION': 0, 'ER': 0,
        }
        # The sentence keys are exactly what the test expects from the
        # operation, including the trailing "I" on the first one.
        expected = [{
            'corpus_id': '0',
            'sentences': {
                'uh saw it happen.': second_sentence,
                'The quick brown fox {sl} jumped over the lazy dog.I': first_sentence,
            },
            'average_disfluencies_per_sentence': 1.0,
            'total_disfluencies': totals,
        }]

        self.assertEqual(results, expected)
Example #19
0
 def test_run_has_data(self):
     """``self.op`` must return the corpus as a plain dict of its fields."""
     contents = "The quick brown fox jumped over the lazy dog.\n"
     self.test_data = [Corpus("0", "Test", contents)]
     results = self.op.run(self.test_data)
     print(results)
     expected = {
         'title': 'Test',
         'tags': [],
         'id': '0',
         'contents': 'The quick brown fox jumped over the lazy dog.\n',
         'tokenized_contents': None,
     }
     self.assertEqual(results, expected)
Example #20
0
 def test_run(self):
     """``self.op`` must turn a timed word list into a transcript without filler words."""
     # One JSON record per segment; joined they form a single JSON array.
     segments = (
         '[{"start":10,"filler":false,"end":90,"word":"i"},',
         '{"start":100,"filler":false,"end":360,"word":"know"},',
         '{"start":370,"filler":false,"end":470,"word":"i"},',
         '{"start":480,"filler":false,"end":730,"word":"just"},',
         '{"start":740,"filler":false,"end":950,"word":"as"},',
         '{"start":960,"filler":true,"end":980,"word":"<sil>"},',
         '{"start":990,"filler":false,"end":1070,"word":"you"},',
         '{"start":1080,"filler":false,"end":1320,"word":"this"},',
         '{"start":1490,"filler":true,"end":1600,"word":"<sil>"}]',
     )
     expected_contents = {'0': 'i know i just as you this'}
     converted = self.op.run([Corpus('0', '', ''.join(segments))])
     self.assertIsNotNone(converted)
     for item in converted:
         self.assertEqual(item.contents, expected_contents[item.id])
 def test_run(self):
     """After tokenizing, ``self.op`` must rewrite each word to the expected base form."""
     sentence = ("strange women lying in ponds distributing swords is no "
                 "basis for a system of government")
     expected_contents = {
         "0": ("strange woman lie in pond distribute sword be no "
               "basis for a system of government"),
     }
     tokenized = WordTokenizeWhitespacePunct().run([Corpus("0", "", sentence)])
     processed = self.op.run(tokenized)
     self.assertIsNotNone(processed)
     for item in processed:
         self.assertEqual(item.contents, expected_contents[item.id])
 def test_run_preserve_nnp(self):
     """``RemoveCapsPreserveNNP`` must lowercase sentence starts but keep "Bill Clinton"."""
     self.op = RemoveCapsPreserveNNP()
     original_text = (
         "Removes all non-proper-noun capitals from a given text. "
         "Removes capital letters from text, even for Bill Clinton. "
         "Accepts as input a non-tokenized string."
     )
     expected_contents = {
         "0": (
             "removes all non-proper-noun capitals from a given text. "
             "removes capital letters from text, even for Bill Clinton. "
             "accepts as input a non-tokenized string."
         ),
     }
     lowered = self.op.run([Corpus("0", "", original_text)])
     self.assertIsNotNone(lowered)
     for item in lowered:
         self.assertEqual(item.contents, expected_contents[item.id])
Example #23
0
 def test_run(self):
     """``self.op`` must split the corpus into its three sentences."""
     text = "hello world. Will you say goodbye, world? I'll say hello."
     expected_sentences = {
         "0": [
             "hello world.",
             "Will you say goodbye, world?",
             "I'll say hello.",
         ],
     }
     outcome = self.op.run([Corpus("0", "", text)])
     self.assertIsNotNone(outcome)
     # Each record carries its own corpus id, used to pick the expectation.
     for record in outcome:
         self.assertEqual(record['sentences'],
                          expected_sentences[record['corpus_id']])
Example #24
0
 def test_porter(self):
     """After tokenizing, ``self.op`` must produce the expected stems token by token."""
     text = ('strange women lying ponds distributing '
             'swords no basis system government')
     expected_stems = {
         "0": [
             'strang', 'women', 'lie', 'pond', 'distribut',
             'sword', 'no', 'basi', 'system', 'govern',
         ],
     }
     tokenized = WordTokenizeWhitespacePunct().run([Corpus("0", "", text)])
     stemmed = self.op.run(tokenized)
     self.assertIsNotNone(stemmed)
     for item in stemmed:
         self.assertEqual(item.tokenized_contents, expected_stems[item.id])
Example #25
0
 def test_run_syllables(self):
     """``SplatSyllables`` must group the lowercased words by syllable count."""
     self.op = SplatSyllables()
     self.test_data = [
         Corpus(
             "0",
             "Test",
             "The very quick brown fox jumped over the lazy dog.\nI saw it happen.",
         )
     ]
     one_syllable = [
         'the', 'quick', 'brown', 'fox', 'jumped', 'dog', 'i', 'saw', 'it',
     ]
     two_syllables = ['very', 'over', 'lazy', 'happen']
     expected = [{
         'corpus_id': '0',
         # Keys are strings because the result is decoded from JSON.
         'syllables': {'1': one_syllable, '2': two_syllables},
     }]
     actual = json.loads(self.op.run(self.test_data))
     self.assertEqual(actual, expected)
Example #26
0
 def test_run_complexity(self):
     """``SplatComplexity`` must report the expected metrics for a two-sentence corpus.

     Both sides are passed through ``round_json_floats`` before comparison.
     """
     self.op = SplatComplexity()
     self.test_data = [
         Corpus(
             "0",
             "Test",
             "The quick brown fox jumped over the lazy dog.\nI saw it happen.",
         )
     ]
     expected = [
         {
             'corpus_id': '0',
             'content_density': [2.0, 2.0, 2.0],
             'idea_density': 0.5,
             'flesch_score': 96.1,
             'kincaid_score': 1.5,
             'types': 12,
             'tokens': 13,
             'type_token_ratio': 0.9230769230769231,
         },
     ]
     actual = json.loads(self.op.run(self.test_data))
     self.assertEqual(round_json_floats(actual), round_json_floats(expected))
Example #27
0
 def test_run_ngrams(self):
     """``SplatNGrams`` must count word unigrams, bigrams and trigrams of one sentence."""
     self.op = SplatNGrams()
     self.test_data = [
         Corpus("0", "Test",
                "The quick brown fox jumped over the lazy dog.\n")
     ]
     results = json.loads(self.op.run(self.test_data))

     # "the" is the only word that occurs twice; every other n-gram is unique.
     unigrams = dict.fromkeys(
         ["dog", "fox", "jumped", "over", "lazy", "brown", "quick"], 1)
     unigrams["the"] = 2
     bigrams = dict.fromkeys([
         "the quick", "quick brown", "lazy dog", "brown fox",
         "fox jumped", "jumped over", "over the", "the lazy",
     ], 1)
     trigrams = dict.fromkeys([
         "the quick brown", "quick brown fox", "the lazy dog",
         "jumped over the", "over the lazy", "brown fox jumped",
         "fox jumped over",
     ], 1)
     expected = [{
         "corpus_id": "0",
         "unigrams": unigrams,
         "bigrams": bigrams,
         "trigrams": trigrams,
     }]
     self.assertEqual(results, expected)
Example #28
0
 def test_run(self):
     """``self.op`` must count character unigrams, bigrams and trigrams of one sentence.

     In the expected keys the letters are lowercase and ``_`` appears in
     place of the separators between words.
     """
     self.test_data = [
         Corpus("0", "Test",
                "The quick brown fox jumped over the lazy dog.\n")
     ]
     results = self.op.run(self.test_data)

     # Start every key at 1, then override the keys that occur more often.
     unigrams = dict.fromkeys(
         "a b c f g i j k l m n p q v w x y z".split(), 1)
     unigrams.update({
         "_": 8, "d": 2, "e": 4, "h": 2, "o": 4, "r": 2, "t": 2, "u": 2,
     })

     bigrams = dict.fromkeys((
         "_b _d _f _j _l _o _q _t az br ck d_ do ed er fo ic ju k_ la "
         "mp n_ og ov ow ox pe qu r_ ro ui um ve wn x_ y_ zy"
     ).split(), 1)
     bigrams.update({"e_": 2, "he": 2, "th": 2})

     trigrams = dict.fromkeys((
         "_br _do _fo _ju _la _ov _qu _th azy bro ck_ d_o dog e_l e_q "
         "ed_ er_ fox ick jum k_b laz mpe n_f ove own ox_ ped qui r_t "
         "row uic ump ver wn_ x_j y_d zy_"
     ).split(), 1)
     trigrams.update({"he_": 2, "the": 2})

     expected = [{
         "corpus_id": "0",
         "unigrams": unigrams,
         "bigrams": bigrams,
         "trigrams": trigrams,
     }]
     self.assertEqual(results, expected)
Example #29
0
 def test_run(self):
     """``self.op`` must return the expected character list and 26x26 count matrix.

     ``"array"`` maps each character to a dict giving a count for every
     character in ``"chars"``. Only the non-zero cells are spelled out
     below; all other cells are expected to be 0.
     """
     self.test_data = [Corpus("0", "Test", "The quick brown fox jumped over the lazy dog.\n")]
     results = self.op.run(self.test_data)

     chars = ["_", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o",
              "p", "q", "r", "t", "u", "v", "w", "x", "y", "z"]
     # Outer key -> {inner key: count} for the non-zero cells ("g" has none).
     nonzero = {
         "_": {"b": 1, "d": 1, "f": 1, "j": 1, "l": 1, "o": 1, "q": 1, "t": 1},
         "a": {"z": 1},
         "b": {"r": 1},
         "c": {"k": 1},
         "d": {"o": 1, "_": 1},
         "e": {"d": 1, "r": 1, "_": 2},
         "f": {"o": 1},
         "h": {"e": 2},
         "i": {"c": 1},
         "j": {"u": 1},
         "k": {"_": 1},
         "l": {"a": 1},
         "m": {"p": 1},
         "n": {"_": 1},
         "o": {"g": 1, "v": 1, "w": 1, "x": 1},
         "p": {"e": 1},
         "q": {"u": 1},
         "r": {"o": 1, "_": 1},
         "t": {"h": 2},
         "u": {"i": 1, "m": 1},
         "v": {"e": 1},
         "w": {"n": 1},
         "x": {"_": 1},
         "y": {"_": 1},
         "z": {"y": 1},
     }
     # Expand to the dense form: every pair of characters gets an entry.
     array = {
         row: {col: nonzero.get(row, {}).get(col, 0) for col in chars}
         for row in chars
     }
     expected = [{"chars": chars, "array": array}]
     self.assertEqual(results, expected)
 def setUp(self):
     """Create the ``StanfordCoreNLP`` operation and a one-sentence test corpus."""
     # The operation is constructed with an empty list as its only argument.
     self.op = StanfordCoreNLP([])
     self.test_data = [
         Corpus('0', 'Test',
                'The quick brown fox jumped over the lazy dog.\n')
     ]