def setUp(self):
    """Create a user, an empty text file, a corpus item, its parsed word
    tokens (loaded from a canned CoreNLP-style JSON response) and a
    collection containing the corpus item."""
    self.user = User.objects.create_user(
        username='******', password='******', email='*****@*****.**')
    # Keep the saved instance instead of re-querying with .last(), which
    # is fragile if another row is created concurrently.
    text_file = TextFile(user=self.user, file=None)
    text_file.save()
    self.text_file = text_file
    corpus_item = CorpusItem(
        title='test_ci', text_file=self.text_file, user=self.user)
    corpus_item.save()
    self.corpus_item = corpus_item
    f_path = abspath("tests/core/parse_response.json")
    with open(f_path) as json_file:
        parsed_text = json.loads(json_file.read())
    sentences = parsed_text['sentences']
    # Collect every token first so they can be written in a single query.
    words_to_save = []
    for sentence in sentences:
        handler = SentenceHandler(sentence, self.corpus_item)
        # extend() avoids the quadratic cost of repeated list concatenation.
        words_to_save.extend(handler.process_sentence())
    # Bulk save the words.
    WordToken.objects.bulk_create(words_to_save)
    collection = CorpusItemCollection(user=self.user, title='test')
    collection.save()
    self.collection = collection
    self.collection.corpus_items.add(self.corpus_item)
def test_that_sentence_can_be_rebuilt(self):
    """The word tokens saved for a sentence should reassemble into the
    original sentence text."""
    handler = SentenceHandler(self.test_sentence, self.corpus_item)
    handler.create_sentence()
    handler.save_word_tokens()
    sentences = Sentence.objects.all()
    words = WordToken.objects.filter(sentence=sentences[0])
    # (Removed a leftover debug `print words[0]` statement.)
    sentence = rebuild_sentence_from_tokens(words)
    self.assertEqual(
        sentence,
        'I had long been familiar with the area around the Boulevard Ornano.'
    )
def setUp(self):
    """Create a user, two uploaded text files, a corpus item with parsed
    word tokens, a collection, and install a default token filter into
    the Django settings."""
    self.user = User.objects.create_user(
        username='******', password='******', email='*****@*****.**')
    self.line_file = create_doc_with_x_lines(100)
    # ''.join builds real file content of 100 numbered lines; the original
    # str([...]) wrote the repr of a list ("['0\n', '1\n', ...]").
    numbered_lines = ''.join(str(num) + '\n' for num in xrange(100))
    TextFile(user=self.user,
             file=SimpleUploadedFile('best_file_eva.txt', numbered_lines)).save()
    self.text_file = TextFile.objects.last()
    sentences = "I am a pony. I am a frong. I am a dog. I go to the zoo."
    TextFile(user=self.user,
             file=SimpleUploadedFile('best_file.txt', sentences)).save()
    self.sentence_file = TextFile.objects.last()
    CorpusItem(title='test_ci', text_file=self.text_file,
               user=self.user).save()
    self.corpus_item = CorpusItem.objects.last()
    f_path = abspath("tests/core/parse_response.json")
    self.parse_lock_path = abspath("tests/core/parse_locked.txt")
    with open(f_path) as json_file:
        parsed_text = json.loads(json_file.read())
    sentences = parsed_text['sentences']
    # Collect every token first so they can be written in a single query.
    words_to_save = []
    for sentence in sentences:
        handler = SentenceHandler(sentence, self.corpus_item)
        # extend() avoids the quadratic cost of repeated list concatenation.
        words_to_save.extend(handler.process_sentence())
    # Bulk save the words.
    WordToken.objects.bulk_create(words_to_save)
    CorpusItemCollection(user=self.user, title='test').save()
    self.collection = CorpusItemCollection.objects.last()
    self.collection.corpus_items.add(self.corpus_item)
    # NOTE(review): this mutates module-level settings without restoring
    # them in tearDown — may leak into other test cases; confirm intended.
    settings.DEFAULT_FILTER = {
        "name": "bob",
        "filter_data": {
            "lemma": True,
            "ner": False,
            # Penn Treebank POS tags to keep (duplicate 'PDT' removed).
            "pos": [
                'CC', 'CD', 'DT', 'EX', 'FW', 'IN', 'JJ', 'JJR', 'JJS',
                'LS', 'MD', 'NN', 'NNS', 'NNP', 'NNPS', 'PDT', 'POS',
                'PRP', 'PRP$', 'RB', 'RBR', 'RBS', 'RP', 'SYM', 'TO',
                'UH', 'VB', 'VBD', 'VBG', 'VBN', 'VBP', 'VBZ', 'WDT',
                'WP', 'WP$', 'WRB'
            ],
            "stopwords": (
                "I,i,me,my,myself,we,our,ours,ourselves,you,your,yours,"
                "yourself,yourselves,he,him,his,himself,she,her,hers,"
                "herself,it,its,itself,they,them,their,theirs,themselves,"
                "what,which,who,whom,this,that,these,those,am,is,are,was,"
                "were,be,been,being,have,has,had,having,do,does,did,doing,"
                "a,an,the,and,but,if,or,because,as,until,while,of,at,by,"
                "for,with,about,against,between,into,through,during,"
                "before,after,above,below,to,from,up,down,in,out,on,off,"
                "over,under,again,further,then,once,here,there,when,"
                "where,why,how,all,any,both,each,few,more,most,other,"
                "some,such,no,nor,not,only,own,same,so,than,too,very,"
                "s,t,can,will,just,don,should,now"
            )
        }
    }
    self.filter = settings.DEFAULT_FILTER
def test_that_handler_is_saving_dependency_parse(self):
    """Processing a sentence stores one SentenceDependency row per
    dependency in the basic parse (13 for the fixture sentence)."""
    sentence_handler = SentenceHandler(self.test_sentence, self.corpus_item)
    sentence_handler.create_sentence()
    sentence_handler.save_sentence_dependecy_parses("basic-dependencies")
    self.assertEqual(SentenceDependency.objects.all().count(), 13)
def test_that_handler_is_saving_tokens(self):
    """Saving word tokens for the fixture sentence yields 13 WordToken rows."""
    sentence_handler = SentenceHandler(self.test_sentence, self.corpus_item)
    sentence_handler.create_sentence()
    sentence_handler.save_word_tokens()
    self.assertEqual(WordToken.objects.all().count(), 13)
def test_sentence_handler_create_sentence(self):
    """create_sentence() persists exactly one Sentence row."""
    sentence_handler = SentenceHandler(self.test_sentence, self.corpus_item)
    sentence_handler.create_sentence()
    saved_sentences = Sentence.objects.all()
    self.assertEqual(saved_sentences.count(), 1)