def testDocumentModel(self):
    """Saving an invalid ``Doc`` must raise the expected error and message.

    Each case pairs the keyword arguments used to build the document with
    the exception type and message that ``save()`` is expected to produce.
    """
    cases = (
        (
            ValidationError,
            '(Document:None) (Field is required: [\'name\'])',
            {'text': SAMPLE_TEXT},
        ),
        (
            ValidationError,
            '(Document:None) (StringField only accepts string values: [\'name\'])',
            {'name': 1},
        ),
        (
            InvalidId,
            '\'piec\' is not a valid ObjectId, it must be a 12-byte input of type \'str\' or a 24-character hex string',
            {'id': 'piec', 'name': 'DOC_NAME'},
        ),
    )
    for expected_error, expected_message, fields in cases:
        # The document is built inside the context manager so that an error
        # raised by the constructor is also checked against the expectation.
        with self.assertRaisesMessage(expected_error, expected_message):
            Doc(**fields).save()
def testRemoveText(self):
    """Check that ``remove_text`` edits the stored text and ``last_change``.

    Part one calls ``remove_text(doc, 'Uzytkownik', 0)`` on a document
    holding ``SAMPLE_TEXT`` and expects the reloaded document to have a
    later ``last_change`` and a space as its first character. Part two
    removes ``'o'`` and then ``'k'`` from a document whose text is ``"ok"``
    and expects the stored text to be empty.
    """
    doc = Doc(name=DOC_NAME, last_change=datetime.datetime.now(),
              text=SAMPLE_TEXT).save()
    creation_date = doc.last_change
    remove_text(doc, 'Uzytkownik', 0)

    # Reload from the database to observe what was actually persisted.
    doc = Doc.objects(name=DOC_NAME)[0]
    logger.debug('TestDBManager::testRemoveText compare ' +
                 str(creation_date) + ' and ' + str(doc.last_change))
    # assertLess/assertEqual report both operands when they fail, whereas
    # assertTrue(a < b) only reports "False is not true".
    self.assertLess(creation_date, doc.last_change)
    self.assertEqual(doc.text[0], ' ')

    Doc.objects.delete()
    doc = Doc(name=DOC_NAME, text="ok").save()
    remove_text(doc, 'o', 0)
    remove_text(doc, 'k', 0)
    self.assertEqual(Doc.objects(name=DOC_NAME)[0].text, '')
def testInsertText(self):
    """Check that ``insert_text`` edits the stored text and ``last_change``.

    Part one calls ``insert_text(doc, 'A', 3)`` on a document holding
    ``SAMPLE_TEXT`` and expects the reloaded document to have a later
    ``last_change`` and ``'A'`` at index 3. Part two inserts ``'k'`` and
    then ``'o'`` at position 0 of an empty document and expects the stored
    text to be ``'ok'``.
    """
    doc = Doc(name=DOC_NAME, last_change=datetime.datetime.now(),
              text=SAMPLE_TEXT).save()
    creation_date = doc.last_change
    insert_text(doc, 'A', 3)

    # Reload from the database to observe what was actually persisted.
    doc = Doc.objects(name=DOC_NAME)[0]
    logger.debug('TestDBManager::testInsertText compare ' +
                 str(creation_date) + ' and ' + str(doc.last_change))
    # assertLess/assertEqual report both operands when they fail, whereas
    # assertTrue(a < b) only reports "False is not true".
    self.assertLess(creation_date, doc.last_change)
    self.assertEqual(doc.text[3], 'A')

    Doc.objects.delete()
    doc = Doc(name=DOC_NAME, text="").save()
    insert_text(doc, 'k', 0)
    insert_text(doc, 'o', 0)
    self.assertEqual(Doc.objects(name=DOC_NAME)[0].text, 'ok')
def testGetDocumentOr404(self):
    """Check ``get_document_or_404`` for an existing and a missing name.

    An existing document must be returned; a query that matches nothing
    must raise ``Http404`` with the message asserted below.
    """
    Doc(name=DOC_NAME, text=SAMPLE_TEXT).save()
    # assertEqual shows the mismatching names on failure, unlike
    # assertTrue(a == b).
    self.assertEqual(get_document_or_404(Doc, name=DOC_NAME).name, DOC_NAME)
    with self.assertRaisesMessage(Http404,
                                  'No Document matches the given query.'):
        get_document_or_404(Doc, name='not_exisitng')
def testPolishCharacters(self):
    """Check that ``remove_text`` accepts text with non-ASCII characters.

    Calls ``remove_text`` with the unicode string ``u'Użytkownik'`` at
    position 0 and expects the reloaded document to start with a space.
    """
    doc = Doc(name=DOC_NAME, last_change=datetime.datetime.now(),
              text=SAMPLE_TEXT).save()
    remove_text(doc, u'Użytkownik', 0)
    doc = Doc.objects(name=DOC_NAME)[0]
    # assertEqual shows the actual first character on failure, unlike
    # assertTrue(a == b).
    self.assertEqual(doc.text[0], ' ')
def testHandleList(self):
    """Check that ``handle_list`` fills ``message['files']`` from the store.

    The same ``message`` dict is passed to ``handle_list`` three times:
    with no documents, after saving one document and after saving a second
    one. The number of listed files must be 0, 1 and 2 respectively, and
    the single listed file must carry the saved document's name.
    """
    message = {}
    request = MockRequest()

    handle_list(message, request)
    self.assertEqual(len(message['files']), 0)

    # The saved instances are not needed afterwards, so they are not bound.
    Doc(name=DOC_NAME, last_change=datetime.datetime.now(),
        text=SAMPLE_TEXT).save()
    handle_list(message, request)
    self.assertEqual(len(message['files']), 1)
    # assertEqual shows the mismatching names on failure, unlike
    # assertTrue(a == b).
    self.assertEqual(message['files'][0]['name'], DOC_NAME)

    Doc(name=DOC_NAME + '1', last_change=datetime.datetime.now(),
        text=SAMPLE_TEXT).save()
    handle_list(message, request)
    self.assertEqual(len(message['files']), 2)
def post_mturk_upload():
    """Store an uploaded text as a document and return it as a JSON string.

    Reads ``text`` and ``doc_type`` from the JSON request body. When the
    body carries ``turker_id`` it is saved on ``g.user``; when it carries
    ``source_url`` that value replaces the default ``'mturk'`` source. The
    text is split into sentences with NLTK's ``sent_tokenize`` and each
    sentence is saved as a ``Sent`` linked to the new document.

    Returns:
        A JSON string with the document's ``doc_id``, ``seq``, ``title``,
        ``created_at`` (ISO format) and the dumped sentences under
        ``sents``.
    """
    payload = request.get_json()
    text = payload['text']
    doc_type = payload['doc_type']

    if 'turker_id' in payload:
        g.user.turker_id = payload['turker_id']
        g.user.save()

    from nltk.tokenize import sent_tokenize
    sentences = sent_tokenize(text)

    doc = Doc(title='', text=text, source='mturk', type=doc_type)
    if 'source_url' in payload:
        doc.source = payload['source_url']
    doc.save()

    dumped_sents = []
    res = {
        'doc_id': str(doc.id),
        'sents': dumped_sents,
        'seq': doc.seq,
        'title': doc.title,
        'created_at': doc.created_at.isoformat(),
    }

    # Sentences are stored in tokenisation order; the position in that
    # order becomes the sentence index.
    for position, sentence in enumerate(sentences):
        stored = Sent(index=position, text=sentence, doc=doc).save()
        dumped_sents.append(stored.dump())

    return json.dumps(res)
def insert_doc(title, text, source):
    """Store ``text`` as a new ``'v2'`` document plus one ``Sent`` per line.

    When a ``Doc`` with the same ``title`` already exists, a notice is
    printed and nothing is written. Otherwise a ``Doc`` is saved with
    ``seq`` set to the current document count plus one, and every
    non-blank line of ``text`` is saved as a ``Sent``. Each such line must
    contain a marker of the form ``(Sent<N>)`` with 1-4 digits: ``N``
    becomes the sentence index and every occurrence of that marker is
    removed from the stored sentence text.

    Args:
        title: Title used for the duplicate check and for the new document.
        text: Newline-separated sentences, each tagged with ``(Sent<N>)``.
        source: Value stored in the document's ``source`` field.

    Raises:
        IndexError: If a non-blank line contains no ``(Sent<N>)`` marker.
            The document and any earlier sentences are already saved at
            that point.
    """
    import re

    try:
        Doc.objects.get(title=title)
    except Doc.DoesNotExist:
        pass
    else:
        print('already exist -> pass')
        return

    doc = Doc(title=title, text=text, source=source, type='v2')
    # NOTE(review): numbering by count is not safe against concurrent
    # inserts or deleted documents - confirm this is acceptable.
    doc.seq = Doc.objects.count() + 1
    doc.save()

    marker_re = re.compile(r'\(Sent(\d{1,4})\)')
    # A distinct loop name keeps the ``text`` parameter intact.
    for line in text.split('\n'):
        # Skip blank lines, including whitespace-only ones such as the lone
        # '\r' left on empty lines of CRLF input.
        if not line.strip():
            continue
        match = marker_re.search(line)
        if match is None:
            # Same exception type as the bare ``findall(...)[0]`` lookup
            # would raise, but with a message that identifies the line.
            raise IndexError('no (Sent<N>) marker in line: %r' % line)
        sentence = line.replace(match.group(0), '').strip()
        Sent(index=int(match.group(1)), text=sentence, doc=doc).save()
def testHandleMsg(self):
    """Check ``handle_msg`` for an ``'i'`` message, an ``'r'`` message and a bad id.

    After the ``'i'`` message with text ``'ala'`` at position 0 the stored
    text must start with ``'ala'``. After the ``'r'`` message with text
    ``u'Użytkownik'`` at position 0 the stored text must start with a
    space. An id that matches no document must raise ``Http404``.
    """
    doc = Doc(name=DOC_NAME, last_change=datetime.datetime.now(),
              text=SAMPLE_TEXT).save()
    handle_msg({'type': 'i', 'pos': 0, 'text': 'ala'}, doc['id'])
    doc = Doc.objects(name=DOC_NAME)[0]
    # assertEqual shows the actual text on failure, unlike
    # assertTrue(a == b).
    self.assertEqual(doc.text[:3], 'ala')

    Doc.objects.delete()
    doc = Doc(name=DOC_NAME, last_change=datetime.datetime.now(),
              text=SAMPLE_TEXT).save()
    handle_msg({'type': 'r', 'pos': 0, 'text': u'Użytkownik'}, doc['id'])
    doc = Doc.objects(name=DOC_NAME)[0]
    # Parenthesised single-argument form: prints the same on Python 2 and
    # is valid syntax on Python 3, where the print statement is not.
    print(doc.text[0])
    self.assertEqual(doc.text[0], ' ')

    with self.assertRaisesMessage(Http404,
                                  'No Document matches the given query.'):
        handle_msg({}, 'not_exisitng')
def testLoginWindow(self):
    """Check that the Google Drive authorisation link opens a Google sign-in page.

    Opens the application, clicks the first ``td`` element, then the
    ``gDriveIntegration`` and ``authorizeGDriveLink`` elements, switches to
    the second browser window and expects its title to be the Polish or
    the English Google sign-in title.
    """
    Doc(name=DOC_NAME, text=LOREM_IPSUM).save()
    driver = self.driver
    # Fixed sleeps give the page time to load between steps.
    time.sleep(5)
    driver.get(self.base_url + "/")
    time.sleep(5)
    driver.find_element_by_css_selector("td").click()
    time.sleep(5)
    driver.find_element_by_id("gDriveIntegration").click()
    driver.find_element_by_id("authorizeGDriveLink").click()
    time.sleep(1)
    # Index 1 is the second open window handle.
    driver.switch_to_window(driver.window_handles[1])
    # assertIn reports the actual title on failure, unlike
    # assertTrue(a == t or b == t).
    self.assertIn(
        driver.title,
        (u"Logowanie – Konta Google", u'Sign in - Google Accounts'))
def duplicate_doc(from_type='v2', to_type='v3'):
    """Copy every document of ``from_type``, with its sentences, as ``to_type``.

    Each copy keeps the original text and source, gets ``to_type`` as its
    type, has ``'TARGET_ONLY'`` in its title replaced by ``to_type`` and
    receives ``seq`` equal to the document count at that moment plus one.

    Args:
        from_type: ``type`` value of the documents to copy.
        to_type: ``type`` value assigned to the copies.
    """
    source_docs = Doc.objects(type=from_type).all()
    for original in tqdm(source_docs):
        clone = Doc(
            title=original.title.replace('TARGET_ONLY', to_type),
            text=original.text,
            source=original.source,
            type=to_type,
        )
        # The count is re-read for every copy, so it includes copies saved
        # earlier in this loop.
        clone.seq = Doc.objects.count() + 1
        clone.save()

        for sentence in Sent.objects(doc=original).all():
            Sent(index=sentence.index, text=sentence.text, doc=clone).save()
def testNewDocument(self):
    """Check that a document saved via the ``newDocument`` control has the empty-document text.

    With one other document present, the test clicks the first ``td``
    element and the ``newDocument`` element, types ``DOC_NAME`` into
    ``documentName`` and clicks ``saveDocumentButton``. The stored document
    named ``DOC_NAME`` must then have ``EMPTY_DOC_STRING`` as its text.
    """
    Doc(name=DOC_NAME + '1', text=LOREM_IPSUM).save()
    driver = self.driver
    # Fixed sleeps give the page time to react between steps.
    time.sleep(1)
    driver.get(self.base_url)
    time.sleep(1)
    driver.find_element_by_css_selector("td").click()
    time.sleep(1)
    driver.find_element_by_id("newDocument").click()
    time.sleep(1)
    driver.find_element_by_id("documentName").clear()
    driver.find_element_by_id("documentName").send_keys(DOC_NAME)
    driver.find_element_by_id("saveDocumentButton").click()
    # assertEqual shows the stored text on failure, unlike
    # assertTrue(a == b).
    self.assertEqual(Doc.objects(name=DOC_NAME)[0]['text'], EMPTY_DOC_STRING)
def testSaveAs(self):
    """Check that saving under a new name via ``saveDocument`` keeps the text.

    With one document holding ``LOREM_IPSUM`` present, the test clicks the
    first ``td`` element and the ``saveDocument`` element, types
    ``DOC_NAME`` into ``documentName`` and clicks ``saveDocumentButton``.
    The stored document named ``DOC_NAME`` must then have ``LOREM_IPSUM``
    as its text.
    """
    Doc(name=DOC_NAME + '1', text=LOREM_IPSUM).save()
    logger.debug('TestUI::testSaveAs documents: ' + str(Doc.objects()))
    driver = self.driver
    # Fixed sleeps give the page time to react between steps.
    time.sleep(1)
    driver.get(self.base_url)
    time.sleep(1)
    driver.find_element_by_css_selector("td").click()
    time.sleep(1)
    driver.find_element_by_id("saveDocument").click()
    time.sleep(1)
    driver.find_element_by_id("documentName").clear()
    driver.find_element_by_id("documentName").send_keys(DOC_NAME)
    driver.find_element_by_id("saveDocumentButton").click()
    # assertEqual shows the stored text on failure, unlike
    # assertTrue(a == b).
    self.assertEqual(Doc.objects(name=DOC_NAME)[0]['text'], LOREM_IPSUM)
def testRead(self):
    """Check that the ``#editorBody`` element shows ``"Lorem ipsum."``.

    Starts from a store holding a single document, clicks the first ``td``
    element and reads the ``#editorBody`` element inside the
    ``editorContent`` frame. A mismatch is recorded in
    ``self.verificationErrors`` rather than raised.
    """
    Doc.objects.delete()
    Doc(name=DOC_NAME, text=LOREM_IPSUM).save()

    browser = self.driver
    time.sleep(1)
    browser.get(self.base_url)
    time.sleep(1)

    first_cell = browser.find_element_by_css_selector("td")
    first_cell.click()
    time.sleep(1)

    browser.switch_to_frame("editorContent")
    time.sleep(5)
    editor_body = browser.find_element_by_css_selector("#editorBody")

    try:
        self.assertEqual("Lorem ipsum.", editor_body.text)
    except AssertionError as failure:
        # Soft assertion: collect the message instead of aborting here.
        self.verificationErrors.append(str(failure))
def addinfo(request):
    """Admin view: list documents on GET, publish a new document on POST.

    GET renders ``admin/addinfo.html`` with every ``Doc`` under the
    ``lists`` key. POST builds a ``Doc`` from the submitted form fields
    (``title``, ``summary``, ``source``, ``author``, ``calendar``,
    ``content``). When title, source, author or content is missing or
    empty, ``admin/failure.html`` is rendered and nothing is saved;
    otherwise the document is saved and ``admin/success.html`` is rendered.
    An empty or missing summary defaults to the first 30 characters of the
    content.

    Any other HTTP method falls through and returns ``None``.
    """
    if request.method == 'GET':
        # Named ``docs`` so the ``list`` builtin is not shadowed.
        docs = Doc.objects.all()
        return render_to_response('admin/addinfo.html', {'lists': docs})

    if request.method == 'POST':
        title = request.POST.get('title')
        summary = request.POST.get('summary')
        source = request.POST.get('source')
        author = request.POST.get('author')
        # Named ``published_time`` so a ``time`` module import is not masked.
        published_time = request.POST.get('calendar')
        content = request.POST.get('content')
        success = '成功发布!'
        failure = '发布不成功,请确认信息填充完整后重新发布!'

        # Validate before touching ``content``: ``POST.get`` yields None for
        # a missing field, and a truthiness test rejects both None and ''.
        # NOTE(review): the previous condition tested the title twice; if
        # the second test was meant for the time field, add it here.
        if not (title and source and author and content):
            return render_to_response('admin/failure.html',
                                      {'failure': failure})

        doc = Doc()
        doc.title = title
        doc.source = source
        doc.time = published_time
        doc.author = author
        doc.content = content
        # Decide on the submitted summary, not on the attribute of the
        # freshly created (still unpopulated) document.
        doc.summary = summary if summary else content[0:30]
        doc.save()
        return render_to_response('admin/success.html', {'success': success})