Exemple #1
0
 def testDocumentModel(self):
     """Saving an invalid Doc raises the expected exception and message.

     Exercises a Doc without a name, a Doc with a non-string name and a
     Doc with an id that is not a valid ObjectId.
     """
     # Each entry: (expected exception, expected message, Doc kwargs).
     invalid_cases = (
         (ValidationError,
          '(Document:None) (Field is required: [\'name\'])',
          dict(text=SAMPLE_TEXT)),
         (ValidationError,
          '(Document:None) (StringField only accepts string values: [\'name\'])',
          dict(name=1)),
         (InvalidId,
          '\'piec\' is not a valid ObjectId, it must be a 12-byte input of type \'str\' or a 24-character hex string',
          dict(id='piec', name='DOC_NAME')),
     )
     for error_cls, error_msg, doc_kwargs in invalid_cases:
         with self.assertRaisesMessage(error_cls, error_msg):
             Doc(**doc_kwargs).save()
Exemple #2
0
 def testRemoveText(self):
     """Check the stored Doc after remove_text calls.

     After remove_text(doc, 'Uzytkownik', 0) on a SAMPLE_TEXT document,
     the reloaded Doc must have a later last_change and a text starting
     with a space. After remove_text of 'o' and then 'k' at position 0
     on an "ok" document, the stored text must be empty.
     """
     doc = Doc(name=DOC_NAME,
               last_change=datetime.datetime.now(),
               text=SAMPLE_TEXT).save()
     creation_date = doc.last_change
     remove_text(doc, 'Uzytkownik', 0)
     # Reload from the database so the assertions see what was persisted.
     doc = Doc.objects(name=DOC_NAME)[0]
     logger.debug('TestDBManager::testRemoveText compare ' +
                  str(creation_date) + ' and ' + str(doc.last_change))
     # Dedicated assert methods report both operands on failure, whereas
     # assertTrue(a < b) only reports "False is not true".
     self.assertLess(creation_date, doc.last_change)
     self.assertEqual(doc.text[0], ' ')
     Doc.objects.delete()
     doc = Doc(name=DOC_NAME, text="ok").save()
     remove_text(doc, 'o', 0)
     remove_text(doc, 'k', 0)
     self.assertEqual(Doc.objects(name=DOC_NAME)[0].text, '')
Exemple #3
0
 def testInsertText(self):
     """Check the stored Doc after insert_text calls.

     After insert_text(doc, 'A', 3) on a SAMPLE_TEXT document, the
     reloaded Doc must have a later last_change and 'A' at index 3.
     After insert_text of 'k' and then 'o' at position 0 on an empty
     document, the stored text must be 'ok'.
     """
     doc = Doc(name=DOC_NAME,
               last_change=datetime.datetime.now(),
               text=SAMPLE_TEXT).save()
     creation_date = doc.last_change
     insert_text(doc, 'A', 3)
     # Reload from the database so the assertions see what was persisted.
     doc = Doc.objects(name=DOC_NAME)[0]
     logger.debug('TestDBManager::testInsertText compare ' +
                  str(creation_date) + ' and ' + str(doc.last_change))
     # Dedicated assert methods report both operands on failure, whereas
     # assertTrue(a < b) only reports "False is not true".
     self.assertLess(creation_date, doc.last_change)
     self.assertEqual(doc.text[3], 'A')
     Doc.objects.delete()
     doc = Doc(name=DOC_NAME, text="").save()
     insert_text(doc, 'k', 0)
     insert_text(doc, 'o', 0)
     self.assertEqual(Doc.objects(name=DOC_NAME)[0].text, 'ok')
Exemple #4
0
 def testGetDocumentOr404(self):
     """get_document_or_404 returns the matching Doc or raises Http404."""
     Doc(name=DOC_NAME, text=SAMPLE_TEXT).save()
     # assertEqual shows the returned name on failure; assertTrue(a == b)
     # would only report "False is not true".
     self.assertEqual(
         get_document_or_404(Doc, name=DOC_NAME).name, DOC_NAME)
     with self.assertRaisesMessage(Http404,
                                   'No Document matches the given query.'):
         get_document_or_404(Doc, name='not_exisitng')
Exemple #5
0
 def testPolishCharacters(self):
     """remove_text with a non-ASCII string leaves a text starting with ' '."""
     doc = Doc(name=DOC_NAME,
               last_change=datetime.datetime.now(),
               text=SAMPLE_TEXT).save()
     remove_text(doc, u'Użytkownik', 0)
     # Reload from the database so the assertion sees what was persisted.
     doc = Doc.objects(name=DOC_NAME)[0]
     # assertEqual reports the actual first character on failure.
     self.assertEqual(doc.text[0], ' ')
Exemple #6
0
 def testHandleList(self):
     """handle_list reports 0, 1 and then 2 files as documents are saved.

     The same message dict and MockRequest are reused for every call;
     after the first document is saved its name must be listed first.
     """
     message = {}
     request = MockRequest()
     handle_list(message, request)
     self.assertEqual(len(message['files']), 0)
     # The saved documents are only observed through handle_list, so the
     # value returned by save() is not kept.
     Doc(name=DOC_NAME,
         last_change=datetime.datetime.now(),
         text=SAMPLE_TEXT).save()
     handle_list(message, request)
     self.assertEqual(len(message['files']), 1)
     # assertEqual reports the listed name on failure.
     self.assertEqual(message['files'][0]['name'], DOC_NAME)
     Doc(name=DOC_NAME + '1',
         last_change=datetime.datetime.now(),
         text=SAMPLE_TEXT).save()
     handle_list(message, request)
     self.assertEqual(len(message['files']), 2)
Exemple #7
0
def post_mturk_upload():
    """Store an uploaded text as a Doc plus one Sent per tokenized sentence.

    Reads ``text`` and ``doc_type`` (both required) and, optionally,
    ``turker_id`` and ``source_url`` from the JSON request body.

    Returns:
        A JSON string with the new document's id, seq, title, creation
        time and the dump of every saved sentence.
    """
    payload = request.get_json()
    body = payload['text']
    kind = payload['doc_type']

    # Record the turker id on the current user when the client sent one.
    if 'turker_id' in payload:
        g.user.turker_id = payload['turker_id']
        g.user.save()

    from nltk.tokenize import sent_tokenize
    sentences = sent_tokenize(body)

    # 'mturk' is the default source; an explicit source_url overrides it.
    document = Doc(title='', text=body, source='mturk', type=kind)
    if 'source_url' in payload:
        document.source = payload['source_url']
    document.save()

    response = {
        'doc_id': str(document.id),
        'sents': [],
        'seq': document.seq,
        'title': document.title,
        'created_at': document.created_at.isoformat(),
    }
    for position, sentence in enumerate(sentences):
        saved = Sent(index=position, text=sentence, doc=document).save()
        response['sents'].append(saved.dump())

    return json.dumps(response)
Exemple #8
0
def insert_doc(title, text, source):
    """Insert a 'v2' Doc and one Sent per ``(SentN)``-tagged line of ``text``.

    When a Doc with the same ``title`` already exists, a notice is printed
    and nothing is inserted. Otherwise every non-blank line of ``text``
    must contain a marker such as ``(Sent12)``: the number becomes the
    Sent index and the marker is removed from the stored sentence text.

    Args:
        title: Title of the document; also used for the existence check.
        text: Full document text, one tagged sentence per line.
        source: Value stored in the Doc's ``source`` field.

    Raises:
        IndexError: if a non-blank line has no ``(SentN)`` marker. All
            lines are parsed before anything is saved, so malformed input
            does not leave a Doc with only part of its Sents.
    """
    import re

    try:
        Doc.objects.get(title=title)
    except Doc.DoesNotExist:
        pass
    else:
        print('already exist -> pass')
        return

    marker_re = re.compile(r'\(Sent(\d{1,4})\)')

    # Parse first, write later: a bad line must fail before any save().
    parsed = []
    for line in text.split('\n'):
        # Whitespace-only lines (e.g. a stray '\r') count as blank.
        if not line.strip():
            continue
        match = marker_re.search(line)
        if match is None:
            # IndexError is kept because that is what the previous
            # findall(...)[0] lookup raised for such a line.
            raise IndexError('no (SentN) marker in line: %r' % (line,))
        sent_text = line.replace(match.group(0), '').strip()
        parsed.append((int(match.group(1)), sent_text))

    doc = Doc(title=title, text=text, source=source, type='v2')
    doc.seq = Doc.objects.count() + 1
    doc.save()

    for index, sent_text in parsed:
        Sent(index=index, text=sent_text, doc=doc).save()
Exemple #9
0
 def testHandleMsg(self):
     """Check handle_msg for an 'i' message, an 'r' message and a bad id.

     After an {'type': 'i', 'pos': 0, 'text': 'ala'} message the stored
     text must start with 'ala'. After an 'r' message with u'Użytkownik'
     at position 0 the stored text must start with a space. An unknown
     document id must raise Http404.
     """
     doc = Doc(name=DOC_NAME,
               last_change=datetime.datetime.now(),
               text=SAMPLE_TEXT).save()
     handle_msg({'type': 'i', 'pos': 0, 'text': 'ala'}, doc['id'])
     doc = Doc.objects(name=DOC_NAME)[0]
     self.assertEqual(doc.text[:3], 'ala')
     Doc.objects.delete()
     doc = Doc(name=DOC_NAME,
               last_change=datetime.datetime.now(),
               text=SAMPLE_TEXT).save()
     handle_msg({'type': 'r', 'pos': 0, 'text': u'Użytkownik'}, doc['id'])
     doc = Doc.objects(name=DOC_NAME)[0]
     # assertEqual reports the actual character on failure, so the former
     # debug print (a Python-2-only print statement) is not needed.
     self.assertEqual(doc.text[0], ' ')
     with self.assertRaisesMessage(Http404,
                                   'No Document matches the given query.'):
         handle_msg({}, 'not_exisitng')
Exemple #10
0
 def testLoginWindow(self):
     """Clicking the GDrive authorize link opens a Google sign-in window.

     The title of the second browser window must be the Polish or the
     English Google Accounts sign-in title.
     """
     Doc(name=DOC_NAME, text=LOREM_IPSUM).save()
     driver = self.driver
     time.sleep(5)
     driver.get(self.base_url + "/")
     time.sleep(5)
     driver.find_element_by_css_selector("td").click()
     time.sleep(5)
     driver.find_element_by_id("gDriveIntegration").click()
     driver.find_element_by_id("authorizeGDriveLink").click()
     time.sleep(1)
     # The authorize link opens a second window; switch to it.
     driver.switch_to_window(driver.window_handles[1])
     # assertIn reports the actual window title on failure, whereas
     # assertTrue(... or ...) only reports "False is not true".
     self.assertIn(driver.title,
                   (u"Logowanie – Konta Google",
                    u'Sign in - Google Accounts'))
Exemple #11
0
def duplicate_doc(from_type='v2', to_type='v3'):
    """Copy every Doc of ``from_type``, with its Sents, as a ``to_type`` Doc.

    Each copy keeps the text and source of the original; its title is the
    original title with 'TARGET_ONLY' replaced by ``to_type`` and its seq
    is the current Doc count plus one.
    """
    source_docs = Doc.objects(type=from_type).all()
    for original in tqdm(source_docs):
        clone = Doc(
            title=original.title.replace('TARGET_ONLY', to_type),
            text=original.text,
            source=original.source,
            type=to_type,
        )
        clone.seq = Doc.objects.count() + 1
        clone.save()

        # Re-create each sentence of the original under the new document.
        for sentence in Sent.objects(doc=original).all():
            Sent(index=sentence.index, text=sentence.text, doc=clone).save()
Exemple #12
0
 def testNewDocument(self):
     """Creating a document through the UI stores EMPTY_DOC_STRING as text.

     Opens an existing document, clicks "newDocument", enters DOC_NAME
     and saves, then checks the text stored under DOC_NAME.
     """
     Doc(name=DOC_NAME + '1', text=LOREM_IPSUM).save()
     driver = self.driver
     time.sleep(1)
     driver.get(self.base_url)
     time.sleep(1)
     driver.find_element_by_css_selector("td").click()
     time.sleep(1)
     driver.find_element_by_id("newDocument").click()
     time.sleep(1)
     driver.find_element_by_id("documentName").clear()
     driver.find_element_by_id("documentName").send_keys(DOC_NAME)
     driver.find_element_by_id("saveDocumentButton").click()
     # assertEqual shows the stored text on failure; assertTrue(a == b)
     # would only report "False is not true".
     self.assertEqual(
         Doc.objects(name=DOC_NAME)[0]['text'], EMPTY_DOC_STRING)
Exemple #13
0
 def testSaveAs(self):
     """Saving an open document under DOC_NAME stores the same text.

     Opens the existing document, clicks "saveDocument", enters DOC_NAME
     and saves, then checks that the Doc stored under DOC_NAME holds
     LOREM_IPSUM.
     """
     Doc(name=DOC_NAME + '1', text=LOREM_IPSUM).save()
     logger.debug('TestUI::testSaveAs documents: ' + str(Doc.objects()))
     driver = self.driver
     time.sleep(1)
     driver.get(self.base_url)
     time.sleep(1)
     driver.find_element_by_css_selector("td").click()
     time.sleep(1)
     driver.find_element_by_id("saveDocument").click()
     time.sleep(1)
     driver.find_element_by_id("documentName").clear()
     driver.find_element_by_id("documentName").send_keys(DOC_NAME)
     driver.find_element_by_id("saveDocumentButton").click()
     # assertEqual shows the stored text on failure; assertTrue(a == b)
     # would only report "False is not true".
     self.assertEqual(Doc.objects(name=DOC_NAME)[0]['text'], LOREM_IPSUM)
Exemple #14
0
 def testRead(self):
     """The editor frame shows the text of the only stored document.

     A mismatch is appended to self.verificationErrors instead of failing
     the test immediately.
     """
     # Start from an empty collection so the single "td" is this document.
     Doc.objects.delete()
     Doc(name=DOC_NAME, text=LOREM_IPSUM).save()
     browser = self.driver
     time.sleep(1)
     browser.get(self.base_url)
     time.sleep(1)
     browser.find_element_by_css_selector("td").click()
     time.sleep(1)
     browser.switch_to_frame("editorContent")
     time.sleep(5)
     editor_body = browser.find_element_by_css_selector("#editorBody")
     try:
         self.assertEqual("Lorem ipsum.", editor_body.text)
     except AssertionError as failure:
         self.verificationErrors.append(str(failure))
Exemple #15
0
def addinfo(request):
    """Admin view: list existing Docs (GET) or publish a new one (POST).

    GET renders ``admin/addinfo.html`` with all Doc objects under the
    ``lists`` key.

    POST reads the form fields ``title``, ``summary``, ``source``,
    ``author``, ``calendar`` and ``content``. When title, source, author
    or content is missing or empty it renders ``admin/failure.html``;
    otherwise it saves a new Doc and renders ``admin/success.html``. An
    empty or missing summary falls back to the first 30 characters of
    the content.

    Any other HTTP method falls through and returns None.
    """
    if request.method == 'GET':
        docs = Doc.objects.all()
        return render_to_response('admin/addinfo.html', {'lists': docs})

    if request.method == 'POST':
        title = request.POST.get('title')
        summary = request.POST.get('summary')
        source = request.POST.get('source')
        author = request.POST.get('author')
        published = request.POST.get('calendar')
        content = request.POST.get('content')
        success = '成功发布!'
        failure = '发布不成功,请确认信息填充完整后重新发布!'

        # Validate before building the Doc: POST.get() returns None for
        # an absent field, so truthiness covers both None and ''.
        # NOTE(review): the previous condition tested title twice and
        # never tested the calendar value; the duplicate may have been
        # meant to check it. It is left unvalidated here -- confirm.
        if not (title and source and author and content):
            return render_to_response('admin/failure.html',
                                      {'failure': failure})

        doc = Doc()
        doc.title = title
        doc.source = source
        doc.time = published
        doc.author = author
        doc.content = content
        # Decide on the submitted summary, not on the fresh Doc's own
        # summary attribute, which has not been filled in yet.
        doc.summary = summary if summary else content[0:30]

        doc.save()
        return render_to_response('admin/success.html',
                                  {'success': success})