Beispiel #1
0
def post_mturk_upload():
    """Store an uploaded MTurk document and return it as a JSON string.

    Reads the JSON request body.  ``text`` and ``doc_type`` are required;
    ``turker_id`` and ``source_url`` are optional.  When ``turker_id`` is
    present it is written to ``g.user`` and the user is saved.  The text is
    split with NLTK's ``sent_tokenize``; one ``Doc`` is saved, followed by
    one ``Sent`` per sentence (indexed from 0).

    Returns:
        A JSON string with ``doc_id``, ``sents`` (``dump()`` of every saved
        sentence, in order), ``seq``, ``title`` and ``created_at``.

    Raises:
        KeyError: if ``text`` or ``doc_type`` is missing from the payload.
    """
    # Imported at call time rather than at module import time.
    from nltk.tokenize import sent_tokenize

    data = request.get_json()
    text = data['text']
    doc_type = data['doc_type']

    if 'turker_id' in data:
        g.user.turker_id = data['turker_id']
        g.user.save()

    # Tokenize before saving anything so a tokenizer failure leaves no Doc.
    sentence_texts = sent_tokenize(text)

    # 'mturk' is the source unless the client supplied a source URL.
    doc = Doc(title='',
              text=text,
              source=data.get('source_url', 'mturk'),
              type=doc_type)
    doc.save()

    res = {
        'doc_id': str(doc.id),
        'sents': [],
        'seq': doc.seq,
        'title': doc.title,
        'created_at': doc.created_at.isoformat(),
    }
    for index, sentence_text in enumerate(sentence_texts):
        sent = Sent(index=index, text=sentence_text, doc=doc).save()
        res['sents'].append(sent.dump())

    return json.dumps(res)
Beispiel #2
0
def insert_doc(title, text, source):
    """Insert a ``v2`` document whose lines carry ``(SentN)`` markers.

    If a ``Doc`` with the same title already exists, a notice is printed
    and nothing is inserted.  Otherwise a ``Doc`` is saved with
    ``seq = Doc.objects.count() + 1`` and every non-blank line of *text*
    becomes one ``Sent``: the first ``(Sent<digits>)`` marker on the line
    supplies the sentence index, and the marker is stripped from the
    stored sentence text.

    Args:
        title: document title, also used for the duplicate check.
        text: newline-separated sentences, each containing a marker such
            as ``(Sent12)``.
        source: value stored in the document's ``source`` field.

    Raises:
        IndexError: if a non-blank line contains no ``(SentN)`` marker.
            The ``Doc`` (and any earlier sentences) are already saved at
            that point.
    """
    import re

    try:
        Doc.objects.get(title=title)
    except Doc.DoesNotExist:
        pass
    else:
        print('already exist -> pass')
        return

    doc = Doc(title=title, text=text, source=source, type='v2')
    doc.seq = Doc.objects.count() + 1
    doc.save()

    marker_re = re.compile(r'\(Sent\d{1,4}\)')

    for line in text.split('\n'):
        # Skip empty and whitespace-only lines (e.g. trailing "\r" or
        # indentation) instead of failing on the marker lookup below.
        if not line.strip():
            continue

        index_str = marker_re.findall(line)[0]
        sent_text = line.replace(index_str, '').strip()
        index = int(index_str.replace('(Sent', '').replace(')', ''))

        Sent(index=index, text=sent_text, doc=doc).save()
0
def submit(request):
    """Create a ``Doc`` from the posted form and redirect to ``/``.

    Reads ``subject``, ``content`` and ``writer`` from ``request.POST``
    (a missing key raises, as with any direct ``POST[...]`` lookup), saves
    a new ``Doc`` with ``flag`` set to ``"none"`` and ``reg_date`` set to
    the current time, then returns a redirect to the site root.
    """
    form = request.POST
    new_post = Doc(
        dsubject=form['subject'],
        contents=form['content'],
        writer=form['writer'],
        flag="none",
        reg_date=timezone.now(),
    )
    new_post.save()
    return HttpResponseRedirect('/')
Beispiel #4
0
 def testDocumentModel(self):
     # Saving an invalid Doc must fail with the exact messages below:
     # a missing name, a non-string name, and a malformed id.
     missing_name_msg = '(Document:None) (Field is required: [\'name\'])'
     bad_name_type_msg = (
         '(Document:None) (StringField only accepts string values: [\'name\'])'
     )
     bad_id_msg = (
         '\'piec\' is not a valid ObjectId, it must be a 12-byte input of type \'str\' or a 24-character hex string'
     )

     with self.assertRaisesMessage(ValidationError, missing_name_msg):
         Doc(text=SAMPLE_TEXT).save()

     with self.assertRaisesMessage(ValidationError, bad_name_type_msg):
         Doc(name=1).save()

     with self.assertRaisesMessage(InvalidId, bad_id_msg):
         Doc(id='piec', name='DOC_NAME').save()
Beispiel #5
0
def generate_encrypted_file(seq_id):
    """Write an XOR-obfuscated JSON dump of one document to disk.

    Looks up the ``Doc`` with ``seq == seq_id`` and its sentences ordered
    by ``index``, serialises ``doc_id``, ``title``, ``seq`` and the
    sentence dumps as JSON, XORs every character with
    ``config.Config.ENCRYPTION_KEY`` (repeated cyclically) and writes the
    comma-separated decimal results to
    ``<this file's dir>/../data/encrypted/#<seq_id>_<title>``.

    Args:
        seq_id: value of the document's ``seq`` field.

    Returns:
        None.  Returns silently without writing when the lookup fails.
    """
    from itertools import cycle

    def str_xor(s1, s2):
        """XOR the code points of *s1* with *s2* repeated cyclically and
        return the results as comma-joined decimal numbers."""
        return ",".join(
            str(ord(c1) ^ ord(c2)) for c1, c2 in zip(s1, cycle(s2)))

    try:
        doc = Doc.objects().get(seq=seq_id)
        sents = Sent.objects(doc=doc).order_by('index')
    except Exception:
        # Best effort, kept from the original: a document that cannot be
        # looked up is skipped rather than aborting the caller.
        return

    payload = json.dumps({
        'doc_id': str(doc.id),
        'title': doc.title,
        'seq': doc.seq,
        'sents': [sent.dump() for sent in sents],
    })
    encrypted = str_xor(payload, config.Config.ENCRYPTION_KEY)

    # os.path.join instead of string concatenation: when dirname(__file__)
    # is '' (module loaded via a bare relative filename) concatenating
    # '/../data/...' would resolve to the filesystem root.
    file_path = os.path.abspath(os.path.join(
        os.path.dirname(__file__),
        '..', 'data', 'encrypted',
        '#{}_{}'.format(seq_id, doc.title)))
    with open(file_path, 'w') as f:
        f.write(encrypted)
Beispiel #6
0
def index_v2_page(doc_type):
    """Render one page of the document index for *doc_type*.

    The page number comes from the ``p`` query parameter (default 1).
    Documents of the given type are ordered by ``seq`` and paginated 50
    per page.  Each listed document's dump is extended with
    ``sent_total`` (number of its sentences) and ``progress`` (number of
    ``'sentence'`` annotations the current user has on it).

    Args:
        doc_type: value of the ``type`` field to filter documents by.

    Returns:
        The rendered ``index.html`` template.
    """
    item_per_page = 50
    # ``type=int`` makes the lookup fall back to the default when ``p`` is
    # not a valid integer, instead of int() raising ValueError.
    page = request.args.get('p', 1, type=int)

    total = Doc.objects.filter(type=doc_type).count()
    # Integer ceiling division: same result as math.ceil(total / n) under
    # true division, but also correct where ``/`` is integer division.
    total_page = (total + item_per_page - 1) // item_per_page
    paginator = Pagination(
        Doc.objects(type=doc_type).order_by('seq'), page, item_per_page)

    docs_data = []
    for doc in paginator.items:
        item = doc.dump()
        item['sent_total'] = Sent.objects(doc=doc).count()
        item['progress'] = Annotation.objects(
            doc=doc, user=g.user, type='sentence').count()
        docs_data.append(item)

    # Page links shown in the template: up to five pages on either side.
    pagination = {
        'page': page,
        'total_page': total_page,
        'left': max(1, page - 5),
        'right': min(page + 5, total_page),
    }

    return render_template('index.html', type=doc_type, docs=docs_data,
                           g=g, pagination=pagination)
Beispiel #7
0
 def testRemoveText(self):
     # remove_text must bump last_change and leave a space as the first
     # character of SAMPLE_TEXT; removing 'o' then 'k' from "ok" must
     # leave an empty text.
     doc = Doc(name=DOC_NAME,
               last_change=datetime.datetime.now(),
               text=SAMPLE_TEXT).save()
     creation_date = doc.last_change
     remove_text(doc, 'Uzytkownik', 0)
     doc = Doc.objects(name=DOC_NAME)[0]
     logger.debug('TestDBManager::testRemoveText compare ' +
                  str(creation_date) + ' and ' + str(doc.last_change))
     # assertLess/assertEqual report both operands on failure, unlike
     # assertTrue on a pre-evaluated comparison.
     self.assertLess(creation_date, doc.last_change)
     self.assertEqual(doc.text[0], ' ')
     Doc.objects.delete()
     doc = Doc(name=DOC_NAME, text="ok").save()
     remove_text(doc, 'o', 0)
     remove_text(doc, 'k', 0)
     self.assertEqual(Doc.objects(name=DOC_NAME)[0].text, '')
Beispiel #8
0
 def testPolishCharacters(self):
     # Removing a word containing a non-ASCII character at position 0
     # must leave a space as the first character of the stored text.
     doc = Doc(name=DOC_NAME,
               last_change=datetime.datetime.now(),
               text=SAMPLE_TEXT).save()
     remove_text(doc, u'Użytkownik', 0)
     doc = Doc.objects(name=DOC_NAME)[0]
     # assertEqual shows the actual character on failure.
     self.assertEqual(doc.text[0], ' ')
Beispiel #9
0
 def testGetDocumentOr404(self):
     # An existing name is returned; an unknown name raises Http404 with
     # the exact message below.
     Doc(name=DOC_NAME, text=SAMPLE_TEXT).save()
     # assertEqual shows the returned name on failure.
     self.assertEqual(
         get_document_or_404(Doc, name=DOC_NAME).name, DOC_NAME)
     with self.assertRaisesMessage(Http404,
                                   'No Document matches the given query.'):
         get_document_or_404(Doc, name='not_exisitng')
Beispiel #10
0
 def testInsertText(self):
     # insert_text must bump last_change and place 'A' at index 3;
     # inserting 'k' then 'o' at position 0 of an empty text gives "ok".
     doc = Doc(name=DOC_NAME,
               last_change=datetime.datetime.now(),
               text=SAMPLE_TEXT).save()
     creation_date = doc.last_change
     insert_text(doc, 'A', 3)
     doc = Doc.objects(name=DOC_NAME)[0]
     logger.debug('TestDBManager::testInsertText compare ' +
                  str(creation_date) + ' and ' + str(doc.last_change))
     # assertLess/assertEqual report both operands on failure, unlike
     # assertTrue on a pre-evaluated comparison.
     self.assertLess(creation_date, doc.last_change)
     self.assertEqual(doc.text[3], 'A')
     Doc.objects.delete()
     doc = Doc(name=DOC_NAME, text="").save()
     insert_text(doc, 'k', 0)
     insert_text(doc, 'o', 0)
     self.assertEqual(Doc.objects(name=DOC_NAME)[0].text, 'ok')
Beispiel #11
0
 def testHandleList(self):
     # handle_list fills message['files']: empty with no documents, then
     # one entry named DOC_NAME, then two entries after a second save.
     message = {}
     request = MockRequest()
     handle_list(message, request)
     self.assertEqual(len(message['files']), 0)
     # The saved documents are not used directly, so they are not bound
     # to a local name.
     Doc(name=DOC_NAME,
         last_change=datetime.datetime.now(),
         text=SAMPLE_TEXT).save()
     handle_list(message, request)
     self.assertEqual(len(message['files']), 1)
     # assertEqual shows the listed name on failure.
     self.assertEqual(message['files'][0]['name'], DOC_NAME)
     Doc(name=DOC_NAME + '1',
         last_change=datetime.datetime.now(),
         text=SAMPLE_TEXT).save()
     handle_list(message, request)
     self.assertEqual(len(message['files']), 2)
Beispiel #12
0
def doc_migration():
    """Set ``type`` on every ``Doc`` and save it.

    Documents whose ``mturk`` attribute is truthy get type ``'mturk'``;
    all others get ``'v1'``.  Progress is reported through ``tqdm``.
    """
    for doc in tqdm(Doc.objects().all()):
        doc.type = 'mturk' if doc.mturk else 'v1'
        doc.save()
Beispiel #13
0
def delete_doc(doc_id):
    """Delete a document together with its annotations and sentences.

    Args:
        doc_id: id of the ``Doc`` to delete.

    Raises:
        Whatever ``Doc.objects().get`` raises when no document matches
        *doc_id*.
    """
    doc = Doc.objects().get(id=doc_id)

    # The original queried ``Sent`` a second time here, so Annotation
    # records were never removed.  Annotations are deleted first because
    # they reference both the document and its sentences.
    for annotation in Annotation.objects(doc=doc):
        annotation.delete()

    for sent in Sent.objects(doc=doc):
        sent.delete()

    doc.delete()
Beispiel #14
0
 def testHandleMsg(self):
     # An 'i' message with text 'ala' at pos 0 must make the stored text
     # start with 'ala'; an 'r' message at pos 0 must leave a space as
     # the first character; an unknown document id must raise Http404.
     doc = Doc(name=DOC_NAME,
               last_change=datetime.datetime.now(),
               text=SAMPLE_TEXT).save()
     handle_msg({'type': 'i', 'pos': 0, 'text': 'ala'}, doc['id'])
     doc = Doc.objects(name=DOC_NAME)[0]
     self.assertEqual(doc.text[:3], 'ala')
     Doc.objects.delete()
     doc = Doc(name=DOC_NAME,
               last_change=datetime.datetime.now(),
               text=SAMPLE_TEXT).save()
     handle_msg({'type': 'r', 'pos': 0, 'text': u'Użytkownik'}, doc['id'])
     doc = Doc.objects(name=DOC_NAME)[0]
     # Parenthesised single-argument print behaves the same as the
     # original print statement on Python 2 and also parses on Python 3.
     print(doc.text[0])
     self.assertEqual(doc.text[0], ' ')
     with self.assertRaisesMessage(Http404,
                                   'No Document matches the given query.'):
         handle_msg({}, 'not_exisitng')
Beispiel #15
0
def target_migration():
    """Rewrite the ``<<TARGET>>`` marker as ``(TARGET)`` in stored text.

    Applies the replacement to the ``text`` of every ``Doc`` and then of
    every ``Sent``, saving each record.  Progress is reported through
    ``tqdm``.
    """
    old_marker, new_marker = '<<TARGET>>', '(TARGET)'

    for doc in tqdm(Doc.objects().all()):
        doc.text = doc.text.replace(old_marker, new_marker)
        doc.save()

    for sent in tqdm(Sent.objects()):
        sent.text = sent.text.replace(old_marker, new_marker)
        sent.save()
Beispiel #16
0
def post_annotation():
    """Create or update the current user's annotation on a sentence.

    Reads ``doc``, ``target_text``, ``index``, ``anchor_offset``,
    ``focus_offset``, ``type`` and ``basket`` from the JSON request body.
    An existing annotation is looked up by document, sentence, index,
    user and type; for every type other than ``'sentence'`` the
    ``anchor_offset`` must match as well.  The first match is updated;
    when there is none a new ``Annotation`` is created.

    Returns:
        A JSON string ``{"annotation": <annotation.dump()>}``.

    Raises:
        KeyError: if a required key is missing from the payload.
        Whatever ``Doc.objects().get`` / ``Sent.objects().get`` raise
        when the document or sentence cannot be found.
    """
    data = request.get_json()

    doc_id = data['doc']
    target_text = data['target_text']
    index = data['index']
    anchor_offset = data['anchor_offset']
    focus_offset = data['focus_offset']
    annotation_type = data['type']  # named to avoid shadowing builtin type
    basket = data['basket']

    doc = Doc.objects().get(id=doc_id)
    # Fetched once; it is both the annotation's sentence and the source
    # of ``entire_text``.
    sent = Sent.objects().get(doc=doc, index=index)
    user = g.user

    criteria = {
        'doc': doc,
        'sent': sent,
        'index': index,
        'user': user,
        'type': annotation_type,
    }
    # Sentence-level annotations are matched regardless of offset; all
    # other types are matched by their anchor offset too.
    if annotation_type != 'sentence':
        criteria['anchor_offset'] = anchor_offset

    # first() returns None when nothing matches, replacing the separate
    # count() and [0] queries.
    annotation = Annotation.objects.filter(**criteria).first()
    if annotation is None:
        annotation = Annotation(**criteria)

    annotation.anchor_offset = anchor_offset
    annotation.focus_offset = focus_offset
    annotation.entire_text = sent.text
    annotation.target_text = target_text
    annotation.basket = basket
    annotation.ip = request.remote_addr

    annotation.save()

    return json.dumps({
        'annotation': annotation.dump(),
    })
Beispiel #17
0
def get_annotation(doc_id):
    """Return the current user's annotations on a document as JSON.

    Args:
        doc_id: id of the ``Doc`` whose annotations are requested.

    Returns:
        A JSON string ``{"annotations": [...]}`` holding the ``dump()`` of
        each annotation, or a 404 ``Response`` with body ``'not found'``
        when the lookup raises.
    """
    try:
        doc = Doc.objects().get(id=doc_id)
        user_annotations = Annotation.objects(doc=doc, user=g.user)
    except Exception:
        return Response('not found', status=404)

    payload = {'annotations': [item.dump() for item in user_annotations]}
    return json.dumps(payload)
Beispiel #18
0
 def testLoginWindow(self):
     # Opens the first table cell, starts the Google Drive integration
     # and checks that the second browser window shows the Google
     # sign-in page (Polish or English title).
     Doc(name=DOC_NAME, text=LOREM_IPSUM).save()
     driver = self.driver
     time.sleep(5)
     driver.get(self.base_url + "/")
     time.sleep(5)
     driver.find_element_by_css_selector("td").click()
     time.sleep(5)
     driver.find_element_by_id("gDriveIntegration").click()
     driver.find_element_by_id("authorizeGDriveLink").click()
     time.sleep(1)
     driver.switch_to_window(driver.window_handles[1])
     # assertIn reads the title once and reports it on failure.
     self.assertIn(driver.title,
                   (u"Logowanie – Konta Google",
                    u'Sign in - Google Accounts'))
Beispiel #19
0
 def testNewDocument(self):
     # Creates a document named DOC_NAME through the "newDocument" dialog
     # and checks that its stored text equals EMPTY_DOC_STRING.
     Doc(name=DOC_NAME + '1', text=LOREM_IPSUM).save()
     driver = self.driver
     time.sleep(1)
     driver.get(self.base_url)
     time.sleep(1)
     driver.find_element_by_css_selector("td").click()
     time.sleep(1)
     driver.find_element_by_id("newDocument").click()
     time.sleep(1)
     driver.find_element_by_id("documentName").clear()
     driver.find_element_by_id("documentName").send_keys(DOC_NAME)
     driver.find_element_by_id("saveDocumentButton").click()
     # assertEqual shows the stored text on failure.
     self.assertEqual(
         Doc.objects(name=DOC_NAME)[0]['text'], EMPTY_DOC_STRING)
Beispiel #20
0
 def testSaveAs(self):
     # Opens the existing document, saves it under DOC_NAME through the
     # "saveDocument" dialog and checks the copy holds LOREM_IPSUM.
     Doc(name=DOC_NAME + '1', text=LOREM_IPSUM).save()
     logger.debug('TestUI::testSaveAs documents: ' + str(Doc.objects()))
     driver = self.driver
     time.sleep(1)
     driver.get(self.base_url)
     time.sleep(1)
     driver.find_element_by_css_selector("td").click()
     time.sleep(1)
     driver.find_element_by_id("saveDocument").click()
     time.sleep(1)
     driver.find_element_by_id("documentName").clear()
     driver.find_element_by_id("documentName").send_keys(DOC_NAME)
     driver.find_element_by_id("saveDocumentButton").click()
     # assertEqual shows the stored text on failure.
     self.assertEqual(Doc.objects(name=DOC_NAME)[0]['text'], LOREM_IPSUM)
Beispiel #21
0
 def testRead(self):
     # With a single stored document, opening it must show
     # "Lorem ipsum." inside the "editorContent" frame.  A mismatch is
     # recorded in self.verificationErrors instead of failing at once.
     Doc.objects.delete()
     Doc(name=DOC_NAME, text=LOREM_IPSUM).save()

     browser = self.driver
     time.sleep(1)
     browser.get(self.base_url)
     time.sleep(1)
     browser.find_element_by_css_selector("td").click()
     time.sleep(1)
     browser.switch_to_frame("editorContent")
     time.sleep(5)

     editor_body = browser.find_element_by_css_selector("#editorBody")
     try:
         self.assertEqual("Lorem ipsum.", editor_body.text)
     except AssertionError as err:
         self.verificationErrors.append(str(err))
Beispiel #22
0
def duplicate_doc(from_type='v2', to_type='v3'):
    """Copy every document of one type, with its sentences, to another type.

    For each ``Doc`` of type *from_type* a new ``Doc`` of type *to_type*
    is saved with the same text and source, a title in which
    ``'TARGET_ONLY'`` is replaced by *to_type*, and
    ``seq = Doc.objects.count() + 1``.  Every ``Sent`` of the source
    document is then copied onto the new document.

    Args:
        from_type: ``type`` of the documents to copy.
        to_type: ``type`` assigned to the copies.
    """
    for source_doc in tqdm(Doc.objects(type=from_type).all()):
        clone = Doc(
            title=source_doc.title.replace('TARGET_ONLY', to_type),
            text=source_doc.text,
            source=source_doc.source,
            type=to_type,
        )
        clone.seq = Doc.objects.count() + 1
        clone.save()

        for sent in Sent.objects(doc=source_doc).all():
            Sent(index=sent.index, text=sent.text, doc=clone).save()
Beispiel #23
0
def generate_encrypted_files():
    """Call ``generate_encrypted_file`` for every v1, v2 or v3 document.

    Documents of any other type are skipped.  Progress is reported
    through ``tqdm``.
    """
    exportable_types = ('v1', 'v2', 'v3')
    for doc in tqdm(Doc.objects().all()):
        if doc.type in exportable_types:
            generate_encrypted_file(seq_id=doc.seq)
Beispiel #24
0
def addinfo(request):
    """Admin view: list documents (GET) or publish a new one (POST).

    GET renders ``admin/addinfo.html`` with all ``Doc`` objects under the
    ``lists`` key.

    POST reads ``title``, ``summary``, ``source``, ``author``,
    ``calendar`` and ``content`` from the form.  When title, source,
    author or content is missing or empty, ``admin/failure.html`` is
    rendered and nothing is saved.  Otherwise a ``Doc`` is saved and
    ``admin/success.html`` is rendered.  A blank summary defaults to the
    first 30 characters of the content.

    Returns:
        The rendered response for GET and POST; ``None`` for any other
        request method.
    """
    if request.method == 'GET':
        docs = Doc.objects.all()
        return render_to_response('admin/addinfo.html', {'lists': docs})

    if request.method == 'POST':
        title = request.POST.get('title')
        summary = request.POST.get('summary')
        source = request.POST.get('source')
        author = request.POST.get('author')
        published_at = request.POST.get('calendar')
        content = request.POST.get('content')
        success = '成功发布!'
        failure = '发布不成功,请确认信息填充完整后重新发布!'

        # Validate before building the document.  Fields absent from the
        # form (None) are treated like empty strings.
        # NOTE(review): the original tested ``doc.title == ''`` twice; the
        # second test may have been meant for the ``calendar`` value --
        # confirm before making it required.
        if not (title and source and author and content):
            return render_to_response('admin/failure.html',
                                      {'failure': failure})

        doc = Doc()
        doc.title = title
        doc.source = source
        doc.time = published_at
        doc.author = author
        doc.content = content
        # The original tested the new document's own ``summary`` attribute
        # rather than the submitted value; decide on the submitted summary
        # and fall back to the start of the content when it is blank.
        doc.summary = summary if summary else content[0:30]

        doc.save()
        return render_to_response('admin/success.html',
                                  {'success': success})
Beispiel #25
0
def delete_doc_type(doc_type='v3'):
    """Delete every document of the given type via ``delete_doc``.

    Args:
        doc_type: ``type`` of the documents to delete.
    """
    matching_docs = Doc.objects(type=doc_type).all()
    for doc in tqdm(matching_docs):
        delete_doc(doc.id)