def test_makeContext0(self):
    """Index two small Russian texts and check the contexts found for 'маме'.

    Two fixture files are written, indexed with ``makeDB`` into ``testdb``
    and queried through ``query``; the stringified values returned by
    ``makeContexts`` are compared with the expected contexts and
    highlight positions.

    All files created by the test are removed in a ``finally`` block, so
    a failing assertion or an exception in the code under test no longer
    leaves stale fixtures or database files behind.
    """
    f1_name = 'test_f1.txt'
    f2_name = 'test_f2.txt'
    db_name = 'testdb'
    expected = "[(['Ляляля маму'], [[(7, 11)]]), (['мамами мыла раму'], [[(0, 6)]])]"
    try:
        with open(f1_name, 'w') as f1:
            f1.write('Ляляля маму. Мррррррр. Бебебе!')
        with open(f2_name, 'w') as f2:
            f2.write('мамами мыла раму. М')
        makeDB([f1_name, f2_name], db_name)
        # NOTE(review): the shelf handle is never read here; query() is
        # given the database *name*. It is kept open around the query to
        # preserve the original behaviour -- confirm whether it is needed.
        with shelve.open(db_name):
            qres = query('маме', db_name)
            res = str(list(makeContexts(qres).values()))
        self.assertEqual(expected, res)
    finally:
        # The database may be stored as a single file or as the
        # .dat/.bak/.dir triple, so every candidate is checked.
        leftovers = (
            f1_name,
            f2_name,
            db_name,
            db_name + '.dat',
            db_name + '.bak',
            db_name + '.dir',
        )
        for leftover in leftovers:
            if os.path.exists(leftover):
                os.unlink(leftover)
def test_makeContexts(self):
    """Check that the query 'foo ham' yields only the context from f2.txt.

    Two fixture files are written and indexed into ``testdb`` with
    ``makeDB.makeDB``; the result of ``getQuery.query`` is passed to
    ``getQuery.makeContexts`` and its string form is compared with the
    expected ``OrderedDict`` representation.

    Cleanup runs in a ``finally`` block so that an exception raised by
    the code under test does not leave fixture or database files behind.
    """
    expected = "OrderedDict([('f2.txt', ['egg foo ham'])])"
    try:
        with open('f1.txt', 'w') as first:
            first.write('foo bar')
        with open('f2.txt', 'w') as second:
            second.write('egg foo ham')
        makeDB.makeDB(['f1.txt', 'f2.txt'], 'testdb')
        qres = getQuery.query('foo ham', 'testdb')
        res = str(getQuery.makeContexts(qres))
    finally:
        # The database may be stored as a single file or as the
        # .dat/.bak/.dir triple, so every candidate is checked.
        leftovers = (
            'f1.txt',
            'f2.txt',
            'testdb',
            'testdb.dat',
            'testdb.bak',
            'testdb.dir',
        )
        for leftover in leftovers:
            if os.path.exists(leftover):
                os.unlink(leftover)
    self.assertEqual(expected, res)
def do_POST(self):
    """Handle a search form submission and write the HTML results page.

    Reads ``query``, ``doc_count`` and the paging buttons from the POSTed
    form, updates the paging state stored on ``myHandler`` class
    attributes (``DOC_START``, ``DOC_COUNT``, ``QUERY``,
    ``QUTES_COUNTS``), runs the search through ``getQuery.query`` and
    ``getQuery.makeContexts`` and writes a page in which every matched
    word is wrapped in ``<b>``.

    Each ``QUTES_COUNTS`` entry is a two-item list
    ``[quotes_per_page, quote_offset]`` for the document at that index.

    A missing or blank ``query`` field produces the "empty query" page
    instead of raising ``AttributeError``.
    """
    # Imported locally so the block does not rely on a module-level import.
    import html

    form = cgi.FieldStorage(
        fp=self.rfile,
        headers=self.headers,
        environ={
            'REQUEST_METHOD': 'POST',
            'CONTENT_TYPE': self.headers['Content-Type']
        })
    self.send_response(200)
    self.send_header('Content-type', 'text/html; charset=utf-8')
    self.end_headers()

    # The field is absent when the query box was left blank, so guard
    # before lower-casing.
    raw_query = form.getfirst('query')
    inputWords = raw_query.lower() if raw_query is not None else None

    try:
        doc_count = int(form.getvalue('doc_count'))
    except (TypeError, ValueError):
        # Missing or non-numeric value: fall back to the default page size.
        doc_count = 2
    else:
        if doc_count < 0:
            doc_count = 2

    # Document-level paging; DOC_START never drops below 1.
    if form.getvalue('begin'):
        myHandler.DOC_START = 1
    elif form.getvalue('back'):
        myHandler.DOC_START = max(myHandler.DOC_START - doc_count, 1)
    elif form.getfirst('forward'):
        myHandler.DOC_START += doc_count

    if inputWords is not None:
        if myHandler.QUERY == inputWords and \
                myHandler.DOC_COUNT == doc_count:
            # Same query and page size as the previous request: apply the
            # per-document quote paging buttons.
            windows = myHandler.QUTES_COUNTS
            for i in range(len(windows)):
                idx = str(i)
                try:
                    countQuote = int(form.getvalue('countQuote' + idx))
                except (TypeError, ValueError):
                    countQuote = 10
                if form.getvalue('beginQuote' + idx):
                    windows[i] = [countQuote, 0]
                elif form.getvalue('backQuote' + idx):
                    # The quote offset never drops below 0.
                    windows[i][1] = max(windows[i][1] - countQuote, 0)
                elif form.getfirst('forwardQuote' + idx):
                    windows[i][1] += countQuote
                windows[i][0] = countQuote
        else:
            # New query or new page size: reset all paging state.
            myHandler.DOC_START = 1
            myHandler.DOC_COUNT = 2
            myHandler.QUERY = inputWords
            myHandler.QUTES_COUNTS = [
                [5, 0] for _ in range(myHandler.DOC_COUNT)
            ]

        qres = getQuery.query(inputWords, config.DATABASE_NAME,
                              myHandler.lemma, doc_count,
                              myHandler.DOC_START, myHandler.QUTES_COUNTS)
        # resDict - { 'path' : ( [ 'context' ],
        #     [ [ (stBoldWord_1, endBoldWord_1),
        #         (stBoldWord_2, endBoldWord_2) ] ] ) }
        resDict = getQuery.makeContexts(qres, myHandler.QUTES_COUNTS)

        if myHandler.QUTES_COUNTS is None:
            myHandler.QUTES_COUNTS = []

        parts = []
        for i, path in enumerate(resDict):
            # Make sure a quote window exists for this document; without
            # this, more documents than window entries raised IndexError.
            if i >= len(myHandler.QUTES_COUNTS):
                myHandler.QUTES_COUNTS.append([5, 0])

            # NOTE(review): path and context text are written into the
            # page unescaped -- confirm indexed documents are trusted.
            # list for documents
            parts.append(r'<li>' + r'<b>' + path + r'</b>' + r'<ul>')
            contexts, all_positions = resDict[path][0], resDict[path][1]
            for context, positions in zip(contexts, all_positions):
                # list for contexts
                parts.append(r'<li>')
                cursor = 0
                for pos in positions:
                    # Plain text up to the match, then the match in bold.
                    parts.append(context[cursor:pos[0]])
                    parts.append(r'<b>' + context[pos[0]:pos[1]] + r'</b>')
                    cursor = pos[1]
                # Remainder of the context after the last bold word.
                parts.append(context[cursor:])
                parts.append(r'</li>')

            idx = str(i)
            parts.append(r'</ul><p>')
            parts.append(r'<input type="submit" name="beginQuote' + idx +
                         '" value="В начало"> ')
            parts.append(r'<input type="submit" name="backQuote' + idx +
                         '" value="Назад"> ')
            parts.append(r'<input type="submit" name="forwardQuote' + idx +
                         '" value="Вперед"> ')
            parts.append(r'<input type="text" name="countQuote' + idx +
                         r'" value="')
            parts.append(str(myHandler.QUTES_COUNTS[i][0]) +
                         r'"></li></p>')

        if parts:
            result_line = r'<ol type="I">' + ''.join(parts) + r'</ol>'
        else:
            result_line = r'<p>Ничего не найдено. Искать в Яндекс, Google, Mail.ru</p>'
    else:
        result_line = r'<p><p><p>Задан пустой поисковый запрос</p></p></p>'
        inputWords = ''

    myHandler.QUERY = inputWords
    myHandler.DOC_COUNT = doc_count

    # The query is user input echoed into an HTML attribute value, so it
    # must be escaped (including quotes) before being written back.
    page = (myHandler.HTML_DOC_1 + '"' +
            html.escape(myHandler.QUERY, quote=True) + '"' +
            myHandler.HTML_DOC_2 + '"' + str(myHandler.DOC_COUNT) +
            myHandler.HTML_DOC_3 + result_line + myHandler.HTML_DOC_4)
    self.wfile.write(bytes(page, encoding='utf-8'))
def test_doc_count_0(self):
    """Two documents are returned for 'смотреть' with doc_count=2, start=1."""
    analyzer = lemmatizer()
    found = getQuery.query(
        'смотреть', config.DATABASE_NAME, analyzer, 2, 1, None)
    # contexts - { 'path' : ( [ 'context' ],
    #     [ [ (stBoldWord_1, endBoldWord_1),
    #         (stBoldWord_2, endBoldWord_2) ] ] ) }
    contexts = getQuery.makeContexts(found, None)
    doc_paths = list(contexts.keys())
    self.assertEqual(len(doc_paths), 2)
def test_doc_names(self):
    """The first two hits for 'смотреть' are mid_text_1.txt and mid_text_2.txt."""
    analyzer = lemmatizer()
    found = getQuery.query(
        'смотреть', config.DATABASE_NAME, analyzer, 2, 1, None)
    contexts = getQuery.makeContexts(found, None)
    # Sorted so the comparison does not depend on the order of the results.
    doc_paths = sorted(contexts.keys())
    self.assertEqual(doc_paths, ['mid_text_1.txt', 'mid_text_2.txt'])
def test_doc_count_4(self):
    """No documents are returned for 'смотреть' with doc_count=1, start=3.

    Presumably the test database holds fewer than three matching
    documents -- verify against the fixture corpus.
    """
    analyzer = lemmatizer()
    found = getQuery.query(
        'смотреть', config.DATABASE_NAME, analyzer, 1, 3, None)
    contexts = getQuery.makeContexts(found, None)
    doc_paths = list(contexts.keys())
    self.assertEqual(len(doc_paths), 0)