import json

from django.shortcuts import render
from pynlpl.formats import folia

from .models import Sentence, Word  # assumed location of the app's models


def cql_search(request):
    from pynlpl.formats import fql, cql
    # parse the incoming parameters
    params = json.loads(request.body.decode('utf-8'))
    # rebuild the FoLiA document from the current database contents
    doc = folia.Document(id='doc')
    text = folia.Text(doc, id='doc.text')
    sentences = Sentence.objects.all()
    # mirror every sentence and its words into the FoLiA document
    for s in sentences:
        sen = text.append(folia.Sentence(doc, id=doc.id + '.s.' + str(s.id)))
        words = Word.objects.filter(Sentence_id=s.id)
        for w in words:
            sen.append(folia.Word(doc, id=doc.id + '.s.' + str(s.id) + '.w.' + str(w.id),
                                  text=w.value))
    doc.append(text)
    # translate the CQL query to FQL and run it against the document
    query = fql.Query(cql.cql2fql(params['title']))
    texts = query(doc)
    # map each hit back to its sentence id (word ids look like 'doc.s.<id>.w.<id>')
    arr = []
    for t in texts:
        arr.append(t[0].parent.id.split('s.')[1])
    sens = Sentence.objects.filter(id__in=arr)
    # render the results
    return render(request, 'cabinet/cql_results.html', {'texts': texts, 'sens': sens})
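# A minimal sketch of the CQL-to-FQL step in isolation, useful for testing the
# query translation without the Django view. The example query string is an
# assumption, not taken from the original code.
from pynlpl.formats import fql, cql

fql_string = cql.cql2fql('"de" [ pos = "N.*" ]')  # CQL: literal "de" followed by a noun
query = fql.Query(fql_string)
# query(doc) then yields the matching spans of a folia.Document, as in the view above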
# module-level imports assumed: import datetime; from pynlpl.formats import folia
def splitcorrection(self, word, newwords, **kwargs):
    self.errout("Splitting " + str(word) + " into " + ' '.join(str(nw) for nw in newwords))
    sentence = word.sentence()
    # wrap the replacement strings in folia.Word elements, ids generated within the sentence
    newwords = [folia.Word(self.doc, generate_id_in=sentence, text=w) for w in newwords]
    kwargs['suggest'] = True
    kwargs['datetime'] = datetime.datetime.now()
    word.split(*newwords, **kwargs)
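# Hypothetical invocation sketch: `corrector` is assumed to be an instance of
# the class these methods belong to, `word` a folia.Word from corrector.doc,
# and cls='spliterror' an assumed correction class forwarded via **kwargs.
corrector.splitcorrection(word, ['beter', 'schap'], cls='spliterror')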
# module-level imports assumed: import datetime; from pynlpl.formats import folia
def mergecorrection(self, newword, originalwords, **kwargs):
    self.errout("Merging " + ' '.join(str(ow) for ow in originalwords) + " into " + newword)
    sentence = originalwords[0].sentence()
    if not sentence:
        raise Exception("Expected sentence for " + repr(originalwords[0]) +
                        ", got " + repr(sentence))
    # wrap the merged form in a folia.Word, its id generated within the sentence
    newword = folia.Word(self.doc, generate_id_in=sentence, text=newword)
    kwargs['suggest'] = True
    kwargs['datetime'] = datetime.datetime.now()
    sentence.mergewords(newword, *originalwords, **kwargs)
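# Hypothetical counterpart to the split example above; word1 and word2 are
# assumed adjacent folia.Word instances from the same sentence, and
# cls='runonerror' an assumed correction class forwarded via **kwargs.
corrector.mergecorrection('beterschap', [word1, word2], cls='runonerror')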
                    # fragment: annotating the i-th word of an already tokenised sentence
                    if pos:
                        words[i].append(folia.PosAnnotation(foliadoc, cls=pos))
                else:
                    print("WARNING: Out of sync after calling Frog! ", i, word, file=sys.stderr)
        else:
            # pass untokenised sentence
            try:
                sentext = s.text()
            except folia.NoSuchText:
                continue
            response = frogclient.process(sentext)
            for i, (word, lemma, morph, pos) in enumerate(response):
                if legacy:
                    legacyout(i, word, lemma, morph, pos)
                if word:
                    w = folia.Word(foliadoc, text=word, generate_id_in=s)
                    if lemma:
                        w.append(folia.LemmaAnnotation(foliadoc, cls=lemma))
                    if pos:
                        w.append(folia.PosAnnotation(foliadoc, cls=pos))
                    s.append(w)
    if not found_s:
        # pass paragraph
        try:
            partext = p.text()
        except folia.NoSuchText:
            continue
        s = folia.Sentence(foliadoc, generate_id_in=p)
        response = frogclient.process(partext)
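# The fragment above assumes `frogclient` is already connected to a running
# Frog server. A minimal setup sketch; host and port are assumptions:
from pynlpl.clients.frogclient import FrogClient

frogclient = FrogClient('localhost', 12345)
# process() yields one (word, lemma, morph, pos) tuple per token, as unpacked above
for word, lemma, morph, pos in frogclient.process("Dit is een zin."):
    print(word, lemma, pos)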