def _get_sentences(self, article):
    """Return the Sentence objects for *article*, creating them if needed.

    @param article: an amcat.Article or AnalysedArticle model instance.
    @return: a sequence of Sentence model instances
    """
    # An AnalysedArticle wraps an Article; unwrap before delegating to sbd.
    target = article.article if isinstance(article, AnalysedArticle) else article
    return sbd.get_or_create_sentences(target)
def _do_retrieve_article(self, analysed_article):
    """Create an AnalysisSentence per article sentence and queue each for parsing.

    @param analysed_article: an AnalysedArticle model instance
    @return: True (signals successful retrieval)
    """
    sentences = sbd.get_or_create_sentences(analysed_article.article)
    for sent in sentences:
        asent = AnalysisSentence.objects.create(
            analysed_article=analysed_article,
            sentence=sent,
        )
        log.debug("Parsing {asent.id} : {asent.sentence.sentence!r}".format(asent=asent))
        add_sentence(asent)
    return True
def store_parse(self, analysed_article, data):
    """Create an AnalysisSentence per article sentence and store the parse result.

    @param analysed_article: an AnalysedArticle model instance
    @param data: raw parser output, handed to interpret_output
    """
    # Map each underlying Sentence id to the id of its new AnalysisSentence.
    sentence_map = {}
    for sentence in sbd.get_or_create_sentences(analysed_article.article):
        created = AnalysisSentence.objects.create(
            analysed_article=analysed_article, sentence=sentence)
        sentence_map[sentence.id] = created.id
    result = interpret_output(sentence_map, data)
    wordcreator.store_analysis(analysed_article, *result)
def getTokensFromRawText(self, a):
    """
    Yield (paragraph, sentence, word, pos) tuples for article *a*.

    If the article has no stored sentences yet, they are created from the
    raw text via sbd; each sentence is then tokenized with
    self.tokenizeRawText.
    """
    sentences = a.sentences.all()
    # Keep the explicit len() check: it evaluates the queryset exactly as the
    # original did before falling back to sentence-boundary detection.
    if len(sentences) == 0:
        sentences = sbd.get_or_create_sentences(a)
    for sent in sentences:
        for word, pos in self.tokenizeRawText(sent.sentence):
            yield (sent.parnr, sent.sentnr, word, pos)
def split(request, project, article):
    """Show the article-split form and, on a valid POST, perform the split.

    @param request: the current HttpRequest
    @param project: the Project the article belongs to
    @param article: the Article to split
    @return: the rendered split form, or the split-done page after a split
    """
    sentences = sbd.get_or_create_sentences(article).only("sentence", "parnr")
    form = forms.SplitArticleForm(project, article, data=request.POST or None)
    if form.is_valid():
        # Restrict to the sentences the user selected; get_sentence_ids may
        # yield None for non-sentence form fields, so discard it.
        selected_sentence_ids = set(get_sentence_ids(request.POST)) - {None}
        if selected_sentence_ids:
            sentences = Sentence.objects.filter(id__in=selected_sentence_ids)
        context = handle_split(form, project, article, sentences)
        return render(request, "navigator/article/split-done.html", context)

    # Get sentences, skip headline.
    sentences = _get_sentences(sentences)
    # BUG FIX: iterator.next() is Python-2-only; the next() builtin works on
    # both Python 2 and 3.
    next(sentences)
    return render(request, "navigator/article/split.html", locals())
def split(request, project, article):
    """Show the article-split form and, on a valid POST, perform the split.

    @param request: the current HttpRequest
    @param project: the Project the article belongs to
    @param article: the Article to split
    @return: the rendered split form, or the split-done page after a split
    """
    sentences = sbd.get_or_create_sentences(article).only("sentence", "parnr")
    form = forms.SplitArticleForm(project, article, data=request.POST or None)
    if form.is_valid():
        # Restrict to the sentences the user selected; get_sentence_ids may
        # yield None for non-sentence form fields, so discard it.
        selected_sentence_ids = set(get_sentence_ids(request.POST)) - {None}
        if selected_sentence_ids:
            sentences = Sentence.objects.filter(id__in=selected_sentence_ids)
        context = handle_split(form, project, article, sentences)
        return render(request, "navigator/article/split-done.html", context)

    # Get sentences, skip headline.
    sentences = _get_sentences(sentences)
    # BUG FIX: iterator.next() is Python-2-only; the next() builtin works on
    # both Python 2 and 3.
    next(sentences)
    return render(request, "navigator/article/split.html", locals())
def store_parse(self, analysed_article, data):
    """Interpret CoreNLP XML output in *data* and store the resulting analysis.

    If AnalysisSentences already exist for the article, they are re-used
    (lined up against the sbd sentences), provided no tokens have been
    stored yet; otherwise fresh AnalysisSentences are created.

    @param analysed_article: an AnalysedArticle model instance
    @param data: raw CoreNLP output (XML string, or an error message)
    @raise Exception: if CoreNLP reported failure, or tokens already exist
    """
    if data.startswith("CoreNLP failed"):
        raise Exception(data)
    root = ElementTree.fromstring(data)
    # If the analysis sentences already exist, check there are no tokens and
    # line the analysis_sentences up; otherwise, create new ones.
    sentences = list(sbd.get_or_create_sentences(analysed_article.article))
    if AnalysisSentence.objects.filter(analysed_article=analysed_article).exists():
        if Token.objects.filter(sentence__analysed_article=analysed_article).exists():
            raise Exception("Article already has tokens!")
        analysis_sentences = [
            AnalysisSentence.objects.get(
                analysed_article=analysed_article, sentence=sentence).id
            for sentence in sentences
        ]
    else:
        analysis_sentences = [
            AnalysisSentence.objects.create(
                analysed_article=analysed_article, sentence=sentence).id
            for sentence in sentences
        ]
    result = interpret_xml(analysis_sentences, root)
    # BUG FIX: removed a debugging leftover that pickled result[1] to
    # /tmp/triples -- it leaked an open file handle and opened the file in
    # text mode, which breaks pickle on Python 3.
    wordcreator.store_analysis(analysed_article, *result)
def _do_retrieve_article(self, analysed_article):
    """Create an AnalysisSentence for every sentence of the article and submit it.

    @param analysed_article: an AnalysedArticle model instance
    @return: True (signals successful retrieval)
    """
    for sent in sbd.get_or_create_sentences(analysed_article.article):
        asent = AnalysisSentence.objects.create(
            analysed_article=analysed_article,
            sentence=sent,
        )
        log.debug("Parsing {asent.id} : {asent.sentence.sentence!r}".format(asent=asent))
        add_sentence(asent)
    return True
def filter_queryset(self, queryset):
    """Restrict the inherited queryset to this view's article and its sentences.

    Sentences are created on demand via sbd if the article has none yet.
    """
    base = super(SentenceViewSet, self).filter_queryset(queryset)
    article_sentences = sbd.get_or_create_sentences(self.article)
    return base.filter(article=self.article, id__in=article_sentences)
def store_parse(self, analysed_article, data):
    """Create an AnalysisSentence per article sentence and store the parse result.

    @param analysed_article: an AnalysedArticle model instance
    @param data: raw parser output, handed to interpret_output
    """
    article_sentences = sbd.get_or_create_sentences(analysed_article.article)
    # Sentence id -> id of the freshly created AnalysisSentence.
    analysis_sentences = {
        s.id: AnalysisSentence.objects.create(
            analysed_article=analysed_article, sentence=s).id
        for s in article_sentences
    }
    wordcreator.store_analysis(
        analysed_article, *interpret_output(analysis_sentences, data))