Example #1

def _get_sentences(self, article):
    """
    Return the sentences in the article, creating them if needed.
    @param article: an amcat.Article or AnalysedArticle model instance.
    @return: a sequence of Sentence model instances
    """
    if isinstance(article, AnalysedArticle):
        article = article.article
    return sbd.get_or_create_sentences(article)
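The helper accepts either model and unwraps AnalysedArticle before delegating to sbd.get_or_create_sentences. A minimal usage sketch, assuming the amcat Article model is importable and using a purely hypothetical article id:

article = Article.objects.get(id=123)  # hypothetical id
for sentence in self._get_sentences(article):
    # Sentence rows carry paragraph/sentence numbers and the text (see Example #4)
    print(sentence.parnr, sentence.sentnr, sentence.sentence)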
Example #2
def _do_retrieve_article(self, analysed_article):
    for sentence in sbd.get_or_create_sentences(analysed_article.article):
        asent = AnalysisSentence.objects.create(
            analysed_article=analysed_article, sentence=sentence)
        log.debug("Parsing {asent.id} : {asent.sentence.sentence!r}".format(**locals()))
        add_sentence(asent)
    return True
Example #3
def store_parse(self, analysed_article, data):
    analysis_sentences = {
        sentence.id: AnalysisSentence.objects.create(
            analysed_article=analysed_article, sentence=sentence).id
        for sentence in sbd.get_or_create_sentences(analysed_article.article)
    }
    result = interpret_output(analysis_sentences, data)
    wordcreator.store_analysis(analysed_article, *result)
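The dict comprehension builds a translation table from each Sentence id to the id of its freshly created AnalysisSentence, which interpret_output can use to re-key the parser's sentence references. An illustration of its shape, with hypothetical ids:

# {<Sentence.id>: <AnalysisSentence.id>, ...}
analysis_sentences = {101: 5001, 102: 5002}  # hypothetical ids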
Example #4
def getTokensFromRawText(self, a):
    """
    If articles are not parsed, get the raw text and transform it into a
    list of tokens via self.tokenizeRawText.
    """
    sentences = a.sentences.all()
    if len(sentences) == 0:
        sentences = sbd.get_or_create_sentences(a)
    for s in sentences:
        paragraph = s.parnr
        sentence = s.sentnr
        for word, pos in self.tokenizeRawText(s.sentence):
            yield (paragraph, sentence, word, pos)
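A sketch of consuming the generator, assuming (as the loop above implies) that self.tokenizeRawText yields (word, pos) pairs:

for parnr, sentnr, word, pos in self.getTokensFromRawText(article):
    # e.g. "1.2 house/NN" -- paragraph.sentence followed by word/pos
    print("%s.%s %s/%s" % (parnr, sentnr, word, pos))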
Example #5
def split(request, project, article):
    sentences = sbd.get_or_create_sentences(article).only("sentence", "parnr")
    form = forms.SplitArticleForm(project, article, data=request.POST or None)

    if form.is_valid():
        selected_sentence_ids = set(get_sentence_ids(request.POST)) - {None,}
        if selected_sentence_ids:
            sentences = Sentence.objects.filter(id__in=selected_sentence_ids)
            context = handle_split(form, project, article, sentences)
            return render(request, "navigator/article/split-done.html", context)

    # Get sentences, skip headline
    sentences = _get_sentences(sentences)
    next(sentences)
    return render(request, "navigator/article/split.html", locals())
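The next(sentences) call drops the first item before rendering, matching the "skip headline" comment; this assumes the first sentence yielded for an article is its headline. The same idea in isolation, as a hedged sketch:

sentences = iter(sbd.get_or_create_sentences(article))
next(sentences)  # assumption: the first sentence is the headline
body_sentences = list(sentences)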
Example #6
def store_parse(self, analysed_article, data):
    if data.startswith("CoreNLP failed"):
        raise Exception(data)

    root = ElementTree.fromstring(data)
    # If the analysis sentences already exist, check that there are no tokens
    # yet and link each existing analysis sentence up; otherwise, create new ones.
    sentences = list(sbd.get_or_create_sentences(analysed_article.article))
    if AnalysisSentence.objects.filter(analysed_article=analysed_article).exists():
        if Token.objects.filter(sentence__analysed_article=analysed_article).exists():
            raise Exception("Article already has tokens!")
        analysis_sentences = [AnalysisSentence.objects.get(
                                  analysed_article=analysed_article, sentence=sentence).id
                              for sentence in sentences]
    else:
        analysis_sentences = [AnalysisSentence.objects.create(
                                  analysed_article=analysed_article, sentence=sentence).id
                              for sentence in sentences]
    result = interpret_xml(analysis_sentences, root)
    # Debug dump of the dependency triples for offline inspection
    import pickle
    with open("/tmp/triples", "wb") as f:
        pickle.dump(result[1], f)
    wordcreator.store_analysis(analysed_article, *result)
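The two .exists() guards make the store idempotent up to tokens: existing AnalysisSentence rows are looked up and reused, but a second run after tokens were written raises. A sketch of the resulting behaviour, with aa a hypothetical AnalysedArticle:

self.store_parse(aa, corenlp_xml)  # first run: creates sentences and tokens
self.store_parse(aa, corenlp_xml)  # second run: raises "Article already has tokens!"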
Example #7
def filter_queryset(self, queryset):
    qs = super(SentenceViewSet, self).filter_queryset(queryset)
    return qs.filter(article=self.article,
                     id__in=sbd.get_or_create_sentences(self.article))
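Passing the result of sbd.get_or_create_sentences directly to id__in relies on Django accepting a queryset (or iterable of model instances) there and matching on the primary key. An explicit, equivalent spelling:

sentence_ids = [s.id for s in sbd.get_or_create_sentences(self.article)]
return qs.filter(article=self.article, id__in=sentence_ids)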