Exemple #1
0
def render_models_for_document(doc):
    """
    Create sections and articles from the original texts of a document.

    Every OriginalText attached to ``doc`` (i.e. every language) is parsed
    as YAML. The parsed value is used as a mapping of section title to a
    mapping of article title to article text.

    A section that already exists for the same document, title and
    language is left untouched, and its articles are assumed to exist as
    well. Missing sections are created together with their articles.

    Raises exception on error: parsing and database failures propagate to
    the caller instead of being swallowed.
    """
    for ot in OriginalText.objects.filter(document=doc):
        # NOTE(review): yaml.load without a safe loader can construct
        # arbitrary Python objects. If the original text can come from an
        # untrusted source, switch to yaml.safe_load.
        parsed = yaml.load(ot.text)
        pprint(parsed.keys())  # debug output: the section titles found
        for section_title, section_contents in parsed.items():
            # We only need to check whether the section exists - if so, we
            # assume the articles to exist also. An explicit existence
            # query (rather than get() inside a catch-all except) also
            # covers the case of several matching sections and lets real
            # database errors surface.
            already_rendered = Section.objects.filter(
                document=doc, title=section_title, language=ot.language
            ).exists()
            if already_rendered:
                continue
            s = Section(document=doc, title=section_title, language=ot.language)
            s.save()
            if not section_contents:
                # Section without any articles
                continue
            for article_title, article_contents in section_contents.items():
                a = Article(
                    section=s,
                    title=article_title,
                    text=article_contents,
                    language=ot.language,
                )
                a.save()
Exemple #2
0
def render_models_for_text(text):
    """
    Rebuild the sections, articles, paragraphs, sentences and words of a text.

    ``text`` is either a Text object or a value accepted by ``isnumeric``,
    in which case the Text with that id is fetched first.

    All sections currently attached to the text are deleted. Then
    ``text.original_text`` is parsed as YAML and used as a mapping of
    section title to a mapping of article title to article body. The
    objects are recreated from it. Articles with an empty body are skipped.
    """
    if isnumeric(text):
        text = Text.objects.get(id=text)
    for section in text.sections:
        section.delete()
    # NOTE(review): which Loader is in scope is not visible from here. If
    # the original text can be untrusted, make sure it is a safe loader.
    parsed = load(text.original_text, Loader)
    if parsed is None:
        # An empty YAML document parses to None: nothing to create
        return
    pprint(parsed.keys())  # debug output: the section titles found
    for section_title, section_contents in parsed.items():
        s = Section(text=text, title=section_title)
        s.slug = slugify(s.title)
        s.save()
        if not section_contents:
            # Section without any articles
            continue
        for article_title, article_contents in section_contents.items():
            _render_article(s, article_title, article_contents)


def _render_article(section, article_title, article_contents):
    """Create one article under ``section`` plus its paragraphs."""
    article_title = article_title.strip()
    # A YAML key without a value parses to None; treat it as an empty body
    article_contents = (article_contents or '').strip()
    if not article_contents:
        return
    a = Article(section=section, title=article_title or '-')
    a.slug = slugify(a.title)
    a.save()
    # Now split article into paragraphs, one per non-blank line
    for par in article_contents.split('\n'):
        par = par.strip()
        if par:
            _render_paragraph(a, par)


def _render_paragraph(article, par):
    """Create one paragraph under ``article`` plus its sentences."""
    p = Paragraph(article=article, title='P: %s...' % par[:7])
    p.slug = slugify(p.title[:7])
    p.save()
    # Sentences are delimited by full stops
    for sen in par.split('.'):
        sen = sen.strip()
        if sen:
            _render_sentence(p, sen)


def _render_sentence(paragraph, sen):
    """Create one sentence under ``paragraph`` plus its words."""
    se = Sentence(paragraph=paragraph, title='S: %s...' % sen[:7], raw_buffer=sen)
    se.slug = slugify(se.title[:7])
    se.save()
    # NOTE(review): an earlier revision computed sen.replace(',', ' ,')
    # here but never used the result, so commas stay attached to the
    # preceding word. Confirm whether commas were meant to become separate
    # tokens before changing the tokenisation.
    for word in sen.split(' '):
        word = word.strip()
        if word:
            w = Word(sentence=se, title=word[:50], raw_buffer=word)
            w.slug = slugify(word[:50])
            w.save()