Beispiel #1
0
    def initFromStr(self,
                    informationsdata,
                    src,
                    reading_position,
                    srclanguage):
        """
                Body.initFromStr

                Reset the object, then split the lines of <src> into entries :
                every line recognized as an entry title closes the entry being
                read and opens a new one. The entries are stored in
                self.bodydata.

                informationsdata        : InformationsData object
                src                     : either None (no body available), either
                                          a list of strings without any EOF marker.
                reading_position        : ReadingPosition object
                srclanguage             : Logotheras.language.language.Language object

                Return <self>.
        """
        self.reset()
        self.reading_position = reading_position

        if src is None:
            # no body at all : nothing to be analysed.
            return self

        # <src> is the body to be analysed :
        self.reading_position.update( body = src )

        first_line_read = False         # becomes True once the first line has been handled
        pending_hlevel = None           # hierarchical level of the entry being read
        pending_title_text = None       # text following the hlevel in the entry's title
        pending_lines = []              # content (list of strings) of the entry being read

        for line in src:

            # is <line> an entry title ? A new parser object is built for each line.
            title_parser = HierarchicalLevel(errors = self.errors,
                formatstr = logotheras.options.OPTIONS["textdbdict::HLEVELformatrst by reading"])
            (is_title,
             parsed_hlevel,
             parsed_title_text) = title_parser.initFromTextDBDictEntryTitle(line)

            if not first_line_read:
                # first line of <src> : in both cases below the entry being read
                # starts with a freshly built hierarchical level.
                # NOTE(review): when <line> is a title, <parsed_hlevel> is not kept
                # here, unlike what is done for the following titles -- confirm
                # that this is intended.
                first_line_read = True
                pending_hlevel = HierarchicalLevel(self.errors,
                    formatstr = \
                        logotheras.options.OPTIONS["textdbdict::HLEVELformatrst by reading"])

                if is_title:
                    # <line> is the title of the first entry of <src>.
                    pending_title_text = parsed_title_text
                    pending_lines = []
                else:
                    # content linked with the article itself, not with an entry.
                    pending_lines.append( line )

                continue

            if not is_title:
                # one more line of content for the entry being read.
                pending_lines.append( line )
                continue

            # <line> is a title : the entry being read is over and a new one begins.
            finished_entry = Entry(errors = self.errors,
                                   logotherasdata = self.logotherasdata,
                                  )

            # An entry having neither title text nor hierarchical depth is dropped :
            # such an entry appears when lines precede the first title of <src>.
            # Every other entry is stored, even without content.
            if pending_title_text is not None or pending_hlevel.getDepth() != 0:
                finished_entry.initFromStr(informationsdata = informationsdata,
                                           title_hlevel = pending_hlevel,
                                           title_pending_text = pending_title_text,
                                           str_content = pending_lines,
                                           reading_position = self.reading_position,
                                           srclanguage = srclanguage)

                key = finished_entry.entrydata.getEntryNameInBodyData()
                self.bodydata[key] = finished_entry.entrydata

            # the new entry, whose title is <line> :
            pending_hlevel = parsed_hlevel
            pending_title_text = parsed_title_text
            pending_lines = []

        if pending_hlevel is None:
            # nothing has been read from <src>.
            return self

        # the last entry has not been closed by a title : it is stored here.
        # NOTE(review): this entry is stored under entrydata.title and without the
        # title/depth filter, whereas the entries closed inside the loop are stored
        # under entrydata.getEntryNameInBodyData() -- confirm that this difference
        # is intended.
        last_entry = Entry(errors = self.errors,
                           logotherasdata = self.logotherasdata,
                          )
        last_entry.initFromStr(informationsdata = informationsdata,
                               title_hlevel = pending_hlevel,
                               title_pending_text = pending_title_text,
                               str_content = pending_lines,
                               reading_position = self.reading_position,
                               srclanguage = srclanguage)

        self.bodydata[last_entry.entrydata.title] = last_entry.entrydata

        return self
Beispiel #2
0
    def getTextDBDictRepr(self):
        """
                Entry.getTextDBDictRepr

                Return the representation of the entry in the TextDBDict format :
                the title line (only if the entry has a title), the non-empty
                lines of the text, then the representation of each extract,
                everything being joined with NEWLINE.
        """
        output = []

        #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # title line : hierarchical level followed by the title
        title = self.entrydata.title

        if title is not None:
            level = HierarchicalLevel(errors = self.errors,
                formatstr = logotheras.options.OPTIONS["textdbdict::HLEVELformatrst by writing"])
            level.setData( self.entrydata.hlevel )

            # If a part of the title is to be duplicated, this part is written
            # between two identical symbols; the symbol depends on the
            # 'important' flag.
            to_be_duplicated = self.entrydata.entry_to_be_duplicated
            if to_be_duplicated is not None:
                marker = (BODY_ARTICLE_TO_DUPLICATED_IMPORTANT
                          if self.entrydata.important_entry_to_be_dup
                          else BODY_ARTICLE_TO_DUPLICATED_NOTIMPORTANT)

                title = title.replace( to_be_duplicated,
                                       marker + to_be_duplicated + marker )

            output.append( "{0} {1}".format(level.getTextDBDictRepr(), title) )

        #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # text after the title : the empty lines are skipped
        output.extend( line for line in self.entrydata.text if line != "" )

        #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # without any extract, an empty line closes the title and the text :
        if len(self.entrydata) == 0:
            output.append("")

        #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # extracts : one Extract object is built for each item of the entry data
        for extractdata in self.entrydata:

            extract = Extract(errors = self.errors,
                              logotherasdata = self.logotherasdata)
            extract.setData( extractdata )

            output.append( extract.getTextDBDictRepr() )

        return NEWLINE.join(output)