def initFromStr(self, informationsdata, src, reading_position, srclanguage):
    """
        Body.initFromStr

        Reset <self>, then split <src> into entries and store the data of
        each entry in <self.bodydata>.

        A line recognized as an entry title (see
        HierarchicalLevel.initFromTextDBDictEntryTitle) closes the entry
        being read and opens a new one; any other line is added to the
        content of the entry being read. Lines placed before the first entry
        title are linked with the article itself (empty hierarchical level,
        no title text).

        informationsdata        : InformationsData object
        src                     : either None if no body available
                                  either a list of strings without any EOF marker.
        reading_position        : ReadingPosition object
        srclanguage             : Logotheras.language.language.Language object

        Return <self>.
    """
    self.reset()
    self.reading_position = reading_position

    # no body at all ?
    if src is None:
        return self

    # we have a body to be analysed in <src> :
    self.reading_position.update(body=src)

    # loop-invariant : format used to read the hierarchical levels.
    hlevel_format = logotheras.options.OPTIONS["textdbdict::HLEVELformatrst by reading"]

    def store_entry(title_hlevel, title_pending_text, str_content):
        """
            Build an Entry from the given title/content and store its data in
            <self.bodydata>, under the name given by
            entrydata.getEntryNameInBodyData().
        """
        new_entry = Entry(errors=self.errors,
                          logotherasdata=self.logotherasdata)
        new_entry.initFromStr(informationsdata=informationsdata,
                              title_hlevel=title_hlevel,
                              title_pending_text=title_pending_text,
                              str_content=str_content,
                              reading_position=self.reading_position,
                              srclanguage=srclanguage)
        self.bodydata[new_entry.entrydata.getEntryNameInBodyData()] = new_entry.entrydata

    # state of the entry being read; <current_hlevel> stays None until the
    # first line of <src> has been read.
    current_hlevel = None
    current_pending_text = None     # pend[ing] text in the title
    current_content = []            # list of strings

    for line in src:

        # maybe an "entry title" ?
        title_reader = HierarchicalLevel(errors=self.errors,
                                         formatstr=hlevel_format)
        (hlevel_found, hlevel, hlevel_pending_txt) = \
            title_reader.initFromTextDBDictEntryTitle(line)

        if not hlevel_found:
            if current_hlevel is None:
                # content (text or extracts) to be linked with the article
                # (not with an entry) : -> hierarchical level = []
                current_hlevel = HierarchicalLevel(self.errors,
                                                   formatstr=hlevel_format)
            # entry content :
            current_content.append(line)
            continue

        # entry title : end of the current entry (if any); with <line>
        # begins a new entry.
        #
        # We store every entry but the entries with no title text and no
        # hlevel. This kind of entries are detected since there may be empty
        # lines after an article's header. But entries other than article
        # with no text are accepted and stored.
        if current_hlevel is not None and \
           not (current_pending_text is None and current_hlevel.getDepth() == 0):
            store_entry(current_hlevel, current_pending_text, current_content)

        # new entry, to be read. The hierarchical level read from the title
        # is kept for every entry, the first one of <src> included.
        current_hlevel = hlevel
        current_pending_text = hlevel_pending_txt
        current_content = []

    if current_hlevel is not None:
        # last entry to be added; it is stored whatever its title is, and
        # under the same kind of name as the other entries.
        store_entry(current_hlevel, current_pending_text, current_content)

    return self
def getTextDBDictRepr(self):
    """
        Entry.getTextDBDictRepr

        Return a string representing the data in the TextDBDict format :
        the title line (if <self.entrydata.title> is not None), the
        non-empty lines of the text, then either one empty line if there is
        no extract or the representation of each extract. The pieces are
        joined with NEWLINE.
    """
    data = self.entrydata
    pieces = []

    # ---- title ------------------------------------------------------------
    shown_title = data.title
    if shown_title is not None:

        level = HierarchicalLevel(
            errors=self.errors,
            formatstr=logotheras.options.OPTIONS["textdbdict::HLEVELformatrst by writing"])
        level.setData(data.hlevel)

        # entry to-be-duplicated ? If so, the string to-be-duplicated is
        # surrounded in the title by the 'important' or the 'normal' symbol.
        to_be_duplicated = data.entry_to_be_duplicated
        if to_be_duplicated is not None:
            symbol = BODY_ARTICLE_TO_DUPLICATED_IMPORTANT \
                if data.important_entry_to_be_dup \
                else BODY_ARTICLE_TO_DUPLICATED_NOTIMPORTANT

            shown_title = shown_title.replace(to_be_duplicated,
                                              symbol + to_be_duplicated + symbol)

        pieces.append("{0} {1}".format(level.getTextDBDictRepr(), shown_title))

    # ---- text after the title (empty lines are dropped) -------------------
    pieces.extend(text_line for text_line in data.text if text_line != "")

    # ---- no extract : an empty line after the title and the text ----------
    if len(data) == 0:
        pieces.append("")

    # ---- extracts ---------------------------------------------------------
    for extractdata in data:
        extract = Extract(errors=self.errors,
                          logotherasdata=self.logotherasdata)
        extract.setData(extractdata)
        pieces.append(extract.getTextDBDictRepr())

    return NEWLINE.join(pieces)