def _parse_message(self, path, relative_path, url, index, load):
    """Build a ``Message`` from the current load, save it and register it.

    The title, author text, date and content are obtained from the
    ``_process_*`` helpers of this object. The saved message is appended
    to ``load.sub_entries``; references are processed afterwards only when
    ``self.parse_refs`` is truthy.

    Args:
        path: Not used by this method.
        relative_path: Stored on the message as ``file_path``.
        url: Stored on the message as ``url``.
        index: Stored on the message as ``index``.
        load: Object handed to every ``_process_*`` helper; its ``entry``
            and ``sub_entries`` attributes are read here.

    Returns:
        None.
    """
    msg = Message(url=url, file_path=relative_path)
    msg.index = index
    msg.title = self._process_title(msg, load)

    # The author is resolved only when the helper yielded some text.
    raw_author = self._process_author(msg, load)
    if raw_author is not None:
        msg.author = self._get_author(raw_author, self.lock)

    msg.msg_date = self._process_date(msg, load)

    content = self._process_content(msg, load)
    msg.word_count = get_word_count_text(content)

    # Attach the message to the entry of the current load, when there is one.
    parent_entry = load.entry
    if parent_entry is not None:
        msg.sthread = parent_entry

    # NOTE(review): save() is assumed to run unconditionally (not only when
    # load.entry is set) -- confirm against the original layout.
    msg.save()
    load.sub_entries.append(msg)

    if self.parse_refs:
        self._process_references(msg, load, content)
def _create_section(self, page, load, section_element, text):
    """Create, save and return the ``Section`` for ``section_element``.

    Args:
        page: Page owning the section; its ``file_path`` and ``url`` are
            copied onto the new section.
        load: Object whose ``tree`` provides ``getpath`` for the element.
        section_element: Element the title, xpath and number are derived
            from.
        text: Text whose word count (via ``get_word_count_text``) is
            stored on the section.

    Returns:
        The saved ``Section`` instance.
    """
    document_tree = load.tree

    # Title text is taken relative to the section element, then stripped.
    raw_title = self.xsectiontitle.get_text_from_parent(section_element)
    section_title = raw_title.strip()

    element_xpath = document_tree.getpath(section_element)

    raw_number = self._get_section_number(
        page, load, section_element, section_title, element_xpath)
    section_number = raw_number.strip()

    n_words = get_word_count_text(text)

    new_section = Section(
        page=page,
        title=section_title,
        xpath=element_xpath,
        file_path=page.file_path,
        url=page.url,
        number=section_number,
        word_count=n_words,
    )
    new_section.save()
    return new_section
def _create_section(self, page, load, section_element, text):
    """Build a ``Section`` describing ``section_element``, save it, return it.

    Args:
        page: Page the section belongs to; supplies ``file_path`` and
            ``url`` for the new section.
        load: Object exposing ``tree``, used to obtain the element's path
            through ``getpath``.
        section_element: Element from which the title, xpath and section
            number are obtained.
        text: Text passed to ``get_word_count_text`` to fill
            ``word_count``.

    Returns:
        The ``Section`` instance after ``save()`` has been called on it.
    """
    tree = load.tree
    heading = self.xsectiontitle.get_text_from_parent(section_element).strip()
    location = tree.getpath(section_element)
    numbering = self._get_section_number(
        page, load, section_element, heading, location).strip()
    words = get_word_count_text(text)

    # Gather the constructor arguments first, in the original order.
    section_fields = {
        'page': page,
        'title': heading,
        'xpath': location,
        'file_path': page.file_path,
        'url': page.url,
        'number': numbering,
        'word_count': words,
    }
    created = Section(**section_fields)
    created.save()
    return created