def getScriptureLine(self, text):
    """Extract the scripture line from a page's HTML.

    Looks for the first occurrence of the marker ``'sa'>`` and captures
    everything after it up to the last ``</div>`` on that same line (the
    pattern is greedy and ``.`` does not cross newlines).

    Returns the captured fragment passed through ``jw_common.removeHtml``
    and UTF-8 encoded, or an empty string when the marker is not found.
    """
    # Only the first match is ever used, so stop searching as soon as it
    # is found instead of collecting every match.
    match = re.search(r"'sa'>(.*)</div>", text)
    if match is None:
        return ""
    scripture = jw_common.removeHtml(match.group(1))
    return scripture.encode("utf8")
def getComment(self, text):
    """Extract the comment part from a page's HTML.

    Looks for the first occurrence of the marker ``'sb'>`` and captures
    the remainder of that line (``.`` does not cross newlines).

    Returns the captured fragment passed through ``jw_common.removeHtml``
    and UTF-8 encoded, or an empty string when the marker is not found.
    """
    # Only the first match is ever used, so stop searching as soon as it
    # is found instead of collecting every match.
    match = re.search(r"'sb'>(.*)", text)
    if match is None:
        return ""
    full_comment = jw_common.removeHtml(match.group(1))
    return full_comment.encode("utf8")
def getText(self, text):
    """Convert an article's HTML into plain text with bracket-style markup.

    ``<strong>`` tags become ``[B]``/``[/B]``, opening ``<a ...>`` tags are
    dropped, and then every numbered paragraph
    (``<p id="pN" class="pN">``) and inline heading
    (``<h3 class="inline">``) is collected in document order. Headings are
    wrapped in ``[B]...[/B]``; paragraphs are emitted as they are. Each
    piece is passed through ``jw_common.removeHtml`` and preceded by a
    blank line.

    A closing line built from ``jw_common.t(30038)`` (presumably a
    localized string lookup -- confirm in jw_common) is always appended,
    wrapped in colour and italic markup.
    """
    text = re.sub("<strong>", "[B]", text)
    text = re.sub("</strong>", "[/B]", text)
    # Remove opening anchor tags only; any leftover closing tags are left
    # for removeHtml to deal with.
    text = re.sub("<a[^>]+>", "", text)

    regexp_pars = '<p id="p[0-9]+" class="p[0-9]+">(.+)</p>|<h3 class="inline">(.+)</h3>'

    chunks = []
    # Each match is a (paragraph, heading) pair in which exactly one item
    # is non-empty, depending on which side of the alternation matched.
    for paragraph, heading in re.findall(regexp_pars, text):
        if heading:
            piece = "[B]" + heading + "[/B]"
        else:
            # Plain paragraph: do not append an empty "[B][/B]" pair.
            piece = paragraph
        chunks.append("\n\n" + jw_common.removeHtml(piece))

    chunks.append(
        "\n\n[COLOR=FF0000FF][I]"
        + jw_common.t(30038).encode("utf8")
        + "[/I][/COLOR]"
    )
    return "".join(chunks)
def getTitle(self, text):
    """Return the text of the first ``<h1>`` element found in ``text``.

    The content between ``<h1 ...>`` and ``</h1>`` (on a single line, since
    ``.`` does not cross newlines) is passed through
    ``jw_common.removeHtml``.

    Returns an empty string when no ``<h1>`` element is found, matching
    the other extractors in this file, instead of raising ``IndexError``.
    """
    match = re.search(r"<h1([^>]*)>(.*)</h1>", text)
    if match is None:
        return ""
    # Group 1 holds the tag's attributes; group 2 is the heading content.
    return jw_common.removeHtml(match.group(2))
def getProgram(self, text):
    """Turn program HTML into double-spaced text with bold markup.

    ``<b>``/``</b>`` are rewritten to ``[B]``/``[/B]``, the result is passed
    through ``jw_common.removeHtml``, and every newline in the cleaned text
    is doubled.
    """
    bold_substitutions = (
        ("<b>", "[B]"),
        ("</b>", "[/B]"),
    )
    for html_tag, markup in bold_substitutions:
        text = re.sub(html_tag, markup, text)

    # Double each newline so lines end up separated by a blank line.
    return re.sub("\n", "\n\n", jw_common.removeHtml(text))