def parse_index(raw=None):  # {{{
    """Yield an ``IndexEntry`` for each plugin thread linked from the index page.

    :param raw: Text of the index page. When falsy, the page is obtained via
        ``read(INDEX)`` and decoded as UTF-8 with undecodable bytes replaced.
    :raises ValueError: If the ``>Deprecated/Renamed/Retired Plugins:<``
        marker is absent from the text (raised by ``str.index``), or if two
        list items resolve to the same thread id.

    Every ``<li>`` item that links to a MobileRead ``showthread.php`` URL is
    treated as one plugin. Items that start after the deprecated marker are
    flagged as deprecated.
    """
    if not raw:
        raw = read(INDEX).decode('utf-8', 'replace')

    # Offset of the heading that separates live plugins from retired ones.
    deprecated_offset = raw.index('>Deprecated/Renamed/Retired Plugins:<')
    donate_pat = re.compile(r'''(?is)Donate\s*:\s*<a\s+href=['"](.+?)['"]''')
    option_pat = re.compile(r'''(?is)(History|Uninstall)\s*:\s*([^<;]+)[<;]''')
    item_pat = re.compile(r'''(?is)<li.+?<a\s+href=['"](https://www.mobileread.com/forums/showthread.php\?[pt]=\d+).+?>(.+?)<(.+?)</li>''')
    names_by_id = {}  # thread id -> plugin name, used to detect duplicates

    for item in item_pat.finditer(raw):
        deprecated = item.start() > deprecated_offset
        name = u(item.group(2))
        url = u(item.group(1))
        rest = item.group(3)  # remainder of the list item after the link text

        donate_match = donate_pat.search(rest)
        donate = None if donate_match is None else u(donate_match.group(1))

        history = False
        uninstall = None
        for option in option_pat.finditer(rest):
            key = option.group(1).lower()
            value = option.group(2).strip()
            if key == 'uninstall':
                # Comma separated list, stored as a tuple of stripped names.
                uninstall = tuple(part.strip() for part in value.split(','))
            elif key == 'history' and value.lower() in {'yes', 'true'}:
                history = True

        thread_id = url_to_plugin_id(url, deprecated)
        if thread_id in names_by_id:
            raise ValueError('thread_id for %s and %s is the same: %s' % (names_by_id[thread_id], name, thread_id))
        names_by_id[thread_id] = name
        yield IndexEntry(name, url, donate, history, uninstall, deprecated, thread_id)
def parse_plugin_zip_url(raw):
    """Locate the first zip attachment link in the page text *raw*.

    Scans for ``<a href="attachment.php?...">`` anchors whose link text ends
    in ``.zip`` (case-insensitively).

    :return: ``(MR_URL + href, name)`` for the first such anchor, or
        ``(None, None)`` when no anchor qualifies.
    """
    attachment_pat = r'''(?is)<a\s+href=['"](attachment.php\?[^'"]+?)['"][^>]*>([^<>]+?\.zip)\s*<'''
    for link in re.finditer(attachment_pat, raw):
        href = u(link.group(1))
        filename = u(link.group(2).strip())
        # Re-check the suffix after u() has processed the link text.
        if not filename.lower().endswith('.zip'):
            continue
        return MR_URL + href, filename
    return None, None