def parse_thread(r):
    """Decode a thread JSON response and return its list of posts.

    Returns an empty list when the response body is not valid JSON.
    """
    body = r.content.decode('utf-8', 'ignore')
    try:
        payload = json.loads(body)
    except JSONDecodeError:
        logger.warning("JSONDecodeError for %s:" % (r.url,))
        logger.warning(r.text)
        return []
    return payload["posts"]
def parse_threads_list(self, r):
    """Decode a board-index JSON response.

    Returns a ``(threads, next_page_url)`` tuple. ``next_page_url`` is the
    URL of the following board page, or None when this is the last page or
    the body is not valid JSON.
    """
    try:
        j = json.loads(r.content.decode('utf-8', 'ignore'))
    except JSONDecodeError:
        logger.warning("JSONDecodeError for %s:" % (r.url,))
        logger.warning(r.text)
        return [], None
    if j["currentPage"] < j["totalPages"]:
        return j["data"], self._base_url + "boards/%d" % (j["currentPage"] + 1,)
    # BUG FIX: the last page previously returned a bare list instead of the
    # (threads, next_page) 2-tuple that every other path returns, breaking
    # callers that unpack two values.
    return j["data"], None
def parse_thread(r):
    """Yield the thread object (without its replies) followed by each reply.

    Yields nothing when the response body is not valid JSON.
    """
    try:
        j = json.loads(r.content.decode('utf-8', 'ignore'))
    except JSONDecodeError:
        logger.warning("JSONDecodeError for %s:" % (r.url, ))
        logger.warning(r.text)
        # This is a generator: `return []` would NOT yield an empty list, it
        # only sets an unused StopIteration value. A bare return stops the
        # generator cleanly and makes the intent explicit.
        return
    thread = j.copy()
    del thread["replies"]
    yield thread
    for post in j["replies"]:
        yield post
def parse_thread(r):
    """Flatten a thread JSON response into posts plus the thread object.

    Each post gains a "_parent" key holding the thread id; the thread
    object itself (with its "posts" key removed) is appended last.
    Returns an empty list when the response body is not valid JSON.
    """
    try:
        thread = json.loads(r.content.decode('utf-8', 'ignore'))
    except JSONDecodeError:
        logger.warning("JSONDecodeError for %s:" % (r.url,))
        logger.warning(r.text)
        return []
    posts = thread["posts"]
    for post in posts:
        post["_parent"] = thread["threadId"]
    items = list(posts)
    del thread["posts"]
    items.append(thread)
    return items
def _ts(text): time = re.sub(r"^\w{2} ", "", text.strip()) \ .replace("января", "01") \ .replace("февраля", "02") \ .replace("марта", "03") \ .replace("апреля", "04") \ .replace("мая", "05") \ .replace("июня", "06") \ .replace("июля", "07") \ .replace("августа", "08") \ .replace("сентября", "09") \ .replace("октября", "10") \ .replace("ноября", "11") \ .replace("декабря", "12") \ .replace("⑨", "9") # For some reason, some dates are fuzzed / in chinese try: return int( datetime.datetime.strptime(time, "%d %m %Y %H:%M:%S").timestamp()) except Exception as e: logger.warning("Error during date parsing (iichan): " + str(e)) return 0
def parse_threads_list(r):
    """Decode a thread-list JSON response.

    Returns a ``(threads, next_page)`` tuple where ``next_page`` is always
    None (this endpoint is not paginated). Returns ``([], None)`` when the
    body is not valid JSON or does not look like a list of posts.
    """
    try:
        payload = json.loads(r.content.decode('utf-8', 'ignore'))
        if not payload or "post_id" not in payload[0]:
            logger.warning("No threads in response for %s: %s" % (
                r.url,
                r.text,
            ))
            return [], None
    except JSONDecodeError:
        logger.warning("JSONDecodeError for %s:" % (r.url, ))
        logger.warning(r.text)
        return [], None
    return payload, None
def parse_threads_list(r):
    """Decode a paged catalog JSON response and flatten it.

    Returns ``(threads, next_page)`` where ``threads`` collects every
    thread from every page and ``next_page`` is always None. Returns
    ``([], None)`` on invalid or unexpected JSON.
    """
    try:
        pages = json.loads(r.content.decode('utf-8', 'ignore'))
        if not pages or "threads" not in pages[0]:
            logger.warning("No threads in response for %s: %s" % (r.url, r.text,))
            return [], None
    except JSONDecodeError:
        logger.warning("JSONDecodeError for %s:" % (r.url,))
        logger.warning(r.text)
        return [], None
    return [thread for page in pages for thread in page["threads"]], None
def parse_threads_list(r):
    """Decode one page of a paginated board JSON response.

    Derives the current page number from the request URL (".../<N>.json",
    ignoring any query string) and, while more pages remain according to
    "pageCount", builds the URL of the next page. Returns
    ``(threads, next_page_url_or_None)``; ``([], None)`` on invalid JSON.
    """
    try:
        page = json.loads(r.content.decode('utf-8', 'ignore'))
        if not page or "threads" not in page:
            logger.warning("No threads in response for %s: %s" % (r.url, r.text,))
            return [], None
    except JSONDecodeError:
        logger.warning("JSONDecodeError for %s:" % (r.url,))
        logger.warning(r.text)
        return [], None
    # Strip the query string (split on the LAST "?", matching the original
    # rfind-based slice), then pull "<N>" out of ".../<N>.json".
    base = r.url.rsplit("?", 1)[0]
    current = int(base[base.rfind("/") + 1:-5])
    next_page = None
    if current < page["pageCount"]:
        next_page = urljoin(r.url, "%d.json" % (current + 1))
    return page["threads"], next_page