def get(self):
    """Answer with a JavaScript redirect to the next page to visit.

    When ``REDIS_CACHE`` holds article URLs, redirect to one picked by
    ``REDIS_CACHE.get_random()``.  When the cache is empty, take the first
    ``OfficialAccount`` ordered by ``last_update_time``, stamp it with the
    current time, and redirect to that account's message-history page.
    Writes the literal body ``'empty'`` when neither source yields a target.
    """
    redirect_template = '<script type="text/javascript">location.href="%s"</script>'

    if not REDIS_CACHE.is_empty():
        article_url = REDIS_CACHE.get_random()
        if article_url is None:
            self.write('empty')
        else:
            # NOTE(review): the URL is interpolated into the script unescaped;
            # confirm that everything stored in REDIS_CACHE is trusted.
            self.write(redirect_template % article_url)
        return

    history_url = 'http://mp.weixin.qq.com/mp/getmasssendmsg?__biz=%s#wechat_redirect'
    try:
        account = OfficialAccount.select().order_by(
            OfficialAccount.last_update_time).get()
    except OfficialAccount.DoesNotExist:
        # The ORM reports "no row" by raising rather than returning None, so
        # without this handler the 'empty' fallback could never be reached.
        account = None

    if account is None:
        self.write('empty')
        return

    wechat_code = account.wechat_code
    # Refresh the timestamp so this account moves behind the others in the
    # last_update_time ordering used above.
    account.last_update_time = datetime.datetime.now()
    account.save()
    # Parenthesised so the statement parses on Python 2 and Python 3 alike.
    print(account.id)
    self.write(redirect_template % (history_url % wechat_code))
def get(self):
    """Answer with a JavaScript redirect to the next page to visit.

    When ``REDIS_CACHE`` holds article URLs, redirect to one picked by
    ``REDIS_CACHE.get_random()``.  When the cache is empty, take the first
    ``OfficialAccount`` ordered by ``last_update_time``, stamp it with the
    current time, and redirect to that account's message-history page.
    Writes the literal body ``'empty'`` when neither source yields a target.
    """
    redirect_template = '<script type="text/javascript">location.href="%s"</script>'

    if not REDIS_CACHE.is_empty():
        article_url = REDIS_CACHE.get_random()
        if article_url is None:
            self.write('empty')
        else:
            # NOTE(review): the URL is interpolated into the script unescaped;
            # confirm that everything stored in REDIS_CACHE is trusted.
            self.write(redirect_template % article_url)
        return

    history_url = 'http://mp.weixin.qq.com/mp/getmasssendmsg?__biz=%s#wechat_redirect'
    try:
        account = OfficialAccount.select().order_by(
            OfficialAccount.last_update_time).get()
    except OfficialAccount.DoesNotExist:
        # The ORM reports "no row" by raising rather than returning None, so
        # without this handler the 'empty' fallback could never be reached.
        account = None

    if account is None:
        self.write('empty')
        return

    wechat_code = account.wechat_code
    # Refresh the timestamp so this account moves behind the others in the
    # last_update_time ordering used above.
    account.last_update_time = datetime.datetime.now()
    account.save()
    # Parenthesised so the statement parses on Python 2 and Python 3 alike.
    print(account.id)
    self.write(redirect_template % (history_url % wechat_code))
# Group timestamp as printed on a list page: year, month, day, hour, minute.
# Written as a plain unicode literal with doubled backslashes because the
# ``ur`` prefix is rejected by Python 3; the pattern text is unchanged.
_GROUP_DATETIME_RE = re.compile(u"([\\d]+)年([\\d]+)月([\\d]+)日([\\d]+):([\\d]+)")


def _parse_group_datetime(text):
    """Turn a list-page group timestamp into a ``datetime`` (seconds = 0).

    Returns ``None`` when *text* does not start with the expected pattern or
    the captured numbers do not form a valid date.
    """
    matched = _GROUP_DATETIME_RE.match(text)
    if matched is None:
        return None
    try:
        return datetime.datetime(*[int(part) for part in matched.groups()])
    except ValueError:
        return None


def _handle_list_page(url, date_str, wechat_code):
    """Download a list page and fetch its first article group if it is new."""
    log("download list page")
    html = get(url)
    if html is None:
        return

    try:
        official_account = OfficialAccount.get(
            OfficialAccount.wechat_code == wechat_code)
    except OfficialAccount.DoesNotExist:
        # The ORM raises for a missing row instead of returning None; map it
        # to None so the "not found" message below is actually reachable.
        official_account = None
    if official_account is None:
        log("wechat_code is not found in mysql, " + wechat_code)
        return

    first_group_date = LIST_PARSE.get_first_group_datetime(html)
    if first_group_date is None:
        log("First group datetime is None.")
        return

    last_datetime = _parse_group_datetime(first_group_date)
    if last_datetime is None:
        # An unexpected timestamp format must not take the worker loop down.
        log("First group datetime has an unexpected format, wechat code:" + wechat_code)
        return

    known_time = official_account.last_article_time
    # NOTE(review): an unset last_article_time is treated as "nothing fetched
    # yet"; confirm the column can really be NULL.
    if known_time is not None and last_datetime <= known_time:
        log("First article time is equal to last_article_time, continue.")
        return

    log("First article time is great then last_article_time, download first group articles, wechat code:" + wechat_code)
    official_account.last_article_time = last_datetime
    official_account.save()
    filename = config.DOWNLOAD_PATH + "/" + date_str + "/list/" + wechat_code + ".html"
    save_html(html, filename)

    # Walk the article URLs of the newest group found in the list page.
    msg_list = LIST_PARSE.get_first_group_urls(html)
    if msg_list is not None:
        for msg_url in msg_list:
            log("process article page")
            article_process(msg_url)


def _handle_article_page(url, date_str, wechat_code, article_id):
    """Download a single article page and store it under the account folder."""
    log("download article page")
    html = get(url)
    if html is None:
        # The list branch treats a None download as "nothing to do";
        # apply the same rule here instead of handing None to save_html.
        log("download article page failed, " + url)
        return
    filename = (config.DOWNLOAD_PATH + "/" + date_str + "/article/"
                + wechat_code + "/" + article_id + ".html")
    save_html(html, filename)


def main():
    """Run the download worker loop; never returns.

    Each iteration takes a URL from ``REDIS_FROM.get_random()``, classifies
    it with ``get_url_type`` and dispatches it:

    * ``'list'``    - download the list page and, when its first group is
      newer than the account's ``last_article_time``, save the page and
      process every article URL of that group;
    * ``'article'`` - download the page and save it under the account folder;
    * anything else - log, sleep one second and try again.

    The loop also sleeps one second when no URL is available.  URLs lacking
    the ``__biz`` (or, for articles, ``sn``) query parameter are logged and
    skipped instead of raising ``IndexError``.
    """
    while True:
        url = REDIS_FROM.get_random()
        if url is None:
            log("Not url task.")
            time.sleep(1)
            continue

        parsed_url = urlparse.urlparse(url)
        params = urlparse.parse_qs(parsed_url.query)
        date_str = datetime.datetime.now().strftime('%Y%m%d')
        # parse_qs maps each key to a list of values; use the first, if any.
        wechat_code = (params.get('__biz') or [None])[0]
        article_id = (params.get('sn') or [None])[0]
        wechat_type = get_url_type(parsed_url.path)

        if wechat_type == 'list':
            if wechat_code is None:
                log("Missing __biz parameter, skip url: " + url)
                time.sleep(1)
                continue
            _handle_list_page(url, date_str, wechat_code)
        elif wechat_type == 'article':
            if wechat_code is None or article_id is None:
                log("Missing __biz or sn parameter, skip url: " + url)
                time.sleep(1)
                continue
            _handle_article_page(url, date_str, wechat_code, article_id)
        else:
            log("Unkown wechat type")
            # NOTE(review): the original layout does not show whether this
            # sleep belonged to this branch or ran after every iteration;
            # it is kept on the unknown-type path only - confirm.
            time.sleep(1)