Exemple #1
0
    def get(self):
        """Write a JavaScript redirect to an article or an account history page.

        When REDIS_CACHE is not empty, a random cached article URL is written
        back inside a script tag that assigns it to ``location.href``.

        When REDIS_CACHE is empty, the first OfficialAccount row ordered by
        ``last_update_time`` is fetched, its ``last_update_time`` is set to the
        current time and saved, and a redirect to that account's
        ``getmasssendmsg`` page is written.

        In either case the literal text ``empty`` is written when there is
        nothing to redirect to.
        """
        if REDIS_CACHE.is_empty():
            response_body = '<script type="text/javascript">location.href="http://mp.weixin.qq.com/mp/getmasssendmsg?__biz=%s#wechat_redirect"</script>'
            # NOTE(review): assumes OfficialAccount is a peewee model, whose
            # .get() raises DoesNotExist instead of returning None when the
            # query matches no row; without the except clause the 'empty'
            # branch below could never run.
            try:
                last_official_account = OfficialAccount.select().order_by(
                    OfficialAccount.last_update_time).get()
            except OfficialAccount.DoesNotExist:
                last_official_account = None

            if last_official_account is not None:
                official_account = last_official_account.wechat_code

                # Stamp the row so that the ordering above moves on to a
                # different account on the next request.
                last_official_account.last_update_time = datetime.datetime.now()
                last_official_account.save()
                # Single-argument call form prints the same text under
                # Python 2 and Python 3.
                print(last_official_account.id)

                self.write(response_body % official_account)
            else:
                self.write('empty')
        else:
            article_url = REDIS_CACHE.get_random()
            if article_url is not None:
                # NOTE(review): the URL is interpolated into a JS string
                # without escaping; confirm cached URLs are trusted.
                self.write(
                    '<script type="text/javascript">location.href="%s"</script>'
                    % article_url)
            else:
                self.write('empty')
Exemple #2
0
    def get(self):
        """Respond with a script tag that redirects the browser.

        If REDIS_CACHE has entries, the target is a random cached article
        URL. Otherwise the target is the ``getmasssendmsg`` page of the first
        OfficialAccount row ordered by ``last_update_time``; that row's
        ``last_update_time`` is set to the current time and saved before the
        response is written.

        The literal text ``empty`` is written whenever no target is available.
        """
        if not REDIS_CACHE.is_empty():
            article_url = REDIS_CACHE.get_random()
            if article_url is None:
                self.write('empty')
            else:
                # NOTE(review): the URL is placed in a JS string unescaped;
                # confirm that cached URLs come from a trusted source.
                self.write('<script type="text/javascript">location.href="%s"</script>' % article_url)
            return

        response_body = '<script type="text/javascript">location.href="http://mp.weixin.qq.com/mp/getmasssendmsg?__biz=%s#wechat_redirect"</script>'

        # NOTE(review): assumes OfficialAccount is a peewee model. peewee's
        # .get() signals "no row" by raising DoesNotExist rather than
        # returning None, so that case has to be caught to reach 'empty'.
        try:
            last_official_account = OfficialAccount.select().order_by(OfficialAccount.last_update_time).get()
        except OfficialAccount.DoesNotExist:
            last_official_account = None

        if last_official_account is None:
            self.write('empty')
            return

        official_account = last_official_account.wechat_code

        # Refresh the timestamp so the ordering above picks another account
        # on the next request.
        last_official_account.last_update_time = datetime.datetime.now()
        last_official_account.save()
        # Single-argument call form behaves identically on Python 2 and 3.
        print(last_official_account.id)

        self.write(response_body % official_account)
Exemple #3
0
# Leading "<year>年<month>月<day>日<hour>:<minute>" part of a group timestamp.
# Same pattern value as the former ur"..." literal, spelled so that it also
# parses on Python 3.
_GROUP_DATETIME_PATTERN = u"([\\d]+)年([\\d]+)月([\\d]+)日([\\d]+):([\\d]+)"


def _parse_group_datetime(text):
    """Return the datetime at the start of *text*, or None if it does not match."""
    matched = re.match(_GROUP_DATETIME_PATTERN, text)
    if matched is None:
        return None
    year, month, day, hour, minute = [int(part) for part in matched.groups()]
    return datetime.datetime(year, month, day, hour, minute, 0)


def _handle_list_page(url, official_account_id, date_str):
    """Download a list page and process its first article group if it is newer.

    *official_account_id* is the list of ``__biz`` query values (possibly
    empty); *date_str* is the ``YYYYMMDD`` directory name used in the save path.
    """
    log("download list page")
    if not official_account_id:
        # Without __biz there is neither a file name nor an account to look up.
        log("List url has no __biz parameter, skip: %s" % url)
        return

    html = get(url)
    if html is None:
        return

    wechat_code = official_account_id[0]
    filename = config.DOWNLOAD_PATH + "/" + date_str + "/list/" + wechat_code + ".html"

    # NOTE(review): assumes OfficialAccount is a peewee model, whose .get()
    # raises DoesNotExist (it never returns None) when no row matches.
    try:
        official_account = OfficialAccount.get(OfficialAccount.wechat_code == wechat_code)
    except OfficialAccount.DoesNotExist:
        official_account = None
    if official_account is None:
        log("wechat_code is not found in mysql, " + wechat_code)
        return

    first_group_date = LIST_PARSE.get_first_group_datetime(html)
    if first_group_date is None:
        log("First group datetime is None.")
        return

    last_datetime = _parse_group_datetime(first_group_date)
    if last_datetime is None:
        log("First group datetime has an unexpected format, skip.")
        return

    # A missing last_article_time is treated as "nothing fetched yet";
    # comparing a datetime with None would raise TypeError.
    previous_time = official_account.last_article_time
    if previous_time is not None and last_datetime <= previous_time:
        log("First article time is equal to last_article_time, continue.")
        return

    log("First article time is great then last_article_time, download first group articles, wechat code:" + wechat_code)
    official_account.last_article_time = last_datetime
    official_account.save()

    save_html(html, filename)

    # Hand every article url of the first group to article_process.
    msg_list = LIST_PARSE.get_first_group_urls(html)
    if msg_list is not None:
        for msg_url in msg_list:
            log("process article page")
            article_process(msg_url)


def _handle_article_page(url, official_account_id, article_id, date_str):
    """Download one article page and save it under the account's directory.

    *official_account_id* and *article_id* are the lists of ``__biz`` and
    ``sn`` query values (possibly empty).
    """
    log("download article page")
    if not official_account_id or not article_id:
        # Both values are needed to build the destination path.
        log("Article url has no __biz or sn parameter, skip: %s" % url)
        return

    html = get(url)
    if html is None:
        # Same guard as the list branch: nothing to save.
        log("Article download returned nothing, skip: %s" % url)
        return

    filename = config.DOWNLOAD_PATH + "/" + date_str + "/article/" + official_account_id[0] + "/" + article_id[0] + ".html"
    save_html(html, filename)


def main():
    """Run the download loop forever.

    Each iteration takes a random url from REDIS_FROM, classifies it with
    get_url_type() and dispatches it to the list or article handler. The loop
    sleeps one second after every iteration, including idle ones.
    """
    while True:
        url = REDIS_FROM.get_random()
        if url is None:
            log("Not url task.")
            time.sleep(1)
            continue

        url_parse_object = urlparse.urlparse(url)
        params = urlparse.parse_qs(url_parse_object.query)
        date_str = datetime.datetime.now().strftime('%Y%m%d')

        # parse_qs maps each key to a list of values; fall back to an empty
        # list so the handlers can detect a missing parameter.
        official_account_id = params.get('__biz') or []
        article_id = params.get('sn') or []

        # get type from url
        wechat_type = get_url_type(url_parse_object.path)

        if 'list' == wechat_type:
            _handle_list_page(url, official_account_id, date_str)
        elif 'article' == wechat_type:
            _handle_article_page(url, official_account_id, article_id, date_str)
        else:
            log("Unkown wechat type")

        time.sleep(1)