Ejemplo n.º 1
0
 async def changeDomainIp(self, ip, sub_domain=''):
     """Make the DNS record of ``sub_domain`` carry ``ip``.

     The records for ``sub_domain`` are fetched with ``getRecordList`` and
     only the first one is considered:

     * no record returned -> a new record is created with ``addRecord``;
     * record value equals ``ip`` -> nothing is changed;
     * otherwise the record is updated with ``changeRecord``.

     When the record of the ``'@'`` sub domain is changed, a notification
     mail is sent using the ``sendmail`` section of ``config/dn.yaml``.
     Sending the mail is best effort: a failure is logged and ignored.

     :param ip: new record value; must be truthy.
     :param sub_domain: sub domain whose record is updated or created.
     :raises AssertionError: if ``ip`` is empty.
     """
     assert ip
     records = await self.getRecordList(sub_domain)
     if not records:
         info('no record matching sub domain %s, try to create ...',
              sub_domain)
         rtn = await self.addRecord(ip, sub_domain)
         info('%s create %s res:\n%s', sub_domain, ip, pcformat(rtn))
         return

     record = records[0]
     name, record_id, old_ip = record['name'], record['id'], record['value']
     if old_ip == ip:
         info('%s(id %s) ip not change %s', name, record_id, old_ip)
         return

     rtn = await self.changeRecord(record_id, ip, sub_domain)
     info('%s(id %s) ip %s -> %s res:\n%s', name, record_id, old_ip, ip,
          pcformat(rtn))
     if sub_domain != '@':
         return

     # Mail notification is only sent for the '@' record.
     # NOTE(review): send_mail is called synchronously inside a coroutine;
     # if it blocks on network I/O it will stall the event loop - confirm.
     try:
         sendmail_conf = getConf('config/dn.yaml', root_key='sendmail')
         send_mail(
             sendmail_conf['smtp_server'],
             sendmail_conf['smtp_port'], sendmail_conf['user'],
             sendmail_conf['pwd'], sendmail_conf['user'],
             (sendmail_conf['user'], ), [],
             f'ip地址变化 {self.conf["Domain"]} {datetime.now().strftime("%Y%m%d %H:%M:%S")}',
             f'{old_ip} --> {ip}', [])
     except Exception as e:
         # Only Exception is caught so that task cancellation and
         # KeyboardInterrupt still propagate; the failure is logged
         # instead of being silently dropped.
         info('send ip change mail failed: %s', e)
Ejemplo n.º 2
0
    def createQrCodeTicket(data):
        u'''Create a QR-code ticket; normally not called directly.

        Posts ``data`` as JSON to the WeChat ``qrcode/create`` endpoint using
        the access token from ``WXLoginManager.getAccessToken``.

        Yields (via ``gen.Return``) the decoded JSON response, or ``None``
        when the request/decoding fails or the response carries a truthy
        ``errcode``.
        '''
        result = None
        token = yield WXLoginManager.getAccessToken()
        api_url = 'https://api.weixin.qq.com/cgi-bin/qrcode/create?access_token={ACCESS_TOKEN}'.format(ACCESS_TOKEN=token)
        info('url: %s', api_url)
        info('data: %s', pcformat(data))
        payload = json.dumps(data, ensure_ascii=False)
        request = HTTPRequest(api_url, method='POST', body=payload)
        client = AsyncHTTPClient()
        try:
            response = yield gen.Task(client.fetch, request)
            info('resp: %s', response.body)
            answer = json.loads(response.body)
        except:
            # any failure while fetching or decoding is logged; None is returned
            error('', exc_info=True)
        else:
            err_code = answer.get('errcode', None)
            if not err_code:
                result = answer
            else:
                info('创建二维码ticket出错: errcode %s, errmsg: %s', err_code, answer.get('errmsg', ''))
        raise gen.Return(result)
Ejemplo n.º 3
0
    def sendTplMsg(tpl_id, openid, url, in_data):
        u'''Send a template message.

        * ``tpl_id``  template id, sent as ``template_id``
        * ``openid``  recipient, sent as ``touser``
        * ``url``     link carried by the message, sent as ``url``
        * ``in_data`` template payload, sent as ``data``

        Yields (via ``gen.Return``):

        * True  on success
        * False on failure (request/decoding error or truthy ``errcode``)
        '''
        ret_data = False
        access_token = yield WXLoginManager.getAccessToken()
        # Keep the API endpoint in its own variable: reusing ``url`` here
        # would replace the caller's link in the payload with the endpoint
        # (including the access token).
        api_url = '''https://api.weixin.qq.com/cgi-bin/message/template/send?access_token={ACCESS_TOKEN}'''.format(ACCESS_TOKEN=access_token)
        data = {'touser': openid,
                'template_id': tpl_id,
                'url': url,
                'data': in_data,
                }
        info('url: %s', api_url)
        info('data: %s', pcformat(data))
        req = HTTPRequest(api_url, method='POST', body=json.dumps(data, ensure_ascii=False))  # , validate_cert = False)
        httpc_lient = AsyncHTTPClient()
        try:
            resp = yield gen.Task(httpc_lient.fetch, req)
            info('resp: %s', resp.body)
            j_data = json.loads(resp.body)
        except:
            error('', exc_info=True)
        else:
            # .get(): a response without "errcode" is treated as success
            # instead of raising KeyError outside the try block.
            errcode = j_data.get('errcode')
            if errcode:
                info('发送模板消息出错: errcode %s, errmsg: %s', errcode, j_data.get('errmsg', ''))
            else:
                ret_data = True
        raise gen.Return(ret_data)
Ejemplo n.º 4
0
 async def addRecord(self,
                     val,
                     sub_domain='',
                     record_type='A',
                     record_line='默认'):
     """Create a DNS record through the ``RecordCreate`` action.

     The request parameters are the domain from ``self.conf['Domain']``,
     the record line and value, the optional sub domain / record type
     (only sent when truthy) and the public arguments returned by
     ``getPubArg('RecordCreate')``. ``Sign`` is called on the parameters
     before they are sent with ``_getData`` to ``self.conf['url']``.

     :return: the ``data.record`` part of the response when the request
         succeeded and the response ``code`` is 0, otherwise ``{}``.
     """
     api_url = self.conf['url']
     params = {
         'domain': self.conf['Domain'],
         'recordLine': record_line,
         'value': val,
     }
     # optional fields are only sent when they carry a value
     for key, value in (('subDomain', sub_domain),
                        ('recordType', record_type)):
         if value:
             params[key] = value
     params.update(self.getPubArg('RecordCreate'))
     self.Sign('GET', api_url, params)
     _, payload, ok = await self._getData(api_url,
                                          params=params,
                                          timeout=10,
                                          my_fmt='json',
                                          my_json_encoding='utf8')
     if not (ok and payload['code'] == 0):
         info('error ? %s %s', ok, pcformat(payload))
         return {}
     return payload.get('data', {}).get('record', {})
Ejemplo n.º 5
0
    def _addUserWord(self):
        """Register custom words with jieba.

        Only the non-empty words found under the ``inc`` key of the entries
        in ``self.l_concern`` are registered; ``exc`` words are not.
        Duplicates are removed and the words are added longest first.
        """
        def _length(word):
            return len(word) if word else 0

        include_words = [
            word
            for concern in self.l_concern
            for word in concern.get('inc', [])
            if len(word) > 0
        ]
        l_dynamic_word = sorted(set(include_words), key=_length, reverse=True)
        debug(pcformat(l_dynamic_word))
        for word in l_dynamic_word:
            if word:
                jieba.add_word(word, freq=1500, tag=None)
        debug('added %s include/exclude word(s) to jieba', len(l_dynamic_word))
Ejemplo n.º 6
0
    def get_from_linkstars(self, url, source=''):
        """Extract the destination from a linkstars click-tracking URL.

        For a URL starting with ``https://www.linkstars.com/click.php?`` the
        first value of its ``to`` query parameter is returned. Any other URL
        (or a linkstars URL without ``to``) is returned unchanged.

        :param url: URL to inspect; may be empty or ``None``.
        :param source: not used by the active code (only by disabled debug
            logging).
        :return: the resolved URL, or ``''`` when the result would be falsy.
        """
        real_url = url
        if url and url.startswith('https://www.linkstars.com/click.php?'):
            d_p = parse_qs(urlparse(url).query)
            for _k in ('to', ):
                try:
                    if _k in d_p:
                        real_url = d_p[_k][0]
                        break
                except UnicodeDecodeError:
                    # one placeholder for the one argument; the previous
                    # two-placeholder format broke the log call itself
                    warn('d_p %s', pcformat(d_p))
                    raise

        return real_url or ''
Ejemplo n.º 7
0
    def getQrPicBySceneId(scene_id, want_temp=True):
        u'''Get QR-code picture data for a scene value.

        * ``scene_id``  scene value; an int for temporary codes, an int in
          (0, 100000] or a str of length 1..64 for permanent codes
        * ``want_temp`` True temporary (default)  False permanent

        The ticket is cached in redis under ``_Z_WX_QR_<scene_id>``; when no
        cached ticket exists a new one is requested with
        ``createQrCodeTicket`` and stored with the expiry the API returned.

        Yields (via ``gen.Return``) the picture data, or None on invalid
        arguments or when no ticket could be obtained.
        '''
        pic_data = None
        max_expire = 604800
        r = m_redis.get_instance('ad')
        c_k = '_Z_WX_QR_%s' % scene_id
        ticket = r.get(c_k)
        if not ticket:
            if want_temp:
                if not isinstance(scene_id, int):
                    info('参数错误: 临时二维码的scene_id必须为32位非0整型!')
                    raise gen.Return(pic_data)
                data = {'expire_seconds': max_expire, 'action_name': 'QR_SCENE', 'action_info': {'scene': {'scene_id': scene_id}}}
            else:
                if isinstance(scene_id, int):
                    if not (0 < scene_id <= 100000):
                        info('参数错误: 永久二维码的scene_id为整数时,范围为(0,100000]')
                        raise gen.Return(pic_data)
                    data = {'expire_seconds': max_expire, 'action_name': 'QR_LIMIT_SCENE', 'action_info': {'scene': {'scene_id': scene_id}}}
                elif isinstance(scene_id, str):
                    if not (0 < len(scene_id) <= 64):
                        info('参数错误: 永久二维码的scene_id为字符串时,长度范围为[1,64]')
                        raise gen.Return(pic_data)
                    data = {'expire_seconds': max_expire, 'action_name': 'QR_LIMIT_STR_SCENE', 'action_info': {'scene': {'scene_str': scene_id}}}
                else:
                    info('参数错误: 永久二维码的scene_id应该为int或str')
                    raise gen.Return(pic_data)

            j_data = yield WXLoginManager.createQrCodeTicket(data)
            info('%s', pcformat(j_data))
            if not j_data:
                # createQrCodeTicket yields None when the request failed;
                # indexing it would raise TypeError.
                raise gen.Return(pic_data)
            ticket = j_data['ticket']
            expire_at = j_data['expire_seconds']
            # NOTE(review): the (name, value, time) argument order depends on
            # the redis client class behind m_redis - confirm.
            r.setex(c_k, ticket, expire_at)
        if ticket:
            pic_data = yield WXLoginManager.getQrCodeByTicket(ticket)
            if pic_data is not None:
                # debug dump of the last fetched picture; the context manager
                # makes sure the file handle is closed
                with open('/tmp/t.jpg', 'wb') as f:
                    f.write(pic_data)
        raise gen.Return(pic_data)
Ejemplo n.º 8
0
    def main_test():
        """Manual smoke test: run one ``UserLib`` coroutine and log its result.

        Currently exercises ``UserLib.checkUserVipLevel`` for uid 1000 with
        ``USER_VIP_TYPE_NONE``. Other calls (``addScore``, ``checkTicket``,
        ``getGrandparent``, ``clear_push_cache``, ``addUser``) were tried
        here in the past by swapping the yielded expression.
        """
        handler = ToolsMixin()
        vip_result = yield UserLib.checkUserVipLevel(
            handler, 1000, UserLib.USER_VIP_TYPE_NONE)
        # imported here, after the call, exactly as before
        from applib.tools_lib import pcformat
        info('r: %s', pcformat(vip_result))
Ejemplo n.º 9
0
def getConf(conf_path='./pn_conf.yaml', root_key=None, force_reload=False):
    """Load a YAML configuration file, caching the parsed result per path.

    The parsed document is stored in the module-level ``_cache_conf`` dict
    keyed by the absolute path, and the file is only read again when nothing
    is cached for that path or ``force_reload`` is true.

    :param conf_path: path of the YAML file.
    :param root_key: when truthy, return only ``conf[root_key]``. A missing
        key (or a document that is not a mapping) is logged as an error and
        ``None`` is returned.
    :param force_reload: re-read the file even if a cached copy exists.
    :return: the parsed configuration, or the sub tree under ``root_key``.
    :raises AssertionError: if the file has to be read and does not exist.
    """
    conf_file_path = os.path.abspath(conf_path)
    conf = _cache_conf.get(conf_file_path, None)

    if conf is None or force_reload:
        if force_reload:
            debug('force load conf from file %s', conf_file_path)
        assert os.path.exists(conf_file_path)
        # the context manager closes the file once parsing is done
        with open(conf_file_path) as conf_file:
            conf = yaml.load(conf_file, Loader=yaml.FullLoader)  # https://github.com/yaml/pyyaml/wiki/PyYAML-yaml.load(input)-Deprecation
        _cache_conf[conf_file_path] = conf

    if root_key:
        # an empty or non-mapping document is handled like a missing key
        # instead of failing on the membership test / .get()
        if not isinstance(conf, dict) or root_key not in conf:
            error('conf no root_key %s in %s !!!', root_key, conf_file_path)
            return None
        conf = conf[root_key]

    return conf


if __name__ == '__main__':
    # Manual check when run as a script: load ./config/pn_conf.yaml through
    # getConf and log the pretty-printed result.
    from applib.tools_lib import pcformat
    conf = getConf('./config/pn_conf.yaml')
    info(pcformat(conf))


Ejemplo n.º 10
0
                attachlist_title = post_content.xpath(
                    forum_cfg['post_attachlist_title'])[0]
                attachlist_url = post_content.xpath(
                    forum_cfg['post_attachlist_url'])[0]
                attach_info = (attachlist_title,
                               urljoin(forum_cfg["post_base_url"],
                                       attachlist_url))
                image_list = post_content.xpath(
                    './/img[starts-with(@src, "http")]/@src')


#-#                info(f'{pcformat(image_list)}\n{attach_info}')
            elif '无权' in etree.tounicode(tree):
                info(f'无权查看 {title} {url}')
        return content, attach_size, image_list, attach_info

if __name__ == '__main__':
    # Script entry point: run DiscuzManager.getPostList once on the event
    # loop and log the pretty-printed result.
    loop = asyncio.get_event_loop()
    task = None

    try:
        dz = DiscuzManager()
        # keep a handle on the task so it can be cancelled on Ctrl-C
        task = asyncio.ensure_future(dz.getPostList(loop), loop=loop)
        x = loop.run_until_complete(task)
        info(pcformat(x))
    except KeyboardInterrupt:
        info('cancel on KeyboardInterrupt..')
        if task is not None and not task.done():
            # Cancel the task and let the loop run until the cancellation
            # has been processed. The previous loop.run_forever() had
            # nothing to stop it and kept the process alive until a second
            # interrupt.
            task.cancel()
            try:
                loop.run_until_complete(task)
            except asyncio.CancelledError:
                pass
    finally:
        # close() releases the loop's resources; stop() on a loop that is
        # not running does not
        loop.close()
Ejemplo n.º 11
0
    async def postData(self, url, *args, **kwargs):
        """Submit data with HTTP POST and decode the response.

        Positional and keyword arguments are passed on to ``self.sess.post``
        after the ``my_*`` options below have been removed from ``kwargs``:

        * ``my_fmt`` - how the body is read: ``'str'`` (default), ``'json'``,
          ``'bytes'`` or ``'stream'``
        * ``my_str_encoding`` - encoding for ``'str'``; when decoding fails
          the raw bytes are decoded with errors ignored (UTF-8 if no
          encoding was given)
        * ``my_json_encoding`` / ``my_json_loads`` - options for ``'json'``
        * ``my_streaming_chunk_size`` / ``my_streaming_cb`` - for
          ``'stream'``: ``my_streaming_cb(url, chunk)`` is called per chunk
        * ``my_retry`` - maximum number of attempts (default 1)

        An attempt that raises one of the handled network/decoding errors is
        logged and the next attempt is made; the first successful attempt
        ends the loop.

        :return: ``(resp, data, ok)``; ``ok`` is True only when an attempt
            completed. ``resp`` has already been released.
        """
        resp, data, ok = None, None, False
        str_encoding = kwargs.pop('my_str_encoding', None)
        fmt = kwargs.pop('my_fmt', 'str')
        json_encoding = kwargs.pop('my_json_encoding', None)
        json_loads = kwargs.pop('my_json_loads', json.loads)
        streaming_chunk_size = kwargs.pop('my_streaming_chunk_size', 1024)
        streaming_cb = kwargs.pop('my_streaming_cb', None)
        max_try = kwargs.pop('my_retry', 1)

        for nr_try in range(max_try):
            # log prefix such as "2/3 " identifying the attempt; empty when
            # only one attempt is made
            try_tag = ('%s/%s ' % (nr_try + 1, max_try)) if max_try > 1 else ''
            try:
                resp = await self.sess.post(url, *args, **kwargs)
                if fmt == 'str':
                    try:
                        data = await resp.text(encoding=str_encoding)
                    except UnicodeDecodeError:
                        txt = await resp.read()
                        # bytes.decode() rejects None as encoding, so fall
                        # back to UTF-8 when the caller gave none
                        data = txt.decode(str_encoding or 'utf-8', 'ignore')
                        warn('ignore decode error from %s', url)
                    except aiohttp.client_exceptions.ContentTypeError:
                        warn('ignore content encoding error from %s', url)
                elif fmt == 'json':
                    data = await resp.json(encoding=json_encoding,
                                           loads=json_loads,
                                           content_type=None)
                elif fmt == 'bytes':
                    data = await resp.read()
                elif fmt == 'stream':
                    while 1:
                        chunk = await resp.content.read(streaming_chunk_size)
                        if not chunk:
                            break
                        streaming_cb(url, chunk)
                ok = True
                break
            except asyncio.TimeoutError:
                if nr_try == max_try - 1:  # only the last timeout is logged
                    debug('%sTimeoutError %s', try_tag, url)
            except aiohttp.client_exceptions.ClientConnectorError:
                error('%sClientConnectionError %s %s %s', try_tag, url,
                      pcformat(args), pcformat(kwargs))
            except ConnectionResetError:
                error('%sConnectionResetError %s %s %s', try_tag, url,
                      pcformat(args), pcformat(kwargs))
            except aiohttp.client_exceptions.ContentTypeError:
                error('%sContentTypeError %s %s %s',
                      try_tag,
                      url,
                      pcformat(args),
                      pcformat(kwargs),
                      exc_info=True)
                data = await resp.text(encoding=str_encoding)
                info('data %s', data[:50])
            except ClientError:
                error('%sClientError %s %s %s',
                      try_tag,
                      url,
                      pcformat(args),
                      pcformat(kwargs),
                      exc_info=True)
            except UnicodeDecodeError:
                error('%sUnicodeDecodeError %s %s %s %s\n%s',
                      try_tag,
                      url,
                      pcformat(args),
                      pcformat(kwargs),
                      pcformat(resp.headers),
                      await resp.read(),
                      exc_info=True)
            except json.decoder.JSONDecodeError:
                error('%sJSONDecodeError %s %s %s',
                      try_tag,
                      url,
                      pcformat(args),
                      pcformat(kwargs),
                      exc_info=True)
            except aiodns.error.DNSError:
                error('%sDNSError %s %s %s', try_tag, url,
                      pcformat(args), pcformat(kwargs))
            finally:
                if resp:
                    resp.release()

        return resp, data, ok
Ejemplo n.º 12
0
    async def GetJdCouponWithCookie(self, title, item):
        """Automatically claim an ordinary JD coupon with a Firefox webdriver.

        Starts Firefox, restores previously pickled cookies (if the cookie
        file exists), opens ``item['receiveUrl']`` and clicks the claim
        button. When no button is found, or a login prompt is shown on the
        mobile site, it logs in with ``self.jd_user`` / ``self.jd_password``,
        saves the cookies and retries once. All errors are logged and
        swallowed; the browser is always closed at the end.

        ``title`` is only used for logging. ``item`` must provide
        ``receiveUrl``; note that ``item['receiveUrl']`` is modified in place
        when it starts with ``coupon.m.jd.com``.

        Returns ``(rslt, err)``, which are never reassigned in this method
        and are therefore always two empty strings.

        Reference: http://selenium-python.readthedocs.io/index.html
        """
        rslt, err = '', ''
        # NOTE(review): this extends sys.path (the module import path), not
        # the PATH environment variable - confirm this is what is needed for
        # the geckodriver executable to be found.
        if self.conf['geckodriver'] not in sys.path:
            sys.path.append(self.conf['geckodriver'])
        opt = Options()
        #-#        opt.add_argument('--headless')
        ff = webdriver.Firefox(firefox_options=opt)
        #-#        display = Display(visible=0, size=(800, 600))
        #-#        display.start()
        #-#        ff = webdriver.Firefox()
        #-#        if not item['receiveUrl'].startswith('http'):
        # add the missing scheme to bare coupon.m.jd.com links
        if item['receiveUrl'].startswith('coupon.m.jd.com'):
            item['receiveUrl'] = 'http://' + item['receiveUrl']

        # mobile-site and desktop-site URLs use separate cookie files
        if 'm.jd.com' in item['receiveUrl']:
            cookie_file = os.path.abspath(
                os.path.join(self.conf['cookie_dir'],
                             'plogin.m.jd.com.cookie.pkl'))
        else:
            cookie_file = os.path.abspath(
                os.path.join(self.conf['cookie_dir'],
                             'passport.jd.com.cookie.pkl'))
        try:
            if os.path.exists(cookie_file):
                debug('读取已有cookie %s', cookie_file)
                #-#                ff.get('https://home.m.jd.com' if 'm.jd.com' in item['receiveUrl'] else 'http://help.jd.com/index.html')
                #-#                ff.get('https://so.m.jd.com/category/all.html?searchFrom=bysearchbox' if 'm.jd.com' in item['receiveUrl'] else 'http://help.jd.com/index.html')
                # a page of the matching site is opened before the cookies
                # are added
                url = 'https://p.m.jd.com/cart/cart.action' if 'm.jd.com' in item[
                    'receiveUrl'] else 'http://help.jd.com/index.html'
                debug('fetching %s', url)
                ff.get(url)

                # NOTE(review): pickle.load executes whatever the file
                # contains - only safe while the cookie file is written by
                # this program alone. The file handle is never closed
                # explicitly.
                for _c in pickle.load(open(cookie_file, 'rb')):
                    #-#                    info('cookie data %s', pcformat(_c))
                    try:
                        ff.add_cookie(_c)
                    except:
                        # cookies that cannot be added are skipped silently
                        pass
#-#                        error('ignore except', exc_info=True)
                debug('读取完毕cookie %s', cookie_file)
            # at most two rounds: the second one runs after a login
            for _ in range(2):
                try:
                    info('尝试自动领取 %s ...\n%s', title, pcformat(item))
                    info('fetching %s', item['receiveUrl'])
                    ff.get(item['receiveUrl'])
                    no_btn = False
                    # look for the claim button: by id, then by link text,
                    # then by class name
                    try:
                        element = ff.find_element_by_id('btnSubmit')
                    except NoSuchElementException:
                        try:
                            element = ff.find_element_by_link_text('立即领取')
                        except NoSuchElementException:
                            try:
                                element = ff.find_element_by_class_name('btn')
                            except NoSuchElementException:
                                no_btn = True
                    # check whether the page shows a login prompt
                    need_login = False
                    if 'm.jd.com' in item['receiveUrl']:
                        try:
                            need_login = ff.find_element_by_xpath(
                                '//div[@class="login"]/a')
                        except NoSuchElementException:
                            pass
                        else:
                            #-#                            embed()
                            info('need_login %s', need_login.text)
                            need_login = True
                    if no_btn or need_login:
                        # not logged in?
                        info('没登录? 尝试登录')
                        #-#                        embed()
                        # log in to JD
                        try:
                            if 'm.jd.com' in item['receiveUrl']:
                                url = 'https://plogin.m.jd.com/user/login.action?appid=100&kpkey=&returnurl=%s' % quote(
                                    item['receiveUrl'])
                                info('open login page %s', url)
                                ff.get(url)
                                ff.find_element_by_id('username').send_keys(
                                    self.jd_user)
                                ff.find_element_by_id('password').send_keys(
                                    self.jd_password)
                                ff.find_element_by_id('loginBtn').click()
                                # check whether a captcha must be entered
                                code = None
                                try:
                                    code = ff.find_element_by_id('code')
                                except:  # no captcha needed
                                    pass
                                else:
                                    info('貌似需要输入验证码')
                                    #-#                                raise Exception('无法自动登录')
                                    # NOTE(review): embed() presumably opens
                                    # an interactive shell for manual captcha
                                    # entry and blocks until it is left -
                                    # confirm.
                                    embed()
                            else:
                                url = 'https://passport.jd.com/new/login.aspx?ReturnUrl=%s' % quote(
                                    item['receiveUrl'])
                                info('open login page %s', url)
                                ff.get(url)
                                ff.find_element_by_link_text('账户登录').click()
                                await asyncio.sleep(0.5)
                                ff.find_element_by_name('loginname').send_keys(
                                    self.jd_user)
                                await asyncio.sleep(0.5)
                                ff.find_element_by_name('nloginpwd').send_keys(
                                    self.jd_password)
                                await asyncio.sleep(0.5)
                                ff.find_element_by_id('loginsubmit').click()
                            await asyncio.sleep(2)
                        except:
                            # login failed: give up, no second round
                            info('登录京东时出错', exc_info=True)
                            break
                        else:
                            # login did not raise: save the cookies and start
                            # the next round, which reloads the coupon page
                            info('登录貌似成功了,保存cookie %s', cookie_file)
                            pickle.dump(ff.get_cookies(),
                                        open(cookie_file, 'wb'))
                            continue
                    else:
                        #-#                        info('element %s', element)
                        #-#                        embed()
                        # a button was found: skip it when it is marked
                        # unable or its text contains '查看', otherwise click
                        if 'btn-unable' in element.get_attribute('class'):
                            info('不能领取:%s', element.text)
                        elif element.text.find('查看') != -1:
                            info('不能领取(已领取过?):%s', element.text)
                        else:
                            try:
                                element.click()
                                # wait up to 3 seconds for the result text
                                element = WebDriverWait(ff, 3).until(
                                    EC.presence_of_element_located(
                                        (By.XPATH,
                                         '//p[@class="coupon-txt"]')))
                                info('领取结果 %s', element.text)
                            except:
                                # fallback: read the result from a 'btn'
                                # element, waiting up to 1 second
                                try:
                                    element = WebDriverWait(ff, 1).until(
                                        EC.presence_of_element_located(
                                            (By.CLASS_NAME, 'btn')))
                                    info('领取结果 %s', element.text)
                                except:
                                    error('获取领取结果时出错', exc_info=True)
#-#                                    embed()
                            finally:
                                debug('自动领取完成')
                                await asyncio.sleep(1)
                        break
                except:
                    # NOTE(review): bare except - also catches task
                    # cancellation and KeyboardInterrupt; the loop then goes
                    # on to the next round.
                    error('自动领取出错', exc_info=True)
        except:
            error('自动领取出错', exc_info=True)
        finally:
            pass
            #-#            embed()
            # always close the browser
            ff.quit()
#-#            display.stop()

        return rslt, err
Ejemplo n.º 13
0
    async def _process_each(self, text_in):
        """Fetch synthesized speech audio for one piece of text.

        First requests an OAuth access token from
        ``https://openapi.baidu.com/oauth/2.0/token`` with the
        ``baidu_api_key`` / ``baidu_secret_key`` entries of ``self.conf``,
        then posts the text to ``http://tsn.baidu.com/text2audio``.

        :param text_in: text to synthesize, as ``str`` or UTF-8 encoded
            ``bytes``.
        :return: ``(audio, ok)``. ``audio`` is the raw body of an
            ``audio/mp3`` response, otherwise ``None``; ``ok`` is ``True``
            only when audio was obtained.
        :raises AssertionError: if no access token could be obtained.
        :raises UnicodeDecodeError: re-raised after logging if a request
            raises it.
        """
        s = None
        access_token = None
        resp = None
        try:
            url = 'https://openapi.baidu.com/oauth/2.0/token'
            args = {
                'grant_type': 'client_credentials',
                'client_id': self.conf['baidu_api_key'],
                'client_secret': self.conf['baidu_secret_key'],
            }
            resp = await self.sess.post(url, data=args, timeout=30)
        except asyncio.TimeoutError:
            info('TimeoutError %s %s', url, pcformat(args))
        except ClientError:
            error('ClientError %s %s', url, pcformat(args), exc_info=True)
        except UnicodeDecodeError:
            # resp may still be None here; the format string carries exactly
            # one placeholder per argument
            error('UnicodeDecodeError %s %s %s\n%s',
                  url,
                  pcformat(args),
                  pcformat(resp.headers) if resp else None,
                  (await resp.read()) if resp else None,
                  exc_info=True)
            raise
        else:
            data = await resp.json()
            if 'access_token' in data:
                access_token = data['access_token']
            else:
                error('%s: %s', data['error'], data['error_description'])
        finally:
            if resp:
                resp.release()

        # kept as an assert so callers still see AssertionError; the ``if``
        # below covers runs where asserts are disabled
        assert access_token
        if access_token:
            resp = None
            try:
                url = 'http://tsn.baidu.com/text2audio'
                args = {
                    # accept both bytes and str: str has no .decode()
                    'tex': text_in.decode('utf8') if isinstance(text_in, bytes) else text_in,
                    'lan': 'zh',
                    'tok': access_token,
                    'ctp': '1',
                    'cuid': '00000000',
                    'spd': '6',
                    'pit': '5',
                    'vol': '9',
                    'per': '0',
                }
                resp = await self.sess.post(url, data=args, timeout=20)
            except asyncio.TimeoutError:
                info('TimeoutError %s %s', url, pcformat(args))
            except ClientError:
                error('ClientError %s %s', url, pcformat(args), exc_info=True)
            except UnicodeDecodeError:
                error('UnicodeDecodeError %s %s %s\n%s',
                      url,
                      pcformat(args),
                      pcformat(resp.headers) if resp else None,
                      (await resp.read()) if resp else None,
                      exc_info=True)
                raise
            else:
                if resp.headers['Content-Type'] == 'audio/mp3':
                    s = await resp.read()
                elif resp.headers['Content-Type'] == 'application/json':
                    data = await resp.json()
                    error('%s: %s', data['err_no'], data['err_msg'])
                else:
                    error('未知头 %s', pcformat(resp.headers))
            finally:
                if resp:
                    resp.release()

        # report success only when audio data was actually received
        ok = s is not None
        return s, ok
Ejemplo n.º 14
0
    async def _getData(self, url, *args, **kwargs):
        """Issue a GET request through ``self.sess`` and decode the body.

        Keyword arguments starting with ``my_`` are consumed here; every other
        positional/keyword argument is forwarded to ``self.sess.get``.

        my_fmt (default ``'str'``) selects the decoding, with its options:
            str:
                my_str_encoding
            json:
                my_json_encoding
                my_json_loads
            bytes:
                None
            stream (``'streaming'`` is accepted as an alias):
                my_streaming_chunk_size
                my_streaming_cb  -- called as ``cb(url, chunk)`` per chunk
        my_retry (default 1) is the maximum number of attempts.

        Returns:
            ``(resp, data, ok)``: the response of the last attempt (already
            released; ``None`` if that attempt never got a response), the
            decoded body (``None`` for streams or on failure) and whether an
            attempt completed.  In ``json`` mode, if the body cannot be
            parsed after a content-type mismatch, ``data`` is the raw text and
            ``ok`` is still ``True``.

        Network and decoding errors are logged, never raised.
        """
        resp, data, ok = None, None, False
        str_encoding = kwargs.pop('my_str_encoding', None)
        fmt = kwargs.pop('my_fmt', 'str')
        json_encoding = kwargs.pop('my_json_encoding', None)
        json_loads = kwargs.pop('my_json_loads', json.loads)
        streaming_chunk_size = kwargs.pop('my_streaming_chunk_size', 1024)
        streaming_cb = kwargs.pop('my_streaming_cb', None)
        max_try = kwargs.pop('my_retry', 1)

        for nr_try in range(max_try):
            # Log prefix such as "2/3 "; empty when retries are disabled.
            attempt = ('%s/%s ' % (nr_try + 1, max_try)) if max_try > 1 else ''
            # Forget the previous attempt's (already released) response so a
            # failure inside ``get`` does not hand back a stale object.
            resp = None
            # Raw body, filled only when a lenient decode was needed; kept so
            # the UnicodeDecodeError handler below can always log a sample.
            raw = b''
            try:
                resp = await self.sess.get(url, *args, **kwargs)
                if fmt == 'str':
                    try:
                        data = await resp.text(encoding=str_encoding)
                    except UnicodeDecodeError:
                        raw = await resp.read()
                        # bytes.decode() rejects None, so fall back to utf-8
                        # when the caller did not name an encoding.
                        data = raw.decode(str_encoding or 'utf-8', 'ignore')
                elif fmt == 'json':
                    try:
                        data = await resp.json(encoding=json_encoding,
                                               loads=json_loads)
                    except aiohttp.client_exceptions.ContentTypeError:
                        # Wrong content type: read the text and parse it
                        # ourselves.
                        try:
                            data = await resp.text(encoding=json_encoding)
                        except UnicodeDecodeError:
                            raw = await resp.read()
                            data = raw.decode(json_encoding or 'utf-8',
                                              'ignore')
                        try:
                            data = json_loads(data)
                        except Exception:
                            # Keep the raw text in ``data`` for the caller.
                            error('json except', exc_info=True)
                elif fmt == 'bytes':
                    data = await resp.read()
                elif fmt in ('stream', 'streaming'):
                    while True:
                        chunk = await resp.content.read(streaming_chunk_size)
                        if not chunk:
                            break
                        streaming_cb(url, chunk)
                ok = True
                break
            except aiohttp.ServerDisconnectedError:
                info('%sServerDisconnectedError %s %s %s', attempt, url,
                     pcformat(args), pcformat(kwargs))
            except asyncio.TimeoutError:
                info('%sTimeoutError %s %s %s', attempt, url, pcformat(args),
                     pcformat(kwargs))
            except aiohttp.ClientConnectionError:
                error('%sConnectionError %s %s %s', attempt, url,
                      pcformat(args), pcformat(kwargs))
            except aiohttp.client_exceptions.ContentTypeError:
                error('%sContentTypeError %s %s %s', attempt, url,
                      pcformat(args), pcformat(kwargs), exc_info=True)
            except aiohttp.ClientError:
                error('%sClientError %s %s %s', attempt, url, pcformat(args),
                      pcformat(kwargs), exc_info=True)
            except UnicodeDecodeError:
                error('%sUnicodeDecodeError %s %s %s %s\n%s', attempt, url,
                      pcformat(args), pcformat(kwargs),
                      pcformat(resp.headers), raw[:100], exc_info=True)
                # Retrying cannot fix an undecodable body.
                break
            except Exception:
                error('%sException %s %s %s', attempt, url, pcformat(args),
                      pcformat(kwargs), exc_info=True)
            finally:
                if resp is not None:
                    resp.release()

        return resp, data, ok
Ejemplo n.º 15
0
    async def _get_real_url_4mmb(self, url):
        """Follow a manmanbuy redirect link until the target URL is found.

        While the current URL contains ``manmanbuy`` and has a path, it is
        fetched through ``self.net.getData`` and the next URL is taken from
        the final response URL: from its ``url``/``tourl`` query parameter
        (parsed as gbk), or from a URL embedded after the
        ``http://cu.manmanbuy.com/`` prefix.  The search stops on a failed
        fetch, on an unexpected status code, after more than 5 fetches, or
        when a ``404.html`` page is reached (the last redirect in the
        response history is then inspected for the target).

        Args:
            url: link to resolve; may be ``None``.

        Returns:
            The resolved URL, or the last URL reached when resolution stops
            early.  ``None`` when ``url`` is ``None``.
        """
        if url is None:
            return None

        def _query_target(link, keys):
            """Return the first of ``keys`` in ``link``'s query, else None."""
            params = parse_qs(urlparse(link).query, encoding='gbk')
            for key in keys:
                if key in params:
                    return params[key][0]
            return None

        cu_prefix = 'http://cu.manmanbuy.com/http'
        # The embedded URL starts at the trailing 'http' of ``cu_prefix``.
        embedded_offset = len(cu_prefix) - 4
        redirect_marker = '.manmanbuy.com/redirectUrl.aspx?'
        tmall_prefixes = ('http://detail.tmall.com/',
                          'https://detail.tmall.com/')

        original_url = url
        nr_redirect = 0
        while 'manmanbuy' in url and urlparse(url).path:
            r, _, ok = await self.net.getData(url, timeout=7, my_fmt='bytes', my_retry=2)
            nr_redirect += 1
            if not ok:
                break

            # r.url is stringified once; it is not used as a str directly.
            final_url = str(r.url)
            if r.status in (200, 400):
                url = final_url
                if 'url=' in url:  # matches both 'url=' and 'tourl='
                    url = _query_target(url, ('url', 'tourl')) or url
                elif r.status == 400:
                    if url.count('http') > 1:
                        if url.startswith(cu_prefix):
                            url = url[embedded_offset:]
                    else:
                        info('real url not found: code %s %s %s', r.status, original_url, r.url)
            else:
                # Any other status ends the search after one extraction try.
                if cu_prefix in final_url:
                    start = final_url.index(cu_prefix) + embedded_offset
                    url = final_url[start:]
                elif final_url.startswith(tmall_prefixes):
                    url = final_url
                elif redirect_marker in final_url:
                    url = _query_target(final_url, ('tourl', )) or url
                else:
                    warn('real url not found: code %s %s %s', r.status, original_url, r.url)
                break

            if nr_redirect > 5:
                warn('too many redirect %s', original_url)
                break

            if url.endswith('404.html'):
                if r.history:  # look for the target in the redirect history
                    last_hop = str(r.history[-1].url)
                    if 'url=' in last_hop:  # matches 'url=' and 'tourl='
                        url = _query_target(last_hop, ('url', 'tourl')) or url
                    else:
                        warn('real url not found: %s (history %s)', original_url, r.history[-1].url)
                else:
                    warn('real url not found: %s (only found %s)', original_url, url)
                break

        return url
Ejemplo n.º 16
0
            map(lambda x, d=kwargs: d.get(x, ''), ('source', 'sid', 'show_title', 'item_url', 'real_url', 'pic_url', 'get_time', 'sess'))
        if not sess:
            sess = self.getSess()
        try:
            item = Item(source=source,
                        sid=sid,
                        show_title=show_title,
                        item_url=item_url,
                        real_url=real_url[:1024],
                        pic_url=pic_url,
                        get_time=get_time)
            sess.add(item)
            sess.commit()
            sess.close()
        except:
            error('create item error', exc_info=True)

    def clean(self):
        """No-op placeholder: does nothing and returns ``None``."""
        return None


#-#        info('closed.')

if __name__ == '__main__':
    # Manual smoke run: log the 'mmb' items fetched from 240 seconds ago
    # onwards, then whether item 882564 of source 'mmb' exists.
    history = HistoryDB()
    since = datetime.now() - timedelta(seconds=240)
    recent_items = history.getRecentItems('mmb', since)
    info(pcformat(recent_items))
    item_exists = history.existsItem('mmb', 882564)
    info(item_exists)