async def changeDomainIp(self, ip, sub_domain=''):
    """Point the DNS record of ``sub_domain`` at ``ip``.

    Looks up the existing record and updates it only when the stored IP
    differs; creates the record when none matches.  When the apex record
    ('@') changes, a notification mail is sent on a best-effort basis.

    :param ip: new IP address (must be truthy)
    :param sub_domain: sub-domain name; '@' denotes the apex record
    """
    assert ip
    r = await self.getRecordList(sub_domain)
    if r:
        r = r[0]
        name, updated_on, record_id, old_ip = r['name'], r[
            'updated_on'], r['id'], r['value']
        #-# info('%s(id %s) ip %s @%s', name, record_id, old_ip, updated_on)
        if old_ip != ip:
            rtn = await self.changeRecord(record_id, ip, sub_domain)
            info('%s(id %s) ip %s -> %s res:\n%s', name, record_id, old_ip, ip, pcformat(rtn))
            if sub_domain == '@':
                # Mail notification is best-effort: failures are logged but
                # never fatal.  (FIX: was a bare ``except: pass`` that also
                # swallowed SystemExit/KeyboardInterrupt without a trace.)
                try:
                    sendmail_conf = getConf('config/dn.yaml', root_key='sendmail')
                    send_mail(
                        sendmail_conf['smtp_server'], sendmail_conf['smtp_port'],
                        sendmail_conf['user'], sendmail_conf['pwd'],
                        sendmail_conf['user'], (sendmail_conf['user'], ), [],
                        f'ip地址变化 {self.conf["Domain"]} {datetime.now().strftime("%Y%m%d %H:%M:%S")}',
                        f'{old_ip} --> {ip}', [])
                except Exception:
                    info('send notify mail failed', exc_info=True)
        else:
            info('%s(id %s) ip not change %s', name, record_id, old_ip)
    else:
        info('no record matching sub domain %s, try to create ...', sub_domain)
        rtn = await self.addRecord(ip, sub_domain)
        info('%s create %s res:\n%s', sub_domain, ip, pcformat(rtn))
def createQrCodeTicket(data):
    """Create a WeChat QR-code ticket (rarely called directly).

    :param data: request payload for the ``qrcode/create`` API
    :returns: (via gen.Return) the decoded JSON response on success,
              None on any failure
    """
    ret_data = None
    access_token = yield WXLoginManager.getAccessToken()
    url = 'https://api.weixin.qq.com/cgi-bin/qrcode/create?access_token={ACCESS_TOKEN}'.format(ACCESS_TOKEN=access_token)
    info('url: %s', url)
    info('data: %s', pcformat(data))
    #-# info('body: %s', json.dumps(data))
    req = HTTPRequest(url, method='POST', body=json.dumps(data, ensure_ascii=False))  # , validate_cert = False)
    httpc_lient = AsyncHTTPClient()
    try:
        resp = yield gen.Task(httpc_lient.fetch, req)
        info('resp: %s', resp.body)
        j_data = json.loads(resp.body)
    except Exception:  # FIX: was a bare except; keep logging, stop masking SystemExit
        error('', exc_info=True)
    else:
        if j_data.get('errcode', None):
            info('创建二维码ticket出错: errcode %s, errmsg: %s', j_data['errcode'], j_data.get('errmsg', ''))
        else:
            ret_data = j_data
    raise gen.Return(ret_data)
def sendTplMsg(tpl_id, openid, url, in_data):
    """Send a WeChat template message.

    :param tpl_id: template id
    :param openid: target user's openid
    :param url: link attached to the message
    :param in_data: template ``data`` payload
    :returns: (via gen.Return) True on success, False on failure
    """
    ret_data = False
    access_token = yield WXLoginManager.getAccessToken()
    # FIX: the API endpoint used to be assigned to the ``url`` parameter,
    # clobbering the caller's link before it was copied into ``data`` —
    # every template message ended up linking to the API endpoint itself.
    api_url = '''https://api.weixin.qq.com/cgi-bin/message/template/send?access_token={ACCESS_TOKEN}'''.format(ACCESS_TOKEN=access_token)
    data = {'touser': openid,
            'template_id': tpl_id,
            'url': url,
            'data': in_data,
            }
    info('url: %s', api_url)
    info('data: %s', pcformat(data))
    #-# info('body: %s', json.dumps(data))
    req = HTTPRequest(api_url, method='POST', body=json.dumps(data, ensure_ascii=False))  # , validate_cert = False)
    httpc_lient = AsyncHTTPClient()
    try:
        resp = yield gen.Task(httpc_lient.fetch, req)
        info('resp: %s', resp.body)
        j_data = json.loads(resp.body)
    except Exception:  # FIX: was a bare except
        error('', exc_info=True)
    else:
        # FIX: use .get() like createQrCodeTicket — a response without
        # 'errcode' used to raise KeyError here.
        if j_data.get('errcode', None):
            info('发送模板消息出错: errcode %s, errmsg: %s', j_data['errcode'], j_data.get('errmsg', ''))
        else:
            ret_data = True
    raise gen.Return(ret_data)
async def addRecord(self, val, sub_domain='', record_type='A', record_line='默认'):
    """Create a DNS record via the provider's RecordCreate API.

    :param val: record value (e.g. the IP for an A record)
    :param sub_domain: optional sub-domain; omitted from the request when empty
    :param record_type: record type, default 'A'
    :param record_line: resolution line, default '默认'
    :returns: the created record dict, or {} on failure
    """
    rtn = {}
    url = self.conf['url']
    d = {
        'domain': self.conf['Domain'],
        'recordLine': record_line,
        'value': val,
    }
    if sub_domain:
        d['subDomain'] = sub_domain
    if record_type:
        d['recordType'] = record_type
    d.update(self.getPubArg('RecordCreate'))
    self.Sign('GET', url, d)
    _, j_data, ok = await self._getData(url, params=d, timeout=10, my_fmt='json', my_json_encoding='utf8')
    # FIX: guard against a None / non-dict body — _getData can report ok=True
    # while leaving the payload unparsed, which made j_data['code'] raise.
    if ok and isinstance(j_data, dict) and j_data.get('code') == 0:
        #-# debug('resp %s', pcformat(j_data))
        rtn = j_data.get('data', {}).get('record', {})
    else:
        info('error ? %s %s', ok, pcformat(j_data))
    return rtn
def _addUserWord(self):
    """Register user-defined phrases from the concern config with jieba.

    Only words from the 'inc' lists become custom tokens; 'exc' words are
    deliberately not added to the segmenter.
    """
    l_dynamic_word = [m for m in chain(*(x.get('inc', []) for x in self.l_concern)) if len(m) > 0]
    # longest-first ordering, duplicates removed
    l_dynamic_word = sorted(set(l_dynamic_word), key=lambda x: len(x) if x else 0, reverse=True)
    debug(pcformat(l_dynamic_word))
    # FIX(idiom): was ``list(map(lambda w: jieba.add_word(...) if w else 0, ...))``
    # — a throwaway list built purely for side effects; a plain loop is clearer.
    for w in l_dynamic_word:
        if w:
            jieba.add_word(w, freq=1500, tag=None)
    debug('added %s include/exclude word(s) to jieba', len(l_dynamic_word))
def get_from_linkstars(self, url, source=''):
    """Resolve a linkstars.com click-through link to its target URL.

    :param url: candidate URL (may be None or empty)
    :param source: label used only in (commented-out) debug logging
    :returns: the target from the 'to' query parameter when ``url`` is a
              linkstars click link, otherwise ``url`` itself; '' for falsy input
    """
    real_url = url
    if url and url.startswith('https://www.linkstars.com/click.php?'):
        #-# debug('%s%slinkstars url found %s', source, ' ' if source else '', url)
        up = urlparse(url)
        d_p = parse_qs(up.query)
        for _k in ('to', ):
            try:
                if _k in d_p:
                    real_url = d_p[_k][0]
                    break
            except UnicodeDecodeError as e:
                # FIX: format string had two %s placeholders for one argument
                warn('d_p %s', pcformat(d_p))
                raise e
    #-# if real_url != url:
    #-#     debug('%s%sfound url from linkstars %s', source, ' ' if source else '', real_url)
    return real_url or ''
def getQrPicBySceneId(scene_id, want_temp=True):
    """Get QR-code image bytes for a WeChat scene id.

    Tickets are cached in redis keyed by scene id, so repeated calls within
    the ticket lifetime skip the create-ticket round trip.

    :param scene_id: scene value; int for temporary codes, int (1..100000)
                     or str (len 1..64) for permanent codes
    :param want_temp: True for a temporary code (default), False for permanent
    :returns: (via gen.Return) QR image bytes, or None on any error
    """
    pic_data = None
    max_expire = 604800  # 7 days — WeChat's maximum for temporary QR codes
    r = m_redis.get_instance('ad')
    c_k = '_Z_WX_QR_%s' % scene_id
    ticket = r.get(c_k)
    if not ticket:
        # Validate the scene id and build the create-ticket payload.
        if want_temp:
            if not isinstance(scene_id, int):
                info('参数错误: 临时二维码的scene_id必须为32位非0整型!')
                raise gen.Return(pic_data)
            data = {'expire_seconds': max_expire, 'action_name': 'QR_SCENE', 'action_info': {'scene': {'scene_id': scene_id}}}
        else:
            if isinstance(scene_id, int):
                if not (0 < scene_id <= 100000):
                    info('参数错误: 永久二维码的scene_id为整数时,范围为(0,100000]')
                    raise gen.Return(pic_data)
                data = {'expire_seconds': max_expire, 'action_name': 'QR_LIMIT_SCENE', 'action_info': {'scene': {'scene_id': scene_id}}}
            elif isinstance(scene_id, str):
                if not (0 < len(scene_id) <= 64):
                    info('参数错误: 永久二维码的scene_id为字符串时,长度范围为[1,64]')
                    raise gen.Return(pic_data)
                data = {'expire_seconds': max_expire, 'action_name': 'QR_LIMIT_STR_SCENE', 'action_info': {'scene': {'scene_str': scene_id}}}
            else:
                info('参数错误: 永久二维码的scene_id应该为int或str')
                raise gen.Return(pic_data)
        j_data = yield WXLoginManager.createQrCodeTicket(data)
        info('%s', pcformat(j_data))
        # FIX: createQrCodeTicket returns None on failure; indexing it raised
        # TypeError here.
        if not j_data:
            raise gen.Return(pic_data)
        ticket = j_data['ticket']
        # FIX: permanent-QR responses carry no 'expire_seconds'; default to
        # max_expire instead of raising KeyError.
        expire_at = j_data.get('expire_seconds', max_expire)
        # NOTE(review): argument order matches legacy redis-py
        # setex(name, value, time); redis-py >= 3.0 is setex(name, time, value)
        # — confirm the installed client version.
        r.setex(c_k, ticket, expire_at)
    if ticket:
        pic_data = yield WXLoginManager.getQrCodeByTicket(ticket)
        # FIX: close the debug-dump file handle (was a bare open().write()).
        with open('/tmp/t.jpg', 'wb') as f:
            f.write(pic_data)
    raise gen.Return(pic_data)
def main_test(): handler = ToolsMixin() #-# usc = handler.user_service_conn() #-# r = yield usc.addScore(handler, {'uid': 10000000, #-# 'device_id': '999', #-# 'event_type': UserLib.EVENT_SCORE_REFUND, #-# 'event_sub_type': 0, #-# 'score': 23 * 100, #-# 'order_id': '', #-# 'pay_id': 0, #-# 'remark': '测试充值', #-# 'ip': '192.168.199.112', #-# 'os_type': '', #-# }) # r = yield UserLib.checkTicket(handler, {'reg_source': None, 'ticket': sys.argv[1]}) #-# r = yield UserLib.getGrandparent(handler, 10080990) #-# r = UserLib.clear_push_cache(handler, 10000000, 'ios') #-# r = yield UserLib.addUser(handler, {'reg_qid': 13512345679, #-# 'token': 'test_token', #-# 'reg_source': 'mb', #-# 'invite_uid': '', #-# 'ip': '192.168.199.112', #-# 'os_type': 'android', #-# 'app_version': '1.3.5.7', #-# 'channel': 'self_test', #-# 'nickname': '', #-# 'gender': '', #-# 'figure_url': '', #-# 'province': '', #-# 'city': '', #-# 'country': '', #-# } #-# ) #-# r = yield UserLib.checkUserVipLevel(handler, 1000, UserLib.USER_VIP_TYPE_INVITE | UserLib.USER_VIP_TYPE_BANKER) r = yield UserLib.checkUserVipLevel(handler, 1000, UserLib.USER_VIP_TYPE_NONE) from applib.tools_lib import pcformat info('r: %s', pcformat(r))
def getConf(conf_path='./pn_conf.yaml', root_key=None, force_reload=False):
    """Load a YAML config file with module-level caching.

    :param conf_path: path to the YAML file (cached by absolute path)
    :param root_key: when given, return only that top-level section
                     (logs an error and returns None when the key is absent)
    :param force_reload: bypass the cache and re-read the file
    :returns: the parsed config dict, or the ``root_key`` section of it
    """
    global _cache_conf
    conf_file_path = os.path.abspath(conf_path)
    conf = _cache_conf.get(conf_file_path, None)
    if conf is None or force_reload:
        if force_reload:
            debug('force load conf from file %s', conf_file_path)
        assert os.path.exists(conf_file_path)
        # FIX: close the file handle (was ``yaml.load(open(...))``).
        # FullLoader avoids arbitrary object construction:
        # https://github.com/yaml/pyyaml/wiki/PyYAML-yaml.load(input)-Deprecation
        with open(conf_file_path) as f:
            conf = yaml.load(f, Loader=yaml.FullLoader)
        _cache_conf[conf_file_path] = conf
        #-# debug('load done %s. %s key(s)', conf_file_path, len(conf))
    #-# else:
    #-#     debug('get conf from cache %s', conf_file_path)
    if root_key:
        if root_key not in conf:
            error('conf no root_key %s in %s !!!', root_key, conf_file_path)
        conf = conf.get(root_key)
    return conf


if __name__ == '__main__':
    from applib.tools_lib import pcformat
    conf = getConf('./config/pn_conf.yaml')
    info(pcformat(conf))
attachlist_title = post_content.xpath( forum_cfg['post_attachlist_title'])[0] attachlist_url = post_content.xpath( forum_cfg['post_attachlist_url'])[0] attach_info = (attachlist_title, urljoin(forum_cfg["post_base_url"], attachlist_url)) image_list = post_content.xpath( './/img[starts-with(@src, "http")]/@src') #-# info(f'{pcformat(image_list)}\n{attach_info}') elif '无权' in etree.tounicode(tree): info(f'无权查看 {title} {url}') return content, attach_size, image_list, attach_info if __name__ == '__main__': loop = asyncio.get_event_loop() try: dz = DiscuzManager() x = loop.run_until_complete(dz.getPostList(loop)) info(pcformat(x)) except KeyboardInterrupt: info('cancel on KeyboardInterrupt..') #-# task.cancel() loop.run_forever() #-# task.exception() finally: loop.stop()
async def postData(self, url, *args, **kwargs):
    """Submit data via POST; essentially mirrors ``getData``.

    Avoid clashing with the private ``my_*`` keyword parameters, which are
    stripped from **kwargs before it reaches aiohttp:
    ``my_fmt`` ('str'/'json'/'bytes'/'stream'), ``my_str_encoding``,
    ``my_json_encoding``, ``my_json_loads``, ``my_streaming_chunk_size``,
    ``my_streaming_cb``, ``my_retry``.

    :returns: (resp, data, ok) — the aiohttp response, the body decoded
              per ``my_fmt``, and a success flag
    """
    resp, data, ok = None, None, False
    # strip the private control arguments so aiohttp never sees them
    str_encoding = kwargs.pop('my_str_encoding', None)
    fmt = kwargs.pop('my_fmt', 'str')
    json_encoding = kwargs.pop('my_json_encoding', None)
    json_loads = kwargs.pop('my_json_loads', json.loads)
    streaming_chunk_size = kwargs.pop('my_streaming_chunk_size', 1024)
    streaming_cb = kwargs.pop('my_streaming_cb', None)
    max_try = kwargs.pop('my_retry', 1)
    for nr_try in range(max_try):
        try:
            #-# debug('url %s %s %s', url, pcformat(args), pcformat(kwargs))
            resp = await self.sess.post(url, *args, **kwargs)
            if fmt == 'str':
                try:
                    data = await resp.text(encoding=str_encoding)
                except UnicodeDecodeError:
                    # fall back to a lossy decode rather than failing the request
                    txt = await resp.read()
                    data = txt.decode(str_encoding, 'ignore')
                    warn('ignore decode error from %s', url)
                #-# except ContentEncodingError:
                except aiohttp.client_exceptions.ContentTypeError:
                    warn('ignore content encoding error from %s', url)
            elif fmt == 'json':
                # content_type=None: accept JSON served with a wrong MIME type
                data = await resp.json(encoding=json_encoding, loads=json_loads, content_type=None)
                #-# if not data:
                #-#     if 'json' not in resp.headers.get('content-type', ''):
                #-#         warn('data not in json? %s', resp.headers.get('content-type', ''))
            elif fmt == 'bytes':
                data = await resp.read()
            elif fmt == 'stream':
                # hand chunks to the callback until the body is exhausted
                while 1:
                    chunk = await resp.content.read(streaming_chunk_size)
                    if not chunk:
                        break
                    streaming_cb(url, chunk)
            ok = True
            break
        except asyncio.TimeoutError:
            if nr_try == max_try - 1:  # only log the last timeout of the retry series
                debug('%sTimeoutError %s', ('%s/%s ' % (nr_try + 1, max_try)) if max_try > 1 else '', url)
        except aiohttp.client_exceptions.ClientConnectorError:
            error('%sClientConnectionError %s %s %s', ('%s/%s ' % (nr_try + 1, max_try)) if max_try > 1 else '', url, pcformat(args), pcformat(kwargs))
        except ConnectionResetError:
            error('%sConnectionResetError %s %s %s', ('%s/%s ' % (nr_try + 1, max_try)) if max_try > 1 else '', url, pcformat(args), pcformat(kwargs))
        except aiohttp.client_exceptions.ContentTypeError:
            error('%sContentTypeError %s %s %s', ('%s/%s ' % (nr_try + 1, max_try)) if max_try > 1 else '', url, pcformat(args), pcformat(kwargs), exc_info=True)
            # log a snippet of the body that failed content-type negotiation
            data = await resp.text(encoding=str_encoding)
            info('data %s', data[:50])
        except ClientError:
            error('%sClientError %s %s %s', ('%s/%s ' % (nr_try + 1, max_try)) if max_try > 1 else '', url, pcformat(args), pcformat(kwargs), exc_info=True)
        except UnicodeDecodeError:
            error('%sUnicodeDecodeError %s %s %s %s\n%s', ('%s/%s ' % (nr_try + 1, max_try)) if max_try > 1 else '', url, pcformat(args), pcformat(kwargs), pcformat(resp.headers), await resp.read(), exc_info=True)
            #-# raise e
        except json.decoder.JSONDecodeError:
            error('%sJSONDecodeError %s %s %s', ('%s/%s ' % (nr_try + 1, max_try)) if max_try > 1 else '', url, pcformat(args), pcformat(kwargs), exc_info=True)
        except aiodns.error.DNSError:
            error('%sDNSError %s %s %s', ('%s/%s ' % (nr_try + 1, max_try)) if max_try > 1 else '', url, pcformat(args), pcformat(kwargs))
        finally:
            # always release the connection back to the pool, success or not
            if resp:
                resp.release()
    return resp, data, ok
async def GetJdCouponWithCookie(self, title, item):
    """Automatically claim an ordinary JD coupon with a cookie-backed
    Firefox session.  Reference: http://selenium-python.readthedocs.io/index.html

    :param title: display title of the coupon (logging only)
    :param item: coupon dict; ``item['receiveUrl']`` is the claim page
    :returns: (rslt, err) — NOTE(review): both stay '' in this
              implementation; outcomes are only logged
    """
    rslt, err = '', ''
    if self.conf['geckodriver'] not in sys.path:
        sys.path.append(self.conf['geckodriver'])
    opt = Options()
    #-# opt.add_argument('--headless')
    ff = webdriver.Firefox(firefox_options=opt)
    #-# display = Display(visible=0, size=(800, 600))
    #-# display.start()
    #-# ff = webdriver.Firefox()
    # normalize scheme-less mobile coupon URLs
    #-# if not item['receiveUrl'].startswith('http'):
    if item['receiveUrl'].startswith('coupon.m.jd.com'):
        item['receiveUrl'] = 'http://' + item['receiveUrl']
    # mobile and desktop JD use different login domains, hence two cookie jars
    if 'm.jd.com' in item['receiveUrl']:
        cookie_file = os.path.abspath(
            os.path.join(self.conf['cookie_dir'], 'plogin.m.jd.com.cookie.pkl'))
    else:
        cookie_file = os.path.abspath(
            os.path.join(self.conf['cookie_dir'], 'passport.jd.com.cookie.pkl'))
    try:
        if os.path.exists(cookie_file):
            # Load a same-site page first so add_cookie() applies to the
            # right domain, then replay the pickled cookies.
            debug('读取已有cookie %s', cookie_file)
            #-# ff.get('https://home.m.jd.com' if 'm.jd.com' in item['receiveUrl'] else 'http://help.jd.com/index.html')
            #-# ff.get('https://so.m.jd.com/category/all.html?searchFrom=bysearchbox' if 'm.jd.com' in item['receiveUrl'] else 'http://help.jd.com/index.html')
            url = 'https://p.m.jd.com/cart/cart.action' if 'm.jd.com' in item[
                'receiveUrl'] else 'http://help.jd.com/index.html'
            debug('fetching %s', url)
            ff.get(url)
            for _c in pickle.load(open(cookie_file, 'rb')):
                #-# info('cookie data %s', pcformat(_c))
                try:
                    ff.add_cookie(_c)
                except:
                    pass
                    #-# error('ignore except', exc_info=True)
            debug('读取完毕cookie %s', cookie_file)
        # at most two attempts: the first may be spent on a login round-trip
        for _ in range(2):
            try:
                info('尝试自动领取 %s ...\n%s', title, pcformat(item))
                info('fetching %s', item['receiveUrl'])
                ff.get(item['receiveUrl'])
                # probe the several claim-button variants JD pages use
                no_btn = False
                try:
                    element = ff.find_element_by_id('btnSubmit')
                except NoSuchElementException:
                    try:
                        element = ff.find_element_by_link_text('立即领取')
                    except NoSuchElementException:
                        try:
                            element = ff.find_element_by_class_name('btn')
                        except NoSuchElementException:
                            no_btn = True
                # check whether the page shows a login prompt
                need_login = False
                if 'm.jd.com' in item['receiveUrl']:
                    try:
                        need_login = ff.find_element_by_xpath(
                            '//div[@class="login"]/a')
                    except NoSuchElementException:
                        pass
                    else:
                        #-# embed()
                        info('need_login %s', need_login.text)
                        need_login = True
                if no_btn or need_login:
                    # not logged in? try to log in to JD
                    info('没登录? 尝试登录')
                    #-# embed()
                    try:
                        if 'm.jd.com' in item['receiveUrl']:
                            url = 'https://plogin.m.jd.com/user/login.action?appid=100&kpkey=&returnurl=%s' % quote(
                                item['receiveUrl'])
                            info('open login page %s', url)
                            ff.get(url)
                            ff.find_element_by_id('username').send_keys(
                                self.jd_user)
                            ff.find_element_by_id('password').send_keys(
                                self.jd_password)
                            ff.find_element_by_id('loginBtn').click()
                            # check whether a captcha is required
                            code = None
                            try:
                                code = ff.find_element_by_id('code')
                            except:
                                # no captcha needed
                                pass
                            else:
                                info('貌似需要输入验证码')
                                #-# raise Exception('无法自动登录')
                                embed()
                        else:
                            url = 'https://passport.jd.com/new/login.aspx?ReturnUrl=%s' % quote(
                                item['receiveUrl'])
                            info('open login page %s', url)
                            ff.get(url)
                            ff.find_element_by_link_text('账户登录').click()
                            await asyncio.sleep(0.5)
                            ff.find_element_by_name('loginname').send_keys(
                                self.jd_user)
                            await asyncio.sleep(0.5)
                            ff.find_element_by_name('nloginpwd').send_keys(
                                self.jd_password)
                            await asyncio.sleep(0.5)
                            ff.find_element_by_id('loginsubmit').click()
                            await asyncio.sleep(2)
                    except:
                        info('登录京东时出错', exc_info=True)
                        break
                    else:
                        # login seems OK; persist cookies and retry the claim
                        info('登录貌似成功了,保存cookie %s', cookie_file)
                        pickle.dump(ff.get_cookies(),
                                    open(cookie_file, 'wb'))
                        continue
                else:
                    #-# info('element %s', element)
                    #-# embed()
                    if 'btn-unable' in element.get_attribute('class'):
                        info('不能领取:%s', element.text)
                    elif element.text.find('查看') != -1:
                        info('不能领取(已领取过?):%s', element.text)
                    else:
                        try:
                            element.click()
                            # wait for the claim-result banner to appear
                            element = WebDriverWait(ff, 3).until(
                                EC.presence_of_element_located(
                                    (By.XPATH, '//p[@class="coupon-txt"]')))
                            info('领取结果 %s', element.text)
                        except:
                            try:
                                element = WebDriverWait(ff, 1).until(
                                    EC.presence_of_element_located(
                                        (By.CLASS_NAME, 'btn')))
                                info('领取结果 %s', element.text)
                            except:
                                error('获取领取结果时出错', exc_info=True)
                                #-# embed()
                        finally:
                            debug('自动领取完成')
                    await asyncio.sleep(1)
                    break
            except:
                error('自动领取出错', exc_info=True)
    except:
        error('自动领取出错', exc_info=True)
    finally:
        pass
        #-# embed()
    ff.quit()
    #-# display.stop()
    return rslt, err
async def _process_each(self, text_in):
    """Synthesize speech for one chunk of text via Baidu TTS.

    :param text_in: UTF-8 encoded bytes of the text to synthesize
    :returns: (audio_bytes_or_None, ok) — NOTE(review): ``ok`` is never set
              to False by this implementation; failure is signalled by the
              first element being None
    """
    ok = True
    s = None
    access_token = None
    #-# access_token = self.conf['baidu_access_token']
    resp = None
    # --- step 1: obtain an OAuth access token ---
    try:
        url = 'https://openapi.baidu.com/oauth/2.0/token'
        args = {
            'grant_type': 'client_credentials',
            'client_id': self.conf['baidu_api_key'],
            'client_secret': self.conf['baidu_secret_key'],
        }
        #-# info('query access token ...')
        resp = await self.sess.post(url, data=args, timeout=30)
    except asyncio.TimeoutError:
        info('TimeoutError %s %s', url, pcformat(args))
    except ClientError:
        error('ClientError %s %s', url, pcformat(args), exc_info=True)
    except UnicodeDecodeError as e:
        # FIX: format string had five %s placeholders for four arguments
        error('UnicodeDecodeError %s %s %s\n%s', url, pcformat(args), pcformat(resp.headers), await resp.read(), exc_info=True)
        raise e
    else:
        data = await resp.json()
        if 'access_token' in data:
            access_token = data['access_token']
            #-# info('access token: %s', access_token)
        else:
            error('%s: %s', data['error'], data['error_description'])
    finally:
        if resp:
            resp.release()
    # hard failure when no token; the ``if`` below is kept as a safety net
    # for runs under ``python -O`` where asserts are stripped
    assert access_token
    if access_token:
        resp = None
        # --- step 2: request the synthesized audio ---
        try:
            #-# info('getting audio data ...')
            url = 'http://tsn.baidu.com/text2audio'
            args = {
                'tex': text_in.decode('utf8'),  # assumes utf-8 bytes input
                'lan': 'zh',
                'tok': access_token,
                'ctp': '1',
                'cuid': '00000000',
                'spd': '6',
                'pit': '5',
                'vol': '9',
                'per': '0',
            }
            #-# resp = await self.sess.post(url, data=json.dumps(args), timeout=15)
            resp = await self.sess.post(url, data=args, timeout=20)
        except asyncio.TimeoutError:
            info('TimeoutError %s %s', url, pcformat(args))
        except ClientError:
            error('ClientError %s %s', url, pcformat(args), exc_info=True)
        except UnicodeDecodeError as e:
            # FIX: placeholder/argument count mismatch, as in step 1
            error('UnicodeDecodeError %s %s %s\n%s', url, pcformat(args), pcformat(resp.headers), await resp.read(), exc_info=True)
            raise e
        else:
            # Baidu returns audio/mp3 on success, application/json on error
            if resp.headers['Content-Type'] == 'audio/mp3':
                data = await resp.read()
                s = data
            elif resp.headers['Content-Type'] == 'application/json':
                data = await resp.json()
                error('%s: %s', data['err_no'], data['err_msg'])
            else:
                error('未知头 %s', pcformat(resp.headers))
        finally:
            if resp:
                resp.release()
    return s, ok
async def _getData(self, url, *args, **kwargs):
    """Wrapped GET request.

    Private control keywords (stripped before aiohttp sees kwargs):
        my_fmt:
            str:    my_str_encoding
            json:   my_json_encoding my_json_loads
            bytes:  None
            stream: my_streaming_chunk_size my_streaming_cb
        my_retry: number of attempts

    :returns: (resp, data, ok) — the aiohttp response, the decoded body,
              and a success flag
    """
    resp, data, ok = None, None, False
    str_encoding = kwargs.pop('my_str_encoding', None)
    fmt = kwargs.pop('my_fmt', 'str')
    json_encoding = kwargs.pop('my_json_encoding', None)
    json_loads = kwargs.pop('my_json_loads', json.loads)
    streaming_chunk_size = kwargs.pop('my_streaming_chunk_size', 1024)
    streaming_cb = kwargs.pop('my_streaming_cb', None)
    max_try = kwargs.pop('my_retry', 1)
    for nr_try in range(max_try):
        try:
            #-# debug('url %s %s %s', url, pcformat(args), pcformat(kwargs))
            resp = await self.sess.get(url, *args, **kwargs)
            if fmt == 'str':
                try:
                    data = await resp.text(encoding=str_encoding)
                except UnicodeDecodeError:
                    # lossy fallback rather than failing the whole request
                    txt = await resp.read()
                    data = txt.decode(str_encoding, 'ignore')
                    #-# warn('ignore decode error from %s', url)
            elif fmt == 'json':
                try:
                    data = await resp.json(encoding=json_encoding, loads=json_loads)
                except aiohttp.client_exceptions.ContentTypeError:
                    # wrong MIME type: decode as text, then parse manually
                    try:
                        data = await resp.text(encoding=json_encoding)
                    except UnicodeDecodeError:
                        txt = await resp.read()
                        data = txt.decode(str_encoding, 'ignore')
                    try:
                        data = json.loads(data)
                    except Exception:  # FIX: was a bare except
                        error('json except', exc_info=True)
            elif fmt == 'bytes':
                data = await resp.read()
            elif fmt == 'stream':
                while 1:
                    chunk = await resp.content.read(streaming_chunk_size)
                    if not chunk:
                        break
                    streaming_cb(url, chunk)
            ok = True
            break
        except aiohttp.ServerDisconnectedError:
            info('%sServerDisconnectedError %s %s %s', ('%s/%s ' % (nr_try + 1, max_try)) if max_try > 1 else '', url, pcformat(args), pcformat(kwargs))
        except asyncio.TimeoutError:
            info('%sTimeoutError %s %s %s', ('%s/%s ' % (nr_try + 1, max_try)) if max_try > 1 else '', url, pcformat(args), pcformat(kwargs))
        except aiohttp.ClientConnectionError:
            error('%sConnectionError %s %s %s', ('%s/%s ' % (nr_try + 1, max_try)) if max_try > 1 else '', url, pcformat(args), pcformat(kwargs))
        except aiohttp.client_exceptions.ContentTypeError:
            error('%sContentTypeError %s %s %s', ('%s/%s ' % (nr_try + 1, max_try)) if max_try > 1 else '', url, pcformat(args), pcformat(kwargs), exc_info=True)
        except aiohttp.ClientError:
            error('%sClientError %s %s %s', ('%s/%s ' % (nr_try + 1, max_try)) if max_try > 1 else '', url, pcformat(args), pcformat(kwargs), exc_info=True)
        except UnicodeDecodeError:
            # FIX: ``txt`` was referenced here while the line assigning it was
            # commented out, so this handler raised NameError instead of
            # logging; read the body explicitly.
            txt = await resp.read() if resp else b''
            error('%sUnicodeDecodeError %s %s %s %s\n%s', ('%s/%s ' % (nr_try + 1, max_try)) if max_try > 1 else '', url, pcformat(args), pcformat(kwargs), pcformat(resp.headers), txt[:100], exc_info=True)
            break
        except Exception:
            error('%sException %s %s %s', ('%s/%s ' % (nr_try + 1, max_try)) if max_try > 1 else '', url, pcformat(args), pcformat(kwargs), exc_info=True)
        finally:
            # always release the connection back to the pool
            if resp:
                resp.release()
    return resp, data, ok
async def _get_real_url_4mmb(self, url):
    """Resolve a manmanbuy.com redirect/short link to the real product URL.

    Follows up to 5 redirect hops, extracting the target from 'url='/'tourl='
    query parameters (GBK-encoded) or from wrapper URLs of the form
    ``http://cu.manmanbuy.com/http...``.

    :param url: possibly-redirecting URL (may be None)
    :returns: the resolved URL, or the input unchanged when resolution fails
    """
    real_url = url
    if url is not None:
        raw_url = url
        nr_redirect = 0
        while url.find('manmanbuy') != -1 and urlparse(url).path:
            r, _, ok = await self.net.getData(url, timeout=7, my_fmt='bytes', my_retry=2)
            nr_redirect += 1
            if ok:
                if r.status == 200:
                    url = str(r.url)
                    #-# info('url=%s', url)
                    if 'url=' in url:  # found 'url=' or 'tourl='
                        up = urlparse(url)
                        d_p = parse_qs(up.query, encoding='gbk')
                        for _k in ('url', 'tourl'):
                            try:
                                if _k in d_p:
                                    url = d_p[_k][0]
                                    break
                            except UnicodeDecodeError as e:
                                # FIX: format string had two %s for one argument
                                warn('d_p %s', pcformat(d_p))
                                raise e
                elif r.status == 400:
                    url = str(r.url)
                    if 'url=' in url:  # found 'url=' or 'tourl='
                        up = urlparse(url)
                        d_p = parse_qs(up.query, encoding='gbk')
                        for _k in ('url', 'tourl'):
                            try:
                                if _k in d_p:
                                    url = d_p[_k][0]
                                    break
                            except UnicodeDecodeError as e:
                                warn('d_p %s', pcformat(d_p))  # FIX: placeholder count
                                raise e
                    elif url.count('http') > 1:
                        # e.g. http://cu.manmanbuy.com/http://... — strip the wrapper
                        for x in ('http://cu.manmanbuy.com/http', ):
                            if url.startswith(x):
                                url = raw_url[len(x) - 4:]
                                if url[0] == 's':  # https
                                    url = url[1:]
                                # FIX: 'from' value had no %s placeholder
                                info('got %s from %s', url, r.url)
                    else:
                        info('real url not found: code %s %s %s', r.status, raw_url, r.url)
                else:
                    x = 'http://cu.manmanbuy.com/http'
                    y = '.manmanbuy.com/redirectUrl.aspx?'
                    # FIX: r.url is a URL object (str()-ed everywhere else);
                    # operate on its string form for slicing/startswith too.
                    if x in str(r.url):
                        url = str(r.url)[len(x) - 4:]
                        if url[0] == 's':  # https
                            url = url[1:]
                        #-# debug('url from bad url: %s -> %s', raw_url, url)
                    elif str(r.url).startswith(('http://detail.tmall.com/', 'https://detail.tmall.com/')):
                        url = str(r.url)
                    elif y in str(r.url):
                        up = urlparse(str(r.url))
                        d_p = parse_qs(up.query, encoding='gbk')
                        for _k in ('tourl', ):
                            try:
                                if _k in d_p:
                                    url = d_p[_k][0]
                                    #-# info('found url from %s', d_p)
                                    break
                            except UnicodeDecodeError as e:
                                warn('d_p %s', pcformat(d_p))  # FIX: placeholder count
                                raise e
                        if url:
                            break
                    else:
                        warn('real url not found: code %s %s %s', r.status, raw_url, r.url)
                        break
                if nr_redirect > 5:
                    warn('too many redirect %s', real_url)
                    break
                if url.endswith('404.html'):
                    if r.history:  # dig the target out of the redirect history
                        if 'url=' in str(r.history[-1].url):  # found 'url=' or 'tourl='
                            up = urlparse(str(r.history[-1].url))
                            d_p = parse_qs(up.query, encoding='gbk')
                            for _k in ('url', 'tourl'):
                                try:
                                    if _k in d_p:
                                        url = d_p[_k][0]
                                        break
                                except UnicodeDecodeError as e:
                                    warn('d_p %s', pcformat(d_p))  # FIX: placeholder count
                                    raise e
                        else:
                            warn('real url not found: %s (history %s)', real_url, r.history[-1].url)
                    else:
                        warn('real url not found: %s (only found %s)', real_url, url)
                    break
            else:
                #-# info('fetching url not ok %s', url)
                break
        real_url = url
    return real_url
map(lambda x, d=kwargs: d.get(x, ''), ('source', 'sid', 'show_title', 'item_url', 'real_url', 'pic_url', 'get_time', 'sess')) if not sess: sess = self.getSess() try: item = Item(source=source, sid=sid, show_title=show_title, item_url=item_url, real_url=real_url[:1024], pic_url=pic_url, get_time=get_time) sess.add(item) sess.commit() sess.close() except: error('create item error', exc_info=True) def clean(self): pass #-# info('closed.') if __name__ == '__main__': h = HistoryDB() info( pcformat( h.getRecentItems('mmb', datetime.now() + timedelta(seconds=-240)))) info(h.existsItem('mmb', 882564))