def testWrongPassword(self):
    """Decrypting with the wrong password must raise ValueError('Error: incorrect password')."""
    wallet = json.loads(self.json_string)
    decrypter = Decrypter(wallet, self.incorrect_password)
    with self.assertRaises(ValueError) as ctx:
        decrypter.decrypt()
    self.assertEqual(str(ctx.exception), 'Error: incorrect password')
def __init__(self, shopId, db=None, cookie=None):
    """Initialise per-shop scraping state.

    shopId -- the shop identifier used to build URLs and the Decrypter.
    db     -- optional database spec, passed through init_db().
    cookie -- optional login cookie string.
    """
    self.id = shopId
    self.cookie = cookie
    self.db = init_db(db)
    # Dedicated HTTP session plus the page-1 entry URL for this shop.
    self.session = requests.Session()
    self.home_url = self.api.format(id=shopId, page=1)
    self.decrypter = Decrypter(shopId)
    self.css_headers = CSS_HEADERS
    # Lazily populated fields; filled in by later fetch/parse calls.
    self.homepage = None
    self.proxy = None
    self.css = None
    self.css_proxy = None
    self.decrypt_dict = None
    self.more_page = None
    self._headers = None
# NOTE(review): this function is corrupted in this copy of the file — the text
# between 'Please enter wallet password: ' and "'address'])" was replaced by a
# '******' redaction marker, destroying the password-prompt, wallet-loading and
# address-extraction statements (the remaining tokens are unbalanced and will
# not parse). It cannot be reconstructed from this source alone; restore it
# from version control. The line below is kept byte-identical.
def main(args): password = args.password if not password: print('Please enter wallet password: '******'address']) private_key = None try: private_key = decrypter.decrypt().hex() except ValueError as err: print(str(err), file=sys.stderr) exit(-1) print('public address: ' + encoded_address) print('private key: ' + private_key) if args.address_qr: generate_qr_code(address, args.address_qr) if args.private_key_qr: generate_qr_code(private_key, args.private_key_qr)
# coding:utf-8
import codecs
from util.shop import parse_shop_css
from decrypt import Decrypter
from bs4 import BeautifulSoup as bs

# Step 1: read the HTML of the page whose text is obfuscated.
with codecs.open('txt/fake.html', 'r', encoding='utf-8') as html_file:
    html = html_file.read()

# Step 2: read the obfuscation CSS used by that page (in real use it would be
# located e.g. via a regex over the HTML and fetched over HTTP).
with codecs.open('txt/fake.css', 'r', encoding='utf-8') as css_file:
    css = css_file.read()

# Step 3: parse the whole page and grab the tag holding the encrypted content —
# here, the tag containing the shop address.
soup = bs(html, 'lxml')
address_tag = soup('div', class_='address-info')[0]
print(f'未解密地址标签:{address_tag}\n')

# Step 4: parse the CSS into the two decryption mapping dicts
# (see parse_shop_css for the exact rules).
cls_dict, css_dict = parse_shop_css(css)

# Step 5: decrypt the tag's text with a Decrypter instance;
# Decrypter.decrypt documents its parameters.
decrypter = Decrypter()
text = decrypter.decrypt(address_tag, cls_dict, css_dict)
print(f'解密后地址文本:{text}\n')
# Any other encrypted tag is decrypted the same way :)
class Comments(object):
    """Scraper for a Dianping (大众点评) shop's review pages.

    Fetches review pages for one shop, resolves the anti-scraping CSS glyph
    obfuscation via a Decrypter, and parses each review into Review objects,
    optionally persisting them to a database or a file.
    """

    # URL template for review pages; formatted with id= and page=.
    api = API_REVIEWS

    def __init__(self, shopId, db=None, cookie=None):
        # shopId: shop identifier; db: optional database spec; cookie: login cookie.
        self.id = shopId
        self.cookie = cookie
        self.db = init_db(db)
        self.session = requests.Session()
        self.home_url = self.api.format(id=shopId, page=1)
        self.decrypter = Decrypter(shopId)
        # Lazily populated by get()/get_shop_css()/the parsed_css decorator.
        self.homepage = None
        self.proxy = None
        self.css = None
        self.css_proxy = None
        self.decrypt_dict = None
        self.css_headers = CSS_HEADERS
        self.more_page = None
        self._headers = None

    @property
    def headers(self):
        """Request headers: explicit override > cookie-augmented HEADERS > LOGIN_HEADERS."""
        if self._headers:
            return self._headers
        elif self.cookie:
            # NOTE(review): mutates the shared module-level HEADERS dict in place —
            # presumably intentional, but it leaks the cookie to other users of HEADERS.
            headers = HEADERS
            headers['Cookie'] = self.cookie
            return headers
        else:
            return LOGIN_HEADERS

    @headers.setter
    def headers(self, headers):
        self._headers = headers

    @timer
    def get(self, url=None, headers=LOGIN_HEADERS, proxy=None):
        """Fetch url (default: the shop's page-1 URL) and cache the HTML in self.homepage.

        On success also refreshes self.proxy and self.headers from send_http's
        result tuple; on failure sets self.homepage to None.
        """
        _url = url if url else self.home_url
        result = send_http(self.session, 'get', _url, retries=MAX_RETRY,
                           headers=headers, proxy=proxy, timeout=TIMEOUT,
                           kind='SHOP')
        if result:
            response, self.proxy, self.headers = result
            self.homepage = response.text
            logger.info(f'成功获取店铺:{self.id} 点评相关页.')
        else:
            self.homepage = None

    @already
    def get_shop_css(self, reget=False):
        """Locate the obfuscation CSS referenced by the homepage, fetch and cache it.

        Returns the CSS text, or None if the link or the fetch fails.
        reget is consumed by the @already decorator (presumably a re-fetch flag
        — confirm against its definition).
        """
        src = from_pattern(PATTERN_CSS, self.homepage)
        if src:
            url = '//'.join([CSS_URL_PREFIX, src])
            result = send_http(
                self.session, 'get', url,
                retries=MAX_RETRY,
                headers=self.css_headers,
                proxy=self.css_proxy,
                timeout=TIMEOUT,
                kind='CSS',
            )
            if result:
                response, self.css_proxy, self.css_headers = result
                self.css = response.text
                return self.css

    @already
    def get_reviews_pages_count(self, reget=False):
        """Return the number of review pages: 0 (no reviews), 1, the last page
        number read from the pagination bar, or None when pagination is missing."""
        span = get_sub_tag(self.homepage, 'reviews')
        count = int(from_pattern(PATTERN_NUMS, span.text).strip())
        if count == 0:
            return 0
        if count > COMMENT_META_COUNT:
            next = get_sub_tag(self.homepage, 'next')
            if next:
                # Last page number sits two siblings before the "next" link.
                return int(next.previous_sibling.previous_sibling.text.strip())
            else:
                return
        else:
            return 1

    @already
    @more_than(1)
    def get_reviews(self, save=True, path=None, tname=None, frompage=1,
                    count=COMMENTS_RESULTS, write_mode='a', reget=False):
        """Crawl review pages from `frompage` up to self.more_page.

        save/tname: persist each Review to the database table; path/write_mode:
        append to a file instead; count>0 caps how many reviews are stored.
        Returns the list of parsed Review objects, or None on early exit.
        Raises NoDatabaseFound when save=True but no database is configured.
        """
        def save_page_reviews(reviews, total=0):
            # Store one page of reviews; returns the running total, or None
            # once `count` reviews have been stored (signals the caller to stop).
            for review in reviews:
                res.append(review)
                if save:
                    if self.db:
                        review.save(db=self.db, tname=tname)
                    else:
                        raise NoDatabaseFound('未找到对应点评存储数据库')
                elif path:
                    review.write(path=path, mode=write_mode)
                total += 1
                if total >= count and count > 0:
                    logger.info(
                        f'爬取存储{count}条店铺:{self.id}的点评数据 任务完成.已存储:{total}')
                    return
            return total

        res = []
        total = 0
        tname = tname if tname else self.db.table
        if frompage == 1:
            # Page 1 is already loaded (self.homepage); parse it first.
            reviews = self.get_cur_page_reviews()
            total = save_page_reviews(reviews)
            start = 2
        elif frompage >= 1 and frompage < self.more_page:
            start = frompage
        elif frompage > self.more_page:
            logger.error(
                f'[超过上限-{frompage}]当前商铺:{self.id}总点评页数只有 {self.more_page} 页.')
            return
        else:
            raise TypeError(f'非法页数类型:{frompage},页数应>=1')
        if self.more_page > 1:
            logger.info(f'店铺:{self.id} 点评数据有 {self.more_page} 页.')
            for i in range(start, self.more_page + 1):
                url = self.api.format(id=self.id, page=i)
                self.get(url, headers=self.headers, proxy=self.proxy)
                if self.homepage:
                    logger.info(f'[获取] 店铺:{self.id} 点评 第{i}页.')
                    reviews = self.get_cur_page_reviews()
                    total = save_page_reviews(reviews, total)
                    if total is None:
                        # Hit the `count` cap — stop crawling.
                        return
                    # Randomized delay between pages to look less bot-like.
                    time.sleep(random.uniform(*COMMENTS_SLEEP))
                else:
                    continue
            logger.info(
                f'店铺:{self.id} 此次运行点评数据爬取至最后一页完毕,页数:{self.more_page - frompage + 1},此次爬取:{total}'
            )
        return res

    def get_single_page_reviews(self, page, save=False, tname=None, path=None,
                                mode='a'):
        """Fetch and parse exactly one review page; optionally persist the reviews."""
        url = self.api.format(id=self.id, page=page)
        tname = tname if tname else self.db.table
        self.get(url)
        reviews = self.get_cur_page_reviews()
        for i in reviews:
            if save and self.db:
                i.save(self.db, tname)
            elif path:
                i.write(path, mode)
        return reviews

    @already
    def get_cur_page_reviews(self, reget=False):
        """Parse every review <li> on the cached homepage into Review objects."""
        res = []
        div = get_sub_tag(self.homepage, 'review_items')
        if div:
            lis = div(not_has_class_li)
            for li in lis:
                review = self._parse_review(li)
                res.append(review)
        return res

    @already
    @parsed_css
    def _parse_review(self, li):
        """Extract one Review from a review <li> BeautifulSoup tag.

        Pulls user info, star rating, per-item scores, timestamps, action
        counters and photos out of the markup, then decrypts the obfuscated
        review text via self.decrypter (decrypt_dict is prepared by the
        @parsed_css decorator).
        """
        _user_info = li('div', class_='dper-info')[0]
        _user_rank = li('div', class_='review-rank')[0]
        _user_words = li('div', class_='review-words')[0]
        _reply = li('a', class_='reply')[0]
        _review_pic_li = li('div', class_='review-pictures')
        _review_pics = _review_pic_li[0] if _review_pic_li else None
        _review_info = li('div', class_='misc-info')[0]
        _score = _user_rank('span', class_='item')
        _actions = _review_info('span', class_='actions')[0]
        _actions_a = _actions('a')
        actions = {}
        imgs = []
        user_img = li.img['data-lazyload'].split('%')[0]
        # The reviewer may be anonymous (no profile link on the avatar anchor).
        user_url = HOST + li.a['href'].strip() if li.a.has_attr(
            'href') else None
        user_id = li.a['data-user-id']
        user_name = _user_info.a.text.strip()
        user_level = from_pattern(
            PATTERN_USER_LEVEL, _user_info.img['src']) if _user_info.img else None
        use_vip = True if _user_info.span and _user_info.span['class'][
            0] == 'vip' else False
        star = from_pattern(PATTERN_STAR, ''.join(_user_rank.span['class']))
        # Each score span reads "label:value"; split into a dict.
        score = {
            i.text.strip().split(':')[0]: i.text.strip().split(':')[1]
            for i in _score
        }
        review_time = _review_info.span.text.strip()
        review_shop = _review_info('span', class_='shop')[0].text
        review_shop_id = self.id
        review_id = _reply['data-id']
        review_url = 'http:' + _reply['href']
        # Action links ("useful", "reply", ...) carry their count in a
        # following <em> sibling.
        for a in _actions_a:
            action_name = a.text.strip()
            _next = a.next_sibling.next_sibling
            if _next and _next.name == 'em':
                num = from_pattern(PATTERN_NUMS, _next.text)
                actions[action_name] = num
        if _review_pics:
            for pic in _review_pics('img'):
                imgs.append(pic['data-big'])
        # Decrypt the obfuscated review body (comment=True selects comment rules).
        words = self.decrypter.decrypt(_user_words, *self.decrypt_dict,
                                       comment=True)
        review = Review(user_name, user_id, user_url, user_img, user_level,
                        use_vip, star, score, review_id, review_url,
                        review_time, review_shop, review_shop_id, imgs, words,
                        actions)
        return review

    @already
    @parsed_css
    def decrypt_tag(self, tag_soup, pattern='.*', is_comment=False):
        """Decrypt an arbitrary obfuscated tag and return its plain text."""
        text = self.decrypter.decrypt(tag_soup, *self.decrypt_dict,
                                      comment=is_comment, pattern=pattern)
        return text
# coding:utf-8
import codecs
from util.shop import parse_shop_css
from decrypt import Decrypter
from bs4 import BeautifulSoup as bs

# Step 1: read the HTML of the page whose text is obfuscated.
with codecs.open('txt/fake.html', 'r', encoding='utf-8') as html_file:
    html = html_file.read()

# Step 2: read the obfuscation CSS used by that page (in real use it would be
# located e.g. via a regex over the HTML and fetched over HTTP).
with codecs.open('txt/fake.css', 'r', encoding='utf-8') as css_file:
    css = css_file.read()

# Step 3: parse the page, then grab the encrypted tags of interest —
# here the shop-address tag and a shop-review comment tag.
soup = bs(html, 'lxml')
address_tag = soup('div', class_='expand-info address')[0]
comment_tag = soup('p', class_='desc J-desc')[0]
print(f'未解密地址标签:{address_tag}\n')
print(f'为解密店铺点评评论:{comment_tag}\n')

# Step 4: parse the CSS into the two decryption mapping dicts
# (see parse_shop_css for the exact rules).
cls_dict, css_dict = parse_shop_css(css)

# Step 5: decrypt both tags with a Decrypter instance; review text needs
# comment=True. Decrypter.decrypt documents its parameters.
decrypter = Decrypter()
text = decrypter.decrypt(address_tag, cls_dict, css_dict)
dp = decrypter.decrypt(comment_tag, cls_dict, css_dict, comment=True)
print(f'解密后地址文本:{text}\n')
print(f'解密后店铺点评评论:{dp}\n')
# Any other encrypted tag is decrypted the same way :)
def testDecrypt(self):
    """Decrypting the fixture wallet with the correct password recovers the known key."""
    wallet = json.loads(self.json_string)
    recovered = Decrypter(wallet, self.password).decrypt()
    self.assertEqual(recovered.hex(), self.plain_key)