Example #1
 def testWrongPassword(self):
     json_data = json.loads(self.json_string)
     decrypter = Decrypter(json_data, self.incorrect_password)
     try:
         decrypter.decrypt()
         self.fail('decrypter should fail with incorrect password')
     except ValueError as err:
         self.assertEqual(str(err), 'Error: incorrect password')
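The test above expects Decrypter to raise ValueError('Error: incorrect password') for a bad password. As a minimal sketch of how such a check is typically implemented for an encrypted keystore (the key derivation and MAC comparison below are assumptions, not this project's verified code):

import hashlib
import hmac

class Decrypter:
    """Toy stand-in: derives a key from the password and verifies a stored MAC."""

    def __init__(self, json_data, password):
        self.json_data = json_data
        self.password = password

    def decrypt(self):
        salt = bytes.fromhex(self.json_data['salt'])
        key = hashlib.pbkdf2_hmac('sha256', self.password.encode(), salt, 100_000)
        ciphertext = bytes.fromhex(self.json_data['ciphertext'])
        mac = hmac.new(key[16:], ciphertext, hashlib.sha256).hexdigest()
        if mac != self.json_data['mac']:
            raise ValueError('Error: incorrect password')
        return ciphertext  # real code would now decrypt ciphertext with key[:16]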
Example #2
 def __init__(self, shopId, db=None, cookie=None):
     self.id = shopId
     self.cookie = cookie
     self.db = init_db(db)
     self.session = requests.Session()
     self.home_url = self.api.format(id=shopId, page=1)
     self.decrypter = Decrypter(shopId)
     self.homepage = None
     self.proxy = None
     self.css = None
     self.css_proxy = None
     self.decrypt_dict = None
     self.css_headers = CSS_HEADERS
     self.more_page = None
     self._headers = None
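This __init__ relies on context not shown here: self.api is a class attribute (Example #5 sets api = API_REVIEWS) and init_db builds the storage backend. A hypothetical instantiation, assuming this is the Comments class from Example #5 (the shop id and cookie are placeholders):

shop = Comments('H12345', db=None, cookie='your-session-cookie')
print(shop.home_url)  # page-1 URL formatted from the class-level API template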
Example #3
def main(args):
    password = args.password
    if not password:
        # NOTE: the source masked several lines here ('******'); this
        # reconstruction is an assumption based on the surrounding code
        # (getpass and json would be imported at module top).
        password = getpass.getpass('Please enter wallet password: ')
    json_data = json.load(args.wallet_file)            # assumed argument name
    decrypter = Decrypter(json_data, password)
    address = json_data['address']
    encoded_address = encode_address(address)          # hypothetical helper
    private_key = None
    try:
        private_key = decrypter.decrypt().hex()
    except ValueError as err:
        print(str(err), file=sys.stderr)
        sys.exit(-1)  # sys.exit is reliable in scripts; bare exit() is a REPL helper
    print('public address: ' + encoded_address)
    print('private key: ' + private_key)
    if args.address_qr:
        generate_qr_code(address, args.address_qr)
    if args.private_key_qr:
        generate_qr_code(private_key, args.private_key_qr)
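generate_qr_code is not defined in this snippet. A minimal sketch using the qrcode package (the function name and signature come from the calls above; the body is an assumption):

import qrcode

def generate_qr_code(data, path):
    # Render `data` as a QR code image and write it to `path`.
    img = qrcode.make(data)
    img.save(path)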
Example #4
#coding:utf-8
import codecs
from util.shop import parse_shop_css
from decrypt import Decrypter
from bs4 import BeautifulSoup as bs

# Decryption step 1: read the HTML content of the target page
with codecs.open('txt/fake.html', 'r', encoding='utf-8') as f:
    html = f.read()
# Decryption step 2: read the obfuscation CSS used by the page
# (on a live page it can be located with a regex match, etc.)
with codecs.open('txt/fake.css', 'r', encoding='utf-8') as f:
    css = f.read()
# Decryption step 3: parse the whole HTML, then grab the tag whose content
# needs decrypting, e.g. the tag holding the shop address
soup = bs(html, 'lxml')
address_tag = soup('div', class_='address-info')[0]
print(f'Undecrypted address tag: {address_tag}\n')
# Decryption step 4: parse the fetched CSS file directly; see parse_shop_css
# for the exact rules. This step yields the decryption mapping dictionaries.
cls_dict, css_dict = parse_shop_css(css)
# Decryption step 5: use a Decrypter object to decrypt the tag into plain text.
# Note: Decrypter.decrypt includes parameter documentation.
# Below, the content of the address tag is decrypted.
decrypter = Decrypter()
text = decrypter.decrypt(address_tag, cls_dict, css_dict)
print(f'Decrypted address text: {text}\n')
# Other encrypted tags can be decrypted the same way :)
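For intuition: this style of CSS obfuscation replaces characters with empty tags whose class names map to glyph offsets in an SVG sprite, and parse_shop_css evidently turns those CSS rules into the lookup dictionaries used by Decrypter. A toy illustration of the class-to-offset extraction (the rule format is an assumption; see parse_shop_css for the real rules):

import re

def toy_parse_css(css):
    # Match rules like: .abc123 { background: -42.0px -18.0px; }
    pattern = re.compile(r'\.(\w+)\s*\{\s*background:\s*-([\d.]+)px\s+-([\d.]+)px')
    return {name: (float(x), float(y)) for name, x, y in pattern.findall(css)}

print(toy_parse_css('.abc123 { background: -42.0px -18.0px; }'))
# -> {'abc123': (42.0, 18.0)}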
Example #5
class Comments(object):
    """
    Shop review class for Dazhong Dianping (dianping.com)
    """

    api = API_REVIEWS

    def __init__(self, shopId, db=None, cookie=None):
        self.id = shopId
        self.cookie = cookie
        self.db = init_db(db)
        self.session = requests.Session()
        self.home_url = self.api.format(id=shopId, page=1)
        self.decrypter = Decrypter(shopId)
        self.homepage = None
        self.proxy = None
        self.css = None
        self.css_proxy = None
        self.decrypt_dict = None
        self.css_headers = CSS_HEADERS
        self.more_page = None
        self._headers = None

    @property
    def headers(self):
        if self._headers:
            return self._headers
        elif self.cookie:
            headers = dict(HEADERS)  # copy so the shared HEADERS dict is not mutated
            headers['Cookie'] = self.cookie
            return headers
        else:
            return LOGIN_HEADERS

    @headers.setter
    def headers(self, headers):
        self._headers = headers

    @timer
    def get(self, url=None, headers=LOGIN_HEADERS, proxy=None):
        _url = url if url else self.home_url
        result = send_http(self.session,
                           'get',
                           _url,
                           retries=MAX_RETRY,
                           headers=headers,
                           proxy=proxy,
                           timeout=TIMEOUT,
                           kind='SHOP')
        if result:
            response, self.proxy, self.headers = result
            self.homepage = response.text
            logger.info(f'Successfully fetched review page for shop {self.id}.')
        else:
            self.homepage = None

    @already
    def get_shop_css(self, reget=False):
        src = from_pattern(PATTERN_CSS, self.homepage)
        if src:
            url = '//'.join([CSS_URL_PREFIX, src])
            result = send_http(
                self.session,
                'get',
                url,
                retries=MAX_RETRY,
                headers=self.css_headers,
                proxy=self.css_proxy,
                timeout=TIMEOUT,
                kind='CSS',
            )
            if result:
                response, self.css_proxy, self.css_headers = result
                self.css = response.text
                return self.css

    @already
    def get_reviews_pages_count(self, reget=False):
        span = get_sub_tag(self.homepage, 'reviews')
        count = int(from_pattern(PATTERN_NUMS, span.text).strip())
        if count == 0:
            return 0
        if count > COMMENT_META_COUNT:
            next_tag = get_sub_tag(self.homepage, 'next')  # renamed to avoid shadowing built-in next()
            if next_tag:
                return int(next_tag.previous_sibling.previous_sibling.text.strip())
            else:
                return
        else:
            return 1

    @already
    @more_than(1)
    def get_reviews(self,
                    save=True,
                    path=None,
                    tname=None,
                    frompage=1,
                    count=COMMENTS_RESULTS,
                    write_mode='a',
                    reget=False):
        def save_page_reviews(reviews, total=0):
            for review in reviews:
                res.append(review)
                if save:
                    if self.db:
                        review.save(db=self.db, tname=tname)
                    else:
                        raise NoDatabaseFound('no database found for storing reviews')
                elif path:
                    review.write(path=path, mode=write_mode)
                total += 1
                if total >= count and count > 0:
                    logger.info(
                        f'Done: crawled and stored {count} reviews for shop {self.id}. Stored so far: {total}')
                    return
            return total

        res = []
        total = 0
        tname = tname if tname else self.db.table
        if frompage == 1:
            reviews = self.get_cur_page_reviews()
            total = save_page_reviews(reviews)
            start = 2
        elif 1 < frompage <= self.more_page:  # includes frompage == more_page, which previously fell through
            start = frompage
        elif frompage > self.more_page:
            logger.error(
                f'[Over limit: {frompage}] shop {self.id} has only {self.more_page} review pages.')
            return
        else:
            raise TypeError(f'invalid page number: {frompage}; page numbers must be >= 1')
        if self.more_page > 1:
            logger.info(f'Shop {self.id} has {self.more_page} pages of reviews.')
            for i in range(start, self.more_page + 1):
                url = self.api.format(id=self.id, page=i)
                self.get(url, headers=self.headers, proxy=self.proxy)
                if self.homepage:
                    logger.info(f'[Fetch] shop {self.id}, review page {i}.')
                    reviews = self.get_cur_page_reviews()
                    total = save_page_reviews(reviews, total)
                    if total is None:
                        return
                    time.sleep(random.uniform(*COMMENTS_SLEEP))
                else:
                    continue
            logger.info(
                f'Shop {self.id}: review crawl reached the last page; '
                f'pages this run: {self.more_page - frompage + 1}, reviews crawled: {total}'
            )
        return res

    def get_single_page_reviews(self,
                                page,
                                save=False,
                                tname=None,
                                path=None,
                                mode='a'):
        url = self.api.format(id=self.id, page=page)
        tname = tname if tname else self.db.table
        self.get(url)
        reviews = self.get_cur_page_reviews()
        for i in reviews:
            if save and self.db:
                i.save(self.db, tname)
            elif path:
                i.write(path, mode)
        return reviews

    @already
    def get_cur_page_reviews(self, reget=False):
        res = []
        div = get_sub_tag(self.homepage, 'review_items')
        if div:
            lis = div(not_has_class_li)
            for li in lis:
                review = self._parse_review(li)
                res.append(review)
        return res

    @already
    @parsed_css
    def _parse_review(self, li):
        _user_info = li('div', class_='dper-info')[0]
        _user_rank = li('div', class_='review-rank')[0]
        _user_words = li('div', class_='review-words')[0]
        _reply = li('a', class_='reply')[0]
        _review_pic_li = li('div', class_='review-pictures')
        _review_pics = _review_pic_li[0] if _review_pic_li else None
        _review_info = li('div', class_='misc-info')[0]
        _score = _user_rank('span', class_='item')
        _actions = _review_info('span', class_='actions')[0]
        _actions_a = _actions('a')
        actions = {}
        imgs = []
        user_img = li.img['data-lazyload'].split('%')[0]
        # the user may be anonymous
        user_url = HOST + li.a['href'].strip() if li.a.has_attr(
            'href') else None
        user_id = li.a['data-user-id']
        user_name = _user_info.a.text.strip()
        user_level = from_pattern(
            PATTERN_USER_LEVEL,
            _user_info.img['src']) if _user_info.img else None
        use_vip = bool(_user_info.span
                       and _user_info.span['class'][0] == 'vip')
        star = from_pattern(PATTERN_STAR, ''.join(_user_rank.span['class']))
        score = {
            i.text.strip().split(':')[0]: i.text.strip().split(':')[1]
            for i in _score
        }
        review_time = _review_info.span.text.strip()
        review_shop = _review_info('span', class_='shop')[0].text
        review_shop_id = self.id
        review_id = _reply['data-id']
        review_url = 'http:' + _reply['href']
        for a in _actions_a:
            action_name = a.text.strip()
            _next = a.next_sibling.next_sibling
            if _next and _next.name == 'em':
                num = from_pattern(PATTERN_NUMS, _next.text)
                actions[action_name] = num
        if _review_pics:
            for pic in _review_pics('img'):
                imgs.append(pic['data-big'])
        words = self.decrypter.decrypt(_user_words,
                                       *self.decrypt_dict,
                                       comment=True)
        review = Review(user_name, user_id, user_url, user_img, user_level,
                        use_vip, star, score, review_id, review_url,
                        review_time, review_shop, review_shop_id, imgs, words,
                        actions)
        return review

    @already
    @parsed_css
    def decrypt_tag(self, tag_soup, pattern='.*', is_comment=False):
        text = self.decrypter.decrypt(tag_soup,
                                      *self.decrypt_dict,
                                      comment=is_comment,
                                      pattern=pattern)
        return text
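A hypothetical end-to-end use of the class above, assuming the module-level constants and helpers it references (API_REVIEWS, init_db, send_http, and so on) are importable and that my_db is a configured store (both names below are placeholders):

shop = Comments('H12345', db=my_db, cookie='your-session-cookie')
shop.get()                         # fetch the first reviews page
shop.get_shop_css()                # fetch the obfuscation CSS for decryption
pages = shop.get_reviews_pages_count()
reviews = shop.get_reviews(save=True)  # crawl and store page by page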
Example #6
# coding:utf-8
import codecs
from util.shop import parse_shop_css
from decrypt import Decrypter
from bs4 import BeautifulSoup as bs

# Decryption step 1: read the HTML content of the target page
with codecs.open('txt/fake.html', 'r', encoding='utf-8') as f:
    html = f.read()
# Decryption step 2: read the obfuscation CSS used by the page
# (on a live page it can be located with a regex match, etc.)
with codecs.open('txt/fake.css', 'r', encoding='utf-8') as f:
    css = f.read()
# Decryption step 3: parse the whole HTML, then grab the tags whose content
# needs decrypting, e.g. the shop address tag and a shop review comment
soup = bs(html, 'lxml')
address_tag = soup('div', class_='expand-info address')[0]
comment_tag = soup('p', class_='desc J-desc')[0]
print(f'Undecrypted address tag: {address_tag}\n')
print(f'Undecrypted shop review comment: {comment_tag}\n')
# Decryption step 4: parse the fetched CSS file directly; see parse_shop_css
# for the exact rules. This step yields the decryption mapping dictionaries.
cls_dict, css_dict = parse_shop_css(css)
# Decryption step 5: use a Decrypter object to decrypt the tags into plain text.
# Note: Decrypter.decrypt includes parameter documentation.
# Below, the address tag and the review comment are decrypted.
decrypter = Decrypter()
text = decrypter.decrypt(address_tag, cls_dict, css_dict)
dp = decrypter.decrypt(comment_tag, cls_dict, css_dict, comment=True)
print(f'Decrypted address text: {text}\n')
print(f'Decrypted shop review comment: {dp}\n')
# Other encrypted tags can be decrypted the same way :)
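On a live page, step 2 would locate the obfuscation CSS instead of reading a saved file. A hedged sketch (the link pattern is an assumption and may not match Dianping's current markup):

import re
import requests

match = re.search(r'href="(//s3plus\.meituan\.net/[^"]+\.css)"', html)
if match:
    css = requests.get('https:' + match.group(1)).text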
Example #7
 def testDecrypt(self):
     json_data = json.loads(self.json_string)
     decrypter = Decrypter(json_data, self.password)
     decrypted_plain_key = decrypter.decrypt()
     self.assertEqual(decrypted_plain_key.hex(), self.plain_key)
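The tests in Examples #1 and #7 reference fixtures (self.json_string, self.password, self.incorrect_password, self.plain_key) that a setUp method would provide. A hypothetical scaffold with placeholder fixture values:

import unittest

class DecrypterTest(unittest.TestCase):
    def setUp(self):
        # Placeholder fixtures; a real test would use a known-good keystore.
        self.json_string = '{"salt": "00", "ciphertext": "00", "mac": "00"}'
        self.password = 'correct-password'
        self.incorrect_password = 'wrong-password'
        self.plain_key = '00' * 32

    # testDecrypt and testWrongPassword as shown above ...

if __name__ == '__main__':
    unittest.main()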