def parse(self, response):
    """Parse a heiyan.com book page (platform P31) into a
    TNovelSummaryItem, then request the click-count detail endpoint.

    Fix: dropped the unused ``text = response.text`` local.
    """
    print('1,=========================', response.url)
    item = TNovelSummaryItem()

    src_url = response.url
    item["src_url"] = src_url
    print('src_url:', src_url)

    # Book title from <h2>, normalised into a product number.
    product_number = ''.join(response.xpath('//h2/text()').extract()).strip()
    print('product_number:', product_number)
    product_number = get_product_number(product_number)
    print('product_number:', product_number)
    item["product_number"] = product_number

    plat_number = 'P31'  # platform code for heiyan.com
    print('plat_number:', plat_number)
    item["plat_number"] = plat_number

    # Latest chapter number, written in Chinese numerals ("第...章").
    Chapter_num_update = ''.join(
        response.xpath('//h4/a/text()').extract()).strip()
    Chapter_num_update = ''.join(
        re.findall(r'第([\u4e00-\u9fa5]{1,10})章', Chapter_num_update,
                   re.I | re.M))
    Chapter_num_update = chinese_to_arabic(Chapter_num_update)
    item["Chapter_num_update"] = Chapter_num_update
    print('Chapter_num_update:', Chapter_num_update)

    update_date = ''.join(
        response.xpath('//h4/span[@class="time"]/text()').extract()).strip()
    update_date = parse_time(update_date)
    item["update_date"] = update_date
    print('update_date:', update_date)

    words = ''.join(
        response.xpath('//span[@class="words"]/text()').extract()).strip()
    words = ''.join(re.findall(r'(\d+)字', words, re.I | re.M))
    item["words"] = words
    print('words:', words)

    # These metrics are not available on this page.
    item["tickets_num"] = None
    item["score"] = None
    item["reward_num"] = None

    bookId = ''.join(re.findall(r'book\/(\d+)', src_url, re.I | re.M))
    link = 'http://a.heiyan.com/ajax/book/extend/{}/detail'.format(bookId)
    yield scrapy.Request(url=link,
                         callback=self.parse_page_click_num,
                         meta={'item': item, 'bookId': bookId},
                         dont_filter=True)
def parse(self, response):
    """Parse a book page (platform P33) into a TNovelSummaryItem and
    yield it directly.

    Fix: dropped the unused ``text = response.text`` local.
    """
    print('1,======================', response.url)
    item = TNovelSummaryItem()

    src_url = response.url
    item["src_url"] = src_url
    print('src_url:', src_url)

    product_number = ''.join(
        response.xpath('//h1/em/text()').extract()).strip()
    print('product_number:', product_number)
    product_number = get_product_number(product_number)
    print('product_number:', product_number)
    item["product_number"] = product_number

    plat_number = 'P33'
    print('plat_number:', plat_number)
    item["plat_number"] = plat_number

    # Latest chapter number, e.g. "第123章 ..." -> "123".
    Chapter_num_update = ''.join(
        response.xpath('//div[@class="update"]/p/a/text()').extract()).strip()
    Chapter_num_update = ''.join(
        re.findall(r'第(\d+)章', Chapter_num_update, re.I | re.S))
    item["Chapter_num_update"] = Chapter_num_update
    print('Chapter_num_update:', Chapter_num_update)

    update_date = ''.join(
        response.xpath(
            '//div[@class="update"]/p/span/text()').extract()).strip()
    update_date = parse_time(update_date)
    item["update_date"] = update_date
    print('update_date:', update_date)

    words = ''.join(
        response.xpath(
            '//div[@class="book-info"]/p[@class="total"]//text()').extract(
            )).strip()
    words = ''.join(re.findall(r'(.*?)字\|', words, re.I | re.S))
    words = process_number(words)
    item["words"] = words
    print('words:', words)

    click_num = ''.join(
        response.xpath(
            '//div[@class="book-info"]/p[@class="total"]//text()').extract(
            )).strip()
    click_num = ''.join(re.findall(r'([0-9]+\.[0-9]+万)总点击', click_num))
    print('click_num:', click_num)
    click_num = process_number(click_num)
    item["click_num"] = click_num
    print('click_num:', click_num)

    tickets_num = ''.join(
        response.xpath('//*[@id="monthCount"]/text()').extract()).strip()
    item["tickets_num"] = tickets_num
    print('tickets_num:', tickets_num)

    # Approximate comment count: last pagination page number * 10
    # (10 comments per page); no pagination -> assume a single page of 10.
    comment_num = ''.join(
        response.xpath(
            '//div[@class="lbf-pagination"]/ul/li[last()-1]/a/text()').
        extract()).strip()
    if comment_num:
        comment_num = int(comment_num) * 10
    else:
        comment_num = 10
    item["comment_num"] = comment_num
    print('comment_num:', comment_num)

    # Not available on this page.
    item["score"] = None
    item["collect_num"] = None
    item["reward_num"] = None

    last_modify_date = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    item["last_modify_date"] = last_modify_date
    print('last_modify_date:', last_modify_date)

    print(item)
    yield item
def parse(self, response):
    """Parse a qidian.com book info page (platform P20), then request
    the chapter-list AJAX endpoint.

    Fix: the previously unused ``_csrfToken`` local is now interpolated
    into the AJAX URL instead of being duplicated as a hard-coded string
    (the resulting URL is byte-identical).
    """
    print('1,=================', response.url)
    text = response.text
    url = response.url
    item = TNovelSummaryItem()

    src_url = url
    item["src_url"] = src_url
    print('src_url:', src_url)

    product_number = ''.join(
        response.xpath('//h1/em/text()').extract()).strip()
    print('product_number:', product_number)
    product_number = get_product_number(product_number)
    item["product_number"] = product_number
    print('product_number:', product_number)

    plat_number = 'P20'  # platform code for qidian.com
    print('plat_number:', plat_number)
    item["plat_number"] = plat_number

    update_date = ''.join(
        response.xpath(
            '//p[@class="cf"]/em[@class="time"]/text()').extract()).strip()
    update_date = parse_time(update_date)
    item["update_date"] = update_date
    print('update_date:', update_date)

    tickets_num = ''.join(
        response.xpath('//*[@id="monthCount"]/text()').extract()).strip()
    item["tickets_num"] = tickets_num
    print('tickets_num:', tickets_num)

    # "暂无评分" means "no rating yet" -> store 0; otherwise keep the raw text.
    score_s = ''.join(
        response.xpath('//*[@id="j_bookScore"]//text()').extract()).strip()
    if '暂无评分' in score_s:
        score = 0
    else:
        score = score_s
    item["score"] = score
    print('score:', score)

    collect_num = None  # not available on this page
    item["collect_num"] = collect_num
    print('collect_num:', collect_num)

    reward_num = ''.join(
        response.xpath('//*[@id="rewardNum"]/text()').extract()).strip()
    item["reward_num"] = reward_num
    print('reward_num:', reward_num)

    last_modify_date = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    item["last_modify_date"] = last_modify_date
    print('last_modify_date:', last_modify_date)

    authorId = ''.join(
        response.xpath(
            '//*[@id="authorId"]/@data-authorid').extract()).strip()
    print('authorId:', authorId)
    # Channel id only appears in inline script, not in the DOM.
    chanId = re.findall(r'chanId\=(\d+)', text)[0]
    print('chanId:', chanId)
    bookId = ''.join(
        re.findall(r'https\:\/\/book\.qidian\.com\/info\/(\d+)', url,
                   re.I | re.M))
    print('bookId:', bookId)

    _csrfToken = 'HUc4mzWMTveOYkK01P9mREV04r5f0zisvnDNZl7j'
    link = ('https://book.qidian.com/ajax/book/category'
            '?_csrfToken={}&bookId={}').format(_csrfToken, bookId)
    yield scrapy.Request(url=link,
                         callback=self.parse_page_Chapter_num,
                         meta={'item': item, 'authorId': authorId,
                               'chanId': chanId, 'bookId': bookId},
                         dont_filter=True)
def parse(self, response):
    """Parse a zongheng.com book page (platform P21), then POST to the
    async info endpoint for the remaining fields.

    Fix: the original guard ``if '【' and '】' in product_number:`` only
    tested for '】' — the '【' literal is always truthy, so ``and`` returns
    the membership test on '】' alone. Both full-width brackets are now
    normalised unconditionally; ``str.replace`` is a no-op when the
    character is absent, so behaviour is otherwise unchanged.
    Also hoisted the vote_info text, which was extracted three times.
    """
    print('1,=======================', response.url)
    text = response.text
    item = TNovelSummaryItem()

    src_url = response.url
    item["src_url"] = src_url
    print('src_url:', src_url)

    product_number = ''.join(
        response.xpath(
            '//div[@class="main"]/div[@class="status fl"]/h1/a/text()').
        extract()).strip()
    # Normalise full-width brackets before deriving the product number.
    product_number = product_number.replace('【', '[').replace('】', ']')
    print('product_number:', product_number)
    product_number = get_product_number(product_number)
    print('product_number:', product_number)
    item["product_number"] = product_number

    plat_number = 'P21'  # platform code for zongheng.com
    item["plat_number"] = plat_number
    print('plat_number:', plat_number)

    # Latest instalment, written as "第...部" in Chinese numerals.
    # Field is only set when the page actually shows an update line.
    Chapter_num_update = ''.join(
        response.xpath(
            '//div[@class="update box"]/div[@class="cont"]/a/text()').
        extract()).strip()
    if Chapter_num_update:
        Chapter_num_update = ''.join(
            re.findall(u'第(.*?)部', Chapter_num_update, re.I | re.M))
        Chapter_num_update = chinesedigits(Chapter_num_update)
        item["Chapter_num_update"] = Chapter_num_update
        print('Chapter_num_update:', Chapter_num_update)

    # Keep only the first line; strip the decorative '·' bullet.
    update_date = ''.join(
        response.xpath(
            '//div[@class="update box"]/div[@class="uptime"]/text()').
        extract()).strip().split('\n')[0].replace('·', '')
    update_date = parse_time(update_date)
    item["update_date"] = update_date
    print('update_date:', update_date)

    words = ''.join(
        response.xpath(
            '//div[@class="main"]/div[@class="status fl"]/div[@class="booksub"]/span[@title]/text()'
        ).extract()).strip()
    item["words"] = words
    print('words:', words)

    # Clicks / comments / collections all live in the same vote_info block.
    vote_text = ' '.join(
        response.xpath('//div[@class="vote_info"]/p//text()').extract()).strip()

    if vote_text:
        click_num = ''.join(
            re.findall(r'总点击: (\d+)', vote_text, re.I | re.M))
    else:
        click_num = None
    item["click_num"] = click_num
    print('click_num:', click_num)

    if vote_text:
        comment_num = ''.join(
            re.findall(r'评论数: (\d+)', vote_text, re.I | re.M))
    else:
        comment_num = None
    item["comment_num"] = comment_num
    print('comment_num:', comment_num)

    item["score"] = None  # not available on this page

    if vote_text:
        collect_num = ''.join(
            re.findall(r'总收藏: (\d+)', vote_text, re.I | re.M))
    else:
        collect_num = None
    item["collect_num"] = collect_num
    print('collect_num:', collect_num)

    item["reward_num"] = None  # not available on this page

    last_modify_date = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    item["last_modify_date"] = last_modify_date
    print('last_modify_date:', last_modify_date)

    # bookId only appears in inline script, not in the DOM.
    bookId = ''.join(re.findall(r'bookId=\"(\d+)\"', text, re.I | re.M))
    print('bookId:', bookId)
    link = 'http://book.zongheng.com/book/async/info.htm'
    formdata = {"bookId": bookId}
    yield scrapy.FormRequest(url=link,
                             formdata=formdata,
                             callback=self.parse_page,
                             meta={'item': item},
                             dont_filter=True)
def parse(self, response):
    """Parse a qidian.com book page whose numbers are obfuscated with a
    per-request anti-spider web font; decode them via get_words(), then
    request the comment AJAX endpoint.

    Fix: the previously unused ``_csrfToken`` local is now interpolated
    into the AJAX URL instead of being duplicated as a hard-coded string
    (the resulting URL is byte-identical).
    """
    print('1,=================', response.url)
    text = response.text
    url = response.url
    item = TNovelSummaryItem()

    src_url = url
    item["src_url"] = src_url
    print('src_url:', src_url)

    product_number = ''.join(
        response.xpath('//h1/em/text()').extract()).strip()
    print('product_number:', product_number)
    product_number = get_product_number(product_number)
    item["product_number"] = product_number
    print('product_number:', product_number)

    plat_number = 'P20'  # platform code for qidian.com
    print('plat_number:', plat_number)
    item["plat_number"] = plat_number

    tickets_num = ''.join(
        response.xpath('//*[@id="monthCount"]/text()').extract()).strip()
    item["tickets_num"] = tickets_num
    print('tickets_num:', tickets_num)

    reward_num = ''.join(
        response.xpath('//*[@id="rewardNum"]/text()').extract()).strip()
    item["reward_num"] = reward_num
    print('reward_num:', reward_num)

    last_modify_date = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    item["last_modify_date"] = last_modify_date
    print('last_modify_date:', last_modify_date)

    # The anti-spider font name doubles as the CSS class on the number
    # spans; the matching .woff is fetched to decode the glyphs.
    font_type = ''.join(
        response.xpath(
            '//div[@class="book-info "]/p/em[1]/span[@class]/@class').
        extract()).strip()
    print('font_type:', font_type)
    font_url = 'https://qidian.gtimg.com/qd_anti_spider/{}.woff'.format(
        font_type)
    print('font_url:', font_url)

    # Word count, given in units of 万 (10,000) characters.
    # NOTE(review): the '|' alternation makes the second branch yield an
    # empty capture if it matches first — presumably a fallback for a
    # page variant; verify against live markup.
    words = re.findall(
        r'</style><span class="{}">(.*)</span></em><cite>万字</cite><i>|</i><em><style>'
        .format(font_type), response.text, re.I | re.M)[0]
    print('words:', words)
    words = get_words(words, font_url)
    words = int(float(words) * 10000)
    item["words"] = words
    print('words:', words)

    # Total member clicks, also font-obfuscated and in units of 万.
    click_num = re.findall(
        r'</style><span class="{}">(.*)</span></em><cite>万总会员点击<span>'.
        format(font_type), response.text, re.I | re.M)[0]
    click_num = re.findall(
        r'</style><span class="{}">(.*)'.format(font_type), click_num,
        re.I | re.M)[0]
    print('click_num:', click_num)
    click_num = get_words(click_num, font_url)
    click_num = int(float(click_num) * 10000)
    item["click_num"] = click_num
    print('click_num:', click_num)

    update_date = ''.join(
        response.xpath(
            '//li[@class="update"]/div[@class="detail"]/p[@class="cf"]/em/text()'
        ).extract()).strip()
    update_date = parse_time(update_date)
    item["update_date"] = update_date
    print('update_date:', update_date)

    item["collect_num"] = None  # not available on this page

    authorId = ''.join(
        response.xpath(
            '//*[@id="authorId"]/@data-authorid').extract()).strip()
    print('authorId:', authorId)
    # Channel id only appears in inline script, not in the DOM.
    chanId = re.findall(r'chanId\=(\d+)', text)[0]
    print('chanId:', chanId)
    bookId = ''.join(
        re.findall(r'https\:\/\/book\.qidian\.com\/info\/(\d+)', url,
                   re.I | re.M))
    print('bookId:', bookId)

    _csrfToken = 'HUc4mzWMTveOYkK01P9mREV04r5f0zisvnDNZl7j'
    link = ('https://book.qidian.com/ajax/comment/index'
            '?_csrfToken={}&bookId={}&pageSize=15').format(_csrfToken, bookId)
    yield scrapy.Request(url=link,
                         callback=self.parse_page_score,
                         meta={'item': item, 'authorId': authorId,
                               'chanId': chanId, 'bookId': bookId},
                         dont_filter=True)
def parse(self, response):
    """Parse a book page (platform P32) into a TNovelSummaryItem and
    yield it directly.

    Fix: dropped the unused ``text = response.text`` local.
    """
    print('1,==========================', response.url)
    item = TNovelSummaryItem()

    src_url = response.url
    item["src_url"] = src_url
    print('src_url:', src_url)

    product_number = ''.join(
        response.xpath(
            '//h1[@class="fllf"]/a[@title]/text()').extract()).strip()
    print('product_number:', product_number)
    product_number = get_product_number(product_number)
    print('product_number:', product_number)
    item["product_number"] = product_number

    plat_number = 'P32'
    print('plat_number:', plat_number)
    item["plat_number"] = plat_number

    # Latest chapter number, e.g. "第123章 ..." -> "123".
    Chapter_num_update = ''.join(
        response.xpath(
            '//h3[@class="bom10"]/a[@class="cboy"]/text()').extract()).strip()
    Chapter_num_update = ''.join(
        re.findall(r'第(\d+)章', Chapter_num_update, re.I | re.M))
    item["Chapter_num_update"] = Chapter_num_update
    print('Chapter_num_update:', Chapter_num_update)

    update_date = ''.join(
        response.xpath(
            '//h3[@class="bom10"]/span[@class="lf10"]/text()').extract()).strip()
    update_date = parse_time(update_date)
    item["update_date"] = update_date
    print('update_date:', update_date)

    words = ' '.join(
        response.xpath(
            '//div[@class="right"]/p[@class="infor bom10"]/span/text()').
        extract()).strip()
    words = ''.join(re.findall(r'总字数:(.*?)\s', words, re.I | re.M))
    words = process_number(words)
    item["words"] = words
    print('words:', words)

    click_num = ' '.join(
        response.xpath(
            '//div[@class="right"]/p[@class="infor bom10"]/span/text()').
        extract()).strip()
    click_num = ''.join(re.findall(r'点击:(.*?)\s ', click_num, re.I | re.M))
    print('click_num:', click_num)
    # A "万" suffix means tens of thousands; scale accordingly.
    if '万' in click_num:
        click_num = click_num.replace('万', '')
        click_num = int(atof(click_num) * 10000)
    else:
        click_num = int(atof(click_num))
    item["click_num"] = click_num
    print('click_num:', click_num)

    item["tickets_num"] = None  # not available on this page

    # Comment count appears as "最新书评(N)" on the comment tab; strip
    # the full-width parentheses around N.
    comment_num = ''.join(
        response.xpath(
            '//div[@category="comment"]/a[@class="tabfmbtn cboy"]/text()').
        extract()).strip()
    comment_num = ''.join(
        re.findall(r'最新书评(.*)', comment_num,
                   re.I | re.S)).replace('(', '').replace(')', '')
    comment_num = int(atof(comment_num))
    item["comment_num"] = comment_num
    print('comment_num:', comment_num)

    # Not available on this page.
    item["score"] = None
    item["collect_num"] = None
    item["reward_num"] = None

    last_modify_date = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    item["last_modify_date"] = last_modify_date
    print('last_modify_date:', last_modify_date)

    print(item)
    yield item