Python ConfigHtmlの例

プログラミング言語: Python

名前空間/パッケージ名: spider.common

クラス/型: ConfigHtml

hotexamples.comのコード掲載数: 5

Python ConfigHtml - 5件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたPythonのspider.common.ConfigHtmlの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

よく使われるメソッド

表示 / 非表示

get_content(5)

xpath_content(5)

よく使われるメソッド

get_content (5)

xpath_content (5)

コード例 #1

ファイルを表示

ファイル: shipin_short_video_thunder.py プロジェクト: zero-wangdu/wangdu

    def run(self):
        """Resolve the download links of every video listed in the input CSV.

        Reads ``000_short_video_all.csv`` row by row (column 0: name,
        column 1: page path), builds the page URL from ``self.url`` and
        ``self.baseUrl``, fetches it with ``Config.get_content`` and extracts
        the ``value`` attribute of the ``lin1k0`` and ``lin1k1`` inputs.

        For each input row, ``[name, page_url, https, thunder]`` is passed to
        ``SAVE_CSV_FILE`` for ``shipin_short_video_thunder.csv``. A link that
        could not be extracted is stored as an empty string. The method
        sleeps 4 seconds between rows.
        """
        with open('000_short_video_all.csv', 'r', encoding='utf-8') as f:
            self.csv_file = csv.reader(f)
            for line in self.csv_file:
                print(line)

                name = line[0]
                page_url = line[1]

                url = self.url.format(self.baseUrl, page_url)

                # Start every row with empty results. Without this a failed
                # fetch either raises NameError below (first row) or silently
                # reuses the previous row's links and saves them under the
                # wrong name.
                https = []
                thunder = []

                try:
                    content = Config.get_content(url)
                    https = Config.xpath_content(
                        content, '//input[@id="lin1k0"]/@value')
                    thunder = Config.xpath_content(
                        content, '//input[@id="lin1k1"]/@value')
                except Exception as e:
                    # Best effort: report the failure and still record the
                    # row, with whatever links were extracted (possibly none).
                    print('error :' + str(e))

                # Keep only the first match of each XPath query.
                https = https[0] if len(https) != 0 else ''
                thunder = thunder[0] if len(thunder) != 0 else ''

                print(name, page_url, https, thunder)
                SAVE_CSV_FILE('shipin_short_video_thunder.csv',
                              [name, page_url, https, thunder], False)

                # Pause between requests.
                time.sleep(4)

コード例 #2

ファイルを表示

ファイル: xiazai_video_page_url.py プロジェクト: zero-wangdu/wangdu

    def run(self):
        """Collect the video links of list pages 1 through 35 into CSV files.

        For each page number the list URL is built from ``self.baseUrl`` and
        stored in ``self.url``. The page is fetched with
        ``Config.get_content`` and the ``href`` and ``title`` attributes of
        the anchors under ``div#tpl-img-content`` are extracted. Each page is
        written to its own time-stamped file in ``./abnormal_csv/`` as
        ``title, href`` rows. The method sleeps 4 seconds between pages.
        """
        # A distinct name for the page counter: the original reused ``i`` for
        # the CSV rows as well, shadowing the page number inside the loop.
        for page in range(1, 35 + 1):
            self.url = 'https://{}/xiazai/list-%e5%8f%98%e6%80%81%e5%8f%a6%e7%b1%bb-{}.html'.format(
                self.baseUrl, page)

            print(self.url)

            # Get all links on the video list page.
            content = Config.get_content(self.url)

            a_href = Config.xpath_content(
                content, '//div[@id="tpl-img-content"]/li/a/@href')
            a_text = Config.xpath_content(
                content, '//div[@id="tpl-img-content"]/li/a/@title')

            print(a_href)
            print(a_text)

            # Save all links of the video list page.
            fileName = datetime.datetime.now().strftime('%H%M%S%f')
            fileName = 'abnormal_video_' + str(fileName) + '.csv'
            filePath = './abnormal_csv/'
            # exist_ok avoids the check-then-create race of a separate
            # os.path.exists() test.
            os.makedirs(filePath, exist_ok=True)

            with open(filePath + fileName, 'w', encoding='utf-8',
                      newline='') as f:
                writer = csv.writer(f)
                # zip() stops at the shorter list, so unpaired entries are
                # dropped.
                writer.writerows(zip(a_text, a_href))

            # Pause between requests.
            time.sleep(4)

コード例 #3

ファイルを表示

    def run(self):
        """Collect the video links of list pages 22 and 23 into CSV files.

        For each page number the list URL is built from ``self.baseUrl`` and
        stored in ``self.url``. The page is fetched with
        ``Config.get_content`` and the ``href`` and ``title`` attributes of
        the anchors under ``div#tpl-img-content`` are extracted. Each page is
        written to its own time-stamped file in ``./asia_video_main_csv/`` as
        ``title, href`` rows. The method sleeps 4 seconds between pages.
        """
        # A distinct name for the page counter: the original reused ``i`` for
        # the CSV rows as well, shadowing the page number inside the loop.
        for page in range(22, 23 + 1):
            self.url = 'https://{}/xiazai/list-%E4%BA%9A%E6%B4%B2%E7%94%B5%E5%BD%B1-{}.html'.format(self.baseUrl, page)
            print(self.url)

            # Get all links on the video list page.
            content = Config.get_content(self.url)

            a_href = Config.xpath_content(content, '//div[@id="tpl-img-content"]/li/a/@href')
            a_text = Config.xpath_content(content, '//div[@id="tpl-img-content"]/li/a/@title')

            print(a_href)
            print(a_text)

            # Save all links of the video list page.
            fileName = datetime.datetime.now().strftime('%H%M%S%f')
            fileName = 'asia_video_' + str(fileName) + '.csv'
            filePath = './asia_video_main_csv/'
            # exist_ok avoids the check-then-create race of a separate
            # os.path.exists() test.
            os.makedirs(filePath, exist_ok=True)

            with open(filePath+fileName, 'w', encoding='utf-8', newline='') as f:
                writer = csv.writer(f)
                # zip() stops at the shorter list, so unpaired entries are
                # dropped.
                writer.writerows(zip(a_text, a_href))

            # Pause between requests.
            time.sleep(4)

コード例 #4

ファイルを表示

ファイル: shipin_short_video.py プロジェクト: zero-wangdu/wangdu

    def run(self):
        """Dump the links of short-video list page 108 into a CSV file.

        The page URL is built from ``self.baseUrl``, fetched through
        ``Config.get_content``, and the ``href`` / ``title`` attributes of the
        anchors under the ``grid`` element are written as ``title, href``
        rows to a time-stamped ``short_video_*.csv`` file in the current
        working directory. The method sleeps 3 seconds after each page.
        """
        url_template = 'https://{}/shipin/list-%E7%9F%AD%E8%A7%86%E9%A2%91-{}.html'
        href_query = '//*[@id="grid"]/li/a/@href'
        title_query = '//*[@id="grid"]/li/a/@title'

        for page_no in range(108, 109):
            url = url_template.format(self.baseUrl, page_no)
            print(url)

            # Get all links on the short-video page.
            content = Config.get_content(url)
            a_href = Config.xpath_content(content, href_query)
            a_text = Config.xpath_content(content, title_query)

            print(a_href)
            print(a_text)

            # Save all links of the short-video page.
            stamp = datetime.datetime.now().strftime('%H%M%S%f')
            csv_name = 'short_video_' + stamp + '.csv'
            with open(csv_name, 'w', encoding='utf-8', newline='') as out:
                csv.writer(out).writerows(zip(a_text, a_href))

            time.sleep(3)

コード例 #5

ファイルを表示

    def run(self):
        """Collect the video links of list pages 31 through 39 into CSV files.

        For each page number the list URL is built from ``self.baseUrl`` and
        stored in ``self.url``. The page is fetched with
        ``Config.get_content`` and the ``href`` and ``title`` attributes of
        the anchors under ``div#tpl-img-content`` are stored in
        ``self.a_href`` and ``self.a_text``. Each page is written as
        ``title, href`` rows to a time-stamped file in a
        ``zifu_video_main_csv`` directory next to the running script.

        Any exception raised while handling a page is reported through
        ``MY_ERROR_LOG`` and ``print``, and the loop continues with the next
        page. The method sleeps 4 seconds between pages.
        """
        # A distinct name for the page counter: the original reused ``i`` for
        # the CSV rows as well, shadowing the page number inside the loop.
        for page in range(31, 39 + 1):
            self.url = 'https://{}/xiazai/list-%E5%88%B6%E6%9C%8D%E4%B8%9D%E8%A2%9C-{}.html'.format(
                self.baseUrl, page)
            print(self.url)

            # Reset the per-page results. Otherwise the error handler below
            # would log the previous page's links, or raise AttributeError if
            # the very first fetch fails before they are ever assigned.
            self.a_href = []
            self.a_text = []

            try:
                # Get all links on the video list page.
                content = Config.get_content(self.url)

                self.a_href = Config.xpath_content(
                    content, '//div[@id="tpl-img-content"]/li/a/@href')
                self.a_text = Config.xpath_content(
                    content, '//div[@id="tpl-img-content"]/li/a/@title')

                print(self.a_href)
                print(self.a_text)

                # Save all links of the video list page.
                fileName = datetime.datetime.now().strftime('%H%M%S%f')
                fileName = 'zifu_video_' + str(fileName) + '.csv'

                # sys.argv[0] may be a bare file name whose dirname is ''.
                # Without abspath the target would become
                # '/zifu_video_main_csv/' at the filesystem root.
                abs_path = os.path.dirname(os.path.abspath(sys.argv[0]))
                filePath = os.path.join(abs_path, 'zifu_video_main_csv')

                # exist_ok avoids the check-then-create race of a separate
                # os.path.exists() test.
                os.makedirs(filePath, exist_ok=True)

                with open(os.path.join(filePath, fileName),
                          'w',
                          encoding='utf-8',
                          newline='') as f:
                    writer = csv.writer(f)
                    # zip() stops at the shorter list, so unpaired entries
                    # are dropped.
                    writer.writerows(zip(self.a_text, self.a_href))
            except Exception as e:
                # Best effort: record what was known about this page and move
                # on to the next one.
                MY_ERROR_LOG(self.url)
                MY_ERROR_LOG(self.a_href)
                MY_ERROR_LOG(self.a_text)
                print('error : ', str(e))

            # Pause between requests.
            time.sleep(4)