def __init__(self):
    cf = tools.load_config()
    self.threshold = float(cf.get('Section', 'threshold'))
    self.days = int(cf.get('Section', 'days'))
    self.maxPage = int(cf.get('Section', 'maxPage'))
    self.decoding = cf.get('Section', 'decoding')
    target_path = cf.get('Section', 'target_path')
    stopwords_path = cf.get('Section', 'stopwords_path')
    dict_path = cf.get('Section', 'corpus')
    self.s = similarity.TextSimilarity(target_path, stopwords_path, dict_path)
    # Batch identifier for this scan
    self.scan_id = str(time.time())
    # Homepage (science)
    self.science_url = 'https://pacaio.match.qq.com/irs/rcd?cid=58&token=c232b098ee7611faeffc46409e836360&ext=tech&page='
    # Internet
    self.internet_url = 'https://pacaio.match.qq.com/irs/rcd?cid=52&token=8f6b50e1667f130c10f981309e1d8200&ext=614,603,605,611,612,613,615,620,618&page=1'
    # IT
    self.it_url = 'https://pacaio.match.qq.com/irs/rcd?cid=52&token=8f6b50e1667f130c10f981309e1d8200&ext=604,609&page='
    # Blockchain
    self.blockchain_url = 'https://pacaio.match.qq.com/tags/tag2articles?id=276813&num=15&page='
    # AI
    self.ai_url = 'https://pacaio.match.qq.com/irs/rcd?cid=52&token=8f6b50e1667f130c10f981309e1d8200&ext=602,608,622&page='
    # Startups and innovation
    self.innovate_url = 'https://pacaio.match.qq.com/irs/rcd?cid=52&token=8f6b50e1667f130c10f981309e1d8200&ext=619,617,610&page='
    # Cutting-edge technology
    self.leadingSci_url = 'https://pacaio.match.qq.com/irs/rcd?cid=52&token=8f6b50e1667f130c10f981309e1d8200&ext=607,616,623,624&page='
    # Add the category URLs to start_urls
    self.start_urls.append(self.science_url)
    self.start_urls.append(self.internet_url)
    self.start_urls.append(self.it_url)
    self.start_urls.append(self.blockchain_url)
    self.start_urls.append(self.ai_url)
    self.start_urls.append(self.innovate_url)
    self.start_urls.append(self.leadingSci_url)
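# The category endpoints above end in 'page=' so that a page index can be
# appended. A minimal sketch of how these templates could be turned into paged
# requests, assuming Scrapy's start_requests() hook, the configured
# self.maxPage limit, a module-level `import scrapy`, and a parse() callback
# (the starting page index is an assumption, not taken from the project):
def start_requests(self):
    for base_url in self.start_urls:
        for page in range(1, self.maxPage + 1):
            # e.g. ...&page=1, ...&page=2, up to maxPage
            yield scrapy.Request(base_url + str(page), callback=self.parse)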
def __init__(self):
    cf = tools.load_config()
    self.threshold = float(cf.get('Section', 'threshold'))
    self.days = int(cf.get('Section', 'days'))
    self.maxPage = int(cf.get('Section', 'maxPage'))
    target_path = cf.get('Section', 'target_path')
    stopwords_path = cf.get('Section', 'stopwords_path')
    dict_path = cf.get('Section', 'corpus')
    self.decoding = cf.get('Section', 'decoding')
    self.s = similarity.TextSimilarity(target_path, stopwords_path, dict_path)
    # Batch identifier for this scan
    self.scan_id = str(time.time())
def __init__(self):
    cf = tools.load_config()
    self.threshold = float(cf.get('Section', 'threshold'))
    self.days = int(cf.get('Section', 'days'))
    self.maxPage = int(cf.get('Section', 'maxPage'))
    target_path = cf.get('Section', 'target_path')
    dict_path = cf.get('Section', 'corpus')
    stopwords_path = cf.get('Section', 'stopwords_path')
    self.decoding = cf.get('Section', 'decoding')
    self.s = similarity.TextSimilarity(target_path, stopwords_path, dict_path)
    # Batch identifier for this scan
    self.scan_id = str(time.time())
    self.category_urls = []
    self.page = 1
    # URL of the Sina rolling news listing (page index is appended)
    self.tech_url = 'http://news.sina.com.cn/roll/#pageid=153&lid=2515&page='
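# Each of the spider constructors above pulls its settings from a single
# [Section] block returned by tools.load_config(). A minimal sketch of that
# helper and of the keys the spiders read, assuming an INI file parsed with
# configparser (the file name 'config.ini' and the sample values are
# illustrative, not taken from the project):
#
#   [Section]
#   threshold = 0.6
#   days = 3
#   maxPage = 5
#   decoding = utf-8
#   target_path = ./data/target.txt
#   stopwords_path = ./data/stopwords.txt
#   corpus = ./data/dict.txt
#   closespider_itemcount = 1000
import configparser

def load_config(path='config.ini'):
    cf = configparser.ConfigParser()
    cf.read(path, encoding='utf-8')
    return cf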
def __init__(self):
    # Initialization: load config and connect to the database
    cf = tools.load_config()
    self.decoding = cf.get('Section', 'decoding')
    settings = get_project_settings()
    # Connect to MySQL using the project settings
    self.connect = pymysql.connect(host=settings.get('MYSQL_HOST'),
                                   port=settings.get('MYSQL_PORT'),
                                   db=settings.get('MYSQL_DBNAME'),
                                   user=settings.get('MYSQL_USER'),
                                   passwd=settings.get('MYSQL_PASSWD'),
                                   charset='utf8',
                                   use_unicode=True)
    # Cursor used for all inserts, deletes, queries and updates
    self.cursor = self.connect.cursor()
    self.connect.autocommit(True)
    # Fetch the source URLs stored in the database
    self.cursor.execute(self.source_urlselect)
    for r in self.cursor:
        self.url_list.append(r[0])
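# The constructor above looks up the MySQL connection parameters through
# get_project_settings(); a minimal sketch of the corresponding settings.py
# entries, assuming plain module-level constants (host, database name and
# credentials are placeholders, not the project's real values):
MYSQL_HOST = '127.0.0.1'
MYSQL_PORT = 3306            # pymysql expects an integer port
MYSQL_DBNAME = 'news'
MYSQL_USER = 'root'
MYSQL_PASSWD = 'password'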
# -*- coding: utf-8 -*-

# Scrapy settings for news_spider project
#
# For simplicity, this file contains only settings considered important or
# commonly used. You can find more settings consulting the documentation:
#
#     https://doc.scrapy.org/en/latest/topics/settings.html
#     https://doc.scrapy.org/en/latest/topics/downloader-middleware.html
#     https://doc.scrapy.org/en/latest/topics/spider-middleware.html

import random

from news_spider.spiders import tools

cf = tools.load_config()

BOT_NAME = 'news_spider'

# Stop the spider after this many items have been scraped
CLOSESPIDER_ITEMCOUNT = int(cf.get('Section', 'closespider_itemcount'))

SPIDER_MODULES = ['news_spider.spiders']
NEWSPIDER_MODULE = 'news_spider.spiders'

LOG_LEVEL = 'ERROR'
ROBOTSTXT_OBEY = False
CONCURRENT_REQUESTS = 3
DOWNLOAD_TIMEOUT = 180
DOWNLOAD_DELAY = random.randint(1, 3)
RETRY_ENABLED = False
COOKIES_ENABLED = False
REDIRECT_ENABLED = False

DEFAULT_REQUEST_HEADERS = {
    "User-Agent": "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)",
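# Note: DOWNLOAD_DELAY = random.randint(1, 3) above is evaluated once, when
# settings.py is imported, so a single delay is used for the whole crawl.
# If a per-request random delay was intended, Scrapy's built-in
# RANDOMIZE_DOWNLOAD_DELAY is the idiomatic alternative (a sketch, not part of
# the original settings):
#   DOWNLOAD_DELAY = 2
#   RANDOMIZE_DOWNLOAD_DELAY = True   # waits 0.5x to 1.5x DOWNLOAD_DELAY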