Ejemplo n.º 1
0
    def __init__(self):
        """Set up request settings and the MySQL pool for the NetEase money list.

        The module-level ``LOCAL`` flag decides whether the ``LOCAL_MYSQL_*``
        or the ``MYSQL_*`` settings are handed to ``PyMysqlPoolBase``.
        """
        self.list_url = "http://money.163.com/special/00251G8F/news_json.js"
        self.extractor = GeneralNewsExtractor()

        user_agent = (
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_2) AppleWebKit/537.36 "
            "(KHTML, like Gecko) Chrome/79.0.3945.117 Safari/537.36"
        )
        self.headers = {"User-Agent": user_agent}

        # Pick one set of connection settings; only the chosen branch's
        # constants are read.
        self.local = LOCAL
        if self.local:
            host, port = LOCAL_MYSQL_HOST, LOCAL_MYSQL_PORT
            user, password = LOCAL_MYSQL_USER, LOCAL_MYSQL_PASSWORD
            self.db = LOCAL_MYSQL_DB
        else:
            host, port = MYSQL_HOST, MYSQL_PORT
            user, password = MYSQL_USER, MYSQL_PASSWORD
            self.db = MYSQL_DB

        self.sql_pool = PyMysqlPoolBase(
            host=host,
            port=port,
            user=user,
            password=password,
            db=self.db,
        )
        self.table = "netease_money"
        # Starts empty; presumably collects detail pages that failed -- confirm
        # against the methods that append to it.
        self.error_detail = []
Ejemplo n.º 2
0
    def __init__(self, key):
        """Prepare an Eastmoney article-search crawler for one keyword.

        Args:
            key: Search keyword (per the original note, the Chinese short
                name of a stock). It is printed, stored on ``self.key`` and
                embedded, percent-encoded, in the ``Referer`` header.
        """
        # Imported locally so this method carries its own dependency.
        from urllib.parse import quote

        # Whether we run locally or on the server.
        self.local = LOCAL
        # Chinese short name of the stock.
        self.key = key

        print(self.key, "\n\n\n")

        # Starting URL for requests.
        self.start_url = 'http://api.so.eastmoney.com/bussiness/Web/GetSearchList?'
        self.page_size = 10
        self.headers = {
            # Formatting ``key.encode()`` would embed the bytes repr
            # (``b'...'``) in the URL, so percent-encode the keyword instead.
            "Referer":
            "http://so.eastmoney.com/CArticle/s?keyword={}".format(
                quote(self.key)),
            "User-Agent":
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.117 Safari/537.36",
        }
        self.table = "eastmoney_carticle"
        if self.local:
            conf = {
                "host": LOCAL_MYSQL_HOST,
                "port": LOCAL_MYSQL_PORT,
                "user": LOCAL_MYSQL_USER,
                "password": LOCAL_MYSQL_PASSWORD,
                "db": LOCAL_MYSQL_DB,
            }
        else:
            conf = {
                "host": MYSQL_HOST,
                "port": MYSQL_PORT,
                "user": MYSQL_USER,
                "password": MYSQL_PASSWORD,
                "db": MYSQL_DB,
            }
        # Record the database the pool actually connects to, rather than
        # always the remote name.
        self.db = conf["db"]
        self.sql_pool = PyMysqlPoolBase(**conf)

        # URLs of list pages and detail pages that failed.
        self.error_detail = []
        self.error_list = []

        # Initialise the proxy.
        self.proxy = self._get_proxy()
        self.dt_format = '%Y-%m-%d %H:%M:%S'
        # Cutoff time for incremental crawling.
        self.limit_time = datetime.datetime(2020, 2, 1)
        # Whether to use a proxy.
        self.use_proxy = 1
Ejemplo n.º 3
0
    def __init__(self):
        """Set up endpoints, request headers and the MySQL pool.

        The four endpoint URLs are assigned first, then ``_generate_mcode()``
        is called and its result is placed in the ``mcode`` header. The
        module-level ``LOCAL`` flag selects which MySQL settings are used.
        """
        api_root = "http://webapi.cninfo.com.cn//api/sysapi/"
        self.zuixin_url = api_root + "p_sysapi1128"
        self.stock_url = api_root + "p_sysapi1078"
        self.fund_url = api_root + "p_sysapi1126"
        self.datas_url = api_root + "p_sysapi1127"

        self.mcode = self._generate_mcode()

        user_agent = (
            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_2) AppleWebKit/537.36 '
            '(KHTML, like Gecko) Chrome/79.0.3945.117 Safari/537.36'
        )
        # Hard-coded cookie string placed in the headers as-is.
        cookie = (
            '__qc_wId=726; pgv_pvid=6020356972; '
            'Hm_lvt_489bd07e99fbfc5f12cbb4145adb0a9b=1581945588; '
            'codeKey=ce7a9a719b; '
            'Hm_lpvt_489bd07e99fbfc5f12cbb4145adb0a9b=1582016401'
        )
        self.headers = {
            'Accept': 'application/json, text/javascript, */*; q=0.01',
            'Accept-Encoding': 'gzip, deflate',
            'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
            'Cache-Control': 'no-cache',
            'Connection': 'keep-alive',
            'Content-Length': '0',
            'Cookie': cookie,
            'Host': 'webapi.cninfo.com.cn',
            'mcode': '{}'.format(self.mcode),
            'Origin': 'http://webapi.cninfo.com.cn',
            'Pragma': 'no-cache',
            'Referer': 'http://webapi.cninfo.com.cn/',
            'User-Agent': user_agent,
            'X-Requested-With': 'XMLHttpRequest',
        }

        # Only the chosen branch's constants are read.
        self.local = LOCAL
        if self.local:
            host, port, user, password, db_name = (
                LOCAL_MYSQL_HOST,
                LOCAL_MYSQL_PORT,
                LOCAL_MYSQL_USER,
                LOCAL_MYSQL_PASSWORD,
                LOCAL_MYSQL_DB,
            )
        else:
            host, port, user, password, db_name = (
                MYSQL_HOST,
                MYSQL_PORT,
                MYSQL_USER,
                MYSQL_PASSWORD,
                MYSQL_DB,
            )
        self.db = db_name
        self.sql_pool = PyMysqlPoolBase(
            host=host, port=port, user=user, password=password, db=db_name)
        self.table = "juchao_info"
        self.error_detail = []
Ejemplo n.º 4
0
 def _init_pool(self):
     """Create ``self.sql_pool`` from the local or the remote MySQL settings.

     ``self.local`` must already be set; a truthy value selects the
     ``LOCAL_MYSQL_*`` constants, otherwise the ``MYSQL_*`` constants are used.
     """
     if self.local:
         settings = dict(
             host=LOCAL_MYSQL_HOST,
             port=LOCAL_MYSQL_PORT,
             user=LOCAL_MYSQL_USER,
             password=LOCAL_MYSQL_PASSWORD,
             db=LOCAL_MYSQL_DB,
         )
     else:
         settings = dict(
             host=MYSQL_HOST,
             port=MYSQL_PORT,
             user=MYSQL_USER,
             password=MYSQL_PASSWORD,
             db=MYSQL_DB,
         )
     self.sql_pool = PyMysqlPoolBase(**settings)
Ejemplo n.º 5
0
 def __init__(self, key):
     """Prepare an Eastmoney article-search crawler for one keyword.

     Args:
         key: Search keyword (per the original note, the Chinese short
             name of a stock). It is stored on ``self.key`` and embedded,
             percent-encoded, in the ``Referer`` header.
     """
     # Imported locally so this method carries its own dependency.
     from urllib.parse import quote

     # Whether we run locally or on the server.
     self.local = LOCAL
     # Whether to use the Abuyun proxy.
     self.abu = False
     # Chinese short name of the stock.
     self.key = key
     self.start_url = 'http://api.so.eastmoney.com/bussiness/Web/GetSearchList?'
     self.page_size = 10
     self.headers = {
         # Formatting ``key.encode()`` would embed the bytes repr
         # (``b'...'``) in the URL, so percent-encode the keyword instead.
         "Referer":
         "http://so.eastmoney.com/CArticle/s?keyword={}".format(
             quote(self.key)),
         "User-Agent":
         "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.117 Safari/537.36",
     }
     self.table = "eastmoney_carticle"
     if self.local:
         conf = {
             "host": LOCAL_MYSQL_HOST,
             "port": LOCAL_MYSQL_PORT,
             "user": LOCAL_MYSQL_USER,
             "password": LOCAL_MYSQL_PASSWORD,
             "db": LOCAL_MYSQL_DB,
         }
     else:
         conf = {
             "host": MYSQL_HOST,
             "port": MYSQL_PORT,
             "user": MYSQL_USER,
             "password": MYSQL_PASSWORD,
             "db": MYSQL_DB,
         }
     # Record the database the pool actually connects to, rather than
     # always the remote name.
     self.db = conf["db"]
     self.sql_pool = PyMysqlPoolBase(**conf)
     # When Abuyun is not in use, initialise the proxy here.
     if not self.abu:
         self.proxy = self._get_proxy()
     # URLs of list pages and detail pages that failed.
     self.error_detail = []
     self.error_list = []
Ejemplo n.º 6
0
    def __init__(self, *args, **kwargs):
        """Set up request settings and the MySQL pool for the cnstock list API.

        Args:
            *args: Accepted but not used here.
            **kwargs: Only ``topic`` is read; it is stored on ``self.topic``
                and is ``None`` when absent.
        """
        self.headers = {
            "User-Agent": (
                "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_2) AppleWebKit/537.36 "
                "(KHTML, like Gecko) Chrome/79.0.3945.117 Safari/537.36"
            ),
            "Referer": "http://news.cnstock.com/news/sns_yw/index.html",
        }
        self.list_url = "http://app.cnstock.com/api/waterfall?"
        self.extractor = GeneralNewsExtractor()

        # Only the chosen branch's constants are read.
        self.local = LOCAL
        if self.local:
            self.db = LOCAL_MYSQL_DB
            pool_kwargs = dict(
                host=LOCAL_MYSQL_HOST,
                port=LOCAL_MYSQL_PORT,
                user=LOCAL_MYSQL_USER,
                password=LOCAL_MYSQL_PASSWORD,
                db=LOCAL_MYSQL_DB,
            )
        else:
            self.db = MYSQL_DB
            pool_kwargs = dict(
                host=MYSQL_HOST,
                port=MYSQL_PORT,
                user=MYSQL_USER,
                password=MYSQL_PASSWORD,
                db=MYSQL_DB,
            )

        self.sql_pool = PyMysqlPoolBase(**pool_kwargs)
        self.table = "cn_stock"
        self.error_list = []
        self.error_detail = []
        self.topic = kwargs.get("topic")
        # Local naive datetime exactly one day before construction time.
        one_day = datetime.timedelta(days=1)
        self.check_date = datetime.datetime.today() - one_day
Ejemplo n.º 7
0
    def __init__(self):
        """Set up request settings and the MySQL pool for the QQ A-share news feed.

        The module-level ``LOCAL`` flag decides whether the ``LOCAL_MYSQL_*``
        or the ``MYSQL_*`` settings are handed to ``PyMysqlPoolBase``.
        """
        # Assigned once; the original repeated this assignment further down.
        self.local = LOCAL
        # Hard-coded API token interpolated into the list URL below.
        self.token = "8f6b50e1667f130c10f981309e1d8200"
        self.headers = {
            'User-Agent':
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_2) AppleWebKit/537.36 "
            "(KHTML, like Gecko) Chrome/79.0.3945.117 Safari/537.36"
        }
        # Parenthesised so it is obvious that ``format`` applies to the whole
        # concatenated URL, not just the last fragment.
        self.list_url = (
            "https://pacaio.match.qq.com/irs/rcd?cid=52&token={}"
            "&ext=3911,3922,3923,3914,3913,3930,3915,3918,3908&callback=__jp1"
        ).format(self.token)
        self.extractor = GeneralNewsExtractor()

        if self.local:
            conf = {
                "host": LOCAL_MYSQL_HOST,
                "port": LOCAL_MYSQL_PORT,
                "user": LOCAL_MYSQL_USER,
                "password": LOCAL_MYSQL_PASSWORD,
                "db": LOCAL_MYSQL_DB,
            }
            self.db = LOCAL_MYSQL_DB
        else:
            conf = {
                "host": MYSQL_HOST,
                "port": MYSQL_PORT,
                "user": MYSQL_USER,
                "password": MYSQL_PASSWORD,
                "db": MYSQL_DB,
            }
            self.db = MYSQL_DB

        self.sql_pool = PyMysqlPoolBase(**conf)
        self.table = "qq_Astock_news"
        self.error_detail = []