Ejemplo n.º 1
0
    def get(self):
        """Crawl the Eastmoney stock-news index page and log the parsed articles.

        Fetches the index page, collects the article links found on it, hands
        them to ``parse_sub_page`` and finally logs every entry of
        ``self.results`` as JSON.

        Returns:
            None. Returns early, before any parsing, when the index page does
            not answer with HTTP status 200.
        """
        # Eastmoney index page
        url = 'http://stock.eastmoney.com/a/chyyj.html'
        # url = 'http://finance.eastmoney.com/a/ccjdd.html'
        # url = 'http://finance.eastmoney.com/a/202008121590598091.html'

        response = RequestUtil.get(url)

        code = response.status_code
        log.info(code)
        if code != 200:
            return None

        # Parse the HTML
        soup = bs4.BeautifulSoup(response.text, 'html.parser')
        # Collect the article links; dots are escaped so they only match a
        # literal '.' in the host name.
        pls = soup.find_all('a', href=re.compile(r'finance\.eastmoney\.com/a/'))
        self.parse_sub_page(pls)
        log.info('解析完成,开始打印数据...')
        for k, v in self.results.items():
            log.info(k)
            # No `encoding` argument: it does not exist on Python 3 (TypeError)
            # and UTF-8 is already the default on Python 2.
            log.info(json.dumps(v, ensure_ascii=False))
Ejemplo n.º 2
0
    def parse_sub_page(self, pls):
        """Download and parse every article linked from the index page.

        For each link whose URL is not yet a key of ``self.results`` the
        article page is fetched and its title, time, source and body are
        stored in ``self.results`` under that URL.

        A page that does not answer with HTTP 200, or that lacks the expected
        ``newsContent`` structure, is logged and skipped so that one bad page
        does not abort the whole crawl.

        Args:
            pls: Iterable of link tags; each item must support ``p['href']``.
        """
        for p in pls:
            url = p['href']
            # Skip URLs that have already been parsed
            if url in self.results:
                log.info(url + "\tparse repetition")
                continue
            log.info(url)

            response = RequestUtil.get(url)
            # Same status check the index-page fetch applies
            if response.status_code != 200:
                log.error(url + "\tunexpected status code")
                continue

            # Parse the HTML
            soup = bs4.BeautifulSoup(response.text, 'html.parser')
            # Drop tags that are not wanted in the stored content
            for s in soup(['img', 'iframe', 'video']):
                s.extract()

            # Look the article container up once; find() returns None when
            # the page has no such element.
            news = soup.find('div', class_='newsContent')
            if news is None:
                log.error(url + "\tnewsContent not found")
                continue

            h1 = news.find('h1')
            publish_time = news.find('div', class_='time')
            source = news.find('div', class_='source data-source')
            content_body = news.find('div', id=re.compile('ContentBody'))
            if h1 is None or publish_time is None or source is None:
                log.error(url + "\tarticle header incomplete")
                continue

            result = {
                'title': h1.get_text(),
                'time': publish_time.get_text(),
                'source': str(source.get_text()).replace("来源:", ""),
                # Stored as the tag's HTML string ('None' if the body is missing)
                'content': str(content_body)
            }
            self.results.setdefault(url, result)
Ejemplo n.º 3
0
def get_session(user, password, host, port, db):
    """Return a database session, building one when no shared session exists.

    When the module-level ``DBSession`` is ``None`` an engine and a session
    are created from the given MySQL connection parameters; otherwise the
    existing ``DBSession`` is returned as is.

    Args:
        user: Database user name.
        password: Database password (never written to the log).
        host: Database host.
        port: Database port.
        db: Database (schema) name.

    Returns:
        A session object, or ``None`` when setting up the engine or session
        raised an exception (the error is logged).
    """
    try:
        if DBSession is None:
            url_template = 'mysql+pymysql://{}:{}@{}:{}/{}'
            url = url_template.format(user, password, host, port, db)
            # Log the URL with the password masked to keep credentials out of
            # the log files.
            log.info(url_template.format(user, '***', host, port, db))
            log.info("开始连接数据库...")
            # engine = create_engine(url, echo=True)
            # NOTE(review): if this is SQLAlchemy's create_engine, it connects
            # lazily, so the success message below does not prove the
            # database is reachable - confirm.
            engine = create_engine(url)
            log.info("数据库连接成功...")
            session = sessionmaker(bind=engine)
            log.info("会话已创建")
            return session()
        else:
            return DBSession
    except Exception as e:
        log.error("数据库连接失败...")
        log.error(e.args)
        # Make the failure result explicit for callers
        return None
Ejemplo n.º 4
0
def test_req():
    """Request two cls.cn URLs ten times in a row and log every response.

    Each round fetches both URLs first and then logs the status code and
    body of the first response followed by those of the second.
    """
    log.info("开始请求...")
    client = RequestUtil()
    tline_url = 'https://x-quote.cls.cn/quote/index/tline?app=CailianpressWeb&date=20200821&os=web&sv=7.2.2&sign=19451680ce43b6be73f28481e91cfc32'
    anchor_url = 'https://www.cls.cn/v3/transaction/anchor?app=CailianpressWeb&cdate=2020-08-21&os=web&sv=7.2.2&sign=c1a1f220a04f4aa92d04bc57bc4a9836'

    for _ in range(10):
        # Both requests are issued before anything is logged
        tline_resp = client.get(tline_url)
        anchor_resp = client.get(anchor_url)

        for resp in (tline_resp, anchor_resp):
            log.info(resp.status_code)
            log.info(resp.text)

    log.info("结束请求...")
    log.info("")
Ejemplo n.º 5
0
 def get(self):
     """Load the UserTest row whose id is 41 and log its id and name.

     NOTE(review): presumably ``database.DBSession`` is a SQLAlchemy session,
     in which case ``one()`` raises unless exactly one row matches - confirm.
     """
     query = database.DBSession.query(UserTest)
     row = query.filter(UserTest.id == 41).one()
     log.info(row.id)
     log.info(row.name)
Ejemplo n.º 6
0
# coding=utf-8

import sys
import request
import database
from service.eastmoney import Eastmoney
from util.my_logger import log
from util.my_schedule import run_schedule
from util.my_thread_pool_executor import MyThreadPoolExecutor
from entity.user_test import UserTest

# Python 2 only: make UTF-8 the interpreter's default string encoding.
# `reload` is not a builtin and `sys.setdefaultencoding` does not exist on
# Python 3, so the hack is skipped there instead of raising at import time.
if sys.version_info[0] < 3:
    reload(sys)  # noqa: F821 - builtin on Python 2
    sys.setdefaultencoding('utf-8')

if __name__ == '__main__':
    # Connect to the database and publish the session on the database module.
    # NOTE(review): credentials are hard-coded; consider reading them from
    # configuration or the environment.
    database.DBSession = database.get_session("root", "root", "localhost",
                                              "3306", "test")
    future = MyThreadPoolExecutor.add(run_schedule)  # start the scheduled jobs
    log.info('start %s', not future.done())
    MyThreadPoolExecutor.add(Eastmoney().get)  # crawl Eastmoney
    MyThreadPoolExecutor.add(request.test_req)  # request the market/sector data

    # Runs on the main thread, right after the tasks above are submitted
    ut = UserTest()
    ut.get()
    log.info('end...')