'''
Author: Ribbon Huang
Wrapper around MongoDB access
'''
from utils.logger import LOGGER
import pymongo
from conf.settings import MONGO_HOST, MONGO_PORT, MONGO_DB, MONGO_SHEET, LOGGER_MONGO_NAME
from pymongo.errors import WriteError, WTimeoutError, ConnectionFailure
import numpy as np
import pandas as pd

# Logger for routine logging
logger = LOGGER.createLogger(LOGGER_MONGO_NAME)


class MongoUse:

    def __init__(self):
        # Connect to MongoDB and select the configured database and collection.
        try:
            self.client = pymongo.MongoClient(host=MONGO_HOST, port=MONGO_PORT)
        except ConnectionFailure:
            logger.warning('MongoDB ConnectionFailure')
        except TypeError:
            logger.warning('MongoDB connection variables are invalid')
        db = self.client[MONGO_DB]
        self.sheet = db[MONGO_SHEET]

    def insertDb(self, info):
        # Write a document (or list of documents) into the collection.
        try:
            self.sheet.insert(info)
        except WriteError:
            logger.warning('MongoDB WriteError')
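# ---------------------------------------------------------------------------
# Usage sketch (illustrative, not part of the original module): how MongoUse
# might be driven, assuming the MONGO_* settings point at a reachable server.
# The field names in the sample document are placeholders.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    mongo = MongoUse()
    # Store one day's record for a single stock.
    mongo.insertDb({'code': '600000', 'chg_pct': 1.23, 'date': '2019-01-02'})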
Desc : Fetches one day's basic information from Phoenix Finance (ifeng.com),
       i.e. the daily basic data for every listed stock. The crawled records
       are stored in the database.
       http://app.finance.ifeng.com/list/stock.php?t=ha&f=chg_pct&o=desc&p= + page number
'''
import random
import re
import time
from conf.settings import MIN_WAIT_TIME, MAX_WAIT_TIME, NUM_RETRIES, LOGGER_SPIDER_NAME, MIN_PAGE, MAX_PAGE, STATUS_CODES
import requests
from utils.logger import LOGGER
from utils.userAgent import USER_AGENTS
from requests.exceptions import ConnectionError, MissingSchema, ConnectTimeout, InvalidURL, ChunkedEncodingError, ReadTimeout
from multiprocessing import Pool
from utils.sqlUtil import SqlUtile

logger = LOGGER.createLogger(LOGGER_SPIDER_NAME)
db = SqlUtile()

'''
Crawler logic for scraping Phoenix Finance (ifeng.com)
'''


class PhoenixSpider:

    def __init__(self):
        # One listing page URL per page number in the configured range.
        self.urls = [
            'http://app.finance.ifeng.com/list/stock.php?t=ha&f=chg_pct&o=desc&p=' + str(i)
            for i in range(MIN_PAGE, MAX_PAGE)
        ]

    def changeUseAgent(self):
        # Rotate the browser User-Agent header for each request.
        return random.choice(USER_AGENTS)
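    # -----------------------------------------------------------------------
    # Sketch of the fetch-with-retry pattern the settings above suggest
    # (illustrative; `fetchPage` is a hypothetical helper, and STATUS_CODES is
    # assumed to list the HTTP status codes treated as success).
    # -----------------------------------------------------------------------
    def fetchPage(self, url):
        headers = {'User-Agent': self.changeUseAgent()}
        for attempt in range(NUM_RETRIES):
            try:
                response = requests.get(url, headers=headers, timeout=10)
                if response.status_code in STATUS_CODES:
                    return response.text
            except (ConnectionError, ConnectTimeout, ReadTimeout):
                logger.warning('request failed: %s (attempt %d)' % (url, attempt + 1))
            # Randomised politeness delay before retrying.
            time.sleep(random.uniform(MIN_WAIT_TIME, MAX_WAIT_TIME))
        return None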
Author: Ribbon Huang
Predicts stock prices based on the crawled data
'''
from utils.mongou import MongoUse
import matplotlib.pyplot as plt
from statsmodels.graphics.tsaplots import plot_pacf, plot_acf
from statsmodels.stats.diagnostic import acorr_ljungbox
from statsmodels.tsa.stattools import adfuller
from utils.logger import LOGGER
from conf.settings import LOGGER_ANALYSIS_NAME
import prettytable
import numpy as np
from statsmodels.tsa.arima_model import ARMA
from conf.settings import PREDICT_START_POINT, PREDICT_END_POINT, MAX_COUNT, MAX_SQRT_COUNT

logger = LOGGER.createLogger(LOGGER_ANALYSIS_NAME)
db = MongoUse()


def pre_table(table_names, table_rows):
    # Render the given rows as a text table on stdout.
    table = prettytable.PrettyTable()
    table.field_names = table_names
    for row in table_rows:
        table.add_row(row)
    print(table)


'''
Data analysis steps:
1. Stationarity testing
2. Randomness (white-noise) testing
3. Model training
4. Forecasting
'''
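# ---------------------------------------------------------------------------
# Sketch of the four-step workflow listed in the docstring above (illustrative
# only; `analyse_series` and the ARMA order are assumptions, and the
# statsmodels.tsa.arima_model.ARMA class imported here is deprecated in newer
# statsmodels releases).
# ---------------------------------------------------------------------------
def analyse_series(series):
    # 1. Stationarity: Augmented Dickey-Fuller test (small p-value suggests stationarity).
    adf_p_value = adfuller(series)[1]
    # 2. Randomness: Ljung-Box test (small p-value means not white noise, so worth modelling).
    ljung_box = acorr_ljungbox(series, lags=1)
    # 3. Model training: fit a low-order ARMA model on the series.
    model = ARMA(series, order=(1, 1)).fit(disp=0)
    # 4. Forecasting: predict over the configured window.
    forecast = model.predict(start=PREDICT_START_POINT, end=PREDICT_END_POINT)
    return adf_p_value, ljung_box, forecast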
'''
Redis BloomFilter deduplication
Author: Ribbon Huang
'''
import redis
from redis.exceptions import BusyLoadingError, DataError, ConnectionError
from utils.logger import LOGGER
from conf.settings import LOGGER_REDIS_NAME, REDIS_HOST, REDIS_PORT, REDIS_DB0, REDIS_KEY
# Redis bitmap size and the prime seeds used by the hash functions
from conf.settings import REDIS_PRIMSES, REDIS_BIT_SIZES

# Logger for routine logging
logger = LOGGER.createLogger(LOGGER_REDIS_NAME)

'''
Create a connection pool up front and pass it to Redis(), so that multiple
Redis instances share a single pool.
'''
try:
    pool = redis.ConnectionPool(host=REDIS_HOST, port=REDIS_PORT, db=REDIS_DB0)
    conn = redis.Redis(connection_pool=pool)
except ConnectionError:
    logger.warning('Error : Connect fail')
except BusyLoadingError:
    logger.warning('Error : BusyLoadingError')
except Exception:
    logger.warning('Error : undefined error')


class SimpleHash:
    '''One hash function of the Bloom filter, parameterised by capacity and seed.'''

    def __init__(self, cap, seed):
        self.cap = cap
        self.seed = seed
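    def hash(self, value):
        # Illustrative completion (not necessarily the original implementation):
        # fold the characters of `value` into a bit offset below `cap`.
        ret = 0
        for ch in value:
            ret = self.seed * ret + ord(ch)
        return (self.cap - 1) & ret


# ---------------------------------------------------------------------------
# Sketch of how SimpleHash is typically combined with Redis bit operations for
# deduplication (illustrative; the class and method names below are
# assumptions, and REDIS_PRIMSES / REDIS_BIT_SIZES are assumed to be a list of
# seed primes and the bitmap size respectively).
# ---------------------------------------------------------------------------
class BloomFilterSketch:

    def __init__(self):
        self.hashes = [SimpleHash(REDIS_BIT_SIZES, seed) for seed in REDIS_PRIMSES]

    def isContains(self, value):
        # The value is (probably) already seen only if every hashed bit is set.
        return all(conn.getbit(REDIS_KEY, h.hash(value)) for h in self.hashes)

    def insert(self, value):
        # Mark every hashed bit for this value.
        for h in self.hashes:
            conn.setbit(REDIS_KEY, h.hash(value), 1)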
'''
Pooled MySQL database connections
Author: Ribbon Huang
'''
from DBUtils.PooledDB import PooledDB
import conf.settings as Config
import pymysql
from pymysql import DatabaseError
from utils.logger import LOGGER
from conf.settings import LOGGER_MYSQL_NAME

# Logger for routine logging
logger = LOGGER.createLogger(LOGGER_MYSQL_NAME)

'''
MySQL database connection pool
'''


class SqlConnectionPool(object):

    __pool = None

    def __enter__(self):
        '''Check a connection out of the pool so SQL statements can be executed.'''
        try:
            self.conn = self.__getConn()
            self.cursor = self.conn.cursor()
        except DatabaseError:
            logger.warning('sql connect error')
        except Exception as e:
            logger.warning('sql undefined error: ' + str(e))
        return self

    def __getConn(self):
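        # Sketch of a typical PooledDB-backed implementation (illustrative; the
        # MYSQL_* attribute names on Config are assumptions, not confirmed
        # settings). The pool is created lazily and shared by all instances.
        if SqlConnectionPool.__pool is None:
            SqlConnectionPool.__pool = PooledDB(
                creator=pymysql,          # the DB-API module used to create connections
                maxconnections=6,         # upper bound on concurrently open connections
                host=Config.MYSQL_HOST,
                port=Config.MYSQL_PORT,
                user=Config.MYSQL_USER,
                password=Config.MYSQL_PASSWORD,
                database=Config.MYSQL_DB,
                charset='utf8'
            )
        return SqlConnectionPool.__pool.connection()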