Exemple #1
0
'''
    Author: Ribbon Huang
    MongoDB的调用的封装
'''
from utils.logger import LOGGER
import pymongo
from conf.settings import MONGO_HOST, MONGO_PORT, MONGO_DB, MONGO_SHEET, LOGGER_MONGO_NAME
from pymongo.errors import WriteError, WTimeoutError, ConnectionFailure
import numpy as np
import pandas as pd

# Module-level logger used for day-to-day logging in this module.
logger = LOGGER.createLogger(LOGGER_MONGO_NAME)


class MongoUse:
    def __init__(self):
        """Create the MongoDB client and bind the configured collection.

        The client is built from ``MONGO_HOST`` / ``MONGO_PORT``; the
        collection ``MONGO_SHEET`` of database ``MONGO_DB`` is stored on
        ``self.sheet``.

        Raises:
            ConnectionFailure: logged as a warning, then re-raised.
            TypeError: logged as a warning, then re-raised.
        """
        try:
            self.client = pymongo.MongoClient(host=MONGO_HOST, port=MONGO_PORT)
        except ConnectionFailure:
            logger.warning('MongoDB ConnectionFailure')
            # Re-raise: without a client the lookup below would fail with an
            # unrelated AttributeError that hides the real cause.
            raise
        except TypeError:
            logger.warning('MongoDB Variables is error')
            raise

        # NOTE(review): recent PyMongo versions connect lazily, so a
        # ConnectionFailure may only surface on the first operation rather
        # than here — confirm against the installed version.
        self.sheet = self.client[MONGO_DB][MONGO_SHEET]

    def insertDb(self, info):
        try:
            self.sheet.insert(info)
        except WriteError:
Exemple #2
0
    Desc :
        凤凰财经获取一天的基本信息,这里的股市每个股票当天的基本信息。这里的爬取信息存储到资料库中。
        http://app.finance.ifeng.com/list/stock.php?t=ha&f=chg_pct&o=desc&p= + 页面
'''
import random
import re
import time
from conf.settings import MIN_WAIT_TIME, MAX_WAIT_TIME, NUM_RETRIES, LOGGER_SPIDER_NAME, MIN_PAGE, MAX_PAGE, STATUS_CODES
import requests
from utils.logger import LOGGER
from utils.userAgent import USER_AGENTS
from requests.exceptions import ConnectionError, MissingSchema, ConnectTimeout, InvalidURL, ChunkedEncodingError, ReadTimeout
from multiprocessing import Pool
from utils.sqlUtil import SqlUtile

# Module-level logger for the spider.
logger = LOGGER.createLogger(LOGGER_SPIDER_NAME)
# SqlUtile instance created once, when this module is imported.
db = SqlUtile()
'''
    凤凰财经网信息爬取的爬虫逻辑
'''


class PhoenixSpider:
    def __init__(self):
        """Pre-compute the listing-page URLs and store them on ``self.urls``.

        One URL is generated per page number in
        ``range(MIN_PAGE, MAX_PAGE)`` (``MAX_PAGE`` itself is excluded).
        """
        listing_prefix = 'http://app.finance.ifeng.com/list/stock.php?t=ha&f=chg_pct&o=desc&p='
        page_urls = []
        for page_no in range(MIN_PAGE, MAX_PAGE):
            # The page number is appended as the value of the trailing "p=".
            page_urls.append(listing_prefix + str(page_no))
        self.urls = page_urls

    def changeUseAgent(self):
        # 该函数用于改变浏览器头信息
Exemple #3
0
    Author: Ribbon Huang
    根据爬取下来的资料,对股票进行预测
'''
from utils.mongou import MongoUse
import matplotlib.pyplot as plt
from statsmodels.graphics.tsaplots import plot_pacf, plot_acf
from statsmodels.stats.diagnostic import acorr_ljungbox
from statsmodels.tsa.stattools import adfuller
from utils.logger import LOGGER
from conf.settings import LOGGER_ANALYSIS_NAME
import prettytable
import numpy as np
from statsmodels.tsa.arima_model import ARMA
from conf.settings import PREDICT_START_POINT, PREDICT_END_POINT, MAX_COUNT, MAX_SQRT_COUNT

# Module-level logger for the analysis code.
logger = LOGGER.createLogger(LOGGER_ANALYSIS_NAME)
# MongoUse instance created once, when this module is imported.
db = MongoUse()
def pre_table(table_names, table_rows):
    """Print the given rows as a PrettyTable text table.

    Args:
        table_names: column headers, assigned to the table's ``field_names``.
        table_rows: iterable of rows; each row is added with ``add_row``.

    Returns:
        None. The rendered table is written to standard output.
    """
    report = prettytable.PrettyTable()
    report.field_names = table_names
    for record in table_rows:
        report.add_row(record)
    rendered = str(report)
    print(rendered)

'''
    数据分析:
        1. 平稳性分析
        2. 随机性检测
        3. 模型训练
        4. 结果预测
'''
Exemple #4
0
'''
    Redis BloomFilter 去重处理
    Author: Ribbon Huang
'''
import redis
from redis.exceptions import BusyLoadingError, DataError, ConnectionError
from utils.logger import LOGGER
from conf.settings import LOGGER_REDIS_NAME, REDIS_HOST, REDIS_PORT, REDIS_DB0, REDIS_KEY
# redis位大小设置, 以及redis质数的设置
from conf.settings import REDIS_PRIMSES, REDIS_BIT_SIZES

# Module-level logger used for day-to-day logging in this module.
logger = LOGGER.createLogger(LOGGER_REDIS_NAME)

'''
    直接建立一个连接池,然后作为参数Redis,实现多个Redis实例共享一个连接池
'''
# Build one connection pool and pass it to the Redis client, so the
# module-level `conn` draws its connections from that pool.
# NOTE(review): if any handler below fires, `pool` and/or `conn` stay
# undefined and later use of `conn` will raise NameError — confirm that
# logging-and-continuing is the intended behaviour.
try:
    pool = redis.ConnectionPool(host=REDIS_HOST, port=REDIS_PORT, db=REDIS_DB0)
    conn = redis.Redis(connection_pool=pool)
except ConnectionError:
    logger.warning('Error : Connect fail')
except BusyLoadingError:
    logger.warning('Error : BusyLoadingError')
except Exception:
    # Still a broad best-effort catch, but unlike a bare `except:` it lets
    # KeyboardInterrupt and SystemExit propagate.
    logger.warning('Error : undefined error')

class SimpleHash:
    """Holder for a ``cap`` / ``seed`` pair.

    NOTE(review): presumably one hash function of the Bloom filter named in
    the module header — only the constructor is visible here, confirm.
    """

    def __init__(self, cap, seed):
        """Store ``cap`` and ``seed`` unchanged on the instance."""
        self.cap, self.seed = cap, seed
Exemple #5
0
'''
    Sql进程池的方式进行连接
    Author: Ribbon Huang
'''
from DBUtils.PooledDB import PooledDB
import conf.settings as Config
import pymysql
from pymysql import DatabaseError
from utils.logger import LOGGER
from conf.settings import LOGGER_MYSQL_NAME

# Module-level logger used for day-to-day logging in this module.
logger = LOGGER.createLogger(LOGGER_MYSQL_NAME)
'''
    Mysql数据库连接池
'''
class SqlConnectionPool(object):
    __pool = None

    def __enter__(self):
        """Acquire a connection and a cursor when entering the ``with`` block.

        The connection comes from ``self.__getConn()`` and is stored on
        ``self.conn``; its cursor is stored on ``self.cursor``.

        Returns:
            self, so the caller can issue SQL through ``self.cursor``.

        Errors are logged as warnings and not re-raised; in that case
        ``self.conn`` and/or ``self.cursor`` may not have been assigned.
        """
        try:
            self.conn = self.__getConn()
            self.cursor = self.conn.cursor()
        except DatabaseError:
            logger.warning('sql connect error')
        except Exception as e:
            # Convert the exception to text first: concatenating a str with
            # an exception object raises TypeError inside this handler.
            logger.warning('sql undefined error' + str(e))
        return self

    def __getConn(self):