Example #1
0
 def initialize(self):
     '''Set up logging, the data paths and the database handles, in that order.'''
     self.initLogging()
     self.initPath()
     # Connection settings can be changed in the tool module.
     connection = Connection().getConnection()
     self.con = connection
     self.cur = connection.cursor()
Example #2
0
 def initialize(self):
     '''Prepare the logger, the path attributes and the database connection/cursor.'''
     self.initLogging()
     self.initPath()
     # Connection settings can be changed in the tool module.
     dbConnection = Connection().getConnection()
     self.con = dbConnection
     self.cur = dbConnection.cursor()
Example #3
0
class ClassifiedData(object):
    '''Build index series for the Sina industry classification.

    Creating an instance runs the whole job: ``initialize`` sets up logging
    and the database connection, then ``main`` writes one index series per
    industry into the ``classifieddata`` table.
    '''
    def __init__(self):
        self.initialize()
        self.main()

    def initialize(self):
        '''Configure logging, then open the database connection and cursor.'''
        self.initLogging()
        # Connection settings can be changed in the tool module.
        self.con = Connection().getConnection()
        self.cur = self.con.cursor()

    def main(self):
        '''Generate and store the index of every industry found in the database.'''
        classifiedDict = self.getClassifiedDict()
        for key in classifiedDict:
            self.createClassifiedData(key, classifiedDict[key])

#------------------------------------------------------------------------------
    def initLogging(self):
        '''Configure the root logger: INFO level, written to a file that is truncated on start.'''
        logging.basicConfig(level=logging.INFO,
                            format='%(asctime)-15s %(lineno)-6d %(funcName)-30s %(message)s',
                            datefmt='%m-%d %H:%M:%S',
                            filename=r'D:\classifiedData.txt',
                            filemode='w')

    def getClassifiedDict(self):
        '''Return a mapping of industry name -> list of exchange-prefixed symbols.

        Rows are read from ``industryclassified``. A symbol whose first
        character is '5' or '6' is prefixed with 'SH', any other with 'SZ'.
        '''
        classifiedDict = collections.defaultdict(list)

        sql = 'select symbol,industryname from industryclassified'
        self.cur.execute(sql)
        for record in self.cur.fetchall():
            prefix = u'SH' if record[0][0] in ['5', '6'] else u'SZ'
            classifiedDict[record[1]].append(prefix + record[0])

        return classifiedDict

    def createClassifiedData(self, classifiedName, symbolList):
        '''Compute one industry index starting on 2007-01-04 and insert it.

        The base-day row has open fixed at 1000; high/low/close are the sums
        of the constituents' high/low/close divided by the sum of their opens,
        times 1000. Each later business day multiplies the previous index row
        by (1 + mean daily change of the constituents that traded that day).

        classifiedName -- industry name stored in the first column.
        symbolList     -- prefixed symbols belonging to the industry.

        Returns None. When no constituent has a row on the base day the
        industry is logged and skipped instead of raising IndexError.
        '''
        firstDay = datetime.date(2007, 1, 4)
        columns = ['openprice', 'highprice', 'lowprice', 'closeprice']
        # Parameterised query: the driver quotes the symbol instead of it
        # being spliced into the SQL text.
        sql = ('select date,openprice,highprice,lowprice,closeprice from stockdata_day_tdx '
               'where symbol = %s and date >= "2007-01-01" order by date')

        stockDataList = []   # per-symbol frames of day-over-day changes
        firstDataList = []   # base-day [open, high, low, close] per symbol
        for symbol in symbolList:
            self.cur.execute(sql, (symbol,))
            stockData = [list(record) for record in self.cur.fetchall()]
            if not stockData:
                continue

            stockData = np.array(stockData)
            stockDataDict = {name: stockData[:, position + 1]
                             for position, name in enumerate(columns)}
            stockDataFrame = pd.DataFrame(stockDataDict, index=stockData[:, 0], columns=columns)
            if firstDay in stockDataFrame.index:
                firstDataList.append(stockDataFrame.loc[firstDay].values)
            # Keep the daily relative changes; the first row has no
            # predecessor and is dropped.
            stockDataFrame = (stockDataFrame / stockDataFrame.shift(1) - 1).dropna()
            stockDataList.append(stockDataFrame)

        if not firstDataList:
            # Without base-day prices there is nothing to anchor the index on.
            logging.info('ERROR-%s: no constituent data on %s, index skipped',
                         classifiedName, firstDay)
            return

        # Base-day row.
        firstDataList = np.array(firstDataList)
        openSum = firstDataList[:, 0].sum()
        firstOpen = 1000
        firstHigh = round(firstDataList[:, 1].sum() / openSum * 1000, 2)
        firstLow = round(firstDataList[:, 2].sum() / openSum * 1000, 2)
        firstClose = round(firstDataList[:, 3].sum() / openSum * 1000, 2)
        insertDataList = [[classifiedName, firstDay, firstOpen, firstHigh, firstLow, firstClose]]

        # Remaining business days, in ascending order as produced by date_range.
        dateList = [dataDate.date()
                    for dataDate in pd.date_range('2007-01-04', datetime.date.today(), freq='B')]
        for dataDate in dateList:
            dayChanges = [frame.loc[dataDate].values
                          for frame in stockDataList if dataDate in frame.index]
            if not dayChanges:
                continue
            yesterdayData = insertDataList[-1]
            otherDayData = np.array(dayChanges)
            otherOpen = round(yesterdayData[2] * (1 + otherDayData[:, 0].mean()), 2)
            otherHigh = round(yesterdayData[3] * (1 + otherDayData[:, 1].mean()), 2)
            otherLow = round(yesterdayData[4] * (1 + otherDayData[:, 2].mean()), 2)
            otherClose = round(yesterdayData[5] * (1 + otherDayData[:, 3].mean()), 2)
            insertDataList.append([classifiedName, dataDate, otherOpen, otherHigh, otherLow, otherClose])

        sql = 'insert ignore into classifieddata value(' + '%s,' * 5 + '%s)'
        self.cur.executemany(sql, tuple(insertDataList))
        self.con.commit()
Example #4
0
 def initialize(self):
     '''Start the ``w`` session, configure logging and open the database handles.'''
     w.start()
     self.initLogging()
     connection = Connection().getConnection()
     self.con = connection
     self.cur = connection.cursor()
Example #5
0
class StockData():
    '''Download A-share quotes through the Wind ``w`` API and store them in the database.

    Creating an instance runs the whole job: ``initialize`` starts the Wind
    session and opens the database connection; ``main`` then loads history,
    refreshes the code list, applies today's update and closes the handles.
    '''
    def __init__(self):
        self.initialize()
        self.main()

    def initialize(self):
        '''Start the Wind session, configure logging and open the database cursor.'''
        w.start()
        self.initLogging()
        self.con = Connection().getConnection()
        self.cur = self.con.cursor()

    def main(self):
        '''Run the full pipeline and release the database handles afterwards.'''
        self.historyWindData()  # historical daily and minute bars
        self.updateWindCode()   # refresh the A-share code list
        self.currentWindData()  # today's update
        self.close()

#------------------------------------------------------------------------------ init
    def initLogging(self):
        '''Configure the root logger: INFO level, written to a file that is truncated on start.'''
        logging.basicConfig(level=logging.INFO,
                            format='%(asctime)-15s %(lineno)-6d %(funcName)-30s %(message)s',
                            datefmt='%m-%d %H:%M:%S',
                            filename=r'D:\log_windData.txt',
                            filemode='w')

#------------------------------------------------------------------------------ historyData
    def historyWindData(self):
        '''Download the full history: daily bars from 2005-01-01 and minute bars from 2014-01-01.'''
        todayDate = datetime.date.today()
        startDate = {'d': '20050101', 'm': '2014-01-01 09:30:00'}
        endDate = {'d': todayDate.strftime('%Y%m%d'),
                   'm': todayDate.strftime('%Y-%m-%d') + ' 15:00:00'}
        self._downloadBars(self.getStockCode(), startDate, endDate)

    def _downloadBars(self, stockCodeList, startDate, endDate):
        '''Fetch daily bars and 5/15/30/60-minute bars for the given date windows.

        startDate / endDate are dicts keyed 'd' (daily) and 'm' (minute).
        '''
        fields = {'d': ['pre_close', 'open', 'high', 'low', 'close', 'volume', 'amt', 'pct_chg'],
                  'm': ['open', 'high', 'low', 'close', 'volume', 'amt', 'pct_chg']}
        # day
        self.getDayData(stockCodeList, fields['d'], startDate['d'], endDate['d'], option='PriceAdj=F')
        # min
        for period in [5, 15, 30, 60]:
            self.getMinData(stockCodeList, fields['m'], startDate['m'], endDate['m'], period)

    def _saveQuotes(self, tableName, stockCode, timeColumn, timeList, fields, data):
        '''Insert one stock's downloaded series into ``tableName``.

        Rows with any missing value are dropped, and ``pct_chg`` is divided
        by 100 to turn the percentage into a plain ratio before inserting.
        '''
        stockDataDict = {'stockCode': stockCode, timeColumn: timeList}
        for position, field in enumerate(fields):
            stockDataDict[field] = data[position]
        stockData = pd.DataFrame(stockDataDict,
                                 columns=['stockCode', timeColumn] + fields,
                                 index=timeList).dropna()
        stockData['pct_chg'] = stockData['pct_chg'] / 100
        sql = "insert ignore into %s values(" % tableName + "%s," * (len(fields) + 1) + "%s)"
        self.cur.executemany(sql, tuple(stockData.values.tolist()))
        self.con.commit()

    def getDayData(self, stockCodeList, fields, startDate, endDate, option):
        '''Download daily bars with ``w.wsd`` for every code and insert them into stockdata_day.

        A failed download is logged with its error code and the code is skipped.
        '''
        for stockCode in stockCodeList:
            print('添加日数据,代碼%s' % stockCode)
            wsd_data = w.wsd(stockCode, fields, startDate, endDate, option)
            if wsd_data.ErrorCode != 0:
                logging.info('ERROR-%s-day數據下載失敗,錯誤代碼為:%s' % (stockCode, wsd_data.ErrorCode))
                continue
            dateList = [barDate.strftime("%Y-%m-%d") for barDate in wsd_data.Times]
            self._saveQuotes('stockdata_day', stockCode, 'date', dateList, fields, wsd_data.Data)

    def getMinData(self, stockCodeList, fields, startDate, endDate, period):
        '''Download ``period``-minute bars with ``w.wsi`` and insert them into stockData_<period>min.

        A failed download is logged and the code is skipped.
        '''
        option = 'BarSize=%s;PriceAdj=F' % period
        for stockCode in stockCodeList:
            print('添加%s 分钟数据,代碼%s' % (period, stockCode))
            wsi_data = w.wsi(stockCode, fields, startDate, endDate, option)
            if wsi_data.ErrorCode != 0:
                logging.info('ERROR-%s歷史分鐘%smin數據下載失敗' % (stockCode, period))
                continue
            # NOTE(review): the time part is formatted with '-' separators;
            # confirm the target column accepts this form.
            timeList = [barTime.strftime("%Y-%m-%d %H-%M-%S") for barTime in wsi_data.Times]
            self._saveQuotes('stockData_%smin' % period, stockCode, 'time', timeList, fields,
                             wsi_data.Data)

    def updateWindCode(self):
        '''Replace the contents of the stockCode table with today's A-share constituents.'''
        # Uses datetime.date like the rest of the class (was a bare ``date``).
        today = datetime.date.today().strftime('%Y%m%d')
        field = 'wind_code,sec_name'  # code and security name
        sector = '全部A股'
        option = 'date=%s;sector=%s;field=%s' % (today, sector, field)

        wset_data = w.wset('SectorConstituent', option)
        if wset_data.ErrorCode == 0:
            # Data[0] holds the codes, Data[1] the names.
            stockCodeData = zip(wset_data.Data[0], wset_data.Data[1])
            sql = "delete from stockCode"
            self.cur.execute(sql)
            self.con.commit()
            print('删除代码完毕')
            sql = "insert ignore into stockCode values(%s,%s)"
            self.cur.executemany(sql, tuple(stockCodeData))
            self.con.commit()
        else:
            logging.info('ERROR-股票代码更新错误')
        print('更新代码完毕')

#------------------------------------------------------------------------------ currentData
    def currentWindData(self):
        '''Apply price adjustments where needed, then insert today's bars.'''
        stockCodeList = self.getStockCode()
        reRightCode = self.getUpdateCode(stockCodeList)
        self.reRightData(reRightCode)        # adjust stored history
        self.insertTodayData(stockCodeList)  # insert today's data

    def getUpdateCode(self, stockCodeList):
        '''Return [[code, rate], ...] for the codes whose stored history needs adjusting.

        A code needs adjusting when today's ``pre_close`` from Wind differs
        from the latest ``close`` stored in stockData_day; ``rate`` is the
        relative difference. Codes with no stored close, and codes whose
        pre_close came back blank (requested as 0.0 via ``showblank``), are
        skipped: the first would divide by zero and the second would yield a
        rate of -1. If the Wind request fails an empty list is returned.
        '''
        reRightCode = []
        # Latest stored close per code; 0.0 marks "nothing stored yet".
        closeList = []
        sql = "select close from stockData_day where stockCode = %s order by date desc limit 1"
        for code in stockCodeList:
            self.cur.execute(sql, (code,))
            result = self.cur.fetchone()
            closeList.append(result[0] if result else 0.0)
        # Today's pre_close for all codes in one request.
        fields = ['pre_close']
        today = datetime.date.today().strftime('%Y%m%d')
        option = 'showblank=0.0;PriceAdj=F'
        wsd_data = w.wsd(stockCodeList, fields, today, today, option)
        if wsd_data.ErrorCode != 0:
            logging.info('ERROR-pre_close download failed, error code: %s' % wsd_data.ErrorCode)
            return reRightCode
        stockData = wsd_data.Data[0]

        for code, preClose, lastClose in zip(stockCodeList, stockData, closeList):
            if not lastClose or not preClose:
                continue
            if preClose != lastClose:
                rate = (preClose - lastClose) / lastClose
                reRightCode.append([code, rate])

        print('需要复权的数量为')
        print(len(reRightCode))
        print('需要复权的股票代码为:')
        print(reRightCode)
        logging.info('需要复权的股票代码为:%s' % reRightCode)
        return reRightCode

    def reRightData(self, reRightCode):
        '''Scale the stored prices and amounts of each listed code by (1 + rate).

        reRightCode -- list of [code, rate] pairs as returned by getUpdateCode.
        The daily table also scales pre_close; the minute tables have no such column.
        '''
        print('开始复权数据')
        # One rate per scaled column, followed by the code for the where clause.
        paraList = [(rate,) * 6 + (code,) for code, rate in reRightCode]
        sql = 'update stockData_day set '+\
        'pre_close=pre_close*(1+%s),open=open*(1+%s),high=high*(1+%s),low=low*(1+%s),close=close*(1+%s),amt=amt*(1+%s)' +\
         " where stockCode=%s"
        self.cur.executemany(sql, paraList)
        self.con.commit()
        print('日线复权完毕')

        paraList = [(rate,) * 5 + (code,) for code, rate in reRightCode]
        for period in [5, 15, 30, 60]:
            sql = 'update stockData_%smin ' % period + \
            'set open=open*(1+%s),high=high*(1+%s),low=low*(1+%s),close=close*(1+%s),amt=amt*(1+%s)' + \
            " where stockCode=%s"
            self.cur.executemany(sql, paraList)
            self.con.commit()
        print('分钟线复权完毕')

    def insertTodayData(self, stockCodeList):
        '''Download and insert today's daily bar and today's 09:30-15:00 minute bars.'''
        todayDate = datetime.date.today()
        startDate = {'d': todayDate.strftime('%Y%m%d'),
                     'm': todayDate.strftime('%Y-%m-%d') + ' 09:30:00'}
        endDate = {'d': todayDate.strftime('%Y%m%d'),
                   'm': todayDate.strftime('%Y-%m-%d') + ' 15:00:00'}
        self._downloadBars(stockCodeList, startDate, endDate)

    def getStockCode(self):
        '''Return the distinct stock codes stored in the stockCode table.'''
        sql = 'select distinct stockCode from stockCode'
        self.cur.execute(sql)
        return [record[0] for record in self.cur.fetchall()]

    def close(self):
        '''Close the cursor first, then the connection it belongs to.'''
        self.cur.close()
        self.con.close()
Example #6
0
class StockData():
    '''Load daily and minute bars from files exported by tdx into the database.

    Creating an instance runs the whole job: ``initialize`` prepares logging,
    paths and the database connection, then ``main`` imports the data and
    closes the handles.
    '''
    def __init__(self):
        self.initialize()
        self.main()

    def initialize(self):
        '''Configure logging and paths, then open the database connection and cursor.'''
        self.initLogging()
        self.initPath()
        # Connection settings can be changed in the tool module.
        self.con = Connection().getConnection()
        self.cur = self.con.cursor()

    def main(self):
        '''Import the historical files and close the database handles.

        The historical import is only needed for the first load; the daily
        refresh (``currentData``) is left disabled here.
        '''
        self.historyData()
        #         self.currentData() # daily refresh of the database
        self.close()

#------------------------------------------------------------------------------ initialize

    def initLogging(self):
        '''Configure the root logger: INFO level, written to a file that is truncated on start.'''
        logging.basicConfig(
            level=logging.INFO,
            format='%(asctime)-15s %(lineno)-6d %(funcName)-30s %(message)s',
            datefmt='%m-%d %H:%M:%S',
            filename=r'D:\tdxData.txt',
            filemode='w')

    def initPath(self):
        '''Set the folders the input files are read from.'''
        # Historical daily files; everything in the folder is imported.
        self.historyDayPath = r'C:\Users\Administrator\Desktop\tdx\historyDayData'
        # Historical 5-minute files; 15/30/60-minute rows are derived from them.
        self.historyMinPath = r'C:\Users\Administrator\Desktop\tdx\historyMinData'
        # Close vs. previous-close check file; only the newest file should be here.
        self.checkDataPath = r'C:\Users\Administrator\Desktop\tdx\checkData'
        # Today's 5-minute files; 15/30/60-minute rows are derived from them.
        self.currentMinPath = r'C:\Users\Administrator\Desktop\tdx\currentMinData'

#------------------------------------------------------------------------------ history

    def historyData(self):
        '''Import historical data; only the minute import is currently enabled.'''
        #         self.getDayData()
        self.getMinData()

    def _readDataLines(self, path, skipHead=0):
        '''Return the lines of ``path`` without the first ``skipHead`` lines and the last line.'''
        with open(path) as stockFile:
            return stockFile.readlines()[skipHead:-1]

    def _parseMinRow(self, rowData, symbol):
        '''Turn the split fields of a minute row into a database row, or None to skip it.

        The date and HHMM fields are merged into 'date HH:MM:00' and the
        symbol is put in front. Rows whose numeric fields are all zero (stock
        not listed yet) or that do not have 7 fields after merging are skipped.
        '''
        stockTime = '%s %s:%s:00' % (rowData[0], rowData[1][:-2], rowData[1][-2:])
        rowData[0:2] = [stockTime]
        values = [float(item) for item in rowData[1:]]
        if all(value == 0.0 for value in values):
            return None
        if len(rowData) != 7:
            return None
        rowData.insert(0, symbol)
        return rowData

    def _insertMinBars(self, symbol, stockDataList5):
        '''Insert chronological 5-minute rows and the 15/30/60-minute rows sampled from them.

        Every third 5-minute row gives the 15-minute series, every second of
        those the 30-minute series, and the 30-minute rows whose timestamp
        ends in '00:00' the 60-minute series.
        '''
        stockDataList15 = stockDataList5[2::3]
        stockDataList30 = stockDataList15[1::2]
        stockDataList60 = [item for item in stockDataList30 if item[1].endswith('00:00')]
        barsByPeriod = [(5, stockDataList5), (15, stockDataList15),
                        (30, stockDataList30), (60, stockDataList60)]
        for period, bars in barsByPeriod:
            sql = "insert ignore into stockData_%smin_tdx" % period + " values(" + "%s," * 7 + "%s)"
            self.cur.executemany(sql, tuple(bars))
            self.con.commit()
            print('%s分钟线数据写入完毕' % symbol)

    def getDayData(self):
        '''Import every daily file found in ``historyDayPath`` into stockData_day_tdx.'''
        for fileName in self.getFileNameList(self.historyDayPath):
            symbol = fileName.split('.')[0]
            fileContent = self._readDataLines(os.path.join(self.historyDayPath, fileName))
            if not fileContent:
                continue
            stockDataList = []
            for row in fileContent:
                rowData = row.strip().split('\t')
                values = [float(item) for item in rowData[1:]]
                if all(value == 0.0 for value in values):
                    continue  # all zero: the stock was not listed yet
                if len(rowData) == 7:
                    rowData.insert(0, symbol)
                    stockDataList.append(rowData)
            sql = "insert ignore into stockData_day_tdx values(" + "%s," * 7 + "%s)"  # 8 columns
            self.cur.executemany(sql, tuple(stockDataList))
            self.con.commit()
            print('%s日线数据写入完毕' % symbol)

        print('全部日线数据写入完毕')

    def getMinData(self):
        '''Import every minute file found in ``historyMinPath`` into the minute tables.'''
        for fileName in self.getFileNameList(self.historyMinPath):
            symbol = fileName.split('.')[0]
            fileContent = self._readDataLines(os.path.join(self.historyMinPath, fileName))
            if not fileContent:
                continue
            stockDataList5 = []
            for row in fileContent:
                bar = self._parseMinRow(row.strip().split('\t'), symbol)
                if bar is not None:
                    stockDataList5.append(bar)
            self._insertMinBars(symbol, stockDataList5)

        print('全部分钟线数据写入完毕')


#------------------------------------------------------------------------------current

    def currentData(self):
        '''Adjust stored history where needed, then insert today's daily and minute data.'''
        stockCodeList = self.getStockCode()
        reRightCode, stockDataList = self.getUpdateCode(stockCodeList)
        self.reRightData(reRightCode)      # adjust stored history
        self.insertDayData(stockDataList)  # today's daily rows
        self.insertMinData()               # today's minute rows

    def getUpdateCode(self, stockCodeList):
        '''Work out which codes need adjusting and parse today's rows from the check file.

        Returns ``(reRightCode, stockDataList)``: ``reRightCode`` is a list
        of [symbol, rate] pairs for symbols whose last value in the check
        file differs from the newest ClosePrice stored in the database;
        ``stockDataList`` holds the formatted rows of the check file.
        '''
        reRightCode = []
        # Newest stored close per symbol.
        closeDict = {}
        sql = "select ClosePrice from stockdata_day_tdx where Symbol = %s order by date desc limit 1"
        for code in stockCodeList:
            self.cur.execute(sql, (code,))
            result = self.cur.fetchone()
            if result:
                closeDict[code] = result[0]
        # Today's values come from the first file in the check folder.
        fileName = self.getFileNameList(self.checkDataPath)[0]
        path = os.path.join(self.checkDataPath, fileName)

        rowDataList = []
        for row in self._readDataLines(path, skipHead=2):
            rowData = [item.strip() for item in row.strip().split('\t')]
            # Rows without 11 columns (not listed yet) or with '0' in
            # column 3 or 4 (suspended) need no adjustment.
            if len(rowData) != 11 or rowData[3] == '0' or rowData[4] == '0':
                continue
            rowData[0] = rowData[0].replace('=', '').replace('"', '')
            rowDataList.append(rowData)

        print(len(rowDataList))
        stockDataList = self.formatStockData(rowDataList)
        for stockData in stockDataList:
            symbol = stockData[0]
            if symbol not in closeDict:
                continue
            closePrice = closeDict[symbol]
            fileClose = float(stockData[-1])
            if closePrice != fileClose:  # mismatch means the history must be adjusted
                rate = (fileClose - closePrice) / closePrice
                reRightCode.append([symbol, rate])

        print(stockDataList)
        print('复权的数量为 %s' % len(reRightCode))
        print('复权的股票代码为:')
        print(reRightCode)
        logging.info('需要复权的股票代码为:%s' % reRightCode)
        return reRightCode, stockDataList

    def formatStockData(self, rowDataList):
        '''Convert check-file rows into database rows.

        The symbol gets an 'SH' prefix when it starts with '6' and 'SZ'
        otherwise; columns 4 and 5 are scaled when they carry a unit suffix.
        Each result is [symbol, today, col 9, col 7, col 8, col 6, col 4,
        col 5, col 10]. The rows in ``rowDataList`` are modified in place.
        '''
        today = date.today()
        # Suffix -> multiplier; presumably the GBK bytes of the Chinese
        # units for 1e8 and 1e4 - confirm against the export encoding.
        unitFactors = (('\xd2\xda', 10**8), ('\xcd\xf2', 10**4))
        stockDataList = []
        for rowData in rowDataList:
            prefix = 'SH' if rowData[0].startswith('6') else 'SZ'
            rowData[0] = prefix + rowData[0]
            for i in [4, 5]:
                for suffix, factor in unitFactors:
                    if rowData[i].endswith(suffix):
                        rowData[i] = float(rowData[i].replace(suffix, '')) * factor
                        break
            stockDataList.append([
                rowData[0], today, rowData[9], rowData[7], rowData[8],
                rowData[6], rowData[4], rowData[5], rowData[10]
            ])
        return stockDataList

    def reRightData(self, reRightCode):
        '''Scale the stored prices and Amt of each listed symbol by (1 + rate).

        reRightCode -- list of [symbol, rate] pairs; applied to the daily
        table and to the 5/15/30/60-minute tables.
        '''
        # Five scaled columns, followed by the symbol for the where clause.
        paraList = [(rate,) * 5 + (code,) for code, rate in reRightCode]

        sql = 'update stockData_day_tdx set '+\
        'OpenPrice=OpenPrice*(1+%s),HighPrice=HighPrice*(1+%s),LowPrice=LowPrice*(1+%s),ClosePrice=ClosePrice*(1+%s),Amt=Amt*(1+%s)' +\
         " where Symbol=%s"
        self.cur.executemany(sql, paraList)
        self.con.commit()
        print('日线复权完毕')

        for period in [5, 15, 30, 60]:
            sql = 'update stockData_%smin_tdx ' % period + \
            'set OpenPrice=OpenPrice*(1+%s),HighPrice=HighPrice*(1+%s),LowPrice=LowPrice*(1+%s),ClosePrice=ClosePrice*(1+%s),Amt=Amt*(1+%s)' + \
            " where Symbol=%s"
            self.cur.executemany(sql, paraList)
            self.con.commit()
        print('分钟线复权完毕')

    def insertDayData(self, todayData):
        '''Insert today's daily rows; the trailing element of each row is removed in place first.'''
        for data in todayData:
            data[:] = data[:-1]
        sql = "insert ignore into stockData_day_tdx values(" + "%s," * 7 + "%s)"  # 8 columns
        self.cur.executemany(sql, tuple(todayData))
        self.con.commit()
        print('day数据更新完成')

    def insertMinData(self):
        '''Insert today's minute rows from the files in ``currentMinPath``.

        Each file is scanned from its end until a row of another date is
        met. The collected rows are put back into chronological order before
        the 15/30/60-minute series are sampled, so the sampling matches
        ``getMinData``.
        '''
        today = date.today().strftime('%Y-%m-%d')
        for fileName in self.getFileNameList(self.currentMinPath):
            symbol = fileName.split('.')[0]
            # The files are read from the folder that was listed.
            fileContent = self._readDataLines(os.path.join(self.currentMinPath, fileName))
            if not fileContent:
                continue

            stockDataList5 = []
            for row in reversed(fileContent):
                rowData = row.strip().split('\t')
                if rowData[0] != today:
                    break  # reached an earlier day
                bar = self._parseMinRow(rowData, symbol)
                if bar is not None:
                    stockDataList5.append(bar)
            stockDataList5.reverse()
            self._insertMinBars(symbol, stockDataList5)
        print('min数据更新完成')

    def getFileNameList(self, filePath):
        '''Return the names (without folder) of the .xls/.xlsx files in ``filePath``.'''
        return [
            filename for filename in os.listdir(filePath)
            if os.path.splitext(filename)[-1] in ['.xls', '.xlsx']
        ]

    def getStockCode(self):
        '''Return the distinct non-empty symbols stored in stockdata_day_tdx.'''
        sql = 'select distinct Symbol from stockdata_day_tdx'
        self.cur.execute(sql)
        return [symbol[0] for symbol in self.cur.fetchall() if symbol[0]]

    def close(self):
        '''Close the cursor first, then the connection it belongs to.'''
        self.cur.close()
        self.con.close()
Example #7
0
class ClassifiedData(object):
    '''Build index series for the Sina industry classification.

    Creating an instance runs the whole job: ``initialize`` sets up logging
    and the database connection, then ``main`` writes one index series per
    industry into the ``classifieddata`` table.
    '''
    def __init__(self):
        self.initialize()
        self.main()

    def initialize(self):
        '''Configure logging, then open the database connection and cursor.'''
        self.initLogging()
        # Connection settings can be changed in the tool module.
        self.con = Connection().getConnection()
        self.cur = self.con.cursor()

    def main(self):
        '''Generate and store the index of every industry found in the database.'''
        classifiedDict = self.getClassifiedDict()
        for key in classifiedDict:
            self.createClassifiedData(key, classifiedDict[key])


#------------------------------------------------------------------------------

    def initLogging(self):
        '''Configure the root logger: INFO level, written to a file that is truncated on start.'''
        logging.basicConfig(
            level=logging.INFO,
            format='%(asctime)-15s %(lineno)-6d %(funcName)-30s %(message)s',
            datefmt='%m-%d %H:%M:%S',
            filename=r'D:\classifiedData.txt',
            filemode='w')

    def getClassifiedDict(self):
        '''Return a mapping of industry name -> list of exchange-prefixed symbols.

        Rows are read from ``industryclassified``. A symbol whose first
        character is '5' or '6' is prefixed with 'SH', any other with 'SZ'.
        '''
        classifiedDict = collections.defaultdict(list)

        sql = 'select symbol,industryname from industryclassified'
        self.cur.execute(sql)
        for record in self.cur.fetchall():
            prefix = u'SH' if record[0][0] in ['5', '6'] else u'SZ'
            classifiedDict[record[1]].append(prefix + record[0])

        return classifiedDict

    def createClassifiedData(self, classifiedName, symbolList):
        '''Compute one industry index starting on 2007-01-04 and insert it.

        The base-day row has open fixed at 1000; high/low/close are the sums
        of the constituents' high/low/close divided by the sum of their opens,
        times 1000. Each later business day multiplies the previous index row
        by (1 + mean daily change of the constituents that traded that day).

        classifiedName -- industry name stored in the first column.
        symbolList     -- prefixed symbols belonging to the industry.

        Returns None. When no constituent has a row on the base day the
        industry is logged and skipped instead of raising IndexError.
        '''
        firstDay = datetime.date(2007, 1, 4)
        columns = ['openprice', 'highprice', 'lowprice', 'closeprice']
        # Parameterised query: the driver quotes the symbol instead of it
        # being spliced into the SQL text.
        sql = ('select date,openprice,highprice,lowprice,closeprice from stockdata_day_tdx '
               'where symbol = %s and date >= "2007-01-01" order by date')

        stockDataList = []  # per-symbol frames of day-over-day changes
        firstDataList = []  # base-day [open, high, low, close] per symbol
        for symbol in symbolList:
            self.cur.execute(sql, (symbol, ))
            stockData = [list(record) for record in self.cur.fetchall()]
            if not stockData:
                continue

            stockData = np.array(stockData)
            stockDataDict = {
                name: stockData[:, position + 1]
                for position, name in enumerate(columns)
            }
            stockDataFrame = pd.DataFrame(
                stockDataDict, index=stockData[:, 0], columns=columns)
            if firstDay in stockDataFrame.index:
                firstDataList.append(stockDataFrame.loc[firstDay].values)
            # Keep the daily relative changes; the first row has no
            # predecessor and is dropped.
            stockDataFrame = (stockDataFrame / stockDataFrame.shift(1) -
                              1).dropna()
            stockDataList.append(stockDataFrame)

        if not firstDataList:
            # Without base-day prices there is nothing to anchor the index on.
            logging.info('ERROR-%s: no constituent data on %s, index skipped',
                         classifiedName, firstDay)
            return

        # Base-day row.
        firstDataList = np.array(firstDataList)
        openSum = firstDataList[:, 0].sum()
        firstOpen = 1000
        firstHigh = round(firstDataList[:, 1].sum() / openSum * 1000, 2)
        firstLow = round(firstDataList[:, 2].sum() / openSum * 1000, 2)
        firstClose = round(firstDataList[:, 3].sum() / openSum * 1000, 2)
        insertDataList = [[
            classifiedName, firstDay, firstOpen, firstHigh, firstLow,
            firstClose
        ]]

        # Remaining business days, in ascending order as produced by date_range.
        dateList = [
            dataDate.date() for dataDate in pd.date_range(
                '2007-01-04', datetime.date.today(), freq='B')
        ]
        for dataDate in dateList:
            dayChanges = [
                frame.loc[dataDate].values for frame in stockDataList
                if dataDate in frame.index
            ]
            if not dayChanges:
                continue
            yesterdayData = insertDataList[-1]
            otherDayData = np.array(dayChanges)
            otherOpen = round(
                yesterdayData[2] * (1 + otherDayData[:, 0].mean()), 2)
            otherHigh = round(
                yesterdayData[3] * (1 + otherDayData[:, 1].mean()), 2)
            otherLow = round(
                yesterdayData[4] * (1 + otherDayData[:, 2].mean()), 2)
            otherClose = round(
                yesterdayData[5] * (1 + otherDayData[:, 3].mean()), 2)
            insertDataList.append([
                classifiedName, dataDate, otherOpen, otherHigh, otherLow,
                otherClose
            ])

        sql = 'insert ignore into classifieddata value(' + '%s,' * 5 + '%s)'
        self.cur.executemany(sql, tuple(insertDataList))
        self.con.commit()
Example #8
0
class StockData(object):
    '''Load day and minute bars from TDX (TongDaXin) export files into the database.

    Instantiating the class runs the whole job: initialize() sets up logging,
    the input paths and the database connection, then main() imports the data
    and closes the connection again.
    '''

    def __init__(self):
        self.initialize()
        self.main()

    def initialize(self):
        '''Set up logging, the input paths and the database connection.'''
        # Logging service
        self.initLogging()
        # Input directories
        self.initPath()
        # Database connection; its settings can be changed in the tool file
        self.con = Connection().getConnection()
        self.cur = self.con.cursor()

    def main(self):
        '''Run the import and always release the database resources afterwards.'''
        try:
            # Full history import; only needed the first time the database
            # is filled, afterwards the daily update is enough.
            self.historyData()
            # self.currentData()  # daily update, run once per trading day
        finally:
            # Close even when the import fails so the connection is not leaked.
            self.close()

#------------------------------------------------------------------------------ initialize
    def initLogging(self):
        '''Configure the root logger to write to the log file.'''
        logging.basicConfig(level=logging.INFO,
                            format='%(asctime)-15s %(lineno)-6d %(funcName)-30s %(message)s',
                            datefmt='%m-%d %H:%M:%S',
                            filename=r'D:\tdxData.txt',
                            filemode='w')

    def initPath(self):
        '''Set the directories that the export files are read from.'''
        # Historical day data; every file in this directory is written to the database
        self.historyDayPath = r'C:\Users\Administrator\Desktop\tdx\historyDayData'
        # Historical 5-minute data; every file is written to the database and
        # the 15/30/60-minute tables are derived from it
        self.historyMinPath = r'C:\Users\Administrator\Desktop\tdx\historyMinData'
        # Close vs. previous-close comparison file exported through a formula;
        # only the most recent comparison file should be kept in this directory
        self.checkDataPath = r'C:\Users\Administrator\Desktop\tdx\checkData'
        # Minute data of the current day, also expanded to 15/30/60 minutes
        self.currentMinPath = r'C:\Users\Administrator\Desktop\tdx\currentMinData'

#------------------------------------------------------------------------------ helpers
    def _listDataFiles(self, dirPath):
        '''Return (symbol, fullPath) pairs for the export files found in dirPath.

        The symbol is the part of the file name before the first dot.
        '''
        return [(fileName.split('.')[0], os.path.join(dirPath, fileName))
                for fileName in self.getFileNameList(dirPath)]

    def _readDataLines(self, filePath):
        '''Return the lines of filePath without its last line.

        The last line is dropped as in the original code (presumably a footer
        written by the export -- TODO confirm).
        '''
        with open(filePath) as stockFile:
            return stockFile.readlines()[:-1]

    def _isAllZero(self, fields):
        '''Return True when every field converts to 0.0.

        All fields are converted first, so a non-numeric field still raises
        ValueError instead of being silently ignored.
        '''
        values = [float(item) for item in fields]
        return all(value == 0.0 for value in values)

    def _buildMinRow(self, rowData, symbol):
        '''Turn the split fields of one 5-minute line into a database row.

        rowData must hold 8 fields (date, time as HHMM, then six values).
        Returns [symbol, 'date HH:MM:00', six values], or None when the line
        has a different number of fields or all six values are zero (the
        stock was not listed yet).
        '''
        if len(rowData) != 8:
            return None
        dataDate, dataTime = rowData[0], rowData[1]
        values = rowData[2:]
        if self._isAllZero(values):
            return None
        stockTime = '%s %s:%s:00' % (dataDate, dataTime[:-2], dataTime[-2:])
        return [symbol, stockTime] + values

    def _insertMinPeriods(self, symbol, stockDataList5):
        '''Write chronologically ordered 5-minute rows and the derived periods.

        Every third 5-minute row is used as the 15-minute row, every second
        15-minute row as the 30-minute row, and the 30-minute rows whose time
        ends in '00:00' as the 60-minute rows.
        '''
        stockDataList15 = stockDataList5[2::3]
        stockDataList30 = stockDataList15[1::2]
        stockDataList60 = [item for item in stockDataList30 if item[1].endswith('00:00')]
        periodData = {5: stockDataList5, 15: stockDataList15,
                      30: stockDataList30, 60: stockDataList60}
        for period in [5, 15, 30, 60]:
            sql = "insert ignore into stockData_%smin_tdx" % period + " values("+"%s,"*7+"%s)"
            self.cur.executemany(sql, tuple(periodData[period]))
            self.con.commit()
            print('%s分钟线数据写入完毕' % symbol)

#------------------------------------------------------------------------------ history
    def historyData(self):
        '''Import the historical data (currently the minute data only).'''
        # self.getDayData()  # enable to import the day history as well
        self.getMinData()

    def getDayData(self):
        '''Import every day-data file found in historyDayPath.'''
        for symbol, filePath in self._listDataFiles(self.historyDayPath):
            fileContent = self._readDataLines(filePath)
            if not fileContent:
                continue
            stockDataList = []
            for row in fileContent:
                rowData = row.strip().split('\t')
                # Only 7-field lines are data rows. Checking the length first
                # keeps blank or malformed lines from failing in float().
                if len(rowData) != 7:
                    continue
                # All values zero: the stock was not listed yet.
                if self._isAllZero(rowData[1:]):
                    continue
                # Original author's note: days on which a stock was suspended
                # are not contained in the downloaded file at all.
                rowData.insert(0, symbol)
                stockDataList.append(rowData)
            # Write the data of this stock; the table has 8 columns.
            sql = "insert ignore into stockData_day_tdx values("+"%s,"*7+"%s)"
            self.cur.executemany(sql, tuple(stockDataList))
            self.con.commit()
            print('%s日线数据写入完毕' % symbol)

        print('全部日线数据写入完毕')

    def getMinData(self):
        '''Import every 5-minute file found in historyMinPath.

        The 15/30/60-minute tables are filled from the same rows.
        '''
        for symbol, filePath in self._listDataFiles(self.historyMinPath):
            # Read from the first line on (the files are exported without a header).
            fileContent = self._readDataLines(filePath)
            if not fileContent:
                continue
            stockDataList5 = []
            for row in fileContent:
                stockData = self._buildMinRow(row.strip().split('\t'), symbol)
                if stockData is not None:
                    stockDataList5.append(stockData)
            self._insertMinPeriods(symbol, stockDataList5)

        print('全部分钟线数据写入完毕')

#------------------------------------------------------------------------------current
    def currentData(self):
        '''Adjust the stored prices where needed and add the data of the current day.'''
        stockCodeList = self.getStockCode()
        reRightCode, stockDataList = self.getUpdateCode(stockCodeList)
        self.reRightData(reRightCode)       # adjust the stored prices
        self.insertDayData(stockDataList)   # insert today's day data
        self.insertMinData()                # insert today's minute data

    def getUpdateCode(self, stockCodeList):
        '''Find the stocks whose stored prices have to be adjusted.

        Compares the latest close stored for each symbol with the last field
        of today's comparison file (rowData[10], which is also the column that
        insertDayData drops before inserting).

        Returns a tuple (reRightCode, stockDataList): reRightCode is a list of
        [symbol, rate] entries with rate = (fileValue - storedClose) / storedClose,
        stockDataList holds the rows produced by formatStockData().
        '''
        reRightCode = []   # [symbol, rate] for every stock that needs adjusting
        # Latest close stored in the database, keyed by symbol
        closeDict = {}
        sql = "select ClosePrice from stockdata_day_tdx where Symbol = %s order by date desc limit 1"
        for code in stockCodeList:
            self.cur.execute(sql, (code,))
            result = self.cur.fetchone()
            # No row / NULL close: nothing to compare against, so skip the symbol.
            if result is None or result[0] is None:
                continue
            closeDict[code] = result[0]
        # Today's comparison file (the first matching file in the directory)
        fileName = self.getFileNameList(self.checkDataPath)[0]
        path = os.path.join(self.checkDataPath, fileName)
        with open(path) as stockFile:
            fileContent = stockFile.readlines()[2:-1]   # skip the first two lines and the last one
        rowDataList = []
        for row in fileContent:
            rowData = [item.strip() for item in row.strip().split('\t')]
            # Original author's note: a row without 11 columns belongs to a stock
            # that is not listed yet; '0' in column 3 or 4 marks a suspended
            # stock, which needs no adjustment.
            if len(rowData) != 11 or (rowData[3] == '0') or (rowData[4] == '0'):
                continue
            # The code is exported as ="000001"; strip the decoration.
            rowData[0] = rowData[0].replace('=', '').replace('"', '')
            rowDataList.append(rowData)
        # Convert the file rows into database rows
        print(len(rowDataList))
        stockDataList = self.formatStockData(rowDataList)
        for stockData in stockDataList:
            symbol = stockData[0]
            if symbol not in closeDict:
                continue
            # float() keeps the arithmetic below working when the driver
            # returns Decimal values.
            closePrice = float(closeDict[symbol])
            filePrice = float(stockData[-1])
            if closePrice == 0:
                # A rate relative to a zero close is undefined.
                continue
            if closePrice != filePrice:   # a difference means the prices must be adjusted
                rate = (filePrice - closePrice) / closePrice
                reRightCode.append([symbol, rate])

        print(stockDataList)
        print('复权的数量为 %s' % len(reRightCode))
        print('复权的股票代码为:')
        print(reRightCode)
        logging.info('需要复权的股票代码为:%s' % reRightCode)
        return reRightCode, stockDataList

    def formatStockData(self, rowDataList):
        '''Convert rows of the comparison file into database rows.

        The symbol gets an 'SH' prefix when the code starts with '6' and 'SZ'
        otherwise. Columns 4 and 5 are multiplied out when they carry a unit
        suffix. Each result row is [symbol, today, r[9], r[7], r[8], r[6],
        r[4], r[5], r[10]] (o, h, l, c, v, a according to the original
        docstring, followed by the last column). The input rows are not modified.
        '''
        today = date.today()
        # (suffix, multiplier); presumably the GBK bytes of the Chinese units
        # for 10**8 and 10**4 -- TODO confirm against an export file.
        unitList = [('\xd2\xda', 10**8), ('\xcd\xf2', 10**4)]
        stockDataList = []
        for rowData in rowDataList:
            prefix = 'SH' if rowData[0].startswith('6') else 'SZ'
            scaled = []
            for item in (rowData[4], rowData[5]):
                for suffix, multiplier in unitList:
                    if item.endswith(suffix):
                        item = float(item.replace(suffix, '')) * multiplier
                        break
                scaled.append(item)
            stockDataList.append([prefix + rowData[0], today, rowData[9], rowData[7],
                                  rowData[8], rowData[6], scaled[0], scaled[1], rowData[10]])
        return stockDataList

    def reRightData(self, reRightCode):
        '''Scale the stored prices and Amt of every [symbol, rate] entry by (1 + rate).

        The day table and the 5/15/30/60-minute tables are all updated.
        '''
        # Five placeholders for the scaled columns, one for the symbol.
        paraList = [(record[1],)*5 + (record[0],) for record in reRightCode]
        setClause = 'OpenPrice=OpenPrice*(1+%s),HighPrice=HighPrice*(1+%s),LowPrice=LowPrice*(1+%s),ClosePrice=ClosePrice*(1+%s),Amt=Amt*(1+%s)'

        sql = 'update stockData_day_tdx set ' + setClause + " where Symbol=%s"
        self.cur.executemany(sql, paraList)
        self.con.commit()
        print('日线复权完毕')

        for period in [5, 15, 30, 60]:
            sql = 'update stockData_%smin_tdx ' % period + 'set ' + setClause + " where Symbol=%s"
            self.cur.executemany(sql, paraList)
            self.con.commit()
        print('分钟线复权完毕')

    def insertDayData(self, todayData):
        '''Insert today's day rows, leaving out the last column of every row.

        The rows passed in are not modified.
        '''
        rowList = [data[:-1] for data in todayData]
        sql = "insert ignore into stockData_day_tdx values("+"%s,"*7+"%s)"   # 8 columns in total
        self.cur.executemany(sql, tuple(rowList))
        self.con.commit()
        print('day数据更新完成')

    def insertMinData(self):
        '''Insert today's minute rows from the files found in currentMinPath.'''
        today = date.today().strftime('%Y-%m-%d')
        for symbol, filePath in self._listDataFiles(self.currentMinPath):
            # Read from the first line on (the files are exported without a header).
            fileContent = self._readDataLines(filePath)
            if not fileContent:
                continue

            stockDataList5 = []
            # Walk backwards from the end and stop at the first line that does
            # not carry today's date.
            for row in reversed(fileContent):
                rowData = row.strip().split('\t')
                if rowData[0] != today:
                    break
                stockData = self._buildMinRow(rowData, symbol)
                if stockData is not None:
                    stockDataList5.append(stockData)
            # Restore chronological order; the period slicing depends on it.
            stockDataList5.reverse()
            self._insertMinPeriods(symbol, stockDataList5)
        print('min数据更新完成')

    def getFileNameList(self, filePath):
        '''Return the names (without directory) of the .xls/.xlsx files in filePath.'''
        # NOTE(review): the original comments speak of txt files, but only
        # these two extensions are matched -- confirm which one is intended.
        return [filename for filename in os.listdir(filePath)
                if os.path.splitext(filename)[-1] in ['.xls', '.xlsx']]

    def getStockCode(self):
        '''Return the distinct non-empty symbols stored in the day table.'''
        sql = 'select distinct Symbol from stockdata_day_tdx'
        self.cur.execute(sql)
        return [symbol[0] for symbol in self.cur.fetchall() if symbol[0]]

    def close(self):
        '''Close the cursor and then the database connection.'''
        # The cursor depends on the connection, so it has to be closed first.
        self.cur.close()
        self.con.close()