Example #1
0
    def _parsing_dayprice_json(self, pageNum=1):
        """
        Fetch one page of the current day's quotes and parse it into a DataFrame.

        The response body is decoded as GBK. Its object keys are expected to be
        unquoted, so they are wrapped in double quotes before the text is handed
        to the JSON reader.

        Parameters
        ------
            pageNum: page number to request (default 1)
        return
        -------
            DataFrame with the columns listed in consts.DAY_TRADING_COLUMNS,
            minus 'symbol'; None when the response body is exactly 'null'.
        """
        # Function-scope import keeps this method self-contained.
        from io import StringIO

        consts._write_console()
        request = Request(consts.SINA_DAY_PRICE_URL % (consts.P_TYPE['http'],
                                                       consts.DOMAINS['vsf'],
                                                       consts.PAGES['jv'],
                                                       pageNum))
        response = urlopen(request, timeout=10)
        try:
            raw = response.read()
        finally:
            # Release the connection even if read() fails.
            response.close()

        # Decode before comparing: the raw body is bytes, which never equals
        # the str 'null' on Python 3, so the guard would silently be skipped.
        text = raw.decode('gbk')
        if text == 'null':
            return None

        # Quote every key that sits between a ',' and the following ':'.
        reg = re.compile(r'\,(.*?)\:')
        text = reg.sub(r',"\1":', text)
        # The first key of each object has no leading ',' and is handled here.
        text = text.replace('"{symbol', '{"symbol')
        text = text.replace('{symbol', '{"symbol"')

        # read_json is given a file-like object; passing the literal JSON
        # string directly is deprecated in recent pandas releases.
        df = pandas.DataFrame(pandas.read_json(StringIO(text), dtype={'code': object}),
                              columns=consts.DAY_TRADING_COLUMNS)
        df = df.drop('symbol', axis=1)
        return df
Example #2
0
 def getRehabilitationData(self, code, start=None, end=None, autype='qfq',
                index=False, retry_count=3, pause=0.001, drop_factor=True):
     '''
     Fetch historical price data with rights adjustment applied.

     The data is downloaded one quarter at a time (quarters come from
     DataUtils.getQuarters), concatenated, de-duplicated on 'date' and
     restricted to the [start, end] range.

     Parameters
     ------
       code:string
                   stock code, e.g. 600848
       start:string
                   start date, format YYYY-MM-DD; when None,
                   DataUtils.getDefaultLastYear() is used
       end:string
                   end date, format YYYY-MM-DD; when None,
                   DataUtils.getStrToday() is used
       autype:string
                   adjustment type: 'qfq' forward-adjusted (default),
                   'hfq' backward-adjusted; any other value (e.g. None)
                   divides the prices by the 'factor' column
       index:bool, default False
                   forwarded to the URL builder and the quarter fetcher;
                   when True the rows are returned without any adjustment
       retry_count : int, default 3
                   number of retries on network problems (forwarded to the
                   quarter fetcher)
       pause : float, default 0.001
                   seconds to pause between repeated requests (forwarded to
                   the quarter fetcher)
       drop_factor : bool, default True
                   whether to remove the 'factor' column from the result;
                   keep it if the data is stored first and analysed later
     return
     -------
       DataFrame indexed by 'date', sorted newest first. According to the
       original documentation the columns are:
           open, high, close, low, volume, amount
       None is returned when no rows fall inside [start, end], when the
       realtime quote is unavailable (autype='qfq'), or when no factor row
       matches the first data row (autype='qfq').
     '''
     # Function-scope import keeps this method self-contained.
     import pandas

     def within_range(frame):
         # .copy() so later column assignments never write into a view.
         return frame[(frame.date >= start) & (frame.date <= end)].copy()

     start = DataUtils.getDefaultLastYear() if start is None else start
     end = DataUtils.getStrToday() if end is None else end
     qs = DataUtils.getQuarters(start, end)
     first_quarter = qs[0]
     consts._write_head()
     frames = [self._getRehabilitationByQuarter(
         self._get_index_url(index, code, first_quarter), index, retry_count, pause)]
     for qt in qs[1:]:
         consts._write_console()
         frames.append(self._getRehabilitationByQuarter(
             self._get_index_url(index, code, qt), index, retry_count, pause))
     if len(frames) > 1:
         # DataFrame.append no longer exists in pandas 2.x; concat is the
         # supported equivalent.
         data = pandas.concat(frames, ignore_index=True)
     else:
         data = frames[0]

     if len(data) == 0 or len(within_range(data)) == 0:
         return None
     data = data.drop_duplicates('date')

     if index:
         data = within_range(data).set_index('date')
         return data.sort_index(ascending=False)

     if autype == 'hfq':
         if drop_factor:
             data = data.drop('factor', axis=1)
         data = within_range(data)
         for label in ['open', 'high', 'close', 'low']:
             data[label] = data[label].map(consts.FORMAT).astype(float)
         data = data.set_index('date')
         return data.sort_index(ascending=False)

     if autype == 'qfq':
         if drop_factor:
             data = data.drop('factor', axis=1)
         df = self._parase_fq_factor(code, start, end)
         df = df.drop_duplicates('date')
         df = df.sort_values('date', ascending=False)
         # Positional access: the first row is wanted regardless of what
         # index labels the fetched frame happens to carry.
         first_date = data['date'].iloc[0]
         frow = df[df.date == first_date]
         rt = self.get_realtime_quotes(code)
         if rt is None:
             return None
         # Choose the reference price used to scale the factor.
         # NOTE(review): high == low == 0 presumably means no trades were
         # reported today - confirm against get_realtime_quotes.
         if float(rt['high']) == 0 and float(rt['low']) == 0:
             preClose = float(rt['pre_close'])
         elif DataUtils.isHoliday(DataUtils.getStrToday()):
             preClose = float(rt['price'])
         elif 9 < DataUtils.getTodayHour() < 18:
             preClose = float(rt['pre_close'])
         else:
             preClose = float(rt['price'])

         if frow.empty:
             # No factor available for the first data row: nothing to scale by.
             return None
         rate = float(frow['factor'].iloc[0]) / preClose
         data = within_range(data)
         for label in ['open', 'high', 'low', 'close']:
             data[label] = (data[label] / rate).map(consts.FORMAT).astype(float)
         data = data.set_index('date')
         return data.sort_index(ascending=False)

     # Any other autype: divide the prices by the per-row factor.
     for label in ['open', 'high', 'close', 'low']:
         data[label] = data[label] / data['factor']
     if drop_factor:
         data = data.drop('factor', axis=1)
     data = within_range(data)
     for label in ['open', 'high', 'close', 'low']:
         data[label] = data[label].map(consts.FORMAT)
     data = data.set_index('date')
     data = data.sort_index(ascending=False)
     return data.astype(float)