Esempio n. 1
0
    def test_everydayCount(self):
        """Print daily row counts of HSGTCGHold and compare them with HSGTCG.

        Three passes are made, none of which asserts anything:

        1. the HSGTCGHold row count for every distinct stored trade date;
        2. walking backwards from the nearest trade date to 2018-5-1, the
           HSGTCGHold count next to the count of HSGTCG rows whose
           ``hamount`` is at least 8000;
        3. the same walk using DataFrames of codes, additionally printing
           the codes returned by ``dfNotInAnotherdf`` when the two counts
           differ for dates after 2018-6-1.

        :return: None
        """
        Stocktradedate.importList()

        # Pass 1: one line per distinct trade date found in HSGTCGHold.
        date_rows = set(HSGTCGHold.getlist().values_list('tradedate'))
        for row in date_rows:
            day = row[0]
            print(day, HSGTCGHold.getlist(day).count())

        from stocks.models import HSGTCG
        one_day = datetime.timedelta(1)

        # Pass 2: compare plain counts, newest trade date first.
        day = HSGTCGHold.getNearestTradedate()
        while day > convertToDate('2018-5-1'):
            held = HSGTCGHold.getlist(day).count()
            large = HSGTCG.getlist().filter(
                tradedate=day, hamount__gte=8000).count()
            print(day, held, large, held - large)
            day = HSGTCGHold.getNearestTradedate(day - one_day)

        # Pass 3: same comparison, but keep the codes so differences can be
        # listed.
        day = HSGTCGHold.getNearestTradedate()
        while day > convertToDate('2018-5-1'):
            held_codes = pd.DataFrame(
                list(HSGTCGHold.getlist(day).values('code')))
            large_rows = HSGTCG.getlist().filter(
                tradedate=day, hamount__gte=8000)
            large_codes = pd.DataFrame(list(large_rows.values('code')))
            held, large = len(held_codes), len(large_codes)
            print(day, held, large, held - large)
            if held != large and day > convertToDate('2018-6-1'):
                print('数据不一致:', end='')
                unmatched = HSGTCGHold.dfNotInAnotherdf(held_codes,
                                                        large_codes)
                print(list(unmatched['code']))
            day = HSGTCGHold.getNearestTradedate(day - one_day)
Esempio n. 2
0
    def get_real_datelisting(cls, start, end):
        """Return the stored trade dates lying inside ``[start, end]``.

        Both bounds are inclusive and are normalised with ``convertToDate``
        before being used as filter arguments.

        :param start: lower bound of the interval
        :param end: upper bound of the interval
        :return: whatever ``trade_date_sse.filter(...)`` yields for a fresh
            instance of ``cls`` (the original note describes it as a list of
            ``datetime.date`` -- TODO confirm against the model)
        """

        instance = cls()
        bounds = {
            'tradedate__gte': convertToDate(start),
            'tradedate__lte': convertToDate(end),
        }
        return instance.trade_date_sse.filter(**bounds)
Esempio n. 3
0
    def daterange(start_date, end_date):
        """Iterate over every calendar day from ``start_date`` to ``end_date``.

        Example::

            start_date = date(2013, 1, 1)
            end_date = date(2015, 6, 2)
            for single_date in daterange(start_date, end_date):
                print(single_date.strftime("%Y-%m-%d"))

        :param start_date: first day of the range; normalised with
            ``convertToDate``
        :param end_date: last day of the range (inclusive); normalised with
            ``convertToDate``
        :return: generator yielding each day from start to end; yields
            nothing when ``end_date`` is before ``start_date``
        """

        from datetime import timedelta

        first_day = convertToDate(start_date)
        last_day = convertToDate(end_date)
        # The "+ 1" (to include the end day) has to be added to the day
        # count: adding an int to a timedelta raises TypeError.
        day_count = (last_day - first_day).days + 1
        for n in range(day_count):
            # Offset from the converted start so that any input accepted by
            # convertToDate works, not only date objects.
            yield first_day + timedelta(n)
Esempio n. 4
0
    def scrapjson(url):
        """Download one page of the JSON-like holdings feed as a DataFrame.

        The response is expected to look like
        ``var <name>={pages:(..),data:[..]}``; the ``var <name>=`` prefix is
        cut off and the bare ``pages`` / ``data`` keys are quoted so that the
        remainder parses as JSON. Up to three attempts are made, sleeping one
        second after each failure.

        :param url: address of the feed page to fetch
        :return: DataFrame with the columns ``code`` (``SCODE`` as str),
            ``tradedate`` (first 10 characters of ``HDDATE`` passed through
            ``convertToDate``) and ``hamount`` (``SHAREHOLDPRICE`` divided
            by 10000, rounded to 2 decimals). When no attempt yields any
            row, an empty DataFrame with these columns is returned.
        """
        import json

        import requests

        columns = ['code', 'tradedate', 'hamount']
        # Result used when every attempt fails. Without it the final return
        # raised UnboundLocalError (or KeyError on a half-built frame).
        df = pd.DataFrame(columns=columns)

        # Error handling: fetch at most three times.
        for _ in range(3):
            try:
                response = requests.get(url, timeout=40)

                data = response.content.decode()
                # NOTE(review): the prefix length is hard-coded, so this
                # assumes the JS variable name in the response has exactly 8
                # characters -- confirm against the callers' ``js=`` value.
                data = data[len('var CiydgPzJ='):]
                # NOTE(review): every occurrence of 'pages' / 'data' is
                # replaced, including any inside values.
                data_list = json.loads(
                    data.replace('pages',
                                 '"pages"').replace('data', ' "data"'))
                page = pd.DataFrame(data_list['data'])
                page['code'] = page.SCODE.astype(str)
                page['hamount'] = page.SHAREHOLDPRICE.apply(
                    lambda x: round(x / 10000, 2)).astype(float)
                page['tradedate'] = page['HDDATE'].apply(
                    lambda x: convertToDate(str(x)[:10])).astype(datetime.date)
                if len(page) > 0:
                    # Only a fully converted, non-empty page is accepted.
                    df = page
                    break
            except Exception as e:
                print('requests.get(url, timeout=40)\n{}'.format(e.args))
                time.sleep(1)
        return df[columns]
Esempio n. 5
0
    def getStockHdStatistics(cls, code, browser, retryCount=3):
        """Scrape the holding statistics page of one stock.

        The page is requested through ``cls.scrap`` up to ``retryCount``
        times; the first non-empty result is cleaned and returned. If every
        attempt comes back empty, the last (empty) result is returned as is.

        :param code: stock code inserted into the page URL
        :param browser: webdriver browser handed to ``cls.scrap``
        :param retryCount: maximum number of scrape attempts
        :return: DataFrame whose ``hvol`` / ``hamount`` went through
            ``HSGTCG.hz2Num``, with ``close`` as float, ``tradedate``
            converted with ``convertToDate``, non-trading days removed and
            a fresh 0..n-1 index
        """
        url = 'http://data.eastmoney.com/hsgtcg/StockHdStatistics.aspx?stock={}'.format(
            code)
        for _ in range(retryCount):
            df = cls.scrap(url, browser)
            if len(df) == 0:
                # Nothing scraped this time; try again.
                continue

            # Repair the holding volume / amount columns.
            for column in ('hvol', 'hamount'):
                df[column] = df[column].apply(HSGTCG.hz2Num).astype(float)
            df['close'] = df['close'].astype(float)
            df['tradedate'] = df['tradedate'].apply(convertToDate).astype(
                datetime.date)

            # Drop rows that are not trading days (per the original note,
            # a bug of the Eastmoney web page).
            is_trade_day = df['tradedate'].apply(Stocktradedate.if_tradeday)
            df = df[is_trade_day]
            df.index = pd.RangeIndex(len(df.index))
            break

        return df
Esempio n. 6
0
    def test_newcomingin(self):
        """Check the stocks that newly entered the HSGTCGHold list.

        A code counts as new for a trade date when HSGTCGHold has it for
        that date but not for the previous trade date. For each new code the
        two oldest HSGTCG rows dated on or after the previous trade date are
        read, and a message is printed unless the newer one is at least 8000
        and the older one is below 8000. The check runs once for the nearest
        trade date and then for every trade date back to 2018-5-2.

        :return: None; results are printed, nothing is asserted
        """
        Stocktradedate.importList()
        from stocks.models import HSGTCG
        # Codes newly above the threshold on 2018-06-04; kept for reference
        # only, the list is not used below.
        known_new_codes = [
            '603658', '600460', '002812', '002557', '600188', '000690',
            '600329'
        ]

        one_day = datetime.timedelta(1)

        # --- nearest trade date only -------------------------------------
        day = HSGTCGHold.getNearestTradedate()
        # day = HSGTCGHold.getNearestTradedate('2018-6-4')
        prev_day = HSGTCGHold.getNearestTradedate(day - one_day)
        today_rows = HSGTCGHold.getlist(day)
        prev_rows = HSGTCGHold.getlist(prev_day)
        new_codes = [
            row.code
            for row in today_rows.exclude(
                code__in=prev_rows.values_list('code'))
        ]
        # Verify that the previous day's amount was below the threshold.
        for code in new_codes:
            history = HSGTCG.getlist(code).filter(tradedate__gte=prev_day)
            df = pd.DataFrame(list(history.values().order_by('-tradedate')))
            # Rows are newest first, so the last two are the oldest ones.
            newer = float(df.iloc[-2].hamount)
            older = float(df.iloc[-1].hamount)
            if not (newer >= 8000 and older < 8000):
                print('不是新增持股金额大于八千万:{} 持股金额:{} {}'.format(code, newer, older))

        # --- every trade date back to 2018-5-2 ---------------------------
        day = HSGTCGHold.getNearestTradedate()
        while day > convertToDate('2018-5-2'):
            prev_day = HSGTCGHold.getNearestTradedate(day - one_day)
            today_rows = HSGTCGHold.getlist(day)
            prev_rows = HSGTCGHold.getlist(prev_day)
            new_codes = [
                row.code
                for row in today_rows.exclude(
                    code__in=prev_rows.values_list('code'))
            ]
            # Verify that the previous day's amount was below the threshold.
            for code in new_codes:
                history = HSGTCG.getlist(code).filter(tradedate__gte=prev_day)
                df = pd.DataFrame(
                    list(history.values().order_by('-tradedate')))
                if len(df) <= 1:
                    # Not enough rows to compare two days.
                    continue
                if HSGTCG.getlist().filter(tradedate=prev_day).count() <= 0:
                    # No HSGTCG data at all for the previous trade date.
                    continue
                newer = float(df.iloc[-2].hamount)
                older = float(df.iloc[-1].hamount)
                if not (newer >= 8000 and older < 8000):
                    print('{} 不是新增持股金额大于八千万:{} 持股金额:{} {}'.format(
                        day, code, newer, older))
            day = HSGTCGHold.getNearestTradedate(day - one_day)
Esempio n. 7
0
    def test_scrapt(self):
        """Scrape the northbound holding table of one stock and store it.

        The statistics page is opened in Firefox, the table with id
        ``tb_cgtj`` is parsed into a DataFrame, and every row is written
        with ``HSGTCG.objects.get_or_create``. Afterwards the stored rows
        for the code are checked: more than 10 rows, ``tradedate`` is a
        ``datetime.date``, and no row has a null ``tradedate``.

        :return: None
        """
        # Other sample codes previously tried here: '600066', '000425',
        # '000792'. Only the last assignment ever took effect.
        code = '002493'
        url = 'http://data.eastmoney.com/hsgtcg/StockHdStatistics.aspx?stock={}'.format(
            code)

        browser = webdriver.Firefox()
        try:
            browser.maximize_window()
            browser.get(url)
            table = None
            for parser in ['lxml', 'xml', 'html5lib']:
                # With lxml newer than 4.1.1 the table may not be found, so
                # fall back to the other parsers.
                try:
                    time.sleep(random.random() / 4)
                    soup = BeautifulSoup(browser.page_source, parser)
                    table = soup.find_all(id='tb_cgtj')[0]
                    if table:
                        break
                except Exception:
                    # Missing parser or missing table; a bare ``except``
                    # would also swallow KeyboardInterrupt / SystemExit.
                    time.sleep(0.1)
                    print('using BeautifulSoup {}'.format(parser))
            # Fail with a readable message instead of a NameError when no
            # parser could locate the table.
            self.assertIsNotNone(
                table, 'table "tb_cgtj" not found with any parser')
            df = pd.read_html(str(table), header=1)[0]
            df.columns = [
                'date', 'related', 'close', 'zd', 'hvol', 'hamount',
                'hpercent', 'oneday', 'fiveday', 'tenday'
            ]
            for _, v in df.iterrows():
                print('{} {} {} {}'.format(v.close, v.hvol, v.hamount,
                                           v.hpercent))
                HSGTCG.objects.get_or_create(code=code,
                                             close=v.close,
                                             hvol=str2Float(v.hvol),
                                             hamount=str2Float(v.hamount),
                                             hpercent=v.hpercent,
                                             tradedate=convertToDate(v.date))
        finally:
            # quit() ends the whole driver session; close() only closes the
            # current window and can leave the driver process running.
            browser.quit()
        hsgtcg = HSGTCG.getlist(code)
        # hsgtcg = HSGTCG.getlist()
        print(hsgtcg)
        self.assertTrue(hsgtcg.count() > 10,
                        '保存的数量: {}'.format(hsgtcg.count()))
        self.assertTrue(isinstance(hsgtcg[0].tradedate, datetime.date))
        self.assertTrue(hsgtcg.filter(tradedate=None).count() == 0)
Esempio n. 8
0
    def importjsonList(cls, enddate=None, days=5):
        """Import the codes whose holding amount reaches ``MINHAMOUNT``.

        Source page: http://data.eastmoney.com/hsgtcg/StockStatistics.aspx
        The JSON feed behind that page is downloaded directly (per the
        original note this is more efficient than ``importList``). Pages are
        requested in descending order of holding value and processing stops
        at the first page that is empty or that contains a row below
        ``MINHAMOUNT``.

        :param enddate: last date of the requested interval. When omitted,
            ``cls.getNearestTradedate()`` is used and, if rows for that date
            are already stored, they are returned without downloading.
        :param days: length of the requested interval in days, counted back
            from the end date
        :return: ``cls.getlist`` for the nearest trade date
        """
        if enddate:
            end = convertToDate(enddate)
        else:
            end = cls.getNearestTradedate()
            hsgh = cls.getlist(tradedate=end)
            if hsgh.count() > 0:
                return hsgh
        pagesize = 300  # rows per page
        sortRule = -1  # -1: descending by holding value; 1: ascending

        start = end - datetime.timedelta(days)
        jsname = cls.getRandomStr('letter')
        # st=SHAREHOLDPRICE sorts by holding value (st = sort type).
        # str.replace is used rather than str.format because the URL itself
        # contains literal braces ("{pages:(tp),data:(x)}").
        url_base = 'http://dcfm.eastmoney.com//em_mutisvcexpandinterface/api/js/' \
                   'get?type=HSGTHDSTA&token=70f12f2f4f091e459a279469fe49eca5&st=SHAREHOLDPRICE&sr={sortRule}' \
                   '&p={page}&ps={pagesize}&js=var%20{jsname}={pages:(tp),data:(x)}&filter=(MARKET%20in%20(%27001%27,%27003%27))' \
                   '(HDDATE%3E=^{start}^%20and%20HDDATE%3C=^{end}^)&rt=50950960' \
            .replace('{start}', str(start)).replace('{end}', str(end)) \
            .replace('{sortRule}', str(sortRule)) \
            .replace('{pagesize}', str(pagesize)) \
            .replace('{jsname}', str(jsname))
        for page in range(1, 100):
            # Only the page number changes between requests.
            url = url_base.replace('{page}', str(page))
            df = cls.scrapjson(url)
            if len(df) == 0:
                # An empty page means there is nothing further to read;
                # requesting the remaining pages would be pointless.
                print('page: {}'.format(page))
                break
            dfn = df[df['hamount'] >= MINHAMOUNT]
            if len(dfn) > 0:
                dfn = dfn[dfn['tradedate'].apply(
                    lambda x: Stocktradedate.if_tradeday(x))]
                # Remove duplicated rows.
                dfn = dfn[~dfn.duplicated()]
                cls.savedf(dfn[['code', 'tradedate']])
            print('page: {}'.format(page))
            if len(df[df['hamount'] < MINHAMOUNT]):
                # Rows are sorted descending, so once an amount below the
                # threshold shows up all later pages are below it too.
                break
        return cls.getlist(tradedate=cls.getNearestTradedate())
Esempio n. 9
0
    def getlist(cls, tradedate=None):
        """
        Return the stored rows, optionally restricted to one trade date.

        :param tradedate: trade date; any falsy value means "no filter"

        :return: ``cls.objects.all()``, filtered by
            ``tradedate=convertToDate(tradedate)`` when a date is given
        """
        everything = cls.objects.all()
        if not tradedate:
            # No date requested: hand back all rows.
            return everything

        return everything.filter(tradedate=convertToDate(tradedate))
Esempio n. 10
0
 def importList(cls, firefoxHeadless=True):
     """Scrape per-stock holding statistics for the codes in HSGTCGHold.

     The most recent of the previous 10 calendar days that has HSGTCGHold
     rows supplies the code list; if none has rows,
     ``HSGTCGHold.importList()`` is called and yesterday's rows are used.
     For each code the statistics page is scraped through ``cls.scrap`` and
     every row is written with ``HSGTCG.objects.get_or_create`` inside one
     transaction per code. Per-row save errors are printed and skipped.

     :param firefoxHeadless: forwarded to ``cls.getBrowser``; presumably
         True runs the browser without a window -- TODO confirm
     :return: None
     """
     i, j = 0, 0
     while i < 10 and j == 0:
         # Loop at most ten times; if j is still 0 when the loop ends,
         # there is no data.
         hsgh = HSGTCGHold.getlist(
             tradedate=datetime.datetime.now().date() -
             datetime.timedelta(i + 1))
         i += 1
         j = hsgh.count()
     if j == 0:
         HSGTCGHold.importList()
         hsgh = HSGTCGHold.getlist(
             tradedate=datetime.datetime.now().date() -
             datetime.timedelta(1))
     browser = cls.getBrowser(firefoxHeadless)
     try:
         # values_list('code') yields 1-tuples, hence code[0] below.
         for code in list(hsgh.values_list('code')):
             # NOTE(review): ``code`` is passed to filter() as a tuple, and
             # ``hsgh`` is the very queryset the codes come from. If this
             # lookup matched, every code would be skipped -- confirm
             # whether HSGTCG and code[0] were intended here.
             hsghc = hsgh.filter(code=code)
             if hsghc.count() > 0:
                 continue
             url = 'http://data.eastmoney.com/hsgtcg/StockHdStatistics.aspx?stock={}'.format(
                 code[0])
             df = cls.scrap(url, browser)
             # Repair the holding volume / amount columns.
             df['hvol'] = df['hvol'].apply(
                 lambda x: HSGTCGHold.hz2Num(x)).astype(float)
             df['hamount'] = df['hamount'].apply(
                 lambda x: HSGTCGHold.hz2Num(x)).astype(float)
             df['close'] = df['close'].astype(float)
             with transaction.atomic():
                 # The loop variable reuses the name ``i`` from the
                 # look-back loop above; that counter is no longer needed.
                 for i in df.index:
                     v = df.iloc[i]
                     try:
                         print('saving ... {} {}'.format(code[0], v.close))
                         # NOTE(review): reads ``v.date``; verify that the
                         # frame returned by cls.scrap has a 'date' column.
                         HSGTCG.objects.get_or_create(
                             code=code[0],
                             close=v.close,
                             hvol=v.hvol,
                             hamount=v.hamount,
                             hpercent=v.hpercent,
                             tradedate=convertToDate(v.date))
                     except Exception as e:
                         # print(code[0], v, type(v.close), type(v.hpercent))
                         # Best effort: report the failing row and go on.
                         print(code[0], e.args)
                         # raise Exception(e.args)
     finally:
         # Always release the browser, even when scraping failed.
         if browser:
             browser.close()
Esempio n. 11
0
    def importStockListing(cls, start=None):
        """Insert RPS preparation data for all stocks.

        For every entry of ``Listing.getlist('stock')`` the daily closes are
        fetched, ``rps120`` / ``rps250`` are computed as the close divided by
        the close 120 / 250 rows earlier (rounded to 3 decimals), and the
        rows are split by ``cls.dfNotInModel`` into new rows, which are
        saved with ``cls.savedf``, and already-stored rows, which are
        collected and passed to ``cls.updateSaved`` at the end.

        :param start: start date of the calculation. When None it is derived
            from the latest stored trade date, or '2014-1-1' if nothing is
            stored yet.
        :return: ``cls.getlist('stock')``
        """
        if start is None:
            # Latest trade date already computed in the database.
            latest = cls.getlist('stock').aggregate(
                Max('tradedate'))['tradedate__max']
            if latest:
                # presumably 5 trade days before ``latest`` -- TODO confirm
                # the meaning of the second argument
                start = cls.getNearestTradedate(latest, -5)
            else:
                start = '2014-1-1'
        codelist = Listing.getlist('stock')
        # todo: if already inserted, check whether there is an update
        try:
            # Create objects in bulk to reduce the number of SQL queries.
            # NOTE(review): ``querysetlist`` is never read in this method.
            querysetlist = []
            delisted = []  # codes for which processing raised (originally: no data in quantaxis)
            qssaved = []
            # NOTE(review): ``tdate`` is never read in this method.
            tdate = cls.getNearestTradedate()
            realStart120 = cls.getNearestTradedate(start, -120)
            realStart = cls.getNearestTradedate(start, -250)
            # with transaction.atomic():
            # for v in codelist.values()[11:100]:
            for v in codelist.values():
                print('Dealing {} {} {}'.format(format(v['id'], '05d'),
                                                v['code'], v['name']))
                try:
                    # get stockcode
                    code = Listing.objects.get(code=v['code'], category=10)
                    # Fetch daily data from the local store (the original
                    # comment says "index" data, but the stock API is
                    # called); to_qfq presumably applies forward price
                    # adjustment -- TODO confirm
                    data = qa.QA_fetch_stock_day_adv(
                        v['code'], realStart,
                        datetime.datetime.now().strftime("%Y-%m-%d")).to_qfq()
                    # More than 120 rows are needed for rps120 to exist.
                    if len(data) > 120:
                        df = pd.DataFrame(data.close)
                        df['rps120'] = round(df.close / df.close.shift(120), 3)
                        df['rps250'] = round(df.close / df.close.shift(250), 3)
                        del df['close']
                        if code.timeToMarket > realStart120:
                            # Original comment: "listing date is earlier".
                            # NOTE(review): this branch runs when
                            # timeToMarket is greater than realStart120 --
                            # confirm which case was meant.
                            cutDay = 120
                        else:
                            cutDay = 250
                        # Skip the leading rows that only served as history.
                        df = df[cutDay:]
                        df.reset_index(inplace=True)
                        df.columns = ['tradedate', 'code', 'rps120', 'rps250']
                        del df['code']
                        df['tradedate'] = df['tradedate'].apply(
                            lambda x: convertToDate(str(x)[:10])).astype(
                                datetime.date)
                        df['code_id'] = code.id
                        df, dfsaved = cls.dfNotInModel(df, code.id,
                                                       df['tradedate'].min())
                        if len(df) > 0:
                            # print(df)
                            cls.savedf(df)
                        if len(dfsaved) > 0:
                            # Rows whose dates fall inside the range that
                            # was saved before.
                            qssaved.append(dfsaved)

                except Exception as e:
                    # NOTE(review): any exception, not only missing data,
                    # puts the code on the "delisted" list.
                    delisted.append(v['code'])
                    print(len(delisted), e.args)
                    # print(df)

            cls.updateSaved(qssaved)

            print('保存过的数据更新数量 {} \n {}'.format(len(qssaved), qssaved))
            print('delisted count {} :\n {}'.format(len(delisted), delisted))
            # RPSprepare.objects.bulk_create(querysetlist)
        except Exception as e:
            # Errors outside the per-stock handler are only printed.
            print(e.args)
        return cls.getlist('stock')