Beispiel #1
0
def read_index2(code='000905'):
    """Download a CSI index performance workbook and upsert its rows into ``Index``.

    The workbook is fetched through ``get_excel_book`` and every row of every
    sheet is scanned.  A row is stored (upsert keyed on ``name`` and ``date``)
    when its date, turnover and static P/E cells are non-empty and the P/E
    cell is a float.

    :param code: index code; one of ``'000300'``, ``'000905'``, ``'000016'``.
    :raises ValueError: if ``code`` is not one of the supported codes.
    """
    index_names = {
        '000300': '沪深300',
        '000905': '中证500',
        '000016': '上证50',
    }
    # Fail before downloading anything: an unknown code used to leave ``name``
    # unbound and only blew up once the first data row was reached.
    if code not in index_names:
        raise ValueError('unsupported index code: {!r}'.format(code))
    name = index_names[code]

    url = 'http://www.csindex.com.cn/uploads/file/autofile/perf/{}perf.xls'.format(code)
    book = get_excel_book(url)
    for sheet in range(book.nsheets):
        sh = book.sheet_by_index(sheet)
        for rx in range(sh.nrows):
            row = sh.row(rx)
            print(row)
            print(len(row))
            # Cells up to index 17 are read below, so shorter rows are skipped
            # instead of raising IndexError.
            if len(row) < 18:
                continue
            date = row[0].value
            turnover = row[13].value
            pe1 = row[15].value
            pe2 = row[16].value
            dividend_yield_ratio1 = row[17].value
            # Header/text rows are filtered out by requiring a float P/E.
            if not (date and turnover and pe1 and isinstance(pe1, float)):
                continue
            # Excel stores dates as serial numbers; convert using the
            # workbook's own date mode.
            py_date = xlrd.xldate.xldate_as_datetime(date, book.datemode)
            print(py_date)
            date = str(py_date)
            print(pd.to_datetime(date))
            Index.objects(name=name, date=date).update_one(
                name=name,
                date=date,
                pe=pe1,
                pe_ttm=pe2,
                dividend_yield_ratio=dividend_yield_ratio1,
                turnover=turnover,
                upsert=True)
Beispiel #2
0
def hs_cei():
    """Fetch the monthly HSCEI P/E workbook and upsert each data row into ``Index``.

    Every row of every sheet is scanned; the first cell is treated as an
    Excel date serial and the second as the P/E value.  Rows whose P/E cell
    is not a non-zero float (headers, blanks) are skipped.  Records are
    stored under the name ``'HSCEI'`` with an upsert keyed on name and date.
    """
    url = 'https://www.hsi.com.hk/static/uploads/contents/en/dl_centre/monthly/pe/hscei.xls'
    book = get_excel_book(url)
    name = 'HSCEI'
    for sheet in range(book.nsheets):
        sh = book.sheet_by_index(sheet)
        for rx in range(sh.nrows):
            row = sh.row(rx)
            # Two cells are read below; skip narrower rows instead of
            # raising IndexError.
            if len(row) < 2:
                continue
            date = row[0].value
            pe = row[1].value
            if not (date and pe and isinstance(pe, float)):
                continue
            # Convert the Excel serial date using the workbook's date mode.
            py_date = xlrd.xldate.xldate_as_datetime(date, book.datemode)
            date = str(py_date)
            print(pd.to_datetime(date))
            Index.objects(name=name, date=date).update_one(name=name, date=date, pe=pe, upsert=True)
Beispiel #3
0
def csi_by_type(date='2011-05-04', data_type='zy1'):
    """Scrape one CSI valuation table for ``date`` and upsert it row by row.

    ``data_type`` selects both the page that is requested and the field that
    is written:

    * ``zy1``..``zy4`` update ``Index`` (keyed on name and date) with static
      P/E, trailing P/E, P/B and dividend yield respectively, taken from the
      second column.
    * ``zz1``..``zz4`` update ``Industry`` (keyed on code and date) with the
      same four measures, taken from the third column; ``zz1`` additionally
      stores the industry name from the second column.

    Weekend dates are skipped, as are pages without the expected table.  A
    failure while storing one row is reported and the remaining rows are
    still processed.

    :param date: day to fetch, formatted according to ``date_format``.
    :param data_type: one of ``zy1``..``zy4`` or ``zz1``..``zz4``.
    """
    index_fields = {
        'zy1': 'pe',
        'zy2': 'pe_ttm',
        'zy3': 'pb',
        'zy4': 'dividend_yield_ratio',
    }
    industry_fields = {
        'zz1': 'pe',
        'zz2': 'pe_ttm',
        'zz3': 'pb',
        'zz4': 'dividend_yield_ratio',
    }

    day = arrow.get(date, date_format).date()
    # Saturday is 5 and Sunday is 6: no data is published on weekends.
    if day.weekday() >= 5:
        return
    url = '{}industry-price-earnings-ratio?date={}&type={}'.format(csi_domain, date, data_type)
    print('url***', url)
    page = parse(url).getroot()
    # The two page families differ only by a doubled space in the class
    # attribute, and the XPath comparison is an exact string match.
    if data_type in industry_fields:
        xpath = '//table[@class="table table-bg p_table table-bordered table-border mb-20"]'
    else:
        xpath = '//table[@class="table  table-bg p_table table-bordered table-border mb-20"]'
    tables = page.xpath(xpath)
    if not tables:
        # Nothing to import (previously an IndexError on tables[0]).
        print('no table found for {}'.format(url))
        return
    html_table = etree.tostring(etree.ElementTree(tables[0]))
    df = pd.read_html(html_table, flavor='lxml')[0]
    print(df)
    for index, row in df.iterrows():
        v0 = row.iloc[0]
        v1 = row.iloc[1]
        # Only the industry tables need a third column.
        v2 = row.iloc[2] if len(row) > 2 else None
        print('index {} v0:{} v1:{} v2:{}***'.format(index, v0, v1, v2))
        try:
            if data_type in index_fields:
                changes = {'name': v0, index_fields[data_type]: v1}
                Index.objects(name=v0, date=day).update_one(upsert=True, **changes)
            elif data_type in industry_fields:
                changes = {'code': v0, industry_fields[data_type]: v2}
                if data_type == 'zz1':
                    changes.update(date=day, name=v1)
                Industry.objects(code=v0, date=day).update_one(upsert=True, **changes)
        except Exception as exc:
            # Best effort per row, but report the failure instead of hiding
            # it; KeyboardInterrupt/SystemExit are no longer swallowed.
            print('failed to store row {} ({}): {}'.format(index, v0, exc))
            continue
Beispiel #4
0
def parse_sz_market():
    """Scrape the SZSE market summary page and build an ``Index`` record named 'sz'.

    The element with id ``REPORTID_tab1`` is parsed with ``pandas.read_html``
    and the values are taken from the second column of fixed rows (10, 12,
    14 and 15, mapped to the market cap, volume, P/E and turnover rate
    variables).  Missing cells are replaced by 0 before conversion.

    :return: the ``Index`` instance, or ``None`` when no table was parsed.
    """
    page = parse('http://www.szse.cn/main/marketdata/tjsj/jbzb/').getroot()
    report = page.get_element_by_id('REPORTID_tab1')
    dfs = pd.read_html(etree.tostring(report), flavor='lxml')
    if not dfs:
        return None
    df = dfs[0]

    def cell(row_idx):
        # read_html yields NaN (not NaT) for empty cells, so the former
        # ``type(x) == type(pd.NaT)`` test never matched; pd.isna covers both.
        value = df.iloc[row_idx, 1]
        return 0 if pd.isna(value) else value

    total_market = cell(10)
    volume = cell(12)
    pe = cell(14)
    turnover_rate = cell(15)

    # Market cap and volume are divided by 1e8 before being stored.
    market = Index('sz', total_market_cap=float(total_market) / 100000000, volume=float(volume) / 100000000,
                   turnover=float(turnover_rate), pe=float(pe))
    print(market)
    return market
Beispiel #5
0
def csi(date='20171228'):
    """Download the valuation archive for ``date`` and upsert it into ``Index``.

    The zip at ``http://115.29.204.48/syl/bk<date>.zip`` is unpacked in memory
    through ``extract_zip`` and each contained workbook is read with xlrd
    (GBK encoding override).  The sheet position selects the field written:
    sheet 0 -> ``pe``, 1 -> ``pe_ttm``, 2 -> ``pb``,
    3 -> ``dividend_yield_ratio``; other sheets are ignored.  Only rows whose
    second cell looks like a non-negative decimal number are stored.

    Weekend dates and dates answered with HTTP 404 are skipped.

    :param date: day to fetch, formatted as ``YYYYMMDD``.
    """
    sheet_fields = {
        0: 'pe',                    # static P/E
        1: 'pe_ttm',                # trailing P/E
        2: 'pb',                    # sector P/B
        3: 'dividend_yield_ratio',  # sector dividend yield
    }

    day = arrow.get(date, 'YYYYMMDD').date()
    # Saturday is 5 and Sunday is 6.
    if day.weekday() >= 5:
        return
    url = 'http://115.29.204.48/syl/bk' + date + '.zip'
    r = requests.get(url)
    if r.status_code == 404:
        return
    # Keep the archive in memory; nothing is extracted to disk.
    z = zipfile.ZipFile(io.BytesIO(r.content))
    memory_unzip_files = extract_zip(z)
    # A distinct loop variable: the row label below used to overwrite it.
    for file_name in memory_unzip_files.keys():
        book = xlrd.open_workbook(file_contents=memory_unzip_files.get(file_name), encoding_override="gbk")
        print("The number of worksheets is {0}".format(book.nsheets))
        print("Worksheet name(s): {0}".format(book.sheet_names()))
        for sheet in range(book.nsheets):
            sh = book.sheet_by_index(sheet)
            print("{0} {1} {2}".format(sh.name, sh.nrows, sh.ncols))
            field = sheet_fields.get(sheet)
            for rx in range(sh.nrows):
                row = sh.row(rx)
                if len(row) < 2:
                    continue
                name = row[0].value
                value = row[1].value
                print(name, value)
                if field is None:
                    continue
                # xlrd returns float for numeric cells and str for text
                # cells; str() lets both go through the same digit check
                # instead of raising AttributeError on floats.
                if not str(value).replace('.', '', 1).isdigit():
                    continue
                changes = {'name': name, field: value}
                if sheet == 0:
                    changes['date'] = day
                Index.objects(name=name, date=day).update_one(upsert=True, **changes)
Beispiel #6
0
def read_index2(code='000905'):
    """Download a CSI index performance workbook and upsert its rows into ``Index``.

    The workbook is fetched through ``get_excel_book`` and every row of every
    sheet is scanned.  A row is stored (upsert keyed on ``name`` and ``date``)
    when its date and static P/E cells are non-empty and the P/E cell is a
    float.

    :param code: index code; one of ``'000300'``, ``'000905'``, ``'000016'``.
    :raises ValueError: if ``code`` is not one of the supported codes.
    """
    index_names = {
        '000300': '沪深300',
        '000905': '中证500',
        '000016': '上证50',
    }
    # Reject unknown codes up front; they used to leave ``name`` unbound and
    # fail only when the first data row was about to be written.
    if code not in index_names:
        raise ValueError('unsupported index code: {!r}'.format(code))
    name = index_names[code]

    url = 'http://www.csindex.com.cn/uploads/file/autofile/perf/{}perf.xls'.format(
        code)
    book = get_excel_book(url)
    for sheet in range(book.nsheets):
        sh = book.sheet_by_index(sheet)
        for rx in range(sh.nrows):
            row = sh.row(rx)
            print(row)
            print(len(row))
            # The highest cell index read below is 17; skip shorter rows
            # rather than raising IndexError.
            if len(row) < 18:
                continue
            date = row[0].value
            turnover = row[13].value
            pe1 = row[15].value
            pe2 = row[16].value
            dividend_yield_ratio1 = row[17].value
            # Requiring a float P/E filters out header and text rows.
            if not (date and pe1 and isinstance(pe1, float)):
                continue
            # Excel serial date -> datetime, using the workbook's date mode.
            py_date = xlrd.xldate.xldate_as_datetime(
                date, book.datemode)
            print(py_date)
            date = str(py_date)
            print(pd.to_datetime(date))
            Index.objects(name=name, date=date).update_one(
                name=name,
                date=date,
                pe=pe1,
                pe_ttm=pe2,
                dividend_yield_ratio=dividend_yield_ratio1,
                turnover=turnover,
                upsert=True)
Beispiel #7
0
def parse_sh_market():
    """Scrape the SSE daily overview page and return an ``Index`` named 'sh'.

    The whole page is printed, then the text of the element with id
    ``dateList`` is split on whitespace and fixed token positions are used
    for the record's fields.
    """
    root = parse('http://www.sse.com.cn/market/stockdata/overview/day/').getroot()
    print(etree.tostring(root))

    tokens = root.get_element_by_id('dateList').text_content().split()

    # Token positions are fixed by the page layout; the volume token is
    # divided by 10000 before being stored.
    fields = {
        'name': 'sh',
        'total_market_cap': tokens[1],
        'volume': float(tokens[8]) / 10000,
        'turnover': tokens[12],
        'pe': tokens[14],
        'date': tokens[2],
    }
    return Index(**fields)
Beispiel #8
0
def parse_sz_market_common(name, url):
    """Scrape an SZSE board summary page and build an ``Index`` record.

    The element with id ``REPORTID_tab1`` is parsed with ``pandas.read_html``
    and values are taken from the second column of fixed rows (4, 5, 7, 8
    and 10, mapped to tradable shares, total market cap, traded amount,
    traded volume and P/E).  Missing cells are replaced by 0.

    :param name: name given to the resulting ``Index`` record.
    :param url: page to scrape.
    :return: the ``Index`` instance, or ``None`` when no table was parsed.
    """
    page = parse(url).getroot()
    report = page.get_element_by_id('REPORTID_tab1')
    dfs = pd.read_html(etree.tostring(report), flavor='lxml')
    if not dfs:
        return None
    df = dfs[0]
    print(df)

    def cell(row_idx):
        # Empty cells come back from read_html as NaN, which the former NaT
        # type checks never matched; pd.isna handles NaN and NaT alike.
        value = df.iloc[row_idx, 1]
        return 0 if pd.isna(value) else value

    tradable_shares = cell(4)
    total_market = cell(5)
    volume_money = cell(7)
    volume = cell(8)
    pe = cell(10)

    # Turnover rate = traded volume / tradable shares on the day.  Convert
    # before testing for zero so that a textual "0" cannot divide by zero.
    shares = float(tradable_shares)
    turnover = float(volume) / shares if shares else 0

    # Market cap and traded amount are divided by 1e8 before being stored.
    market = Index(name, float(total_market) / 100000000, float(volume_money) / 100000000, turnover, pe)
    return market
Beispiel #9
0
    def get(self, request, *args, **kw):
        """Return the stored series for the index named by the ``code`` query parameter.

        The documents are loaded twice: as a date-ordered ``Index`` queryset
        for the serializer and as a DataFrame built from the raw collection.
        Both are handed to ``get_result`` and the outcome is returned through
        ``get_response_cors``.
        """
        code = request.GET.get('code')
        ordered_items = Index.objects(name=code).order_by('date')
        frame = pd.DataFrame(list(db.index.find({'name': code})))
        payload = IndexListSerializer({'items': ordered_items})

        # The HSCEI index has no P/B data, so supply a zero column.
        if code == 'HSCEI':
            frame['pb'] = 0

        return get_response_cors(
            Response(get_result(payload, frame), status=status.HTTP_200_OK))
Beispiel #10
0
    def get(self, request, *args, **kw):
        """Serve the stored series for the index given by the ``code`` query parameter.

        A date-ordered ``Index`` queryset feeds the serializer, while the raw
        collection documents are loaded into a DataFrame; ``get_result``
        combines the two and ``get_response_cors`` wraps the response.
        """
        index_name = request.GET.get('code')
        queryset = Index.objects(name=index_name).order_by('date')
        documents = db.index.find({'name': index_name})
        frame = pd.DataFrame(list(documents))
        serialized = IndexListSerializer({'items': queryset})

        # No P/B data exists for the HSCEI index; use a zero column instead.
        if index_name == 'HSCEI':
            frame['pb'] = 0

        body = get_result(serialized, frame)
        return get_response_cors(Response(body, status=status.HTTP_200_OK))
Beispiel #11
0
def parse_cyb2(url='http://www.szse.cn/szseWeb/FrontController.szse?randnum=0.5328349224291742'):
    """POST the SZSE report query and build an ``Index`` record named 'CYB'.

    The response body is parsed with ``pandas.read_html`` and values are
    taken from the second column of fixed rows (4, 5, 7, 8, 10 and 13).
    Missing cells are replaced by 0; the row-13 value is kept only when it is
    a real float.  The query date in the payload is hard-coded.

    :param url: endpoint the form payload is posted to.
    :return: the ``Index`` instance, or ``None`` when no table was parsed.
    """
    payload = {'ACTIONID': 7, 'AJAX': 'AJAX-TRUE', 'CATALOGID': '1898_nm', 'TABKEY': 'tab1',
               'txtQueryDate': '2016-01-15', 'REPORT_ACTION': 'reach'}
    res = requests.post(url, data=payload)
    dfs = pd.read_html(res.text, flavor='lxml')
    if not dfs:
        return None
    df = dfs[0]

    def cell(row_idx):
        # read_html produces NaN for empty cells; the former NaT type checks
        # never matched it.  pd.isna handles NaN and NaT alike.
        raw = df.iloc[row_idx, 1]
        return 0 if pd.isna(raw) else raw

    tradable_shares = cell(4)
    total_market = cell(5)
    volume_money = cell(7)
    volume = cell(8)
    pe = cell(10)

    value = df.iloc[13, 1]
    # isinstance also accepts numpy.float64 (a float subclass), which the
    # former ``type(value) != float`` test rejected and zeroed.
    if not isinstance(value, float) or pd.isna(value):
        value = 0.0

    # Turnover rate = traded volume / tradable shares on the day.  Convert
    # before testing for zero so that a textual "0" cannot divide by zero.
    shares = float(tradable_shares)
    turnover = float(volume) / shares if shares else 0

    # Market cap and traded amount are divided by 1e8 before being stored.
    market = Index('CYB', float(total_market) / 100000000, float(volume_money) / 100000000, turnover, pe, value)
    return market
Beispiel #12
0
    def get(self, request, *args, **kw):
        """Return P/B and P/E time series plus their averages for one index.

        The index is chosen by the ``name`` query parameter.  The series are
        built from the serialized ``Index`` documents as
        ``[timestamp * 1000, value]`` pairs; the averages come from a
        DataFrame of the raw collection documents.
        """
        print('*' * 15)
        name = request.GET.get('name')
        queryset = Index.objects(name=name).order_by('date')
        frame = pd.DataFrame(list(db.index.find({'name': name})))
        serializer = IndexListSerializer({'items': queryset})

        content = JSONRenderer().render(serializer.data)
        print('**********content:{}'.format(content))
        parsed = json.loads(content)
        print('****json:{}'.format(parsed))

        pb_points, pe_points = [], []
        for entry in parsed.get('items'):
            stamp = arrow.get(entry.get('date'),
                              'YYYY-MM-DD HH:mm:ss').timestamp * 1000
            pb_value = entry.get('pb')
            # P/B points are kept only when a truthy value exists; P/E
            # points are always appended.
            if pb_value:
                pb_points.append([stamp, pb_value])
            pe_points.append([stamp, entry.get('pe')])

        # The HSCEI index has no P/B data, so average over a zero column.
        if name == 'HSCEI':
            frame['pb'] = 0

        return Response(
            {
                'PB': pb_points,
                'PE': pe_points,
                'PB_avg': frame['pb'].mean(),
                'PE_avg': frame['pe'].mean()
            },
            status=status.HTTP_200_OK)
Beispiel #13
0
def read_market(nh, nl, date):
    """Collect the daily market indicators and upsert them into ``Market``.

    The counts passed in are combined with figures obtained from helper
    functions defined elsewhere (``low_pb_ratio``, ``screen_by_pencentage``,
    ``screen_by_price``, ``broken_ipo``, ``xueqiu``, ``gdp_rate``,
    ``high_price_ratio``, ``read_index_market``, ``low_price_ratio``).  A
    composite ``cix`` score is the sum of several sub-scores, each obtained
    with ``interp``; the individual sub-scores are kept in ``cix_data``.

    :param nh: count stored as ``nh`` (presumably stocks at a new high).
    :param nl: count stored as ``nl`` (presumably stocks at a new low).
    :param date: day of the record; converted with ``get_date`` for the key.
    """
    # Share of stocks trading below book value.
    low_pb = low_pb_ratio()
    print(low_pb)
    broken_net_ratio = low_pb[0]
    broken_net = low_pb[1]
    stock_count = low_pb[3]
    nh_ratio = float(nh)/stock_count
    nl_ratio = float(nl)/stock_count
    nhnl = nh - nl

    # Limit-down stocks.
    dt = screen_by_pencentage(-10.11, -9.9)
    dt_ratio = dt/stock_count
    # Limit-up stocks.
    zt = screen_by_pencentage(9.9, 10.11)
    zt_ratio = zt/stock_count
    zdr = zt-dt
    # The format string used to lack a placeholder for zdr, so its value
    # was silently dropped from the output.
    print('dtb:{} ztb:{} zdr:{}'.format(dt, zt, zdr))

    # Penny stocks.
    penny_stocks = screen_by_price(0.1, 1)['count']
    penny_stocks_ratio = penny_stocks/stock_count

    # Stocks trading below their IPO price.
    broken_ipo_count, total_ipo, broken_ipo_rate, broken_list = broken_ipo()

    # CIX ranges from 0 to 100 and is made up of 10 indicators.
    cix = 0
    cix_data = {}
    weight_range = [0, 10]

    # 1 P/E of the '沪深A股' index, replacing the former SH P/E (TODO).
    # Unlike the other indicators this one is mapped onto [0, 50].
    max_pe = 30
    min_pe = 12
    latest_sh = Index.objects(name='沪深A股').order_by('-date').first()
    print('items***{}'.format(latest_sh))
    pe = interp(latest_sh.pe, [min_pe, max_pe], [0, 50])
    cix += pe
    cix_data.update({'pe': pe})

    # 2 Below-book-value ratio (negated so that a higher ratio scores lower).
    # NOTE(review): the upper bound is +min_low_pb rather than -min_low_pb,
    # which looks unintended for a negated input -- confirm before changing,
    # since it alters the stored cix values.
    min_low_pb = 0.02
    max_low_pb = 0.15
    pb = interp(-broken_net_ratio, [-max_low_pb, min_low_pb], weight_range)
    cix += pb
    cix_data.update({'broken_net': pb})

    # 3 AH premium index.
    ah_now = xueqiu('HKHSAHP')
    ah_current = ah_now.current
    ah = interp(ah_current, [100, 130], weight_range)
    cix += ah
    cix_data.update({'ah': ah})

    # 4 GDP rate.
    rate = gdp_rate()
    gdp = interp(rate, [0.4, 1], weight_range)
    cix += gdp
    cix_data.update({'gdp': gdp})

    # 5 Stocks priced above 100, expected range [0, 3.6%].
    high_price = high_price_ratio()
    g100 = high_price[0]
    g100_ratio = high_price[1]
    high = interp(g100_ratio, [0, 0.036], weight_range)
    cix += high
    cix_data.update({'over_100': high})

    # 8 NHNL.
    n = interp(nhnl, [-1000, 1000], weight_range)
    cix += n
    cix_data.update({'nhnl': n})

    # 9 Margin-financing size and share: TODO (tushare).
    # 10 Social-media mining: TODO (xueqiu).

    # SH turnover rate, expected range [1%, 3%].  It is stored below but is
    # currently not part of the cix sum.
    sh = read_index_market('SH000001')
    turnover_rate = sh['turnover_rate']

    # 6 Limit-up/limit-down difference: not scored yet.
    # 7 TODO: last year's IPO / convertible-bond gains or below-issue rate.

    # Low-price ratio.
    low_price = low_price_ratio()
    print('low_price***{}'.format(low_price))
    print(cix_data)
    # TODO: map cix onto the 0.5-1.5 range to represent the position ratio.
    Market.objects(date=get_date(date)).update_one(nh=nh, nl=nl, nhnl=nhnl, nh_ratio=nh_ratio, nl_ratio=nl_ratio,
                                                   stock_count=stock_count,
                                                   over_100=g100, over_100_ratio=g100_ratio,
                                                   penny_stocks=penny_stocks, penny_stocks_ratio=penny_stocks_ratio,
                                                   low_price_ratio=low_price,
                                                   pe=latest_sh.pe, turnover=turnover_rate,
                                                   ah=ah_current, gdp=rate, cix=cix, cix_data=cix_data,
                                                   broken_net=broken_net, broken_net_ratio=broken_net_ratio,
                                                   broken_net_stocks=low_pb[2],
                                                   dt=dt, dt_ratio=dt_ratio, zt=zt, zt_ratio=zt_ratio, zdr=zdr,
                                                   ipo=total_ipo, broken_ipo=broken_ipo_count,
                                                   broken_ipo_ratio=broken_ipo_rate, broken_ipo_list=broken_list,
                                                   upsert=True)
Beispiel #14
0
def read_market(nh, nl, date):
    """Collect the daily market indicators and upsert them into ``Market``.

    The counts passed in are combined with figures obtained from helper
    functions defined elsewhere (``low_pb_ratio``, ``screen_by_pencentage``,
    ``screen_by_price``, ``broken_ipo``, ``avg_sh_pe``, ``xueqiu``,
    ``gdp_rate``, ``high_price_ratio``, ``low_price_ratio``,
    ``read_index_market``).  A composite ``cix`` score is the sum of six
    sub-scores, each mapped onto ``[0, 10]`` with ``interp``.

    :param nh: count stored as ``nh`` (presumably stocks at a new high).
    :param nl: count stored as ``nl`` (presumably stocks at a new low).
    :param date: day of the record; converted with ``get_date`` for the key.
    """
    # Share of stocks trading below book value.
    low_pb = low_pb_ratio()
    print(low_pb)
    broken_net_ratio = low_pb[0]
    broken_net = low_pb[1]
    stock_count = low_pb[3]
    nh_ratio = float(nh) / stock_count
    nl_ratio = float(nl) / stock_count

    # Limit-down stocks.
    dt = screen_by_pencentage(-10.11, -9.9)
    dt_ratio = dt / stock_count
    # Limit-up stocks.
    zt = screen_by_pencentage(9.9, 10.11)
    zt_ratio = zt / stock_count
    # A day without any limit-down stock used to raise ZeroDivisionError.
    # NOTE(review): the ratio is undefined for dt == 0; the denominator is
    # treated as 1 here so the value stays finite -- confirm this choice.
    zdr = zt / dt if dt else float(zt)
    # The format string used to lack a placeholder for zdr.
    print('dtb:{} ztb:{} zdr:{}'.format(dt, zt, zdr))

    # Penny stocks.
    penny_stocks = screen_by_price(0.1, 1)['count']
    penny_stocks_ratio = penny_stocks / stock_count

    # Stocks trading below their IPO price.
    broken_ipo_count, total_ipo, broken_ipo_rate, broken_list = broken_ipo()

    cix = 0
    weight_range = [0, 10]

    # 1 Latest SH P/E, scaled between the historical minimum and maximum.
    pe_df = avg_sh_pe('2000-1-31')
    max_pe = pe_df['PE'].max()
    min_pe = pe_df['PE'].min()
    latest_sh = Index.objects(name='上海A股').order_by('-date').first()
    print('items***{}'.format(latest_sh))
    pe = interp(latest_sh.pe, [min_pe, max_pe], weight_range)
    cix += pe

    # 2 Below-book-value ratio (negated so that a higher ratio scores lower).
    min_low_pb = 0
    max_low_pb = 0.1
    pb = interp(-broken_net_ratio, [-max_low_pb, min_low_pb], weight_range)
    cix += pb

    # 3 AH premium index.
    ah_now = xueqiu('HKHSAHP')
    ah_current = ah_now.current
    ah = interp(ah_current, [100, 150], weight_range)
    cix += ah

    # 4 GDP rate.
    rate = gdp_rate()
    gdp = interp(rate, [0.4, 1], weight_range)
    cix += gdp

    # 5 Stocks priced above 100, expected range [0, 3.6%].
    high_price = high_price_ratio()
    g100 = high_price[0]
    g100_ratio = high_price[1]
    high = interp(g100_ratio, [0, 0.036], weight_range)
    cix += high

    # Low-price ratio: stored below, not part of the cix sum.
    low_price = low_price_ratio()
    print('low_price***{}'.format(low_price))

    # 6 SH turnover rate, expected range [1%, 3%].
    sh = read_index_market('SH000001')
    turnover_rate = sh['turnover_rate']
    turnover = interp(turnover_rate, [1, 3], weight_range)
    cix += turnover

    Market.objects(date=get_date(date)).update_one(
        nh=nh,
        nl=nl,
        nhnl=nh - nl,
        nh_ratio=nh_ratio,
        nl_ratio=nl_ratio,
        stock_count=stock_count,
        over_100=g100,
        over_100_ratio=g100_ratio,
        penny_stocks=penny_stocks,
        penny_stocks_ratio=penny_stocks_ratio,
        low_price_ratio=low_price,
        pe=latest_sh.pe,
        turnover=turnover_rate,
        ah=ah_current,
        gdp=rate,
        cix=cix,
        broken_net=broken_net,
        broken_net_ratio=broken_net_ratio,
        broken_net_stocks=low_pb[2],
        dt=dt,
        dt_ratio=dt_ratio,
        zt=zt,
        zt_ratio=zt_ratio,
        zdr=zdr,
        ipo=total_ipo,
        broken_ipo=broken_ipo_count,
        broken_ipo_ratio=broken_ipo_rate,
        broken_ipo_list=broken_list,
        upsert=True)
Beispiel #15
0
def csi_by_type(date='2011-05-04', data_type='zy1'):
    """Scrape one CSI valuation table for ``date`` and upsert it row by row.

    ``data_type`` selects both the page that is requested and the field that
    is written:

    * ``zy1``..``zy4`` update ``Index`` (keyed on name and date) with static
      P/E, trailing P/E, P/B and dividend yield respectively, taken from the
      second column.
    * ``zz1``..``zz4`` update ``Industry`` (keyed on code and date) with the
      same four measures, taken from the third column; ``zz1`` additionally
      stores the industry name from the second column.

    Weekend dates are skipped, as are pages without the expected table.  A
    failure while storing one row is reported and the remaining rows are
    still processed.

    :param date: day to fetch, formatted according to ``date_format``.
    :param data_type: one of ``zy1``..``zy4`` or ``zz1``..``zz4``.
    """
    index_fields = {
        'zy1': 'pe',
        'zy2': 'pe_ttm',
        'zy3': 'pb',
        'zy4': 'dividend_yield_ratio',
    }
    industry_fields = {
        'zz1': 'pe',
        'zz2': 'pe_ttm',
        'zz3': 'pb',
        'zz4': 'dividend_yield_ratio',
    }

    day = arrow.get(date, date_format).date()
    # Saturday is 5 and Sunday is 6: weekend dates are ignored.
    if day.weekday() >= 5:
        return
    url = '{}industry-price-earnings-ratio?date={}&type={}'.format(
        csi_domain, date, data_type)
    print('url***', url)
    page = parse(url).getroot()
    # The class attribute differs by one doubled space between the two page
    # families, and the XPath test is an exact string comparison.
    if data_type in industry_fields:
        xpath = '//table[@class="table table-bg p_table table-bordered table-border mb-20"]'
    else:
        xpath = '//table[@class="table  table-bg p_table table-bordered table-border mb-20"]'
    tables = page.xpath(xpath)
    if not tables:
        # Nothing to import (previously an IndexError on tables[0]).
        print('no table found for {}'.format(url))
        return
    html_table = etree.tostring(etree.ElementTree(tables[0]))
    df = pd.read_html(html_table, flavor='lxml')[0]
    print(df)
    for index, row in df.iterrows():
        v0 = row.iloc[0]
        v1 = row.iloc[1]
        # Only the industry tables need a third column.
        v2 = row.iloc[2] if len(row) > 2 else None
        print('index {} v0:{} v1:{} v2:{}***'.format(index, v0, v1, v2))
        try:
            if data_type in index_fields:
                changes = {'name': v0, index_fields[data_type]: v1}
                Index.objects(name=v0, date=day).update_one(upsert=True,
                                                            **changes)
            elif data_type in industry_fields:
                changes = {'code': v0, industry_fields[data_type]: v2}
                if data_type == 'zz1':
                    changes.update(date=day, name=v1)
                Industry.objects(code=v0, date=day).update_one(upsert=True,
                                                               **changes)
        except Exception as exc:
            # Keep going with the next row, but say what went wrong;
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            print('failed to store row {} ({}): {}'.format(index, v0, exc))
            continue
Beispiel #16
0
def csi_by_type(date='2011-05-04', data_type='zy1'):
    """Scrape one CSI valuation table for ``date`` and upsert it row by row.

    The first column of each table row is used as the key and the second as
    the value.  ``data_type`` selects the target:

    * ``zy1``..``zy4`` update ``Index`` (keyed on name and date) with static
      P/E, trailing P/E, P/B and dividend yield respectively.
    * ``zz1``..``zz4`` update ``Industry`` (keyed on code and date) with the
      same four measures; ``zz1`` additionally writes ``date`` and ``name``.

    Weekend dates are skipped, as are pages without the expected table.

    :param date: day to fetch, formatted according to ``date_format``.
    :param data_type: one of ``zy1``..``zy4`` or ``zz1``..``zz4``.
    """
    index_fields = {
        'zy1': 'pe',
        'zy2': 'pe_ttm',
        'zy3': 'pb',
        'zy4': 'dividend_yield_ratio',
    }
    # The industry P/B and dividend-yield branches were both spelled 'zz2'
    # and therefore unreachable; they are keyed as 'zz3' and 'zz4' here.
    industry_fields = {
        'zz1': 'pe',
        'zz2': 'pe_ttm',
        'zz3': 'pb',
        'zz4': 'dividend_yield_ratio',
    }

    day = arrow.get(date, date_format).date()
    # Saturday is 5 and Sunday is 6: weekend dates are ignored.
    if day.weekday() >= 5:
        return
    url = 'http://www.csindex.com.cn/zh-CN/downloads/industry-price-earnings-ratio?date={}&type={}'.format(
        date, data_type)

    page = parse(url).getroot()
    r = page.xpath(
        '//table[@class="table-bg p_table table-bordered table-border mb-20"]')
    print(len(r))
    if not r:
        # No matching table on the page (previously an IndexError on r[0]).
        return
    html_table = etree.tostring(etree.ElementTree(r[0]))
    df = pd.read_html(html_table, flavor='lxml')[0]
    print(df)
    for index, row in df.iterrows():
        name = row.iloc[0]
        value = row.iloc[1]
        print(index, name, value)
        if data_type in index_fields:
            changes = {'name': name, index_fields[data_type]: value}
            Index.objects(name=name, date=day).update_one(upsert=True,
                                                          **changes)
        elif data_type in industry_fields:
            changes = {'code': name, industry_fields[data_type]: value}
            if data_type == 'zz1':
                # Static industry P/E also records the date and the name.
                changes.update(date=day, name=name)
            Industry.objects(code=name, date=day).update_one(upsert=True,
                                                             **changes)
Beispiel #17
0
def read_market(nh, nl, date):
    """Collect market-wide statistics, compute the CIX score and upsert them.

    Gathers breadth figures from the screening helpers (below-book ratio,
    limit-up / limit-down counts, penny stocks, broken IPOs, high-priced
    stocks, index turnover), maps several of them onto score ranges with
    ``interp`` and sums those into ``cix``, then upserts everything into the
    ``Market`` document selected by ``get_date(date)``.

    Args:
        nh: Presumably the number of stocks at a new high (verify against
            caller). Must support ``float(nh)`` and ``nh - nl``.
        nl: Presumably the number of stocks at a new low (verify against
            caller). Must support ``float(nl)`` and ``nh - nl``.
        date: Passed through ``get_date`` to select the ``Market`` document.

    Returns:
        None. The result is the upserted ``Market`` document; progress is
        also printed to stdout.
    """
    # Below-book-value ("broken net") statistics.
    low_pb = low_pb_ratio()
    print(low_pb)
    broken_net_ratio = low_pb[0]
    broken_net = low_pb[1]
    broken_net_stocks = low_pb[2]
    stock_count = low_pb[3]
    nh_ratio = float(nh) / stock_count
    nl_ratio = float(nl) / stock_count
    nhnl = nh - nl

    # Limit-down stocks.
    dt = screen_by_pencentage(-10.11, -9.9)
    dt_ratio = dt / stock_count
    # Limit-up stocks.
    zt = screen_by_pencentage(9.9, 10.11)
    zt_ratio = zt / stock_count
    # Difference between limit-up and limit-down counts.
    zdr = zt - dt
    print('dtb:{} ztb:{} zdr:{}'.format(dt, zt, zdr))

    # Penny stocks (price between 0.1 and 1).
    penny_stocks = screen_by_price(0.1, 1)['count']
    penny_stocks_ratio = penny_stocks / stock_count

    # Broken-IPO statistics.
    broken_ipo_count, total_ipo, broken_ipo_rate, broken_list = broken_ipo()

    # CIX is meant to range from 0 to 100 and be made up of 10 indicators;
    # only the components below are currently summed.
    cix = 0
    cix_data = {}
    weight_range = [0, 10]

    # 1 PE of the '沪深A股' index (replaces the earlier SH PE). TODO
    max_pe = 30
    min_pe = 12
    latest_sh = Index.objects(name='沪深A股').order_by('-date').first()
    print('items***{}'.format(latest_sh))
    # NOTE(review): PE is mapped onto [0, 50] rather than weight_range, so it
    # can contribute up to half of the score - confirm this is intended.
    pe = interp(latest_sh.pe, [min_pe, max_pe], [0, 50])
    cix += pe
    cix_data['pe'] = pe

    # 2 Below-book ratio; negated so that a higher ratio gives a lower score.
    min_low_pb = 0.02
    max_low_pb = 0.15
    # NOTE(review): the upper bound is +min_low_pb, not -min_low_pb, so a
    # ratio equal to min_low_pb does not reach the top of weight_range.
    # Looks like a missing minus sign - confirm before changing, since it
    # alters stored cix values.
    pb = interp(-broken_net_ratio, [-max_low_pb, min_low_pb], weight_range)
    cix += pb
    cix_data['broken_net'] = pb

    # 3 AH premium index.
    ah_now = xueqiu('HKHSAHP')
    ah_current = ah_now.current
    ah = interp(ah_current, [100, 130], weight_range)
    cix += ah
    cix_data['ah'] = ah

    # 4 GDP rate.
    rate = gdp_rate()
    gdp = interp(rate, [0.4, 1], weight_range)
    cix += gdp
    cix_data['gdp'] = gdp

    # 5 Stocks priced above 100, ratio range [0, 3.6%].
    high_price = high_price_ratio()
    g100 = high_price[0]
    g100_ratio = high_price[1]
    high = interp(g100_ratio, [0, 0.036], weight_range)
    cix += high
    cix_data['over_100'] = high

    # 8 NHNL (nh - nl).
    n = interp(nhnl, [-1000, 1000], weight_range)
    cix += n
    cix_data['nhnl'] = n

    # 9 Margin financing size and share. TODO tushare

    # 10 Social-media mining. TODO xueqiu

    # 5 SH turnover rate, range [1%, 3%]. The mapped value is computed but
    # deliberately not added to cix at present; only the raw rate is stored.
    sh = read_index_market('SH000001')
    turnover_rate = sh['turnover_rate']
    turnover = interp(turnover_rate, [1, 3], weight_range)
    # cix += turnover

    # 6 Limit-up / limit-down difference (not yet part of cix).

    # 7 TODO: last-year IPO / convertible-bond gains or broken-IPO rate.

    # Low-price ratio.
    low_price = low_price_ratio()
    print('low_price***{}'.format(low_price))
    print(cix_data)
    # TODO: map cix onto the 0.5-1.5 interval to represent position size.
    Market.objects(date=get_date(date)).update_one(
        nh=nh, nl=nl, nhnl=nhnl, nh_ratio=nh_ratio, nl_ratio=nl_ratio,
        stock_count=stock_count,
        over_100=g100, over_100_ratio=g100_ratio,
        penny_stocks=penny_stocks, penny_stocks_ratio=penny_stocks_ratio,
        low_price_ratio=low_price,
        pe=latest_sh.pe, turnover=turnover_rate,
        ah=ah_current, gdp=rate, cix=cix, cix_data=cix_data,
        broken_net=broken_net, broken_net_ratio=broken_net_ratio,
        broken_net_stocks=broken_net_stocks,
        dt=dt, dt_ratio=dt_ratio, zt=zt, zt_ratio=zt_ratio, zdr=zdr,
        ipo=total_ipo, broken_ipo=broken_ipo_count,
        broken_ipo_ratio=broken_ipo_rate, broken_ipo_list=broken_list,
        upsert=True)