def hs_cei():
    """Import the HSCEI price/earnings history into the ``Index`` collection.

    Fetches the workbook at the hard-coded hsi.com.hk URL through
    ``get_excel_book`` and walks every row of every sheet.  A row is stored
    only when its first cell (the date serial) is truthy and its second cell
    is a non-zero float (the P/E); title, header and blank rows are skipped.
    Each accepted row is upserted under the name ``'HSCEI'``, keyed by the
    stringified date.

    Returns:
        None.  The function is used for its database side effect.
    """
    # Former location of the same report, kept for reference:
    # http://www.hsi.com.hk/HSI-Net/static/revamp/contents/en/dl_centre/reports_stat/monthly/pe/hscei.xls
    url = 'https://www.hsi.com.hk/static/uploads/contents/en/dl_centre/monthly/pe/hscei.xls'
    book = get_excel_book(url)
    name = 'HSCEI'
    for sheet_index in range(book.nsheets):
        sh = book.sheet_by_index(sheet_index)
        for rx in range(sh.nrows):
            row = sh.row(rx)
            if len(row) < 2:
                # Both the date and the P/E cell are required.
                continue
            date = row[0].value
            pe = row[1].value
            if not (date and pe and isinstance(pe, float)):
                continue
            # Excel stores dates as serial numbers relative to the
            # workbook's date mode.
            py_date = xlrd.xldate.xldate_as_datetime(date, book.datemode)
            date = str(py_date)
            print(pd.to_datetime(date))
            Index.objects(name=name, date=date).update_one(
                name=name, date=date, pe=pe, upsert=True)
def read_index2(code='000905'):
    """Import the performance workbook of one CSI index into ``Index``.

    Downloads ``<code>perf.xls`` from csindex.com.cn through
    ``get_excel_book`` and upserts one ``Index`` document per data row.
    A row is stored when it has at least 19 cells, its date (cell 0) and
    turnover (cell 13) are truthy, and its P/E (cell 15) is a non-zero
    float.  Cell 16 is stored as ``pe_ttm`` and cell 17 as
    ``dividend_yield_ratio``.

    Args:
        code: CSI index code; one of ``'000300'``, ``'000905'``, ``'000016'``.

    Raises:
        ValueError: if ``code`` is not one of the supported codes.  This is
            raised before any download happens.
    """
    names = {
        '000300': '沪深300',
        '000905': '中证500',
        '000016': '上证50',
    }
    if code not in names:
        raise ValueError('unsupported index code: {}'.format(code))
    name = names[code]

    url = 'http://www.csindex.com.cn/uploads/file/autofile/perf/{}perf.xls'.format(code)
    book = get_excel_book(url)
    for sheet_index in range(book.nsheets):
        sh = book.sheet_by_index(sheet_index)
        for rx in range(sh.nrows):
            row = sh.row(rx)
            print(row)
            print(len(row))
            # The highest cell read below is index 18 (the second dividend
            # yield figure), so shorter rows cannot be data rows.
            if len(row) <= 18:
                continue
            date = row[0].value
            turnover = row[13].value
            pe1 = row[15].value
            pe2 = row[16].value
            dividend_yield_ratio1 = row[17].value
            if not (date and turnover and pe1 and isinstance(pe1, float)):
                continue
            py_date = xlrd.xldate.xldate_as_datetime(date, book.datemode)
            print(py_date)
            date = str(py_date)
            print(pd.to_datetime(date))
            Index.objects(name=name, date=date).update_one(
                name=name, date=date, pe=pe1, pe_ttm=pe2,
                dividend_yield_ratio=dividend_yield_ratio1,
                turnover=turnover, upsert=True)
def csi_by_type(date='2011-05-04', data_type='zy1'):
    """Scrape one CSI valuation table for ``date`` and upsert its rows.

    The page ``<csi_domain>industry-price-earnings-ratio`` is requested with
    the given date and type, the first matching ``<table>`` is parsed with
    pandas, and every row is written to MongoDB:

    * ``zy1``..``zy4`` update ``Index`` documents keyed by (column 0, day)
      with column 1 stored as static P/E, rolling P/E, P/B or dividend
      yield respectively.
    * ``zz1``..``zz4`` update ``Industry`` documents keyed by (column 0 as
      code, day) with column 2 stored as the same four measures; ``zz1``
      additionally stores column 1 as the industry name.

    Any other ``data_type`` parses the page but writes nothing.

    Args:
        date: day to fetch, in the module-level ``date_format``.
        data_type: table selector understood by the remote site.

    Returns:
        None.  Weekends and pages without the expected table are skipped.
    """
    day = arrow.get(date, date_format).date()
    # Saturday (5) and Sunday (6) are skipped.
    if day.weekday() in (5, 6):
        return

    url = '{}industry-price-earnings-ratio?date={}&type={}'.format(csi_domain, date, data_type)
    print('url***', url)
    page = parse(url).getroot()
    xpath = '//table[@class="table table-bg p_table table-bordered table-border mb-20"]'
    tables = page.xpath(xpath)
    if not tables:
        print('no data table found at {}'.format(url))
        return

    html_table = etree.tostring(etree.ElementTree(tables[0]))
    df = pd.read_html(html_table, flavor='lxml')[0]
    print(df)

    # data_type -> document field receiving the value.
    index_fields = {
        'zy1': 'pe',                    # static P/E
        'zy2': 'pe_ttm',                # rolling (TTM) P/E
        'zy3': 'pb',                    # price-to-book
        'zy4': 'dividend_yield_ratio',  # dividend yield
    }
    industry_fields = {
        'zz2': 'pe_ttm',                # industry rolling (TTM) P/E
        'zz3': 'pb',                    # industry price-to-book
        'zz4': 'dividend_yield_ratio',  # industry dividend yield
    }

    for index, row in df.iterrows():
        v0 = row.iloc[0]
        v1 = row.iloc[1]
        # Only the industry tables need a third column.
        v2 = row.iloc[2] if len(row) > 2 else None
        print('index {} v0:{} v1:{} v2:{}***'.format(index, v0, v1, v2))
        try:
            if data_type in index_fields:
                Index.objects(name=v0, date=day).update_one(
                    name=v0, upsert=True, **{index_fields[data_type]: v1})
            elif data_type == 'zz1' or data_type in industry_fields:
                if v2 is None:
                    # The table has no value column for industry data.
                    continue
                if data_type == 'zz1':
                    # Industry static P/E; this is the only variant that
                    # also records the industry name.
                    Industry.objects(code=v0, date=day).update_one(
                        code=v0, date=day, name=v1, pe=v2, upsert=True)
                else:
                    Industry.objects(code=v0, date=day).update_one(
                        code=v0, upsert=True, **{industry_fields[data_type]: v2})
        except Exception as exc:
            # Best effort: a row that cannot be stored (e.g. a header or a
            # non-numeric cell) must not abort the rest of the table.
            print('skip row {}: {}'.format(index, exc))
            continue
def csi(date='20171228'):
    """Download the daily sector-valuation archive and store it in ``Index``.

    Fetches ``http://115.29.204.48/syl/bk<date>.zip`` (for example
    ``bk20180202.zip``), unpacks it in memory with ``extract_zip`` and opens
    every member as an Excel workbook (decoded as GBK).  In each sheet the
    first cell of a row is taken as the sector name and the second as the
    value; rows whose value is not a plain non-negative decimal number are
    skipped.  The sheet position selects the field that is written:

    * sheet 0 - static P/E (``pe``)
    * sheet 1 - rolling P/E (``pe_ttm``)
    * sheet 2 - sector price-to-book (``pb``)
    * sheet 3 - sector dividend yield (``dividend_yield_ratio``)

    Args:
        date: day to fetch, formatted ``YYYYMMDD``.

    Returns:
        None.  Weekends and days for which the server answers 404 are
        silently skipped.
    """
    day = arrow.get(date, 'YYYYMMDD').date()
    # Saturday (5) and Sunday (6) are skipped.
    if day.weekday() in (5, 6):
        return

    url = 'http://115.29.204.48/syl/bk' + date + '.zip'
    r = requests.get(url)
    if r.status_code == 404:
        return

    # Keep the archive in memory; nothing is extracted to disk.
    z = zipfile.ZipFile(io.BytesIO(r.content))
    memory_unzip_files = extract_zip(z)

    sheet_fields = {0: 'pe', 1: 'pe_ttm', 2: 'pb', 3: 'dividend_yield_ratio'}

    for file_name in memory_unzip_files.keys():
        book = xlrd.open_workbook(file_contents=memory_unzip_files.get(file_name),
                                  encoding_override="gbk")
        print("The number of worksheets is {0}".format(book.nsheets))
        print("Worksheet name(s): {0}".format(book.sheet_names()))
        for sheet in range(book.nsheets):
            sh = book.sheet_by_index(sheet)
            print("{0} {1} {2}".format(sh.name, sh.nrows, sh.ncols))
            field = sheet_fields.get(sheet)
            for rx in range(sh.nrows):
                row = sh.row(rx)
                if len(row) < 2:
                    continue
                name = row[0].value
                value = row[1].value
                print(name, value)
                # Numeric cells come back from xlrd as floats, text cells as
                # str; go through str() so both can be screened the same way.
                if field is None or not str(value).replace('.', '', 1).isdigit():
                    continue
                # date=day matches the query filter, so passing it for every
                # sheet stores the same value the upsert would set anyway.
                Index.objects(name=name, date=day).update_one(
                    name=name, date=day, upsert=True, **{field: value})
def read_index2(code='000905'):
    """Import the performance workbook of one CSI index into ``Index``.

    Downloads ``<code>perf.xls`` from csindex.com.cn through
    ``get_excel_book`` and upserts one ``Index`` document per data row.
    A row is stored when it has at least 19 cells, its date (cell 0) is
    truthy and its P/E (cell 15) is a non-zero float.  Cell 16 is stored as
    ``pe_ttm``, cell 17 as ``dividend_yield_ratio`` and cell 13 as
    ``turnover``.

    Args:
        code: CSI index code; one of ``'000300'``, ``'000905'``, ``'000016'``.

    Raises:
        ValueError: if ``code`` is not one of the supported codes.  This is
            raised before any download happens.
    """
    names = {
        '000300': '沪深300',
        '000905': '中证500',
        '000016': '上证50',
    }
    if code not in names:
        raise ValueError('unsupported index code: {}'.format(code))
    name = names[code]

    url = 'http://www.csindex.com.cn/uploads/file/autofile/perf/{}perf.xls'.format(
        code)
    book = get_excel_book(url)
    for sheet_index in range(book.nsheets):
        sh = book.sheet_by_index(sheet_index)
        for rx in range(sh.nrows):
            row = sh.row(rx)
            print(row)
            print(len(row))
            # The highest cell read below is index 18 (the second dividend
            # yield figure), so shorter rows cannot be data rows.
            if len(row) <= 18:
                continue
            date = row[0].value
            turnover = row[13].value
            pe1 = row[15].value
            pe2 = row[16].value
            dividend_yield_ratio1 = row[17].value
            if not (date and pe1 and isinstance(pe1, float)):
                continue
            py_date = xlrd.xldate.xldate_as_datetime(date, book.datemode)
            print(py_date)
            date = str(py_date)
            print(pd.to_datetime(date))
            Index.objects(name=name, date=date).update_one(
                name=name, date=date, pe=pe1, pe_ttm=pe2,
                dividend_yield_ratio=dividend_yield_ratio1,
                turnover=turnover, upsert=True)
def get(self, request, *args, **kw):
    """Answer a GET request with the valuation series of one index.

    The index is selected by the ``code`` query parameter.  Its documents
    are loaded twice: as an ordered queryset wrapped in
    ``IndexListSerializer`` and as a pandas frame built from the raw
    collection.  Both are handed to ``get_result`` and the outcome is
    returned as a 200 response passed through ``get_response_cors``.
    """
    index_name = request.GET.get('code')

    ordered_items = Index.objects(name=index_name).order_by('date')
    raw_documents = db.index.find({'name': index_name})
    frame = pd.DataFrame(list(raw_documents))
    serializer = IndexListSerializer({'items': ordered_items})

    if index_name == 'HSCEI':
        # The HSCEI index carries no P/B data, so supply a zero column.
        frame['pb'] = 0

    payload = get_result(serializer, frame)
    return get_response_cors(Response(payload, status=status.HTTP_200_OK))
def get(self, request, *args, **kw):
    """Answer a GET request with the P/B and P/E series of one index.

    The index is selected by the ``name`` query parameter.  The response
    body is a dict with:

    * ``PB`` / ``PE`` - lists of ``[timestamp * 1000, value]`` pairs taken
      from the serialized items (a P/B pair is emitted only when the item
      has a truthy ``pb``);
    * ``PB_avg`` / ``PE_avg`` - column means over the raw documents, or
      ``None`` when no document provides that column (for example when the
      name is unknown and the query returns nothing).

    Returns:
        A ``Response`` with status 200.
    """
    print('*' * 15)
    name = request.GET.get('name')
    items = Index.objects(name=name).order_by('date')
    index_col = db.index.find({'name': name})
    df = pd.DataFrame(list(index_col))
    serializer = IndexListSerializer({'items': items})

    content = JSONRenderer().render(serializer.data)
    print('**********content:{}'.format(content))
    json_output = json.loads(content)
    print('****json:{}'.format(json_output))

    pb_list = []
    pe_list = []
    for item in json_output.get('items') or []:
        # NOTE(review): `.timestamp` is read as an attribute here; confirm
        # this matches the installed arrow version.
        timestamp = arrow.get(item.get('date'), 'YYYY-MM-DD HH:mm:ss').timestamp * 1000
        if item.get('pb'):
            pb_list.append([timestamp, item.get('pb')])
        pe_list.append([timestamp, item.get('pe')])

    if name == 'HSCEI':
        # The HSCEI index carries no P/B data, so supply a zero column.
        df['pb'] = 0

    # An empty result set yields a frame without columns; indexing it would
    # raise KeyError and turn an unknown name into a server error.
    result = {
        'PB': pb_list,
        'PE': pe_list,
        'PB_avg': df['pb'].mean() if 'pb' in df.columns else None,
        'PE_avg': df['pe'].mean() if 'pe' in df.columns else None,
    }
    response = Response(result, status=status.HTTP_200_OK)
    return response
def read_market(nh, nl, date):
    """Collect daily market-breadth figures and upsert the ``Market`` record.

    Gathers the below-book ratio, limit-up / limit-down counts, penny-stock
    count, broken-IPO statistics, the latest P/E of '沪深A股', the AH premium
    index, the GDP rate, the share of stocks priced above 100 and the
    Shanghai turnover rate, then combines several of them into the ``cix``
    score.

    ``cix`` is the sum of six interpolated components: P/E mapped onto
    [0, 50] and below-book ratio, AH premium, GDP rate, above-100 share and
    NH-NL each mapped onto [0, 10].  The individual components are stored
    alongside it in ``cix_data``.  (Original author's note: CIX ranges from
    0 to 100 and is meant to consist of 10 indicators.)

    Args:
        nh: presumably the number of stocks at new highs - confirm with caller.
        nl: presumably the number of stocks at new lows - confirm with caller.
        date: day of the record; converted with ``get_date`` to key the
            ``Market`` document.

    Returns:
        None.  The function is used for its database side effect.
    """
    # Below-book ("broken net") ratio.
    low_pb = low_pb_ratio()
    print(low_pb)
    broken_net_ratio = low_pb[0]
    broken_net = low_pb[1]
    stock_count = low_pb[3]

    nh_ratio = float(nh) / stock_count
    nl_ratio = float(nl) / stock_count
    nhnl = nh - nl

    # Limit-down stocks.
    dt = screen_by_pencentage(-10.11, -9.9)
    dt_ratio = dt / stock_count
    # Limit-up stocks.
    zt = screen_by_pencentage(9.9, 10.11)
    zt_ratio = zt / stock_count
    zdr = zt - dt
    print('dtb:{} ztb:{} zdr:{}'.format(dt, zt, zdr))

    # Penny stocks.
    penny_stocks = screen_by_price(0.1, 1)['count']
    penny_stocks_ratio = penny_stocks / stock_count

    # IPOs trading below their issue price.
    broken_ipo_count, total_ipo, broken_ipo_rate, broken_list = broken_ipo()

    cix = 0
    cix_data = {}
    weight_range = [0, 10]

    # 1. P/E.  An earlier revision derived the bounds from the Shanghai
    #    A-share history (avg_sh_pe); fixed bounds are used for now.
    #    TODO: switch to the Shanghai/Shenzhen A-share P/E history.
    max_pe = 30
    min_pe = 12
    latest_sh = Index.objects(name='沪深A股').order_by('-date').first()
    print('items***{}'.format(latest_sh))
    pe = interp(latest_sh.pe, [min_pe, max_pe], [0, 50])
    cix += pe
    cix_data['pe'] = pe

    # 2. Below-book ratio, negated so that a higher ratio lowers the score.
    min_low_pb = 0.02
    max_low_pb = 0.15
    # NOTE(review): the upper bound is +min_low_pb, not -min_low_pb; with a
    # non-zero minimum that looks unintended - confirm before changing.
    pb = interp(-broken_net_ratio, [-max_low_pb, min_low_pb], weight_range)
    cix += pb
    cix_data['broken_net'] = pb

    # 3. AH premium index.
    ah_now = xueqiu('HKHSAHP')
    ah_current = ah_now.current
    ah = interp(ah_current, [100, 130], weight_range)
    cix += ah
    cix_data['ah'] = ah

    # 4. GDP rate.
    rate = gdp_rate()
    gdp = interp(rate, [0.4, 1], weight_range)
    cix += gdp
    cix_data['gdp'] = gdp

    # 5. Stocks priced above 100, share in [0, 3.6%].
    high_price = high_price_ratio()
    g100 = high_price[0]
    g100_ratio = high_price[1]
    high = interp(g100_ratio, [0, 0.036], weight_range)
    cix += high
    cix_data['over_100'] = high

    # 8. New highs minus new lows.
    n = interp(nhnl, [-1000, 1000], weight_range)
    cix += n
    cix_data['nhnl'] = n

    # 9.  TODO: margin-financing size and share (tushare).
    # 10. TODO: social-media mining (xueqiu).

    # Shanghai turnover rate, expected in [1%, 3%].  It is stored below but
    # currently does not contribute to cix.
    sh = read_index_market('SH000001')
    turnover_rate = sh['turnover_rate']

    # 6. TODO: limit-up / limit-down difference.
    # 7. TODO: one-year performance or broken rate of IPOs and convertibles.

    low_price = low_price_ratio()
    print('low_price***{}'.format(low_price))
    print(cix_data)

    # TODO: map cix onto the 0.5-1.5 range to express a position ratio.
    Market.objects(date=get_date(date)).update_one(
        nh=nh, nl=nl, nhnl=nhnl, nh_ratio=nh_ratio, nl_ratio=nl_ratio,
        stock_count=stock_count,
        over_100=g100, over_100_ratio=g100_ratio,
        penny_stocks=penny_stocks, penny_stocks_ratio=penny_stocks_ratio,
        low_price_ratio=low_price,
        pe=latest_sh.pe, turnover=turnover_rate, ah=ah_current, gdp=rate,
        cix=cix, cix_data=cix_data,
        broken_net=broken_net, broken_net_ratio=broken_net_ratio,
        broken_net_stocks=low_pb[2],
        dt=dt, dt_ratio=dt_ratio, zt=zt, zt_ratio=zt_ratio, zdr=zdr,
        ipo=total_ipo, broken_ipo=broken_ipo_count,
        broken_ipo_ratio=broken_ipo_rate, broken_ipo_list=broken_list,
        upsert=True)
def read_market(nh, nl, date):
    """Collect daily market-breadth figures and upsert the ``Market`` record.

    Gathers the below-book ratio, limit-up / limit-down counts, penny-stock
    count, broken-IPO statistics, the latest P/E of '上海A股', the AH premium
    index, the GDP rate, the share of stocks priced above 100 and the
    Shanghai turnover rate.  ``cix`` is the sum of six components, each
    interpolated onto [0, 10]: P/E (bounded by the history returned by
    ``avg_sh_pe``), below-book ratio, AH premium, GDP rate, above-100 share
    and turnover rate.

    Args:
        nh: presumably the number of stocks at new highs - confirm with caller.
        nl: presumably the number of stocks at new lows - confirm with caller.
        date: day of the record; converted with ``get_date`` to key the
            ``Market`` document.

    Returns:
        None.  The function is used for its database side effect.
    """
    # Below-book ("broken net") ratio.
    low_pb = low_pb_ratio()
    print(low_pb)
    broken_net_ratio = low_pb[0]
    broken_net = low_pb[1]
    stock_count = low_pb[3]

    nh_ratio = float(nh) / stock_count
    nl_ratio = float(nl) / stock_count

    # Limit-down stocks.
    dt = screen_by_pencentage(-10.11, -9.9)
    dt_ratio = dt / stock_count
    # Limit-up stocks.
    zt = screen_by_pencentage(9.9, 10.11)
    zt_ratio = zt / stock_count
    # The ratio is undefined on a day without limit-down stocks; store None
    # rather than aborting the whole update with ZeroDivisionError.
    # NOTE(review): assumes the `zdr` field accepts None - confirm.
    zdr = zt / dt if dt else None
    print('dtb:{} ztb:{} zdr:{}'.format(dt, zt, zdr))

    # Penny stocks.
    penny_stocks = screen_by_price(0.1, 1)['count']
    penny_stocks_ratio = penny_stocks / stock_count

    # IPOs trading below their issue price.
    broken_ipo_count, total_ipo, broken_ipo_rate, broken_list = broken_ipo()

    cix = 0
    weight_range = [0, 10]

    # 1. Latest Shanghai P/E, scaled between the historical extremes.
    pe_df = avg_sh_pe('2000-1-31')
    max_pe = pe_df['PE'].max()
    min_pe = pe_df['PE'].min()
    latest_sh = Index.objects(name='上海A股').order_by('-date').first()
    print('items***{}'.format(latest_sh))
    pe = interp(latest_sh.pe, [min_pe, max_pe], weight_range)
    cix += pe

    # 2. Below-book ratio, negated so that a higher ratio lowers the score.
    min_low_pb = 0
    max_low_pb = 0.1
    pb = interp(-broken_net_ratio, [-max_low_pb, min_low_pb], weight_range)
    cix += pb

    # 3. AH premium index.
    ah_now = xueqiu('HKHSAHP')
    ah_current = ah_now.current
    ah = interp(ah_current, [100, 150], weight_range)
    cix += ah

    # 4. GDP rate.
    rate = gdp_rate()
    gdp = interp(rate, [0.4, 1], weight_range)
    cix += gdp

    # 5. Stocks priced above 100, share in [0, 3.6%].
    high_price = high_price_ratio()
    g100 = high_price[0]
    g100_ratio = high_price[1]
    high = interp(g100_ratio, [0, 0.036], weight_range)
    cix += high

    # Low-priced stocks: stored below, not part of cix.
    low_price = low_price_ratio()
    print('low_price***{}'.format(low_price))

    # 6. Shanghai turnover rate, expected in [1%, 3%].
    sh = read_index_market('SH000001')
    turnover_rate = sh['turnover_rate']
    turnover = interp(turnover_rate, [1, 3], weight_range)
    cix += turnover

    Market.objects(date=get_date(date)).update_one(
        nh=nh, nl=nl, nhnl=nh - nl, nh_ratio=nh_ratio, nl_ratio=nl_ratio,
        stock_count=stock_count,
        over_100=g100, over_100_ratio=g100_ratio,
        penny_stocks=penny_stocks, penny_stocks_ratio=penny_stocks_ratio,
        low_price_ratio=low_price,
        pe=latest_sh.pe, turnover=turnover_rate, ah=ah_current, gdp=rate,
        cix=cix,
        broken_net=broken_net, broken_net_ratio=broken_net_ratio,
        broken_net_stocks=low_pb[2],
        dt=dt, dt_ratio=dt_ratio, zt=zt, zt_ratio=zt_ratio, zdr=zdr,
        ipo=total_ipo, broken_ipo=broken_ipo_count,
        broken_ipo_ratio=broken_ipo_rate, broken_ipo_list=broken_list,
        upsert=True)
def csi_by_type(date='2011-05-04', data_type='zy1'):
    """Scrape one CSI valuation table for ``date`` and upsert its rows.

    The page ``<csi_domain>industry-price-earnings-ratio`` is requested with
    the given date and type, the first matching ``<table>`` is parsed with
    pandas, and every row is written to MongoDB:

    * ``zy1``..``zy4`` update ``Index`` documents keyed by (column 0, day)
      with column 1 stored as static P/E, rolling P/E, P/B or dividend
      yield respectively.
    * ``zz1``..``zz4`` update ``Industry`` documents keyed by (column 0 as
      code, day) with column 2 stored as the same four measures; ``zz1``
      additionally stores column 1 as the industry name.

    Any other ``data_type`` parses the page but writes nothing.

    Args:
        date: day to fetch, in the module-level ``date_format``.
        data_type: table selector understood by the remote site.

    Returns:
        None.  Weekends and pages without the expected table are skipped.
    """
    day = arrow.get(date, date_format).date()
    # Saturday (5) and Sunday (6) are skipped.
    if day.weekday() in (5, 6):
        return

    url = '{}industry-price-earnings-ratio?date={}&type={}'.format(
        csi_domain, date, data_type)
    print('url***', url)
    page = parse(url).getroot()
    xpath = '//table[@class="table table-bg p_table table-bordered table-border mb-20"]'
    tables = page.xpath(xpath)
    if not tables:
        print('no data table found at {}'.format(url))
        return

    html_table = etree.tostring(etree.ElementTree(tables[0]))
    df = pd.read_html(html_table, flavor='lxml')[0]
    print(df)

    # data_type -> document field receiving the value.
    index_fields = {
        'zy1': 'pe',                    # static P/E
        'zy2': 'pe_ttm',                # rolling (TTM) P/E
        'zy3': 'pb',                    # price-to-book
        'zy4': 'dividend_yield_ratio',  # dividend yield
    }
    industry_fields = {
        'zz2': 'pe_ttm',                # industry rolling (TTM) P/E
        'zz3': 'pb',                    # industry price-to-book
        'zz4': 'dividend_yield_ratio',  # industry dividend yield
    }

    for index, row in df.iterrows():
        v0 = row.iloc[0]
        v1 = row.iloc[1]
        # Only the industry tables need a third column.
        v2 = row.iloc[2] if len(row) > 2 else None
        print('index {} v0:{} v1:{} v2:{}***'.format(index, v0, v1, v2))
        try:
            if data_type in index_fields:
                Index.objects(name=v0, date=day).update_one(
                    name=v0, upsert=True, **{index_fields[data_type]: v1})
            elif data_type == 'zz1' or data_type in industry_fields:
                if v2 is None:
                    # The table has no value column for industry data.
                    continue
                if data_type == 'zz1':
                    # Industry static P/E; this is the only variant that
                    # also records the industry name.
                    Industry.objects(code=v0, date=day).update_one(
                        code=v0, date=day, name=v1, pe=v2, upsert=True)
                else:
                    Industry.objects(code=v0, date=day).update_one(
                        code=v0, upsert=True, **{industry_fields[data_type]: v2})
        except Exception as exc:
            # Best effort: a row that cannot be stored (e.g. a header or a
            # non-numeric cell) must not abort the rest of the table.
            print('skip row {}: {}'.format(index, exc))
            continue
def csi_by_type(date='2011-05-04', data_type='zy1'):
    """Scrape one CSI valuation table for ``date`` and upsert its rows.

    Requests the csindex.com.cn ``industry-price-earnings-ratio`` page for
    the given date and type, parses the first matching ``<table>`` with
    pandas and stores column 1 of every row under the key in column 0:

    * ``zy1``..``zy4`` update ``Index`` documents (keyed by name and day)
      with static P/E, rolling P/E, P/B or dividend yield respectively.
    * ``zz1``..``zz4`` update ``Industry`` documents (keyed by code and day)
      with the same four measures; ``zz1`` also stores column 0 as the
      industry name.

    Any other ``data_type`` parses the page but writes nothing.

    Args:
        date: day to fetch, in the module-level ``date_format``.
        data_type: table selector understood by the remote site.

    Returns:
        None.  Weekends and pages without the expected table are skipped.
    """
    day = arrow.get(date, date_format).date()
    # Saturday (5) and Sunday (6) are skipped.
    if day.weekday() in (5, 6):
        return

    url = 'http://www.csindex.com.cn/zh-CN/downloads/industry-price-earnings-ratio?date={}&type={}'.format(
        date, data_type)
    page = parse(url).getroot()
    r = page.xpath(
        '//table[@class="table-bg p_table table-bordered table-border mb-20"]')
    print(len(r))
    if not r:
        # Nothing to parse; indexing r[0] would raise IndexError.
        return

    html_table = etree.tostring(etree.ElementTree(r[0]))
    df = pd.read_html(html_table, flavor='lxml')[0]
    print(df)

    # data_type -> document field receiving the value.
    index_fields = {
        'zy1': 'pe',
        'zy2': 'pe_ttm',
        'zy3': 'pb',
        'zy4': 'dividend_yield_ratio',
    }
    # The original chain tested 'zz2' three times, which left the P/B and
    # dividend-yield branches unreachable.
    industry_fields = {
        'zz2': 'pe_ttm',                # industry rolling (TTM) P/E
        'zz3': 'pb',                    # industry price-to-book
        'zz4': 'dividend_yield_ratio',  # industry dividend yield
    }

    for index, row in df.iterrows():
        name = row.iloc[0]
        value = row.iloc[1]
        print(index, name, value)
        if data_type in index_fields:
            Index.objects(name=name, date=day).update_one(
                name=name, upsert=True, **{index_fields[data_type]: value})
        elif data_type == 'zz1':
            # Industry static P/E; column 0 serves as both code and name.
            Industry.objects(code=name, date=day).update_one(
                code=name, date=day, name=name, pe=value, upsert=True)
        elif data_type in industry_fields:
            Industry.objects(code=name, date=day).update_one(
                code=name, upsert=True, **{industry_fields[data_type]: value})