def classification(class_types):
    """Fetch one classification/constituent dataset from tushare and dump it
    to D:\\ts\\classification\\ as a GBK-encoded CSV.

    Unknown values of class_types are silently ignored, matching the
    fall-through of the original if/elif chain.
    """
    # Dispatch table: selector -> (fetcher, CSV base name).
    datasets = {
        'industry': (ts.get_industry_classified, 'industry_classified'),
        'concept': (ts.get_concept_classified, 'concept_classified'),
        'area': (ts.get_area_classified, 'area_classified'),
        'sme': (ts.get_sme_classified, 'sme_classified'),
        'gem': (ts.get_gem_classified, 'gem_classified'),
        'st': (ts.get_st_classified, 'st_classified'),
        'hs300': (ts.get_hs300s, 'hs300s'),
        'sz50': (ts.get_sz50s, 'sz50s'),
        'zz500': (ts.get_zz500s, 'zz500s'),
        'terminated': (ts.get_terminated, 'terminated'),
        'suspended': (ts.get_suspended, 'suspended'),
    }
    entry = datasets.get(class_types)
    if entry is not None:
        fetch, stem = entry
        frame = fetch()
        frame.to_csv('D:\\ts\\classification\\' + stem + '.csv', encoding='gbk')
def save_300():
    """Download the HS300 constituent table, persist it, then save each member.

    Fix: the original bound the DataFrame to `list` and the loop variable to
    `id`, shadowing two builtins.
    """
    hs300 = ts.get_hs300s()
    print(hs300)
    hs300.to_csv(HS300_NAME)  # HS300_NAME: module-level output path
    for stock_code in hs300.code:
        save(stock_code)  # external per-stock persister
    print('.........end.............')
def get_hs300s():
    """Fetch HS300 constituents and append them into the MySQL `hs300s` table.

    Fixes: Python-2-only `except Exception, e` syntax replaced with `as e`;
    the original evaluated `e.message` without using it, silently discarding
    the error — now it is printed.
    """
    try:
        df = ts.get_hs300s()
        engine = create_engine('mysql://*****:*****@127.0.0.1/stock?charset=utf8')
        # df.insert(0,'code','600848')
        df.to_sql('hs300s', engine, if_exists='append')
    except Exception as e:
        print(e)  # report instead of swallowing
def gen_zz800_stock_list():
    '''
    Build the ZZ800 universe (ZZ500 + HS300 constituents) and write it to
    CSV with upper-cased columns [CODE, NAME, WEIGHT, DATE].
    '''
    # Local import: DataFrame.append was deprecated and removed in pandas 2.0.
    import pandas as pd

    zz500 = ts.get_zz500s()
    hs300 = ts.get_hs300s()
    zz800 = pd.concat([zz500, hs300])  # replaces the removed .append()
    zz800 = zz800[['code', 'name', 'weight', 'date']]
    zz800.columns = zz800.columns.str.upper()
    zz800.to_csv(config.rootPath + '/data/gen_data/zz800_codes.csv', index=False)
def storagepool(self):
    """Refresh the index/ST constituent pools in Mongo from tushare."""
    # (collection name, fetcher) pairs; 'hz300' and 'sz' keep the legacy
    # collection names used elsewhere.
    sources = [
        ('zz500', ts.get_zz500s),
        ('hz300', ts.get_hs300s),
        ('sz', ts.get_sz50s),
        ('st', ts.get_st_classified),
    ]
    for name, fetch in sources:
        # Round-trip through JSON to get plain dicts for insert_many.
        records = json.loads(fetch().to_json(orient='records'))
        self.pool[name].insert_many(records)
def _index_weight(self):
    """Collect HS300/ZZ500/SZ50 constituents, tagged with their index code
    and normalised, exchange-suffixed stock codes."""
    parts = []
    for fetch, index_code in ((ts.get_hs300s, '000300.SH'),
                              (ts.get_zz500s, '399905.SZ'),
                              (ts.get_sz50s, '000016.SH')):
        frame = fetch()
        frame['index_code'] = index_code
        parts.append(frame)
    res = pd.concat(parts, ignore_index=True)
    # 6xxxxx codes trade on Shanghai (.SH); everything else here is Shenzhen.
    res['code'] = [c + '.SH' if c[0] == '6' else c + '.SZ' for c in res['code']]
    res['update_time'] = self.today
    return res
def get_hs30s() -> object:
    """Return 30 HS300 constituents: every 10th row after sorting by name.

    Columns: code, name, date, weight.  Note the selection is deterministic
    (evenly spaced positions in name order), not random.
    """
    frame = pandas.DataFrame(tushare.get_hs300s())
    frame = frame.sort_values(by='name')
    picks = numpy.arange(0, 299, 10)  # 30 evenly spaced row positions
    return frame.iloc[picks, :]
def find_csi_300_2():
    """Fetch CSI300 codes, add exchange suffixes, pickle and return them."""
    codes = ts.get_hs300s()['code'].values
    suffixed = []
    for code in codes:
        # 0xxxxx / 3xxxxx list in Shenzhen, 6xxxxx in Shanghai; any other
        # leading digit is dropped, as in the original.
        if code[0] in ('0', '3'):
            suffixed.append(code + '.SZ')
        elif code[0] == '6':
            suffixed.append(code + '.SH')
    with open('CSI_tickers.pickle', 'wb') as f:
        pickle.dump(suffixed, f)
    print(suffixed)
    return suffixed
def cluster():
    """Cluster today's HS300 stocks by technical indicators with KMeans and
    return the exchange-suffixed codes of the stocks in cluster label 7.

    NOTE(review): cluster "7" out of k=9 is a magic choice — confirm which
    label is actually interesting; labels are not stable across runs.
    """
    time = get_datetime()
    date = time.strftime('%Y-%m-%d')
    log.info(date)
    a = ts.get_hs300s()
    log.info(a)
    daima = pd.DataFrame(columns=['code', 'open', 'close', 'ma5', 'ma10', 'v_ma10', 'turnover', 'volume', 'p_change'])
    # Collect today's indicator row for every constituent.
    for i in a['code']:
        try:
            s = ts.get_hist_data(i, start=date, end=date)
            s.insert(0, 'code', i)
            s = s.loc[:, ['code', 'open', 'close', 'ma5', 'ma10', 'v_ma10', 'turnover', 'volume', 'p_change']]
            daima = daima.append(s, ignore_index=True)
        except:
            # Skip stocks whose history fetch fails (bare except kept as-is).
            pass
    data = daima.loc[:, ['ma5', 'ma10', 'v_ma10', 'turnover', 'volume', 'p_change']]
    log.info(data)
    k = 9            # number of clusters
    iteration = 500  # max KMeans iterations
    data = data
    # Z-score standardisation before clustering.
    data_zs = 1.0 * (data - data.mean()) / data.std()
    # NOTE(review): n_jobs was removed from sklearn's KMeans in 1.0 — verify
    # the pinned scikit-learn version.
    model = KMeans(n_clusters=k, n_jobs=9, max_iter=iteration)
    model.fit(data_zs)
    # Quick summary: per-cluster counts next to the cluster centres.
    r1 = pd.Series(model.labels_).value_counts()
    r2 = pd.DataFrame(model.cluster_centers_)
    r = pd.concat([r2, r1], axis=1)
    r.columns = list(data.columns) + [u'类别数目']
    log.info(r)
    # Detailed view: each sample with its assigned cluster label.
    r = pd.concat([data, pd.Series(model.labels_, index=data.index)], axis=1)
    r.columns = list(data.columns) + [u'聚类类别']
    t = r.loc[:, '聚类类别']
    daima.insert(9, '聚类类别', t)
    # Keep only cluster 7 and suffix its codes with the exchange tag.
    cc = daima[(daima.聚类类别 == 7)]
    cc = cc.reset_index(drop=True)
    ee = cc.loc[:, ['code']]
    dd = ee['code'].values.tolist()
    gg = []
    for j in dd:
        if j[0] == '6':
            j = j + '.SH'
            gg.append(j)
        else:
            j = j + '.SZ'
            gg.append(j)
    log.info(gg)
    return gg
def obtain_and_insert_hs300(con):
    """Fetch the HS300 constituent table and replace the `hs300` table.

    args:
        con: a sqlalchemy engine/connection for the target database
    """
    frame = ts.get_hs300s()
    # Use the (parsed) date column as the index before writing.
    frame['date'] = pd.to_datetime(frame['date'])
    frame.set_index('date', inplace=True)
    frame.to_sql('hs300', con, if_exists='replace')
    #dtype={'date':sqla.types.VARCHAR(12)}
def list(self, stock_block_type):
    """Return the block table for *stock_block_type*, reading the cache
    first and fetching from tushare (then caching) on a miss.

    Returns None for unknown block types.
    """
    # Registry: block type -> (cache key, tushare fetcher).
    registry = {
        self.industry: (STOCK_BLOCK_INDUSTRY, ts.get_industry_classified),
        self.concept: (STOCK_BLOCK_CONCEPT, ts.get_concept_classified),
        self.area: (STOCK_BLOCK_AREA, ts.get_area_classified),
        self.sme: (STOCK_BLOCK_SME, ts.get_sme_classified),
        self.gem: (STOCK_BLOCK_GEM, ts.get_gem_classified),
        self.st: (STOCK_BLOCK_ST, ts.get_st_classified),
        self.hs300s: (STOCK_BLOCK_HS300S, ts.get_hs300s),
        self.sz50s: (STOCK_BLOCK_SZ50S, ts.get_sz50s),
        self.zz500s: (STOCK_BLOCK_ZZ500S, ts.get_zz500s),
    }
    if stock_block_type not in registry:
        return None
    cache_key, fetch = registry[stock_block_type]
    stock_block = db.get(cache_key)
    if stock_block is None:
        stock_block = fetch()
        db.save(cache_key, stock_block)
    return stock_block
def getCodeBySection(plaName):
    """Return the list of stock codes belonging to the named board/section.

    Fixes: the original left the result variable unbound (NameError on
    return) when plaName matched nothing or when the DB query failed before
    the list was initialised; the four near-identical DB blocks are folded
    into one helper.  Unknown names and failures now yield [].
    """
    def _codes_from_db(section):
        # Shared section lookup; returns [] on failure instead of leaving
        # the result unbound.
        found = []
        sql = "select distinct code from basicinfo where section='%s'" % (section)
        try:
            cursor.execute(sql)
            for row in cursor.fetchall():
                found.append(row[0])
        except Exception:
            print('get data fail')
        return found

    codes = []
    if plaName == '深圳成指':
        codes = _codes_from_db('深市A股')
    elif plaName == '上证指数':
        codes = _codes_from_db('沪市A股')
    elif plaName == '创业板' or plaName == '中小板':
        codes = _codes_from_db(plaName)
    elif plaName == '上证50':
        codes = list(ts.get_sz50s()['code'])
    elif plaName == '沪深300':
        codes = list(ts.get_hs300s()['code'])
    return codes
def run(self):
    """Rebuild the Mongo `un800` collection with ZZ500 + HS300 constituents.

    Fixes: the original called sort_values() without keeping the result
    (a no-op), and used the deprecated/removed DataFrame.append.
    """
    df500 = ts.get_zz500s()
    df300 = ts.get_hs300s()
    df800 = pd.concat([pd.DataFrame(df500), df300], ignore_index=True)
    df800 = df800.sort_values(by="code")  # keep the sorted frame
    un800 = json.loads(df800.to_json(orient="records"))
    emg = emongo()
    szCode = emg.getCollectionNames("un800")
    szCode.remove()  # clear old constituents before reinserting
    szCode.insert(un800)
    emg.Close()
def save_hs300s():
    """Store HS300 weight rows that are not yet in the database.

    Fixes from review:
    - ``df == None`` raises on a DataFrame; use ``is None``.
    - ``to_dict("orient='records'")`` is an invalid orient string; use 'records'.
    - ``filter_by`` takes keyword equality only; ``and_()`` needs ``filter``.
    - ``.one()`` raises when no row exists; use ``one_or_none``.
    - the original re-added the *existing* row (or None) to the session
      instead of constructing a new record.
    """
    df = ts.get_hs300s()
    if df is None:
        return
    for data in df.to_dict('records'):
        code = data['code']
        existing = session.query(StockWeight).filter(
            and_(StockWeight.code == code,
                 StockWeight.date == data['date'])).one_or_none()
        if existing is not None:
            continue
        # NOTE(review): constructor kwargs assumed to mirror the tushare
        # columns — confirm against the StockWeight model definition.
        record = StockWeight(code=code, name=data['name'],
                             date=data['date'], weight=data['weight'])
        logging.info("stock_weight: %s", record)
        session.add(record)
def storagepool(self):
    """Load index constituents and the ST list into their Mongo pools."""
    def _store(collection, frame):
        # Serialise through JSON to plain dicts matching the stored schema.
        self.pool[collection].insert_many(
            json.loads(frame.to_json(orient='records')))

    _store('zz500', ts.get_zz500s())       # CSI 500
    _store('hz300', ts.get_hs300s())       # CSI 300 (legacy collection name)
    _store('sz', ts.get_sz50s())           # SSE 50
    _store('st', ts.get_st_classified())   # ST-flagged stocks
def update_hs300(request):
    '''Replace all stock_hs300 rows with the latest HS300 constituents.

    Fix: iterate with itertuples instead of the range(len(df)) / .loc[i]
    anti-pattern.
    '''
    stock_hs300.objects.all().delete()
    rltobj = ts.get_hs300s()
    objlist = [
        stock_hs300(code=row.code, name=row.name,
                    date=row.date, weight=row.weight)
        for row in rltobj.itertuples(index=False)
    ]
    stock_hs300.objects.bulk_create(objlist)
    return HttpResponse('succ.{0}条!'.format(len(objlist)))
def start_requests(self):
    """Yield one search POST per HS300 code per result page."""
    codes = ts.get_hs300s()['code'].tolist()
    for code in codes:
        url = '{url}?keyword={keyword}'.format(url=self.search_url, keyword=code)
        print('正在请求:' + url)
        # Page 0 .. max_page inclusive.
        for page_no in range(self.max_page + 1):
            payload = {
                'mp': str(self.max_page),
                'page': str(page_no),
            }
            yield scrapy.FormRequest(url,
                                     callback=self.parse_index,
                                     formdata=payload,
                                     meta={'keyword': code})
def save_component_stock():
    """Snapshot HS300 / SSE50 / CSI500 constituent tables into MySQL."""
    # Fetch all three constituent tables first, as the original did.
    hs300 = ts.get_hs300s()   # CSI 300 constituents and weights
    sz50 = ts.get_sz50s()     # SSE 50 constituents
    zz500 = ts.get_zz500s()   # CSI 500 constituents
    engine = create_engine('mysql://*****:*****@127.0.0.1/stock?charset=utf8')
    # Overwrite each snapshot table on every run.
    for frame, table in ((hs300, 'hs300_classify'),
                         (sz50, 'sz50_classify'),
                         (zz500, 'zz500_classify')):
        frame.to_sql(table, engine, if_exists='replace')
def get_hs300_codes():
    """Return HS300 stock codes from a local CSV cache, creating it on first use.

    The cache is the pandas dump of ts.get_hs300s(); column 1 of each data
    row holds the code.  Fix: the original leaked the file handle via
    ``csv.reader(open(...))`` — now closed with a ``with`` block.
    """
    cache = "_stock/hs300.csv"
    if not os.path.exists(cache):
        ts.get_hs300s().to_csv(cache)
    codes = []
    with open(cache) as fh:
        for i, row in enumerate(csv.reader(fh)):
            if i != 0:  # skip the header row
                codes.append(row[1])
    return codes
def get_stock_codes(markets=None, ipo_date=None):
    '''
    markets: list e.g.['sme','gem','st','hs300s','sz50s','zz500s','general'];
    ipo_date: str e.g.'2015-03-30'

    Returns a de-duplicated list of codes from the selected boards; when
    ipo_date is given, stocks that IPO'd after that date are excluded.

    Fixes: mutable default argument replaced with None-sentinel; the
    new-stock filter result was immediately overwritten by the plain dedup
    line; `desired_codes` was unbound (NameError) when ipo_date was falsy.
    '''
    if markets is None:
        markets = ['zz500s']
    # (market key, tushare fetcher attribute) in the original order.
    boards = (
        ('sme', 'get_sme_classified'),     # SME board
        ('gem', 'get_gem_classified'),     # ChiNext
        ('st', 'get_st_classified'),       # ST stocks
        ('hs300s', 'get_hs300s'),          # CSI 300
        ('sz50s', 'get_sz50s'),            # SSE 50
        ('zz500s', 'get_zz500s'),          # CSI 500
    )
    code_list = []
    for market, fetcher_name in boards:
        if market in markets:
            code_list.extend(list(getattr(ts, fetcher_name)().code))
    desired_codes = list(set(code_list))  # de-duplicate
    if ipo_date:
        new_stock_df = ts.new_stocks()
        new_stock_df = new_stock_df[new_stock_df['ipo_date'] > ipo_date]
        new_stock_codes = list(new_stock_df.code)
        # Drop stocks issued after ipo_date (this result is now kept).
        desired_codes = list(set(code_list) - set(new_stock_codes))
    return desired_codes
def calc():
    """Compute the target holding (share count) per HS300 constituent."""
    weight = ts.get_hs300s()
    hs = ts.get_k_data('hs300')
    # Index closing level on the weight table's as-of date.
    date = str(weight.date[0])[:10]
    spot = float(hs[hs.date == date].close)
    # Notional portfolio value = 300 units of the index level —
    # TODO confirm the intended multiplier.
    portfolioValue = spot * 300
    weight['value'] = weight['weight'] * portfolioValue
    # getPrice / getFlag are external row-wise helpers; presumably price
    # lookup and a reallocation flag — verify their contracts.
    weight['close'] = weight.apply(getPrice, axis=1)
    weight['theoNum'] = weight['value'] / weight['close']
    weight['flag'] = weight.apply(getFlag, axis=1)
    # Value left to spread over the flagged rows.
    allocateValue = weight.value.sum() - (weight.value * weight.flag).sum()
    perAllocateValue = allocateValue / weight.flag.sum()
    # adjustValue(perAllocateValue) is expected to return a row-wise function.
    weight['adjustValue'] = weight.apply(adjustValue(perAllocateValue), axis=1)
    return weight
def get_basic(trade_date, num=300):
    """Daily-basic indicators (close/pb/pe/dv_ratio) for the first *num*
    HS300 constituents on *trade_date*, sorted ascending by PB."""
    members = ts.get_hs300s().head(num)
    frames = []
    for idx in members.index:
        ts_code = add_shares_type(members.loc[idx, 'code'])
        basic = tp.query('daily_basic', ts_code=ts_code, trade_date=trade_date,
                         fields='trade_date,ts_code,close,pb,pe,dv_ratio')
        basic['name'] = members.loc[idx, 'name']
        frames.append(basic)
    result = pd.concat(frames).sort_values(by='pb')  # ascending price-to-book
    result.index = pd.Series(range(len(result.index)))  # reset the row index
    return result
def down_stk_base():
    '''
    Download basic stock parameter data.  Occasional "timeout: timed out"
    errors are network hiccups — wait a few minutes and rerun.
    '''
    rss = "tmp\\"
    #
    # Index list.
    fss = rss + 'stk_inx0.csv'
    print(fss)
    dat = ts.get_index()
    dat.to_csv(fss, index=False, encoding='gbk', date_format='str')
    # Full basics table (kept with its code index).
    fss = rss + 'stk_base.csv'
    print(fss)
    dat = ts.get_stock_basics()
    dat.to_csv(fss, encoding='gbk', date_format='str')
    # Slim code/name/industry/area table derived from the basics.
    d20 = dat.loc[:, ['code', 'name', 'industry', 'area']]
    d20['code'] = d20.index
    fss = rss + 'stk_code.csv'
    print(fss)
    d20.to_csv(fss, index=False, encoding='gbk', date_format='str')
    # Constituent lists: SSE50, CSI300, CSI500.  Each is saved only when the
    # fetch returned a plausible table (len > 3), as in the original.
    for stem, fetch in (('stk_sz50.csv', ts.get_sz50s),
                        ('stk_hs300.csv', ts.get_hs300s),
                        ('stk_zz500.csv', ts.get_zz500s)):
        fss = rss + stem
        print(fss)
        dat = fetch()
        if len(dat) > 3:
            dat.to_csv(fss, index=False, encoding='gbk', date_format='str')
def find_and_save_CSI_300():
    """Suffix CSI300 codes with their exchange tag, pickle and return them."""
    raw_codes = ts.get_hs300s()['code'].values
    # print(tickers)
    # 6xxxxx codes are Shanghai (.SH); everything else here is Shenzhen (.SZ).
    suffixed = [c + '.SH' if c[0] == '6' else c + '.SZ' for c in raw_codes]
    print(suffixed)
    with open("CSI_tickers.pickle", "wb") as f:
        pickle.dump(suffixed, f)
    print(suffixed)
    return suffixed
def find_and_save_CSI_300():
    """Build exchange-suffixed CSI300 tickers and persist them with pickle."""
    frame = ts.get_hs300s()
    suffixed = []
    for raw in frame['code'].values:
        # Shanghai listings start with 6; the rest are Shenzhen.
        suffix = '.SH' if raw[0] == '6' else '.SZ'
        suffixed.append(raw + suffix)
    # Serialise the ticker list so later runs can load it without refetching.
    with open('CSI_tickers.pickle', 'wb') as f:
        pickle.dump(suffixed, f)
    return suffixed
def update_stock_list(self):
    """Create (if needed) and refresh the HS300 constituent list table."""
    engine = self.create_db_engine(self.str_db_stock_classification)
    # Ensure the destination table exists before inserting.
    hs300_table = self.table_creator.get_table_hs300_list()
    hs300_table.create(engine, checkfirst=True)
    print("Create %s list table ok!" % hs300_table.name)
    # Fetch the current constituents from Tushare.
    constituents = ts.get_hs300s()
    print('get %s data ok!' % hs300_table.name)
    self.insert_to_db_no_duplicate(constituents, hs300_table.name, engine)
    print("Insert %s data ok!" % hs300_table.name)
    # Release pooled connections.
    engine.dispose()
def find_chances(from_date, to_date, highest_days_n):
    """Scan customised stocks plus HS300 constituents for buy chances.

    Runs _loopback_stock for every (code, name) pair in a thread pool and
    returns (chance stocks sorted by benefit rate desc,
             [(cur_pos, stock)] for positions <= 0.1 sorted ascending).
    """
    # True if `code` already appears in the (code, name) pair list `l`.
    def _in_list(code, l):
        for c, _ in l:
            if code == c:
                return True
        return False

    # customize
    js = get_customize_codes()
    log.info('customize stocks: %s', js)
    codes = js
    # Append HS300 constituents not already in the customised list.
    for code in get_codes(ts.get_hs300s()):
        if _in_list(code, codes):
            continue
        else:
            codes.append((code, None))
    rets = []
    cur_pos_rets = []
    with ThreadPoolExecutor(max_workers=4) as executor:
        tasks = [
            executor.submit(_loopback_stock, code, name, from_date, to_date, highest_days_n)
            for code, name in codes
        ]
        # Collect results as they finish; order is completion order.
        for task in as_completed(tasks):
            stock, is_chance, cur_pos = task.result()
            if is_chance:
                rets.append(stock)
            if cur_pos <= 0.1:
                cur_pos_rets.append((cur_pos, stock))
    rets.sort(key=lambda s: s.get_benefit_rate(), reverse=True)
    log.info('==========Your chances==========')
    for stock in rets:
        log.info(stock)
    cur_pos_rets.sort(key=lambda s: s[0])
    log.info('==========Underestimate==========')
    for _, stock in cur_pos_rets:
        log.info(stock)
    return rets, cur_pos_rets
def main_with_pe_roe():
    '''
    Walk all HS300 stocks, keep those with a low P/E and a high ROE, then
    flag the ones showing a recent MACD golden cross or a streak of rising
    MACD values.  Intermediate and final tables are written under test_data.

    Fixes: the empty-result guard tested `len(...) < 0`, which is never
    true; the removed DataFrame.ix indexer is replaced with .loc.
    '''
    to_filter_stock = ts.get_hs300s()
    # Screen by P/E (<30), ROE (>10) and holder count (>100000) for 2017 Q4.
    filtered_stock_info = filter_good_stocks.get_low_pe_high_roe(
        to_filter_stock['code'], 30, 10, 100000, 2017, 4)
    if len(filtered_stock_info) == 0:
        print('cannot find the stocks: pe<30, roe>10 in 2017-Q04')
        return
    file_name = '.\\test_data\\1.code_range_' + datetime.datetime.now().strftime(date_format) + '.csv'
    final_coe_name = '.\\test_data\\1.final_code_' + datetime.datetime.now().strftime(date_format) + '.csv'
    filtered_stock_info.to_csv(file_name, sep=',', index=True)
    gold_cross_stock = DataFrame({'code': [], 'name': [], 'macd': [], 'pe': [], 'roe': []})
    for code in filtered_stock_info['code']:
        print('%s:%s' % (code, filtered_stock_info.loc[code, 'name']), end=': ')
        his_data = get_macd_by_time_order(code)
        if his_data is None:
            continue
        week_ago = (datetime.datetime.now() - datetime.timedelta(7)).strftime(date_format)
        today = datetime.datetime.now().strftime(date_format)
        # Golden cross within the last week?
        gold_date = has_golden_cross(his_data, week_ago, today)
        if gold_date is not None:
            print('%s:%s has the gold cross in %s~%s' % (code, filtered_stock_info.loc[code, 'name'], gold_date[0], gold_date[1]))
            gold_cross_stock.loc[code] = [code, filtered_stock_info.loc[code, 'name'],
                                          his_data['macd'][-1], filtered_stock_info.loc[code, 'pe'],
                                          filtered_stock_info.loc[code, 'roe']]
        # MACD rising over 4 consecutive sessions?
        const_incr_date = const_incr_macd(his_data, week_ago, today, 4)
        if const_incr_date is not None:
            print('%s:%s has the gold cross in %s~%s' % (code, filtered_stock_info.loc[code, 'name'], const_incr_date[0], const_incr_date[-1]))
            gold_cross_stock.loc[code] = [code, filtered_stock_info.loc[code, 'name'],
                                          his_data['macd'][-1], filtered_stock_info.loc[code, 'pe'],
                                          filtered_stock_info.loc[code, 'roe']]
        del(his_data)
    print("The Gold cross stocks \n are:")
    for key in gold_cross_stock:
        # NOTE(review): iterating a DataFrame yields column names, so this
        # prints columns rather than per-stock rows — kept as in the original.
        print("%s,%s" % (key, gold_cross_stock[key]))
    gold_cross_stock.to_csv(final_coe_name, sep=',', index=True)
def save_hs300s():
    """Fetch the current HS300 constituents/weights and bulk-insert them."""
    logger.info('Begin get and save hs300 clssified.')
    try:
        data_df = ts.get_hs300s()
        if data_df is None or data_df.empty:
            logger.warn('Empty get and save hs300 classified.')
        else:
            # Positional columns of the tushare frame: code, name, date, weight.
            fields = ('code', 'name', 'date', 'weight')
            data_dicts = [dict(zip(fields, row)) for row in data_df.values]
            Hs300.insert_many(data_dicts).execute()
            logger.info('Success get and save hs300 classified.')
    except Exception as e:
        logger.exception('Error get and save hs300 classified.')
def set_universe(code, refDate=None):
    """Return the constituent security IDs of index *code* from the
    configured data source (Wind, tushare, DataYes token API, or the DX
    data center)."""
    if Settings.data_source == DataSource.WIND:
        from WindPy import w
        if not w.isconnected():
            w.start()
        # refDate is optional; without it Wind returns the current composition.
        if not refDate:
            rawData = w.wset('IndexConstituent', 'windcode='+convert2WindSymbol(code), 'field=wind_code')
        else:
            rawData = w.wset('IndexConstituent', 'date='+refDate, 'windcode='+convert2WindSymbol(code), 'field=wind_code')
        if len(rawData.Data) == 0:
            return
        # convert to .xshg/.xshe suffix
        idx = [s.replace('SH', 'xshg') for s in rawData.Data[0]]
        idx = [s.replace('SZ', 'xshe') for s in idx]
        return idx
    elif Settings.data_source == DataSource.TUSHARE:
        import tushare as ts
        # tushare only supports these three indices here; refDate is ignored.
        tsSymbol = code.split('.')[0]
        if tsSymbol == '000300':
            idx = ts.get_hs300s()['code']
        elif tsSymbol == '000016':
            idx = ts.get_sz50s()['code']
        elif tsSymbol == '000905' or tsSymbol == '399905':
            idx = ts.get_zz500s()['code']
        else:
            raise NotImplementedError
        idx = [equityCodeToSecurityID(s) for s in idx.tolist()]
        return idx
    elif Settings.data_source != DataSource.DXDataCenter:
        # Any other non-DX source falls back to the token-based DataYes API.
        import os
        import tushare as ts
        try:
            ts.set_token(os.environ['DATAYES_TOKEN'])
        except KeyError:
            raise
        idx = ts.Idx()
        return list(idx.IdxCons(secID=code, field='consID')['consID'])
    else:
        from DataAPI import api
        data = api.GetIndexConstitutionInfo(code, refDate=refDate).sort_values('conSecurityID')
        return list(data.conSecurityID)
def get_BP_data():
    """Download daily basic indicators (turnover, volume ratio, pe, pb, ...)
    for every HS300 constituent between the module globals `startdate` and
    `enddate`, tag each with its industry, and write the combined frame to
    BP_Data.csv.

    NOTE(review): every code is suffixed '.SZ', including Shanghai-listed
    6xxxxx codes — those queries presumably come back empty; verify.
    NOTE(review): if the very first query returns an empty frame, `df` is
    still bound, but a failure on iteration 1 before assignment would leave
    it undefined for the append branch — confirm intended behaviour.
    """
    # (Original had a large commented-out block selecting a different
    #  universe via pro.stock_basic; HS300 is used as the stock pool.)
    pool = ts.get_hs300s()
    print('需爬取股票总数:', len(pool)+1)
    j = 1
    for i in pool.code:
        print('正在获取第%d家,股票代码%s.SZ' % (j, i))
        if j==1:
            # First stock seeds the accumulator frame.
            df = pro.daily_basic(ts_code=i+'.SZ', start_date=startdate, end_date=enddate, fields='ts_code,trade_date,turnover_rate,volume_ratio,pe,pb')
            if len(df)!=0:
                industry = pro.stock_basic(ts_code = i+'.SZ',exchange='', list_status='L', fields='industry')['industry'][0]
                df['industry'] = industry
        else:
            # NOTE(review): this branch also requests 'close', unlike the
            # first — confirm whether the field lists should match.
            df2 = pro.daily_basic(ts_code=i+'.SZ', start_date=startdate, end_date=enddate, fields='ts_code,trade_date,close, turnover_rate,volume_ratio,pe,pb')
            if len(df2)!=0:
                industry = pro.stock_basic(ts_code = i+'.SZ',exchange='', list_status='L', fields='industry')['industry'][0]
                df2['industry'] = industry
            df = df.append(df2)
        j += 1
    # Two-level index: (trade_date, ts_code), sorted by the first level.
    df=df.set_index(['trade_date','ts_code'])
    df=df.sort_index()
    path = os.path.join('BP_Data.csv')
    df.to_csv(path, index=True)
    return df
def update_HS300s(self):
    '''
    Refresh the HS300s table from tushare, inserting only rows whose
    "code/date" id is not already present.

    Fixes: bare `except:` narrowed to Exception; the local `id` no longer
    shadows the builtin.
    '''
    data = ts.get_hs300s()
    if isinstance(data, pd.DataFrame) and not data.empty:
        for i in range(len(data)):
            row = data.loc[i]
            row_id = '%s/%s' % (row['code'], row['date'])
            item = HS300s(id=row_id,
                          security=row['code'],
                          name=row['name'],
                          date=row['date'],
                          weight=row['weight'])
            try:
                # Existence probe: .one() raises when the id is absent.
                self._session.query(HS300s).filter(HS300s.id == row_id).one()
            except Exception:
                self._session.add(item)
                self._session.commit()
def get_stocklist_by_type(self, trade_date, type):
    # Map an index code to its constituent stock-code list via tushare.
    # NOTE: `type` shadows the builtin but is kept for caller compatibility;
    # `trade_date` is accepted but unused in this method.
    print 'get_stocklist_by_type-------' + type
    df = None
    if type == '000016.SH':
        df = ts.get_sz50s()          # SSE 50
    elif type == '000300.SH':
        df = ts.get_hs300s()         # CSI 300
    elif type == '399006.SZ':
        df = ts.get_gem_classified() # ChiNext board
    elif type == '000905.SH':
        df = ts.get_zz500s()         # CSI 500
    print '----------------------------------------------------------'
    print df
    print '----------------------------------------------------------'
    # Unknown index codes yield an empty list.
    if not df is None:
        stocklist = df.code.tolist()
    else:
        stocklist = []
    return stocklist
def preload():
    """Warm the block cache with every tushare classification table."""
    # (cache key, fetcher) pairs, saved in the original order.
    loaders = (
        (STOCK_BLOCK_INDUSTRY, ts.get_industry_classified),
        (STOCK_BLOCK_CONCEPT, ts.get_concept_classified),
        (STOCK_BLOCK_AREA, ts.get_area_classified),
        (STOCK_BLOCK_SME, ts.get_sme_classified),
        (STOCK_BLOCK_GEM, ts.get_gem_classified),
        (STOCK_BLOCK_ST, ts.get_st_classified),
        (STOCK_BLOCK_HS300S, ts.get_hs300s),
        (STOCK_BLOCK_SZ50S, ts.get_sz50s),
        (STOCK_BLOCK_ZZ500S, ts.get_zz500s),
    )
    for key, fetch in loaders:
        db.save(key, fetch())
def get_universe(symbol):
    '''
    Return the **current** ticker list for a universe key.

    Parameters
    -----------
    symbol : str — one of:
        'A'      all A shares
        'st'     ST stocks
        'hs300'  CSI 300 constituents
        'cyb'    ChiNext constituents
        'sz50'   SSE 50 constituents
        'A-st'   all A shares minus ST stocks

    Returns
    ----------
    list of ticker strings (None for unknown keys, as before).
    '''
    if symbol == 'A':
        return ts.get_stock_basics().index.values.tolist()
    # Single-table universes share the same 'code'-column extraction.
    single_table = {
        'st': ts.get_st_classified,
        'hs300': ts.get_hs300s,
        'cyb': ts.get_gem_classified,
        'sz50': ts.get_sz50s,
    }
    if symbol in single_table:
        return single_table[symbol]()['code'].values.tolist()
    if symbol == 'A-st':
        every = set(ts.get_stock_basics().index.values.tolist())
        # Discard in place (not set difference) to preserve the original
        # resulting iteration order.
        for flagged in set(ts.get_st_classified()['code'].values.tolist()):
            every.discard(flagged)
        return list(every)
def get_stockcode_list(dataset=None, update=False):
    """Return a code/name DataFrame for *dataset*, using a local CSV cache.

    dataset: one of 'zxb'|'cyb'|'hs300'|'sz50'|'zz500'|'whole'
    update:  when True, refetch from tushare even if the cache exists.
    Returns None for unknown dataset names (as before).

    Fixes: `update == False` replaced with `not update`; the six duplicated
    fetch-and-cache branches are collapsed into a dispatch table.
    """
    filepath = dataset + '.csv'
    codefilepath = os.path.join(rootDir, 'codelist', filepath)
    if os.path.exists(codefilepath) and not update:
        return pd.read_csv(codefilepath, encoding='gbk')
    fetchers = {
        'zxb': ts.get_sme_classified,   # SME board
        'cyb': ts.get_gem_classified,   # ChiNext
        'hs300': ts.get_hs300s,         # CSI 300
        'sz50': ts.get_sz50s,           # SSE 50
        'zz500': ts.get_zz500s,         # CSI 500
        'whole': ts.get_today_all,      # whole-market snapshot
    }
    fetch = fetchers.get(dataset)
    if fetch is None:
        return None
    codelist = fetch()[['code', 'name']]
    codelist.to_csv(codefilepath)
    return codelist
def getStockClassfied():
    """Dump every tushare classification table to CSV through saveCsv.

    The index passed to saveCsv follows the fixed ordering of
    stockClassList, exactly as the original incrementing counter did.
    """
    filepath = 'f:\\stockdata\\股票分类数据\\'
    fetchers = (
        ts.get_industry_classified,  # industry classification
        ts.get_concept_classified,   # concept classification
        ts.get_area_classified,      # region classification
        ts.get_sme_classified,       # SME board
        ts.get_gem_classified,       # ChiNext board
        ts.get_st_classified,        # ST (risk-warning) board
        ts.get_hs300s,               # CSI 300 constituents & weights
        ts.get_sz50s,                # SSE 50 constituents
        ts.get_zz500s,               # CSI 500 constituents
    )
    for index, fetch in enumerate(fetchers):
        saveCsv(fetch(), filepath, stockClassList, index)
time.sleep(1) # 中小板块 smalls = ts.get_sme_classified() time.sleep(1) # 创业版 news = ts.get_gem_classified() time.sleep(1) # st版块 sts = ts.get_st_classified() time.sleep(1) # 沪深300 hss = ts.get_hs300s() time.sleep(1) # 上证50 szs = ts.get_sz50s() time.sleep(1) # 中证500 zzs = ts.get_zz500s() time.sleep(1) # 终止上市 tss = ts.get_terminated() time.sleep(1) # 暂停上市
def syncHS300S():
    """Download the HS300 constituent table, cache it, then sync each stock."""
    constituents = ts.get_hs300s()
    # Persist the constituent list for later runs.
    constituents.to_csv(config.HS300_CodePath, encoding="utf-8")
    saveDataFileByCode(constituents)
    print("sync and save HS300 done!")
def get_hs300s_info(file_path):
    # Fetch the current HS300 constituent table and write it to *file_path*
    # as UTF-8 CSV.  (Python-2 print statement kept as-is.)
    hs300s_info = ts.get_hs300s()
    hs300s_info.to_csv(file_path, encoding='utf-8')
    print '\ndownload hs300s info finished\n'
f.write('\n') print str(time.strftime("%Y-%m-%d %H:%M:%S",time.localtime(time.time()))),stockCode,'is finished' stockList.pop() except URLError,e: print 'Error',stockCode,str(e) stockList.pop() continue except BaseException, e: print stockCode,str(e) logger.error('Code: '+stockCode+ ' : '+str(e)) stockList.pop() continue if __name__ == '__main__': fileName = r'd:\stock\stockBZChoose.csv' startDate = '2014-01-01' endDate = str(time.strftime("%Y-%m-%d",time.localtime(time.time()))) specialDate = '2015-01-01' # stockCode = '600030' # print getStockInfo(startDate, endDate, stockCode,'2015-01-01') df = ts.get_hs300s() stockCodeList = list(df.code) chooseBZStock(startDate, endDate,specialDate,stockCodeList,fileName)
def load_company_industry_info():
    """Download each tushare classification table and write it into MySQL.

    Every dataset is processed independently: a failure prints only its own
    error message and the remaining tables are still attempted, mirroring
    the original try/except-per-table structure.
    """
    # (fetcher, target table, success message, failure message) — the
    # Chinese messages (including original typos) are preserved verbatim.
    jobs = (
        (ts.get_industry_classified, "company_industry_classified",
         "下载公司行业分类信息ok", "下载公司行业分类信息出错"),
        (ts.get_concept_classified, "company_concept_classified",
         "载公司概念分类信息ok", "下载公司概念分类信息出错"),
        (ts.get_area_classified, "company_area_classified",
         "下载公司区域分类信息ok", "下载公司区域分类信息出错"),
        (ts.get_sme_classified, "company_sme_classified",
         "下载中小板分类数据ok", "下载中小板分类数据出错"),
        (ts.get_gem_classified, "company_gem_classified",
         "下载创业板分类数据ok", "下载创业板分类数据出错"),
        (ts.get_st_classified, "company_st_classified",
         "下载st板分类数据ok", "下载st板分类数据出错"),
        (ts.get_hs300s, "company_hs300_classified",
         "下载加载沪深300板分类数据ok", "下载加载沪深300板分类数据出错"),
        (ts.get_sz50s, "company_sz50_classified",
         "下载加载上证50板分类数据ok", "下载加载上证50板分类数据出错"),
        (ts.get_zz500s, "company_zz500_classified",
         "下载加载中证500板分类数据ok", "下载加载中证500板分类数据出错"),
        (ts.get_terminated, "company_terminated_classified",
         "下载加载终止上市分类数据ok", "下载加载终止上市分类数据出错"),
        (ts.get_suspended, "company_suspended_classified",
         "下载加载暂停上市分类数据ok", "下载加载暂停上市分类数据出错"),
    )
    for fetch, table, ok_msg, err_msg in jobs:
        try:
            rs = fetch()
            sql.write_frame(rs, table, con=conn_company_classified,
                            flavor='mysql', if_exists='replace', index=True)
            print(ok_msg)
        except:
            print(err_msg)
""" stock_get_data.py Created by Huaizheng ZHANG on 6.27. Copyright (c) 2015 zhzHNN. All rights reserved. """ import tushare as ts import os import urllib2 if os.path.isdir('Data'): pass else: os.mkdir('Data') hs300 = ts.get_hs300s() for i in xrange(0,281): dirName = hs300['code'][i] if os.path.isdir('Data/'+dirName): pass else: os.mkdir('Data/'+dirName) try: print u'当前正在获取' + dirName + u'前复权训练数据' trainData = ts.get_h_data(hs300['code'][i], start='2011-01-04',end='2012-12-31') trainName = dirName + u'trainData'.encode('utf-8') + '.csv' if os.path.exists('Data/'+ dirName + '/' + trainName): os.remove('Data/'+ dirName + '/' + trainName) trainData.to_csv('Data/'+ dirName + '/' + trainName, encoding='utf8') except urllib2.URLError, e:
# Dump each tushare classification/constituent table to the ASHR data dir.
# Note SME is a pd.series data type
SME = ts.get_sme_classified()
SME.to_csv('./ASHR/DATA/SME.csv', index = False)
# Growth Enterprise Market
GEM = ts.get_gem_classified()
GEM.to_csv('./ASHR/DATA/GEM.csv', index = False)
# ST Enterprise
ST = ts.get_st_classified()
ST.to_csv('./ASHR/DATA/ST.csv', index = False)
# Fix: a stray argument-less `ts.get_h_data()` call sat here; get_h_data
# requires a stock code, so the call raised, and its result was discarded
# anyway — removed.
# HS 300
HS300S = ts.get_hs300s()
HS300S.to_csv('./ASHR/DATA/HS300S.csv', index = False)
# SZ 50
SZ50S = ts.get_sz50s()
SZ50S.to_csv('./ASHR/DATA/SZ50S.csv', index = False)
# ZZ 500
ZZ500S = ts.get_zz500s()
ZZ500S.to_csv('./ASHR/DATA/ZZ500S.csv', index = False)
#################
# Fund Holdings #
#################
# TODO Data is available quarterly
def get_hs300s_history():
    """Fetch the HS300 constituent table and run the column-wise history query."""
    constituents = ts.get_hs300s()
    return get_data_by_column(constituents)
Created on 2015年6月4日

@author: Administrator
'''
import tushare as ts

# NOTE(review): the return values below are discarded — this module reads
# as an API demo of the tushare classification endpoints.

# Industry classification
ts.get_industry_classified()

# Concept classification
ts.get_concept_classified()

# Region classification
ts.get_area_classified()

# Current HS300 constituents and their weights
ts.get_hs300s()

# SME-board stocks (codes starting with 002)
ts.get_sme_classified()

# ChiNext stocks (codes starting with 300)
ts.get_gem_classified()

# ST (risk-warning) stocks
ts.get_st_classified()

# SSE 50 constituents
ts.get_sz50s()

# CSI 500 constituents
ts.get_zz500s()