def update_static_info(force=False):
    # Refresh the cached classification data at most once per day
    now = datetime.datetime.now()
    with open(StaticInfo.storage_update_date, "r") as rf:
        line = rf.read()
    # An empty file means we have never updated, so force a refresh;
    # otherwise compare the stored timestamp against now.
    last_update = parser.parse(line) if line else None
    need_update = force or last_update is None or (now - last_update).days > 0
    if need_update:
        print "static info need to update"
        ts.get_industry_classified().to_csv(StaticInfo.data_type["industry"], index=False)
        ts.get_concept_classified().to_csv(StaticInfo.data_type["concept"], index=False)
        ts.get_area_classified().to_csv(StaticInfo.data_type["area"], index=False)
        InitStaticInfo._get_stock_hist_data(StaticInfo.get_stock_pool().keys())
        # Only rewrite the timestamp after a refresh, so a no-op run does not
        # truncate the stored date.
        with open(StaticInfo.storage_update_date, "w") as wf:
            wf.write(str(now))
def industrytodb():
    # Fetch the Sina industry classification
    industry_sina = ts.get_industry_classified("sina")
    # Fetch the Shenwan (SW) industry classification
    industry_sw = ts.get_industry_classified("sw")
    engine = create_engine(
        'mysql+pymysql://root:[email protected]/tusharedata?charset=utf8')
    industry_sina.to_sql('industry_sina_data', engine, if_exists='append')
    industry_sw.to_sql('industry_sw_data', engine, if_exists='append')
def industrytodb():
    # Fetch the Sina industry classification
    industry_sina = ts.get_industry_classified("sina")
    print(industry_sina)
    # Fetch the Shenwan (SW) industry classification
    industry_sw = ts.get_industry_classified("sw")
    print(industry_sw)
    print("connecting to database")
    print(ENGINE)  # ENGINE is the module-level SQLAlchemy engine used below
    industry_sina.to_sql('industry_sina_data', ENGINE)
    industry_sw.to_sql('industry_sw_data', ENGINE)
def 下载板块数据(self):
    # Download the various classification lists into CSV files
    ts.get_industry_classified().to_csv('./股票数据/基本信息/行业分类.csv')
    ts.get_concept_classified().to_csv('./股票数据/基本信息/概念分类.csv')
    ts.get_area_classified().to_csv('./股票数据/基本信息/地域分类.csv')
    ts.get_sme_classified().to_csv('./股票数据/基本信息/中小板.csv')
    ts.get_gem_classified().to_csv('./股票数据/基本信息/创业板.csv')
    ts.get_st_classified().to_csv('./股票数据/基本信息/风险警示板.csv')
    ts.get_hs300s().to_csv('./股票数据/基本信息/沪深300.csv')
    ts.get_sz50s().to_csv('./股票数据/基本信息/上证50.csv')
    ts.get_zz500s().to_csv('./股票数据/基本信息/中证500.csv')
def main():
    pd.set_option('display.max_rows', None)
    pd.set_option('display.max_columns', None)
    pd.set_option('display.width', 2000)
    sns.set_style('darkgrid')

    # Collect closing prices for every stock in the "金融行业" (financials) industry
    stock_df = ts.get_industry_classified()
    stock_list = stock_df[stock_df['c_name'] == '金融行业']['code']
    stock_arr = numpy.array(stock_list)
    result_df = pd.DataFrame()
    for stock in stock_arr:
        closing_df = get_price_data(stock)['close']
        result_df = result_df.join(pd.DataFrame({stock: closing_df}), how='outer')

    # Daily percentage returns; plot mean return against volatility
    tech_rets = result_df.pct_change()
    rets = tech_rets.dropna()
    plt.scatter(rets.std(), rets.mean())
    plt.ylabel('Expected Return')
    plt.xlabel('Risk')
    for label, x, y in zip(rets.columns, rets.std(), rets.mean()):
        # Annotate each point with its stock code
        plt.annotate(label, xy=(x, y), xytext=(15, 15),
                     textcoords='offset points',
                     arrowprops=dict(arrowstyle='-', connectionstyle='arc3,rad=-0.3'))
    plt.show()
def fetch_stock_industry():
    '''
    Fetch the industry classification for all stocks.
    :return: DataFrame with the industry data
    '''
    df = ts.get_industry_classified()
    return df
def classification(class_types):
    # Map each classification type to the tushare fetcher that provides it;
    # every result is written to D:\ts\classification\<name>.csv in GBK.
    fetchers = {
        'industry': ('industry_classified', ts.get_industry_classified),
        'concept': ('concept_classified', ts.get_concept_classified),
        'area': ('area_classified', ts.get_area_classified),
        'sme': ('sme_classified', ts.get_sme_classified),
        'gem': ('gem_classified', ts.get_gem_classified),
        'st': ('st_classified', ts.get_st_classified),
        'hs300': ('hs300s', ts.get_hs300s),
        'sz50': ('sz50s', ts.get_sz50s),
        'zz500': ('zz500s', ts.get_zz500s),
        'terminated': ('terminated', ts.get_terminated),
        'suspended': ('suspended', ts.get_suspended),
    }
    if class_types in fetchers:
        filename, fetch = fetchers[class_types]
        fetch().to_csv('D:\\ts\\classification\\' + filename + '.csv', encoding='gbk')
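# A minimal usage sketch for the dispatcher above (illustrative only; it assumes
# tushare is importable as ts and that the D:\ts\classification folder exists):
for kind in ('industry', 'concept', 'hs300'):
    classification(kind)   # writes D:\ts\classification\<name>.csv in GBK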
def main_financial_statistic_process(path):
    store_path = os.path.join(path, 'statistic/')
    if not os.path.exists(store_path):
        os.makedirs(store_path)
    scu = SCU(path=path)
    stock_codes = scu.stock_codes_remove_no_stock_basic()
    FFC = financail_factor_statistic(path=path)
    # Narrow the universe: first to the SZ50 constituents, then (overriding that)
    # to the real-estate ("房地产") industry members.
    sz50 = ts.get_sz50s()
    stock_codes = sz50['code']
    stock_industry = ts.get_industry_classified()
    stock_codes = stock_industry[stock_industry['c_name'].isin(['房地产'])]['code']
    # Positional indexing below (stock_codes[0], stock_codes[1:]) needs a plain list
    stock_codes = stock_codes.tolist()
    statistic_stock_data = {}
    statistic_stock_min_len = 100
    for stock_code in stock_codes:
        print("stock:", stock_code)
        FFC.FLS.load_all_financial_one_stock(stock_code)
        FFC.FLS.load_all_processed_stock_basic_one_stock([stock_code])
        data_processed = FFC.financial_index_calc(stock_code)
        (row, colum) = data_processed.shape
        if row < statistic_stock_min_len:
            statistic_stock_min_len = row
        statistic_stock_data[stock_code] = data_processed
    # Align every stock on the shortest available history, then aggregate
    sum_data = statistic_stock_data[stock_codes[0]].iloc[-statistic_stock_min_len:, :]
    for stock_code in stock_codes[1:]:
        sum_data = sum_data + statistic_stock_data[stock_code].iloc[-statistic_stock_min_len:, :]
    pct_data = sum_data.pct_change(periods=4)
    pct_data.to_csv(store_path + 'statistic_pct.csv')
    sum_data.to_csv(store_path + 'statistic_sum.csv')
def __init__(self):
    # constructor
    self.notSaveFileName = 'Notsaved.txt'
    self.historyDBName = 'History.db'
    self.listDBName = 'Stocklist.db'
    self.listTableName = 'Allist'
    self.historyTabPrefx = 'code'
    # get all tables already present in the history db
    self.fetchConn = sqlite3.connect(self.historyDBName)
    fetchQuery = "select name from sqlite_master where type='table' order by name"
    self.alreadylist = pd.read_sql(fetchQuery, self.fetchConn)
    # build the stock-list database from tushare on first run
    database_file = os.path.dirname(os.path.abspath(__file__)) + '\\' + self.listDBName
    if not os.path.exists(database_file):
        data = ts.get_industry_classified()
        engine = create_engine('sqlite:///' + self.listDBName, echo=False)
        data.to_sql(self.listTableName, engine, if_exists='replace', index=False)
    connList = sqlite3.connect(self.listDBName)
    cursorList = connList.cursor()
    query = 'select * from ' + self.listTableName
    cursorList.execute(query)
    self.stocklist = cursorList.fetchall()
    cursorList.close()
    connList.close()
def get_industry():
    df = ts.get_industry_classified()
    # df.to_csv('./industry.csv')
    engine = create_engine('mysql://*****:*****@115.159.46.93:3306/StocksAnalysis?charset=utf8')
    # Store the classification into MySQL
    df.to_sql('industry', engine)
def update_stock_basic() -> None:
    df = ts.get_stock_basics()
    df.reset_index(inplace=True)
    data = df.to_dict(orient="records")

    # build code -> [industry names] mapper
    industry_df = ts.get_industry_classified()
    industry_df = industry_df.groupby(["code"])["c_name"].apply(lambda x: x.tolist()).reset_index()
    industry_mapper = dict([(d["code"], d["c_name"]) for d in industry_df.to_dict(orient="records")])

    # build code -> [concept names] mapper
    concept_df = ts.get_concept_classified()
    concept_df = concept_df.groupby(["code"])["c_name"].apply(lambda x: x.tolist()).reset_index()
    concept_mapper = dict([(d["code"], d["c_name"]) for d in concept_df.to_dict(orient="records")])

    # build code -> [areas] mapper
    area_df = ts.get_area_classified()
    area_df = area_df.groupby(["code"])["area"].apply(lambda x: x.tolist()).reset_index()
    area_mapper = dict([(d["code"], d["area"]) for d in area_df.to_dict(orient="records")])

    bar = ProgressBar(total=len(data))
    for d in data:
        d["industry"] = list(set(industry_mapper.get(d["code"], []) + [d["industry"]]))
        d["concept"] = concept_mapper.get(d["code"], [])
        d["area"] = list(set(area_mapper.get(d["code"], []) + [d["area"]]))
        bar.move()
        db.stock_basics.update({"code": d["code"]}, {"$set": d}, True)
        bar.log("code: {} name: {} industry {} concept {} area {}"
                .format(d["code"], d["name"], d["industry"], d["concept"], d["area"]))
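# The groupby-to-dict mapper pattern used above, shown in isolation on a toy
# DataFrame (column names mirror tushare's output; the rows are made up):
import pandas as pd
toy = pd.DataFrame({"code": ["600000", "600000", "000001"],
                    "c_name": ["银行", "金融行业", "银行"]})
# groupby collapses the multiple classification rows per code into one list
mapper = toy.groupby("code")["c_name"].apply(list).to_dict()
# -> {'000001': ['银行'], '600000': ['银行', '金融行业']}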
def fetch_industry():
    """
    Fetch the industry classification table and print it.
    :return: None
    """
    df = tushare.get_industry_classified()
    print(df)
def stockIndustryClassified():
    jsonFile = os.path.join(config.listsRootPath, "stockIndustryClassified.json")
    data = ts.get_industry_classified()
    # data.to_json(jsonFile, orient='records', force_ascii=False)
    writeFile(jsonFile, data, 'records', False)
def exportIndustry(fromDate, toDate, code):
    df = ts.get_industry_classified()
    engine = create_engine(sql_str)
    # Write the classification into the database
    df.to_sql('industry_classified', engine, if_exists='append')
    Log.logger.info("industry_classified data import finished.")
    return True
def download_all_date(if_check_before=0):
    # Use the industry classification as a full list of stock codes (deduplicated)
    all_stock_code = ts.get_industry_classified()
    all_stock_code = all_stock_code[~all_stock_code['code'].duplicated()]['code']
    n = 0
    exist_file = os.listdir(workdir)
    for stock_code in all_stock_code:
        print stock_code
        if ('Tu' + stock_code + '.db') in exist_file:
            print 'it is already done'
            n += 1
            if if_check_before == 1:
                # Refresh an existing database from its last recorded day up to today
                test = onestock(stock_code)
                test.download_range = pd.date_range(
                    test.con.execute('select max(rowid),day from tick').fetchall()[0][1],
                    date.today())[1:]
                test.creat_tick_table()
                test.con.close()
                print n, ' is done'
        else:
            test = onestock(stock_code)
            test.creat_tick_table()
            test.creat_k_table()
            test.con.close()
            n += 1
            print n, 'is done'
def fetch_classification(self):
    # Classification data come from Sina Finance: industry / concept / area lists
    print("Trying: get_today_all")
    today_all = ts.get_today_all()  # today's quotes for all stocks in one call
    set_today_all = set(today_all.T.values[0])
    print("Trying: get_industry_classified")
    industry_classified = ts.get_industry_classified()
    set_industry_classified = set(industry_classified.T.values[0])
    print("Trying: get_area_classified")
    area_classified = ts.get_area_classified()
    set_area_classified = set(area_classified.T.values[0])
    print("Trying: get_concept_classified")
    concept_classified = ts.get_concept_classified()
    set_concept_classified = set(concept_classified.T.values[0])
    print("Trying: get_sme_classified")
    sme_classified = ts.get_sme_classified()
    set_sme_classified = set(sme_classified.T.values[0])
    return [
        today_all, set_today_all,
        industry_classified, set_industry_classified,
        area_classified, set_area_classified,
        concept_classified, set_concept_classified,
        sme_classified, set_sme_classified
    ]
def get_industy_code(industry):
    indus = ts.get_industry_classified()
    indus = indus[indus['c_name'] == industry]
    indus['code'] = indus['code'].apply(str)
    code = indus['code'].tolist()
    # print('code:', code)
    return code
def get_universe():
    try:
        dat = pd.read_csv(dirs + 'code_inuse.csv', dtype={'code': str}, index_col=0, encoding='gbk')
    except Exception:
        dat = ts.get_industry_classified()
    dat = dat.drop_duplicates('code')  # remove duplicate codes
    return dat['code'].values
def load_tushare_df(df_type):
    # Cache each tushare table in a local pickle so repeated runs avoid re-downloading
    file = 'ts.' + df_type + '.dat'
    try:
        obj = pickle.load(open(file, "rb"))
    except Exception:
        if df_type == "basic":
            obj = ts.get_stock_basics()
        elif df_type == "sme":
            obj = ts.get_sme_classified()
        elif df_type == "gem":
            obj = ts.get_gem_classified()
        elif df_type == "industry":
            # This list is incomplete (about 2800 of 3326 stocks),
            # so get_stock_basics is preferred for a full universe.
            obj = ts.get_industry_classified()
        elif df_type == "st":
            obj = ts.get_st_classified()
        else:
            raise Exception("Error TSshare Type!!!")
        pickle.dump(obj, open(file, "wb", 0))
    return obj
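# Usage sketch for the pickle cache above (assumes tushare and network access):
# the first call downloads and writes ts.industry.dat, later calls read the file.
industry = load_tushare_df("industry")        # downloaded, then cached locally
industry_again = load_tushare_df("industry")  # served from ts.industry.dat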
def save_industry_classified():
    '''
    Fetch the industry classification and replace the copy stored in MySQL.
    :return: number of rows written, or False on failure
    '''
    cur = mysql_connect.cursor()
    try:
        sta = cur.execute("delete from py_db.gp_industry_classified")
        print(sta)
        print("deleted", sta, "rows from gp_industry_classified")
    except Exception as e:
        print(e)
    mysql_connect.commit()
    cur.close()
    df = tushare.get_industry_classified()
    try:
        pd.io.sql.to_sql(df, 'gp_industry_classified', yconnect, schema='py_db',
                         if_exists='append', index=df['code'])
        print("industry classification:", len(df), "rows updated")
        return len(df)
    except ValueError as e:
        print(e)
        return False
def get_industry_classified():
    '''
    Return the industry classification DataFrame.
    :return: the DataFrame
    '''
    content = ts.get_industry_classified()
    return content
def get_stk_idt(con):
    c = con.cursor()
    c.execute('''CREATE TABLE IF NOT EXISTS stk_idt(
        stk_num CHAR(20) PRIMARY KEY,
        stk_name CHAR(20),
        stk_idt CHAR(20),
        pe float(10,2),
        avt_pe float(10,2)
        )''')
    result = ts.get_industry_classified()
    # Record the industry for every classification row whose stock already exists in stk_lst
    for i in range(len(result)):
        c.execute("SELECT stk_name from stk_lst where stk_num = %s", (result["code"][i],))
        fetch = c.fetchone()
        if fetch is not None:
            stk_name = fetch[0]
            stk_num = result["code"][i]
            stk_idt = result["c_name"][i]
            print "stk_num", stk_num
            print "stk_name", stk_name
            print "stk_idt", stk_idt
            c.execute('''REPLACE INTO stk_idt (stk_num, stk_name, stk_idt)
                         VALUES (%s, %s, %s)''', (stk_num, stk_name, stk_idt))
    con.commit()
def core_function(self, func):
    self.set_data()
    mongo = MongoClient("127.0.0.1", 27017)
    # Dispatch to the matching tushare classification fetcher
    if func == "industry_classified":
        df = ts.get_industry_classified()
    elif func == "concept_classified":
        df = ts.get_concept_classified()
    elif func == "area_classified":
        df = ts.get_area_classified()
    elif func == "gem_classified":
        df = ts.get_gem_classified()
    elif func == "sme_classified":
        df = ts.get_sme_classified()
    elif func == "st_classified":
        df = ts.get_st_classified()
    elif func == "hs300s":
        df = ts.get_hs300s()
    elif func == "sz50s":
        df = ts.get_sz50s()
    elif func == "zz500s":
        df = ts.get_zz500s()
        print(df)
    elif func == "terminated":
        df = ts.get_terminated()
    else:
        df = {}
    # Store the result as one MongoDB document per row
    insert_string = df.to_json(orient='records')
    items = json.loads(insert_string)
    coll = mongo.classify[func]
    coll.insert(items)
def get_stock_basic() -> tuple:
    nodes, links = [], []
    Node = namedtuple("Node", "name type source")
    Link = namedtuple("Link", "head link tail source")
    df = ts.get_stock_basics()
    df.reset_index(inplace=True)
    data = df.to_dict(orient="records")
    # code -> [names] mappers for industry / concept / area
    industry_mapper = ts.get_industry_classified().groupby(
        ["code"])["c_name"].apply(lambda x: x.tolist()).to_dict()
    concept_mapper = ts.get_concept_classified().groupby(
        ["code"])["c_name"].apply(lambda x: x.tolist()).to_dict()
    area_mapper = ts.get_area_classified().groupby(
        ["code"])["area"].apply(lambda x: x.tolist()).to_dict()
    for d in data:
        nodes.append(Node(d["code"], "公司", "tu"))
        nodes.append(Node(d["name"], "公司", "tu"))
        links.append(Link(d["code"], "等于", d["name"], "tu"))
        for k in set(industry_mapper.get(d["code"], []) + [d["industry"]]):
            nodes.append(Node(k, "行业", "tu"))
            links.append(Link(d["code"], "属于", k, "tu"))
        for k in concept_mapper.get(d["code"], []):
            nodes.append(Node(k, "概念", "tu"))
            links.append(Link(d["code"], "属于", k, "tu"))
        for k in set(area_mapper.get(d["code"], []) + [d["area"]]):
            nodes.append(Node(k, "区域", "tu"))
            links.append(Link(d["code"], "属于", k, "tu"))
        del d["industry"], d["area"]
    nodes = list(set(nodes))
    return nodes, links, data
def get_industry_classified():
    wb = xw.Book.caller()
    sht = wb.sheets[0]
    df = ts.get_industry_classified()
    # Delete the "行业数据" sheet if it already exists; deleting a missing
    # sheet raises, so the failure is swallowed.
    try:
        wb.sheets("行业数据").delete()
    except Exception:
        print("Sheet does NOT exist!!!")
    # Create a fresh sheet after the control sheet and write the DataFrame into it
    ws = wb.sheets.add("行业数据", after="数据工作台")
    ws.range("A1").value = df
    # Return focus to the control sheet
    wb.sheets("数据工作台").activate()
def industryData(self):
    table = 'stock_etl.stock_industry'
    column = 'ticker,tickerName,tickerType'
    result = ts.get_industry_classified()
    sql = SqlBuildUtil.SqlBuildUtil.insertBuildts(table, column, result.values)
    # Clear the old rows before saving the freshly downloaded classification
    EtlDao.EtlDao().delAllDate(table)
    EtlDao.EtlDao().save(sql)
def test_get_industry_classified():
    df = ts.get_industry_classified()
    df.to_sql("industry_classify", engine, if_exists='replace', index=False,
              dtype={"code": String(6)})
def classify_info_to_sql():
    create_classify_table()
    # Industry classification
    a = ts.get_industry_classified()
    a.columns = ['code', 'name', 'industry']
    # Area classification
    b = ts.get_area_classified()
    # SZ50 membership flag
    c = ts.get_sz50s()
    c = c.iloc[:, 1:]
    c['sz50'] = '1'
    # HS300 constituents and weights
    d = ts.get_hs300s()
    d = d.iloc[:, 1:]
    d.columns = ['code', 'name', 'hs300_weight']
    # ZZ500 constituents and weights
    e = ts.get_zz500s()
    e = e.iloc[:, 1:]
    e.columns = ['code', 'name', 'zz500_weight']
    # Left-join everything onto the industry table (the remaining pd.merge
    # arguments in the original were all defaults)
    result = pd.merge(a, b, how='left', sort=True)
    result = pd.merge(result, c, how='left', sort=True)
    result = pd.merge(result, d, how='left', sort=True)
    result = pd.merge(result, e, how='left', sort=True)
    df_to_mysql('anack_classify', result)
def download_industry_classified(self, start='2017-01-01', end='2018-01-01'):
    """
    Columns of the downloaded table:
    code:   stock code
    name:   stock name
    c_name: industry name
    """
    total = 0
    industry_classified = ts.get_industry_classified()
    if industry_classified is not None:
        industry_classified['date_download'] = end
        filename = 'industry_classified'
        industry_classified.to_excel(self.working_folder + filename + '.xlsx',
                                     encoding='GBK')
    else:
        warning_code = pd.DataFrame({
            'return code': ['for all stocks'],
            'description': ['industry_classified download failed']
        })
        self.warning_list = self.warning_list.append(warning_code)
    return
def get_universe():
    try:
        dat = pd.read_csv('d:/data/code_inuse.csv', index_col=0, parse_dates=[0], encoding='gbk')
    except Exception:
        dat = ts.get_industry_classified()
    dat = dat.drop_duplicates('code')  # remove duplicate codes
    return dat['code'].values
def get_industry_classified_dict():
    icdf = ts.get_industry_classified()
    industry_classified_dict = icdf.to_dict(orient='records')
    ret = {}
    for data in industry_classified_dict:
        ret[data['code']] = data['c_name']
    return ret
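# Usage sketch for the lookup dict above; note that a code listed under several
# industries keeps only its last row. The sample code/industry are illustrative.
industry_by_code = get_industry_classified_dict()
print(industry_by_code.get('600000', 'unknown'))  # e.g. '金融行业'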
def get_pre_of_industry_concept():
    # Dump the industry and concept classifications to CSV for preprocessing
    df = ts.get_industry_classified()
    df.to_csv("stock_industry_prep.csv")
    df = ts.get_concept_classified()
    df.to_csv("stock_concept_prep.csv")
def getHangye():
    '''
    Fetch the industry classification.
    :return: DataFrame of industry data
    '''
    hangyeData = ts.get_industry_classified()
    # hangyeData.to_csv("../../data/hangyeData.csv")
    return hangyeData
def get_industry_classified():
    try:
        df = ts.get_industry_classified()
        engine = create_engine('mysql://*****:*****@127.0.0.1/stock?charset=utf8')
        df.to_sql('industry_classified', engine, if_exists='append')
    except Exception, e:
        print e.message
def downloadCodes(self):
    dat = ts.get_industry_classified()
    dat = dat.drop_duplicates('code')
    # Use the code column as the index and name the index 'symbol'
    dat.index = dat['code']
    dat.index.name = 'symbol'
    # Rename the code column itself to 'symbol'
    df = dat.rename(columns={'code': 'symbol'})
    return df
def read_all_stocks():
    # Cache the full code list to the 'all_stocks' file on first run
    if not os.path.isfile('all_stocks'):
        df = ts.get_industry_classified()
        with open('all_stocks', 'w') as F:
            for line in df.iloc[:, 0]:
                F.write(line + "\n")
def list(self, stock_block_type):
    # Map each block type to its cache key and the tushare fetcher used on a miss
    sources = {
        self.industry: (STOCK_BLOCK_INDUSTRY, ts.get_industry_classified),
        self.concept: (STOCK_BLOCK_CONCEPT, ts.get_concept_classified),
        self.area: (STOCK_BLOCK_AREA, ts.get_area_classified),
        self.sme: (STOCK_BLOCK_SME, ts.get_sme_classified),
        self.gem: (STOCK_BLOCK_GEM, ts.get_gem_classified),
        self.st: (STOCK_BLOCK_ST, ts.get_st_classified),
        self.hs300s: (STOCK_BLOCK_HS300S, ts.get_hs300s),
        self.sz50s: (STOCK_BLOCK_SZ50S, ts.get_sz50s),
        self.zz500s: (STOCK_BLOCK_ZZ500S, ts.get_zz500s),
    }
    if stock_block_type not in sources:
        return None
    db_key, fetch = sources[stock_block_type]
    # Serve from the local cache when possible, otherwise download and cache
    stock_block = db.get(db_key)
    if stock_block is None:
        stock_block = fetch()
        db.save(db_key, stock_block)
    return stock_block
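# Usage sketch for the cached dispatcher above (hypothetical: assumes `blocks`
# is an instance of the surrounding class backed by the same db cache object):
industry_df = blocks.list(blocks.industry)   # first call downloads and caches
industry_df = blocks.list(blocks.industry)   # second call is served from db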
def getNetData():
    print "... to get server data ..."
    # Industry classification
    stockData_df = ts.get_industry_classified()
    # Concept classification:  df = ts.get_concept_classified()
    # SSE 50 constituents:     stockData_df = ts.get_sz50s()
    # Persist basic stock info to file:  df.to_json(stock_item_path, orient='records')
    stock = {}
    for i, data in enumerate(stockData_df.values):
        stock[i] = {}
        print ">>>>> ", data[0], " ", data[1], " ", data[2]
        stock[i]['code'], stock[i]["name"] = data[0], data[1]
    return stock
def preload():
    # Download every classification table once and cache each one in the local db
    stock_block = ts.get_industry_classified()
    db.save(STOCK_BLOCK_INDUSTRY, stock_block)
    stock_block = ts.get_concept_classified()
    db.save(STOCK_BLOCK_CONCEPT, stock_block)
    stock_block = ts.get_area_classified()
    db.save(STOCK_BLOCK_AREA, stock_block)
    stock_block = ts.get_sme_classified()
    db.save(STOCK_BLOCK_SME, stock_block)
    stock_block = ts.get_gem_classified()
    db.save(STOCK_BLOCK_GEM, stock_block)
    stock_block = ts.get_st_classified()
    db.save(STOCK_BLOCK_ST, stock_block)
    stock_block = ts.get_hs300s()
    db.save(STOCK_BLOCK_HS300S, stock_block)
    stock_block = ts.get_sz50s()
    db.save(STOCK_BLOCK_SZ50S, stock_block)
    stock_block = ts.get_zz500s()
    db.save(STOCK_BLOCK_ZZ500S, stock_block)
def set_h_data(start=ct._START_, middle=ct._MIDDLE_, autype="qfq", index=False, retry_count=3, pause=0):
    """
    Download historical quotes into the database, by default from 1994 to 2015
    (with no arguments only the most recent year is taken); the remaining
    parameters match tushare's get_h_data.
    Index quotes can be queried through tushare but no list is provided,
    so the index table is constructed by hand.
    """
    _CODE_INDEX = pd.DataFrame({'code': ['000001', '399001', '399006'],
                                'name': ['上证指数', '深证指数', '创业板指数'],
                                'c_name': ['指数', '指数', '指数']})
    code_index = _CODE_INDEX.set_index('code')
    dat = ts.get_industry_classified()
    dat = dat.drop_duplicates('code')
    engine = create_engine(ct._ENGINE_)
    dat.to_sql('code', engine, if_exists='replace')  # overwrite the table if it exists
    dat = dat.append(code_index)
    _time_ = pd.period_range(start, middle, freq='Y')  # _time_[0] is 1994-12-31
    _start_ = start
    i = 0
    for code in dat['code'].values:
        i += 1
        # For the index rows set index=True so get_h_data queries index quotes
        if (dat[dat['code'] == code]['c_name'] == "指数").any():
            index = True
        for _end_ in _time_:
            _end_ = _end_.strftime('%Y-%m-%d')
            print i, code, _end_
            try:
                # Forward-adjusted data between the two dates
                _data_ = ts.get_h_data(code, start=_start_, end=_end_, index=index,
                                       autype=autype, retry_count=retry_count, pause=pause)
                if _data_ is not None:
                    _data_['code'] = code
                    _data_.to_sql('h_data', engine, if_exists='append')
            except Exception, e:
                print e.args[0]
                pass
            _start_ = _end_
def read_classify():
    # Industry classification
    db_classify = ts.get_industry_classified()
    # Concept classification
    db_concept = ts.get_concept_classified()
    # Rename columns
    db_classify.columns = ['code', 'name', 'class']
    db_concept.columns = ['code', 'name', 'concept']
    # Strip spaces from the name / classification columns
    db_classify['name'] = db_classify['name'].map(lambda x: x.replace(" ", ''))
    db_classify['class'] = db_classify['class'].map(lambda x: x.replace(" ", ''))
    db_concept['name'] = db_concept['name'].map(lambda x: x.replace(" ", ''))
    db_concept['concept'] = db_concept['concept'].map(lambda x: x.replace(" ", ''))
    # Write both tables to the database
    db = DB_ENGINE.connect()
    db_classify.to_sql('stock_classify', db, if_exists='append', index=False)
    db_concept.to_sql('stock_concept', db, if_exists='append', index=False)
    db.close()
def save_data():
    dat = ts.get_industry_classified()
    dat = dat.drop_duplicates('code')
    dat.to_csv('d:/data/code.csv', encoding='gbk')
    inuse = []
    i = 0
    for code in dat['code'].values:
        i += 1
        print i, code
        try:
            # get_hist_data defaults to roughly three years; code is a str and
            # start is ignored here -- if the end date has data, everything is returned
            _data_ = ts.get_hist_data(code, end=ct._MIDDLE_)
            if _data_ is not None:
                _data_.to_csv('d:/data/%s.csv' % code, encoding='gbk')
                # Keep only codes whose history covers both the start and end ranges
                if _data_.index[0] in ct._start_range and _data_.index[-1] in ct._end_range:
                    inuse.append(code)
        except IOError:
            pass
    _df_inuse = DataFrame(inuse, columns={'code'})
    _df_inuse.to_csv('d:/data/code_inuse.csv', encoding='gbk')
def generate_according_industry(filename, *arg):
    category = ts.get_industry_classified()
    selection = category[category["c_name"].isin(arg[0])]
    reload(sys)
    sys.setdefaultencoding('utf-8')
    selection.to_csv("tmp")
    with open("tmp") as f:
        lines = f.readlines()
    code = []
    name = []
    for index in range(len(lines)):
        if index > 0:  # skip the CSV header row
            code.append(lines[index].split(",")[1])
            name.append(lines[index].split(",")[2])
    code = [str(i) for i in code]
    name = [str(i) for i in name]
    stock_pair = zip(code, name)
    data = {"stock": stock_pair}
    with open(filename, 'w') as f:
        yaml.dump(data, f)
# global variables
plate_info = []
concept_info = []
basic_info = []
global code

# multi-processing worker: record the concept name matching the current code
def mapConcepts(concept_index):
    if code == concept_info[concept_index]['code']:
        basic_info.append(concept_info[concept_index]['c_name'].encode('utf-8'))
        print basic_info[0]

# Download and cache the plate (industry) and concept tables as JSON on first run
if not (os.path.exists(plate_file) and os.path.exists(concept_file)):
    plate = ts.get_industry_classified()
    plate.to_json(plate_file, orient='records')
    concept = ts.get_concept_classified()
    concept.to_json(concept_file, orient='records')

with open(plate_file) as f:
    plate_info = json.load(f)
with open(concept_file) as f:
    concept_info = json.load(f)
# map code[plate & concept]
def get_industry_info(file_path):
    industry_info = ts.get_industry_classified()
    industry_info.to_csv(file_path, encoding='utf-8')
    print '\ndownload industry info finished\n'
def get_industry(self, year):
    # Average ROE and net profit ratio per industry for the given year
    i = ts.get_industry_classified()[['code', 'c_name']]
    j = pd.read_csv('profit_data_%d.csv' % year, dtype={'code': str})[['code', 'roe', 'net_profit_ratio']]
    # Drop implausible outliers before averaging
    j = j[(j.net_profit_ratio > -100) & (j.net_profit_ratio < 100) & (j.roe > -100) & (j.roe < 100)]
    data = pd.merge(i, j, on='code').groupby(by='c_name').mean()
    return data
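# Hypothetical call for the helper above (assumes profit_data_2017.csv exists and
# `obj` is an instance of the surrounding class); the result has one row per
# industry with averaged roe / net_profit_ratio columns.
industry_means = obj.get_industry(2017)
print(industry_means.sort_values('roe', ascending=False).head())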
    '酿酒行业', '电器行业', '传媒娱乐', '化工行业', '房地产', '金融行业', '开发区',
    '电子信息', '服装鞋类', '电子器件', '电力行业', '汽车制造', '家具行业', '农药化肥',
    '酒店旅游', '水泥行业', '物资外贸', '摩托车', '印刷包装', '家电行业']
IndustryEnglishList = ['ConsumerDiscretionary', 'Industrials', 'Materials', 'Industrials',
                       'HealthCare', 'Energy', 'Materials', 'Industrials', 'Industrials',
                       'Industrials', 'ConsumerStaples', 'Materials', 'Materials',
                       'ConsumerDiscretionary', 'ConsumerDiscretionary', 'HealthCare',
                       'Materials', 'Utilities', 'Industrials', 'Materials', 'Industrials',
                       'Materials', 'ConsumerStaples', 'Materials', 'Industrials', 'Materials',
                       'Utilities', 'ConsumerStaples', 'ConsumerDiscretionary',
                       'ConsumerDiscretionary', 'Materials', 'Financials', 'Financials',
                       'Financials', 'InformationTechnology', 'ConsumerDiscretionary',
                       'ConsumerDiscretionary', 'Utilities', 'ConsumerDiscretionary',
                       'ConsumerDiscretionary', 'Materials', 'ConsumerDiscretionary',
                       'Materials', 'Industrials', 'ConsumerDiscretionary', 'Materials',
                       'ConsumerDiscretionary']

IndustryClassified = ts.get_industry_classified()
IndustryClassified = IndustryClassified.drop_duplicates('code')
# Prefix codes with the exchange: SH for 6xxxxx, SZ otherwise
IndustryClassified['code'] = IndustryClassified['code'].map(lambda x: 'SH' + x if x[0] == '6' else 'SZ' + x)
IndustryClassified = IndustryClassified.set_index('code')
# Translate each Chinese industry name to its English sector label
for i, e in enumerate(IndustryList):
    IndustryClassified['c_name'] = IndustryClassified['c_name'].map(
        lambda x: IndustryEnglishList[i] if x == e else x)
RestofStock = pd.read_csv('./ASHR/DATA/Index/rest.csv', header=None, index_col=0,
                          names=['stock', 'industry'])
# Restrict to the stock list only
Industry = pd.DataFrame({'Stock': StockList})
Industry['Industry'] = Industry['Stock'].map(
    lambda x: RestofStock.industry.loc[x] if x in RestofStock.index
    else IndustryClassified.c_name.loc[x])
def load_company_industry_info():
    # Download each classification table and replace its copy in MySQL;
    # every download is attempted independently so one failure does not stop the rest.
    tasks = [
        (ts.get_industry_classified, "company_industry_classified", "industry classification"),
        (ts.get_concept_classified, "company_concept_classified", "concept classification"),
        (ts.get_area_classified, "company_area_classified", "area classification"),
        (ts.get_sme_classified, "company_sme_classified", "SME board list"),
        (ts.get_gem_classified, "company_gem_classified", "GEM board list"),
        (ts.get_st_classified, "company_st_classified", "ST board list"),
        (ts.get_hs300s, "company_hs300_classified", "HS300 constituents"),
        (ts.get_sz50s, "company_sz50_classified", "SZ50 constituents"),
        (ts.get_zz500s, "company_zz500_classified", "ZZ500 constituents"),
        (ts.get_terminated, "company_terminated_classified", "terminated listings"),
        (ts.get_suspended, "company_suspended_classified", "suspended listings"),
    ]
    for fetch, table, label in tasks:
        try:
            rs = fetch()
            sql.write_frame(rs, table, con=conn_company_classified,
                            flavor='mysql', if_exists='replace', index=True)
            print("downloaded %s ok" % label)
        except Exception:
            print("failed to download %s" % label)
def __init__(self):
    super(MyUi, self).__init__()
    self.ui = Ui_MainWindow()
    self.ui.setupUi(self)
    cwd = str(os.getcwd())

    # Use a pickled copy of the industry classification, refreshed at most every 12 hours
    if os.path.isfile(cwd + "/time"):
        with open("time", "r") as outfile:  # read the timestamp of the last download
            history = cPickle.load(outfile)
        if (datetime.now() - history).total_seconds() < 43200:  # less than 12 hours ago
            print("Less than 12 hours. Loading previously saved Pickle...")
        else:
            print("More than 12 hours. Updating Pickle...")
            data = ts.get_industry_classified()
            with open("class", "w+") as outfile:
                cPickle.dump(data, outfile)
            now = datetime.now()
            with open("time", "w+") as outfile:  # update the timestamp
                cPickle.dump(now, outfile)
    else:
        # First run in this directory: download and pickle everything
        print("No Pickle found!")
        data = ts.get_industry_classified()
        with open('class', 'w+') as outfile:
            cPickle.dump(data, outfile)
        now = datetime.now()
        with open("time", "w+") as outfile:
            cPickle.dump(now, outfile)
    with open("class", "r") as infile:  # load the cached classification
        series = cPickle.load(infile)
    series = pd.DataFrame(series)

    # Dates for the date-edit widgets: today, a week ago and a month ago
    curdate = time.strftime("%Y/%m/%d")
    curdateQ = QDate.fromString(curdate, "yyyy/MM/dd")
    dateobj = datetime.strptime(curdate, "%Y/%m/%d")
    past = dateobj - timedelta(days=7)
    pasttime = datetime.strftime(past, "%Y/%m/%d")
    pastQ = QDate.fromString(pasttime, "yyyy/MM/dd")
    pastL = dateobj - timedelta(days=30)
    pasttimeL = datetime.strftime(pastL, "%Y/%m/%d")
    pastQL = QDate.fromString(pasttimeL, "yyyy/MM/dd")

    # Prepend the broad market indexes to the classification table
    np_indexes = np.array([['sh', '上证指数', '大盘指数'],
                           ['sz', '深证成指', '大盘指数'],
                           ['hs300', '沪深300指数', '大盘指数'],
                           ['sz50', '上证50', '大盘指数'],
                           ['zxb', '中小板', '大盘指数'],
                           ['cyb', '创业板', '大盘指数']])
    indexes = df(data=np_indexes, index=range(5000, 5006), columns=["code", "name", "c_name"])
    series = indexes.append(series)

    # Industry categories with duplicates removed, original order preserved
    list1_bfr = series["c_name"].tolist()
    list1 = list(set(list1_bfr))
    list1.sort(key=list1_bfr.index)
    self.init_treeWidget(list1, series)
    self.ui.treeWidget.setContextMenuPolicy(Qt.CustomContextMenu)
    self.ui.treeWidget.customContextMenuRequested.connect(self.openMenu)

    # Load the local render.html into the embedded web view
    file_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "render.html"))
    local_url = QUrl.fromLocalFile(file_path)
    self.ui.webView.load(local_url)

    # Wire up the buttons
    self.ui.search_btn.clicked.connect(lambda: self.search_comp(series))
    self.ui.init_code_btn.clicked.connect(lambda: self.code_sort_tree(series))
    self.ui.init_category_btn.clicked.connect(lambda: self.init_treeWidget(list1, series))
    self.ui.commandLinkButton.clicked.connect(self.classify)  # arrow button triggers classification

    # Keep hidden widgets from collapsing the layout (QSizePolicy)
    try:
        for widget in (self.ui.dateEdit_2, self.ui.comboBox, self.ui.label_2):
            retain_size = widget.sizePolicy()
            retain_size.setRetainSizeWhenHidden(True)
            widget.setSizePolicy(retain_size)
    except AttributeError:
        print("No PYQT5 Binding! Widgets might be deformed")

    # Populate the date and period widgets
    self.ui.dateEdit.setDate(pastQL)
    self.ui.dateEdit_2.setDate(curdateQ)
    self.ui.dateEdit.setCalendarPopup(True)
    self.ui.dateEdit_2.setCalendarPopup(True)
    self.ui.comboBox.addItems(["D", "W", "M", "5", "15", "30", "60"])
    self.ui.treeWidget_2.setDragDropMode(self.ui.treeWidget_2.InternalMove)
    self.ui.treeWidget_2.setContextMenuPolicy(Qt.CustomContextMenu)
    self.ui.treeWidget_2.customContextMenuRequested.connect(self.openWidgetMenu)
    self.ui.combobox.currentIndexChanged.connect(lambda: self.modifycombo(pastQL, pastQ))
    cy_amount = index_all.loc[index_all['code'] == cy_index, 'amount'].values[0]
    cy_growth = index_all.loc[index_all['code'] == cy_index, 'change'].values[0]
    sz_amount = index_all.loc[index_all['code'] == sz_index, 'amount'].values[0]
    sz_growth = index_all.loc[index_all['code'] == sz_index, 'change'].values[0]
    spam_asset_writer_sh.writerow([format_time_asset, sh_index, sh_amount, sh_growth])
    spam_asset_writer_sz.writerow([format_time_asset, sz_index, sz_amount, sz_growth])
    spam_asset_writer_cy.writerow([format_time_asset, cy_index, cy_amount, cy_growth])
    asset_flow_csv_sh.close()
    asset_flow_csv_cy.close()
    asset_flow_csv_sz.close()

    # Snapshot today's quotes and the industry classification
    today_all = ts.get_today_all()
    today_all.to_csv(path_or_buf='./ClassifyHistory/today_all.csv', encoding='gbk')
    classify = ts.get_industry_classified()
    classify.to_csv(path_or_buf='./ClassifyHistory/classify.csv', encoding='gbk')

    for index, classifyRow in classify.iterrows():
        code = classifyRow['code']
        if len(today_all.loc[today_all['code'] == code]) != 1:
            continue
        stock_name = classifyRow["name"].encode("gbk")
        classify_name = classifyRow["c_name"].encode("gbk")
        growth = today_all.loc[today_all['code'] == code, 'changepercent'].values[0]
        amount = today_all.loc[today_all['code'] == code, "amount"].values[0]
        high_bit = int(int(code) / 100000)
        amount_percent = .0
        if high_bit == 6:
            print 'code is:' + code + str(amount)
            # Share of the Shanghai market's total turnover, in percent
            amount_percent = amount * 100 / (sh_amount * _100_m)
            print amount_percent
def typeindustry():
    global dftypeindustry
    dftypeindustry = ts.get_industry_classified()
    dftypeindustry.insert(0, 'uploadtime', nowtime)
    typeindustry_tosql()
# coding: utf-8
__author__ = 'xlyang0211'

import tushare as ts

ts.set_token("efe5e687247788b99191f7fe13357d13b23e89a1df6989ec597d9b8c12a51403")
print ts.get_token()
print ts.get_industry_classified()
def insertIndex(dict):
    # Turn the {code: row_dict} mapping into a list of row dicts carrying their code
    list = []
    for k, v in dict.items():
        v['code'] = k
        list.append(v)
    return list


client = pymongo.MongoClient("localhost", 27017)
db = client["Stock"]
basics = db["Basics"]
industry = db['Industry']
concept = db['Concept']

basicDf = ts.get_stock_basics()
basicDict = json.loads(basicDf.to_json(orient='index'), encoding="UTF-8")
basicList = insertIndex(basicDict)
basics.insert(basicList)

industryDf = ts.get_industry_classified()
industryDict = json.loads(industryDf.to_json(orient='records'), encoding="UTF-8")
industry.insert(industryDict)

conceptDf = ts.get_concept_classified()
conceptDict = json.loads(conceptDf.to_json(orient='records'), encoding="UTF-8")
concept.insert(conceptDict)
# -*- coding:utf-8 -*-
import tushare as ts
import redis

# Industry classification of stocks
data = ts.get_industry_classified()
# print data.columns, data.values, data.dtypes, len(data), data.code
for i in range(0, len(data)):
    for c in data.columns:
        print data[c][i]

# Real-time box office (unrelated example, kept commented out)
# data = ts.realtime_boxoffice()
# print(data)
def get_codes():
    codes = tushare.get_industry_classified()
    codes = [value[0] for value in codes.values]
    with open("all_codes", "wb") as f:
        pickle.dump(codes, f)
    return codes