def getprofitreport():
    """Write the stocks that were profitable and growing in three annual reports.

    Loads the Q4 report for 2015, 2016 and 2017 (from the CSV cache under
    ``./output`` when present, otherwise from tushare), keeps rows whose
    ``net_profits`` and ``profits_yoy`` are both positive, and writes the
    ``code``/``name`` pairs that pass in at least three reports to
    ``./output/profitcontinuous.csv``.
    """
    profitreport = "./output/profit_%d_%d.csv"
    outreport = "./output/profitcontinuous.csv"
    year = 2015
    quarter = 4
    dflist = []
    for _ in range(3):
        report = profitreport % (year, quarter)
        if os.path.exists(report):
            print("Get data from csv")
            # Read codes as text: read_csv would otherwise infer integers,
            # dropping leading zeros, so cached rows would no longer group
            # with rows of the same stock coming from a fresh download.
            # NOTE(review): "ANSI" is only a valid codec name on Windows and
            # the file is written below with the default encoding - confirm.
            dfsingle = pd.read_csv(report, encoding="ANSI", dtype={"code": str})
        else:
            print("Get data from internet")
            try:
                dfsingle = ts.get_report_data(year=year, quarter=quarter)
                dfsingle["period"] = "%d_%d" % (year, quarter)
            except Exception:
                # Fall back to the previous quarter when the requested one
                # cannot be downloaded.
                dfsingle = ts.get_report_data(year=year, quarter=quarter - 1)
                dfsingle["period"] = "%d_%d" % (year, quarter - 1)
            dfsingle.to_csv(report)
        dflist.append(dfsingle)
        year += 1

    dfall = pd.concat(dflist)
    dfposprofit = pd.DataFrame(
        dfall[(dfall["net_profits"] > 0) & (dfall["profits_yoy"] > 0)])
    # Count in how many reports each stock passed the filter.
    result = dfposprofit.groupby(["code", "name"]).size().reset_index()
    print(result.columns)
    result.columns = ["code", "name", "poscontinue"]
    print(result)
    result = pd.DataFrame(
        result[(result["poscontinue"] >= 3)],
        columns=["code", "name"]).drop_duplicates()
    print(result)
    result.to_csv(outreport, index=False)
def main():
    """Load every quarterly report from 2004 through 2016 into MySQL.

    Each quarter is read from its local pickle cache when available;
    otherwise it is downloaded from tushare, cached, and then written to the
    table ``report_<year>_<season>`` (replacing any existing table).
    """
    # One engine serves every table; it does not depend on the loop state.
    engine = create_engine(
        'mysql+pymysql://' + DATABASS_USER_NAME + ':' + DATABASS_PASSWORD
        + '@127.0.0.1/' + DATABASS_NAME + '?charset=utf8')
    for year in range(2004, 2017):
        for season in range(1, 5):
            cache_path = 'report_' + str(year) + '_' + str(season) + '.pkl'
            try:
                # A local record exists.
                with open(cache_path, "rb") as f:
                    report = pickle.load(f)
            except (IOError, OSError, EOFError, pickle.UnpicklingError):
                # Nothing usable cached locally: download the report three
                # times and merge the results, dropping duplicate rows.
                downloads = [ts.get_report_data(year, season) for _ in range(3)]
                report_all3 = pd.concat(downloads, axis=0, join='outer')
                report = report_all3.drop_duplicates()
                # Open the cache file only after the download succeeded so a
                # failed download cannot leave an empty pickle behind.
                with open(cache_path, "wb") as f:
                    pickle.dump(report, f)
            # Store in the database.
            report.to_sql('report_' + str(year) + '_' + str(season), engine,
                          if_exists='replace')
def save_report_data():
    """Download the 2017 Q2 and Q1 tushare reports and save each as CSV."""
    ts.get_report_data(2017, 2).to_csv(CSV_REPORT_DATA_S2)
    ts.get_report_data(2017, 1).to_csv(CSV_REPORT_DATA_S1)
def get_report_data(self, year, loops):
    """Download the Q4 report of each year in *year*, retrying failures.

    Args:
        year: mutable list of years. Every year whose report was saved to
            ``report_data_<year>.csv`` is removed from it, so the list holds
            the still-missing years when the method returns.
        loops: upper bound of the pass counter; ``loops - 1`` passes are made.
    """
    for i in range(1, loops):
        print(i)
        # Iterate over a snapshot: removing from the list being iterated
        # makes the loop skip the element that follows each removal.
        for j in list(year):
            print(year)
            try:
                ts.get_report_data(j, 4).to_csv(
                    'report_data_%d.csv' % j, encoding='utf-8')
            except Exception:
                # Best effort: leave the year in the list for the next pass.
                continue
            print(j)
            year.remove(j)
def _codes_with_growth(codes, newer, older, threshold):
    """Return the codes whose net profit grew by more than *threshold*."""
    selected = []
    for code in codes:
        if code not in older.index:
            continue
        rate = ((newer.net_profits[code] - older.net_profits[code])
                / older.net_profits[code])
        try:
            rate = float(rate)
        except (TypeError, ValueError):
            # A code that occurs more than once yields a Series rather than
            # a scalar; use its first entry.
            rate = rate.values[0]
        if rate > threshold:
            selected.append(code)
    return selected


def find2017():
    """Return codes with strong net-profit growth over three periods.

    A code is kept when its net profit grew by more than 25% from the 2016 Q4
    report to the 2017 Q3 report, by more than 30% from 2015 Q4 to 2016 Q4,
    and by more than 30% from 2014 Q4 to 2015 Q4. The intermediate lists are
    printed as they are computed.
    """
    r2017 = ts.get_report_data(2017, 3).set_index("code")
    r2016 = ts.get_report_data(2016, 4).set_index("code")
    l1 = _codes_with_growth(r2017.index, r2017, r2016, 0.25)
    print(l1)

    r2015 = ts.get_report_data(2015, 4).set_index("code")
    l2 = _codes_with_growth(l1, r2016, r2015, 0.3)

    r2014 = ts.get_report_data(2014, 4).set_index("code")
    print(l2)
    l3 = _codes_with_growth(l2, r2015, r2014, 0.3)
    print(l3)
    return l3
def basic_information():
    """Append the 2017 fundamentals tables from tushare to the database.

    Cash-flow, debt-paying, growth, operation and profit data are taken for
    quarter 1; the report data is taken for quarter 2.
    """
    uploads = (
        ('get_cashflow_data', 1, 'cash_flow'),
        ('get_debtpaying_data', 1, 'debtpaying'),
        ('get_growth_data', 1, 'growth'),
        ('get_operation_data', 1, 'operation'),
        ('get_profit_data', 1, 'profit'),
        ('get_report_data', 2, 'report'),
    )
    for fetcher_name, quarter, table in uploads:
        frame = getattr(ts, fetcher_name)(2017, quarter)
        frame.to_sql(table, engine, if_exists='append')
    print('basic information over ....')
def _fetch_finance():
    """Download all fundamentals for 2004-2017 and insert them into ``finance``.

    For every year and quarter the report, profit, operation, growth,
    debt-paying and cash-flow tables are fetched, reduced to the columns of
    interest, left-joined on ``code`` and inserted as one record per row.
    """
    for year in range(2004, 2018):
        for quarter in range(1, 5):
            print(year, ' year ', 'quarter ', quarter)
            rep = ts.get_report_data(
                year, quarter)[['code', 'eps', 'bvps', 'epcf', 'report_date']]
            pro = ts.get_profit_data(year, quarter)[[
                'code', 'roe', 'net_profit_ratio', 'gross_profit_rate',
                'net_profits', 'business_income', 'bips'
            ]]
            ope = ts.get_operation_data(year, quarter)[[
                'code', 'arturnover', 'arturndays', 'inventory_turnover',
                'currentasset_turnover', 'currentasset_days'
            ]]
            gro = ts.get_growth_data(
                year, quarter)[['code', 'mbrg', 'nprg', 'nav', 'epsg', 'seg']]
            deb = ts.get_debtpaying_data(year, quarter)[[
                'code', 'currentratio', 'quickratio', 'cashratio', 'icratio',
                'sheqratio', 'adratio'
            ]]
            cas = ts.get_cashflow_data(year, quarter)[[
                'code', 'cf_sales', 'rateofreturn', 'cf_nm', 'cf_liabilities',
                'cashflowratio'
            ]]

            # rename() returns a new frame, which avoids modifying a column
            # slice of the downloaded frame in place.
            rep = rep.rename(columns={'report_date': 'date'})
            # Prefix the report date with the year being processed.
            rep['date'] = rep['date'].apply(lambda x, y=year: str(y) + '-' + x)
            for extra in (pro, ope, gro, deb, cas):
                rep = rep.merge(extra, on='code', how='left')

            # The full orient name is required: the 'record' abbreviation is
            # no longer accepted by current pandas releases.
            finance.insert(rep.to_dict('records'))
            print(year, quarter)
def valuation_factor(year):
    """Save the six Q4 fundamentals tables of *year* as code-sorted CSV files.

    Each table is sorted by ``code``, re-indexed from zero and written to its
    own directory under ``/home/yirui/Desktop/Quant``.
    """
    targets = (
        ('get_report_data', "/home/yirui/Desktop/Quant/Report/%s.csv"),
        ('get_profit_data', "/home/yirui/Desktop/Quant/Profit/%s.csv"),
        ('get_operation_data', "/home/yirui/Desktop/Quant/Operation/%s.csv"),
        ('get_growth_data', "/home/yirui/Desktop/Quant/Growth/%s.csv"),
        ('get_debtpaying_data', "/home/yirui/Desktop/Quant/Debtpaying/%s.csv"),
        ('get_cashflow_data', "/home/yirui/Desktop/Quant/Cashflow/%s.csv"),
    )
    for fetcher_name, template in targets:
        table = getattr(ts, fetcher_name)(year, 4)
        ordered = table.sort_values(by='code', axis=0, ascending=True)
        ordered.reset_index(drop=True).to_csv(template % year, mode="w")
def getData():
    """Download the 2010 Q3 and Q4 reports and save them as ``<year>-<season>``.

    Original author's note: the tushare source was modified so that the data
    only includes financial-sector stocks.
    """
    periods = [(year, season)
               for year in range(2010, 2011)
               for season in range(3, 5)]
    for year, season in periods:
        target = '{}-{}'.format(year, season)
        ts.get_report_data(year, season).to_csv(
            target, sep=',', encoding='utf-8')
def get_stock_report_manual(year, season):
    """Copy one quarterly report into the MySQL table ``stock_report_<year>s<season>``.

    The table is created when missing. Rows are inserted one by one, each
    followed by a commit and a print of the stock name; numeric fields are
    stored as the text form of their float value.
    """
    frame = ts.get_report_data(year, season)
    table_name = 'stock_report_' + str(year) + 's' + str(season)
    db = MySQLdb.connect(host='localhost',
                         port=3306,
                         user='******',
                         passwd='123456',
                         db='stock_ts',
                         charset='utf8')
    try:
        cursor = db.cursor()
        createDBSql = 'create table if not exists ' + table_name + '(code varchar(10), name varchar(16), eps text, eps_yoy text, bvps text, roe text, epcf text, net_profits text, profits_yoy text, distrib text, report_date text)'
        cursor.execute(createDBSql)

        # Values are passed as query parameters so that quotes inside a
        # name cannot break (or inject into) the statement.
        insert_sql = ('insert into ' + table_name
                      + '(code, name, eps, eps_yoy, bvps, roe, epcf, '
                        'net_profits, profits_yoy, distrib, report_date) '
                        'values(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)')
        numeric_columns = ('eps', 'eps_yoy', 'bvps', 'roe', 'epcf',
                           'net_profits', 'profits_yoy')
        # iterrows() walks rows by position, so gaps in the index labels
        # cannot cause a KeyError the way frame[col][i] lookups can.
        for _, row in frame.iterrows():
            params = [str(row['code']), str(row['name'])]
            params.extend(str(float(row[col])) for col in numeric_columns)
            params.extend([str(row['distrib']), str(row['report_date'])])
            cursor.execute(insert_sql, params)
            db.commit()
            print(row['name'])
    finally:
        # Release the connection even when an insert fails.
        db.close()
def get_report_data(self, year, quarter):
    """Return the tushare report for the given year and quarter, passed through ``To_Json``."""
    return To_Json(ts.get_report_data(year=year, quarter=quarter))
def get_stock_report(year, season, engine):
    """Store one quarterly report in the database or in a CSV file.

    The destination is named ``stock_report_<year>s<season>``. When the
    module-level ``useDB`` flag equals ``True`` the report is written through
    *engine*; otherwise it goes to ``<name>.csv``.
    """
    frame = ts.get_report_data(year, season)
    table_name = 'stock_report_{}s{}'.format(year, season)
    if not (useDB == True):
        frame.to_csv(table_name + '.csv')
        return
    frame.to_sql(table_name, engine)
def fImportReportData(year="", quarter=""):
    """Append one quarterly report, tagged with its period, to Oracle.

    The report gets ``YEAR`` and ``QUARTER`` columns holding the arguments
    and is appended to the table ``tb_stock_report_data``.
    """
    report = ts.get_report_data(year=year, quarter=quarter)
    for column, value in (('YEAR', year), ('QUARTER', quarter)):
        report[column] = value
    # NOTE(review): the connection string embeds credentials in source.
    target = create_engine('oracle://c##tushare:didierg160@myoracle')
    report.to_sql('tb_stock_report_data', target, if_exists='append')
def export(exportType, datePicker):
    """Export one fundamentals table as an Excel attachment.

    Args:
        exportType: one of ``report``, ``profit``, ``operation``, ``growth``,
            ``debtpaying`` or ``cashflow``.
        datePicker: value handed to ``getYearQuarter`` to obtain the period.

    Returns:
        The ``send_file`` response for the generated workbook, with a UTF-8
        ``filename*`` entry appended to its Content-Disposition header.

    Raises:
        ValueError: if *exportType* is not one of the supported kinds.
    """
    # Maps the export kind to (file-name prefix, tushare function name).
    reports = {
        "report": ("业绩报表", "get_report_data"),
        "profit": ("盈利能力报表", "get_profit_data"),
        "operation": ("营运能力报表", "get_operation_data"),
        "growth": ("成长能力报表", "get_growth_data"),
        "debtpaying": ("偿债能力报表", "get_debtpaying_data"),
        "cashflow": ("现金流量报表", "get_cashflow_data"),
    }
    # Reject unknown kinds before doing any work; previously they fell
    # through every branch and failed later on an unbound local name.
    if exportType not in reports:
        raise ValueError("unknown exportType: %r" % (exportType,))
    name, fetcher_name = reports[exportType]

    [year, quarter] = getYearQuarter(datePicker)
    sb = getattr(ts, fetcher_name)(year, quarter)

    filename = quote(name + str(year) + "Q" + str(quarter) + ".xlsx")
    filepath = os.path.join(basedir, app.config['UPLOAD_FOLDER'], filename)
    sb.to_excel(filepath)
    rtn = send_file(filepath, as_attachment=True)
    rtn.headers['Content-Disposition'] += "; filename*=utf-8''%s" % (filename)
    return rtn
def get_basic_datas(data_kind):
    """Download one kind of fundamentals for every season and store it in MongoDB.

    Args:
        data_kind: one of ``debtpaying``, ``growth``, ``operation``,
            ``profit`` or ``report``; it is also the name of the collection
            (in the ``stock`` database) that receives the records.

    Years run from ``STARTYEAR`` to ``ENDYEAR`` inclusive. When a download
    fails, the remaining seasons of that year are skipped and processing
    continues with the next year.
    """
    fetchers = {
        'debtpaying': 'get_debtpaying_data',
        'growth': 'get_growth_data',
        'operation': 'get_operation_data',
        'profit': 'get_profit_data',
        'report': 'get_report_data',
    }
    if data_kind not in fetchers:
        print('Not available data type of data_kind!')
        return
    fetch = getattr(ts, fetchers[data_kind])

    client = pymongo.MongoClient('localhost', 27017)
    table_stock = client['stock']
    sheet = table_stock[data_kind]
    for year in range(STARTYEAR, ENDYEAR + 1):
        try:
            for season in range(1, 5):
                # The progress message used to reference the undefined name
                # `datakind`; the resulting NameError was swallowed below,
                # so no season was ever downloaded.
                print('getting ' + data_kind + ' data at year:' + str(year) +
                      " season:" + str(season))
                tf = fetch(year, season)
                jsonres = json.loads(tf.to_json(orient='records'))
                for j in jsonres:
                    sheet.insert_one(j)
        except Exception:
            # Per the original note: when data is missing, the tushare
            # interface reports a network error.
            print('the year: ' + str(year) +
                  ' lost data will begin next year')
            continue
def update_basics():
    """Refresh ``basics.h5`` and cache fundamentals for the last 20 seasons.

    Starting at the season returned by ``last_report_season()`` and walking
    backwards, every season without a ``basics-<year>-<season>.h5`` file in
    ``DATA_DIR`` is downloaded and stored with one HDF key per table.
    """
    basics = ts.get_stock_basics()
    f = os.path.join(DATA_DIR, 'basics.h5')
    basics.to_hdf(f, 'basics')

    length = 4 * 5
    year, season = last_report_season()
    tables = (
        ('report', 'get_report_data'),
        ('profit', 'get_profit_data'),
        ('operation', 'get_operation_data'),
        ('growth', 'get_growth_data'),
        ('debtpaying', 'get_debtpaying_data'),
        ('cashflow', 'get_cashflow_data'),
    )
    for _ in range(length):
        f = os.path.join(DATA_DIR, 'basics-{0}-{1}.h5'.format(year, season))
        if not os.path.exists(f):
            for key, fetcher_name in tables:
                getattr(ts, fetcher_name)(year, season).to_hdf(f, key)

        # Always step back one season. Skipping this step for seasons that
        # are already cached made the loop test the same file 20 times.
        season -= 1
        if season == 0:
            season = 4
            year -= 1
def report_data():
    """Save the report of the quarter before the current one to ``report_data.csv``.

    The quarter is derived from the local clock: January-March selects the
    fourth quarter of the previous year, April-June the first quarter,
    July-September the second and October-December the third.
    """
    now = time.localtime(time.time())
    nowYear = now.tm_year
    # Zero-based index of the current quarter equals the number of the
    # previous quarter; index 0 wraps around to Q4 of the previous year.
    quarter = (now.tm_mon - 1) // 3
    if quarter == 0:
        nowYear -= 1
        quarter = 4
    ts.get_report_data(nowYear, quarter).to_csv('report_data.csv')
def stat_stock_report(tmp_datetime, max_year=11):
    """Store the annual (Q4) reports of the *max_year* years before the current one.

    Original author's notes: from now on, fetching the previous year's annual
    report each July is enough and history need not be re-fetched. Experience
    shows the 2018 annual report was still incomplete in April 2019, hence
    the delay until July.

    Each year is attempted up to four times; an ``IOError`` or an empty
    result counts as a failed attempt. Every attempt is followed by a
    5-second pause.
    """
    cur_year = int(tmp_datetime.strftime("%Y"))
    MAX_RETRY_TIME = 3
    for year in range(cur_year - max_year, cur_year):
        for _attempt in range(MAX_RETRY_TIME + 1):
            try:
                data = ts.get_report_data(year, 4)
            except IOError:
                data = None

            fetched = data is not None and len(data) > 0
            if fetched:
                print("\nyear done", year)
                data.insert(0, "year", [year] * len(data))
                common.insert_db(data, "ts_stock_report", False,
                                 "`year`,`code`")
            else:
                print("\nno data . stock_report year", year)

            time.sleep(5)  # pause for 5 seconds
            if fetched:
                break
def __init__(self, year=None, quarter=None, save=True, updateAll=False,
             beginyear=1990, beginquarter=1):
    """Fetch the quarterly company performance report.

    Args:
        year: report year; the current year is used when *year* or *quarter*
            is None.
        quarter: report quarter; the current quarter is used when *year* or
            *quarter* is None.
        save: forwarded to ``database``.
        updateAll: when true, re-fetch every quarter from
            (*beginyear*, *beginquarter*) up to the requested one instead of
            just the requested quarter.
        beginyear: first year of the full update.
        beginquarter: first quarter of the full update.
    """
    now = dt.datetime.now()
    if year is None or quarter is None:
        year, quarter = now.year, math.ceil(now.month / 3.0)

    if updateAll:
        # Build the sequence of quarters and fetch each one by creating a
        # new instance for it.
        for y, q in get_quarters((beginyear, beginquarter), (year, quarter)):
            companyRepor(y, q)
        return

    try:
        profitData = ts.get_report_data(year, quarter)
        stamps = (
            ('datatime', now.strftime('%Y-%m-%d')),
            ('datatimestramp', now.strftime('%H:%M:%S')),
            ('year', year),
            ('quarter', quarter),
        )
        for column, value in stamps:
            profitData[column] = value
        # Columns used as the database index.
        database(profitData, ['code', 'year', 'quarter'], 'companyRepor', save)
    except:
        traceback.print_exc()
def get_data(self, year, quarter):
    """Save the stock basics together with four consecutive quarterly reports.

    The reports fetched are those for ``quarter + 1`` through ``quarter + 4``
    of *year*, concatenated in that order.
    """
    data = ts.get_stock_basics()
    data_roe = pd.DataFrame()
    for offset in range(1, 5):
        fetched = ts.get_report_data(year, quarter + offset)
        data_roe = pd.concat((data_roe, fetched))
    self.save_data(data, data_roe)
def stock_report(year, quarter, k_index):
    """Return one column of a quarterly report, indexed by stock code.

    :param year: report year, must be larger than 0
    :param quarter: report quarter, 1 to 4
    :param k_index: name of the column to return; one of ``name``, ``eps``,
        ``eps_yoy``, ``bvps``, ``roe``, ``epcf``, ``net_profits``,
        ``profits_yoy``, ``distrib`` or ``report_date``
    :return: the requested column for all codes, with ``code`` as the index
    :raises ValueError: if any argument is outside the ranges above
    """
    index_list = [
        'name', 'eps', 'eps_yoy', 'bvps', 'roe', 'epcf', 'net_profits',
        'profits_yoy', 'distrib', 'report_date'
    ]
    if k_index not in index_list:
        raise ValueError('invalid k_index - the setting is not in the scope')
    if year <= 0:
        raise ValueError('invalid year that should be larger than 0')
    # `or`, not `and`: no number is both <= 0 and > 4, so the previous check
    # could never reject anything.
    if quarter <= 0 or quarter > 4:
        raise ValueError('invalid quarter that we just 4 quarter in market')

    data = ts.get_report_data(year, quarter)
    new_data = data.set_index(['code'])
    return new_data[k_index]
def call_report_v1(year, quarter):
    '''
    Return the quarterly report, cached in the HDF store at COMMEN_FILE_PATH.

    The report is read from the key ``v1_report_<year>_<quarter>`` when
    present; otherwise it is downloaded and, unless empty, stored under that
    key.

    Columns of the report:
    code, stock code
    name, stock name
    esp, earnings per share
    eps_yoy, earnings per share, year over year (%)
    bvps, net assets per share
    roe, return on equity (%)
    epcf, cash flow per share (yuan)
    net_profits, net profit (10k yuan)
    profits_yoy, net profit, year over year (%)
    distrib, distribution plan
    report_date, publication date
    '''
    key = f'v1_report_{year}_{quarter}'
    stores = pd.HDFStore(COMMEN_FILE_PATH)
    try:
        if key in stores:
            return stores[key]
        df = ts.get_report_data(year, quarter)
        if df.empty:
            # Do not cache an empty download.
            return df
        stores[key] = df
        if DEBUG:
            # The message used to reference an undefined `filePath`.
            print('STORE:', COMMEN_FILE_PATH, key)
        return df
    finally:
        # Close on every path, including the early return for empty data.
        stores.close()
def store_fund_data(quarter_list):
    """Save the combined fundamentals of each (year, quarter) as one CSV file.

    For every pair in *quarter_list* the report, profit, operation, growth,
    debt-paying and cash-flow tables are downloaded, reduced to one row per
    stock code, joined side by side on the code and written to
    ``<LastFilePath>/stock_fundm_info/<year>/<quarter>_fundamt_info.csv``.
    """
    stock2year_path = os.path.join(LastFilePath, "stock_fundm_info")
    fetcher_names = (
        'get_report_data',
        'get_profit_data',
        'get_operation_data',
        'get_growth_data',
        'get_debtpaying_data',
        'get_cashflow_data',
    )
    for fun_year, fun_quarter in quarter_list:
        stock2year_list = []
        for fetcher_name in fetcher_names:
            frame = getattr(ts, fetcher_name)(fun_year, fun_quarter)
            # Keep only the first row per code, then index by code. The
            # indexed frame must be the one that is concatenated: the
            # previous loop rebound its loop variable and left the list
            # untouched, so tables were aligned by row position instead.
            frame = frame.drop_duplicates(subset='code', keep='first')
            stock2year_list.append(frame.set_index('code'))

        # Combine all tables along the columns, aligned on the stock code.
        total_fund = pd.concat(stock2year_list, axis=1)

        # str() is needed because the year and quarter may be integers.
        HeadName = str(fun_year) + "/" + str(fun_quarter) + "_" + "fundamt_info"
        CsvName = os.path.join(stock2year_path, "{}.csv".format(HeadName))
        # The "/" in HeadName puts the file in a per-year sub-directory,
        # which has to exist before the file can be written.
        target_dir = os.path.dirname(CsvName)
        if not os.path.isdir(target_dir):
            os.makedirs(target_dir)
        total_fund.to_csv(CsvName)
def getThingsEveryday(self):
    """Refresh the yearly spreadsheets (when flagged) and the forward-EPS table.

    When the update flag is set, last year's Q4 growth, profit and report
    tables are written to Excel files in the working directory. Afterwards
    the stock basics and today's quotes are merged on an integer ``code``
    column and ``feps`` is computed as ``settlement / pe``.
    """
    yearEnd = datetime.now().year - 1
    if self.__flagUpdateReport:
        exports = (
            ('get_growth_data', 'Growth.xls', 'Growth'),
            ('get_profit_data', 'Profit.xls', 'Profit'),
            ('get_report_data', 'y.xls', 'Report'),
        )
        for fetcher_name, suffix, sheet in exports:
            table = getattr(ts, fetcher_name)(yearEnd, 4)
            table.to_excel('./' + str(yearEnd) + suffix, sheet_name=sheet)

    # Stock basics: source of yesterday's PE.
    basics = self.__stockBasics = ts.get_stock_basics()
    basics['code'] = basics.index.astype(int)
    basics.sort_index(inplace=True)

    # Today's quotes: source of the previous close.
    quotes = self.__stockTodayAll = ts.get_today_all()
    quotes['code'] = quotes['code'].astype(int)

    merged = self.__pdForwardEps = pd.merge(basics, quotes, on='code')
    merged['feps'] = merged['settlement'] / merged['pe']
    print('\n')
def stat_all(tmp_datetime):
    """Store all six fundamentals tables for the quarter of 31 days ago.

    The year and quarter are taken from the date 31 days before
    *tmp_datetime*. Each table is tagged through ``concat_quarter``, reduced
    to the last row per stock code and inserted through ``db.insert_db``.
    """
    global db
    # Step back 31 days so that statistics are made for the earlier period.
    tmp_datetime_1month = tmp_datetime + datetime.timedelta(days=-31)
    year = int(tmp_datetime_1month.strftime("%Y"))
    quarter = int(pd.Timestamp(tmp_datetime_1month).quarter)
    # Use % formatting; passing the values as extra print() arguments left
    # the placeholders unexpanded in the output.
    print("############ year %d, quarter %d" % (year, quarter))

    sources = (
        ('get_report_data', "ts_report_data"),          # performance report (main table)
        ('get_profit_data', "ts_profit_data"),          # profitability
        ('get_operation_data', "ts_operation_data"),    # operating capability
        ('get_growth_data', "ts_growth_data"),          # growth capability
        ('get_debtpaying_data', "ts_debtpaying_data"),  # debt-paying capability
        ('get_cashflow_data', "ts_cashflow_data"),      # cash flow
    )
    for fetcher_name, table in sources:
        data = getattr(ts, fetcher_name)(year, quarter)
        # Add the quarter field.
        data = concat_quarter(year, quarter, data)
        # Remove duplicates, keeping the latest row. Per the original note
        # this must come last, otherwise the concat step has problems.
        data = data.drop_duplicates(subset="code", keep="last")
        # Insert into the database.
        db.insert_db(data, table, True, "`quarter`,`code`")
def _load_report(self, year, season):
    """Load a quarterly report (local cache first) and extract this ticker's figures.

    Args:
        year: report year (int).
        season: report quarter (int, 1 to 4).

    Returns:
        ``(report_date, [eps_yoy, roe, profits_yoy])``. The three figures
        are the report's percentages divided by 100. When the ticker is not
        in the report the result is ``(None, [0.0, 0.0, 0.0])``.
    """
    report_date = None
    eps_yoy, roe, profits_yoy = 0.0, 0.0, 0.0
    cache_path = 'report_' + str(year) + '_' + str(season) + '.pkl'
    try:
        # A local record exists.
        with open(cache_path, "rb") as f:
            report = pickle.load(f)
    except (IOError, OSError, EOFError, pickle.UnpicklingError):
        # Nothing usable cached locally: download the report first and only
        # then create the cache file, so that a failed download cannot
        # leave an empty pickle behind.
        report = ts.get_report_data(year, season)
        with open(cache_path, "wb") as f:
            pickle.dump(report, f)

    for i in range(report.index.size):
        if report.iloc[i, 0] != self.ticker:
            continue
        # A fourth-quarter report is published in the following year.
        report_year = year + 1 if season == 4 else year
        # Column 10 holds the publication date as text such as '06-16'.
        published = report.iloc[i, 10]
        report_date = date(report_year, int(published[0:2]),
                           int(published[3:5]))
        eps_yoy = report.iloc[i, 3] / 100
        roe = report.iloc[i, 5] / 100
        profits_yoy = report.iloc[i, 8] / 100
        break
    return report_date, [eps_yoy, roe, profits_yoy]
def main():
    """Print library versions and the PE / ROE ratio per stock for 2018 Q1."""
    print("pd version:%s" % pd.__version__)
    print("tushare version:%s" % ts.__version__)

    databasic = ts.get_stock_basics()
    dta8_1 = ts.get_report_data(2018, 1)
    # Index the report by stock code (one row per code) before dividing:
    # pandas aligns the two operands on their index labels, so dividing by
    # the report's row-numbered 'roe' column never lined up with the basics.
    # NOTE(review): assumes the basics frame is indexed by the stock code -
    # confirm against the tushare version in use.
    roe_by_code = dta8_1.drop_duplicates(subset='code').set_index('code')['roe']
    dtaROE = databasic['pe'] / roe_by_code
    print(dtaROE)
def call_report_v1(year, quarter):
    '''
    Return the quarterly report, cached as a CSV file under COMMEN_FILE_PATH.

    The report is read from ``v1_report_<year>_<quarter>.csv`` when that file
    exists; otherwise it is downloaded and, unless empty, written there.

    Columns of the report:
    code, stock code
    name, stock name
    esp, earnings per share
    eps_yoy, earnings per share, year over year (%)
    bvps, net assets per share
    roe, return on equity (%)
    epcf, cash flow per share (yuan)
    net_profits, net profit (10k yuan)
    profits_yoy, net profit, year over year (%)
    distrib, distribution plan
    report_date, publication date
    '''
    filePath = COMMEN_FILE_PATH + f'v1_report_{year}_{quarter}.csv'
    if os.path.exists(filePath):
        # Restore the frame as it was written: the first CSV column is the
        # saved index, and codes must stay text so leading zeros survive.
        return pd.read_csv(filePath, index_col=0, dtype={'code': str})

    df = ts.get_report_data(year, quarter)
    if df.empty:
        # Do not cache an empty download.
        return df
    df.to_csv(filePath)
    if DEBUG:
        print('STORE:', filePath)
    return df
def get_temp_data(year, quarter):
    """Return all six fundamentals tables of one quarter joined into one frame.

    The report table is inner-joined on ``code`` and ``name`` with the
    profit, operation, growth, debt-paying and cash-flow tables, in that
    order. The intermediate frame is printed after the growth and the
    debt-paying merges. Values of the first column are converted to ``str``.
    """
    df1 = ts.get_report_data(year, quarter)
    # (tushare function name, print the frame after merging?)
    merge_plan = (
        ('get_profit_data', False),
        ('get_operation_data', False),
        ('get_growth_data', True),
        ('get_debtpaying_data', True),
        ('get_cashflow_data', False),
    )
    for fetcher_name, show in merge_plan:
        extra = getattr(ts, fetcher_name)(year, quarter)
        df1 = df1.merge(extra, how='inner', on=['code', 'name'])
        if show:
            print(df1)

    # Convert the whole first column at once instead of assigning cell by
    # cell through iloc, which is very slow on large frames.
    first_column = df1.columns[0]
    df1[first_column] = df1[first_column].map(str)
    return df1
def download_ACH_Q(year, quarter, df):
    """Copy one quarter's report values into *df*, one column per report field.

    Args:
        year: report year.
        quarter: report quarter.
        df: frame indexed by stock code; it is updated in place (rows and
            columns are added as needed) and returned.

    Returns:
        *df*, with cells ``df.loc[code, '<field>-<year>-<quarter>']`` filled
        from the report. It is returned unchanged when the download fails or
        yields no rows.
    """
    Data = df
    try:
        achievement = pd.DataFrame(ts.get_report_data(year, quarter))
    except Exception:
        # Without a report there is nothing to copy; continuing used to
        # fail on the then-undefined `achievement`.
        return Data
    if achievement.empty:
        return Data

    achievement = achievement.set_index('code').sort_index()
    for title_name in achievement.columns:
        print(title_name)
        column_label = '%s-%s-%s' % (title_name, year, quarter)
        # NOTE(review): the first code of the sorted index is skipped, as in
        # the original loop - confirm whether that is intentional.
        for code_ in achievement.index[1:]:
            value = achievement.loc['%s' % code_, '%s' % title_name]
            if isinstance(value, pd.Series):
                # The code occurs more than once: use its first row.
                value = value.iloc[0]
            # NaN never compares equal to anything, so a `!= NaN` test is
            # always true; pd.isnull is the working missing-value check.
            if pd.isnull(value):
                continue
            # .loc replaces the removed .ix indexer.
            Data.loc['%s' % code_, column_label] = value
    return Data
def value_factor(end_year, season):
    '''
    Compute the value factors.

    Value factors: earnings per share, operating cash flow per share and net
    assets per share, each as a ratio to the price, plus the dividend yield
    read from a local CSV file. ST stocks are removed.
    '''
    report = ts.get_report_data(
        end_year, season)[["name", "code", "eps", "epcf", "bvps"]]
    prices = ts.get_today_all()[["name", "code", "settlement"]]

    if end_year in range(2005, 2012):
        yield_file = "2005_2011.csv"
    else:
        yield_file = "2012_2018.csv"
    yields = pd.read_csv(yield_file, dtype={'code': str})
    yields = yields[["code", str(end_year)]]
    yields.columns = ["code", "interest_rate"]

    data = pd.merge(pd.merge(report, prices, how='inner'), yields, how='inner')
    # Drop ST stocks.
    data = data[data.name.map(lambda x: "ST" not in x)]
    for field in ("eps", "epcf", "bvps"):
        data[field + "_rate"] = data[field] / data["settlement"]

    selected = data[[
        "name", "code", "eps_rate", "epcf_rate", "bvps_rate", "interest_rate"
    ]]
    return selected.drop_duplicates().fillna(0.0)
def get_report_data(year, season):
    """Download and save all six fundamentals tables of one season.

    Nothing is done (and None is returned) when ``available(year, season)``
    is false. Otherwise each table's function name is printed before the
    table is fetched and handed to ``save``.
    """
    if not available(year, season):
        return None
    steps = (
        ("get_report_data", "basics/report_data"),
        ("get_profit_data", "basics/profit_data"),
        ("get_operation_data", "basics/operation_data"),
        ("get_growth_data", "basics/growth_data"),
        ("get_debtpaying_data", "basics/debtpaying_data"),
        ("get_cashflow_data", "basics/cashflow_data"),
    )
    for fetcher_name, target in steps:
        print(fetcher_name)
        save(getattr(ts, fetcher_name)(year, season), target, year, season)
def divident_rate(self):
    """Summarise the dividend history of ``self.id``.

    For every dividend whose registration date is a known trading day, the
    cash dividend per share, the dividend yield at that day's close, the
    implied PE (100 / yield) and the payout ratio against the annual EPS
    are collected. Averages are printed and the per-year details returned.

    Returns:
        DataFrame indexed by dividend year with the columns ``cash_div``,
        ``div_ratio(%)``, ``ape`` and ``bonus_ratio(%)``.
    """
    stock = ts.get_hist_data(self.id)
    df = dividend_rate.get_bonus_table(self)
    df_dividend = df[['年度', '派息', '登记日']]
    stock_close_price = stock["close"]
    trading_days = set(stock_close_price.index.tolist())
    # Registration dates of the dividends.
    regis = df_dividend['登记日'].tolist()

    close_price = []
    diVi = []
    aPe = []
    bonus = []
    div_year = []
    # Annual reports by year, so that each year is downloaded only once
    # even when several dividends fall into it.
    annual_reports = {}
    for i in regis:
        if i == "--" or i not in trading_days:
            continue
        close_price.append(stock_close_price.loc[i])
        matched = df_dividend[df_dividend['登记日'] == i]
        aDiv = matched['派息'].tolist()[0]
        year = matched['年度'].values[0]
        div_year.append(year)

        # EPS currently comes from tushare; per the original note it could
        # later be looked up in a local database instead.
        if year not in annual_reports:
            annual_reports[year] = ts.get_report_data(year, 4)
            print('')
        profit_table = annual_reports[year]
        target_eps = profit_table[profit_table['code'] == self.id]['eps'].values
        eps = target_eps[0].item()  # numpy.float64 -> float
        per_bonus = round(float(aDiv) / 10 / eps * 100, 2)
        bonus.append(per_bonus)
        diVi.append(float(aDiv) / 10)  # dividend per 10 shares -> per share

    div_ratio = []
    for i, j in zip(diVi, close_price):
        adivr = float(i) / float(j) * 100
        div_ratio.append(round(adivr, 2))
        aPe.append(round(100 / adivr, 2))

    reDf = pd.DataFrame(
        {
            "cash_div": diVi,  # cash dividend per share
            "div_ratio(%)": div_ratio,  # dividend yield
            'ape': aPe,  # implied PE
            'bonus_ratio(%)': bonus  # payout ratio
        },
        index=div_year)

    def _average(values):
        # An empty history has no average; report NaN instead of dividing
        # by zero.
        if not values:
            return float('nan')
        return round(sum(values) / len(values), 2)

    # Summary output.
    print(self.id + '分红情况统计如下:')
    avg_bonus = _average(bonus)
    print('1.平均分红率:', avg_bonus, '%')
    avg_div = _average(div_ratio)
    print('2.平均股息率:', avg_div, '%')
    print('3.详细列表如下所示')
    return reDf
def get_report_data(year, quarter):
    """Append one quarterly report to the MySQL table ``report_data``.

    Any failure is reported on standard output instead of being raised.
    """
    try:
        df = ts.get_report_data(year, quarter)
        engine = create_engine('mysql://*****:*****@127.0.0.1/stock?charset=utf8')
        df.to_sql('report_data', engine, if_exists='append')
        print("message")
    except Exception as e:
        # Show the failure; the previous handler only evaluated e.message
        # and so discarded every error silently.
        print(e)
def sync_report_data():
    '''
    Sync the report data of the current period into MongoDB.

    The season is the current month integer-divided by 3; in January and
    February the fourth season of the previous year is used instead.
    '''
    now = datetime.datetime.now()
    year = now.year
    month = now.month
    # Integer division: "/" yields a float on Python 3, which would end up
    # both in the tushare call and in the period label.
    # NOTE(review): December maps to season 4 of the current year - confirm.
    seaon = month // 3
    if month < 3:
        year = year - 1
        seaon = 4
    monthstr = '%s%s' % (year, seaon)
    DataFrameToMongo(ts.get_report_data(year, seaon),
                     MongoClient(mongourl)['stoinfo']['report_data'],
                     ['code'], monthstr)
def getQuerterReport(year):
    """Download the four quarterly reports of *year* that are not saved yet.

    The list of downloaded files is refreshed before each quarter. A saved
    report is followed by a 5-second pause; failures are logged and skipped.
    """
    for q in range(1, 5):
        getDownloaded()
        if isDownloaded(reportByYear(year, q)):
            continue
        try:
            print('Report of ' + str(year) + ' and quarter is ' + str(q))
            ts.get_report_data(year, q).to_csv(path + reportByYear(year, q))
            time.sleep(5)
        except Exception:
            logError('Report: ' + str(year) + '-' + str(q) +
                     ' there is problem, will skip it. ')
def collect_report_data(year, term):
    """Make sure the report of (*year*, *term*) is stored as CSV.

    The file is ``<BASE_FOLDER>/report/<year>-<term>.csv``; it is downloaded
    only when missing.

    Returns:
        The report directory, or None when anything failed (the error is
        printed).
    """
    try:
        report_data_path = os.path.join(BASE_FOLDER, 'report')
        if not os.path.isdir(report_data_path):
            os.makedirs(report_data_path)

        path = os.path.join(report_data_path, '{}-{}.csv'.format(year, term))
        if os.path.exists(path):
            return report_data_path

        ts.get_report_data(year, term).to_csv(path)
        return report_data_path
    except Exception as ex:
        print("error occurred in retrieving report data: ", ex)
        return None
def profit(self):
    """Save the codes that reported a net loss in both 2015 and 2016.

    The fourth-quarter report is the annual report. Both annual reports are
    also written to Excel files, and the codes with a loss in each year are
    printed.
    """
    df_2016 = ts.get_report_data(2016, 4)
    df_2015 = ts.get_report_data(2015, 4)
    df_2016.to_excel('2016_report.xls')
    df_2015.to_excel('2015_report.xls')

    code_2015_lost = df_2015[df_2015['net_profits'] < 0]['code'].values
    code_2016_lost = df_2016[df_2016['net_profits'] < 0]['code'].values
    # print() calls replace the Python 2 print statements, which are syntax
    # errors on Python 3.
    print(code_2015_lost)
    print(code_2016_lost)

    # A set makes each membership test constant-time.
    lost_2016 = set(code_2016_lost)
    two_year_lost = []
    for i in code_2015_lost:
        if i in lost_2016:
            print(i, end=' ')
            two_year_lost.append(i)
    self.saveList(two_year_lost, 'st_dangours.csv')
def get_basic():
    """Print a few sample lookups: one basics row, one report, one quote."""
    # .loc replaces the .ix indexer, which no longer exists in pandas.
    hsdq = stock_info.loc['300141']
    print(hsdq)
    report = ts.get_report_data(2014, 1)
    print(report)
    print('*' * 20)
    df = ts.get_today_all()
    zrkj = df[df['code'] == '300333']
    print(type(zrkj))
    print(type(zrkj['code']))
    print(zrkj['name'].values[0])
def updatereport():
    """Insert the 2014 Q4 report into the MySQL table ``report20144``.

    Missing (NaN) numeric values are stored as 0 and the report date is
    prefixed with ``2015-``.
    """
    reportdata = pd.DataFrame(ts.get_report_data(2014, 4))
    numeric_columns = ('eps', 'eps_yoy', 'bvps', 'roe', 'epcf',
                       'net_profits', 'profits_yoy')
    values = []
    for _, row in reportdata.iterrows():
        numbers = tuple(0 if math.isnan(row[col]) else row[col]
                        for col in numeric_columns)
        values.append((row['code'], row['name']) + numbers
                      + ('2015-' + row['report_date'],))

    conn = ms.connect(host='localhost', port=3306, user='******',
                      passwd='123456', db='investment', charset="utf8")
    try:
        cur = conn.cursor()
        try:
            cur.executemany('insert into report20144 (code,name,eps,eps_yoy,bvps,roe,epcf,net_profis,profis_yoy,report_date) values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)', values)
            conn.commit()
        finally:
            cur.close()
    finally:
        # Release the connection even when the insert fails.
        conn.close()
def update_basics():
    """Refresh ``basics.h5`` and cache fundamentals for the last 20 seasons.

    Starting at the season derived from today's date and walking backwards,
    every season without a ``basics-<year>-<season>.h5`` file in
    ``base_dir`` is downloaded and stored with one HDF key per table.
    """
    basics = ts.get_stock_basics()
    f = os.path.join(base_dir, 'basics.h5')
    basics.to_hdf(f, 'basics')

    today = datetime.date.today()
    current_year = today.year
    # Integer division: "/" yields a float on Python 3, which would end up
    # in the file names and never compare equal to 0 below.
    current_season = today.month // 3
    if current_season == 0:
        current_year -= 1
        current_season = 4

    length = 4 * 5
    year = current_year
    season = current_season
    tables = (
        ('report', 'get_report_data'),
        ('profit', 'get_profit_data'),
        ('operation', 'get_operation_data'),
        ('growth', 'get_growth_data'),
        ('debtpaying', 'get_debtpaying_data'),
        ('cashflow', 'get_cashflow_data'),
    )
    for _ in range(length):
        f = os.path.join(base_dir, 'basics-{0}-{1}.h5'.format(year, season))
        if not os.path.exists(f):
            print(f)
            for key, fetcher_name in tables:
                getattr(ts, fetcher_name)(year, season).to_hdf(f, key)

        # Always step back one season. Skipping this step for seasons that
        # are already cached made the loop test the same file 20 times.
        season -= 1
        if season == 0:
            season = 4
            year -= 1
def finance_report(year=2018, quarter=2):
    """Upsert the code, name, ROE, EPS and report date of every report row.

    One ``FinanceReport`` document per (code, year, quarter) is created or
    updated from the tushare report of the given period.
    """
    latest_equity = Equity.objects().order_by('-date').first()
    date = latest_equity.date

    df = ts.get_report_data(year, quarter)
    print(df)
    data = df.to_dict('index')
    print(data)
    print(len(data.items()))

    from mongoengine.queryset.visitor import Q
    copied_fields = ('code', 'name', 'report_date', 'roe', 'eps')
    for _, value in sorted(data.items()):
        fields = {key: value[key] for key in copied_fields}
        FinanceReport.objects(
            code=fields['code'], year=year, quarter=quarter,
        ).update_one(year=year, quarter=quarter, upsert=True, **fields)
def store_data(self):
    """Download the 2017 Q3 report table and hand it to ``save_to_excel``.

    The commented-out sections below are earlier, currently disabled exports
    (forecast, profitability, stock basics); they are kept for reference.
    """
    # Forecast data
    # year_2016=ts.forecast_data(2016, 4)
    # self.save_to_excel(year_2016,'2016-profit.xls')
    # year_2017=ts.forecast_data(2017, 4)
    # self.save_to_excel(year_2017,'2017-profit.xls')

    # Profitability
    # profit_2016=ts.get_profit_data(2016,4)
    # profit_2017=ts.get_profit_data(2017,3)
    # self.save_to_excel(profit_2016, '2016-profit.xls')
    # self.save_to_excel(profit_2017, '2017-3rdprofit.xls')

    # Basic stock information
    # basic=ts.get_stock_basics()
    # basic.to_csv('temp.xls',encoding='gbk')
    # df=pd.read_csv('temp.xls',encoding='gbk',dtype={'code':str})
    # # print df
    # self.save_to_excel(df,'Markets.xls')

    # Fundamentals: net assets per share < 1
    report_2017_q3 = ts.get_report_data(2017, 3)
    self.save_to_excel(report_2017_q3, '2017-3rd-report.xls')
""" Created on Wed Oct 07 09:21:05 2015 @author: Fuqian """ from sqlalchemy import create_engine import tushare as ts #import pymongo import pandas as pd df_base = ts.get_stock_basics() df_report_1503 = ts.get_report_data(2015,3) df_profit_1503 = ts.get_profit_data(2015,3) df_growth_1503 = ts.get_growth_data(2015,3) #detail_daily={} engine = create_engine('mysql://*****:*****@127.0.0.1/stock?charset=utf8') for row_index, row in df_base.iterrows(): try: f = open('qfq_err', 'a') f_d = open('detailDay_err','a')
#coding:utf-8
from sqlalchemy import create_engine
import tushare as ts

# define engine
engine = create_engine('mysql://*****:*****@127.0.0.1/tushare?charset=utf8')

# Append the Q4 report of each year to the 'report_data' table, tagging every
# row with a <year><quarter> marker (20144, 20154, 20164) in column 'quater'.
for report_year in (2014, 2015, 2016):
    df = ts.get_report_data(report_year, 4)
    df = df.assign(quater=report_year * 10 + 4)
    df.to_sql('report_data', engine, if_exists='append')
def pick_data(self, max_num_threads=20, pause=0):
    """
    Gather all data needed for the loaded stocks, from the local database
    and from the internet. Expect this call to take a while.

    Steps, in order: initialise the stock objects, load the indexes, merge
    today's quotes, merge the last report period's report / profit /
    operation / growth / debtpaying / cashflow tables, pull the trading
    history, then drop unavailable stocks.

    :param max_num_threads: forwarded to ``_pick_hist_data_and_save``
    :param pause: forwarded to ``_pick_hist_data_and_save``
    :return: whatever ``_pick_hist_data_and_save`` returns
    """
    logging.info('getting basics from tushare')
    self._init_stock_objs()
    # self.data_manager.drop_stock()
    # self.stocks = {key: self.stocks[key] for key in ['600233', '600130']}
    logging.info('totally there are %d listed companies' % len(self.stocks))

    logging.info('get indexes from tushare')
    self._get_indexes()
    # self._pick_hist_data_and_save(self.stocks, False, self.indexes['000001'].hist_start_date, max_num_threads)

    logging.info('getting last stock trading data')
    skipped_quote_columns = ('changepercent', 'open', 'high', 'low', 'settlement',
                             'volume', 'turnoverratio', 'amount')
    self._extract_from_dataframe(ts.get_today_all(),
                                 ignore=skipped_quote_columns,
                                 remap={'trade': 'price', 'per': 'pe'})

    # calculate the report quarter
    report_year, report_quarter = ts.get_last_report_period()
    period = (report_year, report_quarter)

    logging.info('getting last report (%d quarter %d) from tushare' % period)
    self._extract_from_dataframe(ts.get_report_data(*period))

    logging.info('getting last profit data from tushare')
    self._extract_from_dataframe(ts.get_profit_data(*period),
                                 ignore=('net_profits', 'roe', 'eps'))

    # The remaining tables are all fetched and merged the same way.
    for topic in ('operation', 'growth', 'debtpaying', 'cashflow'):
        logging.info('getting last %s data from tushare' % topic)
        fetch = getattr(ts, 'get_%s_data' % topic)
        self._extract_from_dataframe(fetch(*period))

    logging.info('getting history trading data from tushare')
    history_start = self.indexes['000001'].hist_start_date
    data_full = self._pick_hist_data_and_save(self.stocks, False, history_start,
                                              max_num_threads, pause)

    # anything that pulls data must happen before this point
    self._remove_unavailable_stocks()

    # calculate qianfuquan data
    # deprecated due to precision issue
    # for code, stock in self.stocks.items():
    #     for i in range(1, len(stock.hist_data.index)-1):
    #         b = stock.hist_data.at[stock.hist_data.index[i], 'close']
    #         a = stock.hist_data.at[stock.hist_data.index[i+1], 'close']
    #         p = stock.hist_data.at[stock.hist_data.index[i+1], 'p_change'] / 100.0
    #         q = (p*a+a)/b
    #         if q > 1.1:
    #             print('%s chuq-uan %s: %s %s %s, 1/%s' % (stock, stock.hist_data.index[i], b, a, p, q))

    return data_full
import tushare as ts
import marshal
import pickle

# Download the Q4 report, profit and growth tables for one year and pickle
# each of them to a file named after the table, in the working directory.
year = 2015

report_data = ts.get_report_data(year, 4)
# Print only after the download: the original printed the name before it was
# assigned, which raised NameError.
print(report_data)
# open(..., 'wb') replaces file(..., 'w'): file() does not exist on Python 3
# and pickle requires a binary-mode file.  'with' closes the file on error.
with open('report_data', 'wb') as f:
    pickle.dump(report_data, f)

profit_data = ts.get_profit_data(year, 4)
with open('profit_data', 'wb') as f:
    pickle.dump(profit_data, f)

growth_data = ts.get_growth_data(year, 4)
with open('growth_data', 'wb') as f:
    pickle.dump(growth_data, f)
# -*- coding: utf8 -*-
import pandas as pd
import tushare as ts

print(ts.__version__)


def _fetch_tagged(year, quarter):
    """Return one quarter's report with 'year'/'quarter' columns, or None."""
    frame = ts.get_report_data(year, quarter)
    if frame is None:
        # Previously this crashed on item assignment to None.
        print('no report data for %d Q%d, skipped' % (year, quarter))
        return None
    frame['year'] = year
    frame['quarter'] = quarter
    return frame


# 2016 Q1 goes first, followed by every quarter of 2005-2015, matching the
# row order of the original output file.
frames = [_fetch_tagged(2016, 1)]
for y in range(2005, 2016):
    for q in range(1, 5):
        print(y, q)
        frames.append(_fetch_tagged(y, q))

frames = [frame for frame in frames if frame is not None]
if not frames:
    raise SystemExit('no report data could be downloaded')

# One concat replaces the repeated DataFrame.append calls: append was removed
# in pandas 2.0 and copied the whole accumulated frame on every iteration.
report = pd.concat(frames, ignore_index=True)
report.to_csv('report05Q4-16Q1.csv')
#coding=utf-8
import tushare as ts

# Basic information on the companies listed in Shanghai and Shenzhen
df = ts.get_stock_basics()
# Listing date (YYYYMMDD).  .loc replaces .ix, which was removed in pandas 1.0.
date = df.loc['600848', 'timeToMarket']

# Earnings report data for 2014 Q3
ts.get_report_data(2014,3)
# Profitability data for 2014 Q3
ts.get_profit_data(2014,3)
# Operating-capability data for 2014 Q3
ts.get_operation_data(2014,3)
# Growth data for 2014 Q3
ts.get_growth_data(2014,3)
# Debt-paying-capability data for 2014 Q3
ts.get_debtpaying_data(2014,3)
# Cash-flow data for 2014 Q3
ts.get_cashflow_data(2014,3)
def download_report_info(file_path, year, quarter):
    """Save one quarter of tushare report data as a UTF-8 CSV file.

    The file is written to ``file_path + 'report_<year>_<quarter>.csv'``;
    ``file_path`` is used as a plain string prefix, so it has to carry its own
    trailing separator.  Nothing is written when tushare returns ``None``.
    """
    frame = ts.get_report_data(year, quarter)
    if frame is None:
        return
    target = file_path + 'report_{}_{}.csv'.format(year, quarter)
    frame.to_csv(target, encoding='utf-8')
`eps` double NOT NULL DEFAULT '0' COMMENT '每股收益',\ `eps_yoy` double NOT NULL DEFAULT '0' COMMENT '每股收益同比(%)',\ `bvps` double NOT NULL DEFAULT '0' COMMENT '每股净资产',\ `roe` double NOT NULL DEFAULT '0' COMMENT '净资产收益率(%)',\ `epcf` double NOT NULL DEFAULT '0' COMMENT '每股现金流量(元)',\ `net_profits` double NOT NULL DEFAULT '0' COMMENT '净利润(万元)',\ `profits_yoy` double NOT NULL DEFAULT '0' COMMENT '净利润同比(%)',\ `distrib` char(30) NOT NULL DEFAULT '' COMMENT '分配方案',\ `report_date` char(10) NOT NULL DEFAULT '' COMMENT '发布日期',\ PRIMARY KEY (`id`),\ UNIQUE KEY `code` (`code`)\ ) ENGINE=MyISAM DEFAULT CHARSET=utf8 COMMENT='业绩报告(主表)';" cursor.execute(sql) try: df = ts.get_report_data(year, season) if df is not None: for id in df.index: temp = df.ix[id] code = temp['code'] # 以下先判断temp['']是否为NAN if temp['eps'] != temp['eps']: eps = 0.00 else: eps = temp['eps'] if temp['eps_yoy'] != temp['eps_yoy']: eps_yoy = 0.00 else: eps_yoy = temp['eps_yoy']
# v_ma10:10日均量 # v_ma20:20日均量 # turnover:换手率[注:指数无此项] 4/4 raw_data_profit = ts.get_profit_data() # code,代码 # name,名称 # roe,净资产收益率(%) 7 # net_profit_ratio,净利率(%) # gross_profit_rate,毛利率(%) # net_profits,净利润(万元) # eps,每股收益 # business_income,营业收入(百万元) # bips,每股主营业务收入(元) raw_data_report = ts.get_report_data() # code,代码 # name,名称 # eps,每股收益 # eps_yoy,每股收益同比(%) # bvps,每股净资产 # roe,净资产收益率(%) # epcf,每股现金流量(元) # net_profits,净利润(万元) # profits_yoy,净利润同比(%) # distrib,分配方案 # report_date,发布日期 raw_data_growth = ts.get_growth_data()
import sys

import tushare as ts

# Command-line helper: download one quarter of report data and save it as CSV.
#   argv[1] = year, argv[2] = quarter, argv[3] = output CSV path
if len(sys.argv) < 4:
    # A usage line instead of a bare IndexError traceback.
    sys.exit('usage: %s YEAR QUARTER OUTPUT_CSV' % sys.argv[0])

df = ts.get_report_data(int(sys.argv[1]), int(sys.argv[2]))
if df is None:
    # Avoid an AttributeError on None when the download yields nothing.
    sys.exit('no report data returned for %s Q%s' % (sys.argv[1], sys.argv[2]))
df.to_csv(sys.argv[3], encoding="utf8")
####################
# Financial Report #
####################
# FinancialReport: EPS, EPS_YOY, ROE, net_profits, profits_yoy
# ProfitData: ROE, net_profit_ratio, gross_profit_rate, EPS, bips (business income per share)
# GrowthData: mbrg (main business rate growth), nprg (net profit),
#             nav, targ (total asset), epsg, seg (shareholder's eqty)
# DebtPayingData: currentratio, quickratio, cashratio, icratio (interest coverage)
# TODO Data is available quarterly
# TODO Compare data for FinancialReport and ProfitData

# Quarter preceding the one CURRENT falls in, computed once with integer
# arithmetic.  The original np.floor(...) - 1 expression produced a float and
# evaluated to 0 for January-March; that case now rolls over to Q4 of the
# previous year.
# NOTE(review): assumes CURRENT exposes integer .year / .month -- confirm.
_REPORT_YEAR = CURRENT.year
_REPORT_QUARTER = (CURRENT.month + 2) // 3 - 1
if _REPORT_QUARTER == 0:
    _REPORT_YEAR -= 1
    _REPORT_QUARTER = 4

# NOTE(review): the output file names are hard-coded to "2015_1" regardless of
# the period actually downloaded.
FinancialData = ts.get_report_data(_REPORT_YEAR, _REPORT_QUARTER)
FinancialData = FinancialData.set_index('code')
FinancialData = FinancialData.drop(['name', 'bvps', 'distrib', 'epcf', 'report_date'], axis = 1)
FinancialData.to_csv('./ASHR/DATA/FinancialData_2015_1.csv', index = True)

ProfitData = ts.get_profit_data(_REPORT_YEAR, _REPORT_QUARTER)
ProfitData = ProfitData.set_index('code')
ProfitData = ProfitData.drop(['name', 'business_income', 'net_profits'], axis = 1)
ProfitData.to_csv('./ASHR/DATA/ProfitData_2015_1.csv', index = True)

GrowthData = ts.get_growth_data(_REPORT_YEAR, _REPORT_QUARTER)
GrowthData = GrowthData.set_index('code')
GrowthData = GrowthData.drop(['name'], axis = 1)
GrowthData.to_csv('./ASHR/DATA/GrowthData_2015_1.csv', index = True)

DebtPayingData = ts.get_debtpaying_data(_REPORT_YEAR, _REPORT_QUARTER)
# Replace the 'basic_info' table with the current stock list.
df = ts.get_stock_basics()
df.to_sql('basic_info',engine, if_exists='replace')

# import report data
# code: stock code
# name: name
# eps: earnings per share
# eps_yoy: earnings per share, year over year (%)
# bvps: net assets per share
# roe: return on equity (%)
# epcf: cash flow per share (yuan)
# net_profits: net profit (10,000 yuan)
# profits_yoy: net profit, year over year (%)
# distrib: distribution plan
# report_date: publication date
# The first write replaces 'report_data'; the second appends to it, so the
# table ends up holding both periods, told apart by the 'quater' column.
df = ts.get_report_data(lastYear,lastSeason)
df = df.assign(quater=lastQuater)
df.to_sql('report_data',engine, if_exists='replace')
df = ts.get_report_data(currentYear,currentSeason)
df = df.assign(quater=currentQuater)
df.to_sql('report_data',engine, if_exists='append')

# import profit data
# code: stock code
# name: name
# roe: return on equity (%)
# net_profit_ratio: net profit margin (%)
# gross_profit_rate: gross profit margin (%)
# net_profits: net profit (10,000 yuan)
# eps: earnings per share
# business_income: operating revenue (million yuan)