def update_single_basic_ele(ele_name, retry_times): for year in range(2001, int(year_now) + 1): for quarter in range(1, 5): table_name = ele_name + str(year) + '0' + str(quarter) if not is_table_exist(conn=conn_profit, database_name=stk_profit_data_db_name, table_name=table_name): """失败达到一定次数便不再尝试""" failure_time = 0 go_on_flag = True while go_on_flag & (failure_time < retry_times): try: if ele_name == "profit": ts.get_profit_data(year=year,quarter=quarter)\ .to_sql(con=engine_profit, name=table_name, if_exists='append', schema=stk_profit_data_db_name, index=False) elif ele_name == "growth": ts.get_growth_data(year=year, quarter=quarter) \ .to_sql(con=engine_growth, name=table_name, if_exists='append', schema=stk_growth_data_db_name, index=False,) elif ele_name == "operation": ts.get_operation_data(year=year, quarter=quarter) \ .to_sql(con=engine_operation, name=table_name, if_exists='append', schema=stk_operation_data_db_name, index=False) elif ele_name == "debtpaying": ts.get_debtpaying_data(year=year, quarter=quarter) \ .to_sql(con=engine_debtpaying, name=table_name, if_exists='append', schema=stk_debtpaying_data_db_name, index=False) elif ele_name == "cashflow": ts.get_cashflow_data(year=year, quarter=quarter) \ .to_sql(con=engine_cashflow, name=table_name, if_exists='append', schema=stk_cashflow_data_db_name, index=False) go_on_flag = False except: print(table_name + "下载失败!重试!\n") failure_time = failure_time + 1 else: print(table_name + "已经存在!\n")
def get_growth_data(self, year, loops): for i in range(1,loops): print(i) for j in year: print(year) try: ts.get_growth_data(j, 4).to_csv('growth_data_%d.csv'%j, encoding='utf-8') print(j) year.remove(j) except: pass
def basic_information(): ts.get_cashflow_data(2017, 1).to_sql('cash_flow', engine, if_exists='append') ts.get_debtpaying_data(2017, 1).to_sql('debtpaying', engine, if_exists='append') ts.get_growth_data(2017, 1).to_sql('growth', engine, if_exists='append') ts.get_operation_data(2017, 1).to_sql('operation', engine, if_exists='append') ts.get_profit_data(2017, 1).to_sql('profit', engine, if_exists='append') ts.get_report_data(2017, 2).to_sql('report', engine, if_exists='append') print('basic information over ....')
def get_stock_growth(year, season, engine): frame = ts.get_growth_data(year, season) table_name = 'stock_growth_' + str(year) + 's' + str(season) if useDB == True: frame.to_sql(table_name, engine) else: frame.to_csv(table_name + '.csv')
def export(exportType, datePicker): sb = None [year, quarter] = getYearQuarter(datePicker) if exportType == "report": name = "业绩报表" sb = ts.get_report_data(year, quarter) elif exportType == "profit": name = "盈利能力报表" sb = ts.get_profit_data(year, quarter) elif exportType == "operation": name = "营运能力报表" sb = ts.get_operation_data(year, quarter) elif exportType == "growth": name = "成长能力报表" sb = ts.get_growth_data(year, quarter) elif exportType == "debtpaying": name = "偿债能力报表" sb = ts.get_debtpaying_data(year, quarter) elif exportType == "cashflow": name = "现金流量报表" sb = ts.get_cashflow_data(year, quarter) filename = quote(name + str(year) + "Q" + str(quarter) + ".xlsx") filepath = os.path.join(basedir, app.config['UPLOAD_FOLDER'], filename) sb.to_excel(filepath) rtn = send_file(filepath, as_attachment=True) rtn.headers['Content-Disposition'] += "; filename*=utf-8''%s" % (filename) return rtn
def get_growth_data(self, year, quarter): tsdata = ts.get_growth_data( year=year, quarter=quarter, ) jsdata = To_Json(tsdata) return jsdata
def buildGrowthForStock(self, stock): code = stock.get_code() if self.__dfGrowth is None or self.__dfGrowth.empty: try: self.__dfGrowth = pd.read_sql_table('growth', con=self.__engine, index_col='code') except: pass if self.__dfGrowth is None or self.__dfGrowth.empty: self.__dfGrowth = ts.get_growth_data( self.__config.get_report()[0], self.__config.get_report()[1]) self.__dfGrowth.to_sql('growth', self.__engine, if_exists='replace', index_label='code', index=False) try: nprg = self.__dfGrowth.loc[stock.get_code()].get('nprg') if isinstance(nprg, pd.Series): nprg = nprg.iat[-1] epsg = self.__dfGrowth.loc[stock.get_code()].get('epsg') if isinstance(epsg, pd.Series): epsg = epsg.iat[-1] stock.set_nprg(nprg) stock.set_epsg(epsg) except: stock.set_nprg(float("nan")) stock.set_epsg(float("nan"))
def update_basics(): basics = ts.get_stock_basics() f = os.path.join(DATA_DIR, 'basics.h5') basics.to_hdf(f, 'basics') length = 4 * 5 year, season = last_report_season() for i in range(length): f = os.path.join(DATA_DIR, 'basics-{0}-{1}.h5'.format(year, season)) if os.path.exists(f): continue report = ts.get_report_data(year, season) report.to_hdf(f, 'report') profit = ts.get_profit_data(year, season) profit.to_hdf(f, 'profit') operation = ts.get_operation_data(year, season) operation.to_hdf(f, 'operation') growth = ts.get_growth_data(year, season) growth.to_hdf(f, 'growth') debtpaying = ts.get_debtpaying_data(year, season) debtpaying.to_hdf(f, 'debtpaying') cashflow = ts.get_cashflow_data(year, season) cashflow.to_hdf(f, 'cashflow') season -= 1 if season == 0: season = 4 year -= 1
def _fetch_finance(): for year in range(2004, 2018): set_year = lambda x: str(year) + '-' + x for quarter in range(1, 5): print(year, ' year ', 'quarter ', quarter) rep = ts.get_report_data( year, quarter)[['code', 'eps', 'bvps', 'epcf', 'report_date']] pro = ts.get_profit_data(year, quarter)[[ 'code', 'roe', 'net_profit_ratio', 'gross_profit_rate', 'net_profits', 'business_income', 'bips' ]] ope = ts.get_operation_data(year, quarter)[[ 'code', 'arturnover', 'arturndays', 'inventory_turnover', 'currentasset_turnover', 'currentasset_days' ]] gro = ts.get_growth_data( year, quarter)[['code', 'mbrg', 'nprg', 'nav', 'epsg', 'seg']] deb = ts.get_debtpaying_data(year, quarter)[[ 'code', 'currentratio', 'quickratio', 'cashratio', 'icratio', 'sheqratio', 'adratio' ]] cas = ts.get_cashflow_data(year, quarter)[[ 'code', 'cf_sales', 'rateofreturn', 'cf_nm', 'cf_liabilities', 'cashflowratio' ]] rep.rename(columns={'report_date': 'date'}, inplace=True) rep['date'] = rep['date'].apply(set_year) rep = rep.merge(pro, on='code', how='left') rep = rep.merge(ope, on='code', how='left') rep = rep.merge(gro, on='code', how='left') rep = rep.merge(deb, on='code', how='left') rep = rep.merge(cas, on='code', how='left') finance.insert(rep.to_dict('record')) print(year, quarter)
def get_basic_datas( data_kind): #datakind 为debtpaying,growth,operation,profit,report client = pymongo.MongoClient('localhost', 27017) table_stock = client['stock'] sheet = table_stock[data_kind] for year in range(STARTYEAR, ENDYEAR + 1): try: for season in range(1, 5): print('getting ' + datakind + ' data at year:' + str(year) + " season:" + str(season)) if data_kind == 'debtpaying': tf = ts.get_debtpaying_data(year, season) elif data_kind == 'growth': tf = ts.get_growth_data(year, season) elif data_kind == 'operation': tf = ts.get_operation_data(year, season) elif data_kind == 'profit': tf = ts.get_profit_data(year, season) elif data_kind == 'report': tf = ts.get_report_data(year, season) else: print('Not available data type of data_kind!') return jsonres = json.loads(tf.to_json(orient='records')) for j in jsonres: sheet.insert_one(j) except: #数据缺失,tushare接口会报网络错误 print('the year: ' + str(year) + ' lost data will begin next year') continue
def get_temp_data(year, quarter): df1 = ts.get_report_data(year, quarter) #print (1) df1 = df1.merge(ts.get_profit_data(year, quarter), how='inner', on=['code', 'name']) #print (2) df1 = df1.merge(ts.get_operation_data(year, quarter), how='inner', on=['code', 'name']) #print (3, "n", df1) df1 = df1.merge(ts.get_growth_data(year, quarter), how='inner', on=['code', 'name']) #print (4) print(df1) df1 = df1.merge(ts.get_debtpaying_data(year, quarter), how='inner', on=['code', 'name']) #print (5) print(df1) df1 = df1.merge(ts.get_cashflow_data(year, quarter), how='inner', on=['code', 'name']) #print (6) (row, col) = df1.shape for i in range(0, row): df1.iloc[i, 0] = str(df1.iloc[i, 0]) return df1
def get_report_data(year, season): if not available(year, season): return None print("get_report_data") save(ts.get_report_data(year, season), "basics/report_data", year, season) print("get_profit_data") save(ts.get_profit_data(year, season), "basics/profit_data", year, season) filename = "operation_data" print("get_operation_data") save(ts.get_operation_data(year, season), "basics/operation_data", year, season) filename = "growth_data" print("get_growth_data") save(ts.get_growth_data(year, season), "basics/growth_data", year, season) filename = "get_debtpaying_data" print("get_debtpaying_data") save(ts.get_debtpaying_data(year, season), "basics/debtpaying_data", year, season) filename = "get_debtpaying_data" print("get_cashflow_data") save(ts.get_cashflow_data(year, season), "basics/cashflow_data", year, season)
def valuation_factor(year): report = ts.get_report_data(year,4) report = report.sort_values(by = 'code',axis = 0,ascending = True) report = report.reset_index(drop = True) report.to_csv("/home/yirui/Desktop/Quant/Report/%s.csv"%year, mode="w") profit = ts.get_profit_data(year, 4) profit = profit.sort_values(by='code', axis=0, ascending=True) profit = profit.reset_index(drop=True) profit.to_csv("/home/yirui/Desktop/Quant/Profit/%s.csv"%year,mode= "w") operation = ts.get_operation_data(year,4) operation = operation.sort_values(by='code', axis=0, ascending=True) operation = operation.reset_index(drop=True) operation.to_csv("/home/yirui/Desktop/Quant/Operation/%s.csv" % year, mode="w") growth = ts.get_growth_data(year,4) growth = growth.sort_values(by='code', axis=0, ascending=True) growth = growth.reset_index(drop=True) growth.to_csv("/home/yirui/Desktop/Quant/Growth/%s.csv" % year, mode="w") debtpaying = ts.get_debtpaying_data(year,4) debtpaying = debtpaying.sort_values(by='code', axis=0, ascending=True) debtpaying = debtpaying.reset_index(drop=True) debtpaying.to_csv("/home/yirui/Desktop/Quant/Debtpaying/%s.csv" % year, mode="w") cashflow = ts.get_cashflow_data(year,4) cashflow = cashflow.sort_values(by='code', axis=0, ascending=True) cashflow = cashflow.reset_index(drop=True) cashflow.to_csv("/home/yirui/Desktop/Quant/Cashflow/%s.csv" % year, mode="w")
def getThingsEveryday(self): yearEnd = datetime.now().year - 1 if self.__flagUpdateReport: pdGrowthLastYear = ts.get_growth_data(yearEnd, 4) pdGrowthLastYear.to_excel('./' + \ str(yearEnd) + 'Growth.xls', sheet_name='Growth') pdProfitLastYear = ts.get_profit_data(yearEnd, 4) pdProfitLastYear.to_excel('./' + \ str(yearEnd) + 'Profit.xls', sheet_name='Profit') pdReportLastYear = ts.get_report_data(yearEnd, 4) pdReportLastYear.to_excel('./' + \ str(yearEnd) + 'y.xls', sheet_name='Report') self.__stockBasics = ts.get_stock_basics() #获得昨天pe self.__stockBasics['code'] = self.__stockBasics.index.astype(int) self.__stockBasics.sort_index(inplace=True) self.__stockTodayAll = ts.get_today_all() #获得昨收 self.__stockTodayAll['code'] = self.__stockTodayAll['code'].astype(int) self.__pdForwardEps = pd.merge(self.__stockBasics, self.__stockTodayAll, on='code') self.__pdForwardEps['feps'] = self.__pdForwardEps[ 'settlement'] / self.__pdForwardEps['pe'] print('\n')
def getCZNL(self,nian,ji): otherStyleTime = datetime.datetime.now().strftime("%Y%m%d%H%M%S") pp = ts.get_growth_data(nian, ji) fileName='%s/growth/%s-%s.csv'%(dir,str(nian)+str(ji),otherStyleTime) if not os.path.exists('%s/growth'%(dir)): os.makedirs('%s/growth'%(dir)) pp.to_csv(fileName,encoding='utf-8')
def store_fund_data(quarter_list): stock2year_path = os.path.join(LastFilePath, "stock_fundm_info") for fun_year, fun_quarter in quarter_list: #every dataframe you craw down all needs remove the duplicated row. Only need keep the first row of duplicates. # stock2year_report is tushare:get_report_data (fundamental data). stock2year_report = ts.get_report_data( fun_year, fun_quarter).drop_duplicates(keep='first') # stock2year_prof is tushare.get_profit_data (fundamental data). stock2year_prof = ts.get_profit_data( fun_year, fun_quarter).drop_duplicates(keep='first') # stock2year_opera is tushare.get_operation_data (fundamental data). stock2year_opera = ts.get_operation_data( fun_year, fun_quarter).drop_duplicates(keep='first') #stock2year_grow is tushare.get_growth_data (fundamental data). stock2year_grow = ts.get_growth_data( fun_year, fun_quarter).drop_duplicates(keep='first') #stock2year_debt is tushare.get_debtpaying_data (fundamental data). stock2year_debt = ts.get_debtpaying_data( fun_year, fun_quarter).drop_duplicates(keep='first') #stock2year_cash is tushare.get_cashflow_data (fundamental data). stock2year_cash = ts.get_cashflow_data( fun_year, fun_quarter).drop_duplicates(keep='first') #stock2year_comb is to combine all the stock2year data of same year and quarter in a same stock code. stock2year_list = [stock2year_report,stock2year_prof,stock2year_opera,stock2year_grow, \ stock2year_debt,stock2year_cash] for every_fund_element in stock2year_list: every_fund_element = every_fund_element.set_index('code') #use pandas concat to combine all the dataframe along columns. total_fund = pd.concat(stock2year_list, axis=1) HeadName = fun_year + "/" + fun_quarter + "_" + "fundamt_info" CsvName = os.path.join(stock2year_path, "{}.csv".format(HeadName)) total_fund.to_csv(CsvName)
def fetchByYearQuarter(self, mongo, type): years = range(self.begin_year, self.end_year_notinclude) quarters = range(1, 5) for year in years: for quarter in quarters: print(str(type) + '_' + str(year) + '_' + str(quarter)) if (type == 'report_data'): df = fd.get_report_data(year, quarter) elif (type == 'profit_data'): df = ts.get_profit_data(year, quarter) elif (type == 'operation_data'): df = ts.get_operation_data(year, quarter) elif (type == 'growth_data'): df = ts.get_growth_data(year, quarter) elif (type == 'debtpaying_data'): df = ts.get_debtpaying_data(year, quarter) elif (type == 'cashflow_data'): df = ts.get_cashflow_data(year, quarter) else: df = {} tmpJson = json.loads(df.to_json(orient='records')) for i in range(len(tmpJson)): tmpJson[i][u'year'] = int(year) tmpJson[i][u'quarter'] = int(quarter) coll = mongo.fundemental[type] coll2 = mongo.fundemental[str(type) + '_' + str(year) + '_' + str(quarter)] coll2.insert(tmpJson) coll.insert(tmpJson)
def stat_all(tmp_datetime): # 返回 31 天前的数据,做上个季度数据统计。 tmp_datetime_1month = tmp_datetime + datetime.timedelta(days=-31) year = int((tmp_datetime_1month).strftime("%Y")) quarter = int(pd.Timestamp(tmp_datetime_1month).quarter) # 获得上个季度的数据。 print("############ year %d, quarter %d", year, quarter) # 业绩报告(主表) data = ts.get_report_data(year, quarter) # 增加季度字段。 data = concat_quarter(year, quarter, data) # 处理重复数据,保存最新一条数据。最后一步处理,否则concat有问题。 data = data.drop_duplicates(subset="code", keep="last") global db # 插入数据库。 db.insert_db(data, "ts_report_data", True, "`quarter`,`code`") # 盈利能力 data = ts.get_profit_data(year, quarter) # 增加季度字段。 data = concat_quarter(year, quarter, data) # 处理重复数据,保存最新一条数据。 data = data.drop_duplicates(subset="code", keep="last") # 插入数据库。 db.insert_db(data, "ts_profit_data", True, "`quarter`,`code`") # 营运能力 data = ts.get_operation_data(year, quarter) # 增加季度字段。 data = concat_quarter(year, quarter, data) # 处理重复数据,保存最新一条数据。最后一步处理,否则concat有问题。 data = data.drop_duplicates(subset="code", keep="last") # 插入数据库。 db.insert_db(data, "ts_operation_data", True, "`quarter`,`code`") # 成长能力 data = ts.get_growth_data(year, quarter) # 增加季度字段。 data = concat_quarter(year, quarter, data) # 处理重复数据,保存最新一条数据。最后一步处理,否则concat有问题。 data = data.drop_duplicates(subset="code", keep="last") # 插入数据库。 db.insert_db(data, "ts_growth_data", True, "`quarter`,`code`") # 偿债能力 data = ts.get_debtpaying_data(year, quarter) # 增加季度字段。 data = concat_quarter(year, quarter, data) # 处理重复数据,保存最新一条数据。最后一步处理,否则concat有问题。 data = data.drop_duplicates(subset="code", keep="last") # 插入数据库。 db.insert_db(data, "ts_debtpaying_data", True, "`quarter`,`code`") # 现金流量 data = ts.get_cashflow_data(year, quarter) # 增加季度字段。 data = concat_quarter(year, quarter, data) # 处理重复数据,保存最新一条数据。最后一步处理,否则concat有问题。 data = data.drop_duplicates(subset="code", keep="last") # 插入数据库。 db.insert_db(data, "ts_cashflow_data", True, "`quarter`,`code`")
def get_growth_data(year, quarter): try: df = ts.get_growth_data(year, quarter) engine = create_engine('mysql://*****:*****@127.0.0.1/stock?charset=utf8') df.to_sql('growth_data', engine, if_exists='append') print "message" except Exception, e: e.message
def get_growth_data_dict(year, season): gdf = ts.get_growth_data(year, season) ret_dict = {} if gdf is None: return ret_dict records = gdf.to_dict("records") for data in records: ret_dict[data['code']] = data return ret_dict
def get_stock_growth(nd, jd): """ 获取成长能力 """ try: res = ts.get_growth_data(nd, jd) return res except: return None
def loadgrowth(): for year in [2011,2012,2013,2014,2015,2016,2017]: for quarter in [1,2,3,4]: if quarter == 4 and year ==2017: break profit = ts.get_growth_data(year, quarter) filename = "C:\invest\\basic\\growth\\%s%s.csv" % (year, quarter) print filename profit.to_csv(filename, encoding='utf-8')
def single_stock_report(code, year_start, k_index): """ :param code: the valid stock code, for example '002146' :param year_start: the start date that we want to check the stock report, for example '201801' :param k_index: the performance of report we want to check :return: DataFrame table: the index is the quarter from start to end, the """ if code is None: raise ValueError('please assign code') if year_start is None: raise ValueError('please assign year') if k_index is None: raise ValueError('please assign index') year_to_market = stock_list('timeToMarket') ytm = year_to_market[year_to_market.index == code] ytm = str(ytm.iloc[0]) if ytm >= year_start: qs = getBetweenQuarter(ytm) else: qs = getBetweenQuarter(year_start) j = len(qs) - 1 results = pd.DataFrame() new_index = [] for i in range(j): year = int(qs[i].split('Q')[0]) q = int(qs[i].split('Q')[1]) n = 1 data = [] while n < 10: if k_index == 'get_profit_data': data = ts.get_profit_data(int(year), q) elif k_index == 'get_report_data': data = ts.get_report_data(int(year), q) elif k_index == 'get_operation_data': data = ts.get_operation_data(int(year), q) elif k_index == 'get_growth_data': data = ts.get_growth_data(int(year), q) elif k_index == 'get_debtpaying_data': data = ts.get_debtpaying_data(int(year), q) elif k_index == 'get_cashflow_data': data = ts.get_cashflow_data(int(year), q) else: raise Exception('the k_indexs is not correct') result = data[data['code'] == code] if len(result) >= 1: new_index.append('%d0%d' % (year, q)) results = results.append(result[0:1], ignore_index=True) print(results) break elif len(result) == 0: n += 1 continue new_index_1 = pd.DataFrame({"Y_Q": new_index}) frames = [results, new_index_1] return pd.concat(frames, axis=1)
def peg(): basic = ts.get_stock_basics() peinpeg = [g for g in basic["pe"]] growth181 = ts.get_growth_data(2018, 1) nprg181 = [a for a in growth181["nprg"]] growth182 = ts.get_growth_data(2018, 2) nprg182 = [b for b in growth182["nprg"]] growth183 = ts.get_growth_data(2018, 3) nprg183 = [c for c in growth183["nprg"]] growth184 = ts.get_growth_data(2018, 4) nprg184 = [d for d in growth184["nprg"]] average = [ nprg181[i] + nprg182[i] + nprg183[i] + nprg184[i] for i in range(0, len(nprg181)) ] print(average)
def Deep_Data_Median(self): # 1/行业利润概览 l1 = list() for x in self.fd_data_index: z = ts.get_profit_data(x, 4) k = z[z["code"].isin(self.idstry_code_list)].iloc[:, 1:].median() l1.append(k) all_idt_PF_data = pd.DataFrame(l1, index=self.fd_data_index) all_idt_PF_data.dropna(how="all", inplace=True) # 2/行业现金流量概览 l2 = list() for x in self.fd_data_index: z = ts.get_cashflow_data(x, 4) k = z[z["code"].isin(self.idstry_code_list)].iloc[:, 1:].median() l2.append(k) all_idt_CS_data = pd.DataFrame(l2, index=self.fd_data_index) all_idt_CS_data.dropna(how="all", inplace=True) # 3/行业偿债能力概览 l3 = list() for x in self.fd_data_index: z = ts.get_debtpaying_data(x, 4) k = z[z["code"].isin(self.idstry_code_list)].iloc[:, 1:].median() l3.append(k) all_idt_DP_data = pd.DataFrame(l3, index=self.fd_data_index) all_idt_DP_data.dropna(how="all", inplace=True) # 4/行业营运能力概览 l4 = list() for x in self.fd_data_index: z = ts.get_operation_data(x, 4) k = z[z["code"].isin(self.idstry_code_list)].iloc[:, 1:].median() l4.append(k) all_idt_OP_data = pd.DataFrame(l4, index=self.fd_data_index) all_idt_OP_data.dropna(how="all", inplace=True) # 5/行业成长能力概览 l5 = list() for x in self.fd_data_index: z = ts.get_growth_data(x, 4) k = z[z["code"].isin(self.idstry_code_list)].iloc[:, 1:].median() l5.append(k) all_idt_GR_data = pd.DataFrame(l5, index=self.fd_data_index) all_idt_GR_data.dropna(how="all", inplace=True) # 保存数据 data_writer = pd.ExcelWriter("股票{}的基本面数据全览(中位数视角).xlsx".format(self.code)) self.All_Ratio_data.T.sort_index(ascending=True).to_excel(data_writer, sheet_name="基本面财务比率") self.All_Basic_data.T.sort_index(ascending=True).to_excel(data_writer, sheet_name="基本面财务数据") self.All_Growth_data.sort_index(ascending=True).to_excel(data_writer, sheet_name="基本面数据增长率") self.All_Valuation_data.T.sort_index(ascending=True).to_excel(data_writer, sheet_name="三项估值指标") self.Dupon_data.T.sort_index(ascending=True).to_excel(data_writer, sheet_name="杜邦分析表") all_idt_PF_data.sort_index(ascending=True).to_excel(data_writer, sheet_name="行业利润历史") all_idt_CS_data.sort_index(ascending=True).to_excel(data_writer, sheet_name="行业现金历史") all_idt_OP_data.sort_index(ascending=True).to_excel(data_writer, sheet_name="行业营运历史") all_idt_GR_data.sort_index(ascending=True).to_excel(data_writer, sheet_name="行业成长历史") all_idt_DP_data.sort_index(ascending=True).to_excel(data_writer, sheet_name="行业偿债历史") data_writer.save()
def growth(): """成长能力 code,代码 name,名称 mbrg,主营业务收入增长率(%) nprg,净利润增长率(%) nav,净资产增长率 targ,总资产增长率 epsg,每股收益增长率 seg,股东权益增长率 """ # 获取2014年第3季度的成长能力数据 df = ts.get_growth_data(2020, 2) print(df)
def get_fundamental_data(year,quarter): fundamental = {} fundamental['basic'] =ts.get_stock_basics() fundamental['report']=ts.get_report_data(year, quarter) fundamental['profit']=ts.get_profit_data(year, quarter) fundamental['operation']=ts.get_operation_data(year, quarter) fundamental['cashflow']=ts.get_cashflow_data(year, quarter) fundamental['growth']=ts.get_growth_data(year, quarter) fundamental['debt']=ts.get_debtpaying_data(year, quarter) return fundamental
def getGrowth(self, year, season): ''' 获取股票成长能力数据 ---待测试 ''' print 'getGrowth work' df = ts.get_growth_data(year, season) df.to_csv(self.__filename) self.__getDataCommSeason(9, year, season, "Growth")
def growth_data(year): for i in range(4): quarter = i + 1 predts = ts.get_growth_data(year, quarter) if predts is not None: predts['year'] = year predts['quarter'] = quarter predts.to_sql('fundamental_growth_data', engine, flavor='mysql', if_exists='append')
def get_growth_data(start=None, end=None): startArr = start.split("-") endArr = end.split("-") seasonList = dataUtil.getListSeason(int(startArr[0]), int(startArr[1]), int(endArr[0]), int(endArr[1])) for season in seasonList : df = ts.get_growth_data(season[0],season[1]).drop_duplicates('code') dt = pd.DataFrame({"date": np.array([str(season[0])+"-"+str(season[1])]*len(df))},index=df.index) df = pd.concat([df,dt],axis=1) dataPath = TushareApi.path + "growth_data/" +str(season[0])+"-"+str(season[1])+"-growth_data.csv" fileUtil.saveDf(df,dataPath)
def year_report(year): df0 = ts.get_report_data(year, 4) df0.to_sql(str(year) + '_main', engine) df1 = ts.get_profit_data(year, 4) df1.to_sql(str(year) + '_profit', engine) df2 = ts.get_growth_data(year, 4) df2.to_sql(str(year) + '_growth', engine) df3 = ts.get_debtpaying_data(year, 4) df3.to_sql(str(year) + '_debtpaying', engine) df4 = ts.get_cashflow_data(year, 4) df4.to_sql(str(year) + '_cashflow', engine)
def get_growth_data(cls, year, quarter): # 营运能力数据 data = ts.get_growth_data(year, quarter) data['year'] = year data['quarter'] = quarter data.reset_index(drop=True, inplace=True) ModelData.remove_data(table_name='growth_data', year=year, quarter=quarter) print('remove success') ModelData.insert_data(table_name='growth_data', data=data) print('write success')
def getGrowthReport(year): for q in [1, 2, 3, 4]: getDownloaded() if not isDownloaded(growthByYear(year, q)): try: print('Growth of ' + str(year) + ' and quarter is ' + str(q)) growth_df = ts.get_growth_data(year, q) growth_df.to_csv(path + growthByYear(year, q)) time.sleep(5) except Exception: logError('Growth: ' + str(year) + '-' + str(q) + ' there is problem, will skip it. ')
def MBRG(): """ 无 --> result 符合条件的股票集合 算法:排除ST股票以及主营业务收入增长低于40%的股票 """ b2.log('获取主营业务收入增长数据..........') print('获取主营业务收入增长数据..........') #print(readydata('growth')[0]) if readydata('growth') == 0: #季报数据不一定及时,因此采用试错办法 class FoundException(Exception): pass try: for year in (2016, ): for season in (4, 3, 2, 1): try: ds = ts.get_growth_data(year, season) except: print('ERROR: ts.get_growth_data(%d,%d)' % (year, season)) else: try: ds.to_sql('growth', engine, if_exists='replace') engine.execute( '''insert into tb_stamp values ('growth',curdate())''' ) except: print('ERROR: ds.to_sql(%d,%d)' % (year, season)) continue else: raise FoundException() except FoundException: pass b2.log('获取风险警示板股票数据..........') if not readydata('stcode'): ds = ts.get_st_classified() ds.to_sql('stcode', engine, if_exists='replace') engine.execute('''insert into tb_stamp values ('stcode',curdate())''') #删除ST股票,删除主营业务收入增长低于40%的股票 #result=engine.execute('select distinct code from growth where mbrg>0 and code not in (select code from stcode)') result = engine.execute( 'select distinct code from growth where code not in (select code from stcode)' ) #测试测试 #result=engine.execute('select distinct code from growth where code=002723') return result
def sync_growth_data(): ''' sync growth data ''' year = datetime.datetime.now().year month = datetime.datetime.now().month seaon = month/3 if month<3: year = year - 1 seaon = 4 monthstr = '%s%s'%(year,seaon) DataFrameToMongo(ts.get_growth_data(year, seaon), MongoClient(mongourl)['stoinfo']['growth_data'], ['code'], monthstr)
def MBRG(): """ 无 --> result 符合条件的股票集合 算法:排除ST股票以及主营业务收入增长低于40%的股票 """ b2.log('获取主营业务收入增长数据..........') print('获取主营业务收入增长数据..........') #print(readydata('growth')[0]) if readydata('growth')==0: #季报数据不一定及时,因此采用试错办法 class FoundException(Exception): pass try: for year in (2016,): for season in (4,3,2,1): try: ds=ts.get_growth_data(year,season) except: print('ERROR: ts.get_growth_data(%d,%d)'%(year,season)) else: try: ds.to_sql('growth',engine,if_exists='replace') engine.execute('''insert into tb_stamp values ('growth',curdate())''') except: print('ERROR: ds.to_sql(%d,%d)'%(year,season)) continue else: raise FoundException() except FoundException: pass b2.log('获取风险警示板股票数据..........') if not readydata('stcode'): ds=ts.get_st_classified() ds.to_sql('stcode',engine,if_exists='replace') engine.execute('''insert into tb_stamp values ('stcode',curdate())''') #删除ST股票,删除主营业务收入增长低于40%的股票 #result=engine.execute('select distinct code from growth where mbrg>0 and code not in (select code from stcode)') result=engine.execute('select distinct code from growth where code not in (select code from stcode)') #测试测试 #result=engine.execute('select distinct code from growth where code=002723') return result
def update_basics(): basics = ts.get_stock_basics() f = os.path.join(base_dir, 'basics.h5') basics.to_hdf(f, 'basics') today = datetime.date.today() current_year = today.year current_season = today.month / 3 if current_season == 0: current_year -= 1 current_season = 4 length = 4 * 5 year = current_year season = current_season for i in range(length): f = os.path.join(base_dir, 'basics-{0}-{1}.h5'.format(year, season)) if os.path.exists(f): continue print(f) report = ts.get_report_data(year, season) report.to_hdf(f, 'report') profit = ts.get_profit_data(year, season) profit.to_hdf(f, 'profit') operation = ts.get_operation_data(year, season) operation.to_hdf(f, 'operation') growth = ts.get_growth_data(year, season) growth.to_hdf(f, 'growth') debtpaying = ts.get_debtpaying_data(year, season) debtpaying.to_hdf(f, 'debtpaying') cashflow = ts.get_cashflow_data(year, season) cashflow.to_hdf(f, 'cashflow') season -= 1 if season == 0: season = 4 year -= 1
def updategrowth(): reportdatalist=ts.get_growth_data(2014,4) reportdata=pd.DataFrame(reportdatalist) conn= ms.connect(host='localhost',port = 3306,user='******', passwd='123456',db ='investment',charset="utf8") cur = conn.cursor() values=[] for index,row in reportdata.iterrows(): if math.isnan(row['mbrg']): mbrg=0 else: mbrg=row['mbrg'] if math.isnan(row['nprg']): nprg=0 else: nprg=row['nprg'] if math.isnan(row['nav']): nav=0 else: nav=row['nav'] if math.isnan(row['targ']): targ=0 else: targ=row['targ'] if math.isnan(row['epsg']): epsg=0 else: epsg=row['epsg'] if math.isnan(row['seg']): seg=0 else: seg=row['seg'] values.append((row['code'],row['name'],mbrg,nprg,nav,targ,epsg,seg)) cur.executemany('insert into growth20144 (code,name,mbrg,nprg,nav,targ,epsg,seg) values(%s,%s,%s,%s,%s,%s,%s,%s)',values) conn.commit() cur.close() conn.close()
#coding=utf-8 import tushare as ts # 获取沪深上市公司基本情况 df = ts.get_stock_basics() date = df.ix['600848']['timeToMarket']#上市日期YYYYMMDD #获取2014年第3季度的业绩报表数据 ts.get_report_data(2014,3) #获取2014年第3季度的盈利能力数据 ts.get_profit_data(2014,3) #获取2014年第3季度的营运能力数据 ts.get_operation_data(2014,3) #获取2014年第3季度的成长能力数据 ts.get_growth_data(2014,3) #获取2014年第3季度的偿债能力数据 ts.get_debtpaying_data(2014,3) #获取2014年第3季度的现金流量数据 ts.get_cashflow_data(2014,3)
def pick_data(self, max_num_threads = 20, pause = 0): """ pick all necessary data from local database and from internet for loaded stocks. This function will take a while. """ logging.info('getting basics from tushare') self._init_stock_objs() # self.data_manager.drop_stock() # self.stocks = {key: self.stocks[key] for key in ['600233', '600130']} logging.info('totally there are %d listed companies' % len(self.stocks)) logging.info('get indexes from tushare') self._get_indexes() # self._pick_hist_data_and_save(self.stocks, False, self.indexes['000001'].hist_start_date, max_num_threads) logging.info('getting last stock trading data') df = ts.get_today_all() self._extract_from_dataframe(df, ignore=('changepercent', 'open', 'high', 'low', 'settlement', 'volume', 'turnoverratio', 'amount'), remap={'trade': 'price', 'per': 'pe'}) # calculate the report quarter report_year, report_quarter = ts.get_last_report_period() logging.info('getting last report (%d quarter %d) from tushare' % (report_year, report_quarter)) df = ts.get_report_data(report_year, report_quarter) self._extract_from_dataframe(df) logging.info('getting last profit data from tushare') df = ts.get_profit_data(report_year, report_quarter) self._extract_from_dataframe(df, ignore=('net_profits', 'roe', 'eps')) logging.info('getting last operation data from tushare') df = ts.get_operation_data(report_year, report_quarter) self._extract_from_dataframe(df) logging.info('getting last growth data from tushare') df = ts.get_growth_data(report_year, report_quarter) self._extract_from_dataframe(df) logging.info('getting last debtpaying data from tushare') df = ts.get_debtpaying_data(report_year, report_quarter) self._extract_from_dataframe(df) logging.info('getting last cashflow data from tushare') df = ts.get_cashflow_data(report_year, report_quarter) self._extract_from_dataframe(df) logging.info('getting history trading data from tushare') start_from = self.indexes['000001'].hist_start_date data_full = self._pick_hist_data_and_save(self.stocks, False, start_from, max_num_threads, pause) # anything that pulling data must before here self._remove_unavailable_stocks() ''' # calculate qianfuquan data # deprecated due to precision issue for code, stock in self.stocks.items(): for i in range(1, len(stock.hist_data.index)-1): b = stock.hist_data.at[stock.hist_data.index[i], 'close'] a = stock.hist_data.at[stock.hist_data.index[i+1], 'close'] p = stock.hist_data.at[stock.hist_data.index[i+1], 'p_change'] / 100.0 q = (p*a+a)/b if q > 1.1: print('%s chuq-uan %s: %s %s %s, 1/%s' % (stock, stock.hist_data.index[i], b, a, p, q)) ''' return data_full
CombineSlice = 0.050000 dates = pd.bdate_range(end=formatEndTime, periods=periodsNum, freq="B") datesList = dates.tolist() yearNow = int(formatEndTime[0:4]) monthNow = int(formatEndTime[5:7]) curQuarter = int(monthNow / 4) + 1 quarterIdx = 0 yearList = [0, 0, 0, 0] quarterList = [0, 0, 0, 0] growthList = [0, 0, 0, 0] while quarterIdx < TotalQuarter: print str(yearNow) + "-" + str(curQuarter) try: _growth = ts.get_growth_data(yearNow, curQuarter) except Exception, e: try: _growth = ts.get_growth_data(yearNow, curQuarter) except Exception, e1: if curQuarter == 1: yearNow -= 1 curQuarter = 4 else: curQuarter -= 1 continue if _growth is None: if curQuarter == 1: yearNow -= 1 curQuarter = 4 else:
#!/usr/bin/env python3.4 import sys import time import tushare as ts from sqlalchemy import create_engine import mysql.connector conn = create_engine('mysql+mysqlconnector://stockadmin:stock2016@localhost/stock?charset=utf8') for year in range(2004,2012): for season in range(1,5): print(year,season) df_profit = ts.get_profit_data(year,season) time.sleep(15) df_growth = ts.get_growth_data(year,season) time.sleep(15) df_operation = ts.get_operation_data(year,season) time.sleep(15) df_debtpaying = ts.get_debtpaying_data(year,season) time.sleep(15) df_cashflow = ts.get_cashflow_data(year,season) time.sleep(15) df_report = ts.get_report_data(year,season) df_profit.to_sql('profit',conn,if_exists='append') df_growth.to_sql('growth',conn,if_exists='append') df_operation.to_sql('operation',conn,if_exists='append') df_debtpaying.to_sql('debtpaying',conn,if_exists='append') df_cashflow.to_sql('cashflow',conn,if_exists='append') df_report.to_sql('report',conn,if_exists='append')
def getgrowth(year,quarter): dfgrowth = ts.get_growth_data(year,quarter) dfgrowth.insert(0,'uploadtime',nowtime) dfgrowth.insert(1,'year',year) dfgrowth.insert(2,'quarter',quarter) dfgrowth.to_sql('tb_growth',engine,if_exists='append')
def download_growth_data(file_path, year, quarter): growth_data = ts.get_growth_data(year, quarter) if growth_data is not None: growth_data.to_csv(file_path + 'growth_' + str(year) + '_' + str(quarter) + '.csv', encoding='utf-8')
import tushare as ts import marshal, pickle year = 2015 print(report_data) report_data = ts.get_report_data(year, 4) f = file('report_data', 'w') pickle.dump(report_data, f) f.close() profit_data = ts.get_profit_data(year, 4) f = file('profit_data', 'w') pickle.dump(profit_data, f) f.close() growth_data = ts.get_growth_data(year, 4) f = file('growth_data', 'w') pickle.dump(growth_data, f) f.close()
import tushare as ts import sys df = ts.get_growth_data(int(sys.argv[1]), int(sys.argv[2])) df.to_csv(sys.argv[3], encoding="utf8")
df = ts.get_operation_data(currentYear,currentSeason) df = df.assign(quater=currentQuater) df.to_sql('operation_data',engine, if_exists='append') # import growth data #code,代码 #name,名称 #mbrg,主营业务收入增长率(%) #nprg,净利润增长率(%) #nav,净资产增长率 #targ,总资产增长率 #epsg,每股收益增长率 #seg,股东权益增长率 df = ts.get_growth_data(lastYear,lastSeason) df = df.assign(quater=lastQuater) df.to_sql('growth_data',engine, if_exists='replace') df = ts.get_growth_data(currentYear,currentSeason) df = df.assign(quater=currentQuater) df.to_sql('growth_data',engine, if_exists='append') # import debt pay data #code,代码 #name,名称 #currentratio,流动比率 #quickratio,速动比率 #cashratio,现金比率 #icratio,利息支付倍数
# TODO Data is available quarterly # TODO Compare data for FinancialReport and ProfitData FinancialData = ts.get_report_data(CURRENT.year, np.floor((CURRENT.month+2)/3)-1) FinancialData = FinancialData.set_index('code') FinancialData = FinancialData.drop(['name', 'bvps', 'distrib', 'epcf', 'report_date'], axis = 1) FinancialData.to_csv('./ASHR/DATA/FinancialData_2015_1.csv', index = True) ProfitData = ts.get_profit_data(CURRENT.year, np.floor((CURRENT.month+2)/3)-1) ProfitData = ProfitData.set_index('code') ProfitData = ProfitData.drop(['name', 'business_income', 'net_profits'], axis = 1) ProfitData.to_csv('./ASHR/DATA/ProfitData_2015_1.csv', index = True) GrowthData = ts.get_growth_data(CURRENT.year, np.floor((CURRENT.month+2)/3)-1) GrowthData = GrowthData.set_index('code') GrowthData = GrowthData.drop(['name'], axis = 1) GrowthData.to_csv('./ASHR/DATA/GrowthData_2015_1.csv', index = True) DebtPayingData = ts.get_debtpaying_data(CURRENT.year, np.floor((CURRENT.month+2)/3)-1) DebtPayingData = DebtPayingData.set_index('code') DebtPayingData = DebtPayingData.drop(['name', 'sheqratio', 'adratio'], axis = 1) DebtPayingData.to_csv('./ASHR/DATA/DebtPayingData_2014_12.csv', index = True) # Merging data for subtab in [FinancialData, ProfitData, GrowthData, DebtPayingData]: StockInfo = pd.merge(StockInfo, subtab, how = 'outer', on = 'code') # Saving data StockInfo = StockInfo.to_csv('./ASHR/DATA/StockInfo.csv', index = True)
# gross_profit_rate,毛利率(%) # net_profits,净利润(万元) # eps,每股收益 # business_income,营业收入(百万元) # bips,每股主营业务收入(元) raw_data_report = ts.get_report_data() # code,代码 # name,名称 # eps,每股收益 # eps_yoy,每股收益同比(%) # bvps,每股净资产 # roe,净资产收益率(%) # epcf,每股现金流量(元) # net_profits,净利润(万元) # profits_yoy,净利润同比(%) # distrib,分配方案 # report_date,发布日期 raw_data_growth = ts.get_growth_data() # code,代码 # name,名称 # mbrg,主营业务收入增长率(%) 6 # nprg,净利润增长率(%) # nav,净资产增长率 # targ,总资产增长率 # epsg,每股收益增长率 # seg,股东权益增长率
""" from sqlalchemy import create_engine import tushare as ts #import pymongo import pandas as pd df_base = ts.get_stock_basics() df_report_1503 = ts.get_report_data(2015,3) df_profit_1503 = ts.get_profit_data(2015,3) df_growth_1503 = ts.get_growth_data(2015,3) #detail_daily={} engine = create_engine('mysql://*****:*****@127.0.0.1/stock?charset=utf8') for row_index, row in df_base.iterrows(): try: f = open('qfq_err', 'a') f_d = open('detailDay_err','a') stocknum = row_index timeToMarket = df_base.ix[stocknum]['timeToMarket'] startTime = str(timeToMarket)