def update_single_basic_ele(ele_name, retry_times): for year in range(2001, int(year_now) + 1): for quarter in range(1, 5): table_name = ele_name + str(year) + '0' + str(quarter) if not is_table_exist(conn=conn_profit, database_name=stk_profit_data_db_name, table_name=table_name): """失败达到一定次数便不再尝试""" failure_time = 0 go_on_flag = True while go_on_flag & (failure_time < retry_times): try: if ele_name == "profit": ts.get_profit_data(year=year,quarter=quarter)\ .to_sql(con=engine_profit, name=table_name, if_exists='append', schema=stk_profit_data_db_name, index=False) elif ele_name == "growth": ts.get_growth_data(year=year, quarter=quarter) \ .to_sql(con=engine_growth, name=table_name, if_exists='append', schema=stk_growth_data_db_name, index=False,) elif ele_name == "operation": ts.get_operation_data(year=year, quarter=quarter) \ .to_sql(con=engine_operation, name=table_name, if_exists='append', schema=stk_operation_data_db_name, index=False) elif ele_name == "debtpaying": ts.get_debtpaying_data(year=year, quarter=quarter) \ .to_sql(con=engine_debtpaying, name=table_name, if_exists='append', schema=stk_debtpaying_data_db_name, index=False) elif ele_name == "cashflow": ts.get_cashflow_data(year=year, quarter=quarter) \ .to_sql(con=engine_cashflow, name=table_name, if_exists='append', schema=stk_cashflow_data_db_name, index=False) go_on_flag = False except: print(table_name + "下载失败!重试!\n") failure_time = failure_time + 1 else: print(table_name + "已经存在!\n")
def get_debt_data(self, year, loops): for i in range(1,loops): print(i) for j in year: print(year) try: ts.get_debtpaying_data(j, 4).to_csv('debt_data_%d.csv'%j, encoding='utf-8') print(j) year.remove(j) except: pass
def basic_information(): ts.get_cashflow_data(2017, 1).to_sql('cash_flow', engine, if_exists='append') ts.get_debtpaying_data(2017, 1).to_sql('debtpaying', engine, if_exists='append') ts.get_growth_data(2017, 1).to_sql('growth', engine, if_exists='append') ts.get_operation_data(2017, 1).to_sql('operation', engine, if_exists='append') ts.get_profit_data(2017, 1).to_sql('profit', engine, if_exists='append') ts.get_report_data(2017, 2).to_sql('report', engine, if_exists='append') print('basic information over ....')
def debtpaying_data(self, year, quarter): #偿债能力 df = ts.get_debtpaying_data(year, quarter) for indexs in df.index: code = str(df.loc[indexs, ["code"]].values[0]) name = str(df.loc[indexs, ["name"]].values[0]) currentratio = str(df.loc[indexs, ["currentratio"]].values[0]) currentratio = '0' if currentratio in ('nan', '--') else currentratio quickratio = str(df.loc[indexs, ["quickratio"]].values[0]) quickratio = '0' if quickratio in ('nan', '--') else quickratio cashratio = str(df.loc[indexs, ["cashratio"]].values[0]) cashratio = '0' if cashratio in ('nan', '--') else cashratio icratio = str(df.loc[indexs, ["icratio"]].values[0]) icratio = '0' if icratio in ('nan', '--') else icratio sheqratio = str(df.loc[indexs, ["sheqratio"]].values[0]) sheqratio = '0' if sheqratio in ('nan', '--') else sheqratio adratio = str(df.loc[indexs, ["adratio"]].values[0]) adratio = '0' if adratio in ('nan', '--') else adratio insert = ( "insert into debtpaying_data(code,name,currentratio,quickratio,cashratio,icratio,sheqratio,adratio,year,quarter)\ values('" + code + "','" + name + "'," + currentratio + "," + quickratio + "," + cashratio + "," + icratio + "," + sheqratio + "," + adratio + ",'" + str(year) + "','" + str(quarter) + "')") self.dbA.updateInsertDelete(insert) self.dbA.commit()
def export(exportType, datePicker): sb = None [year, quarter] = getYearQuarter(datePicker) if exportType == "report": name = "业绩报表" sb = ts.get_report_data(year, quarter) elif exportType == "profit": name = "盈利能力报表" sb = ts.get_profit_data(year, quarter) elif exportType == "operation": name = "营运能力报表" sb = ts.get_operation_data(year, quarter) elif exportType == "growth": name = "成长能力报表" sb = ts.get_growth_data(year, quarter) elif exportType == "debtpaying": name = "偿债能力报表" sb = ts.get_debtpaying_data(year, quarter) elif exportType == "cashflow": name = "现金流量报表" sb = ts.get_cashflow_data(year, quarter) filename = quote(name + str(year) + "Q" + str(quarter) + ".xlsx") filepath = os.path.join(basedir, app.config['UPLOAD_FOLDER'], filename) sb.to_excel(filepath) rtn = send_file(filepath, as_attachment=True) rtn.headers['Content-Disposition'] += "; filename*=utf-8''%s" % (filename) return rtn
def update_basics(): basics = ts.get_stock_basics() f = os.path.join(DATA_DIR, 'basics.h5') basics.to_hdf(f, 'basics') length = 4 * 5 year, season = last_report_season() for i in range(length): f = os.path.join(DATA_DIR, 'basics-{0}-{1}.h5'.format(year, season)) if os.path.exists(f): continue report = ts.get_report_data(year, season) report.to_hdf(f, 'report') profit = ts.get_profit_data(year, season) profit.to_hdf(f, 'profit') operation = ts.get_operation_data(year, season) operation.to_hdf(f, 'operation') growth = ts.get_growth_data(year, season) growth.to_hdf(f, 'growth') debtpaying = ts.get_debtpaying_data(year, season) debtpaying.to_hdf(f, 'debtpaying') cashflow = ts.get_cashflow_data(year, season) cashflow.to_hdf(f, 'cashflow') season -= 1 if season == 0: season = 4 year -= 1
def get_basic_datas( data_kind): #datakind 为debtpaying,growth,operation,profit,report client = pymongo.MongoClient('localhost', 27017) table_stock = client['stock'] sheet = table_stock[data_kind] for year in range(STARTYEAR, ENDYEAR + 1): try: for season in range(1, 5): print('getting ' + datakind + ' data at year:' + str(year) + " season:" + str(season)) if data_kind == 'debtpaying': tf = ts.get_debtpaying_data(year, season) elif data_kind == 'growth': tf = ts.get_growth_data(year, season) elif data_kind == 'operation': tf = ts.get_operation_data(year, season) elif data_kind == 'profit': tf = ts.get_profit_data(year, season) elif data_kind == 'report': tf = ts.get_report_data(year, season) else: print('Not available data type of data_kind!') return jsonres = json.loads(tf.to_json(orient='records')) for j in jsonres: sheet.insert_one(j) except: #数据缺失,tushare接口会报网络错误 print('the year: ' + str(year) + ' lost data will begin next year') continue
def valuation_factor(year): report = ts.get_report_data(year,4) report = report.sort_values(by = 'code',axis = 0,ascending = True) report = report.reset_index(drop = True) report.to_csv("/home/yirui/Desktop/Quant/Report/%s.csv"%year, mode="w") profit = ts.get_profit_data(year, 4) profit = profit.sort_values(by='code', axis=0, ascending=True) profit = profit.reset_index(drop=True) profit.to_csv("/home/yirui/Desktop/Quant/Profit/%s.csv"%year,mode= "w") operation = ts.get_operation_data(year,4) operation = operation.sort_values(by='code', axis=0, ascending=True) operation = operation.reset_index(drop=True) operation.to_csv("/home/yirui/Desktop/Quant/Operation/%s.csv" % year, mode="w") growth = ts.get_growth_data(year,4) growth = growth.sort_values(by='code', axis=0, ascending=True) growth = growth.reset_index(drop=True) growth.to_csv("/home/yirui/Desktop/Quant/Growth/%s.csv" % year, mode="w") debtpaying = ts.get_debtpaying_data(year,4) debtpaying = debtpaying.sort_values(by='code', axis=0, ascending=True) debtpaying = debtpaying.reset_index(drop=True) debtpaying.to_csv("/home/yirui/Desktop/Quant/Debtpaying/%s.csv" % year, mode="w") cashflow = ts.get_cashflow_data(year,4) cashflow = cashflow.sort_values(by='code', axis=0, ascending=True) cashflow = cashflow.reset_index(drop=True) cashflow.to_csv("/home/yirui/Desktop/Quant/Cashflow/%s.csv" % year, mode="w")
def _fetch_finance(): for year in range(2004, 2018): set_year = lambda x: str(year) + '-' + x for quarter in range(1, 5): print(year, ' year ', 'quarter ', quarter) rep = ts.get_report_data( year, quarter)[['code', 'eps', 'bvps', 'epcf', 'report_date']] pro = ts.get_profit_data(year, quarter)[[ 'code', 'roe', 'net_profit_ratio', 'gross_profit_rate', 'net_profits', 'business_income', 'bips' ]] ope = ts.get_operation_data(year, quarter)[[ 'code', 'arturnover', 'arturndays', 'inventory_turnover', 'currentasset_turnover', 'currentasset_days' ]] gro = ts.get_growth_data( year, quarter)[['code', 'mbrg', 'nprg', 'nav', 'epsg', 'seg']] deb = ts.get_debtpaying_data(year, quarter)[[ 'code', 'currentratio', 'quickratio', 'cashratio', 'icratio', 'sheqratio', 'adratio' ]] cas = ts.get_cashflow_data(year, quarter)[[ 'code', 'cf_sales', 'rateofreturn', 'cf_nm', 'cf_liabilities', 'cashflowratio' ]] rep.rename(columns={'report_date': 'date'}, inplace=True) rep['date'] = rep['date'].apply(set_year) rep = rep.merge(pro, on='code', how='left') rep = rep.merge(ope, on='code', how='left') rep = rep.merge(gro, on='code', how='left') rep = rep.merge(deb, on='code', how='left') rep = rep.merge(cas, on='code', how='left') finance.insert(rep.to_dict('record')) print(year, quarter)
def get_temp_data(year, quarter): df1 = ts.get_report_data(year, quarter) #print (1) df1 = df1.merge(ts.get_profit_data(year, quarter), how='inner', on=['code', 'name']) #print (2) df1 = df1.merge(ts.get_operation_data(year, quarter), how='inner', on=['code', 'name']) #print (3, "n", df1) df1 = df1.merge(ts.get_growth_data(year, quarter), how='inner', on=['code', 'name']) #print (4) print(df1) df1 = df1.merge(ts.get_debtpaying_data(year, quarter), how='inner', on=['code', 'name']) #print (5) print(df1) df1 = df1.merge(ts.get_cashflow_data(year, quarter), how='inner', on=['code', 'name']) #print (6) (row, col) = df1.shape for i in range(0, row): df1.iloc[i, 0] = str(df1.iloc[i, 0]) return df1
def get_report_data(year, season): if not available(year, season): return None print("get_report_data") save(ts.get_report_data(year, season), "basics/report_data", year, season) print("get_profit_data") save(ts.get_profit_data(year, season), "basics/profit_data", year, season) filename = "operation_data" print("get_operation_data") save(ts.get_operation_data(year, season), "basics/operation_data", year, season) filename = "growth_data" print("get_growth_data") save(ts.get_growth_data(year, season), "basics/growth_data", year, season) filename = "get_debtpaying_data" print("get_debtpaying_data") save(ts.get_debtpaying_data(year, season), "basics/debtpaying_data", year, season) filename = "get_debtpaying_data" print("get_cashflow_data") save(ts.get_cashflow_data(year, season), "basics/cashflow_data", year, season)
def get_debtpaying_data(self, year, quarter): tsdata = ts.get_debtpaying_data( year=year, quarter=quarter, ) jsdata = To_Json(tsdata) return jsdata
def store_fund_data(quarter_list): stock2year_path = os.path.join(LastFilePath, "stock_fundm_info") for fun_year, fun_quarter in quarter_list: #every dataframe you craw down all needs remove the duplicated row. Only need keep the first row of duplicates. # stock2year_report is tushare:get_report_data (fundamental data). stock2year_report = ts.get_report_data( fun_year, fun_quarter).drop_duplicates(keep='first') # stock2year_prof is tushare.get_profit_data (fundamental data). stock2year_prof = ts.get_profit_data( fun_year, fun_quarter).drop_duplicates(keep='first') # stock2year_opera is tushare.get_operation_data (fundamental data). stock2year_opera = ts.get_operation_data( fun_year, fun_quarter).drop_duplicates(keep='first') #stock2year_grow is tushare.get_growth_data (fundamental data). stock2year_grow = ts.get_growth_data( fun_year, fun_quarter).drop_duplicates(keep='first') #stock2year_debt is tushare.get_debtpaying_data (fundamental data). stock2year_debt = ts.get_debtpaying_data( fun_year, fun_quarter).drop_duplicates(keep='first') #stock2year_cash is tushare.get_cashflow_data (fundamental data). stock2year_cash = ts.get_cashflow_data( fun_year, fun_quarter).drop_duplicates(keep='first') #stock2year_comb is to combine all the stock2year data of same year and quarter in a same stock code. stock2year_list = [stock2year_report,stock2year_prof,stock2year_opera,stock2year_grow, \ stock2year_debt,stock2year_cash] for every_fund_element in stock2year_list: every_fund_element = every_fund_element.set_index('code') #use pandas concat to combine all the dataframe along columns. total_fund = pd.concat(stock2year_list, axis=1) HeadName = fun_year + "/" + fun_quarter + "_" + "fundamt_info" CsvName = os.path.join(stock2year_path, "{}.csv".format(HeadName)) total_fund.to_csv(CsvName)
def stat_all(tmp_datetime): # 返回 31 天前的数据,做上个季度数据统计。 tmp_datetime_1month = tmp_datetime + datetime.timedelta(days=-31) year = int((tmp_datetime_1month).strftime("%Y")) quarter = int(pd.Timestamp(tmp_datetime_1month).quarter) # 获得上个季度的数据。 print("############ year %d, quarter %d", year, quarter) # 业绩报告(主表) data = ts.get_report_data(year, quarter) # 增加季度字段。 data = concat_quarter(year, quarter, data) # 处理重复数据,保存最新一条数据。最后一步处理,否则concat有问题。 data = data.drop_duplicates(subset="code", keep="last") global db # 插入数据库。 db.insert_db(data, "ts_report_data", True, "`quarter`,`code`") # 盈利能力 data = ts.get_profit_data(year, quarter) # 增加季度字段。 data = concat_quarter(year, quarter, data) # 处理重复数据,保存最新一条数据。 data = data.drop_duplicates(subset="code", keep="last") # 插入数据库。 db.insert_db(data, "ts_profit_data", True, "`quarter`,`code`") # 营运能力 data = ts.get_operation_data(year, quarter) # 增加季度字段。 data = concat_quarter(year, quarter, data) # 处理重复数据,保存最新一条数据。最后一步处理,否则concat有问题。 data = data.drop_duplicates(subset="code", keep="last") # 插入数据库。 db.insert_db(data, "ts_operation_data", True, "`quarter`,`code`") # 成长能力 data = ts.get_growth_data(year, quarter) # 增加季度字段。 data = concat_quarter(year, quarter, data) # 处理重复数据,保存最新一条数据。最后一步处理,否则concat有问题。 data = data.drop_duplicates(subset="code", keep="last") # 插入数据库。 db.insert_db(data, "ts_growth_data", True, "`quarter`,`code`") # 偿债能力 data = ts.get_debtpaying_data(year, quarter) # 增加季度字段。 data = concat_quarter(year, quarter, data) # 处理重复数据,保存最新一条数据。最后一步处理,否则concat有问题。 data = data.drop_duplicates(subset="code", keep="last") # 插入数据库。 db.insert_db(data, "ts_debtpaying_data", True, "`quarter`,`code`") # 现金流量 data = ts.get_cashflow_data(year, quarter) # 增加季度字段。 data = concat_quarter(year, quarter, data) # 处理重复数据,保存最新一条数据。最后一步处理,否则concat有问题。 data = data.drop_duplicates(subset="code", keep="last") # 插入数据库。 db.insert_db(data, "ts_cashflow_data", True, "`quarter`,`code`")
def get_stock_debtpaying(year, season, engine): frame = ts.get_debtpaying_data(year, season) table_name = 'stock_debtpaying_' + str(year) + 's' + str(season) if useDB == True: frame.to_sql(table_name, engine) else: frame.to_csv(table_name + '.csv')
def fetchByYearQuarter(self, mongo, type): years = range(self.begin_year, self.end_year_notinclude) quarters = range(1, 5) for year in years: for quarter in quarters: print(str(type) + '_' + str(year) + '_' + str(quarter)) if (type == 'report_data'): df = fd.get_report_data(year, quarter) elif (type == 'profit_data'): df = ts.get_profit_data(year, quarter) elif (type == 'operation_data'): df = ts.get_operation_data(year, quarter) elif (type == 'growth_data'): df = ts.get_growth_data(year, quarter) elif (type == 'debtpaying_data'): df = ts.get_debtpaying_data(year, quarter) elif (type == 'cashflow_data'): df = ts.get_cashflow_data(year, quarter) else: df = {} tmpJson = json.loads(df.to_json(orient='records')) for i in range(len(tmpJson)): tmpJson[i][u'year'] = int(year) tmpJson[i][u'quarter'] = int(quarter) coll = mongo.fundemental[type] coll2 = mongo.fundemental[str(type) + '_' + str(year) + '_' + str(quarter)] coll2.insert(tmpJson) coll.insert(tmpJson)
def get_debtpaying_data(year, quarter): try: df = ts.get_debtpaying_data(year, quarter) engine = create_engine('mysql://*****:*****@127.0.0.1/stock?charset=utf8') df.to_sql('debtpaying_data', engine, if_exists='append') print "message" except Exception, e: e.message
def get_stock_debtpaying(nd, jd): """ 获取偿债能力 """ try: res = ts.get_debtpaying_data(nd, jd) return res except: return None
def get_debtpaying_data_dict(year, season): ddf = ts.get_debtpaying_data(year, season) ret_dict = {} if ddf is None: return ret_dict records = ddf.to_dict("records") for data in records: ret_dict[data['code']] = data return ret_dict
def single_stock_report(code, year_start, k_index): """ :param code: the valid stock code, for example '002146' :param year_start: the start date that we want to check the stock report, for example '201801' :param k_index: the performance of report we want to check :return: DataFrame table: the index is the quarter from start to end, the """ if code is None: raise ValueError('please assign code') if year_start is None: raise ValueError('please assign year') if k_index is None: raise ValueError('please assign index') year_to_market = stock_list('timeToMarket') ytm = year_to_market[year_to_market.index == code] ytm = str(ytm.iloc[0]) if ytm >= year_start: qs = getBetweenQuarter(ytm) else: qs = getBetweenQuarter(year_start) j = len(qs) - 1 results = pd.DataFrame() new_index = [] for i in range(j): year = int(qs[i].split('Q')[0]) q = int(qs[i].split('Q')[1]) n = 1 data = [] while n < 10: if k_index == 'get_profit_data': data = ts.get_profit_data(int(year), q) elif k_index == 'get_report_data': data = ts.get_report_data(int(year), q) elif k_index == 'get_operation_data': data = ts.get_operation_data(int(year), q) elif k_index == 'get_growth_data': data = ts.get_growth_data(int(year), q) elif k_index == 'get_debtpaying_data': data = ts.get_debtpaying_data(int(year), q) elif k_index == 'get_cashflow_data': data = ts.get_cashflow_data(int(year), q) else: raise Exception('the k_indexs is not correct') result = data[data['code'] == code] if len(result) >= 1: new_index.append('%d0%d' % (year, q)) results = results.append(result[0:1], ignore_index=True) print(results) break elif len(result) == 0: n += 1 continue new_index_1 = pd.DataFrame({"Y_Q": new_index}) frames = [results, new_index_1] return pd.concat(frames, axis=1)
def Deep_Data_Median(self): # 1/行业利润概览 l1 = list() for x in self.fd_data_index: z = ts.get_profit_data(x, 4) k = z[z["code"].isin(self.idstry_code_list)].iloc[:, 1:].median() l1.append(k) all_idt_PF_data = pd.DataFrame(l1, index=self.fd_data_index) all_idt_PF_data.dropna(how="all", inplace=True) # 2/行业现金流量概览 l2 = list() for x in self.fd_data_index: z = ts.get_cashflow_data(x, 4) k = z[z["code"].isin(self.idstry_code_list)].iloc[:, 1:].median() l2.append(k) all_idt_CS_data = pd.DataFrame(l2, index=self.fd_data_index) all_idt_CS_data.dropna(how="all", inplace=True) # 3/行业偿债能力概览 l3 = list() for x in self.fd_data_index: z = ts.get_debtpaying_data(x, 4) k = z[z["code"].isin(self.idstry_code_list)].iloc[:, 1:].median() l3.append(k) all_idt_DP_data = pd.DataFrame(l3, index=self.fd_data_index) all_idt_DP_data.dropna(how="all", inplace=True) # 4/行业营运能力概览 l4 = list() for x in self.fd_data_index: z = ts.get_operation_data(x, 4) k = z[z["code"].isin(self.idstry_code_list)].iloc[:, 1:].median() l4.append(k) all_idt_OP_data = pd.DataFrame(l4, index=self.fd_data_index) all_idt_OP_data.dropna(how="all", inplace=True) # 5/行业成长能力概览 l5 = list() for x in self.fd_data_index: z = ts.get_growth_data(x, 4) k = z[z["code"].isin(self.idstry_code_list)].iloc[:, 1:].median() l5.append(k) all_idt_GR_data = pd.DataFrame(l5, index=self.fd_data_index) all_idt_GR_data.dropna(how="all", inplace=True) # 保存数据 data_writer = pd.ExcelWriter("股票{}的基本面数据全览(中位数视角).xlsx".format(self.code)) self.All_Ratio_data.T.sort_index(ascending=True).to_excel(data_writer, sheet_name="基本面财务比率") self.All_Basic_data.T.sort_index(ascending=True).to_excel(data_writer, sheet_name="基本面财务数据") self.All_Growth_data.sort_index(ascending=True).to_excel(data_writer, sheet_name="基本面数据增长率") self.All_Valuation_data.T.sort_index(ascending=True).to_excel(data_writer, sheet_name="三项估值指标") self.Dupon_data.T.sort_index(ascending=True).to_excel(data_writer, sheet_name="杜邦分析表") all_idt_PF_data.sort_index(ascending=True).to_excel(data_writer, sheet_name="行业利润历史") all_idt_CS_data.sort_index(ascending=True).to_excel(data_writer, sheet_name="行业现金历史") all_idt_OP_data.sort_index(ascending=True).to_excel(data_writer, sheet_name="行业营运历史") all_idt_GR_data.sort_index(ascending=True).to_excel(data_writer, sheet_name="行业成长历史") all_idt_DP_data.sort_index(ascending=True).to_excel(data_writer, sheet_name="行业偿债历史") data_writer.save()
def get_fundamental_data(year,quarter): fundamental = {} fundamental['basic'] =ts.get_stock_basics() fundamental['report']=ts.get_report_data(year, quarter) fundamental['profit']=ts.get_profit_data(year, quarter) fundamental['operation']=ts.get_operation_data(year, quarter) fundamental['cashflow']=ts.get_cashflow_data(year, quarter) fundamental['growth']=ts.get_growth_data(year, quarter) fundamental['debt']=ts.get_debtpaying_data(year, quarter) return fundamental
def debt(): """偿债能力 code,代码 name,名称 currentratio,流动比率 quickratio,速动比率 cashratio,现金比率 icratio,利息支付倍数 sheqratio,股东权益比率 adratio,股东权益增长率 """ # 获取2014年第3季度的偿债能力数据 df = ts.get_debtpaying_data(2014, 3) print(df)
def getDebtpaying(self, year, season): ''' 获取股票运营能力数据 ''' print 'getDebtpaying work' df = ts.get_debtpaying_data(year, season) df.to_csv(self.__filename) self.__getDataCommSeason(9, year, season, "Debtpaying")
def year_report(year): df0 = ts.get_report_data(year, 4) df0.to_sql(str(year) + '_main', engine) df1 = ts.get_profit_data(year, 4) df1.to_sql(str(year) + '_profit', engine) df2 = ts.get_growth_data(year, 4) df2.to_sql(str(year) + '_growth', engine) df3 = ts.get_debtpaying_data(year, 4) df3.to_sql(str(year) + '_debtpaying', engine) df4 = ts.get_cashflow_data(year, 4) df4.to_sql(str(year) + '_cashflow', engine)
def get_debtpaying_data(start=None, end=None): startArr = start.split("-") endArr = end.split("-") seasonList = dataUtil.getListSeason(int(startArr[0]), int(startArr[1]), int(endArr[0]), int(endArr[1])) for season in seasonList : df = ts.get_debtpaying_data(season[0],season[1]).drop_duplicates('code') dt = pd.DataFrame({"date": np.array([str(season[0])+"-"+str(season[1])]*len(df))},index=df.index) df = pd.concat([df,dt],axis=1) dataPath = TushareApi.path + "debtpaying_data/" +str(season[0])+"-"+str(season[1])+"-debtpaying_data.csv" fileUtil.saveDf(df,dataPath)
def get_debtpaying_data(cls, year, quarter): # 偿债能力数据 data = ts.get_debtpaying_data(year, quarter) data['year'] = year data['quarter'] = quarter data.reset_index(drop=True, inplace=True) ModelData.remove_data(table_name='debtpaying_data', year=year, quarter=quarter) print('remove success') ModelData.insert_data(table_name='debtpaying_data', data=data) print('write success')
def debtpaying_data(year): for i in range(4): quarter = i + 1 predts = ts.get_debtpaying_data(year, quarter) if predts is not None: predts['year'] = year predts['quarter'] = quarter predts.to_sql('fundamental_debtpaying_data', engine, flavor='mysql', if_exists='append')
def get_debtpaying_data(year, quarter): try: logger.info('get %s year %s quarter debtpaying data starting...' % (year, quarter)) df = ts.get_debtpaying_data(year, quarter) logger.info('get %s year %s quarter debtpaying data end...' % (year, quarter)) except: logger.exception( 'some errors between get %s year %s quarter debtpaying data end...' % (year, quarter)) return df
def getDebtReport(year): for q in [1, 2, 3, 4]: getDownloaded() if not isDownloaded(debtByYear(year, q)): try: print('Debt of ' + str(year) + ' and quarter is ' + str(q)) debt_df = ts.get_debtpaying_data(year, q) debt_df.to_csv(path + debtByYear(year, q)) time.sleep(5) except Exception: logError('Debt: ' + str(year) + '-' + str(q) + ' there is problem, will skip it. ')
def sync_debtpaying_data(): ''' sync debtpaying data ''' year = datetime.datetime.now().year month = datetime.datetime.now().month seaon = month/3 if month<3: year = year - 1 seaon = 4 monthstr = '%s%s'%(year,seaon) DataFrameToMongo(ts.get_debtpaying_data(year, seaon), MongoClient(mongourl)['stoinfo']['debtpaying_data'], ['code'], monthstr)
def fImportDebtpayingData(year="", quarter=""): df = ts.get_debtpaying_data(year=year, quarter=quarter) df['YEAR'] = year df['QUARTER'] = quarter df = df.replace("--", np.nan) #print(df) df["currentratio"] = df["currentratio"].astype('float64') df["quickratio"] = df["quickratio"].astype('float64') df["cashratio"] = df["cashratio"].astype('float64') df["icratio"] = df["icratio"].astype('float64') df["sheqratio"] = df["sheqratio"].astype('float64') df["adratio"] = df["adratio"].astype('float64') engine = create_engine('oracle://c##tushare:didierg160@myoracle') df.to_sql('tb_stock_debtpaying_data', engine, if_exists='append')
def report_type_chosen(report_type, year, season): """ report_type: 报表种类 year: 报表年份 season: 报表季度 """ report_type_dict = { '主表': ts.get_report_data(year, season), '收益表': ts.get_profit_data(year, season), '运营表': ts.get_operation_data(year, season), '现金流表': ts.get_cashflow_data(year, season), '偿债能力表': ts.get_debtpaying_data(year, season), } report_chosen = report_type_dict.get(report_type) return report_chosen
def debt(engine, year, quarter): tbl = "basic_debt" tsl.log(tbl + " start...") try: df = ts.get_debtpaying_data(year, quarter) df = df.set_index('code', drop='true') df['year'] = year df['quarter'] = quarter df = df.fillna(0) df.to_sql(tbl, engine, if_exists='append') print tsl.log(tbl + " done") except BaseException, e: print print e tsl.log(tbl + " error")
def update_basics(): basics = ts.get_stock_basics() f = os.path.join(base_dir, 'basics.h5') basics.to_hdf(f, 'basics') today = datetime.date.today() current_year = today.year current_season = today.month / 3 if current_season == 0: current_year -= 1 current_season = 4 length = 4 * 5 year = current_year season = current_season for i in range(length): f = os.path.join(base_dir, 'basics-{0}-{1}.h5'.format(year, season)) if os.path.exists(f): continue print(f) report = ts.get_report_data(year, season) report.to_hdf(f, 'report') profit = ts.get_profit_data(year, season) profit.to_hdf(f, 'profit') operation = ts.get_operation_data(year, season) operation.to_hdf(f, 'operation') growth = ts.get_growth_data(year, season) growth.to_hdf(f, 'growth') debtpaying = ts.get_debtpaying_data(year, season) debtpaying.to_hdf(f, 'debtpaying') cashflow = ts.get_cashflow_data(year, season) cashflow.to_hdf(f, 'cashflow') season -= 1 if season == 0: season = 4 year -= 1
def updatedebtpay(): debtpaydatalist=ts.get_debtpaying_data(2014,1) debtpaydata=pd.DataFrame(debtpaydatalist) conn= ms.connect(host='localhost',port = 3306,user='******', passwd='123456',db ='investment',charset="utf8") cur = conn.cursor() values=[] for index,row in debtpaydata.iterrows(): if row['currentratio']=='--': currentratio=0 else: currentratio=row['currentratio'] if row['quickratio']=='--': quickratio=0 else: quickratio=row['quickratio'] if row['cashratio']=='--': cashratio=0 else: cashratio=row['cashratio'] if row['icratio']=='--': icratio=0 else: icratio=row['icratio'] if row['sheqratio']=='--': sheqratio=0 else: sheqratio=row['sheqratio'] if row['adratio']=='--': adratio=0 else: adratio=row['adratio'] values.append((row['code'],row['name'],currentratio,quickratio,cashratio,icratio,sheqratio,adratio)) cur.executemany('insert into debtpay20141 (code,name,currentratio,quickratio,cashratio,icratio,sheqratio,adratio) values(%s,%s,%s,%s,%s,%s,%s,%s)',values) conn.commit() cur.close() conn.close()
import tushare as ts import pandas as pd from tushare.stock import cons as ct import lxml.html from lxml import etree import re from pandas.compat import StringIO try: from urllib.request import urlopen, Request except ImportError: from urllib2 import urlopen, Request df = ts.get_debtpaying_data(year=2016,quarter=1,orderby='errors') print (df) df.to_excel('/Users/elliot/Downloads/debtpaying_data2016Q1.xlsx') """ year=2016 quarter=1 orderby='default' pageNo=1 urla=(ct.PROFIT_URL % (ct.P_TYPE['http'], ct.DOMAINS['vsf'], ct.PAGES['fd'], year, quarter, pageNo, ct.PAGE_NUM[1], orderby)) print (urla) """
def pick_data(self, max_num_threads = 20, pause = 0): """ pick all necessary data from local database and from internet for loaded stocks. This function will take a while. """ logging.info('getting basics from tushare') self._init_stock_objs() # self.data_manager.drop_stock() # self.stocks = {key: self.stocks[key] for key in ['600233', '600130']} logging.info('totally there are %d listed companies' % len(self.stocks)) logging.info('get indexes from tushare') self._get_indexes() # self._pick_hist_data_and_save(self.stocks, False, self.indexes['000001'].hist_start_date, max_num_threads) logging.info('getting last stock trading data') df = ts.get_today_all() self._extract_from_dataframe(df, ignore=('changepercent', 'open', 'high', 'low', 'settlement', 'volume', 'turnoverratio', 'amount'), remap={'trade': 'price', 'per': 'pe'}) # calculate the report quarter report_year, report_quarter = ts.get_last_report_period() logging.info('getting last report (%d quarter %d) from tushare' % (report_year, report_quarter)) df = ts.get_report_data(report_year, report_quarter) self._extract_from_dataframe(df) logging.info('getting last profit data from tushare') df = ts.get_profit_data(report_year, report_quarter) self._extract_from_dataframe(df, ignore=('net_profits', 'roe', 'eps')) logging.info('getting last operation data from tushare') df = ts.get_operation_data(report_year, report_quarter) self._extract_from_dataframe(df) logging.info('getting last growth data from tushare') df = ts.get_growth_data(report_year, report_quarter) self._extract_from_dataframe(df) logging.info('getting last debtpaying data from tushare') df = ts.get_debtpaying_data(report_year, report_quarter) self._extract_from_dataframe(df) logging.info('getting last cashflow data from tushare') df = ts.get_cashflow_data(report_year, report_quarter) self._extract_from_dataframe(df) logging.info('getting history trading data from tushare') start_from = self.indexes['000001'].hist_start_date data_full = self._pick_hist_data_and_save(self.stocks, False, start_from, max_num_threads, pause) # anything that pulling data must before here self._remove_unavailable_stocks() ''' # calculate qianfuquan data # deprecated due to precision issue for code, stock in self.stocks.items(): for i in range(1, len(stock.hist_data.index)-1): b = stock.hist_data.at[stock.hist_data.index[i], 'close'] a = stock.hist_data.at[stock.hist_data.index[i+1], 'close'] p = stock.hist_data.at[stock.hist_data.index[i+1], 'p_change'] / 100.0 q = (p*a+a)/b if q > 1.1: print('%s chuq-uan %s: %s %s %s, 1/%s' % (stock, stock.hist_data.index[i], b, a, p, q)) ''' return data_full
#coding=utf-8 import tushare as ts # 获取沪深上市公司基本情况 df = ts.get_stock_basics() date = df.ix['600848']['timeToMarket']#上市日期YYYYMMDD #获取2014年第3季度的业绩报表数据 ts.get_report_data(2014,3) #获取2014年第3季度的盈利能力数据 ts.get_profit_data(2014,3) #获取2014年第3季度的营运能力数据 ts.get_operation_data(2014,3) #获取2014年第3季度的成长能力数据 ts.get_growth_data(2014,3) #获取2014年第3季度的偿债能力数据 ts.get_debtpaying_data(2014,3) #获取2014年第3季度的现金流量数据 ts.get_cashflow_data(2014,3)
def download_debtpaying_data(file_path, year, quarter): debtpaying_data = ts.get_debtpaying_data(year, quarter) if debtpaying_data is not None: debtpaying_data.to_csv(file_path + 'debtpaying_' + str(year) + '_' + str(quarter) + '.csv', encoding='utf-8')
FinancialData = ts.get_report_data(CURRENT.year, np.floor((CURRENT.month+2)/3)-1) FinancialData = FinancialData.set_index('code') FinancialData = FinancialData.drop(['name', 'bvps', 'distrib', 'epcf', 'report_date'], axis = 1) FinancialData.to_csv('./ASHR/DATA/FinancialData_2015_1.csv', index = True) ProfitData = ts.get_profit_data(CURRENT.year, np.floor((CURRENT.month+2)/3)-1) ProfitData = ProfitData.set_index('code') ProfitData = ProfitData.drop(['name', 'business_income', 'net_profits'], axis = 1) ProfitData.to_csv('./ASHR/DATA/ProfitData_2015_1.csv', index = True) GrowthData = ts.get_growth_data(CURRENT.year, np.floor((CURRENT.month+2)/3)-1) GrowthData = GrowthData.set_index('code') GrowthData = GrowthData.drop(['name'], axis = 1) GrowthData.to_csv('./ASHR/DATA/GrowthData_2015_1.csv', index = True) DebtPayingData = ts.get_debtpaying_data(CURRENT.year, np.floor((CURRENT.month+2)/3)-1) DebtPayingData = DebtPayingData.set_index('code') DebtPayingData = DebtPayingData.drop(['name', 'sheqratio', 'adratio'], axis = 1) DebtPayingData.to_csv('./ASHR/DATA/DebtPayingData_2014_12.csv', index = True) # Merging data for subtab in [FinancialData, ProfitData, GrowthData, DebtPayingData]: StockInfo = pd.merge(StockInfo, subtab, how = 'outer', on = 'code') # Saving data StockInfo = StockInfo.to_csv('./ASHR/DATA/StockInfo.csv', index = True) ######################## ## Trade Tick Data ##
df = ts.get_growth_data(currentYear,currentSeason) df = df.assign(quater=currentQuater) df.to_sql('growth_data',engine, if_exists='append') # import debt pay data #code,代码 #name,名称 #currentratio,流动比率 #quickratio,速动比率 #cashratio,现金比率 #icratio,利息支付倍数 #sheqratio,股东权益比率 #adratio,股东权益增长率 df = ts.get_debtpaying_data(lastYear,lastSeason) df = df.assign(quater=lastQuater) df.to_sql('debtpay_data',engine, if_exists='replace') df = ts.get_debtpaying_data(currentYear,currentSeason) df = df.assign(quater=currentQuater) df.to_sql('debtpay_data',engine, if_exists='append') # import cashflow data #code,代码 #name,名称 #cf_sales,经营现金净流量对销售收入比率 #rateofreturn,资产的经营现金流量回报率 #cf_nm,经营现金净流量与净利润的比率 #cf_liabilities,经营现金净流量对负债比率
def getdebtpaying(year,quarter): dfdebtpaying = ts.get_debtpaying_data(year,quarter) dfdebtpaying.insert(0,'uploadtime',nowtime) dfdebtpaying.insert(1,'year',year) dfdebtpaying.insert(2,'quarter',quarter) dfdebtpaying.to_sql('tb_debtpaying',engine,if_exists='append')
import tushare as ts from sqlalchemy import create_engine import mysql.connector conn = create_engine('mysql+mysqlconnector://stockadmin:stock2016@localhost/stock?charset=utf8') for year in range(2004,2012): for season in range(1,5): print(year,season) df_profit = ts.get_profit_data(year,season) time.sleep(15) df_growth = ts.get_growth_data(year,season) time.sleep(15) df_operation = ts.get_operation_data(year,season) time.sleep(15) df_debtpaying = ts.get_debtpaying_data(year,season) time.sleep(15) df_cashflow = ts.get_cashflow_data(year,season) time.sleep(15) df_report = ts.get_report_data(year,season) df_profit.to_sql('profit',conn,if_exists='append') df_growth.to_sql('growth',conn,if_exists='append') df_operation.to_sql('operation',conn,if_exists='append') df_debtpaying.to_sql('debtpaying',conn,if_exists='append') df_cashflow.to_sql('cashflow',conn,if_exists='append') df_report.to_sql('report',conn,if_exists='append') #sys.exit(1) #追加数据到现有表 #df.to_sql('tick_data',engine,if_exists='append')
import tushare as ts import sys df = ts.get_debtpaying_data(int(sys.argv[1]), int(sys.argv[2])) df.to_csv(sys.argv[3], encoding="utf8")