def update_single_basic_ele(ele_name, retry_times): for year in range(2001, int(year_now) + 1): for quarter in range(1, 5): table_name = ele_name + str(year) + '0' + str(quarter) if not is_table_exist(conn=conn_profit, database_name=stk_profit_data_db_name, table_name=table_name): """失败达到一定次数便不再尝试""" failure_time = 0 go_on_flag = True while go_on_flag & (failure_time < retry_times): try: if ele_name == "profit": ts.get_profit_data(year=year,quarter=quarter)\ .to_sql(con=engine_profit, name=table_name, if_exists='append', schema=stk_profit_data_db_name, index=False) elif ele_name == "growth": ts.get_growth_data(year=year, quarter=quarter) \ .to_sql(con=engine_growth, name=table_name, if_exists='append', schema=stk_growth_data_db_name, index=False,) elif ele_name == "operation": ts.get_operation_data(year=year, quarter=quarter) \ .to_sql(con=engine_operation, name=table_name, if_exists='append', schema=stk_operation_data_db_name, index=False) elif ele_name == "debtpaying": ts.get_debtpaying_data(year=year, quarter=quarter) \ .to_sql(con=engine_debtpaying, name=table_name, if_exists='append', schema=stk_debtpaying_data_db_name, index=False) elif ele_name == "cashflow": ts.get_cashflow_data(year=year, quarter=quarter) \ .to_sql(con=engine_cashflow, name=table_name, if_exists='append', schema=stk_cashflow_data_db_name, index=False) go_on_flag = False except: print(table_name + "下载失败!重试!\n") failure_time = failure_time + 1 else: print(table_name + "已经存在!\n")
def get_operation_data(self, year, loops): for i in range(1,loops): print(i) for j in year: print(year) try: ts.get_operation_data(j, 4).to_csv('operation_data_%d.csv'%j, encoding='utf-8') print(j) year.remove(j) except: pass
def basic_information(): ts.get_cashflow_data(2017, 1).to_sql('cash_flow', engine, if_exists='append') ts.get_debtpaying_data(2017, 1).to_sql('debtpaying', engine, if_exists='append') ts.get_growth_data(2017, 1).to_sql('growth', engine, if_exists='append') ts.get_operation_data(2017, 1).to_sql('operation', engine, if_exists='append') ts.get_profit_data(2017, 1).to_sql('profit', engine, if_exists='append') ts.get_report_data(2017, 2).to_sql('report', engine, if_exists='append') print('basic information over ....')
def operation_data(self, year, quarter): #营运能力 df = ts.get_operation_data(year, quarter) for indexs in df.index: code = str(df.loc[indexs, ["code"]].values[0]) name = str(df.loc[indexs, ["name"]].values[0]) arturnover = str(df.loc[indexs, ["arturnover"]].values[0]) arturnover = '0' if arturnover in ('nan', '--') else arturnover arturndays = str(df.loc[indexs, ["arturndays"]].values[0]) arturndays = '0' if arturndays in ('nan', '--') else arturndays inventory_turnover = str(df.loc[indexs, ["inventory_turnover"]].values[0]) inventory_turnover = '0' if inventory_turnover in ( 'nan', '--') else inventory_turnover inventory_days = str(df.loc[indexs, ["inventory_days"]].values[0]) inventory_days = '0' if inventory_days in ( 'nan', '--') else inventory_days currentasset_turnover = str( df.loc[indexs, ["currentasset_turnover"]].values[0]) currentasset_turnover = '0' if currentasset_turnover in ( 'nan', '--') else currentasset_turnover currentasset_days = str(df.loc[indexs, ["currentasset_days"]].values[0]) currentasset_days = '0' if currentasset_days in ( 'nan', '--') else currentasset_days insert = ( "insert into operation_data(code,name,arturnover,arturndays,inventory_turnover,inventory_days,currentasset_turnover,currentasset_days,year,quarter) values('" + code + "','" + name + "'," + arturnover + "," + arturndays + "," + inventory_turnover + "," + inventory_days + "," + currentasset_turnover + "," + currentasset_days + ",'" + str(year) + "','" + str(quarter) + "')") self.dbA.updateInsertDelete(insert) self.dbA.commit()
def export(exportType, datePicker): sb = None [year, quarter] = getYearQuarter(datePicker) if exportType == "report": name = "业绩报表" sb = ts.get_report_data(year, quarter) elif exportType == "profit": name = "盈利能力报表" sb = ts.get_profit_data(year, quarter) elif exportType == "operation": name = "营运能力报表" sb = ts.get_operation_data(year, quarter) elif exportType == "growth": name = "成长能力报表" sb = ts.get_growth_data(year, quarter) elif exportType == "debtpaying": name = "偿债能力报表" sb = ts.get_debtpaying_data(year, quarter) elif exportType == "cashflow": name = "现金流量报表" sb = ts.get_cashflow_data(year, quarter) filename = quote(name + str(year) + "Q" + str(quarter) + ".xlsx") filepath = os.path.join(basedir, app.config['UPLOAD_FOLDER'], filename) sb.to_excel(filepath) rtn = send_file(filepath, as_attachment=True) rtn.headers['Content-Disposition'] += "; filename*=utf-8''%s" % (filename) return rtn
def stat_all(tmp_datetime): # 返回 31 天前的数据,做上个季度数据统计。 tmp_datetime_1month = tmp_datetime + datetime.timedelta(days=-31) year = int((tmp_datetime_1month).strftime("%Y")) quarter = int(pd.Timestamp(tmp_datetime_1month).quarter) # 获得上个季度的数据。 print("############ year %d, quarter %d", year, quarter) # 业绩报告(主表) data = ts.get_report_data(year, quarter) # 增加季度字段。 data = concat_quarter(year, quarter, data) # 处理重复数据,保存最新一条数据。最后一步处理,否则concat有问题。 data = data.drop_duplicates(subset="code", keep="last") global db # 插入数据库。 db.insert_db(data, "ts_report_data", True, "`quarter`,`code`") # 盈利能力 data = ts.get_profit_data(year, quarter) # 增加季度字段。 data = concat_quarter(year, quarter, data) # 处理重复数据,保存最新一条数据。 data = data.drop_duplicates(subset="code", keep="last") # 插入数据库。 db.insert_db(data, "ts_profit_data", True, "`quarter`,`code`") # 营运能力 data = ts.get_operation_data(year, quarter) # 增加季度字段。 data = concat_quarter(year, quarter, data) # 处理重复数据,保存最新一条数据。最后一步处理,否则concat有问题。 data = data.drop_duplicates(subset="code", keep="last") # 插入数据库。 db.insert_db(data, "ts_operation_data", True, "`quarter`,`code`") # 成长能力 data = ts.get_growth_data(year, quarter) # 增加季度字段。 data = concat_quarter(year, quarter, data) # 处理重复数据,保存最新一条数据。最后一步处理,否则concat有问题。 data = data.drop_duplicates(subset="code", keep="last") # 插入数据库。 db.insert_db(data, "ts_growth_data", True, "`quarter`,`code`") # 偿债能力 data = ts.get_debtpaying_data(year, quarter) # 增加季度字段。 data = concat_quarter(year, quarter, data) # 处理重复数据,保存最新一条数据。最后一步处理,否则concat有问题。 data = data.drop_duplicates(subset="code", keep="last") # 插入数据库。 db.insert_db(data, "ts_debtpaying_data", True, "`quarter`,`code`") # 现金流量 data = ts.get_cashflow_data(year, quarter) # 增加季度字段。 data = concat_quarter(year, quarter, data) # 处理重复数据,保存最新一条数据。最后一步处理,否则concat有问题。 data = data.drop_duplicates(subset="code", keep="last") # 插入数据库。 db.insert_db(data, "ts_cashflow_data", True, "`quarter`,`code`")
def get_basic_datas( data_kind): #datakind 为debtpaying,growth,operation,profit,report client = pymongo.MongoClient('localhost', 27017) table_stock = client['stock'] sheet = table_stock[data_kind] for year in range(STARTYEAR, ENDYEAR + 1): try: for season in range(1, 5): print('getting ' + datakind + ' data at year:' + str(year) + " season:" + str(season)) if data_kind == 'debtpaying': tf = ts.get_debtpaying_data(year, season) elif data_kind == 'growth': tf = ts.get_growth_data(year, season) elif data_kind == 'operation': tf = ts.get_operation_data(year, season) elif data_kind == 'profit': tf = ts.get_profit_data(year, season) elif data_kind == 'report': tf = ts.get_report_data(year, season) else: print('Not available data type of data_kind!') return jsonres = json.loads(tf.to_json(orient='records')) for j in jsonres: sheet.insert_one(j) except: #数据缺失,tushare接口会报网络错误 print('the year: ' + str(year) + ' lost data will begin next year') continue
def update_basics(): basics = ts.get_stock_basics() f = os.path.join(DATA_DIR, 'basics.h5') basics.to_hdf(f, 'basics') length = 4 * 5 year, season = last_report_season() for i in range(length): f = os.path.join(DATA_DIR, 'basics-{0}-{1}.h5'.format(year, season)) if os.path.exists(f): continue report = ts.get_report_data(year, season) report.to_hdf(f, 'report') profit = ts.get_profit_data(year, season) profit.to_hdf(f, 'profit') operation = ts.get_operation_data(year, season) operation.to_hdf(f, 'operation') growth = ts.get_growth_data(year, season) growth.to_hdf(f, 'growth') debtpaying = ts.get_debtpaying_data(year, season) debtpaying.to_hdf(f, 'debtpaying') cashflow = ts.get_cashflow_data(year, season) cashflow.to_hdf(f, 'cashflow') season -= 1 if season == 0: season = 4 year -= 1
def valuation_factor(year): report = ts.get_report_data(year,4) report = report.sort_values(by = 'code',axis = 0,ascending = True) report = report.reset_index(drop = True) report.to_csv("/home/yirui/Desktop/Quant/Report/%s.csv"%year, mode="w") profit = ts.get_profit_data(year, 4) profit = profit.sort_values(by='code', axis=0, ascending=True) profit = profit.reset_index(drop=True) profit.to_csv("/home/yirui/Desktop/Quant/Profit/%s.csv"%year,mode= "w") operation = ts.get_operation_data(year,4) operation = operation.sort_values(by='code', axis=0, ascending=True) operation = operation.reset_index(drop=True) operation.to_csv("/home/yirui/Desktop/Quant/Operation/%s.csv" % year, mode="w") growth = ts.get_growth_data(year,4) growth = growth.sort_values(by='code', axis=0, ascending=True) growth = growth.reset_index(drop=True) growth.to_csv("/home/yirui/Desktop/Quant/Growth/%s.csv" % year, mode="w") debtpaying = ts.get_debtpaying_data(year,4) debtpaying = debtpaying.sort_values(by='code', axis=0, ascending=True) debtpaying = debtpaying.reset_index(drop=True) debtpaying.to_csv("/home/yirui/Desktop/Quant/Debtpaying/%s.csv" % year, mode="w") cashflow = ts.get_cashflow_data(year,4) cashflow = cashflow.sort_values(by='code', axis=0, ascending=True) cashflow = cashflow.reset_index(drop=True) cashflow.to_csv("/home/yirui/Desktop/Quant/Cashflow/%s.csv" % year, mode="w")
def get_report_data(year, season): if not available(year, season): return None print("get_report_data") save(ts.get_report_data(year, season), "basics/report_data", year, season) print("get_profit_data") save(ts.get_profit_data(year, season), "basics/profit_data", year, season) filename = "operation_data" print("get_operation_data") save(ts.get_operation_data(year, season), "basics/operation_data", year, season) filename = "growth_data" print("get_growth_data") save(ts.get_growth_data(year, season), "basics/growth_data", year, season) filename = "get_debtpaying_data" print("get_debtpaying_data") save(ts.get_debtpaying_data(year, season), "basics/debtpaying_data", year, season) filename = "get_debtpaying_data" print("get_cashflow_data") save(ts.get_cashflow_data(year, season), "basics/cashflow_data", year, season)
def fetchByYearQuarter(self, mongo, type): years = range(self.begin_year, self.end_year_notinclude) quarters = range(1, 5) for year in years: for quarter in quarters: print(str(type) + '_' + str(year) + '_' + str(quarter)) if (type == 'report_data'): df = fd.get_report_data(year, quarter) elif (type == 'profit_data'): df = ts.get_profit_data(year, quarter) elif (type == 'operation_data'): df = ts.get_operation_data(year, quarter) elif (type == 'growth_data'): df = ts.get_growth_data(year, quarter) elif (type == 'debtpaying_data'): df = ts.get_debtpaying_data(year, quarter) elif (type == 'cashflow_data'): df = ts.get_cashflow_data(year, quarter) else: df = {} tmpJson = json.loads(df.to_json(orient='records')) for i in range(len(tmpJson)): tmpJson[i][u'year'] = int(year) tmpJson[i][u'quarter'] = int(quarter) coll = mongo.fundemental[type] coll2 = mongo.fundemental[str(type) + '_' + str(year) + '_' + str(quarter)] coll2.insert(tmpJson) coll.insert(tmpJson)
def get_operation_data(self, year, quarter): tsdata = ts.get_operation_data( year=year, quarter=quarter, ) jsdata = To_Json(tsdata) return jsdata
def get_temp_data(year, quarter): df1 = ts.get_report_data(year, quarter) #print (1) df1 = df1.merge(ts.get_profit_data(year, quarter), how='inner', on=['code', 'name']) #print (2) df1 = df1.merge(ts.get_operation_data(year, quarter), how='inner', on=['code', 'name']) #print (3, "n", df1) df1 = df1.merge(ts.get_growth_data(year, quarter), how='inner', on=['code', 'name']) #print (4) print(df1) df1 = df1.merge(ts.get_debtpaying_data(year, quarter), how='inner', on=['code', 'name']) #print (5) print(df1) df1 = df1.merge(ts.get_cashflow_data(year, quarter), how='inner', on=['code', 'name']) #print (6) (row, col) = df1.shape for i in range(0, row): df1.iloc[i, 0] = str(df1.iloc[i, 0]) return df1
def store_fund_data(quarter_list): stock2year_path = os.path.join(LastFilePath, "stock_fundm_info") for fun_year, fun_quarter in quarter_list: #every dataframe you craw down all needs remove the duplicated row. Only need keep the first row of duplicates. # stock2year_report is tushare:get_report_data (fundamental data). stock2year_report = ts.get_report_data( fun_year, fun_quarter).drop_duplicates(keep='first') # stock2year_prof is tushare.get_profit_data (fundamental data). stock2year_prof = ts.get_profit_data( fun_year, fun_quarter).drop_duplicates(keep='first') # stock2year_opera is tushare.get_operation_data (fundamental data). stock2year_opera = ts.get_operation_data( fun_year, fun_quarter).drop_duplicates(keep='first') #stock2year_grow is tushare.get_growth_data (fundamental data). stock2year_grow = ts.get_growth_data( fun_year, fun_quarter).drop_duplicates(keep='first') #stock2year_debt is tushare.get_debtpaying_data (fundamental data). stock2year_debt = ts.get_debtpaying_data( fun_year, fun_quarter).drop_duplicates(keep='first') #stock2year_cash is tushare.get_cashflow_data (fundamental data). stock2year_cash = ts.get_cashflow_data( fun_year, fun_quarter).drop_duplicates(keep='first') #stock2year_comb is to combine all the stock2year data of same year and quarter in a same stock code. stock2year_list = [stock2year_report,stock2year_prof,stock2year_opera,stock2year_grow, \ stock2year_debt,stock2year_cash] for every_fund_element in stock2year_list: every_fund_element = every_fund_element.set_index('code') #use pandas concat to combine all the dataframe along columns. total_fund = pd.concat(stock2year_list, axis=1) HeadName = fun_year + "/" + fun_quarter + "_" + "fundamt_info" CsvName = os.path.join(stock2year_path, "{}.csv".format(HeadName)) total_fund.to_csv(CsvName)
def _fetch_finance(): for year in range(2004, 2018): set_year = lambda x: str(year) + '-' + x for quarter in range(1, 5): print(year, ' year ', 'quarter ', quarter) rep = ts.get_report_data( year, quarter)[['code', 'eps', 'bvps', 'epcf', 'report_date']] pro = ts.get_profit_data(year, quarter)[[ 'code', 'roe', 'net_profit_ratio', 'gross_profit_rate', 'net_profits', 'business_income', 'bips' ]] ope = ts.get_operation_data(year, quarter)[[ 'code', 'arturnover', 'arturndays', 'inventory_turnover', 'currentasset_turnover', 'currentasset_days' ]] gro = ts.get_growth_data( year, quarter)[['code', 'mbrg', 'nprg', 'nav', 'epsg', 'seg']] deb = ts.get_debtpaying_data(year, quarter)[[ 'code', 'currentratio', 'quickratio', 'cashratio', 'icratio', 'sheqratio', 'adratio' ]] cas = ts.get_cashflow_data(year, quarter)[[ 'code', 'cf_sales', 'rateofreturn', 'cf_nm', 'cf_liabilities', 'cashflowratio' ]] rep.rename(columns={'report_date': 'date'}, inplace=True) rep['date'] = rep['date'].apply(set_year) rep = rep.merge(pro, on='code', how='left') rep = rep.merge(ope, on='code', how='left') rep = rep.merge(gro, on='code', how='left') rep = rep.merge(deb, on='code', how='left') rep = rep.merge(cas, on='code', how='left') finance.insert(rep.to_dict('record')) print(year, quarter)
def get_stock_operation(year, season, engine): frame = ts.get_operation_data(year, season) table_name = 'stock_operation_' + str(year) + 's' + str(season) if useDB == True: frame.to_sql(table_name, engine) else: frame.to_csv(table_name + '.csv')
def get_operation_data(year, quarter): try: df = ts.get_operation_data(year, quarter) engine = create_engine('mysql://*****:*****@127.0.0.1/stock?charset=utf8') df.to_sql('operation_data', engine, if_exists='append') print "message" except Exception, e: e.message
def get_operation_data_dict(year, season): odf = ts.get_operation_data(year, season) ret_dict = {} if odf is None: return ret_dict records = odf.to_dict("records") for data in records: ret_dict[data['code']] = data return ret_dict
def get_stock_operation(nd, jd): """ 获取营运能力 """ try: res = ts.get_operation_data(nd, jd) return res except: return None
def Deep_Data_Median(self): # 1/行业利润概览 l1 = list() for x in self.fd_data_index: z = ts.get_profit_data(x, 4) k = z[z["code"].isin(self.idstry_code_list)].iloc[:, 1:].median() l1.append(k) all_idt_PF_data = pd.DataFrame(l1, index=self.fd_data_index) all_idt_PF_data.dropna(how="all", inplace=True) # 2/行业现金流量概览 l2 = list() for x in self.fd_data_index: z = ts.get_cashflow_data(x, 4) k = z[z["code"].isin(self.idstry_code_list)].iloc[:, 1:].median() l2.append(k) all_idt_CS_data = pd.DataFrame(l2, index=self.fd_data_index) all_idt_CS_data.dropna(how="all", inplace=True) # 3/行业偿债能力概览 l3 = list() for x in self.fd_data_index: z = ts.get_debtpaying_data(x, 4) k = z[z["code"].isin(self.idstry_code_list)].iloc[:, 1:].median() l3.append(k) all_idt_DP_data = pd.DataFrame(l3, index=self.fd_data_index) all_idt_DP_data.dropna(how="all", inplace=True) # 4/行业营运能力概览 l4 = list() for x in self.fd_data_index: z = ts.get_operation_data(x, 4) k = z[z["code"].isin(self.idstry_code_list)].iloc[:, 1:].median() l4.append(k) all_idt_OP_data = pd.DataFrame(l4, index=self.fd_data_index) all_idt_OP_data.dropna(how="all", inplace=True) # 5/行业成长能力概览 l5 = list() for x in self.fd_data_index: z = ts.get_growth_data(x, 4) k = z[z["code"].isin(self.idstry_code_list)].iloc[:, 1:].median() l5.append(k) all_idt_GR_data = pd.DataFrame(l5, index=self.fd_data_index) all_idt_GR_data.dropna(how="all", inplace=True) # 保存数据 data_writer = pd.ExcelWriter("股票{}的基本面数据全览(中位数视角).xlsx".format(self.code)) self.All_Ratio_data.T.sort_index(ascending=True).to_excel(data_writer, sheet_name="基本面财务比率") self.All_Basic_data.T.sort_index(ascending=True).to_excel(data_writer, sheet_name="基本面财务数据") self.All_Growth_data.sort_index(ascending=True).to_excel(data_writer, sheet_name="基本面数据增长率") self.All_Valuation_data.T.sort_index(ascending=True).to_excel(data_writer, sheet_name="三项估值指标") self.Dupon_data.T.sort_index(ascending=True).to_excel(data_writer, sheet_name="杜邦分析表") all_idt_PF_data.sort_index(ascending=True).to_excel(data_writer, sheet_name="行业利润历史") all_idt_CS_data.sort_index(ascending=True).to_excel(data_writer, sheet_name="行业现金历史") all_idt_OP_data.sort_index(ascending=True).to_excel(data_writer, sheet_name="行业营运历史") all_idt_GR_data.sort_index(ascending=True).to_excel(data_writer, sheet_name="行业成长历史") all_idt_DP_data.sort_index(ascending=True).to_excel(data_writer, sheet_name="行业偿债历史") data_writer.save()
def single_stock_report(code, year_start, k_index): """ :param code: the valid stock code, for example '002146' :param year_start: the start date that we want to check the stock report, for example '201801' :param k_index: the performance of report we want to check :return: DataFrame table: the index is the quarter from start to end, the """ if code is None: raise ValueError('please assign code') if year_start is None: raise ValueError('please assign year') if k_index is None: raise ValueError('please assign index') year_to_market = stock_list('timeToMarket') ytm = year_to_market[year_to_market.index == code] ytm = str(ytm.iloc[0]) if ytm >= year_start: qs = getBetweenQuarter(ytm) else: qs = getBetweenQuarter(year_start) j = len(qs) - 1 results = pd.DataFrame() new_index = [] for i in range(j): year = int(qs[i].split('Q')[0]) q = int(qs[i].split('Q')[1]) n = 1 data = [] while n < 10: if k_index == 'get_profit_data': data = ts.get_profit_data(int(year), q) elif k_index == 'get_report_data': data = ts.get_report_data(int(year), q) elif k_index == 'get_operation_data': data = ts.get_operation_data(int(year), q) elif k_index == 'get_growth_data': data = ts.get_growth_data(int(year), q) elif k_index == 'get_debtpaying_data': data = ts.get_debtpaying_data(int(year), q) elif k_index == 'get_cashflow_data': data = ts.get_cashflow_data(int(year), q) else: raise Exception('the k_indexs is not correct') result = data[data['code'] == code] if len(result) >= 1: new_index.append('%d0%d' % (year, q)) results = results.append(result[0:1], ignore_index=True) print(results) break elif len(result) == 0: n += 1 continue new_index_1 = pd.DataFrame({"Y_Q": new_index}) frames = [results, new_index_1] return pd.concat(frames, axis=1)
def get_fundamental_data(year,quarter): fundamental = {} fundamental['basic'] =ts.get_stock_basics() fundamental['report']=ts.get_report_data(year, quarter) fundamental['profit']=ts.get_profit_data(year, quarter) fundamental['operation']=ts.get_operation_data(year, quarter) fundamental['cashflow']=ts.get_cashflow_data(year, quarter) fundamental['growth']=ts.get_growth_data(year, quarter) fundamental['debt']=ts.get_debtpaying_data(year, quarter) return fundamental
def getOperation(self, year, season): ''' 获取股票运营能力数据 ''' print 'getOperation work' df = ts.get_operation_data(year, season) df.to_csv(self.__filename) self.__getDataCommSeason(9, year, season, "Operation")
def operation_data(year): for i in range(4): quarter = i + 1 predts = ts.get_operation_data(year, quarter) if predts is not None: predts['year'] = year predts['quarter'] = quarter predts.to_sql('fundamental_operation_data', engine, flavor='mysql', if_exists='append')
def get_operation_data(start=None, end=None): startArr = start.split("-") endArr = end.split("-") seasonList = dataUtil.getListSeason(int(startArr[0]), int(startArr[1]), int(endArr[0]), int(endArr[1])) for season in seasonList : df = ts.get_operation_data(season[0],season[1]).drop_duplicates('code') dt = pd.DataFrame({"date": np.array([str(season[0])+"-"+str(season[1])]*len(df))},index=df.index) df = pd.concat([df,dt],axis=1) dataPath = TushareApi.path + "operation_data/" +str(season[0])+"-"+str(season[1])+"-operation_data.csv" fileUtil.saveDf(df,dataPath)
def get_operation_data(cls, year, quarter): # 营运能力数据 data = ts.get_operation_data(year, quarter) data['year'] = year data['quarter'] = quarter data.reset_index(drop=True, inplace=True) ModelData.remove_data(table_name='operation_data', year=year, quarter=quarter) print('remove success') ModelData.insert_data(table_name='operation_data', data=data) print('write success')
def sync_operation_data(): ''' sync operation data ''' year = datetime.datetime.now().year month = datetime.datetime.now().month seaon = month/3 if month<3: year = year - 1 seaon = 4 monthstr = '%s%s'%(year,seaon) DataFrameToMongo(ts.get_operation_data(year, seaon), MongoClient(mongourl)['stoinfo']['operation_data'], ['code'], monthstr)
def get_operation_data(year, quarter): try: logger.info('get %s year %s quarter operation data starting...' % (year, quarter)) df = ts.get_operation_data(year, quarter) logger.info('get %s year %s quarter operation data end...' % (year, quarter)) except: logger.exception( 'some errors between get %s year %s quarter operation data end...' % (year, quarter)) return df
def get_data_date(year,quarter): cmd='''delete from %s where `year`='%s' and quarter='%s' '''%(table_name,year,quarter) print cmd run_mysql_cmd(cmd=cmd,conn=conn) #先删除指定年和指定季度的数据 try: rs=ts.get_operation_data(year=int(year),quarter=int(quarter)) rs['year']=year rs['quarter']=quarter rs=rs.drop_duplicates() #去除重复的数据,没想到还有重复的,心塞塞,这个api不咋地啊,挖地兔 pd.DataFrame.to_sql(rs, table_name, con=conn , flavor='mysql', if_exists='append',index=False) return rs except: print("get data year=%s and quarter =%s wrong %s" % (year, quarter,table_name))
def updateoperation(): reportdatalist = ts.get_operation_data(2014, 1) reportdata = pd.DataFrame(reportdatalist) conn = ms.connect(host='localhost', port=3306, user='******', passwd='123456', db='investment', charset="utf8") cur = conn.cursor() values = [] for index, row in reportdata.iterrows(): if math.isnan(row['arturnover']): arturnover = 0 else: arturnover = row['arturnover'] if math.isnan(row['arturndays']): arturndays = 0 else: arturndays = row['arturndays'] if math.isnan(row['inventory_turnover']): inventory_turnover = 0 else: inventory_turnover = row['inventory_turnover'] if math.isnan(row['inventory_days']): inventory_days = 0 else: inventory_days = row['inventory_days'] if math.isnan(row['currentasset_turnover']): currentasset_turnover = 0 else: currentasset_turnover = row['currentasset_turnover'] if math.isnan(row['currentasset_days']): currentasset_days = 0 else: currentasset_days = row['currentasset_days'] values.append((row['code'], row['name'], arturnover, arturndays, inventory_turnover, inventory_days, currentasset_turnover, currentasset_days)) cur.executemany( 'insert into operation20141 (code,name,arturnover,arturndays,inventory_turnover,inventory_days,currentasset_turnover,currentasset_days) values(%s,%s,%s,%s,%s,%s,%s,%s)', values) conn.commit() cur.close() conn.close()
def report_type_chosen(report_type, year, season): """ report_type: 报表种类 year: 报表年份 season: 报表季度 """ report_type_dict = { '主表': ts.get_report_data(year, season), '收益表': ts.get_profit_data(year, season), '运营表': ts.get_operation_data(year, season), '现金流表': ts.get_cashflow_data(year, season), '偿债能力表': ts.get_debtpaying_data(year, season), } report_chosen = report_type_dict.get(report_type) return report_chosen
def get_operation(year, quarter, q): """ arturnover: 应收账款周转率 arturndays: 应收账款周转天数 inventory_turnover: 存货周转率 inventory_days: 存活周转天数 currentasset_turnover: 流动资产周转率 currentasset_days: 流动资产周转天数 """ operation = ts.get_operation_data(year, quarter).drop_duplicates() \ .reindex(['code','arturnover','arturndays','inventory_turnover', 'inventory_days','currentasset_turnover','currentasset_days'], axis=1) res = operation.set_index(['code'], drop=True) q.put({'name': 'operation', 'data': res.to_dict('index')}) logger.info('Fetch data \'operation\' for %s-%s ... Done.'%(year, quarter))
def operation(engine, year, quarter): tbl = "basic_operation" tsl.log(tbl + " start...") try: df = ts.get_operation_data(year, quarter) df = df.set_index('code', drop='true') df['year'] = year df['quarter'] = quarter df = df.fillna(0) df.to_sql(tbl, engine, if_exists='append') print tsl.log(tbl + " done") except BaseException, e: print print e tsl.log(tbl + " error")
def job_4(): try: print("I'm working......基本面数据") # 业绩报告(主表) report_data = ts.get_report_data(year,1) data = pd.DataFrame(report_data) data.to_sql('report_data',engine,index=True,if_exists='replace') print("业绩报告(主表)......done") # 盈利能力 profit_data = ts.get_profit_data(year,1) data = pd.DataFrame(profit_data) data.to_sql('profit_data',engine,index=True,if_exists='replace') print("盈利能力......done") # 营运能力 operation_data = ts.get_operation_data(year,1) data = pd.DataFrame(operation_data) data.to_sql('operation_data',engine,index=True,if_exists='replace') print("营运能力......done") # 成长能力 growth_data = ts.get_growth_data(year,1) data = pd.DataFrame(growth_data) data.to_sql('growth_data',engine,index=True,if_exists='replace') print("成长能力......done") # 偿债能力 debtpaying_data = ts.get_debtpaying_data(year,1) data = pd.DataFrame(debtpaying_data) data.to_sql('debtpaying_data',engine,index=True,if_exists='replace') print("偿债能力......done") # 现金流量 cashflow_data = ts.get_cashflow_data(year,1) data = pd.DataFrame(cashflow_data) data.to_sql('cashflow_data',engine,index=True,if_exists='replace') print("现金流量......done") # 股票列表 stock_basics = ts.get_stock_basics() data = pd.DataFrame(stock_basics) data.to_sql('stock_basics',engine,index=True,if_exists='replace') print("股票列表......done") except Exception as e: print(e)
def update_basics(): basics = ts.get_stock_basics() f = os.path.join(base_dir, 'basics.h5') basics.to_hdf(f, 'basics') today = datetime.date.today() current_year = today.year current_season = today.month / 3 if current_season == 0: current_year -= 1 current_season = 4 length = 4 * 5 year = current_year season = current_season for i in range(length): f = os.path.join(base_dir, 'basics-{0}-{1}.h5'.format(year, season)) if os.path.exists(f): continue print(f) report = ts.get_report_data(year, season) report.to_hdf(f, 'report') profit = ts.get_profit_data(year, season) profit.to_hdf(f, 'profit') operation = ts.get_operation_data(year, season) operation.to_hdf(f, 'operation') growth = ts.get_growth_data(year, season) growth.to_hdf(f, 'growth') debtpaying = ts.get_debtpaying_data(year, season) debtpaying.to_hdf(f, 'debtpaying') cashflow = ts.get_cashflow_data(year, season) cashflow.to_hdf(f, 'cashflow') season -= 1 if season == 0: season = 4 year -= 1
def updateoperation(): reportdatalist=ts.get_operation_data(2014,1) reportdata=pd.DataFrame(reportdatalist) conn= ms.connect(host='localhost',port = 3306,user='******', passwd='123456',db ='investment',charset="utf8") cur = conn.cursor() values=[] for index,row in reportdata.iterrows(): if math.isnan(row['arturnover']): arturnover=0 else: arturnover=row['arturnover'] if math.isnan(row['arturndays']): arturndays=0 else: arturndays=row['arturndays'] if math.isnan(row['inventory_turnover']): inventory_turnover=0 else: inventory_turnover=row['inventory_turnover'] if math.isnan(row['inventory_days']): inventory_days=0 else: inventory_days=row['inventory_days'] if math.isnan(row['currentasset_turnover']): currentasset_turnover=0 else: currentasset_turnover=row['currentasset_turnover'] if math.isnan(row['currentasset_days']): currentasset_days=0 else: currentasset_days=row['currentasset_days'] values.append((row['code'],row['name'],arturnover,arturndays,inventory_turnover,inventory_days,currentasset_turnover,currentasset_days)) cur.executemany('insert into operation20141 (code,name,arturnover,arturndays,inventory_turnover,inventory_days,currentasset_turnover,currentasset_days) values(%s,%s,%s,%s,%s,%s,%s,%s)',values) conn.commit() cur.close() conn.close()
import tushare as ts import sys df = ts.get_operation_data(int(sys.argv[1]), int(sys.argv[2])) df.to_csv(sys.argv[3], encoding="utf8")
import sys import time import tushare as ts from sqlalchemy import create_engine import mysql.connector conn = create_engine('mysql+mysqlconnector://stockadmin:stock2016@localhost/stock?charset=utf8') for year in range(2004,2012): for season in range(1,5): print(year,season) df_profit = ts.get_profit_data(year,season) time.sleep(15) df_growth = ts.get_growth_data(year,season) time.sleep(15) df_operation = ts.get_operation_data(year,season) time.sleep(15) df_debtpaying = ts.get_debtpaying_data(year,season) time.sleep(15) df_cashflow = ts.get_cashflow_data(year,season) time.sleep(15) df_report = ts.get_report_data(year,season) df_profit.to_sql('profit',conn,if_exists='append') df_growth.to_sql('growth',conn,if_exists='append') df_operation.to_sql('operation',conn,if_exists='append') df_debtpaying.to_sql('debtpaying',conn,if_exists='append') df_cashflow.to_sql('cashflow',conn,if_exists='append') df_report.to_sql('report',conn,if_exists='append') #sys.exit(1)
def download_operation_data(file_path, year, quarter): operation_data = ts.get_operation_data(year, quarter) if operation_data is not None: operation_data.to_csv(file_path + 'operation_' + str(year) + '_' + str(quarter) + '.csv', encoding='utf-8')
#coding=utf-8 import tushare as ts # 获取沪深上市公司基本情况 df = ts.get_stock_basics() date = df.ix['600848']['timeToMarket']#上市日期YYYYMMDD #获取2014年第3季度的业绩报表数据 ts.get_report_data(2014,3) #获取2014年第3季度的盈利能力数据 ts.get_profit_data(2014,3) #获取2014年第3季度的营运能力数据 ts.get_operation_data(2014,3) #获取2014年第3季度的成长能力数据 ts.get_growth_data(2014,3) #获取2014年第3季度的偿债能力数据 ts.get_debtpaying_data(2014,3) #获取2014年第3季度的现金流量数据 ts.get_cashflow_data(2014,3)
df = df.assign(quater=currentQuater) df.to_sql('profit_data',engine, if_exists='append') # import operation data #code,代码 #name,名称 #arturnover,应收账款周转率(次) #arturndays,应收账款周转天数(天) #inventory_turnover,存货周转率(次) #inventory_days,存货周转天数(天) #currentasset_turnover,流动资产周转率(次) #currentasset_days,流动资产周转天数(天) df = ts.get_operation_data(lastYear,lastSeason) df = df.assign(quater=lastQuater) df.to_sql('operation_data',engine, if_exists='replace') df = ts.get_operation_data(currentYear,currentSeason) df = df.assign(quater=currentQuater) df.to_sql('operation_data',engine, if_exists='append') # import growth data #code,代码 #name,名称 #mbrg,主营业务收入增长率(%) #nprg,净利润增长率(%) #nav,净资产增长率 #targ,总资产增长率
def getoperation(year,quarter): dfoperation = ts.get_operation_data(year,quarter) dfoperation.insert(0,'uploadtime',nowtime) dfoperation.insert(1,'year',year) dfoperation.insert(2,'quarter',quarter) dfoperation.to_sql('tb_operation',engine,if_exists='append')
def pick_data(self, max_num_threads = 20, pause = 0): """ pick all necessary data from local database and from internet for loaded stocks. This function will take a while. """ logging.info('getting basics from tushare') self._init_stock_objs() # self.data_manager.drop_stock() # self.stocks = {key: self.stocks[key] for key in ['600233', '600130']} logging.info('totally there are %d listed companies' % len(self.stocks)) logging.info('get indexes from tushare') self._get_indexes() # self._pick_hist_data_and_save(self.stocks, False, self.indexes['000001'].hist_start_date, max_num_threads) logging.info('getting last stock trading data') df = ts.get_today_all() self._extract_from_dataframe(df, ignore=('changepercent', 'open', 'high', 'low', 'settlement', 'volume', 'turnoverratio', 'amount'), remap={'trade': 'price', 'per': 'pe'}) # calculate the report quarter report_year, report_quarter = ts.get_last_report_period() logging.info('getting last report (%d quarter %d) from tushare' % (report_year, report_quarter)) df = ts.get_report_data(report_year, report_quarter) self._extract_from_dataframe(df) logging.info('getting last profit data from tushare') df = ts.get_profit_data(report_year, report_quarter) self._extract_from_dataframe(df, ignore=('net_profits', 'roe', 'eps')) logging.info('getting last operation data from tushare') df = ts.get_operation_data(report_year, report_quarter) self._extract_from_dataframe(df) logging.info('getting last growth data from tushare') df = ts.get_growth_data(report_year, report_quarter) self._extract_from_dataframe(df) logging.info('getting last debtpaying data from tushare') df = ts.get_debtpaying_data(report_year, report_quarter) self._extract_from_dataframe(df) logging.info('getting last cashflow data from tushare') df = ts.get_cashflow_data(report_year, report_quarter) self._extract_from_dataframe(df) logging.info('getting history trading data from tushare') start_from = self.indexes['000001'].hist_start_date data_full = self._pick_hist_data_and_save(self.stocks, False, start_from, max_num_threads, pause) # anything that pulling data must before here self._remove_unavailable_stocks() ''' # calculate qianfuquan data # deprecated due to precision issue for code, stock in self.stocks.items(): for i in range(1, len(stock.hist_data.index)-1): b = stock.hist_data.at[stock.hist_data.index[i], 'close'] a = stock.hist_data.at[stock.hist_data.index[i+1], 'close'] p = stock.hist_data.at[stock.hist_data.index[i+1], 'p_change'] / 100.0 q = (p*a+a)/b if q > 1.1: print('%s chuq-uan %s: %s %s %s, 1/%s' % (stock, stock.hist_data.index[i], b, a, p, q)) ''' return data_full