def combine_the_data_raw_raw():
    """Combine all raw DADAN csv files for the 2020_01_30 folder.

    Returns:
        pd.DataFrame: de-duplicated trades with named columns, a zero-padded
        6-character ``stock_index`` and a dashed ``day`` column derived from
        ``stock_date``.
    """
    dir_dadan = data_dict.get("DADAN")
    # NOTE(review): date folder is hard-coded -- TODO make it a parameter.
    df1 = combine_csv_in_folder_raw(os.path.join(dir_dadan, "2020_01_30"))
    df1.columns = ["stock_index", "stock_name",
                   "trade_time", "price", "trade_num", "trade_shou",
                   "status", "price_change_rate", "price_change_ratio",
                   "look", "stock_date"]
    # zero-pad codes, e.g. 999 -> "000999"
    df1['stock_index'] = [str(x).zfill(6) for x in df1.stock_index.tolist()]
    df1['day'] = [x.replace("_", "-") for x in df1.stock_date.tolist()]
    df1 = df1.drop_duplicates()
    # FIX: the combined frame was computed and then discarded (no return, the
    # save line was commented out); return it so callers can use the result.
    # Also dropped the unused ``tmp_dir`` lookup.
    return df1
def test(loop_num):
    """Cluster daily OHLC bars of stock 600999 with KMeans and report stats.

    Args:
        loop_num: run index; only used to name the saved cluster-center csv.
    """
    data_dir = data_dict.get("day_history")
    df1 = pd.read_csv(os.path.join(data_dir, "all.csv"), header=None)
    df1 = raw_data_col(df1)
    # NOTE(review): integer comparison -- assumes raw_data_col leaves
    # stock_index numeric, not zero-padded strings; TODO confirm.
    df2 = df1[df1['stock_index'] == 600999]
    #df2 = df1.sample(10000)
    X, df_K = make_data(df2)
    n_cluster = 11
    kmeans_model = get_model(n_cluster, X)
    df_K['labels'] = kmeans_model.labels_
    #df2[["open","high","low","close"]].values
    # keep the raw OHLC row next to its cluster label for later stats
    df_K['raw_array'] = df_K[["open", "high", "low", "close"]].values.tolist()
    #df_K['center'] = kmeans_model.cluster_centers_.tolist()
    # pairwise distances between cluster centers; show the closest few
    df_cluster_cent_dist = cluster_center_dist(kmeans_model.cluster_centers_)
    print(df_cluster_cent_dist.iloc[0:3, ])
    # map each label to its (rounded) center coordinates
    df_cluster_info = pd.DataFrame(columns=["label_center", "labels"])
    df_cluster_info['label_center'] = kmeans_model.cluster_centers_.round(
        3).tolist()
    df_cluster_info['labels'] = [x for x in range(0, kmeans_model.n_clusters)]
    df_center = pd.merge(df_K, df_cluster_info,
                         on='labels')[['raw_array', "label_center", "labels"]]
    # per-cluster dispersion: max/median distance to center and member count
    df_cluster_stat = df_center.groupby("labels").apply(cluster_stat)[[
        "labels", 'max_dist', "median_dist", 'cnt'
    ]]
    print(df_cluster_stat.sort_values("max_dist", ascending=False))
    file_name = "cluster_%s_%s.csv" % (n_cluster, loop_num)
    save_cluster(kmeans_model.cluster_centers_, file_name)
def allKdj(now_date):
    """Collect the KDJ-J value on ``now_date`` for every 60*/00* stock.

    Best-effort market scan: stocks whose history cannot be loaded or that
    have no row for ``now_date`` are skipped silently.

    Args:
        now_date: trading date string matching the ``date`` column format.

    Returns:
        tuple[list, list, list]: (stock codes, J values, dates), aligned.
    """
    stock_index_list = []
    j_line = []
    stock_date_list = []
    for i in stk_index_list:
        # Shanghai main board (60*) and Shenzhen main board (00*) only
        if i[0:2] == '60' or i[0] == '00':
            try:
                wy_data_dir = data_dict.get("day_history_wangyi")
                stock_index = i
                df1 = loadData(wy_data_dir,
                               stock_index).sort_values("stock_date")
                stock = DF_to_StockDataFrame(df1)
                df_stock = stock_kdj(stock)
                df_class = dayHistoryFeature.makeHistoryFeature(
                    df_stock, 10, "macdh")
                df_kdj_macd = df_class.df_out
                # J value on the requested date, rounded to 3 decimals
                jjj = np.round(
                    df_kdj_macd['kdjj'][df_kdj_macd['date'] ==
                                        now_date].values[0], 3)
                stock_index_list.append(stock_index)
                j_line.append(jjj)
                stock_date_list.append(now_date)
            except Exception:
                # FIX: was a bare ``except`` -- no longer traps
                # KeyboardInterrupt/SystemExit. Also removed the unused
                # ``macd_line`` / ``stock_date`` locals.
                pass
    return stock_index_list, j_line, stock_date_list
def allKdj(now_date, stock_index):
    """Fetch KDJ-J, RSI-6 and Bollinger band ratio for one stock on a date.

    On any failure (missing file, no row for ``now_date``, ...) all lists
    come back empty, mirroring the best-effort market-wide scan.

    Args:
        now_date: trading date string matching the ``date`` column format.
        stock_index: stock code used to locate the history file.

    Returns:
        tuple of five lists (each empty or length 1):
        (stock codes, J values, dates, RSI-6 values, boll ratios).
    """
    stock_index_list = []
    j_line = []
    stock_date_list = []
    rsi_6 = []
    boll_ratio = []
    try:
        wy_data_dir = data_dict.get("day_history_wangyi")
        df1 = loadData(wy_data_dir, stock_index).sort_values("stock_date")
        stock = DF_to_StockDataFrame(df1)
        df_stock, _ = stock_kdj(stock)
        # upper band relative to lower band; larger = wider bands
        df_stock["boll_ratio"] = df_stock["boll_ub"] / df_stock["boll_lb"]
        day_mask = df_stock['date'] == now_date
        kdj_j_today = np.round(df_stock['kdjj'][day_mask].values[0], 3)
        rsi_today = np.round(df_stock['rsi_6'][day_mask].values[0], 3)
        boll_ratio_today = np.round(
            df_stock['boll_ratio'][day_mask].values[0], 3)
        stock_index_list.append(stock_index)
        j_line.append(kdj_j_today)
        rsi_6.append(rsi_today)
        boll_ratio.append(boll_ratio_today)
        stock_date_list.append(now_date)
    except Exception:
        # FIX: was a bare ``except`` -- no longer traps
        # KeyboardInterrupt/SystemExit
        pass
    return stock_index_list, j_line, stock_date_list, rsi_6, boll_ratio
def download_for_stock_index(stock_index, end_year):
    """Download all news reports for one stock, then rate-limit.

    Args:
        stock_index: stock code; also used as the destination sub-folder.
        end_year: last year of news to fetch (passed through to get_all_news).
    """
    dir_news_report = data_dict.get("news_report")
    try:
        get_all_news(stock_index, os.path.join(dir_news_report, stock_index),
                     end_year)
    except Exception:
        # FIX: message previously printed "<code>not download" (missing
        # space); also dropped the redundant ``stk`` alias and dead ``pass``.
        print(stock_index + ' not download')
        traceback.print_exc()
    # politeness delay between stocks, even after a failure
    time.sleep(3)
def getFeatture(stock_index):
    """Build KDJ/MACD features plus a 10-day macdh history for one stock.

    Args:
        stock_index: stock code used to locate the Wangyi history file.

    Returns:
        tuple: (feature DataFrame, list of new history column names).
    """
    history_dir = data_dict.get("day_history_wangyi")
    daily = loadData(history_dir, stock_index).sort_values("stock_date")
    indicator_frame = stock_kdj(DF_to_StockDataFrame(daily))
    feature_builder = dayHistoryFeature.makeHistoryFeature(
        indicator_frame, 10, "macdh")
    return feature_builder.df_out, feature_builder.new_history_days_colname
def load_data(date_in=None):
    """Load and combine the YeJiYuQi (earnings forecast) csvs for one day.

    Args:
        date_in: date folder name; defaults to today's date string.

    Returns:
        pd.DataFrame: combined rows with normalized column names.
    """
    dir_yjyq = data_dict.get("YeJiYuQi")
    now_date, now_date_time = get_the_datetime()
    # FIX: identity check instead of ``== None``
    if date_in is None:
        date_in = now_date
    data_dir = os.path.join(dir_yjyq, date_in)
    df1 = combine_csv_in_folder_raw(data_dir)
    df1.columns = ["index", "stock_index", "stock_name", "yeji_predict",
                   "yeji_abstract", "profit_change_ratio",
                   "profit_change", "date"]
    return df1
def combine_with_stock_basic_info(df_input, columns_select):
    """Left-join basic stock info onto ``df_input`` by 6-digit stock code.

    Args:
        df_input: frame with a ``stock_index`` column (mutated in place:
            the column is rewritten as zero-padded 6-char strings).
        columns_select: basic-info columns to append to the output.

    Returns:
        pd.DataFrame: df_input's columns followed by ``columns_select``.
    """
    wanted_cols = df_input.columns.tolist() + columns_select
    info_path = os.path.join(data_dict.get("basic_info"),
                             "stock_basic_info.csv")
    basic_info_df = pd.read_csv(info_path)
    basic_info_df['stock_index'] = basic_info_df['code']
    # normalise both sides to zero-padded 6-character codes before merging
    basic_info_df['stock_index'] = [
        str(code).zfill(6) for code in basic_info_df['stock_index'].tolist()
    ]
    df_input['stock_index'] = [
        str(code).zfill(6) for code in df_input['stock_index'].tolist()
    ]
    merged = pd.merge(df_input, basic_info_df, how='left', on=["stock_index"])
    return merged[wanted_cols]
def loop_for_download_fin_report(stock_index, year_range):
    """Download financial reports for one stock over several years.

    Years whose file already exists are skipped. Each download attempt is
    capped at 2.7s by a non-raising eventlet timeout and followed by a
    20-second politeness delay.

    Args:
        stock_index: stock code.
        year_range: iterable of years to fetch.
    """
    save_dir = data_dict.get("financial_report")
    for year in year_range:
        file_name, if_file_exists = make_filename_check_if_exists(
            save_dir, stock_index, year)
        # FIX: idiomatic truthiness test instead of ``== False``; dead
        # ``else: pass`` removed.
        if not if_file_exists:
            # Timeout(..., False): abort silently instead of raising
            with eventlet.Timeout(2.7, False):
                do_download(stock_index, year, save_dir, file_name,
                            if_file_exists)
            time.sleep(20)
def get_all_date():
    """Load the 2020-08-14 important-owner snapshot as a clean DataFrame.

    Returns:
        pd.DataFrame: de-duplicated rows with named columns and a
        ``stock_date`` column holding the YYYY-MM-DD part of ``change_date``.
    """
    col_names = [
        "id", "id2", "owner_name", "owner_type", "stock_type", "gudong_rank",
        "stock_index", "stock_name", "change_date", "num", "chigu_ratio",
        "liutong_ratio", "report_date", "change_type", "change_ratio",
        "test1", "test2", "num_change",
    ]
    owner_dir = data_dict.get("important_owner")
    frame = pd.read_csv(os.path.join(owner_dir,
                                     "zo_nyyh_zozz_2020-08-14.csv"))
    frame.columns = col_names
    frame = frame.drop_duplicates()
    # first 10 chars of the change timestamp = the date part
    frame['stock_date'] = [ts[0:10] for ts in frame.change_date.tolist()]
    return frame
def get_all_date():
    """Combine every csv in the important-owner folder into one clean frame.

    Returns:
        pd.DataFrame: de-duplicated rows with named columns and a
        ``stock_date`` column holding the YYYY-MM-DD part of ``change_date``.
    """
    col_names = [
        "id", "id2", "owner_name", "owner_type", "stock_type", "gudong_rank",
        "stock_index", "stock_name", "change_date", "num", "chigu_ratio",
        "liutong_ratio", "report_date", "change_type", "change_ratio",
        "test1", "test2", "num_change",
    ]
    owner_dir = data_dict.get("important_owner")
    frame = combine_csv_in_folder_raw(owner_dir)
    frame.columns = col_names
    frame = frame.drop_duplicates()
    # first 10 chars of the change timestamp = the date part
    frame['stock_date'] = [ts[0:10] for ts in frame.change_date.tolist()]
    return frame
def get_all_date():
    """Load the hk_central 2020-05-22 owner snapshot as a clean DataFrame.

    Returns:
        pd.DataFrame: de-duplicated rows with named columns and a
        ``stock_date`` column holding the YYYY-MM-DD part of ``change_date``.
    """
    col_names = [
        "id", "id2", "owner_name", "owner_type", "stock_type", "gudong_rank",
        "stock_index", "stock_name", "change_date", "num", "chigu_ratio",
        "liutong_ratio", "report_date", "change_type", "change_ratio",
        "test1", "test2", "num_change",
    ]
    owner_dir = data_dict.get("important_owner")
    snapshot_name = "hk_central_2020_05_22.csv"
    frame = pd.read_csv(os.path.join(owner_dir, snapshot_name))
    #frame = combine_csv_in_folder_raw(owner_dir)
    frame.columns = col_names
    frame = frame.drop_duplicates()
    # first 10 chars of the change timestamp = the date part
    frame['stock_date'] = [ts[0:10] for ts in frame.change_date.tolist()]
    return frame
def stockSlope(stock_index, start_date, stat_days, pred_days):
    """Linear-regression slope of the close price over the prediction window.

    Args:
        stock_index: stock code (csv file name under history_data).
        start_date: anchor date handed to klineDate.
        stat_days: statistics window length (consumed by klineDate).
        pred_days: prediction window length.

    Returns:
        tuple: (slope, pred_start_date, pred_end_date); all three are the
        project's -999 sentinel when data is missing or the fit fails.
    """
    try:
        data_dir = data_dict.get("baostock")
        df1 = pd.read_csv(
            os.path.join(data_dir, "history_data", stock_index) + '.csv')
        k1 = klineDate(start_date, stat_days, pred_days)
        stat_end_date, pred_start_date, pred_end_date = k1.make_date()
        # rows inside the prediction window (inclusive on both ends)
        df2 = df1[(df1["date"] >= pred_start_date)
                  & (df1["date"] <= pred_end_date)]
        slope = LinearReg.single_linear_reg(df2, "close")[0]
    except Exception:
        # FIX: was a bare ``except`` -- no longer traps
        # KeyboardInterrupt/SystemExit
        slope = -999
        pred_start_date = -999
        pred_end_date = -999
    return slope, pred_start_date, pred_end_date
def main():
    """Build today's top-100 DADAN (large order) daily report and save it.

    Returns:
        pd.DataFrame: the full buy/sell diff-stat table (not just the
        saved head).
    """
    now_date, now_date_time = get_the_datetime()  # e.g. "2019_11_08"
    #now_date = "2020_05_22"
    source_dir = os.path.join(data_dict.get("DADAN"), now_date)
    trades = combine_csv_in_folder(source_dir)
    trades.columns = setColname().DADAN()
    # aggregate buy minus sell counts per stock
    report = DADAN_diff_stat(trades)
    # switch to dashed dates for the report path/name
    now_date = now_date.replace("_", "-")
    save_dir = dailyReport.dailyReport(now_date).save_to_daily_report()
    save_file = os.path.join(save_dir,
                             "DADAN_200_daily_report_" + now_date + ".csv")
    report = changeStockIndex(report, 'stock_index')
    report.head(100).to_csv(save_file, encoding="utf_8_sig", index=0)
    return report
def combine_clean_data():
    """Combine every per-day DADAN folder into one de-duplicated DataFrame.

    Walks the DADAN root; every sub-folder named like YYYY_MM_DD is combined
    and tagged with a dashed ``date`` column. Folders whose name does not
    parse as a date are skipped. The result is also written to ``test.csv``.

    Returns:
        pd.DataFrame: all rows with duplicates dropped.
    """
    folder = data_dict.get("DADAN")
    # FIX: collect frames then concat once -- the original called pd.concat
    # inside the loop, which is O(n^2) in total rows.
    frames = []
    for dirname, dirs, files in walk(folder):
        try:
            date_sig = dirname.split("/")[-1:][0]
            date_in = datetime.datetime.strptime(
                date_sig, "%Y_%m_%d").strftime("%Y-%m-%d")
            print(dirname)
            df1 = combine_csv_in_folder_raw(dirname)
            df1['date'] = date_in
            frames.append(df1)
        except ValueError:
            # folder name is not a date -- skip it
            pass
        except Exception:
            # FIX: bare ``except`` replaced; still best-effort but no longer
            # hides KeyboardInterrupt/SystemExit
            pass
    # seed with the empty frame to preserve the original column order
    df_all = pd.DataFrame(columns=('0', '1', '2', '3', '4', '5', '6', '7',
                                   '8', '9', 'date'))
    df_out = pd.concat([df_all] + frames).drop_duplicates()
    df_out.to_csv("test.csv", index=0)
    return df_out
def main(stock_index):
    """Download day history for one stock into the insert directory.

    Relies on module-level ``start_date`` / ``end_date`` globals.

    Args:
        stock_index: raw stock code; normalized by make_stock_download_index.
    """
    dir_day_history_insert = data_dict.get("day_history_insert")
    # FIX: removed the unused in-function imports (os/pandas/time/datetime)
    stock_index = make_stock_download_index(stock_index)
    try:
        run_download(stock_index, start_date, end_date,
                     dir_day_history_insert)
    except Exception:
        # FIX: was a bare ``except``; dead trailing ``pass`` removed
        print("the stock index cannot be download " + str(stock_index))
def main():
    """Print the 50 stocks with the largest buy-minus-sell DADAN counts.

    Returns:
        pd.DataFrame: per-stock merged buy/sell counts with a ``diff`` column.
    """
    now_date, now_date_time = get_the_datetime()  # e.g. "2019_11_08"
    #now_date = "2020_06_01"
    dir_dadan = data_dict.get("DADAN")
    data_dir = os.path.join(dir_dadan, now_date)
    df1 = combine_csv_in_folder(data_dir)
    df1.columns = setColname().DADAN()
    # count buy-side ("买盘") trades per stock ('price' is just a
    # convenient column to count on)
    df2 = df1[df1['status'] == '买盘']
    df_buy = df2.groupby(
        'stock_index').count()['price'].sort_values().reset_index()
    # count sell-side ("卖盘") trades per stock
    df3 = df1[df1['status'] == '卖盘']
    df_sale = df3.groupby(
        'stock_index').count()['trade_time'].sort_values().reset_index()
    df_merge = pd.merge(df_buy, df_sale, how='left').fillna(0)
    # buy count minus sell count
    df_merge['diff'] = df_merge['price'] - df_merge['trade_time']
    df_out = df_merge.sort_values('diff').tail(50)
    print(df_out)
    # FIX: previously ``return df_merge1`` -- an undefined name that raised
    # NameError at runtime; return the merged stats instead.
    return df_merge
def main():
    """Compute DADAN buy/sell diff stats, print top/bottom 50, save report.

    Returns:
        pd.DataFrame: the full sorted diff-stat table.
    """
    now_date, now_date_time = get_the_datetime()  # e.g. "2019_11_08"
    day_dir = os.path.join(data_dict.get("DADAN"), now_date)
    trades = combine_csv_in_folder(day_dir)
    trades.columns = setColname().DADAN()
    stats = DADAN_diff_stat(trades)
    # local debug copy
    stats.to_csv("DADAN_sample.csv", index=0)
    print("=" * 50)
    print(stats[['stock_index', 'stock_name', 'buy_sale_diff']].head(50))
    print(stats.tail(50))
    print("=" * 50)
    report_dir = os.path.join(data_path, "DADAN_daily_report")
    create_dir_if_not_exist(report_dir)
    report_file = os.path.join(report_dir,
                               "DADAN_200_daily_report_" + now_date + ".csv")
    stats.to_csv(report_file, encoding="utf_8_sig")
    return stats
    # NOTE(review): tail of a DADAN diff-stat function; its ``def`` line is
    # above this chunk, so indentation here is reconstructed.
    df2 = df_input.drop_duplicates()
    # per-stock totals for buy-side ("买盘") and sell-side ("卖盘") trades
    df4 = status_sum(df2, "买盘")
    df6 = status_sum(df2, "卖盘")
    ##
    df_merge = pd.merge(df4, df6, how='left',
                        on=["stock_index", "stock_name"])
    df_merge = df_merge.fillna(0)
    # positive diff = more buy orders than sell orders
    df_merge["buy_sale_diff"] = df_merge["buy_num"] - df_merge["sale_num"]
    df_merge1 = df_merge.sort_values("buy_sale_diff", ascending=False)
    #print(df_merge1)
    return df_merge1


if __name__ == '__main__':
    #now_date,now_date_time = get_the_datetime() ## the now_date is like "2019_11_08"
    # hard-coded run date (folder name under the DADAN data dir)
    now_date = "2020_01_17"
    dir_dadan = data_dict.get("DADAN")
    data_dir = os.path.join(dir_dadan, now_date)
    print(data_dir)
    df1 = combine_csv_in_folder(data_dir)
    df1.columns = ["stock_index", "stock_name",
                   "trade_time", "price", "trade_num", "trade_shou",
                   "status", "price_change_rate", "price_change_ratio",
                   "look", "stock_date"]
    # zero-pad stock codes to 6 characters
    df1['stock_index'] = [str(x).zfill(6) for x in df1.stock_index.tolist()]
    df1['day'] = [x.replace("_", "-") for x in df1.stock_date.tolist()]
    tmp_dir = data_dict.get("tmp")
    df1.to_csv(os.path.join(tmp_dir, "DADAN_sample.csv"), index=0)
    #df_merge1 = DADAN_diff_stat(df1)
    #print(df_merge1.head(30))
    #print(df_merge1.tail(30))
    #df_merge2 = df_merge1[df_merge1.sale_num.isna()]
    #df_merge3 = df_merge2.sort_values("buy_num",ascending=False)
def makeDateInput(): season = ['03-31', '06-30', '09-30', '12-31'] year = [str(x) + '-' for x in range(2003, 2020)] year.reverse() date_input = [] for year1 in year: for season1 in season: date_input.append(year1 + season1) return date_input if __name__ == '__main__': #date_input = makeDateInput() save_dir = data_dict.get("fenhong") date_input = ['2020-06-31'] for date_input1 in date_input: print(date_input1) get_data(date_input1, save_dir) time.sleep(60) #get_data("2019-06-30") #response=requests.get(html1) #values={'act':'login'} """ headers={ 'Accept': 'text/html, */*; q=0.01', 'Accept-Encoding': 'gzip, deflate', 'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8', 'Connection': 'keep-alive',
    # NOTE(review): head of this chunk is the tail of an HTML-table parser
    # whose ``def`` line is outside this view; indentation reconstructed.
    for row in table.find_all('tr'):
        column_marker = 0
        columns = row.find_all('td')
        for column in columns:
            # copy each <td>'s text into the pre-sized DataFrame cell
            new_table.iat[row_marker, column_marker] = column.get_text()
            column_marker += 1
        row_marker += 1
    #new_table = new_table[~(new_table[''].isnull())]
    # drop rows that still contain NaN (rows shorter than the header)
    new_table = new_table.dropna(axis=0)
    return new_table


def save_the_table(new_table, dir_dadan, now_date, now_date_time):
    """Write ``new_table`` to <dir_dadan>/<now_date>/<now_date_time>.csv,
    creating the date folder if needed."""
    save_dir = os.path.join(dir_dadan, now_date)
    create_dir_if_not_exist(save_dir)
    save_file = os.path.join(save_dir, now_date_time + ".csv")
    new_table.to_csv(save_file, index=0)


if __name__ == '__main__':
    dir_dadan = data_dict.get("DADAN")
    now_date, now_date_time = get_the_datetime()
    # ifeng "all stock bill" (large-order) page
    html1 = "http://app.finance.ifeng.com/hq/all_stock_bill.php"
    table, new_table_index = get_html_table(html1)
    new_table = table_to_DF(table, new_table_index)
    save_the_table(new_table, dir_dadan, now_date, now_date_time)
    #print(new_table)
    #airline.table = readHTMLTable(html1, header=T, which=1,stringsAsFactors=F)
from davidyu_cfg import *
from functions.connect_url import url_opener
from functions.data_dir import data_dict

# Inspect the latest holding-ratio change per stock in the HK central
# clearing (northbound) snapshot.
data_dir = data_dict.get("owner")
#df1 = pd.read_csv(os.path.join(data_dir,"zhongyanghuijin.csv"))
df1 = pd.read_csv(os.path.join(data_dir, "hk_central.csv"))
#df1.groupby("股票简称")


def latest_change(x):
    """Return (latest date, latest change in holding ratio) for one stock's
    group.

    Assumes rows within the group are ordered newest-first -- TODO confirm
    against the csv producer.
    """
    date_in = x["截止日期"].tolist()[0]
    # diff(-1): newest row minus the next-older row, in percentage points
    change1 = x["持股比例(%)"].diff(-1).reset_index()['持股比例(%)'].tolist()[0]
    change = round(change1, 2)
    df2 = pd.DataFrame(columns=["date_latest", "latest_change"])
    df2['date_latest'] = [date_in]
    df2["latest_change"] = [change]
    return df2


aa = df1.groupby("股票简称").apply(latest_change)
a1 = aa.dropna().sort_values("latest_change")
# keep only recently disclosed positions
a2 = a1[a1["date_latest"] > "2019-09-01"]
print(a2.tail(30))
# NOTE(review): the string below is a commented-out scratch pad; its closing
# quotes lie beyond this view.
'''
a1.sort_values("date_latest")
df2 = df1[df1["股票简称"]=="华峰氨纶"]
df2["持股比例(%)"].diff(-1)
from davidyu_cfg import *
from functions.data_dir import data_dict, stk_index_list, create_dir_if_not_exist
from functions.get_datetime import *
from functions.run_combine_all_csv import *
'''
combine today DADAN data
@time: 2020-02-03
'''

if __name__ == '__main__':
    now_date, now_date_time = get_the_datetime()  # e.g. "2019_11_08"
    # override: re-process a fixed historical day
    now_date = "2021-01-15"
    day_dir = os.path.join(data_dict.get("dadan_real_time_ifeng"), now_date)
    print(day_dir)
    combined = combine_csv_in_folder_raw(day_dir)
    combined.columns = [
        "stock_index", "stock_name",
        "trade_time", "price", "trade_num", "trade_shou",
        "status", "price_change_rate", "price_change_ratio", "look",
        "stock_date",
    ]
    # zero-pad codes and derive a dashed date column
    combined['stock_index'] = [
        str(code).zfill(6) for code in combined.stock_index.tolist()
    ]
    combined['dt'] = [d.replace("_", "-")
                      for d in combined.stock_date.tolist()]
    out_name = "dadan_real_time_ifeng_" + now_date + ".csv"
    out_path = os.path.join(data_dict.get("tmp"), out_name)
    combined.drop_duplicates().to_csv(out_path, index=0)
from davidyu_cfg import *
from functions.data_dir import data_dict, stk_index_list, create_dir_if_not_exist
from functions.get_datetime import *
from functions.run_combine_all_csv import *
from functions.colNames import *

# Inspect one day's DFCF (EastMoney) main-force money-flow snapshot.
now_date = "2020_06_19"
dir_dadan = data_dict.get("dadan_DFCF")
data_dir = dir_dadan
df1 = pd.read_csv(os.path.join(data_dir, "2020_08_06.csv"))
df1.columns = setColname().dadan_DFCF()
# "-" marks stocks with no main-force flow data; drop before casting
df1 = df1[df1['zhuli_liuru_ratio'] != "-"]
for flow_col in ('zhuli_liuru_ratio', 'zhuli_liuru'):
    df1[flow_col] = df1[flow_col].astype(float)
df2 = df1[[
    "stock_index", "stock_name", "new_price", "today_increase_ratio",
    'zhuli_liuru_ratio', 'zhuli_liuru'
]]
# NOTE: these sorts are not assigned back -- their results are discarded
df2.sort_values("zhuli_liuru_ratio")
df2.sort_values("zhuli_liuru")
print(df2.head(20))
            # NOTE(review): tail of a kline-collection helper; its ``def``
            # line is above this view, indentation reconstructed.
            pass
    return kline_dict, data_list_len


def clean_dict(kline_dict, data_list_len):
    """Normalize kline_dict entries to the most common (positive) length.

    Args:
        kline_dict: mapping of stock -> kline data list.
        data_list_len: observed lengths per entry (0 = empty/failed fetch).

    Returns:
        the cleaned dict produced by clean_dict_data.
    """
    #for i in kline_dict:
    # ignore empty entries when voting on the expected length
    data_list_len1 = [x for x in data_list_len if x > 0]
    most_common_len = Counter(data_list_len1).most_common(1)[0][0]
    logging.info("dict out")
    logging.info(kline_dict)
    kline_data_dict = clean_dict_data(kline_dict, most_common_len)
    return kline_data_dict


if __name__ == '__main__':
    data_dir = data_dict.get("day_history_insert")
    list_files = os.listdir(data_dir)
    kl_stat_day = 14
    kl_pred_day = 7
    import time
    import random
    a1 = (2000, 1, 1, 0, 0, 0, 0, 0, 0)  # start-of-range time tuple (2000-01-01 00:00:00)
    a2 = (2019, 12, 31, 23, 59, 59, 0, 0, 0)  # end-of-range time tuple (2019-12-31 23:59:59)
    start = time.mktime(a1)  # start timestamp
    end = time.mktime(a2)  # end timestamp
    for i in range(100):
        t = random.randint(start, end)  # random timestamp within the range
        date_touple = time.localtime(t)  # timestamp -> time tuple
        # NOTE(review): this call is truncated -- it continues beyond this view
        date = time.strftime("%Y-%m-%d",
    # NOTE(review): these two methods belong to the ``dadanSina`` class; the
    # class header is outside this view, so indentation is reconstructed.
    def columnToFloat(df, columns):
        """Convert the given columns of ``df`` to float.

        Strips thousands separators (",") and percent signs ("%") before
        casting.

        @param: df: a dataframe
        @param: columns: a list of the colnames that need trans to float
        """
        for col in columns:
            df[col] = df[col].apply(
                lambda x: x.replace(",", "").replace("%", "")).astype(float)
        return df

    @staticmethod
    def dfColumnsToFloat(df):
        """Cast every numeric sina large-order column of ``df`` to float."""
        new_table = dadanSina.columnToFloat(
            df, ["total_trade_vol", "total_trade_vol_ratio",
                 "total_trade_money", "total_trade_money_ratio",
                 "avg_price", "zhuli_buy_vol", "zhongxing_vol",
                 "zhuli_sale_vol"])
        return new_table


if __name__ == '__main__':
    from functions.dadan_sina.dadanSina import dadanSina
    dir_dadan = data_dict.get("dadan_sina_offline")
    now_date, now_date_time = get_the_datetime()
    # sina large-order summary endpoint (volume >= 40000, one big page)
    url1 = "http://vip.stock.finance.sina.com.cn/quotes_service/view/cn_bill_sum.php?num=100000&page=1&sort=totalvolpct&asc=0&volume=40000&type=0&dpc=1"
    DF_columns = 11
    table, new_table_index = dadanSina.get_html_table(url1)
    new_table = dadanSina.table_to_DF(table, new_table_index, DF_columns)
    new_table.columns = setColname().dadan_sina()
    # stamp fetch time/date before persisting
    new_table['date_time'] = now_date_time
    new_table['stock_date'] = now_date
    new_table = dadanSina.dfColumnsToFloat(new_table)
    dadanSina.save_the_table(new_table, dir_dadan, now_date, now_date_time)
from davidyu_cfg import *
from functions.data_dir import data_dict, stk_index_list, create_dir_if_not_exist
from functions.get_datetime import *
from functions.run_combine_all_csv import *
#from functions.pyspark_david.pyspark_functions import *
#from functions.pyspark_david.get_day_history_data import *

# Pick stocks flagged "业绩大幅上升" (sharp profit rise) on 2020-01-04 and
# pull their day history for the following two weeks.
dir_yjyq = data_dict.get("YeJiYuQi")
#now_date,now_date_time = get_the_datetime()
now_date = "2020_01_05"
data_dir = os.path.join(dir_yjyq, now_date)
df1 = combine_csv_in_folder_raw(data_dir)
df1.columns = ["index", "stock_index", "stock_name", "yeji_predict",
               "yeji_abstract", "profit_change_ratio",
               "profit_change", "date"]
# forecasts published on 2020-01-04 predicting a sharp profit increase
df2 = df1[(df1["yeji_predict"] == '业绩大幅上升')
          & (df1["date"] == "2020-01-04")]
# quote each zero-padded 6-digit code for use inside a SQL IN (...) list
stk_index_list = ['\'' + str(x).zfill(6) + '\''
                  for x in df2.stock_index.tolist()]
stk_index_list = list(set(stk_index_list))
stk_index_list_str = ','.join(stk_index_list)
every_table = "stock_dev.day_history_insert"
start_date = "2020-01-04"
end_date = "2020-01-18"
df_history = get_data(every_table, start_date, end_date, stk_index_list_str)
#df_history.groupby('stock_index').apply(linear_REG)
data_vv = df_history.groupby('stock_index')
# accumulators for the per-stock regression loop (continues past this view)
stock_ind = []
stock_slope = []
stock_row_len = []
#df_f2 = df_f1.merge(df_kdj, left_index=True, right_index=True) df_max_min = df_max_min.reset_index() df_max_min = rolling_regression(df_max_min,regre_window,'date',regre_col) #df_max_min['slope_5'] = linear_slope['slopes'] df_max_min = df_max_min.reset_index().dropna() df_max_min['stock_date'] = df_max_min.reset_index()['date'].astype(str) ## combine data cols_to_use = df_max_min.columns.difference(stock_kdj_macd.columns).tolist()+['stock_date'] df_f2 = pd.merge(stock_kdj_macd,df_max_min[cols_to_use],on="stock_date") return df_f2 if __name__ == "__main__": from functions.data_dir import data_dict,stk_index_list,create_dir_if_not_exist from functions.rolling_regression import * data_dir = data_dict.get("test") file_name = "60_data.csv" file_in = os.path.join(data_dir,file_name) df1 = pd.read_csv(file_in,sep="\t").iloc[300:,:] df1.columns = [x.split(".")[1] for x in df1.columns.tolist()] #df1['close'] = df1['adj_close'] start_date = '2019-01-01' end_date = '2019-12-31' max_min_stat_window = 5 regre_window = 5 regre_col = 'close' df_out = [] for name,group in df1.groupby('stock_index'): df2 = process(group,start_date,end_date,max_min_stat_window,regre_window,regre_col) df_out.append(df2)
            # NOTE(review): tail of an HTML-table -> DataFrame converter; its
            # ``def`` line is outside this view, indentation reconstructed.
            new_table.iat[row_marker, column_marker] = column.get_text()
            column_marker += 1
        row_marker += 1
    # drop rows with missing cells
    new_table = new_table.dropna(axis=0)
    return new_table


def save_the_table(new_table, dir_dadan, now_date, page):
    """Save one scraped result page to <dir_dadan>/<now_date>/<page>.csv,
    creating the date folder if needed."""
    save_dir = os.path.join(dir_dadan, now_date)
    create_dir_if_not_exist(save_dir)
    save_file = os.path.join(save_dir, page + ".csv")
    new_table.to_csv(save_file, index=0)


if __name__ == '__main__':
    dir_dadan = data_dict.get("dadan_real_time_ifeng_1000")
    now_date, now_date_time = get_the_datetime()
    #now_date = '2020_06_19'
    import time
    import random
    #html1 = "http://app.finance.ifeng.com/hq/all_stock_bill.php"
    DF_columns = 10
    # page number to scrape comes from the command line
    i = sys.argv[1]
    try:
        # 1000-lot large-order page, newest first
        html1 = "http://app.finance.ifeng.com/hq/all_stock_bill.php?page=%s&by=hq_time&order=desc&amount=1000" % (
            str(i))
        table, new_table_index = get_html_table(html1)
        new_table = table_to_DF(table, new_table_index, DF_columns)
        new_table['date'] = now_date
        save_the_table(new_table, dir_dadan, now_date, str(i))
    # NOTE(review): the handler body is truncated beyond this view
    except Exception as e:
from davidyu_cfg import *
from functions.run_combine_all_csv import *
from functions.data_dir import data_dict, stk_index_list, create_dir_if_not_exist

# Rank stocks by dividend (派息) yield from the combined fenhong csvs and
# save the cleaned table.
dir_fenhong = data_dict.get("fenhong")
save_dir_fenhong = tmp_data_dict.get("fenhong")
df1 = combine_csv_in_folder_raw(dir_fenhong)
# keep only records whose ex-dividend date (除权除息日) is after 2019-12-31
df2 = df1[df1['除权除息日'] >= "2019-12-31"]
#df2['派息']/(df2['最新价']*100)
# latest price (最新价); '停牌' (suspended) -> sentinel 999
x1 = [float(x) if x != '停牌' else 999 for x in df2['最新价'].tolist()]
# dividend per share (派息); '--' (none) -> sentinel -1
x2 = [float(x) if x != '--' else -1 for x in df2['派息'].tolist()]
df2['paixi'] = x2
df2['price'] = x1
# dividend-yield proxy
df2['paixi_ratio'] = df2['paixi'] / df2['price']
# NOTE(review): no-op expression, presumably a leftover inspection line
df2['除权除息日']
df3 = df2[["股票代码", "股票简称", "除权除息日", "paixi_ratio"]]
save_file = os.path.join(save_dir_fenhong, "fenhong_data.csv")
df3.round(3).to_csv(save_file, index=0)
# NOTE(review): unused here; may be consumed by code beyond this view
high_paixi_num = 30
# 50 highest-yield stocks, codes zero-padded to 6 chars
df_sammple = df3.sort_values("paixi_ratio").tail(50)
high_fenhong_stock_list1 = df_sammple['股票代码']
high_fenhong_stock_list = [str(x).zfill(6) for x in high_fenhong_stock_list1]
#df2[df2['股票代码']==601216][["除权除息日","股权登记日"]]
###########################################################
###########################################################