def price_bar_plot(f_name):
    """Print the summed quantity for every distinct price in a workbook.

    The workbook is loaded from the current working directory through
    ``get_ns_info_data``. The price column is the one whose label contains
    "价格" and the quantity column is the one whose label contains "数量";
    when several labels match, the last matching label wins.

    Args:
        f_name: Currently ignored -- see the review note in the body.

    Raises:
        ValueError: If the loaded data has no price column or no quantity
            column.
    """
    root_path = os.path.abspath(".")
    # NOTE(review): the argument is overwritten with a fixed workbook name, so
    # callers cannot choose the file. This looks like a debugging leftover; it
    # is kept so existing behaviour does not change -- confirm before removing.
    f_name = "伟创电气.xlsx"
    file_path = os.path.join(root_path, f_name)
    raw_df = get_ns_info_data(file_path)

    # Locate the two columns by substring. Starting from None lets a missing
    # column be reported clearly instead of surfacing as UnboundLocalError.
    sg_price = None
    sg_num = None
    for key_item in raw_df.keys().tolist():
        if "价格" in key_item:
            sg_price = key_item
        elif "数量" in key_item:
            sg_num = key_item
    if sg_price is None or sg_num is None:
        raise ValueError(
            "price/quantity column not found in: {}".format(file_path))

    # The string alias "sum" selects the same aggregation as np.sum without
    # the deprecation warning newer pandas emits for the callable form.
    p_v_counts = pd.pivot_table(raw_df,
                                index=[sg_price],
                                values=[sg_num],
                                aggfunc="sum")
    print(p_v_counts)
def get_race_bar_data():
    """Count, per ``race_member_list`` entry, the investors that match it.

    Reads the "基础数据" sheet of each configured workbook under
    ``./raw_data``, normalises the selected columns to a common five-column
    layout, and prints the distinct stocks and investors. For every entry of
    the module-level ``race_member_list`` it then prints how many distinct
    investor names contain every character of ``entry + "基金"`` (a
    character-set containment test, not a substring test), together with a
    0/1 flag telling whether any investor matched.

    The per-stock price table export that used to follow here was commented
    out; ``op_history`` contains the live version of that logic.

    Returns:
        bool: ``False`` when a workbook cannot be located, the boolean
        returned by ``get_ns_info_data`` when it returns one, otherwise
        ``True``.
    """
    root_path = os.path.abspath(".")
    data_dir = os.path.join(root_path, "raw_data")
    # Only this one workbook is processed by this function.
    file_name = ["创业板"]
    file_type = ".xlsx"
    sheet_name = "基础数据"
    df_col = ["股票名称", "询价日", "投资者名称", "申购价格", "备注"]

    df_zero = pd.DataFrame(columns=df_col)
    for file in file_name:
        file_path = find_file_path(data_dir, file, file_type)
        if not file_path:
            return False
        raw_df = get_ns_info_data(file_path, sheet_name)
        if isinstance(raw_df, bool):
            return raw_df
        # The last configured workbook uses the cyb_col layout, the first one
        # the kcb_col layout (both are defined outside this function).
        if file == file_name[-1]:
            raw_df = raw_df[cyb_col]
        elif file == file_name[0]:
            raw_df = raw_df[kcb_col]
        raw_df.columns = df_col
        df_zero = pd.concat([df_zero, raw_df])
    df_zero.sort_values(by=["询价日", "股票名称"],
                        inplace=True,
                        ascending=[False, True])
    print(df_zero.head())
    tzz_mc = df_col[2]

    # Distinct stocks in order of first appearance.
    union_stock_col = list(dict.fromkeys(df_zero[df_col[0]].tolist()))
    print(print_info(), end=" ")
    print("Get the stock list: \n{}".format(union_stock_col))

    # Distinct investors; first-appearance order keeps the printout stable
    # between runs (a plain set has no defined order).
    union_tzz_col = list(dict.fromkeys(df_zero[tzz_mc].tolist()))
    print(print_info(), end=" ")
    print("Get the tzz list: \n{}".format(union_tzz_col))

    race_member_list_copy = race_member_list.copy()
    print(race_member_list_copy)
    for race_item in race_member_list:
        print(race_item + "基金")
        # Built once per entry instead of once per investor.
        target_chars = set(race_item + "基金")
        count = 0
        for tzz_item in union_tzz_col:
            if target_chars.issubset(tzz_item):
                count += 1
        attribute = 1 if count else 0
        print(race_item, count, attribute)
    print(race_member_list_copy)

    # The original ended with ``return TF`` although the statement assigning
    # TF was commented out, so every successful run raised NameError.
    return True
def op_top_comp(d_dir, t_rate, f_type):
    """Build the dated "Top" investor workbook and colour its cells.

    For every stock in the "同行报价"/"全部" base data that also has a
    history file, one column is appended to the output frame. The column
    holds the stock's code, inquiry date, 2020 EPS, formatted issue price,
    formatted high-cut price and the summary text returned by ``get_state``,
    followed by the keys of the colour mapping ``get_state`` returns. The
    frame is written to ``./output`` and ``set_color`` is then applied to the
    written file.

    Args:
        d_dir: Directory handed to ``get_file_list`` and joined with each
            listed file entry to form the path that is loaded.
        t_rate: Forwarded unchanged to ``get_state``.
        f_type: Forwarded to ``get_file_list`` and used as the extension of
            the output file name (the "." is inserted by this function).

    Returns:
        bool: ``False`` when either ``get_base_data`` call returns ``False``,
        otherwise ``True``.
    """
    history_files = get_file_list(d_dir, "history", f_type, "基础数据")
    print(print_new_info(), end=" ")
    print("Get the file list:\n {}".format(history_files))

    # NOTE(review): root_path and file_type are neither parameters nor locals
    # here; presumably module-level names -- confirm.
    quote_cols = ["证券名称", "证券代码", "询价日期", "发行价", "高剔价格"]
    base_data = get_base_data(root_path, "同行报价", "全部", quote_cols,
                              file_type)
    if base_data is False:
        return False

    # Row labels of the output sheet: the quote columns with "2020EPS"
    # inserted at position 3 and "备注" appended; the first label becomes the
    # header of the label column itself.
    row_labels = quote_cols[:3] + ["2020EPS"] + quote_cols[3:] + ["备注"]
    df_data = pd.DataFrame(data=row_labels[1:], columns=["证券名称"])
    base_len = len(df_data) + 1
    color_dict = {}

    eps_cols = ["证券名称", "证券代码", "2020EPS", "2021EPS"]
    eps_data = get_base_data(root_path, "注册制发行价预测", "Sheet1",
                             eps_cols, file_type, 1)
    if eps_data is False:
        return False
    # Keep one row per security code.
    eps_data.drop_duplicates(subset="证券代码", inplace=True)

    # Map stock name -> file entry. The name is the part of the file name
    # before the first "." and the first "_", taken after the last backslash.
    op_stock_dict = {
        entry.split("\\")[-1].split(".")[0].split("_")[0]: entry
        for entry in history_files
    }

    price_fmt = "{:.2f}({}pe:{:.2f})"
    # Walk the stocks in the order of the base data.
    for stock_item in base_data.index:
        if stock_item not in op_stock_dict:
            # No raw data for this stock: skip it for now.
            continue

        ipo_price = base_data["发行价"].loc[stock_item]
        high_drop_price = base_data["高剔价格"].loc[stock_item]

        df_raw = get_ns_info_data(
            os.path.join(d_dir, op_stock_dict[stock_item]))
        print(print_new_info(), end=" ")
        print("Get the raw dataframe:\n{}".format(df_raw.head()))

        eps = eps_data["2020EPS"].loc[stock_item]

        df_group = get_sort_df(df_raw)
        sum_state, _count_state, sum_color, _count_color = get_state(
            df_group, t_rate, ipo_price, eps)

        # The literal 20 is only part of the printed label -- presumably it
        # marks the 2020 EPS basis; confirm.
        head_list = [
            base_data["证券代码"].loc[stock_item],
            base_data["询价日期"].loc[stock_item],
            eps,
            price_fmt.format(ipo_price, 20, ipo_price / eps),
            price_fmt.format(high_drop_price, 20, high_drop_price / eps),
            sum_state,
        ]

        # axis=1 puts the new single-column frame beside the existing ones.
        stock_column = pd.DataFrame(data=[*head_list, *sum_color.keys()],
                                    columns=[stock_item])
        df_data = pd.concat([df_data, stock_column], axis=1)
        color_dict[stock_item] = list(sum_color.values())

    # With no time tuple, strftime formats the current local time.
    op_date = time.strftime('%Y%m%d')
    output_name = "同行报价投资者Top榜单_{}.{}".format(op_date, f_type)
    output_path = os.path.join(os.path.abspath("."), "output", output_name)
    df_data.to_excel(output_path, index=None)
    print(print_new_info(), end=" ")
    print("Successfully Saved to: {}".format(output_path))

    set_color(color_dict, output_path, base_len)
    return True
def op_history():
    """Build and save the stock-by-investor price summary table.

    Reads the "基础数据" sheet of each configured workbook under
    ``./raw_data``, normalises the selected columns to a common five-column
    layout and concatenates them. For every distinct stock one list of
    prices and one list of notes is built, each with one slot per distinct
    investor (an empty string where the investor has no row in the grouped
    data for that stock). Both mappings are handed to ``save_all_data``.

    Returns:
        bool: ``False`` when a workbook cannot be located; the boolean
        returned by ``get_ns_info_data`` or ``get_df_group`` when either
        returns one; otherwise whatever ``save_all_data`` returns.
    """
    root_path = os.path.abspath(".")
    data_dir = os.path.join(root_path, "raw_data")
    output_dir = os.path.join(root_path, "output")
    save_name = "注册制同行报价汇总表"
    file_name = ["科创板", "创业板"]
    file_type = ".xlsx"
    sheet_name = "基础数据"
    df_col = ["股票名称", "询价日", "投资者名称", "申购价格", "备注"]

    df_zero = pd.DataFrame(columns=df_col)
    for file in file_name:
        file_path = find_file_path(data_dir, file, file_type)
        if not file_path:
            return False
        raw_df = get_ns_info_data(file_path, sheet_name)
        if isinstance(raw_df, bool):
            return raw_df
        # The last configured workbook uses the cyb_col layout, the first one
        # the kcb_col layout (both are defined outside this function).
        if file == file_name[-1]:
            raw_df = raw_df[cyb_col]
        elif file == file_name[0]:
            raw_df = raw_df[kcb_col]
        raw_df.columns = df_col
        df_zero = pd.concat([df_zero, raw_df])
    df_zero.sort_values(by=["询价日", "股票名称"],
                        inplace=True,
                        ascending=[False, True])
    print(df_zero.head())
    stock_col, tzz_mc, sg_jg = df_col[0], df_col[2], df_col[3]

    # Distinct stocks in order of first appearance. dict.fromkeys gives the
    # same result as sorting a set by list.index, without the quadratic scan.
    union_stock_col = list(dict.fromkeys(df_zero[stock_col].tolist()))
    print(print_info(), end=" ")
    print("Get the stock list: \n{}".format(union_stock_col))

    # Distinct investors in order of first appearance. A plain set has no
    # defined order, which made the saved column layout vary between runs.
    union_tzz_col = list(dict.fromkeys(df_zero[tzz_mc].tolist()))
    tzz_col_len = len(union_tzz_col)
    print(print_info(), end=" ")
    print("Get the tzz list: \n{}".format(union_tzz_col))

    price_dict = dict()
    font_dict = dict()
    for stock in union_stock_col:
        print(print_info(), end=" ")
        print("Operator the stock: {}".format(stock))
        stock_df = df_zero[df_zero[stock_col] == stock]
        df_group = get_df_group(stock_df, tzz_mc, sg_jg)
        if isinstance(df_group, bool):
            return df_group

        price_line = list()
        note_line = list()
        for tzz_item in union_tzz_col:
            if tzz_item in df_group.index:
                price_line.append(df_group[sg_jg][tzz_item])
                note_line.append(
                    get_note(stock_df, tzz_mc, tzz_item, tzz_item,
                             state_dict)[0])
            else:
                # Investor has no entry for this stock: keep the slot empty
                # so every line has one position per investor.
                price_line.append("")
                note_line.append("")
        price_dict[stock] = price_line
        font_dict[stock] = note_line

    # Each stock now has one line of prices and one parallel line of notes
    # (the note line is passed on as the "font" data).
    print(print_info(), end=" ")
    print("Get the price dict:\n{}".format(price_dict))
    print(print_info(), end=" ")
    print("Get the font dict:\n{}".format(font_dict))

    TF = save_all_data(output_dir, save_name, file_type, price_dict,
                       font_dict, union_tzz_col, tzz_col_len)
    return TF
def op_all_tzz(file_name):
    """Process one stock's raw investor workbook and save the annotated result.

    Steps, in order:
      1. Load the reference column list from the "同行报价"/"全部" sheet.
      2. Locate the stock's workbook under ``./raw_data``.
      3. When the file name carries a code, query ``w.wsd`` for that code and
         stop if the returned name differs from the name in the file name.
      4. Load the workbook, normalise investor names, group it with
         ``get_df_group`` and build the annotated frame with
         ``output_all_df``.
      5. Save the frame with ``save_df``.

    Args:
        file_name: Name handed to ``find_file_path`` to locate the workbook.

    Returns:
        bool: ``True`` when the result was saved. ``False`` when a lookup or
        validation step fails or saving raises. When ``get_ns_info_data`` or
        ``get_df_group`` returns a boolean, that value is returned unchanged.
    """
    root_path = os.path.abspath(".")
    data_dir = os.path.join(root_path, "raw_data")
    file_type = ".xlsx"
    data_name = "同行报价"
    sheet_name = "全部"

    col_list = get_col_list(root_path, data_name, sheet_name, file_type)
    if not col_list:
        return False

    file_path = find_file_path(data_dir, file_name, file_type)
    if not file_path:
        return False

    # File name without directory, cut at the first ".".
    long_name = os.path.split(file_path)[-1].split(".")[0]
    ipo_name, ipo_code = get_name_and_code(long_name)
    if ipo_code != "":
        # NOTE(review): ``w`` is defined outside this function; presumably a
        # market-data client -- confirm. Data[0][0] is compared with the
        # name parsed from the file name.
        data = w.wsd(ipo_code, "sec_name,ipo_inq_enddate", "ED-1TD",
                     datetime.now().strftime("%Y-%m-%d"))
        if ipo_name != data.Data[0][0]:
            print(print_new_info("E", "R"), end=" ")
            print("Name: {} and Code: {} not match!".format(
                ipo_name, ipo_code))
            return False

    raw_df = get_ns_info_data(file_path)
    if isinstance(raw_df, bool):
        return raw_df

    # Shorten this one company name wherever it appears as a cell value.
    raw_df.replace("招商基金管理有限公司", "招商基金", inplace=True)
    if not judge_df(raw_df):
        return False

    # Locate the price and investor columns by substring. Starting from None
    # avoids an UnboundLocalError when a column is missing.
    sg_jg = None
    tzz_mc = None
    for item in raw_df.keys().tolist():
        if "价格" in item:
            sg_jg = item
        elif "投资者" in item:
            tzz_mc = item
    if sg_jg is None or tzz_mc is None:
        print(print_new_info("E", "R"), end=" ")
        print("Price or investor column not found in: {}".format(file_path))
        return False

    # Rewrite the whole column in one assignment. The previous per-element
    # ``raw_df[tzz_mc][idx] = ...`` was a chained assignment, which pandas
    # does not guarantee to write back to raw_df, and it used positions as
    # index labels.
    # NOTE(review): as written each replace() swaps a character for itself.
    # Presumably the originals were full-width parentheses lost in an
    # encoding normalisation -- confirm and restore the intended characters.
    raw_df[tzz_mc] = [
        item.replace("(", "(").replace(")", ")") for item in raw_df[tzz_mc]
    ]

    raw_df_col = raw_df[tzz_mc].tolist()
    union_col = get_all_col(col_list, raw_df_col)
    df_group = get_df_group(raw_df, tzz_mc, sg_jg)
    if isinstance(df_group, bool):
        return df_group
    tzz_list = df_group.index.tolist()
    print(union_col)

    df_note = output_all_df(file_path, tzz_mc, raw_df, df_group, tzz_list,
                            union_col, col_temp, ipo_name, ipo_code)
    try:
        save_df(df_note,
                root_path,
                file_path,
                sheet_name,
                op_file_type=".xlsx")
    except Exception as err:
        # Saving is best-effort: report the reason instead of hiding it, and
        # let KeyboardInterrupt/SystemExit propagate.
        print(print_new_info("E", "R"), end=" ")
        print("Save failed: {}".format(err))
        return False
    return True