def match_announcement(announcement_dir, history_dir, output_dir, origin_file):
    """Match announcement data against historical price data.

    Paths are normalized via ou.check_path; the result code of the
    underlying su.match_announcement call is checked with ou.check_ret.
    """
    # Fixed keyword/stopword configuration used for the match.
    keywords = ["分配"]
    stopwords = []
    result = su.match_announcement(
        ou.check_path(announcement_dir),
        ou.check_path(history_dir),
        ou.check_path(output_dir),
        origin_file,
        keywords,
        stopwords,
        True,
    )
    ou.check_ret(result)
def get_fundamental_data(opath):
    """Download fundamental reports for all listed A-share companies.

    Fetches the stock list plus six report families (performance,
    profitability, operation, growth, debt-paying, cash flow) for every
    quarter from 1989 to the current year, saving one CSV per quarter
    under ``opath``.  Existing files are skipped so the crawl can resume.

    :param opath: output directory (created if missing)
    :return: 0 on completion
    """
    print("获取沪深上市公司的基本面情况(会花费较长时间,请耐心等待)")
    if not os.path.exists(opath):
        print("创建保存数据的文件夹", opath)
        os.makedirs(opath, exist_ok=True)

    print("#1 获取股票列表")
    csvpath = ou.check_path(opath) + "stock_basics.csv"
    if os.path.exists(csvpath):
        # Already downloaded earlier -- skip so reruns are cheap.
        print("已有该股票列表数据,跳过获取")
    else:
        stock_basics = ts.get_stock_basics()
        print("数据总数:", stock_basics.index.size)
        print("全量数据将保存为", csvpath)
        stock_basics.sort_index().to_csv(csvpath)
        print("数据保存成功")

    title = ["业绩报告", "盈利能力", "营运能力", "成长能力", "偿债能力", "现金流量"]
    en_name = [
        "report_data", "profit_data", "operation_data", "growth_data",
        "debtpaying_data", "cashflow_data"
    ]
    now = datetime.datetime.now()
    for count, name in enumerate(en_name):
        print("#%d 获取 %s 报表" % (count + 2, title[count]))
        # Every quarter from 1989 up to and including the current year.
        for year in range(1989, now.year + 1):
            for season in range(1, 5):
                print("获取 %d 年第 %d 季度的 %s 报表" % (year, season, title[count]))
                try:
                    csvpath = "%s%d-%d-%s.csv" % (ou.check_path(opath), year,
                                                  season, name)
                    if os.path.exists(csvpath):
                        # Resume-friendly: never re-download a saved quarter.
                        print("已有该季度数据,跳过获取")
                        continue
                    # getattr dispatch replaces the old exec(): assignments
                    # made inside exec() are not reliably visible as function
                    # locals, and exec is needlessly injection-prone.
                    fetch = getattr(ts, "get_" + name)
                    data = fetch(year, season)
                    data.sort_index().to_csv(csvpath)
                    print("\n数据保存成功")
                except IOError:
                    # Best effort: a failed quarter is skipped, not fatal.
                    print("获取数据错误,跳过该季度")
    return 0
def get_period_history_data(code, interval, path, start, end):
    """Download k-line history for one stock over [start, end] and save CSV.

    :param code: stock code
    :param interval: k-line type passed as ``ktype``
    :param path: output directory; file is named ``<code>#<interval>.csv``
    :param start: start date string
    :param end: end date string
    :return: 0 on success, -1 when no data came back
    """
    print("获取单个股票一段时间内的历史数据")
    print("要获取的股票代码为:", code)
    # Bug fix: start/end were accepted but never forwarded, so the
    # function silently fetched the full history instead of the period.
    data = ts.get_k_data(code, ktype=interval, start=start, end=end)
    if data is None:
        print("没有该股票的数据(可能是网络问题,也可能是本身就没有这个编号)")
        return -1
    print("共", data.index.size, "行数据")
    path = ou.check_path(path) + code + "#" + interval + ".csv"
    print("数据获取成功,将被保存在: ", path)
    data.to_csv(path)
    print("数据保存成功")
    return 0
def get_stock_list(path):
    """Download the basic-info table for all listed A-share companies.

    Prints a short preview (columns + first 3 rows) and saves the full
    table as ``stock_basics.csv`` under ``path``.

    :param path: output directory (created if missing)
    :return: 0 on completion
    """
    print("获取沪深上市公司基本情况")
    stock_basics = ts.get_stock_basics()
    # stock_basics.index, stock_basics.columns, stock_basics.values
    print("数据总数:", stock_basics.index.size)
    print("索引列名:", stock_basics.index.name)
    print("其他列名:", end=' ')
    for item in stock_basics.columns:
        print(item, end=' ')
    print("\n数据前 3 行")
    print(stock_basics.head(3))
    if not os.path.exists(path):
        print("创建保存数据的文件夹", path)
        # makedirs(exist_ok=True) for consistency with the other fetchers:
        # os.mkdir fails on nested paths and races if the dir appears.
        os.makedirs(path, exist_ok=True)
    csvpath = ou.check_path(path) + "stock_basics.csv"
    print("全量数据将保存为", csvpath)
    stock_basics.sort_index().to_csv(csvpath)
    print("数据保存成功")
    return 0
def get_history_data(code, interval, path):
    """Download the full k-line history for one stock and save it as CSV.

    :param code: stock code
    :param interval: k-line type (validated by ou.check_interval)
    :param path: output directory; file is named ``<code>#<interval>.csv``
    :return: 0 on success, -1 on a bad interval or missing data
    """
    print("获取单个股票的历史数据")
    # Guard: reject an unsupported interval before doing any work.
    if ou.check_interval(interval) == -1:
        print("时间间隔设置错误,请参考 help 文档")
        return -1
    if not os.path.exists(path):
        print("创建保存数据的文件夹", path)
        os.makedirs(path, exist_ok=True)
    print("要获取的股票代码为:", code)
    frame = ts.get_k_data(code, ktype=interval)
    if frame is None:
        print("没有该股票的数据(可能是网络问题,也可能是本身就没有这个编号)")
        return -1
    print("共", frame.index.size, "行数据")
    target = ou.check_path(path) + code + "#" + interval + ".csv"
    print("数据获取成功,将被保存在: ", target)
    frame.to_csv(target)
    print("数据保存成功")
    return 0
def get_macro_enco_data(opath):
    """Download macro-economic indicator tables (rates, money supply, GDP,
    CPI, PPI, ...) and save each as a dated CSV under ``opath``.

    Existing files for today's date are skipped so reruns are cheap.

    :param opath: output directory (created if missing)
    :return: 0 on completion
    """
    # NOTE(review): this banner was copied from the fundamentals fetcher;
    # it actually downloads macro-economic data. Left as-is (runtime string).
    print("获取沪深上市公司的基本面情况(会花费较长时间,请耐心等待)")
    if not os.path.exists(opath):
        print("创建保存数据的文件夹", opath)
        os.makedirs(opath, exist_ok=True)
    now = datetime.datetime.now()
    title = [
        "存款利率", "贷款利率", "存款准备金率", "货币供应量", "货币供应量(年底余额)",
        "国内生产总值(年度)", "国内生产总值(季度)", "三大需求对 GPD 贡献", "三大产业对 GDP 拉动",
        "三大产业贡献率", "居民消费价格指数", "工业品出场价格指数"
    ]
    en_name = [
        "deposit_rate", "loan_rate", "rrr", "money_supply", "money_supply_bal",
        "gdp_year", "gdp_quarter", "gdp_for", "gdp_pull", "gdp_contrib",
        "cpi", "ppi"
    ]
    for count, name in enumerate(en_name):
        print("#%d 获取 %s 报表" % (count + 1, title[count]))
        try:
            csvpath = "%s%d-%2d-%2d-%s.csv" % (ou.check_path(opath), now.year,
                                               now.month, now.day, name)
            if os.path.exists(csvpath):
                # Already fetched today -- skip.
                print("已有该数据,跳过获取")
                continue
            # getattr dispatch replaces the old exec(): assignments made
            # inside exec() are not reliably visible as function locals,
            # and exec is needlessly injection-prone.
            data = getattr(ts, "get_" + name)()
            data.sort_index().to_csv(csvpath)
            print("数据保存成功")
        except IOError:
            # Best effort: a failed indicator is skipped, not fatal.
            print("获取数据错误,跳过指标")
    return 0
def daily_crawler(interval, path, root):
    """Fetch today's k-line data at ``interval`` for every stock listed in
    the CSV at ``path`` and save one file per stock under ``root+interval``.

    :param interval: k-line type passed as ``ktype``
    :param path: stock-list CSV (first column = code, first row = header)
    :param root: parent directory; data lands in ``root + interval``
    :return: 0 on completion
    """
    now = datetime.datetime.now()
    oneday = datetime.timedelta(days=1)
    today = now.strftime("%Y-%m-%d")
    tomorrow = (now + oneday).strftime("%Y-%m-%d")
    folder = root + interval
    print("爬取", today, "所有股票的", interval, "分钟数据")
    if not os.path.exists(folder):
        print("创建保存数据的文件夹", folder)
        os.makedirs(folder, exist_ok=True)
    count = 0
    fail_count = 0
    # with-statement fixes a leak: the old code only closed the file on the
    # success path, so an exception during a fetch left the handle open.
    with codecs.open(path, "r", "utf-8") as f:
        csv_reader = csv.reader(f)
        next(csv_reader, None)  # 过滤表头 (skip header row)
        for line in csv_reader:
            count += 1
            code = line[0]
            data = ts.get_hist_data(code, ktype=interval, start=today,
                                    end=tomorrow)
            if data is None:
                fail_count += 1
                continue
            dpath = ou.check_path(folder) + code + "-" + today + ".csv"
            data.to_csv(dpath)
    print("共尝试获取", count, "支股票的历史数据,其中", fail_count, "支股票获取失败")
    return 0
def macro_enco_data(output_dir):
    """Fetch macro-economic indicator data into ``output_dir``."""
    normalized = ou.check_path(output_dir)
    ou.check_ret(su.get_macro_enco_data(normalized))
def fundamental_data(output_dir):
    """Fetch company fundamental data into ``output_dir``."""
    normalized = ou.check_path(output_dir)
    ou.check_ret(su.get_fundamental_data(normalized))
def get_all_history(list_file, interval, output):
    """Fetch price history at ``interval`` for every stock in ``list_file``."""
    # The stock list (stock_basics.csv) must exist before a bulk crawl.
    ou.check_file(list_file, "请先使用 stock-list 命令来获取")
    out_dir = ou.check_path(output)
    ou.check_ret(su.get_all_history_data(interval, list_file, out_dir))
def get_history_data(code, interval, output):
    """Fetch price history for one stock at the given interval."""
    out_dir = ou.check_path(output)
    ou.check_ret(su.get_history_data(code, interval, out_dir))
def daily(list_file, output):
    """Run the daily crawler for every supported minute interval.

    Docstring fix: the old one was copy-pasted from the stock-list command
    and described fetching the A-share stock list instead.
    """
    # Minute-level k-line intervals supported by the crawler.
    intervals = ["5", "15", "30", "60"]
    for i in intervals:
        ou.check_ret(su.daily_crawler(i, list_file, ou.check_path(output)))
def get_stock_list(output):
    """Fetch the A-share stock list with per-stock details."""
    out_dir = ou.check_path(output)
    ou.check_ret(su.get_stock_list(out_dir))
def generate_label_data(input_dir, output_dir):
    """Generate announcement data prepared for manual labeling."""
    src = ou.check_path(input_dir)
    dst = ou.check_path(output_dir)
    ou.check_ret(su.generate_label_data(src, dst))
def keyword_cluster(input_dir, output_dir):
    """Cluster announcements by the configured keywords."""
    src = ou.check_path(input_dir)
    dst = ou.check_path(output_dir)
    ou.check_ret(su.cluster_announcement(src, dst))