def get_share_data(code, f, ktype, stype=INDEX_TYPE):
    """Fetch K-line data for one code and store it under ``f[ktype]``.

    If the dataset does not exist yet, or exists but is empty, the full
    history since 2017 is fetched (for 'M' the end date is last month-end,
    for 'W' last Sunday — per ``ttime.get_end_date``).  Otherwise only the
    rows after the last stored date are appended.

    Errors are logged and swallowed (after a request back-off) so a batch
    run over many codes can continue.

    :param code: share/index code to fetch
    :param f: HDF5 group holding one dataset per ktype
    :param ktype: K-line type key (e.g. 'D', 'W', 'M')
    :param stype: data source type, SHARE_TYPE or INDEX_TYPE
    """
    try:
        # The "dataset missing" and "dataset empty" cases were duplicated
        # in the original; they are identical, so handle them together.
        if f.get(ktype) is None or len(f[ktype]) == 0:
            end_date = ttime.get_end_date(code, ktype)
            _add_data(code, ktype, f, end_date, stype)
        else:
            # Resume from the last stored row's date.
            # TODO make the 0 column index a named constant
            tail_date_str = f[ktype][-1][0].astype(str)
            start_date = ttime.get_start_date(tail_date_str, code, ktype)
            end_date = ttime.get_end_date(code, ktype)
            if start_date >= end_date:
                # Already up to date — nothing to fetch.
                count.inc_by_index("pass")
                return
            _append_data(code, ktype, f, start_date, end_date, stype)
    except Exception as er:
        # Best-effort batch processing: back off, log, keep going.
        time.sleep(conf.REQUEST_BLANK)
        print(str(er))
    return
def get_xsg(f):
    """Fetch restricted-share unlock data (限售股解禁) month by month.

    Iterates every month from 2010-01 through the current year, storing
    each month's frame as a dataset named ``YYYYMM``.  Months already
    present in ``f`` are skipped; per-month failures are logged and the
    loop continues after a request back-off.

    :param f: HDF5 group to store one dataset per month
    """
    for year in range(2010, datetime.today().year + 1):
        for month in range(1, 13):
            # Zero-pad the month in one format call (the original built
            # the name with a range-membership test and concatenation).
            dset_name = "%d%02d" % (year, month)
            if f.get(dset_name) is not None:
                count.inc_by_index(conf.HDF5_COUNT_PASS)
                continue
            try:
                df = ts.xsg_data(year=year, month=month, pause=conf.REQUEST_BLANK)
                # The share name is redundant with the code; drop it.
                df = df.drop("name", axis=1)
                df = df.sort_values(by=[conf.HDF5_SHARE_DATE_INDEX])
                tool.create_df_dataset(f, dset_name, df)
                console.write_exec()
                count.inc_by_index(conf.HDF5_COUNT_GET)
            except Exception as er:
                print(str(er))
                time.sleep(conf.REQUEST_BLANK)
    return
def _check_refresh(f, tag):
    """Decide whether the classify data for *tag* needs refreshing.

    Returns True when the group is missing, has no code dataset, carries
    no refresh attribute, or the recorded refresh date is at least the
    configured number of days old; returns False (and counts a pass)
    when the data is still fresh.
    """
    group_path = './' + tag
    if f.get(group_path) is None:
        return True
    group = f[group_path]
    code_ds = group.get(conf.HDF5_CLASSIFY_DS_CODE)
    if code_ds is not None and code_ds.attrs.get(conf.HDF5_CLASSIFY_REFRESH_ATTR) is not None:
        last_refresh = datetime.strptime(
            code_ds.attrs[conf.HDF5_CLASSIFY_REFRESH_ATTR], '%Y-%m-%d')
        age = datetime.now() - last_refresh
        if age.days < conf.HDF5_CLASSIFY_REFRESH_DAYS_BLANK:
            count.inc_by_index(conf.HDF5_COUNT_PASS)
            return False
    return True
def _append_data(code, ktype, f, start_date, end_date, stype):
    """Download K-line rows for *code* between the two dates and append
    them to the existing dataset ``f[ktype]``.

    An empty or missing response is recorded in the error log so the
    code can be retried later.
    """
    df = ts.get_hist_data(code, ktype=ktype, pause=conf.REQUEST_BLANK,
                          end=end_date, start=start_date)
    time.sleep(conf.REQUEST_BLANK)
    if df is None or df.empty:
        # Nothing came back — record the miss for a later retry.
        error.add_row([ktype, code])
        count.inc_by_index("empty")
        return
    # Keep only the columns relevant to the data source type.
    if stype == SHARE_TYPE:
        df = df[SHARE_COLS]
    elif stype == INDEX_TYPE:
        df = df[INDEX_COLS]
    df = df.reset_index().sort_values(by=[conf.HDF5_SHARE_DATE_INDEX])
    tool.append_df_dataset(f, ktype, df)
    console.write_exec()
    count.inc_by_index(ktype)
    return
def _add_data(f, tag, name):
    """Fetch the member code list for classify *tag* and (re)store it.

    The codes are written as a dataset under ``./tag`` and the dataset is
    stamped with the classify name and today's date as the refresh marker.
    """
    row_df = get_detail(tag, name, retry_count=1, pause=conf.REQUEST_BLANK)
    if row_df is None:
        return
    group_path = './' + tag
    # Create the classify group on first use.
    if f.get(group_path) is None:
        f.create_group(group_path)
    group = f[group_path]
    # Replace any existing code list wholesale.
    if group.get(conf.HDF5_CLASSIFY_DS_CODE) is not None:
        del group[conf.HDF5_CLASSIFY_DS_CODE]
    # Store the codes as fixed-width byte strings, one per row.
    codes = row_df['code'].values.astype('S').tolist()
    group.create_dataset(conf.HDF5_CLASSIFY_DS_CODE, (len(codes), 1), data=codes)
    code_ds = group[conf.HDF5_CLASSIFY_DS_CODE]
    code_ds.attrs[conf.HDF5_CLASSIFY_NAME_ATTR] = name
    code_ds.attrs[conf.HDF5_CLASSIFY_REFRESH_ATTR] = datetime.now().strftime('%Y-%m-%d')
    count.inc_by_index(conf.HDF5_COUNT_GET)
def get_detail(f, start_date):
    """Fetch daily stock basics from *start_date* up to today, one day at
    a time, storing each day's frame under its ``YYYY-MM-DD`` name.

    Weekends, days already stored in ``f`` and days previously recorded
    as market-closed are skipped.  A 404 response is treated as a closed
    day; any other error is recorded for a later retry.
    """
    if start_date is None:
        # Default history start when no resume point is given.
        start_date = datetime.strptime("2016-08-09", "%Y-%m-%d")
    # Load the history of previously failed days so closed days are skipped.
    history = error.get_file()
    closed_days = list()
    if history is not None:
        history["type"] = history["type"].str.decode("utf-8")
        history["date"] = history["date"].str.decode("utf-8")
        closed_days = history[history["type"] == "close"]["date"].values
    day = start_date
    while day <= datetime.now():
        try:
            day_str = datetime.strftime(day, "%Y-%m-%d")
            # Skip weekends, known closed days and already-stored days.
            if day.weekday() < 5 and day_str not in closed_days and f.get(day_str) is None:
                df = ts.get_stock_basics(day_str)
                time.sleep(conf.REQUEST_BLANK)
                if df is not None and not df.empty:
                    # Drop descriptive text columns; only numerics are kept.
                    for col in ("name", "area", "industry"):
                        df = df.drop(col, axis=1)
                    tool.create_df_dataset(f, day_str, df.reset_index())
                    count.inc_by_index(conf.HDF5_COUNT_GET)
                    console.write_exec()
            else:
                count.inc_by_index(conf.HDF5_COUNT_PASS)
        except Exception as er:
            time.sleep(conf.REQUEST_BLANK)
            if str(er) != "HTTP Error 404: Not Found":
                error.add_row([GET_DETAIL_OTHER, day_str])
                print(str(er))
            else:
                # 404 here means the market was closed that day.
                error.add_row([GET_DETAIL_CLOSE, day_str])
        day = day + timedelta(days=1)
    return