def get_single_financial_resource(self, current_filename):
    """Ensure one financial data file exists locally, then load it.

    Downloads the file with HistoryFinancialCrawler when it is missing,
    reads it into a DataFrame, and normalizes the column names.

    :param current_filename: path of the financial data file to load
    :return: DataFrame with renamed financial columns
    """
    datacrawler = HistoryFinancialCrawler()
    if not os.path.exists(current_filename):
        # Download only when the file is not cached locally.
        # NOTE(review): filename and path_to_download are both set to
        # current_filename here — confirm this matches the crawler API.
        datacrawler.fetch_and_parse(
            reporthook=demo_reporthook,
            filename=current_filename,
            path_to_download=current_filename)
    reader = HistoryFinancialReader()
    database_data = reader.get_df(current_filename)
    # Normalize the raw column names to the project's naming scheme.
    database_data = renames.rename_list_utils(
    ).rename_current_finance_utils(database_data)
    return database_data
def main(input, output, datatype):
    """Read a TDX (通达信) data file and print it or dump it to CSV.

    :param input: path of the TDX data file to read
    :param output: optional CSV path; when falsy, print to stdout
    :param datatype: selects which reader class parses the file
    """
    # Dispatch table instead of an elif chain; unknown types fall back
    # to the minute-bar reader, matching the original default branch.
    reader_classes = {
        'daily': TdxDailyBarReader,
        'ex_daily': TdxExHqDailyBarReader,
        'lc': TdxLCMinBarReader,
        'gbbq': GbbqReader,
        'block': BlockReader,
        'customblock': CustomerBlockReader,
        'history_financial': HistoryFinancialReader,
        'hf': HistoryFinancialReader,
    }
    reader = reader_classes.get(datatype, TdxMinBarReader)()
    try:
        df = reader.get_df(input)
        if output:
            click.echo("写入到文件 : " + output)
            df.to_csv(output)
        else:
            print(df)
    except Exception as e:
        # Best-effort CLI: report the error text and exit normally.
        print(str(e))
def get_all_data(self) -> pd.DataFrame:
    """Load and merge every local CN financial file listed in gpcw.txt.

    Side effect: sets ``self.updated_date`` to the yyyymmdd derived from
    the mtime of ``gpcw.txt``. Columns that are zero everywhere are
    treated as unused and dropped from the result.

    :return: concatenated DataFrame with ``date``/``period`` columns
    """
    cw_dir = self.tdx_dir.joinpath("cw")
    gpcw_path = cw_dir.joinpath("gpcw.txt")
    # Record the listing file's modification day as an int like 20210930.
    self.updated_date = int(
        pd.to_datetime(gpcw_path.stat().st_mtime_ns).strftime('%Y%m%d'))
    # First column of gpcw.txt holds the per-period file names.
    file_df = pd.read_csv(gpcw_path, header=None, usecols=[0])
    from pytdx.reader.history_financial_reader import HistoryFinancialReader
    with TimeInspector.logt("get all cn financial files ......"):
        frames = [
            HistoryFinancialReader().get_df(cw_dir.joinpath(name))
            for name in file_df.iloc[:, 0].to_list()
        ]
        df = pd.concat(frames)
    # A column that is zero in every row carries no information.
    all_zero_mask = (df == 0).all(axis=0)
    unused_col = all_zero_mask[all_zero_mask].index.to_list()
    df = df.rename(columns={'col314': 'date', 'report_date': 'period'})
    return df.drop(columns=unused_col)
def get_and_parse(filename):
    """Parse a TDX history-financial file into a DataFrame.

    :param filename: path of the history-financial data file
    :return: DataFrame produced by HistoryFinancialReader
    """
    reader = HistoryFinancialReader()
    return reader.get_df(filename)
def to_df(self):
    """Return the cached financial data as a DataFrame.

    The cached ``.zip`` name is mapped to its extracted ``.dat``
    counterpart before reading.
    """
    dat_path = self.cache_file_name.replace('.zip', '.dat')
    return HistoryFinancialReader().get_df(dat_path)
def save_financial_files():
    """
    将tdx目录下的gpcw财务数据存储到mongo数据库
    (Persist the gpcw financial archives under the tdx dir to MongoDB.)

    Only files modified after midnight of the most recent modification
    day are (re)imported; every record is upserted keyed on
    ``(code, report_date)``.
    """
    coll = QA_DATABASE.financial
    coll.create_index([("code", ASCENDING), ("report_date", ASCENDING)],
                      unique=True)
    # e.g. gpcw20210930.zip — raw string so \d is a regex class.
    pattern = r"^(gpcw)(?P<date>\d{8})\.zip"
    files = pd.DataFrame(os.listdir(_CW_DIR), columns=['filename'])
    files['re'] = files['filename'].apply(lambda x: re.match(pattern, x))
    # Non-matching names produced None above; dropna removes them.
    files = files.dropna()
    files['date'] = files['re'].apply(lambda x: int(x.groupdict()['date']))
    files['last_modified'] = files['filename'].apply(lambda x: pd.to_datetime(
        os.path.getmtime(os.path.join(_CW_DIR, x)), unit='s'))
    latest = files.sort_values(
        by='last_modified', ascending=[False])['last_modified'].iloc[0]
    # Truncate to midnight: keep every file touched on the latest day.
    day_floor = pd.to_datetime(latest.strftime('%Y-%m-%d'))
    files = files[files['last_modified'] > day_floor]
    for filename in files['filename'].to_list():
        try:
            date = int(re.match(pattern, filename).groupdict()['date'])
        except AttributeError:
            # re.match returned None — not a gpcw archive; skip it.
            continue
        util_log_info('NOW SAVING {}'.format(date))
        util_log_info('在数据库中的条数 {}'.format(
            coll.count_documents({'report_date': date})))
        try:
            filepath = os.path.join(_CW_DIR, filename)
            df = HistoryFinancialReader().get_df(filepath)
            # Rename data columns 'colNNN' -> zero-padded 3-char codes,
            # keeping the first column untouched.
            col = {
                name: '00{}'.format(name[3:])[-3:]
                for name in df.columns.to_list()[1:]
            }
            df.rename(columns=col, inplace=True)
            data = util_to_json_from_pandas(df.reset_index().drop_duplicates(
                subset=['code', 'report_date']).sort_index())
            util_log_info('即将更新的条数 {}'.format(len(data)))
            try:
                # Upsert each record on its (code, report_date) key.
                for d in data:
                    coll.update_one(
                        {
                            'code': d['code'],
                            'report_date': d['report_date']
                        }, {'$set': d},
                        upsert=True)
            except Exception as e:
                if isinstance(e, MemoryError):
                    # Fall back to a bulk insert when upserting one by
                    # one exhausts memory.
                    coll.insert_many(data, ordered=True)
                elif isinstance(e, pymongo.bulk.BulkWriteError):
                    # Duplicate-key conflicts are expected on re-runs.
                    pass
        except Exception:
            # Reader failed or the archive held no rows; log and move on.
            util_log_info('似乎没有数据')
    util_log_info('SUCCESSFULLY SAVE/UPDATE FINANCIAL DATA')