예제 #1
0
 def get_single_financial_resource(self, current_filename):
     """
     Load one historical-financial file and return it with renamed columns.

     If ``current_filename`` does not exist on disk, it is first fetched
     through ``HistoryFinancialCrawler.fetch_and_parse`` (the same path is
     passed as both ``filename`` and ``path_to_download``).  The file is
     then read with ``HistoryFinancialReader.get_df`` and passed through
     ``renames.rename_list_utils().rename_current_finance_utils``.

     :param current_filename: path of the financial data file to load.
     :return: whatever ``rename_current_finance_utils`` returns for the
         frame read from ``current_filename``.
     """
     crawler = HistoryFinancialCrawler()
     if not os.path.exists(current_filename):
         # Only the download side effect matters here; the parsed result
         # of the crawler is not used, the file is re-read below.
         crawler.fetch_and_parse(
             reporthook=demo_reporthook,
             filename=current_filename,
             path_to_download=current_filename)
     raw_frame = HistoryFinancialReader().get_df(current_filename)
     renamer = renames.rename_list_utils()
     return renamer.rename_current_finance_utils(raw_frame)
예제 #2
0
def main(input, output, datatype):
    """
    Read a TDX (Tongdaxin) data file.

    A reader class is chosen from ``datatype``; any value that is not
    listed below falls back to ``TdxMinBarReader``.  The frame returned by
    ``reader.get_df(input)`` is written to ``output`` as CSV when
    ``output`` is truthy, otherwise it is printed.  Any exception raised
    while reading or writing is printed instead of propagated.

    :param input: path of the data file to read.
    :param output: CSV destination path, or a falsy value to print.
    :param datatype: name of the file format, see the table below.
    """
    reader_table = (
        (('daily',), TdxDailyBarReader),
        (('ex_daily',), TdxExHqDailyBarReader),
        (('lc',), TdxLCMinBarReader),
        (('gbbq',), GbbqReader),
        (('block',), BlockReader),
        (('customblock',), CustomerBlockReader),
        (('history_financial', 'hf'), HistoryFinancialReader),
    )
    reader_cls = next(
        (cls for names, cls in reader_table if datatype in names),
        TdxMinBarReader,
    )
    reader = reader_cls()

    try:
        df = reader.get_df(input)
        if not output:
            print(df)
        else:
            click.echo("写入到文件 : " + output)
            df.to_csv(output)
    except Exception as e:
        print(e)
예제 #3
0
    def get_all_data(self) -> pd.DataFrame:
        """
        Read every financial file listed in ``<tdx_dir>/cw/gpcw.txt``.

        The first column of ``gpcw.txt`` is taken as a list of file names
        inside the ``cw`` directory; each one is read with
        ``HistoryFinancialReader.get_df`` and the results are concatenated.
        Columns whose values are all equal to 0 are dropped, and
        ``col314`` / ``report_date`` are renamed to ``date`` / ``period``.

        Side effect: ``self.updated_date`` is set to the modification time
        of ``gpcw.txt`` formatted as the integer ``YYYYMMDD``.

        :return: the concatenated, renamed frame without all-zero columns.
        """
        cw_dir = self.tdx_dir.joinpath("cw")
        gpcw_path = cw_dir.joinpath("gpcw.txt")

        modified_at = pd.to_datetime(gpcw_path.stat().st_mtime_ns)
        self.updated_date = int(modified_at.strftime('%Y%m%d'))
        listing = pd.read_csv(gpcw_path, header=None, usecols=[0])

        from pytdx.reader.history_financial_reader import HistoryFinancialReader

        with TimeInspector.logt("get all cn financial files  ......"):
            frames = [
                HistoryFinancialReader().get_df(cw_dir.joinpath(file_name))
                for file_name in listing.iloc[:, 0].to_list()
            ]
            df = pd.concat(frames)

        # Collect the columns that carry no information (every value is 0).
        all_zero = (df == 0).all(axis=0)
        unused_col = all_zero[all_zero].index.to_list()
        renamed = df.rename(columns={'col314': 'date', 'report_date': 'period'})
        return renamed.drop(columns=unused_col)
예제 #4
0
def get_and_parse(filename):
    """
    Read ``filename`` with a fresh ``HistoryFinancialReader``.

    :param filename: path handed unchanged to ``get_df``.
    :return: the value returned by ``HistoryFinancialReader.get_df``.
    """
    reader = HistoryFinancialReader()
    return reader.get_df(filename)
예제 #5
0
 def to_df(self):
     """
     Read the ``.dat`` counterpart of ``self.cache_file_name``.

     Every ``'.zip'`` occurrence in ``self.cache_file_name`` is replaced by
     ``'.dat'`` and the resulting path is read with
     ``HistoryFinancialReader.get_df``.

     :return: the value returned by ``get_df`` for that path.
     """
     dat_path = self.cache_file_name.replace('.zip', '.dat')
     reader = HistoryFinancialReader()
     return reader.get_df(dat_path)
예제 #6
0
def save_financial_files():
    """
    Store the TDX ``gpcw`` financial archives found in ``_CW_DIR`` in MongoDB.

    Steps:

    1. Ensure a unique ``(code, report_date)`` index on
       ``QA_DATABASE.financial``.
    2. List ``_CW_DIR`` and keep the files named ``gpcwYYYYMMDD.zip``.
    3. Keep only the files whose modification time is later than midnight
       of the day on which the newest of them was modified.
    4. Read each remaining file with ``HistoryFinancialReader.get_df``,
       rename its columns, and upsert the rows one by one keyed on
       ``(code, report_date)``.

    Failures while reading or writing a single file are logged and the
    loop continues with the next file.  If no matching file exists the
    function logs that fact and returns without touching any document.

    :return: None
    """
    coll = QA_DATABASE.financial
    coll.create_index([("code", ASCENDING), ("report_date", ASCENDING)],
                      unique=True)

    # Raw string so that ``\d`` and ``\.`` reach the regex engine untouched.
    pattern = re.compile(r"^(gpcw)(?P<date>\d{8})\.zip")  # gpcw20210930.zip

    filenames = [name for name in os.listdir(_CW_DIR) if pattern.match(name)]
    if not filenames:
        # Without this guard the "newest file" lookup below would fail on
        # an empty sequence.
        util_log_info('NO gpcw FINANCIAL FILES FOUND IN {}'.format(_CW_DIR))
        return

    stamped = [
        (name,
         pd.to_datetime(os.path.getmtime(os.path.join(_CW_DIR, name)),
                        unit='s'))
        for name in filenames
    ]
    newest = max(stamp for _, stamp in stamped)
    # Truncate the newest modification time to the start of its day.
    cutoff = pd.to_datetime(newest.strftime('%Y-%m-%d'))
    recent = [name for name, stamp in stamped if stamp > cutoff]

    for filename in recent:
        # Every name in ``recent`` already matched ``pattern`` above.
        date = int(pattern.match(filename).group('date'))

        util_log_info('NOW SAVING {}'.format(date))
        util_log_info('在数据库中的条数 {}'.format(
            coll.count_documents({'report_date': date})))
        try:
            report = HistoryFinancialReader().get_df(
                os.path.join(_CW_DIR, filename))

            # Rename every column except the first: drop the leading three
            # characters of the name and left-pad the rest with zeros to
            # three characters (presumably 'col1' -> '001'; confirm against
            # the reader's column names).
            renamed = {
                name: '00{}'.format(name[3:])[-3:]
                for name in report.columns.to_list()[1:]
            }
            report.rename(columns=renamed, inplace=True)

            data = util_to_json_from_pandas(
                report.reset_index().drop_duplicates(
                    subset=['code', 'report_date']).sort_index())
            util_log_info('即将更新的条数 {}'.format(len(data)))
            try:
                for d in data:
                    coll.update_one(
                        {
                            'code': d['code'],
                            'report_date': d['report_date']
                        }, {'$set': d},
                        upsert=True)
            except MemoryError:
                coll.insert_many(data, ordered=True)
            except pymongo.bulk.BulkWriteError:
                # Deliberately ignored, as before.
                pass
            except Exception as e:
                # Previously swallowed without a trace; keep going but
                # leave a record of what went wrong.
                util_log_info('UPDATE FAILED FOR {}: {!r}'.format(date, e))
        except Exception as e:
            util_log_info('似乎没有数据')
            # Surface the real cause next to the generic message above.
            util_log_info('ERROR WHILE SAVING {}: {!r}'.format(date, e))

    util_log_info('SUCCESSFULLY SAVE/UPDATE FINANCIAL DATA')