def format(self, entity, df):
    """Convert a raw cash-flow frame into the normalized domain layout.

    Every column other than ``'Title'`` and the original time field is
    converted by passing the first element of each cell to ``to_float``.
    Columns are then renamed through ``cash_flow_map``, NaNs in numeric
    columns are replaced with 0, and the timestamp, report and identity
    columns are filled in.

    Args:
        entity: Object providing ``id``, ``code`` and ``name``.
        df: Raw frame to normalize; it is modified in place.

    Returns:
        The same ``df`` instance with the converted and added columns.
    """
    time_field = self.get_original_time_field()
    non_float_cols = {'Title', time_field}
    float_cols = [col for col in df.columns if col not in non_float_cols]
    for column in float_cols:
        # to_float receives the first element of each (indexable) cell.
        df[column] = df[column].apply(lambda cell: to_float(cell[0]))

    df.rename(columns=cash_flow_map, inplace=True)
    df.update(df.select_dtypes(include=[np.number]).fillna(0))

    if 'timestamp' not in df.columns:
        df['timestamp'] = pd.to_datetime(df[time_field])
    elif not pd.api.types.is_datetime64_any_dtype(df['timestamp']):
        # The column dtype is inspected directly: a dtype object is never an
        # instance of ``datetime``, so an isinstance test cannot detect it.
        df['timestamp'] = pd.to_datetime(df['timestamp'])

    df['report_period'] = df['timestamp'].apply(to_report_period_type)
    df['report_date'] = pd.to_datetime(df['timestamp'])

    df['entity_id'] = entity.id
    df['provider'] = self.provider.value
    df['code'] = entity.code
    df['name'] = entity.name
    df['id'] = self.generate_domain_id(entity, df)
    return df
def format(self, entity, df):
    """Normalize a raw holder-change frame into the domain layout.

    Creates ``volume``, ``change_pct`` and ``holding_pct`` from the raw
    columns via ``to_float``, replaces NaNs in numeric columns with 0,
    derives ``holder_name`` with CR/LF characters removed, ensures a
    datetime ``timestamp`` column and adds the identity columns.

    Args:
        entity: Object providing ``id`` and ``code``.
        df: Raw frame to normalize; it is modified in place.

    Returns:
        The same ``df`` instance with the added columns.
    """
    # target column -> raw source column, in the order the columns are added
    float_fields = (
        ('volume', 'BianDongShuLiang'),
        ('change_pct', 'BianDongBiLi'),
        ('holding_pct', 'BianDongHouChiGuBiLi'),
    )
    for target, source in float_fields:
        df[target] = df[source].apply(to_float)

    df.update(df.select_dtypes(include=[np.number]).fillna(0))

    # Holder names are forced to str and stripped of embedded line breaks.
    df['holder_name'] = df['GuDongMingCheng'].astype(str).apply(
        lambda text: text.replace('\n', '').replace('\r', ''))

    if 'timestamp' not in df.columns:
        df['timestamp'] = pd.to_datetime(df[self.get_original_time_field()])
    elif not pd.api.types.is_datetime64_any_dtype(df['timestamp']):
        # The column dtype is inspected directly: a dtype object is never an
        # instance of ``datetime``, so an isinstance test cannot detect it.
        df['timestamp'] = pd.to_datetime(df['timestamp'])

    df['entity_id'] = entity.id
    df['provider'] = self.provider.value
    df['code'] = entity.code
    df['id'] = self.generate_domain_id(entity, df)
    return df
def format(self, entity, df):
    """Normalize a raw rights-issue frame into the domain layout.

    Creates ``rights_issues``, ``rights_issue_price`` and
    ``rights_raising_fund`` from the raw columns via ``to_float``, replaces
    NaNs in numeric columns with 0, ensures a datetime ``timestamp`` column
    and adds the identity columns.

    Args:
        entity: Object providing ``id`` and ``code``.
        df: Raw frame to normalize; it is modified in place.

    Returns:
        The same ``df`` instance with the added columns.
    """
    # target column -> raw source column, in the order the columns are added
    float_fields = (
        ('rights_issues', 'ShiJiPeiGu'),
        ('rights_issue_price', 'PeiGuJiaGe'),
        ('rights_raising_fund', 'ShiJiMuJi'),
    )
    for target, source in float_fields:
        df[target] = df[source].apply(to_float)

    df.update(df.select_dtypes(include=[np.number]).fillna(0))

    if 'timestamp' not in df.columns:
        df['timestamp'] = pd.to_datetime(df[self.get_original_time_field()])
    elif not pd.api.types.is_datetime64_any_dtype(df['timestamp']):
        # The column dtype is inspected directly: a dtype object is never an
        # instance of ``datetime``, so an isinstance test cannot detect it.
        df['timestamp'] = pd.to_datetime(df['timestamp'])

    df['entity_id'] = entity.id
    df['provider'] = self.provider.value
    df['code'] = entity.code
    df['id'] = self.generate_domain_id(entity, df)
    return df
def format(self, entity, df):
    """Normalize a raw manager-trading frame into the domain layout.

    Creates ``volume``, ``price`` and ``holding`` from the raw columns via
    ``to_float``, replaces NaNs in numeric columns with 0, derives the text
    columns (``trading_person``, ``trading_way``, ``manager``,
    ``manager_position``, ``relationship_with_manager``) as strings with
    CR/LF characters removed, ensures a datetime ``timestamp`` column and
    adds the identity columns.

    Args:
        entity: Object providing ``id`` and ``code``.
        df: Raw frame to normalize; it is modified in place.

    Returns:
        The same ``df`` instance with the added columns.
    """
    # target column -> raw source column, in the order the columns are added
    float_fields = (
        ('volume', 'BianDongShuLiang'),
        ('price', 'JiaoYiJunJia'),
        ('holding', 'BianDongHouShuLiang'),
    )
    for target, source in float_fields:
        df[target] = df[source].apply(to_float)

    df.update(df.select_dtypes(include=[np.number]).fillna(0))

    text_fields = (
        ('trading_person', 'BianDongRen'),
        ('trading_way', 'JiaoYiTuJing'),
        ('manager', 'GaoGuanMingCheng'),
        ('manager_position', 'GaoGuanZhiWei'),
        ('relationship_with_manager', 'GaoGuanGuanXi'),
    )
    for target, source in text_fields:
        # Values are forced to str and stripped of embedded line breaks.
        df[target] = df[source].astype(str).apply(
            lambda text: text.replace('\n', '').replace('\r', ''))

    if 'timestamp' not in df.columns:
        df['timestamp'] = pd.to_datetime(df[self.get_original_time_field()])
    elif not pd.api.types.is_datetime64_any_dtype(df['timestamp']):
        # The column dtype is inspected directly: a dtype object is never an
        # instance of ``datetime``, so an isinstance test cannot detect it.
        df['timestamp'] = pd.to_datetime(df['timestamp'])

    df['entity_id'] = entity.id
    df['provider'] = self.provider.value
    df['code'] = entity.code
    df['id'] = self.generate_domain_id(entity, df)
    return df
def format(self, entity, df):
    """Normalize a raw dividend/financing frame into the domain layout.

    Creates ``dividend_money``, ``ipo_issues``, ``spo_issues`` and
    ``rights_issues`` by passing the second element of each raw cell to
    ``to_float``, replaces NaNs in numeric columns with 0, ensures a
    datetime ``timestamp`` column and adds the identity columns.

    Args:
        entity: Object providing ``id`` and ``code``.
        df: Raw frame to normalize; it is modified in place.

    Returns:
        The same ``df`` instance with the added columns.
    """
    # target column -> raw source column, in the order the columns are added
    float_fields = (
        ('dividend_money', 'FenHongZongE'),  # total dividend amount
        ('ipo_issues', 'XinGu'),             # new shares
        ('spo_issues', 'ZengFa'),            # additional issuance
        ('rights_issues', 'PeiFa'),          # rights issue
    )
    for target, source in float_fields:
        # to_float receives the element at index 1 of each (indexable) cell.
        df[target] = df[source].apply(lambda cell: to_float(cell[1]))

    df.update(df.select_dtypes(include=[np.number]).fillna(0))

    if 'timestamp' not in df.columns:
        df['timestamp'] = pd.to_datetime(df[self.get_original_time_field()])
    elif not pd.api.types.is_datetime64_any_dtype(df['timestamp']):
        # The column dtype is inspected directly: a dtype object is never an
        # instance of ``datetime``, so an isinstance test cannot detect it.
        df['timestamp'] = pd.to_datetime(df['timestamp'])

    df['entity_id'] = entity.id
    df['provider'] = self.provider.value
    df['code'] = entity.code
    df['id'] = self.generate_domain_id(entity, df)
    return df
def numba_boost_up(klines):
    """Parse comma-separated kline strings into a list of kdata dicts.

    The last element of ``klines`` is always skipped. Each remaining entry
    is split on ``','``; field 0 is kept as the ``timestamp`` string and
    fields 1-6 are converted with ``to_float``.

    Args:
        klines: Sliceable sequence of strings shaped like
            ``"2000-01-28,1005.26,1012.56,1173.12,982.13,3023326,3075552000.00"``
            (time, open, close, high, low, volume, turnover).

    Returns:
        A list of dicts with keys ``timestamp``, ``open``, ``close``,
        ``high``, ``low``, ``volume`` and ``turnover``.
    """

    def parse(line):
        # Field order: time,open,close,high,low,volume,turnover
        parts = line.split(',')
        return {
            'timestamp': parts[0],
            'open': to_float(parts[1]),
            'close': to_float(parts[2]),
            'high': to_float(parts[3]),
            'low': to_float(parts[4]),
            'volume': to_float(parts[5]),
            'turnover': to_float(parts[6]),
        }

    # TODO: the last (unfinished) kdata is ignored for now; this could be
    # controlled more precisely if needed.
    finished = klines[:-1]
    return [parse(line) for line in finished]
async def record(self, entity, http_session, db_session, para):
    """Fetch summary rows for the given timestamps and return them as a frame.

    For each timestamp the URL template ``self.url`` is filled in and
    requested through ``http_session``. The response text is sliced between
    the first ``"("`` and the first ``")"``, decoded with ``demjson`` and the
    single entry whose ``productType`` is ``'1'`` is turned into a row.
    Collection stops early once more than ``self.batch_size`` rows exist.

    Args:
        entity: Object providing ``id``.
        http_session: Session whose ``get(url, headers=...)`` is usable as
            an async context manager yielding a response.
        db_session: Not used by this method.
        para: 5-tuple ``(ref_record, start, end, size, timestamps)``; only
            ``ref_record`` and ``timestamps`` are used here.

    Returns:
        ``(False, elapsed_seconds, (ref_record, df))`` when rows were
        collected, ``(True, elapsed_seconds, None)`` when none were, and
        ``None`` when a response status is not 200.
    """
    start_point = time.time()
    (ref_record, _start, _end, _size, timestamps) = para

    def build_frame(rows):
        # Single place that shapes the output, so the early batch flush and
        # the final flush always carry the same columns.
        df = pd.DataFrame.from_records(rows)
        df['entity_id'] = entity.id
        df['provider'] = Provider.Exchange.value
        df['timestamp'] = pd.to_datetime(df['timestamp'])
        df['name'] = '上证指数'
        return self.format(df)

    json_results = []
    for timestamp in timestamps:
        url = self.url.format(to_time_str(timestamp))
        async with http_session.get(
                url, headers=DEFAULT_SH_SUMMARY_HEADER) as response:
            if response.status != 200:
                # NOTE(review): this path returns None while every other
                # path returns a 3-tuple -- confirm callers handle it.
                return
            text = await response.text()
            if text is None:
                continue

            payload = text[text.index("(") + 1:text.index(")")]
            results = demjson.decode(payload)['result']
            matches = [item for item in results
                       if item['productType'] == '1']
            if len(matches) != 1:
                continue

            result_json = matches[0]
            # Some older data does not exist; default those values to 0.0.
            json_results.append({
                'timestamp': timestamp,
                'pe': to_float(result_json['profitRate'], 0.0),
                'total_value': to_float(
                    result_json['marketValue1'] + '亿', 0.0),
                'total_tradable_vaule': to_float(
                    result_json['negotiableValue1'] + '亿', 0.0),
                'volume': to_float(result_json['trdVol1'] + '万', 0.0),
                'turnover': to_float(result_json['trdAmt1'] + '亿', 0.0),
                'turnover_rate': to_float(
                    result_json['exchangeRate'], 0.0),
            })

            if len(json_results) > self.batch_size:
                return (False, time.time() - start_point,
                        (ref_record, build_frame(json_results)))

    if json_results:
        return (False, time.time() - start_point,
                (ref_record, build_frame(json_results)))

    return True, time.time() - start_point, None