Example #1
0
    def format(self, entity, df):
        """Normalise a raw report DataFrame in place and return it.

        Every column other than ``'Title'`` and the original time field is
        converted with ``to_float`` applied to the first element of each
        cell. Columns are then renamed through ``cash_flow_map``, NaNs in
        numeric columns become 0, and the ``timestamp``, ``report_period``,
        ``report_date``, ``entity_id``, ``provider``, ``code``, ``name`` and
        ``id`` columns are filled in.

        :param entity: object providing ``id``, ``code`` and ``name``.
        :param df: DataFrame to normalise; it is mutated in place.
        :return: the same ``df`` object.
        """
        time_field = self.get_original_time_field()
        float_cols = set(df.columns) - {'Title', time_field}
        # NOTE(review): cells are indexed with [0] -- presumably each raw cell
        # is a sequence whose first item is the value; confirm with the fetcher.
        for column in float_cols:
            df[column] = df[column].apply(lambda cell: to_float(cell[0]))

        df.rename(columns=cash_flow_map, inplace=True)

        # Replace NaN with 0 in numeric columns only.
        numeric_part = df.select_dtypes(include=[np.number])
        df.update(numeric_part.fillna(0))

        if 'timestamp' not in df.columns:
            df['timestamp'] = pd.to_datetime(df[time_field])
        elif not pd.api.types.is_datetime64_any_dtype(df['timestamp']):
            # The previous isinstance(dtype, datetime) test could never match
            # a dtype object, so the guard was dead; check the dtype properly.
            df['timestamp'] = pd.to_datetime(df['timestamp'])

        df['report_period'] = df['timestamp'].apply(to_report_period_type)
        df['report_date'] = pd.to_datetime(df['timestamp'])

        df['entity_id'] = entity.id
        df['provider'] = self.provider.value
        df['code'] = entity.code
        df['name'] = entity.name

        df['id'] = self.generate_domain_id(entity, df)
        return df
Example #2
0
    def format(self, entity, df):
        """Derive normalised columns from the raw columns of ``df``.

        ``volume``, ``change_pct`` and ``holding_pct`` are produced with
        ``to_float`` from ``BianDongShuLiang``, ``BianDongBiLi`` and
        ``BianDongHouChiGuBiLi``. ``holder_name`` is ``GuDongMingCheng`` as a
        string with CR/LF characters removed. NaNs in numeric columns become
        0, and ``timestamp``, ``entity_id``, ``provider``, ``code`` and ``id``
        are filled in.

        :param entity: object providing ``id`` and ``code``.
        :param df: DataFrame to normalise; it is mutated in place.
        :return: the same ``df`` object.
        """
        numeric_sources = (
            ('volume', 'BianDongShuLiang'),
            ('change_pct', 'BianDongBiLi'),
            ('holding_pct', 'BianDongHouChiGuBiLi'),
        )
        for target, source in numeric_sources:
            df[target] = df[source].apply(to_float)

        # Replace NaN with 0 in numeric columns only.
        df.update(df.select_dtypes(include=[np.number]).fillna(0))

        df['holder_name'] = df['GuDongMingCheng'].astype(str)
        df['holder_name'] = df['holder_name'].apply(
            lambda text: text.replace('\n', '').replace('\r', ''))

        if 'timestamp' not in df.columns:
            df['timestamp'] = pd.to_datetime(df[self.get_original_time_field()])
        elif not pd.api.types.is_datetime64_any_dtype(df['timestamp']):
            # The previous isinstance(dtype, datetime) test could never match
            # a dtype object, so the guard was dead; check the dtype properly.
            df['timestamp'] = pd.to_datetime(df['timestamp'])

        df['entity_id'] = entity.id
        df['provider'] = self.provider.value
        df['code'] = entity.code

        df['id'] = self.generate_domain_id(entity, df)
        return df
    def format(self, entity, df):
        """Derive normalised columns from the raw columns of ``df``.

        ``rights_issues``, ``rights_issue_price`` and ``rights_raising_fund``
        are produced with ``to_float`` from ``ShiJiPeiGu``, ``PeiGuJiaGe`` and
        ``ShiJiMuJi``. NaNs in numeric columns become 0, and ``timestamp``,
        ``entity_id``, ``provider``, ``code`` and ``id`` are filled in.

        :param entity: object providing ``id`` and ``code``.
        :param df: DataFrame to normalise; it is mutated in place.
        :return: the same ``df`` object.
        """
        numeric_sources = (
            ('rights_issues', 'ShiJiPeiGu'),
            ('rights_issue_price', 'PeiGuJiaGe'),
            ('rights_raising_fund', 'ShiJiMuJi'),
        )
        for target, source in numeric_sources:
            df[target] = df[source].apply(to_float)

        # Replace NaN with 0 in numeric columns only.
        df.update(df.select_dtypes(include=[np.number]).fillna(0))

        if 'timestamp' not in df.columns:
            df['timestamp'] = pd.to_datetime(df[self.get_original_time_field()])
        elif not pd.api.types.is_datetime64_any_dtype(df['timestamp']):
            # The previous isinstance(dtype, datetime) test could never match
            # a dtype object, so the guard was dead; check the dtype properly.
            df['timestamp'] = pd.to_datetime(df['timestamp'])

        df['entity_id'] = entity.id
        df['provider'] = self.provider.value
        df['code'] = entity.code

        df['id'] = self.generate_domain_id(entity, df)
        return df
    def format(self, entity, df):
        """Derive normalised columns from the raw columns of ``df``.

        Numeric columns (``volume``, ``price``, ``holding``) are produced with
        ``to_float``. Text columns (``trading_person``, ``trading_way``,
        ``manager``, ``manager_position``, ``relationship_with_manager``) are
        the raw values cast to ``str`` with CR/LF characters removed. NaNs in
        numeric columns become 0, and ``timestamp``, ``entity_id``,
        ``provider``, ``code`` and ``id`` are filled in.

        :param entity: object providing ``id`` and ``code``.
        :param df: DataFrame to normalise; it is mutated in place.
        :return: the same ``df`` object.
        """
        numeric_sources = (
            ('volume', 'BianDongShuLiang'),
            ('price', 'JiaoYiJunJia'),
            ('holding', 'BianDongHouShuLiang'),
        )
        for target, source in numeric_sources:
            df[target] = df[source].apply(to_float)

        # Replace NaN with 0 in numeric columns only; this runs before the
        # text columns are added, exactly as in the original statement order.
        df.update(df.select_dtypes(include=[np.number]).fillna(0))

        text_sources = (
            ('trading_person', 'BianDongRen'),
            ('trading_way', 'JiaoYiTuJing'),
            ('manager', 'GaoGuanMingCheng'),
            ('manager_position', 'GaoGuanZhiWei'),
            ('relationship_with_manager', 'GaoGuanGuanXi'),
        )
        for target, source in text_sources:
            df[target] = df[source].astype(str)
            df[target] = df[target].apply(
                lambda text: text.replace('\n', '').replace('\r', ''))

        if 'timestamp' not in df.columns:
            df['timestamp'] = pd.to_datetime(df[self.get_original_time_field()])
        elif not pd.api.types.is_datetime64_any_dtype(df['timestamp']):
            # The previous isinstance(dtype, datetime) test could never match
            # a dtype object, so the guard was dead; check the dtype properly.
            df['timestamp'] = pd.to_datetime(df['timestamp'])

        df['entity_id'] = entity.id
        df['provider'] = self.provider.value
        df['code'] = entity.code

        df['id'] = self.generate_domain_id(entity, df)
        return df
Example #5
0
    def format(self, entity, df):
        """Derive normalised columns from the raw columns of ``df``.

        Each derived column is ``to_float`` applied to element ``[1]`` of the
        corresponding raw cell. NaNs in numeric columns become 0, and
        ``timestamp``, ``entity_id``, ``provider``, ``code`` and ``id`` are
        filled in.

        :param entity: object providing ``id`` and ``code``.
        :param df: DataFrame to normalise; it is mutated in place.
        :return: the same ``df`` object.
        """
        # NOTE(review): cells are indexed with [1] -- presumably each raw cell
        # is a sequence whose second item is the value; confirm with the fetcher.
        numeric_sources = (
            ('dividend_money', 'FenHongZongE'),  # total dividend amount
            ('ipo_issues', 'XinGu'),             # new shares (IPO)
            ('spo_issues', 'ZengFa'),            # additional issuance
            ('rights_issues', 'PeiFa'),          # rights issue
        )
        for target, source in numeric_sources:
            df[target] = df[source].apply(lambda cell: to_float(cell[1]))

        # Replace NaN with 0 in numeric columns only.
        df.update(df.select_dtypes(include=[np.number]).fillna(0))

        if 'timestamp' not in df.columns:
            df['timestamp'] = pd.to_datetime(df[self.get_original_time_field()])
        elif not pd.api.types.is_datetime64_any_dtype(df['timestamp']):
            # The previous isinstance(dtype, datetime) test could never match
            # a dtype object, so the guard was dead; check the dtype properly.
            df['timestamp'] = pd.to_datetime(df['timestamp'])

        df['entity_id'] = entity.id
        df['provider'] = self.provider.value
        df['code'] = entity.code

        df['id'] = self.generate_domain_id(entity, df)
        return df
 def numba_boost_up(klines):
     """Parse comma-separated kline strings into a list of dicts.

     Each input line has the layout
     ``time,open,close,high,low,volume,turnover``, e.g.
     ``"2000-01-28,1005.26,1012.56,1173.12,982.13,3023326,3075552000.00"``.
     The timestamp is kept as the raw string; the other six fields are
     passed through ``to_float``.

     :param klines: sequence of kline strings; the last element is skipped.
     :return: list with one dict per parsed line.
     """

     def parse(line):
         # Positional layout: time,open,close,high,low,volume,turnover
         parts = line.split(',')
         return {
             'timestamp': parts[0],
             'open': to_float(parts[1]),
             'close': to_float(parts[2]),
             'high': to_float(parts[3]),
             'low': to_float(parts[4]),
             'volume': to_float(parts[5]),
             'turnover': to_float(parts[6]),
         }

     # TODO: ignore the last unfinished kdata now,could control it better if need
     return [parse(line) for line in klines[:-1]]
Example #7
0
    async def record(self, entity, http_session, db_session, para):
        """Fetch one summary row per timestamp and return them as a DataFrame.

        For every timestamp the URL template ``self.url`` is filled in and
        requested. The response text is decoded, rows whose ``productType``
        is ``'1'`` are selected, and when exactly one such row exists its
        fields are converted with ``to_float`` and collected. Once more than
        ``self.batch_size`` rows are collected, or the timestamps run out,
        the rows are turned into a DataFrame and returned.

        :param entity: object providing ``id``.
        :param http_session: object whose ``get(url, headers=...)`` is used as
            an async context manager yielding a response with ``status`` and
            an awaitable ``text()``.
        :param db_session: not used by this method.
        :param para: 5-tuple ``(ref_record, start, end, size, timestamps)``;
            only ``ref_record`` and ``timestamps`` are read.
        :return: ``(False, elapsed, (ref_record, df))`` when rows were
            collected, ``(True, elapsed, None)`` when nothing was collected,
            or bare ``None`` when a response status is not 200.
        """
        start_point = time.time()

        ref_record, _start, _end, _size, timestamps = para

        def build_frame(records):
            # Shared by both flush paths so that the final partial batch gets
            # the same entity/provider/timestamp/name columns as a full batch
            # (the final flush previously skipped them).
            frame = pd.DataFrame.from_records(records)
            frame['entity_id'] = entity.id
            frame['provider'] = Provider.Exchange.value
            frame['timestamp'] = pd.to_datetime(frame['timestamp'])
            frame['name'] = '上证指数'
            return self.format(frame)

        json_results = []

        for timestamp in timestamps:
            url = self.url.format(to_time_str(timestamp))

            async with http_session.get(
                    url, headers=DEFAULT_SH_SUMMARY_HEADER) as response:
                if response.status != 200:
                    # NOTE(review): this returns None while every other exit
                    # returns a 3-tuple, and it drops rows already collected.
                    # Kept as-is because the caller's contract is not visible.
                    return

                text = await response.text()
                if text is None:
                    continue

                # The payload sits between the first '(' and the first ')'.
                payload = text[text.index("(") + 1:text.index(")")]
                rows = demjson.decode(payload)['result']
                matches = [row for row in rows if row['productType'] == '1']
                if len(matches) != 1:
                    continue

                row = matches[0]
                # Some older data is missing; default those fields to 0.0.
                json_results.append({
                    'timestamp': timestamp,
                    'pe': to_float(row['profitRate'], 0.0),
                    'total_value': to_float(row['marketValue1'] + '亿', 0.0),
                    'total_tradable_vaule':
                        to_float(row['negotiableValue1'] + '亿', 0.0),
                    'volume': to_float(row['trdVol1'] + '万', 0.0),
                    'turnover': to_float(row['trdAmt1'] + '亿', 0.0),
                    'turnover_rate': to_float(row['exchangeRate'], 0.0),
                })

                if len(json_results) > self.batch_size:
                    df = build_frame(json_results)
                    return False, time.time() - start_point, (ref_record, df)

        if json_results:
            df = build_frame(json_results)
            return False, time.time() - start_point, (ref_record, df)

        return True, time.time() - start_point, None