def format(self, entity, df):
        """Map the raw dividend/financing columns onto the domain schema.

        Adds the converted numeric columns, a normalised ``timestamp`` column
        and the entity bookkeeping columns to ``df`` in place.

        :param entity: entity the rows belong to; its ``id`` and ``code`` are
            copied onto every row
        :param df: raw frame holding the ``FenHongZongE``, ``XinGu``,
            ``ZengFa`` and ``PeiFa`` columns
        :return: the same ``df`` object, enriched
        """
        # Every raw cell is indexed with [1] before conversion, i.e. the
        # figure is taken from the second item of the cell.
        # total dividend amount
        df['dividend_money'] = df['FenHongZongE'].apply(
            lambda x: to_float(x[1]))
        # newly issued shares (IPO)
        df['ipo_issues'] = df['XinGu'].apply(lambda x: to_float(x[1]))
        # additional offering (SPO)
        df['spo_issues'] = df['ZengFa'].apply(lambda x: to_float(x[1]))
        # rights issue
        df['rights_issues'] = df['PeiFa'].apply(lambda x: to_float(x[1]))

        # Missing numeric values are stored as 0.
        df.update(df.select_dtypes(include=[np.number]).fillna(0))

        if 'timestamp' not in df.columns:
            df['timestamp'] = pd.to_datetime(
                df[self.get_original_time_field()])
        # A dtype object is never an instance of ``datetime``, so the old
        # isinstance() guard was always true; test the column dtype instead.
        elif not pd.api.types.is_datetime64_any_dtype(df['timestamp']):
            df['timestamp'] = pd.to_datetime(df['timestamp'])

        df['entity_id'] = entity.id
        df['provider'] = self.provider.value
        df['code'] = entity.code

        df['id'] = self.generate_domain_id(entity, df)
        return df
Ejemplo n.º 2
0
    def run(self):
        """Fetch and persist profile and issuance data for every entity.

        For each entity two POST requests are sent (basic profile, then
        issuance details); the parsed fields are written onto the entity and
        committed through ``self.session``. ``self.sleep()`` is called after
        each request. Entities whose exchange is neither ``'sh'`` nor
        ``'sz'`` are skipped with a warning because no request code can be
        built for them.
        """
        for security_item in self.entities:
            assert isinstance(security_item, StockDetail)

            # The request code is the stock code plus an exchange suffix.
            if security_item.exchange == 'sh':
                fc = "{}01".format(security_item.code)
            elif security_item.exchange == 'sz':
                fc = "{}02".format(security_item.code)
            else:
                # Without this branch ``fc`` was either unbound (NameError)
                # or still held the previous entity's code, which silently
                # stored another company's data on this entity.
                self.logger.warning(
                    'skip stock meta for:{}, unsupported exchange:{}'.format(
                        security_item.code, security_item.exchange))
                continue

            # basic profile
            # NOTE(review): SecurityCode is a fixed literal for every entity;
            # confirm that the endpoint only relies on ``fc``.
            param = {"color": "w", "fc": fc, "SecurityCode": "SZ300059"}
            resp = requests.post(
                'https://emh5.eastmoney.com/api/GongSiGaiKuang/GetJiBenZiLiao',
                json=param)
            resp.encoding = 'utf8'

            resp_json = resp.json()['Result']['JiBenZiLiao']

            security_item.profile = resp_json['CompRofile']
            security_item.main_business = resp_json['MainBusiness']
            security_item.date_of_establishment = to_pd_timestamp(
                resp_json['FoundDate'])

            # related industries: '-' separated in the response, stored
            # comma separated
            industries = ','.join(resp_json['Industry'].split('-'))
            security_item.industries = industries

            # related concepts
            security_item.concept_indices = resp_json['Block']

            # related region
            security_item.area_indices = resp_json['Provice']

            self.sleep()

            # issuance details
            param = {"color": "w", "fc": fc}
            resp = requests.post(
                'https://emh5.eastmoney.com/api/GongSiGaiKuang/GetFaXingXiangGuan',
                json=param)
            resp.encoding = 'utf8'

            resp_json = resp.json()['Result']['FaXingXiangGuan']

            security_item.issue_pe = to_float(resp_json['PEIssued'])
            security_item.price = to_float(resp_json['IssuePrice'])
            security_item.issues = to_float(resp_json['ShareIssued'])
            security_item.raising_fund = to_float((resp_json['NetCollection']))
            security_item.net_winning_rate = pct_to_float(
                resp_json['LotRateOn'])

            self.session.commit()

            self.logger.info('finish recording stock meta for:{}'.format(
                security_item.code))

            self.sleep()
Ejemplo n.º 3
0
    def run(self):
        """Fetch and persist profile and issuance data for every entity.

        For each entity two POST requests are sent (basic profile, then
        issuance details); the parsed fields are written onto the entity and
        committed through ``self.session``. ``self.sleep()`` is called after
        each request. Entities whose exchange is neither ``"sh"`` nor
        ``"sz"`` are skipped with a warning because no request code can be
        built for them.
        """
        for security_item in self.entities:
            assert isinstance(security_item, StockDetail)

            # The request code is the stock code plus an exchange suffix.
            if security_item.exchange == "sh":
                fc = "{}01".format(security_item.code)
            elif security_item.exchange == "sz":
                fc = "{}02".format(security_item.code)
            else:
                # Without this branch ``fc`` was either unbound (NameError)
                # or still held the previous entity's code, which silently
                # stored another company's data on this entity.
                self.logger.warning(
                    "skip stock meta for:{}, unsupported exchange:{}".format(
                        security_item.code, security_item.exchange))
                continue

            # basic profile
            # NOTE(review): SecurityCode is a fixed literal for every entity;
            # confirm that the endpoint only relies on ``fc``.
            param = {"color": "w", "fc": fc, "SecurityCode": "SZ300059"}
            resp = requests.post(
                "https://emh5.eastmoney.com/api/GongSiGaiKuang/GetJiBenZiLiao",
                json=param)
            resp.encoding = "utf8"

            resp_json = resp.json()["Result"]["JiBenZiLiao"]

            security_item.profile = resp_json["CompRofile"]
            security_item.main_business = resp_json["MainBusiness"]
            security_item.date_of_establishment = to_pd_timestamp(
                resp_json["FoundDate"])

            # related industries: "-" separated in the response, stored
            # comma separated
            industries = ",".join(resp_json["Industry"].split("-"))
            security_item.industries = industries

            # related concepts
            security_item.concept_indices = resp_json["Block"]

            # related region
            security_item.area_indices = resp_json["Provice"]

            self.sleep()

            # issuance details
            param = {"color": "w", "fc": fc}
            resp = requests.post(
                "https://emh5.eastmoney.com/api/GongSiGaiKuang/GetFaXingXiangGuan",
                json=param)
            resp.encoding = "utf8"

            resp_json = resp.json()["Result"]["FaXingXiangGuan"]

            security_item.issue_pe = to_float(resp_json["PEIssued"])
            security_item.price = to_float(resp_json["IssuePrice"])
            security_item.issues = to_float(resp_json["ShareIssued"])
            security_item.raising_fund = to_float((resp_json["NetCollection"]))
            security_item.net_winning_rate = pct_to_float(
                resp_json["LotRateOn"])

            self.session.commit()

            self.logger.info("finish recording stock meta for:{}".format(
                security_item.code))

            self.sleep()
Ejemplo n.º 4
0
    def run(self):
        """Fill in profile and IPO figures for each entity, then commit.

        One ``c.css`` query is issued per entity; the first row of the
        returned frame is copied field by field onto the entity. The
        session is committed and ``self.sleep()`` is called once per entity.
        """
        indicators = ("COMPANYPROFILE,FOUNDDATE,AREA,CAPITAL,IPOSHARESVOL,IPOPRICE,"
                      "IPONETCOLLECTION,IPOPE,HKSE,IPOPLANISSUEVOL")
        options = "CurType=1,ClassiFication=4,ispandas=1"

        for security_item in self.entities:
            assert isinstance(security_item, StockDetail)
            query_code = to_jq_entity_id(security_item)

            snapshot = c.css(query_code, indicators, options)

            # basic profile
            security_item.profile = snapshot.COMPANYPROFILE[0]
            founded = snapshot.FOUNDDATE.values[0]
            security_item.date_of_establishment = to_pd_timestamp(founded)
            # related industries
            security_item.industries = snapshot.HKSE.values[0]
            # related concepts are not populated here
            # related region
            security_item.area_indices = snapshot.AREA.values[0]

            # issuance figures
            security_item.price = snapshot.IPOPRICE.values[0]
            security_item.issues = snapshot.IPOPLANISSUEVOL.values[0]
            security_item.raising_fund = snapshot.IPONETCOLLECTION.values[0]

            # The capital text is split on a space and only its first token
            # is converted; an AttributeError anywhere in this step is
            # ignored and the field is left untouched.
            try:
                capital_text = snapshot.CAPITAL.values[0].split(' ')[0]
                security_item.register_capital = to_float(capital_text) / 10000
            except AttributeError:
                pass

            self.session.commit()
            self.logger.info('finish recording stock meta for:{}'.format(
                security_item.code))
            self.sleep()
Ejemplo n.º 5
0
    def format(self, entity, df):
        """Convert a raw statement frame into the domain schema.

        Every column except ``Title`` and the original time field is
        converted with ``to_float`` (taking the first item of each cell), the
        columns are renamed through ``cash_flow_map`` and the timestamp,
        report and entity bookkeeping columns are added. ``df`` is modified in
        place.

        :param entity: entity the rows belong to; ``id``, ``code`` and
            ``name`` are copied onto every row
        :param df: raw frame to convert
        :return: the same ``df`` object, enriched
        """
        cols = list(df.columns)
        str_cols = ['Title']
        date_cols = [self.get_original_time_field()]
        float_cols = list(set(cols) - set(str_cols) - set(date_cols))
        for column in float_cols:
            df[column] = df[column].apply(lambda x: to_float(x[0]))

        df.rename(columns=cash_flow_map, inplace=True)

        # Missing numeric values are stored as 0.
        df.update(df.select_dtypes(include=[np.number]).fillna(0))

        if 'timestamp' not in df.columns:
            df['timestamp'] = pd.to_datetime(df[self.get_original_time_field()])
        # A dtype object is never an instance of ``datetime``, so the old
        # isinstance() guard was always true; test the column dtype instead.
        elif not pd.api.types.is_datetime64_any_dtype(df['timestamp']):
            df['timestamp'] = pd.to_datetime(df['timestamp'])

        df['report_period'] = df['timestamp'].apply(to_report_period_type)
        df['report_date'] = pd.to_datetime(df['timestamp'])

        df['entity_id'] = entity.id
        df['provider'] = self.provider.value
        df['code'] = entity.code
        df['name'] = entity.name

        df['id'] = self.generate_domain_id(entity, df)
        return df
        def numba_boost_up(json_list):
            """Turn raw money-flow items into a list of record dicts.

            ``entity`` is not a parameter; it is taken from the enclosing
            scope. One dict is produced per item, in input order.
            """
            return [
                {
                    'name': entity.name,
                    'timestamp': to_pd_timestamp(raw['opendate']),
                    'close': to_float(raw['avg_price']),
                    'change_pct': to_float(raw['avg_changeratio']),
                    # the raw turnover figure is divided by 10000
                    'turnover_rate': to_float(raw['turnover']) / 10000,
                    'net_inflows': to_float(raw['netamount']),
                    'net_inflow_rate': to_float(raw['ratioamount']),
                    'net_main_inflows': to_float(raw['r0_net']),
                    'net_main_inflow_rate': to_float(raw['r0_ratio']),
                }
                for raw in json_list
            ]
Ejemplo n.º 7
0
    def format(self, entity, df):
        """Convert a raw shareholder frame into the domain schema.

        Adds report, holder, shareholding and entity bookkeeping columns to
        ``df`` in place.

        :param entity: entity the rows belong to; its ``id`` and ``code`` are
            copied onto every row
        :param df: raw frame holding the ``GuDongDaiMa``, ``GuDongMingCheng``,
            ``ChiGuShu``, ``ChiGuBiLi``, ``ZengJian`` and ``BianDongBiLi``
            columns
        :return: the same ``df`` object, enriched
        """
        # Normalise the timestamp first. The report columns below are derived
        # from it, and reading it before the "missing column" fallback ran
        # raised KeyError whenever that fallback was actually needed.
        if 'timestamp' not in df.columns:
            df['timestamp'] = pd.to_datetime(
                df[self.get_original_time_field()])
        # A dtype object is never an instance of ``datetime``, so the old
        # isinstance() guard was always true; test the column dtype instead.
        elif not pd.api.types.is_datetime64_any_dtype(df['timestamp']):
            df['timestamp'] = pd.to_datetime(df['timestamp'])

        df['report_period'] = df['timestamp'].apply(to_report_period_type)
        df['report_date'] = pd.to_datetime(df['timestamp'])
        # holder code, with line breaks stripped
        df['holder_code'] = df['GuDongDaiMa'].astype(str)
        df['holder_code'] = df['holder_code'].apply(
            lambda x: x.replace('\n', '').replace('\r', ''))
        # holder name, with line breaks stripped
        df['holder_name'] = df['GuDongMingCheng'].astype(str)
        df['holder_name'] = df['holder_name'].apply(
            lambda x: x.replace('\n', '').replace('\r', ''))

        # number of shares held
        df['shareholding_numbers'] = df['ChiGuShu'].apply(to_float)
        # shareholding ratio
        df['shareholding_ratio'] = df['ChiGuBiLi'].apply(to_float)
        # change in holding
        df['change'] = df['ZengJian'].apply(to_float)
        # change ratio
        df['change_ratio'] = df['BianDongBiLi'].apply(to_float)

        # Missing numeric values are stored as 0.
        df.update(df.select_dtypes(include=[np.number]).fillna(0))

        # Placeholders for the remaining missing non-numeric values.
        fill_values = {
            'report_period': "未知",
            'report_date': pd.to_datetime("1900-01-01"),
            'holder_name': "未知",
            'holder_code': "未知"
        }
        df.fillna(value=fill_values, inplace=True)

        df['entity_id'] = entity.id
        df['provider'] = self.provider.value
        df['code'] = entity.code

        df['id'] = self.generate_domain_id(entity, df)
        return df
    def format(self, entity, df):
        """Convert a raw rights-issue frame into the domain schema.

        Adds the converted numeric columns, a normalised ``timestamp`` column
        and the entity bookkeeping columns to ``df`` in place.

        :param entity: entity the rows belong to; its ``id`` and ``code`` are
            copied onto every row
        :param df: raw frame holding the ``ShiJiPeiGu``, ``PeiGuJiaGe`` and
            ``ShiJiMuJi`` columns
        :return: the same ``df`` object, enriched
        """
        df['rights_issues'] = df['ShiJiPeiGu'].apply(to_float)
        df['rights_issue_price'] = df['PeiGuJiaGe'].apply(to_float)
        df['rights_raising_fund'] = df['ShiJiMuJi'].apply(to_float)

        # Missing numeric values are stored as 0.
        df.update(df.select_dtypes(include=[np.number]).fillna(0))

        if 'timestamp' not in df.columns:
            df['timestamp'] = pd.to_datetime(df[self.get_original_time_field()])
        # A dtype object is never an instance of ``datetime``, so the old
        # isinstance() guard was always true; test the column dtype instead.
        elif not pd.api.types.is_datetime64_any_dtype(df['timestamp']):
            df['timestamp'] = pd.to_datetime(df['timestamp'])

        df['entity_id'] = entity.id
        df['provider'] = self.provider.value
        df['code'] = entity.code

        df['id'] = self.generate_domain_id(entity, df)
        return df
Ejemplo n.º 9
0
    def request(self, url=None, method='get', param=None, path_fields=None):
        """Fetch one summary record for the value given in ``param``.

        ``param`` is substituted into ``url`` and also returned as the
        record's ``timestamp``. ``method`` and ``path_fields`` are not used.

        :return: a one-element list holding the record when exactly one row
            with ``productType == '1'`` is present, otherwise ``None``
        """
        body = requests.get(url=url.format(param),
                            headers=DEFAULT_SH_SUMMARY_HEADER).text

        # The payload sits between the first "(" and the first ")".
        begin = body.index("(") + 1
        finish = body.index(")")
        rows = demjson.decode(body[begin:finish])['result']

        matches = [row for row in rows if row['productType'] == '1']
        if len(matches) != 1:
            return None

        row = matches[0]
        # Some older data is missing, so every figure defaults to 0.0.
        record = {
            'provider': Provider.EXCHANGE.value,
            'timestamp': param,
            'name': '上证指数',
            'pe': to_float(row['profitRate'], 0.0),
            'total_value': to_float(row['marketValue1'] + '亿', 0.0),
            'total_tradable_vaule': to_float(row['negotiableValue1'] + '亿', 0.0),
            'volume': to_float(row['trdVol1'] + '万', 0.0),
            'turnover': to_float(row['trdAmt1'] + '亿', 0.0),
            'turnover_rate': to_float(row['exchangeRate'], 0.0),
        }
        return [record]
Ejemplo n.º 10
0
    def record(self, entity, start, end, size, timestamps):
        """Collect one summary record per timestamp.

        Each timestamp is formatted into ``self.url`` and fetched. A record
        is kept only when the response holds exactly one row with
        ``productType == "1"``. Collection stops once more than 30 records
        have been gathered. ``entity``, ``start``, ``end`` and ``size`` are
        not used by this body.

        :return: list of record dicts, possibly empty
        """
        collected = []
        for timestamp in timestamps:
            target = self.url.format(to_time_str(timestamp))
            body = requests.get(url=target, headers=DEFAULT_SH_SUMMARY_HEADER).text

            # The payload sits between the first "(" and the first ")".
            begin = body.index("(") + 1
            finish = body.index(")")
            rows = demjson3.decode(body[begin:finish])["result"]

            matches = [row for row in rows if row["productType"] == "1"]
            if len(matches) != 1:
                continue

            row = matches[0]
            # Some older data is missing, so every figure defaults to 0.0.
            collected.append(
                {
                    "provider": "exchange",
                    "timestamp": timestamp,
                    "name": "上证指数",
                    "pe": to_float(row["profitRate"], 0.0),
                    "total_value": to_float(row["marketValue1"] + "亿", 0.0),
                    "total_tradable_vaule": to_float(row["negotiableValue1"] + "亿", 0.0),
                    "volume": to_float(row["trdVol1"] + "万", 0.0),
                    "turnover": to_float(row["trdAmt1"] + "亿", 0.0),
                    "turnover_rate": to_float(row["exchangeRate"], 0.0),
                }
            )

            if len(collected) > 30:
                break

        return collected
Ejemplo n.º 11
0
    def format(self, entity, df):
        """Convert a raw trading frame into the domain schema.

        Adds the converted numeric columns, the cleaned text columns, a
        normalised ``timestamp`` column and the entity bookkeeping columns to
        ``df`` in place.

        :param entity: entity the rows belong to; its ``id`` and ``code`` are
            copied onto every row
        :param df: raw frame holding the source columns read below
        :return: the same ``df`` object, enriched
        """
        df['volume'] = df['BianDongShuLiang'].apply(to_float)
        df['price'] = df['JiaoYiJunJia'].apply(to_float)
        df['holding'] = df['BianDongHouShuLiang'].apply(to_float)

        # Missing numeric values are stored as 0.
        df.update(df.select_dtypes(include=[np.number]).fillna(0))

        # Text columns: cast to str and strip embedded line breaks.
        text_columns = {
            'trading_person': 'BianDongRen',
            'trading_way': 'JiaoYiTuJing',
            'manager': 'GaoGuanMingCheng',
            'manager_position': 'GaoGuanZhiWei',
            'relationship_with_manager': 'GaoGuanGuanXi',
        }
        for target, source in text_columns.items():
            df[target] = df[source].astype(str)
            df[target] = df[target].apply(
                lambda x: x.replace('\n', '').replace('\r', ''))

        if 'timestamp' not in df.columns:
            df['timestamp'] = pd.to_datetime(
                df[self.get_original_time_field()])
        # A dtype object is never an instance of ``datetime``, so the old
        # isinstance() guard was always true; test the column dtype instead.
        elif not pd.api.types.is_datetime64_any_dtype(df['timestamp']):
            df['timestamp'] = pd.to_datetime(df['timestamp'])

        df['entity_id'] = entity.id
        df['provider'] = self.provider.value
        df['code'] = entity.code

        df['id'] = self.generate_domain_id(entity, df)
        return df
Ejemplo n.º 12
0
    def record(self, entity, start, end, size, timestamps, http_session):
        """Collect one summary record per timestamp.

        Each timestamp is formatted into ``self.url`` and fetched through
        ``http_session``. A record is kept only when the response holds
        exactly one row with ``productType == '1'``. Collection stops once
        more than ``self.batch_size`` records have been gathered. ``entity``,
        ``start``, ``end`` and ``size`` are not used by this body.

        :return: list of record dicts, possibly empty
        """
        collected = []
        for timestamp in timestamps:
            target = self.url.format(to_time_str(timestamp))
            body = request_get(http_session, target, headers=DEFAULT_SH_SUMMARY_HEADER).text

            # The payload sits between the first "(" and the first ")".
            begin = body.index("(") + 1
            finish = body.index(")")
            rows = demjson.decode(body[begin:finish])['result']

            matches = [row for row in rows if row['productType'] == '1']
            if len(matches) != 1:
                continue

            row = matches[0]
            # Some older data is missing, so every figure defaults to 0.0.
            collected.append({
                'provider': Provider.Exchange.value,
                'timestamp': timestamp,
                'name': '上证指数',
                'pe': to_float(row['profitRate'], 0.0),
                'total_value': to_float(row['marketValue1'] + '亿', 0.0),
                'total_tradable_vaule': to_float(row['negotiableValue1'] + '亿', 0.0),
                'volume': to_float(row['trdVol1'] + '万', 0.0),
                'turnover': to_float(row['trdAmt1'] + '亿', 0.0),
                'turnover_rate': to_float(row['exchangeRate'], 0.0),
            })

            if len(collected) > self.batch_size:
                break

        return collected
    def format(self, entity, df):
        """Convert a raw holder-trading frame into the domain schema.

        Adds the converted numeric columns, the cleaned holder name, a
        normalised ``timestamp`` column and the entity bookkeeping columns to
        ``df`` in place.

        :param entity: entity the rows belong to; its ``id`` and ``code`` are
            copied onto every row
        :param df: raw frame holding the ``BianDongShuLiang``,
            ``BianDongBiLi``, ``BianDongHouChiGuBiLi`` and ``GuDongMingCheng``
            columns
        :return: the same ``df`` object, enriched
        """
        df['volume'] = df['BianDongShuLiang'].apply(to_float)
        df['change_pct'] = df['BianDongBiLi'].apply(to_float)
        df['holding_pct'] = df['BianDongHouChiGuBiLi'].apply(to_float)

        # Missing numeric values are stored as 0.
        df.update(df.select_dtypes(include=[np.number]).fillna(0))

        # Holder name: cast to str and strip embedded line breaks.
        df['holder_name'] = df['GuDongMingCheng'].astype(str)
        df['holder_name'] = df['holder_name'].apply(
            lambda x: x.replace('\n', '').replace('\r', ''))

        if 'timestamp' not in df.columns:
            df['timestamp'] = pd.to_datetime(
                df[self.get_original_time_field()])
        # A dtype object is never an instance of ``datetime``, so the old
        # isinstance() guard was always true; test the column dtype instead.
        elif not pd.api.types.is_datetime64_any_dtype(df['timestamp']):
            df['timestamp'] = pd.to_datetime(df['timestamp'])

        df['entity_id'] = entity.id
        df['provider'] = self.provider.value
        df['code'] = entity.code

        df['id'] = self.generate_domain_id(entity, df)
        return df
Ejemplo n.º 14
0
    def record(self, entity, start, end, size, timestamps):
        """Download and parse the money-flow records of one entity.

        The URL is built from ``entity.category``, ``entity.code`` and
        ``size``; ``start``, ``end`` and ``timestamps`` are not used by this
        body.

        :return: list of dicts, one per item of the evaluated response; empty
            when the response could not be evaluated
        """
        url = self.generate_url(category=entity.category,
                                code=entity.code,
                                number=size)

        resp = requests.get(url)

        # These locals are not referenced again by the visible code.
        # Presumably the response uses bare identifiers as keys and eval()
        # resolves them through these names - confirm before removing or
        # renaming any of them.
        opendate = "opendate"
        avg_price = "avg_price"
        avg_changeratio = 'avg_changeratio'
        turnover = 'turnover'
        netamount = 'netamount'
        ratioamount = 'ratioamount'
        r0_net = 'r0_net'
        r0_ratio = 'r0_ratio'
        r0x_ratio = 'r0x_ratio'
        cnt_r0x_ratio = 'cnt_r0x_ratio'

        json_list = []
        try:
            # NOTE(review): eval() runs the remote response as Python code;
            # this is unsafe if the response is not fully trusted.
            json_list = eval(resp.text)
        except Exception as e:
            # Log the response decoded as GBK, then wait five minutes before
            # returning; json_list stays empty, so the result is empty too.
            resp.encoding = 'GBK'
            self.logger.error(resp.text)
            time.sleep(60 * 5)

        result_list = []
        for item in json_list:
            result_list.append({
                'name':
                entity.name,
                'timestamp':
                to_pd_timestamp(item['opendate']),
                'close':
                to_float(item['avg_price']),
                'change_pct':
                to_float(item['avg_changeratio']),
                # the raw turnover figure is divided by 10000
                'turnover_rate':
                to_float(item['turnover']) / 10000,
                'net_inflows':
                to_float(item['netamount']),
                'net_inflow_rate':
                to_float(item['ratioamount']),
                'net_main_inflows':
                to_float(item['r0_net']),
                'net_main_inflow_rate':
                to_float(item['r0_ratio'])
            })

        return result_list
Ejemplo n.º 15
0
    def request(self, url=None, method='post', param=None, path_fields=None):
        """Download and parse the money-flow records found at ``param['url']``.

        Only ``param['url']`` is read; ``url``, ``method`` and ``path_fields``
        are not used by this body.

        :return: list of dicts, one per item of the evaluated response; empty
            when the response could not be evaluated
        """
        # security_item = param['security_item']

        resp = requests.get(param['url'])

        # These locals are not referenced again by the visible code.
        # Presumably the response uses bare identifiers as keys and eval()
        # resolves them through these names - confirm before removing or
        # renaming any of them.
        opendate = "opendate"
        avg_price = "avg_price"
        avg_changeratio = 'avg_changeratio'
        turnover = 'turnover'
        netamount = 'netamount'
        ratioamount = 'ratioamount'
        r0_net = 'r0_net'
        r0_ratio = 'r0_ratio'
        r0x_ratio = 'r0x_ratio'
        cnt_r0x_ratio = 'cnt_r0x_ratio'

        json_list = []
        try:
            # NOTE(review): eval() runs the remote response as Python code;
            # this is unsafe if the response is not fully trusted.
            json_list = eval(resp.text)
        except Exception as e:
            # Log the response decoded as GBK, then wait five minutes before
            # returning; json_list stays empty, so the result is empty too.
            resp.encoding = 'GBK'
            logger.error(resp.text)
            time.sleep(60 * 5)

        result_list = []
        for item in json_list:
            result_list.append({
                'timestamp':
                to_pd_timestamp(item['opendate']),
                'close':
                to_float(item['avg_price']),
                'change_pct':
                to_float(item['avg_changeratio']),
                # the raw turnover figure is divided by 10000
                'turnover_rate':
                to_float(item['turnover']) / 10000,
                'net_inflows':
                to_float(item['netamount']),
                'net_inflow_rate':
                to_float(item['ratioamount']),
                'net_main_inflows':
                to_float(item['r0_net']),
                'net_main_inflow_rate':
                to_float(item['r0_ratio'])
            })

        return result_list
Ejemplo n.º 16
0
    def record(self, entity, start, end, size, timestamps, http_session):
        """Collect one summary record per timestamp and return them as a frame.

        Each timestamp is formatted into ``self.url`` and fetched through
        ``http_session``; timestamps whose fetch yields ``None`` are skipped.
        A record is kept only when the response holds exactly one row with
        ``productType == '1'``. Collection stops once more than
        ``self.batch_size`` records have been gathered. ``start``, ``end`` and
        ``size`` are not used by this body.

        :return: a DataFrame of the collected records, or ``None`` when
            nothing was collected
        """
        def _to_frame(records):
            # Single place that turns the records into the returned frame, so
            # that the batch-limit return and the final return carry the same
            # columns. Previously only the batch path added entity_id,
            # provider and name and converted the timestamps.
            frame = pd.DataFrame.from_records(records)
            frame['entity_id'] = entity.id
            frame['provider'] = Provider.Exchange.value
            frame['timestamp'] = pd.to_datetime(frame['timestamp'])
            frame['name'] = '上证指数'
            return frame

        json_results = []
        for timestamp in timestamps:
            timestamp_str = to_time_str(timestamp)
            url = self.url.format(timestamp_str)
            text = sync_get(http_session,
                            url=url,
                            headers=DEFAULT_SH_SUMMARY_HEADER,
                            return_type='text')
            if text is None:
                continue

            # The payload sits between the first "(" and the first ")".
            results = demjson.decode(text[text.index("(") +
                                          1:text.index(")")])['result']
            result = [
                result for result in results if result['productType'] == '1'
            ]
            if len(result) != 1:
                continue

            result_json = result[0]
            # Some older data is missing, so every figure defaults to 0.0.
            json_results.append({
                'timestamp':
                timestamp,
                'pe':
                to_float(result_json['profitRate'], 0.0),
                'total_value':
                to_float(result_json['marketValue1'] + '亿', 0.0),
                'total_tradable_vaule':
                to_float(result_json['negotiableValue1'] + '亿', 0.0),
                'volume':
                to_float(result_json['trdVol1'] + '万', 0.0),
                'turnover':
                to_float(result_json['trdAmt1'] + '亿', 0.0),
                'turnover_rate':
                to_float(result_json['exchangeRate'], 0.0),
            })

            if len(json_results) > self.batch_size:
                return _to_frame(json_results)

        if json_results:
            return _to_frame(json_results)
        return None
 def numba_boost_up(klines):
     """Parse comma-separated kline strings into a list of dicts.

     The last element of ``klines`` is always dropped (it is treated as an
     unfinished bar). Each remaining string is expected to look like
     "2000-01-28,1005.26,1012.56,1173.12,982.13,3023326,3075552000.00",
     i.e. time,open,close,high,low,volume,turnover.
     """
     def parse(line):
         parts = line.split(',')
         return {
             'timestamp': parts[0],
             'open': to_float(parts[1]),
             'close': to_float(parts[2]),
             'high': to_float(parts[3]),
             'low': to_float(parts[4]),
             'volume': to_float(parts[5]),
             'turnover': to_float(parts[6]),
         }

     # TODO: the last unfinished kdata is ignored for now
     return [parse(line) for line in klines[:-1]]
Ejemplo n.º 18
0
    def record(self, entity, start, end, size, timestamps):
        """Download and parse the money-flow records of one entity.

        The URL is built from ``entity.category``, ``entity.code`` and
        ``size``; ``start``, ``end`` and ``timestamps`` are not used by this
        body.

        :return: list of dicts, one per item of the evaluated response; empty
            when the response could not be evaluated
        """
        url = self.generate_url(category=entity.category, code=entity.code, number=size)

        resp = requests.get(url)

        # These locals are not referenced again by the visible code.
        # Presumably the response uses bare identifiers as keys and eval()
        # resolves them through these names - confirm before removing or
        # renaming any of them.
        opendate = "opendate"
        avg_price = "avg_price"
        avg_changeratio = "avg_changeratio"
        turnover = "turnover"
        netamount = "netamount"
        ratioamount = "ratioamount"
        r0_net = "r0_net"
        r0_ratio = "r0_ratio"
        r0x_ratio = "r0x_ratio"
        cnt_r0x_ratio = "cnt_r0x_ratio"

        json_list = []
        try:
            # NOTE(review): eval() runs the remote response as Python code;
            # this is unsafe if the response is not fully trusted.
            json_list = eval(resp.text)
        except Exception as e:
            # Log the response decoded as GBK, then wait five minutes before
            # returning; json_list stays empty, so the result is empty too.
            resp.encoding = "GBK"
            self.logger.error(resp.text)
            time.sleep(60 * 5)

        result_list = []
        for item in json_list:
            result_list.append(
                {
                    "name": entity.name,
                    "timestamp": to_pd_timestamp(item["opendate"]),
                    "close": to_float(item["avg_price"]),
                    "change_pct": to_float(item["avg_changeratio"]),
                    # the raw turnover figure is divided by 10000
                    "turnover_rate": to_float(item["turnover"]) / 10000,
                    "net_inflows": to_float(item["netamount"]),
                    "net_inflow_rate": to_float(item["ratioamount"]),
                    "net_main_inflows": to_float(item["r0_net"]),
                    "net_main_inflow_rate": to_float(item["r0_ratio"]),
                }
            )

        return result_list
Ejemplo n.º 19
0
    def record(self, entity, start, end, size, time_array):
        """Fetch kdata for ``entity`` and return the rows not stored yet.

        The request id is the entity code followed by ``1`` (indexes and
        ``sh`` stocks) or ``2`` (``sz`` stocks). ``start``, ``end`` and
        ``time_array`` are not used by this body.

        :return: list of ``self.kdata_schema`` instances whose id does not
            exist in ``self.session`` yet
        :raises ValueError: if the entity type/exchange combination has no
            known request id (previously this surfaced as an unbound-local
            error on ``id_flag``)
        """
        suffix = None
        if entity.type == 'index':
            suffix = '1'
        elif entity.type == 'stock':
            suffix = {'sh': '1', 'sz': '2'}.get(entity.exchange)

        if suffix is None:
            raise ValueError(
                'unsupported entity for kdata request: type={}, exchange={}, '
                'code={}'.format(entity.type, entity.exchange, entity.code))

        id_flag = "{}{}".format(entity.code, suffix)

        the_url = self.url.format("{}".format(id_flag),
                                  eastmoney_map_zvt_trading_level(self.level),
                                  now_time_str(fmt=TIME_FORMAT_MINUTE), size)

        resp = requests.get(the_url)
        results = json_callback_param(resp.text)

        kdatas = []

        for result in results:
            the_timestamp = to_pd_timestamp(result['time'])
            the_id = generate_kdata_id(entity_id=entity.id,
                                       timestamp=the_timestamp,
                                       level=self.level)

            # Only rows that are not persisted yet are returned.
            if not data_exist(self.session, self.kdata_schema, the_id):
                kdatas.append(
                    self.kdata_schema(id=the_id,
                                      timestamp=the_timestamp,
                                      entity_id=entity.id,
                                      code=entity.code,
                                      name=entity.name,
                                      level=self.level,
                                      open=to_float(result['open']),
                                      close=to_float(result['close']),
                                      high=to_float(result['high']),
                                      low=to_float(result['low']),
                                      volume=to_float(result['volume']),
                                      turnover=to_float(result['amount']),
                                      turnover_rate=to_float(
                                          result['turnoverrate'])))
        return kdatas
Ejemplo n.º 20
0
    def record(self, entity, start, end, size, timestamps):
        """Fetch klines for ``entity`` and convert them to kdata dicts.

        The last kline of the response is always dropped (it is treated as
        an unfinished bar). ``start``, ``end`` and ``timestamps`` are not
        used by this body.

        :return: list of dicts, empty when the parsed response is falsy
        """
        request_url = self.url.format("{}".format(entity.code),
                                      level_flag(self.level), size,
                                      now_time_str(fmt=TIME_FORMAT_DAY1))

        payload = json_callback_param(requests.get(request_url).text)
        if not payload:
            return []

        rows = []
        # TODO: the last unfinished kdata is ignored for now
        for line in payload['data']['klines'][:-1]:
            # "2000-01-28,1005.26,1012.56,1173.12,982.13,3023326,3075552000.00"
            # time,open,close,high,low,volume,turnover
            parts = line.split(',')
            bar_time = to_pd_timestamp(parts[0])
            bar_id = generate_kdata_id(entity_id=entity.id,
                                       timestamp=bar_time,
                                       level=self.level)

            rows.append({
                'id': bar_id,
                'timestamp': bar_time,
                'entity_id': entity.id,
                'code': entity.code,
                'name': entity.name,
                'level': self.level.value,
                'open': to_float(parts[1]),
                'close': to_float(parts[2]),
                'high': to_float(parts[3]),
                'low': to_float(parts[4]),
                'volume': to_float(parts[5]),
                'turnover': to_float(parts[6]),
            })
        return rows
Ejemplo n.º 21
0
    def request(self, url=None, method='post', param=None, path_fields=None):
        """Download and parse the money-flow records found at ``param['url']``.

        Only ``param['url']`` is read; ``url``, ``method`` and ``path_fields``
        are not used by this body.

        :return: list of dicts, one per record of the response; empty when
            the response could not be evaluated. The ``*_inflow_rate`` keys
            are only present for records whose r0+r1+r2+r3 total is non-zero.
        """
        resp = requests.get(param['url'])
        # Sample record:
        # {opendate:"2019-04-29",trade:"10.8700",changeratio:"-0.0431338",turnover:"74.924",netamount:"-2903349.8500",
        # ratioamount:"-0.155177",r0:"0.0000",r1:"2064153.0000",r2:"6485031.0000",r3:"10622169.2100",r0_net:"0.0000",
        # r1_net:"2064153.0000",r2_net:"-1463770.0000",r3_net:"-3503732.8500"}
        #
        # The keys are bare identifiers, so eval() needs a local of the same
        # name for each of them. These assignments look unused but must stay
        # in this scope with exactly these names.
        opendate = "opendate"
        trade = "trade"
        changeratio = 'changeratio'
        turnover = 'turnover'
        netamount = 'netamount'
        ratioamount = 'ratioamount'
        r0 = 'r0'
        r1 = 'r1'
        r2 = 'r2'
        r3 = 'r3'
        r0_net = 'r0_net'
        r1_net = 'r1_net'
        r2_net = 'r2_net'
        r3_net = 'r3_net'

        json_list = []

        try:
            # NOTE(review): eval() runs the remote response as Python code;
            # this is unsafe if the response is not fully trusted.
            json_list = eval(resp.text)
        except Exception:
            # Log the response decoded as GBK, then wait five minutes before
            # returning; json_list stays empty, so the result is empty too.
            resp.encoding = 'GBK'
            logger.error(resp.text)
            time.sleep(60 * 5)

        result_list = []
        for item in json_list:
            # Total over the four order-size buckets; used as the denominator
            # of every rate below.
            amount = to_float(item['r0']) + to_float(item['r1']) + to_float(
                item['r2']) + to_float(item['r3'])

            # Target schema columns:
            #     main = huge + big
            #     net_main_inflows / net_main_inflow_rate
            #     huge orders:   net_huge_inflows / net_huge_inflow_rate
            #     big orders:    net_big_inflows / net_big_inflow_rate
            #     medium orders: net_medium_inflows / net_medium_inflow_rate
            #     small orders:  net_small_inflows / net_small_inflow_rate
            result = {
                'timestamp':
                to_pd_timestamp(item['opendate']),
                'close':
                to_float(item['trade']),
                'change_pct':
                to_float(item['changeratio']),
                # the raw turnover figure is divided by 10000
                'turnover_rate':
                to_float(item['turnover']) / 10000,
                'net_inflows':
                to_float(item['netamount']),
                'net_inflow_rate':
                to_float(item['ratioamount']),
                'net_main_inflows':
                to_float(item['r0_net']) + to_float(item['r1_net']),
                'net_huge_inflows':
                to_float(item['r0_net']),
                'net_big_inflows':
                to_float(item['r1_net']),
                'net_medium_inflows':
                to_float(item['r2_net']),
                'net_small_inflows':
                to_float(item['r3_net']),
            }

            if amount != 0:
                # The rate keys follow the schema column names listed above
                # (``*_inflow_rate``). They were previously written as
                # ``*_inflows_rate``, which matches no schema column.
                result['net_main_inflow_rate'] = (to_float(
                    item['r0_net']) + to_float(item['r1_net'])) / amount
                result['net_huge_inflow_rate'] = to_float(
                    item['r0_net']) / amount
                result['net_big_inflow_rate'] = to_float(
                    item['r1_net']) / amount
                result['net_medium_inflow_rate'] = to_float(
                    item['r2_net']) / amount
                result['net_small_inflow_rate'] = to_float(
                    item['r3_net']) / amount

            result_list.append(result)

        return result_list
    def record(self, entity, start, end, size, timestamps):
        """Download and normalise the money-flow rows of one security.

        The URL comes from ``self.generate_url`` using ``entity.exchange`` +
        ``entity.code`` as the code and ``size`` as the row count. ``start``,
        ``end`` and ``timestamps`` are accepted but not used here.

        :return: list of dicts, one per row. The ``*_inflow_rate`` keys for
                 main/huge/big/medium/small are only present when the summed
                 r0..r3 amount is non-zero. An empty list is returned (after
                 a five minute pause) when the response cannot be parsed.
        """
        url = self.generate_url(code='{}{}'.format(entity.exchange, entity.code), number=size)
        resp = requests.get(url)

        # Sample row, as documented by the original author:
        # {opendate:"2019-04-29",trade:"10.8700",changeratio:"-0.0431338",turnover:"74.924",netamount:"-2903349.8500",
        # ratioamount:"-0.155177",r0:"0.0000",r1:"2064153.0000",r2:"6485031.0000",r3:"10622169.2100",r0_net:"0.0000",
        # r1_net:"2064153.0000",r2_net:"-1463770.0000",r3_net:"-3503732.8500"}
        # The keys are bare identifiers, so the text is not valid JSON. Every
        # known key is mapped to its own name so that eval() can resolve it.
        field_names = {
            key: key
            for key in ('opendate', 'trade', 'changeratio', 'turnover', 'netamount', 'ratioamount',
                        'r0', 'r1', 'r2', 'r3', 'r0_net', 'r1_net', 'r2_net', 'r3_net')
        }

        json_list = []

        try:
            # SECURITY: this still eval()s text received from a remote server.
            # The namespace is limited to the field names and builtins are
            # removed, which narrows the exposure but is NOT a sandbox.
            # TODO(review): replace with a real parser.
            json_list = eval(resp.text, {'__builtins__': {}}, field_names)
        except Exception:
            # Best effort: log the body decoded as GBK, back off for five
            # minutes and fall through with an empty result.
            resp.encoding = 'GBK'
            self.logger.error(resp.text)
            time.sleep(60 * 5)

        result_list = []
        for item in json_list:
            amount = to_float(item['r0']) + to_float(item['r1']) + to_float(item['r2']) + to_float(item['r3'])

            # r0 = huge orders, r1 = big, r2 = medium, r3 = small (see the
            # key mapping below); "main" is huge + big. Each net figure is
            # converted once and reused for both the amount and the rate.
            huge_net = to_float(item['r0_net'])
            big_net = to_float(item['r1_net'])
            medium_net = to_float(item['r2_net'])
            small_net = to_float(item['r3_net'])
            main_net = huge_net + big_net

            result = {
                'timestamp': to_pd_timestamp(item['opendate']),
                'name': entity.name,
                'close': to_float(item['trade']),
                'change_pct': to_float(item['changeratio']),
                'turnover_rate': to_float(item['turnover']) / 10000,
                'net_inflows': to_float(item['netamount']),
                'net_inflow_rate': to_float(item['ratioamount']),
                'net_main_inflows': main_net,
                'net_huge_inflows': huge_net,
                'net_big_inflows': big_net,
                'net_medium_inflows': medium_net,
                'net_small_inflows': small_net,
            }

            # Rates are relative to the summed r0..r3 amount; they are left
            # out entirely when that amount is zero.
            if amount != 0:
                result['net_main_inflow_rate'] = main_net / amount
                result['net_huge_inflow_rate'] = huge_net / amount
                result['net_big_inflow_rate'] = big_net / amount
                result['net_medium_inflow_rate'] = medium_net / amount
                result['net_small_inflow_rate'] = small_net / amount

            result_list.append(result)

        return result_list
Ejemplo n.º 23
0
    def record(self, security_item, start, end, size, timestamps):
        """Scrape one quarter of k-data and merge the adjusted prices into it.

        The quarters to fetch are derived from ``start``. For the first
        quarter in that list the unadjusted ('bfq') page is scraped, then the
        adjusted ('hfq') page, whose values are merged into the matching
        unadjusted rows by id.

        NOTE: the method returns from inside the quarter loop, so only the
        first quarter is processed per call; with no quarters it returns
        ``None`` implicitly.

        :return: list of k-data dicts for the processed quarter
        """
        quarters = get_year_quarters(start)
        # If the stored timestamp is not the start date, the first quarter is
        # treated as already recorded and skipped (when there is another one).
        if not is_same_date(security_item.timestamp, start) and len(quarters) > 1:
            quarters = quarters[1:]

        row_xpath = '//*[@id="FundHoldSharesTable"]/tr[position()>1 and position()<=last()]'

        for year, quarter in quarters:
            kdatas = []

            for fuquan in ['bfq', 'hfq']:
                is_hfq = fuquan == 'hfq'
                page = requests.get(
                    self.get_kdata_url(security_item.code, year, quarter, fuquan))
                rows = Selector(text=page.text).xpath(row_xpath).extract()

                for row in rows:
                    raw_cells = Selector(text=row).xpath('//td//text()').extract()
                    cells = [text.strip() for text in raw_cells if text.strip()]

                    # Column order on the page: date, open, high, close, low,
                    # volume, turnover and, on the hfq page only, the factor.
                    open_price = cells[1]
                    high_price = cells[2]
                    close_price = cells[3]
                    low_price = cells[4]
                    volume = cells[5]
                    turnover = cells[6]
                    if is_hfq:
                        factor = cells[7]

                    the_timestamp = to_pd_timestamp(cells[0])
                    the_id = generate_kdata_id(security_id=security_item.id,
                                               timestamp=the_timestamp,
                                               level=self.level)

                    if not is_hfq:
                        kdatas.append({
                            'id': the_id,
                            'timestamp': the_timestamp,
                            'name': security_item.name,
                            'level': self.level.value,
                            'open': to_float(open_price),
                            'close': to_float(close_price),
                            'high': to_float(high_price),
                            'low': to_float(low_price),
                            'volume': to_float(volume),
                            'turnover': to_float(turnover)
                        })
                        continue

                    # bfq rows were collected first; attach the hfq values to
                    # the row with the same id.
                    kdata = next(
                        (known for known in kdatas if known['id'] == the_id),
                        None)
                    if kdata is None:
                        # No unadjusted row: derive one by dividing the
                        # adjusted prices by the factor.
                        self.logger.error("bfq not got for:{}".format(the_id))
                        kdata = {
                            'id': the_id,
                            'timestamp': the_timestamp,
                            'name': security_item.name,
                            'level': self.level.value,
                            'open': to_float(open_price) / to_float(factor),
                            'close': to_float(close_price) / to_float(factor),
                            'high': to_float(high_price) / to_float(factor),
                            'low': to_float(low_price) / to_float(factor),
                            'volume': to_float(volume),
                            'turnover': to_float(turnover)
                        }
                        kdatas.append(kdata)

                    kdata['hfq_open'] = to_float(open_price)
                    kdata['hfq_high'] = to_float(high_price)
                    kdata['hfq_close'] = to_float(close_price)
                    kdata['hfq_low'] = to_float(low_price)
                    kdata['factor'] = to_float(factor)

                    # Remember the most recently seen factor for this security.
                    self.latest_factors[security_item.id] = to_float(factor)

            return kdatas
Ejemplo n.º 24
0
    def record(self, entity, start, end, size, timestamps):
        """Download and normalise the money-flow rows of one security.

        The URL comes from ``self.generate_url`` using ``entity.exchange`` +
        ``entity.code`` as the code and ``size`` as the row count. ``start``,
        ``end`` and ``timestamps`` are accepted but not used here.

        :return: list of dicts, one per row. The ``*_inflow_rate`` keys for
                 main/huge/big/medium/small are only present when the summed
                 r0..r3 amount is non-zero. An empty list is returned (after
                 a five minute pause) when the response cannot be parsed.
        """
        url = self.generate_url(code="{}{}".format(entity.exchange, entity.code), number=size)
        resp = requests.get(url)

        # Sample row, as documented by the original author:
        # {opendate:"2019-04-29",trade:"10.8700",changeratio:"-0.0431338",turnover:"74.924",netamount:"-2903349.8500",
        # ratioamount:"-0.155177",r0:"0.0000",r1:"2064153.0000",r2:"6485031.0000",r3:"10622169.2100",r0_net:"0.0000",
        # r1_net:"2064153.0000",r2_net:"-1463770.0000",r3_net:"-3503732.8500"}
        # The keys are bare identifiers, so the text is not valid JSON. Every
        # known key is mapped to its own name so that eval() can resolve it.
        field_names = {
            key: key
            for key in (
                "opendate", "trade", "changeratio", "turnover", "netamount", "ratioamount",
                "r0", "r1", "r2", "r3", "r0_net", "r1_net", "r2_net", "r3_net",
            )
        }

        json_list = []

        try:
            # SECURITY: this still eval()s text received from a remote server.
            # The namespace is limited to the field names and builtins are
            # removed, which narrows the exposure but is NOT a sandbox.
            # TODO(review): replace with a real parser.
            json_list = eval(resp.text, {"__builtins__": {}}, field_names)
        except Exception:
            # Best effort: log the body decoded as GBK, back off for five
            # minutes and fall through with an empty result.
            resp.encoding = "GBK"
            self.logger.error(resp.text)
            time.sleep(60 * 5)

        result_list = []
        for item in json_list:
            amount = to_float(item["r0"]) + to_float(item["r1"]) + to_float(item["r2"]) + to_float(item["r3"])

            # r0 = huge orders, r1 = big, r2 = medium, r3 = small (see the
            # key mapping below); "main" is huge + big. Each net figure is
            # converted once and reused for both the amount and the rate.
            huge_net = to_float(item["r0_net"])
            big_net = to_float(item["r1_net"])
            medium_net = to_float(item["r2_net"])
            small_net = to_float(item["r3_net"])
            main_net = huge_net + big_net

            result = {
                "timestamp": to_pd_timestamp(item["opendate"]),
                "name": entity.name,
                "close": to_float(item["trade"]),
                "change_pct": to_float(item["changeratio"]),
                "turnover_rate": to_float(item["turnover"]) / 10000,
                "net_inflows": to_float(item["netamount"]),
                "net_inflow_rate": to_float(item["ratioamount"]),
                "net_main_inflows": main_net,
                "net_huge_inflows": huge_net,
                "net_big_inflows": big_net,
                "net_medium_inflows": medium_net,
                "net_small_inflows": small_net,
            }

            # Rates are relative to the summed r0..r3 amount; they are left
            # out entirely when that amount is zero.
            if amount != 0:
                result["net_main_inflow_rate"] = main_net / amount
                result["net_huge_inflow_rate"] = huge_net / amount
                result["net_big_inflow_rate"] = big_net / amount
                result["net_medium_inflow_rate"] = medium_net / amount
                result["net_small_inflow_rate"] = small_net / amount

            result_list.append(result)

        return result_list
    def run(self):
        """Record company profile and issuance data for every entity.

        For each ``StockDetail`` in ``self.entities`` two eastmoney endpoints
        (``GetJiBenZiLiao`` and ``GetFaXingXiangGuan``) are queried, the
        returned fields are copied onto the entity, ``self.session`` is
        committed and the progress bar is advanced.

        ``self.share_para`` is read positionally: ``[0]`` is shown in the bar
        description, ``[1]`` bounds the random start-up delay and the bar
        position, ``[2]`` is acquired/released around every bar update and
        ``[3]`` is passed to tqdm as ``leave``.

        Entities whose ``exchange`` is neither ``'sh'`` nor ``'sz'`` are
        logged and skipped; no request is made for them.
        """
        # Random start-up delay of up to share_para[1] seconds.
        time.sleep(random.randint(0, self.share_para[1]))
        process_identity = multiprocessing.current_process()._identity
        if process_identity:
            #  The worker process tqdm bar shall start at Position 1
            worker_id = (process_identity[0] - 1) % self.share_para[1] + 1
        else:
            worker_id = 0
        desc = "{:02d} : {}".format(worker_id, self.share_para[0])

        # Suffix appended to the stock code to build the "fc" request field.
        fc_suffixes = {'sh': '01', 'sz': '02'}

        with tqdm(total=len(self.entities),
                  ncols=80,
                  position=worker_id,
                  desc=desc,
                  leave=self.share_para[3]) as pbar:
            http_session = get_http_session()

            def advance_progress():
                # Always release share_para[2], even if the update raises.
                self.share_para[2].acquire()
                try:
                    pbar.update()
                finally:
                    self.share_para[2].release()

            for security_item in self.entities:
                assert isinstance(security_item, StockDetail)

                fc_suffix = fc_suffixes.get(security_item.exchange)
                if fc_suffix is None:
                    # Without this guard "fc" would be unbound on the first
                    # entity, or silently keep the previous entity's value
                    # and store another company's data on this one.
                    self.logger.error(
                        'unsupported exchange {} for: {}'.format(
                            security_item.exchange, security_item.code))
                    advance_progress()
                    continue
                fc = "{}{}".format(security_item.code, fc_suffix)

                # Basic company information.
                # NOTE(review): "SecurityCode" is a fixed literal for every
                # entity; only "fc" varies - confirm the API ignores it.
                param = {"color": "w", "fc": fc, "SecurityCode": "SZ300059"}
                resp = request_post(
                    http_session,
                    'https://emh5.eastmoney.com/api/GongSiGaiKuang/GetJiBenZiLiao',
                    json=param)
                resp.encoding = 'utf8'

                resp_json = resp.json()['Result']['JiBenZiLiao']

                security_item.profile = resp_json['CompRofile']
                security_item.main_business = resp_json['MainBusiness']
                security_item.date_of_establishment = to_pd_timestamp(
                    resp_json['FoundDate'])

                # Related industries: '-' separated in the response, stored
                # comma separated.
                industry = ','.join(resp_json['Industry'].split('-'))
                security_item.industry = industry

                # Related concepts.
                security_item.concept_indices = resp_json['Block']

                # Related region.
                security_item.area_indices = resp_json['Provice']

                # Issuance information.
                param = {"color": "w", "fc": fc}
                resp = request_post(
                    http_session,
                    'https://emh5.eastmoney.com/api/GongSiGaiKuang/GetFaXingXiangGuan',
                    json=param)
                resp.encoding = 'utf8'

                resp_json = resp.json()['Result']['FaXingXiangGuan']

                security_item.issue_pe = to_float(resp_json['PEIssued'])
                security_item.price = to_float(resp_json['IssuePrice'])
                security_item.issues = to_float(resp_json['ShareIssued'])
                security_item.raising_fund = to_float(
                    (resp_json['NetCollection']))
                security_item.net_winning_rate = pct_to_float(
                    resp_json['LotRateOn'])

                self.session.commit()
                self.logger.info('finish recording stock meta for: {}'.format(
                    security_item.code))

                advance_progress()

                self.sleep()
Ejemplo n.º 26
0
    def process_loop(self, entity, http_session):
        """Fetch profile and issuance data for one entity and commit it.

        Two eastmoney endpoints are POSTed to through ``sync_post``; their
        fields are copied onto ``entity``, a session obtained from
        ``get_db_session`` is committed and the elapsed time is logged.

        The method returns early, without committing, when either request
        yields ``None`` or when ``entity.exchange`` is neither ``'sh'`` nor
        ``'sz'`` (logged as an error).

        :param entity: the ``StockDetail`` to update in place
        :param http_session: session object handed to ``sync_post``
        """
        assert isinstance(entity, StockDetail)

        step1 = time.time()

        self.result = None

        # Suffix appended to the stock code to build the "fc" request field.
        fc_suffix = {'sh': '01', 'sz': '02'}.get(entity.exchange)
        if fc_suffix is None:
            # "fc" used to stay unbound here and raise UnboundLocalError.
            self.logger.error('unsupported exchange {} for: {}'.format(
                entity.exchange, entity.id))
            return
        fc = "{}{}".format(entity.code, fc_suffix)

        # Basic company information.
        # NOTE(review): "SecurityCode" is a fixed literal for every entity;
        # only "fc" varies - confirm the API ignores it.
        param = {"color": "w", "fc": fc, "SecurityCode": "SZ300059"}
        url = 'https://emh5.eastmoney.com/api/GongSiGaiKuang/GetJiBenZiLiao'
        json_result = sync_post(http_session, url, json=param)
        if json_result is None:
            return

        resp_json = json_result['JiBenZiLiao']

        entity.profile = resp_json['CompRofile']
        entity.main_business = resp_json['MainBusiness']
        entity.date_of_establishment = to_pd_timestamp(resp_json['FoundDate'])

        # Related industries: '-' separated in the response, stored comma
        # separated.
        industry = ','.join(resp_json['Industry'].split('-'))
        entity.industry = industry

        # Related concepts.
        entity.sector = resp_json['Block']

        # Related region.
        entity.area = resp_json['Provice']

        self.sleep()

        # Issuance information.
        param = {"color": "w", "fc": fc}
        url = 'https://emh5.eastmoney.com/api/GongSiGaiKuang/GetFaXingXiangGuan'
        json_result = sync_post(http_session, url, json=param)
        if json_result is None:
            return

        resp_json = json_result['FaXingXiangGuan']

        entity.issue_pe = to_float(resp_json['PEIssued'])
        entity.price = to_float(resp_json['IssuePrice'])
        entity.issues = to_float(resp_json['ShareIssued'])
        entity.raising_fund = to_float((resp_json['NetCollection']))
        entity.net_winning_rate = pct_to_float(resp_json['LotRateOn'])

        session = get_db_session(region=self.region,
                                 provider=self.provider,
                                 data_schema=self.data_schema)
        session.commit()

        # Elapsed seconds, right-aligned to 8 characters with 4 decimals.
        cost = '{:>8,.4f}'.format(time.time() - step1)

        prefix = "finish~ " if zvt_config['debug'] else ""
        postfix = "\n" if zvt_config['debug'] else ""

        # self.result is reset to None above and not assigned again in this
        # method; the detailed branch only runs if a helper sets it.
        if self.result is not None:
            self.logger.info(
                "{}{}, {}, time: {}, size: {:>7,}, date: [ {}, {} ]{}".format(
                    prefix, self.data_schema.__name__, entity.id, cost,
                    self.result[0], self.result[1], self.result[2], postfix))
        else:
            self.logger.info("{}{}, {}, time: {}{}".format(
                prefix, self.data_schema.__name__, entity.id, cost, postfix))
        def numba_boost_up(json_list):
            """Turn raw money-flow rows into normalised record dicts.

            Sample row, as documented by the original author:
            {opendate:"2019-04-29",trade:"10.8700",changeratio:"-0.0431338",turnover:"74.924",netamount:"-2903349.8500",
            ratioamount:"-0.155177",r0:"0.0000",r1:"2064153.0000",r2:"6485031.0000",r3:"10622169.2100",r0_net:"0.0000",
            r1_net:"2064153.0000",r2_net:"-1463770.0000",r3_net:"-3503732.8500"}

            The per-size ``*_inflow_rate`` keys are only added when the summed
            r0..r3 amount is non-zero.
            """

            def number(row, key):
                # Shorthand for converting one field of a row.
                return to_float(row[key])

            # Rate key -> net field it is derived from
            # (r0 = huge orders, r1 = big, r2 = medium, r3 = small).
            rate_sources = (
                ('net_huge_inflow_rate', 'r0_net'),
                ('net_big_inflow_rate', 'r1_net'),
                ('net_medium_inflow_rate', 'r2_net'),
                ('net_small_inflow_rate', 'r3_net'),
            )

            records = []
            for row in json_list:
                record = {
                    'timestamp': to_pd_timestamp(row['opendate']),
                    'close': number(row, 'trade'),
                    'change_pct': number(row, 'changeratio'),
                    'turnover_rate': number(row, 'turnover') / 10000,
                    'net_inflows': number(row, 'netamount'),
                    'net_inflow_rate': number(row, 'ratioamount'),
                    # main = huge orders + big orders
                    'net_main_inflows': number(row, 'r0_net') + number(row, 'r1_net'),
                    'net_huge_inflows': number(row, 'r0_net'),
                    'net_big_inflows': number(row, 'r1_net'),
                    'net_medium_inflows': number(row, 'r2_net'),
                    'net_small_inflows': number(row, 'r3_net'),
                }

                # Denominator for every rate: the summed r0..r3 amounts.
                total = number(row, 'r0') + number(row, 'r1') + number(row, 'r2') + number(row, 'r3')
                if total != 0:
                    record['net_main_inflow_rate'] = (
                        number(row, 'r0_net') + number(row, 'r1_net')) / total
                    for rate_key, net_key in rate_sources:
                        record[rate_key] = number(row, net_key) / total

                records.append(record)

            return records