Ejemplo n.º 1
0
 def _request_convertible_basics(update_mapping):
     """Fetch basics of listed convertible bonds and merge them with local data.

     Parameters
     ----------
     update_mapping : dict
         Must contain a 'convertible' key mapping bond id -> supplementary dict.

     Returns
     -------
     pd.DataFrame
         One row per bond present both online and in the mapping; an empty
         frame when the mapping holds no convertibles.
     """
     bond_mappings = update_mapping['convertible']
     if not bond_mappings:
         return pd.DataFrame()
     # basics of bonds that are already listed
     text = _parse_url(ASSET_SUPPLEMENT_URL['convertible_supplement'],
                       encoding=None,
                       bs=False)
     text = json.loads(text)
     # intersection of locally known ids and ids reported online
     common_bond = set(bond_mappings) & {basic['id'] for basic in text['rows']}
     print('common', len(common_bond))
     # merge the two dicts per bond to keep the record complete
     # (was a side-effect list comprehension plus a second pass; one loop suffices)
     basics = []
     for basic in text['rows']:
         if basic['id'] in common_bond:
             basic['cell'].update(bond_mappings[basic['id']])
             basics.append(basic['cell'])
     return pd.DataFrame(basics)
Ejemplo n.º 2
0
 def _initialize_symbols():
     """Build a mapping from index name (field f14) to index code (field f12)."""
     response = _parse_url(BENCHMARK_URL['symbols'], encoding='utf-8')
     payload = json.loads(response.text)
     mapping = {}
     for record in payload['data']['diff']:
         mapping[record['f14']] = record['f12']
     return mapping
Ejemplo n.º 3
0
 def _request_funds():
     # existing ETF funds: fixed income, leveraged A/B, ETF / QDII-ETF
     page = _parse_url(ASSERT_URL_MAPPING['fund'])
     cells = [node.find_all('td') for node in page.find_all(id='tableDiv')]
     texts = [cell.get_text() for cell in cells[0]]
     # because of the page format the last two columns are always empty
     rows = partition_all(14, texts[18:])
     frame = pd.DataFrame(rows, columns=texts[2:16])
     return frame.iloc[:, :-2]
Ejemplo n.º 4
0
 def _arbitrary_parser(self, url, encoding='gbk', direct=True):
     """Fetch *url* and optionally JSON-decode it, retrying on failure.

     Parameters
     ----------
     url : str
         Request target, passed to ``_parse_url``.
     encoding : str
         Response encoding forwarded to ``_parse_url``.
     direct : bool
         When True the response text is decoded as JSON, otherwise the raw
         text is returned unchanged.

     Returns
     -------
     The decoded object (or raw text); retries after a random pause when the
     request or decode fails.
     """
     try:
         text = _parse_url(url, encoding=encoding, bs=False)
         return json.loads(text) if direct else text
     except Exception as e:
         print('error %r' % e)
         time.sleep(np.random.randint(5, 10))
         # bug fix: the recursive retry's result was previously discarded,
         # so every call that hit the except path returned None
         return self._arbitrary_parser(url, encoding=encoding, direct=direct)
Ejemplo n.º 5
0
 def lookup_index_symbols(cls):
     """Return a dict keyed by index code (f12) with the first data column as value."""
     text = _parse_url(ASSERT_URL_MAPPING['benchmark'],
                       encoding='utf-8',
                       bs=False)
     payload = json.loads(text)
     # presumably payload['data']['diff'] is a dict of records — values() implies so
     frame = pd.DataFrame(payload['data']['diff'].values())
     frame.set_index('f12', inplace=True)
     return frame.iloc[:, 0].to_dict()
Ejemplo n.º 6
0
 def _writer_internal(self, equities):
     """Spider the ownership page of every sid in *equities*.

     Failed sids are added to ``self.missed``; successful ones are removed
     from it, so the set always reflects the current backlog.
     """
     for sid in equities:
         failed = False
         try:
             page = _parse_url(OWNERSHIP % sid)
             self._parse_equity_ownership(page, sid)
             print('successfully spider ownership of code : %s' % sid)
         except Exception as e:
             failed = True
             print('spider code: % s  ownership failure due to % r' % (sid, e))
         if failed:
             self.missed.add(sid)
         else:
             self.missed.discard(sid)
Ejemplo n.º 7
0
 def _request_equity_basics(code):
     """Scrape the company-info table for *code* and return it as a dict."""
     page = _parse_url(ASSET_SUPPLEMENT_URL['equity_supplement'] % code)
     table = page.find('table', {'id': 'comInfo1'})
     cells = chain(*[row.findAll('td') for row in table.findAll('tr')])
     # drop the label colons and surrounding whitespace from every cell
     values = [cell.get_text().replace(':', '').strip() for cell in cells]
     # cells alternate between label and value
     mapping = dict(zip(values[::2], values[1::2]))
     mapping['代码'] = code
     return mapping
Ejemplo n.º 8
0
 def _request_duals():
     """Collect existing A/H dual-listed instruments, paging until empty."""
     mappings = {}
     page = 1
     while True:
         text = _parse_url(ASSERT_URL_MAPPING['dual'] % page,
                           bs=False,
                           encoding=None)
         payload = json.loads(text)['data']
         if not (payload and len(payload['diff'])):
             break
         # f12 -- hk ; f191 -- code
         for item in payload['diff']:
             mappings[item['f191']] = item['f12']
         page += 1
     return mappings
Ejemplo n.º 9
0
 def get_current_minutes(sid):
     """
         return current reality tickers data
     """
     base = 'http://push2.eastmoney.com/api/qt/stock/trends2/get?fields1=f1&' \
            'fields2=f51,f52,f53,f54,f55,f56,f57,f58&iscr=0&secid={}'
     # market prefix is chosen by the code's leading digit
     # NOTE(review): '6xxxxx' -> '0.' matches _calculate_returns in this file,
     # but verify the secid prefix convention against the eastmoney API
     secid = ('0.' if sid.startswith('6') else '1.') + sid
     raw = _parse_url(base.format(secid), bs=False)
     payload = json.loads(raw)
     rows = [line.split(',') for line in payload['data']['trends']]
     columns = ['ticker', 'open', 'close', 'high', 'low',
                'volume', 'turnover', 'avg']
     return pd.DataFrame(rows, columns=columns)
Ejemplo n.º 10
0
 def suspend(cls, dt):
     """
         获取dt停盘信息  e.g:2020-07-13
     """
     url = 'http://datainterface.eastmoney.com/EM_DataCenter/JS.aspx?type=FD&sty=SRB&st=0&sr=-1&p=1&ps=50&'\
           'js={"pages":(pc),"data":[(x)]}&mkt=1&fd=%s' % dt
     payload = json.loads(_parse_url(url, bs=False, encoding=None))
     rows = [record.split(',') for record in payload['data']]
     columns = ['sid', 'name', 'open_ticker', 'close_ticker',
                'suspend', 'reason', 'market', 'date', 'market_date']
     frame = pd.DataFrame(rows, columns=columns)
     print('frame', frame.iloc[0, :])
     return frame
Ejemplo n.º 11
0
 def load_raw_arrays(self, dts, assets, fields=None):
     """Scrape quarterly GDP figures page by page.

     Parameters
     ----------
     dts, assets, fields
         Unused here; kept for interface compatibility with callers.

     Returns
     -------
     pd.DataFrame
         Columns ['季度', '总值'], deduplicated. Paging stops at the first
         page that only repeats already-collected rows.
     """
     page = 1
     gross_value = pd.DataFrame()
     while True:
         req_url = self._url % page
         obj = _parse_url(req_url)
         raw = obj.findAll('div', {'class': 'Content'})
         text = [t.get_text() for t in raw[1].findAll('td')]
         text = [item.strip() for item in text]
         # each record spans 9 cells; keep quarter and total value
         data = zip(text[::9], text[1::9])
         data = pd.DataFrame(data, columns=['季度', '总值'])
         # bug fix: DataFrame.append was removed in pandas 2.0 — use concat
         gross_value = pd.concat([gross_value, data])
         if len(gross_value) != len(
                 gross_value.drop_duplicates(ignore_index=True)):
             gross_value.drop_duplicates(inplace=True, ignore_index=True)
             break
         page = page + 1
     return gross_value
Ejemplo n.º 12
0
 def _crawler(self, mapping, tbl, pct=False):
     """Download kline data for one sid and write fresh rows into *tbl*.

     Parameters
     ----------
     mapping : dict
         Needs 'sid' and 'request_sid' keys.
     tbl : str
         Target table name; also selects the request URL and cached deadline.
     pct : bool
         When True an extra 'pct' column is expected in the payload.
     """
     sid = mapping['sid']
     url = ASSETS_BUNDLES_URL[tbl].format(mapping['request_sid'], self.lmt)
     obj = _parse_url(url, bs=False)
     kline = json.loads(obj)['data']
     cols = self.default + ['pct'] if pct else self.default
     if kline and len(kline['klines']):
         frame = pd.DataFrame([item.split(',') for item in kline['klines']],
                              columns=cols)
         frame.loc[:, 'sid'] = sid
         # only keep rows newer than the cached deadline for this sid
         try:
             deadline = self._cache_deadlines[tbl][sid]
         except KeyError as e:
             # fix: a plain dict lookup can only raise KeyError — the former
             # broad `except Exception` could mask unrelated errors
             print('error :%s raise from sid come to market today' % e)
             deadline = None
         frame = frame[frame['trade_dt'] > deadline] if deadline else frame
         db.writer(tbl, frame)
Ejemplo n.º 13
0
 def _calculate_alternative_returns(self, index_name):
     """Compute daily session returns for a peripheral benchmark index.

     Parameters
     ----------
     index_name : str
         Must be a key of ``lookup_benchmark``.

     Raises
     ------
     ValueError
         When *index_name* is unknown (cause chained from the KeyError).
     """
     try:
         index = lookup_benchmark[index_name]
     except KeyError as e:
         # fix: the bare `raise ValueError` discarded the offending name;
         # keep the exception type callers catch, but make it informative
         raise ValueError('unknown benchmark index: %s' % index_name) from e
     # '3000-01-01' as the end date requests the full available history
     url = BENCHMARK_URL['periphera_kline'] % (index, '3000-01-01')
     text = _parse_url(url, bs=False, encoding='utf-8')
     raw = json.loads(text)
     kline = pd.DataFrame(
         raw['data'][index]['day'],
         columns=['trade_dt', 'open', 'close', 'high', 'low', 'turnover'])
     kline.set_index('trade_dt', inplace=True)
     kline.sort_index(inplace=True)
     kline = kline.astype('float64')
     # simple percentage change of consecutive closes
     returns = kline['close'] / kline['close'].shift(1) - 1
     daily_returns = self._compute_session_returns(returns)
     return daily_returns
Ejemplo n.º 14
0
 def _request_convertibles():
     """Collect listed convertible bonds page by page, keyed by bond code."""
     page = 1
     bonds = []
     while True:
         text = _parse_url(ASSERT_URL_MAPPING['convertible'] % page,
                           encoding='utf-8',
                           bs=False)
         data = json.loads(text)['data']
         if not data:
             break
         bonds.extend(data)
         page += 1
     # drop convertibles that are not listed yet; key by bond code
     return {
         bond['BONDCODE']: bond
         for bond in bonds if bond['LISTDATE'] != '-'
     }
Ejemplo n.º 15
0
 def _calculate_returns(self, sid):
     """Compute daily session returns for *sid* from its full kline history.

     Parameters
     ----------
     sid : str
         Security code; the secid prefix is chosen by its leading digit.

     Returns
     -------
     Daily session returns, or None when the API reports no kline data.
     """
     symbol = '1.' + sid if sid.startswith('0') else '0.' + sid
     # '30000101' as the end date requests the full available history
     url = BENCHMARK_URL['kline'].format(symbol, '30000101')
     obj = _parse_url(url, bs=False)
     data = json.loads(obj)
     raw = data['data']
     if raw and len(raw['klines']):
         raw = [item.split(',') for item in raw['klines']]
         kline = pd.DataFrame(raw,
                              columns=[
                                  'trade_dt', 'open', 'close', 'high',
                                  'low', 'turnover', 'volume', 'amount'
                              ])
         kline.set_index('trade_dt', inplace=True)
         kline.sort_index(inplace=True)
         # fix: np.float was removed in NumPy >= 1.24; builtin float is equivalent
         close = kline['close'].astype(float)
         returns = close / close.shift(1) - 1
         daily_returns = self._compute_session_returns(returns)
         return daily_returns
Ejemplo n.º 16
0
 def _request_equities():
     """Return codes of all existing equities, delisted ones included."""
     text = _parse_url(ASSERT_URL_MAPPING['equity'], bs=False)
     payload = json.loads(text)
     return [record['f12'] for record in payload['data']['diff']]
Ejemplo n.º 17
0
 def _parser_writer(self, sid):
     """Fetch the dividend page of *sid* and parse both rights and dividends."""
     page = _parse_url(DIVDEND % sid)
     self._parse_equity_rights(page, sid)
     self._parse_equity_divdend(page, sid)