def record(self, entity, start, end, size, timestamps):
    """Download quarterly k-line tables for ``entity`` and return them as records.

    One page is requested per (year, quarter) returned by
    ``get_year_quarters(start)``. The fifth HTML table on each page
    (``dfs[4]``) is taken as the k-line table, its first row is dropped,
    and the remaining rows are labelled and merged.

    Args:
        entity: Object exposing ``code``, ``name`` and ``timestamp``.
        start: Passed to ``get_year_quarters`` to choose the quarters.
        end: Not used by this implementation.
        size: Not used by this implementation.
        timestamps: Not used by this implementation.

    Returns:
        A list of row dicts sorted by ``timestamp``, or an empty list when
        no quarter yielded a usable table.
    """
    # Local import keeps this block self-contained; StringIO is needed
    # because recent pandas versions deprecate passing literal HTML text
    # to ``read_html``.
    from io import StringIO

    the_quarters = get_year_quarters(start)
    # When the entity's timestamp is not on the same date as ``start`` the
    # first quarter is skipped (presumably it is already recorded -- confirm).
    if not is_same_date(entity.timestamp, start) and len(the_quarters) > 1:
        the_quarters = the_quarters[1:]

    level = self.level.value
    frames = []

    for year, quarter in the_quarters:
        query_url = self.url.format(entity.code, year, quarter)
        response = requests.get(query_url)
        response.encoding = 'gbk'

        try:
            dfs = pd.read_html(StringIO(response.text))
        except ValueError as error:
            self.logger.error(
                f'skip ({year}-{quarter:02d}){entity.code}{entity.name}({error})'
            )
            # Pause before the next request even when this one failed.
            time.sleep(10.0)
            continue

        # The k-line table is expected at index 4; skip pages without it.
        if len(dfs) < 5:
            time.sleep(10.0)
            continue

        # Drop the first row, then copy so the column assignments below
        # act on an independent frame rather than on a slice.
        df = dfs[4].iloc[1:].copy()
        df.columns = [
            'timestamp', 'open', 'high', 'close', 'low', 'volume', 'turnover'
        ]
        df['name'] = entity.name
        df['level'] = level
        df['timestamp'] = pd.to_datetime(df['timestamp'])
        df['provider'] = 'sina'
        frames.append(df)

        self.logger.info(
            f'({entity.code}{entity.name})({year}-{quarter:02d})'
        )
        time.sleep(10.0)

    # Without this guard, sorting a column-less empty frame by 'timestamp'
    # raises KeyError whenever every quarter was skipped.
    if not frames:
        return []

    result_df = pd.concat(frames).sort_values(by='timestamp')
    return result_df.to_dict(orient='records')
def record(self, entity, start, end, size, timestamps):
    """Scrape one quarter of k-line rows for ``entity``, merging bfq and hfq data.

    For the quarter being processed, two pages are fetched: ``'bfq'`` first,
    then ``'hfq'``. Each bfq row becomes a kdata dict. Each hfq row is
    matched to the bfq dict with the same id and adds the ``hfq_*`` prices
    and ``factor``. If no bfq dict exists for an hfq row, an error is logged
    and the plain prices are derived as ``hfq price / factor``.

    Args:
        entity: Object exposing ``id``, ``code``, ``name`` and ``timestamp``.
        start: Passed to ``get_year_quarters`` to choose the quarters.
        end: Not used by this implementation.
        size: Not used by this implementation.
        timestamps: Not used by this implementation.

    Returns:
        The list of kdata dicts for the first quarter processed, or ``None``
        when there is no quarter to process.

    Side effects:
        ``self.latest_factors[entity.id]`` is set to the factor of each hfq
        row in turn, so it ends up holding the last one seen.
    """
    the_quarters = get_year_quarters(start)
    # Treat the first quarter as already recorded when the entity's
    # timestamp is not on the same date as ``start``.
    if not is_same_date(entity.timestamp, start) and len(the_quarters) > 1:
        the_quarters = the_quarters[1:]

    for year, quarter in the_quarters:
        kdatas = []
        # id -> first kdata dict appended with that id; replaces a linear
        # scan of ``kdatas`` for every hfq row.
        # NOTE(review): assumes ids from generate_kdata_id are hashable
        # (presumably strings) -- confirm.
        kdata_by_id = {}

        for fuquan in ['bfq', 'hfq']:
            the_url = self.get_kdata_url(entity.code, year, quarter, fuquan)
            resp = requests.get(the_url)

            trs = Selector(text=resp.text).xpath(
                '//*[@id="FundHoldSharesTable"]/tr[position()>1 and position()<=last()]'
            ).extract()

            for tr in trs:
                tds = Selector(text=tr).xpath('//td//text()').extract()
                tds = [x.strip() for x in tds if x.strip()]

                open_price = tds[1]
                high = tds[2]
                close = tds[3]
                low = tds[4]
                volume = tds[5]
                turnover = tds[6]
                if fuquan == 'hfq':
                    # Only hfq rows are read for an eighth cell (the factor).
                    factor = tds[7]

                the_timestamp = to_pd_timestamp(tds[0])
                the_id = generate_kdata_id(entity_id=entity.id,
                                           timestamp=the_timestamp,
                                           level=self.level)

                if fuquan != 'hfq':
                    kdata = {
                        'id': the_id,
                        'timestamp': the_timestamp,
                        'name': entity.name,
                        'level': self.level.value,
                        'open': to_float(open_price),
                        'close': to_float(close),
                        'high': to_float(high),
                        'low': to_float(low),
                        'volume': to_float(volume),
                        'turnover': to_float(turnover)
                    }
                    kdatas.append(kdata)
                    # Keep the first entry per id, matching a first-match scan.
                    kdata_by_id.setdefault(the_id, kdata)
                    continue

                # hfq pass: bfq rows were collected first, now enrich them.
                hfq_open = to_float(open_price)
                hfq_high = to_float(high)
                hfq_close = to_float(close)
                hfq_low = to_float(low)
                factor_value = to_float(factor)

                kdata = kdata_by_id.get(the_id)
                if kdata is None:
                    self.logger.error(
                        "bfq not got for:{}".format(the_id))
                    # No bfq row: derive the plain prices from hfq / factor.
                    kdata = {
                        'id': the_id,
                        'timestamp': the_timestamp,
                        'name': entity.name,
                        'level': self.level.value,
                        'open': hfq_open / factor_value,
                        'close': hfq_close / factor_value,
                        'high': hfq_high / factor_value,
                        'low': hfq_low / factor_value,
                        'volume': to_float(volume),
                        'turnover': to_float(turnover)
                    }
                    kdatas.append(kdata)
                    kdata_by_id[the_id] = kdata

                kdata['hfq_open'] = hfq_open
                kdata['hfq_high'] = hfq_high
                kdata['hfq_close'] = hfq_close
                kdata['hfq_low'] = hfq_low
                kdata['factor'] = factor_value
                self.latest_factors[entity.id] = factor_value

        # NOTE(review): the source's indentation was lost. ``kdatas`` is
        # re-created per quarter, so the return is kept inside the loop and
        # only the first quarter is handled per call -- confirm.
        return kdatas

    # No quarter to process.
    return None