def set_components_data(self, cdate=None):
    """Download the index component sheet for ``cdate`` and store it.

    The sheet location and layout come from ``self.INDEX_URLS[self.code]``,
    read as ``(url, usecols, column_names)``.

    Args:
        cdate: Date string in ``%Y-%m-%d`` form. Defaults to today's date,
            evaluated at call time (a ``datetime.now()`` default in the
            signature would be frozen when the module is imported).

    Returns:
        True when the date is already recorded for the table, or when the
        rows were written to MySQL and the date was added to the redis set;
        False on any failure.
    """
    if cdate is None:
        cdate = datetime.now().strftime('%Y-%m-%d')

    table_name = self.get_components_table_name(cdate)
    if not self.is_table_exists(table_name):
        if not self.create_components_table(table_name):
            logger.error("create components table failed")
            return False

    if self.is_date_exists(table_name, cdate):
        logger.debug("existed table:%s, date:%s" % (table_name, cdate))
        return True

    source = self.INDEX_URLS[self.code]
    url, columns, column_names = source[0], source[1], source[2]

    df = smart_get(pd.read_excel, url, usecols=columns)
    # An empty sheet is treated like a failed download: there is nothing to
    # store, and the equal-weight fallback below would divide by zero.
    if df is None or df.empty:
        logger.error("data for %s is empty" % self.code)
        return False

    df.columns = column_names
    # Codes are left-padded with zeros to six characters.
    df['code'] = df['code'].astype('str').str.zfill(6)
    df['date'] = cdate
    # Fall back to equal weights only when the sheet has no weight column.
    # (The check used to look for a misspelled 'wieight' column, so weights
    # supplied by the sheet were always overwritten.)
    if 'weight' not in df.columns:
        df['weight'] = 1 / len(df)
    if 'flag' not in df.columns:
        df['flag'] = 1
    df = df.reset_index(drop=True)

    if is_df_has_unexpected_data(df):
        logger.error("data for %s is not clear" % self.code)
        return False

    if not self.mysql_client.set(df, table_name):
        return False
    # Record the date only after the rows were written successfully.
    if self.redis.sadd(table_name, cdate):
        return True
    return False
def set_data(self, cdate=None):
    """Crawl the margin summary and detail for ``cdate`` and store them.

    Args:
        cdate: Date string in ``%Y-%m-%d`` form. Defaults to today's date,
            evaluated at call time (a ``datetime.now()`` default in the
            signature would be frozen when the module is imported).

    Returns:
        True when the date is already recorded for the table; False when
        table creation, either crawl, or the MySQL write fails; otherwise
        the result of ``self.redis.sadd(table_name, cdate)``.
    """
    if cdate is None:
        cdate = datetime.now().strftime('%Y-%m-%d')

    table_name = self.get_table_name(cdate)
    if not self.is_table_exists(table_name):
        if not self.create_table(table_name):
            self.logger.error("create tick table failed")
            return False
        # Register the freshly created table under the database key.
        self.redis.sadd(self.dbname, table_name)

    if self.is_date_exists(table_name, cdate):
        self.logger.debug("existed table:%s, date:%s" % (table_name, cdate))
        return True

    trade_date = transfer_date_string_to_int(cdate)

    total_df = smart_get(self.crawler.margin, trade_date=trade_date)
    if total_df is None:
        self.logger.error("crawel margin for %s failed" % cdate)
        return False
    total_df = total_df.rename(columns={"trade_date": "date", "exchange_id": "code"})
    # NOTE(review): presumably the summary frame lacks these two columns
    # that the detail frame carries - confirm against the crawler schema.
    total_df['rqyl'] = 0
    total_df['rqchl'] = 0

    detail_df = smart_get(self.crawler.margin_detail, trade_date=trade_date)
    if detail_df is None:
        self.logger.error("crawel detail margin for %s failed" % cdate)
        return False
    detail_df = detail_df.rename(columns={"trade_date": "date", "ts_code": "code"})

    # DataFrame.append was removed in pandas 2.0; concat is the equivalent.
    total_df = pd.concat([total_df, detail_df], sort=False)
    total_df['date'] = pd.to_datetime(total_df['date']).dt.strftime("%Y-%m-%d")
    total_df = total_df.reset_index(drop=True)

    if self.mysql_client.set(total_df, table_name):
        time.sleep(1)
        return self.redis.sadd(table_name, cdate)
    return False
def init(self):
    """Fetch the stock basics table and cache it in redis as a pickle.

    Returns:
        False when ``smart_get(ts.get_stock_basics)`` yields ``None``;
        otherwise whatever ``self.redis.set`` returns.
    """
    basics = smart_get(ts.get_stock_basics)
    if basics is None:
        return False
    # drop=False keeps the former index as an ordinary column.
    flattened = basics.reset_index(drop=False)
    # Serialized with pickle protocol 2 under the ct.STOCK_INFO key.
    return self.redis.set(ct.STOCK_INFO, _pickle.dumps(flattened, 2))
def get_data_from_url(self, cdate=None):
    """Download one day of exchange summary statistics as a DataFrame.

    When ``self.market`` equals ``ct.SH_MARKET_SYMBOL`` a JSONP endpoint is
    queried; otherwise one spreadsheet is read per ``ct.SZ_MARKET_DICT``
    entry.

    Args:
        cdate: Date string in ``%Y-%m-%d`` form. Defaults to today's date,
            evaluated at call time (a ``datetime.now()`` default in the
            signature would be frozen when the module is imported).

    Returns:
        A DataFrame with one row per market segment and the columns
        ``name``, ``date``, ``amount``, ``number``, ``negotiable_value``,
        ``market_value``, ``pe``, ``totals``, ``outstanding``, ``volume``,
        ``transactions`` and ``turnover``. An empty DataFrame is returned
        when a download or parse step fails, or when there are no rows.
    """
    if cdate is None:
        cdate = datetime.now().strftime('%Y-%m-%d')

    def sh_number(record, key, cast=float):
        """Return ``cast(record[key])``, treating an empty string as 0."""
        raw = record[key]
        return 0 if raw == '' else cast(raw)

    def sz_number(sheet, indicator):
        """Return the '本日数值' cell of the ``indicator`` row as a float."""
        raw = sheet.loc[sheet['指标名称'] == indicator, '本日数值'].values[0]
        # Cells may arrive as strings with thousands separators.
        if isinstance(raw, str):
            raw = raw.replace(',', '')
        return float(raw)

    if self.market == ct.SH_MARKET_SYMBOL:
        # The URL template takes a random number, the date and a
        # millisecond timestamp.
        url = self.get_url() % (int_random(5), cdate, int(round(time.time() * 1000)))
        response = smart_get(requests.get, url, headers=self.header)
        # Other smart_get results in this function are checked for None;
        # without this check a failed request raises AttributeError below.
        if response is None:
            self.logger.error("get exchange data failed, no response")
            return pd.DataFrame()
        if response.status_code != 200:
            self.logger.error("get exchange data failed, response code:%s" % response.status_code)
            return pd.DataFrame()
        json_result = loads_jsonp(response.text)
        if json_result is None:
            self.logger.error("parse exchange data jsonp failed")
            return pd.DataFrame()

        datas = []
        for json_obj in json_result['result']:
            name = self.get_sh_type_name(json_obj['productType'])
            if name is None:
                self.logger.error("get unknown type for SH data:%s" % json_obj['productType'])
                return pd.DataFrame()
            if name == "科创板":
                continue
            amount = sh_number(json_obj, 'trdAmt')
            number = sh_number(json_obj, 'istVol', int)
            negotiable_value = sh_number(json_obj, 'negotiableValue')
            market_value = sh_number(json_obj, 'marketValue')
            volume = sh_number(json_obj, 'trdVol')
            pe = sh_number(json_obj, 'profitRate')
            transactions = sh_number(json_obj, 'trdTm')
            turnover = sh_number(json_obj, 'exchangeRate')
            # Derived from volume and turnover; totals reuses the same value.
            outstanding = 0 if turnover == 0 else volume / (100 * turnover)
            totals = outstanding
            data = {
                'amount': amount,
                'number': number,
                'negotiable_value': negotiable_value,
                'market_value': market_value,
                'pe': pe,
                'totals': totals,
                'outstanding': outstanding,
                'volume': volume,
                'transactions': transactions,
                'turnover': turnover,
            }
            # Rows whose every figure is zero are dropped.
            if any(data.values()):
                data['name'] = name
                data['date'] = cdate
                datas.append(data)
        df = pd.DataFrame(datas)
    else:
        hundred_million = 100000000
        datas = []
        for name, tab in ct.SZ_MARKET_DICT.items():
            url = self.get_url() % (tab, cdate, float_random(17))
            sheet = smart_get(pd.read_excel, url, usecols=[0, 1])
            if sheet is None:
                return pd.DataFrame()
            if sheet.empty:
                continue
            # A single cell holding this message marks a day without data.
            if len(sheet) == 1 and sheet.values[0][0] == '没有找到符合条件的数据!':
                continue
            if name == "深圳市场":
                # amount, volume and transactions start at 0 here and are
                # filled from the other rows once all sheets are read.
                amount = 0
                number = int(sz_number(sheet, '上市公司数'))
                negotiable_value = sz_number(sheet, '股票流通市值(元)') / hundred_million
                market_value = sz_number(sheet, '股票总市值(元)') / hundred_million
                pe = sz_number(sheet, '股票平均市盈率')
                totals = sz_number(sheet, '股票总股本(股)') / hundred_million
                outstanding = sz_number(sheet, '股票流通股本(股)') / hundred_million
                volume = 0
                transactions = 0
                turnover = sz_number(sheet, '股票平均换手率')
            else:
                amount = sz_number(sheet, '总成交金额(元)') / hundred_million
                number = int(sz_number(sheet, '上市公司数'))
                negotiable_value = sz_number(sheet, '上市公司流通市值(元)') / hundred_million
                market_value = sz_number(sheet, '上市公司市价总值(元)') / hundred_million
                pe = sz_number(sheet, '平均市盈率(倍)')
                totals = sz_number(sheet, '总发行股本(股)') / hundred_million
                outstanding = sz_number(sheet, '总流通股本(股)') / hundred_million
                volume = sz_number(sheet, '总成交股数') / hundred_million
                transactions = sz_number(sheet, '总成交笔数') / 10000
                # Guard against zero outstanding, as the SH branch guards
                # its own derived ratio.
                turnover = 0 if outstanding == 0 else 100 * volume / outstanding
            datas.append({
                'name': name,
                'date': cdate,
                'amount': amount,
                'number': number,
                'negotiable_value': negotiable_value,
                'market_value': market_value,
                'pe': pe,
                'totals': totals,
                'outstanding': outstanding,
                'volume': volume,
                'transactions': transactions,
                'turnover': turnover,
            })
        df = pd.DataFrame(datas)
        if not df.empty:
            # The 深圳市场 row becomes the column total minus its own value.
            # .loc is used because .at is a scalar accessor and must not be
            # given a boolean mask.
            whole_market = df['name'] == "深圳市场"
            for column in ('amount', 'volume', 'transactions'):
                df.loc[whole_market, column] = df[column].sum() - df.loc[whole_market, column]
    return df
def process(self, req_date):
    """Load ``self.link``, pick ``req_date`` on the page and run the search.

    Args:
        req_date: Date string of the form ``YYYY-MM-DD``; it is split on
            ``'-'`` into year, month and day.

    Returns:
        ``self.driver.page_source`` after the search button was clicked, or
        None as soon as any step fails (each failure is logged).
    """
    year, month, day = req_date.split('-')

    def press(finder, locator, missing_template, click_template):
        """Locate one element and click it; log and return False on failure."""
        target = smart_get(finder, locator)
        if target is None:
            self.logger.error(missing_template % self.link)
            return False
        if not self.smart_call(target.click, None):
            self.logger.error(click_template % self.link)
            return False
        return True

    # NOTE(review): smart_call presumably treats the None argument as the
    # expected result of the wrapped call - confirm against its definition.
    if not self.smart_call(self.driver.set_page_load_timeout, None, 30):
        self.logger.error("set page load timeout failed.")
        return None
    if not self.smart_call(self.driver.get, None, self.link):
        self.logger.error("%s get source page failed." % self.link)
        return None

    # Click the date input first, then the year, month and day buttons.
    if not press(self.driver.find_element_by_id,
                 'txtShareholdingDate',
                 "%s find txtShareholdingDate element by xpath failed.",
                 "%s get txtShareholdingDate element by xpath failed."):
        return None
    if not press(self.driver.find_element_by_xpath,
                 "//b[@class='year']//button[@data-value=%s]" % year,
                 "%s find year element by xpath failed.",
                 "%s get find year element by xpath failed."):
        return None
    # The month button is addressed by a zero-based value.
    if not press(self.driver.find_element_by_xpath,
                 "//b[@class='month']//button[@data-value=%s]" % (int(month) - 1),
                 "%s find month element by xpath failed.",
                 "%s get month element by xpath failed."):
        return None
    # int() drops a leading zero from the day.
    if not press(self.driver.find_element_by_xpath,
                 "//b[@class='day']//button[@data-value=%s]" % int(day),
                 "%s find day element by xpath failed.",
                 "%s get day element by xpath failed."):
        return None
    if not press(self.driver.find_element_by_name,
                 "btnSearch",
                 "%s find search button by xpath failed.",
                 "%s search button result by xpath failed."):
        return None
    return self.driver.page_source