class PlateValuationCrawler(object): def __init__(self, dbinfo = ct.DB_INFO, redis_host = None): self.dbname = self.get_dbname() self.table_name = self.get_tablename() self.redis = create_redis_obj() if redis_host is None else create_redis_obj(host = redis_host) self.mysql_client = CMySQL(dbinfo, self.dbname, iredis = self.redis) if not self.mysql_client.create_db(self.dbname): raise Exception("create plate database failed") if not self.create_table(self.table_name): raise Exception("create valuation table failed") @staticmethod def get_dbname(): return "plate" @staticmethod def get_tablename(): return "valuation" def create_table(self, table): sql = 'create table if not exists %s(date varchar(10) not null,\ code varchar(10) not null,\ name varchar(50),\ pe float,\ ttm float,\ pb float,\ dividend float,\ PRIMARY KEY(date, code))' % table return True if table in self.mysql_client.get_all_tables() else self.mysql_client.create(sql, table)
class InvestorCrawler(object): def __init__(self, dbinfo = ct.DB_INFO, redis_host = None): self.dbname = self.get_dbname() self.table = self.get_table_name() self.redis = create_redis_obj() if redis_host is None else create_redis_obj(host = redis_host) self.mysql_client = CMySQL(dbinfo, self.dbname, iredis = self.redis) if not self.mysql_client.create_db(self.dbname): raise Exception("init stock database failed") if not self.create_table(): raise Exception("init week investor table failed") @staticmethod def get_dbname(): return "stock" @staticmethod def get_table_name(): return "investor" def create_table(self): sql = 'create table if not exists %s(date varchar(10) not null,\ new_investor float,\ final_investor float,\ new_natural_person float,\ new_non_natural_person float,\ final_natural_person float,\ final_non_natural_person float,\ PRIMARY KEY(date))' % self.table return True if self.table in self.mysql_client.get_all_tables() else self.mysql_client.create(sql, self.table)
class PlatePERatioCrawler(object): def __init__(self, dbinfo=ct.DB_INFO, redis_host=None): self.dbname = self.get_dbname() self.redis = create_redis_obj( ) if redis_host is None else create_redis_obj(host=redis_host) self.mysql_client = CMySQL(dbinfo, self.dbname, iredis=self.redis) if not self.mysql_client.create_db(self.dbname): raise Exception("init pledge database failed") @staticmethod def get_dbname(): return "plate_pe" def create_table(self, table): import pdb pdb.set_trace() sql = 'create table if not exists %s(date varchar(10) not null,\ code varchar(10) not null,\ name varchar(50),\ pledge_counts int,\ unlimited_pledge_stocks float,\ limited_pledge_stocks float,\ total_stocks float,\ pledge_ratio float,\ PRIMARY KEY(date, code))' % table return True if table in self.mysql_client.get_all_tables( ) else self.mysql_client.create(sql, table)
class ChinaTreasuryRateCrawler(object): def __init__(self, dbinfo = ct.DB_INFO, redis_host = None): self.dbname = self.get_dbname() self.table_name = self.get_tablename() self.redis = create_redis_obj() if redis_host is None else create_redis_obj(host = redis_host) self.mysql_client = CMySQL(dbinfo, self.dbname, iredis = self.redis) if not self.mysql_client.create_db(self.dbname): raise Exception("create china treasury database failed") if not self.create_table(self.table_name): raise Exception("create rate table failed") @staticmethod def get_dbname(): return "china_treasury" @staticmethod def get_tablename(): return "yield" def create_table(self, table): sql = 'create table if not exists %s(date varchar(10) not null,\ name varchar(50),\ month3 float,\ month6 float,\ year1 float,\ year3 float,\ year5 float,\ year7 float,\ year10 float,\ year30 float,\ PRIMARY KEY(date, name))' % table return True if table in self.mysql_client.get_all_tables() else self.mysql_client.create(sql, table)
class HkexCrawler(object): def __init__(self, dbinfo = ct.DB_INFO, redis_host = None): for (market_from, market_to) in [(ct.SH_MARKET_SYMBOL, ct.HK_MARKET_SYMBOL), (ct.HK_MARKET_SYMBOL, ct.SH_MARKET_SYMBOL), (ct.SZ_MARKET_SYMBOL, ct.HK_MARKET_SYMBOL), (ct.HK_MARKET_SYMBOL, ct.SZ_MARKET_SYMBOL)]: dbname = self.get_dbname(market_from, market_to) self.mysql_client = CMySQL(dbinfo, dbname) if not self.mysql_client.create_db(dbname): raise Exception("init hkex crawler database failed") #self.mysql_client.delete(self.get_topten_table(dbname)) if not self.create_topten_table(dbname): raise Exception("init hkex crawler topten table failed") #self.mysql_client.delete(self.get_capital_table(dbname)) if not self.create_capital_table(dbname): raise Exception("init hkex crawler capital table failed failed") @staticmethod def get_dbname(market_from, market_to): return "%s2%s" % (market_from, market_to) @staticmethod def get_topten_table(dbname): return "%s_topten" % dbname @staticmethod def get_capital_table(dbname): return "%s_capital_overview" % dbname def create_topten_table(self, dbname): table = self.get_topten_table(dbname) sql = 'create table if not exists %s(date varchar(10) not null,\ code varchar(10) not null,\ name varchar(50),\ rank int,\ total_turnover float,\ buy_turnover float,\ sell_turnover float,\ PRIMARY KEY(date, code))' % table return True if table in self.mysql_client.get_all_tables() else self.mysql_client.create(sql, table) def create_capital_table(self, dbname): table = self.get_capital_table(dbname) sql = 'create table if not exists %s(date varchar(10) not null,\ total_turnover float,\ buy_turnover float,\ sell_turnover float,\ total_trade_count long,\ buy_trade_count long,\ sell_trade_count long,\ PRIMARY KEY(date))' % table return True if table in self.mysql_client.get_all_tables() else self.mysql_client.create(sql, table) def run(self): settings = get_project_settings() myrunner = CrawlerRunner(settings) myrunner.crawl(HkexSpider) d = myrunner.join() d.addBoth(lambda _: reactor.stop()) reactor.run() #the script will block here until the crawling is finished return True
class MonthInvestorCrawler(object): def __init__(self, dbinfo = ct.DB_INFO, redis_host = None): self.dbname = self.get_dbname() self.table = self.get_table_name() self.redis = create_redis_obj() if redis_host is None else create_redis_obj(host = redis_host) self.mysql_client = CMySQL(dbinfo, self.dbname, iredis = self.redis) if not self.mysql_client.create_db(self.dbname): raise Exception("init stock database failed") if not self.create_table(): raise Exception("init month investor table failed") @staticmethod def get_dbname(): return "stock" @staticmethod def get_table_name(): return "month_investor" def create_table(self): sql = 'create table if not exists %s(date varchar(10) not null,\ new_investor float,\ new_natural_person float,\ new_non_natural_person float,\ final_investor float,\ final_natural_person float,\ final_natural_a_person float,\ final_natural_b_person float,\ final_non_natural_person float,\ final_non_natural_a_person float,\ final_non_natural_b_person float,\ final_hold_investor float,\ final_a_hold_investor float,\ final_b_hold_investor float,\ trading_investor float,\ trading_a_investor float,\ trading_b_investor float,\ PRIMARY KEY(date))' % self.table return True if self.table in self.mysql_client.get_all_tables() else self.mysql_client.create(sql, self.table) def get_data(self, mdate): table_name = self.get_table_name() if date is not None: sql = "select * from %s where date=\"%s\"" %(table_name, date) else: sql = "select * from %s" % table_name return self.mysql_client.get(sql) def get_data_in_range(self, start_date, end_date): table_name = self.get_table_name() sql = "select * from %s where date between \"%s\" and \"%s\"" %(table_name, start_date, end_date) return self.mysql_client.get(sql)
class CMysqlObj(CClass): __slots__ = ("code", "redis", "mysql_client") def __init__(self, code, dbname, dbinfo, redis_host): self.code = code self._dbname = dbname self.redis = create_redis_obj( ) if redis_host is None else create_redis_obj(redis_host) self.mysql_client = CMySQL(dbinfo=dbinfo, dbname=dbname, iredis=self.redis) def __del__(self): self.redis = None self.mysql_client = None def create_db(self, db_name): return self.mysql_client.create_db(db_name) def is_table_exists(self, table_name): if self.redis.exists(self.dbname): return table_name in set( table.decode() for table in self.redis.smembers(self.dbname)) return False def is_date_exists(self, table_name, cdate): if self.redis.exists(table_name): return cdate in set(tdate.decode() for tdate in self.redis.smembers(table_name)) return False def get_existed_keys_list(self, table_name): if self.redis.exists(table_name): return list(tdate.decode() for tdate in self.redis.smembers(table_name)) return list() @property def dbname(self): return self._dbname @dbname.setter def dbname(self, value): self._dbname = value
class StockLimitCrawler(object): def __init__(self, dbinfo=ct.DB_INFO, redis_host=None): self.dbname = self.get_dbname() self.table_name = self.get_tablename() self.redis = create_redis_obj( ) if redis_host is None else create_redis_obj(host=redis_host) self.mysql_client = CMySQL(dbinfo, self.dbname, iredis=self.redis) if not self.mysql_client.create_db(self.dbname): raise Exception("create stock limit database failed") if not self.create_table(self.table_name): raise Exception("create stock limit table failed") @staticmethod def get_dbname(): return "stock" @staticmethod def get_tablename(): return "limitup" def create_table(self, table): sql = 'create table if not exists %s(date varchar(10) not null,\ code varchar(6) not null,\ price float,\ pchange float,\ prange float,\ concept varchar(200),\ fcb float,\ flb float,\ fdmoney float,\ first_time varchar(20),\ last_time varchar(20),\ open_times varchar(20),\ intensity float,\ PRIMARY KEY (date, code))' % self.table return True if table in self.mysql_client.get_all_tables( ) else self.mysql_client.create(sql, table)
class RIndexStock: def __init__(self, dbinfo=ct.DB_INFO, redis_host=None): self.redis = create_redis_obj( ) if redis_host is None else create_redis_obj(host=redis_host) self.dbname = self.get_dbname() self.redis_host = redis_host self.logger = getLogger(__name__) self.mysql_client = CMySQL(dbinfo, self.dbname, iredis=self.redis) if not self.mysql_client.create_db(self.get_dbname()): raise Exception("init rstock database failed") @staticmethod def get_dbname(): return ct.RINDEX_STOCK_INFO_DB def get_table_name(self, cdate): cdates = cdate.split('-') return "%s_day_%s_%s" % (self.get_dbname(), cdates[0], (int(cdates[1]) - 1) // 3 + 1) def is_date_exists(self, table_name, cdate): if self.redis.exists(table_name): return cdate in set( str(tdate, encoding="utf8") for tdate in self.redis.smembers(table_name)) return False def is_table_exists(self, table_name): if self.redis.exists(self.dbname): return table_name in set( str(table, encoding="utf8") for table in self.redis.smembers(self.dbname)) return False def create_table(self, table): sql = 'create table if not exists %s(date varchar(10) not null,\ code varchar(10) not null,\ open float,\ high float,\ close float,\ preclose float,\ low float,\ volume float,\ amount float,\ outstanding float,\ totals float,\ adj float,\ aprice float,\ pchange float,\ turnover float,\ sai float,\ sri float,\ uprice float,\ sprice float,\ mprice float,\ lprice float,\ ppercent float,\ npercent float,\ base float,\ ibase bigint,\ breakup int,\ ibreakup bigint,\ pday int,\ profit float,\ gamekline float,\ PRIMARY KEY (date, code))' % table return True if table in self.mysql_client.get_all_tables( ) else self.mysql_client.create(sql, table) def get_k_data_in_range(self, start_date, end_date): ndays = delta_days(start_date, end_date) date_dmy_format = time.strftime("%m/%d/%Y", time.strptime(start_date, "%Y-%m-%d")) data_times = pd.date_range(date_dmy_format, periods=ndays, freq='D') date_only_array = np.vectorize(lambda s: s.strftime('%Y-%m-%d'))( data_times.to_pydatetime()) data_dict = OrderedDict() for _date in date_only_array: if CCalendar.is_trading_day(_date, redis=self.redis): table_name = self.get_table_name(_date) if table_name not in data_dict: data_dict[table_name] = list() data_dict[table_name].append(str(_date)) all_df = pd.DataFrame() for key in data_dict: table_list = sorted(data_dict[key], reverse=False) if len(table_list) == 1: df = self.get_data(table_list[0]) if df is not None: all_df = all_df.append(df) else: start_date = table_list[0] end_date = table_list[len(table_list) - 1] df = self.get_data_between(start_date, end_date) if df is not None: all_df = all_df.append(df) return all_df def get_data_between(self, start_date, end_date): #start_date and end_date should be in the same table sql = "select * from %s where date between \"%s\" and \"%s\"" % ( self.get_table_name(start_date), start_date, end_date) return self.mysql_client.get(sql) def get_data(self, cdate=datetime.now().strftime('%Y-%m-%d')): sql = "select * from %s where date=\"%s\"" % ( self.get_table_name(cdate), cdate) return self.mysql_client.get(sql) def get_stock_data(self, cdate, code): return (code, CStock(code).get_k_data(cdate)) def generate_all_data_1(self, cdate, black_list=list()): failed_list = CStockInfo(redis_host=self.redis_host).get( redis=self.redis).code.tolist() if len(black_list) > 0: failed_list = list(set(failed_list).difference(set(black_list))) cfunc = partial(self.get_stock_data, cdate) return queue_process_concurrent_run(cfunc, failed_list, redis_client=self.redis) def generate_all_data(self, cdate, black_list=ct.BLACK_LIST): from gevent.pool import Pool obj_pool = Pool(5000) failed_list = CStockInfo(redis_host=self.redis_host).get( redis=self.redis).code.tolist() if len(black_list) > 0: failed_list = list(set(failed_list).difference(set(black_list))) all_df = pd.DataFrame() last_length = len(failed_list) cfunc = partial(self.get_stock_data, cdate) while last_length > 0: self.logger.info("all stock list:%s, cdate:%s", len(failed_list), cdate) for code_data in obj_pool.imap_unordered(cfunc, failed_list): if code_data[1] is not None: tem_df = code_data[1] tem_df['code'] = code_data[0] all_df = all_df.append(tem_df) failed_list.remove(code_data[0]) if len(failed_list) != last_length: self.logger.debug( "last failed list:%s, current failed list:%s" % (last_length, len(failed_list))) last_length = len(failed_list) else: if last_length > 0: time.sleep(600) obj_pool.join(timeout=5) obj_pool.kill() all_df = all_df.drop_duplicates() all_df = all_df.sort_values(by='date', ascending=True) all_df = all_df.reset_index(drop=True) return all_df def update(self, end_date=datetime.now().strftime('%Y-%m-%d'), num=30): #if end_date == datetime.now().strftime('%Y-%m-%d'): end_date = get_day_nday_ago(end_date, num = 1, dformat = "%Y-%m-%d") start_date = get_day_nday_ago(end_date, num=num, dformat="%Y-%m-%d") date_array = get_dates_array(start_date, end_date) succeed = True count = 0 for mdate in date_array: count += 1 if CCalendar.is_trading_day(mdate, redis=self.redis): if not self.set_day_data(mdate): self.logger.error("set %s data for rstock failed" % mdate) succeed = False return succeed def set_day_data(self, cdate): table_name = self.get_table_name(cdate) if not self.is_table_exists(table_name): if not self.create_table(table_name): self.logger.error("create tick table failed") return False self.redis.sadd(self.dbname, table_name) if self.is_date_exists(table_name, cdate): self.logger.debug("existed table:%s, date:%s" % (table_name, cdate)) return True df = self.generate_all_data(cdate) if df is None: return False if self.mysql_client.set(df, table_name): self.redis.sadd(table_name, cdate) return True return False
class RIndexIndustryInfo: def __init__(self, dbinfo=ct.DB_INFO, redis_host=None): self.redis = create_redis_obj( ) if redis_host is None else create_redis_obj(host=redis_host) self.dbname = self.get_dbname() self.logger = getLogger(__name__) self.mysql_client = CMySQL(dbinfo, self.dbname, iredis=self.redis) if not self.mysql_client.create_db(self.get_dbname()): raise Exception("init rindex stock database failed") @staticmethod def get_dbname(): return ct.RINDEX_INDUSTRY_INFO_DB def get_table_name(self, cdate): cdates = cdate.split('-') return "rindustry_day_%s_%s" % (cdates[0], (int(cdates[1]) - 1) // 3 + 1) def is_date_exists(self, table_name, cdate): if self.redis.exists(table_name): return cdate in set( str(tdate, encoding="utf8") for tdate in self.redis.smembers(table_name)) return False def is_table_exists(self, table_name): if self.redis.exists(self.dbname): return table_name in set( str(table, encoding="utf8") for table in self.redis.smembers(self.dbname)) return False def create_table(self, table): sql = 'create table if not exists %s(date varchar(10) not null,\ code varchar(10) not null,\ open float,\ high float,\ close float,\ preclose float,\ low float,\ volume bigint,\ amount float,\ preamount float,\ pchange float,\ mchange float,\ PRIMARY KEY (date, code))' % table return True if table in self.mysql_client.get_all_tables( ) else self.mysql_client.create(sql, table) def get_k_data_in_range(self, start_date, end_date): ndays = delta_days(start_date, end_date) date_dmy_format = time.strftime("%m/%d/%Y", time.strptime(start_date, "%Y-%m-%d")) data_times = pd.date_range(date_dmy_format, periods=ndays, freq='D') date_only_array = np.vectorize(lambda s: s.strftime('%Y-%m-%d'))( data_times.to_pydatetime()) data_dict = OrderedDict() for _date in date_only_array: if CCalendar.is_trading_day(_date, redis=self.redis): table_name = self.get_table_name(_date) if table_name not in data_dict: data_dict[table_name] = list() data_dict[table_name].append(str(_date)) all_df = pd.DataFrame() for key in data_dict: table_list = sorted(data_dict[key], reverse=False) if len(table_list) == 1: df = self.get_data(table_list[0]) if df is not None: all_df = all_df.append(df) else: start_date = table_list[0] end_date = table_list[len(table_list) - 1] df = self.get_data_between(start_date, end_date) if df is not None: all_df = all_df.append(df) return all_df def get_data_between(self, start_date, end_date): #start_date and end_date shoulw be in the same table sql = "select * from %s where date between \"%s\" and \"%s\"" % ( self.get_table_name(start_date), start_date, end_date) return self.mysql_client.get(sql) def get_k_data(self, cdate): cdate = datetime.now().strftime('%Y-%m-%d') if cdate is None else cdate sql = "select * from %s where date=\"%s\"" % ( self.get_table_name(cdate), cdate) return self.mysql_client.get(sql) def get_industry_data(self, cdate, code): return (code, CIndex(code).get_k_data(cdate)) def generate_data(self, cdate): good_list = list() obj_pool = Pool(500) all_df = pd.DataFrame() industry_info = IndustryInfo.get(self.redis) failed_list = industry_info.code.tolist() cfunc = partial(self.get_industry_data, cdate) failed_count = 0 while len(failed_list) > 0: is_failed = False self.logger.debug("restart failed ip len(%s)" % len(failed_list)) for code_data in obj_pool.imap_unordered(cfunc, failed_list): if code_data[1] is not None: tem_df = code_data[1] tem_df['code'] = code_data[0] all_df = all_df.append(tem_df) failed_list.remove(code_data[0]) else: is_failed = True if is_failed: failed_count += 1 if failed_count > 10: self.logger.info("%s rindustry init failed" % failed_list) return pd.DataFrame() time.sleep(10) obj_pool.join(timeout=5) obj_pool.kill() self.mysql_client.changedb(self.get_dbname()) if all_df.empty: return all_df all_df = all_df.reset_index(drop=True) return all_df def set_data(self, cdate=datetime.now().strftime('%Y-%m-%d')): if not CCalendar.is_trading_day(cdate, redis=self.redis): return False table_name = self.get_table_name(cdate) if not self.is_table_exists(table_name): if not self.create_table(table_name): self.logger.error("create rindex table failed") return False self.redis.sadd(self.dbname, table_name) if self.is_date_exists(table_name, cdate): self.logger.debug("existed rindex table:%s, date:%s" % (table_name, cdate)) return True df = self.generate_data(cdate) if df.empty: return False self.redis.set(ct.TODAY_ALL_INDUSTRY, _pickle.dumps(df, 2)) if self.mysql_client.set(df, table_name): return self.redis.sadd(table_name, cdate) return False def update(self, end_date=None, num=10): if end_date is None: end_date = datetime.now().strftime('%Y-%m-%d') #if end_date == datetime.now().strftime('%Y-%m-%d'): end_date = get_day_nday_ago(end_date, num = 1, dformat = "%Y-%m-%d") start_date = get_day_nday_ago(end_date, num=num, dformat="%Y-%m-%d") date_array = get_dates_array(start_date, end_date) succeed = True for mdate in date_array: if CCalendar.is_trading_day(mdate, redis=self.redis): if not self.set_data(mdate): self.logger.error("%s rindustry set failed" % mdate) succeed = False return succeed
class StockExchange(object): def __init__(self, market = ct.SH_MARKET_SYMBOL, dbinfo = ct.DB_INFO, redis_host = None): self.market = market self.dbinfo = dbinfo self.balcklist = ['2006-07-10'] if market == ct.SH_MARKET_SYMBOL else list() self.logger = getLogger(__name__) self.dbname = self.get_dbname(market) self.redis = create_redis_obj() if redis_host is None else create_redis_obj(host = redis_host) self.header = {"Host": "query.sse.com.cn", "Referer": "http://www.sse.com.cn/market/stockdata/overview/day/", "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36"} self.mysql_client = CMySQL(self.dbinfo, dbname = self.dbname, iredis = self.redis) if not self.mysql_client.create_db(self.dbname): raise Exception("create %s failed" % self.dbname) @staticmethod def get_dbname(market): return market def get_table_name(self): return "%s_deal" % self.dbname def create_table(self): table = self.get_table_name() if table not in self.mysql_client.get_all_tables(): sql = 'create table if not exists %s(date varchar(10) not null,\ name varchar(20) not null,\ amount float,\ number int,\ negotiable_value float,\ market_value float,\ pe float,\ totals float,\ outstanding float,\ volume float,\ transactions float,\ turnover float,\ PRIMARY KEY (date, name))' % table if not self.mysql_client.create(sql, table): return False return True def get_k_data_in_range(self, start_date, end_date): sql = "select * from %s where date between \"%s\" and \"%s\"" % (self.get_table_name(), start_date, end_date) return self.mysql_client.get(sql) def get_k_data(self, cdate = datetime.now().strftime('%Y-%m-%d')): sql = "select * from %s where date=\"%s\"" % (self.get_table_name(), cdate) return self.mysql_client.get(sql) def is_table_exists(self, table_name): if self.redis.exists(self.dbname): return table_name in set(str(table, encoding = "utf8") for table in self.redis.smembers(self.dbname)) return False def is_date_exists(self, table_name, cdate): if self.redis.exists(table_name): return cdate in set(str(tdate, encoding = "utf8") for tdate in self.redis.smembers(table_name)) return False def get_url(self): if self.market == ct.SH_MARKET_SYMBOL: return "http://query.sse.com.cn/marketdata/tradedata/queryTradingByProdTypeData.do?jsonCallBack=jsonpCallback%s&searchDate=%s&prodType=gp&_=%s" else: return "http://www.szse.cn/api/report/ShowReport?SHOWTYPE=xlsx&CATALOGID=1803&TABKEY=%s&txtQueryDate=%s&random=%s" def get_sh_type_name(self, dtype): if '1' == dtype: return "A股" elif '2' == dtype: return "B股" elif '12' == dtype: return "上海市场" elif '90' == dtype: return "科创板" else: return None def get_data_from_url(self, cdate = datetime.now().strftime('%Y-%m-%d')): if self.market == ct.SH_MARKET_SYMBOL: current_milli_time = lambda: int(round(time.time() * 1000)) url = self.get_url() % (int_random(5), cdate, current_milli_time()) response = smart_get(requests.get, url, headers=self.header) if response.status_code != 200: self.logger.error("get exchange data failed, response code:%s" % response.status_code) return pd.DataFrame() json_result = loads_jsonp(response.text) if json_result is None: self.logger.error("parse exchange data jsonp failed") return pd.DataFrame() datas = list() for json_obj in json_result['result']: name = self.get_sh_type_name(json_obj['productType']) if name is None: self.logger.error("get unknown type for SH data:%s" % json_obj['productType']) return pd.DataFrame() elif name == "科创板": continue else: amount = 0 if json_obj['trdAmt'] == '' else float(json_obj['trdAmt']) number = 0 if json_obj['istVol'] == '' else int(json_obj['istVol']) negotiable_value = 0 if json_obj['negotiableValue'] == '' else float(json_obj['negotiableValue']) market_value = 0 if json_obj['marketValue'] == '' else float(json_obj['marketValue']) volume = 0 if json_obj['trdVol'] == '' else float(json_obj['trdVol']) pe = 0 if json_obj['profitRate'] == '' else float(json_obj['profitRate']) transactions = 0 if json_obj['trdTm'] == '' else float(json_obj['trdTm']) turnover = 0 if json_obj['exchangeRate'] == '' else float(json_obj['exchangeRate']) outstanding = 0 if turnover == 0 else volume / (100 * turnover) totals = outstanding data = {'amount': amount,\ 'number': number,\ 'negotiable_value': negotiable_value,\ 'market_value': market_value,\ 'pe': pe,\ 'totals': totals,\ 'outstanding': outstanding,\ 'volume': volume,\ 'transactions': transactions,\ 'turnover': turnover} if any(data.values()): data['name'] = name data['date'] = cdate datas.append(data) df = pd.DataFrame.from_dict(datas) else: datas = list() for name, tab in ct.SZ_MARKET_DICT.items(): url = self.get_url() % (tab, cdate, float_random(17)) df = smart_get(pd.read_excel, url, usecols = [0, 1]) if df is None: return pd.DataFrame() if df.empty: continue if len(df) == 1 and df.values[0][0] == '没有找到符合条件的数据!': continue if name == "深圳市场": amount = 0 #amount = float(df.loc[df['指标名称'] == '市场总成交金额(元)', '本日数值'].values[0].replace(',', '')) / 100000000 number = int(float(df.loc[df['指标名称'] == '上市公司数', '本日数值'].values[0].replace(',', ''))) negotiable_value = float(df.loc[df['指标名称'] == '股票流通市值(元)', '本日数值'].values[0].replace(',', '')) / 100000000 market_value = float(df.loc[df['指标名称'] == '股票总市值(元)', '本日数值'].values[0].replace(',', '')) / 100000000 pe = float(df.loc[df['指标名称'] == '股票平均市盈率', '本日数值'].values[0].replace(',', '')) totals = float(df.loc[df['指标名称'] == '股票总股本(股)', '本日数值'].values[0].replace(',', '')) / 100000000 outstanding = float(df.loc[df['指标名称'] == '股票流通股本(股)', '本日数值'].values[0].replace(',', '')) / 100000000 volume = 0 transactions = 0 turnover = float(df.loc[df['指标名称'] == '股票平均换手率', '本日数值'].values[0]) else: amount = float(df.loc[df['指标名称'] == '总成交金额(元)', '本日数值'].values[0].replace(',', '')) / 100000000 number = int(float(df.loc[df['指标名称'] == '上市公司数', '本日数值'].values[0].replace(',', ''))) negotiable_value = float(df.loc[df['指标名称'] == '上市公司流通市值(元)', '本日数值'].values[0].replace(',', '')) / 100000000 market_value = float(df.loc[df['指标名称'] == '上市公司市价总值(元)', '本日数值'].values[0].replace(',', '')) / 100000000 pe = float(df.loc[df['指标名称'] == '平均市盈率(倍)', '本日数值'].values[0]) totals = float(df.loc[df['指标名称'] == '总发行股本(股)', '本日数值'].values[0].replace(',', '')) / 100000000 outstanding = float(df.loc[df['指标名称'] == '总流通股本(股)', '本日数值'].values[0].replace(',', '')) / 100000000 volume = float(df.loc[df['指标名称'] == '总成交股数', '本日数值'].values[0].replace(',', '')) / 100000000 transactions = float(df.loc[df['指标名称'] == '总成交笔数', '本日数值'].values[0].replace(',', '')) / 10000 turnover = 100 * volume / outstanding data = { 'name': name,\ 'date': cdate,\ 'amount': amount,\ 'number': number,\ 'negotiable_value': negotiable_value,\ 'market_value': market_value,\ 'pe': pe,\ 'totals': totals,\ 'outstanding': outstanding,\ 'volume': volume,\ 'transactions': transactions,\ 'turnover': turnover } datas.append(data) df = pd.DataFrame.from_dict(datas) if not df.empty: df.at[df.name == "深圳市场", 'amount'] = df.amount.sum() - df.loc[df.name == "深圳市场", 'amount'] df.at[df.name == "深圳市场", 'volume'] = df.volume.sum() - df.loc[df.name == "深圳市场", 'volume'] df.at[df.name == "深圳市场", 'transactions'] = df.transactions.sum() - df.loc[df.name == "深圳市场", 'transactions'] return df def set_k_data(self, cdate = datetime.now().strftime('%Y-%m-%d')): table_name = self.get_table_name() if not self.is_table_exists(table_name): if not self.create_table(): self.logger.error("create tick table failed") return False self.redis.sadd(self.dbname, table_name) if self.is_date_exists(table_name, cdate): self.logger.debug("existed table:%s, date:%s" % (table_name, cdate)) return True df = self.get_data_from_url(cdate) if df.empty: self.logger.debug("get data from %s failed, date:%s" % (self.market, cdate)) return False if self.mysql_client.set(df, table_name): self.redis.sadd(table_name, cdate) return True return False def update(self, end_date = None, num = 10): if end_date is None: end_date = datetime.now().strftime('%Y-%m-%d') if end_date == datetime.now().strftime('%Y-%m-%d'): end_date = get_day_nday_ago(end_date, num = 1, dformat = "%Y-%m-%d") start_date = get_day_nday_ago(end_date, num = num, dformat = "%Y-%m-%d") succeed = True for mdate in get_dates_array(start_date, end_date): if mdate in self.balcklist: continue if CCalendar.is_trading_day(mdate, redis = self.redis): if not self.set_k_data(mdate): succeed = False self.logger.info("market %s for %s set failed" % (self.market, mdate)) return succeed
class Margin(object): def __init__(self, dbinfo=ct.DB_INFO, redis_host=None): self.logger = getLogger(__name__) self.crawler = get_tushare_client() self.dbname = self.get_dbname() self.redis = create_redis_obj( ) if redis_host is None else create_redis_obj(host=redis_host) self.mysql_client = CMySQL(dbinfo, self.dbname, iredis=self.redis) if not self.mysql_client.create_db(self.dbname): raise Exception("init margin database failed") @staticmethod def get_dbname(): return "margin" def get_table_name(self, cdate): cdates = cdate.split('-') return "%s_day_%s_%s" % (self.dbname, cdates[0], (int(cdates[1]) - 1) // 3 + 1) def is_date_exists(self, table_name, cdate): if self.redis.exists(table_name): return self.redis.sismember(table_name, cdate) return False def create_table(self, table): sql = 'create table if not exists %s(date varchar(10) not null,\ code varchar(10) not null,\ rzye float,\ rzmre float,\ rzche float,\ rqye float,\ rqyl float,\ rqmcl float,\ rqchl float,\ rzrqye float,\ PRIMARY KEY (date, code))' % table return True if table in self.mysql_client.get_all_tables( ) else self.mysql_client.create(sql, table) def get_k_data_in_range(self, start_date, end_date): ndays = delta_days(start_date, end_date) date_dmy_format = time.strftime("%m/%d/%Y", time.strptime(start_date, "%Y-%m-%d")) data_times = pd.date_range(date_dmy_format, periods=ndays, freq='D') date_only_array = np.vectorize(lambda s: s.strftime('%Y-%m-%d'))( data_times.to_pydatetime()) data_dict = OrderedDict() for _date in date_only_array: if CCalendar.is_trading_day(_date, redis=self.redis): table_name = self.get_table_name(_date) if table_name not in data_dict: data_dict[table_name] = list() data_dict[table_name].append(str(_date)) all_df = pd.DataFrame() for key in data_dict: table_list = sorted(data_dict[key], reverse=False) if len(table_list) == 1: df = self.get_data(table_list[0]) if df is not None: all_df = all_df.append(df) else: start_date = table_list[0] end_date = table_list[len(table_list) - 1] df = self.get_data_between(start_date, end_date) if df is not None: all_df = all_df.append(df) return all_df def get_data_between(self, start_date, end_date): #start_date and end_date should be in the same table table_name = self.get_table_name(start_date) if not self.is_table_exists(table_name): return None sql = "select * from %s where date between \"%s\" and \"%s\"" % ( table_name, start_date, end_date) return self.mysql_client.get(sql) def get_data(self, cdate=datetime.now().strftime('%Y-%m-%d')): sql = "select * from %s where date=\"%s\"" % ( self.get_table_name(cdate), cdate) return self.mysql_client.get(sql) def update(self, end_date=None, num=10): if end_date is None: end_date = datetime.now().strftime('%Y-%m-%d') start_date = get_day_nday_ago(end_date, num=num, dformat="%Y-%m-%d") date_array = get_dates_array(start_date, end_date) succeed = True for mdate in date_array: if CCalendar.is_trading_day(mdate, redis=self.redis): if mdate == end_date: continue if not self.set_data(mdate): self.logger.error("%s set failed" % mdate) succeed = False return succeed def is_table_exists(self, table_name): if self.redis.exists(self.dbname): return self.redis.sismember(self.dbname, table_name) return False def set_data(self, cdate=datetime.now().strftime('%Y-%m-%d')): table_name = self.get_table_name(cdate) if not self.is_table_exists(table_name): if not self.create_table(table_name): self.logger.error("create tick table failed") return False self.redis.sadd(self.dbname, table_name) if self.is_date_exists(table_name, cdate): self.logger.debug("existed table:%s, date:%s" % (table_name, cdate)) return True total_df = smart_get(self.crawler.margin, trade_date=transfer_date_string_to_int(cdate)) if total_df is None: self.logger.error("crawel margin for %s failed" % cdate) return False total_df = total_df.rename(columns={ "trade_date": "date", "exchange_id": "code" }) total_df['rqyl'] = 0 total_df['rqchl'] = 0 detail_df = smart_get(self.crawler.margin_detail, trade_date=transfer_date_string_to_int(cdate)) if detail_df is None: self.logger.error("crawel detail margin for %s failed" % cdate) return False detail_df = detail_df.rename(columns={ "trade_date": "date", "ts_code": "code" }) total_df = total_df.append(detail_df, sort=False) total_df['date'] = pd.to_datetime( total_df.date).dt.strftime("%Y-%m-%d") total_df = total_df.reset_index(drop=True) if self.mysql_client.set(total_df, table_name): time.sleep(1) return self.redis.sadd(table_name, cdate) return False
class StockConnect(object): def __init__(self, market_from=ct.SH_MARKET_SYMBOL, market_to=ct.HK_MARKET_SYMBOL, dbinfo=ct.DB_INFO, redis_host=None): self.market_from = market_from self.market_to = market_to self.balcklist = None self.crawler = None self.mysql_client = None self.dbinfo = dbinfo self.logger = getLogger(__name__) self.redis = create_redis_obj( ) if redis_host is None else create_redis_obj(host=redis_host) def set_market(self, market_from, market_to): self.market_from = market_from self.market_to = market_to self.balcklist = [ "2018-10-17", "2018-09-25", "2018-07-02", "2018-05-22", "2018-04-02", "2018-03-30" ] if market_from in [ct.SH_MARKET_SYMBOL, ct.SZ_MARKET_SYMBOL ] else list() self.dbname = self.get_dbname(market_from, market_to) self.crawler = MCrawl(market_from) self.mysql_client = CMySQL(self.dbinfo, self.dbname, iredis=self.redis) return False if not self.mysql_client.create_db(self.dbname) else True def quit(self): self.crawler.quit() def close(self): self.crawler.close() @staticmethod def get_dbname(mfrom, mto): return "%s2%s" % (mfrom, mto) def get_table_name(self, cdate): cdates = cdate.split('-') return "%s_stock_day_%s_%s" % (self.dbname, cdates[0], (int(cdates[1]) - 1) // 3 + 1) def is_date_exists(self, table_name, cdate): if self.redis.exists(table_name): return cdate in set( str(tdate, encoding=ct.UTF8) for tdate in self.redis.smembers(table_name)) return False def create_table(self, table): sql = 'create table if not exists %s(date varchar(10) not null,\ code varchar(10) not null,\ name varchar(90),\ volume int,\ percent float,\ PRIMARY KEY (date, code))' % table return True if table in self.mysql_client.get_all_tables( ) else self.mysql_client.create(sql, table) def get_k_data_in_range(self, start_date, end_date): ndays = delta_days(start_date, end_date) date_dmy_format = time.strftime("%m/%d/%Y", time.strptime(start_date, "%Y-%m-%d")) data_times = pd.date_range(date_dmy_format, periods=ndays, freq='D') date_only_array = np.vectorize(lambda s: s.strftime('%Y-%m-%d'))( data_times.to_pydatetime()) data_dict = OrderedDict() for _date in date_only_array: if CCalendar.is_trading_day(_date, redis=self.redis): table_name = self.get_table_name(_date) if table_name not in data_dict: data_dict[table_name] = list() data_dict[table_name].append(str(_date)) all_df = pd.DataFrame() for key in data_dict: table_list = sorted(data_dict[key], reverse=False) if len(table_list) == 1: df = self.get_k_data(table_list[0]) if df is not None: all_df = all_df.append(df) else: start_date = table_list[0] end_date = table_list[len(table_list) - 1] df = self.get_data_between(start_date, end_date) if df is not None: all_df = all_df.append(df) return all_df def get_data_between(self, start_date, end_date): #start_date and end_date should be in the same table sql = "select * from %s where date between \"%s\" and \"%s\"" % ( self.get_table_name(start_date), start_date, end_date) return self.mysql_client.get(sql) def get_k_data(self, cdate=datetime.now().strftime('%Y-%m-%d')): sql = "select * from %s where date=\"%s\"" % ( self.get_table_name(cdate), cdate) return self.mysql_client.get(sql) def update(self, end_date=None, num=10): if end_date is None: end_date = datetime.now().strftime('%Y-%m-%d') start_date = get_day_nday_ago(end_date, num=num, dformat="%Y-%m-%d") succeed = True for mdate in get_dates_array(start_date, end_date): if CCalendar.is_trading_day(mdate, redis=self.redis): if mdate == end_date or mdate in self.balcklist: continue if not self.set_data(mdate): succeed = False return succeed def is_table_exists(self, table_name): if self.redis.exists(self.dbname): return table_name in set( str(table, encoding=ct.UTF8) for table in self.redis.smembers(self.dbname)) return False def set_data(self, cdate=datetime.now().strftime('%Y-%m-%d')): table_name = self.get_table_name(cdate) if not self.is_table_exists(table_name): if not self.create_table(table_name): self.logger.error("create tick table failed") return False self.redis.sadd(self.dbname, table_name) if self.is_date_exists(table_name, cdate): self.logger.debug("existed table:%s, date:%s" % (table_name, cdate)) return True ret, df = self.crawler.crawl(cdate) if ret != 0: return False if df.empty: return True df = df.reset_index(drop=True) df['date'] = cdate if self.mysql_client.set(df, table_name): return self.redis.sadd(table_name, cdate) return False