class CStock(TickerHandlerBase): def __init__(self, dbinfo, code): self.code = code self.dbname = self.get_dbname(code) self.redis = create_redis_obj() self.name = self.get('name') self.data_type_dict = {9: "day"} self.influx_client = CInflux(ct.IN_DB_INFO, self.dbname) self.mysql_client = CMySQL(dbinfo, self.dbname) if not self.create(): raise Exception("create stock %s table failed" % self.code) @staticmethod def get_dbname(code): return "s%s" % code @staticmethod def get_redis_name(code): return "realtime_%s" % code def on_recv_rsp(self, rsp_pb): '''获取逐笔 get_rt_ticker 和 TickerHandlerBase''' ret, data = super(CStock, self).on_recv_rsp(rsp_pb) return ret, data def has_on_market(self, cdate): time2Market = self.get('timeToMarket') if str(time2Market) == '0': return False t = time.strptime(str(time2Market), "%Y%m%d") y, m, d = t[0:3] time2Market = datetime(y, m, d) t = time.strptime(cdate, "%Y-%m-%d") y, m, d = t[0:3] time4Date = datetime(y, m, d) return True if (time4Date - time2Market).days > 0 else False def is_subnew(self, time2Market=None, timeLimit=365): if time2Market == '0': return False #for stock has not been in market if time2Market == None: time2Market = self.get('timeToMarket') t = time.strptime(time2Market, "%Y%m%d") y, m, d = t[0:3] time2Market = datetime(y, m, d) return True if (datetime.today() - time2Market).days < timeLimit else False def create(self): self.create_influx_db() return self.create_mysql_table() def create_influx_db(self): self.influx_client.create() def create_mysql_table(self): for _, table_name in self.data_type_dict.items(): if table_name not in self.mysql_client.get_all_tables(): sql = 'create table if not exists %s(cdate varchar(10) not null, open float, high float, close float, low float, volume float, amount float, PRIMARY KEY(cdate))' % table_name if not self.mysql_client.create(sql, table_name): return False return True def create_ticket_table(self, table): sql = 'create table if not exists %s(date varchar(10) not null, ctime varchar(8) not null, price float(5,2), cchange varchar(10) not null, volume int not null, amount int not null, ctype varchar(9) not null, PRIMARY KEY (date, ctime, cchange, volume, amount, ctype))' % table return True if table in self.mysql_client.get_all_tables( ) else self.mysql_client.create(sql, table) def get(self, attribute): df_byte = self.redis.get(ct.STOCK_INFO) if df_byte is None: return None df = _pickle.loads(df_byte) if len(df.loc[df.code == self.code][attribute].values) == 0: return None return df.loc[df.code == self.code][attribute].values[0] def run(self, data): self.redis.set(self.get_redis_name(self.code), _pickle.dumps(data.tail(1), 2)) self.influx_client.set(data) def merge_ticket(self, df): ex = df[df.duplicated( subset=['ctime', 'cchange', 'volume', 'amount', 'ctype'], keep=False)] dlist = list(ex.index) while len(dlist) > 0: snum = 1 sindex = dlist[0] for _index in range(1, len(dlist)): if sindex + 1 == dlist[_index]: snum += 1 if _index == len(dlist) - 1: df.drop_duplicates(keep='first', inplace=True) df.at[sindex, 'volume'] = snum * df.loc[sindex]['volume'] df.at[sindex, 'amount'] = snum * df.loc[sindex]['amount'] else: df.drop_duplicates(keep='first', inplace=True) df.at[sindex, 'volume'] = snum * df.loc[sindex]['volume'] df.at[sindex, 'amount'] = snum * df.loc[sindex]['amount'] sindex = dlist[_index] snum = 1 df = df.reset_index(drop=True) ex = df[df.duplicated( subset=['ctime', 'cchange', 'volume', 'amount', 'ctype'], keep=False)] dlist = list(ex.index) return df def get_market(self): if (self.code.startswith("6") or self.code.startswith("500") or self.code.startswith("550") or self.code.startswith("510")) or self.code.startswith("7"): return ct.MARKET_SH elif (self.code.startswith("00") or self.code.startswith("30") or self.code.startswith("150") or self.code.startswith("159")): return ct.MARKET_SZ else: return ct.MARKET_OTHER def get_redis_tick_table(self, cdate): cdates = cdate.split('-') return "tick_%s_%s_%s" % (self.code, cdates[0], cdates[1]) def is_tick_table_exists(self, tick_table): if self.redis.exists(self.dbname): return tick_table in set( str(table, encoding="utf8") for table in self.redis.smembers(self.dbname)) return False def is_date_exists(self, tick_table, cdate): if self.redis.exists(tick_table): return cdate in set( str(tdate, encoding="utf8") for tdate in self.redis.smembers(tick_table)) return False def set_k_data(self): prestr = "1" if self.get_market() == ct.MARKET_SH else "0" filename = "%s%s.csv" % (prestr, self.code) df = pd.read_csv("/data/tdx/history/days/%s" % filename, sep=',') df = df[['date', 'open', 'high', 'close', 'low', 'amount', 'volume']] df['date'] = df['date'].astype(str) df['date'] = pd.to_datetime(df.date).dt.strftime("%Y-%m-%d") df = df.rename(columns={'date': 'cdate'}) df = df.reset_index(drop=True) return self.mysql_client.set(df, 'day', method=ct.REPLACE) def set_ticket(self, cdate=None): cdate = datetime.now().strftime('%Y-%m-%d') if cdate is None else cdate if not self.has_on_market(cdate): logger.debug("not on market code:%s, date:%s" % (self.code, cdate)) return tick_table = self.get_redis_tick_table(cdate) if not self.is_tick_table_exists(tick_table): if not self.create_ticket_table(tick_table): logger.error("create tick table failed") return if self.is_date_exists(tick_table, cdate): logger.debug("existed code:%s, date:%s" % (self.code, cdate)) return df = ts.get_tick_data(self.code, date=cdate) df_tdx = read_tick( os.path.join( ct.TIC_DIR, '%s.tic' % datetime.strptime(cdate, "%Y-%m-%d").strftime("%Y%m%d")), self.code) if not df_tdx.empty: if df is not None and not df.empty and df.loc[0]['time'].find( "当天没有数据") == -1: net_volume = df.volume.sum() tdx_volume = df_tdx.volume.sum() if net_volume != tdx_volume: raise Exception( "code:%s, date:%s, net volume:%s, tdx volume:%s not equal" % (self.code, cdate, net_volume, tdx_volume)) df = df_tdx else: if df is None: logger.debug("nonedata code:%s, date:%s" % (self.code, cdate)) return if df.empty: logger.debug("emptydata code:%s, date:%s" % (self.code, cdate)) return if df.loc[0]['time'].find("当天没有数据") != -1: logger.debug("nodata code:%s, date:%s" % (self.code, cdate)) return df.columns = ['ctime', 'price', 'cchange', 'volume', 'amount', 'ctype'] logger.debug("code:%s date:%s" % (self.code, cdate)) df = self.merge_ticket(df) df['date'] = cdate logger.debug("write data code:%s, date:%s, table:%s" % (self.code, cdate, tick_table)) if self.mysql_client.set(df, tick_table): logger.info("start record:%s. table:%s" % (self.code, tick_table)) self.redis.sadd(tick_table, cdate) def get_k_data(self, date=None, dtype=9): table_name = self.data_type_dict[dtype] if date is not None: sql = "select * from %s where date=\"%s\"" % (table_name, date) else: sql = "select * from %s" % table_name return self.mysql_client.get(sql) def is_after_release(self, code_id, _date): time2Market = self.get('timeToMarket') t = time.strptime(str(time2Market), "%Y%m%d") y, m, d = t[0:3] time2Market = datetime(y, m, d) return (datetime.strptime(_date, "%Y-%m-%d") - time2Market).days > 0
class CIndex(CMysqlObj): ZZ_URL_HEAD = 'http://www.csindex.com.cn/uploads/file/autofile/cons/%scons.xls' SZ_URL_HEAD = 'http://www.szse.cn/api/report/ShowReport?SHOWTYPE=xlsx&CATALOGID=1747_zs&TABKEY=tab1&ZSDM=%s&random=%s' ZZ_URL_WEIGHT_HEAD = 'http://www.csindex.com.cn/uploads/file/autofile/closeweight/%scloseweight.xls' INDEX_URLS = { '000001': (ZZ_URL_HEAD % '000001', [0, 4, 5], ['date', 'code', 'name']), '000016': (ZZ_URL_HEAD % '000016', [0, 4, 5], ['date', 'code', 'name']), '000300': (ZZ_URL_WEIGHT_HEAD % '000300', [0, 4, 5, 8], ['date', 'code', 'name', 'weight']), '000905': (ZZ_URL_WEIGHT_HEAD % '000905', [0, 4, 5, 8], ['date', 'code', 'name', 'weight']), '399001': (SZ_URL_HEAD % ('399001', float_random()), [0, 1, 5], ['code', 'name', 'weight']), '399005': (SZ_URL_HEAD % ('399005', float_random()), [0, 1, 5], ['code', 'name', 'weight']), '399006': (SZ_URL_HEAD % ('399006', float_random()), [0, 1, 5], ['code', 'name', 'weight']), '399673': (SZ_URL_HEAD % ('399673', float_random()), [0, 1, 5], ['code', 'name', 'weight']) } def __init__(self, code, dbinfo = ct.DB_INFO, redis_host = None, should_create_influxdb = False, should_create_mysqldb = False): super(CIndex, self).__init__(code, self.get_dbname(code), dbinfo, redis_host) self.code = code self.influx_client = CInflux(ct.IN_DB_INFO, self.dbname, iredis = self.redis) #self.mysql_client.delete_db(self.dbname) if not self.create(should_create_influxdb, should_create_mysqldb): raise Exception("create index %s table failed" % self.code) @staticmethod def get_dbname(code): return "i%s" % code @staticmethod def get_redis_name(code): return "realtime_i%s" % code @staticmethod def get_market(code): if code.startswith("000") or code.startswith("880"): return ct.MARKET_SH elif code.startswith("399"): return ct.MARKET_SZ else: return ct.MARKET_OTHER def run(self, data): if not data.empty: self.redis.set(self.get_redis_name(self.dbname), _pickle.dumps(data.tail(1), 2)) self.influx_client.set(data) def get_day_table(self): return "%s_day" % self.dbname def create_influx_db(self): return self.influx_client.create() def create(self, should_create_influxdb, should_create_mysqldb): influxdb_flag = self.create_influx_db() if should_create_influxdb else True mysql_flag = self.create_db(self.dbname) and self.create_mysql_table() if should_create_mysqldb else True return influxdb_flag and mysql_flag def create_mysql_table(self): table_name = self.get_day_table() if table_name not in self.mysql_client.get_all_tables(): sql = 'create table if not exists %s(date varchar(10),\ open float,\ high float,\ close float,\ preclose float,\ low float,\ volume bigint,\ amount float,\ preamount float,\ pchange float,\ mchange float,\ PRIMARY KEY(date))' % table_name if not self.mysql_client.create(sql, table_name): return False return True def get_components_table_name(self, cdate): cdates = cdate.split('-') return "%s_components_%s_%s" % (self.dbname, cdates[0], (int(cdates[1])-1)//3 + 1) def get_stock_day_table(self): return "%s_day" % self.dbname def get_k_data_in_range(self, start_date, end_date): table_name = self.get_stock_day_table() sql = "select * from %s where date between \"%s\" and \"%s\"" %(table_name, start_date, end_date) return self.mysql_client.get(sql) def get_k_data(self, date = None): table_name = self.get_stock_day_table() if date is not None: sql = "select * from %s where date=\"%s\"" % (table_name, date) else: sql = "select * from %s" % table_name return self.mysql_client.get(sql) def create_components_table(self, table_name): sql = 'create table if not exists %s(date varchar(10) not null,\ code varchar(20) not null,\ name varchar(20),\ weight float,\ flag int,\ PRIMARY KEY (date, code))' % table_name return True if table_name in self.mysql_client.get_all_tables() else self.mysql_client.create(sql, table_name) def get_components_data(self, cdate = datetime.now().strftime('%Y-%m-%d')): table_name = self.get_components_table_name(cdate) if not self.is_table_exists(table_name): return pd.DataFrame() sql = "select * from %s where date=\"%s\"" % (table_name, cdate) return self.mysql_client.get(sql) def set_components_data_from_joinquant(self, basic_dict, cdate): table_name = self.get_components_table_name(cdate) if not self.is_table_exists(table_name): if not self.create_components_table(table_name): logger.error("create components table failed") return False if self.is_date_exists(table_name, cdate): logger.debug("existed table:%s, date:%s" % (table_name, cdate)) return True code = "{}.XSHG".format(self.code) if self.code.startswith("6") else "{}.XSHE".format(self.code) stocks = get_index_stocks(code, date = cdate) df = pd.DataFrame(stocks, columns =['code']) df['code'] = df['code'].str.split('.', expand=True)[0] if not df[~df.code.isin(basic_dict)].empty: logger.error("code from juquant is not in basic in data {}".format(df[~df.code.isin(basic_dict)])) return False df['name'] = df['code'].apply(lambda x: basic_dict[x]) df['date'] = cdate if 'wieight' not in df.columns: df['weight'] = 1/len(df) if 'flag' not in df.columns: df['flag'] = 1 df = df.reset_index(drop = True) if is_df_has_unexpected_data(df): logger.error("data for %s is not clear" % self.code) return False if self.mysql_client.set(df, table_name): if self.redis.sadd(table_name, cdate): return True return False def set_components_data(self, cdate = datetime.now().strftime('%Y-%m-%d')): table_name = self.get_components_table_name(cdate) if not self.is_table_exists(table_name): if not self.create_components_table(table_name): logger.error("create components table failed") return False if self.is_date_exists(table_name, cdate): logger.debug("existed table:%s, date:%s" % (table_name, cdate)) return True url = self.INDEX_URLS[self.code][0] columns = self.INDEX_URLS[self.code][1] column_names = self.INDEX_URLS[self.code][2] df = smart_get(pd.read_excel, url, usecols = columns) if df is None: logger.error("data for %s is empty" % self.code) return False df.columns = column_names df.code = df.code.astype('str').str.zfill(6) df['date'] = cdate if 'wieight' not in df.columns: df['weight'] = 1/len(df) if 'flag' not in df.columns: df['flag'] = 1 if self.code == "000001": df = df[df.code.str.startswith('6')] df = df.reset_index(drop = True) if is_df_has_unexpected_data(df): logger.error("data for %s is not clear" % self.code) return False if self.mysql_client.set(df, table_name): if self.redis.sadd(table_name, cdate): return True return False def handle_unexpected_data(self, df): if self.code == '399005': df.loc[df.date == '2007-06-20', 'amount'] = 7.285928e+09 return df def read(self, fpath): prestr = "1" if self.get_market(self.code) == ct.MARKET_SH else "0" filename = "%s%s.csv" % (prestr, self.code) dheaders = ['date', 'open', 'high', 'close', 'low', 'amount', 'volume'] df = pd.read_csv(fpath % filename, sep = ',', usecols = dheaders) df['date'] = df['date'].astype(str) df['date'] = pd.to_datetime(df.date).dt.strftime("%Y-%m-%d") df = self.handle_unexpected_data(df) df = df.reset_index(drop = True) df = df.sort_values(by = 'date', ascending= True) return df def set_k_data(self, cdate = None, fpath = "/data/tdx/history/days/%s"): if cdate is None: return self.set_all_data(fpath) else: return self.set_day_data(cdate, fpath) def set_all_data(self, fpath): df = self.read(fpath) if 0 == len(df): logger.error("no data for %s" % self.code) return False else: if 1 == len(df): df.at[0, 'preclose'] = df.loc[0, 'open'] df.at[0, 'pchange'] = 100 * (df.at[0, 'close'] - df.at[0, 'preclose']) / df.at[0, 'preclose'] df.at[0, 'preamount'] = df.loc[0, 'amount'] df.at[0, 'mchange'] = 0 else: df['preclose'] = df['close'].shift(1) df.at[0, 'preclose'] = df.loc[0, 'open'] df['pchange'] = 100 * (df['close'] - df['preclose']) / df['preclose'] df['preamount'] = df['amount'].shift(1) df.at[0, 'preamount'] = df.loc[0, 'amount'] df['mchange'] = 100 * (df['amount'] - df['preamount']) / df['preamount'] day_table = self.get_day_table() existed_date_list = self.get_existed_keys_list(day_table) df = df[~df.date.isin(existed_date_list)] df = df.reset_index(drop = True) if df.empty: return True if is_df_has_unexpected_data(df): logger.error("data for %s is not clear" % self.code) return False if not self.mysql_client.set(df, day_table): logger.error("set data %s %s failed" % (self.code, day_table)) return False if 0 == self.redis.sadd(day_table, *set(df.date.tolist())): logger.error("sadd %s for %s failed" % (self.code, day_table)) return False return True def set_day_data(self, cdate, fpath): day_table = self.get_day_table() if self.is_date_exists(day_table, cdate): logger.debug("existed data for code:%s, date:%s" % (self.code, cdate)) return True df = self.read(fpath) if 0 == len(df): logger.error("no data for %s:%s" % (cdate, self.code)) return False elif 1 == len(df): if len(df.loc[df.date == cdate]) == 0: logger.error("no data:%s for %s" % (cdate, self.code)) return False df.at[0, 'preclose'] = df.at[0, 'open'] df.at[0, 'pchange'] = 100 * (df.at[0, 'close'] - df.at[0, 'preclose']) / df.at[0, 'preclose'] df.at[0, 'preamount'] = df.at[0, 'amount'] df.at[0, 'mchange'] = 0 else: index_list = df.loc[df.date == cdate].index.values if len(index_list) == 0: logger.error("no data:%s for %s" % (cdate, self.code)) return False preday_index = index_list[0] - 1 preday_df = df.loc[preday_index] df = df.loc[df.date == cdate] df['preclose'] = preday_df['close'] df['pchange'] = 100 * (df['close'] - df['preclose']) / df['preclose'] df['preamount'] = preday_df['amount'] df['mchange'] = 100 * (df['amount'] - df['preamount']) / df['preamount'] df = df.reset_index(drop = True) day_table = self.get_day_table() if is_df_has_unexpected_data(df): logger.error("data for %s is not clear" % self.code) return False if self.mysql_client.set(df, day_table): if self.redis.sadd(day_table, cdate): return True return False def get_val_filename(self): return "%s_val.csv" % self.dbname def get_val_data(self, mdate = None): index_val_path = os.path.join("/data/valuation/indexs", self.get_val_filename()) if not os.path.exists(index_val_path): return None df = pd.read_csv(index_val_path) if mdate is None: return df else: return df.loc[df.date == mdate].reset_index(drop = True) def set_val_data(self, df, mdate = '', fpath = "/data/valuation/indexs"): index_val_path = os.path.join(fpath, self.get_val_filename()) if mdate == '': df.to_csv(index_val_path, index=False, header=True, mode='w', encoding='utf8') else: if not os.path.exists(index_val_path): df.to_csv(index_val_path, index=False, header=True, mode='w', encoding='utf8') else: vdf = self.get_val_data(mdate) if vdf.empty: df.to_csv(index_val_path, index=False, header=False, mode='a+', encoding='utf8') return True
class Combination: def __init__(self, dbinfo, code): self.code = code self.dbname = self.get_dbname(code) self.redis = create_redis_obj() self.data_type_dict = {9: "day"} self.influx_client = CInflux(ct.IN_DB_INFO, self.dbname) self.mysql_client = cmysql.CMySQL(dbinfo, self.dbname) if not self.create(): raise Exception("create combination table failed") @staticmethod def get_dbname(code): return "c%s" % code @staticmethod def get_redis_name(code): return "realtime_%s" % code def create_mysql_table(self): for _, table_name in self.data_type_dict.items(): if table_name not in self.mysql_client.get_all_tables(): sql = 'create table if not exists %s(date varchar(10), code varchar(10), open float, high float, close float, low float, volume float)' % table_name if not self.mysql_client.create(sql, table_name): return False return True #def create_realtime(self): # sql = 'create table if not exists %s(name varchar(20), code varchar(10), price float, pre_close float, date varchar(25), time varchar(20), amount float, volume float)' % self.realtime_table # return True if self.realtime_table in self.mysql_client.get_all_tables() else self.mysql_client.create(sql, self.realtime_table) def create_influx_db(self): self.influx_client.create() def create(self): self.create_influx_db() return self.create_mysql_table() def get_code_list(self): contentStr = self.get('content') return contentStr.split(',') def compute(self): code_list = self.get_code_list() df = pd.DataFrame() for code in code_list: df_byte = self.redis.get(CStock.get_redis_name(code)) if df_byte is None: continue df = df.append(_pickle.loads(df_byte)) num = len(df) if 0 == num: return pd.DataFrame() _price = df.price.astype(float).sum() / num _volume = df.volume.astype(float).sum() / num _amount = df.turnover.astype(float).sum() / num ctime = datetime.fromtimestamp( time.time()).strftime('%Y-%m-%d %H:%M:%S') data = { 'code': [self.code], 'name': [self.get('name')], 'time': [ctime], 'price': [_price], 'amount': [_amount], 'volume': [_volume] } df = pd.DataFrame(data) df.time = pd.to_datetime(df.time) df = df.set_index('time') return df def run(self): _new_data = self.compute() if not _new_data.empty: self.redis.set(self.get_redis_name(self.code), _pickle.dumps(_new_data, 2)) self.influx_client.set(_new_data) def get(self, attribute): df_byte = self.redis.get(ct.COMBINATION_INFO) if df_byte is None: return None df = _pickle.loads(df_byte) return df.loc[df.code == self.code][attribute].values[0]
class Combination(CMysqlObj): def __init__(self, code, should_create_db=False, dbinfo=ct.DB_INFO, redis_host=None): super(Combination, self).__init__(code, self.get_dbname(code), dbinfo, redis_host) self.code = code self.influx_client = CInflux(ct.IN_DB_INFO, self.dbname, iredis=self.redis) if should_create_db: if not self.create(): raise Exception("%s create combination table failed" % code) @staticmethod def get_dbname(code): return "c%s" % code @staticmethod def get_redis_name(code): return "realtime_c%s" % code def create_influx_db(self): return self.influx_client.create() def create(self): return self.create_influx_db() def get_code_list(self): contentStr = self.get('content') return contentStr.split(',') def compute(self): code_list = self.get_code_list() df = pd.DataFrame() for code in code_list: df_byte = self.redis.get(CStock.get_redis_name(code)) if df_byte is None: continue df = df.append(_pickle.loads(df_byte)) num = len(df) if 0 == num: return pd.DataFrame() _price = df.price.astype(float).sum() / num _volume = df.volume.astype(float).sum() / num _amount = df.turnover.astype(float).sum() / num ctime = datetime.fromtimestamp( time.time()).strftime('%Y-%m-%d %H:%M:%S') data = { 'code': [self.code], 'name': [self.get('name')], 'time': [ctime], 'price': [_price], 'amount': [_amount], 'volume': [_volume] } df = pd.DataFrame(data) df.time = pd.to_datetime(df.time) df = df.set_index('time') return df def is_table_exists(self, table_name): if self.redis.exists(self.dbname): return table_name in set( table.decode() for table in self.redis.smembers(self.dbname)) return False def is_date_exists(self, table_name, cdate): if self.redis.exists(table_name): return cdate in set(tdate.decode() for tdate in self.redis.smembers(table_name)) return False def get_existed_keys_list(self, table_name): if self.redis.exists(table_name): return list(tdate.decode() for tdate in self.redis.smembers(table_name)) return list() def run(self): _new_data = self.compute() if not _new_data.empty: self.redis.set(self.get_redis_name(self.code), _pickle.dumps(_new_data, 2)) self.influx_client.set(_new_data) def get(self, attribute): df_byte = self.redis.get(ct.COMBINATION_INFO) if df_byte is None: return None df = _pickle.loads(df_byte) return df.loc[df.code == self.code][attribute].values[0]
class CStock(CMysqlObj): def __init__(self, code, dbinfo=ct.DB_INFO, should_create_influxdb=False, should_create_mysqldb=False, redis_host=None): super(CStock, self).__init__(code, self.get_dbname(code), dbinfo, redis_host) self.influx_client = CInflux(ct.IN_DB_INFO, dbname=self.dbname, iredis=self.redis) if not self.create(should_create_influxdb, should_create_mysqldb): raise Exception("create stock %s table failed" % self.code) def __del__(self): self.influx_client = None @staticmethod def get_dbname(code): return "s%s" % code @staticmethod def get_redis_name(code): return "realtime_s%s" % code def adjust_share(self, data, info): data['outstanding'] = 0 data['totals'] = 0 if 0 == len(info): return pd.DataFrame() cur_totals = 0 cur_outstanding = 0 next_totals = 0 next_outstanding = 0 start_index = 0 for info_index, end_date in info.date.iteritems(): cur_outstanding = int(info.at[info_index, 'money']) #当前流通盘 cur_totals = int(info.at[info_index, 'price']) #当前总股本 next_outstanding = int(info.at[info_index, 'count']) #后流通盘 next_totals = int(info.at[info_index, 'rate']) #后总股本 dates = data.loc[data.date < end_date].index.tolist() if len(dates) == 0: if info_index == len(info) - 1: data.at[start_index:, 'outstanding'] = next_outstanding data.at[start_index:, 'totals'] = next_totals continue end_index = dates[len(dates) - 1] #if cur_outstanding != last_pre_outstanding: # logger.debug("%s 日期:%s 前流通盘:%s 不等于 预期前流通盘:%s" % (self.code, start_date, cur_outstanding, last_pre_outstanding)) #elif cur_totals != last_pre_totals: # logger.debug("%s 日期:%s 后流通盘:%s 不等于 预期后流通盘:%s" % (self.code, start_date, cur_totals, last_pre_totals)) data.at[start_index:end_index - 1, 'outstanding'] = cur_outstanding data.at[start_index:end_index - 1, 'totals'] = cur_totals cur_outstanding = next_outstanding cur_totals = next_totals start_index = end_index #finish the last date if info_index == len(info) - 1: data.at[start_index:, 'outstanding'] = cur_outstanding data.at[start_index:, 'totals'] = cur_totals data['totals'] = data['totals'].astype(int) data['totals'] = data['totals'] * 10000 data['outstanding'] = data['outstanding'].astype(int) data['outstanding'] = data['outstanding'] * 10000 data = data[data.volume < data.outstanding] data = data.reset_index(drop=True) return data def qfq(self, data, info): data['adj'] = 1.0 data['preclose'] = data['close'].shift(1) data.at[0, 'preclose'] = data.loc[0, 'open'] #if 0 == len(info): return data for info_index, start_date in info.date.iteritems(): dates = data.loc[data.date <= start_date].index.tolist() if len(dates) == 0: continue rate = info.loc[info_index, 'rate'] #配k股 price = info.loc[info_index, 'price'] #配股价格 money = info.loc[info_index, 'money'] #分红 count = info.loc[info_index, 'count'] #转送股数量 start_index = dates[len(dates) - 1] adj = (data.loc[start_index, 'preclose'] * 10 - money + rate * price) / ( (10 + rate + count) * data.loc[start_index, 'preclose']) data.at[:start_index - 1, 'adj'] = data.loc[:start_index - 1, 'adj'] * adj return data def has_on_market(self, cdate): time2Market = self.get('timeToMarket') if str(time2Market) == '0': return False t = time.strptime(str(time2Market), "%Y%m%d") y, m, d = t[0:3] time2Market = datetime(y, m, d) return (datetime.strptime(cdate, "%Y-%m-%d") - time2Market).days > 0 @property def is_subnew(self, time2Market=None, timeLimit=365): if time2Market == '0': return False #for stock has not been in market if time2Market == None: time2Market = self.get('timeToMarket') t = time.strptime(time2Market, "%Y%m%d") y, m, d = t[0:3] time2Market = datetime(y, m, d) return True if (datetime.today() - time2Market).days < timeLimit else False def create(self, should_create_influxdb, should_create_mysqldb): influxdb_flag = self.create_influx_db( ) if should_create_influxdb else True mysql_flag = self.create_db(self.dbname) and self.create_mysql_table( ) if should_create_mysqldb else True return influxdb_flag and mysql_flag def create_influx_db(self): return self.influx_client.create() def create_mysql_table(self): table_name = self.get_day_table() if table_name not in self.mysql_client.get_all_tables(): sql = 'create table if not exists %s(date varchar(10) not null,\ open float,\ high float,\ close float,\ preclose float,\ low float,\ volume bigint,\ amount float,\ outstanding bigint,\ totals bigint,\ adj float,\ aprice float,\ pchange float,\ turnover float,\ sai float,\ sri float,\ uprice float,\ sprice float,\ mprice float,\ lprice float,\ ppercent float,\ npercent float,\ base float,\ ibase bigint,\ breakup int,\ ibreakup bigint,\ pday int,\ profit float,\ gamekline float,\ PRIMARY KEY(date))' % table_name if not self.mysql_client.create(sql, table_name): return False return True def get(self, attribute): df_byte = self.redis.get(ct.STOCK_INFO) if df_byte is None: return None df = _pickle.loads(df_byte) if len(df.loc[df.code == self.code][attribute].values) == 0: return None return df.loc[df.code == self.code][attribute].values[0] def run(self, data): self.redis.set(self.get_redis_name(self.code), _pickle.dumps(data.tail(1), 2)) self.influx_client.set(data) def merge_ticket(self, df): ex = df[df.duplicated( subset=['time', 'cchange', 'volume', 'amount', 'ctype'], keep=False)] dlist = list(ex.index) while len(dlist) > 0: snum = 1 sindex = dlist[0] for _index in range(1, len(dlist)): if sindex + 1 == dlist[_index]: snum += 1 if _index == len(dlist) - 1: df.drop_duplicates(keep='first', inplace=True) df.at[sindex, 'volume'] = snum * df.loc[sindex]['volume'] df.at[sindex, 'amount'] = snum * df.loc[sindex]['amount'] else: df.drop_duplicates(keep='first', inplace=True) df.at[sindex, 'volume'] = snum * df.loc[sindex]['volume'] df.at[sindex, 'amount'] = snum * df.loc[sindex]['amount'] sindex = dlist[_index] snum = 1 df = df.reset_index(drop=True) ex = df[df.duplicated( subset=['time', 'cchange', 'volume', 'amount', 'ctype'], keep=False)] dlist = list(ex.index) return df def get_market(self): if self.code.startswith("6") or self.code.startswith( "500") or self.code.startswith("550") or self.code.startswith( "510") or self.code.startswith("7"): return ct.MARKET_SH elif self.code.startswith("00") or self.code.startswith( "30") or self.code.startswith("150") or self.code.startswith( "159"): return ct.MARKET_SZ else: return ct.MARKET_OTHER def get_chip_distribution_table(self, cdate): cdates = cdate.split('-') return "chip_%s_%s" % (self.dbname, cdates[0]) def get_redis_tick_table(self, cdate): cdates = cdate.split('-') return "tick_%s_%s_%s" % (self.dbname, cdates[0], cdates[1]) def get_day_table(self): return "%s_day" % self.dbname def create_chip_table(self, table): sql = 'create table if not exists %s(pos int not null,\ sdate varchar(10) not null,\ date varchar(10) not null,\ price decimal(8,2) not null,\ volume bigint not null,\ outstanding bigint not null,\ PRIMARY KEY (pos, sdate, date, price, volume, outstanding))' % table return True if table in self.mysql_client.get_all_tables( ) else self.mysql_client.create(sql, table) def get_pre_str(self): return "1" if self.get_market() == ct.MARKET_SH else "0" def read(self, cdate=None, fpath="/data/tdx/history/days/%s%s.csv"): prestr = self.get_pre_str() filename = fpath % (prestr, self.code) if not os.path.exists(filename): return pd.DataFrame(), None dheaders = ['date', 'open', 'high', 'close', 'low', 'amount', 'volume'] df = pd.read_csv(filename, sep=',', usecols=dheaders) df = df[(df['volume'] > 0) & (df['amount'] > 0)] df = df.drop_duplicates(subset=['date'], keep='first') df = df.sort_values(by='date', ascending=True) df = df.reset_index(drop=True) if cdate is not None: index_list = df.loc[df.date == transfer_date_string_to_int( cdate)].index.values if len(index_list) == 0: return pd.DataFrame(), None preday_index = index_list[0] - 1 if preday_index < 0: return pd.DataFrame(), None else: pre_day = df.at[preday_index, 'date'] return df.loc[df.date == transfer_date_string_to_int( cdate)], transfer_int_to_date_string(pre_day) return df, None def collect_right_info(self, info, cdate=None): info = info[(info.code == self.code) & (info.date <= int(datetime.now().strftime('%Y%m%d')))] info = info.sort_values(by='date', ascending=True) info = info.reset_index(drop=True) #collect stock amount change info #6:增发新股(如: 600887 2002-08-20), no change for stock or price total_stock_change_type_list = [ '2', '3', '4', '5', '7', '8', '9', '10', '11' ] quantity_change_info = info[info.type.isin( total_stock_change_type_list)] quantity_change_info = quantity_change_info[[ 'date', 'type', 'money', 'price', 'count', 'rate' ]] quantity_change_info = quantity_change_info.sort_index(ascending=True) quantity_change_info = quantity_change_info.reset_index(drop=True) quantity_change_info['money'] = quantity_change_info['money'].astype( int) quantity_change_info['price'] = quantity_change_info['price'].astype( int) #collect stock price change info price_change_info = info[info.type == 1] price_change_info = price_change_info[[ 'money', 'price', 'count', 'rate', 'date' ]] price_change_info = price_change_info.sort_index(ascending=True) price_change_info = price_change_info.reset_index(drop=True) return quantity_change_info, price_change_info def transfer2adjusted(self, df): df = df[[ 'date', 'open', 'high', 'close', 'preclose', 'low', 'volume', 'amount', 'outstanding', 'totals', 'adj' ]] df['date'] = df['date'].astype(str) df['date'] = pd.to_datetime(df.date).dt.strftime("%Y-%m-%d") df['low'] = df['adj'] * df['low'] df['open'] = df['adj'] * df['open'] df['high'] = df['adj'] * df['high'] df['close'] = df['adj'] * df['close'] df['preclose'] = df['adj'] * df['preclose'] df['volume'] = df['volume'].astype(int) df['aprice'] = df['adj'] * df['amount'] / df['volume'] df['pchange'] = 100 * (df['close'] - df['preclose']) / df['preclose'] df['turnover'] = 100 * df['volume'] / df['outstanding'] return df def is_need_reright(self, cdate, price_change_info): if len(price_change_info) == 0: return False now_date = transfer_date_string_to_int(cdate) p_index = price_change_info.date.index[-1] p_date = price_change_info.date[p_index] return now_date == p_date def relative_index_strength(self, df, index_df, cdate=None): index_df = index_df.loc[index_df.date.isin(df.date.tolist())] if len(df) != len(index_df): logger.debug("length of code %s is not equal to index." % self.code) index_df.index = df.loc[df.date.isin(index_df.date.tolist())].index if cdate is None: df['sai'] = 0 df['sri'] = 0 s_pchange = (df['close'] - df['preclose']) / df['preclose'] i_pchange = (index_df['close'] - index_df['preclose']) / index_df['preclose'] df['sri'] = 100 * (s_pchange - i_pchange) df['sri'] = df['sri'].fillna(0) df.at[(i_pchange < 0) & (s_pchange > 0), 'sai'] = df.loc[(i_pchange < 0) & (s_pchange > 0), 'sri'] else: s_pchange = (df.loc[df.date == cdate, 'close'] - df.loc[df.date == cdate, 'preclose'] ) / df.loc[df.date == cdate, 'preclose'] s_pchange = s_pchange.values[0] i_pchange = (index_df.loc[index_df.date == cdate, 'close'] - index_df.loc[index_df.date == cdate, 'preclose'] ) / index_df.loc[index_df.date == cdate, 'preclose'] i_pchange = i_pchange.values[0] df['sai'] = 100 * (s_pchange - i_pchange ) if s_pchange > 0 and i_pchange < 0 else 0 df['sri'] = 100 * (s_pchange - i_pchange) return df def set_today_data(self, df, index_df, pre_date, cdate): day_table = self.get_day_table() if self.is_date_exists(day_table, cdate): logger.debug("existed data for code:%s, date:%s" % (self.code, cdate)) return True index_df = index_df.loc[index_df.date == cdate] preday_df = self.get_k_data(date=pre_date) if preday_df is None: logger.error("%s get pre date data failed." % self.code) return False if preday_df.empty: logger.error("%s get pre date data empty." % self.code) return False df['adj'] = 1.0 df['preclose'] = preday_df['close'][0] df['totals'] = preday_df['totals'][0] df['outstanding'] = preday_df['outstanding'][0] #transfer data to split-adjusted share prices df = self.transfer2adjusted(df) df = self.relative_index_strength(df, index_df, cdate) if df is None: return False #set chip distribution dist_df = df.append(preday_df, sort=False) dist_df = dist_df.sort_values(by='date', ascending=True) dist_data = self.compute_distribution(dist_df, cdate) if dist_data.empty: logger.error("%s chip distribution compute failed." % self.code) return False if self.set_chip_distribution(dist_data, zdate=cdate): df['uprice'] = mac(dist_data, 0) df['sprice'] = mac(dist_data, 5) df['mprice'] = mac(dist_data, 13) df['lprice'] = mac(dist_data, 37) df = pro_nei_chip(df, dist_data, preday_df, cdate) if is_df_has_unexpected_data(df): logger.error("data for %s is not clean." % self.code) return False if self.mysql_client.set(df, self.get_day_table()): return self.redis.sadd(day_table, cdate) return False def set_all_data(self, quantity_change_info, price_change_info, index_info): df, _ = self.read() if df.empty: logger.error("read empty file for:%s" % self.code) return False #modify price and quanity for all split-adjusted share prices df = self.adjust_share(df, quantity_change_info) if df.empty: return False df = self.qfq(df, price_change_info) if df.empty: return False #transfer data to split-adjusted share prices df = self.transfer2adjusted(df) #compute strength relative index df = self.relative_index_strength(df, index_info) if df is None: logger.error("length of code %s is not equal to index." % self.code) return False #set chip distribution #logger.info("compute %s distribution" % self.code) dist_data = self.compute_distribution(df) if dist_data.empty: logger.error("%s is empty distribution." % self.code) return False if not self.set_chip_distribution(dist_data): logger.info("store %s distribution failed" % self.code) return False df['uprice'] = mac(dist_data, 0) df['sprice'] = mac(dist_data, 5) df['mprice'] = mac(dist_data, 13) df['lprice'] = mac(dist_data, 37) df = pro_nei_chip(df, dist_data) if is_df_has_unexpected_data(df): logger.error("data for %s is not clean." % self.code) return False day_table = self.get_day_table() if not self.mysql_client.delsert(df, day_table): logger.error("save %s data to mysql failed." % self.code) return False self.redis.sadd(day_table, *set(df.date.tolist())) return True def get_base_floating_profit(self, date=None): return self.get_k_data(date) def get_base_floating_profit_in_range(self, start_date, end_date): return self.get_k_data_in_range(start_date, end_date) def set_base_floating_profit(self): df = self.get_k_data() if df is None: return False df['base'] = 0.0 df['ibase'] = 0 df['breakup'] = 0 df['ibreakup'] = 0 df['pday'] = 0 df['profit'] = 0.0 df = base_floating_profit(df) return self.mysql_client.delsert(df, self.get_day_table()) def set_k_data(self, bonus_info, index_info, cdate=None): if not self.has_on_market(datetime.now().strftime('%Y-%m-%d')): return True quantity_change_info, price_change_info = self.collect_right_info( bonus_info) if cdate is None or self.is_need_reright(cdate, price_change_info): return self.set_all_data(quantity_change_info, price_change_info, index_info) else: today_df, pre_date = self.read(cdate) if today_df.empty: return True if pre_date is None: return self.set_all_data(quantity_change_info, price_change_info, index_info) else: return self.set_today_data(today_df, index_info, pre_date, cdate) def get_chip_distribution(self, mdate=None): df = pd.DataFrame() if mdate is not None: table = self.get_chip_distribution_table(mdate) if self.is_table_exists(table): df = self.mysql_client.get("select * from %s" % table) return df.loc[df.date == mdate] else: time2Market = self.get('timeToMarket') start_year = int(time2Market / 10000) end_year = int(datetime.now().strftime('%Y')) year_list = get_years_between(start_year, end_year) for table in [ self.get_chip_distribution_table(myear) for myear in year_list ]: if self.is_table_exists(table): tmp_df = self.mysql_client.get("select * from %s" % table) df = df.append(tmp_df) df = df.reset_index(drop=True) return df def compute_distribution(self, data, zdate=None): data = data[[ 'date', 'open', 'aprice', 'outstanding', 'volume', 'amount' ]] if zdate is None: df = compute_distribution(data) else: mdate_list = data.date.tolist() pre_date, now_date = mdate_list if now_date != zdate: logger.error( "%s data new date %s is not equal to now date %s" % (self.code, now_date, zdate)) return pd.DataFrame() pre_date_dist = self.get_chip_distribution(pre_date) if pre_date_dist.empty: logger.error("pre data for %s dist %s is empty" % (self.code, pre_date)) return pd.DataFrame() pre_date_dist = pre_date_dist.sort_values(by='pos', ascending=True) pos = data.loc[data.date == zdate].index[0] volume = data.loc[data.date == zdate, 'volume'].tolist()[0] aprice = data.loc[data.date == zdate, 'aprice'].tolist()[0] outstanding = data.loc[data.date == zdate, 'outstanding'].tolist()[0] pre_outstanding = data.loc[data.date == pre_date, 'outstanding'].tolist()[0] zdate = zdate.encode("UTF-8") df = compute_oneday_distribution(pre_date_dist, zdate, pos, volume, aprice, pre_outstanding, outstanding) return df def set_chip_table(self, df, myear): #get new df tmp_df = df.loc[df.date.str.startswith(myear)] #some stock halted more than one year if tmp_df.empty: return (myear, True) tmp_df = tmp_df.reset_index(drop=True) #get chip table name chip_table = self.get_chip_distribution_table(myear) if not self.is_table_exists(chip_table): if not self.create_chip_table(chip_table): logger.error("create chip table:%s failed" % chip_table) return (myear, False) if not self.mysql_client.set(tmp_df, chip_table): return (myear, False) else: #update df to mysql if not self.mysql_client.delsert(tmp_df, chip_table): return (myear, False) self.redis.sadd(chip_table, *set(tmp_df.date.tolist())) return (myear, True) def set_chip_distribution(self, df, zdate=None): if zdate is None: time2Market = self.get('timeToMarket') start_year = int(time2Market / 10000) end_year = int(datetime.now().strftime('%Y')) year_list = get_years_between(start_year, end_year) cfunc = partial(self.set_chip_table, df) return concurrent_run(cfunc, year_list, num=20) else: chip_table = self.get_chip_distribution_table(zdate) if not self.is_table_exists(chip_table): if not self.create_chip_table(chip_table): logger.error("create chip table:%s failed" % chip_table) return False if self.is_date_exists(chip_table, zdate): logger.debug("existed chip for code:%s, date:%s" % (self.code, zdate)) return True if is_df_has_unexpected_data(df): logger.error("data for %s is not clear" % self.code) return False if self.mysql_client.set(df, chip_table): if self.redis.sadd(chip_table, zdate): logger.debug("finish record chip:%s. table:%s" % (self.code, chip_table)) return True return False def set_ticket(self, cdate=None): cdate = datetime.now().strftime('%Y-%m-%d') if cdate is None else cdate if not self.has_on_market(cdate): logger.debug("not on market code:%s, date:%s" % (self.code, cdate)) return True tick_table = self.get_redis_tick_table(cdate) if not self.is_table_exists(tick_table): if not self.create_ticket_table(tick_table): logger.error("create tick table failed") return False if not self.redis.sadd(self.dbname, tick_table): logger.error("add tick table to redis failed") return False if self.is_date_exists(tick_table, cdate): logger.debug("existed code:%s, date:%s" % (self.code, cdate)) return True logger.debug("%s read code from file %s" % (self.code, cdate)) df = ts.get_tick_data(self.code, date=cdate) df_tdx = read_tick( os.path.join( ct.TIC_DIR, '%s.tic' % datetime.strptime(cdate, "%Y-%m-%d").strftime("%Y%m%d")), self.code) if not df_tdx.empty: if df is not None and not df.empty and df.loc[0]['time'].find( "当天没有数据") == -1: net_volume = df.volume.sum() tdx_volume = df_tdx.volume.sum() if net_volume != tdx_volume: logger.error( "code:%s, date:%s, net volume:%s, tdx volume:%s not equal" % (self.code, cdate, net_volume, tdx_volume)) df = df_tdx else: if df is None: logger.debug("nonedata code:%s, date:%s" % (self.code, cdate)) return True if df.empty: logger.debug("emptydata code:%s, date:%s" % (self.code, cdate)) return True if df.loc[0]['time'].find("当天没有数据") != -1: logger.debug("nodata code:%s, date:%s" % (self.code, cdate)) return True df.columns = ['time', 'price', 'cchange', 'volume', 'amount', 'ctype'] logger.debug("merge ticket code:%s date:%s" % (self.code, cdate)) df = self.merge_ticket(df) df['date'] = cdate logger.debug("write data code:%s, date:%s, table:%s" % (self.code, cdate, tick_table)) if is_df_has_unexpected_data(df): logger.error("data for %s is not clear" % self.code) return False if self.mysql_client.set(df, tick_table): logger.debug("finish record:%s. table:%s" % (self.code, tick_table)) if self.redis.sadd(tick_table, cdate): return True return False def get_ticket(self, cdate): cdate = datetime.now().strftime('%Y-%m-%d') if cdate is None else cdate if not self.has_on_market(cdate): logger.debug("not on market code:%s, date:%s" % (self.code, cdate)) return sql = "select * from %s where date=\"%s\"" % ( self.get_redis_tick_table(cdate), cdate) return self.mysql_client.get(sql) def get_k_data_in_range(self, start_date, end_date, dtype=9): table_name = self.get_day_table() sql = "select * from %s where date between \"%s\" and \"%s\"" % ( table_name, start_date, end_date) return self.mysql_client.get(sql) def get_k_data(self, date=None, dtype=9): table_name = self.get_day_table() if date is not None: sql = "select * from %s where date=\"%s\"" % (table_name, date) else: sql = "select * from %s" % table_name return self.mysql_client.get(sql)