コード例 #1
0
class PlateValuationCrawler(object):
    def __init__(self, dbinfo = ct.DB_INFO, redis_host = None):
        self.dbname = self.get_dbname()
        self.table_name = self.get_tablename()
        self.redis = create_redis_obj() if redis_host is None else create_redis_obj(host = redis_host)
        self.mysql_client = CMySQL(dbinfo, self.dbname, iredis = self.redis)
        if not self.mysql_client.create_db(self.dbname): raise Exception("create plate database failed")
        if not self.create_table(self.table_name): raise Exception("create valuation table failed")

    @staticmethod
    def get_dbname():
        return "plate"

    @staticmethod
    def get_tablename():
        return "valuation"

    def create_table(self, table):
        sql = 'create table if not exists %s(date varchar(10) not null,\
                                             code varchar(10) not null,\
                                             name varchar(50),\
                                             pe float,\
                                             ttm float,\
                                             pb float,\
                                             dividend float,\
                                             PRIMARY KEY(date, code))' % table
        return True if table in self.mysql_client.get_all_tables() else self.mysql_client.create(sql, table)
コード例 #2
0
class InvestorCrawler(object):
    def __init__(self, dbinfo = ct.DB_INFO, redis_host = None):
        self.dbname = self.get_dbname()
        self.table = self.get_table_name()
        self.redis = create_redis_obj() if redis_host is None else create_redis_obj(host = redis_host)
        self.mysql_client = CMySQL(dbinfo, self.dbname, iredis = self.redis)
        if not self.mysql_client.create_db(self.dbname): raise Exception("init stock database failed")
        if not self.create_table(): raise Exception("init week investor table failed")

    @staticmethod
    def get_dbname():
        return "stock"

    @staticmethod
    def get_table_name():
        return "investor"

    def create_table(self):
        sql = 'create table if not exists %s(date varchar(10) not null,\
                                             new_investor float,\
                                             final_investor float,\
                                             new_natural_person float,\
                                             new_non_natural_person float,\
                                             final_natural_person float,\
                                             final_non_natural_person float,\
                                             PRIMARY KEY(date))' % self.table
        return True if self.table in self.mysql_client.get_all_tables() else self.mysql_client.create(sql, self.table)
コード例 #3
0
ファイル: plate_peratio.py プロジェクト: dxcv/smart_deal_tool
class PlatePERatioCrawler(object):
    def __init__(self, dbinfo=ct.DB_INFO, redis_host=None):
        self.dbname = self.get_dbname()
        self.redis = create_redis_obj(
        ) if redis_host is None else create_redis_obj(host=redis_host)
        self.mysql_client = CMySQL(dbinfo, self.dbname, iredis=self.redis)
        if not self.mysql_client.create_db(self.dbname):
            raise Exception("init pledge database failed")

    @staticmethod
    def get_dbname():
        return "plate_pe"

    def create_table(self, table):
        import pdb
        pdb.set_trace()
        sql = 'create table if not exists %s(date varchar(10) not null,\
                                             code varchar(10) not null,\
                                             name varchar(50),\
                                             pledge_counts int,\
                                             unlimited_pledge_stocks float,\
                                             limited_pledge_stocks float,\
                                             total_stocks float,\
                                             pledge_ratio float,\
                                             PRIMARY KEY(date, code))' % table
        return True if table in self.mysql_client.get_all_tables(
        ) else self.mysql_client.create(sql, table)
コード例 #4
0
class ChinaTreasuryRateCrawler(object):
    def __init__(self, dbinfo = ct.DB_INFO, redis_host = None):
        self.dbname = self.get_dbname()
        self.table_name = self.get_tablename()
        self.redis = create_redis_obj() if redis_host is None else create_redis_obj(host = redis_host)
        self.mysql_client = CMySQL(dbinfo, self.dbname, iredis = self.redis)
        if not self.mysql_client.create_db(self.dbname): raise Exception("create china treasury database failed")
        if not self.create_table(self.table_name): raise Exception("create rate table failed")

    @staticmethod
    def get_dbname():
        return "china_treasury"

    @staticmethod
    def get_tablename():
        return "yield"

    def create_table(self, table):
        sql = 'create table if not exists %s(date varchar(10) not null,\
                                             name varchar(50),\
                                             month3 float,\
                                             month6 float,\
                                             year1 float,\
                                             year3 float,\
                                             year5 float,\
                                             year7 float,\
                                             year10 float,\
                                             year30 float,\
                                             PRIMARY KEY(date, name))' % table
        return True if table in self.mysql_client.get_all_tables() else self.mysql_client.create(sql, table)
コード例 #5
0
ファイル: hkex.py プロジェクト: dxcv/smart_deal_tool
class HkexCrawler(object):
    def __init__(self, dbinfo = ct.DB_INFO, redis_host = None):
        for (market_from, market_to) in [(ct.SH_MARKET_SYMBOL, ct.HK_MARKET_SYMBOL), (ct.HK_MARKET_SYMBOL, ct.SH_MARKET_SYMBOL), (ct.SZ_MARKET_SYMBOL, ct.HK_MARKET_SYMBOL), (ct.HK_MARKET_SYMBOL, ct.SZ_MARKET_SYMBOL)]:
            dbname = self.get_dbname(market_from, market_to) 
            self.mysql_client = CMySQL(dbinfo, dbname)
            if not self.mysql_client.create_db(dbname): raise Exception("init hkex crawler database failed")
            #self.mysql_client.delete(self.get_topten_table(dbname))
            if not self.create_topten_table(dbname): raise Exception("init hkex crawler topten table failed")
            #self.mysql_client.delete(self.get_capital_table(dbname))
            if not self.create_capital_table(dbname): raise Exception("init hkex crawler capital table failed failed")

    @staticmethod
    def get_dbname(market_from, market_to):
        return "%s2%s" % (market_from, market_to)

    @staticmethod
    def get_topten_table(dbname):
        return "%s_topten" % dbname

    @staticmethod
    def get_capital_table(dbname):
        return "%s_capital_overview" % dbname

    def create_topten_table(self, dbname):
        table = self.get_topten_table(dbname)
        sql = 'create table if not exists %s(date varchar(10) not null,\
                                             code varchar(10) not null,\
                                             name varchar(50),\
                                             rank int,\
                                             total_turnover float,\
                                             buy_turnover float,\
                                             sell_turnover float,\
                                             PRIMARY KEY(date, code))' % table
        return True if table in self.mysql_client.get_all_tables() else self.mysql_client.create(sql, table)


    def create_capital_table(self, dbname):
        table = self.get_capital_table(dbname)
        sql = 'create table if not exists %s(date varchar(10) not null,\
                                             total_turnover float,\
                                             buy_turnover float,\
                                             sell_turnover float,\
                                             total_trade_count long,\
                                             buy_trade_count long,\
                                             sell_trade_count long,\
                                             PRIMARY KEY(date))' % table
        return True if table in self.mysql_client.get_all_tables() else self.mysql_client.create(sql, table)

    def run(self):
        settings = get_project_settings()
        myrunner = CrawlerRunner(settings)
        myrunner.crawl(HkexSpider)
        d = myrunner.join()
        d.addBoth(lambda _: reactor.stop())
        reactor.run() #the script will block here until the crawling is finished
        return True
コード例 #6
0
class MonthInvestorCrawler(object):
    def __init__(self, dbinfo = ct.DB_INFO, redis_host = None):
        self.dbname = self.get_dbname()
        self.table = self.get_table_name()
        self.redis = create_redis_obj() if redis_host is None else create_redis_obj(host = redis_host)
        self.mysql_client = CMySQL(dbinfo, self.dbname, iredis = self.redis)
        if not self.mysql_client.create_db(self.dbname): raise Exception("init stock database failed")
        if not self.create_table(): raise Exception("init month investor table failed")

    @staticmethod
    def get_dbname():
        return "stock"

    @staticmethod
    def get_table_name():
        return "month_investor"

    def create_table(self):
        sql = 'create table if not exists %s(date varchar(10) not null,\
                                             new_investor float,\
                                             new_natural_person float,\
                                             new_non_natural_person float,\
                                             final_investor float,\
                                             final_natural_person float,\
                                             final_natural_a_person float,\
                                             final_natural_b_person float,\
                                             final_non_natural_person float,\
                                             final_non_natural_a_person float,\
                                             final_non_natural_b_person float,\
                                             final_hold_investor float,\
                                             final_a_hold_investor float,\
                                             final_b_hold_investor float,\
                                             trading_investor float,\
                                             trading_a_investor float,\
                                             trading_b_investor float,\
                                             PRIMARY KEY(date))' % self.table
        return True if self.table in self.mysql_client.get_all_tables() else self.mysql_client.create(sql, self.table)

    def get_data(self, mdate):
        table_name = self.get_table_name()
        if date is not None:
            sql = "select * from %s where date=\"%s\"" %(table_name, date)
        else:
            sql = "select * from %s" % table_name
        return self.mysql_client.get(sql)

    def get_data_in_range(self, start_date, end_date):
        table_name = self.get_table_name()
        sql = "select * from %s where date between \"%s\" and \"%s\"" %(table_name, start_date, end_date)
        return self.mysql_client.get(sql)
コード例 #7
0
ファイル: cobj.py プロジェクト: tfangz888/smart_deal_tool
class CMysqlObj(CClass):
    __slots__ = ("code", "redis", "mysql_client")

    def __init__(self, code, dbname, dbinfo, redis_host):
        self.code = code
        self._dbname = dbname
        self.redis = create_redis_obj(
        ) if redis_host is None else create_redis_obj(redis_host)
        self.mysql_client = CMySQL(dbinfo=dbinfo,
                                   dbname=dbname,
                                   iredis=self.redis)

    def __del__(self):
        self.redis = None
        self.mysql_client = None

    def create_db(self, db_name):
        return self.mysql_client.create_db(db_name)

    def is_table_exists(self, table_name):
        if self.redis.exists(self.dbname):
            return table_name in set(
                table.decode() for table in self.redis.smembers(self.dbname))
        return False

    def is_date_exists(self, table_name, cdate):
        if self.redis.exists(table_name):
            return cdate in set(tdate.decode()
                                for tdate in self.redis.smembers(table_name))
        return False

    def get_existed_keys_list(self, table_name):
        if self.redis.exists(table_name):
            return list(tdate.decode()
                        for tdate in self.redis.smembers(table_name))
        return list()

    @property
    def dbname(self):
        return self._dbname

    @dbname.setter
    def dbname(self, value):
        self._dbname = value
コード例 #8
0
class StockLimitCrawler(object):
    def __init__(self, dbinfo=ct.DB_INFO, redis_host=None):
        self.dbname = self.get_dbname()
        self.table_name = self.get_tablename()
        self.redis = create_redis_obj(
        ) if redis_host is None else create_redis_obj(host=redis_host)
        self.mysql_client = CMySQL(dbinfo, self.dbname, iredis=self.redis)
        if not self.mysql_client.create_db(self.dbname):
            raise Exception("create stock limit database failed")
        if not self.create_table(self.table_name):
            raise Exception("create stock limit table failed")

    @staticmethod
    def get_dbname():
        return "stock"

    @staticmethod
    def get_tablename():
        return "limitup"

    def create_table(self, table):
        sql = 'create table if not exists %s(date varchar(10) not null,\
                                             code varchar(6) not null,\
                                             price float,\
                                             pchange float,\
                                             prange float,\
                                             concept varchar(200),\
                                             fcb float,\
                                             flb float,\
                                             fdmoney float,\
                                             first_time varchar(20),\
                                             last_time varchar(20),\
                                             open_times varchar(20),\
                                             intensity float,\
                                             PRIMARY KEY (date, code))' % self.table
        return True if table in self.mysql_client.get_all_tables(
        ) else self.mysql_client.create(sql, table)
コード例 #9
0
class RIndexStock:
    def __init__(self, dbinfo=ct.DB_INFO, redis_host=None):
        self.redis = create_redis_obj(
        ) if redis_host is None else create_redis_obj(host=redis_host)
        self.dbname = self.get_dbname()
        self.redis_host = redis_host
        self.logger = getLogger(__name__)
        self.mysql_client = CMySQL(dbinfo, self.dbname, iredis=self.redis)
        if not self.mysql_client.create_db(self.get_dbname()):
            raise Exception("init rstock database failed")

    @staticmethod
    def get_dbname():
        return ct.RINDEX_STOCK_INFO_DB

    def get_table_name(self, cdate):
        cdates = cdate.split('-')
        return "%s_day_%s_%s" % (self.get_dbname(), cdates[0],
                                 (int(cdates[1]) - 1) // 3 + 1)

    def is_date_exists(self, table_name, cdate):
        if self.redis.exists(table_name):
            return cdate in set(
                str(tdate, encoding="utf8")
                for tdate in self.redis.smembers(table_name))
        return False

    def is_table_exists(self, table_name):
        if self.redis.exists(self.dbname):
            return table_name in set(
                str(table, encoding="utf8")
                for table in self.redis.smembers(self.dbname))
        return False

    def create_table(self, table):
        sql = 'create table if not exists %s(date varchar(10) not null,\
                                             code varchar(10) not null,\
                                             open float,\
                                             high float,\
                                             close float,\
                                             preclose float,\
                                             low float,\
                                             volume float,\
                                             amount float,\
                                             outstanding float,\
                                             totals float,\
                                             adj float,\
                                             aprice float,\
                                             pchange float,\
                                             turnover float,\
                                             sai float,\
                                             sri float,\
                                             uprice float,\
                                             sprice float,\
                                             mprice float,\
                                             lprice float,\
                                             ppercent float,\
                                             npercent float,\
                                             base float,\
                                             ibase bigint,\
                                             breakup int,\
                                             ibreakup bigint,\
                                             pday int,\
                                             profit float,\
                                             gamekline float,\
                                             PRIMARY KEY (date, code))' % table
        return True if table in self.mysql_client.get_all_tables(
        ) else self.mysql_client.create(sql, table)

    def get_k_data_in_range(self, start_date, end_date):
        ndays = delta_days(start_date, end_date)
        date_dmy_format = time.strftime("%m/%d/%Y",
                                        time.strptime(start_date, "%Y-%m-%d"))
        data_times = pd.date_range(date_dmy_format, periods=ndays, freq='D')
        date_only_array = np.vectorize(lambda s: s.strftime('%Y-%m-%d'))(
            data_times.to_pydatetime())
        data_dict = OrderedDict()
        for _date in date_only_array:
            if CCalendar.is_trading_day(_date, redis=self.redis):
                table_name = self.get_table_name(_date)
                if table_name not in data_dict: data_dict[table_name] = list()
                data_dict[table_name].append(str(_date))
        all_df = pd.DataFrame()
        for key in data_dict:
            table_list = sorted(data_dict[key], reverse=False)
            if len(table_list) == 1:
                df = self.get_data(table_list[0])
                if df is not None: all_df = all_df.append(df)
            else:
                start_date = table_list[0]
                end_date = table_list[len(table_list) - 1]
                df = self.get_data_between(start_date, end_date)
                if df is not None: all_df = all_df.append(df)
        return all_df

    def get_data_between(self, start_date, end_date):
        #start_date and end_date should be in the same table
        sql = "select * from %s where date between \"%s\" and \"%s\"" % (
            self.get_table_name(start_date), start_date, end_date)
        return self.mysql_client.get(sql)

    def get_data(self, cdate=datetime.now().strftime('%Y-%m-%d')):
        sql = "select * from %s where date=\"%s\"" % (
            self.get_table_name(cdate), cdate)
        return self.mysql_client.get(sql)

    def get_stock_data(self, cdate, code):
        return (code, CStock(code).get_k_data(cdate))

    def generate_all_data_1(self, cdate, black_list=list()):
        failed_list = CStockInfo(redis_host=self.redis_host).get(
            redis=self.redis).code.tolist()
        if len(black_list) > 0:
            failed_list = list(set(failed_list).difference(set(black_list)))
        cfunc = partial(self.get_stock_data, cdate)
        return queue_process_concurrent_run(cfunc,
                                            failed_list,
                                            redis_client=self.redis)

    def generate_all_data(self, cdate, black_list=ct.BLACK_LIST):
        from gevent.pool import Pool
        obj_pool = Pool(5000)
        failed_list = CStockInfo(redis_host=self.redis_host).get(
            redis=self.redis).code.tolist()
        if len(black_list) > 0:
            failed_list = list(set(failed_list).difference(set(black_list)))
        all_df = pd.DataFrame()
        last_length = len(failed_list)
        cfunc = partial(self.get_stock_data, cdate)
        while last_length > 0:
            self.logger.info("all stock list:%s, cdate:%s", len(failed_list),
                             cdate)
            for code_data in obj_pool.imap_unordered(cfunc, failed_list):
                if code_data[1] is not None:
                    tem_df = code_data[1]
                    tem_df['code'] = code_data[0]
                    all_df = all_df.append(tem_df)
                    failed_list.remove(code_data[0])
            if len(failed_list) != last_length:
                self.logger.debug(
                    "last failed list:%s, current failed list:%s" %
                    (last_length, len(failed_list)))
                last_length = len(failed_list)
            else:
                if last_length > 0: time.sleep(600)
        obj_pool.join(timeout=5)
        obj_pool.kill()
        all_df = all_df.drop_duplicates()
        all_df = all_df.sort_values(by='date', ascending=True)
        all_df = all_df.reset_index(drop=True)
        return all_df

    def update(self, end_date=datetime.now().strftime('%Y-%m-%d'), num=30):
        #if end_date == datetime.now().strftime('%Y-%m-%d'): end_date = get_day_nday_ago(end_date, num = 1, dformat = "%Y-%m-%d")
        start_date = get_day_nday_ago(end_date, num=num, dformat="%Y-%m-%d")
        date_array = get_dates_array(start_date, end_date)
        succeed = True
        count = 0
        for mdate in date_array:
            count += 1
            if CCalendar.is_trading_day(mdate, redis=self.redis):
                if not self.set_day_data(mdate):
                    self.logger.error("set %s data for rstock failed" % mdate)
                    succeed = False
        return succeed

    def set_day_data(self, cdate):
        table_name = self.get_table_name(cdate)
        if not self.is_table_exists(table_name):
            if not self.create_table(table_name):
                self.logger.error("create tick table failed")
                return False
            self.redis.sadd(self.dbname, table_name)
        if self.is_date_exists(table_name, cdate):
            self.logger.debug("existed table:%s, date:%s" %
                              (table_name, cdate))
            return True
        df = self.generate_all_data(cdate)
        if df is None: return False
        if self.mysql_client.set(df, table_name):
            self.redis.sadd(table_name, cdate)
            return True
        return False
コード例 #10
0
class RIndexIndustryInfo:
    def __init__(self, dbinfo=ct.DB_INFO, redis_host=None):
        self.redis = create_redis_obj(
        ) if redis_host is None else create_redis_obj(host=redis_host)
        self.dbname = self.get_dbname()
        self.logger = getLogger(__name__)
        self.mysql_client = CMySQL(dbinfo, self.dbname, iredis=self.redis)
        if not self.mysql_client.create_db(self.get_dbname()):
            raise Exception("init rindex stock database failed")

    @staticmethod
    def get_dbname():
        return ct.RINDEX_INDUSTRY_INFO_DB

    def get_table_name(self, cdate):
        cdates = cdate.split('-')
        return "rindustry_day_%s_%s" % (cdates[0],
                                        (int(cdates[1]) - 1) // 3 + 1)

    def is_date_exists(self, table_name, cdate):
        if self.redis.exists(table_name):
            return cdate in set(
                str(tdate, encoding="utf8")
                for tdate in self.redis.smembers(table_name))
        return False

    def is_table_exists(self, table_name):
        if self.redis.exists(self.dbname):
            return table_name in set(
                str(table, encoding="utf8")
                for table in self.redis.smembers(self.dbname))
        return False

    def create_table(self, table):
        sql = 'create table if not exists %s(date varchar(10) not null,\
                                             code varchar(10) not null,\
                                             open float,\
                                             high float,\
                                             close float,\
                                             preclose float,\
                                             low float,\
                                             volume bigint,\
                                             amount float,\
                                             preamount float,\
                                             pchange float,\
                                             mchange float,\
                                             PRIMARY KEY (date, code))' % table
        return True if table in self.mysql_client.get_all_tables(
        ) else self.mysql_client.create(sql, table)

    def get_k_data_in_range(self, start_date, end_date):
        ndays = delta_days(start_date, end_date)
        date_dmy_format = time.strftime("%m/%d/%Y",
                                        time.strptime(start_date, "%Y-%m-%d"))
        data_times = pd.date_range(date_dmy_format, periods=ndays, freq='D')
        date_only_array = np.vectorize(lambda s: s.strftime('%Y-%m-%d'))(
            data_times.to_pydatetime())
        data_dict = OrderedDict()
        for _date in date_only_array:
            if CCalendar.is_trading_day(_date, redis=self.redis):
                table_name = self.get_table_name(_date)
                if table_name not in data_dict: data_dict[table_name] = list()
                data_dict[table_name].append(str(_date))
        all_df = pd.DataFrame()
        for key in data_dict:
            table_list = sorted(data_dict[key], reverse=False)
            if len(table_list) == 1:
                df = self.get_data(table_list[0])
                if df is not None: all_df = all_df.append(df)
            else:
                start_date = table_list[0]
                end_date = table_list[len(table_list) - 1]
                df = self.get_data_between(start_date, end_date)
                if df is not None: all_df = all_df.append(df)
        return all_df

    def get_data_between(self, start_date, end_date):
        #start_date and end_date shoulw be in the same table
        sql = "select * from %s where date between \"%s\" and \"%s\"" % (
            self.get_table_name(start_date), start_date, end_date)
        return self.mysql_client.get(sql)

    def get_k_data(self, cdate):
        cdate = datetime.now().strftime('%Y-%m-%d') if cdate is None else cdate
        sql = "select * from %s where date=\"%s\"" % (
            self.get_table_name(cdate), cdate)
        return self.mysql_client.get(sql)

    def get_industry_data(self, cdate, code):
        return (code, CIndex(code).get_k_data(cdate))

    def generate_data(self, cdate):
        good_list = list()
        obj_pool = Pool(500)
        all_df = pd.DataFrame()
        industry_info = IndustryInfo.get(self.redis)
        failed_list = industry_info.code.tolist()
        cfunc = partial(self.get_industry_data, cdate)
        failed_count = 0
        while len(failed_list) > 0:
            is_failed = False
            self.logger.debug("restart failed ip len(%s)" % len(failed_list))
            for code_data in obj_pool.imap_unordered(cfunc, failed_list):
                if code_data[1] is not None:
                    tem_df = code_data[1]
                    tem_df['code'] = code_data[0]
                    all_df = all_df.append(tem_df)
                    failed_list.remove(code_data[0])
                else:
                    is_failed = True
            if is_failed:
                failed_count += 1
                if failed_count > 10:
                    self.logger.info("%s rindustry init failed" % failed_list)
                    return pd.DataFrame()
                time.sleep(10)
        obj_pool.join(timeout=5)
        obj_pool.kill()
        self.mysql_client.changedb(self.get_dbname())
        if all_df.empty: return all_df
        all_df = all_df.reset_index(drop=True)
        return all_df

    def set_data(self, cdate=datetime.now().strftime('%Y-%m-%d')):
        if not CCalendar.is_trading_day(cdate, redis=self.redis): return False
        table_name = self.get_table_name(cdate)
        if not self.is_table_exists(table_name):
            if not self.create_table(table_name):
                self.logger.error("create rindex table failed")
                return False
            self.redis.sadd(self.dbname, table_name)
        if self.is_date_exists(table_name, cdate):
            self.logger.debug("existed rindex table:%s, date:%s" %
                              (table_name, cdate))
            return True
        df = self.generate_data(cdate)
        if df.empty: return False
        self.redis.set(ct.TODAY_ALL_INDUSTRY, _pickle.dumps(df, 2))
        if self.mysql_client.set(df, table_name):
            return self.redis.sadd(table_name, cdate)
        return False

    def update(self, end_date=None, num=10):
        if end_date is None: end_date = datetime.now().strftime('%Y-%m-%d')
        #if end_date == datetime.now().strftime('%Y-%m-%d'): end_date = get_day_nday_ago(end_date, num = 1, dformat = "%Y-%m-%d")
        start_date = get_day_nday_ago(end_date, num=num, dformat="%Y-%m-%d")
        date_array = get_dates_array(start_date, end_date)
        succeed = True
        for mdate in date_array:
            if CCalendar.is_trading_day(mdate, redis=self.redis):
                if not self.set_data(mdate):
                    self.logger.error("%s rindustry set failed" % mdate)
                    succeed = False
        return succeed
コード例 #11
0
ファイル: sexchange.py プロジェクト: orf53975/smart_deal_tool
class StockExchange(object):
    def __init__(self, market = ct.SH_MARKET_SYMBOL, dbinfo = ct.DB_INFO, redis_host = None):
        self.market       = market
        self.dbinfo       = dbinfo
        self.balcklist    = ['2006-07-10'] if market == ct.SH_MARKET_SYMBOL else list()
        self.logger       = getLogger(__name__)
        self.dbname       = self.get_dbname(market)
        self.redis        = create_redis_obj() if redis_host is None else create_redis_obj(host = redis_host)
        self.header       = {"Host": "query.sse.com.cn",
                             "Referer": "http://www.sse.com.cn/market/stockdata/overview/day/",
                             "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36"}
        self.mysql_client = CMySQL(self.dbinfo, dbname = self.dbname, iredis = self.redis)
        if not self.mysql_client.create_db(self.dbname):
            raise Exception("create %s failed" % self.dbname)
   
    @staticmethod
    def get_dbname(market):
        return market

    def get_table_name(self):
        return "%s_deal" % self.dbname

    def create_table(self):
        table = self.get_table_name() 
        if table not in self.mysql_client.get_all_tables():
            sql = 'create table if not exists %s(date varchar(10) not null,\
                                                 name varchar(20) not null,\
                                                 amount float,\
                                                 number int,\
                                                 negotiable_value float,\
                                                 market_value float,\
                                                 pe float,\
                                                 totals float,\
                                                 outstanding float,\
                                                 volume float,\
                                                 transactions float,\
                                                 turnover float,\
                                                 PRIMARY KEY (date, name))' % table 
            if not self.mysql_client.create(sql, table): return False
        return True

    def get_k_data_in_range(self, start_date, end_date):
        sql = "select * from %s where date between \"%s\" and \"%s\"" % (self.get_table_name(), start_date, end_date)
        return self.mysql_client.get(sql)

    def get_k_data(self, cdate = datetime.now().strftime('%Y-%m-%d')):
        sql = "select * from %s where date=\"%s\"" % (self.get_table_name(), cdate)
        return self.mysql_client.get(sql)

    def is_table_exists(self, table_name):
        if self.redis.exists(self.dbname):
            return table_name in set(str(table, encoding = "utf8") for table in self.redis.smembers(self.dbname))
        return False

    def is_date_exists(self, table_name, cdate):
        if self.redis.exists(table_name):
            return cdate in set(str(tdate, encoding = "utf8") for tdate in self.redis.smembers(table_name))
        return False

    def get_url(self):
        if self.market == ct.SH_MARKET_SYMBOL:
            return "http://query.sse.com.cn/marketdata/tradedata/queryTradingByProdTypeData.do?jsonCallBack=jsonpCallback%s&searchDate=%s&prodType=gp&_=%s"
        else:
            return "http://www.szse.cn/api/report/ShowReport?SHOWTYPE=xlsx&CATALOGID=1803&TABKEY=%s&txtQueryDate=%s&random=%s"

    def get_sh_type_name(self, dtype):
        if '1' == dtype:
            return "A股"
        elif '2' == dtype:
            return "B股"
        elif '12' == dtype:
            return "上海市场"
        elif '90' == dtype:
            return "科创板"
        else:
            return None

    def get_data_from_url(self, cdate = datetime.now().strftime('%Y-%m-%d')):
        if self.market == ct.SH_MARKET_SYMBOL:
            current_milli_time = lambda: int(round(time.time() * 1000))
            url = self.get_url() % (int_random(5), cdate, current_milli_time())
            response = smart_get(requests.get, url, headers=self.header)
            if response.status_code != 200:
                self.logger.error("get exchange data failed, response code:%s" % response.status_code)
                return pd.DataFrame()
            json_result = loads_jsonp(response.text)
            if json_result is None:
                self.logger.error("parse exchange data jsonp failed")
                return pd.DataFrame()
            datas = list()
            for json_obj in json_result['result']:
                name = self.get_sh_type_name(json_obj['productType'])
                if name is None:
                    self.logger.error("get unknown type for SH data:%s" % json_obj['productType'])
                    return pd.DataFrame()
                elif name == "科创板":
                    continue
                else:
                    amount           = 0 if json_obj['trdAmt'] == '' else float(json_obj['trdAmt'])
                    number           = 0 if json_obj['istVol'] == '' else int(json_obj['istVol'])
                    negotiable_value = 0 if json_obj['negotiableValue'] == '' else float(json_obj['negotiableValue'])
                    market_value     = 0 if json_obj['marketValue'] == '' else float(json_obj['marketValue'])
                    volume           = 0 if json_obj['trdVol'] == '' else float(json_obj['trdVol'])
                    pe               = 0 if json_obj['profitRate'] == '' else float(json_obj['profitRate'])
                    transactions     = 0 if json_obj['trdTm'] == '' else float(json_obj['trdTm'])
                    turnover         = 0 if json_obj['exchangeRate'] == '' else float(json_obj['exchangeRate'])
                    outstanding      = 0 if turnover == 0 else volume / (100 * turnover)
                    totals           = outstanding
                    data = {'amount': amount,\
                            'number': number,\
                            'negotiable_value': negotiable_value,\
                            'market_value': market_value,\
                            'pe': pe,\
                            'totals': totals,\
                            'outstanding': outstanding,\
                            'volume': volume,\
                            'transactions': transactions,\
                            'turnover': turnover}
                    if any(data.values()):
                        data['name'] = name
                        data['date'] = cdate
                        datas.append(data)
            df = pd.DataFrame.from_dict(datas)
        else:
            datas = list()
            for name, tab in ct.SZ_MARKET_DICT.items():
                url = self.get_url() % (tab, cdate, float_random(17))
                df = smart_get(pd.read_excel, url, usecols = [0, 1])
                if df is None: return pd.DataFrame()
                if df.empty: continue
                if len(df) == 1 and df.values[0][0] == '没有找到符合条件的数据!': continue
                if name == "深圳市场":
                    amount           = 0
                    #amount           = float(df.loc[df['指标名称'] == '市场总成交金额(元)', '本日数值'].values[0].replace(',', '')) / 100000000
                    number           = int(float(df.loc[df['指标名称'] == '上市公司数', '本日数值'].values[0].replace(',', '')))
                    negotiable_value = float(df.loc[df['指标名称'] == '股票流通市值(元)', '本日数值'].values[0].replace(',', '')) / 100000000
                    market_value     = float(df.loc[df['指标名称'] == '股票总市值(元)', '本日数值'].values[0].replace(',', '')) / 100000000
                    pe               = float(df.loc[df['指标名称'] == '股票平均市盈率', '本日数值'].values[0].replace(',', ''))
                    totals           = float(df.loc[df['指标名称'] == '股票总股本(股)', '本日数值'].values[0].replace(',', '')) / 100000000
                    outstanding      = float(df.loc[df['指标名称'] == '股票流通股本(股)', '本日数值'].values[0].replace(',', '')) / 100000000
                    volume           = 0
                    transactions     = 0
                    turnover         = float(df.loc[df['指标名称'] == '股票平均换手率', '本日数值'].values[0])
                else:
                    amount           = float(df.loc[df['指标名称'] == '总成交金额(元)', '本日数值'].values[0].replace(',', '')) / 100000000
                    number           = int(float(df.loc[df['指标名称'] == '上市公司数', '本日数值'].values[0].replace(',', '')))
                    negotiable_value = float(df.loc[df['指标名称'] == '上市公司流通市值(元)', '本日数值'].values[0].replace(',', '')) / 100000000
                    market_value     = float(df.loc[df['指标名称'] == '上市公司市价总值(元)', '本日数值'].values[0].replace(',', '')) / 100000000
                    pe               = float(df.loc[df['指标名称'] == '平均市盈率(倍)', '本日数值'].values[0])
                    totals           = float(df.loc[df['指标名称'] == '总发行股本(股)', '本日数值'].values[0].replace(',', '')) / 100000000
                    outstanding      = float(df.loc[df['指标名称'] == '总流通股本(股)', '本日数值'].values[0].replace(',', '')) / 100000000
                    volume           = float(df.loc[df['指标名称'] == '总成交股数', '本日数值'].values[0].replace(',', '')) / 100000000
                    transactions     = float(df.loc[df['指标名称'] == '总成交笔数', '本日数值'].values[0].replace(',', '')) / 10000
                    turnover         = 100 * volume / outstanding
                data = {
                    'name': name,\
                    'date': cdate,\
                    'amount': amount,\
                    'number': number,\
                    'negotiable_value': negotiable_value,\
                    'market_value': market_value,\
                    'pe': pe,\
                    'totals': totals,\
                    'outstanding': outstanding,\
                    'volume': volume,\
                    'transactions': transactions,\
                    'turnover': turnover
                }
                datas.append(data)
            df = pd.DataFrame.from_dict(datas)
            if not df.empty:
                df.at[df.name == "深圳市场", 'amount']       = df.amount.sum() - df.loc[df.name == "深圳市场", 'amount']
                df.at[df.name == "深圳市场", 'volume']       = df.volume.sum() - df.loc[df.name == "深圳市场", 'volume']
                df.at[df.name == "深圳市场", 'transactions'] = df.transactions.sum() - df.loc[df.name == "深圳市场", 'transactions']
        return df

    def set_k_data(self, cdate = datetime.now().strftime('%Y-%m-%d')):
        table_name = self.get_table_name()
        if not self.is_table_exists(table_name):
            if not self.create_table():
                self.logger.error("create tick table failed")
                return False
            self.redis.sadd(self.dbname, table_name)

        if self.is_date_exists(table_name, cdate): 
            self.logger.debug("existed table:%s, date:%s" % (table_name, cdate))
            return True

        df = self.get_data_from_url(cdate)
        if df.empty:
            self.logger.debug("get data from %s failed, date:%s" % (self.market, cdate))
            return False

        if self.mysql_client.set(df, table_name):
            self.redis.sadd(table_name, cdate)
            return True
        return False

    def update(self, end_date = None, num = 10):
        if end_date is None: end_date = datetime.now().strftime('%Y-%m-%d')
        if end_date == datetime.now().strftime('%Y-%m-%d'): end_date = get_day_nday_ago(end_date, num = 1, dformat = "%Y-%m-%d")
        start_date = get_day_nday_ago(end_date, num = num, dformat = "%Y-%m-%d")
        succeed = True
        for mdate in get_dates_array(start_date, end_date):
            if mdate in self.balcklist: continue
            if CCalendar.is_trading_day(mdate, redis = self.redis):
                if not self.set_k_data(mdate):
                    succeed = False
                    self.logger.info("market %s for %s set failed" % (self.market, mdate))
        return succeed
コード例 #12
0
class Margin(object):
    def __init__(self, dbinfo=ct.DB_INFO, redis_host=None):
        self.logger = getLogger(__name__)
        self.crawler = get_tushare_client()
        self.dbname = self.get_dbname()
        self.redis = create_redis_obj(
        ) if redis_host is None else create_redis_obj(host=redis_host)
        self.mysql_client = CMySQL(dbinfo, self.dbname, iredis=self.redis)
        if not self.mysql_client.create_db(self.dbname):
            raise Exception("init margin database failed")

    @staticmethod
    def get_dbname():
        return "margin"

    def get_table_name(self, cdate):
        cdates = cdate.split('-')
        return "%s_day_%s_%s" % (self.dbname, cdates[0],
                                 (int(cdates[1]) - 1) // 3 + 1)

    def is_date_exists(self, table_name, cdate):
        if self.redis.exists(table_name):
            return self.redis.sismember(table_name, cdate)
        return False

    def create_table(self, table):
        sql = 'create table if not exists %s(date varchar(10) not null,\
                                             code varchar(10) not null,\
                                             rzye float,\
                                             rzmre float,\
                                             rzche float,\
                                             rqye float,\
                                             rqyl float,\
                                             rqmcl float,\
                                             rqchl float,\
                                             rzrqye float,\
                                             PRIMARY KEY (date, code))' % table
        return True if table in self.mysql_client.get_all_tables(
        ) else self.mysql_client.create(sql, table)

    def get_k_data_in_range(self, start_date, end_date):
        ndays = delta_days(start_date, end_date)
        date_dmy_format = time.strftime("%m/%d/%Y",
                                        time.strptime(start_date, "%Y-%m-%d"))
        data_times = pd.date_range(date_dmy_format, periods=ndays, freq='D')
        date_only_array = np.vectorize(lambda s: s.strftime('%Y-%m-%d'))(
            data_times.to_pydatetime())
        data_dict = OrderedDict()
        for _date in date_only_array:
            if CCalendar.is_trading_day(_date, redis=self.redis):
                table_name = self.get_table_name(_date)
                if table_name not in data_dict: data_dict[table_name] = list()
                data_dict[table_name].append(str(_date))
        all_df = pd.DataFrame()
        for key in data_dict:
            table_list = sorted(data_dict[key], reverse=False)
            if len(table_list) == 1:
                df = self.get_data(table_list[0])
                if df is not None: all_df = all_df.append(df)
            else:
                start_date = table_list[0]
                end_date = table_list[len(table_list) - 1]
                df = self.get_data_between(start_date, end_date)
                if df is not None: all_df = all_df.append(df)
        return all_df

    def get_data_between(self, start_date, end_date):
        #start_date and end_date should be in the same table
        table_name = self.get_table_name(start_date)
        if not self.is_table_exists(table_name): return None
        sql = "select * from %s where date between \"%s\" and \"%s\"" % (
            table_name, start_date, end_date)
        return self.mysql_client.get(sql)

    def get_data(self, cdate=datetime.now().strftime('%Y-%m-%d')):
        sql = "select * from %s where date=\"%s\"" % (
            self.get_table_name(cdate), cdate)
        return self.mysql_client.get(sql)

    def update(self, end_date=None, num=10):
        if end_date is None: end_date = datetime.now().strftime('%Y-%m-%d')
        start_date = get_day_nday_ago(end_date, num=num, dformat="%Y-%m-%d")
        date_array = get_dates_array(start_date, end_date)
        succeed = True
        for mdate in date_array:
            if CCalendar.is_trading_day(mdate, redis=self.redis):
                if mdate == end_date: continue
                if not self.set_data(mdate):
                    self.logger.error("%s set failed" % mdate)
                    succeed = False
        return succeed

    def is_table_exists(self, table_name):
        if self.redis.exists(self.dbname):
            return self.redis.sismember(self.dbname, table_name)
        return False

    def set_data(self, cdate=datetime.now().strftime('%Y-%m-%d')):
        table_name = self.get_table_name(cdate)
        if not self.is_table_exists(table_name):
            if not self.create_table(table_name):
                self.logger.error("create tick table failed")
                return False
            self.redis.sadd(self.dbname, table_name)

        if self.is_date_exists(table_name, cdate):
            self.logger.debug("existed table:%s, date:%s" %
                              (table_name, cdate))
            return True

        total_df = smart_get(self.crawler.margin,
                             trade_date=transfer_date_string_to_int(cdate))
        if total_df is None:
            self.logger.error("crawel margin for %s failed" % cdate)
            return False

        total_df = total_df.rename(columns={
            "trade_date": "date",
            "exchange_id": "code"
        })
        total_df['rqyl'] = 0
        total_df['rqchl'] = 0

        detail_df = smart_get(self.crawler.margin_detail,
                              trade_date=transfer_date_string_to_int(cdate))
        if detail_df is None:
            self.logger.error("crawel detail margin for %s failed" % cdate)
            return False

        detail_df = detail_df.rename(columns={
            "trade_date": "date",
            "ts_code": "code"
        })

        total_df = total_df.append(detail_df, sort=False)
        total_df['date'] = pd.to_datetime(
            total_df.date).dt.strftime("%Y-%m-%d")
        total_df = total_df.reset_index(drop=True)
        if self.mysql_client.set(total_df, table_name):
            time.sleep(1)
            return self.redis.sadd(table_name, cdate)
        return False
コード例 #13
0
ファイル: hgt.py プロジェクト: tfangz888/smart_deal_tool
class StockConnect(object):
    def __init__(self,
                 market_from=ct.SH_MARKET_SYMBOL,
                 market_to=ct.HK_MARKET_SYMBOL,
                 dbinfo=ct.DB_INFO,
                 redis_host=None):
        self.market_from = market_from
        self.market_to = market_to
        self.balcklist = None
        self.crawler = None
        self.mysql_client = None
        self.dbinfo = dbinfo
        self.logger = getLogger(__name__)
        self.redis = create_redis_obj(
        ) if redis_host is None else create_redis_obj(host=redis_host)

    def set_market(self, market_from, market_to):
        self.market_from = market_from
        self.market_to = market_to
        self.balcklist = [
            "2018-10-17", "2018-09-25", "2018-07-02", "2018-05-22",
            "2018-04-02", "2018-03-30"
        ] if market_from in [ct.SH_MARKET_SYMBOL, ct.SZ_MARKET_SYMBOL
                             ] else list()
        self.dbname = self.get_dbname(market_from, market_to)
        self.crawler = MCrawl(market_from)
        self.mysql_client = CMySQL(self.dbinfo, self.dbname, iredis=self.redis)
        return False if not self.mysql_client.create_db(self.dbname) else True

    def quit(self):
        self.crawler.quit()

    def close(self):
        self.crawler.close()

    @staticmethod
    def get_dbname(mfrom, mto):
        return "%s2%s" % (mfrom, mto)

    def get_table_name(self, cdate):
        cdates = cdate.split('-')
        return "%s_stock_day_%s_%s" % (self.dbname, cdates[0],
                                       (int(cdates[1]) - 1) // 3 + 1)

    def is_date_exists(self, table_name, cdate):
        if self.redis.exists(table_name):
            return cdate in set(
                str(tdate, encoding=ct.UTF8)
                for tdate in self.redis.smembers(table_name))
        return False

    def create_table(self, table):
        sql = 'create table if not exists %s(date varchar(10) not null,\
                                             code varchar(10) not null,\
                                             name varchar(90),\
                                             volume int,\
                                             percent float,\
                                             PRIMARY KEY (date, code))' % table
        return True if table in self.mysql_client.get_all_tables(
        ) else self.mysql_client.create(sql, table)

    def get_k_data_in_range(self, start_date, end_date):
        ndays = delta_days(start_date, end_date)
        date_dmy_format = time.strftime("%m/%d/%Y",
                                        time.strptime(start_date, "%Y-%m-%d"))
        data_times = pd.date_range(date_dmy_format, periods=ndays, freq='D')
        date_only_array = np.vectorize(lambda s: s.strftime('%Y-%m-%d'))(
            data_times.to_pydatetime())
        data_dict = OrderedDict()
        for _date in date_only_array:
            if CCalendar.is_trading_day(_date, redis=self.redis):
                table_name = self.get_table_name(_date)
                if table_name not in data_dict: data_dict[table_name] = list()
                data_dict[table_name].append(str(_date))
        all_df = pd.DataFrame()
        for key in data_dict:
            table_list = sorted(data_dict[key], reverse=False)
            if len(table_list) == 1:
                df = self.get_k_data(table_list[0])
                if df is not None: all_df = all_df.append(df)
            else:
                start_date = table_list[0]
                end_date = table_list[len(table_list) - 1]
                df = self.get_data_between(start_date, end_date)
                if df is not None: all_df = all_df.append(df)
        return all_df

    def get_data_between(self, start_date, end_date):
        #start_date and end_date should be in the same table
        sql = "select * from %s where date between \"%s\" and \"%s\"" % (
            self.get_table_name(start_date), start_date, end_date)
        return self.mysql_client.get(sql)

    def get_k_data(self, cdate=datetime.now().strftime('%Y-%m-%d')):
        sql = "select * from %s where date=\"%s\"" % (
            self.get_table_name(cdate), cdate)
        return self.mysql_client.get(sql)

    def update(self, end_date=None, num=10):
        if end_date is None: end_date = datetime.now().strftime('%Y-%m-%d')
        start_date = get_day_nday_ago(end_date, num=num, dformat="%Y-%m-%d")
        succeed = True
        for mdate in get_dates_array(start_date, end_date):
            if CCalendar.is_trading_day(mdate, redis=self.redis):
                if mdate == end_date or mdate in self.balcklist: continue
                if not self.set_data(mdate):
                    succeed = False
        return succeed

    def is_table_exists(self, table_name):
        if self.redis.exists(self.dbname):
            return table_name in set(
                str(table, encoding=ct.UTF8)
                for table in self.redis.smembers(self.dbname))
        return False

    def set_data(self, cdate=datetime.now().strftime('%Y-%m-%d')):
        table_name = self.get_table_name(cdate)
        if not self.is_table_exists(table_name):
            if not self.create_table(table_name):
                self.logger.error("create tick table failed")
                return False
            self.redis.sadd(self.dbname, table_name)

        if self.is_date_exists(table_name, cdate):
            self.logger.debug("existed table:%s, date:%s" %
                              (table_name, cdate))
            return True

        ret, df = self.crawler.crawl(cdate)
        if ret != 0: return False
        if df.empty: return True
        df = df.reset_index(drop=True)
        df['date'] = cdate
        if self.mysql_client.set(df, table_name):
            return self.redis.sadd(table_name, cdate)
        return False