Esempio n. 1
0
 def __init__(self, dbinfo=ct.DB_INFO, redis_host=None):
     """Build the loggers, registries, clients and handlers used by the manager.

     dbinfo: database settings forwarded to the storage-backed clients.
     redis_host: redis host forwarded to the same clients (defaults to None).
     """
     # The same two settings are passed to most clients, positionally or by
     # keyword; keep both forms at hand.
     backend = (dbinfo, redis_host)
     backend_kwargs = {'dbinfo': dbinfo, 'redis_host': redis_host}

     self.dbinfo = dbinfo
     self.logger = getLogger(__name__)

     # Registries start out empty.
     self.index_objs = {}
     self.stock_objs = {}
     self.updating_date = None
     self.combination_objs = {}

     # Clients are created in the same order as before.
     self.cal_client = CCalendar(*backend)
     self.index_info_client = IndexInfo()
     self.cvaluation_client = CValuation()
     self.reviewer = CReivew(*backend)
     self.comb_info_client = CombinationInfo(*backend)
     self.stock_info_client = CStockInfo(*backend)
     self.rindex_stock_data_client = RIndexStock(*backend)
     self.industry_info_client = IndustryInfo(*backend)
     self.rindustry_info_client = RIndexIndustryInfo(*backend)
     self.animation_client = CAnimation(*backend)

     # Subscription plumbing.
     self.subscriber = Subscriber()
     self.quote_handler = StockQuoteHandler()
     self.ticker_handler = TickerHandler()

     self.connect_client = StockConnect(market_from=ct.SH_MARKET_SYMBOL,
                                        market_to=ct.HK_MARKET_SYMBOL,
                                        **backend_kwargs)
     self.margin_client = Margin(**backend_kwargs)
     self.emotion_client = Emotion(**backend_kwargs)
     self.sh_exchange_client = StockExchange(ct.SH_MARKET_SYMBOL)
     self.sz_exchange_client = StockExchange(ct.SZ_MARKET_SYMBOL)
Esempio n. 2
0
 def generate_all_data(self, cdate, black_list=ct.BLACK_LIST):
     """Collect the data of every stock for ``cdate``, retrying failures.

     The code list comes from ``CStockInfo``; codes in ``black_list`` are
     excluded. Every code is passed to ``self.get_stock_data(cdate, code)``
     through a gevent pool. Codes whose result is ``None`` stay in the
     pending list and are retried; when a whole round makes no progress the
     method sleeps 600 seconds before the next round.

     Returns a DataFrame with all collected rows (plus a ``code`` column),
     de-duplicated and sorted by ``date``; an empty DataFrame when there was
     nothing to collect.
     """
     from gevent.pool import Pool
     obj_pool = Pool(5000)
     failed_list = CStockInfo(redis_host=self.redis_host).get(
         redis=self.redis).code.tolist()
     if len(black_list) > 0:
         failed_list = list(set(failed_list).difference(set(black_list)))
     frames = []
     last_length = len(failed_list)
     cfunc = partial(self.get_stock_data, cdate)
     try:
         while last_length > 0:
             self.logger.info("all stock list:%s, cdate:%s", len(failed_list),
                              cdate)
             # Iterate over a snapshot: imap_unordered consumes its input
             # lazily, and removing items from the list being iterated
             # would silently skip codes in this round.
             pending = tuple(failed_list)
             for code_data in obj_pool.imap_unordered(cfunc, pending):
                 if code_data[1] is not None:
                     tem_df = code_data[1]
                     tem_df['code'] = code_data[0]
                     frames.append(tem_df)
                     failed_list.remove(code_data[0])
             if len(failed_list) != last_length:
                 self.logger.debug(
                     "last failed list:%s, current failed list:%s",
                     last_length, len(failed_list))
                 last_length = len(failed_list)
             else:
                 # No progress in this round: back off before retrying.
                 time.sleep(600)
     finally:
         # Release the pool even when a worker or the loop raised.
         obj_pool.join(timeout=5)
         obj_pool.kill()
     if not frames:
         # Nothing collected: there is no 'date' column to sort by.
         return pd.DataFrame()
     # One concat instead of repeated DataFrame.append (quadratic, and
     # removed in pandas 2.0).
     all_df = pd.concat(frames)
     all_df = all_df.drop_duplicates()
     all_df = all_df.sort_values(by='date', ascending=True)
     all_df = all_df.reset_index(drop=True)
     return all_df
Esempio n. 3
0
 def __init__(self, dbinfo):
     """Create the registries, the async result and the dbinfo-based clients.

     dbinfo: database settings stored on the instance and passed to every
     client constructed here.
     """
     # Empty registries.
     self.combination_objs = {}
     self.stock_objs = {}
     self.evt = AsyncResult()
     self.dbinfo = dbinfo
     # Every client below takes only the database settings; they are built
     # in the listed order.
     client_factories = (
         ('cal_client', CCalendar),
         ('comb_info_client', CombinationInfo),
         ('stock_info_client', CStockInfo),
         ('delisted_info_client', CDelisted),
         ('animation_client', CAnimation),
     )
     for attr_name, factory in client_factories:
         setattr(self, attr_name, factory(dbinfo))
     self.subscriber = Subscriber()
Esempio n. 4
0
 def generate_all_data_1(self, cdate, black_list=list()):
     """Run ``get_stock_data`` for ``cdate`` over all non-blacklisted codes.

     The code list is read from ``CStockInfo``; when ``black_list`` is not
     empty its codes are removed. The work is delegated to
     ``queue_process_concurrent_run`` and its result is returned unchanged.
     """
     info_client = CStockInfo(redis_host=self.redis_host)
     codes = info_client.get(redis=self.redis).code.tolist()
     # black_list is only read here, never modified.
     if len(black_list) > 0:
         codes = list(set(codes) - set(black_list))
     worker = partial(self.get_stock_data, cdate)
     return queue_process_concurrent_run(worker,
                                         codes,
                                         redis_client=self.redis)
Esempio n. 5
0
 def run(self, cdate):
     """Gather ``get_stock_data(cdate, code)`` for every code into one frame.

     This is a generator: it yields the list of ``get_stock_data`` results
     and receives the resolved responses back. It then delivers the combined
     DataFrame by raising ``gen.Return``; presumably it is driven as a
     tornado coroutine (the decorator is not visible here — confirm).

     Each response is indexed as ``(code, frame_or_None)``; ``None`` frames
     are skipped and a ``code`` column is added to the others.
     """
     # NOTE(review): ``get`` is called on the CStockInfo class itself, not on
     # an instance — confirm this is intended.
     code_list = CStockInfo.get().code.tolist()
     responses = yield [self.get_stock_data(cdate, code) for code in code_list]
     frames = []
     for response in responses:
         if response[1] is not None:
             tem_df = response[1]
             tem_df['code'] = response[0]
             frames.append(tem_df)
     # One concat instead of repeated DataFrame.append (quadratic, and
     # removed in pandas 2.0).
     df = pd.concat(frames) if frames else pd.DataFrame()
     raise gen.Return(value=df)
Esempio n. 6
0
 def generate_all_data(self, cdate):
     """Collect the data of every stock for ``cdate``, retrying failures.

     Every code returned by ``CStockInfo`` is passed to
     ``self.get_stock_data(cdate, code)`` through a gevent pool. Codes whose
     result is ``None`` stay in the pending list and are retried in the next
     round until the list is empty.

     Returns a DataFrame with all collected rows (plus a ``code`` column),
     de-duplicated and sorted by ``date``; an empty DataFrame when there was
     nothing to collect.
     """
     from gevent.pool import Pool
     obj_pool = Pool(4000)
     frames = []
     failed_list = CStockInfo(redis_host = self.redis_host).get(redis = self.redis).code.tolist()
     cfunc = partial(self.get_stock_data, cdate)
     # NOTE(review): there is no back-off between rounds, so a code that
     # never succeeds keeps this loop spinning — confirm that is acceptable.
     while len(failed_list) > 0:
         print("all stock list:%s, cdate:%s" % (len(failed_list),cdate))
         # Iterate over a snapshot: imap_unordered consumes its input lazily,
         # and removing items from the list being iterated would silently
         # skip codes in this round.
         pending = tuple(failed_list)
         for code_data in obj_pool.imap_unordered(cfunc, pending):
             if code_data[1] is not None:
                 tem_df = code_data[1]
                 tem_df['code'] = code_data[0]
                 frames.append(tem_df)
                 failed_list.remove(code_data[0])
     obj_pool.join(timeout = 5)
     obj_pool.kill()
     if not frames:
         # Nothing collected: there is no 'date' column to sort by.
         return pd.DataFrame()
     # One concat instead of repeated DataFrame.append (quadratic, and
     # removed in pandas 2.0).
     all_df = pd.concat(frames)
     all_df = all_df.drop_duplicates()
     all_df = all_df.sort_values(by = 'date', ascending= True)
     all_df = all_df.reset_index(drop = True)
     return all_df
Esempio n. 7
0
class MValuation(object):
    """Batch jobs that import and refresh stock and index valuation data."""

    def __init__(self):
        """Create the logger and the valuation / stock-info clients."""
        self.logger = getLogger(__name__)
        self.cval_client = CValuation()
        self.stock_info_client = CStockInfo()

    def collect_financial_data(
            self,
            spath="/data/crawler/china_security_industry_valuation/stock",
            tpath='/data/valuation/cstocks'):
        """Import every ``*.csv`` under ``spath`` into per-stock valuation data.

        Files are processed in name order. For each row of a file, the rows
        sharing that row's ``code`` and ``date`` are handed to
        ``CStock(code).set_val_data`` with ``fpath=tpath``.

        spath: directory holding the source CSV files.
        tpath: target path forwarded to ``set_val_data`` as ``fpath``.
        """
        spath_obj = Path(spath)
        xfiles = sorted(xfile.name for xfile in spath_obj.glob('*.csv'))
        use_cols = ['code', 'date', 'pe', 'pb', 'ttm', 'dividend']
        dtype_dict = {
            'code': str,
            'date': str,
            'pe': float,
            'pb': float,
            'ttm': float,
            'dividend': float
        }
        for fname in xfiles:
            df = pd.read_csv(spath_obj / fname,
                             header=0,
                             encoding="utf8",
                             usecols=use_cols,
                             dtype=dtype_dict)
            df = df[use_cols]
            # A plain loop replaces np.vectorize here: without ``otypes``
            # vectorize calls the function an extra time for the first row
            # and raises on an empty file.
            for code, mdate in zip(df['code'].values, df['date'].values):
                tmp_df = df.loc[(df.code == code) & (df.date == mdate)]
                tmp_df = tmp_df.reset_index(drop=True)
                # Honour the tpath argument instead of a hard-coded path.
                CStock(code).set_val_data(tmp_df, fpath=tpath)

    def set_financial_data(self, code='688122', mdate='2019-07-30'):
        """Compute the valuation of one stock for one date.

        Builds a ``code -> timeToMarket`` mapping from the stock info table
        and passes it to ``cval_client.set_stock_valuation``. Any exception
        is logged and swallowed; nothing is returned.
        """
        try:
            df = self.stock_info_client.get()
            code_list = df['code'].tolist()
            time2market_list = df['timeToMarket'].tolist()
            code2timedict = dict(zip(code_list, time2market_list))
            self.cval_client.set_stock_valuation(code2timedict, mdate, code)
        except Exception as e:
            self.logger.error(e)
            traceback.print_exc()

    def set_r_financial_data(self, mdate, code_list):
        """Write one CSV with the valuation data of ``code_list`` for ``mdate``.

        Each code's data is read with ``CStock(code).get_val_data(mdate)``
        through a gevent pool; ``None`` and empty results are skipped.

        Returns True on success, False when any exception was raised (the
        exception is logged).
        """
        def cget(mdate, code):
            return code, CStock(code).get_val_data(mdate)

        try:
            obj_pool = Pool(5000)
            frames = []
            cfunc = partial(cget, mdate)
            for code_data in obj_pool.imap_unordered(cfunc, code_list):
                if code_data[1] is not None and not code_data[1].empty:
                    tem_df = code_data[1]
                    tem_df['code'] = code_data[0]
                    frames.append(tem_df)
            obj_pool.join(timeout=5)
            obj_pool.kill()
            # One concat instead of repeated DataFrame.append (quadratic,
            # and removed in pandas 2.0).
            all_df = pd.concat(frames) if frames else pd.DataFrame()
            all_df = all_df.reset_index(drop=True)
            # NOTE(review): get_r_financial_name and rvaluation_dir are not
            # defined in this class; unless they are provided elsewhere this
            # raises AttributeError and the method returns False — confirm.
            file_name = self.get_r_financial_name(mdate)
            file_path = Path(self.rvaluation_dir) / file_name
            all_df.to_csv(file_path,
                          index=False,
                          header=True,
                          mode='w',
                          encoding='utf8')
            return True
        except Exception as e:
            self.logger.error(e)
            traceback.print_exc()
            return False

    def update_index(self, end_date=None, num=3361):
        """Set the index valuation for each trading day in the last ``num`` days.

        end_date: last date as ``YYYY-MM-DD``; ``None`` means today, evaluated
            when the method is called.
        num: number of days to go back from ``end_date``.

        Returns False if any ``set_index_valuation`` call failed, else True.
        """
        if end_date is None:
            # Resolve "today" at call time; a default computed in the
            # signature is frozen when the module is imported.
            end_date = datetime.now().strftime('%Y-%m-%d')
        succeed = True
        start_date = get_day_nday_ago(end_date, num=num, dformat="%Y-%m-%d")
        date_array = get_dates_array(start_date, end_date, asending=True)
        for mdate in date_array:
            if CCalendar.is_trading_day(mdate):
                for code in ct.INDEX_DICT:
                    if not self.cval_client.set_index_valuation(code, mdate):
                        self.logger.error(
                            "{} set {} data for rvaluation failed".format(
                                code, mdate))
                        succeed = False
        return succeed

    def update(self, end_date=None, num=7):
        """Rebuild the per-day valuation CSV for recent trading days.

        end_date: last date as ``YYYY-MM-DD``; ``None`` means today, evaluated
            when the method is called.
        num: number of days to go back from ``end_date``.

        Returns False if ``set_r_financial_data`` failed for any day.
        """
        if end_date is None:
            # Resolve "today" at call time (see update_index).
            end_date = datetime.now().strftime('%Y-%m-%d')
        succeed = True
        base_df = self.stock_info_client.get_basics()
        code_list = base_df.code.tolist()
        start_date = get_day_nday_ago(end_date, num=num, dformat="%Y-%m-%d")
        date_array = get_dates_array(start_date, end_date)
        for mdate in date_array:
            if CCalendar.is_trading_day(mdate):
                if not self.set_r_financial_data(mdate, code_list):
                    self.logger.error("set %s data for rvaluation failed" %
                                      mdate)
                    succeed = False
        return succeed

    def get_min_val_in_range(self, dtype, code):
        """Return the median of column ``dtype`` over the last five matching rows.

        Only rows whose numeric ``date`` ends in 1231 are kept (presumably
        year-end dates stored as YYYYMMDD integers — confirm).
        NOTE(review): despite the name, this returns a median, not a minimum.
        """
        vdf = self.cval_client.get_horizontal_data(code)
        vdf = vdf[(vdf['date'] - 1231) % 10000 == 0]
        vdf = vdf[-5:]
        return vdf[dtype].median()
Esempio n. 8
0
class DataManager:
    """Coordinates intraday data collection and the daily data refresh.

    ``run`` drives the subscription and collection loop during trading time,
    ``update`` drives the after-hours refresh loop, and ``bootstrap`` performs
    the refresh as a sequence of numbered steps whose progress is persisted
    in a step file so that an interrupted run can resume.
    """

    def __init__(self, dbinfo=ct.DB_INFO, redis_host=None):
        """Create the registries, clients and handlers used by the manager.

        dbinfo: database settings forwarded to the storage-backed clients.
        redis_host: redis host forwarded to the same clients.
        """
        self.dbinfo = dbinfo
        self.logger = getLogger(__name__)
        self.index_objs = dict()
        self.stock_objs = dict()
        self.updating_date = None
        self.combination_objs = dict()
        self.cal_client = CCalendar(dbinfo, redis_host)
        self.index_info_client = IndexInfo()
        self.cvaluation_client = CValuation()
        self.reviewer = CReivew(dbinfo, redis_host)
        self.comb_info_client = CombinationInfo(dbinfo, redis_host)
        self.stock_info_client = CStockInfo(dbinfo, redis_host)
        self.rindex_stock_data_client = RIndexStock(dbinfo, redis_host)
        self.industry_info_client = IndustryInfo(dbinfo, redis_host)
        self.rindustry_info_client = RIndexIndustryInfo(dbinfo, redis_host)
        self.animation_client = CAnimation(dbinfo, redis_host)
        self.subscriber = Subscriber()
        self.quote_handler = StockQuoteHandler()
        self.ticker_handler = TickerHandler()
        self.connect_client = StockConnect(market_from=ct.SH_MARKET_SYMBOL,
                                           market_to=ct.HK_MARKET_SYMBOL,
                                           dbinfo=dbinfo,
                                           redis_host=redis_host)
        self.margin_client = Margin(dbinfo=dbinfo, redis_host=redis_host)
        self.emotion_client = Emotion(dbinfo=dbinfo, redis_host=redis_host)
        self.sh_exchange_client = StockExchange(ct.SH_MARKET_SYMBOL)
        self.sz_exchange_client = StockExchange(ct.SZ_MARKET_SYMBOL)

    def is_collecting_time(self):
        """Return True when now is strictly between 17:10:00 and 23:59:59 today."""
        now_time = datetime.now()
        y, m, d = now_time.year, now_time.month, now_time.day
        aft_open_time = datetime(y, m, d, 17, 10, 0)
        aft_close_time = datetime(y, m, d, 23, 59, 59)
        return aft_open_time < now_time < aft_close_time

    def is_morning_time(self, now_time=None):
        """Return True when ``now_time`` is strictly between 00:00:00 and 06:30:00.

        now_time: datetime to test; ``None`` means the current time, evaluated
            when the method is called.
        """
        if now_time is None:
            # Resolve "now" at call time; a default computed in the signature
            # is frozen when the class is defined.
            now_time = datetime.now()
        y, m, d = now_time.year, now_time.month, now_time.day
        mor_open_time = datetime(y, m, d, 0, 0, 0)
        mor_close_time = datetime(y, m, d, 6, 30, 0)
        return mor_open_time < now_time < mor_close_time

    def collect_combination_runtime_data(self):
        """Call ``run()`` on every registered combination via ``concurrent_run``."""
        def _combination_run(code_id):
            self.combination_objs[code_id].run()
            return (code_id, True)

        todo_iplist = list(self.combination_objs.keys())
        return concurrent_run(_combination_run, todo_iplist, num=10)

    def collect_stock_runtime_data(self):
        """Drain the ticker queue and feed each stock object its own rows."""
        if self.ticker_handler.empty():
            return
        datas = self.ticker_handler.getQueue()
        while not datas.empty():
            df = datas.get()
            df = df.set_index('time')
            df.index = pd.to_datetime(df.index)
            for code_str in set(df.code):
                # The registry key is the part after the first '.' of the code.
                code_id = code_str.split('.')[1]
                self.stock_objs[code_id].run(df.loc[df.code == code_str])

    def init_real_stock_info(self):
        """Subscribe to tickers of the concerned stocks and register them.

        Returns the subscriber's return code; stock objects are created only
        when it is 0.
        """
        concerned_list = self.comb_info_client.get_concerned_list()
        prefix_concerned_list = [add_prifix(code) for code in concerned_list]
        ret = self.subscriber.subscribe(prefix_concerned_list, SubType.TICKER,
                                        self.ticker_handler)
        if 0 == ret:
            for code in concerned_list:
                if code not in self.stock_objs:
                    self.stock_objs[code] = CStock(code,
                                                   self.dbinfo,
                                                   should_create_influxdb=True,
                                                   should_create_mysqldb=True)
        return ret

    def init_index_info(self):
        """Subscribe to quotes of every index in ``ct.INDEX_DICT`` and register them.

        Returns 0 on success, otherwise the subscriber's non-zero return code.
        """
        index_list = ct.INDEX_DICT.keys()
        prefix_index_list = [add_index_prefix(code) for code in index_list]
        ret = self.subscriber.subscribe(prefix_index_list, SubType.QUOTE,
                                        self.quote_handler)
        if 0 != ret:
            self.logger.error("subscribe for index list failed")
            return ret
        for code in index_list:
            if code not in self.index_objs:
                self.index_objs[code] = CIndex(code,
                                               should_create_influxdb=True,
                                               should_create_mysqldb=True)
        return 0

    def collect_index_runtime_data(self):
        """Drain the quote queue and feed each index object its own rows."""
        if self.quote_handler.empty():
            return
        datas = self.quote_handler.getQueue()
        while not datas.empty():
            df = datas.get()
            # Merge the separate date and time columns into one 'time' index.
            df['time'] = df.data_date + ' ' + df.data_time
            df = df.drop(['data_date', 'data_time'], axis=1)
            df = df.set_index('time')
            df.index = pd.to_datetime(df.index)
            for code_str in set(df.code):
                code_id = code_str.split('.')[1]
                self.index_objs[code_id].run(df.loc[df.code == code_str])

    def run(self, sleep_time):
        """Loop forever collecting runtime data while the market is trading.

        During trading time the loop sleeps 1 second per iteration; otherwise
        it sleeps ``sleep_time`` seconds. Exceptions are logged and the loop
        continues.
        """
        # Bound before the loop so that an exception in the very first
        # iteration cannot leave it undefined at the sleep call below.
        t_sleep_time = sleep_time
        while True:
            try:
                self.logger.debug("enter run")
                if self.cal_client.is_trading_day():
                    if is_trading_time():
                        t_sleep_time = 1
                        if not self.subscriber.status():
                            self.subscriber.start()
                            if 0 == self.init_index_info(
                            ) and 0 == self.init_real_stock_info():
                                self.init_combination_info()
                            else:
                                self.logger.debug("enter stop subscriber")
                                self.subscriber.stop()
                        else:
                            self.collect_stock_runtime_data()
                            self.collect_combination_runtime_data()
                            self.collect_index_runtime_data()
                            self.animation_client.collect()
                    else:
                        t_sleep_time = sleep_time
                        if self.subscriber.status():
                            self.subscriber.stop()
                else:
                    t_sleep_time = sleep_time
            except Exception as e:
                self.logger.error(e)
            gevent.sleep(t_sleep_time)

    def set_update_info(self,
                        step_length,
                        exec_date,
                        cdate=None,
                        filename=ct.STEPFILE):
        """Persist the last finished bootstrap step to ``filename`` as JSON.

        The file is overwritten with a single entry keyed by ``cdate``
        (``'none'`` when ``cdate`` is None) holding the step and exec date.
        """
        step_info = dict()
        if cdate is None:
            cdate = 'none'
        step_info[cdate] = dict()
        step_info[cdate]['step'] = step_length
        step_info[cdate]['date'] = exec_date
        with open(filename, 'w') as f:
            json.dump(step_info, f)
        self.logger.info("finish step :%s" % step_length)

    def get_update_info(self,
                        cdate=None,
                        exec_date=None,
                        filename=ct.STEPFILE):
        """Read the persisted bootstrap progress for ``cdate``.

        Returns ``(step, date)`` from the step file, or ``(0, exec_date)``
        when the file does not exist or has no entry for ``cdate``.
        """
        if cdate is None:
            cdate = 'none'
        if not os.path.exists(filename):
            return (0, exec_date)
        with open(filename, 'r') as f:
            step_info = json.load(f)
        if cdate not in step_info:
            return (0, exec_date)
        return (step_info[cdate]['step'], step_info[cdate]['date'])

    def bootstrap(self, cdate=None, exec_date=None, ndays=3):
        """Run every pending refresh step in order, persisting progress.

        cdate: key under which progress is stored; also passed to the steps
            that take a data date.
        exec_date: execution date as ``YYYY-MM-DD``; ``None`` means today,
            evaluated when the method is called. A date already stored in
            the step file for ``cdate`` takes precedence.
        ndays: ``num`` argument forwarded to the clients that take one.

        Returns True when all steps succeeded; False as soon as one step
        fails (later steps are not attempted).
        """
        if exec_date is None:
            # Resolve "today" at call time; a default computed in the
            # signature is frozen when the class is defined.
            exec_date = datetime.now().strftime('%Y-%m-%d')
        finished_step, exec_date = self.get_update_info(cdate, exec_date)
        self.logger.info("enter updating.%s" % finished_step)

        # (action, error message) in execution order; the 1-based position
        # is the step number stored in the step file.
        steps = (
            (lambda: self.cal_client.init(), "cal client init failed"),
            (lambda: self.index_info_client.update(),
             "index info init failed"),
            (lambda: self.stock_info_client.update(),
             "stock info init failed"),
            (lambda: self.comb_info_client.update(), "comb info init failed"),
            (lambda: self.industry_info_client.update(),
             "industry info init failed"),
            (lambda: self.init_tdx_index_info(cdate),
             "init tdx index info failed"),
            (lambda: self.sh_exchange_client.update(exec_date, num=ndays),
             "sh exchange update failed"),
            (lambda: self.sz_exchange_client.update(exec_date, num=ndays),
             "sz exchange update failed"),
            (lambda: self.init_index_components_info(exec_date),
             "init index components info failed"),
            (lambda: self.init_industry_info(cdate),
             "init industry info failed"),
            (lambda: self.rindustry_info_client.update(exec_date, num=ndays),
             "init %s rindustry info failed" % exec_date),
            (lambda: self.init_yesterday_hk_info(exec_date, num=ndays),
             "init yesterday hk info failed"),
            (lambda: self.margin_client.update(exec_date, num=ndays),
             "init yesterday margin failed"),
            (lambda: self.init_stock_info(cdate),
             "init stock info set failed"),
            (lambda: self.init_base_float_profit(),
             "init base float profit for all stock"),
            (lambda: self.init_valuation_info(cdate),
             "init stock valuation info failed"),
            (lambda: self.init_rvaluation_info(cdate),
             "init r stock valuation info failed"),
            (lambda: self.init_rindex_valuation_info(cdate),
             "init r index valuation info failed"),
            (lambda: self.rindex_stock_data_client.update(exec_date,
                                                          num=ndays),
             "rstock data set failed"),
            (lambda: self.set_bull_stock_ratio(exec_date, num=ndays),
             "bull ratio set failed"),
        )
        for step_no, (action, error_message) in enumerate(steps, start=1):
            if finished_step < step_no:
                if not action():
                    self.logger.error(error_message)
                    return False
                self.set_update_info(step_no, exec_date, cdate)

        self.logger.info("updating succeed")
        return True

    def clear_network_env(self):
        """Kill leftover browser-related processes by name."""
        for process_name in ("google-chrome", "renderer", "Xvfb", "zygote",
                             "defunct", "show-component-extension-options"):
            kill_process(process_name)

    def _attempt_daily_bootstrap(self):
        """Run ``bootstrap`` for today if the latest data is recent enough.

        Returns the result of ``bootstrap``, or False when no latest data
        date is available or it is older than today.
        """
        self.clear_network_env()
        mdate = datetime.now().strftime('%Y-%m-%d')
        ndate = get_latest_data_date()
        if ndate is None:
            return False
        if ndate >= transfer_date_string_to_int(mdate):
            # Keep the date of an unfinished run so that a retry resumes
            # the same run.
            if self.updating_date is None:
                self.updating_date = mdate
            succeed = self.bootstrap(cdate=self.updating_date,
                                     exec_date=self.updating_date)
            if succeed:
                self.updating_date = None
            return succeed
        self.logger.debug("%s is older for %s" % (ndate, mdate))
        return False

    def update(self, sleep_time):
        """Loop forever, refreshing the data once per collecting window.

        On a trading day, inside the collecting window, ``bootstrap`` is
        attempted until it succeeds; the success flag is reset once the
        window is left. The loop sleeps ``sleep_time`` seconds per iteration
        and 1 second after an exception, which is logged.
        """
        succeed = False
        while True:
            self.logger.debug("enter daily update process. %s" %
                              datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
            try:
                if self.cal_client.is_trading_day():
                    if self.is_collecting_time():
                        self.logger.debug(
                            "enter collecting time. %s, succeed:%s" %
                            (datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
                             succeed))
                        if not succeed:
                            succeed = self._attempt_daily_bootstrap()
                    else:
                        succeed = False
                gevent.sleep(sleep_time)
            except Exception as e:
                time.sleep(1)
                self.logger.error(e)

    def init_combination_info(self):
        """Register a ``Combination`` object for every code not yet known."""
        trading_info = self.comb_info_client.get()
        # Plain iteration over the column; Series.iteritems was removed in
        # pandas 2.0.
        for code_id in trading_info['code']:
            if str(code_id) not in self.combination_objs:
                self.combination_objs[str(code_id)] = Combination(
                    code_id, self.dbinfo)

    def init_base_float_profit(self):
        """Set the base floating profit of every stock via ``process_concurrent_run``.

        Returns False when the stock list is empty, otherwise the result of
        ``process_concurrent_run``.
        """
        def _set_base_float_profit(code_id):
            if CStock(code_id).set_base_floating_profit():
                self.logger.info("%s set base float profit success" % code_id)
                return (code_id, True)
            else:
                self.logger.error("%s set base float profit failed" % code_id)
                return (code_id, False)

        df = self.stock_info_client.get()
        if df.empty:
            return False
        failed_list = df.code.tolist()
        return process_concurrent_run(_set_base_float_profit,
                                      failed_list,
                                      num=8)

    def init_rindex_valuation_info(self, cdate):
        """Set the valuation of every index in ``ct.INDEX_DICT`` for ``cdate``.

        Returns False at the first index that fails, True otherwise.
        """
        for code in ct.INDEX_DICT:
            if not self.cvaluation_client.set_index_valuation(code, cdate):
                # The date being processed is cdate; the name used here
                # before was undefined and raised NameError on this path.
                self.logger.error(
                    "{} set {} data for rvaluation failed".format(code, cdate))
                return False
        return True

    def init_rvaluation_info(self, cdate=None):
        """Write ``<cdate>.csv`` under ``ct.RVALUATION_DIR`` with all stocks' valuation.

        Each stock's data is read with ``CStock(code).get_val_data(cdate)``
        through a gevent pool; ``None`` and empty results are skipped.

        Returns True on success, False when an exception was raised while
        collecting or writing (the exception is logged).
        """
        def cget(mdate, code):
            return code, CStock(code).get_val_data(mdate)

        df = self.stock_info_client.get()
        code_list = df.code.tolist()
        try:
            obj_pool = Pool(5000)
            frames = []
            cfunc = partial(cget, cdate)
            for code_data in obj_pool.imap_unordered(cfunc, code_list):
                if code_data[1] is not None and not code_data[1].empty:
                    tem_df = code_data[1]
                    tem_df['code'] = code_data[0]
                    frames.append(tem_df)
            obj_pool.join(timeout=5)
            obj_pool.kill()
            # One concat instead of repeated DataFrame.append (quadratic,
            # and removed in pandas 2.0).
            all_df = pd.concat(frames) if frames else pd.DataFrame()
            all_df = all_df.reset_index(drop=True)
            file_name = "{}.csv".format(cdate)
            file_path = Path(ct.RVALUATION_DIR) / file_name
            all_df.to_csv(file_path,
                          index=False,
                          header=True,
                          mode='w',
                          encoding='utf8')
            return True
        except Exception as e:
            self.logger.error(e)
            return False

    def init_valuation_info(self, cdate=None):
        """Compute the valuation of every stock for ``cdate`` concurrently.

        Returns the result of ``process_concurrent_run``.
        """
        df = self.stock_info_client.get()
        code_list = df['code'].tolist()
        time2market_list = df['timeToMarket'].tolist()
        code2timedict = dict(zip(code_list, time2market_list))
        cfun = partial(self.cvaluation_client.set_stock_valuation,
                       code2timedict, cdate)
        return process_concurrent_run(cfun,
                                      code_list,
                                      num=15,
                                      black_list=list())

    def init_stock_info(self, cdate=None):
        """Set the k-line data of every stock for ``cdate`` concurrently.

        Reads the bonus table from ``/data/tdx/base/bonus.csv`` and the
        k-data of index ``000001`` and passes both to each stock's
        ``set_k_data``.

        Returns False when the index data or the stock list is empty,
        otherwise the result of ``process_concurrent_run``.
        """
        def _set_stock_info(mdate, bonus_info, index_info, code_id):
            try:
                if CStock(code_id).set_k_data(bonus_info, index_info, mdate):
                    self.logger.info("%s set k data success for date:%s",
                                     code_id, mdate)
                    return (code_id, True)
                else:
                    self.logger.error("%s set k data failed for date:%s",
                                      code_id, mdate)
                    return (code_id, False)
            except Exception as e:
                self.logger.error("%s set k data for date %s exception:%s",
                                  code_id, mdate, e)
                return (code_id, False)

        # Stock bonus information.
        bonus_info = pd.read_csv("/data/tdx/base/bonus.csv",
                                 sep=',',
                                 dtype={
                                     'code': str,
                                     'market': int,
                                     'type': int,
                                     'money': float,
                                     'price': float,
                                     'count': float,
                                     'rate': float,
                                     'date': int
                                 })

        index_info = CIndex('000001').get_k_data()
        if index_info is None or index_info.empty:
            return False
        df = self.stock_info_client.get()
        if df.empty:
            return False
        failed_list = df.code.tolist()
        # The previous None / not-None branches ran exactly the same call.
        cfunc = partial(_set_stock_info, cdate, bonus_info, index_info)
        return process_concurrent_run(cfunc, failed_list, num=8)

    def _run_per_trading_day(self, worker, code_list, cdate, num):
        """Run ``worker(date, code)`` over ``code_list`` for recent trading days.

        With ``cdate`` None the worker runs once with a None date. Otherwise
        it runs for every trading day from ``num`` days before ``cdate`` up
        to ``cdate``, and the result is False if any day failed.
        """
        if cdate is None:
            return concurrent_run(partial(worker, cdate), code_list, num=5)
        succeed = True
        start_date = get_day_nday_ago(cdate, num=num, dformat="%Y-%m-%d")
        for mdate in get_dates_array(start_date, cdate, asending=True):
            if self.cal_client.is_trading_day(mdate):
                if not concurrent_run(partial(worker, mdate), code_list,
                                      num=5):
                    succeed = False
        return succeed

    def init_industry_info(self, cdate, num=1):
        """Set the k-line data of every industry index for recent trading days.

        cdate: last date; None runs a single pass with a None date.
        num: number of days to go back from ``cdate``.
        """
        def _set_industry_info(cdate, code_id):
            return (code_id, CIndex(code_id).set_k_data(cdate))

        df = self.industry_info_client.get()
        return self._run_per_trading_day(_set_industry_info,
                                         df.code.tolist(), cdate, num)

    def init_yesterday_hk_info(self, cdate, num):
        """Update the SH->HK and SZ->HK connect data.

        Returns False if selecting a market or updating it failed for either
        pair, True otherwise.
        """
        succeed = True
        for data in ((ct.SH_MARKET_SYMBOL, ct.HK_MARKET_SYMBOL),
                     (ct.SZ_MARKET_SYMBOL, ct.HK_MARKET_SYMBOL)):
            if not self.connect_client.set_market(data[0], data[1]):
                # Pass the pair as a logging argument: "%s" % data with a
                # 2-tuple raises TypeError instead of logging.
                self.logger.error("connect_client for %s failed", data)
                succeed = False
                continue
            if not self.connect_client.update(cdate, num=num):
                succeed = False

            self.connect_client.close()
            self.connect_client.quit()
        return succeed

    def get_concerned_index_codes(self):
        """Return the codes in ``ct.INDEX_DICT`` plus ``'880883'``."""
        index_codes = list(ct.INDEX_DICT.keys())
        # Add the MSCI sector.
        index_codes.append('880883')
        return index_codes

    def init_index_components_info(self, cdate=None):
        """Set the components data of every concerned index for ``cdate``.

        cdate: date as ``YYYY-MM-DD``; None means today.
        """
        if cdate is None:
            cdate = datetime.now().strftime('%Y-%m-%d')

        def _set_index_info(code_id):
            if code_id in self.index_objs:
                _obj = self.index_objs[code_id]
            else:
                _obj = CIndex(
                    code_id) if code_id in ct.INDEX_DICT else TdxFgIndex(
                        code_id)
            return (code_id, _obj.set_components_data(cdate))

        index_codes = self.get_concerned_index_codes()
        return concurrent_run(_set_index_info, index_codes, num=10)

    def set_bull_stock_ratio(self, cdate, num=10):
        """Update the bull stock ratio of every concerned index."""
        def _set_bull_stock_ratio(code_id):
            return (code_id, BullStockRatio(code_id).update(cdate, num))

        index_codes = self.get_concerned_index_codes()
        return concurrent_run(_set_bull_stock_ratio, index_codes)

    def init_tdx_index_info(self, cdate=None, num=1):
        """Set the k-line data of every index in ``ct.TDX_INDEX_DICT``.

        cdate: last date; None runs a single pass with a None date.
        num: number of days to go back from ``cdate``.
        """
        def _set_index_info(cdate, code_id):
            try:
                if code_id in self.index_objs:
                    _obj = self.index_objs[code_id]
                else:
                    _obj = CIndex(
                        code_id
                    ) if code_id in ct.TDX_INDEX_DICT else TdxFgIndex(code_id)
                return (code_id, _obj.set_k_data(cdate))
            except Exception as e:
                self.logger.error(e)
                return (code_id, False)

        index_code_list = list(ct.TDX_INDEX_DICT.keys())
        return self._run_per_trading_day(_set_index_info, index_code_list,
                                         cdate, num)
Esempio n. 9
0
def get_all_codelist():
    """Return the codes of all stocks whose name does not contain "ST"."""
    local_redis = '127.0.0.1'
    client = CStockInfo(dbinfo=ct.OUT_DB_INFO, redis_host=local_redis)
    stock_df = client.get(redis_host=local_redis)
    # Boolean mask of the rows whose name contains "ST"; those are dropped.
    is_st = stock_df.name.str.contains("ST")
    non_st_df = stock_df[~is_st]
    return non_st_df.code.tolist()
Esempio n. 10
0
# encoding: utf-8
import os
import sys
from os.path import abspath, dirname
sys.path.insert(0, dirname(dirname(abspath(__file__))))
import const as ct
from cstock_info import CStockInfo
# Read the whitespace-separated "name:reason" entries from the file 'result'
# in the current working directory.
with open('result', 'r') as f:
    out = f.read()

cs = CStockInfo(ct.OUT_DB_INFO, redis_host='127.0.0.1')
info = cs.get(redis=cs.redis)
info = info[['code', 'name']]
adict = dict()
for row in out.split():
    # Only the part before the first ':' is the stock name; partition() also
    # tolerates rows with no colon or with several of them.
    name, _, _reason = row.partition(':')
    x = info.loc[info.name == name]
    if x.empty:
        # Skip unknown names: there is no code to file the row under, and
        # falling through would reuse the previous row's code (or raise
        # NameError on the very first row).
        print("%s not found" % name)
        continue
    code = x['code'].values[0]
    adict[code] = row

# Entries derived from the file override those already in ct.BLACK_DICT.
xdict = ct.BLACK_DICT
newdict = {**xdict, **adict}

import json
jsonDumpsIndentStr = json.dumps(newdict, indent=1, ensure_ascii=False)
print(jsonDumpsIndentStr)
Esempio n. 11
0
class DataManager:
    def __init__(self, dbinfo):
        """Create the manager together with the clients it drives.

        Args:
            dbinfo: database settings, stored on the instance and passed
                unchanged to every client constructed here.
        """
        # Per-code object caches, filled by init_combination_info() and
        # init_real_stock_info() respectively.
        self.combination_objs = dict()
        self.stock_objs = dict()
        # NOTE(review): not referenced by any method visible in this class.
        self.evt = AsyncResult()
        self.dbinfo = dbinfo
        self.cal_client = CCalendar(dbinfo)
        self.comb_info_client = CombinationInfo(dbinfo)
        self.stock_info_client = CStockInfo(dbinfo)
        self.delisted_info_client = CDelisted(dbinfo)
        self.animation_client = CAnimation(dbinfo)
        self.subscriber = Subscriber()

    def is_collecting_time(self, now_time=None):
        if now_time is None: now_time = datetime.now()
        _date = now_time.strftime('%Y-%m-%d')
        y, m, d = time.strptime(_date, "%Y-%m-%d")[0:3]
        mor_open_hour, mor_open_minute, mor_open_second = (19, 0, 0)
        mor_open_time = datetime(y, m, d, mor_open_hour, mor_open_minute,
                                 mor_open_second)
        mor_close_hour, mor_close_minute, mor_close_second = (23, 59, 59)
        mor_close_time = datetime(y, m, d, mor_close_hour, mor_close_minute,
                                  mor_close_second)
        return mor_open_time < now_time < mor_close_time

    def is_tcket_time(self, now_time=None):
        if now_time is None: now_time = datetime.now()
        _date = now_time.strftime('%Y-%m-%d')
        y, m, d = time.strptime(_date, "%Y-%m-%d")[0:3]
        mor_open_hour, mor_open_minute, mor_open_second = (0, 0, 0)
        mor_open_time = datetime(y, m, d, mor_open_hour, mor_open_minute,
                                 mor_open_second)
        mor_close_hour, mor_close_minute, mor_close_second = (9, 0, 0)
        mor_close_time = datetime(y, m, d, mor_close_hour, mor_close_minute,
                                  mor_close_second)
        aft_open_hour, aft_open_minute, aft_open_second = (15, 10, 0)
        aft_open_time = datetime(y, m, d, aft_open_hour, aft_open_minute,
                                 aft_open_second)
        aft_close_hour, aft_close_minute, aft_close_second = (23, 59, 59)
        aft_close_time = datetime(y, m, d, aft_close_hour, aft_close_minute,
                                  aft_close_second)
        return (mor_open_time < now_time <
                mor_close_time) or (aft_open_time < now_time < aft_close_time)

    def collect(self, sleep_time):
        """Call ``init_all_stock_tick`` forever, pausing between rounds.

        Args:
            sleep_time: seconds passed to ``time.sleep`` after every round,
                whether the round succeeded or raised.
        """
        while True:
            try:
                self.init_all_stock_tick()
            except Exception as err:
                # A failed round is logged and the loop carries on.
                logger.error(err)
            time.sleep(sleep_time)

    def collect_combination_runtime_data(self):
        """Run every cached combination object through a pool of size 10."""
        workers = Pool(10)
        for comb_code in self.combination_objs:
            try:
                # Drain the pool before adding more work once it is full.
                if workers.full():
                    workers.join()
                combination = self.combination_objs[comb_code]
                workers.spawn(combination.run)
            except Exception as err:
                logger.info(err)
        workers.join()
        workers.kill()

    def collect_stock_runtime_data(self):
        """Fetch tick data for each cached stock and hand it to the stock object.

        Tick frames are re-indexed by their ``time`` column (converted with
        ``pd.to_datetime``) before being passed to the stock's ``run`` method
        through a pool of size 100. Stocks whose fetch returns a non-zero
        status are skipped.
        """
        workers = Pool(100)
        for stock_code in self.stock_objs:
            try:
                if workers.full():
                    workers.join()
                status, tick_df = self.subscriber.get_tick_data(
                    add_prifix(stock_code))
                if status != 0:
                    continue
                tick_df = tick_df.set_index('time')
                tick_df.index = pd.to_datetime(tick_df.index)
                workers.spawn(self.stock_objs[stock_code].run, tick_df)
            except Exception as err:
                logger.info(err)
        workers.join()
        workers.kill()

    def run(self, sleep_time):
        """Drive real-time collection forever, one cycle per ``sleep_time``.

        On a trading day each cycle does one of the following:
        * trading time, subscriber stopped: start the subscriber and
          initialise the combination and real-time stock objects;
        * trading time, subscriber running: collect stock, combination and
          animation data;
        * outside trading time, subscriber running: stop the subscriber.

        Args:
            sleep_time: seconds passed to ``time.sleep`` after every cycle.
        """
        while True:
            try:
                if self.cal_client.is_trading_day():
                    # Sample both states once so that every branch of this
                    # cycle is decided on the same snapshot instead of
                    # re-querying them for each condition.
                    trading = is_trading_time()
                    subscribed = self.subscriber.status()
                    if trading and not subscribed:
                        self.subscriber.start()
                        self.init_combination_info()
                        self.init_real_stock_info()
                    elif trading and subscribed:
                        self.collect_stock_runtime_data()
                        self.collect_combination_runtime_data()
                        self.animation_client.collect()
                    elif subscribed:
                        self.subscriber.stop()
            except Exception as e:
                logger.error(e)
            time.sleep(sleep_time)

    def update(self, sleep_time):
        """Run ``init`` forever during the collecting window of trading days.

        Args:
            sleep_time: seconds passed to ``time.sleep`` after every cycle.
        """
        while True:
            try:
                if self.cal_client.is_trading_day():
                    if self.is_collecting_time():
                        self.init()
            except Exception as e:
                logger.error(e)
            # Sleep outside the try block so that a failing cycle is also
            # followed by a pause instead of retrying in a tight loop.
            time.sleep(sleep_time)
    def init(self, status=False):
        """Initialise the info clients, then today's per-stock data.

        Args:
            status: forwarded unchanged to ``cal_client.init`` and
                ``delisted_info_client.init``; its meaning is defined by
                those clients.
        """
        self.cal_client.init(status)
        self.comb_info_client.init()
        self.stock_info_client.init()
        self.delisted_info_client.init(status)
        self.init_today_stock_tick()
        # Initialisation of halted-stock info is currently disabled:
        #self.halted_info_client.init(status)

    def get_concerned_list(self):
        combination_info = self.comb_info_client.get()
        if combination_info is None: return list()
        combination_info = combination_info.reset_index(drop=True)
        res_list = list()
        for index, _ in combination_info['code'].iteritems():
            objliststr = combination_info.loc[index]['content']
            objlist = objliststr.split(',')
            res_list.extend(objlist)
        return list(set(res_list))

    def init_combination_info(self):
        trading_info = self.comb_info_client.get()
        for _, code_id in trading_info['code'].iteritems():
            if str(code_id) not in self.combination_objs:
                self.combination_objs[str(code_id)] = Combination(
                    self.dbinfo, code_id)

    def init_today_stock_tick(self):
        """Store today's tick and k-line data for every known stock.

        On a trading day, ``set_ticket(today)`` and ``set_k_data()`` are
        spawned for each stock in a pool of size 50; on other days no work
        is spawned.
        """
        _date = datetime.now().strftime('%Y-%m-%d')
        obj_pool = Pool(50)
        df = self.stock_info_client.get()
        if self.cal_client.is_trading_day(_date):
            # Iterate the values directly: Series.iteritems() was removed in
            # pandas 2.0 and the index was never used.
            for code_id in df.code:
                # Reuse the cached stock object when there is one.
                _obj = self.stock_objs[
                    code_id] if code_id in self.stock_objs else CStock(
                        self.dbinfo, code_id)
                try:
                    if obj_pool.full(): obj_pool.join()
                    obj_pool.spawn(_obj.set_ticket, _date)
                    obj_pool.spawn(_obj.set_k_data)
                except Exception as e:
                    logger.info(e)
        obj_pool.join()
        obj_pool.kill()

    def init_all_stock_tick(self):
        """Store tick data of every known stock for each trading day since 2015-01-01.

        Dates are processed newest first, and ``set_ticket(date)`` is spawned
        per stock and date in a pool of size 4.
        """
        start_date = '2015-01-01'
        _today = datetime.now().strftime('%Y-%m-%d')
        num_days = delta_days(start_date, _today)
        data_times = pd.date_range(start_date, periods=num_days, freq='D')
        # The set of trading days is the same for every stock, so it is
        # resolved once here instead of once per stock and date.
        trading_dates = [
            day for day in (stamp.strftime('%Y-%m-%d')
                            for stamp in data_times[::-1])
            if self.cal_client.is_trading_day(day)
        ]
        obj_pool = Pool(4)
        df = self.stock_info_client.get()
        # Iterate the values directly: Series.iteritems() was removed in
        # pandas 2.0 and the index was never used.
        for code_id in df.code:
            # Reuse the cached stock object when there is one.
            _obj = self.stock_objs[
                code_id] if code_id in self.stock_objs else CStock(
                    self.dbinfo, code_id)
            for _date in trading_dates:
                try:
                    if obj_pool.full(): obj_pool.join()
                    obj_pool.spawn(_obj.set_ticket, _date)
                except Exception as e:
                    logger.info(e)
        obj_pool.join()
        obj_pool.kill()

    def init_real_stock_info(self):
        """Subscribe to ticks of every concerned code and cache its stock object.

        A ``CStock`` is created only for codes whose subscription returned 0
        and which are not yet present in ``self.stock_objs``.
        """
        for stock_code in self.get_concerned_list():
            status = self.subscriber.subscribe_tick(add_prifix(stock_code),
                                                    CStock)
            if status != 0:
                continue
            if stock_code not in self.stock_objs:
                self.stock_objs[stock_code] = CStock(self.dbinfo, stock_code)

    def download_and_extract(self, sleep_time):
        """Download and unzip tick archives forever during the collecting window.

        On trading days inside the collecting window, ``download`` is called
        for ``ct.ZIP_DIR`` and every non-hidden entry of that directory is
        unzipped into ``ct.TIC_DIR``.

        Args:
            sleep_time: seconds passed to ``time.sleep`` after every cycle.
        """
        while True:
            try:
                if self.cal_client.is_trading_day():
                    if self.is_collecting_time():
                        download(ct.ZIP_DIR)
                        for filename in os.listdir(ct.ZIP_DIR):
                            if filename.startswith('.'):
                                continue
                            file_path = os.path.join(ct.ZIP_DIR, filename)
                            if os.path.exists(file_path):
                                unzip(file_path, ct.TIC_DIR)
            except Exception as e:
                logger.error(e)
            # Pause after every cycle, not only after a failure; otherwise
            # the success path spins without ever sleeping.
            time.sleep(sleep_time)
# -*- coding: utf-8 -*-
import os
import sys
from os.path import abspath, dirname
sys.path.insert(0, dirname(dirname(abspath(__file__))))
import const as ct
from cindex import CIndex
from jqdatasdk import auth
from ccalendar import CCalendar
from cstock_info import CStockInfo
from base.cdate import get_day_nday_ago, get_dates_array
if __name__ == '__main__':
    num = 5500
    end_date = '2019-08-13'
    stock_info_client = CStockInfo()
    df = stock_info_client.get()
    code_list = df['code'].tolist()
    name_list = df['name'].tolist()
    code2namedict = dict(zip(code_list, name_list))
    start_date = get_day_nday_ago(end_date, num=num, dformat="%Y-%m-%d")
    date_array = get_dates_array(start_date, end_date)
    auth('18701683341', '52448oo78')
    for code in [
            '000001', '000016', '000300', '000905', '399001', '399005',
            '399673'
    ]:
        obj = CIndex(code)
        for mdate in date_array:
            if CCalendar.is_trading_day(mdate):
                table_name = obj.get_components_table_name(mdate)
                if obj.is_table_exists(table_name):
Esempio n. 13
0
    industry = stock_info[stock_info.code == code].industry.values[0]
    industries = [industry for n in range(len(tmp_df))]
    industry_series = pd.Series(industries, index=tmp_df.index)
    #set pchange
    new_tmp_df = pd.DataFrame()
    new_tmp_df['name'] = name_series
    new_tmp_df['industry'] = industry_series
    return new_tmp_df


if __name__ == '__main__':
    if not os.path.exists('norm.json'):
        creview = CReivew(ct.DB_INFO)
        start_date = '2018-02-09'
        end_date = '2018-09-10'
        stock_info = CStockInfo.get()
        stock_info = stock_info[['code', 'name', 'industry', 'timeToMarket']]
        stock_info = stock_info[(stock_info.timeToMarket < 20180327)
                                & (stock_info.timeToMarket > 0)]
        if not os.path.exists('index.json'):
            #上证指数的数据
            logger.info("start get index data")
            szzs_df = CIndex('000001').get_k_data_in_range(
                start_date, end_date)
            szzs_df = szzs_df.sort_values(by='date', ascending=True)
            szzs_df['code'] = 'i000001'
            szzs_df['name'] = "上证指数"
            szzs_df['industry'] = "所有"
            szzs_df['preclose'] = szzs_df['close'].shift(1)
            szzs_df = szzs_df[szzs_df.date != start_date]
            szzs_df['pchange'] = 100 * (szzs_df.close -
Esempio n. 14
0
class DataManager:
    def __init__(self, dbinfo=ct.DB_INFO, redis_host=None):
        """Create the manager together with the clients it drives.

        Args:
            dbinfo: database settings, stored on the instance and forwarded
                to the clients that accept them.
            redis_host: forwarded unchanged to the clients that accept it.
        """
        self.dbinfo = dbinfo
        self.logger = getLogger(__name__)
        # Per-code object caches, filled by init_index_info(),
        # init_real_stock_info() and init_combination_info().
        self.index_objs = dict()
        self.stock_objs = dict()
        self.combination_objs = dict()
        self.cal_client = CCalendar(dbinfo, redis_host)
        self.index_info_client = IndexInfo()
        self.comb_info_client = CombinationInfo(dbinfo, redis_host)
        self.stock_info_client = CStockInfo(dbinfo, redis_host)
        self.rindex_stock_data_client = RIndexStock(dbinfo, redis_host)
        self.industry_info_client = IndustryInfo(dbinfo, redis_host)
        self.rindustry_info_client = RIndexIndustryInfo(dbinfo, redis_host)
        self.limit_client = CLimit(dbinfo, redis_host)
        self.animation_client = CAnimation(dbinfo, redis_host)
        # Subscription front end plus the handlers passed to it for index
        # quotes and stock tickers.
        self.subscriber = Subscriber()
        self.quote_handler = StockQuoteHandler()
        self.ticker_handler = TickerHandler()
        # Built for SH -> HK; init_yesterday_hk_info() re-targets it with
        # set_market() for each market pair.
        self.connect_client = StockConnect(market_from=ct.SH_MARKET_SYMBOL,
                                           market_to=ct.HK_MARKET_SYMBOL,
                                           dbinfo=dbinfo,
                                           redis_host=redis_host)
        self.margin_client = Margin(dbinfo=dbinfo, redis_host=redis_host)
        self.emotion_client = Emotion(dbinfo=dbinfo, redis_host=redis_host)
        self.sh_exchange_client = StockExchange(ct.SH_MARKET_SYMBOL)
        self.sz_exchange_client = StockExchange(ct.SZ_MARKET_SYMBOL)

    def is_collecting_time(self, now_time=None):
        """Report whether ``now_time`` is strictly between 19:00:00 and 23:59:59.

        Args:
            now_time: datetime to check. When omitted (None) the current
                time is read at call time. The previous default of
                ``datetime.now()`` was evaluated once, when the method was
                defined, so argument-less calls from the long-running
                ``update`` loop kept seeing the same stale instant.

        Returns:
            True when ``now_time`` falls inside the window on its own
            calendar day (both bounds excluded), otherwise False.
        """
        if now_time is None:
            now_time = datetime.now()
        year, month, day = now_time.year, now_time.month, now_time.day
        aft_open_time = datetime(year, month, day, 19, 0, 0)
        aft_close_time = datetime(year, month, day, 23, 59, 59)
        return aft_open_time < now_time < aft_close_time

    def is_morning_time(self, now_time=None):
        """Report whether ``now_time`` is strictly between 00:00:00 and 06:30:00.

        Args:
            now_time: datetime to check. When omitted (None) the current
                time is read at call time. The previous default of
                ``datetime.now()`` was evaluated once, when the method was
                defined, so argument-less calls always tested that stale
                instant.

        Returns:
            True when ``now_time`` falls inside the window on its own
            calendar day (both bounds excluded), otherwise False.
        """
        if now_time is None:
            now_time = datetime.now()
        year, month, day = now_time.year, now_time.month, now_time.day
        mor_open_time = datetime(year, month, day, 0, 0, 0)
        mor_close_time = datetime(year, month, day, 6, 30, 0)
        return mor_open_time < now_time < mor_close_time

    def collect_combination_runtime_data(self):
        """Run every cached combination object through ``concurrent_run``.

        Returns:
            Whatever ``concurrent_run`` returns for the batch.
        """
        def _run_combination(comb_code):
            # The worker always reports success once run() has returned.
            self.combination_objs[comb_code].run()
            return (comb_code, True)

        pending_codes = list(self.combination_objs.keys())
        return concurrent_run(_run_combination, pending_codes, num=10)

    def collect_stock_runtime_data(self):
        """Drain the ticker queue and feed each stock object its own rows.

        Every queued frame is indexed by its ``time`` column (converted with
        ``pd.to_datetime``). The stock code is the part of the frame's
        ``code`` value after the first dot.
        """
        if self.ticker_handler.empty():
            return
        pending = self.ticker_handler.getQueue()
        while not pending.empty():
            ticks = pending.get().set_index('time')
            ticks.index = pd.to_datetime(ticks.index)
            for prefixed_code in set(ticks.code):
                stock_code = prefixed_code.split('.')[1]
                stock_obj = self.stock_objs[stock_code]
                stock_obj.run(ticks.loc[ticks.code == prefixed_code])

    def init_real_stock_info(self):
        """Subscribe to tickers of all concerned codes and cache their stock objects.

        Returns:
            The status returned by ``subscriber.subscribe``; stock objects
            are only created when it is 0.
        """
        concerned_codes = self.comb_info_client.get_concerned_list()
        ret = self.subscriber.subscribe(
            [add_prifix(code) for code in concerned_codes], SubType.TICKER,
            self.ticker_handler)
        if ret != 0:
            return ret
        for code in concerned_codes:
            if code in self.stock_objs:
                continue
            self.stock_objs[code] = CStock(code,
                                           self.dbinfo,
                                           should_create_influxdb=True,
                                           should_create_mysqldb=True)
        return ret

    def init_index_info(self):
        """Subscribe to quotes of every index in ``ct.INDEX_DICT`` and cache index objects.

        Returns:
            The status returned by ``subscriber.subscribe``: 0 on success,
            in which case a ``CIndex`` is created for every code not cached
            yet, or the non-zero status when the subscription failed.
        """
        index_list = ct.INDEX_DICT.keys()
        prefix_index_list = [add_index_prefix(code) for code in index_list]
        ret = self.subscriber.subscribe(prefix_index_list, SubType.QUOTE,
                                        self.quote_handler)
        if 0 != ret:
            self.logger.error("subscribe for index list failed")
            return ret
        for code in index_list:
            if code not in self.index_objs:
                self.index_objs[code] = CIndex(code,
                                               should_create_influxdb=True,
                                               should_create_mysqldb=True)
        # Report success explicitly: run() tests `0 == self.init_index_info()`,
        # which an implicit None could never satisfy.
        return ret

    def collect_index_runtime_data(self):
        """Drain the quote queue and feed each index object its own rows.

        ``data_date`` and ``data_time`` are joined with a space into a
        ``time`` column, which becomes the datetime index. The index code is
        the part of the frame's ``code`` value after the first dot.
        """
        if self.quote_handler.empty():
            return
        pending = self.quote_handler.getQueue()
        while not pending.empty():
            quotes = pending.get()
            quotes['time'] = quotes.data_date + ' ' + quotes.data_time
            quotes = quotes.drop(['data_date', 'data_time'], axis=1)
            quotes = quotes.set_index('time')
            quotes.index = pd.to_datetime(quotes.index)
            for prefixed_code in set(quotes.code):
                index_code = prefixed_code.split('.')[1]
                index_obj = self.index_objs[index_code]
                index_obj.run(quotes.loc[quotes.code == prefixed_code])

    def run(self, sleep_time):
        """Drive real-time collection forever.

        On a trading day the pause between cycles is overridden: 1 second
        during trading time and 60 seconds outside it. On other days the
        value passed in (or last set) is used.

        Args:
            sleep_time: initial number of seconds to sleep between cycles.
        """
        while True:
            try:
                if self.cal_client.is_trading_day():
                    if not is_trading_time():
                        sleep_time = 60
                        if self.subscriber.status():
                            self.subscriber.stop()
                    else:
                        sleep_time = 1
                        if self.subscriber.status():
                            self.collect_stock_runtime_data()
                            self.collect_combination_runtime_data()
                            self.collect_index_runtime_data()
                            self.animation_client.collect()
                        else:
                            self.subscriber.start()
                            # Both subscriptions must report 0; the second
                            # is not attempted when the first fails.
                            subscribed = (0 == self.init_index_info()
                                          and 0 == self.init_real_stock_info())
                            if subscribed:
                                self.init_combination_info()
                            else:
                                self.logger.debug("enter stop dict time")
                                self.subscriber.stop()
            except Exception as err:
                traceback.print_exc()
                self.logger.error(err)
            time.sleep(sleep_time)

    def set_update_info(self,
                        step_length,
                        exec_date,
                        cdate=None,
                        filename=ct.STEPFILE):
        """Overwrite the step file with the progress reached for ``cdate``.

        Args:
            step_length: number of the step that has just finished.
            exec_date: execution date stored next to the step.
            cdate: key of the record; the string 'none' is used when None.
            filename: path of the JSON file to write.
        """
        key = 'none' if cdate is None else cdate
        # The file only ever holds this single record.
        record = {key: {'step': step_length, 'date': exec_date}}
        with open(filename, 'w') as f:
            json.dump(record, f)
        self.logger.info("finish step :%s" % step_length)

    def get_update_info(self,
                        cdate=None,
                        exec_date=None,
                        filename=ct.STEPFILE):
        """Read the progress recorded for ``cdate`` from the step file.

        Args:
            cdate: key of the record; the string 'none' is used when None.
            exec_date: value returned as the date when no record exists.
            filename: path of the JSON file to read.

        Returns:
            ``(step, date)`` from the file, or ``(0, exec_date)`` when the
            file is missing or holds no record for ``cdate``.
        """
        key = 'none' if cdate is None else cdate
        if not os.path.exists(filename):
            return (0, exec_date)
        with open(filename, 'r') as f:
            recorded = json.load(f)
        if key not in recorded:
            return (0, exec_date)
        entry = recorded[key]
        return (entry['step'], entry['date'])

    def bootstrap(self, cdate=None, exec_date=None):
        """Run the daily update pipeline, resuming after the last finished step.

        The steps run in a fixed order. After each successful step its number
        is persisted with ``set_update_info`` so that a later call for the
        same ``cdate`` skips it. The first step that returns a falsy value
        is logged and aborts the run.

        Args:
            cdate: date key of the progress record, also passed to the
                steps that take ``cdate``.
            exec_date: execution date as 'YYYY-MM-DD'. When omitted (None)
                today's date is computed at call time. The previous default
                was evaluated once at definition time, so a long-running
                process kept using the date on which it was started. A date
                stored in the step file takes precedence over this value.

        Returns:
            True when every outstanding step succeeded, otherwise False.
        """
        if exec_date is None:
            exec_date = datetime.now().strftime('%Y-%m-%d')
        finished_step, exec_date = self.get_update_info(cdate, exec_date)
        self.logger.info("enter updating.%s" % finished_step)

        # (step number, action, message logged when the action fails).
        # Actions are wrapped in lambdas so nothing runs until its turn.
        steps = [
            (1, lambda: self.cal_client.init(), "cal_client init failed"),
            (2, lambda: self.index_info_client.update(),
             "index_info init failed"),
            (3, lambda: self.stock_info_client.update(),
             "stock_info init failed"),
            (4, lambda: self.comb_info_client.update(),
             "comb_info init failed"),
            (5, lambda: self.industry_info_client.update(),
             "industry_info init failed"),
            (6, lambda: self.download_and_extract(exec_date),
             "download_and_extract failed"),
            (7, lambda: self.init_tdx_index_info(cdate),
             "init_tdx_index_info failed"),
            (8, lambda: self.sh_exchange_client.update(exec_date, num=30),
             "sh exchange update failed"),
            (9, lambda: self.sz_exchange_client.update(exec_date, num=30),
             "sz exchange update failed"),
            (10, lambda: self.init_index_components_info(exec_date),
             "init index components info failed"),
            (11, lambda: self.init_industry_info(cdate),
             "init_industry_info failed"),
            (12, lambda: self.rindustry_info_client.update(exec_date),
             "init %s rindustry info failed" % exec_date),
            (13, lambda: self.limit_client.update(exec_date),
             "init_limit_info failed"),
            (14, lambda: self.init_yesterday_hk_info(exec_date),
             "init_yesterday_hk_info failed"),
            (15, lambda: self.margin_client.update(exec_date),
             "init_yesterday_margin failed"),
            (16, lambda: self.init_stock_info(cdate),
             "init_stock_info set failed"),
            (17, lambda: self.init_base_float_profit(),
             "init base float profit for all stock"),
            (18, lambda: self.rindex_stock_data_client.update(exec_date,
                                                              num=300),
             "rindex_stock_data set failed"),
        ]

        for step_no, action, error_message in steps:
            if finished_step < step_no:
                if not action():
                    self.logger.error(error_message)
                    return False
                self.set_update_info(step_no, exec_date, cdate)

        self.logger.info("updating succeed")
        return True

    def update(self, sleep_time):
        """Run ``bootstrap`` forever during the collecting window of trading days.

        When a cycle raises, a fixed list of process names is passed to
        ``kill_process`` before the error is logged.

        Args:
            sleep_time: seconds passed to ``time.sleep`` after every cycle.
        """
        stamp_format = '%Y-%m-%d %H:%M:%S'
        while True:
            self.logger.info("enter daily update process. %s" %
                             datetime.now().strftime(stamp_format))
            try:
                if self.cal_client.is_trading_day():
                    self.logger.info("is trading day. %s" %
                                     datetime.now().strftime(stamp_format))
                    if self.is_collecting_time():
                        self.logger.info(
                            "is collecting time. %s" %
                            datetime.now().strftime(stamp_format))
                        today = datetime.now().strftime('%Y-%m-%d')
                        self.bootstrap(cdate=today)
            except Exception as err:
                for process_name in ("google-chrome", "renderer", "Xvfb",
                                     "zygote", "defunct",
                                     "show-component-extension-options"):
                    kill_process(process_name)
                self.logger.error(err)
            time.sleep(sleep_time)

    def init_combination_info(self):
        """Create a ``Combination`` object for every code not cached yet.

        Objects are stored in ``self.combination_objs`` under ``str(code)``.
        """
        trading_info = self.comb_info_client.get()
        # Iterate the values directly: Series.iteritems() was removed in
        # pandas 2.0 and the index was never used.
        for code_id in trading_info['code']:
            key = str(code_id)
            if key not in self.combination_objs:
                self.combination_objs[key] = Combination(code_id, self.dbinfo)

    def init_base_float_profit(self):
        """Set the base floating profit of every known stock.

        Returns:
            Whatever ``process_concurrent_run`` returns for the batch.
        """
        def _store_base_profit(stock_code):
            # Normalise the truthy/falsy result to a strict bool.
            if CStock(stock_code).set_base_floating_profit():
                return (stock_code, True)
            return (stock_code, False)

        stock_codes = self.stock_info_client.get().code.tolist()
        return process_concurrent_run(_store_base_profit,
                                      stock_codes,
                                      num=500)

    def init_stock_info(self, cdate=None):
        """Store k-line data for every known stock.

        Bonus data is read from ``/data/tdx/base/bonus.csv`` and the k-line
        data of index '000001' is loaded; both are passed to each stock's
        ``set_k_data``.

        Args:
            cdate: when None a single run is made with ``None`` as the date;
                otherwise every trading day from the start date returned by
                ``get_day_nday_ago(cdate, num=10, ...)`` up to ``cdate`` is
                processed in turn.

        Returns:
            False when the index data is missing or empty; the
            ``process_concurrent_run`` result when ``cdate`` is None;
            otherwise True only if every per-day run was truthy.
        """
        def _store_k_data(target_date, bonus_df, index_df, stock_code):
            try:
                stock = CStock(stock_code)
                if not stock.set_k_data(bonus_df, index_df, target_date):
                    self.logger.error("%s set k data failed" % stock_code)
                    return (stock_code, False)
                self.logger.info("%s set k data success" % stock_code)
                return (stock_code, True)
            except Exception as err:
                self.logger.error("%s set k data exception:%s" %
                                  (stock_code, err))
                return (stock_code, False)

        # Column types of the stock bonus file.
        bonus_dtypes = {
            'code': str,
            'market': int,
            'type': int,
            'money': float,
            'price': float,
            'count': float,
            'rate': float,
            'date': int
        }
        bonus_info = pd.read_csv("/data/tdx/base/bonus.csv",
                                 sep=',',
                                 dtype=bonus_dtypes)

        index_info = CIndex('000001').get_k_data()
        if index_info is None or index_info.empty:
            return False

        stock_codes = self.stock_info_client.get().code.tolist()
        if cdate is None:
            worker = partial(_store_k_data, cdate, bonus_info, index_info)
            return process_concurrent_run(worker, stock_codes, num=5)

        all_ok = True
        first_date = get_day_nday_ago(cdate, num=10, dformat="%Y-%m-%d")
        for day in get_dates_array(first_date, cdate, asending=True):
            if not self.cal_client.is_trading_day(day):
                continue
            worker = partial(_store_k_data, day, bonus_info, index_info)
            # The same list object is handed to every per-day run.
            if not process_concurrent_run(worker, stock_codes, num=500):
                all_ok = False
        return all_ok

    def init_industry_info(self, cdate):
        """Store k-line data for every industry code.

        Args:
            cdate: when None a single run is made with ``None`` as the date;
                otherwise every trading day from the start date returned by
                ``get_day_nday_ago(cdate, num=30, ...)`` up to ``cdate`` is
                processed in turn.

        Returns:
            The ``concurrent_run`` result when ``cdate`` is None, otherwise
            True only if every per-day run was truthy.
        """
        def _store_k_data(target_date, industry_code):
            return (industry_code, CIndex(industry_code).set_k_data(target_date))

        industry_df = self.industry_info_client.get()
        if cdate is None:
            return concurrent_run(partial(_store_k_data, cdate),
                                  industry_df.code.tolist(),
                                  num=5)

        all_ok = True
        first_date = get_day_nday_ago(cdate, num=30, dformat="%Y-%m-%d")
        for day in get_dates_array(first_date, cdate, asending=True):
            if not self.cal_client.is_trading_day(day):
                continue
            # A fresh code list is built for every per-day run.
            if not concurrent_run(partial(_store_k_data, day),
                                  industry_df.code.tolist(),
                                  num=5):
                all_ok = False
        return all_ok

    def init_yesterday_hk_info(self, cdate):
        """Update the stock-connect data for the SH->HK and SZ->HK pairs.

        For each pair the connect client is re-targeted with ``set_market``,
        updated for ``cdate`` and then closed and quit. A pair whose
        ``set_market`` fails is logged and skipped. Three process names are
        passed to ``kill_process`` once both pairs have been handled.

        Args:
            cdate: date forwarded to ``connect_client.update``.

        Returns:
            True only if both pairs were set up and updated successfully.
        """
        succeed = True
        for data in ((ct.SH_MARKET_SYMBOL, ct.HK_MARKET_SYMBOL),
                     (ct.SZ_MARKET_SYMBOL, ct.HK_MARKET_SYMBOL)):
            if not self.connect_client.set_market(data[0], data[1]):
                # Wrap the pair in a 1-tuple: `"%s" % data` with a 2-tuple
                # raises TypeError instead of formatting the message.
                self.logger.error("connect_client for %s failed" % (data, ))
                succeed = False
                continue

            if not self.connect_client.update(cdate):
                succeed = False

            self.connect_client.close()
            self.connect_client.quit()
        kill_process("zygote")
        kill_process("defunct")
        kill_process("show-component-extension-options")
        return succeed

    def init_index_components_info(self, cdate=None):
        """Store the component data of every index in ``ct.INDEX_DICT``.

        Args:
            cdate: date passed to ``set_components_data``; today's date
                ('YYYY-MM-DD') when None.

        Returns:
            Whatever ``concurrent_run`` returns for the batch.
        """
        target_date = cdate
        if target_date is None:
            target_date = datetime.now().strftime('%Y-%m-%d')

        def _store_components(index_code):
            # Reuse a cached index object when one exists.
            if index_code in self.index_objs:
                index_obj = self.index_objs[index_code]
            else:
                index_obj = CIndex(index_code)
            return (index_code, index_obj.set_components_data(target_date))

        index_codes = list(ct.INDEX_DICT.keys())
        return concurrent_run(_store_components, index_codes, num=10)

    def init_tdx_index_info(self, cdate=None):
        """Store k-line data for every index in ``ct.TDX_INDEX_DICT``.

        Args:
            cdate: when None a single run is made with ``None`` as the date;
                otherwise every trading day from the start date returned by
                ``get_day_nday_ago(cdate, num=30, ...)`` up to ``cdate`` is
                processed in turn.

        Returns:
            The ``concurrent_run`` result when ``cdate`` is None, otherwise
            True only if every per-day run was truthy.
        """
        def _store_k_data(target_date, index_code):
            try:
                # Reuse a cached index object when one exists.
                if index_code in self.index_objs:
                    index_obj = self.index_objs[index_code]
                else:
                    index_obj = CIndex(index_code)
                return (index_code, index_obj.set_k_data(target_date))
            except Exception as err:
                self.logger.error(err)
                return (index_code, False)

        if cdate is None:
            return concurrent_run(partial(_store_k_data, cdate),
                                  list(ct.TDX_INDEX_DICT.keys()),
                                  num=5)

        all_ok = True
        first_date = get_day_nday_ago(cdate, num=30, dformat="%Y-%m-%d")
        for day in get_dates_array(first_date, cdate, asending=True):
            if not self.cal_client.is_trading_day(day):
                continue
            # A fresh code list is built for every per-day run.
            if not concurrent_run(partial(_store_k_data, day),
                                  list(ct.TDX_INDEX_DICT.keys()),
                                  num=5):
                all_ok = False
        return all_ok

    def download_and_extract(self, cdate):
        """Download the archives for ``cdate`` and unzip them into ``ct.TIC_DIR``.

        Every non-hidden entry of ``ct.ZIP_DIR`` is unzipped after a
        successful download.

        Args:
            cdate: date forwarded to ``download``.

        Returns:
            True on success; False when ``download`` returns a falsy value
            or any exception is raised (the exception is logged).
        """
        try:
            if not download(ct.ZIP_DIR, cdate):
                return False
            for entry in os.listdir(ct.ZIP_DIR):
                if entry.startswith('.'):
                    continue
                archive_path = os.path.join(ct.ZIP_DIR, entry)
                if os.path.exists(archive_path):
                    unzip(archive_path, ct.TIC_DIR)
        except Exception as err:
            self.logger.error(err)
            return False
        return True
Esempio n. 15
0
    def generate(self, cdate, sh_df, sz_df, sh_rzrq_df, sz_rzrq_df, av_df,
                 limit_info, stock_info, industry_info, index_info,
                 all_stock_info):
        """Write the daily stock review as a Markdown file plus chart images.

        Images go into ``<self.sdir>/<cdate>-StockReView`` and the document
        into ``<self.sdir>/<cdate>-StockReView.md`` (UTF-8).

        Args:
            cdate: review date; used for the title, the output paths and the
                marauder-map queries.
            sh_df, sz_df: market frames plotted through ``market_plot`` on
                'amount', 'negotiable_value' and 'turnover'; ``sh_df.date``
                supplies the x axis.
            sh_rzrq_df, sz_rzrq_df: frames plotted on 'rzrqye';
                ``sh_rzrq_df.date`` supplies the x axis.
            av_df: average-price frame passed to ``plot_ohlc`` and to
                ``StrongerThanMarketSelecter``.
            limit_info: passed to ``static_plot`` together with stock_info.
            stock_info: per-stock frame passed to the oversold judger and to
                most selectors.
            industry_info: frame with 'name', 'code', 'amount', 'pchange',
                'money_change' and 'mchange' columns.
            index_info: frame with 'name', 'open', 'high', 'close', 'low',
                'volume', 'amount' and 'pchange' columns.
            all_stock_info: passed to ``StrongerThanMarketSelecter`` and
                ``BullMoreBearLessSelecter``.

        Returns:
            None.
        """
        image_dir = os.path.join(self.sdir, "%s-StockReView" % cdate)
        file_name = "%s.md" % image_dir
        os.makedirs(image_dir, exist_ok=True)

        md = MarkdownWriter()
        md.addTitle(cdate)
        md.addHeader("股票复盘", 1)

        # Index quotes.
        index_info = index_info[[
            'name', 'open', 'high', 'close', 'low', 'volume', 'amount',
            'pchange'
        ]]
        md.addHeader("指数行情", 2)
        t_index = MarkdownTable(
            headers=["名称", "价格", "涨幅(百分比)", "成交量", "成交额(亿)"])
        # Iterate rows positionally: label-based ``loc[0..n-1]`` breaks as
        # soon as the frame does not carry a default 0..n-1 index.
        for row in index_info.itertuples(index=False):
            cells = [
                row.name,
                round(row.close, 2),
                round(row.pchange, 2),
                int(row.volume / 100),
                round(row.amount / 100000000, 2)
            ]
            t_index.addRow([str(cell) for cell in cells])
        md.addTable(t_index)

        # Capital-flow analysis.
        md.addHeader("资金面分析:", 2)
        x_dict = dict()
        x_dict['日期'] = sh_df.date.tolist()
        # Shanghai vs Shenzhen: turnover amount, negotiable market value and
        # turnover rate share the same plot/image pattern.
        for header, column, title in (
                ("成交额分析:", 'amount', "成交额"),
                ("流通市值分析:", 'negotiable_value', "流通市值"),
                ("市场换手率分析:", 'turnover', "换手率")):
            md.addHeader(header, 3)
            self.market_plot(sh_df, sz_df, x_dict, column, dir_name=image_dir)
            md.addImage("market_%s.png" % column, imageTitle=title)

        # Shanghai vs Shenzhen: margin trading balance.
        md.addHeader("融资融券分析:", 3)
        y_dict = dict()
        y_dict['日期'] = sh_rzrq_df.date.tolist()
        self.market_plot(sh_rzrq_df,
                         sz_rzrq_df,
                         y_dict,
                         'rzrqye',
                         dir_name=image_dir)
        md.addImage("market_rzrqye.png", imageTitle="融资融券")

        # Average stock price trend.
        md.addHeader("平均股价分析:", 3)
        self.plot_ohlc(av_df, '平均股价', '平均股价走势图', image_dir, 'average_price')
        md.addImage("average_price.png", imageTitle="平均股价")

        # Marauder map.
        md.addHeader("活点地图分析:", 3)
        mmap_clinet = MarauderMap(CStockInfo().get().code.tolist())
        mmap_clinet.plot(cdate, image_dir, 'marauder_map')
        md.addImage("marauder_map.png", imageTitle="活点地图")

        # Is the market oversold?
        md.addHeader("大盘是否超跌:", 3)
        moj = MarketOversoldJudger()
        md.addText("大盘是否超跌:%s" % moj.judge(stock_info))

        # Bull/bear stock ratio.
        md.addHeader("牛熊股比:", 3)
        # The original read ``self.mmap_clinet`` although this method only
        # creates a local client. Keep using an instance attribute when the
        # class provides one, otherwise fall back to the client built above
        # instead of raising AttributeError.
        marauder_client = getattr(self, 'mmap_clinet', mmap_clinet)
        all_marauder_data = marauder_client.ris.get_data(cdate)
        bull_stock_num = len(all_marauder_data[all_marauder_data.profit >= 0])
        bear_stock_num = len(all_marauder_data[all_marauder_data.profit < 0])
        # Without any bear stock the ratio is unbounded; report ``inf``
        # rather than crashing with ZeroDivisionError.
        if bear_stock_num > 0:
            bull_bear_ratio = 100 * bull_stock_num / bear_stock_num
        else:
            bull_bear_ratio = float('inf')
        md.addText("牛熊股比:%s" % bull_bear_ratio)

        # Limit-up / limit-down analysis.
        md.addHeader("涨停跌停分析:", 3)
        self.static_plot(stock_info,
                         limit_info,
                         dir_name=image_dir,
                         file_name='pchange_static')
        # Title fixed: it used to be a copy of the marauder-map title.
        md.addImage("pchange_static.png", imageTitle="涨停跌停分析")

        # Industry analysis.
        md.addHeader("行业分析:", 2)
        # Total turnover: top 9 industries plus an aggregated "other" row.
        total_amount = industry_info['amount'].sum()
        df = industry_info.sort_values(by='amount', ascending=False)
        df = df[['name', 'code', 'amount']]
        # Reset the index so the appended row always gets a fresh label;
        # with the sorted, un-reset index the label ``len(df)`` could collide
        # with an existing row and overwrite it.
        df = df.head(9).reset_index(drop=True)
        df.loc[len(df)] = ['其他', '999999', total_amount - df['amount'].sum()]
        df['amount'] = df['amount'] / 1e8
        xtuple = tuple(
            (df['name'] + ':' + df['amount'].astype('str') + '亿').tolist())
        md.addHeader("总成交额分析:", 3)
        self.plot_pie(df,
                      'amount',
                      '每日成交额行业分布',
                      xtuple,
                      image_dir,
                      'industry_amount_distribution',
                      ctype='func')
        md.addImage("industry_amount_distribution.png", imageTitle="总成交额分析")

        # Top-10 gainers / losers per metric. Each entry is
        # (column, rising, label separator, unit, section title, pie title,
        #  image file stem).
        industry_sections = (
            ('pchange', True, ':', '%', "总涨幅分析", '每日涨幅行业分布',
             'industry_price_increase_distribution'),
            ('money_change', True, ':', '亿', "金额增加额的行业分布",
             '每日成交增加额行业分布', 'industry_money_increase_distribution'),
            ('mchange', True, ':', '%', "金额增加百分比的行业分布",
             '每日成交增加比例行业分布',
             'industry_money_increase_percent_distribution'),
            # Pie title fixed: the decrease chart used to be labelled as the
            # increase distribution.
            ('pchange', False, '跌幅:', '%', "总跌幅分析", '每日跌幅行业分布',
             'industry_price_decrease_distribution'),
            ('money_change', False, ':减少', '亿', "金额减少额的行业分布",
             '每日成交减少额行业分布', 'industry_money_decrease_distribution'),
            ('mchange', False, ':减少', '%', "金额减少百分比的行业分布",
             '每日成交减少百分比行业分布',
             'industry_money_decrease_percent_distribution'),
        )
        for (column, rising, label_sep, unit, title, pie_title,
             image_stem) in industry_sections:
            self._add_industry_distribution(md, industry_info, image_dir,
                                            column, rising, label_sep, unit,
                                            title, pie_title, image_stem)

        # Stock selectors: (row label, selector class, arguments of choose).
        md.addHeader("选股器选股", 2)
        t_selector = MarkdownTable(headers=["方法", "股票列表"])
        selections = (
            ('强于平均股价10%', StrongerThanMarketSelecter,
             (all_stock_info, av_df)),
            # This row used to dump the previous selector's result.
            ('逆势上涨', AntiMarketUpSelecter, (stock_info, )),
            ('高盈利低换手', LowVolumeHighProfitSelecter, (stock_info, )),
            ('博弈K线带量长阳', GameKLineBigraiseLargeVolumeSelecter,
             (stock_info, )),
            ('博弈K线无量长阳', GameKLineBigraiseSmallVolumeSelecter,
             (stock_info, )),
            ('低位筹码密集', NeiChipIntensiveSelecter, (stock_info, )),
            ('牛长熊短', BullMoreBearLessSelecter, (all_stock_info, )),
            ('无筹码净空间', NoChipNetSpaceSelecter, (stock_info, )),
        )
        for label, selecter_cls, choose_args in selections:
            chosen_codes = selecter_cls().choose(*choose_args)
            t_selector.addRow([label, json.dumps(chosen_codes)])
        md.addTable(t_selector)

        # The document contains Chinese text; do not depend on the locale's
        # default encoding.
        with open(file_name, "w", encoding="utf-8") as f:
            f.write(md.getStream())

    def _add_industry_distribution(self, md, industry_info, image_dir, column,
                                   rising, label_sep, unit, title, pie_title,
                                   image_stem):
        """Append one top-10 industry pie-chart section to ``md``.

        Industries are filtered on ``column`` (> 0 when ``rising``, < 0
        otherwise), the ten largest moves are kept (negative values are
        flipped to positive magnitudes), and a pie chart is drawn with
        ``plot_pie``. Nothing is added when no industry qualifies.
        """
        if rising:
            df = industry_info[industry_info[column] > 0]
        else:
            df = industry_info[industry_info[column] < 0]
        if df.empty:
            return
        df = df[['name', 'code', column]]
        # Largest gains first when rising, largest losses (most negative)
        # first otherwise.
        df = df.sort_values(by=column, ascending=not rising).head(10).copy()
        if not rising:
            df[column] = df[column] * -1
        xtuple = tuple(
            (df['name'] + label_sep + df[column].astype('str') +
             unit).tolist())
        md.addHeader(title + ":", 3)
        self.plot_pie(df, column, pie_title, xtuple, image_dir, image_stem)
        md.addImage(image_stem + ".png", imageTitle=title)
Esempio n. 16
0
def update_code_list():
    """Return the ``code`` column of ``CStockInfo().get()`` as a list."""
    return CStockInfo().get().code.tolist()
Esempio n. 17
0
    def relation_plot(self, df, good_list):
        """Cluster stocks by daily price variation and plot their relations.

        The open-to-close percentage variation of every code in
        ``good_list`` is fed to a sparse inverse-covariance estimator; the
        resulting covariance is clustered with affinity propagation, the
        clusters (stock names and industries) are logged, and a 2D embedding
        of the stocks with their partial correlations is saved to
        ``/tmp/relation.png``.

        Args:
            df: frame with 'code', 'open' and 'close' columns. Every code in
                ``good_list`` must contribute the same number of rows, since
                the per-code price lists are stacked with ``np.vstack``.
            good_list: stock codes to analyse.

        Returns:
            None.
        """
        close_prices = np.vstack(
            [df[df.code == code].close.tolist() for code in good_list])
        open_prices = np.vstack(
            [df[df.code == code].open.tolist() for code in good_list])

        # the daily variations of the quotes are what carry most information
        variation = (close_prices - open_prices) * 100 / open_prices

        logger.info("get variation succeed")
        # #############################################################################
        # learn a graphical structure from the correlations
        # scikit-learn renamed GraphLassoCV to GraphicalLassoCV and later
        # dropped the old name; use whichever this installation provides.
        lasso_cls = getattr(covariance, 'GraphicalLassoCV', None)
        if lasso_cls is None:
            lasso_cls = covariance.GraphLassoCV
        edge_model = lasso_cls()
        # standardize the time series: using correlations rather than covariance is more efficient for structure recovery
        X = variation.copy().T
        X /= X.std(axis=0)
        edge_model.fit(X)

        logger.info("mode compute succeed")
        # #############################################################################
        # cluster using affinity propagation
        _, labels = cluster.affinity_propagation(edge_model.covariance_)
        n_labels = labels.max()
        code_list = np.array(good_list)

        # Map every stock code to the name of the industry listing it.
        # NOTE(review): ``IndustryInfo.get`` / ``CStockInfo.get`` are called
        # on the class, exactly as before - confirm they are class/static
        # methods.
        industry_dict = dict()
        industry_df_info = IndustryInfo.get()
        # Pairing the two columns directly avoids ``Series.iteritems``
        # (removed in pandas 2) and a per-row ``loc`` lookup.
        for name, content in zip(industry_df_info['name'],
                                 industry_df_info['content']):
            for code in json.loads(content):
                industry_dict[code] = name

        cluster_dict = dict()
        for i in range(n_labels + 1):
            members = code_list[labels == i]
            cluster_dict[i] = members
            name_list = [CStockInfo.get(code, 'name') for code in members]
            logger.info('cluster code %i: %s', i + 1, ', '.join(name_list))

        cluster_info = dict()
        for group, _code_list in cluster_dict.items():
            industries = cluster_info.setdefault(group, set())
            for code in _code_list:
                # A code without a known industry is skipped: this summary is
                # only logged and must not abort the plot.
                iname = industry_dict.get(code)
                if iname is not None:
                    industries.add(iname)
            # Log the cluster being summarised (the original reused the stale
            # loop variable of the previous loop here).
            logger.info('cluster inustry %i: %s', group + 1,
                        ', '.join(industries))

        # #############################################################################
        # find a low-dimension embedding for visualization: find the best position of
        # the nodes (the stocks) on a 2D plane
        # we use a dense eigen_solver to achieve reproducibility (arpack is
        # initiated with random vectors that we don't control). In addition, we
        # use a large number of neighbors to capture the large-scale structure.
        node_position_model = manifold.LocallyLinearEmbedding(
            n_components=2, eigen_solver='dense', n_neighbors=6)
        embedding = node_position_model.fit_transform(X.T).T

        # #############################################################################
        # visualization
        plt.figure(1, facecolor='w', figsize=(10, 8))
        plt.clf()
        ax = plt.axes([0., 0., 1., 1.])
        plt.axis('off')

        # display a graph of the partial correlations
        partial_correlations = edge_model.precision_.copy()
        d = 1 / np.sqrt(np.diag(partial_correlations))
        partial_correlations *= d
        partial_correlations *= d[:, np.newaxis]
        non_zero = (np.abs(np.triu(partial_correlations, k=1)) > 0.02)

        # plot the nodes using the coordinates of our embedding
        plt.scatter(embedding[0],
                    embedding[1],
                    s=100 * d**2,
                    c=labels,
                    cmap=plt.cm.nipy_spectral)

        # plot the edges
        start_idx, end_idx = np.where(non_zero)
        values = np.abs(partial_correlations[non_zero])
        # With no edge above the threshold there is nothing to draw, and
        # ``values.max()`` would fail on the empty array.
        if values.size:
            # a sequence of (*line0*, *line1*, *line2*), where:: linen = (x0, y0), (x1, y1), ... (xm, ym)
            segments = [[embedding[:, start], embedding[:, stop]]
                        for start, stop in zip(start_idx, end_idx)]
            lc = LineCollection(segments,
                                zorder=0,
                                cmap=plt.cm.hot_r,
                                norm=plt.Normalize(0, .7 * values.max()))
            lc.set_array(values)
            lc.set_linewidths(15 * values)
            ax.add_collection(lc)

        # With a single cluster n_labels is 0; avoid dividing by zero when
        # picking the label colours.
        color_scale = float(max(n_labels, 1))

        # add a label to each node. The challenge here is that we want to position the labels to avoid overlap with other labels
        for index, (name, label,
                    (x, y)) in enumerate(zip(code_list, labels, embedding.T)):
            dx = x - embedding[0]
            dx[index] = 1
            dy = y - embedding[1]
            dy[index] = 1
            this_dx = dx[np.argmin(np.abs(dy))]
            this_dy = dy[np.argmin(np.abs(dx))]
            if this_dx > 0:
                horizontalalignment = 'left'
                x = x + .002
            else:
                horizontalalignment = 'right'
                x = x - .002
            if this_dy > 0:
                verticalalignment = 'bottom'
                y = y + .002
            else:
                verticalalignment = 'top'
                y = y - .002
            plt.text(x,
                     y,
                     name,
                     size=10,
                     horizontalalignment=horizontalalignment,
                     verticalalignment=verticalalignment,
                     bbox=dict(facecolor='w',
                               edgecolor=plt.cm.nipy_spectral(label /
                                                              color_scale),
                               alpha=.6))

        # ``np.ptp`` works on every NumPy version; the ``ndarray.ptp`` method
        # was removed in NumPy 2.
        x_span = np.ptp(embedding[0])
        y_span = np.ptp(embedding[1])
        plt.xlim(
            embedding[0].min() - .15 * x_span,
            embedding[0].max() + .10 * x_span,
        )
        plt.ylim(embedding[1].min() - .03 * y_span,
                 embedding[1].max() + .03 * y_span)
        plt.savefig('/tmp/relation.png', dpi=1000)
Esempio n. 18
0
 def __init__(self):
     """Create the module logger and the valuation and stock-info clients."""
     self.logger = getLogger(__name__)
     self.cval_client = CValuation()
     self.stock_info_client = CStockInfo()
Esempio n. 19
0
# -*- coding: utf-8 -*-
import os
import sys
from os.path import abspath, dirname

sys.path.insert(0, dirname(dirname(dirname(abspath(__file__)))))
import traceback
import const as ct
import pandas as pd
from rstock import RIndexStock
from cstock_info import CStockInfo
from industry_info import IndustryInfo
if __name__ == '__main__':
    try:
        mdate = '2019-08-02'
        cobj = CStockInfo()
        robj = RIndexStock()
        iobj = IndustryInfo()
        black_list = list(ct.BLACK_DICT.keys())

        bdf = cobj.get()
        stock_info = robj.get_data(mdate)
        idf = iobj.get_csi_industry_data(mdate)
        df = pd.merge(bdf, idf, how='left', on=['code'])
        df = pd.merge(stock_info, df, how='inner', on=['code'])
        df = df[~df.code.isin(black_list)]
        df = df[(df.profit > 1) & (df.profit < 3) & (df.pday > 30) &
                (df.timeToMarket < 20150101)]
        df = df.reset_index(drop=True)
        #df = df[['code', 'name', 'industry', 'profit', 'pday', 'pind_name', 'sind_name', 'tind_name', 'find_name']]
        df = df[['code', 'name', 'profit', 'pday', 'find_name']]