Esempio n. 1
0
    def fit(self):
        """
        Train a single-layer softmax classifier with mini-batch gradient descent.

        Weights and bias are sized from self.x_train / self.y_train, the summed
        cross-entropy is minimised with plain gradient descent for
        self.training_epochs epochs, and the argmax prediction op is stored in
        self.pred. When self.x_test is set, the accuracy on
        (self.x_test, self.y_test) is logged through ZLog.

        A batch size of -1 means "one tenth of the training set"; it is resolved
        once before training and written back to self.batch_size. Samples that
        do not fill a whole batch at the end of the training set are not used.
        """
        n_samples = self.x_train.shape[0]
        n_outputs = self.y_train.shape[1]
        w = tf.Variable(tf.zeros([self.x_train.shape[1], n_outputs]))
        b = tf.Variable(tf.zeros([n_outputs]))

        activation = tf.nn.softmax(tf.matmul(self.x, w) + b)
        cost = -tf.reduce_sum(self.y * tf.log(activation))
        optimizer = tf.train.GradientDescentOptimizer(self.learning_rate).minimize(cost)
        self.init = tf.initialize_all_variables()

        # The batch layout is the same for every epoch, so work it out once.
        # max(1, ...) keeps a training set with fewer than 10 rows from yielding
        # a batch size of 0 (which would raise ZeroDivisionError just below).
        if self.batch_size == -1:
            self.batch_size = max(1, n_samples // 10)
        total_batch = int(n_samples // self.batch_size)

        with tf.Session() as sess:
            sess.run(self.init)
            for _ in range(self.training_epochs):
                for i in range(total_batch):
                    start = i * self.batch_size
                    stop = start + self.batch_size
                    # The running average cost was never read, so the extra
                    # forward pass that computed it has been dropped.
                    sess.run(optimizer, feed_dict={self.x: self.x_train[start:stop],
                                                   self.y: self.y_train[start:stop]})
            ZLog.info("Optimization Finished!")

            self.pred = tf.argmax(activation, 1)
            if self.x_test is not None:
                correct_prediction = tf.equal(self.pred, tf.argmax(self.y, 1))
                accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
                ZLog.info("Accuracy:" + str(accuracy.eval({self.x: self.x_test, self.y: self.y_test})))
Esempio n. 2
0
    def gmm_component_filter(self, nc=20, threshold=0.72, show=True):
        """
        Cluster self.fiter.y with a GMM and pick the components dominated by one outcome.

        Adds three columns to self.fiter.df: 'p_rk_cg' (rank of 'profit_cg'),
        'ss' (predicted GMM component) and 'rk' (1 for the top quarter of ranks,
        -1 for the bottom quarter, 0 otherwise). The component indexes whose
        share of rk == -1 / rk == 1 exceeds threshold are stored in
        self.top_loss_ss / self.top_win_ss.

        :param nc: number of components passed to GMM
        :param threshold: minimum row share of rk == -1 (or rk == 1) for a component to be selected
        :param show: when True, draw the stacked bar chart and log the selected rows
        :return: (xt, xt_pct) - the component-vs-rk crosstab and its row-normalised form
        """
        features = self.fiter.y
        model = GMM(nc, n_iter=500, random_state=3).fit(features)
        component = model.predict(features)

        df = self.fiter.df
        df['p_rk_cg'] = df['profit_cg'].rank()
        df['ss'] = component

        # rank cut-offs: top 25% count as wins, bottom 25% as losses
        n_rows = len(df['profit_cg'])
        win_top = n_rows - n_rows * 0.25
        loss_top = n_rows * 0.25

        rank = df['p_rk_cg']
        df['rk'] = 0
        labelled = np.where(rank > win_top, 1, df['rk'])
        df['rk'] = np.where(rank < loss_top, -1, labelled)

        xt = pd.crosstab(df['ss'], df['rk'])
        row_totals = xt.sum(1).astype(float)
        xt_pct = xt.div(row_totals, axis=0)

        if show:
            xt_pct.plot(figsize=(16, 8), kind='bar', stacked=True, title='ss -> result')
            plt.xlabel('ss')
            plt.ylabel('result')

        loss_rows = xt_pct[xt_pct[-1] > threshold]
        if show:
            ZLog.info(loss_rows)
        win_rows = xt_pct[xt_pct[1] > threshold]
        if show:
            ZLog.info(win_rows)

        self.top_loss_ss = loss_rows.index
        self.top_win_ss = win_rows.index
        return xt, xt_pct
Esempio n. 3
0
    def judge(self, **kwargs):
        """
        Decide whether a sample passes the fitted estimator's probability threshold.

        The seven features 'deg_hisWindowPd', 'deg_windowPd', 'deg_60WindowPd',
        'lowBkCnt', 'wave_score1', 'wave_score2' and 'wave_score3' must all be
        supplied as keyword arguments.

        :return: True when column 1 of estimator.predict_proba for the sample
                 exceeds self.prob_threshold, False otherwise; None when a
                 required keyword is missing or the instance lacks the
                 estimator / probability attribute.
        """
        feature_keys = ('deg_hisWindowPd', 'deg_windowPd', 'deg_60WindowPd',
                        'lowBkCnt', 'wave_score1', 'wave_score2', 'wave_score3')

        # dict.has_key() no longer exists on Python 3; `in` works on 2 and 3.
        if any(key not in kwargs for key in feature_keys):
            ZLog.info('judge dlw kwargs error!')
            return

        if not hasattr(self, 'estimator') or not hasattr(
                self, MlFiterDlwJudgeClass.K_GOLDEN_DEG_PROB):
            # For now this is only logged; raise an exception here if needed.
            ZLog.info('not estimator or prob')
            return

        # Feature order must stay exactly as listed in feature_keys.
        w = np.array([kwargs[key] for key in feature_keys])

        prob = self.estimator.predict_proba(w.reshape(1, -1))[:, 1][0]
        return bool(prob > self.prob_threshold)
Esempio n. 4
0
 def print_progress(ind, last=False):
     """
     Log the iteration counter and the individual loss values.

     Nothing is logged unless this is the last call (last=True) or ind is a
     positive multiple of K_PRINT_ITER.
     """
     if not last and not (ind > 0 and ind % K_PRINT_ITER == 0):
         return

     ZLog.info('Iteration %d/%d\n' % (ind + 1, iter_n))
     loss_lines = (
         ('  content loss: %g\n', org_loss),
         ('    style loss: %g\n', style_loss),
         ('       tv loss: %g\n', tv_loss),
         ('    total loss: %g\n', loss),
     )
     for template, tensor in loss_lines:
         ZLog.debug(template % tensor.eval())
Esempio n. 5
0
def show_orders_hist(order_pd, s_list=None, q_default=10):
    """
    Plot a histogram and log quantile-bucket counts for selected order columns.

    :param order_pd: DataFrame holding the orders
    :param s_list: column names to inspect; defaults to the built-in feature
                   list. Names that are not columns of order_pd are ignored.
    :param q_default: number of quantile buckets requested from pd.qcut
    """
    if s_list is None:
        s_list = ['lowBkCnt', 'atr_std', 'jump_power', 'diff_days',
                  'wave_score1', 'wave_score2', 'wave_score3',
                  'deg_60WindowPd', 'deg_hisWindowPd', 'deg_windowPd']

    for sn in s_list:
        if sn not in order_pd.columns:
            continue

        column = order_pd[sn]
        unique_vals = np.unique(column)
        uq = len(unique_vals)
        if uq == 1:
            # a constant column has nothing to show
            continue

        # at least 10 bins, more for columns with many distinct values
        column.hist(bins=max(10, uq // 50))
        plt.show()

        try:
            cats = pd.qcut(column, q_default)
        except ValueError:
            # qcut cannot split when one value fills more than one quantile
            # (duplicate bin edges). Fall back to edges taken from the quantiles
            # of the distinct values, using only public numpy / pandas APIs.
            edges = np.percentile(unique_vals, np.linspace(0, 100, q_default + 1))
            cats = pd.cut(column, edges, include_lowest=True)
        ZLog.info('{0} show hist and qcuts'.format(sn))
        ZLog.info(cats.value_counts())
Esempio n. 6
0
    def judge(self, **kwargs):
        """
        Decide whether a sample passes the fitted estimator's probability threshold.

        The seven features 'deg_hisWindowPd', 'deg_windowPd', 'deg_60WindowPd',
        'lowBkCnt', 'wave_score1', 'wave_score2' and 'wave_score3' must all be
        supplied as keyword arguments.

        :return: True when column 1 of estimator.predict_proba for the sample
                 exceeds self.prob_threshold, False otherwise; None when a
                 required keyword is missing or the instance lacks the
                 estimator / probability attribute.
        """
        required = ('deg_hisWindowPd', 'deg_windowPd', 'deg_60WindowPd', 'lowBkCnt',
                    'wave_score1', 'wave_score2', 'wave_score3')

        # dict.has_key() no longer exists on Python 3; `in` works on 2 and 3.
        for key in required:
            if key not in kwargs:
                ZLog.info('judge dlw kwargs error!')
                return

        if not hasattr(self, 'estimator') or not hasattr(self, MlFiterDlwJudgeClass.K_GOLDEN_DEG_PROB):
            # For now this is only logged; raise an exception here if needed.
            ZLog.info('not estimator or prob')
            return

        # Feature order must stay exactly as listed in `required`.
        w = np.array([kwargs[key] for key in required])

        prob = self.estimator.predict_proba(w.reshape(1, -1))[:, 1][0]
        return bool(prob > self.prob_threshold)
Esempio n. 7
0
    def gmm_component_filter(self, nc=20, threshold=0.72, show=True):
        """
        Cluster self.fiter.y with a GMM and pick the components dominated by one outcome.

        Adds three columns to self.fiter.df: 'p_rk_cg' (rank of 'profit_cg'),
        'ss' (predicted GMM component) and 'rk' (1 for the top quarter of ranks,
        -1 for the bottom quarter, 0 otherwise). The component indexes whose
        share of rk == -1 / rk == 1 exceeds threshold are stored in
        self.top_loss_ss / self.top_win_ss.

        :param nc: number of components passed to GMM
        :param threshold: minimum row share of rk == -1 (or rk == 1) for a component to be selected
        :param show: when True, draw the stacked bar chart and log the selected rows
        :return: (xt, xt_pct) - the component-vs-rk crosstab and its row-normalised form
        """
        samples = self.fiter.y
        mixture = GMM(nc, n_iter=500, random_state=3).fit(samples)
        cluster_ids = mixture.predict(samples)

        frame = self.fiter.df
        frame['p_rk_cg'] = frame['profit_cg'].rank()
        frame['ss'] = cluster_ids

        # rank cut-offs: top 25% count as wins, bottom 25% as losses
        total_rows = len(frame['profit_cg'])
        win_top = total_rows - total_rows * 0.25
        loss_top = total_rows * 0.25

        profit_rank = frame['p_rk_cg']
        frame['rk'] = 0
        with_wins = np.where(profit_rank > win_top, 1, frame['rk'])
        frame['rk'] = np.where(profit_rank < loss_top, -1, with_wins)

        xt = pd.crosstab(frame['ss'], frame['rk'])
        per_component_total = xt.sum(1).astype(float)
        xt_pct = xt.div(per_component_total, axis=0)

        if show:
            xt_pct.plot(figsize=(16, 8), kind='bar', stacked=True, title='ss -> result')
            plt.xlabel('ss')
            plt.ylabel('result')

        mostly_loss = xt_pct[xt_pct[-1] > threshold]
        if show:
            ZLog.info(mostly_loss)
        mostly_win = xt_pct[xt_pct[1] > threshold]
        if show:
            ZLog.info(mostly_win)

        self.top_loss_ss = mostly_loss.index
        self.top_win_ss = mostly_win.index
        return xt, xt_pct
Esempio n. 8
0
def show_orders_hist(order_pd, s_list=None, q_default=10):
    """
    Plot a histogram and log quantile-bucket counts for selected order columns.

    :param order_pd: DataFrame holding the orders
    :param s_list: column names to inspect; defaults to the built-in feature
                   list. Names that are not columns of order_pd are ignored.
    :param q_default: number of quantile buckets requested from pd.qcut
    """
    if s_list is None:
        s_list = [
            'lowBkCnt', 'atr_std', 'jump_power', 'diff_days', 'wave_score1',
            'wave_score2', 'wave_score3', 'deg_60WindowPd', 'deg_hisWindowPd',
            'deg_windowPd'
        ]

    for sn in s_list:
        if sn not in order_pd.columns:
            continue

        series = order_pd[sn]
        distinct = np.unique(series)
        uq = len(distinct)
        if uq == 1:
            # a constant column has nothing to show
            continue

        # at least 10 bins, more for columns with many distinct values
        series.hist(bins=max(10, uq // 50))
        plt.show()

        try:
            cats = pd.qcut(series, q_default)
        except ValueError:
            # qcut cannot split when one value fills more than one quantile
            # (duplicate bin edges). Fall back to edges taken from the quantiles
            # of the distinct values, using only public numpy / pandas APIs.
            edges = np.percentile(distinct,
                                  np.linspace(0, 100, q_default + 1))
            cats = pd.cut(series, edges, include_lowest=True)
        ZLog.info('{0} show hist and qcuts'.format(sn))
        ZLog.info(cats.value_counts())
Esempio n. 9
0
 def calc_above(self):
     """
     Compute the win rate of the orders outside the diff_days window.

     Selects the orders whose 'diff_days' is 0 or greater than
     self.dd_threshold, stores the share of them with result == 1 in
     self.above, logs it and returns it.

     :return: win rate as a float; nan when no order matches the selection
     """
     orderPd = self.orderPd
     order_outer_diff_days = orderPd[(orderPd['diff_days'] == 0) | (orderPd['diff_days'] > self.dd_threshold)]
     counts = order_outer_diff_days.result.value_counts()
     total = counts.sum()
     # float() forces true division (int / int floors on Python 2), and
     # .get() avoids a KeyError when none of the selected orders is a win.
     self.above = float(counts.get(1, 0)) / total if total else float('nan')
     ZLog.info('above win rate: ' + str(self.above))
     return self.above
Esempio n. 10
0
 def calc_below(self):
     """
     Compute the win rate of the orders inside the diff_days window.

     Selects the orders whose 'diff_days' is greater than 0 and at most
     self.dd_threshold, stores the share of them with result == 1 in
     self.below, logs it and returns it.

     :return: win rate as a float; nan when no order matches the selection
     """
     diff_days = self.orderPd['diff_days']
     order_diff_days = self.orderPd[(diff_days > 0) & (diff_days <= self.dd_threshold)]
     counts = order_diff_days.result.value_counts()
     total = counts.sum()
     # float() forces true division (int / int floors on Python 2), and
     # .get() avoids a KeyError when none of the selected orders is a win.
     self.below = float(counts.get(1, 0)) / total if total else float('nan')
     ZLog.info('below win rate: ' + str(self.below))
     return self.below
Esempio n. 11
0
def check_golden_mc_result(filter_ret, w_rate, symbol=None, bp=24.3, pf_cnt=200, loop_cnt=20000, p_outter_loop=1):
    """
    Visually verify a small number of the filtered results.

    Each row of filter_ret['keys'] is parsed with ast.literal_eval; its first
    four entries are used as loss percents and entries 4..8 as win percents,
    which are handed to _golden_mc_process_cmp with show=True.

    :param filter_ret: DataFrame with a 'keys' column of literal sequences
    :param w_rate: forwarded to _golden_mc_process_cmp
    :param symbol: symbol to load; when None, rows [-82:-40] of 'usNOAH' are used
    :param bp: forwarded to _golden_mc_process_cmp
    :param pf_cnt: forwarded to _golden_mc_process_cmp
    :param loop_cnt: forwarded to _golden_mc_process_cmp
    :param p_outter_loop: forwarded to _golden_mc_process_cmp
    :return: None
    """
    if symbol is None:
        kl_pd = SymbolPd.make_kfold_pd('usNOAH')[-82:-40]
    else:
        kl_pd = SymbolPd.make_kfold_pd(symbol)

    # The field names never change between rows, so build the tuple type once
    # instead of on every iteration.
    golden_tuple = namedtuple('golden', (
        'below200', 'below250', 'below300', 'below382', 'above618', 'above700', 'above800', 'above900', 'above950'))

    for loc in range(filter_ret.shape[0]):
        percents = ast.literal_eval(filter_ret['keys'].iloc[loc])
        ZLog.info(loc)
        ZLog.info(percents)
        loss_percent = np.array(percents[0:4])
        win_percent = np.array(percents[4:9])
        profits_dict = {}  # fresh result container for every row

        _golden_mc_process_cmp(kl_pd, loss_percent, win_percent, golden_tuple, profits_dict, w_rate,
                               bp, pf_cnt, p_outter_loop, loop_cnt, show=True)
Esempio n. 12
0
 def fit_img(self, img_path, resize=False, size=480, enhance=None, iter_n=10, **kwargs):
     """
     Placeholder implementation: only logs that fit_img is missing.

     None of the arguments is used here.
     """
     ZLog.info('TensorPrismaClass miss fit_img!!')
Esempio n. 13
0
 def calc_above(self):
     """
     Compute the win rate of the orders outside the diff_days window.

     Selects the orders whose 'diff_days' is 0 or greater than
     self.dd_threshold, stores the share of them with result == 1 in
     self.above, logs it and returns it.

     :return: win rate as a float; nan when no order matches the selection
     """
     orderPd = self.orderPd
     outside_window = (orderPd['diff_days'] == 0) | (
         orderPd['diff_days'] > self.dd_threshold)
     result_counts = orderPd[outside_window].result.value_counts()
     n_orders = result_counts.sum()
     # float() forces true division (int / int floors on Python 2), and
     # .get() avoids a KeyError when none of the selected orders is a win.
     if n_orders:
         self.above = float(result_counts.get(1, 0)) / n_orders
     else:
         self.above = float('nan')
     ZLog.info('above win rate: ' + str(self.above))
     return self.above
Esempio n. 14
0
    def predict_kwargs(self, w_col, need_ind_cnt=1, **kwargs):
        """
        Build a one-row sample from keyword arguments and test its prediction.

        :param w_col: feature names, in the column order the sample must have
        :param need_ind_cnt: forwarded unchanged to self.predict
        :param kwargs: must contain a value for every name in w_col
        :return: the result of `self.predict(x, need_ind_cnt) == 1`; None (after
                 logging) when a feature is missing from kwargs
        """
        if any(name not in kwargs for name in w_col):
            ZLog.info('judge kwargs error!')
            return

        # single sample -> shape (1, n_features)
        sample = np.array([kwargs[name] for name in w_col]).reshape(1, -1)
        return self.predict(sample, need_ind_cnt) == 1
Esempio n. 15
0
    def predict_hit_kwargs(self, w_col, **kwargs):
        """
        Build a one-row sample from keyword arguments and return its hit count.

        :param w_col: feature names, in the column order the sample must have
        :param kwargs: must contain a value for every name in w_col
        :return: whatever self.hit_cnt returns for the sample; None (after
                 logging) when a feature is missing from kwargs
        """
        if any(name not in kwargs for name in w_col):
            ZLog.info('judge kwargs error!')
            return

        # single sample -> shape (1, n_features)
        sample = np.array([kwargs[name] for name in w_col]).reshape(1, -1)
        return self.hit_cnt(sample)
Esempio n. 16
0
    def _do_cross_val_score(self, x, y, cv, scoring):
        """
        Cross-validate the estimator from self.get_fiter() and log the mean score.

        For mean-squared-error scorings the logged figure is the mean of
        sqrt(-score), i.e. the scores are treated as negated MSE values; for any
        other scoring it is the plain mean.

        :param x: feature matrix passed to cross_val_score
        :param y: targets passed to cross_val_score
        :param cv: cross-validation spec passed to cross_val_score
        :param scoring: scoring name passed to cross_val_score
        :return: the raw scores returned by cross_val_score
        """
        fiter = self.get_fiter()

        scores = cross_validation.cross_val_score(fiter, x, y, cv=cv, scoring=scoring)

        # Accept the newer 'neg_mean_squared_error' spelling as well, so the
        # RMSE summary is still produced when callers use that name.
        if scoring in ('mean_squared_error', 'neg_mean_squared_error'):
            mean_sc = np.mean(np.sqrt(-scores))
        else:
            mean_sc = np.mean(scores)

        ZLog.info(scoring + ' mean: ' + str(mean_sc))

        return scores
Esempio n. 17
0
 def make_boost_dummies(self, orderPd, cats_ss, prefix, regex):
     """
     Bucket cats_ss into self.qcut_bins quantile bins.

     NOTE(review): the visible body stops after computing `cats`; orderPd and
     regex are unused and nothing is returned - the rest of the method may be
     missing from this excerpt.

     :param orderPd: unused in the visible code
     :param cats_ss: values to bucket
     :param prefix: label used in the fallback log message
     :param regex: unused in the visible code
     """
     try:
         cats = pd.qcut(cats_ss, self.qcut_bins)
     except ValueError:
         # `except Exception, e` was Python-2-only syntax and `e` was unused.
         # qcut cannot split when one value fills more than one quantile
         # (duplicate bin edges). Fall back to edges taken from the quantiles
         # of the distinct values, using only public numpy / pandas APIs.
         bins = np.percentile(np.unique(cats_ss), np.linspace(0, 100, self.qcut_bins + 1))
         cats = pd.cut(cats_ss, bins, include_lowest=True)
         ZLog.info(prefix + ' qcut except use bins!')
Esempio n. 18
0
 def do_snn_tt(cls, x, y, n_folds=10, nn_hdim=3, num_passes=20000, print_loss=False):
     """
     Evaluate cls with shuffled K-fold splits and log the mean accuracy.

     For every fold an instance of cls is built from the train / test split
     and its fit() result is collected as that fold's accuracy.

     :param x: samples, indexable by the fold index arrays
     :param y: labels, indexable by the fold index arrays
     :param n_folds: number of folds passed to KFold
     :param nn_hdim: forwarded to cls
     :param num_passes: forwarded to cls
     :param print_loss: forwarded to cls
     """
     folds = KFold(len(y), n_folds=n_folds, shuffle=True)
     acs = []
     for train_index, test_index in folds:
         model = cls(x[train_index], y[train_index], x[test_index], y[test_index],
                     nn_hdim=nn_hdim, num_passes=num_passes, print_loss=print_loss)
         acs.append(model.fit())
     ZLog.info('accuracys mean = {}'.format(np.array(acs).mean()))
Esempio n. 19
0
    def judge(self, **kwargs):
        """
        Validate the keyword features against MlFiterDegPd.g_w_col and delegate to do_judge.

        :return: the result of self.do_judge; None (after logging) when one of
                 the required feature names is missing from kwargs
        """
        w_col = MlFiterDegPd.g_w_col
        if any(name not in kwargs for name in w_col):
            ZLog.info('judge deg kwargs error!')
            return

        return self.do_judge(w_col, MlFiterDegPd.g_regex_d, MlFiterDegPdClass, **kwargs)
Esempio n. 20
0
def show_golden_process(w_rate,
                        symbol=None,
                        mc_golden=False,
                        bp=24.3,
                        pf_cnt=200,
                        loop_cnt=20000,
                        outter_loop=1):
    """
    Simulate random walks over the golden support / resistance grid and plot the profits.

    Every trial starts from init_golden_w_full (mc_golden) or init_golden_wl,
    then repeatedly draws a Bernoulli(w_rate) outcome, records the last
    resistance (win) or first support (loss) as the exit price and advances
    with golden_map_wl_grid until that returns None. The trial profit is
    pf_cnt * (exit price - bp) - 10.

    :param w_rate: probability of a winning draw
    :param symbol: symbol to load; when None, rows [-82:-40] of 'usNOAH' are used
    :param mc_golden: use the calc_mc_golden / init_golden_w_full variants
    :param bp: buy price used for initialisation and profit
    :param pf_cnt: position size multiplier in the profit formula
    :param loop_cnt: number of trials per outer pass
    :param outter_loop: number of outer passes; only the last one is shown
    :return: None
    """
    if symbol is not None:
        kl_pd = SymbolPd.make_kfold_pd(symbol)
    else:
        kl_pd = SymbolPd.make_kfold_pd('usNOAH')[-82:-40]

    if mc_golden:
        golden = TLineGolden.calc_mc_golden(kl_pd, g_mc_percent, g_mc_loss_cnt)
    else:
        golden = TLineGolden.calc_golden(kl_pd)

    while outter_loop > 0:
        outter_loop -= 1
        trial_profits = []
        for _ in np.arange(loop_cnt):
            if mc_golden:
                wl = init_golden_w_full(golden, bp)
            else:
                wl = init_golden_wl(golden, bp)
            if wl is None:
                ZLog.info('init_golden_wl out of bp range!')
                return

            sp = 0
            while wl is not None:
                supports, resistances = wl['supports'], wl['resistances']
                w = np.random.binomial(1, w_rate)
                # Index -1 or 0 would both do: by the time sp is meaningful
                # only one element is left anyway.
                sp = resistances[-1] if w else supports[0]
                wl = golden_map_wl_grid(w, wl)

            # The walk loop has no break, so this always runs once it ends.
            # The -10 is the default trading cost (commission).
            trial_profits.append(pf_cnt * (sp - bp) - 10)

        profits = pd.Series(trial_profits)
        show = (outter_loop == 0)
        NpUtil.calc_regress_ang(profits.cumsum(), show)
        if show:
            profits.hist()
Esempio n. 21
0
    def fit(self):
        """
        Train the two-hidden-layer perceptron with Adam and evaluate it.

        Randomly initialised weights / biases are fed to
        self.multilayer_perceptron, the mean softmax cross-entropy is minimised
        for self.training_epochs epochs over shuffled mini-batches, and the
        argmax prediction op is stored in self.pred. The training arrays
        self.x_train / self.y_train are reshuffled in place every epoch.

        :return: accuracy on (self.x_test, self.y_test) when self.x_test is not
                 None, otherwise None
        """
        weights = {
            'h1': tf.Variable(tf.random_normal([self.n_input, self.n_hidden_1])),
            'h2': tf.Variable(tf.random_normal([self.n_hidden_1, self.n_hidden_2])),
            'out': tf.Variable(tf.random_normal([self.n_hidden_2, self.n_classes]))
        }
        biases = {
            'b1': tf.Variable(tf.random_normal([self.n_hidden_1])),
            'b2': tf.Variable(tf.random_normal([self.n_hidden_2])),
            'out': tf.Variable(tf.random_normal([self.n_classes]))
        }

        mul_predict = self.multilayer_perceptron(self.x, weights, biases)
        # Softmax loss
        cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(mul_predict, self.y))
        # Adam Optimizer
        optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(cost)
        self.init = tf.initialize_all_variables()

        with tf.Session() as sess:
            sess.run(self.init)
            # Ceiling division: int(n / batch) + 1 scheduled one extra, empty
            # batch whenever n was an exact multiple of the batch size, and the
            # mean loss of an empty batch is not a meaningful number.
            n_train = len(self.x_train)
            total_batch = int((n_train + self.batch_size - 1) // self.batch_size)
            tf.train.SummaryWriter(K_LOG_FILE, graph=sess.graph)
            for epoch in range(self.training_epochs):
                avg_cost = 0.

                # reshuffle samples and labels with the same permutation
                perm = np.arange(len(self.x_train))
                np.random.shuffle(perm)
                self.x_train = self.x_train[perm]
                self.y_train = self.y_train[perm]

                for i in range(total_batch):
                    batch_xs = self.x_train[i * self.batch_size: (i + 1) * self.batch_size]
                    batch_ys = self.y_train[i * self.batch_size: (i + 1) * self.batch_size]
                    # Fit training using batch data
                    sess.run(optimizer, feed_dict={self.x: batch_xs, self.y: batch_ys})
                    # Compute average loss
                    avg_cost += sess.run(cost, feed_dict={self.x: batch_xs, self.y: batch_ys}) / total_batch

                if epoch % self.display_step == 0:
                    # Single-argument print() emits the same line on Python 2
                    # and 3; the old print statement is a SyntaxError on 3.
                    print("Epoch: %04d cost= %.9f" % (epoch + 1, avg_cost))
            ZLog.info("Optimization Finished!")

            self.pred = tf.argmax(mul_predict, 1)

            if self.x_test is not None:
                correct_prediction = tf.equal(tf.argmax(mul_predict, 1), tf.argmax(self.y, 1))
                accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
                ac = accuracy.eval({self.x: self.x_test, self.y: self.y_test})
                ZLog.info("Accuracy:" + str(ac))
                return ac
Esempio n. 22
0
    def _do_collect_work(self):
        """
        Scroll the page held by self.driver and download every newly seen image.

        Each pass parses the current page source, hands every image URL not yet
        in self.requested_url to self.down_load_img (directly when
        g_enable_debug is set, otherwise through the thread pool), scrolls by
        K_SCROLL_MOVE_DISTANCE and sleeps. The loop ends after more than 30
        consecutive passes without a new URL, or once self.collect_cnt reaches
        K_COLLECT_CNT.
        """
        # A thread pool is used because the proxies are of poor quality, so
        # the number of threads has to be kept under control.
        pool_size = len(self.back_proxys) * 3
        with ThreadPoolExecutor(max_workers=pool_size) as executor:
            thread_lock = threading.RLock()
            all_same_cnt = 0
            while True:
                soup = BeautifulSoup(self.driver.page_source, "lxml")
                img_objs = soup.select('#imgid > div > ul > li[data-objurl]')

                sub_same_cnt = 0
                for img in img_objs:
                    url = img['data-objurl']
                    url_thumb = img['data-thumburl']
                    if url in self.requested_url:
                        sub_same_cnt += 1
                        continue

                    url_dict = {'url': url, 'url_thumb': url_thumb}
                    if not g_enable_debug:
                        executor.submit(self.down_load_img, url_dict, thread_lock)
                    else:
                        self.down_load_img(url_dict, thread_lock)
                    # Record the URL here; doing it inside the worker would
                    # need extra thread synchronisation.
                    self.requested_url.append(url)

                scroll_from = self.current_pos
                self.current_pos = scroll_from + K_SCROLL_MOVE_DISTANCE
                js = "window.scrollTo({}, {})".format(scroll_from, self.current_pos)
                self.driver.execute_script(js)
                time.sleep(K_SCROLL_SLEEP_TIME)

                # A pass in which every image was already requested counts as
                # one fully duplicated pass; any new image resets the counter.
                if sub_same_cnt == len(img_objs):
                    all_same_cnt += 1
                else:
                    all_same_cnt = 0

                # After enough duplicated passes the page bottom is assumed.
                if all_same_cnt > 30:
                    break

                if self.collect_cnt >= K_COLLECT_CNT:
                    ZLog.info('collect_cnt > K_COLLECT_CNT task end')
                    break
Esempio n. 23
0
    def choose_cprs_component(self, llps):
        """
        Collect the orders of the chosen components and log the expected win-rate change.

        The frames stored in self.nts under every key of llps.index are stacked,
        de-duplicated on 'ind' (keeping the last), and their loss / win rates
        are computed. The cumulative 'profit' of the collected orders is plotted.

        :param llps: the subset of cprs matching your filter conditions, e.g.
                     cprs[(so.cprs['lps'] < 0) & (so.cprs['lms'] < -0.0)]
        :return: None
        :raises ValueError: when the instance has no 'cprs' attribute
        """
        if not hasattr(self, 'cprs'):
            raise ValueError('gmm_component_filter not exe!!!! ')

        nts_pd = pd.DataFrame()
        for nk in llps.index:
            # NOTE(review): DataFrame.append is gone in pandas >= 2.0; kept
            # because the type stored in self.nts is not visible here.
            nts_pd = nts_pd.append(self.nts[nk])
        nts_pd = nts_pd.drop_duplicates(subset='ind', keep='last')
        ZLog.info('nts_pd.shape = {0}'.format(nts_pd.shape))

        # Count once; float() forces true division (int / int floors on
        # Python 2) and .get() tolerates a missing outcome class.
        counts = nts_pd.result.value_counts()
        total = float(counts.sum())
        loss_rate = counts.get(0, 0) / total
        win_rate = counts.get(1, 0) / total
        ZLog.info('nts_pd loss rate = {0}'.format(loss_rate))

        # share of all orders that was collected, weighted by the loss surplus
        coverage = float(nts_pd.shape[0]) / self.fiter.order_has_ret.shape[0]
        improved = coverage * (loss_rate - win_rate)
        ZLog.info('improved rate = {0}'.format(improved))

        xt = self.fiter.order_has_ret.result.value_counts()
        ZLog.info('predict win rate = ' + str(xt.get(1, 0) / float(xt.sum()) + improved))

        nts_pd.sort_index()['profit'].cumsum().plot()
        plt.show()
Esempio n. 24
0
def calc_similar(symbol, cmp_symbol, sc=slice(0, 2), show=True):
    """
    Score how similar cmp_symbol is to symbol from its position in the summed rank.

    :param symbol: reference symbol
    :param cmp_symbol: symbol being compared; must appear in the rank index
    :param sc: slice selecting which similarity dimensions are used. The
               default uses E_CORE_TASK_CG_PEARS = 0 and
               E_CORE_TASK_CG_SPERM = 1; pass slice(1, 2) to use SPERM only.
    :param show: when True, log the score and plot the two normalised close
                 series and their distance
    :return: 1 - (position of cmp_symbol in the ascending rank / rank length)
    """
    pd_list = get_pdlist(sc)

    sum_rank = get_sum_rank(pd_list, symbol)

    cmp_rank = sum_rank.sort_values(ascending=True).index.tolist().index(cmp_symbol)
    # float() forces true division: int / int floors on Python 2, which would
    # make the score 1 for every symbol.
    rank_score = 1 - float(cmp_rank) / sum_rank.shape[0]
    if show:
        ZLog.info(symbol + ' similar rank score' + cmp_symbol + ' :' + str(rank_score))

        mul_pd = SymbolPd.make_kfold_mulpd([symbol, cmp_symbol])

        klpd_symbol = SymbolPd.get_n_year(mul_pd[symbol], from_year=2)
        klpd_cmp_symbol = SymbolPd.get_n_year(mul_pd[cmp_symbol], from_year=2)
        # scale both series to the same order of magnitude
        kl_pd_symbol_nrm, klpd_cmp_symbol_nrm = NpUtil.two_mean_list(klpd_symbol.close,
                                                                     klpd_cmp_symbol.close, type_look='look_max')

        kl_pd_symbol_nrm.plot()
        klpd_cmp_symbol_nrm.plot()
        plt.legend([symbol, cmp_symbol])
        plt.title('similar draw')
        plt.show()

        # distance between the two series with a +/- one std band
        distance = (kl_pd_symbol_nrm - klpd_cmp_symbol_nrm)
        distance_mean = distance.mean()
        distance_std = distance.std()
        above = distance_mean + distance_std
        below = distance_mean - distance_std
        distance.plot()
        plt.axhline(distance_mean, color='r', linestyle='--')
        plt.axhline(above, color='c')
        plt.axhline(below, color='g')
        plt.title('similar distance')
        plt.legend(['distance', 'distance_mean', 'distance above', 'distance below'],
                   bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
        plt.show()

    return rank_score
Esempio n. 25
0
    def feature_selection(self, **kwargs):
        """
        Run RFE and then RFECV on the estimator from self.get_fiter() and log both results.

        For each selector a table of `support_` and `ranking_`, indexed by
        self.df.columns[1:], is logged.

        :param kwargs: must contain 'x' (features) and 'y' (targets)
        """
        x, y = kwargs['x'], kwargs['y']
        fiter = self.get_fiter()

        def _summary(fitted):
            # one row per entry of self.df.columns[1:]
            return pd.DataFrame(
                {'support': fitted.support_, 'ranking': fitted.ranking_},
                index=self.df.columns[1:])

        rfe = RFE(fiter)
        rfe.fit(x, y)
        ZLog.info('RFE selection')
        ZLog.info(_summary(rfe))

        rfecv = RFECV(fiter, cv=3, scoring='mean_squared_error')
        rfecv.fit(x, y)
        ZLog.newline()
        ZLog.info('RFECV selection')
        ZLog.info(_summary(rfecv))
Esempio n. 26
0
    def do_thread_work(self, proxy, checked_list, thread_lock):
        """
        Check one proxy by fetching a test image through it.

        The image is requested through the proxy; on HTTP 200 it is written to
        '../gen/check_proxy.jpg' and opened with PIL. Any exception makes the
        method return silently. Otherwise the proxy is logged and appended to
        checked_list while holding thread_lock.

        NOTE(review): a non-200 response raises nothing, so such a proxy is
        still appended - confirm this is intended.

        :param proxy: dict with 'type' ('HTTP' or anything else, treated as socks5) and 'proxy' (address)
        :param checked_list: list collecting the proxies that passed
        :param thread_lock: lock guarding the log call and checked_list
        """
        scheme = 'http' if proxy['type'] == 'HTTP' else 'socks5'
        proxy_url = '{}://{}'.format(scheme, proxy['proxy'])
        proxy_dict = dict(http=proxy_url, https=proxy_url)

        img_url = 'http://picm.bbzhi.com/dongwubizhi/labuladuoxunhuiquanbizhi/animal_' \
                  'labrador_retriever_1600x1200_44243_m.jpg'
        test_name = '../gen/check_proxy.jpg'
        # switch for the chunked-download variant; off by default
        enable_stream = False

        try:
            if enable_stream:
                response = requests.get(img_url, headers=self.headers, proxies=proxy_dict,
                                        timeout=15, stream=True)
                if response.status_code == 200:
                    with open(test_name, 'wb') as f:
                        for chunk in response.iter_content(chunk_size=1024):
                            if chunk:
                                f.write(chunk)
                                f.flush()

                        check_img = PIL.Image.open(test_name)
                        check_img.close()
            else:
                response = requests.get(img_url, headers=self.headers, proxies=proxy_dict,
                                        timeout=(10, 20))
                if response.status_code == 200:
                    with open(test_name, 'wb') as f:
                        f.write(response.content)
                        f.flush()
                    # opening the file verifies that a real image came back
                    check_img = PIL.Image.open(test_name)
                    check_img.close()
        except Exception:
            # any failure simply means the proxy is not usable
            return

        with thread_lock:
            ZLog.info('{} check ok'.format(proxy['proxy']))
            checked_list.append(proxy)
Esempio n. 27
0
    def judge(self, **kwargs):
        """
        Validate the keyword features against MlFiterGoldenPd.g_w_col and delegate to do_judge.

        :return: the result of self.do_judge; None (after logging) when one of
                 the required feature names is missing from kwargs
        """
        # The column order must stay the same as in the pd built by the metrics.
        w_col = MlFiterGoldenPd.g_w_col
        if any(name not in kwargs for name in w_col):
            ZLog.info('judge golden kwargs error!')
            return

        return self.do_judge(w_col, MlFiterGoldenPd.g_regex_d, MlFiterGoldenPdClass, **kwargs)
Esempio n. 28
0
    def _do_cross_val_score(self, x, y, cv, scoring):
        """
        Cross-validate the estimator from self.get_fiter() and log the mean score.

        For mean-squared-error scorings the logged figure is the mean of
        sqrt(-score), i.e. the scores are treated as negated MSE values; for any
        other scoring it is the plain mean.

        :param x: feature matrix passed to cross_val_score
        :param y: targets passed to cross_val_score
        :param cv: cross-validation spec passed to cross_val_score
        :param scoring: scoring name passed to cross_val_score
        :return: the raw scores returned by cross_val_score
        """
        fiter = self.get_fiter()

        scores = cross_validation.cross_val_score(fiter,
                                                  x,
                                                  y,
                                                  cv=cv,
                                                  scoring=scoring)

        # Accept the newer 'neg_mean_squared_error' spelling as well, so the
        # RMSE summary is still produced when callers use that name.
        mse_scorings = ('mean_squared_error', 'neg_mean_squared_error')
        if scoring in mse_scorings:
            mean_sc = np.mean(np.sqrt(-scores))
        else:
            mean_sc = np.mean(scores)

        ZLog.info(scoring + ' mean: ' + str(mean_sc))

        return scores
Esempio n. 29
0
    def scores(self, y_pre, y=None):
        """
        Log accuracy, precision and recall of y_pre, then print the confusion matrix.

        :param y_pre: predicted labels
        :param y: true labels; passed through self.proxy_xy(None, y) before use
        """
        ZLog.info('scores(self, y_pre, y=None)')
        _, y = self.proxy_xy(None, y)

        accuracy = accuracy_score(y, y_pre)
        ZLog.info("accuracy = %.2f" % accuracy)
        precision = metrics.precision_score(y, y_pre)
        ZLog.info("precision_score = %.2f" % precision)
        recall = metrics.recall_score(y, y_pre)
        ZLog.info("recall_score = %.2f" % recall)

        self._confusion_matrix_with_report(y, y_pre)
Esempio n. 30
0
 def _confusion_matrix_with_report(self, test_y, predictions):
     """
     Print the 2x2 confusion matrix as a text table and log the classification report.

     :param test_y: true labels
     :param predictions: predicted labels
     """
     cm = metrics.confusion_matrix(test_y, predictions)
     rule = "         |-----|-----|"
     cells = "| %3d | %3d |"

     print("Confusion Matrix ", cm)
     print("          Predicted")
     print("         |  0  |  1  |")
     print(rule)
     print("       0 " + cells % (cm[0, 0], cm[0, 1]))
     print("Actual   |-----|-----|")
     print("       1 " + cells % (cm[1, 0], cm[1, 1]))
     print(rule)
     # in general, loss / win are enough to summarise the outcome
     ZLog.info(classification_report(test_y, predictions))
Esempio n. 31
0
def check_golden_mc_result(filter_ret,
                           w_rate,
                           symbol=None,
                           bp=24.3,
                           pf_cnt=200,
                           loop_cnt=20000,
                           p_outter_loop=1):
    """
    Visually verify a small number of the filtered results.

    Each row of filter_ret['keys'] is parsed with ast.literal_eval; its first
    four entries are used as loss percents and entries 4..8 as win percents,
    which are handed to _golden_mc_process_cmp with show=True.

    :param filter_ret: DataFrame with a 'keys' column of literal sequences
    :param w_rate: forwarded to _golden_mc_process_cmp
    :param symbol: symbol to load; when None, rows [-82:-40] of 'usNOAH' are used
    :param bp: forwarded to _golden_mc_process_cmp
    :param pf_cnt: forwarded to _golden_mc_process_cmp
    :param loop_cnt: forwarded to _golden_mc_process_cmp
    :param p_outter_loop: forwarded to _golden_mc_process_cmp
    :return: None
    """
    if symbol is None:
        kl_pd = SymbolPd.make_kfold_pd('usNOAH')[-82:-40]
    else:
        kl_pd = SymbolPd.make_kfold_pd(symbol)

    # The field names never change between rows, so build the tuple type once
    # instead of on every iteration.
    golden_tuple = namedtuple(
        'golden',
        ('below200', 'below250', 'below300', 'below382', 'above618',
         'above700', 'above800', 'above900', 'above950'))

    for loc in range(filter_ret.shape[0]):
        percents = ast.literal_eval(filter_ret['keys'].iloc[loc])
        ZLog.info(loc)
        ZLog.info(percents)
        loss_percent = np.array(percents[0:4])
        win_percent = np.array(percents[4:9])
        profits_dict = {}  # fresh result container for every row

        _golden_mc_process_cmp(kl_pd,
                               loss_percent,
                               win_percent,
                               golden_tuple,
                               profits_dict,
                               w_rate,
                               bp,
                               pf_cnt,
                               p_outter_loop,
                               loop_cnt,
                               show=True)
Esempio n. 32
0
    def scores(self, y_pre, y=None):
        """
        Log accuracy, precision and recall of y_pre, then print the confusion matrix.

        :param y_pre: predicted labels
        :param y: true labels; passed through self.proxy_xy(None, y) before use
        """
        ZLog.info('scores(self, y_pre, y=None)')
        _, y = self.proxy_xy(None, y)

        acc_value = accuracy_score(y, y_pre)
        ZLog.info("accuracy = %.2f" % acc_value)
        precision_value = metrics.precision_score(y, y_pre)
        ZLog.info("precision_score = %.2f" % precision_value)
        recall_value = metrics.recall_score(y, y_pre)
        ZLog.info("recall_score = %.2f" % recall_value)

        self._confusion_matrix_with_report(y, y_pre)
Esempio n. 33
0
def show_golden_process(w_rate, symbol=None, mc_golden=False, bp=24.3, pf_cnt=200, loop_cnt=20000, outter_loop=1):
    """
    Simulate ``loop_cnt`` golden-line trades per outer pass and feed the
    cumulative profit to ``NpUtil.calc_regress_ang``.

    Each simulated trade starts from ``init_golden_w_full`` (when
    ``mc_golden``) or ``init_golden_wl``, then repeatedly draws a 0/1 outcome
    with ``np.random.binomial(1, w_rate)`` and advances through
    ``golden_map_wl_grid`` until that returns None. The profit of one trade is
    ``pf_cnt * (sp - bp) - 10``.

    :param w_rate: success probability of each binomial draw
    :param symbol: symbol given to ``SymbolPd.make_kfold_pd``; when None the
        ``[-82:-40]`` slice of the ``'usNOAH'`` data is used
    :param mc_golden: choose ``TLineGolden.calc_mc_golden`` /
        ``init_golden_w_full`` instead of ``calc_golden`` / ``init_golden_wl``
    :param bp: price subtracted from ``sp`` in the profit formula
        (presumably the buy price -- confirm)
    :param pf_cnt: multiplier applied to ``sp - bp``
    :param loop_cnt: number of simulated trades per outer pass
    :param outter_loop: number of outer passes; only the last one is shown
    :return: None (also returns early, after logging, when the initial grid
        cannot be built)
    """
    if symbol is None:
        kl_pd = SymbolPd.make_kfold_pd('usNOAH')[-82:-40]
    else:
        kl_pd = SymbolPd.make_kfold_pd(symbol)

    if mc_golden:
        golden = TLineGolden.calc_mc_golden(kl_pd, g_mc_percent, g_mc_loss_cnt)
    else:
        golden = TLineGolden.calc_golden(kl_pd)

    def simulate_trade():
        # Returns the profit of one simulated trade, or None when the
        # starting grid could not be initialised.
        if mc_golden:
            wl = init_golden_w_full(golden, bp)
        else:
            wl = init_golden_wl(golden, bp)
        if wl is None:
            return None

        sp = 0
        while wl is not None:
            supports, resistances = wl['supports'], wl['resistances']
            w = np.random.binomial(1, w_rate)
            # Index -1 or 0 would both do: by the time sp is meaningful there
            # is only a single element left anyway.
            sp = resistances[-1] if w else supports[0]
            wl = golden_map_wl_grid(w, wl)

        # -10 is the default trading cost (commission).
        return pf_cnt * (sp - bp) - 10

    while outter_loop > 0:
        outter_loop -= 1
        trade_profits = []
        for _ in np.arange(loop_cnt):
            pf = simulate_trade()
            if pf is None:
                ZLog.info('init_golden_wl out of bp range!')
                return
            trade_profits.append(pf)

        profit_series = pd.Series(trade_profits)
        is_last_pass = (outter_loop == 0)
        NpUtil.calc_regress_ang(profit_series.cumsum(), is_last_pass)
        if is_last_pass:
            profit_series.hist()
Esempio n. 34
0
    def importances_coef_pd(self, **kwargs):
        """
        Fit the estimator from ``self.get_fiter()`` and tabulate its feature
        importances or coefficients.

        Feature names are taken from ``self.df.columns`` with the first column
        skipped.

        :param kwargs: must contain ``x`` and ``y``, passed to ``fit``
        :return: a DataFrame with ``feature``/``importance`` columns sorted by
            importance when the estimator has ``feature_importances_``; a
            DataFrame with ``columns``/``coef`` when it has ``coef_``;
            otherwise None after logging
        :raises ValueError: when ``self`` has no ``df`` attribute
        """
        if not hasattr(self, 'df'):
            raise ValueError('please make a df func first!')

        x = kwargs['x']
        y = kwargs['y']
        fiter = self.get_fiter()
        fiter.fit(x, y)
        self.echo_info(fiter)

        if hasattr(fiter, 'feature_importances_'):
            importance_table = pd.DataFrame({
                'feature': list(self.df.columns)[1:],
                'importance': fiter.feature_importances_,
            })
            return importance_table.sort_values('importance')

        if hasattr(fiter, 'coef_'):
            feature_names = list(self.df.columns)[1:]
            coefficients = list(fiter.coef_.T)
            return pd.DataFrame({"columns": feature_names, "coef": coefficients})

        ZLog.info('fiter not hasattr feature_importances_ or coef_!')
Esempio n. 35
0
    def do_judge(self, w_col, regex_dummies, pd_class, **kwargs):
        """
        Decide whether a sample passes, based on the estimator's probability
        for column 1 of ``predict_proba``.

        The feature row is built from ``kwargs`` in ``w_col`` order. It is then
        optionally expanded through ``pd_class.dummies_xy`` and filtered by
        ``regex_dummies``, and optionally transformed by
        ``pd_class.hmm_predict`` or, mutually exclusively,
        ``pd_class.pca_predict``.

        :param w_col: ordered feature names; every name must be a key of ``kwargs``
        :param regex_dummies: regex handed to ``DataFrame.filter`` to pick the
            dummy columns (only used when ``self.dummies`` is truthy)
        :param pd_class: object providing ``dummies_xy``, ``hmm_predict`` and
            ``pca_predict``
        :param kwargs: feature values keyed by the names in ``w_col``
        :return: True when the probability exceeds ``self.prob_threshold``,
            or when the instance is not fully configured; False otherwise
        """
        required = ('estimator', 'prob_threshold', 'dummies', 'invoke_hmm', 'invoke_pca')
        if not all(hasattr(self, attr) for attr in required):
            # For now this is only logged; raise an exception here if that
            # ever becomes necessary.
            ZLog.info('not estimator or prob or dhp')
            return True

        w = np.array([kwargs[col] for col in w_col]).reshape(1, -1)

        if self.dummies:
            df = pd.DataFrame(w)
            df.columns = w_col
            df = pd_class.dummies_xy(df).filter(regex=regex_dummies)
            # `.values` rather than `as_matrix()`: the latter was removed in
            # pandas 1.0, while `.values` exists in old and new releases.
            w = df.values

        if self.invoke_hmm:
            # Only swaps in the hmm form of x; the DataFrame itself is not
            # modified because nothing needs it afterwards.
            w = pd_class.hmm_predict(self, w).reshape(1, -1)
        elif self.invoke_pca:
            # elif: hmm and pca are mutually exclusive.
            w = pd_class.pca_predict(self, w).reshape(1, -1)

        prob = self.estimator.predict_proba(w)[:, 1][0]
        return bool(prob > self.prob_threshold)
Esempio n. 36
0
    def _confusion_matrix_with_report(self, test_y, predictions):
        """
        Print the confusion matrix as a small ASCII table and log the
        classification report.

        Only the top-left 2x2 cells of the matrix are shown in the table.

        :param test_y: true labels
        :param predictions: predicted labels
        :return: None
        """
        matrix = metrics.confusion_matrix(test_y, predictions)
        rule = "         |-----|-----|"

        print("Confusion Matrix ", matrix)
        print("          Predicted")
        print("         |  0  |  1  |")
        print(rule)
        print("       0 | %3d | %3d |" % (matrix[0, 0], matrix[0, 1]))
        print("Actual   |-----|-----|")
        print("       1 | %3d | %3d |" % (matrix[1, 0], matrix[1, 1]))
        print(rule)

        # In the usual case the loss / win classes summarise the result.
        report = classification_report(test_y, predictions)
        ZLog.info(report)
Esempio n. 37
0
def plot_confusion_matrices(estimator, x, y, n_folds=10):
    """
    Log and plot the confusion matrix of cross-validated predictions.

    :param estimator: estimator handed to ``run_cv_estimator``; its class name
        is used in the plot title
    :param x: feature data
    :param y: true labels; their unique values label the axes
    :param n_folds: fold count forwarded to ``run_cv_estimator``
    :return: None
    """
    predicted = run_cv_estimator(estimator, x, y, n_folds=n_folds)
    labels = np.unique(y).tolist()
    matrix = metrics.confusion_matrix(y, predicted)
    ZLog.info(matrix)

    figure = plt.figure()
    axes = figure.add_subplot(111)
    heat_map = axes.matshow(matrix)
    plt.title('Confusion matrix for %s' % estimator.__class__.__name__)
    figure.colorbar(heat_map)

    # The leading empty string is a blank label prepended to the class names.
    axes.set_xticklabels([''] + labels)
    axes.set_yticklabels([''] + labels)

    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.show()
Esempio n. 38
0
    def check_proxy(self):
        """
        Check every proxy in ``self.proxy_list`` concurrently and replace the
        list with the ones that were collected.

        One daemon thread per proxy runs
        ``self.do_thread_work(proxy, checked_list, thread_lock)``; all threads
        are joined before ``self.proxy_list`` is overwritten with
        ``checked_list``. Presumably ``do_thread_work`` appends usable proxies
        to ``checked_list`` under the lock -- confirm against its definition.

        :return: None
        """
        checked_list = []
        thread_lock = threading.RLock()
        workers = []
        for proxy in self.proxy_list:
            worker = threading.Thread(target=self.do_thread_work,
                                      args=(proxy, checked_list, thread_lock))
            # Use the `daemon` attribute: Thread.setDaemon() is deprecated
            # since Python 3.10, and the attribute also exists on Python 2.6+.
            worker.daemon = True
            worker.start()
            workers.append(worker)

        # Wait for every check to finish before publishing the result.
        for worker in workers:
            worker.join()

        self.proxy_list = checked_list
        ZLog.info('proxy_list len={}'.format(len(self.proxy_list)))
Esempio n. 39
0
def plot_confusion_matrices(estimator, x, y, n_folds=10):
    """
    Cross-validate ``estimator`` and draw the resulting confusion matrix.

    :param estimator: estimator evaluated through ``run_cv_estimator``
    :param x: feature data
    :param y: true labels
    :param n_folds: number of folds forwarded to ``run_cv_estimator``
    :return: None
    """
    cv_predictions = run_cv_estimator(estimator, x, y, n_folds=n_folds)
    class_labels = np.unique(y).tolist()

    cm = metrics.confusion_matrix(y, cv_predictions)
    ZLog.info(cm)

    canvas = plt.figure()
    panel = canvas.add_subplot(111)
    image = panel.matshow(cm)
    title_text = 'Confusion matrix for %s' % estimator.__class__.__name__
    plt.title(title_text)
    canvas.colorbar(image)

    # A blank label is prepended to the class names on both axes.
    panel.set_xticklabels([''] + class_labels)
    panel.set_yticklabels([''] + class_labels)
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.show()
Esempio n. 40
0
 def do_tf_tt(cls, x, y, n_folds=10, **kwargs):
     """
     Train and evaluate ``cls`` on shuffled K-fold splits and log the mean
     of the values returned by ``fit``.

     If parameters beyond the four passed to ``__init__`` are needed,
     subclasses are expected to extend this themselves (extra keyword
     arguments are forwarded as ``**kwargs``).

     :param x: feature array, indexable by an index array
     :param y: label array, indexable by an index array
     :param n_folds: number of folds
     :return: None
     """
     folds = KFold(len(y), n_folds=n_folds, shuffle=True)
     fold_scores = []
     for train_index, test_index in folds:
         x_train, x_test = x[train_index], x[test_index]
         y_train, y_test = y[train_index], y[test_index]
         model = cls(x_train, y_train, x_test, y_test, **kwargs)
         score = model.fit()
         # fit() may return nothing; only real scores are averaged.
         if score is not None:
             fold_scores.append(score)

     if fold_scores:
         ZLog.info('acs mean = {}'.format(np.array(fold_scores).mean()))
Esempio n. 41
0
    def judge(self, **kwargs):
        """
        Validate the keyword arguments and delegate the decision to
        ``self.do_judge``.

        :param kwargs: must contain every name in ``MlFiterJumpPd.g_w_col``
        :return: None (after logging) when a required keyword is missing;
            True when ``self`` has no ``dd_threshold`` attribute; otherwise
            the result of ``self.do_judge``
        """
        if any(name not in kwargs for name in MlFiterJumpPd.g_w_col):
            ZLog.info('judge golden kwargs error!')
            return

        regex = MlFiterJumpPd.g_regex_d
        # The column order must stay the same as in the pd built by metrics.
        w_col = MlFiterJumpPd.g_w_col
        pd_class = MlFiterJumpPdClass

        if hasattr(self, 'dd_threshold'):
            return self.do_judge(w_col, regex, pd_class, **kwargs)

        ZLog.info('not dd_threshold')
        return True
Esempio n. 42
0
def graphviz_tree(estimator, features, x, y):
    """
    Fit a tree estimator, export it with graphviz and display the rendered
    PNG through matplotlib.

    Writes ``graphviz.dot`` and ``graphviz.png`` relative to the working
    directory and shells out to the ``dot`` executable.

    :param estimator: estimator that must already expose ``tree_``
    :param features: feature names passed to ``tree.export_graphviz``
    :param x: training features
    :param y: training labels
    :return: None
    """
    # NOTE(review): `tree_` is tested before fit() is called, so an estimator
    # without that attribute at call time is rejected -- confirm this is the
    # intended contract.
    is_tree = hasattr(estimator, 'tree_')
    if not is_tree:
        ZLog.info('only tree can graphviz!')
        return

    estimator.fit(x, y)
    tree.export_graphviz(estimator.tree_, out_file='graphviz.dot', feature_names=features)
    os.system("dot -T png graphviz.dot -o graphviz.png")

    # Opening the file straight from a notebook (`!open $path`) renders
    # better; the size of a plt.show() figure is awkward to adjust.
    png_path = ZEnv.shell_cmd_result('pwd') + '/graphviz.png'
    image_file = cbook.get_sample_data(png_path)
    picture = plt.imread(image_file)
    plt.imshow(picture)
    plt.axis('off')  # hide the x- and y-axes
    plt.show()
Esempio n. 43
0
    def importances_coef_pd(self, **kwargs):
        """
        Fit the estimator returned by ``self.get_fiter()`` on ``x``/``y`` and
        report per-feature importances or coefficients as a DataFrame.

        Feature names come from ``self.df.columns`` without the first column.

        :param kwargs: requires the keys ``x`` and ``y``
        :return: DataFrame (``feature``, ``importance``) sorted by importance,
            or DataFrame (``columns``, ``coef``), or None after logging when
            the estimator offers neither attribute
        :raises ValueError: if ``self`` has no ``df`` attribute
        """
        if not hasattr(self, 'df'):
            raise ValueError('please make a df func first!')

        features, target = kwargs['x'], kwargs['y']
        model = self.get_fiter()
        model.fit(features, target)
        self.echo_info(model)

        has_importances = hasattr(model, 'feature_importances_')
        if has_importances:
            names = list(self.df.columns)[1:]
            table = pd.DataFrame({'feature': names, 'importance': model.feature_importances_})
            return table.sort_values('importance')
        elif hasattr(model, 'coef_'):
            names = list(self.df.columns)[1:]
            return pd.DataFrame({"columns": names, "coef": list(model.coef_.T)})

        ZLog.info('fiter not hasattr feature_importances_ or coef_!')
Esempio n. 44
0
    def show_general(self, use_fiter=False):
        """
        Log summary statistics of the orders that have a result and plot the
        cumulative ``profit_cg``.

        Logged values: the shape of the selected orders, the win rate (share
        of rows whose ``result`` equals 1), the total ``profit_cg``, and the
        mean ``profit_cg`` of winning and losing orders.

        :param use_fiter: when True use ``self.fiter.order_has_ret``; otherwise
            use the rows of ``self.orders_pd`` whose ``result`` is non-zero
        :return: None
        """
        if use_fiter:
            order_has_ret_fit = self.fiter.order_has_ret
        else:
            # `!=` instead of `<>`: the latter is a SyntaxError on Python 3.
            order_has_ret_fit = self.orders_pd[self.orders_pd['result'] != 0]

        ZLog.info('all fit order = ' + str(order_has_ret_fit.shape))

        result_counts = order_has_ret_fit.result.value_counts()
        # .get() keeps a data set without any winning order from raising
        # KeyError; float() forces true division on Python 2, where dividing
        # two integer counts would truncate the rate to 0 or 1.
        win_rate = float(result_counts.get(1, 0)) / result_counts.sum()
        ZLog.info('win rate = ' + str(win_rate))
        ZLog.info('profit_cg.sum() = ' + str(order_has_ret_fit.profit_cg.sum()))

        order_has_ret_fit.sort_values('buy Date')['profit_cg'].cumsum().plot(grid=True, title='profit_cg cumsum')

        profit_cg_win_mean = order_has_ret_fit[order_has_ret_fit['profit_cg'] > 0].profit_cg.mean()
        profit_cg_loss_mean = order_has_ret_fit[order_has_ret_fit['profit_cg'] < 0].profit_cg.mean()
        ZLog.info('win mean = {0} loss_mean = {1} '.format(profit_cg_win_mean, profit_cg_loss_mean))
        plt.show()
Esempio n. 45
0
    def feature_selection(self, **kwargs):
        """
        Run RFE and then RFECV on the estimator from ``self.get_fiter()`` and
        log the support mask and ranking of every feature.

        Rows of the logged tables are indexed by ``self.df.columns`` without
        the first column.

        :param kwargs: requires the keys ``x`` and ``y``
        :return: None
        """
        x, y = kwargs['x'], kwargs['y']
        fiter = self.get_fiter()

        def selection_table(fitted_selector):
            # Support mask and ranking, one row per feature column.
            return pd.DataFrame({'support': fitted_selector.support_,
                                 'ranking': fitted_selector.ranking_},
                                index=self.df.columns[1:])

        rfe = RFE(fiter)
        rfe.fit(x, y)
        ZLog.info('RFE selection')
        ZLog.info(selection_table(rfe))

        # NOTE(review): the 'mean_squared_error' scorer name is only accepted
        # by older scikit-learn releases -- confirm the pinned version.
        rfecv = RFECV(fiter, cv=3, scoring='mean_squared_error')
        rfecv.fit(x, y)
        ZLog.newline()
        ZLog.info('RFECV selection')
        ZLog.info(selection_table(rfecv))
Esempio n. 46
0
    def train_test_split_df(self, df=None, test_size=0.1, random_state=0):
        """
        Split a DataFrame into train and validation parts, fit the estimator
        from ``self.get_fiter()`` on the train part, and log accuracy,
        precision and recall on the validation part.

        The first column of the frame is used as the label and the remaining
        columns as features.

        :param df: frame to split; defaults to ``self.df``
        :param test_size: forwarded to ``train_test_split``
        :param random_state: forwarded to ``train_test_split``
        :return: None
        """
        if df is None:
            df = self.df

        train_df, cv_df = train_test_split(df, test_size=test_size, random_state=random_state)

        # Convert each frame once. `.values` replaces `as_matrix()`, which was
        # removed in pandas 1.0; `.values` is available in old and new releases.
        train_values = train_df.values
        cv_values = cv_df.values
        train_x, train_y = train_values[:, 1:], train_values[:, 0]
        cv_x, cv_y = cv_values[:, 1:], cv_values[:, 0]

        fiter = self.get_fiter()
        fiter.fit(train_x, train_y)
        predictions = fiter.predict(cv_x)

        ZLog.info("accuracy = %.2f" % (accuracy_score(cv_y, predictions)))
        ZLog.info("precision_score = %.2f" % (metrics.precision_score(cv_y, predictions)))
        ZLog.info("recall_score = %.2f" % (metrics.recall_score(cv_y, predictions)))

        self._confusion_matrix_with_report(cv_y, predictions)
Esempio n. 47
0
def verify_process(est_cls, judge_cls, make_x_func, make_order_func, order_pd, only_jd=False, first_local=False,
                   tn_threshold=800):
    """
    Judge every order with six classifier configurations and report how the
    verdicts compare with the recorded ``result`` column.

    Each row of ``order_pd`` is turned into keyword arguments by
    ``make_x_func`` and passed to ``est_cls.do_predict_process`` six times.
    The 0/1 verdicts are stored as columns on the frame returned by
    ``make_order_func(order_pd)``, accuracy figures are logged, and vote
    distributions are plotted.

    :param est_cls: object providing ``dump_process`` and ``do_predict_process``
    :param judge_cls: forwarded as first argument to both of those calls
    :param make_x_func: maps one order row to a dict of keyword arguments
    :param make_order_func: maps ``order_pd`` to the frame that receives the
        verdict columns
    :param order_pd: the orders to judge
    :param only_jd: skip ``dump_process`` and only judge, using what has
        already been serialized
    :param first_local: prefer the local classifier (forwarded to
        ``dump_process``)
    :param tn_threshold: forwarded to ``dump_process``
    :return: tuple ``(jd_ret, order_has_ret)``
    """
    if not only_jd:
        # Unpacking into six targets also requires dump_process to return
        # exactly six values.
        _, _, _, _, _, _ = est_cls.dump_process(judge_cls, order_pd, tn_threshold, True, first_local=first_local)

    # Column name plus the three boolean flags given to do_predict_process,
    # listed in the order the verdicts are produced and stored. Per the
    # original notes the configurations are:
    judge_modes = (
        ('d_ret', (True, False, False)),    # discrete, no hidden factor
        ('v_ret', (False, False, False)),   # continuous, no hidden factor
        ('dm_ret', (True, True, False)),    # discrete, with hidden factor
        ('vm_ret', (False, True, False)),   # continuous, with hidden factor
        ('dp_ret', (True, False, True)),    # discrete, with pca
        ('vp_ret', (False, False, True)),   # continuous, with pca
    )

    def apply_judge(order, p_make_x_func):
        x = p_make_x_func(order)
        verdicts = []
        for _name, (flag_a, flag_b, flag_c) in judge_modes:
            verdicts.append(est_cls.do_predict_process(judge_cls, flag_a, flag_b, flag_c, **x))
        return tuple(verdicts)

    order_has_ret = make_order_func(order_pd)
    jd_ret = order_pd.apply(apply_judge, axis=1, args=(make_x_func,))

    for position, (column, _flags) in enumerate(judge_modes):
        order_has_ret[column] = [1 if ret[position] else 0 for ret in jd_ret]

    report_order = ('v_ret', 'd_ret', 'dp_ret', 'vp_ret', 'dm_ret', 'vm_ret')

    # Accuracy restricted to the rows each configuration voted 0 on.
    for column in report_order:
        rejected = order_has_ret[order_has_ret[column] == 0]
        rejected_accuracy = metrics.accuracy_score(rejected['result'], rejected[column])
        ZLog.info(column + '_result: ' + str(rejected_accuracy))

    ZLog.newline(fill_cnt=58)

    # Accuracy of each configuration over all rows.
    for column in report_order:
        overall_accuracy = metrics.accuracy_score(order_has_ret['result'], order_has_ret[column])
        ZLog.info(column + '_result_all: ' + str(overall_accuracy))

    ZLog.newline(fill_cnt=58)

    # Vote count over four of the six configurations (dm_ret and vm_ret are
    # not part of the sum).
    order_has_ret['vdmret'] = (order_has_ret['d_ret'] + order_has_ret['v_ret'] +
                               order_has_ret['dp_ret'] + order_has_ret['vp_ret'])
    order_has_ret['vdmret'].value_counts().plot(kind='barh')
    plt.title('vdmret barh')
    plt.show()

    for column in ('v_ret', 'd_ret', 'vm_ret', 'dm_ret', 'dp_ret', 'vp_ret'):
        single_vote = (order_has_ret['vdmret'] == 1) & (order_has_ret[column] == 1)
        single_vote.value_counts().plot(kind='bar')
        plt.title(column + ' == 1')
        plt.show()

    no_votes = order_has_ret[order_has_ret['vdmret'] == 0]
    final_result = metrics.accuracy_score(no_votes['result'], no_votes['vdmret'])
    ZLog.info('final_result: ' + str(final_result))

    # For each vote count n, flag rows with exactly n votes as 0 and score
    # that flag against `result` on those rows only.
    for word, votes in (('one', 1), ('two', 2), ('three', 3), ('four', 4)):
        flag_column = 'vdmret_' + word
        order_has_ret[flag_column] = np.where(order_has_ret['vdmret'] == votes, 0, 1)
        matched = order_has_ret[order_has_ret[flag_column] == 0]
        matched_accuracy = metrics.accuracy_score(matched['result'], matched[flag_column])
        ZLog.info('final_' + word + '_result: ' + str(matched_accuracy))

    return jd_ret, order_has_ret
Esempio n. 48
0
 def echo_info(self, fiter=None):
     """
     Log the class name of an estimator, centred in a 58-character line
     padded with ``*``.

     :param fiter: estimator to describe; defaults to ``self.get_fiter()``
     :return: None
     """
     target = self.get_fiter() if fiter is None else fiter
     banner = '{0:*^58s}'.format(target.__class__.__name__)
     ZLog.info(banner)
Esempio n. 49
0
    def train_test_split_xy(self, x=None, y=None, test_size=0.1, random_state=0):
        """
        Split ``x``/``y`` into train and test parts, fit the estimator from
        ``self.get_fiter()`` and log accuracy, precision and recall on the
        test part.

        :param x: features; resolved through ``self.proxy_xy``
        :param y: labels; resolved through ``self.proxy_xy``
        :param test_size: forwarded to ``train_test_split``
        :param random_state: forwarded to ``train_test_split``
        :return: None
        """
        x, y = self.proxy_xy(x, y)
        parts = train_test_split(x, y, test_size=test_size, random_state=random_state)
        train_x, test_x, train_y, test_y = parts

        # Log the shapes of the full, train and test sets in turn.
        for features, labels in ((x, y), (train_x, train_y), (test_x, test_y)):
            ZLog.info(features.shape, labels.shape)

        predictions = self.get_fiter().fit(train_x, train_y).predict(test_x)

        accuracy = accuracy_score(test_y, predictions)
        ZLog.info("accuracy = %.2f" % accuracy)
        precision = metrics.precision_score(test_y, predictions)
        ZLog.info("precision_score = %.2f" % precision)
        recall = metrics.recall_score(test_y, predictions)
        ZLog.info("recall_score = %.2f" % recall)

        self._confusion_matrix_with_report(test_y, predictions)
Esempio n. 50
0
 def plot_roc_estimator(self, x=None, y=None):
     """
     Log the estimator's class name and delegate the ROC plot to
     ``MlFiterExcute.plot_roc_estimator``.

     :param x: features; resolved through ``self.proxy_xy``
     :param y: labels; resolved through ``self.proxy_xy``
     :return: None
     """
     x, y = self.proxy_xy(x, y)
     estimator = self.get_fiter()
     ZLog.info('{0} :roc'.format(estimator.__class__.__name__))
     MlFiterExcute.plot_roc_estimator(estimator, x, y)