def update_least_resistance_levels(self):
    """Recompute the least-resistance price band and the entry prices.

    Nothing happens while ``self.locked_resistance_levels`` is truthy.
    Otherwise the rows of ``self.df1`` whose index is newer than eight hours
    ago (naive local ``datetime.now()``) are split into up to four quantile
    bins of ``Close``. The bin with the lowest mean ``'vol ob score'``
    provides the lower (left edge) and upper (right edge) levels, from which
    the target gain and the three entry prices are derived.

    Side effects:
        * writes the ``'vol ob score'`` column onto ``self.df1`` in place;
        * sets ``lower_resistance_level``, ``upper_resistance_level``,
          ``target_gain`` and ``first/second/third_entry_price`` on ``self``;
        * logs the resulting levels at INFO level.
    """
    if self.locked_resistance_levels:
        return

    frame = self.df1
    # `scale` is defined outside this block; presumably a standardiser
    # (e.g. sklearn.preprocessing.scale) -- TODO confirm.
    frame['vol ob score'] = scale(frame['Volume'])

    cutoff = datetime.datetime.now() - datetime.timedelta(hours=8)
    recent = frame[frame.index > cutoff]

    # Quartile bins of the closing price; duplicate edges are dropped, so
    # fewer than four bins may come back.
    close_bins = pd.qcut(recent['Close'].sort_values(), 4, duplicates='drop')
    mean_scores = recent.groupby(close_bins)['vol ob score'].mean()
    levels = mean_scores.sort_values(
        ascending=False).reset_index().sort_index()

    # After reset_index() the 'Close' column holds the bin intervals.
    band_edges = pd.IntervalIndex(levels['Close'])
    levels['R-Level'] = pd.Series(band_edges.right)
    levels['S-Level'] = pd.Series(band_edges.left)

    lowest_score = levels['vol ob score'].min()
    weakest = levels[levels['vol ob score'] == lowest_score]

    self.lower_resistance_level = float(weakest['S-Level'].values[0])
    self.upper_resistance_level = float(weakest['R-Level'].values[0])
    self.target_gain = (
        self.upper_resistance_level / self.lower_resistance_level - 1)

    # Entry prices sit at the bottom of the band and at two scaled fractions
    # of the target gain above it.
    base_price = self.lower_resistance_level
    self.first_entry_price = base_price
    self.second_entry_price = base_price * (
        1 + (self.target_gain * self.entry_point_scale[1]))
    self.third_entry_price = base_price * (
        1 + (self.target_gain * self.entry_point_scale[2]))

    logging.info("Lower: {} Upper: {} Target Gain: {}".format(
        self.lower_resistance_level,
        self.upper_resistance_level,
        self.target_gain))
def reindex_str_to_IntervalIndex(df):
    """Replace string interval labels in ``df.index`` with an IntervalIndex.

    Intervals written to disk by pandas come back as plain strings such as
    ``"(0.5, 1.0]"`` and pandas ships no parser for them, so each label is
    parsed by hand here.

    The opening and closing brackets decide which side is closed
    (``(a, b]`` -> right, ``[a, b)`` -> left, ``[a, b]`` -> both,
    ``(a, b)`` -> neither). Bounds are read as ``int`` when possible and as
    ``float`` otherwise, so ``inf``, ``-inf`` and ``nan`` are accepted.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose index holds interval labels as strings. It is modified
        in place.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, now indexed by a ``pandas.IntervalIndex``.

    Raises
    ------
    ValueError
        If a label is not of the form ``<bracket>left, right<bracket>`` or a
        bound is not numeric. pandas also raises ``ValueError`` when the
        labels do not all share the same closed side.
    """
    closed_by_brackets = {
        ("(", "]"): "right",
        ("[", ")"): "left",
        ("[", "]"): "both",
        ("(", ")"): "neither",
    }

    def _parse_bound(text):
        # Keep integer bounds integral; fall back to float for the rest.
        text = text.strip()
        try:
            return int(text)
        except ValueError:
            return float(text)

    def _parse_interval(label):
        # Turn one "(left, right]"-style label into a pandas Interval.
        label = label.strip()
        brackets = (label[:1], label[-1:])
        if len(label) < 2 or brackets not in closed_by_brackets:
            raise ValueError(f"not an interval label: {label!r}")
        left, comma, right = label[1:-1].partition(",")
        if not comma:
            raise ValueError(f"not an interval label: {label!r}")
        return pd.Interval(
            _parse_bound(left),
            _parse_bound(right),
            closed=closed_by_brackets[brackets],
        )

    df.index = pd.IntervalIndex([_parse_interval(label) for label in df.index])
    return df
def percent_distance_std(pred, real, bins):
    """Return the share of squared errors falling into std-scaled bins.

    The squared differences ``(real - pred) ** 2`` are computed, their
    population standard deviation (``ddof=0``) is taken, and ``bins`` is
    multiplied by that value to obtain the bin edges. Consecutive edges form
    right-closed intervals; the squared errors are cut into those intervals
    and the relative frequency of each interval is returned.

    Values outside every interval are counted in an extra trailing ``NaN``
    bucket (``value_counts(dropna=False)``), so the result can have one more
    entry than there are intervals.

    The standard deviation and the per-interval counts are printed.

    Parameters
    ----------
    pred, real : array-like
        Predictions and ground truth; must support ``real - pred``.
    bins : sequence of numbers
        Edge multipliers, expressed in units of the standard deviation.

    Returns
    -------
    numpy.ndarray
        Fraction of observations per interval, in ascending interval order.
    """
    edges = np.array(bins, dtype='float32')
    squared_error = np.power(real - pred, 2)
    std = squared_error.std(ddof=0)
    print(f"std = {std}")
    edges = edges * std

    # One interval per pair of neighbouring edges.
    spans = [
        pd.Interval(left=lower, right=upper)
        for lower, upper in zip(edges[:-1], edges[1:])
    ]
    span_index = pd.IntervalIndex(
        spans, closed='right', dtype='interval[float32]')

    binned = pd.cut(squared_error, bins=span_index)
    s = pd.Series(binned).value_counts(dropna=False).sort_index()
    counts = s.to_numpy()
    total = counts.sum()
    print(s)

    return counts / total
def get_intervals_for_variable(series, n=10, missing_values_indicators=(0,)):
    """Build left-closed binning intervals for a variable from its quantiles.

    The tick marks are the missing-value indicators plus the ``i/n``
    quantiles (``i = 1 .. n-1``) of the remaining values, each truncated to
    ``int``. Duplicate ticks are removed, neighbouring ticks become
    ``[lo, hi)`` intervals, and a final ``[last_tick, inf)`` interval is
    appended.

    Parameters
    ----------
    series : pandas.Series
        Numeric observations of the variable.
    n : int, default 10
        Number of quantile groups; ``n - 1`` quantile ticks are computed.
    missing_values_indicators : list-like, default (0,)
        Values that mark "missing" observations. They are excluded from the
        quantile computation but kept as tick marks.

    Returns
    -------
    pandas.IntervalIndex
        Intervals closed on the left, in ascending order.

    Raises
    ------
    IndexError
        If there is no tick at all (no missing-value indicator and no
        quantile tick).
    """
    ticks = sorted(missing_values_indicators)

    # Filter once; the selection does not depend on the quantile computed.
    valid = series.loc[~series.isin(missing_values_indicators)]

    # With no usable observation every quantile is NaN and int() would
    # raise, so only the missing-value ticks are used in that case.
    if valid.notna().any():
        ticks.extend(int(valid.quantile(i / n)) for i in range(1, n))

    ticks = np.unique(ticks)
    intervals = [
        pd.Interval(lower, upper, closed='left')
        for lower, upper in zip(ticks[:-1], ticks[1:])
    ]
    intervals.append(pd.Interval(ticks[-1], np.inf, closed='left'))
    return pd.IntervalIndex(intervals)
def plot_calibration(y_true,y_score,figpath=None):
    """
    Calibration plot

    Predicted probabilities are cut into bins with edges at
    ``np.arange(0, 1.1, 0.1)``. For every non-empty bin a grey bar shows the
    number of observations (log-scaled left axis) and a red line shows the
    mean of ``y_true`` (right axis), drawn against the dotted diagonal of
    perfect calibration.

    NOTE: ``pd.cut`` is called with its defaults, so the bins are
    right-closed and a score of exactly 0 falls outside the first bin.

    Parameters
    ----------
    y_true: list-like or pandas.Series
        true y labels
    y_score: list-like or pandas.Series
        predicted probability
    figpath: str
        path for saving figure; nothing is saved when it is None

    Returns
    -------
    None

    Examples
    --------
    >>> plot_calibration(y_train,y_train_pred)
    """
    from matplotlib.ticker import ScalarFormatter

    # The same edges serve as bin boundaries and as axis ticks.
    tenths = np.arange(0,1.1,0.1)

    scored = pd.DataFrame({'y_true':y_true,'y_score':y_score})
    scored['bin'] = pd.cut(scored['y_score'],tenths)
    per_bin = scored.groupby('bin')['y_true'].agg(['size','mean'])
    per_bin.index = pd.IntervalIndex(per_bin.index)
    per_bin = per_bin.loc[per_bin['size']>0,:].copy()
    bin_centres = per_bin.index.mid

    fig,count_ax = plt.subplots(figsize=(8,8))

    # Left axis: how many observations landed in each probability bin.
    count_ax.bar(bin_centres,per_bin['size'],width=0.08,color='lightgrey')
    rate_ax = count_ax.twinx()
    count_ax.set_yscale('log')
    count_ax.set_xlabel('Predicted probability')
    count_ax.set_ylabel('Number of patients')

    # Right axis: observed rate per bin against the ideal diagonal.
    rate_ax.plot(bin_centres,per_bin['mean'],
                 marker='o',linestyle='-',color='red')
    rate_ax.plot([0,1],[0,1],color='red',linestyle=':')
    rate_ax.set_ylabel('Actual probability',color='red')
    rate_ax.set_xticks(tenths)
    rate_ax.set_yticks(tenths)
    rate_ax.set_xlim([-0.01,1.01])
    rate_ax.set_ylim([-0.01,1.01])
    rate_ax.tick_params(axis='y',color='red',labelcolor='red')
    rate_ax.grid(axis='y',color='pink')

    # Plain numbers instead of powers of ten on the log-scaled count axis.
    count_ax.yaxis.set_major_formatter(ScalarFormatter())
    count_ax.set_title('Calibration plot')
    plt.tight_layout()

    if figpath is not None:
        plt.savefig(figpath,dpi=200)
    plt.show()
def combine_industry_helper(df):
    """Attach the industry class valid at each report date to one stock's rows.

    The stock code is read from the first row of ``'S_INFO_WINDCODE'``. When
    that code is present in ``ind.index``, its entry ``ind.loc[stock]`` is
    used as a mapping from validity interval to class: every date in
    ``df[date_col]`` is located in those intervals and mapped to the
    matching value. Unknown stocks get ``None`` in the class column.

    ``date_col``, ``ind`` and ``class_std`` come from the enclosing scope.
    Presumably ``ind`` is indexed by (stock code, interval) -- TODO confirm
    against the caller.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows of a single stock; modified in place.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` with the ``class_std`` column set.
    """
    rpt_dates = df[date_col]
    stock = df['S_INFO_WINDCODE'].iloc[0]

    if stock not in ind.index:
        df[class_std] = None
        return df

    interval_map = ind.loc[stock]
    validity = pd.IntervalIndex(interval_map.index.tolist())
    df[class_std] = pd.cut(rpt_dates, validity).map(interval_map)
    return df