def check_for_nan_values(self, tickers='all', exclude_last_value=True, *args, **kwargs):
    """Check the price data of each ticker for NaN entries.

    Inputs:
        - tickers: tickers to check
            default: 'all', i.e. every ticker in self.tickers
        - exclude_last_value: if True, the last entry is always kept and
          every non-finite entry before it is filtered out; if False the
          data is only inspected, nothing is removed
            default: True
        - do_print (kwarg): forwarded to the utils printing helpers
            default: True

    Outputs:
        - None; self.data[ticker] is re-assigned when exclude_last_value
          is True.
    """
    # TODO: interactive removal of NaN values (exclude_last_value=False) is
    # not implemented yet. When it is, build a boolean mask for the rows to
    # drop -- `~` applied to integer positions is a bitwise NOT and does not
    # select the complement.
    do_print = utils.parse_kwargs('do_print', kwargs, error_arg=True)
    if tickers == 'all':
        tickers = self.tickers
    else:
        # Honour the caller's do_print instead of always printing.
        tickers = utils.check_ticker_input(tickers_input=tickers,
                                           tickers_avail=self.tickers,
                                           do_print=do_print)
    for ticker in tickers:
        series = self.data[ticker]
        if exclude_last_value:
            head = series[:-1]
            nan_indices = np.where(np.isnan(head))[0]
            valid_indices = np.where(np.isfinite(head))[0]
            # Always keep the last entry, whatever its value is.
            valid_indices = np.hstack((valid_indices, series.shape[0] - 1))
            # valid_indices are positions, not labels -> use .iloc.
            # NOTE(review): if self.data is a DataFrame, assigning a shorter
            # Series is re-aligned on the index and the dropped rows come
            # back as NaN -- confirm the container type of self.data.
            self.data[ticker] = series.iloc[valid_indices]
        else:
            utils.print_issue('INFO',
                              'Last value is considered to be removed.',
                              do_print=do_print)
            nan_indices = np.where(np.isnan(series))[0]
        # Move on to the next ticker when NaNs were found; a bare `return`
        # here used to skip every remaining ticker.
        if nan_indices.size == 0:
            utils.print_issue('INFO', 'No NaN values detected.',
                              do_print=do_print)
def show_possibilities(self, tickers='all', *args, **kwargs):
    """Plot the model for hypothetical last values around the break values.

    For every ticker three test values are derived from
    self.break_values[ticker]: the lower break value reduced by
    `deviation`, the midpoint of both break values, and the upper break
    value increased by `deviation`. For each test value a copy of the model
    gets that value written into its last data entry, is re-initialised and
    evaluated, and is then handed to plotting.plot_model.

    Inputs:
        - tickers: tickers to process
            default: 'all', i.e. every ticker in self.tickers
        - kwargs:
            do_print (default True), deviation (default .0125),
            plot_range, plot_from_index, plot_from_date (default None),
            switch_axes, return_plot, save_figures (default False),
            output_folder (default None); the plot options are forwarded
            to plotting.plot_model.

    Outputs:
        - None
    """
    do_print = utils.parse_kwargs("do_print", kwargs, True)
    if tickers == 'all':
        tickers = self.tickers
    else:
        tickers = utils.check_ticker_input(tickers_input=tickers,
                                           tickers_avail=self.tickers,
                                           do_print=do_print)
    # These options do not depend on ticker or test value: read them once.
    deviation = utils.parse_kwargs('deviation', kwargs, error_arg=.0125)
    p_range = utils.parse_kwargs('plot_range', kwargs, None)
    p_index = utils.parse_kwargs('plot_from_index', kwargs, None)
    p_date = utils.parse_kwargs('plot_from_date', kwargs, None)
    switch_axes = utils.parse_kwargs('switch_axes', kwargs, False)
    return_plot = utils.parse_kwargs("return_plot", kwargs, False)
    save_figures = utils.parse_kwargs("save_figures", kwargs, False)
    output_folder = utils.parse_kwargs("output_folder", kwargs, None)
    for ticker in tickers:
        utils.print_issue(None, '=' * 80, do_print=do_print)
        utils.print_issue('INFO', 'Current ticker: {}'.format(ticker),
                          do_print=do_print)
        # The last entry is overwritten below, so ask before replacing a
        # real (non-NaN) value.
        if not np.isnan(self.data[ticker][-1]):
            utils.print_issue('WARNING', 'Last value of data set is not NaN!')
            input_message = 'Proceed anyways? '
            if not self._get_answer(input_message=input_message):
                continue
        # self.break_values is None until break values were computed. The
        # previous check referenced an undefined name and raised NameError
        # in exactly the situation it was meant to report.
        if self.break_values is None:
            break_values = None
        else:
            break_values = self.break_values.get(ticker)
        if break_values is None:
            utils.print_issue('ERROR',
                              'No break values computed for this ticker!')
            continue
        bottom_value, top_value = break_values
        middle_value = (top_value - bottom_value)*.5 + bottom_value
        bottom_value = bottom_value * (1 - deviation)
        top_value = top_value * (1 + deviation)
        for value in (bottom_value, middle_value, top_value):
            utils.print_issue(None, '-' * 80, do_print=do_print)
            utils.print_issue('INFO', 'Result for value: {}'.format(value),
                              do_print=do_print)
            # Work on a copy so the real model data stays untouched.
            test_model = self.copy_model()
            # Assign the hypothetical value to the last entry.
            test_model.data[ticker][-1] = value
            test_model._init_model(do_print=False)
            test_model.eval_model(do_print=False)
            fig_name = "{}_imag_value_{:.2f}".format(ticker, value)
            plotting.plot_model(model=test_model,
                                tickers=ticker,
                                plot_range=p_range,
                                plot_from_index=p_index,
                                plot_from_date=p_date,
                                plot_break_values=True,
                                switch_axes=switch_axes,
                                return_plot=return_plot,
                                output_folder=output_folder,
                                save_figures=save_figures,
                                fig_name=fig_name)
def __init__(self, tickers, data=None, buy_delay=1, periods=(12, 26, 9)):
    """Create the model state for the given tickers.

    Inputs:
        - tickers: ticker input, validated by utils.check_ticker_input
        - data: price data, stored as is
            default: None
        - buy_delay: stored as self.buy_delay
            default: 1
        - periods: stored as self.periods
            default: (12, 26, 9)
    """
    self.tickers = utils.check_ticker_input(tickers_input=tickers,
                                            tickers_avail=None,
                                            do_print=True)
    self.data = data
    self.buy_delay = buy_delay
    self.periods = periods
    # Nothing has been computed yet.
    self.local_min = None
    self.local_max = None
    self.grad = None
    self.break_values = None
    self.tolerances = None
    # One (still empty) slot per ticker.
    self.ticker_df = {name: None for name in self.tickers}
    self.z_values = {name: None for name in self.tickers}
def plot_model(model,
               tickers='all',
               plot_range=None,
               plot_from_index=None,
               plot_from_date=None,
               plot_break_values=True,
               switch_axes=False,
               **kwargs):
    '''
    Function to plot a model.
    Inputs:
        - model: model of class MODEL
        - tickers: tickers to plot
            default: all, i.e. tickers in input class MODEL
        - plot_range: range to plot of type pandas.date_range()
            default: None, i.e. complete data set
        - plot_from_index: integer position from which on to plot; only
          used when plot_range is None
            default: None
        - plot_from_date: date label from which on to plot; only used when
          plot_range and plot_from_index are None
            default: None
        - plot_break_values: if available, plot break_values of input class MODEL
            default: True
        - switch_axes: if True, the price subplot is drawn on top and the
          gradient subplot below
            default: False
        - kwargs:
            do_print (default True), save_figures (default False),
            return_plot (default False), output_folder (default False),
            fig_name (default "<ticker>_evaluation")
    Outputs:
        - matplotlib.pyplot if return_plot is set, otherwise None
    '''
    do_print = utils.parse_kwargs("do_print", kwargs, True)
    # Ticker-independent options are read once, before the loop.
    save_figures = utils.parse_kwargs(key="save_figures",
                                      kwargs=kwargs,
                                      error_arg=False)
    return_plot = utils.parse_kwargs(key="return_plot",
                                     kwargs=kwargs,
                                     error_arg=False)
    output_folder = utils.parse_kwargs(key="output_folder",
                                       kwargs=kwargs,
                                       error_arg=False)
    if tickers == 'all':
        tickers = model.tickers
    else:
        tickers = utils.check_ticker_input(tickers_input=tickers,
                                           tickers_avail=model.tickers,
                                           do_print=do_print)
    for ticker in tickers:
        series = model.data[ticker]
        if plot_range is not None:
            x_axis = series[plot_range].index
            indices = np.where(np.isin(series.index, plot_range))[0]
        elif plot_from_index is not None:
            x_axis = series.index[plot_from_index:]
            indices = np.arange(plot_from_index, series.index.shape[0], 1)
        elif plot_from_date is not None:
            # Index.get_loc may return an int (unique exact match), a slice
            # (e.g. partial date string) or a boolean mask (non-unique
            # index); reduce all of them to the first matching position.
            loc = series.index.get_loc(plot_from_date)
            if isinstance(loc, slice):
                idx = loc.start or 0
            elif isinstance(loc, np.ndarray):
                idx = int(np.flatnonzero(loc)[0])
            else:
                idx = int(loc)
            x_axis = series.index[idx:]
            indices = np.arange(idx, series.index.shape[0], 1)
        else:
            x_axis = series.index
            indices = np.arange(0, x_axis.shape[0], 1)
        grad = model.grad[ticker][indices]
        # Only keep extrema that lie inside the plotted window.
        min_arg = np.where(model.local_min[ticker] >= indices[0])
        max_arg = np.where(model.local_max[ticker] >= indices[0])
        try:
            local_min = model.local_min[ticker][min_arg]
            local_max = model.local_max[ticker][max_arg]
            in_loop = False
        except TypeError:
            # NOTE(review): the original comment here reads "loop over
            # tickers"; presumably the extrema carry an extra leading axis
            # in that case -- confirm against the code filling local_min.
            in_loop = True
            local_min = model.local_min[ticker][0][min_arg[1]]
            local_max = model.local_max[ticker][0][max_arg[1]]
        price = series[indices]
        buy_frame = model.ticker_df[ticker]['Buy Dates'].values
        try:
            buy_dates = buy_frame[min_arg[0]]
            if in_loop:
                buy_dates = buy_frame[min_arg[1]]
        except IndexError:
            # More minima than recorded buys: the newest minimum has no
            # entry in ticker_df yet, so its date is appended by hand.
            utils.print_issue(
                'INFO',
                'New buy signal was detected for last value: {}.'.format(
                    series[-1]),
                do_print=do_print)
            buy_dates = buy_frame[min_arg[0][:-1]]
            if in_loop:
                buy_dates = buy_frame[min_arg[1][:-1]]
            buy_dates = np.hstack(
                (buy_dates, series.index[local_min[-1] + 1].to_numpy()))
        sell_frame = model.ticker_df[ticker]['Sell Dates'].values
        try:
            sell_dates = sell_frame[max_arg[0]]
            if in_loop:
                sell_dates = sell_frame[max_arg[1]]
        except IndexError:
            # Same situation as above, for the newest maximum.
            utils.print_issue(
                'INFO',
                'New sell signal was detected for last value: {}.'.format(
                    series[-1]),
                do_print=do_print)
            sell_dates = sell_frame[max_arg[0][:-1]]
            if in_loop:
                sell_dates = sell_frame[max_arg[1][:-1]]
            sell_dates = np.hstack(
                (sell_dates, series.index[local_max[-1] + 1].to_numpy()))
        #Generating plots:
        fig, axs = plt.subplots(2, 1, figsize=(16, 9), sharex=True)
        if switch_axes:
            ax_indices = [1, 0]
        else:
            ax_indices = [0, 1]
        grad_ax = axs[ax_indices[0]]
        price_ax = axs[ax_indices[1]]
        #subplot 1:
        grad_ax.fill_between(x_axis,
                             0,
                             grad,
                             where=grad > 0,
                             facecolor='green',
                             interpolate=True,
                             label='Up Trend')
        grad_ax.fill_between(x_axis,
                             0,
                             grad,
                             where=grad <= 0,
                             facecolor='red',
                             interpolate=True,
                             label='Down Trend')
        grad_ax.vlines(series.index[local_min],
                       np.min(grad),
                       np.max(grad),
                       color='g',
                       label='Min Reached')
        grad_ax.vlines(series.index[local_max],
                       np.min(grad),
                       np.max(grad),
                       color='r',
                       label='Peak Reached')
        #layout:
        grad_ax.set_title('{} - MODEL'.format(ticker), fontsize='larger')
        grad_ax.set_ylabel('Gradient [-]', fontsize='larger')
        #subplot 2:
        price_ax.plot(x_axis, price, label='{}'.format(ticker))
        price_ax.vlines(buy_dates,
                        np.min(price),
                        np.max(price),
                        color='g',
                        label='Buy Dates')
        price_ax.vlines(sell_dates,
                        np.min(price),
                        np.max(price),
                        color='r',
                        linestyle='--',
                        label='Sell dates')
        if plot_break_values and model.break_values is not None:
            lower_bv = model.break_values[ticker][0]
            upper_bv = model.break_values[ticker][1]
            price_ax.hlines(lower_bv,
                            x_axis[0],
                            x_axis[-1],
                            color='k',
                            label='Break value {:.5f}'.format(lower_bv))
            price_ax.hlines(upper_bv,
                            x_axis[0],
                            x_axis[-1],
                            color='c',
                            label='Break value {:.5f}'.format(upper_bv))
        #layout:
        price_ax.set_title('{} - PRICE'.format(ticker), fontsize='larger')
        price_ax.set_ylabel('Price', fontsize='larger')
        #settings for all plots:
        # The x label always goes on the lower subplot.
        axs[-1].set_xlabel('Date', fontsize='larger')
        for ax in axs:
            ax.grid()
            ax.legend(loc='upper left')
        fig_name = utils.parse_kwargs(
            key="fig_name",
            kwargs=kwargs,
            error_arg="{}_evaluation".format(ticker))
        if fig_name is not None:
            plt.suptitle(fig_name)
        if all([save_figures, output_folder, fig_name]):
            fname = "{}/{}.pdf".format(output_folder, fig_name)
            plt.savefig(fname=fname)
            plt.close()
            message = "Exported: %s" % fname
            utils.print_issue("INFO", message, do_print=do_print)
    # Return only after every requested ticker has been drawn, so a
    # multi-ticker call is never cut short.
    if return_plot:
        return plt
def comp_break_values(self, tickers='all', refactor_step_size=1,
                      append_break_values=False, parallel_computing=True,
                      *args, **kwargs):
    """Compute the break values of each ticker around its current price.

    For every ticker a price range of +/- 30 % around the last non-NaN
    value is scanned through self._comp_bvs, either serially or split over
    a multiprocessing pool. The sorted break values are stored in
    self.break_values and their distance to the current value in
    self.tolerances.

    Inputs:
        - tickers: tickers to process
            default: 'all', i.e. every ticker in self.tickers
        - refactor_step_size: factor on the scan step size, which is
          current value / 5000
            default: 1
        - append_break_values: if True, write one break value into the
          last data entry of each ticker and re-initialise the model;
          if False, only print the results
            default: False
        - parallel_computing: use a multiprocessing pool
            default: True
        - do_print (kwarg): forwarded to the utils printing helpers
            default: True

    Outputs:
        - None; results are stored in self.break_values / self.tolerances.
    """
    from functools import partial

    do_print = utils.parse_kwargs('do_print', kwargs, error_arg=True)
    if tickers == 'all':
        tickers = self.tickers
    else:
        # Honour the caller's do_print instead of always printing.
        tickers = utils.check_ticker_input(tickers_input=tickers,
                                           tickers_avail=self.tickers,
                                           do_print=do_print)
    # Import once rather than once per ticker; fall back to serial mode if
    # the module is unavailable.
    try:
        import multiprocessing as mp
    except ModuleNotFoundError:
        utils.print_issue('ERROR', 'Multiprocessing module not available.',
                          do_print=do_print)
        parallel_computing = False
    imag_model = self.copy_model()
    break_values_dict = dict.fromkeys(tickers)
    current_values = dict.fromkeys(tickers, None)
    tolerances = dict.fromkeys(tickers)
    deviation = .3
    utils.print_issue('INFO',
                      'Compute break values with {:.2%} deviation'.format(
                          deviation),
                      do_print=do_print)
    for ticker in tickers:
        utils.print_issue('INFO', 'Current ticker: {}'.format(ticker),
                          do_print=do_print)
        # Use the penultimate value when the last one is still NaN.
        if np.isnan(self.data[ticker].values[-1]):
            value_index = -2
        else:
            value_index = -1
        current_values[ticker] = self.data[ticker].values[value_index]
        #create range:
        start_value = current_values[ticker] * (1 - deviation)
        end_value = current_values[ticker] * (1 + deviation)
        step_size = (current_values[ticker] / 5000) * refactor_step_size
        rng = np.arange(start_value, end_value, step_size)
        if not parallel_computing:
            break_values_dict[ticker] = np.sort(
                self._comp_bvs(model=imag_model, rng=rng, ticker=ticker))
        else:
            n_procs = mp.cpu_count()
            utils.print_issue('INFO',
                              'Using {} processes.'.format(n_procs),
                              do_print=do_print)
            rng_list = self._do_array_split(rng, n_procs)
            # NOTE(review): the arguments are bound positionally here
            # (model, ticker, then the range chunk) while the serial branch
            # uses keywords -- confirm this matches _comp_bvs' signature.
            inputs_partial = partial(self._comp_bvs, imag_model, ticker)
            with mp.Pool(processes=n_procs) as pool:
                bvs = pool.map(inputs_partial, rng_list)
            # Keep the first hit per slot across all chunks.
            bv_final = [None, None]
            for bv_list in bvs:
                for n, bv in enumerate(bv_list):
                    if bv is not None and bv_final[n] is None:
                        bv_final[n] = bv
                # Compare against None so a break value of 0.0 counts as
                # found.
                if all(bv is not None for bv in bv_final):
                    break
            break_values_dict[ticker] = np.sort(bv_final)
        # break_values_dict[ticker] must already be sorted at this point.
        tolerances[ticker] = break_values_dict[ticker] - current_values[ticker]
    self.tolerances = tolerances
    self.break_values = break_values_dict
    if append_break_values:
        utils.print_issue('INFO', 'Appending break values to model data',
                          do_print=do_print)
        for ticker in tickers:
            # NOTE(review): argsort on the signed tolerances picks the most
            # negative one, i.e. always the lower break value; if the
            # closest break value is meant, this needs np.abs -- confirm.
            smal_tol = np.argsort(tolerances[ticker])[0]
            self.data[ticker][-1] = break_values_dict[ticker][smal_tol]
        self._init_model(do_print=False)
    else:
        utils.print_issue('INFO',
                          'Current values: {}'.format(current_values),
                          do_print=do_print)
        utils.print_issue('INFO',
                          'Break values: {}'.format(break_values_dict),
                          do_print=do_print)
        utils.print_issue('INFO', 'Tolerances: {}'.format(tolerances),
                          do_print=do_print)
def eval_model(self, tickers='all', entry_money=200, fees=(1.0029, .9954),
               tax=.25, visualize=False, *args, **kwargs):
    '''
    Function to evaluate the price model predictions
    Inputs:
        - tickers: tickers to evaluate
            default: 'all', i.e. every ticker in self.tickers
        - entry_money: initial investment per ticker
            default = 200
        - fees: factors applied to buy and sell prices, i.e. buy asset at
          broker for slightly higher price than actual asset prices,
          vice versa for sells
            default = (1.0029, .9954)
        - tax: fraction deducted from the win when the summed trade wins
          exceed 800
            default = .25, i.e. 25%
        - visualize: currently unused
            default = False
        - do_print (kwarg): forwarded to the utils printing helpers
            default = True
    Outputs:
        - None; the evaluation of each ticker is stored as pandas
          DataFrame in self.ticker_df[ticker] and a summary is printed.
    '''
    do_print = utils.parse_kwargs('do_print', kwargs, error_arg=True)
    if tickers == 'all':
        valid_tickers = self.tickers
    else:
        valid_tickers = utils.check_ticker_input(tickers_input=tickers,
                                                 tickers_avail=self.tickers,
                                                 do_print=do_print)
    utils.print_opening(ticker=valid_tickers,
                        start_date=self.data.index[0].strftime('%D'),
                        end_date=self.data.index[-1].strftime('%D'),
                        initial_investment_per_ticker=entry_money,
                        do_print=do_print)
    # Initialise the model first if that has not happened yet.
    if any([self.local_min is None,
            self.local_max is None,
            self.grad is None]):
        self._init_model(do_print=do_print)
    for ticker in valid_tickers:
        utils.print_issue('TICKER', ticker, do_print=do_print)
        buy_locs, sell_locs = self._get_locs(ticker=ticker,
                                             do_print=do_print)
        buy_prices = self.data[ticker][buy_locs]
        buy_dates = self.data[ticker].index.values[buy_locs]
        sell_prices = self.data[ticker][sell_locs]
        sell_dates = self.data[ticker].index.values[sell_locs]
        buy_prices *= fees[0]
        sell_prices *= fees[1]
        # TODO: check for NaN values in buy/sell prices.
        n_calls = sell_prices.shape[0]
        if buy_prices.shape > sell_prices.shape:
            #must use to_numpy() since the dates are still stored in prices as names
            #-> pandas divides same dates, obviously buy and sell dates differ,
            #hence pandas would return NaN all the time
            ratios = sell_prices.to_numpy() / buy_prices.to_numpy()[:-1]
        else:
            ratios = sell_prices.to_numpy() / buy_prices.to_numpy()
        trade_rewards = entry_money * np.cumprod(ratios)
        #Calculate trade wins
        trade_wins = np.diff(trade_rewards)
        #Insert first win
        try:
            trade_wins = np.insert(trade_wins, 0,
                                   trade_rewards[0] - entry_money)
        except IndexError:
            #case where one has one buy but has not sold yet.
            pass
        #Evaluate Calls
        good_calls = np.where(trade_wins > 0)
        bad_calls = np.where(trade_wins < 0)
        try:
            efficiency = good_calls[0].shape[0] / n_calls
        except ZeroDivisionError:
            efficiency = np.nan
        #TODO: Error handling here:
        win_loss = trade_wins / (trade_rewards - trade_wins)
        average_win = np.mean(win_loss[np.where(win_loss > 0)])
        average_loss = np.mean(win_loss[np.where(win_loss < 0)])
        if np.sum(trade_wins) > 800:
            tax_pays = np.sum(trade_wins) * tax
            utils.print_issue('INFO',
                              '{:.2f} tax was paid.'.format(tax_pays),
                              do_print=do_print)
            net_income = (trade_rewards[-1] - entry_money) * (1 - tax)
        else:
            utils.print_issue('INFO', 'No tax paid.', do_print=do_print)
            net_income = np.sum(trade_wins)
        #create final DataFrame
        sell_grad = self.grad[ticker][sell_locs - self.buy_delay]
        buy_grad = self.grad[ticker][buy_locs - self.buy_delay]
        #be aware that buy_dates can be 1 entry longer than sell dates!
        if buy_dates.shape[0] > sell_dates.shape[0]:
            # Pad every sell-side column by one entry so all columns of the
            # DataFrame have the same length.
            if sell_dates.shape[0] > 0:
                # The messages are plain single-line literals; a backslash
                # continuation inside the string used to embed the source
                # indentation in the printed text.
                utils.print_issue(
                    'INFO',
                    'Last entry of "Sell Dates" will '
                    'be assigned equally as the penultimate one.',
                    do_print=do_print)
                sell_dates = np.append(sell_dates, sell_dates[-1])
            else:
                utils.print_issue(
                    'INFO',
                    'First entry of "Sell Dates" '
                    'will be first entry of "Buy Dates".',
                    do_print=do_print)
                sell_dates = buy_dates[0]
            try:
                sell_prices.loc[pd.Timestamp.max] = np.nan
            except Exception:
                # The concrete exception type is unclear (catching
                # OverflowError alone reportedly did not work), so stay
                # broad -- but do not swallow KeyboardInterrupt/SystemExit
                # like a bare except would.
                sell_prices.loc[buy_prices.index[-1]] = np.nan
            trade_rewards = np.append(trade_rewards, np.nan)
            trade_wins = np.append(trade_wins, np.nan)
            win_loss = np.append(win_loss, np.nan)
            sell_grad = np.append(sell_grad, np.nan)
        grad_diff = sell_grad - buy_grad
        final_df = pd.DataFrame(data={'Buy Dates': buy_dates,
                                      'Sell Dates': sell_dates,
                                      'Buy Prices': buy_prices.to_numpy(),
                                      'Sell Prices': sell_prices.to_numpy(),
                                      'Trade Reward': trade_rewards,
                                      'Trade Win': trade_wins,
                                      'Trade Efficiency': win_loss,
                                      'Grad at Buy': buy_grad,
                                      'Grad at Sell': sell_grad,
                                      'Grad Difference': grad_diff})
        self.ticker_df[ticker] = final_df
        utils.print_issue(None, '-' * 80, do_print=do_print)
        utils.print_issue('SUMMARY',
                          'Average trade win: {:.10%}'.format(average_win),
                          do_print=do_print)
        utils.print_issue('SUMMARY',
                          'Average trade loss: {:.10%}'.format(average_loss),
                          do_print=do_print)
        utils.print_issue('SUMMARY',
                          'Efficiency: {:.2%}'.format(efficiency),
                          do_print=do_print)
        utils.print_issue('SUMMARY',
                          'NET WIN: {:.2f}'.format(net_income),
                          do_print=do_print)
        utils.print_issue(None, '=' * 80, do_print=do_print)
def calc_probs(model, time=None, tickers='all', stats_data=None,
               auto_update_tolerances=False, *args, **kwargs):
    """Plot and print, per ticker, the probabilities of both break values.

    The z-values come from _create_z_values and are turned into tail
    probabilities of the standard normal distribution (in percent). For
    each of the two rows a polynomial of degree 5 is fitted over the
    frequency range from _create_freq and evaluated at the time left until
    the last index entry of model.data.

    Inputs:
        - model: model of class MODEL
        - time: currently unused
        - tickers: tickers to process
            default: 'all', i.e. model.tickers
        - stats_data, auto_update_tolerances: forwarded to
          _create_z_values
        - kwargs: timezone, start (default None), forwarded to
          _create_z_values
    """
    if tickers != 'all':
        tickers = utils.check_ticker_input(tickers_input=tickers,
                                           tickers_avail=model.tickers)
    else:
        tickers = model.tickers
    timezone = kwargs.get('timezone')
    start = kwargs.get('start')
    poly_deg = 5
    for ticker in tickers:
        utils.print_issue(None, '=' * 80)
        utils.print_issue('INFO', 'Current ticker: {}'.format(ticker))
        z_values, tols, _means = _create_z_values(
            model=model,
            ticker=ticker,
            stats_data=stats_data,
            timezone=timezone,
            start=start,
            auto_update_tolerances=auto_update_tolerances)
        freq_range, frequencies = _create_freq()
        # Hours between now and the last index entry.
        # NOTE(review): .seconds is only the seconds component of the
        # Timedelta (whole days are ignored) -- confirm this is intended.
        last_stamp = model.data.index[-1].to_datetime64()
        now_stamp = pd.Timestamp.now().to_datetime64()
        delta_t = pd.Timedelta(last_stamp - now_stamp).seconds / 3600
        arg = np.argsort(tols)
        value_arg = np.argsort(model.break_values[ticker])
        probs = ss.norm.cdf(z_values) * 100
        # Positive z-values get the upper tail (1 - cdf) instead.
        flip_arg = np.where(z_values > 0)
        probs[flip_arg] = (1 - ss.norm.cdf(z_values[flip_arg])) * 100
        poly_probs = np.zeros(2)
        _fig, axs = plt.subplots(2, 1, figsize=(16, 9),
                                 sharex=True, sharey=True)
        prob_low = np.min(probs)
        prob_high = np.max(probs)
        for n, ax in enumerate(axs):
            row = probs[n]
            ax.plot(frequencies, row, label='Probability')
            ax.vlines(delta_t, prob_low, prob_high,
                      label='Time to deadline')
            poly_line = np.poly1d(np.polyfit(freq_range, row, poly_deg))
            ax.plot(frequencies, poly_line(freq_range), 'r',
                    label='Polyfit of deg {}'.format(poly_deg))
            current_prob = poly_line(delta_t)
            ax.text(x=delta_t - .25,
                    y=(prob_high + prob_low)*.5,
                    s='{:.2f}%'.format(current_prob),
                    fontsize='larger')
            ax.set_title(
                'Ticker: {} - Break Value: {} - Tolerance: {}'.format(
                    ticker,
                    model.break_values[ticker][value_arg[n]],
                    tols[arg[n]]),
                fontsize='large')
            ax.legend()
            ax.grid()
            ax.yaxis.get_label().set_fontsize('larger')
            ax.xaxis.get_label().set_fontsize('larger')
            poly_probs[n] = current_prob
        # The x-axis is shared, so inverting it on the last subplot inverts
        # both.
        axs[-1].invert_xaxis()
        plt.setp(axs[-1], xlabel='Time to break value [h]')
        plt.setp(axs, ylabel='Probability [%]')
        prob_between = np.abs(np.diff(poly_probs))[0]
        sorted_tols = tols[arg]
        for n, prob in enumerate(poly_probs):
            utils.print_issue(
                'STATS-EVAL',
                'Probability for tol={:.5f}: {:.2f}%'.format(
                    sorted_tols[n], prob))
        utils.print_issue('STATS-EVAL',
                          'Probability between: {:.2f}%'.format(prob_between))
        plt.show()
def calc_probs(model, time=None, tickers='all', stats_data=None,
               auto_update_tolerances=False, *args, **kwargs):
    """Function to calculate statistics.

    Per ticker, the z-values from _create_z_values are turned into tail
    probabilities of the standard normal distribution (in percent). For
    each of the two rows a polynomial of degree 5 is fitted over the
    frequency range from _create_freq and evaluated at the time left until
    the last index entry of model.data. The result is plotted, printed and
    optionally exported as "<output_folder>/<ticker>_statistics.pdf".

    Inputs:
        - model: model of class MODEL
        - time: currently unused
        - tickers: tickers to process
            default: 'all', i.e. model.tickers
        - stats_data, auto_update_tolerances: forwarded to
          _create_z_values
        - kwargs:
            timezone, start (default None, forwarded to _create_z_values),
            do_print (default True), save_figures (default False),
            return_plot (default False), output_folder (default False)

    Outputs:
        - matplotlib.pyplot if return_plot is set, otherwise None
    """
    do_print = utils.parse_kwargs("do_print", kwargs, True)
    # Ticker-independent options are read once, before the loop.
    save_figures = utils.parse_kwargs(key="save_figures",
                                      kwargs=kwargs,
                                      error_arg=False)
    return_plot = utils.parse_kwargs(key="return_plot",
                                     kwargs=kwargs,
                                     error_arg=False)
    output_folder = utils.parse_kwargs(key="output_folder",
                                       kwargs=kwargs,
                                       error_arg=False)
    if tickers == 'all':
        tickers = model.tickers
    else:
        tickers = utils.check_ticker_input(tickers_input=tickers,
                                           tickers_avail=model.tickers)
    timezone = kwargs.get('timezone')
    start = kwargs.get('start')
    poly_deg = 5
    for ticker in tickers:
        # do_print is now honoured by every message, not only the export
        # notice.
        utils.print_issue(None, '=' * 80, do_print=do_print)
        utils.print_issue('INFO', 'Current ticker: {}'.format(ticker),
                          do_print=do_print)
        z_values, tols, _means = _create_z_values(
            model=model,
            ticker=ticker,
            stats_data=stats_data,
            timezone=timezone,
            start=start,
            auto_update_tolerances=auto_update_tolerances)
        freq_range, frequencies = _create_freq()
        # Hours between now and the last index entry.
        # NOTE(review): .seconds is only the seconds component of the
        # Timedelta (whole days are ignored) -- confirm this is intended.
        delta_t = (model.data.index[-1].to_datetime64()
                   - pd.Timestamp.now().to_datetime64())
        delta_t = pd.Timedelta(delta_t).seconds / 3600
        arg = np.argsort(tols)
        value_arg = np.argsort(model.break_values[ticker])
        probs = ss.norm.cdf(z_values) * 100
        # Positive z-values get the upper tail (1 - cdf) instead.
        flip_arg = np.where(z_values > 0)
        probs[flip_arg] = (1 - ss.norm.cdf(z_values[flip_arg])) * 100
        poly_probs = np.zeros(2)
        _fig, axs = plt.subplots(2, 1, figsize=(16, 9),
                                 sharex=True, sharey=True)
        for n, ax in enumerate(axs):
            ax.plot(frequencies, probs[n], label='Probability')
            ax.vlines(delta_t, np.min(probs), np.max(probs),
                      label='Time to deadline')
            poly_line = np.poly1d(np.polyfit(freq_range, probs[n],
                                             poly_deg))
            ax.plot(frequencies, poly_line(freq_range), 'r',
                    label='Polyfit of deg {}'.format(poly_deg))
            title = 'Ticker: {} - Break Value: {} - Tolerance: {}'.format(
                ticker,
                model.break_values[ticker][value_arg[n]],
                tols[arg[n]])
            current_prob = poly_line(delta_t)
            ax.text(x=delta_t - .25,
                    y=(np.max(probs) + np.min(probs))*.5,
                    s='{:.2f}%'.format(current_prob),
                    fontsize='larger')
            ax.set_title(title, fontsize='large')
            ax.legend()
            ax.grid()
            ax.yaxis.get_label().set_fontsize('larger')
            ax.xaxis.get_label().set_fontsize('larger')
            poly_probs[n] = current_prob
        # The x-axis is shared: invert it exactly once. Inverting it on
        # both subplots would flip it back.
        axs[-1].invert_xaxis()
        plt.setp(axs[-1], xlabel='Time to break value [h]')
        plt.setp(axs, ylabel='Probability [%]')
        prob_between = np.abs(np.diff(poly_probs))[0]
        for n, prob in enumerate(poly_probs):
            utils.print_issue(
                'STATS-EVAL',
                'Probability for tol={:.5f}: {:.2f}%'.format(
                    tols[arg][n], prob),
                do_print=do_print)
        utils.print_issue('STATS-EVAL',
                          'Probability between: {:.2f}%'.format(prob_between),
                          do_print=do_print)
        fig_name = "{}_statistics".format(ticker)
        plt.suptitle(fig_name)
        if all([save_figures, output_folder]):
            fname = "{}/{}.pdf".format(output_folder, fig_name)
            plt.savefig(fname=fname)
            plt.close()
            message = "Exported: %s" % fname
            utils.print_issue("INFO", message, do_print=do_print)
    # Return only after every requested ticker has been processed, so a
    # multi-ticker call is never cut short.
    if return_plot:
        return plt