Esempio n. 1
0
    def simulate(self, data_sets, t, step_size, poly_order, eval_lens=None, del_len=10):
        """Slide an evaluation window over the data and collect fitted vs. raw samples.

        Starting at index ``del_len`` and advancing one sample at a time while a
        window of ``max(eval_lens)`` samples still ends before ``len(t)``, the
        model is reset to the local state, evaluated over the window, and one
        sample per requested evaluation length is recorded.

        Args:
            data_sets: one indexable series per variable; the first ``self.N``
                series are read.
            t: time vector. Slices of it are shifted by subtraction, so it is
                assumed to be a NumPy array -- TODO confirm against callers.
            step_size: forwarded unchanged to ``self.evaluate``.
            poly_order: forwarded unchanged to ``self.evaluate``.
            eval_lens: iterable of evaluation lengths; defaults to ``[30]``.
            del_len: number of samples before the window start that are used to
                estimate the initial slope; also the first window start index.

        Returns:
            ``(x_sets, eval_t)``. ``x_sets[eval_len]`` is ``[fitted, raw]`` where
            each entry is a list of ``self.N`` arrays, and ``eval_t[eval_len]`` is
            a list of ``self.N`` arrays holding the matching time stamps.
        """
        if eval_lens is None:
            eval_lens = np.array([30])
        window_len = np.max(eval_lens)

        def empty_series():
            # A fresh list per call: sharing one list between keys would make every
            # evaluation length append into the same time-stamp arrays.
            return [np.array([]) for _ in range(self.N)]

        x_sets = {}
        eval_t = {}
        for eval_len in eval_lens:
            # There is a different data set for each length of time
            x_sets[eval_len] = [empty_series(), empty_series()]
            eval_t[eval_len] = empty_series()

        ind0 = del_len
        indf = ind0 + window_len
        while indf < len(t):
            progress_printer(len(data_sets[0]), ind0, start_ind=del_len, tsk='Simulation')
            # Initial state: current value and mean slope over the preceding del_len samples
            x0s = [x[ind0] for x in data_sets]
            y0s = [np.mean(np.diff(x[ind0 - del_len:ind0])) for x in data_sets]
            current_t = t[ind0:indf] - t[ind0]
            self.reset(x0s=x0s, y0s=y0s)
            x_fit, _t_fit = self.evaluate(current_t, step_size, poly_order, verbose=False)
            for key, (fitted, raw) in x_sets.items():
                # The sample is taken two positions before the nominal length
                offset = key - 2
                times = eval_t[key]
                for i in range(self.N):
                    fitted[i] = np.append(fitted[i], x_fit[i][offset])  # TODO tailor offset using t_fit
                    raw[i] = np.append(raw[i], data_sets[i][ind0 + offset])
                    times[i] = np.append(times[i], t[ind0 + offset])

            ind0 += 1
            indf = ind0 + window_len

        return x_sets, eval_t
Esempio n. 2
0
    def break_up_time(self, time, step_size, polynomial_order, verbose):
        """Split ``time`` into per-polynomial segments.

        The sorted keys of ``self.polynomials`` act as segment start times. Each
        segment holds the entries of ``time`` that fall in the half-open interval
        between one key and the next. When the keys run out and more than
        ``step_size`` of time remains, ``self.take_next_step`` is called and the
        keys are re-read.

        Returns:
            dict mapping each segment start key to its slice of ``time``.
        """
        knots = sorted(self.polynomials.keys())
        n_knots = len(knots)
        segments = {}
        consumed = 0
        k = 0

        while k < n_knots:
            progress_printer(len(time), round(consumed, -1), tsk='Propogation', suppress_output=(not verbose))
            segment_start = knots[k]
            upcoming = k + 1
            if upcoming >= n_knots:
                if (time[-1] - time[consumed]) <= step_size:
                    # The last known polynomial covers everything that is left
                    segments[segment_start] = time[consumed::]
                    break
                # Not enough polynomials yet: extend the solution and refresh the keys
                self.take_next_step(step_size, polynomial_order)
                knots = sorted(self.polynomials.keys())
                n_knots = len(knots)

            in_window = (knots[k] <= time) * (time < knots[upcoming])
            # NOTE(review): assuming time is a NumPy array, the mask has the same length
            # as time, so this fallback only triggers for an empty time vector -- confirm intent
            if len(in_window) == 0:
                in_window = k
            chunk = time[in_window]
            segments[segment_start] = chunk
            consumed += len(chunk)
            k += 1
            if len(chunk) == 0:
                # Stop after the first segment that contains no samples
                break

        return segments
Esempio n. 3
0
def _assemble_normalized_outputs(fill_chunks, book_rows):
    """Join the per-row results collected by normalize_fill_array_and_order_book."""
    # Equivalent to repeatedly calling np.append on an initially empty array
    normalized_fills = np.concatenate(
        [np.array([])] + [np.ravel(chunk) for chunk in fill_chunks])
    if not book_rows:
        # Nothing was processed; return an empty array rather than an unbound name
        normalized_order_book = np.array([])
    elif len(book_rows) == 1:
        # A single row is returned as produced, without stacking
        normalized_order_book = book_rows[0]
    else:
        normalized_order_book = np.vstack(book_rows)
    return normalized_fills, normalized_order_book


def normalize_fill_array_and_order_book(historical_order_books, historical_fills):
    """Normalize fills and order-book rows by values taken from the current book state.

    This function takes advantage of the Markovian nature of crypto prices and
    normalizes the fills by the current order book. This is intended to make the
    predictions homogeneous no matter what the current price is.
    Note: current setup has prices at every third entry, should change to have
    identifying headers.

    For every order-book row, fills are skipped while they are older than the
    row's timestamp or priced within 1 of the row's top bid (column ``'0'``).
    The first remaining fill is divided by ``price_at_max_order_size`` of the
    row, and the row itself is normalized with ``normalize_order_book_row``.

    Args:
        historical_order_books: frame with a ``ts`` column and a ``'0'`` column;
            rows are selected by comparing the index with 0..len-1.
        historical_fills: frame with ``time`` and ``price`` columns.

    Returns:
        ``(normalized_fills, normalized_order_book)``. Processing stops early
        when the fills run out; in that case the rows handled so far are
        returned. If nothing was processed both values are empty arrays.
    """
    order_book = historical_order_books
    order_book_ts_vals = order_book.ts.values
    order_book_top_bid_vals = order_book['0'].values  # The fills are normalized off the top bid

    fill_ts_vals = str_list_to_timestamp(historical_fills.time.values)
    fill_price_vals = historical_fills.price.values

    fill_chunks = []
    book_rows = []
    if len(fill_price_vals) == 0:
        # Without any fill there is nothing to normalize against
        return _assemble_normalized_outputs(fill_chunks, book_rows)

    # Dropping the timestamp column does not depend on the row, so do it once
    order_book_features = order_book.drop(['ts'], axis=1)

    fill_ind = 0
    current_fill_ts = fill_ts_vals[fill_ind]
    current_fill = fill_price_vals[fill_ind]
    num_rows = len(order_book_ts_vals)

    for order_book_ind in range(num_rows):

        progress_printer(num_rows, order_book_ind)

        ts = order_book_ts_vals[order_book_ind]
        current_bid = order_book_top_bid_vals[order_book_ind]

        while (ts > current_fill_ts) or (np.abs(current_fill - current_bid) < 1):
            fill_ind += 1
            if fill_ind == len(fill_price_vals):
                # If there are more order book states after the last fill then this stops early
                return _assemble_normalized_outputs(fill_chunks, book_rows)
            current_fill_ts = fill_ts_vals[fill_ind]
            current_fill = fill_price_vals[fill_ind]

        current_order_book_row = order_book_features[order_book_features.index == order_book_ind]
        fill_base_val = price_at_max_order_size(current_order_book_row)
        price_base_val = average_orderbook_features(0, current_order_book_row)

        fill_chunks.append(current_fill / fill_base_val)
        book_rows.append(normalize_order_book_row(price_base_val, current_order_book_row))

    return _assemble_normalized_outputs(fill_chunks, book_rows)
Esempio n. 4
0
    def find_positive_profit_interval(self):
        """Derive buy and sell indices from ``self.prices``.

        Walks backwards from the last price, using ``self.find_next_trade`` and
        ``self.find_intermediary_trades`` to mark the index ranges during which
        the asset should be held. The walk ends when an iteration leaves the
        cursor unchanged. Transitions of the resulting hold mask are then turned
        into trade indices.

        Returns:
            ``(buy_indices, sell_indices)`` as integer arrays: indices where the
            hold mask switches on and off respectively.
        """
        prices = self.prices
        n_prices = len(prices)
        holding = np.zeros(n_prices)
        cursor = n_prices - 1
        previous_cursor = 0

        while previous_cursor != cursor:
            previous_cursor = cursor
            progress_printer(n_prices, n_prices - cursor, tsk='Calculating Strategy')
            # loop backwards through the prices to determine the optimal trade strategy
            buy_ind = self.find_next_trade(True, cursor)
            sell_ind = self.find_next_trade(False, cursor)

            if sell_ind < buy_ind:
                candidate_sell, _ = self.find_intermediary_trades(buy_ind, cursor, False)
                if candidate_sell > buy_ind:
                    cursor = candidate_sell
                    continue
                # Hold from the buy up to (not including) the current cursor
                for i in range(buy_ind, cursor):
                    holding[i] = 1
                cursor = buy_ind
                continue

            candidate_buy, window_end = self.find_intermediary_trades(buy_ind, cursor, False)
            if not candidate_buy > sell_ind:
                cursor = sell_ind
                continue
            # Hold from the candidate buy up to the highest price in the window
            window_end = np.argmax(prices[candidate_buy:window_end]) + candidate_buy
            for i in range(candidate_buy, window_end):
                holding[i] = 1
            cursor = candidate_buy

        # An index is a trade when the hold state differs from the previous sample
        flips = np.flatnonzero(np.diff(holding)) + 1
        switched_on = holding[flips] != 0
        buy_inds = flips[switched_on]
        sell_inds = flips[~switched_on]

        return buy_inds.astype(int), sell_inds.astype(int)
Esempio n. 5
0
    def get_total_err_from_x0_and_y0(self, step_size, order, coeff_list, shift_list, x0s_guess, y0s_guess, data_len=None, verbose=False):
        """Accumulate ``self.err`` over every data series for a guessed initial state.

        The model is first reset with the guesses, then ``self.err`` is called
        once per entry of ``self.data`` (numbered from 1) with the matching
        coefficient and shift. In verbose mode only the first call is itself
        verbose and progress is printed after each call.

        Returns:
            ``(scaled_error, x, dat)`` where ``scaled_error`` is the summed error
            divided by ``len(self.data) + 1`` and ``x`` / ``dat`` come from the
            last ``self.err`` call (``None`` when there is no data).
        """
        self.reset(x0s_guess, y0s_guess)
        n_series = len(self.data)
        accumulated = 0
        x = None
        dat = None

        for idx in range(n_series):
            coeff = coeff_list[idx]
            shift = shift_list[idx]
            call_kwargs = {'data_len': data_len}
            if verbose:
                # Only the first series is evaluated verbosely
                call_kwargs['verbose'] = (idx == 0)
            partial, x, dat = self.err(step_size, order, idx + 1, coeff, shift, **call_kwargs)
            if verbose:
                progress_printer(self.propogators[0].N, idx, tsk='Evaluating Polynomials for Error Estimation')
            accumulated += partial

        return accumulated / (n_series + 1), x, dat
Esempio n. 6
0
def update_and_order_processes(procs, queue, full_len):
    """Collect the segments produced by worker processes and stitch them per process.

    Polls every live process with a one second join timeout, drains ``queue``
    after each poll and files every received item under its ``'process id'``
    and ``'seg id'``. Finished processes are replaced by ``None`` in ``procs``
    (the list is modified in place). Once no live process is left, the segments
    of each process are combined with ``stitch_process_segments``.

    Args:
        procs: list of process objects (or ``None`` for slots already finished).
        queue: queue the workers put their result dicts on. Each item must have
            the keys ``'USD'``, ``'SYM'``, ``'process id'`` and ``'seg id'``.
        full_len: total amount of work, used only for progress reporting.

    Returns:
        list with one stitched entry per process, in process-id order.
    """
    data = [None] * len(procs)
    completed_len = 0
    keep_polling = True

    while keep_polling:

        # Evaluated before the pass, so one extra (empty) pass runs after the last
        # process has been retired
        keep_polling = any(proc is not None for proc in procs)

        for i, proc in enumerate(procs):
            if proc is None:
                continue

            proc.join(timeout=1)
            # Sample liveness BEFORE draining: if the worker is already dead, everything
            # it produced is drained below. Checking afterwards could retire a worker
            # whose last items arrived between the drain and the check, losing them.
            finished = not proc.is_alive()

            while not queue.empty():
                temp_data = queue.get()
                usd_len = len(temp_data['USD'])
                sym_len = len(temp_data['SYM'])
                if usd_len > 0:
                    completed_len += usd_len
                    # TODO adjust full_len for short sym_len values
                else:
                    completed_len += sym_len

                process_id = temp_data['process id']
                if data[process_id] is None:
                    data[process_id] = {}
                # Keyed by segment id so the segments can be put in order later
                data[process_id][temp_data['seg id']] = temp_data

                progress_printer(full_len,
                                 completed_len,
                                 digit_resolution=4,
                                 print_resolution=0)

            if finished:
                procs[i] = None

    return [stitch_process_segments(segments) for segments in data]
Esempio n. 7
0
    def create_training_data(self,
                             feture_type,
                             step_size,
                             all_currencies_exchange_data,
                             syms,
                             training_data_path=None):
        """Create training inputs and targets for the neural net.

        The targets are the price differences to the next optimal buy or sell,
        sampled every ``step_size`` entries. When ``training_data_path`` is
        ``None`` the input rows are computed (propagator fit features plus
        aggregated price data), standardized and pickled; otherwise previously
        pickled inputs are loaded from that path. Outlier rows are then removed
        in 12 passes, re-standardizing the inputs after each pass.

        Args:
            feture_type: ``'buy'`` or ``'sell'``; selects which differences are
                used as targets.
            step_size: stride between samples.
            all_currencies_exchange_data: exchange data for all currencies; it is
                indexed by the symbol returned from ``self.data_instances()``.
            syms: symbols forwarded to
                ``create_multifrequency_propogator_from_data``.
            training_data_path: optional path of pickled training inputs.

        Returns:
            ``(inputs, targets)`` where ``inputs`` has a trailing axis of size 1
            added and ``targets`` is shifted by its mean and divided by its
            standard deviation.

        Raises:
            ValueError: if ``feture_type`` is neither ``'buy'`` nor ``'sell'``.
        """
        # These are the variables needed to make the prediction
        sym_prices, train_len, prediction_len, sym = self.data_instances()
        train_predict_offset = train_len + prediction_len
        all_prices = format_data_for_propogator(all_currencies_exchange_data)

        # These are the answer to the prediction
        strategy = OptimalStrategy(sym_prices)
        buy_arr, sell_arr = strategy.find_positive_profit_interval()
        buy_price_differences = strategy.find_next_trade_diff(buy_arr)
        sell_price_differences = strategy.find_next_trade_diff(sell_arr)

        # Validate before the loop so an invalid type fails before any expensive fit
        if feture_type == 'buy':
            target_differences = buy_price_differences
        elif feture_type == 'sell':
            target_differences = sell_price_differences
        else:
            raise ValueError('Must predict either buys or sells')

        training_rows = []
        prediction_array = np.array([])
        n_candidates = len(buy_price_differences) - train_predict_offset

        for i in range(step_size, n_candidates - 10, step_size):
            progress_printer(n_candidates,
                             i,
                             digit_resolution=3,
                             start_ind=step_size,
                             tsk='training data creation for ' + sym)
            start_ind = i + train_len
            if training_data_path is None:
                # Fit the propagator on the training window of every currency
                psm_training_data = [
                    raw_price[i:start_ind] for raw_price in all_prices
                ]
                system_fit, coeff_list, shift_list = create_multifrequency_propogator_from_data(
                    psm_training_data, syms)
                min_future_price, max_future_price, min_past_price, max_past_price, err = self.get_prediction_features(
                    sym, np.arange(0, 30), coeff_list, shift_list,
                    sym_prices[start_ind], system_fit)
                current_price_data = self.aggreagate_price_data(
                    start_ind, step_size, all_currencies_exchange_data[sym])

                # Create training row
                training_rows.append(
                    np.append(
                        current_price_data,
                        np.array([
                            min_future_price, max_future_price,
                            min_past_price, max_past_price, err
                        ])))

            # Create prediction entry
            prediction_array = np.append(
                prediction_array,
                target_differences[start_ind + step_size + 1])

        if training_data_path is None:
            # Stack once instead of growing the matrix row by row
            training_columns = np.vstack(training_rows) if training_rows else None
            scaler = StandardScaler()
            training_data = scaler.fit_transform(training_columns)
            ts = str(time()).split('.')[0]
            data_name = ts + '_' + self.sym + '_' + feture_type
            # NOTE(review): hard-coded absolute output directory -- consider making it configurable
            with open(
                    '/Users/rjh2nd/PycharmProjects/CryptoNeuralNet/ManualTradeHelper/Data//'
                    + data_name + '.pickle', 'wb') as f:
                pickle.dump(training_data, f, protocol=pickle.HIGHEST_PROTOCOL)
        else:
            # NOTE: unpickling can execute arbitrary code; only load trusted files
            with open(training_data_path, "rb") as f:
                training_data = pickle.load(f)

        for _ in range(12):
            # Outliers are looked for in the last and the fourth-to-last column
            inds_to_delete_1 = find_outliers(training_data[:, -1])
            inds_to_delete_2 = find_outliers(training_data[:, -4])
            # presumably list concatenation of index lists -- verify against find_outliers
            inds_to_delete = inds_to_delete_1 + inds_to_delete_2
            training_data = np.delete(training_data, inds_to_delete, axis=0)
            prediction_array = np.delete(prediction_array, inds_to_delete)
            scaler = StandardScaler()
            training_data = scaler.fit_transform(training_data)

        normalized_predictions = (
            prediction_array - np.mean(prediction_array)) / np.std(prediction_array)
        return training_data.reshape(
            training_data.shape[0], training_data.shape[1],
            1), normalized_predictions
Esempio n. 8
0
    system_fit, coeff_list, shift_list = create_multifrequency_propogator_from_data(train_list, sym_list)

    test_list = [x[train_len:train_len+test_len] for x in raw_data_list]
    norm_test_list = [(x - shift)/(coeff) for x, shift, coeff, in zip(test_list, shift_list, coeff_list)]
    norm_train_list = [(x - shift)/(coeff) for x, shift, coeff, in zip(train_list, shift_list, coeff_list)]
    test_x0s = [x[0] for x in norm_test_list]
    test_y0s = [np.mean(np.diff(x[-100::])) for x in norm_train_list]
    system_fit.reset(x0s=test_x0s, y0s=test_y0s)
    # system_fit.plot_simulation(norm_test_list, t, psm_step_size, psm_order, coefficients=coeff_list, shifts=shift_list,
    #                            eval_lens=[10, 15, 20, 30], del_len=100)

    for i in range(0, len(sym_list)):
        coeff = coeff_list[i]
        shift = shift_list[i]
        progress_printer(system_fit.propogators[0].N, i, tsk='Evaluationg Polynomials')
        x_fit, t_fit = system_fit.evaluate_nth_polynomial(t, psm_step_size, psm_order, n=i + 1, verbose=i==False)
        x_fit = x_fit[~np.isnan(x_fit)]
        x_raw = concat_data_list[i][train_len:train_len+2*test_len]
        x0 = np.mean(concat_data_list[i][train_len-10:train_len])
        t_plot_raw = np.linspace(0, 2*test_len, len(x_raw))
        t_plot_fit = np.linspace(0, np.max(t), len(x_fit))
        true_fit = np.polyfit(t, x_raw[0:test_len], 1)
        true_fit_line = np.polyval(true_fit, t_plot_raw)
        trend = trend_line(coeff * ( x_fit - x_fit[0] ))

        plt.figure()
        # fit_len = len(x_fit)
        plt.plot(t_plot_raw, x_raw) # True data
        plt.plot(t_fit, coeff * ( x_fit - x_fit[0] ) + x_raw[0]) # Calculated fit line
        plt.plot(t_plot_raw, true_fit_line) # True fit line