Example #1
def do_main():
	#---Incomplete data---
	#for missing data, we need to fill the gaps so the NaNs don't blow up our calculations
		#interpolating is not a good idea (it amounts to predicting the future)
	#use the last known value to fill the gap -> fill forward
	#for missing data at the beginning -> fill backward

	#use pandas fillna to fill the data (bfill and ffill); a toy sketch follows this example
	#http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.fillna.html

	crypto_codes = ['BTC','ETH', 'LTC']
	currency_code = 'EUR'
	dates = pd.date_range('2017-04-01', '2017-08-01') #period when ETH started, so some data is missing

	df = get_gdax_data(crypto_codes, currency_code, dates)

	ax = df.plot(title="LTH Close stats", label='LTH')
	ax.set_xlabel("Date")
	ax.set_ylabel("Close Price")
	ax.legend(loc='upper left')
	plt.show()

	#forward fill first
	df.fillna(method='ffill', inplace=True)
	ax = df.plot(title="LTH Close stats", label='LTH')
	plt.show()

	#backward fill second
	df.fillna(method='bfill', inplace=True)
	ax = df.plot(title="LTH Close stats", label='LTH')
	plt.show()
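#A minimal toy sketch of the fill order described above (the numbers are made up purely for
#illustration; pandas as pd and numpy as np are assumed imported as elsewhere in this module):
def demo_fill_order():
    prices = pd.Series([np.nan, np.nan, 10.0, np.nan, 11.0, np.nan],
                       index=pd.date_range('2017-04-01', periods=6))
    filled = prices.fillna(method='ffill')  #[NaN, NaN, 10, 10, 11, 11] - gaps take the last known value
    filled = filled.fillna(method='bfill')  #[10, 10, 10, 10, 11, 11] - leading NaNs take the first known value
    print(filled)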
Example #2
def do_main():
	#---Portfolio values/stats--- (sketches of the helpers used below follow this example)
		#portfolio_value -> how much your holdings are worth in total, each day
		#cumulative_return -> portfolio_value[-1]/portfolio_value[0] - 1
		#avg_daily_ret -> average daily return, related to performance (the higher the better)
		#std_daily_ret -> related to RISK (deviation/volatility)
		#sharpe_ratio -> metric that adjusts return for risk (assess whether a higher return is worth the extra risk)
						#= (portfolio_return - risk_free_return) / portfolio_return_std
							#risk_free_return is the return you would get on a risk-free asset (e.g. a bank account), which these days is ~0
						#in daily terms: = mean(daily_ret - daily_rf) / std(daily_ret) * k
							#k is an annualization factor
								#k = sqrt(#samples_per_year)
						#the higher the sharpe_ratio, the better

	crypto_codes = ['BTC', 'LTC', 'ETH']
	currency_code = 'EUR'
	dates = pd.date_range('2017-06-01', '2017-08-25')
	df = get_gdax_data(crypto_codes, currency_code, dates)

	#get portfolio value
	portfolio_money = 1000
	portfolio_rel_alloc = np.array([0.4, 0.3, 0.3])
	portfolio_value = get_portfolio_value(df, portfolio_rel_alloc * portfolio_money)
	print("portfolio_value")
	print(portfolio_value.tail())

	#daily return
	daily_return = compute_daily_returns(portfolio_value)[1:] #remove the initial daily return which is 0
	print("daily_return")
	print(daily_return.tail())

	#cumulative return
	cumulative_return = get_portfolio_cumulative_return(portfolio_value)
	print("cumulative_return=", cumulative_return)

	#avg_daily_ret
	avg_daily_ret = daily_return.mean()
	print("avg_daily_ret=", avg_daily_ret)

	#std_daily_ret
	std_daily_ret = daily_return.std()
	print("std_daily_ret=", std_daily_ret)

	#sharpe_ratio
	sharpe_ratio = get_portfolio_sharp_ratio(daily_return, 0, 252) #252 trading days a year
	print("sharpe_ratio=", sharpe_ratio)
Example #3
def do_main():
    #---Optimize a portfolio for a higher Sharpe ratio--- (a sketch of minimize_function follows this example)
    #https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.minimize.html

    #get the data
    crypto_codes = ['BTC', 'LTC', 'ETH']
    currency_code = 'EUR'
    dates = pd.date_range('2017-08-01', '2017-09-01')
    df = get_gdax_data(crypto_codes, currency_code, dates)

    #Initial Guess
    portfolio_allocations_0 = np.array([0.4, 0.3, 0.3])

    #Solver constraints and bounds
    cons = ({
        'type': 'eq',
        'fun': lambda x: 1 - x.sum()
    })  #an 'eq' constraint is satisfied when its function returns 0
    bnds = ((0, 1.0), (0, 1.0), (0, 1.0))
    result = spo.minimize(
        minimize_function,
        portfolio_allocations_0,
        args=(df, ),
        method='SLSQP',
        options={'disp': True},
        bounds=bnds,  #values of X are in range [0, 1]
        constraints=cons)  #allocations must sum to 1.0
    print("Optimized Allocations:")
    print(result.x)

    optimized_portfolio_value = get_portfolio_value(df, result.x)
    print("Max profit=", optimized_portfolio_value[-1])

    optimized_daily_return = compute_daily_returns(optimized_portfolio_value)[
        1:]  #remove the initial daily return which is 0

    sharpe_ratio = get_portfolio_sharp_ratio(optimized_daily_return, 0, 252)
    print("Max Sharpe Ratio=", sharpe_ratio)

    optimized_daily_return.plot(title='Daily Returns')
    plt.show()
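#minimize_function is not shown in this listing. scipy.optimize.minimize only minimizes,
#so maximizing the Sharpe ratio means minimizing its negative; a sketch, assuming the
#portfolio helpers sketched after Example #2:
def minimize_function(allocations, df):
    portfolio_value = get_portfolio_value(df, allocations)
    daily_return = compute_daily_returns(portfolio_value)[1:]  #drop the initial 0 return
    return -get_portfolio_sharp_ratio(daily_return, 0, 252)  #negated so the minimizer maximizes it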
Example #4
def do_main():
    crypto_codes = ['BTC']
    currency_code = 'EUR'
    dates = pd.date_range('2017-05-01', '2017-08-24')

    df = get_gdax_data(crypto_codes, currency_code, dates)
    ax = df.plot(title="BTC Close stats", label='BTC')
    ax.set_xlabel("Date")
    ax.set_ylabel("Close Price")
    ax.legend(loc='upper left')

    #dataframe computations like mean, etc
    #http://pandas.pydata.org/pandas-docs/stable/api.html#api-dataframe-stats
    print("--mean--")
    print(df.mean())
    print("--median--")
    print(df.median())
    print("--std--")
    print(df.std())
    #http://pandas.pydata.org/pandas-docs/stable/computation.html?highlight=rolling%20statistics#moving-rolling-statistics-moments
    #rolling statistics -> statistics over just a "window" of data, computed for all points
    #e.g.: Bollinger bands -> "threshold" lines 2 sigma (rolling standard deviations) around the rolling mean
    #      if the price reaches a band, it may be a good opportunity to buy/sell (sketches of these helpers follow this example)
    rm_BTC = get_rolling_mean(df['BTC close'], 20)  #20 days
    rstd_BTC = get_rolling_std(df['BTC close'], 20)  #20 days
    b_upper_band, b_lower_band = get_bollinger_bands(rm_BTC, rstd_BTC)

    rm_BTC.plot(label='Rolling mean',
                ax=ax)  #plot on the same "axes object"
    b_upper_band.plot(label='Upper band', ax=ax)
    b_lower_band.plot(label='Lower band', ax=ax)

    plt.show()

    #daily returns -> how much the price went up or down on a given day
    # today's price relative to yesterday's
    daily_returns = compute_daily_returns(df)
    daily_returns.plot(title="Daily Returns")
    plt.show()
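#Sketches of the rolling-statistics helpers used above (assumed behaviour; the real
#implementations may differ). Bollinger bands sit 2 rolling standard deviations above
#and below the rolling mean:
def get_rolling_mean(values, window):
    return values.rolling(window=window).mean()

def get_rolling_std(values, window):
    return values.rolling(window=window).std()

def get_bollinger_bands(rolling_mean, rolling_std):
    upper_band = rolling_mean + 2 * rolling_std
    lower_band = rolling_mean - 2 * rolling_std
    return upper_band, lower_band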
Example #5
def do_main():

    #get the data
    crypto_codes = ['BTC']
    currency_code = 'EUR'
    training_dates = pd.date_range('2017-07-01', '2017-08-01')
    df_training = get_gdax_data(crypto_codes, currency_code, training_dates)

    test_dates = pd.date_range('2017-08-01', '2017-09-01')
    df_test = get_gdax_data(crypto_codes, currency_code, test_dates)

    x_training, y_training = calculate_df_input_output_data(df_training, 5)
    x_test, y_test = calculate_df_input_output_data(df_test, 5)

    print("y_test:")
    print(y_test)

    linear_reg_learner = LinRegLearner()
    linear_reg_learner.train(x_training, y_training)
    y_predicted_lr = linear_reg_learner.query(x_test)

    print("y_predicted_lr:")
    print(y_predicted_lr)

    #Root Mean Squared Error
    rms_error_lr = get_rms_error(y_predicted_lr, y_test)
    print("rms_error_lr: " + str(rms_error_lr))

    #Correlation
    correlation_lr = np.corrcoef(y_predicted_lr, y_test)
    print("correlation_lr: " + str(correlation_lr))

    #KNN
    knn_learner = KNNLearner(3)
    knn_learner.train(x_training, y_training)
    y_predicted_knn = knn_learner.query(x_test)

    print("y_predicted_knn:")
    print(y_predicted_knn)

    #Root Mean Squared Error
    rms_error_knn = get_rms_error(y_predicted_knn, y_test)
    print("rms_error_knn: " + str(rms_error_knn))

    #Correlation
    correlation_knn = np.corrcoef(y_predicted_knn, y_test)
    print("correlation_knn: " + str(correlation_knn))

    #kNN performs very badly here because almost all x_test values fall outside the x_training range (the price shot up) and kNN cannot extrapolate

    #PolyRegLearner
    poly_learner = PolyRegLearner(2)
    poly_learner.train(x_training, y_training)
    y_predicted_poly = poly_learner.query(x_test)

    print("y_predicted_poly:")
    print(y_predicted_poly)

    #Root Mean Squared Error
    rms_error_poly = get_rms_error(y_predicted_poly, y_test)
    print("rms_error_poly: " + str(rms_error_poly))

    #Correlation
    correlation_poly = np.corrcoef(y_predicted_poly, y_test)
    print("correlation_poly: " + str(correlation_poly))

    #Ensemble (lesson 24)
    ensemble_learner = EnsembleLearner()
    ensemble_learner.train(x_training, y_training)
    y_predicted_ensemble = ensemble_learner.query(x_test)

    print("y_predicted_ensemble:")
    print(y_predicted_ensemble)

    #Root Mean Squared Error
    rms_error_ensemble = get_rms_error(y_predicted_ensemble, y_test)
    print("rms_error_ensemble: " + str(rms_error_ensemble))

    #Correlation
    correlation_ensemble = np.corrcoef(y_predicted_ensemble, y_test)
    print("correlation_ensemble: " + str(correlation_ensemble))
Example #6
def do_main():
	#---Histograms and scatter plots--- for daily returns
		#Histograms -> count of occurrences per bin -> usually close to the normal/Gaussian distribution
			#we can measure mean, std and kurtosis
				#kurtosis:	>0, "fat tails", more occurrences at the tails than the Gaussian distribution
						#	<0, "skinny tails", fewer occurrences at the tails than the Gaussian distribution
						#	measures how the tail probability differs from the Gaussian distribution
		#Scatter plots -> plot the daily returns of one value against another (two 'stocks')
			#common to fit a line with linear regression to describe this relation
				#slope (m) = β, represents how reactive a stock is to the market (SPY)
					#slope does not mean correlation!!
						#correlation measures how tightly the points cluster around the fitted line (a thin band means high correlation)
				#intercept (b) = α; if positive, the stock performed better than the market (SPY)

	crypto_codes = ['BTC']
	currency_code = 'EUR'
	dates = pd.date_range('2017-01-01', '2017-08-01')
	df = get_gdax_data(crypto_codes, currency_code, dates)

	ax = df.plot(title="Close stats", label='Close')
	plt.show()

	daily_returns = compute_daily_returns(df)
	daily_returns.plot(title="Daily Returns", label='Daily Returns')
	plt.show()

	#--histogram--
	daily_returns.hist(bins=40) #use 40 bins

	#add mean and std (lines) to histogram
	mean = daily_returns['BTC close'].mean()
	print("mean=", mean)
	std = daily_returns['BTC close'].std()
	print("std=", std)

	plt.axvline(mean, color='w', linestyle='dashed', linewidth=1)
	plt.axvline(std, color='r', linestyle='dashed', linewidth=1)
	plt.axvline(-std, color='r', linestyle='dashed', linewidth=1)

	plt.show()

	#compute kurtosis
	print("kurtosis=", daily_returns.kurtosis())

	#compare two histograms
	crypto_codes = ['LTC', 'ETH']
	dates = pd.date_range('2017-06-01', '2017-08-25')
	df = get_gdax_data(crypto_codes, currency_code, dates)
	ax = df.plot(title="Close stats", label='Close')
	plt.show()

	daily_returns = compute_daily_returns(df)
	daily_returns.hist(bins=40) 
	plt.show()

	#to plot both histograms on the same axes
	daily_returns['LTC close'].hist(bins=20, label='LTC')
	daily_returns['ETH close'].hist(bins=20, label='ETH')
	plt.legend(loc='upper right')
	plt.show()

	#--Scatterplots--
	crypto_codes = ['BTC', 'LTC', 'ETH']
	df = get_gdax_data(crypto_codes, currency_code, dates)
	ax = df.plot(title="Close stats", label='Close')
	plt.show()

	daily_returns = compute_daily_returns(df)

	#Scatterplot BTC vs LTC
	daily_returns.plot(kind='scatter', x='BTC close', y='LTC close')
		#fit a line using regression/numpy
	beta_LTC, alpha_LTC = np.polyfit(daily_returns['BTC close'], daily_returns['LTC close'], 1) #degree 1 poly (line)
	print("beta_LTC=", beta_LTC)
	print("alpha_LTC=", alpha_LTC)
	plt.plot(daily_returns['BTC close'], beta_LTC*daily_returns['BTC close'] + alpha_LTC, '-', color='r')
	plt.show()
	

	daily_returns.plot(kind='scatter', x='BTC close', y='ETH close')
	beta_ETH, alpha_ETH = np.polyfit(daily_returns['BTC close'], daily_returns['ETH close'], 1) #degree 1 poly (line)
	print("beta_ETH=", beta_ETH)
	print("alpha_ETH=", alpha_ETH)
	plt.plot(daily_returns['BTC close'], beta_ETH*daily_returns['BTC close'] + alpha_ETH, '-', color='r')
	plt.show()

	#Correlation
	print(daily_returns.corr(method='pearson')) #Pearson is the most common method to compute correlation