# Streamlit app: pick a dataset and classifier in the sidebar, optionally
# scale the features, then report accuracy and plot the data.
dataset_name = st.sidebar.selectbox("Select Dataset", ("Iris", "Breast Cancer", "Wine"))
classifier = st.sidebar.selectbox("Select Classifiers", ("KNN", "SVM", "Random Forest"))
scaling = st.sidebar.checkbox("Scaling?")

# Get the data
X, y = utilities.get_dataset(dataset_name)
st.write("Shape of the data:", X.shape)
st.write("Number of Classes:", len(np.unique(y)))

# Add parameters to the UI based on the classifier
params = utilities.add_parameter_ui(classifier)

# Build the selected classifier with the chosen hyper-parameters
clf = utilities.get_classifier(classifier, params)

# Check if scaling is required
if scaling:
    X = utilities.scale_data(X)

# Make predictions and get accuracy
accuracy = utilities.classification(X, y, clf)
st.write("**Classifier:** ", classifier)  # fixed typo: was "Classifer"
st.write("**Accuracy:** ", accuracy)

# Plot the components of the data
utilities.plot_data(X, y)
"""Fit a depth-1 decision tree (a stump) on the TAMU data and plot its boundary."""
from sklearn import tree
import os
import utilities as util
import pandas as pd
import numpy as np

os.chdir('E:/decision-trees')

# Space-separated file with no header row; columns are addressed by position.
tamu = pd.read_csv("tamu.txt", sep=' ', header=None)

# Quick look at the frame (shape and dtypes).
tamu.shape
tamu.info()

# Features are columns 1 and 0 (in that order); column 2 holds the label.
X = np.array(tamu[[1, 0]])
y = np.array(tamu[2])
util.plot_data(X, y)

# max_depth=1 restricts the tree to a single split.
stump = tree.DecisionTreeClassifier(random_state=2017, max_depth=1)
stump.fit(X, y)
util.plot_decision_boundary(lambda samples: stump.predict(samples), X, y)
# -*- coding: utf-8 -*-
"""
Created on Mon Sep 11 11:30:39 2017

@author: venkat

Train a tiny MLP on the concentric-circles toy problem and visualize
its decision boundary and confusion matrix.
"""
import os

os.getcwd()
os.chdir("E:\\deep_learning")

from utilities import plot_data, plot_confusion_matrix, plot_loss_accuracy, plot_decision_boundary
from sklearn.datasets import make_moons, make_circles
from keras.models import Sequential
from keras.layers import Dense

# Two concentric rings -- a classic non-linearly-separable dataset.
x, y = make_circles(n_samples=1000, noise=0.05, random_state=0, factor=0.3)
plot_data(x, y)

# Network shape: 2 inputs -> 4 tanh -> 2 tanh -> 1 sigmoid output.
net = Sequential()
net.add(Dense(units=4, activation='tanh', input_shape=(2, )))
net.add(Dense(units=2, activation='tanh'))
net.add(Dense(units=1, activation='sigmoid'))
net.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# NOTE(review): trains for a single epoch -- presumably deliberate for a
# quick demo; confirm before relying on the resulting boundary.
history = net.fit(x, y, epochs=1, verbose=0)
plot_decision_boundary(lambda x: net.predict(x), x, y)
plot_confusion_matrix(net, x, y)
"""Binary classification on the circles data: a single perceptron versus a
small multi-layer perceptron."""
from utilities import plot_data, plot_confusion_matrix, plot_loss_accuracy, plot_decision_boundary
from sklearn.datasets import make_moons, make_circles
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam
from keras.utils import plot_model

# Concentric circles are not linearly separable, so the lone perceptron
# below is expected to underperform the MLP.
X, y = make_circles(n_samples=1000, noise=0.05, factor=0.3, random_state=0)
plot_data(X, y)

# --- Model 1: a single sigmoid unit (logistic-regression-style perceptron) ---
model1 = Sequential()
model1.add(Dense(1, input_shape=(2, ), activation='sigmoid'))
model1.compile('adam', 'binary_crossentropy', metrics=['accuracy'])
plot_model(model1, show_shapes=True, to_file='model1.png')

history1 = model1.fit(X, y, verbose=0, epochs=100)
plot_loss_accuracy(history1)
plot_decision_boundary(lambda x: model1.predict(x), X, y)
y_pred = model1.predict_classes(X, verbose=0)
plot_confusion_matrix(model1, X, y)

# --- Model 2: an MLP (2 -> 4 tanh -> 2 tanh -> 1 sigmoid) ---
model2 = Sequential()
model2.add(Dense(4, input_shape=(2, ), activation='tanh'))
model2.add(Dense(2, activation='tanh'))
model2.add(Dense(1, activation='sigmoid'))
"""Embed one-hot-encoded Titanic training features into 2-D with t-SNE and
plot the projection colored by survival."""
import os
import pandas as pd
from sklearn.manifold import TSNE
import utilities as util
import numpy as np

# Work out of the local Titanic data directory.
os.chdir('D:/Data/DataScience/Practice/titanic')
titanic_train = pd.read_csv("train.csv")

# Quick EDA: shape and column dtypes.
titanic_train.shape
titanic_train.info()

# One-hot encode the categorical columns.
titanic_encoded = pd.get_dummies(titanic_train,
                                 columns=['Sex', 'Pclass', 'Embarked'])
titanic_encoded.shape
titanic_encoded.info()

# Drop identifiers, free-text columns, Age, and the target itself.
X_train = titanic_encoded.drop(
    ['PassengerId', 'Name', 'Age', 'Ticket', 'Cabin', 'Survived'],
    axis=1, inplace=False)
X_train.shape

# Non-linear 2-D embedding of the encoded features.
embedder = TSNE(perplexity=30.0, n_components=2, n_iter=10000)
projected = embedder.fit_transform(X_train)
util.plot_data(projected, np.array(titanic_encoded['Survived']))
# NOTE(review): this fragment appears to be the body of a per-time-step loop
# over index ``t`` (the enclosing ``for`` header, and the definitions of
# ``data``, ``e``, ``u_t``, ``ground_truth`` and ``iterations``, are outside
# this view), followed by post-loop error summaries -- confirm against the
# full file before restructuring.

# Sensor readings for the current step: range ``r_t`` and bearing ``b_t``.
r_t = data['r'][t]
b_t = data['b'][t]
# ``e.localize`` presumably returns
# (corrected state, corrected covariance, predicted state, predicted
# covariance) -- inferred from the unpacking below; verify against its source.
localization = e.localize(u_t, r_t, b_t)
temp_corr = localization[0].T  # corrected (posterior) state estimate
temp_pred = localization[2].T  # predicted (odometry-only) state estimate
temp_P = localization[1]       # covariance of the corrected estimate
temp_P_1 = localization[3]     # covariance of the predicted estimate
if t == 0:
    # First step: start the history arrays.
    prediction = np.array([temp_corr])
    roboterr_data = np.array([temp_pred])
else:
    # Append this step's estimates to the running histories.
    prediction = np.vstack((prediction, np.array([temp_corr])))
    roboterr_data = np.vstack((roboterr_data, np.array([temp_pred])))
# Plot landmarks, the trajectory so far, and both estimate histories.
plot_data(data['l'], ground_truth[:t + 1, :], prediction, roboterr_data,
          temp_P, temp_P_1, t)
print("Odometery Norm:", np.linalg.norm(ground_truth[t] - temp_pred))
print("Corrected Norm:", np.linalg.norm(ground_truth[t] - temp_corr), '\n')
# Post-loop summary: mean of sqrt(squared error) == mean absolute error per
# component (despite the "Mean Square Error" wording in the printed labels).
mean_odometry_error = np.mean(
    np.sqrt((ground_truth[:iterations] - roboterr_data[:iterations])**2))
mean_corrected_error = np.mean(
    np.sqrt((ground_truth[:iterations] - prediction[:iterations])**2))
print("Mean Square Error in Odometry: {}".format(mean_odometry_error))
print(
    "Mean Square Error in EKF Correction: {}".format(mean_corrected_error))
def test_experiment_one(n_days=21, data_size=12, train_size=0.7, max_k=50,
                        max_trade_size=0.1, years_to_go_back=2,
                        initial_investment=10000, gen_plot=False,
                        verbose=False, savelogs=False):
    """Backtest one year of ML-driven rebalancing against buy-and-hold and SPY.

    :param n_days: Days ahead the learner forecasts returns (rebalance cadence).
    :param data_size: Months of history fed to the machine-learning model.
    :param train_size: Fraction of the data used for training the ML model.
    :param max_k: Maximum number of neighbors used in kNN.
    :param max_trade_size: Max fraction of the portfolio traded per transaction.
    :param years_to_go_back: How many years before today the test year starts.
    :param initial_investment: Starting cash for the ML portfolio.
    :param gen_plot: Plot normalized portfolio values when True.
    :param verbose: Print progress information when True.
    :param savelogs: Persist intermediate logs when True.
    :return: (conservative return, ML return, SPY return) over the year.
    """
    today = dt.date.today()
    yr = today.year - years_to_go_back
    mo = today.month - 1  # Just temporary, take out 1 when data download is fixed.
    da = today.day - 1
    # Guard calendar underflow: month 0 (in January) or day 0 (on the 1st)
    # would make dt.datetime raise ValueError.
    if mo < 1:
        yr -= 1
        mo = 12
    if da < 1:
        da = 1
    start_date = dt.datetime(yr, mo, da)
    end_date = dt.datetime(yr + 1, mo, da)

    myport = ['AAPL', 'GLD']
    myalloc = [0.5, 0.5]

    # Portfolio values for Holding the Same Allocation (conservative case)
    actual_prices = util.load_data(myport, start_date, end_date)
    actual_prices.fillna(method='ffill', inplace=True)
    actual_prices.fillna(method='bfill', inplace=True)
    prices_SPY = actual_prices['SPY']
    actual_prices = actual_prices[myport]

    adr_cons, vol_cons, sharpe_cons, pv_cons = util.compute_returns(
        actual_prices, myalloc, sf=252.0, rfr=0.0)

    # Portfolio values for the Machine Learner: re-run the allocator roughly
    # every n_days of trading (252 trading days / n_days rebalances per year).
    ml_allocs = []
    ml_trade_dates = []
    for i in range(int(252 / n_days)):
        test_date = start_date + dt.timedelta(weeks=round(i * 52 * n_days / 252))
        if verbose:
            print(('EXPERIMENT %i - %s') % (i, str(test_date.strftime("%m/%d/%Y"))))
        myalloc, trade_date = run_today(end_date=test_date, n_days=n_days,
                                        data_size=data_size, myport=myport,
                                        allocations=myalloc,
                                        train_size=train_size, max_k=max_k,
                                        max_trade_size=max_trade_size,
                                        gen_plot=gen_plot, verbose=verbose,
                                        savelogs=savelogs)
        ml_allocs.append(myalloc)
        ml_trade_dates.append(trade_date)

    ml_allocations = pd.DataFrame(data=ml_allocs, index=ml_trade_dates,
                                  columns=myport)

    all_dates = actual_prices.index
    actual_prices['Cash'] = 1.0

    # Holdings ledger: shares per symbol plus a cash column, seeded with the
    # initial investment on the first trading day.
    ml_holdings = pd.DataFrame(data=0.0, index=all_dates, columns=myport)
    ml_holdings['Cash'] = 0.0
    # .ix was removed from pandas; address the first row positionally.
    ml_holdings.iloc[0, ml_holdings.columns.get_loc('Cash')] = initial_investment

    values = ml_holdings * actual_prices
    porvals = values.sum(axis=1)

    for index, allocation in ml_allocations.iterrows():
        if index < ml_holdings.index.min():
            index = ml_holdings.index.min()
        tomorrow = ml_holdings.index.get_loc(index) + 1
        for symbol in myport:
            # ``tomorrow`` is a positional offset, so index with .iloc (the
            # previous .loc call treated it as a label on a DatetimeIndex).
            ml_holdings.iloc[tomorrow:, ml_holdings.columns.get_loc(symbol)] = (
                porvals.loc[index] * allocation[symbol]
                / actual_prices.loc[index, symbol])

    values = ml_holdings * actual_prices
    porvals = values.sum(axis=1)

    if gen_plot:
        df_temp = pd.concat([pv_cons, porvals, prices_SPY],
                            keys=['Conservative', 'ML', 'SPY'], axis=1)
        df_temp = df_temp / df_temp.iloc[0]
        util.plot_data(df_temp, 'Daily portfolio value and SPY', 'Date',
                       'Normalized Price')

    # Total return over the period for each strategy.
    ret_cons = (pv_cons[-1] / pv_cons[0]) - 1
    ret_porvals = (porvals[-1] / porvals[0]) - 1
    ret_SPY = (prices_SPY[-1] / prices_SPY[0]) - 1

    return ret_cons, ret_porvals, ret_SPY
def run_today(start_date=dt.datetime(2015, 1, 1),
              end_date=dt.datetime(2017, 1, 1),
              n_days=21,
              data_size=12,
              myport=None,
              allocations=None,
              train_size=0.7,
              max_k=50,
              max_trade_size=0.1,
              gen_plot=False,
              verbose=False,
              savelogs=False):
    """
    Forecast fund returns, optimize a target allocation, and apply the
    resulting trade orders to the current allocation.

    :param start_date: Beginning of time period (recomputed from end_date and data_size).
    :param end_date: End of time period
    :param n_days: Number of days into the future to predict the daily returns of a fund
    :param data_size: The number of months of data to use in the machine learning model.
    :param myport: The funds available in your portfolio (default ['AAPL', 'GOOG'])
    :param allocations: The percentage of your portfolio invested in the funds (default [0.5, 0.5])
    :param train_size: The percentage of data used for training the ML model, remained used for testing.
    :param max_k: Maximum number of neighbors used in kNN
    :param max_trade_size: The maximum percentage of your portfolio permitted to be traded in any one transaction.
    :param gen_plot: Boolean to see if you want to plot results
    :param verbose: Boolean to print out information during execution of application.
    :param savelogs: Boolean; persist intermediate logs when True.
    :return: (new_allocations, trade_date) -- the allocations after applying
        the generated orders, and the date the trades were generated for.
    """
    # Mutable default arguments are shared across calls; build fresh lists.
    if myport is None:
        myport = ['AAPL', 'GOOG']
    if allocations is None:
        allocations = [0.5, 0.5]

    # Derive the data window start from the requested history size.
    start_date = calc_start_date(end_date, data_size)

    if verbose:
        print('-' * 20 + '\nFORECAST\n' + '-' * 20)
    forecast = fc.forecast(start_date, end_date, symbols=myport,
                           train_size=train_size, n_days=n_days, max_k=max_k,
                           gen_plot=gen_plot, verbose=verbose,
                           savelogs=savelogs)

    if verbose:
        print('\n' + '-' * 20 + '\nOPTIMIZE\n' + '-' * 20)
    target_allocations = opt.optimize_return(forecast, myport, allocations,
                                             gen_plot=gen_plot,
                                             verbose=verbose,
                                             savelogs=savelogs)

    if verbose:
        print('\n' + '-' * 20 + '\nORDERS\n' + '-' * 20)
    trade_date = forecast.index.max()
    orders = td.create_orders(myport, allocations, target_allocations,
                              trade_date=trade_date,
                              max_trade_size=max_trade_size,
                              verbose=verbose, savelogs=savelogs)
    if verbose:
        print(orders)

    # Apply each order (quantities are allocation fractions) to the vector.
    new_allocations = allocations.copy()
    for i in range(orders.shape[0]):
        # fix this code so that the correct allocations are updated!
        index = myport.index(orders.loc[i, 'Symbol'])
        if orders.loc[i, 'Action'] == 'SELL':
            new_allocations[index] -= orders.loc[i, 'Quantity']
        else:
            new_allocations[index] += orders.loc[i, 'Quantity']

    # Evaluate current, target, and post-trade portfolios on the forecast.
    adr_current, vol_current, sr_current, pv_current = util.compute_returns(
        forecast, allocations=allocations)
    adr_target, vol_target, sr_target, pv_target = util.compute_returns(
        forecast, allocations=target_allocations)
    adr_new, vol_new, sr_new, pv_new = util.compute_returns(
        forecast, allocations=new_allocations)

    if verbose:
        print("Portfolios:", "Current", "Target", "New")
        print("Daily return: %.5f %.5f %.5f" %
              (adr_current, adr_target, adr_new))
        print("Daily Risk: %.5f %.5f %.5f" %
              (vol_current, vol_target, vol_new))
        print("Sharpe Ratio: %.5f %.5f %.5f" %
              (sr_current, sr_target, sr_new))
        print("Return vs Risk: %.5f %.5f %.5f" %
              (adr_current / vol_current, adr_target / vol_target,
               adr_new / vol_new))
        print("\nALLOCATIONS\n" + "-" * 40)
        print("Symbol", "Current", "Target", 'New')
        for i, symbol in enumerate(myport):
            print("%s %.3f %.3f %.3f" %
                  (symbol, allocations[i], target_allocations[i],
                   new_allocations[i]))

    # Compare daily portfolio value with SPY using a normalized plot
    if gen_plot:
        fig, ax = plt.subplots()
        ax.scatter(vol_current, adr_current, c='green', s=15,
                   alpha=0.5)  # Current portfolio
        ax.scatter(vol_target, adr_target, c='red', s=15, alpha=0.5)  # ef
        ax.scatter(vol_new, adr_new, c='black', s=25, alpha=0.75)  # ef
        ax.set_xlabel('St. Dev. Daily Returns')
        ax.set_ylabel('Mean Daily Returns')
        ax.grid()
        ax.grid(linestyle=':')
        fig.tight_layout()
        plt.show()

        df_temp = pd.concat([pv_current, pv_target, pv_new],
                            keys=['Current', 'Target', 'New'], axis=1)
        # .ix was removed from pandas; use .iloc for the first row.
        df_temp = df_temp / df_temp.iloc[0]
        util.plot_data(df_temp, 'Forecasted Daily portfolio value and SPY',
                       'Date-21', 'Normalized Price')

    if False:  # meh was going to plot portfolio values for the last year but trying something else now
        prior_prices = util.load_data(myport, start_date, end_date)
        prior_prices.fillna(method='ffill', inplace=True)
        prior_prices.fillna(method='bfill', inplace=True)
        prior_prices = prior_prices[myport]  # prices of portfolio symbols

        forecast_prices = forecast * prior_prices
        time_span = pd.date_range(forecast.index.min(),
                                  end_date + dt.timedelta(days=n_days * 2))
        forecast_prices = forecast_prices.reindex(time_span)
        forecast_prices = forecast_prices.shift(periods=n_days * 2)
        forecast_prices = forecast_prices.dropna()
        forecast_prices = pd.concat([prior_prices, forecast_prices], axis=0)

        adr_current, vol_current, sr_current, pv_current = util.compute_returns(
            forecast_prices, allocations=allocations)
        adr_target, vol_target, sr_target, pv_target = util.compute_returns(
            forecast_prices, allocations=target_allocations)
        adr_new, vol_new, sr_new, pv_new = util.compute_returns(
            forecast_prices, allocations=new_allocations)

        df_temp = pd.concat([pv_current, pv_target, pv_new],
                            keys=['Current', 'Target', 'New'], axis=1)
        df_temp = df_temp / df_temp.iloc[0]
        util.plot_data(df_temp, 'Daily portfolio value and SPY', 'Date',
                       'Normalized Price')

    return new_allocations, trade_date
def do_linear_search(test=False, test_dim=32):
    """
    Linear search over patch dimensions for the EDSR super-resolution model.

    Runs the model on patches from dimension 1 up to the configured
    ``max_dim`` (or only ``test_dim`` in test mode), recording processing
    time and GPU memory used/free, then plots and saves aggregate statistics.

    Parameters
    ----------
    test : bool
        When True, evaluate only ``test_dim`` and skip plotting/saving.
    test_dim : int
        Patch dimension to evaluate in test mode.

    Returns
    -------
    None.
    """
    logger = ut.get_logger()
    device = "cuda"
    model_name = "EDSR"
    config = toml.load("../config.toml")
    run = config["run"]
    scale = int(config["scale"]) if config["scale"] else 4

    # device information
    _, device_name = ut.get_device_details()
    total, _, _ = ut.get_gpu_details(
        device, "\nDevice info:", logger, print_details=False
    )
    log_message = (
        "\nDevice: "
        + device
        + "\tDevice name: "
        + device_name
        + "\tTotal memory: "
        + str(total)
    )
    logger.info(log_message)

    ut.clear_cuda(None, None)
    state = "Before loading model: "
    total, used, _ = ut.get_gpu_details(device, state, logger, print_details=True)
    model = md.load_edsr(device=device)
    state = "After loading model: "
    total, used, _ = ut.get_gpu_details(device, state, logger, print_details=True)

    # The config was already loaded above; reuse it rather than re-reading
    # the file a second time.
    max_dim = int(config["max_dim"])

    if not test:
        detailed_result, memory_used, memory_free = result_from_dimension_range(
            device, logger, config, model, 1, max_dim
        )
    else:
        detailed_result, memory_used, memory_free = result_from_dimension_range(
            device, logger, config, model, test_dim, test_dim
        )

    if not test:
        # Aggregate mean and standard deviation per dimension.
        mean_time, std_time = ut.get_mean_std(detailed_result)
        mean_memory_used, std_memory_used = ut.get_mean_std(memory_used)
        mean_memory_free, std_memory_free = ut.get_mean_std(memory_free)

        # make folder for saving results
        plt_title = "Model: {} | GPU: {} | Memory: {} MB".format(
            model_name, device_name, total
        )
        date = "_".join(str(time.ctime()).split())
        date = "_".join(date.split(":"))
        foldername = date
        os.mkdir("results/" + foldername)

        # plot data (axis label fixed to match the other five plots)
        ut.plot_data(
            foldername,
            "dimension_vs_meantime",
            mean_time,
            "Dimension n of Patch(nxn)",
            "Mean Processing Time: LR -> SR, Scale: {} ( {} runs )".format(scale, run),
            mode="mean time",
            title=plt_title,
        )
        ut.plot_data(
            foldername,
            "dimension_vs_stdtime",
            std_time,
            "Dimension n of Patch(nxn)",
            "Std of Processing Time: LR -> SR, Scale: {} ( {} runs )".format(
                scale, run
            ),
            mode="std time",
            title=plt_title,
        )
        ut.plot_data(
            foldername,
            "dimension_vs_meanmemoryused",
            mean_memory_used,
            "Dimension n of Patch(nxn)",
            "Mean Memory used: LR -> SR, Scale: {} ( {} runs )".format(scale, run),
            mode="mean memory used",
            title=plt_title,
        )
        ut.plot_data(
            foldername,
            "dimension_vs_stdmemoryused",
            std_memory_used,
            "Dimension n of Patch(nxn)",
            "Std Memory Used: LR -> SR, Scale: {} ( {} runs )".format(scale, run),
            mode="std memory used",
            title=plt_title,
        )
        ut.plot_data(
            foldername,
            "dimension_vs_meanmemoryfree",
            mean_memory_free,
            "Dimension n of Patch(nxn)",
            "Mean Memory Free: LR -> SR, Scale: {} ( {} runs )".format(scale, run),
            mode="mean memory free",
            title=plt_title,
        )
        ut.plot_data(
            foldername,
            "dimension_vs_stdmemoryfree",
            std_memory_free,
            "Dimension n of Patch(nxn)",
            "Std Memory Free: LR -> SR, Scale: {} ( {} runs )".format(scale, run),
            mode="std memory free",
            title=plt_title,
        )
        # save data
        ut.save_csv(
            foldername,
            "total_stat",
            device,
            device_name,
            total,
            mean_time,
            std_time,
            mean_memory_used,
            std_memory_used,
            mean_memory_free,
            std_memory_free,
        )