# Cluster a company's historical quote rows with k-means and draw them in two
# dimensions (via PCA), coloured by cluster label.
#
# NOTE(review): DataReader, datetime, KMeans and PCA are used below but are not
# imported in the visible part of this file -- confirm they are imported
# earlier.
import matplotlib.pyplot as plt

try:
    # scikit-learn >= 0.18 (sklearn.cross_validation was removed in 0.20).
    from sklearn.model_selection import train_test_split
except ImportError:
    # Older scikit-learn releases only ship the original module.
    from sklearn.cross_validation import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import accuracy_score
from sklearn import svm
from sklearn.metrics import mean_squared_error
import numpy as np

# raw_input only exists on Python 2; fall back to input on Python 3 so the
# prompt keeps returning the typed text unchanged on both.
try:
    read_input = raw_input
except NameError:
    read_input = input

name = read_input("Enter company name\n")

# Which data to pull: the entered name from the 'yahoo' source,
# 2008-01-01 through 2016-04-10.
mydata = DataReader(name, 'yahoo', datetime(2008, 1, 1), datetime(2016, 4, 10))
# print(mydata.shape)  # rows, col

# Drop rows with missing values.
mydata = mydata.dropna(axis=0)

kmeans_model = KMeans(n_clusters=5, random_state=1)
# Keep only the numeric columns.
# NOTE(review): _get_numeric_data is a private pandas method -- confirm it is
# still available in the pandas version in use.
good_columns = mydata._get_numeric_data()
kmeans_model.fit(good_columns)
labels = kmeans_model.labels_

# Reduce the numeric columns to two PCA components so they can be plotted.
pca_2 = PCA(2)
plot_columns = pca_2.fit_transform(good_columns)
plt.scatter(x=plot_columns[:, 0], y=plot_columns[:, 1], c=labels)
# plt.show()
# K-means clustering of a company's historical quote rows, plotted on the first
# two PCA components and coloured by cluster label.
#
# NOTE(review): DataReader, datetime, KMeans and PCA are referenced here without
# a visible import -- verify that an earlier part of the file provides them.
import matplotlib.pyplot as plt

try:
    # Current location of train_test_split (scikit-learn 0.18 and later).
    from sklearn.model_selection import train_test_split
except ImportError:
    # Fallback for releases that predate sklearn.model_selection.
    from sklearn.cross_validation import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import accuracy_score
from sklearn import svm
from sklearn.metrics import mean_squared_error
import numpy as np

# Use raw_input where it exists (Python 2) and input otherwise (Python 3);
# both return the typed line as text.
try:
    read_input = raw_input
except NameError:
    read_input = input

name = read_input("Enter company name\n")

# Which data to pull: 'yahoo' source, 2008-01-01 to 2016-04-10.
mydata = DataReader(name, 'yahoo', datetime(2008, 1, 1), datetime(2016, 4, 10))
# print(mydata.shape)  # rows, col

# Drop rows with missing values.
mydata = mydata.dropna(axis=0)

kmeans_model = KMeans(n_clusters=5, random_state=1)
# Get only numeric columns.
# NOTE(review): relies on the private pandas method _get_numeric_data --
# confirm against the installed pandas version.
good_columns = mydata._get_numeric_data()
kmeans_model.fit(good_columns)
labels = kmeans_model.labels_

# Create the PCA model and project the numeric columns onto two components.
pca_2 = PCA(2)
plot_columns = pca_2.fit_transform(good_columns)
plt.scatter(x=plot_columns[:, 0], y=plot_columns[:, 1], c=labels)
# plt.show()
# Plotting section: distribution of AAPL daily returns, then a pair grid over
# the adjusted closing prices of the tickers in tech_list.
#
# NOTE(review): plt, sns, AAPL, DataReader, tech_list, start and end are not
# defined in the visible part of this file -- presumably set up earlier;
# confirm.

# Display whatever figure was prepared before this section.
plt.show()

# Histogram (100 bins) of the non-missing 'Daily Return' values.
sns.distplot(AAPL['Daily Return'].dropna(), bins=100, color='purple')
plt.show()

# 'Adj Close' column(s) for every ticker in tech_list, and their
# period-over-period percentage change.
closing_df = DataReader(tech_list, 'yahoo', start, end)['Adj Close']
tech_rets = closing_df.pct_change()

# sns.jointplot('GOOG', 'MSFT', tech_rets, kind='scatter', color='seagreen')
# plt.show()

# sns.pairplot(tech_rets.dropna())
# plt.show()

# Kernel density estimate plots in the lower triangle, histograms on the
# diagonal, scatter plots in the upper triangle.
# NOTE(review): the grid is built from closing_df (prices), not tech_rets,
# despite the name returns_fig -- confirm this is intended.
returns_fig = sns.PairGrid(closing_df.dropna())
returns_fig.map_upper(plt.scatter, color='purple')
returns_fig.map_lower(sns.kdeplot, cmap='cool_d')
returns_fig.map_diag(plt.hist, bins=30)
plt.show()

# Show how strong the correlations are.
# sns.corrplot(closing_df, annot=True)
# plt.show()

# --------------------risk analysis----------------------
# rets = tech_rets.dropna()
# area = np.pi * 20
# plt.scatter(rets.mean(), rets.std(), s=area)
# plt.xlabel("Expected Return")
# plt.ylabel("Risk")