Ejemplo n.º 1
0
import matplotlib.pyplot as plt
from sklearn.cross_validation import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import accuracy_score
from sklearn import svm
from sklearn.metrics import mean_squared_error
import numpy as np


# Prompt on stdin for the company whose data should be analysed.
name = raw_input("Enter company name\n")

# Fetch the data for `name` from the 'yahoo' source for the fixed window
# 2008-01-01 .. 2016-04-10, then discard every row with a missing value.
mydata = DataReader(
    name,
    'yahoo',
    datetime(2008, 1, 1),
    datetime(2016, 4, 10),
).dropna(axis=0)

#print (mydata.shape) #rows,col

# Clustering and projection both operate on the numeric columns only.
good_columns = mydata._get_numeric_data()

# Fit a 5-cluster k-means model (fixed random_state so runs are repeatable)
# and keep the cluster label assigned to each row.
kmeans_model = KMeans(n_clusters=5, random_state=1)
kmeans_model.fit(good_columns)
labels = kmeans_model.labels_

# Reduce the numeric columns to two principal components and scatter them,
# colouring each point by its cluster label.
pca_2 = PCA(n_components=2)
plot_columns = pca_2.fit_transform(good_columns)
plt.scatter(plot_columns[:, 0], plot_columns[:, 1], c=labels)

# Displaying the figure is left disabled here.
#plt.show()
Ejemplo n.º 2
0
import matplotlib.pyplot as plt
from sklearn.cross_validation import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import accuracy_score
from sklearn import svm
from sklearn.metrics import mean_squared_error
import numpy as np

# Read the company to analyse from stdin.
name = raw_input("Enter company name\n")

# Download the 'yahoo' data for `name` between 2008-01-01 and 2016-04-10 and
# drop any row that has a missing value.
mydata = DataReader(
    name,
    'yahoo',
    datetime(2008, 1, 1),
    datetime(2016, 4, 10),
).dropna(axis=0)

#print (mydata.shape) #rows,col

# Keep just the numeric columns; they feed both k-means and PCA below.
good_columns = mydata._get_numeric_data()

# k-means with 5 clusters and a fixed seed; `labels` holds one cluster id
# per row of `good_columns`.
kmeans_model = KMeans(n_clusters=5, random_state=1)
kmeans_model.fit(good_columns)
labels = kmeans_model.labels_

# Two-component PCA projection, plotted as a scatter coloured by cluster.
pca_2 = PCA(n_components=2)
plot_columns = pca_2.fit_transform(good_columns)
plt.scatter(plot_columns[:, 0], plot_columns[:, 1], c=labels)

# The show call is intentionally left commented out.
#plt.show()
Ejemplo n.º 3
0
# Show any figure still pending from earlier code (not visible in this excerpt).
plt.show()

# Distribution plot of the non-missing values of AAPL's 'Daily Return'
# column, using 100 bins and drawn in purple.
sns.distplot(
    AAPL['Daily Return'].dropna(),
    bins=100,
    color='purple',
)
plt.show()

# 'Adj Close' data for everything in tech_list over start..end, and its
# element-to-element percentage change.
closing_df = DataReader(tech_list, 'yahoo', start, end)['Adj Close']
tech_rets = closing_df.pct_change()

# Disabled alternatives for visualising tech_rets:
#sns.jointplot('GOOG','MSFT',tech_rets,kind='scatter',color='seagreen')
#plt.show()

#sns.pairplot(tech_rets.dropna())
#plt.show()

# Pair grid: scatter plots in the upper triangle, kernel density estimates
# in the lower triangle, and 30-bin histograms on the diagonal.
# NOTE(review): the grid is built from closing_df even though the variable is
# called returns_fig -- confirm whether tech_rets was intended.
returns_fig = sns.PairGrid(closing_df.dropna())
returns_fig.map_upper(plt.scatter, color='purple')
returns_fig.map_lower(sns.kdeplot, cmap='cool_d')
returns_fig.map_diag(plt.hist, bins=30)
plt.show()
#show how strong the correlations are
#sns.corrplot(closing_df,annot=True)
#plt.show()

#--------------------risk analysis----------------------

#rets=tech_rets.dropna()
#area=np.pi*20
#plt.scatter(rets.mean(),rets.std(),s=area)
#plt.xlabel("Expected Return")
#plt.ylabel("Risk")