# Load sample R datasets and export the mtcars DataFrame to CSV / Excel.
#
# Requirements: pip install pydataset statsmodels xlsxwriter
# (writing .xlsx without an explicit engine also needs an Excel writer
# backend such as openpyxl installed).
import pandas as pd
import statsmodels.api as sm
from pydataset import data

# pydataset: calling data(<name>) returns the named dataset.
data('iris')
data('mtcars')

# Catalogue of available R datasets:
# https://vincentarelbundock.github.io/Rdatasets/datasets.html
mtcars = sm.datasets.get_rdataset(dataname='mtcars', package='datasets')
mtcars.data.head()

#%%
# Export to CSV / Excel
# NOTE: from here on `data` is the mtcars DataFrame; it rebinds the name of
# the pydataset loader function imported above.
data = mtcars.data
data.head(6)
type(data)

data.to_csv('mtcars.csv')
# sheet_name is passed by keyword: positional use is deprecated in pandas.
data.to_excel('mtcarsExcel.xlsx', sheet_name='sheet3', header=False)

# Writing to multiple sheets of a single workbook.
# The context manager saves and closes the file on exit; ExcelWriter.save()
# was removed in pandas 2.0, and the `with` form also closes the file if one
# of the writes raises.
with pd.ExcelWriter('test.xlsx', engine='xlsxwriter') as writer:
    # Write each DataFrame to a different worksheet; any sheet name string
    # can be used here.
    data.to_excel(writer, sheet_name='sheet1')
    data.to_excel(writer, sheet_name='sheet2')

#%%
# In IPython / Jupyter, `data.to_excel?` displays the method's documentation.
# That syntax is not valid in a plain Python script; use
# help(data.to_excel) there instead.

# Load from CSV and Excel
from sklearn.cluster import KMeans

# --- K-means with two clusters on X2 --------------------------------------
# NOTE(review): X2 and plt are not defined in this section; presumably they
# are created earlier in the file (plt looks like matplotlib.pyplot) - confirm.
Kmean2 = KMeans(n_clusters=2).fit(X2)  # fit() returns the fitted estimator
centers2 = Kmean2.cluster_centers_
centers2
Kmean2.labels_

# Points coloured by assigned cluster, with the cluster centres drawn on top
# as red stars.
plt.scatter(
    X2[:, 0],
    X2[:, 1],
    s=50,
    c=Kmean2.labels_,
)
plt.scatter(
    centers2[:, 0],
    centers2[:, 1],
    s=100,
    marker='*',
    color=['red'],
)
plt.show()

# --- K-means on the iris sepal measurements -------------------------------
from pydataset import data

iris = data('iris')
# Work on a copy so `iris` stays untouched; note that this rebinds `data`
# from the pydataset loader function to a DataFrame.
data = iris.copy()
data.head()

# How many groups are there?
data['Species'].value_counts()
data.columns

# Features: the two sepal columns; y3 holds the Species values.
X3 = data[['Sepal.Length', 'Sepal.Width']]
X3
y3 = data['Species'].values
y3
X3.shape

plt.scatter(X3['Sepal.Length'], X3['Sepal.Width'], s=50)

# Group the observations into 3 categories.
irisCluster = KMeans(n_clusters=3).fit(X3)
irisCenters = irisCluster.cluster_centers_
irisCenters