# on your dataset.
#
# Hint: If you don't see all three variables: 'bgr','wc' and 'rc', then
# you probably didn't complete the previous step properly.
#
# Per-feature variance first, then the summary statistics.
feature_variance = df.var()
print(feature_variance)
feature_summary = df.describe()
print(feature_summary)


# TODO: This method assumes your dataframe is called df. If it isn't,
# make the appropriate changes. Don't alter the code in scaleFeatures()
# just yet though!
#
# Scaling is optional and controlled by the scaleFeatures switch.
if scaleFeatures:
    df = helper.scaleFeatures(df)


# TODO: Run PCA on your dataset and reduce it to 2 components
# Ensure your PCA instance is saved in a variable called 'pca',
# and that the results of your transformation are saved in 'T'.
#
from sklearn.decomposition import PCA

# fit() hands back the estimator itself, so 'pca' is the fitted instance.
pca = PCA(n_components=2).fit(df)
T = pca.transform(df)


# Plot the transformed data as a scatter plot. Recall that transforming
# the data will result in a NumPy NDArray. You can either use MatPlotLib
#
# Hint: If you don't see all three variables: 'bgr','wc' and 'rc', then
# you probably didn't complete the previous step properly.
#
# Print a [column, variance] pair per feature. The parenthesised form is a
# valid call on Python 3 and prints the same list on Python 2.
for i in sub.columns:
    print([i, sub[i].var()])

# The summary statistics must be printed explicitly; a bare expression
# statement shows nothing when the file is run as a script.
print(sub.describe())


# TODO: This method assumes your dataframe is called df. If it isn't,
# make the appropriate changes. Don't alter the code in scaleFeatures()
# just yet though!
#
# Scaling is optional and controlled by the scaleFeatures switch.
if scaleFeatures:
    sub = helper.scaleFeatures(sub)


# TODO: Run PCA on your dataset and reduce it to 2 components
# Ensure your PCA instance is saved in a variable called 'pca',
# and that the results of your transformation are saved in 'T'.
#
pca = PCA(n_components=2)
pca.fit(sub)
T = pca.transform(sub)


# Plot the transformed data as a scatter plot. Recall that transforming
# the data will result in a NumPy NDArray. You can either use MatPlotLib
# to graph it directly, or you can convert it to DataFrame and have pandas
# Print out the results. Also print out the results of running .describe
# on your dataset.
#
# Hint: If you don't see all three variables: 'bgr','wc' and 'rc', then
# you probably didn't complete the previous step properly.
#
# Both results are printed explicitly; a bare expression statement shows
# nothing when the file is run as a script.
print(df.var(axis=0))
print(df.describe())


# This method assumes your dataframe is called df. If it isn't,
# make the appropriate changes. Don't alter the code in scaleFeatures()
# just yet though!
#
# Scaling is optional and controlled by the scaleFeatures switch.
if scaleFeatures:
    df = helper.scaleFeatures(df)


# Run PCA on your dataset and reduce it to 2 components
# Ensure your PCA instance is saved in a variable called 'pca',
# and that the results of your transformation are saved in 'T'.
#
pca = PCA(n_components=2)
pca.fit(df)
T = pca.transform(df)


# Plot the transformed data as a scatter plot. Recall that transforming
# the data will result in a NumPy NDArray. You can either use MatPlotLib
# to graph it directly, or you can convert it to DataFrame and have pandas
# do it for you.
# variance will dominate. Go ahead and peek into your data using a
# command that will check the variance of every feature in your dataset.
# Print out the results. Also print out the results of running .describe
# on your dataset.
#
# Hint: If you don't see all three variables: 'bgr','wc' and 'rc', then
# you probably didn't complete the previous step properly.
#
# Per-feature variance first, then the summary statistics.
print(df2.var())
print(df2.describe())


# TODO: This method assumes your dataframe is called df. If it isn't,
# make the appropriate changes. Don't alter the code in scaleFeatures()
# just yet though!
#
# Scaling is optional and controlled by the scaleFeatures switch.
if scaleFeatures:
    df2 = helper.scaleFeatures(df2)


# TODO: Run PCA on your dataset and reduce it to 2 components
# Ensure your PCA instance is saved in a variable called 'pca',
# and that the results of your transformation are saved in 'T'.
#
from sklearn.decomposition import PCA

pca = PCA(n_components=2, svd_solver='full')
pca.fit(df2)
T = pca.transform(df2)


# Plot the transformed data as a scatter plot. Recall that transforming
#
# Hint: If you don't see all three variables: 'bgr','wc' and 'rc', then
# you probably didn't complete the previous step properly.
#
# Per-feature variance first, then the summary statistics.
s1_variance = s1.var()
print(s1_variance)
s1_summary = s1.describe()
print(s1_summary)


# TODO: This method assumes your dataframe is called df. If it isn't,
# make the appropriate changes. Don't alter the code in scaleFeatures()
# just yet though!
#
# Scaling is optional and controlled by the scaleFeatures switch.
if scaleFeatures:
    s1 = helper.scaleFeatures(s1)


# TODO: Run PCA on your dataset and reduce it to 2 components
# Ensure your PCA instance is saved in a variable called 'pca',
# and that the results of your transformation are saved in 'T'.
#
from sklearn.decomposition import PCA

# train on input; fit() hands back the estimator itself
pca = PCA(n_components=2).fit(s1)
# transform input
# Print out the results. Also print out the results of running .describe
# on your dataset.
#
# Hint: If you don't see all three variables: 'bgr','wc' and 'rc', then
# you probably didn't complete the previous step properly.
#
# Per-feature variance first, then the summary statistics.
print(df1.var())
print(df1.describe())


# TODO: This method assumes your dataframe is called df. If it isn't,
# make the appropriate changes. Don't alter the code in scaleFeatures()
# just yet though!
#
# Scaling is optional and controlled by the scaleFeatures switch.
if scaleFeatures:
    df1 = helper.scaleFeatures(df1)


# TODO: Run PCA on your dataset and reduce it to 2 components
# Ensure your PCA instance is saved in a variable called 'pca',
# and that the results of your transformation are saved in 'T'.
#
from sklearn.decomposition import PCA

pca = PCA(n_components=2)
pca.fit(df1)
T = pca.transform(df1)


# Plot the transformed data as a scatter plot. Recall that transforming
# Print out the results. Also print out the results of running .describe
# on your dataset.
#
# Hint: If you don't see all three variables: 'bgr','wc' and 'rc', then
# you probably didn't complete the previous step properly.
#
# Both results are printed explicitly; a bare expression statement shows
# nothing when the file is run as a script.
print(kidney_df1.var())
print(kidney_df1.describe())


# TODO: This method assumes your dataframe is called df. If it isn't,
# make the appropriate changes. Don't alter the code in scaleFeatures()
# just yet though!
#
# NOTE(review): the condition below tests the helper callable itself (the
# same object that is called on the next line) rather than an on/off flag,
# so the branch looks like it always runs. Confirm whether a module-level
# switch was intended here before changing it.
if helper.scaleFeatures:
    kidney_df1 = helper.scaleFeatures(kidney_df1)


# TODO: Run PCA on your dataset and reduce it to 2 components
# Ensure your PCA instance is saved in a variable called 'pca',
# and that the results of your transformation are saved in 'T'.
#
from sklearn.decomposition import PCA

pca = PCA(n_components=2, svd_solver='full')
pca.fit(kidney_df1)
T = pca.transform(kidney_df1)


# Plot the transformed data as a scatter plot. Recall that transforming
# the data will result in a NumPy NDArray. You can either use MatPlotLib
# Keep only the three features of interest.
feature_names = ['bgr', 'wc', 'rc']
mdf = df.loc[:, feature_names]

# Printing out and checking the dataframe's dtypes.
print(mdf.dtypes)

# Convert every column to numeric; 'coerce' is forwarded to pd.to_numeric
# as its errors argument.
mdf = mdf.apply(pd.to_numeric, errors='coerce')
print(mdf.dtypes)
print(mdf)

# Checking the variance of every feature in your dataset.
print(mdf.var(axis=0))
print(mdf.describe())

# Scaling is optional and controlled by the scaleFeatures switch.
if scaleFeatures:
    mdf = helper.scaleFeatures(mdf)

# Running PCA on the dataset and reducing it to 2 components
pca = PCA(n_components=2)
fit_result = pca.fit(mdf)
print(fit_result)

# data returns in a NumPy NDArray
T = pca.transform(mdf)
print(T)

# Converting to a Pandas Dataframe.
#
# Note: Since we transformed via PCA, we no longer have column names.
# command that will check the variance of every feature in your dataset.
# Print out the results. Also print out the results of running .describe
# on your dataset.
#
# Hint: If you don't see all three variables: 'bgr','wc' and 'rc', then
# you probably didn't complete the previous step properly.
#
# Both results are printed explicitly; a bare expression statement shows
# nothing when the file is run as a script.
print(cols.var())
print(cols.describe())


# TODO: This method assumes your dataframe is called df. If it isn't,
# make the appropriate changes. Don't alter the code in scaleFeatures()
# just yet though!
#
# Scaling is optional and controlled by the scaleFeatures switch.
if scaleFeatures:
    cols = helper.scaleFeatures(cols)


# TODO: Run PCA on your dataset and reduce it to 2 components
# Ensure your PCA instance is saved in a variable called 'pca',
# and that the results of your transformation are saved in 'T'.
#
from sklearn.decomposition import PCA

pca = PCA(n_components=2, svd_solver='full')
pca.fit(cols)
T = pca.transform(cols)


# Plot the transformed data as a scatter plot. Recall that transforming
# the data will result in a NumPy NDArray. You can either use MatPlotLib
# to graph it directly, or you can convert it to DataFrame and have pandas
# do it for you.
#
# Hint: If you don't see all three variables: 'bgr','wc' and 'rc', then
# you probably didn't complete the previous step properly.
#
# Per-feature variance first, then the summary statistics.
s1_variance = s1.var()
print(s1_variance)
s1_summary = s1.describe()
print(s1_summary)


# TODO: This method assumes your dataframe is called df. If it isn't,
# make the appropriate changes. Don't alter the code in scaleFeatures()
# just yet though!
#
# Scaling is optional and controlled by the scaleFeatures switch.
if scaleFeatures:
    s1 = helper.scaleFeatures(s1)


# TODO: Run PCA on your dataset and reduce it to 2 components
# Ensure your PCA instance is saved in a variable called 'pca',
# and that the results of your transformation are saved in 'T'.
#
from sklearn.decomposition import PCA

# train on input; fit() hands back the estimator itself
pca = PCA(n_components=2).fit(s1)
# transform input
T = pca.transform(s1)