def month_by_month():
    """Create a line plot of the total expenses for each month.

    The expense and payment selections are read from the GUI variables
    ``cat`` and ``pay``. For every month (1-12) the rows of ``df`` for that
    month are selected, the filter tables are refreshed against that slice,
    and the ``'Amount'`` column of the filtered rows is summed.

    Side effects: draws on the current matplotlib figure, shows it, and
    resets the filters to the main dataframe via ``update_filts()``.
    """
    exp = str(cat.get())      # expense filter value from the GUI
    payment = str(pay.get())  # payment filter value from the GUI

    months = list(range(1, 13))
    amounts = []
    for month in months:
        df_month = df[df['Month'] == month]
        update_filts(df_month)  # refresh the filters for this month's dataframe
        # NOTE(review): the two masks are OR-ed, which keeps rows matching
        # EITHER filter. If "filter by both" should narrow the selection,
        # this probably wants `&` -- confirm against exp_filters/pay_filters.
        filt = exp_filters[exp] | pay_filters[payment]
        # Sum the 'Amount' column of the rows that passed the filter.
        amounts.append(df_month[filt]['Amount'].sum())

    plt.plot(months, amounts)
    plt.title('{} Expenses Using {} Payment Method'.format(exp, payment))
    # pyplot has no xtitle/ytitle; axis captions are set with xlabel/ylabel.
    plt.xlabel('Month')
    plt.ylabel('Amount Spent')
    update_filts()  # reset the filters to values from the main dataframe
    plt.show()
def seaborn_charts(df):
    """Show example calls for several seaborn chart types on ``df``.

    The column names and data arguments below look like placeholders
    (``"col_name"``, ``""``, ``["some_list"]``) and must be replaced with
    real values before the calls can succeed.

    Parameters:
        df: the dataframe passed as ``data=`` to the seaborn plot calls.
    """
    # NOTE(review): `style` is not a documented barplot parameter (it exists
    # on scatterplot/lineplot); "summer" may have been meant as a palette
    # name -- confirm the intent before relying on this call.
    bar_plot = sns.barplot(data=df, x="col_name", y="col_name",
                           style="summer", hue="summer")

    # to set/reset x, y & title axes names: barplot returns the matplotlib
    # Axes, which exposes set_xlabel / set_ylabel / set_title.
    bar_plot.set_xlabel("x title")
    bar_plot.set_ylabel("y title")
    bar_plot.set_title("my title")

    scatter_plot = sns.scatterplot(data=df, x="", y="")

    # NOTE(review): `y` is given a list of two placeholder names here;
    # verify this is a supported form for the seaborn version in use.
    line_plot = sns.lineplot(data=df, x="", y=["", ""])

    count_plot = sns.countplot(data=df, x="")

    heat_map = sns.heatmap(data=["some_list"], cmap=["yellow", "red"])
# import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# importing dataset
dataset = pd.read_csv('Mall_Customers.csv')
# Columns from position 3 onward are used as the clustering features.
X = dataset.iloc[:, 3:].values

# using dendrograms to determine the optimal number of clusters
import scipy.cluster.hierarchy as sch
dendrogram = sch.dendrogram(sch.linkage(X, method='ward'))
plt.title('Dendrogram')
# pyplot has no xtitle/ytitle; axis captions are set with xlabel/ylabel.
plt.xlabel('Customers')
plt.ylabel('Euclidean Distance')
plt.show()

# fitting Hierarchical Clustering Algorithm to the dataset
from sklearn.cluster import AgglomerativeClustering
# Ward linkage only supports Euclidean distance, which is already the
# default, so the distance argument is omitted. This keeps the call valid
# on scikit-learn versions where `affinity` was renamed to `metric`.
hc = AgglomerativeClustering(n_clusters=5, linkage='ward')
y_hc = hc.fit_predict(X)

# visualising HC: one scatter series per cluster label, using the first two
# feature columns as the x/y coordinates.
cluster_colors = ['red', 'yellow', 'black', 'orange', 'blue']
for cluster_id, cluster_color in enumerate(cluster_colors):
    in_cluster = y_hc == cluster_id
    plt.scatter(X[in_cluster, 0], X[in_cluster, 1], s=100, c=cluster_color,
                label='Cluster - {}'.format(cluster_id + 1))
plt.xlabel('Annual Income - k$')
plt.ylabel('Spending')
### Most consistent stocks per threshold
l = {}
# One row per entry of Betas, one column per threshold; a cell is
# incremented when that entry is selected for that threshold.
overlap = pd.DataFrame(np.zeros((Betas.shape[0], len(thres_array))),
                       index=Betas.index, columns=thres_array)
for thres in thres_array:
    # NOTE(review): argsort() yields sorting positions, not ranks; if the
    # intent is "the 200 highest-consistency entries", rank() (or a double
    # argsort) is probably what is wanted -- confirm.
    l[thres] = consistency[thres][
        consistency[thres].argsort() > Betas.shape[0] - 200]
    # `l` is a dict keyed by threshold, so the selected labels come from
    # l[thres] (presumably a pandas Series -- verify), not from `l` itself.
    overlap.loc[l[thres].index, thres] += 1

plt.figure()
for d in [0, 1, 2, 4, 5, 10]:
    # Count the nodes whose degree equals d at each threshold. Comparing
    # against the loop variable gives each labelled curve its own data.
    # NOTE(review): np.sum gives a count while the y-axis caption says
    # "proportion"; np.mean may have been intended -- confirm.
    plt.plot(thres_array,
             [np.sum(degree[t] == d) for t in thres_array],
             label=d)
# pyplot has no xtitle/ytitle; axis captions are set with xlabel/ylabel.
plt.xlabel('threshold for the correlation')
plt.ylabel('proportion of nodes with given degree')
plt.title('Evolution of the degree with the threshold value')
plt.legend(loc='upper left')
plt.show()

#### Decomposition in the eigenvalue space
import pygsp
Gg = pygsp.graphs.Graph(cor_res)
Gg.compute_fourier_basis()
# Keep only the first five eigenvalues; the rest are zeroed out.
D = np.zeros(Gg.N)
D[:5] = Gg.e[:5]
D = np.diag(D)
# Reconstruct U * D * U^T with true matrix products, using the graph's own
# Fourier basis on both sides.
Alt = Gg.U.dot(D).dot(Gg.U.T)

#### Definition of a sufficient statistic
### Distances between matrices
T = np.array([[1,0], [1,0], [1,0], [1,0], [1,0], [0,1], [0,1], [0,1], [0,1], [0,1]]) #------------------------------------------- plt.scatter(X[:,0], X[:,1]) plt.title('Datos para ajustar ELM') plt.xtitle('X') plt.ytitle('Y') plt.show() #------------------------------------------- L = 70 # numero de capas ocultas 5e06 no puede operar N,d = X.shape m = T.shape[1] a, b = generar_a_b(d,L) #print("Dimension a: ",a.shape) H, H_tr = generar_H(a,b,X,N,L) C = .10 beta = [] # Version cuando N grande if N>L:
# k-nearest-neighbours classifier (28 neighbours, Minkowski metric with p=2).
classifier_2 = KNeighborsClassifier(n_neighbors=28, metric='minkowski', p=2)
classifier_2.fit(X_train, y_train)
y_pred_2 = classifier_2.predict(X_test)
Cm_3 = metrics.classification_report(y_test, y_pred_2)
Cm_4 = metrics.accuracy_score(y_test, y_pred_2)

from sklearn.metrics import roc_curve, auc
# ROC curve of `classifier` on the training data, using the second column
# of predict_proba as the score.
y_pred_proba = classifier.predict_proba(X_train)[:, 1]
fpr, tpr, threshold = roc_curve(y_train, y_pred_proba)
# auc() integrates a curve given its x/y coordinates, so it must receive the
# ROC points (fpr, tpr) rather than the raw labels and scores.
auc_logit = auc(fpr, tpr)
plt.figure(figsize=(5, 5), dpi=100)
plt.plot(fpr, tpr, linestyle='-')
# pyplot has no xtitle/ytitle; axis captions are set with xlabel/ylabel.
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")

# make_blobs is exported from sklearn.datasets; the samples_generator
# submodule path is no longer available in current scikit-learn releases.
from sklearn.datasets import make_blobs
from matplotlib.colors import ListedColormap
X_train, y_train = make_blobs(n_samples=100, centers=2, random_state=0,
                              cluster_std=0.60)
class_colors = ListedColormap(('red', 'green'))
for i, j in enumerate(np.unique(y_train)):
    in_class = y_train == j
    # A single explicit colour per class is passed via `color=`; `cmap` is
    # dropped because it has no effect when no numeric data is colour-mapped.
    plt.scatter(X_train[in_class, 0], X_train[in_class, 1],
                color=class_colors(i), s=50, label=j)