def plotBestFit(weights):
    data_mat, label_mat = load_dataset()
    data_arr = np.array(data_mat)
    n = np.shape(data_arr)[0]
    xcord_1 = []
    ycord_1 = []
    xcord_2 = []
    ycord_2 = []
    for i in range(n):
        if int(label_mat[i]) == 1:
            xcord_1.append(data_arr[i, 1])
            ycord_1.append(data_arr[i, 2])
        else:
            xcord_2.append(data_arr[i, 1])
            ycord_2.append(data_arr[i, 2])
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.scatter(xcord_1, ycord_1, s=30, c="red", marker='s')
    ax.scatter(xcord_2, ycord_2, s=30, c="green")
    x = np.arange(-3.0, 3.0, 0.1)
    # Decision boundary: w0 + w1*x1 + w2*x2 = 0, solved for x2.
    y = (-weights[0] - weights[1] * x) / weights[2]
    ax.plot(x, y)
    plt.xlabel("x1")
    plt.ylabel("x2")
    plt.show()
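plotBestFit depends on a load_dataset() helper that is not shown. A minimal sketch, assuming a whitespace-separated file with two features and a 0/1 label per row; the filename and format are assumptions, not confirmed by the snippet above.

import numpy as np

def load_dataset(path='testSet.txt'):
    # Hypothetical loader: each row is 'x1 x2 label'.
    data_mat, label_mat = [], []
    with open(path) as fh:
        for line in fh:
            x1, x2, label = line.split()
            # prepend a bias term of 1.0 so weights[0] acts as the intercept
            data_mat.append([1.0, float(x1), float(x2)])
            label_mat.append(int(label))
    return data_mat, label_mat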
def plot_bar_pdf(self):
    """Function to plot the pdf of the binomial distribution

    Args:
        None

    Returns:
        list: x values for the pdf plot
        list: y values for the pdf plot
    """
    x = []
    y = []
    for i in range(self.n + 1):
        x.append(i)
        y.append(self.pdf(i))
    plt.bar(x, y)
    plt.title("Result distribution")
    plt.xlabel("result")
    plt.ylabel("probability of result")
    return x, y
def plot_bar(self):
    """Function to output a histogram of the instance variable data using
    the matplotlib pyplot library.

    Args:
        None

    Returns:
        None
    """
    # Bar chart of the data: the x-axis holds the outcomes 0 and 1, the
    # y-axis the expected count of each outcome in n trials. For example,
    # for a coin where heads = 1 and tails = 0 flipped 35 times with 20
    # heads and 15 tails, the chart has two bars: 0 with height 15 and
    # 1 with height 20.
    x_pos = [1, 0]
    y = [self.n * self.p, self.n * (1 - self.p)]
    plt.bar(x_pos, y)
    plt.title("Binomial Distribution Bar Chart")
    plt.xlabel("scenario")
    plt.ylabel("occurrences")
def plot_bar_pdf(self):
    """Function to plot the pdf of the binomial distribution

    Args:
        None

    Returns:
        list: x values for the pdf plot
        list: y values for the pdf plot
    """
    # Plot the probability density function from k = 0 to k = n using the
    # pdf() method defined above, and return the x and y values used.
    x_pos = list(range(self.n + 1))  # fixed: was range(n + 1) with n undefined
    y = [self.pdf(i) for i in x_pos]
    plt.bar(x_pos, y)
    plt.title("Probability Density Function Bar Chart")
    plt.xlabel("k")
    plt.ylabel("density function")
    return x_pos, y
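The methods above assume an instance with attributes n and p and a pdf() method. A hedged usage sketch, assuming a class named Binomial with constructor Binomial(p, n); the class name and signature are assumptions.

binomial = Binomial(0.4, 20)    # hypothetical constructor: p = 0.4, n = 20 trials
binomial.plot_bar()             # expected counts of the outcomes 0 and 1
x, y = binomial.plot_bar_pdf()  # probability of each k in 0..n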
def printPlot(self):
    plt.plot(self.df['סיכום ברוטו'], self.df['מס הכנסה'],
             label='Income tax vs. gross total')
    plt.xlabel('Gross total')        # 'סיכום ברוטו'
    plt.ylabel('Income tax')         # 'מס הכנסה'
    plt.title('Salary/tax ratio')    # 'יחס שכר/מס'
    plt.legend()
    print(self.df)
def plot_matches_by_team():
    y = ipl_df[['batting_team', 'match_code']].groupby(['batting_team']).agg('nunique')
    x = np.arange(len(y.index))
    plt.bar(x, y['match_code'])  # fixed: was the undefined name y1
    plt.xlabel('Team Names')
    plt.ylabel('Matches Played')
    plt.xticks(x, y.index.values, rotation=90)
    plt.show()
def plot_image(self):
    plt.scatter(self.sublevels, self.commits)
    plt.ylabel('fix commits')
    plt.xlabel('kernel sublevel')
    plt.savefig("sublevel_%s.png" % self.rev)
    plt.clf()
    plt.scatter(self.release_hours, self.commits)
    plt.ylabel('fix commits')
    plt.xlabel('hours')
    plt.savefig("hours_%s.png" % self.rev)
def scan(self):
    param = self.parameters.widget.get()
    filename = param['Filename']
    F = open(filename + '.dat', 'w')
    F2 = open(filename + 'wavelength.dat', 'w')
    start_wavelength = param['Start'].magnitude * 1e9
    stop_wavelength = param['Stop'].magnitude * 1e9
    speed = param['Speed'].magnitude * 1e9
    n = param['Num Scan']
    self.spec = []
    with Client(self.conn1) as dlc:
        dlc.set("laser1:ctl:scan:wavelength-begin", start_wavelength)
        dlc.set("laser1:ctl:scan:wavelength-end", stop_wavelength)
        dlc.set("laser1:ctl:scan:speed", speed)
        dlc.set("laser1:ctl:scan:microsteps", True)
        dlc.set("laser1:ctl:scan:shape", 1)  # 0 = Sawtooth, 1 = Triangle
        dlc.set("laser1:ctl:scan:trigger:output-enabled", True)
        for x in range(n):  # fixed: range(n - 1) ran one scan short of Num Scan
            dlc.set("laser1:ctl:wavelength-set", start_wavelength)
            dlc.set("laser1:ctl:scan:trigger:output-threshold", start_wavelength + 0.1)
            while True:
                st = dlc.get("io:digital-out2:value-act")
                if not st:
                    break
            dlc.set("laser1:ctl:scan:trigger:output-threshold", stop_wavelength)
            time.sleep(0.5)
            act_start = self.wm.measure_wavelength()
            dlc.exec("laser1:ctl:scan:start")
            daq.start()
            if dlc.get("io:digital-out2:value-act"):
                dlc.exec("laser1:ctl:scan:pause")
            data = daq.read(nidaqmx.constants.READ_ALL_AVAILABLE)
            daq.wait_until_done()
            self.xs.append(data)
            daq.stop()
            act_stop = self.wm.measure_wavelength()
            print('scan %d: act start = %f, act stop = %f' % (x + 1, act_start, act_stop))
    # Average the n scans (assumes each scan returned the same number of samples).
    self.spec = np.array(self.xs).mean(axis=0)
    self.wl = np.linspace(act_start, act_stop, len(self.spec))
    plt.plot(self.wl, self.spec)
    plt.xlabel('wavelength/nm')
    plt.ylabel('transmission')
    for item in self.spec:
        F.write("%f," % item)
    F.write("\n")
    for item in self.wl:
        F2.write("%f," % item)  # wavelengths go to the dedicated wavelength file
    return
def makeGraph(data, loan):
    xcor = []
    ycor = []
    for point in data:
        xcor.append(point[0])
        ycor.append(point[1])
    pyplot.plot(xcor, ycor)
    # 'intrest' is kept as spelled: it is the key used by the loan dict.
    pyplot.title(str(100 * loan['intrest']) + "% Interest With $" +
                 str(loan['monthly']) + " Monthly Payments")
    pyplot.xlabel("Month")
    pyplot.ylabel("Principal")
    pyplot.show()
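makeGraph expects data as (month, principal) points and loan as a dict with 'intrest' and 'monthly' keys. A sketch of how such points might be produced; the amortize helper below is an assumption, not part of the original.

def amortize(principal, loan):
    # Hypothetical helper: monthly compounding at the APR in loan['intrest'],
    # fixed payment loan['monthly']; returns (month, balance) points.
    points, month = [], 0
    while principal > 0 and month < 600:
        points.append((month, principal))
        principal = principal * (1 + loan['intrest'] / 12) - loan['monthly']
        month += 1
    return points

loan = {'intrest': 0.05, 'monthly': 1200}
makeGraph(amortize(200000, loan), loan)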
def plot_bar(self):
    """Function to output a histogram of the instance variable data using
    the matplotlib pyplot library.

    Args:
        None

    Returns:
        None
    """
    # plt.bar takes positional x and height arguments (there is no y keyword);
    # the '0' bar shows the expected count of zeros, the '1' bar of ones.
    plt.bar(['0', '1'], [self.n * (1 - self.p), self.n * self.p])
    plt.title('Bar chart of the data')
    plt.xlabel('result')
    plt.ylabel('repetition')
def plotmtl(datac, iii, system, agecol, pgraph, pfolder):
    """
    Function to plot the MTL data

    INPUT:
        datac  : data array, one row per sample
        iii    : index of the sample to plot
        system : key selecting the dating system
        agecol : dict mapping each system to its starting column in datac
        pgraph : plot flag (not used in this function)
        pfolder: plot output folder (not used in this function)

    OUTPUTS:
        No outputs

    USAGE: plotmtl(datac, iii, system, agecol, pgraph, pfolder)
    """
    # open a new figure
    # plt.figure(iii + 3)
    lrange = np.arange(0.5, 17.5)
    mtl = datac[iii, agecol[system]:agecol[system] + 17]
    plt.bar(lrange, mtl, width=1, bottom=None, label='MTL', color='r', alpha=0.5)
    mtlp = datac[iii, agecol[system] + 18:agecol[system] + 18 + 17]
    plt.bar(lrange, mtlp, width=1, bottom=None, label='Predicted MTL', color='b', alpha=0.5)
    plt.xlabel(str(agecol[system]))
    plt.ylabel('Frequency')
    plt.legend(loc='best', numpoints=1)
    plt.title('Sample number ' + str(iii + 1))  # fixed: pyplot has no set_title
    return
def plot_histogram(self):
    """Method to output a histogram of the instance variable data using
    the matplotlib pyplot library.

    Args:
        None

    Returns:
        None
    """
    # Plot a histogram of the data list with labeled axes and a title.
    plt.hist(self.data)
    plt.title("Histogram of data")
    plt.xlabel('data')
    plt.ylabel('count')
def describe_year(year):
    # Sum convictions per borough for the requested year; grouping by the
    # borough column is implied by the two-column iteration below.
    filtereddf = (df.filter(df['year'] == year)
                    .groupBy('borough')
                    .agg({'value': 'sum'})
                    .withColumnRenamed('sum(value)', 'convictions'))
    rows = list(filtereddf.toLocalIterator())
    burough_list = [x[0] for x in rows]
    conviction_list = [x[1] for x in rows]
    plt.figure(figsize=(33, 10))
    plt.bar(burough_list, conviction_list)
    plt.title('Crime for the year: ' + str(year), fontsize=30)
    plt.xlabel('Boroughs', fontsize=30)
    plt.ylabel('Convictions', fontsize=30)
    plt.xticks(rotation=90, fontsize=30)
    plt.yticks(fontsize=30)
    plt.autoscale()
    plt.show()
def visualize_the_confusion_matrix(list_z_score_data):
    list_label = [
        'C1', 'C2', 'C3', 'C4', 'C5', 'C6', 'C7', 'C8', 'C9', 'C10',
        'constitution'
    ]
    plt.imshow(list_z_score_data, interpolation='nearest')
    plt.title("Confusion matrix visualization")  # 可视化混淆矩阵
    plt.colorbar()
    x_locations = numpy.array(range(len(list_label)))
    plt.xticks(x_locations, list_label, rotation=90)
    plt.yticks(x_locations, list_label)
    plt.ylabel("True label")
    plt.xlabel("Predicted label")
    # cm = confusion_matrix(y_true, y_pred)
    numpy.set_printoptions(precision=2)
def gmeans(X, alpha=0.0001, k=1):
    needtoinc = True
    trialData = X
    fit = KMeans(n_clusters=k)
    initresult = fit.fit(trialData)
    centers = initresult.cluster_centers_
    while needtoinc:
        needtoinc = False
        # Test cluster 0 first to seed the newcenters array.
        normTestData = np.matrix(trialData[initresult.labels_ == 0])
        pvalue = normalityTest(normTestData)
        if pvalue <= alpha:
            needtoinc = True
            tempresults = KMeans(2).fit(normTestData)
            newcenters = tempresults.cluster_centers_
        else:
            newcenters = centers[0, :]
        k = centers.shape[0]
        for i in range(1, k):
            normTestData = np.matrix(trialData[initresult.labels_ == i])
            pvalue = normalityTest(normTestData)
            if pvalue <= alpha:
                needtoinc = True
                tempresults = KMeans(2).fit(normTestData)
                newcenters = np.vstack((newcenters, tempresults.cluster_centers_))
            else:
                newcenters = np.vstack((newcenters, centers[i, :]))
        centers = newcenters
        initresult = KMeans(centers.shape[0], init=centers).fit(trialData)
        centers = initresult.cluster_centers_
    print('optimal no of clusters:', centers.shape[0])
    x = trialData.to_numpy()  # .as_matrix() was removed from pandas
    plt.figure()
    plt.scatter(x[:, 0], x[:, 1], c=initresult.labels_)
    plt.xlabel('x1')
    plt.ylabel('x2')
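gmeans calls a normalityTest helper that is not shown. A sketch under the assumption that it projects the cluster onto one dimension and returns a normality p-value; the projection choice and the use of scipy's D'Agostino test here are assumptions (the G-means paper projects onto the vector between child centroids and uses Anderson-Darling).

import numpy as np
from scipy import stats

def normalityTest(data):
    # Project the cluster onto its first principal axis and test that
    # one-dimensional projection for normality.
    data = np.asarray(data)
    centered = data - data.mean(axis=0)
    _, _, vt = np.linalg.svd(centered, full_matrices=False)
    projection = centered @ vt[0]
    # D'Agostino's test; scipy recommends at least ~20 samples.
    _, pvalue = stats.normaltest(projection)
    return pvalue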
def PlotbyMonth(self, Indicator, level, name='null'):
    '''
    This method generates a time-series plot of the collision statistic
    requested by the user.
    '''
    df = self.Table_Dict[level](Indicator, name)
    if name == 'null':
        rowSum = df.sum(axis=1)
        rowSum.plot(kind='line')
        plt.title('Time series analysis on ' + level + ' level')
        plt.ylabel(self.IndicatorPrint[Indicator])
        plt.xlabel('Time')
        plt.savefig('Time series analysis on ' + level + ' level')
        plt.show()
    else:
        totalSum = df.sum(axis=0)
        totalSum.plot(kind='line')
        plt.title('Time series analysis for ' + name)
        plt.ylabel(self.IndicatorPrint[Indicator])
        plt.xlabel('Time')
        plt.savefig('Time series analysis for ' + name)
        plt.show()
def plot_return_risk():
    ret, vol = return_risk(stocks)
    color = np.array([0.18, 0.96, 0.75, 0.3, 0.9, 0.5])  # fixed: was np.arry
    plt.scatter(ret, vol, marker='o', c=color, s=500,
                cmap=plt.get_cmap('Spectral'))  # fixed: was camp=
    plt.xlabel("Mean daily return (%)")    # 日收益率均值%
    plt.ylabel("Standard deviation (%)")   # 标准差%
    for label, x, y in zip(stocks.keys(), ret, vol):
        plt.annotate(label, xy=(x, y), xytext=(20, 20),
                     textcoords="offset points", ha="right", va="bottom",
                     bbox=dict(boxstyle='round,pad=0.5', fc='yellow', alpha=0.5),
                     arrowprops=dict(arrowstyle="->", connectionstyle="arc3,rad=0"))
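plot_return_risk relies on a return_risk(stocks) helper and a global stocks dict with six entries (matching the six colors). A sketch under the assumption that stocks maps a display name to a pandas Series of daily closing prices; the helper's exact semantics are an assumption.

import numpy as np

def return_risk(stocks):
    # Hypothetical helper: mean and standard deviation of daily returns, in %.
    rets, vols = [], []
    for prices in stocks.values():
        daily = prices.pct_change().dropna() * 100
        rets.append(daily.mean())
        vols.append(daily.std())
    return np.array(rets), np.array(vols)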
dataset_total = pd.concat((dataset_train['Open'], dataset_test['Open']), axis=0)
inputs = dataset_total[len(dataset_total) - len(dataset_test) - 60:].values
inputs = inputs.reshape(-1, 1)
inputs = sc.transform(inputs)
X_test = []
for i in range(60, 80):
    X_test.append(inputs[i - 60:i, 0])
X_test = np.array(X_test)
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))
predicted_stock_price = regressor.predict(X_test)

# Inverting the scaling normalization
predicted_stock_price = sc.inverse_transform(predicted_stock_price)

# Plotting the graphs
plt.plot(real_stock_price, color='red', label='Real Google Stock Price')
plt.plot(predicted_stock_price, color='blue', label='Predicted Google Stock Price')
plt.title('Google Stock Price Prediction')
plt.xlabel('Time')
plt.ylabel('Google Stock Price')
plt.legend()
plt.show()
for i in range(len(colors)):
    px = features[:, 0][labels == i]
    py = features[:, 1][labels == i]
    plt.scatter(px, py, c=colors[i])
plt.legend(labels_names)
plt.xlabel('Sepal Length')
plt.ylabel('Sepal Width')
plt.show()

# Plot 2: petal length vs. petal width
for i in range(len(colors)):
    px = features[:, 1][labels == i]
    py = features[:, 2][labels == i]
    plt.scatter(px, py, c=colors[i])
plt.legend(labels_names)
plt.xlabel('Petal Length')
plt.ylabel('Petal Width')
plt.show()

# Estimating two principal components using PCA
est = PCA(n_components=2)
x_pca = est.fit_transform(features)
colors = ['black', 'orange', 'pink']
for i in range(len(colors)):
    px = x_pca[:, 0][labels == i]
    py = x_pca[:, 1][labels == i]
    plt.scatter(px, py, c=colors[i])
plt.legend(labels_names)
plt.xlabel('First Principal Component')
plt.ylabel('Second Principal Component')
                                                    test_size=0.2,
                                                    random_state=0)

# Fitting Multiple Linear Regression on the dataset
from sklearn.linear_model import LinearRegression
regressor = LinearRegression()
regressor.fit(x_train, y_train)

# Predicting the Test set results
y_pred = regressor.predict(x_test)

# Visualising the test set without the backward-elimination method
plt.scatter(x_test[:, [20]], y_test, color='red')
plt.plot(x_test[:, [20]], regressor.predict(x_test), color='green')
plt.title('F/R vs S/R (Without Backward Elimination Method)')
plt.xlabel('F/R')
plt.ylabel('S/R')
plt.show()

# Building the optimal model using Backward Elimination
import statsmodels.api as sm  # fixed: sm.OLS lives in statsmodels.api
x = np.append(arr=np.ones((1143, 1)).astype(int), values=x, axis=1)
x_opt = x[:, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
              12, 13, 14, 15, 16, 17, 18, 19, 20, 21]]
regressor_OLS = sm.OLS(endog=y, exog=x_opt).fit()
regressor_OLS.summary()

# Remove the index-14 predictor (highest p-value) from x and rebuild x_opt
# with the remaining optimal predictors
x_opt = x[:, [
                                                    random_state=0)

# Feature Scaling
"""from sklearn.preprocessing import StandardScaler
sc_X = StandardScaler()
X_train = sc_X.fit_transform(X_train)
X_test = sc_X.transform(X_test)"""

# Fitting simple linear regression
from sklearn.linear_model import LinearRegression
regressor = LinearRegression()
regressor.fit(X_train, y_train)

# Predicting results
y_pred = regressor.predict(X_test)

# Plotting the training set
plt.scatter(X_train, y_train, color='red')
plt.plot(X_train, regressor.predict(X_train), color='blue')
plt.title('Salary vs Experience (Training Set)')
plt.xlabel('Years of Experience')
plt.ylabel('Salary')
plt.show()

# Plotting the test set
plt.scatter(X_test, y_test, color='red')
plt.plot(X_train, regressor.predict(X_train), color='blue')
plt.title('Salary vs Experience (Test Set)')
plt.xlabel('Years of Experience')
plt.ylabel('Salary')
plt.show()
              optimizer=opt,
              metrics=["accuracy"])

# Train
print("Training network")
H = model.fit_generator(aug.flow(trainX, trainY, batch_size=BS),
                        validation_data=(testX, testY),
                        steps_per_epoch=len(trainX) // BS,
                        epochs=EPOCHS,
                        verbose=1)

# Save model
print("Serializing network")
model.save(args["model"])  # fixed: args[model] referenced an undefined name

# Plotting the training curves
plt.style.use("ggplot")
plt.figure()
N = EPOCHS
plt.plot(np.arange(0, N), H.history["loss"], label="train_loss")  # fixed: was np.parse
plt.plot(np.arange(0, N), H.history["val_loss"], label="val_loss")
plt.plot(np.arange(0, N), H.history["acc"], label="train_acc")  # older Keras uses the 'acc' key
plt.plot(np.arange(0, N), H.history["val_acc"], label="val_acc")
plt.title("Training Loss and Accuracy")
plt.xlabel("Epoch #")
plt.ylabel("Loss/Accuracy")
plt.legend(loc="lower left")
plt.savefig(args["plot"])
imp_vol = bsm_call_imp_vol(V0,
                           options_data.loc[option]['STRIKE'],
                           options_data.loc[option]['TTM'],
                           r,
                           options_data.loc[option]['PRICE'],
                           sigma_est=2,
                           it=100)
options_data['IMP_VOL'].loc[option] = imp_vol

# Plot implied volatilities
plot_data = options_data[options_data['IMP_VOL'] > 0]
maturities = sorted(set(options_data['MATURITY']))
plt.figure(figsize=(8, 6))
for maturity in maturities:
    # Select data for this maturity
    data = plot_data[options_data.MATURITY == maturity]  # fixed: column is MATURITY
    plt.plot(data['STRIKE'], data['IMP_VOL'], label=maturity.date(), lw=1.5)
    plt.plot(data['STRIKE'], data['IMP_VOL'], 'r.')
plt.grid(True)
plt.xlabel('strike')
plt.ylabel('implied volatility of volatility')
plt.legend()
plt.show()

# Group data for simplicity
keep = ['PRICE', 'IMP_VOL']
group_data = plot_data.groupby(['MATURITY', 'STRIKE'])[keep]
group_data = group_data.sum()
group_data.head()
# Print the first five rows
print(df_train.head())
print("Total number of question pairs for training: {}".format(len(df_train)))
print("Duplicate pairs: {}%".format(
    round(df_train['is_duplicate'].mean() * 100, 2)))
qids = pd.Series(df_train['qid1'].tolist() + df_train['qid2'].tolist())
print("Total number of questions in the training data: {}".format(
    len(np.unique(qids))))
print("Number of questions that appear multiple times: {}".format(
    np.sum(qids.value_counts() > 1)))

plt.figure(figsize=(12, 5))
plt.hist(qids.value_counts(), bins=50)
plt.yscale("log", nonposy='clip')
plt.title("Log-Histogram of question appearance counts")
plt.xlabel("Number of occurrences of question")
plt.ylabel("Number of questions")

p = df_train['is_duplicate'].mean()
print(
    "predicted_score: ",
    log_loss(df_train['is_duplicate'],
             np.zeros_like(df_train['is_duplicate']) + p))
df_test = pd.read_csv("input/test.csv")
sub = pd.DataFrame({'test_id': df_test['test_id'], 'is_duplicate': p})
sub.to_csv("naive_submission.csv", index=False)
print(sub.head())
print(df_test.head())
# print(accuracy)

# Predict
forecast_set = clf.predict(X_lately)
print(X_lately)
print(forecast_set, accuracy, forecast_out)
df['Forecast'] = np.nan

last_date = df.iloc[-1].name
last_unix = last_date.timestamp()
one_day = 86400
next_unix = last_unix + one_day

for i in forecast_set:
    next_date = datetime.datetime.fromtimestamp(next_unix)
    next_unix += one_day
    df.loc[next_date] = [np.nan for _ in range(len(df.columns) - 1)] + [i]

df['Adj. Close'].plot()
df['Forecast'].plot()  # fixed: was the misspelled column 'Forecats'
plt.legend(loc=4)
plt.xlabel('Date')
plt.ylabel('Price')
plt.show()
k = range(6, 20, 2)

# Calling the Elbow function defined above
test = Elbow(k)

# Plotting the curve
plt.plot(k, test)
plt.xlabel('K Neighbors')
plt.ylabel('Test error')
plt.title('Elbow curve for test error')

# Creating an instance of KNN
clf = KNN(n_neighbors=12)

# Fitting the model
clf.fit(train_x, train_y)

# Predicting over the main set and calculating F1
numbers_of_selection = [0] * d
sum_of_rewards = [0] * d
ads_selected = []  # fixed: was misspelled adds_selected
total_reward = 0
for n in range(0, N):
    ad = 0
    max_upper_bound = 0
    for i in range(0, d):
        if numbers_of_selection[i] > 0:
            average_reward = sum_of_rewards[i] / numbers_of_selection[i]
            delta_i = math.sqrt(3 / 2 * math.log(n + 1) / numbers_of_selection[i])
            upper_bound = average_reward + delta_i
        else:
            upper_bound = float('inf')  # force each ad to be tried once
        if upper_bound > max_upper_bound:
            max_upper_bound = upper_bound
            ad = i
    ads_selected.append(ad)
    numbers_of_selection[ad] += 1
    reward = dataset.values[n, ad]
    sum_of_rewards[ad] += reward
    total_reward += reward

# Visualising the result
plt.hist(ads_selected)
plt.title("Histogram of Ads Selection")
plt.xlabel("Ads")
plt.ylabel("Number of times each ad was selected")
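The delta_i term above is the UCB exploration bonus: at round $n$ the algorithm selects the ad $i$ maximizing

$$\mathrm{UCB}_i(n) = \bar{r}_i(n) + \sqrt{\frac{3\,\ln(n+1)}{2\,N_i(n)}},$$

where $\bar{r}_i(n)$ is the average reward of ad $i$ so far and $N_i(n)$ its selection count; an ad never yet shown gets an infinite bound, so every ad is selected at least once before the bound takes effect.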
train = train.set_index('PassengerId')

train.Survived.value_counts().plot(kind='bar')
plt.ylabel('frequency')
plt.title('Survival (1 = survived)')

train.Pclass.value_counts().plot(kind='bar')
plt.ylabel('frequency')
plt.title('The distribution of passenger class')

survived_0 = train.Pclass[train.Survived == 0].value_counts()
survived_1 = train.Pclass[train.Survived == 1].value_counts()
df_survived = pd.DataFrame({'Survived': survived_1, 'Nonsurvived': survived_0})
df_survived.plot(kind='bar', stacked=True)
plt.title('The distribution of survivors based on passenger class')
plt.xlabel('Pclass')
plt.ylabel('Frequency')

survived_m = train.Survived[train.Sex == 'male'].value_counts()
survived_f = train.Survived[train.Sex == 'female'].value_counts()
df_survived = pd.DataFrame({'Male': survived_m, 'Female': survived_f})
df_survived.plot(kind='bar', stacked=True)
plt.title('The distribution of survivors based on sex')
plt.xlabel('Sex')
plt.ylabel('Frequency')

train = train.fillna(train.mean())
train.groupby('Embarked').Survived.value_counts()
train[train.Embarked.isnull()]  # rows with a missing Embarked value
# Fill missing Embarked values with the most frequent port (fixed: the original
# filled every remaining NaN in the frame with that string).
train.Embarked = train.Embarked.fillna(train.Embarked.value_counts().index[0])
X_set, y_set = X_train, y_train
X1, X2 = np.meshgrid(np.arange(start=X_set[:, 0].min() - 1, stop=X_set[:, 0].max() + 1, step=0.01),
                     np.arange(start=X_set[:, 1].min() - 1, stop=X_set[:, 1].max() + 1, step=0.01))
plt.contourf(X1, X2,
             classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape),
             alpha=0.75, cmap=ListedColormap(('red', 'green')))
plt.xlim(X1.min(), X1.max())
plt.ylim(X2.min(), X2.max())
for i, j in enumerate(np.unique(y_set)):
    plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1],
                c=ListedColormap(('red', 'green'))(i), label=j)
plt.title('Logistic Regression (Training set)')  # fixed: this block plots X_train
plt.xlabel('Age')
plt.ylabel('Estimated Salary')
plt.legend()
plt.show()

# Visualizing the test set results
from matplotlib.colors import ListedColormap
X_set, y_set = X_test, y_test
X1, X2 = np.meshgrid(np.arange(start=X_set[:, 0].min() - 1, stop=X_set[:, 0].max() + 1, step=0.01),
                     np.arange(start=X_set[:, 1].min() - 1, stop=X_set[:, 1].max() + 1, step=0.01))
plt.contourf(X1, X2,
             classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape),
             alpha=0.75, cmap=ListedColormap(('red', 'green')))
import matplotlib.pyplot as pyp

# T (total simulated time) and dt (time step) are assumed defined earlier.
N = int(T / dt)
t = [i * dt for i in range(N)]
na = [None] * N
nb = [None] * N
# Initialize the arrays (lists) and constants used in the calculation
# from data collected interactively.
na[0] = float(input('Initial number of A atoms: '))
ta = float(input('Decay constant of A: '))
nb[0] = float(input('Initial number of B atoms: '))
tb = float(input('Decay constant of B: '))
for i in range(N - 1):
    # Solve the coupled decay equations numerically with Euler's method.
    na[i + 1] = na[i] - dt * na[i] / ta
    nb[i + 1] = nb[i] - dt * nb[i] / tb + dt * na[i] / ta
pyp.plot(t, na, 'k', t, nb, 'r')
pyp.title('NA=' + str(na[0]) + ' Ta=' + str(ta) +
          ' NB=' + str(nb[0]) + ' Tb=' + str(tb))
pyp.xlabel('Time / years')           # 时间/年
pyp.ylabel('Number of particles')    # 粒子数/个
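The Euler updates above discretize the sequential-decay equations

$$\frac{dN_A}{dt} = -\frac{N_A}{\tau_A}, \qquad \frac{dN_B}{dt} = \frac{N_A}{\tau_A} - \frac{N_B}{\tau_B},$$

advancing one step at a time via $N(t + \Delta t) \approx N(t) + \Delta t\,\dot{N}(t)$.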
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Import dataset
dataset = pd.read_csv("Mall.csv")
X = dataset.iloc[:, [3, 4]].values

# Plot dendrogram to find the number of clusters
import scipy.cluster.hierarchy as sch
dendrogram = sch.dendrogram(sch.linkage(X, method='ward'))  # fixed: was sch.dendogram
plt.title('Dendrogram')
plt.xlabel('Customers')
plt.ylabel("Distance")
plt.show()

# Fitting HC to the dataset
from sklearn.cluster import AgglomerativeClustering
hc = AgglomerativeClustering(n_clusters=5, affinity='euclidean', linkage='ward')
y_hc = hc.fit_predict(X)

# Visualising the clusters
plt.scatter(X[y_hc == 0, 0], X[y_hc == 0, 1], s=100, c='red', label='Cluster 1')
plt.scatter(X[y_hc == 1, 0], X[y_hc == 1, 1],
import matplotlib.pyplot as plt

plt.plot([1, 2, 3, 4])
plt.xlabel('')
plt.ylabel('some numbers')
plt.show()