def draw_bar(grades):
    """Show a bar chart with the frequency of each letter grade 'A'-'E'.

    ``grades`` is any iterable of hashable grade values.  Values other than
    'A'..'E' are tallied but not drawn.  The figure is displayed with
    ``plt.show()``.
    """
    categories = ['A', 'B', 'C', 'D', 'E']

    # Tally how many times each grade value occurs.
    tally = {}
    for grade in grades:
        tally[grade] = 1 + tally.get(grade, 0)

    # Build the bars: first argument is the x position of each bar, second
    # is its height; align='center' centres every bar on its x position.
    positions = range(len(categories))
    bar_heights = [tally.get(category, 0) for category in categories]
    plt.bar(positions, bar_heights, align='center')

    # Label each bar position with its grade letter.
    plt.xticks(positions, categories)

    # Axis captions and title.
    plt.xlabel('Grade')
    plt.ylabel('Frequency')
    plt.title('Grades Of Male Students')

    # Render the figure.
    plt.show()
def predict_prices(dates, prices, x):
    """Fit linear, polynomial and RBF SVR models, plot them, and predict.

    Args:
        dates: Sequence of numeric date values, one per observation.
        prices: Sequence of prices aligned with ``dates``.
        x: Date value(s) to predict for; a scalar or an array-like holding
            one feature per sample.

    Returns:
        Tuple ``(rbf, linear, polynomial)`` with each model's prediction for
        the first sample of ``x``.
    """
    # The estimators are fed 2-D input with a single feature column.
    dates = np.reshape(dates, (len(dates), 1))
    # Same layout for the query so a bare scalar is accepted as well.
    x = np.reshape(x, (-1, 1))

    # The linear model was previously bound to `svr_len` but used as
    # `svr_lin`, which raised NameError on the first fit.
    svr_lin = SVR(kernel='linear', C=1e3)
    svr_poly = SVR(kernel='poly', C=1e3, degree=2)
    svr_rbf = SVR(kernel='rbf', C=1e3, gamma=0.1)
    svr_lin.fit(dates, prices)
    svr_poly.fit(dates, prices)
    svr_rbf.fit(dates, prices)

    # Raw observations plus each fitted curve over the training dates.
    plt.scatter(dates, prices, color='black', label='data')
    plt.plot(dates, svr_rbf.predict(dates), color='red', label='RBF model')
    plt.plot(dates, svr_lin.predict(dates), color='green', label='Linear model')
    plt.plot(dates, svr_poly.predict(dates), color='blue', label='Polynomial model')
    plt.xlabel('Date')
    plt.ylabel('Price')
    plt.title('Support Vector Regression')
    plt.legend()
    plt.show()

    return svr_rbf.predict(x)[0], svr_lin.predict(x)[0], svr_poly.predict(x)[0]
def draw_box(heights):
    """Show a box plot of ``heights`` labelled 'Heights'."""
    # First argument: the quantitative data sets to draw (a single one here).
    # ``labels``: the caption shown for each data set.
    datasets = [heights]
    captions = ['Heights']
    plt.boxplot(datasets, labels=captions)
    plt.title('Heights Of Male Students')
    plt.show()
def GenerateOutcomes(x, z, num_cont, num_bin):
    """Generate outcomes following the procedure of Madras, Algorithm 2.

    For every row of the horizontally concatenated ``[x, z]`` a fresh
    coefficient vector is sampled, four outcome means are computed
    (t0/t1 crossed with a0/a1) and one unit-variance normal draw is taken
    around each mean.  A histogram of the four outcome samples is shown.

    Args:
        x: 2-D array of covariates; continuous columns come first.
        z: 2-D array with the same number of rows as ``x``.
        num_cont: Number of coefficients sampled for continuous covariates.
        num_bin: Number of coefficients sampled for binary covariates.
            ``num_cont + num_bin + z.shape[1]`` has to equal the width of
            ``[x, z]`` for the matrix products below to be defined.

    Returns:
        ``(y_all, mu_all, y_column, mu_column)``: two arrays with one row
        per observation and four columns, plus the comma-separated column
        names of each.
    """
    # As defined by Madras
    w = -11
    beta_a = 6
    num_z = z.shape[1]

    # Algorithm 2: horizontal concatenation of covariates and z
    xz = np.concatenate((x, z), 1)
    W = np.ones(xz.shape[1]) * .5

    # Per-setting storage, in the fixed order used for the output columns.
    settings = ('t0_a0', 't1_a0', 't0_a1', 't1_a1')
    mu_store = {setting: [] for setting in settings}
    y_store = {setting: [] for setting in settings}
    coef_values = [0, .1, .2, .3, .4]

    # Every observation gets its own beta sample, hence the explicit loop.
    for obs in xz:
        # Sampling order (cont, bin, z) matters for reproducibility; in x the
        # continuous variables come first.
        # NOTE(review): `choice` comes from outside this block — presumably
        # numpy.random.choice; confirm.
        beta = np.concatenate((
            choice(coef_values, num_cont, p=[.5, .125, .125, .125, .125]),
            choice(coef_values, num_bin, p=[.6, .1, .1, .1, .1]),
            choice([.4, .6], num_z, p=[.5, .5]),
        ), 0)

        # Means of the four outcome distributions.
        exp_mean = np.matmul(np.exp(obs + W), beta)
        lin_mean = np.matmul(obs, beta) - w
        means = (exp_mean, lin_mean, exp_mean + beta_a, lin_mean + beta_a)

        # Record each mean and draw one outcome around it.
        for setting, mean in zip(settings, means):
            mu_store[setting].append(mean)
            y_store[setting].append(np.random.normal(mean, 1, 1)[0])

    # Overlaid histograms of the generated outcomes.
    plt.figure()
    plt.title('Generated data')
    for setting in settings:
        plt.hist(y_store[setting], label='y_' + setting, alpha=0.5, bins=20)
    plt.legend()
    plt.show()

    y_all = np.vstack([y_store[setting] for setting in settings]).T
    mu_all = np.vstack([mu_store[setting] for setting in settings]).T

    # column names should be consistent with the stacking order above
    y_column = 'y_t0_a0, y_t1_a0, y_t0_a1, y_t1_a1'
    mu_column = 'mu_t0_a0, mu_t1_a0, mu_t0_a1, mu_t1_a1'
    return y_all, mu_all, y_column, mu_column
def show_train_history(train_history, train, validation):
    """Plot a training metric and its validation counterpart.

    Args:
        train_history: Object exposing a ``history`` mapping from metric
            name to a sequence of values.
            NOTE(review): presumably a Keras ``History`` — confirm with caller.
        train: Key of the training metric in ``train_history.history``.
        validation: Key of the validation metric.
    """
    # The original passed `train_history, history[train]` (comma instead of
    # a dot), which raised NameError on the undefined name `history`.
    plt.plot(train_history.history[train])
    plt.plot(train_history.history[validation])
    plt.title('Train History')
    plt.ylabel(train)
    plt.xlabel('Epoch')
    plt.legend(['train', 'validation'], loc='upper left')
    plt.show()
def draw_hist(heights):
    """Show a 100-bin histogram of ``heights``."""
    # First argument: the raw quantitative data — unlike qualitative data,
    # no frequency counting is done beforehand.
    # Second argument: the number of bins.
    bin_count = 100
    plt.hist(heights, bin_count)

    captions = (
        (plt.xlabel, 'Heights'),
        (plt.ylabel, 'Frequency'),
        (plt.title, 'Heights Of Male Students'),
    )
    for apply_caption, text in captions:
        apply_caption(text)

    plt.show()
def draw_scatter(heights, weights):
    """Show a scatter plot of ``weights`` (y axis) against ``heights`` (x axis)."""
    # First argument: x coordinates of the points; second: y coordinates.
    plt.scatter(heights, weights)

    captions = (
        (plt.xlabel, 'Heights'),
        (plt.ylabel, 'Weights'),
        (plt.title, 'Heights & Weights Of Male Students'),
    )
    for apply_caption, text in captions:
        apply_caption(text)

    plt.show()
def draw_cumulative_hist(heights):
    """Show a normalised cumulative step histogram of ``heights`` (20 bins)."""
    # First argument: the quantitative data to draw.
    # Second argument: the number of bins.
    # density=True normalises the counts; it replaces the `normed` keyword,
    # which matplotlib no longer accepts.
    # histtype='step' draws a stair-shaped outline.
    # cumulative=True accumulates the counts from left to right.
    plt.hist(heights, 20, density=True, histtype='step', cumulative=True)
    plt.xlabel('Heights')
    plt.ylabel('Frequency')
    plt.title('Heights Of Male Students')
    plt.show()
def draw_pie(grades):
    """Show a pie chart with the share of each letter grade 'A'-'E'.

    ``grades`` is any iterable of hashable grade values; values other than
    'A'..'E' are tallied but not drawn.
    """
    labels = ['A', 'B', 'C', 'D', 'E']

    # Tally how many times each grade value occurs.
    tally = {}
    for grade in grades:
        tally[grade] = 1 + tally.get(grade, 0)

    # First argument: the size of each wedge.
    # ``labels``: the caption of each wedge.
    # ``autopct``: display format of each wedge's percentage.
    wedge_sizes = [tally.get(label, 0) for label in labels]
    plt.pie(wedge_sizes, labels=labels, autopct='%1.1f%%')
    plt.title('Grades Of Male Students')
    plt.show()
# NOTE(review): fragment — `d`, `weekRatings` and `weekAverages` are defined
# outside this view, presumably by a loop over the weekdays.
# Mean rating for weekday `d`: sum the ratings first, then divide by the count
# (the original divided the list itself inside sum(), which raises TypeError).
weekAverages[d] = sum(weekRatings[d]) * 1.0 / len(weekRatings[d])

weekAverages

# X holds the weekday keys, Y the matching averages (the original built a
# lowercase `x` with `list(weekAverages,keys())` and then read `X`).
X = list(weekAverages.keys())
Y = [weekAverages[x] for x in X]

import matplotlib.pyplot as plt

plt.plot(X, Y)

plt.bar(X, Y)

# zoom in more to see the detail
plt.ylim(3.6, 3.8)
plt.bar(X, Y)

plt.ylim(3.6, 3.8)
plt.xlabel("Weekday")
plt.ylabel("Rating")
plt.xticks([0, 1, 2, 3, 4, 5, 6], ['S', 'M', 'T', 'W', 'T', 'F', 'S'])
plt.title("Rating as a function of weekday")
plt.bar(X, Y)

#L4 Live-coding: MatPlotLib
path = "datasets/yelp_data/review.json"

import json
import time
from itertools import islice

dataset = []
# Read at most the first 50000 lines; the context manager closes the file and
# islice stops cleanly if the file is shorter than that.
with open(path, 'r', encoding='utf8') as f:
    for line in islice(f, 50000):
        d = json.loads(line)
        # NOTE(review): the original read d['data']; the parsed value is a
        # '%Y-%m-%d' date, so the key is assumed to be 'date' — confirm
        # against the dataset.
        d['timeStruct'] = time.strptime(d['date'], '%Y-%m-%d')
        d['timeInt'] = time.mktime(d['timeStruct'])
        dataset.append(d)

dataset[0]
# In[3]:
# (The import from the misspelled module "matplotliv" was dropped: it raises
# ImportError when this export is run as a script. The working import follows.)

# In[4]:
from matplotlib import pyplot as plt

# In[9]:
# Two line series over the same x values, with a legend entry per series.
x = [1, 2, 3]
y = [1, 4, 9]
z = [10, 5, 0]
plt.plot(x, y)
plt.plot(x, z)
plt.title("test plot")
plt.xlabel("x")
plt.ylabel("y and z")
plt.legend(["this is y", "this is z"])
plt.show()

# In[10]:
# NOTE(review): `pd` is not imported in the visible part of this file —
# presumably `import pandas as pd` lives in an earlier cell; confirm.
sample_data = pd.read_csv('sample_data.csv')

# In[11]:
sample_data

# In[12]:
# NOTE(review): fragment with lost indentation; the nesting below is inferred
# from the statement order and should be confirmed against the full file.
# Linear scan for the largest value in red_fft and its index (max_val and
# max_index are initialised outside this view).
for index, fft_val in enumerate(red_fft):
    if fft_val > max_val:
        max_val = fft_val
        max_index = index

# Frequency at the peak times 60 — presumably Hz converted to beats per
# minute; confirm the unit of `freqs`.
heartrate = freqs[max_index] * 60
print('Estimated Heartate: {} bpm'.format(heartrate))

# Plotting
if PLOT:
    plt.figure(figsize=(16,9))
    plt.plot(x, colors['red'], color='#fc4f30')
    plt.xlabel('Time [s]')
    plt.ylabel('Normalized Pixel Color')
    plt.title('Time-Series Red Channel Pixel Data')
    # Keep a handle on the figure before show() so it can be saved afterwards.
    fig1 = plt.gcf()
    plt.show()
    if SAVE:
        plt.draw()
        fig1.savefig('./{}_time_series.png'.format(filename), dpi=200)

    # Plot the highpass data
    plt.figure(figsize=(16,9))
    plt.plot(x_filt, colors['red_filt'], color='#fc4f30')
    plt.xlabel('Time [s]')
    plt.ylabel('Normalized Pixel Color')
    plt.title('Filtered Red Channel Pixel Data')
    fig2 = plt.gcf()
    plt.show()
    if SAVE:
# DataFrame.as_matrix() is no longer available in pandas; to_numpy() is the
# replacement.
df.to_numpy()  # returns numpy array.

#Data Visualization Reference.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# IPython magic — not valid Python in a plain script, so kept as a comment.
# %matplotlib inline  #jupyter notebook only. below line for everything else.
plt.show()

x = np.arange(0, 10)
y = x ** 2
plt.plot(x, y, 'red')  #shows red line.
plt.plot(x, y, '*')  #shows stars on graph.
plt.plot(x, y, 'r--')  #shows red line with dashes.
plt.xlim(0, 4)  #shows x-axis limits at 0 and 4.
plt.ylim(0, 10)  #shows y-axis limits at 0 and 10.
plt.title("title goes here")
plt.xlabel('x label goes here')
plt.ylabel('y label goes here')

mat = np.arange(0, 100).reshape(10, 10)  #makes array.
plt.imshow(mat, cmap = 'RdYlGn')

mat = np.random.randint(0, 1000, (10, 10))
plt.imshow(mat)
plt.colorbar()

df = pd.read_csv('salaries.csv')
df.plot(x = 'salary', y = 'age', kind = 'scatter')  #kind could be 'line' or whatever else you need.

#SciKit-Learn Reference/Pre-Processing.
import numpy as np
from sklearn.preprocessing import MinMaxScaler

data = np.random.randint(0, 100, (10, 2))
scaler_model = MinMaxScaler()
plt.style.use('fivethirtyeight')

#Generate data with two classes
X, y = make_classification(class_sep=1.2, weights=[0.1, 0.9], n_informative=3,
                           n_redundant=1, n_features=5, n_clusters_per_class=1,
                           n_samples=10000, flip_y=0, random_state=10)

# Project the five features onto two principal components for plotting.
pca = PCA(n_components=2)
X = pca.fit_transform(X)

# Relabel the classes: '1' becomes 'L' and '0' becomes 'S'.
y = y.astype('str')
y[y=='1'] = 'L'
y[y=='0'] = 'S'

X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.7,
                                                    random_state=0)

X_1, X_2 = X_train[y_train=='S'], X_train[y_train=='L']

#Scatter plot of the dataset
# Column slicing replaces `zip(*X_1)[0]`, which only works on Python 2 where
# zip returns a list. '#1abc9c' replaces the invalid colour '#labc9c'
# (letter "l" instead of digit "1").
plt.scatter(X_1[:, 0], X_1[:, 1], color='#1abc9c')
plt.scatter(X_2[:, 0], X_2[:, 1], color='#e67e22')
x_coords = list(X_1[:, 0]) + list(X_2[:, 0])
y_coords = list(X_1[:, 1]) + list(X_2[:, 1])
# Axis limits hug the data; the original call had unbalanced parentheses.
plt.axis([min(x_coords), max(x_coords), min(y_coords), max(y_coords)])

plt.title("Original Dataset")
plt.show()
# NOTE(review): fragment of a larger routine whose indentation was lost; the
# first two statements presumably sit inside a progress loop that starts
# before this view — confirm against the full file.
stdout.write("\r%d%% completed" % comp)
stdout.flush()
stdout.write("\n")

# Calculate and print the position of minimum in MSE
msemin = np.argmin(mse)
print("Suggested number of components: ", msemin+1)
stdout.write("\n")

if plot_components is True:
    with plt.style.context(('ggplot')):
        plt.plot(component, np.array(mse), '-v', color = 'blue', mfc='blue')
        # Highlight the minimum-MSE point.
        plt.plot(component[msemin], np.array(mse)[msemin], 'P', ms=10, mfc='red')
        plt.xlabel('Number of PLS components')
        plt.ylabel('MSE')
        plt.title('PLS')
        # `left=` replaces the `xmin=` keyword, which matplotlib no longer
        # accepts.
        plt.xlim(left=-1)

    plt.show()

# Run PLS with suggested number of components
pls = PLSRegression(n_components=msemin+1)
pls.fit(X_calib, Y_calib)
Y_pred = pls.predict(X_valid)

# Calculate and print scores
score_p = r2_score(Y_valid, Y_pred)
mse_p = mean_squared_error(Y_valid, Y_pred)
# Standard deviation and mean of the prediction residuals.
sep = np.std(Y_pred[:,0]-Y_valid)
rpd = np.std(Y_valid)/sep
bias = np.mean(Y_pred[:,0]-Y_valid)
# initialize time and x and y expenditure at initial time
t_0 = 0
init_data = np.array([14, 5])

# starting RK45 integration method (t_bound=1000, max_step=0.001)
sys_1 = integrate.RK45(model, t_0, init_data, 1000, 0.001)

# storing initial data
sol_x = [sys_1.y[0]]
sol_y = [sys_1.y[1]]
time = [t_0]

for i in range(5000):
    # step() raises RuntimeError once the solver has finished or failed, so
    # stop collecting as soon as it is no longer running.
    if sys_1.status != 'running':
        break
    sys_1.step()  # performing integration step
    # storing the results in our solution list, y is the attribute current state
    sol_x.append(sys_1.y[0])
    sol_y.append(sys_1.y[1])
    time.append(sys_1.t)

plt.figure(figsize=(20, 10))

# plotting results in a graph
plt.plot(time, sol_x, 'b--', label='Country A')
plt.plot(time, sol_y, 'r--', label='Country B')
plt.ylabel('Military Expenditure (billions USD)', fontsize=16)
plt.xlabel('Time (years)', fontsize=16)
plt.legend(loc='best', fontsize=22)
plt.title('Simple Arms Race: Aggressive vs. Passive', fontsize=28)
plt.show()
stop=X_set[:, 1].max() + 1, step=0.01))
# NOTE(review): the line above is the tail of a call that starts before this
# view (it matches the np.meshgrid/np.arange call at the end of this fragment).

# Colour each grid point by the class predicted for it.
# NOTE(review): `classifer` is defined outside this view; the spelling looks
# like a typo for `classifier` — confirm before renaming.
plt.contourf(X1, X2, classifer.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape),
             alpha=0.75, cmap=ListedColormap(('red', 'green')))
plt.xlim(X1.min(), X1.max())
plt.ylim(X2.min(), X2.max())

# Overlay the samples, one scatter call (and legend entry) per class value.
for i, j in enumerate(np.unique(y_set)):
    plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1],
                c=ListedColormap(('red', 'green'))(i), label=j)
plt.title('Classifier (Training set)')
plt.xlabel('Age')
plt.ylabel('Estimated Salary')
plt.legend()
plt.show()

# Visualising the Test set results
from matplotlib.colors import ListedColormap

X_set, y_set = X_test, y_test
# Grid over both features, padded by 1 on each side, at 0.01 resolution.
X1, X2 = np.meshgrid(
    np.arange(start=X_set[:, 0].min() - 1, stop=X_set[:, 0].max() + 1, step=0.01),
    np.arange(start=X_set[:, 1].min() - 1, stop=X_set[:, 1].max() + 1, step=0.01))
# initialize time and x and y expenditure at initial time
t_0 = 0
init_data = np.array([3, 3.5])

# starting RK45 integration method (t_bound=1000, max_step=0.001)
sys_1 = integrate.RK45(model, t_0, init_data, 1000, 0.001)

# storing initial data
sol_x = [sys_1.y[0]]
sol_y = [sys_1.y[1]]
time = [t_0]

for i in range(5000):
    # step() raises RuntimeError once the solver has finished or failed, so
    # stop collecting as soon as it is no longer running.
    if sys_1.status != 'running':
        break
    sys_1.step()  # performing integration step
    # storing the results in our solution list, y is the attribute current state
    sol_x.append(sys_1.y[0])
    sol_y.append(sys_1.y[1])
    time.append(sys_1.t)

plt.figure(figsize=(20, 10))

# plotting results in a graph
plt.ylim(2, 5.5)
plt.plot(time, sol_x, 'b--', label='Country A (passive)')
plt.plot(time, sol_y, 'r--', label='Country B (passive)')
plt.ylabel('Military Expenditure (billions USD)', fontsize=16)
plt.xlabel('Time (years)', fontsize=16)
plt.legend(loc='best', fontsize=22)
plt.title('Arms Race: Passive vs. Passive', fontsize=28)
plt.show()
#import scipy as sp
import matplotlib.pyplot as plt  # was misspelled "matplotlib.pylot"
import pandas as pd

data = pd.read_csv("scratch3.csv")

# Bar chart of how many rows have each distinct 'bedrooms' value.
data['bedrooms'].value_counts().plot(kind='bar')
plt.title('number of bedrooms')
plt.xlabel('bedrooms')
plt.ylabel('count')
plt.show()
import pylab as pl
import matplotlib.pyplot as plt  # was misspelled "matplotlib.pylot"

x = [1, 2, 3, 4, 5, 6, 7, 8]
y = [9, 8, 8.25, 8, 7.5, 8, 8, 8.75]

# 'D' draws a diamond marker at each (x, y) point.
pl.plot(x, y, 'D')
plt.title("Grafica de promedios semestral")
plt.xlabel("Semestres cursados")
plt.ylabel("Promedio")
# Save before show() so the file is written while the figure still exists.
pl.savefig('promedios.png')
plt.show()
''' import json from textblob import TextBlob from wordcloud import WordCloud import matplotlib.pylot as plt # Get the JSON data tweetFile = open("tweets.json", "r") tweetData = json.load(tweetFile) tweetFile.close() polarity_values = [] for tweet in tweetData: tweets.append(tweet["text"]) giant_string = " ".join(tweets) tb = TextBlob(tweet_text) print("{}: {}".format(tweet_text, tb.polarity)) polarity_values.append(tb.polarity) # bins = [-1, -0.5, 0, 0.5, 1] plt.hist(polarity_values, bins) plt.title("tweet polarity") plt.ylabel("Count of tweets") plt.xlabel("Polarity") plt.show()
from pylab import *
import matplotlib.pyplot as plt  # was misspelled "matplotlib.pylot"

# Grid of (x, y) values from 1 up to (not including) 4 in steps of 0.1.
# Built first because the coefficient expressions below depend on x and y;
# the original evaluated them before x and y existed and called the
# non-existent `arrange`.
x, y = meshgrid(arange(1, 4, 0.1), arange(1, 4, 0.1))

# budget, fear factor and external factor constraints
b_x, b_y = 8.0, 10.0
c_x, c_y = (x/b_x), (y/b_y)
# NOTE(review): f_x and f_y are the same expression — confirm that f_y is
# not meant to be (1.0 - x/y).
f_x, f_y = (1.0-y/x), (1.0-y/x)
e_x, e_y = 0.75, 0.9

xdot = f_x*y - c_x*x + e_x
ydot = f_y*x - c_y*y + e_y

plt.figure(figsize=(10, 10))
plt.title('Phase Plot: Aggressive vs. Aggressive', fontsize = 28)
streamplot(x, y, xdot, ydot)
show()
# NOTE(review): fragment — this statement presumably belongs to a loop that
# starts before this view.
nan_indices.append(i)

unique_provinces = list(unique_provinces)
province_confirmed_cases = list(province_confirmed_cases)

# Remove the flagged positions from both lists. Pop from the highest index
# down so earlier removals do not shift the positions still to be removed.
# (The original also misspelled the list name as `unique_provices`.)
for i in sorted(nan_indices, reverse=True):
    unique_provinces.pop(i)
    province_confirmed_cases.pop(i)

# Plot a bar graph to see the total confirmed cases across different countries
plt.figure(figsize=(32,32))
plt.barh(unique_countries, country_confirmed_cases)
plt.title('Number of Covid-19 Confirmed Cases in Countries')
plt.xlabel('Number of Covid Confirmed Cases')
plt.show()

# Plot a bar graph to see the total confirmed cases b/w mainland china and outside mainland china
china_confirmed = latest_confirmed[confirmed_cases['Country/Region']=='China'].sum()
outside_mainland_china_confirmed = np.sum(country_confirmed_cases)-china_confirmed
plt.figure(figsize=(16, 9))
plt.barh('Mainland China',china_confirmed)
plt.barh('Outside Mainland China',outside_mainland_china_confirmed)
plt.title('Number of Confirmed Coronavirus cases')
plt.show()

# Print the total cases in mainland china outside of it
# Share of apps selected by the `popular_apps` mask (defined outside this view).
percent_popular = len(np_ratings[popular_apps]) / len(np_ratings) * 100
print("percent_popular", percent_popular)

# Apps rated below 4.
unpopular_apps = np_ratings < 4
print("unpopular_apps", len(np_ratings[unpopular_apps]))

# Share of unpopular apps. The original divided the rating values themselves
# (missing len()) and subtracted the result from 100.
percent_unpopular = len(np_ratings[unpopular_apps]) / len(np_ratings) * 100
print("percent_unpopular", percent_unpopular)

# Whatever is neither popular nor unpopular.
somewhat_popular = 100 - (percent_popular + percent_unpopular)
print("somewhat_popular", somewhat_popular)

# do a visualization with out new data
labels = "Sucks", "Meh", "Love it!"
# Wedge sizes are the three percentages, in the same order as `labels`
# (the original passed the boolean masks instead).
sizes = [percent_unpopular, somewhat_popular, percent_popular]
colors = ['yellowgreen', 'lightgreen', 'lightskyblue']
explode = (0.1, 0.1, 0.15)

# colors=colors (was the undefined name `color`); '%1.1f%%' is the valid
# percent format ('%1.1%' is not a complete format specifier).
plt.pie(sizes, explode=explode, colors=colors, autopct='%1.1f%%',
        shadow=True, startangle=140)
plt.axis('equal')
plt.legend(labels, loc=1)
plt.title("Do we love our apps?")
plt.xlabel("User Ratings - App Installs (10,000+ apps)")
plt.show()

# print ('processed', line_count, 'lines of data')
print(categories)
print('first row of data', installs [0])
print('last row of data', installs [-1])
# Inner product of vectors
print(a.dot(b))
print(np.dot(a, b))

# Matrix / vector product; both produce the rank 1 array [29 67]
print(c.dot(d))
print(np.dot(c, d))

# Matrix / matrix product; both produce the rank 2 array
# [[19 22]
#  [43 50]]
print(a.dot(c))
print(np.dot(b, d))


# In[24]:

import numpy as np
from matplotlib import pyplot as plt  # was misspelled "pylot"

# Integers 1..10 (np.arange; the original called the non-existent np.arrange).
x = np.arange(1, 11)
y = 2 * x + 5
plt.title("Matplotlib demo")
plt.xlabel("x axis caption")
plt.ylabel("y axis caption")
# "ob" draws blue circle markers.
plt.plot(x, y, "ob")
plt.show()


# In[ ]:
import covid
import matplotlib.pyplot as plt  # was misspelled "matplotlib.pylot"

cov = covid.Covid()
name = input("ENTER the country name")
print(name)

# Look the country up on the client instance. The original only referenced a
# function attribute on the module and never called it.
# NOTE(review): assumes the client exposes get_status_by_country_name() —
# confirm against the installed `covid` package version.
virusdata = cov.get_status_by_country_name(name)
active = virusdata['active']
recover = virusdata['recovered']
deaths = virusdata['deaths']

# Labels are passed to pie() so that legend() has entries to show.
plt.pie([active, recover, deaths], labels=['active', 'recovered', 'deaths'])
plt.title(name)
plt.legend()
plt.show()  # the original referenced plt.show without calling it
# NOTE(review): fragment — `percentage`, `positive`, `polarity`, `searchTerm`
# and `noOfSearchTerms` are defined before this view.
negative = percentage(negative, noOfSearchTerms)
neutral = percentage(neutral, noOfSearchTerms)

# Two-decimal strings for display.
positive = format(positive, '.2f')
negative = format(negative, '.2f')
neutral = format(neutral, '.2f')

# Wording and spacing aligned with the chart title below.
print("How are people reacting on " + searchTerm + " by analyzing " + str(noOfSearchTerms) + " Tweets.")

if (polarity == 0.00):
    print("Neutral")
elif (polarity < 0.00):
    print("Negative")
elif (polarity > 0.00):
    print("Positive")

labels = [
    'Positive [' + str(positive) + '%]',
    'Neutral [' + str(neutral) + '%]',
    'Negative [' + str(negative) + '%]'
]
# The values above are formatted strings; convert back to numbers for the
# wedge sizes.
sizes = [float(positive), float(neutral), float(negative)]
colors = ['yellowgreen', 'gold', 'red']
patches, texts = plt.pie(sizes, colors=colors, startangle=90)
plt.legend(patches, labels, loc="best")
plt.title('How people are reacting on ' + searchTerm + ' by analyzing ' + str(noOfSearchTerms) + ' Tweets.')
plt.axis('equal')
plt.tight_layout()  # was `plt.tight.layout()`, which raises AttributeError
plt.show()
data2.groupby([clusterNos]).mean() plt.scatter(data2.ApplicantIncome, data2.LoanAmount, c=clusterNos) plt.scatter(data2.ApplicantIncome, data2.Credit_History, c=clusterNos) #better distinction plt.scatter(data2.ApplicantIncome, data2.Loan_Amount_Term, c=clusterNos) #better distinction #Now use this information ; #which customers you would like to target. #hierarchical clustering import scipy.cluster.hierarchy as shc dend = shc.dendrogram(shc.linkage(data2_scaled, method='ward')) plt.figure(figsize=(10, 7)) plt.title("Dendrogram") dend = shc.dendrogram(shc.linkage(data2_scaled, method='ward')) plt.axhline(y=6, color='r', linestyle='--') plt.show() #another method for Hcluster from sklearn from sklearn.cluster import AgglomerativeClustering aggCluster = AgglomerativeClustering(n_clusters=2, affinity='euclidean', linkage='ward') aggCluster.fit_predict(data2_scaled) aggCluster aggCluster.labels_ #compare compare = pd.DataFrame({
import matplotlib.pyplot as plt  # was misspelled "matplotlib.pylot"

# Five-year steps from 1950 to 2015. The second entry was 1995, which broke
# the sequence and duplicated a later year; it is 1955.
years = [
    1950, 1955, 1960, 1965, 1970, 1975, 1980, 1985, 1990, 1995, 2000, 2005,
    2010, 2015
]
pops = [2.5, 2.7, 3, 3.3, 3.6, 4, 4.4, 4.8, 5.3, 5.7, 6.1, 6.5, 6.9, 7.3]
death = [1.2, 1.7, 1.8, 2.2, 2.5, 2.7, 2.9, 3, 3.1, 3.3, 3.5, 3.8, 4.0, 4.3]
'''
plt.plot(years, pops,'---', color=(255/255, 100/255, 100/255))
plt.plot(years, death, color=(.6, .6, .1))
'''

# Both series in one call; `lines` holds the two line objects so they can be
# styled together below.
lines = plt.plot(years, pops, years, death)
plt.grid(True)

plt.setp(lines, color=(1, .4, .4), marker='o')

plt.ylabel("Population in Billions")
plt.xlabel("Population growth by Year")
plt.title("Population Growth")
plt.show()
# NOTE(review): fragment — `percent_popular` and `np_ratings` are defined
# before this view.
# Print the value next to its name (the original printed only the literal name).
print("percent_popular", percent_popular)

# Apps rated below 4.
unpopular_apps = np_ratings < 4
# This count is of the unpopular apps; the original label said "popular apps".
print("unpopular apps", len(np_ratings[unpopular_apps]))

# Whole-number share of unpopular apps.
percent_unpopular = int(
    len(np_ratings[unpopular_apps]) / len(np_ratings) * 100)
print(percent_unpopular)

# Whatever is neither popular nor unpopular.
kinda_popular = 100 - (percent_popular + percent_unpopular)
print("kinda popular", kinda_popular)

# do a visualization with our shiny new data
labels = "Sucks", "Meh", "Love it!"
# Wedge sizes in the same order as `labels`.
sizes = [percent_unpopular, kinda_popular, percent_popular]
colors = ['yellowgreen', 'lightgreen', 'lightskyblue']
explode = (0.1, 0.1, 0.15)

plt.pie(sizes, explode=explode, colors=colors, autopct='%1.1f%%',
        shadow=True, startangle=140)
plt.axis('equal')
plt.legend(labels, loc=1)
plt.title("Do we love us some apps?")
# Closing parenthesis added to the caption text.
plt.xlabel("User ratings - App Installs (10,000+ apps)")
plt.show()