def predict_prices(dates, prices, x):
    """Fit three SVR models to a price series, plot them, and predict at ``x``.

    A linear, a degree-2 polynomial and an RBF support vector regressor are
    trained on ``dates`` (used as the single feature) against ``prices``.
    The raw data and the three fitted curves are drawn in one figure.

    Args:
        dates: One-dimensional sequence of feature values, one per sample.
        prices: Target values aligned with ``dates``.
        x: Query point(s). A scalar, a flat sequence, or an (n, 1) array are
            all accepted; only the prediction for the first point is returned.

    Returns:
        Tuple ``(rbf_prediction, linear_prediction, polynomial_prediction)``
        for the first query point.
    """
    # The estimators expect a 2-D (n_samples, n_features) matrix.
    dates = np.reshape(dates, (len(dates), 1))
    # Same single-feature layout for the query so scalars and flat lists work.
    x = np.reshape(x, (-1, 1))

    svr_lin = SVR(kernel='linear', C=1e3)
    svr_poly = SVR(kernel='poly', C=1e3, degree=2)
    svr_rbf = SVR(kernel='rbf', C=1e3, gamma=0.1)
    for model in (svr_lin, svr_poly, svr_rbf):
        model.fit(dates, prices)

    plt.scatter(dates, prices, color='black', label='data')
    plt.plot(dates, svr_rbf.predict(dates), color='red', label='RBF model')
    plt.plot(dates, svr_lin.predict(dates), color='green',
             label='Linear model')
    plt.plot(dates, svr_poly.predict(dates), color='blue',
             label='Polynomial model')
    plt.xlabel('Date')
    plt.ylabel('Price')
    plt.title('Support Vector Regression')
    plt.legend()
    plt.show()

    return (
        svr_rbf.predict(x)[0],
        svr_lin.predict(x)[0],
        svr_poly.predict(x)[0],
    )
def draw_scatter(heights, weights):
    """Show a scatter plot of weights against heights.

    Args:
        heights: Horizontal coordinates of the points.
        weights: Vertical coordinates of the points.
    """
    # Create the scatter plot: x coordinates first, y coordinates second.
    plt.scatter(heights, weights)

    # Decorate the axes, then display the figure.
    plt.title('Heights & Weights Of Male Students')
    plt.ylabel('Weights')
    plt.xlabel('Heights')
    plt.show()
def plot_regression_line(x, y, b):
    """Plot observed points together with a fitted straight line.

    Args:
        x: Predictor values; must support ``scalar * x`` arithmetic.
        y: Observed response values.
        b: Indexable pair where ``b[0]`` is the intercept and ``b[1]`` the
            slope of the line.
    """
    # Observed data as magenta circles.
    plt.scatter(x, y, color="m", marker="o", s=30)

    # Fitted line evaluated at every x, drawn in green.
    intercept = b[0]
    slope = b[1]
    fitted = intercept + slope * x
    plt.plot(x, fitted, color="g")

    # Axis labels, then display.
    plt.xlabel('x')
    plt.ylabel('y')
    plt.show()
def plot_hull(self, show_points=False):
    """
    Plot the boundary of the convex hull of ``self.dots`` using
    matplotlib.pyplot.

    The hull is obtained from ``self.convex_hull(self.dots)``; only its
    ``simplices`` attribute is used here (presumably a scipy.spatial
    ConvexHull-like object -- confirm against ``convex_hull``). Each simplex
    is drawn as a black line between the rows of ``self.dots`` it indexes.

    ``self.dots`` is indexed as ``[:, 0]`` / ``[:, 1]``, so it must be a
    2-D array-like whose first two columns are the x and y coordinates.

    show_points: when true, the points themselves are drawn as well.
    """
    hull = self.convex_hull(self.dots)
    plt.figure()
    # One line segment per hull edge.
    for simplex in hull.simplices:
        plt.plot(self.dots[simplex,0], \
        self.dots[simplex,1], 'k-')
    # NOTE(review): the source this was recovered from had lost its
    # indentation; it is assumed that both scatter calls belong to the
    # ``if`` and that ``plt.show()`` always runs -- confirm.
    if show_points:
        plt.scatter(self.dots[:,0], \
        self.dots[:,1], s=10,c='g')
        plt.scatter(self.dots[:,0], \
        self.dots[:,1], s=30,c='orange')
    plt.show()
stop=X_set[:, 0].max() + 1, step=0.01), np.arange(start=X_set[:, 1].min() - 1, stop=X_set[:, 1].max() + 1, step=0.01)) plt.contourf(X1, X2, classifer.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape), alpha=0.75, cmap=ListedColormap(('red', 'green'))) plt.xlim(X1.min(), X1.max()) plt.ylim(X2.min(), X2.max()) for i, j in enumerate(np.unique(y_set)): plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1], c=ListedColormap(('red', 'green'))(i), label=j) plt.title('Classifier (Training set)') plt.xlabel('Age') plt.ylabel('Estimated Salary') plt.legend() plt.show() # Visualising the Test set results from matplotlib.colors import ListedColormap X_set, y_set = X_test, y_test X1, X2 = np.meshgrid( np.arange(start=X_set[:, 0].min() - 1, stop=X_set[:, 0].max() + 1, step=0.01), np.arange(start=X_set[:, 1].min() - 1,
# `now.day` is the day-of-month integer; `now.date` is a bound method and
# makes the datetime constructor raise TypeError.
DATE = datetime(now.year, now.month, now.day)  # UTC date
hours = range(0, 1)  # model run hour for the date

# Number of levels read from every file.
# There are 50 sigma levels in HRRR
NUM_LEVELS = 50

for h in hours:
    FileNames = download_hrrr_nat_subsection(DATE, h)
    print(FileNames)

    # Pressure (hPa) and temperature (deg C) profiles at two grid points.
    p1 = []
    p2 = []
    t1 = []
    t2 = []

    # Create a vertical temperature profile at a few points
    for f in FileNames:
        grbs = pygrib.open(f + '.small')
        try:
            pres = grbs.select(name='Pressure')
            temp = grbs.select(name='Temperature')

            for level in range(NUM_LEVELS):
                # Pa -> hPa
                p1.append(pres[level].values[1, 1] / 100)
                p2.append(pres[level].values[30, 30] / 100)
                # K -> deg C
                t1.append(temp[level].values[1, 1] - 273.15)
                t2.append(temp[level].values[30, 30] - 273.15)
        finally:
            # Release the GRIB file handle even if a select/index fails.
            grbs.close()

    plt.scatter(t1, p1, color='b')
    plt.plot(t1, p1, color='b')
    plt.scatter(t2, p2, color='r')
    plt.plot(t2, p2, color='r')
    # Pressure decreases with height, so flip the axis to put the surface
    # at the bottom.
    plt.gca().invert_yaxis()
    plt.show()
# Rafael Almeida
# K-MEANS
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# In a Jupyter/IPython session, run the magic below to render figures inline.
# It is not valid Python syntax outside IPython, so it is kept as a comment.
# %matplotlib inline

df = pd.DataFrame({
    'x': [12, 20, 28, 18, 29, 33, 24, 45, 45, 52,
          51, 52, 55, 53, 55, 61, 64, 69, 72],
    'y': [39, 36, 30, 52, 54, 46, 55, 59, 63, 70,
          66, 63, 58, 23, 14, 8, 19, 7, 24],
})

# Fixed seed so the initial centroids are reproducible.
np.random.seed(200)
k = 3

# centroids[i] = [x, y], keyed 1..k, drawn uniformly from [0, 80).
centroids = {
    i + 1: [np.random.randint(0, 80), np.random.randint(0, 80)]
    for i in range(k)
}

fig = plt.figure(figsize=(5, 5))
plt.scatter(df['x'], df['y'], color='k')

# One colour per centroid index.
colmap = {1: 'r', 2: 'g', 3: 'b'}
for i, centroid in centroids.items():
    plt.scatter(*centroid, color=colmap[i])

plt.xlim(0, 80)
plt.ylim(0, 80)
plt.show()
import matplotlib.pyplot as plt
import pandas as pd

# Fetch the best performing model from each grid search.
best_model = RF_gscv.best_estimator_
best_model2 = MLP_gscv.best_estimator_

# Fit permutation importance on the test data.
perm = PermutationImportance(best_model).fit(test_img, test_lab)
perm2 = PermutationImportance(best_model2).fit(test_img, test_lab)

# Collect the feature weights as dataframes.
wghts = eli5.format_as_dataframe(eli5.explain_weights(perm))
wghts2 = eli5.format_as_dataframe(eli5.explain_weights(perm2))

# Write the dataframes to CSV.
wghts.to_csv(
    'D:/studies/phd/WV3_Data_July2019/010039360030_01/L_Sabie_subset/rf_permImportance.csv',
    encoding='utf-8',
    index=False)
wghts2.to_csv(
    'D:/studies/phd/WV3_Data_July2019/010039360030_01/L_Sabie_subset/mlp_permImportance.csv',
    encoding='utf-8',
    index=False)

# Scatter column 3 of the MLP probability map against column -9 of the image
# array. The axis labels name these as the gLawn probability and the
# proximity-to-water feature -- verify the column positions against the
# code that builds `mlp_map_prob` and `x_img_arr`.
gLawn = mlp_map_prob[:, 3]
w = x_img_arr[:, -9]
plt.scatter(w, gLawn)
plt.xlabel('proximity_to_water')
plt.ylabel('gLawn_probability')
plt.show()
plt.style.use('fivethirtyeight')

# Generate data with two classes (10% / 90% split).
X, y = make_classification(class_sep=1.2, weights=[0.1, 0.9],
                           n_informative=3, n_redundant=1, n_features=5,
                           n_clusters_per_class=1, n_samples=10000,
                           flip_y=0, random_state=10)

# Project onto two principal components so the data can be drawn in 2-D.
pca = PCA(n_components=2)
X = pca.fit_transform(X)

# Relabel the classes: 1 -> 'L', 0 -> 'S'.
y = y.astype('str')
y[y == '1'] = 'L'
y[y == '0'] = 'S'

X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.7,
                                                    random_state=0)
X_1, X_2 = X_train[y_train == 'S'], X_train[y_train == 'L']

# Scatter plot of the dataset. Column indexing replaces `zip(*X)[i]`, which
# only works on Python 2 where zip returns a list.
plt.scatter(X_1[:, 0], X_1[:, 1], color='#1abc9c')
plt.scatter(X_2[:, 0], X_2[:, 1], color='#e67e22')

# Axis limits that tightly enclose both classes.
x_min = min(X_1[:, 0].min(), X_2[:, 0].min())
x_max = max(X_1[:, 0].max(), X_2[:, 0].max())
y_min = min(X_1[:, 1].min(), X_2[:, 1].min())
y_max = max(X_1[:, 1].max(), X_2[:, 1].max())
plt.axis([x_min, x_max, y_min, y_max])

plt.title("Original Dataset")
plt.show()
import pandas as pd
import numpy as np
import random as rd
import matplotlib.pyplot as plt

# Data
# data = pd.read_csv('data/clustering.csv')
url = 'https://raw.githubusercontent.com/DUanalytics/pyAnalytics/master/data/clustering.csv'
data = pd.read_csv(url)

# Quick look at the frame; these bare expressions only display a value when
# run interactively (REPL / notebook).
data.shape
data.head()
data.describe()
data.columns

# Visualise income against loan amount.
plt.scatter(data.ApplicantIncome, data.LoanAmount)
plt.xlabel('Income')
plt.ylabel('LoanAmt')
plt.show()

# Standardize data: scaling
# Missing values
# https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.dropna.html
data.dtypes
data.isnull().any()                       # per column: any missing?
data.isnull().any(axis=1)                 # per row: any missing?
data.index[data.isnull().any(axis=1)]     # labels of rows with missing data
data.iloc[6]
data.isnull().sum().sum()  # 75 missing values
data.isnull().sum(axis=0)  # columns missing
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

np.random.seed(5)
steps = 3000
learning_rate = 0.01

# 100 samples in [-1, 1] as a column vector, with a noisy quadratic target.
x_data = np.linspace(-1, 1, 100)[:, np.newaxis]
y_data = np.square(x_data) * 0.4 + np.random.randn(*x_data.shape) * 0.5

# Graph inputs (TensorFlow 1.x graph-mode API).
x = tf.placeholder(tf.float32, [None, 1])
y = tf.placeholder(tf.float32, [None, 1])

# Hidden layer: 1 -> 10 units, tanh activation.
weight_L1 = tf.Variable(tf.random_normal([1, 10]))
biases_L1 = tf.Variable(tf.zeros([1, 10]))
Output_L1 = tf.matmul(x, weight_L1) + biases_L1
L1 = tf.nn.tanh(Output_L1)

# Output layer: 10 -> 1 unit, tanh activation.
weight_L2 = tf.Variable(tf.random_normal([10, 1]))
biases_L2 = tf.Variable(tf.zeros([1, 1]))
Output_L2 = tf.matmul(L1, weight_L2) + biases_L2
pred = tf.nn.tanh(Output_L2)

# Mean squared error, minimised with plain gradient descent.
loss = tf.reduce_mean(tf.square(y - pred))
train = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)

plt.figure()
plt.scatter(x_data, y_data)

with tf.Session() as sess:
    init = tf.global_variables_initializer()
    sess.run(init)
    for i in range(steps):
        sess.run(train, feed_dict={x: x_data, y: y_data})
    # Evaluate the trained network on the training inputs.
    pred_value = sess.run(pred, feed_dict={x: x_data})

plt.plot(x_data, pred_value)
plt.show()
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression

# Perfectly linear toy data: y == x for 0..99.
x = np.arange(0, 100)
y = np.arange(0, 100)
print(x)
print(y)

lr = LinearRegression()

# Inspect dimensions before reshaping (values are only shown interactively).
x.ndim
y.ndim
x.shape
y.shape

# The estimator needs a 2-D (n_samples, n_features) feature matrix.
x = x.reshape(-1, 1)
x.shape
x.ndim

lr.fit(x, y)

plt.scatter(x, y, color='red')
plt.plot(x, lr.predict(x), color='blue')
plt.title('Linear Regression Demo')
plt.xlabel('X')
plt.ylabel('y')
plt.show()
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

# Randomly generate 1000 points scattered around the line y = 0.1x + 0.3.
num_points = 1000
vectors_set = []
for i in range(num_points):
    x1 = np.random.normal(0.0, 0.55)
    # Point on the line plus a small Gaussian perturbation.
    y1 = x1 * 0.1 + 0.3 + np.random.normal(0.0, 0.03)
    vectors_set.append([x1, y1])

# Split the samples into separate x and y sequences.
x_data = [v[0] for v in vectors_set]
y_data = [v[1] for v in vectors_set]

plt.scatter(x_data, y_data, c='r')
plt.show()