def check_classifiers_classes(name, Classifier):
    X, y = make_blobs(n_samples=30, random_state=0, cluster_std=0.1)
    X, y = shuffle(X, y, random_state=7)
    X = StandardScaler().fit_transform(X)
    # We need to make sure that we have non-negative data, for things
    # like NMF
    X -= X.min() - .1
    y_names = np.array(["one", "two", "three"])[y]
    for y_names in [y_names, y_names.astype('O')]:
        if name in ["LabelPropagation", "LabelSpreading"]:
            # TODO some complication with -1 label
            y_ = y
        else:
            y_ = y_names
        classes = np.unique(y_)
        # catch deprecation warnings
        with warnings.catch_warnings(record=True):
            classifier = Classifier()
        if name == 'BernoulliNB':
            classifier.set_params(binarize=X.mean())
        set_fast_parameters(classifier)
        # fit
        classifier.fit(X, y_)
        y_pred = classifier.predict(X)
        # training set performance
        assert_array_equal(np.unique(y_), np.unique(y_pred))
        if np.any(classifier.classes_ != classes):
            print("Unexpected classes_ attribute for %r: "
                  "expected %s, got %s" %
                  (classifier, classes, classifier.classes_))
def numpy_normalize(self):
    '''By using numpy's implementation of std, memory consumption can be
    reduced by half'''
    std = self.X.std(axis=0)
    mean = self.X.mean(axis=0)
    scaler = StandardScaler(copy=False)
    # Inject the precomputed statistics; the fitted attribute names are
    # mean_ and scale_. Calling fit_transform here would recompute them
    # and defeat the purpose, so use transform instead.
    scaler.mean_ = mean
    scaler.scale_ = std
    self.X_normalized = scaler.transform(self.X)
    print("Data normalized with numpy")
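# A minimal sketch (illustrative, not from the original code): the same
# memory saving can be had with plain numpy in-place operations, with
# ddof=0 matching StandardScaler's population std.
import numpy as np

def normalize_inplace(X):
    mean = X.mean(axis=0)
    std = X.std(axis=0)          # ddof=0, as StandardScaler uses
    std[std == 0] = 1.0          # guard against constant columns
    X -= mean                    # in place: no second copy of X
    X /= std
    return X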
def calc_pca(data, n_comps=2, standardize=False):
    if standardize:
        data = StandardScaler().fit_transform(data)
    data = data - data.mean(axis=0)  # center the data
    cov_mat = np.cov(data, rowvar=False)
    # eigh (symmetric eigensolver) in place of eig: cov_mat is symmetric,
    # and eigh guarantees real-valued output
    evals, evecs = np.linalg.eigh(cov_mat)
    idx = np.argsort(evals)[::-1]
    evecs = evecs[:, idx]
    evals = evals[idx]
    evecs = evecs[:, :n_comps]
    return np.dot(data, evecs), evals, evecs
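# Hedged usage sketch for calc_pca on synthetic data, cross-checked against
# sklearn's PCA. Component signs are arbitrary, hence the abs comparison.
import numpy as np
from sklearn.decomposition import PCA

rng = np.random.RandomState(0)
data = rng.randn(100, 5)
scores, evals, evecs = calc_pca(data, n_comps=2)
sk_scores = PCA(n_components=2).fit_transform(data)
print(np.allclose(np.abs(scores), np.abs(sk_scores)))  # expect True, up to sign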
def qq_plot(df):
    # not working for some reason. No time to find out
    price = df.price[(df.price <= 2000) & (df.price > 500)]
    price_log = np.log(price)
    price_mm = MinMaxScaler().fit_transform(
        price.values.reshape(-1, 1).astype(np.float64)).flatten()
    price_z = StandardScaler().fit_transform(
        price.values.reshape(-1, 1).astype(np.float64)).flatten()
    sm.qqplot(price_log, loc=price_log.mean(),
              scale=price_log.std()).savefig('qq_price_log.png')
    sm.qqplot(price_mm, loc=price_mm.mean(),
              scale=price_mm.std()).savefig('qq_price_mm.png')
    sm.qqplot(price_z, loc=price_z.mean(),
              scale=price_z.std()).savefig('qq_price_z.png')
    return
def standardize(array, name):
    """Receives a DataFrame or Series (from pandas) and returns a numpy
    array with zero mean and unit variance."""
    # Transform to numpy array (as_matrix() was removed from pandas;
    # to_numpy() is the replacement)
    nparray = array.to_numpy().reshape(array.shape[0], 1).astype('float32')
    print('------------')
    print(name)
    print('Different values before:', np.unique(nparray).shape[0])
    # Standardize the data
    nparray = StandardScaler().fit_transform(nparray)
    # Print some information
    print('Mean:', nparray.mean())
    print('Max:', nparray.max())
    print('Min:', nparray.min())
    print('Std:', nparray.std())
    print('Different values after:', np.unique(nparray).shape[0])
    return nparray
def standardize_dataframe(df):
    """
    In order to perform a principal component analysis on the data, you
    first need to standardize it. The goal here is to have column means
    of 0 and standard deviations of 1.

    Input: DataFrame
    Returns: standardized feature matrix as a numpy array
    """
    features = [f"Atr{i}" for i in range(1, 55)]
    x_ = df.loc[:, features].values
    y = df.loc[:, ['Class']].values  # labels; unused in the standardization itself
    x = StandardScaler().fit_transform(x_)
    # check that column means are 0 and standard deviations are 1
    # (ddof=0 here: StandardScaler divides by the population std,
    # so a ddof=1 check would come out slightly above 1)
    print(x.mean(axis=0))
    print(x.std(axis=0, ddof=0))
    return x
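# Quick illustration of the ddof point above (synthetic data): StandardScaler
# divides by the population std, so checking with ddof=1 gives values
# slightly above 1, by a factor of sqrt(n / (n - 1)).
import numpy as np
from sklearn.preprocessing import StandardScaler

z = StandardScaler().fit_transform(np.random.randn(100, 3))
print(z.std(axis=0, ddof=0))  # ~1.0
print(z.std(axis=0, ddof=1))  # ~1.005 for n=100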
# In[]
# Covariance matrix, correlations, linear-dependence heatmap, and condition number
cov_df = df_num_norm.cov()
var_global = sum(np.diag(cov_df))
det = np.linalg.det(cov_df)
corr_df = df_num_norm.corr()
sns.heatmap(corr_df, center=0, cmap='Blues_r')
cond_cov = np.linalg.cond(cov_df)

# In[]
# Outlier identification and removal of the 10% most extreme
# a = []
a_rob = []
media_num_norm = np.array(df_num_norm.mean())
mediana_num_norm = np.array(df_num_norm.median())
inv_cov = np.linalg.inv(np.array(cov_df))
for i in range(len(df_num_norm.index)):
    # b = distance.mahalanobis(np.array(df_num_norm.iloc[i, :]), media_num_norm, inv_cov)
    b_rob = distance.mahalanobis(np.array(df_num_norm.iloc[i, :]),
                                 mediana_num_norm, inv_cov)
    # a.append(b)
    a_rob.append(b_rob)
# df_num_norm['mahal_normal'] = a
df_num_norm['mahal_rob'] = a_rob
# df_v2['mahal_normal'] = a
df_v2['mahal_rob'] = a_rob
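# Hedged sketch (not part of the original notebook): the row-by-row
# Mahalanobis loop above can be vectorized with einsum. Assumes X holds
# only the numeric columns that produced cov_df.
import numpy as np

def mahalanobis_all(X, center, inv_cov):
    d = X - center  # (n, p) deviations from the chosen center
    return np.sqrt(np.einsum('ij,jk,ik->i', d, inv_cov, d))

# e.g. (num_cols is hypothetical: the original numeric columns)
# mahal_rob = mahalanobis_all(df_num_norm[num_cols].values, mediana_num_norm, inv_cov)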
price_mm = MinMaxScaler().fit_transform(
    price.values.reshape(-1, 1).astype(np.float64)).flatten()
price_z = StandardScaler().fit_transform(
    price.values.reshape(-1, 1).astype(np.float64)).flatten()
price_log = np.log(price)

# Q-Q plot of the initial feature

# In[ ]:
sm.qqplot(price, loc=price.mean(), scale=price.std())

# Q-Q plot after StandardScaler. Shape doesn't change

# In[ ]:
sm.qqplot(price_z, loc=price_z.mean(), scale=price_z.std())

# Q-Q plot after MinMaxScaler. Shape doesn't change

# In[ ]:
sm.qqplot(price_mm, loc=price_mm.mean(), scale=price_mm.std())

# Q-Q plot after taking the logarithm. Things are getting better!

# In[ ]:
sm.qqplot(price_log, loc=price_log.mean(), scale=price_log.std())
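# A numeric counterpart to the Q-Q plots above (illustrative, assuming the
# price / price_z / price_log variables from this cell): linear scalers
# leave the normality statistic unchanged, while the log transform lowers it.
from scipy import stats
for name, arr in [('raw', price.values), ('z-scaled', price_z),
                  ('log', price_log.values)]:
    print(name, stats.normaltest(arr).statistic)  # lower = closer to normal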
X_test = test.drop(['Name', 'Ticket', 'Cabin'], axis=1)

# In[240]:
X_train.head()

# In[241]:
X_test.head()

# In[253]:
from sklearn.linear_model import LogisticRegressionCV
m = LogisticRegressionCV()
m.fit(X_train, y_train)
print([s.mean() for s in m.scores_[1]])
y_test = m.predict(X_test)
submission = X_test.copy()
submission['Survived'] = y_test
submission.head()
submission.to_csv('submission.csv', columns=['Survived'])

# In[254]:
from sklearn.svm import SVC
m = SVC()
m.fit(X_train, y_train)
print(m.score(X_train, y_train))
y_test = m.predict(X_test)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler

iris = load_iris()
X = iris.data  # X has shape (150, 4)
# Standardize X
X_norm = StandardScaler().fit_transform(X)
X_norm.mean(axis=0)  # now each feature has zero mean

# Compute eigenvalues and eigenvectors.
# np.cov computes the covariance matrix directly: each row is a feature,
# each column a sample.
ew, ev = np.linalg.eig(np.cov(X_norm.T))
print(ew)
print(ev)

# Sort eigenvectors by eigenvalue, descending
ew_order = np.argsort(ew)[::-1]
print("ew_order", ew_order)
ew_sort = ew[ew_order]
print("ew_sort", ew_sort)
ev_sort = ev[:, ew_order]  # each column of ev is an eigenvector
print("ev_sort", ev_sort)
print(ev_sort.shape)  # (4, 4)

# To reduce to 2 dimensions, the first two columns of the sorted
# eigenvectors form the basis
K = 2
V = ev_sort[:, :K]  # 4x2
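# Completing the projection, with a hedged cross-check against sklearn's
# PCA (not in the original snippet; component signs may differ, hence abs):
X_new = X_norm.dot(V)  # shape (150, 2)

from sklearn.decomposition import PCA
X_pca = PCA(n_components=2).fit_transform(X_norm)
print(np.allclose(np.abs(X_new), np.abs(X_pca)))  # expect True, up to sign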
def fit(self, X, y=None):
    """Estimate the CSP decomposition on epochs.

    Parameters
    ----------
    X : DataFrame, shape (n_epochs, n_columns)
        DataFrame with mode magnitudes at each channel for the
        corresponding window, trial and label. The data on which to
        estimate the CSP.

    Returns
    -------
    self : instance of CSP
        Returns the modified instance.
    """
    y = X['label'].reset_index(drop=True)
    trials = X['trial'].reset_index(drop=True)
    X = X.values[:, :-4]
    X = StandardScaler().fit_transform(X)
    if not isinstance(X, np.ndarray):
        raise ValueError("X should be of type ndarray (got %s)." % type(X))
    self._check_Xy(X, y)
    n_channels = X.shape[1]

    self._classes = np.unique(y)
    n_classes = len(self._classes)
    if n_classes < 2:
        raise ValueError("n_classes must be >= 2.")

    covs = np.zeros((n_classes, n_channels, n_channels))
    sample_weights = list()
    for class_idx, this_class in enumerate(self._classes):
        if self.cov_est == "concat":  # concatenate epochs
            class_ = X[y == this_class].T
            cov = _regularized_covariance(
                class_, reg=self.reg, method_params=self.cov_method_params,
                rank=self.rank)
            weight = sum(y == this_class)
        elif self.cov_est == "epoch":
            class_ = X[y == this_class]
            cov = np.zeros((n_channels, n_channels))
            for this_X in class_:
                cov += _regularized_covariance(
                    this_X, reg=self.reg,
                    method_params=self.cov_method_params, rank=self.rank)
            cov /= len(class_)
            weight = len(class_)

        covs[class_idx] = cov
        if self.norm_trace:
            # Append covariance matrix and weight. Prior to version 0.15,
            # trace normalization was applied, but it was breaking results
            # for some use cases by changing the apparent ranking of
            # patterns. Trace normalization of the covariance matrix was
            # removed without significant effect on patterns or
            # performance. If the user is interested in this feature, we
            # suggest trace normalization of the epochs prior to the CSP.
            covs[class_idx] /= np.trace(cov)

        sample_weights.append(weight)

    if n_classes == 2:
        eigen_values, eigen_vectors = linalg.eigh(covs[0], covs.sum(0))
        # sort eigenvectors
        ix = np.argsort(np.abs(eigen_values - 0.5))[::-1]
    else:
        # The multiclass case is adapted from
        # http://github.com/alexandrebarachant/pyRiemann
        eigen_vectors, D = _ajd_pham(covs)
        # Here we apply a Euclidean mean. See pyRiemann for other metrics
        mean_cov = np.average(covs, axis=0, weights=sample_weights)
        eigen_vectors = eigen_vectors.T

        # normalize
        for ii in range(eigen_vectors.shape[1]):
            tmp = np.dot(np.dot(eigen_vectors[:, ii].T, mean_cov),
                         eigen_vectors[:, ii])
            eigen_vectors[:, ii] /= np.sqrt(tmp)

        # class probability
        class_probas = [np.mean(y == _class) for _class in self._classes]

        # mutual information
        mutual_info = []
        for jj in range(eigen_vectors.shape[1]):
            aa, bb = 0, 0
            for (cov, prob) in zip(covs, class_probas):
                tmp = np.dot(np.dot(eigen_vectors[:, jj].T, cov),
                             eigen_vectors[:, jj])
                aa += prob * np.log(np.sqrt(tmp))
                bb += prob * (tmp ** 2 - 1)
            mi = -(aa + (3.0 / 16) * (bb ** 2))
            mutual_info.append(mi)
        ix = np.argsort(mutual_info)[::-1]

    # sort eigenvectors
    eigen_vectors = eigen_vectors[:, ix]

    self.filters_ = eigen_vectors.T
    # note: scipy.linalg.pinv2 was removed in newer SciPy; linalg.pinv is
    # the equivalent there
    self.patterns_ = linalg.pinv2(eigen_vectors)

    pick_filters = self.filters_[:self.n_components]
    X = np.dot(pick_filters, X.T)

    # compute features (mean band power)
    X = pd.DataFrame(X.T)
    X, y = mean_trial(X, trials, y)

    # To standardize features
    self.mean_ = X.mean(axis=0)
    self.std_ = X.std(axis=0)

    return self
def classify(filename='4_14_type4_apollo3d.txt', useFrac=1.0,
             trainFraction=0.5, equalClassSize=True, thres=0.5,
             useFeatures=[0], useAll=True, batch=False, useCache=True,
             featureSelect=False, kickType=[11], draw=False, scale=False,
             C=1.0, B=1.0, returnProb=False):
    features, labels = parse(filename=filename, useCache=useCache,
                             ezKickSuccess=False, kickType=kickType,
                             ignoreSelfFailure=False,
                             useDirectFeatures=True, nfeatures=8)
    num2Use = int(useFrac * len(features))
    features = features[:num2Use]
    labels = labels[:num2Use]
    if scale:
        features = StandardScaler().fit_transform(features)
        print("features mean:", features.mean(axis=0))
        print("features std:", features.std(axis=0))
    if not useAll:
        # labels = np.random.random(features.shape[0]) < 0.5
        newFeatures = features[:, useFeatures]
        # print(newFeatures[:100, :])
        # newFeatures = np.random.random((features.shape[0], 9))
    else:
        newFeatures = features
    if equalClassSize:
        newFeatures, labels = balanceClasses(newFeatures, labels)

    print("we have " + str(newFeatures.shape[0]) + " samples.")
    print("we have " + str(np.sum(labels == 1)) + " positive labels")
    print("ratio: " + str(float(np.sum(labels == -1)) / np.sum(labels == 1)))
    print("using approximately " + str(trainFraction * 100) +
          "% as training examples")

    r = np.random.random(newFeatures.shape[0]) < trainFraction
    r2 = np.invert(r)
    trainingSet = newFeatures[r, :]
    trainLabels = labels[r]
    testingSet = newFeatures[r2, :]
    testLabels = labels[r2]

    if not equalClassSize:
        testingSet, testLabels = balanceClasses(testingSet, testLabels)
        # class_weight='auto' in older sklearn; 'balanced' is the current name
        clf = LogisticRegression(C=C, class_weight='balanced',
                                 intercept_scaling=B, penalty='l2')
        # clf = svm.SVC(C=C, kernel='rbf', class_weight='balanced', probability=returnProb)
    else:
        clf = LogisticRegression(C=C, intercept_scaling=B, penalty='l2')
        # clf = svm.SVC(C=C, kernel='rbf', class_weight='balanced', probability=returnProb)
        # clf = RandomForestClassifier()
        # clf = KNeighborsClassifier(n_neighbors=15)
        # print(np.arange(20)[clf2.get_support()])
        # clf = AdaBoostClassifier()
        # clf = GradientBoostingClassifier(init=LogisticRegression)
        # clf = GaussianNB()
        # clf = DecisionTreeClassifier()

    if featureSelect:
        rfecv = RFE(estimator=clf, step=1, n_features_to_select=8)
        # rfecv = RFECV(estimator=clf, step=1, cv=10)
        rfecv.fit(newFeatures, labels)
        print("Optimal number of features : %d" % rfecv.n_features_)
        print(rfecv.ranking_)
        print(np.arange(20)[rfecv.support_])
        return

    clf.fit(trainingSet, trainLabels)

    def myPredict(clf, x, thres=0.5):
        probArray = clf.predict_proba(x)[:, 1]
        predictLabels = 1 * (probArray > thres)
        predictLabels = 2 * predictLabels - 1
        return predictLabels, probArray

    # d = np.reshape(np.linspace(0, 10, num=1000), (-1, 1))
    # # print(d.shape)
    # results = clf.predict(d)
    # for i in range(1000):
    #     if results[i] == 1:
    #         print("dist:", i * 0.01)
    #         break

    if returnProb:
        predictLabels, probArray = myPredict(clf, testingSet, thres=thres)
    else:
        predictLabels = clf.predict(testingSet)
    # print("accuracy rate from classifier: " + str(clf.score(testingSet, testLabels)))

    suffix = "" if useAll else str(features)
    if draw and returnProb:
        area = drawPrecisionRecallCurve(filename[:-4] + suffix, testLabels,
                                        probArray)
        roc_auc = drawROCCurve(filename[:-4] + suffix, testLabels, probArray)

    false_neg = false_pos = true_neg = true_pos = 0
    for i in range(len(predictLabels)):
        if predictLabels[i] == testLabels[i] == -1:
            true_neg += 1
        elif predictLabels[i] == testLabels[i] == 1:
            true_pos += 1
        elif predictLabels[i] == -1 and testLabels[i] == 1:
            false_neg += 1
        else:
            false_pos += 1
    good = true_neg + true_pos
    print("accuracy rate: ", good / float(len(predictLabels)), good)
    print("true negative rate: ", true_neg / float(len(predictLabels)), true_neg)
    print("true positive rate: ", true_pos / float(len(predictLabels)), true_pos)
    print("false negative rate: ", false_neg / float(len(predictLabels)), false_neg)
    print("false positive rate: ", false_pos / float(len(predictLabels)), false_pos)
    precision = true_pos / float(true_pos + false_pos)
    recall = true_pos / float(true_pos + false_neg)
    print("precision: ", precision)
    print("recall: ", recall)
    print("f1 score: ", 2 * (precision * recall) / (precision + recall))
    return good / float(len(predictLabels))
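# Hedged cross-check (illustrative, not in the original script): the
# hand-rolled counts above can be verified with sklearn.metrics, given the
# same -1/+1 labels.
from sklearn.metrics import confusion_matrix, precision_recall_fscore_support

def report(testLabels, predictLabels):
    tn, fp, fn, tp = confusion_matrix(testLabels, predictLabels,
                                      labels=[-1, 1]).ravel()
    p, r, f1, _ = precision_recall_fscore_support(
        testLabels, predictLabels, pos_label=1, average='binary')
    print("tn=%d fp=%d fn=%d tp=%d" % (tn, fp, fn, tp))
    print("precision=%.3f recall=%.3f f1=%.3f" % (p, r, f1))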
df = pd.read_csv("C:\\Users\\akroc\\Desktop\\Spotify PCA\\traingdata.csv") print(df.head()) print(df.describe()) df.drop(['time_signature', 'mode', 'key', 'duration_ms'], axis=1, inplace=True) print(df.head()) # Standardization is important in PCA since it is a variance maximizing exercise. # It projects your original data onto directions which maximize the variance. from sklearn.preprocessing import StandardScaler x = df.values #returns a numpy array x_scaled = StandardScaler().fit_transform(x) df_scaled = pd.DataFrame(x_scaled, columns=df.columns) print(df_scaled.head()) # Calculate a PCA manually # calculate the mean vector mean_vector = x_scaled.mean(axis=0) print(mean_vector) # calculate the covariance matrix cov_mat = np.cov((x_scaled).T) print(cov_mat.shape) print(cov_mat) # calculate the eigenvectors and eigenvalues of our covariance matrix for the dataset eig_val_cov, eig_vec_cov = np.linalg.eig(cov_mat) # Print the eigen vectors and corresponding eigenvalues # in order of descending eigenvalues for i in range(len(eig_val_cov)): eigvec_cov = eig_vec_cov[:, i] print('Eigenvector {}: \n{}'.format(i + 1, eigvec_cov)) print('Eigenvalue {} from covariance matrix: {}'.format( i + 1, eig_val_cov[i])) print(50 * '-')
import pickle as pkl

import torch
import torch.nn as nn
from sklearn.preprocessing import StandardScaler

BASE = '../'
PATH = BASE + 'data/'

with open(PATH + "Audio_X.pkl", "rb") as filehandler:
    X_ = pkl.load(filehandler)
with open(PATH + "Audio_Y.pkl", "rb") as filehandler:
    Y_ = pkl.load(filehandler)
print('Loaded Training Set')

X_ = StandardScaler().fit_transform(X_)
print(X_.mean(axis=0))
print(X_.std(axis=0))

Y = torch.from_numpy(Y_.flatten())
X = torch.from_numpy(X_).float()

audio_model = EncodingNN()

# Defining criterion
num_epochs = 1200
learning_rate = 1e-4
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(audio_model.parameters(), lr=learning_rate)

# Train the model
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler

np.random.seed(24)

# Load the data
iris = load_iris()
X = iris.data
X_norm = StandardScaler().fit_transform(X)
X_norm.mean(axis=0)

# Compute eigenvalues and eigenvectors
ew, ev = np.linalg.eig(np.cov(X_norm.T))

# Sort eigenvectors by eigenvalue, descending
ew_order = np.argsort(ew)[::-1]
ew_sort = ew[ew_order]
ev_sort = ev[:, ew_order]  # each column of ev is an eigenvector
ev_sort.shape  # (4, 4)

# To reduce to 2 dimensions, the first two columns of the sorted
# eigenvectors form the basis
K = 2
V = ev_sort[:, :K]  # 4x2

# Finally, project to get the reduced data
X_new = X_norm.dot(V)  # shape (150, 2)

colors = ['red', 'black', 'orange']
plt.figure()
positions = pd.DataFrame()
deviations = pd.DataFrame()
slopes = pd.DataFrame()
for i in range(len(pair_list)):
    instrument = pair_list[i]
    shape = shape_list[i]
    if shape == 1:
        values = ratios.loc[start:end, instrument]
    else:
        values = (ratios.loc[start:end, instrument] * -1) \
            + (2 * ratios.loc[start:end, instrument].values[-1])
    pos = get_channel_mean_pos_std(values.values, win)
    positions[instrument] = pos['pos'].values.ravel()
    deviations[instrument] = pos['std'].values.ravel()
    slopes[instrument] = pos['slope'].values.ravel()
slopes_mean[win[0]] = slopes.mean(axis=1)
position_mean[win[0]] = positions.mean(axis=1)

plt_index = np.arange(start, end + 1)
position_mean.index = plt_index
slopes_mean.index = plt_index

position_mean.plot(color=color_list)  # pandas .plot takes color=, not colors=
plt.plot(plt_index, np.ones(plt_index.shape[0]) * 2, color='grey')
plt.plot(plt_index, np.ones(plt_index.shape[0]) * -2, color='grey')
plt.plot(plt_index, np.ones(plt_index.shape[0]) * 0, color='grey')
plt.tight_layout()
plt.title('Mean of Currency Set Channel Positions on Multiple Windows')

slopes_mean.plot(color=color_list)
plt.plot(plt_index, np.ones(plt_index.shape[0]) * 0, color='grey')
plt.tight_layout()
                       header=0, names=None, index_col=0)

# Columns to cluster on
loan = np.array(loan_data[[
    'INA5', 'INA15', 'INP5', 'INP15', 'OUTA5', 'OUTA15', 'OUTP5', 'OUTP15'
]])
before = KMeans(n_clusters=4).fit(loan)  # before dimensionality reduction
print('KMeans result before dimensionality reduction')
print(before.labels_)

# Normalize in preparation for dimensionality reduction
X_norm = StandardScaler().fit_transform(loan)
X_norm.mean(axis=0)  # each feature now has zero mean

# Reduction method 1: ------------------------------
# Compute eigenvalues and eigenvectors.
# np.cov computes the covariance matrix directly: each row is a feature,
# each column a sample.
ew, ev = np.linalg.eig(np.cov(X_norm.T))

# Sort eigenvectors by eigenvalue, descending
ew_order = np.argsort(ew)[::-1]
ew_sort = ew[ew_order]
ev_sort = ev[:, ew_order]  # each column of ev is an eigenvector
ev_sort.shape  # (8, 8) for these 8 features

# To reduce to 2 dimensions, the first two columns of the sorted
# eigenvectors form the basis
K = 2
V = ev_sort[:, :K]  # 8x2
df = pd.read_csv(URL)
df
df.columns
df.index = df.loc[:, 'Customer Id']
df.drop('Customer Id', axis=1, inplace=True)
df.info()
df.drop('Address', axis=1, inplace=True)
df.info()

from sklearn.preprocessing import StandardScaler
X = df.values
X = np.nan_to_num(X)
X = StandardScaler().fit_transform(X)
X.std(axis=0)
X.mean(axis=0)

k_means = KMeans(init='k-means++', n_clusters=3, n_init=12)
k_means.fit(X)
labels = k_means.labels_
df['Cluster'] = labels
df.groupby('Cluster').mean().T
# np.float was removed from numpy; the builtin float works here
plt.scatter(X[:, 0], X[:, 3], c=labels.astype(float), alpha=.5)

# Hierarchical clustering
%reset -f
import numpy as np
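# Hedged sketch (illustrative; uses X as computed in the K-Means cell
# above, before the %reset): choosing n_clusters by silhouette score
# instead of fixing it at 3.
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score

for k in range(2, 7):
    km = KMeans(init='k-means++', n_clusters=k, n_init=12, random_state=0)
    print(k, silhouette_score(X, km.fit_predict(X)))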
# Take the price feature from the Renthop dataset and filter out the most
# extreme values by hand, for clarity
price = df.price[(df.price <= 20000) & (df.price > 500)]
price_log = np.log(price)
price_mm = MinMaxScaler().fit_transform(
    price.values.reshape(-1, 1).astype(np.float64)).flatten()
# quite a bit of gymnastics so that sklearn doesn't spew warnings
price_z = StandardScaler().fit_transform(
    price.values.reshape(-1, 1).astype(np.float64)).flatten()
sm.qqplot(price_log, loc=price_log.mean(),
          scale=price_log.std()).savefig('qq_price_log.png')
sm.qqplot(price_mm, loc=price_mm.mean(),
          scale=price_mm.std()).savefig('qq_price_mm.png')
sm.qqplot(price_z, loc=price_z.mean(),
          scale=price_z.std()).savefig('qq_price_z.png')

# In[46]:
from demo import get_data
x_data, y_data = get_data()
x_data.head(5)

# In[47]:
x_data = x_data.values
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
# print(reducedDataSet.shape)
# labels = model.labels_
# print('labels')
# print(labels)
# sil = metrics.silhouette_score(X, labels, metric='euclidean', sample_size=5000)

from sklearn.decomposition import PCA
import numpy as np

# PCA rather than cluster.FeatureAgglomeration: the code below relies on
# the pca.mean_ and pca.components_ attributes, which only PCA provides
pca = PCA(n_components=2)
pca.fit(X)
U, S, VT = np.linalg.svd(X - X.mean(0))
X_train_pca = pca.transform(X)
X_train_pca2 = (X - pca.mean_).dot(pca.components_.T)
X_projected = pca.inverse_transform(X_train_pca)
X_projected2 = X_train_pca.dot(pca.components_) + pca.mean_
loss = ((X - X_projected) ** 2).mean()
print(loss)
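# Sanity check (illustrative): the manual projection/reconstruction above
# should agree with the estimator's own transform/inverse_transform.
print(np.allclose(X_train_pca, X_train_pca2))  # expect True
print(np.allclose(X_projected, X_projected2))  # expect True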
from random import random
from sklearn import cluster
from sklearn.preprocessing import StandardScaler
import numpy as np

# X = np.array([[1.1], [0.9], [2.1]])  # tiny example, overwritten below
X = np.array([[random() * 6] for _ in range(1000)])

# remove mean, set variance=1
XS = StandardScaler().fit_transform(X)  # computes mean & std, maps data to zero mean, unit variance
print(XS.mean())  # ~0
print(XS.std())   # ~1
# print(XS)

# x = X[:, 0]
# print(x)  # numpy flat array, ~ [1, 1, 2]
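# Equivalence check (illustrative): StandardScaler is just
# (x - mean) / std with the population std (ddof=0).
manual = (X - X.mean(axis=0)) / X.std(axis=0)
print(np.allclose(XS, manual))  # expect True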
ratings  # 99 and 99.0 have been replaced with 0 in 10000 rows and 150 columns

# Visualize the ratings for joke #148
import matplotlib.pyplot as plt
plt.figure(figsize=(20, 12))
plt.hist(ratings[148], bins=5)
plt.xlabel('Rating')
plt.ylabel('Number of ratings')
plt.suptitle('Joke - Ratings/Num of ratings')

# Normalize all these ratings using StandardScaler and save them in the
# ratings_diff variable
from sklearn.preprocessing import StandardScaler
ratings_diff = StandardScaler().fit_transform(ratings)
ratings_diff

# Using the popularity-based recommendation system, find the jokes that
# will be highly recommended.
# Find the mean of each column in ratings_diff, i.e. for each joke.
# Here each column represents a joke and the rows are the different
# entities who have rated it.
mean_ratings = ratings_diff.mean(axis=0)
mean_ratings

# Consider all the mean ratings, find the jokes with the highest mean
# value and display the top 10 joke IDs.
# First create a dataframe
mean_ratings = pd.DataFrame(mean_ratings)
mean_ratings.iloc[:, 0]
mean_ratings.iloc[:, 0].argsort()[:-11:-1]  # top 10, highest mean first
mean_ratings.plot()

x = ratings.iloc[1:4, :-100]
# jokeCorr = joke.corrwith(joke[50])

from sklearn.metrics.pairwise import cosine_similarity
df1 = ratings.iloc[:100]
df2 = ratings.iloc[100:200]
x = df1.iloc[1]
cs1 = cosine_similarity(x.values.reshape(1, -1), df1)
cs2 = cosine_similarity(x.values.reshape(1, -1), df2)
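# Hedged sketch of the item-based step hinted at above (assumes ratings is
# the users-by-jokes frame; similar_jokes is a hypothetical helper):
from sklearn.metrics.pairwise import cosine_similarity

def similar_jokes(ratings, joke_idx, top_n=10):
    sims = cosine_similarity(ratings.T.values)  # joke-by-joke similarity
    order = sims[joke_idx].argsort()[::-1]      # most similar first
    return [j for j in order if j != joke_idx][:top_n]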
# Plot 1 - every currency, all indicators
# -------------------------------------------------------------------------

# Call subplots
fig, ax = plt.subplots(5, 1, figsize=(10, 10), sharex=True)

# Plot currencies
df_scaled.plot(ax=ax[0])

# Plot slopes waves
slopes_waves.plot(ax=ax[1])

# Plot mean position waves
mean_position_waves.plot(ax=ax[2])

# Plot sum (slopes * mean)
location_measure.plot(figsize=(10, 3), ax=ax[3])
location_measure.mean(axis=1).plot(ax=ax[3], color='black')

# Plot channel mean position (c3)
channel_mean_scaled.plot(ax=ax[4])

# get std lines on channel position
x = np.arange(end - interval, end + 1)
ax[1].plot(x, np.zeros(df.shape[0]), color='black')
ax[2].plot(x, np.ones(df.shape[0]) * indicator_std, color='black')
ax[2].plot(x, np.ones(df.shape[0]) * -indicator_std, color='black')

# Legends
for a in ax:
    a.legend()

# Name rows