def xperiment():
    """Run the LDA / MANOVA experiment for the module-level ``FEATURE``.

    Loads the merged data, drops rows containing missing values, prints the
    category frequencies and the correlation of the ``PARAM`` columns, runs
    the pairwise T2 step and a MANOVA, then fits a two-component LDA and shows
    the transformed points as a scatter plot.

    Reads the module-level names ``FEATURE``, ``PARAM`` and ``projectroot``.
    Takes no arguments and returns nothing.
    """
    df = pull_merged_data(feature=FEATURE).dropna()
    # ``.values`` replaces ``as_matrix()``: the latter was deprecated in
    # pandas 0.23 and removed in 1.0, while ``.values`` yields the same NumPy
    # array on every pandas version.
    X, Y = df[PARAM].values, df[FEATURE].values

    inspection.category_frequencies(Y)
    # NOTE(review): presumably drops categories with fewer than 10 samples --
    # confirm against drop_lowNs; note that it returns (Y, X), in that order.
    Y, X = drop_lowNs(10, Y, X)
    inspection.correlation(X, names=PARAM)

    pairwise_T2(X, Y, dumproot=projectroot, xpid=f"PairwiseT2_{FEATURE}.xlsx")
    F, p = manova(X, Y)
    print("-" * 50)

    lda = LDA(n_components=2).fit(X, Y)  # type: LDA
    smexvar = lda.explained_variance_ratio_
    scat = scatter.Scatter2D(
        lda.transform(X), Y,
        title=f"LDA ({smexvar.sum():.2%})\nMANOVA: F = {F:.4f}, p = {p:.4f}",
        axlabels=[f"Latent0{i} ({ev:.2%})" for i, ev in enumerate(smexvar, start=1)],
    )

    # With more than five classes the legend is switched off and the plot uses
    # the center/label options instead.
    is_many = len(np.unique(Y)) > 5
    scat.split_scatter(legend=not is_many, show=True, center=is_many, label=is_many)
def normality():
    """Run the normality checks and per-column plots on the enclosing-scope ``X``.

    Column names are taken from ``df.columns`` with the first column skipped.
    For every name a plot is written to ``{projectroot}N27.Results/<name>.png``
    without being shown; afterwards the correlation inspection is run on ``X``.
    Takes no arguments and returns nothing.
    """
    param_names = df.columns[1:]
    full(X, names=param_names)

    # Axis labels are Hungarian: "isotope ratio", "probability of occurrence"
    # and "theoretical Z-value".
    hist_axis_labels = (r"$\delta^{13}C$ izotóparány", "Előfordulási valószínűség")
    prob_axis_labels = ("Elméleti Z-érték", r"$\delta^{13}C$ izotóparány")

    for column_index, column_name in enumerate(param_names):
        figure_path = f"{projectroot}N27.Results/{column_name}.png"
        fullplot(
            X[:, column_index],
            column_name,
            histbins=7,
            show=False,
            dumppath=figure_path,
            histlabels=hist_axis_labels,
            problabels=prob_axis_labels,
        )

    correlation(X, param_names)
def inspect_classes():
    """Inspect the data set returned by ``load_dataset``.

    Prints the category frequencies of ``Y``, runs the correlation inspection
    and the full normality test on ``X``, then draws one ``fullplot`` per
    column of ``X``. Takes no arguments and returns nothing.
    """
    from csxdata.stats import normaltest, inspection
    from csxdata.visual.histogram import fullplot

    # Column labels "Y0".."Y9" followed by "P0".."P9".
    names = [prefix + str(index) for prefix in "YP" for index in range(10)]

    X, Y = load_dataset(as_matrix=False, as_string=True)

    inspection.category_frequencies(Y)
    inspection.correlation(X, names=names)
    normaltest.full(X, names=names)

    # X.T iterates over the columns of X; zip pairs each with its label.
    for label, values in zip(names, X.T):
        fullplot(values, label)
import numpy as np
from matplotlib import pyplot as plt
from csxdata.stats.inspection import correlation
from SciProjects.rich.stockshu.data_util import pull_data

# Script: run the correlation inspection on the pulled data, then plot every
# column of Y as its own labelled line.
Y, header = pull_data()

# One x value per row of Y (1-based). The previous ``np.arange(1, len(Y))``
# produced only len(Y) - 1 values, so ``plt.plot(X, y)`` was handed x and y of
# different lengths.
X = np.arange(1, len(Y) + 1)

correlation(Y, names=header)

# Y.T iterates over the columns of Y; each is drawn with its header as label.
for y, col in zip(Y.T, header):
    plt.plot(X, y, label=col)
plt.legend()
plt.show()
from csxdata.stats.inspection import correlation
from rich.currency.util import pull_data

# Script: pull the currency data and run the correlation inspection on it.
X, Y, header = pull_data()

# NOTE(review): ``header`` is unpacked but never used, and ``Y`` is passed as
# the second positional argument of ``correlation`` -- confirm against the
# signature of ``correlation`` whether ``header`` was intended here.
correlation(X, Y)
from SciProjects.sophie import projectroot
from csxdata.utilities.parser import parse_csv
from csxdata.utilities.vectorop import dropna
from csxdata.stats.inspection import category_frequencies, correlation
from csxdata.stats.normaltest import full

# Script: load "01GEO.csv" from the project root and inspect it.
X, Y, head = parse_csv(
    projectroot + "01GEO.csv",
    indeps=2,
    headers=1,
    decimal=True,
)

# Category frequencies are printed before dropna() is applied to X and Y.
category_frequencies(Y)

X, Y = dropna(X, Y)

# Correlation inspection with four explicit column names, followed by the
# full normality test on the same matrix.
correlation(X, ["X", "Y", "DH1", "DH2"])
full(X)
def correlations():
    """Run the csxdata correlation inspection on ``X`` with ``names``.

    ``X`` and ``names`` are not parameters; they are looked up in the
    enclosing scope at call time. Returns nothing.
    """
    # Imported inside the function, so csxdata is only loaded when it is called.
    from csxdata.stats import inspection

    inspection.correlation(X, names)