def ex_dark_light_scatter_sns():
    """Plot one binary-labelled 2D sample with a light and a dark Plotter.

    A random two-feature regression problem is generated and its target is
    thresholded at zero into the classes 0 and 1. The same data is then passed
    to ``plot_2D_features_multi_Y`` once per theme, each with its own output
    file name.
    """
    X, Y = make_regression(n_samples=100, n_features=2, noise=50.0)

    # Binarise the continuous target in place: non-positive -> 0, positive -> 1.
    Y[Y <= 0] = 0
    Y[Y > 0] = 1

    themes = (
        (False, 'seaborn_scatter_light.png'),
        (True, 'seaborn_scatter_dark.png'),
    )
    for is_dark, png_name in themes:
        plotter = tools_plot_v2.Plotter(folder_out, dark_mode=is_dark)
        plotter.plot_2D_features_multi_Y(X, Y, filename_out=png_name)
    return
def ex_dark_light_line():
    """Draw the same synthetic TPR/FPR line with a light and a dark Plotter.

    Both series are 20 evenly spaced values (``tpr`` over [0, 1], ``fpr`` over
    [0.3, 0.9]). They are passed to ``plot_tp_fp`` once per theme, each with
    its own output file name.
    """
    tpr = numpy.linspace(0, 1, 20)
    fpr = numpy.linspace(0.3, 0.9, 20)

    themes = (
        (False, 'matplotlib_line_light.png'),
        (True, 'matplotlib_line_dark.png'),
    )
    for is_dark, png_name in themes:
        plotter = tools_plot_v2.Plotter(folder_out, dark_mode=is_dark)
        plotter.plot_tp_fp(tpr, fpr, 0.5, caption='', filename_out=png_name)
    return
def __init__(self, Classifier, folder_out=None, dark_mode=False):
    """Store the classifier, build a Plotter and ensure the output folder exists.

    Args:
        Classifier: classifier object kept on ``self.classifier``.
        folder_out: optional output directory. When given and not yet present
            it is created, including any missing parent directories.
        dark_mode: forwarded to ``tools_plot_v2.Plotter``.
    """
    self.classifier = Classifier
    self.P = tools_plot_v2.Plotter(folder_out, dark_mode)
    self.folder_out = folder_out

    if folder_out is not None and not os.path.exists(folder_out):
        # makedirs (not mkdir) so nested paths such as './data/output/original/'
        # work when the parents are missing; exist_ok guards against the
        # directory appearing between the check above and the creation.
        os.makedirs(folder_out, exist_ok=True)
    return
def ex_train_test(X, Y):
    """Run the train/test pipeline on the given data and on a resampled copy.

    For both variants a DataFrame is assembled with the target in column 0
    (named ``'target'``) followed by the feature columns (named ``'0'``,
    ``'1'``, ...). Results for the untouched data use the ``original/``
    sub-folder of ``folder_out``; results for the output of
    ``get_SMOTE_UnderSampler`` use ``sampled/``.

    Args:
        X: 2-D feature array.
        Y: 1-D target array with one entry per row of ``X``.
    """
    C = classifier_LM.classifier_LM()

    # --- original data ---
    dir_original = folder_out + 'original/'
    ML = tools_ML_v2.ML(C, dir_original)
    P = tools_plot_v2.Plotter(dir_original)

    column_names = ['target'] + ['%d' % c for c in range(X.shape[1])]
    table = numpy.hstack((Y.reshape((-1, 1)), X))
    df = pd.DataFrame(data=table, columns=column_names)

    P.pairplots_df(df, idx_target=0)
    ML.E2E_train_test_df(df, idx_target=0)

    # --- resampled data ---
    dir_sampled = folder_out + 'sampled/'
    ML = tools_ML_v2.ML(C, dir_sampled)
    P = tools_plot_v2.Plotter(dir_sampled)

    X_Sampled, Y_Sampled = get_SMOTE_UnderSampler(X, Y, do_debug=True)
    table_sampled = numpy.hstack((Y_Sampled.reshape((-1, 1)), X_Sampled))
    # Column names are derived from the width of the original X in both cases.
    df_sampled = pd.DataFrame(data=table_sampled, columns=column_names)

    ML.E2E_train_test_df(df_sampled, idx_target=0)
    P.pairplots_df(df_sampled, idx_target=0)
    return
def __init__(self, app, folder_out, dark_mode):
    """Wire up the plotter and time-series helper, then clear the cache.

    Args:
        app: application object kept on ``self.app``.
        folder_out: output directory handed to the Plotter, the
            auto-regression model and the time-series helper.
        dark_mode: theme flag forwarded to the Plotter and the time-series
            helper.
    """
    self.app = app
    self.folder_out = folder_out
    self.filename_retro_df = 'retro.csv'

    self.P = tools_plot_v2.Plotter(folder_out, dark_mode)

    regression_model = TS_AutoRegression.TS_AutoRegression(folder_out)
    self.TS = tools_TS.tools_TS(
        Classifier=regression_model,
        dark_mode=dark_mode,
        folder_out=folder_out,
    )

    # Called last so that every attribute above is already set.
    self.clear_cache()
    return
def ex_random():
    """Run the train/test pipeline and the pair plots on random binary data.

    A random three-feature regression problem with 1250 samples is generated
    and its target is thresholded at zero into the classes 0 and 1. The data
    is packed into a DataFrame with the target in column 0.
    """
    X, Y = make_regression(n_samples=1250, n_features=3, noise=50.0)

    # Binarise the continuous target in place: non-positive -> 0, positive -> 1.
    Y[Y <= 0] = 0
    Y[Y > 0] = 1

    C = classifier_LM.classifier_LM()
    P = tools_plot_v2.Plotter(folder_out)

    target_column = Y.reshape((-1, 1))
    feature_names = ['%d' % c for c in range(X.shape[1])]
    df = pd.DataFrame(
        data=numpy.hstack((target_column, X)),
        columns=['target'] + feature_names,
    )

    ML = tools_ML_v2.ML(C, folder_out)
    ML.E2E_train_test_df(df, idx_target=0)
    P.pairplots_df(df, idx_target=0)
    return
def ex_titanic():
    """Run the train/test pipeline and the pair plots on the Titanic dataset.

    The tab-separated file ``dataset_titanic.csv`` is read from ``folder_in``,
    the ``alive`` and ``deck`` columns are dropped, and column 0 is used as
    the target.
    """
    # Other classifiers that can be swapped in here:
    #   classifier_KNN.classifier_KNN()
    #   classifier_DTree.classifier_DT(folder_out=folder_out)
    #   classifier_RF.classifier_RF()
    #   classifier_Ada.classifier_Ada()
    C = classifier_LM.classifier_LM()
    P = tools_plot_v2.Plotter(folder_out)

    idx_target = 0
    df = pd.read_csv(folder_in + 'dataset_titanic.csv', sep='\t')
    df = df.drop(columns=['alive', 'deck'])

    ML = tools_ML_v2.ML(C)
    ML.E2E_train_test_df(df, idx_target=idx_target)
    P.pairplots_df(df, idx_target=idx_target)
    return
import numpy as numpy
import math
from sklearn import linear_model
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
import pandas as pd
from sklearn import metrics
# ----------------------------------------------------------------------------------------------------------------------
import tools_plot_v2
# ----------------------------------------------------------------------------------------------------------------------
# Input and output locations used by the examples in this module.
folder_in = './data/ex_datasets/'
folder_out = './data/output/'
# ----------------------------------------------------------------------------------------------------------------------
# Module-level plotter configured with folder_out.
P = tools_plot_v2.Plotter(folder_out)
# ----------------------------------------------------------------------------------------------------------------------
def get_data_v1(filename_in, idx_target=0):
    """Read a tab-separated file and split it 50/50 into train and test arrays.

    Rows containing missing values are dropped before the shuffled split.
    Column ``idx_target`` is the target; all remaining columns are features.

    NOTE(review): the visible body ends after the train/test arrays are built
    and has no return statement; the function presumably continues beyond the
    visible source. Confirm against the full file.
    """
    df = pd.read_csv(filename_in, sep='\t')
    columns = df.columns.to_numpy()
    # Positions of every column except the target.
    idx = numpy.delete(numpy.arange(0, len(columns)), idx_target)
    df_train, df_test = train_test_split(df.dropna(), test_size=0.5, shuffle=True)
    # The target is selected with a one-element list, so Y_* stay 2-D
    # single-column arrays rather than flat vectors.
    X_train, Y_train = df_train.iloc[:, idx].to_numpy(
    ), df_train.iloc[:, [idx_target]].to_numpy()
    X_test, Y_test = df_test.iloc[:, idx].to_numpy(
    ), df_test.iloc[:, [idx_target]].to_numpy()
import numpy
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.seasonal import seasonal_decompose
from pandas.plotting import autocorrelation_plot
from pandas.plotting import lag_plot
import statsmodels.api as sm
# ----------------------------------------------------------------------------------------------------------------------
import tools_plot_v2
# ----------------------------------------------------------------------------------------------------------------------
# Input and output locations used by the examples in this module.
folder_in = './data/ex_TS/'
folder_out = './data/output/'
# ----------------------------------------------------------------------------------------------------------------------
# Module-level plotter configured with folder_out and dark mode enabled.
P = tools_plot_v2.Plotter(folder_out, dark_mode=True)
# ----------------------------------------------------------------------------------------------------------------------
def ex_decompose(ts):
    """Save a multiplicative seasonal decomposition plot of ``ts``.

    The series is re-indexed with synthetic dates starting at 2000-01-01, one
    step per sample, decomposed with ``seasonal_decompose`` and the resulting
    figure is written to ``decompose2_mult.png`` in ``folder_out``.

    NOTE(review): the body may continue beyond the visible source; confirm
    against the full file.
    """
    # Synthetic date index: base date plus integer offsets 0..len(ts)-1.
    dates = numpy.array('2000-01-01', dtype=numpy.datetime64) + numpy.arange(
        ts.shape[0])
    df = pd.DataFrame({'data': ts.to_numpy()}, index=dates)
    # Start from a clean figure and set the default figure size.
    plt.clf()
    plt.rcParams.update({'figure.figsize': (12, 7)})
    seasonal_decompose(
        df, model='multiplicative').plot().suptitle('Multiplicative Decomposition')
    plt.tight_layout()
    plt.savefig(folder_out + 'decompose2_mult.png')
def __init__(self, Classifier=None, dark_mode=False, folder_out=None):
    """Keep the classifier and output folder and build a matching Plotter.

    Args:
        Classifier: optional classifier object kept on ``self.classifier``.
        dark_mode: theme flag forwarded to ``tools_plot_v2.Plotter``.
        folder_out: optional output directory, kept on ``self.folder_out`` and
            passed to the Plotter.
    """
    self.classifier = Classifier
    self.folder_out = folder_out
    self.Plotter = tools_plot_v2.Plotter(folder_out, dark_mode=dark_mode)
import cv2
import numpy as numpy
import pandas as pd
from sklearn.impute import SimpleImputer
# ----------------------------------------------------------------------------------------------------------------------
import tools_plot_v2
# ----------------------------------------------------------------------------------------------------------------------
# Input and output locations used by the examples in this module.
folder_in = './data/ex_datasets/'
folder_out = './data/output/'
# ----------------------------------------------------------------------------------------------------------------------
P = tools_plot_v2.Plotter()
# ----------------------------------------------------------------------------------------------------------------------
def ex_is_missing(df):
    """Print the frame and its missing-value mask and save the mask as an image.

    The boolean mask from ``df.isnull()`` is written to ``nans_1.png`` in
    ``folder_out`` with missing cells as 255 and present cells as 0.

    Args:
        df: pandas DataFrame to inspect.
    """
    A = (df.isnull()).to_numpy()
    # Write an explicit 8-bit mask instead of relying on OpenCV's implicit
    # conversion of the wider integer product.
    cv2.imwrite(folder_out + 'nans_1.png', (255 * A).astype(numpy.uint8))
    print(df)
    print()
    print(A)
    return
# ----------------------------------------------------------------------------------------------------------------------
def ex_replace(df):
    """Replace every NaN in ``df`` with 999.0 in place, printing before and after.

    Args:
        df: pandas DataFrame; it is modified in place.
    """
    # numpy.nan is the canonical spelling; the numpy.NaN alias was removed in
    # NumPy 2.0 and raises AttributeError there.
    dct_replace = {numpy.nan: 999.0}
    print(df)
    print()
    df.replace(dct_replace, inplace=True)
    print(df)
    return