def example_trajs():
    """Generate one example trajectory per diffusion model and save it as PDF.

    Output goes to data/part0/example_traj/, one file per model name.
    Relies on module-level `andi`, `plt`, `colors`, `color_maps`,
    `dirmake` and `logg`.
    """
    print('Generowanie i zapisywanie przykładowych trajektorii...')
    path = 'data/part0/example_traj'
    dirmake(path)
    logg('Generowanie przykładowych trajektorii - start')
    AD = andi.andi_datasets()
    for model in range(5):
        try:
            dataset = AD.create_dataset(100, 1, [0.7], [model], 2)
        except Exception:
            # Some models reject subdiffusive exponents — retry with a
            # superdiffusive one.  (Was a bare `except:`, which also
            # swallowed KeyboardInterrupt/SystemExit.)
            dataset = AD.create_dataset(100, 1, [1.7], [model], 2)
        # Row layout: [exponent, model, x_0..x_99, y_0..y_99].
        x = dataset[0][2:102]
        y = dataset[0][102:]
        plt.figure(figsize=(2, 2))
        plt.cla()
        plt.gca().spines['top'].set_visible(False)
        plt.gca().spines['right'].set_visible(False)
        plt.xlabel('x')
        plt.ylabel('y')
        plt.title(AD.avail_models_name[model], loc='left')
        plt.plot(x, y, color=colors[model], linewidth=2, alpha=0.5)
        plt.scatter(x, y, c=range(len(x)), cmap=color_maps[model],
                    marker='.', s=100)
        plt.savefig(path + '/' + str(AD.avail_models_name[model]) + '.pdf',
                    transparent=True, bbox_inches='tight', dpi=300)
        plt.close()  # free the figure; avoids accumulating open figures
    logg('Generowanie przykładowych trajektorii - stop')
    print(' --- ZAKOŃCZONO')
def TAMSD_estimation(trajectories, exps, part, Model):
    """Estimate diffusion parameters for trajectories via TAMSD fitting.

    For part == 0, `trajectories` is a single trajectory and `exps` its true
    exponent: the tuple (D, real_exp, est_exp, tamsds) is returned directly.
    For part >= 1, the held-out test tail is dropped, the remaining
    trajectories are processed in a 3-worker process pool, and the results
    are written to data/part{part}/model{Model}/TAMSD/estimated.csv
    (no return value).

    Relies on module-level `TAMSD_estimation_traj`, `number_to_learn`,
    `dirmake`, `logg` and the global progress counter `liczydlo`.
    """
    global liczydlo  # module-level progress counter shared across helpers
    if part == 0:
        # Single-trajectory mode: call the worker directly.
        D, real_exp, est_exp, tamsds = TAMSD_estimation_traj(
            0, 1, [exps, trajectories])
        return D, real_exp, est_exp, tamsds
    if part >= 1:
        print('Obliczanie estymacji TAMSDS...')
        # Drop the held-out test tail; parts > 6 hold out 10**(part-6) items.
        if part in [1, 2, 3, 4, 5, 6]:
            trajectories = trajectories[:-number_to_learn]
        else:
            trajectories = trajectories[:-floor(10**(part - 6))]
        liczydlo = 0
        traj_num = len(trajectories)
        traj_info = pd.DataFrame(columns=['D', 'expo', 'expo_est', 'tamsds'],
                                 index=range(traj_num))
        # Two arguments iterated over by the pool: (exponent, trajectory).
        give = []
        logg('TAMSD - estymacja - start')
        start = datetime.now()
        for i in range(traj_num):
            give.append([exps[i], trajectories[i]])
        with mp.Pool(3) as pool:
            temp = partial(TAMSD_estimation_traj, part, traj_num)
            result = pool.map(temp, give)
            # map() already blocks; close/join make the shutdown explicit.
            pool.close()
            pool.join()
        print(' --- ZAKOŃCZONO')
        print('Translacja wyników TAMSD...')
        # Copy pool results into the DataFrame, reporting every 500 rows.
        liczydlo = 0
        for i in result:
            traj_info.loc[liczydlo] = i
            liczydlo += 1
            if liczydlo % 500 == 0:
                print(f'TAMSD - translacja - {liczydlo}/{traj_num}')
        stop = datetime.now()
        logg(f'TAMSD - estymacja - koniec {stop - start}')
        print(' --- ZAKOŃCZONO')
        path = f'data/part{part}/model{Model}/TAMSD/'
        dirmake(path)
        fname = path + str('estimated.csv')
        print(f'Zapisywanie wyników do pliku {fname}')
        traj_info.to_csv(fname)
        print(' --- ZAKOŃCZONO')
def get_features(trajectories, exps, part, Model):
    """Extract ML feature vectors from trajectories in parallel.

    Each (exponent, trajectory) pair is handed to `get_info` through a
    3-worker process pool; the rows are collected into a DataFrame, written
    to data/part{part}/model{Model}/ML/features.csv and returned.  Uses the
    global counter `l_t` for progress reporting.
    """
    if part >= 1:
        print('Wyciąganie parametrów z trajektorji...')
    global l_t
    # --- read data out of the trajectories ---
    l_t = 0
    traj_num = len(trajectories)
    logg('ML - wyciąganie danych - start')
    start = datetime.now()
    # Pair every exponent with its trajectory for the pool workers.
    give = [[exps[k], trajectories[k]] for k in range(traj_num)]
    with mp.Pool(3) as pool:
        worker = partial(get_info, 5, traj_num)
        result = pool.map(worker, give)
        pool.close()
        pool.join()
    # --- store in pandas ---
    feature_names = [
        'alpha', 'diffusivity', 'efficiency', 'slowdown', 'MSD_ratio1',
        'MSD_ratio5', 'antigaussinity1', 'antigaussinity5', 'straigthness',
        'autocorrelation1', 'autocorrelation5', 'max_distance',
        'trappedness', 'fractal_dim'
    ]
    traj_info = pd.DataFrame(columns=feature_names, index=range(traj_num))
    l_t = 0
    for traj in result:
        traj_info.loc[l_t] = traj
        l_t += 1
        if l_t % 500 == 0:
            print(f'translacja - {l_t}/{traj_num}')
    stop = datetime.now()
    logg(f'ML - wyciąganie danych - koniec {stop - start}')
    # --- write to file ---
    path = f'data/part{part}/model{Model}/ML'
    dirmake(path)
    fname = f'data/part{part}/model{Model}/ML/features.csv'
    print(f'Zapisywanie danych do pliku {fname}')
    traj_info.to_csv(fname)
    print(' --- ZAKOŃCZONO')
    return traj_info
# --- Interactive configuration: ask which pipeline stages to run. ---
# Y/n answers are kept as raw strings and compared against 'Y' later;
# the two numeric answers control how many plots get written.
Q_generate_plot = int(input("Ile trajektorii zapisać w postaci graficznej: "))
Q_TAMSD = input("Czy chcesz przeliczyć TAMSD? (Y/n): ")
Q_TAMSD_plot = int(
    input("Ile obliczonych TAMSD chcesz zapisać w postaci graficznej: "))
Q_ML_features = input("Czy chcesz wyciągnąć paramtry trajektorii do ML? (Y/n): ")
Q_ML_linreg = input("Czy chcesz użyć wielowymiarowej regresji liniowej? (Y/n): ")
Q_ML_dectree = input("Czy chcesz użyć decision tree? (Y/n): ")
Q_ML_randomforest = input("Czy chcesz użyć random forest? (Y/n): ")
Q_ML_gradientboosting = input("Czy chcesz użyć gradient boosting? (Y/n): ")
# Record every decision in the run log.
# NOTE(review): Q_generate is not defined in this section — presumably asked
# earlier in the script; confirm.
logg(f'Wybrane decyzje: {Q_generate}, {Q_generate_plot}, {Q_TAMSD}, {Q_TAMSD_plot}' +
     f', {Q_ML_features}, {Q_ML_linreg}, {Q_ML_dectree}, {Q_ML_randomforest}, {Q_ML_gradientboosting}.')
print(64 * '-')
# N_long, part and Model come from earlier in the script — TODO confirm.
N = N_long
path = f'data/part{part}/model{Model}/'
dirmake(path)
# Lazy-load flags so trajectories/exponents/features are read at most once.
traject_loaded = False
expo_loaded = False
features_loaded = False
if Q_generate == 'Y':
    # generate trajectories
    generate_trajectories(N, part, Model)
    print(64 * '-')
if Q_generate_plot > 0:
    # plot trajectories
    if not traject_loaded:
        trajectories = read_trajectories(part, Model)
        traject_loaded = True
    print(f'Tworzenie {Q_generate_plot} wykresów...')
def decision_tree(features, part, Model):
    """Tune, train, persist and evaluate a DecisionTreeRegressor.

    A randomized hyper-parameter search runs on a 10% slice of the training
    data; the best parameters are saved, a fresh tree is fitted on the full
    training split, pickled, visualized (shallow preview + full tree), and
    its test predictions are written to
    data/part{part}/model{Model}/ML/decision_tree/estimated.csv.
    Relies on module-level `split_data`, `number_to_learn`, `dirmake`,
    `save_model`, `log`.
    """
    # Test-set size: parts > 6 hold out 10**(part-6) samples instead.
    if part in [0, 1, 2, 3, 4, 5, 6]:
        train_data, train_labels, test_data, test_label = split_data(
            features, number_to_learn)
    else:
        train_data, train_labels, test_data, test_label = split_data(
            features, floor(10**(part - 6)))
    # Hyper-parameter search sees only the first 10% of the training data.
    hiperparam_data = train_data[:floor(number_to_learn / 10)]
    hiperparam_labels = train_labels[:floor(number_to_learn / 10)]
    print('Wyznaczanie drzewa decyzyjnego...')
    max_depth = list(range(2, 20, 1))
    # min_samples_split must be >= 2 for scikit-learn; the previous
    # range(1, 11) made RandomizedSearchCV sample invalid candidates.
    # Now consistent with gradient_boosting()/random_forest().
    min_samples_split = list(range(2, 11))
    min_samples_leaf = list(range(1, 6))
    max_features = ['auto', 'sqrt']  # NOTE: 'auto' is deprecated in newer sklearn
    random_grid = {
        'max_depth': max_depth,
        'min_samples_split': min_samples_split,
        'min_samples_leaf': min_samples_leaf,
        'max_features': max_features
    }
    log(f'ML - drzewo decyzyjne - szukanie superparametrów - start')
    start = datetime.now()
    model = DecisionTreeRegressor()
    model = RandomizedSearchCV(estimator=model,
                               param_distributions=random_grid,
                               n_iter=30, cv=3, verbose=2, random_state=42,
                               n_jobs=3, return_train_score=True, refit=True)
    model = model.fit(hiperparam_data, hiperparam_labels)
    stop = datetime.now()
    log(f'ML - drzewo decyzyjne - szukanie superparametrów - koniec {stop - start}')
    model_params = pd.DataFrame(model.best_params_, index=['decision tree'])
    dirmake(f'data/part{part}/model{Model}/ML/decision_tree')
    model_params.to_csv(
        f'data/part{part}/model{Model}/ML/decision_tree/model_params.csv')
    # Refit a clean estimator with the best parameters on the full split.
    model = DecisionTreeRegressor(**model.best_params_)
    log(f'ML - drzewo decyzyjne - nauczanie - start')
    start = datetime.now()
    model.fit(train_data, train_labels)
    stop = datetime.now()
    log(f'ML - drzewo decyzyjne - nauczanie - koniec {stop - start}')
    print(' --- ZAKOŃCZONO')
    path = f'data/part{part}/model{Model}/ML/decision_tree/model.pk1'
    print('Zapisywanie modelu do pliku {}'.format(path))
    save_model(model, path)
    # Shallow preview of the tree plus the full tree, both as PDFs.
    plt.cla()
    plt.figure(figsize=(10, 6.5))
    plot_tree(model, max_depth=3, feature_names=list(test_data),
              fontsize=10, filled=True)
    path = f'data/part{part}/model{Model}/ML/decision_tree/tree.pdf'
    plt.savefig(path, transparent=True, bbox_inches='tight')
    plt.cla()
    plt.figure(figsize=(15, 15))
    plot_tree(model, feature_names=list(test_data), filled=True)
    path = f'data/part{part}/model{Model}/ML/decision_tree/full_tree.pdf'
    plt.savefig(path, transparent=True, bbox_inches='tight')
    plt.close('all')  # free the figures created above (was leaking them)
    print(' --- ZAKOŃCZONO')
    print('Testowanie modelu drzewa decyzyjnego...')
    log(f'ML - drzewo decyzyjne - przewidywanie - start')
    start = datetime.now()
    predicted_labels = model.predict(test_data)
    stop = datetime.now()
    log(f'ML - drzewo decyzyjne - przewidywanie - koniec {stop - start}')
    print(' --- ZAKOŃCZONO')
    print('Translacja przewidywań...')
    results = pd.DataFrame({'expo': test_label, 'expo_est': predicted_labels})
    print(' --- ZAKOŃCZONO')
    print('Translacja wyników do pliku...')
    results.to_csv(
        f'data/part{part}/model{Model}/ML/decision_tree/estimated.csv')
    print(' --- ZAKOŃCZONO')
def save_model(model, path):
    """Pickle *model* to *path*, creating the parent directory first."""
    parent_dir, _, _ = path.rpartition('/')
    dirmake(parent_dir)
    with open(path, 'wb') as handle:
        dump(model, handle)
def gradient_boosting(features, part, Model):
    """Tune, train, persist and evaluate a GradientBoostingRegressor.

    A randomized hyper-parameter search runs on a 10% slice of the training
    data; the best parameters are saved, a fresh model is fitted on the full
    training split, pickled, and its test predictions are written to
    data/part{part}/model{Model}/ML/gradient_boosting/estimated.csv.
    Relies on module-level `split_data`, `number_to_learn`, `dirmake`,
    `save_model`, `log`.
    """
    # Test-set size: parts > 6 hold out 10**(part-6) samples instead.
    if part in [0, 1, 2, 3, 4, 5, 6]:
        train_data, train_labels, test_data, test_label = split_data(
            features, number_to_learn)
    else:
        train_data, train_labels, test_data, test_label = split_data(
            features, floor(10**(part - 6)))
    # Hyper-parameter search sees only the first 10% of the training data.
    hiperparam_data = train_data[:floor(number_to_learn / 10)]
    hiperparam_labels = train_labels[:floor(number_to_learn / 10)]
    print('Wyznaczanie modelu gradient boosting...')
    # Grid after:
    # https://towardsdatascience.com/hyperparameter-tuning-the-random-forest-in-python-using-scikit-learn-28d2aa77dd74
    learning_rate = [0.001 * i for i in range(1, 21)]
    n_estimators = list(range(100, 1001, 100))
    max_depth = list(range(2, 20, 1))
    min_samples_split = list(range(2, 11))
    min_samples_leaf = list(range(1, 6))
    max_features = ['auto', 'sqrt']  # NOTE: 'auto' is deprecated in newer sklearn
    random_grid = {
        'learning_rate': learning_rate,
        'n_estimators': n_estimators,
        'max_depth': max_depth,
        'min_samples_split': min_samples_split,
        'min_samples_leaf': min_samples_leaf,
        'max_features': max_features
    }
    model = GradientBoostingRegressor()
    log(f'ML - wzmocnienie gradientowe - szukanie superparametrów - start')
    start = datetime.now()
    model = RandomizedSearchCV(estimator=model,
                               param_distributions=random_grid,
                               n_iter=30, cv=3, verbose=2, random_state=42,
                               n_jobs=3, return_train_score=True, refit=True)
    model.fit(hiperparam_data, hiperparam_labels)
    stop = datetime.now()
    # (log messages below had a typo 'wzmocenienie' — fixed)
    log(f'ML - wzmocnienie gradientowe - szukanie superparametrów - koniec {stop - start}')
    model_params = pd.DataFrame(model.best_params_,
                                index=['gradient boosting'])
    dirmake(f'data/part{part}/model{Model}/ML/gradient_boosting')
    model_params.to_csv(
        f'data/part{part}/model{Model}/ML/gradient_boosting/model_params.csv')
    # Refit a clean estimator with the best parameters on the full split.
    model = GradientBoostingRegressor(**model.best_params_)
    log(f'ML - wzmocnienie gradientowe - nauczanie - start')
    start = datetime.now()
    model.fit(train_data, train_labels)
    stop = datetime.now()
    log(f'ML - wzmocnienie gradientowe - nauczanie - koniec {stop - start}')
    print(' --- ZAKOŃCZONO')
    path = f'data/part{part}/model{Model}/ML/gradient_boosting/model.pk1'
    print('Zapisywanie modelu do pliku {} oraz jego parametrów'.format(path))
    save_model(model, path)
    print(' --- ZAKOŃCZONO')
    print('Testowanie modelu gradient boosting...')
    log(f'ML - wzmocnienie gradientowe - przewidywanie - start')
    start = datetime.now()
    predicted_labels = model.predict(test_data)
    stop = datetime.now()
    log(f'ML - wzmocnienie gradientowe - przewidywanie - koniec {stop - start}')
    print(' --- ZAKOŃCZONO')
    print('Translacja przewidywań...')
    results = pd.DataFrame({'expo': test_label, 'expo_est': predicted_labels})
    print(' --- ZAKOŃCZONO')
    print('Zapisywanie wyników do pliku...')
    results.to_csv(
        f'data/part{part}/model{Model}/ML/gradient_boosting/estimated.csv')
    print(' --- ZAKOŃCZONO')
def random_forest(features, part, Model):
    """Tune, train, persist and evaluate a RandomForestRegressor.

    Mirrors decision_tree()/gradient_boosting(): a randomized search on a
    10% slice of the training data picks the hyper-parameters, then a fresh
    forest is fitted on the full training split, pickled, and its test
    predictions are written to data/part{part}/model{Model}/ML/random_forest/.
    """
    # Test-set size depends on the experiment part (parts > 6 shrink it).
    if part in [0, 1, 2, 3, 4, 5, 6]:
        train_data, train_labels, test_data, test_label = split_data(
            features, number_to_learn)
    else:
        train_data, train_labels, test_data, test_label = split_data(
            features, floor(10**(part - 6)))
    # Hyper-parameter search sees only the first 10% of the training data.
    slice_end = floor(number_to_learn / 10)
    hiperparam_data = train_data[:slice_end]
    hiperparam_labels = train_labels[:slice_end]
    print('Wyznaczanie modelu random forest...')
    # Grid after:
    # https://towardsdatascience.com/hyperparameter-tuning-the-random-forest-in-python-using-scikit-learn-28d2aa77dd74
    random_grid = {
        'n_estimators': list(range(100, 1001, 100)),
        'max_depth': list(range(2, 20, 1)),
        'min_samples_split': list(range(2, 11)),
        'min_samples_leaf': list(range(1, 6)),
        'max_features': ['auto', 'sqrt'],
        'max_samples': [0.1 * i for i in range(1, 10)],
    }
    model = RandomForestRegressor(bootstrap=True, max_samples=0.5)
    log(f'ML - las losowy - szukanie superparametrów - start')
    start = datetime.now()
    model = RandomizedSearchCV(estimator=model,
                               param_distributions=random_grid,
                               n_iter=30, cv=3, verbose=2, random_state=42,
                               n_jobs=3, return_train_score=True, refit=True)
    model.fit(hiperparam_data, hiperparam_labels)
    stop = datetime.now()
    log(f'ML - las losowy - szukanie superparametrów - koniec {stop - start}')
    model_params = pd.DataFrame(model.best_params_, index=['random forest'])
    dirmake(f'data/part{part}/model{Model}/ML/random_forest')
    model_params.to_csv(
        f'data/part{part}/model{Model}/ML/random_forest/model_params.csv')
    log(f'ML - las losowy - nauczanie - start')
    start = datetime.now()
    # Refit a clean forest with the best parameters on the full split.
    model = RandomForestRegressor(**model.best_params_)
    model.fit(train_data, train_labels)
    stop = datetime.now()
    log(f'ML - las losowy - nauczanie - koniec {stop - start}')
    print(' --- ZAKOŃCZONO')
    path = f'data/part{part}/model{Model}/ML/random_forest/model.pk1'
    print('Zapisywanie modelu do pliku {} oraz jego parametrów'.format(path))
    save_model(model, path)
    print(' --- ZAKOŃCZONO')
    print('Testowanie modelu random forest...')
    log(f'ML - las losowy - przewidywanie - start')
    start = datetime.now()
    predicted_labels = model.predict(test_data)
    stop = datetime.now()
    log(f'ML - las losowy - przewidywanie - koniec {stop - start}')
    print(' --- ZAKOŃCZONO')
    print('Translacja przewidywań...')
    results = pd.DataFrame({'expo': test_label, 'expo_est': predicted_labels})
    print(' --- ZAKOŃCZONO')
    print('Zapisywanie wyników do pliku...')
    results.to_csv(
        f'data/part{part}/model{Model}/ML/random_forest/estimated.csv')
    print(' --- ZAKOŃCZONO')
def plot_noisy3():
    """Plot R^2 / MAE / MSE versus training-set size for parts 7-10.

    Reads the per-part result tables written earlier, then saves one small
    figure per (model, statistic) and one combined figure per statistic
    under data/part7-10/.  Relies on module-level `models`, `parts`,
    `colors`, `dirmake`, `plt`, `pd`.
    """
    path_results = 'data/part7-10/'
    dirmake(path_results)
    sizes = [10, 100, 1000, 10000, 100000]  # x axis: training-set sizes
    R2s = {}
    MAEs = {}
    MSEs = {}
    for model in models:
        R2 = []
        MAE = []
        MSE = []
        for part in parts:
            path_table = 'data/part7-10/results/part' + str(
                part) + '/table.csv'
            table = pd.read_csv(path_table, index_col='Unnamed: 0')
            row = table.loc[model.replace('_', ' ')]  # hoist repeated lookup
            R2.append(row['R^2'])
            MAE.append(row['MAE'])
            MSE.append(row['MSE'])
        R2s[model] = R2
        MAEs[model] = MAE
        MSEs[model] = MSE
        # One small figure per statistic for this model.
        # (dict lookup replaces the previous eval(stat) indirection)
        for stat, statistica in {'R2': R2, 'MAE': MAE, 'MSE': MSE}.items():
            plt.clf()
            plt.figure(figsize=(4, 4))
            plt.semilogx(sizes, statistica, label=stat)
            plt.title(model, loc='left')
            plt.xticks(sizes)
            plt.yticks(statistica)
            dirmake(path_results + '/' + stat + '/')
            plt.savefig(path_results + stat + '/' + model + '.pdf')
    # Combined figures: every model on one plot per statistic.
    # (dict lookup replaces the previous eval(stat + 's'))
    for stat, statistica in {'R2': R2s, 'MAE': MAEs, 'MSE': MSEs}.items():
        path_result = path_results + stat + '.pdf'
        plt.close('all')
        plt.figure(figsize=(3, 3))
        n = 0
        for key, value in statistica.items():
            if stat == 'R2':
                # R^2 can be negative, so only the x axis is logarithmic.
                plt.semilogx(sizes, value, label=key.replace('_', ' '),
                             c=colors[n + 1], marker='x')
            else:
                plt.loglog(sizes, value, label=key.replace('_', ' '),
                           c=colors[n], marker='x')
            n += 1
        plt.xticks(sizes)
        if stat == 'MSE':
            plt.legend(loc='center left', bbox_to_anchor=(1.05, 0.5))
        plt.title(stat, loc='left')
        plt.savefig(path_result, transparent=True, bbox_inches='tight',
                    dpi=300)
def test_models2(part, part_test, num_of_learning=100000, mode='full'):
    '''Cross-part model evaluation.

    Tests the part-1 model on data from parts 3-6, the part-3 model on
    part-1 data, and the part-7..10 models on part-1 data.  Substrings of
    `mode` control the work done: 'no_gen' skips loading trajectories,
    'table' writes the metric table, 'plot' draws the figures, 'full' does
    both.
    '''
    Model = 'A'
    tests = 'A'
    # Pick output directories for the (part, part_test) combination.
    # NOTE(review): combinations not covered below (e.g. part == 1 with
    # part_test == 2) leave path_results/path_estimate unbound and raise
    # NameError at dirmake() — confirm against callers.
    if part == 1:
        if part_test in [3, 4, 5, 6]:
            path_results = f'data/part1-6/results/part' + str(part_test) + '/'
            path_estimate = f'data/part1-6/estimations/part' + str(
                part_test) + '/'
    elif part == 3:
        if part_test == 1:
            path_results = f'data/part3-1/results/part' + str(part_test) + '/'
            path_estimate = f'data/part3-1/estimations/part' + str(
                part_test) + '/'
    if part in [1, 7, 8, 9, 10]:
        if part_test == 1:
            path_results = f'data/part7-10/results/part' + str(part) + '/'
            path_estimate = f'data/part7-10/estimations/part' + str(part) + '/'
    dataset = read_ML_features(part_test, tests)
    if not 'no_gen' in mode:
        # Raw trajectories are only needed by the TAMSD estimator.
        trajectories = read_trajectories(part_test, tests, 'end',
                                         num_of_learning)  # to test
    else:
        trajectories = 'trajectories'  # placeholder when loading is skipped
    _, _, test_data, test_label = split_data(dataset, num_of_learning)
    test_label = list(test_label)
    dirmake(path_results)
    dirmake(path_estimate)
    models = [
        'TAMSD', 'linear_regression', 'decision_tree', 'random_forest',
        'gradient_boosting'
    ]
    ttable = pd.DataFrame()
    err = {}
    plt.figure(figsize=(2, 2))
    for model in models:
        # --- parameter estimation ---
        if model == 'TAMSD':
            estimated_label, test_labels = estimate(trajectories, test_label,
                                                    part, Model, tests,
                                                    'TAMSD', 100000,
                                                    part_test)
        else:
            estimated_label, test_labels = estimate(test_data, test_label,
                                                    part, Model, tests,
                                                    model, 100000, part_test)
        # --- scoring ---
        table, er = test_model(estimated_label, test_labels,
                               model.replace('_', ' '), path_results, mode)
        if 'full' in mode or 'table' in mode:
            ttable = pd.concat([ttable, table])
        err[model] = er
        print(f'Zrobione {Model} - {tests} - {model}')
    # --- reporting / drawing ---
    if 'full' in mode or 'table' in mode:
        print(f'Zapisywanie tabeli do pliku {path_results+"table.csv"}')
        ttable.to_csv(path_results + 'table.csv')
        print(f'Zrobione tabele dla {Model}-{tests}')
    if 'full' in mode or 'plot' in mode:
        # box plot of the per-model error distributions
        plt.clf()
        plt.figure(figsize=(5, 5))
        box = plt.boxplot(labels=err.keys(), x=err.values(),
                          patch_artist=True, notch=True, vert=True)
        colors = ['lightblue', 'orange', 'lightgreen', 'pink', 'violet']
        for plott, color in zip(box['boxes'], colors):
            print(plott)
            plott.set_facecolor(color)
        plt.savefig(path_results + 'boxplots.pdf', transparent=True,
                    bbox_inches='tight', dpi=300)
        # distribution plots of the errors: histogram, two KDE variants, ECDF
        plt.clf()
        sns.displot(err, common_norm=False, stat="density")
        plt.plot([0, 0], [0, 1.3], color='black')
        plt.savefig(path_results + 'displot1.pdf', transparent=True,
                    bbox_inches='tight', dpi=300)
        plt.clf()
        sns.displot(err, kind="kde", common_norm=False, bw_adjust=0.7,
                    fill=True)
        plt.plot([0, 0], [0, 1.3], color='black')
        plt.savefig(path_results + 'displot2.pdf', transparent=True,
                    bbox_inches='tight', dpi=300)
        plt.clf()
        sns.displot(err, kind="kde", common_norm=False, bw_adjust=2)
        plt.plot([0, 0], [0, 1.3], color='black')
        plt.savefig(path_results + 'displot3.pdf', transparent=True,
                    bbox_inches='tight', dpi=300)
        plt.clf()
        sns.displot(err, kind="ecdf")
        plt.plot([0, 0], [0, 1.3], color='black')
        plt.savefig(path_results + 'displot4.pdf', transparent=True,
                    bbox_inches='tight', dpi=300)
        print(f'Zrobione wykresy dla {Model}-{tests}')
def estimate(test_data, test_label, part, Model, Model_test, model,
             learning_number=100000, part_test=None):
    """Return (estimated_exponents, true_exponents) for one estimator.

    `model` is either 'TAMSD' (then `test_data` must be raw trajectories)
    or the directory name of a pickled ML model (then `test_data` holds
    feature rows).  Estimations are cached as CSV: an existing file is read
    back instead of being recomputed.  Rows containing NaN are dropped
    before returning.
    """
    # Resolve the cache path for this (part, part_test) combination;
    # mirrors the directory layout used by test_models / test_models2.
    if part_test == None:
        part_test = part
        path_estimations = f'data/part{part}/estimations/model{Model}/test{Model_test}/{model}.csv'
        dirmake(f'data/part{part}/estimations/model{Model}/test{Model_test}/')
    elif part_test >= 3:
        path_estimations = f'data/part1-6/estimations/part{part_test}/{model}.csv'
        dirmake(f'data/part1-6/estimations/part{part_test}/')
    elif part_test == 1:
        if part == 3:
            path_estimations = f'data/part3-1/estimations/part{part_test}/{model}.csv'
            dirmake(f'data/part3-1/estimations/part{part_test}/')
        elif part in [1, 7, 8, 9, 10]:
            path_estimations = f'data/part7-10/estimations/part{part}/{model}.csv'
            dirmake(f'data/part7-10/estimations/part{part}/')
    if isfile(path_estimations):
        # Cached result: reuse the previously written estimation.
        results = pd.read_csv(path_estimations, index_col='Unnamed: 0')
    else:
        if model == 'TAMSD':
            print('Obliczanie estymacji TAMSDS...')
            logg('TAMSD - estymacja - start')
            start = datetime.now()
            traj_num = len(test_label)
            # Pair each true exponent with its trajectory for the workers.
            give = []
            for i in range(traj_num):
                give.append([test_label[i], test_data[i]])
            with mp.Pool(3) as pool:
                temp = partial(TAMSD_estimation_traj, part, traj_num)
                result = pool.map(temp, give)
                pool.close()
                pool.join()
            stop = datetime.now()
            logg(f'TAMSD - estymacja - koniec {stop - start}')
            print(' --- ZAKOŃCZONO')
            print('Translacja wyników TAMSD...')
            results = pd.DataFrame(columns=['D', 'expo', 'expo_est',
                                            'tamsds'],
                                   index=range(traj_num))
            liczydlo = 0  # local row counter (shadows the module global)
            for i in result:
                results.loc[liczydlo] = i
                liczydlo += 1
            print(' --- ZAKOŃCZONO')
            results.to_csv(path_estimations)
        else:
            # Load a pickled model and predict on the feature rows.
            path_model = f'data/part{part}/model{Model}/ML/{model}/model.pk1'
            print('Ładowanie modelu ...')
            model = load_model(path_model)
            print(' --- ZAKOŃCZONO')
            print(f'Testowanie modelu {model}...')
            logg(f'ML - {model} - przewidywanie - start')
            start = datetime.now()
            predicted_labels = model.predict(test_data)
            stop = datetime.now()
            logg(f'ML - {model} - przewidywanie - koniec {stop - start}')
            print(' --- ZAKOŃCZONO')
            print('Translacja przewidywań...')
            print(len(test_label))
            results = pd.DataFrame({
                'expo': test_label,
                'expo_est': predicted_labels
            })
            print(' --- ZAKOŃCZONO')
            print(f'Zapisywanie wstymacji wyników do pliku - model {model}...')
            results.to_csv(path_estimations)
            print(' --- ZAKOŃCZONO')
    print(64 * '-')
    results = results.dropna()
    return list(results['expo_est']), list(results['expo'])
def test_models(part, Model, num_of_learning=100000, tests=None, mode='full'):
    """Evaluate all estimators for one part/model against test data.

    `tests` defaults to `Model` (self-test).  Substrings of `mode` control
    the work: 'no_gen' skips loading trajectories, 'table' writes the metric
    table, 'plot' draws the figures, 'full' does both.
    """
    if tests == None:
        tests = Model
    if part >= 1:
        path_results = f'data/part{part}/results/model{Model}/test{tests}/'
        path_estimate = f'data/part{part}/estimations/model{Model}/test{tests}/'
    # NOTE(review): for part < 1 the paths above stay unbound and
    # dirmake(path_results) below would raise NameError — presumably this
    # function is only called with part >= 1; confirm against callers.
    dataset = read_ML_features(part, tests)
    if not 'no_gen' in mode:
        # Raw trajectories are only needed by the TAMSD estimator.
        trajectories = read_trajectories(part, tests, 'end', num_of_learning)
    else:
        trajectories = 'trajectories'  # placeholder when loading is skipped
    _, _, test_data, test_label = split_data(dataset, num_of_learning)
    test_label = list(test_label)
    dirmake(path_results)
    dirmake(path_estimate)
    models = [
        'TAMSD', 'linear_regression', 'decision_tree', 'random_forest',
        'gradient_boosting'
    ]
    ttable = pd.DataFrame(
        columns=['R^2', 'eps = 0.05', 'eps = 0.025', 'max_error'])
    err = {}
    plt.figure(figsize=(2, 2))
    for model in models:
        # --- parameter estimation ---
        if model == 'TAMSD':
            estimated_label, test_labels = estimate(
                trajectories, test_label, part, Model, tests, 'TAMSD')
        else:
            estimated_label, test_labels = estimate(
                test_data, test_label, part, Model, tests, model)
        # --- scoring ---
        table, er = test_model(estimated_label, test_labels,
                               model.replace('_', ' '), path_results, mode)
        if 'full' in mode or 'table' in mode:
            ttable = pd.concat([ttable, table])
        err[model] = er
        print(f'Zrobione {Model} - {tests} - {model}')
    # --- reporting / drawing ---
    if 'full' in mode or 'table' in mode:
        print(f'Zapisywanie tabeli do pliku {path_results+"table.csv"}')
        ttable.to_csv(path_results + 'table.csv')
        print(f'Zrobione tabele dla {Model}-{tests}')
    if 'full' in mode or 'plot' in mode:
        # box plot of the per-model error distributions
        plt.clf()
        plt.figure(figsize=(5, 5))
        labels = ['TAMSD', 'LR', 'DT', 'RF', 'GB']
        box = plt.boxplot(labels=labels, x=err.values(), patch_artist=True,
                          notch=True, vert=True)
        colors = ['lightblue', 'orange', 'lightgreen', 'pink', 'violet']
        for plott, color in zip(box['boxes'], colors):
            print(plott)
            plott.set_facecolor(color)
        plt.savefig(path_results + 'boxplots.pdf', transparent=True,
                    bbox_inches='tight', dpi=300)
        # distribution plots of the errors: histogram, two KDE variants, ECDF
        plt.clf()
        sns.displot(err, common_norm=False, stat="density")
        plt.plot([0, 0], [0, 1.3], color='black')
        plt.savefig(path_results + 'displot1.pdf', transparent=True,
                    bbox_inches='tight', dpi=300)
        plt.clf()
        sns.displot(err, kind="kde", common_norm=False, bw_adjust=0.7,
                    fill=True)
        plt.plot([0, 0], [0, 1.3], color='black')
        plt.savefig(path_results + 'displot2.pdf', transparent=True,
                    bbox_inches='tight', dpi=300)
        plt.clf()
        sns.displot(err, kind="kde", common_norm=False, bw_adjust=2)
        plt.plot([0, 0], [0, 1.3], color='black')
        plt.savefig(path_results + 'displot3.pdf', transparent=True,
                    bbox_inches='tight', dpi=300)
        plt.clf()
        sns.displot(err, kind="ecdf")
        plt.plot([0, 0], [0, 1.3], color='black')
        plt.savefig(path_results + 'displot4.pdf', transparent=True,
                    bbox_inches='tight', dpi=300)
        print(f'Zrobione wykresy dla {Model}-{tests}')
def example_TAMSD():
    """Generate example TAMSD figures for the thesis/report.

    Produces four PDFs under data/part0/example_TAMSD/: a fitted TAMSD for
    one simulated trajectory (linear and log-log axes) and ideal noise-free
    TAMSD curves for three exponents (linear and log-log axes).
    Relies on module-level `andi`, `plt`, `colors`, `dirmake`, `logg`,
    `TAMSD_estimation`, `estimate_expo`.
    """
    print('Generowanie i zapisywanie przykładowych TAMSD...')
    path = 'data/part0/example_TAMSD'
    dirmake(path)
    logg('Generowanie przykładowych TAMSD - start')
    AD = andi.andi_datasets()
    # One 200-step trajectory, model index 2, exponent 0.7, 2D.
    dataset = AD.create_dataset(200, 1, [0.7], [2], 2)
    # Row layout: [exponent, model, x_0..x_199, y_0..y_199].
    x = dataset[0][2:202]
    y = dataset[0][202:]
    trajectory = [x, y]
    D, expo, expo_est, tamsds = TAMSD_estimation(trajectory, 0.7, 0, 'A')
    tamsds = tamsds[:100]
    t = range(1, len(tamsds) + 1)
    # Re-estimate the exponent on the truncated curve.
    expo_est = estimate_expo(t, tamsds, D, 100)
    # --- fitted TAMSD, linear axes ---
    plt.cla()
    plt.figure(figsize=(3, 3))
    plt.plot(t, tamsds, '.', label='punkty TAMSD')
    plt.plot(t, [4 * D * i**expo_est for i in t], 'b',
             label=r'Wyestymowana krzywa wzorcowa')
    plt.plot(t, [4 * D * i**expo for i in t], 'r',
             label=r'Prawdziwa krzywa wzorcowa')
    plt.xlabel('t')
    plt.ylabel(r'$\rho(t)$')
    plt.title('c', loc='left')
    plt.gca().spines['top'].set_visible(False)
    plt.gca().spines['right'].set_visible(False)
    plt.savefig(path + '/TAMSD.pdf', transparent=True, bbox_inches='tight',
                dpi=300)
    # --- fitted TAMSD, log-log axes ---
    plt.cla()
    plt.loglog(t, tamsds, '.', label='punkty TAMSD')
    plt.loglog(t, [4 * D * i**expo_est for i in t], 'b',
               label=r'Wyestymowana krzywa TAMSD')
    plt.loglog(t, [4 * D * i**expo for i in t], 'r',
               label=r'Prawdziwa krzywa wzorcowa')
    plt.xlabel('t')
    plt.ylabel(r'$\rho(t)$')
    plt.legend(loc='lower left', bbox_to_anchor=(1.05, 1))
    plt.title('d', loc='left')
    plt.gca().spines['top'].set_visible(False)
    plt.gca().spines['right'].set_visible(False)
    plt.savefig(path + '/TAMSD_loglog.pdf', transparent=True,
                bbox_inches='tight', dpi=300)
    # --- ideal (noise-free) TAMSD curves, linear axes ---
    plt.cla()
    D = 0.3
    t = [0.1 * i for i in range(101)]
    exps = [0.7, 1, 1.3]
    label = ['superdyfuzja', 'dyfuzja normalna', 'subdyfuzja']
    for expo in exps:
        # label list is indexed in reverse so 0.7 pairs with 'subdyfuzja'.
        plt.plot(t, [4 * D * i**expo for i in t],
                 color=colors[exps.index(expo)],
                 label=r'$\alpha=\ $' + str(expo) + ' - ' +
                 label[-exps.index(expo) - 1])
    plt.xlabel('t')
    plt.ylabel(r'$\rho(t)$')
    plt.legend(loc='lower left', bbox_to_anchor=(1.05, 1), ncol=3)
    plt.title('a', loc='left')
    plt.gca().spines['top'].set_visible(False)
    plt.gca().spines['right'].set_visible(False)
    plt.savefig(path + '/perfect_TAMSD.pdf', transparent=True,
                bbox_inches='tight', dpi=300)
    # --- ideal TAMSD curves, log-log axes ---
    plt.cla()
    D = 1
    t = [0.1 * i for i in range(101)]
    exps = [0.7, 1, 1.3]
    for expo in exps:
        plt.loglog(t, [4 * D * i**expo for i in t],
                   color=colors[exps.index(expo)],
                   label=r'$\alpha=\ $' + str(expo))
    plt.xlabel('t')
    plt.ylabel(r'$\rho(t)$')
    plt.title('b', loc='left')
    plt.gca().spines['top'].set_visible(False)
    plt.gca().spines['right'].set_visible(False)
    plt.savefig(path + '/perfect_TAMSD_loglog.pdf', transparent=True,
                bbox_inches='tight', dpi=300)
    logg('Generowanie przykładowych TAMSD - stop')
    print(' --- ZAKOŃCZONO')