def example_trajs():
    """Generate one example trajectory per diffusion model and save it as a PDF.

    Creates 'data/part0/example_traj', then for each of the five andi models
    draws a 100-point 2D trajectory coloured by time index and writes
    '<model name>.pdf'.  Relies on module-level helpers/globals:
    ``dirmake``, ``logg``, ``colors``, ``color_maps``, ``plt``, ``andi``.
    """
    print('Generowanie i zapisywanie przykładowych trajektorii...')
    path = 'data/part0/example_traj'
    dirmake(path)
    logg('Generowanie przykładowych trajektorii - start')
    AD = andi.andi_datasets()
    for model in range(5):
        try:
            dataset = AD.create_dataset(100, 1, [0.7], [model], 2)
        # FIX: was a bare `except:` which also swallows KeyboardInterrupt/SystemExit
        except Exception:
            # some models only accept superdiffusive exponents -- retry with 1.7
            dataset = AD.create_dataset(100, 1, [1.7], [model], 2)

        # row layout assumed: [exponent, model, x0..x99, y0..y99] -- TODO confirm
        x = dataset[0][2:102]
        y = dataset[0][102:]
        plt.figure(figsize=(2, 2))
        plt.cla()
        plt.gca().spines['top'].set_visible(False)
        plt.gca().spines['right'].set_visible(False)
        plt.xlabel('x')
        plt.ylabel('y')
        plt.title(AD.avail_models_name[model], loc='left')
        plt.plot(x, y, color=colors[model], linewidth=2, alpha=0.5)
        plt.scatter(x,
                    y,
                    c=range(len(x)),
                    cmap=color_maps[model],
                    marker='.',
                    s=100)
        plt.savefig(path + '/' + str(AD.avail_models_name[model]) + '.pdf',
                    transparent=True,
                    bbox_inches='tight',
                    dpi=300)
        # FIX: close the figure -- the loop previously leaked five open figures
        plt.close()
    logg('Generowanie przykładowych trajektorii - stop')
    print(' --- ZAKOŃCZONO')
# Exemple #2
# 0
def TAMSD_estimation(trajectories, exps, part, Model):
    """Estimate anomalous-diffusion exponents via TAMSD.

    part == 0: single-trajectory mode -- ``trajectories`` is one trajectory and
    ``exps`` its exponent; returns the ``(D, real_exp, est_exp, tamsds)`` tuple
    from ``TAMSD_estimation_traj``.

    part >= 1: batch mode -- drops the held-out test tail, estimates every
    remaining trajectory in a 3-worker process pool, writes the results to
    'data/part{part}/model{Model}/TAMSD/estimated.csv' and returns the result
    DataFrame.

    Uses module globals: ``liczydlo`` (progress counter), ``number_to_learn``,
    ``TAMSD_estimation_traj``, ``dirmake``, ``logg``.
    """
    global liczydlo
    if part == 0:
        D, real_exp, est_exp, tamsds = TAMSD_estimation_traj(
            0, 1, [exps, trajectories])
        return D, real_exp, est_exp, tamsds
    if part >= 1:
        print('Obliczanie estymacji TAMSDS...')
        # parts 1-6 reserve `number_to_learn` trajectories for testing;
        # parts 7+ reserve 10**(part - 6) of them instead
        if part in [1, 2, 3, 4, 5, 6]:
            trajectories = trajectories[:-number_to_learn]
        else:
            trajectories = trajectories[:-floor(10**(part - 6))]
        liczydlo = 0
        traj_num = len(trajectories)
        traj_info = pd.DataFrame(columns=['D', 'expo', 'expo_est', 'tamsds'],
                                 index=range(traj_num))
        # two iterated arguments handed to the pool workers
        give = []
        logg('TAMSD - estymacja - start')
        start = datetime.now()
        for i in range(traj_num):
            give.append([exps[i], trajectories[i]])
        with mp.Pool(3) as pool:
            temp = partial(TAMSD_estimation_traj, part, traj_num)
            result = pool.map(temp, give)
            pool.close()
            pool.join()
        print(' --- ZAKOŃCZONO')
        print('Translacja wyników TAMSD...')
        liczydlo = 0
        for i in result:
            traj_info.loc[liczydlo] = i
            liczydlo += 1
            if liczydlo % 500 == 0:
                print(f'TAMSD - translacja - {liczydlo}/{traj_num}')
        stop = datetime.now()
        logg(f'TAMSD - estymacja - koniec {stop - start}')
        print(' --- ZAKOŃCZONO')
        path = f'data/part{part}/model{Model}/TAMSD/'
        dirmake(path)
        fname = path + str('estimated.csv')
        print(f'Zapisywanie wyników do pliku {fname}')
        traj_info.to_csv(fname)
        print(' --- ZAKOŃCZONO')
        # FIX: return the computed results so callers need not re-read the CSV
        # (mirrors get_features); this branch previously fell through to None.
        return traj_info
# Exemple #3
# 0
def get_features(trajectories, exps, part, Model):
    """Extract ML features from trajectories (part >= 1) and persist them.

    Runs ``get_info`` over every trajectory in a 3-worker pool, collects the
    14 per-trajectory features into a DataFrame, writes it to
    'data/part{part}/model{Model}/ML/features.csv' and returns it.

    NOTE(review): everything from the first ``input(...)`` prompt below looks
    like mis-pasted code from an interactive ``main`` routine -- it references
    names never defined here (``Q_generate``, ``N_long``,
    ``generate_trajectories``, ``read_trajectories``) and would raise
    NameError if ``part < 1``.  Confirm against the original repository.
    """
    if part >= 1:
        print('Wyciąganie parametrów z trajektorji...')
        global l_t  # shared progress counter, also used by pool workers
        ### read the data out of the trajectories
        l_t = 0
        traj_num = len(trajectories)
        # two iterated arguments handed to the pool workers
        give = []
        logg('ML - wyciąganie danych - start')
        start = datetime.now()
        for i in range(traj_num):
            give.append([exps[i], trajectories[i]])
        with mp.Pool(3) as pool:
            temp = partial(get_info, 5, traj_num)
            result = pool.map(temp, give)
            pool.close()
            pool.join()
        ### store into a pandas DataFrame
        traj_info = pd.DataFrame(columns=[
            'alpha', 'diffusivity', 'efficiency', 'slowdown', 'MSD_ratio1',
            'MSD_ratio5', 'antigaussinity1', 'antigaussinity5', 'straigthness',
            'autocorrelation1', 'autocorrelation5', 'max_distance',
            'trappedness', 'fractal_dim'
        ],
                                 index=range(traj_num))
        l_t = 0
        for traj in result:
            traj_info.loc[l_t] = traj
            l_t += 1
            if l_t % 500 == 0:
                print(f'translacja - {l_t}/{traj_num}')
        stop = datetime.now()
        logg(f'ML - wyciąganie danych - koniec {stop - start}')
        # write to file
        path = f'data/part{part}/model{Model}/ML'
        dirmake(path)
        fname = f'data/part{part}/model{Model}/ML/features.csv'
        print(f'Zapisywanie danych do pliku {fname}')
        traj_info.to_csv(fname)
        print(' --- ZAKOŃCZONO')
        return traj_info
    # --- suspected mis-pasted interactive-menu code begins here (see docstring) ---
    Q_generate_plot = int(input("Ile trajektorii zapisać w postaci graficznej: "))
    Q_TAMSD = input("Czy chcesz przeliczyć TAMSD? (Y/n): ")
    Q_TAMSD_plot = int(input("Ile obliczonych TAMSD chcesz zapisać w postaci graficznej: "))
    Q_ML_features = input("Czy chcesz wyciągnąć paramtry trajektorii do ML? (Y/n): ")
    Q_ML_linreg = input("Czy chcesz użyć wielowymiarowej regresji liniowej? (Y/n): ")
    Q_ML_dectree = input("Czy chcesz użyć decision tree? (Y/n): ")
    Q_ML_randomforest = input("Czy chcesz użyć random forest? (Y/n): ")
    Q_ML_gradientboosting = input("Czy chcesz użyć gradient boosting? (Y/n): ")

    logg(f'Wybrane decyzje: {Q_generate}, {Q_generate_plot}, {Q_TAMSD}, {Q_TAMSD_plot}'+
        f', {Q_ML_features}, {Q_ML_linreg}, {Q_ML_dectree}, {Q_ML_randomforest}, {Q_ML_gradientboosting}.')
    print(64 * '-')
    
    N = N_long
    path = f'data/part{part}/model{Model}/'
    dirmake(path)
    traject_loaded = False
    expo_loaded = False
    features_loaded = False
    
    if Q_generate == 'Y': 
        # trajectory generation
        generate_trajectories(N, part, Model)
        print(64 * '-')
        
    if Q_generate_plot > 0:
        # trajectory plotting
        if not traject_loaded:
            trajectories = read_trajectories(part, Model)
            traject_loaded = True
        print(f'Tworzenie {Q_generate_plot} wykresów...')
def decision_tree(features, part, Model):
    """Tune, train, persist, visualise and evaluate a DecisionTreeRegressor.

    Hyper-parameters are found with RandomizedSearchCV on a 10% slice of the
    training data, the final tree is refit on the full training set, pickled
    to 'data/part{part}/model{Model}/ML/decision_tree/model.pk1', rendered as
    two PDFs, and its test-set predictions are written to 'estimated.csv'.

    Uses module globals: ``split_data``, ``number_to_learn``, ``dirmake``,
    ``log``, ``save_model``, sklearn's DecisionTreeRegressor /
    RandomizedSearchCV / plot_tree.
    """
    # parts 0-6 hold out `number_to_learn` rows; parts 7+ hold out 10**(part-6)
    if part in [0, 1, 2, 3, 4, 5, 6]:
        train_data, train_labels, test_data, test_label = split_data(
            features, number_to_learn)
    else:
        train_data, train_labels, test_data, test_label = split_data(
            features, floor(10**(part - 6)))
    # hyper-parameter search uses only a 10% slice of the training data
    hiperparam_data = train_data[:floor(number_to_learn / 10)]
    hiperparam_labels = train_labels[:floor(number_to_learn / 10)]
    print('Wyznaczanie drzewa decyzyjnego...')
    max_depth = list(range(2, 20, 1))
    # FIX: was range(1, 11); sklearn requires min_samples_split >= 2, and the
    # sibling gradient_boosting / random_forest grids already use range(2, 11).
    min_samples_split = list(range(2, 11))
    min_samples_leaf = list(range(1, 6))
    # NOTE(review): 'auto' was removed for regressors in sklearn >= 1.3 --
    # confirm the pinned sklearn version supports it
    max_features = ['auto', 'sqrt']
    random_grid = {
        'max_depth': max_depth,
        'min_samples_split': min_samples_split,
        'min_samples_leaf': min_samples_leaf,
        'max_features': max_features
    }
    log(f'ML - drzewo decyzyjne - szukanie superparametrów - start')
    start = datetime.now()
    model = DecisionTreeRegressor()
    model = RandomizedSearchCV(estimator=model,
                               param_distributions=random_grid,
                               n_iter=30,
                               cv=3,
                               verbose=2,
                               random_state=42,
                               n_jobs=3,
                               return_train_score=True,
                               refit=True)
    model = model.fit(hiperparam_data, hiperparam_labels)
    stop = datetime.now()
    log(f'ML - drzewo decyzyjne - szukanie superparametrów - koniec {stop - start}'
        )
    model_params = pd.DataFrame(model.best_params_, index=['decision tree'])
    dirmake(f'data/part{part}/model{Model}/ML/decision_tree')
    model_params.to_csv(
        f'data/part{part}/model{Model}/ML/decision_tree/model_params.csv')
    # rebuild a fresh tree with the best hyper-parameters, trained on all data
    model = DecisionTreeRegressor(**model.best_params_)
    log(f'ML - drzewo decyzyjne - nauczanie - start')
    start = datetime.now()
    model.fit(train_data, train_labels)
    stop = datetime.now()
    log(f'ML - drzewo decyzyjne - nauczanie - koniec {stop - start}')
    print(' --- ZAKOŃCZONO')
    path = f'data/part{part}/model{Model}/ML/decision_tree/model.pk1'
    print('Zapisywanie modelu do pliku {}'.format(path))
    save_model(model, path)
    # shallow rendering (top 3 levels) for readability
    plt.cla()
    plt.figure(figsize=(10, 6.5))
    plot_tree(model,
              max_depth=3,
              feature_names=list(test_data),
              fontsize=10,
              filled=True)
    path = f'data/part{part}/model{Model}/ML/decision_tree/tree.pdf'
    plt.savefig(path, transparent=True, bbox_inches='tight')
    # full tree rendering
    plt.cla()
    plt.figure(figsize=(15, 15))
    plot_tree(model, feature_names=list(test_data), filled=True)
    path = f'data/part{part}/model{Model}/ML/decision_tree/full_tree.pdf'
    plt.savefig(path, transparent=True, bbox_inches='tight')
    print(' --- ZAKOŃCZONO')
    print('Testowanie modelu drzewa decyzyjnego...')
    log(f'ML - drzewo decyzyjne - przewidywanie - start')
    start = datetime.now()
    predicted_labels = model.predict(test_data)
    stop = datetime.now()
    log(f'ML - drzewo decyzyjne - przewidywanie - koniec {stop - start}')
    print(' --- ZAKOŃCZONO')
    print('Translacja przewidywań...')
    results = pd.DataFrame({'expo': test_label, 'expo_est': predicted_labels})
    print(' --- ZAKOŃCZONO')
    print('Translacja wyników do pliku...')
    results.to_csv(
        f'data/part{part}/model{Model}/ML/decision_tree/estimated.csv')
    print(' --- ZAKOŃCZONO')
def save_model(model, path):
    """Pickle ``model`` to ``path``, creating the parent directory first."""
    parent_dir = '/'.join(path.split('/')[:-1])
    dirmake(parent_dir)
    with open(path, 'wb') as handle:
        dump(model, handle)
def gradient_boosting(features, part, Model):
    """Tune, train, persist and evaluate a GradientBoostingRegressor.

    Same pipeline as ``decision_tree`` / ``random_forest``: randomized
    hyper-parameter search on a 10% training slice, refit on the full
    training set, model pickled under
    'data/part{part}/model{Model}/ML/gradient_boosting/', test-set
    predictions written to 'estimated.csv'.
    """
    # parts 0-6 hold out `number_to_learn` rows; parts 7+ hold out 10**(part-6)
    if part in [0, 1, 2, 3, 4, 5, 6]:
        train_data, train_labels, test_data, test_label = split_data(
            features, number_to_learn)
    else:
        train_data, train_labels, test_data, test_label = split_data(
            features, floor(10**(part - 6)))
    # hyper-parameter search uses a 10% slice of the training data
    # NOTE(review): slice size is derived from number_to_learn even when
    # part > 6 uses a different split -- confirm this is intended
    hiperparam_data = train_data[:floor(number_to_learn / 10)]
    hiperparam_labels = train_labels[:floor(number_to_learn / 10)]
    print('Wyznaczanie modelu gradient boosting...')
    # https://towardsdatascience.com/hyperparameter-tuning-the-random-forest-in-python-using-scikit-learn-28d2aa77dd74
    learning_rate = [0.001 * i for i in range(1, 21)]
    n_estimators = list(range(100, 1001, 100))
    max_depth = list(range(2, 20, 1))
    min_samples_split = list(range(2, 11))
    min_samples_leaf = list(range(1, 6))
    # NOTE(review): 'auto' removed for regressors in sklearn >= 1.3 -- confirm version
    max_features = ['auto', 'sqrt']
    random_grid = {
        'learning_rate': learning_rate,
        'n_estimators': n_estimators,
        'max_depth': max_depth,
        'min_samples_split': min_samples_split,
        'min_samples_leaf': min_samples_leaf,
        'max_features': max_features
    }
    model = GradientBoostingRegressor()
    log(f'ML - wzmocnienie gradientowe - szukanie superparametrów - start')
    start = datetime.now()
    model = RandomizedSearchCV(estimator=model,
                               param_distributions=random_grid,
                               n_iter=30,
                               cv=3,
                               verbose=2,
                               random_state=42,
                               n_jobs=3,
                               return_train_score=True,
                               refit=True)
    model.fit(hiperparam_data, hiperparam_labels)
    stop = datetime.now()
    log(f'ML - wzmocenienie gradientowe - szukanie superparametrów - koniec {stop - start}'
        )
    model_params = pd.DataFrame(model.best_params_,
                                index=['gradient boosting'])
    dirmake(f'data/part{part}/model{Model}/ML/gradient_boosting')
    model_params.to_csv(
        f'data/part{part}/model{Model}/ML/gradient_boosting/model_params.csv')
    # rebuild with the best hyper-parameters and train on the full training set
    model = GradientBoostingRegressor(**model.best_params_)
    # alternative (disabled): reload persisted params instead of re-searching
    # params = pd.read_csv(f'data/part{part}/model{Model}/ML/gradient_boosting/model_params.csv', index_col='Unnamed: 0')
    # params = params.to_dict(orient = 'list')
    # for key,value in params.items():
    #     params[key] = value[0]
    # model = GradientBoostingRegressor(**params)
    log(f'ML - wzmocnienie gradientowe - nauczanie - start')
    start = datetime.now()
    model.fit(train_data, train_labels)
    stop = datetime.now()
    log(f'ML - wzmocenienie gradientowe - nauczanie - koniec {stop - start}')
    print(' --- ZAKOŃCZONO')
    path = f'data/part{part}/model{Model}/ML/gradient_boosting/model.pk1'
    print('Zapisywanie modelu do pliku {} oraz jego parametrów'.format(path))
    save_model(model, path)
    print(' --- ZAKOŃCZONO')
    print('Testowanie modelu gradient boosting...')
    log(f'ML - wzmocnienie gradientowe - przewidywanie - start')
    start = datetime.now()
    predicted_labels = model.predict(test_data)
    stop = datetime.now()
    log(f'ML - wzmocenienie gradientowe - przewidywanie - koniec {stop - start}'
        )
    print(' --- ZAKOŃCZONO')
    print('Translacja przewidywań...')
    results = pd.DataFrame({'expo': test_label, 'expo_est': predicted_labels})
    print(' --- ZAKOŃCZONO')
    print('Zapisywanie wyników do pliku...')
    results.to_csv(
        f'data/part{part}/model{Model}/ML/gradient_boosting/estimated.csv')
    print(' --- ZAKOŃCZONO')
def random_forest(features, part, Model):
    """Tune, train, persist and evaluate a RandomForestRegressor.

    Same pipeline as ``decision_tree`` / ``gradient_boosting``: randomized
    hyper-parameter search on a 10% training slice, refit on the full
    training set, model pickled under
    'data/part{part}/model{Model}/ML/random_forest/', test-set predictions
    written to 'estimated.csv'.
    """
    # parts 0-6 hold out `number_to_learn` rows; parts 7+ hold out 10**(part-6)
    if part in [0, 1, 2, 3, 4, 5, 6]:
        train_data, train_labels, test_data, test_label = split_data(
            features, number_to_learn)
    else:
        train_data, train_labels, test_data, test_label = split_data(
            features, floor(10**(part - 6)))
    # hyper-parameter search uses a 10% slice of the training data
    hiperparam_data = train_data[:floor(number_to_learn / 10)]
    hiperparam_labels = train_labels[:floor(number_to_learn / 10)]
    print('Wyznaczanie modelu random forest...')
    # https://towardsdatascience.com/hyperparameter-tuning-the-random-forest-in-python-using-scikit-learn-28d2aa77dd74
    n_estimators = list(range(100, 1001, 100))
    max_depth = list(range(2, 20, 1))
    min_samples_split = list(range(2, 11))
    min_samples_leaf = list(range(1, 6))
    # NOTE(review): 'auto' removed for regressors in sklearn >= 1.3 -- confirm version
    max_features = ['auto', 'sqrt']
    max_samples = [0.1 * i for i in range(1, 10)]
    random_grid = {
        'n_estimators': n_estimators,
        'max_depth': max_depth,
        'min_samples_split': min_samples_split,
        'min_samples_leaf': min_samples_leaf,
        'max_features': max_features,
        'max_samples': max_samples
    }
    # initial estimator settings are overridden by the sampled grid values
    model = RandomForestRegressor(bootstrap=True, max_samples=0.5)
    log(f'ML - las losowy - szukanie superparametrów - start')
    start = datetime.now()
    model = RandomizedSearchCV(estimator=model,
                               param_distributions=random_grid,
                               n_iter=30,
                               cv=3,
                               verbose=2,
                               random_state=42,
                               n_jobs=3,
                               return_train_score=True,
                               refit=True)
    model.fit(hiperparam_data, hiperparam_labels)
    stop = datetime.now()
    log(f'ML - las losowy - szukanie superparametrów - koniec {stop - start}')
    model_params = pd.DataFrame(model.best_params_, index=['random forest'])
    dirmake(f'data/part{part}/model{Model}/ML/random_forest')
    model_params.to_csv(
        f'data/part{part}/model{Model}/ML/random_forest/model_params.csv')
    log(f'ML - las losowy - nauczanie - start')
    start = datetime.now()
    # rebuild with the best hyper-parameters and train on the full training set
    model = RandomForestRegressor(**model.best_params_)
    model.fit(train_data, train_labels)
    stop = datetime.now()
    log(f'ML - las losowy - nauczanie - koniec {stop - start}')
    print(' --- ZAKOŃCZONO')
    path = f'data/part{part}/model{Model}/ML/random_forest/model.pk1'
    print('Zapisywanie modelu do pliku {} oraz jego parametrów'.format(path))
    save_model(model, path)
    print(' --- ZAKOŃCZONO')
    print('Testowanie modelu random forest...')
    log(f'ML - las losowy - przewidywanie - start')
    start = datetime.now()
    predicted_labels = model.predict(test_data)
    stop = datetime.now()
    log(f'ML - las losowy - przewidywanie - koniec {stop - start}')
    print(' --- ZAKOŃCZONO')
    print('Translacja przewidywań...')
    results = pd.DataFrame({'expo': test_label, 'expo_est': predicted_labels})
    print(' --- ZAKOŃCZONO')
    print('Zapisywanie wyników do pliku...')
    results.to_csv(
        f'data/part{part}/model{Model}/ML/random_forest/estimated.csv')
    print(' --- ZAKOŃCZONO')
# Exemple #9
# 0
def plot_noisy3():
    """Plot R^2 / MAE / MSE against training-set size (10..100000).

    Reads per-part result tables from 'data/part7-10/results/part*/table.csv',
    then writes one PDF per (statistic, model) pair plus one combined PDF per
    statistic under 'data/part7-10/'.  Relies on module globals ``models``,
    ``parts``, ``colors``, ``dirmake``, ``pd``, ``plt``.
    """
    path_results = 'data/part7-10/'
    dirmake(path_results)
    R2s = {}
    MAEs = {}
    MSEs = {}
    sizes = [10, 100, 1000, 10000, 100000]  # training-set sizes on the x-axis
    for model in models:
        R2 = []
        MAE = []
        MSE = []
        for part in parts:
            path_table = 'data/part7-10/results/part' + str(
                part) + '/table.csv'
            table = pd.read_csv(path_table, index_col='Unnamed: 0')
            row = table.loc[model.replace('_', ' ')]
            R2.append(row['R^2'])
            MAE.append(row['MAE'])
            MSE.append(row['MSE'])
        R2s[model] = R2
        MAEs[model] = MAE
        MSEs[model] = MSE
        # FIX: plain tuple lookup instead of eval() on a variable name
        for stat, statistica in (('R2', R2), ('MAE', MAE), ('MSE', MSE)):
            plt.clf()
            plt.figure(figsize=(4, 4))
            plt.semilogx(sizes,
                         statistica,
                         label=stat)
            plt.title(model, loc='left')
            plt.xticks(sizes)
            plt.yticks(statistica)
            dirmake(path_results + '/' + stat + '/')
            plt.savefig(path_results + stat + '/' + model + '.pdf')
    # FIX: dict lookup instead of eval(stat + 's')
    for stat, statistica in (('R2', R2s), ('MAE', MAEs), ('MSE', MSEs)):
        path_result = path_results + stat + '.pdf'
        plt.close('all')
        plt.figure(figsize=(3, 3))
        n = 0
        for key, value in statistica.items():
            if stat == 'R2':
                # R^2 can be negative, so only the x-axis is logarithmic
                plt.semilogx(sizes,
                             value,
                             label=key.replace('_', ' '),
                             c=colors[n + 1],
                             marker='x')
            else:
                plt.loglog(sizes,
                           value,
                           label=key.replace('_', ' '),
                           c=colors[n],
                           marker='x')
            n += 1
        plt.xticks(sizes)
        if stat == 'MSE':
            plt.legend(loc='center left', bbox_to_anchor=(1.05, 0.5))
        plt.title(stat, loc='left')
        plt.savefig(path_result,
                    transparent=True,
                    bbox_inches='tight',
                    dpi=300)
# Exemple #10
# 0
def test_models2(part, part_test, num_of_learning=100000, mode='full'):
    ''' Cross-part model testing: models from part 1 against data from parts
        3-6, part-3 models against part-1 data, and parts 7-10 models against
        part-1 data.  Writes a metrics table and (optionally) box/dist plots
        under the matching 'data/part*-*/results/' directory.

        NOTE(review): if (part, part_test) matches none of the branches below,
        path_results/path_estimate stay unbound and dirmake() raises
        NameError -- confirm the supported combinations with the callers.'''
    Model = 'A'
    tests = 'A'
    if part == 1:
        if part_test in [3, 4, 5, 6]:
            path_results = f'data/part1-6/results/part' + str(part_test) + '/'
            path_estimate = f'data/part1-6/estimations/part' + str(
                part_test) + '/'
    elif part == 3:
        if part_test == 1:
            path_results = f'data/part3-1/results/part' + str(part_test) + '/'
            path_estimate = f'data/part3-1/estimations/part' + str(
                part_test) + '/'
    if part in [1, 7, 8, 9, 10]:
        if part_test == 1:
            path_results = f'data/part7-10/results/part' + str(part) + '/'
            path_estimate = f'data/part7-10/estimations/part' + str(part) + '/'
    dataset = read_ML_features(part_test, tests)
    # trajectories are only needed by the TAMSD estimator; 'no_gen' mode skips them
    if not 'no_gen' in mode:
        trajectories = read_trajectories(part_test, tests, 'end',
                                         num_of_learning)  # to test
    else:
        trajectories = 'trajectories'  # placeholder, never used in 'no_gen' mode
    _, _, test_data, test_label = split_data(dataset, num_of_learning)

    test_label = list(test_label)

    dirmake(path_results)
    dirmake(path_estimate)

    models = [
        'TAMSD', 'linear_regression', 'decision_tree', 'random_forest',
        'gradient_boosting'
    ]

    ttable = pd.DataFrame()
    err = {}

    plt.figure(figsize=(2, 2))

    for model in models:
        # # # estimate the exponents
        if model == 'TAMSD':
            estimated_label, test_labels = estimate(trajectories, test_label,
                                                    part, Model, tests,
                                                    'TAMSD', 100000, part_test)
        else:
            estimated_label, test_labels = estimate(test_data, test_label,
                                                    part, Model, tests, model,
                                                    100000, part_test)
        # # # measure estimator quality
        table, er = test_model(estimated_label, test_labels,
                               model.replace('_', ' '), path_results, mode)
        if 'full' in mode or 'table' in mode:
            ttable = pd.concat([ttable, table])
        err[model] = er
        print(f'Zrobione {Model} - {tests} - {model}')
        # # # plotting

    if 'full' in mode or 'table' in mode:
        print(f'Zapisywanie tabeli do pliku {path_results+"table.csv"}')
        ttable.to_csv(path_results + 'table.csv')

    print(f'Zrobione tabele dla {Model}-{tests}')

    if 'full' in mode or 'plot' in mode:
        # box plot of the per-model error distributions
        plt.clf()
        plt.figure(figsize=(5, 5))
        box = plt.boxplot(labels=err.keys(),
                          x=err.values(),
                          patch_artist=True,
                          notch=True,
                          vert=True)
        colors = ['lightblue', 'orange', 'lightgreen', 'pink', 'violet']
        for plott, color in zip(box['boxes'], colors):
            print(plott)
            plott.set_facecolor(color)
        plt.savefig(path_results + 'boxplots.pdf',
                    transparent=True,
                    bbox_inches='tight',
                    dpi=300)

        # displots: density, two KDE bandwidths, and an ECDF
        plt.clf()
        sns.displot(err, common_norm=False, stat="density")
        plt.plot([0, 0], [0, 1.3], color='black')
        plt.savefig(path_results + 'displot1.pdf',
                    transparent=True,
                    bbox_inches='tight',
                    dpi=300)

        plt.clf()
        sns.displot(err,
                    kind="kde",
                    common_norm=False,
                    bw_adjust=0.7,
                    fill=True)
        plt.plot([0, 0], [0, 1.3], color='black')
        plt.savefig(path_results + 'displot2.pdf',
                    transparent=True,
                    bbox_inches='tight',
                    dpi=300)

        plt.clf()
        sns.displot(err, kind="kde", common_norm=False, bw_adjust=2)
        plt.plot([0, 0], [0, 1.3], color='black')
        plt.savefig(path_results + 'displot3.pdf',
                    transparent=True,
                    bbox_inches='tight',
                    dpi=300)

        plt.clf()
        sns.displot(err, kind="ecdf")
        plt.plot([0, 0], [0, 1.3], color='black')
        plt.savefig(path_results + 'displot4.pdf',
                    transparent=True,
                    bbox_inches='tight',
                    dpi=300)

    print(f'Zrobione wykresy dla {Model}-{tests}')
# Exemple #11
# 0
def estimate(test_data,
             test_label,
             part,
             Model,
             Model_test,
             model,
             learning_number=100000,
             part_test=None):
    """Return (estimated_exponents, true_exponents) for one estimator.

    ``model`` is either 'TAMSD' (then ``test_data`` are raw trajectories and
    the estimation runs in a process pool) or the name of a pickled ML model
    loaded from 'data/part{part}/model{Model}/ML/{model}/model.pk1'.
    Results are cached: if the per-combination estimation CSV already exists
    it is read back instead of recomputed.  NaN rows are dropped before
    returning.

    Raises ValueError for (part, part_test) combinations with no known
    estimation path.
    """
    path_estimations = None
    if part_test is None:  # FIX: identity comparison with None (was `== None`)
        part_test = part
        path_estimations = f'data/part{part}/estimations/model{Model}/test{Model_test}/{model}.csv'
        dirmake(f'data/part{part}/estimations/model{Model}/test{Model_test}/')
    elif part_test >= 3:
        path_estimations = f'data/part1-6/estimations/part{part_test}/{model}.csv'
        dirmake(f'data/part1-6/estimations/part{part_test}/')
    elif part_test == 1:
        if part == 3:
            path_estimations = f'data/part3-1/estimations/part{part_test}/{model}.csv'
            dirmake(f'data/part3-1/estimations/part{part_test}/')
        elif part in [1, 7, 8, 9, 10]:
            path_estimations = f'data/part7-10/estimations/part{part}/{model}.csv'
            dirmake(f'data/part7-10/estimations/part{part}/')
    if path_estimations is None:
        # FIX: previously fell through to an opaque NameError on the next line
        raise ValueError(
            f'Unsupported part/part_test combination: {part}/{part_test}')
    if isfile(path_estimations):
        # cached result from a previous run
        results = pd.read_csv(path_estimations, index_col='Unnamed: 0')
    else:
        if model == 'TAMSD':
            print('Obliczanie estymacji TAMSDS...')
            logg('TAMSD - estymacja - start')
            start = datetime.now()
            traj_num = len(test_label)
            give = []
            for i in range(traj_num):
                give.append([test_label[i], test_data[i]])

            with mp.Pool(3) as pool:
                temp = partial(TAMSD_estimation_traj, part, traj_num)
                result = pool.map(temp, give)
                pool.close()
                pool.join()
            stop = datetime.now()
            logg(f'TAMSD - estymacja - koniec {stop - start}')
            print(' --- ZAKOŃCZONO')

            print('Translacja wyników TAMSD...')
            results = pd.DataFrame(columns=['D', 'expo', 'expo_est', 'tamsds'],
                                   index=range(traj_num))
            liczydlo = 0
            for i in result:
                results.loc[liczydlo] = i
                liczydlo += 1
            print(' --- ZAKOŃCZONO')
            results.to_csv(path_estimations)
        else:
            path_model = f'data/part{part}/model{Model}/ML/{model}/model.pk1'
            print('Ładowanie modelu ...')
            # note: `model` is rebound from the estimator name to the estimator object
            model = load_model(path_model)
            print(' --- ZAKOŃCZONO')
            print(f'Testowanie modelu {model}...')
            logg(f'ML - {model} - przewidywanie - start')
            start = datetime.now()
            predicted_labels = model.predict(test_data)
            stop = datetime.now()
            logg(f'ML - {model} - przewidywanie - koniec {stop - start}')
            print(' --- ZAKOŃCZONO')
            print('Translacja przewidywań...')
            print(len(test_label))
            results = pd.DataFrame({
                'expo': test_label,
                'expo_est': predicted_labels
            })
            print(' --- ZAKOŃCZONO')
            print(f'Zapisywanie wstymacji wyników do pliku - model {model}...')
            results.to_csv(path_estimations)
            print(' --- ZAKOŃCZONO')
            print(64 * '-')
    results = results.dropna()
    return list(results['expo_est']), list(results['expo'])
# Exemple #12
# 0
def test_models(part, Model, num_of_learning=100000, tests=None, mode='full'):
    """Evaluate every estimator (TAMSD + four ML models) on one dataset.

    For part >= 1: estimates exponents with each model, scores them with
    ``test_model``, writes a combined metrics table to
    'data/part{part}/results/model{Model}/test{tests}/table.csv' and, in
    'full'/'plot' modes, saves box plots and error-distribution plots.
    ``tests`` defaults to ``Model``.  ``mode`` substrings control the work:
    'no_gen' skips trajectory loading, 'table' and 'plot' select outputs.
    """
    if tests is None:  # FIX: identity comparison with None (was `== None`)
        tests = Model
    if part >= 1:
        path_results = f'data/part{part}/results/model{Model}/test{tests}/'
        path_estimate = f'data/part{part}/estimations/model{Model}/test{tests}/'
        dataset = read_ML_features(part, tests)
        # trajectories are only needed by the TAMSD estimator
        if 'no_gen' not in mode:  # FIX: idiomatic `not in`
            trajectories = read_trajectories(part, tests, 'end',
                                             num_of_learning)
        else:
            trajectories = 'trajectories'  # placeholder, never used in 'no_gen' mode
        _, _, test_data, test_label = split_data(dataset, num_of_learning)

        test_label = list(test_label)

        dirmake(path_results)
        dirmake(path_estimate)

        models = [
            'TAMSD', 'linear_regression', 'decision_tree', 'random_forest',
            'gradient_boosting'
        ]

        ttable = pd.DataFrame(
            columns=['R^2', 'eps = 0.05', 'eps = 0.025', 'max_error'])
        err = {}

        plt.figure(figsize=(2, 2))

        for model in models:
            # # # estimate the exponents
            if model == 'TAMSD':
                estimated_label, test_labels = estimate(
                    trajectories, test_label, part, Model, tests, 'TAMSD')
            else:
                estimated_label, test_labels = estimate(
                    test_data, test_label, part, Model, tests, model)
            # # # measure estimator quality
            table, er = test_model(estimated_label, test_labels,
                                   model.replace('_', ' '), path_results, mode)
            if 'full' in mode or 'table' in mode:
                ttable = pd.concat([ttable, table])
            err[model] = er
            print(f'Zrobione {Model} - {tests} - {model}')
            # # # plotting

        if 'full' in mode or 'table' in mode:
            print(f'Zapisywanie tabeli do pliku {path_results+"table.csv"}')
            ttable.to_csv(path_results + 'table.csv')

        print(f'Zrobione tabele dla {Model}-{tests}')

        if 'full' in mode or 'plot' in mode:
            # box plot of per-model error distributions
            plt.clf()
            plt.figure(figsize=(5, 5))
            labels = ['TAMSD', 'LR', 'DT', 'RF', 'GB']
            box = plt.boxplot(labels=labels,
                              x=err.values(),
                              patch_artist=True,
                              notch=True,
                              vert=True)
            colors = ['lightblue', 'orange', 'lightgreen', 'pink', 'violet']
            for plott, color in zip(box['boxes'], colors):
                print(plott)
                plott.set_facecolor(color)
            plt.savefig(path_results + 'boxplots.pdf',
                        transparent=True,
                        bbox_inches='tight',
                        dpi=300)

            # displots: density, two KDE bandwidths, and an ECDF
            plt.clf()
            sns.displot(err, common_norm=False, stat="density")
            plt.plot([0, 0], [0, 1.3], color='black')
            plt.savefig(path_results + 'displot1.pdf',
                        transparent=True,
                        bbox_inches='tight',
                        dpi=300)

            plt.clf()
            sns.displot(err,
                        kind="kde",
                        common_norm=False,
                        bw_adjust=0.7,
                        fill=True)
            plt.plot([0, 0], [0, 1.3], color='black')
            plt.savefig(path_results + 'displot2.pdf',
                        transparent=True,
                        bbox_inches='tight',
                        dpi=300)

            plt.clf()
            sns.displot(err, kind="kde", common_norm=False, bw_adjust=2)
            plt.plot([0, 0], [0, 1.3], color='black')
            plt.savefig(path_results + 'displot3.pdf',
                        transparent=True,
                        bbox_inches='tight',
                        dpi=300)

            plt.clf()
            sns.displot(err, kind="ecdf")
            plt.plot([0, 0], [0, 1.3], color='black')
            plt.savefig(path_results + 'displot4.pdf',
                        transparent=True,
                        bbox_inches='tight',
                        dpi=300)

        print(f'Zrobione wykresy dla {Model}-{tests}')
def example_TAMSD():
    """Generate the four example TAMSD figures used in the write-up.

    Produces, under 'data/part0/example_TAMSD':
      * TAMSD.pdf / TAMSD_loglog.pdf -- empirical TAMSD of one simulated
        trajectory with estimated and true power-law curves;
      * perfect_TAMSD.pdf / perfect_TAMSD_loglog.pdf -- ideal curves
        rho(t) = 4*D*t**alpha for alpha in {0.7, 1, 1.3}.

    Relies on module helpers/globals: ``dirmake``, ``logg``, ``andi``,
    ``TAMSD_estimation``, ``estimate_expo``, ``colors``, ``plt``.
    """
    print('Generowanie i zapisywanie przykładowych TAMSD...')
    path = 'data/part0/example_TAMSD'
    dirmake(path)
    logg('Generowanie przykładowych TAMSD - start')

    AD = andi.andi_datasets()
    # one 200-point 2D trajectory with exponent 0.7 from model 2
    dataset = AD.create_dataset(200, 1, [0.7], [2], 2)
    x = dataset[0][2:202]
    y = dataset[0][202:]
    trajectory = [x, y]
    # part == 0 puts TAMSD_estimation in single-trajectory mode
    D, expo, expo_est, tamsds = TAMSD_estimation(trajectory, 0.7, 0, 'A')
    tamsds = tamsds[:100]
    t = range(1, len(tamsds) + 1)
    expo_est = estimate_expo(t, tamsds, D, 100)

    # empirical TAMSD vs. estimated and true power-law curves (linear axes)
    plt.cla()
    plt.figure(figsize=(3, 3))
    plt.plot(t, tamsds, '.', label='punkty TAMSD')
    plt.plot(t, [4 * D * i**expo_est for i in t],
             'b',
             label=r'Wyestymowana krzywa wzorcowa')
    plt.plot(t, [4 * D * i**expo for i in t],
             'r',
             label=r'Prawdziwa krzywa wzorcowa')
    plt.xlabel('t')
    plt.ylabel(r'$\rho(t)$')
    plt.title('c', loc='left')
    plt.gca().spines['top'].set_visible(False)
    plt.gca().spines['right'].set_visible(False)
    plt.savefig(path + '/TAMSD.pdf',
                transparent=True,
                bbox_inches='tight',
                dpi=300)

    # the same comparison on log-log axes
    plt.cla()
    plt.loglog(t, tamsds, '.', label='punkty TAMSD')
    plt.loglog(t, [4 * D * i**expo_est for i in t],
               'b',
               label=r'Wyestymowana krzywa TAMSD')
    plt.loglog(t, [4 * D * i**expo for i in t],
               'r',
               label=r'Prawdziwa krzywa wzorcowa')
    plt.xlabel('t')
    plt.ylabel(r'$\rho(t)$')
    plt.legend(loc='lower left', bbox_to_anchor=(1.05, 1))
    plt.title('d', loc='left')
    plt.gca().spines['top'].set_visible(False)
    plt.gca().spines['right'].set_visible(False)
    plt.savefig(path + '/TAMSD_loglog.pdf',
                transparent=True,
                bbox_inches='tight',
                dpi=300)

    # ideal TAMSD curves
    plt.cla()
    D = 0.3
    t = [0.1 * i for i in range(101)]
    exps = [0.7, 1, 1.3]
    label = ['superdyfuzja', 'dyfuzja normalna', 'subdyfuzja']
    for expo in exps:
        # negative index reverses the label order so 0.7 maps to subdiffusion
        plt.plot(t, [4 * D * i**expo for i in t],
                 color=colors[exps.index(expo)],
                 label=r'$\alpha=\ $' + str(expo) + ' - ' +
                 label[-exps.index(expo) - 1])
    plt.xlabel('t')
    plt.ylabel(r'$\rho(t)$')
    plt.legend(loc='lower left', bbox_to_anchor=(1.05, 1), ncol=3)
    plt.title('a', loc='left')
    plt.gca().spines['top'].set_visible(False)
    plt.gca().spines['right'].set_visible(False)
    plt.savefig(path + '/perfect_TAMSD.pdf',
                transparent=True,
                bbox_inches='tight',
                dpi=300)

    # ideal TAMSD curves on log-log axes
    plt.cla()
    D = 1
    t = [0.1 * i for i in range(101)]
    exps = [0.7, 1, 1.3]
    for expo in exps:
        plt.loglog(t, [4 * D * i**expo for i in t],
                   color=colors[exps.index(expo)],
                   label=r'$\alpha=\ $' + str(expo))
    plt.xlabel('t')
    plt.ylabel(r'$\rho(t)$')
    plt.title('b', loc='left')
    plt.gca().spines['top'].set_visible(False)
    plt.gca().spines['right'].set_visible(False)
    plt.savefig(path + '/perfect_TAMSD_loglog.pdf',
                transparent=True,
                bbox_inches='tight',
                dpi=300)

    logg('Generowanie przykładowych TAMSD - stop')
    print(' --- ZAKOŃCZONO')