def read_trajectories(part, Model, mode='all', N=0):
    """Load generated 2D trajectories from ``task1.txt`` for one part/model.

    Each file line has the form ``label;x_1;...;x_T;y_1;...;y_T``; the label
    field is dropped and the remaining values are split in half into x and y
    coordinate lists.

    Parameters
    ----------
    part : int
        Experiment part; data is read from
        ``data/part{part}/model{Model}/generated/task1.txt``. For part < 1
        nothing is read and an empty list is returned.
    Model : str
        Model identifier used in the path (e.g. 'A', 'B', 'C').
    mode : str, optional
        'all' (default) keeps every line, 'start' keeps the first N lines,
        'end' keeps the lines from index N onwards.
    N : int, optional
        Cut-off index used with mode='start'/'end'.

    Returns
    -------
    list
        One ``[x, y]`` pair of float lists per trajectory.
    """
    print('Ładowanie trajektorii')
    # Bug fix: `trajectories` used to be defined only inside the branch
    # below, so calling with part < 1 raised NameError at the return.
    trajectories = []
    if part >= 1:
        path = f'data/part{part}/model{Model}/generated/'
        file_path = path + 'task1.txt'
        log(f'GEN - odczyt trajektorii - start [{file_path}]')
        start = datetime.now()
        with open(file_path) as f:
            trajs = f.readlines()

        if mode == 'start':
            trajs = trajs[:N]
        if mode == 'end':
            trajs = trajs[N:]

        l_t = len(trajs)
        for n, traj in enumerate(trajs, start=1):
            # Drop the leading label field; the first half of the remaining
            # values are x coordinates, the second half y coordinates.
            values = traj.strip().split(';')[1:]
            T = len(values) // 2
            x = [float(v) for v in values[:T]]
            y = [float(v) for v in values[T:T + T]]
            trajectories.append([x, y])
            if n % 500 == 0:
                print(f'czytanie trajektorii - {n}/{l_t}')
        stop = datetime.now()
        log(f'GEN - odczyt trajektorii - koniec {stop - start}')
    print(' --- ZAKOŃCZONO')
    return trajectories
# Esempio n. 2  (scraper artifact, commented out to keep the file importable)
# 0
def linear_regression(features, part, Model):
    """Train, persist and evaluate a multivariate linear-regression model.

    Splits `features` into train/test sets, fits scikit-learn's
    LinearRegression, saves the model to disk and writes the true vs.
    predicted anomalous exponents to ``estimated.csv``.

    Parameters
    ----------
    features : pandas.DataFrame
        Feature table consumed by `split_data`.
    part : int
        Experiment part number; selects the train-set size and output path.
    Model : str
        Model identifier used in the output path.
    """
    # Parts 0-6 use the module-level default train size; parts >= 7 scale
    # the train size as 10**(part - 6).
    if part in [0, 1, 2, 3, 4, 5, 6]:
        train_data, train_labels, test_data, test_label = split_data(
            features, number_to_learn)
    else:
        train_data, train_labels, test_data, test_label = split_data(
            features, floor(10**(part - 6)))
    print('Wyznaczanie modelu wielowymiarowej regresji liniowej...')
    log(f'ML - regresja liniowa - nauczanie - start')
    start = datetime.now()
    # Bug fix: `normalize=True` was deprecated in scikit-learn 1.0 and
    # removed in 1.2, so the original call raises TypeError on current
    # versions. OLS predictions are invariant to feature scaling when an
    # intercept is fitted, so dropping it leaves the estimates unchanged.
    model = LinearRegression(n_jobs=-1)
    model = model.fit(train_data, train_labels)
    stop = datetime.now()
    log(f'ML - regresja liniowa - nauczanie - koniec {stop - start}')
    print(' --- ZAKOŃCZONO')
    # Consistency fix: the sibling model functions create the output
    # directory before saving; without this, save_model can fail.
    dirmake(f'data/part{part}/model{Model}/ML/linear_regression')
    path = f'data/part{part}/model{Model}/ML/linear_regression/model.pk1'
    print('Zapisywanie modelu do pliku {}'.format(path))
    save_model(model, path)
    print(' --- ZAKOŃCZONO')
    print('Testowanie modelu wielowymiarowej regresji liniowej...')
    log(f'ML - regresja liniowa - przewidywanie - start')
    start = datetime.now()
    predicted_labels = model.predict(test_data)
    stop = datetime.now()
    log(f'ML - regresja liniowa - przewidywanie - koniec {stop - start}')
    print(' --- ZAKOŃCZONO')
    print('Translacja przewidywań...')
    results = pd.DataFrame({'expo': test_label, 'expo_est': predicted_labels})
    print(' --- ZAKOŃCZONO')
    print('Zapisywanie wyników do pliku...')
    results.to_csv(
        f'data/part{part}/model{Model}/ML/linear_regression/estimated.csv')
    print(' --- ZAKOŃCZONO')
# Esempio n. 3  (scraper artifact, commented out to keep the file importable)
# 0
def decision_tree(features, part, Model):
    """Tune, train, persist, visualise and evaluate a DecisionTreeRegressor.

    Hyper-parameters are chosen with RandomizedSearchCV on a subsample of
    the training data, the final tree is refit on the full training set,
    saved to disk, plotted (depth-truncated and full) and its predictions
    on the test set are written to ``estimated.csv``.

    Parameters
    ----------
    features : pandas.DataFrame
        Feature table consumed by `split_data`.
    part : int
        Experiment part number; selects the train-set size and output path.
    Model : str
        Model identifier used in the output path.
    """
    # Parts 0-6 use the module-level default train size; parts >= 7 scale it.
    if part in [0, 1, 2, 3, 4, 5, 6]:
        train_data, train_labels, test_data, test_label = split_data(
            features, number_to_learn)
    else:
        train_data, train_labels, test_data, test_label = split_data(
            features, floor(10**(part - 6)))
    # Hyper-parameter search runs on a subsample to keep the search cheap.
    # NOTE(review): the slice uses `number_to_learn` even when part > 6
    # selects a different train size — confirm this is intended.
    hiperparam_data = train_data[:floor(number_to_learn / 10)]
    hiperparam_labels = train_labels[:floor(number_to_learn / 10)]
    print('Wyznaczanie drzewa decyzyjnego...')
    max_depth = list(range(2, 20, 1))
    # Bug fix: min_samples_split must be >= 2 — a sampled value of 1 makes
    # DecisionTreeRegressor.fit raise inside the randomized search.
    min_samples_split = list(range(2, 11))
    min_samples_leaf = list(range(1, 6))
    # 'auto' was removed in scikit-learn 1.3; for a tree regressor it meant
    # "consider all features", i.e. None.
    max_features = [None, 'sqrt']
    random_grid = {
        'max_depth': max_depth,
        'min_samples_split': min_samples_split,
        'min_samples_leaf': min_samples_leaf,
        'max_features': max_features
    }
    log(f'ML - drzewo decyzyjne - szukanie superparametrów - start')
    start = datetime.now()
    model = DecisionTreeRegressor()
    model = RandomizedSearchCV(estimator=model,
                               param_distributions=random_grid,
                               n_iter=30,
                               cv=3,
                               verbose=2,
                               random_state=42,
                               n_jobs=3,
                               return_train_score=True,
                               refit=True)
    model = model.fit(hiperparam_data, hiperparam_labels)
    stop = datetime.now()
    log(f'ML - drzewo decyzyjne - szukanie superparametrów - koniec {stop - start}'
        )
    model_params = pd.DataFrame(model.best_params_, index=['decision tree'])
    dirmake(f'data/part{part}/model{Model}/ML/decision_tree')
    model_params.to_csv(
        f'data/part{part}/model{Model}/ML/decision_tree/model_params.csv')
    # Refit a fresh tree with the best parameters on the full training set.
    model = DecisionTreeRegressor(**model.best_params_)
    log(f'ML - drzewo decyzyjne - nauczanie - start')
    start = datetime.now()
    model.fit(train_data, train_labels)
    stop = datetime.now()
    log(f'ML - drzewo decyzyjne - nauczanie - koniec {stop - start}')
    print(' --- ZAKOŃCZONO')
    path = f'data/part{part}/model{Model}/ML/decision_tree/model.pk1'
    print('Zapisywanie modelu do pliku {}'.format(path))
    save_model(model, path)
    # Depth-truncated visualisation of the fitted tree.
    plt.figure(figsize=(10, 6.5))
    plot_tree(model,
              max_depth=3,
              feature_names=list(test_data),
              fontsize=10,
              filled=True)
    path = f'data/part{part}/model{Model}/ML/decision_tree/tree.pdf'
    plt.savefig(path, transparent=True, bbox_inches='tight')
    plt.close()  # release the figure instead of leaking it
    # Full, untruncated tree.
    plt.figure(figsize=(15, 15))
    plot_tree(model, feature_names=list(test_data), filled=True)
    path = f'data/part{part}/model{Model}/ML/decision_tree/full_tree.pdf'
    plt.savefig(path, transparent=True, bbox_inches='tight')
    plt.close()
    print(' --- ZAKOŃCZONO')
    print('Testowanie modelu drzewa decyzyjnego...')
    log(f'ML - drzewo decyzyjne - przewidywanie - start')
    start = datetime.now()
    predicted_labels = model.predict(test_data)
    stop = datetime.now()
    log(f'ML - drzewo decyzyjne - przewidywanie - koniec {stop - start}')
    print(' --- ZAKOŃCZONO')
    print('Translacja przewidywań...')
    results = pd.DataFrame({'expo': test_label, 'expo_est': predicted_labels})
    print(' --- ZAKOŃCZONO')
    print('Zapisywanie wyników do pliku...')
    results.to_csv(
        f'data/part{part}/model{Model}/ML/decision_tree/estimated.csv')
    print(' --- ZAKOŃCZONO')
# Esempio n. 4  (scraper artifact, commented out to keep the file importable)
# 0
def gradient_boosting(features, part, Model):
    """Tune, train, persist and evaluate a GradientBoostingRegressor.

    Hyper-parameters are chosen with RandomizedSearchCV on a subsample of
    the training data, the final model is refit on the full training set,
    saved to disk and its predictions on the test set are written to
    ``estimated.csv``.

    Parameters
    ----------
    features : pandas.DataFrame
        Feature table consumed by `split_data`.
    part : int
        Experiment part number; selects the train-set size and output path.
    Model : str
        Model identifier used in the output path.
    """
    # Parts 0-6 use the module-level default train size; parts >= 7 scale it.
    if part in [0, 1, 2, 3, 4, 5, 6]:
        train_data, train_labels, test_data, test_label = split_data(
            features, number_to_learn)
    else:
        train_data, train_labels, test_data, test_label = split_data(
            features, floor(10**(part - 6)))
    # Hyper-parameter search runs on a subsample to keep the search cheap.
    hiperparam_data = train_data[:floor(number_to_learn / 10)]
    hiperparam_labels = train_labels[:floor(number_to_learn / 10)]
    print('Wyznaczanie modelu gradient boosting...')
    # Grid layout after:
    # https://towardsdatascience.com/hyperparameter-tuning-the-random-forest-in-python-using-scikit-learn-28d2aa77dd74
    learning_rate = [0.001 * i for i in range(1, 21)]
    n_estimators = list(range(100, 1001, 100))
    max_depth = list(range(2, 20, 1))
    min_samples_split = list(range(2, 11))
    min_samples_leaf = list(range(1, 6))
    # 'auto' was removed in scikit-learn 1.3; for a gradient-boosting
    # regressor it meant "consider all features", i.e. None.
    max_features = [None, 'sqrt']
    random_grid = {
        'learning_rate': learning_rate,
        'n_estimators': n_estimators,
        'max_depth': max_depth,
        'min_samples_split': min_samples_split,
        'min_samples_leaf': min_samples_leaf,
        'max_features': max_features
    }
    model = GradientBoostingRegressor()
    log(f'ML - wzmocnienie gradientowe - szukanie superparametrów - start')
    start = datetime.now()
    model = RandomizedSearchCV(estimator=model,
                               param_distributions=random_grid,
                               n_iter=30,
                               cv=3,
                               verbose=2,
                               random_state=42,
                               n_jobs=3,
                               return_train_score=True,
                               refit=True)
    model.fit(hiperparam_data, hiperparam_labels)
    stop = datetime.now()
    # Typo fix: 'wzmocenienie' -> 'wzmocnienie' (matches the '- start' entry).
    log(f'ML - wzmocnienie gradientowe - szukanie superparametrów - koniec {stop - start}'
        )
    model_params = pd.DataFrame(model.best_params_,
                                index=['gradient boosting'])
    dirmake(f'data/part{part}/model{Model}/ML/gradient_boosting')
    model_params.to_csv(
        f'data/part{part}/model{Model}/ML/gradient_boosting/model_params.csv')
    # Refit a fresh model with the best parameters on the full training set.
    model = GradientBoostingRegressor(**model.best_params_)
    log(f'ML - wzmocnienie gradientowe - nauczanie - start')
    start = datetime.now()
    model.fit(train_data, train_labels)
    stop = datetime.now()
    log(f'ML - wzmocnienie gradientowe - nauczanie - koniec {stop - start}')
    print(' --- ZAKOŃCZONO')
    path = f'data/part{part}/model{Model}/ML/gradient_boosting/model.pk1'
    print('Zapisywanie modelu do pliku {} oraz jego parametrów'.format(path))
    save_model(model, path)
    print(' --- ZAKOŃCZONO')
    print('Testowanie modelu gradient boosting...')
    log(f'ML - wzmocnienie gradientowe - przewidywanie - start')
    start = datetime.now()
    predicted_labels = model.predict(test_data)
    stop = datetime.now()
    log(f'ML - wzmocnienie gradientowe - przewidywanie - koniec {stop - start}'
        )
    print(' --- ZAKOŃCZONO')
    print('Translacja przewidywań...')
    results = pd.DataFrame({'expo': test_label, 'expo_est': predicted_labels})
    print(' --- ZAKOŃCZONO')
    print('Zapisywanie wyników do pliku...')
    results.to_csv(
        f'data/part{part}/model{Model}/ML/gradient_boosting/estimated.csv')
    print(' --- ZAKOŃCZONO')
# Esempio n. 5  (scraper artifact, commented out to keep the file importable)
# 0
def random_forest(features, part, Model):
    """Tune, train, persist and evaluate a RandomForestRegressor.

    Hyper-parameters are chosen with RandomizedSearchCV on a subsample of
    the training data, the final forest is refit on the full training set,
    saved to disk and its predictions on the test set are written to
    ``estimated.csv``.

    Parameters
    ----------
    features : pandas.DataFrame
        Feature table consumed by `split_data`.
    part : int
        Experiment part number; selects the train-set size and output path.
    Model : str
        Model identifier used in the output path.
    """
    # Parts 0-6 use the module-level default train size; parts >= 7 scale it.
    if part in [0, 1, 2, 3, 4, 5, 6]:
        train_data, train_labels, test_data, test_label = split_data(
            features, number_to_learn)
    else:
        train_data, train_labels, test_data, test_label = split_data(
            features, floor(10**(part - 6)))
    # Hyper-parameter search runs on a subsample to keep the search cheap.
    hiperparam_data = train_data[:floor(number_to_learn / 10)]
    hiperparam_labels = train_labels[:floor(number_to_learn / 10)]
    print('Wyznaczanie modelu random forest...')
    # Grid layout after:
    # https://towardsdatascience.com/hyperparameter-tuning-the-random-forest-in-python-using-scikit-learn-28d2aa77dd74
    n_estimators = list(range(100, 1001, 100))
    max_depth = list(range(2, 20, 1))
    min_samples_split = list(range(2, 11))
    min_samples_leaf = list(range(1, 6))
    # 'auto' was removed in scikit-learn 1.3; for a forest regressor the
    # equivalent is 1.0 (consider all features).
    max_features = [1.0, 'sqrt']
    max_samples = [0.1 * i for i in range(1, 10)]
    random_grid = {
        'n_estimators': n_estimators,
        'max_depth': max_depth,
        'min_samples_split': min_samples_split,
        'min_samples_leaf': min_samples_leaf,
        'max_features': max_features,
        'max_samples': max_samples
    }
    # max_samples here is only a placeholder; the search grid overrides it.
    model = RandomForestRegressor(bootstrap=True, max_samples=0.5)
    log(f'ML - las losowy - szukanie superparametrów - start')
    start = datetime.now()
    model = RandomizedSearchCV(estimator=model,
                               param_distributions=random_grid,
                               n_iter=30,
                               cv=3,
                               verbose=2,
                               random_state=42,
                               n_jobs=3,
                               return_train_score=True,
                               refit=True)
    model.fit(hiperparam_data, hiperparam_labels)
    stop = datetime.now()
    log(f'ML - las losowy - szukanie superparametrów - koniec {stop - start}')
    model_params = pd.DataFrame(model.best_params_, index=['random forest'])
    dirmake(f'data/part{part}/model{Model}/ML/random_forest')
    model_params.to_csv(
        f'data/part{part}/model{Model}/ML/random_forest/model_params.csv')
    log(f'ML - las losowy - nauczanie - start')
    start = datetime.now()
    # Refit a fresh forest with the best parameters on the full training
    # set (bootstrap defaults to True, matching the search estimator).
    model = RandomForestRegressor(**model.best_params_)
    model.fit(train_data, train_labels)
    stop = datetime.now()
    log(f'ML - las losowy - nauczanie - koniec {stop - start}')
    print(' --- ZAKOŃCZONO')
    path = f'data/part{part}/model{Model}/ML/random_forest/model.pk1'
    print('Zapisywanie modelu do pliku {} oraz jego parametrów'.format(path))
    save_model(model, path)
    print(' --- ZAKOŃCZONO')
    print('Testowanie modelu random forest...')
    log(f'ML - las losowy - przewidywanie - start')
    start = datetime.now()
    predicted_labels = model.predict(test_data)
    stop = datetime.now()
    log(f'ML - las losowy - przewidywanie - koniec {stop - start}')
    print(' --- ZAKOŃCZONO')
    print('Translacja przewidywań...')
    results = pd.DataFrame({'expo': test_label, 'expo_est': predicted_labels})
    print(' --- ZAKOŃCZONO')
    print('Zapisywanie wyników do pliku...')
    results.to_csv(
        f'data/part{part}/model{Model}/ML/random_forest/estimated.csv')
    print(' --- ZAKOŃCZONO')
def _andi_trajectories(N, Model, path):
    """Run AD.andi_dataset for one model with its trajectory-length range.

    Model 'A' -> fixed length ~100, 'B' -> ~20, 'C' -> variable lengths in
    [20, 100]. Any other Model is silently skipped, matching the original
    per-Model `if` blocks.
    """
    t_ranges = {'A': (99, 100), 'B': (20, 21), 'C': (20, 100)}
    if Model in t_ranges:
        min_T, max_T = t_ranges[Model]
        AD.andi_dataset(N=N,
                        tasks=1,
                        dimensions=2,
                        save_dataset=True,
                        min_T=min_T,
                        max_T=max_T,
                        path_datasets=path)


def generate_trajectories(N, part, Model):
    """Generate and save 2D trajectories for the given experiment part.

    part 1     : AD.andi_dataset with model-dependent trajectory lengths.
    part 2     : andi_dataset_2 restricted to FBM (model index 2).
    parts 3-6  : andi_dataset_3 with increasing noise (0, 0.1, 0.3, 1).
    parts 7-10 : AD.andi_dataset with N overridden to 1.1 * 10**(part - 6).

    Parameters
    ----------
    N : int
        Number of trajectories to generate (replaced for parts 7-10).
    part : int
        Experiment part; selects the generator, noise level and output path.
    Model : str
        'A' (T=100), 'B' (T=20) or 'C' (variable T); parts 3-6 accept only
        'A' and 'B'.

    Raises
    ------
    ValueError
        For parts 3-6 with a Model other than 'A' or 'B'.
    """
    print('Generowanie trajektorii')
    if part == 1:
        path = f'data/part1/model{Model}/generated/'
        dirmake(path)
        start = datetime.now()
        log(f'GEN - generowanie trajektorii - start [{path}]')
        _andi_trajectories(N, Model, path)
        stop = datetime.now()
        log(f'GEN - generowanie trajektorii - koniec [{stop - start}]')

    if part == 2:
        path = f'data/part2/model{Model}/generated/'
        dirmake(path)

        models = [2]  # FBM only
        log(f'GEN - generowanie trajektorii - start [{path}]')
        start = datetime.now()
        if Model in ['A', 'B']:
            # Fixed trajectory length per model.
            T = 100 if Model == 'A' else 20
            andi_dataset_2(N=N,
                           tasks=[1],
                           dimensions=[2],
                           save_dataset=True,
                           min_T=T - 1,
                           max_T=T,
                           path_datasets=path,
                           models=models)
        else:
            # Any other model (e.g. 'C') gets variable lengths in [20, 100].
            andi_dataset_2(N=N,
                           tasks=[1],
                           dimensions=[2],
                           save_dataset=True,
                           min_T=20,
                           max_T=100,
                           path_datasets=path,
                           models=models)
        stop = datetime.now()
        log(f'GEN - generowanie trajektorii - koniec [{stop - start}]')

    if part in [3, 4, 5, 6]:
        path = f'data/part{part}/model{Model}/generated/'
        dirmake(path)
        log(f'GEN - generowanie trajektorii - start [{path}]')
        start = datetime.now()
        if Model == 'A':
            T = 100
        elif Model == 'B':
            T = 20
        else:
            # Bug fix: any other Model previously left T undefined and the
            # andi_dataset_3 call below crashed with NameError.
            raise ValueError(
                f"parts 3-6 support only Models 'A' and 'B', got {Model!r}")
        noise = [0, 0.1, 0.3, 1][part - 3]  # noise grows with the part number
        andi_dataset_3(N=N,
                       tasks=[1],
                       dimensions=[2],
                       save_dataset=True,
                       min_T=T - 1,
                       max_T=T,
                       path_datasets=path,
                       noise=[noise])
        stop = datetime.now()
        log(f'GEN - generowanie trajektorii - koniec [{stop - start}]')

    if part in [7, 8, 9, 10]:
        path = f'data/part{part}/model{Model}/generated/'
        dirmake(path)
        start = datetime.now()
        log(f'GEN - generowanie trajektorii - start [{path}]')
        # Scaling study: the caller's N is replaced by 1.1 * 10**(part - 6).
        N = floor(1.1 * 10**(part - 6))
        _andi_trajectories(N, Model, path)
        stop = datetime.now()
        log(f'GEN - generowanie trajektorii - koniec [{stop - start}]')
    print(' --- ZAKOŃCZONO')