Example 1
import os

import autogluon.core as ag
from autogluon.tabular import TabularPrediction as task


def load_data(directory_prefix, train_file, test_file, name, url=None):
    """Download the `name` dataset from `url` if it is not cached locally, then load the train/test Datasets."""
    if not os.path.exists(directory_prefix):
        os.mkdir(directory_prefix)
    directory = directory_prefix + name + "/"
    train_file_path = directory + train_file
    test_file_path = directory + test_file
    if (not os.path.exists(train_file_path)) or (not os.path.exists(test_file_path)):
        # fetch files from s3:
        print("%s data not found locally, so fetching from %s" % (name, url))
        zip_name = ag.download(url, directory_prefix)
        ag.unzip(zip_name, directory_prefix)
        os.remove(zip_name)

    train_data = task.Dataset(file_path=train_file_path)
    test_data = task.Dataset(file_path=test_file_path)
    return train_data, test_data
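A minimal usage sketch of the helper above; the directory, file names, and zip URL are illustrative (they mirror the AdultIncomeBinaryClassification CSVs used elsewhere on this page) and are not part of the original snippet:

# Hypothetical call; adjust the URL and file names to your dataset.
train_data, test_data = load_data(
    directory_prefix='./datasets/',
    train_file='train_data.csv',
    test_file='test_data.csv',
    name='AdultIncomeBinaryClassification',
    url='https://autogluon.s3.amazonaws.com/datasets/AdultIncomeBinaryClassification.zip',
)
print(train_data.head())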
Example 2
""" Example script for predicting columns of tables, demonstrating more advanced usage of fit().
    Note that the settings demonstrated here are chosen only to keep the demo's runtime short and are not wise choices for real use.
    To maximize predictive accuracy, we recommend you do NOT specify `hyperparameters` or `hyperparameter_tune`, and instead only pass the following fit() arguments: eval_metric=YOUR_METRIC, presets='best_quality'.
"""

import autogluon.core as ag
from autogluon.tabular import TabularPrediction as task

# Training time:
train_data = task.Dataset(
    file_path='https://autogluon.s3.amazonaws.com/datasets/Inc/train.csv'
)  # can be local CSV file as well, returns Pandas DataFrame
train_data = train_data.head(100)  # subsample for faster demo
print(train_data.head())
label_column = 'class'  # specifies which column we want to predict
savedir = 'ag_hpo_models/'  # where to save trained models

hyperparams = {
    'NN': {
        'num_epochs': 10,
        'activation': 'relu',
        'dropout_prob': ag.Real(0.0, 0.5)
    },
    'GBM': {
        'num_boost_round': 1000,
        'learning_rate': ag.Real(0.01, 0.1, log=True)
    }
}

predictor = task.fit(
    train_data=train_data,
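The snippet above is cut off inside the task.fit(...) call. A rough sketch of how such a hyperparameter-tuning call is typically completed with this legacy API follows; the specific arguments and values are assumptions, not the original example's settings:

# Sketch only: argument values are illustrative, not tuned recommendations.
predictor = task.fit(
    train_data=train_data,
    label=label_column,            # target column defined above
    output_directory=savedir,      # where trained models are saved
    hyperparameters=hyperparams,   # search spaces defined above (ag.Real ranges)
    hyperparameter_tune=True,      # enable HPO over those search spaces
    time_limits=2 * 60,            # assumed overall time budget in seconds
)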
Example 3
Most users can get strong performance without specifying custom feature generators due to the generic and powerful default feature generator used by AutoGluon.
An advanced user may wish to create a custom feature generator to:
    1. Experiment with different preprocessing pipelines to improve model quality.
    2. Have full control over what data is being sent to downstream models.
    3. Migrate existing pipelines into AutoGluon for ease of use and deployment.
    4. Contribute new feature generators to AutoGluon.
"""

################
# Loading Data #
################

from autogluon.tabular import TabularPrediction as task

train_data = task.Dataset(
    file_path='https://autogluon.s3.amazonaws.com/datasets/AdultIncomeBinaryClassification/train_data.csv'
)  # can be local CSV file as well, returns Pandas DataFrame
test_data = task.Dataset(
    file_path='https://autogluon.s3.amazonaws.com/datasets/AdultIncomeBinaryClassification/test_data.csv'
)  # another Pandas DataFrame
label_column = 'class'  # specifies which column we want to predict
sample_train_data = train_data.head(100)  # subsample for faster demo

# Separate features and labels
# Make sure not to include your label/target column when sending input to the feature generators, or the label will be transformed as well.
X = sample_train_data.drop(columns=[label_column])
y = sample_train_data[label_column]

X_test = test_data.drop(columns=[label_column])
y_test = test_data[label_column]
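The docstring above motivates custom feature generators, and the snippet stops right after preparing X and y for them. As a minimal next-step sketch, the default pipeline generator can be fit on the training features; the import path below is an assumption for this AutoGluon version and may differ:

# Assumed import path; the generators module location varies across AutoGluon versions.
from autogluon.features.generators import AutoMLPipelineFeatureGenerator

feature_generator = AutoMLPipelineFeatureGenerator()
X_transformed = feature_generator.fit_transform(X=X)       # fit on training features only
X_test_transformed = feature_generator.transform(X_test)   # reuse the fitted generator on the test features
print(X_transformed.head())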
Example 4
train_file = 'train_data.csv'
test_file = 'test_data.csv'
train_file_path = directory + train_file
test_file_path = directory + test_file

if (not os.path.exists(train_file_path)) or (
        not os.path.exists(test_file_path)):  # fetch files from s3:
    print("%s data not found locally, so fetching from %s" %
          (dataset['name'], dataset['url']))
    os.system("wget " + dataset['url'] +
              " -O temp.zip && unzip -o temp.zip && rm temp.zip")

savedir = directory + 'agModels/'

label_column = dataset['label_column']
train_data = task.Dataset(file_path=train_file_path)
test_data = task.Dataset(file_path=test_file_path)
train_data = train_data.head(subsample_size)  # subsample for faster demo
test_data = test_data.head(subsample_size)  # subsample for faster run
print(train_data.head())

# Fit model ensemble:
predictor = task.fit(train_data=train_data,
                     label=label_column,
                     problem_type='multiclass',
                     output_directory=savedir,
                     cache_data=True,
                     auto_stack=True,
                     time_limits=time_limits)

# Distill ensemble-predictor into single model:
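The example ends at this comment. A hedged sketch of the distillation step, assuming the predictor.distill() interface of this AutoGluon version (the argument name is an assumption):

# Train simpler student models that mimic the stacked ensemble's predictions,
# reusing the same time budget as the fit call above.
distilled_model_names = predictor.distill(time_limits=time_limits)
print('Distilled models:', distilled_model_names)
leaderboard = predictor.leaderboard(test_data)  # compare ensemble members and distilled students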
Example 5
from autogluon.tabular import TabularPrediction as task
from autogluon.tabular.task.tabular_prediction.predictor_v2 import TabularPredictorV2

################
# Loading data #
################

train_data = task.Dataset(
    file_path='https://autogluon.s3.amazonaws.com/datasets/Inc/train.csv')
test_data = task.Dataset(
    file_path='https://autogluon.s3.amazonaws.com/datasets/Inc/test.csv')
label = 'class'
eval_metric = 'roc_auc'
hyperparameters = {'RF': {}}
train_data = train_data.head(1000)  # subsample for faster demo

##################################
# Fitting with the old Predictor #
##################################

predictor1 = task.fit(train_data,
                      label=label,
                      eval_metric=eval_metric,
                      hyperparameters=hyperparameters,
                      num_bagging_folds=2)
predictor1.leaderboard(test_data)

##################################
# Fitting with the new Predictor #
##################################
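The snippet ends before the new-style call. A rough sketch of the equivalent fit with TabularPredictorV2, assuming this transitional API takes label/eval_metric in the constructor and the renamed bagging argument in fit():

predictor2 = TabularPredictorV2(label=label, eval_metric=eval_metric)
predictor2.fit(train_data,
               hyperparameters=hyperparameters,
               num_bag_folds=2)  # assumed new-style name for num_bagging_folds
predictor2.leaderboard(test_data)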
Example 6
from datetime import datetime
import pandas as pd

from autogluon.tabular import TabularPrediction as task  # needed for task.Dataset below

# %% define data

root_folder = "/home/lstm/Google Drive/MATLAB data files/Project__autoML/datasets for autoML/data_weekly_archive/"
data_folder = "20200213/"
data_file = "GCP_trainvalid_KOSPIb1f0bNsCFCCOFOC20200213.csv"
data_ref = 'KOSPIb1f0bNsCFCCOFOC20200213'
target_col = "target"
most_recent_folder = "20112032/"
most_recent_file = "GCP_trainvalid_KOSPIb1f0bNsCFCCOFOC2020112032.csv"

cols_2_drop_4_training = ["timestamp", "split_tag", "weight_vector"]

data_trainvalid = task.Dataset(file_path=root_folder + data_folder + data_file)
data_trainvalid["DoW"] = data_trainvalid["DoW"].astype('category')

train_data = data_trainvalid.loc[data_trainvalid.split_tag == 'TRAIN', :]
print(train_data.head())
print(train_data.tail())

# do not provide validation data if bagging/stacking is used
valid_data = data_trainvalid.loc[data_trainvalid.split_tag == 'VALIDATE', :]
print(valid_data.head())
print(valid_data.tail())
# timestamp(s) of the most recent validation row(s)
latest_valid_date = valid_data["timestamp"][valid_data["timestamp"] == valid_data["timestamp"].max()]

## REDO TEST DATA (to be pre-processed in MATLAB first)