Esempio n. 1
0
def test_mixed_column_type():
    train_data = load_pd.load('https://autogluon-text.s3-accelerate.amazonaws.com/'
                              'glue/sts/train.parquet')
    dev_data = load_pd.load('https://autogluon-text.s3-accelerate.amazonaws.com/'
                            'glue/sts/dev.parquet')
    rng_state = np.random.RandomState(123)
    train_perm = rng_state.permutation(len(train_data))
    valid_perm = rng_state.permutation(len(dev_data))
    train_data = train_data.iloc[train_perm[:1000]]
    dev_data = dev_data.iloc[valid_perm[:10]]

    # Add more columns as feature
    train_data = pd.DataFrame({'sentence1': train_data['sentence1'],
                               'sentence2': train_data['sentence2'],
                               'sentence3': train_data['sentence2'],
                               'categorical0': train_data['genre'],
                               'numerical0': train_data['score'],
                               'genre': train_data['genre'],
                               'score': train_data['score']})
    dev_data = pd.DataFrame({'sentence1': dev_data['sentence1'],
                             'sentence2': dev_data['sentence2'],
                             'sentence3': dev_data['sentence2'],
                             'categorical0': dev_data['genre'],
                             'numerical0': dev_data['score'],
                             'genre': dev_data['genre'],
                             'score': dev_data['score']})
    # Train Regression
    predictor = TextPredictor(label='score', verbosity=4)
    predictor.fit(train_data,
                   hyperparameters=get_test_hyperparameters(),
                   time_limit=30,
                   seed=123)

    dev_rmse = predictor.evaluate(dev_data, metrics=['rmse'])
    verify_predictor_save_load(predictor, dev_data)

    # Train Classification
    predictor = TextPredictor(label='genre', verbosity=4)
    predictor.fit(train_data,
                   hyperparameters=get_test_hyperparameters(),
                   time_limit=30,
                   seed=123)

    dev_rmse = predictor.evaluate(dev_data, metrics=['acc'])
    verify_predictor_save_load(predictor, dev_data, verify_proba=True)

    # Specify the feature column
    predictor = TextPredictor(label='score', verbosity=4)
    predictor.fit(train_data[['sentence1', 'sentence3', 'categorical0', 'score']],
                   hyperparameters=get_test_hyperparameters(),
                   time_limit=30,
                   seed=123)
    dev_rmse = predictor.evaluate(dev_data, metrics=['rmse'])
    verify_predictor_save_load(predictor, dev_data)
Esempio n. 2
0
def test_predictor_fit(key):
    train_data = load_pd.load(DATA_INFO[key]['train'])
    dev_data = load_pd.load(DATA_INFO[key]['dev'])
    label = DATA_INFO[key]['label']
    eval_metric = DATA_INFO[key]['metric']
    verify_proba = DATA_INFO[key]['verify_proba']

    rng_state = np.random.RandomState(123)
    train_perm = rng_state.permutation(len(train_data))
    valid_perm = rng_state.permutation(len(dev_data))
    train_data = train_data.iloc[train_perm[:100]]
    dev_data = dev_data.iloc[valid_perm[:10]]
    predictor = TextPredictor(label=label, eval_metric=eval_metric)
    predictor.fit(train_data,
                  hyperparameters=get_test_hyperparameters(),
                  time_limit=30,
                  seed=123)
    dev_score = predictor.evaluate(dev_data)
    verify_predictor_save_load(predictor, dev_data, verify_proba=verify_proba)

    # Test for continuous fit
    predictor.fit(train_data,
                  hyperparameters=get_test_hyperparameters(),
                  time_limit=30,
                  seed=123)
    verify_predictor_save_load(predictor, dev_data, verify_proba=verify_proba)

    # Saving to folder, loading the saved model and call fit again (continuous fit)
    with tempfile.TemporaryDirectory() as root:
        predictor.save(root)
        predictor = TextPredictor.load(root)
        predictor.fit(train_data,
                      hyperparameters=get_test_hyperparameters(),
                      time_limit=30,
                      seed=123)
Esempio n. 3
0
def test_sst(hyperparameters):
    train_data = load_pd.load(
        'https://autogluon-text-data.s3-accelerate.amazonaws.com/'
        'glue/sst/train.parquet')
    dev_data = load_pd.load(
        'https://autogluon-text-data.s3-accelerate.amazonaws.com/'
        'glue/sst/dev.parquet')
    rng_state = np.random.RandomState(123)
    train_perm = rng_state.permutation(len(train_data))
    valid_perm = rng_state.permutation(len(dev_data))
    train_data = train_data.iloc[train_perm[:100]]
    dev_data = dev_data.iloc[valid_perm[:10]]
    predictor = TextPredictor(label='label', eval_metric='acc')
    predictor.fit(train_data, hyperparameters=hyperparameters)
    dev_acc = predictor.evaluate(dev_data, metrics=['acc'])
    verify_predictor_save_load(predictor, dev_data, verify_proba=True)
Esempio n. 4
0
def test_predictor_fit(key):
    train_data = load_pd.load(DATA_INFO[key]['train'])
    dev_data = load_pd.load(DATA_INFO[key]['dev'])
    label = DATA_INFO[key]['label']
    eval_metric = DATA_INFO[key]['metric']
    verify_proba = DATA_INFO[key]['verify_proba']

    rng_state = np.random.RandomState(123)
    train_perm = rng_state.permutation(len(train_data))
    valid_perm = rng_state.permutation(len(dev_data))
    train_data = train_data.iloc[train_perm[:100]]
    dev_data = dev_data.iloc[valid_perm[:10]]
    predictor = TextPredictor(label=label, eval_metric=eval_metric)
    predictor.fit(train_data,
                  hyperparameters=get_test_hyperparameters(),
                  time_limit=30,
                  seed=123)
    dev_score = predictor.evaluate(dev_data)
    verify_predictor_save_load(predictor, dev_data, verify_proba=verify_proba)