def _with_mixed_feature_columns(frame):
    """Return a new frame mixing text, categorical and numerical columns.

    The result keeps only the columns listed below, in this order:

    * ``sentence1``, ``sentence2`` -- taken unchanged from ``frame``.
    * ``sentence3`` -- a second copy of ``sentence2``.
    * ``categorical0`` -- a copy of ``genre``.
    * ``numerical0`` -- a copy of ``score``.
    * ``genre``, ``score`` -- the columns later used as labels.

    Parameters
    ----------
    frame
        DataFrame providing the ``sentence1``, ``sentence2``, ``genre`` and
        ``score`` columns.
    """
    return pd.DataFrame({
        'sentence1': frame['sentence1'],
        'sentence2': frame['sentence2'],
        'sentence3': frame['sentence2'],
        'categorical0': frame['genre'],
        'numerical0': frame['score'],
        'genre': frame['genre'],
        'score': frame['score'],
    })


def test_mixed_column_type():
    """Fit TextPredictor on data that mixes text, categorical and numeric columns.

    Three predictors are trained on a 1000-row sample of the STS training
    split and checked against a 10-row sample of the dev split:

    1. label ``score`` using every column,
    2. label ``genre`` using every column (probabilities are verified too),
    3. label ``score`` using only an explicit subset of the columns.

    After each fit the predictor is evaluated and then handed to
    ``verify_predictor_save_load``.
    """
    train_data = load_pd.load('https://autogluon-text.s3-accelerate.amazonaws.com/'
                              'glue/sts/train.parquet')
    dev_data = load_pd.load('https://autogluon-text.s3-accelerate.amazonaws.com/'
                            'glue/sts/dev.parquet')

    # Fixed seed so the same rows are sampled on every run.  The train
    # permutation is drawn before the dev permutation; keep this order,
    # otherwise the sampled rows change.
    rng_state = np.random.RandomState(123)
    train_perm = rng_state.permutation(len(train_data))
    valid_perm = rng_state.permutation(len(dev_data))
    train_data = train_data.iloc[train_perm[:1000]]
    dev_data = dev_data.iloc[valid_perm[:10]]

    # Add more columns as feature
    train_data = _with_mixed_feature_columns(train_data)
    dev_data = _with_mixed_feature_columns(dev_data)

    # Train Regression
    predictor = TextPredictor(label='score', verbosity=4)
    predictor.fit(train_data,
                  hyperparameters=get_test_hyperparameters(),
                  time_limit=30,
                  seed=123)
    # The metric value is not asserted on; the call only checks that
    # evaluate() runs on the mixed-type frame.
    predictor.evaluate(dev_data, metrics=['rmse'])
    verify_predictor_save_load(predictor, dev_data)

    # Train Classification
    predictor = TextPredictor(label='genre', verbosity=4)
    predictor.fit(train_data,
                  hyperparameters=get_test_hyperparameters(),
                  time_limit=30,
                  seed=123)
    predictor.evaluate(dev_data, metrics=['acc'])
    verify_predictor_save_load(predictor, dev_data, verify_proba=True)

    # Specify the feature column: train on a column subset, but evaluate and
    # verify with the full dev frame (it still carries the unused columns).
    predictor = TextPredictor(label='score', verbosity=4)
    predictor.fit(train_data[['sentence1', 'sentence3', 'categorical0', 'score']],
                  hyperparameters=get_test_hyperparameters(),
                  time_limit=30,
                  seed=123)
    predictor.evaluate(dev_data, metrics=['rmse'])
    verify_predictor_save_load(predictor, dev_data)
def test_predictor_fit(key):
    """Fit, re-fit and reload-then-fit a TextPredictor on one DATA_INFO entry.

    Parameters
    ----------
    key
        Key into ``DATA_INFO``.  The selected entry supplies the ``train`` and
        ``dev`` data locations, the ``label`` column, the evaluation
        ``metric`` and the ``verify_proba`` flag.  (Presumably provided by a
        parametrize decorator that is not visible here -- confirm.)

    NOTE(review): another function named ``test_predictor_fit`` appears later
    in the same source.  If both live in one module the later definition
    shadows this one and this test is never collected -- confirm and rename.
    """
    info = DATA_INFO[key]
    train_data = load_pd.load(info['train'])
    dev_data = load_pd.load(info['dev'])
    label, eval_metric, verify_proba = info['label'], info['metric'], info['verify_proba']

    # Seeded sampling: 100 training rows and 10 dev rows.  The training
    # permutation must be drawn first to reproduce the same rows.
    rng = np.random.RandomState(123)
    train_idx = rng.permutation(len(train_data))[:100]
    dev_idx = rng.permutation(len(dev_data))[:10]
    train_data = train_data.iloc[train_idx]
    dev_data = dev_data.iloc[dev_idx]

    def fit_once(model):
        # A fresh hyperparameter object is requested for every fit call.
        model.fit(train_data,
                  hyperparameters=get_test_hyperparameters(),
                  time_limit=30,
                  seed=123)

    predictor = TextPredictor(label=label, eval_metric=eval_metric)
    fit_once(predictor)
    # The score itself is not inspected; the call only exercises evaluate().
    score_on_dev = predictor.evaluate(dev_data)
    verify_predictor_save_load(predictor, dev_data, verify_proba=verify_proba)

    # Test for continuous fit
    fit_once(predictor)
    verify_predictor_save_load(predictor, dev_data, verify_proba=verify_proba)

    # Saving to folder, loading the saved model and call fit again (continuous fit)
    with tempfile.TemporaryDirectory() as save_dir:
        predictor.save(save_dir)
        predictor = TextPredictor.load(save_dir)
        fit_once(predictor)
def test_sst(hyperparameters):
    """Fit a TextPredictor on a small SST sample with the given hyperparameters.

    Parameters
    ----------
    hyperparameters
        Passed through unchanged to ``TextPredictor.fit``.

    The predictor is trained on 100 sampled training rows, evaluated with the
    ``acc`` metric on 10 sampled dev rows, and then handed to
    ``verify_predictor_save_load`` with probability verification enabled.
    """
    url_root = 'https://autogluon-text-data.s3-accelerate.amazonaws.com/'
    train_data = load_pd.load(url_root + 'glue/sst/train.parquet')
    dev_data = load_pd.load(url_root + 'glue/sst/dev.parquet')

    # Seeded sampling; the training permutation is drawn before the dev one so
    # the selected rows stay the same from run to run.
    rng = np.random.RandomState(123)
    train_idx = rng.permutation(len(train_data))[:100]
    dev_idx = rng.permutation(len(dev_data))[:10]
    train_data = train_data.iloc[train_idx]
    dev_data = dev_data.iloc[dev_idx]

    predictor = TextPredictor(label='label', eval_metric='acc')
    predictor.fit(train_data, hyperparameters=hyperparameters)
    # The accuracy value is not inspected; the call only exercises evaluate().
    accuracy_on_dev = predictor.evaluate(dev_data, metrics=['acc'])
    verify_predictor_save_load(predictor, dev_data, verify_proba=True)
def test_predictor_fit(key):
    """Fit a TextPredictor on one DATA_INFO entry and check save/load.

    Parameters
    ----------
    key
        Key into ``DATA_INFO``.  The selected entry supplies the ``train`` and
        ``dev`` data locations, the ``label`` column, the evaluation
        ``metric`` and the ``verify_proba`` flag.  (Presumably provided by a
        parametrize decorator that is not visible here -- confirm.)

    NOTE(review): a function with the same name appears earlier in the same
    source.  If both live in one module this definition replaces the earlier
    one -- confirm and rename one of them.
    """
    info = DATA_INFO[key]
    train_data = load_pd.load(info['train'])
    dev_data = load_pd.load(info['dev'])
    label, eval_metric, verify_proba = info['label'], info['metric'], info['verify_proba']

    # Seeded sampling: 100 training rows and 10 dev rows.  The training
    # permutation must be drawn first to reproduce the same rows.
    rng = np.random.RandomState(123)
    train_idx = rng.permutation(len(train_data))[:100]
    dev_idx = rng.permutation(len(dev_data))[:10]
    train_data = train_data.iloc[train_idx]
    dev_data = dev_data.iloc[dev_idx]

    predictor = TextPredictor(label=label, eval_metric=eval_metric)
    predictor.fit(train_data,
                  hyperparameters=get_test_hyperparameters(),
                  time_limit=30,
                  seed=123)
    # The score itself is not inspected; the call only exercises evaluate().
    score_on_dev = predictor.evaluate(dev_data)
    verify_predictor_save_load(predictor, dev_data, verify_proba=verify_proba)