def train(args):
    """Fit a model for the configured task, evaluate on dev, and predict on test.

    All artifacts (dev scores as JSON, dev/test predictions as text files,
    and the serialized model) are written under ``args.exp_dir``.
    """
    # Guard clause: a task must be specified to look up its column/metric config.
    if args.task is None:
        raise NotImplementedError
    feature_columns, label_columns, stop_metric, eval_metrics = TASKS[args.task]
    if args.exp_dir is None:
        args.exp_dir = 'autogluon_{}'.format(args.task)
    model = task.fit(train_data=args.train_file,
                     label=label_columns,
                     feature_columns=feature_columns,
                     output_directory=args.exp_dir,
                     stopping_metric=stop_metric,
                     ngpus_per_trial=1,
                     eval_metric=eval_metrics)
    # Persist the dev-set metric scores for later inspection.
    dev_metrics_scores = model.evaluate(args.dev_file, metrics=eval_metrics)
    with open(os.path.join(args.exp_dir, 'final_model_dev_score.json'), 'w') as fout:
        json.dump(dev_metrics_scores, fout)
    dev_prediction = model.predict(args.dev_file)
    with open(os.path.join(args.exp_dir, 'dev_predictions.txt'), 'w') as fout:
        fout.writelines(str(pred) + '\n' for pred in dev_prediction)
    # Round-trip through save/load so the test predictions come from the
    # serialized model, proving the saved artifact is usable.
    model.save(os.path.join(args.exp_dir, 'saved_model'))
    model = task.load(os.path.join(args.exp_dir, 'saved_model'))
    test_prediction = model.predict(args.test_file)
    with open(os.path.join(args.exp_dir, 'test_predictions.txt'), 'w') as fout:
        fout.writelines(str(pred) + '\n' for pred in test_prediction)
def test_mixed_column_type():
    """Fit predictors on a frame that mixes text, categorical and numerical columns.

    Covers three scenarios: regression on 'score', classification on 'genre',
    and regression with an explicit ``feature_columns`` subset.
    """
    train_data = load_pd.load('https://autogluon-text.s3-accelerate.amazonaws.com/'
                              'glue/sts/train.parquet')
    dev_data = load_pd.load('https://autogluon-text.s3-accelerate.amazonaws.com/'
                            'glue/sts/dev.parquet')
    # Fixed seed keeps the sampled subsets deterministic across runs.
    rng_state = np.random.RandomState(123)
    train_perm = rng_state.permutation(len(train_data))
    valid_perm = rng_state.permutation(len(dev_data))
    train_data = train_data.iloc[train_perm[:100]]
    dev_data = dev_data.iloc[valid_perm[:10]]
    # Add more columns as feature
    train_data = pd.DataFrame({'sentence1': train_data['sentence1'],
                               'sentence2': train_data['sentence2'],
                               'sentence3': train_data['sentence2'],
                               'categorical0': train_data['genre'],
                               'numerical0': train_data['score'],
                               'genre': train_data['genre'],
                               'score': train_data['score']})
    dev_data = pd.DataFrame({'sentence1': dev_data['sentence1'],
                             'sentence2': dev_data['sentence2'],
                             'sentence3': dev_data['sentence2'],
                             'categorical0': dev_data['genre'],
                             'numerical0': dev_data['score'],
                             'genre': dev_data['genre'],
                             'score': dev_data['score']})
    # Train Regression
    predictor1 = task.fit(train_data,
                          hyperparameters=test_hyperparameters,
                          label='score', num_trials=1,
                          verbosity=4,
                          ngpus_per_trial=1,
                          output_directory='./sts_score',
                          plot_results=False)
    dev_rmse = predictor1.evaluate(dev_data, metrics=['rmse'])
    verify_predictor_save_load(predictor1, dev_data)
    # Train Classification
    predictor2 = task.fit(train_data,
                          hyperparameters=test_hyperparameters,
                          label='genre', num_trials=1,
                          verbosity=4,
                          ngpus_per_trial=1,
                          output_directory='./sts_genre',
                          plot_results=False)
    # FIX: this is an accuracy score, not RMSE — renamed from dev_rmse.
    dev_acc = predictor2.evaluate(dev_data, metrics=['acc'])
    verify_predictor_save_load(predictor2, dev_data, verify_proba=True)
    # Specify the feature column
    predictor3 = task.fit(train_data,
                          hyperparameters=test_hyperparameters,
                          feature_columns=['sentence1', 'sentence3', 'categorical0'],
                          label='score', num_trials=1,
                          verbosity=4,
                          ngpus_per_trial=1,
                          # FIX: use a distinct directory so predictor1's
                          # './sts_score' output is not clobbered.
                          output_directory='./sts_score_feature_columns',
                          plot_results=False)
    # FIX: evaluate the predictor just trained (was predictor1 — copy-paste bug).
    dev_rmse = predictor3.evaluate(dev_data, metrics=['rmse'])
    verify_predictor_save_load(predictor3, dev_data)
def test_no_text_column_raise():
    """Fitting a frame whose only feature column is not usable text must raise."""
    rows = ([('😁😁😁😁😁😁', 'grin')] * 20
            + [('😃😃😃😃😃😃😃😃', 'smile')] * 50
            + [('😉😉😉', 'wink')] * 30)
    df = pd.DataFrame(rows, columns=['data', 'label'])
    # The task should refuse a dataset with no recognized text feature.
    with pytest.raises(NotImplementedError):
        predictor = task.fit(df, label='label', verbosity=4)
def predict(args):
    """Load a saved model and write its test-set predictions to args.exp_dir.

    Predictions are written one per line to 'test_predictions.txt'; the
    experiment directory defaults to the current directory.
    """
    model = task.load(args.model_dir)
    test_prediction = model.predict(args.test_file)
    if args.exp_dir is None:
        args.exp_dir = '.'
    out_path = os.path.join(args.exp_dir, 'test_predictions.txt')
    with open(out_path, 'w') as fout:
        fout.writelines(str(pred) + '\n' for pred in test_prediction)
def test_cpu_only_raise():
    """CPU-only training raises unless the escape-hatch env var is set to '1'."""
    train_data = load_pd.load(
        'https://autogluon-text.s3-accelerate.amazonaws.com/'
        'glue/sst/train.parquet')
    dev_data = load_pd.load(
        'https://autogluon-text.s3-accelerate.amazonaws.com/'
        'glue/sst/dev.parquet')
    rng_state = np.random.RandomState(123)
    train_perm = rng_state.permutation(len(train_data))
    valid_perm = rng_state.permutation(len(dev_data))
    train_data = train_data.iloc[train_perm[:100]]
    dev_data = dev_data.iloc[valid_perm[:10]]

    def fit_without_gpu():
        # Identical fit call issued under three different env-var settings.
        return task.fit(train_data,
                        hyperparameters=test_hyperparameters,
                        label='label',
                        num_trials=1,
                        ngpus_per_trial=0,
                        verbosity=4,
                        output_directory='./sst',
                        plot_results=False)

    # Default: requesting zero GPUs is an error.
    with pytest.raises(RuntimeError):
        predictor = fit_without_gpu()
    # Escape hatch enabled: CPU-only training proceeds.
    os.environ['AUTOGLUON_TEXT_TRAIN_WITHOUT_GPU'] = '1'
    predictor = fit_without_gpu()
    # Escape hatch explicitly disabled again: back to raising.
    os.environ['AUTOGLUON_TEXT_TRAIN_WITHOUT_GPU'] = '0'
    with pytest.raises(RuntimeError):
        predictor = fit_without_gpu()
def test_no_job_finished_raise():
    """A time budget too small for any trial to finish must raise RuntimeError."""
    train_data = load_pd.load('https://autogluon-text.s3-accelerate.amazonaws.com/'
                              'glue/sst/train.parquet')
    with pytest.raises(RuntimeError):
        # Setting a very small time limits to trigger the bug
        predictor = task.fit(train_data,
                             hyperparameters=test_hyperparameters,
                             label='label',
                             num_trials=1,
                             ngpus_per_trial=0,
                             verbosity=4,
                             time_limits=10,
                             output_directory='./sst_raise',
                             plot_results=False)
def test_emoji():
    """Smoke test: fitting a classifier on pure-emoji text should succeed."""
    # Rows of increasing-length emoji runs, one class per emoji kind.
    rows = [('😁' * (i + 1), 'grin') for i in range(50)]
    rows += [('😃' * (i + 1), 'smile') for i in range(30)]
    rows += [('😉' * (i + 1), 'wink') for i in range(20)]
    df = pd.DataFrame(rows, columns=['data', 'label'])
    predictor = task.fit(df, label='label', verbosity=3)
def test_empty_text_item():
    """Rows whose text cell is missing (None) must not break training."""
    train_data = load_pd.load(
        'https://autogluon-text.s3-accelerate.amazonaws.com/glue/sts/train.parquet')
    rng_state = np.random.RandomState(123)
    train_perm = rng_state.permutation(len(train_data))
    train_data = train_data.iloc[train_perm[:100]]
    # Blank out a couple of text cells to exercise the missing-text path.
    for row_idx in (0, 10):
        train_data.iat[row_idx, 0] = None
    predictor = task.fit(train_data,
                         hyperparameters=test_hyperparameters,
                         label='score',
                         num_trials=1,
                         ngpus_per_trial=0,
                         verbosity=4,
                         output_directory='./sts_empty_text_item',
                         plot_results=False)
def verify_predictor_save_load(predictor, df, verify_proba=False,
                               verify_embedding=True):
    """Round-trip a predictor through save/load and check outputs are unchanged.

    Saves ``predictor`` to a temp dir, reloads it, and asserts the reloaded
    model's predictions (and optionally class probabilities) match the
    original's on ``df``. Optionally checks embedding extraction returns one
    row per input.
    """
    with tempfile.TemporaryDirectory() as root:
        predictor.save(root)
        before = predictor.predict(df)
        restored = task.load(root)
        after = restored.predict(df)
        npt.assert_equal(before, after)
        if verify_proba:
            before_prob = predictor.predict_proba(df)
            after_prob = restored.predict_proba(df)
            npt.assert_equal(before_prob, after_prob)
        if verify_embedding:
            embeddings = predictor.extract_embedding(df)
            assert embeddings.shape[0] == len(df)
def test_sts():
    """Regression on a small STS sample, then verify save/load round-trip."""
    train_data = load_pd.load(
        'https://autogluon-text.s3-accelerate.amazonaws.com/glue/sts/train.parquet')
    dev_data = load_pd.load(
        'https://autogluon-text.s3-accelerate.amazonaws.com/glue/sts/dev.parquet')
    # Deterministic subsample keeps the test fast and reproducible.
    rng_state = np.random.RandomState(123)
    train_perm = rng_state.permutation(len(train_data))
    valid_perm = rng_state.permutation(len(dev_data))
    train_data = train_data.iloc[train_perm[:100]]
    dev_data = dev_data.iloc[valid_perm[:10]]
    predictor = task.fit(train_data,
                         hyperparameters=test_hyperparameters,
                         label='score',
                         num_trials=1,
                         verbosity=4,
                         ngpus_per_trial=1,
                         output_directory='./sts',
                         plot_results=False)
    dev_rmse = predictor.evaluate(dev_data, metrics=['rmse'])
    verify_predictor_save_load(predictor, dev_data)
def test_mrpc():
    """Classification on a small MRPC sample; exercise evaluate/predict/proba."""
    train_data = load_pd.load(
        'https://autogluon-text.s3-accelerate.amazonaws.com/glue/mrpc/train.parquet')
    dev_data = load_pd.load(
        'https://autogluon-text.s3-accelerate.amazonaws.com/glue/mrpc/dev.parquet')
    # Deterministic subsample keeps the test fast and reproducible.
    rng_state = np.random.RandomState(123)
    train_perm = rng_state.permutation(len(train_data))
    valid_perm = rng_state.permutation(len(dev_data))
    train_data = train_data.iloc[train_perm[:100]]
    dev_data = dev_data.iloc[valid_perm[:10]]
    predictor = task.fit(train_data,
                         hyperparameters=test_hyperparameters,
                         label='label',
                         num_trials=1,
                         verbosity=4,
                         ngpus_per_trial=1,
                         output_directory='./mrpc',
                         plot_results=False)
    dev_acc = predictor.evaluate(dev_data, metrics=['acc'])
    dev_prediction = predictor.predict(dev_data)
    dev_pred_prob = predictor.predict_proba(dev_data)
def __init__(self):
    """Load the two pretrained predictors used by this class.

    NOTE(review): paths are hard-coded to /content/... — presumably a Google
    Colab mount of the 'common-alternusvera' checkout; this will fail in any
    other environment. Consider making the model directories configurable.
    """
    # Predictor loaded via task2 — presumably a ranking model; verify against usage.
    self.predictor_rank = task2.load(
        '/content/common-alternusvera/PU/ag_predict')
    # Predictor loaded via task — presumably an STS (similarity) model.
    self.predictor_sts = task.load(
        '/content/common-alternusvera/PU/saved_dir')