def train(args):
    """Fit a model for ``args.task``, score it on the dev set and dump predictions.

    Reads ``args.task``, ``args.exp_dir``, ``args.train_file``,
    ``args.dev_file`` and ``args.test_file``. The column and metric
    configuration is looked up in ``TASKS`` by task name. When
    ``args.exp_dir`` is ``None`` it is set, on ``args`` itself, to
    ``'autogluon_<task>'``.

    Files written inside ``args.exp_dir``:

    * ``final_model_dev_score.json`` -- result of ``model.evaluate`` on the
      dev file.
    * ``dev_predictions.txt`` / ``test_predictions.txt`` -- one prediction
      per line.
    * ``saved_model`` -- the model as stored by ``model.save``.

    Raises:
        NotImplementedError: if ``args.task`` is ``None``.
    """
    if args.task is None:
        raise NotImplementedError
    feature_columns, label_columns, stop_metric, eval_metrics = TASKS[args.task]

    if args.exp_dir is None:
        args.exp_dir = 'autogluon_{}'.format(args.task)
    out_dir = args.exp_dir

    def dump_predictions(predictions, file_name):
        # One prediction per line, stringified with str().
        with open(os.path.join(out_dir, file_name), 'w') as sink:
            sink.writelines(str(item) + '\n' for item in predictions)

    model = task.fit(
        train_data=args.train_file,
        label=label_columns,
        feature_columns=feature_columns,
        output_directory=out_dir,
        stopping_metric=stop_metric,
        ngpus_per_trial=1,
        eval_metric=eval_metrics,
    )

    dev_scores = model.evaluate(args.dev_file, metrics=eval_metrics)
    with open(os.path.join(out_dir, 'final_model_dev_score.json'), 'w') as sink:
        json.dump(dev_scores, sink)
    dump_predictions(model.predict(args.dev_file), 'dev_predictions.txt')

    # The test predictions come from the reloaded model, so the save/load
    # round trip is exercised as part of every training run.
    saved_model_path = os.path.join(out_dir, 'saved_model')
    model.save(saved_model_path)
    model = task.load(saved_model_path)
    dump_predictions(model.predict(args.test_file), 'test_predictions.txt')
def predict(args):
    """Load a saved model and write its test-set predictions to disk.

    Reads ``args.model_dir``, ``args.test_file`` and ``args.exp_dir``. When
    ``args.exp_dir`` is ``None`` it is set, on ``args`` itself, to ``'.'``.
    The predictions are written one per line to
    ``<args.exp_dir>/test_predictions.txt``.
    """
    model = task.load(args.model_dir)
    test_prediction = model.predict(args.test_file)
    if args.exp_dir is None:
        args.exp_dir = '.'
    # Nothing on this code path creates the output directory, so a fresh
    # ``exp_dir`` would make ``open`` fail after prediction has already run.
    # An empty string still means "current directory" and must be skipped,
    # because ``os.makedirs('')`` raises.
    if args.exp_dir:
        os.makedirs(args.exp_dir, exist_ok=True)
    with open(os.path.join(args.exp_dir, 'test_predictions.txt'), 'w') as of:
        of.writelines(str(ele) + '\n' for ele in test_prediction)
def verify_predictor_save_load(predictor, df, verify_proba=False,
                               verify_embedding=True):
    """Check that a predictor gives the same outputs after a save/load round trip.

    The predictor is saved into a temporary directory, reloaded with
    ``task.load``, and the predictions of both objects on ``df`` are compared
    with ``npt.assert_equal``.

    Args:
        predictor: object providing ``save``, ``predict`` and, depending on
            the flags, ``predict_proba`` and ``extract_embedding``.
        df: data handed to every prediction call; must support ``len()``.
        verify_proba: also compare ``predict_proba`` of the original and the
            reloaded predictor.
        verify_embedding: also check that ``extract_embedding`` of the
            original predictor returns one row per row of ``df``.
    """
    with tempfile.TemporaryDirectory() as save_dir:
        predictor.save(save_dir)
        expected = predictor.predict(df)
        restored = task.load(save_dir)
        npt.assert_equal(expected, restored.predict(df))

        if verify_proba:
            npt.assert_equal(predictor.predict_proba(df),
                             restored.predict_proba(df))

        if verify_embedding:
            n_rows = predictor.extract_embedding(df).shape[0]
            assert n_rows == len(df)
def __init__(self):
    """Load the two saved predictors held by this object.

    ``predictor_rank`` is loaded with ``task2.load`` and ``predictor_sts``
    with ``task.load``, both from fixed paths under
    ``/content/common-alternusvera/PU``.
    """
    rank_model_dir = '/content/common-alternusvera/PU/ag_predict'
    sts_model_dir = '/content/common-alternusvera/PU/saved_dir'
    self.predictor_rank = task2.load(rank_model_dir)
    self.predictor_sts = task.load(sts_model_dir)
def _add_mixed_feature_columns(data):
    """Return an STS frame extended with duplicate text/categorical/numerical columns."""
    return pd.DataFrame({
        'sentence1': data['sentence1'],
        'sentence2': data['sentence2'],
        'sentence3': data['sentence2'],
        'categorical0': data['genre'],
        'numerical0': data['score'],
        'genre': data['genre'],
        'score': data['score'],
    })


def test_mixed_column_type():
    """Train on a frame mixing text, categorical and numerical columns.

    Three predictors are fitted on a 100-row sample of the STS training set
    and run on a 10-row sample of the dev set:

    1. regression on ``score`` using every column,
    2. classification on ``genre`` using every column,
    3. regression on ``score`` restricted to an explicit ``feature_columns``
       subset.

    The third predictor is then saved and reloaded, and the reloaded
    predictions must match the in-memory ones.
    """
    train_data = load_pd.load(
        'https://autogluon-text.s3-accelerate.amazonaws.com/'
        'glue/sts/train.parquet')
    dev_data = load_pd.load(
        'https://autogluon-text.s3-accelerate.amazonaws.com/'
        'glue/sts/dev.parquet')
    # Fixed seed so the sub-sampled rows are the same on every run.
    rng_state = np.random.RandomState(123)
    train_perm = rng_state.permutation(len(train_data))
    valid_perm = rng_state.permutation(len(dev_data))
    train_data = train_data.iloc[train_perm[:100]]
    dev_data = dev_data.iloc[valid_perm[:10]]

    # Add more columns as features.
    train_data = _add_mixed_feature_columns(train_data)
    dev_data = _add_mixed_feature_columns(dev_data)

    # Train regression. The evaluate/predict calls are smoke checks: they
    # only need to run without raising.
    predictor1 = task.fit(train_data,
                          hyperparameters=test_hyperparameters,
                          label='score',
                          num_trials=1,
                          verbosity=4,
                          ngpus_per_trial=1,
                          output_directory='./sts_score',
                          plot_results=False)
    predictor1.evaluate(dev_data, metrics=['rmse'])
    predictor1.predict(dev_data)

    # Train classification.
    predictor2 = task.fit(train_data,
                          hyperparameters=test_hyperparameters,
                          label='genre',
                          num_trials=1,
                          verbosity=4,
                          ngpus_per_trial=1,
                          output_directory='./sts_genre',
                          plot_results=False)
    predictor2.evaluate(dev_data, metrics=['acc'])
    predictor2.predict(dev_data)

    # Specify the feature columns.
    predictor3 = task.fit(
        train_data,
        hyperparameters=test_hyperparameters,
        feature_columns=['sentence1', 'sentence3', 'categorical0'],
        label='score',
        num_trials=1,
        verbosity=4,
        ngpus_per_trial=1,
        output_directory='./sts_score',
        plot_results=False)
    # Exercise the feature-restricted predictor itself. The previous version
    # re-checked predictor1 here, so predictor3 was trained but never used.
    predictor3.evaluate(dev_data, metrics=['rmse'])
    dev_prediction = predictor3.predict(dev_data)

    # Save/load round trip must reproduce the in-memory predictions.
    model_path = 'saved_model'
    predictor3.save(model_path)
    loaded_predictor = task.load(model_path)
    loaded_predictions = loaded_predictor.predict(dev_data)
    np.testing.assert_array_almost_equal(dev_prediction, loaded_predictions)