def train(args):
    """Train a model for ``args.task``, evaluate it, and dump predictions.

    Looks up the task configuration in the module-level ``TASKS`` table,
    fits via ``task.fit``, writes the dev-set scores as JSON, dumps dev and
    test predictions, and round-trips the model through save/load before
    the test-set inference. All artifacts land under ``args.exp_dir``.
    """
    if args.task is None:
        raise NotImplementedError
    feature_columns, label_columns, stop_metric, eval_metrics = \
        TASKS[args.task]
    if args.exp_dir is None:
        args.exp_dir = 'autogluon_{}'.format(args.task)

    def dump_predictions(filename, predictions):
        # One stringified prediction per line.
        with open(os.path.join(args.exp_dir, filename), 'w') as of:
            for row in predictions:
                of.write(str(row) + '\n')

    model = task.fit(train_data=args.train_file,
                     label=label_columns,
                     feature_columns=feature_columns,
                     output_directory=args.exp_dir,
                     stopping_metric=stop_metric,
                     ngpus_per_trial=1,
                     eval_metric=eval_metrics)
    dev_scores = model.evaluate(args.dev_file, metrics=eval_metrics)
    with open(os.path.join(args.exp_dir, 'final_model_dev_score.json'),
              'w') as of:
        json.dump(dev_scores, of)
    dump_predictions('dev_predictions.txt', model.predict(args.dev_file))
    model.save(os.path.join(args.exp_dir, 'saved_model'))
    # Reload to make sure the serialized model is usable for inference.
    model = task.load(os.path.join(args.exp_dir, 'saved_model'))
    dump_predictions('test_predictions.txt', model.predict(args.test_file))
def predict(args):
    """Load a saved model from ``args.model_dir`` and write test predictions.

    Predictions go to ``<args.exp_dir>/test_predictions.txt``, one per line;
    ``args.exp_dir`` defaults to the current directory when unset.
    """
    model = task.load(args.model_dir)
    predictions = model.predict(args.test_file)
    if args.exp_dir is None:
        args.exp_dir = '.'
    out_path = os.path.join(args.exp_dir, 'test_predictions.txt')
    with open(out_path, 'w') as of:
        of.writelines(str(pred) + '\n' for pred in predictions)
def verify_predictor_save_load(predictor, df, verify_proba=False,
                               verify_embedding=True):
    """Round-trip *predictor* through save/load and check outputs agree.

    Saves into a temporary directory, reloads via ``task.load``, and asserts
    that predictions (and optionally class probabilities) are identical
    before and after serialization. When *verify_embedding* is set, also
    checks that embedding extraction yields one row per input sample.
    """
    with tempfile.TemporaryDirectory() as save_dir:
        predictor.save(save_dir)
        reloaded = task.load(save_dir)
        npt.assert_equal(predictor.predict(df), reloaded.predict(df))
        if verify_proba:
            npt.assert_equal(predictor.predict_proba(df),
                             reloaded.predict_proba(df))
        if verify_embedding:
            # Embeddings must align row-for-row with the input frame.
            assert predictor.extract_embedding(df).shape[0] == len(df)
 def __init__(self):
     """Load the pre-trained rank and STS predictors from fixed paths.

     NOTE(review): paths are hard-coded to /content/... — presumably a
     Colab environment with the models already downloaded; confirm before
     running elsewhere.
     """
     rank_model = task2.load('/content/common-alternusvera/PU/ag_predict')
     sts_model = task.load('/content/common-alternusvera/PU/saved_dir')
     self.predictor_rank = rank_model
     self.predictor_sts = sts_model
def test_mixed_column_type():
    """Smoke-test ``task.fit`` on a table mixing text/categorical/numerical.

    Trains three predictors on a 100-row STS sample: regression on 'score',
    classification on 'genre', and regression restricted to an explicit
    feature-column subset; finally round-trips the feature-column predictor
    through save/load and checks its predictions survive serialization.
    """
    train_data = load_pd.load(
        'https://autogluon-text.s3-accelerate.amazonaws.com/'
        'glue/sts/train.parquet')
    dev_data = load_pd.load(
        'https://autogluon-text.s3-accelerate.amazonaws.com/'
        'glue/sts/dev.parquet')
    # Subsample deterministically (fixed seed) so the test is fast and
    # reproducible.
    rng_state = np.random.RandomState(123)
    train_perm = rng_state.permutation(len(train_data))
    valid_perm = rng_state.permutation(len(dev_data))
    train_data = train_data.iloc[train_perm[:100]]
    dev_data = dev_data.iloc[valid_perm[:10]]

    # Duplicate columns so the frame mixes text ('sentence*'), categorical
    # ('categorical0') and numerical ('numerical0') features alongside the
    # two label candidates ('genre', 'score').
    train_data = pd.DataFrame({
        'sentence1': train_data['sentence1'],
        'sentence2': train_data['sentence2'],
        'sentence3': train_data['sentence2'],
        'categorical0': train_data['genre'],
        'numerical0': train_data['score'],
        'genre': train_data['genre'],
        'score': train_data['score']
    })
    dev_data = pd.DataFrame({
        'sentence1': dev_data['sentence1'],
        'sentence2': dev_data['sentence2'],
        'sentence3': dev_data['sentence2'],
        'categorical0': dev_data['genre'],
        'numerical0': dev_data['score'],
        'genre': dev_data['genre'],
        'score': dev_data['score']
    })
    # Train Regression
    predictor1 = task.fit(train_data,
                          hyperparameters=test_hyperparameters,
                          label='score',
                          num_trials=1,
                          verbosity=4,
                          ngpus_per_trial=1,
                          output_directory='./sts_score',
                          plot_results=False)
    dev_rmse = predictor1.evaluate(dev_data, metrics=['rmse'])
    dev_prediction = predictor1.predict(dev_data)

    # Train Classification
    predictor2 = task.fit(train_data,
                          hyperparameters=test_hyperparameters,
                          label='genre',
                          num_trials=1,
                          verbosity=4,
                          ngpus_per_trial=1,
                          output_directory='./sts_genre',
                          plot_results=False)
    dev_acc = predictor2.evaluate(dev_data, metrics=['acc'])
    dev_prediction = predictor2.predict(dev_data)

    # Specify the feature columns explicitly.
    # NOTE(review): this reuses './sts_score' and so overwrites predictor1's
    # output directory — confirm that is intentional.
    predictor3 = task.fit(
        train_data,
        hyperparameters=test_hyperparameters,
        feature_columns=['sentence1', 'sentence3', 'categorical0'],
        label='score',
        num_trials=1,
        verbosity=4,
        ngpus_per_trial=1,
        output_directory='./sts_score',
        plot_results=False)
    # BUG FIX: the original evaluated, saved and round-tripped predictor1
    # here (a duplicate of the earlier predictor1 checks), so predictor3 was
    # never exercised after fitting. Evaluate and round-trip predictor3 so
    # the final assertion compares a consistent predictor.
    dev_rmse = predictor3.evaluate(dev_data, metrics=['rmse'])
    dev_prediction = predictor3.predict(dev_data)
    model_path = 'saved_model'
    predictor3.save(model_path)
    loaded_predictor = task.load(model_path)
    loaded_predictions = loaded_predictor.predict(dev_data)
    np.testing.assert_array_almost_equal(dev_prediction, loaded_predictions)