def train(args):
    """Fit a model for ``args.task`` and write dev/test artifacts to disk.

    Reads ``args.task``, ``args.exp_dir``, ``args.train_file``,
    ``args.dev_file`` and ``args.test_file``. When ``args.exp_dir`` is
    ``None`` it is set (on ``args`` itself) to ``'autogluon_<task>'``.

    Files written under ``args.exp_dir``:
      * ``final_model_dev_score.json`` -- result of ``model.evaluate``
      * ``dev_predictions.txt`` / ``test_predictions.txt`` -- one
        ``str(prediction)`` per line
      * ``saved_model`` -- written via ``model.save``

    Raises:
        NotImplementedError: if ``args.task`` is ``None``.
    """
    if args.task is None:
        raise NotImplementedError
    feature_columns, label_columns, stop_metric, eval_metrics = \
        TASKS[args.task]

    if args.exp_dir is None:
        args.exp_dir = 'autogluon_{}'.format(args.task)
    out_dir = args.exp_dir

    def dump_lines(filename, items):
        # One str()-formatted item per line.
        with open(os.path.join(out_dir, filename), 'w') as sink:
            sink.writelines(str(item) + '\n' for item in items)

    model = task.fit(
        train_data=args.train_file,
        label=label_columns,
        feature_columns=feature_columns,
        output_directory=out_dir,
        stopping_metric=stop_metric,
        ngpus_per_trial=1,
        eval_metric=eval_metrics,
    )

    # Dev-set scores, then dev-set predictions.
    scores = model.evaluate(args.dev_file, metrics=eval_metrics)
    with open(os.path.join(out_dir, 'final_model_dev_score.json'),
              'w') as sink:
        json.dump(scores, sink)
    dump_lines('dev_predictions.txt', model.predict(args.dev_file))

    # Test predictions come from the model reloaded from disk, not from
    # the in-memory one, so the save/load path is exercised as well.
    saved_path = os.path.join(out_dir, 'saved_model')
    model.save(saved_path)
    model = task.load(saved_path)
    dump_lines('test_predictions.txt', model.predict(args.test_file))
예제 #2
0
def test_mixed_column_type():
    """Fit on an STS subsample augmented with extra feature columns.

    Three predictors are trained on the same augmented frame:
      1. regression on ``'score'``,
      2. classification on ``'genre'``,
      3. regression on ``'score'`` restricted to an explicit
         ``feature_columns`` list.

    Each predictor is evaluated on the dev subsample and passed through
    ``verify_predictor_save_load``.
    """
    train_data = load_pd.load('https://autogluon-text.s3-accelerate.amazonaws.com/'
                              'glue/sts/train.parquet')
    dev_data = load_pd.load('https://autogluon-text.s3-accelerate.amazonaws.com/'
                            'glue/sts/dev.parquet')
    # Fixed seed keeps the 100/10-row subsamples reproducible.
    rng_state = np.random.RandomState(123)
    train_perm = rng_state.permutation(len(train_data))
    valid_perm = rng_state.permutation(len(dev_data))
    train_data = train_data.iloc[train_perm[:100]]
    dev_data = dev_data.iloc[valid_perm[:10]]

    def add_feature_columns(df):
        # Add more columns as features: a duplicated sentence plus copies
        # of the categorical and numerical label columns.
        return pd.DataFrame({'sentence1': df['sentence1'],
                             'sentence2': df['sentence2'],
                             'sentence3': df['sentence2'],
                             'categorical0': df['genre'],
                             'numerical0': df['score'],
                             'genre': df['genre'],
                             'score': df['score']})

    train_data = add_feature_columns(train_data)
    dev_data = add_feature_columns(dev_data)

    # Train Regression
    predictor1 = task.fit(train_data,
                          hyperparameters=test_hyperparameters,
                          label='score', num_trials=1,
                          verbosity=4,
                          ngpus_per_trial=1,
                          output_directory='./sts_score',
                          plot_results=False)
    # The score is not asserted on; the call checks that evaluate() runs.
    predictor1.evaluate(dev_data, metrics=['rmse'])
    verify_predictor_save_load(predictor1, dev_data)

    # Train Classification
    predictor2 = task.fit(train_data,
                          hyperparameters=test_hyperparameters,
                          label='genre', num_trials=1,
                          verbosity=4,
                          ngpus_per_trial=1,
                          output_directory='./sts_genre',
                          plot_results=False)
    predictor2.evaluate(dev_data, metrics=['acc'])
    verify_predictor_save_load(predictor2, dev_data, verify_proba=True)

    # Specify the feature column
    predictor3 = task.fit(train_data,
                          hyperparameters=test_hyperparameters,
                          feature_columns=['sentence1', 'sentence3', 'categorical0'],
                          label='score', num_trials=1,
                          verbosity=4,
                          ngpus_per_trial=1,
                          output_directory='./sts_score',
                          plot_results=False)
    # Evaluate the predictor that was just trained. This previously
    # re-evaluated predictor1, leaving predictor3.evaluate() untested.
    predictor3.evaluate(dev_data, metrics=['rmse'])
    verify_predictor_save_load(predictor3, dev_data)
예제 #3
0
def test_no_text_column_raise():
    """Expect ``NotImplementedError`` when fitting on emoji-only data.

    The frame has 100 rows: three distinct emoji strings repeated
    20/50/30 times, each paired with its label.
    """
    repeated = (
        ('😁😁😁😁😁😁', 'grin', 20),
        ('😃😃😃😃😃😃😃😃', 'smile', 50),
        ('😉😉😉', 'wink', 30),
    )
    rows = []
    for text, tag, times in repeated:
        rows.extend([(text, tag)] * times)

    df = pd.DataFrame(rows, columns=['data', 'label'])
    # NOTE(review): presumably raised because 'data' is not detected as a
    # text column (per the test name) -- confirm against the library.
    with pytest.raises(NotImplementedError):
        task.fit(df, label='label', verbosity=4)
def predict(args):
    """Load a saved model and write its test-set predictions to disk.

    Reads ``args.model_dir``, ``args.test_file`` and ``args.exp_dir``.
    When ``args.exp_dir`` is ``None`` it is set (on ``args`` itself) to
    ``'.'``. Output goes to ``<exp_dir>/test_predictions.txt``, one
    ``str(prediction)`` per line.
    """
    predictions = task.load(args.model_dir).predict(args.test_file)

    # Default to the current directory only after prediction succeeded.
    if args.exp_dir is None:
        args.exp_dir = '.'

    target = os.path.join(args.exp_dir, 'test_predictions.txt')
    with open(target, 'w') as sink:
        sink.writelines(str(item) + '\n' for item in predictions)
예제 #5
0
def test_cpu_only_raise():
    """Check how ``ngpus_per_trial=0`` interacts with the CPU opt-in flag.

    The same ``fit`` call is issued three times:
      1. before the test touches ``AUTOGLUON_TEXT_TRAIN_WITHOUT_GPU``
         -> ``RuntimeError`` expected,
      2. with the variable set to ``'1'`` -> must not raise,
      3. with the variable set to ``'0'`` -> ``RuntimeError`` expected.

    The variable is left at ``'0'`` when the test finishes.
    """
    base_url = 'https://autogluon-text.s3-accelerate.amazonaws.com/'
    train_data = load_pd.load(base_url + 'glue/sst/train.parquet')
    dev_data = load_pd.load(base_url + 'glue/sst/dev.parquet')

    # Seeded subsample: 100 training rows, 10 dev rows.
    rng = np.random.RandomState(123)
    train_idx = rng.permutation(len(train_data))[:100]
    dev_idx = rng.permutation(len(dev_data))[:10]
    train_data = train_data.iloc[train_idx]
    dev_data = dev_data.iloc[dev_idx]

    # Every fit below uses exactly these settings.
    fit_kwargs = dict(hyperparameters=test_hyperparameters,
                      label='label',
                      num_trials=1,
                      ngpus_per_trial=0,
                      verbosity=4,
                      output_directory='./sst',
                      plot_results=False)
    flag = 'AUTOGLUON_TEXT_TRAIN_WITHOUT_GPU'

    with pytest.raises(RuntimeError):
        task.fit(train_data, **fit_kwargs)

    os.environ[flag] = '1'
    task.fit(train_data, **fit_kwargs)

    os.environ[flag] = '0'
    with pytest.raises(RuntimeError):
        task.fit(train_data, **fit_kwargs)
예제 #6
0
def test_no_job_finished_raise():
    """Expect ``RuntimeError`` when ``fit`` runs with ``time_limits=10``.

    The full SST training set is used (no subsampling).
    """
    source = ('https://autogluon-text.s3-accelerate.amazonaws.com/'
              'glue/sst/train.parquet')
    train_data = load_pd.load(source)

    # Setting a very small time limit to trigger the bug.
    fit_kwargs = dict(hyperparameters=test_hyperparameters,
                      label='label',
                      num_trials=1,
                      ngpus_per_trial=0,
                      verbosity=4,
                      time_limits=10,
                      output_directory='./sst_raise',
                      plot_results=False)
    with pytest.raises(RuntimeError):
        task.fit(train_data, **fit_kwargs)
def test_emoji():
    """Fit on a frame whose only feature column holds emoji strings.

    For each (emoji, label, count) triple, rows of length 1..count of the
    repeated emoji are generated: 50 'grin', 30 'smile', 20 'wink'.
    """
    groups = (('😁', 'grin', 50), ('😃', 'smile', 30), ('😉', 'wink', 20))
    rows = [(symbol * (k + 1), tag)
            for symbol, tag, count in groups
            for k in range(count)]
    df = pd.DataFrame(rows, columns=['data', 'label'])

    # Passes as long as fit() completes without raising.
    task.fit(df, label='label', verbosity=3)
예제 #8
0
def test_empty_text_item():
    """Fit on an STS subsample in which two first-column cells are ``None``.

    Rows 0 and 10 (positional) of the 100-row subsample have their first
    column cleared before training; the test passes if ``fit`` completes.
    """
    source = 'https://autogluon-text.s3-accelerate.amazonaws.com/glue/sts/train.parquet'
    full_train = load_pd.load(source)

    rng = np.random.RandomState(123)
    keep = rng.permutation(len(full_train))[:100]
    train_data = full_train.iloc[keep]

    # NOTE(review): column 0 is presumably a text column -- confirm
    # against the parquet schema.
    for row in (0, 10):
        train_data.iat[row, 0] = None

    fit_kwargs = dict(hyperparameters=test_hyperparameters,
                      label='score',
                      num_trials=1,
                      ngpus_per_trial=0,
                      verbosity=4,
                      output_directory='./sts_empty_text_item',
                      plot_results=False)
    task.fit(train_data, **fit_kwargs)
예제 #9
0
def verify_predictor_save_load(predictor, df, verify_proba=False,
                               verify_embedding=True):
    """Assert that a predictor gives the same outputs after save + load.

    The predictor is saved into a temporary directory, reloaded through
    ``task.load``, and both copies predict on ``df``.

    Args:
        predictor: object exposing ``save``, ``predict`` and, depending on
            the flags, ``predict_proba`` / ``extract_embedding``.
        df: data passed unchanged to every prediction call.
        verify_proba: also compare ``predict_proba`` outputs.
        verify_embedding: also check that ``extract_embedding`` returns
            one row per row of ``df`` (original predictor only).

    Raises:
        AssertionError: if any comparison fails.
    """
    with tempfile.TemporaryDirectory() as save_dir:
        predictor.save(save_dir)
        expected = predictor.predict(df)
        restored = task.load(save_dir)
        npt.assert_equal(expected, restored.predict(df))

        if verify_proba:
            npt.assert_equal(predictor.predict_proba(df),
                             restored.predict_proba(df))

        if verify_embedding:
            n_embedded = predictor.extract_embedding(df).shape[0]
            assert n_embedded == len(df)
예제 #10
0
def test_sts():
    """Fit a ``'score'`` predictor on an STS subsample and round-trip it.

    Uses 100 training rows and 10 dev rows chosen with a fixed seed,
    evaluates RMSE on the dev rows, then runs
    ``verify_predictor_save_load``.
    """
    sources = (
        'https://autogluon-text.s3-accelerate.amazonaws.com/glue/sts/train.parquet',
        'https://autogluon-text.s3-accelerate.amazonaws.com/glue/sts/dev.parquet',
    )
    train_data, dev_data = (load_pd.load(url) for url in sources)

    # Permutations are drawn train-first, then dev, from one seeded RNG.
    rng = np.random.RandomState(123)
    train_idx = rng.permutation(len(train_data))[:100]
    dev_idx = rng.permutation(len(dev_data))[:10]
    train_data = train_data.iloc[train_idx]
    dev_data = dev_data.iloc[dev_idx]

    fit_kwargs = dict(hyperparameters=test_hyperparameters,
                      label='score',
                      num_trials=1,
                      verbosity=4,
                      ngpus_per_trial=1,
                      output_directory='./sts',
                      plot_results=False)
    predictor = task.fit(train_data, **fit_kwargs)

    # The score is not asserted on; the call checks that evaluate() runs.
    predictor.evaluate(dev_data, metrics=['rmse'])
    verify_predictor_save_load(predictor, dev_data)
def test_mrpc():
    """Fit a ``'label'`` predictor on an MRPC subsample and exercise it.

    Uses 100 training rows and 10 dev rows chosen with a fixed seed, then
    calls ``evaluate`` (accuracy), ``predict`` and ``predict_proba`` on
    the dev rows. None of the outputs are asserted on.
    """
    sources = (
        'https://autogluon-text.s3-accelerate.amazonaws.com/glue/mrpc/train.parquet',
        'https://autogluon-text.s3-accelerate.amazonaws.com/glue/mrpc/dev.parquet',
    )
    train_data, dev_data = (load_pd.load(url) for url in sources)

    # Permutations are drawn train-first, then dev, from one seeded RNG.
    rng = np.random.RandomState(123)
    train_idx = rng.permutation(len(train_data))[:100]
    dev_idx = rng.permutation(len(dev_data))[:10]
    train_data = train_data.iloc[train_idx]
    dev_data = dev_data.iloc[dev_idx]

    fit_kwargs = dict(hyperparameters=test_hyperparameters,
                      label='label',
                      num_trials=1,
                      verbosity=4,
                      ngpus_per_trial=1,
                      output_directory='./mrpc',
                      plot_results=False)
    predictor = task.fit(train_data, **fit_kwargs)

    predictor.evaluate(dev_data, metrics=['acc'])
    predictor.predict(dev_data)
    predictor.predict_proba(dev_data)
 def __init__(self):
     """Load two saved predictors from fixed paths onto the instance.

     ``predictor_rank`` is loaded through ``task2`` and ``predictor_sts``
     through ``task``; both paths are hard-coded under
     ``/content/common-alternusvera/PU``.
     """
     # NOTE(review): `task` and `task2` are imported outside this view;
     # presumably two different AutoGluon task classes -- confirm.
     self.predictor_rank = task2.load(
         '/content/common-alternusvera/PU/ag_predict')
     self.predictor_sts = task.load(
         '/content/common-alternusvera/PU/saved_dir')