Exemple #1
0
def test_load_old_checkpoint():
    """Load a previously saved checkpoint archive and continue training it.

    The archived checkpoint is downloaded into a freshly cleared
    ``checkpoints`` directory under the home dir, extracted, and loaded with
    ``TextPredictor.load``. The save/load round trip is verified right after
    loading and again after one more short ``fit`` call.
    """
    dataset = AmazonReviewSentimentCrossLingualDataset()

    archive_sha1 = "4ba096cdf6bd76c06386f2c27140db055e59c91b"
    name = "mdeberta-v3-base-checkpoint"
    checkpoints_dir = os.path.join(get_home_dir(), "checkpoints")
    archive_path = os.path.join(checkpoints_dir, f"{name}.zip")
    extracted_dir = os.path.join(get_home_dir(), "checkpoints", name)

    # Start from an empty directory so nothing from an earlier run is reused.
    if os.path.exists(checkpoints_dir):
        shutil.rmtree(checkpoints_dir)

    download(
        url=f"s3://automl-mm-bench/unit-tests-0.4/checkpoints/{name}.zip",
        path=archive_path,
        sha1_hash=archive_sha1,
    )
    protected_zip_extraction(
        archive_path,
        sha1_hash=archive_sha1,
        folder=checkpoints_dir,
    )

    predictor = TextPredictor.load(extracted_dir)
    verify_predictor_save_load(predictor, dataset.test_df)

    # Continue training the loaded predictor, then check the round trip again.
    extra_hyperparameters = {
        "optimization.top_k_average_method": "uniform_soup",
    }
    predictor.fit(
        dataset.train_df,
        presets="multilingual",
        time_limit=10,
        hyperparameters=extra_hyperparameters,
    )
    verify_predictor_save_load(predictor, dataset.test_df)
def test_predictor_fit(key):
    """Fit a ``TextPredictor`` on a small sample and exercise continuous fit.

    Args:
        key: Name of the ``DATA_INFO`` entry that supplies the train/dev
            locations, label column, metric and the ``verify_proba`` flag.
    """
    train_full = load_pd.load(DATA_INFO[key]['train'])
    dev_full = load_pd.load(DATA_INFO[key]['dev'])
    label = DATA_INFO[key]['label']
    eval_metric = DATA_INFO[key]['metric']
    verify_proba = DATA_INFO[key]['verify_proba']

    # Seeded sub-sampling: 100 training rows and 10 dev rows.
    rng = np.random.RandomState(123)
    train_order = rng.permutation(len(train_full))
    dev_order = rng.permutation(len(dev_full))
    train_subset = train_full.iloc[train_order[:100]]
    dev_subset = dev_full.iloc[dev_order[:10]]

    def run_fit(target):
        # Every fit in this test uses the same data and settings.
        target.fit(train_subset,
                   hyperparameters=get_test_hyperparameters(),
                   time_limit=30,
                   seed=123)

    predictor = TextPredictor(label=label, eval_metric=eval_metric)
    run_fit(predictor)
    # The score is not inspected; evaluate() only has to run.
    predictor.evaluate(dev_subset)
    verify_predictor_save_load(predictor, dev_subset,
                               verify_proba=verify_proba)

    # Continuous fit on the same in-memory predictor.
    run_fit(predictor)
    verify_predictor_save_load(predictor, dev_subset,
                               verify_proba=verify_proba)

    # Continuous fit after saving to disk and loading the saved model back.
    with tempfile.TemporaryDirectory() as tmp_dir:
        predictor.save(tmp_dir)
        predictor = TextPredictor.load(tmp_dir)
        run_fit(predictor)
Exemple #3
0
def predict(args):
    """Load a saved predictor, predict on the test file and write a CSV.

    Args:
        args: Object with attributes ``use_tabular``, ``model_dir``,
            ``test_file`` and ``exp_dir``. When ``exp_dir`` is ``None`` it is
            set to ``'.'`` on ``args`` itself.
    """
    predictor_cls = TabularPredictor if args.use_tabular else TextPredictor
    predictor = predictor_cls.load(args.model_dir)
    test_prediction = predictor.predict(args.test_file, as_pandas=True)

    # Fall back to the current directory when no output directory was given.
    if args.exp_dir is None:
        args.exp_dir = '.'
    output_path = os.path.join(args.exp_dir, 'test_prediction.csv')
    test_prediction.to_csv(output_path)
Exemple #4
0
 def load(cls, path: str, reset_paths=True, verbose=True):
     """Load the model via the parent class and attach its text predictor.

     The nested ``TextPredictor`` is loaded from ``cls.nn_model_name`` under
     ``path`` only when the restored object's ``_load_model`` flag is truthy.
     The flag is set to ``None`` before the object is returned.
     """
     restored = super().load(path=path,
                             reset_paths=reset_paths,
                             verbose=verbose)
     if restored._load_model:
         try_import_autogluon_text()
         from autogluon.text import TextPredictor
         predictor_dir = os.path.join(path, cls.nn_model_name)
         restored.model = TextPredictor.load(predictor_dir)
     restored._load_model = None
     return restored
Exemple #5
0
    def load(cls, path: str, reset_paths=True, verbose=True):
        """Load the model via the parent class and attach its text predictor.

        Raises:
            ImportError: With ``AG_TEXT_IMPORT_ERROR`` as message when
                ``autogluon.text`` cannot be imported.
        """
        # Check for the optional dependency before touching anything on disk.
        try:
            from autogluon.text import TextPredictor
        except ImportError:
            raise ImportError(AG_TEXT_IMPORT_ERROR)

        restored = super().load(path=path,
                                reset_paths=reset_paths,
                                verbose=verbose)
        predictor_dir = os.path.join(path, cls.nn_model_name)
        restored.model = TextPredictor.load(predictor_dir)
        return restored
Exemple #6
0
def verify_predictor_save_load(predictor,
                               df,
                               verify_proba=False,
                               verify_embedding=True):
    """Check that a predictor gives the same outputs after save and load.

    Args:
        predictor: Predictor to save into a temporary directory.
        df: Data passed to every prediction call.
        verify_proba: Also compare ``predict_proba`` outputs when true.
        verify_embedding: Also check that ``predict_features`` returns one
            row per row of ``df`` when true.
    """
    with tempfile.TemporaryDirectory() as tmp_dir:
        predictor.save(tmp_dir)
        expected = predictor.predict(df)
        reloaded = TextPredictor.load(tmp_dir)
        actual = reloaded.predict(df)
        npt.assert_equal(expected, actual)

        if verify_proba:
            expected_proba = predictor.predict_proba(df)
            actual_proba = reloaded.predict_proba(df)
            npt.assert_equal(expected_proba, actual_proba)

        if verify_embedding:
            features = predictor.predict_features(df)
            assert features.shape[0] == len(df)
Exemple #7
0
def verify_predictor_save_load(predictor, df, verify_proba=False,
                               verify_embedding=True):
    """Check that a predictor gives the same outputs after save and load.

    The loaded predictor's ``as_pandas=True`` output is also compared with
    its ``as_pandas=False`` output.

    Args:
        predictor: Predictor to save into a temporary directory.
        df: Data passed to every prediction call.
        verify_proba: Also compare ``predict_proba`` outputs when true.
        verify_embedding: Also check that ``extract_embedding`` returns one
            row per row of ``df`` when true.
    """
    with tempfile.TemporaryDirectory() as tmp_dir:
        predictor.save(tmp_dir)
        expected = predictor.predict(df, as_pandas=False)
        reloaded = TextPredictor.load(tmp_dir)
        actual = reloaded.predict(df, as_pandas=False)
        actual_frame = reloaded.predict(df, as_pandas=True)
        npt.assert_equal(expected, actual)
        npt.assert_equal(actual, actual_frame.to_numpy())

        if verify_proba:
            expected_proba = predictor.predict_proba(df, as_pandas=False)
            actual_proba = reloaded.predict_proba(df, as_pandas=False)
            actual_proba_frame = reloaded.predict_proba(df, as_pandas=True)
            npt.assert_equal(expected_proba, actual_proba)
            npt.assert_equal(actual_proba, actual_proba_frame.to_numpy())

        if verify_embedding:
            embedding = predictor.extract_embedding(df)
            assert embedding.shape[0] == len(df)
def main(args):
    """Write one GLUE submission file per task into ``args.save_dir``.

    For every task except ``'ax'`` the predictions are read from
    ``{args.prefix}_{task}/test_prediction.csv``. For ``'ax'`` the predictor
    stored at ``{args.prefix}_mnli_m`` is loaded and run on the diagnostic
    test file. ``'sts'`` predictions are clipped to the range [0, 5].

    Args:
        args: Object with attributes ``save_dir`` (output directory, created
            if missing) and ``prefix`` (common prefix of the per-task
            directories).

    Raises:
        ValueError: If the number of test indices differs from the number
            of predictions for a task.
    """
    tasks = {
        'cola': ['CoLA.tsv', 'glue/cola/test.tsv'],
        'sst': ['SST-2.tsv', 'glue/sst/test.tsv'],
        'mrpc': ['MRPC.tsv', 'glue/mrpc/test.tsv'],
        'sts': ['STS-B.tsv', 'glue/sts/test.tsv'],
        'qqp': ['QQP.tsv', 'glue/qqp/test.tsv'],
        'mnli_m': ['MNLI-m.tsv', 'glue/mnli/test_matched.tsv'],
        'mnli_mm': ['MNLI-mm.tsv', 'glue/mnli/test_mismatched.tsv'],
        'qnli': ['QNLI.tsv', 'glue/qnli/test.tsv'],
        'rte': ['RTE.tsv', 'glue/rte/test.tsv'],
        'wnli': ['WNLI.tsv', 'glue/wnli/test.tsv'],
        'ax': ['AX.tsv', 'glue/rte_diagnostic/diagnostic.tsv']
    }

    os.makedirs(args.save_dir, exist_ok=True)

    for task, (save_name, test_file_path) in tasks.items():
        if task == 'ax':
            # For AX, we need to load the mnli-m checkpoint and run inference
            test_df = pd.read_csv(test_file_path, sep='\t', header=0)
            test_index = test_df['index']
            predictor = TextPredictor.load(f'{args.prefix}_mnli_m')
            label_column = predictor.label
            predictions = predictor.predict(test_df)
        else:
            test_index = get_test_index(test_file_path)
            prediction_df = pd.read_csv(
                f'{args.prefix}_{task}/test_prediction.csv', index_col=0)
            label_column = prediction_df.columns[0]
            predictions = prediction_df[label_column]
        if task == 'sts':
            predictions = np.clip(predictions, 0, 5)

        # Pair rows by position. Indexing a pandas Series with ``[i]`` is a
        # label lookup, which fails or misaligns rows whenever the index is
        # not exactly 0..n-1 (e.g. a CSV saved with a different index).
        index_values = np.asarray(test_index)
        prediction_values = np.asarray(predictions)
        if len(index_values) != len(prediction_values):
            raise ValueError(
                'Task {!r}: got {} test indices but {} predictions.'.format(
                    task, len(index_values), len(prediction_values)))

        with open(os.path.join(args.save_dir, save_name), 'w') as of:
            of.write('index\t{}\n'.format(label_column))
            of.writelines(
                '{}\t{}\n'.format(row_index, prediction)
                for row_index, prediction in zip(index_values,
                                                 prediction_values))
Exemple #9
0
def test_standalone_with_emoji():
    """Fit on emoji-only text, save standalone and reload with HTTP blocked.

    The predictor saved with ``standalone=True`` is loaded and used for
    prediction while ``requests.Session.request`` is patched to raise, and
    its predictions must equal those made before saving.
    """
    import tempfile
    from unittest import mock

    # Any call to requests.Session.request raises while this patch is active.
    blocked_request = mock.Mock(side_effect=RuntimeError(
        'Please use the `responses` library to mock HTTP in your tests.'
    ))
    requests_gag = mock.patch('requests.Session.request', blocked_request)

    # Each class gets strings of 1..count repetitions of its emoji.
    class_specs = (
        ('😁', 'grin', 50 * 3),
        ('😃', 'smile', 30 * 3),
        ('😉', 'wink', 20 * 3),
    )
    samples = [
        (emoji * repeat, label)
        for emoji, label, count in class_specs
        for repeat in range(1, count + 1)
    ]
    df = pd.DataFrame(samples, columns=['data', 'label'])

    predictor = TextPredictor(label='label', verbosity=3)
    predictor.fit(
        df,
        hyperparameters=get_test_hyperparameters(),
        time_limit=5,
        seed=123,
    )
    online_predictions = predictor.predict(df, as_pandas=False)

    with tempfile.TemporaryDirectory() as tmp_dir:
        predictor.save(tmp_dir, standalone=True)
        with requests_gag:  # no internet connections
            offline_predictor = TextPredictor.load(tmp_dir)
            offline_predictions = offline_predictor.predict(df,
                                                            as_pandas=False)

    npt.assert_equal(online_predictions, offline_predictions)