Ejemplo n.º 1
0
def test_text_classifier_param_selector(results_base_path, tasks_base_path):
    """Smoke-test the text-classifier hyper-parameter search on the imdb corpus.

    Loads the ``imdb`` corpus from ``tasks_base_path``, registers a search
    space over document-embedding and training parameters, runs a search
    limited to two evaluations of two epochs each, and finally deletes
    ``results_base_path``.

    Args:
        results_base_path: Directory handed to the selector and removed at
            the end of the test (presumably a pytest fixture -- confirm in
            conftest).
        tasks_base_path: Base directory the corpus is loaded from
            (presumably a pytest fixture -- confirm in conftest).
    """
    imdb_corpus = NLPTaskDataFetcher.load_corpus(u'imdb',
                                                 base_path=tasks_base_path)
    glove = WordEmbeddings(u'en-glove')
    space = SearchSpace()

    # One entry per search dimension, in registration order:
    # (parameter, hyperopt sampler, keyword arguments for the sampler).
    parameter_specs = [
        (Parameter.EMBEDDINGS, hp.choice, {u'options': [[glove]]}),
        (Parameter.HIDDEN_SIZE, hp.choice, {u'options': [64, 128, 256, 512]}),
        (Parameter.RNN_LAYERS, hp.choice, {u'options': [1, 2]}),
        (Parameter.REPROJECT_WORDS, hp.choice, {u'options': [True, False]}),
        (Parameter.REPROJECT_WORD_DIMENSION, hp.choice,
         {u'options': [64, 128]}),
        (Parameter.BIDIRECTIONAL, hp.choice, {u'options': [True, False]}),
        (Parameter.DROPOUT, hp.uniform, {u'low': 0.25, u'high': 0.75}),
        (Parameter.WORD_DROPOUT, hp.uniform, {u'low': 0.25, u'high': 0.75}),
        (Parameter.LOCKED_DROPOUT, hp.uniform, {u'low': 0.25, u'high': 0.75}),
        (Parameter.LEARNING_RATE, hp.uniform, {u'low': 0, u'high': 1}),
        (Parameter.MINI_BATCH_SIZE, hp.choice, {u'options': [4, 8, 16, 32]}),
        (Parameter.ANNEAL_FACTOR, hp.uniform, {u'low': 0, u'high': 0.75}),
        (Parameter.PATIENCE, hp.choice, {u'options': [3, 5]}),
    ]
    for parameter, sampler, sampler_kwargs in parameter_specs:
        space.add(parameter, sampler, **sampler_kwargs)

    # Keep the run tiny: two epochs per trial, two trials in total.
    selector = TextClassifierParamSelector(imdb_corpus,
                                           False,
                                           results_base_path,
                                           document_embedding_type=u'lstm',
                                           max_epochs=2)
    selector.optimize(space, max_evals=2)

    # Remove the results directory so nothing is left behind.
    shutil.rmtree(results_base_path)
Ejemplo n.º 2
0
def test_text_classifier_param_selector(results_base_path, tasks_base_path):
    """Smoke-test the LSTM text-classifier hyper-parameter search on imdb.

    Builds a classification corpus from ``tasks_base_path / "imdb"``,
    registers a search space over document-embedding and training
    parameters, runs a search limited to two evaluations of two epochs
    each, and finally deletes ``results_base_path``.

    Args:
        results_base_path: Directory handed to the selector and removed at
            the end of the test (presumably a pytest fixture -- confirm in
            conftest).
        tasks_base_path: Path-like base directory containing the ``imdb``
            data; it must support the ``/`` operator.
    """
    # Imported locally so this test does not depend on a module-level import
    # of WordEmbeddings being present.
    from flair.embeddings import WordEmbeddings

    corpus = flair.datasets.ClassificationCorpus(tasks_base_path / "imdb")

    # The EMBEDDINGS option below needs a concrete embedding instance; it was
    # previously referenced without ever being created (NameError).
    glove_embedding = WordEmbeddings("en-glove")

    search_space = SearchSpace()

    # document embeddings parameter
    search_space.add(Parameter.EMBEDDINGS, hp.choice, options=[[glove_embedding]])
    search_space.add(Parameter.HIDDEN_SIZE, hp.choice, options=[64, 128, 256, 512])
    search_space.add(Parameter.RNN_LAYERS, hp.choice, options=[1, 2])
    search_space.add(Parameter.REPROJECT_WORDS, hp.choice, options=[True, False])
    search_space.add(Parameter.REPROJECT_WORD_DIMENSION, hp.choice, options=[64, 128])
    search_space.add(Parameter.BIDIRECTIONAL, hp.choice, options=[True, False])
    search_space.add(Parameter.DROPOUT, hp.uniform, low=0.25, high=0.75)
    search_space.add(Parameter.WORD_DROPOUT, hp.uniform, low=0.25, high=0.75)
    search_space.add(Parameter.LOCKED_DROPOUT, hp.uniform, low=0.25, high=0.75)

    # training parameter
    search_space.add(Parameter.LEARNING_RATE, hp.uniform, low=0, high=1)
    search_space.add(Parameter.MINI_BATCH_SIZE, hp.choice, options=[4, 8, 16, 32])
    search_space.add(Parameter.ANNEAL_FACTOR, hp.uniform, low=0, high=0.75)
    search_space.add(Parameter.PATIENCE, hp.choice, options=[3, 5])

    # Keep the run tiny: two epochs per trial, two trials in total.
    param_selector = TextClassifierParamSelector(
        corpus, False, results_base_path, document_embedding_type="lstm", max_epochs=2
    )
    param_selector.optimize(search_space, max_evals=2)

    # clean up results directory
    shutil.rmtree(results_base_path)
    del param_selector, search_space
Ejemplo n.º 3
0
def test_text_classifier_param_selector(results_base_path, tasks_base_path):
    """Smoke-test the transformer text-classifier hyper-parameter search.

    Builds a classification corpus from ``tasks_base_path / "imdb"``,
    registers a search space over transformer and training parameters, runs
    a search for the ``"sentiment"`` label type limited to two evaluations
    of two epochs each, and finally deletes ``results_base_path``.

    Args:
        results_base_path: Directory handed to the selector and removed at
            the end of the test (presumably a pytest fixture -- confirm in
            conftest).
        tasks_base_path: Path-like base directory containing the ``imdb``
            data; it must support the ``/`` operator.
    """
    imdb_corpus = flair.datasets.ClassificationCorpus(tasks_base_path / "imdb")
    label_type = "sentiment"
    space = SearchSpace()

    # One entry per search dimension, in registration order:
    # (parameter, hyperopt sampler, keyword arguments for the sampler).
    parameter_specs = [
        # document embeddings parameter
        (Parameter.TRANSFORMER_MODEL, hp.choice, {"options": ["albert-base-v1"]}),
        (Parameter.LAYERS, hp.choice, {"options": ["-1", "-2"]}),
        # training parameter
        (Parameter.LEARNING_RATE, hp.uniform, {"low": 0, "high": 1}),
        (Parameter.MINI_BATCH_SIZE, hp.choice, {"options": [4, 8, 16, 32]}),
        (Parameter.ANNEAL_FACTOR, hp.uniform, {"low": 0, "high": 0.75}),
        (Parameter.PATIENCE, hp.choice, {"options": [3, 5]}),
    ]
    for parameter, sampler, sampler_kwargs in parameter_specs:
        space.add(parameter, sampler, **sampler_kwargs)

    # Keep the run tiny: two epochs per trial, two trials in total.
    selector = TextClassifierParamSelector(
        imdb_corpus, label_type, False, results_base_path, max_epochs=2
    )
    selector.optimize(space, max_evals=2)

    # Remove the results directory, then drop the local references.
    shutil.rmtree(results_base_path)
    del selector, space