Example No. 1
def run():
    ## Load example data frame
    dataframe = pd.read_csv("../data/spanish/train.tsv", sep="\t")
    train_sequences = dataframe['tweet'].values.tolist()
    train_targets = dataframe['offensive'].values
    print(train_sequences[0:3])
    print(train_targets[0:3])

    ## Possible scoring metrics (standard scikit-learn scorer names):
    ## ['explained_variance', 'r2', 'max_error', 'neg_median_absolute_error',
    ##  'neg_mean_absolute_error', 'neg_mean_absolute_percentage_error',
    ##  'neg_mean_squared_error', 'neg_mean_squared_log_error',
    ##  'neg_root_mean_squared_error', 'neg_mean_poisson_deviance',
    ##  'neg_mean_gamma_deviance', 'accuracy', 'top_k_accuracy', 'roc_auc',
    ##  'roc_auc_ovr', 'roc_auc_ovo', 'roc_auc_ovr_weighted',
    ##  'roc_auc_ovo_weighted', 'balanced_accuracy', 'average_precision',
    ##  'neg_log_loss', 'neg_brier_score', 'adjusted_rand_score', 'rand_score',
    ##  'homogeneity_score', 'completeness_score', 'v_measure_score',
    ##  'mutual_info_score', 'adjusted_mutual_info_score',
    ##  'normalized_mutual_info_score', 'fowlkes_mallows_score', 'precision',
    ##  'precision_macro', 'precision_micro', 'precision_samples',
    ##  'precision_weighted', 'recall', 'recall_macro', 'recall_micro',
    ##  'recall_samples', 'recall_weighted', 'f1', 'f1_macro', 'f1_micro',
    ##  'f1_samples', 'f1_weighted', 'jaccard', 'jaccard_macro',
    ##  'jaccard_micro', 'jaccard_samples', 'jaccard_weighted']
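    ## Optional sanity check (a sketch): these names are standard scikit-learn
    ## scorer strings, so sklearn.metrics.get_scorer can validate one up front:
    # from sklearn.metrics import get_scorer
    # get_scorer("accuracy")  # raises ValueError for an unknown scorer name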

    autoBOTLibObj = autoBOTLib.GAlearner(train_sequences,
                                         train_targets,
                                         scoring_metric="accuracy",
                                         representation_type="neurosymbolic",
                                         time_constraint=8).evolve()
    autoBOTLib.store_autobot_model(
        autoBOTLibObj, "../stored_models/example_spanish_model.pickle")

    fitness_summary = autoBOTLibObj.visualize_fitness(
        image_path="./spanish_fitness.png")
    importances_local, importances_global = autoBOTLibObj.feature_type_importances()
    final_learners = autoBOTLibObj.summarise_final_learners()

    ## storing the results for analysis
    importances_local.to_csv("spanish_local.tsv", sep="\t")
    importances_global.to_csv("spanish_global.tsv", sep="\t")
    final_learners.to_csv("final_learners.tsv", sep="\t")
    fitness_summary.to_csv("fitness_summary.tsv", sep="\t")
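
    ## A follow-up sketch: reload the stored model and score unseen tweets.
    ## Assumes a test split exists at ../data/spanish/test.tsv with the same
    ## 'tweet' column; load_autobot_model mirrors store_autobot_model above.
    reloaded = autoBOTLib.load_autobot_model(
        "../stored_models/example_spanish_model.pickle")
    test_dataframe = pd.read_csv("../data/spanish/test.tsv", sep="\t")
    print(reloaded.predict(test_dataframe['tweet'].values.tolist()))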
Example No. 2
def run():
    ## Load example data frame
    dataframe = pd.read_csv("../data/insults/train.tsv", sep="\t").iloc[:300]
    train_sequences = dataframe['text_a']
    train_targets_c1 = dataframe['label'].values.tolist()
    train_targets_c2 = [
        0 if len(x) < 100 else 1 for x in train_sequences.values
    ]
    joint_target_space = [[train_targets_c1[enx], train_targets_c2[enx]]
                          for enx in range(len(train_targets_c1))]

    autoBOTLibObj = autoBOTLib.GAlearner(
        train_sequences,
        joint_target_space,
        representation_type="neurosymbolic-lite",  ## See the documentation for all possible representation types.
        n_fold_cv=3,
        memory_storage="memory2",
        sparsity=0.1,
        learner_preset="test",
        upsample=False,  ## Suitable for imbalanced data - randomized upsampling tends to help.
        time_constraint=0.1).evolve(
            strategy="evolution"
        )  ## strategy = "direct-learning" trains a single learner.

    test_sequences = pd.read_csv("../data/insults/test.tsv", sep="\t")["text_a"]
    predictions = autoBOTLibObj.predict(test_sequences)
    prob_predictions = autoBOTLibObj.predict_proba(test_sequences)
    print(predictions)
    print(prob_predictions)

    autoBOTLibObj.generate_report(output_folder="./report/", job_id="MLC")
Example No. 3
def test_minimal_mlc():
    ## Load example data frame
    dataframe = pd.read_csv("../data/insults/train.tsv", sep="\t")
    train_sequences = dataframe['text_a']
    train_targets_c1 = dataframe['label'].values.tolist()
    train_targets_c2 = [
        0 if len(x) < 100 else 1 for x in train_sequences.values
    ]
    joint_target_space = [[train_targets_c1[enx], train_targets_c2[enx]]
                          for enx in range(len(train_targets_c1))]

    autoBOTLibObj = autoBOTLib.GAlearner(
        train_sequences,
        joint_target_space,
        representation_type="symbolic",  ## See the documentation for all possible representation types.
        n_fold_cv=3,
        sparsity=0.1,
        upsample=False,  ## Suitable for imbalanced data - randomized upsampling tends to help.
        time_constraint=0.2).evolve(
            strategy="direct-learning"
        )  ## strategy = "direct-learning" trains a single learner.

    dataframe2 = pd.read_csv("../data/insults/test.tsv", sep="\t")
    test_sequences = dataframe2['text_a']
    predictions = autoBOTLibObj.predict(test_sequences)
    prob_predictions = autoBOTLibObj.predict_proba(test_sequences)
    print(predictions)
    print(prob_predictions)

    autoBOTLibObj.generate_report(output_folder="./report/",
                                  job_id="as9y0gb98ss")
Example No. 4
def test_minimal():
    ## Load example data frame
    dataframe = pd.read_csv("../data/insults/train.tsv", sep="\t").iloc[:500]
    train_sequences = dataframe['text_a']
    train_targets = dataframe['label']

    autoBOTLibObj = autoBOTLib.GAlearner(
        train_sequences,
        train_targets,
        representation_type="symbolic",  ## See the documentation for all possible representation types.
        n_fold_cv=3,
        memory_storage="memory2",
        sparsity=0.1,
        upsample=False,  ## Suitable for imbalanced data - randomized upsampling tends to help.
        time_constraint=0.1).evolve(
            strategy="evolution"
        )  ## strategy = "direct-learning" trains a single learner.

    dataframe2 = pd.read_csv("../data/insults/test.tsv", sep="\t")
    test_sequences = dataframe2['text_a']
    predictions = autoBOTLibObj.predict(test_sequences)
    prob_predictions = autoBOTLibObj.predict_proba(test_sequences)
    print(predictions)
    print(prob_predictions)

    autoBOTLibObj.generate_report(output_folder="./report/",
                                  job_id="as9y0gb98s")
Example No. 5
def run():
    ## Load example data frame
    dataframe = pd.read_csv("../data/insults/train.tsv", sep="\t")
    train_sequences = dataframe['text_a']
    train_targets = dataframe['label']

    autoBOTLibObj = autoBOTLib.GAlearner(
        train_sequences,
        train_targets,
        learner_preset="mini-l1",
        validation_type="train_test",  ## This parallelizes at the individual (not learner) level -> additional memory overhead, as shown in the paper.
        validation_percentage=0.15,
        num_cpu=10,
        representation_type="neurosymbolic-lite",  ## The lightweight variant; the full "neurosymbolic" space additionally includes sentence-transformers.
        time_constraint=0.1).evolve(
            strategy="evolution"
        )  ## strategy = "direct-learning" trains a single learner.

    dataframe2 = pd.read_csv("../data/insults/test.tsv", sep="\t")
    test_sequences = dataframe2['text_a']
    predictions = autoBOTLibObj.predict(test_sequences)
    prob_predictions = autoBOTLibObj.predict_proba(test_sequences)
    print(predictions)
    print(prob_predictions)

    importances_local, importances_global = autoBOTLibObj.feature_type_importances()
    print(importances_global)
    print(importances_local)
    importances_local.to_csv("local_insults.tsv", sep="\t")

    topic_df = autoBOTLibObj.get_topic_explanation()
    print(topic_df)
Example No. 6
def run():
    ## Load example data frame
    dataframe = pd.read_csv("../data/insults/train.tsv", sep="\t")
    train_sequences = dataframe['text_a'].values.tolist()
    train_targets = dataframe['label'].values

    autoBOTLibObj = autoBOTLib.GAlearner(
        train_sequences,  # input sequences
        train_targets,  # target space 
        time_constraint=3,  # time in hours
        num_cpu="all",  # number of CPUs to use
        latent_dim=768,  ## latent dim for neural representations
        sparsity=0.1,  ## latent_dim/sparsity dim for sparse representations
        task_name="example test",  # task identifier
        scoring_metric="f1",  # sklearn-compatible scoring metric as the fitness.
        hof_size=3,  # size of the hall of fame
        top_k_importances=25,  # how many top features to output as final ranking
        memory_storage="./memory",  # tripled base for concept features
        representation_type="neurosymbolic")  # or symbolic or neural

    autoBOTLibObj.evolve(
        nind=8,  ## population size
        strategy="evolution",  ## optimization strategy
        crossover_proba=0.6,  ## crossover rate
        mutpb=0.4)  ## mutation rate

    ## Persistence demonstration: storing and reloading a model for later use.
    autoBOTLib.store_autobot_model(
        autoBOTLibObj, "../stored_models/example_insults_model.pickle")
    autoBOTLibObj = autoBOTLib.load_autobot_model(
        "../stored_models/example_insults_model.pickle")

    dataframe2 = pd.read_csv("../data/insults/test.tsv", sep="\t")
    test_sequences = dataframe2['text_a'].values.tolist()
    test_targets = dataframe2['label'].values
    predictions = autoBOTLibObj.predict(test_sequences)
    print(predictions)
    performance = autoBOTLib.compute_metrics(
        "first_run_task_name", predictions,
        test_targets)  ## compute F1, acc and F1_acc (as in GLUE)

    ## visualize performance
    print(performance)

    ## Visualize importances (global -> type, local -> individual features)
    importances_local, importances_global = autoBOTLibObj.feature_type_importances()
    print(importances_global)
    print(importances_local)

    final_learners = autoBOTLibObj.summarise_final_learners()
    print(final_learners)

    ## Visualize the fitness trace
    fitness_summary = autoBOTLibObj.visualize_fitness(
        image_path="./fitness_new.png")
    print(fitness_summary)
Example No. 7
def test_minimal():
    ## Load example data frame
    dataframe = pd.read_csv("./data/insults/train.tsv", sep="\t")
    train_sequences = dataframe['text_a'].values.tolist()
    train_targets = dataframe['label'].values

    autoBOTLibObj = autoBOTLib.GAlearner(train_sequences,
                                         train_targets,
                                         time_constraint=0.1).evolve()

    dataframe2 = pd.read_csv("./data/insults/test.tsv", sep="\t")
    test_sequences = dataframe2['text_a'].values.tolist()
    predictions = autoBOTLibObj.predict(test_sequences)
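
    ## A short evaluation sketch; it assumes the test split carries a 'label'
    ## column, as it does in the other examples here.
    from sklearn.metrics import accuracy_score
    print(accuracy_score(dataframe2['label'].values, predictions))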
Example No. 8
def test_initializations(fold_number, representation_type, sparsity,
                         time_constraint):

    dataframe = pd.read_csv("../data/insults/train.tsv", sep="\t")
    train_sequences = dataframe['text_a']
    train_targets = dataframe['label']
    autoBOTLibObj = autoBOTLib.GAlearner(
        train_sequences,
        train_targets,
        representation_type=representation_type,  ## See the documentation for all possible representation types.
        n_fold_cv=fold_number,
        memory_storage="memory2",
        sparsity=sparsity,
        upsample=False,  ## Suitable for imbalanced data - randomized upsampling tends to help.
        time_constraint=time_constraint)
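
## A sketch of how this initializer can be exercised over a small grid with
## pytest (hypothetical parameter values; adjust them to your setup):
# import pytest
# @pytest.mark.parametrize("fold_number", [2, 3])
# @pytest.mark.parametrize("representation_type", ["symbolic", "neurosymbolic-lite"])
# @pytest.mark.parametrize("sparsity", [0.05, 0.1])
# @pytest.mark.parametrize("time_constraint", [0.1])
# def test_grid(fold_number, representation_type, sparsity, time_constraint):
#     test_initializations(fold_number, representation_type, sparsity,
#                          time_constraint)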
Example No. 9
def test_custom_features():

    ## Load example data frame
    dataframe = pd.read_csv("./data/insults/train.tsv", sep="\t")
    train_sequences = dataframe['text_a'].values.tolist()
    train_targets = dataframe['label'].values

    ## Let's say we wish to use only the following two text-to-feature transformer objects
    tfidf_word_unigram = TfidfVectorizer(ngram_range=(1, 2),
                                         sublinear_tf=False,
                                         max_features=100)

    tfidf_char_bigram = TfidfVectorizer(analyzer='char',
                                        ngram_range=(1, 2),
                                        max_features=100)

    ## Note: You can use any transformer class that is implemented in accordance with the scikit-learn API (.fit, .transform, .fit_transform, .get_feature_names, etc.)

    ## Next, put them into a list. Note the use of the text_col class.
    custom_features = [
        ('word_features',
         pipeline.Pipeline([
             ('s1',
              autoBOTLib.feature_constructors.text_col(key='no_stopwords')),
             ('word_tfidf_unigram', tfidf_word_unigram)
         ])),
        ('char_features',
         pipeline.Pipeline([
             ('s2',
              autoBOTLib.feature_constructors.text_col(key='no_stopwords')),
             ('char_tfidf_bigram', tfidf_char_bigram)
         ]))
    ]

    ## Finally, pass this list via the custom_transformer_pipeline argument:
    autoBOTLibObj = autoBOTLib.GAlearner(
        train_sequences,
        train_targets,
        time_constraint=1,
        custom_transformer_pipeline=custom_features).evolve()

    dataframe2 = pd.read_csv("./data/insults/test.tsv", sep="\t")
    test_sequences = dataframe2['text_a'].values.tolist()
    predictions = autoBOTLibObj.predict(test_sequences)
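
## As the note above says, any transformer implementing the scikit-learn API
## can be plugged in. A minimal hand-rolled sketch (hypothetical class, not
## part of autoBOTLib):
import numpy as np

class DocLengthFeatures:
    """Toy transformer emitting character and token counts per document."""

    def fit(self, texts, y=None):
        return self

    def transform(self, texts):
        return np.array([[len(t), len(t.split())] for t in texts])

    def fit_transform(self, texts, y=None):
        return self.fit(texts).transform(texts)

    def get_feature_names(self):
        return ["char_count", "word_count"]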
Example No. 10
def run():
    ## Load example data frame
    dataframe = pd.read_csv("../data/insults/train.tsv", sep="\t")
    train_sequences = dataframe['text_a']
    train_targets = dataframe['label']
    reptype = "neurosymbolic"
    autoBOTLibObj = autoBOTLib.GAlearner(
        train_sequences,
        train_targets,
        representation_type=reptype,  ## See the documentation for all possible representation types.
        n_fold_cv=3,
        framework="torch",
        memory_storage="memory",
        learner_preset="default",
        verbose=1,
        sparsity=0.1,
        visualize_progress=True,  ## Stores progress as a PROGRESS_{generation}.pdf file.
        upsample=False,  ## Suitable for imbalanced data - randomized upsampling tends to help.
        time_constraint=1).evolve(
            strategy="evolution",
            nind=3)  ## strategy = "direct-learning" trains a single learner.

    # Store
    autoBOTLib.store_autobot_model(autoBOTLibObj, "model.pickle")

    # Load
    autoBOTLibObj = autoBOTLib.load_autobot_model("model.pickle")

    # Predict
    dataframe2 = pd.read_csv("../data/insults/test.tsv", sep="\t")
    test_sequences = dataframe2['text_a']
    predictions = autoBOTLibObj.predict(test_sequences)
    autoBOTLibObj.predict_proba(test_sequences)
    # autoBOTLibObj.generate_report(output_folder="./report/",
    #                              job_id="REPORTNEW")
    test_classes = dataframe2['label'].values.tolist()
    output_classification_results(predictions,
                                  test_classes,
                                  f"./predictions/TORCH.json",
                                  model_spec={})
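
## output_classification_results is not defined in this snippet; a minimal
## stand-in sketch that persists the accuracy next to the model spec:
# import json
# def output_classification_results(predictions, test_classes, path,
#                                   model_spec=None):
#     hits = sum(int(p == t) for p, t in zip(predictions, test_classes))
#     with open(path, "w") as handle:
#         json.dump({"accuracy": hits / len(test_classes),
#                    "model_spec": model_spec or {}}, handle)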
Example No. 11
def run():
    dataframe = pd.read_csv("../data/insults/train.tsv", sep="\t")
    train_sequences = dataframe['text_a'].iloc[0:20]
    train_targets = dataframe['label'].iloc[0:20]

    autoBOTLibObj = autoBOTLib.GAlearner(
        train_sequences, train_targets,
        time_constraint=0.1).evolve(representation_step_only=True)

    input_instance_embedding = autoBOTLibObj.transform(train_sequences)

    all_feature_names = []
    for transformer in autoBOTLibObj.vectorizer.named_steps[
            'union'].transformer_list:
        features = transformer[1].steps[1][1].get_feature_names()
        all_feature_names += features

    assert input_instance_embedding.shape[1] == len(all_feature_names)

    print(input_instance_embedding.shape)
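
    ## The embedded instances form a plain feature matrix, so any downstream
    ## scikit-learn estimator can consume them directly (a brief sketch):
    from sklearn.linear_model import LogisticRegression
    clf = LogisticRegression(max_iter=1000)
    clf.fit(input_instance_embedding, train_targets)
    print(clf.score(input_instance_embedding, train_targets))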
Example No. 12
def run():
    ## Load example data frame
    dataframe = pd.read_csv("../data/insults/train.tsv", sep="\t")
    train_sequences = dataframe['text_a'].values.tolist()
    train_targets = dataframe['label'].values

    ## Define custom transformer classes as in the example above
    tfidf_word_unigram = TfidfVectorizer(ngram_range=(1, 2),
                                         sublinear_tf=False,
                                         max_features=100)

    tfidf_char_bigram = TfidfVectorizer(analyzer='char',
                                        ngram_range=(1, 2),
                                        max_features=100)
    custom_features = [
        ('word_features_custom',
         pipeline.Pipeline([
             ('s1',
              autoBOTLib.feature_constructors.text_col(key='no_stopwords')),
             ('word_tfidf_unigram', tfidf_word_unigram)
         ])),
        ('char_features_custom',
         pipeline.Pipeline([
             ('s2',
              autoBOTLib.feature_constructors.text_col(key='no_stopwords')),
             ('char_tfidf_bigram', tfidf_char_bigram)
         ]))
    ]

    ## Finally, use the flag "combine_with_existing_representation" to append the new transformer pipeline to an existing one (e.g., neurosymbolic). This way, you can easily extend the built-in autoBOTLib feature space!
    autoBOTLibObj = autoBOTLib.GAlearner(
        train_sequences,
        train_targets,
        time_constraint=1,
        representation_type="neurosymbolic",
        custom_transformer_pipeline=custom_features,
        combine_with_existing_representation=True).evolve()

    dataframe2 = pd.read_csv("../data/insults/test.tsv", sep="\t")
    test_sequences = dataframe2['text_a'].values.tolist()
    predictions = autoBOTLibObj.predict(test_sequences)
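
    ## To verify the appended custom features actually contribute, the standard
    ## importance report covers them as well (a sketch using the API shown in
    ## the earlier examples):
    importances_local, importances_global = autoBOTLibObj.feature_type_importances()
    print(importances_global)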
Example No. 13
def run():
    jid = secrets.token_hex(nbytes=16)
    df_path = None

    ## Load example data frame
    dataframe = pd.read_csv(df_path, sep="\t")
    train_sequences = None
    train_targets = None

    print(len(train_sequences))
    print(len(train_targets))
    classx = "genericTargetName"

    autoBOTObj = autoBOTLib.GAlearner(
        train_sequences,  # input sequences
        train_targets,  # target space
        time_constraint=1,  # time in hours
        num_cpu=32,  # number of CPUs to use
        sparsity=0.1,
        task_name="example test",  # task identifier
        scoring_metric="f1",  # sklearn-compatible scoring metric as the fitness.
        hof_size=3,  # size of the hall of fame
        top_k_importances=25,  # how many top features to output as final ranking
        memory_storage="./memory",  # tripled base for concept features
        representation_type="neurosymbolic")  # or symbolic or neural

    autoBOTObj.evolve(
        nind=8,  ## population size
        strategy="evolution",  ## optimization strategy
        crossover_proba=0.6,  ## crossover rate
        mutpb=0.4)  ## mutation rate

    autoBOTLib.store_autobot_model(autoBOTObj,
                                   f"./models/{jid}_{classx}_model.pickle")

    test_sequences = None
    autoBOTObj = autoBOTLib.load_autobot_model(
        f"./models/{jid}_{classx}_model.pickle")
    autoBOTObj.predict(test_sequences)
Example No. 14
def run():
    dataframe = pd.read_csv("../data/depression/train.tsv", sep="\t")
    train_sequences = dataframe['text_a']
    train_targets = dataframe['label']

    autoBOTLibObj = autoBOTLib.GAlearner(
        train_sequences, train_targets,
        time_constraint=0.1).evolve(representation_step_only=True)

    input_instance_embedding = autoBOTLibObj.transform(train_sequences)

    print(input_instance_embedding.shape)
    transf = umap.UMAP()
    embedding = transf.fit_transform(input_instance_embedding)
    sns.scatterplot(x=embedding[:, 0],
                    y=embedding[:, 1],
                    hue=train_targets,
                    palette="coolwarm")
    plt.gca().set_aspect('equal', 'datalim')
    plt.title(
        f'UMAP-based document projection ({input_instance_embedding.shape[1]}D -> 2D)',
        fontsize=12)
    plt.show()  # or store with plt.savefig("path.pdf", dpi=300)
Example No. 15
def run():
    ## Load example data frame
    dataframe = pd.read_csv("../data/insults/train.tsv", sep="\t")
    train_sequences = dataframe['text_a'].values.tolist()
    train_targets = dataframe['label'].values

    ## The syntax for specifying a learner and the hyperparameter space!
    ## These are the hyperparameters to be explored for each representation.
    classifier_hyperparameters = {
        "loss": ["hinge"],
        "penalty": ["elasticnet"],
        "alpha": [0.01, 0.001],
        "l1_ratio": [0, 0.001, 1]
    }

    ## This is the classifier compatible with the hyperparameters.
    custom_classifier = SGDClassifier()

    autoBOTLibObj = autoBOTLib.GAlearner(
        train_sequences,  # input sequences
        train_targets,  # target space 
        time_constraint=1,  # time in hours
        num_cpu=4,  # number of CPUs to use
        task_name="example test",  # task identifier
        hof_size=3,  # size of the hall of fame
        top_k_importances=25,  # how many top features to output as final ranking
        memory_storage="./memory",
        representation_type="symbolic",
        learner=custom_classifier,
        learner_hyperparameters=classifier_hyperparameters
    )  # or neurosymbolic or neural

    autoBOTLibObj.evolve(
        nind=10,  ## population size
        strategy="evolution",  ## optimization strategy
        crossover_proba=0.6,  ## crossover rate
        mutpb=0.4)  ## mutation rate
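
    ## Once evolved, the custom-learner pipeline predicts like any other; a
    ## sketch mirroring the prediction steps of the earlier examples:
    test_sequences = pd.read_csv("../data/insults/test.tsv",
                                 sep="\t")['text_a'].values.tolist()
    print(autoBOTLibObj.predict(test_sequences))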
Example No. 16
## The word-unigram vectorizer used below is missing from this snippet; it is
## restored here to match Example No. 9.
tfidf_word_unigram = TfidfVectorizer(ngram_range=(1, 2),
                                     sublinear_tf=False,
                                     max_features=100)

tfidf_char_bigram = TfidfVectorizer(analyzer='char',
                                    ngram_range=(1, 2),
                                    max_features=100)

## Note: You can use any transformer class that is implemented in accordance with the scikit-learn API (.fit, .transform, .fit_transform, .get_feature_names, etc.)

## Next, put them into a list. Note the use of the text_col class.
custom_features = [
    ('word_features',
     pipeline.Pipeline([
         ('s1', autoBOTLib.feature_constructors.text_col(key='no_stopwords')),
         ('word_tfidf_unigram', tfidf_word_unigram)
     ])),
    ('char_features',
     pipeline.Pipeline([
         ('s2', autoBOTLib.feature_constructors.text_col(key='no_stopwords')),
         ('char_tfidf_bigram', tfidf_char_bigram)
     ]))
]

## Finally, pass this list via the custom_transformer_pipeline argument:
autoBOTLibObj = autoBOTLib.GAlearner(
    train_sequences,
    train_targets,
    time_constraint=1,
    custom_transformer_pipeline=custom_features).evolve()

dataframe2 = pd.read_csv("../data/insults/test.tsv", sep="\t")
test_sequences = dataframe2['text_a'].values.tolist()
predictions = autoBOTLibObj.predict(test_sequences)
Example No. 17
## A simple example showcasing the minimal use case of autoBOTLib on the insults classification dataset.

import autoBOTLib
import pandas as pd

## Load example data frame
dataframe = pd.read_csv("../data/insults/train.tsv", sep="\t")
train_sequences = dataframe['text_a'].values.tolist()
train_targets = dataframe['label'].values

autoBOTLibObj = autoBOTLib.GAlearner(train_sequences,
                               train_targets,
                               time_constraint=0.1).evolve()

dataframe2 = pd.read_csv("../data/insults/test.tsv", sep="\t")
test_sequences = dataframe2['text_a'].values.tolist()
predictions = autoBOTLibObj.predict(test_sequences)
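
## A short evaluation sketch using the helper shown in Example No. 6; it
## assumes the test split carries a 'label' column:
test_targets = dataframe2['label'].values
performance = autoBOTLib.compute_metrics("minimal_run", predictions, test_targets)
print(performance)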
Example No. 18
## These are the hyperparameters to be explored for each representation.
classifier_hyperparameters = {
    "loss": ["hinge"],
    "penalty": ["elasticnet"],
    "alpha": [0.01, 0.001],
    "l1_ratio": [0, 0.001, 1]
}

## This is the classifier compatible with the hyperparameters.
custom_classifier = SGDClassifier()

autoBOTLibObj = autoBOTLib.GAlearner(
    train_sequences,  # input sequences
    train_targets,  # target space 
    time_constraint=1,  # time in hours
    num_cpu=4,  # number of CPUs to use
    task_name="example test",  # task identifier
    hof_size=3,  # size of the hall of fame
    top_k_importances=25,  # how many top features to output as final ranking
    memory_storage="./memory",
    representation_type="symbolic",
    learner=custom_classifier,
    learner_hyperparameters=classifier_hyperparameters
)  # or neurosymbolic or neural

autoBOTLibObj.evolve(
    nind=10,  ## population size
    strategy="evolution",  ## optimization strategy
    crossover_proba=0.6,  ## crossover rate
    mutpb=0.4)  ## mutation rate
Example No. 19
## The truncated first vectorizer is restored here to match Example No. 12.
tfidf_word_unigram = TfidfVectorizer(ngram_range=(1, 2),
                                     sublinear_tf=False,
                                     max_features=100)

tfidf_char_bigram = TfidfVectorizer(analyzer='char',
                                    ngram_range=(1, 2),
                                    max_features=100)
custom_features = [
    ('word_features_custom',
     pipeline.Pipeline([
         ('s1', autoBOTLib.feature_constructors.text_col(key='no_stopwords')),
         ('word_tfidf_unigram', tfidf_word_unigram)
     ])),
    ('char_features_custom',
     pipeline.Pipeline([
         ('s2', autoBOTLib.feature_constructors.text_col(key='no_stopwords')),
         ('char_tfidf_bigram', tfidf_char_bigram)
     ]))
]

## Finally, use the flag "combine_with_existing_representation" to append the new transformer pipeline to an existing one (e.g., neurosymbolic). This way, you can easily extend the built-in autoBOTLib feature space!
autoBOTLibObj = autoBOTLib.GAlearner(
    train_sequences,
    train_targets,
    time_constraint=1,
    representation_type="neurosymbolic",
    custom_transformer_pipeline=custom_features,
    combine_with_existing_representation=True).evolve()

dataframe2 = pd.read_csv("../data/insults/test.tsv", sep="\t")
test_sequences = dataframe2['text_a'].values.tolist()
predictions = autoBOTLibObj.predict(test_sequences)
Example No. 20
import autoBOTLib
import pandas as pd

## Load example data frame
dataframe = pd.read_csv("../data/insults/train.tsv", sep="\t")
train_sequences = dataframe['text_a'].values.tolist()
train_targets = dataframe['label'].values

autoBOTLibObj = autoBOTLib.GAlearner(
    train_sequences,  # input sequences
    train_targets,  # target space 
    time_constraint=1,  # time in hours
    num_cpu="all",  # number of CPUs to use
    latent_dim=512,  ## latent dim for neural representations
    sparsity=0.05,  ## latent_dim/sparsity dim for sparse representations
    task_name="example test",  # task identifier
    scoring_metric="f1",  # sklearn-compatible scoring metric as the fitness.
    hof_size=3,  # size of the hall of fame
    top_k_importances=25,  # how many top features to output as final ranking
    memory_storage="./memory",  # tripled base for concept features
    representation_type="neurosymbolic")  # or symbolic or neural

autoBOTLibObj.evolve(
    nind=8,  ## population size
    strategy="evolution",  ## optimization strategy
    crossover_proba=0.6,  ## crossover rate
    mutpb=0.4)  ## mutation rate

## Persistence demonstration: storing the model for later use.
autoBOTLib.store_autobot_model(
    autoBOTLibObj, "../stored_models/example_insults_model.pickle")