def run():
    """End-to-end autoBOT demo: train on the insults data, persist the model,
    reload it, evaluate on the test split and inspect the evolved solution."""
    # Load the training split.
    train_df = pd.read_csv("../data/insults/train.tsv", sep="\t")
    texts = train_df['text_a'].values.tolist()
    labels = train_df['label'].values

    learner = autoBOTLib.GAlearner(
        texts,  # input sequences
        labels,  # target space
        time_constraint=3,  # time budget in hours
        num_cpu="all",  # number of CPUs to use
        latent_dim=768,  # latent dim for neural representations
        sparsity=0.1,  # latent_dim/sparsity dim for sparse representations
        task_name="example test",  # task identifier
        scoring_metric="f1",  # sklearn-compatible metric used as the fitness
        hof_size=3,  # size of the hall of fame
        top_k_importances=25,  # how many top features in the final ranking
        memory_storage="./memory",  # triple base for concept features
        representation_type="neurosymbolic")  # or symbolic or neural

    learner.evolve(
        nind=8,  # population size
        strategy="evolution",  # optimization strategy
        crossover_proba=0.6,  # crossover rate
        mutpb=0.4)  # mutation rate

    # Persistence demonstration: store the model, then load it back.
    autoBOTLib.store_autobot_model(
        learner, "../stored_models/example_insults_model.pickle")
    learner = autoBOTLib.load_autobot_model(
        "../stored_models/example_insults_model.pickle")

    # Score the held-out test split with the reloaded model.
    test_df = pd.read_csv("../data/insults/test.tsv", sep="\t")
    test_texts = test_df['text_a'].values.tolist()
    test_labels = test_df['label'].values
    predictions = learner.predict(test_texts)
    print(predictions)

    # Compute F1, acc and F1_acc (as in GLUE) and show the result.
    performance = autoBOTLib.compute_metrics(
        "first_run_task_name", predictions, test_labels)
    print(performance)

    # Importances: global -> feature type, local -> individual features.
    importances_local, importances_global = learner.feature_type_importances()
    print(importances_global)
    print(importances_local)

    final_learners = learner.summarise_final_learners()
    print(final_learners)

    # Visualize the fitness trace across generations.
    fitness_summary = learner.visualize_fitness(image_path="./fitness_new.png")
    print(fitness_summary)
def run():
    """autoBOT torch-framework demo: train a learner, persist it, reload it,
    and predict the test split with the *reloaded* model."""
    # Load the training split. (The original appended a no-op `.iloc[:]`.)
    dataframe = pd.read_csv("../data/insults/train.tsv", sep="\t")
    train_sequences = dataframe['text_a']
    train_targets = dataframe['label']

    reptype = "neurosymbolic"
    learner = autoBOTLib.GAlearner(
        train_sequences,
        train_targets,
        representation_type=reptype,  # see docs for all representation types
        n_fold_cv=3,
        framework="torch",
        memory_storage="memory",
        learner_preset="default",
        verbose=1,
        sparsity=0.1,
        visualize_progress=True,  # stores progress as PROGRESS_{generation}.pdf
        upsample=False,  # randomized upsampling tends to help imbalanced data
        time_constraint=1)
    # evolve() is a separate statement so `learner` stays bound to the
    # GAlearner instance regardless of evolve()'s return value (the original
    # assigned the chained call's result, which need not be the learner).
    # strategy="direct-learning" trains a single learner instead.
    learner.evolve(strategy="evolution", nind=3)

    # Store, then reload the model.
    autoBOTLib.store_autobot_model(learner, "model.pickle")
    loaded_learner = autoBOTLib.load_autobot_model("model.pickle")

    # Predict with the reloaded model. The original loaded the model into a
    # separate variable but then predicted with the in-memory object, leaving
    # the persistence round trip untested.
    dataframe2 = pd.read_csv("../data/insults/test.tsv", sep="\t")
    test_sequences = dataframe2['text_a']
    predictions = loaded_learner.predict(test_sequences)
    loaded_learner.predict_proba(test_sequences)
    # loaded_learner.generate_report(output_folder="./report/",
    #                                job_id="REPORTNEW")

    test_classes = dataframe2['label'].values.tolist()
    output_classification_results(predictions,
                                  test_classes,
                                  "./predictions/TORCH.json",
                                  model_spec={})
def run():
    """Generic autoBOT training template: load a TSV, evolve a model, persist
    it under a unique job id, reload it and predict.

    Raises:
        ValueError: if ``df_path`` has not been filled in yet.
    """
    jid = secrets.token_hex(nbytes=16)  # unique job id used in file names

    # TODO: point this at the training TSV before running.
    df_path = None
    if df_path is None:
        # Fail fast with a clear message instead of the opaque pandas error
        # the original raised on read_csv(None).
        raise ValueError("df_path must be set to a training TSV before run()")

    dataframe = pd.read_csv(df_path, sep="\t")
    # Assumes the example schema used elsewhere in these demos (columns
    # 'text_a' / 'label') — adjust for other datasets. The original left both
    # as None placeholders (train_sequences was even assigned None twice) and
    # then crashed on len(None).
    train_sequences = dataframe['text_a'].values.tolist()
    train_targets = dataframe['label'].values
    print(len(train_sequences))
    print(len(train_targets))

    classx = "genericTargetName"
    autoBOTObj = autoBOTLib.GAlearner(
        train_sequences,  # input sequences
        train_targets,  # target space
        time_constraint=1,  # time budget in hours
        num_cpu=32,  # number of CPUs to use
        sparsity=0.1,
        task_name="example test",  # task identifier
        scoring_metric="f1",  # sklearn-compatible metric used as the fitness
        hof_size=3,  # size of the hall of fame
        top_k_importances=25,  # how many top features in the final ranking
        memory_storage="./memory",  # triple base for concept features
        representation_type="neurosymbolic")  # or symbolic or neural
    autoBOTObj.evolve(
        nind=8,  # population size
        strategy="evolution",  # optimization strategy
        crossover_proba=0.6,  # crossover rate
        mutpb=0.4)  # mutation rate

    # Build the path once so store and load cannot drift apart.
    model_path = f"./models/{jid}_{classx}_model.pickle"
    autoBOTLib.store_autobot_model(autoBOTObj, model_path)

    # TODO: supply held-out sequences to score with the reloaded model.
    test_sequences = None
    autoBOTObj = autoBOTLib.load_autobot_model(model_path)
    autoBOTObj.predict(test_sequences)
scoring_metric="f1", # sklearn-compatible scoring metric as the fitness. hof_size=3, # size of the hall of fame top_k_importances=25, # how many top features to output as final ranking memory_storage="./memory", # tripled base for concept features representation_type="neurosymbolic") # or symbolic or neural autoBOTLibObj.evolve( nind=8, ## population size strategy="evolution", ## optimization strategy crossover_proba=0.6, ## crossover rate mutpb=0.4) ## mutation rate ## Persistence demonstration (how to store models for further use?) autoBOTLib.store_autobot_model( autoBOTLibObj, "../stored_models/example_insults_model.pickle") autoBOTLibObj = autoBOTLib.load_autobot_model( "../stored_models/example_insults_model.pickle") dataframe2 = pd.read_csv("../data/insults/test.tsv", sep="\t") test_sequences = dataframe2['text_a'].values.tolist() test_targets = dataframe2['label'].values predictions = autoBOTLibObj.predict(test_sequences) performance = autoBOTLib.compute_metrics( "first_run_task_name", predictions, test_targets) ## compute F1, acc and F1_acc (as in GLUE) ## visualize performance print(performance) ## Visualize importances (global -> type, local -> individual features) importances_local, importances_global = autoBOTLibObj.feature_type_importances( )