def test_text_generation(self):
    # Runs generate_text() on the Shakespeare corpus and checks that a
    # 'text_generation' entry was stored in the client's models dictionary.
    text_client = client("tools/data/nlp_data/shakespeare.txt")
    text_client.generate_text()
    # assertIn shows the container's contents on failure, which
    # assertTrue('key' in container) cannot do.
    self.assertIn('text_generation', text_client.models)
def test_text_classification(self):
    # Smoke test: passes as long as text_classification_query() does not
    # raise; nothing is asserted about the resulting models dictionary.
    # NOTE(review): the instruction "get captions" looks copied from the
    # captioning test -- confirm it is intended for sentiment data.
    dataset_path = "tools/data/nlp_data/smallSentimentAnalysis.csv"
    sentiment_client = client(dataset_path)
    sentiment_client.text_classification_query("get captions", epochs=1)
def test_get_ner(self):
    # Runs named_entity_query() and checks that a
    # 'named_entity_recognition' entry was stored in the models dictionary.
    ner_client = client("tools/data/nlp_data/miniDocumentSummarization.csv")
    ner_client.named_entity_query("get ner from text")
    # assertIn shows the container's contents on failure, which
    # assertTrue('key' in container) cannot do.
    self.assertIn('named_entity_recognition', ner_client.models)
    # Drop the stored entry once it has been verified.
    del ner_client.models['named_entity_recognition']
def test_summarization(self):
    # Smoke test: passes as long as summarization_query() does not raise;
    # nothing is asserted about the resulting models dictionary.
    dataset_path = "tools/data/nlp_data/miniDocumentSummarization.csv"
    summary_client = client(dataset_path)
    summary_client.summarization_query("summarize text", epochs=1)
def test_captioning(self):
    # Smoke test: passes as long as image_caption_query() does not raise;
    # nothing is asserted about the resulting models dictionary.
    dataset_path = "tools/data/nlp_data/image-caption.csv"
    caption_client = client(dataset_path)
    caption_client.image_caption_query("get captions", epochs=1)
class TestQueries(unittest.TestCase):
    # Shared by every test below. The analyze() tests read models that the
    # earlier query tests stored on this object, so the tests depend on
    # running in definition order.
    # NOTE(review): presumably that ordering is what @ordered enforces -- confirm.
    newClient = client('tools/data/structured_data/housing.csv')

    # ------------------------------------------------------------------
    # TEST QUERIES
    #
    # Tests some queries in queries.py
    # ------------------------------------------------------------------

    # Tests whether regression_query_ann works without errors, and creates
    # a key in models dictionary
    @ordered
    def test_regression_ann(self):
        self.newClient.regression_query_ann('predict median house value',
                                            epochs=3)
        self.assertIn('regression_ANN', self.newClient.models)
        # Drop the entry so the later test_nn_query assertion cannot pass
        # on this stale model.
        del self.newClient.models['regression_ANN']

    # Tests whether classification_query_ann works without errors, and
    # creates a key in models dictionary
    @ordered
    def test_classification_ann(self):
        self.newClient.classification_query_ann('predict ocean proximity',
                                                epochs=3)
        self.assertIn('classification_ANN', self.newClient.models)
        # Drop the entry so the later test_nn_query assertion cannot pass
        # on this stale model.
        del self.newClient.models['classification_ANN']

    # Tests whether neural_network_query uses the correct model
    @ordered
    def test_nn_query(self):
        # see if properly chooses regression with a numeric target column
        self.newClient.neural_network_query('predict median house value',
                                            epochs=3)
        self.assertIn('regression_ANN', self.newClient.models)

        # see if properly chooses classification with a categorical target
        # column
        self.newClient.neural_network_query('predict ocean proximity',
                                            epochs=3)
        self.assertIn('classification_ANN', self.newClient.models)

    # Disabled test, kept for reference (was wrapped in a ''' string):
    #
    # @ordered
    # def test_convolutional_query(self):
    #     client_image = client("tools/data/image_data/character_dataset_mini")
    #     client_image.convolutional_query("predict character", epochs=2)
    #     self.assertTrue('convolutional_NN' in client_image.models)

    # Tests convolutional_query with a custom architecture file on an
    # already-preprocessed dataset (preprocess=False)
    @ordered
    def test_convolutional_query_customarch(self):
        data_path = "tools/data/image_data/character_dataset_mini_preprocessed"
        client_image_customarch = client(data_path)
        custom_arch_path = "tools/data/custom_model_config/custom_CNN.json"
        client_image_customarch.convolutional_query(
            "predict character",
            data_path=data_path,
            custom_arch=custom_arch_path,
            preprocess=False,
            epochs=2)
        self.assertIn('convolutional_NN', client_image_customarch.models)

    # Tests convolutional_query with a pretrained architecture/weights spec
    @ordered
    def test_convolutional_query_pretrained(self):
        client_image = client("tools/data/image_data/character_dataset_mini")
        client_image.convolutional_query(
            "predict character",
            pretrained={
                'arch': 'vggnet19',
                'weights': 'imagenet'
            },
            epochs=2)
        self.assertIn('convolutional_NN', client_image.models)

    # Tests whether decision_tree_query works without errors, and creates a
    # key in models dictionary
    @ordered
    def test_decision_tree(self):
        self.newClient.decision_tree_query('predict ocean proximity')
        self.assertIn('decision_tree', self.newClient.models)

    # Tests whether svm_query works without errors, and creates a key in
    # models dictionary
    @ordered
    def test_svm(self):
        self.newClient.svm_query('predict ocean proximity')
        self.assertIn('svm', self.newClient.models)

    # Tests whether nearest_neighbor_query works without errors, and creates
    # a key in models dictionary
    @ordered
    def test_nearest_neighbors(self):
        self.newClient.nearest_neighbor_query('predict ocean proximity')
        self.assertIn('nearest_neighbor', self.newClient.models)

    # Tests whether kmeans_clustering_query works without errors, and
    # creates a key in models dictionary
    @ordered
    def test_kmeans(self):
        self.newClient.kmeans_clustering_query(clusters=4)
        self.assertIn('k_means_clustering', self.newClient.models)

    # Tests whether xgboost_query works without errors, and creates a key in
    # models dictionary
    @ordered
    def test_xgboost(self):
        self.newClient.xgboost_query('predict ocean proximity')
        self.assertIn('xgboost', self.newClient.models)

    # Smoke test: summarization_query runs without errors (the models
    # dictionary is not checked)
    @ordered
    def test_summarization(self):
        x = client("tools/data/nlp_data/miniDocumentSummarization.csv")
        x.summarization_query("summarize text", epochs=1)

    # Smoke test: image_caption_query runs without errors (the models
    # dictionary is not checked)
    @ordered
    def test_captioning(self):
        x = client("tools/data/nlp_data/image-caption.csv")
        x.image_caption_query("get captions", epochs=1)

    # Smoke test: text_classification_query runs without errors (the models
    # dictionary is not checked)
    @ordered
    def test_text_classification(self):
        x = client("tools/data/nlp_data/smallSentimentAnalysis.csv")
        x.text_classification_query("get captions", epochs=1)

    # Tests whether get_named_entities works without errors, and creates a
    # key in models dictionary
    @ordered
    def test_get_ner(self):
        x = client("tools/data/nlp_data/miniDocumentSummarization.csv")
        x.get_named_entities("get ner from text")
        self.assertIn('named_entity_recognition', x.models)
        del x.models['named_entity_recognition']

    # Tests whether the content based recommender works without error, and
    # that recommend() returns a result containing 'recommendations'
    @ordered
    def test_content_recommender(self):
        x = client('tools/data/recommender_systems_data/disney_plus_shows.csv')
        x.content_recommender_query()
        # A bare `assert` is stripped under `python -O`; assertIn is not.
        self.assertIn('recommendations', x.recommend('Coco'))

    # ------------------------------------------------------------------
    # TEST ANALYZE() FUNCTION
    #
    # Tests all branches of .analyze() function in generate_plots
    # ------------------------------------------------------------------

    # Tests analyze() function for k_means_clustering
    @ordered
    def test_analyze_kmeans(self):
        self.newClient.analyze(model='k_means_clustering')
        kmeans_entry = self.newClient.models['k_means_clustering']
        self.assertIn('n_centers', kmeans_entry)
        self.assertIn('centroids', kmeans_entry)
        self.assertIn('inertia', kmeans_entry)

    # Tests analyze() function on regression_ANN
    @ordered
    def test_analyze_regression(self):
        self.newClient.analyze(model='regression_ANN')
        regression_entry = self.newClient.models['regression_ANN']
        self.assertIn('MSE', regression_entry)
        self.assertIn('MAE', regression_entry)

    # Tests analyze() function on classification_ANN
    @ordered
    def test_analyze_classification(self):
        self.newClient.analyze(model='classification_ANN')
        classification_entry = self.newClient.models['classification_ANN']
        self.assertIn('plots', classification_entry)
        self.assertIn('roc_curve', classification_entry['plots'])
        self.assertIn('confusion_matrix', classification_entry['plots'])
        self.assertIn('scores', classification_entry)
        self.assertIn('recall_score', classification_entry['scores'])
        self.assertIn('precision_score', classification_entry['scores'])
        self.assertIn('f1_score', classification_entry['scores'])

    # Tests analyze() function for classifier models
    @ordered
    def test_analyze_sklearn_classifiers(self):
        for mod in ['svm', 'nearest_neighbor', 'decision_tree', 'xgboost']:
            # subTest reports each model separately, so one failing model
            # does not hide the results for the remaining ones.
            with self.subTest(model=mod):
                self.newClient.analyze(model=mod)
                modeldict = self.newClient.models[mod]

                self.assertIn('plots', modeldict)
                self.assertIn('roc_curve', modeldict['plots'])
                self.assertIn('confusion_matrix', modeldict['plots'])
                self.assertIn('scores', modeldict)
                self.assertIn('recall_score', modeldict['scores'])
                self.assertIn('precision_score', modeldict['scores'])
                self.assertIn('f1_score', modeldict['scores'])

    # Tests invalid model input
    @ordered
    def test_invalid_model(self):
        with self.assertRaises(NameError):
            self.newClient.analyze(model='I dont exist')
class TestQueries(unittest.TestCase):
    # Shared by every test below. The analyze() tests read models that the
    # earlier query tests stored on this object, so the tests depend on
    # running in definition order.
    # NOTE(review): presumably that ordering is what @ordered enforces -- confirm.
    newClient = client('tools/data/structured_data/housing.csv')

    # ------------------------------------------------------------------
    # TEST QUERIES
    #
    # Tests some queries in queries.py
    # ------------------------------------------------------------------

    # Tests whether regression_query_ann works without errors, and creates
    # a key in models dictionary
    @ordered
    def test_regression_ann(self):
        self.newClient.regression_query_ann('predict median house value',
                                            epochs=3)
        self.assertIn('regression_ANN', self.newClient.models)
        # Drop the entry so the later test_nn_query assertion cannot pass
        # on this stale model.
        del self.newClient.models['regression_ANN']

    # Tests whether classification_query_ann works without errors, and
    # creates a key in models dictionary
    @ordered
    def test_classification_ann(self):
        self.newClient.classification_query_ann('predict ocean proximity',
                                                epochs=3)
        self.assertIn('classification_ANN', self.newClient.models)
        # Drop the entry so the later test_nn_query assertion cannot pass
        # on this stale model.
        del self.newClient.models['classification_ANN']

    # Tests whether neural_network_query uses the correct model
    @ordered
    def test_nn_query(self):
        # see if properly chooses regression with a numeric target column
        self.newClient.neural_network_query('predict median house value',
                                            epochs=3)
        self.assertIn('regression_ANN', self.newClient.models)

        # see if properly chooses classification with a categorical target
        # column
        self.newClient.neural_network_query('predict ocean proximity',
                                            epochs=3)
        self.assertIn('classification_ANN', self.newClient.models)

    # Tests whether decision_tree_query works without errors, and creates a
    # key in models dictionary
    @ordered
    def test_decision_tree(self):
        self.newClient.decision_tree_query('predict ocean proximity')
        self.assertIn('decision_tree', self.newClient.models)

    # Tests whether svm_query works without errors, and creates a key in
    # models dictionary
    @ordered
    def test_svm(self):
        self.newClient.svm_query('predict ocean proximity')
        self.assertIn('svm', self.newClient.models)

    # Tests whether nearest_neighbor_query works without errors, and creates
    # a key in models dictionary
    @ordered
    def test_nearest_neighbors(self):
        self.newClient.nearest_neighbor_query('predict ocean proximity')
        self.assertIn('nearest_neighbor', self.newClient.models)

    # Tests whether kmeans_clustering_query works without errors, and
    # creates a key in models dictionary
    @ordered
    def test_kmeans(self):
        self.newClient.kmeans_clustering_query(clusters=4)
        self.assertIn('k_means_clustering', self.newClient.models)

    # Tests whether xgboost_query works without errors, and creates a key in
    # models dictionary
    @ordered
    def test_xgboost(self):
        self.newClient.xgboost_query('predict ocean proximity')
        self.assertIn('xgboost', self.newClient.models)

    # Smoke test: summarization_query runs without errors (the models
    # dictionary is not checked)
    @ordered
    def test_summarization(self):
        x = client("tools/data/nlp_data/miniDocumentSummarization.csv")
        x.summarization_query("summarize text", epochs=1)

    # Smoke test: image_caption_query runs without errors (the models
    # dictionary is not checked)
    @ordered
    def test_captioning(self):
        x = client("tools/data/nlp_data/image-caption.csv")
        x.image_caption_query("get captions", epochs=1)

    # Smoke test: text_classification_query runs without errors (the models
    # dictionary is not checked)
    @ordered
    def test_text_classification(self):
        x = client("tools/data/nlp_data/smallSentimentAnalysis.csv")
        x.text_classification_query("get captions", epochs=1)

    # ------------------------------------------------------------------
    # TEST ANALYZE() FUNCTION
    #
    # Tests all branches of .analyze() function in generate_plots
    # ------------------------------------------------------------------

    # Tests analyze() function for k_means_clustering
    @ordered
    def test_analyze_kmeans(self):
        self.newClient.analyze(model='k_means_clustering')
        kmeans_entry = self.newClient.models['k_means_clustering']
        self.assertIn('n_centers', kmeans_entry)
        self.assertIn('centroids', kmeans_entry)
        self.assertIn('inertia', kmeans_entry)

    # Tests analyze() function on regression_ANN
    @ordered
    def test_analyze_regression(self):
        self.newClient.analyze(model='regression_ANN')
        regression_entry = self.newClient.models['regression_ANN']
        self.assertIn('MSE', regression_entry)
        self.assertIn('MAE', regression_entry)

    # Tests analyze() function on classification_ANN
    @ordered
    def test_analyze_classification(self):
        self.newClient.analyze(model='classification_ANN')
        classification_entry = self.newClient.models['classification_ANN']
        self.assertIn('plots', classification_entry)
        self.assertIn('roc_curve', classification_entry['plots'])
        self.assertIn('confusion_matrix', classification_entry['plots'])
        self.assertIn('scores', classification_entry)
        self.assertIn('recall_score', classification_entry['scores'])
        self.assertIn('precision_score', classification_entry['scores'])
        self.assertIn('f1_score', classification_entry['scores'])

    # Tests analyze() function for classifier models
    @ordered
    def test_analyze_sklearn_classifiers(self):
        for mod in ['svm', 'nearest_neighbor', 'decision_tree', 'xgboost']:
            # subTest reports each model separately, so one failing model
            # does not hide the results for the remaining ones.
            with self.subTest(model=mod):
                self.newClient.analyze(model=mod)
                modeldict = self.newClient.models[mod]

                self.assertIn('plots', modeldict)
                self.assertIn('roc_curve', modeldict['plots'])
                self.assertIn('confusion_matrix', modeldict['plots'])
                self.assertIn('scores', modeldict)
                self.assertIn('recall_score', modeldict['scores'])
                self.assertIn('precision_score', modeldict['scores'])
                self.assertIn('f1_score', modeldict['scores'])

    # Tests invalid model input
    @ordered
    def test_invalid_model(self):
        with self.assertRaises(NameError):
            self.newClient.analyze(model='I dont exist')
def test_content_recommender(self):
    # Builds the content-based recommender on the Disney+ shows dataset and
    # checks that recommend('Coco') returns a result containing
    # 'recommendations'.
    shows_client = client('tools/data/recommender_systems_data/disney_plus_shows.csv')
    shows_client.content_recommender_query()
    # A bare `assert` is stripped under `python -O` and gives no diagnostic;
    # assertIn always runs and shows the returned container on failure.
    self.assertIn('recommendations', shows_client.recommend('Coco'))
# -*- coding: utf-8 -*-
"""Using the Decision Tree Query through Libra

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1mLgo2BMORoe_OzvxdRvoWfG787YWD9Fu
"""

# The line below is an IPython/Colab shell escape, which is a SyntaxError in
# a plain .py file. Install the dependency from a shell before running this
# script (pip install libra), or un-comment the line inside a notebook cell.
# !pip install libra

from libra import client

"""Here's the link for the used dataset: https://archive.ics.uci.edu/ml/machine-learning-databases/00529/
"""

# Build a client on the diabetes dataset and fit a decision tree from a
# plain-English instruction.
dia_client = client('diabetes_data_upload.csv')
dia_client.decision_tree_query('Predict diabetes by class')

"""Let's analyze the model"""

dia_client.analyze()

"""You can use .model() to access the entire dictionary too. Below, we use it to get the plots generated for this specific model."""

# Bare expression: a notebook cell displays its value; run as a script it
# is evaluated and discarded.
dia_client.model()['plots']
Here's another great example of how to use Libra. Let's assume we want to analyze some data and classify wheat types. First, we install and import Libra. """ !pip install libra from libra import client """Now, we'll be uploading our dataset. Since Colab resets storage, you'll have to upload it again; the file is available at https://drive.google.com/file/d/15jM8aW9sw18efiQj39BFEbZn6TAAKEVf/view?usp=sharing The original, unformatted version is here: http://archive.ics.uci.edu/ml/datasets/seeds Now, let's create the client object. """ client_wheat = client('wheat.csv') """After the dataset has been uploaded, we can start creating a model to analyze it. Here we use a support vector machine (SVM) to classify the examples based on their wheat type.""" client_wheat.svm_query('Classify by wheat type') """Here are some other useful methods""" client_wheat.analyze() """Here's how to get more specific information. client_name.models['query type'] will return all the information available. client_name.info() will give you the keys for the model. You can then use these to access the specific information that you require. """ client_wheat.models['svm']
# Installing Libra """ pip install -U libra """# Importing client from libra Everything is built around the client object. You can call different queries on it and everything will be stored under the models field of the object. """ from libra import client """# Using Libra We pass the location of our file to the client object and name it newClient. To access the various queries, refer to the documentation. I am using a decision tree here, so the command for it is newClient.decision_tree_query(), and we pass in an instruction. An instruction is an English-language statement that represents the task you would like to be completed, e.g. 'predict the median house value' or 'please estimate the number of households'. It should correspond to a column in the dataset. Libra automatically detects the target column, but just to be sure that it selects the right column, I have passed the target column name. """ newClient = client('/content/drive/My Drive/Colab Notebooks/creditcard.csv') newClient.decision_tree_query('Class') """With just two lines of code we acquired a score of approximately 0.99, which is the best we can get. If you check out other kernels you will observe that only a handful of people got 0.99 accuracy, and that it took them hours to preprocess the data and write code for it. In that case Libra saves you a lot of time and gives the best result. Libra uses something known as intelligent preprocessing so that you don't need to preprocess the data all by yourself. # You don't need to worry about analysing the results. newClient.analyze() creates a confusion matrix and ROC curve for all classification problems. It also calculates recall, precision, f1, and f2 score. """ newClient.analyze() """newClient.info() returns all the keys, representing each category of data generated for the dataset.""" newClient.info() """`newClient.model()` returns a dictionary of that model. It includes everything from accuracy, precision, recall, and F1 
score to all the preprocessing techniques that have been used. It's more helpful for people who already know about these concepts and can code them. Non-tech users need not worry about this."""
#!/usr/local/bin/python3.7
import pandas as pd
import libra
from libra import client

# One Libra client plus a plain DataFrame view of the same CSV file.
clous = client('housing.csv')
data = pd.read_csv('housing.csv')

# Bare expressions left over from interactive use: a notebook/REPL would
# display them; run as a script they are evaluated and discarded.
data.columns
data.head(10)

# First query: neural network on the ocean-proximity column, followed by
# the client's reporting helpers.
clous.neural_network_query('estimate ocean proximity', epochs=30)
clous.analyze()
clous.accuracy()
clous.losses()
clous.info()

# Second query: neural network on median house value with the
# ocean_proximity column dropped; save_model=True is passed through to Libra.
clous.neural_network_query(
    'model median house value',
    drop=['ocean_proximity'],
    save_model=True,
)