Example #1
0
 def test_text_generation(self):
     # Runs generate_text() on the Shakespeare text file and checks that a
     # 'text_generation' entry shows up in the client's models dictionary.
     text_client = client("tools/data/nlp_data/shakespeare.txt")
     text_client.generate_text()
     # assertIn reports the missing key and the container on failure,
     # whereas assertTrue(... in ...) only reports "False is not true".
     self.assertIn('text_generation', text_client.models)
Example #2
0
 def test_text_classification(self):
     # Smoke test: nothing is asserted, so this passes as long as the
     # text classification query below completes without raising.
     sentiment_client = client(
         "tools/data/nlp_data/smallSentimentAnalysis.csv")
     sentiment_client.text_classification_query(
         "get captions",
         epochs=1)
Example #3
0
 def test_get_ner(self):
     # Runs the named-entity query and checks that it registers a
     # 'named_entity_recognition' entry in the client's models dictionary.
     ner_client = client("tools/data/nlp_data/miniDocumentSummarization.csv")
     ner_client.named_entity_query("get ner from text")
     # assertIn gives a readable failure message (key and container).
     self.assertIn('named_entity_recognition', ner_client.models)
     # Drop the entry again once it has been verified.
     del ner_client.models['named_entity_recognition']
Example #4
0
 def test_summarization(self):
     # Smoke test: nothing is asserted, so this passes as long as the
     # summarization query below completes without raising.
     summary_client = client(
         "tools/data/nlp_data/miniDocumentSummarization.csv")
     summary_client.summarization_query(
         "summarize text",
         epochs=1)
Example #5
0
 def test_captioning(self):
     # Smoke test: nothing is asserted, so this passes as long as the
     # image caption query below completes without raising.
     caption_client = client("tools/data/nlp_data/image-caption.csv")
     caption_client.image_caption_query(
         "get captions",
         epochs=1)
Example #6
0
class TestQueries(unittest.TestCase):
    """Tests for the client queries and for the analyze() function.

    The tests share the class-level ``newClient`` and are order dependent:
    the analyze() tests read entries of ``newClient.models`` that the query
    tests create. NOTE(review): the ``@ordered`` decorator presumably keeps
    the tests in definition order -- confirm against its definition.
    """

    # Shared client; created once, when the class body is executed.
    newClient = client('tools/data/structured_data/housing.csv')

    # ------------------------------------------------------------------
    # TEST QUERIES
    #
    # Tests some queries in queries.py
    # ------------------------------------------------------------------

    # Tests whether regression_query_ann works without errors, and creates a
    # key in the models dictionary. The key is removed again afterwards.
    @ordered
    def test_regression_ann(self):
        self.newClient.regression_query_ann('predict median house value',
                                            epochs=3)
        self.assertIn('regression_ANN', self.newClient.models)
        del self.newClient.models['regression_ANN']

    # Tests whether classification_query_ann works without errors, and
    # creates a key in the models dictionary. The key is removed afterwards.
    @ordered
    def test_classification_ann(self):
        self.newClient.classification_query_ann('predict ocean proximity',
                                                epochs=3)
        self.assertIn('classification_ANN', self.newClient.models)
        del self.newClient.models['classification_ANN']

    # Tests whether neural_network_query uses the correct model. Both entries
    # are left in place; the analyze() tests below look them up.
    @ordered
    def test_nn_query(self):
        # see if properly chooses regression with a numeric target column
        self.newClient.neural_network_query('predict median house value',
                                            epochs=3)
        self.assertIn('regression_ANN', self.newClient.models)

        # see if properly chooses classification with a categorical target column
        self.newClient.neural_network_query('predict ocean proximity',
                                            epochs=3)
        self.assertIn('classification_ANN', self.newClient.models)

    # Disabled test, kept for reference:
    #
    # @ordered
    # def test_convolutional_query(self):
    #     client_image = client("tools/data/image_data/character_dataset_mini")
    #     client_image.convolutional_query("predict character", epochs=2)
    #     self.assertTrue('convolutional_NN' in client_image.models)

    # Tests convolutional_query with a custom architecture file on the
    # preprocessed image dataset (preprocess=False).
    @ordered
    def test_convolutional_query_customarch(self):
        data_path = "tools/data/image_data/character_dataset_mini_preprocessed"
        client_image_customarch = client(data_path)
        custom_arch_path = "tools/data/custom_model_config/custom_CNN.json"

        client_image_customarch.convolutional_query(
            "predict character",
            data_path=data_path,
            custom_arch=custom_arch_path,
            preprocess=False,
            epochs=2)
        self.assertIn('convolutional_NN', client_image_customarch.models)

    # Tests convolutional_query with a pretrained architecture specification.
    @ordered
    def test_convolutional_query_pretrained(self):
        client_image = client("tools/data/image_data/character_dataset_mini")
        client_image.convolutional_query("predict character",
                                         pretrained={
                                             'arch': 'vggnet19',
                                             'weights': 'imagenet'
                                         },
                                         epochs=2)
        self.assertIn('convolutional_NN', client_image.models)

    # Tests whether decision_tree_query works without errors, and creates a key in models dictionary
    @ordered
    def test_decision_tree(self):
        self.newClient.decision_tree_query('predict ocean proximity')
        self.assertIn('decision_tree', self.newClient.models)

    # Tests whether svm_query works without errors, and creates a key in models dictionary
    @ordered
    def test_svm(self):
        self.newClient.svm_query('predict ocean proximity')
        self.assertIn('svm', self.newClient.models)

    # Tests whether nearest_neighbor_query works without errors, and creates a key in models dictionary
    @ordered
    def test_nearest_neighbors(self):
        self.newClient.nearest_neighbor_query('predict ocean proximity')
        self.assertIn('nearest_neighbor', self.newClient.models)

    # Tests whether kmeans_clustering_query works without errors, and creates a key in models dictionary
    @ordered
    def test_kmeans(self):
        self.newClient.kmeans_clustering_query(clusters=4)
        self.assertIn('k_means_clustering', self.newClient.models)

    # Tests whether xgboost_query works without errors, and creates a key in models dictionary
    @ordered
    def test_xgboost(self):
        self.newClient.xgboost_query('predict ocean proximity')
        self.assertIn('xgboost', self.newClient.models)

    # Smoke test: only checks that summarization_query runs without errors
    # (no assertion is made on the models dictionary).
    @ordered
    def test_summarization(self):
        x = client("tools/data/nlp_data/miniDocumentSummarization.csv")
        x.summarization_query("summarize text", epochs=1)

    # Smoke test: only checks that image_caption_query runs without errors
    # (no assertion is made on the models dictionary).
    @ordered
    def test_captioning(self):
        x = client("tools/data/nlp_data/image-caption.csv")
        x.image_caption_query("get captions", epochs=1)

    # Smoke test: only checks that text_classification_query runs without
    # errors (no assertion is made on the models dictionary).
    @ordered
    def test_text_classification(self):
        x = client("tools/data/nlp_data/smallSentimentAnalysis.csv")
        x.text_classification_query("get captions", epochs=1)

    # Tests whether the named entity recognition query works without errors,
    # and creates a key in the models dictionary (removed again afterwards).
    @ordered
    def test_get_ner(self):
        x = client("tools/data/nlp_data/miniDocumentSummarization.csv")
        x.get_named_entities("get ner from text")
        self.assertIn('named_entity_recognition', x.models)
        del x.models['named_entity_recognition']

    # Tests whether the content based recommender works without error, and
    # that recommend() returns something containing 'recommendations'.
    @ordered
    def test_content_recommender(self):
        x = client('tools/data/recommender_systems_data/disney_plus_shows.csv')
        x.content_recommender_query()
        # A bare `assert` is removed when Python runs with -O; assertIn
        # always executes and reports both operands on failure.
        self.assertIn('recommendations', x.recommend('Coco'))

    # ------------------------------------------------------------------
    # TEST ANALYZE() FUNCTION
    #
    # Tests all branches of .analyze() function in generate_plots
    # ------------------------------------------------------------------

    # Tests analyze() function for k_means_clustering
    @ordered
    def test_analyze_kmeans(self):
        self.newClient.analyze(model='k_means_clustering')
        kmeans_entry = self.newClient.models['k_means_clustering']
        self.assertIn('n_centers', kmeans_entry)
        self.assertIn('centroids', kmeans_entry)
        self.assertIn('inertia', kmeans_entry)

    # Tests analyze() function on regression_ANN
    @ordered
    def test_analyze_regression(self):
        self.newClient.analyze(model='regression_ANN')
        regression_entry = self.newClient.models['regression_ANN']
        self.assertIn('MSE', regression_entry)
        self.assertIn('MAE', regression_entry)

    # Tests analyze() function on classification_ANN
    @ordered
    def test_analyze_classification(self):
        self.newClient.analyze(model='classification_ANN')
        classification_entry = self.newClient.models['classification_ANN']

        self.assertIn('plots', classification_entry)
        self.assertIn('roc_curve', classification_entry['plots'])
        self.assertIn('confusion_matrix', classification_entry['plots'])

        self.assertIn('scores', classification_entry)
        self.assertIn('recall_score', classification_entry['scores'])
        self.assertIn('precision_score', classification_entry['scores'])
        self.assertIn('f1_score', classification_entry['scores'])

    # Tests analyze() function for classifier models
    @ordered
    def test_analyze_sklearn_classifiers(self):
        for mod in ('svm', 'nearest_neighbor', 'decision_tree', 'xgboost'):
            # subTest labels any failure with the model being checked.
            with self.subTest(model=mod):
                self.newClient.analyze(model=mod)
                modeldict = self.newClient.models[mod]

                self.assertIn('plots', modeldict)
                self.assertIn('roc_curve', modeldict['plots'])
                self.assertIn('confusion_matrix', modeldict['plots'])

                self.assertIn('scores', modeldict)
                self.assertIn('recall_score', modeldict['scores'])
                self.assertIn('precision_score', modeldict['scores'])
                self.assertIn('f1_score', modeldict['scores'])

    # Tests invalid model input: analyze() must raise NameError
    @ordered
    def test_invalid_model(self):
        with self.assertRaises(NameError):
            self.newClient.analyze(model='I dont exist')
Example #7
0
class TestQueries(unittest.TestCase):
    """Tests for the client queries and for the analyze() function.

    The tests share the class-level ``newClient`` and are order dependent:
    the analyze() tests read entries of ``newClient.models`` that the query
    tests create. NOTE(review): the ``@ordered`` decorator presumably keeps
    the tests in definition order -- confirm against its definition.
    """

    # Shared client; created once, when the class body is executed.
    newClient = client('tools/data/structured_data/housing.csv')

    # ------------------------------------------------------------------
    # TEST QUERIES
    #
    # Tests some queries in queries.py
    # ------------------------------------------------------------------

    # Tests whether regression_query_ann works without errors, and creates a
    # key in the models dictionary. The key is removed again afterwards.
    @ordered
    def test_regression_ann(self):
        self.newClient.regression_query_ann('predict median house value',
                                            epochs=3)
        self.assertIn('regression_ANN', self.newClient.models)
        del self.newClient.models['regression_ANN']

    # Tests whether classification_query_ann works without errors, and
    # creates a key in the models dictionary. The key is removed afterwards.
    @ordered
    def test_classification_ann(self):
        self.newClient.classification_query_ann('predict ocean proximity',
                                                epochs=3)
        self.assertIn('classification_ANN', self.newClient.models)
        del self.newClient.models['classification_ANN']

    # Tests whether neural_network_query uses the correct model. Both entries
    # are left in place; the analyze() tests below look them up.
    @ordered
    def test_nn_query(self):
        # see if properly chooses regression with a numeric target column
        self.newClient.neural_network_query('predict median house value',
                                            epochs=3)
        self.assertIn('regression_ANN', self.newClient.models)

        # see if properly chooses classification with a categorical target column
        self.newClient.neural_network_query('predict ocean proximity',
                                            epochs=3)
        self.assertIn('classification_ANN', self.newClient.models)

    # Tests whether decision_tree_query works without errors, and creates a key in models dictionary
    @ordered
    def test_decision_tree(self):
        self.newClient.decision_tree_query('predict ocean proximity')
        self.assertIn('decision_tree', self.newClient.models)

    # Tests whether svm_query works without errors, and creates a key in models dictionary
    @ordered
    def test_svm(self):
        self.newClient.svm_query('predict ocean proximity')
        self.assertIn('svm', self.newClient.models)

    # Tests whether nearest_neighbor_query works without errors, and creates a key in models dictionary
    @ordered
    def test_nearest_neighbors(self):
        self.newClient.nearest_neighbor_query('predict ocean proximity')
        self.assertIn('nearest_neighbor', self.newClient.models)

    # Tests whether kmeans_clustering_query works without errors, and creates a key in models dictionary
    @ordered
    def test_kmeans(self):
        self.newClient.kmeans_clustering_query(clusters=4)
        self.assertIn('k_means_clustering', self.newClient.models)

    # Tests whether xgboost_query works without errors, and creates a key in models dictionary
    @ordered
    def test_xgboost(self):
        self.newClient.xgboost_query('predict ocean proximity')
        self.assertIn('xgboost', self.newClient.models)

    # Smoke test: only checks that summarization_query runs without errors
    # (no assertion is made on the models dictionary).
    @ordered
    def test_summarization(self):
        x = client("tools/data/nlp_data/miniDocumentSummarization.csv")
        x.summarization_query("summarize text", epochs=1)

    # Smoke test: only checks that image_caption_query runs without errors
    # (no assertion is made on the models dictionary).
    @ordered
    def test_captioning(self):
        x = client("tools/data/nlp_data/image-caption.csv")
        x.image_caption_query("get captions", epochs=1)

    # Smoke test: only checks that text_classification_query runs without
    # errors (no assertion is made on the models dictionary).
    @ordered
    def test_text_classification(self):
        x = client("tools/data/nlp_data/smallSentimentAnalysis.csv")
        x.text_classification_query("get captions", epochs=1)

    # ------------------------------------------------------------------
    # TEST ANALYZE() FUNCTION
    #
    # Tests all branches of .analyze() function in generate_plots
    # ------------------------------------------------------------------

    # Tests analyze() function for k_means_clustering
    @ordered
    def test_analyze_kmeans(self):
        self.newClient.analyze(model='k_means_clustering')
        kmeans_entry = self.newClient.models['k_means_clustering']
        self.assertIn('n_centers', kmeans_entry)
        self.assertIn('centroids', kmeans_entry)
        self.assertIn('inertia', kmeans_entry)

    # Tests analyze() function on regression_ANN
    @ordered
    def test_analyze_regression(self):
        self.newClient.analyze(model='regression_ANN')
        regression_entry = self.newClient.models['regression_ANN']
        self.assertIn('MSE', regression_entry)
        self.assertIn('MAE', regression_entry)

    # Tests analyze() function on classification_ANN
    @ordered
    def test_analyze_classification(self):
        self.newClient.analyze(model='classification_ANN')
        classification_entry = self.newClient.models['classification_ANN']

        self.assertIn('plots', classification_entry)
        self.assertIn('roc_curve', classification_entry['plots'])
        self.assertIn('confusion_matrix', classification_entry['plots'])

        self.assertIn('scores', classification_entry)
        self.assertIn('recall_score', classification_entry['scores'])
        self.assertIn('precision_score', classification_entry['scores'])
        self.assertIn('f1_score', classification_entry['scores'])

    # Tests analyze() function for classifier models
    @ordered
    def test_analyze_sklearn_classifiers(self):
        for mod in ('svm', 'nearest_neighbor', 'decision_tree', 'xgboost'):
            # subTest labels any failure with the model being checked.
            with self.subTest(model=mod):
                self.newClient.analyze(model=mod)
                modeldict = self.newClient.models[mod]

                self.assertIn('plots', modeldict)
                self.assertIn('roc_curve', modeldict['plots'])
                self.assertIn('confusion_matrix', modeldict['plots'])

                self.assertIn('scores', modeldict)
                self.assertIn('recall_score', modeldict['scores'])
                self.assertIn('precision_score', modeldict['scores'])
                self.assertIn('f1_score', modeldict['scores'])

    # Tests invalid model input: analyze() must raise NameError
    @ordered
    def test_invalid_model(self):
        with self.assertRaises(NameError):
            self.newClient.analyze(model='I dont exist')
Example #8
0
 def test_content_recommender(self):
     # Builds the content-based recommender on the Disney+ shows dataset and
     # checks that recommend('Coco') returns something containing
     # 'recommendations'.
     shows_client = client(
         'tools/data/recommender_systems_data/disney_plus_shows.csv')
     shows_client.content_recommender_query()
     # A bare `assert` is removed when Python runs with -O; assertIn always
     # executes and reports both operands on failure.
     self.assertIn('recommendations', shows_client.recommend('Coco'))
# -*- coding: utf-8 -*-
"""Using the Decision Tree Query through Libra

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1mLgo2BMORoe_OzvxdRvoWfG787YWD9Fu
"""

!pip install libra

from libra import client

"""Here's the link for the used dataset: 
https://archive.ics.uci.edu/ml/machine-learning-databases/00529/
"""

# Create the client from the diabetes CSV; the path is relative, so the file
# has to be in the current working directory.
dia_client = client('diabetes_data_upload.csv')

# Run a decision tree query, described by a plain-English instruction.
dia_client.decision_tree_query('Predict diabetes by class')

"""Let's analyze the model"""

dia_client.analyze()

"""You can use .model() to access the entire dictionary too. Below, we use it to get the plots generated for this specific model."""

# Bare expression: a notebook shows its value as the cell output; when run as
# a plain script the value is discarded.
dia_client.model()['plots']
Example #10
0
Here's another great example of how to use Libra. Let's assume we want to analyze some data and classify wheat types. First, we call libra.
"""

!pip install libra


from libra import client

"""Now, we'll be uploading our dataset. Since Colab resets storage, you'll have to upload it again through https://drive.google.com/file/d/15jM8aW9sw18efiQj39BFEbZn6TAAKEVf/view?usp=sharing

The original, unformatted version is here: http://archive.ics.uci.edu/ml/datasets/seeds

Now, let's create the client object.
"""

# The path is relative, so wheat.csv has to be in the current working directory.
client_wheat = client('wheat.csv')

"""After the dataset has been uploaded, we can start training a model on it. Here we use a support vector machine (SVM) query to classify the examples based on their wheat type."""

# Run an SVM query, described by a plain-English instruction.
client_wheat.svm_query('Classify by wheat type')

"""Here are some other useful methods"""

client_wheat.analyze()

"""Here's how to get more specific information. client_name.models['query type'] will return all the information available. 

client_name.info() will give you the keys for the model. Now you can use this to access specific information that you require
"""

# Bare expression: a notebook shows its value as the cell output; when run as
# a plain script the value is discarded.
client_wheat.models['svm']
Example #11
0
# Installing Libra
"""

pip install -U libra

"""# Importing client from libra
Everything is built around the client object. You can call different queries on it and everything will be stored under the models field of the object.
"""

from libra import client

"""# Using Libra
We pass the location of our file to the client object and name it newClient. To see the various queries available, refer to the documentation. I am using a decision tree here, so the command is newClient.decision_tree_query(), with an instruction passed in. An instruction is an English-language statement that represents the task you would like to be completed, e.g. 'predict the median house value' or 'please estimate the number of households'. It should correspond to a column in the dataset. Libra automatically detects the target column, but just to be sure that it selects the right column, I have passed the target column name.
"""

# Absolute path into a mounted Google Drive folder; adjust it to wherever
# creditcard.csv is stored.
newClient = client('/content/drive/My Drive/Colab Notebooks/creditcard.csv')
# The instruction passed here is just the target column name, 'Class'.
newClient.decision_tree_query('Class')

"""With just two lines of code we acquired a score of approximately 0.99, which is the best we can get. If you check out other kernels, you will observe that only a handful of people got 0.99 accuracy, and that it took them hours to preprocess the data and write code for it. In that case Libra saves you a lot of time and gives the best result. Libra uses something known as intelligent preprocessing, so you don't need to preprocess the data all by yourself.

# You don't need to worry about analysing the results.
newClient.analyze() Creates confusion matrix and ROC Curve for all classification problems. Also calculates recall, precision, f1, and f2 score.
"""

# Analyze the model created by the decision tree query above.
newClient.analyze()

"""newClient.info() Returns all the keys, representing each category of data generated for the dataset."""

newClient.info()

"""`newClient.model()` returns a dictionary for that model. It includes everything from accuracy, precision, recall, and F1 score to all the preprocessing techniques that have been used. It's more helpful for people who already know about these concepts and can code them. Non-tech users need not worry about this."""
Example #12
0
#!/usr/local/bin/python3.7

import pandas as pd
import libra
from libra import client

# Single place for the dataset path used by both the Libra client and pandas.
HOUSING_CSV = 'housing.csv'

# Libra client first, then a raw pandas view of the same file.
clous = client(HOUSING_CSV)
data = pd.read_csv(HOUSING_CSV)

# Bare expressions: their values are only shown in an interactive session and
# are discarded when this runs as a script.
data.columns
data.head(10)

# First model: ocean proximity as the target.
clous.neural_network_query(
    'estimate ocean proximity',
    epochs=30,
)
clous.analyze()

# Inspect the client after the first query.
clous.accuracy()
clous.losses()
clous.info()

# Second model: median house value as the target, dropping the
# 'ocean_proximity' column and saving the model.
clous.neural_network_query(
    'model median house value',
    drop=['ocean_proximity'],
    save_model=True,
)