Ejemplo n.º 1
0
    def test_experiment_boston(self):
        """Run both notebooks on boston.csv and check the response width.

        The Experiment notebook is executed with the parameters below and the
        Deployment notebook with none; executed notebooks are written to
        ``/dev/null``. The test server response must then have 14 columns.
        """
        experiment_params = {
            "dataset": "/tmp/data/boston.csv",
            "filter_type": "remover",
            "model_features": "medv",
            "max_samples": "auto",
            "contamination": 0.1,
            "max_features": 1.0,
        }
        papermill.execute_notebook(
            "Experiment.ipynb",
            "/dev/null",
            parameters=experiment_params,
        )
        papermill.execute_notebook("Deployment.ipynb", "/dev/null")

        payload = datasets.boston_testdata()
        with server.Server() as srv:
            response = srv.test(data=payload)

        column_names, rows = response["names"], response["ndarray"]
        expected_width = 14  # 13 features + 1 anomaly score
        self.assertEqual(len(rows[0]), expected_width)
        self.assertEqual(len(column_names), expected_width)
Ejemplo n.º 2
0
    def test_experiment_report_contexts(self):
        """Run both notebooks on reports_contexts_small.csv and check the rows.

        The Experiment notebook is executed with the parameters below and the
        Deployment notebook with none; executed notebooks are written to
        ``/dev/null``. The first row of the returned ``ndarray`` must hold
        4 values.
        """
        experiment_params = {
            "dataset": "/tmp/data/reports_contexts_small.csv",
            "column": "context",
            "question": "What is the best weed herbicide?",
            "top": 5,
            "inner_batch_size": 5,
            "tokenizer_fn": "facebook/dpr-reader-single-nq-base",
            "tokenizer_max_len": 512,
            "dpr_fn": "facebook/dpr-reader-single-nq-base",
        }
        papermill.execute_notebook(
            "Experiment.ipynb",
            "/dev/null",
            parameters=experiment_params,
        )

        papermill.execute_notebook(
            "Deployment.ipynb",
            "/dev/null",
        )
        data = datasets.report_contexts_test_data()
        with server.Server() as s:
            response = s.test(data=data, timeout=10)
        ndarray = response["ndarray"]
        self.assertEqual(len(ndarray[0]), 4)  # 4 values per returned row
Ejemplo n.º 3
0
    def test_experiment_boston(self):
        """Run both notebooks on boston.csv; the response keeps 13 columns."""
        experiment_params = {
            "dataset": "/tmp/data/boston.csv",
            "target": "medv",
            "strategy_num": "mean",
            "strategy_cat": "most_frequent",
            "fillvalue_num": 0,
            "fillvalue_cat": "",
        }
        papermill.execute_notebook(
            "Experiment.ipynb",
            "/dev/null",
            parameters=experiment_params,
        )
        # The Deployment notebook is executed without parameters.
        papermill.execute_notebook("Deployment.ipynb", "/dev/null")

        payload = datasets.boston_testdata()
        with server.Server() as srv:
            response = srv.test(data=payload)

        column_names, rows = response["names"], response["ndarray"]
        expected_width = 13  # 13 features
        self.assertEqual(len(rows[0]), expected_width)
        self.assertEqual(len(column_names), expected_width)
Ejemplo n.º 4
0
    def test_experiment_boston(self):
        """Run both notebooks on boston.csv; the response has 17 columns."""
        experiment_params = {
            "dataset": "/tmp/data/boston.csv",
            "filter_type": "remover",
            "model_features": "medv",
            "n_clusters": 3,
            "n_init": 10,
            "max_iter": 300,
            "algorithm": "auto",
        }
        papermill.execute_notebook(
            "Experiment.ipynb",
            "/dev/null",
            parameters=experiment_params,
        )
        # The Deployment notebook is executed without parameters.
        papermill.execute_notebook("Deployment.ipynb", "/dev/null")

        payload = datasets.boston_testdata()
        with server.Server() as srv:
            response = srv.test(data=payload)

        column_names, rows = response["names"], response["ndarray"]
        # 13 features + 1 cluster + 3 distances to clusters
        expected_width = 17
        self.assertEqual(len(rows[0]), expected_width)
        self.assertEqual(len(column_names), expected_width)
Ejemplo n.º 5
0
    def test_experiment_face_detection_cuda(self):
        """Run both notebooks with ``device="cuda"`` and check for 5 outputs.

        The response may carry its data either under ``tensor`` (the width is
        read from ``shape[1]``) or under ``ndarray`` (the width is the length
        of the first row); both forms must report 5 features.
        """
        experiment_params = {
            "dataset": "/tmp/data/football_teams.zip",
            "device": "cuda",
        }
        papermill.execute_notebook(
            "Experiment.ipynb",
            "/dev/null",
            parameters=experiment_params,
        )
        papermill.execute_notebook("Deployment.ipynb", "/dev/null")

        payload = datasets.image_testdata(kind='people', ext='jpg')
        with server.Server() as srv:
            response = srv.test(data=payload, timeout=10)

        expected_width = 5
        if 'tensor' in response:
            width = response["tensor"]['shape'][1]
        else:  # the payload is an ndarray
            width = len(response["ndarray"][0])
        self.assertEqual(width, expected_width)  # 5 output features

        self.assertEqual(len(response["names"]), expected_width)  # 5 names
Ejemplo n.º 6
0
    def test_experiment_titanic(self):
        """Run both notebooks on titanic.csv; the response has 14 columns."""
        experiment_params = {
            "dataset": "/tmp/data/titanic.csv",
            "target": "Survived",
            "filter_type": "remover",
            "model_features": "",
            "one_hot_features": "",
            "n_estimators": 10,
            "criterion": "gini",
            "max_depth": None,
            "max_features": "auto",
            "class_weight": None,
            "method": "predict_proba",
        }
        papermill.execute_notebook(
            "Experiment.ipynb",
            "/dev/null",
            parameters=experiment_params,
        )
        # The Deployment notebook is executed without parameters.
        papermill.execute_notebook("Deployment.ipynb", "/dev/null")

        payload = datasets.titanic_testdata()
        with server.Server() as srv:
            response = srv.test(data=payload)

        column_names, rows = response["names"], response["ndarray"]
        expected_width = 14  # 11 features + 1 class + 2 probas
        self.assertEqual(len(rows[0]), expected_width)
        self.assertEqual(len(column_names), expected_width)
Ejemplo n.º 7
0
    def test_experiment_titanic(self):
        """Run both notebooks on titanic.csv; the response keeps 11 columns."""
        experiment_params = {
            "dataset": "/tmp/data/titanic.csv",
            "target": "Survived",
            "high_cardinality_features": "Pclass",
            "method": "kmeans",
            "threshold": 0.1,
            "n": 10,
        }
        papermill.execute_notebook(
            "Experiment.ipynb",
            "/dev/null",
            parameters=experiment_params,
        )
        # The Deployment notebook is executed without parameters.
        papermill.execute_notebook("Deployment.ipynb", "/dev/null")

        payload = datasets.titanic_testdata()
        with server.Server() as srv:
            response = srv.test(data=payload)

        column_names, rows = response["names"], response["ndarray"]
        expected_width = 11  # 11 features
        self.assertEqual(len(rows[0]), expected_width)
        self.assertEqual(len(column_names), expected_width)
Ejemplo n.º 8
0
    def test_experiment_boston(self):
        """Run both notebooks on boston.csv; the response has 14 columns."""
        experiment_params = {
            "dataset": "/tmp/data/boston.csv",
            "target": "medv",
            "filter_type": "remover",
            "model_features": "",
            "one_hot_features": "",
            "kernel": "rbf",
            "degree": 3,
            "gamma": "auto",
            "C": 1.0,
            "max_iter": -1,
        }
        papermill.execute_notebook(
            "Experiment.ipynb",
            "/dev/null",
            parameters=experiment_params,
        )
        # The Deployment notebook is executed without parameters.
        papermill.execute_notebook("Deployment.ipynb", "/dev/null")

        payload = datasets.boston_testdata()
        with server.Server() as srv:
            response = srv.test(data=payload)

        column_names, rows = response["names"], response["ndarray"]
        expected_width = 14  # 13 features + 1 prediction
        self.assertEqual(len(rows[0]), expected_width)
        self.assertEqual(len(column_names), expected_width)
Ejemplo n.º 9
0
    def test_experiment_iris(self):
        """Run both notebooks on iris.csv; the response has 12 columns.

        The Experiment notebook is executed with the parameters below and the
        Deployment notebook with none; executed notebooks are written to
        ``/dev/null``. The returned column names are included in the
        assertion failure message rather than printed on every run.
        """
        papermill.execute_notebook(
            "Experiment.ipynb",
            "/dev/null",
            parameters=dict(
                dataset="/tmp/data/iris.csv",
                target="Species",
                date=None,
                group=["SepalLengthCm"],
                budget=20,
            ),
        )

        papermill.execute_notebook(
            "Deployment.ipynb",
            "/dev/null",
        )
        data = datasets.iris_testdata()
        with server.Server() as s:
            response = s.test(data=data)
        names = response["names"]
        ndarray = response["ndarray"]
        # Show the returned names only when a check fails.
        names_msg = "returned names: {}".format(names)
        # 4 original features + 8 new features
        self.assertEqual(len(ndarray[0]), 12, msg=names_msg)
        self.assertEqual(len(names), 12, msg=names_msg)
Ejemplo n.º 10
0
    def test_experiment_boston(self):
        """Run both notebooks on boston.csv; the response has 14 columns."""
        experiment_params = {
            "dataset": "/tmp/data/boston.csv",
            "target": "medv",
            "filter_type": "remover",
            "model_features": "",
            "ordinal_features": "",
            "fit_intercept": True,
        }
        papermill.execute_notebook(
            "Experiment.ipynb",
            "/dev/null",
            parameters=experiment_params,
        )
        # The Deployment notebook is executed without parameters.
        papermill.execute_notebook("Deployment.ipynb", "/dev/null")

        payload = datasets.boston_testdata()
        with server.Server() as srv:
            response = srv.test(data=payload)

        column_names, rows = response["names"], response["ndarray"]
        expected_width = 14  # 13 features + 1 prediction
        self.assertEqual(len(rows[0]), expected_width)
        self.assertEqual(len(column_names), expected_width)
Ejemplo n.º 11
0
    def test_experiment_titanic(self):
        """Run both notebooks on titanic.csv; the response has 14 columns."""
        experiment_params = {
            "dataset": "/tmp/data/titanic.csv",
            "target": "Survived",
            "filter_type": "remover",
            "model_features": "",
            "one_hot_features": "",
            "time_left_for_this_task": 30,
            "per_run_time_limit": 30,
            "ensemble_size": 5,
            "method": "predict_proba",
        }
        papermill.execute_notebook(
            "Experiment.ipynb",
            "/dev/null",
            parameters=experiment_params,
        )
        # The Deployment notebook is executed without parameters.
        papermill.execute_notebook("Deployment.ipynb", "/dev/null")

        payload = datasets.titanic_testdata()
        with server.Server() as srv:
            response = srv.test(data=payload)

        column_names, rows = response["names"], response["ndarray"]
        expected_width = 14  # 11 features + 1 class + 2 probas
        self.assertEqual(len(rows[0]), expected_width)
        self.assertEqual(len(column_names), expected_width)
Ejemplo n.º 12
0
    def test_experiment_report_contexts_word2vec(self):
        """Run both notebooks on reports_contexts.csv and check the rows.

        The first row of the returned ``ndarray`` must hold 4 values.
        """
        # NOTE(review): bm25_* values are passed although retriever_type is
        # "word2vec" -- confirm they are intended for this retriever.
        experiment_params = {
            "dataset": "/tmp/data/reports_contexts.csv",
            "column": "context",
            "question": "Qual é o melhor herbicida para erva da ninha ?",
            "retriever_type": "word2vec",
            "bm25_k1": 2,
            "bm25_b": 0.75,
            "top": 10,
        }
        papermill.execute_notebook(
            "Experiment.ipynb",
            "/dev/null",
            parameters=experiment_params,
        )
        papermill.execute_notebook("Deployment.ipynb", "/dev/null")

        payload = datasets.report_contexts_test_data()
        with server.Server() as srv:
            response = srv.test(data=payload)

        rows = response["ndarray"]
        self.assertEqual(len(rows[0]), 4)  # 4 values per returned row
Ejemplo n.º 13
0
    def test_experiment_titanic(self):
        """Run both notebooks on titanic.csv; the response has 14 columns."""
        experiment_params = {
            "dataset": "/tmp/data/titanic.csv",
            "target": "Survived",
            "filter_type": "remover",
            "model_features": "",
            "one_hot_features": "",
            "C": 1.0,
            "kernel": "rbf",
            "degree": 3,
            "gamma": "auto",
            "probability": True,
            "max_iter": -1,
            "method": "predict_proba",
        }
        papermill.execute_notebook(
            "Experiment.ipynb",
            "/dev/null",
            parameters=experiment_params,
        )
        # The Deployment notebook is executed without parameters.
        papermill.execute_notebook("Deployment.ipynb", "/dev/null")

        payload = datasets.titanic_testdata()
        with server.Server() as srv:
            response = srv.test(data=payload)

        column_names, rows = response["names"], response["ndarray"]
        expected_width = 14  # 11 features + 1 class + 2 probas
        self.assertEqual(len(rows[0]), expected_width)
        self.assertEqual(len(column_names), expected_width)
Ejemplo n.º 14
0
    def test_hotel_bookings(self):
        """Run both notebooks on hotel_bookings.csv; 31 columns come back."""
        experiment_params = {
            "dataset": "/tmp/data/hotel_bookings.csv",
            "target": "is_canceled",
            "high_cardinality_features": "hotel",
            "method": "kmeans",
            "threshold": 0.1,
            "n": 10,
        }
        papermill.execute_notebook(
            "Experiment.ipynb",
            "/dev/null",
            parameters=experiment_params,
        )
        # The Deployment notebook is executed without parameters.
        papermill.execute_notebook("Deployment.ipynb", "/dev/null")

        payload = datasets.hotel_bookings_testdata()
        with server.Server() as srv:
            response = srv.test(data=payload)

        column_names, rows = response["names"], response["ndarray"]
        expected_width = 31  # 31 features
        self.assertEqual(len(rows[0]), expected_width)
        self.assertEqual(len(column_names), expected_width)
Ejemplo n.º 15
0
    def test_yolo_empty_output(self):
        """Run both notebooks with 0.9999 thresholds and check for 6 outputs.

        The response may carry its data either under ``tensor`` (the width is
        read from ``shape[1]``) or under ``ndarray`` (the width is the length
        of the first row); both forms must report 6 features.
        """
        experiment_params = {
            "dataset": EXPERIMENT_DATASET,
            "score_threshold": 0.9999,
            "iou_threshold": 0.9999,
            "yolo_weight_type": "tiny",
        }
        papermill.execute_notebook(
            "Experiment.ipynb",
            "/dev/null",
            parameters=experiment_params,
        )
        papermill.execute_notebook("Deployment.ipynb", "/dev/null")

        payload = datasets.image_testdata(kind='text', ext='png')
        with server.Server() as srv:
            response = srv.test(data=payload, timeout=10)

        expected_width = 6
        if 'tensor' in response:
            width = response["tensor"]['shape'][1]
        else:  # the payload is an ndarray
            width = len(response["ndarray"][0])
        self.assertEqual(width, expected_width)  # 6 output features

        self.assertEqual(len(response["names"]), expected_width)  # 6 names
Ejemplo n.º 16
0
    def test_yolo_tiny_portuguese(self):
        """Run both notebooks, then query the server with PNG and JPG images.

        The Experiment notebook is executed with the parameters below and the
        Deployment notebook with none; executed notebooks are written to
        ``/dev/null``. For each image extension the response must report
        6 features, either through ``tensor["shape"][1]`` or through the
        length of the first ``ndarray`` row, and 6 names.
        """
        papermill.execute_notebook(
            "Experiment.ipynb",
            "/dev/null",
            parameters=dict(
                dataset=EXPERIMENT_DATASET,
                language="português",
                yolo_weight_type="tiny",
            ),
        )

        papermill.execute_notebook(
            "Deployment.ipynb",
            "/dev/null",
        )

        for ext in ('png', 'jpg'):
            # subTest labels a failure with the extension that triggered it
            # and lets the remaining extension still be checked.
            with self.subTest(ext=ext):
                data = datasets.image_testdata(kind='objects', ext=ext)

                with server.Server() as s:
                    response = s.test(data=data, timeout=10)

                if 'tensor' in response:
                    width = response["tensor"]['shape'][1]
                else:  # is a ndarray
                    width = len(response["ndarray"][0])
                self.assertEqual(width, 6)  # outputs 6 features

                names = response["names"]
                self.assertEqual(len(names), 6)  # 6 feature names
Ejemplo n.º 17
0
    def test_experiment_titanic(self):
        """Run both notebooks on titanic.csv; the response has 14 columns."""
        experiment_params = {
            "dataset": "/tmp/data/titanic.csv",
            "target": "Survived",
            "filter_type": "remover",
            "model_features": "",
            "one_hot_features": "",
            "hidden_layer_sizes": 100,
            "activation": "relu",
            "solver": "adam",
            "learning_rate": "constant",
            "max_iter": 200,
            "shuffle": True,
            "method": "predict_proba",
        }
        papermill.execute_notebook(
            "Experiment.ipynb",
            "/dev/null",
            parameters=experiment_params,
        )
        # The Deployment notebook is executed without parameters.
        papermill.execute_notebook("Deployment.ipynb", "/dev/null")

        payload = datasets.titanic_testdata()
        with server.Server() as srv:
            response = srv.test(data=payload)

        column_names, rows = response["names"], response["ndarray"]
        expected_width = 14  # 11 features + 1 class + 2 probas
        self.assertEqual(len(rows[0]), expected_width)
        self.assertEqual(len(column_names), expected_width)
Ejemplo n.º 18
0
    def test_experiment_ocr_output_data(self):
        """Run both notebooks, then validate the bounding boxes returned.

        The Experiment notebook is executed with the parameters below and the
        Deployment notebook with none; executed notebooks are written to
        ``/dev/null``. For PNG and JPG inputs, every ``ndarray`` row is
        unpacked as ``(xmin, ymin, xmax, ymax, text)`` and must satisfy
        ``xmax > xmin`` and ``ymax > ymin``.
        """
        papermill.execute_notebook(
            "Experiment.ipynb",
            "/dev/null",
            parameters=dict(
                dataset="/tmp/data/ocr_dataset.zip",
                target="target",
                filter_type="incluir",
                model_features="input_image",
                bbox_conf=60,
                segmentation_mode="Considere um único bloco de texto uniforme",
                ocr_engine="Mecanismo de redes neurais com apenas LSTM",
                language="por",
                bbox_return="np_array",
                image_return_format="N/A"),
        )

        papermill.execute_notebook(
            "Deployment.ipynb",
            "/dev/null",
        )

        for ext in ('png', 'jpg'):
            # subTest labels a failure with the extension that triggered it
            # and lets the remaining extension still be checked.
            with self.subTest(ext=ext):
                data = datasets.image_testdata(kind='text', ext=ext)

                with server.Server() as s:
                    response = s.test(data=data, timeout=10)

                # The fifth field (the text) is unpacked but not checked.
                for xmin, ymin, xmax, ymax, _text in response['ndarray']:
                    self.assertGreater(xmax, xmin, "BoundingBox incorreta.")
                    self.assertGreater(ymax, ymin, "BoundingBox incorreta.")
Ejemplo n.º 19
0
    def test_experiment_titanic(self):
        """Run both notebooks on titanic.csv; the response has 14 columns."""
        experiment_params = {
            "dataset": "/tmp/data/titanic.csv",
            "target": "Survived",
            "filter_type": "remover",
            "model_features": "",
            "ordinal_features": "",
            "penalty": "l2",
            "C": 1.0,
            "fit_intercept": True,
            "class_weight": None,
            "solver": "liblinear",
            "max_iter": 100,
            "multi_class": "auto",
            "method": "predict_proba",
        }
        papermill.execute_notebook(
            "Experiment.ipynb",
            "/dev/null",
            parameters=experiment_params,
        )
        # The Deployment notebook is executed without parameters.
        papermill.execute_notebook("Deployment.ipynb", "/dev/null")

        payload = datasets.titanic_testdata()
        with server.Server() as srv:
            response = srv.test(data=payload)

        column_names, rows = response["names"], response["ndarray"]
        expected_width = 14  # 11 features + 1 class + 2 probas
        self.assertEqual(len(rows[0]), expected_width)
        self.assertEqual(len(column_names), expected_width)
Ejemplo n.º 20
0
    def test_experiment_hotel_bookings(self):
        """Run both notebooks on hotel_bookings.csv; 110 columns come back.

        The Experiment notebook is executed with the parameters below and the
        Deployment notebook with none; executed notebooks are written to
        ``/dev/null``. The returned column names are included in the
        assertion failure message rather than printed on every run.
        """
        papermill.execute_notebook(
            "Experiment.ipynb",
            "/dev/null",
            parameters=dict(
                dataset="/tmp/data/hotel_bookings.csv",
                target="is_canceled",
                date="reservation_status_date",
                group=["hotel"],
                budget=20,
            ),
        )

        papermill.execute_notebook(
            "Deployment.ipynb",
            "/dev/null",
        )
        data = datasets.hotel_bookings_testdata()
        with server.Server() as s:
            response = s.test(data=data)
        names = response["names"]
        ndarray = response["ndarray"]
        # Show the returned names only when a check fails.
        names_msg = "returned names: {}".format(names)
        # 31 original features + 79 new features
        self.assertEqual(len(ndarray[0]), 110, msg=names_msg)
        self.assertEqual(len(names), 110, msg=names_msg)
Ejemplo n.º 21
0
    def test_experiment_imdb(self):
        """Run both notebooks on imdb.csv from the task directory.

        The test changes into the task directory, executes the Experiment
        notebook with the parameters below and the Deployment notebook with
        none (executed notebooks are written to ``/dev/null``), and checks
        that the response has a single column.
        """
        # Register the restore before changing directory so the working
        # directory does not leak into later tests, even if this one fails.
        self.addCleanup(os.chdir, os.getcwd())
        os.chdir("tasks/nlp-glove-embeddings-sentence-classification")

        papermill.execute_notebook(
            "Experiment.ipynb",
            "/dev/null",
            parameters=dict(
                dataset="/tmp/data/imdb.csv",
                target="label",
                language="english",

                train_batch_size=10,
                eval_batch_size=2,
                max_epochs=200,
                accumulate_grad_batches=8,
                learning_rate=0.12,
                seed=7,
                hidden_dim=300,

                filter_type="incluir",
                model_features="text",
            ),
        )

        papermill.execute_notebook(
            "Deployment.ipynb",
            "/dev/null",
        )
        data = datasets.imdb_testdata()
        with server.Server() as s:
            response = s.test(data=data)
        names = response["names"]
        ndarray = response["ndarray"]
        self.assertEqual(len(ndarray[0]), 1)  # 1 feature
        self.assertEqual(len(names), 1)
Ejemplo n.º 22
0
    def test_experiment(self):
        """Run both notebooks on reports_contexts_small.csv; expect 5 columns.

        The Experiment notebook is executed with the parameters below and the
        Deployment notebook with none; executed notebooks are written to
        ``/dev/null``.
        """
        experiment_params = {
            "dataset": "/tmp/data/reports_contexts_small.csv",
            "question": "Qual é o melhor herbicida para erva da ninha ?",
            "top": 10,
            "column_context": "context",
            "column_question": "question",
            "column_answer_start": "answer_start",
            "column_answer_end": "answer_end",
            "train_from_zero": False,
            "train_from_squad": False,
            "dev_size_from_data": 0.2,
            "test_size_from_dev": 0.5,
            "batch_dataset_preparation": 30,
            "model_name": "neuralmind/bert-large-portuguese-cased",
            "train_batch_size": 2,
            "eval_batch_size": 2,
            "max_length": 384,
            "doc_stride": 128,
            "learning_rate": 3.0e-5,
            "eps": 1.0e-08,
            "seed": 13,
            "num_gpus": 1,
            "profiler": True,
            "max_epochs": 2,
            "accumulate_grad_batches": 16,
            "check_val_every_n_epoch": 1,
            "progress_bar_refresh_rate": 1,
            "gradient_clip_val": 1.0,
            "fast_dev_run": False,
            "monitor": 'avg_train_loss',
            "min_delta": 0.01,
            "patience": 1,
            "verbose": False,
            "mode": 'min',
        }
        papermill.execute_notebook(
            "Experiment.ipynb",
            "/dev/null",
            parameters=experiment_params,
        )
        papermill.execute_notebook("Deployment.ipynb", "/dev/null")

        payload = datasets.document_reader_test_data()
        with server.Server() as srv:
            response = srv.test(data=payload)

        column_names, rows = response["names"], response["ndarray"]
        expected_width = 5
        self.assertEqual(len(rows[0]), expected_width)
        self.assertEqual(len(column_names), expected_width)
Ejemplo n.º 23
0
    def test_experiment_face_detection_without_people(self):
        """Run both notebooks, then query the server with 'objects' images.

        The Experiment notebook is executed with the parameters below and the
        Deployment notebook with none; executed notebooks are written to
        ``/dev/null``. For each image extension the response must report
        5 features, either through ``tensor["shape"][1]`` or through the
        length of the first ``ndarray`` row, and 5 names.
        """
        papermill.execute_notebook(
            "Experiment.ipynb",
            "/dev/null",
            parameters=dict(
                dataset="/tmp/data/football_teams.zip",
                image_size=64,
                margin=5,
                min_face_size=10,
                factor=0.709,
                keep_all=True,
                device="cpu",
                seed=7,
                inference_batch_size=2,
                input_square_transformation_size=128,
            ),
        )

        papermill.execute_notebook(
            "Deployment.ipynb",
            "/dev/null",
        )

        for ext in ('png', 'jpg'):
            # subTest labels a failure with the extension that triggered it
            # and lets the remaining extension still be checked.
            with self.subTest(ext=ext):
                data = datasets.image_testdata(kind='objects', ext=ext)

                with server.Server() as s:
                    response = s.test(data=data, timeout=10)

                if 'tensor' in response:
                    width = response["tensor"]['shape'][1]
                else:  # is a ndarray
                    width = len(response["ndarray"][0])
                self.assertEqual(width, 5)  # outputs 5 features

                names = response["names"]
                self.assertEqual(len(names), 5)  # 5 feature names
Ejemplo n.º 24
0
    def test_hotel_bookings(self):
        """Smoke-test both notebooks and the server on hotel_bookings.csv.

        The Experiment notebook is executed with the parameters below and the
        Deployment notebook with none; executed notebooks are written to
        ``/dev/null``. No assertion is made on the response: the test passes
        when the notebooks and the server request complete without raising.
        """
        papermill.execute_notebook(
            "Experiment.ipynb",
            "/dev/null",
            parameters=dict(
                dataset="/tmp/data/hotel_bookings.csv",
                group_col="hotel",
                period="mês",
                date_col="reservation_status_date",
                target_col="reservation_status",
            ),
        )

        papermill.execute_notebook(
            "Deployment.ipynb",
            "/dev/null",
        )
        data = datasets.hotel_bookings_testdata()
        with server.Server() as s:
            # The response is not inspected, so it is not bound to a name.
            s.test(data=data)
Ejemplo n.º 25
0
    def test_experiment_titanic(self):
        """Run both notebooks on titanic.csv; the response keeps 11 columns."""
        experiment_params = {
            "dataset": "/tmp/data/titanic.csv",
            "target": "Survived",
            "norm": "l2",
        }
        papermill.execute_notebook(
            "Experiment.ipynb",
            "/dev/null",
            parameters=experiment_params,
        )
        # The Deployment notebook is executed without parameters.
        papermill.execute_notebook("Deployment.ipynb", "/dev/null")

        payload = datasets.titanic_testdata()
        with server.Server() as srv:
            response = srv.test(data=payload)

        column_names, rows = response["names"], response["ndarray"]
        expected_width = 11  # 11 features
        self.assertEqual(len(rows[0]), expected_width)
        self.assertEqual(len(column_names), expected_width)
Ejemplo n.º 26
0
    def test_experiment_titanic(self):
        """Run both notebooks filtering "Name"; the response has 11 columns."""
        experiment_params = {
            "dataset": "/tmp/data/titanic.csv",
            "features_to_filter": ["Name"],
        }
        papermill.execute_notebook(
            "Experiment.ipynb",
            "/dev/null",
            parameters=experiment_params,
        )
        # The Deployment notebook is executed without parameters.
        papermill.execute_notebook("Deployment.ipynb", "/dev/null")

        payload = datasets.titanic_testdata_full()
        with server.Server() as srv:
            response = srv.test(data=payload)

        column_names, rows = response["names"], response["ndarray"]
        expected_width = 11  # 12 features - 1 removed
        self.assertEqual(len(rows[0]), expected_width)
        self.assertEqual(len(column_names), expected_width)
Ejemplo n.º 27
0
    def test_experiment_iris_empty_return(self):
        """Run both notebooks on iris.csv and expect an empty ``ndarray``.

        The returned ``ndarray`` must be empty while ``names`` still lists
        5 entries.
        """
        experiment_params = {
            "dataset": "/tmp/data/iris.csv",
            "target": "SepalLengthCm",
            "condition": "Menor que",
            "value": 5,
        }
        papermill.execute_notebook(
            "Experiment.ipynb",
            "/dev/null",
            parameters=experiment_params,
        )
        papermill.execute_notebook("Deployment.ipynb", "/dev/null")

        payload = datasets.iris_testdata_full()
        with server.Server() as srv:
            response = srv.test(data=payload)

        column_names, rows = response["names"], response["ndarray"]
        self.assertEqual(len(rows), 0)  # no rows are returned
        self.assertEqual(len(column_names), 5)
Ejemplo n.º 28
0
    def test_experiment_iris(self):
        """Run both notebooks on iris.csv; the response keeps 4 columns."""
        experiment_params = {
            "dataset": "/tmp/data/iris.csv",
            "target": "Species",
            "norm": "l2",
        }
        papermill.execute_notebook(
            "Experiment.ipynb",
            "/dev/null",
            parameters=experiment_params,
        )
        # The Deployment notebook is executed without parameters.
        papermill.execute_notebook("Deployment.ipynb", "/dev/null")

        payload = datasets.iris_testdata()
        with server.Server() as srv:
            response = srv.test(data=payload)

        column_names, rows = response["names"], response["ndarray"]
        expected_width = 4  # 4 features
        self.assertEqual(len(rows[0]), expected_width)
        self.assertEqual(len(column_names), expected_width)
Ejemplo n.º 29
0
    def test_experiment_hotel_bookings(self):
        """Run both notebooks filtering two columns; 30 columns come back."""
        experiment_params = {
            "dataset": "/tmp/data/hotel_bookings.csv",
            "features_to_filter": [
                "reservation_status_date",
                "arrival_date_year",
            ],
        }
        papermill.execute_notebook(
            "Experiment.ipynb",
            "/dev/null",
            parameters=experiment_params,
        )
        # The Deployment notebook is executed without parameters.
        papermill.execute_notebook("Deployment.ipynb", "/dev/null")

        payload = datasets.hotel_bookings_testdata_full()
        with server.Server() as srv:
            response = srv.test(data=payload)

        column_names, rows = response["names"], response["ndarray"]
        expected_width = 30  # 32 features - 2 removed
        self.assertEqual(len(rows[0]), expected_width)
        self.assertEqual(len(column_names), expected_width)
Ejemplo n.º 30
0
    def test_experiment_boston(self):
        """Run both notebooks on boston.csv; the response has 12 columns."""
        experiment_params = {
            "dataset": "/tmp/data/boston.csv",
            "target": "medv",
            "cutoff": 0.9,
            "threshold": 0.0,
        }
        papermill.execute_notebook(
            "Experiment.ipynb",
            "/dev/null",
            parameters=experiment_params,
        )
        # The Deployment notebook is executed without parameters.
        papermill.execute_notebook("Deployment.ipynb", "/dev/null")

        payload = datasets.boston_testdata()
        with server.Server() as srv:
            response = srv.test(data=payload)

        column_names, rows = response["names"], response["ndarray"]
        expected_width = 12  # 13 features - 1 removed
        self.assertEqual(len(rows[0]), expected_width)
        self.assertEqual(len(column_names), expected_width)