Example 1
 async def test_get_similarity(self):
     input_sentence1 = (
         "The end is the beginning , and the beginning is the end"
     )
     input_sentence2 = (
         "The end was the beginning , and the beginning was the end"
     )
     async for ctx, results in MemoryOrchestrator.run(
         DataFlow.auto(get_similarity, GetSingle),
         [
             Input(
                 value=[get_similarity.op.outputs["result"].name],
                 definition=GetSingle.op.inputs["spec"],
             ),
             Input(
                 value=input_sentence1,
                 definition=get_similarity.op.inputs["text_1"],
             ),
             Input(
                 value=input_sentence2,
                 definition=get_similarity.op.inputs["text_2"],
             ),
             Input(
                 value="en_core_web_sm",
                 definition=get_similarity.op.inputs["spacy_model"],
             ),
         ],
     ):
         similarity_score = results[
             get_similarity.op.outputs["result"].name
         ]
         self.assertGreater(similarity_score, 0.9)
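All of these examples share the same shape: build a DataFlow (typically DataFlow.auto(<operation>, GetSingle)), seed it with one Input per operation input plus a GetSingle spec naming the output definitions to collect, then iterate the (ctx, results) pairs yielded by MemoryOrchestrator.run. A minimal sketch of that pattern, with my_operation standing in as a hypothetical placeholder operation:

async for ctx, results in MemoryOrchestrator.run(
    DataFlow.auto(my_operation, GetSingle),
    [
        # Tell GetSingle which output definitions to collect.
        Input(
            value=[my_operation.op.outputs["result"].name],
            definition=GetSingle.op.inputs["spec"],
        ),
        # ...one Input per input of my_operation goes here...
    ],
):
    result = results[my_operation.op.outputs["result"].name]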
Example 2
 async def test_simple_imputer(self):
     input_data = [[np.nan, 2], [6, np.nan], [7, 6]]
     output_data = [[6.5, 2], [6, 4], [7, 6]]
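     # With strategy="mean", each np.nan is replaced by its column mean:
     # column 0 -> (6 + 7) / 2 = 6.5, column 1 -> (2 + 6) / 2 = 4.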
     async for ctx, results in MemoryOrchestrator.run(
         DataFlow.auto(simple_imputer, GetSingle),
         [
             Input(
                 value=[simple_imputer.op.outputs["result"].name],
                 definition=GetSingle.op.inputs["spec"],
             ),
             Input(
                 value=input_data,
                 definition=simple_imputer.op.inputs["data"],
             ),
             Input(
                 value=np.nan,
                 definition=simple_imputer.op.inputs["missing_values"],
             ),
             Input(
                 value="mean",
                 definition=simple_imputer.op.inputs["strategy"],
             ),
         ],
     ):
         self.assertTrue((results[simple_imputer.op.outputs["result"].name]
                          == output_data).all())
Example 3
 async def test_principal_component_analysis(self):
     input_data, _ = make_classification(
         n_samples=10,
         n_features=10,
         n_informative=8,
         n_redundant=2,
         random_state=7,
     )
     async for ctx, results in MemoryOrchestrator.run(
         DataFlow.auto(principal_component_analysis, GetSingle),
         [
             Input(
                 value=[
                     principal_component_analysis.op.outputs["result"].name
                 ],
                 definition=GetSingle.op.inputs["spec"],
             ),
             Input(
                 value=input_data,
                 definition=principal_component_analysis.op.inputs["data"],
             ),
             Input(
                 value=8,
                 definition=principal_component_analysis.op.inputs[
                     "n_components"
                 ],
             ),
         ],
     ):
         self.assertTrue((10, 8) == results[
             principal_component_analysis.op.outputs["result"].name].shape)
Example 4
async def operation_db():
    """
    Create the database and table (myTable) for the db operations
    """
    sdb = SqliteDatabase(SqliteDatabaseConfig(filename="examples.db"))

    dataflow = DataFlow(
        operations={"db_query_create": db_query_create_table.op},
        configs={"db_query_create": DatabaseQueryConfig(database=sdb)},
        seed=[],
    )

    inputs = [
        Input(
            value="myTable",
            definition=db_query_create_table.op.inputs["table_name"],
        ),
        Input(
            value={
                "key": "INTEGER NOT NULL PRIMARY KEY",
                "firstName": "text",
                "lastName": "text",
                "age": "int",
            },
            definition=db_query_create_table.op.inputs["cols"],
        ),
    ]

    async for ctx, result in MemoryOrchestrator.run(dataflow, inputs):
        pass
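
operation_db is a plain coroutine rather than a unittest method, so something has to drive the event loop. A minimal sketch, assuming the snippet above is saved as a script:

import asyncio

if __name__ == "__main__":
    # Create examples.db and the myTable table by running the dataflow above.
    asyncio.run(operation_db())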
Example 5
 async def test_get_embedding(self):
     input_sentence = (
         "The end is the beginning , and the beginning is the end")
     async for ctx, results in MemoryOrchestrator.run(
         DataFlow.auto(get_embedding, GetSingle),
         [
             Input(
                 value=[get_embedding.op.outputs["result"].name],
                 definition=GetSingle.op.inputs["spec"],
             ),
             Input(
                 value=input_sentence,
                 definition=get_embedding.op.inputs["text"],
             ),
             Input(
                 value="en_core_web_sm",
                 definition=get_embedding.op.inputs["spacy_model"],
             ),
         ],
     ):
         embeddings = results[get_embedding.op.outputs["result"].name]
         self.assertEqual(len(input_sentence.split()), len(embeddings))
         self.assertEqual(
             embeddings[randint(0,
                                len(input_sentence.split()) - 1)].shape,
             embeddings[randint(0,
                                len(input_sentence.split()) - 1)].shape,
         )
Example 6
    async def test_associatedefinition(self):
        feed_def = Definition(name="feed", primitive="string")
        dead_def = Definition(name="dead", primitive="string")
        output = Definition(name="output", primitive="string")

        feed_input = Input(value="my favorite value", definition=feed_def)
        face_input = Input(
            value="face", definition=output, parents=[feed_input]
        )

        dead_input = Input(
            value="my second favorite value", definition=dead_def
        )
        beef_input = Input(
            value="beef", definition=output, parents=[dead_input]
        )

        test_result = {"feed": "face", "dead": "beef"}
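        # Each spec {definition_name: "output"} asks AssociateDefinition to
        # return the value of the "output" Input descended from that
        # definition: "feed" -> "face" and "dead" -> "beef".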
        for test_value in test_result.keys():
            async for ctx, results in MemoryOrchestrator.run(
                DataFlow.auto(AssociateDefinition),
                [
                    feed_input,
                    face_input,
                    dead_input,
                    beef_input,
                    Input(
                        value={test_value: "output"},
                        definition=AssociateDefinition.op.inputs["spec"],
                    ),
                ],
            ):
                self.assertEqual(
                    results, {test_value: test_result[test_value]}
                )
Example 7
 async def test_calcHist(self):
     async for ctx, results in MemoryOrchestrator.run(
         DataFlow.auto(calcHist, GetSingle),
         [
             Input(
                 value=[
                     calcHist.op.outputs["result"].name,
                 ],
                 definition=GetSingle.op.inputs["spec"],
             ),
             Input(
                 value=self.INPUT_ARRAY,
                 definition=calcHist.op.inputs["images"],
             ),
             Input(
                 value=None,
                 definition=calcHist.op.inputs["mask"],
             ),
             Input(
                 value=[0, 1],
                 definition=calcHist.op.inputs["channels"],
             ),
             Input(
                 value=[32, 32],
                 definition=calcHist.op.inputs["histSize"],
             ),
             Input(
                 value=[0, 256, 0, 256],
                 definition=calcHist.op.inputs["ranges"],
             ),
         ],
     ):
         self.assertEqual(results[calcHist.op.outputs["result"].name].shape,
                          (32, 32))
Example 8
 async def test_convert_color(self):
     async for ctx, results in MemoryOrchestrator.run(
         DataFlow.auto(convert_color, GetSingle),
         [
             Input(
                 value=[
                     convert_color.op.outputs["result"].name,
                 ],
                 definition=GetSingle.op.inputs["spec"],
             ),
             Input(
                 value=self.INPUT_ARRAY,
                 definition=convert_color.op.inputs["src"],
             ),
             Input(
                 value="BGR2RGB",
                 definition=convert_color.op.inputs["code"],
             ),
         ],
     ):
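         # Converting the RGB result back to BGR should reproduce the input.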
         self.assertEqual(
             cv2.cvtColor(
                 results[convert_color.op.outputs["result"].name],
                 cv2.COLOR_RGB2BGR,
             ).flatten().tolist(),
             self.INPUT_ARRAY.flatten().tolist(),
         )
Example 9
 async def test_pos_tagger(self):
     input_sentence = (
         "The end is the beginning , and the beginning is the end"
     )
     async for ctx, results in MemoryOrchestrator.run(
         DataFlow.auto(pos_tagger, GetSingle),
         [
             Input(
                 value=[pos_tagger.op.outputs["result"].name],
                 definition=GetSingle.op.inputs["spec"],
             ),
             Input(
                 value=input_sentence,
                 definition=pos_tagger.op.inputs["text"],
             ),
             Input(
                 value="en_core_web_sm",
                 definition=pos_tagger.op.inputs["spacy_model"],
             ),
         ],
     ):
         pos_tags = results[pos_tagger.op.outputs["result"].name]
         words = input_sentence.split()
         for i, _ in enumerate(words):
             self.assertEqual(pos_tags[i][0], words[i])
             self.assertIn(pos_tags[i][1], ["DT", "NN", "VBZ", "CC", ","])
Example 10
 async def records(self) -> AsyncIterator[Record]:
     async for record in self.sctx.records():
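         # Feed every configured feature of this record into the dataflow,
         # building each Input's Definition from the feature's name and dtype.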
         async for ctx, result in MemoryOrchestrator.run(
             self.parent.config.dataflow,
             [
                 Input(
                     value=record.feature(feature.name),
                     definition=Definition(name=feature.name,
                                           primitive=str(feature.dtype())),
                 ) for feature in self.parent.config.features
             ],
         ):
             if result:
                 record.evaluated(result)
             yield record
Example 11
 async def test_standard_scaler(self):
     input_data = [[0, 0], [0, 0], [1, 1], [1, 1]]
     output_data = [[-1, -1], [-1, -1], [1, 1], [1, 1]]
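     # Both columns have mean 0.5 and standard deviation 0.5, so
     # 0 -> (0 - 0.5) / 0.5 = -1 and 1 -> (1 - 0.5) / 0.5 = 1.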
     async for ctx, results in MemoryOrchestrator.run(
         DataFlow.auto(standard_scaler, GetSingle),
         [
             Input(
                 value=[standard_scaler.op.outputs["result"].name],
                 definition=GetSingle.op.inputs["spec"],
             ),
             Input(
                 value=input_data,
                 definition=standard_scaler.op.inputs["data"],
             ),
         ],
     ):
         self.assertTrue((results[standard_scaler.op.outputs["result"].name]
                          == output_data))
Example 12
 async def test_HuMoments(self):
     async for ctx, results in MemoryOrchestrator.run(
         DataFlow.auto(HuMoments, GetSingle),
         [
             Input(
                 value=[
                     HuMoments.op.outputs["result"].name,
                 ],
                 definition=GetSingle.op.inputs["spec"],
             ),
             Input(
                 value=self.INPUT_ARRAY,
                 definition=HuMoments.op.inputs["m"],
             ),
         ],
     ):
         self.assertEqual(
             results[HuMoments.op.outputs["result"].name].shape, (7, ))
Example 13
 async def test_remove_whitespaces(self):
     input_data = [["  ABC ", "XYD   "], ["  ABC", "   XYD "]]
     output_data = [["ABC", "XYD"], ["ABC", "XYD"]]
     async for ctx, results in MemoryOrchestrator.run(
         DataFlow.auto(remove_whitespaces, GetSingle),
         [
             Input(
                 value=[remove_whitespaces.op.outputs["result"].name],
                 definition=GetSingle.op.inputs["spec"],
             ),
             Input(
                 value=input_data,
                 definition=remove_whitespaces.op.inputs["data"],
             ),
         ],
     ):
         self.assertTrue(
             (results[remove_whitespaces.op.outputs["result"].name] ==
              output_data).all())
Example 14
 async def test_normalize(self):
     async for ctx, results in MemoryOrchestrator.run(
         DataFlow.auto(normalize, GetSingle),
         [
             Input(
                 value=[
                     normalize.op.outputs["result"].name,
                 ],
                 definition=GetSingle.op.inputs["spec"],
             ),
             Input(
                 value=self.INPUT_ARRAY,
                 definition=normalize.op.inputs["src"],
             ),
         ],
     ):
         self.assertEqual(
             results[normalize.op.outputs["result"].name].shape,
             self.INPUT_ARRAY.shape,
         )
Example 15
 async def test_remove_stopwords(self):
     input_sentence = (
         "The end is the beginning, and the beginning is the end")
     output_sentence = "end beginning , beginning end"
     async for ctx, results in MemoryOrchestrator.run(
         DataFlow.auto(remove_stopwords, GetSingle),
         [
             Input(
                 value=[remove_stopwords.op.outputs["result"].name],
                 definition=GetSingle.op.inputs["spec"],
             ),
             Input(
                 value=input_sentence,
                 definition=remove_stopwords.op.inputs["text"],
             ),
         ],
     ):
         self.assertEqual(
             results[remove_stopwords.op.outputs["result"].name],
             output_sentence,
         )
Example 16
 async def test_get_sentences(self):
     input_sentence = "The end is the beginning. The beginning is the end."
     async for ctx, results in MemoryOrchestrator.run(
         DataFlow.auto(get_sentences, GetSingle),
         [
             Input(
                 value=[get_sentences.op.outputs["result"].name],
                 definition=GetSingle.op.inputs["spec"],
             ),
             Input(
                 value=input_sentence,
                 definition=get_sentences.op.inputs["text"],
             ),
             Input(
                 value="en_core_web_sm",
                 definition=get_sentences.op.inputs["spacy_model"],
             ),
         ],
     ):
         sentences = results[get_sentences.op.outputs["result"].name]
         self.assertEqual(len(sentences), 2)
Example 17
 async def test_singular_value_decomposition(self):
     input_data, _ = make_classification(
         n_samples=10,
         n_features=10,
         n_informative=8,
         n_redundant=2,
         random_state=7,
     )
     async for ctx, results in MemoryOrchestrator.run(
         DataFlow.auto(singular_value_decomposition, GetSingle),
         [
             Input(
                 value=[
                     singular_value_decomposition.op.outputs["result"].name
                 ],
                 definition=GetSingle.op.inputs["spec"],
             ),
             Input(
                 value=input_data,
                 definition=singular_value_decomposition.op.inputs["data"],
             ),
             Input(
                 value=8,
                 definition=singular_value_decomposition.op.inputs[
                     "n_components"
                 ],
             ),
             Input(
                 value=1,
                 definition=singular_value_decomposition.op.inputs["n_iter"],
             ),
             Input(
                 value=7,
                 definition=singular_value_decomposition.op.inputs[
                     "random_state"
                 ],
             ),
         ],
     ):
         self.assertTrue((10, 8) == results[
             singular_value_decomposition.op.outputs["result"].name].shape)
Example 18
 async def test_flatten(self):
     input_array = np.zeros((100, 100, 3), dtype=np.uint8)
     output_array = [0] * (100 * 100 * 3)
     async for ctx, results in MemoryOrchestrator.run(
         DataFlow.auto(flatten, GetSingle),
         [
             Input(
                 value=[
                     flatten.op.outputs["result"].name,
                 ],
                 definition=GetSingle.op.inputs["spec"],
             ),
             Input(
                 value=input_array,
                 definition=flatten.op.inputs["array"],
             ),
         ],
     ):
         self.assertEqual(
             results[flatten.op.outputs["result"].name].tolist(),
             output_array,
         )
Example 19
 async def test_ordinal_encoder(self):
     input_data = [["x", "a"], ["x", "b"], ["y", "a"]]
     output_data = [
         [1.0, 0.0, 1.0, 0.0],
         [1.0, 0.0, 0.0, 1.0],
         [0.0, 1.0, 1.0, 0.0],
     ]
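     # The expected output has one indicator column per category value
     # (x/y for the first column, a/b for the second), i.e. 4 columns per row.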
     async for ctx, results in MemoryOrchestrator.run(
         DataFlow.auto(ordinal_encoder, GetSingle),
         [
             Input(
                 value=[ordinal_encoder.op.outputs["result"].name],
                 definition=GetSingle.op.inputs["spec"],
             ),
             Input(
                 value=input_data,
                 definition=ordinal_encoder.op.inputs["data"],
             ),
         ],
     ):
         self.assertTrue((results[ordinal_encoder.op.outputs["result"].name]
                          == output_data).all())
Example 20
 async def test_run(self):
     packages = {
         "http://pkg.freebsd.org/FreeBSD:13:amd64/latest/All/ImageMagick7-7.0.8.48.txz":
         {},
         "https://download.clearlinux.org/releases/10540/clear/x86_64/os/Packages/sudo-setuid-1.8.17p1-34.x86_64.rpm":
         {
             "./usr/bin/sudo": True
         },
         "https://rpmfind.net/linux/fedora/linux/updates/29/Everything/x86_64/Packages/g/gzip-1.9-9.fc29.x86_64.rpm":
         {},
         "https://archives.fedoraproject.org/pub/archive/fedora/linux/releases/20/Everything/x86_64/os/Packages/c/curl-7.32.0-3.fc20.x86_64.rpm":
         {
             "./usr/bin/curl": False
         },
     }
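     # Track which package URLs actually produced results.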
     found = dict(zip(packages.keys(), [False] * len(packages)))
     async for ctx, results in MemoryOrchestrator.run(
         dataflow,
         {
             URL: [
                 Input(value=URL,
                       definition=URLToURLBytes.op.inputs["URL"]),
                 Input(
                     value=["rpm_filename", "binary_is_PIE"],
                     definition=Associate.op.inputs["spec"],
                 ),
             ]
             for URL in packages
         },
         strict=True,
     ):
         package_url = (await ctx.handle()).as_string()
         with self.subTest(package_url=package_url):
             self.assertIn("binary_is_PIE", results)
             self.assertDictEqual(results["binary_is_PIE"],
                                  packages[package_url])
         found[package_url] = True
     self.assertTrue(all(found.values()),
                     "Not all packages we analyized: f{found}")
Example 21
 async def test_lemmatizer(self):
     input_sentence = (
         "The end is the beginning , and the beginning is the end"
     )
     async for ctx, results in MemoryOrchestrator.run(
         DataFlow.auto(lemmatizer, GetSingle),
         [
             Input(
                 value=[lemmatizer.op.outputs["result"].name],
                 definition=GetSingle.op.inputs["spec"],
             ),
             Input(
                 value=input_sentence,
                 definition=lemmatizer.op.inputs["text"],
             ),
             Input(
                 value="en_core_web_sm",
                 definition=lemmatizer.op.inputs["spacy_model"],
             ),
         ],
     ):
         lemma_list = results[lemmatizer.op.outputs["result"].name]
         self.assertEqual(len(input_sentence.split()), len(lemma_list))
Example 22
 async def test_one_hot_encoder(self):
     categories = [["Male", "Female"], [1, 2, 3]]
     input_data = [["Female", 1], ["Male", 3]]
     output_data = [[0.0, 1.0, 1.0, 0.0, 0.0], [1.0, 0.0, 0.0, 0.0, 1.0]]
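     # Each row concatenates a 2-slot one-hot vector for Male/Female with a
     # 3-slot one-hot vector for 1/2/3, giving 5 columns per row.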
     async for ctx, results in MemoryOrchestrator.run(
         DataFlow.auto(one_hot_encoder, GetSingle),
         [
             Input(
                 value=[one_hot_encoder.op.outputs["result"].name],
                 definition=GetSingle.op.inputs["spec"],
             ),
             Input(
                 value=input_data,
                 definition=one_hot_encoder.op.inputs["data"],
             ),
             Input(
                 value=categories,
                 definition=one_hot_encoder.op.inputs["categories"],
             ),
         ],
     ):
         self.assertTrue((results[one_hot_encoder.op.outputs["result"].name]
                          == output_data).all())
Example 23
async def main():
    # train the model
    await train(
        slr_model,
        {
            "Years": 0,
            "Salary": 10
        },
        {
            "Years": 1,
            "Salary": 20
        },
        {
            "Years": 2,
            "Salary": 30
        },
        {
            "Years": 3,
            "Salary": 40
        },
    )
    # Run the dataflow
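    # Passing a dict maps each context name to its list of Inputs; the single
    # context "inputs" starts here with no Inputs.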
    async for ctx, results in MemoryOrchestrator.run(dataflow, {"inputs": []}):
        pass
Example 24
 async def test_resize(self):
     async for ctx, results in MemoryOrchestrator.run(
         DataFlow.auto(resize, GetSingle),
         [
             Input(
                 value=[
                     resize.op.outputs["result"].name,
                 ],
                 definition=GetSingle.op.inputs["spec"],
             ),
             Input(
                 value=self.INPUT_ARRAY,
                 definition=resize.op.inputs["src"],
             ),
             Input(
                 value=[50, 50, 3],
                 definition=resize.op.inputs["dsize"],
             ),
         ],
     ):
         self.assertEqual(
             results[resize.op.outputs["result"].name].shape,
             (50, 50, 3),
         )
Example 25
 async def test_tfidf_vectorizer(self):
     input_sentence = [
         "The end is the beginning. The beginning is the end."
     ]
     async for ctx, results in MemoryOrchestrator.run(
         DataFlow.auto(tfidf_vectorizer, GetSingle),
         [
             Input(
                 value=[tfidf_vectorizer.op.outputs["result"].name],
                 definition=GetSingle.op.inputs["spec"],
             ),
             Input(
                 value=input_sentence,
                 definition=tfidf_vectorizer.op.inputs["text"],
             ),
             Input(
                 value=[1, 1],
                 definition=tfidf_vectorizer.op.inputs["ngram_range"],
             ),
             Input(
                 value=True,
                 definition=tfidf_vectorizer.op.inputs["get_feature_names"],
             ),
         ],
     ):
         vectors = results[tfidf_vectorizer.op.outputs["result"].name][0]
         features = results[tfidf_vectorizer.op.outputs["result"].name][1]
         self.assertTrue(isinstance(features, list))
         self.assertTrue(isinstance(vectors, np.ndarray))
         unique_tokens = list(
             set(input_sentence[0].lower().replace(".", "").split())
         )
         self.assertEqual(len(vectors[0]), len(unique_tokens))
         self.assertEqual(
             set(features).intersection(set(unique_tokens)), set(features)
         )