def test_conversion(self):
    """Round-trip the iris problem description through its JSON structure.

    Loads ``iris_problem_1/problemDoc.json`` from the test data directory,
    then checks that rebuilding the problem from its JSON structure (both
    via ``to_json_structure()`` and via the legacy ``utils`` helper) yields
    the same simple structure, and that the first task keyword is restored
    as the ``TaskKeyword.CLASSIFICATION`` enum member itself.
    """
    doc_path = os.path.join(
        os.path.dirname(__file__),
        'data',
        'problems',
        'iris_problem_1',
        'problemDoc.json',
    )
    problem_description = problem.Problem.load(
        'file://{problem_doc_path}'.format(problem_doc_path=doc_path),
    )

    # Current path: to_json_structure() followed by from_json_structure().
    simple_before = problem_description.to_simple_structure()
    restored = problem.Problem.from_json_structure(
        problem_description.to_json_structure(),
        strict_digest=True,
    )
    self.assertEqual(simple_before, restored.to_simple_structure())

    # Legacy.
    legacy_json = utils.to_json_structure(
        problem_description.to_simple_structure(),
    )
    legacy_restored = problem.Problem.from_json_structure(
        utils.to_json_structure(problem_description.to_simple_structure()),
        strict_digest=True,
    )
    self.assertEqual(legacy_json, legacy_restored.to_simple_structure())

    # Identity (not just equality) is asserted for the restored keyword.
    restored_again = problem.Problem.from_json_structure(
        problem_description.to_json_structure(),
        strict_digest=True,
    )
    first_keyword = restored_again['problem']['task_keywords'][0]
    self.assertIs(first_keyword, problem.TaskKeyword.CLASSIFICATION)
def test_dataframe_with_objects(self):
    """Compare compact and non-compact metadata for a 10x5 frame of strings.

    The frame is wrapped without generating metadata; metadata is then
    generated explicitly in both modes and each result is compared against
    its expected JSON structure.
    """
    labels = [str(i) for i in range(5)]
    cells = {label: [str(j) for j in range(10)] for label in labels}
    frame = container.DataFrame(
        pandas.DataFrame(cells, columns=labels),
        generate_metadata=False,
    )

    compact_metadata = frame.metadata.generate(frame, compact=True)
    noncompact_metadata = frame.metadata.generate(frame, compact=False)

    # Entries expected in both modes: table level and column dimension.
    shared_prefix = [
        {
            'selector': [],
            'metadata': {
                'schema': base.CONTAINER_SCHEMA_VERSION,
                'structural_type': 'd3m.container.pandas.DataFrame',
                'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'],
                'dimension': {
                    'name': 'rows',
                    'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'],
                    'length': 10,
                },
            },
        },
        {
            'selector': ['__ALL_ELEMENTS__'],
            'metadata': {
                'dimension': {
                    'name': 'columns',
                    'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'],
                    'length': 5,
                },
            },
        },
    ]

    # Compact mode: one entry for all cells carries the structural type.
    all_cells_are_str = {
        'selector': ['__ALL_ELEMENTS__', '__ALL_ELEMENTS__'],
        'metadata': {
            'structural_type': 'str',
        },
    }
    names_only = [
        {'selector': ['__ALL_ELEMENTS__', idx], 'metadata': {'name': str(idx)}}
        for idx in range(5)
    ]
    self.assertEqual(
        utils.to_json_structure(compact_metadata.to_internal_simple_structure()),
        shared_prefix + [all_cells_are_str] + names_only,
    )

    # Non-compact mode: every column entry carries the structural type.
    names_with_type = [
        {
            'selector': ['__ALL_ELEMENTS__', idx],
            'metadata': {'name': str(idx), 'structural_type': 'str'},
        }
        for idx in range(5)
    ]
    self.assertEqual(
        utils.to_json_structure(noncompact_metadata.to_internal_simple_structure()),
        shared_prefix + names_with_type,
    )
def test_list_with_objects(self):
    """Check generated metadata for a list of 10 lists of 5 strings each."""
    nested = container.List(
        [
            container.List([str(outer) for _ in range(5)])
            for outer in range(10)
        ],
        generate_metadata=True,
    )
    actual = utils.to_json_structure(
        nested.metadata.to_internal_simple_structure(),
    )

    list_type = 'd3m.container.list.List'
    outer_entry = {
        'selector': [],
        'metadata': {
            'schema': base.CONTAINER_SCHEMA_VERSION,
            'structural_type': list_type,
            'dimension': {'length': 10},
        },
    }
    inner_entry = {
        'selector': ['__ALL_ELEMENTS__'],
        'metadata': {
            'structural_type': list_type,
            'dimension': {'length': 5},
        },
    }
    element_entry = {
        'selector': ['__ALL_ELEMENTS__', '__ALL_ELEMENTS__'],
        'metadata': {'structural_type': 'str'},
    }

    self.assertEqual(actual, [outer_entry, inner_entry, element_entry])
def test_basic(self):
    """Run HoltWintersExponentialSmoothing on a 4-row frame and check it.

    Verifies the generated input metadata, the rounded smoothed column in
    the primitive's output, and that params can be read and set back.
    """
    all_elements = '__ALL_ELEMENTS__'
    main = container.DataFrame(
        {
            'timestamp': [1, 2, 3, 4],
            'value': [0.32, 0.32, 0.31, 0.33],
        },
        {
            'top_level': 'main',
        },
        generate_metadata=True,
    )

    expected_input_metadata = [
        {
            'selector': [],
            'metadata': {
                'top_level': 'main',
                'schema': metadata_base.CONTAINER_SCHEMA_VERSION,
                'structural_type': 'd3m.container.pandas.DataFrame',
                'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'],
                'dimension': {
                    'name': 'rows',
                    'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'],
                    'length': 4,
                },
            },
        },
        {
            'selector': [all_elements],
            'metadata': {
                'dimension': {
                    'name': 'columns',
                    'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'],
                    'length': 2,
                },
            },
        },
        {
            'selector': [all_elements, 0],
            'metadata': {'structural_type': 'numpy.int64', 'name': 'timestamp'},
        },
        {
            'selector': [all_elements, 1],
            'metadata': {'structural_type': 'numpy.float64', 'name': 'value'},
        },
    ]
    self.assertEqual(
        utils.to_json_structure(main.metadata.to_internal_simple_structure()),
        expected_input_metadata,
    )

    primitive_class = HoltWintersExponentialSmoothing.HoltWintersExponentialSmoothing
    hyperparams_class = primitive_class.metadata.get_hyperparams()
    primitive = primitive_class(hyperparams=hyperparams_class.defaults())

    # NOTE(review): set_training_data()/fit() were commented out in the
    # original; produce() is called without them -- confirm this is intended.
    produced = primitive.produce(inputs=main).value
    output_main = round(produced, 2)
    print ( "output", output_main)

    expected_result = container.DataFrame(
        data={
            'timestamp': [1, 2, 3, 4],
            'value': [0.32, 0.32, 0.31, 0.32],
        },
    )
    print ("expected_result", expected_result)

    actual_rows = output_main[['timestamp', 'value_holt_winters_smoothing']].values.tolist()
    expected_rows = expected_result[['timestamp', 'value']].values.tolist()
    self.assertEqual(actual_rows, expected_rows)

    # Params round trip: reading params and setting them back must not raise.
    params = primitive.get_params()
    primitive.set_params(params=params)
def test_problem(self):
    """Encode the iris problem description and decode it back unchanged."""
    problem_doc = os.path.join(
        TEST_PROBLEMS_DIR, 'iris_problem_1', 'problemDoc.json',
    )
    problem_description = problem_module.parse_problem_description(problem_doc)

    # TA2-TA3 API does not encode outputs.
    del problem_description['outputs']

    # The digest is recomputed over the description without 'outputs', so
    # that decoding with strict_digest=True sees a matching digest.
    json_structure = d3m_utils.to_json_structure(problem_description)
    problem_description['digest'] = d3m_utils.compute_digest(json_structure)

    problem_message = utils.encode_problem_description(problem_description)
    decoded = utils.decode_problem_description(
        problem_message, strict_digest=True,
    )

    self.assertEqual(decoded, problem_description)
def test_metadata(self):
    """Check the metadata attached to ``self.prediction_labels``.

    Expects a 10-row, single-column table whose only column is named
    ``LSTMODetector0_0``, typed ``numpy.int64`` and tagged as an attribute.
    """
    table_entry = {
        'selector': [],
        'metadata': {
            'schema': metadata_base.CONTAINER_SCHEMA_VERSION,
            'structural_type': 'd3m.container.pandas.DataFrame',
            'semantic_types': [
                'https://metadata.datadrivendiscovery.org/types/Table',
            ],
            'dimension': {
                'name': 'rows',
                'semantic_types': [
                    'https://metadata.datadrivendiscovery.org/types/TabularRow',
                ],
                'length': 10,
            },
        },
    }
    columns_entry = {
        'selector': ['__ALL_ELEMENTS__'],
        'metadata': {
            'dimension': {
                'name': 'columns',
                'semantic_types': [
                    'https://metadata.datadrivendiscovery.org/types/TabularColumn',
                ],
                'length': 1,
            },
        },
    }
    label_column_entry = {
        'selector': ['__ALL_ELEMENTS__', 0],
        'metadata': {
            'name': 'LSTMODetector0_0',
            'semantic_types': [
                'https://metadata.datadrivendiscovery.org/types/Attribute',
            ],
            'structural_type': 'numpy.int64',
        },
    }

    actual = utils.to_json_structure(
        self.prediction_labels.metadata.to_internal_simple_structure(),
    )
    self.assertEqual(actual, [table_entry, columns_entry, label_column_entry])
def test_dataframe_tabular_semantic_types(self):
    """Regenerate metadata on a frame constructed with wrong metadata.

    The frame starts with top-level metadata that labels the table itself
    as a tabular row and its dimension as 'columns'. After calling
    ``generate()`` the result is compared to the expected structure below.
    """
    row_type = 'https://metadata.datadrivendiscovery.org/types/TabularRow'
    column_type = 'https://metadata.datadrivendiscovery.org/types/TabularColumn'

    # A DataFrame with explicit WRONG metadata.
    wrong_metadata = {
        'semantic_types': [row_type],
        'dimension': {
            'name': 'columns',
            'semantic_types': [column_type],
        },
    }
    df = container.DataFrame(
        {'A': [1, 2, 3], 'B': ['a', 'b', 'c']},
        wrong_metadata,
    )
    df.A = df.A.astype(numpy.int64)
    df.metadata = df.metadata.generate(df)

    expected = [
        {
            'selector': [],
            'metadata': {
                'schema': base.CONTAINER_SCHEMA_VERSION,
                'structural_type': 'd3m.container.pandas.DataFrame',
                'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'],
                'dimension': {
                    # We respect the name, but we override the semantic types.
                    'name': 'columns',
                    'semantic_types': [row_type],
                    'length': 3,
                },
            },
        },
        {
            'selector': ['__ALL_ELEMENTS__'],
            'metadata': {
                'dimension': {
                    'name': 'columns',
                    'semantic_types': [column_type],
                    'length': 2,
                },
            },
        },
        {
            'selector': ['__ALL_ELEMENTS__', 0],
            'metadata': {
                'name': 'A',
                'structural_type': 'numpy.int64',
            },
        },
        {
            'selector': ['__ALL_ELEMENTS__', 1],
            'metadata': {
                'name': 'B',
                'structural_type': 'str',
            },
        },
    ]
    self.assertEqual(
        utils.to_json_structure(df.metadata.to_internal_simple_structure()),
        expected,
    )
def test_ndarray(self):
    """Check generated metadata for a 1-D int64 container ndarray."""
    values = numpy.array([1, 2, 3], dtype=numpy.int64)
    array = container.ndarray(values, generate_metadata=True)

    array_entry = {
        'selector': [],
        'metadata': {
            'schema': base.CONTAINER_SCHEMA_VERSION,
            'structural_type': 'd3m.container.numpy.ndarray',
            'dimension': {'length': 3},
        },
    }
    element_entry = {
        'selector': ['__ALL_ELEMENTS__'],
        'metadata': {'structural_type': 'numpy.int64'},
    }

    self.assertEqual(
        utils.to_json_structure(array.metadata.to_internal_simple_structure()),
        [array_entry, element_entry],
    )
def test_ndarray_with_objects(self):
    """Check generated metadata for a 10x10x5 object ndarray of strings."""
    innermost = [str(k) for k in range(5)]
    raw = numpy.array(
        [[list(innermost) for _ in range(10)] for _ in range(10)],
        dtype=object,
    )
    array = container.ndarray(raw, generate_metadata=True)

    all_elements = '__ALL_ELEMENTS__'
    expected = [
        {
            'selector': [],
            'metadata': {
                'schema': base.CONTAINER_SCHEMA_VERSION,
                'structural_type': 'd3m.container.numpy.ndarray',
                'dimension': {'length': 10},
            },
        },
        {
            'selector': [all_elements],
            'metadata': {'dimension': {'length': 10}},
        },
        {
            'selector': [all_elements, all_elements],
            'metadata': {'dimension': {'length': 5}},
        },
        {
            'selector': [all_elements, all_elements, all_elements],
            'metadata': {'structural_type': 'str'},
        },
    ]

    self.assertEqual(
        utils.to_json_structure(array.metadata.to_internal_simple_structure()),
        expected,
    )
def test_basic(self):
    """Run NonNegativeMatrixFactorization in 'append' mode and check it.

    Verifies the generated input metadata, the produced frame (original
    columns plus five row latent vectors and two column latent vectors),
    the produced metadata, and that params can be read and set back.
    """
    self.maxDiff = None
    all_elements = '__ALL_ELEMENTS__'
    table_type = 'https://metadata.datadrivendiscovery.org/types/Table'
    row_type = 'https://metadata.datadrivendiscovery.org/types/TabularRow'
    column_type = 'https://metadata.datadrivendiscovery.org/types/TabularColumn'
    attribute_type = 'https://metadata.datadrivendiscovery.org/types/Attribute'

    main = container.DataFrame(
        {
            'A': [1, 2, 3],
            'B': [4, 5, 6],
        },
        columns=['A', 'B'],
        generate_metadata=True,
    )

    self.assertEqual(
        utils.to_json_structure(main.metadata.to_internal_simple_structure()),
        [
            {
                'selector': [],
                'metadata': {
                    'schema': metadata_base.CONTAINER_SCHEMA_VERSION,
                    'structural_type': 'd3m.container.pandas.DataFrame',
                    'semantic_types': [table_type],
                    'dimension': {
                        'name': 'rows',
                        'semantic_types': [row_type],
                        'length': 3,
                    },
                },
            },
            {
                'selector': [all_elements],
                'metadata': {
                    'dimension': {
                        'name': 'columns',
                        'semantic_types': [column_type],
                        'length': 2,
                    },
                },
            },
            {
                'selector': [all_elements, 0],
                'metadata': {
                    'structural_type': 'numpy.int64',
                    'name': 'A',
                },
            },
            {
                'selector': [all_elements, 1],
                'metadata': {
                    'structural_type': 'numpy.int64',
                    'name': 'B',
                },
            },
        ],
    )

    # Fixed factor matrices passed as the 'W' and 'H' hyperparameters.
    a = np.array([[1, 0, 1, 0, 1], [1, 0, 1, 0, 1], [1, 0, 1, 0, 1]])
    b = np.array([[1, 0], [1, 0], [1, 0], [1, 0], [1, 0]])

    primitive_class = NonNegativeMatrixFactorization.NonNegativeMatrixFactorization
    hyperparams_class = primitive_class.metadata.get_hyperparams()
    hp = hyperparams_class.defaults().replace({
        'use_semantic_types': True,
        'use_columns': (0, 1),
        'return_result': 'append',
        'rank': 5,
        'seed': 'fixed',
        'W': a,
        'H': b,
    })
    primitive = primitive_class(hyperparams=hp)
    new_main = primitive.produce(inputs=main).value
    print("new_main", new_main)

    # Column order matters for assert_frame_equal, so keys stay in this order.
    expected_frame = pd.DataFrame({
        "A": [1, 2, 3, np.nan, np.nan],
        "B": [4, 5, 6, np.nan, np.nan],
        'row_latent_vector_0': [0.816725, 1.078965, 1.341205, np.nan, np.nan],
        'row_latent_vector_1': [3.514284e-16, 2.383547e-16, 2.227207e-16, np.nan, np.nan],
        'row_latent_vector_2': [0.816725, 1.078965, 1.341205, np.nan, np.nan],
        'row_latent_vector_3': [3.514284e-16, 2.383547e-16, 2.227207e-16, np.nan, np.nan],
        'row_latent_vector_4': [0.816725, 1.078965, 1.341205, np.nan, np.nan],
        'column_latent_vector_0': [0.642626, 0.542312, 0.642626, 0.542312, 0.642626],
        'column_latent_vector_1': [1.534324, 1.848782, 1.534324, 1.848782, 1.534324],
    })
    pd.testing.assert_frame_equal(new_main, expected_frame)

    # Appended columns occupy indices 2..8, after the two input columns.
    latent_columns = [
        'row_latent_vector_0',
        'row_latent_vector_1',
        'row_latent_vector_2',
        'row_latent_vector_3',
        'row_latent_vector_4',
        'column_latent_vector_0',
        'column_latent_vector_1',
    ]
    latent_entries = [
        {
            'selector': [all_elements, position],
            'metadata': {
                'name': column_name,
                'semantic_types': [attribute_type],
                'structural_type': 'numpy.float64',
            },
        }
        for position, column_name in enumerate(latent_columns, start=2)
    ]

    # NOTE(review): the expected row dimension length is 3 although the
    # expected frame above has 5 rows -- confirm this mirrors the
    # primitive's metadata behaviour rather than a stale expectation.
    expected_output_metadata = [
        {
            'selector': [],
            'metadata': {
                'schema': metadata_base.CONTAINER_SCHEMA_VERSION,
                'structural_type': 'd3m.container.pandas.DataFrame',
                'semantic_types': [table_type],
                'dimension': {
                    'name': 'rows',
                    'semantic_types': [row_type],
                    'length': 3,
                },
            },
        },
        {
            'selector': [all_elements],
            'metadata': {
                'dimension': {
                    'name': 'columns',
                    'semantic_types': [column_type],
                    'length': 9,
                },
            },
        },
        {
            'selector': [all_elements, 0],
            'metadata': {
                'name': 'A',
                'structural_type': 'numpy.int64',
            },
        },
        {
            'selector': [all_elements, 1],
            'metadata': {
                'name': 'B',
                'structural_type': 'numpy.int64',
            },
        },
    ] + latent_entries
    self.assertEqual(
        utils.to_json_structure(new_main.metadata.to_internal_simple_structure()),
        expected_output_metadata,
    )

    # Params round trip: reading params and setting them back must not
    # raise. The original repeated this verbatim twice; once is enough.
    params = primitive.get_params()
    primitive.set_params(params=params)
def test_basic(self):
    """Run StatisticalMaximumPrimitive with window size 2 on two columns.

    Verifies the generated input metadata, the appended ``values_maximum``
    and ``b_maximum`` columns, the produced metadata, and that params can
    be read and set back.
    """
    self.maxDiff = None
    all_elements = '__ALL_ELEMENTS__'
    table_type = 'https://metadata.datadrivendiscovery.org/types/Table'
    row_type = 'https://metadata.datadrivendiscovery.org/types/TabularRow'
    column_type = 'https://metadata.datadrivendiscovery.org/types/TabularColumn'
    attribute_type = 'https://metadata.datadrivendiscovery.org/types/Attribute'

    main = container.DataFrame(
        {
            'timestamp': [1, 3, 2, 5],
            'values': [1.0, 2.0, 3.0, 4.0],
            'b': [1.0, 4.0, 5.0, 6.0],
        },
        columns=['timestamp', 'values', 'b'],
        generate_metadata=True,
    )

    self.assertEqual(
        utils.to_json_structure(main.metadata.to_internal_simple_structure()),
        [
            {
                'selector': [],
                'metadata': {
                    'schema': metadata_base.CONTAINER_SCHEMA_VERSION,
                    'structural_type': 'd3m.container.pandas.DataFrame',
                    'semantic_types': [table_type],
                    'dimension': {
                        'name': 'rows',
                        'semantic_types': [row_type],
                        'length': 4,
                    },
                },
            },
            {
                'selector': [all_elements],
                'metadata': {
                    'dimension': {
                        'name': 'columns',
                        'semantic_types': [column_type],
                        'length': 3,
                    },
                },
            },
            {
                'selector': [all_elements, 0],
                'metadata': {'structural_type': 'numpy.int64', 'name': 'timestamp'},
            },
            {
                'selector': [all_elements, 1],
                'metadata': {'structural_type': 'numpy.float64', 'name': 'values'},
            },
            {
                'selector': [all_elements, 2],
                'metadata': {'structural_type': 'numpy.float64', 'name': 'b'},
            },
        ],
    )

    hyperparams_class = StatisticalMaximum.StatisticalMaximumPrimitive.metadata.get_hyperparams()
    hp = hyperparams_class.defaults().replace({
        'use_columns': [1, 2],
        'use_semantic_types': True,
        'window_size': 2,
    })
    primitive = StatisticalMaximum.StatisticalMaximumPrimitive(hyperparams=hp)
    output_main = primitive.produce(inputs=main).value
    print(output_main)

    compared_columns = ['timestamp', 'values', 'b', 'values_maximum', 'b_maximum']
    expected_output = container.DataFrame(
        {
            'timestamp': [1, 3, 2, 5],
            'values': [1.0, 2.0, 3.0, 4.0],
            'b': [1.0, 4.0, 5.0, 6.0],
            'values_maximum': [2.0, 2.0, 3.0, 4.0],
            'b_maximum': [4.0, 4.0, 5.0, 6.0],
        },
        columns=compared_columns,
    )
    self.assertEqual(
        output_main[compared_columns].values.tolist(),
        expected_output[compared_columns].values.tolist(),
    )

    # The schema is taken from the same constant as in the input-metadata
    # assertion above instead of a hard-coded URL, so both expectations
    # stay in sync with the library.
    self.assertEqual(
        utils.to_json_structure(output_main.metadata.to_internal_simple_structure()),
        [
            {
                'selector': [],
                'metadata': {
                    'schema': metadata_base.CONTAINER_SCHEMA_VERSION,
                    'structural_type': 'd3m.container.pandas.DataFrame',
                    'semantic_types': [table_type],
                    'dimension': {
                        'name': 'rows',
                        'semantic_types': [row_type],
                        'length': 4,
                    },
                },
            },
            {
                'selector': [all_elements],
                'metadata': {
                    'dimension': {
                        'name': 'columns',
                        'semantic_types': [column_type],
                        'length': 5,
                    },
                },
            },
            {
                'selector': [all_elements, 0],
                'metadata': {'name': 'timestamp', 'structural_type': 'numpy.int64'},
            },
            {
                'selector': [all_elements, 1],
                'metadata': {'name': 'values', 'structural_type': 'numpy.float64'},
            },
            {
                'selector': [all_elements, 2],
                'metadata': {'name': 'b', 'structural_type': 'numpy.float64'},
            },
            {
                'selector': [all_elements, 3],
                'metadata': {
                    'name': 'values_maximum',
                    'semantic_types': [attribute_type],
                    'structural_type': 'numpy.float64',
                },
            },
            {
                'selector': [all_elements, 4],
                'metadata': {
                    'name': 'b_maximum',
                    'semantic_types': [attribute_type],
                    'structural_type': 'numpy.float64',
                },
            },
        ],
    )

    # Params round trip: reading params and setting them back must not raise.
    params = primitive.get_params()
    primitive.set_params(params=params)
def _test_metadata(self, metadata):
    """Assert that ``metadata`` matches the expected image-table structure.

    The expected structure describes a 5-row, 1-column files-collection
    table with a ``filename`` column, plus one per-row entry for each of
    the five rows carrying ``image_reader_metadata``.

    :param metadata: metadata object exposing
        ``to_internal_simple_structure()``.
    """
    all_elements = '__ALL_ELEMENTS__'
    row_type = 'https://metadata.datadrivendiscovery.org/types/TabularRow'
    column_type = 'https://metadata.datadrivendiscovery.org/types/TabularColumn'
    table_type = 'https://metadata.datadrivendiscovery.org/types/Table'
    primary_key_type = 'https://metadata.datadrivendiscovery.org/types/PrimaryKey'
    image_type = 'http://schema.org/ImageObject'

    structural_entries = [
        {
            'selector': [],
            'metadata': {
                'dimension': {
                    'length': 5,
                    'name': 'rows',
                    'semantic_types': [row_type],
                },
                'schema': 'https://metadata.datadrivendiscovery.org/schemas/v0/container.json',
                'semantic_types': [
                    table_type,
                    'https://metadata.datadrivendiscovery.org/types/FilesCollection',
                ],
                'structural_type': 'd3m.container.pandas.DataFrame',
            },
        },
        {
            'selector': [all_elements],
            'metadata': {
                'dimension': {
                    'length': 1,
                    'name': 'columns',
                    'semantic_types': [column_type],
                },
            },
        },
        {
            'selector': [all_elements, all_elements],
            'metadata': {
                'structural_type': 'd3m.container.numpy.ndarray',
            },
        },
        {
            'selector': [all_elements, all_elements, all_elements, all_elements],
            'metadata': {
                'structural_type': 'str',
            },
        },
        {
            'selector': [all_elements, 0],
            'metadata': {
                'dimension': {
                    'length': 1,
                    'name': 'rows',
                    'semantic_types': [row_type],
                },
                'location_base_uris': '__NO_VALUE__',
                'media_types': '__NO_VALUE__',
                'name': 'filename',
                'semantic_types': [primary_key_type, image_type, table_type],
            },
        },
        {
            'selector': [all_elements, 0, all_elements],
            'metadata': {
                'dimension': {
                    'length': 1,
                    'name': 'columns',
                    'semantic_types': [column_type],
                },
            },
        },
    ]

    # Rows 0..4 each carry an identical entry for their single cell.
    per_row_entries = [
        {
            'selector': [row, 0],
            'metadata': {
                'image_reader_metadata': {'foobar': 42},
                'semantic_types': [primary_key_type, image_type, table_type],
            },
        }
        for row in range(5)
    ]

    self.assertEqual(
        utils.to_json_structure(metadata.to_internal_simple_structure()),
        structural_entries + per_row_entries,
    )
def test_basic(self):
    """Run ContinuityValidation on a frame with a gap at timestamp 3.

    Verifies the generated input metadata, the produced frame (which is
    expected to contain a fourth row for timestamp 3), the produced
    metadata, and finally delegates to ``self._test_continuity``.
    """
    all_elements = '__ALL_ELEMENTS__'
    table_type = 'https://metadata.datadrivendiscovery.org/types/Table'
    row_type = 'https://metadata.datadrivendiscovery.org/types/TabularRow'
    column_type = 'https://metadata.datadrivendiscovery.org/types/TabularColumn'

    # Column names paired with the structural type expected for each.
    column_types = [
        ('d3mIndex', 'numpy.int64'),
        ('timestamp', 'numpy.float64'),
        ('a', 'numpy.float64'),
        ('b', 'numpy.float64'),
        ('ground_truth', 'numpy.int64'),
    ]
    column_entries = [
        {
            'selector': [all_elements, position],
            'metadata': {'structural_type': structural_type, 'name': column_name},
        }
        for position, (column_name, structural_type) in enumerate(column_types)
    ]
    columns_dimension_entry = {
        'selector': [all_elements],
        'metadata': {
            'dimension': {
                'name': 'columns',
                'semantic_types': [column_type],
                'length': 5,
            },
        },
    }

    main = container.DataFrame(
        {
            'd3mIndex': [0, 1, 2],
            'timestamp': [1., 2., 4.],
            'a': [1., 2., 3.],
            'b': [2., 3., 4.],
            'ground_truth': [0, 0, 0],
        },
        columns=['d3mIndex', 'timestamp', 'a', 'b', 'ground_truth'],
        generate_metadata=True,
    )

    input_table_entry = {
        'selector': [],
        'metadata': {
            'schema': metadata_base.CONTAINER_SCHEMA_VERSION,
            'structural_type': 'd3m.container.pandas.DataFrame',
            'semantic_types': [table_type],
            'dimension': {
                'name': 'rows',
                'semantic_types': [row_type],
                'length': 3,
            },
        },
    }
    self.assertEqual(
        utils.to_json_structure(main.metadata.to_internal_simple_structure()),
        [input_table_entry, columns_dimension_entry] + column_entries,
    )
    self.assertIsInstance(main, container.DataFrame)

    primitive_class = ContinuityValidation.ContinuityValidation
    hyperparams_class = primitive_class.metadata.get_hyperparams()
    primitive = primitive_class(hyperparams=hyperparams_class.defaults())
    new_main = primitive.produce(inputs=main).value

    expected_output = container.DataFrame({
        'd3mIndex': [0, 1, 2, 3],
        'timestamp': [1., 2., 3., 4.],
        'a': [1., 2., 2.5, 3.],
        'b': [2., 3., 3.5, 4.],
        'ground_truth': [0, 0, 0, 0],
    })
    self.assertEqual(new_main.values.tolist(), expected_output.values.tolist())

    # Output metadata differs from the input only in the row count.
    output_table_entry = {
        'selector': [],
        'metadata': {
            'schema': metadata_base.CONTAINER_SCHEMA_VERSION,
            'structural_type': 'd3m.container.pandas.DataFrame',
            'semantic_types': [table_type],
            'dimension': {
                'name': 'rows',
                'semantic_types': [row_type],
                'length': 4,
            },
        },
    }
    self.assertEqual(
        utils.to_json_structure(new_main.metadata.to_internal_simple_structure()),
        [output_table_entry, columns_dimension_entry] + column_entries,
    )

    self._test_continuity(new_main)
def test_list(self):
    """Check generated metadata for three kinds of container lists.

    Covers a homogeneous list of strings, a heterogeneous list (int, str,
    float), and a list holding a single DataFrame.
    """
    all_elements = '__ALL_ELEMENTS__'

    def list_entry(length):
        # Top-level entry expected for any container.List of this length.
        return {
            'selector': [],
            'metadata': {
                'schema': base.CONTAINER_SCHEMA_VERSION,
                'structural_type': 'd3m.container.list.List',
                'dimension': {
                    'length': length,
                },
            },
        }

    # Homogeneous list: one shared entry describes every element.
    strings = container.List(['a', 'b', 'c'], generate_metadata=True)
    self.assertEqual(
        utils.to_json_structure(strings.metadata.to_internal_simple_structure()),
        [
            list_entry(3),
            {
                'selector': [all_elements],
                'metadata': {'structural_type': 'str'},
            },
        ],
    )

    # Heterogeneous list: each element gets its own entry.
    mixed = container.List([1, 'a', 2.0], generate_metadata=True)
    self.assertEqual(
        utils.to_json_structure(mixed.metadata.to_internal_simple_structure()),
        [
            list_entry(3),
            {'selector': [0], 'metadata': {'structural_type': 'int'}},
            {'selector': [1], 'metadata': {'structural_type': 'str'}},
            {'selector': [2], 'metadata': {'structural_type': 'float'}},
        ],
    )

    # List holding one DataFrame: the frame's metadata is nested one level.
    dataframe = container.DataFrame({'A': [1, 2, 3], 'B': ['a', 'b', 'c']})
    dataframe.A = dataframe.A.astype(numpy.int64)
    frames = container.List([dataframe], generate_metadata=True)
    self.assertEqual(
        utils.to_json_structure(frames.metadata.to_internal_simple_structure()),
        [
            list_entry(1),
            {
                'selector': [all_elements],
                'metadata': {
                    'structural_type': 'd3m.container.pandas.DataFrame',
                    'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'],
                    'dimension': {
                        'name': 'rows',
                        'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'],
                        'length': 3,
                    },
                },
            },
            {
                'selector': [all_elements, all_elements],
                'metadata': {
                    'dimension': {
                        'name': 'columns',
                        'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'],
                        'length': 2,
                    },
                },
            },
            {
                'selector': [all_elements, all_elements, 0],
                'metadata': {
                    'name': 'A',
                    'structural_type': 'numpy.int64',
                },
            },
            {
                'selector': [all_elements, all_elements, 1],
                'metadata': {
                    'name': 'B',
                    'structural_type': 'str',
                },
            },
        ],
    )
def test_basic(self):
    """Fit SKPowerTransformer on the KPI training table and check output.

    Verifies that column 2 of the transformed frame has mean close to 0
    and standard deviation close to 1 (squared deviation within 1e-5),
    checks the produced metadata, and that params can be read and set back.
    """
    self.maxDiff = None
    dataset_fname = os.path.join(
        this_path,
        '../../datasets/anomaly/kpi/TRAIN/dataset_TRAIN/tables/learningData.csv',
    )
    dataset = pd.read_csv(dataset_fname)
    main = d3m_dataframe(dataset, generate_metadata=True)

    primitive_class = SKPowerTransformer.SKPowerTransformer
    hyperparams_class = primitive_class.metadata.get_hyperparams()
    primitive = primitive_class(hyperparams=hyperparams_class.defaults())
    primitive.set_training_data(inputs=main)
    primitive.fit()
    new_main = primitive.produce(inputs=main).value

    # Standardization check on column index 2 (named 'value' in the
    # expected metadata below): squared error of the mean against 0 and of
    # the standard deviation against 1.
    test_data = new_main.values[:, 2]
    new_mean, new_std = test_data.mean(), test_data.std()
    mean_mse = new_mean ** 2
    std_mse = (new_std - 1) ** 2
    self.assertAlmostEqual(float(mean_mse), 0., delta=1e-5)
    self.assertAlmostEqual(float(std_mse), 0., delta=1e-5)

    all_elements = '__ALL_ELEMENTS__'
    attribute_type = 'https://metadata.datadrivendiscovery.org/types/Attribute'
    column_names = ['d3mIndex', 'timestamp', 'value', 'ground_truth']
    # Every output column is expected as a float64 attribute.
    column_entries = [
        {
            'selector': [all_elements, position],
            'metadata': {
                'name': column_name,
                'semantic_types': [attribute_type],
                'structural_type': 'numpy.float64',
            },
        }
        for position, column_name in enumerate(column_names)
    ]
    expected_metadata = [
        {
            'selector': [],
            'metadata': {
                'schema': metadata_base.CONTAINER_SCHEMA_VERSION,
                'structural_type': 'd3m.container.pandas.DataFrame',
                'semantic_types': [
                    'https://metadata.datadrivendiscovery.org/types/Table',
                ],
                'dimension': {
                    'name': 'rows',
                    'semantic_types': [
                        'https://metadata.datadrivendiscovery.org/types/TabularRow',
                    ],
                    'length': 7027,
                },
            },
        },
        {
            'selector': [all_elements],
            'metadata': {
                'dimension': {
                    'name': 'columns',
                    'semantic_types': [
                        'https://metadata.datadrivendiscovery.org/types/TabularColumn',
                    ],
                    'length': 4,
                },
            },
        },
    ] + column_entries
    self.assertEqual(
        utils.to_json_structure(new_main.metadata.to_internal_simple_structure()),
        expected_metadata,
    )

    # Params round trip: reading params and setting them back must not raise.
    params = primitive.get_params()
    primitive.set_params(params=params)
def test_basic(self):
    """Fit TelemanomPrimitive on a 9-row frame and check produce_score.

    Verifies the generated input metadata and the metadata of the frame
    returned by ``produce_score`` (expected: 6 rows, 3 float64 attribute
    columns named ``Telemanom0_0`` .. ``Telemanom0_2``).
    """
    self.maxDiff = None
    all_elements = '__ALL_ELEMENTS__'
    table_type = 'https://metadata.datadrivendiscovery.org/types/Table'
    row_type = 'https://metadata.datadrivendiscovery.org/types/TabularRow'
    column_type = 'https://metadata.datadrivendiscovery.org/types/TabularColumn'
    attribute_type = 'https://metadata.datadrivendiscovery.org/types/Attribute'

    def table_entries(row_count):
        # Table-level and column-dimension entries for a 3-column frame.
        return [
            {
                'selector': [],
                'metadata': {
                    'schema': metadata_base.CONTAINER_SCHEMA_VERSION,
                    'structural_type': 'd3m.container.pandas.DataFrame',
                    'semantic_types': [table_type],
                    'dimension': {
                        'name': 'rows',
                        'semantic_types': [row_type],
                        'length': row_count,
                    },
                },
            },
            {
                'selector': [all_elements],
                'metadata': {
                    'dimension': {
                        'name': 'columns',
                        'semantic_types': [column_type],
                        'length': 3,
                    },
                },
            },
        ]

    main = container.DataFrame(
        {
            'a': [1., 2., 3., 4., 5, 6, 7, 8, 9],
            'b': [2., 3., 4., 5., 6, 7, 8, 9, 10],
            'c': [3., 4., 5., 6., 7, 8, 9, 10, 11],
        },
        columns=['a', 'b', 'c'],
        generate_metadata=True,
    )
    print(main)

    input_columns = [
        {
            'selector': [all_elements, position],
            'metadata': {'structural_type': 'numpy.float64', 'name': column_name},
        }
        for position, column_name in enumerate(['a', 'b', 'c'])
    ]
    self.assertEqual(
        utils.to_json_structure(main.metadata.to_internal_simple_structure()),
        table_entries(9) + input_columns,
    )
    self.assertIsInstance(main, container.DataFrame)

    hyperparams_class = TelemanomPrimitive.metadata.get_hyperparams()
    hyperparams = hyperparams_class.defaults().replace({
        'l_s': 2,
        'n_predictions': 1,
        'return_result': 'new',
        'return_subseq_inds': True,
        'use_columns': (0, 1, 2),
    })

    primitive = TelemanomPrimitive(hyperparams=hyperparams)
    primitive.set_training_data(inputs=main)
    primitive.fit()
    new_main = primitive.produce_score(inputs=main).value
    print("new main", new_main)

    score_columns = [
        {
            'selector': [all_elements, position],
            'metadata': {
                'name': column_name,
                'structural_type': 'numpy.float64',
                'semantic_types': [attribute_type],
            },
        }
        for position, column_name in enumerate(
            ['Telemanom0_0', 'Telemanom0_1', 'Telemanom0_2'],
        )
    ]
    self.assertEqual(
        utils.to_json_structure(new_main.metadata.to_internal_simple_structure()),
        table_entries(6) + score_columns,
    )
def test_basic(self):
    """Check SKStandardScaler output statistics and metadata on a 3x6 frame.

    The input columns are renamed through ``update_column`` (the name
    ``aaa111`` is deliberately used for both column 0 and column 5). After
    fitting, the produced frame must have per-column mean 0 and standard
    deviation 1 (within 1e-8 on the squared error), and every output
    column must carry the ``Attribute`` semantic type.
    """
    table_type = 'https://metadata.datadrivendiscovery.org/types/Table'
    row_type = 'https://metadata.datadrivendiscovery.org/types/TabularRow'
    column_type = 'https://metadata.datadrivendiscovery.org/types/TabularColumn'
    attribute_type = 'https://metadata.datadrivendiscovery.org/types/Attribute'
    renamed = ['aaa111', 'bbb111', 'aaa222', 'ccc111', 'aaa333', 'aaa111']

    def frame_entries():
        # Table-level entry followed by the column-dimension entry (3 rows, 6 columns).
        return [
            {
                'selector': [],
                'metadata': {
                    'schema': metadata_base.CONTAINER_SCHEMA_VERSION,
                    'structural_type': 'd3m.container.pandas.DataFrame',
                    'semantic_types': [table_type],
                    'dimension': {
                        'name': 'rows',
                        'semantic_types': [row_type],
                        'length': 3,
                    },
                },
            },
            {
                'selector': ['__ALL_ELEMENTS__'],
                'metadata': {
                    'dimension': {
                        'name': 'columns',
                        'semantic_types': [column_type],
                        'length': 6,
                    },
                },
            },
        ]

    self.maxDiff = None

    main = container.DataFrame(
        {
            'a1': [1., 2., 3.],
            'b1': [2., 3., 4.],
            'a2': [3., 4., 5.],
            'c1': [4., 5., 6.],
            'a3': [5., 6., 7.],
            'a1a': [6., 7., 8.],
        },
        columns=['a1', 'b1', 'a2', 'c1', 'a3', 'a1a'],
        generate_metadata=True)
    for position, label in enumerate(renamed):
        main.metadata = main.metadata.update_column(position, {'name': label})

    self.assertEqual(
        utils.to_json_structure(
            main.metadata.to_internal_simple_structure()),
        frame_entries() + [
            {
                'selector': ['__ALL_ELEMENTS__', position],
                'metadata': {
                    'structural_type': 'numpy.float64',
                    'name': label,
                },
            }
            for position, label in enumerate(renamed)
        ])

    hyperparams_class = SKStandardScaler.SKStandardScaler.metadata.get_hyperparams()
    primitive = SKStandardScaler.SKStandardScaler(
        hyperparams=hyperparams_class.defaults())
    primitive.set_training_data(inputs=main)
    primitive.fit()
    new_main = primitive.produce(inputs=main).value

    # Squared error of the per-column mean against 0 and of the per-column
    # standard deviation against 1.
    new_mean = new_main.values.mean(0)
    std_residual = new_main.values.std(0) - 1.0
    mean_mse = np.matmul(new_mean.T, new_mean)
    std_mse = np.matmul(std_residual.T, std_residual)

    # Use the float() builtin rather than invoking the __float__ dunder directly.
    self.assertAlmostEqual(float(mean_mse), 0., delta=1e-8)
    self.assertAlmostEqual(float(std_mse), 0., delta=1e-8)

    self.assertEqual(
        utils.to_json_structure(
            new_main.metadata.to_internal_simple_structure()),
        frame_entries() + [
            {
                'selector': ['__ALL_ELEMENTS__', position],
                'metadata': {
                    'name': label,
                    'semantic_types': [attribute_type],
                    'structural_type': 'numpy.float64',
                },
            }
            for position, label in enumerate(renamed)
        ])

    # Parameters must survive a get/set round trip without raising.
    params = primitive.get_params()
    primitive.set_params(params=params)
def test_combine_columns_new_with_index_noncompact_metadata(self):
    """Combine columns with ``return_result='new'`` and ``add_index_columns=True``.

    Both frames use non-compact generated metadata. The result is expected
    to hold only the ``d3mIndex`` and ``b2`` columns, with their semantic
    types preserved and ``numpy.int64`` structural types.
    """
    def index_column():
        # Fresh dict on every call so no metadata object is shared.
        return {
            'name': 'd3mIndex',
            'semantic_types': [
                'http://schema.org/Integer',
                'https://metadata.datadrivendiscovery.org/types/PrimaryKey',
            ],
        }

    def attribute_column(label):
        return {
            'name': label,
            'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute'],
        }

    main = container.DataFrame(
        {'d3mIndex': [1, 2, 3], 'b1': [4, 5, 6], 'c1': [7, 8, 9]},
        columns=['d3mIndex', 'b1', 'c1'],
        generate_metadata=False)
    main.metadata = main.metadata.generate(main, compact=False)
    main_columns = [index_column(), attribute_column('b1'), attribute_column('c1')]
    for position, described in enumerate(main_columns):
        main.metadata = main.metadata.update_column(position, described)

    columns = container.DataFrame(
        {'d3mIndex': [1, 2, 3], 'b2': [4, 5, 6]},
        columns=['d3mIndex', 'b2'],
        generate_metadata=False)
    columns.metadata = columns.metadata.generate(columns, compact=False)
    for position, described in enumerate([index_column(), attribute_column('b2')]):
        columns.metadata = columns.metadata.update_column(position, described)

    result = utils.combine_columns(
        main, [], [columns], return_result='new', add_index_columns=True)

    self.assertEqual(result.values.tolist(), [[1, 4], [2, 5], [3, 6]])

    expected_columns = [
        dict(index_column(), structural_type='numpy.int64'),
        dict(attribute_column('b2'), structural_type='numpy.int64'),
    ]
    self.assertEqual(
        d3m_utils.to_json_structure(
            result.metadata.to_internal_simple_structure()),
        [
            {
                'selector': [],
                'metadata': {
                    'schema': metadata_base.CONTAINER_SCHEMA_VERSION,
                    'structural_type': 'd3m.container.pandas.DataFrame',
                    'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'],
                    'dimension': {
                        'name': 'rows',
                        'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'],
                        'length': 3,
                    },
                },
            },
            {
                'selector': ['__ALL_ELEMENTS__'],
                'metadata': {
                    'dimension': {
                        'name': 'columns',
                        'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'],
                        'length': 2,
                    },
                },
            },
        ] + [
            {'selector': ['__ALL_ELEMENTS__', position], 'metadata': described}
            for position, described in enumerate(expected_columns)
        ])
def test_update_with_generated_metadata(self):
    """Exercise ``DataMetadata._update_with_generated_metadata`` in two scenarios.

    First, generated per-selector entries are applied to metadata that only
    has a top-level entry. Second, generated entries are applied on top of
    existing ``semantic_types`` and ``dimension`` values, and the expected
    result keeps the pre-existing ``foobar`` keys and contains both the old
    and the new semantic types.
    """
    # Scenario 1: only the top-level entry exists beforehand.
    metadata = base.DataMetadata({
        'schema': base.CONTAINER_SCHEMA_VERSION,
        'structural_type': container.ndarray,
    })

    cells_metadata = collections.OrderedDict([
        (('a',), {'other': 1}),
        (('b',), {'other': 2}),
        (('c',), {'other': 3}),
        ((base.ALL_ELEMENTS,), {'foo': 'bar'}),
        (('other', 'a'), {'other': 4}),
        (('other', 'b'), {'other': 5}),
        (('other', 'c'), {'other': 6}),
        (('other', base.ALL_ELEMENTS), {'foo': 'bar2'}),
    ])

    metadata._update_with_generated_metadata(cells_metadata)

    self.assertEqual(
        utils.to_json_structure(metadata.to_internal_simple_structure()),
        [
            {
                'selector': [],
                'metadata': {
                    'schema': base.CONTAINER_SCHEMA_VERSION,
                    'structural_type': 'd3m.container.numpy.ndarray',
                },
            },
            {'selector': ['__ALL_ELEMENTS__'], 'metadata': {'foo': 'bar'}},
            {'selector': ['a'], 'metadata': {'other': 1}},
            {'selector': ['b'], 'metadata': {'other': 2}},
            {'selector': ['c'], 'metadata': {'other': 3}},
            {'selector': ['other', '__ALL_ELEMENTS__'], 'metadata': {'foo': 'bar2'}},
            {'selector': ['other', 'a'], 'metadata': {'other': 4}},
            {'selector': ['other', 'b'], 'metadata': {'other': 5}},
            {'selector': ['other', 'c'], 'metadata': {'other': 6}},
        ])

    # Scenario 2: existing semantic types and dimension data are present.
    metadata = base.DataMetadata({
        'schema': base.CONTAINER_SCHEMA_VERSION,
        'structural_type': container.ndarray,
        'semantic_types': ['http://example.com/Type1'],
        'dimension': {
            'length': 0,
            'foobar': 42,
            'semantic_types': ['http://example.com/Type2'],
        },
    })
    metadata = metadata.update(('a',), {
        'semantic_types': ['http://example.com/Type3'],
        'dimension': {
            'length': 0,
            'foobar': 45,
            'semantic_types': ['http://example.com/Type4'],
        },
    })

    cells_metadata = collections.OrderedDict([
        ((), {
            'other': 1,
            'structural_type': container.ndarray,
            'semantic_types': ['http://example.com/Type1a'],
            'dimension': {
                'length': 100,
                'name': 'test1',
                'semantic_types': ['http://example.com/Type2a'],
            },
        }),
        (('a',), {
            'semantic_types': ['http://example.com/Type3', 'http://example.com/Type3a'],
            'dimension': {
                'length': 200,
                'name': 'test2',
                'semantic_types': ['http://example.com/Type4', 'http://example.com/Type4a'],
            },
        }),
        (('b',), {'other': 2}),
    ])

    metadata._update_with_generated_metadata(cells_metadata)

    expected_merged = [
        {
            'selector': [],
            'metadata': {
                'schema': base.CONTAINER_SCHEMA_VERSION,
                'structural_type': 'd3m.container.numpy.ndarray',
                'other': 1,
                'semantic_types': ['http://example.com/Type1', 'http://example.com/Type1a'],
                'dimension': {
                    'length': 100,
                    'name': 'test1',
                    'foobar': 42,
                    'semantic_types': ['http://example.com/Type2', 'http://example.com/Type2a'],
                },
            },
        },
        {
            'selector': ['a'],
            'metadata': {
                'semantic_types': ['http://example.com/Type3', 'http://example.com/Type3a'],
                'dimension': {
                    'length': 200,
                    'name': 'test2',
                    'foobar': 45,
                    'semantic_types': ['http://example.com/Type4', 'http://example.com/Type4a'],
                },
            },
        },
        {'selector': ['b'], 'metadata': {'other': 2}},
    ]

    # Both serialization paths are expected to yield the same structure.
    self.assertEqual(
        utils.to_json_structure(metadata.to_internal_simple_structure()),
        expected_merged)
    self.assertEqual(metadata.to_json_structure(), expected_merged)
def test_unset_structural_type(self):
    """Regenerate metadata after moving the all-cells entry onto each column.

    For both compact and non-compact generation, the entry at
    ``(ALL_ELEMENTS, ALL_ELEMENTS)`` is queried, removed, written to
    columns 0 and 1, and metadata is generated again. The compact result
    is expected to carry ``structural_type`` on the all-cells selector;
    the non-compact result is expected to carry it on each column.
    """
    every_cell = (base.ALL_ELEMENTS, base.ALL_ELEMENTS)
    dataframe = container.DataFrame(
        {'a': ['a', 'b', 'c'], 'b': ['a', 'b', 'c']},
        generate_metadata=False)

    def regenerate(compact):
        generated = dataframe.metadata.generate(dataframe, compact=compact)
        shared = generated.query(every_cell)
        generated = generated.remove(every_cell, strict_all_elements=True)
        for column_index in (0, 1):
            generated = generated.update(
                (base.ALL_ELEMENTS, column_index), shared)
        return generated.generate(dataframe, compact=compact)

    def frame_entries():
        # Table-level entry followed by the column-dimension entry (3 rows, 2 columns).
        return [
            {
                'selector': [],
                'metadata': {
                    'schema': base.CONTAINER_SCHEMA_VERSION,
                    'structural_type': 'd3m.container.pandas.DataFrame',
                    'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'],
                    'dimension': {
                        'name': 'rows',
                        'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'],
                        'length': 3,
                    },
                },
            },
            {
                'selector': ['__ALL_ELEMENTS__'],
                'metadata': {
                    'dimension': {
                        'name': 'columns',
                        'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'],
                        'length': 2,
                    },
                },
            },
        ]

    compact_metadata = regenerate(True)
    self.assertEqual(
        utils.to_json_structure(compact_metadata.to_internal_simple_structure()),
        frame_entries() + [
            {
                'selector': ['__ALL_ELEMENTS__', '__ALL_ELEMENTS__'],
                'metadata': {'structural_type': 'str'},
            },
            {'selector': ['__ALL_ELEMENTS__', 0], 'metadata': {'name': 'a'}},
            {'selector': ['__ALL_ELEMENTS__', 1], 'metadata': {'name': 'b'}},
        ])

    compact_metadata = regenerate(False)
    self.assertEqual(
        utils.to_json_structure(compact_metadata.to_internal_simple_structure()),
        frame_entries() + [
            {
                'selector': ['__ALL_ELEMENTS__', 0],
                'metadata': {'name': 'a', 'structural_type': 'str'},
            },
            {
                'selector': ['__ALL_ELEMENTS__', 1],
                'metadata': {'name': 'b', 'structural_type': 'str'},
            },
        ])
def test_dataset(self):
    """Compare compact and non-compact metadata for a one-resource Dataset.

    The two expected structures differ only in the resource selector:
    ``'__ALL_ELEMENTS__'`` for the compact form and ``'0'`` for the
    non-compact form.
    """
    dataframe = container.DataFrame({'A': [1, 2, 3], 'B': ['a', 'b', 'c']})
    dataframe.A = dataframe.A.astype(numpy.int64, copy=False)
    dataset = container.Dataset({'0': dataframe}, generate_metadata=False)

    compact_metadata = dataset.metadata.generate(dataset, compact=True)
    noncompact_metadata = dataset.metadata.generate(dataset, compact=False)

    def expected_for(resource):
        # Expected entries with `resource` as the first selector segment.
        return [
            {
                'selector': [],
                'metadata': {
                    'schema': base.CONTAINER_SCHEMA_VERSION,
                    'structural_type': 'd3m.container.dataset.Dataset',
                    'dimension': {
                        'name': 'resources',
                        'semantic_types': ['https://metadata.datadrivendiscovery.org/types/DatasetResource'],
                        'length': 1,
                    },
                },
            },
            {
                'selector': [resource],
                'metadata': {
                    'structural_type': 'd3m.container.pandas.DataFrame',
                    'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'],
                    'dimension': {
                        'name': 'rows',
                        'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'],
                        'length': 3,
                    },
                },
            },
            {
                'selector': [resource, '__ALL_ELEMENTS__'],
                'metadata': {
                    'dimension': {
                        'name': 'columns',
                        'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'],
                        'length': 2,
                    },
                },
            },
            {
                'selector': [resource, '__ALL_ELEMENTS__', 0],
                'metadata': {'name': 'A', 'structural_type': 'numpy.int64'},
            },
            {
                'selector': [resource, '__ALL_ELEMENTS__', 1],
                'metadata': {'name': 'B', 'structural_type': 'str'},
            },
        ]

    self.assertEqual(
        utils.to_json_structure(compact_metadata.to_internal_simple_structure()),
        expected_for('__ALL_ELEMENTS__'))
    self.assertEqual(
        utils.to_json_structure(noncompact_metadata.to_internal_simple_structure()),
        expected_for('0'))
def test_complex_value(self):
    """Generate metadata for a Dataset whose cells hold ndarrays and Lists.

    Column ``A`` holds ``container.ndarray`` values and column ``B`` holds
    ``container.List`` values; each of the three rows uses a different
    element type (string, integer, float). Both the compact and the
    non-compact generated metadata are compared with the expected
    structures.
    """
    self.maxDiff = None

    # (element type inside the ndarray, element type inside the List), per row.
    element_types = [
        ('numpy.str_', 'str'),
        ('numpy.int64', 'int'),
        ('numpy.float64', 'float'),
    ]

    dataset = container.Dataset({
        '0': container.DataFrame({
            'A': [
                container.ndarray(numpy.array(['a', 'b', 'c'])),
                container.ndarray(numpy.array([1, 2, 3], dtype=numpy.int64)),
                container.ndarray(numpy.array([1.0, 2.0, 3.0])),
            ],
            'B': [
                container.List(['a', 'b', 'c']),
                container.List([1, 2, 3]),
                container.List([1.0, 2.0, 3.0]),
            ],
        }),
    }, generate_metadata=False)

    dataset_metadata = dataset.metadata.generate(dataset, compact=True)

    compact_expected = [
        {
            'selector': [],
            'metadata': {
                'schema': base.CONTAINER_SCHEMA_VERSION,
                'structural_type': 'd3m.container.dataset.Dataset',
                'dimension': {
                    'name': 'resources',
                    'semantic_types': ['https://metadata.datadrivendiscovery.org/types/DatasetResource'],
                    'length': 1,
                },
            },
        },
        {
            'selector': ['__ALL_ELEMENTS__'],
            'metadata': {
                'structural_type': 'd3m.container.pandas.DataFrame',
                'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'],
                'dimension': {
                    'name': 'rows',
                    'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'],
                    'length': 3,
                },
            },
        },
        {
            'selector': ['__ALL_ELEMENTS__', '__ALL_ELEMENTS__'],
            'metadata': {
                'dimension': {
                    'name': 'columns',
                    'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'],
                    'length': 2,
                },
            },
        },
        {
            'selector': ['__ALL_ELEMENTS__', '__ALL_ELEMENTS__', '__ALL_ELEMENTS__'],
            'metadata': {'dimension': {'length': 3}},
        },
        {
            'selector': ['__ALL_ELEMENTS__', '__ALL_ELEMENTS__', 0],
            'metadata': {
                'structural_type': 'd3m.container.numpy.ndarray',
                'name': 'A',
            },
        },
        {
            'selector': ['__ALL_ELEMENTS__', '__ALL_ELEMENTS__', 1],
            'metadata': {
                'structural_type': 'd3m.container.list.List',
                'name': 'B',
            },
        },
    ]
    for row, (array_element, list_element) in enumerate(element_types):
        compact_expected.append({
            'selector': ['__ALL_ELEMENTS__', row, 0, '__ALL_ELEMENTS__'],
            'metadata': {'structural_type': array_element},
        })
        compact_expected.append({
            'selector': ['__ALL_ELEMENTS__', row, 1, '__ALL_ELEMENTS__'],
            'metadata': {'structural_type': list_element},
        })

    self.assertEqual(
        utils.to_json_structure(dataset_metadata.to_internal_simple_structure()),
        compact_expected)

    dataset_metadata = dataset.metadata.generate(dataset, compact=False)

    noncompact_expected = [
        {
            'selector': [],
            'metadata': {
                'dimension': {
                    'length': 1,
                    'name': 'resources',
                    'semantic_types': ['https://metadata.datadrivendiscovery.org/types/DatasetResource'],
                },
                # This expectation spells the schema URL out literally.
                'schema': 'https://metadata.datadrivendiscovery.org/schemas/v0/container.json',
                'structural_type': 'd3m.container.dataset.Dataset',
            },
        },
        {
            'selector': ['0'],
            'metadata': {
                'dimension': {
                    'length': 3,
                    'name': 'rows',
                    'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'],
                },
                'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'],
                'structural_type': 'd3m.container.pandas.DataFrame',
            },
        },
        {
            'selector': ['0', '__ALL_ELEMENTS__'],
            'metadata': {
                'dimension': {
                    'length': 2,
                    'name': 'columns',
                    'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'],
                },
            },
        },
        {'selector': ['0', '__ALL_ELEMENTS__', 0], 'metadata': {'name': 'A'}},
        {'selector': ['0', '__ALL_ELEMENTS__', 1], 'metadata': {'name': 'B'}},
    ]
    for row, (array_element, list_element) in enumerate(element_types):
        cells = (
            (0, 'd3m.container.numpy.ndarray', array_element),
            (1, 'd3m.container.list.List', list_element),
        )
        for column, holder, element in cells:
            noncompact_expected.append({
                'selector': ['0', row, column],
                'metadata': {
                    'dimension': {'length': 3},
                    'structural_type': holder,
                },
            })
            noncompact_expected.append({
                'selector': ['0', row, column, '__ALL_ELEMENTS__'],
                'metadata': {'structural_type': element},
            })

    self.assertEqual(
        utils.to_json_structure(dataset_metadata.to_internal_simple_structure()),
        noncompact_expected)
def time_to_simple_structure_with_json(self, dataset_files):
    """Convert ``self.dataset_metadata`` to its JSON-compatible simple structure.

    ``dataset_files`` is not used in the body, and the converted structure
    is discarded; only the conversion work itself is performed.
    """
    simple_structure = self.dataset_metadata.to_internal_simple_structure()
    utils.to_json_structure(simple_structure)
def test_basic(self):
    """Check WaveletTransformer forward and inverse output on the KPI ``value`` column.

    The forward transform (``db8``, level 2) is expected to yield 3521 rows
    and 3 attribute columns. Running the primitive with ``inverse=1`` on the
    same input is expected to reproduce the input values within 1e-6 and to
    yield 7027 rows in a single ``value`` column.
    """
    # Imported locally: this block needs numpy only for the tolerance check.
    import numpy as np

    table_type = 'https://metadata.datadrivendiscovery.org/types/Table'
    row_type = 'https://metadata.datadrivendiscovery.org/types/TabularRow'
    column_type = 'https://metadata.datadrivendiscovery.org/types/TabularColumn'
    attribute_type = 'https://metadata.datadrivendiscovery.org/types/Attribute'

    def expected_metadata(row_count, column_names):
        # Table entry, column-dimension entry, then one float64 attribute
        # entry per expected column name.
        entries = [
            {
                'selector': [],
                'metadata': {
                    'schema': metadata_base.CONTAINER_SCHEMA_VERSION,
                    'structural_type': 'd3m.container.pandas.DataFrame',
                    'semantic_types': [table_type],
                    'dimension': {
                        'name': 'rows',
                        'semantic_types': [row_type],
                        'length': row_count,
                    },
                },
            },
            {
                'selector': ['__ALL_ELEMENTS__'],
                'metadata': {
                    'dimension': {
                        'name': 'columns',
                        'semantic_types': [column_type],
                        'length': len(column_names),
                    },
                },
            },
        ]
        for position, label in enumerate(column_names):
            entries.append({
                'selector': ['__ALL_ELEMENTS__', position],
                'metadata': {
                    'name': label,
                    'semantic_types': [attribute_type],
                    'structural_type': 'numpy.float64',
                },
            })
        return entries

    self.maxDiff = None

    curr_path = os.path.dirname(__file__)
    dataset_fname = os.path.join(
        curr_path,
        '../../datasets/anomaly/kpi/TRAIN/dataset_TRAIN/tables/learningData.csv')
    dataset = pd.read_csv(dataset_fname)
    value = dataset['value']
    main = d3m_dataframe(value, generate_metadata=True)

    # ---- Forward wavelet transform ----
    hyperparams_default = WaveletTransformer.metadata.get_hyperparams().defaults()
    hyperparams = hyperparams_default.replace({
        'wavelet': 'db8',
        'level': 2,
        'inverse': 0,
        'return_result': 'new',
    })
    primitive = WaveletTransformer(hyperparams=hyperparams)
    new_main = primitive.produce(inputs=main).value

    self.assertEqual(
        utils.to_json_structure(
            new_main.metadata.to_internal_simple_structure()),
        expected_metadata(3521, ['value', 'output_1', 'output_2']))

    # ---- Inverse transform ----
    hyperparams = hyperparams_default.replace({'inverse': 1})
    primitive = WaveletTransformer(hyperparams=hyperparams)
    main_recover = primitive.produce(inputs=main).value

    # assertAlmostEqual cannot compare lists with a tolerance: unless the
    # lists are exactly equal it raises TypeError while computing
    # abs(first - second). Compare the arrays element-wise instead.
    self.assertEqual(main_recover.values.shape, main.values.shape)
    np.testing.assert_allclose(
        main_recover.values, main.values, rtol=0, atol=1e-6)

    self.assertEqual(
        utils.to_json_structure(
            main_recover.metadata.to_internal_simple_structure()),
        expected_metadata(7027, ['value']))

    # Parameters must survive a get/set round trip without raising.
    params = primitive.get_params()
    primitive.set_params(params=params)
def test_basic(self):
    """Fit ABODPrimitive on a 3x3 frame and check ``produce``/``produce_score`` metadata.

    Both outputs are expected to have 3 rows and a single attribute column
    named ``Angle-base Outlier Detection Primitive0_0``; ``produce`` is
    expected to report ``numpy.int64`` and ``produce_score``
    ``numpy.float64``.
    """
    table_type = 'https://metadata.datadrivendiscovery.org/types/Table'
    row_type = 'https://metadata.datadrivendiscovery.org/types/TabularRow'
    column_type = 'https://metadata.datadrivendiscovery.org/types/TabularColumn'
    attribute_type = 'https://metadata.datadrivendiscovery.org/types/Attribute'

    def frame_entries(column_count):
        # Table-level entry (3 rows) followed by the column-dimension entry.
        return [
            {
                'selector': [],
                'metadata': {
                    'schema': metadata_base.CONTAINER_SCHEMA_VERSION,
                    'structural_type': 'd3m.container.pandas.DataFrame',
                    'semantic_types': [table_type],
                    'dimension': {
                        'name': 'rows',
                        'semantic_types': [row_type],
                        'length': 3,
                    },
                },
            },
            {
                'selector': ['__ALL_ELEMENTS__'],
                'metadata': {
                    'dimension': {
                        'name': 'columns',
                        'semantic_types': [column_type],
                        'length': column_count,
                    },
                },
            },
        ]

    def single_output(structural_type):
        # Expected metadata of a one-column primitive output.
        return frame_entries(1) + [{
            'selector': ['__ALL_ELEMENTS__', 0],
            'metadata': {
                'name': 'Angle-base Outlier Detection Primitive0_0',
                'structural_type': structural_type,
                'semantic_types': [attribute_type],
            },
        }]

    def as_json(frame):
        return utils.to_json_structure(
            frame.metadata.to_internal_simple_structure())

    self.maxDiff = None

    main = container.DataFrame(
        {
            'a': [1., 2., 3.],
            'b': [2., 3., 4.],
            'c': [3., 4., 5.],
        },
        columns=['a', 'b', 'c'],
        generate_metadata=True)
    print(main)

    self.assertEqual(
        as_json(main),
        frame_entries(3) + [
            {
                'selector': ['__ALL_ELEMENTS__', position],
                'metadata': {
                    'structural_type': 'numpy.float64',
                    'name': label,
                },
            }
            for position, label in enumerate(['a', 'b', 'c'])
        ])
    self.assertIsInstance(main, container.DataFrame)

    defaults = ABODPrimitive.metadata.get_hyperparams().defaults()
    hyperparams = defaults.replace({
        'return_result': 'new',
        'method': 'default',
    })

    primitive = ABODPrimitive(hyperparams=hyperparams)
    primitive.set_training_data(inputs=main)
    primitive.fit()
    new_main = primitive.produce(inputs=main).value
    new_main_score = primitive.produce_score(inputs=main).value
    print(new_main)
    print(new_main_score)

    self.assertEqual(as_json(new_main), single_output('numpy.int64'))
    self.assertEqual(as_json(new_main_score), single_output('numpy.float64'))
def test_basic(self):
    """Check CategoricalToBinary appending binary columns for column ``A``.

    With ``use_columns=(0,)`` and ``return_result='append'`` the output is
    expected to keep ``A`` and ``B`` and to add string columns ``A_1`` and
    ``A_2``, each carrying the ``Attribute`` semantic type.
    """
    table_type = 'https://metadata.datadrivendiscovery.org/types/Table'
    row_type = 'https://metadata.datadrivendiscovery.org/types/TabularRow'
    column_type = 'https://metadata.datadrivendiscovery.org/types/TabularColumn'
    attribute_type = 'https://metadata.datadrivendiscovery.org/types/Attribute'

    def frame_entries(column_count):
        # Table-level entry (2 rows) followed by the column-dimension entry.
        return [
            {
                'selector': [],
                'metadata': {
                    'schema': metadata_base.CONTAINER_SCHEMA_VERSION,
                    'structural_type': 'd3m.container.pandas.DataFrame',
                    'semantic_types': [table_type],
                    'dimension': {
                        'name': 'rows',
                        'semantic_types': [row_type],
                        'length': 2,
                    },
                },
            },
            {
                'selector': ['__ALL_ELEMENTS__'],
                'metadata': {
                    'dimension': {
                        'name': 'columns',
                        'semantic_types': [column_type],
                        'length': column_count,
                    },
                },
            },
        ]

    def column_entry(position, described):
        return {'selector': ['__ALL_ELEMENTS__', position], 'metadata': described}

    self.maxDiff = None

    main = container.DataFrame(
        {'A': [1, 2], 'B': ['a', 'b']},
        columns=['A', 'B'],
        generate_metadata=True)

    self.assertEqual(
        utils.to_json_structure(
            main.metadata.to_internal_simple_structure()),
        frame_entries(2) + [
            column_entry(0, {'structural_type': 'numpy.int64', 'name': 'A'}),
            column_entry(1, {'structural_type': 'str', 'name': 'B'}),
        ])
    self.assertIsInstance(main, container.DataFrame)

    hyperparams_class = CategoricalToBinary.CategoricalToBinary.metadata.get_hyperparams()
    hp = hyperparams_class.defaults().replace({
        'use_semantic_types': True,
        'use_columns': (0, ),
        'return_result': 'append',
    })
    primitive = CategoricalToBinary.CategoricalToBinary(hyperparams=hp)
    new_main = primitive.produce(inputs=main).value

    c = pd.DataFrame({
        "A": [1, 2],
        "B": ['a', 'b'],
        "A_1": ["1", "0"],
        "A_2": ["0", "1"],
    })
    pd.testing.assert_frame_equal(new_main, c)

    self.assertEqual(
        utils.to_json_structure(
            new_main.metadata.to_internal_simple_structure()),
        frame_entries(4) + [
            column_entry(0, {'name': 'A', 'structural_type': 'numpy.int64'}),
            column_entry(1, {'name': 'B', 'structural_type': 'str'}),
            column_entry(2, {
                'name': 'A_1',
                'semantic_types': [attribute_type],
                'structural_type': 'str',
            }),
            column_entry(3, {
                'name': 'A_2',
                'semantic_types': [attribute_type],
                'structural_type': 'str',
            }),
        ])

    # Parameters must survive a get/set round trip without raising.
    params = primitive.get_params()
    primitive.set_params(params=params)
def test_basic(self):
    """Check the metadata of a 9x3 float DataFrame around MatrixProfile.produce.

    The generated metadata of the input frame is asserted once before and
    once after calling ``produce`` with ``window_size=3``.  The produced
    value itself is only printed.

    NOTE(review): the second assertion inspects ``main.metadata`` (the
    input), not the produced frame's metadata -- confirm whether the
    output was meant to be checked instead.
    """
    self.maxDiff = None

    def float_column(position, column_name):
        # Expected per-column metadata for one of the float64 columns.
        return {
            'selector': ['__ALL_ELEMENTS__', position],
            'metadata': {
                'structural_type': 'numpy.float64',
                'name': column_name,
            },
        }

    main = container.DataFrame(
        {
            'a': [1., 2., 3., 4., 5., 6., 7., 8., 9.],
            'b': [2., 3., 4., 5., 6., 7., 8., 9., 10.],
            'c': [3., 4., 5., 6., 7., 8., 9., 10., 11.],
        },
        columns=['a', 'b', 'c'],
        generate_metadata=True,
    )
    print(main)

    # The same structure is expected before and after produce().
    expected_metadata = [
        {
            'selector': [],
            'metadata': {
                'schema': metadata_base.CONTAINER_SCHEMA_VERSION,
                'structural_type': 'd3m.container.pandas.DataFrame',
                'semantic_types': [
                    'https://metadata.datadrivendiscovery.org/types/Table',
                ],
                'dimension': {
                    'name': 'rows',
                    'semantic_types': [
                        'https://metadata.datadrivendiscovery.org/types/TabularRow',
                    ],
                    'length': 9,
                },
            },
        },
        {
            'selector': ['__ALL_ELEMENTS__'],
            'metadata': {
                'dimension': {
                    'name': 'columns',
                    'semantic_types': [
                        'https://metadata.datadrivendiscovery.org/types/TabularColumn',
                    ],
                    'length': 3,
                },
            },
        },
        float_column(0, 'a'),
        float_column(1, 'b'),
        float_column(2, 'c'),
    ]

    before = utils.to_json_structure(
        main.metadata.to_internal_simple_structure())
    self.assertEqual(before, expected_metadata)
    self.assertIsInstance(main, container.DataFrame)

    hyperparams_class = MatrixProfile.metadata.get_hyperparams()
    hyperparams = hyperparams_class.defaults()
    hyperparams = hyperparams.replace({'window_size': 3})
    primitive = MatrixProfile(hyperparams=hyperparams)

    new_main = primitive.produce(inputs=main).value
    print(new_main)

    after = utils.to_json_structure(
        main.metadata.to_internal_simple_structure())
    self.assertEqual(after, expected_metadata)
def test_dataframe_with_names_kept(self):
    """Custom column names persist when metadata is regenerated.

    Column names are overridden on ``df``; ``df``'s metadata is then used
    to generate metadata for a second, longer frame.  The overridden names
    are expected to be kept while the row count reflects the new frame.
    """

    def expected_structure(row_count):
        # Expected JSON structure; only the row count varies between frames.
        return [
            {
                'selector': [],
                'metadata': {
                    'schema': base.CONTAINER_SCHEMA_VERSION,
                    'structural_type': 'd3m.container.pandas.DataFrame',
                    'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'],
                    'dimension': {
                        'name': 'rows',
                        'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'],
                        'length': row_count,
                    },
                },
            },
            {
                'selector': ['__ALL_ELEMENTS__'],
                'metadata': {
                    'dimension': {
                        'name': 'columns',
                        'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'],
                        'length': 2,
                    },
                },
            },
            {
                'selector': ['__ALL_ELEMENTS__', 0],
                'metadata': {
                    'name': 'first_column',
                    'structural_type': 'numpy.int64',
                },
            },
            {
                'selector': ['__ALL_ELEMENTS__', 1],
                'metadata': {
                    'name': 'second_column',
                    'structural_type': 'str',
                },
            },
        ]

    df = container.DataFrame({'A': [1, 2, 3], 'B': ['a', 'b', 'c']})
    df.A = df.A.astype(numpy.int64)

    df.metadata = df.metadata.generate(df)
    df.metadata = df.metadata.update((base.ALL_ELEMENTS, 0), {'name': 'first_column'})
    df.metadata = df.metadata.update((base.ALL_ELEMENTS, 1), {'name': 'second_column'})

    first_actual = utils.to_json_structure(df.metadata.to_internal_simple_structure())
    self.assertEqual(first_actual, expected_structure(3))

    df2 = container.DataFrame({'A': [1, 2, 3, 4], 'B': ['a', 'b', 'c', 'd']})
    # Generate from df's metadata so that the custom names carry over.
    df2.metadata = df.metadata.generate(df2)

    second_actual = utils.to_json_structure(df2.metadata.to_internal_simple_structure())
    self.assertEqual(second_actual, expected_structure(4))
def test_combine_columns_compact_metadata(self):
    """Exercise ``utils.combine_columns`` on frames with compact metadata.

    Three frames are built with compact metadata and renamed columns.
    ``combine_columns`` is then called with ``return_result`` set to
    ``'append'``, ``'new'`` and ``'replace'`` (the latter with several
    column selections), and both the resulting values and the resulting
    metadata structure are compared with the expected ones.
    """

    def build_frame(data, top_level, column_updates):
        # Create a frame with compact metadata, then apply column updates in order.
        frame = container.DataFrame(data, {'top_level': top_level}, generate_metadata=False)
        frame.metadata = frame.metadata.generate(frame, compact=True)
        for position, column_metadata in column_updates:
            frame.metadata = frame.metadata.update_column(position, column_metadata)
        return frame

    def named(column_name):
        # Column entry that carries only a name.
        return {'name': column_name}

    def typed(column_name):
        # Column entry that carries a name and an explicit structural type.
        return {'name': column_name, 'structural_type': 'numpy.int64'}

    def expected_structure(top_level, column_entries):
        # Full expected JSON structure for a three-row result.
        structure = [
            {
                'selector': [],
                'metadata': {
                    'top_level': top_level,
                    'schema': metadata_base.CONTAINER_SCHEMA_VERSION,
                    'structural_type': 'd3m.container.pandas.DataFrame',
                    'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'],
                    'dimension': {
                        'name': 'rows',
                        'semantic_types': [
                            'https://metadata.datadrivendiscovery.org/types/TabularRow',
                        ],
                        'length': 3,
                    },
                },
            },
            {
                'selector': ['__ALL_ELEMENTS__'],
                'metadata': {
                    'dimension': {
                        'name': 'columns',
                        'semantic_types': [
                            'https://metadata.datadrivendiscovery.org/types/TabularColumn',
                        ],
                        'length': len(column_entries),
                    },
                },
            },
            {
                'selector': ['__ALL_ELEMENTS__', '__ALL_ELEMENTS__'],
                'metadata': {
                    'structural_type': 'numpy.int64',
                },
            },
        ]
        for position, column_metadata in enumerate(column_entries):
            structure.append({
                'selector': ['__ALL_ELEMENTS__', position],
                'metadata': column_metadata,
            })
        return structure

    main = build_frame(
        {
            'a1': [1, 2, 3],
            'b1': [4, 5, 6],
            'c1': [7, 8, 9],
            'd1': [10, 11, 12],
            'e1': [13, 14, 15],
        },
        'main',
        [
            (0, {'name': 'aaa111'}),
            (1, {'name': 'bbb111', 'extra': 'b_column'}),
            (2, {'name': 'ccc111'}),
        ],
    )
    columns2 = build_frame(
        {'a2': [21, 22, 23], 'b2': [24, 25, 26]},
        'columns2',
        [(0, {'name': 'aaa222'}), (1, {'name': 'bbb222'})],
    )
    columns3 = build_frame(
        {'a3': [31, 32, 33], 'b3': [34, 35, 36]},
        'columns3',
        [(0, {'name': 'aaa333'}), (1, {'name': 'bbb333'})],
    )

    def check(column_indices, return_result, expected_rows, top_level, column_entries):
        # Combine, then compare both the values and the metadata structure.
        result = utils.combine_columns(main,
                                       column_indices, [columns2, columns3],
                                       return_result=return_result,
                                       add_index_columns=False)
        self.assertEqual(result.values.tolist(), expected_rows)
        self.assertEqual(
            d3m_utils.to_json_structure(result.metadata.to_internal_simple_structure()),
            expected_structure(top_level, column_entries))

    # Append: all original columns, followed by the new ones.
    check(
        [1, 2], 'append',
        [
            [1, 4, 7, 10, 13, 21, 24, 31, 34],
            [2, 5, 8, 11, 14, 22, 25, 32, 35],
            [3, 6, 9, 12, 15, 23, 26, 33, 36],
        ],
        'main',
        [
            named('aaa111'),
            {'name': 'bbb111', 'extra': 'b_column'},
            named('ccc111'),
            named('d1'),
            named('e1'),
            typed('aaa222'),
            typed('bbb222'),
            typed('aaa333'),
            typed('bbb333'),
        ],
    )

    # New: only the new columns; top-level metadata comes from columns2.
    check(
        [1, 2], 'new',
        [
            [21, 24, 31, 34],
            [22, 25, 32, 35],
            [23, 26, 33, 36],
        ],
        'columns2',
        [
            named('aaa222'),
            named('bbb222'),
            typed('aaa333'),
            typed('bbb333'),
        ],
    )

    # Replace two adjacent columns in the middle.
    check(
        [1, 2], 'replace',
        [
            [1, 21, 24, 31, 34, 10, 13],
            [2, 22, 25, 32, 35, 11, 14],
            [3, 23, 26, 33, 36, 12, 15],
        ],
        'main',
        [
            named('aaa111'),
            typed('aaa222'),
            typed('bbb222'),
            typed('aaa333'),
            typed('bbb333'),
            typed('d1'),
            typed('e1'),
        ],
    )

    # Replace every column of main.
    check(
        [0, 1, 2, 3, 4], 'replace',
        [
            [21, 24, 31, 34],
            [22, 25, 32, 35],
            [23, 26, 33, 36],
        ],
        'main',
        [
            typed('aaa222'),
            typed('bbb222'),
            typed('aaa333'),
            typed('bbb333'),
        ],
    )

    # Replace only the last column.
    check(
        [4], 'replace',
        [
            [1, 4, 7, 10, 21, 24, 31, 34],
            [2, 5, 8, 11, 22, 25, 32, 35],
            [3, 6, 9, 12, 23, 26, 33, 36],
        ],
        'main',
        [
            named('aaa111'),
            {'name': 'bbb111', 'extra': 'b_column'},
            named('ccc111'),
            named('d1'),
            typed('aaa222'),
            typed('bbb222'),
            typed('aaa333'),
            typed('bbb333'),
        ],
    )

    # Replace non-adjacent columns.
    check(
        [0, 2, 4], 'replace',
        [
            [21, 4, 24, 10, 31, 34],
            [22, 5, 25, 11, 32, 35],
            [23, 6, 26, 12, 33, 36],
        ],
        'main',
        [
            typed('aaa222'),
            {'name': 'bbb111', 'extra': 'b_column'},
            typed('bbb222'),
            named('d1'),
            typed('aaa333'),
            typed('bbb333'),
        ],
    )
def test_custom_column_name_with_compacting(self):
    """Custom column metadata survives regeneration of compact metadata.

    After compact generation, column 0 gets a custom ``name`` and an extra
    ``foo`` key.  Regenerating with ``compact=True`` is expected to leave
    those in place.  Setting the name to ``base.NO_VALUE`` and regenerating
    is expected to yield ``'__NO_VALUE__'`` for that column in the JSON
    structure, while the all-elements entry still carries the name ``'a'``.
    """

    def expected_structure(column_zero_metadata):
        # Everything except the column-0 entry is identical in all checks.
        return [
            {
                'selector': [],
                'metadata': {
                    'schema': base.CONTAINER_SCHEMA_VERSION,
                    'structural_type': 'd3m.container.pandas.DataFrame',
                    'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'],
                    'dimension': {
                        'name': 'rows',
                        'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'],
                        'length': 3,
                    },
                },
            },
            {
                'selector': ['__ALL_ELEMENTS__'],
                'metadata': {
                    'dimension': {
                        'name': 'columns',
                        'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'],
                        'length': 1,
                    },
                },
            },
            {
                'selector': ['__ALL_ELEMENTS__', '__ALL_ELEMENTS__'],
                'metadata': {
                    'structural_type': 'str',
                    'name': 'a',
                },
            },
            {
                'selector': ['__ALL_ELEMENTS__', 0],
                'metadata': column_zero_metadata,
            },
        ]

    def current_structure():
        # JSON view of the frame's metadata as it stands right now.
        return utils.to_json_structure(dataframe.metadata.to_internal_simple_structure())

    dataframe = container.DataFrame({'a': ['1.0', '2.0', '3.0']}, generate_metadata=False)
    dataframe.metadata = dataframe.metadata.generate(dataframe, compact=True)

    column_zero = (base.ALL_ELEMENTS, 0)
    dataframe.metadata = dataframe.metadata.update(column_zero, {
        'name': 'test',
        'foo': 'bar',
    })
    self.assertEqual(current_structure(), expected_structure({'name': 'test', 'foo': 'bar'}))

    # Regenerating must not discard the custom column metadata.
    dataframe.metadata = dataframe.metadata.generate(dataframe, compact=True)
    self.assertEqual(current_structure(), expected_structure({'name': 'test', 'foo': 'bar'}))

    dataframe.metadata = dataframe.metadata.update(column_zero, {
        'name': base.NO_VALUE,
    })
    dataframe.metadata = dataframe.metadata.generate(dataframe, compact=True)
    self.assertEqual(current_structure(), expected_structure({'name': '__NO_VALUE__', 'foo': 'bar'}))