Exemple #1
0
    def test_conversion(self):
        """Round-trip the iris problem document through its JSON structure.

        The problem is loaded from a ``file://`` URI. Rebuilding it from its
        JSON structure with ``strict_digest=True`` must give back the same
        simple structure, for both the current and the legacy serialization,
        and the first task keyword must be the
        ``problem.TaskKeyword.CLASSIFICATION`` object itself.
        """
        data_dir = os.path.join(os.path.dirname(__file__), 'data')
        problem_doc_path = os.path.join(
            data_dir, 'problems', 'iris_problem_1', 'problemDoc.json')
        problem_uri = 'file://{problem_doc_path}'.format(
            problem_doc_path=problem_doc_path)

        loaded = problem.Problem.load(problem_uri)

        # Current serialization path.
        expected_simple = loaded.to_simple_structure()
        rebuilt = problem.Problem.from_json_structure(
            loaded.to_json_structure(), strict_digest=True)
        self.assertEqual(expected_simple, rebuilt.to_simple_structure())

        # Legacy.
        legacy_json = utils.to_json_structure(loaded.to_simple_structure())
        legacy_rebuilt = problem.Problem.from_json_structure(
            utils.to_json_structure(loaded.to_simple_structure()),
            strict_digest=True)
        self.assertEqual(legacy_json, legacy_rebuilt.to_simple_structure())

        # Identity check, not equality: the keyword must be the enum member.
        reloaded = problem.Problem.from_json_structure(
            loaded.to_json_structure(), strict_digest=True)
        first_keyword = reloaded['problem']['task_keywords'][0]
        self.assertIs(first_keyword, problem.TaskKeyword.CLASSIFICATION)
    def test_dataframe_with_objects(self):
        """Compare compact and non-compact metadata for an all-string frame.

        A 10x5 frame of strings is wrapped without metadata, then metadata is
        generated twice. The compact result is expected to carry ``'str'``
        once on the all-elements selector; the non-compact result is expected
        to repeat it on each of the five columns.
        """
        labels = [str(i) for i in range(5)]
        raw_frame = pandas.DataFrame(
            {label: [str(j) for j in range(10)] for label in labels},
            columns=labels,
        )
        df = container.DataFrame(raw_frame, generate_metadata=False)

        compact_metadata = df.metadata.generate(df, compact=True)
        noncompact_metadata = df.metadata.generate(df, compact=False)

        # Table-level entries shared by both expectations.
        table_entries = [
            {
                'selector': [],
                'metadata': {
                    'schema': base.CONTAINER_SCHEMA_VERSION,
                    'structural_type': 'd3m.container.pandas.DataFrame',
                    'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'],
                    'dimension': {
                        'name': 'rows',
                        'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'],
                        'length': 10,
                    },
                },
            },
            {
                'selector': ['__ALL_ELEMENTS__'],
                'metadata': {
                    'dimension': {
                        'name': 'columns',
                        'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'],
                        'length': 5,
                    },
                },
            },
        ]

        # Compact: one shared structural type, then name-only column entries.
        shared_type_entry = {
            'selector': ['__ALL_ELEMENTS__', '__ALL_ELEMENTS__'],
            'metadata': {
                'structural_type': 'str',
            },
        }
        name_only_columns = [
            {'selector': ['__ALL_ELEMENTS__', position], 'metadata': {'name': str(position)}}
            for position in range(5)
        ]
        self.assertEqual(
            utils.to_json_structure(compact_metadata.to_internal_simple_structure()),
            table_entries + [shared_type_entry] + name_only_columns,
        )

        # Non-compact: every column entry carries its own structural type.
        typed_columns = [
            {
                'selector': ['__ALL_ELEMENTS__', position],
                'metadata': {'name': str(position), 'structural_type': 'str'},
            }
            for position in range(5)
        ]
        self.assertEqual(
            utils.to_json_structure(noncompact_metadata.to_internal_simple_structure()),
            table_entries + typed_columns,
        )
    def test_list_with_objects(self):
        """Check metadata generated for a 10x5 nested ``container.List`` of strings."""
        inner_lists = [container.List([str(outer)] * 5) for outer in range(10)]
        nested = container.List(inner_lists, generate_metadata=True)

        outer_entry = {
            'selector': [],
            'metadata': {
                'schema': base.CONTAINER_SCHEMA_VERSION,
                'structural_type': 'd3m.container.list.List',
                'dimension': {'length': 10},
            },
        }
        inner_entry = {
            'selector': ['__ALL_ELEMENTS__'],
            'metadata': {
                'structural_type': 'd3m.container.list.List',
                'dimension': {'length': 5},
            },
        }
        element_entry = {
            'selector': ['__ALL_ELEMENTS__', '__ALL_ELEMENTS__'],
            'metadata': {'structural_type': 'str'},
        }

        actual = utils.to_json_structure(nested.metadata.to_internal_simple_structure())
        self.assertEqual(actual, [outer_entry, inner_entry, element_entry])
    def test_basic(self):
        """Smoke-test the Holt-Winters smoothing primitive on a 4-row frame.

        First checks the metadata generated for the input frame (including the
        custom ``top_level`` entry), then runs ``produce`` with default
        hyperparameters, rounds the output to two decimals and compares the
        ``timestamp`` and ``value_holt_winters_smoothing`` columns with the
        expected values. Finally the primitive's params are read and set back.
        """
        main = container.DataFrame(
            {'timestamp': [1, 2, 3, 4], 'value': [0.32, 0.32, 0.31, 0.33]},
            {'top_level': 'main'},
            generate_metadata=True,
        )

        self.assertEqual(utils.to_json_structure(main.metadata.to_internal_simple_structure()), [{
            'selector': [],
            'metadata': {
                'top_level': 'main',
                'schema': metadata_base.CONTAINER_SCHEMA_VERSION,
                'structural_type': 'd3m.container.pandas.DataFrame',
                'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'],
                'dimension': {
                    'name': 'rows',
                    'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'],
                    'length': 4,
                },
            },
        }, {
            'selector': ['__ALL_ELEMENTS__'],
            'metadata': {
                'dimension': {
                    'name': 'columns',
                    'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'],
                    'length': 2,
                },
            },
        }, {
            'selector': ['__ALL_ELEMENTS__', 0],
            'metadata': {'structural_type': 'numpy.int64', 'name': 'timestamp'},
        }, {
            'selector': ['__ALL_ELEMENTS__', 1],
            'metadata': {'structural_type': 'numpy.float64', 'name': 'value'},
        }])

        hyperparams_class = HoltWintersExponentialSmoothing.HoltWintersExponentialSmoothing.metadata.get_hyperparams()
        primitive = HoltWintersExponentialSmoothing.HoltWintersExponentialSmoothing(hyperparams=hyperparams_class.defaults())

        # produce() is called directly, without set_training_data()/fit().
        output_main = primitive.produce(inputs=main).value
        # Round so the comparison below is against two-decimal values.
        output_main = round(output_main, 2)

        expected_result = container.DataFrame(
            data={'timestamp': [1, 2, 3, 4], 'value': [0.32, 0.32, 0.31, 0.32]})

        # The smoothed series is emitted under a suffixed column name.
        self.assertEqual(
            output_main[['timestamp', 'value_holt_winters_smoothing']].values.tolist(),
            expected_result[['timestamp', 'value']].values.tolist(),
        )

        # Params must survive a get/set round trip without raising.
        params = primitive.get_params()
        primitive.set_params(params=params)
Exemple #5
0
    def test_problem(self):
        """Encode the iris problem description and decode it back unchanged.

        The ``outputs`` entry is removed and the digest recomputed before
        encoding; decoding with ``strict_digest=True`` must return an equal
        description.
        """
        problem_doc_path = os.path.join(TEST_PROBLEMS_DIR, 'iris_problem_1', 'problemDoc.json')
        problem_description = problem_module.parse_problem_description(problem_doc_path)

        # TA2-TA3 API does not encode outputs.
        del problem_description['outputs']
        # The digest has to match the description as it stands after the removal.
        json_structure = d3m_utils.to_json_structure(problem_description)
        problem_description['digest'] = d3m_utils.compute_digest(json_structure)

        problem_message = utils.encode_problem_description(problem_description)
        decoded = utils.decode_problem_description(problem_message, strict_digest=True)

        self.assertEqual(decoded, problem_description)
Exemple #6
0
 def test_metadata(self):
     """Check the metadata attached to ``self.prediction_labels``.

     The expectation is a 10-row, single-column DataFrame whose only column
     is named ``LSTMODetector0_0``, typed ``numpy.int64`` and tagged as an
     attribute.
     """
     types_root = 'https://metadata.datadrivendiscovery.org/types/'

     table_entry = {
         'selector': [],
         'metadata': {
             'schema': metadata_base.CONTAINER_SCHEMA_VERSION,
             'structural_type': 'd3m.container.pandas.DataFrame',
             'semantic_types': [types_root + 'Table'],
             'dimension': {
                 'name': 'rows',
                 'semantic_types': [types_root + 'TabularRow'],
                 'length': 10,
             },
         },
     }
     columns_entry = {
         'selector': ['__ALL_ELEMENTS__'],
         'metadata': {
             'dimension': {
                 'name': 'columns',
                 'semantic_types': [types_root + 'TabularColumn'],
                 'length': 1,
             },
         },
     }
     label_column_entry = {
         'selector': ['__ALL_ELEMENTS__', 0],
         'metadata': {
             'name': 'LSTMODetector0_0',
             'semantic_types': [types_root + 'Attribute'],
             'structural_type': 'numpy.int64',
         },
     }

     actual = utils.to_json_structure(
         self.prediction_labels.metadata.to_internal_simple_structure())
     self.assertEqual(actual, [table_entry, columns_entry, label_column_entry])
    def test_dataframe_tabular_semantic_types(self):
        """Regenerate metadata on a frame that was given wrong tabular metadata.

        The frame starts with row/column semantic types and a dimension name
        that do not fit the top level. After ``generate`` the semantic types
        are expected to be corrected while the user-supplied dimension name
        is kept.
        """
        # A DataFrame with explicit WRONG metadata.
        wrong_metadata = {
            'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'],
            'dimension': {
                'name': 'columns',
                'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'],
            },
        }
        df = container.DataFrame({'A': [1, 2, 3], 'B': ['a', 'b', 'c']}, wrong_metadata)
        df.A = df.A.astype(numpy.int64)
        df.metadata = df.metadata.generate(df)

        table_entry = {
            'selector': [],
            'metadata': {
                'schema': base.CONTAINER_SCHEMA_VERSION,
                'structural_type': 'd3m.container.pandas.DataFrame',
                'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'],
                'dimension': {
                    # We respect the name, but we override the semantic types.
                    'name': 'columns',
                    'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'],
                    'length': 3,
                },
            },
        }
        columns_entry = {
            'selector': ['__ALL_ELEMENTS__'],
            'metadata': {
                'dimension': {
                    'name': 'columns',
                    'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'],
                    'length': 2,
                },
            },
        }
        column_a = {
            'selector': ['__ALL_ELEMENTS__', 0],
            'metadata': {'name': 'A', 'structural_type': 'numpy.int64'},
        }
        column_b = {
            'selector': ['__ALL_ELEMENTS__', 1],
            'metadata': {'name': 'B', 'structural_type': 'str'},
        }

        actual = utils.to_json_structure(df.metadata.to_internal_simple_structure())
        self.assertEqual(actual, [table_entry, columns_entry, column_a, column_b])
    def test_ndarray(self):
        """Check metadata generated for a one-dimensional int64 container array."""
        values = numpy.array([1, 2, 3], dtype=numpy.int64)
        array = container.ndarray(values, generate_metadata=True)

        expected = [
            {
                'selector': [],
                'metadata': {
                    'schema': base.CONTAINER_SCHEMA_VERSION,
                    'structural_type': 'd3m.container.numpy.ndarray',
                    'dimension': {'length': 3},
                },
            },
            {
                'selector': ['__ALL_ELEMENTS__'],
                'metadata': {'structural_type': 'numpy.int64'},
            },
        ]

        actual = utils.to_json_structure(array.metadata.to_internal_simple_structure())
        self.assertEqual(actual, expected)
    def test_ndarray_with_objects(self):
        """Check metadata generated for a 10x10x5 object array of strings."""
        nested_strings = [
            [[str(k) for k in range(5)] for _ in range(10)]
            for _ in range(10)
        ]
        array = container.ndarray(
            numpy.array(nested_strings, dtype=object), generate_metadata=True)

        everything = '__ALL_ELEMENTS__'
        expected = [
            {
                'selector': [],
                'metadata': {
                    'schema': base.CONTAINER_SCHEMA_VERSION,
                    'structural_type': 'd3m.container.numpy.ndarray',
                    'dimension': {'length': 10},
                },
            },
            {
                'selector': [everything],
                'metadata': {'dimension': {'length': 10}},
            },
            {
                'selector': [everything, everything],
                'metadata': {'dimension': {'length': 5}},
            },
            {
                'selector': [everything, everything, everything],
                'metadata': {'structural_type': 'str'},
            },
        ]

        actual = utils.to_json_structure(array.metadata.to_internal_simple_structure())
        self.assertEqual(actual, expected)
    def test_basic(self):
        """Smoke-test the non-negative matrix factorization primitive.

        Checks the metadata generated for a 3x2 integer input frame, runs
        ``produce`` with fixed ``W``/``H`` matrices and ``return_result``
        set to ``'append'``, then compares both the resulting frame and its
        metadata (two source columns plus seven appended latent-vector
        columns) with the expected values. Finally the primitive's params are
        read and set back.
        """
        self.maxDiff = None

        types_root = 'https://metadata.datadrivendiscovery.org/types/'

        def table_entries(column_count):
            """Return the expected table-level entries for a 3-row frame."""
            return [
                {
                    'selector': [],
                    'metadata': {
                        'schema': metadata_base.CONTAINER_SCHEMA_VERSION,
                        'structural_type': 'd3m.container.pandas.DataFrame',
                        'semantic_types': [types_root + 'Table'],
                        'dimension': {
                            'name': 'rows',
                            'semantic_types': [types_root + 'TabularRow'],
                            'length': 3,
                        },
                    },
                },
                {
                    'selector': ['__ALL_ELEMENTS__'],
                    'metadata': {
                        'dimension': {
                            'name': 'columns',
                            'semantic_types': [types_root + 'TabularColumn'],
                            'length': column_count,
                        },
                    },
                },
            ]

        # The two source columns are described identically before and after.
        source_column_entries = [
            {
                'selector': ['__ALL_ELEMENTS__', 0],
                'metadata': {'structural_type': 'numpy.int64', 'name': 'A'},
            },
            {
                'selector': ['__ALL_ELEMENTS__', 1],
                'metadata': {'structural_type': 'numpy.int64', 'name': 'B'},
            },
        ]

        main = container.DataFrame(
            {'A': [1, 2, 3], 'B': [4, 5, 6]},
            columns=['A', 'B'],
            generate_metadata=True,
        )

        self.assertEqual(
            utils.to_json_structure(main.metadata.to_internal_simple_structure()),
            table_entries(2) + source_column_entries,
        )

        a = np.array([[1, 0, 1, 0, 1], [1, 0, 1, 0, 1], [1, 0, 1, 0, 1]])
        b = np.array([[1, 0], [1, 0], [1, 0], [1, 0], [1, 0]])

        hyperparams_class = NonNegativeMatrixFactorization.NonNegativeMatrixFactorization.metadata.get_hyperparams()
        hp = hyperparams_class.defaults().replace({
            'use_semantic_types': True,
            'use_columns': (0, 1),
            'return_result': 'append',
            'rank': 5,
            'seed': 'fixed',
            'W': a,
            'H': b,
        })
        primitive = NonNegativeMatrixFactorization.NonNegativeMatrixFactorization(hyperparams=hp)
        new_main = primitive.produce(inputs=main).value

        # Expected frame: the latent vectors are longer than the three input
        # rows, so the source and row-vector columns are padded with NaN.
        c = pd.DataFrame({
            'A': [1, 2, 3, np.nan, np.nan],
            'B': [4, 5, 6, np.nan, np.nan],
            'row_latent_vector_0': [0.816725, 1.078965, 1.341205, np.nan, np.nan],
            'row_latent_vector_1': [3.514284e-16, 2.383547e-16, 2.227207e-16, np.nan, np.nan],
            'row_latent_vector_2': [0.816725, 1.078965, 1.341205, np.nan, np.nan],
            'row_latent_vector_3': [3.514284e-16, 2.383547e-16, 2.227207e-16, np.nan, np.nan],
            'row_latent_vector_4': [0.816725, 1.078965, 1.341205, np.nan, np.nan],
            'column_latent_vector_0': [0.642626, 0.542312, 0.642626, 0.542312, 0.642626],
            'column_latent_vector_1': [1.534324, 1.848782, 1.534324, 1.848782, 1.534324],
        })
        pd.testing.assert_frame_equal(new_main, c)

        # Appended columns start at index 2, right after 'A' and 'B'.
        latent_names = (
            ['row_latent_vector_{}'.format(i) for i in range(5)]
            + ['column_latent_vector_{}'.format(i) for i in range(2)]
        )
        latent_column_entries = [
            {
                'selector': ['__ALL_ELEMENTS__', position],
                'metadata': {
                    'name': name,
                    'semantic_types': [types_root + 'Attribute'],
                    'structural_type': 'numpy.float64',
                },
            }
            for position, name in enumerate(latent_names, start=2)
        ]

        self.assertEqual(
            utils.to_json_structure(new_main.metadata.to_internal_simple_structure()),
            table_entries(9) + source_column_entries + latent_column_entries,
        )

        # Params must survive a get/set round trip without raising.
        params = primitive.get_params()
        primitive.set_params(params=params)
    def test_basic(self):
        """Smoke-test the statistical-maximum primitive on a 4x3 frame.

        Checks the metadata generated for the input frame, runs ``produce``
        on columns 1 and 2 with ``window_size`` 2, then compares the output
        values and the output metadata (three source columns plus the
        ``values_maximum`` and ``b_maximum`` columns) with the expected
        values. Finally the primitive's params are read and set back.
        """
        self.maxDiff = None
        main = container.DataFrame(
            {'timestamp': [1, 3, 2, 5], 'values': [1.0, 2.0, 3.0, 4.0], 'b': [1.0, 4.0, 5.0, 6.0]},
            columns=['timestamp', 'values', 'b'],
            generate_metadata=True,
        )

        # The three source columns are described identically before and after.
        source_column_entries = [{
            'selector': ['__ALL_ELEMENTS__', 0],
            'metadata': {'structural_type': 'numpy.int64', 'name': 'timestamp'},
        }, {
            'selector': ['__ALL_ELEMENTS__', 1],
            'metadata': {'structural_type': 'numpy.float64', 'name': 'values'},
        }, {
            'selector': ['__ALL_ELEMENTS__', 2],
            'metadata': {'structural_type': 'numpy.float64', 'name': 'b'},
        }]

        self.assertEqual(utils.to_json_structure(main.metadata.to_internal_simple_structure()), [{
            'selector': [],
            'metadata': {
                'schema': metadata_base.CONTAINER_SCHEMA_VERSION,
                'structural_type': 'd3m.container.pandas.DataFrame',
                'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'],
                'dimension': {
                    'name': 'rows',
                    'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'],
                    'length': 4,
                },
            },
        }, {
            'selector': ['__ALL_ELEMENTS__'],
            'metadata': {
                'dimension': {
                    'name': 'columns',
                    'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'],
                    'length': 3,
                },
            },
        }] + source_column_entries)

        hyperparams_class = StatisticalMaximum.StatisticalMaximumPrimitive.metadata.get_hyperparams()
        hp = hyperparams_class.defaults().replace({
            'use_columns': [1, 2],
            'use_semantic_types': True,
            'window_size': 2,
        })
        primitive = StatisticalMaximum.StatisticalMaximumPrimitive(hyperparams=hp)

        output_main = primitive.produce(inputs=main).value

        output_columns = ['timestamp', 'values', 'b', 'values_maximum', 'b_maximum']
        expected_output = container.DataFrame(
            {
                'timestamp': [1, 3, 2, 5],
                'values': [1.0, 2.0, 3.0, 4.0],
                'b': [1.0, 4.0, 5.0, 6.0],
                'values_maximum': [2.0, 2.0, 3.0, 4.0],
                'b_maximum': [4.0, 4.0, 5.0, 6.0],
            },
            columns=output_columns,
        )

        self.assertEqual(
            output_main[output_columns].values.tolist(),
            expected_output[output_columns].values.tolist(),
        )

        self.assertEqual(utils.to_json_structure(output_main.metadata.to_internal_simple_structure()), [{
            'selector': [],
            'metadata': {
                # Same schema constant as the input check above, instead of a
                # hard-coded URL.
                'schema': metadata_base.CONTAINER_SCHEMA_VERSION,
                'structural_type': 'd3m.container.pandas.DataFrame',
                'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'],
                'dimension': {
                    'name': 'rows',
                    'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'],
                    'length': 4,
                },
            },
        }, {
            'selector': ['__ALL_ELEMENTS__'],
            'metadata': {
                'dimension': {
                    'name': 'columns',
                    'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'],
                    'length': 5,
                },
            },
        }] + source_column_entries + [{
            'selector': ['__ALL_ELEMENTS__', 3],
            'metadata': {
                'name': 'values_maximum',
                'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute'],
                'structural_type': 'numpy.float64',
            },
        }, {
            'selector': ['__ALL_ELEMENTS__', 4],
            'metadata': {
                'name': 'b_maximum',
                'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute'],
                'structural_type': 'numpy.float64',
            },
        }])

        # Params must survive a get/set round trip without raising.
        params = primitive.get_params()
        primitive.set_params(params=params)
Exemple #12
0
 def _test_metadata(self, metadata):
     self.assertEqual(
         utils.to_json_structure(metadata.to_internal_simple_structure()),
         [
             {
                 'metadata': {
                     'dimension': {
                         'length':
                         5,
                         'name':
                         'rows',
                         'semantic_types': [
                             'https://metadata.datadrivendiscovery.org/types/TabularRow'
                         ],
                     },
                     'schema':
                     'https://metadata.datadrivendiscovery.org/schemas/v0/container.json',
                     'semantic_types': [
                         'https://metadata.datadrivendiscovery.org/types/Table',
                         'https://metadata.datadrivendiscovery.org/types/FilesCollection',
                     ],
                     'structural_type':
                     'd3m.container.pandas.DataFrame',
                 },
                 'selector': [],
             },
             {
                 'metadata': {
                     'dimension': {
                         'length':
                         1,
                         'name':
                         'columns',
                         'semantic_types': [
                             'https://metadata.datadrivendiscovery.org/types/TabularColumn'
                         ],
                     }
                 },
                 'selector': ['__ALL_ELEMENTS__'],
             },
             {
                 'metadata': {
                     'structural_type': 'd3m.container.numpy.ndarray'
                 },
                 'selector': ['__ALL_ELEMENTS__', '__ALL_ELEMENTS__']
             },
             {
                 'metadata': {
                     'structural_type': 'str'
                 },
                 'selector': [
                     '__ALL_ELEMENTS__', '__ALL_ELEMENTS__',
                     '__ALL_ELEMENTS__', '__ALL_ELEMENTS__'
                 ],
             },
             {
                 'metadata': {
                     'dimension': {
                         'length':
                         1,
                         'name':
                         'rows',
                         'semantic_types': [
                             'https://metadata.datadrivendiscovery.org/types/TabularRow'
                         ],
                     },
                     'location_base_uris':
                     '__NO_VALUE__',
                     'media_types':
                     '__NO_VALUE__',
                     'name':
                     'filename',
                     'semantic_types': [
                         'https://metadata.datadrivendiscovery.org/types/PrimaryKey',
                         'http://schema.org/ImageObject',
                         'https://metadata.datadrivendiscovery.org/types/Table',
                     ]
                 },
                 'selector': ['__ALL_ELEMENTS__', 0],
             },
             {
                 'metadata': {
                     'dimension': {
                         'length':
                         1,
                         'name':
                         'columns',
                         'semantic_types': [
                             'https://metadata.datadrivendiscovery.org/types/TabularColumn'
                         ],
                     }
                 },
                 'selector': ['__ALL_ELEMENTS__', 0, '__ALL_ELEMENTS__'],
             },
             {
                 'metadata': {
                     'image_reader_metadata': {
                         'foobar': 42
                     },
                     'semantic_types': [
                         'https://metadata.datadrivendiscovery.org/types/PrimaryKey',
                         'http://schema.org/ImageObject',
                         'https://metadata.datadrivendiscovery.org/types/Table',
                     ],
                 },
                 'selector': [0, 0],
             },
             {
                 'metadata': {
                     'image_reader_metadata': {
                         'foobar': 42
                     },
                     'semantic_types': [
                         'https://metadata.datadrivendiscovery.org/types/PrimaryKey',
                         'http://schema.org/ImageObject',
                         'https://metadata.datadrivendiscovery.org/types/Table',
                     ],
                 },
                 'selector': [1, 0],
             },
             {
                 'metadata': {
                     'image_reader_metadata': {
                         'foobar': 42
                     },
                     'semantic_types': [
                         'https://metadata.datadrivendiscovery.org/types/PrimaryKey',
                         'http://schema.org/ImageObject',
                         'https://metadata.datadrivendiscovery.org/types/Table',
                     ],
                 },
                 'selector': [2, 0],
             },
             {
                 'metadata': {
                     'image_reader_metadata': {
                         'foobar': 42
                     },
                     'semantic_types': [
                         'https://metadata.datadrivendiscovery.org/types/PrimaryKey',
                         'http://schema.org/ImageObject',
                         'https://metadata.datadrivendiscovery.org/types/Table',
                     ],
                 },
                 'selector': [3, 0],
             },
             {
                 'metadata': {
                     'image_reader_metadata': {
                         'foobar': 42
                     },
                     'semantic_types': [
                         'https://metadata.datadrivendiscovery.org/types/PrimaryKey',
                         'http://schema.org/ImageObject',
                         'https://metadata.datadrivendiscovery.org/types/Table',
                     ],
                 },
                 'selector': [4, 0],
             },
         ],
     )
Exemple #13
0
    def test_basic(self):
        """Run ContinuityValidation on a frame whose timestamps skip a value.

        The input rows carry timestamps 1, 2 and 4.  The test pins, in order:
        the metadata generated for the input, the values of the produced
        frame (four rows, including one at timestamp 3 whose ``a``/``b``
        values lie midway between their neighbours), and the metadata of the
        produced frame.  The result is finally handed to
        ``self._test_continuity``.
        """
        all_elements = '__ALL_ELEMENTS__'
        column_names = ['d3mIndex', 'timestamp', 'a', 'b', 'ground_truth']
        column_types = [
            'numpy.int64',
            'numpy.float64',
            'numpy.float64',
            'numpy.float64',
            'numpy.int64',
        ]

        def metadata_for(row_count):
            """Expected JSON metadata of the five-column frame with ``row_count`` rows."""
            table_entry = {
                'selector': [],
                'metadata': {
                    'schema': metadata_base.CONTAINER_SCHEMA_VERSION,
                    'structural_type': 'd3m.container.pandas.DataFrame',
                    'semantic_types': [
                        'https://metadata.datadrivendiscovery.org/types/Table',
                    ],
                    'dimension': {
                        'name': 'rows',
                        'semantic_types': [
                            'https://metadata.datadrivendiscovery.org/types/TabularRow',
                        ],
                        'length': row_count,
                    },
                },
            }
            column_dimension_entry = {
                'selector': [all_elements],
                'metadata': {
                    'dimension': {
                        'name': 'columns',
                        'semantic_types': [
                            'https://metadata.datadrivendiscovery.org/types/TabularColumn',
                        ],
                        'length': 5,
                    },
                },
            }
            per_column_entries = [
                {
                    'selector': [all_elements, position],
                    'metadata': {'structural_type': dtype, 'name': name},
                }
                for position, (name, dtype) in enumerate(
                    zip(column_names, column_types))
            ]
            return [table_entry, column_dimension_entry] + per_column_entries

        def described(frame):
            """JSON-compatible view of the metadata attached to ``frame``."""
            return utils.to_json_structure(
                frame.metadata.to_internal_simple_structure())

        main = container.DataFrame(
            {
                'd3mIndex': [0, 1, 2],
                'timestamp': [1., 2., 4.],
                'a': [1., 2., 3.],
                'b': [2., 3., 4.],
                'ground_truth': [0, 0, 0],
            },
            columns=column_names,
            generate_metadata=True,
        )

        # Input: three rows, five columns.
        self.assertEqual(described(main), metadata_for(3))
        self.assertIsInstance(main, container.DataFrame)

        primitive_class = ContinuityValidation.ContinuityValidation
        default_hyperparams = primitive_class.metadata.get_hyperparams().defaults()
        primitive = primitive_class(hyperparams=default_hyperparams)
        new_main = primitive.produce(inputs=main).value

        expected_output = container.DataFrame({
            'd3mIndex': [0, 1, 2, 3],
            'timestamp': [1., 2., 3., 4.],
            'a': [1., 2., 2.5, 3.],
            'b': [2., 3., 3.5, 4.],
            'ground_truth': [0, 0, 0, 0],
        })
        self.assertEqual(new_main.values.tolist(),
                         expected_output.values.tolist())

        # Output: same columns, one additional row.
        self.assertEqual(described(new_main), metadata_for(4))

        self._test_continuity(new_main)
    def test_list(self):
        """Pin the metadata generated for ``container.List`` values.

        Three lists are covered: one holding only strings (a single
        ``__ALL_ELEMENTS__`` entry is expected), one mixing an int, a str and
        a float (one entry per index is expected), and one wrapping a single
        two-column DataFrame.
        """
        all_elements = '__ALL_ELEMENTS__'

        def described(value):
            """JSON-compatible view of the metadata attached to ``value``."""
            return utils.to_json_structure(value.metadata.to_internal_simple_structure())

        def list_entry(length):
            """Expected top-level entry for a list with ``length`` items."""
            return {
                'selector': [],
                'metadata': {
                    'schema': base.CONTAINER_SCHEMA_VERSION,
                    'structural_type': 'd3m.container.list.List',
                    'dimension': {'length': length},
                },
            }

        def typed_entry(selector, structural_type):
            """Expected entry carrying only a structural type."""
            return {
                'selector': selector,
                'metadata': {'structural_type': structural_type},
            }

        strings = container.List(['a', 'b', 'c'], generate_metadata=True)
        self.assertEqual(
            described(strings),
            [list_entry(3), typed_entry([all_elements], 'str')],
        )

        mixed = container.List([1, 'a', 2.0], generate_metadata=True)
        self.assertEqual(
            described(mixed),
            [
                list_entry(3),
                typed_entry([0], 'int'),
                typed_entry([1], 'str'),
                typed_entry([2], 'float'),
            ],
        )

        dataframe = container.DataFrame({'A': [1, 2, 3], 'B': ['a', 'b', 'c']})
        dataframe.A = dataframe.A.astype(numpy.int64)
        wrapped = container.List([dataframe], generate_metadata=True)

        table_entry = {
            'selector': [all_elements],
            'metadata': {
                'structural_type': 'd3m.container.pandas.DataFrame',
                'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'],
                'dimension': {
                    'name': 'rows',
                    'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'],
                    'length': 3,
                },
            },
        }
        column_dimension_entry = {
            'selector': [all_elements, all_elements],
            'metadata': {
                'dimension': {
                    'name': 'columns',
                    'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'],
                    'length': 2,
                },
            },
        }
        column_entries = [
            {
                'selector': [all_elements, all_elements, position],
                'metadata': {'name': name, 'structural_type': dtype},
            }
            for position, (name, dtype) in enumerate([('A', 'numpy.int64'), ('B', 'str')])
        ]
        self.assertEqual(
            described(wrapped),
            [list_entry(1), table_entry, column_dimension_entry] + column_entries,
        )
    def test_basic(self):
        """Fit SKPowerTransformer on the KPI learning data and check its output.

        The test asserts that column 2 of the produced frame has a mean close
        to 0 and a standard deviation close to 1 (squared error within 1e-5),
        that the produced metadata describes 7027 rows and four
        ``numpy.float64`` attribute columns, and finally feeds the result of
        ``get_params()`` back into ``set_params()`` (no assertion is made on
        that last step; it only has to run).
        """
        self.maxDiff = None

        dataset_fname = os.path.join(
            this_path,
            '../../datasets/anomaly/kpi/TRAIN/dataset_TRAIN/tables/learningData.csv'
        )
        dataset = pd.read_csv(dataset_fname)
        main = d3m_dataframe(dataset, generate_metadata=True)

        primitive_class = SKPowerTransformer.SKPowerTransformer
        hyperparams_class = primitive_class.metadata.get_hyperparams()
        primitive = primitive_class(hyperparams=hyperparams_class.defaults())
        primitive.set_training_data(inputs=main)
        primitive.fit()
        new_main = primitive.produce(inputs=main).value

        # Centering/scaling check on column index 2 (named 'value' in the
        # expected metadata below).
        test_data = new_main.values[:, 2]
        new_mean, new_std = test_data.mean(), test_data.std()
        mean_mse = new_mean**2
        std_mse = (new_std - 1)**2
        # Use the float() builtin rather than calling __float__ directly.
        self.assertAlmostEqual(float(mean_mse), 0., delta=1e-5)
        self.assertAlmostEqual(float(std_mse), 0., delta=1e-5)

        all_elements = '__ALL_ELEMENTS__'
        expected_metadata = [
            {
                'selector': [],
                'metadata': {
                    'schema': metadata_base.CONTAINER_SCHEMA_VERSION,
                    'structural_type': 'd3m.container.pandas.DataFrame',
                    'semantic_types': [
                        'https://metadata.datadrivendiscovery.org/types/Table',
                    ],
                    'dimension': {
                        'name': 'rows',
                        'semantic_types': [
                            'https://metadata.datadrivendiscovery.org/types/TabularRow',
                        ],
                        'length': 7027,
                    },
                },
            },
            {
                'selector': [all_elements],
                'metadata': {
                    'dimension': {
                        'name': 'columns',
                        'semantic_types': [
                            'https://metadata.datadrivendiscovery.org/types/TabularColumn',
                        ],
                        'length': 4,
                    },
                },
            },
        ]
        # Every produced column is expected to be a float64 attribute.
        expected_metadata.extend(
            {
                'selector': [all_elements, position],
                'metadata': {
                    'name': name,
                    'semantic_types': [
                        'https://metadata.datadrivendiscovery.org/types/Attribute',
                    ],
                    'structural_type': 'numpy.float64',
                },
            }
            for position, name in enumerate(
                ['d3mIndex', 'timestamp', 'value', 'ground_truth'])
        )

        self.assertEqual(
            utils.to_json_structure(
                new_main.metadata.to_internal_simple_structure()),
            expected_metadata)

        params = primitive.get_params()
        primitive.set_params(params=params)
Exemple #16
0
    def test_basic(self):
        """Fit TelemanomPrimitive on a small frame and pin the score metadata.

        The nine-row, three-column input is checked for its generated
        metadata, then the primitive is fitted with overridden hyperparams
        and ``produce_score`` is called.  The produced metadata is expected to
        describe six rows and three ``numpy.float64`` attribute columns named
        ``Telemanom0_0`` .. ``Telemanom0_2``.
        """
        self.maxDiff = None
        all_elements = '__ALL_ELEMENTS__'

        def frame_entries(row_count, column_count):
            """Expected table-level and column-dimension metadata entries."""
            return [
                {
                    'selector': [],
                    'metadata': {
                        'schema': metadata_base.CONTAINER_SCHEMA_VERSION,
                        'structural_type': 'd3m.container.pandas.DataFrame',
                        'semantic_types': [
                            'https://metadata.datadrivendiscovery.org/types/Table',
                        ],
                        'dimension': {
                            'name': 'rows',
                            'semantic_types': [
                                'https://metadata.datadrivendiscovery.org/types/TabularRow',
                            ],
                            'length': row_count,
                        },
                    },
                },
                {
                    'selector': [all_elements],
                    'metadata': {
                        'dimension': {
                            'name': 'columns',
                            'semantic_types': [
                                'https://metadata.datadrivendiscovery.org/types/TabularColumn',
                            ],
                            'length': column_count,
                        },
                    },
                },
            ]

        def described(frame):
            """JSON-compatible view of the metadata attached to ``frame``."""
            return utils.to_json_structure(
                frame.metadata.to_internal_simple_structure())

        input_names = ['a', 'b', 'c']
        main = container.DataFrame(
            {
                'a': [1., 2., 3., 4., 5, 6, 7, 8, 9],
                'b': [2., 3., 4., 5., 6, 7, 8, 9, 10],
                'c': [3., 4., 5., 6., 7, 8, 9, 10, 11],
            },
            columns=input_names,
            generate_metadata=True,
        )

        print(main)

        expected_input_metadata = frame_entries(9, 3) + [
            {
                'selector': [all_elements, position],
                'metadata': {'structural_type': 'numpy.float64', 'name': name},
            }
            for position, name in enumerate(input_names)
        ]
        self.assertEqual(described(main), expected_input_metadata)

        self.assertIsInstance(main, container.DataFrame)

        overrides = {
            'l_s': 2,
            'n_predictions': 1,
            'return_result': 'new',
            'return_subseq_inds': True,
            'use_columns': (0, 1, 2),
        }
        defaults = TelemanomPrimitive.metadata.get_hyperparams().defaults()
        hyperparams = defaults.replace(overrides)

        primitive = TelemanomPrimitive(hyperparams=hyperparams)
        primitive.set_training_data(inputs=main)
        primitive.fit()
        new_main = primitive.produce_score(inputs=main).value

        print("new main", new_main)

        output_names = ['Telemanom0_0', 'Telemanom0_1', 'Telemanom0_2']
        expected_output_metadata = frame_entries(6, 3) + [
            {
                'selector': [all_elements, position],
                'metadata': {
                    'name': name,
                    'structural_type': 'numpy.float64',
                    'semantic_types': [
                        'https://metadata.datadrivendiscovery.org/types/Attribute',
                    ],
                },
            }
            for position, name in enumerate(output_names)
        ]
        self.assertEqual(described(new_main), expected_output_metadata)
Exemple #17
0
    def test_basic(self):
        """Fit SKStandardScaler on a six-column frame and check its output.

        Column names in the metadata are overridden first (index 0 and index
        5 deliberately share the name ``aaa111``).  The test then asserts the
        input metadata, that every produced column has mean close to 0 and
        standard deviation close to 1 (summed squared error within 1e-8),
        and that the produced metadata keeps the overridden names while
        marking each column as a ``numpy.float64`` attribute.  Finally the
        result of ``get_params()`` is fed back into ``set_params()`` (no
        assertion is made on that step; it only has to run).
        """
        self.maxDiff = None
        all_elements = '__ALL_ELEMENTS__'
        metadata_names = [
            'aaa111', 'bbb111', 'aaa222', 'ccc111', 'aaa333', 'aaa111',
        ]

        def frame_entries():
            """Expected table-level and column-dimension entries (3 rows, 6 columns)."""
            return [
                {
                    'selector': [],
                    'metadata': {
                        'schema': metadata_base.CONTAINER_SCHEMA_VERSION,
                        'structural_type': 'd3m.container.pandas.DataFrame',
                        'semantic_types': [
                            'https://metadata.datadrivendiscovery.org/types/Table',
                        ],
                        'dimension': {
                            'name': 'rows',
                            'semantic_types': [
                                'https://metadata.datadrivendiscovery.org/types/TabularRow',
                            ],
                            'length': 3,
                        },
                    },
                },
                {
                    'selector': [all_elements],
                    'metadata': {
                        'dimension': {
                            'name': 'columns',
                            'semantic_types': [
                                'https://metadata.datadrivendiscovery.org/types/TabularColumn',
                            ],
                            'length': 6,
                        },
                    },
                },
            ]

        def described(frame):
            """JSON-compatible view of the metadata attached to ``frame``."""
            return utils.to_json_structure(
                frame.metadata.to_internal_simple_structure())

        main = container.DataFrame(
            {
                'a1': [1., 2., 3.],
                'b1': [2., 3., 4.],
                'a2': [3., 4., 5.],
                'c1': [4., 5., 6.],
                'a3': [5., 6., 7.],
                'a1a': [6., 7., 8.],
            },
            columns=['a1', 'b1', 'a2', 'c1', 'a3', 'a1a'],
            generate_metadata=True)
        # Override the generated column names, one column at a time.
        for position, name in enumerate(metadata_names):
            main.metadata = main.metadata.update_column(position, {'name': name})

        expected_input_metadata = frame_entries() + [
            {
                'selector': [all_elements, position],
                'metadata': {'structural_type': 'numpy.float64', 'name': name},
            }
            for position, name in enumerate(metadata_names)
        ]
        self.assertEqual(described(main), expected_input_metadata)

        primitive_class = SKStandardScaler.SKStandardScaler
        hyperparams_class = primitive_class.metadata.get_hyperparams()
        primitive = primitive_class(hyperparams=hyperparams_class.defaults())
        primitive.set_training_data(inputs=main)
        primitive.fit()
        new_main = primitive.produce(inputs=main).value

        # mean(0)/std(0) reduce the 2-D value matrix to one entry per column,
        # so multiplying each vector with itself gives its sum of squares.
        new_mean, new_std = new_main.values.mean(0), new_main.values.std(0)
        std_deviation = new_std - np.ones_like(new_std)
        mean_mse = np.matmul(new_mean, new_mean)
        std_mse = np.matmul(std_deviation, std_deviation)

        # Use the float() builtin rather than calling __float__ directly.
        self.assertAlmostEqual(float(mean_mse), 0., delta=1e-8)
        self.assertAlmostEqual(float(std_mse), 0., delta=1e-8)

        expected_output_metadata = frame_entries() + [
            {
                'selector': [all_elements, position],
                'metadata': {
                    'name': name,
                    'semantic_types': [
                        'https://metadata.datadrivendiscovery.org/types/Attribute',
                    ],
                    'structural_type': 'numpy.float64',
                },
            }
            for position, name in enumerate(metadata_names)
        ]
        self.assertEqual(described(new_main), expected_output_metadata)

        params = primitive.get_params()
        primitive.set_params(params=params)
Exemple #18
0
    def test_combine_columns_new_with_index_noncompact_metadata(self):
        """Check ``combine_columns`` with ``return_result='new'`` and index columns.

        Both input frames carry non-compact metadata. The combined result is
        expected to hold only the ``d3mIndex`` and ``b2`` columns, together
        with matching per-column metadata.
        """
        primary_key_type = 'https://metadata.datadrivendiscovery.org/types/PrimaryKey'
        attribute_type = 'https://metadata.datadrivendiscovery.org/types/Attribute'

        def index_column():
            # Build a fresh dict on every call so no metadata object is shared.
            return {
                'name': 'd3mIndex',
                'semantic_types': ['http://schema.org/Integer', primary_key_type],
            }

        def attribute_column(name):
            return {'name': name, 'semantic_types': [attribute_type]}

        main = container.DataFrame(
            {'d3mIndex': [1, 2, 3], 'b1': [4, 5, 6], 'c1': [7, 8, 9]},
            columns=['d3mIndex', 'b1', 'c1'],
            generate_metadata=False,
        )
        main.metadata = main.metadata.generate(main, compact=False)
        main_columns = [index_column(), attribute_column('b1'), attribute_column('c1')]
        for position, column_metadata in enumerate(main_columns):
            main.metadata = main.metadata.update_column(position, column_metadata)

        columns = container.DataFrame(
            {'d3mIndex': [1, 2, 3], 'b2': [4, 5, 6]},
            columns=['d3mIndex', 'b2'],
            generate_metadata=False,
        )
        columns.metadata = columns.metadata.generate(columns, compact=False)
        new_columns = [index_column(), attribute_column('b2')]
        for position, column_metadata in enumerate(new_columns):
            columns.metadata = columns.metadata.update_column(position, column_metadata)

        result = utils.combine_columns(
            main, [], [columns], return_result='new', add_index_columns=True,
        )

        self.assertEqual(result.values.tolist(), [[1, 4], [2, 5], [3, 6]])

        actual_metadata = d3m_utils.to_json_structure(
            result.metadata.to_internal_simple_structure())
        expected_metadata = [{
            'selector': [],
            'metadata': {
                'schema': metadata_base.CONTAINER_SCHEMA_VERSION,
                'structural_type': 'd3m.container.pandas.DataFrame',
                'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'],
                'dimension': {
                    'name': 'rows',
                    'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'],
                    'length': 3,
                },
            },
        }, {
            'selector': ['__ALL_ELEMENTS__'],
            'metadata': {
                'dimension': {
                    'name': 'columns',
                    'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'],
                    'length': 2,
                },
            },
        }, {
            'selector': ['__ALL_ELEMENTS__', 0],
            'metadata': {
                'name': 'd3mIndex',
                'semantic_types': ['http://schema.org/Integer', primary_key_type],
                'structural_type': 'numpy.int64',
            },
        }, {
            'selector': ['__ALL_ELEMENTS__', 1],
            'metadata': {
                'name': 'b2',
                'semantic_types': [attribute_type],
                'structural_type': 'numpy.int64',
            },
        }]
        self.assertEqual(actual_metadata, expected_metadata)
    def test_update_with_generated_metadata(self):
        """Exercise ``_update_with_generated_metadata`` in two scenarios.

        First on metadata that only has top-level entries, then on metadata
        that already carries ``semantic_types`` and ``dimension`` values at
        the top level and under selector ``('a',)``.
        """
        metadata = base.DataMetadata({
            'schema': base.CONTAINER_SCHEMA_VERSION,
            'structural_type': container.ndarray,
        })

        # Entries are supplied in exactly this order.
        generated = collections.OrderedDict([
            (('a',), {'other': 1}),
            (('b',), {'other': 2}),
            (('c',), {'other': 3}),
            ((base.ALL_ELEMENTS,), {'foo': 'bar'}),
            (('other', 'a'), {'other': 4}),
            (('other', 'b'), {'other': 5}),
            (('other', 'c'), {'other': 6}),
            (('other', base.ALL_ELEMENTS), {'foo': 'bar2'}),
        ])

        metadata._update_with_generated_metadata(generated)

        simple_structure = utils.to_json_structure(metadata.to_internal_simple_structure())
        self.assertEqual(simple_structure, [
            {
                'selector': [],
                'metadata': {
                    'schema': base.CONTAINER_SCHEMA_VERSION,
                    'structural_type': 'd3m.container.numpy.ndarray',
                },
            },
            {'selector': ['__ALL_ELEMENTS__'], 'metadata': {'foo': 'bar'}},
            {'selector': ['a'], 'metadata': {'other': 1}},
            {'selector': ['b'], 'metadata': {'other': 2}},
            {'selector': ['c'], 'metadata': {'other': 3}},
            {'selector': ['other', '__ALL_ELEMENTS__'], 'metadata': {'foo': 'bar2'}},
            {'selector': ['other', 'a'], 'metadata': {'other': 4}},
            {'selector': ['other', 'b'], 'metadata': {'other': 5}},
            {'selector': ['other', 'c'], 'metadata': {'other': 6}},
        ])

        # Second scenario: start from metadata that already has values.
        metadata = base.DataMetadata({
            'schema': base.CONTAINER_SCHEMA_VERSION,
            'structural_type': container.ndarray,
            'semantic_types': ['http://example.com/Type1'],
            'dimension': {
                'length': 0,
                'foobar': 42,
                'semantic_types': ['http://example.com/Type2'],
            },
        })
        metadata = metadata.update(('a',), {
            'semantic_types': ['http://example.com/Type3'],
            'dimension': {
                'length': 0,
                'foobar': 45,
                'semantic_types': ['http://example.com/Type4'],
            },
        })

        generated = collections.OrderedDict([
            ((), {
                'other': 1,
                'structural_type': container.ndarray,
                'semantic_types': ['http://example.com/Type1a'],
                'dimension': {
                    'length': 100,
                    'name': 'test1',
                    'semantic_types': ['http://example.com/Type2a'],
                },
            }),
            (('a',), {
                'semantic_types': ['http://example.com/Type3', 'http://example.com/Type3a'],
                'dimension': {
                    'length': 200,
                    'name': 'test2',
                    'semantic_types': ['http://example.com/Type4', 'http://example.com/Type4a'],
                },
            }),
            (('b',), {'other': 2}),
        ])

        metadata._update_with_generated_metadata(generated)

        # Both serialisations below are compared against this same structure.
        expected = [{
            'selector': [],
            'metadata': {
                'schema': base.CONTAINER_SCHEMA_VERSION,
                'structural_type': 'd3m.container.numpy.ndarray',
                'other': 1,
                'semantic_types': ['http://example.com/Type1', 'http://example.com/Type1a'],
                'dimension': {
                    'length': 100,
                    'name': 'test1',
                    'foobar': 42,
                    'semantic_types': ['http://example.com/Type2', 'http://example.com/Type2a'],
                },
            },
        }, {
            'selector': ['a'],
            'metadata': {
                'semantic_types': ['http://example.com/Type3', 'http://example.com/Type3a'],
                'dimension': {
                    'length': 200,
                    'name': 'test2',
                    'foobar': 45,
                    'semantic_types': ['http://example.com/Type4', 'http://example.com/Type4a'],
                },
            },
        }, {
            'selector': ['b'],
            'metadata': {'other': 2},
        }]

        self.assertEqual(utils.to_json_structure(metadata.to_internal_simple_structure()), expected)
        self.assertEqual(metadata.to_json_structure(), expected)
    def test_unset_structural_type(self):
        """Regenerate metadata after moving the all-cells entry onto each column.

        For compact and then non-compact generation, the metadata stored under
        ``(ALL_ELEMENTS, ALL_ELEMENTS)`` is removed and copied onto columns 0
        and 1 before ``generate`` is run a second time on the same dataframe.
        """
        dataframe = container.DataFrame({'a': ['a', 'b', 'c'], 'b': ['a', 'b', 'c']}, generate_metadata=False)
        all_cells = (base.ALL_ELEMENTS, base.ALL_ELEMENTS)

        def regenerated_structure(compact):
            # Generate, push the all-cells metadata down to both columns, regenerate.
            current = dataframe.metadata.generate(dataframe, compact=compact)
            shared = current.query(all_cells)
            current = current.remove(all_cells, strict_all_elements=True)
            for column_index in (0, 1):
                current = current.update((base.ALL_ELEMENTS, column_index), shared)
            current = current.generate(dataframe, compact=compact)
            return utils.to_json_structure(current.to_internal_simple_structure())

        # The first two entries are the same in both expectations.
        table_entry = {
            'selector': [],
            'metadata': {
                'schema': base.CONTAINER_SCHEMA_VERSION,
                'structural_type': 'd3m.container.pandas.DataFrame',
                'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'],
                'dimension': {
                    'name': 'rows',
                    'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'],
                    'length': 3,
                },
            },
        }
        columns_entry = {
            'selector': ['__ALL_ELEMENTS__'],
            'metadata': {
                'dimension': {
                    'name': 'columns',
                    'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'],
                    'length': 2,
                },
            },
        }

        self.assertEqual(regenerated_structure(True), [
            table_entry,
            columns_entry,
            {
                'selector': ['__ALL_ELEMENTS__', '__ALL_ELEMENTS__'],
                'metadata': {'structural_type': 'str'},
            },
            {
                'selector': ['__ALL_ELEMENTS__', 0],
                'metadata': {'name': 'a'},
            },
            {
                'selector': ['__ALL_ELEMENTS__', 1],
                'metadata': {'name': 'b'},
            },
        ])

        self.assertEqual(regenerated_structure(False), [
            table_entry,
            columns_entry,
            {
                'selector': ['__ALL_ELEMENTS__', 0],
                'metadata': {'name': 'a', 'structural_type': 'str'},
            },
            {
                'selector': ['__ALL_ELEMENTS__', 1],
                'metadata': {'name': 'b', 'structural_type': 'str'},
            },
        ])
    def test_dataset(self):
        """Check compact and non-compact metadata generated for a one-resource dataset.

        The two expected structures differ only in the first selector segment
        of the resource entries: ``'__ALL_ELEMENTS__'`` when compact, the
        resource id ``'0'`` otherwise.
        """
        dataframe = container.DataFrame({'A': [1, 2, 3], 'B': ['a', 'b', 'c']})
        dataframe.A = dataframe.A.astype(numpy.int64, copy=False)
        dataset = container.Dataset({'0': dataframe}, generate_metadata=False)

        compact_metadata = dataset.metadata.generate(dataset, compact=True)
        noncompact_metadata = dataset.metadata.generate(dataset, compact=False)

        def expected_structure(resource):
            # `resource` is the leading selector segment of every resource entry.
            return [
                {
                    'selector': [],
                    'metadata': {
                        'schema': base.CONTAINER_SCHEMA_VERSION,
                        'structural_type': 'd3m.container.dataset.Dataset',
                        'dimension': {
                            'name': 'resources',
                            'semantic_types': ['https://metadata.datadrivendiscovery.org/types/DatasetResource'],
                            'length': 1,
                        },
                    },
                },
                {
                    'selector': [resource],
                    'metadata': {
                        'structural_type': 'd3m.container.pandas.DataFrame',
                        'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'],
                        'dimension': {
                            'name': 'rows',
                            'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'],
                            'length': 3,
                        },
                    },
                },
                {
                    'selector': [resource, '__ALL_ELEMENTS__'],
                    'metadata': {
                        'dimension': {
                            'name': 'columns',
                            'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'],
                            'length': 2,
                        },
                    },
                },
                {
                    'selector': [resource, '__ALL_ELEMENTS__', 0],
                    'metadata': {'name': 'A', 'structural_type': 'numpy.int64'},
                },
                {
                    'selector': [resource, '__ALL_ELEMENTS__', 1],
                    'metadata': {'name': 'B', 'structural_type': 'str'},
                },
            ]

        compact_structure = utils.to_json_structure(compact_metadata.to_internal_simple_structure())
        self.assertEqual(compact_structure, expected_structure('__ALL_ELEMENTS__'))

        noncompact_structure = utils.to_json_structure(noncompact_metadata.to_internal_simple_structure())
        self.assertEqual(noncompact_structure, expected_structure('0'))
    def test_complex_value(self):
        """Check generated metadata for a dataset whose cells hold containers.

        Column ``A`` holds ``container.ndarray`` values and column ``B`` holds
        ``container.List`` values, with str, int and float elements in rows
        0, 1 and 2. Both the compact and the non-compact structure are checked.
        """
        # Show complete diffs; the expected structures are long.
        self.maxDiff = None

        dataset = container.Dataset({
            '0': container.DataFrame({
                'A': [
                    container.ndarray(numpy.array(['a', 'b', 'c'])),
                    container.ndarray(numpy.array([1, 2, 3], dtype=numpy.int64)),
                    container.ndarray(numpy.array([1.0, 2.0, 3.0])),
                ],
                'B': [
                    container.List(['a', 'b', 'c']),
                    container.List([1, 2, 3]),
                    container.List([1.0, 2.0, 3.0]),
                ],
            }),
        }, generate_metadata=False)

        compact_metadata = dataset.metadata.generate(dataset, compact=True)
        compact_structure = utils.to_json_structure(compact_metadata.to_internal_simple_structure())

        self.assertEqual(compact_structure, [{
            'selector': [],
            'metadata': {
                'schema': base.CONTAINER_SCHEMA_VERSION,
                'structural_type': 'd3m.container.dataset.Dataset',
                'dimension': {
                    'name': 'resources',
                    'semantic_types': ['https://metadata.datadrivendiscovery.org/types/DatasetResource'],
                    'length': 1,
                },
            },
        }, {
            'selector': ['__ALL_ELEMENTS__'],
            'metadata': {
                'structural_type': 'd3m.container.pandas.DataFrame',
                'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'],
                'dimension': {
                    'name': 'rows',
                    'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'],
                    'length': 3,
                },
            },
        }, {
            'selector': ['__ALL_ELEMENTS__', '__ALL_ELEMENTS__'],
            'metadata': {
                'dimension': {
                    'name': 'columns',
                    'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'],
                    'length': 2,
                },
            },
        }, {
            'selector': ['__ALL_ELEMENTS__', '__ALL_ELEMENTS__', '__ALL_ELEMENTS__'],
            'metadata': {
                'dimension': {
                    'length': 3,
                },
            },
        }, {
            'selector': ['__ALL_ELEMENTS__', '__ALL_ELEMENTS__', 0],
            'metadata': {
                'structural_type': 'd3m.container.numpy.ndarray',
                'name': 'A',
            },
        }, {
            'selector': ['__ALL_ELEMENTS__', '__ALL_ELEMENTS__', 1],
            'metadata': {
                'structural_type': 'd3m.container.list.List',
                'name': 'B',
            },
        }, {
            'selector': ['__ALL_ELEMENTS__', 0, 0, '__ALL_ELEMENTS__'],
            'metadata': {'structural_type': 'numpy.str_'},
        }, {
            'selector': ['__ALL_ELEMENTS__', 0, 1, '__ALL_ELEMENTS__'],
            'metadata': {'structural_type': 'str'},
        }, {
            'selector': ['__ALL_ELEMENTS__', 1, 0, '__ALL_ELEMENTS__'],
            'metadata': {'structural_type': 'numpy.int64'},
        }, {
            'selector': ['__ALL_ELEMENTS__', 1, 1, '__ALL_ELEMENTS__'],
            'metadata': {'structural_type': 'int'},
        }, {
            'selector': ['__ALL_ELEMENTS__', 2, 0, '__ALL_ELEMENTS__'],
            'metadata': {'structural_type': 'numpy.float64'},
        }, {
            'selector': ['__ALL_ELEMENTS__', 2, 1, '__ALL_ELEMENTS__'],
            'metadata': {'structural_type': 'float'},
        }])

        noncompact_metadata = dataset.metadata.generate(dataset, compact=False)
        noncompact_structure = utils.to_json_structure(noncompact_metadata.to_internal_simple_structure())

        self.assertEqual(noncompact_structure, [{
            'selector': [],
            'metadata': {
                # Use the shared constant, as the compact expectation above does,
                # rather than a hard-coded schema URL.
                'schema': base.CONTAINER_SCHEMA_VERSION,
                'structural_type': 'd3m.container.dataset.Dataset',
                'dimension': {
                    'name': 'resources',
                    'semantic_types': ['https://metadata.datadrivendiscovery.org/types/DatasetResource'],
                    'length': 1,
                },
            },
        }, {
            'selector': ['0'],
            'metadata': {
                'structural_type': 'd3m.container.pandas.DataFrame',
                'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'],
                'dimension': {
                    'name': 'rows',
                    'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'],
                    'length': 3,
                },
            },
        }, {
            'selector': ['0', '__ALL_ELEMENTS__'],
            'metadata': {
                'dimension': {
                    'name': 'columns',
                    'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'],
                    'length': 2,
                },
            },
        }, {
            'selector': ['0', '__ALL_ELEMENTS__', 0],
            'metadata': {'name': 'A'},
        }, {
            'selector': ['0', '__ALL_ELEMENTS__', 1],
            'metadata': {'name': 'B'},
        }, {
            'selector': ['0', 0, 0],
            'metadata': {
                'dimension': {'length': 3},
                'structural_type': 'd3m.container.numpy.ndarray',
            },
        }, {
            'selector': ['0', 0, 0, '__ALL_ELEMENTS__'],
            'metadata': {'structural_type': 'numpy.str_'},
        }, {
            'selector': ['0', 0, 1],
            'metadata': {
                'dimension': {'length': 3},
                'structural_type': 'd3m.container.list.List',
            },
        }, {
            'selector': ['0', 0, 1, '__ALL_ELEMENTS__'],
            'metadata': {'structural_type': 'str'},
        }, {
            'selector': ['0', 1, 0],
            'metadata': {
                'dimension': {'length': 3},
                'structural_type': 'd3m.container.numpy.ndarray',
            },
        }, {
            'selector': ['0', 1, 0, '__ALL_ELEMENTS__'],
            'metadata': {'structural_type': 'numpy.int64'},
        }, {
            'selector': ['0', 1, 1],
            'metadata': {
                'dimension': {'length': 3},
                'structural_type': 'd3m.container.list.List',
            },
        }, {
            'selector': ['0', 1, 1, '__ALL_ELEMENTS__'],
            'metadata': {'structural_type': 'int'},
        }, {
            'selector': ['0', 2, 0],
            'metadata': {
                'dimension': {'length': 3},
                'structural_type': 'd3m.container.numpy.ndarray',
            },
        }, {
            'selector': ['0', 2, 0, '__ALL_ELEMENTS__'],
            'metadata': {'structural_type': 'numpy.float64'},
        }, {
            'selector': ['0', 2, 1],
            'metadata': {
                'dimension': {'length': 3},
                'structural_type': 'd3m.container.list.List',
            },
        }, {
            'selector': ['0', 2, 1, '__ALL_ELEMENTS__'],
            'metadata': {'structural_type': 'float'},
        }])
Exemple #23
0
 def time_to_simple_structure_with_json(self, dataset_files):
     """Convert the internal simple structure of ``self.dataset_metadata`` to JSON form.

     The converted value is discarded; ``dataset_files`` is not used in the body.
     """
     simple_structure = self.dataset_metadata.to_internal_simple_structure()
     utils.to_json_structure(simple_structure)
    def test_basic(self):
        self.maxDiff = None
        curr_path = os.path.dirname(__file__)
        dataset_fname = os.path.join(
            curr_path,
            '../../datasets/anomaly/kpi/TRAIN/dataset_TRAIN/tables/learningData.csv'
        )
        dataset = pd.read_csv(dataset_fname)
        # print(dataset.columns)
        value = dataset['value']
        main = d3m_dataframe(value, generate_metadata=True)

        ################## Test Wavelet transform ##################

        hyperparams_default = WaveletTransformer.metadata.get_hyperparams(
        ).defaults()
        hyperparams = hyperparams_default.replace({
            'wavelet': 'db8',
            'level': 2,
            'inverse': 0,
            'return_result': 'new'
        })

        primitive = WaveletTransformer(hyperparams=hyperparams)
        new_main = primitive.produce(inputs=main).value

        # print(new_main)
        # print(mean_mse, std_mse)

        # self.assertAlmostEqual(mean_mse.__float__(), 0., delta=1e-8)
        # self.assertAlmostEquael(std_mse.__float__(), 0., delta=1e-8)

        # print(main.metadata.to_internal_simple_structure())
        # print(new_main.metadata.to_internal_simple_structure())

        self.assertEqual(
            utils.to_json_structure(
                new_main.metadata.to_internal_simple_structure()),
            [
                {
                    'selector': [],
                    'metadata': {
                        # 'top_level': 'main',
                        'schema':
                        metadata_base.CONTAINER_SCHEMA_VERSION,
                        'structural_type':
                        'd3m.container.pandas.DataFrame',
                        'semantic_types': [
                            'https://metadata.datadrivendiscovery.org/types/Table'
                        ],
                        'dimension': {
                            'name':
                            'rows',
                            'semantic_types': [
                                'https://metadata.datadrivendiscovery.org/types/TabularRow'
                            ],
                            'length':
                            3521,
                        },
                    },
                },
                {
                    'selector': ['__ALL_ELEMENTS__'],
                    'metadata': {
                        'dimension': {
                            'name':
                            'columns',
                            'semantic_types': [
                                'https://metadata.datadrivendiscovery.org/types/TabularColumn'
                            ],
                            'length':
                            3,
                        },
                    },
                },
                {
                    'selector': ['__ALL_ELEMENTS__', 0],
                    'metadata': {
                        'name':
                        'value',
                        'semantic_types': [
                            'https://metadata.datadrivendiscovery.org/types/Attribute'
                        ],
                        'structural_type':
                        'numpy.float64',
                    },
                },
                {
                    'selector': ['__ALL_ELEMENTS__', 1],
                    'metadata': {
                        'name':
                        'output_1',
                        'semantic_types': [
                            'https://metadata.datadrivendiscovery.org/types/Attribute'
                        ],
                        'structural_type':
                        'numpy.float64',
                    },
                },
                {
                    'selector': ['__ALL_ELEMENTS__', 2],
                    'metadata': {
                        'name':
                        'output_2',
                        'semantic_types': [
                            'https://metadata.datadrivendiscovery.org/types/Attribute'
                        ],
                        'structural_type':
                        'numpy.float64',
                    },
                }
            ])

        ################## Test inverse transform ##################

        hyperparams = hyperparams_default.replace({'inverse': 1})

        primitive = WaveletTransformer(hyperparams=hyperparams)
        main_recover = primitive.produce(inputs=main).value

        self.assertAlmostEqual(main_recover.values.tolist(),
                               main.values.tolist(),
                               delta=1e-6)
        # print(main.metadata.to_internal_simple_structure())
        # print(main_recover.metadata.to_internal_simple_structure())

        self.assertEqual(
            utils.to_json_structure(
                main_recover.metadata.to_internal_simple_structure()),
            [
                {
                    'selector': [],
                    'metadata': {
                        # 'top_level': 'main',
                        'schema':
                        metadata_base.CONTAINER_SCHEMA_VERSION,
                        'structural_type':
                        'd3m.container.pandas.DataFrame',
                        'semantic_types': [
                            'https://metadata.datadrivendiscovery.org/types/Table'
                        ],
                        'dimension': {
                            'name':
                            'rows',
                            'semantic_types': [
                                'https://metadata.datadrivendiscovery.org/types/TabularRow'
                            ],
                            'length':
                            7027,
                        },
                    },
                },
                {
                    'selector': ['__ALL_ELEMENTS__'],
                    'metadata': {
                        'dimension': {
                            'name':
                            'columns',
                            'semantic_types': [
                                'https://metadata.datadrivendiscovery.org/types/TabularColumn'
                            ],
                            'length':
                            1,
                        },
                    },
                },
                {
                    'selector': ['__ALL_ELEMENTS__', 0],
                    'metadata': {
                        'name':
                        'value',
                        'semantic_types': [
                            'https://metadata.datadrivendiscovery.org/types/Attribute'
                        ],
                        'structural_type':
                        'numpy.float64',
                    },
                }
            ])

        params = primitive.get_params()
        primitive.set_params(params=params)
Exemple #25
0
    def test_basic(self):
        # Fit ABODPrimitive on a small all-float frame, then compare the
        # metadata of the input, of `produce` and of `produce_score` with the
        # expected JSON structures.
        self.maxDiff = None

        def table_entries(row_count, column_count):
            # Table-level entry and column-dimension entry shared by every
            # expected structure in this test.
            return [
                {
                    'selector': [],
                    'metadata': {
                        'schema': metadata_base.CONTAINER_SCHEMA_VERSION,
                        'structural_type': 'd3m.container.pandas.DataFrame',
                        'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'],
                        'dimension': {
                            'name': 'rows',
                            'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'],
                            'length': row_count,
                        },
                    },
                },
                {
                    'selector': ['__ALL_ELEMENTS__'],
                    'metadata': {
                        'dimension': {
                            'name': 'columns',
                            'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'],
                            'length': column_count,
                        },
                    },
                },
            ]

        def result_column(structural_type):
            # The single output column; only its structural type differs
            # between the `produce` and `produce_score` expectations.
            return {
                'selector': ['__ALL_ELEMENTS__', 0],
                'metadata': {
                    'name': 'Angle-base Outlier Detection Primitive0_0',
                    'structural_type': structural_type,
                    'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute'],
                },
            }

        main = container.DataFrame(
            {
                'a': [1., 2., 3.],
                'b': [2., 3., 4.],
                'c': [3., 4., 5.],
            },
            columns=['a', 'b', 'c'],
            generate_metadata=True,
        )
        print(main)

        # Generated metadata for the input: three float64 columns a, b, c.
        input_columns = [
            {
                'selector': ['__ALL_ELEMENTS__', position],
                'metadata': {'structural_type': 'numpy.float64', 'name': label},
            }
            for position, label in enumerate(['a', 'b', 'c'])
        ]
        self.assertEqual(
            utils.to_json_structure(main.metadata.to_internal_simple_structure()),
            table_entries(3, 3) + input_columns,
        )
        self.assertIsInstance(main, container.DataFrame)

        # Default hyperparameters with only the two overrides below.
        hyperparams = ABODPrimitive.metadata.get_hyperparams().defaults().replace({
            'return_result': 'new',
            'method': 'default',
        })

        primitive = ABODPrimitive(hyperparams=hyperparams)
        primitive.set_training_data(inputs=main)
        primitive.fit()
        new_main = primitive.produce(inputs=main).value
        new_main_score = primitive.produce_score(inputs=main).value
        print(new_main)
        print(new_main_score)

        # `produce` is expected to yield one int64 column ...
        self.assertEqual(
            utils.to_json_structure(new_main.metadata.to_internal_simple_structure()),
            table_entries(3, 1) + [result_column('numpy.int64')],
        )

        # ... and `produce_score` one float64 column with the same name.
        self.assertEqual(
            utils.to_json_structure(new_main_score.metadata.to_internal_simple_structure()),
            table_entries(3, 1) + [result_column('numpy.float64')],
        )
    def test_basic(self):
        # Run CategoricalToBinary on column 0 of a two-column frame with
        # 'append' results, then check both the produced values and the
        # produced metadata.
        self.maxDiff = None

        def table_entries(row_count, column_count):
            # Table-level entry and column-dimension entry shared by the
            # input and output expectations.
            return [
                {
                    'selector': [],
                    'metadata': {
                        'schema': metadata_base.CONTAINER_SCHEMA_VERSION,
                        'structural_type': 'd3m.container.pandas.DataFrame',
                        'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'],
                        'dimension': {
                            'name': 'rows',
                            'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'],
                            'length': row_count,
                        },
                    },
                },
                {
                    'selector': ['__ALL_ELEMENTS__'],
                    'metadata': {
                        'dimension': {
                            'name': 'columns',
                            'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'],
                            'length': column_count,
                        },
                    },
                },
            ]

        def appended_column(position, label):
            # Expected metadata of a column appended by the primitive.
            return {
                'selector': ['__ALL_ELEMENTS__', position],
                'metadata': {
                    'name': label,
                    'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute'],
                    'structural_type': 'str',
                },
            }

        main = container.DataFrame(
            {'A': [1, 2], 'B': ['a', 'b']},
            columns=['A', 'B'],
            generate_metadata=True,
        )

        # Metadata of the two original columns; the same entries are expected
        # unchanged at positions 0 and 1 of the output.
        original_columns = [
            {
                'selector': ['__ALL_ELEMENTS__', 0],
                'metadata': {'structural_type': 'numpy.int64', 'name': 'A'},
            },
            {
                'selector': ['__ALL_ELEMENTS__', 1],
                'metadata': {'structural_type': 'str', 'name': 'B'},
            },
        ]

        self.assertEqual(
            utils.to_json_structure(main.metadata.to_internal_simple_structure()),
            table_entries(2, 2) + original_columns,
        )
        self.assertIsInstance(main, container.DataFrame)

        primitive_class = CategoricalToBinary.CategoricalToBinary
        overrides = {
            'use_semantic_types': True,
            'use_columns': (0, ),
            'return_result': 'append',
        }
        hp = primitive_class.metadata.get_hyperparams().defaults().replace(overrides)

        primitive = primitive_class(hyperparams=hp)
        new_main = primitive.produce(inputs=main).value

        # Expected values: the original columns followed by one string-valued
        # indicator column per distinct value of A.
        expected_frame = pd.DataFrame({
            "A": [1, 2],
            "B": ['a', 'b'],
            "A_1": ["1", "0"],
            "A_2": ["0", "1"],
        })
        pd.testing.assert_frame_equal(new_main, expected_frame)

        self.assertEqual(
            utils.to_json_structure(new_main.metadata.to_internal_simple_structure()),
            table_entries(2, 4) + original_columns + [
                appended_column(2, 'A_1'),
                appended_column(3, 'A_2'),
            ],
        )

        # Round-trip the primitive's params through get_params/set_params.
        primitive.set_params(params=primitive.get_params())
Exemple #27
0
    def test_basic(self):
        # Run MatrixProfile (window_size=3) on a 9x3 float frame and check the
        # metadata of `main` before and after the `produce` call.
        self.maxDiff = None

        def column_entry(position, label):
            # Expected metadata of one float64 input column.
            return {
                'selector': ['__ALL_ELEMENTS__', position],
                'metadata': {'structural_type': 'numpy.float64', 'name': label},
            }

        def expected_main_metadata():
            # Built fresh on every call so the two assertions below never
            # share one list object.
            return [
                {
                    'selector': [],
                    'metadata': {
                        'schema': metadata_base.CONTAINER_SCHEMA_VERSION,
                        'structural_type': 'd3m.container.pandas.DataFrame',
                        'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'],
                        'dimension': {
                            'name': 'rows',
                            'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'],
                            'length': 9,
                        },
                    },
                },
                {
                    'selector': ['__ALL_ELEMENTS__'],
                    'metadata': {
                        'dimension': {
                            'name': 'columns',
                            'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'],
                            'length': 3,
                        },
                    },
                },
                column_entry(0, 'a'),
                column_entry(1, 'b'),
                column_entry(2, 'c'),
            ]

        main = container.DataFrame(
            {
                'a': [1., 2., 3., 4., 5., 6., 7., 8., 9.],
                'b': [2., 3., 4., 5., 6., 7., 8., 9., 10.],
                'c': [3., 4., 5., 6., 7., 8., 9., 10., 11.],
            },
            columns=['a', 'b', 'c'],
            generate_metadata=True,
        )
        print(main)

        self.assertEqual(
            utils.to_json_structure(main.metadata.to_internal_simple_structure()),
            expected_main_metadata(),
        )
        self.assertIsInstance(main, container.DataFrame)

        hyperparams = MatrixProfile.metadata.get_hyperparams().defaults().replace(
            {'window_size': 3})

        # No set_training_data()/fit() here: produce() is called directly on a
        # freshly constructed primitive.
        primitive = MatrixProfile(hyperparams=hyperparams)
        new_main = primitive.produce(inputs=main).value
        print(new_main)

        # NOTE(review): this second check inspects `main`, not `new_main`, so
        # the produced frame's metadata is never validated. Confirm whether
        # the intent was to verify that the input is left untouched or to
        # check the output metadata.
        self.assertEqual(
            utils.to_json_structure(main.metadata.to_internal_simple_structure()),
            expected_main_metadata(),
        )
    def test_dataframe_with_names_kept(self):
        # Column names written into a frame's metadata must still be present
        # after that metadata's `generate` is run against a second frame.

        def expected_structure(row_count):
            # Expected JSON structure; the two checks below differ only in the
            # length of the rows dimension.
            return [
                {
                    'selector': [],
                    'metadata': {
                        'schema': base.CONTAINER_SCHEMA_VERSION,
                        'structural_type': 'd3m.container.pandas.DataFrame',
                        'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'],
                        'dimension': {
                            'name': 'rows',
                            'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'],
                            'length': row_count,
                        },
                    },
                },
                {
                    'selector': ['__ALL_ELEMENTS__'],
                    'metadata': {
                        'dimension': {
                            'name': 'columns',
                            'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'],
                            'length': 2,
                        },
                    },
                },
                {
                    'selector': ['__ALL_ELEMENTS__', 0],
                    'metadata': {
                        'name': 'first_column',
                        'structural_type': 'numpy.int64',
                    },
                },
                {
                    'selector': ['__ALL_ELEMENTS__', 1],
                    'metadata': {
                        'name': 'second_column',
                        'structural_type': 'str',
                    },
                },
            ]

        df = container.DataFrame({'A': [1, 2, 3], 'B': ['a', 'b', 'c']})
        df.A = df.A.astype(numpy.int64)
        df.metadata = df.metadata.generate(df)

        # Override the names of both columns in the metadata.
        for column_index, column_name in enumerate(['first_column', 'second_column']):
            df.metadata = df.metadata.update(
                (base.ALL_ELEMENTS, column_index), {'name': column_name})

        self.assertEqual(
            utils.to_json_structure(df.metadata.to_internal_simple_structure()),
            expected_structure(3),
        )

        # Second frame with one more row; its metadata is generated from the
        # first frame's metadata object.
        df2 = container.DataFrame({'A': [1, 2, 3, 4], 'B': ['a', 'b', 'c', 'd']})
        df2.metadata = df.metadata.generate(df2)

        self.assertEqual(
            utils.to_json_structure(df2.metadata.to_internal_simple_structure()),
            expected_structure(4),
        )
Exemple #29
0
    def test_combine_columns_compact_metadata(self):
        main = container.DataFrame(
            {
                'a1': [1, 2, 3],
                'b1': [4, 5, 6],
                'c1': [7, 8, 9],
                'd1': [10, 11, 12],
                'e1': [13, 14, 15]
            }, {
                'top_level': 'main',
            },
            generate_metadata=False)
        main.metadata = main.metadata.generate(main, compact=True)
        main.metadata = main.metadata.update_column(0, {'name': 'aaa111'})
        main.metadata = main.metadata.update_column(1, {
            'name': 'bbb111',
            'extra': 'b_column'
        })
        main.metadata = main.metadata.update_column(2, {'name': 'ccc111'})

        columns2 = container.DataFrame({
            'a2': [21, 22, 23],
            'b2': [24, 25, 26]
        }, {
            'top_level': 'columns2',
        },
                                       generate_metadata=False)
        columns2.metadata = columns2.metadata.generate(columns2, compact=True)
        columns2.metadata = columns2.metadata.update_column(
            0, {'name': 'aaa222'})
        columns2.metadata = columns2.metadata.update_column(
            1, {'name': 'bbb222'})

        columns3 = container.DataFrame({
            'a3': [31, 32, 33],
            'b3': [34, 35, 36]
        }, {
            'top_level': 'columns3',
        },
                                       generate_metadata=False)
        columns3.metadata = columns3.metadata.generate(columns3, compact=True)
        columns3.metadata = columns3.metadata.update_column(
            0, {'name': 'aaa333'})
        columns3.metadata = columns3.metadata.update_column(
            1, {'name': 'bbb333'})

        result = utils.combine_columns(main, [1, 2], [columns2, columns3],
                                       return_result='append',
                                       add_index_columns=False)

        self.assertEqual(result.values.tolist(), [
            [1, 4, 7, 10, 13, 21, 24, 31, 34],
            [2, 5, 8, 11, 14, 22, 25, 32, 35],
            [3, 6, 9, 12, 15, 23, 26, 33, 36],
        ])

        self.assertEqual(
            d3m_utils.to_json_structure(
                result.metadata.to_internal_simple_structure()),
            [{
                'selector': [],
                'metadata': {
                    'top_level':
                    'main',
                    'schema':
                    metadata_base.CONTAINER_SCHEMA_VERSION,
                    'structural_type':
                    'd3m.container.pandas.DataFrame',
                    'semantic_types':
                    ['https://metadata.datadrivendiscovery.org/types/Table'],
                    'dimension': {
                        'name':
                        'rows',
                        'semantic_types': [
                            'https://metadata.datadrivendiscovery.org/types/TabularRow'
                        ],
                        'length':
                        3,
                    },
                },
            }, {
                'selector': ['__ALL_ELEMENTS__'],
                'metadata': {
                    'dimension': {
                        'name':
                        'columns',
                        'semantic_types': [
                            'https://metadata.datadrivendiscovery.org/types/TabularColumn'
                        ],
                        'length':
                        9,
                    },
                },
            }, {
                'selector': ['__ALL_ELEMENTS__', '__ALL_ELEMENTS__'],
                'metadata': {
                    'structural_type': 'numpy.int64',
                },
            }, {
                'selector': ['__ALL_ELEMENTS__', 0],
                'metadata': {
                    'name': 'aaa111',
                },
            }, {
                'selector': ['__ALL_ELEMENTS__', 1],
                'metadata': {
                    'name': 'bbb111',
                    'extra': 'b_column',
                },
            }, {
                'selector': ['__ALL_ELEMENTS__', 2],
                'metadata': {
                    'name': 'ccc111',
                },
            }, {
                'selector': ['__ALL_ELEMENTS__', 3],
                'metadata': {
                    'name': 'd1',
                },
            }, {
                'selector': ['__ALL_ELEMENTS__', 4],
                'metadata': {
                    'name': 'e1',
                },
            }, {
                'selector': ['__ALL_ELEMENTS__', 5],
                'metadata': {
                    'name': 'aaa222',
                    'structural_type': 'numpy.int64',
                },
            }, {
                'selector': ['__ALL_ELEMENTS__', 6],
                'metadata': {
                    'name': 'bbb222',
                    'structural_type': 'numpy.int64',
                },
            }, {
                'selector': ['__ALL_ELEMENTS__', 7],
                'metadata': {
                    'name': 'aaa333',
                    'structural_type': 'numpy.int64',
                },
            }, {
                'selector': ['__ALL_ELEMENTS__', 8],
                'metadata': {
                    'name': 'bbb333',
                    'structural_type': 'numpy.int64',
                },
            }])

        result = utils.combine_columns(main, [1, 2], [columns2, columns3],
                                       return_result='new',
                                       add_index_columns=False)

        self.assertEqual(result.values.tolist(), [
            [21, 24, 31, 34],
            [22, 25, 32, 35],
            [23, 26, 33, 36],
        ])

        self.assertEqual(
            d3m_utils.to_json_structure(
                result.metadata.to_internal_simple_structure()),
            [{
                'selector': [],
                'metadata': {
                    'top_level':
                    'columns2',
                    'schema':
                    metadata_base.CONTAINER_SCHEMA_VERSION,
                    'structural_type':
                    'd3m.container.pandas.DataFrame',
                    'semantic_types':
                    ['https://metadata.datadrivendiscovery.org/types/Table'],
                    'dimension': {
                        'name':
                        'rows',
                        'semantic_types': [
                            'https://metadata.datadrivendiscovery.org/types/TabularRow'
                        ],
                        'length':
                        3,
                    },
                },
            }, {
                'selector': ['__ALL_ELEMENTS__'],
                'metadata': {
                    'dimension': {
                        'name':
                        'columns',
                        'semantic_types': [
                            'https://metadata.datadrivendiscovery.org/types/TabularColumn'
                        ],
                        'length':
                        4,
                    },
                },
            }, {
                'selector': ['__ALL_ELEMENTS__', '__ALL_ELEMENTS__'],
                'metadata': {
                    'structural_type': 'numpy.int64',
                },
            }, {
                'selector': ['__ALL_ELEMENTS__', 0],
                'metadata': {
                    'name': 'aaa222',
                },
            }, {
                'selector': ['__ALL_ELEMENTS__', 1],
                'metadata': {
                    'name': 'bbb222',
                },
            }, {
                'selector': ['__ALL_ELEMENTS__', 2],
                'metadata': {
                    'name': 'aaa333',
                    'structural_type': 'numpy.int64',
                },
            }, {
                'selector': ['__ALL_ELEMENTS__', 3],
                'metadata': {
                    'name': 'bbb333',
                    'structural_type': 'numpy.int64',
                },
            }])

        result = utils.combine_columns(main, [1, 2], [columns2, columns3],
                                       return_result='replace',
                                       add_index_columns=False)

        self.assertEqual(result.values.tolist(), [
            [1, 21, 24, 31, 34, 10, 13],
            [2, 22, 25, 32, 35, 11, 14],
            [3, 23, 26, 33, 36, 12, 15],
        ])

        self.assertEqual(
            d3m_utils.to_json_structure(
                result.metadata.to_internal_simple_structure()),
            [{
                'selector': [],
                'metadata': {
                    'top_level':
                    'main',
                    'schema':
                    metadata_base.CONTAINER_SCHEMA_VERSION,
                    'structural_type':
                    'd3m.container.pandas.DataFrame',
                    'semantic_types':
                    ['https://metadata.datadrivendiscovery.org/types/Table'],
                    'dimension': {
                        'name':
                        'rows',
                        'semantic_types': [
                            'https://metadata.datadrivendiscovery.org/types/TabularRow'
                        ],
                        'length':
                        3,
                    },
                },
            }, {
                'selector': ['__ALL_ELEMENTS__'],
                'metadata': {
                    'dimension': {
                        'name':
                        'columns',
                        'semantic_types': [
                            'https://metadata.datadrivendiscovery.org/types/TabularColumn'
                        ],
                        'length':
                        7,
                    },
                },
            }, {
                'selector': ['__ALL_ELEMENTS__', '__ALL_ELEMENTS__'],
                'metadata': {
                    'structural_type': 'numpy.int64',
                },
            }, {
                'selector': ['__ALL_ELEMENTS__', 0],
                'metadata': {
                    'name': 'aaa111',
                },
            }, {
                'selector': ['__ALL_ELEMENTS__', 1],
                'metadata': {
                    'name': 'aaa222',
                    'structural_type': 'numpy.int64',
                },
            }, {
                'selector': ['__ALL_ELEMENTS__', 2],
                'metadata': {
                    'name': 'bbb222',
                    'structural_type': 'numpy.int64',
                },
            }, {
                'selector': ['__ALL_ELEMENTS__', 3],
                'metadata': {
                    'name': 'aaa333',
                    'structural_type': 'numpy.int64',
                },
            }, {
                'selector': ['__ALL_ELEMENTS__', 4],
                'metadata': {
                    'name': 'bbb333',
                    'structural_type': 'numpy.int64',
                },
            }, {
                'selector': ['__ALL_ELEMENTS__', 5],
                'metadata': {
                    'name': 'd1',
                    'structural_type': 'numpy.int64',
                },
            }, {
                'selector': ['__ALL_ELEMENTS__', 6],
                'metadata': {
                    'name': 'e1',
                    'structural_type': 'numpy.int64',
                },
            }])

        result = utils.combine_columns(main, [0, 1, 2, 3, 4],
                                       [columns2, columns3],
                                       return_result='replace',
                                       add_index_columns=False)

        self.assertEqual(result.values.tolist(), [
            [21, 24, 31, 34],
            [22, 25, 32, 35],
            [23, 26, 33, 36],
        ])

        self.assertEqual(
            d3m_utils.to_json_structure(
                result.metadata.to_internal_simple_structure()),
            [{
                'selector': [],
                'metadata': {
                    'top_level':
                    'main',
                    'schema':
                    metadata_base.CONTAINER_SCHEMA_VERSION,
                    'structural_type':
                    'd3m.container.pandas.DataFrame',
                    'semantic_types':
                    ['https://metadata.datadrivendiscovery.org/types/Table'],
                    'dimension': {
                        'name':
                        'rows',
                        'semantic_types': [
                            'https://metadata.datadrivendiscovery.org/types/TabularRow'
                        ],
                        'length':
                        3,
                    },
                },
            }, {
                'selector': ['__ALL_ELEMENTS__'],
                'metadata': {
                    'dimension': {
                        'name':
                        'columns',
                        'semantic_types': [
                            'https://metadata.datadrivendiscovery.org/types/TabularColumn'
                        ],
                        'length':
                        4,
                    },
                },
            }, {
                'selector': ['__ALL_ELEMENTS__', '__ALL_ELEMENTS__'],
                'metadata': {
                    'structural_type': 'numpy.int64',
                },
            }, {
                'selector': ['__ALL_ELEMENTS__', 0],
                'metadata': {
                    'name': 'aaa222',
                    'structural_type': 'numpy.int64',
                },
            }, {
                'selector': ['__ALL_ELEMENTS__', 1],
                'metadata': {
                    'name': 'bbb222',
                    'structural_type': 'numpy.int64',
                },
            }, {
                'selector': ['__ALL_ELEMENTS__', 2],
                'metadata': {
                    'name': 'aaa333',
                    'structural_type': 'numpy.int64',
                },
            }, {
                'selector': ['__ALL_ELEMENTS__', 3],
                'metadata': {
                    'name': 'bbb333',
                    'structural_type': 'numpy.int64',
                },
            }])

        result = utils.combine_columns(main, [4], [columns2, columns3],
                                       return_result='replace',
                                       add_index_columns=False)

        self.assertEqual(result.values.tolist(), [
            [1, 4, 7, 10, 21, 24, 31, 34],
            [2, 5, 8, 11, 22, 25, 32, 35],
            [3, 6, 9, 12, 23, 26, 33, 36],
        ])

        self.assertEqual(
            d3m_utils.to_json_structure(
                result.metadata.to_internal_simple_structure()),
            [{
                'selector': [],
                'metadata': {
                    'top_level':
                    'main',
                    'schema':
                    metadata_base.CONTAINER_SCHEMA_VERSION,
                    'structural_type':
                    'd3m.container.pandas.DataFrame',
                    'semantic_types':
                    ['https://metadata.datadrivendiscovery.org/types/Table'],
                    'dimension': {
                        'name':
                        'rows',
                        'semantic_types': [
                            'https://metadata.datadrivendiscovery.org/types/TabularRow'
                        ],
                        'length':
                        3,
                    },
                },
            }, {
                'selector': ['__ALL_ELEMENTS__'],
                'metadata': {
                    'dimension': {
                        'name':
                        'columns',
                        'semantic_types': [
                            'https://metadata.datadrivendiscovery.org/types/TabularColumn'
                        ],
                        'length':
                        8,
                    },
                },
            }, {
                'selector': ['__ALL_ELEMENTS__', '__ALL_ELEMENTS__'],
                'metadata': {
                    'structural_type': 'numpy.int64',
                },
            }, {
                'selector': ['__ALL_ELEMENTS__', 0],
                'metadata': {
                    'name': 'aaa111',
                },
            }, {
                'selector': ['__ALL_ELEMENTS__', 1],
                'metadata': {
                    'name': 'bbb111',
                    'extra': 'b_column',
                },
            }, {
                'selector': ['__ALL_ELEMENTS__', 2],
                'metadata': {
                    'name': 'ccc111',
                },
            }, {
                'selector': ['__ALL_ELEMENTS__', 3],
                'metadata': {
                    'name': 'd1',
                },
            }, {
                'selector': ['__ALL_ELEMENTS__', 4],
                'metadata': {
                    'structural_type': 'numpy.int64',
                    'name': 'aaa222',
                },
            }, {
                'selector': ['__ALL_ELEMENTS__', 5],
                'metadata': {
                    'structural_type': 'numpy.int64',
                    'name': 'bbb222',
                },
            }, {
                'selector': ['__ALL_ELEMENTS__', 6],
                'metadata': {
                    'structural_type': 'numpy.int64',
                    'name': 'aaa333',
                },
            }, {
                'selector': ['__ALL_ELEMENTS__', 7],
                'metadata': {
                    'structural_type': 'numpy.int64',
                    'name': 'bbb333',
                },
            }])

        result = utils.combine_columns(main, [0, 2, 4], [columns2, columns3],
                                       return_result='replace',
                                       add_index_columns=False)

        self.assertEqual(result.values.tolist(), [
            [21, 4, 24, 10, 31, 34],
            [22, 5, 25, 11, 32, 35],
            [23, 6, 26, 12, 33, 36],
        ])

        self.assertEqual(
            d3m_utils.to_json_structure(
                result.metadata.to_internal_simple_structure()),
            [{
                'selector': [],
                'metadata': {
                    'top_level':
                    'main',
                    'schema':
                    metadata_base.CONTAINER_SCHEMA_VERSION,
                    'structural_type':
                    'd3m.container.pandas.DataFrame',
                    'semantic_types':
                    ['https://metadata.datadrivendiscovery.org/types/Table'],
                    'dimension': {
                        'name':
                        'rows',
                        'semantic_types': [
                            'https://metadata.datadrivendiscovery.org/types/TabularRow'
                        ],
                        'length':
                        3,
                    },
                },
            }, {
                'selector': ['__ALL_ELEMENTS__'],
                'metadata': {
                    'dimension': {
                        'name':
                        'columns',
                        'semantic_types': [
                            'https://metadata.datadrivendiscovery.org/types/TabularColumn'
                        ],
                        'length':
                        6,
                    },
                },
            }, {
                'selector': ['__ALL_ELEMENTS__', '__ALL_ELEMENTS__'],
                'metadata': {
                    'structural_type': 'numpy.int64',
                },
            }, {
                'selector': ['__ALL_ELEMENTS__', 0],
                'metadata': {
                    'name': 'aaa222',
                    'structural_type': 'numpy.int64',
                },
            }, {
                'selector': ['__ALL_ELEMENTS__', 1],
                'metadata': {
                    'name': 'bbb111',
                    'extra': 'b_column',
                },
            }, {
                'selector': ['__ALL_ELEMENTS__', 2],
                'metadata': {
                    'name': 'bbb222',
                    'structural_type': 'numpy.int64',
                },
            }, {
                'selector': ['__ALL_ELEMENTS__', 3],
                'metadata': {
                    'name': 'd1',
                },
            }, {
                'selector': ['__ALL_ELEMENTS__', 4],
                'metadata': {
                    'name': 'aaa333',
                    'structural_type': 'numpy.int64',
                },
            }, {
                'selector': ['__ALL_ELEMENTS__', 5],
                'metadata': {
                    'name': 'bbb333',
                    'structural_type': 'numpy.int64',
                },
            }])
    def test_custom_column_name_with_compacting(self):
        # Exercises metadata set by hand on column 0 of a single-column dataframe
        # and checks what the internal structure looks like each time compact
        # metadata is generated again.
        first_column = (base.ALL_ELEMENTS, 0)

        def expected_structure(first_column_metadata):
            # Every assertion in this test expects the same three leading entries;
            # only the metadata stored for column 0 differs between them.
            table_entry = {
                'selector': [],
                'metadata': {
                    'schema': base.CONTAINER_SCHEMA_VERSION,
                    'structural_type': 'd3m.container.pandas.DataFrame',
                    'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'],
                    'dimension': {
                        'name': 'rows',
                        'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'],
                        'length': 3,
                    },
                },
            }
            rows_entry = {
                'selector': ['__ALL_ELEMENTS__'],
                'metadata': {
                    'dimension': {
                        'name': 'columns',
                        'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'],
                        'length': 1,
                    }
                },
            }
            all_cells_entry = {
                'selector': ['__ALL_ELEMENTS__', '__ALL_ELEMENTS__'],
                'metadata': {
                    'structural_type': 'str',
                    'name': 'a',
                },
            }
            first_column_entry = {
                'selector': ['__ALL_ELEMENTS__', 0],
                'metadata': first_column_metadata,
            }
            return [table_entry, rows_entry, all_cells_entry, first_column_entry]

        dataframe = container.DataFrame({'a': ['1.0', '2.0', '3.0']}, generate_metadata=False)

        def current_structure():
            # JSON-compatible view of whatever metadata is attached right now.
            internal = dataframe.metadata.to_internal_simple_structure()
            return utils.to_json_structure(internal)

        # Step 1: generate compact metadata, then set a custom name and an extra
        # field on column 0.
        dataframe.metadata = dataframe.metadata.generate(dataframe, compact=True)
        dataframe.metadata = dataframe.metadata.update(first_column, {
            'name': 'test',
            'foo': 'bar',
        })

        self.assertEqual(
            current_structure(),
            expected_structure({'name': 'test', 'foo': 'bar'}),
        )

        # Step 2: generating compact metadata again is expected to leave the
        # structure exactly as it was after step 1.
        dataframe.metadata = dataframe.metadata.generate(dataframe, compact=True)

        self.assertEqual(
            current_structure(),
            expected_structure({'name': 'test', 'foo': 'bar'}),
        )

        # Step 3: replace the custom name with NO_VALUE and generate once more.
        # The expected structure carries the '__NO_VALUE__' marker for the name
        # and still has the extra 'foo' field.
        dataframe.metadata = dataframe.metadata.update(first_column, {
            'name': base.NO_VALUE,
        })
        dataframe.metadata = dataframe.metadata.generate(dataframe, compact=True)

        self.assertEqual(
            current_structure(),
            expected_structure({'name': '__NO_VALUE__', 'foo': 'bar'}),
        )