def __init__(self, outputs, column_names, column_types, **kwargs):
    """Create the structured-data input node, validate the column spec,
    and delegate to the parent initializer.

    Args:
        outputs: Forwarded unchanged to the parent initializer as
            ``outputs``.
        column_names: Stored on the input node. When both this and
            ``column_types`` are truthy, every key of ``column_types``
            must be contained in it.
        column_types: Stored on the input node. When truthy, it is read
            as a mapping (``.values()`` and key iteration); each value
            must be ``'categorical'`` or ``'numerical'``.
        **kwargs: Forwarded unchanged to the parent initializer.

    Raises:
        ValueError: If a value of ``column_types`` is not one of the two
            accepted type names, or if a key of ``column_types`` is not
            found in ``column_names``.
    """
    # TODO: support customized column_types.
    allowed_types = ('categorical', 'numerical')
    # Dedicated sentinel so that any real entry (even None) is reported.
    nothing_found = object()

    structured_input = node.StructuredDataInput()
    structured_input.column_types = column_types
    structured_input.column_names = column_names

    if column_types:
        # First declared type outside the accepted pair, if any.
        bad_type = next(
            (declared for declared in column_types.values()
             if declared not in allowed_types),
            nothing_found)
        if bad_type is not nothing_found:
            raise ValueError(
                'Column_types should be either "categorical" '
                'or "numerical", but got {name}'.format(name=bad_type))

    if column_names and column_types:
        # First typed column that is absent from the declared names, if any.
        unknown_name = next(
            (key for key in column_types if key not in column_names),
            nothing_found)
        if unknown_name is not nothing_found:
            raise ValueError(
                'Column_names and column_types are '
                'mismatched. Cannot find column name '
                '{name} in the data.'.format(name=unknown_name))

    super().__init__(inputs=structured_input, outputs=outputs, **kwargs)
def set_config(self, config):
    """Restore this object's state from a configuration mapping.

    Args:
        config: Mapping that provides the keys ``'num_columns'``,
            ``'input_node'`` and ``'max_columns'``. The ``'input_node'``
            entry is unpacked as positional arguments for
            ``node.StructuredDataInput``.
    """
    # Assignments stay in this order so that a missing key leaves the same
    # attributes set as before.
    self.num_columns = config['num_columns']

    input_node_args = config['input_node']
    self.input_node = node.StructuredDataInput(*input_node_args)

    self.max_columns = config['max_columns']
def test_structured_data_input_transform():
    """Fit and transform a StructuredDataInput on the fixture data and
    check that the first recorded column name is ``'sex'``.
    """
    # NOTE(review): a second function with this exact name is defined later
    # in this view; if both live in the same module, the later definition
    # shadows this one -- confirm which variant is intended.
    (table, _unused_inner), _unused_outer = common.dataframe_dataframe()

    structured_input = node.StructuredDataInput()
    structured_input.fit(table)
    structured_input.transform(table)

    first_column = structured_input.column_names[0]
    assert first_column == 'sex'
def test_structured_data_input_transform():
    """Transform the fixture data with a fresh StructuredDataInput (no
    explicit ``fit`` call) and check the recorded column names and types.
    """
    # NOTE(review): an earlier function in this view carries the same name;
    # if both live in the same module, this definition replaces it --
    # confirm that is intended.
    (table, _unused_inner), _unused_outer = common.dataframe_dataframe()

    structured_input = node.StructuredDataInput()
    structured_input.transform(table)

    first_column = structured_input.column_names[0]
    assert first_column == 'sex'

    recorded_types = structured_input.column_types
    assert recorded_types == common.COLUMN_TYPES_FROM_CSV