def normalize(self, ds_source):
    """
    Apply the normalizing operation to a given `DataSet`.

    Each attribute value at position ``i`` of every data instance is
    rescaled as::

        (value - self.min[i]) * self.max[i]
            * (self.upper_bound - self.lower_bound) + self.lower_bound

    The results are written to a newly created `DataSet`, which carries
    over the name attribute, index numbers and label numbers of the
    source and is flagged as numerized.

    :Parameters:
        ds_source : `DataSet`
            Data set to normalize.

    :Returns:
        `DataSet` : Normalized data set.

    :Raises NpyDataTypeError: If the given `DataSet` has not been
        numerized.
    """
    if not ds_source.is_numerized:
        # Call form of raise: valid on both Python 2 and Python 3.
        raise NpyDataTypeError('ds_source must be numerized first.')

    ds_dest = DataSet()
    ds_dest.set_name_attribute(ds_source.get_name_attribute())

    # Width and origin of the target interval do not change per value.
    lower = self.lower_bound
    span = self.upper_bound - lower

    for data_instance_old in ds_source.get_data_instances():
        # NOTE(review): self.max[index] is applied as a multiplier, so it
        # presumably stores a precomputed scale such as 1 / (max - min)
        # rather than the raw maximum -- confirm where it is populated.
        attributes_new = [
            (value - self.min[index]) * self.max[index] * span + lower
            for index, value in enumerate(data_instance_old.get_attributes())
        ]
        ds_dest.add_data_instance(data_instance_old.get_index_number(),
                                  attributes_new,
                                  data_instance_old.get_label_number())

    ds_dest.is_numerized = True
    return ds_dest
def numerize(self, ds_source):
    """
    Apply the numerizing operation to a given `DataSet`.

    Every attribute value and every label is converted with ``float()``.
    A value for which ``float()`` raises `ValueError` is instead passed
    to `attribute_string_to_number` (with its attribute position) or to
    `label_string_to_number` respectively, and the returned number is
    used. The results are written to a newly created `DataSet`, which
    carries over the name attribute and index numbers of the source and
    is flagged as numerized.

    :Parameters:
        ds_source : `DataSet`
            Data set to numerize.

    :Returns:
        `DataSet` : Numerized data set.

    :Raises NpyDataTypeError: If ds_source has already been numerized.
    """
    if ds_source.is_numerized:
        # Call form of raise: valid on both Python 2 and Python 3.
        raise NpyDataTypeError('ds_source has already been numerized.')

    ds_dest = DataSet()
    ds_dest.set_name_attribute(ds_source.get_name_attribute())

    for data_instance_old in ds_source.get_data_instances():
        # Process the attribute values
        attributes = []
        for index, value in enumerate(data_instance_old.get_attributes()):
            try:
                number = float(value)
            except ValueError:
                # Non-float attribute values are handed to the numerizer's
                # own mapping, keyed by the attribute position.
                # NOTE(review): only ValueError is handled; a value such
                # as None makes float() raise TypeError, which propagates.
                number = self.attribute_string_to_number(value, index)
            attributes.append(number)

        # Process the label value
        label_old = data_instance_old.get_label_number()
        try:
            label_new = float(label_old)
        except ValueError:
            # Non-float label values are handed to the numerizer's own
            # label mapping.
            label_new = self.label_string_to_number(label_old)

        ds_dest.add_data_instance(data_instance_old.get_index_number(),
                                  attributes,
                                  label_new)

    ds_dest.is_numerized = True
    return ds_dest