Esempio n. 1
0
 def validate_training(self):
     """
     Checks that the training data is usable by this instance.

     Raises inv.InvalidDataError when self.training.are_valid(...) rejects
     the data for self.klass and self.attributes, or when
     self.attributes.has_continuous() is true while
     self.can_handle_continuous_attributes() is false.
     """
     training_ok = self.training.are_valid(self.klass, self.attributes)
     if not training_ok:
         raise inv.InvalidDataError('Training data invalid.')
     # Continuous attributes are only inspected when they are unsupported.
     if self.can_handle_continuous_attributes():
         return
     if self.attributes.has_continuous():
         raise inv.InvalidDataError(
             'One or more attributes are continuous.')
Esempio n. 2
0
 def create(self, overwrite = False):
     """
     Creates an empty file at self.path.

     If something already exists at self.path it is only replaced when
     overwrite is true and the existing entry is a regular file; the old
     file is removed before the new empty one is written.

     Raises inv.InvalidDataError when the path exists and overwrite is
     false, or when the path exists but is not a regular file.
     """
     target = self.path
     if os.path.exists(target):
         if not overwrite:
             raise inv.InvalidDataError('File or Directory exists at ' + target + ' and overwrite is set to false.')
         if not os.path.isfile(target):
             raise inv.InvalidDataError('Cannot overwrite directory ' + target + '.')
         os.remove(target)
     # Opening in write mode and closing immediately leaves an empty file.
     with open(target, 'w'):
         pass
Esempio n. 3
0
 def by_rank(self):
     """
     Removes the attributes chosen by find_attributes_by_ranking.

     self.options[0] is looked up in OPTION_MAPPINGS and self.options[1] is
     converted to an int; both are handed to find_attributes_by_ranking and
     the attributes it returns are passed to self.remove.

     Raises inv.InvalidDataError when the attributes contain continuous
     ones or when rank_options_invalid rejects self.options.
     """
     if self.attributes.has_continuous():
         raise inv.InvalidDataError(
             "Rank based feature selection cannot be performed on continuous attributes."
         )
     # Additional validation when not used from command prompt
     if rank_options_invalid(self.options):
         raise inv.InvalidDataError(
             "Invalid options for Rank based Feature selection."
         )
     ranking_method = OPTION_MAPPINGS[self.options[0]]
     how_many = int(self.options[1])
     to_remove = self.find_attributes_by_ranking(ranking_method, how_many)
     self.remove(to_remove)
Esempio n. 4
0
 def __validate_attribute_indices(self):
     """
     Ensures every entry of self.attribute_indices is a valid position in
     self.attributes, i.e. lies between 0 and len(self.attributes) - 1.

     Raises inv.InvalidDataError for the first index outside that range.
     """
     for position in self.attribute_indices:
         out_of_bounds = position < 0 or position >= len(self.attributes)
         if not out_of_bounds:
             continue
         highest_allowed = str(len(self.attributes) - 1)
         raise inv.InvalidDataError(
             'Attribute indices should be between 0 and ' +
             highest_allowed +
             ' both inclusive, but found ' + str(position))
Esempio n. 5
0
 def forward_selection(self):
     """
     Runs the forward selection step and removes every attribute that was
     not selected.

     The selection starts from -1 and an empty list, working on a copy of
     self.attributes with the value returned by self.get_delta(). The
     result is inverted via invert_attribute_selection and passed to
     self.remove.

     Raises inv.InvalidDataError when wrapper_options_invalid rejects
     self.options.
     """
     # Additional validation when not used from command prompt
     if wrapper_options_invalid(self.options):
         raise inv.InvalidDataError(
             "Invalid options for Forward Select Feature selection."
         )
     candidates = self.attributes[:]
     delta = self.get_delta()
     chosen = self.__select_attributes(-1, [], candidates, delta)
     to_drop = self.invert_attribute_selection(chosen)
     self.remove(to_drop)
Esempio n. 6
0
def as_integers(name, com_str):
    """
    Parses a comma separated string into a list of integers.

    name is only used to build the error message. com_str may be None, in
    which case an empty list is returned. Whitespace around each element
    is stripped before conversion.

    Raises inv.InvalidDataError if any element is not a valid integer.
    """
    if com_str is None:
        return []
    pieces = com_str.split(',')
    try:
        return [int(piece.strip()) for piece in pieces]
    except ValueError:
        raise inv.InvalidDataError('Invalid Data. ' + name +
                                   ' should contain integers.')
Esempio n. 7
0
 def backward_elimination(self):
     """
     Runs the backward elimination step and removes every attribute that
     was not kept.

     The value returned by avg_accuracy_by_cross_validation for the full
     attribute set is used as the starting point for elimination, which
     works on a copy of self.attributes with the value returned by
     self.get_delta(). The result is inverted via
     invert_attribute_selection and passed to self.remove.

     Raises inv.InvalidDataError when wrapper_options_invalid rejects
     self.options.
     """
     if wrapper_options_invalid(self.options):
         raise inv.InvalidDataError(
             "Invalid options for Backward Select Feature selection.")
     folds = self.get_fold()
     datasets = self.training.cross_validation_datasets(folds)
     starting_accuracy = self.avg_accuracy_by_cross_validation(
         datasets, folds, self.attributes)
     remaining = self.attributes[:]
     delta = self.get_delta()
     kept = self.__eliminate_attributes(starting_accuracy, remaining, delta)
     to_drop = self.invert_attribute_selection(kept)
     self.remove(to_drop)
Esempio n. 8
0
 def best_decision_stump(self,
                         instances,
                         ignore_attributes=None,
                         algorithm='minimum_error'):
     """
     Returns the result of applying the named selection algorithm to the
     decision stumps produced by self.possible_decision_stumps.

     instances and ignore_attributes are forwarded to
     possible_decision_stumps; when ignore_attributes is omitted (or None)
     a fresh empty list is used. algorithm is the name of a method on self
     that takes the list of decision stumps.

     Raises inv.InvalidDataError when self has no attribute named
     algorithm. Exceptions raised by the algorithm itself propagate
     unchanged.
     """
     # A fresh list per call avoids sharing one default list across calls.
     if ignore_attributes is None:
         ignore_attributes = []
     decision_stumps = self.possible_decision_stumps(
         ignore_attributes, instances)
     # Only the lookup is guarded, so an AttributeError raised inside the
     # algorithm is not misreported as an unknown algorithm name.
     try:
         chooser = getattr(self, algorithm)
     except AttributeError:
         raise inv.InvalidDataError(
             'Invalid algorithm to find the best decision stump. ' +
             str(algorithm) + ' is not defined.')
     return chooser(decision_stumps)
Esempio n. 9
0
 def value_ranges(self, attributes):
     """
     Builds one range object per attribute in the attributes parameter,
     spanning the smallest to the largest value found for that attribute.
     The returned list has the same length as attributes.

     Raises inv.InvalidDataError if any attribute is not continuous.
     """
     for candidate in attributes:
         if candidate.is_continuous():
             continue
         raise inv.InvalidDataError('Cannot discretise non continuous attribute ' + candidate.name)
     spans = []
     # each entry is the list of values observed for a particular attribute
     for observed in self.values_grouped_by_attribute(attributes):
         observed.sort()  # in place, so the first/last items are min/max
         lowest, highest = observed[0], observed[-1]
         spans.append(r.Range(lowest, highest, True))
     return spans
Esempio n. 10
0
 def __validate_options(self):
     """
     Rejects option lists that contain a zero.

     Does nothing when self.options is None.

     Raises inv.InvalidDataError if any option compares equal to 0.
     """
     if self.options is None:
         return
     if any(option == 0 for option in self.options):
         raise inv.InvalidDataError('Option cannot be equal to zero.')