Beispiel #1
0
    def __init__(self, params, default_params):
        """Store the parameters and optionally load saved feature data.

        The instance is named after its concrete class. If the optional
        ``features_file`` parameter is a string, that file is loaded through
        ``resultparse.ResultParser`` and the data recorded for this feature
        name is kept in ``self.feature_data``; otherwise ``self.feature_data``
        is ``None``.

        Args:
            params: Parameters for this feature; stored as ``self.params``.
            default_params: Stored as ``self.default_params``.
                NOTE(review): presumably the fallback consulted by
                ``get_param`` -- confirm against that method.
        """
        self.name = self.__class__.__name__
        self.params = params
        self.default_params = default_params

        # Save and validate any parameters that could apply to all features
        features_file = self.get_param('features_file', required=False)
        if features_file is not None and utils.is_str(features_file):
            # Pass the values as logging arguments instead of pre-formatting
            # the string, so the message (and str(self)) is only built when
            # debug logging is actually enabled.
            logging.debug("Opening the feature file %s for %s",
                          features_file, self)

            # A features file was named: load it and keep this feature's
            # data. No existence check is made here; any failure comes from
            # the parser itself.
            result_parser = resultparse.ResultParser()
            result_parser.load(features_file)
            self.feature_data = result_parser.get_feature_data(self.name)
            # Only set on this path; otherwise the attribute keeps whatever
            # value is defined elsewhere.
            # NOTE(review): presumably a class-level default -- confirm.
            self.needs_users = False
        else:
            self.feature_data = None

        # Save feature-specific parameters
        self.save_params()
Beispiel #2
0
    def parse(self, conf):
        """Validate a configuration and fill in the default output file.

        The configuration is stored on ``self.conf`` and the individual
        settings are then read back as attributes of ``self``
        (``self.attribute``, ``self.labels``, ...).
        NOTE(review): presumably the enclosing class resolves those
        attributes from ``self.conf`` -- confirm.

        Args:
            conf: The configuration to validate. It must support item
                assignment, because a default ``'output_file'`` entry is
                written into it when no output file is configured.

        Returns:
            True when every check passes.

        Raises:
            errors.ConfFileError: For the first setting that fails
                validation.
        """
        self.conf = conf

        # NOTE: the ``mysql`` setting is not validated by this method.

        # Attribute must be defined as a string and must not be empty
        if not utils.is_str(self.attribute) or self.attribute == '':
            raise errors.ConfFileError("The dataset's attribute is required.")

        # Exact type comparisons are used throughout (not isinstance), so
        # subclasses -- e.g. bool where an int is expected -- are rejected.
        if type(self.labels) is not dict:
            raise errors.ConfFileError("Labels must be a dictionary.")

        # There must be at least one label
        if len(self.labels) < 1:
            raise errors.ConfFileError("There must be at least 1 label.")

        # Limit must be set (a non-negative integer)
        if type(self.limit) is not int or self.limit < 0:
            raise errors.ConfFileError("The limit must be >= 0.")

        # If ignore_start is set, it should be a non-negative int
        if (self.ignore_start is not None and
                (type(self.ignore_start) is not int or
                 self.ignore_start < 0)):
            raise errors.ConfFileError("The starting index of the section "
                                       "to ignore must be a "
                                       "non-negative integer.")

        # If ignore_number is set, it should be a non-negative int
        if (self.ignore_number is not None and
                (type(self.ignore_number) is not int or
                 self.ignore_number < 0)):
            raise errors.ConfFileError("The number to ignore must be a "
                                       "non-negative integer.")

        # If one is set, the other must be set too
        ignore_start_set = self.ignore_start is not None
        ignore_number_set = self.ignore_number is not None
        if ignore_start_set != ignore_number_set:
            raise errors.ConfFileError("Both the number to ignore and the "
                                       "starting index of the section to "
                                       "ignore must be set.")

        # Every label value must be an int
        if any(type(value) is not int for value in self.labels.values()):
            raise errors.ConfFileError("The label values must be integers.")

        # Features must be defined (each must be explicitly enabled)
        if type(self.features) is not dict:
            raise errors.ConfFileError("Features must be a dictionary.")

        if 'enabled' not in self.features:
            raise errors.ConfFileError("You must set the enabled features.")

        enabled = self.features['enabled']
        # enabled must be defined - non-empty dict, keys are non-empty strings
        if (type(enabled) is not dict or len(enabled) < 1 or
                any(key == '' or not utils.is_str(key) for key in enabled)):
            raise errors.ConfFileError("features.enabled must be a dict of "
                                       "at least 1 non-empty string.")

        # All of the values in the enabled dictionary must be dictionaries
        if any(type(value) is not dict for value in enabled.values()):
            raise errors.ConfFileError("All the values in features.enabled "
                                       "must be dictionaries.")

        # default_params is optional, but when present it must be a
        # dictionary whose keys are non-empty strings
        if 'default_params' in self.features:
            params = self.features['default_params']
            # Check the container first: iterating a non-dict would either
            # raise a raw TypeError (None, numbers) or silently validate the
            # wrong thing (the characters of a string).
            if type(params) is not dict:
                raise errors.ConfFileError("features.default_params must be "
                                           "a dictionary.")
            if any(key == '' or not utils.is_str(key) for key in params):
                raise errors.ConfFileError("All the keys in default_params "
                                           "dictionary must be strings.")

        # Output file must be either undefined or a non-empty string
        if (self.output_file is not None and
                (not utils.is_str(self.output_file) or
                 self.output_file == '')):
            raise errors.ConfFileError("The output file must be a non-empty "
                                       "string.")

        # Output file was not specified - default to the filename + _output,
        # keeping a trailing '.json' extension at the end of the name
        if not self.output_file:
            if self.filename.endswith('.json'):
                stem = self.filename[:-len('.json')]
                self.conf['output_file'] = stem + '_output' + '.json'
            else:
                self.conf['output_file'] = self.filename + '_output'

        # Everything is fine. Return true.
        return True
Beispiel #3
0
    def parse(self, conf):
        """Validate a configuration and fill in the default output file.

        The configuration is stored on ``self.conf`` and the individual
        settings are then read back as attributes of ``self``
        (``self.attribute``, ``self.labels``, ...).
        NOTE(review): presumably the enclosing class resolves those
        attributes from ``self.conf`` -- confirm.

        Args:
            conf: The configuration to validate. It must support item
                assignment, because a default ``'output_file'`` entry is
                written into it when no output file is configured.

        Returns:
            True when every check passes.

        Raises:
            errors.ConfFileError: For the first setting that fails
                validation.
        """
        self.conf = conf

        # NOTE: the ``mysql`` setting is not validated by this method.

        # Attribute must be defined as a string and must not be empty
        if not utils.is_str(self.attribute) or self.attribute == '':
            raise errors.ConfFileError("The dataset's attribute is required.")

        # Exact type comparisons are used throughout (not isinstance), so
        # subclasses -- e.g. bool where an int is expected -- are rejected.
        if type(self.labels) is not dict:
            raise errors.ConfFileError("Labels must be a dictionary.")

        # There must be at least one label
        if len(self.labels) < 1:
            raise errors.ConfFileError("There must be at least 1 label.")

        # Limit must be set (a non-negative integer)
        if type(self.limit) is not int or self.limit < 0:
            raise errors.ConfFileError("The limit must be >= 0.")

        # If ignore_start is set, it should be a non-negative int
        if (self.ignore_start is not None and
                (type(self.ignore_start) is not int or
                 self.ignore_start < 0)):
            raise errors.ConfFileError("The starting index of the section "
                                       "to ignore must be a "
                                       "non-negative integer.")

        # If ignore_number is set, it should be a non-negative int
        if (self.ignore_number is not None and
                (type(self.ignore_number) is not int or
                 self.ignore_number < 0)):
            raise errors.ConfFileError("The number to ignore must be a "
                                       "non-negative integer.")

        # If one is set, the other must be set too
        ignore_start_set = self.ignore_start is not None
        ignore_number_set = self.ignore_number is not None
        if ignore_start_set != ignore_number_set:
            raise errors.ConfFileError("Both the number to ignore and the "
                                       "starting index of the section to "
                                       "ignore must be set.")

        # Every label value must be an int
        if any(type(value) is not int for value in self.labels.values()):
            raise errors.ConfFileError("The label values must be integers.")

        # Features must be defined (each must be explicitly enabled)
        if type(self.features) is not dict:
            raise errors.ConfFileError("Features must be a dictionary.")

        if 'enabled' not in self.features:
            raise errors.ConfFileError("You must set the enabled features.")

        enabled = self.features['enabled']
        # enabled must be defined - non-empty dict, keys are non-empty strings
        if (type(enabled) is not dict or len(enabled) < 1 or
                any(key == '' or not utils.is_str(key) for key in enabled)):
            raise errors.ConfFileError("features.enabled must be a dict of "
                                       "at least 1 non-empty string.")

        # All of the values in the enabled dictionary must be dictionaries
        if any(type(value) is not dict for value in enabled.values()):
            raise errors.ConfFileError("All the values in features.enabled "
                                       "must be dictionaries.")

        # default_params is optional, but when present it must be a
        # dictionary whose keys are non-empty strings
        if 'default_params' in self.features:
            params = self.features['default_params']
            # Check the container first: iterating a non-dict would either
            # raise a raw TypeError (None, numbers) or silently validate the
            # wrong thing (the characters of a string).
            if type(params) is not dict:
                raise errors.ConfFileError("features.default_params must be "
                                           "a dictionary.")
            if any(key == '' or not utils.is_str(key) for key in params):
                raise errors.ConfFileError("All the keys in default_params "
                                           "dictionary must be strings.")

        # Output file must be either undefined or a non-empty string
        if (self.output_file is not None and
                (not utils.is_str(self.output_file) or
                 self.output_file == '')):
            raise errors.ConfFileError("The output file must be a non-empty "
                                       "string.")

        # Output file was not specified - default to the filename + _output,
        # keeping a trailing '.json' extension at the end of the name
        if not self.output_file:
            if self.filename.endswith('.json'):
                stem = self.filename[:-len('.json')]
                self.conf['output_file'] = stem + '_output' + '.json'
            else:
                self.conf['output_file'] = self.filename + '_output'

        # Everything is fine. Return true.
        return True