def xml_to_dict(self):
        """@xml_to_dict

        This method converts the supplied xml file-object ('self.svm_file')
        to a list of python dictionaries, one per feature of every
        observation.

        Side effects: 'self.count_features' is set (when not already truthy)
        to the number of features of an observation, and on success
        'self.observation_labels' receives the list of dependent variable
        labels. 'self.svm_file' is closed on every exit path.

        @return, a list of dictionaries with the keys 'dep_variable_label',
            'indep_variable_label', and 'indep_variable_value'; or 'None'
            when a label, or value fails validation (the errors are printed).

        """

        list_dataset = []
        observation_label = []

        try:
            # convert xml file to python 'dict'
            dataset = xmltodict.parse(self.svm_file)

            # an element occurring only once is parsed into a single mapping
            # rather than a list; normalize so iteration yields observations
            observations = dataset['dataset']['observation']
            if not isinstance(observations, list):
                observations = [observations]

            # build 'list_dataset'
            for dep_variable in observations:
                dep_variable_label = dep_variable['dependent-variable']

                validate = Validate_Dataset(dep_variable_label)
                validate.validate_label()

                list_error = validate.get_errors()
                if list_error:
                    print(list_error)
                    return None
                observation_label.append(dep_variable_label)

                # same normalization for an observation with one feature
                indep_variables = dep_variable['independent-variable']
                if not isinstance(indep_variables, list):
                    indep_variables = [indep_variables]

                for indep_variable in indep_variables:
                    indep_variable_label = indep_variable['label']
                    indep_variable_value = indep_variable['value']

                    validate_label = Validate_Dataset(indep_variable_label)
                    validate_value = Validate_Dataset(indep_variable_value)

                    validate_label.validate_label()
                    validate_value.validate_value()

                    # errors must come from the feature validators, not from
                    # the (already clean) observation label validator
                    list_error_label = validate_label.get_errors()
                    list_error_value = validate_value.get_errors()
                    if list_error_label or list_error_value:
                        print(list_error_label)
                        print(list_error_value)
                        return None

                    list_dataset.append({
                        'dep_variable_label': dep_variable_label,
                        'indep_variable_label': indep_variable_label,
                        'indep_variable_value': indep_variable_value
                    })

                # generalized feature count in an observation
                if not self.count_features:
                    self.count_features = len(indep_variables)
        finally:
            # close file, also when validation fails, or parsing raises
            if hasattr(self.svm_file, 'close'):
                self.svm_file.close()

        # save observation labels, and return
        self.observation_labels = observation_label
        return list_dataset
Exemple #2
0
def svm_csv_converter(raw_data):
    '''@svm_csv_converter

    This method converts the supplied csv file-object, intended for an svm
    model, to a python dictionary.

    @raw_data, an open csv file-object containing the raw dataset. The first
        non-blank row supplies the feature labels (its first column is
        skipped). Every following non-blank row supplies an observation
        label in its first column, followed by the feature values. The file
        is closed on every exit path.

    @list_observation_label, is a list containing dependent variable labels.

    @return, a dictionary with the keys 'dataset', 'observation_labels', and
        'feature_count'. 'None' is returned when a label, or value fails
        validation, and 'False' when validating, or converting a value to
        float raises an exception (both cases are logged).

    Note: 'raw_data' is presumably opened by the caller with 'Universal
          Newline Support' (the 'U' parameter); this cannot be confirmed
          from this method.

    Note: since 'row' is a list, with one comma-delimited string element, the
          following line is required in this method:

          row = row[0].split(',')

    Note: a row with more values than there are feature labels raises an
          'IndexError'.

    '''

    feature_count = None
    list_dataset = []
    list_observation_label = []
    list_feature_label = []
    logger = Logger(__name__, 'error', 'error')

    try:
        # blank lines are returned by 'csv.reader' as empty lists, which
        # would break the 'row[0]' lookups below: drop them up front
        dataset_reader = (
            row
            for row in csv.reader(raw_data, delimiter=' ', quotechar='|')
            if row
        )

        # first row: feature labels (first column is skipped)
        header = next(dataset_reader, None)
        if header is not None:
            for value in header[0].split(',')[1:]:
                validate = Validate_Dataset(value)
                validate.validate_label()

                list_error = validate.get_errors()
                if list_error:
                    logger.log(list_error)
                    return None
                list_feature_label.append(value)

        # remaining rows: one observation each
        for row in dataset_reader:
            columns = row[0].split(',')

            # first column is the dependent variable label
            observation_label = columns[0]
            validate = Validate_Dataset(observation_label)
            validate.validate_label()

            list_error = validate.get_errors()
            if list_error:
                logger.log(list_error)
                return None
            list_observation_label.append(observation_label)

            # generalized feature count in an observation
            if not feature_count:
                feature_count = len(columns) - 1

            # remaining columns are the feature values
            for indep_index, value in enumerate(columns[1:]):
                try:
                    validate = Validate_Dataset(value)
                    validate.validate_value()

                    list_error = validate.get_errors()
                    if list_error:
                        logger.log(list_error)
                        return None
                    value = float(value)
                except Exception as error:
                    logger.log(error)
                    return False

                list_dataset.append({
                    'dep_variable_label': observation_label,
                    'indep_variable_label': list_feature_label[indep_index],
                    'indep_variable_value': value
                })
    finally:
        # close file, also when validation fails, or an exception is raised
        raw_data.close()

    # save observation labels, and return
    return {
        'dataset': list_dataset,
        'observation_labels': list_observation_label,
        'feature_count': feature_count
    }
def svm_xml_converter(raw_data):
    '''@svm_xml_converter

    This method converts the supplied xml file-object to a python dictionary.

    @raw_data, generally a file containing the raw dataset(s), to be used
        when computing a corresponding model. If this argument exposes a
        'close' method, it is closed on every exit path.

    @list_observation_label, is a list containing dependent variable
        labels.

    @return, a dictionary with the keys 'dataset', 'observation_labels', and
        'feature_count'; or 'None' when a label, or value fails validation
        (the errors are logged).

    '''

    feature_count = None
    list_dataset = []
    list_observation_label = []
    logger = Logger(__name__, 'error', 'error')

    try:
        # convert xml file to python 'dict'
        dataset = xmltodict.parse(raw_data)

        # an element occurring only once is parsed into a single mapping
        # rather than a list; normalize so iteration yields observations
        observations = dataset['dataset']['observation']
        if not isinstance(observations, list):
            observations = [observations]

        # build 'list_dataset'
        for observation in observations:
            observation_label = observation['dependent-variable']

            validate = Validate_Dataset(observation_label)
            validate.validate_label()

            list_error = validate.get_errors()
            if list_error:
                logger.log(list_error)
                return None
            list_observation_label.append(observation_label)

            # same normalization for an observation with a single feature
            features = observation['independent-variable']
            if not isinstance(features, list):
                features = [features]

            for feature in features:
                feature_label = feature['label']
                feature_value = feature['value']

                validate_label = Validate_Dataset(feature_label)
                validate_value = Validate_Dataset(feature_value)

                validate_label.validate_label()
                validate_value.validate_value()

                # errors must come from the feature validators, not from the
                # (already clean) observation label validator
                list_error_label = validate_label.get_errors()
                list_error_value = validate_value.get_errors()
                if list_error_label or list_error_value:
                    logger.log(list_error_label)
                    logger.log(list_error_value)
                    return None

                list_dataset.append({
                    'dep_variable_label': observation_label,
                    'indep_variable_label': feature_label,
                    'indep_variable_value': feature_value
                })

            # generalized feature count in an observation
            if not feature_count:
                feature_count = len(features)
    finally:
        # close file, also when validation fails, or parsing raises
        if hasattr(raw_data, 'close'):
            raw_data.close()

    # save observation labels, and return
    return {
        'dataset': list_dataset,
        'observation_labels': list_observation_label,
        'feature_count': feature_count
    }
def svm_json_converter(raw_data, is_json):
    '''@svm_json_converter

    This method converts the supplied json file-object to a python
    dictionary.

    @raw_data, either an open json file-object (parsed with 'json.load', and
        closed on every exit path), or, when 'is_json' is truthy, the
        dataset itself, which is iterated and indexed like a dictionary.
        Each key is an observation label, mapped to either a dictionary of
        features (one observation), or a list of such dictionaries (several
        observations). Any other value contributes no features.

    @is_json, flag indicating 'raw_data' should be used as is, instead of
        being loaded as a file.

    @observation_labels, is a list containing dependent variable labels.

    @return, a dictionary with the keys 'dataset', 'observation_labels', and
        'feature_count'; or 'None' when any observation label, feature
        label, or feature value fails validation (the errors are logged).

    '''

    feature_count = None
    list_dataset = []
    observation_labels = []
    logger = Logger(__name__, 'error', 'error')

    try:
        if is_json:
            dataset = raw_data
        else:
            dataset = json.load(raw_data)

        for observation_label in dataset:
            observations = dataset[observation_label]

            # dependent variable with a single observation (dict), or with
            # multiple observations (list): treat both as a list
            if isinstance(observations, dict):
                observations = [observations]
            elif not isinstance(observations, list):
                observations = []

            # validation (part 1)
            validate_olabel = Validate_Dataset(observation_label)
            validate_olabel.validate_label()

            # every validator's errors are collected, so a failure on any
            # feature (not just the last one) is detected
            errors = [validate_olabel.get_errors()]

            for observation in observations:
                for feature_label, feature_value in observation.items():
                    # validation (part 2)
                    validate_flabel = Validate_Dataset(feature_label)
                    validate_flabel.validate_label()
                    validate_fvalue = Validate_Dataset(feature_value)
                    validate_fvalue.validate_value()

                    errors.append(validate_flabel.get_errors())
                    errors.append(validate_fvalue.get_errors())

                    # restructured data
                    list_dataset.append({
                        'dep_variable_label': observation_label,
                        'indep_variable_label': feature_label,
                        'indep_variable_value': feature_value
                    })

                # generalized feature count in an observation
                if not feature_count:
                    feature_count = len(observation)

            # list of observation label
            observation_labels.append(observation_label)

            # check for errors
            errors = [error for error in errors if error]
            if errors:
                for error in errors:
                    logger.log(error)
                return None
    finally:
        # close file, also when validation fails, or loading raises
        if not is_json:
            raw_data.close()

    # save observation labels, and return
    return {
        'dataset': list_dataset,
        'observation_labels': observation_labels,
        'feature_count': feature_count
    }
    def csv_to_dict(self):
        """@csv_to_dict

        This method converts the supplied csv file-object ('self.svm_data')
        to a list of python dictionaries, one per feature value of every
        observation.

        The first non-blank row supplies the feature labels (its first
        column is skipped). Every following non-blank row supplies an
        observation label in its first column, followed by the feature
        values.

        Side effects: 'self.count_features' is set (when not already truthy)
        to the number of values in an observation row, and on success
        'self.observation_labels' receives the list of dependent variable
        labels. 'self.svm_data' is closed on every exit path.

        @list_observation_label, is a list containing dependent variable
            labels.

        @return, a list of dictionaries with the keys 'dep_variable_label',
            'indep_variable_label', and 'indep_variable_value'. 'None' is
            returned when a label, or value fails validation, and 'False'
            when validating, or converting a value to float raises an
            exception (both cases are printed).

        Note: 'self.svm_data' is presumably opened by the caller with
            'Universal Newline Support' (the 'U' parameter); this cannot be
            confirmed from this method.

        Note: since 'row' is a list, with one comma-delimited string element,
            the following line is required in this method:

                row = row[0].split(',')

        Note: a row with more values than there are feature labels raises an
            'IndexError'.

        """

        list_dataset = []
        list_observation_label = []
        list_feature_label = []

        try:
            # blank lines are returned by 'csv.reader' as empty lists, which
            # would break the 'row[0]' lookups below: drop them up front
            dataset_reader = (
                row
                for row in csv.reader(
                    self.svm_data,
                    delimiter=' ',
                    quotechar='|'
                )
                if row
            )

            # first row: feature labels (first column is skipped)
            header = next(dataset_reader, None)
            if header is not None:
                for value in header[0].split(',')[1:]:
                    validate = Validate_Dataset(value)
                    validate.validate_label()

                    list_error = validate.get_errors()
                    if list_error:
                        print(list_error)
                        return None
                    list_feature_label.append(value)

            # remaining rows: one observation each
            for row in dataset_reader:
                columns = row[0].split(',')

                # first column is the dependent variable label
                observation_label = columns[0]
                validate = Validate_Dataset(observation_label)
                validate.validate_label()

                list_error = validate.get_errors()
                if list_error:
                    print(list_error)
                    return None
                list_observation_label.append(observation_label)

                # generalized feature count in an observation
                if not self.count_features:
                    self.count_features = len(columns) - 1

                # remaining columns are the feature values
                for indep_index, value in enumerate(columns[1:]):
                    try:
                        validate = Validate_Dataset(value)
                        validate.validate_value()

                        list_error = validate.get_errors()
                        if list_error:
                            print(list_error)
                            return None
                        value = float(value)
                    except Exception as error:
                        print(error)
                        return False

                    list_dataset.append({
                        'dep_variable_label': observation_label,
                        'indep_variable_label':
                        list_feature_label[indep_index],
                        'indep_variable_value': value
                    })
        finally:
            # close file, also when validation fails, or an exception occurs
            self.svm_data.close()

        # save observation labels, and return
        self.observation_labels = list_observation_label
        return list_dataset
    def xml_to_dict(self):
        """@xml_to_dict

        This method converts the supplied xml file-object ('self.svm_data')
        to a list of python dictionaries, one per feature of every
        observation.

        Side effects: 'self.count_features' is set (when not already truthy)
        to the number of features of an observation, and on success
        'self.observation_labels' receives the list of dependent variable
        labels. 'self.svm_data' is closed on every exit path.

        @list_observation_label, is a list containing dependent variable
            labels.

        @return, a list of dictionaries with the keys 'dep_variable_label',
            'indep_variable_label', and 'indep_variable_value'; or 'None'
            when a label, or value fails validation (the errors are printed).

        """

        list_dataset = []
        list_observation_label = []

        try:
            # convert xml file to python 'dict'
            dataset = xmltodict.parse(self.svm_data)

            # an element occurring only once is parsed into a single mapping
            # rather than a list; normalize so iteration yields observations
            observations = dataset['dataset']['observation']
            if not isinstance(observations, list):
                observations = [observations]

            # build 'list_dataset'
            for observation in observations:
                observation_label = observation['dependent-variable']

                validate = Validate_Dataset(observation_label)
                validate.validate_label()

                list_error = validate.get_errors()
                if list_error:
                    print(list_error)
                    return None
                list_observation_label.append(observation_label)

                # same normalization for an observation with one feature
                features = observation['independent-variable']
                if not isinstance(features, list):
                    features = [features]

                for feature in features:
                    feature_label = feature['label']
                    feature_value = feature['value']

                    validate_label = Validate_Dataset(feature_label)
                    validate_value = Validate_Dataset(feature_value)

                    validate_label.validate_label()
                    validate_value.validate_value()

                    # errors must come from the feature validators, not from
                    # the (already clean) observation label validator
                    list_error_label = validate_label.get_errors()
                    list_error_value = validate_value.get_errors()
                    if list_error_label or list_error_value:
                        print(list_error_label)
                        print(list_error_value)
                        return None

                    list_dataset.append({
                        'dep_variable_label': observation_label,
                        'indep_variable_label': feature_label,
                        'indep_variable_value': feature_value
                    })

                # generalized feature count in an observation
                if not self.count_features:
                    self.count_features = len(features)
        finally:
            # close file, also when validation fails, or parsing raises
            if hasattr(self.svm_data, 'close'):
                self.svm_data.close()

        # save observation labels, and return
        self.observation_labels = list_observation_label
        return list_dataset
Exemple #7
0
def svm_xml_converter(raw_data):
    '''@svm_xml_converter

    This method converts the supplied xml file-object to a python dictionary.

    @raw_data, generally a file containing the raw dataset(s), to be used
        when computing a corresponding model. If this argument exposes a
        'close' method, it is closed on every exit path.

    @list_observation_label, is a list containing dependent variable
        labels.

    @return, a dictionary with the keys 'dataset', 'observation_labels', and
        'feature_count'; or 'None' when a label, or value fails validation
        (the errors are logged).

    '''

    feature_count = None
    list_dataset = []
    list_observation_label = []
    logger = Logger(__name__, 'error', 'error')

    try:
        # convert xml file to python 'dict'
        dataset = xmltodict.parse(raw_data)

        # an element occurring only once is parsed into a single mapping
        # rather than a list; normalize so iteration yields observations
        observations = dataset['dataset']['observation']
        if not isinstance(observations, list):
            observations = [observations]

        # build 'list_dataset'
        for observation in observations:
            observation_label = observation['dependent-variable']

            validate = Validate_Dataset(observation_label)
            validate.validate_label()

            list_error = validate.get_errors()
            if list_error:
                logger.log(list_error)
                return None
            list_observation_label.append(observation_label)

            # same normalization for an observation with a single feature
            features = observation['independent-variable']
            if not isinstance(features, list):
                features = [features]

            for feature in features:
                feature_label = feature['label']
                feature_value = feature['value']

                validate_label = Validate_Dataset(feature_label)
                validate_value = Validate_Dataset(feature_value)

                validate_label.validate_label()
                validate_value.validate_value()

                # errors must come from the feature validators, not from the
                # (already clean) observation label validator
                list_error_label = validate_label.get_errors()
                list_error_value = validate_value.get_errors()
                if list_error_label or list_error_value:
                    logger.log(list_error_label)
                    logger.log(list_error_value)
                    return None

                list_dataset.append({
                    'dep_variable_label': observation_label,
                    'indep_variable_label': feature_label,
                    'indep_variable_value': feature_value
                })

            # generalized feature count in an observation
            if not feature_count:
                feature_count = len(features)
    finally:
        # close file, also when validation fails, or parsing raises
        if hasattr(raw_data, 'close'):
            raw_data.close()

    # save observation labels, and return
    return {
        'dataset': list_dataset,
        'observation_labels': list_observation_label,
        'feature_count': feature_count
    }
    def csv_to_dict(self):
        """@csv_to_dict

        This method converts the supplied csv file-object ('self.svm_file')
        to a list of python dictionaries, one per feature value of every
        observation.

        The first non-blank row supplies the feature labels (its first
        column is skipped). Every following non-blank row supplies an
        observation label in its first column, followed by the feature
        values.

        Side effects: 'self.count_features' is set (when not already truthy)
        to the number of values in an observation row, and on success
        'self.observation_labels' receives the list of dependent variable
        labels. 'self.svm_file' is closed on every exit path.

        @return, a list of dictionaries with the keys 'dep_variable_label',
            'indep_variable_label', and 'indep_variable_value'. 'None' is
            returned when a label, or value fails validation, and 'False'
            when validating, or converting a value to float raises an
            exception (both cases are printed).

        Note: since 'row' is a list, with one comma-delimited string element,
            'row[0].split(',')' is required to obtain the columns.

        Note: a row with more values than there are feature labels raises an
            'IndexError'.

        """

        list_dataset = []
        labels_observation = []
        labels_feature = []

        try:
            # blank lines are returned by 'csv.reader' as empty lists, which
            # would break the 'row[0]' lookups below: drop them up front
            dataset_reader = (
                row
                for row in csv.reader(
                    self.svm_file,
                    delimiter=' ',
                    quotechar='|'
                )
                if row
            )

            # first row: feature labels (first column is skipped)
            header = next(dataset_reader, None)
            if header is not None:
                for value in header[0].split(',')[1:]:
                    validate = Validate_Dataset(value)
                    validate.validate_label()

                    list_error = validate.get_errors()
                    if list_error:
                        print(list_error)
                        return None
                    labels_feature.append(value)

            # remaining rows: one observation each
            for row in dataset_reader:
                columns = row[0].split(',')

                # first column is the dependent variable label
                dep_label = columns[0]
                validate = Validate_Dataset(dep_label)
                validate.validate_label()

                list_error = validate.get_errors()
                if list_error:
                    print(list_error)
                    return None
                labels_observation.append(dep_label)

                # generalized feature count in an observation
                if not self.count_features:
                    self.count_features = len(columns) - 1

                # remaining columns are the feature values
                for indep_index, value in enumerate(columns[1:]):
                    try:
                        validate = Validate_Dataset(value)
                        validate.validate_value()

                        list_error = validate.get_errors()
                        if list_error:
                            print(list_error)
                            return None
                        value = float(value)
                    except Exception as error:
                        print(error)
                        return False

                    list_dataset.append({
                        'dep_variable_label': dep_label,
                        'indep_variable_label': labels_feature[indep_index],
                        'indep_variable_value': value
                    })
        finally:
            # close file, also when validation fails, or an exception occurs
            self.svm_file.close()

        # save observation labels, and return
        self.observation_labels = labels_observation
        return list_dataset
Exemple #9
0
    def csv_to_dict(self):
        """@csv_to_dict

        This method converts the supplied csv file-object ('self.svm_data')
        to a list of python dictionaries, one per feature value of every
        observation.

        The first non-blank row supplies the feature labels (its first
        column is skipped). Every following non-blank row supplies an
        observation label in its first column, followed by the feature
        values.

        Side effects: 'self.count_features' is set (when not already truthy)
        to the number of values in an observation row, and on success
        'self.observation_labels' receives the list of dependent variable
        labels. 'self.svm_data' is closed on every exit path.

        @list_observation_label, is a list containing dependent variable
            labels.

        @return, a list of dictionaries with the keys 'dep_variable_label',
            'indep_variable_label', and 'indep_variable_value'. 'None' is
            returned when a label, or value fails validation, and 'False'
            when validating, or converting a value to float raises an
            exception (both cases are printed).

        Note: 'self.svm_data' is presumably opened by the caller with
            'Universal Newline Support' (the 'U' parameter); this cannot be
            confirmed from this method.

        Note: since 'row' is a list, with one comma-delimited string element,
            the following line is required in this method:

                row = row[0].split(',')

        Note: a row with more values than there are feature labels raises an
            'IndexError'.

        """

        list_dataset = []
        list_observation_label = []
        list_feature_label = []

        try:
            # blank lines are returned by 'csv.reader' as empty lists, which
            # would break the 'row[0]' lookups below: drop them up front
            dataset_reader = (
                row
                for row in csv.reader(self.svm_data,
                                      delimiter=' ',
                                      quotechar='|')
                if row
            )

            # first row: feature labels (first column is skipped)
            header = next(dataset_reader, None)
            if header is not None:
                for value in header[0].split(',')[1:]:
                    validate = Validate_Dataset(value)
                    validate.validate_label()

                    list_error = validate.get_errors()
                    if list_error:
                        print(list_error)
                        return None
                    list_feature_label.append(value)

            # remaining rows: one observation each
            for row in dataset_reader:
                columns = row[0].split(',')

                # first column is the dependent variable label
                observation_label = columns[0]
                validate = Validate_Dataset(observation_label)
                validate.validate_label()

                list_error = validate.get_errors()
                if list_error:
                    print(list_error)
                    return None
                list_observation_label.append(observation_label)

                # generalized feature count in an observation
                if not self.count_features:
                    self.count_features = len(columns) - 1

                # remaining columns are the feature values
                for indep_index, value in enumerate(columns[1:]):
                    try:
                        validate = Validate_Dataset(value)
                        validate.validate_value()

                        list_error = validate.get_errors()
                        if list_error:
                            print(list_error)
                            return None
                        value = float(value)
                    except Exception as error:
                        print(error)
                        return False

                    list_dataset.append({
                        'dep_variable_label':
                        observation_label,
                        'indep_variable_label':
                        list_feature_label[indep_index],
                        'indep_variable_value':
                        value
                    })
        finally:
            # close file, also when validation fails, or an exception occurs
            self.svm_data.close()

        # save observation labels, and return
        self.observation_labels = list_observation_label
        return list_dataset
Exemple #10
0
    def xml_to_dict(self):
        """@xml_to_dict

        This method converts the supplied xml file-object ('self.svm_data')
        to a list of python dictionaries, one per feature of every
        observation.

        Side effects: 'self.count_features' is set (when not already truthy)
        to the number of features of an observation, and on success
        'self.observation_labels' receives the list of dependent variable
        labels. 'self.svm_data' is closed on every exit path.

        @list_observation_label, is a list containing dependent variable
            labels.

        @return, a list of dictionaries with the keys 'dep_variable_label',
            'indep_variable_label', and 'indep_variable_value'; or 'None'
            when a label, or value fails validation (the errors are printed).

        """

        list_dataset = []
        list_observation_label = []

        try:
            # convert xml file to python 'dict'
            dataset = xmltodict.parse(self.svm_data)

            # an element occurring only once is parsed into a single mapping
            # rather than a list; normalize so iteration yields observations
            observations = dataset['dataset']['observation']
            if not isinstance(observations, list):
                observations = [observations]

            # build 'list_dataset'
            for observation in observations:
                observation_label = observation['dependent-variable']

                validate = Validate_Dataset(observation_label)
                validate.validate_label()

                list_error = validate.get_errors()
                if list_error:
                    print(list_error)
                    return None
                list_observation_label.append(observation_label)

                # same normalization for an observation with one feature
                features = observation['independent-variable']
                if not isinstance(features, list):
                    features = [features]

                for feature in features:
                    feature_label = feature['label']
                    feature_value = feature['value']

                    validate_label = Validate_Dataset(feature_label)
                    validate_value = Validate_Dataset(feature_value)

                    validate_label.validate_label()
                    validate_value.validate_value()

                    # errors must come from the feature validators, not from
                    # the (already clean) observation label validator
                    list_error_label = validate_label.get_errors()
                    list_error_value = validate_value.get_errors()
                    if list_error_label or list_error_value:
                        print(list_error_label)
                        print(list_error_value)
                        return None

                    list_dataset.append({
                        'dep_variable_label': observation_label,
                        'indep_variable_label': feature_label,
                        'indep_variable_value': feature_value
                    })

                # generalized feature count in an observation
                if not self.count_features:
                    self.count_features = len(features)
        finally:
            # close file, also when validation fails, or parsing raises
            if hasattr(self.svm_data, 'close'):
                self.svm_data.close()

        # save observation labels, and return
        self.observation_labels = list_observation_label
        return list_dataset