def svr_xml_converter(raw_data):
    '''

    This method converts the supplied xml file-object to a python dictionary.

    @raw_data, an open file-object containing the xml dataset. It is always
        closed before this method returns, including when parsing, or
        validation fails.

    Each 'observation' element is expected to contain a 'criterion' (the
    dependent variable), and one or more 'predictor' elements, each having a
    'label', and a 'value'.

    Returns None when a predictor value fails validation (the errors are
    logged). Otherwise, returns a dictionary with the following keys:

        - 'dataset', a list with one dictionary per predictor.
        - 'observation_labels', a list with the criterion of each observation.
        - 'feature_count', the number of predictors in the first observation
              having at least one predictor.

    Raises KeyError when an observation lacks a 'criterion', or a 'predictor'.

    '''

    feature_count = None
    list_dataset = []
    list_observation_label = []
    logger = Logger(__name__, 'error', 'error')

    try:
        # convert xml file to python 'dict'
        dataset = xmltodict.parse(raw_data)

        # xmltodict returns a lone child element as a mapping, not a list
        observations = dataset['dataset']['observation']
        if not isinstance(observations, list):
            observations = [observations]

        # build 'list_dataset'
        for observation in observations:
            # resolve the label up front, so it never depends on the order
            # of the elements within the observation
            observation_label = observation['criterion']
            list_observation_label.append(observation_label)

            predictors = observation['predictor']
            if not isinstance(predictors, list):
                predictors = [predictors]

            for predictor in predictors:
                predictor_label = predictor['label']
                predictor_value = predictor['value']

                validate_value = Validate_Dataset(predictor_value)
                validate_value.validate_value()
                list_error_value = validate_value.get_errors()
                if list_error_value:
                    logger.log(list_error_value)
                    return None

                list_dataset.append({
                    'dep_variable_label': str(observation_label),
                    'indep_variable_label': str(predictor_label),
                    'indep_variable_value': predictor_value
                })

            # generalized feature count in an observation
            if not feature_count:
                feature_count = len(predictors)

    finally:
        # release the file on every exit path, not only on success
        raw_data.close()

    # save observation labels, and return
    return {
        'dataset': list_dataset,
        'observation_labels': list_observation_label,
        'feature_count': feature_count
    }
Exemple #2
0
def svr_xml_converter(raw_data):
    """

    This method converts the supplied xml file-object to a python dictionary.

    @raw_data, an open file-object containing the xml dataset. It is always
        closed before this method returns, including when parsing, or
        validation fails.

    Each "observation" element is expected to contain a "criterion" (the
    dependent variable), and one or more "predictor" elements, each having a
    "label", and a "value".

    Returns None when a predictor value fails validation (the errors are
    logged). Otherwise, returns a dictionary with the following keys:

        - "dataset", a list with one dictionary per predictor.
        - "observation_labels", a list with the criterion of each observation.
        - "feature_count", the number of predictors in the first observation
              having at least one predictor.

    Raises KeyError when an observation lacks a "criterion", or a "predictor".

    """

    feature_count = None
    list_dataset = []
    list_observation_label = []
    logger = Logger(__name__, "error", "error")

    try:
        # convert xml file to python 'dict'
        dataset = xmltodict.parse(raw_data)

        # xmltodict returns a lone child element as a mapping, not a list
        observations = dataset["dataset"]["observation"]
        if not isinstance(observations, list):
            observations = [observations]

        # build 'list_dataset'
        for observation in observations:
            # resolve the label up front, so it never depends on the order
            # of the elements within the observation
            observation_label = observation["criterion"]
            list_observation_label.append(observation_label)

            predictors = observation["predictor"]
            if not isinstance(predictors, list):
                predictors = [predictors]

            for predictor in predictors:
                predictor_label = predictor["label"]
                predictor_value = predictor["value"]

                validate_value = Validate_Dataset(predictor_value)
                validate_value.validate_value()
                list_error_value = validate_value.get_errors()
                if list_error_value:
                    logger.log(list_error_value)
                    return None

                list_dataset.append(
                    {
                        "dep_variable_label": str(observation_label),
                        "indep_variable_label": str(predictor_label),
                        "indep_variable_value": predictor_value,
                    }
                )

            # generalized feature count in an observation
            if not feature_count:
                feature_count = len(predictors)

    finally:
        # release the file on every exit path, not only on success
        raw_data.close()

    # save observation labels, and return
    return {
        "dataset": list_dataset,
        "observation_labels": list_observation_label,
        "feature_count": feature_count,
    }
Exemple #3
0
def svm_csv_converter(raw_data):
    '''@svm_csv_converter

    This method converts the supplied csv file-object, intended for an svm
    model, to a python dictionary.

    @raw_data, an open file-object containing the csv dataset. It is always
        closed before this method returns, including on failure.

    The first row supplies the feature labels (its first column is skipped).
    Every following row supplies an observation label in its first column,
    followed by one numeric value per feature. Blank lines are skipped.

    Returns a dictionary with the keys 'dataset', 'observation_labels', and
    'feature_count'. Returns None when a label, or a value fails validation,
    and False when validating, or converting a value raises an exception.
    In both failure cases the problem is logged.

    Note: the reader is configured with a space delimiter, so each parsed
          'row' is a list whose first element is the comma-delimited line.
          That element is split manually:

          row[0].split(',')

          Consequently, anything following a space on a line is not part of
          'row[0]', and is ignored.

    '''

    feature_count = None
    list_dataset = []
    list_observation_label = []
    list_feature_label = []
    logger = Logger(__name__, 'error', 'error')

    try:
        # open temporary 'csvfile' reader object
        dataset_reader = csv.reader(raw_data, delimiter=' ', quotechar='|')

        # first row of csvfile: feature labels, skipping the first column
        header = next(dataset_reader, None)
        if header:
            for value in header[0].split(',')[1:]:
                validate = Validate_Dataset(value)
                validate.validate_label()

                list_error = validate.get_errors()
                if list_error:
                    logger.log(list_error)
                    return None
                list_feature_label.append(value)

        # remaining rows of csvfile: one observation per row
        for row in dataset_reader:
            # a blank line is parsed as an empty row: nothing to convert
            if not row:
                continue

            columns = row[0].split(',')

            # first column is the observation label
            observation_label = columns[0]
            validate = Validate_Dataset(observation_label)
            validate.validate_label()

            list_error = validate.get_errors()
            if list_error:
                logger.log(list_error)
                return None
            list_observation_label.append(observation_label)

            # generalized feature count in an observation
            if not feature_count:
                feature_count = len(columns) - 1

            # remaining columns are the feature values
            for indep_index, value in enumerate(columns[1:]):
                try:
                    validate = Validate_Dataset(value)
                    validate.validate_value()

                    list_error = validate.get_errors()
                    if list_error:
                        logger.log(list_error)
                        return None
                    value = float(value)
                except Exception as error:
                    logger.log(error)
                    return False

                list_dataset.append({
                    'dep_variable_label': observation_label,
                    'indep_variable_label': list_feature_label[indep_index],
                    'indep_variable_value': value
                })

    finally:
        # release the file on every exit path, not only on success
        raw_data.close()

    # save observation labels, and return
    return {
        'dataset': list_dataset,
        'observation_labels': list_observation_label,
        'feature_count': feature_count
    }
def svm_xml_converter(raw_data):
    '''@svm_xml_converter

    This method converts the supplied xml file-object to a python dictionary.

    @raw_data, an open file-object containing the xml dataset. It is always
        closed before this method returns, including when parsing, or
        validation fails.

    Each 'observation' element is expected to contain a 'dependent-variable',
    and one or more 'independent-variable' elements, each having a 'label',
    and a 'value'.

    Returns None when an observation label, a feature label, or a feature
    value fails validation (the errors are logged). Otherwise, returns a
    dictionary with the keys 'dataset', 'observation_labels', and
    'feature_count'.

    '''

    feature_count = None
    list_dataset = []
    list_observation_label = []
    logger = Logger(__name__, 'error', 'error')

    try:
        # convert xml file to python 'dict'
        dataset = xmltodict.parse(raw_data)

        # xmltodict returns a lone child element as a mapping, not a list
        observations = dataset['dataset']['observation']
        if not isinstance(observations, list):
            observations = [observations]

        # build 'list_dataset'
        for observation in observations:
            observation_label = observation['dependent-variable']

            validate = Validate_Dataset(observation_label)
            validate.validate_label()

            list_error = validate.get_errors()
            if list_error:
                logger.log(list_error)
                return None
            list_observation_label.append(observation_label)

            features = observation['independent-variable']
            if not isinstance(features, list):
                features = [features]

            for feature in features:
                feature_label = feature['label']
                feature_value = feature['value']

                validate_label = Validate_Dataset(feature_label)
                validate_value = Validate_Dataset(feature_value)

                validate_label.validate_label()
                validate_value.validate_value()

                # errors must be read from the feature validators, not from
                # the observation label validator
                list_error_label = validate_label.get_errors()
                list_error_value = validate_value.get_errors()
                if list_error_label or list_error_value:
                    logger.log(list_error_label)
                    logger.log(list_error_value)
                    return None

                list_dataset.append({
                    'dep_variable_label': observation_label,
                    'indep_variable_label': feature_label,
                    'indep_variable_value': feature_value
                })

            # generalized feature count in an observation
            if not feature_count:
                feature_count = len(features)

    finally:
        # release the file on every exit path, not only on success
        raw_data.close()

    # save observation labels, and return
    return {
        'dataset': list_dataset,
        'observation_labels': list_observation_label,
        'feature_count': feature_count
    }
def svm_json_converter(raw_data, is_json):
    '''@svm_json_converter

    This method converts the supplied json dataset to a python dictionary.

    @raw_data, either an open file-object containing json (when 'is_json' is
        falsy), or the already decoded dataset (when 'is_json' is truthy).
        A supplied file is always closed before this method returns.

    @is_json, flag indicating 'raw_data' is already decoded, and is used
        directly, instead of being loaded from a file.

    The dataset maps each dependent variable label to either a dictionary of
    features (one observation), or a list of such dictionaries (multiple
    observations). Entries of any other type contribute no features.

    Returns None when any observation label, feature label, or feature value
    fails validation (the errors are logged). Otherwise, returns a dictionary
    with the keys 'dataset', 'observation_labels', and 'feature_count'.

    '''

    feature_count = None
    list_dataset = []
    observation_labels = []
    logger = Logger(__name__, 'error', 'error')

    try:
        if is_json:
            dataset = raw_data
        else:
            dataset = json.load(raw_data)

        for observation_label in dataset:
            # variables
            observations = dataset[observation_label]
            errors = []

            # validation (part 1)
            validate_olabel = Validate_Dataset(observation_label)
            validate_olabel.validate_label()
            errors.append(validate_olabel.get_errors())

            # normalize to a list of feature dictionaries
            if isinstance(observations, list):
                feature_sets = observations
            elif isinstance(observations, dict):
                feature_sets = [observations]
            else:
                feature_sets = []

            for feature_set in feature_sets:
                for feature_label, feature_value in feature_set.items():
                    # validation (part 2): keep the errors of every feature,
                    # not only those of the last one visited
                    validate_flabel = Validate_Dataset(feature_label)
                    validate_flabel.validate_label()
                    errors.append(validate_flabel.get_errors())

                    validate_fvalue = Validate_Dataset(feature_value)
                    validate_fvalue.validate_value()
                    errors.append(validate_fvalue.get_errors())

                    # restructured data
                    list_dataset.append({
                        'dep_variable_label': observation_label,
                        'indep_variable_label': feature_label,
                        'indep_variable_value': feature_value
                    })

                # generalized feature count in an observation
                if not feature_count:
                    feature_count = len(feature_set)

            # list of observation label
            observation_labels.append(observation_label)

            # check for errors: any single failure rejects the dataset
            errors = [error for error in errors if error]
            for error in errors:
                logger.log(error)
            if errors:
                return None

    finally:
        # close file on every exit path, not only on success
        if not is_json:
            raw_data.close()

    # save observation labels, and return
    return {
        'dataset': list_dataset,
        'observation_labels': observation_labels,
        'feature_count': feature_count
    }
    def csv_to_dict(self):
        """@csv_to_dict

        Parse the csv file-object held in 'self.svm_data' into a list of
        dictionaries, one per feature value.

        The first row supplies the feature labels (its first column is
        skipped). Every following row supplies an observation label in its
        first column, followed by one numeric value per feature.

        Side effects on success: 'self.svm_data' is closed, the observation
        labels are stored in 'self.observation_labels', and
        'self.count_features' is set when it was previously falsy.

        Returns the list of dictionaries, None when a label, or a value
        fails validation, and False when validating, or converting a value
        raises an exception. Failures are printed.

        Note: the reader splits on spaces, so each parsed row is a list
            whose first element is the comma-delimited line, which is split
            manually:

                row[0].split(',')

        """

        converted = []
        dep_labels = []
        indep_labels = []

        # open temporary 'csvfile' reader object
        reader = csv.reader(self.svm_data, delimiter=' ', quotechar='|')

        # header row: every column after the first is a feature label
        header = next(reader, None)
        if header is not None:
            for label in header[0].split(',')[1:]:
                checker = Validate_Dataset(label)
                checker.validate_label()

                problems = checker.get_errors()
                if problems:
                    print(problems)
                    return None
                indep_labels.append(label)

        # remaining rows: one observation per row
        for row in reader:
            columns = row[0].split(',')

            # first column is the observation label
            dep_label = columns[0]
            checker = Validate_Dataset(dep_label)
            checker.validate_label()

            problems = checker.get_errors()
            if problems:
                print(problems)
                return None
            dep_labels.append(dep_label)

            # generalized feature count in an observation
            if not self.count_features:
                self.count_features = len(columns) - 1

            # remaining columns are the feature values
            for position, raw_value in enumerate(columns[1:]):
                try:
                    checker = Validate_Dataset(raw_value)
                    checker.validate_value()

                    problems = checker.get_errors()
                    if problems:
                        print(problems)
                        return None
                    number = float(raw_value)
                except Exception as error:
                    print(error)
                    return False

                converted.append({
                    'dep_variable_label': dep_label,
                    'indep_variable_label': indep_labels[position],
                    'indep_variable_value': number
                })

        # close file, save observation labels, and return
        self.svm_data.close()
        self.observation_labels = dep_labels
        return converted
    def xml_to_dict(self):
        """@xml_to_dict

        This method converts the xml file-object held in 'self.svm_data' to
        a list of dictionaries, one per feature.

        Each 'observation' element is expected to contain a
        'dependent-variable', and one or more 'independent-variable'
        elements, each having a 'label', and a 'value'.

        Side effects on success: 'self.svm_data' is closed, the observation
        labels are stored in 'self.observation_labels', and
        'self.count_features' is set when it was previously falsy. The file
        is left open when validation fails.

        Returns the list of dictionaries, or None when an observation label,
        a feature label, or a feature value fails validation (the errors are
        printed).

        """

        list_dataset = []
        list_observation_label = []

        # convert xml file to python 'dict'
        dataset = xmltodict.parse(self.svm_data)

        # xmltodict returns a lone child element as a mapping, not a list
        observations = dataset['dataset']['observation']
        if not isinstance(observations, list):
            observations = [observations]

        # build 'list_dataset'
        for observation in observations:
            observation_label = observation['dependent-variable']

            validate = Validate_Dataset(observation_label)
            validate.validate_label()

            list_error = validate.get_errors()
            if list_error:
                print(list_error)
                return None
            list_observation_label.append(observation_label)

            features = observation['independent-variable']
            if not isinstance(features, list):
                features = [features]

            for feature in features:
                feature_label = feature['label']
                feature_value = feature['value']

                validate_label = Validate_Dataset(feature_label)
                validate_value = Validate_Dataset(feature_value)

                validate_label.validate_label()
                validate_value.validate_value()

                # errors must be read from the feature validators, not from
                # the observation label validator
                list_error_label = validate_label.get_errors()
                list_error_value = validate_value.get_errors()
                if list_error_label or list_error_value:
                    print(list_error_label)
                    print(list_error_value)
                    return None

                list_dataset.append({
                    'dep_variable_label': observation_label,
                    'indep_variable_label': feature_label,
                    'indep_variable_value': feature_value
                })

            # generalized feature count in an observation
            if not self.count_features:
                self.count_features = len(features)

        # close file, save observation labels, and return
        self.svm_data.close()
        self.observation_labels = list_observation_label
        return list_dataset
Exemple #8
0
def svm_csv_converter(raw_data):
    '''

    This method converts the supplied csv file-object, intended for an svm
    model, to a python dictionary.

    @raw_data, an open file-object containing the csv dataset. It is always
        closed before this method returns, including on failure.

    The first row supplies the feature labels (its first column is skipped).
    Every following row supplies an observation label in its first column,
    followed by one numeric value per feature. Labels are stored as strings,
    without validation. Blank lines are skipped.

    Returns a dictionary with the keys 'dataset', 'observation_labels', and
    'feature_count'. Returns None when a value fails validation, and False
    when validating, or converting a value raises an exception. In both
    failure cases the problem is logged.

    Note: the reader is configured with a space delimiter, so each parsed
          'row' is a list whose first element is the comma-delimited line.
          That element is split manually:

          row[0].split(',')

          Consequently, anything following a space on a line is not part of
          'row[0]', and is ignored.

    '''

    feature_count = None
    list_dataset = []
    list_observation_label = []
    list_feature_label = []
    logger = Logger(__name__, 'error', 'error')

    try:
        # open temporary 'csvfile' reader object
        dataset_reader = csv.reader(
            raw_data,
            delimiter=' ',
            quotechar='|'
        )

        # first row of csvfile: feature labels, skipping the first column
        header = next(dataset_reader, None)
        if header:
            for value in header[0].split(',')[1:]:
                list_feature_label.append(str(value))

        # remaining rows of csvfile: one observation per row
        for row in dataset_reader:
            # a blank line is parsed as an empty row: nothing to convert
            if not row:
                continue

            columns = row[0].split(',')

            # first column is the observation label
            observation_label = str(columns[0])
            list_observation_label.append(observation_label)

            # generalized feature count in an observation
            if not feature_count:
                feature_count = len(columns) - 1

            # remaining columns are the feature values
            for indep_index, value in enumerate(columns[1:]):
                try:
                    validate = Validate_Dataset(value)
                    validate.validate_value()

                    list_error = validate.get_errors()
                    if list_error:
                        logger.log(list_error)
                        return None
                    value = float(value)
                except Exception as error:
                    logger.log(error)
                    return False

                list_dataset.append({
                    'dep_variable_label': observation_label,
                    'indep_variable_label': list_feature_label[indep_index],
                    'indep_variable_value': value
                })

    finally:
        # release the file on every exit path, not only on success
        raw_data.close()

    # save observation labels, and return
    return {
        'dataset': list_dataset,
        'observation_labels': list_observation_label,
        'feature_count': feature_count
    }
    def csv_to_dict(self):
        """Parse the csv file-object in 'self.svm_file' into a list of dicts.

        The first row supplies the feature labels (its first column is
        skipped). Every following row supplies an observation label in its
        first column, followed by one numeric value per feature.

        Side effects on success: 'self.svm_file' is closed, the observation
        labels are stored in 'self.observation_labels', and
        'self.count_features' is set when it was previously falsy.

        Returns the list of dictionaries, None when a label, or a value
        fails validation, and False when validating, or converting a value
        raises an exception. Failures are printed.
        """
        converted = []
        dep_labels = []
        feature_labels = []

        # the reader splits on spaces, so each row arrives as one
        # comma-delimited string, which is split manually below
        reader = csv.reader(self.svm_file, delimiter=' ', quotechar='|')

        # header row: every column after the first is a feature label
        header = next(reader, None)
        if header is not None:
            for label in header[0].split(',')[1:]:
                checker = Validate_Dataset(label)
                checker.validate_label()

                problems = checker.get_errors()
                if problems:
                    print(problems)
                    return None
                feature_labels.append(label)

        # remaining rows: one observation per row
        for row in reader:
            columns = row[0].split(',')

            # first column is the observation label
            dep_label = columns[0]
            checker = Validate_Dataset(dep_label)
            checker.validate_label()

            problems = checker.get_errors()
            if problems:
                print(problems)
                return None
            dep_labels.append(dep_label)

            # generalized feature count in an observation
            if not self.count_features:
                self.count_features = len(columns) - 1

            # remaining columns are the feature values
            for position, raw_value in enumerate(columns[1:]):
                try:
                    checker = Validate_Dataset(raw_value)
                    checker.validate_value()

                    problems = checker.get_errors()
                    if problems:
                        print(problems)
                        return None
                    number = float(raw_value)
                except Exception as error:
                    print(error)
                    return False

                converted.append({
                    'dep_variable_label': dep_label,
                    'indep_variable_label': feature_labels[position],
                    'indep_variable_value': number
                })

        # close file, save observation labels, and return
        self.svm_file.close()
        self.observation_labels = dep_labels
        return converted
    def xml_to_dict(self):
        """Convert the xml file-object in 'self.svm_file' to a list of dicts.

        Each 'observation' element is expected to contain a
        'dependent-variable', and one or more 'independent-variable'
        elements, each having a 'label', and a 'value'.

        Side effects on success: 'self.svm_file' is closed, the observation
        labels are stored in 'self.observation_labels', and
        'self.count_features' is set when it was previously falsy. The file
        is left open when validation fails.

        Returns the list of dictionaries, or None when an observation label,
        a feature label, or a feature value fails validation (the errors are
        printed).
        """
        list_dataset = []
        observation_label = []

        # convert xml file to python 'dict'
        dataset = xmltodict.parse(self.svm_file)

        # xmltodict returns a lone child element as a mapping, not a list
        observations = dataset['dataset']['observation']
        if not isinstance(observations, list):
            observations = [observations]

        # build 'list_dataset'
        for dep_variable in observations:
            dep_variable_label = dep_variable['dependent-variable']

            validate = Validate_Dataset(dep_variable_label)
            validate.validate_label()

            list_error = validate.get_errors()
            if list_error:
                print(list_error)
                return None
            observation_label.append(dep_variable_label)

            indep_variables = dep_variable['independent-variable']
            if not isinstance(indep_variables, list):
                indep_variables = [indep_variables]

            for indep_variable in indep_variables:
                indep_variable_label = indep_variable['label']
                indep_variable_value = indep_variable['value']

                validate_label = Validate_Dataset(indep_variable_label)
                validate_value = Validate_Dataset(indep_variable_value)

                validate_label.validate_label()
                validate_value.validate_value()

                # errors must be read from the feature validators, not from
                # the observation label validator
                list_error_label = validate_label.get_errors()
                list_error_value = validate_value.get_errors()
                if list_error_label or list_error_value:
                    print(list_error_label)
                    print(list_error_value)
                    return None

                list_dataset.append({
                    'dep_variable_label': dep_variable_label,
                    'indep_variable_label': indep_variable_label,
                    'indep_variable_value': indep_variable_value
                })

            # generalized feature count in an observation
            if not self.count_features:
                self.count_features = len(indep_variables)

        # close file, save observation labels, and return
        self.svm_file.close()
        self.observation_labels = observation_label
        return list_dataset
Exemple #11
0
def svm_json_converter(raw_data, is_json):
    '''

    This method converts the supplied json dataset to a python dictionary.

    @raw_data, depends on 'is_json':

        - falsy (web-interface): an open file-object containing json, which
          maps each dependent variable label to its observation(s). The
          file is closed as soon as it has been loaded, even when loading
          fails.
        - truthy (programmatic-interface): a sequence whose first element
          is the dependent variable label, and whose second element holds
          the observation(s) for that label.

    @is_json, flag selecting the programmatic-interface described above.

    Observation(s) are either a dictionary of features (one observation), or
    a list of such dictionaries (multiple observations). Anything else
    contributes no features.

    Feature values failing validation are logged, and left out of the
    result; they do not abort the conversion.

    Returns a dictionary with the keys 'dataset', 'observation_labels', and
    'feature_count'.

    '''

    # local variables
    feature_count = None
    list_dataset = []
    observation_labels = []
    logger = Logger(__name__, 'error', 'error')

    # programmatic-interface: a single (label, observations) pair
    if is_json:
        entries = [(raw_data[0], raw_data[1])]

    # web-interface: every label found in the loaded file
    else:
        try:
            dataset = json.load(raw_data)
        finally:
            # the content is fully read (or unreadable) at this point
            raw_data.close()
        entries = [(label, dataset[label]) for label in dataset]

    for observation_label, observations in entries:
        # list of observation label
        observation_labels.append(observation_label)

        # normalize to a list of feature dictionaries
        if isinstance(observations, dict):
            feature_sets = [observations]
        elif isinstance(observations, list):
            feature_sets = observations
        else:
            feature_sets = []

        for feature_set in feature_sets:
            for feature_label, feature_value in feature_set.items():
                # validation
                validate_fvalue = Validate_Dataset(feature_value)
                validate_fvalue.validate_value()

                list_error = validate_fvalue.get_errors()
                if list_error:
                    logger.log(list_error)
                    continue

                # restructured data
                list_dataset.append({
                    'dep_variable_label': str(observation_label),
                    'indep_variable_label': str(feature_label),
                    'indep_variable_value': feature_value
                })

            # generalized feature count in an observation
            if not feature_count:
                feature_count = len(feature_set)

    # save observation labels, and return
    return {
        'dataset': list_dataset,
        'observation_labels': observation_labels,
        'feature_count': feature_count
    }
def svm_json_converter(raw_data, is_json):
    '''

    This method restructures a json dataset into a flat list of dictionaries,
    one per (observation, feature) pair.

    @raw_data, either an open file-object containing json (web-interface),
        or an already decoded sequence (programmatic-interface), where
        'raw_data[0]' is the observation label, and 'raw_data[1]' is a
        dictionary of features, or a list of such dictionaries. A supplied
        file is always closed before this method returns, or raises.

    @is_json, flag indicating 'raw_data' is already decoded. A false value
        means 'raw_data' is a file-object, which is parsed with 'json.load'.

    @observation_labels, is a list containing dependent variable labels.

    Returns a dictionary with the keys 'dataset', 'observation_labels', and
    'feature_count'. Feature values failing validation are logged, and
    excluded from 'dataset'.

    '''

    # local variables
    feature_count = None
    list_dataset = []
    observation_labels = []
    logger = Logger(__name__, 'error', 'error')

    def restructure(observation_label, features):
        # validate each feature: log failures, keep the remaining values
        for feature_label, feature_value in features.items():
            validate_fvalue = Validate_Dataset(feature_value)
            validate_fvalue.validate_value()

            list_error = validate_fvalue.get_errors()
            if list_error:
                logger.log(list_error)
            else:
                list_dataset.append({
                    'dep_variable_label': str(observation_label),
                    'indep_variable_label': str(feature_label),
                    'indep_variable_value': feature_value
                })

    try:
        # web-interface: each key of the decoded json is an observation label
        if not is_json:
            dataset = json.load(raw_data)
            labelled = [(label, dataset[label]) for label in dataset]

        # programmatic-interface: a single (label, features) pair
        else:
            labelled = [(raw_data[0], raw_data[1])]

        for observation_label, observations in labelled:
            # dependent variable with single observation
            if isinstance(observations, dict):
                feature_sets = [observations]

            # dependent variable with multiple observations
            elif isinstance(observations, list):
                feature_sets = observations

            # unsupported structure: only the label is recorded
            else:
                feature_sets = []

            for feature_set in feature_sets:
                restructure(observation_label, feature_set)

                # generalized feature count in an observation
                if not feature_count:
                    feature_count = len(feature_set)

            # list of observation label
            observation_labels.append(observation_label)

    # close file, even when parsing, or validation raises
    finally:
        if not is_json:
            raw_data.close()

    # save observation labels, and return
    return {
        'dataset': list_dataset,
        'observation_labels': observation_labels,
        'feature_count': feature_count
    }
def svr_json_converter(raw_data, is_json):
    '''

    This method restructures a json dataset into a flat list of dictionaries,
    one per (criterion, predictor) pair.

    @raw_data, either an open file-object containing json (web-interface),
        or an already decoded dictionary (programmatic-interface). In both
        cases, each key is a criterion label, mapped to a dictionary of
        predictors, or to a list of such dictionaries. A supplied file is
        always closed before this method returns, or raises.

    @is_json, flag indicating 'raw_data' is already decoded. A false value
        means 'raw_data' is a file-object, which is parsed with 'json.load'.

    @observation_labels, is a list containing dependent variable labels.

    Returns a dictionary with the keys 'dataset', 'observation_labels', and
    'feature_count'. Predictor values failing validation are logged, and
    excluded from 'dataset'.

    '''

    # local variables
    feature_count = None
    list_dataset = []
    observation_labels = []
    logger = Logger(__name__, 'error', 'error')

    def restructure(criterion, predictors):
        # validate each predictor: log failures, keep the remaining values
        for label, predictor in predictors.items():
            validate_predictor = Validate_Dataset(predictor)
            validate_predictor.validate_value()

            list_error = validate_predictor.get_errors()
            if list_error:
                logger.log(list_error)
            else:
                list_dataset.append({
                    'dep_variable_label': str(criterion),
                    'indep_variable_label': str(label),
                    'indep_variable_value': predictor
                })

    try:
        # web-interface requires decoding, programmatic-interface does not
        if not is_json:
            dataset = json.load(raw_data)
        else:
            dataset = raw_data

        for criterion, predictors in dataset.items():
            # list of observation label
            observation_labels.append(criterion)

            # criterion with single observation
            if isinstance(predictors, dict):
                restructure(criterion, predictors)

                # generalized feature count in an observation
                if not feature_count:
                    feature_count = len(predictors)

            # criterion with multiple observation
            elif isinstance(predictors, list):
                for observation in predictors:
                    restructure(criterion, observation)

                    # generalized feature count in an observation: an empty
                    # observation leaves the count undetermined. The count
                    # is only read from a dictionary, never from the list
                    # itself, which has no 'items' method.
                    if not feature_count and observation:
                        feature_count = len(observation)

    # close file, even when parsing, or validation raises
    finally:
        if not is_json:
            raw_data.close()

    # save observation labels, and return
    return {
        'dataset': list_dataset,
        'observation_labels': observation_labels,
        'feature_count': feature_count
    }
def svr_json_converter(raw_data, is_json):
    '''@svr_json_converter

    This method restructures a json dataset into a flat list of dictionaries,
    one per (criterion, predictor) pair.

    @raw_data, either an open file-object containing json (web-interface),
        or an already decoded dictionary (programmatic-interface). In both
        cases, each key is a criterion label, mapped to a dictionary of
        predictors, or to a list of such dictionaries. A supplied file is
        always closed before this method returns, or raises.

    @is_json, flag indicating 'raw_data' is already decoded. A false value
        means 'raw_data' is a file-object, which is parsed with 'json.load'.

    @observation_labels, is a list containing dependent variable labels.

    Returns a dictionary with the keys 'dataset', 'observation_labels', and
    'feature_count'. Predictor values failing validation are logged, and
    excluded from 'dataset'.

    '''

    # local variables
    feature_count = None
    list_dataset = []
    observation_labels = []
    logger = Logger(__name__, 'error', 'error')

    def restructure(dep_label, predictors, validate_as_str=False):
        # validate each predictor: log failures, keep the remaining values
        for label, predictor in predictors.items():
            if validate_as_str:
                validate_predictor = Validate_Dataset(str(predictor))
            else:
                validate_predictor = Validate_Dataset(predictor)
            validate_predictor.validate_value()

            list_error = validate_predictor.get_errors()
            if list_error:
                logger.log(list_error)
            else:
                list_dataset.append({
                    'dep_variable_label': dep_label,
                    'indep_variable_label': str(label),
                    'indep_variable_value': predictor
                })

    try:
        # web-interface requires decoding, programmatic-interface does not
        if not is_json:
            dataset = json.load(raw_data)
        else:
            dataset = raw_data

        for criterion, predictors in dataset.items():
            # list of observation label
            observation_labels.append(criterion)

            # criterion with single observation
            if isinstance(predictors, dict):
                # NOTE(review): only the web-interface, single observation
                # case validates the stringified value, and stores the
                # criterion label unconverted. This is kept as found:
                # confirm whether the remaining cases should match.
                if not is_json:
                    restructure(criterion, predictors, True)
                else:
                    restructure(str(criterion), predictors)

                # generalized feature count in an observation
                if not feature_count:
                    feature_count = len(predictors)

            # criterion with multiple observation
            elif isinstance(predictors, list):
                for observation in predictors:
                    restructure(str(criterion), observation)

                    # generalized feature count in an observation: an empty
                    # observation leaves the count undetermined. The count
                    # is only read from a dictionary, never from the list
                    # itself, which has no 'items' method.
                    if not feature_count and observation:
                        feature_count = len(observation)

    # close file, even when parsing, or validation raises
    finally:
        if not is_json:
            raw_data.close()

    # save observation labels, and return
    return {
        'dataset': list_dataset,
        'observation_labels': observation_labels,
        'feature_count': feature_count
    }
# Example #15
# 0
    def csv_to_dict(self):
        """@csv_to_dict

        This method converts the supplied csv file-object, 'self.svm_data',
        to a list of dictionaries, one per (observation, feature) pair. The
        first row supplies the feature labels, and the first column of every
        following row supplies the observation label.

        @list_observation_label, is a list containing dependent variable
            labels. It is saved as 'self.observation_labels' on success.

        Returns the list of dictionaries on success, 'None' when a label, or
        a value fails validation, and 'False' when validating, or converting
        a value raises an exception. On each failure the error is printed.
        'self.svm_data' is closed on every path.

        Note: 'self.count_features' is set from the first data row, unless it
            already holds a true value.

        Note: since 'row' is a list, with one comma-delimited string element,
            the following line is required in this method:

                row = row[0].split(',')

        """

        list_dataset = []
        list_observation_label = []
        list_feature_label = []

        try:
            # open temporary 'csvfile' reader object
            dataset_reader = csv.reader(self.svm_data,
                                        delimiter=' ',
                                        quotechar='|')

            # first row: feature labels, where the first column is skipped
            header = next(dataset_reader, None)
            if header:
                for value in islice(header[0].split(','), 1, None):
                    validate = Validate_Dataset(value)
                    validate.validate_label()

                    list_error = validate.get_errors()
                    if list_error:
                        print(list_error)
                        return None
                    list_feature_label.append(value)

            # remaining rows: one observation per row
            for row in dataset_reader:
                # a blank line is parsed as an empty row: nothing to convert
                if not row:
                    continue

                row_values = row[0].split(',')

                # first column is the dependent variable label
                dep_label = row_values[0]
                validate = Validate_Dataset(dep_label)
                validate.validate_label()

                list_error = validate.get_errors()
                if list_error:
                    print(list_error)
                    return None
                list_observation_label.append(dep_label)

                # generalized feature count in an observation
                if not self.count_features:
                    self.count_features = len(row_values) - 1

                # remaining columns are the independent variable values
                for indep_index, value in enumerate(
                        islice(row_values, 1, None)):

                    try:
                        validate = Validate_Dataset(value)
                        validate.validate_value()

                        list_error = validate.get_errors()
                        if list_error:
                            print(list_error)
                            return None
                        value = float(value)
                    except Exception as error:
                        print(error)
                        return False

                    list_dataset.append({
                        'dep_variable_label': dep_label,
                        'indep_variable_label':
                        list_feature_label[indep_index],
                        'indep_variable_value': value
                    })

            # save observation labels, and return
            self.observation_labels = list_observation_label
            return list_dataset

        # close file, including on each early return
        finally:
            self.svm_data.close()
# Example #16
# 0
    def xml_to_dict(self):
        """@xml_to_dict

        This method converts the supplied xml file-object, 'self.svm_data',
        to a list of dictionaries, one per (observation, feature) pair.

        @list_observation_label, is a list containing dependent variable
            labels. It is saved as 'self.observation_labels' on success.

        Returns the list of dictionaries on success, or 'None' when any
        observation label, feature label, or feature value fails validation,
        in which case the errors are printed. 'self.svm_data' is closed on
        every path.

        Note: 'self.count_features' is set from the first observation,
            unless it already holds a true value.

        """

        list_dataset = []
        list_observation_label = []

        try:
            # convert xml file to python 'dict'
            dataset = xmltodict.parse(self.svm_data)

            # a lone child element is parsed as a mapping, instead of a list
            observations = dataset['dataset']['observation']
            if not isinstance(observations, list):
                observations = [observations]

            # build 'list_dataset'
            for observation in observations:
                observation_label = observation['dependent-variable']

                validate = Validate_Dataset(observation_label)
                validate.validate_label()

                list_error = validate.get_errors()
                if list_error:
                    print(list_error)
                    return None
                list_observation_label.append(observation_label)

                # same normalization, for a lone independent variable
                features = observation['independent-variable']
                if not isinstance(features, list):
                    features = [features]

                for feature in features:
                    feature_label = feature['label']
                    feature_value = feature['value']

                    validate_label = Validate_Dataset(feature_label)
                    validate_value = Validate_Dataset(feature_value)

                    validate_label.validate_label()
                    validate_value.validate_value()

                    # errors are read from the validator of each field, not
                    # from the observation label validator
                    list_error_label = validate_label.get_errors()
                    list_error_value = validate_value.get_errors()
                    if list_error_label or list_error_value:
                        print(list_error_label)
                        print(list_error_value)
                        return None

                    list_dataset.append({
                        'dep_variable_label': observation_label,
                        'indep_variable_label': feature_label,
                        'indep_variable_value': feature_value
                    })

                # generalized feature count in an observation
                if not self.count_features:
                    self.count_features = len(features)

            # save observation labels, and return
            self.observation_labels = list_observation_label
            return list_dataset

        # close file, including on each early return
        finally:
            self.svm_data.close()