Python DataSet.attribute_types Examples

Programming Language: Python

Namespace/Package Name: DataSet

Class/Type: DataSet

Method/Function: attribute_types

Examples at hotexamples.com: 2

Python DataSet.attribute_types - 2 examples found. These are the top-rated real-world Python examples of DataSet.DataSet.attribute_types, extracted from open source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

DataSet(30)

add_input(12)

add_raw(12)

add_seg(11)

add_gt(7)

getEmbedding(5)

getTestNeg(5)

getTrainInstance(4)

heart_sound_scoring(4)

getInstances(4)

getTestInstance(4)

__init__(3)

getExamples(3)

add(3)

getInputs(3)

formatToBinary(3)

build_train_data(2)

add_item(2)

discretize_values(2)

attributes(2)

attribute_types(2)

addExpectedMeta(2)

fileName(2)

from_filenames(2)

generateDataBatch(2)

get_test_data_obo(2)

getAttributes(2)

PreparingData(2)

getMaxValue(1)

getMimeType(1)

info(1)

getLabels(1)

getMinValue(1)

getFeatureEmbedding(1)

getExamplesByClass(1)

getGeometryType(1)

getPixelRes(1)

getMissingIndex(1)

get_image(1)

import_from_csv(1)

headers(1)

get_train_dataBatch(1)

get_time_cnt(1)

get_test_RGB_image(1)

get_node_dim(1)

getTrainingAndTestingSets(1)

getNodes(1)

getTrainValidateTestSet(1)

getTrainTestSet(1)

getTrainAll(1)

Example #1

Show file

File: prepare_data.py Project: KashaVarya/Vaccination

def prepare_data(filename, n_attr):
    """Load a comma-separated file into a ``DataSet`` and preprocess it.

    The first row of the file is taken as the list of attribute names, and
    the last attribute name is used as the classifier (target) column.

    Args:
        filename: Path of the CSV file to read.
        n_attr: Number of entries to create in ``attribute_types``.

    Returns:
        The populated ``DataSet`` after ``preprocessing()`` has been called.

    Raises:
        IndexError: If the file contains no rows (there is no header to pop).
    """
    data_set = DataSet("")

    # The csv module expects the file to be opened with newline="" so that
    # line breaks embedded in quoted fields are parsed correctly.
    with open(filename, newline="") as f:
        data_set.rows = list(csv.reader(f, delimiter=","))

    # The header row holds the attribute names; the remaining rows are data.
    data_set.attributes = data_set.rows.pop(0)

    # attribute_types generalizes the code to other data sets:
    # 'true' marks a numeric attribute, 'false' marks a nominal one, e.g.
    # ['false', 'true', 'false', 'false', 'true', 'true', 'false'].
    # Here every attribute is declared nominal.
    data_set.attribute_types = ['false' for _ in range(n_attr)]

    # The last column is the one to be predicted.
    data_set.classifier = data_set.attributes[-1]

    # Index of the classifier column (first attribute with that name).
    data_set.class_col_index = data_set.attributes.index(data_set.classifier)

    data_set.preprocessing()

    return data_set

Example #2

Show file

File: DecisionTreeNode.py Project: KashaVarya/Vaccination

    def compute_decision_tree(self, dataset, parent_node):
        """Recursively build a binary decision tree for ``dataset``.

        A node becomes a leaf when every row is positive, when no row is
        positive, when the tree is already deeper than the height cap, or
        when no candidate split yields more than the minimum information
        gain. Otherwise the rows are partitioned on the best
        (attribute, value) pair and both halves are processed recursively.

        Args:
            dataset: Data set exposing ``rows``, ``attributes``,
                ``attribute_types``, ``classifier``, ``count_positives()``,
                ``calculate_entropy()`` and ``calculate_information_gain()``.
            parent_node: Parent of the node being created, or ``None`` for
                the root.

        Returns:
            The newly created node, with children attached if it was split.
        """
        max_height = 20             # nodes deeper than this become leaves
        minimum_info_gain = 0.01    # a split must gain more than this
        max_candidate_values = 100  # above this, sample split candidates

        node = DecisionTreeNode(parent_node)
        node.height = 0 if parent_node is None else node.parent.height + 1

        # count_positives() counts the rows with classification "1".
        ones = dataset.count_positives()

        if len(dataset.rows) == ones:
            node.classification = 1
            node.is_leaf_node = True
            return node
        if ones == 0:
            node.is_leaf_node = True
            node.classification = 0
            return node
        node.is_leaf_node = False

        # Past the height cap the node is a leaf whatever the best split is,
        # so decide that before paying for the information-gain search.
        if node.height > max_height:
            node.is_leaf_node = True
            node.classification = self.classify_leaf(dataset)
            return node

        # Index of the attribute we will split on, and the value to split at.
        splitting_attribute = None
        split_val = None

        # The information gain given by the best split found so far.
        maximum_info_gain = 0

        entropy = dataset.calculate_entropy()

        # Examine every column except the classifier column.
        for attr_index in range(len(dataset.rows[0])):
            if dataset.attributes[attr_index] == dataset.classifier:
                continue

            # Distinct values of this attribute are the split candidates.
            candidates = list(set(row[attr_index] for row in dataset.rows))

            # With many distinct values, only try the nine values at the
            # 10%, 20%, ..., 90% positions of the sorted distinct values.
            if len(candidates) > max_candidate_values:
                candidates.sort()
                step = len(candidates) // 10
                candidates = [candidates[k * step] for k in range(1, 10)]

            for val in candidates:
                current_gain = dataset.calculate_information_gain(
                    attr_index, val, entropy)

                # Strict comparison: the first split reaching the maximum
                # gain is kept, later ties do not replace it.
                if current_gain > maximum_info_gain:
                    maximum_info_gain = current_gain
                    split_val = val
                    splitting_attribute = attr_index

        if maximum_info_gain <= minimum_info_gain:
            node.is_leaf_node = True
            node.classification = self.classify_leaf(dataset)
            return node

        # From here on a split was found, so splitting_attribute is set.
        node.attribute_split_index = splitting_attribute
        node.attribute_split = dataset.attributes[splitting_attribute]
        node.attribute_split_value = split_val

        left_dataset = DataSet(dataset.classifier)
        right_dataset = DataSet(dataset.classifier)

        # Both halves share the parent's attribute metadata.
        left_dataset.attributes = dataset.attributes
        right_dataset.attributes = dataset.attributes

        left_dataset.attribute_types = dataset.attribute_types
        right_dataset.attribute_types = dataset.attribute_types

        # Rows at or above the split value go left, the rest go right.
        for row in dataset.rows:
            if row[splitting_attribute] >= split_val:
                left_dataset.rows.append(row)
            else:
                right_dataset.rows.append(row)

        node.left_child = self.compute_decision_tree(left_dataset, node)
        node.right_child = self.compute_decision_tree(right_dataset, node)

        return node