Ejemplos de DataHandler.get_freq en Python

Lenguaje de programación: Python

Namespace/Package Name: datahandler

Clase / Tipo: DataHandler

Método / Función: get_freq

Ejemplos en hotexamples.com: 2

Python DataHandler.get_freq - 2 ejemplos encontrados. Estos son los ejemplos en Python del mundo real mejor valorados de datahandler.DataHandler.get_freq extraídos de proyectos de código abierto. Puedes valorar ejemplos para ayudarnos a mejorar la calidad de los ejemplos.

Métodos usados con frecuencia

Mostrar Ocultar

DataHandler(30)

generate_data(3)

getOneBatch(3)

getImageData(2)

doNormalization(2)

getData(2)

getClasses(2)

get_evaluation(1)

get_freq(1)

get_histogram(1)

get_list(1)

get_lists(1)

get_loaders(1)

get_nevents(1)

get_daily_change_of_deaths(1)

get_next_test_batch(1)

get_next_train_batch(1)

get_num_items(1)

get_num_test_batches(1)

get_num_training_batches(1)

get_num_training_sessions(1)

get_num_users(1)

get_partitions(1)

process_tweet(1)

get_data(1)

get_batch(1)

get_corrections(1)

delete(1)

addPacket(1)

add_hit(1)

append_data(1)

cleanStemmer(1)

convert_to_sentence(1)

copy(1)

create_database(1)

equals(1)

get_correction(1)

finish(1)

getFeatures(1)

getKFoldData(1)

getTrainSplit(1)

getTrainingData(1)

get_all_hits(1)

SaveFiles(1)

get_changed_rows(1)

remove(1)

Ejemplo n.º 1

Mostrar archivo

Archivo: subset.py Proyecto: ganeshp-cuelogic/sentiment_analysis

class Subset(SubsetBase):
    """A set of row indices backed by a shared ``DataHandler``.

    All computations (label frequencies, gini index, candidate splits)
    are delegated to the data handler; this class only tracks which row
    indices belong to the subset.
    """

    # Number of candidate features sampled for every split.
    K_FEATURES = 10

    def __init__(self, indices, data_handler=None, data=None):
        """Create a subset over ``indices``.

        ``data_handler`` is reused when given; otherwise a new
        ``DataHandler`` is built from ``data``.
        """
        self.indices = indices
        self.labels = None
        if data_handler:
            self.data_handler = data_handler
        else:
            self.data_handler = DataHandler(data)

    def get_size(self):
        """Return the size of the subset, i.e. the number of rows in it."""
        return len(self.indices)

    def purity(self):
        """Return the "purity" of the subset.

        The value is whatever ``DataHandler.gini_index`` reports for the
        subset's indices.
        """
        return self.data_handler.gini_index(self.indices)

    def majority_label(self):
        """Return the mode of all the labels in the subset.

        Returns ``None`` when the frequency table is empty. On a tie the
        first label encountered with the highest count wins.
        """
        labels = self.data_handler.get_freq(self.indices)
        # Walk the frequency table and keep the label with the highest
        # count seen so far.
        majority, count = None, -1
        # items() exists on both Python 2 and Python 3 mappings.
        for label, value in labels.items():
            if value > count:
                # The running maximum must be updated together with the
                # label, otherwise every entry beats the initial -1 and
                # the last label iterated is returned instead of the mode.
                majority, count = label, value

        return majority

    def split(self):
        """Pick the best of ``K_FEATURES`` random splits and apply it.

        Returns a tuple ``(best_feature, threshold, subset_left,
        subset_right)``.

        Raises ``ValueError`` (from ``random.sample``) when fewer than
        ``K_FEATURES`` candidate feature indices are available.
        """
        n, f = self.data_handler.get_shape()
        # Select k features without replacement from indices 1..f-1
        # (index 0 is skipped -- presumably the label column; TODO confirm).
        features = random.sample(range(1, f), self.K_FEATURES)
        # Calculate the gini index of k different splits.
        splits = {}
        for feature in features:
            (gini, threshold) = self.data_handler.test_split(self.indices, feature)
            splits[feature] = {"threshold": threshold, "gini": gini}
        # Find the split with the lowest gini among the candidates above.
        best_feature, threshold, min_gini = None, None, 100
        for feature, results in splits.items():
            if results["gini"] < min_gini:
                best_feature, threshold, min_gini = feature, results["threshold"], results["gini"]
        # Split the subset on the winning feature/threshold.
        subset_left, subset_right = self.get_subsets(best_feature, threshold)
        return best_feature, threshold, subset_left, subset_right

    def get_subsets(self, feature, threshold):
        """Split the current subset in two on ``feature`` and ``threshold``.

        Returns ``(left_subset, right_subset)``; both share this subset's
        data handler.
        """
        left_indices, right_indices = self.data_handler.split(self.indices, feature, threshold)
        left_subset = Subset(left_indices, data_handler=self.data_handler)
        right_subset = Subset(right_indices, data_handler=self.data_handler)

        return left_subset, right_subset

Ejemplo n.º 2

Mostrar archivo

Archivo: subset.py Proyecto: guoqing-zhou/sentiment_analysis

class Subset(SubsetBase):
    """A set of row indices backed by a shared ``DataHandler``.

    All computations (label frequencies, gini index, candidate splits)
    are delegated to the data handler; this class only tracks which row
    indices belong to the subset.
    """

    # Number of candidate features sampled for every split.
    K_FEATURES = 10

    def __init__(self, indices, data_handler=None, data=None):
        """Create a subset over ``indices``.

        ``data_handler`` is reused when given; otherwise a new
        ``DataHandler`` is built from ``data``.
        """
        self.indices = indices
        self.labels = None
        if data_handler:
            self.data_handler = data_handler
        else:
            self.data_handler = DataHandler(data)

    def get_size(self):
        """Return the size of the subset, i.e. the number of rows in it."""
        return len(self.indices)

    def purity(self):
        """Return the "purity" of the subset.

        The value is whatever ``DataHandler.gini_index`` reports for the
        subset's indices.
        """
        return self.data_handler.gini_index(self.indices)

    def majority_label(self):
        """Return the mode of all the labels in the subset.

        Returns ``None`` when the frequency table is empty. On a tie the
        first label encountered with the highest count wins.
        """
        labels = self.data_handler.get_freq(self.indices)
        # Walk the frequency table and keep the label with the highest
        # count seen so far.
        majority, count = None, -1
        # items() exists on both Python 2 and Python 3 mappings.
        for label, value in labels.items():
            if value > count:
                # The running maximum must be updated together with the
                # label, otherwise every entry beats the initial -1 and
                # the last label iterated is returned instead of the mode.
                majority, count = label, value

        return majority

    def split(self):
        """Pick the best of ``K_FEATURES`` random splits and apply it.

        Returns a tuple ``(best_feature, threshold, subset_left,
        subset_right)``.

        Raises ``ValueError`` (from ``random.sample``) when fewer than
        ``K_FEATURES`` candidate feature indices are available.
        """
        n, f = self.data_handler.get_shape()
        # Select k features without replacement from indices 1..f-1
        # (index 0 is skipped -- presumably the label column; TODO confirm).
        features = random.sample(range(1, f), self.K_FEATURES)
        # Calculate the gini index of k different splits.
        splits = {}
        for feature in features:
            (gini,
             threshold) = self.data_handler.test_split(self.indices, feature)
            splits[feature] = {'threshold': threshold, 'gini': gini}
        # Find the split with the lowest gini among the candidates above.
        best_feature, threshold, min_gini = None, None, 100
        for feature, results in splits.items():
            if results['gini'] < min_gini:
                best_feature, threshold, min_gini = feature, results[
                    'threshold'], results['gini']
        # Split the subset on the winning feature/threshold.
        subset_left, subset_right = self.get_subsets(best_feature, threshold)
        return best_feature, threshold, subset_left, subset_right

    def get_subsets(self, feature, threshold):
        """Split the current subset in two on ``feature`` and ``threshold``.

        Returns ``(left_subset, right_subset)``; both share this subset's
        data handler.
        """
        left_indices, right_indices = self.data_handler.split(
            self.indices, feature, threshold)
        left_subset = Subset(left_indices, data_handler=self.data_handler)
        right_subset = Subset(right_indices, data_handler=self.data_handler)

        return left_subset, right_subset