Python IGFeatureSelection Examples

Programming Language: Python

Namespace/Package Name: FeatureSelection

Examples at hotexamples.com: 3

Python IGFeatureSelection - 3 examples found. These are the top rated real world Python examples of FeatureSelection.IGFeatureSelection extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

selectFeatures(1)

Example #1

Show file

File: MultinomialNB.py Project: satyajitvg/textclassifier

 def __init__(self, num_features, min_df=10):
     """Create empty statistics tables and the information-gain selector.

     ``num_features`` and ``min_df`` are passed through unchanged to
     ``IGFeatureSelection``; ``min_df`` defaults to 10.
     """
     def int_table():
         # Inner mapping whose missing keys read as 0.
         return defaultdict(int)

     def float_table():
         # Inner mapping whose missing keys read as 0.0.
         return defaultdict(float)

     # Scalar / flat accumulators.
     self.total_tf = 0
     self.priors = {}
     self.class_label_counts = defaultdict(int)

     # Two-level tables; both levels create missing entries on access.
     self.condprobs = defaultdict(float_table)
     self.token_tf_counts = defaultdict(int_table)
     # Info-Gain requires knowing document frequency counts
     self.token_df_counts = defaultdict(int_table)

     self.feature_selection = IGFeatureSelection(num_features, min_df)

Example #2

Show file

File: MultinomialNB.py Project: satyajitvg/textclassifier

class NBTrain:
    """Accumulate per-class token statistics and fit Naive Bayes parameters.

    Documents are added one at a time with ``addDocument``. ``train`` then
    computes class priors and add-one-smoothed conditional probabilities for
    the tokens kept by the feature selector.
    """

    def __init__(self, num_features, min_df=10):
        """Create empty statistics tables and the feature selector.

        Args:
            num_features: Passed through to ``IGFeatureSelection``.
            min_df: Passed through to ``IGFeatureSelection`` (default 10).
        """
        self.priors = {}
        self.condprobs = defaultdict(lambda: defaultdict(float))
        self.class_label_counts = defaultdict(int)
        # token -> class_label -> number of occurrences of the token
        self.token_tf_counts = defaultdict(lambda: defaultdict(int))
        # token -> class_label -> number of documents containing the token
        # (Info-Gain requires knowing document frequency counts)
        self.token_df_counts = defaultdict(lambda: defaultdict(int))
        self.total_tf = 0
        self.feature_selection = IGFeatureSelection(num_features, min_df)

    def addDocument(self, tokens, class_label):
        """Record one labelled document in the count tables.

        Args:
            tokens: Sized iterable of hashable tokens making up the document.
            class_label: Hashable label of the class the document belongs to.
        """
        self.total_tf += len(tokens)
        self.class_label_counts[class_label] += 1
        for token in tokens:
            self.token_tf_counts[token][class_label] += 1  # Term frequencies
        # De-duplicate so each distinct token is counted once per document.
        for token in set(tokens):
            self.token_df_counts[token][class_label] += 1  # Document frequencies

    def train(self):
        """Compute priors and smoothed conditional probabilities.

        Returns:
            A ``(priors, condprobs)`` tuple: ``priors`` maps each class label
            to its share of the added documents; ``condprobs`` maps
            token -> class label -> smoothed probability, and is filled only
            for the tokens returned by the feature selector.
        """
        # Calculate class priors.
        doc_count = sum(self.class_label_counts.values())
        for class_label, count in self.class_label_counts.items():
            self.priors[class_label] = count / doc_count

        # Calculate the count of each token in every class. If a token was
        # never seen in a class its count is an explicit 0. ``.get`` is used
        # so the lookups do not insert entries into the instance tables.
        token_tf_counts = defaultdict(lambda: defaultdict(int))
        token_df_counts = defaultdict(lambda: defaultdict(int))
        for token, tf_by_class in self.token_tf_counts.items():
            df_by_class = self.token_df_counts.get(token, {})
            for class_label in self.class_label_counts:
                token_tf_counts[token][class_label] = tf_by_class.get(class_label, 0)
                token_df_counts[token][class_label] = df_by_class.get(class_label, 0)

        selected_tokens = self.feature_selection.selectFeatures(token_df_counts, self.class_label_counts)

        # Calculate conditional probabilities for the selected tokens using
        # add-one smoothing.
        # NOTE(review): the denominator uses the corpus-wide token total
        # rather than a per-class token total - confirm this is intended.
        vocabulary_size = len(self.token_tf_counts)
        denominator = self.total_tf + vocabulary_size
        for token in selected_tokens.keys():
            for class_label in self.class_label_counts:
                self.condprobs[token][class_label] = (token_tf_counts[token][class_label] + 1) / denominator
        return self.priors, self.condprobs

Example #3

Show file

File: BernoulliNB.py Project: satyajitvg/textclassifier

 def __init__(self, num_features, min_df=10):
     """Create empty statistics tables and the information-gain selector.

     ``num_features`` and ``min_df`` are passed through unchanged to
     ``IGFeatureSelection``; ``min_df`` defaults to 10.
     """
     def int_table():
         # Inner mapping whose missing keys read as 0.
         return defaultdict(int)

     def float_table():
         # Inner mapping whose missing keys read as 0.0.
         return defaultdict(float)

     # Flat containers.
     self.priors = {}
     self.class_label_counts = defaultdict(int)

     # Two-level tables; both levels create missing entries on access.
     self.condprobs = defaultdict(float_table)
     self.token_df_counts = defaultdict(int_table)

     self.feature_selection = IGFeatureSelection(num_features, min_df)