def entropy(objects, question):
    '''Returns an entropy value for a question over the given objects.

    The calculation is heavily modeled on the entropy measure used by the
    ID3 decision tree algorithm. The difference is that here we want what
    would traditionally be a high entropy, so the reciprocal is taken
    before returning, allowing callers to simply minimize the result.'''
    objects = tuple(objects)  # necessary for the SQL IN statement to work
    positives = model.get_num_positives(objects, question.id) * 1.0
    negatives = model.get_num_negatives(objects, question.id) * 1.0
    total = len(objects)
    if positives != 0:
        frac_positives = (-1 * positives) / total * math.log(positives / total, 2)
    else:
        frac_positives = 0
    if negatives != 0:
        frac_negatives = (-1 * negatives) / total * math.log(negatives / total, 2)
    else:
        frac_negatives = 0
    entropy = frac_positives + frac_negatives
    entropy *= (positives + negatives) / total  # weight by the fraction of known answers
    if entropy != 0:
        entropy = 1 / entropy  # invert so callers minimize rather than maximize
    else:
        entropy = float('inf')
    return entropy
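

# Illustrative sketch (not part of the original module): the same weighted-entropy
# math as entropy() above, but on plain counts instead of the model/SQL lookups,
# to show why the reciprocal is taken. A question that splits the remaining
# objects evenly (positives == negatives, few unknowns) has the highest classical
# entropy, so its reciprocal is the lowest score and wins under minimization.
# The helper name and the example counts below are hypothetical.
def _entropy_score_from_counts(positives, negatives, total):
    '''Hypothetical helper for illustration; assumes total >= positives + negatives.'''
    total = float(total)  # guard against integer division
    score = 0.0
    if positives:
        score += -(positives / total) * math.log(positives / total, 2)
    if negatives:
        score += -(negatives / total) * math.log(negatives / total, 2)
    score *= (positives + negatives) / total  # weight by the fraction of known answers
    return 1 / score if score else float('inf')

# e.g. _entropy_score_from_counts(5, 5, 10) -> 1.0   (best possible, even split)
#      _entropy_score_from_counts(9, 1, 10) -> ~2.13 (lopsided split scores worse)
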

def simple_entropy(objects, question):
    '''Returns an entropy-like score for a question based on the answers
    recorded for all the given objects. The score is low if the numbers of
    yes and no answers are about even and the number of unsure answers is
    low, so callers should prefer the question with the lowest score.'''
    objects = tuple(objects)  # necessary for the SQL IN statement to work
    positives = model.get_num_positives(objects, question.id)
    negatives = model.get_num_negatives(objects, question.id)
    unknowns = model.get_num_unknowns(objects, question.id)
    # Score the imbalance between yes and no answers, then add an arbitrary
    # weight per unknown to discourage questions with lots of unknowns.
    question_entropy = abs(positives - negatives)
    question_entropy += unknowns * 5
    return question_entropy
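

# Illustrative sketch (not part of the original module): the simple_entropy()
# scoring applied to plain counts, to show how the weighting behaves. An even
# yes/no split with few unknowns scores lowest. The helper name and the example
# counts below are hypothetical.
def _simple_score_from_counts(positives, negatives, unknowns):
    '''Hypothetical helper for illustration only.'''
    return abs(positives - negatives) + unknowns * 5

# e.g. _simple_score_from_counts(6, 6, 0)  -> 0  (ideal: even split, no unknowns)
#      _simple_score_from_counts(6, 6, 4)  -> 20 (penalized for many unknowns)
#      _simple_score_from_counts(10, 2, 0) -> 8  (penalized for a lopsided split)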