# determine how predictive variables are of whether an animal is fast. Linfoot
# is basically the information-theoretic counterpart to correlation.
# NOTE(review): linfoot=True is assumed to select the Linfoot-transformed value
# so that the numbers agree with the 'Linfoot(...)' labels printed below --
# confirm against Engine.mutual_information.
linfoot_lean = engine.mutual_information('fast', 'lean', linfoot=True)
linfoot_stripes = engine.mutual_information('fast', 'stripes', linfoot=True)

print('Linfoot(fast, lean) = %f' % (linfoot_lean,))
print('Linfoot(fast, stripes) = %f' % (linfoot_stripes,))

# We can also figure out which animals are more similar. Is a chihuahua more
# similar to a wolf or a rat?
sim_wolves = engine.row_similarity('chihuahua', 'wolf')
sim_rats = engine.row_similarity('chihuahua', 'rat')

print('Similarity between Chihuahuas and wolves is %f' % (sim_wolves,))
print('Similarity between Chihuahuas and rats is %f' % (sim_rats,))

# Which animals are outliers with respect to their being fast? We can find out
# by calculating the surprisal (self-information).
s = engine.surprisal('fast')
# Presumably `s` is a pandas DataFrame with a 'surprisal' column (verify
# against Engine.surprisal). DataFrame.sort no longer exists in pandas;
# sort_values is its replacement. Sort descending so the most surprising
# rows come first.
s.sort_values(by=['surprisal'], ascending=False, inplace=True)
print(s.head(10))

# Let's say we're out in the woods and we see a lean, spotted animal with a
# tail. What is the probability that it is fierce and fast?
# Note that for continuous variables, Engine.probability returns the log PDF
# of an event given observations.
logp = engine.probability(
    [1, 1], ['fierce', 'fast'],
    given=[('lean', 1), ('spots', 1), ('tail', 1)])
# The engine's result is exponentiated before printing to turn the log value
# back into a probability.
print('p(fierce, fast | lean, spots, tail) = %s' % (exp(logp),))
linfoot_stripes = engine.mutual_information('fast', 'stripes', linfoot=False) print('Linfoot(fast, lean) = %f' % (linfoot_lean, )) print('Linfoot(fast, stripes) = %f' % (linfoot_stripes, )) # We can also figure out which animals are more similar. Is a wolf more # similar to a dalmatian or a rat. sim_wolves = engine.row_similarity('chihuahua', 'wolf') sim_rats = engine.row_similarity('chihuahua', 'rat') print('Similarity between Chihuahuas and wolves is %f' % (sim_wolves, )) print('Similarity between Chihuahuas and rats is %f' % (sim_rats, )) # Which animals are outliers with respect to their being fast. We can find out # by calculating the surprisal (self infotmation). s = engine.surprisal('fast') s.sort(['surprisal'], ascending=False, inplace=True) print(s.head(10)) # Lets say we're out in the woods and we see a lean, spotted animal with a # tail. What is the probability that it is fierce and fast? # Note that for continuous variables, Engine.probability returns the log PDF # of an event given observations. logp = engine.probability([1, 1], ['fierce', 'fast'], given=[( 'lean', 1, ), ( 'spots', 1, ), (