# Inspect the per-column metadata the engine inferred from the data.
col_info = engine.col_info()
print(col_info)

# To do inference, we initialize some cross-categorization states with
# `init_models` then `run` the inference. We initialize many models to hedge
# the inferences we make. Every model is a draw from the posterior. We want to
# make inference about the data given the posterior distribution of states, so
# we take several models.
print('Initializing 32 models...')
# Pass the count explicitly so the call matches the message above
# (`init_models` accepts a model count — see the companion example's
# `engine.init_models(4)` usage).
engine.init_models(32)

print('Running models for 200 iterations...')
engine.run(200, checkpoint=5)

# To check whether inference has converged, we plot the log score for each
# model as a function of time and make sure they all have leveled out.
engine.convergence_plot()
plt.show()

# We can view which columns are dependent on which other columns by plotting
# a n_cols by n_cols matrix where each cell is the dependence probability
# between two columns. Note that the dependence probability is simply the
# probability that a dependence exists, not the strength of the dependence.
engine.heatmap('dependence_probability', plot_kwargs={'figsize': (10, 10,)})
plt.show()

engine.heatmap('row_similarity', plot_kwargs={'figsize': (10, 10,)})
plt.show()

# The paint job is an important part of what makes a pine wood derby car fast,
# but does it matter for animals? We'll use the linfoot information to
# determine how predictive variables are of whether an animal is fast. Linfoot
28, 28, )) return pixels assert __name__ == "__main__" exdir = os.path.dirname(os.path.realpath(__file__)) df = pd.read_csv(os.path.join(exdir, 'mnist.csv.gz'), compression='gzip') df = df.sample(2000) testdata = df['label'][1500:] df['label'][1500:] = float('NaN') engine = Engine(df) engine.init_models(4) engine.run(1000, checkpoint=4, verbose=True) engine.convergence_plot() plt.show() _, m = engine.eval(testdata, metric=Accuracy()) print('Acuracy = %f' % (m, )) # engine.heatmap('row_similarity') # plt.show() # engine.heatmap('dependence_probability') # plt.show()