def pspnperplexity(train, test, min_slices, ind_test_method, row_cluster_method):
    """Learn a Poisson SPN structure on *train* and evaluate it on *test*.

    The structure search is timed with ``Chrono``; the learned model's
    perplexity is then computed on the held-out *test* data.

    Returns a 4-tuple: (perplexity, log-likelihood, structure-learning
    time in seconds as reported by ``Chrono.elapsed()``, model size).
    """
    timer = Chrono().start()
    learner = LearnSPN(
        alpha=0.001,
        min_slices=min_slices,
        cluster_prep_method="sqrt",
        ind_test_method=ind_test_method,
        row_cluster_method=row_cluster_method,
    )
    spn = learner.fit_structure(train)
    timer.end()
    elapsed = timer.elapsed()

    pwb, perplexity, words, logl = spn.perplexity(test)
    print(
        "SPN ll=%s %.3f per-word bound, %.1f perplexity estimate based on a held-out corpus of %i documents with %i words" % (
            logl, pwb, perplexity, test.shape[0], words))

    return perplexity, logl, elapsed, spn.size()
# Cross-validated evaluation of a Poisson SPN vs. a Poisson PDN on `data`.
# NOTE(review): `printlocal`, `featureNames`, `data`, `dsname`, `memory`,
# `Stats`, `kfolded`, `LearnSPN`, `spnComputeLambdas*`, `abs_error`,
# `squared_error` and `pdnlearn` are defined elsewhere in this project.
printlocal(featureNames)
printlocal(len(featureNames))
printlocal(data.shape)

stats = Stats(name=dsname)
for train, test, i in kfolded(data, 5):
    spn = LearnSPN(alpha=0.001, min_instances_slice=80,
                   cluster_prep_method="sqrt", cache=memory).fit_structure(train)
    printlocal("done")
    stats.addConfig("PSPN", spn.config)
    # stats.add("SPN Pois", Stats.LOG_LIKELIHOOD, llspn(spn, test))
    printlocal("LL")
    stats.add("PSPN", Stats.MODEL_SIZE, spn.size())
    printlocal("model size")

    prediction = spnComputeLambdas(spn, test)
    printlocal("model spnComputeLambdas")
    # prediction2 = spnComputeLambdasCuda(spn, test)
    prediction2 = spnComputeLambdas2(spn, test)
    printlocal("model spnComputeLambdas2")

    stats.add("PSPN", Stats.ABS_ERROR, abs_error(test, prediction))
    stats.add("PSPN", Stats.SQUARED_ERROR, squared_error(test, prediction))
    # BUG FIX: ABS_ERROR for PSPN_MJ was previously recorded with
    # squared_error(...), which made it a duplicate of SQUARED_ERROR and
    # inconsistent with the PSPN rows above.
    stats.add("PSPN_MJ", Stats.ABS_ERROR, abs_error(test, prediction2))
    stats.add("PSPN_MJ", Stats.SQUARED_ERROR, squared_error(test, prediction2))

    pdn = pdnlearn(train, featureNames, max_depth=30, iterations=20)
    stats.addConfig("PDN Pois", pdn.config)
    prediction = pdn.getLambdas(test)