# 5-fold cross-validation comparing a Poisson SPN (PSPN) against a Poisson
# dependency network (PDN) on `data`, accumulating metrics into `stats`.
printlocal(featureNames)
printlocal(len(featureNames))
printlocal(data.shape)

stats = Stats(name=dsname)
for train, test, i in kfolded(data, 5):
    # Learn the SPN structure on the training fold; sqrt preprocessing for
    # clustering, minimum slice size 80 rows.
    spn = LearnSPN(alpha=0.001,
                   min_instances_slice=80,
                   cluster_prep_method="sqrt",
                   cache=memory).fit_structure(train)
    printlocal("done")
    stats.addConfig("PSPN", spn.config)
    # stats.add("SPN Pois", Stats.LOG_LIKELIHOOD, llspn(spn, test))
    printlocal("LL")
    stats.add("PSPN", Stats.MODEL_SIZE, spn.size())
    printlocal("model size")

    # Two lambda-estimation variants evaluated on the held-out fold.
    prediction = spnComputeLambdas(spn, test)
    printlocal("model spnComputeLambdas")
    # prediction2 = spnComputeLambdasCuda(spn, test)
    prediction2 = spnComputeLambdas2(spn, test)
    printlocal("model spnComputeLambdas2")

    stats.add("PSPN", Stats.ABS_ERROR, abs_error(test, prediction))
    stats.add("PSPN", Stats.SQUARED_ERROR, squared_error(test, prediction))
    # BUGFIX: ABS_ERROR was previously recorded with squared_error(),
    # mirroring the line below by copy-paste; use abs_error() as for "PSPN".
    stats.add("PSPN_MJ", Stats.ABS_ERROR, abs_error(test, prediction2))
    stats.add("PSPN_MJ", Stats.SQUARED_ERROR, squared_error(test, prediction2))

    # Baseline: Poisson dependency network on the same fold.
    pdn = pdnlearn(train, featureNames, max_depth=30, iterations=20)
    stats.addConfig("PDN Pois", pdn.config)
    prediction = pdn.getLambdas(test)
# Cross-validated perplexity comparison of LDA vs. HLDA over several topic
# counts; results and timings are accumulated into `stats`.
stats = Stats(name=dsname)
nrfolds = 5
for train, test, i in kfolded(data, nrfolds):
    print(dsname, train.shape, test.shape, i)

    for topics in [5, 10, 20, 50, 100]:
        # Both models share the same corpus description in their config.
        corpus_cfg = {
            "topics": topics,
            "train documents": train.shape[0],
            "test documents": test.shape[0],
            "words": train.shape[1],
        }

        lda_key = "LDA" + str(topics)
        stats.addConfig(lda_key, dict(corpus_cfg))
        perplexity, tt = ldaperplexity(train, test, topics)
        stats.add(lda_key, Stats.PERPLEXITY, perplexity)
        stats.add(lda_key, Stats.TIME, tt)

        hlda_key = "HLDA" + str(topics)
        stats.addConfig(hlda_key, dict(corpus_cfg))
        # NOTE(review): hldaperplexity() does not take `topics`, so this call
        # looks loop-invariant — it may be hoistable, unless it is stochastic
        # and a fresh run per topic count is intended. TODO: confirm.
        perplexity, tt = hldaperplexity(train, test)
        stats.add(hlda_key, Stats.PERPLEXITY, perplexity)
        stats.add(hlda_key, Stats.TIME, tt)

    for pct in [1, 10, 25, 50, 75, 90]:
        # NOTE(review): loop body continues beyond this chunk of the file;
        # placeholder only — do not remove without restoring the real body.
        ...