def compareKnns(testName, seed = 123432, folderName = "knn_est_test", ntests = 25, ns = None, SAVE = True):
    """Compare the kNN-based structure learner over a grid of estimator settings.

    For every test repetition a data set is generated with ``DataGenerator``,
    truncated to each sample size in ``ns`` and standardised with ``scale``.
    For every combination of a ``k`` value (0.01, 0.1, 0.2, 3, 5) and the
    local-permutation flag, a ``KnnEstimator`` is plugged into ``StructLearn``
    and the moral graph is extracted with both the "AND" and the "OR" rule.
    The Hamming distance (``HD``) to the true graph and the estimated graph
    itself are collected.

    Parameters
    ----------
    testName :
        Passed to ``DataGenerator`` and ``__test2str``; the latter gives the
        base name of the result file.
    seed : int
        Seeds the data generator (``seed``), the estimator's generator
        (``seed + 1``) and numpy's global generator (``seed``).
    folderName : str or None
        Sub-folder of ``tests`` for the results; ``None`` means ``tests``.
    ntests : int
        Number of repetitions. With ``ntests <= 0`` no test is run and empty
        results are returned.
    ns : list of int or None
        Sample sizes; defaults to ``[125, 250, 500, 1000, 2000]``.
    SAVE : bool
        If true, results are written with ``saveResults`` after every fifth
        test and once more at the end.

    Returns
    -------
    tuple
        ``(allRes, parameters)`` where ``allRes[method_name][n]`` holds the
        lists ``"HD"`` and ``"UG"`` and ``parameters`` records the settings,
        the number of completed tests and the true graphs.
    """
    if ns is None:
        ns = [125,250,500,1000, 2000]

    # different random number generators for data generation and for the knnMI method (permutation tests)
    rrData = np.random.RandomState(seed)
    rr1 = np.random.RandomState(seed + 1)

    # global rng is also used..
    np.random.seed(seed)

    cores = mp.cpu_count()

    k_values = [0.01,0.1,0.2,3,5]
    local_perm = [True,False]
    graph_rules = ["AND","OR"]

    methods = list(product(k_values,local_perm))
    method_names = [__method2str(method) for method in product(k_values,local_perm,graph_rules)]

    # initialize dictionaries for results; measured quantities are keys
    template = {"HD" : [], "UG" : []}
    Nres = {n : copy.deepcopy(template) for n in ns}
    allRes = {method : copy.deepcopy(Nres) for method in method_names}

    # used parameters
    parameters = {"seed": seed,
                  "ntests": 0,
                  "ns": ns,
                  "testName" : testName,
                  "methods" : method_names,
                  "trueUGs" : []}

    # create the folder where the results are saved
    if folderName is None:
        directory = "tests"
    else:
        directory = "tests/" + folderName

    # exist_ok avoids the check-then-create race of exists() + makedirs()
    os.makedirs(directory, exist_ok = True)

    test_str = __test2str(testName)
    filename = directory + "/" + test_str + ".p"

    # create object for generating data and run the tests
    dd = DataGenerator(testName,rng = rrData)

    # number of finished repetitions; stays 0 if the loop body never runs
    completed = 0

    for tt in range(0,ntests):
        print("test ",tt + 1,"/", ntests, sep="")

        # one data set per repetition; smaller sample sizes use its first n rows
        Xall,G = dd.createData(np.max(ns))

        for n in ns:
            X = scale(Xall[:n,:])  # zero mean, sd one for all the features
            print("............sample size: ",n)

            for kk,local in methods:
                # k below one is a fraction of the sample size (at least 3 neighbours)
                if kk < 1:
                    k = max(3,int(np.ceil(kk*n)))
                else:
                    k = kk

                k_perm = 5 if local else None

                knnest = KnnEstimator(k = k,k_perm = k_perm, rng = rr1, parallel = cores)
                knn_sl = StructLearn(X, ci_estimator= knnest)
                knn_sl.findMoralGraph()

                # both graph rules reuse the same learned neighbourhoods
                for graph_rule in graph_rules:
                    est_ug = knn_sl.getMoralGraph(graph_rule)
                    method_name = __method2str( (kk,local,graph_rule) )

                    # compute Hamming distance
                    hd = HD(G,est_ug)

                    # save stuff
                    print(method_name,hd)
                    allRes[method_name][n]["HD"].append(hd)
                    allRes[method_name][n]["UG"].append(est_ug)

        # save the true UG (this differs between the tests only in random graph cases)
        parameters["trueUGs"].append(G)
        completed = tt + 1

        # save results after every 5 tests
        if completed % 5 == 0 and SAVE:
            parameters["ntests"] = completed
            saveResults((allRes,parameters),filename)

    # final results
    parameters["ntests"] = completed
    res = (allRes,parameters)

    if SAVE:
        saveResults(res,filename)

    return res
def doTests(testName, folderName = None, seed = 123456, ntests = 25, ns = None, k = 3, k_perm = None, methods = None, lambdaRatio = 0.01, useTransformation = False, SAVE = True):
    """Run repeated structure-learning experiments for a set of methods.

    For every repetition a data set is generated with ``DataGenerator``,
    truncated to each sample size in ``ns`` and standardised with ``scale``.
    Each requested method then estimates an undirected graph, which is
    compared with the true graph using ``HD``.

    Recognised method names are ``<family>_AND`` / ``<family>_OR`` with family
    in ``knnMI``, ``fisherZ``, ``KCIT``, ``RCIT`` (moral graph from
    ``StructLearn``), and ``glasso_RIC``, ``glasso_BIC``, ``glasso_EBIC``,
    ``glasso_STARS``, ``mb_RIC``, ``mb_STARS``, ``mb_auto`` (``hugeLearnGraph``).
    For any other name a message is printed and NaN / ``None`` are recorded.

    Parameters
    ----------
    testName :
        Passed to ``DataGenerator`` and ``__test2str``; the latter gives the
        base name of the result file.
    folderName : str or None
        Sub-folder of ``tests`` for the results; ``None`` means ``tests``.
    seed : int
        Seeds the data generator (``seed``), the kNN estimator (``seed + 1``),
        numpy's global generator (``seed``), KCIT (``seed + 2``) and
        RCIT (``seed + 3``).
    ntests : int
        Number of repetitions. With ``ntests <= 0`` no test is run and empty
        results are returned.
    ns : list of int or None
        Sample sizes; defaults to ``[125, 250, 500, 1000, 2000]``.
    k :
        Neighbourhood size for ``KnnEstimator``. A value below one is treated
        as a fraction of the sample size (at least 3 neighbours).
    k_perm :
        Forwarded to ``KnnEstimator``.
    methods : list of str or None
        Methods to run; see the default list in the body.
    lambdaRatio : float
        Forwarded to ``hugeLearnGraph`` (except for ``mb_auto``).
    useTransformation : bool
        If true, ``transform`` is applied to the data for every method and
        ``hugeLearnGraph`` is called with ``nonPara=False``.
    SAVE : bool
        If true, results are written with ``saveResults`` after every fifth
        test and once more at the end.

    Returns
    -------
    tuple
        ``(allRes, parameters)`` where ``allRes[method][n]`` holds the lists
        ``"HD"``, ``"UG"`` and ``"sparsity"``.
    """
    if methods is None:
        methods = ["knnMI_AND", "knnMI_OR", "fisherZ_AND", "fisherZ_OR",
                   "mb_RIC", "glasso_RIC", "mb_STARS", "glasso_STARS", "mb_auto"]

    if ns is None:
        ns = [125,250,500,1000,2000]

    # different random number generators for data generation and for the knnMI method (permutation tests)
    rrData = np.random.RandomState(seed)
    rr1 = np.random.RandomState(seed + 1)

    # global rng is also used..
    np.random.seed(seed)

    cores = mp.cpu_count()

    graph_rules = ("AND", "OR")

    def requested(family):
        # a CI-test family is needed if either of its graph rules is asked for
        return any(family + "_" + rule in methods for rule in graph_rules)

    # conditional independence tests
    knnEst1 = KnnEstimator(k = k,rng = rr1, parallel= cores,k_perm = k_perm)
    fEst = FisherCI()

    # (family, estimator) pairs in the order their moral graphs are learned
    ci_estimators = []
    if requested("KCIT"):
        ci_estimators.append(("KCIT", KCIT(seed = seed + 2)))
    if requested("RCIT"):
        ci_estimators.append(("RCIT", RCIT(seed = seed + 3)))
    if requested("knnMI"):
        ci_estimators.append(("knnMI", knnEst1))
    if requested("fisherZ"):
        ci_estimators.append(("fisherZ", fEst))

    # keyword arguments of hugeLearnGraph for every glasso / mb variant
    huge_configs = {
        "glasso_RIC":   dict(method = "glasso", modelSelectCrit = "ric", lambdaRatio = lambdaRatio),
        "glasso_BIC":   dict(method = "glasso", modelSelectCrit = "ebic", ebicTuning = 0.0, lambdaRatio = lambdaRatio),
        "glasso_EBIC":  dict(method = "glasso", modelSelectCrit = "ebic", ebicTuning = 0.5, lambdaRatio = lambdaRatio),
        "glasso_STARS": dict(method = "glasso", modelSelectCrit = "stars", lambdaRatio = lambdaRatio),
        "mb_RIC":       dict(method = "mb", modelSelectCrit = "ric", lambdaRatio = lambdaRatio),
        "mb_STARS":     dict(method = "mb", modelSelectCrit = "stars", lambdaRatio = lambdaRatio),
        "mb_auto":      dict(method = "mb", modelSelectCrit = "mbDefault"),
    }

    # initialize dictionaries for results; measured quantities are keys
    template = {"HD" : [], "UG" : [], "sparsity" : []}
    Nres = {n : copy.deepcopy(template) for n in ns}
    allRes = {method : copy.deepcopy(Nres) for method in methods}

    # used parameters
    parameters = {"seed": seed,
                  "ntests": 0,
                  "ns": ns,
                  "testName" : testName,
                  "methods" : methods,
                  "k" : k,
                  "lambdaRatio" : lambdaRatio,
                  "trueUGs" : []}

    # create the folder where the results are saved
    if folderName is None:
        directory = "tests"
    else:
        directory = "tests/" + folderName

    # exist_ok avoids the check-then-create race of exists() + makedirs()
    os.makedirs(directory, exist_ok = True)

    test_str = __test2str(testName)
    filename = directory + "/" + test_str + ".p"

    # create object for generating data and run the tests
    dd = DataGenerator(testName,rng = rrData)

    # glasso/mb apply the non-paranormal transformation themselves unless the
    # data was already transformed for every method (no need to do it twice)
    nonPara = not useTransformation

    # DEBUG: number of asymmetric estimates dumped so far
    errorCount = 0

    # number of finished repetitions; stays 0 if the loop body never runs
    completed = 0

    for tt in range(0,ntests):
        print("test ",tt + 1,"/", ntests, sep="")

        # one data set per repetition; smaller sample sizes use its first n rows
        Xall,G = dd.createData(np.max(ns))

        for n in ns:
            X = scale(Xall[:n,:])  # zero mean, sd one for all the features

            if useTransformation:
                X = transform(X)  # non-paranormal transformation for every method
                print("Transformation used.")

            print("............sample size: ",n)

            # find Markov blankets once per CI test; both graph rules reuse them
            learners = {}
            for family, estimator in ci_estimators:
                if family == "knnMI" and k < 1:
                    # fractional k: scale the neighbourhood with the sample size
                    estimator.k = max(3,int(np.ceil(k*n)))
                learner = StructLearn(X, ci_estimator = estimator)
                learner.findMoralGraph()
                learners[family] = learner

            for method in methods:
                # record sparsities of estimated graphs for glasso/mb, for other methods
                # use just nan (graphs are saved so sparsity is easy to compute)
                sp = np.nan

                # DEBUG: state of the global generator before the method runs
                rng_state = np.random.get_state()

                family, _, rule = method.rpartition("_")

                if family in learners and rule in graph_rules:
                    estUG = learners[family].getMoralGraph(rule)
                elif method in huge_configs:
                    estUG, sp = hugeLearnGraph(X, nonPara = nonPara, **huge_configs[method])
                else:
                    # unknown name: keep the result lists aligned, with nothing to evaluate
                    print("unspecified method!!")
                    allRes[method][n]["HD"].append(np.nan)
                    allRes[method][n]["UG"].append(None)
                    allRes[method][n]["sparsity"].append(sp)
                    continue

                # DEBUG: dump asymmetric estimates for later inspection
                if not (estUG == estUG.T).all():
                    errors = {"testName" : testName,
                              "data": X,
                              "method" : method,
                              "currentSeed" : rng_state,
                              "estUG": estUG,
                              "trueUG": G,
                              "testNumber" : tt + 1}
                    errorCount += 1
                    path = directory + "/errors_" + test_str + "_" + str(errorCount) + ".p"
                    saveResults(errors,path)

                    # force symmetry on UG: with 0/1 entries an edge survives only
                    # if it is present in both directions
                    # NOTE(review): applied only to asymmetric estimates here;
                    # the original layout was ambiguous - confirm
                    estUG = 1*(estUG + estUG.T == 2)

                # compute Hamming distance
                hd = HD(G,estUG)

                # save stuff
                print(method,hd)
                allRes[method][n]["HD"].append(hd)
                allRes[method][n]["UG"].append(estUG)
                allRes[method][n]["sparsity"].append(sp)

        # save the true UG (this differs between the tests only in random graph cases)
        parameters["trueUGs"].append(G)
        completed = tt + 1

        # save results after every 5 tests
        if completed % 5 == 0 and SAVE:
            parameters["ntests"] = completed
            saveResults((allRes,parameters),filename)

    # final results
    parameters["ntests"] = completed
    res = (allRes,parameters)

    if SAVE:
        saveResults(res,filename)

    return res