Python DataGenerator.createData Examples

Programming Language: Python

Namespace/Package Name: datagenerator

Class/Type: DataGenerator

Method/Function: createData

Examples at hotexamples.com: 2

Python DataGenerator.createData - 2 examples found. These are the top-rated real-world Python examples of datagenerator.DataGenerator.createData extracted from open-source projects. You can rate examples to help us improve their quality.

Frequently Used Methods

Show Hide

DataGenerator(30)

get_dag_pipeline(3)

get_ranktree_pipeline(3)

get_unbalanced_pipeline(3)

get_tree_pipeline(3)

get_linear_pipeline(3)

get_single_node_pipeline(3)

get_disconnected_pipeline(3)

get_loose_pipeline(3)

createData(2)

_add_to_dict(2)

__len__(2)

__getitem__(2)

get_self_ref_pipeline(1)

get_empty_pipeline(1)

get_duplicate_node_pipeline(1)

CreateStandardSpectrogram(1)

get_args(1)

get_cyclic_pipeline(1)

CreateTrainingDataSpectrogram(1)

bfs(1)

_read_train_data(1)

_map_filter_cols(1)

_filter_cols(1)

__visualisation__(1)

__init__(1)

GetListOfFiles(1)

get_unknown_uid_framework(1)

Example #1

Show file

def compareKnns(testName,
                seed = 123432,
                folderName = "knn_est_test",
                ntests = 25,
                ns = None,
                SAVE = True):
    """Compare kNN-based moral-graph estimation across k / permutation settings.

    For each of ``ntests`` generated data sets and each sample size in ``ns``,
    a ``StructLearn`` moral graph is estimated with a ``KnnEstimator`` for
    every (k value, local-permutation flag) pair, read out with both the
    "AND" and the "OR" graph rule, and compared to the generated graph with
    ``HD`` (Hamming distance).

    Parameters
    ----------
    testName
        Test identifier, passed to ``DataGenerator`` and ``__test2str``.
    seed : int
        Base seed. ``seed`` seeds the data generator's RandomState and the
        global NumPy RNG; ``seed + 1`` seeds the estimator's RandomState.
    folderName : str or None
        Results go to ``"tests/<folderName>"``, or ``"tests"`` when None.
        The directory is created even when ``SAVE`` is False.
    ntests : int
        Number of data sets to generate. Values <= 0 run no tests.
    ns : list of int, optional
        Sample sizes; defaults to ``[125, 250, 500, 1000, 2000]``. Each test
        generates ``max(ns)`` rows once and uses the first ``n`` rows.
    SAVE : bool
        If true, results are written with ``saveResults`` after every fifth
        test and once more at the end.

    Returns
    -------
    tuple
        ``(allRes, parameters)``. ``allRes[method_name][n]`` is a dict with
        lists under ``"HD"`` and ``"UG"``; ``parameters`` records the
        settings, the number of completed tests and the true graphs.
    """
    if ns is None:
        ns = [125, 250, 500, 1000, 2000]

    # different random number generators for data generation and for the
    # knnMI method (permutation tests)
    rrData = np.random.RandomState(seed)
    rr1 = np.random.RandomState(seed + 1)

    # the global rng is also used..
    np.random.seed(seed)

    cores = mp.cpu_count()

    # values below 1 are fractions of the sample size, the rest are absolute k
    k_values = [0.01, 0.1, 0.2, 3, 5]
    local_perm = [True, False]
    graph_rules = ["AND", "OR"]

    methods = list(product(k_values, local_perm))
    method_names = [__method2str(method)
                    for method in product(k_values, local_perm, graph_rules)]

    # initialize dictionaries for results
    res = {"HD" : [], "UG" : []} # measured quantities are keys
    Nres = {n : copy.deepcopy(res) for n in ns}
    allRes = {method : copy.deepcopy(Nres) for method in method_names}

    # used parameters
    parameters = {"seed": seed,
                  "ntests": 0,
                  "ns": ns,
                  "testName" : testName,
                  "methods" : method_names,
                  "trueUGs" : []}

    # create the folder where the results are saved
    if folderName is None:
        directory = "tests"
    else:
        directory = "tests/" + folderName

    # exist_ok avoids the check-then-create race of os.path.exists + makedirs
    os.makedirs(directory, exist_ok=True)

    test_str = __test2str(testName)
    filename = directory + "/" + test_str + ".p"

    # create object for generating data and run the tests
    dd = DataGenerator(testName, rng = rrData)

    # Tracked explicitly so the final bookkeeping also works when the loop
    # body never runs (ntests <= 0).
    completed = 0

    for tt in range(0, ntests):

        print("test ",tt + 1,"/", ntests, sep="")
        Xall, G = dd.createData(np.max(ns))

        for n in ns:

            X = Xall[:n,:]
            X = scale(X) # zero mean, sd one for all the features

            print("............sample size: ",n)

            for method in methods:

                kk, local = method

                # fractional k is relative to the sample size, but never below 3
                if kk < 1:
                    k = max(3, int(np.ceil(kk*n)))
                else:
                    k = kk

                k_perm = 5 if local else None

                knnest = KnnEstimator(k = k, k_perm = k_perm, rng = rr1, parallel = cores)

                knn_sl = StructLearn(X, ci_estimator = knnest)
                knn_sl.findMoralGraph()

                # one fit serves both graph rules
                for graph_rule in graph_rules:

                    est_ug = knn_sl.getMoralGraph(graph_rule)
                    method_name = __method2str((kk, local, graph_rule))

                    # compute Hamming distance
                    hd = HD(G, est_ug)

                    # save stuff
                    print(method_name,hd)
                    allRes[method_name][n]["HD"].append(hd)
                    allRes[method_name][n]["UG"].append(est_ug)

        # save the true UG (this differs between the tests only in random graph cases)
        parameters["trueUGs"].append(G)
        completed = tt + 1

        # save results after every 5 tests
        if completed % 5 == 0 and SAVE:
            parameters["ntests"] = completed
            res = (allRes, parameters)
            saveResults(res, filename)

    # final results
    parameters["ntests"] = completed
    res = (allRes, parameters)
    if SAVE:
        saveResults(res, filename)

    return res

Example #2

Show file

def doTests(testName,
            folderName = None,
            seed = 123456,
            ntests = 25,
            ns = None,
            k = 3,
            k_perm = None,
            methods = None,
            lambdaRatio = 0.01,
            useTransformation = False,
            SAVE = True):
    """Run a set of undirected-graph learning methods on generated data.

    For each of ``ntests`` generated data sets and each sample size in ``ns``,
    every method named in ``methods`` estimates an undirected graph, which is
    compared to the generated graph with ``HD`` (Hamming distance).

    Recognized method names:
    ``knnMI_AND/OR``, ``fisherZ_AND/OR``, ``KCIT_AND/OR``, ``RCIT_AND/OR``
    (moral graphs from ``StructLearn``), and ``glasso_RIC``, ``glasso_BIC``,
    ``glasso_EBIC``, ``glasso_STARS``, ``mb_RIC``, ``mb_STARS``, ``mb_auto``
    (``hugeLearnGraph``).

    Parameters
    ----------
    testName
        Test identifier, passed to ``DataGenerator`` and ``__test2str``.
    folderName : str or None
        Results go to ``"tests/<folderName>"``, or ``"tests"`` when None.
        The directory is created even when ``SAVE`` is False.
    seed : int
        Base seed. ``seed`` seeds the data generator's RandomState and the
        global NumPy RNG, ``seed + 1`` the kNN estimator, ``seed + 2`` KCIT
        and ``seed + 3`` RCIT.
    ntests : int
        Number of data sets to generate. Values <= 0 run no tests.
    ns : list of int, optional
        Sample sizes; defaults to ``[125, 250, 500, 1000, 2000]``. Each test
        generates ``max(ns)`` rows once and uses the first ``n`` rows.
    k
        ``k`` for ``KnnEstimator``. A value below 1 is treated as a fraction
        of the sample size: ``max(3, ceil(k * n))``.
    k_perm
        Passed through to ``KnnEstimator``.
    methods : list of str, optional
        Method names to run; defaults to a built-in list of nine methods.
    lambdaRatio
        Passed to ``hugeLearnGraph`` (except for ``mb_auto``).
    useTransformation : bool
        If true, ``transform`` is applied to the scaled data for every
        method and ``hugeLearnGraph`` is called with ``nonPara=False``.
    SAVE : bool
        If true, results are written with ``saveResults`` after every fifth
        test and once more at the end.

    Returns
    -------
    tuple
        ``(allRes, parameters)``. ``allRes[method][n]`` is a dict with lists
        under ``"HD"``, ``"UG"`` and ``"sparsity"``. For an unrecognized
        method name the entries are ``nan``, ``None`` and ``nan``.
    """
    if methods is None:

        methods = ["knnMI_AND",
                   "knnMI_OR",
                   "fisherZ_AND",
                   "fisherZ_OR",
                   "mb_RIC",
                   "glasso_RIC",
                   "mb_STARS",
                   "glasso_STARS",
                   "mb_auto"]

    if ns is None:
        ns = [125, 250, 500, 1000, 2000]

    # different random number generators for data generation and for the
    # knnMI method (permutation tests)
    rrData = np.random.RandomState(seed)
    rr1 = np.random.RandomState(seed + 1)

    # the global rng is also used..
    np.random.seed(seed)

    cores = mp.cpu_count()

    # which families of conditional independence tests are requested;
    # `methods` is not modified below, so these can be decided once
    useKcit = "KCIT_OR" in methods or "KCIT_AND" in methods
    useRcit = "RCIT_OR" in methods or "RCIT_AND" in methods
    useKnn = "knnMI_AND" in methods or "knnMI_OR" in methods
    # the "_OR" variant must trigger the fit as well, otherwise a method list
    # containing only "fisherZ_OR" has no fitted learner to query
    useFisher = "fisherZ_AND" in methods or "fisherZ_OR" in methods

    # conditional independence tests
    knnEst1 = KnnEstimator(k = k, rng = rr1, parallel = cores, k_perm = k_perm)
    fEst = FisherCI()

    if useKcit:
        k_cit = KCIT(seed = seed + 2)
    if useRcit:
        r_cit = RCIT(seed = seed + 3)

    # initialize dictionaries for results
    res = {"HD" : [], "UG" : [], "sparsity" : []} # measured quantities are keys
    Nres = {n : copy.deepcopy(res) for n in ns}
    allRes = {method : copy.deepcopy(Nres) for method in methods}

    # used parameters
    parameters = {"seed": seed,
                  "ntests": 0,
                  "ns": ns,
                  "testName" : testName,
                  "methods" : methods,
                  "k" : k,
                  "lambdaRatio" : lambdaRatio,
                  "trueUGs" : []}

    # create the folder where the results are saved
    if folderName is None:
        directory = "tests"
    else:
        directory = "tests/" + folderName

    # exist_ok avoids the check-then-create race of os.path.exists + makedirs
    os.makedirs(directory, exist_ok=True)

    test_str = __test2str(testName)
    filename = directory + "/" + test_str + ".p"

    # create object for generating data and run the tests
    dd = DataGenerator(testName, rng = rrData)

    # Use the non-paranormal transformation inside glasso/mb only when the
    # data is not already transformed for every method below.
    nonPara = not useTransformation

    # DEBUG: number of asymmetric estimates dumped to disk so far
    errorCount = 0

    # Tracked explicitly so the final bookkeeping also works when the loop
    # body never runs (ntests <= 0).
    completed = 0

    for tt in range(0, ntests):

        print("test ",tt + 1,"/", ntests, sep="")
        Xall, G = dd.createData(np.max(ns))

        for n in ns:

            X = Xall[:n,:]
            X = scale(X) # zero mean, sd one for all the features

            if useTransformation:
                X = transform(X) # non-paranormal transformation for every method
                print("Transformation used.")

            print("............sample size: ",n)

            # kernel methods
            if useKcit:
                kcitSl = StructLearn(X, ci_estimator = k_cit)
                kcitSl.findMoralGraph()

            if useRcit:
                rcitSl = StructLearn(X, ci_estimator = r_cit)
                rcitSl.findMoralGraph()

            # find Markov blankets for knnMI method
            if useKnn:
                # fractional k is relative to the sample size, but never below 3
                if k < 1:
                    knnEst1.k = max(3, int(np.ceil(k*n)))

                knnSl = StructLearn(X, ci_estimator = knnEst1)
                knnSl.findMoralGraph()

            # same for fisherZ based method
            if useFisher:
                fishSl = StructLearn(X, ci_estimator = fEst)
                fishSl.findMoralGraph()

            for method in methods:

                # record sparsities of estimated graphs for glasso/mb, for
                # other methods use just nan (graphs are saved so sparsity is
                # easy to compute)
                sp = np.nan

                # DEBUG: snapshot of the global RNG state before the method
                # runs, stored with any asymmetric estimate.
                # NOTE(review): the original stored the RandomState class
                # itself here; the state snapshot is the assumed intent.
                rngState = np.random.get_state()

                if method == "knnMI_AND":
                    estUG = knnSl.getMoralGraph("AND")
                elif method == "knnMI_OR":
                    estUG = knnSl.getMoralGraph("OR")
                elif method == "fisherZ_AND":
                    estUG = fishSl.getMoralGraph("AND")
                elif method == "fisherZ_OR":
                    estUG = fishSl.getMoralGraph("OR")
                elif method == "glasso_RIC":
                    estUG, sp = hugeLearnGraph(X, method = "glasso", modelSelectCrit = "ric", nonPara = nonPara, lambdaRatio = lambdaRatio)
                elif method == "glasso_BIC":
                    estUG, sp = hugeLearnGraph(X, method = "glasso", modelSelectCrit = "ebic", nonPara = nonPara, ebicTuning = 0.0, lambdaRatio = lambdaRatio)
                elif method == "glasso_EBIC":
                    estUG, sp = hugeLearnGraph(X, method = "glasso", modelSelectCrit = "ebic", nonPara = nonPara, ebicTuning = 0.5, lambdaRatio = lambdaRatio)
                elif method == "mb_RIC":
                    estUG, sp = hugeLearnGraph(X, method = "mb", modelSelectCrit = "ric", nonPara = nonPara, lambdaRatio = lambdaRatio)
                elif method == "mb_auto":
                    estUG, sp = hugeLearnGraph(X, method = "mb", modelSelectCrit = "mbDefault", nonPara = nonPara)
                elif method == "mb_STARS":
                    estUG, sp = hugeLearnGraph(X, method = "mb", modelSelectCrit = "stars", nonPara = nonPara, lambdaRatio = lambdaRatio)
                elif method == "glasso_STARS":
                    estUG, sp = hugeLearnGraph(X, method = "glasso", modelSelectCrit = "stars", nonPara = nonPara, lambdaRatio = lambdaRatio)
                elif method == "KCIT_AND":
                    estUG = kcitSl.getMoralGraph("AND")
                elif method == "KCIT_OR":
                    estUG = kcitSl.getMoralGraph("OR")
                elif method == "RCIT_AND":
                    estUG = rcitSl.getMoralGraph("AND")
                elif method == "RCIT_OR":
                    estUG = rcitSl.getMoralGraph("OR")
                else:
                    # Nothing was estimated for an unknown name. Record
                    # placeholders and move on rather than falling through
                    # with an undefined (or the previous method's) graph.
                    print("unspecified method!!")
                    allRes[method][n]["HD"].append(np.nan)
                    allRes[method][n]["UG"].append(None)
                    allRes[method][n]["sparsity"].append(sp)
                    continue

                # DEBUG: an undirected graph must be symmetric; dump the
                # offending case for later inspection
                if not (estUG == estUG.T).all():
                    errors = {"testName" : testName, "data": X, "method" : method, "currentSeed" : rngState, "estUG": estUG, "trueUG": G, "testNumber" : tt + 1}
                    errorCount += 1

                    path = directory + "/errors_" + test_str + "_" + str(errorCount) + ".p"
                    saveResults(errors, path)

                    # force symmetry on UG: keep an edge only if it is
                    # present in both directions
                    estUG = 1*(estUG + estUG.T == 2)

                # compute Hamming distance
                hd = HD(G, estUG)

                # save stuff
                print(method,hd)
                allRes[method][n]["HD"].append(hd)
                allRes[method][n]["UG"].append(estUG)
                allRes[method][n]["sparsity"].append(sp)

        # save the true UG (this differs between the tests only in random graph cases)
        parameters["trueUGs"].append(G)
        completed = tt + 1

        # save results after every 5 tests
        if completed % 5 == 0 and SAVE:
            parameters["ntests"] = completed
            res = (allRes, parameters)
            saveResults(res, filename)

    # final results
    parameters["ntests"] = completed
    res = (allRes, parameters)
    if SAVE:
        saveResults(res, filename)

    return res