Example #1
def test1(data, features):
    # Restrict to data columns 1-19 and trim the feature list to the
    # same width.
    data = data[:, 1:20]
    features = features[0:data.shape[1]]

    # Decompose the data into 3 archetypes (project-local helper).
    arcs, mixt = getArchetypes(data, 3)
    
    nrfolds = 10
    
    
    # dsname is expected to be defined at module level.
    stats = Stats(name=dsname)
    
    for train, test, i in kfolded(mixt, nrfolds):
        c = Chrono().start()
        spn = SPN.LearnStructure(train,
                                 featureTypes=["continuous"] * train.shape[1],
                                 row_split_method=Splitting.KmeansRows(),
                                 col_split_method=Splitting.RDCTest(threshold=0.3),
                                 min_instances_slice=100)
        c.end()
        
        spn.root.validate()
        # Mean test log-likelihood for this fold.
        ll = numpy.mean(spn.root.eval(test))
        
        print(ll)
        
        stats.add("HSPN", Stats.LOG_LIKELIHOOD, ll)
        stats.add("HSPN", Stats.TIME, c.elapsed())
        
        # Save incrementally after each fold.
        stats.save("stats_" + dsname + ".json")
    
    print(arcs)
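
A minimal invocation sketch, assuming `data` is a numpy matrix of continuous features and that the module-level names the snippet relies on (`dsname`, `SPN`, `Splitting`, `Stats`, `kfolded`, `getArchetypes`, `Chrono`) are importable; all values below are illustrative:

# Hypothetical call; dsname must be set globally because test1 reads it.
dsname = "demo"
data = numpy.random.rand(500, 25)
features = ["X%d" % j for j in range(data.shape[1])]
test1(data, features)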
Example #2
def pdnperplexity(train, test, features, depth, iters):
    from pdn.GBMPDN import GBMPDN

    # Fit a gradient-boosted PDN and time the training.
    c = Chrono().start()
    pdn = GBMPDN(train, features, max_depth=depth, iterations=iters)
    c.end()
    pwb, perplexity, words, ll = pdn.perplexity(test)
    print(
        "PDN iters=%s depth=%s ll=%s %.3f per-word bound, %.3f perplexity estimate based on a held-out corpus of %i documents with %i words"
        % (iters, depth, ll, pwb, perplexity, test.shape[0], words))
    return perplexity, ll, c.elapsed()
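
A usage sketch, assuming `train` and `test` are count matrices and `features` a matching name list as above; the depth and iteration counts are illustrative, not values from the source:

# Larger depth/iters trade runtime for fit quality.
perplexity, ll, seconds = pdnperplexity(train, test, features,
                                        depth=4, iters=50)
print("PDN perplexity %.1f in %.1fs" % (perplexity, seconds))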
Example #3
def pspnperplexity(train, test, min_slices, ind_test_method,
                   row_cluster_method):
    # Learn the SPN structure on the training data and time it.
    c1 = Chrono().start()
    spn = LearnSPN(alpha=0.001,
                   min_slices=min_slices,
                   cluster_prep_method="sqrt",
                   ind_test_method=ind_test_method,
                   row_cluster_method=row_cluster_method).fit_structure(train)
    c1.end()
    elapsed = c1.elapsed()
    pwb, perplexity, words, logl = spn.perplexity(test)

    print(
        "SPN ll=%s %.3f per-word bound, %.1f perplexity estimate based on a held-out corpus of %i documents with %i words"
        % (logl, pwb, perplexity, test.shape[0], words))
    return perplexity, logl, elapsed, spn.size()
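
A sketch of driving this over a grid of slice sizes; the valid `ind_test_method` and `row_cluster_method` values depend on the LearnSPN implementation in use and are passed through unchanged here:

# min_slices values are illustrative.
for min_slices in [50, 100, 200]:
    perp, logl, secs, size = pspnperplexity(train, test, min_slices,
                                            ind_test_method,
                                            row_cluster_method)
    print("min_slices=%d perplexity=%.1f size=%d" % (min_slices, perp, size))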
Example #4
import warnings
import gensim
import numpy
from gensim.corpora import Dictionary

# Chrono and runLda are project-local helpers.
def ldaperplexity(train, test, topics):
    # Dense document-term matrices -> gensim corpora (rows are documents).
    corpus = gensim.matutils.Dense2Corpus(train.astype(int),
                                          documents_columns=False)
    corpusTest = gensim.matutils.Dense2Corpus(test.astype(int),
                                              documents_columns=False)
    dictionary = Dictionary.from_corpus(corpus)

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        c = Chrono().start()
        lda = runLda(corpus, dictionary, topics=topics)
        c.end()

    # Total token count of the held-out corpus.
    corpus_words = sum(cnt for document in corpusTest for _, cnt in document)
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        perwordbound = lda.log_perplexity(corpusTest)
    print(
        "LDA %.3f per-word bound, %.1f perplexity estimate based on a held-out corpus of %i documents with %i words"
        % (perwordbound, numpy.exp2(-perwordbound), len(corpusTest),
           corpus_words))
    # gensim reports a base-2 per-word bound, so perplexity = 2 ** (-bound).
    return numpy.exp2(-perwordbound), c.elapsed()
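
A usage sketch, assuming `train` and `test` are dense document-term count matrices with one row per document; the topic count is illustrative:

# Hypothetical call; runLda is the project-local LDA trainer used above.
perplexity, seconds = ldaperplexity(train, test, topics=20)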
Example #5
def generateInput(dsname, data, family):
    # Drop zero-variance rows before splitting.
    data = data[numpy.var(data, 1) > 0, :]

    fullcmd = []
    cscmd = []
    rndcmd = []

    # shuffle=True is required for random_state to take effect in
    # sklearn's KFold; without it the seed is ignored.
    for fold, (train_index, test_index) in enumerate(KFold(n_splits=10, shuffle=True, random_state=1).split(data)):

        train, test = data[train_index, :], data[test_index, :]

        prepareExp(mkfolders("data/" + dsname + "/full/"), fold, train, test, None, family, 0.0, fullcmd, train)

        for pct in [5, 10, 20, 30, 40]:
            # Coreset subsample keeping pct% of the training rows,
            # with per-instance weights.
            cschrono = Chrono().start()
            sampleIndexes, csweight = coreset(train, pct)
            cstrain = train[sampleIndexes, :]
            cschrono.end()

            prepareExp(mkfolders("data/" + dsname + "/cs/" + str(pct) + "/"), fold, cstrain, test, csweight, family, cschrono.elapsed(), cscmd, train)

            # Uniform random subsample of the same size, for comparison.
            rndchrono = Chrono().start()
            sampleIndexes, _ = getSamples(train, None, pct)
            rtrain = train[sampleIndexes, :]
            rndchrono.end()

            prepareExp(mkfolders("data/" + dsname + "/rnd/" + str(pct) + "/"), fold, rtrain, test, None, family, rndchrono.elapsed(), rndcmd, train)

    allcmds = fullcmd + cscmd + rndcmd

    with open("cmds_"+dsname+".txt", "w") as text_file:
        text_file.write("\n".join(allcmds))
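
A usage sketch; the dataset name and `family` value below are placeholders that are simply forwarded to prepareExp:

# Hypothetical call with synthetic count data; writes fold data under
# data/demo/ and the collected experiment commands to cmds_demo.txt.
data = numpy.random.poisson(1.0, (1000, 50))
generateInput("demo", data, "poisson")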
Example #6
    
import tensorflow as tf  # TF1.x graph API; the node classes are project-local

# Two product nodes over a Gaussian (X0), a Poisson (X1), and a
# Bernoulli (X2) leaf, mixed with equal weights under a sum node.
gn1 = GaussianNode("gn1", 0, "X0", 1.0, 1.0)
pn1 = PoissonNode("pn1", 1, "X1", 1.0)
bn1 = BernoulliNode("bn1", 2, "X2", 0.0)
p1 = ProductNode("p1", gn1, pn1, bn1)

gn2 = GaussianNode("gn2", 0, "X0", 10.0, 1.0)
pn2 = PoissonNode("pn2", 1, "X1", 10.0)
bn2 = BernoulliNode("bn2", 2, "X2", 1.0)
p2 = ProductNode("p2", gn2, pn2, bn2)

s1 = SumNode("s1", [0.5, 0.5], p1, p2)
spn = SPN()
spn.root = s1

c = Chrono().start()

# Build a fresh default graph before entering the device scope.
tf.reset_default_graph()

with tf.device("/cpu:0"):
    with tf.name_scope('input'):
        X = tf.placeholder(tf.float64, [None, 3], name="x")

    with tf.name_scope('SPN'):
        spn.root.initTf(X)
        costf = JointCost(spn.root)

    train_op = tf.train.AdamOptimizer().minimize(costf)

print(c.end().elapsed())
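
The snippet builds the graph and the Adam update op but never runs a session; a minimal TF1 training sketch, assuming `data` is an (n, 3) numpy array matching the placeholder:

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for _ in range(100):  # illustrative number of Adam steps
        _, cost = sess.run([train_op, costf], feed_dict={X: data})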