Example no. 1
0
def runAlgorithm():
    """Train and evaluate a single RTM topic model on the ACL dataset.

    Builds the command-line arguments for the model runner and delegates to
    ``run``.  Arguments are built directly as a list (rather than one string
    later re-split on spaces) so that values containing spaces — e.g. file
    paths — survive as single arguments.
    """
    K, P = 10, 75  # topic count and latent-feature count (P is not used by any flag below)
    for modelName in [Rtm]:  # ModelNames:
        args = [
            '--debug',       'False',
            '--model',       modelName,
            '--dtype',       'f8:f8',
            '--num-topics',  str(K),
            '--log-freq',    '5',
            '--eval',        'perplexity',
            '--iters',       '5',
            '--query-iters', '5',
            '--folds',       '2',
            '--words',       AclWordPath,
            '--links',       AclCitePath,
            '--limit-to',    '100000',
            # NOTE(review): '--eval' appears twice; with argparse the later value
            # wins, so 'perplexity' above is likely dead — confirm which was intended.
            '--eval',        MeanAveragePrecAllDocs,
            '--out-model',   '/Users/bryanfeeney/Desktop/acl-out',
        ]
        run(args)
Example no. 2
0
    def testMainScript(self):
        """Smoke-test the main script on data sampled from the model.

        Samples a synthetic corpus, pickles the words/features to temp files,
        then sweeps the (single-element) dataset/topic/feature/model grids,
        invoking ``run`` once per combination and collecting the model files
        it returns.
        """
        D, T, K, Q, F, P, avgWordsPerDoc = 200, 100, 10, 6, 12, 8, 500
        tpcs, vocab, docLens, X, W = sampleFromModel(D, T, K, F, P, avgWordsPerDoc)

        # Persist the sampled corpus so the script can load it from disk.
        wordsFile, featsFile, modelFileDir = tmpFiles()
        with open(wordsFile, 'wb') as f:
            pkl.dump(W, f)
        with open(featsFile, 'wb') as f:
            pkl.dump(X, f)

        print("New Version")

        # SGD hyper-parameters.
        RateDelay = 10
        ForgettingRate = 0.55
        BatchSize = 100

        # Grid of candidate SGD setups (currently unused by the loop below).
        sgd_setups = [(bs, rd, fr) for bs in [1, 5, 10, 100] for rd in [1, 10, 30] for fr in [0.6, 0.75, 0.9]]

        Folds, ExecutedFoldCount = 5, 1
        K, P, Q = 10, 150, 20
        TrainIters, QueryIters, LogFreq = 2, 2, 1

        PriorCov = 1
        VocabPrior = 5
        Debug = True

        modelFileses = []
        for DataSetName in [TweetsFreq]:
            for k in [K]:  # [10, 25, 50, 100]:
                for p in [P]:  # [50, 100, 250, 500]:
                    for modelName in [StmYvBohning]:
                        # Assemble (flag, value) pairs; optional flags are
                        # appended only when their value is available.
                        pairs = []
                        if Debug:
                            pairs.append(('--debug', str(Debug)))
                        pairs += [
                            ('--model', modelName),
                            ('--dtype', 'f8'),
                            ('--num-topics', str(k)),
                            ('--num-lat-feats', str(p)),
                            ('--num-lat-topics', str(Q)),
                            ('--log-freq', str(LogFreq)),
                            ('--eval', HashtagPrecAtM),
                            ('--gradient-batch-size', str(BatchSize)),
                            ('--gradient-rate-delay', str(RateDelay)),
                            ('--gradient-forgetting-rate', str(ForgettingRate)),
                            ('--iters', str(TrainIters)),
                            ('--query-iters', str(QueryIters)),
                            ('--folds', str(Folds)),
                            ('--truncate-folds', str(ExecutedFoldCount)),
                        ]
                        if DictsPath[DataSetName] is not None:
                            pairs.append(('--word-dict', DictsPath[DataSetName]))
                        pairs.append(('--words', WordsPath[DataSetName]))
                        if FeatsPath[DataSetName] is not None:
                            pairs.append(('--feats', FeatsPath[DataSetName]))
                        if CitesPath[DataSetName] is not None:
                            pairs.append(('--links', CitesPath[DataSetName]))
                        pairs += [
                            ('--topic-var', str(PriorCov)),
                            ('--feat-var', str(PriorCov)),
                            ('--lat-topic-var', str(PriorCov)),
                            ('--lat-feat-var', str(PriorCov)),
                            ('--vocab-prior', str(VocabPrior)),
                            ('--tag-recall-opts', '0:-1,0.75'),
                        ]
                        cmdline = ''.join(' ' + flag + ' ' + value for flag, value in pairs)

                        modelFileses.extend(run(cmdline.strip().split(' ')))

                    modelFileses.insert(0, wordsFile)
                    modelFileses.insert(1, featsFile)
                    print("Files can be found in:" + "\n\t".join(modelFileses))
Example no. 3
0
    def testAlgorithms(self):
        """Run a full training/evaluation sweep of the StmUyvBohning model.

        Samples a synthetic corpus, pickles the words/features to temp files,
        then builds the runner command-line for each dataset/topic-count/model
        combination, executing ``run`` and collecting the returned model files.
        """
        D, T, K, Q, F, P, avgWordsPerDoc = 200, 100, 10, 6, 12, 8, 500
        tpcs, vocab, docLens, X, W = sampleFromModel(D, T, K, F, P, avgWordsPerDoc)

        # Persist the sampled corpus so the script can load it from disk.
        wordsFile, featsFile, modelFileDir = tmpFiles()
        with open(wordsFile, "wb") as f:
            pkl.dump(W, f)
        with open(featsFile, "wb") as f:
            pkl.dump(X, f)

        print("New Version")

        Folds, ExecutedFoldCount = 5, 5
        K, P, Q = 50, 150, 20
        TrainIters, QueryIters, LogFreq = 600, 500, 50

        PriorCov = 1
        VocabPrior = 5
        Debug = False

        modelFileses = []
        for DataSetName in [TweetsFreq]:
            for k in [50]:
                for modelName in [StmUyvBohning]:
                    # Each entry is one "--flag value" segment; optional flags
                    # are added only when their value is available.
                    opts = []
                    if Debug:
                        opts.append("--debug " + str(Debug))
                    opts.append("--model " + modelName)
                    opts.append("--dtype " + "f8:f8")
                    opts.append("--num-topics " + str(k))
                    opts.append("--num-lat-feats " + str(P))
                    opts.append("--num-lat-topics " + str(Q))
                    opts.append("--log-freq " + str(LogFreq))
                    opts.append("--eval " + Perplexity)
                    opts.append("--iters " + str(TrainIters))
                    opts.append("--query-iters " + str(QueryIters))
                    opts.append("--folds " + str(Folds))
                    opts.append("--truncate-folds " + str(ExecutedFoldCount))
                    if DictsPath[DataSetName] is not None:
                        opts.append("--word-dict " + DictsPath[DataSetName])
                    opts.append("--words " + WordsPath[DataSetName])
                    if FeatsPath[DataSetName] is not None:
                        opts.append("--feats " + FeatsPath[DataSetName])
                    if CitesPath[DataSetName] is not None:
                        opts.append("--links " + CitesPath[DataSetName])
                    # All four prior variances share the same value.
                    for var_flag in ("--topic-var", "--feat-var", "--lat-topic-var", "--lat-feat-var"):
                        opts.append(var_flag + " " + str(PriorCov))
                    opts.append("--vocab-prior " + str(VocabPrior))
                    opts.append("--out-model " + "/Users/bryanfeeney/Desktop/acl-out-tm")
                    cmdline = " ".join(opts)

                modelFileses.extend(run(cmdline.strip().split(" ")))

                modelFileses.insert(0, wordsFile)
                modelFileses.insert(1, featsFile)
                print("Files can be found in:" + "\n\t".join(modelFileses))