コード例 #1
0
ファイル: ToyBarsK6.py プロジェクト: csa0001/Refinery
def get_minibatch_iterator(seed=8675309,
                           nBatch=10,
                           nObsBatch=None,
                           nObsTotal=25000,
                           nLap=1,
                           allocModelName=None,
                           dataorderseed=0,
                           **kwargs):
    """Build a minibatch iterator over the bag-of-words dataset for `seed`.

    Args:
        seed: passed to ``get_BoW`` to produce the WordsData constructor
            arguments.
        nBatch: forwarded to ``AdmixMinibatchIterator``.
        nObsBatch: forwarded to ``AdmixMinibatchIterator``.
        nObsTotal: accepted for interface compatibility; not used here.
        nLap: forwarded to ``AdmixMinibatchIterator``.
        allocModelName: accepted for interface compatibility; not used here.
        dataorderseed: forwarded to ``AdmixMinibatchIterator``.
        **kwargs: extra keyword arguments are accepted and ignored.

    Returns:
        The ``AdmixMinibatchIterator`` instance, with its ``summary``
        attribute set to the result of ``get_data_info()``.
    """
    iterator_options = dict(nBatch=nBatch,
                            nObsBatch=nObsBatch,
                            nLap=nLap,
                            dataorderseed=dataorderseed)
    corpus = WordsData(**get_BoW(seed))
    minibatches = AdmixMinibatchIterator(corpus, **iterator_options)
    minibatches.summary = get_data_info()
    return minibatches
コード例 #2
0
def _read_vocab(vocabfile):
    """Return a dict mapping 0-based line number to the stripped line text."""
    with open(vocabfile, 'r') as f:
        return dict(enumerate(line.strip() for line in f))


def _read_wordcounts(datafile):
    """Parse a ``doc_id,word_id,count`` file into parallel lists and doc ranges.

    Each non-blank line is split on commas; the first three fields are
    converted with ``int``. Blank lines are skipped.

    NOTE(review): rows of the same document are assumed to be contiguous;
    a new range is started whenever the doc id differs from the previous
    row's -- confirm the writer of this file guarantees that ordering.

    Returns:
        (word_id, word_count, docrange) where ``docrange`` is a list of
        ``[start, stop]`` pairs indexing into the two token lists with
        ``stop`` exclusive. An input with no rows yields ``[[0, 0]]``.
    """
    word_id = []
    word_count = []
    docrange = []
    start = 0
    cur_doc = None  # None (not -1) so that any integer doc id is legal
    with open(datafile, 'r') as f:
        for raw in f:
            row = raw.strip()
            if not row:
                continue
            fields = row.split(",")
            doc_id = int(fields[0])
            if cur_doc is None:
                cur_doc = doc_id
            elif doc_id != cur_doc:
                # The current row is the first token of a new document, so
                # the previous document ends (exclusively) at the number of
                # tokens collected so far, and the new one starts there.
                # This matches the exclusive stop used for the final range.
                boundary = len(word_id)
                docrange.append([start, boundary])
                start = boundary
                cur_doc = doc_id
            word_id.append(int(fields[1]))
            word_count.append(int(fields[2]))
    docrange.append([start, len(word_id)])
    return word_id, word_count, docrange


def run_topic_modeling(username,folder_id,ex_id):
    """Run bnpy topic modeling for one experiment and store its viz data.

    Marks the experiment 'inprogress', builds a ``WordsData`` object from
    the folder's word-count and vocabulary files, runs ``bnpy.Run.run``
    with the experiment's ``nTopics``, stores the result of
    ``getModelState`` on ``exinfo.viz_data``, marks the experiment
    'finish' and commits the database session.

    Args:
        username: passed to ``set_tm_status`` and to the bnpy custom
            function arguments.
        folder_id: primary key looked up via ``Folder.query.get``.
        ex_id: primary key looked up via ``Experiment.query.get``.
    """
    d = Folder.query.get(folder_id)
    ex = Experiment.query.get(ex_id)
    set_tm_status(username,folder_id, ex,'inprogress')
    db.session.commit()

    exinfo = ex.getExInfo()

    # CREATE WORD DATA
    datafile = d.wordcount_path()
    vocabfile = d.vocab_path()

    vocab = _read_vocab(vocabfile)
    word_id, word_count, docrange = _read_wordcounts(datafile)
    data = WordsData(word_id,word_count,docrange,len(vocab),vocab,len(docrange))

    # RUN Topic Modeling in BNPY
    custom_args = {"tm_id":str(d.id), "username":username}

    # Options that were tried and are currently disabled:
    #   moves='birth,merge', birthPerLap=10, mergePerLap=10, nFreshLap=25
    hmodel = bnpy.Run.run(data, 'HDPModel', 'Mult', 'VB', doSaveToDisk=False, K=exinfo.nTopics,
                          nLap=100, initname="randomfromprior",
                          customFuncPath="refinery/webapp/", customFuncArgs=json.dumps(custom_args))

    exinfo.viz_data = getModelState(hmodel[0],hmodel[1],100)

    set_tm_status(username,folder_id, ex,'finish')

    db.session.commit()
コード例 #3
0
ファイル: ToyBarsK6.py プロジェクト: csa0001/Refinery
def get_data(seed=8675309, nObsTotal=25000, **kwargs):
    """Return the bag-of-words dataset generated from `seed`.

    Args:
        seed: passed to ``get_BoW`` to produce the WordsData constructor
            arguments.
        nObsTotal: accepted for interface compatibility; not used here.
        **kwargs: extra keyword arguments are accepted and ignored.

    Returns:
        The ``WordsData`` instance, with its ``summary`` attribute set to
        the result of ``get_data_info()``.
    """
    corpus = WordsData(**get_BoW(seed))
    corpus.summary = get_data_info()
    return corpus