Python kl_divergence 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: anatool.analyze.lm

메소드/함수: kl_divergence

hotexamples.com에서의 예제들: 4

Python kl_divergence 예제 4개를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한, Python anatool.analyze.lm.kl_divergence의 실제 사용 예제 중 평가가 가장 높은 것들입니다. 예제를 평가해 주시면 예제 품질을 높이는 데 도움이 됩니다.

예제 #1

파일 보기

def confusionmatrix(places):
    """Plot the confusion matrix between per-place LMs by KL-divergence.

    For every place id in ``places`` up to 200 randomly ordered rows are read
    from the ``sample`` table and split into two disjoint, adjacent slices
    (rows 0-79 and rows 80-159).  Each slice is passed to ``lmfromtext`` and
    ``kl_divergence`` is evaluated for every (first-slice, second-slice) pair
    of places.  The mean of the diagonal entries and the mean of the
    off-diagonal entries are printed, and the matrix is rendered as a gray
    image that is saved to ``sf_confm.eps`` and shown on screen.

    Args:
        places: sequence of place ids; it is indexed and measured with
            ``len``, so it must not be a one-shot iterator.

    Returns:
        None.  An empty ``places`` returns immediately without querying,
        printing or plotting.
    """
    if not places:
        # Nothing to compare; the averages below would divide by zero.
        return

    first_half = dict()
    second_half = dict()
    for pid in places:
        cur = CONN_POOL.get_cur(GEOTWEET)
        # NOTE(review): pid is interpolated straight into the SQL text.  If
        # place ids can ever come from untrusted input, switch to the
        # driver's parameter binding instead of str.format.
        cur.execute('select text from sample'
                    ' where place_id = \'{0}\' order by rand() limit {1}'.format(pid, 200))
        text = [row['text'] for row in cur]
        first_half[pid] = lmfromtext(text[:80])
        # Start at 80 (not 81) so that no sampled row is silently dropped
        # between the two slices.
        second_half[pid] = lmfromtext(text[80:160])

    confmat = [
        [kl_divergence(first_half[lm_i], second_half[lm_j]) for lm_j in places]
        for lm_i in places
    ]

    count = len(places)
    selfsum = sum(confmat[i][i] for i in range(count))
    mutsum = sum(sum(row) for row in confmat) - selfsum
    selfavg = selfsum / float(count)
    # With a single place there are no off-diagonal cells, so the mutual
    # average is undefined; report NaN instead of raising ZeroDivisionError.
    offdiag = count * count - count
    mutavg = mutsum / float(offdiag) if offdiag else float('nan')
    # Single-argument call form prints the same text on Python 2 and 3.
    print('{0} {1}'.format(selfavg, mutavg))

    plt.imshow(np.array(confmat), cmap=cm.gray, interpolation='nearest')
    plt.yticks(range(count),
               ['{0}: {1}'.format(place_name(pid), idx)
                for idx, pid in enumerate(places)])
    plt.xticks(range(count))
    plt.subplots_adjust(left=0.4)
    plt.colorbar(shrink=0.66)
    plt.savefig('sf_confm.eps')
    plt.show()

예제 #2

파일 보기

파일: distinguish.py 프로젝트: spacelis/anatool

def confusionmatrix(places):
    """Show the matrix of confusion between LMs by KL-divergence.

    Up to 200 randomly ordered ``sample`` rows are fetched per place id and
    divided into two non-overlapping, contiguous parts (indices 0-79 and
    80-159).  ``lmfromtext`` is applied to each part, then ``kl_divergence``
    is computed between the first part of every place and the second part of
    every place.  The average diagonal value and the average off-diagonal
    value are printed; the matrix itself is drawn in gray scale, written to
    ``sf_confm.eps`` and displayed.

    Args:
        places: sequence of place ids (must support ``len`` and indexing).

    Returns:
        None.  When ``places`` is empty the function returns right away
        without touching the database or the plot.
    """
    if not places:
        # Avoid the division by zero that an empty matrix would trigger.
        return

    lmtwt1 = dict()
    lmtwt2 = dict()
    for pid in places:
        cur = CONN_POOL.get_cur(GEOTWEET)
        # NOTE(review): the place id is formatted directly into the query;
        # prefer bound parameters if ids may originate from untrusted input.
        cur.execute('select text from sample'
                    ' where place_id = \'{0}\' order by rand() limit {1}'.format(pid, 200))
        text = [row['text'] for row in cur]
        lmtwt1[pid] = lmfromtext(text[:80])
        # The second part begins at index 80; the former 81 skipped one row.
        lmtwt2[pid] = lmfromtext(text[80:160])

    confmat = list()
    for lm_i in places:
        confmat.append([kl_divergence(lmtwt1[lm_i], lmtwt2[lm_j]) for lm_j in places])

    size = len(places)
    selfavg = sum([confmat[i][i] for i in range(size)])
    mutavg = sum([sum(row) for row in confmat]) - selfavg
    selfavg /= float(size)
    pairs = size * size - size
    if pairs:
        mutavg /= float(pairs)
    else:
        # Only one place: there is no off-diagonal cell to average over.
        mutavg = float('nan')
    # Formatting first keeps the output identical under Python 2 and 3.
    print('{0} {1}'.format(selfavg, mutavg))

    plt.imshow(np.array(confmat), cmap=cm.gray, interpolation='nearest')
    labels = ['{0}: {1}'.format(place_name(pid), i) for i, pid in enumerate(places)]
    plt.yticks(range(size), labels)
    plt.xticks(range(size))
    plt.subplots_adjust(left=0.4)
    plt.colorbar(shrink=0.66)
    plt.savefig('sf_confm.eps')
    plt.show()

예제 #3

파일 보기

파일: websmooth.py 프로젝트: spacelis/anatool

def kldiff(places):
    """Compare the KL-divergence of tweets and of web pages for each place.

    For every place id, up to 100 randomly ordered rows are loaded from
    ``sample`` and up to 25 rows from ``web``.  The first 50 tweet texts are
    passed to ``lmfromtext`` to form the reference; the remaining tweet texts
    and the web texts are each passed to ``lmfromtext`` and compared to that
    reference with ``kl_divergence``.  One ``name & twtkld & webkld`` line is
    printed per place.

    Args:
        places: iterable of place ids.

    Returns:
        None; the results are only printed.
    """
    diff = Dataset()
    for pid in places:
        # NOTE(review): pid is formatted directly into the SQL condition;
        # confirm that place ids never come from untrusted input.
        twt = loadrows(GEOTWEET, ('place_id', 'text'),
                ('place_id=\'{0}\''.format(pid),), 'sample',
                'order by rand() limit {0}'.format(100))
        web = loadrows(GEOTWEET, ('place_id', 'web'),
                ('place_id=\'{0}\''.format(pid),), 'web',
                'limit 25')
        tweets = twt['text']
        lmref = lmfromtext(tweets[:50])
        # Continue at index 50 (not 51) so that no tweet is dropped between
        # the reference slice and the comparison slice.
        lmtwt = lmfromtext(tweets[50:])
        lmweb = lmfromtext(web['web'])
        diff.append({'pid': pid, 'twtkld': kl_divergence(lmtwt, lmref),
            'webkld': kl_divergence(lmweb, lmref)})
    for item in diff:
        # Single-argument call form behaves the same on Python 2 and 3.
        print('{0} & {1} & {2}'.format(place_name(item['pid']), item['twtkld'], item['webkld']))

예제 #4

파일 보기

파일: websmooth.py 프로젝트: spacelis/anatool

def kldiff(places):
    """Compare the KL-divergence between tweets and web pages for each place.

    Per place id this loads up to 100 randomly ordered ``sample`` rows and up
    to 25 ``web`` rows.  ``lmfromtext`` is applied to the first 50 tweet texts
    (the reference), to the rest of the tweet texts, and to the web texts;
    ``kl_divergence`` is then evaluated for the latter two against the
    reference.  A ``name & twtkld & webkld`` line is printed for every place.

    Args:
        places: iterable of place ids.

    Returns:
        None; output goes to stdout only.
    """
    diff = Dataset()
    for pid in places:
        # NOTE(review): the place id is interpolated into the SQL condition;
        # verify that ids are trusted or move to bound parameters.
        place_filter = ('place_id=\'{0}\''.format(pid), )
        twt = loadrows(GEOTWEET, ('place_id', 'text'), place_filter, 'sample',
                       'order by rand() limit {0}'.format(100))
        web = loadrows(GEOTWEET, ('place_id', 'web'), place_filter, 'web',
                       'limit 25')
        texts = twt['text']
        lmref = lmfromtext(texts[:50])
        # The comparison slice starts at 50; starting at 51 skipped one tweet.
        lmtwt = lmfromtext(texts[50:])
        lmweb = lmfromtext(web['web'])
        diff.append({
            'pid': pid,
            'twtkld': kl_divergence(lmtwt, lmref),
            'webkld': kl_divergence(lmweb, lmref)
        })
    for item in diff:
        # Parenthesised single argument prints identically on Python 2 and 3.
        print('{0} & {1} & {2}'.format(place_name(item['pid']), item['twtkld'],
                                       item['webkld']))