def add_metric(dataset, analysis, force_import=False, *args, **kwargs): analysis = Analysis.objects.get(dataset__name=dataset, name=analysis) for attr in analysis.dataset.attribute_set.all(): for val in attr.value_set.all(): metric_name = 'Document Entropy for %s: %s' % (attr.name, val.value) print metric_name try: metric = TopicMetric.objects.get(name=metric_name, analysis=analysis) if not force_import: raise RuntimeError( '%s is already in the database for this ' 'analysis!' % metric_name) except TopicMetric.DoesNotExist: metric = TopicMetric(name=metric_name, analysis=analysis) metric.save() topics = analysis.topics.all() docs = [ d.id for d in analysis.dataset.documents.filter( attributevaluedocument__attribute=attr, attributevaluedocument__value=val) ] for topic in topics: ent = 0 for dt in topic.documenttopic_set.filter( document__id__in=docs): prob = dt.count / topic.total_count ent -= prob * (log(prob) / log(2)) tmv = TopicMetricValue(topic=topic, metric=metric, value=ent) tmv.save()
def add_metric(dataset, analysis, force_import=False, *args, **kwargs): analysis = Analysis.objects.get(dataset__name=dataset, name=analysis) try: metric = TopicMetric.objects.get(name='Percent Tokens Positive ' 'Sentiment', analysis=analysis) if not force_import: raise RuntimeError('Sentiment is already in the database ' 'for this analysis!') except TopicMetric.DoesNotExist: metric = TopicMetric(name='Percent Tokens Positive Sentiment', analysis=analysis) metric.save() # call stuff to classify documents and get sentiment information, as in # parse_dependencies.py data_root = analysis.dataset.dataset_dir topics = analysis.topics.all() for topic in topics: positive = 0; negative = 0; for docTopic in topic.documenttopic_set.all(): filename = data_root + '/' + docTopic.document.filename print topic, filename sentiment = float(sentiment_document(filename)) print 'sentiment returned:', sentiment if sentiment == 1 : positive += docTopic.count print '%d/%d' % (positive, topic.total_count) # compute aggregate information for topic topicSentiment = float(positive)/float(topic.total_count) tmv = TopicMetricValue(topic=topic, metric=metric, value=topicSentiment) tmv.save()
def add_metric(dataset, analysis, force_import=False, *args, **kwargs):
    """Add a PMI-based 'Coherence' topic metric.

    Reads total word / co-occurrence counts from the sqlite3 database named
    by kwargs['counts'], then scores each topic by the average pairwise
    mutual information (via compute_pmi) over all ordered pairs of its top
    ten non-ngram words.

    dataset, analysis -- names (strings) used to look up the Analysis.
    Raises RuntimeError if the metric already exists for this analysis and
    force_import is not set; KeyError if kwargs lacks 'counts'.
    """
    # BUGFIX: metric_name was referenced but never assigned, so the original
    # raised a NameError on its first use.  'Coherence' names the PMI-based
    # coherence score this function computes -- confirm against any existing
    # rows in the TopicMetric table before re-importing.
    metric_name = 'Coherence'
    analysis = Analysis.objects.get(dataset__name=dataset, name=analysis)
    try:
        metric = TopicMetric.objects.get(name=metric_name, analysis=analysis)
        if not force_import:
            raise RuntimeError('%s is already in the database for this '
                    'analysis!' % metric_name)
    except TopicMetric.DoesNotExist:
        metric = TopicMetric(name=metric_name, analysis=analysis)
        metric.save()
    conn = sqlite3.connect(kwargs['counts'])
    c = conn.cursor()
    # total_counts holds single-row totals used to normalize the PMI terms.
    c.execute("select words from total_counts")
    for row in c:
        total_words = float(row[0])
    c.execute("select cooccurrences from total_counts")
    for row in c:
        total_cooccurrences = float(row[0])
    topics = analysis.topics.all()
    for topic in topics:
        topicwords = topic.topicword_set.filter(
                word__ngram=False).order_by('-count')
        # We just grab the first ten words - there's probably a better way to
        # do this
        words = [tw.word.type for tw in topicwords[:10]]
        total_pmi = 0
        for w1 in words:
            for w2 in words:
                if w1 == w2:
                    continue
                total_pmi += compute_pmi(w1, w2, c, total_words,
                        total_cooccurrences)
        # NOTE(review): the denominator includes the skipped diagonal
        # (w1 == w2) pairs; len(words)*(len(words)-1) may have been intended.
        # Preserved as-is so existing scores stay comparable.
        average_pmi = total_pmi / (len(words)**2)
        tmv = TopicMetricValue(topic=topic, metric=metric, value=average_pmi)
        tmv.save()
def add_metric(dataset, analysis, force_import=False, *args, **kwargs):
    """Add a 'Value Entropy' topic metric for every dataset attribute.

    For each attribute, computes per topic the Shannon entropy (in bits) of
    the topic's token counts across that attribute's values, and stores it
    as a TopicMetricValue.

    dataset, analysis -- names (strings) used to look up the Analysis.
    Raises RuntimeError if the metric already exists for this analysis and
    force_import is not set.
    """
    analysis = Analysis.objects.get(dataset__name=dataset, name=analysis)
    for attr in analysis.dataset.attribute_set.all():
        name = 'Value Entropy for Attribute %s' % attr.name
        try:
            metric = TopicMetric.objects.get(name=name, analysis=analysis)
            if not force_import:
                raise RuntimeError('%s is already in the database for this '
                        'analysis!' % name)
        except TopicMetric.DoesNotExist:
            metric = TopicMetric(name=name, analysis=analysis)
            metric.save()
        topics = analysis.topics.all()
        for topic in topics:
            entropy = 0
            for avt in topic.attributevaluetopic_set.filter(attribute=attr):
                # BUGFIX: cast to float.  avt.count and total_count are
                # integer counts, so under Python 2 the plain '/' truncated
                # prob to 0, breaking the log() below.  Matches the float()
                # convention used by the sentiment metric in this file.
                prob = float(avt.count) / topic.total_count
                # log(prob)/log(2): entropy measured in bits.
                entropy -= prob * (log(prob) / log(2))
            tmv = TopicMetricValue(topic=topic, metric=metric, value=entropy)
            tmv.save()