Example #1
0
    def run(name, anchors):
        """Evaluate a set of anchors: train topics, then print metrics.

        Recovers topics from *anchors*, builds topic features, splits them
        90/10 into train/test, and prints (tagged with *name*) the vowpal
        classification metrics followed by mean topic coherence at summary
        sizes 10, 15, and 20.

        Relies on `dataset`, `ankura`, and `numpy` from the enclosing scope.
        """
        topics = ankura.recover_topics(dataset, anchors)
        features = ankura.topic_combine(topics, dataset)
        train, test = ankura.pipeline.train_test_split(features, .9)

        vw_contingency = ankura.measure.vowpal_contingency(train, test, 'dirname')
        print(name, 'accuracy:', ankura.measure.vowpal_accuracy(train, test, 'dirname'))
        print(name, 'f-Measure:', vw_contingency.fmeasure())
        print(name, 'ari:', vw_contingency.ari())
        print(name, 'rand:', vw_contingency.rand())
        print(name, 'vi:', vw_contingency.vi())

        # The original repeated this stanza verbatim for 10, 15, and 20;
        # one parameterized loop produces byte-identical output.
        for size in (10, 15, 20):
            coherence = [
                ankura.measure.topic_coherence(topic, dataset)
                for topic in ankura.topic.topic_summary_indices(topics, dataset, size)
            ]
            print(name, 'coherence-%d:' % size, numpy.mean(coherence))
Example #2
0
    def run(name, anchors):
        """Evaluate a set of anchors: train topics, then print metrics.

        Recovers topics from *anchors*, builds topic features, splits them
        90/10 into train/test, and prints (tagged with *name*) the vowpal
        classification metrics followed by mean topic coherence at summary
        sizes 10, 15, and 20.

        Relies on `dataset`, `ankura`, and `numpy` from the enclosing scope.
        """
        topics = ankura.recover_topics(dataset, anchors)
        features = ankura.topic_combine(topics, dataset)
        train, test = ankura.pipeline.train_test_split(features, .9)

        vw_contingency = ankura.measure.vowpal_contingency(
            train, test, 'dirname')
        print(name, 'accuracy:',
              ankura.measure.vowpal_accuracy(train, test, 'dirname'))
        print(name, 'f-Measure:', vw_contingency.fmeasure())
        print(name, 'ari:', vw_contingency.ari())
        print(name, 'rand:', vw_contingency.rand())
        print(name, 'vi:', vw_contingency.vi())

        # Collapse the three copy-pasted coherence stanzas (10, 15, 20)
        # into one loop; the printed lines are unchanged.
        for size in (10, 15, 20):
            coherence = [
                ankura.measure.topic_coherence(topic, dataset)
                for topic in ankura.topic.topic_summary_indices(
                    topics, dataset, size)
            ]
            print(name, 'coherence-%d:' % size, numpy.mean(coherence))
Example #3
0
def demo():
    """Run the newsgroups demo, printing the top tokens of each topic."""
    corpus = get_newsgroups()
    # 20 anchors from 500 candidate projections via Gram-Schmidt.
    anchor_vecs = ankura.gramschmidt_anchors(corpus, 20, 500)
    recovered = ankura.recover_topics(corpus, anchor_vecs)

    summaries = ankura.topic.topic_summary_tokens(recovered, corpus, 20)
    for summary in summaries:
        print(' '.join(summary))
Example #4
0
def topic_inference(raw_anchors):
    """Return inferred topic info from raw anchors.

    Args:
        raw_anchors: a JSON string encoding anchor token groups, or None
            to fall back to the dataset's default anchors.

    Returns:
        A tuple (topics, topic_summary, anchor_tokens) where topics is the
        recovered topic matrix, topic_summary lists the top 15 tokens per
        topic, and anchor_tokens are the token tuples used as anchors.
    """
    dataset = args.get_dataset()

    # No client-supplied anchors: use the precomputed defaults.
    if raw_anchors is None:
        anchor_tokens, anchors = args.default_anchors()
    else:
        anchor_tokens = ankura.util.tuplize(json.loads(raw_anchors))
        anchors = user_anchors(anchor_tokens)

    topics = ankura.recover_topics(dataset, anchors, epsilon=1e-6)
    topic_summary = ankura.topic.topic_summary_tokens(topics, dataset, n=15)

    return topics, topic_summary, anchor_tokens
Example #5
0
def topic_inference(raw_anchors):
    """Return inferred topic info (topics, summary, anchor tokens).

    When *raw_anchors* is None the dataset's default anchors are used;
    otherwise it is parsed as JSON into anchor token groups.
    """
    dataset = args.get_dataset()

    if raw_anchors is not None:
        anchor_tokens = ankura.util.tuplize(json.loads(raw_anchors))
        anchors = user_anchors(anchor_tokens)
    else:
        anchor_tokens, anchors = args.default_anchors()

    topics = ankura.recover_topics(dataset, anchors, epsilon=1e-6)
    summary = ankura.topic.topic_summary_tokens(topics, dataset, n=15)
    return topics, summary, anchor_tokens