def test_perspective_jsd_matrix_symmetric():
    """Yield one symmetry check per topic of the perspective JSD matrix.

    For each of the 20 topics a pair ``(assert_true, flag)`` is yielded,
    where ``flag`` tells whether that topic's matrix equals its own
    transpose in every element.
    """
    nTopics = 20
    params = dict(nTopics=nTopics, outDir='cptm/tests/data/{}')
    matrices = perspective_jsd_matrix(load_opinions(params),
                                      params.get('nTopics'))

    for topic in range(nTopics):
        per_topic = matrices[topic]
        is_symmetric = (per_topic.transpose() == per_topic).all()
        yield assert_true, is_symmetric
Exemple #2
0
def test_perspective_jsd_matrix_symmetric():
    """Yield one symmetry check per topic of the perspective JSD matrix.

    For each of the 20 topics a pair ``(assert_true, flag)`` is yielded,
    where ``flag`` tells whether that topic's matrix equals its own
    transpose in every element.
    """
    nTopics = 20
    params = dict(nTopics=nTopics, outDir='cptm/tests/data/{}')
    matrices = perspective_jsd_matrix(load_opinions(params),
                                      params.get('nTopics'))

    for topic in range(nTopics):
        per_topic = matrices[topic]
        is_symmetric = (per_topic.transpose() == per_topic).all()
        yield assert_true, is_symmetric
def test_perspective_jsd_matrix_diagonal_zeros():
    """Yield one check per diagonal entry of every topic's JSD matrix.

    Each yielded triple is ``(assert_equal, value, 0.0)``: every diagonal
    element of each of the 20 per-topic matrices is expected to equal 0.0.
    """
    nTopics = 20
    params = dict(nTopics=nTopics, outDir='cptm/tests/data/{}')
    matrices = perspective_jsd_matrix(load_opinions(params),
                                      params.get('nTopics'))

    for topic in range(nTopics):
        per_topic = matrices[topic]
        size = per_topic.shape[0]
        for pos in range(size):
            yield assert_equal, per_topic[pos, pos], 0.0
Exemple #4
0
def test_perspective_jsd_matrix_diagonal_zeros():
    """Yield one check per diagonal entry of every topic's JSD matrix.

    Each yielded triple is ``(assert_equal, value, 0.0)``: every diagonal
    element of each of the 20 per-topic matrices is expected to equal 0.0.
    """
    nTopics = 20
    params = dict(nTopics=nTopics, outDir='cptm/tests/data/{}')
    matrices = perspective_jsd_matrix(load_opinions(params),
                                      params.get('nTopics'))

    for topic in range(nTopics):
        per_topic = matrices[topic]
        size = per_topic.shape[0]
        for pos in range(size):
            yield assert_equal, per_topic[pos, pos], 0.0
def test_contrastive_opinions_result_shape():
    """Verify the shape of the output of contrastive_opinions.

    The result for the word 'carrot' is expected to have as many rows as
    the index of one entry of ``opinions`` and as many columns as there
    are entries in ``opinions``.
    """
    params = {
        "inputData": "/home/jvdzwaan/data/tmp/test/*",
        "outDir": "cptm/tests/data/{}",
        "nTopics": 20
    }
    topics = load_topics(params)
    opinions = load_opinions(params)
    nks = load('cptm/tests/data/nks_20.npy')
    co = contrastive_opinions('carrot', topics, opinions, nks)
    # On Python 3 ``keys()`` returns a view that cannot be subscripted, so
    # materialise it first; on Python 2 this is merely a list copy.
    first_key = list(opinions.keys())[0]
    num_opinion_words = len(opinions[first_key].index)
    assert_equal(co.shape, (num_opinion_words, len(opinions)))
def test_filter_opinions():
    """Yield a check that each requested perspective survives filtering.

    The cases tried are every single key of the loaded opinions, the empty
    selection, and the pair ``['p0', 'p1']``. For every perspective in a
    case a pair ``(assert_true, flag)`` is yielded, where ``flag`` tells
    whether that perspective is among the keys of the filtered result.
    """
    params = {
        "inputData": "/home/jvdzwaan/data/tmp/test/*",
        "outDir": "cptm/tests/data/{}",
        "nTopics": 20
    }
    opinions = load_opinions(params)
    singletons = combinations(opinions.keys(), 1)
    extra_cases = [[], ['p0', 'p1']]
    for wanted in chain(singletons, extra_cases):
        kept = filter_opinions(wanted, opinions)

        for name in wanted:
            yield assert_true, name in kept.keys()
Exemple #7
0
def test_contrastive_opinions_result_shape():
    """Verify the shape of the output of contrastive_opinions.

    The result for the word 'carrot' is expected to have as many rows as
    the index of one entry of ``opinions`` and as many columns as there
    are entries in ``opinions``.
    """
    params = {
        "inputData": "/home/jvdzwaan/data/tmp/test/*",
        "outDir": "cptm/tests/data/{}",
        "nTopics": 20
    }
    topics = load_topics(params)
    opinions = load_opinions(params)
    nks = load('cptm/tests/data/nks_20.npy')
    co = contrastive_opinions('carrot', topics, opinions, nks)
    # On Python 3 ``keys()`` returns a view that cannot be subscripted, so
    # materialise it first; on Python 2 this is merely a list copy.
    first_key = list(opinions.keys())[0]
    num_opinion_words = len(opinions[first_key].index)
    assert_equal(co.shape, (num_opinion_words, len(opinions)))
Exemple #8
0
def test_filter_opinions():
    """Yield a check that each requested perspective survives filtering.

    The cases tried are every single key of the loaded opinions, the empty
    selection, and the pair ``['p0', 'p1']``. For every perspective in a
    case a pair ``(assert_true, flag)`` is yielded, where ``flag`` tells
    whether that perspective is among the keys of the filtered result.
    """
    params = {
        "inputData": "/home/jvdzwaan/data/tmp/test/*",
        "outDir": "cptm/tests/data/{}",
        "nTopics": 20
    }
    opinions = load_opinions(params)
    singletons = combinations(opinions.keys(), 1)
    extra_cases = [[], ['p0', 'p1']]
    for wanted in chain(singletons, extra_cases):
        kept = filter_opinions(wanted, opinions)

        for name in wanted:
            yield assert_true, name in kept.keys()
def test_contrastive_opinions_prob_distr():
    """Check that every column of the contrastive opinions sums to 1.0.

    Yields one ``(assert_almost_equal, total, 1.0)`` triple per element of
    the axis-0 sum of the result computed for the word 'carrot'.
    """
    params = {
        "inputData": "/home/jvdzwaan/data/tmp/test/*",
        "outDir": "cptm/tests/data/{}",
        "nTopics": 20
    }
    topics = load_topics(params)
    opinions = load_opinions(params)
    nks = load('cptm/tests/data/nks_20.npy')

    result = contrastive_opinions('carrot', topics, opinions, nks)
    column_totals = result.sum(axis=0)

    for total in column_totals:
        yield assert_almost_equal, total, 1.0
Exemple #10
0
def test_contrastive_opinions_prob_distr():
    """Check that every column of the contrastive opinions sums to 1.0.

    Yields one ``(assert_almost_equal, total, 1.0)`` triple per element of
    the axis-0 sum of the result computed for the word 'carrot'.
    """
    params = {
        "inputData": "/home/jvdzwaan/data/tmp/test/*",
        "outDir": "cptm/tests/data/{}",
        "nTopics": 20
    }
    topics = load_topics(params)
    opinions = load_opinions(params)
    nks = load('cptm/tests/data/nks_20.npy')

    result = contrastive_opinions('carrot', topics, opinions, nks)
    column_totals = result.sum(axis=0)

    for total in column_totals:
        yield assert_almost_equal, total, 1.0
# Script excerpt: computes a 'jsd' score for every topic word and collects
# the scores in a DataFrame. `args` and `logger` are defined outside this
# excerpt.
# NOTE(review): Python 2 print statement; a syntax error on Python 3.
print args.output

config = load_config(args.json)

# An explicit output argument wins; otherwise the file name is built by
# filling the 'outDir' template from the config with 'co_words_<nTopics>.csv'.
if args.output:
    fName = args.output
else:
    fName = config.get('outDir').format('co_words_{}.csv'.format(
        config.get('nTopics')))
logger.info('writing output to {}'.format(fName))

corpus = get_corpus(config)

words = corpus.topic_words()
topics = load_topics(config)
opinions = load_opinions(config)
nks = load_nks(config)

# Optionally restrict the opinions to the perspectives given as an argument.
# The argument is parsed as a Python literal and joined with ', ' for the
# log message, so it is presumably a list of strings - TODO confirm.
if args.perspectives:
    perspectives = ast.literal_eval(args.perspectives)
    logger.info('filtering opinions to [{}]'.format(', '.join(perspectives)))
    opinions = filter_opinions(perspectives, opinions)

# One row per word, with a single 'jsd' column that the loop below fills in.
results = pd.DataFrame(index=words, columns=['jsd'])

for idx, word in enumerate(words):
    co = contrastive_opinions(word, topics, opinions, nks)
    jsd = jsd_opinions(co.values)
    # NOTE(review): DataFrame.set_value is deprecated in newer pandas
    # releases; `results.at[word, 'jsd'] = jsd` is the documented replacement.
    results.set_value(word, 'jsd', jsd)

    # Runs on every 1000th word; the body of this branch is not part of
    # this excerpt.
    if idx % 1000 == 0:
print args.output

config = load_config(args.json)

if args.output:
    fName = args.output
else:
    fName = config.get('outDir').format('co_words_{}.csv'.
                                        format(config.get('nTopics')))
logger.info('writing output to {}'.format(fName))

corpus = get_corpus(config)

words = corpus.topic_words()
topics = load_topics(config)
opinions = load_opinions(config)
nks = load_nks(config)

if args.perspectives:
    perspectives = ast.literal_eval(args.perspectives)
    logger.info('filtering opinions to [{}]'.format(', '.join(perspectives)))
    opinions = filter_opinions(perspectives, opinions)

results = pd.DataFrame(index=words, columns=['jsd'])

for idx, word in enumerate(words):
    co = contrastive_opinions(word, topics, opinions, nks)
    jsd = jsd_opinions(co.values)
    results.set_value(word, 'jsd', jsd)

    if idx % 1000 == 0: