def test_perspective_jsd_matrix_symmetric():
    """Yield one symmetry check per topic for the perspective JSD matrices.

    Opinions are loaded from the test data directory and handed to
    ``perspective_jsd_matrix`` together with the configured number of
    topics.  For every topic index the entry of the result is compared
    element-wise with its own transpose; the generated test passes when
    all elements are equal.
    """
    num_topics = 20
    config = {'nTopics': num_topics, 'outDir': 'cptm/tests/data/{}'}
    matrices = perspective_jsd_matrix(load_opinions(config),
                                      config.get('nTopics'))

    for topic in range(num_topics):
        topic_jsd = matrices[topic]
        # Element-wise comparison with the transpose, reduced to one bool.
        is_symmetric = (topic_jsd.transpose() == topic_jsd).all()
        yield assert_true, is_symmetric
def test_perspective_jsd_matrix_diagonal_zeros():
    """Yield one check per diagonal element of every topic's JSD matrix.

    Opinions are loaded from the test data directory and handed to
    ``perspective_jsd_matrix`` together with the configured number of
    topics.  For every topic index, each element ``[k, k]`` of the
    corresponding matrix is expected to equal ``0.0``.
    """
    num_topics = 20
    config = {'nTopics': num_topics, 'outDir': 'cptm/tests/data/{}'}
    matrices = perspective_jsd_matrix(load_opinions(config),
                                      config.get('nTopics'))

    for topic in range(num_topics):
        topic_jsd = matrices[topic]
        # The first dimension gives the number of diagonal positions walked.
        size = topic_jsd.shape[0]
        for pos in range(size):
            yield assert_equal, topic_jsd[pos, pos], 0.0
def test_contrastive_opinions_result_shape():
    """Verify the shape of the output of contrastive_opinions.

    The result for the word ``'carrot'`` must have shape
    ``(len(first_opinion.index), len(opinions))``, where ``first_opinion``
    is the entry stored under the first key of ``opinions``.
    """
    params = {
        "inputData": "/home/jvdzwaan/data/tmp/test/*",
        "outDir": "cptm/tests/data/{}",
        "nTopics": 20
    }
    topics = load_topics(params)
    opinions = load_opinions(params)
    nks = load('cptm/tests/data/nks_20.npy')

    co = contrastive_opinions('carrot', topics, opinions, nks)

    # ``opinions.keys()[0]`` only works when keys() returns a list
    # (Python 2).  Taking the first item of the iterator selects the same
    # key and also works with Python 3 dict views.
    first_perspective = next(iter(opinions.keys()))
    num_opinion_words = len(opinions[first_perspective].index)

    assert_equal(co.shape, (num_opinion_words, len(opinions)))
def test_filter_opinions():
    """Yield a check that every requested perspective survives filtering.

    ``filter_opinions`` is called once for each single-key selection taken
    from the loaded opinions, once with an empty selection and once with
    ``['p0', 'p1']``.  For each selection, every requested perspective
    must be among the keys of the filtered result (the empty selection
    therefore yields no checks).
    """
    params = {
        "inputData": "/home/jvdzwaan/data/tmp/test/*",
        "outDir": "cptm/tests/data/{}",
        "nTopics": 20
    }
    opinions = load_opinions(params)

    single_selections = combinations(opinions.keys(), 1)
    extra_selections = [[], ['p0', 'p1']]

    for perspectives in chain(single_selections, extra_selections):
        filtered = filter_opinions(perspectives, opinions)
        for p in perspectives:
            is_present = p in filtered.keys()
            yield assert_true, is_present
def test_contrastive_opinions_prob_distr():
    """Check that the contrastive opinions sum to 1.0 along axis 0.

    Every value of ``co.sum(axis=0)`` for the word ``'carrot'`` must be
    almost equal to 1.0, i.e. each summed slice is expected to be a
    probability distribution.
    """
    params = {
        "inputData": "/home/jvdzwaan/data/tmp/test/*",
        "outDir": "cptm/tests/data/{}",
        "nTopics": 20
    }
    # Arguments are evaluated left to right: topics, opinions, then nks.
    co = contrastive_opinions('carrot',
                              load_topics(params),
                              load_opinions(params),
                              load('cptm/tests/data/nks_20.npy'))

    for total in co.sum(axis=0):
        yield assert_almost_equal, total, 1.0
print args.output config = load_config(args.json) if args.output: fName = args.output else: fName = config.get('outDir').format('co_words_{}.csv'.format( config.get('nTopics'))) logger.info('writing output to {}'.format(fName)) corpus = get_corpus(config) words = corpus.topic_words() topics = load_topics(config) opinions = load_opinions(config) nks = load_nks(config) if args.perspectives: perspectives = ast.literal_eval(args.perspectives) logger.info('filtering opinions to [{}]'.format(', '.join(perspectives))) opinions = filter_opinions(perspectives, opinions) results = pd.DataFrame(index=words, columns=['jsd']) for idx, word in enumerate(words): co = contrastive_opinions(word, topics, opinions, nks) jsd = jsd_opinions(co.values) results.set_value(word, 'jsd', jsd) if idx % 1000 == 0:
print args.output config = load_config(args.json) if args.output: fName = args.output else: fName = config.get('outDir').format('co_words_{}.csv'. format(config.get('nTopics'))) logger.info('writing output to {}'.format(fName)) corpus = get_corpus(config) words = corpus.topic_words() topics = load_topics(config) opinions = load_opinions(config) nks = load_nks(config) if args.perspectives: perspectives = ast.literal_eval(args.perspectives) logger.info('filtering opinions to [{}]'.format(', '.join(perspectives))) opinions = filter_opinions(perspectives, opinions) results = pd.DataFrame(index=words, columns=['jsd']) for idx, word in enumerate(words): co = contrastive_opinions(word, topics, opinions, nks) jsd = jsd_opinions(co.values) results.set_value(word, 'jsd', jsd) if idx % 1000 == 0: