def test_detect_events_given_path():
    """Check that detect_events_given_path agrees with detect_events.

    The candidate trees are unpickled from the fixture file and passed to
    ``detect_events``; the same file path is passed to
    ``detect_events_given_path``. Both calls use ``K=5`` and the resulting
    trees are compared pairwise by their sorted edge lists.
    """
    result_path = os.path.join(CURDIR, 'test/data/cand_trees.pkl')
    # Pickle data must be read from a binary-mode file, and the context
    # manager guarantees the handle is closed even if unpickling fails.
    with open(result_path, 'rb') as f:
        cand_trees = pickle.load(f)

    expected_trees = detect_events(cand_trees, K=5)
    actual_trees = detect_events_given_path(result_path, K=5)

    # zip() stops at the shorter sequence, so without this check a truncated
    # (or empty) result would make the loop below pass vacuously.
    assert_equal(len(expected_trees), len(actual_trees))
    for at, et in zip(actual_trees, expected_trees):
        assert_equal(sorted(et.edges()), sorted(at.edges()))
def test_detect_events():
    """Check the size and ordering of the trees returned by detect_events.

    With ``K=5`` the call must return exactly five trees, and each tree must
    have at least as many nodes as the one that follows it.
    """
    result_path = os.path.join(CURDIR, 'test/data/cand_trees.pkl')
    # Pickle data must be read from a binary-mode file, and the context
    # manager guarantees the handle is closed even if unpickling fails.
    with open(result_path, 'rb') as f:
        cand_trees = pickle.load(f)

    trees = detect_events(cand_trees, K=5)
    assert_equal(5, len(trees))

    # Compare each tree with its successor: node counts must be non-increasing.
    for t1, t2 in zip(trees, trees[1:]):
        assert_true(len(t1.nodes()) >= len(t2.nodes()))
def main():
    """Print document statistics for the first detected event.

    Reads the pickle file named by the first command-line argument, runs
    ``detect_events`` on its contents with 5 as the second argument, and
    takes the first returned tree. For that tree it pretty-prints:

    * the message-id count of the root's children documents,
    * the message-id count of all documents,
    * the message-id count of the longest-path documents, and
    * the subjects of the longest-path documents.

    Raises:
        IndexError: if no command-line argument is supplied.
    """
    import sys
    from pprint import pprint

    pkl_path = sys.argv[1]
    # NOTE(review): unpickling executes arbitrary code; only run this on
    # pickle files from a trusted source.
    # Binary mode is required for pickle data, and the context manager closes
    # the handle even if loading fails.
    with open(pkl_path, 'rb') as f:
        candidate_events = pkl.load(f)

    g = detect_events(candidate_events, 5)[0]
    mid2interaction = load_id2obj_dict('data/enron.json', 'message_id')
    root = get_roots(g)[0]

    pprint('children documents count: {}'.format(
        count_message_ids(children_documents(g, root, mid2interaction))))
    pprint('all documents count: {}'.format(
        count_message_ids(all_documents(g, mid2interaction))))

    lpd = longest_path_documents(g, root, mid2interaction)
    pprint('longest path documents count: {}'.format(count_message_ids(lpd)))
    pprint('longest path documents\' subject: {}'.format(
        [d['subject'] for d in lpd]))