def detect_events(cand_trees, K):
    """Select a subset of candidate trees using ``argmax_k_coverage``.

    Each candidate tree is reduced to the set of its nodes; the list of
    node sets and ``K`` are handed to ``argmax_k_coverage``, whose result
    is treated as indices into ``cand_trees``.

    :param cand_trees: indexable sequence of tree objects exposing a
        ``nodes()`` method (presumably networkx graphs -- confirm with caller)
    :param K: passed straight through to ``argmax_k_coverage``
        (presumably the number of trees to keep -- confirm)
    :return: list of the trees from ``cand_trees`` at the selected indices,
        in the order ``argmax_k_coverage`` returned them
    """
    node_sets = []
    for tree in cand_trees:
        node_sets.append(set(tree.nodes()))

    chosen = argmax_k_coverage(node_sets, K)

    # Map the selected indices back to the tree objects themselves.
    picked = []
    for idx in chosen:
        picked.append(cand_trees[idx])
    return picked
def k_best_trees(cand_trees, k):
    """Return the candidate trees picked by ``argmax_k_coverage``.

    The node set of every tree in ``cand_trees`` is computed and passed,
    together with ``k``, to ``argmax_k_coverage``; the returned ids are
    used to index back into ``cand_trees``.

    :param cand_trees: indexable sequence of tree objects exposing a
        ``nodes()`` method
    :param k: forwarded unchanged to ``argmax_k_coverage``
        (presumably the number of trees to select -- confirm)
    :return: list of trees at the selected ids, in the returned order
    """
    # NOTE: a filter that dropped "self-talking" events (trees whose nodes
    # all share one ``sender_id``) used to run here and is currently disabled:
    #
    #   cand_trees = [t for t in cand_trees
    #                 if len(set(t.node[n]['sender_id']
    #                            for n in t.nodes_iter())) > 1]
    coverage_sets = [set(tree.nodes()) for tree in cand_trees]
    best_ids = argmax_k_coverage(coverage_sets, k)

    result = []
    for tree_id in best_ids:
        result.append(cand_trees[tree_id])
    return result
# name path name path # names = [name for i, name in enumerate(sys.argv[1:]) if i % 2 == 0] # paths = [path for i, path in enumerate(sys.argv[1:]) if i % 2 == 1] paths = glob("tmp/lda-25-topics/result-*U=5*interactions=False*.pkl") names = map(lambda n: n.replace('tmp/lda-25-topics/result-', '').replace('.pkl', ''), paths) K = 5 table = [] for name, path in zip(names, paths): trees = pickle.load(open(path)) nodes_of_trees = [set(t.nodes()) for t in trees] selected_ids = argmax_k_coverage(nodes_of_trees, K) selected_trees = [trees[i] for i in selected_ids] nodes_list = [t.nodes() for t in selected_trees] unique_nodes = reduce(lambda acc, nodes: acc | set(nodes), nodes_list, set()) row = [name] row += [len(nodes) for nodes in nodes_list] row.append(len(unique_nodes)) table.append(row) df = pds.DataFrame(table, columns=['', '#1', '#2', '#3', '#4', '#5', 'total']) print(tabulate(df.sort(['total'], ascending=False),