def main():
    """Export event and non-event message timestamps as one JSON series list.

    Command-line arguments:
        --result_path: pickled result handed to ``k_best_trees``.
        --interactions_path: JSON file loaded with ``pandas.read_json``; the
            code reads its ``datetime`` and ``message_id`` columns.
        --output_path: destination of the JSON output.
        --non_event_sample_n: if given (and non-zero), the number of non-event
            rows to sample; otherwise every non-event row is exported.
        --k: number of trees requested from ``k_best_trees``.

    The output is a JSON list of ``{'series': ..., 'datetime': ...}`` objects.
    Nodes of the i-th selected tree (1-based) get the series name
    ``'event-<i>'``; interaction rows whose ``message_id`` is not a node of
    any selected tree get ``'non-event'``.

    Raises:
        ValueError: propagated from ``DataFrame.sample`` when
            ``--non_event_sample_n`` exceeds the number of non-event rows.
    """
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--result_path')
    parser.add_argument('--interactions_path')
    parser.add_argument('--output_path')
    parser.add_argument('--non_event_sample_n', type=int)
    parser.add_argument('--k', type=int)

    args = parser.parse_args()

    # NOTE: unpickling can execute arbitrary code -- only load trusted files.
    # Binary mode plus ``with`` closes the handle and is required on Python 3.
    with open(args.result_path, 'rb') as result_file:
        result = pkl.load(result_file)
    trees = k_best_trees(result, args.k)
    df = pd.read_json(args.interactions_path)

    dt_format = '%Y-%m-%dT%H:%M:%S.000Z'

    data = []
    event_nodes = set()
    for i, t in enumerate(trees, 1):
        series = 'event-{}'.format(i)
        # nodes(data=True) yields (node, attribute dict) pairs on both
        # networkx 1.x and 2.x, unlike nodes_iter() / t.node[...].
        for n, attrs in t.nodes(data=True):
            event_nodes.add(n)
            data.append(
                {
                    'series': series,
                    'datetime': attrs['datetime'].strftime(dt_format)
                }
            )

    # Dataset-specific cut-off; only the enron one is active.
    # for enron:
    df = df[df['datetime'] > dt(2000, 6, 1)]

    # for ukraine:
    # df = df[df['datetime'] > dt(2015, 2, 26)]

    # for baltimore
    # df = df[df['datetime'] >= dt(2015, 4, 27)]

    # Filter out event messages once, instead of once before sampling and
    # again row by row while exporting.
    non_event = df[~df['message_id'].isin(event_nodes)]

    if args.non_event_sample_n:
        print(df.shape)  # rows before the event messages are removed
        non_event = non_event.sample(n=args.non_event_sample_n)
        print(non_event.shape)  # rows kept after sampling

    for timestamp in non_event['datetime']:
        data.append(
            {
                'series': 'non-event',
                'datetime': timestamp.strftime(dt_format)
            }
        )

    # ``with`` guarantees the JSON is flushed and the handle closed.
    with open(args.output_path, 'w') as output_file:
        json.dump(data, output_file)
        recipients = mg.node[n]['recipient_ids']
        people_network.add_edges_from([(sender, r) for r in recipients])

    print('people_network.number_of_nodes():',
          people_network.number_of_nodes())
    colors = ['red', 'blue', 'green', 'yellow', 'black', 'orange']
    colors = defaultdict(lambda: 'gray')
    for c, t in zip(colors, trees):
        for n in t.nodes_iter():
            colors[n] = c
    pos = nx.spring_layout(people_network)
    nx.draw_networkx(people_network, pos)
    nx.draw_networkx_nodes(people_network,
                           pos,
                           node_color=colors)
    plt.savefig(output_path)

if __name__ == '__main__':
    # Script entry point: parse the paths and k, load the meta graph and the
    # pickled result, and hand the k best trees to ``draw`` together with the
    # output path.
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('-m', '--meta_graph_path')
    parser.add_argument('-r', '--result_path')
    parser.add_argument('-k', '--k', type=int)
    parser.add_argument('-o', '--output_path')

    args = parser.parse_args()

    meta_graph = nx.read_gpickle(args.meta_graph_path)

    # NOTE: unpickling can execute arbitrary code -- only load trusted files.
    # Binary mode plus ``with`` closes the handle and is required on Python 3.
    with open(args.result_path, 'rb') as result_file:
        result = pkl.load(result_file)

    draw(meta_graph,
         k_best_trees(result, args.k),
         args.output_path)
Example #3
0
def get_meta_data_of_f1(acc_trees, true_trees, k):
    """Count predicted, true and correctly predicted nodes.

    Predicted nodes are the union of the nodes of ``k_best_trees(acc_trees,
    k)``; true nodes are the union of the nodes of ``true_trees``.

    Args:
        acc_trees: candidate trees passed to ``k_best_trees``.
        true_trees: iterable of ground-truth trees (graph objects exposing
            ``nodes()``).
        k: number of trees requested from ``k_best_trees``.

    Returns:
        A 3-tuple ``(n_correct, n_pred, n_true)`` where ``n_correct`` is the
        size of the intersection as a ``float`` and the other two are ``int``
        set sizes.  (The float is presumably there so that callers get true
        division on Python 2 -- confirm against the caller.)
    """
    # nodes() is iterable on both networkx 1.x and 2.x; nodes_iter() was
    # removed in 2.0.
    pred_nodes = {n for t in k_best_trees(acc_trees, k) for n in t.nodes()}
    true_nodes = {n for t in true_trees for n in t.nodes()}
    n_correct = len(pred_nodes & true_nodes)
    return float(n_correct), len(pred_nodes), len(true_nodes)
Example #4
0
def k_max_setcover(acc_trees, true_trees, k):
    """Return how many distinct nodes the k best trees cover.

    Args:
        acc_trees: candidate trees passed to ``k_best_trees``.
        true_trees: unused; kept so the signature stays unchanged.
        k: number of trees requested from ``k_best_trees``.

    Returns:
        int: size of the union of the selected trees' node sets.
    """
    covered = set()
    for tree in k_best_trees(acc_trees, k):
        # nodes() is iterable on both networkx 1.x and 2.x; nodes_iter() was
        # removed in 2.0.
        covered.update(tree.nodes())
    return len(covered)
def run(cand_trees, k, summary_kws, undirected):
    """Build the items/groups payload describing the k best trees.

    Each selected tree becomes one group (ids start at 1).  Every node of the
    tree becomes an item in that group, followed by one ``'background'`` item
    spanning the tree's time span.

    Args:
        cand_trees: candidate trees passed to ``k_best_trees``.
        k: number of trees requested from ``k_best_trees``.
        summary_kws: keyword dict given to ``MetaGraphStat``; its
            ``['topics']['interactions']`` entry must be an iterable of
            interaction dicts with ``message_id``, ``subject``, ``body`` and
            ``datetime`` keys.
        undirected: unused; kept so the signature stays unchanged.

    Returns:
        dict with keys ``'items'``, ``'groups'``, ``'start'`` and ``'end'``;
        the last two are the formatted earliest start time and latest end
        time over all selected trees.

    Raises:
        KeyError: if a tree node is not a ``message_id`` of any interaction.
        ValueError: if no tree is selected (``min``/``max`` of an empty list).
    """
    interactions = summary_kws['topics']['interactions']
    mid2i = {
        i['message_id']: i
        for i in interactions
    }
    trees = k_best_trees(cand_trees, k)
    summaries = [MetaGraphStat(t, summary_kws).summary_dict() for t in trees]

    items = []
    groups = []
    start_times = []
    end_times = []
    item_id = 0  # running id shared by message items and background items
    for group_id, (summ, tree) in enumerate(zip(summaries, trees), 1):
        time_span = summ['time_span']
        start_time = time_span['start_time']
        end_time = time_span['end_time']

        # nodes() is iterable on both networkx 1.x and 2.x; nodes_iter() was
        # removed in 2.0.
        for mid in tree.nodes():
            message = mid2i[mid]
            subject = message['subject']
            item_id += 1
            items.append({
                'id': item_id,
                # Fall back to the body when the subject is empty/missing.
                'content': subject.strip() if subject else message['body'],
                'start': format_time(message['datetime']),
                'group': group_id
            })

        # Background item covering the whole event.
        item_id += 1
        items.append(
            {
                'id': item_id,
                'start': format_time(start_time),
                'end': format_time(end_time),
                'content': 'Event {}'.format(group_id),
                'group': group_id,
                'type': 'background'
            })

        g = {
            'id': group_id,
            # Earlier revisions tried summ['frequent_terms'] and
            # summ['tdidf_terms'] here instead.
            'terms': summ['topics']['topic_terms'],
            'participants': dict(
                summ['participants']['participant_count']
            ),
            'start': format_time(start_time),
            'end': format_time(end_time),
            'days': (end_time - start_time).days,
            'link_type_freq': summ['link_type_freq']
        }
        if 'hashtags' in summ:
            g['hashtags'] = summ['hashtags']
        groups.append(g)

        start_times.append(start_time)
        end_times.append(end_time)

    return {
        'items': items,
        'groups': groups,
        'start': format_time(min(start_times)),
        'end': format_time(max(end_times))
    }
Example #6
0
def eval_setcover_obj_func(cand_trees, k):
    """Return the number of distinct nodes covered by the k best trees.

    Args:
        cand_trees: candidate trees passed to ``k_best_trees``.
        k: number of trees requested from ``k_best_trees``.

    Returns:
        int: size of the union of the selected trees' node sets.
    """
    # nodes() is iterable on both networkx 1.x and 2.x; nodes_iter() was
    # removed in 2.0.
    covered_nodes = {n for t in k_best_trees(cand_trees, k) for n in t.nodes()}
    return len(covered_nodes)