コード例 #1
0
def display_headnote_toplist(
    period_group_index=None,
    topic_group_name=None,
    recode_is_cultural=False,
    treaty_filter=None,
    parties=None,
    extra_groupbys=None,
    use_lemma=False,
    compute_co_occurance=False,
    remove_stopwords=True,
    min_word_size=2,
    n_min_count=1,
    output_format='table',
    n_top=50,
    progress=utility.noop,
    wti_index=None,
    print_args=False
):
    """Display the most frequent headnote words (or co-occurrences) per group.

    Treaties are selected through ``wti_index.get_categorized_treaties``,
    their headnote tokens are counted per group (period column, any extra
    group-by columns, and the word itself) and the ``n_top`` highest counts
    of every group are rendered in the requested output format.

    Parameters
    ----------
    period_group_index
        Key into ``config.DEFAULT_PERIOD_GROUPS``.
    topic_group_name
        Key into ``config.TOPIC_GROUP_MAPS``.
    recode_is_cultural, treaty_filter
        Forwarded unchanged to ``wti_index.get_categorized_treaties``.
    parties
        When not ``None`` and not containing ``'ALL'``, only treaties whose
        ``party1`` or ``party2`` is in this collection are kept.
    extra_groupbys
        Optional additional column names to group the counts by.
    use_lemma
        Count the ``'lemma'`` column instead of the ``'token'`` column.
    compute_co_occurance
        When truthy, the tokens of the selected treaties are passed through
        ``compute_co_occurrance`` before counting; ``remove_stopwords`` is
        not applied in this mode.
    remove_stopwords
        When ``True`` (plain token mode only), rows flagged ``is_stopword``
        are dropped.
    min_word_size
        Minimum length of a word for it to be counted.
    n_min_count
        When greater than 1, rows with a smaller count are dropped.
    output_format
        ``'table'``, ``'qgrid'``, ``'unstack'`` or a string starting with
        ``'plot'`` whose last ``'_'``-separated part is the plot kind and
        which may contain a ``'stacked'`` part. Any other value produces no
        output.
    n_top
        Number of most frequent words kept per group.
    progress
        Callable invoked without arguments after each processing step and
        with ``0`` when the function exits.
    wti_index
        Object providing ``treaties`` and ``get_categorized_treaties``;
        required in practice although it defaults to ``None``.
    print_args
        When truthy, the call arguments are pretty-printed first.

    Returns
    -------
    None
        The result is displayed, not returned.

    """
    try:
        if print_args:
            # locals() is captured before any other local is bound so that
            # only the call arguments end up in the printout.
            args = utility.filter_dict(locals(), ['progress', 'print_args'], filter_out=True)
            args['wti_index'] = None
            pp(args)

        corpus = headnote_corpus.HeadnoteTokenCorpus(treaties=wti_index.treaties)

        period_group = config.DEFAULT_PERIOD_GROUPS[period_group_index]
        topic_group = config.TOPIC_GROUP_MAPS[topic_group_name]

        progress()

        # Subset of treaties filtered by topic, period and is-cultural filter.
        treaties = wti_index.get_categorized_treaties(
            topic_category=topic_group,
            period_group=period_group,
            treaty_filter=treaty_filter,
            recode_is_cultural=recode_is_cultural
        )

        if parties is not None and 'ALL' not in parties:
            treaties = treaties.loc[treaties.party1.isin(parties) | treaties.party2.isin(parties)]

        if treaties.shape[0] == 0:
            print('No data for selection')
            return

        progress()

        token_or_lemma = 'lemma' if use_lemma else 'token'

        if compute_co_occurance:
            treaty_tokens = corpus.get_tokens_for(treaties.index)
            treaty_tokens = treaty_tokens\
                .loc[treaty_tokens[token_or_lemma].str.len() >= min_word_size]\
                .reset_index()\
                .drop(['is_stopword', 'sequence_id'], axis=1)\
                .set_index('treaty_id')
            treaty_tokens = compute_co_occurrance(treaty_tokens)

        else:
            treaty_tokens = corpus.tokens
            if remove_stopwords is True:
                # Element-wise comparison on a Series; `is False` would not work.
                treaty_tokens = treaty_tokens.loc[treaty_tokens.is_stopword == False]  # noqa: E712
            treaty_tokens = treaty_tokens.reset_index()
            # Honour min_word_size here as well (it used to be ignored in
            # this branch). Done after reset_index so the word is a column.
            treaty_tokens = treaty_tokens.loc[treaty_tokens[token_or_lemma].str.len() >= min_word_size]
            treaty_tokens = treaty_tokens.set_index('treaty_id')

        progress()

        # The inner join also restricts the tokens to the selected treaties.
        treaty_tokens = treaty_tokens.merge(treaties[['signed_year']], how='inner', left_index=True, right_index=True)

        progress()

        group_columns = ([period_group['column']] if period_group is not None else []) +\
                        list(extra_groupbys or [])
        groupbys = group_columns + [token_or_lemma]

        result = treaty_tokens.groupby(groupbys).size().reset_index(name='Count')

        progress()

        # Keep the n_top most frequent words of each group. The ranking must
        # be done per group (not per word), otherwise n_top has no effect.
        ranked = result.sort_values('Count', ascending=False, kind='mergesort')
        if group_columns:
            result = ranked.groupby(group_columns, sort=False).head(n_top)
        else:
            result = ranked.head(n_top)

        if n_min_count > 1:
            result = result.loc[result.Count >= n_min_count]

        progress()

        # Groups ascending, counts descending within each group.
        result = result.sort_values(group_columns + ['Count'], ascending=len(group_columns) * [True] + [False])

        progress()

        if output_format in ('table', 'qgrid'):
            result.columns = [remove_snake_case(x) for x in result.columns]
            if output_format == 'table':
                display(result)
            else:
                qgrid.show_grid(result, show_toolbar=True)
        elif output_format == 'unstack':
            result = result.set_index(groupbys).unstack(level=0).fillna(0).astype('int32')
            result.columns = [x[1] for x in result.columns]
            display(result)
        elif output_format.startswith('plot'):
            parts = output_format.split('_')
            kind = parts[-1]
            stacked = 'stacked' in parts
            result = result.set_index(list(reversed(groupbys))).unstack(level=0).fillna(0).astype('int32')
            result.columns = [x[1] for x in result.columns]
            result.plot(kind=kind, stacked=stacked, figsize=(16, 8))

    except Exception as ex:
        # Log before re-raising; the reverse order made the log unreachable.
        logger.error(ex)
        raise
    finally:
        # Single reset point for the progress indicator on every exit path.
        progress(0)
def display_topic_quantity_groups(period_group_index,
                                  topic_group_name,
                                  recode_is_cultural=False,
                                  normalize_values=False,
                                  extra_other_category=None,
                                  chart_type_name=None,
                                  plot_style='classic',
                                  parties=None,
                                  chart_per_category=False,
                                  target_quantity="topic",
                                  wti_index=None,
                                  progress=utility.noop,
                                  print_args=False):
    """Call ``display_topic_quantity`` once per party group and topic group.

    Parameters
    ----------
    period_group_index
        Key into ``config.DEFAULT_PERIOD_GROUPS``.
    topic_group_name
        Key into ``config.TOPIC_GROUP_MAPS``.
    recode_is_cultural, normalize_values, extra_other_category,
    chart_type_name, plot_style, target_quantity, wti_index, progress
        Forwarded unchanged to ``display_topic_quantity``. The arguments are
        also pretty-printed when ``chart_type_name`` equals ``'print_args'``.
    parties
        Iterable of party names. ``'ALL'`` selects a single unfiltered group
        (topic mode, one chart); ``'ALL OTHER'`` is excluded from the plain
        party list. Must be iterable: the ``None`` default raises
        ``TypeError``.
    chart_per_category
        In ``'party'``/``'region'`` mode: one chart per key of the topic
        group. Otherwise: one chart per entry of ``parties``.
    print_args
        When truthy, the call arguments are pretty-printed first.

    Returns
    -------
    None
    """
    # locals() has to be read before any other local exists so that the
    # printout contains the call arguments only.
    if print_args or (chart_type_name == 'print_args'):
        call_args = utility.filter_dict(locals(), ['progress', 'print_args'],
                                        filter_out=True)
        call_args['wti_index'] = None
        pp(call_args)

    period_group = config.DEFAULT_PERIOD_GROUPS[period_group_index]
    topic_group = config.TOPIC_GROUP_MAPS[topic_group_name]
    named_parties = [name for name in parties if name != 'ALL OTHER']

    by_party_or_region = target_quantity in ('party', 'region')

    # Party groups: one entry per chart along the party dimension.
    if by_party_or_region:
        party_groups = [dict(label=topic_group_name, parties=named_parties)]
    elif chart_per_category:
        party_groups = [
            dict(label=name,
                 parties=parties if name == 'ALL OTHER' else [name])
            for name in parties
        ]
    elif 'ALL' in parties:
        party_groups = [dict(label='ALL', parties=None)]
    else:
        party_groups = [
            dict(label=party_group_label(parties), parties=named_parties)
        ]

    # Topic groups: split per key only in party/region mode.
    if by_party_or_region and chart_per_category:
        topic_groups = [{key: {key: members}}
                        for key, members in topic_group.items()]
    else:
        topic_groups = [topic_group]

    shared_options = dict(period_group=period_group,
                          recode_is_cultural=recode_is_cultural,
                          normalize_values=normalize_values,
                          extra_other_category=extra_other_category,
                          chart_type_name=chart_type_name,
                          plot_style=plot_style,
                          target_quantity=target_quantity,
                          wti_index=wti_index,
                          progress=progress)

    for party_group in party_groups:
        for topic_category in topic_groups:
            display_topic_quantity(party_group=party_group,
                                   topic_group=topic_category,
                                   **shared_options)
コード例 #3
0
def display_party_network(parties=None,
                          period_group_index=0,
                          treaty_filter='',
                          plot_data=None,
                          topic_group=None,
                          recode_is_cultural=False,
                          layout_algorithm='',
                          C=1.0,
                          K=0.10,
                          p1=0.10,
                          output='network',
                          party_name='party',
                          node_size_range=[40, 60],
                          palette_name=None,
                          width=900,
                          height=900,
                          node_size=None,
                          node_partition=None,
                          wti_index=None,
                          year_limit=None,
                          progress=utility.noop,
                          done_callback=None):
    """Build the party network for a selection and plot it or show it as a table.

    Parameters
    ----------
    parties
        Iterable of parties; converted to a list and passed to
        ``wti_index.get_party_network``. Must be iterable despite the
        ``None`` default.
    period_group_index
        Key into ``config.DEFAULT_PERIOD_GROUPS``.
    treaty_filter, recode_is_cultural, year_limit
        Forwarded as keyword arguments to ``wti_index.get_party_network``.
        ``year_limit`` is also used as ``slice_range`` of a newly created
        ``plot_data`` struct.
    plot_data
        Struct updated with the result of ``plot_network``; a fresh
        ``utility.SimpleStruct`` is created when falsy.
    topic_group
        Optional mapping. When given, rows are restricted to topics among
        its keys and a ``line_color`` column is derived from ``category``;
        when ``None``, ``category`` is set to the ``topic`` column.
    layout_algorithm, C, K, p1
        Passed to ``layout_network`` (``p1`` as ``p``); ``K`` is also passed
        to ``create_party_network``.
    output
        ``'network'`` plots, ``'table'`` displays the data frame,
        ``'print_args'`` pretty-prints the call arguments and returns.
    party_name
        First positional argument of ``wti_index.get_party_network``.
    node_size, node_size_range
        Passed to ``setup_node_size``.
    palette_name
        Passed to ``get_palette``.
    width, height
        Figure size given to the plot options.
    node_partition
        Passed to ``create_party_network``.
    wti_index
        Object providing ``get_party_network``.
    progress
        Callable receiving step numbers 1-6 and ``0`` on exit.
    done_callback
        Optional callable invoked with ``None`` after a network plot.

    Returns
    -------
    None

    Raises
    ------
    Exception
        Any error is logged through ``logger.error`` and re-raised.
    """
    try:

        if output == 'print_args':
            # Must run before any other local is bound: locals() is the
            # source of the printed arguments.
            call_args = utility.filter_dict(locals(),
                                            ['progress', 'done_callback'],
                                            filter_out=True)
            call_args.update({
                'wti_index': None,
                'plot_data': None,
                'output': 'network',
            })
            pp(call_args)
            return

        if not plot_data:
            plot_data = utility.SimpleStruct(handle=None,
                                             nodes=None,
                                             edges=None,
                                             slice_range_type=2,
                                             slice_range=year_limit)

        # Fixed at zero, so the sub-network extraction below is inactive.
        weight_threshold = 0.0

        palette = get_palette(palette_name)

        progress(1)
        period_group = config.DEFAULT_PERIOD_GROUPS[period_group_index]
        parties = list(parties)
        party_data = wti_index.get_party_network(
            party_name,
            topic_group,
            parties,
            period_group=period_group,
            treaty_filter=treaty_filter,
            recode_is_cultural=recode_is_cultural,
            year_limit=year_limit)

        if party_data is None or party_data.shape[0] == 0:
            print('No data for selection')
            return

        if topic_group is None:
            party_data['category'] = party_data.topic
        else:
            party_data = party_data.loc[party_data.topic.isin(
                topic_group.keys())]

            # Cycle through the line palette, one slot per topic-group key.
            line_palette = color_utility.DEFAULT_LINE_PALETTE
            palette_size = len(line_palette)
            palette_slot = {
                key: position % palette_size
                for position, key in enumerate(topic_group.keys())
            }
            party_data['line_color'] = party_data.category.apply(
                lambda category: line_palette[palette_slot[category]])

        signed_year = party_data.signed.apply(
            lambda signed: signed.year).astype(str)
        party_data['edge_label'] = signed_year + '/' + party_data.category

        progress(2)

        # The topic filter above may have removed every row.
        if party_data is None or party_data.shape[0] == 0:
            print('No data for selection')
            return

        network = create_party_network(party_data, K, node_partition,
                                       palette)

        progress(3)

        if output == 'table':
            renamed = {'party': 'source', 'party_other': 'target'}
            party_data.columns = [
                renamed.get(column, column) for column in party_data.columns
            ]
            display(party_data)

        elif output == 'network':

            if weight_threshold > 0:
                network = get_sub_network(network, weight_threshold)

            layout, _ = layout_network(network,
                                       layout_algorithm,
                                       scale=1.0,
                                       K=K,
                                       C=C,
                                       p=p1)

            progress(4)

            positioned_edges = get_positioned_edges2(network,
                                                     layout,
                                                     sort_attr='signed')
            positioned_nodes = get_positioned_nodes(network, layout)

            # Materialise every attribute as a plain list.
            edges = {
                key: list(values)
                for key, values in positioned_edges.items()
            }
            nodes = {
                key: list(values)
                for key, values in positioned_nodes.items()
            }

            node_size = setup_node_size(nodes, node_size, node_size_range)

            x_offset, y_offset = adjust_node_label_offset(nodes, node_size)

            label_offsets = dict(y_offset=y_offset, x_offset=x_offset)
            plot_opts = utility.extend(NETWORK_PLOT_OPTS,
                                       figsize=(width, height),
                                       node_size=node_size,
                                       node_label_opts=label_offsets,
                                       edge_label_opts={})

            progress(5)

            plotted = plot_network(nodes=nodes,
                                   edges=edges,
                                   node_label='name',
                                   edge_label='edge_label',
                                   **plot_opts)

            plot_data.update(**plotted)

            progress(6)

            if done_callback is not None:
                done_callback(None)

    except Exception as ex:
        logger.error(ex)
        raise
    finally:
        progress(0)
コード例 #4
0
def display_quantity_by_party(period_group_index=0,
                              party_name='',
                              parties=None,
                              year_limit=None,
                              treaty_filter='',
                              extra_category='',
                              normalize_values=False,
                              chart_type_name=None,
                              plot_style='classic',
                              top_n_parties=5,
                              overlay=True,
                              progress=utility.noop,
                              wti_index=None,
                              print_args=False,
                              treaty_sources=None):
    """Display treaty counts per party and period as a plot or a table.

    Parameters
    ----------
    period_group_index
        Key into ``config.DEFAULT_PERIOD_GROUPS``.
    party_name, treaty_filter, extra_category, treaty_sources
        Forwarded to ``analysis_data.QuantityByParty.get_treaties_statistics``.
    parties
        Iterable of parties; converted to a list. Must be iterable despite
        the ``None`` default.
    year_limit
        Used to trim the period group when its ``'type'`` is ``'range'``;
        otherwise it is reset to ``None`` before the statistics call.
    normalize_values
        When ``True``, counts are converted to percentages of the total of
        each period, both in the pivot and in the long-format data.
    chart_type_name
        Key into ``config.CHART_TYPE_MAP``; the value ``'print_args'`` also
        triggers printing of the call arguments.
    plot_style, overlay
        Passed to ``analysis_plot.quantity_plot``.
    top_n_parties
        Passed as ``n_top`` to the statistics call.
    progress
        Callable invoked without arguments after each step and with ``0``
        on exit.
    wti_index
        First argument of the statistics call.
    print_args
        When truthy, the call arguments are pretty-printed first.

    Returns
    -------
    None
        Errors are logged through ``logger.error`` and not re-raised.
    """
    try:

        # Capture the arguments before any other local is bound, so that the
        # printout does not contain internal objects such as the colour map.
        if print_args or (chart_type_name == 'print_args'):
            args = utility.filter_dict(locals(), ['progress', 'print_args'],
                                       filter_out=True)
            args['wti_index'] = None
            pp(args)

        static_color_map = color_utility.get_static_color_map()

        progress()

        period_group = config.DEFAULT_PERIOD_GROUPS[period_group_index]
        chart_type = config.CHART_TYPE_MAP[chart_type_name]

        parties = list(parties)

        if period_group['type'] == 'range':
            period_group = treaty_utility.trim_period_group(
                period_group, year_limit)
        else:
            year_limit = None

        data = analysis_data.QuantityByParty.get_treaties_statistics(
            wti_index,
            period_group=period_group,
            party_name=party_name,
            parties=parties,
            treaty_filter=treaty_filter,
            extra_category=extra_category,
            n_top=top_n_parties,
            year_limit=year_limit,
            treaty_sources=treaty_sources)

        progress()

        if data.shape[0] == 0:
            print('No data for selection')
            return

        pivot = pd.pivot_table(data,
                               index=['Period'],
                               values=["Count"],
                               columns=['Party'],
                               fill_value=0)
        # Drop the 'Count' level; keep the party names as column labels.
        pivot.columns = [x[-1] for x in pivot.columns]

        if normalize_values is True:
            pivot = pivot.div(0.01 * pivot.sum(axis=1), axis=0)
            # Normalise the Count column only. Transforming the whole group
            # would also hit the non-numeric Party column and hand a frame
            # to a single-column assignment.
            data['Count'] = data.groupby('Period')['Count'].transform(
                lambda x: 100.0 * (x / x.sum()))

        progress()

        if chart_type.name.startswith('plot'):

            columns = pivot.columns

            pivot = pivot.reset_index()[columns]
            colors = static_color_map.get_palette(columns)

            kwargs = analysis_plot.prepare_plot_kwargs(pivot, chart_type,
                                                       normalize_values,
                                                       period_group)
            kwargs.update(dict(overlay=overlay, colors=colors))

            progress()

            analysis_plot.quantity_plot(data, pivot, chart_type, plot_style,
                                        **kwargs)

        elif chart_type.name == 'table':
            display(data)
        else:
            display(pivot)

        progress()

    except Exception as ex:
        # Best-effort display: the error is logged and deliberately not
        # propagated to the caller.
        logger.error(ex)
    finally:
        progress(0)