def display_headnote_toplist(
    period_group_index=None,
    topic_group_name=None,
    recode_is_cultural=False,
    treaty_filter=None,
    parties=None,
    extra_groupbys=None,
    use_lemma=False,
    compute_co_occurance=False,
    remove_stopwords=True,
    min_word_size=2,
    n_min_count=1,
    output_format='table',
    n_top=50,
    progress=utility.noop,
    wti_index=None,
    print_args=False
):
    """Display a top list of headnote words (or word co-occurrences).

    Treaties are selected from ``wti_index`` by topic group, period group,
    treaty filter and (optionally) parties. The headnote tokens of the
    selected treaties are counted per group and shown as a table, a qgrid
    grid, an unstacked table or a plot.

    Parameters
    ----------
    period_group_index :
        Key/index into ``config.DEFAULT_PERIOD_GROUPS``.
    topic_group_name :
        Key into ``config.TOPIC_GROUP_MAPS``.
    recode_is_cultural : bool
        Forwarded to ``wti_index.get_categorized_treaties``.
    treaty_filter :
        Forwarded to ``wti_index.get_categorized_treaties``.
    parties : iterable of str, optional
        When given and not containing ``'ALL'``, only treaties whose
        ``party1`` or ``party2`` is in ``parties`` are kept.
    extra_groupbys : list of str, optional
        Additional group-by columns, placed between the period column and
        the token column.
    use_lemma : bool
        Count the ``'lemma'`` column instead of the ``'token'`` column.
    compute_co_occurance : bool
        When true, tokens are fetched per selected treaty and passed through
        ``compute_co_occurrance``; otherwise the full ``corpus.tokens`` table
        is used.
    remove_stopwords : bool
        Only applied when ``compute_co_occurance`` is false.
    min_word_size : int
        Minimum token length. Only applied when ``compute_co_occurance`` is
        true (see FIXME below).
    n_min_count : int
        Rows with a count below this value are dropped (when ``> 1``).
    output_format : str
        ``'table'``, ``'qgrid'``, ``'unstack'`` or ``'plot_<kind>'``
        (optionally containing a ``stacked`` part, e.g.
        ``'plot_stacked_bar'``). Other values display nothing.
    n_top : int
        Passed to ``nlargest`` in the top-list filter.
    progress : callable
        Progress callback; called without arguments per step and with ``0``
        when done.
    wti_index :
        Treaty index providing ``treaties`` and ``get_categorized_treaties``.
    print_args : bool
        Pretty-print the call arguments before processing.

    Returns
    -------
    None
        Output is produced via ``display``/``qgrid``/``plot`` side effects.

    Raises
    ------
    Exception
        Any error is logged and then re-raised.
    """
    try:
        # locals() must be read before any other local is bound so that the
        # dump contains the call arguments only.
        if print_args:
            args = utility.filter_dict(locals(), ['progress', 'print_args'], filter_out=True)
            args['wti_index'] = None
            pp(args)

        corpus = headnote_corpus.HeadnoteTokenCorpus(treaties=wti_index.treaties)
        period_group = config.DEFAULT_PERIOD_GROUPS[period_group_index]
        topic_group = config.TOPIC_GROUP_MAPS[topic_group_name]

        progress()

        # Get subset of treaties filtered by topic, period, and is-cultural filter
        treaties = wti_index.get_categorized_treaties(
            topic_category=topic_group,
            period_group=period_group,
            treaty_filter=treaty_filter,
            recode_is_cultural=recode_is_cultural
        )

        if parties is not None and 'ALL' not in parties:
            involves_party = treaties.party1.isin(parties) | treaties.party2.isin(parties)
            treaties = treaties.loc[involves_party]

        if treaties.shape[0] == 0:
            print('No data for selection')
            return

        progress()

        token_or_lemma = 'lemma' if use_lemma else 'token'

        if compute_co_occurance:
            treaty_tokens = corpus.get_tokens_for(treaties.index)
            long_enough = treaty_tokens[token_or_lemma].str.len() >= min_word_size
            treaty_tokens = treaty_tokens\
                .loc[long_enough]\
                .reset_index()\
                .drop(['is_stopword', 'sequence_id'], axis=1)\
                .set_index('treaty_id')
            treaty_tokens = compute_co_occurrance(treaty_tokens)
        else:
            # FIXME: Filter based on token-length
            treaty_tokens = corpus.tokens
            if remove_stopwords is True:
                # Element-wise comparison on a pandas column (not `is False`).
                treaty_tokens = treaty_tokens.loc[treaty_tokens.is_stopword == False]  # noqa: E712
            treaty_tokens = treaty_tokens.reset_index().set_index('treaty_id')

        progress()

        # The inner join also restricts the tokens to the selected treaties.
        treaty_tokens = treaty_tokens.merge(
            treaties[['signed_year']], how='inner', left_index=True, right_index=True
        )

        progress()

        # Group-by columns: [period column] + extra columns + token column.
        groupbys = []
        if period_group is not None:
            groupbys.append(period_group['column'])
        groupbys.extend(extra_groupbys or [])
        groupbys.append(token_or_lemma)

        result = treaty_tokens.groupby(groupbys).size().reset_index().rename(columns={0: 'Count'})

        progress()

        # Filter out the n_top most frequent words from each group
        # NOTE(review): the grouping key here is the token column
        # (groupbys[-1]), so n_top limits rows per token rather than per
        # period/extra group - confirm this is the intended behaviour.
        result = result\
            .groupby(groupbys[-1])\
            .apply(lambda x: x.nlargest(n_top, 'Count'))\
            .reset_index(level=0, drop=True)

        if n_min_count > 1:
            result = result.loc[result.Count >= n_min_count]

        progress()

        # Ascending on the grouping columns, descending on Count.
        group_columns = groupbys[:-1]
        result = result.sort_values(
            group_columns + ['Count'],
            ascending=[True] * len(group_columns) + [False]
        )

        progress()

        if output_format in ('table', 'qgrid'):
            result.columns = [remove_snake_case(x) for x in result.columns]
            if output_format == 'table':
                display(result)
            else:
                qgrid.show_grid(result, show_toolbar=True)

        elif output_format == 'unstack':
            result = result.set_index(groupbys).unstack(level=0).fillna(0).astype('int32')
            result.columns = [x[1] for x in result.columns]
            display(result)

        elif output_format.startswith('plot'):
            # e.g. 'plot_stacked_bar' -> kind='bar', stacked=True
            parts = output_format.split('_')
            kind = parts[-1]
            stacked = 'stacked' in parts
            result = result.set_index(list(reversed(groupbys))).unstack(level=0).fillna(0).astype('int32')
            result.columns = [x[1] for x in result.columns]
            result.plot(kind=kind, stacked=stacked, figsize=(16, 8))

    except Exception as ex:
        # Log first, then re-raise with the original traceback.
        logger.error(ex)
        raise
    finally:
        # Always reset the progress indicator, also on early return or error.
        progress(0)
def display_topic_quantity_groups(
    period_group_index,
    topic_group_name,
    recode_is_cultural=False,
    normalize_values=False,
    extra_other_category=None,
    chart_type_name=None,
    plot_style='classic',
    parties=None,
    chart_per_category=False,
    target_quantity="topic",
    wti_index=None,
    progress=utility.noop,
    print_args=False
):
    """Call ``display_topic_quantity`` once per (party group, topic group) pair.

    The period group and topic group are looked up in
    ``config.DEFAULT_PERIOD_GROUPS`` and ``config.TOPIC_GROUP_MAPS``.

    How the groups are built:

    * ``target_quantity`` in ``('party', 'region')``: a single party group
      labelled ``topic_group_name`` holding all parties except
      ``'ALL OTHER'``; with ``chart_per_category`` the topic group is split
      into one single-key topic group per key.
    * otherwise, with ``chart_per_category``: one party group per entry in
      ``parties`` (the ``'ALL OTHER'`` entry carries the full ``parties``
      list); the topic group is used as is.
    * otherwise: a single party group - ``'ALL'`` with ``parties=None`` when
      ``'ALL'`` is among ``parties``, else a group labelled by
      ``party_group_label(parties)`` holding all parties except
      ``'ALL OTHER'``.

    ``parties`` must be iterable; the remaining arguments are forwarded
    unchanged to ``display_topic_quantity``. When ``print_args`` is set, or
    ``chart_type_name == 'print_args'``, the call arguments are
    pretty-printed first and processing then continues.
    """
    # locals() must be read before any other local is bound so that the dump
    # contains the call arguments only.
    if print_args or (chart_type_name == 'print_args'):
        args = utility.filter_dict(locals(), ['progress', 'print_args'], filter_out=True)
        args['wti_index'] = None
        pp(args)

    period_group = config.DEFAULT_PERIOD_GROUPS[period_group_index]
    topic_group = config.TOPIC_GROUP_MAPS[topic_group_name]

    named_parties = [name for name in parties if name != 'ALL OTHER']
    by_party_or_region = target_quantity in ('party', 'region')

    # Topic groups: split per category only for party/region targets.
    if by_party_or_region and chart_per_category:
        topic_groups = [{key: {key: topic_group[key]}} for key in topic_group.keys()]
    else:
        topic_groups = [topic_group]

    # Party groups.
    if by_party_or_region:
        party_groups = [{'label': topic_group_name, 'parties': named_parties}]
    elif chart_per_category:
        party_groups = [
            {'label': name, 'parties': parties if name == 'ALL OTHER' else [name]}
            for name in parties
        ]
    elif 'ALL' in parties:
        party_groups = [{'label': 'ALL', 'parties': None}]
    else:
        party_groups = [{'label': party_group_label(parties), 'parties': named_parties}]

    combinations = (
        (party_group, topics)
        for party_group in party_groups
        for topics in topic_groups
    )
    for party_group, topics in combinations:
        display_topic_quantity(
            period_group=period_group,
            party_group=party_group,
            topic_group=topics,
            recode_is_cultural=recode_is_cultural,
            normalize_values=normalize_values,
            extra_other_category=extra_other_category,
            chart_type_name=chart_type_name,
            plot_style=plot_style,
            target_quantity=target_quantity,
            wti_index=wti_index,
            progress=progress
        )
def display_party_network(
    parties=None,
    period_group_index=0,
    treaty_filter='',
    plot_data=None,
    topic_group=None,
    recode_is_cultural=False,
    layout_algorithm='',
    C=1.0,
    K=0.10,
    p1=0.10,
    output='network',
    party_name='party',
    node_size_range=None,
    palette_name=None,
    width=900,
    height=900,
    node_size=None,
    node_partition=None,
    wti_index=None,
    year_limit=None,
    progress=utility.noop,
    done_callback=None
):
    """Display the treaty network between parties as a graph plot or a table.

    Party/treaty edge data is fetched with ``wti_index.get_party_network``,
    turned into a graph with ``create_party_network`` and then either laid
    out and plotted (``output='network'``) or displayed as a table
    (``output='table'``).

    Parameters
    ----------
    parties : iterable of str
        Parties to include; converted to a list and forwarded to
        ``wti_index.get_party_network``.
    period_group_index :
        Key/index into ``config.DEFAULT_PERIOD_GROUPS``.
    treaty_filter, recode_is_cultural, year_limit :
        Forwarded to ``wti_index.get_party_network``.
    plot_data : optional
        Object with an ``update(**kwargs)`` method that receives the result
        of ``plot_network``. A new ``utility.SimpleStruct`` is created when
        omitted (or falsy).
    topic_group : dict, optional
        When given, rows are restricted to topics among its keys and each
        row gets a ``line_color`` based on its ``category``. When omitted,
        ``category`` is set to the row's ``topic``.
    layout_algorithm, C, K, p1 :
        Layout algorithm name and parameters passed to ``layout_network``
        (``K`` is also passed to ``create_party_network``).
    output : str
        ``'network'``, ``'table'`` or ``'print_args'`` (pretty-print the
        call arguments and return).
    party_name : str
        Forwarded to ``wti_index.get_party_network``.
    node_size_range : list, optional
        Passed to ``setup_node_size``. Defaults to ``[40, 60]``.
    palette_name :
        Passed to ``get_palette``.
    width, height : int
        Figure size of the network plot.
    node_size, node_partition :
        Passed to ``setup_node_size`` / ``create_party_network``.
    wti_index :
        Treaty index providing ``get_party_network``.
    progress : callable
        Progress callback, called with the step number and finally with ``0``.
    done_callback : callable, optional
        Called with ``None`` after the network has been plotted.

    Returns
    -------
    None

    Raises
    ------
    Exception
        Any error is logged and then re-raised.
    """
    # Resolve the default here, not in the signature, so calls never share
    # one mutable list. Done before locals() is read so the argument dump
    # still shows [40, 60].
    if node_size_range is None:
        node_size_range = [40, 60]

    try:
        if output == 'print_args':
            args = utility.filter_dict(locals(), ['progress', 'done_callback'], filter_out=True)
            args['wti_index'] = None
            args['plot_data'] = None
            args['output'] = 'network'
            pp(args)
            return

        plot_data = plot_data or utility.SimpleStruct(
            handle=None, nodes=None, edges=None, slice_range_type=2, slice_range=year_limit
        )

        # Hard-coded to 0.0, so the sub-network pruning below is disabled.
        weight_threshold = 0.0

        palette = get_palette(palette_name)

        progress(1)

        period_group = config.DEFAULT_PERIOD_GROUPS[period_group_index]
        parties = list(parties)

        party_data = wti_index.get_party_network(
            party_name,
            topic_group,
            parties,
            period_group=period_group,
            treaty_filter=treaty_filter,
            recode_is_cultural=recode_is_cultural,
            year_limit=year_limit
        )

        if party_data is None or party_data.shape[0] == 0:
            print('No data for selection')
            return

        if topic_group is not None:
            # Explicit copy: columns are added below, and assigning onto a
            # .loc slice triggers pandas chained-assignment warnings.
            party_data = party_data.loc[party_data.topic.isin(topic_group.keys())].copy()

            line_palette = color_utility.DEFAULT_LINE_PALETTE
            # Cycle through the palette when there are more groups than colors.
            line_palette_map = {
                key: i % len(line_palette) for i, key in enumerate(topic_group.keys())
            }
            party_data['line_color'] = party_data.category.apply(
                lambda x: line_palette[line_palette_map[x]]
            )
        else:
            party_data['category'] = party_data.topic

        # Edge label: "<signed year>/<category>"
        party_data['edge_label'] = party_data.signed.apply(
            lambda x: x.year
        ).astype(str) + '/' + party_data.category

        progress(2)

        # The topic filter above may have removed every row.
        if party_data is None or party_data.shape[0] == 0:
            print('No data for selection')
            return

        G = create_party_network(party_data, K, node_partition, palette)

        progress(3)

        if output == 'network':

            if weight_threshold > 0:
                G = get_sub_network(G, weight_threshold)

            layout, _ = layout_network(G, layout_algorithm, scale=1.0, K=K, C=C, p=p1)

            progress(4)

            edges = get_positioned_edges2(G, layout, sort_attr='signed')
            nodes = get_positioned_nodes(G, layout)

            # Materialize every attribute as a plain list.
            edges = {k: list(edges[k]) for k in edges}
            nodes = {k: list(nodes[k]) for k in nodes}

            node_size = setup_node_size(nodes, node_size, node_size_range)

            x_offset, y_offset = adjust_node_label_offset(nodes, node_size)

            plot_opts = utility.extend(
                NETWORK_PLOT_OPTS,
                figsize=(width, height),
                node_size=node_size,
                node_label_opts=dict(y_offset=y_offset, x_offset=x_offset),
                edge_label_opts={}
            )

            progress(5)

            data = plot_network(
                nodes=nodes, edges=edges, node_label='name', edge_label='edge_label', **plot_opts
            )

            plot_data.update(**data)

            progress(6)

            if done_callback is not None:
                done_callback(None)

        elif output == 'table':
            # Present the edge list with source/target column names.
            column_names = dict(party='source', party_other='target')
            party_data.columns = [column_names.get(x, x) for x in party_data.columns]
            display(party_data)

    except Exception as ex:
        logger.error(ex)
        raise
    finally:
        # Always reset the progress indicator, also on early return or error.
        progress(0)
def display_quantity_by_party(
    period_group_index=0,
    party_name='',
    parties=None,
    year_limit=None,
    treaty_filter='',
    extra_category='',
    normalize_values=False,
    chart_type_name=None,
    plot_style='classic',
    top_n_parties=5,
    overlay=True,
    progress=utility.noop,
    wti_index=None,
    print_args=False,
    treaty_sources=None
):
    """Display treaty counts per party and period as a plot or a table.

    Statistics are fetched with
    ``analysis_data.QuantityByParty.get_treaties_statistics`` and pivoted to
    one row per ``Period`` and one column per ``Party``. Depending on the
    chart type, the result is plotted via ``analysis_plot.quantity_plot``,
    shown as the long-format table (chart type ``'table'``) or shown as the
    pivot table.

    Parameters
    ----------
    period_group_index :
        Key/index into ``config.DEFAULT_PERIOD_GROUPS``.
    party_name, treaty_filter, extra_category, treaty_sources :
        Forwarded to ``get_treaties_statistics``.
    parties : iterable of str
        Converted to a list and forwarded to ``get_treaties_statistics``.
    year_limit :
        Used to trim the period group when its ``'type'`` is ``'range'``;
        otherwise ignored (reset to ``None``).
    normalize_values : bool
        When ``True``, counts are converted to percentages of each period's
        total, in both the pivot and the long-format data.
    chart_type_name :
        Key into ``config.CHART_TYPE_MAP``.
    plot_style, overlay :
        Forwarded to ``analysis_plot.quantity_plot``.
    top_n_parties : int
        Forwarded to ``get_treaties_statistics`` as ``n_top``.
    progress : callable
        Progress callback; called without arguments per step and with ``0``
        when done.
    wti_index :
        Treaty index passed to ``get_treaties_statistics``.
    print_args : bool
        Pretty-print the call arguments before processing (also triggered by
        ``chart_type_name == 'print_args'``).

    Returns
    -------
    None

    Notes
    -----
    Errors are logged and swallowed (best-effort display); nothing is
    re-raised to the caller.
    """
    try:
        # locals() must be read before any other local is bound so that the
        # dump contains the call arguments only.
        if print_args or (chart_type_name == 'print_args'):
            args = utility.filter_dict(locals(), ['progress', 'print_args'], filter_out=True)
            args['wti_index'] = None
            pp(args)

        static_color_map = color_utility.get_static_color_map()

        progress()

        period_group = config.DEFAULT_PERIOD_GROUPS[period_group_index]
        chart_type = config.CHART_TYPE_MAP[chart_type_name]

        parties = list(parties)

        # A year limit is only applied to range-type period groups.
        if period_group['type'] == 'range':
            period_group = treaty_utility.trim_period_group(period_group, year_limit)
        else:
            year_limit = None

        data = analysis_data.QuantityByParty.get_treaties_statistics(
            wti_index,
            period_group=period_group,
            party_name=party_name,
            parties=parties,
            treaty_filter=treaty_filter,
            extra_category=extra_category,
            n_top=top_n_parties,
            year_limit=year_limit,
            treaty_sources=treaty_sources
        )

        progress()

        if data.shape[0] == 0:
            print('No data for selection')
            return

        pivot = pd.pivot_table(
            data, index=['Period'], values=["Count"], columns=['Party'], fill_value=0
        )
        # Flatten the ('Count', <party>) column index to the party names.
        pivot.columns = [x[-1] for x in pivot.columns]

        if normalize_values is True:
            # Percent of each period's total.
            pivot = pivot.div(0.01 * pivot.sum(1), axis=0)
            # Transform the Count column only: transforming the whole frame
            # would also hit non-numeric columns such as Party and would
            # yield a DataFrame that cannot be assigned to one column.
            data['Count'] = data.groupby(['Period'])['Count'].transform(
                lambda x: 100.0 * (x / x.sum())
            )

        progress()

        if chart_type.name.startswith('plot'):

            columns = pivot.columns
            # Drop the Period index, keeping the party columns only.
            pivot = pivot.reset_index()[columns]
            colors = static_color_map.get_palette(columns)

            kwargs = analysis_plot.prepare_plot_kwargs(
                pivot, chart_type, normalize_values, period_group
            )
            kwargs.update(overlay=overlay, colors=colors)

            progress()

            analysis_plot.quantity_plot(data, pivot, chart_type, plot_style, **kwargs)

        elif chart_type.name == 'table':
            display(data)
        else:
            display(pivot)

        progress()

    except Exception as ex:
        # Deliberately best-effort: log and continue instead of re-raising.
        logger.error(ex)
    finally:
        # Always reset the progress indicator, also on early return or error.
        progress(0)