def plot_centre_dist(self, thresh=2, show_threads=True, **kwargs):
    """Line-plot of the time elapsed since the last comment, per participant.

    The two frames returned by the centre-distance helper (called with
    ``thresh`` and ``split=True``) share one set of axes: the first is
    drawn at full opacity, the second faded (alpha .1).  Line colours are
    derived from the 'color' column of the author-frame.

    thresh       -- forwarded to the centre-distance helper
    show_threads -- when true, thread markers are overlaid on the axes
    kwargs       -- interpreted by ac.handle_kwargs (project, show, ...)
    """
    project, show, _ = ac.handle_kwargs(**kwargs)
    data_high, data_low = self.__get_centre_distances(thresh, split=True)

    plt.style.use(SETTINGS['style'])
    _, axes = plt.subplots()

    # one palette per frame, in the same order as the frame's columns
    palettes = [
        ac.color_list(self.author_frame.loc[frame.columns, 'color'],
                      SETTINGS['vmin'], SETTINGS['vmax'], cmap=CMAP)
        for frame in (data_high, data_low)]
    layers = zip((data_high, data_low), palettes, ({}, {'alpha': .1}))
    for frame, palette, options in layers:
        frame.plot(ax=axes, color=palette, legend=False, **options)

    axes.set_ylabel("Days elapsed since last comment")
    axes.set_title(
        "Distance from centre of discussion\n{}".format(project))
    for axis, side in ((axes.xaxis, 'bottom'), (axes.yaxis, 'left')):
        axis.set_ticks_position(side)
    if show_threads:
        self.__show_threads(axes)
    ac.show_or_save(show)
def plot_activity_prop(self, **kwargs):
    """Show number of comments (stacked bars) and the proportion of
    higher-level comments (dotted line) for all authors.

    The bars stack every author-frame column whose name starts with
    'level'.  The proportion is the sum of all but the first of those
    columns divided by the sum of all of them; it is drawn on a secondary
    y-axis.

    kwargs -- interpreted by ac.handle_kwargs (project, show, fontsize)
    """
    project, show, fontsize = ac.handle_kwargs(**kwargs)
    plt.style.use(SETTINGS['style'])
    cols = self.author_frame.columns[
        self.author_frame.columns.str.startswith('level')].tolist()
    data = self.author_frame[cols].copy()
    data['proportion'] = (data[cols[1:]].sum(axis=1) /
                          data[cols].sum(axis=1))
    colors = [plt.cm.Set1(20 * i) for i in range(len(data.index))]
    axes = data[cols].plot(
        kind='bar', stacked=True, color=colors,
        title="Commenting activity and proportion of higher-level comments for {}".format(project).title(),
        fontsize=fontsize)
    axes.set_ylabel("Number of comments")
    axes.legend(bbox_to_anchor=(0.165, 1))

    axes2 = axes.twinx()
    axes2.set_ylabel("Proportion of Higher-level comments")
    # Shared between the plotted line and its legend proxy so that the
    # legend shows what is actually drawn.  Note: no fontsize here -- it is
    # not a Line2D property and matplotlib rejects it.
    line_style = dict(linestyle=':', marker='.', markersize=10,
                      linewidth=.7, color='darkgrey')
    axes2.plot(axes.get_xticks(), data['proportion'].values, **line_style)
    the_lines = [mlines.Line2D([], [], label="Proportion", **line_style)]
    axes2.legend(handles=the_lines, bbox_to_anchor=(1, 1))
    ac.show_or_save(show)
def plot_centre_closeness(self, thresh=10, ylim=None, **kwargs):
    """Boxplot of the delays between consecutive comments, per author.

    thresh -- only authors with at least this many delays (differences
              between consecutive timestamps) are shown
    ylim   -- optional upper limit for the y-axis (lower limit is 0)
    kwargs -- interpreted by ac.handle_kwargs (project, show, ...)
    """
    project, show, _ = ac.handle_kwargs(**kwargs)
    timestamps = self.author_frame['timestamps'].apply(np.array)
    # "Anonymous" may or may not be present; a missing label raises
    # KeyError on current pandas (ValueError on very old ones), so let
    # drop() ignore it instead of catching a version-specific exception.
    timestamps = timestamps.drop("Anonymous", errors='ignore')
    delays = timestamps.apply(np.diff)
    delays = delays[delays.apply(len) >= thresh]
    to_days = np.vectorize(lambda x: x.total_seconds() / (60**2 * 24))
    delays = delays.map(to_days)

    plt.style.use(SETTINGS['style'])
    _, axes = plt.subplots()
    bplot = axes.boxplot(delays, sym='.', showmeans=True, meanline=True)
    for key in ['whiskers', 'boxes', 'caps']:
        plt.setp(bplot[key], color='steelblue')
    plt.setp(bplot['means'], color="firebrick")
    axes.set_xticklabels(delays.index, rotation=40, ha='right')
    axes.set_xlabel("Participants with at least {} comments".format(
        thresh))
    # ticks at 0.5, 1, 2, 4 and 8 days
    axes.set_yticks(np.logspace(-1, 3, num=5, base=2))
    axes.set_ylabel("Delay in days")
    if ylim:
        axes.set_ylim(0, ylim)
    axes.set_title("Delays between comments in {}".format(project))
    axes.xaxis.set_ticks_position('bottom')
    axes.yaxis.set_ticks_position('left')
    ac.show_or_save(show)
def scatter_authors(self, measure="betweenness centrality", thresh=15,
                    **kwargs):
    """Scatter-plot of authors: x is the interaction-network value of
    `measure`, y the cluster-network value.  Colour encodes the number of
    comments, marker size the average number of words per comment.

    Authors with at least `thresh` comments are labelled by name.
    """
    project, show, _ = ac.handle_kwargs(**kwargs)
    x_measure = " ".join(["interaction", measure])
    y_measure = " ".join(["cluster", measure])
    frame = self.author_frame
    avg_wordcount = frame['word counts'] / frame['total comments']
    axes = frame.plot(
        kind='scatter', x=x_measure, y=y_measure,
        c='total comments', s=avg_wordcount,
        cmap="viridis_r", sharex=False,
        title="Author-activity and centrality in {}".format(project))
    for name, data in frame.iterrows():
        if not data['total comments'] >= thresh:
            continue
        axes.text(data[x_measure], data[y_measure], name, fontsize=6)
    ac.fake_legend([50, 100, 250], title="Average wordcount of comments")
    ac.show_or_save(show)
def plot_centrality_measures(self, g_type="interaction", measures=None,
                             delete_on=None, thresh=0, **kwargs):
    """Bar-plot of centrality measures per author, with a thin horizontal
    line marking the mean of each measure.

    g_type    -- name of the graph the measures are taken from; it is also
                 stripped as a prefix from the column names in the legend
    measures  -- measures to show (defaults to self.centr_measures)
    delete_on -- optional position of the measure used for filtering: only
                 authors whose value for that measure exceeds `thresh` are
                 kept.  No filtering when None.
    thresh    -- threshold used together with `delete_on`
    kwargs    -- interpreted by ac.handle_kwargs (project, show, fontsize)
    """
    project, show, fontsize = ac.handle_kwargs(**kwargs)
    if not measures:
        measures = self.centr_measures
    centr_cols, centrality, means = self.__get_centrality_measures(
        g_type, measures)
    if delete_on is not None:
        centrality = centrality[centrality[centr_cols[delete_on]] > thresh]
    colors = ac.color_list(len(measures),
                           SETTINGS['vmin'], SETTINGS['vmax'],
                           factor=15)
    # keep the full names: `means` is keyed by them
    full_measure_names = list(centrality.columns)
    # rename on a new frame rather than mutating the helper's result
    centrality = centrality.rename(
        columns=lambda col: col.replace(g_type + " ", ""))

    plt.style.use(SETTINGS['style'])
    axes = centrality.plot(
        kind='bar', color=colors,
        title="Centrality-measures for {} ({}-graph)".format(
            project, g_type).title())
    x_span = [-.5, len(centrality.index) - .5]
    for measure, color in zip(full_measure_names, colors):
        the_mean = means[measure]
        # Axes.lines is read-only in current matplotlib; add_line is the
        # supported way of attaching a hand-made Line2D.
        axes.add_line(
            mlines.Line2D(
                x_span, [the_mean, the_mean],
                linestyle='-', linewidth=.5,
                color=color, zorder=1,
                transform=axes.transData))
    axes.set_xticklabels(centrality.index, fontsize=fontsize)
    ac.show_or_save(show)
def plot_author_activity_bar(self, what='by level', **kwargs):
    """Show plot of number of comments / word-count per author.

    what   -- 'by level': horizontal stacked bars of comments per level;
              'word counts': vertical bars (log-scale) of words per author
    kwargs -- interpreted by ac.handle_kwargs (project, show, fontsize)

    Raises ValueError when `what` is neither of the two options.
    """
    # validate before touching any global plotting state
    if what not in ("by level", "word counts"):
        raise ValueError(
            "what must be 'by level' or 'word counts', got {!r}".format(
                what))
    project, show, fontsize = ac.handle_kwargs(**kwargs)
    plt.style.use(SETTINGS['style'])
    if what == "by level":
        levels, colors = self.__get_author_activity_bylevel()
        total_num_of_comments = int(levels.sum().sum())
        axes = levels.plot(
            kind='barh', stacked=True, color=colors,
            title='Comments per author (total: {})'.format(
                total_num_of_comments),
            fontsize=fontsize)
        axes.set_yticklabels(levels.index, fontsize=fontsize)
    else:
        word_counts = self.author_frame[what].sort_values(ascending=False)
        total_word_count = int(word_counts.sum())
        axes = word_counts.plot(
            kind='bar', logy=True,
            title='Word-count per author in {} (total: {})'.format(
                project, total_word_count),
            fontsize=fontsize)
        axes.xaxis.set_ticks_position('bottom')
        axes.yaxis.set_ticks_position('left')
    ac.show_or_save(show)
def scatter_comments_replies(self, **kwargs):
    """Scatter-plot: total comments (x) against direct replies received
    (y), one point per row of the author-frame.

    kwargs -- interpreted by ac.handle_kwargs (project, show, ...)
    """
    project, show, _ = ac.handle_kwargs(**kwargs)
    x_col, y_col = 'total comments', 'replies (direct)'
    subset = self.author_frame[[x_col, y_col]]
    subset.plot(
        kind='scatter', x=x_col, y=y_col, sharex=False,
        title="total comments vs replies in {}".format(project))
    ac.show_or_save(show)
def plot_author_activity_hist(self, what='total comments', bins=10,
                              **kwargs):
    """Show histogram of commenting activity.

    what   -- column of the author-frame to bin: 'total comments'
              (default) or 'word counts'
    bins   -- passed on to the histogram
    kwargs -- interpreted by ac.handle_kwargs (project, show, ...)

    Raises ValueError when `what` is neither of the two options.
    """
    # validate first so a bad argument fails before any work is done
    if what not in ('total comments', 'word counts'):
        raise ValueError(
            "what must be 'total comments' or 'word counts', "
            "got {!r}".format(what))
    project, show, _ = ac.handle_kwargs(**kwargs)
    comments = self.author_frame[what]
    plt.style.use(SETTINGS['style'])
    _, axes = plt.subplots()
    comments.hist(bins=bins, grid=False, ax=axes)
    axes.set_title("Histogram of {} for {}".format(what, project))
    axes.set_xlim(1)  # left limit only
    axes.set_yticks(axes.get_yticks()[1:])  # drop the first y-tick
    ac.show_or_save(show)
def draw_graph(self, graph_type="interaction", k=None, reset=False,
               **kwargs):
    """Draw and show one of the author networks.

    graph_type -- "interaction" (self.i_graph) or "cluster" (self.c_graph)
    k          -- passed on to nx.spring_layout
    reset      -- recompute the layout even if self.positions is set
    kwargs     -- interpreted by ac.handle_kwargs (project, show, fontsize)

    Raises ValueError for any other graph_type.
    """
    titles = {"cluster": "Co-location Network",
              "interaction": "Interaction Network"}
    # previously an unknown type fell through and failed later with an
    # UnboundLocalError on `graph`
    if graph_type not in titles:
        raise ValueError(
            "graph_type must be 'cluster' or 'interaction', "
            "got {!r}".format(graph_type))
    project, show, fontsize = ac.handle_kwargs(**kwargs)
    graph = self.c_graph if graph_type == "cluster" else self.i_graph
    network_name = titles[graph_type]

    # attributing widths and colors to edges
    edges = list(graph.edges())
    weights = [graph[source][dest]['weight'] * 15
               for source, dest in edges]
    edge_colors = [plt.cm.Blues(weight) for weight in weights]

    # attributes sizes to nodes (counts fetched once, not per author)
    author_count = self.author_count()
    sizes = [(log(author_count[author], 4) + 1) * 300
             for author in self.author_frame.index]

    # positions with spring
    if reset or not self.positions:
        self.positions = nx.spring_layout(graph, k=k, scale=1)

    # creating title and axes
    figure = plt.figure()
    figure.suptitle("{} for {}".format(network_name, project).title(),
                    fontsize=12)
    axes = figure.add_subplot(111)
    axes.xaxis.set_ticks([])
    axes.yaxis.set_ticks([])

    # actual drawing
    # consider adding legend
    plt.style.use(SETTINGS['style'])
    nx.draw_networkx(graph, self.positions,
                     with_labels=SETTINGS['show_labels_authors'],
                     font_size=fontsize,
                     node_size=sizes,
                     nodelist=self.author_frame.index.tolist(),
                     node_color=self.author_frame['color'].tolist(),
                     # `edgelist` is the documented keyword; it keeps the
                     # edges aligned with edge_colors
                     edgelist=edges,
                     width=1,
                     edge_color=edge_colors,
                     vmin=SETTINGS['vmin'],
                     vmax=SETTINGS['vmax'],
                     cmap=CMAP,
                     ax=axes)
    ac.show_or_save(show)
def plot_activity_degree(self, g_type='interaction', measures=None,
                         delete_on=None, thresh=0, **kwargs):
    """Stacked bars of comments per author, overlaid (secondary y-axis)
    with dotted lines for network-measures.

    measures  -- measures to plot (defaults to self.centr_measures);
                 ['hits'] selects the 'hubs' and 'authorities' columns
    delete_on -- optional position of the measure used for filtering:
                 authors are kept only if that measure exceeds `thresh`
    kwargs    -- interpreted by ac.handle_kwargs (project, show, fontsize)
    """
    project, show, fontsize = ac.handle_kwargs(**kwargs)

    # --- centrality data ---
    measures = measures or self.centr_measures
    if measures == ['hits']:
        centr_cols = ['hubs', 'authorities']
        hits_frame = self.__hits()
        centrality = hits_frame[centr_cols].sort_values(
            centr_cols[0], ascending=False)
    else:
        centr_cols, centrality, _ = self.__get_centrality_measures(
            g_type, measures)
    if delete_on is not None:
        above_thresh = centrality[centr_cols[delete_on]] > thresh
        centrality = centrality[above_thresh]

    # --- commenting activity, restricted to the authors kept above ---
    comments, colors = self.__get_author_activity_bylevel()
    comments = comments.loc[centrality.index]

    plt.style.use(SETTINGS['style'])
    title = "Commenting activity and {} for {}".format(
        ", ".join(measures), project).title()
    axes = comments.plot(kind='bar', stacked=True, color=colors,
                         title=title, fontsize=fontsize)
    axes.set_ylabel("Number of comments")
    axes.xaxis.set_ticks_position('bottom')

    axes2 = axes.twinx()
    axes2.set_ylabel("Measures")
    # one marker per measure; zip stops at the shorter of the two
    the_lines = []
    for col, marker in zip(centr_cols, "oDsv^"):
        axes2.plot(axes.get_xticks(), centrality[col].values,
                   linestyle=':', marker=marker, markersize=5,
                   linewidth=.7, color='darkgray')
        the_lines.append(
            mlines.Line2D([], [], color='darkgray', linewidth=.7,
                          marker=marker, markersize=5,
                          label=col.replace(g_type + " ", "")))
    axes2.legend(handles=the_lines, bbox_to_anchor=(.83, 1))
    ac.show_or_save(show)
def plot_author_activity_pie(self, what='total comments', **kwargs):
    """Show commenting activity as a pie-chart.

    Authors whose share is below one percent of the total (rounded up)
    are merged into a single "fewer than ..." slice.

    what   -- 'total comments' (default) or 'word counts'
    kwargs -- interpreted by ac.handle_kwargs (project, show, fontsize)

    Raises ValueError when `what` is neither of the two options.
    """
    if what not in ('total comments', 'word counts'):
        raise ValueError(
            "what must be 'total comments' or 'word counts', "
            "got {!r}".format(what))
    project, show, fontsize = ac.handle_kwargs(**kwargs)
    comments = self.author_frame[[what, 'color']].sort_values(
        what, ascending=False)
    thresh = int(np.ceil(comments[what].sum() / 100))
    whatcounted = 'comments' if what == 'total comments' else 'words'
    merged_label = "fewer than {} {}".format(thresh, whatcounted)

    is_major = comments[what] >= thresh
    # Count the merged authors directly.  Taking the first entry of
    # value_counts() reported 1 when nobody was merged at all.
    merged_commenters = int((~is_major).sum())
    # A flat list of labels: a nested list yields a one-level MultiIndex
    # on current pandas, which turns the pie labels into tuples.
    comments.index = [author if major else merged_label
                      for author, major in is_major.items()]

    grouped = comments.groupby(level=0)
    comments = DataFrame({
        'totals': grouped[what].sum(),
        'maxs': grouped[what].max(),
        'color': grouped['color'].max()}).sort_values(
            'maxs', ascending=False)
    for_pie = comments['totals']
    for_pie.name = ""
    colors = ac.color_list(comments['color'],
                           SETTINGS['vmin'], SETTINGS['vmax'],
                           cmap=CMAP)
    plt.style.use(SETTINGS['style'])
    title = "Activity per author for {}".format(project).title()
    title += ' ({} {}, {} with fewer than {} {})'.format(
        int(comments['totals'].sum()), whatcounted,
        merged_commenters, thresh, whatcounted)
    for_pie.plot(
        kind='pie', autopct='%.2f %%', figsize=(6, 6),
        labels=for_pie.index,
        colors=colors,
        title=('\n'.join(wrap(title, 60))),
        fontsize=fontsize)
    ac.show_or_save(show)
def scatter_authors_hits(self, thresh=10, **kwargs):
    """Scatter-plot of authors with 'hubs' on x and 'authorities' on y.

    Colour encodes the number of comments, marker size the average number
    of words per comment.  Authors with at least `thresh` comments are
    labelled by name.
    """
    project, show, _ = ac.handle_kwargs(**kwargs)
    hits = self.__hits()
    avg_wordcount = hits['word counts'] / hits['total comments']
    axes = hits.plot(
        kind='scatter', x='hubs', y='authorities',
        c='total comments', s=avg_wordcount,
        cmap="viridis_r", sharex=False,
        title="Hubs and Authorities in {}".format(project))
    for name, data in hits.iterrows():
        if not data['total comments'] >= thresh:
            continue
        axes.text(data['hubs'], data['authorities'], name, fontsize=6)
    ac.fake_legend([50, 100, 250], title="Average wordcount of comments")
    ac.show_or_save(show)
def plot_centre_crowd(self, thresh=2, show_threads=False, **kwargs):
    """Stacked area-plot of how many participants are close to the centre.

    Per row of the centre-distance frame, counts the values falling into
    three bands: up to .25 (labelled '6 hours'), above .25 up to .5
    ('12 hours') and above .5 up to 1 ('24 hours').

    thresh       -- forwarded to the centre-distance helper
    show_threads -- when true, thread markers are overlaid on the axes
    kwargs       -- interpreted by ac.handle_kwargs (project, show, ...)
    """
    project, show, _ = ac.handle_kwargs(**kwargs)
    data = self.__get_centre_distances(thresh, split=False)

    labels = ['6 hours', '12 hours', '24 hours']
    bands = [data <= .25,
             (data <= .5) & (data > .25),
             (data <= 1) & (data > .5)]
    counts = [data[band].count(axis=1) for band in bands]
    data_close = DataFrame(dict(zip(labels, counts)), columns=labels)

    plt.style.use(SETTINGS['style'])
    y_max = data_close.sum(axis=1).max()
    _, axes = plt.subplots()
    data_close.plot(kind="area", ax=axes, stacked=True,
                    color=['darkslategray', 'steelblue', 'lightgray'])
    axes.set_yticks(range(1, y_max + 1))
    axes.set_ylabel("Number of participants")
    axes.set_title(
        "Crowd close to the centre of discussion in {}".format(project))
    for axis, side in ((axes.xaxis, 'bottom'), (axes.yaxis, 'left')):
        axis.set_ticks_position(side)
    if show_threads:
        self.__show_threads(axes)
    ac.show_or_save(show)
def plot_i_trajectories(self, thresh=None, select=None, l_thresh=5,
                        **kwargs):
    """Plot cumulative interaction-trajectories for each pair of
    contributors (one line per edge of the interaction graph).

    thresh   -- if given (and `select` is not), drop pairs that never
                reach this many interactions
    select   -- if given, keep only this many pairs, largest first
    l_thresh -- number of (largest) pairs listed in the legend
    kwargs   -- interpreted by ac.handle_kwargs (project, show, ...)

    Prints a message and returns without plotting when no trajectory
    frame can be built.
    """
    project, show, _ = ac.handle_kwargs(**kwargs)
    trajectories = {}
    # edges(data=True) works on networkx 1.x and 2.x; edges_iter was
    # removed in 2.0
    for source, dest, data in self.i_graph.edges(data=True):
        name = " / ".join([source, dest])
        trajectories[name] = Series(Counter(data['timestamps']),
                                    name=name)
    try:
        tr_data = DataFrame(trajectories)
    except ValueError as err:
        print("Could not create DataFrame: ", err)
        return
    if tr_data.empty:
        print("Could not create DataFrame: ", "no interactions found")
        return

    # order chronologically *before* accumulating
    tr_data = tr_data.fillna(0).sort_index().cumsum()
    # columns sorted by final number of interactions, largest first
    tr_data = tr_data[
        tr_data.iloc[-1].sort_values(ascending=False).index]
    title = "Interaction trajectories for {}".format(project)
    if select:
        tr_data = tr_data.iloc[:, :select]
        title += " ({} largest)".format(select)
    elif thresh:
        tr_data = tr_data.loc[:, ~(tr_data < thresh).all(axis=0)]
        title += " (minimally {} interactions)".format(thresh)
    # taken after filtering, so only columns that still exist are plotted
    col_order = list(tr_data.columns)

    plt.style.use(SETTINGS['style'])
    _, axes = plt.subplots()
    for col in col_order[:l_thresh]:
        tr_data[col].plot(ax=axes, label=col)
    for col in col_order[l_thresh:]:
        tr_data[col].plot(ax=axes, label=None)
    axes.legend(labels=col_order[:l_thresh], loc='best')
    axes.set_title(title)
    axes.xaxis.set_ticks_position('bottom')
    axes.yaxis.set_ticks_position('left')
    ac.show_or_save(show)
def draw_centre_discussion(self, regular_intervals=False,
                           skips=2, zoom=12, **kwargs):
    """Draw, frame by frame, who is at the centre of activity.

    For each point in time every author is placed at their own angle, at
    a distance equal to the number of seconds since their last comment.
    Each frame has a full view (left) and a view zoomed in by a factor
    10 (right), with discs marking one day, one week and one month.

    regular_intervals -- if true, frames are taken from a date_range
                         between first and last timestamp; otherwise from
                         the sorted timestamps of all comments
    skips -- only every `skips`-th point in time becomes a frame
    zoom  -- a number: the largest extent is divided by it;
             a key of the in_secs table ('day', 'week', ...): the extent
             is that many seconds; anything else: the largest extent
    kwargs -- interpreted by ac.handle_kwargs (only `show` is used)

    Frames are shown interactively when `show` is true, and otherwise
    saved as FIGS/imgNNNNN.png.
    """
    _, show, _ = ac.handle_kwargs(**kwargs)
    activity_df = self.author_frame[
        ['color', 'angle', 'timestamps']].copy()
    if not regular_intervals:
        intervals = np.concatenate(activity_df['timestamps'].values)
        intervals.sort(kind='mergesort')
        intervals = intervals[::skips]
    else:
        start = np.min(activity_df['timestamps'].apply(np.min))
        stop = np.max(activity_df['timestamps'].apply(np.max))
        intervals = date_range(start, stop)[::skips]

    x_max, y_max = 0, 0
    for interval in intervals:
        # TODO: fix - does not work
        interval_data = activity_df['timestamps'].apply(
            lambda x, intv=interval: x[x <= intv])
        # seconds since each author's last comment (nan if none yet);
        # the fallback handles differences without total_seconds()
        try:
            interval_data = interval_data.apply(
                lambda x, intv=interval:
                (intv - x[-1]).total_seconds() if x.size else np.nan)
        except AttributeError:
            interval_data = interval_data.apply(
                lambda x, intv=interval:
                (intv - x[-1]) / np.timedelta64(1, 's')
                if x.size else np.nan)
        x_coord = interval_data * np.cos(activity_df['angle'])
        the_min, the_max = np.min(x_coord), np.max(x_coord)
        x_max = max(abs(the_max), abs(the_min), x_max)
        y_coord = interval_data * np.sin(activity_df['angle'])
        the_min, the_max = np.min(y_coord), np.max(y_coord)
        y_max = max(abs(the_max), abs(the_min), y_max)
        coords = DataFrame({"x": x_coord, "y": y_coord})
        # each frame is stored as a column named after its point in time
        assert interval not in activity_df.columns
        activity_df[interval] = [list(x) for x in coords.values]

    in_secs = {'day': 86400, '2 days': 172800, 'week': 604800,
               '1 week': 604800, '2 weeks': 1209600, '3 weeks': 1814400,
               'month': 2635200}
    try:
        xy_max = max(x_max, y_max) / zoom
    except TypeError:
        # Non-numeric zoom: look it up by name, falling back to the full
        # extent.  (A sibling `except KeyError` clause can never catch a
        # KeyError raised inside this handler.)
        xy_max = in_secs.get(zoom, max(x_max, y_max))

    def setup_panel(axes, limit):
        """Set square limits, hide ticks and add the day/week/month discs."""
        axes.set_xlim([-limit, limit])
        axes.set_ylim([-limit, limit])
        axes.xaxis.set_ticks([])
        axes.yaxis.set_ticks([])
        day = plt.Circle((0, 0), in_secs['day'], color='darkslategray')
        week = plt.Circle((0, 0), in_secs['week'], color='slategray')
        month = plt.Circle((0, 0), in_secs['month'], color="lightblue")
        # largest first, so that the smaller discs stay visible
        axes.add_artist(month)
        axes.add_artist(week)
        axes.add_artist(day)

    def get_fig(activity_df, col_name):
        """Helper-function returning a fig based on DataFrame and col."""
        plt.style.use(SETTINGS['style'])
        coord = activity_df[col_name].to_dict()
        dists = pd.DataFrame(activity_df[col_name].tolist(),
                             columns=['x', 'y'],
                             index=activity_df.index)
        dists = np.sqrt(dists['x']**2 + dists['y']**2)
        in_day = dists[dists < in_secs['day']].count()
        in_week = dists[dists < in_secs['week']].count()
        in_month = dists[dists < in_secs['month']].count()
        fig = plt.figure()

        # left plot (full)
        axes1 = fig.add_subplot(121, aspect='equal')
        setup_panel(axes1, xy_max)
        the_date = pd.to_datetime(str(col_name)).strftime(
            '%Y.%m.%d\n%H:%M')
        axes1.text(-xy_max / 1.07, xy_max / 1.30, the_date,
                   bbox=dict(facecolor='slategray', alpha=0.5))
        nx.draw_networkx_nodes(self.graph, coord,
                               nodelist=activity_df.index.tolist(),
                               node_color=activity_df['color'],
                               node_size=20,
                               cmap=CMAP,
                               ax=axes1)

        # right plot: zoomed
        axes2 = fig.add_subplot(122, aspect='equal')
        setup_panel(axes2, xy_max / 10)
        day_patch = mpatches.Patch(
            color='darkslategray',
            label="{: <3} active in last day".format(in_day).ljust(25))
        week_patch = mpatches.Patch(
            color='slategray',
            label="{: <3} active in last week".format(in_week).ljust(25))
        month_patch = mpatches.Patch(
            color='lightblue',
            label="{: <3} active in last month".format(
                in_month).ljust(25))
        axes2.legend(handles=[day_patch, week_patch, month_patch])
        nx.draw_networkx_nodes(self.graph, coord,
                               nodelist=activity_df.index.tolist(),
                               node_color=activity_df['color'],
                               node_size=20,
                               cmap=CMAP,
                               ax=axes2)
        return fig

    ion()
    for num, interval in enumerate(intervals):
        fig = get_fig(activity_df, interval)
        if show:
            fig.canvas.draw()
            plt.draw()
            plt.pause(1)
        else:
            fig.savefig("FIGS/img{0:0>5}.png".format(num))
        plt.close(fig)