def project_participation_evolution(
        pm_frame, all_authors, n=2, skip_anon=True, research_only=False):
    """Assemble data on participation in projects, with n as threshold.

    Builds a boolean author-by-project table from the
    'authors (accumulated)' column of the frame returned by get_last.

    Args:
        pm_frame: frame passed on to get_last.
        all_authors: iterable of candidate authors. Ignored when
            research_only is true; the candidates are then the union of
            the accumulated authors of the research threads.
        n: minimum number of projects an author must appear in to be
            marked True in the returned selection.
        skip_anon: when true, the "Anonymous" row is removed (if there
            is one).
        research_only: when true, use 'research threads' instead of
            'all threads'.

    Returns:
        Tuple (author_project, index, select, title) for use by the
        stacked bar-plot and heatmap functions:
        author_project is a DataFrame with one row per author and one
        column per project, True where the author is among the project's
        accumulated authors; index is the project index (second level
        dropped); select is a boolean Series over the authors, True where
        the author appears in at least n projects; title is the plot
        title.
    """
    # Implicit string concatenation instead of a backslash continuation
    # inside the literal, which put stray characters into the title.
    if research_only:
        thread_type = 'research threads'
        title = ("Participation per project in Polymath "
                 "(threshold = {}, only research-threads)").format(n)
    else:
        thread_type = 'all threads'
        title = ("Participation per project in Polymath "
                 "(threshold = {})").format(n)
    data, _ = get_last(pm_frame, thread_type)
    data.index = data.index.droplevel(1)
    # Taken after the index change so that it carries the new index.
    accumulated = data[thread_type, 'authors (accumulated)']
    if research_only:
        # A list, because recent pandas refuses a set as column labels.
        all_authors = list(set().union(*accumulated))
    else:
        all_authors = list(all_authors)
    # Build the frame in one go rather than inserting column by column.
    author_project = DataFrame(
        {author: accumulated.apply(
            lambda project, author=author: author in project)
         for author in all_authors},
        index=accumulated.index)
    # Authors become rows; sort them by membership in each project in turn.
    author_project = author_project.T.sort_values(
        by=data.index.tolist(), ascending=False)
    if skip_anon:
        # errors="ignore": no KeyError when nobody posted as "Anonymous".
        author_project = author_project.drop("Anonymous", errors="ignore")
    select = author_project.sum(axis=1) >= n
    return author_project, data.index, select, title
def plot_community_evolution(pm_frame, project, thread_type):
    """Area plot of current, joined and left commenters.

    When the last word of project parses as an integer, the plot is
    drawn per thread of that project (rows pm_frame.loc[project]).
    Otherwise it is drawn per row of the frame returned by get_last,
    and project is only used in the title.

    Args:
        pm_frame: frame with 'basic' and thread-type column groups.
        project: project label; see above for how it selects the mode.
        thread_type: 'all threads', 'research threads', or
            'discussion threads'. A falsy value logs a warning and
            nothing is plotted.

    Returns:
        None. The plot is drawn through DataFrame.plot.
    """
    if not thread_type:
        logging.warning("Need explicit thread type")
        return
    try:
        int(project.split()[-1])
        as_threads = True
    except (ValueError, IndexError):
        # IndexError: empty or blank project name has no last word.
        as_threads = False
    if as_threads:
        data = pm_frame[['basic', thread_type]].loc[project].dropna()
        added, removed = _added_removed(data, thread_type, 'authors')
        size = data[thread_type, 'authors'].apply(len) - added
        df = DataFrame({'joined': added, 'left': removed, 'current': size},
                       columns=["joined", "current", "left"],
                       index=data.index)
        df.index = range(len(df))
        # Explicit NaN test. The previous assert compared df with
        # df.dropna(), which raises ValueError (not AssertionError) as
        # soon as a row is dropped, so the warning was never reached.
        if df.isnull().values.any():
            logging.warning("Some nan-values still present")
        df.index.name = "Threads"
    else:
        data, positions = get_last(pm_frame, thread_type)
        added, removed = _added_removed(
            data, thread_type, 'authors (accumulated)')
        size = data[thread_type, 'authors (accumulated)'].dropna().apply(
            len) - added
        df = DataFrame({'joined': added, 'left': removed, 'current': size},
                       columns=["joined", "current", "left"])
        df.index = range(1, len(positions) + 1)
    mpl.style.use(SBSTYLE)
    # NOTE(review): verify that 'sage' is a recognised colour name in the
    # installed matplotlib version.
    axes = df.plot(kind="area",
                   title="Community Evolution in {} ({})".format(
                       project, thread_type),
                   color=['sage', 'lightgrey', 'indianred'],
                   stacked=True)
    axes.set_xticks(df.index)
    axes.xaxis.set_ticks_position('bottom')
    axes.yaxis.set_ticks_position('left')
    if as_threads:
        axes.set_xticklabels(data['basic', 'title'], rotation=90,
                             fontsize='small')
    else:
        xlabels = data.index.droplevel(1)
        axes.set_xticklabels(xlabels, rotation=90, fontsize='small')
    axes.set_ylabel('number of active commenters')