def plot_growth_timeline(self, cumulative=True, plot_method="d3"):
    '''
    Plots a graph depicting the growth of each cluster's size as a
    function of time.

    Clusters whose document collection is empty are left out of the plot.

    Parameters:
        cumulative: forwarded unchanged to aggregate_data and to the
            timeline object. Presumably selects running totals instead of
            per-bin counts -- TODO confirm against aggregate_data.
        plot_method: "d3" (default) renders through D3Timeline into
            'timeline_growth.html'; "matplotlib" draws through
            MatplotlibTimeline and then shows the result.

    Raises:
        AssertionError: if self.clusters is an empty list.
        ValueError: if plot_method is neither "matplotlib" nor "d3".
    '''
    # Raised explicitly instead of through an `assert` statement so the
    # check is not stripped under `python -O`; the exception type seen by
    # existing callers stays the same.
    if self.clusters == []:
        raise AssertionError("no clusters available to plot")

    def document_date(doc):
        # The two plotting branches originally read the date differently
        # (doc['date'] vs. doc.date); accept either access style so both
        # branches work on the same kind of document.
        try:
            return doc.date
        except AttributeError:
            return doc['date']

    if plot_method == "matplotlib":
        timeline = None
        for cluster in self.clusters:
            documents = cluster.get_documents()
            if len(documents) == 0:
                continue
            dates, counts = aggregate_data(
                [document_date(doc) for doc in documents.values()],
                cumulative=cumulative)
            timeline = MatplotlibTimeline(dates, counts, cumulative=cumulative)
            timeline.plot()
        # Show once, after every non-empty cluster has been drawn. Skipped
        # when no cluster had documents, so no unbound timeline is touched.
        if timeline is not None:
            timeline.show()
    elif plot_method == "d3":
        data = []
        meta = []
        for cluster in self.clusters:
            documents = cluster.get_documents()
            # analyse() is invoked for every cluster, including empty ones,
            # before the emptiness check (same order as before).
            # NOTE(review): presumably it prepares what the get_* accessors
            # below return -- confirm.
            cluster.analyse()
            if len(documents) == 0:
                continue
            data.append([document_date(doc) for doc in documents.values()])
            meta.append({"Terms": cluster.get_most_frequent_terms(N=8),
                         "Authors": [len(cluster.get_authors())],  # should be wrapped in a list
                         "Locations": cluster.get_locations(N=2),
                         "Main entities": cluster.get_persons(N=2)})

        final_dates = []
        final_counts = []
        for cluster_dates in data:
            t_dates, t_counts = aggregate_data(cluster_dates, cumulative=cumulative)
            # aggregate_data yields numeric dates; turn them into timestamp
            # strings for the D3 front end.
            final_dates.append([num2date(date).strftime('%Y-%m-%d %H:%M:%S')
                                for date in t_dates])
            # .tolist() converts the counts into a plain Python list.
            final_counts.append(t_counts.tolist())

        t = D3Timeline(final_dates, final_counts, meta, cumulative=cumulative)
        t.plot(url='timeline_growth.html')
    else:
        # Previously an unknown method fell through and silently did nothing.
        raise ValueError(
            "unknown plot_method %r; expected 'matplotlib' or 'd3'" % (plot_method,))
top_clusters = sorted(top_clusters, key=lambda x: -x[0])[:20] meta = [] top_clusters = sorted(top_clusters, key=lambda x: x[1]) for i, cluster in enumerate(top_clusters): cluster_struct = cluster[2] cluster_struct.analyse() meta.append({"title":"event"+str(i), "date":cluster[1].strftime('%Y-%m-%d %H:%M:%S'), "keywords":cluster_struct.get_most_frequent_terms(N=9), "authors": len(cluster_struct.get_authors()), "locations": cluster_struct.get_locations(), "namedEntities": cluster_struct.get_persons()}) data = [[doc.date for doc in items]] dates = [] counts = [] for d in data: t_dates, t_counts = aggregate_data(d, cumulative=False) dates.append([num2date(date).strftime('%Y-%m-%d %H:%M:%S') for date in t_dates]) counts.append(t_counts) final_dates = dates final_counts = [count.tolist() for count in counts] t = D3Timeline(final_dates, final_counts, meta=meta, cumulative=False) t.plot(url='timeline_hackathon.html')