Example #1
0
    def plot_growth_timeline(self, cumulative=True, plot_method="d3"):
        '''
        Plots a graph depicting the growth of each cluster's size as a
        function of time.

        One series is produced for every cluster in self.clusters that has
        at least one document; clusters without documents are skipped.

        cumulative  -- forwarded unchanged to aggregate_data and to the
                       timeline object.
        plot_method -- "matplotlib" draws every series with a
                       MatplotlibTimeline and then shows the figure;
                       "d3" (the default) collects all series plus
                       per-cluster metadata and renders them with a
                       D3Timeline into 'timeline_growth.html'.
                       Any other value plots nothing.

        Returns None.
        '''
        assert self.clusters != [], "no clusters to plot"
        if plot_method == "matplotlib":
            # Remember the most recent timeline so show() is only called
            # when at least one cluster was actually plotted; calling it
            # unconditionally failed when every cluster was empty.
            timeline = None
            for cluster in self.clusters:
                documents = cluster.get_documents()
                if not documents:
                    continue
                # NOTE(review): this branch reads doc['date'] while the d3
                # branch reads doc.date -- confirm which access the
                # document type really supports.
                dates, counts = aggregate_data(
                    [doc['date'] for doc in documents.values()],
                    cumulative=cumulative)
                timeline = MatplotlibTimeline(dates, counts,
                                              cumulative=cumulative)
                timeline.plot()
            if timeline is not None:
                timeline.show()
        elif plot_method == "d3":
            data = []
            meta = []
            for cluster in self.clusters:
                documents = cluster.get_documents()
                # analyse() runs for every cluster, empty or not, before
                # the cluster's terms/authors/locations/persons are read.
                cluster.analyse()
                if not documents:
                    continue
                data.append([doc.date for doc in documents.values()])
                meta.append({"Terms": cluster.get_most_frequent_terms(N=8),
                             # the author count is wrapped in a list on purpose
                             "Authors": [len(cluster.get_authors())],
                             "Locations": cluster.get_locations(N=2),
                             "Main entities": cluster.get_persons(N=2)})

            final_dates = []
            final_counts = []
            for series in data:
                t_dates, t_counts = aggregate_data(series,
                                                   cumulative=cumulative)
                # Timestamps are handed to the timeline as formatted strings.
                final_dates.append(
                    [num2date(date).strftime('%Y-%m-%d %H:%M:%S')
                     for date in t_dates])
                # Counts are converted to plain lists via tolist()
                # (presumably numpy arrays -- confirm against aggregate_data).
                final_counts.append(t_counts.tolist())
            timeline = D3Timeline(final_dates, final_counts, meta,
                                  cumulative=cumulative)
            timeline.plot(url='timeline_growth.html')
    
# Timestamp layout shared by the event metadata and the timeline labels.
TIMESTAMP_FORMAT = '%Y-%m-%d %H:%M:%S'

# Keep the 20 entries whose first field is largest (sorting on the negated
# value puts the largest first), then order the survivors by their second
# field, which is the value formatted as each event's date below.
ranked = sorted(top_clusters, key=lambda entry: -entry[0])
top_clusters = sorted(ranked[:20], key=lambda entry: entry[1])

# Build one metadata record per retained entry, in that final order.
meta = []
for position, entry in enumerate(top_clusters):
    cluster_struct = entry[2]
    # analyse() is invoked before any of the cluster's getters are read.
    cluster_struct.analyse()
    record = {}
    record["title"] = "event" + str(position)
    record["date"] = entry[1].strftime(TIMESTAMP_FORMAT)
    record["keywords"] = cluster_struct.get_most_frequent_terms(N=9)
    record["authors"] = len(cluster_struct.get_authors())
    record["locations"] = cluster_struct.get_locations()
    record["namedEntities"] = cluster_struct.get_persons()
    meta.append(record)

# A single series holding the date of every item.
data = [[item.date for item in items]]

dates = []
counts = []
for series in data:
    series_dates, series_counts = aggregate_data(series, cumulative=False)
    labels = []
    for stamp in series_dates:
        labels.append(num2date(stamp).strftime(TIMESTAMP_FORMAT))
    dates.append(labels)
    counts.append(series_counts)

final_dates = dates
# The timeline receives each count series as the result of tolist().
final_counts = []
for series_counts in counts:
    final_counts.append(series_counts.tolist())

t = D3Timeline(final_dates, final_counts, meta=meta, cumulative=False)
t.plot(url='timeline_hackathon.html')