def run_experiment(tweets_csv, keywords, timezoneslist, titles, colors):
    """Save one stacked tweet-count timeline PDF per timezone group.

    ``timezoneslist`` and ``titles`` are walked in lockstep with ``zip``, so
    surplus entries in the longer of the two are ignored. For every pair a
    6x6 inch figure is drawn and written to
    ``frequency_stacked_<title>.pdf`` in the current working directory.

    Args:
        tweets_csv: Forwarded unchanged to ``foreach_keyword``
            (presumably the tweets CSV path or handle -- confirm there).
        keywords: Forwarded unchanged to ``foreach_keyword``.
        timezoneslist: Iterable of timezone groups. A group that is exactly
            ``[""]`` is titled "All timezones".
        titles: One title per timezone group; used in the axes title and in
            the output file name.
        colors: Forwarded unchanged to ``plot_stacked_timelines``.

    Returns:
        None. The function's only effect is the PDF files it writes.
    """
    for timezones, title in zip(timezoneslist, titles):
        fig = plt.figure(figsize=(6, 6))
        try:
            ax = plt.axes()

            # One major tick every 5 x-units. MAXTICKS is raised so that a
            # long date range does not trip the locator's tick-count limit.
            locator = MultipleLocator(base=5.0)
            locator.MAXTICKS = 10000
            # xfmt is a module-level formatter defined outside this function.
            ax.xaxis.set_major_formatter(xfmt)
            ax.xaxis.set_major_locator(locator)

            if len(timezones) == 1 and timezones[0] == "":
                ax.set_title("All timezones")
            else:
                ax.set_title("Timezone: {}".format(title))

            x, y, labels = foreach_keyword(tweets_csv, keywords, timezones)
            xf, yf = fill_with_zeros(x, y)
            plot_stacked_timelines(ax, xf, yf, colors, labels)

            ax.set_xlabel("Date")
            ax.set_ylabel("Tweet count")
            ax.xaxis_date()
            ax.autoscale()

            fig.savefig("frequency_stacked_{}.pdf".format(title))
        finally:
            # pyplot keeps every figure alive until it is closed; without
            # this, figures accumulate across timezone groups.
            plt.close(fig)
#!/usr/bin/python
# Render a CSV matrix as a grayscale grid image.
#
# Usage: <script> INPUT_CSV OUTPUT_IMAGE
#
# INPUT_CSV is read with its first row as column labels and its first column
# as row labels. Column labels are drawn along the top edge (rotated 90
# degrees) and row labels along the left edge.
import sys

import matplotlib.pyplot as plt
import matplotlib.cm as cm
import pandas
# NOTE(review): no longer used by the statements below; retained in case code
# outside this excerpt still relies on these names.
from matplotlib.ticker import MultipleLocator, FormatStrFormatter

if len(sys.argv) < 3:
    sys.exit("usage: {} INPUT_CSV OUTPUT_IMAGE".format(sys.argv[0]))

mat = pandas.read_csv(sys.argv[1], header=0, index_col=0)

fig = plt.figure(figsize=(40, 40))
ax = fig.add_subplot(111)

# vmin=0 and vmax=1 are set just to distinguish absent from present;
# remove those parameters to see colors scaled by value.
ax.matshow(mat, cmap=cm.gray, interpolation='nearest', vmin=0, vmax=1)

# Put exactly one tick on every cell centre and label it directly. This
# replaces a leading "" label that compensated for an extra locator tick at -1.
ax.set_xticks(range(len(mat.columns)))
ax.set_xticklabels(mat.columns.tolist(), rotation=90)
ax.set_yticks(range(len(mat.index)))
ax.set_yticklabels(mat.index.tolist())

# tick_params takes booleans; the strings 'on'/'off' are both truthy and
# would switch every tick and label on.
ax.tick_params(
    axis='both',
    which='both',
    labelsize=8,
    direction='out',
    labelleft=True,
    labelright=False,
    labelbottom=False,
    labeltop=True,
    left=True,
    right=False,
    bottom=False,
    top=True,
)

fig.savefig(sys.argv[2], bbox_inches='tight', dpi=100)