def distribution():
    """Plot a sampled normal histogram with its analytic PDF and CDF.

    Draws 1000 samples from a normal distribution (mean 0, standard
    deviation 0.5), bins them into a 20-bin density histogram, evaluates the
    closed-form PDF and CDF on 1000 evenly spaced points in [-2, 2], and
    renders all three on a plot sent to the "distribution_reveal" output
    server.

    Returns:
        The pair ``(curplot(), cursession())``.
    """
    mean, std = 0, 0.5

    samples = np.random.normal(mean, std, 1000)
    bar_heights, bin_edges = np.histogram(samples, density=True, bins=20)

    grid_x = np.linspace(-2, 2, 1000)
    # Same operation order as the textbook formula: 1/(s*sqrt(2*pi)) * exp(...)
    normalisation = 1 / (std * np.sqrt(2 * np.pi))
    density = normalisation * np.exp(-(grid_x - mean) ** 2 / (2 * std ** 2))
    standardised = (grid_x - mean) / np.sqrt(2 * std ** 2)
    cumulative = (1 + scipy.special.erf(standardised)) / 2

    output_server("distribution_reveal")
    hold()

    figure(
        title="Interactive plots",
        tools="pan, wheel_zoom, box_zoom, reset, previewsave",
        background_fill="#E5E5E5",
    )
    quad(
        top=bar_heights,
        bottom=np.zeros(len(bar_heights)),
        left=bin_edges[:-1],
        right=bin_edges[1:],
        fill_color="#333333",
        line_color="#E5E5E5",
        line_width=3,
    )

    # Use `line` renderers to display the PDF and CDF
    curve_style = dict(line_width=8, alpha=0.7)
    line(grid_x, density, line_color="#348abd", legend="PDF", **curve_style)
    line(grid_x, cumulative, line_color="#7a68a6", legend="CDF", **curve_style)

    # Thick white grid lines on both axes.
    for grid in (xgrid, ygrid):
        grid().grid_line_color = "white"
        grid().grid_line_width = 3

    legend().orientation = "top_left"
    return curplot(), cursession()
def plotHistogram(data):
    """Write a 20-bin histogram of the ``"Age"`` column to ``histogram.html``.

    NaN ages are removed before binning. The bars are drawn with ``pl.quad``
    and the result is displayed with ``pl.show()``.

    Args:
        data: Mapping-like object indexable by ``"Age"`` that yields a
            numeric array or Series (presumably a pandas DataFrame; confirm
            against the caller).
    """
    ages = data["Age"]
    # Invert the NaN mask with ``~``: unary ``-`` on a boolean NumPy array
    # raises TypeError, whereas ``~`` works for both ndarrays and Series.
    ages = ages[~np.isnan(ages)]
    hist, edges = np.histogram(ages, bins=20)

    pl.output_file('histogram.html')
    pl.figure()
    pl.hold()
    pl.quad(top=hist, bottom=0, left=edges[:-1], right=edges[1:],
            fill_color="#036564", line_color="#033649",
            title="Age distribution", xlabel="Age", ylabel="Number")
    pl.show()
def _save_time_series_png(series, tick_dates, filename):
    """Plot every ``(x, y)`` pair in *series* on one figure and save it as PNG.

    Args:
        series: Iterable of ``(x_values, y_values)`` pairs, plotted in order.
        tick_dates: Non-empty sequence of datetimes; an x tick is placed at
            every ``24 * 7``-th entry and at the last entry.
        filename: File name appended to the module-level ``path_to_graphs``.
    """
    fig, ax = matplotlib_plt.subplots()
    ax.xaxis.set_major_formatter(DateFormatter("%Y-%m-%d"))
    for xs, ys in series:
        matplotlib_plt.plot(xs, ys)
    ticks = np.concatenate(
        (np.array(tick_dates)[range(0, len(tick_dates), 24 * 7)], [tick_dates[-1]]))
    matplotlib_plt.xticks(ticks, rotation=70)
    matplotlib_plt.savefig(path_to_graphs + filename, bbox_inches="tight")
    # pyplot keeps figures alive until closed; release it once it is on disk.
    matplotlib_plt.close(fig)


def tweetsGraph():
    """Render tweet and user statistics as Bokeh HTML and matplotlib PNGs.

    Reads every row of the ``tweets`` table (fields ``date``,
    ``current_hour``, ``cumulative``, ``past_24h``) and of the ``users`` table
    (fields ``date``, ``total``, ``with_friends``) through the module-level
    ``stat_db_cursor``. It then writes, under ``path_to_graphs``:

    * ``Stats.html`` - Bokeh plots titled "Volume", "Cumulative volume" and
      "Number of users";
    * ``volume.png`` and ``users.png`` - matplotlib versions of the volume
      and user plots.

    Both tables must contain at least one row; otherwise the ``zip(*...)``
    unpacking below raises ``ValueError``.
    """
    logger.info("Drawing graphs to %s", path_to_graphs + "Stats.html")

    # --- tweet volume -----------------------------------------------------
    stat_db_cursor.execute('SELECT * FROM tweets')
    tweets = stat_db_cursor.fetchall()
    date, volume, cumulative, volumePast24h = zip(*[
        (datetime.datetime.strptime(t['date'], "%Y-%m-%dT%H"),
         t['current_hour'], t['cumulative'], t['past_24h'])
        for t in tweets])

    # Truncate each timestamp to midnight and sort so that groupby sees all
    # hours of one day together. ``sorted`` is used instead of ``.sort()``
    # because ``zip`` returns an iterator (no ``.sort``) on Python 3.
    hourly = sorted(zip(
        [datetime.datetime(year=d.year, month=d.month, day=d.day) for d in date],
        volume))
    days, dailyVolume = zip(*[
        (d, sum(v[1] for v in vol))
        for d, vol in itertools.groupby(hourly, lambda i: i[0])])

    bokeh_plt.output_file(path_to_graphs + "Stats.html")
    bokeh_plt.hold()
    # Positional order kept from the original call: days, days + 1 day,
    # daily totals, zeros.
    bokeh_plt.quad(days, [d + datetime.timedelta(days=1) for d in days],
                   dailyVolume, [0] * len(dailyVolume),
                   x_axis_type="datetime", color='gray', legend="Daily volume")
    bokeh_plt.line(date, volume, x_axis_type="datetime", color='red',
                   legend="Hourly volume")
    bokeh_plt.line(date, volumePast24h, x_axis_type="datetime", color='green',
                   legend="Volume in the past 24 hours")
    bokeh_plt.curplot().title = "Volume"

    bokeh_plt.figure()
    bokeh_plt.line(date, cumulative, x_axis_type="datetime")
    bokeh_plt.curplot().title = "Cumulative volume"

    _save_time_series_png(
        [(date, volume), (date, volumePast24h), (days, dailyVolume)],
        date, "volume.png")

    # --- user counts ------------------------------------------------------
    stat_db_cursor.execute('SELECT * FROM users')
    users = stat_db_cursor.fetchall()
    date, nUsers, nUsersWithFriends = zip(*[
        (datetime.datetime.strptime(u['date'], "%Y-%m-%dT%H:%M:%S.%f"),
         u['total'], u['with_friends'])
        for u in users])

    bokeh_plt.figure()
    bokeh_plt.line(date, nUsers, x_axis_type="datetime", legend="Total")
    bokeh_plt.line(date, nUsersWithFriends, x_axis_type="datetime",
                   legend="Friendship collected")
    bokeh_plt.legend().orientation = "top_left"
    bokeh_plt.curplot().title = "Number of users"
    bokeh_plt.save()

    # The original opened an extra, never-used matplotlib figure here before
    # calling subplots(); it is dropped because subplots() already creates
    # the figure that gets saved.
    _save_time_series_png(
        [(date, nUsers), (date, nUsersWithFriends)],
        date, "users.png")
def distribution():
    """Render a normal-distribution demo plot on the Bokeh output server.

    Samples 1000 values from N(mu=0, sigma=0.5), builds a 20-bin density
    histogram of them, and overlays the analytic PDF and CDF evaluated on
    1000 evenly spaced points between -2 and 2. Output is directed to the
    "distribution_reveal" server document.

    Returns:
        A tuple of ``curplot()`` and ``cursession()``.
    """
    loc, scale = 0, 0.5
    n_points = 1000

    draws = np.random.normal(loc, scale, n_points)
    heights, boundaries = np.histogram(draws, density=True, bins=20)

    xs = np.linspace(-2, 2, n_points)
    pdf_values = (
        1 / (scale * np.sqrt(2 * np.pi))
        * np.exp(-(xs - loc) ** 2 / (2 * scale ** 2))
    )
    cdf_values = (
        1 + scipy.special.erf((xs - loc) / np.sqrt(2 * scale ** 2))
    ) / 2

    output_server("distribution_reveal")
    hold()
    figure(title="Interactive plots",
           tools="pan, wheel_zoom, box_zoom, reset, previewsave",
           background_fill="#E5E5E5")

    # Histogram bars: one quad per bin, resting on zero.
    left_edges, right_edges = boundaries[:-1], boundaries[1:]
    quad(top=heights, bottom=np.zeros(len(heights)),
         left=left_edges, right=right_edges,
         fill_color="#333333", line_color="#E5E5E5", line_width=3)

    # Use `line` renderers to display the PDF and CDF
    curves = (
        (pdf_values, "#348abd", "PDF"),
        (cdf_values, "#7a68a6", "CDF"),
    )
    for ys, colour, label in curves:
        line(xs, ys, line_color=colour, line_width=8, alpha=0.7, legend=label)

    xgrid().grid_line_color = "white"
    xgrid().grid_line_width = 3
    ygrid().grid_line_color = "white"
    ygrid().grid_line_width = 3

    legend().orientation = "top_left"

    plot, session = curplot(), cursession()
    return plot, session
def create_histogram(releases):
    """Show a bar chart of how many releases had each number of bidders.

    Groups *releases* with ``get_releases_by_amount_of_bidders`` and draws one
    unit-wide bar per group: the bar starts at the bidder count on the x axis
    and its height is the number of releases in that group.

    Args:
        releases: Passed unchanged to ``get_releases_by_amount_of_bidders``.
    """
    # Materialise the (bidder count, releases) pairs once so both lists are
    # built from the same iteration order.
    buckets = list(get_releases_by_amount_of_bidders(releases).items())
    bidder_counts = [count for count, _ in buckets]
    frequencies = [len(group) for _, group in buckets]

    bar_bottoms = [0] * len(frequencies)
    bar_rights = [count + 1 for count in bidder_counts]

    chart = quad(top=frequencies, bottom=bar_bottoms,
                 left=bidder_counts, right=bar_rights)
    chart.title = "Frequency of releases vs. Number of Bidders"
    xaxis()[0].axis_label = "Number of Bidders"
    yaxis()[0].axis_label = "Frequency of Releases"
    show()
def make_plot(xr):
    """Draw chr5 gene, exon and hotspot tracks on one figure and return it.

    Three tab-separated BED files are read from fixed paths, filtered to rows
    whose chromosome is ``'chr5'``, and drawn as horizontal bands at different
    y positions. Hotspots whose ``end`` equals ``start + 1`` are drawn with
    ``rect`` from a ``ColumnDataSource`` so the hover tool can show their
    position and metadata; the remaining hotspots are drawn with ``quad``.

    Args:
        xr: Used as the figure's ``x_range``.

    Returns:
        The current plot, as returned by ``curplot()``.
    """
    def _read_bed(path):
        # Tab-separated file, first row skipped, no header row.
        return pd.read_csv(path, sep='\t', skiprows=[0], header=None)

    bed_columns = ["chromosome", "start", "end", "meta1", "meta2", "meta3"]

    y_range = Range1d(start=-10, end=10)
    figure(plot_width=800, plot_height=350, y_range=y_range, x_range=xr,
           tools="xpan,xwheel_zoom,hover,box_zoom,reset")
    hold()

    # --- genes: band between y=1.7 and y=2.3 --------------------------------
    gene_table = _read_bed('/home/hugoshi/data/lab7/genes.refseq.hg19.bed')
    gene_table.rename(columns={0: "chromosome", 1: "start", 2: "end"},
                      inplace=True)
    chr5_genes = gene_table[gene_table.chromosome == 'chr5']
    n_genes = len(chr5_genes)
    # Positional order kept from the original call, whose comments label the
    # first four as left, right, top, bottom.
    # NOTE(review): confirm how this Bokeh version treats the fifth
    # positional list of 'blue'.
    quad(chr5_genes.start - 0.5,
         chr5_genes.end - 0.5,
         [2.3] * n_genes,
         [1.7] * n_genes,
         ['blue'] * n_genes)

    # --- exons: band between y=0.7 and y=1.3 --------------------------------
    exon_table = _read_bed('/home/hugoshi/data/lab7/exons.refseq.hg19.bed')
    exon_table.columns = bed_columns
    chr5_exons = exon_table[exon_table.chromosome == 'chr5']
    n_exons = len(chr5_exons)
    quad(chr5_exons.start - 0.5,
         chr5_exons.end - 0.5,
         [1.3] * n_exons,
         [0.7] * n_exons,
         ['blue'] * n_exons)

    # --- hotspots -----------------------------------------------------------
    hotspot_table = _read_bed(
        '/home/hugoshi/data/lab7/CHP2.20131001.hotspots.bed')
    hotspot_table.columns = bed_columns
    chr5_hotspots = hotspot_table[hotspot_table.chromosome == 'chr5']

    point_hotspots = chr5_hotspots[
        chr5_hotspots.start + 1 == chr5_hotspots.end]
    span_hotspots = chr5_hotspots[
        chr5_hotspots.start + 1 != chr5_hotspots.end]
    n_points = len(point_hotspots)
    n_spans = len(span_hotspots)

    point_source = ColumnDataSource(data=dict(
        start=point_hotspots.start,
        end=point_hotspots.end,
        meta1=point_hotspots.meta1,
        meta2=point_hotspots.meta2,
        meta3=point_hotspots.meta3,
    ))
    # Positional order kept from the original call: x taken from the 'start'
    # column, then three constant lists.
    # NOTE(review): the original comments name x center, y center and height;
    # confirm which list is width and which is height.
    rect('start',
         [1] * n_points,
         [0.9] * n_points,
         [1] * n_points,
         color=['red'] * n_points,
         source=point_source)

    # Configure the first HoverTool on the current plot to show the columns
    # of the data source.
    hover_tools = [tool for tool in curplot().tools
                   if isinstance(tool, HoverTool)]
    hover = hover_tools[0]
    hover.tooltips = OrderedDict([
        ("position", "@start"),
        ("meta 1", "@meta1"),
        ("meta 2", "@meta2"),
        ("meta 3", "@meta3"),
    ])

    # Multi-base hotspots: band between y=-0.3 and y=0.3.
    quad(span_hotspots.start - 0.5,
         span_hotspots.end - 0.5,
         [0.3] * n_spans,
         [-0.3] * n_spans)

    hold()
    return curplot()
def make_plot(xr):
    """Build the chr5 annotation plot (genes, exons, hotspots) and return it.

    Reads three BED files from hard-coded paths, keeps only rows on
    ``'chr5'``, and stacks them as bands on a figure whose y range is fixed
    to [-10, 10]. Single-position hotspots (``start + 1 == end``) go through
    a ``ColumnDataSource`` so the hover tool can display their fields; wider
    hotspots are drawn as plain quads.

    Args:
        xr: Supplied to ``figure`` as ``x_range``.

    Returns:
        Whatever ``curplot()`` returns after all glyphs have been added.
    """
    read_options = dict(sep='\t', skiprows=[0], header=None)

    vertical_range = Range1d(start=-10, end=10)
    figure(plot_width=800,
           plot_height=350,
           y_range=vertical_range,
           x_range=xr,
           tools="xpan,xwheel_zoom,hover,box_zoom,reset")
    hold()

    # Genes: only the first three columns are named.
    genes = pd.read_csv('/home/hugoshi/data/lab7/genes.refseq.hg19.bed',
                        **read_options)
    genes = genes.rename(columns={0: "chromosome", 1: "start", 2: "end"})
    genes = genes[genes["chromosome"] == 'chr5']
    gene_count = len(genes)
    # Arguments stay positional in the original order (left, right, top,
    # bottom per the original comments, followed by a list of 'blue').
    # NOTE(review): verify the meaning of the fifth positional argument.
    quad(genes["start"] - 0.5, genes["end"] - 0.5,
         [2.3] * gene_count, [1.7] * gene_count,
         ['blue'] * gene_count)

    # Exons: all six columns are named.
    exons = pd.read_csv('/home/hugoshi/data/lab7/exons.refseq.hg19.bed',
                        **read_options)
    exons.columns = ["chromosome", "start", "end", "meta1", "meta2", "meta3"]
    exons = exons[exons["chromosome"] == 'chr5']
    exon_count = len(exons)
    quad(exons["start"] - 0.5, exons["end"] - 0.5,
         [1.3] * exon_count, [0.7] * exon_count,
         ['blue'] * exon_count)

    # Hotspots, split by whether they span exactly one position.
    hotspots = pd.read_csv(
        '/home/hugoshi/data/lab7/CHP2.20131001.hotspots.bed', **read_options)
    hotspots.columns = ["chromosome", "start", "end",
                        "meta1", "meta2", "meta3"]
    hotspots = hotspots[hotspots["chromosome"] == 'chr5']

    is_single = hotspots["start"] + 1 == hotspots["end"]
    singles = hotspots[is_single]
    widers = hotspots[~is_single]
    single_count, wider_count = len(singles), len(widers)

    source_fields = ("start", "end", "meta1", "meta2", "meta3")
    single_source = ColumnDataSource(
        data=dict((field, singles[field]) for field in source_fields))
    # Positional order kept from the original call.
    # NOTE(review): the original comments name x center, y center and height;
    # confirm the width/height order.
    rect('start',
         [1] * single_count,
         [0.9] * single_count,
         [1] * single_count,
         color=['red'] * single_count,
         source=single_source)

    # First HoverTool attached to the current plot.
    hover = [tool for tool in curplot().tools
             if isinstance(tool, HoverTool)][0]
    tooltip_rows = [
        ("position", "@start"),
        ("meta 1", "@meta1"),
        ("meta 2", "@meta2"),
        ("meta 3", "@meta3"),
    ]
    hover.tooltips = OrderedDict(tooltip_rows)

    quad(widers["start"] - 0.5, widers["end"] - 0.5,
         [0.3] * wider_count, [-0.3] * wider_count)

    hold()
    return curplot()
color=data["Survived"].map(colormap),alpha=0.8, xlabel="Fare") pl.show() if __name__=="__main__": data = loadData("train.csv") data,label = buildDataset(data,False) data_to_plot = data["Age"] data_to_plot = data_to_plot[-np.isnan(data_to_plot)] hist,edges = np.histogram(data_to_plot,bins=20) pl.output_file('histogram.html') pl.figure() pl.hold() pl.quad(top=hist,bottom=0,left=edges[:-1],right=edges[1:], fill_color="#036564", line_color="#033649", title="Age distribution",xlabel="Age",ylabel="Number") pl.line(np.average([edges[1:],edges[:-1]],axis=0),hist, line_width=5,color="red",alpha=0.8) pl.show()