Example #1
0
def distribution():
    """Plot a sampled normal distribution against its analytic PDF and CDF.

    Draws 1000 samples from a normal distribution (mean 0, standard
    deviation 0.5), renders their density-normalised 20-bin histogram with
    ``quad`` and overlays the closed-form PDF and CDF evaluated on 1000
    points in [-2, 2].  Output is directed through
    ``output_server("distribution_reveal")``.

    Returns:
        The ``(curplot(), cursession())`` pair as seen after all glyphs
        have been added.
    """
    mu = 0
    sigma = 0.5

    # Empirical side: histogram of the random sample.
    samples = np.random.normal(mu, sigma, 1000)
    hist, edges = np.histogram(samples, density=True, bins=20)
    bar_left = edges[:-1]
    bar_right = edges[1:]

    # Analytic side: normal PDF and CDF on a fixed grid.
    x = np.linspace(-2, 2, 1000)
    centred = x - mu
    pdf = 1 / (sigma * np.sqrt(2 * np.pi)) * np.exp(-centred ** 2 / (2 * sigma ** 2))
    cdf = (1 + scipy.special.erf(centred / np.sqrt(2 * sigma ** 2))) / 2

    output_server("distribution_reveal")

    # hold() is called before figure() so the glyph calls below share a plot.
    # NOTE(review): exact hold() semantics depend on the plotting API version.
    hold()

    figure(
        title="Interactive plots",
        tools="pan, wheel_zoom, box_zoom, reset, previewsave",
        background_fill="#E5E5E5",
    )

    # One quad per histogram bin, sitting on the zero baseline.
    quad(
        top=hist,
        bottom=np.zeros(len(hist)),
        left=bar_left,
        right=bar_right,
        fill_color="#333333",
        line_color="#E5E5E5",
        line_width=3,
    )

    # Use `line` renderers to display the PDF and CDF
    line(x, pdf, line_color="#348abd", line_width=8, alpha=0.7, legend="PDF")
    line(x, cdf, line_color="#7a68a6", line_width=8, alpha=0.7, legend="CDF")

    # Same white, 3-wide styling for both grids (x first, then y).
    for grid in (xgrid, ygrid):
        grid().grid_line_color = "white"
        grid().grid_line_width = 3

    legend().orientation = "top_left"

    return curplot(), cursession()
Example #2
0
def plotHistogram(data):
    """Render a 20-bin histogram of the ``"Age"`` column to ``histogram.html``.

    Args:
        data: Mapping-like object (e.g. a DataFrame) whose ``"Age"`` entry
            is a numeric array that supports boolean-mask indexing.
            NaN entries are dropped before binning.

    Returns:
        None.  The plot is written through ``pl.output_file`` and displayed
        with ``pl.show()``.
    """
    ages = data["Age"]
    # Keep only the non-NaN ages.  `~` is the element-wise boolean NOT; the
    # previous unary `-` on a boolean mask raises TypeError in NumPy.
    ages = ages[~np.isnan(ages)]

    hist, edges = np.histogram(ages, bins=20)

    pl.output_file('histogram.html')
    pl.figure()
    pl.hold()
    # One quad per bin: consecutive edges give the left/right sides and the
    # bin count gives the height above the zero baseline.
    pl.quad(top=hist, bottom=0, left=edges[:-1], right=edges[1:],
            fill_color="#036564", line_color="#033649",
            title="Age distribution", xlabel="Age", ylabel="Number")
    pl.show()
Example #3
0
def tweetsGraph():
    """Draw tweet-volume and user-count graphs from the statistics database.

    Reads every row of the ``tweets`` table (columns ``date``,
    ``current_hour``, ``cumulative``, ``past_24h``) and of the ``users``
    table (columns ``date``, ``total``, ``with_friends``) through
    ``stat_db_cursor``, then produces:

    * ``<path_to_graphs>Stats.html`` -- plots built with ``bokeh_plt``:
      daily/hourly/past-24h volume, cumulative volume and number of users;
    * ``<path_to_graphs>volume.png`` and ``<path_to_graphs>users.png`` --
      the matplotlib counterparts.

    Returns:
        None.

    Raises:
        ValueError: if either table is empty (the ``zip(*...)`` unpacking
            has nothing to unpack) or a date string does not match the
            expected format.
    """
    stats_html = path_to_graphs + "Stats.html"
    # Lazy logging arguments; the emitted text is unchanged.
    logger.info("Drawing graphs to %s", stats_html)

    # One tick every 24*7 rows (one per week when rows are hourly), plus a
    # final tick on the last sample.
    tick_stride = 24 * 7

    # ---- tweets: hourly rows keyed by "YYYY-MM-DDTHH" -------------------
    stat_db_cursor.execute('SELECT * FROM tweets')
    tweets = stat_db_cursor.fetchall()
    date, volume, cumulative, volumePast24h = zip(*[
        (datetime.datetime.strptime(t['date'], "%Y-%m-%dT%H"),
         t['current_hour'], t['cumulative'], t['past_24h'])
        for t in tweets
    ])

    # Pair each hourly volume with its calendar day (time of day dropped).
    # sorted() is used instead of zip(...).sort(): on Python 3 zip() returns
    # an iterator with no .sort() method.  groupby() needs the pairs sorted
    # by day so that each day forms a single group.
    hourly = sorted(zip(
        [datetime.datetime(year=d.year, month=d.month, day=d.day) for d in date],
        volume,
    ))
    days, dailyVolume = zip(*[
        (d, sum(v[1] for v in vol))
        for d, vol in itertools.groupby(hourly, lambda i: i[0])
    ])

    bokeh_plt.output_file(stats_html)
    bokeh_plt.hold()
    # Daily bars: each quad spans one day horizontally, from 0 up to the
    # day's total volume.
    bokeh_plt.quad(days, [d + datetime.timedelta(days=1) for d in days],
                   dailyVolume, [0] * len(dailyVolume),
                   x_axis_type="datetime", color='gray', legend="Daily volume")
    bokeh_plt.line(date, volume, x_axis_type="datetime", color='red',
                   legend="Hourly volume")
    bokeh_plt.line(date, volumePast24h, x_axis_type="datetime", color='green',
                   legend="Volume in the past 24 hours")
    bokeh_plt.curplot().title = "Volume"
    bokeh_plt.figure()
    bokeh_plt.line(date, cumulative, x_axis_type="datetime")
    bokeh_plt.curplot().title = "Cumulative volume"

    fig, ax = matplotlib_plt.subplots()
    ax.xaxis.set_major_formatter(DateFormatter("%Y-%m-%d"))
    matplotlib_plt.plot(date, volume)
    matplotlib_plt.plot(date, volumePast24h)
    matplotlib_plt.plot(days, dailyVolume)
    matplotlib_plt.xticks(
        np.concatenate((np.array(date)[::tick_stride], [date[-1]])),
        rotation=70)
    matplotlib_plt.savefig(path_to_graphs + "volume.png", bbox_inches="tight")

    # ---- users: rows keyed by a full timestamp --------------------------
    stat_db_cursor.execute('SELECT * FROM users')
    users = stat_db_cursor.fetchall()
    # NOTE: `date` is rebound here to the users-table timestamps.
    date, nUsers, nUsersWithFriends = zip(*[
        (datetime.datetime.strptime(u['date'], "%Y-%m-%dT%H:%M:%S.%f"),
         u['total'], u['with_friends'])
        for u in users
    ])
    bokeh_plt.figure()
    bokeh_plt.line(date, nUsers, x_axis_type="datetime", legend="Total")
    bokeh_plt.line(date, nUsersWithFriends, x_axis_type="datetime",
                   legend="Friendship collected")
    bokeh_plt.legend().orientation = "top_left"
    bokeh_plt.curplot().title = "Number of users"
    bokeh_plt.save()

    # subplots() already creates (and selects) a new figure; the extra
    # matplotlib_plt.figure() that used to precede it only left an unused
    # empty figure open.
    fig, ax = matplotlib_plt.subplots()
    ax.xaxis.set_major_formatter(DateFormatter("%Y-%m-%d"))
    matplotlib_plt.plot(date, nUsers)
    matplotlib_plt.plot(date, nUsersWithFriends)
    matplotlib_plt.xticks(
        np.concatenate((np.array(date)[::tick_stride], [date[-1]])),
        rotation=70)
    matplotlib_plt.savefig(path_to_graphs + "users.png", bbox_inches="tight")
Example #4
0
def distribution():
    """Compare a random normal sample with the theoretical PDF and CDF.

    A sample of 1000 values (mean 0, standard deviation 0.5) is binned into
    a 20-bin density histogram and drawn as quads; the analytic PDF and CDF,
    evaluated at 1000 points across [-2, 2], are drawn on top as lines.
    Plot output goes through ``output_server("distribution_reveal")``.

    Returns:
        Tuple ``(curplot(), cursession())`` taken once the plot is complete.
    """
    mu, sigma = 0, 0.5
    two_var = 2 * sigma**2  # 2 * variance, shared by the PDF and the CDF

    measured = np.random.normal(mu, sigma, 1000)
    hist, edges = np.histogram(measured, density=True, bins=20)

    x = np.linspace(-2, 2, 1000)
    pdf = 1 / (sigma * np.sqrt(2 * np.pi)) * np.exp(-(x - mu)**2 / two_var)
    cdf = (1 + scipy.special.erf((x - mu) / np.sqrt(two_var))) / 2

    output_server("distribution_reveal")

    # NOTE(review): hold() precedes figure() as in the original call
    # sequence; its precise effect depends on the plotting API in scope.
    hold()

    figure(title="Interactive plots",
           tools="pan, wheel_zoom, box_zoom, reset, previewsave",
           background_fill="#E5E5E5")

    # Histogram bars rise from a zero baseline, one quad per bin.
    baseline = np.zeros(len(hist))
    quad(top=hist, bottom=baseline, left=edges[:-1], right=edges[1:],
         fill_color="#333333", line_color="#E5E5E5", line_width=3)

    # Use `line` renderers to display the PDF and CDF
    curve_style = dict(line_width=8, alpha=0.7)
    line(x, pdf, line_color="#348abd", legend="PDF", **curve_style)
    line(x, cdf, line_color="#7a68a6", legend="CDF", **curve_style)

    # White, 3-wide grid lines on both axes.
    xgrid().grid_line_color = "white"
    xgrid().grid_line_width = 3
    ygrid().grid_line_color = "white"
    ygrid().grid_line_width = 3

    legend().orientation = "top_left"

    plot = curplot()
    session = cursession()
    return plot, session
Example #5
0
def create_histogram(releases):
    """Show a bar plot of how many releases attracted each number of bidders.

    ``releases`` is handed to ``get_releases_by_amount_of_bidders``; each
    ``(number_of_bidders, releases)`` item of its result becomes one bar of
    width 1 starting at ``number_of_bidders``, with height equal to the
    number of releases in that group.

    Returns:
        None.  The plot is displayed with ``show()``.
    """
    groups = list(get_releases_by_amount_of_bidders(releases).items())

    bidder_counts = [bidders for bidders, _ in groups]
    frequencies = [len(group) for _, group in groups]

    # Each bar spans [n, n + 1) horizontally and rises from zero.
    bar_right = [bidders + 1 for bidders in bidder_counts]
    baseline = [0] * len(frequencies)
    histogram = quad(top=frequencies, bottom=baseline,
                     left=bidder_counts, right=bar_right)

    histogram.title = "Frequency of releases vs. Number of Bidders"
    xaxis()[0].axis_label = "Number of Bidders"
    yaxis()[0].axis_label = "Frequency of Releases"

    show()
Example #6
0
def make_plot(xr):
    """Draw chr5 gene, exon and hotspot tracks on a new figure.

    Three tab-separated BED files are read from fixed paths (first line
    skipped, no header row), filtered to rows whose chromosome is
    ``'chr5'``, and drawn as horizontal bands:

    * genes            -- quads between y = 1.7 and y = 2.3
    * exons            -- quads between y = 0.7 and y = 1.3
    * one-base hotspots (``start + 1 == end``) -- rects centred at y = 1,
      backed by a ``ColumnDataSource`` so the hover tool can show metadata
    * wider hotspots   -- quads between y = -0.3 and y = 0.3

    Args:
        xr: passed unchanged as the figure's ``x_range``.

    Returns:
        The result of ``curplot()`` after all glyphs are added.
    """
    figure(plot_width=800, plot_height=350,
           y_range=Range1d(start=-10, end=10),
           x_range=xr,
           tools="xpan,xwheel_zoom,hover,box_zoom,reset")
    hold()

    # --- genes: only the first three columns get names -------------------
    gene_table = pd.read_csv('/home/hugoshi/data/lab7/genes.refseq.hg19.bed',
                             sep='\t', skiprows=[0], header=None)
    gene_table = gene_table.rename(
        columns={0: "chromosome", 1: "start", 2: "end"})
    gene_table = gene_table[gene_table.chromosome == 'chr5']
    n_genes = len(gene_table)
    # Positional order is left, right, top, bottom; the fifth argument is
    # presumably a per-glyph colour -- confirm against the quad signature.
    quad(gene_table.start - 0.5,
         gene_table.end - 0.5,
         [2.3] * n_genes,
         [1.7] * n_genes,
         ['blue'] * n_genes)

    # --- exons: the file must have exactly six columns -------------------
    exon_table = pd.read_csv('/home/hugoshi/data/lab7/exons.refseq.hg19.bed',
                             sep='\t', skiprows=[0], header=None)
    exon_table.columns = ["chromosome", "start", "end",
                          "meta1", "meta2", "meta3"]
    exon_table = exon_table[exon_table.chromosome == 'chr5']
    n_exons = len(exon_table)
    quad(exon_table.start - 0.5,     # left
         exon_table.end - 0.5,       # right
         [1.3] * n_exons,            # top
         [0.7] * n_exons,            # bottom
         ['blue'] * n_exons)         # presumably colour -- confirm

    # --- hotspots, split into one-base sites and wider intervals ---------
    hotspots = pd.read_csv(
        '/home/hugoshi/data/lab7/CHP2.20131001.hotspots.bed',
        sep='\t', skiprows=[0], header=None)
    hotspots.columns = ["chromosome", "start", "end",
                        "meta1", "meta2", "meta3"]
    hotspots = hotspots[hotspots.chromosome == 'chr5']
    next_base = hotspots.start + 1
    singles = hotspots[next_base == hotspots.end]
    widers = hotspots[next_base != hotspots.end]
    n_singles = len(singles)
    n_widers = len(widers)

    single_source = ColumnDataSource(data=dict(
        start=singles.start,
        end=singles.end,
        meta1=singles.meta1,
        meta2=singles.meta2,
        meta3=singles.meta3,
    ))
    # x centre comes from the 'start' column of the source; y centre is 1.
    # The next two positionals are presumably width (0.9) and height (1)
    # -- confirm against the rect signature.
    rect('start',
         [1] * n_singles,
         [0.9] * n_singles,
         [1] * n_singles,
         color=['red'] * n_singles,
         source=single_source)

    # Configure the first HoverTool attached to the current plot; the
    # "@name" fields refer to columns of single_source.
    hover_tools = [tool for tool in curplot().tools
                   if isinstance(tool, HoverTool)]
    hover = hover_tools[0]
    hover.tooltips = OrderedDict([
        ("position", "@start"),
        ("meta 1", "@meta1"),
        ("meta 2", "@meta2"),
        ("meta 3", "@meta3"),
    ])

    quad(widers.start - 0.5,     # left
         widers.end - 0.5,       # right
         [0.3] * n_widers,       # top
         [-0.3] * n_widers)      # bottom

    # Second hold() call, pairing with the one issued right after figure().
    # NOTE(review): presumably switches hold back off -- confirm.
    hold()
    return curplot()
Example #7
0
def make_plot(xr):
    """Build the chr5 annotation plot: genes, exons and hotspot sites.

    Reads three tab-separated BED files from hard-coded paths (skipping the
    first line, no header row), keeps only ``'chr5'`` rows and stacks them
    as horizontal tracks on one figure whose y range is fixed to [-10, 10]:
    genes around y = 2, exons around y = 1, single-base hotspots as red
    rects at y = 1 (with hover metadata) and wider hotspots around y = 0.

    Args:
        xr: the x range, forwarded as ``x_range`` to ``figure``.

    Returns:
        Whatever ``curplot()`` returns once every glyph has been added.
    """
    bed_columns = ["chromosome", "start", "end", "meta1", "meta2", "meta3"]

    yr = Range1d(start=-10, end=10)
    figure(plot_width=800, plot_height=350, y_range=yr, x_range=xr,
           tools="xpan,xwheel_zoom,hover,box_zoom,reset")
    hold()

    # Genes track.  Only columns 0-2 are renamed, so this file may carry
    # any number of extra columns.
    genes = pd.read_csv('/home/hugoshi/data/lab7/genes.refseq.hg19.bed',
                        sep='\t', skiprows=[0], header=None)
    genes.rename(columns={0: "chromosome", 1: "start", 2: "end"},
                 inplace=True)
    genes = genes[genes.chromosome == 'chr5']
    gene_count = len(genes)
    gene_left = genes.start - 0.5
    gene_right = genes.end - 0.5
    # quad positionals: left, right, top, bottom; the trailing list is
    # presumably the colour -- confirm against the quad signature.
    quad(gene_left, gene_right,
         [2.3] * gene_count, [1.7] * gene_count,
         ['blue'] * gene_count)

    # Exons track.  Assigning .columns requires exactly six columns.
    exons = pd.read_csv('/home/hugoshi/data/lab7/exons.refseq.hg19.bed',
                        sep='\t', skiprows=[0], header=None)
    exons.columns = list(bed_columns)
    exons = exons[exons.chromosome == 'chr5']
    exon_count = len(exons)
    exon_left = exons.start - 0.5
    exon_right = exons.end - 0.5
    quad(exon_left, exon_right,
         [1.3] * exon_count, [0.7] * exon_count,
         ['blue'] * exon_count)

    # Hotspots: rows covering exactly one base versus everything else.
    df = pd.read_csv('/home/hugoshi/data/lab7/CHP2.20131001.hotspots.bed',
                     sep='\t', skiprows=[0], header=None)
    df.columns = list(bed_columns)
    df = df[df.chromosome == 'chr5']
    singles = df[df.start + 1 == df.end]
    widers = df[df.start + 1 != df.end]
    single_count = len(singles)
    wider_count = len(widers)

    # Column data for the single-base sites so tooltips can reference it.
    single_columns = dict(start=singles.start,
                          end=singles.end,
                          meta1=singles.meta1,
                          meta2=singles.meta2,
                          meta3=singles.meta3)
    s_source = ColumnDataSource(data=single_columns)
    # rect positionals: x centre (the source's 'start' column), y centre,
    # then presumably width and height -- confirm against the signature.
    rect('start',
         [1] * single_count,
         [0.9] * single_count,
         [1] * single_count,
         color=['red'] * single_count,
         source=s_source)

    # Pick the first HoverTool on the current plot and set its tooltips.
    matching_tools = [t for t in curplot().tools if isinstance(t, HoverTool)]
    hover = matching_tools[0]
    tooltip_rows = [
        ("position", "@start"),
        ("meta 1", "@meta1"),
        ("meta 2", "@meta2"),
        ("meta 3", "@meta3"),
    ]
    hover.tooltips = OrderedDict(tooltip_rows)

    # Wider hotspots as a band centred on y = 0.
    quad(widers.start - 0.5, widers.end - 0.5,
         [0.3] * wider_count, [-0.3] * wider_count)

    # NOTE(review): second hold() call, presumably undoing the first --
    # confirm against the plotting API in use.
    hold()
    return curplot()
Example #8
0
               color=data["Survived"].map(colormap),alpha=0.8,
               xlabel="Fare")
    pl.show()





if __name__ == "__main__":
    # Script entry point: load the training set, then write an age
    # histogram with an overlaid frequency line to histogram.html.
    data = loadData("train.csv")
    data, label = buildDataset(data, False)

    data_to_plot = data["Age"]
    # Drop NaN ages.  `~` is the element-wise boolean NOT; the previous
    # unary `-` on a boolean mask raises TypeError in NumPy.
    data_to_plot = data_to_plot[~np.isnan(data_to_plot)]

    hist, edges = np.histogram(data_to_plot, bins=20)

    pl.output_file('histogram.html')
    pl.figure()
    pl.hold()
    # One quad per bin, rising from zero to the bin count.
    pl.quad(top=hist, bottom=0, left=edges[:-1], right=edges[1:],
            fill_color="#036564", line_color="#033649",
            title="Age distribution", xlabel="Age", ylabel="Number")

    # Overlay a line through the bin counts, plotted at each bin's
    # midpoint (mean of its left and right edges).
    pl.line(np.average([edges[1:], edges[:-1]], axis=0), hist,
            line_width=5, color="red", alpha=0.8)

    pl.show()