コード例 #1
0
    def back_test_performance(self):
        """Finish the simulated run, print the base report and plot the result.

        If the most recent simulated trade is a buy (``side == 1``), a closing
        sell order priced at the latest ``ma1`` value is appended and the net
        value list is extended accordingly.  The parent class report is then
        run, and two Bokeh figures are shown: the price/moving-average chart
        with buy and sell markers, and a bar chart of ``std`` with the
        ``avg_std`` line on top.
        """
        account = self.sim_account
        if len(account.trades) > 0 and account.trades[-1].side == 1:
            last_ma1 = self.tick_data["ma1"][-1]
            closing_order = Order(price=last_ma1, side=-1, symbol=self.trade_symbol,
                                  quantity=self.position,
                                  at=self.bolling.minute_q[-1].time_stamp)
            account.trades.append(closing_order)
            account.nets.append(account.balance + account.position * last_ma1)

        super(Strategy, self).back_test_performance()

        def random_color():
            """Return a random colour as an upper-case ``#RRGGBB`` string."""
            import random
            value = int(random.random() * 256 * 256 * 256)
            return '#' + format(value, '06X')

        from bokeh.plotting import figure, show, Column

        buy_orders = {"time": [], "price": []}
        sell_orders = {"time": [], "price": []}
        # Trade timestamps are shifted by eight hours before plotting
        # (presumably UTC -> UTC+8; confirm against the data feed).
        shift = datetime.timedelta(hours=8)
        for trade in self.sim_account.trades:
            assert isinstance(trade, Order)
            bucket = buy_orders if trade.side > 0 else sell_orders
            bucket["time"].append(trade.at + shift)
            bucket["price"].append(trade.price)

        ticks = self.tick_data
        plot = figure(x_axis_type="datetime", width=1300)
        bar_plot = figure(x_axis_type="datetime", width=1300)

        # Glyphs are added in this exact order so the stacking stays the same.
        plot.line(x=ticks["time"], y=ticks["ma1"], legend="ma10", color=random_color())
        plot.circle_x(x=buy_orders["time"], y=buy_orders["price"],
                      color="red", fill_color=None, size=20, line_width=5)
        plot.square_cross(x=sell_orders["time"], y=sell_orders["price"],
                          color="blue", fill_color=None, size=20, line_width=5)
        for column, label in (("ma5", "ma30"), ("ma15", "ma90")):
            plot.line(x=ticks["time"], y=ticks[column], legend=label, color=random_color())
        # plot.line(x=self.tick_data["time"], y=self.tick_data["mean"], legend="mean", color='black')
        # plot.line(x=self.tick_data["time"], y=self.tick_data["upper"], legend="upper", color='black')
        # plot.line(x=self.tick_data["time"], y=self.tick_data["lower"], legend="lower", color='black')
        for quote in ("bid", "ask"):
            plot.scatter(x=ticks["time"], y=ticks[quote], legend=quote, color=random_color())

        bar_plot.vbar(x=ticks["time"], top=ticks["std"], bottom=0, width=1)
        bar_plot.line(x=ticks["time"], y=ticks["avg_std"])
        show(Column(plot, bar_plot))
コード例 #2
0
def generate_error_report(df,
                          title='Error report',
                          x_type='event_date',
                          save_to='unnamed_error_report.html'):
    """Assemble a four-panel Bokeh error report for ``df``.

    The layout stacks a category plot (only columns with at least one
    non-null value), a row holding a histogram and a box plot, and an ACF
    plot.

    Args:
        df: data frame handed to the plotting helpers.
        title: title of the first plot and of the saved HTML page.
        x_type: forwarded to ``plot_cat_bokeh``.
        save_to: output HTML path; pass ``None`` to get the layout back
            instead of saving it.

    Returns:
        The Bokeh layout when ``save_to`` is ``None``; otherwise ``None``
        after the report has been written to ``save_to``.
    """
    # Original author's note: "normal is already deltas".
    populated = df.loc[:, df.count() > 0]
    series_plot = plot_cat_bokeh(populated, title=title, x_type=x_type)
    series_plot.title.text_font_size = '18pt'

    hist_plot = plot_hist_bokeh(df, bins=100, color_offset=0)
    box_plot = plot_boxplot_bokeh(df)
    acf_plot = plot_acf_bokeh(df)

    report = Column(series_plot, Row(hist_plot, box_plot), acf_plot)
    if save_to is None:
        return report

    output_file(save_to, title=title)
    save(report)
コード例 #3
0
def Hierarchical(doc):
    """Bokeh server app: drill-down wedge view of a hierarchical clustering.

    Reads the ``file`` request argument, loads ``media/<file>`` as a
    ``;``-separated CSV, turns its values into an all-pairs shortest-path
    matrix, clusters that matrix with Ward linkage and shows the two children
    of the current tree node as wedges of a circle.  Tapping a wedge descends
    into that child; the "Return" button goes back to the root.

    Args:
        doc: the Bokeh document of the session; its request arguments are
            read and the finished layout is added as a root.

    Raises:
        Exception: if the CSV file cannot be loaded (the original error is
            chained as ``__cause__``).
    """
    # NOTE(review): `source` and `nodes` are module-level globals, so every
    # session served by this process shares them.
    global source, nodes

    def getLevelInfo(tree):
        """Return cumulative leaf fractions, ids and the two children of ``tree``."""
        nodes = [tree.get_left(), tree.get_right()]
        total_desc = tree.get_count()
        percents = [0]
        names = []
        for node in nodes:
            percentage = float(node.get_count()) / float(total_desc)
            percents.append(float(percentage + percents[-1]))
            names.append(node.get_id())

        return percents, names, nodes

    def genDataSource(tree):
        """Build the wedge data source for the children of ``tree``."""
        percents, names, nodes = getLevelInfo(tree)

        # define starts/ends for wedges from percentages of a circle
        starts = [p * 2 * pi for p in percents[:-1]]
        ends = [p * 2 * pi for p in percents[1:]]
        colours = getColours(len(starts))
        branchLengths = [node.dist for node in nodes]
        children = [node.get_count() for node in nodes]
        source = ColumnDataSource(data=dict(start=starts,
                                            end=ends,
                                            name=names,
                                            colour=colours,
                                            branchLength=branchLengths,
                                            children=children))
        return source, nodes

    def getColours(Length):
        """Return ``Length`` colours, cycling through a fixed palette."""
        palette = ["red", "green", "blue", "orange", "yellow", "purple", "pink"]
        # Build a fresh list: the original aliased and extended the palette
        # in place.
        picked = [palette[i % len(palette)] for i in range(Length)]
        # The last wedge touches the first one (always "red") on the circle,
        # so avoid giving both the same colour.  The check must look at the
        # last *returned* colour, not at the end of the padded palette.
        if Length > 1 and picked[-1] == "red":
            picked[-1] = "orange"
        return picked

    def calcAngle(x, y):
        """Return the polar angle of ``(x, y)`` in radians, in ``[0, 2*pi]``.

        ``atan2`` covers all four quadrants (the previous ``acos`` version
        mirrored the third quadrant into the second) and is defined at the
        origin, where the old code divided by zero.
        """
        return math.atan2(y, x) % (2 * pi)

    def update(event):
        """Tap handler: descend into the wedge under the click, if any."""
        global source, nodes
        print('Click registered')
        angle = calcAngle(event.x, event.y)
        print(angle)

        clickedNode = None
        wedges = zip(source.data['start'], source.data['end'])
        for i, (start, end) in enumerate(wedges):
            if start < angle < end:
                clickedNode = i
                print(i)
                break

        # A click exactly on a wedge boundary matches nothing; ignore it
        # instead of failing on an unbound variable.
        if clickedNode is None:
            return

        if nodes[clickedNode].get_count() > 2:
            new_source, nodes = genDataSource(nodes[clickedNode])
            source.data = new_source.data

    def returnVisualisation(event=None):
        """Button handler: reset the view to the root of the tree.

        ``event`` is optional so the handler works whether or not the
        installed Bokeh version passes the click event to the callback.
        """
        global source, nodes
        new_source, nodes = genDataSource(tree)
        source.data = new_source.data

    args = doc.session_context.request.arguments
    # NOTE(review): the file name comes straight from the request and is
    # concatenated into a path below; consider validating it against the
    # contents of ``media/`` to rule out path traversal.
    filename = str(args.get('file')[0].decode('UTF-8'))

    try:
        df = pd.read_csv("media/" + filename, sep=';')
    except Exception as err:
        # Same exception type and message as before, but keep the cause.
        raise Exception("File does not exist") from err
    print('Loaded data succesfully')

    names = df.index.values
    counts = df.values

    # If data too large: keep only the first `max_nodes` rows and drop the
    # same index range from the columns.
    #########################################################
    max_nodes = 50
    total = len(names)
    if total > max_nodes:
        names = names[:max_nodes]
        counts = np.delete(counts, np.s_[max_nodes:total], axis=0)
        counts = np.delete(counts, np.s_[max_nodes:total], axis=1)

    # Drop the column at index N (N = number of rows) -- presumably an empty
    # trailing column caused by a trailing ';' in the file; TODO confirm.
    counts = np.delete(counts, len(counts), axis=1)

    # Make a distance matrix
    #######################################################
    N = len(counts)
    distancematrix = np.zeros((N, N))
    for row_index, row in enumerate(counts):
        distancematrix[row_index] = row

    # A zero entry means "no direct link"; the diagonal stays at zero.
    distancematrix[distancematrix == 0] = np.inf
    np.fill_diagonal(distancematrix, 0)

    # Floyd-Warshall all-pairs shortest paths, one vectorised relaxation per
    # intermediate node k.  Row k and column k do not change while k is being
    # processed, so this matches the element-by-element loops exactly.
    for k in range(N):
        via_k = distancematrix[:, k, None] + distancematrix[None, k, :]
        shorter = distancematrix > via_k
        distancematrix[shorter] = via_k[shorter]

    X = distancematrix

    # NOTE(review): scipy's ``linkage`` interprets a 2-D array as observation
    # vectors, not as a square distance matrix (that would need
    # ``squareform``), and X may still contain ``inf`` for disconnected
    # nodes -- confirm this is intended.
    Z = linkage(X, 'ward')

    tree = to_tree(Z)

    ## Create the first data source for the root view
    source, nodes = genDataSource(tree)

    ## Create buttons and tools to interact with the visualisation
    returnButton = Button(label="Return")
    hover = HoverTool()
    hover.tooltips = [("Name", "@name"), ("Lenght to parent", "@branchLength"),
                      ("Children", "@children")]
    hover.mode = 'mouse'
    tools = [hover, 'save']

    ## Create the canvas
    p = figure(x_range=(-1, 1), y_range=(-1, 1), tools=tools)

    ## Draw the wedges on the canvas according to the tree info
    p.wedge(x=0,
            y=0,
            radius=1,
            start_angle='start',
            end_angle='end',
            color='colour',
            alpha=0.6,
            source=source)

    ## Map actions to events for the interaction
    p.on_event(events.Tap, update)
    returnButton.on_click(returnVisualisation)

    ## Display the visualisation
    doc.add_root(Column(returnButton, p))
コード例 #4
0
ファイル: main.py プロジェクト: suGaGa2/rewind_python_M2_fall
        d5['end_datetime_str'].push(  d6['end_datetime_str'][draw_index]);
        d5['start_datetime_dt'].push( d6['start_datetime_dt'][draw_index]);
        d5['end_datetime_dt'].push(   d6['end_datetime_dt'][draw_index]);

        source_sx_draw_highlight.data = d5;
        source_sx_draw_highlight.change.emit();
        
    """)
# Run the JavaScript callback defined above whenever the draw-index slider's
# value changes.
silder_draw_index.js_on_change('value', callback)



# Table showing two columns of the persistency data source.
columns = [
        TableColumn(field="start_datetime_dt", title="start_datetime_dt"),
        TableColumn(field="sx_value", title="sx_value")
          ]
data_table = DataTable(source=source_sx_persistency, columns=columns, width=400, height=280)

# Left panel: the p_sx figure with its two sliders underneath.
controles=Column(silder_draw_index, slider_sx, sizing_mode='stretch_width')
p_sx_layout  = Column(p_sx,  controles, margin=( 8 , 8 , 8 , 8 ))

# Right panel: the p_crd figure, the two toggles, the alpha slider and the table.
toggles = Row(toggle1, toggle2)
p_crd_layout = Column(p_crd, toggles, slider_alpha_Value, data_table, margin=( 8 , 8 , 8 , 8 ))

# Final part: put both panels side by side, then both show the layout and
# register it as a root of the current document.
plots = Row(p_sx_layout, p_crd_layout)
#output_file("MOGE.html")
show(plots)
curdoc().add_root(plots)

コード例 #5
0
ファイル: GUI.py プロジェクト: JanBenisek/LesionAdjuster
    def showGUI(self, pth_to_img, y_form, pred):
        """Build the Bokeh GUI and open it with ``show``.

        The page contains the image with the true/predicted lines and lesion
        polygons drawn over it, a data table, visibility toggles, a download
        button and a text box with the diameters and their RMSE that is
        recomputed in the browser on every mouse move over the plot.

        Parameters
        ----------
        pth_to_img: path to ultrasound image, appended to ``self.root_pth``
        y_form: true form of the lesion
        pred: predicted form of the lesion
        """

        ##############
        #Set up a figure
        ##############
        p = figure(x_range=(0, self.DIMS[0]),
                   y_range=(0, self.DIMS[1]),
                   tools=self._tools_to_show,
                   plot_width=self.DIMS[0],
                   plot_height=self.DIMS[1],
                   toolbar_location="above")

        #Add image as background
        # NOTE(review): position and size are hard-coded; presumably they
        # correspond to self.DIMS == (862, 604) -- confirm.
        p.image_url(url=[self.root_pth + pth_to_img],
                    x=431,
                    y=302,
                    w=862,
                    h=604,
                    anchor="center")

        #Nicer plot
        self._makeShiny(plot=p)

        ##############
        #Add lines and plot them
        ##############
        src_true, src_pred = self._getData()
        self._plotLines(plot=p, src_true=src_true, src_pred=src_pred)

        ##############
        #Add table
        ##############
        table = self._addTable(src_pred=src_pred)

        ##############
        #Add polygons
        ##############
        true_pol, c_t = self._addLesionForm(form=y_form, color='red', plot=p)
        pred_pol, c_p = self._addLesionForm(form=pred, color='blue', plot=p)

        #Add toggles for polygons
        toggle_true = Toggle(label="Show true form",
                             button_type="primary",
                             active=True)
        toggle_true.js_link('active', true_pol, 'visible')
        toggle_true.js_link('active', c_t, 'visible')

        toggle_pred = Toggle(label="Show predicted form",
                             button_type="primary",
                             active=True)
        toggle_pred.js_link('active', pred_pol, 'visible')
        # Both predicted-form renderers follow the *predicted* toggle; the
        # second one used to be linked to toggle_true by mistake.
        toggle_pred.js_link('active', c_p, 'visible')

        ##############
        #Add download button
        ##############
        # Read the JavaScript with a context manager so the file is closed.
        with open(self.root_pth + "download.js") as js_file:
            download_js = js_file.read()
        button_csv = Button(label="Download", button_type="primary")
        button_csv.callback = CustomJS(args=dict(source=src_pred),
                                       code=download_js)

        ##############
        #Add title div
        ##############
        div_title = Div(text="""<div> <b>LESION ADJUSTER</b> </div>""",
                        align='center',
                        style={
                            'font-size': '150%',
                            'color': '#1f77b4'
                        })
        ##############
        #Add description to the buttons
        ##############
        div_desc = Div(text="""<div> <b>CONTROLS</b> </div>""",
                       align='center',
                       style={
                           'font-size': '110%',
                           'color': '#1f77b4'
                       })

        ##############
        #Add Div to show euclidean distance; it is recalculated on mouse move
        ##############
        div_euclid = Div(text="""
                         <b>Diameter of predicted form is:</b> 334.80 <br>
                         <b>Diameter of true form is:</b> 368.64 <br>
                         <b>RMSE is:</b> 34.13
                         """,
                         align='center',
                         style={'font-size': '100%'})

        # The diameters are the distances between the first two points of
        # each data source.
        p.js_on_event(
            events.MouseMove,
            CustomJS(args=dict(div=div_euclid,
                               source_data_pred=src_pred,
                               source_data_true=src_true),
                     code="""
               var data_p = source_data_pred.data;
               var data_t = source_data_true.data;
               
               var x_p = data_p['x']
               var y_p = data_p['y']
               
               var x_t = data_t['x']
               var y_t = data_t['y']
               
               var diam_p = 0
               var diam_t = 0
               var rmse = 0
               
               //Diameter of pred form
               diam_p = Math.sqrt(Math.pow((x_p[0]-x_p[1]),2) + Math.pow((y_p[0]-y_p[1]),2))
               
               //Diameter of true form
               diam_t = Math.sqrt(Math.pow((x_t[0]-x_t[1]),2) + Math.pow((y_t[0]-y_t[1]),2))
               
               //RMSE
               rmse = Math.sqrt(Math.pow(diam_p - diam_t,2)/1)
               
               //Result
               div.text = "<b>Diameter of predicted form is: </b>" + diam_p.toFixed(2) + "<br> <b>Diameter of true form is: </b>" + diam_t.toFixed(2) + " <br> <b>RMSE is: </b>" + rmse.toFixed(2);
               
               """))

        ##############
        #Show
        ##############
        show(
            Column(
                div_title,
                Row(
                    Column(p, table),
                    Column(div_desc, toggle_true, toggle_pred, button_csv,
                           div_euclid))))
コード例 #6
0
def main():
    """Build the interactive network page and add it to the current document.

    The page has a figure with a point-draw tool, an editable table backed by
    the same data source, two cost sliders, a "Solve Network" button and a
    text box.  The button and the figure's ``PanEnd`` event both call
    ``button_click_event``; the sliders call ``update_data``.
    """
    # set up main bokeh figure
    p = figure(x_range=(0, 10),
               y_range=(0, 10),
               tools=[],
               title='Draw points in the network')
    p.background_fill_color = 'lightgrey'

    # start off with sample points and their associated flows
    source = ColumnDataSource({
        'x': [2, 7, 5, 8],
        'y': [2, 2, 6, 1],
        'flow': ['-2', '-5', '8', '-1']
    })

    renderer = p.scatter(x='x', y='y', source=source, color='blue', size=10)
    columns = [
        TableColumn(field="x", title="x"),
        TableColumn(field="y", title="y"),
        TableColumn(field='flow', title='flow')
    ]
    table = DataTable(source=source,
                      columns=columns,
                      editable=True,
                      height=200)

    # points added with the draw tool get a flow of '1'
    draw_tool = PointDrawTool(renderers=[renderer], empty_value='1')
    p.add_tools(draw_tool)
    p.toolbar.active_tap = draw_tool

    titletextbox = Div(
        text=
        "<h2>Objective: minimize construction cost of network<p>Construction cost is based on number of pipes and distance between nodes.<br>Additional constraints imposed: flows in network must be balanced.<br></h2>",
        width=1100,
        height=150)
    textbox = Div(text="", width=200, height=100)
    button = Button(label='Solve Network')

    # The same handler serves the button click and the end of a pan gesture
    # on the figure.
    solve = partial(button_click_event,
                    source=source,
                    textbox=textbox,
                    figure=p)
    button.on_event(ButtonClick, solve)
    p.on_event(PanEnd, solve)

    # set up sliders
    lumpSumCost = Slider(title="Fixed cost pipe",
                         value=0.0,
                         start=0.0,
                         end=500.0,
                         step=50)
    floatingCost = Slider(title="Floating cost pipe",
                          value=1.0,
                          start=0.0,
                          end=500.0,
                          step=10.)

    # Either slider re-runs update_data with both slider widgets available.
    refresh = partial(update_data,
                      source=source,
                      textbox=textbox,
                      figure=p,
                      lumpSumCost=lumpSumCost,
                      floatingCost=floatingCost)
    for slider in (lumpSumCost, floatingCost):
        slider.on_change('value', refresh)

    # create page layout
    curdoc().add_root(
        Column(
            titletextbox,
            Row(Column(p, table, width=800),
                Column(lumpSumCost, floatingCost, button, textbox,
                       width=300))))
    curdoc().title = "Network"
コード例 #7
0
def choro_map():
    """Build the rat-sightings choropleth with its three selector widgets.

    Loads ``./data/Rat_Sightings.pkl`` and returns a Bokeh ``Column`` holding
    the aggregation-level, year and season selects above a map.  Changing any
    select replaces the map with a freshly built one.

    Returns:
        The Bokeh layout (widgets on top, map below).
    """

    df = pd.read_pickle("./data/Rat_Sightings.pkl")

    # For each aggregation level: the columns merged back onto the counts and
    # the renames that bring the polygon columns to the common "xs"/"ys".
    geometry_by_level = {
        "postalCode": (
            ["postalCode", "Neighborhood", "Borough", "xs", "ys"],
            {},
        ),
        "Neighborhood": (
            ["Neighborhood", "Borough", "nhood_xs", "nhood_ys"],
            {"nhood_xs": "xs", "nhood_ys": "ys"},
        ),
        "Borough": (
            ["Borough", "boro_xs", "boro_ys"],
            {"boro_xs": "xs", "boro_ys": "ys"},
        ),
    }

    def make_map(var="postalCode", year="All", season="All"):
        """Return a figure of sightings per ``var`` for the given filters.

        Args:
            var: aggregation column; anything other than "postalCode",
                "Neighborhood" or "Borough" falls back to "postalCode".
            year: value of the "Year" column to keep, or "All".
            season: value of the "Season" column to keep, or "All".
        """
        def filter_years_seasons(df=df, season=season, year=year):
            """Keep only the rows matching the selected year and season."""
            # Boolean masks instead of string-built query expressions: the
            # values come from widgets and must not be parsed as code.
            if year != "All":
                df = df[df["Year"] == year]
            if season != "All":
                df = df[df["Season"] == season]
            return df

        new_df = filter_years_seasons()

        if var not in geometry_by_level:
            var = "postalCode"
        merge_columns, renames = geometry_by_level[var]

        sightings = (new_df.assign(n=0).groupby(var).n.count().reset_index()
                     .rename(columns={"n": "Rat_Sightings"}))

        # One geometry row per `var` value (the first one in the data).
        # De-duplicating before the merge gives the same rows as merging the
        # full frame and de-duplicating afterwards, without the large
        # intermediate join.
        geometry = df[merge_columns].drop_duplicates(subset=[var])
        groupeddf = (sightings.merge(geometry).reset_index(drop=True)
                     .rename(columns=renames))

        # instantiate the color mapper
        color_mapper = LogColorMapper(palette=OrRd9[::-1])

        p = figure(x_range=(-8400000, -8100000),
                   y_range=(4950000, 5000000),
                   x_axis_type="mercator",
                   y_axis_type="mercator",
                   plot_height=1200,
                   plot_width=1000)
        p.axis.visible = False
        p.grid.grid_line_color = None
        p.add_tile(CARTODBPOSITRON)

        p.patches("xs",
                  "ys",
                  source=ColumnDataSource(data=groupeddf),
                  fill_color={
                      "field": "Rat_Sightings",
                      "transform": color_mapper
                  },
                  fill_alpha=0.6)

        p.add_tools(
            HoverTool(tooltips=[("Number of Rat Sightings: ",
                                 "@Rat_Sightings"), ("%s" % var,
                                                     "@%s" % var)]))

        return p

    year_select = Select(value="All",
                         options=[*df["Year"].unique()] + ["All"],
                         title="Select a year to view: ")

    season_select = Select(value="All",
                           options=[*df["Season"].unique()] + ["All"],
                           title="Select a season to view: ")

    agg_level_select = Select(
        value="postalCode",
        options=["postalCode", "Neighborhood", "Borough"],
        title="Select a value to view the data by: ")

    def update_plot(attr, old, new):
        """Rebuild the map from the current widget values."""
        layout.children[1] = make_map(year=year_select.value,
                                      season=season_select.value,
                                      var=agg_level_select.value)

    year_select.on_change("value", update_plot)
    season_select.on_change("value", update_plot)
    agg_level_select.on_change("value", update_plot)

    layout = Column(Column(agg_level_select, year_select, season_select),
                    make_map())

    return layout
コード例 #8
0
from bokeh.models import ColumnDataSource, DataTable, PointDrawTool, TableColumn
from bokeh.plotting import Column, figure, output_file, show

# Stand-alone demo of PointDrawTool: a scatter plot and an editable table
# share one data source, and the result is written to an HTML file.
output_file("tools_point_draw.html")

p = figure(title='Point Draw Tool',
           tools=[],
           x_range=(0, 10),
           y_range=(0, 10))
p.background_fill_color = 'lightgrey'

source = ColumnDataSource(dict(x=[1, 5, 9],
                               y=[1, 5, 9],
                               color=['red', 'green', 'yellow']))

renderer = p.scatter(x='x', y='y', source=source, color='color', size=10)

# One table column per data-source column, titled after the field itself.
columns = [TableColumn(field=name, title=name) for name in ('x', 'y', 'color')]
table = DataTable(source=source, columns=columns, editable=True, height=200)

# Points drawn by the user get 'black' in the 'color' column.
draw_tool = PointDrawTool(renderers=[renderer], empty_value='black')
p.add_tools(draw_tool)
p.toolbar.active_tap = draw_tool

show(Column(p, table))
コード例 #9
0
ファイル: main.py プロジェクト: almostscheidplatz/guesspeak
# print db.get(str(id)+"4"+"x")
# print db.get(str(id)+"5"+"x")

# print db.get(str(id)+"1"+"y")
# print db.get(str(id)+"2"+"y")
# print db.get(str(id)+"3"+"y")
# print db.get(str(id)+"4"+"y")
# print db.get(str(id)+"5"+"y")

# print db.getall()
#db.deldb()
#db.dump()

#db.deldb()
#db.dump()
# First layout: figure `p` with the two buttons in a row underneath.
layout = (Column(p, Row(button1, button)))

# Second figure ("Analysis Peak").  The tool names are given as a single
# comma-separated string inside a list.
tools = ["xpan,pan,xwheel_zoom,wheel_zoom,box_zoom,reset,previewsave"]
q = figure(x_range=(0, 10),
           y_range=(0, 10),
           tools=tools,
           title='Analysis Peak')

# These assignments replace the (0, 10) ranges passed to figure() above.
q.y_range.start = -10
q.y_range.end = 220
q.x_range.start = -10
q.x_range.end = 180

# x-axis label and its font styling
q.xaxis.axis_label = "Extension [nm]"
q.xaxis.axis_label_text_font_size = "20pt"
q.xaxis.axis_label_text_font_style = 'bold'
コード例 #10
0
def new_scatter():
    """Build the variable-vs-variable scatter plot with its selector widgets.

    Loads ``./data/Rat_Sightings.pkl`` and returns a Bokeh ``Column`` with
    four selects (two variables, year, season) above a scatter plot.
    Changing any select replaces the plot with a freshly built one.

    Returns:
        The Bokeh layout (widgets on top, plot below).
    """

    df = pd.read_pickle("./data/Rat_Sightings.pkl")

    def var_var_scatter(df=df,
                        var1="Borough",
                        var2="Neighborhood",
                        year="All",
                        season="All"):
        """Return a scatter plot of sighting counts per (var1, var2) pair.

        Allows the user to select whichever variables from the dataset for
        comparison.  The variable with fewer distinct values is used for the
        y axis.

        Args:
            df: the sightings data frame.
            var1: first column name.
            var2: second column name.
            year: value of the "Year" column to keep, or "All".
            season: value of the "Season" column to keep, or "All".
        """

        # we want var1 to be the least granular variable
        if df[var2].nunique() < df[var1].nunique():
            var1, var2 = var2, var1

        # Boolean masks instead of string-built query expressions: the
        # values come from widgets and must not be parsed as code.
        selected = df
        if year != "All":
            selected = selected[selected["Year"] == year]
        if season != "All":
            selected = selected[selected["Season"] == season]

        # count the rows of every (var1, var2) combination
        groupeddf = (selected.assign(n=0).groupby([var1, var2]).n.count()
                     .reset_index().rename(columns={"n": "rat_sightings"}))

        # categories for the y axis
        uniq_vals1 = [*groupeddf[var1].unique()]

        # now add in percentages to give more information to user:
        # total sightings per var1 value, repeated on each of its rows
        col_name = "total_%s_sights" % var1
        groupeddf[col_name] = (
            groupeddf.groupby(var1)["rat_sightings"].transform("sum"))

        # get percentages for each location type
        perc_name = "perc_sights"
        groupeddf[perc_name] = (groupeddf["rat_sightings"] /
                                groupeddf[col_name]) * 100

        # create color mapper
        mapper = LogColorMapper(palette=Blues9[::-1])

        # instantiate figure
        p = figure(plot_width=800, plot_height=600, y_range=uniq_vals1)

        p.circle(x="rat_sightings",
                 y=jitter(var1, width=0.6, range=p.y_range),
                 source=ColumnDataSource(data=groupeddf),
                 alpha=0.6,
                 size=30,
                 hover_alpha=0.9,
                 fill_color={
                     "field": "rat_sightings",
                     "transform": mapper
                 })

        # Field names are wrapped in braces so that column names containing
        # spaces (any column can be selected) still resolve in the tooltip.
        p.add_tools(
            HoverTool(tooltips=[
                (var1, "@{%s}" % var1),
                (var2, "@{%s}" % var2),
                ("Num. of Rat Sightings", "@rat_sightings"),
                ("Percentage of Rat Sightings within the %ss" % var1,
                 "@perc_sights %"),
            ]))

        return p

    # define variable selects
    var1_select = Select(value="Borough",
                         options=[*df.columns],
                         title="Select a variable: ")
    var2_select = Select(value="Neighborhood",
                         options=[*df.columns],
                         title="Select a variable: ")
    year_select = Select(value="All",
                         options=[*df["Year"].unique()] + ["All"],
                         title="Select a year to view: ")
    season_select = Select(value="All",
                           options=[*df["Season"].unique()] + ["All"],
                           title="Select a season to view: ")

    # define interactivity
    def update_plot(attr, old, new):
        """Rebuild the scatter plot from the current widget values."""
        layout.children[1] = var_var_scatter(var1=var1_select.value,
                                             var2=var2_select.value,
                                             year=year_select.value,
                                             season=season_select.value)

    # inc interactivity
    var1_select.on_change("value", update_plot)
    var2_select.on_change("value", update_plot)
    year_select.on_change("value", update_plot)
    season_select.on_change("value", update_plot)

    layout = Column(
        Row(Column(var1_select, var2_select), Column(year_select,
                                                     season_select)),
        var_var_scatter())
    return layout
コード例 #11
0
def heatmap():
    """Build the interactive heatmap view of rat sightings.

    Reads ``./data/Rat_Sightings.pkl`` and returns a Bokeh ``Column`` with
    three dropdowns (variable, year, season) stacked above the heatmap.
    Changing any dropdown replaces the heatmap with a freshly drawn one.
    """

    df = pd.read_pickle("./data/Rat_Sightings.pkl")

    def var_loc_heatmap(df=df,
                        var="Borough",
                        loc_var="Location Type",
                        year="All",
                        season="All"):
        """Draw sighting counts per (``var``, ``loc_var``) pair as a heatmap.

        ``year`` and ``season`` restrict the rows considered; the value
        ``"All"`` means "do not filter on this column". Each cell is
        coloured by its sighting count on a log scale, and the hover tool
        also shows the cell's share of the sightings recorded for its
        ``var`` value. Returns the Bokeh figure.
        """

        # restrict the rows: the two filters are independent of each other
        subset = df
        if year != "All":
            subset = subset.query("Year == '%s'" % year)
        if season != "All":
            subset = subset.query("Season == '%s'" % season)

        # count the sightings for every (var, location type) combination
        pair_counts = subset.assign(n=0).groupby([var, loc_var]).n.count()
        counts = pair_counts.reset_index().rename(columns={
            loc_var: "loc_type",
            "n": "rat_sightings"
        })

        # share of each cell relative to every counted sighting
        overall = counts.rat_sightings.sum()
        counts["total_perc"] = (counts.rat_sightings / overall) * 100

        # categorical axis values, in order of first appearance
        loc_types = [*counts["loc_type"].unique()]
        uniq_vals = [*counts[var].unique()]

        # per-`var` totals, merged back on the shared `var` column so that
        # each row knows the total of the group it belongs to
        col_name = "total_%s_sights" % var
        per_var_totals = counts.groupby(var).rat_sightings.sum()
        per_var_totals = per_var_totals.rename(col_name).reset_index()
        counts = counts.merge(per_var_totals)

        # share of each cell within its own `var` value
        counts["perc_sights"] = (counts["rat_sightings"] /
                                 counts[col_name]) * 100

        # light-to-dark blues on a logarithmic scale
        color_mapper = LogColorMapper(palette=Blues9[::-1])

        fig = figure(y_range=uniq_vals, x_range=loc_types)

        # strip grid and axis decoration so only the cells and labels remain
        fig.grid.grid_line_color = None
        fig.axis.axis_line_color = None
        fig.axis.major_tick_line_color = None
        fig.axis.major_label_text_font_size = "7pt"
        fig.axis.major_label_standoff = 0
        fig.xaxis.major_label_orientation = pi / 3

        # one unit square per (location type, var) pair
        cell_fill = {"field": "rat_sightings", "transform": color_mapper}
        fig.rect(x="loc_type",
                 y=var,
                 width=1,
                 height=1,
                 source=ColumnDataSource(data=counts),
                 alpha=0.6,
                 hover_alpha=0.9,
                 fill_color=cell_fill)

        tooltips = [
            ("%s" % var, "@%s" % var),
            ("Location Type", "@loc_type"),
            ("Num. of Rat Sightings", "@rat_sightings"),
            ("Perc. of Rat Sightings Across %ss" % var, "@perc_sights %"),
        ]
        fig.add_tools(HoverTool(tooltips=tooltips,
                                point_policy="follow_mouse"))

        return fig

    # dropdowns offered to the user
    var_select = Select(
        value="Borough",
        options=["Borough", "Neighborhood", "postalCode", "Year", "Season"],
        title="Select the Variable to View: ")
    year_select = Select(value="All",
                         options=[*df["Year"].unique()] + ["All"],
                         title="Select a year to view: ")
    season_select = Select(value="All",
                           options=[*df["Season"].unique()] + ["All"],
                           title="Select a season to view: ")

    def update_plot(attr, old, new):
        """Swap the displayed heatmap for one matching the dropdowns."""
        layout.children[1] = var_loc_heatmap(var=var_select.value,
                                             year=year_select.value,
                                             season=season_select.value)

    # redraw whenever any dropdown changes value
    for widget in (var_select, year_select, season_select):
        widget.on_change("value", update_plot)

    controls = Column(var_select, year_select, season_select)
    layout = Column(controls, var_loc_heatmap())

    return layout
コード例 #12
0
def bar():
    """Build the interactive bar-chart view of rat sightings.

    Reads ``./data/Rat_Sightings.pkl`` and returns a Bokeh ``Column`` with
    four dropdowns (variable, year, season, month) stacked above a
    horizontal bar chart. Changing any dropdown replaces the chart with a
    freshly drawn one.
    """

    df = pd.read_pickle("./data/Rat_Sightings.pkl")

    def bar_chart(var="Borough", year="All", season="All", month="All"):
        """Draw the number of sightings per unique value of ``var``.

        ``year``, ``season`` and ``month`` restrict the rows considered;
        the value ``"All"`` means "do not filter on this column". Bars are
        coloured by their count on a log scale and the hover tool shows the
        count and its share of the filtered total. Returns the Bokeh figure.
        """

        # Apply every selected filter independently. The previous if/elif
        # chain misspelled "All" as "ALl" and had no branch for some
        # combinations (e.g. year + month without season), so those
        # selections silently displayed unfiltered data.
        filtered = df
        for column, selected in (("Year", year), ("Season", season),
                                 ("Month", month)):
            if selected != "All":
                filtered = filtered.query("%s == '%s'" % (column, selected))

        # count the rows for each unique value of `var`
        groupeddf = filtered.assign(n=0).groupby(var).n.count().reset_index()

        # share of the filtered total, in percent
        groupeddf["perc"] = (groupeddf["n"] / groupeddf["n"].sum()) * 100

        # give the count column a name derived from the chosen variable
        count_col = var + "_rat_sightings"
        groupeddf.columns = [var, count_col, "perc"]

        # categorical axis values, in order of first appearance
        uniq_vals = [*groupeddf[var].unique()]

        # light-to-dark blues on a logarithmic scale
        mapper = LogColorMapper(palette=Blues9[::-1])

        p = figure(y_range=uniq_vals, title="Rat Sightings by %s" % var)

        p.hbar(y=var,
               right=count_col,
               height=0.9,
               source=ColumnDataSource(data=groupeddf),
               alpha=0.6,
               hover_alpha=0.9,
               fill_color={
                   "field": count_col,
                   "transform": mapper
               })

        # Column names are wrapped in braces because Bokeh tooltip
        # templates only resolve names containing spaces (such as
        # "Location Type") when written as @{name}.
        p.add_tools(
            HoverTool(tooltips=[("%s" % var, "@{%s}" % var),
                                ("%s" % count_col, "@{%s}" % count_col),
                                ("Percentage of Sightings", "@perc %")],
                      point_policy="follow_mouse"))

        return p

    # dropdowns offered to the user
    var_select = Select(value="Borough",
                        options=[
                            "Borough", "postalCode", "Neighborhood",
                            "Location Type", "Year", "Season"
                        ],
                        title="Select the Variable to View: ")
    year_select = Select(value="All",
                         options=[*df["Year"].unique()] + ["All"],
                         title="Select a year to view: ")
    season_select = Select(value="All",
                           options=[*df["Season"].unique()] + ["All"],
                           title="Select a season to view: ")
    month_select = Select(value="All",
                          options=[*df.Month.unique()] + ["All"],
                          title="Select a month to view: ")

    def update_plot(attr, old, new):
        """Swap the displayed chart for one matching the dropdowns."""
        layout.children[1] = bar_chart(var=var_select.value,
                                       year=year_select.value,
                                       season=season_select.value,
                                       month=month_select.value)

    # redraw whenever any dropdown changes value
    var_select.on_change("value", update_plot)
    year_select.on_change("value", update_plot)
    season_select.on_change("value", update_plot)
    month_select.on_change("value", update_plot)

    # controls on top, chart underneath
    layout = Column(
        Column(var_select, year_select, season_select, month_select),
        bar_chart())

    return layout
コード例 #13
0
def scatter():
    """Build the interactive scatter view of rat sightings.

    Reads ``./data/Rat_Sightings.pkl`` and returns a Bokeh ``Column`` with
    three dropdowns (variable, year, season) stacked above a jittered
    scatter plot. Changing any dropdown replaces the plot with a freshly
    drawn one.
    """

    df = pd.read_pickle("./data/Rat_Sightings.pkl")

    def var_loc_scatter(var="Borough",
                        loc_var="Location Type",
                        year="All",
                        season="All"):
        """Plot sighting counts per (``var``, ``loc_var``) pair as circles.

        The rows are optionally restricted by ``year`` and ``season``
        (``"All"`` means "do not filter on this column"), then counted for
        every combination of ``var`` and ``loc_var``. Each combination
        becomes one circle placed at its count on the x axis and jittered
        around its ``var`` category on the y axis. Returns the Bokeh figure.
        """

        # the two filters are independent of each other
        filtered = df
        if year != "All":
            filtered = filtered.query("Year == '%s'" % year)
        if season != "All":
            filtered = filtered.query("Season == '%s'" % season)

        # count the sightings for every (var, location type) combination
        groupeddf = (filtered.assign(n=0).groupby(
            [var, loc_var]).n.count().reset_index().rename(columns={
                loc_var: "loc_type",
                "n": "rat_sightings"
            }))

        # share of each combination relative to every counted sighting
        groupeddf["total_perc"] = (groupeddf.rat_sightings /
                                   groupeddf.rat_sightings.sum()) * 100

        # Share of each combination within its own `var` value. The hover
        # label below promises a percentage "within" the variable, but the
        # tooltip previously showed the global share (total_perc) because
        # this local share was never computed.
        groupeddf["perc_sights"] = (
            groupeddf["rat_sightings"] /
            groupeddf.groupby(var)["rat_sightings"].transform("sum")) * 100

        # categorical axis values, in order of first appearance
        uniq_vals = [*groupeddf[var].unique()]

        # light-to-dark blues on a logarithmic scale
        mapper = LogColorMapper(palette=Blues9[::-1])

        p = figure(y_range=uniq_vals)

        # jitter spreads circles that share a category so they overlap less
        p.circle(x="rat_sightings",
                 y=jitter(var, width=0.6, range=p.y_range),
                 source=ColumnDataSource(data=groupeddf),
                 alpha=0.6,
                 size=30,
                 hover_alpha=0.9,
                 fill_color={
                     "field": "rat_sightings",
                     "transform": mapper
                 })

        p.add_tools(
            HoverTool(tooltips=[
                ("%s" % var, "@%s" % var),
                ("Location Type", "@loc_type"),
                ("Num. of Rat Sightings", "@rat_sightings"),
                ("Percentage of Rat Sightings within the %ss" % var,
                 "@perc_sights %"),
            ]))

        return p

    # dropdowns offered to the user
    var_select = Select(
        value="Borough",
        options=["Borough", "postalCode", "Neighborhood", "Year", "Season"],
        title="Select the Variable to View: ")
    year_select = Select(value="All",
                         options=[*df["Year"].unique()] + ["All"],
                         title="Select a year to view: ")
    season_select = Select(value="All",
                           options=[*df["Season"].unique()] + ["All"],
                           title="Select a season to view: ")

    def update_plot(attr, old, new):
        """Swap the displayed plot for one matching the dropdowns."""
        layout.children[1] = var_loc_scatter(var=var_select.value,
                                             year=year_select.value,
                                             season=season_select.value)

    # redraw whenever any dropdown changes value
    var_select.on_change('value', update_plot)
    year_select.on_change("value", update_plot)
    season_select.on_change("value", update_plot)

    layout = Column(Column(var_select, year_select, season_select),
                    var_loc_scatter())

    return layout