Beispiel #1
0
def init_mention_plot():
    """Fill the global ``mention_barplot`` with one bar per @ mention.

    Stores the result of ``count_mentions()`` in the global
    ``mention_count`` and iterates it as ``(mention, frequency)`` pairs.
    Each pair becomes a vertical bar; the x axis shows the mention text
    (rotated by 90 degrees) and the y axis carries ten integer ticks spread
    evenly up to the largest frequency.
    """
    cprint('%s: init @ mentios barcharts...' % TAG, 'yellow', attrs=['bold'])
    global mention_count
    global mention_barplot
    mention_count = count_mentions()

    # Split the (mention, frequency) pairs into two parallel lists.
    mentions = []
    y = []
    for mention, freq in mention_count:
        mentions.append(mention)
        y.append(freq)
    x = np.arange(len(mentions))

    source = ColumnDataSource(dict(x=x, top=y))
    glyph = VBar(x='x', top='top', bottom=0, width=0.85, fill_color='#ff7f0e')
    mention_barplot.add_glyph(source, glyph)

    # One tick per bar, relabelled with the mention it stands for.
    xaxis = LinearAxis()
    xaxis.ticker = x
    xaxis.major_label_overrides = dict(enumerate(mentions))
    mention_barplot.add_layout(xaxis, 'below')
    mention_barplot.xaxis.major_label_orientation = +np.pi / 2

    yaxis = LinearAxis()
    yaxis.axis_label = 'Overall number of @ mentions'
    yaxis.axis_label_text_font_size = '14pt'
    # Builtin int as dtype: the np.int alias no longer exists in
    # NumPy >= 1.24. default=0 keeps an empty mention list from raising.
    # The leading 0 tick is dropped by the [1:] slice.
    yaxis.ticker = np.linspace(0, max(y, default=0), 11, dtype=int)[1:]
    mention_barplot.add_layout(yaxis, 'left')

    # Grid lines follow the same ticks as the two axes.
    mention_barplot.add_layout(Grid(dimension=0, ticker=xaxis.ticker))
    mention_barplot.add_layout(Grid(dimension=1, ticker=yaxis.ticker))
Beispiel #2
0
def init_user_plot():
    """Fill the global ``user_barplot`` with one bar per user.

    Stores the result of ``count_users()`` in the global ``user_count`` and
    reads it through ``.items()`` as ``user -> frequency``. Each user becomes
    a vertical bar. For every user the data source also carries a ``wlist``
    entry: ``'@' + user`` followed by up to ten ``' prefix:freq'`` strings
    taken from the global ``user_tweet_freq[user]``, highest frequency first.
    """
    cprint('%s: init @ users barcharts...' % TAG, 'yellow', attrs=['bold'])
    global user_count
    global user_barplot
    user_count = count_users()

    # Split the mapping into two parallel lists.
    users = []
    y = []
    for user, freq in user_count.items():
        users.append(user)
        y.append(freq)
    x = np.arange(len(users))

    wlist = []
    for user in users:
        top_prefixes = sorted(user_tweet_freq[user].items(),
                              key=lambda kv: kv[1],
                              reverse=True)[:10]
        wlist.append(['@' + user] + [
            ' %s:%d' % (prefix, freq) for prefix, freq in top_prefixes
        ])

    source = ColumnDataSource(dict(x=x, top=y, wlist=wlist))
    glyph = VBar(x='x', top='top', bottom=0, width=0.85, fill_color='#1f77b4')
    user_barplot.add_glyph(source, glyph)

    # One tick per bar, relabelled with the user handle.
    xaxis = LinearAxis()
    xaxis.ticker = x
    xaxis.major_label_overrides = {
        pos: '@' + user
        for pos, user in enumerate(users)
    }
    user_barplot.add_layout(xaxis, 'below')
    user_barplot.xaxis.major_label_orientation = +np.pi / 2

    yaxis = LinearAxis()
    yaxis.axis_label = 'Overall number of tweets per @'
    yaxis.axis_label_text_font_size = '14pt'
    # Builtin int as dtype: the np.int alias no longer exists in
    # NumPy >= 1.24. default=0 keeps an empty user list from raising.
    # The leading 0 tick is dropped by the [1:] slice.
    yaxis.ticker = np.linspace(0, max(y, default=0), 11, dtype=int)[1:]
    user_barplot.add_layout(yaxis, 'left')

    # Grid lines follow the same ticks as the two axes.
    user_barplot.add_layout(Grid(dimension=0, ticker=xaxis.ticker))
    user_barplot.add_layout(Grid(dimension=1, ticker=yaxis.ticker))
Beispiel #3
0
def init_plot():
    """Initialise every bar chart and recolour the SVG map.

    Steps, in order:

    1. ``init_wordcount()``, ``init_user_plot()`` and ``init_mention_plot()``
       are called, then ``count_words(prefix_count, wword_count)``.
    2. The colour mapper range and the colour bar ticks are set from
       ``get_freq_range(prefix_count)``.
    3. For each ``(neighbourhood, word counts)`` entry of ``prefix_count``
       the i-th plot/source pair in ``word_barplots`` / ``word_sources`` is
       filled with the ``config.NUM_WORD_BARS`` most frequent prefixes as
       horizontal bars, and the matching SVG region is filled with a
       ``Spectral6`` colour derived from the mean of the top five
       frequencies.
    4. The rendered SVG is written to ``svg_div.text``.
    """
    cprint('%s: init barcharts per neighbourhood...' % TAG,
           'yellow',
           attrs=['bold'])
    init_wordcount()
    global prefix_count
    global wword_count
    global word_barplots
    global word_sources
    global mapper

    global svg_div

    init_user_plot()
    init_mention_plot()

    count_words(prefix_count, wword_count)

    # update colorbar
    min_freq, max_freq = get_freq_range(prefix_count)
    mapper['transform'].low = min_freq
    mapper['transform'].high = max_freq

    # update colorbar tickers: halve the tick count until it no longer
    # exceeds the largest frequency
    steps = 13
    while max_freq < steps:
        steps = steps // 2
    # Builtin int as dtype: the np.int alias no longer exists in
    # NumPy >= 1.24.
    color_bar.ticker = FixedTicker(
        ticks=np.linspace(min_freq, max_freq, steps, dtype=int))

    y = np.arange(config.NUM_WORD_BARS)
    for i, (neigh, wcount) in enumerate(prefix_count.items()):
        wordfreqlist = sorted(wcount.items(),
                              key=lambda kv: kv[1],
                              reverse=True)
        x = []
        prefixes = []
        wlist = []
        for prefix, freq in wordfreqlist[:config.NUM_WORD_BARS]:
            x.append(freq)
            prefixes.append(prefix)
            # Up to five most frequent words recorded under this prefix.
            top_words = sorted(wword_count[neigh][prefix].items(),
                               key=lambda kv: kv[1],
                               reverse=True)[:5]
            wlist.append([' %s:%d' % (k, v) for (k, v) in top_words])

        # Scale the mean of the top five frequencies into 0..5 relative to
        # the global [min_freq, max_freq] range and use it as palette index.
        color_index = np.round(
            minmax_scale([np.mean(x[:5]), min_freq, max_freq],
                         feature_range=(0, 5))[0])
        map_fill_color = Spectral6[int(color_index)]
        svg.change_fill_color(neigh.replace(' ', ''), map_fill_color)

        plot = word_barplots[i]
        src = word_sources[i]
        src.data = dict(y=y, right=x, wlist=wlist)

        t = Title()
        t.text = neigh.title()[:13]
        plot.title = t

        glyph = HBar(y='y',
                     right='right',
                     left=0,
                     height=0.90,
                     fill_color=mapper)
        word_hbarglyphs.append(glyph)
        plot.add_glyph(src, glyph)

        xaxis = LinearAxis()
        # Four integer ticks up to the largest frequency; the leading 0 is
        # dropped by the [1:] slice.
        xaxis.ticker = np.linspace(0, max(x), 5, dtype=int)[1:]
        plot.add_layout(xaxis, 'below')
        plot.xaxis.major_label_orientation = +np.pi / 2

        yaxis = LinearAxis()
        yaxis.ticker = y
        yaxis.major_label_overrides = {
            pos: prefix
            for pos, prefix in enumerate(prefixes)
        }
        yaxis.major_label_standoff = -35
        plot.add_layout(yaxis, 'left')

        plot.add_layout(Grid(dimension=0, ticker=xaxis.ticker))
        plot.add_layout(Grid(dimension=1, ticker=yaxis.ticker))

    svg_div.text = svg.to_string()
Beispiel #4
0
def parallel_plot(df, axes, color=None, palette=None):
    """Create a parallel coordinate plot from a dataframe.

    Every column of ``df`` becomes a vertical axis at an integer x position
    and every row becomes one poly-line, with each column min-max scaled to
    ``[0, 1]``.

    Parameters
    ----------
    df : pandas.DataFrame
        Data to plot, one line per row.
    axes : dict
        Maps a column name to a dict with the keys ``'lower'`` and
        ``'upper'`` (axis limits) and optionally ``'choices'`` (a sequence
        whose entries replace the tick labels of that axis). Columns missing
        from ``axes`` fall back to the column's min/max and get 8 ticks.
    color : array-like, optional
        One value per row, fed to the colour mapper of selected lines. It
        must provide ``.min()`` and ``.max()``. Defaults to all ones.
    palette : list of str, optional
        Colours for the mapper. Defaults to ``['#ff0000']``.

    Returns
    -------
    The Bokeh figure, with the parallel selection and reset tools attached.

    Raises
    ------
    ValueError
        If the lower or upper limit of an axis is NaN.
    """
    logger = logging.getLogger('cave.plot.parallel.plot')

    npts = df.shape[0]
    ndims = len(df.columns)

    if color is None:
        color = np.ones(npts)
    if palette is None:
        palette = ['#ff0000']

    # NOTE(review): high is set to color.min() and low to color.max(), i.e.
    # the mapper bounds are inverted -- confirm this is intended.
    cmap = LinearColorMapper(high=color.min(),
                             low=color.max(),
                             palette=palette)

    data_source = ColumnDataSource(
        dict(xs=np.arange(ndims)[None, :].repeat(npts, axis=0).tolist(),
             ys=np.array((df - df.min()) / (df.max() - df.min())).tolist(),
             color=color))

    p = figure(x_range=(-1, ndims),
               y_range=(0, 1),
               width=800,
               tools="pan, box_zoom")

    # Create x axis ticks from columns contained in dataframe
    fixed_x_ticks = FixedTicker(ticks=np.arange(ndims), minor_ticks=[])
    formatter_x_ticks = FuncTickFormatter(code="return columns[index]",
                                          args={"columns": df.columns})
    p.xaxis.ticker = fixed_x_ticks
    p.xaxis.formatter = formatter_x_ticks

    p.yaxis.visible = False
    p.y_range.start = 0
    p.y_range.end = 1
    p.y_range.bounds = (-0.1, 1.1)  # add a little padding around y axis
    p.xgrid.visible = False
    p.ygrid.visible = False

    # Create extra y axis for each dataframe column
    tickformatter = BasicTickFormatter(precision=1)
    for index, col in enumerate(df.columns):
        if col in axes:
            col_axes = axes[col]
            start = col_axes['lower']
            end = col_axes['upper']
        else:
            logger.warning(
                "Parallel plot didn't receive information about the axes. "
                "This will likely fail for categorical data")
            # Empty spec: the 'choices' lookups below then simply miss
            # instead of raising KeyError on axes[col].
            col_axes = {}
            start = df[col].min()
            end = df[col].max()

        if np.isnan(start) or np.isnan(end):
            raise ValueError(
                "NaN's not allowed in limits of axes! %s: (%s, %s)" %
                (col, str(start), str(end)))

        logger.debug('Limits for %s are (%s, %s)', col, start, end)

        # Apply the same relative padding as the main y range to this axis.
        span = abs(end - start)
        bound_min = start + span * (p.y_range.bounds[0] - p.y_range.start)
        bound_max = end + span * (p.y_range.bounds[1] - p.y_range.end)
        p.extra_y_ranges.update({
            col:
            Range1d(start=bound_min,
                    end=bound_max,
                    bounds=(bound_min, bound_max))
        })

        has_choices = 'choices' in col_axes
        num_ticks = len(col_axes['choices']) if has_choices else 8
        fixedticks = FixedTicker(ticks=np.linspace(start, end, num_ticks),
                                 minor_ticks=[])

        axis = LinearAxis(fixed_location=index,
                          y_range_name=col,
                          ticker=fixedticks,
                          formatter=tickformatter)
        if has_choices:
            # Note, override-dicts need to be created on assign (https://github.com/bokeh/bokeh/issues/8166)
            axis.major_label_overrides = {
                i: v
                for i, v in enumerate(col_axes['choices'])
            }
        p.add_layout(axis, 'right')

    # create the data renderer ( MultiLine )
    # specify selected and non selected style
    non_selected_line_style = dict(line_color='grey',
                                   line_width=0.1,
                                   line_alpha=0.5)

    selected_line_style = dict(line_color={
        'field': 'color',
        'transform': cmap
    },
                               line_width=1)

    parallel_renderer = p.multi_line(xs="xs",
                                     ys="ys",
                                     source=data_source,
                                     **non_selected_line_style)

    # Specify selection style
    selected_lines = MultiLine(**selected_line_style)

    # Specify non selection style
    nonselected_lines = MultiLine(**non_selected_line_style)

    parallel_renderer.selection_glyph = selected_lines
    parallel_renderer.nonselection_glyph = nonselected_lines
    # Only now widen the visible range to the padded bounds; the per-axis
    # padding above relies on start/end still being 0 and 1.
    p.y_range.start = p.y_range.bounds[0]
    p.y_range.end = p.y_range.bounds[1]

    rect_source = ColumnDataSource({
        'x': [],
        'y': [],
        'width': [],
        'height': []
    })

    # add rectangle selections
    selection_renderer = p.rect(x='x',
                                y='y',
                                width='width',
                                height='height',
                                source=rect_source,
                                fill_alpha=0.7,
                                fill_color='#009933')
    selection_tool = ParallelSelectionTool(renderer_select=selection_renderer,
                                           renderer_data=parallel_renderer,
                                           box_width=10)
    # custom resets (reset only axes not selections)
    reset_axes = ParallelResetTool()

    # add tools and activate selection ones
    p.add_tools(selection_tool, reset_axes)
    p.toolbar.active_drag = selection_tool
    return p
Beispiel #5
0
def timeline_days_hours(interval_frequency, all_co2_dataframe,
                        sensors_with_anomalies, data, upper_bound,
                        destination_path):
    """Save one day-by-time anomaly timeline per sensor as an HTML file.

    For every sensor a categorical figure is built with the time of day on
    the x axis and the calendar day on the y axis. Each anomaly is drawn as
    a blue line from its first to its last timestamp; an anomaly that starts
    and ends on different days is split into one segment running to the end
    of the first day and one starting at the beginning of the last day.
    The figure is written to
    ``destination_path + '/anomalies_timeline_' + sensor_name + '.html'``.

    Parameters
    ----------
    interval_frequency
        Sampling interval in minutes; used to build the time-of-day
        categories between 00:00:00 and 23:55:00.
    all_co2_dataframe : pandas.DataFrame
        Must have a datetime ``'timestamp'`` column; its distinct days (in
        order of first appearance) form the y categories.
    sensors_with_anomalies : iterable of str
        Sensor names to plot.
    data : dict
        ``data['anomalies']['anomaly_co2_values'][sensor_name][0]`` is
        iterated as a sequence of anomalies, each a dict whose
        ``'anomalies_details'`` entries start with a timestamp string.
        Characters ``[0:10]`` of that string are used as the day and
        ``[11:]`` as the time (presumably ``'YYYY-MM-DD HH:MM:SS'`` --
        verify against the producer of ``data``).
    upper_bound
        CO2 threshold, only used in the plot title.
    destination_path : str
        Directory the HTML files are written to.

    Returns
    -------
    The figure of the last sensor processed, or ``None`` when
    ``sensors_with_anomalies`` is empty.
    """
    times = pd.date_range(start='00:00:00',
                          end='23:55:00',
                          freq=str(interval_frequency) +
                          'Min').strftime('%H:%M:%S')
    days = all_co2_dataframe['timestamp'].dt.strftime('%Y-%m-%d').to_list()
    days = list(dict.fromkeys(days))  # de-duplicate, keep first-seen order
    timestamp = list(times)

    # Label only the categories that fall on a full hour. Deriving positions
    # and labels from the categories themselves keeps them correct for any
    # sampling interval (every 12th entry for 5-minute data).
    hour_ticks = [
        pos for pos, stamp in enumerate(timestamp)
        if stamp.endswith(':00:00')
    ]
    hour_labels = {pos: timestamp[pos][:5] for pos in hour_ticks}

    p = None  # stays None when there is no sensor to plot
    for sensor_name in sensors_with_anomalies:
        anomalies = data['anomalies']['anomaly_co2_values'][sensor_name][0]
        p = figure(plot_height=500,
                   plot_width=2000,
                   x_range=timestamp,
                   y_range=days,
                   title='Timeline of periods with CO2 levels higher than ' +
                   str(upper_bound) + ' ppm in ' + sensor_name +
                   '\n Based on data for last ' + str(len(days)) + ' days',
                   active_drag=None,
                   toolbar_location=None)
        p.x_range.range_padding = 0
        p.y_range.range_padding = 0
        p.title.text_font_size = '15pt'
        p.xaxis.axis_label_text_font_size = "15pt"
        p.yaxis.axis_label_text_font_size = "15pt"
        p.yaxis.major_label_text_font_size = '9pt'
        p.xaxis.major_label_text_font_size = '5pt'
        # set x axis to invisible
        p.xaxis.visible = False
        # Add custom axis with ticker labels only on full hours
        ticker = FixedTicker()
        ticker.ticks = hour_ticks
        xaxis = LinearAxis(ticker=ticker)
        xaxis.major_label_orientation = math.pi / 3
        p.add_layout(xaxis, 'below')
        xaxis.major_label_overrides = dict(hour_labels)

        # add anomalies recorded for each day in considered period
        for anomaly in anomalies:
            details = anomaly['anomalies_details']
            first_stamp = details[0][0]
            last_stamp = details[-1][0]
            start_time, end_time = first_stamp[11:], last_stamp[11:]
            start_day, end_day = first_stamp[0:10], last_stamp[0:10]

            if start_day == end_day:
                x = [start_time, end_time]
                y = [start_day, end_day]
                p.line(x,
                       y,
                       line_width=2,
                       color='blue',
                       legend_label='CO2 above critical value')
                p.circle(x, y, fill_color="blue", line_color='blue', size=5)
            else:
                # Anomaly crosses midnight: draw it to the end of the first
                # day and from the start of the last day, and mark only the
                # true start and end points.
                p.line([start_time, timestamp[-1]], [start_day, start_day],
                       line_width=2,
                       color='blue')
                p.line([timestamp[0], end_time], [end_day, end_day],
                       line_width=2,
                       color='blue')
                p.circle([start_time, end_time], [start_day, end_day],
                         fill_color="blue",
                         line_color='blue',
                         size=5)

        # save graph in output location
        output_file(destination_path + '/anomalies_timeline_' + sensor_name +
                    '.html')
        save(p)
    return p