コード例 #1
0
    def add_categorical_colormap(self, palette, categorical_name, **kwargs):
        """
        Create a Categorical Colormap and store it on ``self.cmap``.

        Parameters
        ----------
        palette: str or Tuple
            The color palette of the colormap. It can either be one of Bokeh's default palettes or a Tuple of colors in hexadecimal format.
        categorical_name: str
            The column name of the loaded dataset that contains the categorical values
        **kwargs
            Forwarded unchanged to ``bokeh_mdl.CategoricalColorMapper``.

        Returns
        -------
        cmap: Dict
            The Categorical Colormap, as ``{'field': categorical_name, 'transform': <mapper>}``

        Raises
        ------
        ValueError
            If ``palette`` is neither a tuple nor listed in ``ALLOWED_CATEGORICAL_COLOR_PALLETES``.
        """
        if not (isinstance(palette, tuple) or palette in ALLOWED_CATEGORICAL_COLOR_PALLETES):
            raise ValueError(f'Invalid Palette Name/Tuple. Allowed (pre-built) Palettes: {ALLOWED_CATEGORICAL_COLOR_PALLETES}')

        categories = sorted(np.unique(self.source.data[categorical_name]).tolist())

        if not isinstance(palette, tuple):
            # A named palette is indexed by its size (as the original lookup did).
            # Not every size exists (e.g. there may be no 1- or 2-colour variant),
            # so take the smallest variant that covers all categories, falling back
            # to the largest one, instead of failing with a KeyError.
            palette_by_size = getattr(palettes, palette)
            n_categories = len(categories)
            available_sizes = sorted(palette_by_size)
            chosen_size = next(
                (size for size in available_sizes if size >= n_categories),
                available_sizes[-1],
            )
            palette = palette_by_size[chosen_size][:n_categories]

        cmap = bokeh_mdl.CategoricalColorMapper(palette=palette, factors=categories, **kwargs)

        self.cmap = {'field': categorical_name, 'transform': cmap}
        return self.cmap
コード例 #2
0
ファイル: cluster.py プロジェクト: ConorFWild/pandda_2_tools
def output_html_graph(df,
                      out_path,
                      ):
    """Write an interactive scatter plot of clustered points to an HTML file.

    The points are read from the ``component_0`` / ``component_1`` columns and
    coloured by the ``cluster`` column; ``dtag`` is shown in the hover tooltip.

    Note: the ``cluster`` column of ``df`` is converted to strings in place.

    :param df: DataFrame with ``cluster``, ``dtag``, ``component_0`` and
        ``component_1`` columns.
    :param out_path: destination of the HTML file (passed through ``str``).
    """
    df["cluster"] = df["cluster"].apply(str)

    # Get cds
    cds = ColumnDataSource(df)

    # d3 Category20 only provides sizes 3..20, so clamp the requested size and
    # repeat colours when there are more clusters than the palette offers.
    clusters = df['cluster'].unique()
    n_clusters = len(clusters)
    base_palette = d3['Category20'][min(max(n_clusters, 3), 20)]
    palette = [
        base_palette[i % len(base_palette)]
        for i in range(max(n_clusters, len(base_palette)))
    ]
    color_map = bmo.CategoricalColorMapper(factors=clusters,
                                           palette=palette)

    # Define tooltips
    TOOLTIPS = [
        ("dtag", "@dtag"),
        ("(component_0,component_1)", "($x, $y)"),
        ("cluster", "@cluster")
    ]

    # Gen figure
    p = figure(plot_width=800,
               plot_height=800,
               tooltips=TOOLTIPS,
               title="Mouse over the dots")

    # Plot data
    p.circle('component_0',
             'component_1',
             color={'field': 'cluster', 'transform': color_map}, size=10, source=cds)

    # Save figure
    save(p,
         str(out_path),
         )
コード例 #3
0
def clicks_v_impressions(data,
                         click_lower_bound=5,
                         output=False,
                         semantic_term='PriceResearch'):
    """
    Scatter-plot clicks against impressions, coloured by whether the row's
    'Semantic Classification' contains ``semantic_term``.

    :param data: The dataframe of download.csv
    :param click_lower_bound: Records with clicks fewer than this number are excluded from the plot (for performance).
    :param output: If false, assumes the function is running from a Jupyter notebook, if a path as a string, will write
    the plot as a .html file to the supplied path.
    :param semantic_term: Pattern looked up in the 'Semantic Classification' column via ``str.contains``.
    """

    # Generate random portfolio values
    # TODO remove this after the classification package is operable
    test_portfolio_vals = np.array(
        ['paid', 'paid | organic', 'none', 'organic'])
    data['Portfolio Classification'] = test_portfolio_vals[np.random.randint(
        0, len(test_portfolio_vals), len(data))]

    # Work on a copy of the filtered rows so that adding the 'bool' column
    # below does not write into a slice of the caller's frame.
    data = data[data['Clicks'] > click_lower_bound].copy()

    contains_term = data['Semantic Classification'].str.contains(semantic_term)
    label_for = {
        True: 'Contains {}'.format(semantic_term),
        False: 'Does not Contain {}'.format(semantic_term)
    }
    data['bool'] = contains_term.map(label_for)

    # The colour mapper is keyed on the 'bool' labels (at most two of them), so
    # size the palette for those rather than for the unrelated portfolio
    # column. 3 is the smallest size d3 Category10 offers.
    factors = data['bool'].dropna().unique()
    palette = d3['Category10'][3]
    color_map = bmo.CategoricalColorMapper(factors=factors,
                                           palette=palette)
    source = ColumnDataSource(data=data)

    TOOLTIPS = [('Search Term', '@{Search term}'), ('Cost', '@Cost')]

    if isinstance(output, str):
        output_file(output)
    elif output is False:
        output_notebook()

    p = figure(plot_width=800, plot_height=400, tooltips=TOOLTIPS)
    p.circle(x='Clicks',
             y='Impressions',
             source=source,
             color={
                 'field': 'bool',
                 'transform': color_map
             },
             legend='bool')
    p.xaxis.axis_label = 'Paid Clicks'
    p.yaxis.axis_label = 'Paid Impressions'

    # Plotting parameters
    show(p)
コード例 #4
0
def exportTFIDF_HTML(tsne_tfidf_df,
                     filename='images/tfidf.html',
                     char_lenght=200,
                     title="TF-IDF Clustering",
                     plot_width=890,
                     plot_height=600):
    """Render a 2-D scatter of ``tsne_tfidf_df`` with Bokeh and write it as HTML.

    Points are read from the ``x`` / ``y`` columns and coloured by ``category``;
    ``description`` and ``category`` are shown in the hover tooltip.

    Note: the ``description`` column of ``tsne_tfidf_df`` is truncated in place.

    :param tsne_tfidf_df: DataFrame with ``x``, ``y``, ``category`` and
        ``description`` columns.
    :param filename: path of the HTML file to write.
    :param char_lenght: end index of the slice kept from each description.
    :param title: plot title.
    :param plot_width: plot width passed to the figure.
    :param plot_height: plot height passed to the figure.
    """
    # ---- Visualize INTERACTIVE 2-D graph using bokeh ----
    from bokeh.resources import CDN
    from bokeh.embed import file_html
    import bokeh.plotting as bp
    from bokeh.palettes import d3
    import bokeh.models as bmo
    from bokeh.models import HoverTool

    # NOTE(review): the slice starts at 1, so the first character of every
    # description is dropped -- confirm whether [:char_lenght] was intended.
    tsne_tfidf_df['description'] = tsne_tfidf_df['description'].apply(
        lambda x: x[1:char_lenght])

    plot_tfidf = bp.figure(
        plot_width=plot_width,
        plot_height=plot_height,
        title=title,
        tools="pan,wheel_zoom,box_zoom,reset,hover,previewsave",
        x_axis_type=None,
        y_axis_type=None,
        min_border=1)

    # d3 Category20 only provides sizes 3..20: clamp the requested size and
    # repeat colours if there are more categories than that.
    factors = tsne_tfidf_df['category'].map(str).unique()
    n_factors = len(factors)
    base_palette = d3["Category20"][min(max(n_factors, 3), 20)]
    palette = [
        base_palette[i % len(base_palette)]
        for i in range(max(n_factors, len(base_palette)))
    ]
    color_map = bmo.CategoricalColorMapper(factors=factors, palette=palette)

    plot_tfidf.scatter(x='x',
                       y='y',
                       color={
                           'field': 'category',
                           'transform': color_map
                       },
                       legend='category',
                       source=tsne_tfidf_df)
    hover = plot_tfidf.select(dict(type=HoverTool))
    hover.tooltips = {"description": "@description", "category": "@category"}

    html = file_html(plot_tfidf, CDN, "Plot")
    # Context manager closes the file even if the write fails; the encoding is
    # explicit so non-ASCII text does not depend on the platform default.
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(html)
コード例 #5
0
def plotTSNE_DF(tsne_df,
                filename='tSNE_Graph.html',
                char_lenght=50,
                title="tSNE Clustering",
                plot_width=890,
                plot_height=501):
    """Render a 2-D scatter of ``tsne_df`` coloured by ``topic`` and write it as HTML.

    Note: the ``description`` column of ``tsne_df`` is shortened in place to
    its first ``char_lenght`` whitespace-separated words.

    :param tsne_df: DataFrame with ``x``, ``y``, ``topic`` and ``description``
        columns (``category`` is referenced by the tooltip).
    :param filename: path of the HTML file to write.
    :param char_lenght: number of words kept from each description.
    :param title: plot title.
    :param plot_width: plot width passed to the figure.
    :param plot_height: plot height passed to the figure.
    """
    tsne_df['description'] = tsne_df['description'].apply(
        lambda text: " ".join(text.split()[:char_lenght]))

    plot_tsne = bp.figure(
        plot_width=plot_width,
        plot_height=plot_height,
        title=title,
        tools="pan,wheel_zoom,box_zoom,reset,hover,previewsave",
        x_axis_type=None,
        y_axis_type=None,
        min_border=1)

    # Two 20-colour palettes are concatenated to give 40 colours.
    palette = d3['Category20'][20] + d3['Category20b'][20]
    color_map = bmo.CategoricalColorMapper(factors=tsne_df['topic'].unique(),
                                           palette=palette)

    plot_tsne.scatter('x',
                      'y',
                      source=tsne_df,
                      color={
                          'field': 'topic',
                          'transform': color_map
                      },
                      legend='topic')
    hover = plot_tsne.select(dict(type=HoverTool))
    hover.tooltips = {
        "description": "@description",
        "topic": "@topic",
        "category": "@category",
        "username": "******"
    }

    html = file_html(plot_tsne, CDN, "Plot")
    # Context manager closes the file even if the write fails; the encoding is
    # explicit so non-ASCII text does not depend on the platform default.
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(html)
コード例 #6
0
def make_color_mapper(y_values, y_type, formatting):
    """ Generates color mapper which takes in values and outputs the color hexcode.

    :param (pd.Series) y_values: pandas Series to be plotted, for calculating min/max
    :param (str) y_type: 'sequential', 'divergent', or 'categorical' -- for palette
    :param (dict) formatting: see DEFAULTFORMAT from params.py; the keys read
        here are 'palette', 'ncolors', 'reverse_palette' and, for non-categorical
        types, 'min', 'max', 'low_color', 'high_color' and 'lin_or_log'
    :return: Bokeh colormapper object """

    try:
        palette = PALETTES[y_type][formatting['palette']][
            formatting['ncolors']].copy()
    except KeyError:  ## if palette is not in default list
        palette = get_palette_colors(formatting['palette'],
                                     formatting['ncolors']).copy()
    except TypeError:  ## if formatting['palette'] is a list
        palette = formatting['palette'].copy()

    if formatting['reverse_palette']:
        palette.reverse()

    if y_type in ['sequential', 'divergent']:
        # Explicit numeric bounds win; otherwise fall back to the data range.
        c_min = formatting['min'] if isinstance(formatting['min'],
                                                (int,
                                                 float)) else min(y_values)
        c_max = formatting['max'] if isinstance(formatting['max'],
                                                (int,
                                                 float)) else max(y_values)
        below_color = formatting['low_color'] if isinstance(
            formatting['low_color'], str) else None
        # Each out-of-range colour is validated against its own setting
        # (previously 'high_color' was gated on the type of 'low_color').
        above_color = formatting['high_color'] if isinstance(
            formatting['high_color'], str) else None

        mapper_fx = {
            'lin': models.LinearColorMapper,
            'log': models.LogColorMapper
        }
        mapper = mapper_fx[formatting['lin_or_log']](palette=palette,
                                                     low=c_min,
                                                     high=c_max,
                                                     low_color=below_color,
                                                     high_color=above_color)
    else:
        mapper = models.CategoricalColorMapper(factors=y_values.unique(),
                                               palette=palette)
    return mapper
コード例 #7
0
def exportKmeansDF(kmeans_df,
                   filename='KMeansGraph.html',
                   char_lenght=200,
                   title="KMeans clustering",
                   plot_width=890,
                   plot_height=600):
    """Render a 2-D scatter of ``kmeans_df`` coloured by ``cluster`` and write it as HTML.

    :param kmeans_df: DataFrame with ``x``, ``y`` and ``cluster`` columns
        (``description`` and ``category`` are referenced by the tooltip).
    :param filename: path of the HTML file to write.
    :param char_lenght: unused here; kept so existing callers keep working.
    :param title: plot title.
    :param plot_width: plot width passed to the figure.
    :param plot_height: plot height passed to the figure.
    """
    import bokeh.plotting as bp
    from bokeh.palettes import d3
    import bokeh.models as bmo
    from bokeh.models import HoverTool
    from bokeh.embed import file_html
    from bokeh.resources import CDN
    plot_kmeans = bp.figure(
        plot_width=plot_width,
        plot_height=plot_height,
        title=title,
        tools="pan,wheel_zoom,box_zoom,reset,hover,previewsave",
        x_axis_type=None,
        y_axis_type=None,
        min_border=1)

    # Two 20-colour palettes are concatenated to give 40 colours.
    palette = d3['Category20'][20] + d3['Category20b'][20]
    color_map = bmo.CategoricalColorMapper(
        factors=kmeans_df['cluster'].unique(), palette=palette)

    plot_kmeans.scatter('x',
                        'y',
                        source=kmeans_df,
                        color={
                            'field': 'cluster',
                            'transform': color_map
                        },
                        legend='cluster')
    hover = plot_kmeans.select(dict(type=HoverTool))
    hover.tooltips = {
        "description": "@description",
        "cluster": "@cluster",
        "category": "@category"
    }

    html = file_html(plot_kmeans, CDN, "Plot")
    # Context manager closes the file even if the write fails; the encoding is
    # explicit so non-ASCII text does not depend on the platform default.
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(html)
コード例 #8
0
ファイル: Modelo.py プロジェクト: ds4a82/secop-analysis
def exportKmeansDF(kmeans_df, filename = 'KMeansGraph.html', char_lenght = 200, title="KMeans clustering", plot_width=890, plot_height=600):
    """Render a 2-D scatter of ``kmeans_df`` coloured by ``cluster`` and save it as HTML.

    :param kmeans_df: DataFrame with ``x``, ``y`` and ``cluster`` columns
        (``descripcion_del_proceso`` is referenced by the tooltip).
    :param filename: path of the HTML file to write.
    :param char_lenght: unused here; kept so existing callers keep working.
    :param title: plot title, also used as the HTML document title.
    :param plot_width: plot width passed to the figure.
    :param plot_height: plot height passed to the figure.
    """
    plot_kmeans = bp.figure(
        plot_width=plot_width
        , plot_height=plot_height
        , title=title
        , tools= "pan,wheel_zoom,box_zoom,reset,hover"
        , x_axis_type=None, y_axis_type=None, min_border=1)
    # Two 12-colour palettes are concatenated to give 24 colours.
    palette = d3['Category20'][12] + d3['Category20b'][12]
    color_map = bmo.CategoricalColorMapper(
        factors=kmeans_df['cluster'].unique()
        , palette=palette
        )
    plot_kmeans.scatter('x', 'y', source=kmeans_df,
                        color={'field': 'cluster', 'transform': color_map},
                        legend='cluster')
    hover = plot_kmeans.select(dict(type=HoverTool))
    hover.tooltips={"description": "@descripcion_del_proceso", "cluster": "@cluster"}
    # The document is written by output_file/save below; the HTML string that
    # used to be rendered here with file_html was never used, so that extra
    # rendering pass has been dropped.
    output_file(filename, title=title, mode='inline', root_dir=None)
    save(plot_kmeans)
コード例 #9
0
        'new_vaccinations_smoothed', 'total_vaccinations_per_hundred',
        'people_vaccinated_per_hundred', 'people_fully_vaccinated_per_hundred',
        'new_vaccinations_smoothed_per_million', 'stringency_index',
        'population', 'population_density', 'median_age', 'aged_65_older',
        'aged_70_older', 'gdp_per_capita', 'extreme_poverty',
        'cardiovasc_death_rate', 'diabetes_prevalence', 'female_smokers',
        'male_smokers', 'handwashing_facilities', 'hospital_beds_per_thousand',
        'life_expectancy', 'human_development_index'
    ],
    aggfunc=np.mean)
#covid_pivoted_whole= covid_pivoted.merge(continent, on='iso_code')
# The second level of each index entry is copied into a regular column so it
# can be used as the colour/legend field.
covid_pivoted['continent'] = [index[1] for index in covid_pivoted.index]
source = bpl.ColumnDataSource(covid_pivoted)

# use whatever palette you want...
# d3 Category10 only provides sizes 3..10, so clamp the requested size.
continent_factors = covid_pivoted['continent'].unique()
palette = d3['Category10'][min(max(len(continent_factors), 3), 10)]
color_map = bmo.CategoricalColorMapper(
    factors=continent_factors, palette=palette)

# create figure and plot
p = bpl.figure()
# legend_group builds one legend entry per distinct value of the column;
# legend_label would create a single entry with the literal text 'continent'.
p.scatter(x='total_tests_per_thousand',
          y='total_cases_per_million',
          color={
              'field': 'continent',
              'transform': color_map
          },
          legend_group='continent',
          source=source)
boi.output_file('allatok.html')
bpl.show(p)
コード例 #10
0
# NOTE(review): the data source is built before 'since' is rewritten below, so
# the "starting year" tooltip presumably shows the original 'since' values --
# confirm that this ordering is intended.
source = ColumnDataSource(plot_df)
plot_df['since'] = [2019 - ele for ele in plot_df.since]

# tooltips for plot
TOOLTIPS = [
    ("#species", "@n_species"),
    ("bird common", "@species_common"),
    ("travel distance", "@median_travel_distance"),
    ("#checklists", "@n_checklists"),
    ("group size", "@mean_group_size"),
    ("starting year", "@since"),
    ("median_start", "@median_start"),
    ("percent_travel", "@percent_travel")
]

# plot users
# d3 Category20 only provides sizes 3..20, so clamp at both ends.
palette = d3['Category20'][min(max(3, len(plot_df['label'].unique())), 20)]
color_map = bmo.CategoricalColorMapper(factors=plot_df['label'].unique(),
                                       palette=palette)

# create figure and plot
birders_plot = figure(title=' ', tooltips=TOOLTIPS)
birders_plot.circle('x', 'y', color='black', fill_color={'field': 'label', 'transform': color_map}, size=6,
                    alpha=0.2, fill_alpha=0.6, source=source)
birders_plot.toolbar.logo = None
birders_plot.toolbar_location = None

# save output
output_file('users_NY_2005-2019.html')
show(birders_plot)
コード例 #11
0
# NOTE(review): `plt` is presumably matplotlib.pyplot, imported outside this excerpt.
plt.margins(0.02)

# Fit a straight line of PTS/G against MP/G and evaluate it on evenly spaced
# MP/G values between the observed minimum and maximum.
from sklearn.linear_model import LinearRegression
line_model = LinearRegression()
# Despite its name, line_length holds the x values (as a column vector) at
# which the fitted line is evaluated.
line_length = np.linspace(min(restrict["MP/G"]),
                          max(restrict["MP/G"])).reshape(-1, 1)
line_model.fit(restrict[["MP/G"]], restrict[["PTS/G"]])
bfline = line_model.predict(line_length)

import bokeh.plotting as bp
import bokeh.models as bm
import bokeh.io as bi

source = bp.ColumnDataSource(restrict)
# One colour per position, in the order of the factors listed here; `color`
# is defined outside this excerpt.
color_scat = bm.CategoricalColorMapper(factors=["PG", "SG", "SF", "PF", "C"],
                                       palette=color)
hover = bm.HoverTool(tooltips=[("Player", "@Player"), ("Position",
                                                       "@Pos"), ("Tm", "@Tm")])
plot = bp.figure(title="2017-2018 NBA Season",
                 title_location="above",
                 x_axis_label="Minutes Played",
                 y_axis_label="Points Scored",
                 tools=[hover, "pan", "wheel_zoom"])

# Scatter of MP/G against PTS/G, coloured by the "Pos" column.
plot.circle(x="MP/G",
            y="PTS/G",
            source=source,
            color=dict(field="Pos", transform=color_scat))

# Drawing of the fitted line is currently disabled.
#plot.line([line_length], [bfline], line_color = "black", line_width=3)
コード例 #12
0
def visualization(tsne_lda, _lda_keys, descriptors, divergent_keys,
                  divergent_topics, labels, outdir, model_num):
    """Plot a 2-D embedding coloured by topic and open it as an HTML page.

    Every row of ``tsne_lda`` becomes a point coloured by the matching entry of
    ``divergent_keys``. For each entry of ``divergent_topics`` its name is
    drawn at the average position of the points whose ``_lda_keys`` value
    equals it.

    :param tsne_lda: 2-D array; column 0 is used as x and column 1 as y.
    :param _lda_keys: per-point topic key, used to locate the topic centres.
    :param descriptors: per-point text shown in the hover tooltip.
    :param divergent_keys: per-point topic label (stringified) used for colour.
    :param divergent_topics: topics whose names are drawn on the plot.
    :param labels: labels from which the colour factors are derived; an extra
        'others' factor coloured gray is always appended.
    :param outdir: prefix of the output path (concatenated as-is).
    :param model_num: integer used in the output file name.
    """
    df = pd.DataFrame({
        "descriptors": descriptors,
        "topic": [str(i) for i in divergent_keys],
        "x": tsne_lda[:, 0],
        "y": tsne_lda[:, 1],
    })

    df_cor = pd.DataFrame()
    df_cor['x'] = tsne_lda[:, 0]
    df_cor['y'] = tsne_lda[:, 1]
    df_cor['topics'] = _lda_keys

    # Average position of the points belonging to each divergent topic.
    coordinates = []
    for topic in divergent_topics:
        members = df_cor[df_cor['topics'] == topic]
        coordinates.append([np.average(members['x'].tolist()),
                            np.average(members['y'].tolist())])

    source = bpl.ColumnDataSource(df)

    # De-duplicate while keeping first-seen order: with a plain set() the
    # label -> colour assignment could differ from one run to the next.
    unique_labels = list(dict.fromkeys(labels))

    # Repeat the base palette as often as needed, then add gray for 'others'.
    palette = list(
        itertools.islice(itertools.cycle(palette_dark25), len(unique_labels)))
    palette = palette + ['gray']

    color_map = bmo.CategoricalColorMapper(
        factors=[str(i) for i in unique_labels] + ['others'], palette=palette)

    TOOLS = "hover,crosshair,pan,wheel_zoom,zoom_in,zoom_out,box_zoom,undo,redo,reset,tap,save,box_select,poly_select,lasso_select,"

    p = bpl.figure(
        plot_width=2000,
        plot_height=2000,
        tools=TOOLS,
        x_axis_type=None,
        y_axis_type=None,
        min_border=1)

    p.scatter(x='x',
              y='y',
              color={
                  'field': 'topic',
                  'transform': color_map
              },
              legend='topic',
              source=source,
              fill_alpha=0.3,
              radius=0.2)

    hover = p.select(dict(type=HoverTool))
    hover.tooltips = """<font size='5pt'>Corpus: @topic <br /> Descriptors: @descriptors</font>"""

    # Label each topic slightly to the left of its centre.
    for (x_cor, y_cor), topic in zip(coordinates, divergent_topics):
        p.text(x_cor - 5,
               y_cor, [topic],
               text_color="black",
               text_font_size='50pt',
               text_font_style='bold')

    bpl.output_file(outdir + "%s_%d_topics.html" %
                    ('visualization', model_num))
    bpl.show(p)
コード例 #13
0
    df_nans.to_dict('list')
)

## Bokeh workflow

# output_notebook()

#
# Figures and colormaps
#

p_answered = figure(tools=TOOLS)
p_nanswered = figure(tools=TOOLS)

# 'Set1' only exists for a limited range of sizes, so the requested size is
# clamped to that range instead of indexing by the raw cluster count (which
# raises KeyError for e.g. one or two clusters).
ans_cmap = bmo.CategoricalColorMapper(
    factors=df_ans.cluster.unique(),
    palette=all_palettes['Set1'][min(
        max(len(df_ans.cluster.unique()), min(all_palettes['Set1'])),
        max(all_palettes['Set1']))]
)

nans_cmap = bmo.CategoricalColorMapper(
    factors=df_nans.cluster.unique(),
    palette=all_palettes['Set1'][min(
        max(len(df_nans.cluster.unique()), min(all_palettes['Set1'])),
        max(all_palettes['Set1']))]
) # this changes the order with which the colors are applied

#
# Visualizations
#

p_answered.scatter(
    x='x', 
    y='y',
    source=src_ans,
コード例 #14
0
## Bokeh workflow

# output_notebook()

#
# Figures and colormaps
#

p_answered = figure(tools=TOOLS)
p_nanswered = figure(tools=TOOLS)


def color_palettes(x):
    """Return the 'Set1' palette sized for the clusters in ``x``.

    The size is clamped to the sizes 'Set1' actually provides, so both very
    few and very many clusters get a palette instead of a KeyError.
    """
    set1 = all_palettes['Set1']
    return set1[min(max(len(x), min(set1)), max(set1))]


ans_cmap = bmo.CategoricalColorMapper(
    factors=df_ans.kmeans_cluster.unique(),
    palette=color_palettes(df_ans.kmeans_cluster.unique())
)

nans_cmap = bmo.CategoricalColorMapper(
    factors=df_nans.kmeans_cluster.unique(),
    palette=color_palettes(df_nans.kmeans_cluster.unique())
) # this changes the order with which the colors are applied

#
# Visualizations
#

p_answered.scatter(
    x='x', 
    y='y',
    source=src_ans,
コード例 #15
0
# Title shows the formatted start/end datetimes and the number of rows in `data`.
plot_title = "KMeans Clustering of the News: %s - %s (%s articles)" % (
    title_format_datetime(from_datetime), title_format_datetime(to_datetime),
    len(data.index))
plot_kmeans = bp.figure(
    plot_width=1600,
    plot_height=1200,
    title=plot_title,
    tools="pan,wheel_zoom,box_zoom,reset,hover,previewsave",
    x_axis_type=None,
    y_axis_type=None,
    min_border=1)

# Several palettes are concatenated (20 + 20 + 20 + 9 + 11 colours) so that a
# large number of clusters can each receive a colour.
palette = d3['Category20'][20] + d3['Category20b'][20] + d3['Category20c'][
    20] + brewer['BrBG'][9] + brewer['RdYlBu'][11]
color_map = bmo.CategoricalColorMapper(factors=kmeans_df['cluster'].unique(),
                                       palette=palette)

# Scatter of the x/y columns of kmeans_df, coloured by the 'cluster' column.
plot_kmeans.scatter('x',
                    'y',
                    source=kmeans_df,
                    color={
                        'field': 'cluster',
                        'transform': color_map
                    })
hover = plot_kmeans.select(dict(type=HoverTool))
# Note that the "Cluster" tooltip entry reads the 'Desc' column, not 'cluster'.
hover.tooltips = {
    "Category": "@category",
    "Cluster": "@Desc",
    "Description": "@description"
}
コード例 #16
0
def graphVectorSpace(tfidfVectors, extraColumns, dateForTitle, storyMap,
                     threshold):
    """Reduce TF-IDF vectors to 2-D and show them coloured by story.

    The vectors are reduced to 50 dimensions with truncated SVD and then to 2
    with t-SNE. Every article starts in category 'NA' and is then assigned to
    a story either straight from ``storyMap`` (``threshold`` is None) or by
    comparing its relatedness score to each story's first article against
    ``threshold``.

    :param tfidfVectors: TF-IDF matrix, one row per article.
    :param extraColumns: DataFrame providing the 'publication', 'id' and
        'content no nonascii' columns for the same articles.
    :param dateForTitle: sequence whose first element is appended to the title.
    :param storyMap: mapping of story -> sequence of article ids.
    :param threshold: minimum relatedness score for an article to be assigned
        to a story, or None to use the assignments in ``storyMap`` directly.
    """
    # Better results seem to be obtained by breaking the dimensionality reduction into two steps

    # First reduce to fifty dimensions with SVD
    from sklearn.decomposition import TruncatedSVD
    svd = TruncatedSVD(n_components=50, random_state=0)
    svdResults = svd.fit_transform(tfidfVectors)

    # Next continue to two dimensions with TSNE
    from sklearn.manifold import TSNE
    tsneModel = TSNE(n_components=2, verbose=1, random_state=0, n_iter=500)
    tsneResults = tsneModel.fit_transform(svdResults)
    tfidf2dDataFrame = pd.DataFrame(tsneResults)
    tfidf2dDataFrame.columns = ['x', 'y']

    tfidf2dDataFrame['publication'] = extraColumns['publication']
    tfidf2dDataFrame['id'] = extraColumns['id']
    tfidf2dDataFrame['content'] = extraColumns['content no nonascii'].map(
        lambda x: x[:200])

    # All articles will be marked as NA to indicate that they have not been assigned to a story
    # Then those which have been assigned one will be updated to refer to that
    tfidf2dDataFrame['category'] = 'NA'

    # If the threshold is not provided, then just graph the vector space as is
    # With colours indicating desired story grouping
    # This still has value because it shows how well stories cluster together
    if threshold is None:
        graphTitle = (
            "TF-IDF article clustering - story assignment from map - " +
            dateForTitle[0])
        for story, storyArticles in storyMap.items():
            for article in storyArticles:
                # Only assign when the id identifies exactly one row.
                matches = tfidf2dDataFrame.index[tfidf2dDataFrame['id'] ==
                                                 article]
                if len(matches) == 1:
                    # .loc writes into the frame itself; chained indexing
                    # (df['category'][i] = ...) may write to a temporary copy.
                    tfidf2dDataFrame.loc[matches[0], 'category'] = story
    else:
        graphTitle = (
            "TF-IDF article clustering - story assignment computed - " +
            dateForTitle[0])
        nonZeroCoords = initialiseAllNonZeroCoords(tfidfVectors)
        for story, storyArticles in storyMap.items():
            leadArticleIndex = extraColumns[extraColumns['id'] ==
                                            storyArticles[0]].index[0]
            # Compute score of all articles in corpus relative to first article in story (.product)
            scores = productRelatednessScores(tfidfVectors, nonZeroCoords,
                                              leadArticleIndex)
            # Every article scoring at or above the threshold joins the story;
            # the frame has a default 0..n-1 index, so positions are labels.
            related = np.flatnonzero(np.asarray(scores) >= threshold)
            tfidf2dDataFrame.loc[related, 'category'] = story

    import bokeh.plotting as bp
    from bokeh.models import HoverTool
    from bokeh.plotting import show
    from bokeh.palettes import d3
    import bokeh.models as bmo

    plot_tfidf = bp.figure(
        plot_width=700,
        plot_height=600,
        title=graphTitle,
        tools="pan,wheel_zoom,box_zoom,reset,hover,previewsave",
        x_axis_type=None,
        y_axis_type=None,
        min_border=1)

    # d3 Category20 only provides sizes 3..20: clamp the requested size and
    # repeat colours if there are more categories than that.
    factors = tfidf2dDataFrame['category'].map(str).unique()
    numCats = len(factors)
    basePalette = d3['Category20'][min(max(numCats, 3), 20)]
    palette = [
        basePalette[i % len(basePalette)]
        for i in range(max(numCats, len(basePalette)))
    ]
    color_map = bmo.CategoricalColorMapper(
        factors=factors,
        palette=palette)

    plot_tfidf.scatter(x='x',
                       y='y',
                       color={
                           'field': 'category',
                           'transform': color_map
                       },
                       legend='category',
                       source=tfidf2dDataFrame)
    hover = plot_tfidf.select(dict(type=HoverTool))
    plot_tfidf.legend.click_policy = "hide"
    hover.tooltips = {
        "id": "@id",
        "publication": "@publication",
        "content": "@content",
        "category": "@category"
    }

    show(plot_tfidf)
from bokeh.plotting import figure, show, output_notebook, reset_output
from bokeh.palettes import d3
import bokeh.models as bmo
from bokeh.io import save, output_file

output_notebook()
plot_tfidf = bp.figure(plot_width=700,
                       plot_height=600,
                       title="TF-IDF Clustering of the reviews",
                       tools="pan,wheel_zoom,box_zoom,reset,hover,save",
                       x_axis_type=None,
                       y_axis_type=None,
                       min_border=1)

# d3 Category10 only provides sizes 3..10. Clamping the category count to that
# range avoids the KeyError that "count + 1" produced for a single category
# and for ten categories.
category_factors = tsne_tfidf_df['category'].map(str).unique()
palette = d3['Category10'][min(max(len(category_factors), 3), 10)]
color_map = bmo.CategoricalColorMapper(
    factors=category_factors, palette=palette)

plot_tfidf.scatter(x='x',
                   y='y',
                   color={
                       'field': 'category',
                       'transform': color_map
                   },
                   legend_group='category',
                   source=tsne_tfidf_df)
hover = plot_tfidf.select(dict(type=HoverTool))
hover.tooltips = {"description": "@description", "category": "@category"}

show(plot_tfidf)

# In the bokeh plot, each cluster is determined by the variety of unique words commonly used in the reviews. Reviews that are clustered together tend to contain the same unique words and hence, are grouped together.