def plot_covariance_returns_correlation(correlation, title):
    """Save a correlation heatmap with two dendrograms as an offline HTML graph.

    The figure is written to ``graphs/<sanitized title>.html`` and a link to
    that file is displayed in place of the graph itself.

    :param correlation: correlation table; its ``index``, ``columns`` and
        ``values`` attributes are read (presumably a pandas DataFrame).
    :param title: figure title, also used to derive the output file name.
    """
    graph_path = 'graphs/{}.html'.format(_sanatize_string(title))

    # Dendrogram drawn from the bottom; its traces go on the secondary y axis.
    dendro_top = ff.create_dendrogram(correlation, orientation='bottom')
    for trace in dendro_top['data']:
        trace['yaxis'] = 'y2'

    # Dendrogram drawn from the right; its traces go on the secondary x axis.
    dendro_left = ff.create_dendrogram(correlation, orientation='right')
    for trace in dendro_left['data']:
        trace['xaxis'] = 'x2'

    hover_text = _generate_hover_text(correlation.index, correlation.columns,
                                      correlation.values, 'Ticker 2',
                                      'Ticker 1', 'Correlation')
    # The heatmap cells are positioned on the dendrogram tick values and the
    # colour range is pinned to [-1, 1].
    heatmap_trace = go.Heatmap(
        x=dendro_top['layout']['xaxis']['tickvals'],
        y=dendro_left['layout']['yaxis']['tickvals'],
        z=correlation.values,
        zauto=False,
        zmax=1.0,
        zmin=-1.0,
        text=hover_text,
        hoverinfo='text')

    traces = list(dendro_top['data']) + list(dendro_left['data'])
    traces.append(heatmap_trace)

    # Shared styling that hides grid, axis line, zero line and tick labels.
    hidden_axis = {
        'showgrid': False,
        'showline': False,
        'zeroline': False,
        'showticklabels': False,
        'ticks': ""
    }

    figure = go.Figure(
        data=traces,
        layout=go.Layout(title=title, showlegend=False, width=800,
                         height=800))
    figure['layout']['xaxis'].update(dict(hidden_axis, domain=[.15, 1]))
    figure['layout']['yaxis'].update(dict(hidden_axis, domain=[0, .85]))
    # NOTE: the secondary axes (xaxis2 / yaxis2) are not configured here and
    # keep their default layout.

    offline_py.plot(figure, filename=graph_path, auto_open=False)
    link_html = (
        'The graph for {} is too large. You can view it <a href="{}" target="_blank">here</a>.'
        .format(title, graph_path))
    display(HTML(link_html))
예제 #2
0
def clustermap(df):
    """Cluster the rows and columns of ``df`` and reorder it accordingly.

    Returns a dict with the reordered frame under ``'heatmap'``, the row
    dendrogram under ``'arch_dendro'`` and the column dendrogram under
    ``'other_dendro'``.
    """
    transposed = df.T

    # Rows are clustered with a right-oriented dendrogram, columns (rows of
    # the transpose) with a bottom-oriented one.
    row_dendro = ff.create_dendrogram(df, orientation='right',
                                      labels=df.index)
    col_dendro = ff.create_dendrogram(transposed, orientation='bottom',
                                      labels=transposed.index)

    # The tick text of each dendrogram holds the labels in leaf order.
    row_order = row_dendro['layout']['yaxis']['ticktext']
    col_order = col_dendro['layout']['xaxis']['ticktext']

    reordered = df[col_order].loc[row_order]
    return {
        'heatmap': reordered,
        'arch_dendro': row_dendro,
        'other_dendro': col_dendro,
    }
예제 #3
0
def dendrogram(nd, sample_list, p_cat_spec):
    """Build a left-oriented dendrogram of samples plus reference spectra.

    The categorical spectra table located via ``p_cat_spec`` is loaded, its
    columns are appended as extra rows (labelled ``GOLD-<COLUMN>``) below
    ``nd``, and all rows are clustered with centroid linkage on the
    ``extra_tools.PSD_sym_KL`` distance.

    :param nd: 2-D array with one row per sample.
    :param sample_list: labels for the rows of ``nd``; it is not modified.
    :param p_cat_spec: identifier passed to ``extra_tools.get_data_file`` to
        locate the categorical spectra table.
    :return: the plotly dendrogram figure.
    """
    # load categorical spectra
    f_cat_spec = extra_tools.get_data_file(p_cat_spec)
    cat_spec = pd.read_table(f_cat_spec, index_col=0)
    # ``.values`` replaces ``DataFrame.as_matrix()``, which was removed in
    # pandas 1.0.
    nd_spec = cat_spec.values.T
    reference_labels = [
        'GOLD-{}'.format(c).upper() for c in cat_spec.columns.tolist()
    ]

    stacked = np.vstack((nd, nd_spec))
    # Build a new label list instead of extending the caller's list in place,
    # so repeated calls with the same list do not accumulate GOLD-* labels.
    labels = list(sample_list) + reference_labels

    def _PSD_dist(rows):
        return hc.distance.pdist(rows, extra_tools.PSD_sym_KL)

    dend = ff.create_dendrogram(stacked,
                                labels=labels,
                                distfun=_PSD_dist,
                                linkagefun=hc.centroid,
                                orientation='left')
    dend['layout'].update({
        'width': 1500,
        'height': 800,
        'font': dict(size=18),
        # Plain dict instead of the deprecated ``go.Margin`` class.
        'margin': dict(l=450),
    })
    dend['layout']['xaxis'].update({'title': 'KL Divergence'})

    return dend
예제 #4
0
def generate_dendrogram(reducedX, new_labels, dendroname):
    """Plot a left-oriented dendrogram of ``reducedX`` to the file ``dendroname``.

    Progress messages are printed before and after plotting.
    """
    figure = ff.create_dendrogram(reducedX,
                                  labels=new_labels,
                                  orientation='left')
    print("dendogram created\n")

    figure['layout'].update({'width': 1000, 'height': 800})
    print("plotting started\n")

    plot(figure, filename=dendroname)
    print("plotting done\n")
예제 #5
0
def display_tree(filtered_data, words):
    """Render a dendrogram of records clustered by their location columns.

    The ``kraj``, ``region`` and ``miejscowość`` columns are integer-encoded
    (each distinct value is replaced by the index of its last occurrence) and
    the encoded table is passed to ``ff.create_dendrogram``; leaves are
    labelled with the original ``miejscowość`` values.

    :param filtered_data: data accepted by ``pd.DataFrame`` that contains the
        three location columns.
    :param words: sized collection; when it is empty nothing is rendered.
    :return: an ``html.Div`` wrapping the dendrogram graph, or ``None`` when
        ``words`` is empty.
    """
    if not len(words):
        return None

    location_columns = ['kraj', 'region', 'miejscowość']
    # Work on an explicit copy so the column assignments below do not write
    # into a slice of the source frame (pandas SettingWithCopyWarning).
    filtered_data = pd.DataFrame(filtered_data)[location_columns].copy()

    # Keep the original place names for the leaf labels before encoding.
    place_labels = filtered_data['miejscowość'].values

    for column in location_columns:
        # Later occurrences overwrite earlier ones, so each value maps to the
        # index of its last occurrence.
        codes = {
            value: position
            for position, value in enumerate(filtered_data[column].values)
        }
        for value, code in codes.items():
            filtered_data[column] = filtered_data[column].replace(value, code)

    print(filtered_data)

    fig = ff.create_dendrogram(filtered_data, labels=place_labels)

    return html.Div([dcc.Graph(id='dendro_map', figure=fig)])
예제 #6
0
        def update_dendro_graph(num_clusters, selectedData, selected_metrics,
                                norm):
            """Build the dendrogram figure for the currently visible rows.

            ``num_clusters`` and ``selectedData`` are accepted but not read
            here (presumably they exist only to trigger the update -- TODO
            confirm against the callback registration).

            :param selected_metrics: columns of ``app._df`` to cluster on.
            :param norm: when equal to ``'Znorm'`` every selected column is
                z-scored before clustering.
            :return: the plotly dendrogram figure.
            """
            # Only visible rows without missing values take part.
            data = app._df.loc[app._df.visible].dropna()[selected_metrics]
            if norm == 'Znorm':
                data = data.apply(zscore)

            # A threshold of 0.0 is used while no cluster click has been
            # recorded; otherwise the plotly default threshold applies.
            color_thresh = 0.0 if app._prev_cluster_clicks == 0 else None

            dendro = ff.create_dendrogram(
                data,
                # The callback argument is ignored: linkage is always computed
                # from ``data`` itself.
                linkagefun=lambda x: shc.linkage(
                    data, 'ward', metric='euclidean'),
                color_threshold=color_thresh)
            dendro['layout'].update({
                'height': 600,
                'xaxis': {
                    'automargin': True,
                    'showticklabels': False
                }
            })
            return dendro
예제 #7
0
def draw_dendogram(data, title, labels, format):
    """Draw a left-oriented single-linkage dendrogram and write it to disk.

    The output file is named ``<title>_dendogram`` and is written as a PDF
    image when ``format`` is ``'pdf'``, otherwise as an offline HTML plot.
    """

    def _single_linkage(_):
        # The argument supplied by plotly is ignored; linkage is computed
        # from ``data`` directly.
        return cx.get_linkage(data, 'single', 'euclidean')

    figure = ff.create_dendrogram(data,
                                  orientation='left',
                                  labels=labels,
                                  linkagefun=_single_linkage)

    figure['layout']['title'] = title

    # No tick marks and a larger tick font on both axes; the y axis line is
    # hidden as well.
    figure['layout']['xaxis'].update({
        'ticks': '',
        'tickfont': dict(size=24)
    })
    figure['layout']['yaxis'].update({
        'ticks': '',
        'tickfont': dict(size=24),
        'showline': False
    })

    # Wide left margin for the leaf labels.
    figure['layout']['margin'].update(
        {'b': 40, 'l': 300, 'r': 15, 't': 40, 'pad': 5})

    figure['layout'].update({
        'autosize': False,
        'width': 1200,
        'height': 1500
    })

    fname = title + '_dendogram'

    if format != 'pdf':
        offline.plot(figure, filename=fname + '.html')
        return
    pio.write_image(figure, fname + '.' + format)
예제 #8
0
    def plot(self):
        """Plot a dendrogram of the cluster means at the current level.

        The mean of every cluster in ``self.DS.clusters[self.DS.levels]`` is
        computed and the means are clustered with ``ff.create_dendrogram``.
        ``self.shortname`` is prefixed with ``self.DS.shortclustname`` as a
        side effect.

        :return: the result of ``self.makeplot`` for the figure, or an HTML
            message fragment when the dendrogram cannot be built.
        """
        title = self.titlestring % (self.DS.name, self.DS.clustname,
                                    self.DS.levels)
        self.shortname = self.DS.shortclustname + self.shortname
        means = [
            np.average(c, axis=0) for c in self.DS.clusters[self.DS.levels]
        ]
        X = np.column_stack(means).T
        try:
            fig = ff.create_dendrogram(X)
        except Exception:
            # Catch ``Exception`` rather than everything so that
            # KeyboardInterrupt / SystemExit still propagate. The failure is
            # reported as "only one cluster" (presumably the usual cause).
            return '''
                <div class="row" style="margin-top:20%">
                    <div class="col-md-4 offset-md-4 text-center">
                        <h1><b>Only one cluster found.</b></h1>
                        <h3>Perhaps try another algorithm?</h3>
                    </div>
                </div>
                '''
        if self.plot_mode != "div":
            fig["layout"]["title"] = title

        fig["layout"]["xaxis"]["title"] = "Cluster Labels"
        fig["layout"]["yaxis"]["title"] = "Cluster Mean Distances"

        return self.makeplot(fig, "agg/" + self.shortname)
예제 #9
0
def plotly_dendrogram(df: pd.DataFrame,
                      labels=None,
                      orientation='left',
                      color_threshold=1,
                      height=None,
                      width=None,
                      max_label_lenght=None):
    """Create a plotly dendrogram figure for the rows of ``df``.

    :param df: table to cluster, one observation per row.
    :param labels: leaf labels; defaults to ``df.index``.
    :param orientation: dendrogram orientation passed to plotly.
    :param color_threshold: colour threshold passed to plotly.
    :param height: figure height; defaults to ``max(500, 10 * len(df))``.
    :param width: figure width; ``None`` is passed through to plotly as is.
    :param max_label_lenght: when given, every label is truncated to this
        many characters.
    :return: the plotly figure.
    """
    if labels is None:
        labels = df.index

    if max_label_lenght is not None:
        labels = [label[:max_label_lenght] for label in labels]

    if height is None:
        # Roughly ten pixels per row, with a floor of 500.
        height = max(500, 10 * len(df))

    fig = ff.create_dendrogram(df,
                               color_threshold=color_threshold,
                               labels=labels,
                               orientation=orientation)

    fig.update_layout(width=width, height=height, font_family="Monospace")
    fig.update_layout(xaxis_showgrid=True, yaxis_showgrid=True)

    fig.update_yaxes(automargin=True)
    fig.update_xaxes(automargin=True)
    return fig
예제 #10
0
def hierarchical():
    """Render the hierarchical-clustering page for ``data/cluster.csv``.

    Every column except ``connections`` is clustered with Ward linkage; the
    ``connections`` column supplies the leaf labels. The figure is serialised
    to JSON and handed to the ``hierarchical.html`` template.
    """
    df = pd.read_csv("data/cluster.csv")
    X = df.drop(['connections'], axis=1)
    connection_labels = list(df['connections'])
    fig = ff.create_dendrogram(
        X,
        orientation='bottom',
        labels=connection_labels,
        # The callback argument is ignored; linkage is computed from X.
        linkagefun=lambda x: linkage(X, 'ward', metric='euclidean'))
    # print() call form: the former print statement is a syntax error on
    # Python 3, while this form prints the same on Python 2 and 3.
    print(connection_labels)
    fig['layout'].update({
        'width': 1200,
        'height': 650,
        'title': 'Hierarchical Clustering',
        'margin': {'b': 250}
    })
    graphJSON = json.dumps(fig, cls=plotly.utils.PlotlyJSONEncoder)
    return render_template('hierarchical.html', graphJSON=graphJSON)
예제 #11
0
def generate_dendro(words):
    """Return an 800x500 dendrogram of ``words`` built from ``word_vectors``.

    Each row of the clustered matrix holds the distances from one word to all
    of ``words``. ``None`` is returned when a ``KeyError`` is raised while
    building the figure (presumably for a word missing from the vocabulary).
    """
    try:
        distance_rows = [word_vectors.distances(w, words) for w in words]
        figure = ff.create_dendrogram(np.array(distance_rows), labels=words)
        figure['layout'].update({'width': 800, 'height': 500})
    except KeyError:
        return None
    return figure
예제 #12
0
 def get_dendrogram_graph(self, document_list):
     """Plot a left-oriented dendrogram of the documents in ``document_list``.

     Documents are compared through ``1 - cosine_similarity`` of their TF-IDF
     matrix and labelled with their position in that matrix.
     """
     tfidf_matrix = self.__get_tfidf_matrix(document_list)
     # One minus the cosine similarity, so larger values mean less similar.
     distance_matrix = 1 - cosine_similarity(tfidf_matrix)
     names = list(map(str, range(len(distance_matrix))))

     fig = ff.create_dendrogram(distance_matrix,
                                labels=names,
                                orientation='left')
     fig['layout'].update({'width': 1000, 'height': 1500})
     fig['layout']["images"] = self.watermark_image
     fig['layout']["title"] = "DENDOGRAM GRAPH"
     return iplot(fig, filename='dendrogram_with_labels')
예제 #13
0
def dendrogram(df, title='Dendrogram',
               out_path=None, layout_kwargs=None, to_image=False):
    """Create a Ward-linkage dendrogram of ``df`` and pass it to ``generate_plot``.

    :param df: data to cluster, one observation per row.
    :param title: figure title; also used as the output file name.
    :param out_path: forwarded to ``generate_plot``.
    :param layout_kwargs: optional extra keyword arguments for
        ``fig.update_layout``; the mapping is copied, never modified.
    :param to_image: forwarded to ``generate_plot``.
    """
    fig = ff.create_dendrogram(
        df,
        # The callback argument is ignored; linkage is computed from df.
        linkagefun=lambda x: linkage(df, method='ward', metric='euclidean'))

    # Copy so that neither a shared default nor the caller's dict ends up
    # carrying the title of a previous call.
    layout = dict(layout_kwargs) if layout_kwargs else {}
    layout['title'] = title
    fig.update_layout(**layout)

    generate_plot(fig, out_path=out_path, out_filename=title, to_image=to_image)
예제 #14
0
def sort_row_data(ar, label):
    """Reorder the rows of ``ar`` and their labels.

    With ``CLUSTERING_MODE`` enabled the rows follow the leaf order of a
    dendrogram built from ``ar``; otherwise they are sorted by the string
    formed by concatenating each row's elements.

    Returns the reordered rows and the reordered labels as two lists.
    """
    if not CLUSTERING_MODE:
        # Tuples sort on the concatenated key first, then on the row and the
        # label to break ties.
        keyed = sorted(
            ("".join(str(e) for e in ar[i]), ar[i], label[i])
            for i in range(len(label)))
        return [entry[1] for entry in keyed], [entry[2] for entry in keyed]

    fig_tmp = ff.create_dendrogram(np.array(ar), orientation='right')
    # Without explicit labels the tick text carries the row indices.
    leaves = [int(tick) for tick in fig_tmp['layout']['yaxis']['ticktext']]
    return [ar[i] for i in leaves], [label[i] for i in leaves]
예제 #15
0
    def subplot_dendogram(self, l1_df, l2_df, l3_df, l1_label, l1_color):
        """Plot the first-level cluster dendrogram with one marker per leaf.

        Only the first-level inputs are used; ``l2_df`` and ``l3_df`` are
        accepted but not read by this method.

        :param l1_df: data frame for the first-level clustering.
        :param l2_df: data frame for the second-level clustering (unused).
        :param l3_df: data frame for the third-level clustering (unused).
        :param l1_label: labels of the first-level clusters.
        :param l1_color: iterable of labels whose markers are highlighted,
            or ``None``; labels that are not leaves are skipped.
        :return: the plotly figure.
        """
        if len(l1_label) > 1:
            fig = ff.create_dendrogram(l1_df.to_numpy(),
                                       orientation='right',
                                       labels=l1_label)
            y = list(fig['layout']['yaxis']['tickvals'])
            new_labels = list(fig['layout']['yaxis']['ticktext'])
        else:
            # Too few labels for a dendrogram: draw a single marker instead.
            fig = go.Figure()
            new_labels = list(l1_label)
            y = [1]

        n_markers = len(y)
        x = np.full(n_markers, .1)
        size = np.full(n_markers, 10)
        color = np.full(n_markers, '#AED6F1')

        # Highlight the markers of the requested labels. Each label is looked
        # up on its own so one unknown label does not stop the remaining ones
        # from being highlighted.
        inds = []
        if l1_color is not None:
            for label in l1_color:
                try:
                    inds.append(new_labels.index(label))
                except ValueError:
                    continue
        if inds:
            color[inds] = '#F1C40F'

        fig.add_trace(
            go.Scatter(mode='markers',
                       x=x,
                       y=y,
                       text=new_labels,
                       hoverinfo='text',
                       marker=dict(size=size, color=color)))

        return fig
예제 #16
0
def update_dendrogram(hc_data):
    """Return a bottom-oriented dendrogram for ``hc_data['linkage']``.

    An empty figure is returned when ``hc_data`` equals ``{}``.
    """
    if hc_data == {}:
        return go.Figure()

    Z = np.array(hc_data['linkage'])

    def _ward_linkage(_):
        # The argument supplied by plotly is ignored; Ward linkage is always
        # computed from Z itself.
        return linkage(Z, 'ward', metric='euclidean')

    return FF.create_dendrogram(Z,
                                orientation='bottom',
                                linkagefun=_ward_linkage)
예제 #17
0
def render_dendrogram(animal_list,
                      results,
                      outdir,
                      outfilename,
                      threshold=0.5):
    """ Renders a Dendrogram given a BDD or CSD matrix.

    Given an animal_list of length n and an n by n triangular matrix of distances
    between the animal objects, this function renders a dendrogram using Plotly and
    writes the result (in both .png and .html form) to outdir.

    Parameters
    ----------
    animal_list : list of Animal() objects
        Corresponds to the animals that the pair-wise distances were calculated for.
        Order is assumed to match the order of the results.
    results : 2D array of floats (upper-triangular, empty diagonal)
        results[i][j] is the distances between trajectories of animal[i] and animal[j].
        Entries equal to '' are dropped before clustering.
    outdir : str
        Path to the output directory for the .png and .html files.
    outfilename : str
        Name that will be given to the files written by this function
        (spaces are removed from the resulting paths).
    threshold : float
        Value at which the separation of clusters in the dendrogram will be made.
    """
    base_path = os.path.join(outdir, outfilename)
    html_outpath = (base_path + '.html').replace(' ', '')
    png_outpath = (base_path + '.png').replace(' ', '')

    # Flatten the matrix row by row, skipping the empty ('') cells, and wrap
    # the result in one extra dimension so that distfun can unwrap it again.
    condensed = [d for row in results for d in row if d != '']
    dists = np.array([condensed])

    fig = ff.create_dendrogram(dists,
                               labels=animal_list,
                               distfun=lambda x: x[0],
                               linkagefun=lambda y: linkage(y, 'ward'),
                               color_threshold=threshold)
    xaxis = fig['layout']['xaxis']
    animals_sorted = xaxis['ticktext']
    label_vals = xaxis['tickvals']

    def tickgen(animal):
        # Tick label coloured by the animal's group.
        color = COLORS[animal.get_group()]
        text = animal.get_name()
        return f"<span style='color:{str(color)}'> {str(text)} </span>"

    colored_ticks = [tickgen(a) for a in animals_sorted]
    fig.update_layout(
        xaxis={
            'ticktext': colored_ticks,
            'range': [0, label_vals[0] + label_vals[-1]]
        })
    fig.write_image(png_outpath)
    plotly.offline.plot(fig, filename=html_outpath, auto_open=False)
예제 #18
0
def update_figure(input_text):
    """Return a left-oriented dendrogram of the sentences in ``input_text``.

    Sentences are embedded with ``sent_embeddings`` and clustered with
    complete linkage on the cosine metric; each leaf is labelled with the
    first 130 characters of its sentence.
    """
    sentences = sent_tokenize(input_text)
    labels = [sentence[:130] for sentence in sentences]
    sent_vecs = np.array([sent_embeddings(sentence) for sentence in sentences])

    def _complete_cosine(_):
        # The argument supplied by plotly is ignored; linkage is computed
        # from the sentence vectors directly.
        return linkage(sent_vecs, 'complete', metric='cosine')

    # calculate full dendrogram
    fig = ff.create_dendrogram(sent_vecs,
                               orientation='left',
                               labels=labels,
                               linkagefun=_complete_cosine)
    # Wide left margin for the sentence labels.
    fig.update_layout(margin=dict(l=800))
    return fig
예제 #19
0
    def clustered_data(self):
        """Return heatmap data reordered by two dendrogram leaf orders.

        The result is a dict of plain lists: ``'x'`` holds ``<label>_<index>``
        strings, ``'y'`` the pathways and ``'z'`` the transposed data matrix,
        each reordered to follow the dendrogram leaves.
        """
        data_array, labels, pathways = self._map_to_data_array()

        dx = ff.create_dendrogram(data_array,
                                  orientation='bottom',
                                  linkagefun=self.linkage_func)
        dy = ff.create_dendrogram(data_array.T,
                                  orientation='right',
                                  linkagefun=self.linkage_func)

        # Without explicit labels the tick text carries the row indices in
        # leaf order.
        x_order = [int(tick) for tick in dx['layout']['xaxis']['ticktext']]
        y_order = [int(tick) for tick in dy['layout']['yaxis']['ticktext']]

        # Reorder rows first, then columns, of the transposed matrix.
        heat_data = data_array.T[y_order, :][:, x_order]

        # Suffix every label with its position before reordering.
        indexed_labels = np.array(
            ['%s_%d' % (name, pos) for pos, name in enumerate(labels)])
        hx = indexed_labels[x_order]
        hy = pathways[y_order]

        return {'x': hx.tolist(), 'y': hy.tolist(), 'z': heat_data.tolist()}
예제 #20
0
    def load_dendogram_cluster(self,
                               med_df,
                               orig_labels,
                               height=900,
                               color_labels=None):
        """Plot a cluster dendrogram with one selectable marker per leaf.

        :param med_df: data frame whose rows are clustered.
        :param orig_labels: labels for the rows of ``med_df``.
        :param height: figure height.
        :param color_labels: iterable of labels whose markers are
            highlighted, or ``None``; labels that are not leaves are skipped.
        :return: the plotly figure.
        """
        if len(orig_labels) > 1:
            fig = ff.create_dendrogram(med_df.to_numpy(),
                                       orientation='right',
                                       labels=orig_labels)
            y = list(fig['layout']['yaxis']['tickvals'])
            new_labels = list(fig['layout']['yaxis']['ticktext'])
        else:
            # Too few labels for a dendrogram: draw a single marker instead.
            fig = go.Figure()
            new_labels = list(orig_labels)
            y = [1]

        n_markers = len(y)
        x = np.full(n_markers, .1)
        size = np.full(n_markers, 10)
        color = np.full(n_markers, '#AED6F1')

        # Highlight the markers of the requested labels. Each label is looked
        # up on its own so one unknown label does not stop the remaining ones
        # from being highlighted.
        inds = []
        if color_labels is not None:
            for label in color_labels:
                try:
                    inds.append(new_labels.index(label))
                except ValueError:
                    continue
        if inds:
            color[inds] = '#F1C40F'

        fig.add_trace(
            go.Scatter(mode='markers',
                       x=x,
                       y=y,
                       text=new_labels,
                       hoverinfo='text',
                       marker=dict(size=size, color=color)))
        fig.update_layout(height=height, clickmode='event+select')
        # Leaf labels on the right, with narrow side margins.
        fig['layout']['yaxis']['side'] = 'right'
        fig['layout']['margin']['r'] = 5
        fig['layout']['margin']['l'] = 5

        return fig
예제 #21
0
def heatmap_by_clustering(table_correlations, hovertemplate, customdata, zmin=-1, zmax=1):
    """Return a 1100x1100 correlation heatmap ordered by a dendrogram above it.

    Missing correlations are treated as 0 for clustering and ``1 - value`` is
    used as the distance. ``customdata``, when not ``None``, is reordered the
    same way as the correlations and attached to the heatmap.
    """
    fig = create_dendrogram(
        table_correlations.replace(np.nan, 0),
        orientation="bottom",
        distfun=lambda df: 1 - df,
    )
    # The dendrogram traces go on the secondary y axis, above the heatmap.
    for trace in fig["data"]:
        trace["yaxis"] = "y2"

    tick_positions = fig["layout"]["xaxis"]["tickvals"]
    leaf_order = [int(tick) for tick in fig["layout"]["xaxis"]["ticktext"]]
    labels = table_correlations.columns[leaf_order]

    fig.update_layout(
        xaxis={"ticktext": labels, "mirror": False},
        yaxis2={"domain": [0.85, 1], "showticklabels": False, "showgrid": False, "zeroline": False},
    )

    heat_customdata = None
    if customdata is not None:
        heat_customdata = customdata.loc[labels, labels].values

    heatmap = go.Heatmap(
        x=tick_positions,
        y=tick_positions,
        z=table_correlations.loc[labels, labels].values,
        colorscale=BLUE_WHITE_RED,
        customdata=heat_customdata,
        hovertemplate=hovertemplate,
        zmin=zmin,
        zmax=zmax,
    )

    # The primary y axis carries the same labels as the x axis.
    primary_yaxis = {
        "domain": [0, 0.85],
        "mirror": False,
        "showgrid": False,
        "zeroline": False,
        "ticktext": labels,
        "tickvals": tick_positions,
        "showticklabels": True,
        "ticks": "outside",
        "tickfont": {"size": 15},
    }
    fig.update_layout(yaxis=primary_yaxis, xaxis={"tickfont": {"size": 15}})

    fig.add_trace(heatmap)

    fig["layout"]["width"] = 1100
    fig["layout"]["height"] = 1100

    return fig
예제 #22
0
def draw_dendrogram(X):
    """Return an 800x500 dendrogram of ``X`` rendered by plotly as a ``div``.

    plotly.js is not included in the output and the plotly link is hidden.
    """
    figure = ff.create_dendrogram(X)
    figure['layout'].update({'width': 800, 'height': 500})

    plot_options = {
        'filename': 'dendrogram',
        'output_type': 'div',
        'include_plotlyjs': False,
        'show_link': False,
    }
    return plotly.offline.plot(figure, **plot_options)
예제 #23
0
def main():
    """Show a dendrogram for the CSV file named by the first CLI argument.

    The first line of the file is a comma-separated list of flags, one per
    column; columns flagged ``'0'`` are dropped before clustering. The
    remaining lines are the data rows.
    """
    fName = sys.argv[1]
    # Read the flag line inside a context manager so the handle is closed.
    with open(fName, "r") as fIn:
        colsToUse = fIn.readline().strip("\n").split(",")

    df = pd.read_csv(fName, header=None, skiprows=1)

    colsToDrop = [i for i, flag in enumerate(colsToUse) if flag == '0']
    df.drop(df.columns[colsToDrop], axis=1, inplace=True)

    fig = ff.create_dendrogram(df)
    fig.update_layout(width=800, height=500)
    fig.show()
예제 #24
0
def hiearchical_clustering(df_dendogram):
    """Cluster squads with the linkage method of highest cophenetic correlation.

    All columns from the third one onwards are standardised, every linkage
    method in the table below is scored by its cophenetic correlation, and a
    dendrogram is drawn with the first best-scoring method, labelled with the
    ``Squad`` column.

    Returns the figure and a data frame of the methods ranked by score.
    """
    import plotly.figure_factory as ff
    from sklearn.preprocessing import StandardScaler
    from scipy.cluster.hierarchy import dendrogram, linkage, cophenet
    from scipy.spatial.distance import pdist
    import scipy.cluster.hierarchy as sch

    methods = {
        'single': 'Nearest Point Algorithm (single)',
        'complete': 'Farthest Point Algorithm (complete)',
        'ward': 'Incremental Algorithm (ward)',
        'average': 'UPGMA Algorithm (average)',
        'weighted': 'WPGMA Algorithm (weighted)',
        'median': 'WPGMC Algorithm (median)',
        'centroid': 'UPGMC Algorithm (centroid)'
    }

    df_preprocess = StandardScaler().fit_transform(df_dendogram.iloc[:, 2:])

    # Cophenetic correlation of every method against the pairwise distances.
    pairwise = pdist(df_preprocess)
    list_coph = [
        cophenet(linkage(df_preprocess, method), pairwise)[0]
        for method in methods
    ]

    results = pd.DataFrame(
        zip(methods.values(), list_coph),
        columns=['Algorithm and Distance Method', 'Cophenetic Correlation'])
    results = results.sort_values(by='Cophenetic Correlation', ascending=False)
    results = results.reset_index(drop=True)

    top_score = max(list_coph)
    best = [
        method for method, score in zip(methods, list_coph)
        if score == top_score
    ]

    # The title uses the method description without its trailing "(name)".
    description = " ".join(methods[best[0]].split(" ")[:-1])
    fig = ff.create_dendrogram(df_preprocess,
                               orientation='bottom',
                               labels=list(df_dendogram.Squad),
                               linkagefun=lambda x: sch.linkage(x, best[0]))
    fig.update_layout(width=800, height=500,
                      title='Dendogram using ' + description)
    return fig, results
예제 #25
0
    def _get_dendrogram_fig(self) -> Figure:
        """Build the plotly dendrogram for the document-term matrix.

        Orientation, distance metric and linkage method are taken from
        ``self._dendro_option``; leaves are labelled through
        ``self._id_temp_label_map``.

        :return: A plotly figure object
        """
        options = self._dendro_option

        def _distances(matrix):
            return pdist(matrix, metric=options.dist_metric)

        def _linkage(dist):
            return linkage(dist, method=options.linkage_method)

        # One label per row of the matrix, looked up by file id.
        file_ids = self._doc_term_matrix.index.values
        labels = [self._id_temp_label_map[file_id] for file_id in file_ids]

        return ff.create_dendrogram(self._doc_term_matrix,
                                    orientation=options.orientation,
                                    distfun=_distances,
                                    linkagefun=_linkage,
                                    labels=labels)
예제 #26
0
def make_dendogram_plot(name,rep_set,loci_names,loci_dict,filename):
    """Save a dendrogram of the samples in ``rep_set`` to ``filename``.

    Each matrix cell is the fraction of loci in ``loci_names`` at which two
    samples have the same genotype; the rows of that matrix are clustered.
    ``name`` is accepted but not used.
    """

    def _genotype(sample, locus):
        return loci_dict[sample][locus]['genotype']

    def _shared_fraction(sample1, sample2):
        # 1 for every locus with matching genotypes, 0 otherwise.
        calls = [
            1 if _genotype(sample1, locus) == _genotype(sample2, locus) else 0
            for locus in loci_names
        ]
        return sum(calls) / len(calls)

    concordance = numpy.array([
        [_shared_fraction(sample1, sample2) for sample2 in rep_set]
        for sample1 in rep_set
    ])

    dendro = ff.create_dendrogram(concordance,
                                  orientation='left',
                                  labels=rep_set)
    dendro['layout'].update({'width': 800, 'height': 500})
    py.image.save_as(dendro, filename=filename)
예제 #27
0
def main():
    """
    Compute all the arrays and build the charts here.
    """
    names = ['alpha', 'beta', 'gamma', 'delta']
    features = ['досвід', 'фінанси', 'іновації', 'динаміка', 'стабільність']
    companies = array([[67, 57, 49, 81, 63], [73, 59, 41, 87, 59],
                       [65, 57, 43, 77, 63], [67, 55, 87, 73, 63]])
    margin = {'t': 20, 'r': 20, 'b': 20, 'l': 20}

    z = scale(companies)
    weights = linspace(1, 5, 5)
    # (trace name, scores) for every variation of the taxonometric method.
    variants = [
        ('Незважені', score(z)),
        ('Зважені ненормалізовані', score(z, weights)),
        ('Зважені нормалізовані', score(z, weights / sum(weights))),
    ]

    # Chart of the scores for each variation of the taxonometric method.
    figure = Figure()
    for trace_name, scores in variants:
        figure.add_trace(Bar(name=trace_name, x=names, y=scores))
    figure.update_layout(margin=dict(margin))
    figure.write_image('images/scores.png', width=1200, height=600)

    standard = max(companies, 0)

    # Chart of the profiles picked by each variation, plus the reference.
    figure = Figure()
    for trace_name, scores in variants:
        figure.add_trace(
            Bar(name=trace_name, x=features, y=companies[argmin(scores)]))
    figure.add_trace(Bar(name='Еталон', x=features, y=standard))
    figure.update_layout(margin=dict(margin))
    figure.write_image('images/profiles.png', width=1200, height=600)

    # Dendrogram relative to the reference solution.
    figure = create_dendrogram(
        append(companies, [standard], 0),
        orientation='left',
        labels=names + ['standard'])
    figure.update_layout(margin=dict(margin))
    figure.write_image('images/dendrogram.png', width=1200, height=600)
예제 #28
0
    def _get_dendrogram_fig(self) -> Figure:
        """Create the plotly dendrogram figure of the document-term matrix.

        The orientation, distance metric and linkage method come from
        ``self._dendro_option``; every row is labelled with the temporary
        label mapped to its file id.

        :return: A plotly figure object
        """
        option = self._dendro_option
        label_of = self._id_temp_label_map

        def _pairwise(matrix):
            return pdist(matrix, metric=option.dist_metric)

        def _cluster(dist):
            return linkage(dist, method=option.linkage_method)

        labels = [
            label_of[file_id]
            for file_id in self._doc_term_matrix.index.values
        ]

        figure = ff.create_dendrogram(
            self._doc_term_matrix,
            orientation=option.orientation,
            distfun=_pairwise,
            linkagefun=_cluster,
            labels=labels,
        )
        return figure
예제 #29
0
def plot_dendro(sfam, method='ssap', show='jup'):
    '''
    Plot an 800x500 dendrogram from the distance matrix (SSAP/overlap).

    ``method`` must be 'ssap' or 'over'; any other value prints a message and
    returns without plotting. ``show`` selects the plotly offline renderer:
    'jup' uses ``iplot`` and 'html' uses ``plot``; any other value only
    prints a message.
    '''
    if method not in ('ssap', 'over'):
        print('Unknown method')
        return

    # Missing values that remain after filtering are replaced by 0.
    matrix = exclude_missing_data(fetch_sfam_matrix(sfam, method=method))
    dendro = ff.create_dendrogram(matrix.fillna(0), labels=matrix.index)
    dendro['layout'].update({'width': 800, 'height': 500})

    if show == 'jup':
        renderer = plotly.offline.iplot
    elif show == 'html':
        renderer = plotly.offline.plot
    else:
        print("Unknown show method")
        return
    renderer(dendro, filename='simple_dendrogram')
def hierarchical_plotly(df):
    """Plot a Ward dendrogram of review text aggregated per region.

    All ``text`` values of a region are joined into one document, the
    documents are turned into word counts, and ``1 - cosine_similarity`` of
    those counts is the matrix that gets clustered. The silhouette score of a
    three-cluster Ward solution on that same matrix is shown in the title.

    :param df: data frame with ``region`` and ``text`` columns.
    """
    # One document per region. Every region found in the data is used rather
    # than a fixed count of nine, so the rows always match the labels.
    region = df['region'].unique()
    text_array = [
        ' '.join(np.array(df.loc[df.region == reg, ['text']]).ravel())
        for reg in region
    ]
    txt = np.array(text_array)

    vect = CountVectorizer()
    bag = vect.fit_transform(txt)
    S = 1 - cosine_similarity(bag)

    # Evaluate the quality of the Ward clustering with a silhouette score.
    # Both the clustering and the score use S (one row per region); scoring
    # the rows of the linkage matrix would rate merge steps, not regions.
    ward_label = AgglomerativeClustering(n_clusters=3, linkage='ward')
    w_label = ward_label.fit_predict(S)
    h_silhouette_score = silhouette_score(S, w_label)

    # use plotly to create a dendrogram
    fig = ff.create_dendrogram(
        S,
        orientation='left',
        labels=region,
        # The callback argument is ignored; linkage is computed from S.
        linkagefun=lambda x: linkage(S, 'ward', metric='euclidean'))
    # Update the width, height, and title
    fig['layout'].update(
        width=800,
        height=600,
        title=
        'Hierarchical Clustering Dendrogram of Reviews by Region (sihouette score = '
        + str(round(h_silhouette_score, 2)) + ')')
    fig['layout'].update(xaxis=dict(title='cosine similarity distance'))
    py.plot(fig, filename='dendrogram_with_labels')
예제 #31
0
파일: plot.py 프로젝트: vishalbelsare/DALEX
def plot_dendrogram(linkage_matrix, labels=None):
    """Return a left-oriented, single-colour dendrogram for ``linkage_matrix``.

    The matrix is handed to plotly unchanged because both the distance and
    the linkage callbacks are identity functions. The y axis is hidden when
    no ``labels`` are given; the x axis is always hidden.
    """

    def _identity(value):
        return value

    fig = create_dendrogram(
        linkage_matrix,
        labels=labels,
        distfun=_identity,
        linkagefun=_identity,
        orientation="left",
        color_threshold=-0.1,
        colorscale=["#46bac2"],
    )
    if labels is None:
        fig.update_yaxes(visible=False, showticklabels=False)

    fig.update_xaxes(range=[-0.05, 1.05], visible=False, showticklabels=False)

    # Both axes share the same bare, non-zoomable styling.
    bare_axis = {
        "mirror": False,
        "showgrid": False,
        "showline": False,
        "zeroline": False,
        "ticks": "",
        "fixedrange": True,
    }
    fig.update_layout(
        xaxis=dict(bare_axis),
        yaxis=dict(bare_axis),
        font={"color": "#371ea3"},
        template="none",
        margin={"t": 78, "b": 71, "r": 30},
    )

    return fig
예제 #32
0
파일: tools.py 프로젝트: plotly/plotly.py
 def create_dendrogram(*args, **kwargs):
     """Deprecated entry point that forwards to ``plotly.figure_factory``.

     The deprecation is reported through ``FigureFactory._deprecated`` before
     the call is forwarded with all arguments unchanged.
     """
     FigureFactory._deprecated('create_dendrogram')
     # Imported at call time under an alias, then invoked directly.
     from plotly.figure_factory import create_dendrogram as _create_dendrogram
     return _create_dendrogram(*args, **kwargs)