Example 1
    def _generate(self) -> ReportResult:
        from immuneML.util.TCRdistHelper import TCRdistHelper
        from tcrdist.rep_diff import hcluster_diff
        from tcrdist.summarize import member_summ

        PathBuilder.build(self.result_path)

        subsampled_dataset = self._extract_positive_example_dataset()
        reference_sequences = self._extract_reference_sequences()
        tcr_rep = TCRdistHelper.compute_tcr_dist(subsampled_dataset, [self.label.name], self.cores)
        tcr_rep.hcluster_df, tcr_rep.Z = hcluster_diff(clone_df=tcr_rep.clone_df, pwmat=tcr_rep.pw_alpha + tcr_rep.pw_beta, x_cols=["epitope"],
                                                       count_col='count')

        figures, tables = [], []

        logging.info(f'{TCRdistMotifDiscovery.__name__}: created {tcr_rep.hcluster_df.shape[0]} clusters, now discovering motifs in clusters.')

        for index, row in tcr_rep.hcluster_df.iterrows():
            if len(row['neighbors_i']) >= self.min_cluster_size:
                figure_outputs, table_outputs = self._discover_motif_in_cluster(tcr_rep, index, row, reference_sequences)
                figures.extend(figure_outputs)
                tables.extend(table_outputs)

        res_summary = member_summ(res_df=tcr_rep.hcluster_df, clone_df=tcr_rep.clone_df, addl_cols=['epitope'])
        res_summary.to_csv(self.result_path / "tcrdist_summary.csv")

        tables.append(ReportOutput(path=self.result_path / "tcrdist_summary.csv", name="TCRdist summary (csv)"))

        return ReportResult(name=self.name, info="TCRdist motif discovery", output_figures=figures, output_tables=tables)
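
The per-cluster work above is delegated to self._discover_motif_in_cluster, which is not shown. Below is a minimal, hypothetical sketch of what such a helper typically does, following the palmotif pattern used in Example 3; the function name, signature, and the use of reference_sequences as the motif background are assumptions, not the actual immuneML implementation.

def _discover_motif_in_cluster_sketch(tcr_rep, index, row, reference_sequences):
    # Hypothetical sketch only (not the immuneML helper): build a motif logo for the
    # clones in one cluster, using the cluster members as foreground and the
    # reference sequences as background.
    from palmotif import compute_pal_motif, svg_logo
    from tcrdist.summarize import _select

    # CDR3 beta sequences of the clones belonging to this cluster node
    cluster_seqs = list(_select(df=tcr_rep.clone_df,
                                iloc_rows=row['neighbors_i'],
                                col='cdr3_b_aa'))
    # Example 3 derives the centroid with get_centroid_seq; using the first
    # member sequence here is a simplification.
    motif, stat = compute_pal_motif(seqs=cluster_seqs,
                                    refs=reference_sequences,
                                    centroid=cluster_seqs[0])
    # Render the motif as an SVG string, as in Example 3
    return svg_logo(motif, return_str=True)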
Example 2
def test_introduction_6():
    """
    Basic Specificity Neighborhoods based on a Hierarchical Clustering
    """
    import pandas as pd
    from tcrdist.repertoire import TCRrep

    df = pd.read_csv("dash.csv")
    tr = TCRrep(cell_df=df,
                organism='mouse',
                chains=['beta', 'alpha'],
                db_file='alphabeta_gammadelta_db.tsv')

    from tcrdist.rep_diff import hcluster_diff, member_summ
    from hierdiff import plot_hclust_props

    # diff testing is based on a binary comparison, so all epitopes other than 'PA' are set to 'X'
    tr.clone_df['PA'] = [
        'PA' if x == 'PA' else 'X' for x in tr.clone_df.epitope
    ]

    res, Z = hcluster_diff(tr.clone_df,
                           tr.pw_beta,
                           x_cols=['PA'],
                           count_col='count')

    res_summary = member_summ(res_df=res,
                              clone_df=tr.clone_df,
                              addl_cols=['epitope'])

    res_detailed = pd.concat([res, res_summary], axis=1)

    html = plot_hclust_props(
        Z,
        title='PA Epitope Example',
        res=res_detailed,
        tooltip_cols=['cdr3_b_aa', 'v_b_gene', 'j_b_gene', 'epitope'],
        alpha=0.00001,
        colors=['blue', 'gray'],
        alpha_col='pvalue')

    with open('hierdiff_example.html', 'w') as fh:
        fh.write(html)
Example 3
def test_gallery_hdiff():
    """
    All imports are provided here and repeated
    step-wise below, for clarity and for modular
    cut-and-paste. This example performs a paired
    alpha-beta analysis, but the code blocks can be
    used for single-chain analysis as well.
    """
    import pandas as pd
    from tcrdist.repertoire import TCRrep
    from tcrdist.rep_diff import hcluster_diff, member_summ
    from tcrsampler.sampler import TCRsampler
    from tcrdist.adpt_funcs import get_centroid_seq
    from tcrdist.summarize import _select
    from palmotif import compute_pal_motif, svg_logo
    from hierdiff import plot_hclust_props
    """
    Load a subset of data that contains paired alpha-beta
    chain mouse TCR receptors that recognized 
    the PA or PB1 epitopes (present in mouse influenza). 
    """
    import pandas as pd
    df = pd.read_csv("dash.csv")
    conditional = df['epitope'].apply( lambda x: x in ['PA','PB1'])
    """
    For illustrative/testing purposes, randomly subset the data to include 
    only 100 clones. Increase for more informative plot.
    """
    df = df[conditional].\
        reset_index(drop = True).\
        sample(100, random_state = 3).\
        reset_index(drop = True).\
        copy()
    """
    Load DataFrame into TCRrep instance, 
    which automatically computes attributes:
    1. .clone_df DataFrame
    2. .pw_beta np.ndarray
    3. .pw_alpha np.ndarray
    """
    from tcrdist.repertoire import TCRrep
    tr = TCRrep(cell_df = df, 
                organism = 'mouse', 
                chains = ['beta','alpha'], 
                db_file = 'alphabeta_gammadelta_db.tsv')

    """
    Apply hcluster_diff, which hierarchically clusters the clones.
    
    Note
    ----
    pwmat could easily be tr.pw_beta or tr.pw_alpha if 
    clustering should be done on a single chain.
    """
    from tcrdist.rep_diff import hcluster_diff
    tr.hcluster_df, tr.Z =\
        hcluster_diff(clone_df = tr.clone_df, 
                      pwmat    = tr.pw_beta + tr.pw_alpha,
                      x_cols = ['epitope'], 
                      count_col = 'count')

    """
    Load a custom, mouse-appropriate background dataset to sample CDR3s
    according to the V and J gene usage frequencies observed in each node.
    See the tcrsampler package for more details 
    (https://github.com/kmayerb/tcrsampler/blob/master/docs/getting_default_backgrounds.md)
    """
    from tcrsampler.sampler import TCRsampler

    t = TCRsampler()
    t.download_background_file("ruggiero_mouse_sampler.zip")
    tcrsampler_beta = TCRsampler(default_background = 'ruggiero_mouse_beta_t.tsv.sampler.tsv')
    tcrsampler_alpha = TCRsampler(default_background = 'ruggiero_mouse_alpha_t.tsv.sampler.tsv')

    """
    Add an SVG graphic to every node of the tree 
    aligned to the cluster centroid.
    """
    from tcrdist.adpt_funcs import get_centroid_seq
    from tcrdist.summarize import _select
    from palmotif import compute_pal_motif, svg_logo

    """Beta Chain"""
    svgs_beta = list()
    for i,r in tr.hcluster_df.iterrows():

        dfnode = tr.clone_df.iloc[r['neighbors_i'],]
        if dfnode.shape[0] > 2:
            centroid, *_ = get_centroid_seq(df = dfnode)
        else:
            centroid = dfnode['cdr3_b_aa'].to_list()[0]
        print(f"BETA-CHAIN: {centroid}")

        gene_usage_beta = dfnode.groupby(['v_b_gene','j_b_gene']).size()
        sampled_rep = tcrsampler_beta.sample( gene_usage_beta.reset_index().to_dict('split')['data'],
                        flatten = True, depth = 10)
        sampled_rep  = [x for x in sampled_rep if x is not None]
        motif, stat = compute_pal_motif(
                        seqs = _select(df = tr.clone_df, 
                                       iloc_rows = r['neighbors_i'], 
                                       col = 'cdr3_b_aa'),
                        refs = sampled_rep, 
                        centroid = centroid)
        
        svgs_beta.append(svg_logo(motif, return_str= True))

    """Add Beta SVG graphics to hcluster_df"""
    tr.hcluster_df['svg_beta'] = svgs_beta


    """Alpha Chain"""
    svgs_alpha = list()
    for i,r in tr.hcluster_df.iterrows():

        dfnode = tr.clone_df.iloc[r['neighbors_i'],]
        if dfnode.shape[0] > 2:
            centroid, *_ = get_centroid_seq(df = dfnode)
        else:
            centroid = dfnode['cdr3_a_aa'].to_list()[0]
        print(f"ALPHA-CHAIN: {centroid}")
        gene_usage_alpha = dfnode.groupby(['v_a_gene','j_a_gene']).size()
        sampled_rep = tcrsampler_alpha.sample( gene_usage_alpha.reset_index().to_dict('split')['data'], 
                        flatten = True, depth = 10)
        
        sampled_rep  = [x for x in sampled_rep if x is not None]
        motif, stat = compute_pal_motif(
                        seqs = _select(df = tr.clone_df, 
                                       iloc_rows = r['neighbors_i'], 
                                       col = 'cdr3_a_aa'),
                        refs = sampled_rep, 
                        centroid = centroid)

        svgs_alpha.append(svg_logo(motif, return_str= True))
    
    """Add Alpha SVG graphics to hcluster_df"""
    tr.hcluster_df['svg_alpha'] = svgs_alpha
    """
    Produce summary information for tooltips. 
    For instance, describe percentage of TCRs with 
    a given epitope at a given node.
    """
    res_summary = member_summ(  res_df = tr.hcluster_df,
                                clone_df = tr.clone_df, 
                                addl_cols=['epitope'])

    tr.hcluster_df_detailed = \
        pd.concat([tr.hcluster_df, res_summary], axis = 1)
    """
    Write D3 html for an interactive dendrogram graphic.
    Specify desired tooltips.
    """
    from hierdiff import plot_hclust_props
    html = plot_hclust_props(tr.Z,
                title='PA Epitope Example',
                res=tr.hcluster_df_detailed,
                tooltip_cols=['cdr3_b_aa','v_b_gene', 'j_b_gene','svg_alpha','svg_beta'],
                alpha=0.00001, colors = ['blue','gray'],
                alpha_col='pvalue')

    with open('hierdiff_example_PA_v_PB1.html', 'w') as fh:
        fh.write(html)
Example 4
def test_workflow_2():
    """
    Load all the TCRs associated with a particular epitope in 
    the Adaptive Biotechnology COVID19 Data Release 2
    """
    import os
    import pandas as pd
    from tcrdist.repertoire import TCRrep
    from tcrdist.adpt_funcs import get_basic_centroids

    path = os.path.join('tcrdist', 'data', 'covid19')
    file = 'mira_epitope_16_1683_QYIKWPWYI_YEQYIKWPW_YEQYIKWPWY.tcrdist3.csv'
    filename = os.path.join(path, file)

    df = pd.read_csv(filename, sep=",")

    df = df[[
        'cell_type', 'subject', 'v_b_gene', 'j_b_gene', 'cdr3_b_aa', 'epitope',
        'age', 'sex', 'cohort'
    ]]

    df['count'] = 1

    tr = TCRrep(cell_df=df, organism='human', chains=['beta'])

    tr = get_basic_centroids(tr, max_dist=200)

    tr.centroids_df

    tr.clone_df['covid'] = [
        'healthy' if x.find("Healthy") != -1 else "covid"
        for x in tr.clone_df.cohort
    ]

    from tcrdist.rep_diff import neighborhood_diff, hcluster_diff, member_summ
    import hierdiff
    #nd = neighborhood_diff(tr.clone_df, tr.pw_beta, x_cols = ['covid'], count_col = 'count')
    res, Z = hcluster_diff(tr.clone_df,
                           tr.pw_beta,
                           x_cols=['covid'],
                           count_col='count')

    res_summary = member_summ(res_df=res,
                              clone_df=tr.clone_df,
                              addl_cols=['cohort', 'subject'])

    res_detailed = pd.concat([res, res_summary], axis=1)

    html = hierdiff.plot_hclust_props(Z,
                                      title='COVID-19 MIRA Epitope Example',
                                      res=res_detailed,
                                      tooltip_cols=[
                                          'cdr3_b_aa', 'v_b_gene', 'j_b_gene',
                                          'cohort', 'subject'
                                      ],
                                      alpha=0.05,
                                      alpha_col='pvalue')

    with open('hierdiff_example.html', 'w') as fh:
        fh.write(html)
Example 5
# Note: this example is a fragment. `selin` is assumed to be a pandas DataFrame of
# VDJdb records loaded earlier (not shown), with a 'Gene' column marking TRA/TRB rows.
from tcrdist.mappers import vdjdb_to_tcrdist2, vdjdb_to_tcrdist2_mapping_TRA, vdjdb_to_tcrdist2_mapping_TRB
selin_a = selin.loc[selin['Gene'] == 'TRA'].rename(vdjdb_to_tcrdist2_mapping_TRA, axis=1)
selin_b = selin.loc[selin['Gene'] == 'TRB'].rename(vdjdb_to_tcrdist2_mapping_TRB, axis=1)

"""COMPUTE TCRDISTANCES (SEE DOCS PAGE: https://tcrdist3.readthedocs.io/en/latest/tcrdistances.html)"""
from tcrdist.repertoire import TCRrep
tr = TCRrep(cell_df=selin_a,
            organism='human',
            chains=['alpha'])

"""COMPUTE TCRDISTANCES (SEE DOCS PAGE:https://tcrdist3.readthedocs.io/en/latest/index.html#hierarchical-neighborhoods)"""
from tcrdist.rep_diff import hcluster_diff
tr.hcluster_df, tr.Z =\
    hcluster_diff(clone_df = tr.clone_df, 
                  pwmat    = tr.pw_alpha,
                  x_cols = ['cohort'], 
                  count_col = 'count')

"""
SEE TCRSAMPLER (https://github.com/kmayerb/tcrsampler/blob/master/docs/tcrsampler.md)
Here we used olga human alpha synthetic sequences for best coverage
"""
from tcrsampler.sampler import TCRsampler
t = TCRsampler()
#t.download_background_file('olga_sampler.zip') # ONLY IF NOT ALREADY DONE
tcrsampler_alpha = TCRsampler(default_background = 'olga_human_alpha_t.sampler.tsv')
tcrsampler_alpha.build_background(max_rows = 1000) 

"""SEE PALMOTIF DOCS (https://github.com/agartland/palmotif)"""
from palmotif import compute_pal_motif, svg_logo
from tcrdist.summarize import _select
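
The snippet above breaks off at the palmotif imports. A plausible continuation, reconstructed here by mirroring the alpha-chain loop from Example 3 (this is a sketch, not the original code; get_centroid_seq_alpha is taken from the imports in Example 6 because this repertoire is alpha-only):

from tcrdist.adpt_funcs import get_centroid_seq_alpha

svgs_alpha = list()
for i, r in tr.hcluster_df.iterrows():
    # Clones belonging to this tree node
    dfnode = tr.clone_df.iloc[r['neighbors_i'], ]
    if dfnode.shape[0] > 2:
        # Assumes get_centroid_seq_alpha returns (centroid, ...) like get_centroid_seq in Example 3
        centroid, *_ = get_centroid_seq_alpha(df=dfnode)
    else:
        centroid = dfnode['cdr3_a_aa'].to_list()[0]

    # Sample a V/J gene-usage-matched background from the alpha-chain sampler
    gene_usage_alpha = dfnode.groupby(['v_a_gene', 'j_a_gene']).size()
    sampled_rep = tcrsampler_alpha.sample(
        gene_usage_alpha.reset_index().to_dict('split')['data'],
        flatten=True, depth=10)
    sampled_rep = [x for x in sampled_rep if x is not None]

    motif, stat = compute_pal_motif(
        seqs=_select(df=tr.clone_df, iloc_rows=r['neighbors_i'], col='cdr3_a_aa'),
        refs=sampled_rep,
        centroid=centroid)
    svgs_alpha.append(svg_logo(motif, return_str=True))

tr.hcluster_df['svg_alpha'] = svgs_alpha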
Example 6
def _auto_hdiff2(
        tcrrep,
        html_name='DEFAULT.html',
        pwmat_str_b='pw_beta',
        pwmat_str_a='pw_alpha',
        single=True,
        generate_svgs=True,
        combine_olga=False,
        verbose=True,
        prune=3,
        default_hcluster_diff_kwargs=_get_default_kwargs(chains=['beta'])[0],
        default_member_summ_kwargs=_get_default_kwargs(chains=['beta'])[1],
        default_plot_hclust_props=_get_default_kwargs(chains=['beta'])[2]):
    """
    Automatic Hierarchical Cluster Plotting

    Parameters
    ----------

    tcrrep : tcrdist.repertoire.TCRrep
        TCRrep instance with .clone_df and the relevant pairwise distance matrices computed
    html_name : str
        name for html file output e.g., 'DEFAULT.html'
    pwmat_str_b : str
        name of pairwise matrix attribute to be used for clustering beta chains e.g., 'pw_beta'
    pwmat_str_a : str
        name of pairwise matrix attribute to be used for clustering alpha chains e.g., 'pw_alpha'
    single : bool
        If True, make the summary based on each clone being present in single copy;
        otherwise, the 'count' column is used when calculating percentages.
        NOTE: 'count_col' in default_hcluster_diff_kwargs can also be set to
        'single'. If True, diversity metrics will also be based on clones
        rather than clonal abundances.
    generate_svgs : bool
        If True, SVG logos are produced for each node where .hcluster_df['prune'] is 0
    combine_olga : bool
        Passed through to _tcrsampler_svgs; if True, OLGA-generated background CDR3s
        are combined with the sampled background used for motif estimation.
    verbose : bool
        Report on status.
    prune : int
        Nodes with fewer than this many neighbors (K_neighbors < prune) are flagged
        with .hcluster_df['prune'] = 1 and passed to plot_hclust_props via prune_col.
    default_hcluster_diff_kwargs: dict
        kwargs dictionary for (tcrdist.rep_diff.hcluster_diff)

            clone_df : pd.DataFrame [nclones x metadata]
                Contains metadata for each clone.
            pwmat : np.ndarray [nclones x nclones]
                Square distance matrix for defining neighborhoods
            x_cols : list
                List of columns to be tested for association with the neighborhood
            count_col : str
                Column in clone_df that specifies counts.
                Default none assumes count of 1 cell for each row.
            subset_ind : None or np.ndarray with partial index of df, optional
                Provides option to tally counts only within a subset of df, but to maintain the clustering
                of all individuals. Allows for one clustering of pooled TCRs,
                but tallying/testing within a subset (e.g. participants or conditions)
            hclust_method : str
                Method for hierarchical clustering, passed to the scipy.cluster.hierarchy
                linkage function.
            optimal_ordering : bool
                Flag passed to the scipy.cluster.hierarchy linkage function to improve
                visual tree layout. Can be slow for large trees.
            test_method : str or None
                Specifies Fisher's exact test ("fishers"), Chi-squared ("chi2") or
                Cochran-Mantel-Haenszel test ("chm") for testing.
    default_member_summ_kwargs: dict
        kwargs dictionary for (tcrdist.rep_diff.member_summ)

            Return additional summary info about each result (row) based on the
            members of the cluster. This is helpful for preparing strings 
            to add to the tooltip in hierdiff.plot_hclust_props.

            res_df : pd.DataFrame [nclusters x result cols]
                Returned from neighborhood_diff or hcluster_diff
            clone_df : pd.DataFrame [nclones x metadata]
                Contains metadata for each clone.
            key_col : str
                Column in res_df that specifies the iloc of members in the clone_df
            count_col : str
                Column in clone_df that specifies counts.
                Default none assumes count of 1 cell for each row.
            addl_cols : list
                Columns to summarize
            addl_n : int
                Number of top N clones to include in the summary of
                each cluster.
    default_plot_hclust_props : dict
        kwargs dictionary for (hierdiff.plot_hclust_props)

            Plot tree of linkage-based hierarchical clustering, with nodes colored using stacked bars
            representing proportion of cluster members associated with specific conditions. Nodes also optionally
            annotated with pvalue, number of members or cluster ID.
            
            Z : linkage matrix
                Result of calling sch.linkage on a compressed pair-wise distance matrix
            res : pd.DataFrame
                Result from calling hcluster_diff, with observed/frequencies and p-values for each node
            alpha_col : str
                Column in res to use for 'alpha' annotation
            alpha : float
                Threshold for plotting the stacked bars and annotation
            colors : tuple of valid colors
                Used for stacked bars of conditions at each node
            prune_col : str/column in res
                Column of res that indicates whether a result/node can be pruned from the tree.
                The tree will not print any pruned nodes that only contain other pruned nodes.
    """
    import os
    import warnings
    import numpy as np
    import pandas as pd
    from numpy.random import randint
    from progress.bar import IncrementalBar  # assumed source of the IncrementalBar used below
    from tcrsampler.sampler import TCRsampler
    from palmotif import compute_pal_motif, svg_logo
    from hierdiff import plot_hclust_props
    from tcrdist.adpt_funcs import get_centroid_seq, get_centroid_seq_alpha
    from tcrdist.summarize import _select
    from tcrdist.repertoire import TCRrep
    from tcrdist.rep_diff import hcluster_diff, member_summ
    from tcrdist.pgen import OlgaModel
    from tcrdist.diversity import generalized_simpsons_entropy
    from tcrdist.diversity import fuzzy_diversity
    # _get_default_kwargs and _tcrsampler_svgs are assumed to be helpers defined in the same module (not shown here)

    # Load clone_df directly from input object
    if default_hcluster_diff_kwargs['clone_df'] is None:
        default_hcluster_diff_kwargs['clone_df'] = getattr(tcrrep, 'clone_df')

    # Get appropriate pwmat_str (pw_beta, pw_alpha, or one of the CDRs, e.g., pw_cdr3_b_aa)
    if 'alpha' in tcrrep.chains:
        pwmat_str = pwmat_str_a
    if 'beta' in tcrrep.chains:
        pwmat_str = pwmat_str_b

    # Get the appropriate pairwise matrix

    if default_hcluster_diff_kwargs['pwmat'] is None:
        if verbose: print(f"pwmat set with {pwmat_str}")
        default_hcluster_diff_kwargs['pwmat'] = getattr(tcrrep, pwmat_str)
    else:
        if verbose: print("pwmat was directly provided as a kwarg")
    """Handle the Fact that 2 or more catagoorical levels are need to run hierdiff"""

    x_cols = default_hcluster_diff_kwargs['x_cols']
    if x_cols is None:
        tcrrep.clone_df['dummy'] = \
            [['X1','X2'][randint(2)] for x in range(tcrrep.clone_df.shape[0])]
        default_hcluster_diff_kwargs['x_cols'] = ['dummy']
        default_hcluster_diff_kwargs['test_method'] = 'fishers'
        warnings.warn(
            f"Because x_cols was None, setting random dummy values, and using {default_hcluster_diff_kwargs['test_method']}\n",
            stacklevel=2)

    elif tcrrep.clone_df[x_cols].nunique()[0] == 2:
        default_hcluster_diff_kwargs['test_method'] = 'fishers'

    elif tcrrep.clone_df[x_cols].nunique()[0] > 2:
        default_hcluster_diff_kwargs['test_method'] = 'chi2'

    elif tcrrep.clone_df[x_cols].nunique()[0] < 2:
        tcrrep.clone_df['dummy'] = \
            [['X1','X2'][randint(2)] for x in range(tcrrep.clone_df.shape[0])]
        default_hcluster_diff_kwargs['x_cols'] = ['dummy']
        default_hcluster_diff_kwargs['test_method'] = 'fishers'
        warnings.warn(
            f"Because x_cols was None, setting random dummy values, and using {default_hcluster_diff_kwargs['test_method']}\n",
            stacklevel=2)
    """ Run hcluster_df """

    bar = IncrementalBar(f'Run hcluster_diff :', max=2, suffix='%(percent)d%%')
    bar.next()
    tcrrep.hcluster_df, tcrrep.Z = hcluster_diff(
        **default_hcluster_diff_kwargs)
    bar.next()
    bar.finish()

    tcrrep.hcluster_df['prune'] = tcrrep.hcluster_df['K_neighbors'].apply(
        lambda x: 1 if (x < prune) else 0)
    """ Do Basic Summary """
    mean_distance_ = list()
    percentage_node_25_ = list()
    percentage_node_50_ = list()
    percentage_node_75_ = list()
    n_rows = tcrrep.hcluster_df.shape[0]
    bar = IncrementalBar(f'Evaluate Clusters :',
                         max=n_rows,
                         suffix='%(percent)d%%')
    for i, r in tcrrep.hcluster_df.iterrows():
        bar.next()
        # <dfnode> is dataframe with all the clones at a given tree node
        dfnode = tcrrep.clone_df.iloc[r['neighbors_i'], ]
        # <pwnode> is the pairwise distance submatrix for the clones at that node
        pwnode = getattr(tcrrep,
                         pwmat_str)[r['neighbors_i'], :][:, r['neighbors_i']]
        # get the non-diagonal entries.
        node_non_diag_entries = pwnode[~np.eye(pwnode.shape[0], dtype=bool)]
        # Compute the mean distance at the node
        mean_distance_.append(str(round(node_non_diag_entries.mean(), 1)))

        # Percentage of within-node pairwise distances below 25, 50 and 75 distance units
        percentage_node_25 = 100 * (node_non_diag_entries < 25).sum() / node_non_diag_entries.size
        percentage_node_50 = 100 * (node_non_diag_entries < 50).sum() / node_non_diag_entries.size
        percentage_node_75 = 100 * (node_non_diag_entries < 75).sum() / node_non_diag_entries.size
        percentage_node_25_.append(f"{round(percentage_node_25,1)}%")
        percentage_node_50_.append(f"{round(percentage_node_50,1)}%")
        percentage_node_75_.append(f"{round(percentage_node_75,1)}%")
    bar.next()
    bar.finish()
    tcrrep.hcluster_df['mean_dist'] = mean_distance_
    tcrrep.hcluster_df['pct_dist_25'] = percentage_node_25_
    tcrrep.hcluster_df['pct_dist_50'] = percentage_node_50_
    tcrrep.hcluster_df['pct_dist_75'] = percentage_node_75_
    """
    If 'count_col' is 'single', treat each clone as a single entity in the summary.
    """
    if default_member_summ_kwargs['count_col'] == 'single':
        single = True
        print("MAKING 'single' variable")
        tcrrep.hcluster_df['single'] = 1

    if single:
        tcrrep.clone_df['single'] = 1
        default_member_summ_kwargs['count_col'] = 'single'
    """ member_summ"""
    tcrrep.res_summary = \
        member_summ(res_df = tcrrep.hcluster_df,clone_df = tcrrep.clone_df, **default_member_summ_kwargs)

    tcrrep.hcluster_df_detailed = pd.concat(
        [tcrrep.hcluster_df.copy(),
         tcrrep.res_summary.copy()], axis=1)
    """ Add diversity stats"""
    tcrrep.clone_df['single'] = 1
    if single:
        fdiv75 = lambda ind: fuzzy_diversity(
            tcrrep.clone_df.iloc[ind, :]['single'],
            getattr(tcrrep, pwmat_str)[ind, :][:, ind],
            order=2,
            threshold=75)
        fdiv50 = lambda ind: fuzzy_diversity(
            tcrrep.clone_df.iloc[ind, :]['single'],
            getattr(tcrrep, pwmat_str)[ind, :][:, ind],
            order=2,
            threshold=50)
        fdiv25 = lambda ind: fuzzy_diversity(
            tcrrep.clone_df.iloc[ind, :]['single'],
            getattr(tcrrep, pwmat_str)[ind, :][:, ind],
            order=2,
            threshold=25)
    else:
        fdiv75 = lambda ind: fuzzy_diversity(
            tcrrep.clone_df.iloc[ind, :]['count'],
            getattr(tcrrep, pwmat_str)[ind, :][:, ind],
            order=2,
            threshold=75)
        fdiv50 = lambda ind: fuzzy_diversity(
            tcrrep.clone_df.iloc[ind, :]['count'],
            getattr(tcrrep, pwmat_str)[ind, :][:, ind],
            order=2,
            threshold=50)
        fdiv25 = lambda ind: fuzzy_diversity(
            tcrrep.clone_df.iloc[ind, :]['count'],
            getattr(tcrrep, pwmat_str)[ind, :][:, ind],
            order=2,
            threshold=25)
    tcrrep.hcluster_df_detailed['fuzzy_simpson_diversity_25'] = [
        str(round(fdiv25(ind), 2))
        for ind in tcrrep.hcluster_df_detailed.neighbors_i.to_list()
    ]
    tcrrep.hcluster_df_detailed['fuzzy_simpson_diversity_50'] = [
        str(round(fdiv50(ind), 2))
        for ind in tcrrep.hcluster_df_detailed.neighbors_i.to_list()
    ]
    tcrrep.hcluster_df_detailed['fuzzy_simpson_diversity_75'] = [
        str(round(fdiv75(ind), 2))
        for ind in tcrrep.hcluster_df_detailed.neighbors_i.to_list()
    ]
    """Optional Add SVGs to hcluster_detailed"""
    if 'beta' in tcrrep.chains:
        _tcrsampler_svgs(tcrrep=tcrrep,
                         default_background=None,
                         default_background_if_missing=None,
                         cdr3_name='cdr3_b_aa',
                         pwmat_str=pwmat_str_b,
                         chain='beta',
                         gene_names=['v_b_gene', 'j_b_gene'],
                         combine_olga=combine_olga)

    if 'alpha' in tcrrep.chains:
        _tcrsampler_svgs(tcrrep=tcrrep,
                         default_background=None,
                         default_background_if_missing=None,
                         cdr3_name='cdr3_a_aa',
                         pwmat_str=pwmat_str_a,
                         chain='alpha',
                         gene_names=['v_a_gene', 'j_a_gene'],
                         combine_olga=combine_olga)
    """ Plot """
    html = plot_hclust_props(tcrrep.Z,
                             res=tcrrep.hcluster_df_detailed,
                             prune_col='prune',
                             **default_plot_hclust_props)
    """ Write File """
    with open(html_name, 'w') as fh:
        print(f"WRITING {html_name}")
        fh.write(html)
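
For context, here is a hypothetical usage sketch of _auto_hdiff2, assuming it and _get_default_kwargs are importable from the tcrdist module that defines them; the input file, kwarg tweaks, and output name are illustrative only.

import pandas as pd
from tcrdist.repertoire import TCRrep

# Illustrative input: the paired mouse data used in Examples 2 and 3
df = pd.read_csv("dash.csv")
tr = TCRrep(cell_df=df, organism='mouse', chains=['beta'],
            db_file='alphabeta_gammadelta_db.tsv')

# Start from the module's default kwargs and customize the testing variable
defaults = _get_default_kwargs(chains=['beta'])
hcluster_kwargs, member_summ_kwargs, plot_kwargs = defaults[0], defaults[1], defaults[2]
hcluster_kwargs['x_cols'] = ['epitope']

_auto_hdiff2(tcrrep=tr,
             html_name='auto_hdiff_beta.html',
             single=True,
             default_hcluster_diff_kwargs=hcluster_kwargs,
             default_member_summ_kwargs=member_summ_kwargs,
             default_plot_hclust_props=plot_kwargs)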