Esempio n. 1
0
def cluster_reps(reps, threshold=1.0, plot=1):
    """Cluster the repetitions in `reps` and pick out the largest cluster.

    The condensed distance matrix is obtained from `chi2cdm`, a
    complete-linkage hierarchical clustering is built from it, and
    `filter_with_linkage` turns the linkage into a list of clusters using
    `threshold`. The cluster list is printed to stdout.

    Keyword arguments:
        `threshold` : chisq threshold to use in discrimination.
        `plot` : if truthy, the result is plotted with `plot_clustering`.

    Returns a 3-tuple holding
    - the indices of the largest cluster found (first entry of the
      cluster list),
    - the condensed distance matrix,
    - the cluster linkage.
    """
    distances = chi2cdm(reps)
    linkage = hc.linkage(distances, method='complete')
    clusters = filter_with_linkage(linkage, threshold)
    print("Clusters: %s" % str(clusters))

    # Nothing to draw: hand back the results straight away.
    if not plot:
        return (clusters[0], distances, linkage)

    first_rep = reps[0, ...]
    mean_all = mean_stack(reps)
    # Mean over only those repetitions that belong to the selected cluster.
    mean_selected = mean_stack(reps[clusters[0], ...])
    plot_clustering(
        mean_selected,
        first_rep,
        mean_all,
        clusters[0],
        distances,
        linkage,
        threshold,
    )

    return (clusters[0], distances, linkage)
Esempio n. 2
0
def filter_outliers(reps, threshold=1.0, plot=1):
    """Drop repetitions whose mutual chisq exceeds `threshold`.

    The chisq distances between repetitions are computed with `chi2cdm`,
    expanded to a square matrix and handed to `filter_distmat`, which
    selects the indices to keep.

    Removal is iterative: at each step the repetition responsible for the
    largest number of over-threshold chi-squared values (outliers) in the
    chisq-distance matrix is removed. When two repetitions produce the
    same number of outliers, the one with the highest chisq distance to a
    non-outlier distance matrix point is the one removed.

    Keyword arguments:
        `threshold` : chisq threshold above which a distance is an outlier.
        `plot` : if truthy, the result is plotted with `plot_outliers`.

    Returns a tuple of the included indices and the condensed distance
    matrix.
    """
    condensed = chi2cdm(reps)
    kept = filter_distmat(squareform(condensed), threshold)

    if plot:
        first_rep = reps[0, ...]
        mean_all = mean_stack(reps)
        # Mean over only the repetitions that survived the filtering.
        mean_kept = mean_stack(reps[kept, ...])
        plot_outliers(
            mean_kept,
            first_rep,
            mean_all,
            kept,
            condensed,
            threshold,
        )

    return kept, condensed