コード例 #1
0
ファイル: clustering.py プロジェクト: tpikonen/solution
def average_positions(filenames, chi2cutoff=1.15, write=True, plot=1):
    """Filter and average over positions in a capillary.

    The files in `filenames` are stacked with `stack_datafiles`, clustered
    with `cluster_reps`, and the repetitions selected by the clustering are
    averaged with `mean_stack`.

    Arguments:
        `filenames` : List of input file names. NOTE: sorted in place.

    Keyword arguments:
        `chi2cutoff` : Threshold passed on to `cluster_reps`.
        `write` : If true, write the result with `write_ydat`. The output
            name is the first (sorted) file name cut at the first '.p',
            with '.clu.ydat' appended.
        `plot` : Passed on to `cluster_reps`.

    Returns the mean of the included repetitions.
    """
    filenames.sort()
    stack = stack_datafiles(filenames)

    incinds, cdm, links = cluster_reps(stack, threshold=chi2cutoff, plot=plot)
    ms = mean_stack(stack[incinds,...])

    # Discarded indices are everything not selected by the clustering.
    # Built as a real list: on Python 3 `range` objects have no remove().
    kept = set(int(i) for i in incinds)
    disinds = [i for i in range(len(filenames)) if i not in kept]

    included = [[filenames[i], md5_file(filenames[i])] for i in incinds]
    discarded = [[filenames[i], md5_file(filenames[i])] for i in disinds]

    # Metadata must hold plain lists of builtin numbers; `map` would leave
    # lazy iterators in the dictionary on Python 3.
    ad = {
        'chi2cutoff': float(chi2cutoff),
        'included': included,
        'discarded': discarded,
        'chi2matrix': [float(x) for x in cdm],
        'incinds': [int(i) for i in incinds],
        'linkage': [[float(x) for x in row] for row in links],
    }

    # Output rows follow the `cols` list given to write_ydat below:
    # filtered mean, first repetition, mean over all repetitions.
    outarr = np.zeros((7, ms.shape[1]))
    outarr[0:3,:] = ms
    outarr[3:5,:] = stack[0,1:3,:]
    outarr[5:7,:] = mean_stack(stack)[1:3,:]

    if write:
        first = filenames[0]
        cut = first.find('.p')
        # find() returns -1 when '.p' is missing; slicing with -1 would
        # silently drop the last character, so use the whole name instead.
        stem = first[:cut] if cut >= 0 else first
        fname = "%s.clu.ydat" % stem
        print(fname)
        write_ydat(outarr, fname, addict=ad,
            cols=['q', 'I', 'Ierr', 'I_first', 'Ierr_first',
                  'I_all', 'Ierr_all'])
    return ms
コード例 #2
0
ファイル: outliers.py プロジェクト: tpikonen/solution
def filter_matfile(fname, outstem, p_reject=0.001, plot=1):
    """Filter outlier repetitions from each position in a stack file.

    The stack is read from `fname` with `read_mat` and indexed as
    stack[position, repetition, ...]. For every position the repetitions
    are filtered with `filter_outliers`, the included ones are averaged
    with `mean_stack`, and the result is written with `write_ydat` to
    '<outstem>.pNN.out.ydat', where NN is the position index.

    Keyword arguments:
        `p_reject` : Rejection probability. The chisq threshold is
            chi2.ppf(1 - p_reject, N) / N, where N is the number of
            non-NaN values in stack[0,0,1,:].
        `plot` : Passed on to `filter_outliers`.
    """
    stack = read_mat(fname)
    md5 = md5_file(fname)
    print("Rejection probability: %0.3g" % p_reject)
    N = np.sum(np.logical_not(np.isnan(stack[0,0,1,:])))
    print("Number of valid channels: %d" % N)
    threshold = chi2.ppf(1.0 - p_reject, N) / N
    print("Chisq rejection threshold: %0.3g" % threshold)

    for pos in range(stack.shape[0]):
        reps = stack[pos,...]
        incinds, cdm = filter_outliers(reps, threshold=threshold, plot=plot)
        ms = mean_stack(reps[incinds,...])

        # Discarded indices are everything filter_outliers did not keep.
        # Built as a real list: on Python 3 `range` has no remove().
        kept = set(int(i) for i in incinds)
        disinds = [i for i in range(reps.shape[0]) if i not in kept]
        print("Pos %d, discarded: %s" % (pos, str(disinds)))

        # Plain lists of builtin numbers; `map` would leave lazy iterators
        # in the metadata dictionary on Python 3.
        ad = { 'chi2cutoff' : float(threshold),
            'rejection_prob' : float(p_reject),
            'incinds' : [int(i) for i in incinds],
            'disinds' : [int(i) for i in disinds],
            'chi2matrix' : [float(x) for x in cdm],
            'method' : "filter_outliers",
            'inputfile' : [ fname, md5 ],
            'inputposition' : int(pos),
            'q~unit' : '1/nm',
            'I~unit' : 'arb.',
            'Ierr~unit' : 'arb.',
            'I_first~unit' : 'arb.',
            'Ierr_first~unit' : 'arb.',
            'I_all~unit' : 'arb.',
            'Ierr_all~unit' : 'arb.',
            }

        # Output rows follow the `cols` list below: filtered mean,
        # first repetition, mean over all repetitions.
        outarr = np.zeros((7, ms.shape[1]))
        outarr[0:3,:] = ms
        outarr[3:5,:] = reps[0,1:3,:]
        outarr[5:7,:] = mean_stack(reps)[1:3,:]

        outname = "%s.p%02d.out.ydat" % (outstem, pos)
        print(outname)
        write_ydat(outarr, outname, addict=ad,
            cols=['q','I','Ierr','I_first','Ierr_first','I_all','Ierr_all'],
            attributes=['~unit'])
コード例 #3
0
ファイル: repfilter.py プロジェクト: tpikonen/solution
def filter_matfile(fname, outstem):
    """Point-filter every position of the stack stored in `fname`.

    For each position the repetitions are filtered with `chifilter_points`
    and the result is written with `write_filtered` to
    '<outstem>.pNN.fil.ydat', where NN is the position index. The first
    repetition and the plain mean of all repetitions are passed along to
    `write_filtered` as well.
    """
    stack = read_mat(fname)
    source_name = os.path.basename(fname)
    n_positions = stack.shape[0]
    for pos in range(n_positions):
        print("File: %s, pos %d" % (fname, pos))
        # Flush so the progress line shows up before the filtering starts.
        sys.stdout.flush()
        reps = stack[pos,...]
        first = reps[0,...]
        aver = mean_stack(reps)
        filt, inds = chifilter_points(reps)
        outname = "%s.p%02d.fil.ydat" % (outstem, pos)
        write_filtered(filt, first, aver, inds, outname, source_name, pos)
        print(outname)
コード例 #4
0
ファイル: clustering.py プロジェクト: tpikonen/solution
def cluster_reps(reps, threshold=1.0, plot=1):
    """Cluster the repetitions in `reps` by their mutual chisq distance.

    The condensed distance matrix from `chi2cdm` is linked with
    complete-linkage hierarchical clustering, and the linkage is split
    into clusters by `filter_with_linkage`.

    Returns a tuple with
    - The indices of the first cluster in the list returned by
      `filter_with_linkage` (documented as the largest cluster found)
    - The condensed distance matrix
    - Cluster linkage

    Keyword arguments:
        `threshold` : chisq threshold to use in discrimination.
        `plot` : Plot results, if True.
    """
    cdm = chi2cdm(reps)
    links = hc.linkage(cdm, method='complete')
    clist = filter_with_linkage(links, threshold)
    print("Clusters: %s" % str(clist))

    selected = clist[0]
    if plot:
        # Plot the filtered mean together with the first repetition and
        # the mean over all repetitions.
        plot_clustering(
            mean_stack(reps[selected,...]),
            reps[0,...],
            mean_stack(reps),
            selected, cdm, links, threshold)

    return (selected, cdm, links)
コード例 #5
0
ファイル: outliers.py プロジェクト: tpikonen/solution
def filter_outliers(reps, threshold=1.0, plot=1):
    """Drop repetitions whose mutual chisq exceeds `threshold`.

    Returns a tuple containing the included indices and the condensed
    distance matrix.

    The selection itself is done by `filter_distmat` on the square form
    of the chisq distance matrix. As described for this filter:
    repetitions are removed one at a time, each time removing the one
    that contributes the most over-threshold (outlier) entries to the
    distance matrix. When two repetitions tie, the one with the highest
    chisq distance to a non-outlier point of the matrix is removed.

    Keyword arguments:
        `threshold` : chisq threshold separating outliers.
        `plot` : Plot results with `plot_outliers`, if True.
    """
    cdm = chi2cdm(reps)
    incinds = filter_distmat(squareform(cdm), threshold)

    if not plot:
        return incinds, cdm

    # Plot the filtered mean together with the first repetition and the
    # mean over all repetitions.
    plot_outliers(
        mean_stack(reps[incinds,...]),
        reps[0,...],
        mean_stack(reps),
        incinds, cdm, threshold)
    return incinds, cdm
コード例 #6
0
ファイル: repfilter.py プロジェクト: tpikonen/solution
def chifilter_points(reps, chi2cutoff=1.1, winhw=25, plot=0):
    """Return an average of repetitions statistically similar to the first.

    Array of repetitions `reps` has the shape (nreps, q/I/Ierr, len(q))
    and contains nreps curves with the q-scale and errors.
    The q-scales must be identical in all repetitions.

    Repetitions are compared to the first one point by point. The chi**2
    between the first measurement and the repetition is calculated on an
    interval centered on the compared point with half-width `winhw`
    (clipped at the ends of the q-range). Points which have
    chi**2 >= `chi2cutoff` are discarded from the averaging.

    Keyword arguments:
        `chi2cutoff` : chi**2 limit below which a point is included.
        `winhw` : Half-width of the comparison window, in points.
        `plot` : If true, plot the result with `plot_filtered`, using
            the value as the figure number.

    Returns a tuple (filt, incmap), where `filt` has shape (3, len(q))
    with rows q / filtered mean I / propagated error, and `incmap` is a
    boolean array of shape (nreps, len(q)) marking the included points.
    """
    nreps = reps.shape[0]
    qlen = reps.shape[2]
    # Builtin `bool` instead of `np.bool`: the alias was removed in
    # NumPy 1.24 and raises AttributeError there.
    incmap = np.zeros((nreps, qlen), dtype=bool)
    # The first repetition is the reference and is always included.
    incmap[0,:] = True

    def chi2wfilt(x, y, pos, winhw=winhw):
        # Window of +-winhw points around pos, clipped to [0, qlen).
        ind = slice(max(0, pos-winhw), min(qlen, pos+winhw+1))
        chi2 = chivectors(x[:,ind], y[:,ind])
        return chi2 < chi2cutoff

    first = reps[0,...]
    for rep in range(1, nreps):
        for qind in range(qlen):
            incmap[rep,qind] = chi2wfilt(first, reps[rep,...], qind)

    filt = np.zeros((3, qlen))
    filt[0,:] = first[0,:]
    for qind in range(qlen):
        included = incmap[:,qind]
        filt[1,qind] = np.mean(reps[included, 1, qind])
        # N >= 1 always, because the first repetition is never excluded.
        N = np.sum(included)
        # Propagated error of the mean: sqrt(sum(Ierr**2)) / N.
        # NOTE: taking max(propagated error, standard error of the
        # included points) was considered here but is not used.
        filt[2,qind] = np.sqrt(np.sum(np.square(reps[included, 2, qind]))) / N

    if plot:
        aver = mean_stack(reps)
        plot_filtered(filt, first, aver, incmap, figno=plot)

    return (filt, incmap)