Esempio n. 1
0
def average_positions(filenames, chi2cutoff=1.15, write=True, plot=1):
    """Filter and average over positions in a capillary.

    The files in `filenames` are stacked with `stack_datafiles`, clustered
    with `cluster_reps` using `chi2cutoff` as the threshold, and the
    members selected by the clustering are averaged with `mean_stack`.

    Note that `filenames` is sorted in place, so the caller's list is
    reordered.

    If `write` is true, a 7-row array (averaged q, I, Ierr; I and Ierr of
    the first file; I and Ierr of the mean over all files) is written with
    `write_ydat`. The output name is the first sorted file name truncated
    at its first '.p' and suffixed with '.clu.ydat'; if the name contains
    no '.p', the whole name is used as the stem. `plot` is passed on to
    `cluster_reps` unchanged.

    Returns the mean over the included files, as given by `mean_stack`.
    """
    filenames.sort()
    stack = stack_datafiles(filenames)

    incinds, cdm, links = cluster_reps(stack, threshold=chi2cutoff, plot=plot)
    ms = mean_stack(stack[incinds,...])

    # Indices that the clustering left out. Built as a real list so that
    # this works on Python 3 as well, where range() has no remove().
    included_set = set(int(i) for i in incinds)
    disinds = [i for i in range(len(filenames)) if i not in included_set]

    included = [[filenames[i], md5_file(filenames[i])] for i in incinds]
    discarded = [[filenames[i], md5_file(filenames[i])] for i in disinds]

    # List comprehensions rather than map(): on Python 3 map() returns a
    # lazy iterator, which is not what should end up in the metadata.
    ad = {
        'chi2cutoff': float(chi2cutoff),
        'included': included,
        'discarded': discarded,
        'chi2matrix': [float(c) for c in cdm],
        'incinds': [int(i) for i in incinds],
        'linkage': [[float(v) for v in ll] for ll in links],
    }

    outarr = np.zeros((7, ms.shape[1]))
    outarr[0:3,:] = ms
    outarr[3:5,:] = stack[0,1:3,:]
    outarr[5:7,:] = mean_stack(stack)[1:3,:]

    if write:
        first = filenames[0]
        # str.find() returns -1 when '.p' is missing; slicing with -1
        # would silently chop the last character off the name.
        cut = first.find('.p')
        stem = first[:cut] if cut >= 0 else first
        fname = "%s.clu.ydat" % stem
        print(fname)
        write_ydat(outarr, fname, addict=ad,
                   cols=['q', 'I', 'Ierr', 'I_first', 'Ierr_first',
                         'I_all', 'Ierr_all'])
    return ms
Esempio n. 2
0
def subtract_background_from_ydats(scanfile, indir, outdir, scannumber=-1, highqnorm=False):
    """Subtract background from SAXS data in .ydat files.

    `scanfile` is a YAML file (read with `read_yaml`) keyed by scan number.
    For a sample scan, element 0 of the entry is the number of the buffer
    scan to subtract and element 1 is the concentration (printed as g/l).
    An entry which cannot be indexed is taken to be a buffer and skipped.
    If the concentration is missing, 1.0 is used.

    If `scannumber` is positive only that scan is processed, otherwise all
    scans in `scanfile` are processed in sorted order.

    For each scan, the number of files matching
    `<indir>/sNNN.*.fil.ydat` determines how many positions are processed.
    For position PP the sample `<indir>/sNNN.pPP.fil.ydat` and the buffer
    `<indir>/bufsBBB.pPP.out.ydat` are read, subtracted with `errsubtract`,
    divided by the concentration (rows 1 and 2) and written to
    `<outdir>/sNNN.pPP.sub.ydat`.

    If `highqnorm` is True, normalize the buffer to the sample intensity
    in q-range [4.0, 5.0] 1/nm and adjust with a constant before subtracting.
    """
    scans = read_yaml(scanfile)
    if scannumber > 0:
        scannos = [scannumber]
    else:
        # sorted() instead of keys() + sort(): on Python 3 keys() is a view
        # without a sort() method.
        scannos = sorted(scans.keys())
    for scanno in scannos:
        print("Scan #%03d" % scanno)
        try:
            bufscan = scans[scanno][0]
        except TypeError:
            print("Scan #%03d is a buffer" % scanno)
            continue
        try:
            conc = scans[scanno][1]
        except (TypeError, IndexError):
            # IndexError covers an entry holding only the buffer scan
            # number. The message needs a real conversion specifier,
            # otherwise the '%' operator itself raises TypeError.
            print("No concentration for scan #%03d." % scanno)
            conc = 1.0
        print("Using concentration %g g/l." % conc)
        filelist = glob.glob(indir+"/s%03d.*.fil.ydat" % scanno)
        # Only the number of matches is used; the file names are rebuilt
        # from the position index, counting up from 0.
        for posno in range(len(filelist)):
            bufname = indir + "/bufs%03d.p%02d.out.ydat" % (bufscan, posno)
            buf, dbuf = read_ydat(bufname, addict=1)
            fname = indir + "/s%03d.p%02d.fil.ydat" % (scanno, posno)
            sam, dsam = read_ydat(fname, addict=1)
            outname = os.path.basename(fname)
            outname = outdir+'/'+outname[:outname.find('.fil.ydat')]+'.sub.ydat'
            ad = {
                'samfile': [os.path.basename(fname), md5_file(fname)],
                'buffile': [os.path.basename(bufname), md5_file(bufname)],
                'position' : dsam.get('inputposition', "unknown"),
                'q~unit' : dsam.get('q~unit', "unknown"),
                'I~unit' : dsam.get('I~unit', "unknown"),
                'Ierr~unit' : dsam.get('Ierr~unit', "unknown"),
                }
            if highqnorm:
                # 1 + 0.007 1/(g/l) is the excess of scattered intensity
                # in a protein sample versus buffer in the q-range
                # used [4.0, 5.0] 1/nm per concentration.
                scale = highq_scale(sam, buf)
                bufscale = scale * 1.0/(1.0 + 0.007*conc)
                print("scale: %g, bufscale: %g" % (scale, bufscale))
                # Scale both the buffer intensity and its error.
                buf[1,:] = bufscale * buf[1,:]
                buf[2,:] = bufscale * buf[2,:]
                ad['normalization'] = float(bufscale)
            else:
                ad['normalization'] = 'transmission'
            # Assumes the standard q, I, Ierr ordering in index 0 columns
            sub = errsubtract(sam, buf)
            sub[1:3,:] = sub[1:3,:] / conc
            write_ydat(sub, outname, addict=ad, attributes=['~unit'])
            print(os.path.basename(outname))
Esempio n. 3
0
def write_filtered(filtered, first, aver, incmap, fname, inputfile=None, pos=-1):
    """Write an 'ydat' YAML file `fname` with filtered data and index array.

    `filtered` contains the filtered data, `incmap` the point by point inclusion
    array (bool matrix) of points used in the averaging and `first` the
    data set used in comparison for the filtering. Rows 1 and 2 of `aver`
    are written as the 'I_all' and 'Ierr_all' columns.

    The file starts with an `incmap` flow sequence built from
    `incmap_to_strings(incmap.T)`, wrapped so that a row holds
    ``1 + 80 // len(first string)`` entries, followed by the data written
    with `write_ydat`.

    If `inputfile` is given, its name and MD5 sum are stored in the
    metadata; if `pos` is non-negative it is stored as 'inputposition'.
    """
    # FIXME: take a list of filenames which are filtered as an argument
    #        and write them to the file
    with open(fname, "w") as fp:
        indent = '  '
        fp.write('incmap: !!seq [\n' + indent)
        slist = incmap_to_strings(incmap.T)
        nstr = len(slist)
        if nstr:
            # Floor division: '/' would give a float on Python 3 and
            # break the row wrapping below.
            perrow = 1 + (80 // len(slist[0]))
        else:
            perrow = 1
            # Nothing to list; still close the sequence so that the
            # output stays well-formed.
            fp.write(']\n')
        # Entries before `nfull` belong to complete rows, the remaining
        # ones form the last row.
        nfull = perrow * ((nstr - 1) // perrow)
        for i, s in enumerate(slist):
            fp.write(s)
            if i < nfull:
                if (i+1) % perrow or not i:
                    fp.write(', ')
                else:
                    fp.write(',\n' + indent)
            elif i < nstr - 1:
                fp.write(', ')
            else:
                fp.write(']\n')
        ad = {
            'method' : "filter_repetitions",
            'q~unit' : '1/nm',
            'I~unit' : 'arb.',
            'Ierr~unit' : 'arb.',
            'I_first~unit' : 'arb.',
            'Ierr_first~unit' : 'arb.',
            'I_all~unit' : 'arb.',
            'Ierr_all~unit' : 'arb.',
            }
        if inputfile:
            ad['inputfile'] = [ inputfile, md5_file(inputfile) ]
        if pos >= 0:
            ad['inputposition'] = int(pos)
        outarr = np.zeros((7, filtered.shape[1]))
        outarr[0:3,:] = filtered
        outarr[3:5,:] = first[1:3,:]
        outarr[5:7,:] = aver[1:3,:]
        cols = ['q', 'I', 'Ierr', 'I_first', 'Ierr_first', 'I_all', 'Ierr_all']
        # The data is appended to the already open file object.
        write_ydat(outarr, fp, cols=cols, addict=ad, attributes=['~unit'])
Esempio n. 4
0
def filter_matfile(fname, outstem, p_reject=0.001, plot=1):
    """Filter outlier repetitions in a stack read from `fname`.

    The stack is read with `read_mat`. The chi-squared rejection threshold
    is ``chi2.ppf(1.0 - p_reject, N) / N``, where N is the number of
    non-NaN entries in ``stack[0,0,1,:]``.

    For every index `pos` along the first axis of the stack, the
    repetitions are passed to `filter_outliers` (together with `plot`),
    the included ones are averaged with `mean_stack`, and the result is
    written with `write_ydat` to ``<outstem>.pNN.out.ydat``. Each output
    holds 7 rows: averaged q, I, Ierr; I and Ierr of the first
    repetition; I and Ierr of the mean over all repetitions.

    Nothing is returned.
    """
    stack = read_mat(fname)
    md5 = md5_file(fname)
    print("Rejection probability: %0.3g" % p_reject)
    N = np.sum(np.logical_not(np.isnan(stack[0,0,1,:])))
    print("Number of valid channels: %d" % N)
    threshold = chi2.ppf(1.0 - p_reject, N) / N
    print("Chisq rejection threshold: %0.3g" % threshold)

    cols = ['q', 'I', 'Ierr', 'I_first', 'Ierr_first', 'I_all', 'Ierr_all']
    for pos in range(stack.shape[0]):
        reps = stack[pos,...]
        incinds, cdm = filter_outliers(reps, threshold=threshold, plot=plot)
        ms = mean_stack(reps[incinds,...])
        # Discarded repetitions = everything not included. Built as a real
        # list so that this works on Python 3 as well, where range() has
        # no remove().
        included_set = set(int(i) for i in incinds)
        disinds = [i for i in range(reps.shape[0]) if i not in included_set]
        print("Pos %d, discarded: %s" % (pos, str(disinds)))
        # List comprehensions rather than map(): on Python 3 map() returns
        # a lazy iterator, which is not what should end up in the metadata.
        ad = { 'chi2cutoff' : float(threshold),
            'rejection_prob' : float(p_reject),
            'incinds' : [int(i) for i in incinds],
            'disinds' : [int(i) for i in disinds],
            'chi2matrix' : [float(c) for c in cdm],
            'method' : "filter_outliers",
            'inputfile' : [ fname, md5 ],
            'inputposition' : int(pos),
            'q~unit' : '1/nm',
            'I~unit' : 'arb.',
            'Ierr~unit' : 'arb.',
            'I_first~unit' : 'arb.',
            'Ierr_first~unit' : 'arb.',
            'I_all~unit' : 'arb.',
            'Ierr_all~unit' : 'arb.',
            }
        outarr = np.zeros((7, ms.shape[1]))
        outarr[0:3,:] = ms
        outarr[3:5,:] = reps[0,1:3,:]
        outarr[5:7,:] = mean_stack(reps)[1:3,:]

        outname = "%s.p%02d.out.ydat" % (outstem, pos)
        print(outname)
        write_ydat(outarr, outname, addict=ad, cols=cols,
            attributes=['~unit'])
Esempio n. 5
0
def write_stack_ydat(fname, stack, fnames, dvals, conf):
    """Write a single position from a stack to an .ydat file.

    The output array has ``2*stack.shape[0] + 1`` rows: row 0 is
    ``stack[0,0,:]`` (q), followed by the rows 1 (I) and 2 (Ierr) of every
    entry along the first axis of `stack`, interleaved.

    Column names 'I00', 'Ierr00', 'I01', ... are generated from the length
    of `fnames`, which is presumably equal to ``stack.shape[0]`` (not
    checked here). `fnames` and `dvals` are stored in the metadata as
    'frames' and 'transmissions', together with the base name and MD5 sum
    of ``conf['Indfile']``.
    """
    sh = stack.shape
    outarr = np.zeros((2*sh[0]+1, sh[-1]))
    outarr[0,:] = stack[0,0,:] # q
    # Strided assignment fills the odd rows with I and the even rows
    # (from 2 on) with Ierr, one pair per entry of the first axis.
    outarr[1::2,:] = stack[:,1,:] # I
    outarr[2::2,:] = stack[:,2,:] # Ierr
    nframes = len(fnames)
    ad = {  'frames': list(fnames),
            'transmissions': dvals,
            'indfile': [ os.path.basename(conf['Indfile']),
                        md5_file(conf['Indfile']) ],
            'q~unit': '1/nm',
        }
    cols = ['q']
    for n in range(nframes):
        cols.extend(["I%02d" % n, "Ierr%02d" % n])
    ad.update(("I%02d~unit" % n, "arb.") for n in range(nframes))
    ad.update(("Ierr%02d~unit" % n, "arb.") for n in range(nframes))
    write_ydat(outarr, fname, cols=cols, addict=ad, attributes=['~unit'])