Ejemplo n.º 1
0
def surface_model(config):
    """The surface model tool contains everything you need to build basic 
    multicomponent (i.e. colleciton of Gaussian) surface priors for the 
    multicomponent surface model."""

    # Load configuration JSON into a local dictionary
    configdir, configfile = split(abspath(config))
    config = json_load_ascii(config, shell_replace=True)

    # Determine top level parameters
    for q in ['output_model_file', 'sources', 'normalize', 'wavelength_file']:
        if q not in config:
            raise ValueError("Missing parameter: %s" % q)
    wavelength_file = expand_path(configdir, config['wavelength_file'])
    normalize = config['normalize']
    reference_windows = config['reference_windows']
    outfile = expand_path(configdir, config['output_model_file'])

    # load wavelengths file, and change units to nm if needed
    q = s.loadtxt(wavelength_file)
    if q.shape[1] > 2:
        q = q[:, 1:]
    if q[0, 0] < 100:
        q = q * 1000.0
    wl = q[:, 0]
    nchan = len(wl)

    # build global reference windows
    refwl = []
    for wi, window in enumerate(reference_windows):
        active_wl = aand(wl >= window[0], wl < window[1])
        refwl.extend(wl[active_wl])
    normind = s.array([s.argmin(abs(wl - w)) for w in refwl])
    refwl = s.array(refwl, dtype=float)

    # create basic model template
    model = {
        'normalize': normalize,
        'wl': wl,
        'means': [],
        'covs': [],
        'refwl': refwl
    }

    # each "source" (i.e. spectral library) is treated separately
    for si, source_config in enumerate(config['sources']):

        # Determine source parameters
        for q in [
                'input_spectrum_files', 'windows', 'n_components', 'windows'
        ]:
            if q not in source_config:
                raise ValueError('Source %i is missing a parameter: %s' %
                                 (si, q))

        # Determine whether we should synthesize our own mixtures
        if 'mixtures' in source_config:
            mixtures = source_config['mixtures']
        elif 'mixtures' in config:
            mixtures = config['mixtures']
        else:
            mixtures = 0

        infiles = [
            expand_path(configdir, fi)
            for fi in source_config['input_spectrum_files']
        ]
        ncomp = int(source_config['n_components'])
        windows = source_config['windows']

        # load spectra
        spectra = []
        for infile in infiles:

            hdrfile = infile + '.hdr'
            rfl = envi.open(hdrfile, infile)
            nl, nb, ns = [
                int(rfl.metadata[n]) for n in ('lines', 'bands', 'samples')
            ]
            swl = s.array([float(f) for f in rfl.metadata['wavelength']])

            # Maybe convert to nanometers
            if swl[0] < 100:
                swl = swl * 1000.0

            # Load library and adjust interleave, if needed
            rfl_mm = rfl.open_memmap(interleave='source', writable=True)
            if rfl.metadata['interleave'] == 'bip':
                x = s.array(rfl_mm[:, :, :])
            if rfl.metadata['interleave'] == 'bil':
                x = s.array(rfl_mm[:, :, :]).transpose((0, 2, 1))
            x = x.reshape(nl * ns, nb)

            # import spectra and resample
            for x1 in x:
                p = interp1d(swl,
                             x1,
                             kind='linear',
                             bounds_error=False,
                             fill_value='extrapolate')
                spectra.append(p(wl))

        # calculate mixtures, if needed
        n = float(len(spectra))
        nmix = int(n * mixtures)
        for mi in range(nmix):
            s1, m1 = spectra[int(s.rand() * n)], s.rand()
            s2, m2 = spectra[int(s.rand() * n)], 1.0 - m1
            spectra.append(m1 * s1 + m2 * s2)

        # Flag bad data
        spectra = s.array(spectra)
        use = s.all(s.isfinite(spectra), axis=1)
        spectra = spectra[use, :]

        # Accumulate total list of window indices
        window_idx = -s.ones((nchan), dtype=int)
        for wi, win in enumerate(windows):
            active_wl = aand(wl >= win['interval'][0], wl < win['interval'][1])
            window_idx[active_wl] = wi

        # Two step model generation.  First step is k-means clustering.
        # This is more "stable" than Expectation Maximization with an
        # unconstrained covariance matrix
        kmeans = KMeans(init='k-means++', n_clusters=ncomp, n_init=10)
        kmeans.fit(spectra)
        Z = kmeans.predict(spectra)

        # Now fit the full covariance for each component
        for ci in range(ncomp):

            m = s.mean(spectra[Z == ci, :], axis=0)
            C = s.cov(spectra[Z == ci, :], rowvar=False)

            for i in range(nchan):
                window = windows[window_idx[i]]

                # Each spectral interval, or window, is constructed
                # using one of several rules.  We can draw the covariance
                # directly from the data...
                if window['correlation'] == 'EM':
                    C[i, i] = C[i, i] + float(window['regularizer'])

                # Alternatively, we can use a band diagonal form,
                # a Gaussian process that promotes local smoothness.
                elif window['correlation'] == 'GP':
                    width = float(window['gp_width'])
                    magnitude = float(window['gp_magnitude'])
                    kernel = norm.pdf((wl - wl[i]) / width)
                    kernel = kernel / kernel.sum() * magnitude
                    C[i, :] = kernel
                    C[:, i] = kernel
                    C[i, i] = C[i, i] + float(window['regularizer'])

                # To minimize bias, leave the channels independent
                # and uncorrelated
                elif window['correlation'] == 'decorrelated':
                    ci = C[i, i]
                    C[:, i] = 0
                    C[i, :] = 0
                    C[i, i] = ci + float(window['regularizer'])

                else:
                    raise ValueError('I do not recognize the method ' +
                                     window['correlation'])

            # Normalize the component spectrum if desired
            if normalize == 'Euclidean':
                z = s.sqrt(s.sum(pow(m[normind], 2)))
            elif normalize == 'RMS':
                z = s.sqrt(s.mean(pow(m[normind], 2)))
            elif normalize == 'None':
                z = 1.0
            else:
                raise ValueError('Unrecognized normalization: %s\n' %
                                 normalize)
            m = m / z
            C = C / (z**2)

            model['means'].append(m)
            model['covs'].append(C)

    model['means'] = s.array(model['means'])
    model['covs'] = s.array(model['covs'])

    s.io.savemat(outfile, model)
Ejemplo n.º 2
0
def surface_model(config):
    """."""

    configdir, configfile = split(abspath(config))
    config = json_load_ascii(config, shell_replace=True)

    # Determine top level parameters
    for q in ['output_model_file', 'sources', 'normalize', 'wavelength_file']:
        if q not in config:
            raise ValueError("Missing parameter: %s" % q)

    wavelength_file = expand_path(configdir, config['wavelength_file'])
    normalize = config['normalize']
    reference_windows = config['reference_windows']
    outfile = expand_path(configdir, config['output_model_file'])

    # load wavelengths file
    q = s.loadtxt(wavelength_file)
    if q.shape[1] > 2:
        q = q[:, 1:]
    if q[0, 0] < 100:
        q = q * 1000.0
    wl = q[:, 0]
    nchan = len(wl)

    # build global reference windows
    refwl = []
    for wi, window in enumerate(reference_windows):
        active_wl = aand(wl >= window[0], wl < window[1])
        refwl.extend(wl[active_wl])
    normind = s.array([s.argmin(abs(wl - w)) for w in refwl])
    refwl = s.array(refwl, dtype=float)

    # create basic model template
    model = {
        'normalize': normalize,
        'wl': wl,
        'means': [],
        'covs': [],
        'refwl': refwl
    }

    for si, source_config in enumerate(config['sources']):

        # Determine source parameters
        for q in [
                'input_spectrum_files', 'windows', 'n_components', 'windows'
        ]:
            if q not in source_config:
                raise ValueError('Source %i is missing a parameter: %s' %
                                 (si, q))

        # Determine whether we should synthesize our own mixtures
        if 'mixtures' in source_config:
            mixtures = source_config['mixtures']
        elif 'mixtures' in config:
            mixtures = config['mixtures']
        else:
            mixtures = 0

        infiles = [
            expand_path(configdir, fi)
            for fi in source_config['input_spectrum_files']
        ]
        ncomp = int(source_config['n_components'])
        windows = source_config['windows']

        # load spectra
        spectra = []
        for infile in infiles:

            print('Loading  ' + infile)
            hdrfile = infile + '.hdr'
            rfl = envi.open(hdrfile, infile)
            nl, nb, ns = [
                int(rfl.metadata[n]) for n in ('lines', 'bands', 'samples')
            ]
            swl = s.array([float(f) for f in rfl.metadata['wavelength']])

            # Maybe convert to nanometers
            if swl[0] < 100:
                swl = swl * 1000.0

            rfl_mm = rfl.open_memmap(interleave='source', writable=True)
            if rfl.metadata['interleave'] == 'bip':
                x = s.array(rfl_mm[:, :, :])
            if rfl.metadata['interleave'] == 'bil':
                x = s.array(rfl_mm[:, :, :]).transpose((0, 2, 1))
            x = x.reshape(nl * ns, nb)

            # import spectra and resample
            for x1 in x:
                p = interp1d(swl,
                             x1,
                             kind='linear',
                             bounds_error=False,
                             fill_value='extrapolate')
                spectra.append(p(wl))

        # calculate mixtures, if needed
        n = float(len(spectra))
        nmix = int(n * mixtures)
        for mi in range(nmix):
            s1, m1 = spectra[int(s.rand() * n)], s.rand()
            s2, m2 = spectra[int(s.rand() * n)], 1.0 - m1
            spectra.append(m1 * s1 + m2 * s2)

        spectra = s.array(spectra)
        use = s.all(s.isfinite(spectra), axis=1)
        spectra = spectra[use, :]

        # accumulate total list of window indices
        window_idx = -s.ones((nchan), dtype=int)
        for wi, win in enumerate(windows):
            active_wl = aand(wl >= win['interval'][0], wl < win['interval'][1])
            window_idx[active_wl] = wi

        # Two step model.  First step is k-means initialization
        kmeans = KMeans(init='k-means++', n_clusters=ncomp, n_init=10)
        kmeans.fit(spectra)
        Z = kmeans.predict(spectra)

        for ci in range(ncomp):

            m = s.mean(spectra[Z == ci, :], axis=0)
            C = s.cov(spectra[Z == ci, :], rowvar=False)

            for i in range(nchan):
                window = windows[window_idx[i]]
                if window['correlation'] == 'EM':
                    C[i, i] = C[i, i] + float(window['regularizer'])
                elif window['correlation'] == 'decorrelated':
                    ci = C[i, i]
                    C[:, i] = 0
                    C[i, :] = 0
                    C[i, i] = ci + float(window['regularizer'])
                else:
                    raise ValueError('I do not recognize the source  ' +
                                     window['correlation'])

            # Normalize the component spectrum if desired
            if normalize == 'Euclidean':
                z = s.sqrt(s.sum(pow(m[normind], 2)))
            elif normalize == 'RMS':
                z = s.sqrt(s.mean(pow(m[normind], 2)))
            elif normalize == 'None':
                z = 1.0
            else:
                raise ValueError('Unrecognized normalization: %s\n' %
                                 normalize)
            m = m / z
            C = C / (z**2)

            model['means'].append(m)
            model['covs'].append(C)

    model['means'] = s.array(model['means'])
    model['covs'] = s.array(model['covs'])

    s.io.savemat(outfile, model)
    print("saving results to", outfile)