Example #1
  def readin(self):
    """readin()

    Read in image data from a directory.
    """
    
    # Read in the initialization data (images) from initdirname, if present.
    # This variable is called 'initfilename', but it's a directory here.
    if self.initfilename != '':
      printt('Reading initialization data set from %s' % self.initfilename)
      (self.initdata, unused_labels, width, height) = ImageData.read_image_dir(self.initfilename)
      self.initdata = np.asarray(self.initdata)
      self.initdata = self.initdata.T
      print self.initdata.shape

    ########## Read in the data to analyze
    # Labels are individual filenames
    (self.data, self.labels, self.width, self.height) = \
        ImageData.read_image_dir(self.filename)
      
    self.data = np.asarray(self.data)
    if len(self.data) == 0: 
      print 'Error: no image files found.'
      sys.exit(1)

    print 'Read %d image files with %d pixels each.' % self.data.shape
    self.data = self.data.T
    print self.data.shape
        
    print ' Dimensions: %d width, %d height.' % (self.width, self.height)
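A quick usage sketch of this pattern (the directory name is hypothetical; the four-value return convention matches this example):

import numpy as np

(data, labels, width, height) = ImageData.read_image_dir('my_images/')
data = np.asarray(data).T   # transpose to features (pixels) x samples
print data.shape            # (width*height, n_images)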
Example #2
    def readin(self):
        """readin()

    Read in image data from a directory.
    """

        # Read in the initialization data (images) from initdirname, if present.
        # This variable is called 'initfilename', but it's a directory here.
        if self.initfilename != '':
            printt('Reading initialization data set from %s' %
                   self.initfilename)
            (self.initdata, unused_labels, width,
             height) = ImageData.read_image_dir(self.initfilename)
            self.initdata = np.asarray(self.initdata)
            self.initdata = self.initdata.T
            print self.initdata.shape

        ########## Read in the data to analyze
        # Labels are individual filenames
        (self.data, self.labels, self.width, self.height) = \
            ImageData.read_image_dir(self.filename)

        self.data = np.asarray(self.data)
        if len(self.data) == 0:
            print 'Error: no image files found.'
            sys.exit(1)

        print 'Read %d image files with %d pixels each.' % self.data.shape
        self.data = self.data.T
        print self.data.shape

        print ' Dimensions: %d width, %d height.' % (self.width, self.height)
Example #3
  def readin(self):
    """readin()

    Read in image data from a directory.
    """
    
    # Read in the initialization data (images) from initdirname, if present.
    # This variable is called 'initfilename', but it's a directory here.
    if self.initfilename != '':
      printt('Reading initialization data set from %s' % self.initfilename)
      (self.initdata, unused_labels, imshape) = \
          ImageData.read_image_dir(self.initfilename)
      self.initdata = np.asarray(self.initdata)
      self.initdata = self.initdata.T
      print 'Initializing with %d images (%s).' % \
          (self.initdata.shape[1], str(imshape))
      print self.initdata.shape

    ########## Read in the data to analyze
    # Labels are individual filenames
    (self.data, self.labels, self.imshape) = \
        ImageData.read_image_dir(self.filename)
      
    self.data = np.asarray(self.data)
    print self.data.shape

    if len(self.data) == 0: 
      print 'Error: no image files found.'
      sys.exit(1)

    self.data = self.data.T
    print 'Read %d images (%s).' % \
        (self.data.shape[1], str(self.imshape))
Example #4
    def __init__(self,
                 inputname=None,
                 initfilename=None,
                 startsol=-1,
                 endsol=-1,
                 initpriorsols=False,
                 shotnoisefilt=0):
        """LIBSData(inputname="", sol=-1)

    Read in LIBS (ChemCam) data in CSV format from inputname.
    If inputname ends in .csv, treat it as a CSV file.
    If inputname ends in .pkl, treat it as a pickled file.
    Otherwise, treat it as a directory and look for a .pkl file inside;
    if not found, generate it with contents from all .csv files present.

    If present, also read in data from initfilename (must be .csv).
    This data will be used to initialize the DEMUD model.

    Optionally, specify a sol range (startsol-endsol) for data to analyze.
    Optionally, use data prior to startsol to initialize the model.
    Optionally, specify the width of a median filter to apply.
    """

        input_type = inputname[-3:]

        if input_type == 'csv':
            filename = inputname
            expname  = 'libs-' + \
                os.path.splitext(os.path.basename(filename))[0]
            #filename[filename.rfind('/')+1:filename.find('.')]
        elif input_type == 'pkl':
            if shotnoisefilt > 0:
                #filename = inputname[:-4] + ('-snf%d.pkl' % shotnoisefilt)
                filename = os.path.splitext(inputname)[0] + \
                    ('-snf%d.pkl' % shotnoisefilt)
            else:
                filename = inputname
            expname  = 'libs-' + \
                os.path.splitext(os.path.basename(filename))[0]
            #filename[filename.rfind('/')+1:filename.find('.')]
        else:  # assume directory
            input_type = 'dir'
            #filename = inputname + '/libs-mean-norm.pkl'
            filename = os.path.join(inputname, 'libs-mean-norm.pkl')
            if shotnoisefilt > 0:
                #filename = filename[:-4] + ('-snf%d.pkl' % shotnoisefilt)
                filename = os.path.splitext(filename)[0] + \
                    ('-snf%d.pkl' % shotnoisefilt)
                #expname  = 'libs-' + inputname[inputname.rfind('/')+1:]
            expname = 'libs-' + os.path.basename(inputname)

        Dataset.__init__(self, filename, expname, initfilename)

        printt('Reading %s data from %s.' % (input_type, self.filename))

        if input_type == 'dir' and not os.path.exists(filename):
            LIBSData.read_dir(inputname, filename, shotnoisefilt)

        self.readin(startsol, endsol, initpriorsols, shotnoisefilt)
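A usage sketch (file and directory names are hypothetical):

# CSV input: analyze a single file, with a median filter of width 9
ds = LIBSData(inputname='ccam_sol100.csv', shotnoisefilt=9)

# Directory input: libs-mean-norm.pkl is generated if missing, then
# sols 100-200 are analyzed with earlier sols priming the model
ds = LIBSData(inputname='ccam_data/', startsol=100, endsol=200,
              initpriorsols=True)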
Example #5
    def medfilter(cls, data, L, fw=[]):
        """medfilter(cls, data, L)

    Filter each column of data using a window of width L.
    Replace each value with its median from the surrounding window.
    Inspired by http://staff.washington.edu/bdjwww/medfilt.py .

    Optionally, specify feature weights so they can factor in
    to the median calculation.  Any zero-valued weights make the
    median calculation ignore those items.  Values greater than
    zero are NOT weighted; they all participate normally.
    """

        if len(data) == 0:
            print 'Error: empty data; cannot filter.'
            return data

        if L < 3:
            print 'Error: L (%d) is too small; minimum 3.' % L
            return data

        printt(
            'Filtering shot noise with a width of %d (this may take some time).'
            % L)

        Lwing = (L - 1) / 2

        (d, n) = data.shape  # assume items are column vectors
        data2 = np.zeros_like(data)

        for j in range(n):
            for i in range(d):

                # Specify the range over which to compute the median
                if (i < Lwing):
                    ind = range(0, i + Lwing + 1)
                elif (i >= d - Lwing):
                    ind = range(i - Lwing, d)
                else:
                    ind = range(i - Lwing, i + Lwing + 1)

                # If featureweights are specified,
                # adjust ind to only include the nonzero ones.
                if fw != []:
                    # If there aren't any features with nonzero weights,
                    # this won't use anything (set data value to 0).
                    # Use 'w', not 'i': Python 2 list comprehensions leak
                    # their variable and would clobber the row index i.
                    ind = [w for w in ind if fw[w] > 0]

                # Perform the median filter.
                # If there are no valid features to use, set this point to 0
                # (it won't be used later anyway)
                if ind == []:
                    data2[i, j] = 0
                else:
                    data2[i, j] = np.median(data[ind, j])

        return data2
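A minimal sketch on synthetic data (the spike value is made up; medfilter is assumed to be exposed as a classmethod, as the cls argument suggests):

import numpy as np

data = np.ones((7, 2))        # 7 features x 2 items (column vectors)
data[3, 0] = 50.0             # inject an impulsive "shot noise" spike
smoothed = LIBSData.medfilter(data, 3)
print smoothed[3, 0]          # 1.0: the spike becomes the local median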
Example #6
  def medfilter(cls, data, L, fw=[]):
    """medfilter(cls, data, L)

    Filter each column of data using a window of width L.
    Replace each value with its median from the surrounding window.
    Inspired by http://staff.washington.edu/bdjwww/medfilt.py .

    Optionally, specify feature weights so they can factor in
    to the median calculation.  Any zero-valued weights make the
    median calculation ignore those items.  Values greater than
    zero are NOT weighted; they all participate normally.
    """

    if len(data) == 0:
      print 'Error: empty data; cannot filter.'
      return data
    
    if L < 3:
      print 'Error: L (%d) is too small; minimum 3.' % L
      return data

    printt('Filtering shot noise with a width of %d (this may take some time).' % L)

    Lwing = (L-1)/2

    (d,n) = data.shape  # assume items are column vectors
    data2 = np.zeros_like(data)

    for j in range(n):
      for i in range(d):

        # Specify the range over which to compute the median
        if (i < Lwing):
          ind = range(0, i+Lwing+1)
        elif (i >= d - Lwing):
          ind = range(i-Lwing, d)
        else:
          ind = range(i-Lwing, i+Lwing+1)

        # If featureweights are specified,
        # adjust ind to only include the nonzero ones.
        if fw != []:
          # If there aren't any features with nonzero weights,
          # this won't use anything (set data value to 0).
          # Use 'w', not 'i': Python 2 list comprehensions leak their
          # variable and would clobber the row index i.
          ind = [w for w in ind if fw[w] > 0]

        # Perform the median filter.
        # If there are no valid features to use, set this point to 0
        # (it won't be used later anyway)
        if ind == []:
          data2[i, j] = 0
        else:
          data2[i, j] = np.median(data[ind, j])

    return data2
Example #7
  def __init__(self, inputname=None, initfilename=None,
                startsol=-1, endsol=-1, initpriorsols=False, 
                shotnoisefilt=0):
    """LIBSData(inputname="", sol=-1)

    Read in LIBS (ChemCam) data in CSV format from inputname.
    If inputname ends in .csv, treat it as a CSV file.
    If inputname ends in .pkl, treat it as a pickled file.
    Otherwise, treat it as a directory and look for a .pkl file inside;
    if not found, generate it with contents from all .csv files present.

    If present, also read in data from initfilename (must be .csv).
    This data will be used to initialize the DEMUD model.

    Optionally, specify a sol range (startsol-endsol) for data to analyze.
    Optionally, use data prior to startsol to initialize the model.
    Optionally, specify the width of a median filter to apply.
    """

    input_type = inputname[-3:]

    if input_type == 'csv':
      filename = inputname
      expname  = 'libs-' + \
          os.path.splitext(os.path.basename(filename))[0]
      #filename[filename.rfind('/')+1:filename.find('.')]
    elif input_type == 'pkl':
      if shotnoisefilt > 0:
        #filename = inputname[:-4] + ('-snf%d.pkl' % shotnoisefilt)
        filename = os.path.splitext(inputname)[0] + \
            ('-snf%d.pkl' % shotnoisefilt)
      else:
        filename = inputname
      expname  = 'libs-' + \
          os.path.splitext(os.path.basename(filename))[0]
      #filename[filename.rfind('/')+1:filename.find('.')]
    else:  # assume directory
      input_type = 'dir'
      #filename = inputname + '/libs-mean-norm.pkl'
      filename = os.path.join(inputname, 'libs-mean-norm.pkl')
      if shotnoisefilt > 0:
        #filename = filename[:-4] + ('-snf%d.pkl' % shotnoisefilt)
        filename = os.path.splitext(filename)[0] + \
            ('-snf%d.pkl' % shotnoisefilt)
        #expname  = 'libs-' + inputname[inputname.rfind('/')+1:]
      expname  = 'libs-' + os.path.basename(inputname)

    Dataset.__init__(self, filename, expname, initfilename)

    printt('Reading %s data from %s.' % (input_type, self.filename))

    if input_type == 'dir' and not os.path.exists(filename):
      LIBSData.read_dir(inputname, filename, shotnoisefilt)
    
    self.readin(startsol, endsol, initpriorsols, shotnoisefilt)
Example #8
    def read_image_dir(cls, dirname):
        """read_image_dir(dirname)

    Read in all of the images in dirname and return
    - a list of data
    - a list of labels
    - imshape: (width, height) or (width, height, depth) tuple
    """

        data = []
        labels = []  # Save the individual file names

        imshape = (-1, -1, -1)

        # Read in the image data
        files = sorted(os.listdir(dirname))
        numimages = len(files)
        print numimages
        printt("Loading files:")
        counter = 0
        for idx, f in enumerate(files):
            # Unix-style wildcards.
            if (fnmatch.fnmatch(f, '*.jpg') or fnmatch.fnmatch(f, '*.JPG')
                    or fnmatch.fnmatch(f, '*.png')):
                # Read in the image
                filename = os.path.join(dirname, f)
                im = imread(filename)

                if imshape[0] == -1:
                    #data = np.zeros([], dtype=np.float32).reshape(numimages, np.prod(im.shape))
                    data = np.zeros([numimages, np.prod(im.shape)],
                                    dtype=np.float32)
                    #data = np.array([], dtype=np.float32).reshape(0,np.prod(im.shape))
                    imshape = im.shape
                else:
                    # Ensure that all images are the same dimensions
                    if imshape != im.shape:
                        if len(im.shape) == 2:
                            # Convert grayscale to rgb
                            im = np.dstack((im, im, im))
                        else:
                            raise ValueError(
                                'Images must all have the same dimensions.')

                #data = np.vstack([data, im.reshape(1,np.prod(im.shape))])
                data[counter] = im.reshape(1, np.prod(im.shape))

                labels.append(f)
                progbar(idx, len(files))
                counter += 1

        return (data, labels, imshape)
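A usage sketch for this variant, which returns an imshape tuple instead of separate width and height (the directory name is hypothetical):

(data, labels, imshape) = ImageData.read_image_dir('rover_frames/')
print '%d images of shape %s; first file %s' % \
    (len(labels), str(imshape), labels[0])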
Example #9
    def readin(self, nskip):
        """readin()
    """

        (self.xvals, self.data,
         self.labels) = FloatDataset.read_csv(self.filename, nskip)

        self.data = self.data.T  # features x samples

        # If there was no header with feature names, just create an empty xvals
        if self.xvals == []:
            self.xvals = numpy.arange(self.data.shape[0]).reshape(-1, 1)

        # Read in the init data file, if present
        if self.initfilename != '':
            printt('Reading initialization data set from %s' %
                   self.initfilename)
            (_, self.initdata,
             _) = FloatDataset.read_csv(self.initfilename, nskip)
            self.initdata = self.initdata.T  # features x samples
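A hedged sketch of the call (the FloatDataset constructor and the exact meaning of nskip are assumptions; read_csv is only known to return (xvals, data, labels)):

ds = FloatDataset('features.csv')   # hypothetical CSV of numeric features
ds.readin(nskip=0)
print ds.data.shape                 # features x samples after the transpose
print ds.xvals[:5]                  # header names, or 0..d-1 if no header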
Example #10
    def prune_and_normalize(cls, data, wavelengths, shotnoisefilt):
        """prune_and_normalize(cls, data, wavelengths, shotnoisefilt)

    Subset LIBS data to only use wavelengths between 270 and 820 nm,
    set negative values to zero,
    then normalize responses for each of the three spectrometers.

    If shotnoisefilt >= 3, run a median filter on the data with width as specified.

    Return the pruned and normalized data.
    """

        print 'Pruning and normalizing the data.'

        # Only use data between 270 and 820 nm (ends are noisy)
        use = np.where(np.logical_and(wavelengths >= 270,
                                      wavelengths < 820))[0]
        # Instead of stripping these bands out now, first do shot noise
        # filtering (if desired).  Then strip the bands later.

        # If desired, apply a median filter to strip out impulsive noise
        # Note: this is slow. :)  Probably can be optimized in some awesome fashion.
        if shotnoisefilt >= 3:
            printt('Filtering out shot noise with width %d.' % shotnoisefilt)
            # Create a vector of 0's (to ignore) and 1's (to use).
            fw = np.zeros((data.shape[0], 1))
            fw[use] = 1
            data = LIBSData.medfilter(data, shotnoisefilt, fw)

        # Now remove bands we do not use
        wavelengths = wavelengths[use]
        data = data[use, :]

        # Replace negative values with 0
        negvals = (data < 0)
        printt('--- %d negative values.' % len(np.where(negvals)[0]))
        data[negvals] = 0
        printt('--- %d negative values.' % len(np.where(data < 0)[0]))

        # Normalize the emission values for each of the
        # three spectrometers independently
        # Nina: VIS begins at 382.13812; VNIR starts at 473.1842
        vis_spec = 382.13812
        vnir_spec = 473.1842
        spec1 = np.where(
            np.logical_and(wavelengths >= 0, wavelengths < vis_spec))[0]
        spec2 = np.where(
            np.logical_and(wavelengths >= vis_spec,
                           wavelengths < vnir_spec))[0]
        spec3 = np.where(wavelengths >= vnir_spec)[0]
        for waves in [spec1, spec2, spec3]:
            data[waves, :] = data[waves, :] / np.sum(data[waves, :], axis=0)

        return (data, wavelengths)
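A synthetic sketch (the wavelength grid and intensities are made up; the classmethod binding is assumed) showing the per-spectrometer normalization:

import numpy as np

wavelengths = np.linspace(240.0, 906.0, 6144)
data = np.abs(np.random.randn(6144, 5))   # features x samples, nonnegative
(norm, waves) = LIBSData.prune_and_normalize(data, wavelengths, 0)
# Only 270-820 nm remains, and each spectrometer block of each
# column now sums to 1
spec1 = np.where(waves < 382.13812)[0]
print np.sum(norm[spec1, 0])              # ~1.0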
Example #11
  def prune_and_normalize(cls, data, wavelengths, shotnoisefilt):
    """prune_and_normalize(cls, data, wavelengths, shotnoisefilt)

    Subset LIBS data to only use wavelengths between 270 and 820 nm,
    set negative values to zero,
    then normalize responses for each of the three spectrometers.

    If shotnoisefilt >= 3, run a median filter on the data with width as specified.

    Return the pruned and normalized data.
    """

    print 'Pruning and normalizing the data.'

    # Only use data between 270 and 820 nm (ends are noisy)
    use = np.where(np.logical_and(wavelengths >= 270,
                                  wavelengths < 820))[0]
    # Instead of stripping these bands out now, first do shot noise
    # filtering (if desired).  Then strip the bands later.

    # If desired, apply a median filter to strip out impulsive noise
    # Note: this is slow. :)  Probably can be optimized in some awesome fashion.
    if shotnoisefilt >= 3:
      printt('Filtering out shot noise with width %d.' % shotnoisefilt)
      # Create a vector of 0's (to ignore) and 1's (to use).
      fw = np.zeros((data.shape[0],1))
      fw[use] = 1
      data = LIBSData.medfilter(data, shotnoisefilt, fw)

    # Now remove bands we do not use
    wavelengths = wavelengths[use]
    data        = data[use,:]
    
    # Replace negative values with 0
    negvals = (data < 0)
    printt('--- %d negative values.' % len(np.where(negvals)[0]))
    data[negvals] = 0
    printt('--- %d negative values.' % len(np.where(data<0)[0]))

    # Normalize the emission values for each of the
    # three spectrometers independently
    # Nina: VIS begins at 382.13812; VNIR starts at 473.1842
    vis_spec  = 382.13812
    vnir_spec = 473.1842
    spec1 = np.where(np.logical_and(wavelengths >= 0,
                                    wavelengths < vis_spec))[0]
    spec2 = np.where(np.logical_and(wavelengths >= vis_spec,
                                    wavelengths < vnir_spec))[0]
    spec3 = np.where(wavelengths >= vnir_spec)[0]
    for waves in [spec1, spec2, spec3]:
      data[waves,:] = data[waves,:] / np.sum(data[waves,:], axis=0)

    return (data, wavelengths)
Example #12
  def read_dir(cls, dirname, outfile, shotnoisefilt=0):
    """read_dir(dirname, outfile)

    Read in raw LIBS data from .csv files in dirname.
    Pickle the result and save it to outfile.
    Note: does NOT update object fields.
    Follow this with a call to readin().
    """

    # First read in the target names and sol numbers.
    targets = {}
    sols    = {}
    # Location of this file is hard-coded!
    # My latest version goes through sol 707.
    metafile = 'msl_ccam_obs.csv'
    with open(os.path.join(dirname, metafile)) as f:
      datareader = csv.reader(f)
      # Index targets, sols by spacecraft clock value
      for row in datareader:
        [sol, edr_type, sclk, target] = [row[i] for i in [0,1,2,5]]
        if edr_type != 'CL5':
          continue
        prior_targets = [t for t in targets.values() if target in t]
        n_prior = len(prior_targets)
        # Add 1 so shots are indexed from 1, not 0
        targets[sclk] = target + ':%d' % (n_prior + 1)
        sols[sclk]    = sol
    print 'Read %d target names from %s.' % (len(targets), metafile)

    print 'Now reading LIBS data from %s.' % dirname

    data        = []
    labels      = []
    wavelengths = []

    files = os.listdir(dirname)
    f_ind = 0
    # Select only CSV files
    for f in fnmatch.filter(files, 'CL5_*.csv') +  \
          fnmatch.filter(files, 'cl5_*.csv'):
      # Extract site_drive_seqid from the filename
      filename = f[f.rfind('/')+1:]
      printt(' Processing %s.' % filename)
      sclk  = filename[4:13]
      site  = filename[18:21]
      drive = filename[21:25]
      seqid = filename[29:34]
      target = targets[sclk]
      sol    = sols[sclk]
      
      # If it's a cal target, skip it
      if 'Cal Target' in target:
        print 'Skipping %s' % target
        continue
        
      #site_drive_seqid_target = '%s_%s_%s_%s' % (site, drive, seqid, target)
      #drive_sclk_target = '%s_%s_%s' % (drive, sclk, target)
      sol_sclk_target = 'Sol%s_%s_%s' % (sol, sclk, target)
      print(' Spacecraft clock %s, identifier %s.' % \
              (sclk, sol_sclk_target))

      with open(os.path.join(dirname, f), 'r') as csvfile:
        datareader = csv.reader(csvfile)

        row = datareader.next()
        while row[0][0] == '#':
          # Save the last row (comment line)
          lastrow = row
          row    = datareader.next()
        # The last comment line contains the header strings
        # starting with 'wave' or 'nm'
        mylabels = [l.strip() for l in lastrow]

        mydata = [[float(x) for x in row]]
        for row in datareader:
          # Skip over empty lines
          if row[0] == '':
            continue
          mydata += [[float(x) for x in row]]

        mydata = np.array(mydata)

        # Store the wavelengths
        waveind = [ind for ind,name in enumerate(mylabels) \
                     if 'wave' in name]
        if len(waveind) != 1:
          printt('Expected 1 match on "wave"; got %d.' % len(waveind))
          sys.exit(1)
        mywaves = mydata[:,waveind[0]]

        # Keep only the shots
        #shots = [ind for ind,name in enumerate(mylabels) \
        #         if 'shot' in name]
        # Keep only the mean
        shots = [ind for ind,name in enumerate(mylabels) \
                   if 'mean' in name]
        #myshotnames = ['%s_%d_%s' % (site_drive_sclk_target,
        #                             f_ind, mylabels[shot])
        myshotnames = ['%s_%s' % (sol_sclk_target, mylabels[shot])
                       for shot in shots]

        mydata = mydata[:, shots]
        
        printt(' Read %d new items, %d features.' % mydata.shape[::-1])

        if wavelengths != [] and np.any(wavelengths != mywaves):
          printt('Error: wavelengths in file %d do not match previous.' % f_ind)
        if f_ind == 0:
          data        = mydata
          wavelengths = mywaves
        else:
          data   = np.concatenate((data, mydata),1)
        labels  += myshotnames

        f_ind = f_ind + 1
        printt('Total so far: %d items, %d files.' % (data.shape[1], f_ind))

    print
    if len(data) == 0:
      printt('No data files found, exiting.')
      sys.exit()

    printt('Read a total of %d items, %d features.' % data.shape[::-1])

    labels  = np.array(labels)

    # Prune and normalize
    (data, wavelengths) = LIBSData.prune_and_normalize(data, wavelengths, shotnoisefilt)

    printt('Saving to %s.' % outfile)
    outf = open(outfile, 'wb')
    pickle.dump((data, labels, wavelengths), outf)
    outf.close()
    print 'Done!'
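A usage sketch (directory and output names are hypothetical; note that msl_ccam_obs.csv must already sit inside the directory, per the hard-coded path above):

LIBSData.read_dir('ccam_data/', 'ccam_data/libs-mean-norm.pkl',
                  shotnoisefilt=9)
# Then construct a LIBSData on the same directory and call readin()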
Example #13
  def filter_data(self, data, labels):
    """filter_data(data, labels)

    Filter out bad quality data, using criteria provided by Nina Lanza:
    1) Large, broad features (don't correspond to narrow peaks)
    2) Low SNR

    For each item thus filtered, write out a plot of the data
    with an explanation:
    1) Annotate in red the large, broad feature, or
    2) Annotate in text the SNR.

    Returns updated (filtered) data and label arrays.
    """

    n = data.shape[1]

    newdata = data
    remove_ind = []

    printt("Filtering out data with large, broad features.")
    #for i in [78]: # test data gap
    #for i in [1461]: # test broad feature
    #for i in [3400]: # test broad feature
    for i in range(n):
      waves     = range(data.shape[0])
      this_data = data[waves,i]
      peak_ind  = this_data.argmax()
      peak_wave = self.xvals[waves[peak_ind]]

      # Set min peak to examine as 15% of max
      min_peak = 0.15 * this_data[peak_ind]

      # Track red_waves: indices of bands that contribute to deciding
      # to filter out this item (if indeed it is).
      # These same wavelengths will be removed from further consideration
      # regardless of filtering decision
      red_waves = []
        
      # Iterate over peaks sufficiently big to be of interest
      while this_data[peak_ind] >= min_peak:
        #print "%d) Max peak: %f nm (index %d, %f)" % (i,
        #                                              self.xvals[waves[peak_ind]],
        #                                              peak_ind,
        #                                              this_data[peak_ind])
        red_waves = [waves[peak_ind]]
        
        # Set the low value to look for (indicates nice narrow peak)
        low_value = 0.1 * this_data[peak_ind]
        
        filter_item = True # guilty until proven innocent
        # Note: band resolution/spacing is not the same for diff ranges?
        # Sweep left and right up to 400 bands (10 nm), looking for low_value
        min_wave_ind = peak_ind
        max_wave_ind = peak_ind
        for j in range(1,401):
          min_wave_ind = max(min_wave_ind-1, 0)
          max_wave_ind = min(max_wave_ind+1, len(waves)-1)
          red_waves += [waves[min_wave_ind]]
          red_waves += [waves[max_wave_ind]]

          # If there's a data gap, ignore it
          if ((self.xvals[waves[min_wave_ind]+1] -
               self.xvals[waves[min_wave_ind]]) > 1):
            min_wave_ind += 1
          if ((self.xvals[waves[max_wave_ind]] -
               self.xvals[waves[max_wave_ind]-1]) > 1):
            max_wave_ind -= 1

          # Stop if we've gone more than 10 nm
          if (((self.xvals[waves[peak_ind]] - 
                self.xvals[waves[min_wave_ind]]) > 10) or
              ((self.xvals[waves[max_wave_ind]] -
                self.xvals[waves[peak_ind]]) > 10)):
            filter_item = True
            #print '%.2f: %.2f to %.2f' % (self.xvals[waves[peak_ind]],
            #                              self.xvals[waves[min_wave_ind]],
            #                              self.xvals[waves[max_wave_ind]])
            break
          
          #print 'checking %f, %f' % (self.xvals[waves[min_wave_ind]],
          #                           self.xvals[waves[max_wave_ind]])
          if this_data[min_wave_ind] <= low_value or \
             this_data[max_wave_ind] <= low_value:
            # success! data is good
            #print '  %f: %f' % (self.xvals[waves[min_wave_ind]],
            #                    this_data[min_wave_ind])
            #print '  %f: %f' % (self.xvals[waves[max_wave_ind]],
            #                    this_data[max_wave_ind])
            filter_item = False
            break
          
        # Remove the wavelengths we've considered
        for w in red_waves:
          if w in waves:
            waves.remove(w)

        # Filter the item out
        if filter_item:
          print "Filter item %d (%s) due to [%.2f, %.2f] nm " % (i,
                                                                 labels[i],
                                                        self.xvals[min(red_waves)],
                                                        self.xvals[max(red_waves)])
          # record it for later removal
          remove_ind += [i]

          '''
          # generate a plot, highlighting the problematic feature in red_waves
          pylab.clf()
          pylab.plot(self.xvals, data[:,i], 'k-', linewidth=1)
          pylab.plot(self.xvals[min(red_waves):max(red_waves)+1],
                     data[min(red_waves):max(red_waves)+1,i], 'r-', linewidth=1)
          pylab.xlabel(self.xlabel, fontsize=16)
          pylab.ylabel(self.ylabel, fontsize=16)
          pylab.xticks(fontsize=16)
          pylab.yticks(fontsize=16)
          pylab.title('Filtered item %d, %s' % (i, labels[i]))
          
          if not os.path.exists('filtered'):
            os.mkdir('filtered')
          pylab.savefig(os.path.join('filtered', 
                                     '%s-filtered-%d.pdf' % (self.name, i)))
          '''
          
          break
        
        else: # keep going
          # Update this_data to ignore previously considered wavelengths
          this_data = data[waves,i]
          peak_ind  = this_data.argmax()

    # Remove all filtered items
    newdata   = np.array([data[:,i] for i in range(data.shape[1]) \
                          if i not in remove_ind]).T
    newlabels = np.array([labels[i] for i in range(len(labels)) \
                          if i not in remove_ind])
    printt(" ... from %d to %d items (%d removed)." % (n, newdata.shape[1],
                                                      n-newdata.shape[1]))
    n = newdata.shape[1]

    printt("Filtering out low-SNR data.")

    # Filter out any item left that has a max peak value < 0.01.
    # (these are normalized probabilities now)
    remove_ind = []
    for i in range(n):
      if max(newdata[:,i]) < 0.01:
        remove_ind += [i]

    # Remove all filtered items
    newdata   = np.array([newdata[:,i] for i in range(newdata.shape[1]) \
                          if i not in remove_ind]).T
    newlabels = np.array([newlabels[i] for i in range(len(newlabels)) \
                          if i not in remove_ind])

    print " ... from %d to %d items (%d removed)." % (n, newdata.shape[1],
                                                      n-newdata.shape[1])

    #sys.exit(0)
    
    return (newdata, newlabels)
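A sketch of how this filter slots into a readin() pipeline, mirroring the calls in Example #15 (data is features x samples):

(ds.data, ds.labels) = ds.filter_data(ds.data, ds.labels)
# Items with broad (>10 nm) peaks or a max value below 0.01 are dropped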
Example #14
  def read_from_scratch(self, filename, shotnoisefilt=0, fwfile=''):
    """read_from_scratch()

    Read in ENVI (hyperspectral) data from filename.
    Assume header file is filename.hdr.

    Optionally, specify the width of a median filter to apply.
    Optionally, specify a file containing per-feature weights.

    Strongly inspired by enviread.m from Ian Howat, [email protected].

    See ROI_utils.py for full development and testing.
    """

    envi_file = filename

    # Read in the header file.  Try a few options to find the .hdr file. 
    hdrfilenames = [envi_file + '.hdr',
                    envi_file[0:envi_file.rfind('.IMG')] + '.hdr',
                    envi_file[0:envi_file.rfind('.img')] + '.hdr']
    for hdrfile in hdrfilenames:
      if os.path.exists(hdrfile):
        break
    else:
      print ":: Error: no .hdr file found for '%s'." % envi_file
      return None
      
    info = ENVIData.read_envihdr(hdrfile)
    self.lines   = info['lines']
    self.samples = info['samples']
    print '%d lines, %d samples, %d bands.' % (self.lines, self.samples, info['bands'])

    # Set binary format parameters
    byte_order = info['byte order']
    if   (byte_order == 0):
      machine = 'ieee-le'
    elif (byte_order == 1):
      machine = 'ieee-be'
    else:
      machine = 'n'

    dtype = info['data type']
    if   (dtype == 1):
      format = 'uint8'
    elif (dtype == 2):
      format = 'int16'
    elif (dtype == 3):
      format = 'int32'
    elif (dtype == 4):
      format = 'float32'
    elif (dtype == 5):
      format = 'float64'  # Note: 'float' is the same as 'float64'
    elif (dtype == 6):
      print ':: Sorry, Complex (2x32 bits) data currently not supported.'
      print ':: Importing as double-precision instead.'
      format = 'float64'
    elif (dtype == 9):
      print ':: Sorry, double-precision complex (2x64 bits) data currently not supported.'
      return
    elif (dtype == 12):
      format = 'uint16'
    elif (dtype == 13):
      format = 'uint32'
    elif (dtype == 14):
      format = 'int64'
    elif (dtype == 15):
      format = 'uint64'
    else:
      print 'Error: File type number: %d not supported' % dtype
      return None
    print 'Reading data format %s' % format

    # Read in the data
    try:
      dfile = open(envi_file, 'rb')
    except IOError:
      print ":: Error: data file '%s' not found." % envi_file
      return None

    self.data = np.zeros((info['bands'], info['lines'] * info['samples']),
                            format)

    raw_data  = np.fromfile(dfile, format, -1)
    dfile.close()

    band_format = info['interleave'].lower()

    if (band_format == 'bsq'):
      print "Reading BSQ: Band, Row, Col; %s" % machine
      raw_data = raw_data.reshape((info['bands'],info['lines'],info['samples']))
      for b in range(info['bands']):
        # Row-major flattening of a band is already (line, sample) order
        self.data[b,:] = raw_data[b,:,:].reshape(-1)

    elif (band_format == 'bil'):
      print "Reading BIL: Row, Band, Col; %s" % machine
      raw_data = raw_data.reshape((info['lines'],info['bands'],info['samples']))
      for b in range(info['bands']):
        self.data[b,:] = raw_data[:,b,:].reshape(-1)
    
    elif (band_format == 'bip'):
      print "Reading BIP: Row, Col, Band; %s" % machine
      raw_data = raw_data.reshape((info['lines'],info['samples'],info['bands']))
      for b in range(info['bands']):
        self.data[b,:] = raw_data[:,:,b].reshape(-1)

    # Determine whether we need to swap byte order
    little_endian = (struct.pack('=f', 2.3) == struct.pack('<f', 2.3))
    if (     little_endian and machine == 'ieee-be') or \
        (not little_endian and machine == 'ieee-le'):
      self.data.byteswap(True)

    self.xlabel = 'Wavelength (nm)'
    self.xvals  = info['wavelength']
    self.ylabel = 'Reflectance'

    # Let labels be x,y coordinates
    self.labels = []
    for l in range(info['lines']):
      for s in range(info['samples']):
        self.labels += ['%d,%d' % (l,s)]

    # Data pre-processing (UCIS specific)
    #if 'UCIS' in envi_file or 'ucis' in envi_file:
    if 'mars_yard' in envi_file:
      printt('Filtering out water absorption and known noisy bands,')
      printt(' from %d' % len(self.xvals))
      # Water: 1.38 and 1.87 nm
      # Also prune out the first 10 and last 3 bands
      waves_use = [w for w in self.xvals
                   if ((w > 480 and w < 1330) or 
                       (w > 1400 and w < 1800) or
                       (w > 1900 and w < 2471))]
      bands_use = [np.where(self.xvals == w)[0][0] for w in waves_use]
      self.data  = self.data[bands_use, :]
      self.xvals = self.xvals[bands_use]
      printt(' to %d bands.' % len(self.xvals))

    # Filter out shot noise (median filter)
    # warning: this is slow... (should be optimized?)
    from demud import read_feature_weights
    if shotnoisefilt >= 3:
      # Read in feature weights, if needed
      fw = read_feature_weights(fwfile, self.xvals)
      self.data = LIBSData.medfilter(self.data, shotnoisefilt, fw)
 
    # Store the RGB data for later use
    self.rgb_data = self.get_RGB()
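A usage sketch (the cube name is hypothetical; the header is looked up at cube.img.hdr or cube.hdr, and the constructor call is an assumption based on the Dataset convention in the other examples):

ds = ENVIData('cube.img')
ds.read_from_scratch('cube.img', shotnoisefilt=0)
print '%d pixels, %d bands' % (len(ds.labels), ds.data.shape[0])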
Example #15
    def readin(self,
               startsol=-1,
               endsol=-1,
               initpriorsols=False,
               shotnoisefilt=0):
        """readin()
    
    Read in LIBS data from self.filename.
    Read in initialization data from self.initfilename.
    Normalize according to Nina's instructions.

    Optionally, specify a sol range (startsol-endsol) for data to analyze.
    Optionally, use data prior to startsol to initialize the model.
    Optionally, specify the width of a median filter to apply.
    """

        input_type = os.path.splitext(self.filename)[1][1:]

        self.data = []
        self.initdata = []
        self.xlabel = 'Wavelength (nm)'
        self.ylabel = 'Intensity'

        if input_type == 'csv':
            (self.data, self.labels) = LIBSData.read_csv_data(self.filename)

            # Prune off first column (wavelengths)
            wavelengths = self.data[:, 0]
            self.xvals = wavelengths.reshape(-1, 1)
            self.data = self.data[:, 1:]  # features x samples

            (self.data, self.xvals) = \
                LIBSData.prune_and_normalize(self.data, self.xvals, shotnoisefilt)

            (self.data, self.labels) = self.filter_data(self.data, self.labels)

        elif input_type == 'pkl':

            inf = open(self.filename, 'rb')
            (self.data, self.labels, self.xvals) = pickle.load(inf)
            inf.close()

            # Temporary: until I re-run full extraction on shiva
            use = np.where(np.logical_and(self.xvals >= 270,
                                          self.xvals < 820))[0]
            self.xvals = self.xvals[use]
            self.data = self.data[use, :]

            (self.data, self.labels) = self.filter_data(self.data, self.labels)

        else:  # Unknown format

            printt(' Error: Unknown input type for %s; no data read in' % \
                   self.filename)

        # Read in the init data file, if present
        if self.initfilename != '':
            printt('Reading initialization data set from %s' %
                   self.initfilename)
            (self.initdata,
             unused_labels) = LIBSData.read_csv_data(self.initfilename)

            # Prune off first column (wavelengths)
            wavelengths = self.initdata[:, 0]
            self.initdata = self.initdata[:, 1:]  # features x samples
            (self.initdata, unused_xvals) = \
                LIBSData.prune_and_normalize(self.initdata, wavelengths, shotnoisefilt)
            print self.initdata.shape

            (self.initdata,
             unused_labels) = self.filter_data(self.initdata, unused_labels)
            print self.initdata.shape

        ########## Subselect by sol, if specified ##########
        if startsol > -1 and endsol >= -1:
            printt("Analyzing data from sols %d-%d only." % (startsol, endsol))
            current_sols  = [i for (i,s) in enumerate(self.labels) \
                             if (int(s.split('_')[0][3:]) >= startsol and \
                                 int(s.split('_')[0][3:]) <= endsol)]
            if initpriorsols:
                previous_sols = [i for (i,s) in enumerate(self.labels) \
                                 if int(s.split('_')[0][3:]) < startsol]
                printt(
                    "Putting previous sols' (before %d) data in initialization model."
                    % startsol)
                # Concatenate initdata with data from all previous sols
                if len(self.initdata) > 0:
                    print self.initdata.shape
                    print self.data[:, previous_sols].shape
                    self.initdata = np.hstack(
                        (self.initdata, self.data[:, previous_sols]))
                else:
                    self.initdata = self.data[:, previous_sols]

            # Prune analysis data set to only include data from the sol of interest
            self.data = self.data[:, current_sols]
            self.labels = self.labels[current_sols]
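A sketch of the sol-range options (sol numbers are hypothetical):

# Analyze sols 200-300 only; data before sol 200 primes the model
ds.readin(startsol=200, endsol=300, initpriorsols=True, shotnoisefilt=9)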
Example #16
    def plot_item(self, m, ind, x, r, k, label, U, rerr, feature_weights):
        """plot_item(self, m, ind, x, r, k, label, U, rerr, feature_weights)

    Plot selection m (index ind, data in x) and its reconstruction r,
    with k and label to annotate of the plot.
    Use fancy ChemCam elemental annotations.

    If feature_weights are specified, omit any 0-weighted features from the plot.
    """

        if x == [] or r == []:
            print "Error: No data in x and/or r."
            return

        # Select the features to plot
        if feature_weights != []:
            goodfeat = [f for f in range(len(feature_weights)) \
                          if feature_weights[f] > 0]
        else:
            goodfeat = range(len(self.xvals))

        pylab.clf()
        # xvals, x, and r need to be column vectors
        pylab.plot(self.xvals[goodfeat], r[goodfeat], 'r-', linewidth=0.5)
        pylab.plot(self.xvals[goodfeat], x[goodfeat], 'k-', linewidth=1)
        # Boost font sizes for axis and tick labels
        pylab.xlabel(self.xlabel, fontsize=16)
        pylab.ylabel(self.ylabel, fontsize=16)
        pylab.xticks(fontsize=16)
        pylab.yticks(fontsize=16)
        pylab.title('DEMUD selection %d (%s), item %d, using K=%d' % \
                    (m, label, ind, k))

        #print 'Reading in emission bands.'
        # Read in the emission bands
        emissions = {}
        with open('LIBS-elts-RCW-NL.txt') as f:
            for line in f:
                vals = line.strip().split()
                if len(vals) < 2:
                    break
                (wave, elt) = vals
                emissions[wave] = elt

        # Get unique elements
        elts = list(set(emissions.values()))
        # Generate per-element colors
        #colors = ['c','m','b','r','g','b']
        colors = [
            '#ff0000', '#00ff00', '#0000ff', '#00ffff', '#ff00ff', '#ffff00',
            '#aa0000', '#00aa00', '#0000aa', '#00aaaa', '#aa00aa', '#aaaa00',
            '#550000', '#005500', '#000055', '#005555', '#550055', '#555500'
        ]
        elt_color = {}
        for (i, e) in enumerate(elts):
            elt_color[e] = colors[i % len(colors)]

        # record the selection number
        outdir = os.path.join('results', self.name)
        if not os.path.exists(outdir):
            os.mkdir(outdir)
        selfile = os.path.join(outdir, 'sels-%s.txt' % self.name)
        mode = 'w' if m == 0 else 'a'
        with open(selfile, mode) as f:
            f.write('%d\n' % ind)

        res = x - r
        abs_res = np.absolute(res)
        mx = abs_res.max()
        mn = abs_res.min()
        #printt('Absolute residuals: min %2.g, max %.2g.\n' % (mn, mx))
        if mn == mx and mx == 0:
            return

        sorted_abs_res = np.sort(abs_res, 0)
        #frac_annotate = 0.002
        frac_annotate = 0.004
        width = 8
        min_match_nm = 2
        num_annotate = int(math.floor(frac_annotate * len(abs_res)))
        thresh = sorted_abs_res[-num_annotate]
        #printt('Annotating top %.3f%% of residuals (%d above %.2g).' % \
        #    (frac_annotate * 100, num_annotate, thresh))

        band_ind = (np.where(abs_res >= thresh)[0]).tolist()
        for band in band_ind:
            w = float(self.xvals[band])
            [b, elt] = LIBSData.find_closest(w, emissions, min_match_nm)
            reproj = r[band]
            #printt('%.2f nm (%s): Expected %g, got %g' % (w, elt, reproj, x[band]))
            if b == -1:
                b = 1
                printt('No match for %.2f nm (%f)' % (w, r[band]))
                # Draw the triangle using gray, but don't add to legend
                pylab.fill([w - width, w + width, w],
                           [reproj, reproj, x[band]],
                           '0.6',
                           zorder=1,
                           label='_nolegend_')
            else:
                if x[band] > reproj:
                    sn = '+'
                else:
                    sn = '-'
                pylab.fill([w - width, w + width, w],
                           [reproj, reproj, x[band]],
                           elt_color[elt],
                           zorder=2,
                           label='%s%s %.2f' % (sn, elt, w))

        pylab.legend(fontsize=8)
        figfile = '%s/%s-sel-%d.pdf' % (outdir, self.name, m)
        pylab.savefig(figfile)
        print 'Wrote plot to %s' % figfile

        # I don't think this works -- hasn't been tested?
        '''
Example #17
  def readin(self):
    """readin()
    
    Also read in segmentation map from segmapfile
    and average the data, re-storing it in reduced form
    in self.data.

    Set self.labels to record the segment ids.
    """

    super(SegENVIData, self).readin()
    # data is wavelengths x pixels

    # Segmentation maps from SLIC are "raster scan, 32-bit float" (per Hua)
    # Actually, nicer to read them as ints.
    self.segmap = np.fromfile(self.segmapfile, dtype='int32', count=-1)
    if self.lines * self.samples != self.segmap.shape[0]:
      printt('Error: mismatch in number of pixels between image and segmap.')
      return

    goodbands = range(len(self.xvals))
    # For AVIRIS data:
    if 'f970619' in self.name:
      printt('Removing known bad bands, assuming AVIRIS data.')
      # Per Hua's email of July 3, 2013, use a subset of good bands:
      # Indexing from 1: [10:100 116:150 180:216]
      # Subtract 1 to index from 0, but not to the end values
      # because range() is not inclusive of end
      goodbands  = range(9,100) + range(115,150) + range(179,216)
    # For UCIS data:
    elif 'mars_yard' in self.name:
      printt('Removing known bad bands, assuming UCIS data.')
      # Per Hua's email of May 8, 2014, use a subset of good bands.
      # Exclude 1.4-1.9 um (per Diana).
      waterband_min = np.argmin([abs(x-1400) for x in self.xvals])
      waterband_max = np.argmin([abs(x-1900) for x in self.xvals])
      waterbands    = range(waterband_min, waterband_max+1)
      # Based on Hua's visual examination, exclude bands
      # 1-6, 99-105, and 145-155.
      # Good bands are therefore 7-98, 106-144, and 156-maxband.
      # Subtract 1 to index from 0, but not to the end values
      # because range() is not inclusive of end
      maxband    = len(self.xvals)
      goodbands  = range(6,98) + range(105,144) + range(155,maxband)
      # Remove the water bands
      printt('Removing water absorption bands.')
      printt('%d good bands -> ' % len(goodbands))
      goodbands  = list(set(goodbands) - set(waterbands))
      printt(' %d good bands' % len(goodbands))
      
    self.data  = self.data[goodbands,:]
    self.xvals = self.xvals[goodbands]
    
    #self.segmap = self.segmap.reshape(self.lines, self.samples)
    self.labels  = np.unique(self.segmap)
    printt('Found %d segments.' % len(self.labels))
    newdata = np.zeros((self.data.shape[0], len(self.labels)))
    for i, s in enumerate(self.labels):
      pixels  = np.where(self.segmap == s)[0]
      #print '%d: %s: %d pixels' % (i, str(s), len(pixels))
      # Compute and store the mean
      newdata[:,i] = self.data[:,pixels].mean(1)

    printt('Finished averaging the spectra.')
    # Update data with the averaged version
    self.data = newdata

    self.name  = self.name + '-seg'
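The core per-segment averaging step, as a standalone sketch on toy data (segment ids and spectra are made up):

import numpy as np

segmap = np.array([0, 0, 1, 1, 1])       # 5 pixels in 2 segments
data = np.arange(10.0).reshape(2, 5)     # 2 bands x 5 pixels
labels = np.unique(segmap)
newdata = np.zeros((data.shape[0], len(labels)))
for i, s in enumerate(labels):
    pixels = np.where(segmap == s)[0]
    newdata[:, i] = data[:, pixels].mean(1)
print newdata                            # [[0.5 3.] [5.5 8.]]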
Example #18
  def plot_item(self, m, ind, x, r, k, label, U, rerr, feature_weights):
    """plot_item(self, m, ind, x, r, k, label, U, rerr, feature_weights)

    Plot selection m (index ind, data in x) and its reconstruction r,
    with k and label to annotate of the plot.
    Use fancy ChemCam elemental annotations.

    If feature_weights are specified, omit any 0-weighted features from the plot.
    """

    if x == [] or r == []: 
      print "Error: No data in x and/or r."
      return
  
    # Select the features to plot
    if feature_weights != []:
      goodfeat = [f for f in range(len(feature_weights)) \
                    if feature_weights[f] > 0]
    else:
      goodfeat = range(len(self.xvals))

    pylab.clf()
    # xvals, x, and r need to be column vectors
    pylab.plot(self.xvals[goodfeat], r[goodfeat], 'r-', linewidth=0.5)
    pylab.plot(self.xvals[goodfeat], x[goodfeat], 'k-', linewidth=1)
    # Boost font sizes for axis and tick labels
    pylab.xlabel(self.xlabel, fontsize=16)
    pylab.ylabel(self.ylabel, fontsize=16)
    pylab.xticks(fontsize=16)
    pylab.yticks(fontsize=16)
    pylab.title('DEMUD selection %d (%s), item %d, using K=%d' % \
                (m, label, ind, k))

    #print 'Reading in emission bands.'
    # Read in the emission bands
    emissions = {}
    with open('LIBS-elts-RCW-NL.txt') as f:
      for line in f:
        vals = line.strip().split()
        if len(vals) < 2:
          break
        (wave, elt) = vals
        emissions[wave] = elt
      
    # Get unique elements
    elts = list(set(emissions.values()))
    # Generate per-element colors
    #colors = ['c','m','b','r','g','b']
    colors = ['#ff0000', '#00ff00', '#0000ff',
              '#00ffff', '#ff00ff', '#ffff00', 
              '#aa0000', '#00aa00', '#0000aa',
              '#00aaaa', '#aa00aa', '#aaaa00', 
              '#550000', '#005500', '#000055',
              '#005555', '#550055', '#555500']
    elt_color = {}
    for (i,e) in enumerate(elts):
      elt_color[e] = colors[i % len(colors)]

    # record the selection number
    outdir  = os.path.join('results', self.name)
    if not os.path.exists(outdir):
      os.mkdir(outdir)
    selfile = os.path.join(outdir, 'sels-%s.txt' % self.name)
    mode = 'w' if m == 0 else 'a'
    with open(selfile, mode) as f:
      f.write('%d\n' % ind)

    res = x - r
    abs_res = np.absolute(res)
    mx = abs_res.max()
    mn = abs_res.min()
    #printt('Absolute residuals: min %2.g, max %.2g.\n' % (mn, mx))
    if mn == mx and mx == 0:
      return

    sorted_abs_res = np.sort(abs_res,0)
    #frac_annotate = 0.002
    frac_annotate = 0.004
    width = 8
    min_match_nm  = 2
    num_annotate = int(math.floor(frac_annotate * len(abs_res)))
    thresh = sorted_abs_res[-num_annotate]
    #printt('Annotating top %.3f%% of residuals (%d above %.2g).' % \
    #    (frac_annotate * 100, num_annotate, thresh))

    band_ind = (np.where(abs_res >= thresh)[0]).tolist()
    for band in band_ind:
      w = float(self.xvals[band])
      [b, elt] = LIBSData.find_closest(w, emissions, min_match_nm)
      reproj = r[band]
      #printt('%.2f nm (%s): Expected %g, got %g' % (w, elt, reproj, x[band]))
      if b == -1:
        b = 1
        printt('No match for %.2f nm (%f)' % (w, r[band]))
        # Draw the triangle using gray, but don't add to legend
        pylab.fill([w-width, w+width, w],
                   [reproj,  reproj,  x[band]],
                   '0.6', zorder=1,
                   label='_nolegend_')
      else:
        if x[band] > reproj:
          sn = '+'
        else:
          sn = '-'
        pylab.fill([w-width, w+width, w],
                   [reproj,  reproj,  x[band]],
                   elt_color[elt], zorder=2,
                   label='%s%s %.2f' % (sn, elt, w))

    pylab.legend(fontsize=8)
    figfile = '%s/%s-sel-%d.pdf' % (outdir, self.name, m)
    pylab.savefig(figfile)
    print 'Wrote plot to %s' % figfile

    # I don't think this works -- hasn't been tested?
    '''
Example #19
    def read_dir(cls, dirname, outfile, shotnoisefilt=0):
        """read_dir(dirname, outfile)

    Read in raw LIBS data from .csv files in dirname.
    Pickle the result and save it to outfile.
    Note: does NOT update object fields.
    Follow this with a call to readin().
    """

        # First read in the target names and sol numbers.
        targets = {}
        sols = {}
        # Location of this file is hard-coded!
        # My latest version goes through sol 707.
        metafile = 'msl_ccam_obs.csv'
        with open(os.path.join(dirname, metafile)) as f:
            datareader = csv.reader(f)
            # Index targets, sols by spacecraft clock value
            for row in datareader:
                [sol, edr_type, sclk, target] = [row[i] for i in [0, 1, 2, 5]]
                if edr_type != 'CL5':
                    continue
                prior_targets = [t for t in targets.values() if target in t]
                n_prior = len(prior_targets)
                # Add 1 so shots are indexed from 1, not 0
                targets[sclk] = target + ':%d' % (n_prior + 1)
                sols[sclk] = sol
        print 'Read %d target names from %s.' % (len(targets), metafile)

        print 'Now reading LIBS data from %s.' % dirname

        data = []
        labels = []
        wavelengths = []

        files = os.listdir(dirname)
        f_ind = 0
        # Select only CSV files
        for f in fnmatch.filter(files, 'CL5_*.csv') +  \
              fnmatch.filter(files, 'cl5_*.csv'):
            # Extract site_drive_seqid from the filename
            filename = f[f.rfind('/') + 1:]
            printt(' Processing %s.' % filename)
            sclk = filename[4:13]
            site = filename[18:21]
            drive = filename[21:25]
            seqid = filename[29:34]
            target = targets[sclk]
            sol = sols[sclk]

            # If it's a cal target, skip it
            if 'Cal Target' in target:
                print 'Skipping %s' % target
                continue

            #site_drive_seqid_target = '%s_%s_%s_%s' % (site, drive, seqid, target)
            #drive_sclk_target = '%s_%s_%s' % (drive, sclk, target)
            sol_sclk_target = 'Sol%s_%s_%s' % (sol, sclk, target)
            print(' Spacecraft clock %s, identifier %s.' % \
                    (sclk, sol_sclk_target))

            with open(os.path.join(dirname, f), 'r') as csvfile:
                datareader = csv.reader(csvfile)

                row = datareader.next()
                while row[0][0] == '#':
                    # Save the last row (comment line)
                    lastrow = row
                    row = datareader.next()
                # The last comment line contains the header strings
                # starting with 'wave' or 'nm'
                mylabels = [l.strip() for l in lastrow]

                mydata = [[float(x) for x in row]]
                for row in datareader:
                    # Skip over empty lines
                    if row[0] == '':
                        continue
                    mydata += [[float(x) for x in row]]

                mydata = np.array(mydata)

                # Store the wavelengths
                waveind = [ind for ind,name in enumerate(mylabels) \
                             if 'wave' in name]
                if len(waveind) != 1:
                    printt('Expected 1 match on "wave"; got %d.' %
                           len(waveind))
                    sys.exit(1)
                mywaves = mydata[:, waveind[0]]

                # Keep only the shots
                #shots = [ind for ind,name in enumerate(mylabels) \
                #         if 'shot' in name]
                # Keep only the mean
                shots = [ind for ind,name in enumerate(mylabels) \
                           if 'mean' in name]
                #myshotnames = ['%s_%d_%s' % (site_drive_sclk_target,
                #                      f_ind, mylabels[shot])
                myshotnames = [
                    '%s_%s' % (sol_sclk_target, mylabels[shot])
                    for shot in shots
                ]

                mydata = mydata[:, shots]

                printt(' Read %d new items, %d features.' % mydata.shape[::-1])

                if wavelengths != [] and np.any(wavelengths != mywaves):
                    printt(
                        'Error: wavelengths in file %d do not match previous.'
                        % f_ind)
                if f_ind == 0:
                    data = mydata
                    wavelengths = mywaves
                else:
                    data = np.concatenate((data, mydata), 1)
                labels += myshotnames

                f_ind = f_ind + 1
                printt('Total so far: %d items, %d files.' %
                       (data.shape[1], f_ind))

        print
        if len(data) == 0:
            printt('No data files found, exiting.')
            sys.exit()

        printt('Read a total of %d items, %d features.' % data.shape[::-1])

        labels = np.array(labels)

        # Prune and normalize
        (data,
         wavelengths) = LIBSData.prune_and_normalize(data, wavelengths,
                                                     shotnoisefilt)

        printt('Saving to %s.' % outfile)
        outf = open(outfile, 'wb')  # binary mode for pickle
        pickle.dump((data, labels, wavelengths), outf)
        outf.close()
        print 'Done!'
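
For reference, a minimal sketch of loading the pickle written above back in (assuming the (data, labels, wavelengths) tuple layout used here; 'libs.pkl' is a placeholder path):

import pickle

# Hypothetical path; substitute the outfile used above.
inf = open('libs.pkl', 'rb')
(data, labels, wavelengths) = pickle.load(inf)
inf.close()

# data is features x samples, matching the reader above.
print 'Loaded %d items, %d features.' % data.shape[::-1]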
Example #20
0
  def  plot_item(self, m, ind, x, r, k, label, U=[], scores=[], feature_weights=[]):
    """plot_item(self, m, ind, x, r, k, label, U, scores, feature_weights)

    Plot selection m (index ind, data in x) and its reconstruction r,
    with k and label to annotate the plot.

    Also show a spatial plot indicating where the selected pixel is
    and an abundance plot of similarity across the data set.

    U and scores are optional; they are ignored in this method but
    used by some subclasses' overrides.

    If feature_weights are specified, omit any 0-weighted features from the plot.
    """
    
    if len(x) == 0 or len(r) == 0:
      printt("Error: No data in x and/or r.")
      return

    (l,s) = [int(v) for v in label.split(',')]

    # Select the features to plot
    if feature_weights != []:
      goodfeat = [f for f in range(len(feature_weights)) \
                    if feature_weights[f] > 0]
    else:
      goodfeat = range(len(self.xvals))

    # Set up the subplots
    pylab.figure()
    #pylab.subplots_adjust(wspace=0.1, left=0)
    pylab.subplots_adjust(wspace=0.05)

    # Plot #1: expected vs. observed feature vectors
    # xvals, x, and r need to be column vectors
    pylab.subplot(2,2,1)
    pylab.plot(self.xvals[goodfeat], r[goodfeat], 'r-', label='Expected')
    pylab.plot(self.xvals[goodfeat], x[goodfeat], 'b.-', label='Observations')
    pylab.ylim([0.0, max(1.0, x.max())])

    pylab.xlabel(self.xlabel)
    pylab.ylabel(self.ylabel)
    pylab.legend(fontsize=10, loc=2)

    # Plot #2: zoom of selected pixel, 20x20 context
    pylab.subplot(2,2,2)
    winwidth = 20
    minl = max(0, l-winwidth/2)
    mins = max(0, s-winwidth/2)
    maxl = min(self.lines,   l+winwidth/2)
    maxs = min(self.samples, s+winwidth/2)
    rgb_data = self.get_RGB()
    pylab.imshow(rgb_data[minl:maxl, mins:maxs],
                 interpolation='none') #, alpha=0.85)
    pylab.gca().add_patch(Rectangle((min(winwidth/2,s)-1,
                                     min(winwidth/2,l)-1),
                                     2, 2,
                                     fill=None, alpha=1))
    pylab.axis('off')
    pylab.title('Zoom')

    # Spatial selection plot
    # this is an inset axes over the main axes
    #a = pylab.axes([.15, .75, .3, .15])
    pylab.subplot(2,2,3)
    # Use alpha to lighten the RGB data
    plt = pylab.imshow(rgb_data, interpolation='none', 
                       alpha=0.85)
    pylab.plot(s, l, 'x', markeredgewidth=2, scalex=False, scaley=False)
    #pylab.setp(a, xticks=[], yticks=[])
    pylab.axis('off')
    pylab.title('Selection')

    # Also update the priority map.
    self.pr_map[l,s] = m+1
    #print 'setting %d, %d to %d' % (l, s, -m)
    n_tot = self.lines * self.samples
    n_pri = len(self.pr_map.nonzero()[0])
    n_unp = n_tot - n_pri
    printt(' %d prioritized; %d (%.2f%%) unprioritized remain' % \
             (n_pri, n_unp, n_unp * 100.0 / n_tot))

    # Abundance map
    # Compute distance from selected x to all other items
    abund = np.zeros((self.lines, self.samples))
    nbands = self.data.shape[0]
    for l_ind in range(self.lines):
      for s_ind in range(self.samples):
        if l_ind == l and s_ind == s:
          abund[l_ind,s_ind] = 0
          continue
        d = self.data[:, l_ind*self.samples + s_ind]
        # Use Euclidean distance.
        #abund[l,s] = math.sqrt(pow(np.sum(x - d), 2)) / float(nbands)
        # Use spectral angle distance
        num   = np.dot(x, d)
        denom = np.linalg.norm(x) * np.linalg.norm(d)
        if num > denom: # ensure math.acos() doesn't freak out; clip to 1.0
          num = denom
        abund[l_ind,s_ind] = math.acos(num / denom)
        
        # Propagate current priority to similar items (not yet prioritized)
        # This threshold is subjectively chosen.
        # I used 0.10 for the Mars yard UCIS cube from Diana.
        # I used different values for the micro-UCIS cubes from Bethany
        # (see Evernote notes).
        # UCIS threshold (0.10); the micro-UCIS cubes used 0.13:
        #if self.pr_map[l_ind,s_ind] == 0 and abund[l_ind,s_ind] <= 0.13:
        if self.pr_map[l_ind,s_ind] == 0 and abund[l_ind,s_ind] <= 0.10:
          self.pr_map[l_ind,s_ind] = m+1


    printt('Abundance: %g to %g' % (abund.min(), abund.max()))
    pylab.subplot(2,2,4)
    # Use colormap jet_r so smallest value is red and largest is blue
    pylab.imshow(abund, interpolation='none', cmap='jet_r', vmin=0, vmax=0.15)
    pylab.axis('off')
    pylab.title('Abundance')

    pylab.suptitle('DEMUD selection %d (%s), item %d, using K=%d' % \
                   (m, label, ind, k))
          
    # Write the plot to a file.
    outdir = os.path.join('results', self.name)
    if not os.path.exists(outdir):
      os.mkdir(outdir)

    figfile = os.path.join(outdir, 'sel-%d-k-%d-(%s).pdf' % (m, k, label))
    pylab.savefig(figfile)
    print 'Wrote plot to %s' % figfile
    pylab.close()

    # Write the priority map to an image file
    pylab.figure()
    # Start with colormap jet_r so smallest value is red and largest is blue
    # Max_c must be at least 2 and no greater than 255.
    # Values greater than 255 will be mapped to the last color.
    # (Imposed because we're then saving this out as an ENVI classification map with bytes.
    #  May want to be more flexible in the future, but I can't imagine really wanting to see
    #  more than 255 distinct colors?)
    max_c    = 255 if m > 253   else m+2  # cap so m+2 never exceeds 255
    max_c    = 2   if max_c < 2 else max_c
    cmap     = matplotlib.cm.get_cmap('jet_r', max_c)
    # Optionally remap 0 to white (disabled here; unprioritized items are instead set to m+2 below)
    jet_map_v    = cmap(np.arange(max_c))
    #jet_map_v[0] = [1,1,1,1]  # white
    cmap         = matplotlib.colors.LinearSegmentedColormap.from_list("jet_map_white", jet_map_v)
    pr_map_plot = np.copy(self.pr_map)
    # Set unprioritized items to one shade darker than most recent
    pr_map_plot[pr_map_plot == 0] = m+2
    #pylab.imshow(pr_map_plot, interpolation='none', cmap=cmap, vmin=1, vmax=m+1)
    pylab.imshow(pr_map_plot, interpolation='none', cmap=cmap)
    prmapfig = os.path.join(outdir, 'prmap-k-%d.pdf' % k)
    pylab.savefig(prmapfig)
    if (m % 10) == 0:
      prmapfig = os.path.join(outdir, 'prmap-k-%d-m-%d.pdf' % (k, m))
      pylab.savefig(prmapfig)
    print 'Wrote priority map figure to %s (max_c %d)' % (prmapfig, max_c)
    pylab.close()

    # Write the priority map contents to a file as a 64-bit float map 
    # (retained for backward compatibility for Hua,
    # but superseded by ENVI data file next)
    prmapfile = os.path.join(outdir, 'prmap-k-%d-hua.dat' % k)
    fid = open(prmapfile, 'wb')
    self.pr_map.astype('float64').tofile(fid)
    fid.close()
    print "Wrote Hua's priority map (data) to %s" % prmapfile

    # Write an ENVI data file and header file
    prmapfile = os.path.join(outdir, 'prmap-k-%d.dat' % k)
    fid = open(prmapfile, 'wb')
    self.pr_map.astype('uint8').tofile(fid)  # save out as bytes

    # This is a class map header file
    prmaphdr  = os.path.join(outdir, 'prmap-k-%d.dat.hdr' % k)
    fid = open(prmaphdr, 'w')
    fid.write('ENVI\n')
    fid.write('description = { DEMUD prioritization map }\n')
    fid.write('samples = %d\n' % self.samples)
    fid.write('lines = %d\n'   % self.lines)
    fid.write('bands = 1\n')
    fid.write('header offset = 0\n')          # 0 bytes
    fid.write('file type = Classification\n')
    fid.write('data type = 1\n')              # byte (max 255 priorities)
    fid.write('interleave = bip\n')           # Irrelevant for single 'band'
    fid.write('byte order = 0\n')             # Least-significant byte first
    fid.write('classes = %d\n' % max_c)       # Number of classes, incl. 'None'
    # Classes include None (0) and then integers up to number of classes.
    fid.write("class names = {'None', " + ', '.join(["'%d'" % a for a in range(1, max_c)]) + '}\n')
    fid.write('class lookup = {' + 
              ',\n                '.join([' %d, %d, %d' % (r*255,g*255,b*255) for (r,g,b,a) in jet_map_v]) + 
              ' }\n')
    fid.close()
    print 'Wrote ENVI priority map data/header to %s[.hdr]' % prmapfile

    # Write the selections (spectra) in ASCII format
    selfile = os.path.join(outdir, 'selections-k%d.txt' % k)
    # If this is the first selection, open for write
    # to clear out any previous run; otherwise append.
    if m == 0:
      fid = open(selfile, 'w')
      # Output a header
      fid.write('# Index, Score')
      for w in self.xvals.tolist():
        fid.write(', %.3f' % w)
      fid.write('\n')
    else:
      fid = open(selfile, 'a')

    # If scores is empty, the selection was pre-specified,
    # so there is no score.  Output 0 for this item.
    if scores == []:
      fid.write('%d,0.0,' % (m))
    else:
      fid.write('%d,%f,' % (m, scores[m]))

    # Now output the feature vector itself
    # Have to reshape x because it's a 1D column vector
    np.savetxt(fid, x.reshape(1, x.shape[0]), fmt='%.5f', delimiter=',')

    fid.close()
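
The abundance loop above computes the spectral angle one pixel at a time. As a vectorized alternative, a minimal numpy sketch (a hypothetical standalone helper, not part of the class) computes the same angles against every pixel at once:

import numpy as np

def spectral_angles(x, data):
    # data is bands x pixels; x is a 1-D spectrum of length bands.
    cos = np.dot(x, data) / (np.linalg.norm(x) * np.linalg.norm(data, axis=0))
    # Clip so floating-point error cannot push arccos out of its domain.
    return np.arccos(np.clip(cos, -1.0, 1.0))

Reshaping the result to (lines, samples) yields the abundance map directly.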
Example #21
0
    def filter_data(self, data, labels):
        """filter_data(data, labels)

    Filter out bad quality data, using criteria provided by Nina Lanza:
    1) Large, broad features (don't correspond to narrow peaks)
    2) Low SNR

    For each item thus filtered, write out a plot of the data
    with an explanation:
    1) Annotate in red the large, broad feature, or
    2) Annotate in text the SNR.

    Returns updated (filtered) data and label arrays.
    """

        n = data.shape[1]

        newdata = data
        remove_ind = []

        printt("Filtering out data with large, broad features.")
        #for i in [78]: # test data gap
        #for i in [1461]: # test broad feature
        #for i in [3400]: # test broad feature
        for i in range(n):
            waves = range(data.shape[0])
            this_data = data[waves, i]
            peak_ind = this_data.argmax()
            peak_wave = self.xvals[waves[peak_ind]]

            # Set min peak to examine as 15% of max
            min_peak = 0.15 * this_data[peak_ind]

            # Track red_waves: indices of bands that contribute to deciding
            # to filter out this item (if indeed it is).
            # These same wavelengths will be removed from further consideration
            # regardless of filtering decision
            red_waves = []

            # Iterate over peaks sufficiently big to be of interest
            while this_data[peak_ind] >= min_peak:
                #print "%d) Max peak: %f nm (index %d, %f)" % (i,
                #                                              self.xvals[waves[peak_ind]],
                #                                              peak_ind,
                #                                              this_data[peak_ind])
                red_waves = [waves[peak_ind]]

                # Set the low value to look for (indicates nice narrow peak)
                low_value = 0.1 * this_data[peak_ind]

                filter_item = True  # guilty until proven innocent
                # Note: band resolution/spacing may differ across spectral ranges.
                # Sweep left and right up to 400 bands (10 nm), looking for low_value
                min_wave_ind = peak_ind
                max_wave_ind = peak_ind
                for j in range(1, 401):
                    min_wave_ind = max(min_wave_ind - 1, 0)
                    max_wave_ind = min(max_wave_ind + 1, len(waves) - 1)
                    red_waves += [waves[min_wave_ind]]
                    red_waves += [waves[max_wave_ind]]

                    # If there's a data gap, ignore it
                    if ((self.xvals[waves[min_wave_ind] + 1] -
                         self.xvals[waves[min_wave_ind]]) > 1):
                        min_wave_ind += 1
                    if ((self.xvals[waves[max_wave_ind]] -
                         self.xvals[waves[max_wave_ind] - 1]) > 1):
                        max_wave_ind -= 1

                    # Stop if we've gone more than 10 nm
                    if (((self.xvals[waves[peak_ind]] -
                          self.xvals[waves[min_wave_ind]]) > 10)
                            or ((self.xvals[waves[max_wave_ind]] -
                                 self.xvals[waves[peak_ind]]) > 10)):
                        filter_item = True
                        #print '%.2f: %.2f to %.2f' % (self.xvals[waves[peak_ind]],
                        #                              self.xvals[waves[min_wave_ind]],
                        #                              self.xvals[waves[max_wave_ind]])
                        break

                    #print 'checking %f, %f' % (self.xvals[waves[min_wave_ind]],
                    #                           self.xvals[waves[max_wave_ind]])
                    if this_data[min_wave_ind] <= low_value or \
                       this_data[max_wave_ind] <= low_value:
                        # success! data is good
                        #print '  %f: %f' % (self.xvals[waves[min_wave_ind]],
                        #                    this_data[min_wave_ind])
                        #print '  %f: %f' % (self.xvals[waves[max_wave_ind]],
                        #                    this_data[max_wave_ind])
                        filter_item = False
                        break

                # Remove the wavelengths we've considered
                for w in red_waves:
                    if w in waves:
                        waves.remove(w)

                # Filter the item out
                if filter_item:
                    print "Filter item %d (%s) due to [%.2f, %.2f] nm " % (
                        i, labels[i], self.xvals[min(red_waves)],
                        self.xvals[max(red_waves)])
                    # record it for later removal
                    remove_ind += [i]
                    '''
          # generate a plot, highlighting the problematic feature in red_waves
          pylab.clf()
          pylab.plot(self.xvals, data[:,i], 'k-', linewidth=1)
          pylab.plot(self.xvals[min(red_waves):max(red_waves)+1],
                     data[min(red_waves):max(red_waves)+1,i], 'r-', linewidth=1)
          pylab.xlabel(self.xlabel, fontsize=16)
          pylab.ylabel(self.ylabel, fontsize=16)
          pylab.xticks(fontsize=16)
          pylab.yticks(fontsize=16)
          pylab.title('Filtered item %d, %s' % (i, labels[i]))
          
          if not os.path.exists('filtered'):
            os.mkdir('filtered')
          pylab.savefig(os.path.join('filtered', 
                                     '%s-filtered-%d.pdf' % (labels[i], i)))
          '''

                    break

                else:  # keep going
                    # Update this_data to ignore previously considered wavelengths
                    this_data = data[waves, i]
                    peak_ind = this_data.argmax()

        # Remove all filtered items
        newdata   = np.array([data[:,i] for i in range(data.shape[1]) \
                              if i not in remove_ind]).T
        newlabels = np.array([labels[i] for i in range(len(labels)) \
                              if i not in remove_ind])
        printt(" ... from %d to %d items (%d removed)." %
               (n, newdata.shape[1], n - newdata.shape[1]))
        n = newdata.shape[1]

        printt("Filtering out low-SNR data.")

        # Filter out any item left that has a max peak value < 0.01.
        # (these are normalized probabilities now)
        remove_ind = []
        for i in range(n):
            if max(newdata[:, i]) < 0.01:
                remove_ind += [i]

        # Remove all filtered items
        newdata   = np.array([newdata[:,i] for i in range(newdata.shape[1]) \
                              if i not in remove_ind]).T
        newlabels = np.array([newlabels[i] for i in range(len(newlabels)) \
                              if i not in remove_ind])

        print " ... from %d to %d items (%d removed)." % (n, newdata.shape[1],
                                                          n - newdata.shape[1])

        #sys.exit(0)

        return (newdata, newlabels)
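
The removal passes above rebuild the arrays column by column; an equivalent boolean-mask sketch for the low-SNR step (assuming data is features x samples, as in this method):

import numpy as np

# Keep only items whose peak value reaches the 0.01 SNR floor.
keep = newdata.max(axis=0) >= 0.01
newdata = newdata[:, keep]
newlabels = newlabels[keep]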
Example #22
0
  def  readin(self):
    """readin()
    
    Read in ENVI data via the parent class, then read in the
    segmentation map from segmapfile and average the data per
    segment, re-storing it in reduced form in self.data.

    Set self.labels to record the segment ids.
    """

    super(SegENVIData, self).readin()
    # data is wavelengths x pixels

    # Segmentation maps from SLIC are "raster scan, 32-bit float" (per Hua)
    # Actually, nicer to read them as ints.
    self.segmap = np.fromfile(self.segmapfile, dtype='int32', count=-1)
    if self.lines * self.samples != self.segmap.shape[0]:
      printt('Error: mismatch in number of pixels between image and segmap.')
      return

    goodbands = range(len(self.xvals))
    # For AVIRIS data:
    if 'f970619' in self.name:
      printt('Removing known bad bands, assuming AVIRIS data.')
      # Per Hua's email of July 3, 2013, use a subset of good bands:
      # Indexing from 1: [10:100 116:150 180:216]
      # Subtract 1 to index from 0, but not to the end values
      # because range() is not inclusive of end
      goodbands  = range(9,100) + range(115,150) + range(179,216)
    # For UCIS data:
    elif 'mars_yard' in self.name:
      printt('Removing known bad bands, assuming UCIS data.')
      # Per Hua's email of May 8, 2014, use a subset of good bands.
      # Exclude 1.4-1.9 um (per Diana).
      waterband_min = np.argmin([abs(x-1400) for x in self.xvals])
      waterband_max = np.argmin([abs(x-1900) for x in self.xvals])
      waterbands    = range(waterband_min, waterband_max+1)
      # Based on Hua's visual examination, exclude bands
      # 1-6, 99-105, and 145-155.
      # Good bands are therefore 7-98, 106-144, and 156-maxband.
      # Subtract 1 to index from 0, but not to the end values
      # because range() is not inclusive of end
      maxband    = len(self.xvals)
      goodbands  = range(6,98) + range(105,144) + range(155,maxband)
      # Remove the water bands
      printt('Removing water absorption bands.')
      printt('%d good bands -> ' % len(goodbands))
      goodbands  = list(set(goodbands) - set(waterbands))
      printt(' %d good bands' % len(goodbands))
      
    self.data  = self.data[goodbands,:]
    self.xvals = self.xvals[goodbands]
    
    #self.segmap = self.segmap.reshape(self.lines, self.samples)
    self.labels  = np.unique(self.segmap)
    printt('Found %d segments.' % len(self.labels))
    newdata = np.zeros((self.data.shape[0], len(self.labels)))
    for i, s in enumerate(self.labels):
      pixels  = np.where(self.segmap == s)[0]
      #print '%d: %s: %d pixels' % (i, str(s), len(pixels))
      # Compute and store the mean
      newdata[:,i] = self.data[:,pixels].mean(1)

    printt('Finished averaging the spectra.')
    # Update data with the averaged version
    self.data = newdata

    self.name  = self.name + '-seg'
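
The per-segment averaging loop above visits each segment id in turn; a vectorized sketch of the same computation using np.unique and np.bincount (a hypothetical standalone helper):

import numpy as np

def segment_means(data, segmap):
    # data: bands x pixels; segmap: 1-D array of per-pixel segment ids.
    labels, inv = np.unique(segmap, return_inverse=True)
    counts = np.bincount(inv).astype(float)
    means = np.empty((data.shape[0], len(labels)))
    for b in range(data.shape[0]):
        # Sum band b within each segment, then divide by segment size.
        means[b] = np.bincount(inv, weights=data[b]) / counts
    return labels, means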
Example #23
0
  def  readin(self, startsol=-1, endsol=-1, initpriorsols=False, shotnoisefilt=0):
    """readin()
    
    Read in LIBS data from self.filename.
    Read in initialization data from self.initfilename.
    Normalize according to Nina's instructions.

    Optionally, specify a sol range (startsol-endsol) for data to analyze.
    Optionally, use data prior to startsol to initialize the model.
    Optionally, specify the width of a median filter to apply.
    """

    input_type = os.path.splitext(self.filename)[1][1:]

    self.data     = []
    self.initdata = []
    self.xlabel   = 'Wavelength (nm)'
    self.ylabel   = 'Intensity'

    if input_type == 'csv':
      (self.data, self.labels) = LIBSData.read_csv_data(self.filename)

      # Prune off first column (wavelengths)
      wavelengths = self.data[:,0]
      self.xvals  = wavelengths.reshape(-1,1)
      self.data   = self.data[:,1:]  # features x samples

      (self.data, self.xvals) = \
          LIBSData.prune_and_normalize(self.data, self.xvals, shotnoisefilt)
      
      (self.data, self.labels) = self.filter_data(self.data, self.labels)

    elif input_type == 'pkl':

      inf = open(self.filename, 'rb')  # binary mode for pickle
      (self.data, self.labels, self.xvals) = pickle.load(inf)
      inf.close()

      # Temporary: until I re-run full extraction on shiva
      use = np.where(np.logical_and(self.xvals >= 270,
                                    self.xvals < 820))[0]
      self.xvals = self.xvals[use]
      self.data  = self.data[use,:]

      (self.data, self.labels) = self.filter_data(self.data, self.labels)

    else:  # Unknown format

      printt(' Error: Unknown input type for %s; no data read in' % \
             self.filename)

    # Read in the init data file, if present
    if self.initfilename != '':
      printt('Reading initialization data set from %s' % self.initfilename)
      (self.initdata, unused_labels) = LIBSData.read_csv_data(self.initfilename)

      # Prune off first column (wavelengths)
      wavelengths = self.initdata[:,0]
      self.initdata = self.initdata[:,1:] # features x samples
      (self.initdata, unused_xvals) = \
          LIBSData.prune_and_normalize(self.initdata, wavelengths, shotnoisefilt)
      print self.initdata.shape

      (self.initdata, unused_labels) = self.filter_data(self.initdata, unused_labels)
      print self.initdata.shape

    ########## Subselect by sol, if specified ##########
    if startsol > -1 and endsol > -1:
      printt("Analyzing data from sols %d-%d only." % (startsol, endsol))
      current_sols  = [i for (i,s) in enumerate(self.labels) \
                       if (int(s.split('_')[0][3:]) >= startsol and \
                           int(s.split('_')[0][3:]) <= endsol)]
      if initpriorsols:
        previous_sols = [i for (i,s) in enumerate(self.labels) \
                         if int(s.split('_')[0][3:]) < startsol]
        printt("Putting previous sols' (before %d) data in initialization model." % startsol)
        # Concatenate initdata with data from all previous sols
        if len(self.initdata) > 0:
          print self.initdata.shape
          print self.data[:,previous_sols].shape
          self.initdata = np.hstack((self.initdata, self.data[:,previous_sols]))
        else:
          self.initdata = self.data[:,previous_sols]

      # Prune analysis data set to only include data from the sol of interest
      self.data   = self.data[:,current_sols]
      self.labels = self.labels[current_sols]
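
The sol subselection above parses the sol number out of labels of the form 'Sol<sol>_<sclk>_<target>' (the format built by the CSV reader earlier) with repeated split/slice expressions; a small hypothetical helper makes the intent explicit:

def sol_of(label):
    # Labels look like 'Sol34_123456789_Target'; the sol number is the
    # first underscore-delimited field, minus the 'Sol' prefix.
    return int(label.split('_')[0][3:])

# e.g.: current_sols = [i for (i, s) in enumerate(self.labels)
#                       if startsol <= sol_of(s) <= endsol]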
Example #24
0
  def  plot_item(self, m, ind, x, r, k, label, U=[], scores=[], feature_weights=[]):
    """plot_item(self, m, ind, x, r, k, label, U, scores, feature_weights)

    Plot selection m (index ind, data in x) and its reconstruction r,
    with k and label to annotate the plot.

    Also show a spatial plot indicating where the selected pixel is
    and an abundance plot of similarity across the data set.

    U and scores are optional; they are ignored in this method but
    used by some subclasses' overrides.

    If feature_weights are specified, omit any 0-weighted features from the plot.
    """
    
    if len(x) == 0 or len(r) == 0:
      printt("Error: No data in x and/or r.")
      return

    (l,s) = [int(v) for v in label.split(',')]

    # Select the features to plot
    if feature_weights != []:
      goodfeat = [f for f in range(len(feature_weights)) \
                    if feature_weights[f] > 0]
    else:
      goodfeat = range(len(self.xvals))

    # Set up the subplots
    pylab.figure()
    #pylab.subplots_adjust(wspace=0.1, left=0)
    pylab.subplots_adjust(wspace=0.05)

    # Plot #1: expected vs. observed feature vectors
    # xvals, x, and r need to be column vectors
    pylab.subplot(2,2,1)
    pylab.plot(self.xvals[goodfeat], r[goodfeat], 'r-', label='Expected')
    pylab.plot(self.xvals[goodfeat], x[goodfeat], 'b.-', label='Observations')
    pylab.ylim([0.0, max(1.0, x.max())])

    pylab.xlabel(self.xlabel)
    pylab.ylabel(self.ylabel)
    pylab.legend(fontsize=10, loc=2)

    # Plot #2: zoom of selected pixel, 20x20 context
    pylab.subplot(2,2,2)
    winwidth = 20
    minl = max(0, l-winwidth/2)
    mins = max(0, s-winwidth/2)
    maxl = min(self.lines,   l+winwidth/2)
    maxs = min(self.samples, s+winwidth/2)
    rgb_data = self.get_RGB()
    pylab.imshow(rgb_data[minl:maxl, mins:maxs],
                 interpolation='none') #, alpha=0.85)
    pylab.gca().add_patch(Rectangle((min(winwidth/2,s)-1,
                                     min(winwidth/2,l)-1),
                                     2, 2,
                                     fill=None, alpha=1))
    pylab.axis('off')
    pylab.title('Zoom')

    # Spatial selection plot
    # this is an inset axes over the main axes
    #a = pylab.axes([.15, .75, .3, .15])
    pylab.subplot(2,2,3)
    # Use alpha to lighten the RGB data
    plt = pylab.imshow(rgb_data, interpolation='none', 
                       alpha=0.85)
    pylab.plot(s, l, 'x', markeredgewidth=2, scalex=False, scaley=False)
    #pylab.setp(a, xticks=[], yticks=[])
    pylab.axis('off')
    pylab.title('Selection')

    # Also update the priority map.
    self.pr_map[l,s] = m+1
    #print 'setting %d, %d to %d' % (l, s, -m)
    n_tot = self.lines * self.samples
    n_pri = len(self.pr_map.nonzero()[0])
    n_unp = n_tot - n_pri
    printt(' %d prioritized; %d (%.2f%%) unprioritized remain' % \
             (n_pri, n_unp, n_unp * 100.0 / n_tot))

    # Abundance map
    # Compute distance from selected x to all other items
    abund = np.zeros((self.lines, self.samples))
    nbands = self.data.shape[0]
    for l_ind in range(self.lines):
      for s_ind in range(self.samples):
        if l_ind == l and s_ind == s:
          abund[l_ind,s_ind] = 0
          continue
        d = self.data[:, l_ind*self.samples + s_ind]
        # Use Euclidean distance.
        #abund[l,s] = math.sqrt(pow(np.sum(x - d), 2)) / float(nbands)
        # Use spectral angle distance; clip the cosine to [-1, 1] so
        # floating-point error can't push math.acos() out of its domain
        cossim = np.dot(x, d) / (np.linalg.norm(x) * np.linalg.norm(d))
        abund[l_ind,s_ind] = math.acos(max(-1.0, min(1.0, cossim)))
        
        # Propagate current priority to similar items (not yet prioritized)
        # This threshold is subjectively chosen.
        # I used 0.10 for the Mars yard UCIS cube from Diana.
        # I used different values for the micro-UCIS cubes from Bethany
        # (see Evernote notes).
        if self.pr_map[l_ind,s_ind] == 0 and abund[l_ind,s_ind] <= 0.13:
          self.pr_map[l_ind,s_ind] = m+1


    printt('Abundance: %g to %g' % (abund.min(), abund.max()))
    pylab.subplot(2,2,4)
    # Use colormap jet_r so smallest value is red and largest is blue
    pylab.imshow(abund, interpolation='none', cmap='jet_r', vmin=0, vmax=0.15)
    pylab.axis('off')
    pylab.title('Abundance')

    pylab.suptitle('DEMUD selection %d (%s), item %d, using K=%d' % \
                   (m, label, ind, k))
          
    # Write the plot to a file.
    outdir = os.path.join('results', self.name)
    if not os.path.exists(outdir):
      os.mkdir(outdir)

    figfile = os.path.join(outdir, 'sel-%d-k-%d-(%s).pdf' % (m, k, label))
    pylab.savefig(figfile)
    print 'Wrote plot to %s' % figfile
    pylab.close()

    # Write the priority map to an image file
    prmapfig = os.path.join(outdir, 'prmap-k-%d.pdf' % k)
    pylab.figure()
    # Start with colormap jet_r so smallest value is red and largest is blue
    # Max_c must be at least 2 and no greater than 255.
    # Values greater than 255 will be mapped to the last color.
    # (Imposed because we're then saving this out as an ENVI classification map with bytes.
    #  May want to be more flexible in the future, but I can't imagine really wanting to see
    #  more than 255 distinct colors?)
    max_c    = 255 if m > 254   else m+1  # cap so m+1 never exceeds 255
    max_c    = 2   if max_c < 2 else max_c
    cmap     = matplotlib.cm.get_cmap('jet_r', max_c)
    # Tweak so 0 is white; red starts at 1
    jet_map_v    = cmap(np.arange(max_c))
    jet_map_v[0] = [1,1,1,1]  # white
    cmap         = matplotlib.colors.LinearSegmentedColormap.from_list("jet_map_white", jet_map_v)
    #pylab.imshow(self.pr_map, interpolation='none', cmap=cmap, vmin=1, vmax=m+1)
    pylab.imshow(self.pr_map, interpolation='none', cmap=cmap)
    pylab.savefig(prmapfig)
    print 'Wrote priority map figure to %s' % prmapfig
    pylab.close()

    # Write the priority map contents to a file as a 64-bit float map 
    # (retained for backward compatibility for Hua,
    # but superseded by ENVI data file next)
    prmapfile = os.path.join(outdir, 'prmap-k-%d-hua.dat' % k)
    fid = open(prmapfile, 'wb')
    self.pr_map.astype('float64').tofile(fid)
    fid.close()
    print "Wrote Hua's priority map (data) to %s" % prmapfile

    # Write an ENVI data file and header file
    prmapfile = os.path.join(outdir, 'prmap-k-%d.dat' % k)
    fid = open(prmapfile, 'wb')
    self.pr_map.astype('uint8').tofile(fid)  # save out as bytes

    # This is a class map header file
    prmaphdr  = os.path.join(outdir, 'prmap-k-%d.dat.hdr' % k)
    fid = open(prmaphdr, 'w')
    fid.write('ENVI\n')
    fid.write('description = { DEMUD prioritization map }\n')
    fid.write('samples = %d\n' % self.samples)
    fid.write('lines = %d\n'   % self.lines)
    fid.write('bands = 1\n')
    fid.write('header offset = 0\n')          # 0 bytes
    fid.write('file type = Classification\n')
    fid.write('data type = 1\n')              # byte (max 255 priorities)
    fid.write('interleave = bip\n')           # Irrelevant for single 'band'
    fid.write('byte order = 0\n')             # Least-significant byte first
    # Classes include None (0) and then integers up to number of classes.
    fid.write('classes = %d\n' % max_c)       # Number of classes, incl. 'None'
    fid.write("class names = {'None', " + ', '.join(["'%d'" % a for a in range(1, max_c)]) + '}\n')
    fid.write('class lookup = {' + 
              ',\n                '.join([' %d, %d, %d' % (r*255,g*255,b*255) for (r,g,b,a) in jet_map_v]) + 
              ' }\n')
    fid.close()
    print 'Wrote ENVI priority map data/header to %s[.hdr]' % prmapfile

    # Write the selections (spectra) in ASCII format
    selfile = os.path.join(outdir, 'selections-k%d.txt' % k)
    # If this is the first selection, open for write
    # to clear out any previous run; otherwise append.
    if m == 0:
      fid = open(selfile, 'w')
      # Output a header
      fid.write('# Index, Score')
      for w in self.xvals.tolist():
        fid.write(', %.3f' % w)
      fid.write('\n')
    else:
      fid = open(selfile, 'a')

    # If scores is empty, the selection was pre-specified,
    # so there is no score.  Output 0 for this item.
    if scores == []:
      fid.write('%d,0.0,' % (m))
    else:
      fid.write('%d,%f,' % (m, scores[m]))

    # Now output the feature vector itself
    # Have to reshape x because it's a 1D column vector
    np.savetxt(fid, x.reshape(1, x.shape[0]), fmt='%.5f', delimiter=',')

    fid.close()
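
Finally, a minimal sketch of reading the saved priority map back in (assuming the uint8 ENVI layout written above; the path, lines, and samples values are placeholders to be taken from the matching .hdr file):

import numpy as np

lines, samples = 100, 100  # placeholders; read from the .hdr file
pr_map = np.fromfile('results/name/prmap-k-10.dat', dtype='uint8')
pr_map = pr_map.reshape(lines, samples)
# 0 = never prioritized; n = selected (or propagated) at DEMUD iteration n-1.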