Code Example #1
File: test_formula.py Project: fperez/nipy
def test_design():
    # Check that you get the design matrix we expect
    t1 = F.Term("x")
    t2 = F.Term('y')

    n = F.make_recarray([2,4,5], 'x')
    yield assert_almost_equal, t1.formula.design(n)['x'], n['x']

    f = t1.formula + t2.formula
    n = F.make_recarray([(2,3),(4,5),(5,6)], 'xy')

    yield assert_almost_equal, f.design(n)['x'], n['x']
    yield assert_almost_equal, f.design(n)['y'], n['y']

    f = t1.formula + t2.formula + F.I + t1.formula * t2.formula
    yield assert_almost_equal, f.design(n)['x'], n['x']
    yield assert_almost_equal, f.design(n)['y'], n['y']
    yield assert_almost_equal, f.design(n)['1'], 1
    yield assert_almost_equal, f.design(n)['x*y'], n['x']*n['y']

    ny = ML.rec_drop_fields(n, 'y')
    yield assert_raises, ValueError, f.design, ny
    n = np.array([(2,3,'a'),(4,5,'b'),(5,6,'a')], np.dtype([('x', np.float64),
                                                            ('y', np.float64),
                                                            ('f', 'S1')]))
    f = F.Factor('f', ['a','b'])
    ff = t1.formula * f + F.I
    yield assert_almost_equal, ff.design(n)['f_a*x'], n['x']*[1,0,1]
    yield assert_almost_equal, ff.design(n)['f_b*x'], n['x']*[0,1,0]
    yield assert_almost_equal, ff.design(n)['1'], 1
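A portability note: the matplotlib.mlab rec_* helpers used throughout these examples (rec_drop_fields, rec_append_fields, rec_keep_fields, ...) were removed in modern matplotlib (3.x); numpy.lib.recfunctions provides equivalents. A minimal sketch of the replacement for rec_drop_fields, on toy data with made-up field names:

import numpy as np
from numpy.lib import recfunctions as rfn

n = np.array([(2.0, 3.0), (4.0, 5.0)], dtype=[('x', float), ('y', float)])
nx = rfn.drop_fields(n, ['y'])  # analogous to mlab.rec_drop_fields(n, ['y'])
print(nx.dtype.names)           # ('x',)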
Code Example #2
def gethistprices(query, numrows=1000, **kwargs):

    import matplotlib.mlab as mlab
    import numpy as np

    rec_arr = sqlite2rec(query, **kwargs)

    # unique symbols, first-occurrence indices, and a per-row symbol id
    (syms, posuniq, pos) = np.unique(rec_arr.sym, return_index=True,
                                     return_inverse=True)

    new_rec_arr = mlab.rec_append_fields(rec_arr, 'idx', pos)

    nosym = mlab.rec_drop_fields(new_rec_arr, ['sym'])

    recnumrecs = mlab.rec_groupby(nosym, ('idx',), (('idx', len, 'idxcount'),))

    # keep only symbols with at least numrows rows
    idx = np.nonzero(recnumrecs.idxcount >= numrows)[0]

    idxcount = len(recnumrecs[idx])

    xs = np.empty((idxcount, numrows, len(nosym[0]) - 1), dtype=float)

    for i in range(idxcount):

        if kwargs.get('verbose') and i % 50 == 0:
            print('%d of %d' % (i, idxcount))

        curdata = nosym[nosym.idx == idx[i]]

        curdata_arr = np.array(curdata.tolist(), dtype=float)
        xs[i] = curdata_arr[0:numrows, 0:-1]

    return (syms[idx], xs)
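The grouping above hinges on np.unique returning both the first index of each symbol and a per-row inverse index; a self-contained illustration with made-up symbols:

import numpy as np

sym = np.array(['AAPL', 'GOOG', 'AAPL', 'MSFT', 'GOOG'])
syms, posuniq, pos = np.unique(sym, return_index=True, return_inverse=True)
print(syms)     # ['AAPL' 'GOOG' 'MSFT'] -- sorted unique symbols
print(posuniq)  # [0 1 3] -- first occurrence of each symbol
print(pos)      # [0 1 0 2 1] -- per-row symbol id, stored as the 'idx' field above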
Code Example #3
File: gen_merge.py Project: fgassert/aqueduct_atlas
def join_rec(r1,field1,r2,field2):
    """1-to-1 joining with non-unique lefthand side keys"""
    mapping = dict(zip(r2[field2], range(len(r2))))
    diff = np.setdiff1d(r1[field1],r2[field2])
    r2len = len(r2)
    if len(diff) > 0:
        print("WARNING: %s no matching key: %s" % (field2, diff))
        for d in diff:
            mapping[d] = r2len
    r2copy = mlab.rec_drop_fields(r2, (field2,))
    r2copy.resize(r2len+1)
    joinfields = list(r2copy.dtype.names)
    dtypes = []
    for i in range(len(joinfields)):
        if r2copy.dtype[i].kind == "i":
            dtypes.append(np.double)
        else:
            dtypes.append(r2copy.dtype[i])
        if r2copy.dtype[i].kind == "f":
            r2copy[r2copy.dtype.names[i]][-1]=NULL_VALUE
        while joinfields[i] in r1.dtype.names:
            joinfields[i] = joinfields[i]+"_"
    rightrec = r2copy[[mapping[key] for key in r1[field1]]]
    r1 = mlab.rec_append_fields(r1, joinfields, [rightrec[n] for n in rightrec.dtype.names], dtypes)
    return r1
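The core of join_rec is a key-to-row-index dict built over the righthand table and then fancy-indexed with the lefthand keys, so duplicate lefthand keys reuse the same righthand row; a self-contained sketch with toy arrays:

import numpy as np

r1 = np.array([(1, 10.0), (2, 20.0), (1, 30.0)], dtype=[('key', int), ('a', float)])
r2 = np.array([(1, 'x'), (2, 'y')], dtype=[('key', int), ('b', 'U1')])

mapping = dict(zip(r2['key'], range(len(r2))))
rightrec = r2[[mapping[k] for k in r1['key']]]  # one righthand row per lefthand row
print(rightrec['b'])  # ['x' 'y' 'x']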
Code Example #4
File: tileqa.py Project: desihub/desisurvey
def writefiles(tiles, fnbase, overwrite=False):
    from astropy.io import fits
    from astropy.io import ascii
    from matplotlib.mlab import rec_drop_fields
    from astropy import table
    fits.writeto(fnbase+'.fits', tiles, overwrite=overwrite)
    hdulist = fits.open(fnbase+'.fits', mode='update')
    hdulist[1].header['EXTNAME'] = 'TILES'
    hdulist.close()
    tilestab = table.Table(
        rec_drop_fields(tiles, ['brightra', 'brightdec', 'brightvtmag']))
    metadata = {'tileid': ('', 'Unique tile ID'),
                'ra': ('deg', 'Right ascension'),
                'dec': ('deg', 'Declination'),
                'pass': ('', 'DESI layer'),
                'in_desi': ('', '1=within DESI footprint; 0=outside'),
                'ebv_med': ('mag', 'Median Galactic E(B-V) extinction in tile'),
                'airmass': ('', 'Airmass if observed at hour angle 15 deg'),
                'star_density': ('deg^-2', 'median number density of Gaia stars brighter than 19.5 mag in tile'),
                'exposefac': ('', 'Multiplicative exposure time factor from airmass and E(B-V)'),
                'program': ('', 'DARK, GRAY, BRIGHT, or EXTRA'),
                'obsconditions': ('', '1 for DARK, 2 for GRAY, 4 for BRIGHT, 0 for EXTRA'),
                'brightra': ('deg', 'RAs of 3 brightest Tycho-2 stars in tile'),
                'brightdec': ('deg', 'Decs of 3 brightest Tycho-2 stars in tile'),
                'brightvtmag': ('mag', 'V_T magnitudes of 3 brightest Tycho-2 stars in tile'),
                'centerid': ('', 'Unique tile ID of pass 0 tile corresponding to this tile'),
                }
    from astropy import units as u
    unitdict = {'': None, 'deg': u.deg, 'mag': u.mag, 'deg^-2': 1 / u.deg / u.deg}
    for name in tilestab.dtype.names:
        tilestab[name].unit = unitdict[metadata[name][0]]
        tilestab[name].description = metadata[name][1]
    ascii.write(tilestab, fnbase+'.ecsv', format='ecsv', overwrite=overwrite)
Code Example #5
File: hierarchical.py Project: rkern/kabuki
    def subj_by_subj_map_init(self, runs=2, verbose=-1, **map_kwargs):
        """
        initializing nodes by finding the MAP for each subject separately
        Input:
            runs - number of MAP runs for each subject
            map_kwargs - other arguments that will be passes on to the map function

        Note: This function should be run prior to the nodes creation, i.e.
        before running mcmc() or map()
        """

        # check if nodes were created. if they were it cause problems for deepcopy
        assert (not self.nodes), "function should be used before nodes are initialized."

        # init
        subjs = self._subjs
        n_subjs = len(subjs)

        empty_s_model = deepcopy(self)
        empty_s_model.is_group_model = False
        del empty_s_model._num_subjs, empty_s_model._subjs, empty_s_model.data

        self.create_nodes()

        # loop over subjects
        for i_subj in range(n_subjs):
            # create and fit single subject
            if verbose > 1: print("*!*!* fitting subject %d *!*!*" % subjs[i_subj])
            t_data = self.data[self.data['subj_idx'] == subjs[i_subj]]
            t_data = rec_drop_fields(t_data, ['data_idx'])
            s_model = deepcopy(empty_s_model)
            s_model.data = t_data
            s_model.map(method='fmin_powell', runs=runs, **map_kwargs)

            # copy to original model
            for (name, node) in s_model.group_nodes.items():
                self.subj_nodes[name][i_subj].value = node.value

        #set group and var nodes
        for (param_name, d) in self.params_dict.items():
            for (tag, nodes) in d.subj_nodes.items():
                subj_values = [x.value for x in nodes]
                #set group node
                if d.group_nodes:
                    d.group_nodes[tag].value = np.mean(subj_values)
                #set var node
                if d.var_nodes:
                    if d.var_type == 'std':
                        d.var_nodes[tag].value = np.std(subj_values)
                    elif d.var_type == 'precision':
                        d.var_nodes[tag].value = np.std(subj_values)**-2
                    elif d.var_type == 'sample_size':
                        v = np.var(subj_values)
                        m = np.mean(subj_values)
                        d.var_nodes[tag].value = (m * (1 - m)) / v - 1
                    else:
                        raise ValueError("unknown var_type")
Code Example #6
def load_aeronet(fname, keep_fields='all', header=False):
    """loads aeronet lev 2.0 csv file.
    fname: data file name
    keep_fields: 'all' or a list of fields
    header: whether to return header information along with the data.
    """
    std_day = datetime(1900,1,1,0,0,0)
    def date2daynum(datestr):
        the_day = datetime.strptime(datestr, '%d:%m:%Y')
        return float((the_day - std_day).days)

    def time2seconds(timestr):
        h, m, s = [int(t) for t in timestr.split(':')]
        return float(h * 3600 + m * 60 + s)

    def daynum_seconds2datetime(daynum, seconds):
        return std_day + timedelta(days=int(daynum), seconds=int(seconds))

    headlines = []
    with open(fname, 'r') as f:
        for line_i, line in enumerate(f):
            line = line.rstrip()
            if line.startswith('Date(dd-mm-yy'):
                datefield, timefield = [re.sub(r'\W', '', tk) for tk in line.split(',')[0:2]]
                break
            headlines.append(line)
    skip_header_lines = line_i

    if header:
        headline = ','.join(headlines)
        headerd = dict()
        for attrname, converter in [('location', str), ('long', float), ('lat', float), ('elev', float), ('nmeas', int), ('PI', str), ('email', str)]:
            m = re.search(r'%s.{0,1}=([^,\s]*)' % attrname, headline, flags=re.I)
            if m:
                try:
                    headerd[attrname] = converter(m.group(1))
                except Exception:
                    pass

    rawd = np.genfromtxt(fname, skip_header=skip_header_lines, delimiter=',', names=True, converters={0:date2daynum, 1:time2seconds})
    lend = len(rawd)
    dates = np.zeros(len(rawd), dtype='O')
    for i in range(lend):
        dates[i] = daynum_seconds2datetime(rawd[datefield][i], rawd[timefield][i])

    newd = mlab.rec_append_fields(rawd, 'datetime', dates)
    newd = mlab.rec_drop_fields(newd, [datefield, timefield, 'Last_Processing_Date'])

    if keep_fields != 'all':
        keep_fields = ['datetime'] + keep_fields
        newd = mlab.rec_keep_fields(newd, keep_fields)
    if header:
        return newd, headerd
    else:
        return newd
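Hypothetical usage (the filename and field names below are made up; valid fields depend on the AERONET level 2.0 product you downloaded):

data, hdr = load_aeronet('site_lev20.csv', keep_fields=['AOT_500', 'AOT_440'], header=True)
print(hdr.get('location'), len(data), data.dtype.names)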
Code Example #7
def to_hdf(ec,h5file):
    with h5plus.File(h5file) as h5:
        for k in dsetkeys:
            h5[k] = getattr(ec,k)

        r = ec.dfAc_st.to_records()
        rless = mlab.rec_drop_fields(r,['index'])
        sindex = r['index'].astype(str)
        r = mlab.rec_append_fields(rless,'index', sindex)
        h5.attrs['dfAc_st'] = r
        h5.attrs['kAs'] = ec.kAs
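The drop/append dance here appears to work around DataFrame.to_records() emitting the index as an object-dtype column, which HDF5 attributes cannot store; casting it to a fixed-width string first avoids that. A minimal sketch of the same round trip (assuming pandas and a pre-3.x matplotlib.mlab):

import pandas as pd
import matplotlib.mlab as mlab

df = pd.DataFrame({'v': [1.0, 2.0]}, index=['a', 'b'])
r = df.to_records()
rless = mlab.rec_drop_fields(r, ['index'])
r = mlab.rec_append_fields(rless, 'index', r['index'].astype(str))
print(r.dtype)  # 'index' is now a fixed-width string field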
Code Example #8
def atpy2h5(files, out, diff='all', name='ds'):
    """
   atpy format to h5

   Parameters
   ----------

   inp  : globable string specifying where the input files are
   out  : output h5 file.  In none exists, we create it.

   diff : List of fields that are stored as stacked arrays.  Those
          that are not different, we store the first element.
   """
    nfiles = len(files)
    t0 = atpy.Table(files[0])
    h5 = File(out)
    ds, ds1d = diffDS(t0.table_name,
                      t0.data.dtype, (nfiles, t0.data.size),
                      h5,
                      diff=diff)

    kicL = []
    nFail = 0
    for i in range(nfiles):
        if np.mod(i, 100) == 0:
            print(i)
        try:
            hdu = pyfits.open(files[i])
            data = hdu[1].data
            kic = hdu[1].header['KEPLERID']
            assert type(kic) == int
            kicL.append(kic)

            if diff != 'all':
                data = mlab.rec_keep_fields(data, diff)
                ds1d[:] = mlab.rec_drop_fields(data, diff)

            ds[i - nFail] = data

        except Exception:
            print(sys.exc_info()[1])
            nFail += 1

    ds.resize(ds.shape[0] - nFail, axis=0)
    kicL = np.array(kicL)
    h5.create_dataset('KIC', data=kicL)
    print("%i files failed" % nFail)
    h5.close()
Code Example #9
File: tileqa.py Project: schlafly/desisurvey
def writefiles(tiles, fnbase, overwrite=False):
    from astropy.io import fits
    from astropy.io import ascii
    from matplotlib.mlab import rec_drop_fields
    from astropy import table
    # under duress... uppercase
    tiles.dtype.names = [n.upper() for n in tiles.dtype.names]
    fits.writeto(fnbase + '.fits', tiles, overwrite=overwrite)
    hdulist = fits.open(fnbase + '.fits', mode='update')
    hdulist[1].header['EXTNAME'] = 'TILES'
    hdulist.close()
    tilestab = table.Table(
        rec_drop_fields(tiles, ['BRIGHTRA', 'BRIGHTDEC', 'BRIGHTVTMAG']))
    metadata = {
        'tileid': ('', 'Unique tile ID'),
        'ra': ('deg', 'Right ascension'),
        'dec': ('deg', 'Declination'),
        'pass': ('', 'DESI layer'),
        'in_desi': ('', '1=within DESI footprint; 0=outside'),
        'ebv_med': ('mag', 'Median Galactic E(B-V) extinction in tile'),
        'airmass': ('', 'Airmass if observed at hour angle 15 deg'),
        'star_density':
        ('deg^-2',
         'median number density of Gaia stars brighter than 19.5 mag in tile'),
        'exposefac':
        ('', 'Multiplicative exposure time factor from airmass and E(B-V)'),
        'program': ('', 'DARK, GRAY, BRIGHT, or EXTRA'),
        'obsconditions':
        ('', '1 for DARK, 2 for GRAY, 4 for BRIGHT, 0 for EXTRA'),
        'brightra': ('deg', 'RAs of 3 brightest Tycho-2 stars in tile'),
        'brightdec': ('deg', 'Decs of 3 brightest Tycho-2 stars in tile'),
        'brightvtmag': ('mag',
                        'V_T magnitudes of 3 brightest Tycho-2 stars in tile'),
        'centerid':
        ('', 'Unique tile ID of pass 0 tile corresponding to this tile'),
    }
    metadatacaps = {k.upper(): v for k, v in metadata.items()}
    from astropy import units as u
    unitdict = {
        '': None,
        'deg': u.deg,
        'mag': u.mag,
        'deg^-2': 1 / u.deg / u.deg
    }
    for name in tilestab.dtype.names:
        tilestab[name].unit = unitdict[metadatacaps[name][0]]
        tilestab[name].description = metadatacaps[name][1]
    ascii.write(tilestab, fnbase + '.ecsv', format='ecsv', overwrite=overwrite)
Code Example #10
File: h5plus.py Project: howardisaacson/utils
def atpy2h5(files, out, diff='all', name='ds'):
    """
    atpy format to h5

    Parameters
    ----------

    inp  : globable string specifying where the input files are
    out  : output h5 file.  If none exists, we create it.

    diff : List of fields that are stored as stacked arrays.  Those
           that are not different, we store the first element.
    """
    nfiles = len(files)
    t0 = atpy.Table(files[0])
    h5 = File(out)
    ds, ds1d = diffDS(t0.table_name, t0.data.dtype, (nfiles, t0.data.size),
                      h5, diff=diff)

    kicL = []
    nFail = 0
    for i in range(nfiles):
        if np.mod(i, 100) == 0:
            print(i)
        try:
            hdu = pyfits.open(files[i])
            data = hdu[1].data
            kic = hdu[1].header['KEPLERID']
            assert type(kic) == int
            kicL.append(kic)

            if diff != 'all':
                data = mlab.rec_keep_fields(data, diff)
                ds1d[:] = mlab.rec_drop_fields(data, diff)

            ds[i - nFail] = data

        except Exception:
            print(sys.exc_info()[1])
            nFail += 1

    ds.resize(ds.shape[0] - nFail, axis=0)
    kicL = np.array(kicL)
    h5.create_dataset('KIC', data=kicL)
    print("%i files failed" % nFail)
    h5.close()
Code Example #11
def modcols(r0):
    """
    Modify Columns

    1. Changes TIME, CADENCENO to t, cad
    2. rnQ      - normalize quarter
    3. rnanTime - remove nans from time series
    """

    r = r0.copy()
    oldName = ['TIME', 'CADENCENO']
    newName = ['t', 'cad']
    for o, n in zip(oldName, newName):
        r = mlab.rec_append_fields(r, n, r[o])
        r = mlab.rec_drop_fields(r, o)

    r = keplerio.rnQ(r)
    r = keplerio.rnanTime(r)
    return r
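modcols emulates a column rename with an append-then-drop pair; numpy.lib.recfunctions can rename in one step, and unlike the append/drop pair it also preserves column order. A sketch on a toy record:

import numpy as np
from numpy.lib import recfunctions as rfn

r = np.array([(100.0, 5)], dtype=[('TIME', float), ('CADENCENO', int)])
r = rfn.rename_fields(r, {'TIME': 't', 'CADENCENO': 'cad'})
print(r.dtype.names)  # ('t', 'cad')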
Code Example #12
File: geoprocessor.py Project: lemma-osu/pynnmap
    def create_info_table(self, raster_join_field, attribute_file,
            attribute_join_field, drop_fields=None):
        """
        Create ArcInfo table from attribute csv file

        Parameters
        ----------
        raster_join_field : str
            field in raster to use for joining to attribute data
        attribute_file : str
            name and path of file containing attribute information
        attribute_join_field : str
            field in attribute file to use to join to raster
        drop_fields : list of str
            fields in the attribute file to drop before join to raster

        Returns
        -------
        name of temp ArcInfo table, list of fields to join from info table

        """
        print('Building info table from attribute file')

        # Crosswalk of numpy types to ESRI types for numeric data
        numpy_to_esri_type = {
            ('b', 1): 'SHORT',
            ('i', 1): 'SHORT',
            ('i', 2): 'SHORT',
            ('i', 4): 'LONG',
            ('f', 4): 'FLOAT',
            ('f', 8): 'DOUBLE',
        }

        # Read the CSV file in to a recarray
        ra = mlab.csv2rec(attribute_file)
        col_names = [x.upper() for x in ra.dtype.names]
        ra.dtype.names = col_names

        # If there are fields to drop, do that now and get a new recarray
        if drop_fields is not None:

            # Ensure that the drop fields are actually fields in the current
            # recarray
            drop_fields = [x for x in drop_fields if x in ra.dtype.names]

            # Create a new recarray with these fields omitted
            ra = mlab.rec_drop_fields(ra, drop_fields)
            col_names = list(ra.dtype.names)

        # Get the column types and formats
        col_types = [(ra.dtype[i].kind, ra.dtype[i].itemsize)
                     for i in range(len(ra.dtype))]
        formats = [ra.dtype[i].str for i in range(len(ra.dtype))]

        # Sanitize column names
        #   No field name may be longer than 16 chars
        #   No field name can start with a number
        for i in range(len(col_names)):
            if len(col_names[i]) > 16:
                col_names[i] = col_names[i][0:16]
            if col_names[i][0].isdigit():
                col_names[i] = col_names[i].lstrip('0123456789')

        # Reset the names for the recarray
        ra.dtype.names = col_names

        # Sanitize the data
        #   Change True/False to 1/0 to be read into short type
        bit_fields = [(i, n) for (i, (n, t)) in
            enumerate(zip(col_names, col_types)) if t[0] == 'b']
        if bit_fields:
            for rec in ra:
                for (col_num, field) in bit_fields:
                    value = getattr(rec, field)
                    if value:
                        setattr(rec, field, 1)
                    else:
                        setattr(rec, field, 0)

            # Change the bit fields to be short integer
            for (col_num, field) in bit_fields:
                formats[col_num] = '<i2'

        # Create a sanitized recarray and output back to CSV
        temp_csv = os.path.join(env.workspace, 'xxtmp.csv')
        ra2 = np.rec.fromrecords(ra, names=col_names, formats=formats)
        mlab.rec2csv(ra2, temp_csv)

        # Create a scratch name for the temporary ArcInfo table 
        temp_table = arcpy.CreateScratchName('', '', 'ArcInfoTable')

        # Create the ArcInfo table and add the fields
        table_name = os.path.basename(temp_table)
        arcpy.CreateTable_management(env.workspace, table_name)
        for (n, t) in zip(col_names, col_types):
            try:
                esri_type = numpy_to_esri_type[t]
                arcpy.AddField_management(temp_table, n, esri_type)
            except KeyError:
                if t[0] == 'S':
                    arcpy.AddField_management(temp_table, n, 'TEXT', '#', '#',
                        t[1])
                else:
                    err_msg = 'Type not found for ' + str(t)
                    print(err_msg)
                    continue

        # Append the records from the CSV field to the temporary INFO table
        arcpy.Append_management(temp_csv, temp_table, 'NO_TEST')

        # Strip out the join field from the names if they are the same
        raster_join_field = raster_join_field.upper()
        attribute_join_field = attribute_join_field.upper()
        if raster_join_field == attribute_join_field:
            col_names.remove(attribute_join_field)

        # Create a semi-colon delimited string of the fields we want to join
        field_list = ';'.join(col_names)
        
        # Clean up
        os.remove(temp_csv)

        return temp_table, field_list
Code Example #13
    def create_info_table(self,
                          raster_join_field,
                          attribute_file,
                          attribute_join_field,
                          drop_fields=None):
        """
        Create ArcInfo table from attribute csv file

        Parameters
        ----------
        raster_join_field : str
            field in raster to use for joining to attribute data
        attribute_file : str
            name and path of file containing attribute information
        attribute_join_field : str
            field in attribute file to use to join to raster
        drop_fields : list of str
            fields in the attribute file to drop before join to raster

        Returns
        -------
        name of temp ArcInfo table, list of fields to join from info table

        """
        print('Building info table from attribute file')

        # Crosswalk of numpy types to ESRI types for numeric data
        numpy_to_esri_type = {
            ('b', 1): 'SHORT',
            ('i', 1): 'SHORT',
            ('i', 2): 'SHORT',
            ('i', 4): 'LONG',
            ('f', 4): 'FLOAT',
            ('f', 8): 'DOUBLE',
        }

        # Read the CSV file in to a recarray
        ra = mlab.csv2rec(attribute_file)
        col_names = [str(x).upper() for x in ra.dtype.names]
        ra.dtype.names = col_names

        # If there are fields to drop, do that now and get a new recarray
        if drop_fields is not None:

            # Ensure that the drop fields are actually fields in the current
            # recarray
            drop_fields = [x for x in drop_fields if x in ra.dtype.names]

            # Create a new recarray with these fields omitted
            ra = mlab.rec_drop_fields(ra, drop_fields)
            col_names = list(ra.dtype.names)

        # Get the column types and formats
        col_types = [(ra.dtype[i].kind, ra.dtype[i].itemsize)
                     for i in range(len(ra.dtype))]
        formats = [ra.dtype[i].str for i in range(len(ra.dtype))]

        # Sanitize column names
        #   No field name may be longer than 16 chars
        #   No field name can start with a number
        for i in range(len(col_names)):
            if len(col_names[i]) > 16:
                col_names[i] = col_names[i][0:16]
            if col_names[i][0].isdigit():
                col_names[i] = col_names[i].lstrip('0123456789')

        # Reset the names for the recarray
        ra.dtype.names = col_names

        # Sanitize the data
        # Change True/False to 1/0 to be read into short type
        bit_fields = [(i, n)
                      for (i, (n, t)) in enumerate(zip(col_names, col_types))
                      if t[0] == 'b']
        if bit_fields:
            for rec in ra:
                for (col_num, field) in bit_fields:
                    value = getattr(rec, field)
                    if value:
                        setattr(rec, field, 1)
                    else:
                        setattr(rec, field, 0)

            # Change the bit fields to be short integer
            for (col_num, field) in bit_fields:
                formats[col_num] = '<i2'

        # Create a sanitized recarray and output back to CSV
        temp_csv = os.path.join(env.workspace, 'xxtmp.csv')
        ra2 = np.rec.fromrecords(ra, names=col_names, formats=formats)
        mlab.rec2csv(ra2, temp_csv)

        # Create a scratch name for the temporary ArcInfo table
        temp_table = arcpy.CreateScratchName('', '', 'ArcInfoTable')

        # Create the ArcInfo table and add the fields
        table_name = os.path.basename(temp_table)
        arcpy.CreateTable_management(env.workspace, table_name)
        for (n, t) in zip(col_names, col_types):
            try:
                esri_type = numpy_to_esri_type[t]
                arcpy.AddField_management(temp_table, n, esri_type)
            except KeyError:
                if t[0] == 'S':
                    arcpy.AddField_management(temp_table, n, 'TEXT', '#', '#',
                                              t[1])
                else:
                    err_msg = 'Type not found for ' + str(t)
                    print(err_msg)
                    continue

        # Append the records from the CSV field to the temporary INFO table
        arcpy.Append_management(temp_csv, temp_table, 'NO_TEST')

        # Strip out the join field from the names if they are the same
        raster_join_field = raster_join_field.upper()
        attribute_join_field = attribute_join_field.upper()
        if raster_join_field == attribute_join_field:
            col_names.remove(attribute_join_field)

        # Create a semi-colon delimited string of the fields we want to join
        field_list = ';'.join(col_names)

        # Clean up
        os.remove(temp_csv)

        return temp_table, field_list
Code Example #14
def to_matrix(rec):
    new_rec = mlab.rec_drop_fields(rec, ['Date'])
    new_mat = np.c_[[new_rec[nm] for nm in new_rec.dtype.names]].T
    return new_mat
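The same structured-to-matrix conversion is available directly in numpy.lib.recfunctions; a sketch with a toy record array (no 'Date' field to drop here):

import numpy as np
from numpy.lib import recfunctions as rfn

rec = np.array([(1.0, 2.0), (3.0, 4.0)], dtype=[('a', float), ('b', float)])
mat = rfn.structured_to_unstructured(rec)  # shape (n_rows, n_fields), rows stay rows
print(mat)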
Code Example #15
    def _create_story(self):

        # Set up an empty list to hold the story
        story = []

        # Import the report styles
        styles = report_styles.get_report_styles()

        # Create a page break
        story = self._make_page_break(story, self.LANDSCAPE)

        # This class is somewhat of a hack, in that it likely only works on
        # rotated paragraphs which fit into the desired cell area
        class RotatedParagraph(p.Paragraph):
            def wrap(self, availHeight, availWidth):
                h, w = \
                    p.Paragraph.wrap(self, self.canv.stringWidth(self.text),
                        self.canv._leading)
                return w, h

            def draw(self):
                self.canv.rotate(90)
                self.canv.translate(0.0, -10.0)
                p.Paragraph.draw(self)

        # Section title
        title_str = '<strong>Local-Scale Accuracy Assessment: '
        title_str += 'Error Matrix for Vegetation Classes at Plot '
        title_str += 'Locations</strong>'

        para = p.Paragraph(title_str, styles['section_style'])
        t = p.Table([[para]], colWidths=[10.0 * u.inch])
        t.setStyle(
            p.TableStyle([
                ('TOPPADDING', (0, 0), (-1, -1), 3),
                ('BOTTOMPADDING', (0, 0), (-1, -1), 3),
                ('BACKGROUND', (0, 0), (-1, -1), '#957348'),
                ('ALIGNMENT', (0, 0), (-1, -1), 'LEFT'),
                ('VALIGN', (0, 0), (-1, -1), 'TOP'),
                ('GRID', (0, 0), (-1, -1), 0.25, colors.black),
            ]))
        story.append(t)
        story.append(p.Spacer(0, 0.1 * u.inch))

        # Read in the vegclass error matrix
        names = ['P_' + str(x) for x in range(1, 12)]
        names.insert(0, 'OBSERVED')
        names.extend(['TOTAL', 'CORRECT', 'FUZZY_CORRECT'])
        vc_data = mlab.csv2rec(self.vc_errmatrix_file, skiprows=1, names=names)
        vc_data = mlab.rec_drop_fields(vc_data, ['OBSERVED'])

        # Read in the stand attribute metadata
        mp = xsmp.XMLStandMetadataParser(self.stand_metadata_file)

        # Get the class names from the metadata
        vegclass_metadata = mp.get_attribute('VEGCLASS')
        vc_codes = vegclass_metadata.codes

        # Create a list of lists to hold the vegclass table
        vegclass_table = []

        # Add an empty row which will be a span row for the predicted label
        header_row = []
        for i in range(2):
            header_row.append('')
        prd_str = '<strong>Predicted Class</strong>'
        para = p.Paragraph(prd_str, styles['body_style_10_center'])
        header_row.append(para)
        for i in range(len(vc_data) - 1):
            header_row.append('')
        vegclass_table.append(header_row)

        # Add the predicted labels
        summary_labels = ('Total', '% Correct', '% FCorrect')
        header_row = []
        for i in range(2):
            header_row.append('')
        for code in vc_codes:
            label = re.sub('-', '-<br/>', code.label)
            para = p.Paragraph(label, styles['body_style_10_right'])
            header_row.append(para)
        for label in summary_labels:
            label = re.sub(' ', '<br/>', label)
            para = p.Paragraph(label, styles['body_style_10_right'])
            header_row.append(para)
        vegclass_table.append(header_row)

        # Set a variable to distinguish between plot counts and percents
        # in order to format them differently
        format_break = 11

        # Set the cells which should be blank
        blank_cells = \
            [(11, 12), (11, 13), (12, 11), (12, 13), (13, 11), (13, 12)]

        # Add the data
        for (i, row) in enumerate(vc_data):
            vegclass_row = []
            for (j, elem) in enumerate(row):

                # Blank cells
                if (i, j) in blank_cells:
                    elem_str = ''

                # Cells that represent plot counts
                elif i <= format_break and j <= format_break:
                    elem_str = '%d' % int(elem)

                # Cells that represent percentages
                else:
                    elem_str = '%.1f' % float(elem)
                para = p.Paragraph(elem_str, styles['body_style_10_right'])
                vegclass_row.append(para)

            # Add the observed labels at the beginning of each data row
            if i == 0:
                obs_str = '<strong>Observed Class</strong>'
                para = \
                    RotatedParagraph(obs_str, styles['body_style_10_center'])
            else:
                para = ''
            vegclass_row.insert(0, para)

            if i < len(vc_codes):
                label = vc_codes[i].label
            else:
                index = i - len(vc_codes)
                label = summary_labels[index]
            para = p.Paragraph(label, styles['body_style_10_right'])
            vegclass_row.insert(1, para)

            # Add this row to the table
            vegclass_table.append(vegclass_row)

        # Set up the widths for the table cells
        widths = []
        widths.append(0.3)
        widths.append(0.85)
        for i in range(len(vc_codes)):
            widths.append(0.56)
        for i in range(3):
            widths.append(0.66)
        widths = [x * u.inch for x in widths]

        # Convert the vegclass table into a reportlab table
        t = p.Table(vegclass_table, colWidths=widths)
        t.setStyle(
            p.TableStyle([
                ('SPAN', (0, 0), (1, 1)),
                ('SPAN', (0, 2), (0, -1)),
                ('SPAN', (2, 0), (-1, 0)),
                ('BACKGROUND', (0, 0), (-1, -1), '#f1efe4'),
                ('GRID', (0, 0), (-1, -1), 1, colors.white),
                ('ALIGNMENT', (0, 0), (-1, -1), 'LEFT'),
                ('VALIGN', (0, 0), (-1, -1), 'TOP'),
                ('VALIGN', (0, 2), (0, -1), 'MIDDLE'),
                ('VALIGN', (2, 1), (-1, 1), 'MIDDLE'),
                ('TOPPADDING', (0, 0), (-1, -1), 2),
                ('BOTTOMPADDING', (0, 0), (-1, -1), 3),
            ]))

        # Set up the shading for the truly correct cells
        correct = {}
        for i in range(len(vc_codes)):
            val = i + 1
            correct[val] = val

        for key in correct:
            val = correct[key]
            t.setStyle(
                p.TableStyle([
                    ('BACKGROUND', (key + 1, val + 1), (key + 1, val + 1),
                     '#aaaaaa'),
                ]))

        # Set up the shading for the fuzzy correct cells
        fuzzy = {}
        fuzzy[1] = [2]
        fuzzy[2] = [1, 3, 5, 8]
        fuzzy[3] = [2, 4, 5]
        fuzzy[4] = [3, 6, 7]
        fuzzy[5] = [2, 3, 6, 8]
        fuzzy[6] = [4, 5, 7, 9]
        fuzzy[7] = [4, 6, 10, 11]
        fuzzy[8] = [2, 5, 9]
        fuzzy[9] = [6, 8, 10]
        fuzzy[10] = [7, 9, 11]
        fuzzy[11] = [7, 10]

        for key in fuzzy:
            for elem in fuzzy[key]:
                t.setStyle(
                    p.TableStyle([
                        ('BACKGROUND', (key + 1, elem + 1),
                         (key + 1, elem + 1), '#dddddd'),
                    ]))

        # Add this table to the story
        story.append(t)
        story.append(p.Spacer(0, 0.1 * u.inch))

        # Explanation and definitions of vegetation class categories
        cell_str = """
            Cell values are model plot counts.  Dark gray cells represent
            plots where the observed class matches the predicted class
            and are included in the percent correct.  Light gray cells
            represent cases where the observed and predicted differ
            slightly (within +/- one class) based on canopy cover,
            hardwood proportion or average stand diameter, and are
            included in the percent fuzzy correct.
        """
        para = p.Paragraph(cell_str, styles['body_style_9'])
        story.append(para)
        story.append(p.Spacer(0, 0.1 * u.inch))

        head_str = '''
            <strong>Vegetation Class (VEGCLASS) Definitions</strong> --
            CANCOV (canopy cover of all live trees), BAH_PROP (proportion of
            hardwood basal area), and QMD_DOM (quadratic mean diameter of
            all dominant and codominant trees).
        '''
        para = p.Paragraph(head_str, styles['body_style_9'])
        story.append(para)
        story.append(p.Spacer(0, 0.1 * u.inch))

        # Print out the vegclass code definitions
        for code in vc_codes:
            label = code.label
            desc = self.txt_to_html(code.description)
            doc_str = '<strong>' + label + ':</strong> ' + desc
            para = p.Paragraph(doc_str, styles['body_style_9'])
            story.append(para)

        return story
Code Example #16
def fit_classifier(aml_clean_path, class_path, test=False, performance=False, 
                   n_fits=100, test_split=0.2, save_clf=True):
    '''Fits random forest classifier to aml_ref_clean formatted csv.
    Note that the species code should be contained in the folder col.'''

    # Get class_path dir, used for ancillary file names
    class_dir, tail = os.path.split(class_path)
    prefix = tail.split('.')[0]

    # Load refe_features_table
    table = csv2rec(aml_clean_path)

    # Only use calls with qual < 0.3 (Armitage)
    table = table[table.qual < 0.3]

    # Get target col (y) with integer codes instead of spp names
    y_str = table.folder  # Assumes spp name is in folder col
    y_str_uniq = set(list(y_str))

    n_spp = len(y_str_uniq)
    spp_codes = range(0, n_spp)
    code_table = np.array(list(zip(spp_codes, y_str_uniq)),
                          dtype=[('code', '<i8'), ('spp', '|S8')])

    y = np.zeros(len(y_str))  # Get col of full length with codes, not names
    for code, spp in code_table:
        y[y_str == spp] = int(code)

    # Get filename col for later grouping into passes
    f = table.filename

    # Remove non-feature cols from table
    table = rec_drop_fields(table, ['path', 'folder', 'filename', 'st', 'dc', 
                                    'qual', 'pmc'])

    # Get list of feature names remaining in table
    feature_names = table.dtype.names

    # Recarray to ndarray - http://stackoverflow.com/questions/5957380/
    # convert-structured-array-to-regular-numpy-array
    X = table.view((float, len(table.dtype.names)))

    # Partition data if test, holding portion for testing
    if not test:
        X_tr = X
        y_tr = y
        f_tr = f
        X_te = X
        y_te = y
        f_te = f
    else:
        # Use StratifiedShuffleSplit since train_test_split does not stratify
        sss = StratifiedShuffleSplit(n_splits=1, test_size=test_split)
        for train_index, test_index in sss.split(X, y):  # Only once since n_splits=1
            X_tr, X_te = X[train_index], X[test_index]
            y_tr, y_te = y[train_index], y[test_index]
            f_tr, f_te = f[train_index], f[test_index]

        sort_ind = f_te.argsort()  # Sort test data for pass analysis later
        X_te = X_te[sort_ind,:]  # Sort rows
        y_te = y_te[sort_ind]
        f_te = f_te[sort_ind]
        # (Train data order does not matter)

    # Define and fit classifier (modern scikit-learn always computes
    # feature importances, so no compute_importances flag is needed)
    clf = RandomForestClassifier(n_estimators=n_fits, oob_score=True)
    clf.fit(X_tr, y_tr)

    # If performance, save various performance metrics
    # NOTE: Performance of passes is difficult to understand if test=True,
    # as the calls in one pass may be split up.
    if performance:

        # Get OOB score
        print('OOB Score: ', clf.oob_score_)

        # Predict on test data, which may be held out (test=True) or all data
        y_te_pr = clf.predict(X_te)

        # Get true data and predictions by passes
        pred_te = clf.predict_proba(X_te)  # Prob of each spp
        f_te_p, pred_te_p, other = sum_group(f_te, pred_te, [y_te])
        y_te_p = other[0]  # Actual spp for each pass

        y_te_p_pr = []
        for row in range(len(y_te_p)):  # Find pred species for each pass
            y_te_p_pr.append(pred_te_p[row].argmax())  # First ind, ties bias
        y_te_p_pr = np.array(y_te_p_pr)

        # Get accuracy and confusion matrix for calls
        def make_conf_mat(y_te, y_te_pr, type):
            conf_mat = metrics.confusion_matrix(y_te, y_te_pr)
            conf_mat_frac = conf_mat / np.sum(conf_mat, axis=0)
            print(type, ' Accuracy: ', metrics.accuracy_score(y_te, y_te_pr))

            np.savetxt(os.path.join(class_dir, prefix+'_conf_'+type+'.csv'),
                       conf_mat, fmt='%i', delimiter=',')
            np.savetxt(os.path.join(class_dir, prefix+'_conffr_'+type+'.csv'), 
                       conf_mat_frac, fmt = '%.6f', delimiter=',')

        make_conf_mat(y_te, y_te_pr, 'call')
        make_conf_mat(y_te_p, y_te_p_pr, 'pass')

    # Save spp_code table, feature_names, and pickle classifier
    rec2csv(code_table, os.path.join(class_dir, prefix + '_spp_codes.csv'))
    rec2csv(np.array(list(feature_names), dtype=[('features', 'S8')]),
        os.path.join(class_dir, prefix + '_feature_names.csv'))
    if save_clf:
        joblib.dump(clf, class_path, compress = 9)
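The table.view((float, n_fields)) trick above reinterprets a structured array as a plain 2-D float array without copying; it only works when every field shares the same dtype. A minimal demonstration:

import numpy as np

table = np.array([(1.0, 2.0), (3.0, 4.0)], dtype=[('f1', float), ('f2', float)])
X = table.view((float, len(table.dtype.names)))  # zero-copy (n_rows, n_fields) array
print(X.shape)  # (2, 2)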
Code Example #17
File: ordination.py Project: lemma-osu/pynnmap
    def run(self):
        # Convert the species and environment matrices to numpy rec arrays
        spp_ra = utilities.csv2rec(self.spp_file)
        env_ra = utilities.csv2rec(self.env_file)

        # Extract the plot IDs from both the species and environment matrices
        # and ensure that they are equal
        spp_plot_ids = getattr(spp_ra, self.id_field)
        env_plot_ids = getattr(env_ra, self.id_field)
        if not np.all(spp_plot_ids == env_plot_ids):
            err_msg = "Species and environment plot IDs do not match"
            raise ValueError(err_msg)

        # Drop the ID column from both arrays
        spp_ra = mlab.rec_drop_fields(spp_ra, [self.id_field])
        env_ra = mlab.rec_drop_fields(env_ra, [self.id_field])

        # For the environment matrix, only keep the variables specified
        env_ra = mlab.rec_keep_fields(env_ra, self.variables)

        # Convert these matrices to pure floating point arrays
        spp = np.array([spp_ra[x] for x in spp_ra.dtype.names], dtype=float).T
        env = np.array([env_ra[x] for x in env_ra.dtype.names], dtype=float).T

        # Apply transformation if desired
        if self.species_transform == "SQRT":
            spp = np.sqrt(spp)
        elif self.species_transform == "LOG":
            spp = np.log(spp)

        # Create the RDA object
        cca = numpy_ordination.NumpyRDA(spp, env)

        # Open the output file
        numpy_fh = open(self.ord_file, "w")

        # Eigenvalues
        numpy_fh.write("### Eigenvalues ###\n")
        for (i, e) in enumerate(cca.eigenvalues):
            numpy_fh.write("RDA" + str(i + 1) + "," + "%.10f" % e + "\n")
        numpy_fh.write("\n")

        # Print out variable means
        numpy_fh.write("### Variable Means ###\n")
        for (i, m) in enumerate(cca.env_means):
            numpy_fh.write("%s,%.10f\n" % (self.variables[i], m))
        numpy_fh.write("\n")

        # Print out environmental coefficients loadings
        numpy_fh.write("### Coefficient Loadings ###\n")
        header_str = ",".join(["RDA%d" % (i + 1) for i in xrange(cca.rank)])
        numpy_fh.write("VARIABLE," + header_str + "\n")
        for (i, c) in enumerate(cca.coefficients()):
            coeff = ",".join(["%.10f" % x for x in c])
            numpy_fh.write("%s,%s\n" % (self.variables[i], coeff))
        numpy_fh.write("\n")

        # Print out biplot scores
        numpy_fh.write("### Biplot Scores ###\n")
        header_str = ",".join(["RDA%d" % (i + 1) for i in xrange(cca.rank)])
        numpy_fh.write("VARIABLE," + header_str + "\n")
        for (i, b) in enumerate(cca.biplot_scores()):
            scores = ",".join(["%.10f" % x for x in b])
            numpy_fh.write("%s,%s\n" % (self.variables[i], scores))
        numpy_fh.write("\n")

        # Print out species centroids
        numpy_fh.write("### Species Centroids ###\n")
        header_str = ",".join(["RDA%d" % (i + 1) for i in xrange(cca.rank)])
        numpy_fh.write("SPECIES," + header_str + "\n")
        for (i, c) in enumerate(cca.species_centroids()):
            scores = ",".join(["%.10f" % x for x in c])
            numpy_fh.write("%s,%s\n" % (spp_ra.dtype.names[i], scores))
        numpy_fh.write("\n")

        # Print out species tolerances
        numpy_fh.write("### Species Tolerances ###\n")
        header_str = ",".join(["RDA%d" % (i + 1) for i in xrange(cca.rank)])
        numpy_fh.write("SPECIES," + header_str + "\n")
        for (i, t) in enumerate(cca.species_tolerances()):
            scores = ",".join(["%.21f" % x for x in t])
            numpy_fh.write("%s,%s\n" % (spp_ra.dtype.names[i], scores))
        numpy_fh.write("\n")

        # Print out miscellaneous species information
        numpy_fh.write("### Miscellaneous Species Information ###\n")
        numpy_fh.write("SPECIES,WEIGHT,N2\n")
        species_weights, species_n2 = cca.species_information()
        for i in range(len(species_weights)):
            numpy_fh.write("%s,%.10f,%.10f\n" % (spp_ra.dtype.names[i], species_weights[i], species_n2[i]))
        numpy_fh.write("\n")

        # Print out site LC scores
        numpy_fh.write("### Site LC Scores ###\n")
        header_str = ",".join(["RDA%d" % (i + 1) for i in xrange(cca.rank)])
        numpy_fh.write("ID," + header_str + "\n")
        for (i, s) in enumerate(cca.site_lc_scores()):
            scores = ",".join(["%.10f" % x for x in s])
            numpy_fh.write("%d,%s\n" % (spp_plot_ids[i], scores))
        numpy_fh.write("\n")

        # Print out site WA scores
        numpy_fh.write("### Site WA Scores ###\n")
        header_str = ",".join(["RDA%d" % (i + 1) for i in xrange(cca.rank)])
        numpy_fh.write("ID," + header_str + "\n")
        for (i, s) in enumerate(cca.site_wa_scores()):
            scores = ",".join(["%.10f" % x for x in s])
            numpy_fh.write("%d,%s\n" % (spp_plot_ids[i], scores))
        numpy_fh.write("\n")

        # Miscellaneous site information
        numpy_fh.write("### Miscellaneous Site Information ###\n")
        numpy_fh.write("ID,WEIGHT,N2\n")
        site_weights, site_n2 = cca.site_information()
        for i in range(len(site_weights)):
            numpy_fh.write("%s,%.10f,%.10f\n" % (spp_plot_ids[i], site_weights[i], site_n2[i]))

        # Close the file
        numpy_fh.close()
Code Example #18
File: gen_all.py Project: fgassert/aqueduct_atlas
    def _postprocess(self, output_file, gu_poly, generator_list, overwrite=False, supplementary_figures=False, **kwargs):
        generatoroutputs = []

        for g in generator_list:
            if supplementary_figures:
                gkw = g.G_kwargs
            else:
                gkw = {}
            generatoroutputs.append(g.gen(*g.G_args, overwrite=overwrite, **gkw))

        gu_arr = gen_gu.gen(*gen_gu.G_args, overwrite=overwrite, **gen_gu.G_kwargs)
        print(gu_arr)

        print("merging arrays")
        out_arr = gen_merge.join_recs_on_keys(gu_arr, generatoroutputs, (BASIN_ID_FIELD, ADMIN_ID_FIELD, GW_ID_FIELD))
        sr = ap.SpatialReference(PRJNAME)
        ap.Project_management(gu_poly, output_file, sr)
        print(out_arr[BASIN_NAME_FIELD])

        missing_fields = np.setdiff1d(ALL_FIELDS, out_arr.dtype.names)
        if len(missing_fields) > 0:
            print("WARNING: missing fields %s" % missing_fields)
            obs = len(out_arr[GU_FIELD])
            out_arr = mlab.rec_append_fields(out_arr, missing_fields, [np.repeat(np.nan, obs) for _ in missing_fields])

        extra_fields = np.setdiff1d(out_arr.dtype.names, ALL_FIELDS)
        print("dropping extra fields %s" % extra_fields)
        out_arr = mlab.rec_drop_fields(out_arr, extra_fields)

        print("generating pre-weighted columns")
        if WEIGHTING_SCHEMES is not None:
            new_cols = []
            names = []
            for n, weights in WEIGHTING_SCHEMES.items():
                keys = list(weights.keys())
                values = list(weights.values())
                indicator_array = np.vstack([out_arr[f] for f in keys]).T
                indicator_array[indicator_array == NULL_VALUE] = np.nan
                scores = np.squeeze(np.asarray(aggregate_scores.aggregate_scores(indicator_array, values)))
                scores[np.isnan(scores)] = NULL_VALUE
                new_cols.append(scores)
                names.append(n)
            out_arr = mlab.rec_append_fields(out_arr, names, new_cols)

        for field in MAP_FIELDS:
            out_arr[field][out_arr[field] == ""] = "No data"

        mlab.rec2csv(out_arr, "bin/test.csv")
        print("dropping fields")
        drop = [f.baseName for f in ap.ListFields(output_file) if not f.required and f.baseName != GU_FIELD]
        if len(drop) > 0:
            ap.DeleteField_management(output_file, drop)

        print("joining")
        ap.da.ExtendTable(output_file, GU_FIELD, out_arr, GU_FIELD)

        print("indexing")
        try:
            ap.AddSpatialIndex_management(output_file)
            ap.AddIndex_management(output_file, GU_FIELD, GU_FIELD, "UNIQUE")
        except Exception as e:
            print(e)
Code Example #19
File: ordination.py Project: valpasq/pynnmap
    def run(self):
        # Convert the species and environment matrices to numpy rec arrays
        spp_ra = utilities.csv2rec(self.spp_file)
        env_ra = utilities.csv2rec(self.env_file)

        # Extract the plot IDs from both the species and environment matrices
        # and ensure that they are equal
        spp_plot_ids = getattr(spp_ra, self.id_field)
        env_plot_ids = getattr(env_ra, self.id_field)
        if not np.all(spp_plot_ids == env_plot_ids):
            err_msg = 'Species and environment plot IDs do not match'
            raise ValueError(err_msg)

        # Drop the ID column from both arrays
        spp_ra = mlab.rec_drop_fields(spp_ra, [self.id_field])
        env_ra = mlab.rec_drop_fields(env_ra, [self.id_field])

        # For the environment matrix, only keep the variables specified
        env_ra = mlab.rec_keep_fields(env_ra, self.variables)

        # Convert these matrices to pure floating point arrays
        spp = np.array([spp_ra[x] for x in spp_ra.dtype.names], dtype=float).T
        env = np.array([env_ra[x] for x in env_ra.dtype.names], dtype=float).T

        # Apply transformation if desired
        if self.species_transform == 'SQRT':
            spp = np.sqrt(spp)
        elif self.species_transform == 'LOG':
            spp = np.log(spp)

        # Create the RDA object
        cca = numpy_ordination.NumpyRDA(spp, env)

        # Open the output file
        numpy_fh = open(self.ord_file, 'w')

        # Eigenvalues
        numpy_fh.write('### Eigenvalues ###\n')
        for (i, e) in enumerate(cca.eigenvalues):
            numpy_fh.write('RDA' + str(i + 1) + ',' + '%.10f' % e + '\n')
        numpy_fh.write('\n')

        # Print out variable means
        numpy_fh.write('### Variable Means ###\n')
        for (i, m) in enumerate(cca.env_means):
            numpy_fh.write('%s,%.10f\n' % (self.variables[i], m))
        numpy_fh.write('\n')

        # Print out environmental coefficients loadings
        numpy_fh.write('### Coefficient Loadings ###\n')
        header_str = ','.join(['RDA%d' % (i + 1) for i in range(cca.rank)])
        numpy_fh.write('VARIABLE,' + header_str + '\n')
        for (i, c) in enumerate(cca.coefficients()):
            coeff = ','.join(['%.10f' % x for x in c])
            numpy_fh.write('%s,%s\n' % (self.variables[i], coeff))
        numpy_fh.write('\n')

        # Print out biplot scores
        numpy_fh.write('### Biplot Scores ###\n')
        header_str = ','.join(['RDA%d' % (i + 1) for i in range(cca.rank)])
        numpy_fh.write('VARIABLE,' + header_str + '\n')
        for (i, b) in enumerate(cca.biplot_scores()):
            scores = ','.join(['%.10f' % x for x in b])
            numpy_fh.write('%s,%s\n' % (self.variables[i], scores))
        numpy_fh.write('\n')

        # Print out species centroids
        numpy_fh.write('### Species Centroids ###\n')
        header_str = ','.join(['RDA%d' % (i + 1) for i in range(cca.rank)])
        numpy_fh.write('SPECIES,' + header_str + '\n')
        for (i, c) in enumerate(cca.species_centroids()):
            scores = ','.join(['%.10f' % x for x in c])
            numpy_fh.write('%s,%s\n' % (spp_ra.dtype.names[i], scores))
        numpy_fh.write('\n')

        # Print out species tolerances
        numpy_fh.write('### Species Tolerances ###\n')
        header_str = ','.join(['RDA%d' % (i + 1) for i in range(cca.rank)])
        numpy_fh.write('SPECIES,' + header_str + '\n')
        for (i, t) in enumerate(cca.species_tolerances()):
            scores = ','.join(['%.21f' % x for x in t])
            numpy_fh.write('%s,%s\n' % (spp_ra.dtype.names[i], scores))
        numpy_fh.write('\n')

        # Print out miscellaneous species information
        numpy_fh.write('### Miscellaneous Species Information ###\n')
        numpy_fh.write('SPECIES,WEIGHT,N2\n')
        species_weights, species_n2 = cca.species_information()
        for i in range(len(species_weights)):
            numpy_fh.write(
                '%s,%.10f,%.10f\n' %
                (spp_ra.dtype.names[i], species_weights[i], species_n2[i]))
        numpy_fh.write('\n')

        # Print out site LC scores
        numpy_fh.write('### Site LC Scores ###\n')
        header_str = ','.join(['RDA%d' % (i + 1) for i in range(cca.rank)])
        numpy_fh.write('ID,' + header_str + '\n')
        for (i, s) in enumerate(cca.site_lc_scores()):
            scores = ','.join(['%.10f' % x for x in s])
            numpy_fh.write('%d,%s\n' % (spp_plot_ids[i], scores))
        numpy_fh.write('\n')

        # Print out site WA scores
        numpy_fh.write('### Site WA Scores ###\n')
        header_str = ','.join(['RDA%d' % (i + 1) for i in range(cca.rank)])
        numpy_fh.write('ID,' + header_str + '\n')
        for (i, s) in enumerate(cca.site_wa_scores()):
            scores = ','.join(['%.10f' % x for x in s])
            numpy_fh.write('%d,%s\n' % (spp_plot_ids[i], scores))
        numpy_fh.write('\n')

        # Miscellaneous site information
        numpy_fh.write('### Miscellaneous Site Information ###\n')
        numpy_fh.write('ID,WEIGHT,N2\n')
        site_weights, site_n2 = cca.site_information()
        for i in range(len(site_weights)):
            numpy_fh.write('%s,%.10f,%.10f\n' %
                           (spp_plot_ids[i], site_weights[i], site_n2[i]))

        # Close the file
        numpy_fh.close()
Code Example #20
    def _create_story(self):

        # Set up an empty list to hold the story
        story = []

        # Import the report styles
        styles = report_styles.get_report_styles()

        # Create a page break
        story = self._make_page_break(story, self.LANDSCAPE)

        # This class is somewhat of a hack, in that it likely only works on
        # rotated paragraphs which fit into the desired cell area
        class RotatedParagraph(p.Paragraph):

            def wrap(self, availHeight, availWidth):
                h, w = \
                    p.Paragraph.wrap(self, self.canv.stringWidth(self.text),
                        self.canv._leading)
                return w, h

            def draw(self):
                self.canv.rotate(90)
                self.canv.translate(0.0, -10.0)
                p.Paragraph.draw(self)

        # Section title
        title_str = '<strong>Local-Scale Accuracy Assessment: '
        title_str += 'Error Matrix for Vegetation Classes at Plot '
        title_str += 'Locations</strong>'

        para = p.Paragraph(title_str, styles['section_style'])
        t = p.Table([[para]], colWidths=[10.0 * u.inch])
        t.setStyle(
            p.TableStyle([
                ('TOPPADDING', (0, 0), (-1, -1), 3),
                ('BOTTOMPADDING', (0, 0), (-1, -1), 3),
                ('BACKGROUND', (0, 0), (-1, -1), '#957348'),
                ('ALIGNMENT', (0, 0), (-1, -1), 'LEFT'),
                ('VALIGN', (0, 0), (-1, -1), 'TOP'),
                ('GRID', (0, 0), (-1, -1), 0.25, colors.black),
            ]))
        story.append(t)
        story.append(p.Spacer(0, 0.1 * u.inch))

        # Read in the vegclass error matrix
        names = ['P_' + str(x) for x in range(1, 12)]
        names.insert(0, 'OBSERVED')
        names.extend(['TOTAL', 'CORRECT', 'FUZZY_CORRECT'])
        vc_data = mlab.csv2rec(self.vc_errmatrix_file, skiprows=1,
            names=names)
        vc_data = mlab.rec_drop_fields(vc_data, ['OBSERVED'])

        # Read in the stand attribute metadata
        mp = xsmp.XMLStandMetadataParser(self.stand_metadata_file)

        # Get the class names from the metadata
        vegclass_metadata = mp.get_attribute('VEGCLASS')
        vc_codes = vegclass_metadata.codes

        # Create a list of lists to hold the vegclass table
        vegclass_table = []

        # Add an empty row which will be a span row for the predicted label
        header_row = []
        for i in range(2):
            header_row.append('')
        prd_str = '<strong>Predicted Class</strong>'
        para = p.Paragraph(prd_str, styles['body_style_10_center'])
        header_row.append(para)
        for i in range(len(vc_data) - 1):
            header_row.append('')
        vegclass_table.append(header_row)

        # Add the predicted labels
        summary_labels = ('Total', '% Correct', '% FCorrect')
        header_row = []
        for i in range(2):
            header_row.append('')
        for code in vc_codes:
            label = re.sub('-', '-<br/>', code.label)
            para = p.Paragraph(label, styles['body_style_10_right'])
            header_row.append(para)
        for label in summary_labels:
            label = re.sub(' ', '<br/>', label)
            para = p.Paragraph(label, styles['body_style_10_right'])
            header_row.append(para)
        vegclass_table.append(header_row)

        # Set a variable to distinguish between plot counts and percents
        # in order to format them differently
        format_break = 11

        # Set the cells which should be blank
        blank_cells = \
            [(11, 12), (11, 13), (12, 11), (12, 13), (13, 11), (13, 12)]

        # Add the data
        for (i, row) in enumerate(vc_data):
            vegclass_row = []
            for (j, elem) in enumerate(row):

                # Blank cells
                if (i, j) in blank_cells:
                    elem_str = ''

                # Cells that represent plot counts
                elif i <= format_break and j <= format_break:
                    elem_str = '%d' % int(elem)

                # Cells that represent percentages
                else:
                    elem_str = '%.1f' % float(elem)
                para = p.Paragraph(elem_str, styles['body_style_10_right'])
                vegclass_row.append(para)

            # Add the observed labels at the beginning of each data row
            if i == 0:
                obs_str = '<strong>Observed Class</strong>'
                para = \
                    RotatedParagraph(obs_str, styles['body_style_10_center'])
            else:
                para = ''
            vegclass_row.insert(0, para)

            if i < len(vc_codes):
                label = vc_codes[i].label
            else:
                index = i - len(vc_codes)
                label = summary_labels[index]
            para = p.Paragraph(label, styles['body_style_10_right'])
            vegclass_row.insert(1, para)

            # Add this row to the table
            vegclass_table.append(vegclass_row)

        # Set up the widths for the table cells
        widths = []
        widths.append(0.3)
        widths.append(0.85)
        for i in xrange(len(vc_codes)):
            widths.append(0.56)
        for i in xrange(3):
            widths.append(0.66)
        widths = [x * u.inch for x in widths]

        # Convert the vegclass table into a reportlab table
        t = p.Table(vegclass_table, colWidths=widths)
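        # Spans: (0,0)-(1,1) blanks the header corner, (0,2)-(0,-1) runs
        # the rotated 'Observed Class' label down the first column, and
        # (2,0)-(-1,0) stretches 'Predicted Class' across the top row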
        t.setStyle(
            p.TableStyle([
                ('SPAN', (0, 0), (1, 1)),
                ('SPAN', (0, 2), (0, -1)),
                ('SPAN', (2, 0), (-1, 0)),
                ('BACKGROUND', (0, 0), (-1, -1), '#f1efe4'),
                ('GRID', (0, 0), (-1, -1), 1, colors.white),
                ('ALIGNMENT', (0, 0), (-1, -1), 'LEFT'),
                ('VALIGN', (0, 0), (-1, -1), 'TOP'),
                ('VALIGN', (0, 2), (0, -1), 'MIDDLE'),
                ('VALIGN', (2, 1), (-1, 1), 'MIDDLE'),
                ('TOPPADDING', (0, 0), (-1, -1), 2),
                ('BOTTOMPADDING', (0, 0), (-1, -1), 3),
            ]))

        # Set up the shading for the truly correct (diagonal) cells
        for i in xrange(len(vc_codes)):
            val = i + 1
            t.setStyle(
                p.TableStyle([
                    ('BACKGROUND', (val + 1, val + 1), (val + 1, val + 1),
                        '#aaaaaa'),
                ]))

        # Set up the shading for the fuzzy correct cells
        fuzzy = {}
        fuzzy[1] = [2]
        fuzzy[2] = [1, 3, 5, 8]
        fuzzy[3] = [2, 4, 5]
        fuzzy[4] = [3, 6, 7]
        fuzzy[5] = [2, 3, 6, 8]
        fuzzy[6] = [4, 5, 7, 9]
        fuzzy[7] = [4, 6, 10, 11]
        fuzzy[8] = [2, 5, 9]
        fuzzy[9] = [6, 8, 10]
        fuzzy[10] = [7, 9, 11]
        fuzzy[11] = [7, 10]

        for key in fuzzy:
            for elem in fuzzy[key]:
                t.setStyle(
                    p.TableStyle([
                        ('BACKGROUND', (key + 1, elem + 1),
                            (key + 1, elem + 1), '#dddddd'),
                    ]))

        # Add this table to the story
        story.append(t)
        story.append(p.Spacer(0, 0.1 * u.inch))

        # Explanation and definitions of vegetation class categories
        cell_str = """
            Cell values are model plot counts.  Dark gray cells represent
            plots where the observed class matches the predicted class
            and are included in the percent correct.  Light gray cells
            represent cases where the observed and predicted differ
            slightly (within +/- one class) based on canopy cover,
            hardwood proportion or average stand diameter, and are
            included in the percent fuzzy correct.
        """
        para = p.Paragraph(cell_str, styles['body_style_9'])
        story.append(para)
        story.append(p.Spacer(0, 0.1 * u.inch))

        head_str = '''
            <strong>Vegetation Class (VEGCLASS) Definitions</strong> --
            CANCOV (canopy cover of all live trees), BAH_PROP (proportion of
            hardwood basal area), and QMD_DOM (quadratic mean diameter of
            all dominant and codominant trees).
        '''
        para = p.Paragraph(head_str, styles['body_style_9'])
        story.append(para)
        story.append(p.Spacer(0, 0.1 * u.inch))

        # Print out the vegclass code definitions
        for code in vc_codes:
            label = code.label
            desc = self.txt_to_html(code.description)
            doc_str = '<strong>' + label + ':</strong> ' + desc
            para = p.Paragraph(doc_str, styles['body_style_9'])
            story.append(para)

        return story
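
As a usage note, a story list like the one returned here is normally handed to a ReportLab document template for rendering. A minimal sketch, assuming a hypothetical caller, output file name, and page size (none of these come from the original class):

from reportlab.lib.pagesizes import landscape, letter
from reportlab.platypus import SimpleDocTemplate

# Hypothetical driver: the method name, output path and page size are
# assumptions for illustration only.
story = report.create_accuracy_section()  # e.g. the method shown above
doc = SimpleDocTemplate('accuracy_report.pdf', pagesize=landscape(letter))
doc.build(story)
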
コード例 #21
0
def fit_sections(im,
                 psf,
                 nx,
                 ny,
                 overlap=50,
                 weight=None,
                 dq=None,
                 blist=None,
                 **kw):
    bdx = numpy.round(numpy.linspace(0, im.shape[0], nx + 1)).astype('i4')
    bdlx = numpy.clip(bdx - overlap, 0, im.shape[0])
    bdrx = numpy.clip(bdx + overlap, 0, im.shape[0])
    bdy = numpy.round(numpy.linspace(0, im.shape[1], ny + 1)).astype('i4')
    bdly = numpy.clip(bdy - overlap, 0, im.shape[1])
    bdry = numpy.clip(bdy + overlap, 0, im.shape[1])
    modelim = numpy.zeros_like(im)
    skyim = numpy.zeros_like(im)
    prisofar = numpy.zeros_like(im, dtype='bool')
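    # boolean map of pixels already covered by some tile's primary region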
    # this holder for stars gets filled out more completely later after
    # the first fit; for the moment, we just want the critical fields to
    # exist
    stars = numpy.zeros(0,
                        dtype=[('x', 'f4'), ('y', 'f4'), ('flux', 'f4'),
                               ('primary', 'i4'), ('psf', 'i4')])
    t0 = time.time()
    if kw.get('verbose', False):
        print('Starting new CCD at %s' % time.ctime())
        sys.stdout.flush()
    psfs = []
    for i in range(nx):
        for j in range(ny):
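            # sall: padded subimage including the overlap margins
            # spri: this tile's primary region in full-image coordinates
            # sfit: where the primary region sits inside the padded fit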
            sall = numpy.s_[bdlx[i]:bdrx[i + 1], bdly[j]:bdry[j + 1]]
            spri = numpy.s_[bdx[i]:bdx[i + 1], bdy[j]:bdy[j + 1]]
            dx, dy = (bdrx[i + 1] - bdlx[i], bdry[j + 1] - bdly[j])
            sfit = numpy.s_[bdx[i] - bdlx[i]:dx + bdx[i + 1] - bdrx[i + 1],
                            bdy[j] - bdly[j]:dy + bdy[j + 1] - bdry[j + 1]]
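            # Hold fixed the stars that sit inside the padded subimage but
            # outside a half-overlap buffer around the primary region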
            mfixed = in_bounds(stars['x'], stars['y'],
                               [bdlx[i] - 0.5, bdrx[i + 1] - 0.5],
                               [bdly[j] - 0.5, bdry[j + 1] - 0.5])
            ol2 = overlap / 2
            mfixed &= ~in_bounds(stars['x'], stars['y'],
                                 [bdx[i] - 0.5 - ol2, bdx[i + 1] - 0.5 + ol2],
                                 [bdy[j] - 0.5 - ol2, bdy[j + 1] - 0.5 + ol2])
            xp, yp = (numpy.round(c).astype('i4')
                      for c in (stars['x'], stars['y']))
            mfixed &= (stars['primary'] == 1) | (prisofar[xp, yp] == 0)
            fixedstars = {f: stars[f][mfixed] for f in stars.dtype.names}
            fixedstars['x'] -= bdlx[i]
            fixedstars['y'] -= bdly[j]
            fixedstars['psfob'] = psfs
            fixedstars['offset'] = (bdlx[i], bdly[j])
            if (i == 0) and (j == 0):
                tpsf = psf
            elif j != 0:
                tpsf = psfs[-1]
            else:
                tpsf = psfs[-ny]
            if blist is not None:  # cut to bright stars in subimage
                mb = ((blist[0] >= bdlx[i]) & (blist[0] <= bdrx[i + 1]) &
                      (blist[1] >= bdly[j]) & (blist[1] <= bdry[j + 1]))
                blist0 = [
                    blist[0][mb] - bdlx[i], blist[1][mb] - bdly[j],
                    blist[2][mb]
                ]
                # offset X & Y to new positions
            else:
                blist0 = None
            res0 = crowdsource_base.fit_im(im[sall].copy(),
                                           tpsf,
                                           weight=weight[sall].copy(),
                                           dq=dq[sall].copy(),
                                           fixedstars=fixedstars,
                                           blist=blist0,
                                           **kw)
            newstars, skypar0, model0, sky0, psf0 = res0
            newstars['x'] += bdlx[i]
            newstars['y'] += bdly[j]
            primary0 = in_bounds(newstars['x'], newstars['y'],
                                 [bdx[i] - 0.5, bdx[i + 1] - 0.5],
                                 [bdy[j] - 0.5, bdy[j + 1] - 0.5])
            newstars['primary'] = primary0
            newstars['psf'] = (numpy.ones(len(newstars['x']), dtype='i4') *
                               len(psfs))
            dtypenames = newstars.keys()
            dtypeformats = [newstars[n].dtype for n in dtypenames]
            dtype = dict(names=dtypenames, formats=dtypeformats)
            newstars = numpy.fromiter(zip(*newstars.itervalues()),
                                      dtype=dtype,
                                      count=len(newstars['x']))
            stars = (newstars if len(stars) == 0 else numpy.append(
                stars, newstars))
            psf0.offset = (bdlx[i], bdly[j])
            psfs.append(psf0)
            modelim[spri] = model0[sfit]
            skyim[spri] = sky0[sfit]
            prisofar[spri] = True  # mark this tile's primary region as fit
            if kw.get('verbose', False):
                t1 = time.time()
                print('Fit tile (%d, %d) of (%d, %d); %d sec elapsed' %
                      (i + 1, j + 1, nx, ny, t1 - t0))
                t0 = t1
                sys.stdout.flush()
    stars = stars[stars['primary'] == 1]
    from matplotlib.mlab import rec_drop_fields
    stars = rec_drop_fields(stars, ['primary'])
    return stars, modelim, skyim, psfs
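
The slice bookkeeping above is the core of the tiling scheme. A self-contained sketch of the same boundary arithmetic along one axis, with arbitrary illustration values:

import numpy

# Image length, tile count and overlap are arbitrary illustration values.
shape, nx, overlap = 1000, 4, 50
bdx = numpy.round(numpy.linspace(0, shape, nx + 1)).astype('i4')
bdlx = numpy.clip(bdx - overlap, 0, shape)  # padded lower edges
bdrx = numpy.clip(bdx + overlap, 0, shape)  # padded upper edges
for i in range(nx):
    # each tile is fit over the padded range but only its primary
    # (non-overlapping) range is copied back into the mosaic
    print('tile %d: fit [%d, %d), keep [%d, %d)' %
          (i, bdlx[i], bdrx[i + 1], bdx[i], bdx[i + 1]))
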
コード例 #22
0
def classify_calls(aml_clean_path, class_path, param_dict):
    '''Classify calls by species and save files.'''

    # Get dirs
    output_dir, tail = os.path.split(aml_clean_path)
    class_dir, tail = os.path.split(class_path)
    prefix = tail.split('.')[0]

    # Load spp_names as list
    spp_path = os.path.join(class_dir, prefix + '_spp_codes.csv')
    spp_names = list(csv2rec(spp_path).spp)
    spp_names_comma = ''.join([x + ',' for x in spp_names])[:-1]

    # Load classifier from pickle
    clf = joblib.load(class_path)

    # Load aml_clean as recarray
    table = csv2rec(aml_clean_path)

    # Only use calls with qual < maxqual
    table = table[table.qual < float(param_dict['maxqual'])]

    # Save path, folder, call, and qual fields for later
    path = table.path
    folder = table.folder
    call = table.filename
    qual = table.qual

    # Remove non-feature cols from table
    table = rec_drop_fields(table, ['path', 'folder', 'filename', 'st', 'dc', 
                                    'qual', 'pmc'])

    # Recarray to ndarray, since the classifier requires an ndarray
    X = table.view((float, len(table.dtype.names)))

    # Predict probabilities for each call
    pred = clf.predict_proba(X)

    # Save call_prob and call_bin files
    header = 'path,folder,pass,qual,' + spp_names_comma

    file_callpr = open(os.path.join(output_dir, 'call_prob.csv'), 'w')
    file_callbi = open(os.path.join(output_dir, 'call_bin.csv'), 'w')

    file_callpr.write(header + '\n')
    file_callbi.write(header + '\n')

    for row in xrange(0, len(call)):  # For all calls
        row_comma_prob = ''.join([str(x)+',' for x in pred[row]])[:-1]
        row_bin = (pred[row] == pred[row].max()) + 0  # +0 makes int not bool
        row_comma_bin = ''.join([str(x)+',' for x in row_bin])[:-1]

        file_callpr.write(path[row] + ',' + folder[row] + ',' + call[row] + 
                          ',' + str(qual[row]) + ',' + row_comma_prob + '\n')
        file_callbi.write(path[row] + ',' + folder[row] + ',' + call[row] + 
                          ',' + str(qual[row]) + ',' + row_comma_bin + '\n')

    file_callpr.close()
    file_callbi.close()

    # Get array of unique filenames (ie, passes, each may contain many calls)
    passes = np.unique(call)

    # Set up pass files for writing
    header = 'path,folder,pass,ncalls,' + spp_names_comma

    file_passpr = open(os.path.join(output_dir, 'pass_prob.csv'), 'w')
    file_passmaxpr = open(os.path.join(output_dir, 'pass_maxprob.csv'), 'w')
    file_passbi = open(os.path.join(output_dir, 'pass_bin.csv'), 'w')

    file_passpr.write(header + '\n')
    file_passmaxpr.write(header + '\n')
    file_passbi.write(header + '\n')  

    # Loop through passes
    for this_pass in passes:

        # Get boolean locations in table of calls associated with this pass
        these_calls = (call == this_pass)
        first_call = np.argmax(these_calls)  # Row of first call

        # Take subset of pred corresponding to calls in pass
        these_preds = pred[these_calls]

        # Get descriptor variables for this pass
        this_path = path[first_call]
        this_folder = folder[first_call]
        this_ncalls = np.shape(these_preds)[0]

        # Get summed probability for each species
        if these_preds.shape[0] == 1:  # If only one call
            pass_prob = these_preds / this_ncalls
            pass_prob = pass_prob.flatten()
        else:
            pass_prob = np.sum(these_preds, 0) / this_ncalls

        # Find the species with the maximum prob
        pass_maxprob = (pass_prob == pass_prob.max()) + 0

        # Find all calls with prob greater than minprob, cast to int
        minprob_calls = (these_preds > float(param_dict['minprob'])) + 0

        # Count number of calls for each species that meet minprob
        num_minprob_calls = np.sum(minprob_calls, 0)

        # Find all species with sufficient calls to meet mincalls
        pass_bin = (num_minprob_calls >= float(param_dict['mincalls'])) + 0

        # Write files
        row_comma_prob = ''.join([str(x)+',' for x in pass_prob])[:-1]
        file_passpr.write(this_path + ',' + this_folder + ',' + this_pass + 
                          ',' + str(this_ncalls) + ',' + row_comma_prob + '\n')

        row_comma_maxprob = ''.join([str(x)+',' for x in pass_maxprob])[:-1]
        file_passmaxpr.write(this_path + ',' + this_folder + ',' + this_pass + 
                             ',' + str(this_ncalls) + ',' + row_comma_maxprob + 
                             '\n')

        row_comma_bin = ''.join([str(x)+',' for x in pass_bin])[:-1]
        file_passbi.write(this_path + ',' + this_folder + ',' + this_pass + 
                          ',' + str(this_ncalls) + ',' + row_comma_bin + '\n')

    # Close pass files
    file_passpr.close()
    file_passmaxpr.close()
    file_passbi.close()
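
For reference, the prediction step at the heart of this function can be exercised in isolation. A minimal sketch, assuming a scikit-learn classifier saved with joblib and a feature table whose remaining columns are all numeric; the paths and field names below are placeholders:

import joblib
from matplotlib.mlab import csv2rec, rec_drop_fields

# Placeholder paths and field names; assumes every remaining column is
# a numeric feature, in the order the classifier was trained on.
table = csv2rec('aml_clean.csv')
table = rec_drop_fields(table, ['path', 'folder', 'filename', 'qual'])
X = table.view((float, len(table.dtype.names)))  # recarray -> 2-D ndarray
clf = joblib.load('classifier.pkl')
pred = clf.predict_proba(X)  # one row per call, one column per species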