Esempio n. 1
0
    def _get_attribute_data(self):
        """
        Load the stand attribute table together with the names of the
        continuous accuracy attributes described in the stand metadata.

        Both file locations are taken from self.parameter_parser
        (stand_attribute_file and stand_metadata_file).

        Parameters
        ----------
        None

        Returns
        -------
        stand_attr_data : numpy recarray
            Contents of the stand attribute file as read by
            utilities.csv2rec

        attrs : list of strs
            Field names of the metadata attributes whose field_type is
            'CONTINUOUS' and whose accuracy_attr equals 1
        """

        parser = self.parameter_parser

        # Stand attribute table -> recarray
        stand_attr_data = utilities.csv2rec(parser.stand_attribute_file)

        # Walk the metadata and collect the continuous accuracy attributes
        metadata_parser = xsmp.XMLStandMetadataParser(
            parser.stand_metadata_file)
        attrs = []
        for attribute in metadata_parser.attributes:
            is_continuous = attribute.field_type == 'CONTINUOUS'
            if is_continuous and attribute.accuracy_attr == 1:
                attrs.append(attribute.field_name)

        return stand_attr_data, attrs
    def _create_histograms(self):
        """
        Draw an observed-vs-predicted area histogram for each report
        attribute.

        Area estimates are read from self.regional_accuracy_file and the
        attribute metadata from self.stand_metadata_file.  An attribute is
        plotted when it is an accuracy attribute, is flagged as a project
        attribute and is not a species attribute.

        Parameters
        ----------
        None

        Returns
        -------
        histogram_files : list of strs
            Names of the histogram files passed to mplf.draw_histogram,
            one per plotted attribute ('<attr lowercased>_histogram.png')

        Raises
        ------
        ValueError
            If the observed and predicted series for an attribute do not
            have identical bin names
        """

        # Open the area estimate file into a recarray
        ae_data = utilities.csv2rec(self.regional_accuracy_file)

        # Read in the stand attribute metadata
        mp = xsmp.XMLStandMetadataParser(self.stand_metadata_file)

        # Subset the attributes to those that are accuracy attributes,
        # are identified to go into the report, and are not species variables
        attrs = [
            x.field_name for x in mp.attributes
            if x.accuracy_attr == 1 and x.project_attr == 1
            and x.species_attr == 0
        ]

        # Iterate over the attributes and create a histogram file of each
        histogram_files = []
        for attr in attrs:

            # Metadata for this attribute
            metadata = mp.get_attribute(attr)

            # Get the observed and predicted data for this attribute
            obs_vals = self._get_histogram_data(ae_data, attr, 'OBSERVED')
            prd_vals = self._get_histogram_data(ae_data, attr, 'PREDICTED')

            # Set the areas for the observed and predicted data
            obs_area = obs_vals.AREA
            prd_area = prd_vals.AREA

            # Set the bin names (same for both observed and predicted
            # series).  The two series must agree bin-for-bin: a single
            # differing name (or a differing number of bins) is an error,
            # so compare the arrays as a whole rather than requiring every
            # element to differ.
            bin_names = obs_vals.BIN_NAME
            if not np.array_equal(bin_names, prd_vals.BIN_NAME):
                err_msg = 'Bin names are not the same for ' + attr
                raise ValueError(err_msg)

            # Create the output file name
            output_file = attr.lower() + '_histogram.png'

            # Create the histogram
            mplf.draw_histogram([obs_area, prd_area],
                                bin_names,
                                metadata,
                                output_type=mplf.FILE,
                                output_file=output_file)

            # Add this to the list of histogram files
            histogram_files.append(output_file)

        # Return the list of histograms just created
        return histogram_files
    def _create_scatterplots(self):
        """
        Draw an observed-vs-predicted scatterplot for each continuous
        report attribute.

        Observed and predicted values are read from self.observed_file and
        self.predicted_file; observed rows whose self.id_field value does
        not occur in the predicted data are discarded before plotting.

        Returns
        -------
        scatter_files : list of strs
            Names of the scatterplot files passed to mplf.draw_scatterplot,
            one per plotted attribute ('<attr lowercased>_scatter.png')
        """

        # Load both tables as recarrays
        observed = utilities.csv2rec(self.observed_file)
        predicted = utilities.csv2rec(self.predicted_file)

        # Keep only the observed rows whose ID also appears in the
        # predicted table
        observed_ids = getattr(observed, self.id_field)
        predicted_ids = getattr(predicted, self.id_field)
        observed = observed[np.in1d(observed_ids, predicted_ids)]

        # Stand attribute metadata
        metadata_parser = xsmp.XMLStandMetadataParser(
            self.stand_metadata_file)

        # Continuous, project-level accuracy attributes that are not
        # species variables
        field_names = [
            a.field_name for a in metadata_parser.attributes
            if a.field_type == 'CONTINUOUS' and a.project_attr == 1
            and a.accuracy_attr == 1 and a.species_attr == 0
        ]

        # One scatterplot per attribute
        scatter_files = []
        for field_name in field_names:
            attr_metadata = metadata_parser.get_attribute(field_name)
            image_name = field_name.lower() + '_scatter.png'

            mplf.draw_scatterplot(getattr(observed, field_name),
                                  getattr(predicted, field_name),
                                  attr_metadata,
                                  output_type=mplf.FILE,
                                  output_file=image_name)

            scatter_files.append(image_name)

        return scatter_files
Esempio n. 4
0
def join_attributes(raster, raster_join_field, attribute_file,
        attribute_join_field, attribute_metadata_file):
    """
    Join attributes to a raster

    Parameters
    ----------
    raster : str
        raster to join attributes to
    raster_join_field : str
        name of join field in raster
    attribute_file : str
        name of file with attributes to join to raster
    attribute_join_field : str
        name of join field in attribute file
    attribute_metadata_file : str
        name of file with attribute metadata to decide what variables to
        drop from join file

    Returns
    -------
    None

    Notes
    -----
    The join is best-effort: an exception raised by the geoprocessor's
    join_attributes call is printed to stdout and not propagated.
    KeyboardInterrupt and SystemExit are deliberately not intercepted.
    """
    model_dir = get_path(raster)
    gp = geoprocessor.Geoprocessor(model_dir)

    # create list of attributes to drop from join file - we only want a
    # subset of all variables joined to the NN grids (PROJECT_ATTR = 1), so
    # we need to specify all variables that have PROJECT_ATTR = 0 in the
    # metadata
    mp = xsmp.XMLStandMetadataParser(attribute_metadata_file)
    drop_fields = [x.field_name for x in mp.attributes if x.project_attr == 0]

    try:
        gp.join_attributes(raster, raster_join_field, attribute_file,
            attribute_join_field, drop_fields=drop_fields)
    except Exception:
        # Report the failure but keep going; the parenthesised form prints
        # the same tuple under Python 2 and is valid syntax under Python 3
        print(sys.exc_info())
    def _create_story(self):
        """
        Assemble the flowables for the 'Data Dictionary' report section.

        The section is made of a portrait page break, a title banner, a
        two-column table (field name / description, with a nested table of
        codes when the attribute defines any) and a closing paragraph on
        the species fields.  Attributes are taken from
        self.stand_metadata_file and listed when they are accuracy
        attributes, are flagged as project attributes and are not species
        attributes.

        Returns
        -------
        story : list
            The flowables in the order they were appended
        """

        # Report styles used by every paragraph below
        styles = report_styles.get_report_styles()

        # Begin the story with a page break
        story = self._make_page_break([], self.PORTRAIT)

        # Section title banner
        title_para = p.Paragraph('<strong>Data Dictionary</strong>',
                                 styles['section_style'])
        banner = p.Table([[title_para]], colWidths=[7.5 * u.inch])
        banner_style = p.TableStyle([
            ('TOPPADDING', (0, 0), (-1, -1), 6),
            ('BOTTOMPADDING', (0, 0), (-1, -1), 6),
            ('BACKGROUND', (0, 0), (-1, -1), '#957348'),
            ('ALIGNMENT', (0, 0), (-1, -1), 'LEFT'),
            ('VALIGN', (0, 0), (-1, -1), 'TOP'),
            ('GRID', (0, 0), (-1, -1), 0.25, colors.black),
        ])
        banner.setStyle(banner_style)
        story.append(banner)
        story.append(p.Spacer(0, 0.1 * u.inch))

        # Stand attribute metadata
        metadata_parser = xsmp.XMLStandMetadataParser(
            self.stand_metadata_file)

        # Accuracy attributes that go into the report and are not species
        # variables
        field_names = [
            a.field_name for a in metadata_parser.attributes
            if a.accuracy_attr == 1 and a.project_attr == 1
            and a.species_attr == 0
        ]

        # One row per attribute: [field name, description (+ codes)]
        dictionary_rows = []
        for name in field_names:
            field_meta = metadata_parser.get_attribute(name)

            # Append the units to the description unless there are none
            field_text = field_meta.description
            field_para = \
                p.Paragraph(field_meta.field_name, styles['body_style_10'])
            if field_meta.units != 'none':
                field_text += ' (' + field_meta.units + ')'
            field_desc_para = p.Paragraph(field_text, styles['body_style_10'])

            # The description always comes first in the stacked cell
            elements = [[field_desc_para]]

            # Fields with codes get a nested table under the description
            if field_meta.codes:
                code_rows = []
                for code in field_meta.codes:
                    value_para = \
                        p.Paragraph(code.code_value, styles['code_style'])
                    code_text = self.txt_to_html(code.description)
                    text_para = p.Paragraph(code_text, styles['code_style'])
                    code_rows.append([value_para, text_para])

                code_table = p.Table(
                    code_rows, colWidths=[0.75 * u.inch, 4.5 * u.inch])
                code_table.setStyle(
                    p.TableStyle([
                        ('TOPPADDING', (0, 0), (-1, -1), 3),
                        ('BOTTOMPADDING', (0, 0), (-1, -1), 3),
                        ('BACKGROUND', (0, 0), (-1, -1), '#f7f7ea'),
                        ('ALIGNMENT', (0, 0), (-1, -1), 'LEFT'),
                        ('VALIGN', (0, 0), (-1, -1), 'TOP'),
                        ('GRID', (0, 0), (-1, -1), 0.25, colors.white),
                    ]))
                elements.append([code_table])

            # Stack the description and (if present) the code table
            description_table = \
                p.Table(elements, colWidths=[5.25 * u.inch])
            description_table.setStyle(
                p.TableStyle([
                    ('TOPPADDING', (0, 0), (-1, 0), 0),
                    ('BOTTOMPADDING', (0, -1), (-1, -1), 0),
                    ('LEFTPADDING', (0, 0), (-1, -1), 0),
                    ('RIGHTPADDING', (0, 0), (-1, -1), 0),
                    ('ALIGNMENT', (0, 0), (-1, -1), 'LEFT'),
                    ('VALIGN', (0, 0), (-1, -1), 'TOP'),
                ]))

            dictionary_rows.append([field_para, description_table])

        # Wrap all rows in the master dictionary table
        dictionary = p.Table(
            dictionary_rows, colWidths=[1.6 * u.inch, 5.4 * u.inch])
        dictionary.setStyle(
            p.TableStyle([
                ('TOPPADDING', (0, 0), (0, -1), 5),
                ('BOTTOMPADDING', (0, 0), (0, -1), 5),
                ('GRID', (0, 0), (-1, -1), 1, colors.white),
                ('ALIGNMENT', (0, 0), (-1, -1), 'LEFT'),
                ('VALIGN', (0, 0), (-1, -1), 'TOP'),
                ('BACKGROUND', (0, 0), (-1, -1), '#f1efe4'),
            ]))
        story.append(dictionary)

        # Summary of the species information attached to the grids; the
        # individual species codes are not enumerated here
        spp_str = """
            Individual species abundances are attached to ArcInfo grids that
            LEMMA distributes.  For this model, fields designate species
            codes based on the <link color="#0000ff"
            href="http://plants.usda.gov/">USDA PLANTS database</link> from
            the year 2000, and values represent species
        """
        model_type = self.model_type
        if model_type in ('sppsz', 'sppba'):
            spp_str += " basal area (m^2/ha)."
        elif model_type in ('trecov', 'wdycov'):
            spp_str += " percent cover."

        species_para = p.Paragraph(spp_str, styles['body_style'])
        story.append(p.Spacer(0, 0.1 * u.inch))
        story.append(species_para)

        return story
Esempio n. 6
0
    def run_diagnostic(self):
        """
        Run the Riemann accuracy diagnostic at the plot/pixel scale and at
        every configured hexagon resolution.

        The method
          1. reads self.hex_attribute_file and checks that every hexagon
             ID field named in the parameter parser's
             riemann_hex_resolutions exists in it,
          2. creates one output sub-directory per level under
             riemann_output_folder,
          3. writes the plot/pixel observed file and, for each k in
             riemann_k_values, the independent plot/pixel predictions,
          4. writes per-hexagon mean and standard deviation files for the
             observed data and for each k of the predicted data, and
          5. writes GMFR and KS statistics for every level, k and
             attribute to the parameter parser's hex_statistics_file.

        Each entry of riemann_hex_resolutions is indexed as
        [0] hexagon ID field, [1] hexagon distance (used in the directory
        and file names) and [3] minimum number of plots per hexagon.

        Parameters
        ----------
        None

        Returns
        -------
        None

        Raises
        ------
        ValueError
            If a hexagon ID field is missing from the hex attribute file,
            or if the IDs of an observed/predicted file pair are not
            identical
        """

        # Shortcut to the parameter parser
        p = self.parameter_parser

        # ID field
        id_field = p.summary_level + 'ID'

        # Root directory for Riemann files
        root_dir = p.riemann_output_folder

        # Read in hex input file
        obs_data = utilities.csv2rec(self.hex_attribute_file)

        # Get the hexagon levels and ensure that the fields exist in the
        # hex_attribute file
        hex_resolutions = p.riemann_hex_resolutions
        hex_fields = [x[0] for x in hex_resolutions]
        for field in hex_fields:
            if field not in obs_data.dtype.names:
                err_msg = 'Field ' + field + ' does not exist in the '
                err_msg += 'hex_attribute file'
                raise ValueError(err_msg)

        # Create the directory structure based on the hex levels
        hex_levels = ['hex_' + str(x[1]) for x in hex_resolutions]
        all_levels = ['plot_pixel'] + hex_levels
        for level in all_levels:
            sub_dir = os.path.join(root_dir, level)
            if not os.path.exists(sub_dir):
                os.makedirs(sub_dir)

        # Get the values of k
        k_values = p.riemann_k_values

        # Create a dictionary of plot ID to image year (or model_year for
        # non-imagery models) for these plots
        if p.model_type in p.imagery_model_types:
            id_x_year = dict((x[id_field], x.IMAGE_YEAR) for x in obs_data)
        else:
            id_x_year = dict((x[id_field], p.model_year) for x in obs_data)

        # Create a PredictionRun instance
        pr = prediction_run.PredictionRun(p)

        # Get the neighbors and distances for these IDs
        pr.calculate_neighbors_at_ids(id_x_year, id_field=id_field)

        # Create the lookup of id_field to LOC_ID for the hex plots
        nsa_id_dict = dict((x[id_field], x.LOC_ID) for x in obs_data)

        # Create a dictionary between id_field and no_self_assign_field
        # for the model plots
        env_file = p.environmental_matrix_file
        env_data = utilities.csv2rec(env_file)
        model_nsa_id_dict = dict(
            (getattr(x, id_field), x.LOC_ID) for x in env_data)

        # Stitch the two dictionaries together; entries that came from the
        # hex plots take precedence over the model plots
        for (plot_id, loc_id) in model_nsa_id_dict.items():
            if plot_id not in nsa_id_dict:
                nsa_id_dict[plot_id] = loc_id

        # Get the stand attribute metadata and retrieve only the
        # continuous accuracy attributes
        stand_metadata_file = p.stand_metadata_file
        mp = xsmp.XMLStandMetadataParser(stand_metadata_file)
        attrs = [
            x.field_name for x in mp.attributes
            if x.field_type == 'CONTINUOUS' and x.accuracy_attr == 1
        ]

        # Subset the attributes for fields that are in the
        # hex_attribute file
        attrs = [x for x in attrs if x in obs_data.dtype.names]
        plot_pixel_obs = mlab.rec_keep_fields(obs_data, [id_field] + attrs)

        # Write out the plot_pixel observed file
        file_name = 'plot_pixel_observed.csv'
        output_file = os.path.join(root_dir, 'plot_pixel', file_name)
        utilities.rec2csv(plot_pixel_obs, output_file)

        # Iterate over values of k
        for k in k_values:

            # Construct the output file name
            file_name = '_'.join(('plot_pixel', 'predicted', 'k' + str(k)))
            file_name += '.csv'
            output_file = os.path.join(root_dir, 'plot_pixel', file_name)

            # The context manager guarantees the file is closed even when
            # a prediction or a write fails part-way through
            with open(output_file, 'w') as out_fh:

                # For the plot/pixel scale, retrieve the independent
                # predicted data for this value of k.  Even though
                # attributes are being returned from this function, we want
                # to use the attribute list that we've already found above.
                prediction_generator = pr.calculate_predictions_at_k(
                    k=k,
                    id_field=id_field,
                    independent=True,
                    nsa_id_dict=nsa_id_dict)

                # Write out the field names
                out_fh.write(id_field + ',' + ','.join(attrs) + '\n')

                # Write out the predictions for this k
                for plot_prediction in prediction_generator:

                    # Write this record to the predicted attribute file
                    pr.write_predicted_record(
                        plot_prediction, out_fh, attrs=attrs)

        # Create the fields for which to extract statistics at the hexagon
        # levels
        mean_fields = [(id_field, len, 'PLOT_COUNT')]
        mean_fields.extend([(x, np.mean, x) for x in attrs])
        mean_fields = tuple(mean_fields)

        sd_fields = [(id_field, len, 'PLOT_COUNT')]
        sd_fields.extend([(x, np.std, x) for x in attrs])
        sd_fields = tuple(sd_fields)

        stat_sets = {
            'mean': mean_fields,
            'std': sd_fields,
        }

        # For each hexagon level, associate the plots with their hexagon ID
        # and find observed and predicted statistics for each hexagon
        for hex_resolution in hex_resolutions:

            (hex_id_field, hex_distance) = hex_resolution[0:2]
            min_plots_per_hex = hex_resolution[3]
            prefix = 'hex_' + str(hex_distance)

            # Create a crosswalk between the id_field and the hex_id_field
            id_x_hex = mlab.rec_keep_fields(obs_data, [id_field, hex_id_field])

            # Iterate over all sets of statistics and write a unique file
            # for each set
            for (stat_name, stat_fields) in stat_sets.items():

                # Get the output file name
                obs_out_file = \
                    '_'.join((prefix, 'observed', stat_name)) + '.csv'
                obs_out_file = os.path.join(root_dir, prefix, obs_out_file)

                # Write out the observed file
                self.write_hex_stats(obs_data, hex_id_field, stat_fields,
                                     min_plots_per_hex, obs_out_file)

            # Iterate over values of k for the predicted values
            for k in k_values:

                # Open the plot_pixel predicted file for this value of k
                # and join the hex_id_field to the recarray
                prd_file = 'plot_pixel_predicted_k' + str(k) + '.csv'
                prd_file = os.path.join(root_dir, 'plot_pixel', prd_file)
                prd_data = utilities.csv2rec(prd_file)
                prd_data = mlab.rec_join(id_field, prd_data, id_x_hex)

                # Iterate over all sets of statistics and write a unique file
                # for each set
                for (stat_name, stat_fields) in stat_sets.items():

                    # Get the output file name
                    prd_out_file = '_'.join((prefix, 'predicted', 'k' + str(k),
                                             stat_name)) + '.csv'
                    prd_out_file = os.path.join(root_dir, prefix, prd_out_file)

                    # Write out the predicted file
                    self.write_hex_stats(prd_data, hex_id_field, stat_fields,
                                         min_plots_per_hex, prd_out_file)

        # Calculate the ECDF and AC statistics
        # For ECDF and AC, it is a paired comparison between the observed
        # and predicted data.  We do this at each value of k and for each
        # hex resolution level.

        # Create a list of RiemannComparison instances which store the
        # information needed to do comparisons between observed and predicted
        # files for any level or value of k
        compare_list = []
        for hex_resolution in hex_resolutions:
            (hex_id_field, hex_distance) = hex_resolution[0:2]
            prefix = 'hex_' + str(hex_distance)
            obs_file = '_'.join((prefix, 'observed', 'mean')) + '.csv'
            obs_file = os.path.join(root_dir, prefix, obs_file)
            for k in k_values:
                prd_file = '_'.join(
                    (prefix, 'predicted', 'k' + str(k), 'mean')) + '.csv'
                prd_file = os.path.join(root_dir, prefix, prd_file)
                r = RiemannComparison(prefix, obs_file, prd_file, hex_id_field,
                                      k)
                compare_list.append(r)

        # Add the plot_pixel comparisons to this list
        prefix = 'plot_pixel'
        obs_file = 'plot_pixel_observed.csv'
        obs_file = os.path.join(root_dir, prefix, obs_file)
        for k in k_values:
            prd_file = 'plot_pixel_predicted_k' + str(k) + '.csv'
            prd_file = os.path.join(root_dir, prefix, prd_file)
            r = RiemannComparison(prefix, obs_file, prd_file, id_field, k)
            compare_list.append(r)

        # Open the stats file; the context manager closes (and therefore
        # flushes) it when all comparisons are done or if one of them fails
        stats_file = p.hex_statistics_file
        with open(stats_file, 'w') as stats_fh:
            header_fields = ['LEVEL', 'K', 'VARIABLE', 'STATISTIC', 'VALUE']
            stats_fh.write(','.join(header_fields) + '\n')

            # Do all the comparisons
            for c in compare_list:

                # Open the observed file
                cmp_obs = utilities.csv2rec(c.obs_file)

                # Open the predicted file
                cmp_prd = utilities.csv2rec(c.prd_file)

                # Ensure that the IDs between the observed and predicted
                # data line up.  The statistics below pair the records by
                # position, so any single differing ID (or a differing
                # record count) must be rejected, not only the case where
                # every ID differs.
                ids1 = getattr(cmp_obs, c.id_field)
                ids2 = getattr(cmp_prd, c.id_field)
                if not np.array_equal(ids1, ids2):
                    err_msg = 'IDs do not match between observed and '
                    err_msg += 'predicted data'
                    raise ValueError(err_msg)

                for attr in attrs:
                    arr1 = getattr(cmp_obs, attr)
                    arr2 = getattr(cmp_prd, attr)
                    rv = RiemannVariable(arr1, arr2)

                    gmfr_stats = rv.gmfr_statistics()
                    for stat in ('gmfr_a', 'gmfr_b', 'ac', 'ac_sys', 'ac_uns'):
                        stat_line = '%s,%d,%s,%s,%.4f\n' % (
                            c.prefix.upper(), c.k, attr, stat.upper(),
                            gmfr_stats[stat])
                        stats_fh.write(stat_line)

                    ks_stats = rv.ks_statistics()
                    for stat in ('ks_max', 'ks_mean'):
                        stat_line = '%s,%d,%s,%s,%.4f\n' % (
                            c.prefix.upper(), c.k, attr, stat.upper(),
                            ks_stats[stat])
                        stats_fh.write(stat_line)
Esempio n. 7
0
 def setUp(self):
     """Build the metadata parser fixture from 'data/stand_attr.xml'."""
     self.xmp = xsmp.XMLStandMetadataParser('data/stand_attr.xml')
Esempio n. 8
0
    def run_diagnostic(self):
        """
        Write observed and predicted area histogram statistics to
        self.statistics_file.

        For every field of the observed area estimates (other than
        HECTARES and fields whose metadata type is 'ID') that is also
        present in the predicted estimates, the values are binned
        (7 bins for continuous fields, by class otherwise) with hectares
        as weights.  'Unsampled' and 'Nonforest' classes are inserted into
        both series, and one CSV line is written per bin with the columns
        VARIABLE, DATASET, BIN_NAME and AREA.

        Fields without metadata are reported on stdout and skipped.

        Parameters
        ----------
        None

        Returns
        -------
        None
        """

        # Read in the observed data from the area estimate file
        (obs_area, obs_nf_hectares, obs_ns_hectares) = \
            self.get_observed_estimates()

        # Get the observed and predicted data arrays
        (prd_area, prd_nf_hectares) = self.get_predicted_estimates()

        # No nonsampled area is reported for the predicted data
        prd_ns_hectares = 0.0

        # Get the weights of the two datasets
        obs_weights = obs_area.HECTARES
        prd_weights = prd_area.HECTARES

        # Get a metadata parser
        mp = xsmp.XMLStandMetadataParser(self.stand_metadata_file)

        # Open the output file; the context manager closes (and therefore
        # flushes) it on normal completion and on error
        with open(self.statistics_file, 'w') as stats_fh:

            # Print out the header line
            header_fields = ['VARIABLE', 'DATASET', 'BIN_NAME', 'AREA']
            stats_fh.write(','.join(header_fields) + '\n')

            # Iterate over all fields and print out the area histogram
            # statistics
            for v in obs_area.dtype.names:

                # Skip over the HECTARES field
                if v == 'HECTARES':
                    continue

                # Get the metadata for this field.  A failed lookup is
                # reported and the field skipped; KeyboardInterrupt and
                # SystemExit are not intercepted.
                try:
                    fm = mp.get_attribute(v)
                except Exception:
                    err_msg = v + ' is missing metadata.'
                    print(err_msg)
                    continue

                # Skip over ID fields
                if fm.field_type == 'ID':
                    continue

                # Get the actual data; fields that are absent from either
                # dataset are skipped
                try:
                    obs_vals = getattr(obs_area, v)
                    prd_vals = getattr(prd_area, v)
                except AttributeError:
                    continue

                obs_vw = histogram.VariableVW(obs_vals, obs_weights)
                prd_vw = histogram.VariableVW(prd_vals, prd_weights)

                # Figure out how to bin the data based on field type
                if fm.field_type == 'CONTINUOUS':
                    bins = histogram.bin_continuous([obs_vw, prd_vw], bins=7)
                else:
                    if fm.codes:
                        class_names = {}
                        for code in fm.codes:
                            class_names[code.code_value] = code.label
                    else:
                        class_names = None
                    bins = histogram.bin_categorical([obs_vw, prd_vw],
                                                     class_names=class_names)

                bins[0].name = 'OBSERVED'
                bins[1].name = 'PREDICTED'

                # Handle special cases of nonsampled and nonforest area
                self.insert_class(bins[0], 'Unsampled', obs_ns_hectares)
                self.insert_class(bins[0], 'Nonforest', obs_nf_hectares)
                self.insert_class(bins[1], 'Unsampled', prd_ns_hectares)
                self.insert_class(bins[1], 'Nonforest', prd_nf_hectares)

                # One output line per bin of each series
                for series in bins:
                    for (i, bin_count) in enumerate(series.bin_counts):
                        out_data = [
                            '%s' % v,
                            '%s' % series.name,
                            '"%s"' % series.bin_names[i],
                            '%.3f' % bin_count,
                        ]
                        stats_fh.write(','.join(out_data) + '\n')
Esempio n. 9
0
    def _create_story(self):

        # Set up an empty list to hold the story
        story = []

        # Import the report styles
        styles = report_styles.get_report_styles()

        # Create a page break
        story = self._make_page_break(story, self.LANDSCAPE)

        # This class is somewhat of a hack, in that it likely only works on
        # rotated paragraphs which fit into the desired cell area
        class RotatedParagraph(p.Paragraph):
            def wrap(self, availHeight, availWidth):
                h, w = \
                    p.Paragraph.wrap(self, self.canv.stringWidth(self.text),
                        self.canv._leading)
                return w, h

            def draw(self):
                self.canv.rotate(90)
                self.canv.translate(0.0, -10.0)
                p.Paragraph.draw(self)

        # Section title
        title_str = '<strong>Local-Scale Accuracy Assessment: '
        title_str += 'Error Matrix for Vegetation Classes at Plot '
        title_str += 'Locations</strong>'

        para = p.Paragraph(title_str, styles['section_style'])
        t = p.Table([[para]], colWidths=[10.0 * u.inch])
        t.setStyle(
            p.TableStyle([
                ('TOPPADDING', (0, 0), (-1, -1), 3),
                ('BOTTOMPADDING', (0, 0), (-1, -1), 3),
                ('BACKGROUND', (0, 0), (-1, -1), '#957348'),
                ('ALIGNMENT', (0, 0), (-1, -1), 'LEFT'),
                ('VALIGN', (0, 0), (-1, -1), 'TOP'),
                ('GRID', (0, 0), (-1, -1), 0.25, colors.black),
            ]))
        story.append(t)
        story.append(p.Spacer(0, 0.1 * u.inch))

        # Read in the vegclass error matrix
        names = ['P_' + str(x) for x in range(1, 12)]
        names.insert(0, 'OBSERVED')
        names.extend(['TOTAL', 'CORRECT', 'FUZZY_CORRECT'])
        vc_data = mlab.csv2rec(self.vc_errmatrix_file, skiprows=1, names=names)
        vc_data = mlab.rec_drop_fields(vc_data, ['OBSERVED'])

        # Read in the stand attribute metadata
        mp = xsmp.XMLStandMetadataParser(self.stand_metadata_file)

        # Get the class names from the metadata
        vegclass_metadata = mp.get_attribute('VEGCLASS')
        vc_codes = vegclass_metadata.codes

        # Create a list of lists to hold the vegclass table
        vegclass_table = []

        # Add an empty row which will be a span row for the predicted label
        header_row = []
        for i in xrange(2):
            header_row.append('')
        prd_str = '<strong>Predicted Class</strong>'
        para = p.Paragraph(prd_str, styles['body_style_10_center'])
        header_row.append(para)
        for i in xrange(len(vc_data) - 1):
            header_row.append('')
        vegclass_table.append(header_row)

        # Add the predicted labels
        summary_labels = ('Total', '% Correct', '% FCorrect')
        header_row = []
        for i in xrange(2):
            header_row.append('')
        for code in vc_codes:
            label = re.sub('-', '-<br/>', code.label)
            para = p.Paragraph(label, styles['body_style_10_right'])
            header_row.append(para)
        for label in summary_labels:
            label = re.sub(' ', '<br/>', label)
            para = p.Paragraph(label, styles['body_style_10_right'])
            header_row.append(para)
        vegclass_table.append(header_row)

        # Set a variable to distinguish between plot counts and percents
        # in order to format them differently
        format_break = 11

        # Set the cells which should be blank
        blank_cells = \
            [(11, 12), (11, 13), (12, 11), (12, 13), (13, 11), (13, 12)]

        # Add the data
        for (i, row) in enumerate(vc_data):
            vegclass_row = []
            for (j, elem) in enumerate(row):

                # Blank cells
                if (i, j) in blank_cells:
                    elem_str = ''

                # Cells that represent plot counts
                elif i <= format_break and j <= format_break:
                    elem_str = '%d' % int(elem)

                # Cells that represent percentages
                else:
                    elem_str = '%.1f' % float(elem)
                para = p.Paragraph(elem_str, styles['body_style_10_right'])
                vegclass_row.append(para)

            # Add the observed labels at the beginning of each data row
            if i == 0:
                obs_str = '<strong>Observed Class</strong>'
                para = \
                    RotatedParagraph(obs_str, styles['body_style_10_center'])
            else:
                para = ''
            vegclass_row.insert(0, para)

            if i < len(vc_codes):
                label = vc_codes[i].label
            else:
                index = i - len(vc_codes)
                label = summary_labels[index]
            para = p.Paragraph(label, styles['body_style_10_right'])
            vegclass_row.insert(1, para)

            # Add this row to the table
            vegclass_table.append(vegclass_row)

        # Set up the widths for the table cells
        widths = []
        widths.append(0.3)
        widths.append(0.85)
        for i in xrange(len(vc_codes)):
            widths.append(0.56)
        for i in xrange(3):
            widths.append(0.66)
        widths = [x * u.inch for x in widths]

        # Convert the vegclass table into a reportlab table
        t = p.Table(vegclass_table, colWidths=widths)
        t.setStyle(
            p.TableStyle([
                ('SPAN', (0, 0), (1, 1)),
                ('SPAN', (0, 2), (0, -1)),
                ('SPAN', (2, 0), (-1, 0)),
                ('BACKGROUND', (0, 0), (-1, -1), '#f1efe4'),
                ('GRID', (0, 0), (-1, -1), 1, colors.white),
                ('ALIGNMENT', (0, 0), (-1, -1), 'LEFT'),
                ('VALIGN', (0, 0), (-1, -1), 'TOP'),
                ('VALIGN', (0, 2), (0, -1), 'MIDDLE'),
                ('VALIGN', (2, 1), (-1, 1), 'MIDDLE'),
                ('TOPPADDING', (0, 0), (-1, -1), 2),
                ('BOTTOMPADDING', (0, 0), (-1, -1), 3),
            ]))

        # Set up the shading for the truly correct cells
        correct = {}
        for i in xrange(len(vc_codes)):
            val = i + 1
            correct[val] = val

        for key in correct:
            val = correct[key]
            t.setStyle(
                p.TableStyle([
                    ('BACKGROUND', (key + 1, val + 1), (key + 1, val + 1),
                     '#aaaaaa'),
                ]))

        # Set up the shading for the fuzzy correct cells
        fuzzy = {}
        fuzzy[1] = [2]
        fuzzy[2] = [1, 3, 5, 8]
        fuzzy[3] = [2, 4, 5]
        fuzzy[4] = [3, 6, 7]
        fuzzy[5] = [2, 3, 6, 8]
        fuzzy[6] = [4, 5, 7, 9]
        fuzzy[7] = [4, 6, 10, 11]
        fuzzy[8] = [2, 5, 9]
        fuzzy[9] = [6, 8, 10]
        fuzzy[10] = [7, 9, 11]
        fuzzy[11] = [7, 10]

        for key in fuzzy:
            for elem in fuzzy[key]:
                t.setStyle(
                    p.TableStyle([
                        ('BACKGROUND', (key + 1, elem + 1),
                         (key + 1, elem + 1), '#dddddd'),
                    ]))

        # Add this table to the story
        story.append(t)
        story.append(p.Spacer(0, 0.1 * u.inch))

        # Explanation and definitions of vegetation class categories
        cell_str = """
            Cell values are model plot counts.  Dark gray cells represent
            plots where the observed class matches the predicted class
            and are included in the percent correct.  Light gray cells
            represent cases where the observed and predicted differ
            slightly (within +/- one class) based on canopy cover,
            hardwood proportion or average stand diameter, and are
            included in the percent fuzzy correct.
        """
        para = p.Paragraph(cell_str, styles['body_style_9'])
        story.append(para)
        story.append(p.Spacer(0, 0.1 * u.inch))

        head_str = '''
            <strong>Vegetation Class (VEGCLASS) Definitions</strong> --
            CANCOV (canopy cover of all live trees), BAH_PROP (proportion of
            hardwood basal area), and QMD_DOM (quadratic mean diameter of
            all dominant and codominant trees).
        '''
        para = p.Paragraph(head_str, styles['body_style_9'])
        story.append(para)
        story.append(p.Spacer(0, 0.1 * u.inch))

        # Print out the vegclass code definitions
        for code in vc_codes:
            label = code.label
            desc = self.txt_to_html(code.description)
            doc_str = '<strong>' + label + ':</strong> ' + desc
            para = p.Paragraph(doc_str, styles['body_style_9'])
            story.append(para)

        return story
Esempio n. 10
0
    def run_diagnostic(self):
        """
        Calculate accuracy statistics for each continuous accuracy attribute
        and write them to self.statistics_file as comma-separated text.

        One header line is written, followed by one line per attribute with
        Pearson r, Spearman r, RMSE, RMSE divided by the observed mean,
        bias percentage and R-square.  Attributes without metadata are
        reported on stdout and skipped.

        Parameters
        ----------
        None

        Returns
        -------
        None
        """

        # Read the observed and predicted files into numpy recarrays.  This
        # is done before the output file is opened so that a failure to read
        # the inputs does not leave a header-only statistics file behind.
        obs = utilities.csv2rec(self.observed_file)
        prd = utilities.csv2rec(self.predicted_file)

        # Subset the observed data just to the IDs that are in the
        # predicted file
        # NOTE(review): the statistics below compare obs and prd element by
        # element, which presumably relies on both files listing the same IDs
        # in the same order after this subset -- confirm against the callers
        obs_keep = np.in1d(getattr(obs, self.id_field),
                           getattr(prd, self.id_field))
        obs = obs[obs_keep]

        # Read in the stand attribute metadata
        mp = xsmp.XMLStandMetadataParser(self.stand_metadata_file)

        header = [
            'VARIABLE',
            'PEARSON_R',
            'SPEARMAN_R',
            'RMSE',
            'NORMALIZED_RMSE',
            'BIAS_PERCENTAGE',
            'R_SQUARE',
        ]

        # The context manager guarantees the file is closed even if one of
        # the statistics calls raises part-way through the loop
        with open(self.statistics_file, 'w') as stats_fh:
            stats_fh.write(','.join(header) + '\n')

            # For each variable, calculate the statistics
            for v in obs.dtype.names:

                # Get the metadata for this field.  Any lookup failure is
                # treated as "no metadata", but KeyboardInterrupt and
                # SystemExit are allowed to propagate.
                try:
                    fm = mp.get_attribute(v)
                except Exception:
                    err_msg = v + ' is missing metadata.'
                    print(err_msg)
                    continue

                # Only continue if this is a continuous accuracy variable
                if fm.field_type != 'CONTINUOUS' or fm.accuracy_attr == 0:
                    continue

                obs_vals = getattr(obs, v)
                prd_vals = getattr(prd, v)

                if np.all(obs_vals == 0.0):
                    # Nothing observed for this attribute: report zeros
                    # rather than calling the statistics functions
                    pearson_r = spearman_r = 0.0
                    rmse = std_rmse = bias = r2 = 0.0
                else:
                    if np.all(prd_vals == 0.0):
                        # The correlations are reported as zero when every
                        # prediction is zero
                        pearson_r = spearman_r = 0.0
                    else:
                        pearson_r = statistics.pearson_r(obs_vals, prd_vals)
                        spearman_r = statistics.spearman_r(obs_vals, prd_vals)
                    rmse = statistics.rmse(obs_vals, prd_vals)
                    std_rmse = rmse / obs_vals.mean()
                    bias = statistics.bias_percentage(obs_vals, prd_vals)
                    r2 = statistics.r2(obs_vals, prd_vals)

                # Print this out to the stats file
                values = (pearson_r, spearman_r, rmse, std_rmse, bias, r2)
                out_list = [v] + ['%.6f' % x for x in values]
                stats_fh.write(','.join(out_list) + '\n')
    def run_diagnostic(self):
        """
        Calculate presence/absence accuracy statistics for each continuous
        species attribute and write them to self.statistics_file as
        comma-separated text.

        One header line is written, followed by one line per species with
        the four cells of the binary error matrix and the statistics derived
        from it.  Attributes without metadata are reported on stdout and
        skipped.

        Parameters
        ----------
        None

        Returns
        -------
        None
        """

        # Read the observed and predicted files into numpy recarrays
        obs = utilities.csv2rec(self.observed_file)
        prd = utilities.csv2rec(self.predicted_file)

        # Subset the observed data just to the IDs that are in the
        # predicted file
        # NOTE(review): the error matrix pairs obs and prd element by
        # element, which presumably relies on both files listing the same IDs
        # in the same order after this subset -- confirm against the callers
        obs_keep = np.in1d(getattr(obs, self.id_field),
                           getattr(prd, self.id_field))
        obs = obs[obs_keep]

        # Read in the stand attribute metadata
        mp = xsmp.XMLStandMetadataParser(self.stand_metadata_file)

        header = [
            'SPECIES',
            'OP_PP',
            'OP_PA',
            'OA_PP',
            'OA_PA',
            'PREVALENCE',
            'SENSITIVITY',
            'FALSE_NEGATIVE_RATE',
            'SPECIFICITY',
            'FALSE_POSITIVE_RATE',
            'PERCENT_CORRECT',
            'ODDS_RATIO',
            'KAPPA',
        ]

        # The context manager guarantees the file is closed even if building
        # an error matrix raises part-way through the loop
        with open(self.statistics_file, 'w') as stats_fh:
            stats_fh.write(','.join(header) + '\n')

            # For each variable, calculate the statistics
            for v in obs.dtype.names:

                # Get the metadata for this field.  Any lookup failure is
                # treated as "no metadata", but KeyboardInterrupt and
                # SystemExit are allowed to propagate.
                try:
                    fm = mp.get_attribute(v)
                except Exception:
                    err_msg = v + ' is missing metadata.'
                    print(err_msg)
                    continue

                # Only continue if this is a continuous species variable
                if fm.field_type != 'CONTINUOUS' or fm.species_attr == 0:
                    continue

                obs_vals = getattr(obs, v)
                prd_vals = getattr(prd, v)

                # Create a binary error matrix from the obs and prd data
                stats = statistics.BinaryErrorMatrix(obs_vals, prd_vals)
                counts = stats.counts()

                # The four matrix cells are written in the same order as the
                # OP_PP, OP_PA, OA_PP, OA_PA header columns
                cells = [
                    counts[0, 0],
                    counts[0, 1],
                    counts[1, 0],
                    counts[1, 1],
                ]

                # The derived rates follow the remaining header columns
                rates = [
                    stats.prevalence(),
                    stats.sensitivity(),
                    stats.false_negative_rate(),
                    stats.specificity(),
                    stats.false_positive_rate(),
                    stats.percent_correct(),
                    stats.odds_ratio(),
                    stats.kappa(),
                ]

                # Build the list of items for printing
                out_list = [v]
                out_list.extend('%d' % x for x in cells)
                out_list.extend('%.4f' % x for x in rates)
                stats_fh.write(','.join(out_list) + '\n')
Esempio n. 12
0
    def _create_story(self):
        """
        Build the report section on species accuracy at plot locations.

        The section consists of a page break, a title banner, an explanation
        of the kappa coefficient and of the abbreviations used, a table with
        one row per species (name, prevalence, a 2x2 grid of plot counts and
        the kappa coefficient) and a closing note about rare species.

        Parameters
        ----------
        None

        Returns
        -------
        story : list
            Flowables for this section, in the order they were added
        """

        # Import the report styles
        styles = report_styles.get_report_styles()
        left_style = styles['body_style_10']
        right_style = styles['body_style_10_right']

        def make_count_table(cell_strs):
            # Lay out four strings as a 2x2 inner table; used for both the
            # header abbreviations and the per-species plot counts so the
            # two always share the same geometry and style
            paras = [p.Paragraph(x, right_style) for x in cell_strs]
            inner = p.Table(
                [paras[0:2], paras[2:4]],
                colWidths=[0.75 * u.inch, 0.75 * u.inch])
            inner.setStyle(
                p.TableStyle([
                    ('GRID', (0, 0), (-1, -1), 1, colors.white),
                    ('ALIGNMENT', (0, 0), (-1, -1), 'LEFT'),
                    ('VALIGN', (0, 0), (-1, -1), 'TOP'),
                    ('TOPPADDING', (0, 0), (-1, -1), 2),
                    ('BOTTOMPADDING', (0, 0), (-1, -1), 2),
                ]))
            return inner

        # Set up an empty list to hold the story and start with a page break
        story = []
        story = self._make_page_break(story, self.PORTRAIT)

        # Section title
        title_str = '<strong>Local-Scale Accuracy Assessment:<br/>'
        title_str += 'Species Accuracy at Plot Locations'
        title_str += '</strong>'

        para = p.Paragraph(title_str, styles['section_style'])
        t = p.Table([[para]], colWidths=[7.5 * u.inch])
        t.setStyle(
            p.TableStyle([
                ('TOPPADDING', (0, 0), (-1, -1), 6),
                ('BOTTOMPADDING', (0, 0), (-1, -1), 6),
                ('BACKGROUND', (0, 0), (-1, -1), '#957348'),
                ('ALIGNMENT', (0, 0), (-1, -1), 'LEFT'),
                ('VALIGN', (0, 0), (-1, -1), 'TOP'),
                ('GRID', (0, 0), (-1, -1), 0.25, colors.black),
            ]))
        story.append(t)
        story.append(p.Spacer(0, 0.2 * u.inch))

        # Kappa explanation
        kappa_str = '''
            Cohen's kappa coefficient (Cohen, 1960) is a statistical measure
            of reliability, accounting for agreement occurring by chance.  
            The equation for kappa is: 
        '''
        para = p.Paragraph(kappa_str, styles['body_style'])
        story.append(para)
        story.append(p.Spacer(0, 0.05 * u.inch))

        kappa_str = '''
           kappa = (Pr(a) - Pr(e)) / (1.0 - Pr(e))
        '''
        para = p.Paragraph(kappa_str, styles['indented'])
        story.append(para)
        story.append(p.Spacer(0, 0.05 * u.inch))

        kappa_str = '''
            where Pr(a) is the relative observed agreement among
            raters, and Pr(e) is the probability that agreement is
            due to chance.<br/><br/>

            <strong>Abbreviations Used:</strong><br/>
            OP/PP = Observed Present / Predicted Present<br/>
            OA/PP = Observed Absent / Predicted Present
            (errors of commission)<br/>
            OP/PA = Observed Present / Predicted Absent
            (errors of omission)<br/>
            OA/PA = Observed Absent / Predicted Absent
        '''
        para = p.Paragraph(kappa_str, styles['body_style'])
        story.append(para)
        story.append(p.Spacer(0, 0.2 * u.inch))

        # Create a list of lists to hold the species accuracy information
        species_table = []

        # Header row: species names, prevalence, 2x2 abbreviations, kappa
        spp_str = '<strong>Species PLANTS Code<br/>'
        spp_str += 'Scientific Name / Common Name</strong>'
        header_row = [
            p.Paragraph(spp_str, left_style),
            p.Paragraph('<strong>Species prevalence</strong>', left_style),
            make_count_table([
                '<strong>OP/PP</strong>',
                '<strong>OP/PA</strong>',
                '<strong>OA/PP</strong>',
                '<strong>OA/PA</strong>',
            ]),
            p.Paragraph('<strong>Kappa coefficient</strong>', left_style),
        ]
        species_table.append(header_row)

        # Open the species accuracy file into a recarray
        spp_data = utilities.csv2rec(self.species_accuracy_file)

        # Read in the stand attribute metadata
        mp = xsmp.XMLStandMetadataParser(self.stand_metadata_file)

        # Read in the report metadata if it exists
        if self.report_metadata_file:
            rmp = xrmp.XMLReportMetadataParser(self.report_metadata_file)
        else:
            rmp = None

        # Subset the attributes to just species
        attrs = [
            attr.field_name for attr in mp.attributes
            if attr.species_attr == 1 and 'NOTALY' not in attr.field_name
        ]

        # Iterate over the species and print out the statistics
        for spp in attrs:

            # Get the scientific and common names from the report metadata
            # if it exists; otherwise, just use the species symbol
            if rmp is not None:

                # Strip off any suffix if it exists
                spp_plain = spp.split('_')[0]

                # NOTE(review): IndexError is presumably how get_species
                # signals an unknown species -- confirm against the parser
                try:
                    spp_info = rmp.get_species(spp_plain)
                    spp_str = spp_info.spp_symbol + '<br/>'
                    spp_str += spp_info.scientific_name + ' / '
                    spp_str += spp_info.common_name
                except IndexError:
                    spp_str = spp
            else:
                spp_str = spp

            # Get the statistical information; taking element [0] raises
            # IndexError if the species is absent from the accuracy file
            data = spp_data[spp_data.SPECIES == spp][0]
            counts = [data.OP_PP, data.OP_PA, data.OA_PP, data.OA_PA]

            # Name, prevalence, plot counts as an inner table, and kappa
            species_row = [
                p.Paragraph(spp_str, left_style),
                p.Paragraph('%.4f' % data.PREVALENCE, right_style),
                make_count_table(['%d' % x for x in counts]),
                p.Paragraph('%.4f' % data.KAPPA, right_style),
            ]

            # Push this row to the master species table
            species_table.append(species_row)

        # Style this into a reportlab table and add to the story
        col_widths = [(x * u.inch) for x in [4.0, 0.75, 1.5, 0.75]]
        t = p.Table(species_table, colWidths=col_widths)
        t.setStyle(
            p.TableStyle([
                ('BACKGROUND', (0, 0), (-1, -1), '#f1efe4'),
                ('GRID', (0, 0), (-1, -1), 2, colors.white),

                # Column 0: species names
                ('TOPPADDING', (0, 0), (0, -1), 2),
                ('BOTTOMPADDING', (0, 0), (0, -1), 2),
                ('LEFTPADDING', (0, 0), (0, -1), 6),
                ('RIGHTPADDING', (0, 0), (0, -1), 6),
                ('ALIGNMENT', (0, 0), (0, -1), 'LEFT'),
                ('VALIGN', (0, 0), (0, -1), 'TOP'),

                # Column 1: prevalence
                ('TOPPADDING', (1, 0), (1, -1), 2),
                ('BOTTOMPADDING', (1, 0), (1, -1), 2),
                ('LEFTPADDING', (1, 0), (1, -1), 6),
                ('RIGHTPADDING', (1, 0), (1, -1), 6),
                ('ALIGNMENT', (1, 0), (1, -1), 'RIGHT'),
                ('VALIGN', (1, 0), (1, 0), 'TOP'),
                ('VALIGN', (1, 1), (1, -1), 'MIDDLE'),

                # Column 2: inner count tables, which carry their own
                # padding, so the outer cell gets none
                ('TOPPADDING', (2, 0), (2, -1), 0),
                ('BOTTOMPADDING', (2, 0), (2, -1), 0),
                ('LEFTPADDING', (2, 0), (2, -1), 0),
                ('RIGHTPADDING', (2, 0), (2, -1), 0),
                ('ALIGNMENT', (2, 0), (2, -1), 'LEFT'),
                ('VALIGN', (2, 0), (2, -1), 'TOP'),

                # Column 3: kappa
                ('TOPPADDING', (3, 0), (3, -1), 2),
                ('BOTTOMPADDING', (3, 0), (3, -1), 2),
                ('LEFTPADDING', (3, 0), (3, -1), 6),
                ('RIGHTPADDING', (3, 0), (3, -1), 6),
                ('ALIGNMENT', (3, 0), (3, -1), 'RIGHT'),
                ('VALIGN', (3, 0), (3, 0), 'TOP'),
                ('VALIGN', (3, 1), (3, -1), 'MIDDLE'),
            ]))
        story.append(t)
        story.append(p.Spacer(0, 0.1 * u.inch))

        rare_species_str = """
            Note that some very rare species do not appear in this accuracy
            report, because these species were not included when building
            the initial ordination model.  The full set of species is
            available upon request.
        """
        para = p.Paragraph(rare_species_str, styles['body_style'])
        story.append(para)

        # Return this story
        return story