Beispiel #1
0
    def test_duplicate_name_localities(self):
        """Report scientific names with more than one record for the same locality.

        Every unordered pair of records sharing a scientific name is compared
        with ``is_duplicate_locality``; one message per matching pair is
        collected and then passed to ``_print_errors``.
        """
        logit(self._log, "*** test_duplicate_name_localities ***")
        # Load the records on demand if they have not been read yet
        if self.nnsl_by_species is None:
            self.read_riis(read_resolved=False)

        err_msgs = []
        for sciname, reclist in self.nnsl_by_species.items():
            total = len(reclist)
            for first in range(total):
                # Start past `first` so each pair is visited exactly once
                for second in range(first + 1, total):
                    rec1 = reclist[first]
                    rec2 = reclist[second]
                    if not rec1.is_duplicate_locality(rec2):
                        continue
                    err_msgs.append(
                        'Sciname {} has {} on line {} and line {}'.format(
                            sciname, rec1.data[RIIS_SPECIES.LOCALITY_FLD],
                            rec1.data[LINENO_FLD], rec2.data[LINENO_FLD]))
        self._print_errors("Duplicate Name-Locality records", err_msgs)
Beispiel #2
0
 def test_gbif_resolution_inconsistency(self):
     """Report scientific names whose records disagree on the GBIF taxonKey.

     Every unordered pair of records sharing a scientific name is compared
     with ``is_gbif_match``; each non-matching pair yields one message
     listing both records' taxon authority, GBIF key and line number.
     """
     logit(self._log, "*** test_gbif_resolution_inconsistency ***")
     # Load the records on demand if they have not been read yet
     if self.nnsl_by_species is None:
         self.read_riis(read_resolved=False)

     err_msgs = []
     for sciname, reclist in self.nnsl_by_species.items():
         total = len(reclist)
         for first in range(total):
             # Start past `first` so each pair is visited exactly once
             for second in range(first + 1, total):
                 rec1 = reclist[first]
                 rec2 = reclist[second]
                 if rec1.is_gbif_match(rec2):
                     continue
                 auth1 = rec1.data[RIIS_SPECIES.TAXON_AUTHORITY_FLD]
                 auth2 = rec2.data[RIIS_SPECIES.TAXON_AUTHORITY_FLD]
                 first_part = 'Sciname {} has record1 taxon authority {}, with GBIF key {} (line {})'.format(
                     sciname, auth1, rec1.data[RIIS_SPECIES.GBIF_KEY],
                     rec1.data[LINENO_FLD])
                 second_part = ' and record2 taxon authority {}, with GBIF key {} (line {})'.format(
                     auth2, rec2.data[RIIS_SPECIES.GBIF_KEY],
                     rec2.data[LINENO_FLD])
                 err_msgs.append(first_part + second_part)
     self._print_errors("GBIF taxonKey conflicts", err_msgs)
Beispiel #3
0
 def test_taxonomy_keys(self):
     """Log every entry of ``bad_species`` and assert that there are none.

     NOTE(review): ``bad_species`` is presumably filled by ``read_riis`` with
     records whose GBIF taxonKey or ITIS TSN is not an integer -- confirm
     against ``read_riis``.

     Raises:
         AssertionError: if ``bad_species`` is not empty.
     """
     logit(self._log, "*** test_taxonomy_keys ***")
     # Load the records on demand if they have not been read yet
     if self.bad_species is None:
         self.read_riis(read_resolved=False)
     for bad_key, bad_val in self.bad_species.items():
         entry = "{} {}".format(bad_key, bad_val)
         logit(self._log, entry)
     bad_count = len(self.bad_species)
     assert bad_count == 0
Beispiel #4
0
    def find_gbif_record(self, gbifid):
        """Find a GBIF occurrence record identified by provided gbifID.

        Records are read one at a time with ``get_record`` until one whose
        ``GBIF.ID_FLD`` value equals ``gbifid`` is found or no record is left.
        Progress is logged every ``LOG.INTERVAL`` records.

        Args:
            gbifid: local GBIF identifier for finding a record in a large file.

        Returns:
            self.dwcrec: the matching GBIF record, or None when the search ran
                out of records. If an exception was raised while reading, it
                is logged and whatever ``self.dwcrec`` currently holds is
                returned.
        """
        if self._csv_reader is None:
            self.open()
        found = False
        try:
            while self.dwcrec is not None and not found:
                # Get interpreted record
                self.get_record()
                # NOTE(review): assumes get_record() leaves self.dwcrec as None
                # once the input is exhausted (the loop condition implies it).
                # Without this guard the lookup below raised TypeError on None,
                # which was logged as a read failure and skipped close().
                if self.dwcrec is None:
                    break
                if self.dwcrec[GBIF.ID_FLD] == gbifid:
                    found = True

                # Where are we
                if (self.recno % LOG.INTERVAL) == 0:
                    logit(self._log,
                          '*** Record number {} ***'.format(self.recno))
            if self.dwcrec is None and not found:
                logit(self._log, 'Failed to find {}'.format(gbifid))
                self.close()
        except Exception as e:
            # Best-effort search: log the failure instead of propagating it
            logit(self._log,
                  'Failed on line {}, exception {}'.format(self.recno, e))
        return self.dwcrec
Beispiel #5
0
    def test_resolve_gbif(self):
        """Re-resolve records to GBIF taxa and report resolutions that changed.

        Re-reads the original data, calls ``resolve_riis_to_gbif_taxa``, logs
        the resolved record count against ``RIIS_SPECIES.DATA_COUNT``, then
        checks the first record of every species with
        ``consistent_gbif_resolution``. Conflicts are collected and reported
        through ``_print_errors`` once the whole scan is finished.
        """
        logit(self._log, "*** test_resolve_gbif ***")
        err_msgs = []
        self.read_riis(read_resolved=False)

        # Update species data
        _, rec_count = self.resolve_riis_to_gbif_taxa()
        logit(
            self._log, "Resolved {} of expected {} records".format(
                rec_count, RIIS_SPECIES.DATA_COUNT))

        # Find mismatches
        for key, reclist in self.nnsl_by_species.items():
            rec1 = reclist[0]
            try:
                # Probe only: a KeyError means the new-key field was never added
                rec1.data[RIIS_SPECIES.NEW_GBIF_KEY_FLD]
            except KeyError:
                logit(
                    self._log, 'Failed to add field {} to {} records'.format(
                        RIIS_SPECIES.NEW_GBIF_KEY_FLD, rec1.name))
            else:
                if not rec1.consistent_gbif_resolution():
                    msg = "Record {} old GBIF taxonKey {} / {} conflicts with new GBIF taxonKey {} / {}".format(
                        key, rec1.data[RIIS_SPECIES.GBIF_KEY],
                        rec1.data[RIIS_SPECIES.SCINAME_FLD],
                        rec1.data[RIIS_SPECIES.NEW_GBIF_KEY_FLD],
                        rec1.data[RIIS_SPECIES.NEW_GBIF_SCINAME_FLD])
                    err_msgs.append(msg)

        # Report after collecting: the earlier placement ran while err_msgs was
        # still empty, so the conflicts gathered above were never printed.
        self._print_errors("Re-resolve to accepted GBIF taxon", err_msgs)
Beispiel #6
0
 def test_missing_taxon_authority_resolution(self):
     """Report records whose declared taxon authority has no usable key.

     A record is reported when its taxon authority is "Accepted GBIF" and its
     GBIF key is <= 0, or when its authority is "Accepted ITIS" and its ITIS
     key is <= 0. Collected messages are passed to ``_print_errors``.
     """
     logit(self._log, "*** test_missing_taxon_authority_resolution ***")
     err_msgs = []
     # Load the records on demand if they have not been read yet
     if self.nnsl_by_species is None:
         self.read_riis(read_resolved=False)
     for sciname, reclist in self.nnsl_by_species.items():
         for rec in reclist:
             auth = rec.data[RIIS_SPECIES.TAXON_AUTHORITY_FLD]
             if (auth == "Accepted GBIF"
                     and rec.data[RIIS_SPECIES.GBIF_KEY] <= 0):
                 err_msgs.append(
                     'Sciname {} has GBIF authority with key {} (line {})'.
                     format(sciname, rec.data[RIIS_SPECIES.GBIF_KEY],
                            rec.data[LINENO_FLD]))
             elif (auth == "Accepted ITIS"
                   and rec.data[RIIS_SPECIES.ITIS_KEY] <= 0):
                 # Show the ITIS key that failed the check; the GBIF key was
                 # reported here before, which did not match the message.
                 err_msgs.append(
                     'Sciname {} has ITIS authority with key {} (line {})'.
                     format(sciname, rec.data[RIIS_SPECIES.ITIS_KEY],
                            rec.data[LINENO_FLD]))
     self._print_errors("Missing authority resolution", err_msgs)
Beispiel #7
0
    def test_missing_resolved_records(self, is_test=True):
        """Log every original RIIS record id that is absent from the resolved data.

        The original records are re-read into this object, the resolved
        records are read into a second ``NNSL`` instance, and each key of
        ``nnsl_by_id`` missing from the resolved ``nnsl_by_id`` is logged.

        Args:
            is_test (bool): True if testing smaller test data file.
        """
        logit(self._log, "*** test_missing_resolved_records ***")
        # Start from a fresh read of the original records
        self.read_riis(read_resolved=False)

        # Read the resolved records into a separate instance
        fname = RIIS_SPECIES.TEST_FNAME if is_test else None
        resolved_nnsl = NNSL(DATA_PATH, test_fname=fname)
        resolved_nnsl.read_riis(read_resolved=True)

        # Look up each original id in the resolved records
        for rec_id in self.nnsl_by_id:
            try:
                resolved_nnsl.nnsl_by_id[rec_id]
            except KeyError:
                missing_msg = "Missing record {}".format(rec_id)
                logit(self._log, missing_msg)
Beispiel #8
0
    def test_resolution_output(self, is_test=True):
        """Compare the original record ids against the resolved records.

        The original records are re-read into this object and the resolved
        records into a second ``NNSL`` instance. Each original id missing from
        the resolved ``nnsl_by_id`` is logged, and a summary line is logged
        when the number of ids found differs from the number of originals.

        Args:
            is_test (bool): True if testing smaller test data file.
        """
        logit(self._log, "*** test_resolution_output ***")
        # Start from a fresh read of the original records
        self.read_riis(read_resolved=False)

        # Read the resolved records into a separate instance
        fname = RIIS_SPECIES.TEST_FNAME if is_test else None
        resolved_nnsl = NNSL(DATA_PATH, test_fname=fname)
        resolved_nnsl.read_riis(read_resolved=True)

        total_originals = 0
        total_found = 0
        for rec_id in self.nnsl_by_id:
            total_originals += 1
            try:
                resolved_nnsl.nnsl_by_id[rec_id]
            except KeyError:
                logit(
                    self._log,
                    "Failed to find occurrenceID {} in resolved dictionary".
                    format(rec_id))
                continue
            # Only reached when the lookup above succeeded
            total_found += 1

        if total_originals == total_found:
            return
        logit(
            self._log, "Original records {}, updated records {}".format(
                total_originals, total_found))
Beispiel #9
0
 def _print_errors(self, header, msgs):
     """Log a separator, a header line and each message; do nothing if empty.

     Args:
         header: title placed between ``---`` markers above the messages.
         msgs: messages to log, one per line; nothing is logged when this
             is empty or otherwise falsy.
     """
     if not msgs:
         return
     logit(self._log, ERR_SEPARATOR)
     title = "--- {} ---".format(header)
     logit(self._log, title)
     for line in msgs:
         logit(self._log, line)