Example #1
 def validate_line(self, how: ImportHow, diag: ImportDiagnostic, lig,
                   vals_cache):
     """
         Validate a line from data point of view.
     :param how:
     :param diag:
     :param lig:
     :param vals_cache:
     :return:
     """
     latitude_was_seen = False
     predefined_mapping = GlobalMapping.PREDEFINED_FIELDS
     custom_mapping = how.custom_mapping
     for raw_field, a_field in self.clean_fields.items():
         m = predefined_mapping.get(a_field)
         if m is None:
             m = custom_mapping.search_field(a_field)
             # No mapping, not stored
             if m is None:
                 continue
         raw_val = lig.get(raw_field)
         # Try to get the value from the cache
         cache_key = (raw_field, raw_val)
         if cache_key in vals_cache:
             if a_field == 'object_lat':
                 latitude_was_seen = True
             continue
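          # During validation only key presence matters, so a dummy value is enough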
         vals_cache[cache_key] = 1
         is_numeric = m['type'] == 'n'
         # Same column with same value was not seen before, proceed
         csv_val: str = clean_value_and_none(raw_val, is_numeric)
         diag.cols_seen.add(a_field)
         # From V1.1, if the column is present then it is considered as seen.
         #  Before, the criterion was 'at least one value'.
         if csv_val == '':
             # If no relevant value, leave field as NULL
             continue
         if a_field == 'object_lat':
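             # Since UVPApp the value may use a ddd°MM.SS notation as well as plain
             # decimal degrees, hence the conversion (see read_fields_to_dicts below)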
             vf = convert_degree_minute_float_to_decimal_degree(csv_val)
             if vf < -90 or vf > 90:
                 diag.error(
                     "Invalid Lat. value '%s' for Field '%s' in file %s. "
                     "Incorrect range -90/+90°." %
                     (csv_val, raw_field, self.relative_name))
                 del vals_cache[cache_key]
             else:
                 latitude_was_seen = True
         elif a_field == 'object_lon':
             vf = convert_degree_minute_float_to_decimal_degree(csv_val)
             if vf < -180 or vf > 180:
                 diag.error(
                     "Invalid Long. value '%s' for Field '%s' in file %s. "
                     "Incorrect range -180/+180°." %
                     (csv_val, raw_field, self.relative_name))
         elif is_numeric:
             vf = to_float(csv_val)
             if vf is None:
                 diag.error(
                     "Invalid float value '%s' for Field '%s' in file %s." %
                     (csv_val, raw_field, self.relative_name))
             elif a_field == 'object_annotation_category_id':
                 # vf is a known-good float here; int(csv_val) would raise on inputs like '123.0'
                 diag.classif_id_seen.add(int(vf))
         elif a_field == 'object_date':
             try:
                 ObjectHeader.date_from_txt(csv_val)
             except ValueError:
                 diag.error(
                     "Invalid Date value '%s' for Field '%s' in file %s." %
                     (csv_val, raw_field, self.relative_name))
         elif a_field == 'object_time':
             try:
                 ObjectHeader.time_from_txt(csv_val)
             except ValueError:
                 diag.error(
                     "Invalid Time value '%s' for Field '%s' in file %s." %
                     (csv_val, raw_field, self.relative_name))
         elif a_field == 'object_annotation_category':
             if clean_value_and_none(
                     lig.get('object_annotation_category_id', '')) == '':
                 # Apply the mapping, if and only if there is no id
                 csv_val = how.taxo_mapping.get(csv_val.lower(), csv_val)
                 # Record that the taxon was seen
                 how.found_taxa[csv_val.lower()] = None
         elif a_field == 'object_annotation_person_name':
             maybe_email = clean_value_and_none(
                 lig.get('object_annotation_person_email', ''))
             # TODO: It's more "diag" than "how"
             how.found_users[csv_val.lower()] = {'email': maybe_email}
         elif a_field == 'object_annotation_status':
             if (csv_val != 'noid'
                     and csv_val.lower() not in classif_qual_revert):
                 diag.error(
                     "Invalid Annotation Status '%s' for Field '%s' in file %s."
                     % (csv_val, raw_field, self.relative_name))
     # Update missing GPS count
     if not latitude_was_seen:
         diag.nb_objects_without_gps += 1
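
For context, here is a minimal driver sketch showing how validate_line() could be run over a whole file. It is a hypothetical illustration: self.path, the enclosing class and the skipped second '[t]/[n]' types line are assumptions; only validate_line() itself comes from the source.

 import csv

 def validate_file(self, how: ImportHow, diag: ImportDiagnostic):
     """
         Hypothetical caller: validate every data line of one TSV file.
     """
     # The cache is shared across lines, so each (column, value) pair is checked once
     vals_cache: dict = {}
     with open(self.path, newline='') as fd:
         rdr = csv.DictReader(fd, delimiter='\t')
         next(rdr)  # assumed: skip the [t]/[n] types line right under the header
         for lig in rdr:
             self.validate_line(how, diag, lig, vals_cache)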
Example #2
 def fetch_existing_ranks(session, prj_id):
     """
         Get existing image ranks from the project
     """
     return ObjectHeader.fetch_existing_ranks(session, prj_id)
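
A hedged usage sketch follows; the shape of the return value (a dict-like mapping from object id to its highest existing image rank) is an assumption, as the source only shows the delegation to ObjectHeader.

 # Hypothetical: pick the rank for a new image of a possibly existing object
 ranks = fetch_existing_ranks(session, prj_id)
 next_rank = ranks.get(obj_id, 0) + 1  # assumed dict-like; append after existing images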
Example #3
    def read_fields_to_dicts(how: ImportHow, field_set: Set,
                             lig: Dict[str, str], dicts_to_write,
                             vals_cache: Dict):
        """
            Read the data line into target dicts. Values go into the right bucket,
            i.e. target dict, depending on the mappings (the standard one and the
            per-project custom one).
            :param how: Importing directives.
            :param field_set: The fields present in DB record.
            :param lig: A line of TSV data, as {header: val} dict.
            :param dicts_to_write: The output data.
            :param vals_cache: A cache of values, per column and seen value.
        """
        predefined_mapping = GlobalMapping.PREDEFINED_FIELDS
        custom_mapping = how.custom_mapping
        # The CSV reader returns a minimal dict with no None values, so values
        # exist only for the fields present on this line, hence the intersection below.
        for a_field in field_set.intersection(lig.keys()):
            # We have a value
            raw_val = lig[a_field]
            m = predefined_mapping.get(a_field)
            if not m:
                m = custom_mapping.search_field(a_field)
                assert m is not None
            field_table = m["table"]
            field_name = m["field"]
            is_numeric = m['type'] == 'n'
            # Try to get the transformed value from the cache
            cache_key: Any = (a_field, raw_val)
            if cache_key in vals_cache:
                cached_field_value = vals_cache.get(cache_key)
            else:
                csv_val = clean_value(raw_val, is_numeric)
                # If no relevant value, set field to NULL, i.e. None
                if csv_val == '':
                    cached_field_value = None
                elif a_field == 'object_lat':
                # The field is of [n] type, but since UVPApp it can contain
                # a notation like ddd°MM.SS, which is [t] as well.
                    cached_field_value = convert_degree_minute_float_to_decimal_degree(
                        csv_val)
                elif a_field == 'object_lon':
                    cached_field_value = convert_degree_minute_float_to_decimal_degree(
                        csv_val)
                elif is_numeric:
                    cached_field_value = to_float(csv_val)
                elif a_field == 'object_date':
                    cached_field_value = ObjectHeader.date_from_txt(csv_val)
                elif a_field == 'object_time':
                    cached_field_value = ObjectHeader.time_from_txt(csv_val)
                elif field_name == 'classif_when':
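                    # csv_val holds the annotation date as YYYYMMDD; it is paired with the
                    # optional object_annotation_time (HHMMSS, defaulting to midnight),
                    # e.g. '20240131' + '134500' -> datetime(2024, 1, 31, 13, 45)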
                    v2 = clean_value(
                        lig.get('object_annotation_time', '000000')).zfill(6)
                    cached_field_value = datetime.datetime(
                        int(csv_val[0:4]), int(csv_val[4:6]),
                        int(csv_val[6:8]), int(v2[0:2]), int(v2[2:4]),
                        int(v2[4:6]))
                    # No caching of this one, as it depends on another value on the same line
                    cache_key = "0"
                elif field_name == 'classif_id':
                    # Two fields map to classif_id; the second, textual, one has [t] type
                    # and is treated here. The first, numeric, one is in the is_numeric case above.
                    mapped_val = how.taxo_mapping.get(csv_val.lower(), csv_val)
                    # Use initial mapping
                    cached_field_value = how.found_taxa[none_to_empty(
                        mapped_val).lower()]
                    # Better to crash than to write a bad value into the DB
                    assert cached_field_value is not None, "Column %s: no classification of %s mapped as %s" % (
                        a_field, csv_val, mapped_val)
                elif field_name == 'classif_who':
                    # Possibly map to another user, if asked to
                    usr_key = none_to_empty(csv_val).lower()
                    cached_field_value = how.found_users[usr_key].get(
                        'id', None)
                elif field_name == 'classif_qual':
                    cached_field_value = classif_qual_revert.get(
                        csv_val.lower())
                else:
                    # Assume it's an ordinary text field with nothing special
                    cached_field_value = csv_val
                # Cache if relevant; setting cache_key to "0" above effectively voids the caching
                vals_cache[cache_key] = cached_field_value

            # Write the field into the right object
            dict_to_write = dicts_to_write[field_table]
            dict_to_write[field_name] = cached_field_value
        # Ensure that all dicts' fields have a value, None if needed. This is required for bulk
        # inserts, in DBWriter.py, as SQLAlchemy core computes the insert statement from the first
        # line and just injects the data for the following ones.
        for a_field in field_set.difference(lig.keys()):
            fld_mping = custom_mapping.search_field(a_field)
            m = predefined_mapping.get(a_field, fld_mping)
            assert m is not None
            if m["field"] not in dicts_to_write[m["table"]]:
                dicts_to_write[m["table"]][m["field"]] = None
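
For context, a hedged sketch of the calling side. The table keys, the db_writer and the tsv_lines iterable are illustrative assumptions; only read_fields_to_dicts() and its signature come from the source.

    # Hypothetical per-line driver: one target dict per destination DB table
    dicts_to_write: Dict[str, Dict] = {"obj_head": {}, "obj_field": {}, "image": {}}
    vals_cache: Dict = {}
    for lig in tsv_lines:  # each line as a {header: value} dict
        for a_dict in dicts_to_write.values():
            a_dict.clear()  # reuse the dicts from one line to the next
        read_fields_to_dicts(how, field_set, lig, dicts_to_write, vals_cache)
        db_writer.add_vals(dicts_to_write)  # hypothetical bulk writer, see DBWriter.py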
Example #4
 def fetch_existing_objects(session, prj_id):
     """
         Get existing object IDs (orig_id AKA object_id in TSV) from the project
     """
     with CodeTimer("Existing objects for %d: " % prj_id, logger):
         return ObjectHeader.fetch_existing_objects(session, prj_id)
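
For illustration, a hedged sketch of how the result could be used to skip already-imported objects; treating the return value as a container supporting 'in' is an assumption.

 existing_objects = fetch_existing_objects(session, prj_id)
 for lig in tsv_lines:  # hypothetical loop over parsed TSV lines
     orig_id = lig.get('object_id')
     if orig_id in existing_objects:
         continue  # the object was imported before, do not duplicate it
     # ... otherwise validate and import the line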