Example #1
0
    def test_critical(self):
        """Emit a critical log message and check that a matching LogModel row can be queried."""

        # A random hex token gives this call a message no other log entry shares
        token = uuid.uuid4().hex
        log.critical(token)

        # one() raises unless exactly one row matches, so the call itself is the assertion
        matching_entries = self.session.query(LogModel).filter(LogModel.msg == token)
        matching_entries.one()
Example #2
0
    def process(self, data):
        """
        Create or update the record described by ``data`` and commit it.

        The record is looked up by IRN. If it exists it is rebuilt from ``data``;
        if the lookup raises ``NoResultFound`` a new ``self.model_class`` instance
        is created instead. A record that is currently a ``StubModel`` is first
        switched to the real polymorphic type with raw SQL and then reloaded.

        :param data: dict of field values for a single record; must contain 'irn'
        :return: None. A ``DataError`` raised while merging / committing is logged
                 as critical (and the session rolled back) rather than re-raised.
        """
        # Read the IRN once - every statement below refers to the same record
        irn = data['irn']

        log.debug('Processing %s record %s', self.model_class.__name__.lower(), irn)

        try:
            # Do we already have a record for this?
            record = self.get_record(irn)

            # Is this a stub record? If it is, we want to change the type and reload.
            # Seems a bit of a hack, but SQLAlchemy does not have a simple way of modifying the type
            # This only runs for catalogue records
            if isinstance(record, StubModel):

                polymorphic_type = self.model_class.__mapper_args__['polymorphic_identity']

                # Manually set type. Schema / table names are identifiers and cannot be
                # bound parameters, so they are interpolated; the values are bound.
                self.session.execute(
                    'UPDATE %s.catalogue SET type=:type WHERE irn=:irn' % self.keemu_schema,
                    {'type': polymorphic_type, 'irn': irn}
                )

                # If this has a child table, insert the IRN so updates will work
                table_name = self.model_class.__mapper__.local_table.name
                if table_name != 'specimen':
                    # And create empty row in the polymorphic table
                    self.session.execute(
                        'INSERT INTO %s.%s (irn) VALUES (:irn)' % (self.keemu_schema, table_name),
                        {'irn': irn}
                    )

                # Commit & expunge so the item can be reloaded
                self.session.commit()
                self.session.expunge(record)
                record = self.get_record(irn)

            # Process the relationships
            data = self._process_relationships(data, record)

            # Populate the data
            record.rebuild(**data)

        except NoResultFound:

            data = self._process_relationships(data)
            # Create a new record
            record = self.model_class(**data)

        try:

            self.session.merge(record)
            self.session.commit()

        except DataError as e:
            # A failed flush leaves the session's transaction unusable until it is
            # rolled back; do that first so the next record can be processed
            self.session.rollback()

            # Save this error to the log - will need to follow up on these.
            # The message is pre-formatted; the dict is handed over as the logging
            # args (presumably so a handler can store the offending data - confirm
            # against the log handler)
            log.critical('DB DataError: record %s not created.' % irn, {'data': data}, exc_info=e)
Example #3
0
    def process(self, data):
        """
        Filter and pre-process a catalogue record, then pass it to the parent ``process``.

        Records are skipped (the method returns None without calling the parent) when no
        model class is found, when required fields are missing, or when the record status /
        herbarium / artefact checks below match. Otherwise ``data`` is enriched in place
        with determinations, host / parasite associations and a few derived fields.

        :param data: dict of field values for a single record
        :return: None
        """
        # Try and get the model class
        self.model_class = self.get_model_class(data)

        # Without a model class there is nothing to build - log why and move on
        if not self.model_class:

            record_type = data.get('ColRecordType', 'Missing')

            if record_type not in self.excluded_types:
                # Critical error - log to DB
                log.critical('Unknown model class %s for %s. Investigate and then add to [excluded_types] if not required.', record_type, data['irn'])
            else:
                # Record type is one we've knowingly excluded
                log.debug('Skipping record %s: No model class for %s', data['irn'], record_type)

            # Next record
            return

        # Fields that must be present, paired with the message logged when they are not
        required_fields = (
            ('ColDepartment', 'Skipping record %s: No collection department'),
            ('AdmDateInserted', 'Skipping record %s: No AdmDateInserted'),
        )

        for required_field, skip_message in required_fields:
            if required_field not in data:
                log.debug(skip_message, data['irn'])
                return None

        # Skip records whose SecRecordStatus is one of these values
        skipped_statuses = ('DELETE', 'Reserved', 'Stub', 'Stub Record', 'DELETE-MERGED')
        if data.get('SecRecordStatus') in skipped_statuses:
            log.debug('Skipping record %s: Incorrect record status', data['irn'])
            return None

        # Botany records include ones from Linnean Society. Should be excluded.
        if data.get('RegHerbariumCurrentOrgAcroLocal') == 'LINN':
            log.debug('Skipping record %s: Non-BM botany record', data['irn'])
            return None

        # 4257 Artefacts have no kind or name. Skip them
        is_artefact = data['ColRecordType'] == 'Artefact'
        if is_artefact and not ('ArtKind' in data or 'ArtName' in data):
            return None

        # Process determinations
        determinations = data.get('EntIdeTaxonRef') or data.get('EntIndIndexLotTaxonNameLocalRef')

        if determinations:

            specimen_taxonomy = data['specimen_taxonomy'] = []

            determination_irns = self.ensure_list(determinations)

            # Load the taxonomy records for these determinations
            taxonomy_query = self.session.query(TaxonomyModel).filter(TaxonomyModel.irn.in_(determination_irns))

            filed_as_irn = data.get('EntIdeFiledAsTaxonRef')

            # Only taxa actually returned by the query get a determination
            # This will act as a filter, removing all duplicates / missing taxa
            for taxon in taxonomy_query.all():
                specimen_taxonomy.append(
                    Determination(taxonomy_irn=taxon.irn, specimen_irn=data['irn'], filed_as=(taxon.irn == filed_as_irn))
                )

        # Parasite card host / parasites
        host_parasites = {
            'host': data.get('CardHostRef', []),
            'parasite': data.get('CardParasiteRef', []),
        }

        stages = self.ensure_list(data.get('CardParasiteStage', []))

        for association_type, raw_refs in host_parasites.items():

            for position, taxonomy_ref in enumerate(self.ensure_list(raw_refs)):

                # Stages are matched to refs by position; refs beyond the last stage get None
                stage = stages[position] if position < len(stages) else None

                association = HostParasiteAssociation(taxonomy_irn=taxonomy_ref, parasite_card_irn=data['irn'], parasite_host=association_type, stage=stage)

                # Start the list on the first association, append afterwards
                data.setdefault('host_parasite_taxonomy', []).append(association)

        # Some special field mappings

        # Try to use PalDetDate if DarYearIdentified is missing
        if 'DarYearIdentified' not in data:
            try:
                date_matches = self.re_date.search(data['PalDetDate'])
                if date_matches:
                    identified_fields = ('DarYearIdentified', 'DarMonthIdentified', 'DarDayIdentified')
                    for group_number, identified_field in enumerate(identified_fields, 1):
                        data[identified_field] = date_matches.group(group_number)
            except (KeyError, TypeError):
                # PalDetDate doesn't exist or isn't a string (can also be a list if there's multiple determination dates - which we ignore)
                pass

        # EntCatCatalogueNumber requires EntCatPrefix if it's used in catalogue_number
        if 'EntCatPrefix' in data and 'EntCatCatalogueNumber' in data:
            data['EntCatCatalogueNumber'] = '{0}{1}'.format(data['EntCatPrefix'], data['EntCatCatalogueNumber'])

        # Set egg part type if not already set
        if self.model_class is EggModel:
            data.setdefault('PrtType', 'egg')

        super(CatalogueTask, self).process(data)
Example #4
0
    def _process_relationships(self, data, record=None):
        """
        Resolve relationship and foreign key fields in ``data`` for ``self.model_class``.

        Every mapped property of ``self.model_class`` whose key is not already in
        ``data`` is inspected:

        * Relationship to a secondary model (first primary key column is not 'irn'):
          ``data[prop.key]`` becomes a list of child model instances built from the
          aliased fields found in ``data``. When the child table has a unique
          constraint an existing row is reused if one matches.
        * Relationship to another IRN-keyed model: the IRNs found under the
          property's alias(es) are loaded and assigned to ``data[prop.key]``. Missing
          IRNs become ``StubModel`` instances for 'associated_record' and are logged
          as errors for any other property.
        * Column with a foreign key: an integer value is checked against the
          referenced table. If the target row is missing, a ``StubModel`` is added
          to the session for 'parent_irn'; otherwise the field is removed from
          ``data`` and an error is logged.

        :param data: dict of field values for one record; modified in place
        :param record: NOTE(review): never read in this method - the name is only
                       rebound by the foreign key existence check below
        :return: the same ``data`` dict
        """

        # Basic relationship handling.

        # More complex scenarios are handled in the individual processing functions
        for prop in class_mapper(self.model_class).iterate_properties:

            # Skip the field if the property key is already set in the data object
            # The field has been set in the import types custom preprocess function

            if prop.key in data:
                continue

            # Is this a relationship property?
            # NB: This excludes backrefs, which will be using sqlalchemy.orm.properties.RelationshipProperty, not our own
            # (hence the exact type comparison rather than isinstance)
            if type(prop) == RelationshipProperty:

                # Try and find a child model to use for this relationship
                try:
                    child_model = prop.mapper.class_
                    # If the child model has irn primary key, it relates to a KE EMu record
                    # And a simple relationship should be used
                    if child_model.__mapper__.primary_key[0].key == 'irn':
                        child_model = None

                except AttributeError:
                    child_model = None

                # This is a relationship to a secondary object like SexStage
                if child_model:

                    # If unique, we'll try loading the values from the database first
                    # And only create if they don't exist
                    unique = False

                    for constraint in child_model.__table__.constraints:
                        if constraint.__class__ == UniqueConstraint:
                            unique = True
                            break

                    # Map of source field name (alias) => child model column key
                    fields = {}

                    for column in child_model.__table__.columns:
                        if column.alias:
                            for alias in self.ensure_list(column.alias):
                                fields[alias] = column.key

                    # Populate a list of fields
                    data_fields = self._populate_subfield_data(fields.keys(), data)

                    # If we have data retrieve / create a model record
                    if data_fields:
                        data[prop.key] = []
                        # Loop through all the list of fields
                        for field_list in data_fields:

                            # Sometimes nothing is populated - for example, EntSexSex just has None
                            # We want to skip these
                            if not [x for x in field_list.values() if x is not None]:
                                continue

                            if unique:
                                # Try and get record from database
                                try:

                                    filters = []
                                    for alias, key in fields.items():
                                        # Build the filters
                                        col = getattr(child_model, key)

                                        # Do we have a value for this field
                                        if alias not in field_list:
                                            field_list[alias] = None

                                        # String fields should always be lower case & '' for null to ensure unique constraints work correctly
                                        # NOTE(review): the result of lower() is discarded, so the call only
                                        # detects non-string values (replaced with ''); the value itself is
                                        # not lower-cased here - confirm whether that is intended
                                        if isinstance(child_model.__table__.columns[key].type, String):
                                            try:
                                                field_list[alias].lower()
                                            except AttributeError:
                                                field_list[alias] = ''

                                        filters.append(col.__eq__(field_list[alias]))

                                    # Run the query
                                    data[prop.key].append(self.session.query(child_model).filter(and_(*filters)).one())

                                except NoResultFound:
                                    # Not found, create a new one
                                    data[prop.key].append(child_model(**field_list))

                            elif 'delete-orphan' in prop.cascade:
                                # If this property has a delete-orphan cascade, everything's fine
                                # SQLa will handle updates, removing old records
                                data[prop.key].append(child_model(**field_list))
                            else:

                                # For non unique / no delete orphan relationships, creating records here
                                # would create duplicate records in the associated table
                                # Not a problem now, but log a critical error in case it ever happens
                                log.critical('Record %s: Non-unique relationship used in %s.' % (data['irn'], prop.key))


                else:

                    # Basic relationship, in the format:
                    # stratigraphy = relationship("StratigraphyModel", secondary=collection_event_stratigraphy, alias='GeoStratigraphyRef')
                    field_names = prop.alias
                    irns = []

                    # Ensure it's a list
                    field_names = self.ensure_list(field_names)

                    for field_name in field_names:
                        value = data.get(field_name)
                        if value:
                            irns += self.ensure_list(value)

                    # Dedupe IRNS & ensure we are not linking to the same record - eg: 687077
                    # remove() raises ValueError when the record's own IRN is not in the list
                    try:
                        irns = list(set(irns))
                        irns.remove(data['irn'])
                    except ValueError:
                        pass

                    # Do we have any IRNs?
                    if irns:

                        # Get the relationship model class
                        relationship_model = prop.argument()

                        # Load the model objects and assign to the property
                        data[prop.key] = self.session.query(relationship_model).filter(relationship_model.irn.in_(irns)).all()
                        existing_irns = [record.irn for record in data[prop.key]]

                        # Do we have any missing IRNs
                        missing_irns = list(set(irns) - set(existing_irns))

                        if missing_irns:

                            # Is this a property we want to create stub records for
                            if prop.key == 'associated_record':
                                for missing_irn in missing_irns:
                                    data[prop.key].append(StubModel(irn=missing_irn))
                            else:
                                log.error('Missing IRN %s in relationship %s(%s).%s', ','.join(str(x) for x in missing_irns), self.model_class.__name__, data['irn'], prop.key)

            # This isn't a relationship property - but perform check to see if this a foreign key field
            else:

                # Properties without columns / foreign keys raise AttributeError or KeyError
                # below and are skipped by the handler at the end of this block
                try:

                    column = prop.columns[0]

                    # pop() is used to read one foreign key from the set; it raises KeyError when there are none
                    foreign_key = column.foreign_keys.pop()
                    # Add the foreign key back
                    column.foreign_keys.add(foreign_key)
                    foreign_key_value = None

                    # Loop through aliases / key and see if we have a foreign key value
                    candidate_names = column.alias if column.alias else prop.key
                    candidate_names = self.ensure_list(candidate_names)

                    for candidate_name in candidate_names:
                        foreign_key_value = data.get(candidate_name)
                        if foreign_key_value:
                            break

                    # We do have a foreign key value, so now perform check to see if it exists
                    # Only integer values are checked
                    if foreign_key_value and isinstance(foreign_key_value, int):

                        result = self.session.execute("SELECT COUNT(*) as exists FROM %s WHERE %s = :foreign_key_value" % (foreign_key.column.table, foreign_key.column.name), {'foreign_key_value': foreign_key_value})
                        # NB: this rebinds the ``record`` parameter to the count row
                        record = result.fetchone()

                        if not record.exists:
                            # If the record doesn't exist, create a stub for part parents
                            if prop.key == 'parent_irn':
                                self.session.add(StubModel(irn=foreign_key_value))
                            else:
                                # Otherwise, delete the property so it is not used
                                # Need to ensure all candidate names are unset
                                for candidate_name in candidate_names:
                                    try:
                                        del data[candidate_name]
                                    except KeyError:
                                        pass

                                log.error('%s(%s): Missing foreign key %s for %s field. Field removed from record.', self.model_class.__name__, data['irn'], foreign_key_value, prop.key)

                except (AttributeError, KeyError):
                    pass

        return data