Example #1
0
    def run(self):
        """
        Delete the records listed in a KE EMu eaudit export.

        Each parsed audit row names a KE EMu module (``AudTable``) and a
        record IRN (``AudKey``). Rows whose module has no matching
        ``KEDataTask`` subclass are skipped. For the others the record is
        loaded through the module's model class and deleted via the session.

        If the record cannot be found, an error is logged unless the deletion
        happened within 7 days of the insert, in which case only a debug
        message is written.

        The session is committed once after all rows have been processed, and
        the task's output target is then touched.
        """

        # Need to load an SQLA model
        # So build a dict of all models keyed by KE EMu module
        # (tasks without their own model class fall back to CatalogueModel)
        models = {
            cls.module: cls.model_class if cls.model_class else CatalogueModel
            for cls in KEDataTask.__subclasses__()
        }

        ke_data = KEParser(self.input().open('r'), schema_file=self.keemu_schema_file, input_file_path=self.input().path)

        for data in ke_data:
            module = data.get('AudTable')
            irn = data.get('AudKey')

            try:
                model = models[module]
            except KeyError:
                log.debug('Skipping eaudit record for %s' % module)
                continue

            try:

                log.debug('Deleting record %s(%s)' % (model, irn))

                # Load the object and then delete so we use the SQLA inheritance
                # The lookup must use the model and IRN taken from this audit row
                obj = self.session.query(model).filter(model.irn == irn).one()
                self.session.delete(obj)

            except NoResultFound:

                # We cannot delete this record as it doesn't exist
                # There are a lot of records being inserted and then deleted again
                # So will never appear on the insert exports
                date_inserted = datetime.strptime(data.get('AdmDateInserted'), "%Y-%m-%d")
                date_deleted = datetime.strptime(data.get('AudDate'), "%Y-%m-%d")

                # If date deleted is within 7 days of the insert date, do not flag an error
                if date_deleted - timedelta(days=7) < date_inserted:
                    log.debug('Record %s(%s) not found for deletion, but within date threshold (inserted: %s deleted: %s)' % (model.__name__, irn, date_inserted, date_deleted))
                else:
                    log.error('Record %s(%s) not found for deletion' % (model, irn))

        self.session.commit()
        self.output().touch()
Example #2
0
    def _process_relationships(self, data, record=None):
        """
        Resolve the relationship and foreign key properties of a record.

        Walks every mapped property of ``self.model_class`` and, unless the
        property key is already present in ``data``:

        * for a relationship whose child model does not have ``irn`` as its
          first primary key column, builds a list of child objects from the
          aliased source fields. Existing rows are re-used when the child
          table has a unique constraint.
        * for any other relationship, loads the related objects by IRN from
          the aliased source fields. Missing targets get a stub record for
          ``associated_record`` and are logged as errors otherwise.
        * for a non-relationship property whose first column has a foreign
          key, checks that the referenced row exists. If it does not, a stub
          is added for ``parent_irn``; otherwise the value is removed from
          ``data`` and an error is logged.

        :param data: dict of field values for the record; modified in place
        :param record: not used by this method
        :return: the same ``data`` dict
        """

        # Basic relationship handling.

        # More complex scenarios are handled in the individual processing functions
        for prop in class_mapper(self.model_class).iterate_properties:

            # Skip the field if the property key is already set in the data object
            # The field has been set in the import types custom preprocess function

            if prop.key in data:
                continue

            # Is this a relationship property?
            # NB: This excludes backrefs, which will be using sqlalchemy.orm.properties.RelationshipProperty, not our own
            # (which is why this is an exact type comparison and not isinstance)
            if type(prop) == RelationshipProperty:

                # Try and find a child model to use for this relationship
                try:
                    child_model = prop.mapper.class_
                    # If the child model has irn primary key, it relates to a KE EMu record
                    # And a simple relationship should be used
                    if child_model.__mapper__.primary_key[0].key == 'irn':
                        child_model = None

                except AttributeError:
                    child_model = None

                # This is a relationship to a secondary object like SexStage
                if child_model:

                    # If unique, we'll try loading the values from the database first
                    # And only create if they don't exist
                    unique = any(
                        constraint.__class__ == UniqueConstraint
                        for constraint in child_model.__table__.constraints
                    )

                    # Map every source field alias onto the child column key it populates
                    fields = {}

                    for column in child_model.__table__.columns:
                        if column.alias:
                            for alias in self.ensure_list(column.alias):
                                fields[alias] = column.key

                    # Populate a list of fields
                    data_fields = self._populate_subfield_data(fields.keys(), data)

                    # If we have data retrieve / create a model record
                    if data_fields:
                        data[prop.key] = []
                        # Loop through all the list of fields
                        for field_list in data_fields:

                            # Sometimes nothing is populated - for example, EntSexSex just has None
                            # We want to skip these
                            if all(x is None for x in field_list.values()):
                                continue

                            if unique:
                                # Try and get record from database
                                try:

                                    filters = []
                                    for alias, key in fields.items():
                                        # Build the filters
                                        col = getattr(child_model, key)

                                        # Do we have a value for this field
                                        if alias not in field_list:
                                            field_list[alias] = None

                                        # String fields should always be lower case & '' for null to ensure unique constraints work correctly
                                        if isinstance(child_model.__table__.columns[key].type, String):
                                            try:
                                                # str.lower() returns a new string, so the result has to be stored
                                                field_list[alias] = field_list[alias].lower()
                                            except AttributeError:
                                                # Not a string (e.g. None)
                                                field_list[alias] = ''

                                        filters.append(col.__eq__(field_list[alias]))

                                    # Run the query
                                    data[prop.key].append(self.session.query(child_model).filter(and_(*filters)).one())

                                except NoResultFound:
                                    # Not found, create a new one
                                    data[prop.key].append(child_model(**field_list))

                            elif 'delete-orphan' in prop.cascade:
                                # If this property has a delete-orphan cascade, everything's fine
                                # SQLa will handle updates, removing old records
                                data[prop.key].append(child_model(**field_list))
                            else:
                                # For non unique / no delete orphan relationships
                                # creating records here would duplicate rows in the associated table
                                # Not a problem now, but log a critical error in case it ever happens
                                log.critical('Record %s: Non-unique relationship used in %s.' % (data['irn'], prop.key))

                else:

                    # Basic relationship, in the format:
                    # stratigraphy = relationship("StratigraphyModel", secondary=collection_event_stratigraphy, alias='GeoStratigraphyRef')
                    field_names = prop.alias
                    irns = []

                    # Ensure it's a list
                    field_names = self.ensure_list(field_names)

                    for field_name in field_names:
                        value = data.get(field_name)
                        if value:
                            irns += self.ensure_list(value)

                    # Dedupe IRNS & ensure we are not linking to the same record - eg: 687077
                    try:
                        irns = list(set(irns))
                        irns.remove(data['irn'])
                    except ValueError:
                        # The record does not reference itself
                        pass

                    # Do we have any IRNs?
                    if irns:

                        # Get the relationship model class
                        relationship_model = prop.argument()

                        # Load the model objects and assign to the property
                        data[prop.key] = self.session.query(relationship_model).filter(relationship_model.irn.in_(irns)).all()
                        existing_irns = [related.irn for related in data[prop.key]]

                        # Do we have any missing IRNs
                        missing_irns = list(set(irns) - set(existing_irns))

                        if missing_irns:

                            # Is this a property we want to create stub records for
                            if prop.key == 'associated_record':
                                for missing_irn in missing_irns:
                                    data[prop.key].append(StubModel(irn=missing_irn))
                            else:
                                log.error('Missing IRN %s in relationship %s(%s).%s', ','.join(str(x) for x in missing_irns), self.model_class.__name__, data['irn'], prop.key)

            # This isn't a relationship property - but perform check to see if this a foreign key field
            else:

                # Properties without columns or without a foreign key raise
                # AttributeError / KeyError below and are skipped
                try:

                    column = prop.columns[0]

                    # Peek at one foreign key: pop() raises KeyError if there are none
                    foreign_key = column.foreign_keys.pop()
                    # Add the foreign key back
                    column.foreign_keys.add(foreign_key)
                    foreign_key_value = None

                    # Loop through aliases / key and see if we have a foreign key value
                    candidate_names = column.alias if column.alias else prop.key
                    candidate_names = self.ensure_list(candidate_names)

                    for candidate_name in candidate_names:
                        foreign_key_value = data.get(candidate_name)
                        if foreign_key_value:
                            break

                    # We do have a foreign key value, so now perform check to see if it exists
                    if foreign_key_value and isinstance(foreign_key_value, int):

                        result = self.session.execute("SELECT COUNT(*) as exists FROM %s WHERE %s = :foreign_key_value" % (foreign_key.column.table, foreign_key.column.name), {'foreign_key_value': foreign_key_value})
                        row = result.fetchone()

                        if not row.exists:
                            # If the record doesn't exist, create a stub for part parents
                            if prop.key == 'parent_irn':
                                self.session.add(StubModel(irn=foreign_key_value))
                            else:
                                # Otherwise, delete the property so it is not used
                                # Need to ensure all candidate names are unset
                                for candidate_name in candidate_names:
                                    try:
                                        del data[candidate_name]
                                    except KeyError:
                                        pass

                                log.error('%s(%s): Missing foreign key %s for %s field. Field removed from record.', self.model_class.__name__, data['irn'], foreign_key_value, prop.key)

                except (AttributeError, KeyError):
                    pass

        return data