def run(self):
    """
    Delete the records listed in a KE EMu eaudit export.

    Each parsed audit row names a module (``AudTable``) and a record IRN
    (``AudKey``). The module is mapped to its SQLAlchemy model, the matching
    record is loaded and deleted through the session, and once every row has
    been handled the session is committed and the task output is touched.

    Rows for modules without a known model are skipped. A record that cannot
    be found is logged at debug level when it was deleted within seven days
    of being inserted, and as an error otherwise.
    """
    # Need to load an SQLA model
    # So build a dict of all models keyed by KE EMu module
    models = {}
    for cls in KEDataTask.__subclasses__():
        models[cls.module] = cls.model_class if cls.model_class else CatalogueModel

    ke_data = KEParser(self.input().open('r'), schema_file=self.keemu_schema_file, input_file_path=self.input().path)

    # Records deleted this soon after insertion are not reported as errors
    threshold = timedelta(days=7)

    for data in ke_data:

        module = data.get('AudTable')
        irn = data.get('AudKey')

        try:
            model = models[module]
        except KeyError:
            log.debug('Skipping eaudit record for %s', module)
            continue

        log.debug('Deleting record %s(%s)', model, irn)

        try:
            # Load the object and then delete so we use the SQLA inheritance.
            # The lookup must use the model and IRN named by this audit row.
            obj = self.session.query(model).filter(model.irn == irn).one()
        except NoResultFound:
            # We cannot delete this record as it doesn't exist
            # There are a lot of records being inserted and then deleted again
            # So will never appear on the insert exports
            date_inserted = datetime.strptime(data.get('AdmDateInserted'), "%Y-%m-%d")
            date_deleted = datetime.strptime(data.get('AudDate'), "%Y-%m-%d")

            # If date deleted is within 7 days of the insert date, do not flag an error
            if date_deleted - threshold < date_inserted:
                log.debug('Record %s(%s) not found for deletion, but within date threshold (inserted: %s deleted: %s)', model.__name__, irn, date_inserted, date_deleted)
            else:
                log.error('Record %s(%s) not found for deletion', model, irn)
        else:
            self.session.delete(obj)

    self.session.commit()
    self.output().touch()
def _process_relationships(self, data, record=None):
    """
    Basic relationship handling for a parsed record.

    Walks every mapped property of ``self.model_class`` and, unless the
    property key is already present in ``data``:

    * for our own relationship properties, replaces the raw values with
      model objects (secondary child objects, or records looked up by IRN);
    * for other properties, checks any foreign key value actually exists.

    More complex scenarios are handled in the individual processing functions.

    :param data: dict of parsed record values; modified in place
    :param record: not used by this method
    :return: the same ``data`` dict
    """
    for prop in class_mapper(self.model_class).iterate_properties:

        # Skip the field if the property key is already set in the data object
        # The field has been set in the import types custom preprocess function
        if prop.key in data:
            continue

        # Is this a relationship property?
        # NB: The exact type comparison is deliberate. It excludes backrefs, which
        # will be using sqlalchemy.orm.properties.RelationshipProperty, not our own
        if type(prop) == RelationshipProperty:
            child_model = self._relationship_child_model(prop)
            if child_model is None:
                self._load_irn_relationship(prop, data)
            else:
                self._load_secondary_relationship(prop, child_model, data)
        else:
            # This isn't a relationship property - but perform check to see if this a foreign key field
            self._check_foreign_key(prop, data)

    return data

def _relationship_child_model(self, prop):
    """
    Return the secondary child model for a relationship, or None when the
    relationship targets a KE EMu record (irn primary key) or has no mapper.
    """
    try:
        child_model = prop.mapper.class_
        # If the child model has irn primary key, it relates to a KE EMu record
        # And a simple relationship should be used
        if child_model.__mapper__.primary_key[0].key == 'irn':
            return None
    except AttributeError:
        return None
    return child_model

def _load_secondary_relationship(self, prop, child_model, data):
    """
    Populate data[prop.key] with child objects for a relationship to a
    secondary object like SexStage.
    """
    table = child_model.__table__

    # If unique, we'll try loading the values from the database first
    # And only create if they don't exist
    unique = any(constraint.__class__ == UniqueConstraint for constraint in table.constraints)

    # Map every alias onto the key of the child column declaring it
    fields = {}
    for column in table.columns:
        if column.alias:
            for alias in self.ensure_list(column.alias):
                fields[alias] = column.key

    # Populate a list of fields
    data_fields = self._populate_subfield_data(fields.keys(), data)

    # Without data there is nothing to retrieve / create
    if not data_fields:
        return

    data[prop.key] = []

    for field_list in data_fields:

        # Sometimes nothing is populated - for example, EntSexSex just has None
        # We want to skip these
        if all(value is None for value in field_list.values()):
            continue

        if unique:
            data[prop.key].append(self._get_or_create_child(child_model, fields, field_list))
        elif 'delete-orphan' in prop.cascade:
            # If this property has a delete-orphan cascade, everything's fine
            # SQLa will handle updates, removing old records
            data[prop.key].append(child_model(**field_list))
        else:
            # For non unique / no delete orphan relationships, creating the child
            # would produce duplicate records in the associated table
            # Not a problem now, but log a critical error in case it ever happens
            log.critical('Record %s: Non-unique relationship used in %s.', data['irn'], prop.key)

def _get_or_create_child(self, child_model, fields, field_list):
    """
    Return the existing child_model row matching field_list, or a new
    (unsaved) child_model instance when no row matches.

    field_list is normalised in place: missing aliases are set to None and
    values for String columns are lower-cased ('' when not a string).
    """
    filters = []

    for alias, key in fields.items():

        # Do we have a value for this field
        if alias not in field_list:
            field_list[alias] = None

        # String fields should always be lower case & '' for null to ensure unique constraints work correctly
        if isinstance(child_model.__table__.columns[key].type, String):
            try:
                # str.lower() returns a new string, so the result has to be stored
                field_list[alias] = field_list[alias].lower()
            except AttributeError:
                field_list[alias] = ''

        filters.append(getattr(child_model, key) == field_list[alias])

    try:
        # Try and get record from database
        return self.session.query(child_model).filter(and_(*filters)).one()
    except NoResultFound:
        # Not found, create a new one
        return child_model(**field_list)

def _load_irn_relationship(self, prop, data):
    """
    Populate data[prop.key] with the records whose IRNs are listed under the
    relationship's alias field(s).

    Basic relationship, in the format:
    stratigraphy = relationship("StratigraphyModel", secondary=collection_event_stratigraphy, alias='GeoStratigraphyRef')
    """
    irns = []

    for field_name in self.ensure_list(prop.alias):
        value = data.get(field_name)
        if value:
            irns += self.ensure_list(value)

    # Dedupe IRNS & ensure we are not linking to the same record - eg: 687077
    unique_irns = set(irns)
    unique_irns.discard(data['irn'])
    irns = list(unique_irns)

    # Do we have any IRNs?
    if not irns:
        return

    # Get the relationship model class
    relationship_model = prop.argument()

    # Load the model objects and assign to the property
    data[prop.key] = self.session.query(relationship_model).filter(relationship_model.irn.in_(irns)).all()

    existing_irns = [related.irn for related in data[prop.key]]

    # Do we have any missing IRNs
    missing_irns = list(set(irns) - set(existing_irns))
    if not missing_irns:
        return

    # Is this a property we want to create stub records for
    if prop.key == 'associated_record':
        for missing_irn in missing_irns:
            data[prop.key].append(StubModel(irn=missing_irn))
    else:
        log.error('Missing IRN %s in relationship %s(%s).%s', ','.join(str(x) for x in missing_irns), self.model_class.__name__, data['irn'], prop.key)

def _check_foreign_key(self, prop, data):
    """
    If prop is a foreign key column with an integer value in data, check the
    referenced row exists; otherwise stub it (parent_irn) or drop the value.
    """
    try:
        column = prop.columns[0]
        foreign_keys = column.foreign_keys
        alias = column.alias
    except AttributeError:
        # Not a column property we can inspect - nothing to check
        return

    if not foreign_keys:
        return

    # Read one foreign key without removing it from the column's set
    foreign_key = next(iter(foreign_keys))

    # Loop through aliases / key and see if we have a foreign key value
    candidate_names = self.ensure_list(alias if alias else prop.key)
    foreign_key_value = None

    for candidate_name in candidate_names:
        foreign_key_value = data.get(candidate_name)
        if foreign_key_value:
            break

    # Only integer values are checked
    if not (foreign_key_value and isinstance(foreign_key_value, int)):
        return

    # We do have a foreign key value, so now perform check to see if it exists
    result = self.session.execute("SELECT COUNT(*) as exists FROM %s WHERE %s = :foreign_key_value" % (foreign_key.column.table, foreign_key.column.name), {'foreign_key_value': foreign_key_value})
    row = result.fetchone()

    if row.exists:
        return

    # If the record doesn't exist, create a stub for part parents
    if prop.key == 'parent_irn':
        self.session.add(StubModel(irn=foreign_key_value))
    else:
        # Otherwise, delete the property so it is not used
        # Need to ensure all candidate names are unset
        for candidate_name in candidate_names:
            data.pop(candidate_name, None)

        log.error('%s(%s): Missing foreign key %s for %s field. Field removed from record.', self.model_class.__name__, data.get('irn'), foreign_key_value, prop.key)