def test_critical(self):
    # Log a critical message carrying a one-off marker, then look that
    # marker up in the log table.
    marker = uuid.uuid4().hex
    log.critical(marker)

    # one() raises unless exactly one LogModel row has the marker as its msg
    matching_rows = self.session.query(LogModel).filter(LogModel.msg == marker)
    matching_rows.one()
def process(self, data):
    """
    Create or update the database record described by ``data``.

    An existing record (looked up by IRN) is rebuilt from ``data``. If the
    existing record is a ``StubModel`` it is first converted in place to
    ``self.model_class`` and reloaded. If the lookup raises ``NoResultFound``
    a new ``self.model_class`` instance is built instead. The record is then
    merged into the session and committed.

    :param data: dict of field values for the record; must contain 'irn'
    :return: None
    """
    irn = data['irn']
    log.debug('Processing %s record %s', self.model_class.__name__.lower(), irn)

    try:
        # Do we already have a record for this?
        record = self.get_record(irn)

        # Is this a stub record? If it is, we want to change the type and reload.
        # Seems a bit of a hack, but SQLAlchemy does not have a simple way of modifying the type
        # This only runs for catalogue records
        if isinstance(record, StubModel):
            polymorphic_type = self.model_class.__mapper_args__['polymorphic_identity']
            table_name = self.model_class.__mapper__.local_table.name

            # Manually set type
            self.session.execute(
                'UPDATE %s.catalogue SET type=:type WHERE irn=:irn' % self.keemu_schema,
                {'type': polymorphic_type, 'irn': irn}
            )

            # If this has a child table, insert the IRN so updates will work
            if table_name != 'specimen':
                # And create empty row in the polymorphic table
                self.session.execute(
                    'INSERT INTO %s.%s (irn) VALUES (:irn)' % (self.keemu_schema, table_name),
                    {'irn': irn}
                )

            # Commit & expunge so the item can be reloaded
            self.session.commit()
            self.session.expunge(record)
            record = self.get_record(irn)

        # Process the relationships
        data = self._process_relationships(data, record)
        # Populate the data
        record.rebuild(**data)

    except NoResultFound:
        data = self._process_relationships(data)
        # Create a new record
        record = self.model_class(**data)

    try:
        self.session.merge(record)
        self.session.commit()
    except DataError:
        # A failed flush leaves the session's transaction inactive; roll back
        # so the session can be used again for the next record.
        self.session.rollback()
        # Save this error to the log - will need to follow up on these.
        # The dict is passed as the logging args so it travels with the log record;
        # exc_info=True attaches the exception currently being handled.
        log.critical('DB DataError: record %s not created.' % data['irn'], {'data': data}, exc_info=True)
def process(self, data):
    """
    Filter and pre-process one catalogue record, then pass it to the parent task.

    Records are skipped (nothing is passed on) when no model class can be
    resolved, when 'ColDepartment' or 'AdmDateInserted' is missing, when
    'SecRecordStatus' is one of the excluded statuses, when the record is a
    Linnean Society ('LINN') herbarium record, or when it is an Artefact with
    neither 'ArtKind' nor 'ArtName'.

    For records that pass, ``data`` is enriched in place with determinations
    ('specimen_taxonomy'), host/parasite associations
    ('host_parasite_taxonomy') and a few derived fields before
    ``super().process(data)`` is called.

    :param data: dict of field values for the record; must contain 'irn'
    :return: None
    """
    # Try and get the model class
    self.model_class = self.get_model_class(data)

    # If we don't have a model class, continue to next record
    if not self.model_class:
        record_type = data.get('ColRecordType', 'Missing')
        if record_type in self.excluded_types:
            # Record type is one we've knowingly excluded
            log.debug('Skipping record %s: No model class for %s', data['irn'], record_type)
        else:
            # Critical error - log to DB
            log.critical('Unknown model class %s for %s. Investigate and then add to [excluded_types] if not required.', record_type, data['irn'])
        return None

    # Filter out some of the records
    if 'ColDepartment' not in data:
        log.debug('Skipping record %s: No collection department', data['irn'])
        return None

    if 'AdmDateInserted' not in data:
        log.debug('Skipping record %s: No AdmDateInserted', data['irn'])
        return None

    # Skip records whose SecRecordStatus is one of these values
    skipped_statuses = ('DELETE', 'Reserved', 'Stub', 'Stub Record', 'DELETE-MERGED')
    if data.get('SecRecordStatus') in skipped_statuses:
        log.debug('Skipping record %s: Incorrect record status', data['irn'])
        return None

    # Botany records include ones from Linnean Society. Should be excluded.
    if data.get('RegHerbariumCurrentOrgAcroLocal') == 'LINN':
        log.debug('Skipping record %s: Non-BM botany record', data['irn'])
        return None

    # 4257 Artefacts have no kind or name. Skip them.
    # ColRecordType is read with .get(), as above, so a record without it
    # does not raise KeyError here.
    if data.get('ColRecordType') == 'Artefact' and 'ArtKind' not in data and 'ArtName' not in data:
        return None

    # Process determinations
    determinations = data.get('EntIdeTaxonRef', None) or data.get('EntIndIndexLotTaxonNameLocalRef', None)
    if determinations:
        determinations = self.ensure_list(determinations)
        filed_as_irn = data.get('EntIdeFiledAsTaxonRef', None)

        # Load the taxonomy records for these determinations
        taxonomy_records = self.session.query(TaxonomyModel).filter(TaxonomyModel.irn.in_(determinations)).all()

        # Only taxa actually found in the database get a determination.
        # This acts as a filter, removing all duplicates / missing taxa.
        data['specimen_taxonomy'] = [
            Determination(
                taxonomy_irn=taxonomy_record.irn,
                specimen_irn=data['irn'],
                filed_as=(taxonomy_record.irn == filed_as_irn)
            )
            for taxonomy_record in taxonomy_records
        ]

    # Parasite card host / parasites
    host_parasites = {
        'host': data.get('CardHostRef', []),
        'parasite': data.get('CardParasiteRef', []),
    }
    stages = self.ensure_list(data.get('CardParasiteStage', []))

    for host_parasite_type, refs in host_parasites.items():
        for i, ref in enumerate(self.ensure_list(refs)):
            # Stages are matched to refs by position; refs beyond the end of
            # the stage list get no stage
            stage = stages[i] if i < len(stages) else None
            assoc_object = HostParasiteAssociation(
                taxonomy_irn=ref,
                parasite_card_irn=data['irn'],
                parasite_host=host_parasite_type,
                stage=stage
            )
            data.setdefault('host_parasite_taxonomy', []).append(assoc_object)

    # Some special field mappings

    # Try to use PalDetDate if DarYearIdentified is missing
    if 'DarYearIdentified' not in data:
        try:
            date_matches = self.re_date.search(data['PalDetDate'])
        except (KeyError, TypeError):
            # PalDetDate doesn't exist or isn't a string (it can also be a list if
            # there are multiple determination dates - which we ignore)
            date_matches = None

        if date_matches:
            data['DarYearIdentified'] = date_matches.group(1)
            data['DarMonthIdentified'] = date_matches.group(2)
            data['DarDayIdentified'] = date_matches.group(3)

    # EntCatCatalogueNumber requires EntCatPrefix if it's used in catalogue_number
    try:
        data['EntCatCatalogueNumber'] = '{0}{1}'.format(data['EntCatPrefix'], data['EntCatCatalogueNumber'])
    except KeyError:
        # Either part is missing - leave the record as it is
        pass

    # Set egg part type if not already set
    if self.model_class is EggModel and 'PrtType' not in data:
        data['PrtType'] = 'egg'

    super(CatalogueTask, self).process(data)
def _process_relationships(self, data, record=None):
    """
    Resolve relationship and foreign key fields in ``data`` for ``self.model_class``.

    Walks every mapped property of ``self.model_class`` that is not already
    a key in ``data`` and:

    * for relationships to secondary objects (child model whose primary key
      is not 'irn'): builds a list of child model instances, reusing existing
      rows when the child table has a unique constraint;
    * for simple relationships to other records: loads the related records by
      IRN, creating stubs for missing 'associated_record' IRNs and logging
      any other missing IRNs;
    * for foreign key columns: checks the referenced row exists, creating a
      stub for a missing 'parent_irn' and otherwise removing the field.

    :param data: dict of field values for the record; modified in place
    :param record: accepted for interface compatibility; not read by this method
    :return: the same ``data`` dict
    """
    # Basic relationship handling.
    # More complex scenarios are handled in the individual processing functions
    for prop in class_mapper(self.model_class).iterate_properties:

        # Skip the field if the property key is already set in the data object
        # The field has been set in the import types custom preprocess function
        if prop.key in data:
            continue

        # Is this a relationship property?
        # NB: The exact type check is deliberate. It excludes backrefs, which will be
        # using sqlalchemy.orm.properties.RelationshipProperty, not our own
        if type(prop) is RelationshipProperty:

            # Try and find a child model to use for this relationship
            try:
                child_model = prop.mapper.class_
                # If the child model has irn primary key, it relates to a KE EMu record
                # And a simple relationship should be used
                if child_model.__mapper__.primary_key[0].key == 'irn':
                    child_model = None
            except AttributeError:
                child_model = None

            if child_model:
                # This is a relationship to a secondary object like SexStage

                # If unique, we'll try loading the values from the database first
                # And only create if they don't exist
                unique = any(
                    type(constraint) is UniqueConstraint
                    for constraint in child_model.__table__.constraints
                )

                # Map every source field alias to the child column it populates
                fields = {}
                for column in child_model.__table__.columns:
                    if column.alias:
                        for alias in self.ensure_list(column.alias):
                            fields[alias] = column.key

                # Populate a list of fields
                data_fields = self._populate_subfield_data(fields.keys(), data)

                # If we have data retrieve / create a model record
                if data_fields:
                    data[prop.key] = []

                    for field_list in data_fields:

                        # Sometimes nothing is populated - for example, EntSexSex just has None
                        # We want to skip these
                        if all(value is None for value in field_list.values()):
                            continue

                        if unique:
                            # Try and get record from database
                            try:
                                filters = []
                                for alias, key in fields.items():
                                    # Do we have a value for this field
                                    if alias not in field_list:
                                        field_list[alias] = None

                                    # String fields should always be lower case & '' for null to
                                    # ensure unique constraints work correctly
                                    if isinstance(child_model.__table__.columns[key].type, String):
                                        try:
                                            # NOTE(review): the lower-cased result is discarded, so the
                                            # value is not actually normalised here; only the
                                            # None -> '' substitution below takes effect. Confirm the
                                            # intent before assigning it back, as that would change
                                            # what is queried and stored.
                                            field_list[alias].lower()
                                        except AttributeError:
                                            field_list[alias] = ''

                                    # Build the filters
                                    filters.append(getattr(child_model, key) == field_list[alias])

                                # Run the query
                                data[prop.key].append(self.session.query(child_model).filter(and_(*filters)).one())

                            except NoResultFound:
                                # Not found, create a new one
                                data[prop.key].append(child_model(**field_list))

                        elif 'delete-orphan' in prop.cascade:
                            # If this property has a delete-orphan cascade, everything's fine
                            # SQLa will handle updates, removing old records
                            data[prop.key].append(child_model(**field_list))

                        else:
                            # For non unique / no delete orphan relationships this code would
                            # create duplicate records in the associated table.
                            # Not a problem now, but log a critical error in case it ever happens
                            log.critical('Record %s: Non-unique relationship used in %s.' % (data['irn'], prop.key))

            else:
                # Basic relationship, in the format:
                # stratigraphy = relationship("StratigraphyModel", secondary=collection_event_stratigraphy, alias='GeoStratigraphyRef')
                irns = []
                for field_name in self.ensure_list(prop.alias):
                    value = data.get(field_name)
                    if value:
                        irns.extend(self.ensure_list(value))

                # Dedupe IRNS & ensure we are not linking to the same record - eg: 687077
                try:
                    irns = list(set(irns))
                    irns.remove(data['irn'])
                except ValueError:
                    # The record does not reference itself
                    pass

                # Do we have any IRNs?
                if irns:
                    # Get the relationship model class
                    relationship_model = prop.argument()

                    # Load the model objects and assign to the property
                    data[prop.key] = self.session.query(relationship_model).filter(relationship_model.irn.in_(irns)).all()

                    # Do we have any missing IRNs
                    existing_irns = set(related.irn for related in data[prop.key])
                    missing_irns = list(set(irns) - existing_irns)

                    if missing_irns:
                        # Is this a property we want to create stub records for
                        if prop.key == 'associated_record':
                            for missing_irn in missing_irns:
                                data[prop.key].append(StubModel(irn=missing_irn))
                        else:
                            log.error('Missing IRN %s in relationship %s(%s).%s', ','.join(str(x) for x in missing_irns), self.model_class.__name__, data['irn'], prop.key)

        else:
            # This isn't a relationship property - but perform check to see if this a foreign key field
            try:
                column = prop.columns[0]

                # Peek at one of the column's foreign keys without mutating the
                # set held on the column
                foreign_key = next(iter(column.foreign_keys), None)
                if foreign_key is None:
                    # Not a foreign key column - nothing to check
                    continue

                # Loop through aliases / key and see if we have a foreign key value
                candidate_names = self.ensure_list(column.alias if column.alias else prop.key)
                foreign_key_value = None
                for candidate_name in candidate_names:
                    foreign_key_value = data.get(candidate_name)
                    if foreign_key_value:
                        break

                # We do have a foreign key value, so now perform check to see if it exists
                if foreign_key_value and isinstance(foreign_key_value, int):

                    result = self.session.execute("SELECT COUNT(*) as exists FROM %s WHERE %s = :foreign_key_value" % (foreign_key.column.table, foreign_key.column.name), {'foreign_key_value': foreign_key_value})
                    count_row = result.fetchone()

                    if not count_row.exists:
                        if prop.key == 'parent_irn':
                            # If the record doesn't exist, create a stub for part parents
                            self.session.add(StubModel(irn=foreign_key_value))
                        else:
                            # Otherwise, delete the property so it is not used
                            # Need to ensure all candidate names are unset
                            for candidate_name in candidate_names:
                                try:
                                    del data[candidate_name]
                                except KeyError:
                                    pass

                            log.error('%s(%s): Missing foreign key %s for %s field. Field removed from record.', self.model_class.__name__, data['irn'], foreign_key_value, prop.key)

            except (AttributeError, KeyError):
                # Property has no columns / alias, or a key was missing - not a
                # foreign key field we can check
                pass

    return data