def save_document(categories, data):
    """Create or update the Document described by ``data`` and return it.

    The record's ``Bestand`` and ``Klassifikation`` fields select the target
    category as ``categories[<Bestand>]['children'][<Klassifikation>]``.
    If either field is empty, nothing is saved and ``None`` is returned.
    An existing document is looked up via ``get_document(uid, category)``
    with the ``Signatur`` field as uid; otherwise a new ``Document`` is
    created for that uid and category.
    """
    bestand = get_field(data, './/Bestand')
    klassifikation = get_field(data, './/Klassifikation')
    if not (bestand and klassifikation):
        return

    category = categories[bestand]['children'][klassifikation]
    signature = get_field(data, './/Signatur')

    document = get_document(signature, category)
    if not document:
        # First import of this record: bind it to its category and uid.
        document = Document()
        document.category = [category]
        document.uid = signature

    document.title = get_field(data, './/Titel')
    document.description = get_field(data, './/Enthaelt')
    document.orderId = signature

    # Optional fields are only stored when they carry a value.
    extras = {}
    for name in ('Altsignatur', 'Sachbegriffe', 'Personen'):
        value = get_field(data, './/%s' % name)
        if value:
            extras[name] = value
    document.extraFields = extras

    # save document
    document.save()
    logger.info('dataimport.eadddb.document', 'document %s saved' % document.id)
    return document
def save_document(category, data):
    """Create or update the Document described by ``data`` and return it.

    The ``Inventar-Nummer`` field is used as uid and order id. An existing
    document is looked up via ``get_document(uid, category)``; otherwise a
    new ``Document`` is created for that uid and category. After the document
    is saved, a ``File`` stub (``binaryExists = False``) is created for every
    ``Image`` element whose ``Abbildung`` attribute is not yet registered
    for this document.

    Args:
        category: category the document belongs to.
        data: XML element of one record; must support ``xpath``.

    Returns:
        The saved document.

    Raises:
        ValueError: if a 10- or 21-character ``Zeitbezug_norm`` value is not
            formatted as ``YYYY-MM-DD`` / ``YYYY-MM-DD/YYYY-MM-DD``.
    """
    uid = get_field(data, './/Inventar-Nummer')
    document = get_document(uid, category)
    if not document:
        document = Document()
        document.category = [category]
        document.uid = uid

    document.title = get_field(data, './/Titel')
    document.description = get_field(data, './/Beschreibung_Inhalt/Inhalt')
    document.orderId = uid
    document.licence = get_field(data, './/Rechteerklaerung/Rechtsstatus')
    document.author = get_field(data, './/Rechteerklaerung/creditline')

    # Free-text date: main term, optionally followed by "; <addon>".
    date_text = get_field(
        data, './/Entstehung/Datierung_Herstellung/Dat_Begriff')
    date_addon_text = get_field(
        data, './/Entstehung/Datierung_Herstellung/DatZusatz')
    if date_addon_text:
        if date_text:
            date_text += '; ' + date_addon_text
        else:
            date_text = date_addon_text
    document.date_text = date_text

    # Normalized date: the format is recognized by string length only.
    date_str = get_field(
        data, './/Beschreibung_Inhalt/Zeitbezuege/Zeitbezug_norm')
    if date_str:
        if len(date_str) == 4:
            # Year only: cover the whole year.
            # NOTE(review): these are stored as strings while the branches
            # below store date objects - confirm the field accepts both.
            document.dateBegin = '%s-01-01' % date_str
            document.dateEnd = '%s-12-31' % date_str
        elif len(date_str) == 10:
            document.date = datetime.strptime(date_str, '%Y-%m-%d').date()
        elif len(date_str) == 21:
            date_arr = date_str.split('/')
            if len(date_arr) == 2:
                document.dateBegin = datetime.strptime(
                    date_arr[0], '%Y-%m-%d').date()
                document.dateEnd = datetime.strptime(
                    date_arr[1], '%Y-%m-%d').date()

    # Optional fields, stored under the last path segment of their key.
    extra_fields = {}
    extra_keys = (
        'Sachgebiet',
        'Objektname',
        'Objektklasse',
        'Material',
        'Beschreibung_Inhalt/Objektgeschichte',
    )
    for key in extra_keys:
        item = get_field(data, './/%s' % key)
        if item:
            extra_fields[key.split('/')[-1]] = item
    document.extraFields = extra_fields

    # save document
    document.save()
    logger.info('dataimport.eadddb.document', 'document %s saved' % document.id)

    # Register image stubs; files already known for this document are kept.
    for file_raw in data.xpath('.//Image'):
        external_id = file_raw.get('Abbildung')
        if not external_id:
            continue
        if File.objects(externalId=external_id, document=document).first():
            continue
        image_file = File()
        image_file.document = document
        image_file.externalId = external_id
        image_file.fileName = external_id
        image_file.binaryExists = False
        image_file.save()
    return document
def save_document(data, parent, nsmap):
    """Create or update the Document for the XML element ``data``.

    Args:
        data: XML element of one record; must support ``xpath`` and ``get``.
        parent: category the document is attached to.
        nsmap: namespace mapping providing the ``ns`` prefix used in the
            XPath expressions.

    Returns:
        * the falsy result of ``get_identifier`` if the element has no
          identifier,
        * the tuple ``(False, False)`` if the element has an
          ``accessrestrict`` child (nothing is saved),
        * otherwise the saved document.

    Raises:
        ValueError: if the ``normal`` attribute of ``unitdate`` is not
            ``YYYY-MM-DD`` or ``YYYY-MM-DD/YYYY-MM-DD``.
    """
    document_id = get_identifier(data, nsmap)
    if not document_id:
        return document_id

    document = get_document(data, parent, nsmap)
    if not document:
        document = Document()
        document.uid = data.get('id')
        document.category = [parent]

    def first_text(path):
        """Return the cleaned text of the first node matching ``path``, or ''."""
        nodes = data.xpath(path, namespaces=nsmap)
        return clean_text(nodes[0].text) if nodes else ''

    # title
    title = first_text('./ns:did/ns:unittitle')
    if '§§ unbekannte Darstellung' in title:
        # The marker is stripped from the title and kept as a flag instead.
        title = title.replace('§§ unbekannte Darstellung', '')
        document.help_required = 1
    document.title = title.strip()

    # restricted
    if data.xpath('./ns:accessrestrict', namespaces=nsmap):
        return False, False

    # order_id
    document.orderId = first_text('./ns:did/ns:unitid')

    # origination
    document.origination = first_text('./ns:did/ns:origination')

    # description (findbuch-specific?)
    document.description = first_text("./ns:did/ns:abstract[@type='Enthält']")

    # note
    # The serialized output of etree.tostring() has no ``.text`` attribute,
    # so the former ``etree.tostring(note[0]).text`` raised AttributeError
    # for every element with a note. Join the text of the note and its
    # descendants instead.
    note = data.xpath('./ns:did/ns:note', namespaces=nsmap)
    document.note = clean_text(''.join(note[0].itertext())) if note else ''

    # date
    unitdate = data.xpath('./ns:did/ns:unitdate', namespaces=nsmap)
    if unitdate:
        date_node = unitdate[0]
        if date_node.text:
            document.date_text = date_node.text
        if 'normal' in date_node.attrib:
            date_normalized = date_node.attrib['normal']
            if '/' in date_normalized:
                date_parts = date_normalized.split('/')
                if date_parts[0] == date_parts[1]:
                    # A range with identical ends is a single day.
                    document.date = datetime.strptime(
                        date_parts[0], '%Y-%m-%d')
                else:
                    # Parse both ends before assigning either one.
                    date_begin = datetime.strptime(date_parts[0], '%Y-%m-%d')
                    date_end = datetime.strptime(date_parts[1], '%Y-%m-%d')
                    document.dateBegin = date_begin
                    document.dateEnd = date_end
            else:
                document.date = datetime.strptime(date_normalized, '%Y-%m-%d')

    # files
    files = []
    files_xml = data.xpath('./ns:daogrp/ns:daodesc/ns:list/ns:item',
                           namespaces=nsmap)
    for file_xml in files_xml:
        name_nodes = file_xml.xpath('./ns:name', namespaces=nsmap)
        if not name_nodes:
            continue
        file_name = name_nodes[0].text
        if not file_name:
            continue
        # The external id is "<element id>-<file name>".
        external_id = data.get('id') + '-' + file_name
        files.append(File.objects(externalId=external_id).upsert_one(
            set__externalId=external_id,
            set__fileName=file_name,
        ))
    document.files = files

    # all other values
    extra_fields = {}
    for extra_field_raw in data.xpath('./ns:odd', namespaces=nsmap):
        field_title = extra_field_raw.xpath('./ns:head', namespaces=nsmap)
        field_value = extra_field_raw.xpath('./ns:p', namespaces=nsmap)
        if not (field_title and field_value):
            continue
        field_name = clean_text(field_title[0].text)
        if field_name:
            extra_fields[field_name] = clean_text(field_value[0].text)
    if extra_fields:
        document.extraFields = extra_fields

    # save document
    document.save()
    logger.info('dataimport.eadddb.document', 'document %s saved' % document.id)
    return document