コード例 #1
0
def save_document(categories, data):
    """Create or update one document from an import record and save it.

    The category is resolved from the record's ``Bestand`` and
    ``Klassifikation`` fields via
    ``categories[<Bestand>]['children'][<Klassifikation>]``; the document
    is identified by the record's ``Signatur`` field.

    Args:
        categories: Nested mapping of primary key -> ``{'children': {...}}``.
        data: Record passed through to ``get_field`` (defined elsewhere).

    Returns:
        The saved document, or ``None`` when either category field is
        missing from the record.
    """
    bestand = get_field(data, './/Bestand')
    klassifikation = get_field(data, './/Klassifikation')
    if not (bestand and klassifikation):
        return None
    category = categories[bestand]['children'][klassifikation]

    signature = get_field(data, './/Signatur')
    document = get_document(signature, category)
    if not document:
        # First time this signature is seen in this category.
        document = Document()
        document.category = [category]
        document.uid = signature

    document.title = get_field(data, './/Titel')
    document.description = get_field(data, './/Enthaelt')
    document.order_id = signature

    # Optional fields are only stored when the record provides a value.
    optional_values = (
        (name, get_field(data, './/%s' % name))
        for name in ('Altsignatur', 'Sachbegriffe', 'Personen')
    )
    document.extra_fields = {
        name: value for name, value in optional_values if value
    }

    document.save()
    logger.info('dataimport.eadddb.document',
                'document %s saved' % document.id)
    return document
コード例 #2
0
def save_document(category, data):
    """Create or update the document for one record and register its images.

    The document is identified by the record's ``Inventar-Nummer`` field
    within ``category``. After the document is saved, a ``File`` entry
    (with ``binary_exists = False``) is created for every ``Image`` element
    carrying an ``Abbildung`` attribute that is not yet registered for this
    document.

    Args:
        category: Category the document belongs to.
        data: Record element; read through ``get_field`` (defined
            elsewhere) and ``data.xpath``.

    Returns:
        The saved document.

    Raises:
        ValueError: From ``datetime.strptime`` when a normalized date of
            length 10 or 21 is not in ``YYYY-MM-DD`` form.
    """
    uid = get_field(data, './/Inventar-Nummer')
    document = get_document(uid, category)
    if not document:
        document = Document()
        document.category = [category]
        document.uid = uid

    document.title = get_field(data, './/Titel')
    document.description = get_field(data, './/Beschreibung_Inhalt/Inhalt')
    document.order_id = uid
    document.licence = get_field(data, './/Rechteerklaerung/Rechtsstatus')
    document.author = get_field(data, './/Rechteerklaerung/creditline')

    # Free-text date, optionally extended by an addon separated with '; '.
    document.date_text = get_field(
        data, './/Entstehung/Datierung_Herstellung/Dat_Begriff')
    date_addon_text = get_field(
        data, './/Entstehung/Datierung_Herstellung/DatZusatz')
    if date_addon_text:
        if document.date_text:
            document.date_text += '; ' + date_addon_text
        else:
            document.date_text = date_addon_text

    # Normalized date; the format is recognised purely by string length:
    # 4 -> year, 10 -> single day, 21 -> 'begin/end' range of days.
    date_str = get_field(data,
                         './/Beschreibung_Inhalt/Zeitbezuege/Zeitbezug_norm')
    if date_str:
        if len(date_str) == 4:
            # NOTE(review): this branch stores strings while the other
            # branches store date objects -- confirm the field accepts both.
            document.date_begin = '%s-01-01' % date_str
            document.date_end = '%s-12-31' % date_str
        elif len(date_str) == 10:
            document.date = datetime.strptime(date_str, '%Y-%m-%d').date()
        elif len(date_str) == 21:
            date_arr = date_str.split('/')
            if len(date_arr) == 2:
                document.date_begin = datetime.strptime(
                    date_arr[0], '%Y-%m-%d').date()
                document.date_end = datetime.strptime(
                    date_arr[1], '%Y-%m-%d').date()

    # Optional fields, stored under the last path segment of their key.
    # Each key is listed once ('Sachgebiet' used to be looked up twice).
    extra_fields = {}
    for key in (
            'Sachgebiet', 'Objektname', 'Objektklasse', 'Material',
            'Beschreibung_Inhalt/Objektgeschichte',
    ):
        item = get_field(data, './/%s' % key)
        if item:
            # split() on a key without '/' yields the key itself.
            extra_fields[key.split('/')[-1]] = item
    document.extra_fields = extra_fields

    # save document
    document.save()
    logger.info('dataimport.eadddb.document',
                'document %s saved' % document.id)

    for file_raw in data.xpath('.//Image'):
        image_name = file_raw.get('Abbildung')
        if not image_name:
            continue
        already_known = File.objects(externalId=image_name,
                                     document=document).first()
        if already_known:
            continue
        file = File()
        file.document = document
        file.externalId = image_name
        file.fileName = image_name
        file.binary_exists = False
        file.save()
    return document
コード例 #3
0
def get_document(uid, parent):
    """Return the first document with ``uid`` whose category is ``[parent]``.

    The result is whatever ``.first()`` yields for the query, so a falsy
    value is expected when nothing matches (callers test it with ``not``).
    """
    matches = Document.objects(category=[parent], uid=uid)
    return matches.first()
コード例 #4
0
def save_document(data, parent, nsmap):
    """Create or update a document from one EAD-style XML element and save it.

    Args:
        data: XML element of the record; queried with ``data.xpath`` using
            the ``ns:`` prefix.
        parent: Category the document belongs to.
        nsmap: Namespace mapping passed to every XPath query.

    Returns:
        * the falsy identifier itself when ``get_identifier`` yields none,
        * ``(False, False)`` when the record has an ``accessrestrict``
          element (nothing is saved in that case),
        * otherwise the saved document.

    Raises:
        ValueError: From ``datetime.strptime`` when the ``normal``
            attribute of ``unitdate`` is not in ``YYYY-MM-DD`` form.
    """
    document_id = get_identifier(data, nsmap)
    if not document_id:
        return document_id

    def first_text(path):
        """Cleaned text of the first node matching ``path``, or ''."""
        nodes = data.xpath(path, namespaces=nsmap)
        return clean_text(nodes[0].text) if nodes else ''

    document = get_document(data, parent, nsmap)
    if not document:
        document = Document()
        # NOTE(review): the lookup above matches on get_identifier(), but
        # the uid stored here is the raw 'id' attribute -- confirm that
        # both are always the same value.
        document.uid = data.get('id')
        document.category = [parent]

    # title
    title = first_text('./ns:did/ns:unittitle')
    if '§§ unbekannte Darstellung' in title:
        # The marker is stripped from the title and kept as a flag instead.
        title = title.replace('§§ unbekannte Darstellung', '')
        document.help_required = 1
    document.title = title.strip()

    # restricted
    # NOTE(review): this returns a (truthy) tuple, unlike the other exits;
    # kept as is because callers may rely on that shape.
    if data.xpath('./ns:accessrestrict', namespaces=nsmap):
        return False, False

    # order_id
    document.order_id = first_text('./ns:did/ns:unitid')

    # origination
    document.origination = first_text('./ns:did/ns:origination')

    # description (findbuch-specific?)
    document.description = first_text(
        "./ns:did/ns:abstract[@type='Enthält']")

    # note
    # The former `etree.tostring(note[0]).text` raised AttributeError
    # (the serialized result has no `.text`); collect the text of the note
    # and all of its descendants instead.
    note = data.xpath('./ns:did/ns:note', namespaces=nsmap)
    document.note = clean_text(''.join(note[0].itertext())) if note else ''

    # date
    unitdate = data.xpath('./ns:did/ns:unitdate', namespaces=nsmap)
    if unitdate:
        unitdate = unitdate[0]
        if unitdate.text:
            document.date_text = unitdate.text
        normal = unitdate.attrib.get('normal')
        if normal is not None:
            if '/' in normal:
                # 'begin/end'; identical halves denote a single day.
                parts = normal.split('/')
                if parts[0] == parts[1]:
                    document.date = datetime.strptime(parts[0], '%Y-%m-%d')
                else:
                    # Parse both halves before assigning either.
                    begin = datetime.strptime(parts[0], '%Y-%m-%d')
                    end = datetime.strptime(parts[1], '%Y-%m-%d')
                    document.date_begin = begin
                    document.date_end = end
            else:
                document.date = datetime.strptime(normal, '%Y-%m-%d')

    # files
    files = []
    files_xml = data.xpath('./ns:daogrp/ns:daodesc/ns:list/ns:item',
                           namespaces=nsmap)
    for file_xml in files_xml:
        name_nodes = file_xml.xpath('./ns:name', namespaces=nsmap)
        if not name_nodes:
            continue
        file_name = name_nodes[0].text
        if not file_name:
            continue
        # External id is '<record id>-<file name>'.
        external_id = data.get('id') + '-' + file_name
        file = File.objects(externalId=external_id).upsert_one(
            set__externalId=external_id,
            set__fileName=file_name,
        )
        files.append(file)
    document.files = files

    # all other values
    extra_fields = {}
    for extra_field_raw in data.xpath('./ns:odd', namespaces=nsmap):
        heads = extra_field_raw.xpath('./ns:head', namespaces=nsmap)
        paragraphs = extra_field_raw.xpath('./ns:p', namespaces=nsmap)
        if not (heads and paragraphs):
            continue
        field_name = clean_text(heads[0].text)
        if field_name:
            extra_fields[field_name] = clean_text(paragraphs[0].text)
    # Existing extra_fields are left untouched when the record has none.
    if extra_fields:
        document.extra_fields = extra_fields

    # save document
    document.save()
    logger.info('dataimport.eadddb.document',
                'document %s saved' % document.id)
    return document
コード例 #5
0
def get_document(data, parent, nsmap):
    """Return the first stored document for ``data`` in category ``[parent]``.

    The uid used for the lookup is obtained from
    ``get_identifier(data, nsmap)`` (defined elsewhere).
    """
    identifier = get_identifier(data, nsmap)
    matches = Document.objects(category=[parent], uid=identifier)
    return matches.first()