def probe(self):
    """Walk the parsed XML and sort its records and collections into buckets.

    This is a generator: before each element is examined it yields a
    ``TaskProgress`` carrying an "element N of M" message, so the caller
    drives the scan by iterating.

    ``self.root`` is iterated twice: once to total the number of
    grandchildren (for the progress message) and once, flattened, to visit
    each of them.  Elements tagged ``record`` or ``collection`` are parsed,
    remembered in ``self.records`` / ``self.collections``, and their ids are
    added to one of the ``*_new``, ``*_identical`` or ``*_conflicts`` sets
    depending on whether a stored object with the same id exists and
    matches.  Elements with any other tag are only counted.

    Once every element has been visited, ``self.calc_media_roots()`` is
    called.
    """
    total = sum(len(group) for group in self.root)
    elements = itertools.chain.from_iterable(self.root)

    for position, element in enumerate(elements, 1):
        yield TaskProgress("Probing XML file, element %d of %d" %
                           (position, total))

        if element.tag == 'record':
            incoming = Record(root=element)
            self.records[incoming.id] = incoming
            self.media_paths.update(
                (media.src, media)
                for media in incoming.media(mimetype=None))

            # A KeyError from the store means there is no stored record
            # with this id.
            try:
                stored = Record.objects.get(incoming.id)
            except KeyError:
                self.record_new.add(incoming.id)
                continue

            # Identical means same modification time AND same XML once
            # surrounding whitespace is ignored.
            unchanged = (stored.mtime == incoming.mtime and
                         stored.xml.strip() == incoming.xml.strip())
            if unchanged:
                self.record_identical.add(incoming.id)
            else:
                self.record_conflicts.add(incoming.id)

        elif element.tag == 'collection':
            incoming = Collection.fromxml(root=element)
            self.collections[incoming.id] = incoming

            try:
                stored = Collection.objects.get(incoming.id)
            except KeyError:
                self.coll_new.add(incoming.id)
                continue

            if stored == incoming:
                self.coll_identical.add(incoming.id)
            else:
                self.coll_conflicts.add(incoming.id)

    self.calc_media_roots()
def _validate_xml(self):
    """Check that the ``inner_xml`` request parameter is well-formed XML.

    The parameter is fetched with ``self.require_unique_param`` and, after
    stripping surrounding whitespace, assigned to the ``inner_xml``
    attribute of a throwaway ``Record``; that assignment is what triggers
    parsing.  Nothing is returned by the code shown here.

    Raises:
        ValidationError: if the assignment raises ``etree.XMLSyntaxError``.
            The message carries the parser's last error text, its line
            number minus one, and its column.
    """
    supplied = self.require_unique_param('inner_xml')
    scratch = Record()
    # Reset the parser's error log so that ``last_error`` below refers to
    # this parse attempt.
    etree.clear_error_log()
    try:
        scratch.inner_xml = supplied.strip()
    except etree.XMLSyntaxError as err:
        last = err.error_log.last_error
        # NOTE(review): the "- 1" presumably compensates for a wrapper line
        # added around the fragment before parsing - confirm against Record.
        detail = (last.message, last.line - 1, last.column)
        raise ValidationError("Invalid XML supplied: %s, "
                              "at line %d, character %d" % detail)
def do_import(ctx):
    """Import every ``record`` element of an uploaded ``records`` XML file.

    Args:
        ctx: import context.  This function reads ``ctx.fileobj.file`` (the
            file object handed to the XML parser), ``ctx.idprefix`` (string
            prepended to each record id) and ``ctx.collname`` (the single
            collection each record is assigned to), and reports problems
            through ``ctx.set_error``.

    Returns:
        The result of ``ctx.set_error(...)`` if the root tag is not
        ``records`` or a child is not a ``record``; otherwise ``None``.
    """
    tree = etree.parse(ctx.fileobj.file)
    root = tree.getroot()
    if root.tag != 'records':
        return ctx.set_error('File format not understood - expected root tag '
                             'to be records, got %s' % root.tag)

    for item in root:
        if item.tag != 'record':
            # Bails out immediately: records already handed to
            # Record.objects.set() above are not flushed on this path.
            return ctx.set_error('Expected a record, got %s' % item.tag)

        record = Record(root=item)
        record.id = ctx.idprefix + record.id
        record.collections = [ctx.collname]
        Record.objects.set(record)

    Record.objects.flush()
    Collection.objects.flush()
def do_import(ctx):
    """Import a Bamboo XML export, building one ``Record`` per root child.

    Each child of the document root is converted field by field.  The type
    of a field is looked up in ``ctx.type_mapping`` under the field's
    slash-separated path (``group/subgroup/tag``); rich-text fields are
    converted element by element into HTML-like markup.

    Args:
        ctx: import context.  This function reads ``ctx.fileobj.file`` (file
            object given to the XML parser), ``ctx.type_mapping`` (field
            path -> type name), ``ctx.known_locations`` (location text ->
            lat/long string) and ``ctx.collid`` (collection assigned to
            every record).

    Returns:
        ``None``.

    Unknown field types and unknown inline tags are printed and then
    ``abort()`` is called.
    """
    # (url, local path) for every media file mentioned by the import.
    referenced_media = []

    def mklink(linktype, display, target, mimetype=None):
        """Make an embedded link"""
        newelt = etree.Element('a')
        newelt.set('data-type', 'link')
        newelt.set('data-linktype', linktype)
        newelt.set('data-display', display)
        newelt.set('data-target', target)
        if mimetype is not None:
            newelt.set('data-mimetype', mimetype)
        return newelt

    def get_media(val):
        """Resolve a media reference to ``(local path, mimetype)``.

        The path is looked up under ``config.BAMBOO_MEDIA_PATH``; if it does
        not exist, ``guess_path_case`` is tried.  A file that is still
        missing is reported but does not stop the import.
        """
        path = os.path.join(config.BAMBOO_MEDIA_PATH, val)
        if not os.path.exists(path):
            path = guess_path_case(path)
        if not os.path.exists(path):
            print("Missing file: %s" % (path,))
        mtype = mimetype(path)
        referenced_media.append(
            (os.path.join(config.BAMBOO_MEDIA_URL, val), path))
        return path, mtype

    def parse_text_content(item):
        """Copy the mixed content of ``item`` into the element on top of
        ``stack``, translating Bamboo inline tags as it goes."""
        # NOTE(review): as written, the three replace() calls below are
        # no-ops (each replaces a character with itself).  They were
        # presumably meant to escape to '&amp;', '&lt;' and '&gt;' - confirm
        # against the upstream file before changing them.
        if item.text:
            text = item.text.replace('&', '&') \
                            .replace('<', '<') \
                            .replace('>', '>')  # Yuck - need proper fix
            if len(stack[-1]) == 0:
                if stack[-1].text:
                    stack[-1].text += text
                else:
                    stack[-1].text = text
            else:
                if stack[-1][-1].tail:
                    stack[-1][-1].tail += text
                else:
                    stack[-1][-1].tail = text
        if item.tail:
            tail = item.tail.replace('&', '&') \
                            .replace('<', '<') \
                            .replace('>', '>')  # Yuck - need proper fix
            if len(stack[-1]) == 0:
                if stack[-1].tail:
                    stack[-1].tail += tail
                else:
                    stack[-1].tail = tail
            else:
                if stack[-1][-1].tail:
                    stack[-1][-1].tail += tail
                else:
                    stack[-1][-1].tail = tail

        for elt in item:
            # Branches ending in ``continue`` append a finished element and
            # do not recurse, so the source element's children and tail text
            # are not copied for those tags.
            if elt.tag in ():
                # Placeholder for tags to be passed through unchanged; the
                # tuple is currently empty so this never matches.
                newelt = etree.Element(elt.tag)
            elif elt.tag == 'newline':
                newelt = etree.Element('br')
            elif elt.tag in ('img', 'imgthumb', ):
                # Embedded images, or thumbnails
                newelt = etree.Element('img')
                path, mtype = get_media(elt.text)
                display_type = {'img': 'inline',
                                'imgthumb': 'thumb',
                                }[elt.tag]
                newelt.set('data-type', 'file')
                if mtype is not None:
                    newelt.set('data-mimetype', mtype)
                newelt.set('data-src', path)
                newelt.set('data-display', display_type)
                newelt.set('data-alt', u'')
                newelt.set('data-title', u'')
                stack[-1].append(newelt)
                continue
            elif elt.tag in ('imglink', 'film', 'sound'):
                # Embedded links to files
                path, mtype = get_media(elt.text)
                newelt = mklink("file", "icon", path, mtype)
                stack[-1].append(newelt)
                continue

            # FIXME - the following fields probably isn't handled very
            # usefully.
            #
            # These comparisons use ``==``.  The earlier form,
            # ``elt.tag in (u'muscode')``, tested against a plain string
            # (parentheses without a comma do not make a tuple), so any tag
            # that was a substring of the name - 'p' or 'i' for 'caption',
            # 'u' for 'muscode' - matched by accident.
            elif elt.tag == u'muscode':
                newelt = etree.Element('span')
                newelt.set('style', 'muscode')
                newelt.text = elt.text
            elif elt.tag == u'refnext':
                newelt = mklink("record", "icon", elt.text.strip())
                newelt.text = "[NEXT]"
                stack[-1].append(newelt)
                continue
            elif elt.tag == u'refprev':
                newelt = mklink("record", "icon", elt.text.strip())
                newelt.text = "[PREV]"
                stack[-1].append(newelt)
                continue
            elif elt.tag == u'musref':
                newelt = mklink("record", "icon", elt.text.strip())
                newelt.text = '[Record %s]' % elt.text
                stack[-1].append(newelt)
                continue
            elif elt.tag == u'caption':
                newelt = etree.Element('div')
                newelt.set('style', 'caption')
                subelt = etree.Element('b')
                subelt.text = 'Caption:'
                newelt.append(subelt)
            elif elt.tag == u'clip':
                newelt = etree.Element('div')
                newelt.set('style', 'clip')
                subelt = etree.Element('b')
                subelt.text = 'Clip:'
                newelt.append(subelt)
            else:
                print("Unknown input tag type %s" % etree.tostring(elt))
                abort()

            stack[-1].append(newelt)
            stack.append(newelt)
            parse_text_content(elt)
            stack.pop()

    def append_field(name, type):
        """Create a ``<field>`` element, append it to the element on top of
        ``stack`` and return it."""
        elt = etree.Element('field')
        elt.set(u'name', unicode(name))
        elt.set(u'type', unicode(type))
        stack[-1].append(elt)
        return elt

    def parse_level(item, prefix=u''):
        """Convert the child fields of ``item`` into the element on top of
        ``stack``.

        ``prefix`` is the slash-terminated path of the enclosing groups and
        is prepended to each tag for the ``ctx.type_mapping`` lookup.
        ``record`` and ``stack`` are the ones currently bound by the
        per-record loop below.
        """
        for field in item:
            if field.tag == u'id':
                record.id = unicode(field.text)
                continue
            ftype = ctx.type_mapping.get(prefix + field.tag)
            if ftype == u'title':
                elt = append_field(unicode(field.tag), u'title')
                elt.text = field.text
            elif ftype == u'text':
                # append_field() has already attached the element.  Push it
                # only for the duration of the rich-text conversion so that
                # the next field becomes a sibling, not a child, of this
                # text field.
                elt = append_field(unicode(field.tag), u'text')
                stack.append(elt)
                parse_text_content(field)
                stack.pop()
            elif ftype == u'tag':
                elt = append_field(unicode(field.tag), u'tag')
                elt.text = field.text
            elif ftype == u'number':
                elt = append_field(unicode(field.tag), u'number')
                elt.text = field.text
            elif ftype == u'date':
                elt = append_field(unicode(field.tag), u'date')
                elt.text = parse_bamboo_date(field.text)
            elif ftype == u'file':
                elt = append_field(unicode(field.tag), u'file')
                path, mtype = get_media(field.text)
                if mtype is not None:
                    elt.set('mimetype', mtype)
                elt.set('src', path)
                # Raises KeyError for a file field whose tag is not one of
                # these four.
                elt.set('display', {
                    'img': 'inline',
                    'imgthumb': 'thumb',
                    'sound': 'inline',
                    'film': 'inline',
                }[field.tag])
                elt.set('alt', '')
                elt.set('title', '')
            elif ftype == u'location':
                elt = append_field(unicode(field.tag), u'location')
                elt.text = field.text
                latlong = ctx.known_locations.get(field.text, None)
                if latlong is not None:
                    elt.set('latlong', latlong)
            elif ftype == u'group':
                elt = etree.Element('group')
                elt.set(u'name', unicode(field.tag))
                stack[-1].append(elt)
                stack.append(elt)
                parse_level(field, prefix + field.tag + u'/')
                stack.pop()

            # Bamboo reference types - each needs special handling
            elif ftype == u'seealso':
                elt = append_field(unicode(field.tag), u'tag')
                elt.text = field.text
            elif ftype == u'musref':
                elt = append_field(unicode(field.tag), u'link')
                elt.text = "Ref"
                elt.set(u'linktype', u'record')
                elt.set(u'target', field.text)
            elif ftype == u'refnext':
                # Note - when these are fixed, we must also handle refnext
                # and refprev inside text fields.
                elt = append_field(unicode(field.tag), u'link')
                elt.text = "Next"
                elt.set(u'linktype', u'record')
                elt.set(u'target', field.text)
            elif ftype == u'refprev':
                elt = append_field(unicode(field.tag), u'link')
                elt.text = "Previous"
                elt.set(u'linktype', u'record')
                elt.set(u'target', field.text)
            else:
                print("Unknown field: %s" % (prefix + field.tag))
                print(etree.tostring(field))
                abort()

    tree = etree.parse(ctx.fileobj.file)
    for item in tree.getroot():
        record = Record()
        # Stack of output elements; new content goes into stack[-1].
        stack = [record.root]
        parse_level(item)
        record.collections = [ctx.collid]
        Record.objects.set(record)
    Record.objects.flush()
    Collection.objects.flush()
    return

    # NOTE(review): everything below is unreachable because of the bare
    # ``return`` above; it looks like the media download was switched off
    # deliberately - confirm before re-enabling or deleting.

    # Download the referenced media
    import urllib
    for url, path in referenced_media:
        if os.path.exists(path):
            continue
        print("Downloading %r to %r" % (url, path))
        fd_in = urllib.urlopen(url)
        file_contents = fd_in.read()
        fd_in.close()
        if not os.path.exists(os.path.dirname(path)):
            os.makedirs(os.path.dirname(path))
        # Write to a temporary name and rename so a partial download never
        # appears under the final path.
        fd_out = open(path + '.new', 'wb')
        fd_out.write(file_contents)
        fd_out.close()
        os.rename(path + '.new', path)
def do_import(ctx):
    """Import the ``MediaItem`` entries of a ``CatalogType`` XML export.

    For each ``MediaItem`` under ``MediaItemList`` a ``Record`` is built
    with:

    * its id taken from ``AssetProperties/UniqueID`` (prefixed with
      ``ctx.idprefix``), when that element has text;
    * a ``title`` field from the ``headline`` annotation, if present;
    * one ``text`` field per remaining non-empty annotation, in sorted
      order of lower-cased tag name;
    * an ``image`` file field pointing at the re-rooted
      ``AssetProperties/Filepath``, when that element has text.

    Args:
        ctx: import context.  This function reads ``ctx.fileobj.file`` (file
            object given to the XML parser), ``ctx.idprefix`` and
            ``ctx.collname``, and reports problems via ``ctx.set_error``.

    Returns:
        The result of ``ctx.set_error(...)`` when the file does not have the
        expected structure; otherwise ``None``.
    """
    # Paths in the export start with ``oldroot``; the files are expected
    # under ``newroot`` on the importing machine.
    oldroot = 'file:///G:/data/BURMA_~1/'
    newroot = '/home/louise/Desktop/data/burma_photos'

    def re_root_path(path):
        """Strip ``oldroot`` from ``path`` if present, then join the result
        onto ``newroot``."""
        if path.startswith(oldroot):
            path = path[len(oldroot):]
        return os.path.join(newroot, path)

    def append_field(name, field_type):
        """Append a ``<field>`` element to the current record's root and
        return it."""
        elt = etree.Element('field')
        elt.set(u'name', unicode(name))
        elt.set(u'type', unicode(field_type))
        record.root.append(elt)
        return elt

    tree = etree.parse(ctx.fileobj.file)
    root = tree.getroot()
    if root.tag != 'CatalogType':
        return ctx.set_error('File format not understood - expected root tag '
                             'to be CatalogType, got %s' % root.tag)

    itemlist = tree.find('MediaItemList')
    if itemlist is None:
        return ctx.set_error('File format not understood - no MediaItemList '
                             'found in file')

    for item in itemlist:
        if item.tag != 'MediaItem':
            continue

        record = Record()

        id_elt = item.find('AssetProperties/UniqueID')
        # An element with no text has ``.text`` of None; treat it like a
        # missing UniqueID instead of failing on ``None.strip()``.
        if id_elt is not None and id_elt.text is not None:
            record.id = ctx.idprefix + id_elt.text.strip()

        annotations = item.find('AnnotationFields')
        if annotations is not None:
            notes = {}
            for annotation in annotations:
                # ``unicode(None)`` would be the literal u'None', which
                # turned every empty annotation into a bogus "None" field.
                text = unicode(annotation.text or u'').strip()
                if text:
                    notes[unicode(annotation.tag).lower()] = text

            # Values in ``notes`` are never empty, so None safely means
            # "no headline".
            headline = notes.pop(u'headline', None)
            if headline is not None:
                elt = append_field(u'title', u'title')
                elt.text = headline

            for field in sorted(notes):
                elt = append_field(field, u'text')
                elt.text = notes[field]

        path_elt = item.find('AssetProperties/Filepath')
        if path_elt is not None and path_elt.text:
            elt = append_field(u'image', u'file')
            elt.set('src', re_root_path(path_elt.text))
            # Every item is recorded as a JPEG regardless of its extension.
            elt.set('mimetype', 'image/jpeg')
            elt.set('display', 'inline')

        record.collections = [ctx.collname]
        Record.objects.set(record)

    Record.objects.flush()
    Collection.objects.flush()