Esempio n. 1
0
    def probe(self):
        """Scan the parsed XML and classify every record and collection in it.

        This is a generator: it yields a ``TaskProgress`` message just before
        each second-level element of ``self.root`` is examined.

        ``record`` elements are stored in ``self.records`` and their media are
        registered in ``self.media_paths``; ``collection`` elements are stored
        in ``self.collections``.  The id of each item is then added to the
        matching ``*_new``, ``*_identical`` or ``*_conflicts`` set, depending
        on what ``Record.objects`` / ``Collection.objects`` return for it.
        After the last element, ``self.calc_media_roots()`` is called.
        """
        total = sum(len(child) for child in self.root)
        elements = itertools.chain.from_iterable(self.root)
        for position, element in enumerate(elements, 1):
            yield TaskProgress("Probing XML file, element %d of %d" % (position, total))
            tag = element.tag

            if tag == 'record':
                incoming = Record(root=element)
                self.records[incoming.id] = incoming
                self.media_paths.update(
                    (media.src, media)
                    for media in incoming.media(mimetype=None))
                try:
                    stored = Record.objects.get(incoming.id)
                except KeyError:
                    # The lookup raised KeyError: file the id as new.
                    self.record_new.add(incoming.id)
                    continue
                # Identical only when both the mtime and the stripped XML match.
                unchanged = (stored.mtime == incoming.mtime and
                             stored.xml.strip() == incoming.xml.strip())
                if unchanged:
                    self.record_identical.add(incoming.id)
                else:
                    self.record_conflicts.add(incoming.id)

            elif tag == 'collection':
                incoming = Collection.fromxml(root=element)
                self.collections[incoming.id] = incoming
                try:
                    stored = Collection.objects.get(incoming.id)
                except KeyError:
                    # The lookup raised KeyError: file the id as new.
                    self.coll_new.add(incoming.id)
                    continue
                if stored == incoming:
                    self.coll_identical.add(incoming.id)
                else:
                    self.coll_conflicts.add(incoming.id)

        self.calc_media_roots()
Esempio n. 2
0
    def _validate_xml(self):
        """Check that the ``inner_xml`` parameter can be assigned to a Record.

        The parameter is fetched with ``self.require_unique_param``, stripped
        of surrounding whitespace and assigned to ``inner_xml`` on a freshly
        created ``Record``.

        Raises:
            ValidationError: if that assignment raises
                ``etree.XMLSyntaxError``.  The message contains the text,
                line and column of the last entry in the exception's error
                log.
        """
        inner_xml = self.require_unique_param('inner_xml')

        result = Record()
        etree.clear_error_log()
        try:
            result.inner_xml = inner_xml.strip()
        except etree.XMLSyntaxError as e:
            entry = e.error_log.last_error
            # NOTE(review): the reported line is shifted down by one,
            # presumably because the supplied XML is parsed inside a wrapper
            # that adds a leading line - confirm against Record.inner_xml.
            raise ValidationError("Invalid XML supplied: %s, "
                                  "at line %d, character %d" %
                                  (entry.message, entry.line - 1, entry.column))
        # NOTE(review): `result` is built but not returned by the code visible
        # here - confirm whether callers expect the validated Record back.
def do_import(ctx):
    """Import the ``record`` elements of a ``records`` XML file.

    The document is parsed from ``ctx.fileobj.file``.  Each child of the root
    is wrapped in a ``Record``, its id is prefixed with ``ctx.idprefix``, it
    is assigned to the single collection ``ctx.collname`` and stored with
    ``Record.objects.set``.  When every child has been handled, the record
    and collection stores are flushed.

    Returns:
        The result of ``ctx.set_error(...)`` when the root tag is not
        ``records`` or a child is not a ``record``; otherwise ``None``.
        The second error is raised as soon as the offending child is reached,
        so records before it have already been passed to
        ``Record.objects.set`` (but no flush is performed).
    """
    tree = etree.parse(ctx.fileobj.file)
    root = tree.getroot()
    if root.tag != 'records':
        return ctx.set_error('File format not understood - expected root tag '
                             'to be records, got %s' % root.tag)

    for item in root:
        if item.tag != 'record':
            return ctx.set_error('Expected a record, got %s' % item.tag)

        record = Record(root=item)
        record.id = ctx.idprefix + record.id
        record.collections = [ctx.collname]
        Record.objects.set(record)
    Record.objects.flush()
    Collection.objects.flush()
Esempio n. 4
0
def do_import(ctx):
    """Import a Bamboo XML export, turning each child of the root into a Record.

    The document is parsed from ``ctx.fileobj.file``.  For every field of an
    item, ``ctx.type_mapping`` is consulted with the field's tag name
    (prefixed with the names of any enclosing groups, joined by ``/``) to
    decide how the field is converted.  A field without a known mapping, or
    an unknown tag inside a text field, is printed and ``abort()`` is called.

    Every record is assigned to the single collection ``ctx.collid`` and
    stored with ``Record.objects.set``; the record and collection stores are
    flushed once all items have been handled.

    Media references are resolved beneath ``config.BAMBOO_MEDIA_PATH``.  A
    file that cannot be found is only reported with a printed message.
    """
    referenced_media = []

    def mklink(linktype, display, target, mimetype=None):
        """Make an embedded link"""
        newelt = etree.Element('a')
        newelt.set('data-type', 'link')
        newelt.set('data-linktype', linktype)
        newelt.set('data-display', display)
        newelt.set('data-target', target)
        if mimetype is not None:
            newelt.set('data-mimetype', mimetype)
        return newelt

    def get_media(val):
        """Resolve media reference *val*; return ``(path, mimetype)``.

        The (url, path) pair is also remembered in ``referenced_media``.
        """
        path = os.path.join(config.BAMBOO_MEDIA_PATH, val)
        if not os.path.exists(path):
            path = guess_path_case(path)
        if not os.path.exists(path):
            print("Missing file: %s" % (path,))
        mtype = mimetype(path)
        referenced_media.append((os.path.join(config.BAMBOO_MEDIA_URL, val), path))
        return path, mtype

    def escape_markup(text):
        """Replace ``&``, ``<`` and ``>`` in *text* with their entities."""
        # Yuck - need proper fix
        # '&' must be replaced first so the entities added next are not
        # escaped a second time.
        return text.replace('&', '&amp;') \
            .replace('<', '&lt;') \
            .replace('>', '&gt;')

    def parse_text_content(item):
        """Copy the mixed content of *item* into the element on top of the stack.

        The top of ``stack`` is popped before returning.
        """
        top = stack[-1]
        if item.text:
            text = escape_markup(item.text)
            if len(top) == 0:
                top.text = (top.text or '') + text
            else:
                # Text following existing children belongs to the last
                # child's tail.
                top[-1].tail = (top[-1].tail or '') + text
        if item.tail:
            tail = escape_markup(item.tail)
            if len(top) == 0:
                top.tail = (top.tail or '') + tail
            else:
                # NOTE(review): when the output element already has children
                # (caption, clip) the input tail ends up inside it - confirm
                # this is intended.
                top[-1].tail = (top[-1].tail or '') + tail

        for elt in item:
            if elt.tag in ():
                # Placeholder: no tags are currently copied through as-is.
                newelt = etree.Element(elt.tag)
            elif elt.tag == 'newline':
                newelt = etree.Element('br')
            elif elt.tag in ('img', 'imgthumb', ):
                # Embedded images, or thumbnails
                newelt = etree.Element('img')
                path, mtype = get_media(elt.text)
                display_type = {'img': 'inline',
                                'imgthumb': 'thumb',
                               }[elt.tag]

                newelt.set('data-type', 'file')
                if mtype is not None:
                    newelt.set('data-mimetype', mtype)
                newelt.set('data-src', path)
                newelt.set('data-display', display_type)
                newelt.set('data-alt', u'')
                newelt.set('data-title', u'')
                # NOTE(review): branches ending in `continue` never look at
                # elt.tail, so text directly after such an element is not
                # copied - confirm whether that can occur in the input.
                stack[-1].append(newelt)
                continue

            elif elt.tag in ('imglink', 'film', 'sound'):
                # Embedded links to files
                path, mtype = get_media(elt.text)
                newelt = mklink("file", "icon", path, mtype)
                stack[-1].append(newelt)
                continue

            # FIXME - the following fields probably aren't handled very usefully.
            # These tags are compared with ==: `tag in (u'muscode')` is a
            # substring test against a plain string, not tuple membership.
            elif elt.tag == u'muscode':
                newelt = etree.Element('span')
                newelt.set('style', 'muscode')
                newelt.text = elt.text

            elif elt.tag == u'refnext':
                newelt = mklink("record", "icon", elt.text.strip())
                newelt.text = "[NEXT]"
                stack[-1].append(newelt)
                continue
            elif elt.tag == u'refprev':
                newelt = mklink("record", "icon", elt.text.strip())
                newelt.text = "[PREV]"
                stack[-1].append(newelt)
                continue
            elif elt.tag == u'musref':
                newelt = mklink("record", "icon", elt.text.strip())
                newelt.text = '[Record %s]' % elt.text
                stack[-1].append(newelt)
                continue

            elif elt.tag == u'caption':
                newelt = etree.Element('div')
                newelt.set('style', 'caption')
                subelt = etree.Element('b')
                subelt.text = 'Caption:'
                newelt.append(subelt)

            elif elt.tag == u'clip':
                newelt = etree.Element('div')
                newelt.set('style', 'clip')
                subelt = etree.Element('b')
                subelt.text = 'Clip:'
                newelt.append(subelt)

            else:
                print("Unknown input tag type %s" % (etree.tostring(elt),))
                abort()

            # Container-like elements: descend into them.  The recursive call
            # pops the element pushed here.
            stack[-1].append(newelt)
            stack.append(newelt)
            parse_text_content(elt)
        stack.pop()

    def append_field(name, field_type):
        """Append a new ``field`` element to the top of the stack and return it."""
        elt = etree.Element('field')
        elt.set(u'name', unicode(name))
        elt.set(u'type', unicode(field_type))
        stack[-1].append(elt)
        return elt

    def parse_level(item, prefix=u''):
        """Convert the fields of *item* and pop the top of ``stack`` afterwards.

        *prefix* is the ``/``-joined path of enclosing group names; it is
        prepended to the tag name when looking up ``ctx.type_mapping``.
        """
        for field in item:
            if field.tag == u'id':
                record.id = unicode(field.text)
                continue

            ftype = ctx.type_mapping.get(prefix + field.tag)

            if ftype == u'title':
                elt = append_field(unicode(field.tag), u'title')
                elt.text = field.text
            elif ftype == u'text':
                # append_field has already attached the element to its
                # parent, so it only needs pushing; parse_text_content
                # pops it again.
                elt = append_field(unicode(field.tag), u'text')
                stack.append(elt)
                parse_text_content(field)
            elif ftype == u'tag':
                elt = append_field(unicode(field.tag), u'tag')
                elt.text = field.text
            elif ftype == u'number':
                elt = append_field(unicode(field.tag), u'number')
                elt.text = field.text
            elif ftype == u'date':
                elt = append_field(unicode(field.tag), u'date')
                elt.text = parse_bamboo_date(field.text)
            elif ftype == u'file':
                elt = append_field(unicode(field.tag), u'file')
                path, mtype = get_media(field.text)
                if mtype is not None:
                    elt.set('mimetype', mtype)
                elt.set('src', path)
                elt.set('display', {
                    'img': 'inline',
                    'imgthumb': 'thumb',
                    'sound': 'inline',
                    'film': 'inline',
                }[field.tag])
                elt.set('alt', '')
                elt.set('title', '')

            elif ftype == u'location':
                elt = append_field(unicode(field.tag), u'location')
                elt.text = field.text
                latlong = ctx.known_locations.get(field.text, None)
                if latlong is not None:
                    elt.set('latlong', latlong)
            elif ftype == u'group':
                elt = etree.Element('group')
                elt.set(u'name', unicode(field.tag))
                stack[-1].append(elt)
                stack.append(elt)
                # The recursive call pops the group element pushed above.
                parse_level(field, prefix + field.tag + u'/')

            # Bamboo reference types - each needs special handling
            elif ftype == u'seealso':
                elt = append_field(unicode(field.tag), u'tag')
                elt.text = field.text
            elif ftype == u'musref':
                elt = append_field(unicode(field.tag), u'link')
                elt.text = "Ref"
                elt.set(u'linktype', u'record')
                elt.set(u'target', field.text)
            elif ftype == u'refnext':
                # Note - when these are fixed, we must also handle refnext
                # and refprev inside text fields.
                elt = append_field(unicode(field.tag), u'link')
                elt.text = "Next"
                elt.set(u'linktype', u'record')
                elt.set(u'target', field.text)
            elif ftype == u'refprev':
                elt = append_field(unicode(field.tag), u'link')
                elt.text = "Previous"
                elt.set(u'linktype', u'record')
                elt.set(u'target', field.text)
            else:
                print("Unknown field: %s" % (prefix + field.tag))
                print(etree.tostring(field))
                abort()
        stack.pop()

    tree = etree.parse(ctx.fileobj.file)
    for item in tree.getroot():
        # `record` and `stack` are read by the nested helpers above.
        record = Record()
        stack = [record.root]
        parse_level(item)

        record.collections = [ctx.collid]
        Record.objects.set(record)

    Record.objects.flush()
    Collection.objects.flush()

    # The early return below disables the download step that follows; the
    # code is kept in place but never runs.
    return
    # Download the referenced media
    import urllib
    for url, path in referenced_media:
        if os.path.exists(path):
            continue
        print("Downloading %r to %r" % (url, path))
        fd_in = urllib.urlopen(url)
        file_contents = fd_in.read()
        fd_in.close()
        if not os.path.exists(os.path.dirname(path)):
            os.makedirs(os.path.dirname(path))
        fd_out = open(path + '.new', 'wb')
        fd_out.write(file_contents)
        fd_out.close()
        os.rename(path + '.new', path)
Esempio n. 5
0
def do_import(ctx):
    """Import the ``MediaItem`` entries of a ``CatalogType`` XML file.

    The document is parsed from ``ctx.fileobj.file``.  For each ``MediaItem``
    under ``MediaItemList`` a ``Record`` is built:

    * its id is ``ctx.idprefix`` plus the stripped text of
      ``AssetProperties/UniqueID``, when that element exists;
    * each non-empty child of ``AnnotationFields`` becomes a field named
      after the lower-cased tag - ``headline`` becomes the ``title`` field,
      the rest become ``text`` fields in sorted name order;
    * ``AssetProperties/Filepath``, when present, becomes an inline
      ``image`` file field whose path is moved from ``oldroot`` to
      ``newroot``.

    Each record is assigned to the single collection ``ctx.collname`` and
    stored; the record and collection stores are flushed at the end.

    Returns:
        The result of ``ctx.set_error(...)`` when the root tag is not
        ``CatalogType`` or no ``MediaItemList`` is found; otherwise ``None``.
    """
    oldroot = 'file:///G:/data/BURMA_~1/'
    newroot = '/home/louise/Desktop/data/burma_photos'

    def re_root_path(path):
        """Strip a leading ``oldroot`` from *path* and join it onto ``newroot``."""
        if path.startswith(oldroot):
            path = path[len(oldroot):]
        return os.path.join(newroot, path)

    def append_field(name, field_type):
        """Append a new ``field`` element to the current record and return it."""
        elt = etree.Element('field')
        elt.set(u'name', unicode(name))
        elt.set(u'type', unicode(field_type))
        record.root.append(elt)
        return elt

    tree = etree.parse(ctx.fileobj.file)
    root = tree.getroot()
    if root.tag != 'CatalogType':
        return ctx.set_error('File format not understood - expected root tag '
                             'to be CatalogType, got %s' % root.tag)
    itemlist = tree.find('MediaItemList')
    if itemlist is None:
        return ctx.set_error('File format not understood - no MediaItemList '
                             'found in file')

    for item in itemlist:
        if item.tag != 'MediaItem':
            continue

        # `record` is read by append_field above.
        record = Record()

        uid_elt = item.find('AssetProperties/UniqueID')
        if uid_elt is not None:
            record.id = ctx.idprefix + uid_elt.text.strip()

        annotations = item.find('AnnotationFields')
        if annotations is not None:
            notes = {}
            for annotation in annotations:
                # An element without text has .text == None; converting that
                # directly would store the literal string u'None' as a note.
                text = unicode(annotation.text or u'').strip()
                if text:
                    notes[unicode(annotation.tag).lower()] = text
            if u'headline' in notes:
                elt = append_field(u'title', u'title')
                elt.text = notes.pop(u'headline')
            for field in sorted(notes):
                elt = append_field(field, u'text')
                elt.text = notes[field]

        path = item.find('AssetProperties/Filepath')
        if path is not None:
            path = re_root_path(path.text)
            elt = append_field(u'image', u'file')
            elt.set('src', path)
            elt.set('mimetype', 'image/jpeg')
            elt.set('display', 'inline')

        record.collections = [ctx.collname]
        Record.objects.set(record)
    Record.objects.flush()
    Collection.objects.flush()