Beispiel #1
0
    def probe(self):
        """Walk the XML tree and classify every record and collection in it.

        This is a generator: a ``TaskProgress`` message is yielded before
        each grandchild element of ``self.root`` is examined.

        ``record`` elements are stored in ``self.records`` and their media
        are added to ``self.media_paths`` keyed by ``src``; ``collection``
        elements are stored in ``self.collections``.  Each of them is then
        compared with the stored object of the same ID and its ID is added
        to the matching ``*_new``, ``*_identical`` or ``*_conflicts`` set.
        Elements with any other tag only produce a progress message.

        ``self.calc_media_roots()`` is called once the walk has finished.
        """
        total = sum(len(group) for group in self.root)
        elements = itertools.chain.from_iterable(self.root)

        for position, element in enumerate(elements, 1):
            yield TaskProgress("Probing XML file, element %d of %d" % (position, total))
            tag = element.tag

            if tag == 'record':
                record = Record(root=element)
                self.records[record.id] = record
                self.media_paths.update(
                    (media.src, media)
                    for media in record.media(mimetype=None)
                )

                try:
                    stored = Record.objects.get(record.id)
                except KeyError:
                    # Nothing stored under this ID yet.
                    self.record_new.add(record.id)
                    continue

                # Identical only when both the mtime and the XML (ignoring
                # leading/trailing whitespace) agree.
                unchanged = (stored.mtime == record.mtime and
                             stored.xml.strip() == record.xml.strip())
                bucket = self.record_identical if unchanged else self.record_conflicts
                bucket.add(record.id)

            elif tag == 'collection':
                coll = Collection.fromxml(root=element)
                self.collections[coll.id] = coll

                try:
                    stored = Collection.objects.get(coll.id)
                except KeyError:
                    # Nothing stored under this ID yet.
                    self.coll_new.add(coll.id)
                    continue

                bucket = self.coll_identical if stored == coll else self.coll_conflicts
                bucket.add(coll.id)

        self.calc_media_roots()
Beispiel #2
0
    def _get_collections(self):
        """Work out the set of collection IDs selected by the parameters.

        Reads three sequences from ``self.params``:

        - ``remcoll``: IDs that must not be selected;
        - ``collid``: IDs selected directly (empty strings are ignored);
        - ``colltitle``: titles to resolve with ``Collection.find_by_title``
          (empty strings are ignored).

        The resulting set of IDs is stored in ``self.collections``.  Titles
        for which no collection is found are listed in
        ``self.invalid_collections``.
        """
        self.invalid_collections = []

        excluded = set(self.params.get('remcoll', ()))
        selected = {
            collid
            for collid in self.params.get('collid', ())
            if collid != u'' and collid not in excluded
        }
        titles = {
            title
            for title in self.params.get('colltitle', ())
            if title != u''
        }

        for title in titles:
            matches = Collection.find_by_title(title)
            if len(matches) == 0:
                self.invalid_collections.append(title)
            # A title may match several collections; take every one that has
            # not been explicitly removed.
            selected.update(
                match.id for match in matches if match.id not in excluded
            )

        self.collections = selected
Beispiel #3
0
    def search(self, **params):
        """Handle the search pages and the actions applied to search results.

        A ``SearchParams`` object is built from a fresh, empty ``stash`` and
        wrapped in a ``Search``.  If the search does not validate, the action
        is forced to ``'entry'``.  The handler then dispatches on
        ``params.action``:

        - ``'search'``: render ``search.html``.
        - ``'select'``: render ``search_select.html``.
        - ``'createcoll...'``: on POST, maintain the list of parent
          collections from the form and, for the ``do`` sub-action, create a
          new collection and add every record in ``search.query`` to it.
        - ``'addtocoll'``: on POST, add every record in ``search.query`` to
          the chosen collection.
        - ``'removefromcoll'``: on POST, remove the chosen collection from
          every record in ``search.query``.
        - anything else: render ``search_entry.html``.

        NOTE(review): the ``**params`` keyword arguments are never read; the
        name ``params`` is rebound to the ``SearchParams`` object below.
        """
        stash = {}
        context = dict(stash=stash)
        params = SearchParams(stash)
        search = Search(params)
        if not search.validate():
            # If the search doesn't validate, always go to the search entry
            # page.
            params.action = 'entry'
        search.add_to_context(context)
        context['showfull'] = int(getparam('showfull', '0', stash))

        if params.action == 'search':
            return render("search.html", context)
        elif params.action == 'select':
            return render("search_select.html", context)

        elif params.action.startswith('createcoll'):
            # Anything other than a POST just shows the form.
            if cherrypy.request.method != "POST":
                return render("search_createcoll.html", context)
            # 'createcoll' is 10 characters long; dropping 11 also removes
            # the single character following it, leaving the sub-action name
            # compared below ('do', 'add_parent', 'del_parent_<n>').
            subact = params.action[11:]

            # Rebuild the list of parents from the submitted form, skipping
            # empty entries and the entry being deleted (if any).
            parents = []
            for num in getorderparam('parent_order'):
                parent = getparam('parent%d' % num)
                if parent is None or parent == '':
                    continue
                if subact == ('del_parent_%d' % num):
                    continue
                parents.append(unicode(parent))
            if subact == 'add_parent':
                # An empty entry is appended for the new parent.
                parents.append(u'')
            context['parents'] = tuple(enumerate(parents))
            context['allowable_parents'] = set(
                c.id for c in Collection.objects
            )
            context['collections'] = Collection.objects

            if subact == 'do':
                newtitle = getparam('create_colltitle', '')
                if len(newtitle) == 0:
                    context['error'] = "Cannot create collection with no title"
                    return render("search_createcoll.html", context)

                # Refuse to create a second collection with the same title.
                coll = Collection.find_by_title(newtitle)
                if len(coll) != 0:
                    context['error'] = "Collection with title %s already exists" % newtitle
                    return render("search_createcoll.html", context)

                coll = Collection(None, None, newtitle)
                Collection.objects.set(coll)
                # Have to set the collection before setting the parents, to get
                # an ID for the parents to refer back to.
                # The empty entry possibly added for 'add_parent' is dropped
                # by the filter.
                coll.set_parents(filter(lambda x: x != '', parents))
                Collection.objects.set(coll)
                # NOTE(review): no record has been changed in this branch
                # yet when Record.objects.flush() is first called here.
                Record.objects.flush()
                Collection.objects.flush()

                # Add every record in the search results to the new
                # collection.
                for record in search.query:
                    record = record.object
                    record.collections = record.collections + [coll.id]
                    Record.objects.set(record)
                Record.objects.flush()
                Collection.objects.flush()

                # NOTE(review): nothing is returned after this call, so
                # redirect() presumably raises to perform the redirect --
                # confirm.
                redirect(url("coll-view", id=coll.id))
            else:
                return render("search_createcoll.html", context)

        elif params.action == 'addtocoll':
            context['all_collections'] = Collection.objects
            if cherrypy.request.method != "POST":
                return render("search_addtocoll.html", context)

            newid = getparam('addto_collid', '')
            if len(newid) == 0:
                context['error'] = "Pick a collection to add to"
                return render("search_addtocoll.html", context)

            # NOTE(review): there is no handling here for an ID that does not
            # exist -- confirm what Collection.objects.get does in that case.
            coll = Collection.objects.get(newid)
            for record in search.query:
                record = record.object
                # NOTE(review): the ID is appended without checking whether
                # the record already lists this collection.
                record.collections = record.collections + [coll.id]
                Record.objects.set(record)
            Record.objects.flush()
            Collection.objects.flush()

            redirect(url("coll-view", id=coll.id))

            # NOTE(review): only reached if redirect() returns normally;
            # presumably dead code -- confirm.
            return render("search_addtocoll.html", context)

        elif params.action == 'removefromcoll':
            context['all_collections'] = Collection.objects
            if cherrypy.request.method != "POST":
                return render("search_removefromcoll.html", context)

            newid = getparam('removefrom_collid', '')
            if len(newid) == 0:
                context['error'] = "Pick a collection to remove from"
                return render("search_removefromcoll.html", context)

            # NOTE(review): there is no handling here for an ID that does not
            # exist -- confirm what Collection.objects.get does in that case.
            coll = Collection.objects.get(newid)
            for record in search.query:
                record = record.object
                # NOTE(review): a tuple is assigned here, while the branches
                # that add a collection concatenate with a list -- confirm
                # that record.collections accepts both.
                record.collections = tuple(filter(lambda x: x != coll.id,
                                                  record.collections))
                Record.objects.set(record)
            Record.objects.flush()
            Collection.objects.flush()

            redirect(url("coll-view", id=coll.id))

            # NOTE(review): only reached if redirect() returns normally;
            # presumably dead code -- confirm.
            return render("search_removefromcoll.html", context)

        else:
            # Default (including the forced 'entry' action): show the search
            # entry page.
            context['all_collections'] = Collection.objects
            return render("search_entry.html", context)
Beispiel #4
0
def set_config():
    """Install the field-pattern table for the default record type.

    Waits for ``Collection.checkpoint()`` to complete, reads
    ``Collection.config``, replaces ``config['default_type']['patterns']``
    with the table built below and assigns the result back to
    ``Collection.config``.

    Each table entry pairs a field-name pattern with the settings for fields
    matching it.  The order of the entries is significant to keep as is.
    NOTE(review): presumably earlier patterns take precedence over the
    trailing catch-all ``"*"`` -- confirm before reordering.
    """
    def stemmed_text(prefix, pattern):
        # Free-text field processed with "stem_en"; group and slot share the
        # same wildcard prefix.
        return (pattern, {
            "group": prefix + "*",
            "slot": prefix + "*",
            "processor": "stem_en",
            "store_field": pattern,
            "type": "text",
        })

    def hashed_tag(pattern):
        # Lower-cased exact-match field in the "g*" group; max_length is 100
        # and the too_long_action is "hash".
        return (pattern, {
            "group": "g*",
            "slot": "g*",
            "max_length": 100,
            "store_field": pattern,
            "too_long_action": "hash",
            "lowercase": True,
            "type": "exact",
        })

    Collection.checkpoint().wait()
    config = Collection.config

    field_patterns = [
        # FIXME - add an _error type.

        # Fields of date type
        ("*_date", {
            "slot": "y*",
            "store_field": "*_date",
            "type": "date",
        }),

        # Text fields (stripped of markup)
        stemmed_text("t", "*_text"),

        # Summary text for links
        stemmed_text("l", "*_link"),

        # Title fields
        stemmed_text("e", "*_title"),

        # Number fields
        ("*_number", {
            "slot": "n*",
            "store_field": "*_number",
            "type": "double",
        }),

        # File fields (the title / alt text / content of text files)
        stemmed_text("i", "*_file"),

        # Tag fields
        hashed_tag("*_tag"),

        # Location fields (the text part - handled like a tag)
        hashed_tag("*_location"),

        # FIXME - location fields have form *_latlong; need a pattern for them.

        # The collection that an item is in.
        ("coll", {
            "group": "C",
            "max_length": 32,
            "slot": 2,
            "store_field": "coll",
            "taxonomy": "coll_hierarchy",
            "too_long_action": "hash",
            "type": "cat",
        }),

        # Modification times.
        ("mtime", {
            "slot": 3,
            "store_field": "mtime",
            "type": "timestamp",
        }),

        # Titles of items.
        ("title", {
            "group": "t",
            "slot": 4,
            "store_field": "title",
            "type": "text",
        }),

        # For media items, the ID of the record holding the item.
        ("parent", {
            "group": "P",
            "max_length": 100,
            "store_field": "parent",
            "too_long_action": "hash",
            "type": "exact",
        }),

        # The ID of a media item (the path or url).
        ("fileid", {
            "group": "F",
            "max_length": 100,
            "store_field": "fileid",
            "too_long_action": "hash",
            "type": "exact",
        }),

        # The mimetype of a media item
        ("mimetype", {
            "group": "T",
            "slot": 5,
            "max_length": 100,
            "store_field": "mimetype",
            "too_long_action": "hash",
            "type": "exact",
        }),

        # The alt text of a media item
        ("filealt", {
            "group": "A",
            "store_field": "filealt",
            "type": "text",
        }),

        # The title text of a media item
        ("filetitle", {
            "group": "A",
            "store_field": "filetitle",
            "type": "text",
        }),

        # All dates held in a record.
        ("date", {
            "slot": 6,
            "store_field": "date",
            "type": "date",
        }),

        # The text of a record.
        ("text", {
            "group": "t",
            "processor": "stem_en",
            "store_field": "text",
            "type": "text",
        }),

        # The ID field.
        ("id", {
            "max_length": 100,
            "store_field": "id",
            "too_long_action": "hash",
            "type": "id",
        }),

        # The meta field to use (supports things like searches for which fields
        # exist.)
        ("_meta", {
            "group": "#",
            "slot": 0,
            "type": "meta",
        }),

        # The type of a record.
        ("type", {
            "group": "!",
            "slot": 1,
            "store_field": "type",
            "type": "exact",
        }),

        # Catch-all for every other field name.
        ("*", {
            "group": "u",
            "store_field": "*",
            "type": "text",
        }),
    ]

    config['default_type']['patterns'] = field_patterns
    Collection.config = config
def start_import(params):
    """Validate an import request and run the import.

    ``params`` is read with ``.get``: ``'file'`` must be an object with a
    non-empty ``filename`` and ``'collection'`` must be the title of exactly
    one existing collection.

    If validation fails, the result of ``ctx.set_error(message)`` is
    returned.  Otherwise the ``ImportContext`` is set up with the file, the
    collection ID and the field-type mapping, ``do_import(ctx)`` is run in
    the calling thread, and the context is returned.
    """
    ctx = ImportContext()

    upload = params.get('file', None)
    if upload is None or not upload.filename:
        return ctx.set_error('No file supplied')

    title = unicode(params.get('collection', u''))
    if not title:
        return ctx.set_error('No collection specified')

    # The title must identify exactly one collection.
    matches = Collection.find_by_title(title)
    match_count = len(matches)
    if match_count != 1:
        if match_count == 0:
            message = 'Collection specified not found'
        else:
            message = 'Collection name specified maps to multiple collections'
        return ctx.set_error(message)
    target = matches[0]

    # Maps the name of a field in the imported file to a field type.
    type_mapping = {
        # Top-level text, tag, date and media fields.
        u'title': u'title',
        u'note': u'text',
        u'clip': u'text',
        u'caption': u'text',
        u'text': u'text',
        u'medium': u'tag',
        u'keywords': u'tag',
        u'date': u'date',
        u'img': u'file',
        u'imgthumb': u'file',
        u'musref': u'file',
        u'film': u'file',
        u'sound': u'file',

        # "collection" group and its members.
        u'collection': u'group',
        u'collection/person': u'tag',
        u'collection/date': u'date',
        # NOTE(review): the other "*/form" entries map to u'tag'; confirm
        # that u'date' is intended here.
        u'collection/form': u'date',
        u'collection/location': u'location',
        u'collection/ethnicgroup': u'tag',
        u'collection/note': u'tag',
        u'collection/refnum': u'tag',

        # "production" group and its members.
        u'production': u'group',
        u'production/person': u'tag',
        u'production/date': u'date',
        u'production/location': u'location',
        u'production/form': u'tag',
        u'production/note': u'text',
        u'production/refnum': u'tag',
        u'production/ethnicgroup': u'tag',

        # "acquirer" group and its members.
        u'acquirer': u'group',
        u'acquirer/date': u'date',
        u'acquirer/person': u'tag',
        u'acquirer/form': u'tag',
        u'acquirer/refnum': u'tag',
        u'acquirer/note': u'tag',

        # Remaining top-level fields.
        u'size': u'number',
        u'person': u'tag',
        u'ethnicgroup': u'tag',
        u'location': u'location',

        # Cross-reference fields keep their own name as their type.
        u'refnext': u'refnext',
        u'refprev': u'refprev',
        u'seealso': u'seealso',
    }

    # HACK: this mapping used to be loaded from "locations.txt", one
    # "name: value" pair per line (lines with no value were skipped).  That
    # loader is disabled, so the mapping is always empty.
    known_locations = {}

    ctx.setup(
        fileobj=upload,
        collid=target.id,
        type_mapping=type_mapping,
        known_locations=known_locations,
    )

    # FIXME - do this in a background thread.
    do_import(ctx)
    return ctx