def probe(self):
    """Scan the XML tree and classify every record and collection in it.

    This is a generator.  The elements visited are the children of each
    child of ``self.root``; one ``TaskProgress`` message is yielded per
    element before that element is examined.

    For a ``record`` element the parsed ``Record`` is stored in
    ``self.records``, its media are merged into ``self.media_paths`` as
    ``(src, media)`` pairs, and its id is added to exactly one of
    ``self.record_new`` (no stored record with that id),
    ``self.record_identical`` (same mtime and same whitespace-stripped
    XML) or ``self.record_conflicts``.

    For a ``collection`` element the parsed ``Collection`` is stored in
    ``self.collections`` and its id is added to one of ``self.coll_new``,
    ``self.coll_identical`` or ``self.coll_conflicts`` in the same way,
    using ``==`` to compare against the stored collection.

    Elements with any other tag are only counted.  After the last element
    ``self.calc_media_roots()`` is called.
    """
    total = sum(len(group) for group in self.root)
    elements = itertools.chain.from_iterable(self.root)

    for position, element in enumerate(elements, 1):
        yield TaskProgress(
            "Probing XML file, element %d of %d" % (position, total))

        if element.tag == 'record':
            record = Record(root=element)
            self.records[record.id] = record
            self.media_paths.update(
                (media.src, media)
                for media in record.media(mimetype=None))

            # A KeyError from the store means the record is not known yet.
            try:
                stored = Record.objects.get(record.id)
            except KeyError:
                self.record_new.add(record.id)
                continue

            unchanged = (stored.mtime == record.mtime and
                         stored.xml.strip() == record.xml.strip())
            if unchanged:
                self.record_identical.add(record.id)
            else:
                self.record_conflicts.add(record.id)

        elif element.tag == 'collection':
            coll = Collection.fromxml(root=element)
            self.collections[coll.id] = coll

            try:
                stored = Collection.objects.get(coll.id)
            except KeyError:
                self.coll_new.add(coll.id)
                continue

            if stored == coll:
                self.coll_identical.add(coll.id)
            else:
                self.coll_conflicts.add(coll.id)

    self.calc_media_roots()
def _get_collections(self):
    """Work out the set of collection ids selected by ``self.params``.

    Reads three parameters, each defaulting to an empty sequence:

    - ``remcoll``: ids that must not appear in the result.
    - ``collid``: ids to include; empty strings and ids listed in
      ``remcoll`` are dropped.
    - ``colltitle``: titles to resolve with ``Collection.find_by_title``;
      empty strings are dropped.  Every collection found for a title
      contributes its id unless that id is in ``remcoll``.

    Stores the resulting set in ``self.collections`` and the list of
    titles for which no collection was found in
    ``self.invalid_collections``.
    """
    self.invalid_collections = []

    excluded = set(self.params.get('remcoll', ()))
    selected = set(
        collid for collid in self.params.get('collid', ())
        if collid != u'' and collid not in excluded
    )
    titles = set(
        title for title in self.params.get('colltitle', ())
        if title != u''
    )

    for title in titles:
        found = Collection.find_by_title(title)
        if len(found) == 0:
            self.invalid_collections.append(title)
        for coll in found:
            if coll.id in excluded:
                continue
            selected.add(coll.id)

    self.collections = selected
def search(self, **params):
    """Run a search and carry out the requested action on its results.

    The keyword arguments are accepted but not read: the search
    parameters come from a ``SearchParams`` object built around a fresh
    stash.  If the search does not validate, the action is forced to
    ``'entry'``.

    Actions handled (``params.action``):

    - ``search`` / ``select``: render the matching template.
    - ``createcoll...``: on POST, maintain the list of parent
      collections (sub-actions ``add_parent`` and ``del_parent_<n>``)
      and, for sub-action ``do``, create a new collection with the given
      title and parents, add every matched record to it and redirect to
      the collection view.
    - ``addtocoll`` / ``removefromcoll``: on POST, add or remove the
      chosen collection id on every matched record and redirect to the
      collection view.  A collection id that cannot be found is reported
      through ``context['error']`` instead of escaping as ``KeyError``.
    - anything else: render the search entry page.

    Returns whatever ``render`` returns for the chosen template.  The
    ``createcoll`` ``do`` path has no return statement after
    ``redirect``, so it yields ``None`` if ``redirect`` returns.
    """
    stash = {}
    context = dict(stash=stash)
    params = SearchParams(stash)
    search = Search(params)

    if not search.validate():
        # If the search doesn't validate, always go to the search entry
        # page.
        params.action = 'entry'

    search.add_to_context(context)
    context['showfull'] = int(getparam('showfull', '0', stash))

    def update_matches(new_collections):
        # Replace the collection list of every record matched by the
        # search with new_collections(old_list), then flush both stores.
        for hit in search.query:
            record = hit.object
            record.collections = new_collections(record.collections)
            Record.objects.set(record)
        Record.objects.flush()
        Collection.objects.flush()

    action = params.action

    if action == 'search':
        return render("search.html", context)

    elif action == 'select':
        return render("search_select.html", context)

    elif action.startswith('createcoll'):
        if cherrypy.request.method != "POST":
            return render("search_createcoll.html", context)

        # The sub-action follows the 10-character 'createcoll' prefix
        # plus one separator character.
        subact = action[11:]

        parents = []
        for num in getorderparam('parent_order'):
            parent = getparam('parent%d' % num)
            if parent is None or parent == '':
                continue
            if subact == ('del_parent_%d' % num):
                continue
            parents.append(unicode(parent))
        if subact == 'add_parent':
            # An empty entry gives the form a blank row to fill in.
            parents.append(u'')

        context['parents'] = tuple(enumerate(parents))
        context['allowable_parents'] = set(
            c.id for c in Collection.objects
        )
        context['collections'] = Collection.objects

        if subact != 'do':
            return render("search_createcoll.html", context)

        newtitle = getparam('create_colltitle', '')
        if not newtitle:
            context['error'] = "Cannot create collection with no title"
            return render("search_createcoll.html", context)
        if Collection.find_by_title(newtitle):
            context['error'] = "Collection with title %s already exists" % newtitle
            return render("search_createcoll.html", context)

        coll = Collection(None, None, newtitle)
        # Have to set the collection before setting the parents, to get
        # an ID for the parents to refer back to.
        Collection.objects.set(coll)
        coll.set_parents([p for p in parents if p != ''])
        Collection.objects.set(coll)
        Record.objects.flush()
        Collection.objects.flush()

        update_matches(lambda current: current + [coll.id])
        redirect(url("coll-view", id=coll.id))

    elif action == 'addtocoll':
        context['all_collections'] = Collection.objects
        if cherrypy.request.method != "POST":
            return render("search_addtocoll.html", context)

        newid = getparam('addto_collid', '')
        if not newid:
            context['error'] = "Pick a collection to add to"
            return render("search_addtocoll.html", context)

        # The id comes from the request, so it may not exist (any more).
        try:
            coll = Collection.objects.get(newid)
        except KeyError:
            context['error'] = "Collection not found"
            return render("search_addtocoll.html", context)

        update_matches(lambda current: current + [coll.id])
        redirect(url("coll-view", id=coll.id))
        # Only reached if redirect() returns instead of raising.
        return render("search_addtocoll.html", context)

    elif action == 'removefromcoll':
        context['all_collections'] = Collection.objects
        if cherrypy.request.method != "POST":
            return render("search_removefromcoll.html", context)

        newid = getparam('removefrom_collid', '')
        if not newid:
            context['error'] = "Pick a collection to remove from"
            return render("search_removefromcoll.html", context)

        # The id comes from the request, so it may not exist (any more).
        try:
            coll = Collection.objects.get(newid)
        except KeyError:
            context['error'] = "Collection not found"
            return render("search_removefromcoll.html", context)

        update_matches(
            lambda current: tuple(c for c in current if c != coll.id))
        redirect(url("coll-view", id=coll.id))
        # Only reached if redirect() returns instead of raising.
        return render("search_removefromcoll.html", context)

    else:
        context['all_collections'] = Collection.objects
        return render("search_entry.html", context)
def set_config():
    """Install the field patterns for the default type on ``Collection``.

    Waits on ``Collection.checkpoint()``, reads ``Collection.config``,
    replaces ``config['default_type']['patterns']`` with the list built
    below and assigns the configuration back to ``Collection.config``.

    Each pattern is a ``(field_name_pattern, settings_dict)`` pair.  The
    list order is kept exactly as written, with the catch-all ``"*"``
    entry last.
    """
    def stemmed_text(field, prefix):
        # "text" pattern using the "stem_en" processor; the group and the
        # slot share the same prefix.
        return (field, dict(group=prefix, slot=prefix, processor="stem_en",
                            store_field=field, type="text"))

    def tag_like(field):
        # Lowercased "exact" pattern in the "g*" group/slot.
        return (field, dict(group="g*", slot="g*", max_length=100,
                            store_field=field, too_long_action="hash",
                            lowercase=True, type="exact"))

    def hashed_exact(field, group, **extra):
        # "exact" pattern with max_length 100 and too_long_action "hash".
        return (field, dict(group=group, max_length=100, store_field=field,
                            too_long_action='hash', type="exact", **extra))

    Collection.checkpoint().wait()
    config = Collection.config

    patterns = [
        # FIXME - add an _error type.

        # Fields of date type
        ('*_date', dict(slot='y*', store_field="*_date", type="date")),

        # Text fields (stripped of markup)
        stemmed_text("*_text", "t*"),

        # Summary text for links
        stemmed_text("*_link", "l*"),

        # Title fields
        stemmed_text("*_title", "e*"),

        # Number fields
        ("*_number", dict(slot="n*", store_field="*_number",
                          type="double")),

        # File fields (the title / alt text / content of text files)
        stemmed_text("*_file", "i*"),

        # Tag fields
        tag_like("*_tag"),

        # Location fields (the text part - handled like a tag)
        tag_like("*_location"),

        # FIXME - location fields have form *_latlong; need a pattern for
        # them.

        # The collection that an item is in.
        ('coll', dict(group="C", max_length=32, slot=2, store_field="coll",
                      taxonomy="coll_hierarchy", too_long_action="hash",
                      type="cat")),

        # Modification times.
        ('mtime', dict(slot=3, store_field="mtime", type="timestamp")),

        # Titles of items.
        ('title', dict(group='t', slot=4, store_field="title",
                       type="text")),

        # For media items, the ID of the record holding the item.
        hashed_exact('parent', 'P'),

        # The ID of a media item (the path or url).
        hashed_exact('fileid', 'F'),

        # The mimetype of a media item
        hashed_exact('mimetype', 'T', slot=5),

        # The alt text of a media item
        ('filealt', dict(group='A', store_field="filealt", type="text")),

        # The title text of a media item
        ('filetitle', dict(group='A', store_field="filetitle",
                           type="text")),

        # All dates held in a record.
        ('date', dict(slot=6, store_field="date", type="date")),

        # The text of a record.
        ("text", dict(group="t", processor="stem_en", store_field="text",
                      type="text")),

        ('id', dict(max_length=100, store_field="id",
                    too_long_action='hash', type="id")),

        # The meta field to use (supports things like searches for which
        # fields exist.)
        ("_meta", dict(group="#", slot=0, type="meta")),

        # The type of a record.
        ("type", dict(group="!", slot=1, store_field="type",
                      type="exact")),

        ("*", dict(group="u", store_field="*", type="text")),
    ]

    config['default_type']['patterns'] = patterns
    Collection.config = config
def start_import(params):
    """Validate an import request and run the import.

    ``params`` is a mapping read with ``.get``:

    - ``file``: the uploaded file object; it must be present and have a
      non-empty ``filename``.
    - ``collection``: the title of the target collection; it must
      resolve, via ``Collection.find_by_title``, to exactly one
      collection.

    If a check fails, the result of ``ctx.set_error(message)`` is
    returned and nothing is imported.  Otherwise the context is set up
    with the file, the collection id, the field type mapping and an empty
    known-locations mapping, ``do_import`` is run on it, and the context
    is returned.
    """
    context = ImportContext()

    upload = params.get('file', None)
    if upload is None or not upload.filename:
        return context.set_error('No file supplied')

    coll_title = unicode(params.get('collection', u''))
    if not coll_title:
        return context.set_error('No collection specified')

    matches = Collection.find_by_title(coll_title)
    match_count = len(matches)
    if match_count == 0:
        return context.set_error('Collection specified not found')
    if match_count > 1:
        return context.set_error(
            'Collection name specified maps to multiple collections')
    target = matches[0]

    # Maps a field name (or "group/field" path) to a field type name.
    field_types = {
        u'title': u'title',
        u'note': u'text',
        u'clip': u'text',
        u'caption': u'text',
        u'text': u'text',
        u'medium': u'tag',
        u'keywords': u'tag',
        u'date': u'date',
        u'img': u'file',
        u'imgthumb': u'file',
        u'musref': u'file',
        u'film': u'file',
        u'sound': u'file',

        u'collection': u'group',
        u'collection/person': u'tag',
        u'collection/date': u'date',
        # NOTE(review): the other "*/form" entries map to u'tag'; confirm
        # that u'date' is intended here.
        u'collection/form': u'date',
        u'collection/location': u'location',
        u'collection/ethnicgroup': u'tag',
        u'collection/note': u'tag',
        u'collection/refnum': u'tag',

        u'production': u'group',
        u'production/person': u'tag',
        u'production/date': u'date',
        u'production/location': u'location',
        u'production/form': u'tag',
        u'production/note': u'text',
        u'production/refnum': u'tag',
        u'production/ethnicgroup': u'tag',

        u'acquirer': u'group',
        u'acquirer/date': u'date',
        u'acquirer/person': u'tag',
        u'acquirer/form': u'tag',
        u'acquirer/refnum': u'tag',
        u'acquirer/note': u'tag',

        u'size': u'number',
        u'person': u'tag',
        u'ethnicgroup': u'tag',
        u'location': u'location',
        u'refnext': u'refnext',
        u'refprev': u'refprev',
        u'seealso': u'seealso',
    }

    # HACK!  This mapping used to be filled from 'locations.txt', one
    # "name:value" pair per line (lines without a value were skipped).
    # That loader is disabled, so an empty mapping is passed on.
    locations = {}

    context.setup(fileobj=upload,
                  collid=target.id,
                  type_mapping=field_types,
                  known_locations=locations)

    # FIXME - do this in a background thread.
    do_import(context)
    return context