def kv2map(k, v):
    '''Build a docmap (dict) from a list of field names and a parallel list of values.

    k -- list of field names
    v -- list of values; v[i] belongs to k[i]

    Returns an empty dict when either argument is not a list. Field names
    without a matching value, and pairs whose key or value is empty after
    conversion and stripping, are ignored. The first value seen for a key is
    stored as a plain string; when the same key occurs again the entry
    becomes a list holding all values in order of appearance.'''
    docmap = {}
    if not isinstance(k, list) or not isinstance(v, list):
        return docmap
    # zip() stops at the shorter list, so field names beyond the end of the
    # value list are skipped.
    for rawKey, rawValue in zip(k, v):
        # ignore empty values
        value = utils.ensureUnicode(rawValue)
        if not value:
            continue
        value = value.strip()
        if not value:
            continue
        # ignore empty keys; test before strip() as well, mirroring the value
        # handling, so a key that converts to None cannot raise here
        key = utils.ensureUnicode(rawKey)
        if not key:
            continue
        key = key.strip()
        if not key:
            continue
        if key not in docmap:
            docmap[key] = value
        elif isinstance(docmap[key], list):
            docmap[key].append(value)
        else:
            # second occurrence: promote the single value to a list
            docmap[key] = [docmap[key], value]
    return docmap
def getValues(self):
    '''Collect the usable rows of this table into a TEntries object.

    Every row in self.rows is read (name, type, path, each converted with
    utils.ensureUnicode) and turned into a TEntry(name, type, path, order).
    A row is skipped when its path does not exist on disk or when its type
    is not one of self.getAvailableFiletypes(). A row without a name is
    skipped when the table shows a name column; otherwise the name defaults
    to the order number. The order counter only advances for rows that are
    actually added, so it reflects the position among the accepted rows.

    NOTE(review): any renaming of duplicate names is not done here; if it
    happens, it happens inside TEntries.append -- confirm there.'''
    entries = TEntries()
    position = 0
    for row in self.rows:
        rowName = utils.ensureUnicode(row.getName())
        rowType = utils.ensureUnicode(row.getType())
        rowPath = utils.ensureUnicode(row.getPath())
        # Only rows pointing at an existing file of a known type are kept
        if not os.path.exists(rowPath) or rowType not in self.getAvailableFiletypes():
            continue
        if not rowName:
            # A name is only mandatory when the table has a name column
            if self.nameColumn:
                continue
            rowName = position
        entries.append(TEntry(rowName, rowType, rowPath, position))
        position += 1
    return entries
def addField(self, fieldname, value):
    '''Append value to the list of values stored under fieldname in self.params.

    Does nothing when fieldname or value is empty. Both are converted with
    utils.ensureUnicode before being stored; a new list is started the first
    time a field name is seen.'''
    if not (fieldname and value):
        return
    fieldKey = utils.ensureUnicode(fieldname)
    fieldValue = utils.ensureUnicode(value)
    self.params.setdefault(fieldKey, []).append(fieldValue)
def getCachedVersionFilename(xmlFilename):
    '''Return the assets path of the plaintext cache file belonging to an XML thesaurus.

    The cache file name is the base name of xmlFilename with its trailing
    ".xml" extension (matched case-insensitively) replaced by "_cache.thc".
    The path is obtained from assetsfolder.getAssetsPathFor; this function
    does not check whether the cache file actually exists.

    Returns None when xmlFilename is empty or does not end in ".xml".'''
    xmlFilename = utils.ensureUnicode(xmlFilename)
    if not xmlFilename:
        return None
    extension = '.xml'
    if not xmlFilename.lower().endswith(extension):
        return None
    baseName = utils.ensureUnicode(os.path.basename(xmlFilename))
    # Cut off only the trailing extension. A plain str.replace would miss an
    # upper-case ".XML" (yielding the XML file's own name as cache name) and
    # would also rewrite any ".xml" occurring earlier in the name.
    cacheName = baseName[:-len(extension)] + '_cache.thc'
    return assetsfolder.getAssetsPathFor(cacheName)
def addReferenceThesaurus(self, thesaurusName, thesaurusPath, type):
    '''Register a reference thesaurus under the given name in self.thesauri.

    The stored entry is a dict with the keys "path", "type" and "order",
    where order is the number of thesauri registered before this one.
    Nothing is stored when the path does not exist on disk or when the type
    is not listed in thesaurus_types.'''
    name = utils.ensureUnicode(thesaurusName)
    # TODO: is it safe to convert filenames to unicode?
    path = utils.ensureUnicode(thesaurusPath)
    kind = utils.ensureUnicode(type)
    if os.path.exists(path) and kind in thesaurus_types:
        entry = {"path": path, "type": kind, "order": len(self.thesauri)}
        self.thesauri[name] = entry
def getThesauriStatusOfWord(word):
    '''Return a status text for word, based on all thesauri from getThesauri().

    An empty word yields u"Leeg (niet ingevuld)", regardless of how many
    thesauri are configured. For every thesaurus containing the word a
    candidate status tuple (code, thesaurus name, text) is built --
    "niet_voorkeur" when the term has a use-reference, "voorkeur" otherwise --
    and bestStatus() picks between the candidate and the best one so far.
    When no thesaurus contains the word the result is u"Eigen term";
    otherwise the text of the winning status is returned.'''
    # Convert and validate once, before looping: this is loop-invariant, and
    # doing it up front means an empty word is reported as empty even when
    # there are no thesauri to iterate over.
    word = utils.ensureUnicode(word)
    if not word:
        return u"Leeg (niet ingevuld)"
    tmpstatus = None
    for th in getThesauri():
        if not th.containsTerm(word):
            continue
        term = th.getTerm(word)
        if term.getUse() is not None:
            candidate = ("niet_voorkeur", th.name, "Niet voorkeursterm %s" % (th.name))
        else:
            candidate = ("voorkeur", th.name, "Voorkeursterm %s" % (th.name))
        tmpstatus = bestStatus(tmpstatus, candidate)
    if tmpstatus is None:
        return u"Eigen term"
    return utils.ensureUnicode(tmpstatus[2])
def removeField(self, fieldname):
    '''Delete the entry stored under fieldname from self.params.

    An empty fieldname, or one that is not present, is silently ignored.
    The name is converted with utils.ensureUnicode before the lookup.'''
    if fieldname:
        self.params.pop(utils.ensureUnicode(fieldname), None)
def endElement(self, name):
    '''Handle the end of an XML element.

    When the converted tag name is "record" the record is finished:
    self.inRecord is cleared and self.emit() is called. Otherwise, when a
    tag is currently open (self.inTag), the collected value -- if there is
    one -- is stripped, converted with utils.ensureUnicode and appended to
    the list stored in self.docmap under the stripped tag name. The
    current tag state is reset afterwards.'''
    if getConvertedTagName(name) == "record":
        self.inRecord = False
        self.emit()
        return
    if not self.inTag:
        return
    if self.current_value is not None:
        tagName = utils.ensureUnicode(self.current_tag_name.strip())
        tagValue = utils.ensureUnicode(self.current_value.strip())
        # every field maps to a list so repeated tags keep all their values
        self.docmap.setdefault(tagName, []).append(tagValue)
    self.current_tag_name = None
    self.current_value = None
    self.inTag = False
def parseTextFile(self, filename):
    '''Parse a thesaurus from a plain text file with one term per line.

    The file is opened through inputfileformat.getFileDescriptor. Every
    line is stripped of newline characters and surrounding whitespace and
    converted with utils.ensureUnicode; each non-empty result is added via
    self.addTerm as a Term whose u"term" field holds the word. The file
    object is closed again when parsing ends, also if it ends in an error.'''
    fil = inputfileformat.getFileDescriptor(filename)
    try:
        for line in fil:
            # leave off newline characters
            line = line.replace("\n", "")
            line = line.replace("\r", "")
            line = line.strip()
            word = utils.ensureUnicode(line)
            if word:
                t = Term()
                t.addField(u"term", word)
                self.addTerm(t)
    finally:
        # Release the handle instead of leaking it. getFileDescriptor is not
        # visible here, so only call close() when the object offers one.
        closeFile = getattr(fil, "close", None)
        if closeFile is not None:
            closeFile()
def getStatusOfWord(self, word):
    '''Return a status text describing how word relates to this thesaurus.

    The result is one of: empty input, not present in this thesaurus,
    present but not the preferred term (the term has a use-reference), or
    the preferred term.'''
    lookup = utils.ensureUnicode(word)
    if not lookup:
        return u"Leeg (niet ingevuld)"
    if self.containsTerm(lookup):
        if self.getTerm(lookup).getUse() is None:
            return u"Voorkeurterm"
        return u"Niet de voorkeurterm"
    return u"Niet in de %s thesaurus" % (self.name)
def parseDefaultAdlibDoc(self, filename):
    '''Load an Adlib XML thesaurus from filename, using the plaintext cache when possible.

    When caching is enabled (utils.cacheThesauri) and a cached version of
    the file exists, terms and name are taken over from the cached
    thesaurus and the XML file is not parsed at all. Otherwise the XML is
    parsed with inputfileformat.parseSAXFile and, when caching is enabled,
    a cached version is created afterwards.'''
    xmlPath = utils.ensureUnicode(filename)
    if utils.cacheThesauri and cachedVersionExists(xmlPath):
        message = " - Loading thesaurus from previously cached file %s" % getCachedVersionFilename(xmlPath)
        print(message)
        cached = loadCachedVersion(xmlPath)
        self.terms = cached.terms
        self.name = cached.name
        return
    inputfileformat.parseSAXFile(xmlPath, self)
    if not utils.cacheThesauri:
        return
    message = " - Caching thesaurus to file %s" % getCachedVersionFilename(xmlPath)
    print(message)
    createCachedVersion(self, xmlPath)
def __init__(self, name=u'Unknown'):
    '''Initialise the instance with an empty term table and the given name.

    name -- display name, converted with utils.ensureUnicode
            (defaults to u'Unknown')'''
    self.terms = dict()
    self.name = utils.ensureUnicode(name)
def removeReferenceThesaurus(self, thesaurusName):
    '''Drop the thesaurus registered under thesaurusName from self.thesauri.

    The name is converted with utils.ensureUnicode first; an unknown name
    is silently ignored.'''
    key = utils.ensureUnicode(thesaurusName)
    self.thesauri.pop(key, None)
def getPath(self):
    '''Return the current value of the path field, converted with utils.ensureUnicode.'''
    rawPath = self.pathField.get()
    return utils.ensureUnicode(rawPath)
def containsTerm(self, word):
    '''Tell whether word is a key of self.terms.

    The word is converted with utils.ensureUnicode and lower-cased before
    the lookup, which makes the check case insensitive.'''
    lookupKey = utils.ensureUnicode(word).lower()
    return lookupKey in self.terms
def getType(self):
    '''Return the current value of the type selector, converted with utils.ensureUnicode.'''
    rawType = self.typeSelect.get()
    return utils.ensureUnicode(rawType)
def getName(self):
    '''Return the content of the name field, or "" when there is no name column.

    The field content is converted with utils.ensureUnicode.'''
    if not self.nameColumn:
        return ""
    return utils.ensureUnicode(self.nameField.get())
def getContent():
    '''Return the module-level _content value, converted with utils.ensureUnicode.'''
    content = _content
    return utils.ensureUnicode(content)
def start(self):
    '''Validate the form and run the report generation.

    Steps, in order:
      1. The collection name must be non-blank and the output file must
         pass isValidOutputFile; an existing output file is only
         overwritten after the user confirms.
      2. A wait dialog is shown and the detail level is set from
         self.settings.maxUniqueValues.
      3. The valid input rows are fetched from the input files table; at
         least one is required.
      4. The reference thesauri are configured (the ones from the settings
         when the thesaurus checkbox is enabled and ticked, an empty set
         otherwise).
      5. The input files are sorted, grouped by type and handed to
         generateReport.

    Every validation failure is reported with an error message box and
    aborts the run. Any exception raised in steps 2-5 closes the wait dialog,
    prints the stack trace and shows an ExceptionDialog.

    NOTE(review): the wait dialog is not closed on the success path in this
    method -- confirm that this is handled elsewhere.'''
    museumName = utils.ensureUnicode(self.museumnaamField.get())
    if not museumName.strip():
        tkMessageBox.showerror('Geen naam voor de collectie opgegeven', 'Vul de naam van de collectie in, aub.')
        return

    outputFile = self.outputField.get()
    if not isValidOutputFile(outputFile):
        tkMessageBox.showerror('Fout bij het starten', 'Kon niet starten omdat er geen correct "Output" bestand is opgegeven.')
        return
    if os.path.exists(outputFile):
        doOverwrite = tkMessageBox.askyesno('Bestand overschrijven?', 'Het gekozen "Output" bestand bestaat reeds. Wilt u verder gaan en het overschrijven?')
        if not doOverwrite:
            return

    # Bound before the try block so the exception handler can tell whether
    # the dialog was ever created; otherwise a failure inside WaitDialog()
    # itself would be masked by an unbound-variable error in the handler.
    waitDialog = None
    try:
        waitDialog = WaitDialog(self.parent)
        utils.setMaxDetail(self.settings.maxUniqueValues)

        # Will only return input files with valid files and names filled in
        inputFiles = self.inputFilesTable.getValues()
        if inputFiles.size() == 0:
            waitDialog.close()
            tkMessageBox.showerror('Fout bij het starten', u'Kon niet starten omdat er geen geldige "Input" bestanden zijn opgegeven.\nEr is minstens één input bestand met ingevulde naam, type en bestandslocatie vereist.')
            return

        if self.checkb["state"] != DISABLED and self.checkThesaurus.get():
            checkThesaurus = True
        else:
            checkThesaurus = False

        # Set configured reference thesauri
        err = None
        if checkThesaurus:
            referenceThesauri = self.settings.thesauri
            err = setCustomThesauri(referenceThesauri)
        else:
            err = setCustomThesauri(TEntries())
        if err is not None:
            waitDialog.close()
            tkMessageBox.showerror('Fout bij het starten', err)
            return

        # Set specified input files to analyse
        objects = []
        thesauri = []
        fieldstats = []
        csvfieldstats = []
        inputFiles.sort()
        for entry in inputFiles.values:
            utils.s("%s - %s - %s\n" % (entry.name, entry.type, entry.path))
            if entry.type == 'Adlib XML Objecten':
                objects.append(entry.path)
            elif entry.type == 'XML Fieldstats' or entry.type == "Adlib XML Personen":
                fieldstats.append(entry.path)
            elif entry.type == 'CSV Fieldstats':
                csvfieldstats.append(entry.path)
            elif entry.type == 'Adlib XML Thesaurus':
                thesauri.append(entry.path)
            else:
                # single-argument print(...) behaves like the print statement
                print("ERROR: Input bestand %s met type %s kan niet gebruikt worden" % (entry.name, entry.type))

        generateReport(museumName, objects, thesauri, fieldstats, csvfieldstats, outputFile, False)
    except Exception as e:
        if waitDialog is not None:
            waitDialog.close()
        stacktrace = traceback.format_exc()
        print("exception ...")
        print(stacktrace)
        print("done")
        ExceptionDialog(self.parent, e, stacktrace)
        return
def getTerm(self, word):
    '''Return the term object stored in self.terms for word (case insensitive).

    The word is converted with utils.ensureUnicode and lower-cased before
    the lookup. Only call this for a word that is known to be present --
    check with containsTerm first -- since the lookup is a plain subscript
    on self.terms.'''
    lookupKey = utils.ensureUnicode(word).lower()
    return self.terms[lookupKey]