def next(self):
    '''
    Return the next successfully opened Dataset.

    @rtype:  C{Dataset}
    @return: Return the next Dataset or raise StopIteration

    Files that fail to open are recorded in C{self.errors} and skipped.
    '''
    # Loop instead of recursing: the original recursed once per unopenable
    # file, so a long run of bad files could exhaust the recursion limit.
    while self.files:
        # Take the next candidate file off the front of the queue.
        self.file = self.files.pop(0)
        try:
            ds = formats.Open(self.file)
            # Remove any files in our filelist that occur in the dataset's
            # filelist (they belong to this dataset, not a new one) and
            # decrement the filecount accordingly.
            for f in ds.filelist:
                if f in self.files:
                    self.files.remove(f)
                    self.filecount -= 1
            return ds
        except Exception:
            # Narrowed from a bare `except:` so Ctrl-C/SystemExit still work.
            # Record the failure (path, short info, traceback) and move on.
            self.filecount -= 1
            self.errors.append((self.file,
                                utilities.ExceptionInfo(),
                                utilities.ExceptionInfo(10)))
    # Queue exhausted - end of iteration.
    raise StopIteration
def writablecallback(arg):
    '''
    Validate that the filepath held by an argument object is writable.

    @param arg: argument object whose C{value} attribute holds a filepath.
    @return: C{True} if the path is writable, C{False} otherwise.
    '''
    filepath = arg.value
    if utilities.writable(filepath):
        return True
    else:
        # Removed the stray no-op expression statement `arg.value` that was
        # left behind when the old `arg.value.set('')` call was commented out.
        err = 'I/O Error', '%s is not writable.' % filepath
        # NOTE(review): ExceptionInfo() is logged with no active exception -
        # presumably utilities.writable() sets one up; confirm.
        logger.error('%s' % utilities.ExceptionInfo())
        try:
            # Fix: the title/message tuple was built but never passed -
            # showerror() was being called with no arguments.
            getargs.tkMessageBox.showerror(*err)
        except:
            pass  # Best-effort: no GUI available (headless run).
        return False
for _lib in sorted(_glob(_path.join(__path__[0], '[a-z]*.py'))): _lib = _path.splitext(_path.basename(_lib))[0] try: #import custom format and add to the list of formats __formats__[_lib] = __import__('%s.%s' % (__name__, _lib), fromlist=[__name__]) #append module _format_regex & fields to lists format_regex.extend([ r for r in __formats__[_lib].format_regex if not r in format_regex ]) #except:pass except: _warn.showwarning = _warn._show_warning #Fix Ft overwrite _warn.warn('Unable to import %s\n%s' % (_lib, utilities.ExceptionInfo())) #import generic formats (eg. GeoTiff, JP2, etc...) import __default__ #append module _format_regex to list of format regexes format_regex.extend( [_r for _r in __default__.format_regex if not _r in format_regex]) def Open(f): ''' Open an image with the appropriate driver. @type f: C{str} @param f: a filepath to the dataset to open. @rtype: C{formats.Dataset}
def main(dir, xlsx, logger, mediaid=None, update=False, getovs=False, recurse=False, archive=False):
    """ Run the Metadata Crawler

        @type  dir:     C{str}
        @param dir:     The directory to start the metadata crawl.
        @type  xlsx:    C{str}
        @param xlsx:    Excel spreadsheet to write metadata to
        @type  logger:  C{progresslogger.ProgressLogger}
        @param logger:  Use an already instantiated logger
        @type  mediaid: C{str}
        @param mediaid: CD/DVD media ID
        @type  getovs:  C{boolean}
        @param getovs:  Generate overview (quicklook/thumbnail) images
        @type  recurse: C{boolean}
        @param recurse: Search directory recursively?
        @type  archive: C{boolean}
        @param archive: Search compressed archives (tar/zip)?
        @return:  C{progresslogger.ProgressLogger}
    """
    shp = xlsx.replace('.xlsx', '.shp')
    format_regex = formats.format_regex  # NOTE(review): unused in the visible body - kept in case trailing code (not shown here) references it
    format_fields = formats.fields
    logger.debug(' '.join(sys.argv))

    # Using a context manager ensures the spreadsheet is closed even on error
    with utilities.ExcelWriter(xlsx, format_fields.keys(), update=update) as ExcelWriter:
        try:
            # Are we updating an existing crawl?
            records = {}
            if update and os.path.exists(xlsx):
                # Do we need to recreate the shapefile?
                if os.path.exists(shp):
                    ShapeWriter = False
                else:
                    logger.info('%s does not exist, it will be recreated...' % shp)
                    ShapeWriter = geometry.ShapeWriter(shp, format_fields, update=False)

                # Build a dict of existing records
                row = -1
                for row, rec in enumerate(utilities.ExcelReader(xlsx)):
                    # Check if the dataset still exists, mark it DELETED if it doesn't
                    if os.path.exists(rec['filepath']) or rec['mediaid'] != '' or \
                       (rec['filepath'][0:4] == '/vsi' and utilities.compressed_file_exists(rec['filepath'], False)):
                        if ShapeWriter:
                            ext = [rec['UL'].split(','), rec['UR'].split(','),
                                   rec['LR'].split(','), rec['LL'].split(',')]
                            ShapeWriter.WriteRecord(ext, rec)
                        # Kludge to ensure backwards compatibility with previously generated guids
                        records[utilities.uuid(rec['filepath'])] = (row, rec)
                    else:
                        if rec.get('DELETED', 0) not in [1, '1']:
                            rec['DELETED'] = 1
                            ExcelWriter.UpdateRecord(rec, row)
                            logger.info('Marked %s as deleted' % (rec['filepath']))
                if row == -1:
                    logger.info('Output spreadsheet is empty, no records to update')
                ExcelWriter.save()
                del ShapeWriter
            ShapeWriter = geometry.ShapeWriter(shp, format_fields, update=update)
        except Exception as err:  # unified with the `as` form used below (py2-only comma form removed)
            logger.error('%s' % utilities.ExceptionInfo())
            logger.debug(utilities.ExceptionInfo(10))
            return

        logger.info('Searching for files...')
        now = time.time()  # NOTE(review): unused in the visible body - presumably timed a summary past this view; confirm
        Crawler = crawler.Crawler(dir, recurse=recurse, archive=archive)
        logger.info('Found %s files...' % Crawler.filecount)

        # Loop thru dataset objects returned by Crawler
        for ds in Crawler:
            try:
                logger.debug('Attempting to open %s' % Crawler.file)
                fi = ds.fileinfo
                fi['filepath'] = utilities.uncpath(fi['filepath'])
                fi['filelist'] = '|'.join(utilities.uncpath(ds.filelist))
                qlk = os.path.join(os.path.dirname(xlsx), '%s.%s.qlk.jpg' % (fi['filename'], fi['guid']))
                thm = os.path.join(os.path.dirname(xlsx), '%s.%s.thm.jpg' % (fi['filename'], fi['guid']))

                if update and ds.guid in records:
                    row, rec = records[ds.guid]
                    # Issue 35: if it's not modified, but we've asked for overview
                    # images and it doesn't already have them....
                    if ismodified(rec, fi, os.path.dirname(xlsx)) or (not rec['quicklook'] and getovs):
                        md = ds.metadata
                        geom = ds.extent
                        md.update(fi)
                        logger.info('Updated metadata for %s, %s files remaining' % (Crawler.file, len(Crawler.files)))
                        try:
                            if rec['quicklook'] and os.path.exists(rec['quicklook']):
                                getovs = False  # Don't update overview
                            if getovs:
                                qlk = ds.getoverview(qlk, width=800)
                                # We don't need to regenerate the thumbnail, just resize the quicklook
                                thm = overviews.resize(qlk, thm, width=150)
                                md['quicklook'] = os.path.basename(qlk)
                                md['thumbnail'] = os.path.basename(thm)
                                logger.info('Updated overviews for %s' % Crawler.file)
                        except Exception as err:
                            logger.error('%s\n%s' % (Crawler.file, utilities.ExceptionInfo()))
                            logger.debug(utilities.ExceptionInfo(10))
                        try:
                            ExcelWriter.UpdateRecord(md, row)
                        except Exception as err:
                            logger.error('%s\n%s' % (Crawler.file, utilities.ExceptionInfo()))
                            logger.debug(utilities.ExceptionInfo(10))
                        try:
                            ShapeWriter.UpdateRecord(geom, md, 'guid="%s"' % rec['guid'])
                        except Exception as err:
                            logger.error('%s\n%s' % (Crawler.file, utilities.ExceptionInfo()))
                            logger.debug(utilities.ExceptionInfo(10))
                    else:
                        logger.info('Metadata did not need updating for %s, %s files remaining' % (Crawler.file, len(Crawler.files)))
                        continue
                else:
                    # New dataset (or not an update run): extract from scratch.
                    # NOTE(review): the md/geom bindings were absent on this path
                    # in the original text and would have raised NameError.
                    md = ds.metadata
                    geom = ds.extent
                    md.update(fi)
                    if mediaid:
                        md.update({'mediaid': mediaid})
                    logger.info('Extracted metadata from %s, %s files remaining' % (Crawler.file, len(Crawler.files)))
                    try:
                        if getovs:
                            qlk = ds.getoverview(qlk, width=800)
                            # We don't need to regenerate the thumbnail, just resize the quicklook
                            thm = overviews.resize(qlk, thm, width=150)
                            md['quicklook'] = os.path.basename(qlk)
                            md['thumbnail'] = os.path.basename(thm)
                            logger.info('Generated overviews from %s' % Crawler.file)
                    except Exception as err:
                        logger.error('%s\n%s' % (Crawler.file, utilities.ExceptionInfo()))
                        logger.debug(utilities.ExceptionInfo(10))
                    try:
                        ExcelWriter.WriteRecord(md)
                    except Exception as err:
                        logger.error('%s\n%s' % (Crawler.file, utilities.ExceptionInfo()))
                        logger.debug(utilities.ExceptionInfo(10))
                    try:
                        ShapeWriter.WriteRecord(geom, md)
                    except Exception as err:
                        logger.error('%s\n%s' % (Crawler.file, utilities.ExceptionInfo()))
                        logger.debug(utilities.ExceptionInfo(10))
            except NotImplementedError as err:
                logger.warn('%s: %s' % (Crawler.file, str(err)))
                logger.debug(utilities.ExceptionInfo(10))
def main(xlsx, xsl, dir, logger, mef=False, cat='', ops=''):
    # NOTE(review): because of the trailing `%`, the string below is an
    # expression statement, NOT this function's __doc__ - kept as-is to
    # preserve behavior (__doc__ stays None).
    ''' Run the Metadata Transform

        @type  xlsx:  C{str}
        @param xlsx:  Excel spreadsheet to read metadata from
        @type  xsl:   C{str}
        @param xsl:   XSL transform {*.xsl|%s}
        @type  dir:   C{str}
        @param dir:   The directory to output metadata XML to
        @type  logger: C{progresslogger.ProgressLogger}
        @param logger: An instantiated logger
        @type  mef:   C{boolean}
        @param mef:   Create Metadata Exchange Format (MEF) file
        @type  cat:   C{str}
        @param cat:   The GeoNetwork category/ies to apply to the records ('|' separated)
        @type  ops:   C{str}
        @param ops:   The GeoNetwork operations privileges to apply to the records ('|' separated)

        @todo - start using the "-m" opt, currently not used at all.
              - add it to the GetArgs GUI
                - populate a dropdown list with transforms.categories
                - add a gui event that show/hides or enables/disables the categ list triggered by the mef opt
              - if <default> categ is selected, logic is:
                * check xlsx for categ column
                * if so use that,
                * if categ column is null for a row, of if no column at all then use default from config
    ''' % '|'.join(['"%s"' % s for s in transforms.transforms.keys()])

    xlrdr = utilities.ExcelReader(xlsx, list)
    qlkdir = os.path.dirname(xlsx)
    logger.info('Transforming %s metadata records' % xlrdr.records)
    transform = transforms.Transform(xsl)
    for rec in xlrdr:
        # Pre-bind so the error handler below can't hit a NameError when a
        # record fails before these are assigned (bug on a first-record error).
        filename = ''
        guid = ''
        xmlfile = meffile = None
        try:
            tmpcat = cat  # dummy vars as we may overwrite them per-record
            tmpops = ops
            overviews = []
            deleted = False
            # We use a list instead of a dict as there can be multiple
            # fields with the same header/name.
            # Fix: the original did `del rec[i]` while enumerating rec, which
            # skips the element following each deletion - collect indices and
            # delete after the scan instead.
            remove = []
            for i, val in enumerate(rec):
                if val[0] == 'DELETED' and val[1] in [1, '1']:
                    deleted = True
                elif val[0] == 'filename':
                    filename = val[1]
                elif val[0] == 'guid':
                    guid = val[1]
                elif val[0] in ['quicklook', 'thumbnail'] and val[1] not in ['', None]:
                    overviews.append(os.path.join(qlkdir, val[1]))
                elif val[0] == 'category' and val[1]:
                    tmpcat = val[1]
                    remove.append(i)
                elif val[0] == 'operations' and val[1]:
                    tmpops = val[1]
                    remove.append(i)
            for i in reversed(remove):  # reverse keeps earlier indices valid
                del rec[i]

            xmlfile = '%s/%s.%s.xml' % (dir, filename, guid)
            meffile = '%s/%s.%s.mef' % (dir, filename, guid)
            if deleted:
                logger.info('%s has been marked as deleted, XSLT processing will be terminated.' % filename)
                if os.path.exists(xmlfile):
                    os.rename(xmlfile, '%s.deleted' % xmlfile)
                if os.path.exists(meffile):
                    os.rename(meffile, '%s.deleted' % meffile)
                continue

            strxml = transforms.ListToXML(rec, 'crawlresult')
            result = transform.transform(strxml, xmlfile)
            # Create MEF even if there are no overviews
            if mef:
                transforms.CreateMEF(dir, xmlfile, guid, overviews, tmpcat, tmpops)
            logger.info('Transformed metadata for ' + filename)
        except Exception as err:  # py2-only comma form replaced with `as`
            logger.error('%s %s' % (filename, utilities.ExceptionInfo()))
            logger.debug(utilities.ExceptionInfo(10))
            # Best-effort cleanup of partially-written output files.
            try:
                os.remove(xmlfile)
            except Exception:
                pass
            try:
                os.remove(meffile)
            except Exception:
                pass
def onerror(self, e):
    '''Record a crawl error: the offending path plus short and full exception info.'''
    brief = utilities.ExceptionInfo()
    full_trace = utilities.ExceptionInfo(10)
    self.errors.append((e.filename, brief, full_trace))
(Crawler.file, len(Crawler.files))) try: if getovs: qlk = ds.getoverview(qlk, width=800) #We don't need to regenerate it, just resize it #thm=ds.getoverview(thm, width=150) thm = overviews.resize(qlk, thm, width=150) md['quicklook'] = os.path.basename(qlk) md['thumbnail'] = os.path.basename(thm) #md['quicklook']=utilities.uncpath(qlk) #md['thumbnail']=utilities.uncpath(thm) logger.info('Generated overviews from %s' % Crawler.file) except Exception as err: logger.error('%s\n%s' % (Crawler.file, utilities.ExceptionInfo())) logger.debug(utilities.ExceptionInfo(10)) try: ExcelWriter.WriteRecord(md) except Exception as err: logger.error('%s\n%s' % (Crawler.file, utilities.ExceptionInfo())) logger.debug(utilities.ExceptionInfo(10)) try: ShapeWriter.WriteRecord(geom, md) except Exception as err: logger.error('%s\n%s' % (Crawler.file, utilities.ExceptionInfo())) logger.debug(utilities.ExceptionInfo(10)) except NotImplementedError as err: