Example #1
0
    def next(self):
        ''' Return the next dataset that can be opened.

            Files are taken from the front of C{self.files}. A file that
            fails to open is recorded in C{self.errors} and skipped.

            @rtype:  C{Dataset}
            @return: Return the next Dataset or raise StopIteration
        '''
        #Loop (rather than recurse) past files that can't be opened so a long
        #run of failures can't exhaust the interpreter's recursion limit.
        while self.files:
            #Get the first file
            self.file = self.files.pop(0)
            try:
                #Open it
                ds = formats.Open(self.file)

                #Remove any files in our filelist that occur in the dataset's
                #filelist and decrement the filecount
                for f in ds.filelist:
                    if f in self.files:
                        self.files.remove(f)
                        self.filecount -= 1
                #Fin!
                return ds
            except Exception:
                #Only trap ordinary errors; KeyboardInterrupt and SystemExit
                #must still be able to stop the crawl.
                #decrement the filecount and append to the errors list
                self.filecount -= 1
                self.errors.append((self.file, utilities.ExceptionInfo(),
                                    utilities.ExceptionInfo(10)))
                #Fall through to the next file so we don't stop the iteration

        #Have we finished? No files left to try.
        raise StopIteration
Example #2
0
 def writablecallback(arg):
     ''' Check that the path held in C{arg.value} is writable.

         @param arg: Object whose C{value} attribute is the filepath to check.
         @rtype:     C{boolean}
         @return:    True if C{utilities.writable} accepts the path. Otherwise
                     the problem is logged, shown in an error dialog where
                     possible, and False is returned.
     '''
     #filepath=arg.value.get()
     filepath=arg.value
     if utilities.writable(filepath):
         return True

     #arg.value.set('')
     title,message='I/O Error','%s is not writable.'%filepath
     #No exception is active at this point, so log the message itself
     logger.error(message)
     #Showing the dialog is best effort: it must not stop the callback
     #returning False if the message box can't be displayed.
     try:getargs.tkMessageBox.showerror(title,message)
     except Exception:pass
     return False
Example #3
0
#Import each format driver module found in this package directory (files
#matching '[a-z]*.py', in sorted order) and collect its filename regexes.
for _lib in sorted(_glob(_path.join(__path__[0], '[a-z]*.py'))):
    #Strip the directory and extension to get the bare module name
    _lib = _path.splitext(_path.basename(_lib))[0]
    try:
        #import custom format and add to the list of formats
        __formats__[_lib] = __import__('%s.%s' % (__name__, _lib),
                                       fromlist=[__name__])

        #append any of the module's format_regex entries that are not
        #already in the combined list
        format_regex.extend([
            r for r in __formats__[_lib].format_regex if not r in format_regex
        ])
    #except:pass
    #Any failure in the import or the regex merge is reported as a warning
    #so that one broken module doesn't stop the remaining ones being loaded.
    except:
        #NOTE(review): presumably restores the default warning handler after
        #the Ft package replaces it - confirm
        _warn.showwarning = _warn._show_warning  #Fix Ft overwrite
        _warn.warn('Unable to import %s\n%s' %
                   (_lib, utilities.ExceptionInfo()))

#import generic formats (eg. GeoTiff, JP2, etc...)
#This is done after the loop above, so its regexes are appended last.
import __default__
#append module _format_regex to list of format regexes
format_regex.extend(
    [_r for _r in __default__.format_regex if not _r in format_regex])


def Open(f):
    ''' Open an image with the appropriate driver.

        @type  f: C{str}
        @param f: a filepath to the dataset to open.

        @rtype:   C{formats.Dataset}
Example #4
0
def main(dir, xlsx, logger, mediaid=None, update=False, getovs=False, recurse=False, archive=False):

    """ Run the Metadata Crawler

        @type  dir:    C{str}
        @param dir:    The directory to start the metadata crawl.
        @type  xlsx:    C{str}
        @param xlsx:    Excel spreadsheet to write metadata to
        @type  logger: C{progresslogger.ProgressLogger}
        @param logger: Use an already instantiated logger
        @type  mediaid:C{str}
        @param mediaid:CD/DVD media ID
        @type  getovs: C{boolean}
        @param getovs: Generate overview (quicklook/thumbnail) images
        @type  recurse: C{boolean}
        @param recurse: Search directory recursively?
        @type  archive: C{boolean}
        @param archive: Search compressed archives (tar/zip)?
        @return:  C{progresslogger.ProgressLogger}
    """

    shp=xlsx.replace('.xlsx','.shp')

    format_regex  = formats.format_regex
    format_fields = formats.fields

    logger.debug(' '.join(sys.argv))

    #raise Exception
    #ExcelWriter=utilities.ExcelWriter(xlsx,format_fields.keys(),update=update)
    with utilities.ExcelWriter(xlsx,format_fields.keys(),update=update) as ExcelWriter:
        try:
            #Are we updating an existing crawl?
            records={}
            if update and os.path.exists(xlsx):

                #Do we need to recreate the shapefile?
                if os.path.exists(shp):
                    ShapeWriter=False
                else:
                    logger.info('%s does not exist, it will be recreated...'%shp)
                    ShapeWriter=geometry.ShapeWriter(shp,format_fields,update=False)

                #Build a dict of existing records
                row=-1
                #with utilities.ExcelReader(xlsx) as ExcelReader: #Using a context manager ensures closure before writing
                for row,rec in enumerate(utilities.ExcelReader(xlsx)):
                    #Check if the dataset still exists, mark it DELETED if it doesn't
                    if os.path.exists(rec['filepath']) or rec['mediaid'] !='' or \
                       (rec['filepath'][0:4]=='/vsi' and utilities.compressed_file_exists(rec['filepath'],False)):
                        if ShapeWriter:
                            ext=[rec['UL'].split(','),rec['UR'].split(','),rec['LR'].split(','),rec['LL'].split(',')]
                            ShapeWriter.WriteRecord(ext,rec)
                        #Kludge to ensure backwards compatibility with previously generated guids
                        #records[rec['guid']]=rec
                        records[utilities.uuid(rec['filepath'])]=(row,rec)
                    else:
                        if rec.get('DELETED',0)not in [1,'1']:
                            rec['DELETED']=1
                            ExcelWriter.UpdateRecord(rec,row)
                            logger.info('Marked %s as deleted' % (rec['filepath']))
                if row==-1:logger.info('Output spreadsheet is empty, no records to update')
                ExcelWriter.save()
                del ShapeWriter
            ShapeWriter=geometry.ShapeWriter(shp,format_fields,update=update)

        except Exception,err:
            logger.error('%s' % utilities.ExceptionInfo())
            logger.debug(utilities.ExceptionInfo(10))
            #sys.exit(1)
            return

        logger.info('Searching for files...')
        now=time.time()
        Crawler=crawler.Crawler(dir,recurse=recurse,archive=archive)
        logger.info('Found %s files...'%Crawler.filecount)

        #Loop thru dataset objects returned by Crawler
        for ds in Crawler:
            try:
                logger.debug('Attempting to open %s'%Crawler.file)
                fi=ds.fileinfo
                fi['filepath']=utilities.uncpath(fi['filepath'])
                fi['filelist']='|'.join(utilities.uncpath(ds.filelist))
                #qlk=utilities.uncpath(os.path.join(os.path.dirname(xlsx),'%s.%s.qlk.jpg'%(fi['filename'],fi['guid'])))
                #thm=utilities.uncpath(os.path.join(os.path.dirname(xlsx),'%s.%s.thm.jpg'%(fi['filename'],fi['guid'])))
                qlk=os.path.join(os.path.dirname(xlsx),'%s.%s.qlk.jpg'%(fi['filename'],fi['guid']))
                thm=os.path.join(os.path.dirname(xlsx),'%s.%s.thm.jpg'%(fi['filename'],fi['guid']))

                if update and ds.guid in records:
                    row,rec=records[ds.guid]
                    #Issue 35: if it's not modified, but we've asked for overview images and it doesn't already have them....
                    if ismodified(rec,fi,os.path.dirname(xlsx)) or (not rec['quicklook'] and getovs):
                        md=ds.metadata
                        geom=ds.extent
                        md.update(fi)
                        logger.info('Updated metadata for %s, %s files remaining' % (Crawler.file,len(Crawler.files)))
                        try:
                            if rec['quicklook'] and os.path.exists(rec['quicklook']):getovs=False #Don't update overview
                            if getovs:
                                qlk=ds.getoverview(qlk, width=800)
                                #We don't need to regenerate it, just resize it
                                #thm=ds.getoverview(thm, width=150)
                                thm=overviews.resize(qlk,thm,width=150)
                                md['quicklook']=os.path.basename(qlk)
                                md['thumbnail']=os.path.basename(thm)
                                #md['quicklook']=utilities.uncpath(qlk)
                                #md['thumbnail']=utilities.uncpath(thm)
                                logger.info('Updated overviews for %s' % Crawler.file)
                        except Exception,err:
                            logger.error('%s\n%s' % (Crawler.file, utilities.ExceptionInfo()))
                            logger.debug(utilities.ExceptionInfo(10))
                        try:
                            ExcelWriter.UpdateRecord(md,row)
                        except Exception,err:
                            logger.error('%s\n%s' % (Crawler.file, utilities.ExceptionInfo()))
                            logger.debug(utilities.ExceptionInfo(10))
                        try:
                            ShapeWriter.UpdateRecord(geom,md,'guid="%s"'%rec['guid'])
                        except Exception,err:
                            logger.error('%s\n%s' % (Crawler.file, utilities.ExceptionInfo()))
                            logger.debug(utilities.ExceptionInfo(10))
                    else:
                        logger.info('Metadata did not need updating for %s, %s files remaining' % (Crawler.file,len(Crawler.files)))
                        continue
Example #5
0
                    md.update(fi)
                    if mediaid:md.update({'mediaid':mediaid})
                    logger.info('Extracted metadata from %s, %s files remaining' % (Crawler.file,len(Crawler.files)))
                    try:
                        if getovs:
                            qlk=ds.getoverview(qlk, width=800)
                            #We don't need to regenerate it, just resize it
                            #thm=ds.getoverview(thm, width=150)
                            thm=overviews.resize(qlk,thm,width=150)
                            md['quicklook']=os.path.basename(qlk)
                            md['thumbnail']=os.path.basename(thm)
                            #md['quicklook']=utilities.uncpath(qlk)
                            #md['thumbnail']=utilities.uncpath(thm)
                            logger.info('Generated overviews from %s' % Crawler.file)
                    except Exception as err:
                        logger.error('%s\n%s' % (Crawler.file, utilities.ExceptionInfo()))
                        logger.debug(utilities.ExceptionInfo(10))
                    try:
                        ExcelWriter.WriteRecord(md)
                    except Exception as err:
                        logger.error('%s\n%s' % (Crawler.file, utilities.ExceptionInfo()))
                        logger.debug(utilities.ExceptionInfo(10))
                    try:
                        ShapeWriter.WriteRecord(geom,md)
                    except Exception as err:
                        logger.error('%s\n%s' % (Crawler.file, utilities.ExceptionInfo()))
                        logger.debug(utilities.ExceptionInfo(10))

            except NotImplementedError as err:
                logger.warn('%s: %s' % (Crawler.file, str(err)))
                logger.debug(utilities.ExceptionInfo(10))
Example #6
0
def main(xlsx,xsl,dir,logger, mef=False,cat='',ops=''):
    '''
    Run the Metadata Transform

    Each record read from the spreadsheet is converted to XML, transformed
    with the XSL and written to C{dir} as "<filename>.<guid>.xml". Records
    flagged DELETED are not transformed; any existing output for them is
    renamed with a ".deleted" suffix. Errors are logged per record and do not
    stop the remaining records being processed.

    @type  xlsx: C{str}
    @param xlsx: Excel spreadsheet to read metadata from
    @type  xsl: C{str}
    @param xsl: XSL transform, either an *.xsl file or one of the names in
                C{transforms.transforms}
    @type  dir: C{str}
    @param dir: The directory to output metadata XML to
    @type  logger: C{progresslogger.ProgressLogger}
    @param logger: An instantiated logger
    @type  mef: C{boolean}
    @param mef: Create Metadata Exchange Format (MEF) file
    @type  cat: C{str}
    @param cat: The GeoNetwork category/ies to apply to the records ('|' separated)
    @type  ops: C{str}
    @param ops: The GeoNetwork operations privileges to apply to the records ('|' separated)

    @todo - start using the "-m" opt, currently not used at all.
          - add it to the GetArgs GUI
          - populate a dropdown list with transforms.categories
          - add a gui event that show/hides or enables/disables the categ list triggered by the mef opt
          - if <default> categ is selected, logic is:
            * check xlsx for categ column
            * if so use that,
            * if categ column is null for a row, of if no column at all then use default from config

    '''

    xlrdr=utilities.ExcelReader(xlsx, list)
    qlkdir=os.path.dirname(xlsx)
    logger.info('Transforming %s metadata records'%xlrdr.records)
    transform=transforms.Transform(xsl)
    for rec in xlrdr:
        #Reset per-record state so the error handler below never reports or
        #removes output belonging to a previous record.
        filename=guid=xmlfile=meffile=None
        try:
            tmpcat=cat  # dummy var as we may overwrite it
            tmpops=ops
            overviews=[]
            deleted=False
            #Fields to pass to the transform. Built as a new list instead of
            #deleting from rec while looping over it, which skipped the field
            #following each deleted one.
            fields=[]
            for val in rec:                  # We use a list instead of a dict as there can
                                             # be multiple fields with the same header/name
                name,value=val[0],val[1]
                if name=='DELETED' and value in [1,'1']:deleted=True
                elif name=='filename':filename=value
                elif name=='guid':guid=value
                elif name in ['quicklook','thumbnail'] and value not in [ '', None]:
                    overviews.append(os.path.join(qlkdir,value))
                elif name == 'category' and value:
                    #Per-record category overrides the default and is not
                    #passed on to the transform
                    tmpcat=value
                    continue
                elif name == 'operations' and value:
                    #Per-record operations override the default and are not
                    #passed on to the transform
                    tmpops=value
                    continue
                fields.append(val)

            #Output can't be named without both of these
            if filename is None or guid is None:
                raise ValueError('Record has no filename and/or guid field')

            xmlfile='%s/%s.%s.xml'%(dir,filename,guid)
            meffile='%s/%s.%s.mef'%(dir,filename,guid)
            if deleted:
                logger.info('%s has been marked as deleted, XSLT processing will be terminated.'%filename)
                if os.path.exists(xmlfile):os.rename(xmlfile,'%s.deleted'%xmlfile)
                if os.path.exists(meffile):os.rename(meffile,'%s.deleted'%meffile)
                continue
            strxml=transforms.ListToXML(fields,'crawlresult')
            transform.transform(strxml, xmlfile)
            #if overviews:transforms.CreateMEF(dir,xmlfile,guid,overviews)
            #Create MEF even if there are no overviews
            if mef:transforms.CreateMEF(dir,xmlfile,guid,overviews,tmpcat,tmpops)
            logger.info('Transformed metadata for ' +filename)
        except Exception:
            logger.error('%s %s' % (filename, utilities.ExceptionInfo()))
            logger.debug(utilities.ExceptionInfo(10))
            #Best-effort removal of any partial output for this record only
            for partial in (xmlfile,meffile):
                if partial is None:continue
                try:os.remove(partial)
                except OSError:pass   # e.g. the file was never created
Example #7
0
 def onerror(self, e):
     ''' Append an entry for a failed operation to C{self.errors}.

         The entry is a tuple of C{e.filename} and the results of
         C{utilities.ExceptionInfo()} and C{utilities.ExceptionInfo(10)}.

         @param e: Error object with a C{filename} attribute
     '''
     failed = e.filename
     summary = utilities.ExceptionInfo()
     detail = utilities.ExceptionInfo(10)
     self.errors.append((failed, summary, detail))
Example #8
0
                        (Crawler.file, len(Crawler.files)))
                    try:
                        if getovs:
                            qlk = ds.getoverview(qlk, width=800)
                            #We don't need to regenerate it, just resize it
                            #thm=ds.getoverview(thm, width=150)
                            thm = overviews.resize(qlk, thm, width=150)
                            md['quicklook'] = os.path.basename(qlk)
                            md['thumbnail'] = os.path.basename(thm)
                            #md['quicklook']=utilities.uncpath(qlk)
                            #md['thumbnail']=utilities.uncpath(thm)
                            logger.info('Generated overviews from %s' %
                                        Crawler.file)
                    except Exception as err:
                        logger.error('%s\n%s' %
                                     (Crawler.file, utilities.ExceptionInfo()))
                        logger.debug(utilities.ExceptionInfo(10))
                    try:
                        ExcelWriter.WriteRecord(md)
                    except Exception as err:
                        logger.error('%s\n%s' %
                                     (Crawler.file, utilities.ExceptionInfo()))
                        logger.debug(utilities.ExceptionInfo(10))
                    try:
                        ShapeWriter.WriteRecord(geom, md)
                    except Exception as err:
                        logger.error('%s\n%s' %
                                     (Crawler.file, utilities.ExceptionInfo()))
                        logger.debug(utilities.ExceptionInfo(10))

            except NotImplementedError as err: