def call_local(self):
    '''
    sort out and return local_file

    This comes from the URL and local_dir
    and ends .store

    Returns:
        Path: the resolved local cache file (parent directories
        are created as a side effect).
    '''
    # if the URL is already registered in the database, reuse its path
    if self.indb():
        if callable(self.local):
            # bug fix: was sys.msg(...), which does not exist --
            # the rest of this project reports via self.msg
            self.msg(f"**unexpected method for self.local {self.local}")
        else:
            return self.local

    kwargs = fdict(self.__dict__.copy())
    # prefer an explicitly supplied, non-empty local_dir
    if 'local_dir' in kwargs and \
        (kwargs['local_dir'] is not None) and \
        len(kwargs['local_dir']) > 0:
        self.local_dir = list_resolve(kwargs['local_dir'])

    # otherwise fall back on the database directory
    if (self.local_dir is None) or (len(self.local_dir) == 0):
        self.local_dir = list_resolve(self.db_dir)
    # cache path = first local_dir entry + URL path with scheme stripped
    self.local_file = Path(self.local_dir[0], self.as_posix().split("://")[1])
    # spaces are unwanted in cache file names
    self.local_file = Path(str(self.local_file).replace(' ', '_'))
    suffix = self.local_file.suffix
    # append '.store' after whatever suffix the file already carries
    self.local_file = self.local_file.with_suffix(suffix + '.store')
    self.check_path(self.local_file.parent)
    self.local_file.parent.mkdir(parents=True, exist_ok=True)
    return self.local_file
def main():
  '''Demo driver exercising the URL class against the USGS MODIS server.'''
  # disabled example: download a single HDF file and verify its byte size
  if False:
    u='https://e4ftl01.cr.usgs.gov/MOTA/MCD15A3H.006/2003.12.11/MCD15A3H.A2003345.h09v06.006.2015084002115.hdf'
    url = URL(u)
    data = url.read_bytes()
    ofile = Path('data',url.name)
    osize = ofile.write_bytes(data)
    assert osize == 3365255
    print('passed')

  # disabled example: glob a single directory level for HDF files
  if False:
    u='https://e4ftl01.cr.usgs.gov/MOTA/MCD15A3H.006/2003.12.11'
    url = URL(u)
    files = url.glob('*0.hdf',pre_filter=True) 
    print(files) 

  # active example: multi-level glob, with a database cache file
  if True:
    u='https://e4ftl01.cr.usgs.gov'
    import os
    os.environ['CACHE_FILE'] = 'data/database.db'

    url = URL(u,verbose=True,db_file='data/new_db.txt',local_dir='work')
    rlist = url.glob('MOT*/MCD15A3H.006/2003.12.11/*0.hdf',pre_filter=True)
    for i,r in enumerate(rlist):
      print(i)
      # we can save on declaring a new URL by passing the old one's state
      u = URL(r,**(fdict(url.__dict__.copy())))
      data=u.read_bytes()
      # update database
      u.flush()
Beispiel #3
0
 def _convert_to_abs(self, ilist):
     '''Return absolute URL objects for each relative link in ilist.'''
     # NOTE: this is slow and may be not needed
     self.msg(f'parsing URLs from html file {len(ilist)} items')
     converted = []
     for link in ilist:
         # re-snapshot our state per link, since update() may mutate it
         state = fdict(self.__dict__.copy())
         converted.append(self.update(str(self), link.rstrip('/#'), **state))
     return converted
 def init(self, **kwargs):
     '''
     (Re)initialise object state from kwargs and ensure a Database.

     Keyword arguments are passed through ginit() into self.__dict__.
     A new Database is built only when one is not already held.
     '''
     self.__dict__.update(ginit(self, **kwargs))
     # only build a Database if we don't already hold a valid one
     # (isinstance rather than type(...) == ... honours subclasses)
     if not ('database' in self.__dict__
             and isinstance(self.database, Database)):
         self.database = Database(self.db_file,
                                  **(fdict(self.__dict__.copy())))
Beispiel #5
0
    def glob(self, pattern, pre_filter=True):
        '''
        Iterate over this subtree and yield all existing files (of any
        kind, including directories) matching the given relative pattern.

        The URL here then needs to return lxml html code.

        Positional arguments:
           pattern  : to search for e.g. */2021.*.01
                      only wildcards * and ? considered at present

        Keyword arguments:
           pre_filter : passed through to _glob()

        Returns:
           list of matching URL objects
        '''
        u = self
        url = str(u)
        # strip any trailing slash
        # (bug fix: this read "urls[:-1]", an undefined name)
        if url[-1] == '/':
            url = url[:-1]
        url = self.update(url, pattern)

        # check in database cache first
        store_url = url
        store_flag = 'query'
        olist = self.database.get_from_db(store_flag, store_url)
        if olist is not None:
            if type(olist) is list:
                return [self.update(o) for o in olist]
            return [self.update(olist)]

        # no cache hit: walk from the URL root, globbing one path
        # component per level
        uc = np.array(url.parts)
        # bug fix: base_list was only assigned inside the loop, so a
        # single-component URL raised NameError below
        base_list = [self.update(uc[0])]
        for w in uc[1:]:
            new_list = []
            for b in base_list:
                # glob this pattern component against each base URL
                glob = self.update(b)._glob(w, pre_filter=pre_filter)
                new_list = new_list + glob
            # np.object was removed in numpy >= 1.24; it was a plain
            # alias of the builtin object, so behaviour is unchanged
            base_list = np.unique(
                np.array(new_list, dtype=object).flatten())

        base_list = np.unique(np.array(base_list, dtype=object))

        olist = list(np.array([self.update(i) for i in base_list]).flatten())
        self.dedate()

        # propagate our state into each result URL
        for l in olist:
            l.init(**(fdict(self.__dict__.copy())))

        # cache this in case we want to re-use it
        cache = {store_flag: {str(store_url): [str(i) for i in olist]}}
        self.database.set_db(cache)
        if type(olist) is list:
            return [self.update(o) for o in olist]
        return [self.update(olist)]
Beispiel #6
0
    def __init__(self, **kwargs):
        '''
        Set up MODIS-specific defaults, the database connection,
        GDAL translate options, and the SDS (sub-dataset) list.

        Any keyword argument overrides the corresponding default.
        '''
        kwargs['defaults'] = {
            'store_msg':  [],
            'database':   None,
            'product':    'MCD15A3H',
            'tile':       'h08v06',
            'log':        None,
            'day':        '01',
            'doy':        None,
            'month':      '*',
            'sds':        None,
            'year':       "2019",
            'site':       'https://e4ftl01.cr.usgs.gov',
            'size_check': False,
            'noclobber':  True,
            'local_dir':  'work',
            'local_file': None,
            'db_file':    None,
            'db_dir':     'work',
            'verbose':    False,
            'stderr':     sys.stderr
        }
        self.__dict__.update(ginit(self, **kwargs))
        # only create a Database when we were not handed a usable one
        # (isinstance rather than type(...) == ... honours subclasses)
        if not ('database' in self.__dict__
                and isinstance(self.database, Database)):
            self.database = Database(
                self.db_file,
                **(fdict(self.__dict__.copy(), ignore=['db_dir', 'db_file'])))

        self.translateoptions = gdal.TranslateOptions(
            gdal.ParseCommandLine("-of Gtiff -co COMPRESS=LZW"))

        # normalise tile and sds to lists
        if isinstance(self.tile, str):
            self.tile = [self.tile]
        if isinstance(self.sds, str):
            self.sds = [self.sds]
        if self.sds is not None:
            self.msg(f'initial SDS {self.sds}')
            self.required_sds = self.sds

        # for most transactions, we want all SDS
        # so self.sds should reflect that
        self.sds = None
        response = self.database.get_from_db('SDS', self.product)
        if response:
            self.msg("found SDS names in database")
            self.sds = response
            self.msg(self.sds)
            # require them all
            if 'required_sds' not in self.__dict__:
                self.required_sds = self.sds
  def update(self,*args,**kwargs):
    '''
    update args in object

    Build a clone URL from this one plus *args, copying this
    object's original (pre-mutation) attribute dictionary onto it.

    Keyword arguments:
       full_url : if True, args form the complete URL; otherwise
                  they are appended to str(self)
    '''
    # snapshot the original attributes once, so every clone copies
    # the pre-mutation state
    if '_cache_original' not in  self.__dict__:
      self._cache_original = self.__dict__.copy()

    # whether we specify the full URL in update or not

    if ('full_url' in kwargs) and (kwargs['full_url'] == True):
      args = list(args)
    else:
      args = [str(self)] + list(args)   
    # NOTE(review): an instance (self) is passed where __new__ expects a
    # class -- presumably relies on PurePath-style construction; confirm
    url = super(URL, self).__new__(self,*args)
    url.is_clone = True
    url.__dict__ = fdict(self._cache_original.copy())
    return url
Beispiel #8
0
    def __init__(self, **kwargs):
        '''
        Set up MODIS-specific defaults, the database connection and
        GDAL translate options; normalise tile and sds to lists.

        Any keyword argument overrides the corresponding default.
        '''
        kwargs['defaults'] = {
            'store_msg':  [],
            'database':   None,
            'product':    'MCD15A3H',
            'tile':       'h08v06',
            'log':        None,
            'day':        '01',
            'doy':        None,
            'month':      '*',
            'sds':        None,
            'year':       "2019",
            'site':       'https://e4ftl01.cr.usgs.gov',
            'size_check': False,
            'noclobber':  True,
            'local_dir':  'work',
            'local_file': None,
            'db_file':    None,
            'db_dir':     'work',
            'verbose':    False,
            'stderr':     sys.stderr
        }
        self.__dict__.update(ginit(self, **kwargs))
        # only create a Database when we were not handed a usable one
        # (isinstance rather than type(...) == ... honours subclasses)
        if not ('database' in self.__dict__
                and isinstance(self.database, Database)):
            self.database = Database(
                self.db_file,
                **(fdict(self.__dict__.copy(), ignore=['db_dir', 'db_file'])))

        self.translateoptions = gdal.TranslateOptions(
            gdal.ParseCommandLine("-of Gtiff -co COMPRESS=LZW"))

        # normalise tile and sds to lists
        if isinstance(self.tile, str):
            self.tile = [self.tile]
        if isinstance(self.sds, str):
            self.sds = [self.sds]
Beispiel #9
0
    def call_local(self):
        '''
        sort out and return local_file

        This comes from the URL and local_dir
        and ends .store
        '''
        kwargs = fdict(self.__dict__.copy())
        supplied = None
        if 'local_dir' in kwargs:
            supplied = kwargs['local_dir']
        # an explicitly supplied, non-empty local_dir wins
        if supplied is not None and len(supplied) > 0:
            self.local_dir = list_resolve(supplied)

        # otherwise fall back on the database directory
        if self.local_dir is None or len(self.local_dir) == 0:
            self.local_dir = list_resolve(self.db_dir)

        # cache path: last local_dir entry + URL path minus the scheme,
        # with blanks replaced by underscores
        stem = str(self.with_scheme(''))[2:]
        target = Path(self.local_dir[-1], stem).absolute()
        target = Path(str(target).replace(' ', '_'))
        # append '.store' after whatever suffix is already present
        target = target.with_suffix(target.suffix + '.store')
        self.local_file = target
        self.check_path(target.parent)
        target.parent.mkdir(parents=True, exist_ok=True)
        return self.local_file
Beispiel #10
0
    def stitch_date(self, year, doy, get_files=False, test=False):
        '''
        stitch data for date

        Build per-SDS mosaic VRT files for the given year / day-of-year,
        consulting and updating the database cache.

        Positional arguments:
           year : year (int-like)
           doy  : day of year (int-like)

        Keyword arguments:
           get_files : if True, return (hdf_files, sds) without stitching
           test      : if True, return the SDS list only

        Returns:
           list of output file names, or (files, sds), or [None] on failure
        '''
        year = int(year)
        doy = int(doy)

        # convert (year, doy) to zero-padded year / month / day strings
        dater = (datetime.datetime(year, 1, 1) +\
                   datetime.timedelta(doy - 1)).strftime('%Y %m %d').split()
        self.year = f'{year}'
        self.month = f'{str(int(dater[1])) :0>2s}'
        self.day = f'{str(int(dater[2])) :0>2s}'

        d = self.__dict__.copy()
        fd = fdict(d)
        # dont need to read it
        fd['no_read'] = True
        # output name template: __SDS__ is substituted per SDS below
        ofilebase = f"{self.product}/data.__SDS__." + \
                    f"{'_'.join(self.tile)}.{self.year}.{self.month}.{self.day}"

        hdf_urls = self.get_url(**(fd))

        # bail out if we got nothing back (or not URL objects)
        if not (len(hdf_urls) and (type(hdf_urls[0]) == URL)):
            if get_files:
                return None, None
            return [None]

        if 'db_file' in self.__dict__:
            if 'database' not in self.__dict__:
                # load database
                d = self.__dict__.copy()
                self.database = Database(
                    self.db_file, **(fdict(d, ignore=['db_dir', 'db_file'])))

        if not test and not get_files:
            # look up in db; the JSON-serialised kwargs form the key
            warp_args = None
            dstNodata = None
            step = 1
            #this_set = f"{self.product}.{'_'.join(self.tile)}.{self.year}.{self.month}.{self.day}"
            store_flag = 'modis'
            kwargs = {'year': self.year, 'doy':doy,'day':self.day,'month':self.month,'step':step,\
                    'warp_args':warp_args,'product': self.product, 'dstNodata':dstNodata, 'tile': self.tile}
            mkey = json.dumps(kwargs)
            # this is an hdf file
            response = self.database.get_from_db(store_flag, mkey)
            if response and self.noclobber:
                # validate the cached entry before trusting it
                if self.test_ok(response[0]):
                    # safe to return
                    self.msg(f'positive response from database')
                    ofiles = response
                    return ofiles
                else:
                    msg = f'WARNING: invalid entry {response[0]} in database {str(self.db_file)}'
                    print(msg)
                    self.msg(msg)

        # NOTE(review): bare except -- any failure in local() triggers a
        # full re-download; consider narrowing the exception type
        try:
            hdf_files = [str(f.local()) for f in hdf_urls]
        except:
            for f in hdf_urls:
                d = f.read_bytes()
            hdf_files = [str(f.local()) for f in hdf_urls]
        if get_files:
            sds = self.get_sds(hdf_files, do_all=False)
            return hdf_files, sds

        sds = self.get_sds(hdf_files, do_all=True)
        # retry download once if no SDS were found
        if sds == []:
            for f in hdf_urls:
                d = f.read_bytes()
            hdf_files = [str(f.local()) for f in hdf_urls]
            sds = self.get_sds(hdf_files, do_all=True)

        # early return if we just want sds
        if test == True:
            return sds
        # NOTE(review): this duplicates the retry block above
        # (sds == [] and len(sds) == 0 are the same condition) --
        # possibly an intentional second retry; confirm
        if len(sds) == 0:
            # failed to get SDS: need to download example file
            for f in hdf_urls:
                d = f.read_bytes()
            hdf_files = [str(f.local()) for f in hdf_urls]
            sds = self.get_sds(hdf_files, do_all=True)

        ofiles = []
        # sanity check: cannot have more SDS than the product declares
        if len(sds) > len(self.sds):
            self.msg(f"ERROR in product {self.product} specification of SDS")
            self.msg(f"all SDS claimed to be: {len(self.sds)}")
            self.msg(self.sds)
            self.msg(f"But request for {len(sds)} SDSs made")
            self.msg(sds)
            sys.exit(1)
        for i, sd in enumerate(sds):
            ofile = f'{ofilebase.replace("__SDS__",self.sds[i])}.vrt'.replace(
                ' ', '_')
            spatial_file = Path(f"{self.local_dir[0]}", ofile)
            spatial_file.parent.mkdir(parents=True, exist_ok=True)
            # mosaic all tiles for this SDS into a single VRT
            g = gdal.BuildVRT(spatial_file.as_posix(), sds[i])
            if not g:
                d = self.__dict__
                print(
                    f"problem building dataset for {spatial_file} with {fdict(d)}"
                )
                sys.exit(1)
            del g
            ofiles.append(Path(spatial_file).absolute().as_posix())
        # store in db
        cache = {store_flag: {mkey: ofiles}}
        #self.database.set_db(cache,write=True)
        return ofiles
Beispiel #11
0
    def __init__(self, args, **kwargs):
        '''
      kwargs setup and organisation of local_dir
      and db_dir

      args are database files
      '''

        defaults = {
            'verbose':  False,
            'db_dir':   list_resolve(['~/.url_db']),
            'db_file':  None,
            'log':      None,
            'database': None,
            'stderr':   sys.stderr,
        }
        defaults.update(kwargs)
        old_db = defaults['database']
        self.__dict__.update(defaults)

        if ('database' in self.__dict__) and (type(self.database) is Database):
            try:
                print("WARNING: shouldnt be here  ... ")
                this = self.database.__dict__
                # in case database object passed
                self.__dict__.update(fdict(this))
                if type(old_db) is dict:
                    self.database.update(old_db)
            except Exception:
                pass

        # redirect messages to a log file if one was requested
        if self.log is not None:
            try:
                self.stderr = Path(self.log).open("a")
                if self.verbose:
                    try:
                        # bug fix: this assignment was commented out, so the
                        # two lines below always raised NameError (silently
                        # caught) and the log-file notice was never emitted
                        msg = f"database: log file {self.log}"
                        self.store_msg.append(msg)
                        print(msg, file=sys.stderr)
                    except Exception:
                        pass
            except Exception:
                self.stderr = sys.stderr
                self.msg(f"WARNING: failure to open log file {self.log}")

        if type(self.db_file) is str:
            self.db_file = [self.db_file]

        # database files: default to the positional args
        if (self.db_file is None):
            self.db_file = args

        # normalise db_file / db_dir to lists
        if (self.db_file is not None) and type(self.db_file) is not list:
            self.db_file = [self.db_file]
        if (self.db_dir is not None) and type(self.db_dir) is not list:
            self.db_dir = [self.db_dir]

        # a CACHE_FILE environment variable extends (or seeds) db_file
        if 'CACHE_FILE' in os.environ and os.environ['CACHE_FILE'] is not None:
            db_file = [str(l) for l in list_resolve(os.environ['CACHE_FILE'])]
            self.msg(f'using cache {db_file}')
            if (self.db_file is None):
                self.db_file = db_file
            else:
                self.db_file = list_resolve(self.db_file + db_file)

        # in case still none
        if (self.db_file is None) or \
           ((type(self.db_file) is list) and len(self.db_file) == 0):
            # in case self.db_dir is none
            if (self.db_dir is None) or \
             ((type(self.db_dir) is list) and len(self.db_dir) == 0):
                self.db_dir = list_resolve([Path('~', '.url_db')])
            self.db_file = [Path(d, '.db.yml') for d in self.db_dir]

        if type(self.db_file) is str:
            self.db_file = [self.db_file]

        self.db_file = list_resolve([Path(f) for f in self.db_file])
        self.db_dir = [Path(d).parent for d in self.db_file]

        if self.database and (len(self.database.keys())):
            self.msg('getting database from command line')
        else:
            self.database = self.set_db(dict(self.get_db()))
        self.init_database = self.database.copy()
Beispiel #12
0
    def __init__(self, args, **kwargs):
        '''
      kwargs setup and organisation of local_dir
      and db_dir

      args are database files

      Configuration precedence: built-in defaults < ~/.url_db/init.yml
      < kwargs. A CACHE_FILE environment variable is consulted only
      when no init file was found.
      '''

        defaults = {
            'verbose':  False,
            'db_dir':   None,
            'db_file':  None,
            'log':      None,
            'database': None,
            'stderr':   sys.stderr,
        }
        # try to read from ~/.url_db/init.yml
        initfile = Path('~/.url_db/init.yml').expanduser().absolute()
        if initfile.exists():
            with initfile.open('r') as f:
                info = yaml.safe_load(f)
        else:
            info = {}

        defaults.update(info)
        defaults.update(kwargs)
        old_db = defaults['database']
        self.__dict__.update(defaults)

        if ('database' in self.__dict__) and (type(self.database) is Database):
            try:
                print("WARNING: shouldnt be here  ... ")
                this = self.database.__dict__
                # in case database object passed
                self.__dict__.update(fdict(this))
                if type(old_db) is dict:
                    self.database.update(old_db)
            except Exception:
                pass

        # redirect messages to a log file if one was requested
        if self.log is not None:
            try:
                self.stderr = Path(self.log).open("a")
                if self.verbose:
                    try:
                        # bug fix: this assignment was commented out, so the
                        # two lines below always raised NameError (silently
                        # caught) and the log-file notice was never emitted
                        msg = f"database: log file {self.log}"
                        self.store_msg.append(msg)
                        print(msg, file=sys.stderr)
                    except Exception:
                        pass
            except Exception:
                self.stderr = sys.stderr
                self.msg(f"WARNING: failure to open log file {self.log}")

        if type(self.db_file) is str:
            self.db_file = [self.db_file]

        # database files: default to the positional args
        if (self.db_file is None):
            self.db_file = args

        # normalise db_file / db_dir to lists
        if (self.db_file is not None) and type(self.db_file) is not list:
            self.db_file = [self.db_file]
        if (self.db_dir is not None) and type(self.db_dir) is not list:
            self.db_dir = [self.db_dir]

        # a CACHE_FILE environment variable may seed/extend db_file,
        # but only when no init file was read
        if info == {} and 'CACHE_FILE' in os.environ and os.environ[
                'CACHE_FILE'] is not None:
            db_file = [str(l) for l in list_resolve(os.environ['CACHE_FILE'])]
            if (self.db_file is None):
                self.db_file = db_file
            else:
                self.db_file = list_resolve(self.db_file + db_file)

        # empty lists count as unset
        if ((type(self.db_dir) is list) and len(self.db_dir) == 0):
            self.db_dir = None

        if ((type(self.db_file) is list) and len(self.db_file) == 0):
            self.db_file = None

        if type(self.db_file) is str:
            self.db_file = [self.db_file]

        if type(self.db_dir) is str:
            self.db_dir = [self.db_dir]

        # check that at least one db_file is writeable
        # bug fix: can_write must be initialised here -- it was only set
        # inside the "db_file is not None" branch, so the test below
        # raised NameError whenever self.db_file was None
        can_write = False
        if (self.db_file is not None):
            # ie we apparently have something
            for d in self.db_file:
                try:
                    Path(d).touch()
                    can_write = True
                except Exception:
                    pass

        # in case still none or no writeable
        if (not can_write) or (self.db_file is None):
            # in case self.db_dir is none
            if (self.db_dir is None):
                self.db_dir = list_resolve([Path('~', '.url_db')])
            if (self.db_file is None):
                self.db_file = [Path(d, '.db.yml') for d in self.db_dir]
            else:
                self.db_file.extend([Path(d, '.db.yml') for d in self.db_dir])

        self.db_file = list_resolve([Path(f) for f in self.db_file])
        self.db_dir = [Path(d).parent for d in self.db_file]

        if self.database and (len(self.database.keys())):
            self.msg('getting database from command line')
        else:
            self.database = self.set_db(dict(self.get_db()))
        self.init_database = self.database.copy()
Beispiel #13
0
    def stitch_date(self, year, doy):
        '''
        stitch data for date

        Build per-SDS mosaic VRT files for the given year / day-of-year,
        caching results in the database.

        Positional arguments:
           year : year (int-like)
           doy  : day of year (int-like)

        Returns:
           list of output VRT file names, or [None] on failure
        '''
        year = int(year)
        doy = int(doy)

        # convert (year, doy) to zero-padded year / month / day strings
        dater = (datetime.datetime(year, 1, 1) +\
                   datetime.timedelta(doy - 1)).strftime('%Y %m %d').split()
        self.year = f'{year}'
        self.month = f'{str(int(dater[1])) :0>2s}'
        self.day = f'{str(int(dater[2])) :0>2s}'

        d = self.__dict__.copy()
        hdf_urls = self.get_url(**(fdict(d)))

        # bail out if we got nothing back (or not URL objects)
        if not (len(hdf_urls) and isinstance(hdf_urls[0], URL)):
            return [None]

        if 'db_file' in self.__dict__:
            if 'database' not in self.__dict__:
                # load database
                d = self.__dict__.copy()
                self.database = Database(
                    self.db_file, **(fdict(d, ignore=['db_dir', 'db_file'])))

        # look up in db
        this_set = f"{self.product}.{'_'.join(self.tile)}.{self.year}.{self.month}.{self.day}"
        store_flag = 'modis'
        response = self.database.get_from_db(store_flag, this_set)
        if response and self.noclobber:
            # validate the cached entry before trusting it
            if self.test_ok(response[0]):
                # safe to return
                self.msg(f'positive response from database')
                ofiles = response
                return ofiles
            else:
                msg = f'WARNING: invalid entry {response[0]} in database {str(self.db_file)}'
                print(msg)
                self.msg(msg)

        # download (read_bytes caches locally) and locate the HDF files
        for f in hdf_urls:
            d = f.read_bytes()
        hdf_files = [str(f.local()) for f in hdf_urls]
        sds = self.get_sds(hdf_files, do_all=True)
        ofiles = []
        # sanity check: cannot have more SDS than the product declares
        if len(sds) > len(self.sds):
            self.msg(f"ERROR in product {self.product} specification of SDS")
            self.msg(f"all SDS claimed to be: {len(self.sds)}")
            self.msg(self.sds)
            self.msg(f"But request for {len(sds)} SDSs made")
            self.msg(sds)
            sys.exit(1)
        for i, sd in enumerate(sds):
            ofile = f"data.{self.sds[i]}." + \
                    f"{'_'.join(self.tile)}.{self.year}.{self.month}.{self.day}.vrt"
            ofile = ofile.replace(' ', '_')
            spatial_file = Path(f"{self.local_dir[0]}", ofile)
            # bug fix: ensure the output directory exists before BuildVRT
            # (the other stitch_date variant does this; BuildVRT cannot
            # create missing directories itself)
            spatial_file.parent.mkdir(parents=True, exist_ok=True)
            # mosaic all tiles for this SDS into a single VRT
            g = gdal.BuildVRT(spatial_file.as_posix(), sds[i])
            if not g:
                d = self.__dict__
                print(
                    f"problem building dataset for {spatial_file} with {fdict(d)}"
                )
                sys.exit(1)
            del g
            ofiles.append(Path(spatial_file).absolute().as_posix())
        # store in db
        cache = {store_flag: {this_set: ofiles}}
        self.database.set_db(cache, write=True)
        return ofiles