Esempio n. 1
0
    def getsinglefilelist(self,
                          start_time,
                          end_time,
                          searchstring,
                          login=True,
                          subdir=None):
        """List the remote files under one URL that fall in a time range.

        Downloads the index page at ``self.baseurl`` (optionally extended
        with ``subdir``) into a scratch file, extracts the links matching
        ``searchstring`` and hands them to ``find_files_in_range``.

        Args:
            start_time: Start of the requested range; passed through
                unchanged to ``find_files_in_range``.
            end_time: End of the requested range; passed through unchanged
                to ``find_files_in_range``.
            searchstring: Pattern given to ``getLinksFromHTML`` to select
                links from the downloaded index page.
            login: Not used by this method; kept so existing callers that
                pass it keep working.
            subdir: Optional sub-path appended to ``self.baseurl`` with a
                ``'/'`` separator.

        Returns:
            Whatever ``find_files_in_range`` returns for the extracted
            links, or ``None`` when ``self.host`` is one of the LANCE MODIS
            hosts (that listing is handled elsewhere, see comment below).
        """
        if subdir is not None:
            fullurlpath = self.baseurl + '/' + subdir
        else:
            fullurlpath = self.baseurl

        log.info('')
        log.info('fullurlpath: ' + str(fullurlpath) + '/' + str(searchstring))

        # Build a scratch filename for the downloaded index page.
        indexhtmlfile = DataFileName()
        indexhtmlfile.satname = 'wgetsite'
        indexhtmlfile.sensorname = self.data_type
        indexhtmlfile.dataprovider = self.host_type
        # Hmm, FileName object should probably set this when datetime is set?
        dt = datetime.utcnow()
        indexhtmlfile.time = dt.strftime(indexhtmlfile.datetime_fields['time'])
        indexhtmlfile.date = dt.strftime(indexhtmlfile.datetime_fields['date'])
        indexhtmlfile.extra = 'index'
        indexhtmlfile.ext = 'html'
        indexhtmlfile = indexhtmlfile.create_scratchfile()
        indexhtmlfile.makedirs()
        indexhtmlfnstr = indexhtmlfile.name
        log.info(indexhtmlfnstr)

        # rcj 13DEC2018 this part doesn't need to run for lance modis data
        # it is handled in lance_modis.py getfilelist
        lance_modis_hosts = ('nrt3.modaps.eosdis.nasa.gov',
                             'nrt4.modaps.eosdis.nasa.gov')
        if hasattr(self, 'host') and self.host in lance_modis_hosts:
            # Nothing to list here; the method falls through and returns None.
            pass
        # Everything that is not lance modis that uses this script should
        # still pass through here.
        else:
            # The value returned by wget_file is what gets opened, so it is
            # presumably the path of the downloaded index page (confirm in
            # wget_file). The context manager closes the file once read.
            with open(self.wget_file(fullurlpath,
                                     indexhtmlfnstr)) as indexfobj:
                htmlfilelist = indexfobj.readlines()
            links = self.getLinksFromHTML(htmlfilelist, searchstring)

            # This is defined in Site.py - finding the files in the file list is
            # common between HTTP and FTP (getting the lists differs, but sorting
            # through the list and returning the desired files is shared)
            return self.find_files_in_range(links,
                                            start_time,
                                            end_time,
                                            urlpath=fullurlpath)
Esempio n. 2
0
    def getfile(self, remotefile, localfile):
        """Fetch ``remotefile`` into a scratch file, then finalize it.

        An empty ``<localfile>.processing`` marker file is created first.
        When ``self.downloadactive`` is true the remote file is downloaded
        with ``wget_file`` into a scratch location; otherwise only log
        messages are emitted. In both cases ``move_to_final`` is then called
        with the scratch path, the marker path and ``localfile``.

        Args:
            remotefile: Remote location handed to ``wget_file``.
            localfile: Final local path; also the base name of the
                ``.processing`` marker and of the scratch file.
        """
        processingfile = localfile + '.processing'
        # Touch the marker file. The context manager guarantees the handle is
        # closed (the previous ``ff.close`` without parentheses never called
        # close()).
        with open(processingfile, 'w'):
            pass
        log.info('Touching temporary file: ' +
                 os.path.basename(processingfile))

        # Download into a scratch location rather than the final path.
        temp_filename = DataFileName(localfile).create_scratchfile()
        temp_filename.makedirs()
        temp_fnstr = temp_filename.name

        if not self.downloadactive:
            log.info(
                '      *** nodownload set, not downloading remotefile %s ' %
                remotefile)
            log.info('      ***     to localfile %s' % localfile)
        else:
            log.info('      *** grabbing remotefile %s ' % remotefile)
            log.info('      ***     to localfile %s' % localfile)
            self.wget_file(remotefile, temp_fnstr)

        # Called even when nothing was downloaded, as in the original flow;
        # move_to_final is presumably responsible for coping with a missing
        # scratch file and cleaning up the marker -- confirm in its definition.
        self.move_to_final(temp_fnstr, processingfile, localfile)