Beispiel #1
0
    def _is_var_ready(self, cycle, var):
        """
        Decide whether the remote file holding variable var is ready for the given cycle.

        The headers of the remote file are fetched with readhead.  The variable counts as
        ready when the request succeeds (status 200), the Last-Modified timestamp is later
        than cycle, and the Content-Length is at least min_content_size.

        :param cycle: which cycle are we working with (UTC)
        :param var: the variable identifier
        :return: true if the variable is ready
        """
        # the remote URL is built from the cycle hour and the variable identifier
        url = self._remote_var_url(cycle.hour, var)
        logging.info('Reading %s' % url)
        response = readhead(url)
        if response.status_code != 200:
            logging.error('Cannot find variable %s for hour %d at url %s' %
                          (var, cycle.hour, url))
            return False

        headers = response.headers
        # last-modified time of the remote file
        modified_at = self._parse_header_timestamp(headers['Last-Modified'])
        size = int(headers['Content-Length'])
        logging.info('%s file size %s' % (url, size))

        # files smaller than the minimum are treated as invalid
        large_enough = size >= min_content_size
        if not large_enough:
            logging.warning(
                'remote file size less than minimum %s, considered invalid' %
                min_content_size)

        return modified_at > cycle and large_enough
Beispiel #2
0
    def available_online(self, links):
        """
        Find the first link from a list of links that is available online.

        Links are probed in the given order with readhead against self.remote_url, and
        probing stops at the first link that answers with status code 200, so no
        requests are made for the links after it.

        :param links: list of links, appended to self.remote_url with a '/' separator
        :return: the first link that is available online
        :raises GribError: if none of the links is available online
        """

        logging.info('GribSource: Looking for grib links available online')
        for link in links:
            # msg_level=0 is passed through to readhead unchanged
            if readhead(self.remote_url + '/' + link, msg_level=0).status_code == 200:
                return link
        raise GribError('GribSource: failed to find an available online file')
Beispiel #3
0
    def retrieve_gribs(self,
                       from_utc,
                       to_utc,
                       ref_utc=None,
                       cycle_start=None,
                       download_whole_cycle=False):
        """
        Attempts to retrieve the files to satisfy the simulation request from_utc - to_utc.

        Unless a cycle is given, starts with the most recent cycle that is at least
        self.hours_behind_real_time behind ref_utc (and not after from_utc), then moves
        further into the past, trying up to three cycles in total.  For each candidate cycle,
        the filenames are computed and the local cache is checked for files that are already
        there.  The presence of the remaining files is checked on the server; if any is not
        available, an older cycle is tried, otherwise the missing files are downloaded.
        Once all files are downloaded, the manifest is returned, or if retrieval fails,
        an error is raised.

        :param from_utc: forecast start time
        :param to_utc: forecast end time
        :param ref_utc: reference time used to select the cycle, defaults to the current UTC time
        :param cycle_start: if given, use exactly this cycle; no older cycle is tried
        :param download_whole_cycle: if True, request forecast hours 0 to self.max_forecast_hours
            instead of only the hours needed to cover from_utc - to_utc
        :return: dictionary with
            'grib_files': list of grib files available,
            'colmet_files_utc': list of datetimes for the colmet files,
            'colmet_prefix': string as colmet file prefix,
            'colmet_files': list of all colmet files,
            'colmet_missing': list of colmet files that need to be created
        :raises GribError: if the required files are not available in any eligible cycle
        """
        # ensure minutes and seconds are zero, simplifies arithmetic later
        from_utc = from_utc.replace(minute=0,
                                    second=0,
                                    microsecond=0,
                                    tzinfo=pytz.UTC)
        to_utc = to_utc.replace(minute=0,
                                second=0,
                                microsecond=0,
                                tzinfo=pytz.UTC)

        if ref_utc is None:
            ref_utc = datetime.now(pytz.UTC)

        logging.info(
            'retrieve_gribs %s from_utc=%s to_utc=%s ref_utc=%s cycle_start=%s download_whole_cycle=%s'
            % (self.id, from_utc, to_utc, ref_utc, cycle_start,
               download_whole_cycle))

        # remember whether the caller pinned the cycle; cycle_start is reassigned below
        # on every iteration when it was not, so testing it for None inside the loop
        # would wrongly treat the previously selected cycle as "given"
        cycle_given = cycle_start is not None

        # files found missing on the server in the last attempted cycle
        unavailables = []

        # it is possible that a cycle output is delayed and unavailable when we expect it (3 hours after cycle time)
        # in this case, the grib source supports using previous cycles (up to 2)
        cycle_shift = 0
        while cycle_shift < 3:

            if cycle_given:
                logging.info('forecast cycle start given as %s' % cycle_start)
            else:
                # select cycle (at least hours_behind_real_time behind)
                # for NAM218 which occur at [0, 6, 12, 18] hours
                ref_utc_2 = ref_utc - timedelta(
                    hours=self.hours_behind_real_time)
                ref_utc_2 = ref_utc_2.replace(minute=0,
                                              second=0,
                                              microsecond=0)
                cycle_start = min(from_utc, ref_utc_2)
                cycle_start = cycle_start.replace(hour=cycle_start.hour -
                                                  cycle_start.hour % 6)
                # step back one cycle for every failed attempt so far
                cycle_start -= timedelta(hours=self.cycle_hours * cycle_shift)
                logging.info('forecast cycle start selected as %s' %
                             cycle_start)

            if download_whole_cycle:
                logging.info('%s downloading whole cycle' % self.id)
                fc_start, fc_hours = 0, self.max_forecast_hours
            else:
                logging.info('%s downloading from %s to %s' %
                             (self.id, from_utc, to_utc))
                fc_start, fc_hours = self.forecast_times(
                    cycle_start, from_utc, to_utc)

            logging.info('%s downloading cycle %s forecast hours %d to %d' %
                         (self.id, cycle_start, fc_start, fc_hours))

            # computes the relative paths of the desired files (the manifest)
            fc_list, colmet_files_utc = self.file_times(
                cycle_start, fc_start, fc_hours)
            grib_files = self.file_names(cycle_start, fc_list)
            colmet_prefix, colmet_files = self.colmet_names(
                cycle_start, colmet_files_utc)

            for f in grib_files:
                logging.info('%s will retrive %s' % (self.id, f))

            # if no cached files are missing, the gribs are not needed at all
            colmet_missing = self.colmet_missing(colmet_prefix, colmet_files)
            if len(colmet_missing) > 0:

                # check what's available locally; build real lists (not lazy
                # filter objects) because they are measured and iterated repeatedly
                nonlocals = [
                    x for x in grib_files
                    if not self.grib_available_locally(
                        osp.join(self.ingest_dir, x))
                ]

                # check if GRIBs we don't have are available remotely
                url_base = self.remote_url
                logging.info('Retrieving %s GRIBs from %s' %
                             (self.id, url_base))
                unavailables = [
                    x for x in nonlocals
                    if readhead(url_base + '/' + x).status_code != 200
                ]
                if len(unavailables) > 0:
                    logging.warning(
                        '%s failed retrieving cycle data for cycle %s, unavailables %s'
                        % (self.id, cycle_start, repr(unavailables)))
                    if cycle_given:
                        # a cycle pinned by the caller cannot be shifted, so
                        # repeating the identical attempt is pointless
                        break
                    cycle_shift += 1
                    continue

                # download all gribs we need; an explicit loop guarantees the
                # downloads actually run (map() is lazy in Python 3)
                for x in nonlocals:
                    self.download_grib(url_base, x)

            # return manifest

            return Dict({
                'grib_files': grib_files,
                'colmet_files_utc': colmet_files_utc,
                'colmet_prefix': colmet_prefix,
                'colmet_files': colmet_files,
                'colmet_missing': colmet_missing
            })

        raise GribError(
            'Unsatisfiable: failed to retrieve GRIB2 files in eligible cycles %s'
            % repr(unavailables))
Beispiel #4
0
    def retrieve_gribs(self, from_utc, to_utc, ref_utc=None, cycle_start_utc = None, download_all_gribs = False):
        """
        Attempts to retrieve the files to satisfy the simulation request from_utc - to_utc.

        The requested interval is widened to multiples of self.period_hours (start rounded
        down, end rounded up), one file is put in the manifest per period, and the files
        that are not in the local ingest directory are downloaded, but only if some
        colmet files are missing from the cache.

        :param from_utc: forecast start time
        :param to_utc: forecast end time
        :param ref_utc: not used by this method
        :param cycle_start_utc: not used by this method
        :param download_all_gribs: not used by this method
        :return: dictionary with
                'grib_files': list of grib files available,
                'colmet_files_utc': list of datetimes for the colmet files,
                'colmet_prefix': string as colmet file prefix (directory names)
                'colmet_files': list of all colmet files needed,
                'colmet_missing': those missing in the cache
        :raises GribError: if the requested dates are outside of the available range
                or a required GRIB file is not available remotely
        """

        # ensure minutes, seconds and microseconds are zero, simplifies arithmetic later
        from_utc = from_utc.replace(minute=0, second=0, microsecond=0, tzinfo=pytz.UTC)
        to_utc = to_utc.replace(minute=0, second=0, microsecond=0, tzinfo=pytz.UTC)

        # round start_utc down and end_utc up to period - reanalysis has no forecast cycles
        start_utc = from_utc.replace(hour = from_utc.hour - from_utc.hour % self.period_hours)
        # adding (period - 1 second) and rounding down gives rounding up; the leftover
        # 59:59 must be cleared too, otherwise end_utc overshoots the period boundary
        # and a request ending exactly at available_to_utc would be rejected
        end_utc = to_utc + timedelta(hours=self.period_hours)-timedelta(seconds=1)
        end_utc = end_utc.replace(hour=end_utc.hour - end_utc.hour % self.period_hours,
                                  minute=0, second=0)

        if start_utc < self.available_from_utc or end_utc > self.available_to_utc:
            logging.error('%s is available from %s to %s only' % (self.id, self.available_from_utc, self.available_to_utc))
            logging.info('Check %s for %s' % (self.info_url, self.info))
            raise GribError('Unsatisfiable: %s not available for the requested dates' % self.id)

        # compute the manifest here: one file per period from start_utc to end_utc inclusive
        at_time = start_utc
        grib_files = []
        colmet_files_utc=[]
        while at_time <= end_utc:
            grib_files.append(self.make_relative_url(at_time))
            colmet_files_utc.append(at_time)
            at_time += timedelta(hours=self.period_hours)
        colmet_prefix = self.id
        colmet_files = self.colmet_files(colmet_files_utc)
        colmet_missing = self.colmet_missing(colmet_prefix,colmet_files)

        # if no missing cached files, we do not care about gribs
        if len(colmet_missing) > 0:
            # check what's available locally
            nonlocals = [x for x in grib_files if not self.grib_available_locally(osp.join(self.ingest_dir, x))]
            # check if GRIBs we don't have are available remotely
            url_base = self.remote_url
            logging.info('Retrieving CFSR GRIBs from %s' % url_base)
            unavailables = [x for x in nonlocals if readhead(url_base + '/' + x).status_code != 200]
            if len(unavailables) > 0:
                raise GribError('Unsatisfiable: GRIBs %s not available.' % repr(unavailables))

            # download all gribs not available locally
            for x in nonlocals:
                self.download_grib(url_base, x)

        # return manifest
        return Dict({'grib_files': grib_files, 
            'colmet_files_utc': colmet_files_utc, 
            'colmet_prefix': colmet_prefix, 
            'colmet_files': colmet_files,
            'colmet_missing': colmet_missing})