Example No. 1
    def test_parse_digits(self):
        """Test when a digit field is shorter than the format spec."""
        result = parse(
            "hrpt_{platform}{platnum:2s}_{time:%Y%m%d_%H%M}_{orbit:05d}{ext}",
            "hrpt_noaa19_20140212_1412_02345.l1b")
        self.assertDictEqual(
            result, {
                'platform': 'noaa',
                'platnum': '19',
                'time': dt.datetime(2014, 2, 12, 14, 12),
                'orbit': 2345,
                'ext': '.l1b'
            })
        result = parse(
            "hrpt_{platform}{platnum:2s}_{time:%Y%m%d_%H%M}_{orbit:5d}{ext}",
            "hrpt_noaa19_20140212_1412_ 2345.l1b")
        self.assertDictEqual(
            result, {
                'platform': 'noaa',
                'platnum': '19',
                'time': dt.datetime(2014, 2, 12, 14, 12),
                'orbit': 2345,
                'ext': '.l1b'
            })
        result = parse(
            "hrpt_{platform}{platnum:2s}_{time:%Y%m%d_%H%M}_{orbit:_>5d}{ext}",
            "hrpt_noaa19_20140212_1412___345.l1b")
        self.assertDictEqual(
            result, {
                'platform': 'noaa',
                'platnum': '19',
                'time': dt.datetime(2014, 2, 12, 14, 12),
                'orbit': 345,
                'ext': '.l1b'
            })
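The zero-, blank-, and underscore-padded orbit fields above follow Python's format specification mini-language. Going the other direction, trollsift's compose renders the same padded strings; a minimal round-trip sketch, assuming only that trollsift is installed:

import datetime as dt
from trollsift import compose, parse

fmt = "hrpt_{platform}{platnum:2s}_{time:%Y%m%d_%H%M}_{orbit:05d}{ext}"
data = {'platform': 'noaa', 'platnum': '19',
        'time': dt.datetime(2014, 2, 12, 14, 12),
        'orbit': 2345, 'ext': '.l1b'}
filename = compose(fmt, data)
print(filename)                        # hrpt_noaa19_20140212_1412_02345.l1b
assert parse(fmt, filename) == data    # parse inverts compose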
Example No. 2
def publish_pps_files(input_msg, publish_q, scene, result_files, **kwargs):
    """
    Publish messages for the files provided
    """

    environment = kwargs.get('environment')
    servername = kwargs.get('servername')
    station = kwargs.get('station', 'unknown')

    for result_file in result_files:
        # Get true start and end time from filenames and adjust the end time in
        # the publish message:
        filename = os.path.basename(result_file)
        LOG.info("file to publish = " + str(filename))
        try:
            try:
                metadata = parse(PPS_OUT_PATTERN, filename)
            except ValueError:
                metadata = parse(PPS_OUT_PATTERN_MULTIPLE, filename)
                metadata['segment'] = '_'.join(
                    [metadata['segment1'], metadata['segment2']])
                del metadata['segment1'], metadata['segment2']
        except ValueError:
            metadata = parse(PPS_STAT_PATTERN, filename)

        endtime = metadata['end_time']
        starttime = metadata['start_time']

        to_send = input_msg.data.copy()
        to_send.pop('dataset', None)
        to_send.pop('collection', None)
        to_send['uri'] = ('ssh://%s/%s' % (servername, result_file))
        to_send['uid'] = filename
        to_send['sensor'] = scene.get('instrument', None)
        if not to_send['sensor']:
            to_send['sensor'] = scene.get('sensor', None)

        to_send['platform_name'] = scene['platform_name']
        to_send['orbit_number'] = scene['orbit_number']
        if result_file.endswith("xml"):
            to_send['format'] = 'PPS-XML'
            to_send['type'] = 'XML'
        if result_file.endswith("nc"):
            to_send['format'] = 'CF'
            to_send['type'] = 'netCDF4'
        if result_file.endswith("h5"):
            to_send['format'] = 'PPS'
            to_send['type'] = 'HDF5'
        to_send['data_processing_level'] = '2'

        to_send['start_time'], to_send['end_time'] = starttime, endtime
        pubmsg = Message(
            '/' + to_send['format'] + '/' + to_send['data_processing_level'] +
            '/' + station + '/' + environment + '/polar/direct_readout/',
            "file", to_send).encode()
        LOG.debug("sending: " + str(pubmsg))
        LOG.info("Sending: " + str(pubmsg))
        publish_q.put(pubmsg)
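The nested try/except above amounts to trying three filename patterns in priority order. A generic helper expressing the same idiom (hypothetical, not part of the source project) could look like:

from trollsift import parse

def parse_with_fallback(filename, patterns):
    """Try each trollsift pattern in turn; raise if none matches."""
    for pattern in patterns:
        try:
            return parse(pattern, filename)
        except ValueError:
            continue
    raise ValueError("no pattern matched %r" % filename)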
Example No. 3
    def test_match(self, fmt, string, expected):
        """Test cases expected to be matched."""

        # Test parsed value
        parsed = parse(fmt, string)
        assert parsed['foo'] == expected

        # Test round trip
        composed = compose(fmt, {'foo': expected})
        parsed = parse(fmt, composed)
        assert parsed['foo'] == expected
Example No. 4
    def test_parse_string_padding_syntax_with_and_without_s(self):
        """Test that, in string padding syntax, '' is equivalent to 's'.

        From <https://docs.python.org/3.4/library/string.html#format-specification-mini-language>:
            * Type 's': String format. This is the default type for strings and may be omitted.
            * Type None: The same as 's'.
        """
        result = parse('{foo}/{bar:_<8}', 'baz/qux_____')
        expected_result = parse('{foo}/{bar:_<8s}', 'baz/qux_____')
        self.assertEqual(expected_result["foo"], "baz")
        self.assertEqual(expected_result["bar"], "qux")
        self.assertEqual(result, expected_result)
Example No. 5
def folder_get_version_first_last(
        root,
        fmt="MERRA2_{stream}.tavg1_2d_lnd_Nx.{time:%Y%m%d}.nc4",
        subpaths=['{time:%Y}', '{time:%m}']):
    """
    Get product version and first and last product
    which exists under the root folder.

    Parameters
    ----------
    root: string
        Root folder on local filesystem
    fmt: string, optional
        formatting string
    subpaths: list, optional
        format of the subdirectories under root.
    Returns
    -------
    version: string
        Found product version
    start: datetime.datetime
        First found product datetime
    end: datetime.datetime
        Last found product datetime
    """
    start = None
    end = None
    version = None
    first_folder = get_first_folder(root, subpaths)
    print('First folder', first_folder)
    last_folder = get_last_folder(root, subpaths)
    print('Last folder', last_folder)

    if first_folder is not None:
        files = sorted(
            glob.glob(os.path.join(first_folder, parser.globify(fmt))))
        # parse files according to formatting string ({stream} is ignored)
        data = parser.parse(fmt, os.path.split(files[0])[1])
        start = data['time']
        version = 'M2T1NXLND.5.12.4'

    if last_folder is not None:
        files = sorted(
            glob.glob(os.path.join(last_folder, parser.globify(fmt))))
        data = parser.parse(fmt, os.path.split(files[-1])[1])
        end = data['time']

    return version, start, end
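parser.globify(fmt) turns the trollsift pattern into a shell glob so that glob.glob can list candidate files before strict parsing. A small sketch of the idea; the commented output is illustrative, the exact wildcards come from trollsift:

from trollsift import globify

fmt = "MERRA2_{stream}.tavg1_2d_lnd_Nx.{time:%Y%m%d}.nc4"
print(globify(fmt))
# Free fields such as {stream} become '*' and fixed-width fields such as
# {time:%Y%m%d} become runs of '?', giving something like
# 'MERRA2_*.tavg1_2d_lnd_Nx.????????.nc4'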
Example No. 6
    def filename_items_for_filetype(filenames, filetype_info):
        """Iterator over the filenames matching *filetype_info*."""
        for pattern in filetype_info['file_patterns']:
            for filename in match_filenames(filenames, pattern):
                filename_info = parse(pattern, get_filebase(filename, pattern))

                yield filename, filename_info
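A hypothetical invocation, assuming only what the function itself requires: filetype_info is a dict whose 'file_patterns' entry lists trollsift patterns.

filetype_info = {'file_patterns': [
    'hrpt_{platform}{platnum:2s}_{time:%Y%m%d_%H%M}_{orbit:05d}.l1b']}
filenames = ['hrpt_noaa19_20140212_1412_12345.l1b']
for fname, info in filename_items_for_filetype(filenames, filetype_info):
    print(fname, info['time'])  # 2014-02-12 14:12:00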
Example No. 7
    def process_IN_OPEN(self, event):
        """When the file opens.
        """

        fname = os.path.basename(event.pathname)

        if not fnmatch(fname, globify(self._pattern)):
            logger.debug("Ignoring %s", event.pathname)
            return False

        if self.current_event is None:
            self.current_event = event
        elif event.pathname != self.current_event.pathname:
            self.clean_up(self.current_event)
            self.current_event = event

        if self._fp is None:
            self._fp = open(event.pathname)
            self._current_pass = self._schedule_reader.next_pass
            info = parse(self._pattern, fname)
            try:
                self.sat = " ".join((info["platform"], info["number"]))
                self.time = info["utctime"]
            except KeyError:
                logger.info("Could not retrieve satellite name from filename")

        self.set_reception_active(event)
        return self._fp is not None
Example No. 8
def gldas_folder_get_version_first_last(root,
                                        fmt=None,
                                        subpaths=["{time:%Y}", "{time:%j}"]):
    """
    Get product version and first and last product which exists under the root folder.

    Parameters
    ----------
    root: string
        Root folder on local filesystem
    fmt: string, optional
        Formatting string
        (default: "GLDAS_NOAH025_3H.A{time:%Y%m%d.%H%M}.0{version:2s}.nc4")
    subpaths: list, optional
        Format of the subdirectories under root (default: ['{:%Y}', '{:%j}']).

    Returns
    -------
    version: string
        Found product version
    start: datetime.datetime
        First found product datetime
    end: datetime.datetime
        Last found product datetime
    """
    if fmt is None:
        fmt = "GLDAS_NOAH025_3H{ep}.A{time:%Y%m%d.%H%M}.0{version:2s}.nc4"

    start = None
    end = None
    version = None
    first_folder = get_first_gldas_folder(root, subpaths)
    last_folder = get_last_gldas_folder(root, subpaths)

    if first_folder is not None:
        files = sorted(glob.glob(os.path.join(first_folder, globify(fmt))))
        data = parse(fmt, os.path.split(files[0])[1])
        start = data["time"]
        ep = data["ep"]
        version = f"GLDAS_Noah_v{data['version']}_025{data['ep']}"

    if last_folder is not None:
        files = sorted(glob.glob(os.path.join(last_folder, globify(fmt))))
        data = parse(fmt, os.path.split(files[-1])[1])
        end = data["time"]

    return version, start, end
Example No. 9
    def test_parse_align(self):
        filepattern = ("H-000-{hrit_format:4s}__-{platform_name:4s}________-"
                       "{channel_name:_<9s}-{segment:_<9s}-{start_time:%Y%m%d%H%M}-__")
        result = parse(filepattern,
                       "H-000-MSG3__-MSG3________-IR_039___-000007___-201506051700-__")
        self.assertDictEqual(result, {'channel_name': 'IR_039',
                                      'hrit_format': 'MSG3',
                                      'platform_name': 'MSG3',
                                      'segment': '000007',
                                      'start_time': dt.datetime(2015, 6, 5, 17, 0)})
Example No. 10
def gldas_folder_get_version_first_last(
        root,
        fmt="GLDAS_NOAH025_3H.A{time:%Y%m%d.%H%M}.0{version:2s}.nc4",
        subpaths=['{:%Y}', '{:%j}']):
    """
    Get product version and first and last product which exists under the root folder.

    Parameters
    ----------
    root: string
        Root folder on local filesystem
    fmt: string, optional
        formatting string
    subpaths: list, optional
        format of the subdirectories under root.

    Returns
    -------
    version: string
        Found product version 
    start: datetime.datetime
        First found product datetime
    end: datetime.datetime
        Last found product datetime
    """
    start = None
    end = None
    version = None
    first_folder = get_first_gldas_folder(root, subpaths)
    last_folder = get_last_gldas_folder(root, subpaths)

    if first_folder is not None:
        files = sorted(
            glob.glob(os.path.join(first_folder, parser.globify(fmt))))
        data = parser.parse(fmt, os.path.split(files[0])[1])
        start = data['time']
        version = 'GLDAS_Noah_v%s_025' % data['version']

    if last_folder is not None:
        files = sorted(
            glob.glob(os.path.join(last_folder, parser.globify(fmt))))
        data = parser.parse(fmt, os.path.split(files[-1])[1])
        end = data['time']

    return version, start, end
Example No. 11
    def test_parse(self):
        # Run
        result = parse(
            self.fmt, "/somedir/avhrr/2014/hrpt_noaa19_20140212_1412_12345.l1b")
        # Assert
        self.assertDictEqual(result, {'directory': 'avhrr/2014',
                                      'platform': 'noaa', 'platnum': '19',
                                      'time': dt.datetime(2014, 2, 12, 14, 12),
                                      'orbit': 12345})
Example No. 12
    def test_parse_wildcards(self):
        # Run
        result = parse(
            "hrpt_{platform}{platnum:2s}_{time:%Y%m%d_%H%M}_{orbit:05d}{ext}",
            "hrpt_noaa19_20140212_1412_12345.l1b")
        # Assert
        self.assertDictEqual(result, {'platform': 'noaa', 'platnum': '19',
                                      'time': dt.datetime(2014, 2, 12, 14, 12),
                                      'orbit': 12345,
                                      'ext': '.l1b'})
Example No. 13
    def test_002(self):
        res = parse(
            'hrpt16_{satellite:7s}_{start_time:%d-%b-%Y_%H:%M:%S.000}_{orbit_number:5d}',
            "hrpt16_NOAA-19_26-NOV-2014_10:12:00.000_29889")
        self.assertEqual(
            res, {
                'orbit_number': 29889,
                'satellite': 'NOAA-19',
                'start_time': dt.datetime(2014, 11, 26, 10, 12)
            })
Example No. 14
    def test_greediness(self):
        """Test that the minimum match is parsed out.

        See GH #18.
        """
        from trollsift import parse
        template = '{band_type}_{polarization_extracted}_{unit}_{s1_fname}'
        fname = 'Amplitude_VH_db_S1A_IW_GRDH_1SDV_20160528T171628_20160528T171653_011462_011752_0EED.tif'
        res_dict = parse(template, fname)
        exp = {
            'band_type': 'Amplitude',
            'polarization_extracted': 'VH',
            'unit': 'db',
            's1_fname': 'S1A_IW_GRDH_1SDV_20160528T171628_20160528T171653_011462_011752_0EED.tif',
        }
        self.assertEqual(exp, res_dict)

        template = '{band_type:s}_{polarization_extracted}_{unit}_{s1_fname}'
        res_dict = parse(template, fname)
        self.assertEqual(exp, res_dict)
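The same minimal-match rule is visible with a simpler template: each earlier field takes as little text as possible and the final field absorbs the remainder. A one-line sketch, assuming the non-greedy matching described above:

from trollsift import parse

print(parse('{a}_{b}', 'x_y_z'))  # {'a': 'x', 'b': 'y_z'}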
Example No. 15
def folder_get_first_last(
        root,
        fmt="SMAP_L3_SM_P_{time:%Y%m%d}_R{orbit:05d}_{proc_number:03d}.h5",
        subpaths=['{:%Y.%m.%d}']):
    """
    Get first and last product which exists under the root folder.

    Parameters
    ----------
    root: string
        Root folder on local filesystem
    fmt: string, optional
        formatting string
    subpaths: list, optional
        format of the subdirectories under root.

    Returns
    -------
    start: datetime.datetime
        First found product datetime
    end: datetime.datetime
        Last found product datetime
    """
    start = None
    end = None
    first_folder = get_first_folder(root, subpaths)
    last_folder = get_last_folder(root, subpaths)

    if first_folder is not None:
        files = sorted(
            glob.glob(os.path.join(first_folder, parser.globify(fmt))))
        data = parser.parse(fmt, os.path.split(files[0])[1])
        start = data['time']

    if last_folder is not None:
        files = sorted(
            glob.glob(os.path.join(last_folder, parser.globify(fmt))))
        data = parser.parse(fmt, os.path.split(files[-1])[1])
        end = data['time']

    return start, end
Example No. 16
    def filename_items_for_filetype(filenames, filetype_info):
        """Iterator over the filenames matching *filetype_info*."""
        for pattern in filetype_info['file_patterns']:
            for filename in match_filenames(filenames, pattern):
                try:
                    filename_info = parse(
                        pattern, get_filebase(filename, pattern))
                except ValueError:
                    logger.debug("Can't parse %s with %s.", filename, pattern)
                    continue

                yield filename, filename_info
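Compared with the variant in Example No. 6, wrapping parse in try/except ValueError lets the iterator skip filenames that pass the glob-level match but fail strict field parsing, instead of aborting the whole scan.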
Example No. 17
def gldas_folder_get_first_last(
        root,
        fmt="GLDAS_NOAH025SUBP_3H.A{time:%Y%j.%H%M}.001.{production_time:%Y%j%H%M%S}.grb",
        subpaths=['{:%Y}', '{:%j}']):
    """
    Get first and last product which exists under the root folder.

    Parameters
    ----------
    root: string
        Root folder on local filesystem
    fmt: string, optional
        formatting string
    subpaths: list, optional
        format of the subdirectories under root.

    Returns
    -------
    start: datetime.datetime
        First found product datetime
    end: datetime.datetime
        Last found product datetime
    """
    start = None
    end = None
    first_folder = get_first_gldas_folder(root, subpaths)
    last_folder = get_last_gldas_folder(root, subpaths)

    if first_folder is not None:
        files = sorted(glob.glob(os.path.join(first_folder, parser.globify(fmt))))
        data = parser.parse(fmt, os.path.split(files[0])[1])
        start = data['time']

    if last_folder is not None:
        files = sorted(glob.glob(os.path.join(last_folder, parser.globify(fmt))))
        data = parser.parse(fmt, os.path.split(files[-1])[1])
        end = data['time']

    return start, end
Example No. 18
    def select_files(self,
                     base_dir=None,
                     filenames=None,
                     sensor=None):
        file_set, info_filenames = super(xRITFile, self).select_files(
            base_dir, filenames, sensor)

        matching_filenames = []

        # Organize filenames into file types and create file handlers
        remaining_filenames = set(self.info['filenames'])
        start_times = []
        end_times = []
        for filetype, filetype_info in self.config['file_types'].items():
            patterns = filetype_info['file_patterns']
            for pattern in patterns:
                used_filenames = set()
                for filename in remaining_filenames:
                    if fnmatch(os.path.basename(filename), globify(pattern)):
                        # we know how to use this file (even if we may not use
                        # it later)
                        used_filenames.add(filename)
                        filename_info = parse(pattern,
                                              os.path.basename(filename))
                        # Only add this file handler if it is within the time
                        # we want
                        file_start = filename_info['start_time']
                        file_end = filename_info.get('end_time', file_start)
                        if self._start_time and file_start < self._start_time:
                            continue
                        if self._end_time and file_end > self._end_time:
                            continue

                        start_times.append(file_start)
                        end_times.append(file_end)
                        matching_filenames.append(filename)
                        # TODO: Area filtering

                remaining_filenames -= used_filenames

        if matching_filenames:
            # Assign the start time and end time
            self._start_time = min(start_times)
            self._end_time = max(end_times)
        self.info['filenames'] = matching_filenames
        return file_set, info_filenames
Example No. 19
    def filename_items_for_filetype(filenames, filetype_info):
        """Iterator over the filenames matching *filetype_info*."""
        matched_files = []
        for pattern in filetype_info['file_patterns']:
            for filename in match_filenames(filenames, pattern):
                if filename in matched_files:
                    continue
                try:
                    filename_info = parse(
                        pattern, get_filebase(filename, pattern))
                except ValueError:
                    logger.debug("Can't parse %s with %s.", filename, pattern)
                    continue
                matched_files.append(filename)
                yield filename, filename_info
Example No. 20
    def select_files(self, base_dir=None, filenames=None, sensor=None):
        res = super(FileYAMLReader, self).select_files(base_dir, filenames,
                                                       sensor)

        # Organize filenames into file types and create file handlers
        remaining_filenames = set(self.info['filenames'])
        for filetype, filetype_info in self.config['file_types'].items():
            filetype_cls = filetype_info['file_reader']
            patterns = filetype_info['file_patterns']
            file_handlers = []
            for pattern in patterns:
                used_filenames = set()

                levels = len(pattern.split('/'))

                for filename in remaining_filenames:
                    filebase = os.path.join(
                        *filename.split(os.path.sep)[-levels:])

                    if fnmatch(filebase, globify(pattern)):
                        # we know how to use this file (even if we may not use
                        # it later)
                        used_filenames.add(filename)
                        filename_info = parse(pattern, filebase)
                        file_handler = filetype_cls(filename, filename_info,
                                                    filetype_info)

                        # Only add this file handler if it is within the time
                        # we want
                        if self._start_time and file_handler.start_time < self._start_time:
                            continue
                        if self._end_time and file_handler.end_time > self._end_time:
                            continue

                        # TODO: Area filtering

                        file_handlers.append(file_handler)
                remaining_filenames -= used_filenames
            # Only create an entry in the file handlers dictionary if
            # we have those files
            if file_handlers:
                # Sort the file handlers by start time
                file_handlers.sort(key=lambda fh: fh.start_time)
                self.file_handlers[filetype] = file_handlers

        return res
Example No. 21
    def filename_items_for_filetype(filenames, filetype_info):
        """Iterate over the filenames matching *filetype_info*."""
        if not isinstance(filenames, set):
            # we perform set operations later on to improve performance
            filenames = set(filenames)
        for pattern in filetype_info['file_patterns']:
            matched_files = set()
            matches = _match_filenames(filenames, pattern)
            for filename in matches:
                try:
                    filename_info = parse(
                        pattern, _get_filebase(filename, pattern))
                except ValueError:
                    logger.debug("Can't parse %s with %s.", filename, pattern)
                    continue
                matched_files.add(filename)
                yield filename, filename_info
            filenames -= matched_files
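Converting the incoming filenames to a set makes the closing filenames -= matched_files a cheap set difference, so files already claimed by one pattern are never re-tested against the remaining patterns.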
Example No. 22
    def select_files(self,
                     base_dir=None,
                     filenames=None,
                     sensor=None):
        res = super(FileYAMLReader, self).select_files(base_dir, filenames,
                                                       sensor)

        # Organize filenames into file types and create file handlers
        remaining_filenames = set(self.info['filenames'])
        for filetype, filetype_info in self.config['file_types'].items():
            filetype_cls = filetype_info['file_reader']
            patterns = filetype_info['file_patterns']
            file_handlers = []
            for pattern in patterns:
                used_filenames = set()

                levels = len(pattern.split('/'))
                # correct separator if needed
                pattern = os.path.join(*pattern.split('/'))

                for filename in remaining_filenames:
                    filebase = os.path.join(
                        *filename.split(os.path.sep)[-levels:])

                    if fnmatch(filebase, globify(pattern)):
                        # we know how to use this file (even if we may not use
                        # it later)
                        used_filenames.add(filename)
                        filename_info = parse(pattern,
                                              filebase)
                        file_handler = filetype_cls(filename, filename_info,
                                                    filetype_info)

                        # Only add this file handler if it is within the time
                        # we want
                        if self._start_time and file_handler.start_time < self._start_time:
                            continue
                        if self._end_time and file_handler.end_time > self._end_time:
                            continue

                        if self._area:
                            from trollsched.boundary import AreaDefBoundary, Boundary
                            from satpy.resample import get_area_def
                            try:
                                gbb = Boundary(
                                    *file_handler.get_bounding_box())
                            except NotImplementedError:
                                pass
                            else:
                                abb = AreaDefBoundary(
                                    get_area_def(self._area), frequency=1000)

                                intersection = gbb.contour_poly.intersection(
                                    abb.contour_poly)
                                if not intersection:
                                    continue

                        file_handlers.append(file_handler)
                remaining_filenames -= used_filenames
            # Only create an entry in the file handlers dictionary if
            # we have those files
            if file_handlers:
                # Sort the file handlers by start time
                file_handlers.sort(key=lambda fh: fh.start_time)
                self.file_handlers[filetype] = file_handlers

        return res
Example No. 23
    def load(self, dataset_keys, area=None, start_time=None, end_time=None):
        image_files = []
        pattern = self.file_patterns[0]
        prologue_file = None
        epilogue_file = None
        for filename in self.info['filenames']:
            try:
                file_info = parse(pattern, os.path.basename(filename))
            except ValueError:
                continue
            if file_info["segment"] == "EPI":
                epilogue_file = filename
            elif file_info["segment"] == "PRO":
                prologue_file = filename
            else:
                image_files.append(filename)

        start_times = set()
        datasets = DatasetDict()
        area_converted_to_extent = False
        area_extent = None
        for ds in dataset_keys:

            channel_files = []
            for filename in image_files:
                file_info = parse(pattern, os.path.basename(filename))
                if file_info["dataset_name"] == ds.name:
                    channel_files.append(filename)
                start_times.add(file_info['start_time'])

            if not channel_files:
                continue
            kwargs = {}
            if 'platform_name' in self.info:
                kwargs['platform_name'] = self.info['platform_name']
            # Convert area definitions to maximal area_extent
            if not area_converted_to_extent and area is not None:
                metadata = xrit.sat.load_files(prologue_file,
                                               channel_files,
                                               epilogue_file,
                                               only_metadata=True,
                                               **kwargs)
                # otherwise use the default value (MSG3 extent at
                # lon0=0.0), that is, do not pass default_extent=area_extent
                area_extent = area_defs_to_extent(
                    [area], metadata.proj4_params)
                area_converted_to_extent = True

            try:
                calibrate = 1
                if ds.calibration == 'counts':
                    calibrate = 0
                elif ds.calibration == 'radiance':
                    calibrate = 2
                image = xrit.sat.load_files(prologue_file,
                                            channel_files,
                                            epilogue_file,
                                            mask=True,
                                            calibrate=calibrate,
                                            **kwargs)
                if area_extent:
                    metadata, data = image(area_extent)
                else:
                    metadata, data = image()
            except CalibrationError:
                LOGGER.warning(
                    "Loading non-calibrated data since calibration failed.")
                image = xrit.sat.load_files(prologue_file,
                                            channel_files,
                                            epilogue_file,
                                            mask=True,
                                            calibrate=False,
                                            **kwargs)
                if area_extent:
                    metadata, data = image(area_extent)
                else:
                    metadata, data = image()

            except ReaderError as err:
                # if dataset can't be found, go on with next dataset
                LOGGER.error(str(err))
                continue
            if len(metadata.instruments) != 1:
                sensor = None
            else:
                sensor = metadata.instruments[0]

            units = {'ALBEDO(%)': '%',
                     'KELVIN': 'K'}

            standard_names = {'1': 'counts',
                              'W m-2 sr-1 m-1':
                              'toa_outgoing_radiance_per_unit_wavelength',
                              '%': 'toa_bidirectional_reflectance',
                              'K':
                              'toa_brightness_temperature'}

            unit = units.get(metadata.calibration_unit,
                             metadata.calibration_unit)
            projectable = Projectable(
                data,
                name=ds.name,
                units=unit,
                standard_name=standard_names[unit],
                sensor=sensor,
                start_time=min(start_times),
                id=ds)

            # Build an area on the fly from the mipp metadata
            proj_params = metadata.proj4_params.split(" ")
            proj_dict = {}
            for param in proj_params:
                key, val = param.split("=")
                proj_dict[key] = val

            if IS_PYRESAMPLE_LOADED:
                # Build area_def on-the-fly
                projectable.info["area"] = geometry.AreaDefinition(
                    str(metadata.area_extent) + str(data.shape),
                    "On-the-fly area", proj_dict["proj"], proj_dict,
                    data.shape[1], data.shape[0], metadata.area_extent)
            else:
                LOGGER.info("Could not build area, pyresample missing...")

            datasets[ds] = projectable

        return datasets
Example No. 24
    def test_no_match(self, fmt, string):
        """Test cases expected to not be matched."""
        with pytest.raises(ValueError):
            parse(fmt, string)
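For instance, a fixed-width integer field cannot absorb non-digit text, so a pair like the following (values hypothetical) belongs in this test. A sketch assuming standard trollsift behavior:

import pytest
from trollsift import parse

with pytest.raises(ValueError):
    # 'abcde' cannot satisfy the zero-padded integer field {orbit:05d}
    parse('hrpt_{platform:4s}_{orbit:05d}.l1b', 'hrpt_noaa_abcde.l1b')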