Ejemplo n.º 1
0
 def test_compare_fileinfo(self):
     """Check equality and inequality between FileInfo objects.

     Instances created from the same path, times and attributes are
     expected to compare equal. A different path or different times are
     expected to make them unequal, and a FileInfo is expected to differ
     from a plain path string.
     """

     def span(year):
         # Fresh [start, end] list per instance, two hours long.
         return [
             datetime.datetime(year, 1, 1, 0),
             datetime.datetime(year, 1, 1, 2),
         ]

     reference = FileInfo(path="fake/path", times=span(1900), attr={})
     twin = FileInfo(path="fake/path", times=span(1900), attr={})
     other_path = FileInfo(
         path="other/fake/path", times=span(1900), attr={})
     other_times = FileInfo(path="fake/path", times=span(1910), attr={})

     # Identical content compares equal.
     assert reference == twin

     # Any difference in path or times breaks equality.
     assert reference != other_path
     assert reference != other_times
     assert twin != other_path
     assert twin != other_times
     assert other_path != other_times

     # A bare string is never equal to a FileInfo.
     assert reference != "fake/path"
Ejemplo n.º 2
0
    def test_sequence_placeholder(self):
        """Exercise ``find`` on the "sequence-placeholder" fileset.

        Three searches are run: a period that should yield nothing, one day
        without bundling, and the same day with ``bundle="6h"``.

        Returns:
            None
        """
        sequence = self.init_filesets()["sequence-placeholder"]

        midnight = datetime.datetime(2018, 1, 1, 0, 0)
        noon = datetime.datetime(2018, 1, 1, 12, 0)
        next_midnight = datetime.datetime(2018, 1, 2, 0, 0)

        def expected(name, start, end, file_id):
            # Reference FileInfo for one file below sequence/2018/001.
            return FileInfo(
                join(self.refdir, 'sequence', '2018', '001', name),
                [start, end],
                {'id': file_id},
            )

        def first():
            return expected('sequence0001.txt', midnight, noon, 1)

        def second():
            return expected('sequence0002.txt', noon, next_midnight, 2)

        # STANDARD DATASET
        # Nothing is expected in this period:
        nothing = list(
            sequence.find("2016-12-31", "2018-01-01", no_files_error=False))
        assert not nothing

        # Two files are expected, returned as a flat list:
        flat = list(sequence.find("2018-01-01", "2018-01-02"))
        assert flat == [first(), second()]

        # The same two files are expected, each one in its own bin:
        binned = list(
            sequence.find("2018-01-01", "2018-01-02", bundle="6h"))
        assert binned == [[first()], [second()]]
Ejemplo n.º 3
0
    def test_complicated_subdirs(self):
        """Verify that FileSet finds files below sub directories whose names
        mix literal text with placeholders.
        """
        # Modelled on the Pinocchio fileset from the cloud toolbox: the folder
        # name is a literal "t" followed by date placeholders.
        pattern = join(
            self.refdir,
            "pinocchio",
            "t{year2}{month}{day}",
            "tm{year2}{month}{day}{hour}{minute}{second}{millisecond}.jpg",
        )
        pinocchio = FileSet(pattern)

        # Iterating the fileset yields every file it can find.
        discovered = list(pinocchio)

        # Start and end of the expected file are the same instant.
        moment = datetime.datetime(2017, 11, 2, 13, 28, 55, 573000)
        expected_path = join(
            self.refdir, 'pinocchio', 't171102', 'tm171102132855573.jpg')
        expected = [FileInfo(expected_path, [moment, moment], {})]

        assert discovered == expected
Ejemplo n.º 4
0
    def test_regex(self):
        """Check ``find_closest`` and ``find`` on the "regex-HIRS" fileset."""
        hirs = self.init_filesets()["regex-HIRS"]

        expected = FileInfo(
            join(self.refdir, 'regex',
                 'NSS.HIRX.NJ.D99127.S0632.E0820.B2241718.WI.gz'),
            [
                datetime.datetime(1999, 5, 7, 6, 32),
                datetime.datetime(1999, 5, 7, 8, 20),
            ],
            {'satcode': 'NJ', 'B': '2241718', 'station': 'WI'},
        )

        # The closest file must match, including its attributes.
        closest = hirs.find_closest("1999-05-08")
        assert closest == expected
        assert closest.attr == expected.attr

        # A search over the surrounding days returns exactly that file.
        in_period = list(hirs.find("1999-05-07", "1999-05-09"))
        assert in_period == [expected]
Ejemplo n.º 5
0
    def get_info(self, filename, **kwargs):
        """Build a FileInfo whose start and end time are the same timestamp.

        Args:
            filename: Object describing the file.
                NOTE(review): the body reads ``filename.path``, so a plain
                path string would raise AttributeError here -- confirm that
                callers always pass a FileInfo-like object.
            **kwargs: Accepted but not used by this method.

        Returns:
            A FileInfo object holding ``filename.path`` and the time coverage
            ``[timestamp, timestamp]``, where the timestamp is obtained from
            ``self._get_timestamp(filename)``.
        """
        moment = self._get_timestamp(filename)

        # Start and end coincide.
        coverage = [moment, moment]
        return FileInfo(filename.path, coverage)
Ejemplo n.º 6
0
    def test_files_overlap_subdirectory(self):
        """Find a file whose time coverage extends beyond its sub directory.
        """
        tutorial = self.init_filesets()["tutorial"]
        tutorial.set_placeholders(satellite="SatelliteA")
        closest = tutorial.find_closest("2018-01-03")

        # The expected file lives in the 2018-01-02 folder but ends at
        # midnight of 2018-01-03.
        expected_path = join(
            self.refdir, 'tutorial', 'SatelliteA', '2018-01-02',
            '200000-000000.nc')
        expected_times = [
            datetime.datetime(2018, 1, 2, 20, 0),
            datetime.datetime(2018, 1, 3, 0, 0),
        ]
        expected = FileInfo(
            expected_path, expected_times, {'satellite': 'SatelliteA'})

        assert closest == expected
Ejemplo n.º 7
0
    def test_single(self, file_system):
        """Exercise ``find`` on the "single" fileset.

        Args:
            file_system: Passed on to ``self.init_filesets`` and
                ``self._refdir_for_fs``.

        Returns:
            None
        """
        single = self.init_filesets(file_system)["single"]

        # STANDARD DATASET
        # Nothing is expected in this period:
        nothing = list(
            single.find("2016-12-31", "2018-01-01", no_files_error=False))
        assert not nothing

        expected = [
            FileInfo(
                join(self._refdir_for_fs(file_system), 'single_file.nc'),
                [
                    datetime.datetime(2018, 1, 1, 0, 0),
                    datetime.datetime(2018, 1, 3, 0, 0),
                ],
                {},
            ),
        ]

        # The same result is expected without bundling, with a 12 hour
        # bundle and with a bundle size of three. The first call passes no
        # ``bundle`` argument at all.
        for options in ({}, {"bundle": "12h"}, {"bundle": 3}):
            hits = list(single.find("2018-01-01", "2018-01-02", **options))
            assert hits == expected
Ejemplo n.º 8
0
    def get_info(self, filename, **kwargs):
        """Get the time coverage from a Pinocchio JPG image.

        The timestamp is read from the EXIF ``DateTimeOriginal`` tag of the
        image and parsed with the format ``"%Y:%m:%d %H:%M:%S"``.

        Args:
            filename: Path and name of file or FileInfo object.
            **kwargs: Accepted but not used by this method.

        Returns:
            A FileInfo object for *filename* whose start and end time are
            both the parsed EXIF timestamp.
        """
        # TAGS maps tag number -> tag name; invert it to look up by name.
        name2tagnum = {name: num for num, name in TAGS.items()}

        # Open the image in a with-block so the file handle is released as
        # soon as the EXIF data has been read.
        with PIL.Image.open(filename, 'r') as image:
            time_string = image._getexif()[name2tagnum["DateTimeOriginal"]]

        timestamp = datetime.datetime.strptime(
            time_string, "%Y:%m:%d %H:%M:%S")

        # Pass the file and its [start, end] coverage as two separate
        # arguments (the previous code indexed ``filename`` with the times).
        return FileInfo(filename, [timestamp, timestamp])
Ejemplo n.º 9
0
    def test_tutorial(self):
        """Run through the fileset examples of the tutorial.

        Covers an empty search, ``find_closest`` with a temporary satellite
        filter, ``find`` with and without bundling once the fileset is
        limited to SatelliteB, and ``FileSet.map`` / ``FileSet.imap``.

        Returns:
            None
        """
        tutorial = self.init_filesets()["tutorial"]

        def hour(value, day=1):
            # Full hour on a day of January 2018.
            return datetime.datetime(2018, 1, day, value, 0)

        def sat_b(name, start, end):
            # Reference FileInfo of a SatelliteB file in the 2018-01-01
            # folder.
            return FileInfo(
                join(self.refdir, 'tutorial', 'SatelliteB', '2018-01-01',
                     name),
                [start, end],
                {'satellite': 'SatelliteB'},
            )

        def all_files():
            # The five SatelliteB files expected between 2018-01-01 and
            # 2018-01-02, built fresh on every call.
            return [
                sat_b('000000-050000.nc', hour(0), hour(5)),
                sat_b('050000-100000.nc', hour(5), hour(10)),
                sat_b('100000-150000.nc', hour(10), hour(15)),
                sat_b('150000-200000.nc', hour(15), hour(20)),
                sat_b('200000-010000.nc', hour(20), hour(1, day=2)),
            ]

        # STANDARD DATASET
        # Nothing is expected in this period:
        nothing = list(
            tutorial.find("2017-12-31", "2018-01-01", no_files_error=False))
        assert not nothing

        # Closest file to 2018-01-01 03:00 while SatelliteA and SatelliteC
        # are excluded for this call only:
        closest = tutorial.find_closest(
            "2018-01-01 03:00",
            filters={"!satellite": ("SatelliteA", "SatelliteC")})
        assert closest == sat_b('000000-050000.nc', hour(0), hour(5))

        # Limit this fileset to SatelliteB permanently
        tutorial.set_placeholders(satellite="SatelliteB")

        # Five files are expected as a flat list:
        unbundled = list(tutorial.find("2018-01-01", "2018-01-02"))
        assert unbundled == all_files()

        # With bundle="12h" and with bundle=3 the same five files are
        # expected in two bins: the first three, then the last two.
        for bundle in ("12h", 3):
            binned = list(
                tutorial.find("2018-01-01", "2018-01-02", bundle=bundle))
            reference = all_files()
            assert binned == [reference[:3], reference[3:]]

        expected_names = [
            'SatelliteB', 'SatelliteB', 'SatelliteB', 'SatelliteB',
            'SatelliteB', 'SatelliteB', 'SatelliteB', 'SatelliteB',
            'SatelliteB', 'SatelliteB'
        ]
        expected_values = [
            111.92121062601221, 24.438060320121387, -98.80775640366036,
            -75.84330354813459, 59.41297628327247, 106.80513550614192,
            -3.999061608822918, -108.68523313569861, -51.82441769876156,
            66.33842832792985
        ]

        for test_method in [FileSet.map, FileSet.imap]:
            # Map over the file objects
            mapped = list(
                test_method(tutorial,
                            TestFileSet._tutorial_map,
                            start="2018-01-01",
                            end="2018-01-03"))
            assert mapped == expected_names

            # Map over the file content
            mapped = list(
                test_method(
                    tutorial,
                    TestFileSet._tutorial_map_content,
                    start="2018-01-01",
                    end="2018-01-03",
                    on_content=True,
                ))
            assert np.allclose(mapped, expected_values)
Ejemplo n.º 10
0
    def test_glob(self):
        """Check a FileSet whose path contains glob wildcards.

        The expected result is every SatelliteA tutorial file of the two
        day folders, each with the time coverage from year 1 to the end of
        year 9999.
        """
        files = FileSet(
            join(self.refdir, "tutorial", "{satellite}", "*", "*.nc"),
            placeholder={"satellite": 'SatelliteA'},
        )

        self._print_files(list(files))

        day_folders = ('2018-01-02', '2018-01-01')
        file_names = (
            '000000-040000.nc',
            '080000-120000.nc',
            '200000-000000.nc',
            '040000-080000.nc',
            '120000-160000.nc',
            '160000-200000.nc',
        )

        expected = [
            FileInfo(
                join(self.refdir, 'tutorial', 'SatelliteA', folder, name),
                [
                    datetime.datetime(1, 1, 1, 0, 0),
                    datetime.datetime(9999, 12, 31, 23, 59, 59, 999999),
                ],
                {'satellite': 'SatelliteA'},
            )
            for folder in day_folders
            for name in file_names
        ]

        # Order both sides by path rather than by time, because all the
        # times are equal.
        expected.sort(key=lambda info: info.path)
        discovered = sorted(files, key=lambda info: info.path)

        assert discovered == expected