def _download_and_collect_entries(self, query_result, client=None, path=None, progress=False):
    """Download the files of a query result and yield one entry per file read.

    This is a generator: nothing is downloaded until it is iterated.

    Parameters
    ----------
    query_result : iterable
        Query result blocks. Each block is converted with
        ``tables.DatabaseEntry._from_query_result_block`` and paired, in
        order, with one downloaded path.
    client : object, optional
        Download client; its ``get(query_result, path)`` result must offer
        ``wait(progress=...)``. A new ``VSOClient()`` is created if omitted.
    path : optional
        Passed through unchanged to ``client.get``.
    progress : bool, optional
        Passed through to ``wait``.

    Yields
    ------
    entry
        Entries read from each downloaded file or directory, with the
        query-level attributes copied over from the matching block,
        ``path`` set to the file the entry was read from and
        ``download_time`` set to the current (naive) UTC time.

    Raises
    ------
    ValueError
        If a downloaded path is neither a file nor a directory.
    """
    if client is None:
        client = VSOClient()

    # Attributes that are taken from the query block rather than the file.
    copied_attributes = (
        'source', 'provider', 'physobs', 'fileid',
        'observation_time_start', 'observation_time_end',
        'instrument', 'size', 'wavemin', 'wavemax')

    paths = client.get(query_result, path).wait(progress=progress)

    for downloaded_path, block in zip(paths, query_result):
        qr_entry = tables.DatabaseEntry._from_query_result_block(block)

        if os.path.isfile(downloaded_path):
            located_entries = (
                (entry, downloaded_path)
                for entry in tables.entries_from_file(
                    downloaded_path, self.default_waveunit))
        elif os.path.isdir(downloaded_path):
            # entries_from_dir takes (directory, recursive, pattern,
            # default_waveunit, ...) and yields (entry, filepath) pairs, so
            # the wave unit must go by keyword and the pairs be unpacked.
            located_entries = tables.entries_from_dir(
                downloaded_path, default_waveunit=self.default_waveunit)
        else:
            raise ValueError('The path is neither a file nor directory')

        for entry, entry_path in located_entries:
            for name in copied_attributes:
                setattr(entry, name, getattr(qr_entry, name))
            entry.path = entry_path
            entry.download_time = datetime.utcnow()
            yield entry
def add_from_dir(self, path, recursive=False, pattern='*',
                 ignore_already_added=False, time_string_parse_format=None):
    """
    Search the given directory for FITS files and use their FITS headers
    to add new entries to the database.

    Note that one entry in the database is assigned to a list of FITS
    headers, so not the number of FITS headers but the number of FITS files
    which have been read determine the number of database entries that will
    be added. FITS files are detected by reading the content of each file,
    the ``pattern`` argument may be used to avoid reading entire directories
    if one knows that all FITS files have the same filename extension.

    Parameters
    ----------
    path : str
        The directory where to look for FITS files.
    recursive : bool, optional
        If True, the given directory will be searched recursively.
        Otherwise, only the given directory and no subdirectories are
        searched. The default is `False`, i.e. the given directory is not
        searched recursively.
    pattern : str, optional
        The pattern can be used to filter the list of filenames before the
        files are attempted to be read. The default is to collect all
        files. This value is passed to the function :func:`fnmatch.filter`,
        see its documentation for more information on the supported syntax.
    ignore_already_added : bool, optional
        See :meth:`sunpy.database.Database.add`.
    time_string_parse_format : str, optional
        Fallback timestamp format which will be passed to
        `~astropy.time.Time.strptime` if `sunpy.time.parse_time` is unable
        to automatically read the ``date-obs`` metadata.

    Raises
    ------
    EntryAlreadyAddedError
        If an entry read from the directory is already in the database and
        ``ignore_already_added`` is False. Entries processed before the
        offending one are not rolled back here.
    """
    cmds = CompositeOperation()
    entries = tables.entries_from_dir(
        path, recursive, pattern, self.default_waveunit,
        time_string_parse_format=time_string_parse_format)
    for database_entry, _filepath in entries:
        # Test the cheap flag first so the database is only materialised
        # into a list when the duplicate check is actually wanted.
        if not ignore_already_added and database_entry in list(self):
            raise EntryAlreadyAddedError(database_entry)
        cmd = commands.AddEntry(self.session, database_entry)
        if self._enable_history:
            # Collected and executed as one composite operation below.
            cmds.add(cmd)
        else:
            cmd()
        self._cache.append(database_entry)
    if cmds:
        self._command_manager.do(cmds)
def test_entries_from_dir_recursively_false():
    """A non-recursive scan of ``testdir`` yields exactly 107 entries."""
    found = entries_from_dir(
        testdir,
        False,
        default_waveunit='angstrom',
        time_string_parse_format='%d/%m/%Y',
    )
    collected = list(found)
    assert len(collected) == 107
def test_entries_from_dir_recursively_true():
    """A recursive scan of ``testdir`` limited to ``*fits`` yields 18 entries."""
    found = entries_from_dir(
        testdir,
        True,
        default_waveunit='angstrom',
        time_string_parse_format='%d/%m/%Y',
        pattern='*fits',
    )
    collected = list(found)
    assert len(collected) == 18
def test_entries_from_dir_recursively_false():
    """A non-recursive scan of ``testdir`` warns about truncation and yields 109 entries."""
    expected_warning = pytest.warns(
        AstropyUserWarning, match='File may have been truncated')
    with expected_warning:
        found = entries_from_dir(
            testdir,
            False,
            default_waveunit='angstrom',
            time_string_parse_format='%d/%m/%Y',
        )
        # Consume the generator inside the context so the warning is captured.
        collected = list(found)
    assert len(collected) == 109
def _download_and_collect_entries(self, query_result, client=None, path=None, progress=False,
                                  methods=None, overwrite=False, **kwargs):
    """Download the files of a query result and yield one entry per file read.

    This is a generator: nothing happens (including the removal of
    overwritten entries) until it is iterated.

    Parameters
    ----------
    query_result : iterable
        Query result blocks; iterated more than once, so it must be
        re-iterable.
    client : object, optional
        Download client providing ``fetch(query_result, path)``. A new
        ``VSOClient()`` is created if omitted.
    path : optional
        Passed through unchanged to ``client.fetch``.
    progress, methods : optional
        Accepted for interface compatibility; not used by this body.
    overwrite : bool, optional
        Controls what happens to query blocks that match an entry already
        in the database with a non-``None`` path. If False, those blocks
        are skipped and not downloaded again. If True, the matching
        database entries are removed with ``self.remove`` and the blocks
        are downloaded again.
    **kwargs
        No further keyword arguments are supported.

    Yields
    ------
    entry
        Entries read from each downloaded file or directory, with the
        query-level attributes copied over from the matching block,
        ``path`` set to the file the entry was read from and
        ``download_time`` set to the current (naive) UTC time.

    Raises
    ------
    TypeError
        If an unexpected keyword argument is given.
    ValueError
        If a downloaded path is neither a file nor a directory.
    """
    if kwargs:
        k, _ = kwargs.popitem()
        raise TypeError(f'unexpected keyword argument {k!r}')

    if client is None:
        client = VSOClient()

    # Attributes that identify a query block and that are copied from the
    # block onto every entry read from its download.
    query_attributes = (
        "source", "provider", "physobs", "fileid",
        "observation_time_start", "observation_time_end",
        "instrument", "size", "wavemin", "wavemax")

    remove_list = []
    delete_entries = []
    for qr in query_result:
        temp = tables.DatabaseEntry._from_query_result_block(qr)
        for database_entry in self:
            if database_entry.path is not None and temp._compare_attributes(
                    database_entry, list(query_attributes)):
                if not overwrite:
                    remove_list.append(qr)
                else:
                    delete_entries.append(database_entry)

    if remove_list:
        # One pass instead of rebuilding the list once per skipped block.
        query_result = [
            x for x in query_result
            if all(x != skipped for skipped in remove_list)]

    for stale_entry in delete_entries:
        self.remove(stale_entry)

    paths = client.fetch(query_result, path)

    for downloaded_path, block in zip(paths, query_result):
        qr_entry = tables.DatabaseEntry._from_query_result_block(block)

        if os.path.isfile(downloaded_path):
            located_entries = (
                (entry, downloaded_path)
                for entry in tables.entries_from_file(
                    downloaded_path, self.default_waveunit))
        elif os.path.isdir(downloaded_path):
            # entries_from_dir takes (directory, recursive, pattern,
            # default_waveunit, ...) and yields (entry, filepath) pairs, so
            # the wave unit must go by keyword and the pairs be unpacked.
            located_entries = tables.entries_from_dir(
                downloaded_path, default_waveunit=self.default_waveunit)
        else:
            raise ValueError('The path is neither a file nor directory')

        for entry, entry_path in located_entries:
            for name in query_attributes:
                setattr(entry, name, getattr(qr_entry, name))
            entry.path = entry_path
            entry.download_time = datetime.utcnow()
            yield entry
def add_from_dir(self, path, recursive=False, pattern='*',
                 ignore_already_added=False, time_string_parse_format=None):
    """Search the given directory for FITS files and use their FITS headers
    to add new entries to the database.

    Note that one entry in the database is assigned to a list of FITS
    headers, so not the number of FITS headers but the number of FITS files
    which have been read determine the number of database entries that will
    be added. FITS files are detected by reading the content of each file,
    the `pattern` argument may be used to avoid reading entire directories
    if one knows that all FITS files have the same filename extension.

    Parameters
    ----------
    path : str
        The directory where to look for FITS files.
    recursive : bool, optional
        If True, the given directory will be searched recursively.
        Otherwise, only the given directory and no subdirectories are
        searched. The default is `False`, i.e. the given directory is not
        searched recursively.
    pattern : str, optional
        The pattern can be used to filter the list of filenames before the
        files are attempted to be read. The default is to collect all
        files. This value is passed to the function :func:`fnmatch.filter`,
        see its documentation for more information on the supported syntax.
    ignore_already_added : bool, optional
        See :meth:`sunpy.database.Database.add`.
    time_string_parse_format : str, optional
        Fallback timestamp format which will be passed to
        `~datetime.datetime.strptime` if `sunpy.time.parse_time` is unable
        to automatically read the `date-obs` metadata.

    Raises
    ------
    EntryAlreadyAddedError
        If an entry read from the directory is already in the database and
        `ignore_already_added` is False. Entries processed before the
        offending one are not rolled back here.
    """
    cmds = CompositeOperation()
    entries = tables.entries_from_dir(
        path, recursive, pattern, self.default_waveunit,
        time_string_parse_format=time_string_parse_format)
    for database_entry, _filepath in entries:
        # Test the cheap flag first so the database is only materialised
        # into a list when the duplicate check is actually wanted.
        if not ignore_already_added and database_entry in list(self):
            raise EntryAlreadyAddedError(database_entry)
        cmd = commands.AddEntry(self.session, database_entry)
        if self._enable_history:
            # Collected and executed as one composite operation below.
            cmds.add(cmd)
        else:
            cmd()
        self._cache.append(database_entry)
    if cmds:
        self._command_manager.do(cmds)
def _download_and_collect_entries(self, query_result, **kwargs):
    """Download the files of a query result and yield one entry per file read.

    This is a generator: nothing is downloaded until it is iterated.

    Parameters
    ----------
    query_result : iterable
        Query result blocks. Each block is converted with
        ``tables.DatabaseEntry._from_query_result_block`` and paired, in
        order, with one downloaded path.
    client : object, optional (keyword)
        Download client; its ``get(query_result, path, methods)`` result
        must offer ``wait(progress=...)``. A new ``VSOClient()`` is created
        if omitted.
    path : optional (keyword)
        Passed through unchanged to ``client.get``.
    progress : bool, optional (keyword)
        Passed through to ``wait``. Defaults to False.
    methods : optional (keyword)
        Passed through to ``client.get``. Defaults to
        ``('URL-FILE_Rice', 'URL-FILE')``.

    Yields
    ------
    entry
        Entries read from each downloaded file or directory, with the
        query-level attributes copied over from the matching block,
        ``path`` set to the file the entry was read from and
        ``download_time`` set to the current (naive) UTC time.

    Raises
    ------
    TypeError
        If any other keyword argument is given.
    ValueError
        If a downloaded path is neither a file nor a directory.
    """
    client = kwargs.pop('client', None)
    path = kwargs.pop('path', None)
    progress = kwargs.pop('progress', False)
    methods = kwargs.pop('methods', ('URL-FILE_Rice', 'URL-FILE'))
    if kwargs:
        k, _ = kwargs.popitem()
        raise TypeError('unexpected keyword argument {0!r}'.format(k))

    if client is None:
        client = VSOClient()

    # Attributes that are taken from the query block rather than the file.
    copied_attributes = (
        'source', 'provider', 'physobs', 'fileid',
        'observation_time_start', 'observation_time_end',
        'instrument', 'size', 'wavemin', 'wavemax')

    paths = client.get(query_result, path, methods).wait(progress=progress)

    for downloaded_path, block in zip(paths, query_result):
        qr_entry = tables.DatabaseEntry._from_query_result_block(block)

        if os.path.isfile(downloaded_path):
            located_entries = (
                (entry, downloaded_path)
                for entry in tables.entries_from_file(
                    downloaded_path, self.default_waveunit))
        elif os.path.isdir(downloaded_path):
            # entries_from_dir takes (directory, recursive, pattern,
            # default_waveunit, ...) and yields (entry, filepath) pairs, so
            # the wave unit must go by keyword and the pairs be unpacked.
            located_entries = tables.entries_from_dir(
                downloaded_path, default_waveunit=self.default_waveunit)
        else:
            raise ValueError('The path is neither a file nor directory')

        for entry, entry_path in located_entries:
            for name in copied_attributes:
                setattr(entry, name, getattr(qr_entry, name))
            entry.path = entry_path
            entry.download_time = datetime.utcnow()
            yield entry
def test_entries_from_dir_recursively_false():
    """A non-recursive scan of ``testdir`` yields exactly 23 entries."""
    found = entries_from_dir(
        testdir,
        False,
        default_waveunit='angstrom',
    )
    collected = list(found)
    assert len(collected) == 23
def test_entries_from_dir():
    """Check the entry built for ``na120701.091058.fits`` in ``waveunitdir``.

    The directory must yield four (entry, filename) pairs; the entry of the
    NRH file is then compared against its expected path, FITS header
    entries and FITS key comments.
    """
    pairs = list(entries_from_dir(waveunitdir))
    assert len(pairs) == 4

    # Stop on the NRH file; the loop variables keep the matching pair.
    for db_entry, file_path in pairs:
        if file_path.endswith('na120701.091058.fits'):
            break

    assert db_entry.path in (os.path.join(waveunitdir, file_path), file_path)
    assert file_path.startswith(waveunitdir)
    assert len(db_entry.fits_header_entries) == 42

    expected_headers = [
        FitsHeaderEntry(key, value)
        for key, value in (
            ('SIMPLE', True),
            ('BITPIX', -32),
            ('NAXIS', 3),
            ('NAXIS1', 256),
            ('NAXIS2', 256),
            ('NAXIS3', 1),
            ('DATE', '27-OCT-82'),
            ('DATE-OBS', '2012-07-01'),
            ('DATE_OBS', '2012-07-01T09:10:58.200Z'),
            ('DATE_END', '2012-07-01T09:10:58.200Z'),
            ('WAVELNTH', 1.98669),
            ('WAVEUNIT', 'm'),
            ('PHYSPARA', 'STOKESI'),
            ('OBJECT', 'FS'),
            ('OBS_TYPE', 'RADIO'),
            ('OBS_MODE', 'IMAGE'),
            ('LONGITUD', 0.0),
            ('LATITUDE', 0.0),
            ('INSTITUT', 'MEUDON'),
            ('CMP_NAME', 'ROUTINE'),
            ('CONTACT', ' A. KERDRAON'),
            ('TELESCOP', 'NRH'),
            ('INSTRUME', 'NRH2'),
            ('FILENAME', 'nrh2_1509_h80_20120701_091058c02_i.fts'),
            ('NRH_DATA', '2DB'),
            ('ORIGIN', 'wrfits'),
            ('FREQ', 150.9),
            ('FREQUNIT', 6),
            ('BSCALE', 1.0),
            ('BZERO', 0.0),
            ('BUNIT', 'K'),
            ('EXPTIME', 1168576512),
            ('CTYPE1', 'Solar-X'),
            ('CTYPE2', 'Solar-Y'),
            ('CTYPE3', 'StokesI'),
            ('CRPIX1', 128),
            ('CRPIX2', 128),
            ('CDELT1', 0.015625),
            ('CDELT2', 0.015625),
            ('SOLAR_R', 64.0),
            ('COMMENT', ''),
            ('HISTORY', ''),
        )
    ]
    assert db_entry.fits_header_entries == expected_headers

    expected_comments = [
        FitsKeyComment(key, comment)
        for key, comment in (
            ('WAVEUNIT', 'in meters'),
            ('NAXIS2', 'number of rows'),
            ('CDELT2', 'pixel scale y, in solar radius/pixel'),
            ('CRPIX1', 'SUN CENTER X, pixels'),
            ('CRPIX2', 'SUN CENTER Y, pixels'),
            ('SOLAR_R', 'SOLAR RADIUS, pixels'),
            ('NAXIS1', 'number of columns'),
            ('CDELT1', 'pixel scale x, in solar radius/pixel'),
            ('NAXIS3', 'StokesI'),
            ('TELESCOP', 'Nancay Radioheliograph'),
            ('INSTRUME', 'Nancay 2D-images Radioheliograph'),
            ('BUNIT', 'Brightness temperature'),
            ('BITPIX', 'IEEE 32-bit floating point values'),
            ('DATE', 'Date of file creation'),
            ('FREQUNIT', 'in MHz'),
            ('EXPTIME', 'in seconds'),
        )
    ]
    assert db_entry.fits_key_comments == expected_comments
def test_entries_from_dir():
    """Check the entry built for ``na120701.091058.fits`` in ``waveunitdir``.

    The directory must yield four (entry, filename) pairs; the entry of the
    NRH file is then compared against its expected path, FITS header
    entries and FITS key comments.
    """
    pairs = list(entries_from_dir(waveunitdir))
    assert len(pairs) == 4

    # Stop on the NRH file; the loop variables keep the matching pair.
    for db_entry, file_path in pairs:
        if file_path.endswith("na120701.091058.fits"):
            break

    assert db_entry.path == os.path.join(waveunitdir, file_path)
    assert file_path.startswith(waveunitdir)
    assert len(db_entry.fits_header_entries) == 42

    expected_headers = [
        FitsHeaderEntry(key, value)
        for key, value in (
            ("SIMPLE", True),
            ("BITPIX", -32),
            ("NAXIS", 3),
            ("NAXIS1", 256),
            ("NAXIS2", 256),
            ("NAXIS3", 1),
            ("DATE", "27-OCT-82"),
            ("DATE-OBS", "2012-07-01"),
            ("DATE_OBS", "2012-07-01T09:10:58.200Z"),
            ("DATE_END", "2012-07-01T09:10:58.200Z"),
            ("WAVELNTH", 1.98669),
            ("WAVEUNIT", "m"),
            ("PHYSPARA", "STOKESI"),
            ("OBJECT", "FS"),
            ("OBS_TYPE", "RADIO"),
            ("OBS_MODE", "IMAGE"),
            ("LONGITUD", 0.0),
            ("LATITUDE", 0.0),
            ("INSTITUT", "MEUDON"),
            ("CMP_NAME", "ROUTINE"),
            ("CONTACT", " A. KERDRAON"),
            ("TELESCOP", "NRH"),
            ("INSTRUME", "NRH2"),
            ("FILENAME", "nrh2_1509_h80_20120701_091058c02_i.fts"),
            ("NRH_DATA", "2DB"),
            ("ORIGIN", "wrfits"),
            ("FREQ", 150.9),
            ("FREQUNIT", 6),
            ("BSCALE", 1.0),
            ("BZERO", 0.0),
            ("BUNIT", "K"),
            ("EXPTIME", 1168576512),
            ("CTYPE1", "Solar-X"),
            ("CTYPE2", "Solar-Y"),
            ("CTYPE3", "StokesI"),
            ("CRPIX1", 128),
            ("CRPIX2", 128),
            ("CDELT1", 0.015625),
            ("CDELT2", 0.015625),
            ("SOLAR_R", 64.0),
            ("COMMENT", ""),
            ("HISTORY", ""),
        )
    ]
    assert db_entry.fits_header_entries == expected_headers

    expected_comments = [
        FitsKeyComment(key, comment)
        for key, comment in (
            ("WAVEUNIT", "in meters"),
            ("NAXIS2", "number of rows"),
            ("CDELT2", "pixel scale y, in solar radius/pixel"),
            ("CRPIX1", "SUN CENTER X, pixels"),
            ("CRPIX2", "SUN CENTER Y, pixels"),
            ("SOLAR_R", "SOLAR RADIUS, pixels"),
            ("NAXIS1", "number of columns"),
            ("CDELT1", "pixel scale x, in solar radius/pixel"),
            ("NAXIS3", "StokesI"),
            ("TELESCOP", "Nancay Radioheliograph"),
            ("INSTRUME", "Nancay 2D-images Radioheliograph"),
            ("BUNIT", "Brightness temperature"),
            ("BITPIX", "IEEE 32-bit floating point values"),
            ("DATE", "Date of file creation"),
            ("FREQUNIT", "in MHz"),
            ("EXPTIME", "in seconds"),
        )
    ]
    assert db_entry.fits_key_comments == expected_comments
def test_entries_from_dir_recursively_true():
    """A recursive scan of ``testdir`` yields exactly 28 entries."""
    found = entries_from_dir(
        testdir,
        True,
        default_waveunit="angstrom",
    )
    collected = list(found)
    assert len(collected) == 28
def test_entries_from_dir_recursively_false():
    """A non-recursive scan of ``testdir`` yields exactly 81 entries."""
    found = entries_from_dir(
        testdir,
        False,
        default_waveunit='angstrom',
        time_string_parse_format='%d/%m/%Y',
    )
    collected = list(found)
    assert len(collected) == 81
def _download_and_collect_entries(self, query_result, **kwargs):
    """Download the files of a query result and yield one entry per file read.

    This is a generator: nothing happens (including the removal of
    overwritten entries) until it is iterated.

    Parameters
    ----------
    query_result : iterable
        Query result blocks; iterated more than once, so it must be
        re-iterable.
    client : object, optional (keyword)
        Download client; its ``fetch(query_result, path)`` result must
        offer ``wait(progress=...)``. A new ``VSOClient()`` is created if
        omitted.
    path : optional (keyword)
        Passed through unchanged to ``client.fetch``.
    progress : bool, optional (keyword)
        Passed through to ``wait``. Defaults to False.
    methods : optional (keyword)
        Accepted (default ``('URL-FILE_Rice', 'URL-FILE')``) but not used
        by this body.
    overwrite : bool, optional (keyword)
        Controls what happens to query blocks that match an entry already
        in the database with a non-``None`` path. If False (the default),
        those blocks are skipped and not downloaded again. If True, the
        matching database entries are removed with ``self.remove`` and the
        blocks are downloaded again.

    Yields
    ------
    entry
        Entries read from each downloaded file or directory, with the
        query-level attributes copied over from the matching block,
        ``path`` set to the file the entry was read from and
        ``download_time`` set to the current (naive) UTC time.

    Raises
    ------
    TypeError
        If any other keyword argument is given.
    ValueError
        If a downloaded path is neither a file nor a directory.
    """
    client = kwargs.pop('client', None)
    path = kwargs.pop('path', None)
    progress = kwargs.pop('progress', False)
    methods = kwargs.pop('methods', ('URL-FILE_Rice', 'URL-FILE'))
    overwrite = kwargs.pop('overwrite', False)
    if kwargs:
        k, _ = kwargs.popitem()
        raise TypeError('unexpected keyword argument {0!r}'.format(k))

    if client is None:
        client = VSOClient()

    # Attributes that identify a query block and that are copied from the
    # block onto every entry read from its download.
    query_attributes = (
        "source", "provider", "physobs", "fileid",
        "observation_time_start", "observation_time_end",
        "instrument", "size", "wavemin", "wavemax")

    remove_list = []
    delete_entries = []
    for qr in query_result:
        temp = tables.DatabaseEntry._from_query_result_block(qr)
        for database_entry in self:
            if database_entry.path is not None and temp._compare_attributes(
                    database_entry, list(query_attributes)):
                if not overwrite:
                    remove_list.append(qr)
                else:
                    delete_entries.append(database_entry)

    if remove_list:
        # One pass instead of rebuilding the list once per skipped block.
        query_result = [
            x for x in query_result
            if all(x != skipped for skipped in remove_list)]

    for stale_entry in delete_entries:
        self.remove(stale_entry)

    paths = client.fetch(query_result, path).wait(progress=progress)

    for downloaded_path, block in zip(paths, query_result):
        qr_entry = tables.DatabaseEntry._from_query_result_block(block)

        if os.path.isfile(downloaded_path):
            located_entries = (
                (entry, downloaded_path)
                for entry in tables.entries_from_file(
                    downloaded_path, self.default_waveunit))
        elif os.path.isdir(downloaded_path):
            # entries_from_dir takes (directory, recursive, pattern,
            # default_waveunit, ...) and yields (entry, filepath) pairs, so
            # the wave unit must go by keyword and the pairs be unpacked.
            located_entries = tables.entries_from_dir(
                downloaded_path, default_waveunit=self.default_waveunit)
        else:
            raise ValueError('The path is neither a file nor directory')

        for entry, entry_path in located_entries:
            for name in query_attributes:
                setattr(entry, name, getattr(qr_entry, name))
            entry.path = entry_path
            entry.download_time = datetime.utcnow()
            yield entry
def test_entries_from_dir_recursively_false():
    """A non-recursive scan of ``testdir`` yields exactly 39 entries."""
    found = entries_from_dir(
        testdir,
        False,
        default_waveunit='angstrom',
    )
    collected = list(found)
    assert len(collected) == 39
def test_entries_from_dir_recursively_false():
    """A non-recursive scan of ``testdir`` warns about truncation and yields 109 entries."""
    expected_warning = pytest.warns(
        AstropyUserWarning, match='File may have been truncated')
    with expected_warning:
        found = entries_from_dir(
            testdir,
            False,
            default_waveunit='angstrom',
            time_string_parse_format='%d/%m/%Y',
        )
        # Consume the generator inside the context so the warning is captured.
        collected = list(found)
    assert len(collected) == 109