def test_entries_from_file_time_string_parse_format():
    # Without an explicit parse format the date strings in GOES_DATA
    # cannot be interpreted, so a ValueError is expected.
    with pytest.raises(ValueError):
        list(entries_from_file(GOES_DATA))
    # Supplying the day/month/year format makes parsing succeed.
    parsed_entries = list(
        entries_from_file(GOES_DATA, time_string_parse_format='%d/%m/%Y'))
    assert len(parsed_entries) == 4
    first = parsed_entries[0]
    assert len(first.fits_header_entries) == 17
    expected_time = datetime(2011, 6, 7, 0, 0)
    assert first.observation_time_start == expected_time
    assert first.observation_time_end == expected_time
    assert first.path == GOES_DATA
def test_entries_from_file_time_string_parse_format():
    # GOES_DATA stores observation dates as day/month/year, which the
    # default time parser rejects.
    with pytest.raises(ValueError):
        list(entries_from_file(GOES_DATA))
    entries = list(entries_from_file(GOES_DATA,
                                     time_string_parse_format='%d/%m/%Y'))
    assert len(entries) == 4
    entry, *_ = entries
    assert len(entry.fits_header_entries) == 17
    assert entry.observation_time_start == datetime(2011, 6, 7, 0, 0)
    assert entry.observation_time_end == datetime(2011, 6, 7, 0, 0)
    assert entry.path == GOES_DATA
def _download_and_collect_entries(self, query_result, client=None, path=None, progress=False):
    """Download each block of *query_result* and yield one database entry
    per FITS header found in the downloaded files, annotated with the
    metadata of the originating query-result block.
    """
    if client is None:
        client = VSOClient()
    # Attributes copied verbatim from the query-result block onto every
    # entry generated from the corresponding file.
    copied_attrs = (
        'source', 'provider', 'physobs', 'fileid',
        'observation_time_start', 'observation_time_end',
        'instrument', 'size', 'wavemin', 'wavemax')
    for block in query_result:
        downloaded = client.get([block], path).wait(progress=progress)
        for file_path in downloaded:
            qr_entry = tables.DatabaseEntry._from_query_result_block(block)
            for entry in tables.entries_from_file(file_path, self.default_waveunit):
                for attr in copied_attrs:
                    setattr(entry, attr, getattr(qr_entry, attr))
                entry.path = file_path
                entry.download_time = datetime.utcnow()
                yield entry
def _download_and_collect_entries(self, query_result, client=None, path=None, progress=False):
    """Download all files of *query_result* in one request and yield one
    database entry per FITS header found, annotated with the metadata of
    the matching query-result block.
    """
    if client is None:
        client = VSOClient()
    downloaded = client.get(query_result, path).wait(progress=progress)
    copied_attrs = (
        'source', 'provider', 'physobs', 'fileid',
        'observation_time_start', 'observation_time_end',
        'instrument', 'size', 'wavemin', 'wavemax')
    for file_path, block in zip(downloaded, query_result):
        qr_entry = tables.DatabaseEntry._from_query_result_block(block)
        # A result may be a single file or a directory of files.
        if os.path.isfile(file_path):
            entries = tables.entries_from_file(file_path, self.default_waveunit)
        elif os.path.isdir(file_path):
            entries = tables.entries_from_dir(file_path, self.default_waveunit)
        else:
            raise ValueError('The path is neither a file nor directory')
        for entry in entries:
            for attr in copied_attrs:
                setattr(entry, attr, getattr(qr_entry, attr))
            entry.path = file_path
            entry.download_time = datetime.utcnow()
            yield entry
def _download_and_collect_entries(self, query_result, client=None, path=None,
                                  progress=False, methods=None,
                                  overwrite=False, **kwargs):
    """Fetch the files of *query_result* and yield one database entry per
    FITS header, annotated with the metadata of the matching result block.

    Blocks whose data is already present in the database are skipped,
    unless ``overwrite`` is true, in which case the stale entries are
    removed first.

    NOTE(review): ``methods`` and ``progress`` are accepted but never used
    in this body — presumably kept for interface compatibility; confirm.
    """
    if kwargs:
        key, _ = kwargs.popitem()
        raise TypeError(f'unexpected keyword argument {key!r}')
    if client is None:
        client = VSOClient()
    # Identifying attributes used to decide whether a result block is
    # already present in the database.
    identity_attrs = ["source", "provider", "physobs", "fileid",
                      "observation_time_start", "observation_time_end",
                      "instrument", "size", "wavemin", "wavemax"]
    skipped_blocks = []
    stale_entries = []
    for qr in query_result:
        candidate = tables.DatabaseEntry._from_query_result_block(qr)
        for database_entry in self:
            if database_entry.path is not None and candidate._compare_attributes(
                    database_entry, identity_attrs):
                if not overwrite:
                    skipped_blocks.append(qr)
                else:
                    stale_entries.append(database_entry)
    for qr in skipped_blocks:
        query_result = [x for x in query_result if x != qr]
    for database_entry in stale_entries:
        self.remove(database_entry)
    downloaded = client.fetch(query_result, path)
    copied_attrs = (
        'source', 'provider', 'physobs', 'fileid',
        'observation_time_start', 'observation_time_end',
        'instrument', 'size', 'wavemin', 'wavemax')
    for file_path, block in zip(downloaded, query_result):
        qr_entry = tables.DatabaseEntry._from_query_result_block(block)
        if os.path.isfile(file_path):
            entries = tables.entries_from_file(file_path, self.default_waveunit)
        elif os.path.isdir(file_path):
            entries = tables.entries_from_dir(file_path, self.default_waveunit)
        else:
            raise ValueError('The path is neither a file nor directory')
        for entry in entries:
            for attr in copied_attrs:
                setattr(entry, attr, getattr(qr_entry, attr))
            entry.path = file_path
            entry.download_time = datetime.utcnow()
            yield entry
def test_entries_from_file():
    """Check the entry generated from MQ_IMAGE: header entries, key
    comments, instrument, observation times, wavelengths and path."""
    with pytest.warns(AstropyUserWarning, match='File may have been truncated'):
        entries = list(entries_from_file(MQ_IMAGE))
    assert len(entries) == 1
    entry = entries[0]
    assert len(entry.fits_header_entries) == 31
    expected_fits_header_entries = [
        FitsHeaderEntry('SIMPLE', True),
        FitsHeaderEntry('BITPIX', 16),
        FitsHeaderEntry('NAXIS', 2),
        FitsHeaderEntry('NAXIS1', 1500),
        FitsHeaderEntry('NAXIS2', 1340),
        FitsHeaderEntry('CONTACT', '*****@*****.**'),
        FitsHeaderEntry('DATE_OBS', '2013-08-12T08:42:53.000'),
        FitsHeaderEntry('DATE_END', '2013-08-12T08:42:53.000'),
        FitsHeaderEntry('FILENAME', 'mq130812.084253.fits'),
        FitsHeaderEntry('INSTITUT', 'Observatoire de Paris'),
        FitsHeaderEntry('INSTRUME', 'Spectroheliograph'),
        FitsHeaderEntry('OBJECT', 'FS'),
        FitsHeaderEntry('OBS_MODE', 'SCAN'),
        FitsHeaderEntry('PHYSPARA', 'Intensity'),
        FitsHeaderEntry('NBREG', 1),
        FitsHeaderEntry('NBLAMBD', 1),
        FitsHeaderEntry('WAVELNTH', 6563),
        FitsHeaderEntry('WAVEUNIT', 'angstrom'),
        FitsHeaderEntry('POLARANG', 0),
        FitsHeaderEntry('THEMISFF', 3),
        FitsHeaderEntry('LONGTRC', 258.78),
        FitsHeaderEntry('LONGCARR', 258.78),
        FitsHeaderEntry('LONGITUD', 258.78),
        FitsHeaderEntry('LATITUD', 6.50107),
        FitsHeaderEntry('LATIRC', 6.50107),
        FitsHeaderEntry('INDLAMD', 1),
        FitsHeaderEntry('INDREG', 1),
        FitsHeaderEntry('SEQ_IND', 1),
        FitsHeaderEntry('SVECTOR', 0),
        FitsHeaderEntry('COMMENT', ''),
        FitsHeaderEntry('HISTORY', '')
    ]
    assert entry.fits_header_entries == expected_fits_header_entries
    # BUG FIX: the previous comparison used `a.sort() == b.sort()`;
    # list.sort() sorts in place and returns None, so the assertion
    # compared None == None and could never fail. Compare the lists
    # directly instead.
    assert entry.fits_key_comments == [
        FitsKeyComment('SIMPLE', 'Written by IDL: Mon Aug 12 08:48:08 2013'),
        FitsKeyComment('BITPIX', 'Integer*2 (short integer)')
    ]
    assert entry.instrument == 'Spectroheliograph'
    assert entry.observation_time_start == datetime(2013, 8, 12, 8, 42, 53)
    assert entry.observation_time_end == datetime(2013, 8, 12, 8, 42, 53)
    assert round(entry.wavemin, 1) == 656.3
    assert round(entry.wavemax, 1) == 656.3
    assert entry.path == MQ_IMAGE
def test_entries_from_file():
    # Exactly one entry is generated from the single-header MQ_IMAGE file.
    entries = list(entries_from_file(MQ_IMAGE))
    assert len(entries) == 1
    (entry,) = entries
    assert len(entry.fits_header_entries) == 31
    # Expected (keyword, value) pairs of the FITS header, in header order.
    expected_pairs = [
        ("SIMPLE", True),
        ("BITPIX", 16),
        ("NAXIS", 2),
        ("NAXIS1", 1500),
        ("NAXIS2", 1340),
        ("CONTACT", "*****@*****.**"),
        ("DATE_OBS", "2013-08-12T08:42:53.000"),
        ("DATE_END", "2013-08-12T08:42:53.000"),
        ("FILENAME", "mq130812.084253.fits"),
        ("INSTITUT", "Observatoire de Paris"),
        ("INSTRUME", "Spectroheliograph"),
        ("OBJECT", "FS"),
        ("OBS_MODE", "SCAN"),
        ("PHYSPARA", "Intensity"),
        ("NBREG", 1),
        ("NBLAMBD", 1),
        ("WAVELNTH", 6563),
        ("WAVEUNIT", "angstrom"),
        ("POLARANG", 0),
        ("THEMISFF", 3),
        ("LONGTRC", 258.78),
        ("LONGCARR", 258.78),
        ("LONGITUD", 258.78),
        ("LATITUD", 6.50107),
        ("LATIRC", 6.50107),
        ("INDLAMD", 1),
        ("INDREG", 1),
        ("SEQ_IND", 1),
        ("SVECTOR", 0),
        ("COMMENT", ""),
        ("HISTORY", ""),
    ]
    expected_fits_header_entries = [
        FitsHeaderEntry(keyword, value) for keyword, value in expected_pairs]
    assert entry.fits_header_entries == expected_fits_header_entries
    assert entry.fits_key_comments == [
        FitsKeyComment("SIMPLE", "Written by IDL: Mon Aug 12 08:48:08 2013"),
        FitsKeyComment("BITPIX", "Integer*2 (short integer)"),
    ]
    assert entry.instrument == "Spectroheliograph"
    assert entry.observation_time_start == datetime(2013, 8, 12, 8, 42, 53)
    assert entry.observation_time_end == datetime(2013, 8, 12, 8, 42, 53)
    assert round(entry.wavemin, 1) == 656.3
    assert round(entry.wavemax, 1) == 656.3
    assert entry.path == MQ_IMAGE
def test_entries_from_file():
    """Check the entry generated from MQ_IMAGE: header entries, key
    comments, instrument, observation times, wavelengths and path."""
    with pytest.warns(AstropyUserWarning, match='File may have been truncated'):
        entries = list(entries_from_file(MQ_IMAGE))
    assert len(entries) == 1
    entry = entries[0]
    assert len(entry.fits_header_entries) == 31
    expected_fits_header_entries = [
        FitsHeaderEntry('SIMPLE', True),
        FitsHeaderEntry('BITPIX', 16),
        FitsHeaderEntry('NAXIS', 2),
        FitsHeaderEntry('NAXIS1', 1500),
        FitsHeaderEntry('NAXIS2', 1340),
        FitsHeaderEntry('CONTACT', '*****@*****.**'),
        FitsHeaderEntry('DATE_OBS', '2013-08-12T08:42:53.000'),
        FitsHeaderEntry('DATE_END', '2013-08-12T08:42:53.000'),
        FitsHeaderEntry('FILENAME', 'mq130812.084253.fits'),
        FitsHeaderEntry('INSTITUT', 'Observatoire de Paris'),
        FitsHeaderEntry('INSTRUME', 'Spectroheliograph'),
        FitsHeaderEntry('OBJECT', 'FS'),
        FitsHeaderEntry('OBS_MODE', 'SCAN'),
        FitsHeaderEntry('PHYSPARA', 'Intensity'),
        FitsHeaderEntry('NBREG', 1),
        FitsHeaderEntry('NBLAMBD', 1),
        FitsHeaderEntry('WAVELNTH', 6563),
        FitsHeaderEntry('WAVEUNIT', 'angstrom'),
        FitsHeaderEntry('POLARANG', 0),
        FitsHeaderEntry('THEMISFF', 3),
        FitsHeaderEntry('LONGTRC', 258.78),
        FitsHeaderEntry('LONGCARR', 258.78),
        FitsHeaderEntry('LONGITUD', 258.78),
        FitsHeaderEntry('LATITUD', 6.50107),
        FitsHeaderEntry('LATIRC', 6.50107),
        FitsHeaderEntry('INDLAMD', 1),
        FitsHeaderEntry('INDREG', 1),
        FitsHeaderEntry('SEQ_IND', 1),
        FitsHeaderEntry('SVECTOR', 0),
        FitsHeaderEntry('COMMENT', ''),
        FitsHeaderEntry('HISTORY', '')]
    assert entry.fits_header_entries == expected_fits_header_entries
    # BUG FIX: `a.sort() == b.sort()` compared None == None (list.sort()
    # returns None), so the assertion always passed. Compare directly.
    assert entry.fits_key_comments == [
        FitsKeyComment('SIMPLE', 'Written by IDL: Mon Aug 12 08:48:08 2013'),
        FitsKeyComment('BITPIX', 'Integer*2 (short integer)')]
    assert entry.instrument == 'Spectroheliograph'
    assert entry.observation_time_start == datetime(2013, 8, 12, 8, 42, 53)
    assert entry.observation_time_end == datetime(2013, 8, 12, 8, 42, 53)
    assert round(entry.wavemin, 1) == 656.3
    assert round(entry.wavemax, 1) == 656.3
    assert entry.path == MQ_IMAGE
def add_from_file(self, file, ignore_already_added=False):
    """Add one database entry per FITS header found in the given file.

    Parameters
    ----------
    file : str, file object
        Either a path pointing to a FITS file or an opened file-like
        object. If an opened file object, its mode must be one of the
        following rb, rb+, or ab+.
    ignore_already_added : bool, optional
        See :meth:`sunpy.database.Database.add`.

    """
    new_entries = tables.entries_from_file(file, self.default_waveunit)
    self.add_many(new_entries, ignore_already_added)
def add_from_file(self, file, ignore_already_added=False):
    """Generate as many database entries as there are FITS headers in the
    given file and add them to the database.

    Parameters
    ----------
    file : str or file-like object
        Either a path pointing to a FITS file or an opened file-like
        object. If an opened file object, its mode must be one of the
        following rb, rb+, or ab+.
    ignore_already_added : bool, optional
        See :meth:`sunpy.database.Database.add`.

    """
    # Fixed docstring typo: "a an opened" -> "an opened".
    self.add_many(
        tables.entries_from_file(file, self.default_waveunit),
        ignore_already_added)
def _download_and_collect_entries(self, query_result, **kwargs):
    """Download the files of *query_result* via the VSO and yield one
    database entry per FITS header, annotated with the metadata of the
    matching query-result block.
    """
    client = kwargs.pop('client', None)
    path = kwargs.pop('path', None)
    progress = kwargs.pop('progress', False)
    methods = kwargs.pop('methods', ('URL-FILE_Rice', 'URL-FILE'))
    # Reject any keyword argument that was not consumed above.
    if kwargs:
        key, _ = kwargs.popitem()
        raise TypeError('unexpected keyword argument {0!r}'.format(key))
    if client is None:
        client = VSOClient()
    downloaded = client.get(query_result, path, methods).wait(progress=progress)
    copied_attrs = (
        'source', 'provider', 'physobs', 'fileid',
        'observation_time_start', 'observation_time_end',
        'instrument', 'size', 'wavemin', 'wavemax')
    for file_path, block in zip(downloaded, query_result):
        qr_entry = tables.DatabaseEntry._from_query_result_block(block)
        # A result may be a single file or a directory of files.
        if os.path.isfile(file_path):
            entries = tables.entries_from_file(file_path, self.default_waveunit)
        elif os.path.isdir(file_path):
            entries = tables.entries_from_dir(file_path, self.default_waveunit)
        else:
            raise ValueError('The path is neither a file nor directory')
        for entry in entries:
            for attr in copied_attrs:
                setattr(entry, attr, getattr(qr_entry, attr))
            entry.path = file_path
            entry.download_time = datetime.utcnow()
            yield entry
def download(self, *query, **kwargs):
    """download(*query, client=sunpy.net.vso.VSOClient(), path=None, progress=False)
    Search for data using the VSO interface (see
    :meth:`sunpy.net.vso.VSOClient.query`). If querying the VSO results
    in no data, no operation is performed. Concrete, this means that no
    entry is added to the database and no file is downloaded. Otherwise,
    the retrieved search result is used to download all files that belong
    to this search result. After that, all the gathered information (the
    one from the VSO query result and the one from the downloaded FITS
    files) is added to the database in a way that each FITS header is
    represented by one database entry.
    """
    if not query:
        raise TypeError('at least one attribute required')
    # Pop the supported keyword arguments; anything left over is an error.
    client = kwargs.pop('client', None)
    path = kwargs.pop('path', None)
    progress = kwargs.pop('progress', False)
    if kwargs:
        k, v = kwargs.popitem()
        raise TypeError('unexpected keyword argument {0!r}'.format(k))
    if client is None:
        client = VSOClient()
    qr = client.query(*query)
    # don't do anything if querying the VSO results in no data
    if not qr:
        return
    entries = []
    for block in qr:
        # Download each result block individually, then build one database
        # entry per FITS header of each downloaded file, carrying over the
        # metadata of the query-result block.
        paths = client.get([block], path).wait(progress=progress)
        for path in paths:
            qr_entry = tables.DatabaseEntry._from_query_result_block(block)
            file_entries = list(
                tables.entries_from_file(path, self.default_waveunit))
            for entry in file_entries:
                entry.source = qr_entry.source
                entry.provider = qr_entry.provider
                entry.physobs = qr_entry.physobs
                entry.fileid = qr_entry.fileid
                entry.observation_time_start =\
                    qr_entry.observation_time_start
                entry.observation_time_end = qr_entry.observation_time_end
                entry.instrument = qr_entry.instrument
                entry.size = qr_entry.size
                entry.wavemin = qr_entry.wavemin
                entry.wavemax = qr_entry.wavemax
                entry.path = path
                entry.download_time = datetime.utcnow()
            entries.extend(file_entries)
    # Serialize the query and check whether this exact query has been
    # downloaded before (its JSON dump is stored in the jsondumps table).
    dump = serialize.dump_query(and_(*query))
    (dump_exists, ), = self.session.query(
        exists().where(tables.JSONDump.dump == tables.JSONDump(dump).dump))
    if dump_exists:
        # dump already exists in table jsondumps -> edit instead of add
        # update all entries with the fileid `entry.fileid`
        for entry in entries:
            old_entry = self.session.query(tables.DatabaseEntry).filter_by(
                fileid=entry.fileid).first()
            if old_entry is not None:
                attrs = [
                    'source', 'provider', 'physobs',
                    'observation_time_start', 'observation_time_end',
                    'instrument', 'size', 'wavemin', 'wavemax',
                    'download_time'
                ]
                kwargs = dict((k, getattr(entry, k)) for k in attrs)
                cmd = commands.EditEntry(old_entry, **kwargs)
                if self._enable_history:
                    self._command_manager.do(cmd)
                else:
                    cmd()
    else:
        self.add_many(entries)
        # serialize the query and save the serialization in the database
        # for two reasons:
        # 1. to avoid unnecessary downloading in future calls of
        #    ``fetch``
        # 2. to know whether to add or to edit entries in future calls of
        #    ``download`` (this method)
        self.session.add(tables.JSONDump(dump))
def _download_and_collect_entries(self, query_result, **kwargs):
    """Fetch the files of *query_result* and yield one database entry per
    FITS header, annotated with the metadata of the matching result block.

    Blocks whose data is already present in the database are skipped,
    unless ``overwrite`` is true, in which case the stale entries are
    removed first.

    NOTE(review): ``methods`` is popped but never used in this body —
    presumably kept for interface compatibility; confirm.
    """
    client = kwargs.pop('client', None)
    path = kwargs.pop('path', None)
    progress = kwargs.pop('progress', False)
    methods = kwargs.pop('methods', ('URL-FILE_Rice', 'URL-FILE'))
    overwrite = kwargs.pop('overwrite', False)
    # Reject any keyword argument that was not consumed above.
    if kwargs:
        key, _ = kwargs.popitem()
        raise TypeError('unexpected keyword argument {0!r}'.format(key))
    if client is None:
        client = VSOClient()
    # Identifying attributes used to decide whether a result block is
    # already present in the database.
    identity_attrs = ["source", "provider", "physobs", "fileid",
                      "observation_time_start", "observation_time_end",
                      "instrument", "size", "wavemin", "wavemax"]
    skipped_blocks = []
    stale_entries = []
    for qr in query_result:
        candidate = tables.DatabaseEntry._from_query_result_block(qr)
        for database_entry in self:
            if database_entry.path is not None and candidate._compare_attributes(
                    database_entry, identity_attrs):
                if not overwrite:
                    skipped_blocks.append(qr)
                else:
                    stale_entries.append(database_entry)
    for qr in skipped_blocks:
        query_result = [x for x in query_result if x != qr]
    for database_entry in stale_entries:
        self.remove(database_entry)
    downloaded = client.fetch(query_result, path).wait(progress=progress)
    copied_attrs = (
        'source', 'provider', 'physobs', 'fileid',
        'observation_time_start', 'observation_time_end',
        'instrument', 'size', 'wavemin', 'wavemax')
    for file_path, block in zip(downloaded, query_result):
        qr_entry = tables.DatabaseEntry._from_query_result_block(block)
        if os.path.isfile(file_path):
            entries = tables.entries_from_file(file_path, self.default_waveunit)
        elif os.path.isdir(file_path):
            entries = tables.entries_from_dir(file_path, self.default_waveunit)
        else:
            raise ValueError('The path is neither a file nor directory')
        for entry in entries:
            for attr in copied_attrs:
                setattr(entry, attr, getattr(qr_entry, attr))
            entry.path = file_path
            entry.download_time = datetime.utcnow()
            yield entry
def download(self, *query, **kwargs):
    """download(*query, client=sunpy.net.vso.VSOClient(), path=None, progress=False)
    Search for data using the VSO interface (see
    :meth:`sunpy.net.vso.VSOClient.query`). If querying the VSO results
    in no data, no operation is performed. Concrete, this means that no
    entry is added to the database and no file is downloaded. Otherwise,
    the retrieved search result is used to download all files that belong
    to this search result. After that, all the gathered information (the
    one from the VSO query result and the one from the downloaded FITS
    files) is added to the database in a way that each FITS header is
    represented by one database entry.
    """
    if not query:
        raise TypeError('at least one attribute required')
    # Pop the supported keyword arguments; anything left over is an error.
    client = kwargs.pop('client', None)
    path = kwargs.pop('path', None)
    progress = kwargs.pop('progress', False)
    if kwargs:
        k, v = kwargs.popitem()
        raise TypeError('unexpected keyword argument {0!r}'.format(k))
    if client is None:
        client = VSOClient()
    qr = client.query(*query)
    # don't do anything if querying the VSO results in no data
    if not qr:
        return
    entries = []
    for block in qr:
        # Download each result block individually, then build one database
        # entry per FITS header of each downloaded file, carrying over the
        # metadata of the query-result block.
        paths = client.get([block], path).wait(progress=progress)
        for path in paths:
            qr_entry = tables.DatabaseEntry._from_query_result_block(block)
            file_entries = list(
                tables.entries_from_file(path, self.default_waveunit))
            for entry in file_entries:
                entry.source = qr_entry.source
                entry.provider = qr_entry.provider
                entry.physobs = qr_entry.physobs
                entry.fileid = qr_entry.fileid
                entry.observation_time_start =\
                    qr_entry.observation_time_start
                entry.observation_time_end = qr_entry.observation_time_end
                entry.instrument = qr_entry.instrument
                entry.size = qr_entry.size
                entry.wavemin = qr_entry.wavemin
                entry.wavemax = qr_entry.wavemax
                entry.path = path
                entry.download_time = datetime.utcnow()
            entries.extend(file_entries)
    # Serialize the query and check whether this exact query has been
    # downloaded before (its JSON dump is stored in the jsondumps table).
    dump = serialize.dump_query(and_(*query))
    (dump_exists,), = self.session.query(
        exists().where(tables.JSONDump.dump == tables.JSONDump(dump).dump))
    if dump_exists:
        # dump already exists in table jsondumps -> edit instead of add
        # update all entries with the fileid `entry.fileid`
        for entry in entries:
            old_entry = self.session.query(
                tables.DatabaseEntry).filter_by(fileid=entry.fileid).first()
            if old_entry is not None:
                attrs = [
                    'source', 'provider', 'physobs',
                    'observation_time_start', 'observation_time_end',
                    'instrument', 'size', 'wavemin', 'wavemax',
                    'download_time']
                kwargs = dict((k, getattr(entry, k)) for k in attrs)
                cmd = commands.EditEntry(old_entry, **kwargs)
                if self._enable_history:
                    self._command_manager.do(cmd)
                else:
                    cmd()
    else:
        self.add_many(entries)
        # serialize the query and save the serialization in the database
        # for two reasons:
        # 1. to avoid unnecessary downloading in future calls of
        #    ``fetch``
        # 2. to know whether to add or to edit entries in future calls of
        #    ``download`` (this method)
        self.session.add(tables.JSONDump(dump))
def test_entries_from_file_withoutwaveunit():
    # does not raise `WaveunitNotFoundError`, because no wavelength information
    # is present in this file
    # BUG FIX: generators have no `.next()` method in Python 3 (that was
    # the Python 2 protocol); use the built-in next() instead.
    next(entries_from_file(RHESSI_IMAGE))
    with pytest.raises(WaveunitNotFoundError):
        next(entries_from_file(EIT_195_IMAGE))