def _download_and_collect_entries(self, query_result, client=None, path=None, progress=False):
    """Download all files for *query_result* in one bulk request and yield
    one database entry per FITS header found in the downloaded data.

    Parameters
    ----------
    query_result : VSO query response
        The result blocks whose files should be downloaded.
    client : VSOClient, optional
        Client used for the download; a new ``VSOClient`` is created if None.
    path : str, optional
        Download path pattern passed through to ``client.get``.
    progress : bool, optional
        Whether to display a progress indicator while waiting.

    Yields
    ------
    DatabaseEntry
        One entry per FITS header, enriched with the VSO metadata of the
        corresponding query-result block.

    Raises
    ------
    ValueError
        If a downloaded path is neither a regular file nor a directory.
    """
    if client is None:
        client = VSOClient()
    paths = client.get(query_result, path).wait(progress=progress)
    # NOTE(review): assumes `paths` is ordered like `query_result` so the
    # zip pairs each file with its originating block -- TODO confirm.
    # Renamed the loop variable: the original shadowed the `path` parameter.
    for file_path, block in zip(paths, query_result):
        qr_entry = tables.DatabaseEntry._from_query_result_block(block)
        if os.path.isfile(file_path):
            entries = tables.entries_from_file(file_path, self.default_waveunit)
        elif os.path.isdir(file_path):
            entries = tables.entries_from_dir(file_path, self.default_waveunit)
        else:
            raise ValueError('The path is neither a file nor directory')
        for entry in entries:
            # Copy the VSO metadata of the query-result block onto every
            # entry extracted from the downloaded file.
            for attr in ('source', 'provider', 'physobs', 'fileid',
                         'observation_time_start', 'observation_time_end',
                         'instrument', 'size', 'wavemin', 'wavemax'):
                setattr(entry, attr, getattr(qr_entry, attr))
            entry.path = file_path
            entry.download_time = datetime.utcnow()
            yield entry
def _download_and_collect_entries(self, query_result, client=None, path=None, progress=False):
    """Download the files of *query_result* block-by-block and yield one
    database entry per FITS header in each downloaded file.

    Unlike the bulk variant, each query-result block is fetched with its
    own ``client.get`` call, so the block ↔ file association is explicit.

    Parameters
    ----------
    query_result : VSO query response
        The result blocks whose files should be downloaded.
    client : VSOClient, optional
        Client used for the download; a new ``VSOClient`` is created if None.
    path : str, optional
        Download path pattern passed through to ``client.get``.
    progress : bool, optional
        Whether to display a progress indicator while waiting.

    Yields
    ------
    DatabaseEntry
        One entry per FITS header, enriched with the VSO metadata of the
        block it was downloaded for.
    """
    if client is None:
        client = VSOClient()
    for block in query_result:
        paths = client.get([block], path).wait(progress=progress)
        # Renamed the loop variable: the original shadowed the `path`
        # parameter after the first iteration.
        for file_path in paths:
            qr_entry = tables.DatabaseEntry._from_query_result_block(block)
            file_entries = list(
                tables.entries_from_file(file_path, self.default_waveunit))
            for entry in file_entries:
                # Copy the VSO metadata onto every entry from this file.
                for attr in ('source', 'provider', 'physobs', 'fileid',
                             'observation_time_start', 'observation_time_end',
                             'instrument', 'size', 'wavemin', 'wavemax'):
                    setattr(entry, attr, getattr(qr_entry, attr))
                entry.path = file_path
                entry.download_time = datetime.utcnow()
                yield entry
def _download_and_collect_entries(self, query_result, **kwargs):
    """Download all files for *query_result* and yield one database entry
    per FITS header found in the downloaded data.

    Keyword Parameters
    ------------------
    client : VSOClient, optional
        Client used for the download; a new ``VSOClient`` is created if None.
    path : str, optional
        Download path pattern passed through to ``client.get``.
    progress : bool, optional
        Whether to display a progress indicator while waiting.
    methods : tuple of str, optional
        Transfer methods passed to ``client.get``.
        Defaults to ``('URL-FILE_Rice', 'URL-FILE')``.

    Yields
    ------
    DatabaseEntry
        One entry per FITS header, enriched with the VSO metadata of the
        corresponding query-result block.

    Raises
    ------
    TypeError
        If an unexpected keyword argument is passed.
    ValueError
        If a downloaded path is neither a regular file nor a directory.
    """
    client = kwargs.pop('client', None)
    path = kwargs.pop('path', None)
    progress = kwargs.pop('progress', False)
    methods = kwargs.pop('methods', ('URL-FILE_Rice', 'URL-FILE'))
    if kwargs:
        # Report the first leftover keyword; its value is irrelevant.
        k = next(iter(kwargs))
        raise TypeError('unexpected keyword argument {0!r}'.format(k))
    if client is None:
        client = VSOClient()
    paths = client.get(query_result, path, methods).wait(progress=progress)
    # NOTE(review): assumes `paths` is ordered like `query_result` -- TODO
    # confirm. Loop variable renamed: the original shadowed `path`.
    for file_path, block in zip(paths, query_result):
        qr_entry = tables.DatabaseEntry._from_query_result_block(block)
        if os.path.isfile(file_path):
            entries = tables.entries_from_file(file_path, self.default_waveunit)
        elif os.path.isdir(file_path):
            entries = tables.entries_from_dir(file_path, self.default_waveunit)
        else:
            raise ValueError('The path is neither a file nor directory')
        for entry in entries:
            # Copy the VSO metadata onto every entry from this file/dir.
            for attr in ('source', 'provider', 'physobs', 'fileid',
                         'observation_time_start', 'observation_time_end',
                         'instrument', 'size', 'wavemin', 'wavemax'):
                setattr(entry, attr, getattr(qr_entry, attr))
            entry.path = file_path
            entry.download_time = datetime.utcnow()
            yield entry
def download(self, *query, **kwargs):
    """download(*query, client=sunpy.net.vso.VSOClient(), path=None, progress=False)

    Search for data using the VSO interface (see
    :meth:`sunpy.net.vso.VSOClient.query`). If querying the VSO results
    in no data, no operation is performed. Concretely, this means that
    no entry is added to the database and no file is downloaded.
    Otherwise, the retrieved search result is used to download all files
    that belong to this search result. After that, all the gathered
    information (the one from the VSO query result and the one from the
    downloaded FITS files) is added to the database in a way that each
    FITS header is represented by one database entry.

    Raises
    ------
    TypeError
        If no query attribute is given or an unexpected keyword argument
        is passed.
    """
    if not query:
        raise TypeError('at least one attribute required')
    client = kwargs.pop('client', None)
    path = kwargs.pop('path', None)
    progress = kwargs.pop('progress', False)
    if kwargs:
        # Report the first leftover keyword; its value is irrelevant.
        k = next(iter(kwargs))
        raise TypeError('unexpected keyword argument {0!r}'.format(k))
    if client is None:
        client = VSOClient()
    qr = client.query(*query)
    # don't do anything if querying the VSO results in no data
    if not qr:
        return
    entries = []
    for block in qr:
        paths = client.get([block], path).wait(progress=progress)
        # Loop variable renamed: the original shadowed the `path` parameter.
        for file_path in paths:
            qr_entry = tables.DatabaseEntry._from_query_result_block(block)
            file_entries = list(
                tables.entries_from_file(file_path, self.default_waveunit))
            for entry in file_entries:
                # Copy the VSO metadata onto every entry from this file.
                for attr in ('source', 'provider', 'physobs', 'fileid',
                             'observation_time_start', 'observation_time_end',
                             'instrument', 'size', 'wavemin', 'wavemax'):
                    setattr(entry, attr, getattr(qr_entry, attr))
                entry.path = file_path
                entry.download_time = datetime.utcnow()
            entries.extend(file_entries)
    dump = serialize.dump_query(and_(*query))
    (dump_exists, ), = self.session.query(
        exists().where(tables.JSONDump.dump == tables.JSONDump(dump).dump))
    if dump_exists:
        # dump already exists in table jsondumps -> edit instead of add
        # update all entries with the fileid `entry.fileid`
        for entry in entries:
            old_entry = self.session.query(tables.DatabaseEntry).filter_by(
                fileid=entry.fileid).first()
            if old_entry is not None:
                attrs = [
                    'source', 'provider', 'physobs',
                    'observation_time_start', 'observation_time_end',
                    'instrument', 'size', 'wavemin', 'wavemax',
                    'download_time'
                ]
                kwargs = dict((k, getattr(entry, k)) for k in attrs)
                cmd = commands.EditEntry(old_entry, **kwargs)
                if self._enable_history:
                    self._command_manager.do(cmd)
                else:
                    cmd()
    else:
        self.add_many(entries)
        # serialize the query and save the serialization in the database
        # for two reasons:
        #   1. to avoid unnecessary downloading in future calls of
        #      ``fetch``
        #   2. to know whether to add or to edit entries in future calls
        #      of ``download`` (this method)
        self.session.add(tables.JSONDump(dump))
def download(self, *query, **kwargs):
    """download(*query, client=sunpy.net.vso.VSOClient(), path=None, progress=False)

    Search for data using the VSO interface (see
    :meth:`sunpy.net.vso.VSOClient.query`). If querying the VSO results
    in no data, no operation is performed. Concretely, this means that
    no entry is added to the database and no file is downloaded.
    Otherwise, the retrieved search result is used to download all files
    that belong to this search result. After that, all the gathered
    information (the one from the VSO query result and the one from the
    downloaded FITS files) is added to the database in a way that each
    FITS header is represented by one database entry.

    Raises
    ------
    TypeError
        If no query attribute is given or an unexpected keyword argument
        is passed.
    """
    if not query:
        raise TypeError('at least one attribute required')
    client = kwargs.pop('client', None)
    path = kwargs.pop('path', None)
    progress = kwargs.pop('progress', False)
    if kwargs:
        # Report the first leftover keyword; its value is irrelevant.
        k = next(iter(kwargs))
        raise TypeError('unexpected keyword argument {0!r}'.format(k))
    if client is None:
        client = VSOClient()
    qr = client.query(*query)
    # don't do anything if querying the VSO results in no data
    if not qr:
        return
    entries = []
    for block in qr:
        paths = client.get([block], path).wait(progress=progress)
        # Loop variable renamed: the original shadowed the `path` parameter.
        for file_path in paths:
            qr_entry = tables.DatabaseEntry._from_query_result_block(block)
            file_entries = list(
                tables.entries_from_file(file_path, self.default_waveunit))
            for entry in file_entries:
                # Copy the VSO metadata onto every entry from this file.
                for attr in ('source', 'provider', 'physobs', 'fileid',
                             'observation_time_start', 'observation_time_end',
                             'instrument', 'size', 'wavemin', 'wavemax'):
                    setattr(entry, attr, getattr(qr_entry, attr))
                entry.path = file_path
                entry.download_time = datetime.utcnow()
            entries.extend(file_entries)
    dump = serialize.dump_query(and_(*query))
    (dump_exists,), = self.session.query(
        exists().where(tables.JSONDump.dump == tables.JSONDump(dump).dump))
    if dump_exists:
        # dump already exists in table jsondumps -> edit instead of add
        # update all entries with the fileid `entry.fileid`
        for entry in entries:
            old_entry = self.session.query(
                tables.DatabaseEntry).filter_by(fileid=entry.fileid).first()
            if old_entry is not None:
                attrs = [
                    'source', 'provider', 'physobs',
                    'observation_time_start', 'observation_time_end',
                    'instrument', 'size', 'wavemin', 'wavemax',
                    'download_time']
                kwargs = dict((k, getattr(entry, k)) for k in attrs)
                cmd = commands.EditEntry(old_entry, **kwargs)
                if self._enable_history:
                    self._command_manager.do(cmd)
                else:
                    cmd()
    else:
        self.add_many(entries)
        # serialize the query and save the serialization in the database
        # for two reasons:
        #   1. to avoid unnecessary downloading in future calls of
        #      ``fetch``
        #   2. to know whether to add or to edit entries in future calls
        #      of ``download`` (this method)
        self.session.add(tables.JSONDump(dump))
def find_sdo_files(directory, wavelength='', time_limits=None, cadence=12, download=None, double_check='Yes'):
    """***This function no longer works. It will be updated or removed soon.***

    Checks a directory for missing files from downloading AIA images and can
    check/download the files from the missing time.

    ***This function may need to run several times. It depends on how well
    the files are downloaded***

    Parameters
    ----------
    directory : str
        A string of the path to the files to be checked.

    wavelength : str
        The wavelength of the files to check. Only important if
        download = 'yes' or 'auto'. For HMI files this can be
        'los_magnetic_field', 'intensity', etc.
        Default: ''

    time_limits : list
        A list of two entries for the start and end time of the observation.
        To check if any files were missed before the first and after the
        last file you have.
        Default: None

    cadence : int
        An integer number of seconds that should be the temporal separation
        of the files.
        Default: 12

    download : str
        Indicates whether missing files should be searched for/downloaded.
        If set to None then there will be a prompt to ask, enter 'Yes' or
        'No'. Setting to 'auto' will search for the data automatically
        without user input.
        Default: None

    double_check : str
        After checking for more files to download check again - without
        downloading - to see if there are any still missing, e.g. 'Yes' or
        'No'. Can also have 'recursive' which keeps checking all files are
        found in case there is a large time gap in the middle
        *** BEWARE INFINITIES ***.
        Default: 'Yes'

    Returns
    -------
    A list of the files with no friends *cadence* seconds after them
    (currently always None: the function is disabled and returns early).
    """
    # This function no longer works at the moment: print a notice and bail
    # out before touching the filesystem or the network.
    print("This function no longer work. It will be updated or removed soon.")
    return

    # ------------------------------------------------------------------
    # Everything below is intentionally unreachable until the function is
    # repaired; it is kept as a reference for the planned update.
    # ------------------------------------------------------------------
    files_list = list(os.listdir(directory))
    files = [f for f in files_list if f.endswith('.fits')]
    files.sort()

    if files == []:
        empty_but_download = input('The folder provided does not appear to have any \'.fits\' files within it. '
                                   '\nDo you want to download within the time range given in the form '
                                   '"%Y-%m-%d %H:%M:%S" (this will only work for SDO/AIA files at the moment)? ')
        if empty_but_download == 'Yes':
            client = VSOClient()
            query_response = client.query_legacy(tstart=time_limits[0], tend=time_limits[1], instrument='AIA', wave=wavelength)
            n = len(query_response) - 1  # will be used to index the list 'query_response' when downloading
            # Download from ROB to the given folder and wait for download to complete
            results = client.get(query_response[0:n], path=directory, site='rob')
            fs = results.wait()
            still_no_friends = find_sdo_files(directory, wavelength, time_limits=time_limits, download='Yes', double_check='No')
            return
        else:
            assert files != [], f'No .fits files in {directory}.'  # make sure there are files in the first place

    no_friends = []  # files that do not have a friend within the cadence time after it
    t_of_no_friends = []  # time of files that have no friends
    t_end_of_no_firends = []
    # Don't want to look at the last one as it will never have anything after it anyway.
    for f in range(len(files) - 2):
        time_0 = datetime.datetime.strptime(files[f][4:19], '%Y%m%d_%H%M%S')
        time_1 = datetime.datetime.strptime(files[f+1][4:19], '%Y%m%d_%H%M%S')
        if time_0 <= time_1 <= time_0 + timedelta(seconds=cadence):
            # There is a file <=cadence seconds ahead: move on.
            continue
        else:
            # No file <=cadence seconds ahead: record the gap.
            no_friends.append(files[f])
            t_of_no_friends.append(time_0)
            t_end_of_no_firends.append(time_1)

    if (download == None) and (len(t_of_no_friends) > 0):
        download = input('Would you like the times of the missing files checked (yea or nay)? ')
        if download in ['No', 'no', 'N', 'n', 'Nope', 'nope', 'Nay', 'nay']:
            download = 'No'
        elif download in ['Yes', 'yes', 'Y', 'y', 'Yip', 'yip', 'Yea', 'yea']:
            download = 'Yes'

    if len(t_of_no_friends) > 0:
        print('There are ', len(t_of_no_friends), ' time intervals of missing files.')

    if (download == 'Yes'):
        client = VSOClient()
        if len(t_of_no_friends) > 0:
            start_times = [t.strftime("%Y-%m-%d %H:%M:%S") for t in t_of_no_friends]  # search a minute ahead
            end_times = [t.strftime("%Y-%m-%d %H:%M:%S") for t in t_end_of_no_firends]
            for ts, te in zip(start_times, end_times):
                if files[0][0:3] == 'aia':
                    query_response = client.query_legacy(tstart=ts, tend=te, instrument='AIA', wave=wavelength)
                elif files[0][0:3] == 'hmi':
                    # BUG FIX: the original passed `tstart=st`, an undefined
                    # name; the loop variable is `ts`.
                    query_response = client.query_legacy(tstart=ts, tend=te, instrument='HMI', physobs=wavelength)
                n = len(query_response) - 1  # will be used to index the list 'query_response' when downloading
                # Download from ROB and wait for download to complete
                results = client.get(query_response[0:n], path=directory, site='rob')
                fs = results.wait()

        if time_limits != None:
            time_first = datetime.datetime.strptime(files[0][4:19], '%Y%m%d_%H%M%S')
            time_last = datetime.datetime.strptime(files[-1][4:19], '%Y%m%d_%H%M%S')
            time_limits_first = datetime.datetime.strptime(time_limits[0], "%Y-%m-%d %H:%M:%S")
            time_limits_last = datetime.datetime.strptime(time_limits[1], "%Y-%m-%d %H:%M:%S")
            # If the diff between the start time given and the first file's starting time is large, look before it.
            if time_first - time_limits_first >= timedelta(seconds=cadence):
                print('Checking start time-gap.')
                if files[0][0:3] == 'aia':
                    query_response = client.query_legacy(tstart=time_limits[0], tend=time_first.strftime("%Y-%m-%d %H:%M:%S"), instrument='AIA', wave=wavelength)
                elif files[0][0:3] == 'hmi':
                    query_response = client.query_legacy(tstart=time_limits[0], tend=time_first.strftime("%Y-%m-%d %H:%M:%S"), instrument='HMI', physobs=wavelength)
                n = len(query_response) - 1  # will be used to index the list 'query_response' when downloading
                results = client.get(query_response[0:n], path=directory, site='rob')
                fs = results.wait()
            # If the diff between the end time given and the last file's starting time is large, look after it.
            if time_limits_last - time_last >= timedelta(seconds=cadence):
                print('Checking end time-gap.')
                if files[0][0:3] == 'aia':
                    query_response = client.query_legacy(tstart=time_last.strftime("%Y-%m-%d %H:%M:%S"), tend=time_limits[1], instrument='AIA', wave=wavelength)
                elif files[0][0:3] == 'hmi':
                    query_response = client.query_legacy(tstart=time_last.strftime("%Y-%m-%d %H:%M:%S"), tend=time_limits[1], instrument='HMI', physobs=wavelength)
                n = len(query_response) - 1  # will be used to index the list 'query_response' when downloading
                results = client.get(query_response[0:n], path=directory, site='rob')
                fs = results.wait()

        # Removes files that downloaded twice (VSO duplicates get a '.N.' infix).
        duplicates = glob.glob(directory + '*.*.fits')
        for each_file_path in duplicates:
            os.remove(each_file_path)

    if no_friends == []:
        # If there aren't any files to check for then don't double check or anything, just stop.
        print('All files here!')
        return

    if double_check == 'No':
        print(f'Here are files without friends {cadence} seconds ahead of them from directory \n{directory}:')
        print(no_friends)
        print('Please wait a few minutes and try this function again, it depends on the servers sometimes.')
        return no_friends
    elif double_check == 'Yes':
        # Double check to see if we have all the files.
        still_no_friends = find_sdo_files(directory, wavelength, time_limits=time_limits, download='Yes', double_check='No')
    elif double_check == 'recursive':
        still_no_friends = find_sdo_files(directory, wavelength, time_limits=time_limits, download='Yes', double_check='recursive')
    return still_no_friends