def search(self, *args, **kwargs):
    """
    Query this client for a list of results.

    Parameters
    ----------
    \\*args: `tuple`
        `sunpy.net.attrs` objects representing the query.
    \\*\\*kwargs: `dict`
        Any extra keywords to refine the search.

    Returns
    -------
    A `QueryResponse` instance containing the query result.
    """
    baseurl, pattern, matchdict = self.pre_search_hook(*args, **kwargs)
    scraper = Scraper(baseurl, regex=True)
    span = TimeRange(matchdict['Start Time'], matchdict['End Time'])
    files_meta = scraper._extract_files_meta(span, extractor=pattern, matcher=matchdict)
    # Sort by URL for a deterministic row order, then let the client-specific
    # post-search hook turn each raw metadata dict into a result row.
    rows = [
        self.post_search_hook(meta, matchdict)
        for meta in sorted(files_meta, key=lambda entry: entry['url'])
    ]
    return QueryResponse(rows, client=self)
def test_extract_files_meta():
    """Check that ``Scraper._extract_files_meta`` parses fields out of scraped URLs."""
    # NoRH tcx archive: the matcher restricts results to the tca/tcz channels.
    norh_url = r'ftp://solar-pub.nao.ac.jp/pub/nsro/norh/data/tcx/%Y/%m/(\w){3}%y%m%d'
    norh_pattern = '{}/tcx/{year:4d}/{month:2d}/{wave}{:4d}{day:2d}'
    norh_scraper = Scraper(norh_url, regex=True)
    norh_range = TimeRange('2020/1/1 4:00', '2020/1/2')
    matcher = {'wave': ['tca', 'tcz']}
    norh_meta = norh_scraper._extract_files_meta(norh_range, norh_pattern, matcher=matcher)
    assert norh_meta[0]['wave'] == 'tca'
    assert norh_meta[3]['wave'] == 'tcz'
    assert norh_meta[1]['day'] == 2

    # GONG synoptic maps: no matcher; extract Carrington rotation and timestamps,
    # and check the extracted URLs line up with what ``filelist`` returns.
    prefix = r'https://gong2.nso.edu/oQR/zqs/'
    gong_url = prefix + r'%Y%m/mrzqs%y%m%d/mrzqs%y%m%dt%H%Mc(\d){4}_(\d){3}\.fits.gz'
    gong_pattern = ('{}/zqs/{year:4d}{month:2d}/mrzqs{:4d}{day:2d}/mrzqs{:6d}t'
                    '{hour:2d}{minute:2d}c{CAR_ROT:4d}_{:3d}.fits.gz')
    gong_scraper = Scraper(gong_url, regex=True)
    gong_range = TimeRange('2020-01-05', '2020-01-05T16:00:00')
    gong_meta = gong_scraper._extract_files_meta(gong_range, gong_pattern)
    gong_urls = gong_scraper.filelist(gong_range)
    assert gong_meta[3]['CAR_ROT'] == 2226
    assert gong_meta[-1]['url'] == gong_urls[-1]
def _get_metalist_fn(self, matchdict, baseurl, pattern):
    """
    Scrape ``baseurl`` over the requested time range and return the list of
    per-file OrderedDicts produced by ``post_search_hook``.
    """
    scraper = Scraper(baseurl, regex=True)
    span = TimeRange(matchdict["Start Time"], matchdict["End Time"])
    found = scraper._extract_files_meta(span, extractor=pattern, matcher=matchdict)
    # One result row per scraped file, post-processed by the client hook.
    return [self.post_search_hook(meta, matchdict) for meta in found]
def search(self, *args, **kwargs):
    """
    Query this client for a list of results.

    Builds one scrape per combination of satellite number, processing level
    and wavelength channel, then collects every matching file into a
    `QueryResponse`.

    Raises
    ------
    ValueError
        If a requested Level is neither ``1b`` nor ``2``.
    """
    supported_waves = [94, 131, 171, 195, 284, 304] * u.Angstrom
    matchdict = self._get_match_dict(*args, **kwargs)

    requested = matchdict.get('Wavelength', None)
    if requested is None:
        # No wavelength constraint: query every supported channel.
        waves = [int(w.value) for w in supported_waves]
    else:
        # Normalise the requested range to Angstrom so the membership test
        # against the supported channels is unit-consistent.
        low = requested.min.to(u.Angstrom, equivalencies=u.spectral())
        high = requested.max.to(u.Angstrom, equivalencies=u.spectral())
        wave_range = a.Wavelength(low, high)
        waves = [int(w.value) for w in supported_waves if w in wave_range]

    metalist = []
    # Iterate over every combination of the possible Attr values.
    for satno in matchdict.get('SatelliteNumber'):
        for level in matchdict.get('Level'):
            for wave in waves:
                formdict = {'wave': wave, 'SatelliteNumber': satno}
                level_str = str(level)
                if level_str == '1b':
                    # Level-1b filenames carry the element name; 304 A is He.
                    formdict['elem'] = 'he' if wave == 304 else 'fe'
                    baseurl, pattern = self.baseurl1b, self.pattern1b
                elif level_str == '2':
                    baseurl, pattern = self.baseurl2, self.pattern2
                else:
                    raise ValueError(f"Level {level} is not supported.")
                # Fill Level, SatelliteNumber and Wavelength into the URL template.
                scraper = Scraper(baseurl.format(**formdict))
                span = TimeRange(matchdict['Start Time'], matchdict['End Time'])
                for filemeta in scraper._extract_files_meta(span, extractor=pattern):
                    metalist.append(self.post_search_hook(filemeta, matchdict))
    return QueryResponse(metalist, client=self)