def computeLOG_SSDCslots(self, tmin, tmax):
    """Build one-day slots covering [tmin, tmax] in MJD (tmax included).

    Example: tmin 16/06/2018, tmax 20/06/2018 returns the daily pairs
        16/06/2018 17/06/2018
        17/06/2018 18/06/2018
        18/06/2018 19/06/2018
        19/06/2018 20/06/2018
        20/06/2018 21/06/2018

    Returns a pandas DataFrame with columns ["tmin", "tmax"].
    """
    one_day = datetime.timedelta(days=1)

    # Convert the MJD endpoints to naive datetimes via their FITS strings.
    current = datetime.datetime.strptime(
        AstroUtils.time_mjd_to_fits(tmin), "%Y-%m-%dT%H:%M:%S.%f")
    end = datetime.datetime.strptime(
        AstroUtils.time_mjd_to_fits(tmax), "%Y-%m-%dT%H:%M:%S.%f")

    daily_slots = []
    while current <= end:
        daily_slots.append([current, current + one_day])
        current = current + one_day

    return pd.DataFrame(daily_slots, columns=["tmin", "tmax"])
def dataIsMissing(self, tmin, tmax, queryFilepath):
    """Check whether the data for [tmin, tmax] (MJD) is already covered.

    Reads the query file (one "ssdctmin ssdctmax" interval per row) and
    returns DataStatus.MISSING when the file is absent, when either
    endpoint falls outside every interval, or when there is a coverage
    hole between the two intervals; DataStatus.OK otherwise.

    This method can be extended to handle the case of partial missing data.
    """
    if not queryFilepath.exists():
        self.logger.warning(self, f"Query file {queryFilepath} does not exists")
        return DataStatus.MISSING

    # Convert MJD endpoints to naive datetimes (YYYY-MM-DDTHH:mm:ss).
    fitsFormat = "%Y-%m-%dT%H:%M:%S.%f"
    tminUtc = datetime.datetime.strptime(
        AstroUtils.time_mjd_to_fits(tmin), fitsFormat)
    tmaxUtc = datetime.datetime.strptime(
        AstroUtils.time_mjd_to_fits(tmax), fitsFormat)
    self.logger.debug(self, f"({tmin}, {tmax}) => ({tminUtc}, {tmaxUtc})")

    # Every row of the query file is one already-downloaded time interval.
    datesDF = pd.read_csv(queryFilepath,
                          header=None,
                          sep=" ",
                          names=["ssdctmin", "ssdctmax"],
                          parse_dates=["ssdctmin", "ssdctmax"])
    self.logger.debug(self, f"{tminUtc}, {tmaxUtc}")

    # tmin must fall inside some downloaded interval.
    intervalIndexTmin = self.getInterval(datesDF, tminUtc)
    if intervalIndexTmin == -1:
        self.logger.debug(self, f"tminUtc {tminUtc} not present in any interval!")
        return DataStatus.MISSING

    # tmax must fall inside some downloaded interval as well.
    intervalIndexTmax = self.getInterval(datesDF, tmaxUtc)
    if intervalIndexTmax == -1:
        self.logger.debug(self, f"tmaxUtc {tmaxUtc} not present in any interval!")
        return DataStatus.MISSING

    self.logger.debug(self, f"intervalIndexTmin: {str(intervalIndexTmin)}")
    self.logger.debug(self, f"intervalIndexTmax: {str(intervalIndexTmax)}")

    # Finally, there must be no gap between the two covering intervals.
    if self.gotHole(datesDF, intervalIndexTmin, intervalIndexTmax):
        self.logger.debug(self, f"Missing data between the 2 intervals!")
        return DataStatus.MISSING

    return DataStatus.OK
def gridFiles(self, tmin, tmax):
    """Download AGILE GRID data for [tmin, tmax] (MJD) as a tar.gz archive.

    https://tools.ssdc.asi.it/AgileData/rest/GRIDFiles/2009-10-20T00:00:00/2009-11-10T00:00:00

    The actual data being downloaded could correspond to a bigger interval
    than tmin and tmax: this is because the SSDC rest service uses the
    following conventions:
    * the EVT file always contains 15 days of data
    * the LOG file always contains 1 day of data
    * the mapping between tmin,tmax and the actual time span of the data
      being downloaded can be inferred from the next examples:
      * tmin=03/01/21 tmax=05/01/21
        * 1 evt file: 01/01/21 to 15/01/21
        * 3 log files: 03/01/21, 04/01/21, 05/01/21
      * tmin=14/01/21 tmax=18/01/21
        * 2 evt files: 01/01/21 to 15/01/21 and 15/01/21 to 31/01/21
        * 5 log files: 14/01/21, 15/01/21, 16/01/21, 17/01/21, 18/01/21

    Returns the path of the downloaded archive.
    Raises FileNotFoundError if the archive was not written to disk.
    """
    tmin_utc = AstroUtils.time_mjd_to_fits(tmin)
    tmax_utc = AstroUtils.time_mjd_to_fits(tmax)

    api_url = f"https://tools.ssdc.asi.it/AgileData/rest/GRIDFiles/{tmin_utc}/{tmax_utc}"
    self.logger.info(self,
                     f"Downloading data ({tmin},{tmax}) from {api_url}..")

    start = time()
    response = self.http.get(api_url, stream=True)
    outpath = f"/tmp/agile_{str(uuid.uuid4())}.tar.gz"
    with open(outpath, "wb") as f:
        # Writing chunks for large downloads
        for chunk in tqdm(
                response.iter_content(chunk_size=1024 * 1024 * 10)):
            f.write(chunk)
    end = time() - start

    # BUGFIX: the existence check must come *before* stat(). In the original
    # code `os.stat(outpath)` already raised a bare FileNotFoundError for a
    # missing file, making the later `is_file()` check unreachable. Checking
    # first also lets us attach the offending path to the exception.
    outfile = Path(outpath)
    if not outfile.is_file():
        raise FileNotFoundError(outpath)

    outpath_size = outfile.stat().st_size
    self.logger.info(
        self, f"Took {end} seconds. Downloaded {outpath_size} bytes.")

    if outpath_size == 0:
        self.logger.warning(self, f"The downloaded data {outpath} is empty.")

    return outpath
def computeEVT_SSDCslots(self, tmin, tmax):
    """Given tmin and tmax (MJD), build 15-day EVT slots covering them.

    Each month is split in two halves: [last day of previous month, 15th]
    and [15th, last day of month]. Example: tmin 16/06/2018,
    tmax 25/09/2018 returns
        15/06/2018 30/06/2018
        30/06/2018 15/07/2018
        15/07/2018 31/07/2018
        31/07/2018 15/08/2018
        15/08/2018 31/08/2018
        31/08/2018 15/09/2018
        15/09/2018 30/09/2018

    Returns a pandas DataFrame with columns ["tmin", "tmax"].
    """
    # Convert the MJD endpoints to naive datetimes via their FITS strings.
    tmin = AstroUtils.time_mjd_to_fits(tmin)
    tmax = AstroUtils.time_mjd_to_fits(tmax)
    tmin = datetime.datetime.strptime(tmin, "%Y-%m-%dT%H:%M:%S.%f")
    tmax = datetime.datetime.strptime(tmax, "%Y-%m-%dT%H:%M:%S.%f")
    dt1 = datetime.timedelta(days=1)
    dt14 = datetime.timedelta(days=14)
    dt15 = datetime.timedelta(days=15)
    slots = []
    while tmin <= tmax:
        #print("start tmin:",tmin)
        # First day of tmin's month; note replace() keeps tmin's time-of-day.
        firstDayOfMonth = tmin.replace(day=1)
        # Day number (28..31) of the last day of tmin's month.
        lastDay = calendar.monthrange(tmin.year, tmin.month)[-1]
        lastDayOfMonth = datetime.date(tmin.year, tmin.month, lastDay)
        # Midnight of the last day of the month.
        lastDayOfMonth = datetime.datetime.combine(
            lastDayOfMonth, datetime.datetime.min.time())
        if tmin >= firstDayOfMonth and tmin <= firstDayOfMonth + dt14:
            # tmin is in the first half of the month: slot from the last day
            # of the previous month to the 15th; resume from the 16th.
            slot = [firstDayOfMonth - dt1, firstDayOfMonth + dt14]
            tmin = firstDayOfMonth + dt15
        elif tmin > firstDayOfMonth + dt14 and tmin <= lastDayOfMonth:
            # tmin is in the second half: slot from the 15th to the last day
            # of the month; resume from the 1st of the next month.
            slot = [firstDayOfMonth + dt14, lastDayOfMonth]
            tmin = lastDayOfMonth + dt1
        # NOTE(review): if tmin carries a time-of-day later than midnight on
        # the last day of a month, neither branch above fires: `slot` would
        # be re-appended unchanged (or raise NameError on the very first
        # iteration) and tmin would never advance, looping forever — TODO
        # confirm callers always pass midnight-aligned MJD values.
        slots.append(slot)
    return pd.DataFrame(slots, columns=["tmin", "tmax"])
def gridList(self, tmin, tmax):
    """Ask the SSDC rest service which files cover [tmin, tmax] (MJD).

    The json response has the shape:
    {'Response': {'message': None, 'statusCode': 'OK'},
     'AgileFiles': [
        {'filename': 'ag-182087934_STD0P.LOG.gz',
         'absolutePath': 'std/0909301200_0910151200-86596/STD0P_LOG/ag-182087934_STD0P.LOG.gz'},
        ...,
        {'filename': 'ag0910151200_0910311200_STD0P_FM.EVT.gz',
         'absolutePath': 'std/0910151200_0910311200-86597/ag0910151200_0910311200_STD0P_FM.EVT.gz'}
     ]}

    Returns the 'AgileFiles' list.
    Raises SSDCRestErrorDownload on a non-OK status code or when the
    service reports "No data found.".
    """
    tmin_utc = AstroUtils.time_mjd_to_fits(tmin)
    tmax_utc = AstroUtils.time_mjd_to_fits(tmax)

    api_url = f"https://tools.ssdc.asi.it/AgileData/rest/GRIDList/{tmin_utc}/{tmax_utc}"
    self.logger.info(
        self,
        f"Downloading filelist to download ({tmin},{tmax}) ({tmin_utc}, {tmax_utc}) from {api_url}.."
    )

    start = time()
    json_data = json.loads(self.http.get(api_url).text)
    elapsed = time() - start
    self.logger.info(self, f"Took {elapsed} seconds")

    status = json_data["Response"]["statusCode"]
    message = json_data["Response"]["message"]
    # A non-OK status, or an OK status whose message reports no data, are
    # both download errors. (In the original code the second check also
    # re-tested status == "OK", which is always true past the first raise.)
    if status != "OK" or message == "No data found.":
        raise SSDCRestErrorDownload(message)

    return json_data["AgileFiles"]
def test_mjd_conversion(input_date, expected):
    """Check every AstroUtils MJD conversion against the expected fixture values."""
    conversions = [
        (AstroUtils.time_mjd_to_agile_seconds, "agile_seconds", None),
        (AstroUtils.time_mjd_to_jd, "jd", 0.00001),
        (AstroUtils.time_mjd_to_unix, "unix", None),
        (AstroUtils.time_mjd_to_fits, "fits", None),
        (AstroUtils.time_mjd_to_iso, "iso", None),
    ]
    for convert, key, tolerance in conversions:
        actual = convert(input_date)
        if tolerance is None:
            assert actual == expected[key]
        else:
            # JD is a float: compare within a relative tolerance.
            assert actual == pytest.approx(expected[key], tolerance)
def test_getInterval(self, logger, datacoveragepath):
    """getInterval must return the covering interval index, or -1 when uncovered."""
    agdataset = AGDataset(logger, datacoveragepath)

    testDataDir = Path(__file__).absolute().parent.joinpath("test_data")

    def loadDates(qfileName):
        # Each qfile row is a "ssdctmin ssdctmax" pair of dates.
        return pd.read_csv(testDataDir.joinpath(qfileName),
                           header=None,
                           sep=" ",
                           names=["ssdctmin", "ssdctmax"],
                           parse_dates=["ssdctmin", "ssdctmax"])

    datesEVTDF = loadDates("getinterval_EVT.qfile")
    datesLOGDF = loadDates("getinterval_LOG.qfile")

    def mjdToDatetime(mjd):
        return datetime.strptime(AstroUtils.time_mjd_to_fits(mjd),
                                 "%Y-%m-%dT%H:%M:%S.%f")

    # 58053 MJD = 2017-10-27T00:00:00.000 -> covered by both files.
    tfits = mjdToDatetime(58053)
    assert agdataset.getInterval(datesEVTDF, tfits) == 0
    assert agdataset.getInterval(datesLOGDF, tfits) == 2

    # 59003 MJD = 2020-06-03T00:00:00 -> covered by neither file.
    tfits = mjdToDatetime(59003)
    assert agdataset.getInterval(datesEVTDF, tfits) == -1
    assert agdataset.getInterval(datesLOGDF, tfits) == -1
def test_astro_utils_time_mjd_to_fits(self):
    """MJD 58871.45616898 must convert to 2020-01-23T10:56:53 (seconds within 1s)."""
    fitstime = AstroUtils.time_mjd_to_fits(58871.45616898)
    parsed = datetime.strptime(fitstime, '%Y-%m-%dT%H:%M:%S.%f')

    # Date and time down to the minute must match exactly.
    assert (parsed.year, parsed.month, parsed.day) == (2020, 1, 23)
    assert (parsed.hour, parsed.minute) == (10, 56)

    # Seconds may drift through rounding: allow a one-second tolerance.
    sec_tol = 1
    assert abs(53 - parsed.second) <= sec_tol
def test_got_hole(self, logger, datacoveragepath):
    """gotHole must detect coverage gaps between the intervals of tmin and tmax."""
    agdataset = AGDataset(logger, datacoveragepath)

    testDataDir = Path(__file__).absolute().parent.joinpath("test_data")

    def loadDates(qfileName):
        # Each qfile row is a "ssdctmin ssdctmax" pair of dates.
        return pd.read_csv(testDataDir.joinpath(qfileName),
                           header=None,
                           sep=" ",
                           names=["ssdctmin", "ssdctmax"],
                           parse_dates=["ssdctmin", "ssdctmax"])

    datesEVTDF = loadDates("holes_EVT.qfile")
    datesLOGDF = loadDates("holes_LOG.qfile")

    def mjdToDatetime(mjd):
        return datetime.strptime(AstroUtils.time_mjd_to_fits(mjd),
                                 "%Y-%m-%dT%H:%M:%S.%f")

    def checkHole(datesDF, label, tminUtc, tmaxUtc):
        # Locate the covering interval of each endpoint, then test for a gap.
        intervalIndexTmin = agdataset.getInterval(datesDF, tminUtc)
        intervalIndexTmax = agdataset.getInterval(datesDF, tmaxUtc)
        print(
            f"intervals in {label} file are {intervalIndexTmin} {intervalIndexTmax}"
        )
        return agdataset.gotHole(datesDF, intervalIndexTmin, intervalIndexTmax)

    # Contiguous range: no hole expected in either dataset.
    tminUtc = mjdToDatetime(58051)
    tmaxUtc = mjdToDatetime(58058)
    assert checkHole(datesEVTDF, "EVT", tminUtc, tmaxUtc) == False
    assert checkHole(datesLOGDF, "LOG", tminUtc, tmaxUtc) == False

    # Wider range spanning a coverage gap: a hole is expected in both.
    tminUtc = mjdToDatetime(58051)
    tmaxUtc = mjdToDatetime(58152)
    assert checkHole(datesEVTDF, "EVT", tminUtc, tmaxUtc) == True
    assert checkHole(datesLOGDF, "LOG", tminUtc, tmaxUtc) == True