def gaia_source(dr=None):
    """
    NAME:
        gaia_source
    PURPOSE:
        download the gaia_source files
    INPUT:
        dr (int): Gaia DR, resolved through gaia_default_dr(); only DR1 is supported
    OUTPUT:
        None (just downloads)
    RAISES:
        ValueError: if the resolved dr is not 1
    HISTORY:
        2017-Oct-13 Henry Leung
    """
    dr = gaia_default_dr(dr=dr)
    if dr != 1:
        raise ValueError('[astroNN.gaia.downloader.gaia_source()] only supports Gaia DR1 Gaia Source')

    urlbase = 'http://cdn.gea.esac.esa.int/Gaia/gaia_source/fits/'
    total = 256 * 20 + 112

    # Blocks 000..019 hold 256 files each, the last block (020) holds files 000..110
    file_ids = [(j, i) for j in range(20) for i in range(256)]
    file_ids.extend((20, i) for i in range(111))

    for j, i in file_ids:
        filename = 'GaiaSource_000-0{:02d}-{:03d}.fits'.format(j, i)
        urlstr = urlbase + filename
        # NOTE(review): currentdir is defined outside this function; it is assumed to be
        # the directory the success message refers to - confirm.
        # Without an explicit target urlretrieve() only writes to a temporary file.
        target = os.path.join(currentdir, filename)
        with TqdmUpTo(unit='B', unit_scale=True, miniters=1, desc=filename) as t:
            urllib.request.urlretrieve(urlstr, target, reporthook=t.update_to)
        # str.format() must be applied to the string itself; the previous
        # ``print(...) % (...)`` applied ``%`` to print()'s return value (None)
        print('Downloaded Gaia DR{:d} Gaia Source ({:d} of {:d}) file catalog successfully to {}'.format(
            dr, (j * 256 + i), total, currentdir))

    return None
def tgas(flag=None):
    """
    Get path to the Gaia TGAS DR1 files, download if files not found

    :param flag: None: reuse local files whose MD5 matches, 1: force to re-download
    :type flag: int
    :return: List of file path
    :rtype: list
    :History: 2017-Oct-13 - Written - Henry Leung (University of Toronto)
    """
    fulllist = []

    # Check if directory exists
    folderpath = os.path.join(gaia_env(), 'Gaia/gdr1/tgas_source/fits/')
    urlbase = 'http://cdn.gea.esac.esa.int/Gaia/gdr1/tgas_source/fits/'
    if not os.path.exists(folderpath):
        os.makedirs(folderpath)

    # The server-side MD5 listing is fetched once and reused afterwards
    hash_filename = 'MD5SUM.txt'
    full_hash_filename = os.path.join(folderpath, hash_filename)
    if not os.path.isfile(full_hash_filename):
        urllib.request.urlretrieve(urlbase + hash_filename, full_hash_filename)

    # After the transpose: row 0 holds the hashes, row 1 the file names
    hash_list = np.loadtxt(full_hash_filename, dtype='str').T

    for i in range(0, 16, 1):
        filename = f'TgasSource_000-000-0{i:0{2}d}.fits'
        fullfilename = os.path.join(folderpath, filename)
        urlstr = urlbase + filename

        # Look the expected hash up as a plain string (or None when the listing has no
        # entry for this file). Comparing against the raw fancy-indexed array relied on
        # array truthiness, which is ambiguous for an empty array.
        matches = hash_list[0][hash_list[1] == filename]
        file_hash = str(matches[0]) if len(matches) != 0 else None

        # Check if files exists
        if os.path.isfile(fullfilename) and flag is None:
            checksum = filehash(fullfilename, algorithm='md5')
            # In some rare case, the hash cant be found, then the file is accepted as-is
            if file_hash is not None and checksum != file_hash:
                print(checksum)
                print(file_hash)
                print('File corruption detected, astroNN is attempting to download again')
                # NOTE: this re-runs the download of every TGAS file, not only this one
                tgas(flag=1)
            else:
                print(fullfilename + ' was found!')
        elif not os.path.isfile(fullfilename) or flag == 1:
            # progress bar
            with TqdmUpTo(unit='B', unit_scale=True, miniters=1, desc=urlstr.split('/')[-1]) as t:
                # Download
                urllib.request.urlretrieve(urlstr, fullfilename, reporthook=t.update_to)
                checksum = filehash(fullfilename, algorithm='md5')
                if file_hash is not None and checksum != file_hash:
                    print('File corruption detected, astroNN is attempting to download again')
                    tgas(flag=1)
            print(f'Downloaded Gaia DR1 TGAS ({i:d} of 15) file catalog successfully to {fullfilename}')
        fulllist.append(fullfilename)

    return fulllist
def load_data(flag=None):
    """
    NAME:
        load_data
    PURPOSE:
        load_data galaxy10 DECals data
    INPUT:
        flag (int): None to reuse a valid cached file, 1 to force a re-download
    OUTPUT:
        x (ndarray): An array of images
        y (ndarray): An array of answer
    HISTORY:
        2021-Mar-24 - Written - Henry Leung (University of Toronto)
    """
    filename = 'Galaxy10_DECals.h5'
    complete_url = _G10_ORIGIN + filename
    cache_folder = os.path.join(astroNN_CACHE_DIR, 'datasets')
    # SHA256, stored upper-case; the computed digest is compared in lower-case
    expected_digest = '19AEFC477C41BB7F77FF07599A6B82A038DC042F889A111B0D4D98BB755C1571'.lower()

    if not os.path.exists(cache_folder):
        os.makedirs(cache_folder)
    fullfilename = os.path.join(cache_folder, filename)

    cached = os.path.isfile(fullfilename)
    if cached and flag is None:
        # A cached copy exists: verify it before trusting it
        if filehash(fullfilename, algorithm='sha256') == expected_digest:
            print(fullfilename + ' was found!')
        else:
            print('File corruption detected, astroNN is attempting to download again')
            load_data(flag=1)
    elif not cached or flag == 1:
        progress = TqdmUpTo(unit='B', unit_scale=True, miniters=1, desc=complete_url.split('/')[-1])
        with progress as t:
            urllib.request.urlretrieve(complete_url, fullfilename, reporthook=t.update_to)
            print(f'Downloaded Galaxy10 successfully to {fullfilename}')
            if filehash(fullfilename, algorithm='sha256') != expected_digest:
                load_data(flag=1)

    with h5py.File(fullfilename, 'r') as F:
        images = np.array(F['images'])
        answers = np.array(F['ans'])

    return images, answers
def load_data(flag=None):
    """
    NAME:
        load_data
    PURPOSE:
        load_data galaxy10 data
    INPUT:
        flag (int): None to reuse a valid cached file, 1 to force a re-download
    OUTPUT:
        x (ndarray): An array of images
        y (ndarray): An array of answer
    HISTORY:
        2018-Jan-22 - Written - Henry Leung (University of Toronto)
    """
    filename = 'Galaxy10.h5'
    complete_url = _G10_ORIGIN + filename
    cache_folder = os.path.join(astroNN_CACHE_DIR, 'datasets')
    # SHA256, stored upper-case; the computed digest is compared in lower-case
    expected_digest = '969A6B1CEFCC36E09FFFA86FEBD2F699A4AA19B837BA0427F01B0BC6DED458AF'.lower()

    if not os.path.exists(cache_folder):
        os.makedirs(cache_folder)
    fullfilename = os.path.join(cache_folder, filename)

    cached = os.path.isfile(fullfilename)
    if cached and flag is None:
        # A cached copy exists: verify it before trusting it
        if sha256_checksum(fullfilename) == expected_digest:
            print(fullfilename + ' was found!')
        else:
            print('File corruption detected, astroNN attempting to download again')
            load_data(flag=1)
    elif not cached or flag == 1:
        progress = TqdmUpTo(unit='B', unit_scale=True, miniters=1, desc=complete_url.split('/')[-1])
        with progress as t:
            urllib.request.urlretrieve(complete_url, fullfilename, reporthook=t.update_to)
            print(f'Downloaded Galaxy10 successfully to {fullfilename}')
            if sha256_checksum(fullfilename) != expected_digest:
                load_data(flag=1)

    with h5py.File(fullfilename, 'r') as F:
        images = np.array(F['images'])
        answers = np.array(F['ans'])

    return images, answers
def allvisit(dr=None, flag=None):
    """
    Download the allVisit file (catalog of properties from individual visit spectra)

    :param dr: APOGEE DR
    :type dr: int
    :param flag: None: reuse a local file whose SHA1 matches, 1: force to re-download
    :type flag: int
    :return: full file path, the file is downloaded first if not found locally
    :rtype: str
    :raises ValueError: if the resolved dr is neither 13 nor 14
    :History: 2017-Oct-11 - Written - Henry Leung (University of Toronto)
    """
    dr = apogee_default_dr(dr=dr)

    if dr == 13:
        file_hash = '2a3b13ccd40a2c8aea8321be9630117922d55b51'
        fullfilepath = os.path.join(apogee_env(), 'dr13/apogee/spectro/redux/r6/')
        filename = 'allVisit-l30e.2.fits'
        url = f'https://data.sdss.org/sas/dr13/apogee/spectro/redux/r6/{filename}'
    elif dr == 14:
        file_hash = 'abcecbcdc5fe8d00779738702c115633811e6bbd'
        fullfilepath = os.path.join(apogee_env(), 'dr14/apogee/spectro/redux/r8/')
        filename = 'allVisit-l31c.2.fits'
        url = f'https://data.sdss.org/sas/dr14/apogee/spectro/redux/r8/{filename}'
    else:
        raise ValueError('allvisit() only supports APOGEE DR13-DR15')

    # Check if directory exists
    if not os.path.exists(fullfilepath):
        os.makedirs(fullfilepath)
    fullfilename = os.path.join(fullfilepath, filename)

    # check file integrity
    if os.path.isfile(fullfilename) and flag is None:
        checksum = filehash(fullfilename, algorithm='sha1')
        if checksum != file_hash.lower():
            print('File corruption detected, astroNN is attempting to download again')
            allvisit(dr=dr, flag=1)
        else:
            print(fullfilename + ' was found!')
    elif not os.path.isfile(fullfilename) or flag == 1:
        with TqdmUpTo(unit='B', unit_scale=True, miniters=1, desc=url.split('/')[-1]) as t:
            urllib.request.urlretrieve(url, fullfilename, reporthook=t.update_to)
            print(f'Downloaded DR{dr:d} allVisit file catalog successfully to {fullfilepath}')
            checksum = filehash(fullfilename, algorithm='sha1')
            if checksum != file_hash.lower():
                print('File corruption detected, astroNN is attempting to download again')
                # Retry THIS catalog; the retry used to call allstar() and
                # therefore downloaded a different file
                allvisit(dr=dr, flag=1)

    return fullfilename
def apogee_distances(dr=None, flag=None):
    """
    Download the Apogee Distances catalogue

    :param dr: Apogee DR
    :type dr: int
    :param flag: Force to download if flag=1
    :type flag: int
    :return: full file path, or ``warning_flag`` when the server answers with an HTTP error
    :rtype: str
    :History: 2018-Jan-24 - Written - Henry Leung (University of Toronto)
    """
    dr = apogee_default_dr(dr=dr)
    if dr != 14:
        raise ValueError('apogee_distances() only supports DR14')

    expected_sha1 = 'b33c8419be784b1be3d14af3ee9696c6ac31830f'.lower()
    filename = f'apogee_distances-DR{dr}.fits'
    urlstr = 'https://data.sdss.org/sas/dr14/apogee/vac/apogee-distances/' + filename

    local_folder = os.path.join(apogee_env(), 'dr14/apogee/vac/apogee-distances/')
    if not os.path.exists(local_folder):
        os.makedirs(local_folder)
    fullfilename = os.path.join(local_folder, filename)

    on_disk = os.path.isfile(fullfilename)
    if on_disk and flag is None:
        # Local copy present: only its integrity has to be verified
        if filehash(fullfilename, algorithm='sha1') == expected_sha1:
            print(fullfilename + ' was found!')
        else:
            print('File corruption detected, astroNN is attempting to download again')
            apogee_distances(dr=dr, flag=1)
    elif not on_disk or flag == 1:
        try:
            with TqdmUpTo(unit='B', unit_scale=True, miniters=1, desc=urlstr.split('/')[-1]) as t:
                urllib.request.urlretrieve(urlstr, fullfilename, reporthook=t.update_to)
                print(f'Downloaded DR{dr} Distances successfully to {fullfilename}')
                if filehash(fullfilename, algorithm='sha1') != expected_sha1:
                    print('File corruption detected, astroNN is attempting to download again')
                    apogee_distances(dr=dr, flag=1)
        except urllib.error.HTTPError:
            # Report and hand back the sentinel instead of raising
            print(f'{urlstr} cannot be found on server, skipped')
            fullfilename = warning_flag

    return fullfilename
def __apogee_credentials_downloader(url, fullfilename):
    """
    Download file at the URL with apogee credentials, this function will prompt for username and password

    The credentials are kept in module-level globals so that the prompt only appears once;
    they are reset when the server rejects them.

    :param url: URL
    :type url: str
    :param fullfilename: Full file name including path in local system
    :type fullfilename: str
    :return: ``fullfilename`` on success, ``warning_flag`` when the download failed with an
        HTTP error other than 401
    :raises ConnectionError: if the server answers 401 (wrong username or password)
    :History: 2018-Aug-31 - Written - Henry Leung (University of Toronto)
    """
    # Local import so this block does not depend on a file-level import of getpass
    import getpass

    global __apogee_credentials_username
    global __apogee_credentials_pw

    passman = urllib.request.HTTPPasswordMgrWithDefaultRealm()
    if __apogee_credentials_username is None:
        print(
            "\nYou are trying to access APOGEE proprietary data...Please provide username and password..."
        )
        __apogee_credentials_username = input('Username: ')
        # NOTE(review): the original text between the username prompt and the progress
        # bar was corrupted in this file; the password prompt and the opener
        # installation below are a reconstruction - confirm against version control.
        __apogee_credentials_pw = getpass.getpass('Password: ')

    # Register the credentials for this URL and make urlretrieve() use them
    passman.add_password(None, url, __apogee_credentials_username, __apogee_credentials_pw)
    authhandler = urllib.request.HTTPBasicAuthHandler(passman)
    urllib.request.install_opener(urllib.request.build_opener(authhandler))

    try:
        with TqdmUpTo(unit='B', unit_scale=True, miniters=1, desc=url.split('/')[-1]) as t:
            urllib.request.urlretrieve(url, fullfilename, reporthook=t.update_to)
    except urllib.error.HTTPError as emsg:
        if emsg.code == 401:
            # Forget the rejected credentials so that the next call prompts again
            __apogee_credentials_username = None
            __apogee_credentials_pw = None
            raise ConnectionError('Wrong username or password')
        elif emsg.code == 404:
            warnings.warn(f'{url} cannot be found on server, skipped')
            fullfilename = warning_flag
        else:
            # don't raise error, so a batch downloading script will keep running despite some files not found
            warnings.warn(f"Unknown error occurred - {emsg}", RuntimeWarning)
            fullfilename = warning_flag

    return fullfilename
def apogee_astronn(dr=None, flag=None):
    """
    Download the apogee_astroNN file (catalog of astroNN stellar parameters, abundances, distances and orbital
    parameters from combined spectra)

    :param dr: APOGEE DR
    :type dr: int
    :param flag: None: reuse a local file whose SHA1 matches, 1: force to re-download
    :type flag: int
    :return: full file path, the file is downloaded first if not found locally
    :rtype: str
    :History: 2019-Dec-10 - Written - Henry Leung (University of Toronto)
    """
    dr = apogee_default_dr(dr=dr)
    if dr != 16:
        raise ValueError('apogee_astroNN() only supports APOGEE DR16')

    # Make sure the local mirror folder exists
    vac_folder = os.path.join(apogee_env(), 'dr16/apogee/vac/apogee-astronn/')
    if not os.path.exists(vac_folder):
        os.makedirs(vac_folder)

    filename = 'apogee_astroNN-DR16.fits'
    fullfilename = os.path.join(vac_folder, filename)
    expected_sha1 = '02187ef2cbe5215dc4d65df7037ecf1b8cc5853d'.lower()
    url = f'https://data.sdss.org/sas/dr16/apogee/vac/apogee-astronn/{filename}'

    # Step 1: verify an already present file
    if os.path.isfile(fullfilename) and flag is None:
        if filehash(fullfilename, algorithm='sha1') == expected_sha1:
            print(fullfilename + ' was found!')
        else:
            print('File corruption detected, astroNN is attempting to download again')
            apogee_astronn(dr=dr, flag=1)

    # Step 2: download when the file is missing or a re-download is forced
    if flag == 1 or not os.path.isfile(fullfilename):
        with TqdmUpTo(unit='B', unit_scale=True, miniters=1, desc=url.split('/')[-1]) as t:
            urllib.request.urlretrieve(url, fullfilename, reporthook=t.update_to)
            print(f'Downloaded DR{dr:d} apogee_astroNN file catalog successfully to {fullfilename}')
            if filehash(fullfilename, algorithm='sha1') != expected_sha1:
                print('File corruption detected, astroNN is attempting to download again')
                apogee_astronn(dr=dr, flag=1)

    return fullfilename
def allstar_cannon(dr=None, flag=None):
    """
    Download the allStarCannon file (catalog of Cannon stellar parameters and abundances from combined spectra)

    :param dr: APOGEE DR
    :type dr: int
    :param flag: None: reuse a local file whose SHA1 matches, 1: force to re-download
    :type flag: int
    :return: full file path, the file is downloaded first if not found locally
    :rtype: str
    :History: 2017-Oct-24 - Written - Henry Leung (University of Toronto)
    """
    dr = apogee_default_dr(dr=dr)
    if dr != 14:
        raise ValueError('allstar_cannon() only supports APOGEE DR14-DR15')

    # Make sure the local mirror folder exists
    cannon_folder = os.path.join(apogee_env(), 'dr14/apogee/spectro/redux/r8/stars/l31c/l31c.2/cannon/')
    if not os.path.exists(cannon_folder):
        os.makedirs(cannon_folder)

    filename = 'allStarCannon-l31c.2.fits'
    fullfilename = os.path.join(cannon_folder, filename)
    expected_sha1 = '64d485e95b3504df0b795ab604e21a71d5c7ae45'.lower()
    url = f'https://data.sdss.org/sas/dr14/apogee/spectro/redux/r8/stars/l31c/l31c.2/cannon/{filename}'

    # Step 1: verify an already present file
    if os.path.isfile(fullfilename) and flag is None:
        if filehash(fullfilename, algorithm='sha1') == expected_sha1:
            print(fullfilename + ' was found!')
        else:
            print('File corruption detected, astroNN is attempting to download again')
            allstar_cannon(dr=dr, flag=1)

    # Step 2: download when the file is missing or a re-download is forced
    if flag == 1 or not os.path.isfile(fullfilename):
        with TqdmUpTo(unit='B', unit_scale=True, miniters=1, desc=url.split('/')[-1]) as t:
            urllib.request.urlretrieve(url, fullfilename, reporthook=t.update_to)
            print(f'Downloaded DR{dr:d} allStarCannon file catalog successfully to {fullfilename}')
            if filehash(fullfilename, algorithm='sha1') != expected_sha1:
                print('File corruption detected, astroNN is attempting to download again')
                allstar_cannon(dr=dr, flag=1)

    return fullfilename
def tgas(dr=None):
    """
    NAME:
        tgas
    PURPOSE:
        download the tgas files
    INPUT:
        dr (int): Gaia DR, resolved through gaia_default_dr(); only DR1 is supported
    OUTPUT:
        list of the 16 local file paths (files are downloaded when missing)
    HISTORY:
        2017-Oct-13 Henry Leung
    """
    # Check if dr arguement is provided, if none then use default
    dr = gaia_default_dr(dr=dr)
    fulllist = []

    if dr != 1:
        raise ValueError('[astroNN.gaia.downloader.tgas()] only supports Gaia DR1 TGAS')

    # Make sure the local folder exists
    folderpath = os.path.join(_GAIA_DATA, 'Gaia/tgas_source/fits/')
    if not os.path.exists(folderpath):
        os.makedirs(folderpath)

    for part in range(16):
        filename = 'TgasSource_000-000-0{:02d}.fits'.format(part)
        fullfilename = os.path.join(folderpath, filename)
        urlstr = 'http://cdn.gea.esac.esa.int/Gaia/tgas_source/fits/{}'.format(filename)

        if os.path.isfile(fullfilename):
            print(fullfilename + ' was found!')
        else:
            # Missing locally: fetch it with a progress bar
            with TqdmUpTo(unit='B', unit_scale=True, miniters=1, desc=urlstr.split('/')[-1]) as t:
                urllib.request.urlretrieve(urlstr, fullfilename, reporthook=t.update_to)
                print('Downloaded Gaia DR{:d} TGAS ({:d} of 15) file catalog successfully to {}'.format(
                    dr, part, fullfilename))
        fulllist.append(fullfilename)

    return fulllist
def allstar(dr=None):
    """
    NAME:
        allstar
    PURPOSE:
        download the allStar file (catalog of ASPCAP stellar parameters and abundances from combined spectra)
    INPUT:
        Data Release 13 OR 14
    OUTPUT:
        full file path, the file is downloaded first if not found locally
    HISTORY:
        2017-Oct-09 Henry Leung
    """
    dr = apogee_default_dr(dr=dr)

    # (release, local sub-folder, file name, URL template) of every supported catalog
    known_releases = (
        (13,
         'dr13/apogee/spectro/redux/r6/stars/l30e/l30e.2/',
         'allStar-l30e.2.fits',
         'https://data.sdss.org/sas/dr13/apogee/spectro/redux/r6/stars/l30e/l30e.2/{}'),
        (14,
         'dr14/apogee/spectro/redux/r8/stars/l31c/l31c.2/',
         'allStar-l31c.2.fits',
         'https://data.sdss.org/sas/dr14/apogee/spectro/redux/r8/stars/l31c/l31c.2/{}'),
    )
    for release, subfolder, filename, url_template in known_releases:
        if dr == release:
            break
    else:
        raise ValueError('[astroNN.apogee.downloader.all_star()] only supports APOGEE DR13 and DR14')

    # Make sure the local folder exists
    fullfilepath = os.path.join(_APOGEE_DATA, subfolder)
    if not os.path.exists(fullfilepath):
        os.makedirs(fullfilepath)
    fullfilename = os.path.join(fullfilepath, filename)
    url = url_template.format(filename)

    if os.path.isfile(fullfilename):
        print(fullfilename + ' was found!')
    else:
        with TqdmUpTo(unit='B', unit_scale=True, miniters=1, desc=url.split('/')[-1]) as t:
            urllib.request.urlretrieve(url, fullfilename, reporthook=t.update_to)
            print('Downloaded DR{:d} allStar file catalog successfully to {}'.format(dr, fullfilename))

    return fullfilename
def allvisit(dr=None):
    """
    NAME:
        allvisit
    PURPOSE:
        download the allVisit file (catalog of properties from individual visit spectra)
    INPUT:
        Data Release 13 OR 14
    OUTPUT:
        None (just downloads)
    HISTORY:
        2017-Oct-11 Henry Leung
    """
    dr = apogee_default_dr(dr=dr)

    # (release, local sub-folder, file name, URL template) of every supported catalog
    known_releases = (
        (13,
         'dr13/apogee/spectro/redux/r6/',
         'allVisit-l30e.2.fits',
         'https://data.sdss.org/sas/dr13/apogee/spectro/redux/r6/{}'),
        (14,
         'dr14/apogee/spectro/redux/r8/',
         'allVisit-l31c.2.fits',
         'https://data.sdss.org/sas/dr14/apogee/spectro/redux/r8/{}'),
    )
    for release, subfolder, filename, url_template in known_releases:
        if dr == release:
            break
    else:
        raise ValueError('[astroNN.apogee.downloader.all_visit()] only supports APOGEE DR13 and DR14')

    # Make sure the local folder exists
    fullfilepath = os.path.join(_APOGEE_DATA, subfolder)
    if not os.path.exists(fullfilepath):
        os.makedirs(fullfilepath)
    fullfilename = os.path.join(fullfilepath, filename)
    url = url_template.format(filename)

    if os.path.isfile(fullfilename):
        print(fullfilename + ' was found')
    else:
        with TqdmUpTo(unit='B', unit_scale=True, miniters=1, desc=url.split('/')[-1]) as t:
            urllib.request.urlretrieve(url, fullfilename, reporthook=t.update_to)
            # The message reports the folder, not the file
            print('Downloaded DR{:d} allVisit file catalog successfully to {}'.format(dr, fullfilepath))

    return None
def apogee_astronn(dr=None, flag=None):
    """
    Download the apogee_astroNN file (catalog of astroNN stellar parameters, abundances, distances and orbital
    parameters from combined spectra)

    :param dr: APOGEE DR
    :type dr: int
    :param flag: None: reuse a local file whose SHA1 matches, 1: force to re-download
    :type flag: int
    :return: full file path; the result of the credentials downloader after a 401 answer;
        ``warning_flag`` after any other HTTP error
    :rtype: str
    :History: 2019-Dec-10 - Written - Henry Leung (University of Toronto)
    """
    dr = apogee_default_dr(dr=dr)

    # Only the release specific constants differ between the branches
    if dr == 16:
        subfolder = "dr16/apogee/vac/apogee-astronn/"
        filename = "apogee_astroNN-DR16-v1.fits"
        file_hash = "1b81ed13eef36fe9a327a05f4a622246522199b2"
        url = f"https://data.sdss.org/sas/dr16/apogee/vac/apogee-astronn/{filename}"
    elif dr == 17:
        subfolder = "dr17/apogee/vac/apogee-astronn/"
        filename = "apogee_astroNN-DR17.fits"
        file_hash = "c422b9adba840b3415af2fe6dec6500219f1b68f"
        url = f"https://data.sdss.org/sas/dr17/apogee/vac/apogee-astronn/{filename}"
    else:
        raise ValueError("apogee_astroNN() only supports APOGEE DR16-DR17")

    # Make sure the local mirror folder exists
    fullfoldername = os.path.join(apogee_env(), subfolder)
    if not os.path.exists(fullfoldername):
        os.makedirs(fullfoldername)
    fullfilename = os.path.join(fullfoldername, filename)
    expected_sha1 = file_hash.lower()

    # Step 1: verify an already present file
    if os.path.isfile(fullfilename) and flag is None:
        if filehash(fullfilename, algorithm="sha1") == expected_sha1:
            logging.info(fullfilename + " was found!")
        else:
            warnings.warn(
                "File corruption detected, astroNN is attempting to download again"
            )
            apogee_astronn(dr=dr, flag=1)

    # Step 2: download when the file is missing or a re-download is forced
    if flag == 1 or not os.path.isfile(fullfilename):
        with TqdmUpTo(unit="B", unit_scale=True, miniters=1, desc=url.split("/")[-1]) as t:
            try:
                urllib.request.urlretrieve(url, fullfilename, reporthook=t.update_to)
                logging.info(
                    f"Downloaded DR{dr:d} apogee_astroNN file catalog successfully to {fullfilename}"
                )
                if filehash(fullfilename, algorithm="sha1") != expected_sha1:
                    warnings.warn(
                        "File corruption detected, astroNN is attempting to download again"
                    )
                    apogee_astronn(dr=dr, flag=1)
            except urllib.error.HTTPError as emsg:
                reason = str(emsg)
                if "401" in reason:
                    # Proprietary data: retry with user supplied credentials
                    fullfilename = __apogee_credentials_downloader(url, fullfilename)
                elif "404" in reason:
                    warnings.warn(f"{url} cannot be found on server, skipped")
                    fullfilename = warning_flag
                else:
                    warnings.warn(f"Unknown error occurred - {emsg}")
                    fullfilename = warning_flag

    return fullfilename
def gaia_source(dr=None, flag=None):
    """
    NAME:
        gaia_source
    PURPOSE:
        download the gaia_source files
    INPUT:
        dr (int): Gaia DR, example dr=1
        flag (int): None: reuse local files whose MD5 matches, 1: force to re-download
    OUTPUT:
        list of file path
    RAISES:
        ValueError: if the resolved dr is not 1
    HISTORY:
        2017-Oct-13 - Written - Henry Leung (University of Toronto)
        2017-Nov-26 - Update - Henry Leung (University of Toronto)
    """
    dr = gaia_default_dr(dr=dr)
    if dr != 1:
        raise ValueError('gaia_source() only supports Gaia DR1 Gaia Source')

    fulllist = []

    # Check if directory exists
    folderpath = os.path.join(gaia_env(), 'Gaia/gdr1/gaia_source/fits/')
    urlbase = 'http://cdn.gea.esac.esa.int/Gaia/gdr1/gaia_source/fits/'
    if not os.path.exists(folderpath):
        os.makedirs(folderpath)

    # The server-side MD5 listing is fetched once and reused afterwards
    hash_filename = 'MD5SUM.txt'
    full_hash_filename = os.path.join(folderpath, hash_filename)
    if not os.path.isfile(full_hash_filename):
        urllib.request.urlretrieve(urlbase + hash_filename, full_hash_filename)

    # After the transpose: row 0 holds the hashes, row 1 the file names
    hash_list = np.loadtxt(full_hash_filename, dtype='str').T

    # Blocks 000..019 hold 256 files each, the last block (020) holds files 000..110
    file_ids = [(j, i) for j in range(20) for i in range(256)]
    file_ids.extend((20, i) for i in range(111))

    for j, i in file_ids:
        filename = f'GaiaSource_000-0{j:0{2}d}-{i:0{3}d}.fits'
        urlstr = urlbase + filename
        fullfilename = os.path.join(folderpath, filename)

        # Look the expected hash up as a plain string (or None when the listing has no
        # entry for this file). Comparing against the raw fancy-indexed array relied on
        # array truthiness, which is ambiguous for an empty array.
        matches = hash_list[0][hash_list[1] == filename]
        file_hash = str(matches[0]) if len(matches) != 0 else None

        # Check if files exists
        if os.path.isfile(fullfilename) and flag is None:
            checksum = md5_checksum(fullfilename)
            # In some rare case, the hash cant be found, then the file is accepted as-is
            if file_hash is not None and checksum != file_hash:
                print(checksum)
                print(file_hash)
                print('File corruption detected, astroNN attempting to download again')
                # NOTE: this re-runs the download of every file, not only this one
                gaia_source(dr=dr, flag=1)
            else:
                print(fullfilename + ' was found!')
        elif not os.path.isfile(fullfilename) or flag == 1:
            # progress bar
            with TqdmUpTo(unit='B', unit_scale=True, miniters=1, desc=urlstr.split('/')[-1]) as t:
                urllib.request.urlretrieve(urlstr, fullfilename, reporthook=t.update_to)
                checksum = md5_checksum(fullfilename)
                if file_hash is not None and checksum != file_hash:
                    print('File corruption detected, astroNN attempting to download again')
                    gaia_source(dr=dr, flag=1)
            print(f'Downloaded Gaia DR{dr} Gaia Source ({(j * 256 + i):d} of {(256 * 20 + 112):d}) '
                  f'file catalog successfully to {fullfilename}')
        fulllist.append(fullfilename)

    return fulllist
def apogee_vac_rc(dr=None, flag=None):
    """
    NAME:
        apogee_vac_rc
    PURPOSE:
        download the red clumps catalogue
    INPUT:
        dr (int): APOGEE DR, example dr=14
        flag (int): None: reuse a local file whose SHA1 matches, 1: force to re-download
    OUTPUT:
        (path): full file path, or ``warning_flag`` when the server answers with an HTTP error
    HISTORY:
        2017-Nov-16 - Written - Henry Leung (University of Toronto)
    """
    dr = apogee_default_dr(dr=dr)

    # (release, SHA1, remote folder, local sub-folder) of every supported catalog
    catalogs = (
        (13,
         '5e87eb3ba202f9db24216978dafb19d39d382fc6',
         'https://data.sdss.org/sas/dr13/apogee/vac/apogee-rc/cat/',
         'dr13/apogee/vac/apogee-rc/cat/'),
        (14,
         '104513070f1c280954f3d1886cac429dbdf2eaf6',
         'https://data.sdss.org/sas/dr14/apogee/vac/apogee-rc/cat/',
         'dr14/apogee/vac/apogee-rc/cat/'),
    )
    for release, file_hash, str1, subfolder in catalogs:
        if dr == release:
            break
    else:
        raise ValueError('apogee_vac_rc() only supports DR13 or DR14')

    filename = f'apogee-rc-DR{dr}.fits'
    urlstr = str1 + filename

    # Make sure the local folder exists before building the final path
    local_folder = os.path.join(apogee_env(), subfolder)
    if not os.path.exists(local_folder):
        os.makedirs(local_folder)
    fullfilename = os.path.join(apogee_env(), subfolder, filename)
    expected_sha1 = file_hash.lower()

    on_disk = os.path.isfile(fullfilename)
    if on_disk and flag is None:
        # Local copy present: only its integrity has to be verified
        if sha1_checksum(fullfilename) == expected_sha1:
            print(fullfilename + ' was found!')
        else:
            print('File corruption detected, astroNN attempting to download again')
            apogee_vac_rc(dr=dr, flag=1)
    elif not on_disk or flag == 1:
        try:
            with TqdmUpTo(unit='B', unit_scale=True, miniters=1, desc=urlstr.split('/')[-1]) as t:
                urllib.request.urlretrieve(urlstr, fullfilename, reporthook=t.update_to)
                print(f'Downloaded DR{dr} Red Clumps Catalog successfully to {fullfilename}')
                if sha1_checksum(fullfilename) != expected_sha1:
                    print('File corruption detected, astroNN attempting to download again')
                    apogee_vac_rc(dr=dr, flag=1)
        except urllib.request.HTTPError:
            # Report and hand back the sentinel instead of raising
            print(f'{urlstr} cannot be found on server, skipped')
            fullfilename = warning_flag

    return fullfilename
def allstar(dr=None, flag=None):
    """
    NAME:
        allstar
    PURPOSE:
        download the allStar file (catalog of ASPCAP stellar parameters and abundances from combined spectra)
    INPUT:
        dr (int): APOGEE DR, example dr=14
        flag (int): None: reuse a local file whose SHA1 matches, 1: force to re-download
    OUTPUT:
        (path): full file path, the file is downloaded first if not found locally
    HISTORY:
        2017-Oct-09 - Written - Henry Leung (University of Toronto)
    """
    dr = apogee_default_dr(dr=dr)

    # Only the release specific constants differ between the branches
    if dr == 13:
        file_hash = '1718723ada3018de94e1022cd57d4d950a74f91f'
        subfolder = 'dr13/apogee/spectro/redux/r6/stars/l30e/l30e.2/'
        filename = 'allStar-l30e.2.fits'
        url = f'https://data.sdss.org/sas/dr13/apogee/spectro/redux/r6/stars/l30e/l30e.2/{filename}'
    elif dr == 14:
        file_hash = 'a7e1801924661954da792e377ad54f412219b105'
        subfolder = 'dr14/apogee/spectro/redux/r8/stars/l31c/l31c.2/'
        filename = 'allStar-l31c.2.fits'
        url = f'https://data.sdss.org/sas/dr14/apogee/spectro/redux/r8/stars/l31c/l31c.2/{filename}'
    else:
        raise ValueError('allstar() only supports APOGEE DR13 and DR14')

    # Make sure the local mirror folder exists
    fullfoldername = os.path.join(apogee_env(), subfolder)
    if not os.path.exists(fullfoldername):
        os.makedirs(fullfoldername)
    fullfilename = os.path.join(fullfoldername, filename)
    expected_sha1 = file_hash.lower()

    # Step 1: verify an already present file
    if os.path.isfile(fullfilename) and flag is None:
        if sha1_checksum(fullfilename) == expected_sha1:
            print(fullfilename + ' was found!')
        else:
            print('File corruption detected, astroNN attempting to download again')
            allstar(dr=dr, flag=1)

    # Step 2: download when the file is missing or a re-download is forced
    if flag == 1 or not os.path.isfile(fullfilename):
        with TqdmUpTo(unit='B', unit_scale=True, miniters=1, desc=url.split('/')[-1]) as t:
            urllib.request.urlretrieve(url, fullfilename, reporthook=t.update_to)
            print(f'Downloaded DR{dr:d} allStar file catalog successfully to {fullfilename}')
            if sha1_checksum(fullfilename) != expected_sha1:
                print('File corruption detected, astroNN attempting to download again')
                allstar(dr=dr, flag=1)

    return fullfilename
def apogee_rc(dr=None, flag=None):
    """
    Download the APOGEE red clumps catalogue

    :param dr: Apogee DR
    :type dr: int
    :param flag: Force to download if flag=1
    :type flag: int
    :return: full file path; the result of the credentials downloader after a 401 answer;
        ``warning_flag`` after any other HTTP error
    :rtype: str
    :History: 2017-Nov-16 - Written - Henry Leung (University of Toronto)
    """
    dr = apogee_default_dr(dr=dr)

    # (release, SHA1, remote folder, local sub-folder) of every supported catalog
    catalogs = (
        (13,
         "5e87eb3ba202f9db24216978dafb19d39d382fc6",
         "https://data.sdss.org/sas/dr13/apogee/vac/apogee-rc/cat/",
         "dr13/apogee/vac/apogee-rc/cat/"),
        (14,
         "104513070f1c280954f3d1886cac429dbdf2eaf6",
         "https://data.sdss.org/sas/dr14/apogee/vac/apogee-rc/cat/",
         "dr14/apogee/vac/apogee-rc/cat/"),
        (16,
         "0bc75a230058f50ed8a5ea3fa8554d803ffc103d",
         "https://data.sdss.org/sas/dr16/apogee/vac/apogee-rc/cat/",
         "dr16/apogee/vac/apogee-rc/cat/"),
        (17,
         "d54e0ea4e6a3f5cc3c02a73b93260e992d9836d0",
         "https://data.sdss.org/sas/dr17/apogee/vac/apogee-rc/cat/",
         "dr17/apogee/vac/apogee-rc/cat/"),
    )
    for release, file_hash, str1, subfolder in catalogs:
        if dr == release:
            break
    else:
        raise ValueError("apogee_rc() only supports APOGEE DR13-DR17")

    filename = f"apogee-rc-DR{dr}.fits"
    urlstr = str1 + filename

    # Make sure the local mirror folder exists
    fullfoldername = os.path.join(apogee_env(), subfolder)
    if not os.path.exists(fullfoldername):
        os.makedirs(fullfoldername)
    fullfilename = os.path.join(fullfoldername, filename)
    expected_sha1 = file_hash.lower()

    on_disk = os.path.isfile(fullfilename)
    if on_disk and flag is None:
        # Local copy present: only its integrity has to be verified
        if filehash(fullfilename, algorithm="sha1") == expected_sha1:
            logging.info(fullfilename + " was found!")
        else:
            warnings.warn(
                "File corruption detected, astroNN is attempting to download again"
            )
            apogee_rc(dr=dr, flag=1)
    elif not on_disk or flag == 1:
        try:
            with TqdmUpTo(unit="B", unit_scale=True, miniters=1, desc=urlstr.split("/")[-1]) as t:
                urllib.request.urlretrieve(urlstr, fullfilename, reporthook=t.update_to)
                logging.info(
                    f"Downloaded DR{dr} Red Clumps Catalog successfully to {fullfilename}"
                )
                if filehash(fullfilename, algorithm="sha1") != expected_sha1:
                    warnings.warn(
                        "File corruption detected, astroNN is attempting to download again"
                    )
                    apogee_rc(dr=dr, flag=1)
        except urllib.error.HTTPError as emsg:
            reason = str(emsg)
            if "401" in reason:
                # Proprietary data: retry with user supplied credentials
                fullfilename = __apogee_credentials_downloader(urlstr, fullfilename)
            elif "404" in reason:
                warnings.warn(f"{urlstr} cannot be found on server, skipped")
                fullfilename = warning_flag
            else:
                warnings.warn(f"Unknown error occurred - {emsg}")
                fullfilename = warning_flag

    return fullfilename
def apogee_distances(dr=None, flag=None):
    """
    Download the APOGEE Distances VAC catalogue (APOGEE Distances for DR14, APOGEE StarHorse for DR16)

    A copy that is already on disk (and ``flag`` is None) is compared against the SHA-1 digest recorded here for
    the release and fetched again on a mismatch; a freshly downloaded copy is compared the same way.

    :param dr: APOGEE DR
    :type dr: int
    :param flag: Force to download if flag=1
    :type flag: int
    :return: full file path, or the module-level ``warning_flag`` if the download raised an HTTP error
    :rtype: str
    :raises ValueError: if the resolved data release is neither 14 nor 16
    :History:
        | 2018-Jan-24 - Written - Henry Leung (University of Toronto)
        | 2021-Jan-29 - Updated - Henry Leung (University of Toronto)
    """
    dr = apogee_default_dr(dr=dr)

    if dr == 14:
        file_hash = "b33c8419be784b1be3d14af3ee9696c6ac31830f"

        str1 = "https://data.sdss.org/sas/dr14/apogee/vac/apogee-distances/"
        filename = f"apogee_distances-DR{dr}.fits"
        urlstr = str1 + filename
        fullfoldername = os.path.join(apogee_env(), "dr14/apogee/vac/apogee-distances/")
        # Check if directory exists
        if not os.path.exists(fullfoldername):
            os.makedirs(fullfoldername)
        fullfilename = os.path.join(fullfoldername, filename)
    # must be ``elif``: with a plain ``if`` the ``else`` below belongs to the DR16 test alone,
    # so a DR14 request would be configured above and then rejected with ValueError
    elif dr == 16:
        file_hash = "2502e2f7703046163f81ecc4054dce39b2038e4f"

        str1 = "https://data.sdss.org/sas/dr16/apogee/vac/apogee-starhorse/"
        filename = f"apogee_starhorse-DR{dr}-v1.fits"
        urlstr = str1 + filename
        fullfoldername = os.path.join(apogee_env(), "dr16/apogee/vac/apogee-starhorse/")
        # Check if directory exists
        if not os.path.exists(fullfoldername):
            os.makedirs(fullfoldername)
        fullfilename = os.path.join(fullfoldername, filename)
    else:
        raise ValueError("apogee_distances() only supports APOGEE DR14-DR16")

    # check file integrity
    if os.path.isfile(fullfilename) and flag is None:
        checksum = filehash(fullfilename, algorithm="sha1")
        if checksum != file_hash.lower():
            warnings.warn(
                "File corruption detected, astroNN is attempting to download again"
            )
            apogee_distances(dr=dr, flag=1)
        else:
            logging.info(fullfilename + " was found!")

    elif not os.path.isfile(fullfilename) or flag == 1:
        try:
            with TqdmUpTo(
                unit="B", unit_scale=True, miniters=1, desc=urlstr.split("/")[-1]
            ) as t:
                urllib.request.urlretrieve(urlstr, fullfilename, reporthook=t.update_to)
                logging.info(
                    f"Downloaded DR{dr} Distances successfully to {fullfilename}"
                )
                # verify the fresh download as well
                checksum = filehash(fullfilename, algorithm="sha1")
                if checksum != file_hash.lower():
                    warnings.warn(
                        "File corruption detected, astroNN is attempting to download again"
                    )
                    apogee_distances(dr=dr, flag=1)
        except urllib.error.HTTPError:
            # any HTTP error is reported as a missing file and signalled through warning_flag
            warnings.warn(f"{urlstr} cannot be found on server, skipped")
            fullfilename = warning_flag

    return fullfilename
def allstar(dr=None, flag=None):
    """
    Fetch the allStar file (catalog of ASPCAP stellar parameters and abundances from combined spectra),
    downloading it when it is not already on disk

    :param dr: APOGEE DR
    :type dr: int
    :param flag: None: normal (an existing file is verified against its SHA-1 digest), 1: force to re-download
    :type flag: int
    :return: full file path; the module-level ``warning_flag`` when the download raised an HTTP error whose text
        does not contain "401"; the return value of the credentials downloader when it does contain "401"
    :rtype: str
    :raises ValueError: if the resolved data release is not 13, 14 or 16
    :History: 2017-Oct-09 - Written - Henry Leung (University of Toronto)
    """
    dr = apogee_default_dr(dr=dr)

    # (data release, SHA-1 digest, directory below the data root, file name)
    releases = (
        (13, "1718723ada3018de94e1022cd57d4d950a74f91f",
         "dr13/apogee/spectro/redux/r6/stars/l30e/l30e.2/", "allStar-l30e.2.fits"),
        (14, "a7e1801924661954da792e377ad54f412219b105",
         "dr14/apogee/spectro/redux/r8/stars/l31c/l31c.2/", "allStar-l31c.2.fits"),
        (16, "66fe854bd000ca1c0a6b50a998877e4a3e41d184",
         "dr16/apogee/spectro/aspcap/r12/l33/", "allStar-r12-l33.fits"),
    )
    matches = [entry for entry in releases if dr == entry[0]]
    if not matches:
        raise ValueError("allstar() only supports APOGEE DR13-DR16")
    _, file_hash, subdir, filename = matches[0]

    # the local tree mirrors the server layout
    folder = os.path.join(apogee_env(), subdir)
    if not os.path.exists(folder):
        os.makedirs(folder)
    target = os.path.join(folder, filename)
    url = "https://data.sdss.org/sas/" + subdir + filename

    expected = file_hash.lower()
    corruption_msg = "File corruption detected, astroNN is attempting to download again"

    # stage 1: verify a file that is already present
    if os.path.isfile(target) and flag is None:
        if filehash(target, algorithm="sha1") == expected:
            print(target + " was found!")
        else:
            print(corruption_msg)
            allstar(dr=dr, flag=1)

    # stage 2: download when the file is (still) missing or a re-download is forced
    if not os.path.isfile(os.path.join(folder, filename)) or flag == 1:
        with TqdmUpTo(unit="B", unit_scale=True, miniters=1, desc=url.split("/")[-1]) as progress:
            try:
                urllib.request.urlretrieve(url, target, reporthook=progress.update_to)
                print(f"Downloaded DR{dr:d} allStar file catalog successfully to {target}")
                if filehash(target, algorithm="sha1") != expected:
                    print(corruption_msg)
                    allstar(dr=dr, flag=1)
            except urllib.error.HTTPError as emsg:
                reason = str(emsg)
                if "401" in reason:
                    # hand over to the credentials-based downloader
                    target = __apogee_credentials_downloader(url, target)
                else:
                    if "404" in reason:
                        print(f"{url} cannot be found on server, skipped")
                    else:
                        print(f"Unknown error occurred - {emsg}")
                    target = warning_flag

    return target
def apogee_vac_rc(dr=None, flag=None):
    """
    Download the red clumps catalogue

    A copy that is already on disk (and ``flag`` is None) is compared against the SHA-1 digest recorded here for
    the release and fetched again on a mismatch; a freshly downloaded copy is compared the same way.

    :param dr: Apogee DR
    :type dr: int
    :param flag: Force to download if flag=1
    :type flag: int
    :return: full file path, or the module-level ``warning_flag`` if the download raised an HTTP error
    :rtype: str
    :raises ValueError: if the resolved data release is not 13, 14 or 16
    :History: 2017-Nov-16 - Written - Henry Leung (University of Toronto)
    """
    dr = apogee_default_dr(dr=dr)

    # SHA-1 digest of the catalogue for each supported data release
    sha1_by_release = {
        13: '5e87eb3ba202f9db24216978dafb19d39d382fc6',
        14: '104513070f1c280954f3d1886cac429dbdf2eaf6',
        16: '0bc75a230058f50ed8a5ea3fa8554d803ffc103d',
    }
    # equality scan (rather than a dict lookup) so an unhashable ``dr`` still ends in ValueError
    release = next((known for known in sha1_by_release if dr == known), None)
    if release is None:
        # the message lists every release handled above (DR16 was previously omitted)
        raise ValueError('apogee_vac_rc() only supports APOGEE DR13, DR14 or DR16')
    file_hash = sha1_by_release[release]

    # every release shares the same layout below its own "drNN" root, locally and on the server
    subdir = f'dr{release}/apogee/vac/apogee-rc/cat/'
    filename = f'apogee-rc-DR{dr}.fits'
    urlstr = 'https://data.sdss.org/sas/' + subdir + filename
    fullfoldername = os.path.join(apogee_env(), subdir)
    if not os.path.exists(fullfoldername):
        os.makedirs(fullfoldername)
    fullfilename = os.path.join(fullfoldername, filename)

    # check file integrity
    if os.path.isfile(fullfilename) and flag is None:
        checksum = filehash(fullfilename, algorithm='sha1')
        if checksum != file_hash.lower():
            print('File corruption detected, astroNN is attempting to download again')
            apogee_vac_rc(dr=dr, flag=1)
        else:
            print(fullfilename + ' was found!')

    elif not os.path.isfile(fullfilename) or flag == 1:
        try:
            with TqdmUpTo(unit='B', unit_scale=True, miniters=1, desc=urlstr.split('/')[-1]) as t:
                urllib.request.urlretrieve(urlstr, fullfilename, reporthook=t.update_to)
                print(f'Downloaded DR{dr} Red Clumps Catalog successfully to {fullfilename}')
                # verify the fresh download as well
                checksum = filehash(fullfilename, algorithm='sha1')
                if checksum != file_hash.lower():
                    print('File corruption detected, astroNN is attempting to download again')
                    apogee_vac_rc(dr=dr, flag=1)
        except urllib.error.HTTPError:
            # any HTTP error is reported as a missing file and signalled through warning_flag
            print(f'{urlstr} cannot be found on server, skipped')
            fullfilename = warning_flag

    return fullfilename
ApogeeKplerEchelle from astroNN.models import load_folder from astroNN.nn.callbacks import ErrorOnNaN from astroNN.shared.downloader_tools import TqdmUpTo import tensorflow.keras as tfk mnist = tfk.datasets.mnist utils = tfk.utils _URL_ORIGIN = 'http://astro.utoronto.ca/~hleung/shared/ci_data/' filename = 'apogee_dr14_green.h5' complete_url = _URL_ORIGIN + filename # Check if files exists if not os.path.isfile(filename): with TqdmUpTo(unit='B', unit_scale=True, miniters=1, desc=complete_url.split('/')[-1]) as t: urllib.request.urlretrieve(complete_url, filename, reporthook=t.update_to) # Data preparation f = h5py.File(filename, 'r') xdata = np.array(f['spectra']) ydata = np.stack([f['logg'], f['feh']]).T ydata_err = np.stack([f['logg_err'], f['feh_err']]).T class ApogeeModelTestCase(unittest.TestCase): def test_apogee_cnn(self): """
def allvisit(dr=None, flag=None):
    """
    Fetch the allVisit file (catalog of properties from individual visit spectra), downloading it when it is
    not already on disk

    A copy that is already on disk (and ``flag`` is None) is compared against the SHA-1 digest recorded here for
    the release and fetched again on a mismatch; a freshly downloaded copy is compared the same way. Errors
    raised by the download itself are not caught here.

    :param dr: APOGEE DR
    :type dr: int
    :param flag: None: normal (an existing file is verified against its SHA-1 digest), 1: force to re-download
    :type flag: int
    :return: full file path
    :rtype: str
    :raises ValueError: if the resolved data release is not 13, 14 or 16
    :History: 2017-Oct-11 - Written - Henry Leung (University of Toronto)
    """
    dr = apogee_default_dr(dr=dr)

    # per release: SHA-1 digest, directory below the data root, file name
    if dr == 13:
        file_hash, subdir, filename = (
            "2a3b13ccd40a2c8aea8321be9630117922d55b51",
            "dr13/apogee/spectro/redux/r6/",
            "allVisit-l30e.2.fits",
        )
    elif dr == 14:
        file_hash, subdir, filename = (
            "abcecbcdc5fe8d00779738702c115633811e6bbd",
            "dr14/apogee/spectro/redux/r8/",
            "allVisit-l31c.2.fits",
        )
    elif dr == 16:
        file_hash, subdir, filename = (
            "65befb967d8d9d6f4f87711c1fa8d0ac014b62da",
            "dr16/apogee/spectro/aspcap/r12/l33/",
            "allVisit-r12-l33.fits",
        )
    else:
        raise ValueError("allvisit() only supports APOGEE DR13-DR16")

    # the local tree mirrors the server layout
    folder = os.path.join(apogee_env(), subdir)
    if not os.path.exists(folder):
        os.makedirs(folder)
    target = os.path.join(folder, filename)
    url = "https://data.sdss.org/sas/" + subdir + filename

    expected = file_hash.lower()
    corruption_msg = "File corruption detected, astroNN is attempting to download again"

    if os.path.isfile(target) and flag is None:
        # an existing file with no explicit flag: only verify it
        if filehash(target, algorithm="sha1") == expected:
            logging.info(target + " was found!")
        else:
            warnings.warn(corruption_msg)
            allvisit(dr=dr, flag=1)
    elif not os.path.isfile(os.path.join(folder, filename)) or flag == 1:
        with TqdmUpTo(
            unit="B", unit_scale=True, miniters=1, desc=url.split("/")[-1]
        ) as progress:
            urllib.request.urlretrieve(url, target, reporthook=progress.update_to)
            # note: the message reports the destination folder, not the file
            logging.info(
                f"Downloaded DR{dr:d} allVisit file catalog successfully to {folder}"
            )
            if filehash(target, algorithm="sha1") != expected:
                warnings.warn(corruption_msg)
                allvisit(dr=dr, flag=1)

    return target