Esempio n. 1
0
def tgas(flag=None):
    """
    Get path to the Gaia TGAS DR1 files, download if files not found

    Files are checked against the MD5 listed in ``MD5SUM.txt`` (downloaded once into the data folder).
    A file failing the check is downloaded again on its own, for a limited number of attempts.

    :param flag: ``None`` (default) to checksum the files found on disk and download the missing ones;
        ``1`` to force every file to be downloaded again; with any other value the missing files are
        downloaded and the files found on disk are accepted without being checked
    :type flag: int or None
    :return: List of file path
    :rtype: list
    :raises OSError: if a file still fails its MD5 check after the last download attempt
    :History: 2017-Oct-13 - Written - Henry Leung (University of Toronto)
    """
    # Upper bound on downloads of one file, so a persistent mismatch cannot loop forever
    max_attempts = 3
    fulllist = []

    folderpath = os.path.join(gaia_env(), 'Gaia/gdr1/tgas_source/fits/')
    urlbase = 'http://cdn.gea.esac.esa.int/Gaia/gdr1/tgas_source/fits/'

    # exist_ok avoids the check-then-create race of a separate os.path.exists() test
    os.makedirs(folderpath, exist_ok=True)

    hash_filename = 'MD5SUM.txt'
    full_hash_filename = os.path.join(folderpath, hash_filename)
    if not os.path.isfile(full_hash_filename):
        urllib.request.urlretrieve(urlbase + hash_filename, full_hash_filename)

    # Each row is read as (md5, filename); ndmin=2 keeps that shape even for a single-row file
    hash_table = np.loadtxt(full_hash_filename, dtype='str', ndmin=2)
    expected_md5 = {str(row[1]): str(row[0]) for row in hash_table}

    for i in range(16):
        filename = f'TgasSource_000-000-0{i:02d}.fits'
        fullfilename = os.path.join(folderpath, filename)
        urlstr = urlbase + filename
        # In some rare case the hash cant be found: None means the file is accepted unverified
        file_hash = expected_md5.get(filename)

        file_exists = os.path.isfile(fullfilename)
        need_download = not file_exists or flag == 1

        if file_exists and flag is None:
            checksum = md5_checksum(fullfilename)
            if file_hash is not None and checksum != file_hash:
                print(checksum)
                print(file_hash)
                print('File corruption detected, astroNN attempting to download again')
                # Only this file is fetched again, the other files are left alone
                need_download = True
            else:
                print(fullfilename + ' was found!')

        if need_download:
            for attempt in range(1, max_attempts + 1):
                # progress bar
                with TqdmUpTo(unit='B', unit_scale=True, miniters=1, desc=filename) as t:
                    # Download
                    urllib.request.urlretrieve(urlstr, fullfilename, reporthook=t.update_to)
                if file_hash is None or md5_checksum(fullfilename) == file_hash:
                    break
                if attempt == max_attempts:
                    raise OSError(f'{fullfilename} failed the MD5 check after {max_attempts} download attempts')
                print('File corruption detected, astroNN attempting to download again')
            print(f'Downloaded Gaia DR1 TGAS ({i:d} of 15) file catalog successfully to {fullfilename}')
        fulllist.append(fullfilename)

    return fulllist
Esempio n. 2
0
    def test_checksum(self):
        """Compare the MD5, SHA-1 and SHA-256 helpers against known digests of one data file."""
        import astroNN
        from astroNN.shared.downloader_tools import md5_checksum, sha1_checksum, sha256_checksum

        # The reference file is looked up under <parent of the astroNN package>/astroNN/data
        package_parent = os.path.dirname(astroNN.__path__[0])
        data_file = os.path.join(package_parent, 'astroNN', 'data', 'anderson_2017_dr14_parallax.npz')

        # Hash with every helper first, the comparisons only start once all three digests exist
        hashers = (md5_checksum, sha1_checksum, sha256_checksum)
        digests = [hasher(data_file) for hasher in hashers]

        # read answer hashed by Windows Get-FileHash (upper-case hex, same order as the helpers)
        references = (
            '9C714F5FE22BB7C4FF9EA32F3E859D73',
            '733C0227CF93DB0CD6106B5349402F251E7ED735',
            '36C265C907F440114D747DA21D2A014D32B5E442D541F183C0EE862F5865FD26',
        )
        for digest, reference in zip(digests, references):
            self.assertEqual(digest, reference.lower())
Esempio n. 3
0
def gaia_source(dr=None, flag=None):
    """
    NAME:
        gaia_source
    PURPOSE:
        download the gaia_source files, files are checked against the MD5 listed in MD5SUM.txt and a file
        failing the check is downloaded again on its own, for a limited number of attempts
    INPUT:
        dr (int): Gaia DR, example dr=1
        flag (int): None (default): checksum the files found on disk and download the missing ones
                    1: force to re-download every file
                    any other value: download the missing files, files found on disk are not checked
    OUTPUT:
        list of file path
    RAISES:
        ValueError: if dr, once resolved by gaia_default_dr(), is not 1
        OSError: if a file still fails its MD5 check after the last download attempt
    HISTORY:
        2017-Oct-13 - Written - Henry Leung (University of Toronto)
        2017-Nov-26 - Update - Henry Leung (University of Toronto)
    """
    dr = gaia_default_dr(dr=dr)
    if dr != 1:
        raise ValueError('gaia_source() only supports Gaia DR1 Gaia Source')

    # Upper bound on downloads of one file, so a persistent mismatch cannot loop forever
    max_attempts = 3
    fulllist = []

    folderpath = os.path.join(gaia_env(), 'Gaia/gdr1/gaia_source/fits/')
    urlbase = 'http://cdn.gea.esac.esa.int/Gaia/gdr1/gaia_source/fits/'

    # exist_ok avoids the check-then-create race of a separate os.path.exists() test
    os.makedirs(folderpath, exist_ok=True)

    hash_filename = 'MD5SUM.txt'
    full_hash_filename = os.path.join(folderpath, hash_filename)
    if not os.path.isfile(full_hash_filename):
        urllib.request.urlretrieve(urlbase + hash_filename, full_hash_filename)

    # Each row is read as (md5, filename); ndmin=2 keeps that shape even for a single-row file
    hash_table = np.loadtxt(full_hash_filename, dtype='str', ndmin=2)
    expected_md5 = {str(row[1]): str(row[0]) for row in hash_table}

    # Blocks 000-019 hold files 000-255 each, block 020 holds files 000-110
    filenames = [f'GaiaSource_000-0{j:02d}-{i:03d}.fits' for j in range(20) for i in range(256)]
    filenames.extend(f'GaiaSource_000-020-{i:03d}.fits' for i in range(111))

    for index, filename in enumerate(filenames):
        urlstr = urlbase + filename
        fullfilename = os.path.join(folderpath, filename)
        # In some rare case the hash cant be found: None means the file is accepted unverified
        file_hash = expected_md5.get(filename)

        file_exists = os.path.isfile(fullfilename)
        need_download = not file_exists or flag == 1

        if file_exists and flag is None:
            checksum = md5_checksum(fullfilename)
            if file_hash is not None and checksum != file_hash:
                print(checksum)
                print(file_hash)
                print('File corruption detected, astroNN attempting to download again')
                # Only this file is fetched again, the rest of the catalog is left alone
                need_download = True
            else:
                print(fullfilename + ' was found!')

        if need_download:
            for attempt in range(1, max_attempts + 1):
                # progress bar
                with TqdmUpTo(unit='B', unit_scale=True, miniters=1, desc=filename) as t:
                    urllib.request.urlretrieve(urlstr, fullfilename, reporthook=t.update_to)
                if file_hash is None or md5_checksum(fullfilename) == file_hash:
                    break
                if attempt == max_attempts:
                    raise OSError(f'{fullfilename} failed the MD5 check after {max_attempts} download attempts')
                print('File corruption detected, astroNN attempting to download again')
            print(f'Downloaded Gaia DR{dr} Gaia Source ({index:d} of {(256 * 20 + 112):d}) '
                  f'file catalog successfully to {fullfilename}')
        fulllist.append(fullfilename)

    return fulllist