Ejemplo n.º 1
0
def fetch_check_data(data_file,
                     url,
                     hashes,
                     cache_directory='',
                     append='',
                     verbose=False):
    """
    Get data file through a URL call and check its hash:

        1. Check hash table for data file name.
        2. Check hash subdirectory within cache directory for data file
           (+ append).
        3. If data file not in cache, download, compute hash, and verify hash.
        4. If hash correct, save file (+ append); otherwise, raise an error.

    Parameters
    ----------
    data_file : string
        name of file (not the full path)
    url : string
        URL for data file
    hashes : dictionary
        file names (keys) and their expected hashes (values); the expected
        hash is also used as the name of the cache subdirectory
    cache_directory : string
        cache directory (full path); if empty, 'hash_temp' within the
        user's home directory is used
    append : string
        append to output file (ex: '.nii.gz')
    verbose : bool
        print statements?

    Returns
    -------
    data_path : string
        data file name (full path, including append)

    Raises
    ------
    IOError
        if data_file is not a key of hashes (this includes an empty hashes),
        or if the hash of the downloaded file differs from the stored hash

    Examples
    --------
    >>> from mindboggle.mio.fetch_data import fetch_check_data
    >>> from mindboggle.mio.fetch_data import cache_hashes
    >>> # osf.io URL for OASIS-30_Atropos_template_to_MNI152_affine.txt
    >>> data_file = 'OASIS-30_Atropos_template_to_MNI152_affine.txt'
    >>> url = 'https://osf.io/ufydw/?action=download&version=1'
    >>> hashes = cache_hashes()
    >>> cache_directory = ''
    >>> append = ''
    >>> verbose = False
    >>> data_path = fetch_check_data(data_file, url, hashes, cache_directory,
    ...                              append, verbose) # doctest: +SKIP

    """
    import os
    import shutil

    # ------------------------------------------------------------------------
    # Set temporary cache directory if not specified:
    # ------------------------------------------------------------------------
    if not cache_directory:
        # expanduser() falls back to platform-specific lookups when the HOME
        # environment variable is not set, instead of raising a KeyError:
        cache_directory = os.path.join(os.path.expanduser('~'), 'hash_temp')

    # ------------------------------------------------------------------------
    # Check hash table for file name, and store corresponding hash:
    # ------------------------------------------------------------------------
    if not hashes or data_file not in hashes:
        raise IOError("Data file '{0}' not in hash table.".format(data_file))
    stored_hash = hashes[data_file]

    # ------------------------------------------------------------------------
    # Create missing cache and hash directories (including any parents):
    # ------------------------------------------------------------------------
    if not os.path.exists(cache_directory):
        if verbose:
            print("Create missing cache directory: {0}".format(
                cache_directory))
        os.makedirs(cache_directory)
    hash_dir = os.path.join(cache_directory, stored_hash)
    if not os.path.exists(hash_dir):
        if verbose:
            print("Create missing hash directory: {0}".format(hash_dir))
        os.makedirs(hash_dir)

    # ------------------------------------------------------------------------
    # Check hash subdirectory for file:
    # ------------------------------------------------------------------------
    # The file is saved under its name plus append, so that is the name
    # that must be looked up; otherwise a call with a non-empty append
    # would never find its own cached copy and would download again.
    data_path = os.path.join(hash_dir, data_file)
    if append:
        data_path += append
    if os.path.exists(data_path):
        if verbose:
            print("File already exists and matches hash: {0}".format(url))
        return data_path

    # ------------------------------------------------------------------------
    # If file not in cache, download, compute hash, and verify:
    # ------------------------------------------------------------------------
    # Imported only here so that cache hits and lookup failures
    # do not depend on the download helpers:
    from mindboggle.mio.fetch_data import fetch_data, fetch_hash

    if verbose:
        print("Retrieve file from URL: {0}".format(url))

    # Download file as a temporary file:
    temp_file = fetch_data(url)

    # Compute the file's hash:
    data_hash = fetch_hash(temp_file)

    # The hash directory is named after the stored hash, so comparing the
    # two hashes decides whether the download belongs in that directory:
    if data_hash != stored_hash:
        raise IOError("Retrieved hash does not match stored hash.")

    if verbose:
        print("Copy file to cache: {0}".format(data_path))
    shutil.copyfile(temp_file, data_path)
    return data_path
Ejemplo n.º 2
0
def fetch_check_data(data_file, url, hashes, cache_directory='', append='',
                     verbose=False):
    """
    Get data file through a URL call and check its hash:

        1. Check hash table for data file name.
        2. Check hash subdirectory within cache directory for data file
           (+ append).
        3. If data file not in cache, download, compute hash, and verify hash.
        4. If hash correct, save file (+ append); otherwise, raise an error.

    Parameters
    ----------
    data_file : string
        name of file (not the full path)
    url : string
        URL for data file
    hashes : dictionary
        file names (keys) and their expected hashes (values); the expected
        hash is also used as the name of the cache subdirectory
    cache_directory : string
        cache directory (full path); if empty, 'hash_temp' within the
        user's home directory is used
    append : string
        append to output file (ex: '.nii.gz')
    verbose : bool
        print statements?

    Returns
    -------
    data_path : string
        data file name (full path, including append)

    Raises
    ------
    IOError
        if data_file is not a key of hashes (this includes an empty hashes),
        or if the hash of the downloaded file differs from the stored hash

    Examples
    --------
    >>> from mindboggle.mio.fetch_data import fetch_check_data
    >>> from mindboggle.mio.fetch_data import cache_hashes
    >>> # osf.io URL for OASIS-30_Atropos_template_to_MNI152_affine.txt
    >>> data_file = 'OASIS-30_Atropos_template_to_MNI152_affine.txt'
    >>> url = 'https://osf.io/ufydw/?action=download&version=1'
    >>> hashes = cache_hashes()
    >>> cache_directory = ''
    >>> append = ''
    >>> verbose = False
    >>> data_path = fetch_check_data(data_file, url, hashes, cache_directory,
    ...                              append, verbose) # doctest: +SKIP

    """
    import os
    import shutil

    # ------------------------------------------------------------------------
    # Set temporary cache directory if not specified:
    # ------------------------------------------------------------------------
    if not cache_directory:
        # expanduser() does not raise a KeyError when the HOME environment
        # variable is missing, unlike a direct os.environ lookup:
        cache_directory = os.path.join(os.path.expanduser('~'), 'hash_temp')

    # ------------------------------------------------------------------------
    # Check hash table for file name, and store corresponding hash:
    # ------------------------------------------------------------------------
    if not hashes or data_file not in hashes:
        raise IOError("Data file '{0}' not in hash table.".
                      format(data_file))
    stored_hash = hashes[data_file]

    # ------------------------------------------------------------------------
    # Create missing cache and hash directories (including any parents):
    # ------------------------------------------------------------------------
    if not os.path.exists(cache_directory):
        if verbose:
            print("Create missing cache directory: {0}".
                  format(cache_directory))
        os.makedirs(cache_directory)
    hash_dir = os.path.join(cache_directory, stored_hash)
    if not os.path.exists(hash_dir):
        if verbose:
            print("Create missing hash directory: {0}".format(hash_dir))
        os.makedirs(hash_dir)

    # ------------------------------------------------------------------------
    # Check hash subdirectory for file:
    # ------------------------------------------------------------------------
    # Look up the same name the file is written under (name + append);
    # checking the bare name would miss every file cached with an append.
    data_path = os.path.join(hash_dir, data_file)
    if append:
        data_path += append
    if os.path.exists(data_path):
        if verbose:
            print("File already exists and matches hash: {0}".format(url))
        return data_path

    # ------------------------------------------------------------------------
    # If file not in cache, download, compute hash, and verify:
    # ------------------------------------------------------------------------
    # The download helpers are only needed from this point on:
    from mindboggle.mio.fetch_data import fetch_data, fetch_hash

    if verbose:
        print("Retrieve file from URL: {0}".format(url))

    # Download file as a temporary file:
    temp_file = fetch_data(url)

    # Compute the file's hash:
    data_hash = fetch_hash(temp_file)

    # Save the file only if its hash equals the stored hash
    # (which is also the name of the hash directory):
    if data_hash != stored_hash:
        raise IOError("Retrieved hash does not match stored hash.")

    if verbose:
        print("Copy file to cache: {0}".format(data_path))
    shutil.copyfile(temp_file, data_path)
    return data_path
Ejemplo n.º 3
0
def fetch_check_data(data_file, url='', hashes=None, cache_env='', cache='',
                     return_missing=False, lookup=True):
    """
    Get data file through a URL call and check its hash.

    Steps ::
        If lookup is False, simply return the file name.
        If hashes provided:
            1. Check hash table for data file.
            2. Check hash subdirectory within cache directory for data file.
            3. If data file not in cache, download file, compute hash,
               and verify hash.
            4. If hash correct, save file; otherwise, raise an error.
        Otherwise, simply download file or return file path as a string.

    Parameters
    ----------
    data_file : string
        data file name
    url : string
        URL prefix for data file (data_file is appended to it)
    hashes : dictionary
        file names and hashes (if empty or None, simply download file
        from url, or return data_file if url is also empty)
    cache_env : string
        environment variable name for cache path
        (defaults to 'MINDBOGGLE_CACHE')
    cache : string
        in case cache_env is not set in the environment, use as cache
        directory (defaults to 'hash_temp' in the user's home directory)
    return_missing : Boolean
        if data_file is not in hashes, return data_file unchanged
        (nothing is downloaded) instead of exiting
    lookup : Boolean
        if False, skip all checks and simply return data_file

    Returns
    -------
    data_path : string
        path to the cached or downloaded file, or data_file itself when
        no lookup is performed

    Raises
    ------
    SystemExit
        if data_file is not in hashes and return_missing is False
    IOError
        if the hash of the downloaded file does not match the stored hash

    Examples
    --------
    >>> import os
    >>> from mindboggle.mio.fetch_data import hashes_url
    >>> from mindboggle.mio.fetch_data import fetch_check_data
    >>> hashes, url, cache_env, cache = hashes_url()
    >>> data_file = list(hashes)[0]
    >>> fetch_check_data(data_file, url, hashes, cache_env, cache) # doctest: +SKIP

    """
    import os
    import sys
    import shutil

    #-------------------------------------------------------------------------
    # No lookup requested: simply return file path:
    #-------------------------------------------------------------------------
    if not lookup:
        data_path = data_file
        print("Returning file path: {0}".format(data_path))
        return data_path

    #-------------------------------------------------------------------------
    # If hashes not provided, simply download file or return file path:
    #-------------------------------------------------------------------------
    if not hashes:
        if url:
            # Imported only where a download is actually needed:
            from mindboggle.mio.fetch_data import fetch_data

            # Download file as a temporary file:
            data_path = fetch_data(url + data_file)
            print("Hashes not provided. Retrieved file: {0}".format(data_path))
            return data_path

        data_path = data_file
        print("Neither hashes nor URL provided. "
              "Returning file path: {0}".format(data_path))
        return data_path

    #-------------------------------------------------------------------------
    # Check hash table for file:
    #-------------------------------------------------------------------------
    if data_file not in hashes:
        if return_missing:
            data_path = data_file
            print("Retrieved file not in hashes: {0}".format(data_path))
            return data_path
        sys.exit("Data file '{0}' not in hash table.".format(data_file))
    stored_hash = hashes[data_file]

    #-------------------------------------------------------------------------
    # Resolve cache directory: the environment variable takes precedence,
    # then the cache argument, then a default within the home directory.
    # The home directory is only looked up when it is really needed:
    #-------------------------------------------------------------------------
    if not cache_env:
        cache_env = 'MINDBOGGLE_CACHE'
    if cache_env in os.environ:
        cache = os.environ[cache_env]
    elif not cache:
        cache = os.path.join(os.path.expanduser('~'), 'hash_temp')

    #-------------------------------------------------------------------------
    # Create missing cache and hash directories (including any parents):
    #-------------------------------------------------------------------------
    if not os.path.exists(cache):
        print("Create missing cache directory: {0}".format(cache))
        os.makedirs(cache)
    hash_dir = os.path.join(cache, stored_hash)
    if not os.path.exists(hash_dir):
        print("Create missing hash directory: {0}".format(hash_dir))
        os.makedirs(hash_dir)

    #-------------------------------------------------------------------------
    # Check hash subdirectory for file:
    #-------------------------------------------------------------------------
    data_path = os.path.join(hash_dir, data_file)
    if os.path.exists(data_path):
        return data_path

    #-------------------------------------------------------------------------
    # If file not in cache, download, compute hash, and verify:
    #-------------------------------------------------------------------------
    from mindboggle.mio.fetch_data import fetch_data, fetch_hash

    print("Retrieve file from the Mindboggle website: {0}".
          format(url + data_file))

    # Download file as a temporary file:
    temp_file = fetch_data(url + data_file)

    # Compute the file's hash:
    data_hash = fetch_hash(temp_file)

    # A mismatch is an error: returning nothing here would hand the caller
    # None in place of a file path.
    if data_hash != stored_hash:
        raise IOError("Retrieved hash does not match stored hash.")

    print("Copy file to cache: {0}".format(data_path))
    shutil.copyfile(temp_file, data_path)
    return data_path
Ejemplo n.º 4
0
def fetch_check_data(data_file,
                     url='',
                     hashes=None,
                     cache_env='',
                     cache='',
                     return_missing=False,
                     lookup=True):
    """
    Get data file through a URL call and check its hash.

    Steps ::
        If lookup is False, simply return the file name.
        If hashes provided:
            1. Check hash table for data file.
            2. Check hash subdirectory within cache directory for data file.
            3. If data file not in cache, download file, compute hash,
               and verify hash.
            4. If hash correct, save file; otherwise, raise an error.
        Otherwise, simply download file or return file path as a string.

    Parameters
    ----------
    data_file : string
        data file name
    url : string
        URL prefix for data file (data_file is appended to it)
    hashes : dictionary
        file names and hashes (if empty or None, simply download file
        from url, or return data_file if url is also empty)
    cache_env : string
        environment variable name for cache path
        (defaults to 'MINDBOGGLE_CACHE')
    cache : string
        in case cache_env is not set in the environment, use as cache
        directory (defaults to 'hash_temp' in the user's home directory)
    return_missing : Boolean
        if data_file is not in hashes, return data_file unchanged
        (nothing is downloaded) instead of exiting
    lookup : Boolean
        if False, skip all checks and simply return data_file

    Returns
    -------
    data_path : string
        path to the cached or downloaded file, or data_file itself when
        no lookup is performed

    Raises
    ------
    SystemExit
        if data_file is not in hashes and return_missing is False
    IOError
        if the hash of the downloaded file does not match the stored hash

    Examples
    --------
    >>> import os
    >>> from mindboggle.mio.fetch_data import hashes_url
    >>> from mindboggle.mio.fetch_data import fetch_check_data
    >>> hashes, url, cache_env, cache = hashes_url()
    >>> data_file = list(hashes)[0]
    >>> fetch_check_data(data_file, url, hashes, cache_env, cache) # doctest: +SKIP

    """
    import os
    import sys
    import shutil

    #-------------------------------------------------------------------------
    # No lookup requested: simply return file path:
    #-------------------------------------------------------------------------
    if not lookup:
        data_path = data_file
        print("Returning file path: {0}".format(data_path))
        return data_path

    #-------------------------------------------------------------------------
    # If hashes not provided, simply download file or return file path:
    #-------------------------------------------------------------------------
    if not hashes:
        if url:
            # The download helper is imported only on the paths that use it:
            from mindboggle.mio.fetch_data import fetch_data

            # Download file as a temporary file:
            data_path = fetch_data(url + data_file)
            print("Hashes not provided. Retrieved file: {0}".format(data_path))
            return data_path

        data_path = data_file
        print("Neither hashes nor URL provided. "
              "Returning file path: {0}".format(data_path))
        return data_path

    #-------------------------------------------------------------------------
    # Check hash table for file:
    #-------------------------------------------------------------------------
    if data_file not in hashes:
        if return_missing:
            data_path = data_file
            print(
                "Retrieved file not in hashes: {0}".format(data_path))
            return data_path
        sys.exit(
            "Data file '{0}' not in hash table.".format(data_file))
    stored_hash = hashes[data_file]

    #-------------------------------------------------------------------------
    # Resolve cache directory. Precedence: environment variable, then the
    # cache argument, then 'hash_temp' in the home directory (which is only
    # looked up when neither of the first two supplies a path):
    #-------------------------------------------------------------------------
    if not cache_env:
        cache_env = 'MINDBOGGLE_CACHE'
    if cache_env in os.environ:
        cache = os.environ[cache_env]
    elif not cache:
        cache = os.path.join(os.path.expanduser('~'), 'hash_temp')

    #-------------------------------------------------------------------------
    # Create missing cache and hash directories (including any parents):
    #-------------------------------------------------------------------------
    if not os.path.exists(cache):
        print("Create missing cache directory: {0}".format(cache))
        os.makedirs(cache)
    hash_dir = os.path.join(cache, stored_hash)
    if not os.path.exists(hash_dir):
        print(
            "Create missing hash directory: {0}".format(hash_dir))
        os.makedirs(hash_dir)

    #-------------------------------------------------------------------------
    # Check hash subdirectory for file:
    #-------------------------------------------------------------------------
    data_path = os.path.join(hash_dir, data_file)
    if os.path.exists(data_path):
        return data_path

    #-------------------------------------------------------------------------
    # If file not in cache, download, compute hash, and verify:
    #-------------------------------------------------------------------------
    from mindboggle.mio.fetch_data import fetch_data, fetch_hash

    print("Retrieve file from the Mindboggle website: {0}".
          format(url + data_file))

    # Download file as a temporary file:
    temp_file = fetch_data(url + data_file)

    # Compute the file's hash:
    data_hash = fetch_hash(temp_file)

    # Fail loudly on a mismatch; falling through would implicitly return
    # None to a caller that expects a file path.
    if data_hash != stored_hash:
        raise IOError("Retrieved hash does not match stored hash.")

    print("Copy file to cache: {0}".format(data_path))
    shutil.copyfile(temp_file, data_path)
    return data_path