from datetime import timedelta

import pandas as pd

# `config` (config/config.py, which stores the raw-data URLs) and
# `fetch_data` (downloads a URL to a local file) are assumed to be
# importable from elsewhere in this repository.
from config import config
from fetch_data import fetch_data


def df_devices(devices, sensor, start=None, stop=None):
    """
    Build a pandas DataFrame from a set of CSV files whose URLs are
    stored in config/config.py.

    Parameters
    ----------
    devices : list of strings
        each string is the name of one of the devices to compare

    sensor : string
        the sensor to compare

    start : datetime, optional
        beginning of time to compare

    stop : datetime, optional
        end of time to compare

    Returns
    -------
    df : pandas DataFrame
        merged DataFrame with a column per device
    """
    s = []
    for device in devices:
        device_suffix = device.replace(" ", "_")
        d = pd.read_csv(fetch_data(config.rawurls[sensor][device]),
                        parse_dates=['Timestamp'],
                        infer_datetime_format=True)
        # Default to the full time range of the first device read:
        start = min(d['Timestamp']) if not start else start
        stop = max(d['Timestamp']) if not stop else stop
        d = d.loc[(d['Timestamp'] >= start) &
                  (d['Timestamp'] <= stop)].copy()
        # Shift ActiGraph timestamps back 1 ms to align sample times:
        if device == 'ActiGraph wGT3X-BT':
            d['Timestamp'] = d['Timestamp'] - timedelta(milliseconds=1)
        d.set_index('Timestamp', inplace=True)
        # Suffix every column with the device name:
        d.rename(columns={c: "_".join([c, device_suffix])
                          for c in d.columns}, inplace=True)
        s.append(d)
    df = s[0].merge(s[1], left_index=True, right_index=True,
                    suffixes=("_" + devices[0], "_" + devices[1]))
    for i in range(2, len(s)):
        df = df.merge(s[i], left_index=True, right_index=True,
                      suffixes=('', "_" + devices[i]))
    return df
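A minimal usage sketch, assuming config.rawurls is a nested dictionary mapping sensor name to device name to CSV URL. 'ActiGraph wGT3X-BT' appears in df_devices itself; the second device name, the sensor key, and the times are hypothetical placeholders:

from datetime import datetime

df = df_devices(devices=['ActiGraph wGT3X-BT', 'E4'],
                sensor='accelerometer',
                start=datetime(2017, 4, 28, 15, 0),
                stop=datetime(2017, 4, 28, 16, 0))
print(df.head())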
def fetch_check_data(data_file, url, hashes, cache_directory='',
                     append='', verbose=False):
    """
    Get data file through a URL call and check its hash:

    1. Check hash table for data file name.
    2. Check hash subdirectory within cache directory for data file.
    3. If data file not in cache, download, compute hash, and verify hash.
    4. If hash correct, save file (+ append); otherwise, raise an error.

    Parameters
    ----------
    data_file : string
        name of file (not the full path)
    url : string
        URL for data file
    hashes : dictionary
        file names and md5 hashes (if empty, simply download file from url)
    cache_directory : string
        cache directory (full path)
    append : string
        append to output file (ex: '.nii.gz')
    verbose : bool
        print statements?

    Returns
    -------
    data_path : string
        data file name (full path)

    Examples
    --------
    >>> from mindboggle.mio.fetch_data import fetch_check_data
    >>> from mindboggle.mio.fetch_data import cache_hashes
    >>> # osf.io URL for OASIS-30_Atropos_template_to_MNI152_affine.txt
    >>> data_file = 'OASIS-30_Atropos_template_to_MNI152_affine.txt'
    >>> url = 'https://osf.io/ufydw/?action=download&version=1'
    >>> hashes = cache_hashes()
    >>> cache_directory = ''
    >>> append = ''
    >>> verbose = False
    >>> data_path = fetch_check_data(data_file, url, hashes, cache_directory,
    ...                              append, verbose) # doctest: +SKIP

    """
    import os
    import shutil

    from mindboggle.mio.fetch_data import fetch_data, fetch_hash

    # ------------------------------------------------------------------------
    # Set temporary cache directory if not specified:
    # ------------------------------------------------------------------------
    if not cache_directory:
        cache_directory = os.path.join(os.environ['HOME'], 'hash_temp')

    # ------------------------------------------------------------------------
    # Check hash table for file name, and store corresponding hash:
    # ------------------------------------------------------------------------
    if hashes and data_file in hashes:
        stored_hash = hashes[data_file]

        # --------------------------------------------------------------------
        # Create missing cache and hash directories:
        # --------------------------------------------------------------------
        if not os.path.exists(cache_directory):
            if verbose:
                print("Create missing cache directory: {0}".format(
                    cache_directory))
            os.mkdir(cache_directory)
        hash_dir = os.path.join(cache_directory, stored_hash)
        if not os.path.exists(hash_dir):
            if verbose:
                print("Create missing hash directory: {0}".format(hash_dir))
            os.mkdir(hash_dir)

        # --------------------------------------------------------------------
        # Check hash subdirectory for file:
        # --------------------------------------------------------------------
        data_path = os.path.join(hash_dir, data_file)
        if os.path.exists(data_path):
            if verbose:
                print("File already exists and matches hash: {0}".format(url))
            return data_path
        # --------------------------------------------------------------------
        # If file not in cache, download, compute hash, and verify:
        # --------------------------------------------------------------------
        else:
            if verbose:
                print("Retrieve file from URL: {0}".format(url))

            # Download file as a temporary file:
            temp_file = fetch_data(url)

            # Compute the file's hash:
            data_hash = fetch_hash(temp_file)

            # If hash matches name of the hash directory, save file:
            if os.path.join(cache_directory, data_hash) == hash_dir:
                # Add append:
                if append:
                    data_path += append
                if verbose:
                    print("Copy file to cache: {0}".format(data_path))
                shutil.copyfile(temp_file, data_path)
                return data_path
            else:
                raise IOError("Retrieved hash does not match stored hash.")
    else:
        raise IOError("Data file '{0}' not in hash table.".format(data_file))
def fetch_check_data(data_file, url='', hashes={}, cache_env='', cache='',
                     return_missing=False, lookup=True):
    """
    Get data file through a URL call and check its hash.

    Steps ::

        If hashes provided:
            1. Check hash table for data file.
            2. Check hash subdirectory within cache directory for data file.
            3. If data file not in cache, download file, compute hash,
               and verify hash.
            4. If hash correct, save file.
        Otherwise, simply download file or return file path as a string.

    Parameters
    ----------
    data_file : string
        data file name
    url : string
        URL for data file
    hashes : dictionary
        file names and md5 hashes (if empty, simply download file from url)
    cache_env : string
        environment variable name for cache path
    cache : string
        in case cache_env is not set, use as cache directory
    return_missing : Boolean
        if data_file not in hashes, return its path as-is instead of exiting
    lookup : Boolean
        if False, skip lookup/download and simply return data_file path

    Returns
    -------
    data_path : string
        data file name (full path)

    Examples
    --------
    >>> import os
    >>> from mindboggle.mio.fetch_data import hashes_url
    >>> from mindboggle.mio.fetch_data import fetch_check_data
    >>> hashes, url, cache_env, cache = hashes_url()
    >>> data_file = list(hashes)[0]
    >>> fetch_check_data(data_file, url, hashes, cache_env,
    ...                  cache) # doctest: +SKIP

    """
    import os
    import sys
    import shutil

    from mindboggle.mio.fetch_data import fetch_data, fetch_hash

    if lookup:
        #----------------------------------------------------------------------
        # If hashes provided, go through steps to check/download file:
        #----------------------------------------------------------------------
        if hashes:
            if not cache_env:
                cache_env = 'MINDBOGGLE_CACHE'
            if not cache:
                cache = os.path.join(os.environ['HOME'], 'hash_temp')

            #------------------------------------------------------------------
            # Check hash table for file:
            #------------------------------------------------------------------
            if data_file not in hashes:
                if return_missing:
                    data_path = data_file
                    print("Retrieved file not in hashes: {0}".
                          format(data_path))
                    return data_path
                else:
                    sys.exit("Data file '{0}' not in hash table.".
                             format(data_file))
            else:
                stored_hash = hashes[data_file]

                #--------------------------------------------------------------
                # Create missing cache and hash directories:
                #--------------------------------------------------------------
                if cache_env in os.environ:
                    cache = os.environ[cache_env]
                if not os.path.exists(cache):
                    print("Create missing cache directory: {0}".format(cache))
                    os.mkdir(cache)
                hash_dir = os.path.join(cache, stored_hash)
                if not os.path.exists(hash_dir):
                    print("Create missing hash directory: {0}".
                          format(hash_dir))
                    os.mkdir(hash_dir)

                #--------------------------------------------------------------
                # Check hash subdirectory for file:
                #--------------------------------------------------------------
                data_path = os.path.join(hash_dir, data_file)
                if os.path.exists(data_path):
                    return data_path
                #--------------------------------------------------------------
                # If file not in cache, download, compute hash, and verify:
                #--------------------------------------------------------------
                else:
                    print("Retrieve file from the Mindboggle website: {0}".
                          format(url + data_file))

                    # Download file as a temporary file:
                    temp_file = fetch_data(url + data_file)

                    # Compute the file's hash:
                    data_hash = fetch_hash(temp_file)

                    # If hash matches name of the hash directory, save file
                    # (otherwise fall through and return None):
                    if os.path.join(cache, data_hash) == hash_dir:
                        print("Copy file to cache: {0}".format(data_path))
                        shutil.copyfile(temp_file, data_path)
                        return data_path
                    else:
                        print("Retrieved hash does not match stored hash.")

        #----------------------------------------------------------------------
        # If hashes not provided, simply download file:
        #----------------------------------------------------------------------
        elif url:
            # Download file as a temporary file:
            data_path = fetch_data(url + data_file)
            print("Hashes not provided. Retrieved file: {0}".format(data_path))
            return data_path

        #----------------------------------------------------------------------
        # If URL also not provided, simply return file path:
        #----------------------------------------------------------------------
        else:
            data_path = data_file
            print("Neither hashes nor URL provided. "
                  "Returning file path: {0}".format(data_path))
            return data_path

    #--------------------------------------------------------------------------
    # Simply return file path:
    #--------------------------------------------------------------------------
    else:
        data_path = data_file
        print("Returning file path: {0}".format(data_path))
        return data_path
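A quick sketch of this version's fallback paths, which the later version above replaces with raised IOErrors. The file name and URL are hypothetical placeholders:

# With neither hashes nor URL, the path is returned unchanged:
path = fetch_check_data('atlas.nii.gz')

# With a base URL but no hashes, the file is downloaded without
# hash verification (hypothetical URL):
path = fetch_check_data('atlas.nii.gz', url='https://example.org/data/')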