def create_epoch_datasets(subject, S, dataset_prefix, idx=1, total=1):
    """
    Derive epoch datasets of several epoch lengths for one subject and store them in HDF5

    For every value in S the dataset '<dataset_prefix>10' is read between the actigraph
    start and stop time (requested with upscale_epoch = True, from 10 seconds to the
    given number of seconds). The first three columns of the result are saved in the
    subject's group under the name '<dataset_prefix><seconds>'.

    Processing stops at the first epoch length for which no data is returned.

    Parameters
    ---------
    subject : string
        subject ID, also used as the HDF5 group name
    S : iterable of int
        epoch lengths in seconds to create
    dataset_prefix : string
        prefix of the epoch dataset names (source is prefix + 10, targets are prefix + seconds)
    idx : int (optional)
        index of counter, only used in the status log message
    total : int (optional)
        total number of subjects to process, only used in the status log message
    """

    logging.info('{style} Processing subject: {} {}/{} {style}'.format(subject, idx, total, style = '='*10))

    # recording window of the actigraph device for this subject
    start_time, stop_time = _get_actigraph_start_stop(subject)

    # all epoch lengths are derived from the same 10s source dataset
    source_dataset = '{}{}'.format(dataset_prefix, 10)

    for epoch_sec in S:

        logging.debug('seconds of epoch data : {}'.format(epoch_sec))

        df_epoch_data = _read_epoch_dataset(
            subject,
            source_dataset,
            start_time,
            stop_time,
            use_vmu=False,
            upscale_epoch=True,
            start_epoch_sec=10,
            end_epoch_sec=epoch_sec,
        )

        # without source data there is nothing to create for this subject
        if df_epoch_data is None:
            logging.warning('No epoch data found, skipping...')
            return

        # keep only the first three columns (XYZ) as a numpy array and write them to HDF5
        save_data_to_group_hdf5(
            group=subject,
            data=df_epoch_data.values[:, :3],
            data_name='{}{}'.format(dataset_prefix, epoch_sec),
            overwrite=True,
            create_group_if_not_exists=False,
            hdf5_file=ACTIWAVE_ACTIGRAPH_MAPPING_HDF5_FILE,
        )
def process_hees_2013(subject, save_hdf5, idx=1, total=1):
    """
    Estimate non-wear time for one subject with the Hees 2013 algorithm and save it to HDF5

    Reference given for the algorithm:

    Estimation of Daily Energy Expenditure in Pregnant and Non-Pregnant Women Using a Wrist-Worn Tri-Axial Accelerometer
    Vincent T. van Hees, Frida Renström, Antony Wright, Anna Gradmark, Michael Catt, Kong Y. Chen, Marie Löf, Les Bluck,
    Jeremy Pomeroy, Nicholas J. Wareham, Ulf Ekelund, Søren Brage, Paul W. Franks
    Published: July 29, 2011 https://doi.org/10.1371/journal.pone.0022922

    From the paper: accelerometer non-wear time was estimated on the basis of the standard deviation and the
    value range of each accelerometer axis, calculated for consecutive blocks of 30 minutes. A block was
    classified as non-wear time if the standard deviation was less than 3.0 mg (1 mg = 0.00981 m·s−2) for at
    least two out of the three axes or if the value range, for at least two out of three axes, was less than 50 mg.

    Parameters
    ---------
    subject : string
        subject ID, also used as the HDF5 group name
    save_hdf5 : os.path
        location of HDF5 file to save non wear data to
    idx : int (optional)
        index of counter, only used in the status log message
    total : int (optional)
        total number of subjects to process, only used in the status log message
    """

    logging.info('{style} Processing subject: {} {}/{} {style}'.format(subject, idx, total, style = '='*10))

    # only the first return value (the acceleration data) is needed here
    actigraph_acc, *_ = get_actigraph_acc_data(
        subject,
        hdf5_file=ACTIWAVE_ACTIGRAPH_MAPPING_HDF5_FILE,
    )

    # run the non-wear detection and write its result into the subject's (existing) group
    save_data_to_group_hdf5(
        group=subject,
        data=hees_2013_calculate_non_wear_time(actigraph_acc),
        data_name='hees_2013_non_wear_data',
        overwrite=True,
        create_group_if_not_exists=False,
        hdf5_file=save_hdf5,
    )
def perform_inference_segmentation(paths, params):
    """
    Run the trained model over every image slice of every patient and save the segmented volumes to HDF5

    For each dataset in the HDF5 group params['group_no_bg'], the images are read, multiplied
    by params['rescale_factor'], and each slice is passed to classify_img_feature in parallel.
    The returned slices are assembled into one uint8 array that is saved, under the patient
    name, in the group params['group_segmented_classification_mri'] of the same HDF5 file.

    Parameters
    --------------
    paths : dict
        must contain 'hdf5_folder' (folder of the HDF5 file) and 'model_folder' (folder with trained models)
    params : dict
        must contain 'hdf5_file', 'cnn_model', 'group_no_bg', 'rescale_factor', 'feature_size',
        'step_size' and 'group_segmented_classification_mri'
    """

    # hdf5 file that contains the images and that receives the segmentation
    hdf5_file = os.path.join(paths['hdf5_folder'], params['hdf5_file'])
    # path to trained CNN model
    model_file = os.path.join(paths['model_folder'], params['cnn_model'], 'model.h5')

    # each dataset in the group is one patient
    patients = get_datasets_from_group(group_name=params['group_no_bg'], hdf5_file=hdf5_file)

    # the executor does not depend on the patient, so build it once and reuse it
    executor = Parallel(n_jobs=cpu_count(), backend='multiprocessing')

    # start counting at 1 so the status reads 1/N ... N/N
    for i, patient in enumerate(patients, start=1):

        logging.info(f'Processing patient: {patient} {i}/{len(patients)}')

        # read images
        images = read_dataset_from_group(dataset=patient, group_name=params['group_no_bg'], hdf5_file=hdf5_file)
        # rescale the images (original note: 12bit images to 0-1)
        images = images * params['rescale_factor']

        # output array with the same shape as the input, one uint8 label image per slice
        segmented_images = np.empty_like(images, dtype='uint8')

        # one task per slice (first axis of the image array)
        tasks = (
            delayed(classify_img_feature)(
                img=images[img_slice],
                slice_idx=img_slice,
                feature_size=params['feature_size'],
                step_size=params['step_size'],
                model_file=model_file,
                verbose=True,
            )
            for img_slice in range(images.shape[0])
        )

        # every task returns its slice index, which is used to place the result
        for segmented_image, slice_idx in executor(tasks):
            segmented_images[slice_idx] = segmented_image

        # save segmented images to HDF5 file
        save_data_to_group_hdf5(
            group=params['group_segmented_classification_mri'],
            data=segmented_images,
            data_name=patient,
            hdf5_file=hdf5_file,
            overwrite=True,
        )
def process_hecht_2009_triaxial(subject, save_hdf5, idx=1, total=1, epoch_dataset='epoch10'):
    """
    Estimate non-wear time for one subject from 60s vector magnitude (VMU) data, Hecht 2009 algorithm

    Paper:
    COPD. 2009 Apr;6(2):121-9. doi: 10.1080/15412550902755044.
    Methodology for using long-term accelerometry monitoring to describe daily activity patterns in COPD.
    Hecht A1, Ma S, Porszasz J, Casaburi R; COPD Clinical Research Network.

    The result saved to HDF5 is a two-part array: the epoch timestamps (as int64) in the
    first column, followed by the non-wear vector returned by the algorithm. If the subject
    has no dataset called epoch_dataset, a warning is logged and nothing is saved.

    Parameters
    ---------
    subject : string
        subject ID, also used as the HDF5 group name
    save_hdf5 : os.path
        location of HDF5 file to save non wear data to
    idx : int (optional)
        index of counter, only used in the status log message
    total : int (optional)
        total number of subjects to process, only used in the status log message
    epoch_dataset : string (optional)
        name of dataset within an HDF5 group that contains the 10sec epoch data
    """

    logging.info('{style} Processing subject: {} {}/{} {style}'.format(subject, idx, total, style = '='*10))

    # --- actigraph data: first and last raw timestamp define the window to analyse ---
    _, _, actigraph_time = get_actigraph_acc_data(subject, hdf5_file=ACTIWAVE_ACTIGRAPH_MAPPING_HDF5_FILE)
    start_time, stop_time = actigraph_time[0], actigraph_time[-1]

    # --- epoch data: bail out when the subject has no such dataset ---
    if epoch_dataset not in get_datasets_from_group(group_name=subject, hdf5_file=ACTIGRAPH_HDF5_FILE):
        logging.warning('Subject {} has no corresponding epoch data, skipping...'.format(subject))
        return

    # 10s epoch data -> 60s epoch data -> VMU of the first three columns
    epoch_data, _, epoch_time_data = get_actigraph_epoch_data(subject, epoch_dataset=epoch_dataset, hdf5_file=ACTIGRAPH_HDF5_FILE)
    epoch_60_data, epoch_60_time_data = get_actigraph_epoch_60_data(epoch_data, epoch_time_data)
    epoch_60_vmu_data = calculate_vector_magnitude(epoch_60_data[:, :3], minus_one=False, round_negative_to_zero=False)

    # --- non wear vector: index by time so the frame can be cropped to the raw recording window ---
    df_epoch_60_vmu = pd.DataFrame(epoch_60_vmu_data, index=epoch_60_time_data, columns=['VMU'])
    df_epoch_60_vmu = df_epoch_60_vmu.loc[start_time:stop_time]

    non_wear_vector = hecht_2009_triaxial_calculate_non_wear_time(data=df_epoch_60_vmu.values)

    # cropped timestamps as an int64 column vector, placed in front of the non-wear vector
    time_column = np.array(df_epoch_60_vmu.index).astype('int64').reshape(-1, 1)
    combined_data = np.hstack((time_column, non_wear_vector))

    # --- save to HDF5 file ---
    save_data_to_group_hdf5(
        group=subject,
        data=combined_data,
        data_name='hecht_2009_3_axes_non_wear_data',
        overwrite=True,
        create_group_if_not_exists=True,
        hdf5_file=save_hdf5,
    )
def process_choi_2011(subject, save_hdf5, idx=1, total=1, epoch_dataset='epoch10'):
    """
    Estimate non-wear time for one subject from 60s epoch counts, Choi 2011 algorithm

    Paper:
    Med Sci Sports Exerc. 2011 Feb;43(2):357-64. doi: 10.1249/MSS.0b013e3181ed61a3.
    Validation of accelerometer wear and nonwear time classification algorithm.
    Choi L1, Liu Z, Matthews CE, Buchowski MS.

    The result saved to HDF5 is the epoch timestamps (as int64) in the first column, followed
    by the non-wear vector returned by the algorithm. If the subject has no dataset called
    epoch_dataset, a warning is logged and nothing is saved.

    Parameters
    ---------
    subject : string
        subject ID, also used as the HDF5 group name
    save_hdf5 : os.path
        location of HDF5 file to save non wear data to
    idx : int (optional)
        index of counter, only used in the status log message
    total : int (optional)
        total number of subjects to process, only used in the status log message
    epoch_dataset : string (optional)
        name of dataset within an HDF5 group that contains the 10sec epoch data
    """

    logging.info('{style} Processing subject: {} {}/{} {style}'.format(subject, idx, total, style = '='*10))

    # --- actigraph data: first and last raw timestamp define the window to analyse ---
    _, _, actigraph_time = get_actigraph_acc_data(subject, hdf5_file=ACTIWAVE_ACTIGRAPH_MAPPING_HDF5_FILE)
    start_time, stop_time = actigraph_time[0], actigraph_time[-1]

    # --- epoch data: bail out when the subject has no such dataset ---
    available_datasets = get_datasets_from_group(group_name=subject, hdf5_file=ACTIGRAPH_HDF5_FILE)
    if epoch_dataset not in available_datasets:
        logging.warning('Subject {} has no corresponding epoch data, skipping...'.format(subject))
        return

    # 10s epoch data -> 60s epoch data, of which the first three columns hold the counts
    epoch_data, _, epoch_time_data = get_actigraph_epoch_data(subject, epoch_dataset=epoch_dataset, hdf5_file=ACTIGRAPH_HDF5_FILE)
    epoch_60_data, epoch_60_time_data = get_actigraph_epoch_60_data(epoch_data, epoch_time_data)
    epoch_60_count_data = epoch_60_data[:, :3]

    # --- non wear vector: index by time so the frame can be cropped to the raw recording window ---
    count_columns = ['X - COUNT', 'Y - COUNT', 'Z - COUNT']
    df_epoch_60_count = pd.DataFrame(epoch_60_count_data, index=epoch_60_time_data, columns=count_columns)
    df_epoch_60_count = df_epoch_60_count.loc[start_time:stop_time]

    non_wear_vector = choi_2011_calculate_non_wear_time(
        data=df_epoch_60_count.values,
        time=df_epoch_60_count.index.values,
    )

    # cropped timestamps as an int64 column vector, placed in front of the non-wear vector
    time_column = np.array(df_epoch_60_count.index).astype('int64').reshape(-1, 1)
    combined_data = np.hstack((time_column, non_wear_vector))

    # --- save to HDF5 file ---
    save_data_to_group_hdf5(
        group=subject,
        data=combined_data,
        data_name='choi_2011_non_wear_data',
        overwrite=True,
        create_group_if_not_exists=False,
        hdf5_file=save_hdf5,
    )
def process_troiano_2007(subject, save_hdf5, idx=1, total=1, epoch_dataset='epoch10'):
    """
    Estimate non-wear time for one subject from 60s epoch counts, Troiano 2007 algorithm

    Nonwear was defined by an interval of at least 60 consecutive minutes of zero activity
    intensity counts, with allowance for 1–2 min of counts between 0 and 100.

    Paper:
    Physical Activity in the United States Measured by Accelerometer
    DOI: 10.1249/mss.0b013e31815a51b3

    The result saved to HDF5 is the epoch timestamps (as int64) in the first column, followed
    by the non-wear vector returned by the algorithm. If the subject has no dataset called
    epoch_dataset, a warning is logged and nothing is saved.

    Parameters
    ---------
    subject : string
        subject ID, also used as the HDF5 group name
    save_hdf5 : os.path
        location of HDF5 file to save non wear data to
    idx : int (optional)
        index of counter, only used in the status log message
    total : int (optional)
        total number of subjects to process, only used in the status log message
    epoch_dataset : string (optional)
        name of dataset within an HDF5 group that contains the 10sec epoch data
    """

    logging.info('{style} Processing subject: {} {}/{} {style}'.format(subject, idx, total, style = '='*10))

    # --- actigraph data: first and last raw timestamp define the window to analyse ---
    _, _, actigraph_time = get_actigraph_acc_data(subject, hdf5_file=ACTIWAVE_ACTIGRAPH_MAPPING_HDF5_FILE)
    start_time = actigraph_time[0]
    stop_time = actigraph_time[-1]

    # --- epoch data: bail out when the subject has no such dataset ---
    if epoch_dataset not in get_datasets_from_group(group_name=subject, hdf5_file=ACTIGRAPH_HDF5_FILE):
        logging.warning('Subject {} has no corresponding epoch data, skipping...'.format(subject))
        return

    # 10s epoch data -> 60s epoch data, of which the first three columns hold the counts
    epoch_data, _, epoch_time_data = get_actigraph_epoch_data(subject, epoch_dataset=epoch_dataset, hdf5_file=ACTIGRAPH_HDF5_FILE)
    epoch_60_data, epoch_60_time_data = get_actigraph_epoch_60_data(epoch_data, epoch_time_data)

    # --- non wear vector: index by time so the frame can be cropped to the raw recording window ---
    df_epoch_60_count = pd.DataFrame(
        epoch_60_data[:, :3],
        index=epoch_60_time_data,
        columns=['X - COUNT', 'Y - COUNT', 'Z - COUNT'],
    ).loc[start_time:stop_time]

    non_wear_vector = troiano_2007_calculate_non_wear_time(
        data=df_epoch_60_count.values,
        time=df_epoch_60_count.index.values,
    )

    # cropped timestamps as an int64 column vector, placed in front of the non-wear vector
    cropped_time = np.array(df_epoch_60_count.index).astype('int64')
    combined_data = np.hstack((cropped_time.reshape(-1, 1), non_wear_vector))

    # --- save to HDF5 file ---
    save_data_to_group_hdf5(
        group=subject,
        data=combined_data,
        data_name='troiano_2007_non_wear_data',
        overwrite=True,
        create_group_if_not_exists=True,
        hdf5_file=save_hdf5,
    )
def process_gt3x_file(f, i=1, total=1, hdf5_save_location=HDF5_SAVE, delete_zip_folder=True):
    """
    Process .gt3x file
        - unzip into log.bin and info.txt
        - extract information from info.txt
        - extract information from log.bin
        - save data to hdf5 file

    Nothing is saved when unzipping fails, when info.txt has an empty subject name, when the
    subject already exists in hdf5_save_location, or when log.bin cannot be converted; each
    case is logged.

    Parameters
    ----------
    f : string
        file location of the .gt3x file
    i : int (optional)
        index of file to be processed, is used to display a counter of the process. Default = 1.
        The log message shows i + 1, so a zero-based index seems to be expected (assumption, verify against caller)
    total : int (optional)
        total number of files to be processed, is used to display a counter of the process. Default = 1.
    hdf5_save_location : os.path
        HDF5 file where to save the extracted acceleration data to; also the file that is
        checked for already processed subjects
    delete_zip_folder : Boolean (optional)
        if True, the folder created by unzipping is deleted afterwards. Default = True
    """

    # local import keeps this block self-contained regardless of the file's import header
    import os

    logging.debug('Processing GTX3 binary file: {} {}/{}'.format(
        f, i + 1, total))

    # folder with the same name as the .gt3x file, without its extension. splitext only strips
    # the final extension, so dots elsewhere in the path (e.g. './data/x.gt3x') are kept intact
    unzip_folder = os.path.splitext(f)[0]

    # unzip the raw .gt3x file: this will provide a log.bin and info.txt file
    log_bin, info_txt = unzip_gt3x_file(f, save_location=unzip_folder)

    if log_bin is None:
        logging.error("Error unzipping file: {}".format(f))
    else:
        logging.debug('log.bin location: {}'.format(log_bin))
        logging.debug('info.txt location: {}'.format(info_txt))

        # get info data from info file
        info_data = extract_info(info_txt)
        subject = info_data['Subject_Name']

        # compare by value; an identity check against a string literal is not reliable
        if subject == "":
            logging.error(
                "Unable to read subject from info.txt file, skipping file: {}".
                format(f))
        # look for existing subjects in the same file the data will be written to
        elif subject in get_all_subjects_hdf5(hdf5_file=hdf5_save_location):
            logging.info(
                'Subject name already defined as group in HDF5 file: {}, skipping..'
                .format(subject))
        else:
            # retrieve log_data; i.e. accelerometer data and log_time; timestamps of acceleration data
            log_data, log_time = extract_log(
                log_bin,
                acceleration_scale=float(info_data['Acceleration_Scale']),
                sample_rate=int(info_data['Sample_Rate']))

            # None signals that something went wrong while reading the binary file
            if log_data is None:
                logging.error(
                    'Unable to convert .gt3x file: {} (subject {})'.format(
                        f, subject))
            else:
                # acceleration values and their timestamps go into the same group, both with the info meta data
                save_data_to_group_hdf5(group=subject,
                                        data=log_data,
                                        data_name='log',
                                        meta_data=info_data,
                                        overwrite=True,
                                        hdf5_file=hdf5_save_location)
                save_data_to_group_hdf5(group=subject,
                                        data=log_time,
                                        data_name='time',
                                        meta_data=info_data,
                                        overwrite=True,
                                        hdf5_file=hdf5_save_location)

    # delete the created zip folder (also attempted when unzipping reported an error)
    if delete_zip_folder:
        delete_directory(unzip_folder)

    # print time and memory
    # NOTE(review): tic and process are not defined in this function; presumably module-level names, confirm
    set_end(tic, process)
def _save_epoch_file(dic_header, data, epoch_sec):
    """Derive the subject ID from the header's file name and save header plus data to HDF5_SAVE."""

    # subject ID: split on /, take the last part, split on space, take the first part
    subject = dic_header['File Name'].split('/')[-1].split(' ')[0]
    dic_header['Subject'] = subject

    save_data_to_group_hdf5(group=subject,
                            data=data,
                            data_name='epoch{}'.format(epoch_sec),
                            meta_data=dic_header,
                            overwrite=True,
                            create_group_if_not_exists=True,
                            hdf5_file=HDF5_SAVE)


def batch_process_epoch_files(epoch_sec, epoch_folder=EPOCH_FOLDER, use_parallel=False, num_jobs=cpu_count(), limit=None, skip_n=0):
    """
    Read CSV epoch files from disk and save, per file, the header information and the epoch
    data returned by parse_epoch_file to HDF5.

    Parameters
    ------------
    epoch_sec : int
        number of seconds within a single epoch. Examples include 1 for 1 sec epochs, or 10 for 10s epochs.
        Used to name the saved dataset ('epoch<epoch_sec>')
    epoch_folder : os.path()
        folder location of the epoch files; searched for .csv files with the pattern '<folder>/**/*.csv'
    use_parallel : Boolean (optional)
        Set to true if files need to be processed in parallel
    num_jobs : int (optional)
        if use_parallel is set to true, the number of jobs executed at the same time, which is also the
        batch size. Default set to the number of CPU cores
    limit : int (optional)
        maximum number of files to be processed, counted after skipping. None means no limit
    skip_n : int (optional)
        skip first N files
    """

    # collect the .csv files here because there might also be other types of files in the folder
    all_files = glob2.glob(os.path.join(epoch_folder, '**', '*.csv'))

    # limit is a number of files, so the end of the slice has to be relative to skip_n
    end = None if limit is None else skip_n + limit
    epoch_files = all_files[skip_n:end]

    if use_parallel:

        logging.info('Processing in parallel (parallelization on)')

        # the executor is identical for every batch, so create it once
        executor = Parallel(n_jobs=num_jobs, backend='multiprocessing')

        # the parsed data has to be saved in this process, and holding the return values of all
        # files at once becomes too large, so the files are handled in batches of num_jobs
        for start_slice in range(0, len(epoch_files), num_jobs):

            end_slice = start_slice + num_jobs

            tasks = (delayed(parse_epoch_file)(file=f)
                     for f in epoch_files[start_slice:end_slice])

            for dic_header, data in executor(tasks):
                _save_epoch_file(dic_header, data, epoch_sec)

            # the last batch can be shorter than num_jobs, so cap the counter at the total
            logging.debug('{style} Processed {}/{} {style}'.format(
                min(end_slice, len(epoch_files)), len(epoch_files), style='=' * 10))
    else:

        # process files one-by-one
        for i, f in enumerate(epoch_files):

            logging.debug(
                '{style} Processing epoch file: {} {}/{} {style}'.format(
                    f, i + 1, len(epoch_files), style='=' * 10))

            dic_header, data = parse_epoch_file(f)
            _save_epoch_file(dic_header, data, epoch_sec)
def remove_bg(paths, params):
    """
    Remove background from MRI images

    Every dataset in the group params['group_original_mri'] is processed slice by slice:
    a filtered copy of the slice (contrast change, optional 8 bit conversion, maximum filter,
    erosion, gaussian blur) is segmented into two clusters, and the cluster found at pixel
    [0][0] is treated as background and set to 0 in the original slice. The masked slices
    are saved, together with the dataset's meta data, in the group params['group_no_bg'].

    Parameters
    --------------
    paths : dict
        must contain 'hdf5_folder', the folder of the HDF5 file
    params : dict
        must contain 'hdf5_file' (name of the HDF5 file that holds the raw MRI data and that
        receives the result), 'group_original_mri' (name of HDF5 group that contains the raw
        MRI images) and 'group_no_bg' (name of HDF5 group to store images with background removed)
    """

    # dynamically create hdf5 file
    hdf5_file = os.path.join(paths['hdf5_folder'], params['hdf5_file'])
    source_group = params['group_original_mri']

    # datasets that are not converted to 8 bit (reason not visible here)
    keep_bit_depth = ['Torsk 1-4 fersk']

    # read original MRI datasets from HDF5 file
    D = get_datasets_from_group(group_name=source_group, hdf5_file=hdf5_file)

    # start counting at 1 so the status reads 1/N ... N/N
    for d_idx, d in enumerate(D, start=1):

        logging.info(f'Processing dataset : {d} {d_idx}/{len(D)}')

        # read data and meta data of this dataset
        data = read_dataset_from_group(group_name=source_group, dataset=d, hdf5_file=hdf5_file)
        meta_data = read_metadata_from_group_dataset(group_name=source_group, dataset=d, hdf5_file=hdf5_file)

        logging.info(f'Processing patient : {meta_data["PatientName"]}')

        # new numpy array to hold the masked slices
        data_segmented = np.empty_like(data, dtype='int16')

        # process each slice
        for i in range(data.shape[0]):

            # the filters below only serve to find the mask; the mask is applied to data[i] itself
            img = change_img_contrast(data[i], phi=10, theta=1)

            # convert to 8 bit
            if d not in keep_bit_depth:
                img = np.array(img, dtype='uint8')

            # max filter, erosion, gaussian filter
            img = ndimage.maximum_filter(img, size=7)
            img = cv2.erode(img, None, iterations=4)
            img = cv2.GaussianBlur(img, (11, 11), 0)

            # two-cluster segmentation; the label of the top-left pixel is taken as background
            segmented_img = perform_knn_segmentation(n_clusters=2, img=img)
            data_segmented[i] = mask_image(img=data[i],
                                           segmented_img=segmented_img,
                                           mask_value=segmented_img[0][0],
                                           fill_value=0)

        # save data to HDF5
        save_data_to_group_hdf5(group=params['group_no_bg'],
                                data=data_segmented,
                                data_name=d,
                                hdf5_file=hdf5_file,
                                meta_data=meta_data,
                                overwrite=True)
def process_actiwave_file(f, i=1, total=1, acc_dtype=np.float32, ecg_dtype=np.float32, ms2_to_g=0.101972, hdf5_save_location=HDF5_SAVE):
    """
    Single processing of actiwave file
        - read EDF file
        - extract content and meta data
            - acceleration data YXZ
            - ecg data
            - estimated heart rate

    Data is only saved when the subject code from the EDF meta data occurs in the file name
    and ECG, acceleration (X, Y and Z) and estimated HR data are all present; otherwise a
    message is logged and the function returns without saving anything.

    Parameters
    ----------
    f : string
        file location of the EDF file
    i : int (optional)
        index of file to be processed, is used to display a counter of the process. Default = 1.
    total : int (optional)
        total number of files to be processed, is used to display a counter of the process. Default = 1.
    acc_dtype : datatype
        datatype the acceleration data is converted to. Defaults to np.float32
    ecg_dtype : datatype
        datatype the ecg data is converted to. Defaults to np.float32
    ms2_to_g : float
        conversion factor the acceleration values are multiplied with, to go from ms2
        (meter/square second) to g (gravity)
    hdf5_save_location : os.path
        HDF5 file where to save the extracted actiwave data to
    """

    logging.info('Processing EDF file: {} {}/{}'.format(f, i, total))

    # signal data and file-level meta data of the EDF file
    dic_data = read_edf_file(file=f)
    edf_meta_data = read_edf_meta_data(file=f)

    # the subject code doubles as the group name in the HDF5 file
    subject = edf_meta_data['Patient Code']

    # the subject is expected to be part of the file name as well
    if subject not in f:
        logging.error(
            'Mismatch between subject in file name {} and within EDF meta data {}'
            .format(f, subject))
        return

    # --- ECG data ---
    ecg_data = dic_data.get('ECG0')
    if ecg_data is None:
        logging.error('ECG data not available for file: {}'.format(f))
        return

    # column vector in the requested data type, plus the meta data of its channel
    ecg_data = ecg_data.reshape(((len(ecg_data), 1))).astype(dtype=ecg_dtype)
    ecg_meta_data = read_edf_channel_meta_data(file=f, channel=0)

    # --- acceleration data ---
    acc_x_data = dic_data.get('X')
    acc_y_data = dic_data.get('Y')
    acc_z_data = dic_data.get('Z')

    # all three axes are required
    if acc_x_data is None or acc_y_data is None or acc_z_data is None:
        logging.error('Acceleration data not available for file: {}'.format(f))
        return

    n_samples = len(acc_x_data)

    # one array with the axes as columns; note that the order here is YXZ
    acc_data = np.hstack([axis.reshape((n_samples, 1)) for axis in (acc_y_data, acc_x_data, acc_z_data)])
    # scale with the conversion factor, then convert to the requested data type
    acc_data = (acc_data * ms2_to_g).astype(dtype=acc_dtype)
    # meta data is read from channel 1 only (original note: channel 2 and 3 contain the same values)
    acc_meta_data = read_edf_channel_meta_data(file=f, channel=1)

    # --- estimated HR data ---
    hr_data = dic_data.get('Estimated HR')
    if hr_data is None:
        logging.warning(
            'Estimated HR data not available for file: {}'.format(f))
        return

    # column vector, plus the meta data of its channel
    hr_data = hr_data.reshape((len(hr_data), 1))
    hr_meta_data = read_edf_channel_meta_data(file=f, channel=4)

    # --- save data and meta data to HDF5: ecg, acceleration, estimated heart rate ---
    datasets = (
        ('ecg', ecg_data, ecg_meta_data),
        ('acceleration', acc_data, acc_meta_data),
        ('estimated_hr', hr_data, hr_meta_data),
    )
    for data_name, data, meta_data in datasets:
        save_data_to_group_hdf5(group=subject,
                                data=data,
                                data_name=data_name,
                                meta_data=meta_data,
                                overwrite=True,
                                create_group_if_not_exists=True,
                                hdf5_file=hdf5_save_location)

    # meta data of the edf file itself is attached to the subject's group
    save_meta_data_to_group(group_name=subject, meta_data=edf_meta_data, hdf5_file=hdf5_save_location)