def create_epoch_datasets(subject, S, dataset_prefix, idx = 1, total = 1):
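	"""
	Create epoch datasets of various epoch lengths from 10 s epoch data and save them to the HDF5 file

	Parameters
	---------
	subject : string
		subject ID
	S : list
		epoch lengths in seconds to create, for example [30, 60]
	dataset_prefix : string
		prefix of the epoch dataset name within the HDF5 group, for example 'epoch'
	idx : int (optional)
		index of counter, only useful when processing large batches and you want to monitor the status
	total : int (optional)
		total number of subjects to process, only useful when processing large batches and you want to monitor the status
	"""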

	logging.info('{style} Processing subject: {} {}/{} {style}'.format(subject, idx, total, style = '='*10))

	# get actigraph start and stop time
	start_time, stop_time = _get_actigraph_start_stop(subject)
	
	# seconds of epoch data
	for s in S:

		logging.debug('seconds of epoch data : {}'.format(s))

		# get 10 s epoch data and upscale it to s-second epochs
		df_epoch_data = _read_epoch_dataset(subject, '{}{}'.format(dataset_prefix, 10), start_time, stop_time, use_vmu = False, upscale_epoch = True, start_epoch_sec = 10, end_epoch_sec = s)

		# check if dataset is not none
		if df_epoch_data is None:
			logging.warning('No epoch data found, skipping...')
			return

		# convert to numpy array and take only XYZ (first three columns)
		epoch_data = df_epoch_data.values[:, :3]
		
		# save to HDF5
		save_data_to_group_hdf5(group = subject, data = epoch_data, data_name = '{}{}'.format(dataset_prefix, s), overwrite = True, create_group_if_not_exists = False, hdf5_file = ACTIWAVE_ACTIGRAPH_MAPPING_HDF5_FILE)
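
# --- Illustrative sketch (not part of the original code) ---
# The call to _read_epoch_dataset above uses upscale_epoch = True to turn the stored
# 10 s epoch counts into s-second epochs. That helper is not shown here; a minimal
# aggregation along those lines could look like the hypothetical function below.
def _upscale_epoch_counts(epoch_10s_counts, start_epoch_sec = 10, end_epoch_sec = 60):
	"""Sum consecutive 10 s epoch counts into end_epoch_sec epochs (illustrative sketch)."""

	import numpy as np

	# number of 10 s epochs that make up one target epoch
	factor = end_epoch_sec // start_epoch_sec

	# trim the tail so the number of rows is a multiple of the factor
	counts = np.asarray(epoch_10s_counts)
	n = (len(counts) // factor) * factor

	# reshape to (num_target_epochs, factor, num_axes) and sum counts within each target epoch
	return counts[:n].reshape(-1, factor, counts.shape[1]).sum(axis = 1)
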
def process_hees_2013(subject, save_hdf5, idx = 1, total = 1):
	"""
	Estimation of non-wear time periods based on Hees 2013 paper

	Estimation of Daily Energy Expenditure in Pregnant and Non-Pregnant Women Using a Wrist-Worn Tri-Axial Accelerometer
	Vincent T. van Hees, Frida Renström , Antony Wright, Anna Gradmark, Michael Catt, Kong Y. Chen, Marie Löf, Les Bluck, Jeremy Pomeroy, Nicholas J. Wareham, Ulf Ekelund, Søren Brage, Paul W. Franks
	Published: July 29, 2011. https://doi.org/10.1371/journal.pone.0022922

	Accelerometer non-wear time was estimated on the basis of the standard deviation and the value range of each accelerometer axis, calculated for consecutive blocks of 30 minutes. 
	A block was classified as non-wear time if the standard deviation was less than 3.0 mg (1 mg = 0.00981 m·s⁻²) for at least two out of the three axes or if the value range, for 
	at least two out of three axes, was less than 50 mg.

	Parameters
	---------
	subject : string
		subject ID
	save_hdf5 : os.path
		location of HDF5 file to save non wear data to
	idx : int (optional)
		index of counter, only useful when processing large batches and you want to monitor the status
	total: int (optional)
		total number of subjects to process, only useful when processing large batches and you want to monitor the status
	"""

	logging.info('{style} Processing subject: {} {}/{} {style}'.format(subject, idx, total, style = '='*10))
	
	# read actigraph acceleration data
	actigraph_acc, *_ = get_actigraph_acc_data(subject, hdf5_file = ACTIWAVE_ACTIGRAPH_MAPPING_HDF5_FILE)

	# calculate non-wear time based on the Hees 2013 algorithm
	non_wear_vector = hees_2013_calculate_non_wear_time(actigraph_acc)
	
	# save non-wear vector to HDF5
	save_data_to_group_hdf5(group = subject, data = non_wear_vector, data_name = 'hees_2013_non_wear_data', overwrite = True, create_group_if_not_exists = False, hdf5_file = save_hdf5)
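
# --- Illustrative sketch (not part of the original code) ---
# The criterion quoted in the docstring above (per-axis standard deviation < 3.0 mg or
# per-axis value range < 50 mg, for at least two of the three axes, over 30-minute blocks)
# could be expressed as follows for a single block; hees_2013_calculate_non_wear_time in
# the actual codebase is not shown here and may differ in detail.
def _hees_block_is_non_wear(block_acc, std_threshold = 0.003, range_threshold = 0.050):
	"""Classify one block of raw acceleration (in g) as non-wear (illustrative sketch)."""

	import numpy as np

	block_acc = np.asarray(block_acc)

	# per-axis standard deviation and value range over the block
	axis_std = block_acc.std(axis = 0)
	axis_range = block_acc.max(axis = 0) - block_acc.min(axis = 0)

	# non-wear if at least two of the three axes fall below either threshold
	return (axis_std < std_threshold).sum() >= 2 or (axis_range < range_threshold).sum() >= 2
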
def perform_inference_segmentation(paths, params):
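	"""
	Perform inference segmentation of MRI images by classifying image features with a trained CNN model and save the segmented images to HDF5

	Parameters
	---------
	paths : dict
		dictionary with folder locations, including 'hdf5_folder' and 'model_folder'
	params : dict
		dictionary with processing parameters, including 'hdf5_file', 'cnn_model', 'group_no_bg', 'group_segmented_classification_mri', 'rescale_factor', 'feature_size', and 'step_size'
	"""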

	# hdf5 file that contains the original images
	hdf5_file = os.path.join(paths['hdf5_folder'], params['hdf5_file'])

	# path to trained CNN model
	model_file = os.path.join(paths['model_folder'], params['cnn_model'], 'model.h5')

	# get all patient names from original MRI group
	patients = get_datasets_from_group(group_name = params['group_no_bg'], hdf5_file = hdf5_file)
	
	# loop over each patient, read data, perform inference
	for i, patient in enumerate(patients):

		logging.info(f'Processing patient: {patient} {i + 1}/{len(patients)}')

		# read images
		images = read_dataset_from_group(dataset = patient, group_name = params['group_no_bg'], hdf5_file = hdf5_file)

		# rescale 12bit images to 0-1
		images = images * params['rescale_factor']

		# create empty array to save reconstructed images
		segmented_images = np.empty_like(images, dtype = 'uint8')

		# use parallel processing to speed up processing time
		executor = Parallel(n_jobs = cpu_count(), backend = 'multiprocessing')

		# create tasks so we can execute them in parallel
		tasks = (delayed(classify_img_feature)(img = images[img_slice], 
												slice_idx = img_slice, 
												feature_size = params['feature_size'], 
												step_size = params['step_size'],
												model_file = model_file,
												verbose = True) for img_slice in range(images.shape[0]))
		
		# execute tasks and process the return values
		for segmented_image, slice_idx in executor(tasks):

			# add each segmented image slice to the overall array that holds all the slices
			segmented_images[slice_idx] = segmented_image

		# save segmented image to HDF5 file
		save_data_to_group_hdf5(group = params['group_segmented_classification_mri'],
								data = segmented_images,
								data_name = patient,
								hdf5_file = hdf5_file,
								overwrite = True)
def process_hecht_2009_triaxial(subject, save_hdf5, idx = 1, total = 1, epoch_dataset = 'epoch10'):
	"""
	Calculate the non-wear time from a data array that contains the vector magnitude (VMU), according to the Hecht 2009 algorithm

	Paper:
	COPD. 2009 Apr;6(2):121-9. doi: 10.1080/15412550902755044.
	Methodology for using long-term accelerometry monitoring to describe daily activity patterns in COPD.
	Hecht A, Ma S, Porszasz J, Casaburi R; COPD Clinical Research Network.

	Parameters
	---------
	subject : string
		subject ID
	save_hdf5 : os.path
		location of HDF5 file to save non wear data to
	idx : int (optional)
		index of counter, only useful when processing large batches and you want to monitor the status
	total: int (optional)
		total number of subjects to process, only useful when processing large batches and you want to monitor the status
	epoch_dataset : string (optional)
		name of dataset within an HDF5 group that contains the 10sec epoch data
	"""

	logging.info('{style} Processing subject: {} {}/{} {style}'.format(subject, idx, total, style = '='*10))
	
	"""
		ACTIGRAPH DATA
	"""

	# read actigraph acceleration time
	_, _, actigraph_time = get_actigraph_acc_data(subject, hdf5_file = ACTIWAVE_ACTIGRAPH_MAPPING_HDF5_FILE)
	
	# get start and stop time
	start_time, stop_time = actigraph_time[0], actigraph_time[-1]

	"""
		EPOCH DATA
	"""

	# check if epoch dataset is part of HDF5 group
	if epoch_dataset in get_datasets_from_group(group_name = subject, hdf5_file = ACTIGRAPH_HDF5_FILE):

		# get actigraph 10s epoch data
		epoch_data, _ , epoch_time_data = get_actigraph_epoch_data(subject, epoch_dataset = epoch_dataset, hdf5_file = ACTIGRAPH_HDF5_FILE)

		# convert to 60s epoch data	
		epoch_60_data, epoch_60_time_data = get_actigraph_epoch_60_data(epoch_data, epoch_time_data)

		# calculate epoch 60 VMU
		epoch_60_vmu_data = calculate_vector_magnitude(epoch_60_data[:,:3], minus_one = False, round_negative_to_zero = False)


		"""
			GET NON WEAR VECTOR
		"""

		# create dataframe of actigraph acceleration 
		df_epoch_60_vmu = pd.DataFrame(epoch_60_vmu_data, index = epoch_60_time_data, columns = ['VMU']).loc[start_time:stop_time]

		# retrieve non-wear vector
		epoch_60_vmu_non_wear_vector = hecht_2009_triaxial_calculate_non_wear_time(data = df_epoch_60_vmu.values)

		# get the cropped time array as int64 (cropped because the dataframe above was sliced between start and stop time)
		epoch_60_time_data_cropped = np.array(df_epoch_60_vmu.index).astype('int64')
		# reshape
		epoch_60_time_data_cropped = epoch_60_time_data_cropped.reshape(len(epoch_60_time_data_cropped), 1)

		# add two arrays
		combined_data = np.hstack((epoch_60_time_data_cropped, epoch_60_vmu_non_wear_vector))
		
		"""
			SAVE TO HDF5 FILE
		"""
		save_data_to_group_hdf5(group = subject, data = combined_data, data_name = 'hecht_2009_3_axes_non_wear_data', overwrite = True, create_group_if_not_exists = True, hdf5_file = save_hdf5)

	else:
		logging.warning('Subject {} has no corresponding epoch data, skipping...'.format(subject))
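
# --- Illustrative sketch (not part of the original code) ---
# calculate_vector_magnitude above reduces the three count axes to a single VMU value per
# epoch; with minus_one and round_negative_to_zero disabled, as in the call above, this
# presumably comes down to the Euclidean norm sketched below.
def _vector_magnitude(xyz):
	"""Euclidean vector magnitude per row of an (n, 3) array (illustrative sketch)."""

	import numpy as np

	xyz = np.asarray(xyz, dtype = 'float64')

	# sqrt(x^2 + y^2 + z^2) for every epoch, returned as a column vector
	return np.sqrt((xyz ** 2).sum(axis = 1)).reshape(-1, 1)
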
def process_choi_2011(subject, save_hdf5, idx = 1, total = 1, epoch_dataset = 'epoch10'):
	"""
	Estimate non-wear time based on Choi 2011 paper:

	Med Sci Sports Exerc. 2011 Feb;43(2):357-64. doi: 10.1249/MSS.0b013e3181ed61a3.
	Validation of accelerometer wear and nonwear time classification algorithm.
	Choi L, Liu Z, Matthews CE, Buchowski MS.

	Parameters
	---------
	subject : string
		subject ID
	save_hdf5 : os.path
		location of HDF5 file to save non wear data to
	idx : int (optional)
		index of counter, only useful when processing large batches and you want to monitor the status
	total: int (optional)
		total number of subjects to process, only useful when processing large batches and you want to monitor the status
	epoch_dataset : string (optional)
		name of dataset within an HDF5 group that contains the 10sec epoch data
	"""

	logging.info('{style} Processing subject: {} {}/{} {style}'.format(subject, idx, total, style = '='*10))

	"""
		ACTIGRAPH DATA
	"""

	# read actigraph acceleration time
	_, _, actigraph_time = get_actigraph_acc_data(subject, hdf5_file = ACTIWAVE_ACTIGRAPH_MAPPING_HDF5_FILE)

	# get start and stop time
	start_time, stop_time = actigraph_time[0], actigraph_time[-1]


	"""
		EPOCH DATA
	"""
	if epoch_dataset in get_datasets_from_group(group_name = subject, hdf5_file = ACTIGRAPH_HDF5_FILE):

		# get actigraph 10s epoch data
		epoch_data, _ , epoch_time_data = get_actigraph_epoch_data(subject, epoch_dataset = epoch_dataset, hdf5_file = ACTIGRAPH_HDF5_FILE)

		# convert to 60s epoch data	
		epoch_60_data, epoch_60_time_data = get_actigraph_epoch_60_data(epoch_data, epoch_time_data)

		# obtain counts values
		epoch_60_count_data = epoch_60_data[:,:3]

		"""
			GET NON WEAR VECTOR
		"""

		# create dataframe of actigraph acceleration 
		df_epoch_60_count = pd.DataFrame(epoch_60_count_data, index = epoch_60_time_data, columns = ['X - COUNT', 'Y - COUNT', 'Z - COUNT']).loc[start_time:stop_time]

		# retrieve non-wear vector
		epoch_60_count_non_wear_vector = choi_2011_calculate_non_wear_time(data = df_epoch_60_count.values, time = df_epoch_60_count.index.values)

		# get the cropped time array as int64 (cropped because the dataframe above was sliced between start and stop time)
		epoch_60_time_data_cropped = np.array(df_epoch_60_count.index).astype('int64')
		
		# reshape
		epoch_60_time_data_cropped = epoch_60_time_data_cropped.reshape(len(epoch_60_time_data_cropped), 1)

		# add two arrays
		combined_data = np.hstack((epoch_60_time_data_cropped, epoch_60_count_non_wear_vector))

		
		"""
			SAVE TO HDF5 FILE
		"""
		
		save_data_to_group_hdf5(group = subject, data = combined_data, data_name = 'choi_2011_non_wear_data', overwrite = True, create_group_if_not_exists = False, hdf5_file = save_hdf5)

	else:
		logging.warning('Subject {} has no corresponding epoch data, skipping...'.format(subject))
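
# --- Illustrative usage sketch (not part of the original code) ---
# The combined_data array saved above stores the epoch timestamps (int64 nanoseconds) in
# the first column and the non-wear flags in the second. Reading such a dataset back into
# a time-indexed DataFrame could look like this; the helper name is hypothetical and
# read_dataset_from_group is assumed to return the array as it was saved.
def _load_non_wear_data(subject, hdf5_file, data_name = 'choi_2011_non_wear_data'):
	"""Read a saved non-wear dataset back as a time-indexed DataFrame (illustrative sketch)."""

	import pandas as pd

	data = read_dataset_from_group(dataset = data_name, group_name = subject, hdf5_file = hdf5_file)

	# first column holds the timestamps, second column the non-wear flag
	return pd.DataFrame(data[:, 1:], index = pd.to_datetime(data[:, 0]), columns = ['non_wear'])
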
def process_troiano_2007(subject, save_hdf5, idx = 1, total = 1, epoch_dataset = 'epoch10'):
	"""
	Calculate non wear time by using Troiano 2007 algorithm

	Troiano 2007 non-wear algorithm
		detects non wear time from 60s epoch counts
		Nonwear was defined by an interval of at least 60 consecutive minutes of zero activity intensity counts, with allowance for 1–2 min of counts between 0 and 100
	Paper:
		Physical Activity in the United States Measured by Accelerometer
	DOI:
		10.1249/mss.0b013e31815a51b3

	Parameters
	---------
	subject : string
		subject ID
	save_hdf5 : os.path
		location of HDF5 file to save non wear data to
	idx : int (optional)
		index of counter, only useful when processing large batches and you want to monitor the status
	total: int (optional)
		total number of subjects to process, only useful when processing large batches and you want to monitor the status
	epoch_dataset : string (optional)
		name of dataset within an HDF5 group that contains the 10sec epoch data
	"""

	logging.info('{style} Processing subject: {} {}/{} {style}'.format(subject, idx, total, style = '='*10))

	"""
		ACTIGRAPH DATA
	"""

	# read actigraph acceleration time
	_, _, actigraph_time = get_actigraph_acc_data(subject, hdf5_file = ACTIWAVE_ACTIGRAPH_MAPPING_HDF5_FILE)

	# get start and stop time
	start_time, stop_time = actigraph_time[0], actigraph_time[-1]


	"""
		EPOCH DATA
	"""
	if epoch_dataset in get_datasets_from_group(group_name = subject, hdf5_file = ACTIGRAPH_HDF5_FILE):

		# get actigraph 10s epoch data
		epoch_data, _ , epoch_time_data = get_actigraph_epoch_data(subject, epoch_dataset = epoch_dataset, hdf5_file = ACTIGRAPH_HDF5_FILE)

		# convert to 60s epoch data	
		epoch_60_data, epoch_60_time_data = get_actigraph_epoch_60_data(epoch_data, epoch_time_data)

		# obtain counts values
		epoch_60_count_data = epoch_60_data[:,:3]

		"""
			GET NON WEAR VECTOR
		"""

		# create dataframe of actigraph acceleration 
		df_epoch_60_count = pd.DataFrame(epoch_60_count_data, index = epoch_60_time_data, columns = ['X - COUNT', 'Y - COUNT', 'Z - COUNT']).loc[start_time:stop_time]

		# retrieve non-wear vector
		epoch_60_count_non_wear_vector = troiano_2007_calculate_non_wear_time(data = df_epoch_60_count.values, time = df_epoch_60_count.index.values)

		# get the cropped time array as int64 (cropped because the dataframe above was sliced between start and stop time)
		epoch_60_time_data_cropped = np.array(df_epoch_60_count.index).astype('int64')
		# reshape
		epoch_60_time_data_cropped = epoch_60_time_data_cropped.reshape(len(epoch_60_time_data_cropped), 1)

		# add two arrays
		combined_data = np.hstack((epoch_60_time_data_cropped, epoch_60_count_non_wear_vector))

		
		"""
			SAVE TO HDF5 FILE
		"""
	
		save_data_to_group_hdf5(group = subject, data = combined_data, data_name = 'troiano_2007_non_wear_data', overwrite = True, create_group_if_not_exists = True, hdf5_file = save_hdf5)

	else:
		logging.warning('Subject {} has no corresponding epoch data, skipping...'.format(subject))
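
# --- Illustrative sketch (not part of the original code) ---
# The rule described in the docstring above (at least 60 consecutive minutes of zero
# counts, with an allowance of 1-2 minutes of counts between 0 and 100) could be scanned
# roughly as follows; this is a heavily simplified reading, and
# troiano_2007_calculate_non_wear_time in the actual codebase will differ in detail.
def _troiano_non_wear_sketch(counts_per_min, min_period = 60, spike_tolerance = 2, spike_upper = 100):
	"""Heavily simplified Troiano-style non-wear scan over 1-minute counts (illustrative sketch)."""

	import numpy as np

	counts_per_min = np.asarray(counts_per_min)
	non_wear = np.zeros(len(counts_per_min), dtype = 'uint8')

	run_start, spikes = None, 0
	for i, c in enumerate(counts_per_min):
		if c == 0:
			# zero counts start or continue a candidate non-wear interval
			if run_start is None:
				run_start, spikes = i, 0
		elif run_start is not None and 0 < c < spike_upper and spikes < spike_tolerance:
			# allow a small number of low-count minutes inside the interval
			spikes += 1
		else:
			# interval ends; flag it as non-wear if it lasted long enough
			if run_start is not None and (i - run_start) >= min_period:
				non_wear[run_start:i] = 1
			run_start, spikes = None, 0

	# handle an interval that runs to the end of the recording
	if run_start is not None and (len(counts_per_min) - run_start) >= min_period:
		non_wear[run_start:] = 1

	return non_wear
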
def process_gt3x_file(f,
                      i=1,
                      total=1,
                      hdf5_save_location=HDF5_SAVE,
                      delete_zip_folder=True):
    """
	Process .gt3x file
	- unzip into log.bin and info.txt
	- extract information from info.txt
	- extract information from log.bin
	- save data to hdf5 file

	Parameters
	----------
	f : string
		file location of the .gt3x file
	i : int (optional)
		index of file to be processed, is used to display a counter of the process. Default = 1. For example, processing 12/20
	total : int (optional)
		total number of files to be processed, is used to display a counter of the process. Default = 1. For example, processing 12/20
	hdf5_save_location : os.path
		folder location where to save the extracted acceleration data to.
	delete_zip_folder : Boolean (optional)
		if set to True, delete the unzipped folder (log.bin and info.txt) after processing
	"""

    logging.debug('Processing GT3X binary file: {} {}/{}'.format(
        f, i + 1, total))

    # unzip the raw .gt3x file: this will provide a log.bin and info.txt file
    # the save_location is a new folder with the same name as the .gt3x file
    log_bin, info_txt = unzip_gt3x_file(f, save_location=f.split('.')[0])

    # check if unzipping went ok
    if log_bin is not None:

        # print verbose
        logging.debug('log.bin location: {}'.format(log_bin))
        logging.debug('info.txt location: {}'.format(info_txt))

        # get info data from info file
        info_data = extract_info(info_txt)

        # check if subject name could be read from the binary file
        if info_data['Subject_Name'] != "":

            # check if subject ID already processed
            if info_data['Subject_Name'] not in get_all_subjects_hdf5(
                    hdf5_file=HDF5_SAVE):

                # retrieve log_data, i.e. accelerometer data, and log_time, i.e. timestamps of the acceleration data
                log_data, log_time = extract_log(
                    log_bin,
                    acceleration_scale=float(info_data['Acceleration_Scale']),
                    sample_rate=int(info_data['Sample_Rate']))

                # check if log data is not None (with None something went wrong during reading of the binary file)
                if log_data is not None:

                    # save log_data to HDF5 file
                    save_data_to_group_hdf5(group=info_data['Subject_Name'],
                                            data=log_data,
                                            data_name='log',
                                            meta_data=info_data,
                                            overwrite=True,
                                            hdf5_file=hdf5_save_location)

                    # save log_time data to HDF file
                    save_data_to_group_hdf5(group=info_data['Subject_Name'],
                                            data=log_time,
                                            data_name='time',
                                            meta_data=info_data,
                                            overwrite=True,
                                            hdf5_file=hdf5_save_location)

                else:
                    logging.error(
                        'Unable to convert .gt3x file: {} (subject {})'.format(
                            f, info_data['Subject_Name']))
            else:
                logging.info(
                    'Subject name already defined as group in HDF5 file: {}, skipping..'
                    .format(info_data['Subject_Name']))
        else:
            logging.error(
                "Unable to read subject from info.txt file, skipping file: {}".
                format(f))
    else:
        logging.error("Error unzipping file: {}".format(f))

    # delete the created zip folder
    if delete_zip_folder:
        delete_directory(f.split('.')[0])

    # print time and memory
    set_end(tic, process)
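
# --- Illustrative sketch (not part of the original code) ---
# unzip_gt3x_file above is expected to return the locations of log.bin and info.txt. A
# .gt3x file is essentially a zip archive containing those two members, so a minimal
# version of that helper could look like this; the real implementation may differ.
def _unzip_gt3x_sketch(gt3x_file, save_location):
    """Extract log.bin and info.txt from a .gt3x archive (illustrative sketch)."""

    import os
    import zipfile

    # a .gt3x file is a zip archive; extract all members into the save location
    with zipfile.ZipFile(gt3x_file) as archive:
        archive.extractall(save_location)

    log_bin = os.path.join(save_location, 'log.bin')
    info_txt = os.path.join(save_location, 'info.txt')

    # mirror the behaviour above: return None values if the expected members are missing
    if not (os.path.exists(log_bin) and os.path.exists(info_txt)):
        return None, None

    return log_bin, info_txt
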
def batch_process_epoch_files(epoch_sec,
                              epoch_folder=EPOCH_FOLDER,
                              use_parallel=False,
                              num_jobs=cpu_count(),
                              limit=None,
                              skip_n=0):
    """
	Read CSV epoch files from disk and extract (1) header information, and (2) epoch data for the XYZ axes as well as step counts.

	Parameters
	------------
	epoch_sec : int
		number of seconds within a single epoch. Examples include 1 for 1 sec epochs, or 10 for 10s epochs
	epoch_folder : os.path()
		folder location of the 10 seconds epoch files
	use_parallel : Boolean (optional)
		Set to True if subjects need to be processed in parallel; this will execute much faster
	num_jobs : int (optional)
		if use_parallel is set to True, this indicates how many jobs need to be executed at the same time. Defaults to the number of CPU cores
	limit : int (optional)
		limit the number of subjects to be processed
	skip_n : int (optional)
		skip the first n subjects
	"""

    # get all the .csv 10 seconds epoch files from the folder location. We do this here because there might also be other types of files in the folder
    # we can also skip the first n files, or limit the number of files to be processed, for example for testing or if we only need 100 files
    epoch_files = glob2.glob(os.path.join(epoch_folder, '**',
                                          '*.csv'))[0 + skip_n:limit]

    # if use_parallel is set to True, then use parallelization to process all files
    if use_parallel:

        logging.info('Processing in parallel (parallelization on)')

        # because we need to save the data after the parallel processing, we can't process them all at once since the return values become too large, so we process them in batches
        for i in range(0, len(epoch_files), num_jobs):

            # define start and end slice (these are the batches)
            start_slice = i
            end_slice = i + num_jobs

            # use parallel processing to speed up processing time
            executor = Parallel(n_jobs=num_jobs, backend='multiprocessing')

            # create tasks so we can execute them in parallel
            tasks = (delayed(parse_epoch_file)(file=f)
                     for f in epoch_files[start_slice:end_slice])

            # execute tasks and process the return values
            for dic_header, data in executor(tasks):

                # parse out subject ID from file name (split on /, then take the last, then split on space, and take the first)
                subject = dic_header['File Name'].split('/')[-1].split(' ')[0]
                dic_header['Subject'] = subject

                # save header and data to HDF5 file
                save_data_to_group_hdf5(group=subject,
                                        data=data,
                                        data_name='epoch{}'.format(epoch_sec),
                                        meta_data=dic_header,
                                        overwrite=True,
                                        create_group_if_not_exists=True,
                                        hdf5_file=HDF5_SAVE)

            # verbose
            logging.debug('{style} Processed {}/{} {style}'.format(
                end_slice, len(epoch_files), style='=' * 10))

    else:

        # process files one-by-one
        for i, f in enumerate(epoch_files):

            logging.debug(
                '{style} Processing epoch file: {} {}/{} {style}'.format(
                    f, i + 1, len(epoch_files), style='=' * 10))

            # parse the content from the epoch csv file
            dic_header, data = parse_epoch_file(f)

            # parse out subject ID from file name (split on /, then take the last, then split on space, and take the first)
            subject = dic_header['File Name'].split('/')[-1].split(' ')[0]
            dic_header['Subject'] = subject

            # save header and data to HDF5 file
            save_data_to_group_hdf5(group=subject,
                                    data=data,
                                    data_name='epoch{}'.format(epoch_sec),
                                    meta_data=dic_header,
                                    overwrite=True,
                                    create_group_if_not_exists=True,
                                    hdf5_file=HDF5_SAVE)
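
# --- Illustrative usage (not part of the original code) ---
# The batch function above could, for example, be called as follows to parse all 10 s
# epoch CSV files in parallel with one worker per CPU core; EPOCH_FOLDER and HDF5_SAVE
# are module-level constants assumed to be configured elsewhere.
def _batch_process_epoch_files_example():
    """Example invocation of batch_process_epoch_files (illustrative sketch)."""

    # process every 10 s epoch CSV file found in EPOCH_FOLDER and save to HDF5_SAVE
    batch_process_epoch_files(epoch_sec=10, use_parallel=True, num_jobs=cpu_count())
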
def remove_bg(paths, params):
    """
	Remove background from MRI images

	Parameters
	--------------
	paths : dict
		dictionary with folder locations; 'hdf5_folder' points to the folder that contains the HDF5 file with the raw MRI data and to which the processed data is saved
	params : dict
		dictionary with processing parameters; 'hdf5_file' is the name of the HDF5 file, 'group_original_mri' is the HDF5 group with the raw MRI images, and 'group_no_bg' is the HDF5 group to store images with background removed
	"""

    # dynamically construct the path to the hdf5 file
    hdf5_file = os.path.join(paths['hdf5_folder'], params['hdf5_file'])

    # read original MRI datasets from HDF5 file
    D = get_datasets_from_group(group_name=params['group_original_mri'],
                                hdf5_file=hdf5_file)

    # read data from each dataset and plot mri data
    for d_idx, d in enumerate(D):

        logging.info(f'Processing dataset : {d} {d_idx + 1}/{len(D)}')

        # read data from group
        data = read_dataset_from_group(group_name=params['group_original_mri'],
                                       dataset=d,
                                       hdf5_file=hdf5_file)

        # read meta data
        meta_data = read_metadata_from_group_dataset(
            group_name=params['group_original_mri'],
            dataset=d,
            hdf5_file=hdf5_file)

        logging.info(f'Processing patient : {meta_data["PatientName"]}')

        # new numpy array to hold segmented data
        data_segmented = np.empty_like(data, dtype='int16')

        # process each slice
        for i in range(data.shape[0]):

            # ind_cycle = cycle(range(10))
            # fig, axs = plt.subplots(1,8, figsize = (20,5))
            # axs = axs.ravel()

            # original MRI
            img = data[i]
            # plt_index = next(ind_cycle)
            # axs[plt_index].imshow(img, cmap = 'gray')
            # axs[plt_index].set_title('Original MRI')

            # change grayscale
            img = change_img_contrast(img, phi=10, theta=1)
            # plt_index = next(ind_cycle)
            # axs[plt_index].imshow(img, cmap = 'gray')
            # axs[plt_index].set_title('Changed gray scale')

            # convert to 8 bit
            if d not in ['Torsk 1-4 fersk']:
                img = np.array(img, dtype='uint8')
                # plt_index = next(ind_cycle)
                # axs[plt_index].imshow(img, cmap = 'gray')
                # axs[plt_index].set_title('Convert to 8 bit')

            # inverted colors
            # img = (255) - img
            # plt_index = next(ind_cycle)
            # axs[plt_index].imshow(img, cmap = 'gray')
            # axs[plt_index].set_title('Inverted MRI')

            # max filter
            img = ndimage.maximum_filter(img, size=7)
            # plt_index = next(ind_cycle)
            # axs[plt_index].imshow(img, cmap = 'gray')
            # axs[plt_index].set_title('Max filter')

            # erosion
            img = cv2.erode(img, None, iterations=4)
            # plt_index = next(ind_cycle)
            # axs[plt_index].imshow(img, cmap = 'gray')
            # axs[plt_index].set_title('Erosion')

            # gaussian filter
            img = cv2.GaussianBlur(img, (11, 11), 0)
            # plt_index = next(ind_cycle)
            # axs[plt_index].imshow(img, cmap = 'gray')
            # axs[plt_index].set_title('Gaussian Blur')

            # knn bg remove
            segmented_img = perform_knn_segmentation(n_clusters=2, img=img)
            img = mask_image(img=data[i],
                             segmented_img=segmented_img,
                             mask_value=segmented_img[0][0],
                             fill_value=0)
            # plt_index = next(ind_cycle)
            # axs[plt_index].imshow(img, cmap = 'gray')
            # axs[plt_index].set_title('KNN BG remove')

            # add masked image to data_segmented, where we store each slice
            data_segmented[i] = img

            # plt.show()

        # save data to HDF5
        save_data_to_group_hdf5(group=params['group_no_bg'],
                                data=data_segmented,
                                data_name=d,
                                hdf5_file=hdf5_file,
                                meta_data=meta_data,
                                overwrite=True)
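
# --- Illustrative sketch (not part of the original code) ---
# perform_knn_segmentation above takes an n_clusters argument and is used to separate
# foreground from background, which suggests clustering of pixel intensities. A minimal
# stand-in using scikit-learn k-means could look like this; the real helper is not shown
# here and may be implemented differently.
def _cluster_segmentation_sketch(img, n_clusters=2):
    """Cluster pixel intensities into n_clusters groups and return a label image (illustrative sketch)."""

    import numpy as np
    from sklearn.cluster import KMeans

    # cluster the flattened pixel intensities
    pixels = np.asarray(img, dtype='float64').reshape(-1, 1)
    labels = KMeans(n_clusters=n_clusters, n_init=10).fit_predict(pixels)

    # reshape the cluster labels back to the original image shape
    return labels.reshape(np.asarray(img).shape)
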
def process_actiwave_file(f,
                          i=1,
                          total=1,
                          acc_dtype=np.float32,
                          ecg_dtype=np.float32,
                          ms2_to_g=0.101972,
                          hdf5_save_location=HDF5_SAVE):
    """
	Process a single actiwave file
	- read .edf file
	- extract content and meta data
		- acceleration data YXZ
		- ecg data
		- estimated heart rate

	Parameters
	----------
	f : string
		file location of the .edf file
	i : int (optional)
		index of file to be processed, is used to display a counter of the process. Default = 1. For example, processing 12/20
	total : int (optional)
		total number of files to be processed, is used to display a counter of the process. Default = 1. For example, processing 12/20
	acc_dtype : datatype
		datatype for acceleration data. Defaults to np.float32. Meaning that each acceleration value in g is represented as 32 bit float. Can be made smaller,
		which results in less memory per value, but also less precise
	ecg_dtype : datatype
		datatype for ecg data. Defaults to np.float32. Meaning that each ecg value is represented as 32 bit float. Can be made smaller,
		which results in less memory per value, but also less precise
	ms2_to_g : float
		conversion factor to go from values measured in m/s² (meters per second squared) to g (gravity)
	hdf5_save_location : os.path
		folder location where to save the extracted actiwave data to
	"""

    logging.info('Processing EDF file: {} {}/{}'.format(f, i, total))

    # read EDF data
    dic_data = read_edf_file(file=f)

    # extract edf file meta data
    edf_meta_data = read_edf_meta_data(file=f)

    # get subject from meta data (this is also the group name in the HDF5 file)
    subject = edf_meta_data['Patient Code']

    # check to see if the subject is also part of the file name
    if subject not in f:
        logging.error(
            'Mismatch between subject in file name {} and within EDF meta data {}'
            .format(f, subject))
        return
    """
		Process ECG data
	"""

    # read ECG data from the dictionary
    ecg_data = dic_data.get('ECG0')
    # check if ecg data available
    if ecg_data is not None:
        # reshape the array so we have a column vector
        ecg_data = ecg_data.reshape(((len(ecg_data), 1)))
        # convert the data type of the ecg
        ecg_data = ecg_data.astype(dtype=ecg_dtype)
        # read meta data for this channel
        ecg_meta_data = read_edf_channel_meta_data(file=f, channel=0)
    else:
        logging.error('ECG data not available for file: {}'.format(f))
        return
    """
		Process the acceleration data
	"""

    acc_x_data = dic_data.get('X')
    acc_y_data = dic_data.get('Y')
    acc_z_data = dic_data.get('Z')

    # check if X, Y, and Z have values
    if (acc_x_data is not None) and (acc_y_data is not None) and (acc_z_data
                                                                  is not None):

        # length of the acceleration data
        l = len(acc_x_data)

        # create one acceleration array; the original data is reshaped, and note that the order here is YXZ, similar to the order of the raw data
        acc_data = np.hstack((acc_y_data.reshape(
            (l, 1)), acc_x_data.reshape((l, 1)), acc_z_data.reshape((l, 1))))

        # convert ms^2 acceleration data into g-values
        acc_data = acc_data * ms2_to_g
        # convert acc_data to smaller float point precision
        acc_data = acc_data.astype(dtype=acc_dtype)

        # read the acceleration channel meta data (here we select channel 1; channels 2 and 3 are also acceleration data but contain the same values)
        acc_meta_data = read_edf_channel_meta_data(file=f, channel=1)

    else:
        logging.error('Acceleration data not available for file: {}'.format(f))
        return
    """
		Process Estimated HR data
	"""

    # read HR data
    hr_data = dic_data.get('Estimated HR')

    # check if hr data is present
    if hr_data is not None:
        # resize the array to have column vectors
        hr_data = hr_data.reshape((len(hr_data), 1))
        # read meta data for this channel
        hr_meta_data = read_edf_channel_meta_data(file=f, channel=4)
    else:
        logging.warning(
            'Estimated HR data not available for file: {}'.format(f))
        return
    """
		Save data and meta-data to HDF5
	"""

    # save ecg data
    save_data_to_group_hdf5(group=subject,
                            data=ecg_data,
                            data_name='ecg',
                            meta_data=ecg_meta_data,
                            overwrite=True,
                            create_group_if_not_exists=True,
                            hdf5_file=hdf5_save_location)
    # save acceleration data
    save_data_to_group_hdf5(group=subject,
                            data=acc_data,
                            data_name='acceleration',
                            meta_data=acc_meta_data,
                            overwrite=True,
                            create_group_if_not_exists=True,
                            hdf5_file=hdf5_save_location)
    # save estimated heart rate data
    save_data_to_group_hdf5(group=subject,
                            data=hr_data,
                            data_name='estimated_hr',
                            meta_data=hr_meta_data,
                            overwrite=True,
                            create_group_if_not_exists=True,
                            hdf5_file=hdf5_save_location)
    # save meta data of edf file
    save_meta_data_to_group(group_name=subject,
                            meta_data=edf_meta_data,
                            hdf5_file=hdf5_save_location)
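
# --- Illustrative sketch (not part of the original code) ---
# read_edf_file above returns a dictionary mapping channel labels (e.g. 'ECG0', 'X', 'Y',
# 'Z', 'Estimated HR') to their signal arrays. A minimal version built on pyedflib could
# look like this; the original helper and its exact return format are assumptions here.
def _read_edf_file_sketch(file):
    """Read all signals from an EDF file into a {label: array} dictionary (illustrative sketch)."""

    import pyedflib

    edf = pyedflib.EdfReader(file)

    # map each channel label to its full signal as a numpy array
    dic_data = {label: edf.readSignal(i) for i, label in enumerate(edf.getSignalLabels())}

    edf.close()

    return dic_data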