def test_safe_npdata():
    '''
    exercise safe_npdata on a multi-element list, a single-element list, a
    scalar, and a tuple (which must be rejected with a TypeError); also verify
    the conversion copies the input so mutating the result leaves it untouched
    '''
    print('test list with multiple values')
    sample = [1, 2, 3]
    backup = copy.copy(sample)
    converted = safe_npdata(sample)
    assert CHECK_EQ_NUMPY(converted, np.array(sample))
    # mutate the converted array; the original list must be unaffected (copy semantics)
    converted += 100
    assert CHECK_EQ_LIST_ORDERED(sample, backup)

    print('test list with single value')
    sample = [1]
    converted = safe_npdata(sample)
    assert CHECK_EQ_NUMPY(converted, np.array(sample))

    print('test scalar')
    sample = 10
    converted = safe_npdata(sample)
    assert CHECK_EQ_NUMPY(converted, np.array(sample))

    # ---------------- failure cases ----------------
    print('test edge case: tuple')
    sample = (1, 2)
    try:
        converted = safe_npdata(sample)
        sys.exit('\nwrong! never should be here\n\n')
    except TypeError:
        print('the input should never be a tuple')

    print('\n\nDONE! SUCCESSFUL!!\n')
def hist_equalization(input_data, num_bins=256, warning=True, debug=True):
    '''
    map an N-d numpy array (or list) with an arbitrary value distribution to
    data whose histogram is equalized, by interpolating every sample through
    the empirical CDF; e.g. for gaussian-distributed samples the dense middle
    region is spread out so the output density becomes uniform

    parameters:
        input_data:     a list or numpy array of any shape, integer or float
        num_bins:       number of histogram bins; larger values give a flatter
                        equalized histogram
        warning/debug:  flags forwarded to the conversion / validation helpers

    outputs:
        float data in [0, 1] with the same shape as the input
    '''
    np_data = safe_npdata(input_data, warning=warning, debug=debug)
    if debug:
        assert isnparray(np_data), 'the input data is not a numpy data'

    original_shape = np_data.shape
    flattened = np_data.flatten()

    # empirical density and bin edges, then the (normalized) cumulative distribution
    density, bin_edges = np.histogram(flattened, num_bins, density=True)
    cdf = np.cumsum(density)
    cdf = cdf / cdf[-1]

    # interpolate each sample through the CDF to equalize the distribution
    equalized = np.interp(flattened, bin_edges[:-1], cdf)
    return equalized.reshape(original_shape)
def data_normalize(input_data, method='max', data_range=None, sum=1, warning=True, debug=True):
    '''
    normalize N-d data in one of two ways:
        'max': scale the data from a range to [0, 1]; when data_range is not
               given, the min and max are taken from the data itself
        'sum': scale the data so all elements sum to the given value

    parameters:
        input_data:     a list or numpy N-d array to normalize
        method:         'max' or 'sum', as above
        data_range:     None or a 2-element tuple/list/array (min, max)
        sum:            scalar target sum (note: shadows the builtin, kept for
                        interface compatibility)
        warning/debug:  flags forwarded to the conversion / validation helpers

    outputs:
        a float32 numpy array with the same shape as the input
    '''
    np_data = safe_npdata(input_data, warning=warning, debug=debug).astype('float32')
    if debug:
        assert isnparray(np_data), 'the input data is not a numpy data'
        assert method in ['max', 'sum'], 'the method for normalization is not correct'

    if method == 'max':
        if data_range is None:
            min_value, max_value = np.min(np_data), np.max(np_data)
        else:
            if debug:
                assert isrange(data_range), 'data range is not correct'
            min_value, max_value = data_range[0], data_range[1]
    elif method == 'sum':
        if debug:
            assert isscalar(sum), 'the sum is not correct'
        # dividing by (total / sum) rescales the elements to sum to `sum`
        min_value, max_value = 0, np.sum(np_data) / sum

    return (np_data - min_value) / (max_value - min_value)
def calculate_truncated_mse(error_list, truncated_list, warning=True, debug=True):
    '''
    compute the MSE truncated at each of a set of thresholds: for every
    threshold, only errors strictly below it contribute, and the fraction of
    such points is reported alongside

    parameters:
        error_list:     a list of error values
        truncated_list: a list of truncation thresholds
        warning/debug:  flags forwarded to the conversion / validation helpers

    return:
        a dict keyed by threshold; each value is a dict with keys
        'T-MSE' (mean of the errors below the threshold, 0 if none) and
        'percentage' (fraction of points below the threshold)
    '''
    if debug:
        assert islistofscalar(error_list) and islistofscalar(truncated_list), 'the input error list and truncated list are not correct'

    results = dict()
    total = len(error_list)
    errors = safe_npdata(error_list, warning=warning, debug=debug)
    thresholds = safe_list(truncated_list, warning=warning, debug=debug)

    for threshold in thresholds:
        # indices of the points whose error is under the current threshold
        below = np.where(errors < threshold)[0].tolist()
        selected = errors[below]

        if selected.size == 0:
            entry = {'T-MSE': 0, 'percentage': 0}
        else:
            entry = {'T-MSE': np.mean(selected), 'percentage': len(below) / float(total)}
        results[threshold] = entry

    return results
def nparray_resize(input_nparray, resize_factor=None, target_size=None, interp='bicubic', warning=True, debug=True):
    '''
    resize a numpy array either by a scalar resize factor (e.g. 0.25) or to an
    explicit target size (height, width); exactly one of the two must be given

    e.g. for a 600 x 800 array:
        resize_factor=0.25        -> 150 x 200
        target_size=(300, 400)    -> 300 x 400

    parameters:
        input_nparray:  a numpy array
        resize_factor:  a scalar, mutually exclusive with target_size
        target_size:    (height, width) as list/tuple/array, mutually exclusive
                        with resize_factor
        interp:         'bicubic' or 'bilinear'
        warning/debug:  flags forwarded to the conversion / validation helpers

    outputs:
        the resized numpy array
    '''
    np_array = safe_npdata(input_nparray, warning=warning, debug=debug)
    if debug:
        assert interp in ['bicubic', 'bilinear'], 'the interpolation method is not correct'
        assert (resize_factor is not None and target_size is None) or (resize_factor is None and target_size is not None), 'resize_factor and target_size cannot co-exist'

    if target_size is not None:
        if debug:
            assert isimsize(target_size), 'the input target size is not correct'
        out_h = int(round(target_size[0]))
        out_w = int(round(target_size[1]))
    elif resize_factor is not None:
        if debug:
            assert isscalar(resize_factor), 'the resize factor is not a scalar'
        in_h, in_w = np_array.shape[:2]
        out_h = int(round(resize_factor * in_h))
        out_w = int(round(resize_factor * in_w))
    else:
        assert False, 'the target_size and resize_factor do not exist'

    # cv2.resize takes (width, height) order
    if interp == 'bicubic':
        return cv2.resize(np_array, (out_w, out_h), interpolation=cv2.INTER_CUBIC)
    elif interp == 'bilinear':
        return cv2.resize(np_array, (out_w, out_h), interpolation=cv2.INTER_LINEAR)
    else:
        assert False, 'interpolation is wrong'
def nparray_hwc2chw(input_nparray, warning=True, debug=True):
    '''
    transpose the channels of a numpy array from HWC to CHW layout

    parameters:
        input_nparray:  a numpy HWC array (3-dimensional)
        warning/debug:  flags forwarded to the conversion / validation helpers

    outputs:
        a numpy CHW array
    '''
    np_array = safe_npdata(input_nparray, warning=warning, debug=debug)
    if debug:
        # bug fix: the message previously formatted `np_image.shape`, an
        # undefined name, so a failed check raised NameError instead of the
        # intended AssertionError
        assert np_array.ndim == 3, 'the input numpy array does not have a good dimension: {}'.format(np_array.shape)
    return np.transpose(np_array, (2, 0, 1))
def data_unnormalize(data, data_range, debug=True, warning=True):
    '''
    unnormalize data back to its original scale given the range it was
    normalized from: output = data * (max - min) + min

    parameters:
        data:           a list or numpy array of normalized values
        data_range:     a 2-element tuple/list/array (min, max)
        debug:          enable input validation
        warning:        forwarded to the conversion helper (new keyword, added
                        at the end so existing positional callers still work)

    outputs:
        a float32 numpy array with the same shape as the input
    '''
    # bug fix: the body previously referenced `input_data` and `warning`,
    # neither of which existed in scope, so every call raised NameError
    np_data = safe_npdata(data, warning=warning, debug=debug).astype('float32')
    if debug:
        assert isnparray(np_data), 'the input data is not a numpy data'
        assert isrange(data_range), 'data range is not correct'

    min_value, max_value = data_range[0], data_range[1]
    return np_data * (max_value - min_value) + min_value