def get_memory_usage_data_layer(self, layer_name):
    '''
    this function returns the memory usage for the data of a specific layer;
    batch_size is taken into account
    '''
    assert self._compiled, 'the network is not compiled'
    assert isstring(layer_name), 'the name of the layer queried should be a string'
    assert layer_name in self._layers, 'no layer queried exists'

    # we treat activation layers as in-place layers and don't store data in them
    if isinstance(self._layers[layer_name], Activation):
        return 0

    datashape = (self._batch_size, ) + self._blobs[layer_name]['datashape']
    layer = self._layers[layer_name]
    num_pixel = reduce(mul, datashape)
    if layer.datatype == 'single':
        return num_pixel * 4    # single has 4 bytes
    elif layer.datatype == 'double':
        return num_pixel * 8    # double has 8 bytes
    elif layer.datatype == 'uint':
        return num_pixel        # unsigned integer has 1 byte
    elif layer.datatype == 'boolean':
        return num_pixel        # boolean has 1 byte
    else:
        assert False, 'unknown datatype while calculating memory usage for data'
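# A worked example (hypothetical shape, assuming the 'single' datatype): a layer
# whose blob has datashape (3, 224, 224) with batch_size 32 stores
# 32 * 3 * 224 * 224 = 4,816,896 values, i.e. 4,816,896 * 4 bytes, roughly 19.3 MB.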
def get_recording_type(filename, debug=True):
    '''
    extract the recording type: sentence, neutral tongue or expression
    '''
    if debug:
        assert isstring(filename), 'input filename is not a string'
    substrings = filename.split('--')
    return substrings[1]
def get_person_id(filename, debug=True):
    '''
    extract the person ID from a MUGSY filename
    '''
    if debug:
        assert isstring(filename), 'input filename is not a string'
    substrings = filename.split('_')
    return substrings[1]
def get_rotate_degree_v2(camera_id, debug=True):
    if debug:
        assert isstring(camera_id), 'the input camera id is not a string for getting rotation degree'
        assert camera_id in get_camera_list(), 'the camera id requested: %s does not exist' % camera_id
    return rotate_degree_v2[camera_id]
def get_frame_number(filename, debug=True):
    '''
    extract the frame number from a MUGSY filename
    '''
    if debug:
        assert isstring(filename), 'input filename is not a string'
    substrings = filename.split('--')
    return int(substrings[3])
def get_recording_id(filename, debug=True):
    '''
    extract the recording id, which includes the date, the person id and the dot flag
    '''
    if debug:
        assert isstring(filename), 'input filename is not a string'
    substrings = filename.split('--')
    return substrings[0]
def get_camera_id(filename, debug=True):
    '''
    extract the camera ID from a MUGSY filename
    '''
    if debug:
        assert isstring(filename), 'input filename is not a string'
    substrings = filename.split('--')
    return substrings[2]
def get_labeler_id(filename, debug=True):
    '''
    extract the labeler id; this function assumes the filename is separated by '--'
    '''
    if debug:
        assert isstring(filename), 'input filename is not a string'
    substrings = filename.split('--')
    labeler_id = substrings[-1]
    return labeler_id
def get_filename(recording_id, recording_type, camera_id, frame_number, labeler_id, debug=True):
    '''
    return the full filename given all info
    '''
    if debug:
        assert isstring(recording_id), 'recording id is not a string'
        assert isstring(recording_type), 'recording type is not a string'
        assert isscalar(frame_number), 'frame number is not a scalar'
        assert isstring(labeler_id), 'labeler id is not a string'
        assert camera_id in get_camera_list(), 'camera id %s is not in the camera list' % camera_id
    return '--'.join([recording_id, recording_type, camera_id, '%05d' % frame_number, labeler_id])
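# A minimal round-trip sketch of the filename helpers above. All ids below are
# hypothetical placeholders, not real MUGSY recordings; debug checks are
# disabled so the example does not depend on the actual camera list.
def _demo_filename_roundtrip():
    filename = get_filename('20170101_subject01_dot', 'sentence', 'cam01', 42, 'labeler01', debug=False)
    # filename == '20170101_subject01_dot--sentence--cam01--00042--labeler01'
    assert get_recording_id(filename, debug=False) == '20170101_subject01_dot'
    assert get_person_id(filename, debug=False) == 'subject01'
    assert get_frame_number(filename, debug=False) == 42
    assert get_labeler_id(filename, debug=False) == 'labeler01'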
def load_hdf5_file(hdf5_file, dataname, debug=True):
    '''
    load a single hdf5 file
    '''
    if debug:
        assert is_path_exists(hdf5_file) and isfile(hdf5_file), 'input hdf5 path does not exist: %s' % hdf5_file
        assert islist(dataname), 'dataset queried is not correct'
        assert all(isstring(dataset_tmp) for dataset_tmp in dataname), 'dataset queried is not correct'

    hdf5 = h5py.File(hdf5_file, 'r')
    datadict = dict()
    for dataset in dataname:
        datadict[dataset] = np.array(hdf5[dataset])    # copy the data out so the file can be closed
    hdf5.close()
    return datadict
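# A self-contained usage sketch of load_hdf5_file; the file name and dataset
# name are hypothetical and only exist inside this demo.
def _demo_load_hdf5_file():
    with h5py.File('demo.hdf5', 'w') as h5f:
        h5f.create_dataset('data', data=np.zeros((2, 3), dtype='float32'))
    datadict = load_hdf5_file('demo.hdf5', ['data'])
    assert datadict['data'].shape == (2, 3)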
def __init__(self, name, function, bottom=None, datatype=None, paramtype=None):
    super(Activation, self).__init__(name=name, bottom=bottom, datatype=datatype, paramtype=paramtype)
    assert isstring(function), 'the function used in the activation layer should be a string'
    assert any(function == item for item in ACTIVATION_FUNCTION), \
        'the activation function should be one of "%s"' \
        % functools.reduce(lambda x, y: str(x) + '" "' + str(y), ACTIVATION_FUNCTION)
    assert self._bottom is None or len(self._bottom) == 1, \
        'Activation layer can only have one bottom layer'
    self._function = function
def rand_load_hdf5_from_folder(hdf5_src, dataname, debug=True):
    '''
    randomly load a single hdf5 file from a folder of hdf5 files
    '''
    if debug:
        assert is_path_exists(hdf5_src) and isfolder(hdf5_src), 'input hdf5 path does not exist: %s' % hdf5_src
        assert islist(dataname), 'dataset queried is not correct'
        assert all(isstring(dataset_tmp) for dataset_tmp in dataname), 'dataset queried is not correct'

    hdf5list, num_hdf5_files = load_list_from_folder(folder_path=hdf5_src, ext_filter='.hdf5')
    check_index = random.randrange(0, num_hdf5_files)    # pick one file uniformly at random
    hdf5_path_sample = hdf5list[check_index]
    hdf5_file = h5py.File(hdf5_path_sample, 'r')
    datadict = dict()
    for dataset in dataname:
        datadict[dataset] = np.array(hdf5_file[dataset])
    hdf5_file.close()
    return datadict
def __init__(self, name, datatype=None, paramtype=None):
    if datatype is not None:
        self._datatype_check(datatype)
    else:
        datatype = 'single'
        print 'datatype of the layer is not defined. ' \
            'By default, we use single floating point to save the data'
    if paramtype is not None:
        self._paramtype_check(paramtype)
    else:
        paramtype = 'single'
        print 'paramtype of the layer is not defined. ' \
            'By default, we use single floating point to save the parameter'
    assert isstring(name), 'the name of input layer should be a string'

    self._name = name
    self._datatype = datatype
    self._paramtype = paramtype
    self._top = None
    self._bottom = None
def remove(self, layer_name):
    assert isstring(layer_name), 'the layer should be queried by a string name'
    if layer_name in self._blobs:
        assert not isinstance(self._layers[layer_name], Input), \
            'the input layer cannot be deleted. ' \
            'You might want to use the reshape function to change the input shape.'
        # TODO: test if reference
        # rewire the graph: connect the bottom neighbor of the removed layer
        # directly to its top neighbor (if any)
        previous_layer = self._layers[layer_name].bottom[0]
        if self._layers[layer_name].top is not None:
            next_layer = self._layers[layer_name].top[0]
        else:
            next_layer = None
        previous_layer.top = next_layer
        if next_layer is not None:
            next_layer.bottom = previous_layer
        del self._blobs[layer_name]
        del self._layers[layer_name]
        self._nb_entries -= 1
    else:
        assert False, 'no layer queried exists'
    self._compiled = False
def _paramtype_check(self, paramtype):
    assert isstring(paramtype), 'the type of parameter should be a string'
    assert any(paramtype == item for item in PARAMTYPE), \
        'type of parameter should be one of "%s"' \
        % functools.reduce(lambda x, y: str(x) + '" "' + str(y), PARAMTYPE)
def generate_hdf5(save_dir, data_src, data_name='data', batch_size=1, ext_filter='png',
                  label_src1=None, label_name1='label', label_preprocess_function1=identity, label_range1=None,
                  label_src2=None, label_name2='label2', label_preprocess_function2=identity, label_range2=None,
                  debug=True, vis=False):
    '''
    this function creates data in hdf5 format from a set of images

    input parameters:
        data_src:   source of image data, which can be a list of image paths, a txt file containing a list of image paths,
                    a folder containing a set of images, or a list of numpy array image data
        label_src:  source of label data, which can be None, a file containing a set of labels, a dictionary of labels,
                    a 1-d numpy array, or a list of label data
        save_dir:   where to store the hdf5 data
        batch_size: how many images to store in a single hdf5 file
        ext_filter: what format of data to use for generating hdf5 data
    '''
    # parse input
    assert is_path_exists_or_creatable(save_dir), 'save path should be a folder to save all hdf5 files'
    mkdir_if_missing(save_dir)
    assert isstring(data_name), 'dataset name is not correct'    # name for hdf5 data

    # convert data source to a list of numpy array image data
    if isfolder(data_src):
        print 'data is loading from %s with extension .%s' % (data_src, ext_filter)
        filelist, num_data = load_list_from_folder(data_src, ext_filter=ext_filter)
        datalist = None
    elif isfile(data_src):
        print 'data is loading from %s with extension .%s' % (data_src, ext_filter)
        filelist, num_data = load_list_from_file(data_src)
        datalist = None
    elif islist(data_src):
        if debug:
            assert all(isimage(data_tmp) for data_tmp in data_src), 'input data source is not a list of numpy array image data'
        datalist = data_src
        num_data = len(datalist)
        filelist = None
    else:
        assert False, 'data source format is not correct.'
    if debug:
        assert (datalist is None and filelist is not None) or (filelist is None and datalist is not None), 'data is not correct'
        if datalist is not None:
            assert len(datalist) == num_data, 'number of data is not equal'
        if filelist is not None:
            assert len(filelist) == num_data, 'number of data is not equal'

    # convert the first label source to either a dictionary or a list of labels
    if label_src1 is None:
        labeldict1 = None
        labellist1 = None
    elif isfile(label_src1):
        assert is_path_exists(label_src1), 'file not found'
        _, _, ext = fileparts(label_src1)
        assert ext == '.json', 'only json extension is supported'
        labeldict1 = json.load(open(label_src1, 'r'))
        num_label1 = len(labeldict1)
        assert num_data == num_label1, 'number of data and label is not equal.'
        labellist1 = None
    elif isdict(label_src1):
        labeldict1 = label_src1
        labellist1 = None
    elif isnparray(label_src1):
        if debug:
            assert label_src1.ndim == 1, 'only 1-d label is supported'
        labeldict1 = None
        labellist1 = label_src1
    elif islist(label_src1):
        if debug:
            assert all(np.array(label_tmp).size == 1 for label_tmp in label_src1), 'only 1-d label is supported'
        labellist1 = label_src1
        labeldict1 = None
    else:
        assert False, 'label source format is not correct.'
    assert isfunction(label_preprocess_function1), 'label preprocess function is not correct.'

    # convert the second label source to either a dictionary or a list of labels
    if label_src2 is None:
        labeldict2 = None
        labellist2 = None
    elif isfile(label_src2):
        assert is_path_exists(label_src2), 'file not found'
        _, _, ext = fileparts(label_src2)
        assert ext == '.json', 'only json extension is supported'
        labeldict2 = json.load(open(label_src2, 'r'))
        num_label2 = len(labeldict2)
        assert num_data == num_label2, 'number of data and label is not equal.'
        labellist2 = None
    elif isdict(label_src2):
        labeldict2 = label_src2
        labellist2 = None
    elif isnparray(label_src2):
        if debug:
            assert label_src2.ndim == 1, 'only 1-d label is supported'
        labeldict2 = None
        labellist2 = label_src2
    elif islist(label_src2):
        if debug:
            assert all(np.array(label_tmp).size == 1 for label_tmp in label_src2), 'only 1-d label is supported'
        labellist2 = label_src2
        labeldict2 = None
    else:
        assert False, 'label source format is not correct.'
    assert isfunction(label_preprocess_function2), 'label preprocess function is not correct.'

    # warm up
    if datalist is not None:
        size_data = datalist[0].shape
    else:
        size_data = imread(filelist[0]).shape

    if labeldict1 is not None:
        if debug:
            assert isstring(label_name1), 'label name is not correct'
        labels1 = np.zeros((batch_size, 1), dtype='float32')
    if labellist1 is not None:
        labels1 = np.zeros((batch_size, 1), dtype='float32')
    if label_src1 is not None and debug:
        assert label_range1 is not None, 'label range is not correct'
        assert (labeldict1 is not None and labellist1 is None) or (labellist1 is not None and labeldict1 is None), 'label is not correct'

    if labeldict2 is not None:
        if debug:
            assert isstring(label_name2), 'label name is not correct'
        labels2 = np.zeros((batch_size, 1), dtype='float32')
    if labellist2 is not None:
        labels2 = np.zeros((batch_size, 1), dtype='float32')
    if label_src2 is not None and debug:
        assert label_range2 is not None, 'label range is not correct'
        assert (labeldict2 is not None and labellist2 is None) or (labellist2 is not None and labeldict2 is None), 'label is not correct'

    # start generating
    count_hdf = 1    # count the number of hdf5 files
    clock = Timer()
    datalist_batch = list()
    for i in xrange(num_data):
        clock.tic()
        if filelist is not None:
            imagefile = filelist[i]
            _, name, _ = fileparts(imagefile)
            img = imread(imagefile).astype('float32')
            max_value = np.max(img)
            if max_value > 1 and max_value <= 255:
                img = img / 255.0    # scale the image data to (0, 1)
            if debug:
                min_value = np.min(img)
                assert min_value >= 0 and min_value <= 1, 'data is not in [0, 1]'
        if datalist is not None:
            img = datalist[i]
        if debug:
            assert size_data == img.shape
        datalist_batch.append(img)

        # process label
        if labeldict1 is not None:
            if debug:
                assert len(filelist) == len(labeldict1), 'file list is not equal to label dictionary'
            labels1[i % batch_size, 0] = float(labeldict1[name])
        if labellist1 is not None:
            labels1[i % batch_size, 0] = float(labellist1[i])
        if labeldict2 is not None:
            if debug:
                assert len(filelist) == len(labeldict2), 'file list is not equal to label dictionary'
            labels2[i % batch_size, 0] = float(labeldict2[name])
        if labellist2 is not None:
            labels2[i % batch_size, 0] = float(labellist2[i])

        # save a full batch to hdf5; (i + 1) ensures every file contains exactly batch_size images
        if (i + 1) % batch_size == 0:
            data = preprocess_image_caffe(datalist_batch, debug=debug, vis=vis)    # swap channel, transfer from list of HxWxC to NxCxHxW

            # write to hdf5 format
            if filelist is not None:
                save_path = os.path.join(save_dir, '%s.hdf5' % name)
            else:
                save_path = os.path.join(save_dir, 'image_%010d.hdf5' % count_hdf)
            h5f = h5py.File(save_path, 'w')
            h5f.create_dataset(data_name, data=data, dtype='float32')
            if (labeldict1 is not None) or (labellist1 is not None):
                labels1 = label_preprocess_function1(data=labels1, data_range=label_range1, debug=debug)
                h5f.create_dataset(label_name1, data=labels1, dtype='float32')
                labels1 = np.zeros((batch_size, 1), dtype='float32')
            if (labeldict2 is not None) or (labellist2 is not None):
                labels2 = label_preprocess_function2(data=labels2, data_range=label_range2, debug=debug)
                h5f.create_dataset(label_name2, data=labels2, dtype='float32')
                labels2 = np.zeros((batch_size, 1), dtype='float32')
            h5f.close()

            count_hdf = count_hdf + 1
            del datalist_batch[:]
            if debug:
                assert len(datalist_batch) == 0, 'list has not been cleared'

            average_time = clock.toc()
            print('saving to %s: %d/%d, average time: %.3f, elapsed time: %s, estimated time remaining: %s'
                  % (save_path, i + 1, num_data, average_time,
                     format_time(average_time * i), format_time(average_time * (num_data - i))))

    return count_hdf - 1, num_data
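# A hedged usage sketch of generate_hdf5; the paths below are hypothetical
# placeholders and the call is left commented out because it depends on image
# data existing on disk.
# num_hdf5_files, num_images = generate_hdf5(save_dir='./hdf5_out',
#                                            data_src='./images',
#                                            batch_size=8, ext_filter='png')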
def _datatype_check(self, datatype):
    assert isstring(datatype), 'the type of data should be a string'
    assert any(datatype == item for item in DATATYPE), \
        'type of data should be one of "%s"' \
        % functools.reduce(lambda x, y: str(x) + '" "' + str(y), DATATYPE)