Example #1
0
    def get_memory_usage_data_layer(self, layer_name):
        '''
        return the memory usage in bytes of the data held by a given layer,
        the batch size is taken into account

        parameters:
            layer_name:     name of the layer queried, a string

        outputs:
            number of bytes as an integer. An Activation layer always yields 0
            since it is treated as an in-place layer

        raises:
            AssertionError if the network is not compiled, the name is not a
            string, the layer does not exist or its datatype is not recognised
        '''
        assert self._compiled, 'the network is not compiled'
        assert isstring(
            layer_name), 'the name of layer queried should be a string'
        # `in` works on both Python 2 and 3, dict.has_key is Python 2 only
        assert layer_name in self._layers, 'no layer queried exists'

        layer = self._layers[layer_name]

        # we treat activation layer as in-place layer and don't store data in it
        if isinstance(layer, Activation):
            return 0

        # size in bytes of a single element for every supported datatype
        bytes_per_element = {
            'single': 4,  # single precision float
            'double': 8,  # double precision float
            'uint': 1,  # unsigned integer is stored on 1 byte
            'boolean': 1,  # boolean is stored on 1 byte
        }

        # full shape of the stored data is (batch size, ) + shape kept in the blob
        datashape = (self._batch_size, ) + self._blobs[layer_name]['datashape']
        num_pixel = reduce(mul, datashape)

        datatype = layer.datatype
        assert datatype in bytes_per_element, \
            'unknown error while calculating memory usage for data'
        return num_pixel * bytes_per_element[datatype]
def get_recording_type(filename, debug=True):
    '''
    return the recording type encoded in a filename (sentence, neutral tongue
    or expression), which is the second field when splitting on '--'
    '''
    if debug:
        assert isstring(filename), 'input filename is not a string'

    return filename.split('--')[1]
def get_person_id(filename, debug=True):
    '''
    return the person ID of a MUGSY filename, which is the second field when
    splitting the name on '_'
    '''
    if debug:
        assert isstring(filename), 'input filename is not a string'

    return filename.split('_')[1]
def get_rotate_degree_v2(camera_id, debug=True):
    '''
    look up the entry stored for a camera in the rotate_degree_v2 table

    when debug is on, the camera id must be a string listed by get_camera_list
    '''
    if debug:
        assert isstring(camera_id), \
            'the input camera id is not a string for getting rotation degree'
        known_cameras = get_camera_list()
        assert camera_id in known_cameras, \
            'the camera id requested: %s does not exist' % camera_id

    degree = rotate_degree_v2[camera_id]
    return degree
def get_frame_number(filename, debug=True):
    '''
    return the frame number of a MUGSY filename as an integer, which is the
    fourth field when splitting the name on '--'
    '''
    if debug:
        assert isstring(filename), 'input filename is not a string'

    frame_field = filename.split('--')[3]
    return int(frame_field)
def get_recording_id(filename, debug=True):
    '''
    return the recording id (date, person id and dot flag), which is
    everything located before the first '--' of the filename
    '''
    if debug:
        assert isstring(filename), 'input filename is not a string'

    recording_id, _, _ = filename.partition('--')
    return recording_id
def get_camera_id(filename, debug=True):
    '''
    return the camera ID of a MUGSY filename, which is the third field when
    splitting the name on '--'
    '''
    if debug:
        assert isstring(filename), 'input filename is not a string'

    return filename.split('--')[2]
def get_labeler_id(filename, debug=True):
    '''
    return the labeler id, which is the last field of the filename when it is
    split on '--'
    '''
    if debug:
        assert isstring(filename), 'input filename is not a string'

    return filename.split('--')[-1]
def get_filename(recording_id,
                 recording_type,
                 camera_id,
                 frame_number,
                 labeler_id,
                 debug=True):
    '''
    assemble the full filename from its fields

    the fields are joined with '--' in the order recording id, recording type,
    camera id, frame number (zero padded to 5 digits) and labeler id
    '''
    if debug:
        assert isstring(recording_id), 'recording id is not a string'
        assert isstring(recording_type), 'recording type is not a string'
        assert isscalar(frame_number), 'frame number is not a scalar'
        assert isstring(labeler_id), 'labeler id is not a string'
        valid_cameras = get_camera_list()
        assert camera_id in valid_cameras, \
            'camera id %s is not in the camera list' % camera_id

    frame_field = '%05d' % frame_number
    fields = (recording_id, recording_type, camera_id, frame_field,
              labeler_id)
    return '--'.join(fields)
def load_hdf5_file(hdf5_file, dataname, debug=True):
    '''
    load a single hdf5 file

    parameters:
        hdf5_file:      path to an existing hdf5 file
        dataname:       list of dataset names (strings) to read from the file
        debug:          check the inputs when True

    outputs:
        a dictionary mapping every requested dataset name to a numpy array
        holding a copy of its content
    '''
    if debug:
        assert is_path_exists(hdf5_file) and isfile(
            hdf5_file), 'input hdf5 path does not exist: %s' % hdf5_file
        assert islist(dataname), 'dataset queried is not correct'
        assert all(
            isstring(dataset_tmp)
            for dataset_tmp in dataname), 'dataset queried is not correct'

    datadict = dict()
    # the context manager closes the file even if a dataset is missing;
    # np.array copies the data so it stays valid once the file is closed
    with h5py.File(hdf5_file, 'r') as hdf5:
        for dataset in dataname:
            datadict[dataset] = np.array(hdf5[dataset])
    return datadict
Example #11
0
    def __init__(self,
                 name,
                 function,
                 bottom=None,
                 datatype=None,
                 paramtype=None):
        '''
        create an activation layer

        parameters:
            name:           name of the layer, forwarded to the parent class
            function:       name of the activation function, a string which
                            must be one of ACTIVATION_FUNCTION
            bottom:         forwarded to the parent class, at most one bottom
                            layer is accepted
            datatype:       forwarded to the parent class
            paramtype:      forwarded to the parent class
        '''
        super(Activation, self).__init__(name=name,
                                         bottom=bottom,
                                         datatype=datatype,
                                         paramtype=paramtype)
        assert isstring(function), \
            'the function used in activation layer should be a string'
        # compare by value: `is` only succeeds when both strings happen to be
        # the very same object, which is not guaranteed for equal strings
        assert any(function == item for item in ACTIVATION_FUNCTION), \
            'activation function should be one of "%s"' \
            % '" "'.join(str(item) for item in ACTIVATION_FUNCTION)
        assert self._bottom is None or len(self._bottom) == 1, \
            'Activation layer can only have one bottom layer'

        self._function = function
def rand_load_hdf5_from_folder(hdf5_src, dataname, debug=True):
    '''
    randomly load a single hdf5 file from a hdf5 folder

    parameters:
        hdf5_src:       path to an existing folder containing .hdf5 files
        dataname:       list of dataset names (strings) to read from the file
        debug:          check the inputs when True

    outputs:
        a dictionary mapping every requested dataset name to a numpy array
        holding a copy of its content in the randomly selected file
    '''
    if debug:
        assert is_path_exists(hdf5_src) and isfolder(
            hdf5_src), 'input hdf5 path does not exist: %s' % hdf5_src
        assert islist(dataname), 'dataset queried is not correct'
        assert all(
            isstring(dataset_tmp)
            for dataset_tmp in dataname), 'dataset queried is not correct'

    hdf5list, num_hdf5_files = load_list_from_folder(folder_path=hdf5_src,
                                                     ext_filter='.hdf5')
    check_index = random.randrange(0, num_hdf5_files)
    hdf5_path_sample = hdf5list[check_index]

    datadict = dict()
    # the context manager closes the file even if a dataset is missing;
    # np.array copies the data so it stays valid once the file is closed
    with h5py.File(hdf5_path_sample, 'r') as hdf5_file:
        for dataset in dataname:
            datadict[dataset] = np.array(hdf5_file[dataset])
    return datadict
Example #13
0
    def __init__(self, name, datatype=None, paramtype=None):
        '''
        initialise the attributes shared by the layers

        parameters:
            name:           name of the layer, a string
            datatype:       type used to store the data, checked with
                            _datatype_check. 'single' is used with a printed
                            notice when it is None
            paramtype:      type used to store the parameter, checked with
                            _paramtype_check. 'single' is used with a printed
                            notice when it is None
        '''
        if datatype is not None:
            self._datatype_check(datatype)
        else:
            datatype = 'single'
            # single parenthesised argument prints the same on Python 2 and 3
            print('datatype of the layer is not defined. '
                  'By default, we use single floating point to save the data')
        if paramtype is not None:
            self._paramtype_check(paramtype)
        else:
            paramtype = 'single'
            print('paramtype of the layer is not defined. By default, '
                  'we use single floating point to save the parameter')
        assert isstring(name), 'the name of input layer should be a string'

        self._name = name
        # self._data = None
        # self._params = None
        self._datatype = datatype
        self._paramtype = paramtype
        # connections to the other layers are not set at construction time
        self._top = None
        self._bottom = None
Example #14
0
    def remove(self, layer_name):
        '''
        remove a layer from the network and reconnect its neighbours

        the first bottom layer of the removed layer gets the first top layer
        of the removed layer as its top (None when the removed layer has no
        top), and that top layer, if any, gets the bottom layer as its bottom.
        The network is marked as not compiled afterwards

        parameters:
            layer_name:     name of the layer to remove, a string

        raises:
            AssertionError if the name is not a string, the layer does not
            exist or the layer is an Input layer
        '''
        assert isstring(
            layer_name), 'the layer should be queried by a string name'
        # `in` works on both Python 2 and 3, dict.has_key is Python 2 only
        if layer_name in self._blobs:
            layer = self._layers[layer_name]
            assert not isinstance(layer, Input), \
             'the input layer is not able to delete. ' \
             'You might want to use reshape function to change the input shape.'

            # TODO: test if reference
            previous_layer = layer.bottom[0]
            next_layer = layer.top[0] if layer.top is not None else None

            previous_layer.top = next_layer
            # the last layer of the network has no top layer to reconnect,
            # indexing its top would fail
            if next_layer is not None:
                next_layer.bottom = previous_layer
            del self._blobs[layer_name]
            del self._layers[layer_name]
            self._nb_entries -= 1
        else:
            assert False, 'No layer queried existing'
        self._compiled = False
Example #15
0
 def _paramtype_check(self, paramtype):
     '''
     check that the parameter type is a string listed in PARAMTYPE

     raises:
         AssertionError otherwise
     '''
     assert isstring(paramtype), 'the type of parameter should be a string'
     # compare by value: `is` only succeeds when both strings happen to be
     # the very same object, which is not guaranteed for equal strings
     assert any(paramtype == item for item in PARAMTYPE), \
      'type of parameter should be one of "%s"' \
      % '" "'.join(str(item) for item in PARAMTYPE)
def _load_label_source(label_src, num_data, debug=True):
    '''
    convert a label source to a (label dictionary, label list) pair

    at most one element of the pair is not None: a json file or a dictionary
    yields a label dictionary, a 1-d numpy array or a list yields a label
    list, and None yields (None, None)
    '''
    if label_src is None:
        return None, None
    if isfile(label_src):
        assert is_path_exists(label_src), 'file not found'
        _, _, ext = fileparts(label_src)
        assert ext == '.json', 'only json extension is supported'
        # json.load reads from an open file object, not from a path
        with open(label_src, 'r') as label_file:
            labeldict = json.load(label_file)
        assert num_data == len(
            labeldict), 'number of data and label is not equal.'
        return labeldict, None
    if isdict(label_src):
        return label_src, None
    if isnparray(label_src):
        if debug:
            assert label_src.ndim == 1, 'only 1-d label is supported'
        return None, label_src
    if islist(label_src):
        if debug:
            assert all(
                np.array(label_tmp).size == 1
                for label_tmp in label_src), 'only 1-d label is supported'
        return None, label_src
    assert False, 'label source format is not correct.'


def generate_hdf5(save_dir,
                  data_src,
                  data_name='data',
                  batch_size=1,
                  ext_filter='png',
                  label_src1=None,
                  label_name1='label',
                  label_preprocess_function1=identity,
                  label_range1=None,
                  label_src2=None,
                  label_name2='label2',
                  label_preprocess_function2=identity,
                  label_range2=None,
                  debug=True,
                  vis=False):
    '''
    this function creates data in hdf5 format from image data

    parameters:
        save_dir:       where to store the hdf5 data
        data_src:       source of image data, which can be a folder containing a set of images,
                        a file containing a list of image path, or a list of numpy array image data
        data_name:      name of the image dataset inside every hdf5 file
        batch_size:     how many images to store in a single hdf5 file, the last file holds
                        the remaining images when the number of data is not a multiple of it
        ext_filter:     what format of data to use when loading images from a folder
        label_src1/2:   source of label data, which can be None, a json file containing a
                        dictionary of labels, a dictionary of labels, a 1-d numpy array or
                        a list of labels. A dictionary is indexed by the image file name
                        without extension, so it requires the data to be loaded from files
        label_name1/2:  name of the label dataset inside every hdf5 file
        label_preprocess_function1/2:
                        function applied to the labels of a batch before saving, called with
                        the keyword arguments data, data_range and debug
        label_range1/2: passed as data_range to the label preprocess function, required in
                        debug mode when the corresponding label source is given
        debug:          run the additional sanity checks when True
        vis:            passed to preprocess_image_caffe

    outputs:
        number of hdf5 files written, number of data processed
    '''

    # parse input
    assert is_path_exists_or_creatable(
        save_dir), 'save path should be a folder to save all hdf5 files'
    mkdir_if_missing(save_dir)
    assert isstring(
        data_name), 'dataset name is not correct'  # name for hdf5 data

    # convert data source to a list of image path or a list of numpy array image data
    if isfolder(data_src):
        print('data is loading from %s with extension .%s' % (data_src,
                                                              ext_filter))
        filelist, num_data = load_list_from_folder(data_src,
                                                   ext_filter=ext_filter)
        datalist = None
    elif isfile(data_src):
        print('data is loading from %s with extension .%s' % (data_src,
                                                              ext_filter))
        filelist, num_data = load_list_from_file(data_src)
        datalist = None
    elif islist(data_src):
        if debug:
            assert all(
                isimage(data_tmp) for data_tmp in data_src
            ), 'input data source is not a list of numpy array image data'
        datalist = data_src
        num_data = len(datalist)
        filelist = None
    else:
        assert False, 'data source format is not correct.'
    if debug:
        assert (datalist is None and filelist is not None) or (
            filelist is None and datalist is not None), 'data is not correct'
        if datalist is not None:
            assert len(datalist) == num_data, 'number of data is not equal'
        if filelist is not None:
            assert len(filelist) == num_data, 'number of data is not equal'

    # convert label sources to a label dictionary or a label list
    labeldict1, labellist1 = _load_label_source(label_src1,
                                                num_data,
                                                debug=debug)
    assert isfunction(label_preprocess_function1
                      ), 'label preprocess function is not correct.'
    labeldict2, labellist2 = _load_label_source(label_src2,
                                                num_data,
                                                debug=debug)
    assert isfunction(label_preprocess_function2
                      ), 'label preprocess function is not correct.'
    has_label1 = labeldict1 is not None or labellist1 is not None
    has_label2 = labeldict2 is not None or labellist2 is not None

    # check the label configuration once, before the generation starts
    label_configs = ((has_label1, labeldict1, label_name1, label_range1),
                     (has_label2, labeldict2, label_name2, label_range2))
    for has_label, labeldict, label_name, label_range in label_configs:
        if not has_label:
            continue
        if debug:
            assert isstring(label_name), 'label name is not correct'
            assert label_range is not None, 'label range is not correct'
        if labeldict is not None:
            # the dictionary is indexed by file name, which only exists
            # when the data is loaded from files
            assert filelist is not None, \
                'label dictionary requires the data to be loaded from files'
            if debug:
                assert len(filelist) == len(
                    labeldict), 'file list is not equal to label dictionary'

    # warm up
    if datalist is not None:
        size_data = datalist[0].shape
    else:
        size_data = imread(filelist[0]).shape
    labels1 = np.zeros((batch_size, 1),
                       dtype='float32') if has_label1 else None
    labels2 = np.zeros((batch_size, 1),
                       dtype='float32') if has_label2 else None

    # start generating
    count_hdf = 0  # count number of hdf5 file written
    save_path = None  # path of the last hdf5 file written
    clock = Timer()
    datalist_batch = list()
    for i in range(num_data):
        clock.tic()
        if filelist is not None:
            imagefile = filelist[i]
            _, name, _ = fileparts(imagefile)
            img = imread(imagefile).astype('float32')
            max_value = np.max(img)
            if 1 < max_value <= 255:
                img = img / 255.0  # scale the image data to (0, 1)
            if debug:
                min_value = np.min(img)
                assert 0 <= min_value <= 1, 'data is not in [0, 1]'
        else:
            img = datalist[i]
        if debug:
            assert size_data == img.shape
        datalist_batch.append(img)

        # process label, the row in the label buffer matches the position
        # of the image inside the current batch
        row = i % batch_size
        if labeldict1 is not None:
            labels1[row, 0] = float(labeldict1[name])
        elif labellist1 is not None:
            labels1[row, 0] = float(labellist1[i])
        if labeldict2 is not None:
            labels2[row, 0] = float(labeldict2[name])
        elif labellist2 is not None:
            labels2[row, 0] = float(labellist2[i])

        # save to hdf5 once the batch is full, or when the data runs out so
        # that the last partial batch is not lost
        if (i + 1) % batch_size == 0 or i == num_data - 1:
            num_in_batch = len(datalist_batch)
            data = preprocess_image_caffe(
                datalist_batch, debug=debug, vis=vis
            )  # swap channel, transfer from list of HxWxC to NxCxHxW

            # write to hdf5 format, a file is named after the last image it
            # contains when the data is loaded from files
            if filelist is not None:
                save_path = os.path.join(save_dir, '%s.hdf5' % name)
            else:
                save_path = os.path.join(
                    save_dir, 'image_%010d.hdf5' % (count_hdf + 1))
            # the context manager closes the file even if a write fails
            with h5py.File(save_path, 'w') as h5f:
                h5f.create_dataset(data_name, data=data, dtype='float32')
                if has_label1:
                    # only keep the rows filled for this batch
                    batch_labels1 = label_preprocess_function1(
                        data=labels1[:num_in_batch],
                        data_range=label_range1,
                        debug=debug)
                    h5f.create_dataset(label_name1,
                                       data=batch_labels1,
                                       dtype='float32')
                if has_label2:
                    batch_labels2 = label_preprocess_function2(
                        data=labels2[:num_in_batch],
                        data_range=label_range2,
                        debug=debug)
                    h5f.create_dataset(label_name2,
                                       data=batch_labels2,
                                       dtype='float32')

            # start the next batch from clean buffers
            if has_label1:
                labels1 = np.zeros((batch_size, 1), dtype='float32')
            if has_label2:
                labels2 = np.zeros((batch_size, 1), dtype='float32')
            count_hdf = count_hdf + 1
            datalist_batch = list()
        average_time = clock.toc()
        # no file exists yet while the very first batch is being filled
        last_saved = save_path if save_path is not None else '(pending)'
        print(
            'saving to %s: %d/%d, average time:%.3f, elapsed time:%s, estimated time remaining:%s'
            % (last_saved, i + 1, num_data, average_time,
               format_time(average_time * i),
               format_time(average_time * (num_data - i))))

    return count_hdf, num_data
Example #17
0
 def _datatype_check(self, datatype):
     '''
     check that the data type is a string listed in DATATYPE

     raises:
         AssertionError otherwise
     '''
     assert isstring(datatype), 'the type of data should be a string'
     # compare by value: `is` only succeeds when both strings happen to be
     # the very same object, which is not guaranteed for equal strings
     assert any(datatype == item for item in DATATYPE), \
      'type of data should be one of "%s"' \
      % '" "'.join(str(item) for item in DATATYPE)