def n_devide(input_list, part, except_list=None):
    """Split the indexes of ``input_list`` into ``part`` groups of equal size.

    Indexes listed in ``except_list`` take no part in the split. Every group
    holds ``ceil(remaining / part)`` indexes; when the remaining indexes do
    not divide evenly, some groups share an index so that all groups end up
    with the same size.

    :param input_list: the list to split (only its length is used)
    :param part: number of groups to produce
    :param except_list: indexes that must not take part in the split
    :return: list of ``part`` index lists; the caller has to keep
        ``input_list`` around to turn the indexes back into data
    """
    excluded = set() if except_list is None else set(except_list)
    total = len(input_list)
    assert float(total - len(excluded)) / part >= 1.0, \
        pinf.CError('len of input_list : %d seperate part : %d, cannot be possible'
                    % (total, part))
    # A valid index lies in [0, total); ``<=`` used to let ``total`` through
    # and the failure only surfaced later as a ValueError from list.remove.
    assert all(0 <= i < total for i in excluded), \
        pinf.CError('except list: %s contain element which not in input_list'
                    % (except_list,))

    target_list = [i for i in range(total) if i not in excluded]
    sample_amount = math.ceil(len(target_list) / part)
    # Number of groups that must borrow one index to reach sample_amount.
    residue_amount = sample_amount * part - len(target_list)

    gather = []
    for i in range(part - 1):
        sample = random.sample(target_list, sample_amount)
        gather.append(list(sample))
        if i < residue_amount:
            # Leave one sampled index in the pool so a later group reuses it.
            sample.remove(random.sample(sample, 1)[0])
        taken = set(sample)
        target_list = [j for j in target_list if j not in taken]
    gather.append(target_list)
    return gather
def weight_init(name='undefined', method='msra', height=None, width=None,
                input_channel=None, output_channel=None, collection='TRAINABLED'):
    """Create a float32 weight variable and register it in ``collection``.

    The variable is named ``name + '_weight'`` and has the shape
    ``[height, width, input_channel, output_channel]``.

    :param name: prefix of the variable name
    :param method: initialization method, only 'msra' is supported
    :param height: first dimension of the weight
    :param width: second dimension of the weight
    :param input_channel: third dimension of the weight
    :param output_channel: fourth dimension of the weight
    :param collection: graph collection the variable is added to
    :return: the created variable
    """
    assert method in ['msra'], 'method not support'
    if method == 'msra':
        # The module is ``tf.contrib.layers`` and the keyword is ``mode``.
        initer = tf.contrib.layers.variance_scaling_initializer(
            factor=2.0, mode='FAN_IN', uniform=False)
    else:
        # Only reachable when asserts are stripped (python -O).
        sys.exit()
    w = tf.get_variable(
        name=name + '_weight',
        shape=[height, width, input_channel, output_channel],
        dtype=tf.float32,
        initializer=initer)
    Util.AddToCollectionInfo(tf.GraphKeys.GLOBAL_VARIABLES, w)
    tf.add_to_collection(collection, w)
    Util.AddToCollectionInfo(collection, w)
    return w
def Activate_Layer(
        name='undefined',
        input=None,
        method='LeakReLU'
):
    """Apply an activation to ``input`` and register the result in 'ActiOut'.

    Default data format is 'NHWC'.

    :param name: prefix of the layer name
    :param input: tensor to activate
    :param method: 'LeakReLU' (alpha=0.1) or 'ReLU'
    :return: the activated tensor
    """
    assert method in ['LeakReLU', 'ReLU'], Util.CError('method is not supported')
    if method == 'LeakReLU':
        activate = keras.layers.LeakyReLU(
            alpha=0.1,
            name=name + 'LeakReLU'
        )(input)
    elif method == 'ReLU':
        # The layer has to be built and applied to the input, mirroring the
        # LeakReLU branch; the original statement stopped at a truncated name.
        activate = keras.layers.ReLU(
            name=name + 'ReLU'
        )(input)
    else:
        # Only reachable when asserts are stripped (python -O).
        Util.CError('method is not supported!')
        sys.exit()
    tf.add_to_collection(name='ActiOut', value=activate)
    Util.CLayerInfo(name, input, activate)
    Util.AddToCollectionInfo('ActiOut', activate)
    return activate
def Bias_Layer(name='undefined', input=None):
    """Add a bias to ``input`` and register the result in 'BiasOut'.

    Default data format is 'NHWC'. The bias variable is sized after the last
    dimension of ``input`` and is created in the 'BiasBias' collection.

    :param name: name of the op, also passed to the bias initializer
    :param input: tensor the bias is added to
    :return: the biased tensor
    """
    channel_count = input.get_shape().as_list()[-1]
    bias_variable = init.bias_init(
        name=name,
        output_channel=channel_count,
        collection='BiasBias'
    )
    biased = tf.nn.bias_add(
        value=input,
        bias=bias_variable,
        data_format='NHWC',
        name=name
    )

    tf.add_to_collection(name='BiasOut', value=biased)
    Util.AddToCollectionInfo('BiasOut', biased)
    Util.CLayerInfo(name, input, biased)
    return biased
def Conv_Layer(name='undefined', input=None, height=None, width=None,
               output_channel=None):
    """Convolve ``input`` with a new kernel and register the result in 'ConvOut'.

    The kernel comes from ``init.weight_init`` in the 'ConvWeight' collection;
    its input channel count is the last dimension of ``input``. The
    convolution uses stride 1 in every dimension and 'SAME' padding.

    :param name: name of the op, also passed to the weight initializer
    :param input: tensor to convolve
    :param height: kernel height
    :param width: kernel width
    :param output_channel: number of output channels
    :return: the convolved tensor
    """
    in_channels = input.get_shape().as_list()[-1]
    kernel = init.weight_init(
        name=name,
        height=height,
        width=width,
        input_channel=in_channels,
        output_channel=output_channel,
        collection='ConvWeight'
    )
    convolved = tf.nn.conv2d(
        input=input,
        filter=kernel,
        strides=[1, 1, 1, 1],
        padding='SAME',
        name=name
    )

    tf.add_to_collection(name='ConvOut', value=convolved)
    Util.AddToCollectionInfo('ConvOut', convolved)
    Util.CLayerInfo(name, input, convolved)
    return convolved
def cluster(self, method='k-means'):
    """Gather the ground-truth anchor box data ahead of clustering.

    The body ends once the data array is built, so nothing is returned.

    :param method: clustering algorithm, must be listed in
        ``self.SupportClusterAlgorithm``
    :return: None
    """
    assert method in self.SupportClusterAlgorithm, \
        pI.CError('cluster algorithm : %s is not support\n' % method)
    # NOTE(review): relies on GT_AnchorBox exposing ``get_keys()``; a plain
    # dict only has ``keys()`` - confirm the container type.
    anchor_keys = list(self.GT_AnchorBox.get_keys())
    box_rows = []
    for key in anchor_keys:
        # every entry contributes its items from position 2 onwards
        box_rows.append(self.GT_AnchorBox[key][2:])
    data = np.array(box_rows)
def check_no_empty_between_two_significative_data(csv_file):
    """Check that no empty row sits between two data rows of a csv file.

    Empty rows before the first data row and after the last data row are
    accepted.

    :param csv_file: path of the csv file
    :return: True when the file satisfies the rule, False otherwise
    """
    seen_data = False
    seen_gap = False
    with open(csv_file, 'r', newline='') as fp:
        # Count rows inside the loop so the reported row number is the
        # offending row (it used to be incremented once, before the loop).
        for row_number, row in enumerate(csv.reader(fp), start=1):
            if row != []:
                if seen_gap:
                    pinf.CKeyInfo('in raw %d data is unsatisfactory' % row_number)
                    return False
                seen_data = True
            elif seen_data:
                # An empty row after data: any later data row is a violation.
                seen_gap = True
    return True
def _calc_indexe_by_y_x_indexe(self, y, x):
    """Compute the label channel indexes that belong to the given y/x indexes.

    The result is stored in ``self.FeatureChannedlIndexes`` and returned.
    The output layout is y first, then x.

    :param y: y index; a list, an ndarray or a single int
    :param x: x index; a list, an ndarray or a single int
    :return: the computed channel indexes
    """
    assert type(y).__name__ in ['list', 'int', 'ndarray'], \
        Cinf.CError('y has the unlawful type : %s' % (type(y).__name__))
    assert type(x).__name__ in ['list', 'int', 'ndarray'], \
        Cinf.CError('x has the unlawful type : %s ' % (type(x).__name__))
    # A single int counts as one element; calling list() on it would raise.
    y_length = 1 if type(y).__name__ == 'int' else len(y)
    x_length = 1 if type(x).__name__ == 'int' else len(x)
    assert y_length == x_length, \
        Cinf.CError('y must has the same length with x, y: %s, x: %s'
                    % (y_length, x_length))
    if type(y).__name__ == 'list':
        # y has to be converted from y itself (it used to be built from x).
        y = np.array(y)
    if type(x).__name__ == 'list':
        x = np.array(x)
    # NOTE(review): ``x % self.Scalar`` takes the modulo against the whole
    # Scalar sequence, not a single entry - confirm this is intended.
    self.FeatureChannedlIndexes = (y * self.Scalar[1] + x % self.Scalar) - 1
    return self.FeatureChannedlIndexes
def list_not_shorter_than(list1, length):
    """Assert that ``list1`` holds at least ``length`` items.

    :param list1: the container whose length is checked
    :param length: the minimum number of items required
    :return: True when the check passes; a failed check raises AssertionError
    """
    long_enough = len(list1) >= length
    assert long_enough is True, pinf.CError(
        'list is shorter than %d' % length)
    return True
def test_sample_except():
    """Check that sample_except leaves out the excluded index.

    Sampling one index out of ``[1, 2]`` while excluding index 1 has to give
    ``[0]``. The check is repeated 20 times (presumably because the sampling
    is random). A failure now propagates; the former bare ``except``
    swallowed it, so the test could never fail.

    :return: True when every repetition passes
    """
    for _ in range(0, 20):
        assert CS.sample_except([1, 2], 1, [1]) == [0], \
            pinf.CError('test sample except failed')
    return True
def bias_init(name='undefined', method='zero', output_channel=None,
              collection='TRAINABLED'):
    """Create a float32 bias variable and register it in ``collection``.

    The variable is named ``name + '_bias'`` and has the shape
    ``[output_channel]``.

    :param name: prefix of the variable name
    :param method: 'zero' or 'one', the constant the bias starts from
    :param output_channel: length of the bias vector
    :param collection: graph collection the variable is added to
    :return: the created variable
    """
    assert method in ['zero', 'one'], print('method not support')
    if method == 'zero':
        make_initializer = tf.zeros_initializer
    elif method == 'one':
        make_initializer = tf.ones_initializer
    else:
        # Only reachable when asserts are stripped (python -O).
        sys.exit()
    bias = tf.get_variable(
        name=name + '_bias',
        shape=[output_channel],
        dtype=tf.float32,
        initializer=make_initializer())
    Util.AddToCollectionInfo(tf.GraphKeys.GLOBAL_VARIABLES, bias)
    tf.add_to_collection(collection, bias)
    Util.AddToCollectionInfo(collection, bias)
    return bias
def _generate_shift(self, point):
    """Build the label holding the fractional offset of every point.

    The offset of a point is its coordinate minus the truncated coordinate.
    Offsets are written at the rows/columns given by ``self.Indexes``; the y
    offset goes to the channel given by ``self.FeatureChannedlIndexes`` and
    the x offset to the channel right after it.

    :param point: point set, shape=[N, 2], [[y, x], ...]
    :return: tuple ``(shift, y_x_shift)``: the [N, 2] offsets and the label
        of shape [feature height, feature width, Scalar[0] * Scalar[1] * 2]
    """
    assert len(point) <= self.PointNum, \
        Cinf.CError('Point number is larger then the stipulated number %d'
                    % self.PointNum)
    # reshape keeps the [N, 2] layout even for an empty point set
    points = np.array(point, dtype=float).reshape(-1, 2)
    shift = points - np.trunc(points)

    y_x_shift = np.zeros(
        shape=[
            int(self.FeatureShape[0]),
            int(self.FeatureShape[1]),
            self.Scalar[0] * self.Scalar[1] * 2
        ])
    # ``shape`` is an attribute of ndarray, not a method.
    # NOTE(review): both masks are filled but not part of the return value,
    # which stays a 2-tuple - confirm whether callers need them.
    obj_shift_mask = np.zeros(shape=y_x_shift.shape)
    no_obj_shift_mask = np.ones(shape=y_x_shift.shape)

    rows = self.Indexes[:, 0]
    cols = self.Indexes[:, 1]
    feature_channel_y_indexes = self.FeatureChannedlIndexes
    feature_channel_x_indexes = feature_channel_y_indexes + 1

    # Column 0 holds every y offset and column 1 every x offset;
    # ``shift[0]`` / ``shift[1]`` would pick whole points instead.
    y_x_shift[rows, cols, feature_channel_y_indexes] = shift[:, 0]
    y_x_shift[rows, cols, feature_channel_x_indexes] = shift[:, 1]
    obj_shift_mask[rows, cols, feature_channel_y_indexes] = 1.0
    obj_shift_mask[rows, cols, feature_channel_x_indexes] = 1.0
    no_obj_shift_mask[rows, cols, feature_channel_y_indexes] = 0.0
    no_obj_shift_mask[rows, cols, feature_channel_x_indexes] = 0.0
    return shift, y_x_shift
def csv_item_seperated(self, file_list_full_path, rate, out_put_file_path,
                       ignore_indexes=None):
    """Write a leave-one-out config for a dataset stored as rows of csv files.

    The dataset is spread over several csv files, each holding part of the
    rows. A file must not contain empty rows between two data rows; empty
    rows at either end are allowed. Rows are numbered continuously over the
    files in the order given, split into ``round(1 / rate)`` parts and the
    result is dumped as json to ``out_put_file_path``.

    :param file_list_full_path: list of csv file paths
    :param rate: leave-one-out rate, float
    :param out_put_file_path: path of the json file to write, str
    :param ignore_indexes: rows to leave out, one list per file
        [[], [], ...]; None or an empty list means no file has such rows
    :return: None
    """
    if not ignore_indexes:
        # The old ``[]`` default failed on ``ignore_indexes[i]``.
        ignore_indexes = [[] for _ in file_list_full_path]

    info = dict()
    info['k-part'] = round(1 / rate)
    info['config_file_format'] = self.CsvItemSeperated
    info['ignore_indexes'] = ignore_indexes
    info['source_file_queue'] = file_list_full_path
    info['source_file_data_amount'] = dict()

    pinf.CKeyInfo(
        'all file path: %s\n'
        'sample rate: %f\n'
        'output file path: %s\n'
        'ignore indexes: %s' % (file_list_full_path, rate, out_put_file_path,
                                ignore_indexes))
    assert False not in [
        Ccsv.check_no_empty_between_two_significative_data(i)
        for i in file_list_full_path
    ]

    for path in file_list_full_path:
        info['source_file_data_amount'][path] = Ccsv.count_csv_file_row(path)
    total_amount = sum(
        info['source_file_data_amount'][path] for path in file_list_full_path)

    # Shift the per-file row indexes into the continuous numbering.
    ignore_list = list()
    begin = 0
    for position, path in enumerate(file_list_full_path):
        ignore_list += [j + begin for j in ignore_indexes[position]]
        begin += info['source_file_data_amount'][path]

    gather = CFS.n_devide(list(range(0, total_amount)), info['k-part'],
                          ignore_list)
    for i in range(0, info['k-part']):
        info[i] = gather[i]

    # ``with`` closes the file even when the dump fails.
    with open(out_put_file_path, 'w') as fs:
        fs.write(json.dumps(info, indent=4))
def Pool_Layer(
        name='undefined',
        input=None,
        height=None,
        width=None,
        stride=None,
        method='MAX'
):
    """Pool ``input`` with max pooling or global average pooling.

    ``method`` is matched case-insensitively, so the default 'MAX' selects
    max pooling (the default used to be rejected by the assert below).

    :param name: prefix of the op name
    :param input: tensor to pool; 'global-avg' assumes the 'NHWC' layout
    :param height: window height, used by max pooling
    :param width: window width, used by max pooling
    :param stride: strides handed to ``tf.nn.pool``, used by max pooling
    :param method: 'max' or 'global-avg'
    :return: the pooled tensor
    """
    method = method.lower()
    assert method in ['max', 'global-avg'], Util.CError('method is not support!')
    if method == 'max':
        tmp = tf.nn.pool(
            input=input,
            window_shape=[height, width],
            pooling_type='MAX',
            padding='SAME',
            dilation_rate=None,
            strides=stride,
            name=name + '_max'
        )
    elif method == 'global-avg':
        # Global average pooling averages over the spatial axes (1 and 2 in
        # NHWC); axis 3 would average the channels away instead.
        tmp = tf.reduce_mean(
            input_tensor=input,
            axis=[1, 2],
            name=name + '_global-avg'
        )
    else:
        # Only reachable when asserts are stripped (python -O).
        Util.CError('method is not support!')
        sys.exit()
    Util.CLayerInfo(name, input, tmp)
    return tmp
def count_csv_file_row(csv_file, model=CountModelDefault):
    """Count the rows of a csv file in one of three modes.

    CountModelDefault counts every row. CountModelRejectNoneEnd counts every
    row except the run of empty rows ([]) at the end of the file.
    CountModelRejetNoneMiddle counts only the rows that are not empty.

    :param csv_file: path of the csv file
    :param model: one of the three count modes
    :return: the number of rows according to ``model``
    """
    assert model in [CountModelDefault, CountModelRejectNoneEnd, CountModelRejetNoneMiddle], \
        pinf.CError('model : %s not supported' % model)
    with open(csv_file) as fp:
        rows = csv.reader(fp)
        if model == CountModelDefault:
            return sum(1 for _ in rows)
        elif model == CountModelRejectNoneEnd:
            has_content = [row != [] for row in rows]
            remaining = len(has_content)
            # walk back from the end, dropping rows until one has content
            for filled in reversed(has_content):
                if filled:
                    break
                remaining -= 1
            return remaining
        elif model == CountModelRejetNoneMiddle:
            count = sum(1 for row in rows if row != [])
    return count
def csv_reader_features_seperated(self, config_file):
    """Load a features-separated config file and list the indexes per part.

    :param config_file: path of the json config file
    :return: dict with 'file_queue' (the file names found under part '0')
        and, for every part number ``i``, ``{'eval': [...], 'train': [...]}``
        holding one index list per file in 'file_queue' order
    """
    # ``with`` closes the handle; it used to stay open.
    with open(config_file, 'r') as f:
        inf = json.load(f)
    assert inf['config_file_format'] == self.CsvFeaturesSeperated, \
        pinf.CError('config file is not %s format' % self.CsvFeaturesSeperated)

    indexes = dict()
    indexes['file_queue'] = list(inf['0'].keys())
    for i in range(0, inf['k-part']):
        part = inf[str(i)]
        per_file = [part[j] for j in indexes['file_queue']]
        # NOTE(review): 'train' is filled with the same indexes as 'eval';
        # presumably it should hold the remaining samples - confirm before
        # relying on it.
        indexes[i] = {'eval': per_file, 'train': list(per_file)}
    return indexes
def __init__(self, image_shape, scalar, point_num, num_class,
             visiable_exist=True, point_unify=False):
    """Store the label configuration and derive the feature map shapes.

    :param image_shape: image [height, width]; both must be multiples of 32
    :param scalar: down-sampling scale [height, width]
    :param point_num: number of points
    :param num_class: number of point classes
    :param visiable_exist: whether the visibility attribute exists
    :param point_unify: whether every object owns the full set of points;
        when object A only has class-A points and object B only has class-B
        points, the points are not unified
    """
    self.ImageShape = image_shape
    self.Scalar = scalar
    self.PointNum = point_num
    self.ClassNum = num_class
    self.VisibleExit = visiable_exist
    self.PointUnify = point_unify
    self.FeatureChannedlIndexes = None
    self.Indexes = None
    self.IndexesInIndexes = None

    feature_shape = np.array(self.ImageShape) / 32.0
    # ``%s`` with a one-element tuple formats any shape; ``%d`` cannot
    # format a list.
    assert all(float(v).is_integer() for v in feature_shape), \
        Cinf.CError('Image shape is not the integral multiple of 32 : %s'
                    % (self.ImageShape,))
    # Integers, so the shape can be used directly to allocate arrays.
    self.FeatureShape = [int(v) for v in feature_shape]

    # Per cell: class scores + [y_shift, x_shift] + confidence, plus one
    # visibility channel and one existence channel when they apply.
    per_cell = self.ClassNum + 2 + 1
    if visiable_exist is True:
        per_cell += 1
    if point_unify is False:
        per_cell += 1
    self.FeatureShapeChannel = self.Scalar[0] * self.Scalar[1] * per_cell

    # list.append returns None and would also grow FeatureShape in place;
    # build the output shape as a new list instead.
    self.FeatureOutputShape = self.FeatureShape + [self.FeatureShapeChannel]
def test_n_devide():
    """Check the group count and group size returned by n_devide.

    Three inputs are split into 5 groups: 10 items (groups of 2), 11 items
    (groups of 3) and 12 items with index 0 excluded (groups of 3).

    :return: True when every check passes
    """
    # (number of items, excluded indexes or None, expected group size)
    cases = (
        (10, None, 2),
        (11, None, 3),
        (12, [0], 3),
    )
    for amount, excluded, expected_size in cases:
        a = list(range(0, amount))
        if excluded is None:
            result = CS.n_devide(a, 5)
        else:
            result = CS.n_devide(a, 5, excluded)
        assert len(result) == 5, pinf.CError(
            'error in n_devide, get len is not 5 : %d' % len(result))
        sizes = [len(i) for i in result]
        # Report the expected size and the real sizes; the messages used to
        # say "not 2" for every case and printed a list of booleans.
        assert all(size == expected_size for size in sizes), \
            pinf.CError('error in n_devide , get item is not %d : %s'
                        % (expected_size, sizes))
    return True
# NOTE(review): the statements down to the first `return True` are the tail of
# a definition whose header lies outside this view; they are kept unchanged.
    assert (False not in [len(i) == 3 for i in result]) is True, \
        pinf.CError('error in n_devide , get item is not 2 : %s' % [len(i) == 3 for i in result])
    a = list(range(0, 12))
    result = CS.n_devide(a, 5, [0])
    assert len(result) == 5, pinf.CError(
        'error in n_devide, get len is not 5 : %d' % len(result))
    assert (False not in [len(i) == 3 for i in result]) is True, \
        pinf.CError('error in n_devide , get item is not 2 : %s' % [len(i) == 3 for i in result])
    return True


def test_n_devide_return_target_data():
    """
    Call n_devide_return_target_data on the 26 characters of a string, split
    into 5 parts. The result is not inspected: in practice, as long as
    test_n_devide passes, this one will not fail either.
    :return: True
    """
    t = 'asdfghjklqwertyuiopzxcvbnm'
    # one list item per character of t
    a = [t[i] for i in range(0, len(t))]
    result = CS.n_devide_return_target_data(a, 5)
    return True
    pass


# Run every test of this module when it is executed as a script.
if __name__ == '__main__':
    assert test_sample_except() is True
    pinf.CKeyInfo('test sample_except successfully')
    assert test_n_devide() is True
    pinf.CKeyInfo('test n_devide successfully')
    assert test_n_devide_return_target_data() is True
    pinf.CKeyInfo('test n_devide_return_target_data successfully')
def test_calc_IoU():
    """Compute the IoU of one fixed prediction against one fixed truth and log it.

    Nothing is asserted; the value is only reported through Cinfo.CKeyInfo.
    """
    ground_truth = [100, 100, 100, 100]
    prediction = [50, 50, 100, 100]
    iou = Ciou.calc_IoU(
        true_coordinate=ground_truth,
        predicted_coordinate=prediction)
    report = 'truth: %s, predict: %s, IoU: %f' % (ground_truth, prediction, iou)
    Cinfo.CKeyInfo(report)
def csv_features_seperated(self, file_list_full_path, rate, unique_identification,
                           out_put_file_path, type='speed', ignore_indexes=None):
    """Write a leave-one-out config for samples whose features span several csv files.

    Every file stores one kind of feature, and a sample is recognised across
    files by the value in its identification column. All files must hold the
    same number of rows. The rows of the first file are split into
    ``round(1 / rate)`` parts; for every other file the matching rows are
    found through the identification value. The result is dumped as json to
    ``out_put_file_path``.

    :param file_list_full_path: list of csv file paths
    :param rate: leave-one-out rate, float
    :param unique_identification: index of the column holding the
        identification value (used as ``row[unique_identification]``)
    :param out_put_file_path: path of the json file to write
    :param type: 'speed' keeps all identification columns in memory;
        'memory' is not implemented and exits
    :param ignore_indexes: row indexes of the csv files to leave out
    :return: None
    :raises ValueError: when an identification value of the first file is
        missing from another file
    """
    if ignore_indexes is None:
        ignore_indexes = []
    pinf.CKeyInfo('all file path: %s\n'
                  'sample rate: %f\n'
                  'unique identification: %d\n'
                  'output file path: %s\n'
                  'ignore indexes: %s' %
                  (file_list_full_path, rate, unique_identification,
                   out_put_file_path, ignore_indexes))
    info = dict()
    part_num = round(1 / rate)
    info['k-part'] = part_num
    info['config_file_format'] = self.CsvFeaturesSeperated
    info['ignore_indexes'] = ignore_indexes
    if type == 'speed':
        # Identification column of every file, keyed by the file path.
        key_info = dict()
        for path in file_list_full_path:
            with open(path, 'r') as fp:
                key_info[path] = [row[unique_identification]
                                  for row in csv.reader(fp)]
        assert self._check_same_length(key_info) is True, \
            pinf.CError('not all csv file has the same number of data')

        reference_path = file_list_full_path[0]
        reference_keys = key_info[reference_path]
        info['sample_amount'] = len(reference_keys)
        # part_num leave-one-out splits of the first file's rows.
        index = CFS.n_devide(reference_keys, part=part_num,
                             except_list=ignore_indexes)

        # First position of every identification value per file, built once
        # instead of scanning the column for every single lookup.
        first_position = dict()
        for path in file_list_full_path[1:]:
            positions = dict()
            for position, key in enumerate(key_info[path]):
                positions.setdefault(key, position)
            first_position[path] = positions

        for i in range(0, part_num):
            info[i] = dict()
            info[i][reference_path] = index[i]
            for path in file_list_full_path[1:]:
                positions = first_position[path]
                try:
                    info[i][path] = [positions[reference_keys[k]]
                                     for k in index[i]]
                except KeyError as missing:
                    # Same exception type list.index raised before.
                    raise ValueError('%s is not in list' % missing) from None
    elif type == 'memory':
        print('this kind method is not complete!')
        sys.exit()
    else:
        print('illegal type')
        sys.exit()

    # ``with`` closes the file even when the dump fails.
    with open(out_put_file_path, 'w') as fs:
        fs.write(json.dumps(info, indent=4))
def test_csv_item_seperated():
    """Run csv_item_seperated once on two csv files and once on a single file.

    :return: True when both calls finish
    """
    first_csv = './data/test_LeaveOneOut-test_csv_item_seperated1.csv'
    second_csv = './data/test_LeaveOneOut-test_csv_item_seperated2.csv'
    splitter = LOO.LeaveOneOut()

    # two source files, each with its own ignored rows
    splitter.csv_item_seperated(
        [first_csv, second_csv],
        rate=0.1,
        out_put_file_path='./data/test_LeaveOneOut-test_csv_item_seperated.json',
        ignore_indexes=[[0, 1], [38]])

    # a single source file
    splitter.csv_item_seperated(
        [first_csv],
        rate=0.1,
        out_put_file_path='./data/test_LeaveOneOut-test_csv_item_seperated-only_one.json',
        ignore_indexes=[[0, 1]])
    return True


# Run the tests of this module when it is executed as a script.
if __name__ == '__main__':
    pinf.CKeyInfo('-------------testing csv_features_seperated--------------')
    test_csv_features_seperated()

    pinf.CKeyInfo(
        '-------------testing csv_reader_features_seperated--------------')
    assert test_csv_reader_features_seperated() is True
    pinf.CKeyInfo('successed')

    pinf.CKeyInfo('-------------testing csv_item_seperated--------------')
    test_csv_item_seperated()
    pinf.CKeyInfo('successed')
def BatchNormal_Layer(
        name='undefined',
        input=None,
        train=True,
):
    """Batch-normalize ``input`` and register the result in 'BnOut'.

    Default data format is 'NHWC'. When ``train`` is true the batch
    statistics are used and their moving averages are updated; otherwise the
    moving averages are used. The module globals MOVING_DECAY and BNEPS
    supply the moving-average decay and the variance epsilon.

    :param name: prefix of the op and variable names
    :param input: tensor to normalize
    :param train: bool or boolean tensor selecting the training behaviour
    :return: the normalized tensor
    """
    global MOVING_DECAY
    global BNEPS
    assert Util.CGlobalExit('MOVING_DECAY')
    assert Util.CGlobalExit('BNEPS')

    # tf.cond needs a tensor predicate; ``tf.bool`` is a dtype and cannot be
    # called to build one, so plain bools are converted here.
    train = tf.convert_to_tensor(train, dtype=tf.bool)

    # Per-channel statistics: reduce over batch, height and width.
    train_mean = tf.reduce_mean(
        input_tensor=input,
        axis=[0, 1, 2],
        name=name + '_t_mean',
    )
    train_var = tf.reduce_mean(
        tf.square(
            x=tf.subtract(
                x=input,
                y=train_mean
            )
        ),
        axis=[0, 1, 2]
    )

    channel_count = input.get_shape().as_list()[-1]
    beta = init.bias_init(
        name=name + '_beta',
        method='zero',
        output_channel=channel_count,
        collection='BnBeta'
    )
    gama = init.bias_init(
        name=name + '_gama',
        method='one',
        output_channel=channel_count,
        collection='BnGama'
    )

    ema = tf.train.ExponentialMovingAverage(MOVING_DECAY)

    def depend_in_train():
        # ``apply`` returns one update op; the averaged values are read with
        # ``ema.average`` rather than unpacked from the op.
        update_moving_averages = ema.apply([train_mean, train_var])
        with tf.control_dependencies([update_moving_averages]):
            return tf.identity(train_mean), tf.identity(train_var)

    mean, var = tf.cond(
        train,
        depend_in_train,
        lambda: (ema.average(train_mean), ema.average(train_var))
    )

    bn = tf.nn.batch_normalization(
        x=input,
        mean=mean,
        variance=var,
        offset=beta,
        scale=gama,
        variance_epsilon=BNEPS
    )
    tf.add_to_collection(name='BnOut', value=bn)
    Util.AddToCollectionInfo('BnOut', bn)
    Util.CLayerInfo(name, input, bn)
    return bn