def main():
    """Run the DeepDream demo: load Inception, then "dream" on ``test.png``.

    The frozen ``tensorflow_inception_graph.pb`` is read from ``data_dir`` and
    imported behind a mean-subtracting ``input`` placeholder.  The squared
    activations of layer ``mixed4c`` are then maximised with multi-octave,
    tiled gradient ascent, and the image is shown after every octave (first
    with matplotlib, then in a blocking OpenCV window).
    """
    # Step 1 - locate Google's pre-trained network.  The inception5h archive
    # (https://storage.googleapis.com/download.tensorflow.org/models/inception5h.zip)
    # is expected to be downloaded and extracted into data_dir beforehand.
    data_dir = 'F:/Deep_Dream/inception5h'
    # start with a gray image with a little noise
    img_noise = np.random.uniform(size=(224, 224, 3)) + 100.0
    model_fn = 'tensorflow_inception_graph.pb'

    # Step 2 - Creating Tensorflow session and loading the model
    graph = tf.Graph()
    sess = tf.InteractiveSession(graph=graph)
    with tf.gfile.FastGFile(os.path.join(data_dir, model_fn), 'rb') as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
    t_input = tf.placeholder(np.float32, name='input')  # define the input tensor
    imagenet_mean = 117.0
    t_preprocessed = tf.expand_dims(t_input - imagenet_mean, 0)
    tf.import_graph_def(graph_def, {'input': t_preprocessed})

    layers = [op.name for op in graph.get_operations()
              if op.type == 'Conv2D' and 'import/' in op.name]
    feature_nums = [int(graph.get_tensor_by_name(name + ':0').get_shape()[-1])
                    for name in layers]

    print('Number of layers', len(layers))
    print('Total number of feature channels:', sum(feature_nums))

    # Helper functions for TF Graph visualization
    # pylint: disable=unused-variable
    def strip_consts(graph_def, max_const_size=32):
        """Strip large constant values from graph_def."""
        strip_def = tf.GraphDef()
        for n0 in graph_def.node:
            n = strip_def.node.add()  # pylint: disable=maybe-no-member
            n.MergeFrom(n0)
            if n.op == 'Const':
                tensor = n.attr['value'].tensor
                size = len(tensor.tensor_content)
                if size > max_const_size:
                    # tensor_content is a bytes field; assigning a str
                    # raises TypeError on Python 3.
                    tensor.tensor_content = b"<stripped %d bytes>" % size
        return strip_def

    def rename_nodes(graph_def, rename_func):
        """Return a copy of graph_def with every node name passed through rename_func."""
        res_def = tf.GraphDef()
        for n0 in graph_def.node:
            n = res_def.node.add()  # pylint: disable=maybe-no-member
            n.MergeFrom(n0)
            n.name = rename_func(n.name)
            for i, s in enumerate(n.input):
                # A leading '^' is kept in front of the renamed input.
                n.input[i] = rename_func(s) if s[0] != '^' else '^' + rename_func(s[1:])
        return res_def

    def showarray(a):
        """Display an image given as floats in [0, 1] with matplotlib, then OpenCV."""
        a = np.uint8(np.clip(a, 0, 1) * 255)
        plt.axis("off")
        plt.imshow(a)
        plt.show()
        # Swap the first and third channels before handing the image to OpenCV.
        a = cv2.cvtColor(a, cv2.COLOR_BGR2RGB)
        cv2.imshow("output", a)
        cv2.waitKey(0)  # blocks until a key is pressed
        cv2.destroyAllWindows()

    def visstd(a, s=0.1):
        '''Normalize the image range for visualization'''
        return (a - a.mean()) / max(a.std(), 1e-4) * s + 0.5

    def T(layer):
        '''Helper for getting layer output tensor'''
        return graph.get_tensor_by_name("import/%s:0" % layer)

    def render_naive(t_obj, img0=img_noise, iter_n=20, step=1.0):
        """Plain gradient ascent on the mean of t_obj, starting from img0."""
        t_score = tf.reduce_mean(t_obj)  # defining the optimization objective
        t_grad = tf.gradients(t_score, t_input)[0]  # automatic differentiation

        img = img0.copy()
        for _ in range(iter_n):
            g, _ = sess.run([t_grad, t_score], {t_input: img})
            # normalizing the gradient, so the same step size should work
            g /= g.std() + 1e-8  # for different layers and networks
            img += g * step
        showarray(visstd(img))

    def tffunc(*argtypes):
        '''Helper that transforms TF-graph generating function into a regular one.
        See "resize" function below.
        '''
        placeholders = list(map(tf.placeholder, argtypes))

        def wrap(f):
            out = f(*placeholders)

            def wrapper(*args, **kw):
                return out.eval(dict(zip(placeholders, args)),
                                session=kw.get('session'))
            return wrapper
        return wrap

    def resize(img, size):
        """Bilinearly resize a single image tensor to ``size``."""
        img = tf.expand_dims(img, 0)
        return tf.image.resize_bilinear(img, size)[0, :, :, :]
    resize = tffunc(np.float32, np.int32)(resize)

    def calc_grad_tiled(img, t_grad, tile_size=512):
        '''Compute the value of tensor t_grad over the image in a tiled way.
        Random shifts are applied to the image to blur tile boundaries over
        multiple iterations.'''
        sz = tile_size
        h, w = img.shape[:2]
        sx, sy = np.random.randint(sz, size=2)
        img_shift = np.roll(np.roll(img, sx, 1), sy, 0)
        grad = np.zeros_like(img)
        for y in range(0, max(h - sz // 2, sz), sz):
            for x in range(0, max(w - sz // 2, sz), sz):
                sub = img_shift[y:y + sz, x:x + sz]
                g = sess.run(t_grad, {t_input: sub})
                grad[y:y + sz, x:x + sz] = g
        # Undo the random shift so the gradient lines up with the input image.
        return np.roll(np.roll(grad, -sx, 1), -sy, 0)

    def render_deepdream(t_obj, img0=img_noise,
                         iter_n=10, step=1.5, octave_n=4, octave_scale=1.4):
        """Multi-octave gradient ascent on the mean of t_obj, starting from img0."""
        t_score = tf.reduce_mean(t_obj)  # defining the optimization objective
        t_grad = tf.gradients(t_score, t_input)[0]  # automatic differentiation

        # Work on a copy: with octave_n == 1 the in-place "+=" below would
        # otherwise modify the caller's (or the default) image.
        img = img0.copy()
        # split the image into a number of octaves
        octaves = []
        for _ in range(octave_n - 1):
            hw = img.shape[:2]
            lo = resize(img, np.int32(np.float32(hw) / octave_scale))
            hi = img - resize(lo, hw)
            img = lo
            octaves.append(hi)

        # generate details octave by octave
        for octave in range(octave_n):
            if octave > 0:
                hi = octaves[-octave]
                img = resize(img, hi.shape[:2]) + hi
            for _ in range(iter_n):
                g = calc_grad_tiled(img, t_grad)
                img += g * (step / (np.abs(g).mean() + 1e-7))

            # this will usually be like 3 or 4 octaves
            # Step 5 output deep dream image via matplotlib
            showarray(img / 255.0)

    try:
        # Step 3 - Pick a layer to enhance our image
        layer = 'mixed4c'

        # open image; force three channels so RGBA / palette / grayscale
        # files still match the network's RGB input
        img0 = PIL.Image.open('test.png').convert('RGB')
        img0 = np.float32(img0)

        # Step 4 - Apply gradient ascent to that layer
        render_deepdream(tf.square(T(layer)), img0)
    finally:
        sess.close()
def predict(self, img):
    """Classify the colour of the most confident traffic light found in ``img``.

    On first use the frozen graph at ``self.checkpoint`` is imported under the
    ``object_detection_api`` name scope and a session is created.  The middle
    horizontal band of the image is then split into three tiles; each tile is
    run through the detector, and the first detection that passes the score,
    class and area filters is cropped and classified by where its brightest
    pixels sit vertically.

    Args:
        img: image array indexed as ``[row, column, channel]``; it is passed
            to ``cv2.cvtColor(..., cv2.COLOR_RGB2HSV)`` after cropping.

    Returns:
        ``TrafficLight.RED`` when the bright pixels lie in the top 40% of the
        crop, ``TrafficLight.GREEN`` when they lie at or below 55%,
        ``TrafficLight.YELLOW`` in between, and ``TrafficLight.UNKNOWN`` when
        no tile yields a usable detection.
    """
    if self.sess is None:
        gd = tf.GraphDef()
        # Close the model file once its bytes have been parsed.
        with tf.gfile.GFile(self.checkpoint, "rb") as model_file:
            gd.ParseFromString(model_file.read())
        tf.import_graph_def(gd, name="object_detection_api")
        self.sess = tf.Session()

        g = tf.get_default_graph()
        self.image = g.get_tensor_by_name(
            "object_detection_api/image_tensor:0")
        self.boxes = g.get_tensor_by_name(
            "object_detection_api/detection_boxes:0")
        self.scores = g.get_tensor_by_name(
            "object_detection_api/detection_scores:0")
        self.classes = g.get_tensor_by_name(
            "object_detection_api/detection_classes:0")

    img_h, img_w = img.shape[:2]

    for h0 in [img_h // 3]:
        for w0 in [0, img_w // 3, img_w * 2 // 3]:
            grid = img[h0:h0 + img_h // 3, w0:w0 + img_w // 3, :]  # grid

            pred_boxes, pred_scores, pred_classes = self.sess.run(
                [self.boxes, self.scores, self.classes],
                feed_dict={self.image: np.expand_dims(grid, axis=0)})

            pred_boxes = pred_boxes.squeeze()
            pred_scores = pred_scores.squeeze()  # in descending order
            pred_classes = pred_classes.squeeze()

            traffic_light = None
            h, w = grid.shape[:2]
            for i in range(pred_boxes.shape[0]):
                box = pred_boxes[i]
                score = pred_scores[i]
                if score < self.prob_thr:
                    continue
                if pred_classes[i] != self.TRAFFIC_LIGHT_CLASS:
                    continue

                # Boxes are scaled by the tile size as (y0, x0, y1, x1).
                x0, y0 = box[1] * w, box[0] * h
                x1, y1 = box[3] * w, box[2] * h
                x0, y0, x1, y1 = map(int, [x0, y0, x1, y1])

                # float() keeps this a true ratio even under Python 2
                # integer division.
                area = abs((x1 - x0) * (y1 - y0)) / float(w * h)
                if area <= 0.001:
                    continue

                # take the first one - with the most confidence
                traffic_light = grid[y0:y1, x0:x1]
                break

            if traffic_light is None:
                continue  # nothing usable in this tile, try the next one

            brightness = cv2.cvtColor(traffic_light,
                                      cv2.COLOR_RGB2HSV)[:, :, -1]
            # Convert to a Python int first: uint8 - 30 wraps around under
            # NumPy 2 promotion rules when the maximum is below 30.
            threshold = int(brightness.max()) - 30
            hs, ws = np.where(brightness >= threshold)
            hs_mean = hs.mean()
            tl_h = traffic_light.shape[0]
            if hs_mean / tl_h < 0.4:
                return TrafficLight.RED
            elif hs_mean / tl_h >= 0.55:
                return TrafficLight.GREEN
            else:
                return TrafficLight.YELLOW

    return TrafficLight.UNKNOWN
def fuse_resize_and_conv(input_graph_def, output_node_names):
    """Fold ResizeBilinear / MirrorPad ops that feed a Conv2D into one fused op.

    Every ``Conv2D`` whose first input is a ``MirrorPad`` (optionally fed by a
    ``ResizeBilinear``) or a ``ResizeBilinear`` is replaced by a node of the
    same name with op ``FusedResizeAndPadConv2D`` (a resize is present) or
    ``FusedPadConv2D`` (pad only).  When there is no ``MirrorPad``, an
    all-zero ``Const`` paddings node is synthesised so the padding stage does
    nothing.  Original nodes whose reference count drops below one are left
    out of the result.

    Args:
      input_graph_def: A GraphDef containing a model.
      output_node_names: Names of nodes that must be kept; each counts as one
        extra reference.

    Returns:
      Modified graph with resize and pad ops merged.

    Raises:
      ValueError: If the graph is badly formed with duplicate node names.
    """
    # Index nodes by name, rejecting duplicates.
    nodes_by_name = {}
    for node in input_graph_def.node:
        if node.name in nodes_by_name:
            raise ValueError("Duplicate node names detected for ", node.name)
        nodes_by_name[node.name] = node

    # Count how many times each node is consumed (graph outputs count too).
    ref_counts = collections.defaultdict(int)
    for node in input_graph_def.node:
        for input_name in node.input:
            ref_counts[node_name_from_input(input_name)] += 1
    for output_name in output_node_names:
        ref_counts[output_name] += 1

    added_nodes = []
    for conv_op in input_graph_def.node:
        if conv_op.op != "Conv2D":
            continue

        input_op = node_from_map(nodes_by_name, conv_op.input[0])
        mirror_pad_op = None
        resize_op = None
        if input_op.op == "MirrorPad":
            mirror_pad_op = input_op
            pad_source = node_from_map(nodes_by_name, mirror_pad_op.input[0])
            if pad_source.op == "ResizeBilinear":
                resize_op = pad_source
        elif input_op.op == "ResizeBilinear":
            resize_op = input_op

        # There are no ops to be fused into the conv, so skip replacing this one.
        if not mirror_pad_op and not resize_op:
            continue

        # The conv is being replaced, so zero its count to drop the old node,
        # and release the references it held on the ops being absorbed.
        ref_counts[conv_op.name] = 0
        if mirror_pad_op:
            ref_counts[mirror_pad_op.name] -= 1
        if resize_op:
            ref_counts[resize_op.name] -= 1

        fused_conv_op = tf.NodeDef()
        fused_conv_op.op = (
            "FusedResizeAndPadConv2D" if resize_op else "FusedPadConv2D")
        fused_conv_op.name = conv_op.name

        if mirror_pad_op:
            mirror_paddings_name = mirror_pad_op.input[1]
            mirror_paddings_mode = mirror_pad_op.attr["mode"]
        else:
            # No MirrorPad: feed a 4x2 block of zeros so the padding stage of
            # the fused operation is a no-op.
            paddings_op = tf.NodeDef()
            paddings_op.op = "Const"
            paddings_op.name = conv_op.name + "_dummy_paddings"
            paddings_op.attr["dtype"].CopyFrom(
                tf.AttrValue(type=tf.int32.as_datatype_enum))
            paddings_op.attr["value"].CopyFrom(
                tf.AttrValue(tensor=tensor_util.make_tensor_proto(
                    [0, 0, 0, 0, 0, 0, 0, 0], tf.int32, [4, 2])))
            added_nodes.append(paddings_op)
            mirror_paddings_name = paddings_op.name
            mirror_paddings_mode = tf.AttrValue(s=b"REFLECT")

        if resize_op:
            fused_inputs = [
                resize_op.input[0],
                resize_op.input[1],
                mirror_paddings_name,
                conv_op.input[1],
            ]
            fused_conv_op.input.extend(fused_inputs)
            fused_conv_op.attr["resize_align_corners"].CopyFrom(
                resize_op.attr["align_corners"])
        else:
            fused_inputs = [
                mirror_pad_op.input[0],
                mirror_paddings_name,
                conv_op.input[1],
            ]
            fused_conv_op.input.extend(fused_inputs)

        fused_conv_op.attr["T"].CopyFrom(conv_op.attr["T"])
        fused_conv_op.attr["mode"].CopyFrom(mirror_paddings_mode)
        for attr_name in ("strides", "padding"):
            fused_conv_op.attr[attr_name].CopyFrom(conv_op.attr[attr_name])
        added_nodes.append(fused_conv_op)

    # Copy over every node that is still referenced, then append the new ops.
    result_graph_def = tf.GraphDef()
    for node in input_graph_def.node:
        if ref_counts[node.name] >= 1:
            kept = tf.NodeDef()
            kept.CopyFrom(node)
            result_graph_def.node.extend([kept])
    result_graph_def.node.extend(added_nodes)
    return result_graph_def
def main():
    """Train a softmax layer on Inception-v3 bottleneck features and report accuracy.

    The pre-trained graph is read from ``MODEL_DIR/MODEL_FILE``; one fully
    connected layer is trained on cached bottleneck vectors for ``STEPS``
    iterations with plain gradient descent.  Validation accuracy is printed
    every 100 steps and on the last step, followed by the accuracy on the
    test split.
    """
    image_lists = create_image_lists(TEST_PERCENTAGE, VALIDATION_PERCENTAGE)
    n_classes = len(image_lists.keys())

    # Load the already-trained Inception-v3 model.
    with gfile.FastGFile(os.path.join(MODEL_DIR, MODEL_FILE), 'rb') as model_file:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(model_file.read())
    bottleneck_tensor, jpeg_data_tensor = tf.import_graph_def(
        graph_def,
        return_elements=[BOTTLENECK_TENSOR_NAME, JPEG_DATA_TENSOR_NAME])

    # Inputs of the new network: bottleneck vectors and one-hot labels.
    bottleneck_input = tf.placeholder(
        tf.float32, [None, BOTTLENECK_TENSOR_SIZE],
        name='BottleneckInputPlaceholder')
    ground_truth_input = tf.placeholder(
        tf.float32, [None, n_classes], name='GroundTruthInput')

    # A single fully connected layer.
    with tf.name_scope('final_training_ops'):
        weights = tf.Variable(
            tf.truncated_normal([BOTTLENECK_TENSOR_SIZE, n_classes],
                                stddev=0.001))
        biases = tf.Variable(tf.zeros([n_classes]))
        logits = tf.matmul(bottleneck_input, weights) + biases
        final_tensor = tf.nn.softmax(logits)

    # Cross-entropy loss and its optimiser.
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
        logits=logits, labels=ground_truth_input)
    cross_entropy_mean = tf.reduce_mean(cross_entropy)
    optimizer = tf.train.GradientDescentOptimizer(LEARNING_RATE)
    train_step = optimizer.minimize(cross_entropy_mean)

    # Accuracy: fraction of rows whose arg-max matches the label's arg-max.
    with tf.name_scope('evaluation'):
        correct_prediction = tf.equal(tf.argmax(final_tensor, 1),
                                      tf.argmax(ground_truth_input, 1))
        evaluation_step = tf.reduce_mean(
            tf.cast(correct_prediction, tf.float32))

    def make_feed(bottlenecks, labels):
        # Feed dict shared by the training and evaluation runs.
        return {bottleneck_input: bottlenecks, ground_truth_input: labels}

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        # Training loop.
        for step in range(STEPS):
            train_bottlenecks, train_ground_truth = get_random_cached_bottlenecks(
                sess, n_classes, image_lists, BATCH, 'training',
                jpeg_data_tensor, bottleneck_tensor)
            sess.run(train_step,
                     feed_dict=make_feed(train_bottlenecks, train_ground_truth))

            is_report_step = step % 100 == 0 or step + 1 == STEPS
            if not is_report_step:
                continue

            validation_bottlenecks, validation_ground_truth = get_random_cached_bottlenecks(
                sess, n_classes, image_lists, BATCH, 'validation',
                jpeg_data_tensor, bottleneck_tensor)
            validation_accuracy = sess.run(
                evaluation_step,
                feed_dict=make_feed(validation_bottlenecks,
                                    validation_ground_truth))
            print(
                'Step %d: Validation accuracy on random sampled %d examples = %.1f%%'
                % (step, BATCH, validation_accuracy * 100))

        # Measure accuracy on the held-out test data.
        test_bottlenecks, test_ground_truth = get_test_bottlenecks(
            sess, image_lists, n_classes, jpeg_data_tensor, bottleneck_tensor)
        test_accuracy = sess.run(
            evaluation_step,
            feed_dict=make_feed(test_bottlenecks, test_ground_truth))
        print('Final test accuracy = %.1f%%' % (test_accuracy * 100))
def load_pretrained_inception_v3(model_file):
    """Import the serialized graph stored at ``model_file`` into the default graph.

    Args:
        model_file: path of a binary ``GraphDef`` file.
    """
    with gfile.FastGFile(model_file, "rb") as model_fh:
        serialized_graph = model_fh.read()

    frozen_graph = tf.GraphDef()  # start from an empty graph definition
    frozen_graph.ParseFromString(serialized_graph)  # fill it from the file
    # name="" imports the nodes without adding a name prefix.
    tf.import_graph_def(frozen_graph, name="")
def predictor(path):
    """Classify the image at ``path`` and return its label.

    The image bytes are pushed through the frozen Inception graph to obtain a
    bottleneck vector, the trained classifier is restored from the
    ``model-9000`` checkpoint, and the index produced by the
    ``evaluation/ArgMax`` op is mapped through ``disease_dict``.

    Args:
        path: path of the image file; its raw bytes are fed to the
            ``DecodeJpeg/contents:0`` tensor.

    Returns:
        The label string for the predicted class (also printed).
    """
    # Model locations.  The checkpoint folder is produced by training.
    CHECKPOINT_DIR = r'C:\Users\echo1999\Documents\Github\MusicAnalyse\model\tuneAnalyse\runs\1562719894\checkpoints'
    INCEPTION_MODEL_FILE = r'C:\Users\echo1999\Documents\Github\MusicAnalyse\model\tuneAnalyse\tensorflow_inception_graph.pb'
    # The step-9000 checkpoint is used deliberately instead of the latest one.
    CHECKPOINT_PREFIX = CHECKPOINT_DIR + r'\model-9000'

    # Inception-v3 tensor names.
    BOTTLENECK_TENSOR_NAME = 'pool_3/_reshape:0'  # bottleneck layer output
    JPEG_DATA_TENSOR_NAME = 'DecodeJpeg/contents:0'  # image input

    # Class index -> label.
    disease_dict = {0: 'happy', 1: 'sad'}

    # Read the image bytes, closing the file afterwards.
    with tf.gfile.FastGFile(path, 'rb') as image_file:
        image_data = image_file.read()

    with tf.Graph().as_default() as graph:
        # "with tf.Session()" also closes the session on exit.
        with tf.Session() as sess:
            # Load the pre-trained Inception-v3 graph.
            with tf.gfile.FastGFile(INCEPTION_MODEL_FILE, 'rb') as f:
                graph_def = tf.GraphDef()
                graph_def.ParseFromString(f.read())
            bottleneck_tensor, jpeg_data_tensor = tf.import_graph_def(
                graph_def,
                return_elements=[
                    BOTTLENECK_TENSOR_NAME, JPEG_DATA_TENSOR_NAME
                ])

            # Run Inception to obtain the feature vector for the image.
            bottleneck_values = sess.run(bottleneck_tensor,
                                         {jpeg_data_tensor: image_data})
            # Squeeze to 1-D and wrap in a list to restore the batch
            # dimension expected by the fully connected layer.
            bottleneck_values = [np.squeeze(bottleneck_values)]

            # Restore the trained classifier graph and variables.
            saver = tf.train.import_meta_graph(CHECKPOINT_PREFIX + '.meta')
            saver.restore(sess, CHECKPOINT_PREFIX)

            # Look up the input placeholder and the prediction op by name.
            input_x = graph.get_operation_by_name(
                'BottleneckInputPlaceholder').outputs[0]
            predictions = graph.get_operation_by_name(
                'evaluation/ArgMax').outputs[0]

            all_predictions = sess.run(predictions,
                                       {input_x: bottleneck_values})

    # Read the index from the array itself; slicing the second character of
    # its string form breaks for any class index >= 10.
    index = int(np.ravel(all_predictions)[0])
    print(disease_dict[index])
    return disease_dict[index]
def get_dataset(self):
    """Build a shuffled, repeated, padded-batch ``tf.data.Dataset``.

    The graph at ``self.deepspeech_model`` is imported into the default graph
    under the ``deepspeech`` scope and a session is opened on it.  A generator
    then walks ``self.data_list`` (lines of ``folder|image_count``), loads the
    coefficient file and the audio for every folder, runs the audio features
    through the network's ``logits`` tensor, resamples the output to
    ``self.frame_rate`` and yields fixed-length slices.

    Returns:
        A dataset whose elements are batches of
        ``(coefficients [None, 257], windowed network output [None, 8, 29],
        slice length)``.
    """
    with tf.gfile.GFile(self.deepspeech_model, "rb") as model_file:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(model_file.read())

    graph = tf.get_default_graph()
    tf.import_graph_def(graph_def, name="deepspeech")
    input_tensor = graph.get_tensor_by_name('deepspeech/input_node:0')
    seq_length = graph.get_tensor_by_name('deepspeech/input_lengths:0')
    layer_6 = graph.get_tensor_by_name('deepspeech/logits:0')

    os.environ["CUDA_VISIBLE_DEVICES"] = '0'
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    # The session is used lazily by the generator, so it stays open.
    sess = tf.Session(graph=graph, config=config)

    def iterator():
        def interpolate_features(features, input_rate, output_rate,
                                 output_len=None):
            # Linearly resample every feature column from input_rate to
            # output_rate.
            input_len, num_features = features.shape[0], features.shape[1]
            if output_len is None:
                seq_len = input_len / float(input_rate)
                output_len = int(seq_len * output_rate)
            src_times = np.arange(input_len) / float(input_rate)
            dst_times = np.arange(output_len) / float(output_rate)
            resampled = np.zeros((output_len, num_features))
            for column in range(num_features):
                resampled[:, column] = np.interp(dst_times, src_times,
                                                 features[:, column])
            return resampled

        def audioToInputVector(audio, fs):
            numcontext = 9
            # MFCC coefficients of the raw audio.
            features = python_speech_features.mfcc(
                audio,
                samplerate=fs,
                numcep=self.num_mel_bins,
                winlen=0.025,
                winstep=0.01)
            # We only keep every second feature (BiRNN stride = 2)
            features = features[::2]
            # One stride per time step in the input
            num_strides = len(features)

            # Zero context before the first and after the last time step.
            context_pad = np.zeros((numcontext, self.num_mel_bins),
                                   dtype=features.dtype)
            features = np.concatenate((context_pad, features, context_pad))

            # Overlapping windows of numcontext (past) + 1 (present)
            # + numcontext (future) rows, built as a strided view.
            window_size = 2 * numcontext + 1
            row_stride, col_stride = features.strides[0], features.strides[1]
            windows = np.lib.stride_tricks.as_strided(
                features,
                (num_strides, window_size, self.num_mel_bins),
                (row_stride, row_stride, col_stride),
                writeable=False)

            # Flatten each window, copy out of the view, then standardise.
            flat = np.copy(np.reshape(windows, [num_strides, -1]))
            return (flat - np.mean(flat)) / np.std(flat)

        def proProcessVector(features):
            # One stride per time step in the input
            num_strides = len(features)
            width = features.shape[1]
            # 4 zero rows before and 3 after, matching the 8-row window below.
            left_pad = np.zeros((4, width), dtype=features.dtype)
            right_pad = np.zeros((3, width), dtype=features.dtype)
            features = np.concatenate((left_pad, features, right_pad))

            window_size = 8
            row_stride, col_stride = features.strides[0], features.strides[1]
            return np.lib.stride_tricks.as_strided(
                features,
                (num_strides, window_size, features.shape[1]),
                (row_stride, row_stride, col_stride),
                writeable=False)

        bfmcoeff_loader = BFMCoeffLoader()
        audio_loader = AudioLoader(sr=self.sample_rate)

        random.shuffle(self.data_list)
        for line in self.data_list:
            folder, img_count = line.strip().split('|')
            img_count = int(img_count)
            bfmcoeffs = bfmcoeff_loader.get_data(
                os.path.join(folder, self.bfmcoeff_name))
            pcm = audio_loader.get_data(os.path.join(folder, self.wav_name))

            # Skip entries with missing data or inconsistent frame counts.
            if bfmcoeffs is None or pcm is None or not img_count > 0:
                continue
            if bfmcoeffs.shape[0] != img_count:
                continue
            if self.min_squence_len > img_count:
                continue

            upper = (img_count if img_count < self.max_squence_len
                     else self.max_squence_len)
            rnd_len = random.randint(self.min_squence_len, upper)
            # NOTE(review): the random length drawn above is immediately
            # replaced by a fixed 25; the draw is kept so the random stream
            # is unchanged - confirm whether the override is intentional.
            rnd_len = 25
            slice_cnt = img_count // rnd_len

            input_vector = audioToInputVector(pcm, self.sample_rate)
            feed = {
                input_tensor: input_vector[np.newaxis, ...],
                seq_length: [input_vector.shape[0]],
            }
            network_output = sess.run(layer_6, feed_dict=feed)

            # Resample network output to self.frame_rate fps
            audio_len_s = float(pcm.shape[0]) / self.sample_rate
            num_frames = int(round(audio_len_s * self.frame_rate))
            network_output = interpolate_features(
                network_output[:, 0],
                self.frame_rate * self.frame_feature_scale,
                self.frame_rate,
                output_len=num_frames)
            network_output = np.squeeze(network_output)

            # Zero-pad at the end when there are fewer rows than images.
            if network_output.shape[0] < img_count:
                missing = img_count - network_output.shape[0]
                network_output = np.pad(network_output,
                                        ([0, missing], [0, 0]),
                                        'constant',
                                        constant_values=(0))

            for i in range(slice_cnt):
                bfmcoeff_slice = bfmcoeffs[i * rnd_len:(i + 1) * rnd_len, :]
                length = rnd_len
                start = int(i * length)
                network_output_slice = proProcessVector(
                    network_output[start:start + length])
                yield (bfmcoeff_slice, network_output_slice,
                       bfmcoeff_slice.shape[0])

    element_shapes = ([None, 257], [None, 8, 29], [])
    dataset = tf.data.Dataset.from_generator(
        iterator,
        output_types=(tf.float32, tf.float32, tf.int32),
        output_shapes=element_shapes)
    dataset = dataset.shuffle(self.shuffle_bufsize).repeat()
    dataset = dataset.padded_batch(self.batch_size,
                                   padded_shapes=([None, 257], [None, 8, 29],
                                                  []))
    return dataset
def get_frozen_graph(graph_file):
    """Parse the binary ``GraphDef`` stored at ``graph_file`` and return it.

    Args:
        graph_file: path of the serialized graph file.

    Returns:
        The populated ``tf.GraphDef``.
    """
    with tf.gfile.FastGFile(graph_file, "rb") as graph_fh:
        serialized = graph_fh.read()

    frozen_graph = tf.GraphDef()
    frozen_graph.ParseFromString(serialized)
    return frozen_graph
def inference_video(opt):
    """Run the frozen face detector over a video and pickle the detections.

    Frames are read from ``<opt.avi_dir>/<opt.reference>/video.avi``.  For each
    frame, every detection scoring above ``MIN_CONF`` is recorded as
    ``[frame_num, box, score]``.  The per-frame lists are written to
    ``<opt.work_dir>/<opt.reference>/faces.pckl`` and returned.

    Args:
        opt: object providing ``avi_dir``, ``work_dir`` and ``reference``.

    Returns:
        A list with one entry per frame, each a list of
        ``[frame_num, box, score]`` records.
    """
    # Path to frozen detection graph. This is the actual model that is used
    # for the object detection.
    PATH_TO_CKPT = './protos/frozen_inference_graph_face.pb'
    MIN_CONF = 0.3

    video_path = os.path.join(opt.avi_dir, opt.reference, 'video.avi')

    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
            od_graph_def.ParseFromString(fid.read())
        tf.import_graph_def(od_graph_def, name='')

    # The tensors never change, so look them up once instead of per frame.
    image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
    fetches = [
        detection_graph.get_tensor_by_name('detection_boxes:0'),
        detection_graph.get_tensor_by_name('detection_scores:0'),
        detection_graph.get_tensor_by_name('detection_classes:0'),
        detection_graph.get_tensor_by_name('num_detections:0'),
    ]

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    dets = []
    cap = cv2.VideoCapture(video_path)
    try:
        with tf.Session(graph=detection_graph, config=config) as sess:
            frame_num = 0
            while True:
                ret, image = cap.read()
                if not ret:
                    break

                image_np = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
                image_np_expanded = np.expand_dims(image_np, axis=0)

                # Actual detection.
                start_time = time.time()
                boxes, scores, _classes, _num = sess.run(
                    fetches, feed_dict={image_tensor: image_np_expanded})
                elapsed_time = time.time() - start_time

                score = scores[0]
                frame_dets = [
                    [frame_num, boxes[0][index].tolist(), score[index]]
                    for index in range(len(score))
                    if score[index] > MIN_CONF
                ]
                dets.append(frame_dets)

                # Guard against a zero reading from a coarse clock.
                rate = 1 / max(elapsed_time, 1e-9)
                print('%s-%05d; %d dets; %.2f Hz' %
                      (video_path, frame_num, len(frame_dets), rate))
                frame_num += 1
    finally:
        # Release the capture even when detection fails part-way through.
        cap.release()

    savepath = os.path.join(opt.work_dir, opt.reference, 'faces.pckl')
    with open(savepath, 'wb') as fil:
        pickle.dump(dets, fil)

    return dets
def size():
    """Detect objects on the Pi camera and return the top detection's box size.

    Frames are captured from the Picamera at 640x480, run through the
    ``ssdlite_mobilenet_v2_coco_2018_05_09`` frozen graph and displayed with
    the detections drawn on them.  Capture stops once the top-ranked detection
    has had class id 44 for three frames in a row, or when ``q`` is pressed.

    Returns:
        A 2-element array ``[box[3] - box[1], box[2] - box[0]]`` computed from
        the top-ranked box of the last processed frame, i.e. its width and
        height in the model's normalized coordinates.

    Raises:
        RuntimeError: If the camera delivered no frame at all.
    """
    # Import packages
    import os
    import sys

    import cv2
    import numpy as np
    import tensorflow as tf
    from picamera import PiCamera
    from picamera.array import PiRGBArray

    # This is needed since the working directory is the object_detection
    # folder.  Only add each entry once so repeated calls do not keep
    # growing sys.path.
    for extra_path in (
            '..',
            '/home/pi/tensorflow1/models/research/object_detection/utils'):
        if extra_path not in sys.path:
            sys.path.append(extra_path)

    # Set up camera constants (smaller resolution for slightly faster framerate)
    IM_WIDTH = 640
    IM_HEIGHT = 480

    # Import utilities
    from utils import label_map_util
    from utils import visualization_utils as vis_util

    # Name of the directory containing the object detection module we're using
    MODEL_NAME = 'ssdlite_mobilenet_v2_coco_2018_05_09'

    # Grab path to current working directory
    CWD_PATH = os.getcwd()

    # Path to frozen detection graph .pb file, which contains the model that
    # is used for object detection.
    PATH_TO_CKPT = os.path.join(CWD_PATH, MODEL_NAME,
                                'frozen_inference_graph.pb')

    # Path to label map file
    PATH_TO_LABELS = os.path.join(CWD_PATH, 'data', 'mscoco_label_map.pbtxt')

    # Number of classes the object detector can identify
    NUM_CLASSES = 90

    # Class id that ends the capture once it is the top detection three
    # frames in a row.
    # NOTE(review): 44 is presumably "bottle" in the COCO label map - confirm.
    TARGET_CLASS = 44

    # Load the label map: indices -> category names for the drawing helper.
    label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)

    # Load the Tensorflow model into memory.
    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')

        sess = tf.Session(graph=detection_graph)

    # Input tensor is the image
    image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
    # Output tensors are the detection boxes, scores, classes and count
    detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
    detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
    detection_classes = detection_graph.get_tensor_by_name(
        'detection_classes:0')
    num_detections = detection_graph.get_tensor_by_name('num_detections:0')

    # Initialize frame rate calculation
    frame_rate_calc = 1
    freq = cv2.getTickFrequency()
    font = cv2.FONT_HERSHEY_SIMPLEX

    sizedata = None  # top-ranked box of the most recent frame
    camera = None
    try:
        # Initialize Picamera and grab reference to the raw capture
        camera = PiCamera()
        camera.resolution = (IM_WIDTH, IM_HEIGHT)
        camera.framerate = 10
        rawCapture = PiRGBArray(camera, size=(IM_WIDTH, IM_HEIGHT))
        rawCapture.truncate(0)

        consecutive_hits = 0
        for frame1 in camera.capture_continuous(rawCapture,
                                                format="bgr",
                                                use_video_port=True):
            t1 = cv2.getTickCount()

            # Copy the frame (a fresh copy is writable) and add a batch axis
            # so the shape becomes [1, height, width, 3].
            frame = np.copy(frame1.array)
            frame_expanded = np.expand_dims(frame, axis=0)

            # Perform the actual detection
            (boxes, scores, classes, num) = sess.run(
                [detection_boxes, detection_scores, detection_classes,
                 num_detections],
                feed_dict={image_tensor: frame_expanded})

            sizedata = np.squeeze(boxes)[0, ]
            print(sizedata, "\n")

            # Draw the results of the detection
            vis_util.visualize_boxes_and_labels_on_image_array(
                frame,
                np.squeeze(boxes),
                np.squeeze(classes).astype(np.int32),
                np.squeeze(scores),
                category_index,
                use_normalized_coordinates=True,
                line_thickness=8,
                min_score_thresh=0.40)

            cv2.putText(frame, "FPS: {0:.2f}".format(frame_rate_calc),
                        (30, 50), font, 1, (255, 255, 0), 2, cv2.LINE_AA)

            # All the results have been drawn on the frame, so display it.
            cv2.imshow('Object detector', frame)

            t2 = cv2.getTickCount()
            time1 = (t2 - t1) / freq
            frame_rate_calc = 1 / time1

            if np.squeeze(classes)[0] == TARGET_CLASS:
                consecutive_hits += 1
            else:
                consecutive_hits = 0
            if consecutive_hits == 3:
                break

            # Press 'q' to quit
            if cv2.waitKey(1) == ord('q'):
                break

            rawCapture.truncate(0)
    finally:
        # Release the camera, windows and session even when a frame fails.
        if camera is not None:
            camera.close()
        cv2.destroyAllWindows()
        sess.close()

    if sizedata is None:
        raise RuntimeError('The camera did not deliver any frame.')

    dimensi = np.array([sizedata[3] - sizedata[1], sizedata[2] - sizedata[0]])
    return (dimensi)
def __init__(self, frozen_file, inputshape, in_nodes, dest_nodes):
    """Load a frozen TensorFlow graph and build the parser's internal graph.

    The graph in ``frozen_file`` is stripped down to the sub-graph between
    ``in_nodes`` and ``dest_nodes``, written to ``./frozen.pb``, read back,
    re-imported with fresh placeholders for the inputs, exported to
    ``./my-model.meta`` and finally wrapped in a ``TensorflowGraph``.

    Args:
        frozen_file: path of the frozen ``GraphDef`` file.
        inputshape: one shape (a list) per input node, in ``in_nodes`` order.
        in_nodes: names of the input nodes.
        dest_nodes: names of the output nodes.

    Raises:
        ImportError: If the installed TensorFlow is older than 1.8.0.
    """
    installed = LooseVersion(tensorflow.__version__)
    if installed < LooseVersion('1.8.0'):
        raise ImportError('Your TensorFlow version %s is outdated. '
                          'MMdnn requires tensorflow>=1.8.0' %
                          tensorflow.__version__)

    super(TensorflowParser2, self).__init__()
    self.weight_loaded = True

    # load model files into TensorFlow graph
    with open(frozen_file, 'rb') as frozen_fh:
        raw_graph = frozen_fh.read()

    tensorflow.reset_default_graph()
    original_gdef = tensorflow.GraphDef()
    original_gdef.ParseFromString(raw_graph)

    # Remember the "dtype" attribute enum of every requested input node.
    in_type_list = {
        node.name: node.attr['dtype'].type
        for node in original_gdef.node
        if node.name in in_nodes
    }

    from tensorflow.python.tools import strip_unused_lib
    from tensorflow.python.framework import dtypes
    from tensorflow.python.platform import gfile
    original_gdef = strip_unused_lib.strip_unused(
        input_graph_def=original_gdef,
        input_node_names=in_nodes,
        output_node_names=dest_nodes,
        placeholder_type_enum=dtypes.float32.as_datatype_enum)

    # Save the stripped graph to disk, then read it straight back.
    frozen_model_file = './frozen.pb'
    with gfile.GFile(frozen_model_file, "wb") as out_fh:
        out_fh.write(original_gdef.SerializeToString())
    with open(frozen_model_file, 'rb') as in_fh:
        raw_graph = in_fh.read()

    tensorflow.reset_default_graph()
    model = tensorflow.GraphDef()
    model.ParseFromString(raw_graph)

    with tensorflow.Graph().as_default() as g:
        input_map = {}
        for i, shape in enumerate(inputshape):
            node_name = in_nodes[i]
            type_enum = in_type_list[node_name]
            # The literals are compared against the stored "dtype" enum; the
            # placeholder created for each value shows the type it maps to.
            if type_enum == 1 or type_enum == 0:
                x = tensorflow.placeholder(tensorflow.float32,
                                           shape=[None] + shape)
            elif type_enum == 3:
                x = tensorflow.placeholder(tensorflow.int32, shape=shape)
            elif type_enum == 10:
                x = tensorflow.placeholder(tensorflow.bool)
            input_map[node_name + ':0'] = x

        tensorflow.import_graph_def(model, name='', input_map=input_map)

    with tensorflow.Session(graph=g):
        meta_graph_def = tensorflow.train.export_meta_graph(
            filename='./my-model.meta')
        model = meta_graph_def.graph_def

    self.tf_graph = TensorflowGraph(model)
    self.tf_graph.build()
def ML():
    """Run the selected detector on the global image ``path`` and display it.

    Reads the GUI state (threshold entry, class check-boxes, model radio
    button, annotation check-box), runs the chosen frozen detection graph on
    the image, draws a rectangle for every detection of a selected class
    whose score exceeds the threshold, optionally appends the box corners to
    ``path + ".txt"``, and places the annotated image on a new canvas.

    Raises:
        ValueError: if the model selector holds a value other than 1, 2 or 3
            (previously this surfaced as an unbound ``graph_path``).
    """
    global path

    threshold = float(entry1.get())

    # Class ids the user asked for; only membership is ever tested.
    wanted_labels = set()
    if P.get():
        wanted_labels.add(1)
    if C.get():
        wanted_labels.add(17)
    if D.get():
        wanted_labels.add(18)
    if V.get():
        wanted_labels.update(range(3, 7))
    # NOTE(review): class 62 is gated by the same `C` flag as class 17 -
    # confirm this is intended and not a copy/paste slip.
    if C.get():
        wanted_labels.add(62)
    if E_G.get():
        wanted_labels.update(range(72, 83))

    graph_paths = {
        1: '/root/PycharmProjects/Intern Project/faster_rcnn.pb',
        2: '/root/PycharmProjects/Intern Project/ssd_mobilenet.pb',
        3: '/root/PycharmProjects/Intern Project/ssd.pb',
    }
    selection = var.get()
    if selection not in graph_paths:
        raise ValueError('Unknown model selection: %r' % (selection,))
    graph_path = graph_paths[selection]

    with tf.gfile.FastGFile(graph_path, 'rb') as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())

    # Import into a private graph so repeated calls do not keep adding the
    # model's nodes to the process-wide default graph.
    detection_graph = tf.Graph()
    with detection_graph.as_default():
        tf.import_graph_def(graph_def, name='')

    # Read and preprocess an image.
    img = plt.imread(path)
    rows = img.shape[0]
    cols = img.shape[1]
    inp = cv.resize(img, (300, 300))
    # Reverse the channel axis (the original comment called this BGR2RGB).
    # NOTE(review): confirm the channel order plt.imread yields here matches
    # what the model expects after this swap.
    inp = inp[:, :, [2, 1, 0]]

    # Run the model
    with tf.Session(graph=detection_graph) as sess:
        out = sess.run(
            [
                sess.graph.get_tensor_by_name('num_detections:0'),
                sess.graph.get_tensor_by_name('detection_scores:0'),
                sess.graph.get_tensor_by_name('detection_boxes:0'),
                sess.graph.get_tensor_by_name('detection_classes:0'),
            ],
            feed_dict={
                'image_tensor:0': inp.reshape(1, inp.shape[0], inp.shape[1], 3)
            })

    # Visualize detected bounding boxes.
    num_detections = int(out[0][0])
    for i in range(num_detections):
        classId = int(out[3][0][i])
        if classId not in wanted_labels:
            continue
        score = float(out[1][0][i])
        bbox = [float(v) for v in out[2][0][i]]
        if score > threshold:
            # bbox is (ymin, xmin, ymax, xmax) scaled by the image size
            x = bbox[1] * cols
            y = bbox[0] * rows
            right = bbox[3] * cols
            bottom = bbox[2] * rows
            cv.rectangle(img, (int(x), int(y)), (int(right), int(bottom)),
                         (125, 255, 51), thickness=2)
            if Anno.get():
                # Same on-disk format as before: each value wrapped in a
                # one-element list repr, one detection per line.
                with open(path + ".txt", "a") as annotation_file:
                    annotation_file.write(
                        repr([x]) + repr([y]) + repr([right]) + repr([bottom]) + "\n")

    C_det = Canvas(root, width=600, height=600, bg='white')
    C_det.place(x=300, y=0)
    photo = ImageTk.PhotoImage(image=Image.fromarray(img))
    image_label = Label(C_det, image=photo)
    # Store the PhotoImage on the widget as well as in the local.
    image_label.image = photo
    image_label.place(x=0, y=0)
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Created on Thu Jan 11 15:58:42 2018

@author: www.github.com/GustavZ

Print every Placeholder / Identity node of the configured frozen model as
``name=>op``.
"""
import os

import tensorflow as tf
import yaml

## LOAD CONFIG PARAMS ##
# Prefer the user's config.yml; fall back to the sample config.
config_path = 'config.yml' if os.path.isfile('config.yml') else 'config.sample.yml'
with open(config_path, 'r') as ymlfile:
    # safe_load: the config is plain data, and yaml.load() without an
    # explicit Loader is deprecated / rejected by current PyYAML.
    cfg = yaml.safe_load(ymlfile)
MODEL_NAME = cfg['od_model_name']

## Actual Script ##
NODE_OPS = ['Placeholder', 'Identity']
MODEL_FILE = '../models/{}/frozen_inference_graph.pb'.format(MODEL_NAME)

gf = tf.GraphDef()
with open(MODEL_FILE, 'rb') as model_file:
    gf.ParseFromString(model_file.read())

print([n.name + '=>' + n.op for n in gf.node if n.op in (NODE_OPS)])
def load_frozen_graph_def(frozen_graph_filename):
    """Read a serialized GraphDef from disk and return the parsed proto.

    Args:
        frozen_graph_filename: path handed to ``tf.gfile.GFile`` and opened
            in binary read mode.

    Returns:
        The ``tf.GraphDef`` populated from the file's bytes.
    """
    parsed_graph = tf.GraphDef()
    with tf.gfile.GFile(frozen_graph_filename, "rb") as graph_file:
        serialized_bytes = graph_file.read()
    parsed_graph.ParseFromString(serialized_bytes)
    return parsed_graph
def animation(load_folder, save_folder, model_path):
    """Cartoonize live webcam frames with the frozen white-box graph.

    Loads ``./export/white_box_cartoonization_freeze_graph.pb``, grabs frames
    from camera 0, letterboxes each frame onto a 720x720 black canvas, runs
    the graph and shows input and output side by side until 'q' is pressed.
    The camera and the window are released when the loop ends, including on
    an exception.

    Args:
        load_folder: unused by the current body.
        save_folder: unused by the current body.
        model_path: unused by the current body (it was only read by the
            one-off checkpoint export summarised below).
    """
    # One-off conversion steps that used to live here as commented-out code:
    #   STEP1  build unet_generator + guided_filter (r=1, eps=5e-3) on a
    #          float32 placeholder [1, 720, 720, 3] named 'input', restore
    #          the 'generator' variables from the latest checkpoint in
    #          model_path and save them to ./export/model.ckpt
    #   STEP2  import ./export/model.ckpt.meta, restore, and write the graph
    #          to ./export/white_box_cartoonization_freeze_graph.pb(txt)
    #   STEP3  convert that .pb to a SavedModel in ./saved_model with
    #          simple_save (input 'input', output 'add_1:0')
    #   STEP4  dump op names/outputs to
    #          ./export/white_box_cartoonization_freeze_graph.json
    #          (reference: https://qiita.com/iwatake2222/items/80fc73ff23d8f51650f5)

    with tf.Session() as sess:
        with tf.gfile.GFile('./export/white_box_cartoonization_freeze_graph.pb', 'rb') as f:
            graph_def = tf.GraphDef()
            graph_def.ParseFromString(f.read())
        # No name given, so the nodes are looked up under 'import/' below.
        tf.import_graph_def(graph_def)
        tensor_input = sess.graph.get_tensor_by_name('import/input:0')
        tensor_output = sess.graph.get_tensor_by_name('import/add_1:0')

        cam = cv2.VideoCapture(0)
        cam.set(cv2.CAP_PROP_FPS, 30)
        cam.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
        cam.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)

        window_name = "USB Camera"
        cv2.namedWindow(window_name, cv2.WINDOW_AUTOSIZE)

        size = 720
        framecount = 0
        fps = ""
        time1 = 0

        try:
            while True:
                start_time = time.perf_counter()

                ret, image = cam.read()
                if not ret:
                    continue

                # Letterbox: scale to fit inside size x size, centre on black.
                colh, colw = image.shape[:2]
                scale = min(size / colw, size / colh)
                new_w = int(colw * scale)
                new_h = int(colh * scale)
                resized_image = cv2.resize(image, (new_w, new_h),
                                           interpolation=cv2.INTER_AREA)
                top = (size - new_h) // 2
                left = (size - new_w) // 2
                image = np.zeros((size, size, 3), dtype=np.uint8)
                image[top:top + new_h, left:left + new_w, :] = resized_image

                # Map [0, 255] to [-1, 1] and add the batch axis.
                batch_image = image.astype(np.float32) / 127.5 - 1
                batch_image = np.expand_dims(batch_image, axis=0)

                output = sess.run(tensor_output, {tensor_input: batch_image})

                # Map the output from [-1, 1] back to [0, 255].
                output = (np.squeeze(output) + 1) * 127.5
                output = np.clip(output, 0, 255).astype(np.uint8)

                cv2.putText(output, "---- Animation ---- " + fps, (640 - 550, 20),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.75, (38, 0, 255), 1, cv2.LINE_AA)
                cv2.putText(image, "---- Ground truth ---- " + fps, (640 - 550, 20),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.75, (38, 0, 255), 1, cv2.LINE_AA)
                cv2.imshow(window_name, np.hstack((image, output)))

                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break

                # FPS calculation: every 10th frame, report the accumulated
                # per-frame rates divided by 10, then restart the window.
                framecount += 1
                if framecount >= 10:
                    fps = "(Playback) {:.1f} FPS".format(time1 / 10)
                    framecount = 0
                    time1 = 0
                end_time = time.perf_counter()
                elapsedTime = end_time - start_time
                time1 += 1 / elapsedTime
        finally:
            # Previously the capture device and window were never released.
            cam.release()
            cv2.destroyAllWindows()
def main(NUM_CLASSES, Model, threshold, thres_str):
    """Run the frozen detector over the evaluation images and log detections.

    For every ``.jpg`` / ``.JPG`` file in the model's ``Evaluation_Output``
    folder, runs the detection graph, draws the boxes on the image array and
    writes one line per detection scoring above ``threshold`` (at most 100)
    to ``<Post-Processing>/Threshold_%/<thres_str>_Metrics_Detect_txt/<image>.txt``.

    Args:
        NUM_CLASSES: maximum number of classes passed to the label-map helper.
        Model: model folder name inserted into the hard-coded base paths.
        threshold: minimum score for a detection to be drawn and written.
        thres_str: threshold label used in the output directory name.

    Raises:
        ImportError: if the installed TensorFlow is older than 1.5.
    """
    import re

    ###########################################################################
    pwd = "C://Users/Eric Bianchi/Documents/Virginia Tech/Graduate School/Research/" + Model + "/Pre-Processing"
    cur = "C://Users/Eric Bianchi/Documents/Virginia Tech/Graduate School/Research/" + Model + "/Post-Processing"
    TEST_IMAGE_DIR = pwd + "/Evaluation_Output"
    FROZEN_INFERENCE_GRAPH_LOC = pwd + "/inference_graph/frozen_inference_graph.pb"
    LABELS_LOC = pwd + "/" + "label_map.pbtxt"
    ###########################################################################

    print("starting program . . .")

    if not checkIfNecessaryPathsAndFilesExist(
            TEST_IMAGE_DIR, FROZEN_INFERENCE_GRAPH_LOC, LABELS_LOC):
        return

    # Compare numerically: the old string comparison treated '1.10.0' as
    # older than '1.5.0'.
    # noinspection PyUnresolvedReferences
    version_match = re.match(r'(\d+)\.(\d+)', tf.__version__)
    if version_match and tuple(int(p) for p in version_match.groups()) < (1, 5):
        raise ImportError(
            'Please upgrade your tensorflow installation to v1.5.* or later!')

    # load a (frozen) TensorFlow model into memory
    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(FROZEN_INFERENCE_GRAPH_LOC, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')

    # Loading label map
    # Label maps map indices to category names, so that when our convolution
    # network predicts `5`, we know that this corresponds to `airplane`.
    # Here we use internal utility functions, but anything that returns a
    # dictionary mapping integers to appropriate string labels would be fine.
    label_map = label_map_util.load_labelmap(LABELS_LOC)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)

    imageFilePaths = [
        TEST_IMAGE_DIR + "/" + imageFileName
        for imageFileName in os.listdir(TEST_IMAGE_DIR)
        if imageFileName.endswith((".jpg", ".JPG"))
    ]

    with detection_graph.as_default():
        with tf.Session(graph=detection_graph) as sess:
            # Definite input and output Tensors for detection_graph
            # (looked up once; they do not change between images).
            image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
            # Each box represents a part of the image where a particular
            # object was detected.
            detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
            # Each score represents the level of confidence for each object.
            detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
            detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')
            num_detections = detection_graph.get_tensor_by_name('num_detections:0')

            for image_path in imageFilePaths:
                # Derive the name from the path itself so it can never drift
                # out of step with the path list (non-image files in the
                # folder or unreadable images used to misalign the two).
                image_name = os.path.basename(image_path)
                print(image_name)

                image_np = cv2.imread(image_path)
                if image_np is None:
                    print("error reading file ")
                    continue

                h = int(image_np.shape[0])
                w = int(image_np.shape[1])
                if w > 4000 or h > 4000:
                    w = int(w / 10)
                    h = int(h / 10)
                    # cv2.resize takes dsize as (width, height).
                    image_np = cv2.resize(image_np, (w, h))

                # Expand dimensions since the model expects images to have
                # shape: [1, None, None, 3]
                image_np_expanded = np.expand_dims(image_np, axis=0)

                # Actual detection.
                (boxes, scores, classes, num) = sess.run(
                    [detection_boxes, detection_scores, detection_classes,
                     num_detections],
                    feed_dict={image_tensor: image_np_expanded})

                # Visualization of the results of a detection.
                vis_util.visualize_boxes_and_labels_on_image_array(
                    image_np,
                    np.squeeze(boxes),
                    np.squeeze(classes).astype(np.int32),
                    np.squeeze(scores),
                    category_index,
                    use_normalized_coordinates=True,
                    line_thickness=1,
                    min_score_thresh=threshold)

                out_path = (cur + "/Threshold_%/" + thres_str +
                            "_Metrics_Detect_txt/" + image_name + ".txt")
                with open(out_path, "w+") as f:
                    # Stop at the first score that is not above the
                    # threshold, after 100 detections, or when the score
                    # array runs out.
                    for i in range(min(100, len(scores[0]))):
                        if not scores[0][i] > threshold:
                            break
                        # boxes are (ymin, xmin, ymax, xmax), normalized
                        ymin = int(boxes[0][i][0] * h)
                        xmin = int(boxes[0][i][1] * w)
                        ymax = int(boxes[0][i][2] * h)
                        xmax = int(boxes[0][i][3] * w)
                        class_cur = class_list(classes[0][i]).ID
                        score = scores[0][i]
                        cur_bbox = eval_bbox(image_name, class_cur, score,
                                             xmin, ymin, xmax, ymax)
                        f.write(cur_bbox.toString() + "\n")
import tensorflow as tf
from tensorflow.python.tools import freeze_graph, optimize_for_inference_lib

# Step 1: freeze the MNIST checkpoint into a single GraphDef file.
# NOTE(review): input_binary=True is combined with a '.pbtxt' input graph -
# confirm the file really is a binary proto despite its extension.
freeze_graph.freeze_graph(input_graph='mnist_model.pbtxt',
                          input_saver='',
                          input_binary=True,
                          input_checkpoint='mnist_model.ckpt',
                          output_node_names='y_actual',
                          restore_op_name='save/restore_all',
                          filename_tensor_name='save/Const:0',
                          output_graph='frozen_mnist_model.pb',
                          clear_devices=True,
                          initializer_nodes='')

# Step 2: load the frozen graph back.
input_graph_def = tf.GraphDef()
with tf.gfile.Open('frozen_mnist_model.pb', 'rb') as f:
    data = f.read()
    input_graph_def.ParseFromString(data)

# Step 3: optimize the graph between 'x_input' and 'y_actual' for inference.
output_graph_def = optimize_for_inference_lib.optimize_for_inference(
    input_graph_def=input_graph_def,
    input_node_names=['x_input'],
    output_node_names=['y_actual'],
    placeholder_type_enum=tf.float32.as_datatype_enum)

# Step 4: write the optimized graph. Use a context manager (the handle used
# to be left open, so the write was never explicitly flushed/closed) and
# binary mode, since the payload is serialized bytes.
with tf.gfile.FastGFile(name='optimized_frozen_mnist_model.pb', mode='wb') as f:
    f.write(file_content=output_graph_def.SerializeToString())
def _MakeGraphDef(self, text):
    """Parse a text-format proto string into a new ``tf.GraphDef``.

    Args:
        text: the text-format GraphDef to merge.

    Returns:
        The freshly created GraphDef after ``text`` was merged into it.
    """
    graph_def = tf.GraphDef()
    text_format.Merge(text, graph_def)
    return graph_def
def load_graph(f):
    """Import the serialized GraphDef stored at ``f`` into the default graph.

    Args:
        f: path of the GraphDef file, opened in binary read mode.

    The nodes are imported with an empty name prefix; nothing is returned.
    """
    with tf.gfile.FastGFile(f, 'rb') as model_file:
        serialized_graph = model_file.read()
    parsed = tf.GraphDef()
    parsed.ParseFromString(serialized_graph)
    tf.import_graph_def(parsed, name='')
from aiohttp import web
from jsonrpcserver.aio import methods
from jsonrpcserver.exceptions import InvalidParams

import sys
# Put the directory two levels above this file on sys.path before the
# project imports below; the order of these statements matters.
# NOTE(review): `Path`, `logging` and `tensorflow` are used below but no
# import for them is visible in this section - confirm they are imported
# earlier in the file.
sys.path.append(str(Path(__file__).absolute().parent.parent))
from image_classification_service import configuration as config
from image_classification_service.imagenet.node_lookup import NodeLookup

logger = logging.getLogger(__name__)

app = web.Application()

# Serialized classification graph, located relative to this file.
graph_path = Path(__file__).parent.joinpath("imagenet", "model_data", "classify_image_graph_def.pb")

# Parse the graph and import it (empty name prefix) at module import time.
with tensorflow.gfile.FastGFile(str(graph_path), "rb") as f:
    graph_def = tensorflow.GraphDef()
    graph_def.ParseFromString(f.read())
    tensorflow.import_graph_def(graph_def, name="")

node_lookup = NodeLookup()
# Module-level session, created after the graph import above.
session = tensorflow.Session()
softmax_tensor = session.graph.get_tensor_by_name("softmax:0")


@methods.add
async def classify(**kwargs):
    """JSON-RPC method registered via ``methods.add``.

    Reads the ``image`` and ``image_type`` keyword parameters and raises
    ``InvalidParams`` when ``image`` is missing or None.

    NOTE(review): the visible body stops after parameter validation -
    ``image_type`` is read but not used and nothing is returned; the rest of
    the handler appears to be missing from this section.
    """
    image = kwargs.get("image", None)
    image_type = kwargs.get("image_type", None)

    if image is None:
        raise InvalidParams("image is required")
def main(_):
    """Train and evaluate a softmax layer on top of cached bottlenecks.

    Imports the pre-trained graph, adds one fully connected softmax layer
    over the bottleneck tensor, trains it for ``STEPS`` steps with an
    exponentially decayed learning rate, records validation accuracy every
    100 steps (and on the last step) in the global ``result`` dict, and
    finally prints the test accuracy.

    Args:
        _: unused.

    Side effects:
        Updates the module globals ``LEARNING_RATE`` (current decayed value)
        and ``result`` (validation accuracy keyed by zero-padded step).
    """
    global LEARNING_RATE
    global result

    image_lists = create_image_lists(TEST_PRECENTAGE, VALIDATION_PRECENTAGE)
    n_classes = len(image_lists.keys())  # number of classes

    with gfile.FastGFile(os.path.join(MODEL_DIR, MODEL_FILE), 'rb') as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
    bottleneck_tensor, jpeg_data_tensor = tf.import_graph_def(
        graph_def,
        return_elements=[BOTTLENECT_TENSOR_NAME, JPEG_DATA_TENSOR_NAME])

    bottleneck_input = tf.placeholder(
        tf.float32, [None, BOTTLENECT_TENSOR_SIZE],
        name='BottleneckInputPlaceholder')
    ground_truth_input = tf.placeholder(
        tf.float32, [None, n_classes], name='GroundTruthInput')
    # The optimizer used to be built from the Python float LEARNING_RATE,
    # which is captured once at graph-construction time, so the per-step
    # decay below never reached training. Feed the rate through a
    # placeholder instead.
    learning_rate_input = tf.placeholder(
        tf.float32, [], name='LearningRateInput')

    # full-connection layer for classifying
    with tf.name_scope('final_training_ops'):
        weights = tf.Variable(
            tf.truncated_normal(
                [BOTTLENECT_TENSOR_SIZE, n_classes], stddev=0.001))
        biases = tf.Variable(tf.zeros([n_classes]))
        logits = tf.matmul(bottleneck_input, weights) + biases
        final_tensor = tf.nn.softmax(logits)

    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
        logits=logits, labels=ground_truth_input)
    cross_entropy_mean = tf.reduce_mean(cross_entropy)
    train_step = tf.train.GradientDescentOptimizer(
        learning_rate_input).minimize(cross_entropy_mean)

    with tf.name_scope('evaluation'):
        correct_prediction = tf.equal(
            tf.argmax(final_tensor, 1), tf.argmax(ground_truth_input, 1))
        evaluation_step = tf.reduce_mean(
            tf.cast(correct_prediction, tf.float32))

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        for i in range(STEPS):
            # Exponential decay starting at BASE_LEARNING_RATE.
            LEARNING_RATE = BASE_LEARNING_RATE / (math.exp(
                i / (STEPS / math.log(0.1 / MIN_LEARNING_RATE))))

            train_bottlenecks, train_ground_truth = get_random_cached_bottlenecks(
                sess, n_classes, image_lists, BATCH, 'training',
                jpeg_data_tensor, bottleneck_tensor)
            sess.run(
                train_step,
                feed_dict={
                    bottleneck_input: train_bottlenecks,
                    ground_truth_input: train_ground_truth,
                    learning_rate_input: LEARNING_RATE,
                })

            # calculating accuracy on validation dataset
            if i % 100 == 0 or i + 1 == STEPS:
                validation_bottlenecks, validation_ground_truth = \
                    get_random_cached_bottlenecks(
                        sess, n_classes, image_lists, BATCH, 'validation',
                        jpeg_data_tensor, bottleneck_tensor)
                validation_accuracy = sess.run(
                    evaluation_step,
                    feed_dict={
                        bottleneck_input: validation_bottlenecks,
                        ground_truth_input: validation_ground_truth
                    })
                result['{:0>4d}'.format(i)] = validation_accuracy
                print(
                    'Step %d :Validation accuracy on random sampled %d examples = %.1f%%'
                    % (i, BATCH, validation_accuracy * 100))

        # test final accuracy
        test_bottlenecks, test_ground_truth = get_test_bottlenecks(
            sess, image_lists, n_classes, jpeg_data_tensor, bottleneck_tensor)
        test_accuracy = sess.run(
            evaluation_step,
            feed_dict={
                bottleneck_input: test_bottlenecks,
                ground_truth_input: test_ground_truth
            })
        print('Final test accuracy = %.1f%%' % (test_accuracy * 100))
def post_preprocessing():
    """Score the intermediate images and archive the better-scoring ones.

    Loads every PNG in ``outputs/intermediate_images``, resizes it to
    128x128, scores all images with the frozen classifier in batches of 32,
    keeps the images whose score is at or above the 12.5% quantile, shuffles
    them, copies at most 10000 of them (re-encoded as ``<i>.png``) into
    ``outputs/output_images`` and zips that folder to ``outputs/images.zip``.
    """
    from glob import glob

    img_paths = glob('outputs/intermediate_images/*.png')

    w, h = 128, 128
    img_np = np.empty((len(img_paths), w, h, 3), dtype=np.uint8)
    for idx, path in enumerate(img_paths):
        img_arr = cv2.imread(path)
        # The original passed cv2.INTER_BITS as the third positional
        # argument, which is cv2.resize's `dst` slot, not `interpolation`
        # (and INTER_BITS is not an interpolation mode). Spell out the
        # default bilinear interpolation explicitly instead.
        img_arr = cv2.resize(img_arr, (w, h), interpolation=cv2.INTER_LINEAR)
        # Reverse the channel axis before feeding the model.
        img_np[idx] = img_arr[..., ::-1]

    PATH_TO_MODEL = './client/motorbike_classification_inception_net_128_v4_e36.pb'
    graph = tf.Graph()
    with graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_MODEL, 'rb') as fid:
            od_graph_def.ParseFromString(fid.read())
        tf.import_graph_def(od_graph_def, name='')

    input_tensor = graph.get_tensor_by_name('input_1:0')
    output_tensor = graph.get_tensor_by_name('activation_95/Sigmoid:0')

    batch_size = 32
    score_list = []
    # Close the session when scoring is done (it used to be leaked).
    with tf.Session(graph=graph) as sess:
        for start in range(0, len(img_np), batch_size):
            batch = img_np[start:start + batch_size] / 255.0
            score_list.append(
                sess.run(output_tensor, feed_dict={input_tensor: batch}))

    score = pd.DataFrame({
        'path': img_paths,
        'score': np.concatenate(score_list, axis=0).reshape(-1)
    })

    # Drop the lowest-scoring eighth of the images.
    high_quality = score[score['score'] >= score['score'].quantile(0.125)]
    img_path = high_quality['path'].values
    np.random.shuffle(img_path)
    img_path = img_path[:10000]

    output_dir = 'outputs/output_images'
    # Tolerate an existing directory only; the old bare `except: pass`
    # also hid real failures such as permission errors.
    os.makedirs(output_dir, exist_ok=True)

    for i, fpath in enumerate(img_path):
        im = cv2.imread(fpath)
        cv2.imwrite(f'{output_dir}/{i}.png', im)

    shutil.make_archive('outputs/images', 'zip', output_dir)
def load_graph(model_path):
    """Parse the serialized GraphDef stored at ``model_path``.

    Args:
        model_path: path of the file, read with the built-in ``open`` in
            binary mode.

    Returns:
        A ``tf.GraphDef`` populated from the file contents.
    """
    with open(model_path, "rb") as model_file:
        serialized = model_file.read()
    parsed_graph = tf.GraphDef()
    parsed_graph.ParseFromString(serialized)
    return parsed_graph
def load_trained_model():
    """Load ``./frozen_inference_graph.pb`` and return it as a GraphDef.

    Returns:
        The ``tf.GraphDef`` parsed from the file in the current working
        directory.
    """
    model_proto = tf.GraphDef()
    with tf.gfile.FastGFile('./frozen_inference_graph.pb', 'rb') as pb_file:
        raw_bytes = pb_file.read()
    model_proto.ParseFromString(raw_bytes)
    return model_proto
def run(self):
    """Consume frame batches from ``self.queues`` and pickle their features.

    Loads the graph at ``self.model_dir`` into the default graph, then keeps
    polling every queue in ``self.queues``. Each item is a tuple
    ``(item_id, frames, tags)``; every frame is fed to ``DecodeJpeg:0`` and
    the squeezed ``pool_3:0`` output is collected. The stacked float32
    features and the tags are pickled to ``<self.data_dir>/<item_id>.pickle``.
    A ``None`` item removes that queue from ``self.queues``; the method
    returns once no queues are left.
    """
    logger = logging.getLogger(__name__)
    logger.info("Starting consumer")

    # load inception 3 graph
    with gfile.FastGFile(self.model_dir, 'rb') as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
        _ = tf.import_graph_def(graph_def, name='')
    logger.info("Loaded graph")

    with tf.Session() as sess:
        # Look the feature tensor up once instead of once per frame.
        pool3_layer = sess.graph.get_tensor_by_name('pool_3:0')

        # TODO: add queueing and batching for optimal performance
        processed_items = 0
        while self.queues:
            # Iterate over a snapshot: self.queues is edited in place below,
            # and mutating a list while iterating it skips elements.
            for queue in list(self.queues):
                item = queue.get()
                if item is None:
                    self.queues[:] = [q for q in self.queues if q != queue]
                    logger.debug(
                        "[Consumer] Rmoved %s from queues. %d left",
                        queue, len(self.queues))
                    continue

                item_id, frames, tags = item
                log_this_item = processed_items % self.logging_step == 0
                if log_this_item:
                    logger.info(
                        "[Consumer] Extracting features from video %s [%d]",
                        item_id, processed_items)

                img_features = []
                for frame in frames:
                    predictions = sess.run(pool3_layer, {'DecodeJpeg:0': frame})
                    # drop singleton dimensions before collecting
                    img_features.append(np.squeeze(predictions))

                file_name = os.path.join(self.data_dir,
                                         '{0}.pickle'.format(item_id))
                fv3_features = np.array(img_features, dtype=np.float32)
                with open(file_name, 'wb') as handle:
                    pickle.dump((fv3_features, tags), handle,
                                protocol=pickle.HIGHEST_PROTOCOL)

                if log_this_item:
                    logger.info(
                        "[Consumer] Extracting features from video %s [DONE!][%d]",
                        item_id, processed_items)

                # Increment counter
                processed_items = processed_items + 1

    logger.info("Ending consumer")
    return
def main():
    """Run the frozen SSD detector over the UCF24 test split and save videos.

    Parses the command line, recreates the output directory under
    ``../data/ucf24``, loads the frozen graph and the pickled training data,
    then for every video folder listed in ``splitfiles/testlist01.txt`` runs
    detection over its frames (sorted by numeric file name) and writes the
    annotated frames to ``<output-dir>/<class>/<video>.mp4``.
    """
    #---------------------------------------------------------------------------
    # Parse the commandline
    #---------------------------------------------------------------------------
    parser = argparse.ArgumentParser(description='SSD inference for video')
    parser.add_argument('--model', default='model300.pb',
                        help='model file')
    parser.add_argument('--training-data', default='training-data-300.pkl',
                        help='training data')
    parser.add_argument('--output-dir', default='test-out',
                        help='output directory')
    parser.add_argument('--batch-size', type=int, default=32,
                        help='batch size')
    args = parser.parse_args()

    #---------------------------------------------------------------------------
    # Print parameters
    #---------------------------------------------------------------------------
    print('[i] Model:         ', args.model)
    print('[i] Training data: ', args.training_data)
    print('[i] Output dir:    ', args.output_dir)
    print('[i] Batch size:    ', args.batch_size)

    video_root = '../data/ucf24'
    test_output = args.output_dir
    output_root = os.path.join(video_root, test_output)

    #---------------------------------------------------------------------------
    # Initilize the output directory
    #---------------------------------------------------------------------------
    if os.path.exists(output_root):
        shutil.rmtree(output_root, ignore_errors=False, onerror=None)
    os.makedirs(output_root)

    #---------------------------------------------------------------------------
    # Load the graph and the training data
    #---------------------------------------------------------------------------
    graph_def = tf.GraphDef()
    with open(args.model, 'rb') as f:
        graph_def.ParseFromString(f.read())

    # NOTE: pickle.load can execute arbitrary code; only point
    # --training-data at files you trust.
    with open(args.training_data, 'rb') as f:
        data = pickle.load(f)

    #---------------------------------------------------------------------------
    # Run Detection
    #---------------------------------------------------------------------------
    with open(os.path.join(video_root, 'splitfiles/testlist01.txt')) as fin:
        # Strip only the line terminator (line[:-1] chopped a real character
        # when the last line had no newline) and skip blank lines.
        video_paths = [
            os.path.join(video_root, 'rgb-images', entry)
            for entry in (line.rstrip('\r\n') for line in fin)
            if entry
        ]

    # One session serves both the graph import and inference; the original
    # opened a first session only to import the graph and look up tensors.
    with tf.Session() as sess:
        tf.import_graph_def(graph_def, name='detector')
        img_input = sess.graph.get_tensor_by_name('detector/image_input:0')
        result = sess.graph.get_tensor_by_name('detector/result/result:0')

        for video in tqdm(video_paths, total=len(video_paths)):
            # video: frame folder; frame files are ordered by the integer
            # before their 4-character extension
            frames = sorted(os.listdir(video), key=lambda file: int(file[:-4]))
            frame_paths = [os.path.join(video, frame) for frame in frames]
            detected_frames = run(frame_paths, img_input, result, data, sess,
                                  batch_size=args.batch_size)

            # last two path components: <class folder>/<video folder>
            video_name = video.split('/')[-2:]
            video_name[-1] = video_name[-1] + '.mp4'
            video_save_path = os.path.join(video_root, test_output, *video_name)
            class_dir = os.path.join(video_root, test_output, video_name[0])
            if not os.path.exists(class_dir):
                os.makedirs(class_dir)
            save_to_video(detected_frames, video_save_path)
def fold_batch_norms(input_graph_def):
    """Removes batch normalization ops by folding them into convolutions.

    Batch normalization during training has multiple dynamic parameters that
    are updated, but once the graph is finalized these become constants. That
    means there's an opportunity to reduce the computations down to a scale
    and addition, rather than the more expensive multiple ops, and even bake
    the scaling into the convolution weights. This function identifies the
    typical pattern of batch normalization subgraphs, and performs the
    transformation to fold the computations down into a simpler form. It
    currently only spots batch normalization that's performed by the
    BatchNormWithGlobalNormalization op, and will need to be extended in the
    future to handle the newer style.

    A BatchNormWithGlobalNormalization node is only folded when its first
    input is a Conv2D whose weights are a Const and its mean / variance /
    beta / gamma inputs are Consts of shape ``(channel_count,)``; otherwise a
    warning is logged and the node is left as it is.

    Args:
      input_graph_def: A GraphDef containing a model.

    Returns:
      Modified graph with BN ops removed, and modified weights.

    Raises:
      ValueError: If the graph is badly formed with duplicate node names.
    """
    # Index every node by name, rejecting duplicates.
    input_node_map = {}
    for node in input_graph_def.node:
        if node.name not in input_node_map.keys():
            input_node_map[node.name] = node
        else:
            raise ValueError("Duplicate node names detected for ", node.name)

    # Names of nodes that are replaced and must not be copied to the result.
    nodes_to_skip = {}
    # Replacement nodes, appended to the result at the very end.
    new_ops = []
    for node in input_graph_def.node:
        if node.op != "BatchNormWithGlobalNormalization":
            continue

        # Inputs of the BN node as read below:
        # [0] conv output, [1] mean, [2] variance, [3] beta, [4] gamma.
        conv_op = node_from_map(input_node_map, node.input[0])
        if conv_op.op != "Conv2D":
            tf.logging.warning("Didn't find expected Conv2D input to '%s'" %
                               node.name)
            continue

        weights_op = node_from_map(input_node_map, conv_op.input[1])
        if weights_op.op != "Const":
            tf.logging.warning(
                "Didn't find expected conv Constant input to '%s',"
                " found %s instead. Maybe because freeze_graph wasn't"
                " run first?" % (conv_op.name, weights_op))
            continue
        weights = values_from_const(weights_op)
        # The size of the weights' fourth axis is the per-channel parameter
        # length every BN constant must match.
        channel_count = weights.shape[3]

        mean_op = node_from_map(input_node_map, node.input[1])
        if mean_op.op != "Const":
            tf.logging.warning(
                "Didn't find expected mean Constant input to '%s',"
                " found %s instead. Maybe because freeze_graph wasn't"
                " run first?" % (node.name, mean_op))
            continue
        mean_value = values_from_const(mean_op)
        if mean_value.shape != (channel_count, ):
            tf.logging.warning(
                "Incorrect shape for mean, found %s, expected %s,"
                " for node %s" % (str(mean_value.shape), str(
                    (channel_count, )), node.name))
            continue

        var_op = node_from_map(input_node_map, node.input[2])
        if var_op.op != "Const":
            tf.logging.warning(
                "Didn't find expected var Constant input to '%s',"
                " found %s instead. Maybe because freeze_graph wasn't"
                " run first?" % (node.name, var_op))
            continue
        var_value = values_from_const(var_op)
        if var_value.shape != (channel_count, ):
            tf.logging.warning(
                "Incorrect shape for var, found %s, expected %s,"
                " for node %s" % (str(var_value.shape), str(
                    (channel_count, )), node.name))
            continue

        beta_op = node_from_map(input_node_map, node.input[3])
        if beta_op.op != "Const":
            tf.logging.warning(
                "Didn't find expected beta Constant input to '%s',"
                " found %s instead. Maybe because freeze_graph wasn't"
                " run first?" % (node.name, beta_op))
            continue
        beta_value = values_from_const(beta_op)
        if beta_value.shape != (channel_count, ):
            tf.logging.warning(
                "Incorrect shape for beta, found %s, expected %s,"
                " for node %s" % (str(beta_value.shape), str(
                    (channel_count, )), node.name))
            continue

        gamma_op = node_from_map(input_node_map, node.input[4])
        if gamma_op.op != "Const":
            tf.logging.warning(
                "Didn't find expected gamma Constant input to '%s',"
                " found %s instead. Maybe because freeze_graph wasn't"
                " run first?" % (node.name, gamma_op))
            continue
        gamma_value = values_from_const(gamma_op)
        if gamma_value.shape != (channel_count, ):
            tf.logging.warning(
                "Incorrect shape for gamma, found %s, expected %s,"
                " for node %s" % (str(gamma_value.shape), str(
                    (channel_count, )), node.name))
            continue

        variance_epsilon_value = node.attr["variance_epsilon"].f
        scale_after_normalization = node.attr["scale_after_normalization"].b
        # All validation passed: the BN node, its four constants, the conv
        # and the conv weights are dropped from the copy loop at the bottom.
        nodes_to_skip[node.name] = True
        nodes_to_skip[weights_op.name] = True
        nodes_to_skip[mean_op.name] = True
        nodes_to_skip[var_op.name] = True
        nodes_to_skip[beta_op.name] = True
        nodes_to_skip[gamma_op.name] = True
        nodes_to_skip[conv_op.name] = True

        # scale = 1 / sqrt(var + eps), times gamma when
        # scale_after_normalization is set.
        if scale_after_normalization:
            scale_value = ((1.0 / np.vectorize(math.sqrt)
                            (var_value + variance_epsilon_value)) * gamma_value)
        else:
            scale_value = (
                1.0 / np.vectorize(math.sqrt)(var_value + variance_epsilon_value))
        # offset = beta - mean * scale
        offset_value = (-mean_value * scale_value) + beta_value
        # Multiply every weight element in place by the scale selected by
        # its index along the fourth axis.
        scaled_weights = np.copy(weights)
        it = np.nditer(scaled_weights,
                       flags=["multi_index"],
                       op_flags=["readwrite"])
        while not it.finished:
            current_scale = scale_value[it.multi_index[3]]
            it[0] *= current_scale
            it.iternext()
        # The scaled weights reuse the original weights node's name, so the
        # copied conv's input list still refers to them.
        scaled_weights_op = tf.NodeDef()
        scaled_weights_op.op = "Const"
        scaled_weights_op.name = weights_op.name
        scaled_weights_op.attr["dtype"].CopyFrom(weights_op.attr["dtype"])
        scaled_weights_op.attr["value"].CopyFrom(
            tf.AttrValue(tensor=tensor_util.make_tensor_proto(
                scaled_weights, weights.dtype.type, weights.shape)))
        # Unmodified copy of the conv node (the original is in nodes_to_skip).
        new_conv_op = tf.NodeDef()
        new_conv_op.CopyFrom(conv_op)
        offset_op = tf.NodeDef()
        offset_op.op = "Const"
        offset_op.name = conv_op.name + "_bn_offset"
        offset_op.attr["dtype"].CopyFrom(mean_op.attr["dtype"])
        offset_op.attr["value"].CopyFrom(
            tf.AttrValue(tensor=tensor_util.make_tensor_proto(
                offset_value, mean_value.dtype.type, offset_value.shape)))
        # The BiasAdd takes over the BN node's name and adds the offset to
        # the conv output.
        bias_add_op = tf.NodeDef()
        bias_add_op.op = "BiasAdd"
        bias_add_op.name = node.name
        bias_add_op.attr["T"].CopyFrom(conv_op.attr["T"])
        bias_add_op.input.extend([new_conv_op.name, offset_op.name])
        new_ops.extend(
            [scaled_weights_op, new_conv_op, offset_op, bias_add_op])

    # Copy every untouched node, then append the replacement nodes.
    result_graph_def = tf.GraphDef()
    for node in input_graph_def.node:
        if node.name in nodes_to_skip:
            continue
        new_node = tf.NodeDef()
        new_node.CopyFrom(node)
        result_graph_def.node.extend([new_node])

    result_graph_def.node.extend(new_ops)
    return result_graph_def
def recognize(jpg_path, pb_file_path):
    """Run a frozen YOLOv3 graph on one image, save the result and time it.

    The image at ``jpg_path`` is resized to ``IMAGE_SIZE x IMAGE_SIZE``,
    converted from BGR to RGB and scaled to ``[0, 1]`` before being fed to
    the ``Placeholder:0`` tensor of the graph stored in ``pb_file_path``.
    The predicted boxes are rescaled to the original image size, printed,
    passed to ``plot_one_box`` together with the original image, and the
    image is written to ``pb_result.jpg``.  Finally the same inference is
    repeated 50 times and the average wall-clock runtime is printed.

    Args:
        jpg_path: Path of the input image (read with ``cv2.imread``).
        pb_file_path: Path of the serialized frozen ``GraphDef`` file.

    Raises:
        IOError: If ``cv2.imread`` cannot read ``jpg_path``.
    """
    anchors = parse_anchors("./data/yolo_anchors.txt")
    classes = read_class_names("./data/coco.names")
    num_class = len(classes)
    color_table = get_color_table(num_class)

    img_ori = cv2.imread(jpg_path)
    if img_ori is None:
        # cv2.imread reports failure by returning None rather than raising;
        # fail here with a clear message instead of an AttributeError below.
        raise IOError("Could not read image file: %s" % jpg_path)
    height_ori, width_ori = img_ori.shape[:2]

    img = cv2.resize(img_ori, (IMAGE_SIZE, IMAGE_SIZE))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = np.asarray(img, np.float32)
    img = img[np.newaxis, :] / 255.
    # The same batch is fed for the single prediction and for the benchmark.
    batch = np.reshape(img, [-1, IMAGE_SIZE, IMAGE_SIZE, 3])

    with tf.Graph().as_default():
        output_graph_def = tf.GraphDef()
        print("Load Frozen_Graph File ...")
        with open(pb_file_path, "rb") as f:
            output_graph_def.ParseFromString(f.read())
        tf.import_graph_def(output_graph_def, name="")
        print("Finished")

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True

        with tf.Session(config=config) as sess:
            # Define input and outputs of the imported graph.
            input_x = sess.graph.get_tensor_by_name("Placeholder:0")
            features = tuple(
                sess.graph.get_tensor_by_name(
                    "yolov3/yolov3_head/feature_map_%d:0" % index)
                for index in (1, 2, 3))

            # yolo config
            yolo_model = yolov3(num_class, anchors)
            yolo_model.pb_forward(input_x)

            # print(...) with a single argument behaves the same on
            # Python 2 and Python 3, unlike the print statement.
            print("Predicting ...")
            pred_boxes, pred_confs, pred_probs = yolo_model.predict(features)
            pred_scores = pred_confs * pred_probs
            boxes, scores, labels = gpu_nms(pred_boxes,
                                            pred_scores,
                                            num_class,
                                            max_boxes=30,
                                            score_thresh=0.4,
                                            iou_thresh=0.5)
            fetches = [boxes, scores, labels]

            t0 = time.time()
            boxes_, scores_, labels_ = sess.run(
                fetches, feed_dict={input_x: batch})
            t1 = time.time()
            print("Finished")

            # Rescale the coordinates to the original image: columns 0 and 2
            # are scaled by the width ratio, columns 1 and 3 by the height
            # ratio.
            width_ratio = width_ori / float(IMAGE_SIZE)
            height_ratio = height_ori / float(IMAGE_SIZE)
            boxes_[:, 0] *= width_ratio
            boxes_[:, 2] *= width_ratio
            boxes_[:, 1] *= height_ratio
            boxes_[:, 3] *= height_ratio

            print("box coords:")
            print(boxes_)
            print('*' * 30)
            print("scores:")
            print(scores_)
            print('*' * 30)
            print("labels:")
            print(labels_)
            print("runtime:")
            print(t1 - t0)

            for box, label in zip(boxes_, labels_):
                x0, y0, x1, y1 = box
                plot_one_box(img_ori, [x0, y0, x1, y1],
                             label=classes[label],
                             color=color_table[label])
            cv2.imwrite('pb_result.jpg', img_ori)

            # Benchmark: repeat the identical inference and report the mean.
            num_samples = 50
            t0 = time.time()
            for _ in range(num_samples):
                boxes_, scores_, labels_ = sess.run(
                    fetches, feed_dict={input_x: batch})
            t1 = time.time()
            print('Average runtime: %f seconds' %
                  (float(t1 - t0) / num_samples))
res = '' for node_id in top_k: human_string = label_lines[node_id] score = predictions[0][node_id] if score > max_score: max_score = score res = human_string return res, max_score # Loads label file, strips off carriage return label_lines = [line.rstrip() for line in tf.gfile.GFile("logs/trained_labels.txt")] # Unpersists graph from file with tf.gfile.FastGFile("logs/trained_graph.pb", 'rb') as f: graph_def = tf.GraphDef() graph_def.ParseFromString(f.read()) _ = tf.import_graph_def(graph_def, name='') with tf.Session() as sess: # Feed the image_data as input to the graph and get first prediction softmax_tensor = sess.graph.get_tensor_by_name('final_result:0') c = 0 cap = cv2.VideoCapture(0) res, score = '', 0.0 i = 0 mem = '' consecutive = 0
def run_annotation(image_list, labels_mapping, treshold):
    """Run the TensorFlow detection graph over images and collect boxes.

    The frozen graph is loaded from the path stored in the
    ``TF_ANNOTATION_MODEL_PATH`` environment variable.  Before each image
    the current RQ job is refreshed: if its ``meta`` contains ``'cancel'``
    the flag is removed, the job is saved and ``None`` is returned;
    otherwise ``meta['progress']`` is updated.

    Args:
        image_list: Sequence of image paths opened with ``Image.open``.
        labels_mapping: Mapping from detected class id to label; detections
            whose class id is not a key are ignored.
        treshold: Minimum detection score (inclusive) for a box to be kept.

    Returns:
        A dict mapping each label to a list of
        ``[image_num, xmin, ymin, xmax, ymax]`` entries, or ``None`` when
        the job was cancelled.

    Raises:
        OSError: If ``TF_ANNOTATION_MODEL_PATH`` is not set.
    """
    result = {}
    model_path = os.environ.get('TF_ANNOTATION_MODEL_PATH')
    if model_path is None:
        raise OSError(
            'Model path env not found in the system. Please check the installation manual.'
        )
    job = rq.get_current_job()

    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(model_path, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')

        # The graph tensors do not change between images, so look them up
        # once instead of on every iteration.
        image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
        fetches = [
            detection_graph.get_tensor_by_name('detection_boxes:0'),
            detection_graph.get_tensor_by_name('detection_scores:0'),
            detection_graph.get_tensor_by_name('detection_classes:0'),
            detection_graph.get_tensor_by_name('num_detections:0'),
        ]

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        # Create the session *before* entering the try block: if creation
        # fails there is nothing to close, and the original error must not
        # be masked by an UnboundLocalError raised from the finally clause.
        sess = tf.Session(graph=detection_graph, config=config)
        try:
            for image_num, image_path in enumerate(image_list):
                job.refresh()
                if 'cancel' in job.meta:
                    del job.meta['cancel']
                    job.save()
                    return None
                job.meta['progress'] = image_num * 100 / len(image_list)
                job.save_meta()

                image = Image.open(image_path)
                # width/height keep the ORIGINAL size; they are what is
                # passed to normalize_box even if the image is downscaled.
                width, height = image.size
                if width > 1920 or height > 1080:
                    image = image.resize((width // 2, height // 2),
                                         Image.ANTIALIAS)
                image_np = load_image_into_numpy(image)
                image_np_expanded = np.expand_dims(image_np, axis=0)

                boxes, scores, classes, _ = sess.run(
                    fetches, feed_dict={image_tensor: image_np_expanded})

                # Index 0 selects the single image of the batch.
                for class_id, score, box in zip(classes[0], scores[0],
                                                boxes[0]):
                    if class_id not in labels_mapping or score < treshold:
                        continue
                    xmin, ymin, xmax, ymax = normalize_box(box, width, height)
                    label = labels_mapping[class_id]
                    result.setdefault(label, []).append(
                        [image_num, xmin, ymin, xmax, ymax])
        finally:
            sess.close()
    return result