def get_output(self, layer):
    """
    Look up a layer by name.

    Returns the object stored in ``self.layers`` under the key ``layer``.
    If the key is unknown, the list of known layer names is logged through
    ``print_msg`` (level 3) and a ``KeyError`` is raised.
    """
    try:
        return self.layers[layer]
    except KeyError:
        # Show what is available before failing, to ease debugging.
        known_names = self.layers.keys()
        print_msg(str(known_names), 3)
        raise KeyError('Unknown layer name fed: %s' % layer)
def _vis_detections(self, im, class_name, dets, ax, thresh=0.5):
    """
    Draw detected bounding boxes.

    Every row of ``dets`` whose last column (the score) is at least
    ``thresh`` is drawn on ``ax`` as a red rectangle with a text label made
    of ``class_name`` and the score. The first four columns of a row are
    used as the box corners (x1, y1, x2, y2). ``im`` is not used here.
    If no row passes the threshold a message is logged and nothing is drawn.
    """
    selected = np.where(dets[:, -1] >= thresh)[0]
    if len(selected) == 0:
        print_msg('No detections found', 2)
        return

    for row in selected:
        x1, y1, x2, y2 = dets[row, :4]
        score = dets[row, -1]

        outline = plt.Rectangle((x1, y1), x2 - x1, y2 - y1,
                                fill=False, edgecolor='red', linewidth=3.5)
        ax.add_patch(outline)

        label = '{:s} {:.3f}'.format(class_name, score)
        ax.text(x1, y1 - 2, label,
                bbox=dict(facecolor='blue', alpha=0.5),
                fontsize=14, color='white')

    title = ('{} detections with p({} | box) >= {:.1f}').format(
        class_name, class_name, thresh)
    ax.set_title(title, fontsize=14)
    plt.axis('off')
    plt.tight_layout()
    plt.draw()
def run_sw_demo(self, sess, image_path, scale_mode=0):
    """
    Pure software demo.

    Reads the image at ``image_path``, pre-processes it, runs the network in
    ``sess`` and hands the outputs to ``build_json`` and ``post_process``.
    When ``cfg.ENABLE_TENSORBOARD`` is set, the merged summaries are fetched
    in the same run and written to ``cfg.OUTPUT_DIR``.

    Arguments:
        sess: TensorFlow session used to run the graph
        image_path: path of the image to load with cv2.imread
        scale_mode: forwarded unchanged to self.pre_process

    Raises:
        IOError: if the image can not be read from ``image_path``
    """
    print_msg('Running software only demo ... ', 3)

    # Load Image and pre-process
    im = cv2.imread(image_path)
    if im is None:
        # cv2.imread reports failure by returning None instead of raising;
        # fail here with a clear message rather than inside pre_process.
        raise IOError('Can not read image @ ' + str(image_path) + '.\n')
    feed_dict = self.pre_process(im, scale_mode)
    # TODO send in scale for both X and Y directions
    scale_factor = feed_dict[self.im_info][0][2]

    output_names = ('cls_score_fabu', 'cls_prob', 'bbox_pred_fabu', 'proposal')

    if cfg.ENABLE_TENSORBOARD:
        merged = tf.summary.merge_all()
        tfboard_writer = tf.summary.FileWriter(cfg.OUTPUT_DIR, sess.graph)
        fetches = [merged] + [self.get_output(name) for name in output_names]
        # Run network with the session to get proper outputs
        summary, cls_score, cls_prob, bbox_pred, rois = sess.run(
            fetches, feed_dict=feed_dict)
        tfboard_writer.add_summary(summary)
    else:
        fetches = [self.get_output(name) for name in output_names]
        # Run network with the session to get proper outputs
        cls_score, cls_prob, bbox_pred, rois = sess.run(
            fetches, feed_dict=feed_dict)

    # Post process on network output and show result
    self.build_json("cls_prob_ref", cls_prob)
    self.post_process(im, [cls_score, cls_prob, bbox_pred, rois], scale_factor)
def post_process(self, im, sim_ops, scale_factor=None):
    """
    MUST HAVE FUNCTION IN ALL NETWORKS !!!!
    Post-processing of the results from network. This function can be used
    to visualize data from hardware.

    Applies softmax to ``sim_ops[0][0]`` and logs the five entries with the
    highest probability, most probable first, as "<class> , <probability>".

    Arguments:
        im: input image; not used by this implementation
        sim_ops: network outputs as a list; only sim_ops[0][0] is read
        scale_factor: not used by this implementation. It defaults to None
            so that callers passing only (im, sim_ops) keep working.
    """
    prob = softmax(sim_ops[0][0])
    # argsort is ascending: reverse it and keep the first five indices.
    top_five = np.argsort(prob)[::-1][0:5]
    for p in top_five:
        # NOTE(review): `classes` is resolved as a module-level name here,
        # not as self.classes - confirm it is defined in this module.
        print_msg(str(classes[p]) + ' , ' + str(prob[p]), 3)
def run_sw_demo(self, sess, img, scale_mode=0):
    """
    Pure software demo.

    Reads the image at path ``img``, pre-processes it, evaluates the
    'fc1000' layer in ``sess`` and passes the result to ``post_process``.

    Arguments:
        sess: TensorFlow session used to run the graph
        img: path of the image to load with cv2.imread
        scale_mode: forwarded unchanged to self.pre_process

    Raises:
        IOError: if the image can not be read from ``img``
    """
    print_msg('Running software only demo ... ', 3)

    im = cv2.imread(img)
    if im is None:
        # cv2.imread reports failure by returning None instead of raising;
        # fail here with a clear message rather than inside pre_process.
        raise IOError('Can not read image @ ' + str(img) + '.\n')

    feed_dict = self.pre_process(im, scale_mode)
    # sess.run is given a one-element list, so fc1000 is a one-element list.
    fc1000 = sess.run([self.get_output('fc1000')], feed_dict=feed_dict)

    # Post process on network output and show result
    # NOTE(review): post_process is called with two arguments here - confirm
    # that this class's post_process does not require a scale factor.
    self.post_process(im, fc1000)
def __init__(self, isHardware=True, trainable=False):
    """
    Build the VGG16 network.

    Creates the float32 input placeholder of shape [None, 224, 224, 3],
    registers it as layer 'data', loads the ImageNet class names and calls
    ``self.setup()``.

    Arguments:
        isHardware: stored on the instance as self.isHardware
        trainable: stored on the instance as self.trainable
    """
    self.inputs = []
    self.isHardware = isHardware
    self.trainable = trainable

    # Network input and the name -> layer registry seeded with it.
    self.data = tf.placeholder(tf.float32, shape=[None, 224, 224, 3])
    self.layers = {'data': self.data}

    self.n_classes = 1000
    self.classes = imageNet_classNames()

    self.setup()
    print_msg('VGG16 Network setup done', 0)
def check_model_path(em_net, net, sess):
    """
    Return the path of the model file to load for a network.

    The trained model ``cfg.MODELS_DIR + <em_net> + '.npy'`` is preferred.
    If it does not exist, the randomized model
    ``cfg.MODELS_DIR + <em_net> + '_random.npy'`` is used instead and is
    created with ``create_random_model`` when it is missing too.

    Arguments:
        em_net: network name used to build the file names
        net: network instance, forwarded to create_random_model
        sess: TensorFlow session, forwarded to create_random_model

    Returns:
        Path of the trained model if present, else of the randomized model.

    Raises:
        IOError: if create_random_model returns a non-zero status
    """
    model_path = cfg.MODELS_DIR + str(em_net) + '.npy'
    if os.path.exists(model_path):
        return model_path

    # Keep both paths so the message names the missing file and the
    # fallback separately (previously both showed the fallback path).
    random_model_path = cfg.MODELS_DIR + str(em_net) + '_random.npy'
    print_msg("Error: model " + model_path +
              " does not exist, try to use randomized model " +
              random_model_path, 3)

    if not os.path.exists(random_model_path):
        status = create_random_model(net, sess, random_model_path)
        if status != 0:
            raise IOError(('Can not create random model @ ' +
                           random_model_path + '.\n'))
    return random_model_path
def feed(self, *args):
    """
    Set the inputs for the next layer operation.

    Each argument is either a layer object or the name of a layer already
    registered in ``self.layers``; names are replaced by the registered
    object. The resolved list replaces ``self.inputs``.

    Returns ``self`` so calls can be chained. Raises ``KeyError`` for an
    unknown layer name (after logging the known names).
    """
    assert len(args) != 0
    self.inputs = []
    for item in args:
        if isinstance(item, basestring):
            # A string is a layer name: swap it for the registered layer.
            try:
                item = self.layers[item]
                print_msg(str(item), 0)
            except KeyError:
                print_msg(str(self.layers.keys()), 3)
                raise KeyError('Unknown layer name fed: %s' % item)
        self.inputs.append(item)
    return self
def __init__(self, isHardware=True, trainable=False):
    """
    Build the ResNet50 network.

    Overrides the global test configuration (``cfg.TEST.SCALES`` and
    ``cfg.TEST.MAX_SIZE``) to 224, creates the input placeholders, registers
    'data' and 'im_info' as layers, loads the ImageNet class names, builds
    the layer map and finally calls ``self.setup()``.

    Arguments:
        isHardware: stored on the instance as self.isHardware
        trainable: stored on the instance as self.trainable
    """
    # NOTE: this mutates the shared cfg object, not only this instance.
    cfg.TEST.SCALES = (224, )
    cfg.TEST.MAX_SIZE = 224

    self.inputs = []
    self.isHardware = isHardware
    self.trainable = trainable

    # Placeholders fed at run time.
    self.data = tf.placeholder(tf.float32, shape=[None, None, None, 3])
    self.im_info = tf.placeholder(tf.float32, shape=[None, 3])
    self.keep_prob = tf.placeholder(tf.float32)
    self.layers = {'data': self.data, 'im_info': self.im_info}

    self.n_classes = 1000
    self.classes = imageNet_classNames()

    # The layer map is filled before the graph is set up.
    self._layer_map = {}
    self.create_layer_map()
    self.setup()
    print_msg('ResNet50 Network setup done', 0)
def pre_process(self, im, scale_mode=0):
    """
    MUST HAVE FUNCTION IN ALL NETWORKS !!!!
    Pre-processing of the image: convert to uint8, resize to 224x224 and
    subtract the per-channel means from ``cfg.PIXEL_MEANS``.

    Arguments:
        im: image array with three channels in the last axis
        scale_mode: accepted for interface compatibility; not used here

    Returns:
        The feed dictionary the network is expecting: {self.data: blob},
        where blob is built by im_list_to_blob from the de-meaned image.
    """
    print_msg('Pre-processing the image for the network', 4)

    resized = cv2.resize(im.astype(np.uint8, copy=True), (224, 224))

    # Constant image holding the mean of each of the three channels.
    mean_image = np.zeros(shape=resized.shape)
    for channel in range(3):
        mean_image[:, :, channel] = cfg.PIXEL_MEANS[channel]

    blob = im_list_to_blob([resized - mean_image])
    return {self.data: blob}
def visualize_output(net, em_image, scale_mode, sim_ops, scale_factor, save_output=False):
    """
    Function to visualize hw/simulation output.

    Reads the image ``cfg.DATA_DIR + em_image`` and forwards it, together
    with ``sim_ops`` and ``scale_factor``, to ``net.post_process``.

    Arguments:
        net: network object providing post_process(im, sim_ops, scale_factor)
        em_image: image path relative to cfg.DATA_DIR (user is responsible
                  to send in an appropriate image, classification vs
                  detection)
        scale_mode: scaling mode; not used by this function
        sim_ops: simulation outputs as a list. The user has to refer to the
                 implementation of post_process in lib/networks/xxx.py to
                 give the correct sim_ops.
        scale_factor: forwarded unchanged to net.post_process
        save_output: not used by this function (TODO)
    Returns:
        --
    """
    print_msg('Running RefModel to visualize sim/hw output',3)
    image_file = cfg.DATA_DIR + str(em_image)
    image = cv2.imread(image_file)
    net.post_process(image, sim_ops, scale_factor)
def load(self, data_path, session, ignore_missing=False):
    """
    Initialize graph with pre-trained parameters.

    ``data_path`` is read with ``np.load(...).item()`` and is iterated as a
    two-level mapping: op name -> {parameter name -> value}. Every value is
    assigned to the existing variable of that name inside the variable scope
    of its op.

    Arguments:
        data_path: path of the parameter file
        session: TensorFlow session used to run the assign ops
        ignore_missing: when True, a ValueError raised while looking up or
            assigning a variable is logged and skipped; when False it is
            logged and re-raised.
    """
    # NOTE: loading an object array unpickles data - use trusted files only.
    parameters = np.load(data_path).item()
    for op_name in parameters:
        with tf.variable_scope(op_name, reuse=True):
            for param_name, value in parameters[op_name].items():
                try:
                    variable = tf.get_variable(param_name)
                    session.run(variable.assign(value))
                except ValueError:
                    skipped = ("ignore " + "Param: " + str(param_name) +
                               " - OpName: " + str(op_name))
                    print_msg(skipped, 3)
                    if not ignore_missing:
                        raise
    print_msg("Model was successfully loaded from " + data_path, 3)
def get_bias(net, model_path):
    """
    API to return all the bias values in the model. To be used by the
    validation framework to initialize the bias SRAM.

    Arguments:
        net: network object; its _layer_map is indexed 0..N-1 and each entry
             provides the layer 'name'
        model_path: path of the model file read with np.load(...).item()
    Returns:
        List with one entry per layer, in _layer_map index order: the
        biases converted with convert_to_fp for layers that have a 'biases'
        entry in the model, an empty list otherwise.
    """
    print_msg('Getting bias values for the network '+str(model_path)+'...',3)

    # Get the total number of layers in the network
    num_layers = len(net._layer_map.keys())

    # np.load opens the path itself; no separate file handle is needed.
    model = np.load(model_path).item()

    bias_vals = []
    layers_with_bias_count = 0
    for i in range(num_layers):
        layer_name = net._layer_map[i]['name']
        if layer_name in model and 'biases' in model[layer_name]:
            bias_vals.append(convert_to_fp(model[layer_name]['biases']))
            layers_with_bias_count += 1
        else:
            # Keep a placeholder so positions still match _layer_map.
            bias_vals.append([])

    print_msg(str(layers_with_bias_count)+' layers were found with bias values',3)
    return bias_vals
def run_sw_demo(self, sess, image_path, scale_mode=0):
    """
    Pure software demo.

    Reads the image at ``image_path``, pre-processes it, runs the network in
    ``sess`` and passes the four outputs plus the scale factor to
    ``post_process``.

    Arguments:
        sess: TensorFlow session used to run the graph
        image_path: path of the image to load with cv2.imread
        scale_mode: forwarded unchanged to self.pre_process

    Raises:
        IOError: if the image can not be read from ``image_path``
    """
    print_msg('Running software only demo ... ', 3)

    # Load Image and pre-process
    im = cv2.imread(image_path)
    if im is None:
        # cv2.imread reports failure by returning None instead of raising;
        # fail here with a clear message rather than inside pre_process.
        raise IOError('Can not read image @ ' + str(image_path) + '.\n')
    feed_dict = self.pre_process(im, scale_mode)
    # TODO send in scale for both X and Y directions
    scale_factor = feed_dict[self.im_info][0][2]

    # Run network with the session to get proper outputs
    output_names = ('cls_score_fabu', 'cls_prob', 'bbox_pred_fabu', 'proposal')
    fetches = [self.get_output(name) for name in output_names]
    cls_score, cls_prob, bbox_pred, rois = sess.run(fetches,
                                                    feed_dict=feed_dict)

    # Post process on network output and show result
    self.post_process(im, cls_score, cls_prob, bbox_pred, rois, scale_factor)
def proposal_layer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, cfg_key, fl_cls_prob, fl_bbox_pred, feat_stride=[16,], anchor_scales = [8, 16, 32], base_size = 10, ratios =[0.333, 0.5, 0.667, 1.0, 1.5, 2.0, 3.0], pre_nms_topN = 2000, max_nms_topN = 400, isHardware=False, num_stddev=2.0):
    """
    Turn RPN outputs into region proposals, either with the software NMS
    path or with the hardware-model NMS (nms_hw).

    Parameters
    ----------
    rpn_cls_prob_reshape: (1 , H , W , Ax2) outputs of RPN, prob of bg or fg
                         NOTICE: the old version is ordered by (1, H, W, 2, A) !!!!
    rpn_bbox_pred: (1 , H , W , Ax4), rgs boxes output of RPN
    im_info: a list of [image_height, image_width, scale_ratios]
             (only im_info[0] is used)
    cfg_key: 'TRAIN' or 'TEST'; selects the RPN_POST_NMS_TOP_N,
             RPN_NMS_THRESH and RPN_MIN_SIZE settings from cfg
    fl_cls_prob, fl_bbox_pred: passed to convert_to_float_py together with
             the respective input (presumably the fractional lengths of the
             fixed-point inputs - TODO confirm)
    feat_stride: the downsampling ratio of feature map to the original input image
    anchor_scales: the scales to the basic_anchor (basic anchor is [16, 16])
    base_size, ratios: forwarded to generate_anchors
    pre_nms_topN: software path only; number of top-scoring proposals kept
             before NMS (ignored when <= 0)
    max_nms_topN: hardware path only; forwarded to nms_hw
    isHardware: select the hardware-model path; also forwarded to
             bbox_transform_inv
    num_stddev: hardware path only; forwarded to nms_hw
    ----------
    Returns
    ----------
    rpn_rois : (1 x H x W x A, 5) e.g. [0, x1, y1, x2, y2]
               float32 array whose first column is the (all zero) batch index
    """
    _anchors = generate_anchors(base_size, ratios, anchor_scales)
    _num_anchors = _anchors.shape[0]
    im_info = im_info[0]

    assert rpn_cls_prob_reshape.shape[0] == 1, \
        'Only single item batches are supported'

    # Convert fixed point int to floats for internal calculations !
    rpn_cls_prob_reshape = convert_to_float_py(rpn_cls_prob_reshape, fl_cls_prob)
    rpn_bbox_pred = convert_to_float_py(rpn_bbox_pred, fl_bbox_pred)

    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
    min_size = cfg[cfg_key].RPN_MIN_SIZE

    height, width = rpn_cls_prob_reshape.shape[1:3]

    # the first set of _num_anchors channels are bg probs
    # the second set are the fg probs, which we want
    # (1, H, W, A)
    scores = np.reshape(np.reshape(rpn_cls_prob_reshape, [1, height, width, _num_anchors, 2])[:,:,:,:,1], [1, height, width, _num_anchors])

    # TODO: NOTICE: the old version is ordered by (1, H, W, 2, A) !!!!
    # TODO: if you use the old trained model, VGGnet_fast_rcnn_iter_70000.ckpt, uncomment this line
    # NOTE: the line below is active and replaces the scores computed above:
    # the last _num_anchors channels are taken as the fg probabilities.
    scores = rpn_cls_prob_reshape[:,:,:,_num_anchors:]

    bbox_deltas = rpn_bbox_pred
    #im_info = bottom[2].data[0, :]

    if DEBUG:
        print 'im_size: ({}, {})'.format(im_info[0], im_info[1])
        print 'scale: {}'.format(im_info[2])
        print 'min_size: {}'.format(min_size)
        print 'max_nms_topN: {}'.format(max_nms_topN)
        print 'post_nms_topN: {}'.format(post_nms_topN)

    # 1. Generate proposals from bbox deltas and shifted anchors
    if DEBUG:
        print 'score map size: {}'.format(scores.shape)

    # Enumerate all shifts
    # (one x/y offset per feature-map cell, in input-image pixels)
    shift_x = np.arange(0, width) * feat_stride
    shift_y = np.arange(0, height) * feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),shift_x.ravel(), shift_y.ravel())).transpose()

    # Enumerate all shifted anchors:
    #
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = _num_anchors
    K = shifts.shape[0]
    anchors = _anchors.reshape((1, A, 4)) + \
        shifts.reshape((1, K, 4)).transpose((1, 0, 2))
    anchors = anchors.reshape((K * A, 4))

    # Transpose and reshape predicted bbox transformations to get them
    # into the same order as the anchors:
    #
    # bbox deltas will be (1, 4 * A, H, W) format
    # transpose to (1, H, W, 4 * A)
    # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
    # in slowest to fastest order
    bbox_deltas = bbox_deltas.reshape((-1, 4)) #(HxWxA, 4)

    # Same story for the scores:
    #
    # scores are (1, A, H, W) format
    # transpose to (1, H, W, A)
    # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
    scores = scores.reshape((-1, 1))

    # Convert anchors into proposals via bbox transformations
    proposals = bbox_transform_inv(anchors, bbox_deltas, isHardware)
    proposals = proposals.astype(bbox_deltas.dtype)

    # 2. clip predicted boxes to image
    proposals = clip_boxes(proposals, im_info[:2])

    #KM: Move filtering into NMS (after estimating parameters
    # 3. remove predicted boxes with either height or width < threshold
    # (NOTE: convert min_size to input image scale stored in im_info[2])
    #keep = _filter_boxes(proposals, min_size * im_info[2])
    #proposals = proposals[keep, :]
    #
    #print '[Ref Model Log] Num total Proposals before NMS : ' + str(proposals.shape)
    #scores = scores[keep]

    # # remove irregular boxes, too fat too tall
    # keep = _filter_irregular_boxes(proposals)
    # proposals = proposals[keep, :]
    # scores = scores[keep]

    # Hardware modeling
    if (isHardware):
    #if (0):
        #proposals1 = np.copy(proposals)
        #scores1 = np.copy(scores)
        #KM: Proposal inputs to NMS need to be in same order as HW or final results will be different!
        proposals1 = np.zeros(proposals.shape)
        scores1 = np.zeros(scores.shape)
        idy = 0
        # Rows are stored in (h, w, a) order (see idx below); copy them out
        # with the anchor index k outermost and the row index i innermost.
        for k in range(0,A):
            for j in range(0,width):
                for i in range(0,height):
                    idx = (i*width*A)+(j*A)+k
                    scores1[idy] = scores[idx]
                    proposals1[idy] = proposals[idx]
                    print_msg(str(k) + '.' + str(j) + '.' + str(i) + ' Proposal ' + str(idy) + ' -> [' + str(int(8*scores1[idy])) + '] ' + str((16*proposals1[idy,:]).astype(int)),1)
                    idy = idy+1

        # Box-size filtering, score thresholding and NMS all happen in nms_hw.
        prop, score = nms_hw(proposals1, scores1, num_stddev, nms_thresh, min_size, im_info[2], max_nms_topN, post_nms_topN)
        # Prepend the batch index column (always 0: single-image batches).
        batch_inds = np.zeros((prop.shape[0], 1), dtype=np.float32)
        blob = np.hstack((batch_inds, prop.astype(np.float32, copy=False)))
    else:
        # Software path: sort by score (descending), keep the best
        # pre_nms_topN, run NMS, then keep the best post_nms_topN.
        order = scores.ravel().argsort()[::-1]
        if pre_nms_topN > 0:
            order = order[:pre_nms_topN]
        proposals = proposals[order, :]
        scores = scores[order]
        keep = nms(np.hstack((proposals, scores)), nms_thresh)
        if post_nms_topN > 0:
            keep = keep[:post_nms_topN]
        proposals = proposals[keep, :]
        scores = scores[keep]
        print 'Number of proposals : ' + str(len(keep))
        # Prepend the batch index column (always 0: single-image batches).
        batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
        blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    return blob
# Upper edges of the lookup bins, in units of 1/64. Bin k covers
# [edge[k-1], edge[k]); the first bin is open below, the last open above.
_SQROOT_EDGES_64THS = (
    6, 13, 19, 26, 32, 38, 45, 51, 58, 64,
    70, 77, 83, 90, 96, 102, 109, 115, 122, 128,
    134, 141, 147, 154, 160, 166, 173, 179, 186, 192,
    198, 205, 211, 218, 224, 230, 237, 243, 250, 256,
    262, 269, 275, 282, 288, 294, 301, 307, 314, 320,
    326, 333, 339, 346, 352, 358, 365, 371, 378, 384,
    390, 397, 403, 410, 416, 422, 429, 435, 442, 448,
    454, 461, 467, 474, 480, 486, 493, 499, 506, 512,
    518, 525, 531, 538, 544, 550, 557, 563, 570, 576,
    582, 589, 595, 602, 608, 614, 621, 627, 634,
)

# Table output for each bin, in units of 1/1024 (one more entry than edges).
_SQROOT_VALUES_1024THS = (
    229, 397, 512, 606, 687, 759, 826, 887, 944, 998,
    1049, 1098, 1145, 1190, 1233, 1275, 1315, 1355, 1393, 1430,
    1466, 1501, 1536, 1570, 1603, 1635, 1667, 1698, 1729, 1759,
    1788, 1817, 1846, 1874, 1902, 1929, 1956, 1983, 2009, 2035,
    2061, 2086, 2111, 2136, 2160, 2184, 2208, 2232, 2255, 2278,
    2301, 2324, 2346, 2369, 2391, 2412, 2434, 2455, 2477, 2498,
    2519, 2539, 2560, 2580, 2601, 2621, 2641, 2660, 2680, 2700,
    2719, 2738, 2757, 2776, 2795, 2814, 2832, 2851, 2869, 2887,
    2905, 2923, 2941, 2959, 2977, 2994, 3012, 3029, 3046, 3063,
    3081, 3097, 3114, 3131, 3148, 3164, 3181, 3197, 3214, 3230,
)

_SQROOT_EDGES = tuple(edge / 64.0 for edge in _SQROOT_EDGES_64THS)
_SQROOT_VALUES = tuple(value / 1024.0 for value in _SQROOT_VALUES_1024THS)


def sqrootLookUp(N):
    """
    Hardware Lookup table for squareroot.

    Arguments:
        N: scalar input value
    Returns:
        The table entry (a float, multiple of 1/1024) of the bin containing
        N. Inputs below 6/64 (including negative ones) map to 229/1024 and
        inputs of 634/64 or more map to 3230/1024.
    """
    from bisect import bisect_right

    print_msg('Getting squareroot using lookup table.', 1)
    # bisect_right counts the edges that are <= N, which is exactly the
    # index of the bin [edge[k-1], edge[k]) that N falls into.
    return _SQROOT_VALUES[bisect_right(_SQROOT_EDGES, N)]
# Bin edges of the lookup table, in units of 1/2048. Bin k covers
# [edge[k-1], edge[k]); the first bin is open below, the last open above.
_EXPROOT_EDGES_2048THS = (
    -1946, -1843, -1741, -1638, -1536, -1434, -1331, -1229, -1126, -1024,
    -922, -819, -717, -614, -512, -410, -307, -205, -102, 0,
    102, 205, 307, 410, 512, 614, 717, 819, 922, 1024,
    1126, 1229, 1331, 1434, 1536, 1638, 1741, 1843, 1946,
)

# Table output for each bin, in units of 1/64 (one more entry than edges).
# An input of exactly zero is a special case handled in the function.
_EXPROOT_VALUES_64THS = (
    24, 25, 27, 28, 29, 31, 33, 34, 36, 38,
    40, 42, 44, 46, 49, 51, 54, 56, 59, 62,
    66, 69, 73, 76, 80, 84, 89, 93, 98, 103,
    108, 114, 120, 126, 132, 139, 146, 154, 161, 170,
)


def exprootLookUp(N):
    """
    Hardware Lookup table for exponential root.

    Arguments:
        N: numpy array of input values
    Returns:
        Array with the shape and dtype of N holding, element-wise, the table
        entry (a multiple of 1/64) of the bin containing the input. Inputs
        below -1946/2048 map to 24/64, inputs of 1946/2048 or more map to
        170/64 and an input of exactly 0 maps to 64/64.
    """
    print_msg('Getting exponential root using lookup table.', 1)

    edges = np.asarray(_EXPROOT_EDGES_2048THS, dtype=np.float64) / 2048.0
    values = np.asarray(_EXPROOT_VALUES_64THS, dtype=np.float64) / 64.0

    # side='right' counts the edges that are <= N, i.e. the index of the
    # half-open bin [edge[k-1], edge[k]) each element falls into.
    bins = np.searchsorted(edges, N, side='right')

    exproot = np.zeros(N.shape, dtype=N.dtype)
    exproot[...] = values[bins]
    # Exactly zero has its own table entry, distinct from the (0, 102/2048)
    # bin it would otherwise share.
    exproot[N == 0.0] = 64.0 / 64.0
    return exproot
def get_data_for_validation(em_net, em_image, scale_mode, em_start_layer, em_end_layer): """ Efficiently get the inputs, outputs, weights of layers given by start and end index Arguments: sess: tensorflow session net: network to test em_image: path to the input image (user is responsible to send in appropriate image (classification vs detection)) scale_mode: scaling mode (0: highest resolution, 1: mid resolution, 2: TODD) em_start_layer: ID of the start layer em_end_layer: ID of the end layer Returns: List called multi_layer_outputs Each element corresponds to a layers to be tested from em_start_layer to em_end_layer for each layer: [layer_output, layer_inputs, parameters] to access data for layer I, (x = I - em_start_layer): layer output: multi_layer_outputs[x][0] layer inputs (j): multi_layer_outputs[x][1][j] layer weights: multi_layer_outputs[x][2][0] layer biases: multi_layer_outputs[x][2][1] """ print_msg('Running RefModel to get data for verification',3) print_msg('Network: '+str(em_net),3) print_msg('Test Image: '+str(em_image),3) print_msg('Resolution Mode: '+str(scale_mode),3) print_msg('Start Layer: '+str(em_start_layer),3) print_msg('End Layer: '+str(em_end_layer),3) # init session tf.reset_default_graph() sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) # create network instance and load models parameters net = get_network(str(em_net), 1)
# init session tf.reset_default_graph() sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) # create network instance and load models parameters net = get_network(str(em_net), 1) #model_path = cfg.MODELS_DIR+str(em_net)+'.npy' model_path = check_model_path(em_net, net, sess) em_image = cfg.DATA_DIR+str(em_image) if not os.path.exists(em_image): raise IOError(('Image not found @ '+ em_image +'.\n')) print_msg('Model Path: '+str(model_path),3) if not os.path.exists(model_path): model_path = cfg.MODELS_DIR+str(em_net)+'_random.npy' print_msg("Error: model " + model_path + " does not exist, try to use randomized model " + model_path, 3) if not os.path.exists(model_path): status = create_random_model(net, sess, model_path) if status != 0: raise IOError(('Can not create random model @ '+ model_path +'.\n')) sess.run(tf.global_variables_initializer()) net.load(model_path,sess) im = cv2.imread(em_image) feed_dict = net.pre_process(im, scale_mode) print("im_info\n")
def nms_hw(proposals, scores, num_stddev, nms_thresh, min_size, scale, max_nms_topN, post_nms_topN):
    """
    Software model of the hardware non-maximum suppression.

    Steps, in order:
      1. estimate a score threshold with _estimate_parameters (on all scores)
      2. drop boxes rejected by _filter_boxes(proposals, min_size * scale)
      3. drop boxes whose score is below the threshold
      4. feed the remaining proposals one by one, in input order, into a
         score-sorted register limited to max_nms_topN entries, ignoring or
         deleting overlapping boxes as decided by _get_iou
      5. return the first post_nms_topN register entries

    Arguments:
        proposals: array of boxes, one row of 4 values per proposal
        scores: array of scores, indexed like proposals
        num_stddev: forwarded to _estimate_parameters
        nms_thresh: forwarded to _get_iou
        min_size: minimum box size, multiplied by scale before filtering
        scale: image scale (im_info[2] at the call site in proposal_layer)
        max_nms_topN: maximum register length kept after each insertion
        post_nms_topN: maximum number of boxes returned
    Returns:
        (box_register, score_register): the kept boxes and their scores.
    """
    score_th = _estimate_parameters(scores, num_stddev)

    #KM: Remove predicted boxes with either height or width < threshold
    # (NOTE: convert min_size to input image scale stored in im_info[2])
    print_msg('scale: ' + str(scale), 0)  # im_info[2]
    print_msg('min_size: ' + str(min_size), 0)
    print_msg('max_nms_topN: ' + str(max_nms_topN), 1)
    print_msg('post_nms_topN: ' + str(post_nms_topN), 1)
    keep = _filter_boxes(proposals, min_size * scale)
    proposals = proposals[keep, :]
    scores = scores[keep]
    print_msg('Num total Proposals before NMS : ' + str(proposals.shape), 2)

    # Keep only proposals whose score reaches the estimated threshold.
    keep = np.where(scores >= score_th)[0]
    proposals = proposals[keep, :]
    scores = scores[keep]
    print_msg('Threshold for proposal selection : ' + str(score_th), 2)
    print_msg('Number of proposals after thresholding : ' + str(len(keep)), 2)

    box_register = np.ndarray([0, 4])  # Hardware linked list
    score_register = np.ndarray([0])  # Hardware linked list
    num_proposals = len(keep)
    if (num_proposals >= 1):
        box_register = np.insert(box_register, 0, proposals[0, :], 0)  # Insert the first one
        score_register = np.insert(score_register, 0, scores[0], 0)  # Insert the first one

    for idx in range(0, num_proposals):
        print_msg('~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~', 2)
        print_msg(
            'Proposal ID: ' + str(idx) + ' -> ' + str(int(8 * scores[idx])) + str(
                (16 * proposals[idx, :]).astype(int)), 2)
        print_msg(
            'Number of Proposals in Register : ' + str(len(score_register)), 2)
        # Proposal 0 was already placed in the register above.
        if (idx < 1):
            continue
        insert_idx = 0
        should_delete = False  # Flag to see if deletion is needed
        should_ignore = False  # Flag to see if proposal should be ignored
        # Walk the register from the top: stop at the first entry that
        # either suppresses this proposal or is out-scored by it.
        for j in range(len(score_register)):
            #KM: Use > instead of >= so overlap comparison will determine insertion point if scores are equal
            score_is_more = scores[idx] > score_register[j]
            iou_is_more = _get_iou(proposals[idx, :], box_register[j, :], nms_thresh)
            if (not score_is_more) and iou_is_more:
                print_msg(
                    'Ignoring Proposal | score_is_more : ' + str(score_is_more) +
                    ' iou_is_more : ' + str(iou_is_more), 2)
                should_ignore = True
                break
            elif score_is_more:
                print_msg('Inserting Point found : ' + str(insert_idx), 2)
                should_delete = True
                break
            insert_idx += 1

        # Insert at the end if insertion point not found
        if not should_ignore:
            print_msg('Inserting Proposal @ ' + str(insert_idx), 2)
            box_register = np.insert(box_register, insert_idx, proposals[idx, :], 0)
            score_register = np.insert(score_register, insert_idx, scores[idx], 0)

        # Trim the register array to the max size of 400 (Hardware Limitation)
        keepLength = min(len(score_register), max_nms_topN)
        score_register = score_register[:keepLength]
        box_register = box_register[:keepLength, :]

        # If we inserted the new proposal, we should delete overlapping ones
        if should_delete:
            # Only entries after the insertion point are examined.
            k = insert_idx + 1
            end = len(score_register)
            while k < end:
                iou_is_more = _get_iou(proposals[idx, :], box_register[k, :], nms_thresh)
                if iou_is_more:
                    print_msg(
                        'Deleting Proposal @ ' + str(k) + ' -> ' +
                        str(int(8 * score_register[k])) + str(
                            (16 * box_register[k, :]).astype(int)), 2)
                    box_register = np.delete(box_register, k, 0)
                    score_register = np.delete(score_register, k, 0)
                    # The register shrank by one: stay on the same index
                    # (k -= 1 is undone by the k += 1 below).
                    end -= 1
                    k -= 1
                k += 1

    # Send out the top 300 proposals
    keepLength = min(len(score_register), post_nms_topN)
    score_register = score_register[:keepLength]
    box_register = box_register[:keepLength, :]
    print_msg('Final Number of Boxes after HW NMS : ' + str(keepLength), 2)
    for idx in range(0, keepLength):
        #print_msg(str(idx) + ' -> ' + str(score_register[idx]) + ' ' + str(box_register[idx,:]),1)
        print_msg(
            str(idx) + ' -> ' + str(int(8 * score_register[idx])) + ' ' + str(
                (16 * box_register[idx, :]).astype(int)), 1)
    return box_register, score_register
def _estimate_parameters(scores, num_stddev):
    """
    Hardware Mean and standard deviation estimation.

    Computes, with the fixed-point approximations below, the mean and the
    standard deviation (via sqrootLookUp) of the integer-cast scores and
    returns the threshold ``mean + num_stddev * stddev``.

    Arguments:
        scores: numpy array of scores
        num_stddev: multiplier applied to the estimated standard deviation
    Returns:
        The estimated score threshold.
    """
    num_proposals = len(scores)
    print_msg('Estimating threshold.', 2)
    print_msg('Number of total proposals : ' + str(num_proposals), 2)

    int_scores = scores.astype(int)
    score_sum = np.sum(int_scores)
    score_squared_sum = np.sum(int_scores**2)

    # DO some Fixed point stuff here
    # -> Use 6 frac bits
    # The pre-shift applied to the sums grows with the number of proposals;
    # the 1/N approximation uses a numerator 2**6 times larger.
    if num_proposals > 196608:
        shift = 12
    elif num_proposals > 98304:
        shift = 11
    elif num_proposals > 49152:
        shift = 10
    else:
        shift = 9

    by_n_approx = 1.0 * (2**(shift + 6)) / num_proposals
    # Negative sums are shifted on their magnitude and negated afterwards.
    if score_sum < 0:
        mean = -1.0 * int(-score_sum / (2**shift))
    else:
        mean = 1.0 * int(score_sum / (2**shift))
    sq_sum_by_n = 1.0 * int(score_squared_sum / (2**shift))

    by_n_approx = 1.0 * int((2**6) * by_n_approx)
    mean = (by_n_approx * mean) / (2**12)
    sq_sum_by_n = (by_n_approx * sq_sum_by_n) / (2**12)
    variance = sq_sum_by_n - 1.0 * int((2**6) * (mean**2)) / (2**6)
    stddev = sqrootLookUp(variance)
    th = mean + num_stddev * stddev

    report = [
        'num_stddev : ' + str(num_stddev),
        'alpha value used in refModel: ' + str(by_n_approx) +
        ' ,CSR Value : ' + str(int(by_n_approx * 64)),
        'score_sum : ' + str(score_sum),
        'score_squared_sum : ' + str(score_squared_sum),
        'by_n_approx : ' + str(by_n_approx),
        'mean : ' + str(mean),
        'sq_sum_by_n : ' + str(sq_sum_by_n),
        'variance : ' + str(variance),
        'stddev : ' + str(stddev),
        'threshold : ' + str(th),
    ]
    for message in report:
        print_msg(message, 2)
    return th