input_shape_features = (num_features, None, None) else: input_shape_img = (None, None, 3) input_shape_features = (None, None, num_features) img_input = Input(shape=input_shape_img) roi_input = Input(shape=(C.num_rois, 4)) feature_map_input = Input(shape=input_shape_features) # define the base network (resnet here, can be VGG, Inception, etc) shared_layers = nn.nn_base(img_input, trainable=True) # define the RPN, built on the base layers num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios) rpn_layers = nn.rpn(shared_layers, num_anchors) classifier = nn.classifier(feature_map_input, roi_input, C.num_rois, nb_classes=len(class_mapping), trainable=True) model_rpn = Model(img_input, rpn_layers) model_classifier_only = Model([feature_map_input, roi_input], classifier) model_classifier = Model([feature_map_input, roi_input], classifier) print('Loading weights from {}'.format(C.model_path)) model_rpn.load_weights(C.model_path, by_name=True) model_classifier.load_weights(C.model_path, by_name=True) model_rpn.compile(optimizer='sgd', loss='mse') model_classifier.compile(optimizer='sgd', loss='mse')
def upload_file(): print("request is ", request.files) st = time.time() content_length = request.content_length print(f"Content_length : {content_length}") print("data type is ", type(request)) print("data type of request files ", type(request.files)) data_dict = request.form.to_dict() #print(type(data_dict)) #print(data_dict['file']) data = (data_dict['file'].split(',')[1]) #print(len(data_dict)) #print(data) imgdata = base64.b64decode(data) print("imagedata type is", type(imgdata)) img2 = Image.open(io.BytesIO(imgdata)) print(type(img2)) #img2.show() #img = cv2.imread(img2) #print('opencv type' , type(img)) #print(type(img)) a = np.array(img2.getdata()).astype(np.float64) #print('datatype of w ', w.dtype) #b = np.ones(172800,3) #a = np.concatenate((w,b), axis=None) print('type of data to model ', type(a)) print('shape of data from frontend', a.shape) #r, c = a.shape #print('Value of r', r) if a.shape == (480000, 3): data = a.reshape(600, 800, 3) else: data = a.reshape(480, 640, 3) st = time.time() parser = OptionParser() parser.add_option( "-n", "--num_rois", type="int", dest="num_rois", help="Number of ROIs per iteration. Higher means more memory use.", default=4) parser.add_option( "--config_filename", dest="config_filename", help= "Location to read the metadata related to the training (generated when training).", default="config.pickle") parser.add_option("--network", dest="network", help="Base network to use. Supports vgg or resnet50.", default='vgg') (options, args) = parser.parse_args() config_output_filename = options.config_filename with open(config_output_filename, 'rb') as f_in: C = pickle.load(f_in) if C.network == 'resnet50': import keras_frcnn.resnet as nn elif C.network == 'vgg': import keras_frcnn.vgg as nn # turn off any data augmentation at test time C.use_horizontal_flips = False C.use_vertical_flips = False C.rot_90 = False class_mapping = C.class_mapping if 'bg' not in class_mapping: class_mapping['bg'] = len(class_mapping) class_mapping = {v: k for k, v in class_mapping.items()} print(class_mapping) class_to_color = { class_mapping[v]: np.random.randint(0, 255, 3) for v in class_mapping } C.num_rois = int(options.num_rois) if C.network == 'resnet50': num_features = 1024 elif C.network == 'vgg': num_features = 512 if K.image_dim_ordering() == 'th': input_shape_img = (3, None, None) input_shape_features = (num_features, None, None) else: input_shape_img = (None, None, 3) input_shape_features = (None, None, num_features) img_input = Input(shape=input_shape_img) roi_input = Input(shape=(C.num_rois, 4)) feature_map_input = Input(shape=input_shape_features) # define the base network (resnet here, can be VGG, Inception, etc) shared_layers = nn.nn_base(img_input, trainable=True) # define the RPN, built on the base layers num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios) rpn_layers = nn.rpn(shared_layers, num_anchors) classifier = nn.classifier(feature_map_input, roi_input, C.num_rois, nb_classes=len(class_mapping), trainable=True) model_rpn = Model(img_input, rpn_layers) model_classifier_only = Model([feature_map_input, roi_input], classifier) model_classifier = Model([feature_map_input, roi_input], classifier) print('Loading weights from {}'.format(C.model_path)) model_rpn.load_weights(C.model_path, by_name=True) model_classifier.load_weights(C.model_path, by_name=True) model_rpn.compile(optimizer='sgd', loss='mse') model_classifier.compile(optimizer='sgd', loss='mse') all_imgs = [] classes = {} bbox_threshold = 0.6 visualise = True #if not img_name.lower().endswith(('.bmp', '.jpeg', '.jpg', '.png', '.tif', '.tiff')): # continue #print(img_name) #filepath = os.path.join(img_path,img_name) img = data #cv2.imshow('img', img) #cv2.waitKey(0) X, ratio = format_img(img, C) if K.image_dim_ordering() == 'tf': X = np.transpose(X, (0, 2, 3, 1)) # get the feature maps and output from the RPN [Y1, Y2, F] = model_rpn.predict(X) R = roi_helpers.rpn_to_roi(Y1, Y2, C, K.image_dim_ordering(), overlap_thresh=0.6) # convert from (x1,y1,x2,y2) to (x,y,w,h) R[:, 2] -= R[:, 0] R[:, 3] -= R[:, 1] # apply the spatial pyramid pooling to the proposed regions bboxes = {} probs = {} for jk in range(R.shape[0] // C.num_rois + 1): ROIs = np.expand_dims(R[C.num_rois * jk:C.num_rois * (jk + 1), :], axis=0) if ROIs.shape[1] == 0: break if jk == R.shape[0] // C.num_rois: #pad R curr_shape = ROIs.shape target_shape = (curr_shape[0], C.num_rois, curr_shape[2]) ROIs_padded = np.zeros(target_shape).astype(ROIs.dtype) ROIs_padded[:, :curr_shape[1], :] = ROIs ROIs_padded[0, curr_shape[1]:, :] = ROIs[0, 0, :] ROIs = ROIs_padded [P_cls, P_regr] = model_classifier_only.predict([F, ROIs]) for ii in range(P_cls.shape[1]): if np.max(P_cls[0, ii, :]) < bbox_threshold or np.argmax( P_cls[0, ii, :]) == (P_cls.shape[2] - 1): continue cls_name = class_mapping[np.argmax(P_cls[0, ii, :])] if cls_name not in bboxes: bboxes[cls_name] = [] probs[cls_name] = [] (x, y, w, h) = ROIs[0, ii, :] cls_num = np.argmax(P_cls[0, ii, :]) try: (tx, ty, tw, th) = P_regr[0, ii, 4 * cls_num:4 * (cls_num + 1)] tx /= C.classifier_regr_std[0] ty /= C.classifier_regr_std[1] tw /= C.classifier_regr_std[2] th /= C.classifier_regr_std[3] x, y, w, h = roi_helpers.apply_regr(x, y, w, h, tx, ty, tw, th) except: pass bboxes[cls_name].append([ C.rpn_stride * x, C.rpn_stride * y, C.rpn_stride * (x + w), C.rpn_stride * (y + h) ]) probs[cls_name].append(np.max(P_cls[0, ii, :])) all_dets = [] for key in bboxes: bbox = np.array(bboxes[key]) new_boxes, new_probs = roi_helpers.non_max_suppression_fast( bbox, np.array(probs[key]), overlap_thresh=0.6) for jk in range(new_boxes.shape[0]): (x1, y1, x2, y2) = new_boxes[jk, :] (real_x1, real_y1, real_x2, real_y2) = get_real_coordinates(ratio, x1, y1, x2, y2) cv2.rectangle( img, (real_x1, real_y1), (real_x2, real_y2), (int(class_to_color[key][0]), int( class_to_color[key][1]), int(class_to_color[key][2])), 2) textLabel = '{}: {}'.format(key, int(100 * new_probs[jk])) all_dets.append((key, 100 * new_probs[jk])) (retval, baseLine) = cv2.getTextSize(textLabel, cv2.FONT_HERSHEY_COMPLEX, 1, 1) textOrg = (real_x1, real_y1 - 0) cv2.rectangle( img, (textOrg[0] - 5, textOrg[1] + baseLine - 5), (textOrg[0] + retval[0] + 5, textOrg[1] - retval[1] - 5), (0, 0, 0), 2) cv2.rectangle( img, (textOrg[0] - 5, textOrg[1] + baseLine - 5), (textOrg[0] + retval[0] + 5, textOrg[1] - retval[1] - 5), (255, 255, 255), -1) cv2.putText(img, textLabel, textOrg, cv2.FONT_HERSHEY_DUPLEX, 1, (0, 0, 0), 1) print('Elapsed time = {}'.format(time.time() - st)) print('number of windoiws detected', len(all_dets)) print(all_dets) r = len(all_dets) img3 = normalize(img) #plt.imshow(img) #cv2.imshow('img3', img3) #cv2.waitKey(0) K.clear_session() #data = process(data) #print('after reshape',data.shape) im2 = Image.fromarray(img.astype("uint8"), "RGB") print("im2 data type is ", type(im2)) #to_frontend = (" ".join(str(x) for x in data)) db = data.tobytes() print('type of data to database :', type(db)) todb = insertBLOB('Image007', db) print('final data shape fed to model : ', data.shape) # ImageFile img = db.b64encode() # with open("t.png", "rb") as imageFile: # str = base64.b64encode(imageFile.read()) #cv2.imshow('image', cv2.cvtColor(data, cv2.COLOR_BGR2GRAY)) #cv2.waitKey() #str = base64.b64encode(data) # return jsonify(to_frontend, r) #img = Image.open( 'C:\Window Counter_Project\Flickr\Window_101 (131).jpg' ) #img.load() #data = np.asarray( img, dtype="int32" ) #im = Image.fromarray(data.astype("uint8")) #im.show() # uncomment to look at the image rawBytes = io.BytesIO() print(rawBytes) im2.save(rawBytes, "jpeg") #im2.show() print('type of im2 is ', type(im2)) rawBytes.seek(0) # return to the start of the file response_obj = { 'count': r, 'image': "data:image/jpeg;base64," + str(base64.b64encode(rawBytes.read())) } #print("response is", type(response_obj)) return jsonify(Data=response_obj)
def main(): cleanup() sys.setrecursionlimit(40000) config_output_filename = 'config.pickle' with open(config_output_filename, 'r') as f_in: C = pickle.load(f_in) # turn off any data augmentation at test time C.use_horizontal_flips = False C.use_vertical_flips = False C.rot_90 = False class_mapping = C.class_mapping if 'bg' not in class_mapping: class_mapping['bg'] = len(class_mapping) class_mapping = {v: k for k, v in class_mapping.iteritems()} print(class_mapping) class_to_color = {class_mapping[v]: np.random.randint(0, 255, 3) for v in class_mapping} C.num_rois = num_rois if K.image_dim_ordering() == 'th': input_shape_img = (3, None, None) input_shape_features = (1024, None, None) else: input_shape_img = (None, None, 3) input_shape_features = (None, None, 1024) img_input = Input(shape=input_shape_img) roi_input = Input(shape=(C.num_rois, 4)) feature_map_input = Input(shape=input_shape_features) # define the base network (resnet here, can be VGG, Inception, etc) shared_layers = nn.nn_base(img_input, trainable=True) # define the RPN, built on the base layers num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios) rpn_layers = nn.rpn(shared_layers, num_anchors) classifier = nn.classifier(feature_map_input, roi_input, C.num_rois, nb_classes=len(class_mapping), trainable=True) model_rpn = Model(img_input, rpn_layers) model_classifier_only = Model([feature_map_input, roi_input], classifier) model_classifier = Model([feature_map_input, roi_input], classifier) model_rpn.load_weights(C.model_path, by_name=True) model_classifier.load_weights(C.model_path, by_name=True) model_rpn.compile(optimizer='sgd', loss='mse') model_classifier.compile(optimizer='sgd', loss='mse') all_imgs = [] classes = {} bbox_threshold = 0.8 visualise = True print("Converting video to images..") convert_to_images() print("anotating...") list_files = sorted(get_file_names(img_path), key=lambda var:[int(x) if x.isdigit() else x for x in re.findall(r'[^0-9]|[0-9]+', var)]) for img_name in list_files: if not img_name.lower().endswith(('.bmp', '.jpeg', '.jpg', '.png', '.tif', '.tiff')): continue print(img_name) st = time.time() filepath = os.path.join(img_path,img_name) img = cv2.imread(filepath) X = format_img(img, C) img_scaled = np.transpose(X.copy()[0, (2, 1, 0), :, :], (1, 2, 0)).copy() img_scaled[:, :, 0] += 123.68 img_scaled[:, :, 1] += 116.779 img_scaled[:, :, 2] += 103.939 img_scaled = img_scaled.astype(np.uint8) if K.image_dim_ordering() == 'tf': X = np.transpose(X, (0, 2, 3, 1)) # get the feature maps and output from the RPN [Y1, Y2, F] = model_rpn.predict(X) R = roi_helpers.rpn_to_roi(Y1, Y2, C, K.image_dim_ordering(), overlap_thresh=0.7) # convert from (x1,y1,x2,y2) to (x,y,w,h) R[:, 2] -= R[:, 0] R[:, 3] -= R[:, 1] # apply the spatial pyramid pooling to the proposed regions bboxes = {} probs = {} for jk in range(R.shape[0]//C.num_rois + 1): ROIs = np.expand_dims(R[C.num_rois*jk:C.num_rois*(jk+1), :], axis=0) if ROIs.shape[1] == 0: break if jk == R.shape[0]//C.num_rois: #pad R curr_shape = ROIs.shape target_shape = (curr_shape[0],C.num_rois,curr_shape[2]) ROIs_padded = np.zeros(target_shape).astype(ROIs.dtype) ROIs_padded[:, :curr_shape[1], :] = ROIs ROIs_padded[0, curr_shape[1]:, :] = ROIs[0, 0, :] ROIs = ROIs_padded [P_cls, P_regr] = model_classifier_only.predict([F, ROIs]) for ii in range(P_cls.shape[1]): if np.max(P_cls[0, ii, :]) < bbox_threshold or np.argmax(P_cls[0, ii, :]) == (P_cls.shape[2] - 1): continue cls_name = class_mapping[np.argmax(P_cls[0, ii, :])] if cls_name not in bboxes: bboxes[cls_name] = [] probs[cls_name] = [] (x, y, w, h) = ROIs[0, ii, :] cls_num = np.argmax(P_cls[0, ii, :]) try: (tx, ty, tw, th) = P_regr[0, ii, 4*cls_num:4*(cls_num+1)] tx /= C.classifier_regr_std[0] ty /= C.classifier_regr_std[1] tw /= C.classifier_regr_std[2] th /= C.classifier_regr_std[3] x, y, w, h = roi_helpers.apply_regr(x, y, w, h, tx, ty, tw, th) except: pass bboxes[cls_name].append([16*x, 16*y, 16*(x+w), 16*(y+h)]) probs[cls_name].append(np.max(P_cls[0, ii, :])) all_dets = [] all_objects = [] for key in bboxes: bbox = np.array(bboxes[key]) new_boxes, new_probs = roi_helpers.non_max_suppression_fast(bbox, np.array(probs[key]), overlap_thresh=0.5) for jk in range(new_boxes.shape[0]): (x1, y1, x2, y2) = new_boxes[jk,:] cv2.rectangle(img_scaled,(x1, y1), (x2, y2), class_to_color[key],2) textLabel = '{}: {}'.format(key,int(100*new_probs[jk])) all_dets.append((key,100*new_probs[jk])) all_objects.append((key, 1)) (retval,baseLine) = cv2.getTextSize(textLabel,cv2.FONT_HERSHEY_COMPLEX,1,1) textOrg = (x1, y1-0) cv2.rectangle(img_scaled, (textOrg[0] - 5, textOrg[1]+baseLine - 5), (textOrg[0]+retval[0] + 5, textOrg[1]-retval[1] - 5), (0, 0, 0), 2) cv2.rectangle(img_scaled, (textOrg[0] - 5,textOrg[1]+baseLine - 5), (textOrg[0]+retval[0] + 5, textOrg[1]-retval[1] - 5), (255, 255, 255), -1) cv2.putText(img_scaled, textLabel, textOrg, cv2.FONT_HERSHEY_DUPLEX, 1, (0, 0, 0), 1) print('Elapsed time = {}'.format(time.time() - st)) height, width, channels = img_scaled.shape cv2.rectangle(img_scaled, (0,0), (width, 30), (0, 0, 0), -1) cv2.putText(img_scaled, "Obj count: " + str(list(accumulate(all_objects))), (5, 19), cv2.FONT_HERSHEY_TRIPLEX, 0.5, (255, 255, 255), 1) cv2.imwrite(os.path.join(output_path, img_name), img_scaled) print(all_dets) print("saving to video..") save_to_video()
def train(): # config for data argument cfg = config.Config() cfg.use_horizontal_flips = True cfg.use_vertical_flips = False cfg.rot_90 = True cfg.num_rois = 32 cfg.base_net_weights = os.path.join('model\\wtcwillis_frcnn_last.hdf5') # Model to load weights from # TODO: the only file should to be change for other data to train cfg.model_path = './model/wtcwillis_frcnn_new.hdf5' # New model to train cfg.simple_label_file = 'building_labels.txt' # Label to read in all_images, classes_count, class_mapping = get_data(cfg.simple_label_file) if 'bg' not in classes_count: classes_count['bg'] = 0 class_mapping['bg'] = len(class_mapping) cfg.class_mapping = class_mapping with open(cfg.config_save_file, 'wb') as config_f: pickle.dump(cfg, config_f) print('Config has been written to {}, and can be loaded when testing to ensure correct results'.format( cfg.config_save_file)) inv_map = {v: k for k, v in class_mapping.items()} print('Training images per class:') pprint.pprint(classes_count) print('Num classes (including bg) = {}'.format(len(classes_count))) random.shuffle(all_images) num_imgs = len(all_images) train_imgs = [s for s in all_images if s['imageset'] == 'trainval'] val_imgs = [s for s in all_images if s['imageset'] == 'test'] print('Num train samples {}'.format(len(train_imgs))) print('Num val samples {}'.format(len(val_imgs))) data_gen_train = data_generators.get_anchor_gt(train_imgs, classes_count, cfg, nn.get_img_output_length, K.image_dim_ordering(), mode='train') data_gen_val = data_generators.get_anchor_gt(val_imgs, classes_count, cfg, nn.get_img_output_length, K.image_dim_ordering(), mode='val') if K.image_dim_ordering() == 'th': input_shape_img = (3, None, None) else: input_shape_img = (None, None, 3) img_input = Input(shape=input_shape_img) roi_input = Input(shape=(None, 4)) # define the base network (resnet here, can be VGG, Inception, etc) shared_layers = nn.nn_base(img_input, trainable=True) # define the RPN, built on the base layers num_anchors = len(cfg.anchor_box_scales) * len(cfg.anchor_box_ratios) rpn = nn.rpn(shared_layers, num_anchors) classifier = nn.classifier(shared_layers, roi_input, cfg.num_rois, nb_classes=len(classes_count), trainable=True) model_rpn = Model(img_input, rpn[:2]) model_classifier = Model([img_input, roi_input], classifier) # this is a model that holds both the RPN and the classifier, used to load/save weights for the models model_all = Model([img_input, roi_input], rpn[:2] + classifier) try: print('loading weights from {}'.format(cfg.base_net_weights)) model_rpn.load_weights(cfg.base_net_weights, by_name=True) model_classifier.load_weights(cfg.base_net_weights, by_name=True) except Exception as e: print(e) print('Could not load pretrained model weights. Weights can be found in the keras application folder ' 'https://github.com/fchollet/keras/tree/master/keras/applications') optimizer = Adam(lr=1e-4) optimizer_classifier = Adam(lr=1e-4) model_rpn.compile(optimizer=optimizer, loss=[losses_fn.rpn_loss_cls(num_anchors), losses_fn.rpn_loss_regr(num_anchors)]) model_classifier.compile(optimizer=optimizer_classifier, loss=[losses_fn.class_loss_cls, losses_fn.class_loss_regr(len(classes_count) - 1)], metrics={'dense_class_{}'.format(len(classes_count)): 'accuracy'}) model_all.compile(optimizer='sgd', loss='mae') epoch_length = 75 # num_epochs = int(cfg.num_epochs) num_epochs = 200 iter_num = 0 losses = np.zeros((epoch_length, 5)) rpn_accuracy_rpn_monitor = [] rpn_accuracy_for_epoch = [] start_time = time.time() best_loss = np.Inf class_mapping_inv = {v: k for k, v in class_mapping.items()} print('Starting training') vis = True for epoch_num in range(num_epochs): progbar = generic_utils.Progbar(epoch_length) print('Epoch {}/{}'.format(epoch_num + 1, num_epochs)) while True: try: if len(rpn_accuracy_rpn_monitor) == epoch_length and cfg.verbose: mean_overlapping_bboxes = float(sum(rpn_accuracy_rpn_monitor)) / len(rpn_accuracy_rpn_monitor) rpn_accuracy_rpn_monitor = [] print( 'Average number of overlapping bounding boxes from RPN = {} for {} previous iterations'.format( mean_overlapping_bboxes, epoch_length)) if mean_overlapping_bboxes == 0: print('RPN is not producing bounding boxes that overlap' ' the ground truth boxes. Check RPN settings or keep training.') X, Y, img_data = next(data_gen_train) loss_rpn = model_rpn.train_on_batch(X, Y) P_rpn = model_rpn.predict_on_batch(X) result = roi_helpers.rpn_to_roi(P_rpn[0], P_rpn[1], cfg, K.image_dim_ordering(), use_regr=True, overlap_thresh=0.7, max_boxes=300) # note: calc_iou converts from (x1,y1,x2,y2) to (x,y,w,h) format X2, Y1, Y2, IouS = roi_helpers.calc_iou(result, img_data, cfg, class_mapping) if X2 is None: rpn_accuracy_rpn_monitor.append(0) rpn_accuracy_for_epoch.append(0) continue neg_samples = np.where(Y1[0, :, -1] == 1) pos_samples = np.where(Y1[0, :, -1] == 0) if len(neg_samples) > 0: neg_samples = neg_samples[0] else: neg_samples = [] if len(pos_samples) > 0: pos_samples = pos_samples[0] else: pos_samples = [] rpn_accuracy_rpn_monitor.append(len(pos_samples)) rpn_accuracy_for_epoch.append((len(pos_samples))) if cfg.num_rois > 1: if len(pos_samples) < cfg.num_rois // 2: selected_pos_samples = pos_samples.tolist() else: selected_pos_samples = np.random.choice(pos_samples, cfg.num_rois // 2, replace=False).tolist() try: selected_neg_samples = np.random.choice(neg_samples, cfg.num_rois - len(selected_pos_samples), replace=False).tolist() except: selected_neg_samples = np.random.choice(neg_samples, cfg.num_rois - len(selected_pos_samples), replace=True).tolist() sel_samples = selected_pos_samples + selected_neg_samples else: # in the extreme case where num_rois = 1, we pick a random pos or neg sample selected_pos_samples = pos_samples.tolist() selected_neg_samples = neg_samples.tolist() if np.random.randint(0, 2): sel_samples = random.choice(neg_samples) else: sel_samples = random.choice(pos_samples) loss_class = model_classifier.train_on_batch([X, X2[:, sel_samples, :]], [Y1[:, sel_samples, :], Y2[:, sel_samples, :]]) losses[iter_num, 0] = loss_rpn[1] losses[iter_num, 1] = loss_rpn[2] losses[iter_num, 2] = loss_class[1] losses[iter_num, 3] = loss_class[2] losses[iter_num, 4] = loss_class[3] iter_num += 1 progbar.update(iter_num, [('rpn_cls', np.mean(losses[:iter_num, 0])), ('rpn_regr', np.mean(losses[:iter_num, 1])), ('detector_cls', np.mean(losses[:iter_num, 2])), ('detector_regr', np.mean(losses[:iter_num, 3]))]) if iter_num == epoch_length: loss_rpn_cls = np.mean(losses[:, 0]) loss_rpn_regr = np.mean(losses[:, 1]) loss_class_cls = np.mean(losses[:, 2]) loss_class_regr = np.mean(losses[:, 3]) class_acc = np.mean(losses[:, 4]) mean_overlapping_bboxes = float(sum(rpn_accuracy_for_epoch)) / len(rpn_accuracy_for_epoch) rpn_accuracy_for_epoch = [] if cfg.verbose: print('Mean number of bounding boxes from RPN overlapping ground truth boxes: {}'.format( mean_overlapping_bboxes)) print('Classifier accuracy for bounding boxes from RPN: {}'.format(class_acc)) print('Loss RPN classifier: {}'.format(loss_rpn_cls)) print('Loss RPN regression: {}'.format(loss_rpn_regr)) print('Loss Detector classifier: {}'.format(loss_class_cls)) print('Loss Detector regression: {}'.format(loss_class_regr)) print('Elapsed time: {}'.format(time.time() - start_time)) curr_loss = loss_rpn_cls + loss_rpn_regr + loss_class_cls + loss_class_regr iter_num = 0 start_time = time.time() if curr_loss < best_loss: if cfg.verbose: print('Total loss decreased from {} to {}, saving weights'.format(best_loss, curr_loss)) best_loss = curr_loss model_all.save_weights(cfg.model_path) break except Exception as e: print('Exception: {}'.format(e)) # save model model_all.save_weights(cfg.model_path) continue print('Training complete, exiting.')
def operation(): #After prediction K.clear_session() sys.setrecursionlimit(40000) parser = OptionParser() parser.add_option("-p", "--path", dest="test_path", default="images", help="Path to test data.") parser.add_option( "-n", "--num_rois", dest="num_rois", help="Number of ROIs per iteration. Higher means more memory use.", default=32) parser.add_option( "--config_filename", dest="config_filename", help= "Location to read the metadata related to the training (generated when training).", default="config.pickle") parser.add_option("--network", dest="network", help="Base network to use. Supports vgg or resnet50.", default='resnet50') (options, args) = parser.parse_args() if not options.test_path: # if filename is not given parser.error( 'Error: path to test data must be specified. Pass --path to command line' ) config_output_filename = options.config_filename with open(config_output_filename, 'rb') as f_in: C = pickle.load(f_in) if C.network == 'resnet50': import keras_frcnn.resnet as nn elif C.network == 'vgg': import keras_frcnn.vgg as nn # turn off any data augmentation at test time C.use_horizontal_flips = False C.use_vertical_flips = False C.rot_90 = False img_path = options.test_path def format_img_size(img, C): """ formats the image size based on config """ img_min_side = float(C.im_size) (height, width, _) = img.shape if width <= height: ratio = img_min_side / width new_height = int(ratio * height) new_width = int(img_min_side) else: ratio = img_min_side / height new_width = int(ratio * width) new_height = int(img_min_side) img = cv2.resize(img, (new_width, new_height), interpolation=cv2.INTER_CUBIC) return img, ratio def format_img_channels(img, C): """ formats the image channels based on config """ img = img[:, :, (2, 1, 0)] img = img.astype(np.float32) img[:, :, 0] -= C.img_channel_mean[0] img[:, :, 1] -= C.img_channel_mean[1] img[:, :, 2] -= C.img_channel_mean[2] img /= C.img_scaling_factor img = np.transpose(img, (2, 0, 1)) img = np.expand_dims(img, axis=0) return img def format_img(img, C): """ formats an image for model prediction based on config """ img, ratio = format_img_size(img, C) img = format_img_channels(img, C) return img, ratio # Method to transform the coordinates of the bounding box to its original size def get_real_coordinates(ratio, x1, y1, x2, y2): real_x1 = int(round(x1 // ratio)) real_y1 = int(round(y1 // ratio)) real_x2 = int(round(x2 // ratio)) real_y2 = int(round(y2 // ratio)) return (real_x1, real_y1, real_x2, real_y2) class_mapping = C.class_mapping if 'bg' not in class_mapping: class_mapping['bg'] = len(class_mapping) class_mapping = {v: k for k, v in class_mapping.items()} print(class_mapping) class_to_color = { class_mapping[v]: np.random.randint(0, 255, 3) for v in class_mapping } C.num_rois = int(options.num_rois) if C.network == 'resnet50': num_features = 1024 elif C.network == 'vgg': num_features = 512 if K.image_dim_ordering() == 'th': input_shape_img = (3, None, None) input_shape_features = (num_features, None, None) else: input_shape_img = (None, None, 3) input_shape_features = (None, None, num_features) img_input = Input(shape=input_shape_img) roi_input = Input(shape=(C.num_rois, 4)) feature_map_input = Input(shape=input_shape_features) # define the base network (resnet here, can be VGG, Inception, etc) shared_layers = nn.nn_base(img_input, trainable=True) # define the RPN, built on the base layers num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios) rpn_layers = nn.rpn(shared_layers, num_anchors) classifier = nn.classifier(feature_map_input, roi_input, C.num_rois, nb_classes=len(class_mapping), trainable=True) model_rpn = Model(img_input, rpn_layers) model_classifier_only = Model([feature_map_input, roi_input], classifier) model_classifier = Model([feature_map_input, roi_input], classifier) print('Loading weights from {}'.format(C.model_path)) model_rpn.load_weights(C.model_path, by_name=True) model_classifier.load_weights(C.model_path, by_name=True) model_rpn.compile(optimizer='sgd', loss='mse') model_classifier.compile(optimizer='sgd', loss='mse') all_imgs = [] classes = {} bbox_threshold = 0.8 visualise = True object = [] for idx, img_name in enumerate(sorted(os.listdir(img_path))): if not img_name.lower().endswith( ('.bmp', '.jpeg', '.jpg', '.png', '.tif', '.tiff')): continue print(img_name) st = time.time() filepath = os.path.join(img_path, img_name) img = cv2.imread(filepath) X, ratio = format_img(img, C) if K.image_dim_ordering() == 'tf': X = np.transpose(X, (0, 2, 3, 1)) # get the feature maps and output from the RPN [Y1, Y2, F] = model_rpn.predict(X) R = roi_helpers.rpn_to_roi(Y1, Y2, C, K.image_dim_ordering(), overlap_thresh=0.7) # convert from (x1,y1,x2,y2) to (x,y,w,h) R[:, 2] -= R[:, 0] R[:, 3] -= R[:, 1] # apply the spatial pyramid pooling to the proposed regions bboxes = {} probs = {} for jk in range(R.shape[0] // C.num_rois + 1): ROIs = np.expand_dims(R[C.num_rois * jk:C.num_rois * (jk + 1), :], axis=0) if ROIs.shape[1] == 0: break if jk == R.shape[0] // C.num_rois: # pad R curr_shape = ROIs.shape target_shape = (curr_shape[0], C.num_rois, curr_shape[2]) ROIs_padded = np.zeros(target_shape).astype(ROIs.dtype) ROIs_padded[:, :curr_shape[1], :] = ROIs ROIs_padded[0, curr_shape[1]:, :] = ROIs[0, 0, :] ROIs = ROIs_padded [P_cls, P_regr] = model_classifier_only.predict([F, ROIs]) for ii in range(P_cls.shape[1]): if np.max(P_cls[0, ii, :]) < bbox_threshold or np.argmax( P_cls[0, ii, :]) == (P_cls.shape[2] - 1): continue cls_name = class_mapping[np.argmax(P_cls[0, ii, :])] if cls_name not in bboxes: bboxes[cls_name] = [] probs[cls_name] = [] (x, y, w, h) = ROIs[0, ii, :] cls_num = np.argmax(P_cls[0, ii, :]) try: (tx, ty, tw, th) = P_regr[0, ii, 4 * cls_num:4 * (cls_num + 1)] tx /= C.classifier_regr_std[0] ty /= C.classifier_regr_std[1] tw /= C.classifier_regr_std[2] th /= C.classifier_regr_std[3] x, y, w, h = roi_helpers.apply_regr( x, y, w, h, tx, ty, tw, th) except: pass bboxes[cls_name].append([ C.rpn_stride * x, C.rpn_stride * y, C.rpn_stride * (x + w), C.rpn_stride * (y + h) ]) probs[cls_name].append(np.max(P_cls[0, ii, :])) all_dets = [] item = [] for key in bboxes: bbox = np.array(bboxes[key]) new_boxes, new_probs = roi_helpers.non_max_suppression_fast( bbox, np.array(probs[key]), overlap_thresh=0.5) for jk in range(new_boxes.shape[0]): (x1, y1, x2, y2) = new_boxes[jk, :] (real_x1, real_y1, real_x2, real_y2) = get_real_coordinates(ratio, x1, y1, x2, y2) cv2.rectangle( img, (real_x1, real_y1), (real_x2, real_y2), (int(class_to_color[key][0]), int(class_to_color[key][1]), int(class_to_color[key][2])), 2) textLabel = '{}: {}'.format(key, int(100 * new_probs[jk])) all_dets.append((key, 100 * new_probs[jk])) (retval, baseLine) = cv2.getTextSize(textLabel, cv2.FONT_HERSHEY_COMPLEX, 1, 1) textOrg = (real_x1, real_y1 - 0) cv2.rectangle( img, (textOrg[0] - 5, textOrg[1] + baseLine - 5), (textOrg[0] + retval[0] + 5, textOrg[1] - retval[1] - 5), (0, 0, 0), 2) cv2.rectangle( img, (textOrg[0] - 5, textOrg[1] + baseLine - 5), (textOrg[0] + retval[0] + 5, textOrg[1] - retval[1] - 5), (255, 255, 255), -1) cv2.putText(img, textLabel, textOrg, cv2.FONT_HERSHEY_DUPLEX, 1, (0, 0, 0), 1) # print(textLabel) # print(real_x1, real_y1, real_x2, real_y2) if (100 * new_probs[jk]) > 95: item.append([key, [real_x1, real_y1, real_x2, real_y2]]) print(all_dets) object.append([[idx], [item]]) # print (object) # print('Elapsed time = {}'.format(time.time() - st)) # cv2.imshow('img', img) # cv2.waitKey(0) cv2.imwrite('./results_imgs/{}.png'.format(idx), img) print("=======================") return object
def configure_keras_models(config): """Configures Keras. Args: nn (Neural Net): A keras NN. config (object): The config file. Returns: tuple: A tuple of three classifiers: (1) the rpn classifier, (2) the classifier (only), (3) the classifier. """ # Import the correct NN according to config # This must be called within main so that import is called if config.network == 'resnet50': import keras_frcnn.resnet as nn num_features = 1024 elif config.network == 'vgg': import keras_frcnn.vgg as nn num_features = 512 # Configure Keras if K.image_dim_ordering() == 'th': input_shape_img = (3, None, None) input_shape_features = (num_features, None, None) else: input_shape_img = (None, None, 3) input_shape_features = (None, None, num_features) # Config inputs img_input = Input(shape=input_shape_img) roi_input = Input(shape=(config.num_rois, 4)) feature_map_input = Input(shape=input_shape_features) # define the base network (resnet here, can be VGG, Inception, etc) shared_layers = nn.nn_base(img_input, trainable=True) # define the RPN, built on the base layers num_anchors = len(config.anchor_box_scales) * len(config.anchor_box_ratios) rpn_layers = nn.rpn(shared_layers, num_anchors) # Classifier to return classifier = nn.classifier(feature_map_input, roi_input, config.num_rois, nb_classes=len(config.class_mapping), trainable=True) print("Configuring Keras with:") print(" - Neural Network: %s..." % config.network) print(" - Weights loaded from: %s" % config.model_path) print(" - Dimension Ordering: %s" % K.image_dim_ordering()) print(" - Num Features: %s" % num_features) print(" - Num rois: %s" % config.num_rois) model_rpn = Model(img_input, rpn_layers) model_classifier_only = Model([feature_map_input, roi_input], classifier) model_classifier = Model([feature_map_input, roi_input], classifier) model_rpn.load_weights(config.model_path, by_name=True) model_classifier.load_weights(config.model_path, by_name=True) model_rpn.compile(optimizer='sgd', loss='mse') model_classifier.compile(optimizer='sgd', loss='mse') return (model_rpn, model_classifier_only, model_classifier)
def work(path, curelist, textedit): #print("I'm hearing too") #textedit.append("I'm hearing too") sys.setrecursionlimit(40000) test_path = path parser = OptionParser() parser.add_option("-p", "--path", dest="test_path", help="Path to test data.", default=test_path) parser.add_option( "-n", "--num_rois", dest="num_rois", help="Number of ROIs per iteration. Higher means more memory use.", default=256) parser.add_option( "--config_filename", dest="config_filename", help= "Location to read the metadata related to the training (generated when training).", default="config.pickle") parser.add_option("-o", "--parser", dest="parser", help="Parser to use. One of simple or pascal_voc", default="pascal_voc"), #default="pascal_voc" (options, args) = parser.parse_args() if not options.test_path: # if filename is not given parser.error( 'Error: path to test data must be specified. Pass --path to command line' ) if options.parser == 'pascal_voc': from keras_frcnn.pascal_voc_parser import get_data elif options.parser == 'simple': from keras_frcnn.simple_parser import get_data else: raise ValueError( "Command line option parser must be one of 'pascal_voc' or 'simple'" ) config_output_filename = options.config_filename with open(config_output_filename, 'rb') as f_in: C = pickle.load(f_in) # turn off any data augmentation at test time C.use_horizontal_flips = False C.use_vertical_flips = False C.rot_90 = False img_path = options.test_path class_mapping = C.class_mapping if 'bg' not in class_mapping: class_mapping['bg'] = len(class_mapping) #class_mapping = {v: k for k, v in class_mapping.iteritems()} class_mapping = {v: k for k, v in class_mapping.items()} print(class_mapping) class_to_color = { class_mapping[v]: np.random.randint(0, 255, 3) for v in class_mapping } C.num_rois = int(options.num_rois) if K.image_dim_ordering() == 'th': input_shape_img = (3, None, None) input_shape_features = (1024, None, None) else: input_shape_img = (None, None, 3) input_shape_features = (None, None, 1024) img_input = Input(shape=input_shape_img) roi_input = Input(shape=(C.num_rois, 4)) feature_map_input = Input(shape=input_shape_features) # define the base network (resnet here, can be VGG, Inception, etc) shared_layers = nn.nn_base(img_input, trainable=True) # define the RPN, built on the base layers num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios) rpn_layers = nn.rpn(shared_layers, num_anchors) classifier = nn.classifier(feature_map_input, roi_input, C.num_rois, nb_classes=len(class_mapping), trainable=True) model_rpn = Model(img_input, rpn_layers) model_classifier_only = Model([feature_map_input, roi_input], classifier) model_classifier = Model([feature_map_input, roi_input], classifier) model_rpn.load_weights(C.model_path, by_name=True) model_classifier.load_weights(C.model_path, by_name=True) model_rpn.compile(optimizer='sgd', loss='mse') model_classifier.compile(optimizer='sgd', loss='mse') ##### all_imgs, _, _ = get_data(options.test_path) test_imgs = [s for s in all_imgs if s['imageset'] == 'test'] #test_imgs=test_imgs1[3111:4056] T = {} P = {} for idx, img_data in enumerate(test_imgs): print('{}/{}'.format(idx, len(test_imgs))) #textedit.append('{}/{}'.format(idx,len(test_imgs))) st = time.time() filepath = img_data['filepath'] print(filepath) img = cv2.imread(filepath) X, fx, fy = format_img(img, C) if K.image_dim_ordering() == 'tf': X = np.transpose(X, (0, 2, 3, 1)) # get the feature maps and output from the RPN [Y1, Y2, F] = model_rpn.predict(X) R = roi_helpers.rpn_to_roi(Y1, Y2, C, K.image_dim_ordering(), overlap_thresh=0.5) ##0.7 # convert from (x1,y1,x2,y2) to (x,y,w,h) R[:, 2] -= R[:, 0] R[:, 3] -= R[:, 1] # apply the spatial pyramid pooling to the proposed regions bboxes = {} probs = {} for jk in range(R.shape[0] // C.num_rois + 1): ROIs = np.expand_dims(R[C.num_rois * jk:C.num_rois * (jk + 1), :], axis=0) if ROIs.shape[1] == 0: break if jk == R.shape[0] // C.num_rois: # pad R curr_shape = ROIs.shape target_shape = (curr_shape[0], C.num_rois, curr_shape[2]) ROIs_padded = np.zeros(target_shape).astype(ROIs.dtype) ROIs_padded[:, :curr_shape[1], :] = ROIs ROIs_padded[0, curr_shape[1]:, :] = ROIs[0, 0, :] ROIs = ROIs_padded [P_cls, P_regr] = model_classifier_only.predict([F, ROIs]) for ii in range(P_cls.shape[1]): if np.argmax(P_cls[0, ii, :]) == (P_cls.shape[2] - 1): continue cls_name = class_mapping[np.argmax(P_cls[0, ii, :])] if cls_name not in bboxes: bboxes[cls_name] = [] probs[cls_name] = [] (x, y, w, h) = ROIs[0, ii, :] cls_num = np.argmax(P_cls[0, ii, :]) try: (tx, ty, tw, th) = P_regr[0, ii, 4 * cls_num:4 * (cls_num + 1)] tx /= C.classifier_regr_std[0] ty /= C.classifier_regr_std[1] tw /= C.classifier_regr_std[2] th /= C.classifier_regr_std[3] x, y, w, h = roi_helpers.apply_regr( x, y, w, h, tx, ty, tw, th) except: pass bboxes[cls_name].append( [16 * x, 16 * y, 16 * (x + w), 16 * (y + h)]) probs[cls_name].append(np.max(P_cls[0, ii, :])) all_dets = [] for key in bboxes: bbox = np.array(bboxes[key]) new_boxes, new_probs = roi_helpers.non_max_suppression_fast( bbox, np.array(probs[key]), overlap_thresh=0.45) ###0.5 for jk in range(new_boxes.shape[0]): (x1, y1, x2, y2) = new_boxes[jk, :] det = { 'x1': x1, 'x2': x2, 'y1': y1, 'y2': y2, 'class': key, 'prob': new_probs[jk] } all_dets.append(det) print('Elapsed time = {}'.format(time.time() - st)) cursor = textedit.textCursor() cursor.movePosition(QtGui.QTextCursor.End) cursor.insertText('{}/{}'.format(idx, len(test_imgs))) cursor.insertText("\r\n") cursor.insertText('Elapsed time = {}'.format(time.time() - st)) cursor.insertText("\r\n") # textedit.append('Elapsed time = {}'.format(time.time() - st)) textedit.setTextCursor(cursor) textedit.ensureCursorVisible() t, p = get_map(all_dets, img_data['bboxes'], (fx, fy)) for key in t.keys(): if key not in T: T[key] = [] P[key] = [] T[key].extend(t[key]) P[key].extend(p[key]) p1 = [] t1 = [] p1.append(P["airbase"]) t1.append(T["airbase"]) p1.append(P["harbour"]) t1.append(T["harbour"]) p1.append(P["island"]) t1.append(T["island"]) prefastr = [] recfastr = [] apfastr = [] for m in range(len(p1)): p11 = np.zeros(len(p1[m])) for i in range(len(p1[m])): if p1[m][i] > 0.45: p11[i] = 1 else: p11[i] = 0 p_p1 = p11 t_t1 = t1[m] false_positives = np.zeros((0, )) true_positives = np.zeros((0, )) scores = np.zeros((0, )) num_annotations = 0.0 nump = len(p_p1) # for n in range (1915):#(len(p_p1)): for n in range(len(p_p1)): if t_t1[n] == 1 and p_p1[n] == 1: true_positives = np.append(true_positives, 1) false_positives = np.append(false_positives, 0) scores = np.append(scores, p1[m][n]) if t_t1[n] == 0 and p_p1[n] == 1: true_positives = np.append(true_positives, 0) false_positives = np.append(false_positives, 1) scores = np.append(scores, p1[m][n]) # if t_t1[n]==1 and p_p1[n]==0: # true_positives = np.append(true_positives, 0) # false_positives = np.append(false_positives, 0) # scores = np.append(scores, p1[m][n]) if t_t1[n] == 1: num_annotations = num_annotations + 1 descending_indices = np.argsort(-scores) true_positives = true_positives[descending_indices] false_positives = false_positives[descending_indices] # compute false positives and true positives true_positives = np.cumsum(true_positives) false_positives = np.cumsum(false_positives) # compute recall and precision recall1 = true_positives / num_annotations precision1 = true_positives / np.maximum( true_positives + false_positives, np.finfo(np.float64).eps) average_precision = compute_ap(recall1, precision1) prefastr.append(precision1) recfastr.append(recall1) apfastr.append(average_precision) for i in range(0, len(recfastr)): curelist[i].setData(recfastr[i], prefastr[i])
def testModel(config_filename='config_ui.pickle'): st.markdown('## Starting validation of test data set') sys.setrecursionlimit(40000) config = tf.compat.v1.ConfigProto() config.gpu_options.allow_growth = True config.log_device_placement = True sess = tf.compat.v1.Session(config=config) K.set_session(sess) test_path = 'test' num_rois = 4 config_filename = config_filename network = 'resnet50' config_output_filename = config_filename with open(config_output_filename, 'rb') as f_in: C = pickle.load(f_in) if C.network == 'resnet50': import keras_frcnn.resnet as nn elif C.network == 'vgg': import keras_frcnn.vgg as nn # turn off any data augmentation at test time C.use_horizontal_flips = False C.use_vertical_flips = False C.rot_90 = False img_path = test_path class_mapping = C.class_mapping if 'bg' not in class_mapping: class_mapping['bg'] = len(class_mapping) class_mapping = {v: k for k, v in class_mapping.items()} st.write('Class Mapping', class_mapping) class_to_color = { class_mapping[v]: np.random.randint(0, 255, 3) for v in class_mapping } C.num_rois = int(num_rois) if C.network == 'resnet50': num_features = 1024 elif C.network == 'vgg': num_features = 512 if K.image_data_format() == 'channels_first': input_shape_img = (3, None, None) input_shape_features = (num_features, None, None) else: input_shape_img = (None, None, 3) input_shape_features = (None, None, num_features) img_input = Input(shape=input_shape_img) roi_input = Input(shape=(C.num_rois, 4)) feature_map_input = Input(shape=input_shape_features) ## Defining Model # define the base network (resnet here, can be VGG, Inception, etc) shared_layers = nn.nn_base(img_input, trainable=True) # define the RPN, built on the base layers num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios) rpn_layers = nn.rpn(shared_layers, num_anchors) classifier = nn.classifier(feature_map_input, roi_input, C.num_rois, nb_classes=len(class_mapping), trainable=True) model_rpn = Model(img_input, rpn_layers) model_classifier_only = Model([feature_map_input, roi_input], classifier) model_classifier = Model([feature_map_input, roi_input], classifier) st.write(f'Loading weights from {C.model_path}') model_rpn.load_weights(C.model_path, by_name=True) model_classifier.load_weights(C.model_path, by_name=True) model_rpn.compile(optimizer='sgd', loss='mse') model_classifier.compile(optimizer='sgd', loss='mse') all_imgs = [] classes = {} ## few hyper parameters to select bbox bbox_threshold = 0.9 visualise = True set_Overlap_threshold = 0.1 progress_bar = st.progress(0.0) with st.spinner('Wait for sample test images...'): for idx, img_name in enumerate(sorted(os.listdir(img_path))): progress_bar.progress((idx + 0.1) / len(os.listdir(img_path))) if not img_name.lower().endswith( ('.bmp', '.jpeg', '.jpg', '.png', '.tif', '.tiff')): continue st.write(img_name) stTime = time.time() filepath = os.path.join(img_path, img_name) img = cv2.imread(filepath) X, ratio = format_img(img, C) if K.image_data_format() == 'channels_last': X = np.transpose(X, (0, 2, 3, 1)) # get the feature maps and output from the RPN [Y1, Y2, F] = model_rpn.predict(X) R = roi_helpers.rpn_to_roi(Y1, Y2, C, K.image_data_format(), overlap_thresh=0.7) #0.7 # convert from (x1,y1,x2,y2) to (x,y,w,h) R[:, 2] -= R[:, 0] R[:, 3] -= R[:, 1] # apply the spatial pyramid pooling to the proposed regions bboxes = {} probs = {} for jk in range(R.shape[0] // C.num_rois + 1): ROIs = np.expand_dims(R[C.num_rois * jk:C.num_rois * (jk + 1), :], axis=0) if ROIs.shape[1] == 0: print("ROI Shape: ", ROIs.shape[1]) break if jk == R.shape[0] // C.num_rois: #pad R curr_shape = ROIs.shape target_shape = (curr_shape[0], C.num_rois, curr_shape[2]) ROIs_padded = np.zeros(target_shape).astype(ROIs.dtype) ROIs_padded[:, :curr_shape[1], :] = ROIs ROIs_padded[0, curr_shape[1]:, :] = ROIs[0, 0, :] ROIs = ROIs_padded [P_cls, P_regr] = model_classifier_only.predict([F, ROIs]) for ii in range(P_cls.shape[1]): #print("np max:",np.max(P_cls[0, ii, :])) #print("np argmax:",np.argmax(P_cls[0, ii, :])) #print(np.max(P_cls[0, ii, :]) < bbox_threshold) if np.max(P_cls[0, ii, :]) < bbox_threshold or np.argmax( P_cls[0, ii, :]) == (P_cls.shape[2] - 1): continue cls_name = class_mapping[np.argmax(P_cls[0, ii, :])] #print('class name:',cls_name) if cls_name not in bboxes: bboxes[cls_name] = [] probs[cls_name] = [] (x, y, w, h) = ROIs[0, ii, :] cls_num = np.argmax(P_cls[0, ii, :]) try: (tx, ty, tw, th) = P_regr[0, ii, 4 * cls_num:4 * (cls_num + 1)] tx /= C.classifier_regr_std[0] ty /= C.classifier_regr_std[1] tw /= C.classifier_regr_std[2] th /= C.classifier_regr_std[3] x, y, w, h = roi_helpers.apply_regr( x, y, w, h, tx, ty, tw, th) except: pass bboxes[cls_name].append([ C.rpn_stride * x, C.rpn_stride * y, C.rpn_stride * (x + w), C.rpn_stride * (y + h) ]) probs[cls_name].append(np.max(P_cls[0, ii, :])) all_dets = [] for key in bboxes: bbox = np.array(bboxes[key]) new_boxes, new_probs = roi_helpers.non_max_suppression_fast( bbox, np.array(probs[key]), overlap_thresh=set_Overlap_threshold) for jk in range(new_boxes.shape[0]): (x1, y1, x2, y2) = new_boxes[jk, :] (real_x1, real_y1, real_x2, real_y2) = get_real_coordinates(ratio, x1, y1, x2, y2) cv2.rectangle(img, (real_x1, real_y1), (real_x2, real_y2), (int(class_to_color[key][0]), int(class_to_color[key][1]), int(class_to_color[key][2])), 2) textLabel = f'{key}: {int(100*new_probs[jk])}' all_dets.append((key, 100 * new_probs[jk])) (retval, baseLine) = cv2.getTextSize(textLabel, cv2.FONT_HERSHEY_COMPLEX, 1, 1) textOrg = (real_x1, real_y1 - 0) cv2.rectangle(img, (textOrg[0] - 5, textOrg[1] + baseLine - 5), (textOrg[0] + retval[0] + 5, textOrg[1] - retval[1] - 5), (0, 0, 0), 2) cv2.rectangle(img, (textOrg[0] - 5, textOrg[1] + baseLine - 5), (textOrg[0] + retval[0] + 5, textOrg[1] - retval[1] - 5), (255, 255, 255), -1) cv2.putText(img, textLabel, textOrg, cv2.FONT_HERSHEY_DUPLEX, 1, (0, 0, 0), 1) st.write(f'Elapsed time = {time.time() - stTime}') st.write(all_dets) plt.figure(figsize=(10, 10)) plt.grid() plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB)) #plt.show() st.image(img, use_column_width=True, clamp=True) #cv2.imwrite('./results_imgs-fp-mappen-test/{}.png'.format(os.path.splitext(str(img_name))[0]),img) from keras_frcnn.simple_parser import get_data test_path = 'test_annotationAlt.txt' # Test data (annotation file) startTime = time.time() test_imgs, classes_count, class_mapping = get_data(test_path) st.write('Spend %0.2f mins to load test data' % ((time.time() - startTime) / 60)) class_mapping = C.class_mapping class_mapping = {v: k for k, v in class_mapping.items()} st.write(class_mapping) class_to_color = { class_mapping[v]: np.random.randint(0, 255, 3) for v in class_mapping } from sklearn.metrics import average_precision_score set_Overlap_threshold = 0.1 T = {} P = {} mAPs = [] iou_map = [] progress_bar1 = st.progress(0.0) with st.spinner('Wait for test set evaluation...'): for idx, img_data in enumerate(test_imgs): progress_bar1.progress((idx + 0.1) / len(test_imgs)) st.write('{}/{}'.format(idx, len(test_imgs))) startTime = time.time() filepath = img_data['filepath'] img = cv2.imread(filepath) X, fx, fy = format_img_map(img, C) # Change X (img) shape from (1, channel, height, width) to (1, height, width, channel) X = np.transpose(X, (0, 2, 3, 1)) # get the feature maps and output from the RPN [Y1, Y2, F] = model_rpn.predict(X) R = rpn_to_roi(Y1, Y2, C, K.image_data_format(), overlap_thresh=0.7) # convert from (x1,y1,x2,y2) to (x,y,w,h) R[:, 2] -= R[:, 0] R[:, 3] -= R[:, 1] # apply the spatial pyramid pooling to the proposed regions bboxes = {} probs = {} for jk in range(R.shape[0] // C.num_rois + 1): ROIs = np.expand_dims(R[C.num_rois * jk:C.num_rois * (jk + 1), :], axis=0) if ROIs.shape[1] == 0: break if jk == R.shape[0] // C.num_rois: # pad R curr_shape = ROIs.shape target_shape = (curr_shape[0], C.num_rois, curr_shape[2]) ROIs_padded = np.zeros(target_shape).astype(ROIs.dtype) ROIs_padded[:, :curr_shape[1], :] = ROIs ROIs_padded[0, curr_shape[1]:, :] = ROIs[0, 0, :] ROIs = ROIs_padded [P_cls, P_regr] = model_classifier_only.predict([F, ROIs]) # Calculate all classes' bboxes coordinates on resized image (300, 400) # Drop 'bg' classes bboxes for ii in range(P_cls.shape[1]): # If class name is 'bg', continue if np.argmax(P_cls[0, ii, :]) == (P_cls.shape[2] - 1): continue # Get class name cls_name = class_mapping[np.argmax(P_cls[0, ii, :])] if cls_name not in bboxes: bboxes[cls_name] = [] probs[cls_name] = [] (x, y, w, h) = ROIs[0, ii, :] cls_num = np.argmax(P_cls[0, ii, :]) try: (tx, ty, tw, th) = P_regr[0, ii, 4 * cls_num:4 * (cls_num + 1)] tx /= C.classifier_regr_std[0] ty /= C.classifier_regr_std[1] tw /= C.classifier_regr_std[2] th /= C.classifier_regr_std[3] x, y, w, h = roi_helpers.apply_regr( x, y, w, h, tx, ty, tw, th) except: pass bboxes[cls_name].append( [16 * x, 16 * y, 16 * (x + w), 16 * (y + h)]) probs[cls_name].append(np.max(P_cls[0, ii, :])) all_dets = [] for key in bboxes: bbox = np.array(bboxes[key]) # Apply non-max-suppression on final bboxes to get the output bounding boxe new_boxes, new_probs = non_max_suppression_fast( bbox, np.array(probs[key]), overlap_thresh=set_Overlap_threshold) for jk in range(new_boxes.shape[0]): (x1, y1, x2, y2) = new_boxes[jk, :] det = { 'x1': x1, 'x2': x2, 'y1': y1, 'y2': y2, 'class': key, 'prob': new_probs[jk] } all_dets.append(det) st.write('Elapsed time = {}'.format(time.time() - startTime)) t, p, iouVal = get_map(all_dets, img_data['bboxes'], (fx, fy)) for key in t.keys(): if key not in T: T[key] = [] P[key] = [] T[key].extend(t[key]) P[key].extend(p[key]) all_aps = [] for key in T.keys(): ap = average_precision_score(T[key], P[key]) st.write('{} AP: {}'.format(key, ap)) all_aps.append(ap) st.write('mAP = {}'.format(np.mean(np.array(all_aps)))) st.write('iou = {}'.format(np.mean(iouVal))) mAPs.append(np.mean(np.array(all_aps))) iou_map.append(iouVal) st.markdown('## Mean IOU:', Average(iou_map)) st.markdown('## Mean average precision:', np.mean(np.array(mAPs)))
def test_frcnn(img_name, config, model): from keras.models import Model from keras_frcnn.RoiPoolingConv import RoiPoolingConv import keras_frcnn.resnet as nn config_output_filename = config with open(config_output_filename, 'rb') as f_in: C = pickle.load(f_in) C.use_horizontal_flips = False C.use_vertical_flips = False C.rot_90 = False C.num_rois = 32 C.model_path = model class_mapping = C.class_mapping if 'bg' not in class_mapping: class_mapping['bg'] = len(class_mapping) class_mapping = {v: k for k, v in class_mapping.items()} print(class_mapping) num_features = 1024 if K.image_dim_ordering() == 'th': input_shape_img = (3, None, None) input_shape_features = (num_features, None, None) else: input_shape_img = (None, None, 3) input_shape_features = (None, None, num_features) img_input = Input(shape=input_shape_img) roi_input = Input(shape=(C.num_rois, 4)) feature_map_input = Input(shape=input_shape_features) shared_layers = nn.nn_base(img_input, trainable=True) num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios) rpn_layers = nn.rpn(shared_layers, num_anchors) classifier = nn.classifier(feature_map_input, roi_input, C.num_rois, nb_classes=len(class_mapping), trainable=True) model_rpn = Model(img_input, rpn_layers) model_classifier_only = Model([feature_map_input, roi_input], classifier) model_classifier = Model([feature_map_input, roi_input], classifier) print('Loading weights from {}'.format(C.model_path)) model_rpn.load_weights(C.model_path, by_name=True) model_classifier.load_weights(C.model_path, by_name=True) model_rpn.compile(optimizer='sgd', loss='mse') model_classifier.compile(optimizer='sgd', loss='mse') # crop bbox_threshold = 0.8 crops = [] # img_path = path # for idx, img_name in enumerate(sorted(os.listdir(img_path))): # if not img_name.lower().endswith(('.bmp', '.jpeg', '.jpg', '.png', '.tif', '.tiff')): # continue print(img_name) st = time.time() # filepath = os.path.join(img_path,img_name) # img = cv2.imread(filepath) img = cv2.imread(img_name) X, ratio = format_img(img, C) if K.image_dim_ordering() == 'tf': X = np.transpose(X, (0, 2, 3, 1)) [Y1, Y2, F] = model_rpn.predict(X) R = roi_helpers.rpn_to_roi(Y1, Y2, C, K.image_dim_ordering(), overlap_thresh=0.7) R[:, 2] -= R[:, 0] R[:, 3] -= R[:, 1] bboxes = {} probs = {} # roifs = {} # scenefs = {} for jk in range(R.shape[0] // C.num_rois + 1): ROIs = np.expand_dims(R[C.num_rois * jk:C.num_rois * (jk + 1), :], axis=0) if ROIs.shape[1] == 0: break if jk == R.shape[0] // C.num_rois: #pad R curr_shape = ROIs.shape target_shape = (curr_shape[0], C.num_rois, curr_shape[2]) ROIs_padded = np.zeros(target_shape).astype(ROIs.dtype) ROIs_padded[:, :curr_shape[1], :] = ROIs ROIs_padded[0, curr_shape[1]:, :] = ROIs[0, 0, :] ROIs = ROIs_padded # out_roi_pool = RoiPoolingConv(14, C.num_rois)([feature_map_input, roi_input]) # haha = Model(inputs=[feature_map_input, roi_input], outputs=out_roi_pool) # all_roifs = haha.predict([F, ROIs]) # [all_scenefs, P_cls, P_regr] = model_classifier_only.predict([F, ROIs [P_cls, P_regr] = model_classifier_only.predict([F, ROIs]) for ii in range(P_cls.shape[1]): if np.max(P_cls[0, ii, :]) < bbox_threshold or np.argmax( P_cls[0, ii, :]) == (P_cls.shape[2] - 1): continue cls_name = class_mapping[np.argmax(P_cls[0, ii, :])] if cls_name not in bboxes: bboxes[cls_name] = [] probs[cls_name] = [] # roifs[cls_name] = [] # scenefs[cls_name] = [] (x, y, w, h) = ROIs[0, ii, :] cls_num = np.argmax(P_cls[0, ii, :]) try: (tx, ty, tw, th) = P_regr[0, ii, 4 * cls_num:4 * (cls_num + 1)] tx /= C.classifier_regr_std[0] ty /= C.classifier_regr_std[1] tw /= C.classifier_regr_std[2] th /= C.classifier_regr_std[3] x, y, w, h = roi_helpers.apply_regr(x, y, w, h, tx, ty, tw, th) except: pass bboxes[cls_name].append([ C.rpn_stride * x, C.rpn_stride * y, C.rpn_stride * (x + w), C.rpn_stride * (y + h) ]) probs[cls_name].append(np.max(P_cls[0, ii, :])) # roifs[cls_name].append(all_roifs[0, ii, :]) # scenefs[cls_name].append(all_scenefs[0, ii, :]) all_dets = [] for key in bboxes: bbox = np.array(bboxes[key]) prob = np.array(probs[key]) # roif = np.array(roifs[key]) # scenef = np.array(scenefs[key]) # new_boxes, new_probs, new_roifs, new_scenefs = roi_helpers.nmsf(bbox, prob, roif, scenef, overlap_thresh=0.5) new_boxes, new_probs = roi_helpers.non_max_suppression_fast( bbox, np.array(probs[key]), overlap_thresh=0.5) for jk in range(new_boxes.shape[0]): (x1, y1, x2, y2) = new_boxes[jk, :] (real_x1, real_y1, real_x2, real_y2) = get_real_coordinates(ratio, x1, y1, x2, y2) all_dets.append( (key, 100 * new_probs[jk], real_x1, real_x2, real_y1, real_y2)) crops.append([]) crops[len(crops) - 1].append(img_name) for i in range(len(all_dets)): cropped = img[all_dets[i][4]:all_dets[i][5], max(0, all_dets[i][2]):all_dets[i][3]] # croppath = '/home/comp/e4252392/end2end/crops0317' # croppath = '/users/sunjingxuan/pycharmprojects/end2end/crops0317_cpu' croppath = '/users/sunjingxuan/desktop/FYP_demo/flask/end2end/crops_demo' cropname = os.path.join( croppath, str(img_name.split(".")[0].split("/")[-1]) + "_cropped" + str(i) + ".jpg") cv2.imwrite(cropname, cropped) # append key, cropname crops[len(crops) - 1].append((all_dets[i][0], cropname)) print('Elapsed time = {}'.format(time.time() - st)) print(all_dets) return crops
def predict(args_): path = args_.path with open('config.pickle', 'rb') as f_in: cfg = pickle.load(f_in) cfg.use_horizontal_flips = False cfg.use_vertical_flips = False cfg.rot_90 = False class_mapping = cfg.class_mapping if 'bg' not in class_mapping: class_mapping['bg'] = len(class_mapping) class_mapping = {v: k for k, v in class_mapping.items()} # tensorflow的输入方式是none,none,3 input_shape_img = (None, None, 3) # 这里如果是resnet,num_features = 1024 # 如果是vgg,num_features = 512 input_shape_features = (None, None, 1024) img_input = Input(shape=input_shape_img) roi_input = Input(shape=(cfg.num_rois, 4)) feature_map_input = Input(shape=input_shape_features) shared_layers = nn.nn_base(img_input, trainable=True) # define the RPN, built on the base layers # 构建rpn输出 # anchor的个数 num_anchors = len(cfg.anchor_box_scales) * len(cfg.anchor_box_ratios) rpn_layers = nn.rpn(shared_layers, num_anchors) # 构建classifier的输出,参数分别是:特征层输出,预选框,探测框的输入,多少个类,是否可训练。 classifier = nn.classifier(feature_map_input, roi_input, cfg.num_rois, nb_classes=len(class_mapping), trainable=True) # 构建网络 model_rpn = Model(img_input, rpn_layers) model_classifier_only = Model([feature_map_input, roi_input], classifier) model_classifier = Model([feature_map_input, roi_input], classifier) # 加载参数 print('Loading weights from {}'.format(cfg.model_path)) # self.model_path = 'model_trained/model_frcnn.vgg.hdf5' # 这里是resnet,所以不太明白这里的model_path??? model_rpn.load_weights(cfg.model_path, by_name=True) model_classifier.load_weights(cfg.model_path, by_name=True) # 这里是测试阶段。不用训练,但是这种加载模型,之后要求编译是keras要求的,随便找个mse即可 model_rpn.compile(optimizer='sgd', loss='mse') model_classifier.compile(optimizer='sgd', loss='mse') Ap = [[] for i in range(len(class_mapping))] print("Appp", Ap) if os.path.isdir(path): for idx, img_name in enumerate(sorted(os.listdir(path))): if not img_name.lower().endswith( ('.bmp', '.jpeg', '.jpg', '.png', '.tif', '.tiff')): continue print(img_name) predict_single_image(os.path.join(path, img_name), model_rpn, model_classifier_only, cfg, class_mapping, Ap) elif os.path.isfile(path): print('predict image from {}'.format(path)) predict_single_image(path, model_rpn, model_classifier_only, cfg, class_mapping, Ap) print("Ap:") print(Ap) Acc = [] for i in range(len(class_mapping)): sum = 0 avg = 0 for j in Ap[i]: sum += j if len(Ap[i]) != 0: avg = sum / (len(Ap[i])) Acc.append(avg) else: Acc.append(0) for i in range(len(class_mapping)): print(class_mapping[i] + "acc:{}".format(Acc[i]))
def tf_fit_img(img, C,filename): K.clear_session() if C.network == 'resnet50': import keras_frcnn.resnet as nn elif C.network == 'vgg': import keras_frcnn.vgg as nn if C.network == 'resnet50': num_features = 1024 elif C.network == 'vgg': num_features = 512 class_mapping = C.class_mapping if 'bg' not in class_mapping: class_mapping['bg'] = len(class_mapping) class_mapping = {v: k for k, v in class_mapping.items()} print(class_mapping) class_to_color = {class_mapping[v]: np.random.randint(0, 255, 3) for v in class_mapping} if K.image_dim_ordering() == 'th': input_shape_img = (3, None, None) input_shape_features = (num_features, None, None) else: input_shape_img = (None, None, 3) input_shape_features = (None, None, num_features) img_input = Input(shape=input_shape_img) roi_input = Input(shape=(C.num_rois, 4)) feature_map_input = Input(shape=input_shape_features) # define the base network (resnet here, can be VGG, Inception, etc) shared_layers = nn.nn_base(img_input, trainable=True) # define the RPN, built on the base layers num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios) rpn_layers = nn.rpn(shared_layers, num_anchors) classifier = nn.classifier(feature_map_input, roi_input, C.num_rois, nb_classes=len(class_mapping), trainable=True) model_rpn = Model(img_input, rpn_layers) model_classifier_only = Model([feature_map_input, roi_input], classifier) model_classifier = Model([feature_map_input, roi_input], classifier) print('Loading weights from {}'.format(C.model_path)) model_rpn.load_weights(C.model_path, by_name=True) model_classifier.load_weights(C.model_path, by_name=True) model_rpn.compile(optimizer='sgd', loss='mse') model_classifier.compile(optimizer='sgd', loss='mse') bbox_threshold = 0.8 X, ratio = format_img(img, C) if K.image_dim_ordering() == 'tf': X = np.transpose(X, (0, 2, 3, 1)) # get the feature maps and output from the RPN # print(X) [Y1, Y2, F] = model_rpn.predict(X) R = roi_helpers.rpn_to_roi(Y1, Y2, C, K.image_dim_ordering(), overlap_thresh=0.7) # convert from (x1,y1,x2,y2) to (x,y,w,h) R[:, 2] -= R[:, 0] R[:, 3] -= R[:, 1] # apply the spatial pyramid pooling to the proposed regions bboxes = {} probs = {} for jk in range(R.shape[0]//C.num_rois + 1): ROIs = np.expand_dims(R[C.num_rois*jk:C.num_rois*(jk+1), :], axis=0) if ROIs.shape[1] == 0: break if jk == R.shape[0]//C.num_rois: #pad R curr_shape = ROIs.shape target_shape = (curr_shape[0],C.num_rois,curr_shape[2]) ROIs_padded = np.zeros(target_shape).astype(ROIs.dtype) ROIs_padded[:, :curr_shape[1], :] = ROIs ROIs_padded[0, curr_shape[1]:, :] = ROIs[0, 0, :] ROIs = ROIs_padded [P_cls, P_regr] = model_classifier_only.predict([F, ROIs]) for ii in range(P_cls.shape[1]): if np.max(P_cls[0, ii, :]) < bbox_threshold or np.argmax(P_cls[0, ii, :]) == (P_cls.shape[2] - 1): continue cls_name = class_mapping[np.argmax(P_cls[0, ii, :])] if cls_name not in bboxes: bboxes[cls_name] = [] probs[cls_name] = [] (x, y, w, h) = ROIs[0, ii, :] cls_num = np.argmax(P_cls[0, ii, :]) try: (tx, ty, tw, th) = P_regr[0, ii, 4*cls_num:4*(cls_num+1)] tx /= C.classifier_regr_std[0] ty /= C.classifier_regr_std[1] tw /= C.classifier_regr_std[2] th /= C.classifier_regr_std[3] x, y, w, h = roi_helpers.apply_regr(x, y, w, h, tx, ty, tw, th) except: pass bboxes[cls_name].append([C.rpn_stride*x, C.rpn_stride*y, C.rpn_stride*(x+w), C.rpn_stride*(y+h)]) probs[cls_name].append(np.max(P_cls[0, ii, :])) all_dets = [] detect_imgs = [] for key in bboxes: bbox = np.array(bboxes[key]) count = 0 newPic_name = "box_{}.jpg".format(str(filename[:-4]+ str(count))) count += 1 detect_imgs.append(newPic_name) original_img = cv2.imread('./static/tmp_pic/'+filename) height, width, _ = original_img.shape (resized_width, resized_height) = get_new_img_size(width, height, 300) resize_img = cv2.resize(original_img, (resized_width, resized_height), interpolation=cv2.INTER_CUBIC) cv2.imwrite("./static/img/doc/" + filename, resize_img) new_boxes, new_probs = roi_helpers.non_max_suppression_fast(bbox, np.array(probs[key]), overlap_thresh=0.5) for jk in range(new_boxes.shape[0]): (x1, y1, x2, y2) = new_boxes[jk,:] (real_x1, real_y1, real_x2, real_y2) = get_real_coordinates(ratio, x1, y1, x2, y2) gt_x1, gt_x2 = real_x1 * (resized_width/width), real_x2 * (resized_width/width) gt_y1, gt_y2 = real_y1 * (resized_height/height), real_y2 * (resized_height/height) gt_x1, gt_y1, gt_x2, gt_y2 = int(gt_x1), int(gt_y1), int(gt_x2), int(gt_y2) color = (0, 255, 0) result_img = cv2.rectangle(resize_img, (gt_x1, gt_y1), (gt_x2, gt_y2), color, 2) cv2.imwrite("./static/img/doc/" + newPic_name, result_img) textLabel = '{}: {}'.format(key,int(100*new_probs[jk])) all_dets.append((key,100*new_probs[jk])) (retval,baseLine) = cv2.getTextSize(textLabel,cv2.FONT_HERSHEY_COMPLEX,1,1) textOrg = (real_x1, real_y1-0) cv2.rectangle(img, (textOrg[0] - 5, textOrg[1]+baseLine - 5), (textOrg[0]+retval[0] + 5, textOrg[1]-retval[1] - 5), (0, 0, 0), 2) cv2.rectangle(img, (textOrg[0] - 5,textOrg[1]+baseLine - 5), (textOrg[0]+retval[0] + 5, textOrg[1]-retval[1] - 5), (255, 255, 255), -1) cv2.putText(img, textLabel, textOrg, cv2.FONT_HERSHEY_DUPLEX, 1, (0, 0, 0), 1) # print('Elapsed time = {}'.format(time.time() - st)) print(all_dets) if len(detect_imgs) > 0: print(detect_imgs[0]) return detect_imgs[0], all_dets else: return "Can not detect"