def __init__(self, port):
    """Open the listening socket, start the camera streams and serve.

    The constructor binds a TCP socket on all interfaces, starts the depth
    and color streams, loads the YOLO model and then blocks in the socket
    loop until the process receives a KeyboardInterrupt.

    Args:
        port: TCP port the server socket is bound to.
    """
    # Build socket
    self.connections = []
    self.connections_limit = 2
    self.shock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    self.shock.bind(("0.0.0.0", port))
    self.shock.setblocking(True)
    self.shock.listen(10)
    self.connections.append(self.shock)

    # Start stream
    self.dev = rs.pipeline()
    config = rs.config()
    config.enable_stream(rs.stream.depth, 640, 480, rs.format.z16, 30)
    config.enable_stream(rs.stream.color, 640, 480, rs.format.bgr8, 30)
    self.dev.start(config)

    # Maps a message signature byte to the handler that serves it.
    self.signature_dict = {0x00: self.scan_all}

    # Start modules
    self.post_processing = PostProcessing()
    self.yolo = Yolo(None)
    self.yolo.load_model()

    try:
        while True:
            self.spin_socket()
    except KeyboardInterrupt:
        # Ctrl-C is the expected way to stop the server; fall through to
        # the cleanup below instead of looping on a closed socket.
        pass
    finally:
        try:
            # The pipeline started with start() is shut down with stop().
            self.dev.stop()
        finally:
            self.shock.close()
def __init__(self):
    """Initialise the 'Object_Detection' node, its helpers and its topics.

    Creates the YOLO detector, the CvBridge and the depth finder, advertises
    the latched coordinate/image publishers and subscribes to the head RGB-D
    camera image and camera-info topics.
    """
    rospy.init_node('Object_Detection')

    self.target = ""
    self.yolo = Yolo()
    self.bridge = CvBridge()
    self.depth_finder = Depth_Finder()
    self.TIMEOUT = 15

    def latched_publisher(topic, msg_type, depth):
        # Every publisher of this node is latched.
        return rospy.Publisher(topic, msg_type, queue_size=depth, latch=True)

    self.coord_pub_map = latched_publisher(
        '/cv/detected_obj/coords/map', PointStamped, 10)
    self.coord_pub_odom = latched_publisher(
        '/cv/detected_obj/coords/odom', PointStamped, 10)
    self.coord_pub_json = latched_publisher(
        '/cv/detected_obj/coords/json', String, 10)
    self.img_pub = latched_publisher('/yolo/img', Image, 1)

    # Image subscription first, then camera info (same order as before).
    rospy.Subscriber('/hsrb/head_rgbd_sensor/rgb/image_color',
                     Image,
                     self.img_callback)
    rospy.Subscriber('/hsrb/head_rgbd_sensor/rgb/camera_info',
                     CameraInfo,
                     self.info_callback)
def __init__(self):
    """Copy the training configuration and build the TensorFlow graph.

    The graph consists of the input placeholders, the YOLO network and its
    loss, an Adam training op combined with an exponential moving average,
    two savers (trainable variables for loading, global variables for
    saving), a loss summary and the session used for training.
    """
    # Settings taken from the project configuration module.
    self.__batch_size = cfg.BATCH_SIZE
    self.__image_size = cfg.IMAGE_SIZE
    self.__cell_size = cfg.CELL_SIZE
    self.__box_per_cell = cfg.BOX_PRE_CELL
    self.__num_class = len(cfg.CLASSES)
    self.__learn_rate_base = cfg.LEARN_RATE_BASE
    self.__max_periods = cfg.MAX_PERIODS
    self.__model_dir = cfg.MODEL_DIR
    self.__model_file = cfg.MODEL_FILE
    self.__log_dir = cfg.LOG_DIR
    self.__moving_ave_decay = cfg.MOVING_AVE_DECAY
    self.__save_iter = cfg.SAVE_ITER

    self.__train_data = PascalVoc('train')
    self.__test_data = PascalVoc('test')

    sample_shape = (self.__batch_size,
                    self.__image_size,
                    self.__image_size,
                    3)
    label_shape = (self.__batch_size,
                   self.__cell_size,
                   self.__cell_size,
                   self.__box_per_cell,
                   5 + self.__num_class)

    with tf.name_scope('input'):
        self.__samples = tf.placeholder(dtype=tf.float32,
                                        name='samples',
                                        shape=sample_shape)
        self.__labels = tf.placeholder(dtype=tf.float32,
                                       name='labels',
                                       shape=label_shape)
        self.__is_training = tf.placeholder(dtype=tf.bool,
                                            name='is_training')

    # Network and loss.
    self.__yolo = Yolo()
    self.__yolo_output = self.__yolo.build_network(self.__samples,
                                                   self.__is_training)
    self.__yolo_loss = self.__yolo.loss(self.__yolo_output, self.__labels)

    with tf.name_scope('learn'):
        self.__learn_rate = tf.Variable(self.__learn_rate_base,
                                        trainable=False,
                                        name='learn_rate_base')
        averager = tf.train.ExponentialMovingAverage(
            self.__moving_ave_decay)
        ema_update = averager.apply(tf.trainable_variables())
        adam_step = tf.train.AdamOptimizer(self.__learn_rate).minimize(
            self.__yolo_loss)
        # The training op is a no-op that depends on both updates.
        with tf.control_dependencies([adam_step]), \
                tf.control_dependencies([ema_update]):
            self.__train_op = tf.no_op()

    with tf.name_scope('load'):
        self.__load = tf.train.Saver(tf.trainable_variables())
        self.__save = tf.train.Saver(tf.global_variables(), max_to_keep=50)

    with tf.name_scope('summary'):
        tf.summary.scalar('batch_loss', self.__yolo_loss)
        self.__summary_op = tf.summary.merge_all()
        self.__summary_writer = tf.summary.FileWriter(self.__log_dir)
        self.__summary_writer.add_graph(tf.get_default_graph())

    self.__sess = tf.Session()
def run_lpr():
    """Detect licence plates in an uploaded image and run OCR on them.

    Expects a POST request carrying the image in the ``file`` form field.
    Every region of interest found by YOLO is passed to ``predict``; the
    JSON response describes the highest-confidence region only.

    Returns:
        A Flask JSON response. Status 200 with the keys ``bounding_box``,
        ``confidence``, ``classId``, ``emnist_net_preds`` and
        ``tesseract_preds`` on success; 405 for non-POST requests; 400 when
        the upload is empty or cannot be decoded as an image.
    """
    logging.info(f'Debug mode {app.debug}')

    if request.method != 'POST':
        # Previously this path fell through and returned None.
        response = jsonify({'error': 'only POST is supported'})
        response.status_code = 405
        return response

    file = request.files['file']
    img_bytes = file.read()
    file.close()

    # np.frombuffer is the replacement for binary-mode np.fromstring.
    nparr = np.frombuffer(img_bytes, np.uint8)
    inputImage = cv.imdecode(nparr, cv.IMREAD_COLOR) if nparr.size else None
    if inputImage is None:
        response = jsonify({'error': 'file is empty or not a valid image'})
        response.status_code = 400
        return response

    # TODO: state management: avoid loading net for every request
    yolo = Yolo(img_width=1056, img_height=576,
                debug=DEBUG, confidence_threshold=0.6,
                non_max_supress_theshold=0.4,
                classes_filename='../config/classes.names',
                model_architecture_filename="../config/yolov3_license_plates.cfg",
                model_weights_filename="../config/yolov3_license_plates_last.weights",
                output_directory='../debug/')
    roi_imgs = yolo.detect(inputImage)

    ocr = OCR(model_filename="../config/emnist_net_custom.pt",
              num_classes=36, use_cuda=False, debug=DEBUG)

    for index, roi_img in enumerate(roi_imgs):
        logging.info(f'\n\nProcessing ROI {index}')
        bbox = yolo.bounding_boxes[index]
        box = [bbox[0], bbox[1], bbox[2], bbox[3]]
        predict(yolo.img, roi_img, box, str(index), (0, 255, 0), ocr)

    # API response: the highest confidence one
    logging.info(f'\n\n---Processing the Highest Confidence ROI---\n')
    bounding_box = None
    emnist_net_preds = None
    tesseract_preds = None
    if yolo.highest_object_confidence > 0 and yolo.roi_img is not None:
        bounding_box = {
            'x': yolo.box_x,
            'y': yolo.box_y,
            'w': yolo.box_w,
            'h': yolo.box_h
        }
        _, emnist_net_preds, tesseract_preds = predict(
            yolo.img, yolo.roi_img,
            [yolo.box_x, yolo.box_y, yolo.box_w, yolo.box_h],
            "", (255, 255, 0), ocr)

    if DEBUG:
        cv.imwrite("../debug/result.jpg", yolo.img.astype(np.uint8))

    data = {
        'bounding_box': bounding_box,
        'confidence': yolo.highest_object_confidence,
        'classId': str(yolo.classId_highest_object),
        'emnist_net_preds': emnist_net_preds,
        'tesseract_preds': tesseract_preds
    }
    response = jsonify(data)
    response.status_code = 200
    return response
def setup(sender=None, **kwargs):
    """Create the module-level ``yolo`` and ``face_filter`` objects.

    After both globals are assigned, ``yolo.crop_persons`` is called once on
    "test3.png" as a warm-up. ``sender`` and ``kwargs`` are accepted but not
    used.
    """
    global yolo, face_filter

    yolo = Yolo()
    face_filter = FaceFilterClass()

    # Warm up the detector with a single call.
    yolo.crop_persons("test3.png")
def reset_model():
    """Create a fresh ``Yolo`` model and save its weights to ./weights/yolo.

    The model is called once on a random (1, image_size, image_size, 3)
    float32 tensor before saving; the output of that call is discarded.
    """
    model = Yolo()

    noise = np.random.random((1, image_size, image_size, 3))
    dummy_batch = tf.convert_to_tensor(noise, dtype=np.float32)
    model(dummy_batch)

    model.save_weights("./weights/yolo")
def __init__(self):
    """Create the network manager and YOLO model, then serve until Ctrl-C.

    Registers ``self.yolo_callback`` for signature 0x00 and blocks in the
    socket loop. The constructor returns only after a KeyboardInterrupt;
    the network manager is closed on the way out.
    """
    self.net = NetworkManager(9999)
    self.yolo = Yolo()
    self.yolo.load_model()
    self.net.addCallback(0x00, self.yolo_callback)

    try:
        while True:
            self.net.spinSocket()
    except KeyboardInterrupt:
        # Expected shutdown path; leave the loop instead of spinning again
        # on a manager that has already been closed.
        pass
    finally:
        self.net.close()
def show_prediction(data_index):
    """Run the saved model on one evaluation picture and display its boxes.

    Args:
        data_index: Index of the sample. It is converted with ``str`` for
            the lookup because keys of a JSON object are always strings, so
            an int index previously never matched.

    Prints a message and returns without predicting when the index is not
    present in the evaluation JSON file.
    """
    with open("../data/data_detect_local_evaluate_100.json") as json_file:
        data = json.load(json_file)

    if str(data_index) not in data:
        print("Index {} out of range".format(data_index))
        return

    yolo = Yolo()
    yolo.load_weights("./weights/yolo")

    img_path = "../pictures/pictures_detect_local_evaluate_100/{}.png".format(
        data_index)
    img = get_img(img_path)
    preds = yolo(img)
    boxes = get_boxes(preds)
    show(data_index, img_path, boxes)
def __init__(self, updater):
    """Set up the crawler helpers, load recent spoilers and schedule crawls.

    Args:
        updater: Object whose ``job_queue.run_repeating`` is used to call
            ``self.general_crawl`` every 60 seconds, first after 10 seconds.
    """
    # Helper services
    self.sd = SpoilerDetector()
    self.ms = MythicSpoiler()
    self.yolo = Yolo(config.model, config.classes, config.conf)
    self.reddit = Reddit(subreddit="magicTCG")

    # Identifiers collected per source
    self.scryfall_futur_cards_id = []
    self.reddit_futur_cards_subm_id = []
    self.mythicspoiler_futur_cards_url = []

    self.limit_days = 45

    # List of Spoiler objects found within the last `limit_days` days
    cutoff = datetime.today() - timedelta(days=self.limit_days)
    recent_spoilers = Session.query(Spoiler).filter(Spoiler.found_at > cutoff)
    self.spoiled = recent_spoilers.all()

    # Job queues
    updater.job_queue.run_repeating(self.general_crawl,
                                    interval=60,
                                    first=10)
class YoloServer(object):
    """Network server that answers signature-0x00 requests with YOLO names."""

    def __init__(self):
        """Create the network manager and YOLO model, then serve until Ctrl-C.

        Registers ``self.yolo_callback`` for signature 0x00 and blocks in
        the socket loop. The constructor returns only after a
        KeyboardInterrupt; the network manager is closed on the way out.
        """
        self.net = NetworkManager(9999)
        self.yolo = Yolo()
        self.yolo.load_model()
        self.net.addCallback(0x00, self.yolo_callback)

        try:
            while True:
                self.net.spinSocket()
        except KeyboardInterrupt:
            # Expected shutdown path; leave the loop instead of spinning
            # again on a manager that has already been closed.
            pass
        finally:
            self.net.close()

    def yolo_callback(self, arg):
        """Decode the base64 image in ``arg['img']`` and run YOLO on it.

        Args:
            arg: Mapping whose ``'img'`` entry is an ASCII string holding a
                base64-encoded, compressed image.

        Returns:
            The tuple ``(0x00, {'names': names})`` where ``names`` is
            whatever ``self.yolo.run`` returns for the decoded image.
        """
        byte_img = bytes(arg['img'], 'ascii')
        img_decoded = base64.b64decode(byte_img)
        img = numpy.frombuffer(img_decoded, dtype=numpy.uint8)
        # flags=1 is passed straight through to cv2.imdecode.
        cv_img = cv2.imdecode(img, flags=1)
        names = self.yolo.run(numpy.asarray(cv_img), False)
        return (0x00, {'names': names})
def main():
    """Build the YOLO detector and run the darknet upload server.

    All settings come from ``importargs()``; the server accepts 'jpg' and
    'png' uploads.
    """
    (cfgfilepath, datafilepath, weightfilepath,
     host, port, uploaddir, pub_img_flag) = importargs()

    detector = Yolo(cfgfilepath, weightfilepath, datafilepath)
    allowed_extensions = ['jpg', 'png']
    server = DarknetServer('yolo_server',
                           uploaddir,
                           allowed_extensions,
                           pub_img_flag,
                           detector)
    server.setup_converter()

    print("server run")
    server.run(host=host, port=port)
def compute_detections(directory, ground_truth_filename, output):
    '''
    Counts the objects detected in every image of a directory and writes the
    counts, merged with the ground truth, to a CSV file.

    Args:
        directory: Path to images directory
        ground_truth_filename: File passed to parse_ground_truth
        output: Output file name
    '''
    rows = []
    grand_total = 0

    detector = Yolo('yolov3.cfg', 'yolov3.weights', 'yolov3.txt')

    image_names = os.listdir(directory)
    for position, image_name in enumerate(image_names, start=1):
        _, indices, _ = detector.extract_objects(directory + '/' + image_name)
        found = len(indices)

        # The text before the first dot is used as the image identifier.
        rows.append([image_name.split('.')[0], found])
        grand_total += found

        print('Number of objects detected in {}: {}'.format(
            image_name, found))
        if position % 10 == 0:
            print("\nProcessed {} out of {}".format(position,
                                                    len(image_names)))

    rows.append(['Total Objects Detected', grand_total])

    predictions = pd.DataFrame(rows)
    ground_truth = parse_ground_truth(ground_truth_filename)

    # Outer merge on column 0 of both frames.
    merged = pd.merge(predictions,
                      ground_truth,
                      how='outer',
                      left_on=0,
                      right_on=0)
    merged['1_x'] = merged['1_x'].fillna(0).astype(int)
    merged.to_csv(output, index=False, header=False)
def __init__(self, args):
    """Select the detection method and set up ROS I/O and tracking state.

    Args:
        args: Mapping with at least ``'method'`` ('HOG', 'CAFFE' or 'YOLO')
            and ``'confidence'``; it is also passed to ``DroneController``.

    For an unknown method a message is printed and a ROS shutdown is
    requested; ``self.method`` is not set in that case.
    """
    detectors = {'HOG': Hog, 'CAFFE': Caffe, 'YOLO': Yolo}
    detector_cls = detectors.get(args['method'])
    if detector_cls is None:
        print(
            'Fail, unknown algorithm. Try with -m HOG, -m CAFFE, -m YOLO')
        rospy.signal_shutdown('Quit')
    else:
        self.method = detector_cls(args['confidence'])

    # Bridge used to convert images received on the ROS topic
    self.bridge = CvBridge()

    # Centroid tracker
    self.ct = CentroidTracker()

    # Subscriber for raw drone images and publisher for processed ones
    self.image_sub = rospy.Subscriber("/ardrone/image_raw",
                                      Image,
                                      self.callback)
    self.image_pub = rospy.Publisher("image_processed",
                                     Image,
                                     queue_size=10)

    self.controller = DroneController(args)
    self.droneStatus = DroneStatus()

    self.previous_position = None
    self.actual_position = None
    self.detection_status = DETECTION_STATUS['SearchingPerson']
    self.last_action = dict(roll=0, pitch=0, yaw_velocity=0, z_velocity=0)
    self.shutdown = 500
def __init__(self):
    """Initialise the 'Semantic_Labelling' node, its helpers and its topics.

    Creates the YOLO detector, the CvBridge and the depth finder, advertises
    the labelled-image and navigation-label publishers and subscribes to the
    head RGB-D camera-info and image topics.
    """
    rospy.init_node('Semantic_Labelling')

    # Processing helpers
    self.yolo = Yolo()
    self.bridge = CvBridge()
    self.depth_finder = Depth_Finder()

    # Outgoing topics
    self.img_pub = rospy.Publisher(
        '/semantic_labels/img', Image, queue_size=10)
    self.nav_pub = rospy.Publisher(
        '/azm_nav/semantic_label_additions', String, queue_size=10)

    # Incoming topics: camera info first, then the image stream.
    rospy.Subscriber('/hsrb/head_rgbd_sensor/rgb/camera_info',
                     CameraInfo,
                     self.info_callback,
                     queue_size=None)
    rospy.Subscriber('/hsrb/head_rgbd_sensor/rgb/image_color',
                     Image,
                     self.img_callback,
                     queue_size=1,
                     buff_size=52428800)
def __init__(self, weights_file):
    """Build the YOLO network, open a session and restore weights.

    Weights are restored from the latest checkpoint found in
    ``self.model_path`` when one exists; otherwise from ``weights_file``
    when it is not None; otherwise the freshly initialised variables are
    kept.

    Args:
        weights_file: Checkpoint path used as a fallback, or None.
    """
    self.verbose = True

    # detection params
    self.S = 7  # cell size
    self.B = 2  # boxes_per_cell
    self.classes = [
        "recoon", "other", "other", "other", "other", "other", "other",
        "other", "other", "other", "other", "other", "other", "other",
        "other", "other", "other", "other", "other", "other"
    ]
    self.weights_file = weights_file
    self.C = len(self.classes)  # number of classes

    # offset for box center (top left point of each cell)
    self.x_offset = np.transpose(
        np.reshape(np.array([np.arange(self.S)] * self.S * self.B),
                   [self.B, self.S, self.S]),
        [1, 2, 0])  # move axis 0 to the last position
    self.y_offset = np.transpose(self.x_offset, [1, 0, 2])

    self.threshold = 0.2  # confidence scores threshold
    self.iou_threshold = 0.5

    self.model_path = 'model'

    # The session is created once, further down, with its configuration.
    # An earlier unconfigured tf.Session() was overwritten without being
    # closed and has been removed.
    self.net = Yolo()
    self.predicts = self.net.logits

    self.variable_to_restore = tf.global_variables()
    self.saver = tf.train.Saver(self.variable_to_restore, max_to_keep=None)

    self.total_loss = 0.0
    self.total_TP = 0
    self.total_p = 0
    self.ckpt_file = os.path.join(self.model_path, 'yolo')

    # NOTE(review): TensorFlow documents the device type names as "CPU" and
    # "GPU"; confirm whether the lowercase 'gpu' key has any effect.
    gpuConfig = tf.ConfigProto(device_count={'gpu': 0})
    #gpuConfig.gpu_options.allow_growth = True
    #gpu_options = tf.GPUOptions()
    #config = tf.ConfigProto(gpu_options=gpuConfig)
    self.sess = tf.Session(config=gpuConfig)
    self.sess.run(tf.global_variables_initializer())

    # Get the checkpoint state object for the model directory.
    ckpt = tf.train.get_checkpoint_state(self.model_path)
    if ckpt and ckpt.model_checkpoint_path:
        # A checkpoint exists: restore it so that work resumes from it.
        print('Restoring weights from: ' + ckpt.model_checkpoint_path)
        self.saver.restore(self.sess, ckpt.model_checkpoint_path)
    elif self.weights_file is not None:
        print('Restoring weights from: ' + self.weights_file)
        self.saver.restore(self.sess, self.weights_file)
def main():
    """Parse the command line, build the network and data set, run the solver.

    ``--gpu`` selects the value written to ``CUDA_VISIBLE_DEVICES``
    (default '0'). The remaining options are parsed but not used in this
    function.
    """
    tf.reset_default_graph()

    parser = argparse.ArgumentParser()
    parser.add_argument('--weights', default="YOLO_small.ckpt", type=str)
    parser.add_argument('--data_dir', default="data", type=str)
    parser.add_argument('--threshold', default=0.2, type=float)
    parser.add_argument('--iou_threshold', default=0.5, type=float)
    parser.add_argument('--gpu', default='0', type=str)
    args = parser.parse_args()

    # Honour --gpu; this was hard-coded to "0", which is also the default.
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

    yolo = Yolo()
    pascal = pascal_voc('test')
    solver = Solver(yolo, pascal)

    print('Start testing ...')
    solver.train()
    print('Done testing.')
def __init__(self):
    """Copy the detection settings, build the network and restore weights.

    A single-image input placeholder is fed through ``Yolo.build_network``;
    the variables are restored from the checkpoint at
    ``cfg.MODEL_DIR/cfg.MODEL_FILE`` into a new session.
    """
    image_size = cfg.IMAGE_SIZE
    cell_size = cfg.CELL_SIZE

    self.__image_size = image_size
    self.__cell_size = cell_size
    # Input pixels covered by one grid cell along an axis.
    self.__down_sample_size = 1.0 * image_size / cell_size
    self.__box_per_cell = cfg.BOX_PRE_CELL
    self.__classes = cfg.CLASSES
    self.__num_class = len(cfg.CLASSES)
    self.__anchor = cfg.ANCHOR
    self.__prob_threshold = cfg.PROB_THRESHOLD
    self.__nms_threshold = cfg.NMS_THRESHOLD

    input_shape = (1, image_size, image_size, 3)
    self.__yolo_input = tf.placeholder(shape=input_shape,
                                       dtype=tf.float32,
                                       name='input')
    self.__is_training = tf.placeholder(dtype=tf.bool, name='is_training')

    network = Yolo()
    self.__yolo_output = network.build_network(self.__yolo_input,
                                               self.__is_training)

    self.__sess = tf.Session()
    self.__saver = tf.train.Saver(tf.global_variables())
    checkpoint = os.path.join(cfg.MODEL_DIR, cfg.MODEL_FILE)
    self.__saver.restore(self.__sess, checkpoint)
def train():
    """Fine-tune the saved model on the local training pictures.

    Samples are visited in index order (0, 1, 2, ...) for as long as the
    index, as a string, is a key of the training JSON file. One Adam step
    is taken per sample and the weights are saved after every 100th sample.
    """
    model = Yolo()
    model.load_weights("./weights/yolo")
    optimizer = Adam(learning_rate=5e-5)

    with open("../data/data_detect_local_train.json") as json_file:
        data = json.load(json_file)

    sample_id = 0
    while True:
        key = str(sample_id)
        if key not in data:
            break

        img = get_img("../pictures/pictures_detect_local_train/{}.png".format(
            sample_id))
        true_labels, true_boxes, true_preds = get_localization_data(data[key])

        def sample_loss():
            # Forward pass and loss for the current sample.
            preds = model(img)
            return calculate_loss(preds, true_labels, true_boxes, true_preds)

        optimizer.minimize(sample_loss, [model.trainable_weights])

        if sample_id % 100 == 99:
            model.save_weights("./weights/yolo")

        sample_id += 1
if __name__ == '__main__':
    # Model files and class count used to build the detector.
    cfgfile = "cfg/yolov3.cfg"
    weightsfile = "yolov3.weights"
    num_classes = 80

    args = arg_parse()
    confidence = float(args.confidence)
    nms_thesh = float(args.nms_thresh)
    #bbox_attrs = 5 + num_classes

    # Validate with explicit errors: assert statements are removed when
    # Python runs with -O, which would silently skip these checks.
    inp_dim = int(args.reso)
    if inp_dim % 32 != 0 or inp_dim <= 32:
        raise ValueError(
            'resolution must be a multiple of 32 greater than 32, got '
            '{}'.format(inp_dim))

    yolo = Yolo(confidence, nms_thesh, args.reso, cfgfile, weightsfile,
                num_classes)

    # if a video path was not supplied, grab the reference to the web cam
    if not args.video:
        cap = cv2.VideoCapture(0)
    # otherwise, grab a reference to the video file
    else:
        cap = cv2.VideoCapture(args.video)

    time.sleep(1.0)

    fps = cap.get(cv2.CAP_PROP_FPS)
    if not fps > 0:
        # Release the capture before failing so the device is not left open.
        cap.release()
        raise RuntimeError(
            'video source reports a non-positive frame rate: '
            '{}'.format(fps))
train_dset = VOCDetection(root=data_root, split='train') train_dloader = DataLoader(train_dset, batch_size=batch_size, shuffle=True, drop_last=True, num_workers=8) test_dset = VOCDetection(root=data_root, split='test') test_dloader = DataLoader(test_dset, batch_size=batch_size, shuffle=False, drop_last=False, num_workers=8) model = Yolo(grid_size, num_boxes, num_classes) model = model.to(device) pretrained_weights = torch.load(pretrained_backbone_path) model.load_state_dict(pretrained_weights) print('loaded pretrained weight') # Freeze the backbone network. model.features.requires_grad_(False) model_params = [v for v in model.parameters() if v.requires_grad is True] optimizer = optim.SGD(model_params, lr=lr, momentum=0.9, weight_decay=5e-4) compute_loss = Loss(grid_size, num_boxes, num_classes) # Load the last checkpoint if exits. ckpt_path = os.path.join(ckpt_dir, 'last.pth') if os.path.exists(ckpt_path):
"textbox": "text_field", "button": "button", "combobox": "combo_box", "tree": "tree", "list": "list", "scrollbar": "scroll_bar", "menuitem": "menu_item", "menu": "menu", "togglebutton": "toggle_button", "tabs": "tabs", "slider": "slider", "menuww": "menu", } yolo = Yolo() real_folder = cfg.data_dir + "/../mac" unproc_folder = real_folder + "/unproc" files = [ os.path.join(dp, f) for dp, dn, fn in os.walk(os.path.expanduser(unproc_folder + "/images")) for f in fn ] print(files) label_files = [ (unproc_folder + "/labels/" + x[x.rfind("/") + 1:-4] + ".xml") for x in files ]
from yolo import Yolo import cv2 import time src = "videos/sample.mp4" yolofiles = { "config": "yolo-coco/yolov3.cfg", "weights": "yolo-coco/yolov3.weights", "names": "yolo-coco/coco.names" } yolo = Yolo(yolofiles) src_scale = 1 src_mirror = False src_skip = 1 vid = cv2.VideoCapture(src) t0 = time.time() while True: count = src_skip while count>=1: count -= 1 grabbed, image = vid.read()
parser.add_argument('--use_gpu', type=bool, default=False, help='whether to use gpu') args = parser.parse_args() device = torch.device( 'cuda' if args.use_gpu and torch.cuda.is_available() else 'cpu') FloatTensor = torch.cuda.FloatTensor if args.use_gpu and torch.cuda.is_available( ) else torch.FloatTensor classes = utils.load_classes( args.class_path) # Extracts class labels from file # Set up model model = Yolo().to(device) if args.weights_path is not None: # Load darknet weights model.load_darknet_weights(args.weights_path) model.eval() # Set in evaluation mode # dataloader = DataLoader( # ImageFolder(args.image_folder, img_size=args.img_size), # batch_size=args.batch_size, # shuffle=False, # num_workers=args.n_cpu, # ) if not os.path.exists(args.output_path):
from threading import Event import torch torch.no_grad() torch.cuda.empty_cache() # import our classes import rocketvision as rv from rocketvision.rate import Rate from nada import Nada from yolo import Yolo nada = Nada() yolo = Yolo( img_size=256 ) # default is 512 which yeilds about 3.8 fps (i7/940MX), 384 --> 5 fps, 256 --> 7 fps context = zmq.Context() socket = context.socket(zmq.PUB) socket.connect('tcp://' + args['address'] + ':' + args['port']) width = 1280 height = 720 displayWidth = 1280 displayHeight = 720 framerate = 30 flipmethod = 2 def gstreamer_pipeline(
#! /usr/bin/python3 import cv2 import sys from detector import Detector from yolo import Yolo from timeit import default_timer as timer # extract the yolo pipeline yolo = Yolo(sys.argv[2]) vd = Detector(yolo) if sys.argv[1] == '-i': filename = 'images/test4.jpg' image = cv2.imread(filename) image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) result = vd.vehicleDetection(image) result = cv2.cvtColor(result, cv2.COLOR_BGR2RGB) cv2.imshow("result", result) cv2.waitKey(0) elif sys.argv[1] == '-v': try: skip = int(sys.argv[3]) except: skip = 1 video_output = 'videos/out.mp4'
import logging
from yolo import Yolo
from ocr import OCR

app = Flask(__name__)
DEBUG = True

# Keyword arguments for the detector and the OCR model, grouped so the
# constructor calls below stay short.
_YOLO_OPTIONS = dict(
    img_width=1056,
    img_height=576,
    confidence_threshold=0.6,
    non_max_supress_theshold=0.4,
    classes_filename='../config/classes.names',
    model_architecture_filename="../config/yolov3_license_plates.cfg",
    model_weights_filename="../config/yolov3_license_plates_last.weights",
    output_directory='../debug/',
    output_image=True,
)
_OCR_OPTIONS = dict(
    model_filename="../config/attention_ocr_model.pth",
    use_cuda=False,
    threshold=0.7,
)

# TODO: state management and how to handle multiple request on this?
yolo = Yolo(**_YOLO_OPTIONS)
ocr = OCR(**_OCR_OPTIONS)


@app.route('/')
def index():
    """Return a fixed liveness message for the root URL."""
    return "Live and Running!"
from yolo import Yolo
import json

if __name__ == "__main__":
    # Load the JSON configuration and hand it to the model for training.
    CONFIG_FILE = 'config.json'
    with open(CONFIG_FILE) as config_stream:
        config = json.load(config_stream)

    yolo = Yolo(config)
    yolo.start_training()

# Initiate the train and test generators with data Augumentation
# train_datagen = ImageDataGenerator(
# rescale=1. / 255,
# horizontal_flip=True,
# fill_mode="nearest",
# zoom_range=0.3,
# width_shift_range=0.3,
# height_shift_range=0.3,
# rotation_range=30)
#
# test_datagen = ImageDataGenerator(
# rescale=1. / 255,
# horizontal_flip=True,
# fill_mode="nearest",
# zoom_range=0.3,
# width_shift_range=0.3,
# height_shift_range=0.3,
# rotation_range=30)
#
# train_generator = train_datagen.flow_from_directory(
# train_data_dir,
imgs = (images/127.5)-1 return imgs def convert_boxes(boxes): return yolo.convert_net_to_bb(boxes, filter_top=False)[0] if __name__ == '__main__': # Collect events until released with keyboard.Listener(on_release=on_release) as listener: if len(sys.argv) > 1: cfg.window_name = sys.argv[1] event = 0 yolo = Yolo() states = [] runtime = round(time.time()) output_dir = cfg.output_dir + "/" + str(runtime) if not os.path.exists(output_dir): os.makedirs(output_dir) if not os.path.exists(output_dir + "/images"): os.makedirs(output_dir + "/images") test_file = output_dir + "/test.txt"
class YoloTrain(object):
    """Builds the YOLO training graph and runs the training loop."""

    def __init__(self):
        """Copy the training configuration and build the TensorFlow graph.

        The graph consists of the input placeholders, the YOLO network and
        its loss, an Adam training op combined with an exponential moving
        average, two savers (trainable variables for loading, global
        variables for saving), a loss summary and the training session.
        """
        self.__batch_size = cfg.BATCH_SIZE
        self.__image_size = cfg.IMAGE_SIZE
        self.__cell_size = cfg.CELL_SIZE
        self.__box_per_cell = cfg.BOX_PRE_CELL
        self.__num_class = len(cfg.CLASSES)
        self.__learn_rate_base = cfg.LEARN_RATE_BASE
        self.__max_periods = cfg.MAX_PERIODS
        self.__model_dir = cfg.MODEL_DIR
        self.__model_file = cfg.MODEL_FILE
        self.__log_dir = cfg.LOG_DIR
        self.__moving_ave_decay = cfg.MOVING_AVE_DECAY
        self.__save_iter = cfg.SAVE_ITER

        self.__train_data = PascalVoc('train')
        self.__test_data = PascalVoc('test')

        with tf.name_scope('input'):
            self.__samples = tf.placeholder(dtype=tf.float32,
                                            name='samples',
                                            shape=(self.__batch_size,
                                                   self.__image_size,
                                                   self.__image_size, 3))
            self.__labels = tf.placeholder(
                dtype=tf.float32,
                name='labels',
                shape=(self.__batch_size, self.__cell_size,
                       self.__cell_size, self.__box_per_cell,
                       5 + self.__num_class))
            self.__is_training = tf.placeholder(dtype=tf.bool,
                                                name='is_training')

        self.__yolo = Yolo()
        self.__yolo_output = self.__yolo.build_network(self.__samples,
                                                       self.__is_training)
        self.__yolo_loss = self.__yolo.loss(self.__yolo_output, self.__labels)

        with tf.name_scope('learn'):
            self.__learn_rate = tf.Variable(self.__learn_rate_base,
                                            trainable=False,
                                            name='learn_rate_base')
            moving_ave = tf.train.ExponentialMovingAverage(
                self.__moving_ave_decay).apply(tf.trainable_variables())
            optimize = tf.train.AdamOptimizer(self.__learn_rate).minimize(
                self.__yolo_loss)
            # The training op is a no-op that depends on both updates.
            with tf.control_dependencies([optimize]):
                with tf.control_dependencies([moving_ave]):
                    self.__train_op = tf.no_op()

        with tf.name_scope('load'):
            self.__load = tf.train.Saver(tf.trainable_variables())
            self.__save = tf.train.Saver(tf.global_variables(),
                                         max_to_keep=50)

        with tf.name_scope('summary'):
            tf.summary.scalar('batch_loss', self.__yolo_loss)
            self.__summary_op = tf.summary.merge_all()
            self.__summary_writer = tf.summary.FileWriter(self.__log_dir)
            self.__summary_writer.add_graph(tf.get_default_graph())

        self.__sess = tf.Session()

    def train(self):
        """Restore the base weights and train for ``MAX_PERIODS`` periods.

        The learning rate is divided by 10 at periods 20, 50 and 80. The
        training loss is logged every 10th step. Every ``SAVE_ITER``-th
        period the mean test loss is computed and a checkpoint is saved.

        Raises:
            ArithmeticError: If a training step produces a NaN loss.
        """
        self.__sess.run(tf.global_variables_initializer())
        ckpt_path = os.path.join(self.__model_dir, self.__model_file)
        # print() with one pre-formatted string prints the same text on
        # Python 2 and Python 3; the print statement is a syntax error on 3.
        print('Restoring weights from:\t %s' % ckpt_path)
        self.__load.restore(self.__sess, ckpt_path)

        for period in range(self.__max_periods):
            if period in [20, 50, 80]:
                learning_rate_value = self.__sess.run(
                    tf.assign(self.__learn_rate,
                              self.__sess.run(self.__learn_rate) / 10.0))
                print('The value of learn rate is:\t%f' % learning_rate_value)

            for step, (batch_sample,
                       batch_label) in enumerate(self.__train_data):
                _, summary_value, yolo_loss_value = self.__sess.run(
                    [self.__train_op, self.__summary_op, self.__yolo_loss],
                    feed_dict={
                        self.__samples: batch_sample,
                        self.__labels: batch_label,
                        self.__is_training: True
                    })
                if np.isnan(yolo_loss_value):
                    raise ArithmeticError('The gradient is exploded')
                # Only every 10th step is logged.
                if step % 10:
                    continue
                self.__summary_writer.add_summary(
                    summary_value, period * len(self.__train_data) + step)
                print('Period:\t%d\tstep:\t%d\ttrain loss:\t%.4f' % (
                    period, step, yolo_loss_value))

            # Evaluate and checkpoint only every SAVE_ITER-th period.
            if period % self.__save_iter:
                continue

            total_test_loss = 0.0
            for batch_sample, batch_label in self.__test_data:
                yolo_loss_value = self.__sess.run(self.__yolo_loss,
                                                  feed_dict={
                                                      self.__samples:
                                                      batch_sample,
                                                      self.__labels:
                                                      batch_label,
                                                      self.__is_training:
                                                      False
                                                  })
                total_test_loss += yolo_loss_value
            test_loss = total_test_loss / len(self.__test_data)
            print('Period:\t%d\ttest loss:\t%.4f' % (period, test_loss))

            saved_model_name = os.path.join(
                self.__model_dir, 'yolo.ckpt-%d-%.4f' % (period, test_loss))
            self.__save.save(self.__sess, saved_model_name)
            print('Saved model:\t%s' % saved_model_name)

        self.__summary_writer.close()
class Semantic_Labelling:
    """ROS node that publishes map coordinates for objects found by YOLO."""

    def __init__(self):
        """Initialise the node, its helpers, publishers and subscribers."""
        rospy.init_node('Semantic_Labelling')
        self.yolo = Yolo()
        self.bridge = CvBridge()
        self.depth_finder = Depth_Finder()
        # Latest CameraInfo message; None until info_callback has run.
        # Set before subscribing so img_callback can always read it.
        self.cam_info = None
        self.img_pub = rospy.Publisher('/semantic_labels/img',
                                       Image,
                                       queue_size=10)
        self.nav_pub = rospy.Publisher('/azm_nav/semantic_label_additions',
                                       String,
                                       queue_size=10)
        info_subscriber = rospy.Subscriber(
            '/hsrb/head_rgbd_sensor/rgb/camera_info',
            CameraInfo,
            self.info_callback,
            queue_size=None)
        img_subscriber = rospy.Subscriber(
            '/hsrb/head_rgbd_sensor/rgb/image_color',
            Image,
            self.img_callback,
            queue_size=1,
            buff_size=52428800)

    # Gets camera info
    def info_callback(self, msg):
        """Store the most recent camera info message."""
        self.cam_info = msg

    # Takes the current image from the camera feed and searches it for
    # objects, publishing the coordinates of each one
    def img_callback(self, msg):
        """Detect objects in an image and publish each one as a JSON label.

        The annotated image is always published. If no camera info has been
        received yet, "waiting" is printed and nothing else is done. For
        every detected object the pixel is projected to a 3D ray, scaled by
        the measured depth, transformed through the 'transform_point'
        service and published on the navigation topic.
        """
        cv_image = self.bridge.imgmsg_to_cv2(msg,
                                             desired_encoding='passthrough')

        objects, img = self.yolo.search_for_objects(cv_image)
        self.img_pub.publish(
            self.bridge.cv2_to_imgmsg(img, encoding='passthrough'))

        # Check explicitly for missing camera info instead of catching
        # AttributeError around the whole block, which also hid unrelated
        # errors raised by the detector or the publisher.
        cam_info = self.cam_info
        if cam_info is None:
            print("waiting")
            return

        model = PinholeCameraModel()
        model.fromCameraInfo(cam_info)

        if len(objects) == 0:
            print("No objects found.")
            return

        # The service lookup does not depend on the object, so it is done
        # once per image rather than once per detection.
        rospy.wait_for_service('transform_point')
        get_3d_points = rospy.ServiceProxy('transform_point', LocalizePoint)

        for obj in objects:
            print(obj)
            xy = obj["Point"]
            dist = self.depth_finder.get_depth(int(xy[0]), int(xy[1]))
            depth = dist[0]

            # Normalise the ray so that z == 1, then scale x/y by depth.
            vect = model.projectPixelTo3dRay((xy[0], xy[1]))
            xyz = [el / vect[2] for el in vect]

            stampedPoint = PointStamped()
            stampedPoint.header.frame_id = "head_rgbd_sensor_rgb_frame"
            stampedPoint.point.x = xyz[0] * depth
            stampedPoint.point.y = xyz[1] * depth
            stampedPoint.point.z = depth

            resp = get_3d_points(stampedPoint)
            #print(resp.localizedPointMap)
            threeDPoint = resp.localizedPointMap

            dictMsg = {}
            dictMsg["name"] = obj["Label"]
            dictMsg["type"] = "object"
            dictMsg["coords"] = [
                threeDPoint.point.x, threeDPoint.point.y,
                threeDPoint.point.z
            ]
            dictMsg["others"] = {}
            self.nav_pub.publish(json.dumps(dictMsg))