def connect_clients(self):
    """Wait for all training clients to connect to this update server.

    Returns (server_socket, client_sockets). In MPI mode no real sockets are
    created -- MPI ranks stand in for socket handles (this rank is the
    "server socket", every other rank is a "client socket"). In TCP mode a
    listening socket is bound to this host's name on self.port and exactly
    self.client_num connections are accepted.
    """
    if self.use_mpi:
        from mpi4py import MPI
        mpi_comm = MPI.COMM_WORLD
        # ranks play the role of sockets: this rank serves, all others are clients
        server_socket = mpi_comm.Get_rank()
        client_sockets = list(range(mpi_comm.Get_size()))
        del client_sockets[server_socket]
        self.client_num = len(client_sockets)
        logging.info("Starting MPI update server on:", socket.gethostname(), "(%i clients)" % self.client_num)
    else:
        server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        logging.info("Starting update server on %s:%i (%i clients)" % (socket.gethostname(), self.port, self.client_num))
        server_socket.bind((socket.gethostname(), self.port))
        server_socket.listen(1)
        logging.info("Waiting for %i clients to connect..." % self.client_num)
        # block until every expected client has connected
        client_sockets = []
        for _ in range(self.client_num):
            sock, addr = server_socket.accept()
            logging.info("Model Update Server - Adding new client:", addr)
            client_sockets.append(sock)
    logging.info("All clients are connected!")
    return server_socket, client_sockets
def import_json(self, json_obj, layer_range=None):
    """Restore model state (labels, input shape, layers) from an exported
    JSON object.

    json_obj: dict produced by the matching export; must be "version" >= 1.
    layer_range: optional range restriction forwarded to the layer importer.
    Raises Exception on old (version 0) model files.
    """
    self.func = {}

    # check if old JSON format
    if json_obj.get("version", 0) == 0:
        # (removed an unreachable `return` that followed this raise)
        raise Exception("Old format model file detected, no compatibility!")

    self.class_labels = json_obj["classLabels"]

    # input shape: either legacy imageSize/imageMode fields or explicit dataShape
    if "imageSize" in json_obj and "imageMode" in json_obj:
        width = json_obj["imageSize"][0]
        height = json_obj["imageSize"][1]
        image_mode = json_obj.get("imageMode", "RGB")
        # channel count derived from the image mode (RGB=3, greyscale=1)
        self.data_shape = ({"RGB": 3, "L": 1}[image_mode], width, height)
    elif "dataShape" in json_obj:
        self.data_shape = tuple(json_obj["dataShape"])
    else:
        assert False, "Bad mdl file, Cannot determine input data shape!"

    # borders are no longer supported by this loader
    assert json_obj.get("imageBorder", 0) == 0
    self.class_num = json_obj.get("classNum", len(self.class_labels))

    #load layers
    self.layers = denet.layer.import_json(json_obj["layers"], self.input, self.get_input_shape(), layer_range)
    logging.info("Number of parameters in model: %d" % self.get_parameter_num())
def __init__(self, gpu, args, data_shape, class_labels):
    """Spawn one training worker process bound to device *gpu*.

    All communication with the child happens through the multiprocessing
    primitives created here (task queue, shared scalars, shared arrays).
    """
    super().__init__()
    self.gpu = gpu
    self.args = args
    self.class_labels = class_labels
    self.data_shape = data_shape
    self.class_num = len(class_labels)

    #shared variables
    self.task_queue = mp.Queue()                      # commands sent to the worker
    self.active = mp.Value('i', 1)                    # 1 while the worker is busy
    self.epoch = mp.Value('i', 0)
    self.learn_rate = mp.Value('f', args.learn_rate)
    self.cost = mp.Value('f', 0.0)                    # accumulated batch cost
    self.timer = mp.Value('f', 0.0)                   # duration of last task (sec)
    self.model_update = shared.ModelUpdate(args.model_dims, args.batch_size)
    self.data_x = shared.Array(self.model_update.input_shape)
    self.data_y = shared.Array(self.model_update.output_shape)
    self.data_m = mp.Queue()
    self.error_q = mp.Queue()

    logging.info("Starting worker:" + self.gpu)
    # NOTE(review): error_q is passed as the final element here, but
    # run_worker itself takes no error_q parameter -- confirm that
    # run_worker_wrapper consumes it before delegating.
    proc_args = (self.gpu, self.args, self.data_shape, self.class_labels,
                 self.class_num, self.task_queue, self.active, self.epoch,
                 self.learn_rate, self.cost, self.timer, self.data_x,
                 self.data_y, self.data_m, self.model_update, self.error_q)
    # daemon=True: the worker dies with the parent instead of being orphaned
    self.proc = mp.Process(target=run_worker_wrapper, args=proc_args, name=self.gpu)
    self.proc.daemon = True
    self.proc.start()
    # psutil handle for monitoring the child's resource usage
    self.ps_proc = psutil.Process(self.proc.pid)
def load_from_file(fname, batch_size=32, layer_range=None):
    """Load a model from a gzipped-JSON .mdl file and remember its path."""
    start = time.time()
    logging.info("Loading model from %s" % fname)
    json_obj = common.json_from_gz(fname)
    model = load_from_json(json_obj, batch_size, layer_range)
    model.fname = fname
    logging.verbose("Loading model took %.2f sec" % (time.time() - start))
    return model
def load_restart_args(args_fname, args):
    """Restore training arguments from a pickled args file and locate the most
    recent intermediate model to resume from.

    Returns the unpickled args with .model / .epoch_start / .subset_start set.
    Raises Exception if the args file or any intermediate model is missing.
    """
    if not os.path.isfile(args_fname):
        raise Exception("Cannot find arguments file:" + args_fname)

    logging.info("Loading arguments from:", args_fname)
    # NOTE: pickle.load can execute arbitrary code -- this assumes the args
    # file was written by a previous run of this tool, not untrusted input.
    with open(args_fname, "rb") as f:
        args = pickle.load(f)  # replaces the caller-supplied args entirely

    #search for models
    model_fnames = common.find_files(os.path.dirname(args.output_prefix), "*_epoch*.mdl.gz")
    if len(model_fnames) == 0:
        raise Exception(
            "Could not find any intermediate models to continue training from!"
        )

    # filenames look like "<prefix>_epoch<N>_subset<M>.mdl.gz" or
    # "<prefix>_epoch<N>_final.mdl.gz"; parse the last (most recent) one
    v = os.path.basename(model_fnames[-1])
    v = v[:v.find(".")].split("_")
    if v[-1] == "final":
        # epoch finished completely -> resume at the start of the next epoch
        args.epoch_start = int(v[-2][5:]) + 1  # strip the "epoch" prefix
        args.subset_start = 0
    else:
        # resume mid-epoch at the subset after the last saved one
        args.epoch_start = int(v[-2][5:])
        args.subset_start = int(v[-1][6:]) + 1  # strip the "subset" prefix

    args.model = model_fnames[-1]
    logging.info("Continuing training with model:", args.model, "epoch:", args.epoch_start, "subset:", args.subset_start)
    return args
def save_results(fname, error, class_errors):
    """Write overall and per-class error rates to *fname*.

    class_errors entries are (class_index, error_pct, sample_count) tuples as
    produced by compute_error().
    """
    with open(fname, "w") as f:
        logging.info("Overall Error=%.2f%%" % (error), file=f)
        for d in class_errors:
            # d[2] is the class sample count; the previous code printed
            # d[2] * d[1] / 100 (i.e. the *error* count) under a "samples"
            # label, which mislabelled the value.
            logging.info("Class %i=%.2f%% (%i samples)" % (d[0], d[1], d[2]), file=f)
def get_localization_error(detections):
    """Compute and log the ImageNet-style top-5 localization error.

    An image counts as an error unless at least one of its (up to) 5
    considered detections matches a groundtruth object in both class and
    location (IoU > 0.5).
    """
    error = 0
    det_truth = 0
    det_total = 0
    for d in detections:
        meta = d["meta"]
        dets = d["detections"]
        # NOTE(review): sorts ascending by the tuple's first element before
        # taking the first 5 -- if t[0] is a confidence score this keeps the
        # LOWEST-scoring detections; confirm the intended ordering.
        dets.sort(key=lambda t: t[0])
        det_truth += len(meta["class"])
        det_total += len(dets)
        # any class+location match among the first 5 detections is a hit
        positive = False
        for _, cls_a, bbox_a in dets[:min(5, len(dets))]:
            for cls_b, bbox_b in zip(meta["class"], meta["objs"]):
                if cls_a == cls_b and common.overlap_iou(bbox_a, bbox_b) > 0.5:
                    positive = True
        if not positive:
            error += 1
    logging.info(
        "Imagenet localization error: %.2f (%i images, %i true detections, %i total detections)"
        % (100.0 * error / len(detections), len(detections), det_truth, det_total))
def predict_output_step(self, data_x):
    """Return the final layer's output for one batch.

    Compiles the Theano predict function on first use and caches it in
    self.func["predict"] (train flag forced to 0, i.e. inference mode).
    """
    if "predict" not in self.func:  # was `not "predict" in` -- use the idiom
        logging.info("Building predict function")
        self.func["predict"] = theano.function(
            [self.input], self.layers[-1].output,
            givens=[(denet.layer.get_train(), tensor.cast(0, 'int8'))],
            on_unused_input='ignore')
    return self.func["predict"](data_x)
def update(self, model_update_delta, model_update, workers):
    """Push a model delta to the parameter server, then refresh every worker
    with the returned model update. No-op when no server is configured."""
    if not self.server_exists:
        return
    #send model delta and update workers
    logging.info("Sending updates to server:", self.server_name)
    start = time.time()
    self.cmd_update(model_update_delta, model_update)
    for w in workers:
        w.update(model_update)
    logging.info("Update took %0.1f sec" % (time.time() - start))
def load_from_subset(self, subset):
    """Load the given subset of images into self.data.

    Skips the load entirely when *subset* is already resident.
    """
    if subset == self.subset_index:
        return
    logging.info("Loading from subset %i / %i (%i threads)" %
                 (subset, self.subset_num, self.thread_num))
    begin = subset * self.subset_size
    end = min(begin + self.subset_size, self.subset_total_size)
    self.data = self.image_loader.load(self.images[begin:end])
    self.subset_index = subset
def sync(self, model_update, workers, initial=False):
    """Synchronize the local model with the update server, then push the
    synced model to every worker.

    Only active in asynchronous mode (the "async" attribute).
    """
    # NOTE: the attribute is literally named "async", which became a reserved
    # keyword in Python 3.7 -- `self.async` is a SyntaxError there, so it
    # must be accessed via getattr() to keep the file parseable.
    if getattr(self, "async"):
        logging.info("Synchronizing with update server:", self.server_name)
        ts = time.time()
        self.cmd_sync(model_update, initial)

        #update workers
        for worker in workers:
            worker.update(model_update)

        logging.info("Sync took %0.1f sec" % (time.time() - ts))
def run(self):
    """Load one dataset subset and export it in batches, logging how long
    each of the two stages took."""
    subset_id = "%i/%i" % (self.subset + 1, self.data.subset_num)
    logging.info("Exporting subset (%s)" % subset_id)
    timer = common.Timer()
    self.data.load_from_subset(self.subset)
    timer.mark()
    self.data_export = self.data.export(self.batch_size)
    timer.mark()
    logging.info("Finished exporting subset (%s)" % subset_id,
                 "- load took %i sec, export took %i sec" % (timer.delta(0), timer.delta(1)))
def profile(func, it_num, *args):
    """Run *func*(*args) it_num times and print its Theano profile summary.

    Requires CUDA_LAUNCH_BLOCKING=1 in the environment (otherwise kernel
    launches are asynchronous and timings are meaningless) and a function
    compiled with profiling enabled.
    """
    assert os.environ.get(
        'CUDA_LAUNCH_BLOCKING', '0') == '1', "Requires CUDA_LAUNCH_BLOCKING=1 to get proper results"
    assert func.profile is not None, "Compile function with profile=True"
    logging.info("Profiling function (%i it)" % it_num)
    for _ in range(it_num):
        func(*args)
    try:
        func.profile.summary()
    except Exception:
        # summary() can fail on some Theano versions -- the profiling runs
        # above still completed, so report instead of silently swallowing.
        logging.warning("Failed to print profile summary")
def get_files(data_set, image_set):
    """Return full JPEG paths for a Pascal VOC image-set listing.

    Reads input_dir/<data_set>/ImageSets/Main/<image_set>.txt and maps each
    image id to its JPEGImages path.
    """
    logging.info("Loading pascal %s %s..." % (data_set, image_set))
    listing = os.path.join(input_dir, "%s/ImageSets/Main/%s.txt" % (data_set, image_set))
    with open(listing, "r") as f:
        fnames = [
            os.path.join(input_dir, "%s/JPEGImages/%s.jpg" % (data_set, line.rstrip()))
            for line in f.readlines()
        ]
    return fnames
def update(self, pr):
    """Update dataset state from freshly predicted class probabilities *pr*.

    pr is indexed [sample, class] and aligned with self.data. Two jobs:
      * in "confusion" sample mode, rebuild the class-confusion matrix;
      * re-label / re-weight partially labelled samples per self.partial_mode.
    """
    if self.sample_mode == "confusion":
        self.confusion = numpy.zeros(
            (self.get_class_num(), self.get_class_num()), dtype=numpy.float64)
        predict_cls = numpy.argmax(pr, axis=1)
        for i, t in enumerate(self.data):
            fname, d, meta = t
            self.confusion[meta["class"], predict_cls[i]] += 1
        # row-normalize, then zero the diagonal so only errors remain
        self.confusion /= numpy.sum(self.confusion, axis=1)[:, None]
        numpy.fill_diagonal(self.confusion, 0.0)
        logging.info("Confusion Matrix:\n", self.confusion)
        logging.info("Error Rates:\n", numpy.sum(self.confusion, axis=1))

    #assign partially labelled items to previous prediction
    if self.partial_mode == "previous" or self.partial_mode == "max":
        cls = numpy.argmax(pr, axis=1)
        for i, t in enumerate(self.data):
            fname, d, meta = t
            if meta["partial"]:
                meta["class"] = cls[i]
                self.data[i] = (fname, d, meta)

    #update partially labelled items predicted probabilities
    elif "momentum" in self.partial_mode:
        if "hard" in self.partial_mode:
            # harden predictions to one-hot before blending
            cls = numpy.argmax(pr, axis=1)
            pr.fill(0.0)
            pr[numpy.arange(pr.shape[0]), cls] = 1.0

        # momentum factor, e.g. partial_mode == "momentum,0.9"
        p = float(self.partial_mode.split(",")[1])
        for i, t in enumerate(self.data):
            fname, d, meta = t
            if meta["partial"]:
                # exponential moving average of the per-class probabilities,
                # then re-label to the current argmax
                meta["pr"] = [
                    p * meta["pr"][c] + (1.0 - p) * pr[i, c]
                    for c in range(pr.shape[1])
                ]
                meta["class"] = meta["pr"].index(max(meta["pr"]))
                self.data[i] = (fname, d, meta)
def compute_error(data, model):
    """Evaluate *model* over every subset of *data*.

    Returns (overall_error_pct, per_class) where per_class is a list of
    (class_index, error_pct, sample_count) tuples.
    """
    errors = [0] * model.class_num
    samples = [0] * model.class_num
    for subset in range(data.subset_num):
        data.load_from_subset(subset)
        logging.info("Computing error...")
        predicted = model.predict_label(data)
        truth = data.get_labels()
        for i in range(len(data)):
            cls = truth[i]
            samples[cls] += 1
            if predicted[i] != cls:
                errors[cls] += 1

    overall = 100.0 * sum(errors) / sum(samples)
    per_class = [(c, 100.0 * errors[c] / samples[c], samples[c])
                 for c in range(model.class_num)]
    return (overall, per_class)
def load(self, input_dir, ext, is_training, thread_num, class_labels=None): self.class_labels = class_labels #generate class labels if self.class_labels is None: self.class_labels = DatasetFromDir.find_class_labels(input_dir) #load images classes = os.listdir(input_dir) for c in classes: cls = self.class_labels[c] images = DatasetFromDir.find_paths(os.path.join(input_dir, c), "*." + ext) logging.info("Found class " + c + " (" + str(cls) + ") with " + str(len(images)) + " images") for f in images: imfile = Image.open(f) basename = f.replace(input_dir, "") self.data.append((basename, imfile.copy(), {"image_class":cls, "partial":False})) #sort by class self.data.sort(key=lambda d:d[2]["image_class"]) logging.info("Loaded " + str(len(self)) + " Samples")
def train_epoch(self, dataset, epoch, learning_rate, momentum=None, decay=0.0, solver_mode="sgd"):
    """Train for one epoch over *dataset* and return the summed batch cost.

    momentum defaults to [0, 1, 0] (same as before, but no longer a shared
    mutable default argument). Raises Exception if any batch produces a NaN
    cost (bad GPU / diverged training).
    """
    # avoid the mutable-default-argument pitfall; [0, 1, 0] is the old default
    if momentum is None:
        momentum = [0, 1, 0]

    #train over batches (assume dataset size is mulitple of batch_size!)
    logging.info("Evaluating training function")
    dataset_x, dataset_m, dataset_size = dataset.export(self.batch_size)
    index_num = math.ceil(dataset_size / self.batch_size)
    total_cost = 0
    for index in range(index_num):
        #upload data to GPU and perform train step
        timer = common.Timer()
        data_x = dataset_x[index * self.batch_size:(index + 1) * self.batch_size]
        data_m = dataset_m[index * self.batch_size:(index + 1) * self.batch_size]
        cost, _ = self.train_step(data_x, data_m, epoch, self.iteration, learning_rate, momentum, decay)

        #watch out for GPU's randomly producing NaN!
        if math.isnan(cost):
            raise Exception("ERROR: Cost is NaN")

        logging.verbose(
            "Batch %i.%i - iteration: %i cost:" % (epoch, index * self.batch_size, self.iteration),
            cost, "took: %i ms" % timer.current_ms())
        total_cost += cost
        self.iteration += 1

    return total_cost
def run_sync(self):
    """Main loop of the synchronous parameter server.

    Repeatedly: receive one model update from every client, average them,
    and broadcast the mean back. Runs until interrupted; per-iteration
    exceptions are logged and the loop continues.
    """
    #connect to clients
    server_socket, client_sockets = self.connect_clients()

    #construct update object for each client / server
    client_updates = [
        shared.ModelUpdate(self.model_dims) for _ in range(self.client_num)
    ]
    server_update = shared.ModelUpdate(self.model_dims)

    #perform synchronization
    while True:
        try:
            logging.info("Waiting for updates...")
            # blocks until every client has sent its update
            for i, sock in enumerate(client_sockets):
                update_json = network.recv_json(sock)
                client_updates[i].import_json(update_json["data"])

            logging.info("Synchronising...")
            ts = time.time()
            server_update.set_mean(client_updates, self.thread_num)
            logging.verbose("mean calc took %.2f sec" % (time.time() - ts))

            ts = time.time()
            server_json = server_update.export_json()
            logging.verbose("json export took %.2f sec" % (time.time() - ts))

            #send mean update to clients
            ts = time.time()
            for sock in client_sockets:
                network.send_json(sock, server_json)
            logging.verbose("transferring data to clients took %.2f sec" % (time.time() - ts))

        except (KeyboardInterrupt, SystemExit):
            logging.info("Done")
            sys.exit(0)
        except Exception as e:
            # keep the server alive on per-iteration failures
            logging.error("Encounter exception: ", e)
def initialize(args, data_shape, class_labels, class_num):
    """Create a new CNN model from args.model_desc, or load an existing one
    from args.model, and return it configured for training.

    Asserts that a loaded model's data shape matches the dataset's.
    """
    cudnn_info = (theano.config.dnn.conv.algo_fwd,
                  theano.config.dnn.conv.algo_bwd_data,
                  theano.config.dnn.conv.algo_bwd_filter)
    logging.info("Using theano version:", theano.__version__,
                 "(cudnn fwd=%s,bwd data=%s,bwd filter=%s)" % cudnn_info)

    if args.model is None:
        #construct convolutional model
        logging.info("Building convolutional model (%i classes)..." % class_num)
        model = ModelCNN()
        model.batch_size = args.batch_size
        model.class_labels = class_labels
        model.class_num = class_num

        #allow padding to be specified in border mode
        # (a numeric string like "2" becomes the tuple (2, 2); otherwise the
        # symbolic mode string is passed through unchanged)
        try:
            n = int(args.border_mode)
            border_mode = (n, n)
        except ValueError:
            border_mode = args.border_mode
        model.build(args.model_desc, data_shape, args.activation, border_mode, list(args.weight_init))
    else:
        # continue from an existing model file
        model = load_from_file(args.model, args.batch_size)
        model.class_labels = class_labels
        model.class_num = class_num
        assert data_shape == model.data_shape, "Mismatching data shapes in .mdl and data: " + str(
            data_shape) + "!=" + str(model.data_shape)

    model.skip_layer_updates = args.skip_layer_updates
    if len(model.skip_layer_updates) > 0:
        logging.info("Skipping layer updates:", model.skip_layer_updates)
    return model
def save_to_file(model, fname, compresslevel=9):
    """Serialize *model* to a gzipped-JSON file at *fname*."""
    logging.info("Saving model to %s" % fname)
    start = time.time()
    json_obj = model.export_json()
    common.json_to_gz(fname, json_obj, compresslevel)
    logging.verbose("Saving model took %.2f sec" % (time.time() - start))
def get_detections(self, model, data_x, data_m, params):
    """Run the detector on one batch and return per-image detection lists.

    params may override the thresholds (prThreshold, nmsThreshold,
    cornerThreshold, cornerMax). Returns a list of
    {"detections": [...], "meta": data_m[i]} dicts, one per image.
    """
    # detection hyper-parameters (overridable via *params*)
    pr_threshold = params.get("prThreshold", 0.01)
    nms_threshold = params.get("nmsThreshold", 0.5)
    corner_threshold = params.get("cornerThreshold", self.sparse_layer.corner_threshold)
    corner_max = params.get("cornerMax", 1024)
    t = (pr_threshold, nms_threshold, corner_threshold, corner_max)
    logging.verbose(
        "Using detection params - pr threshold: %f, nms threshold: %f, corner_threshold: %f, corner_max: %i" % t)

    # compile the Theano detection function on first call only
    first_detect = False
    if self.detect_func is None:
        #get all model outputs
        outputs = []
        outputs.append(self.det_pr)
        if self.use_bbox_reg:
            outputs.append(self.bbox_reg)

        logging.info("Building detection function")
        self.detect_func = theano.function([model.input], outputs,
                                           givens=[(get_train(), tensor.cast(0, 'int8'))],
                                           on_unused_input='ignore')
        logging.verbose("Exporting graph...")
        with open("detect_graph.txt", "w") as f:
            theano.printing.debugprint(self.detect_func, file=f, print_type=True)
        first_detect = True

    #get sampling bounding boxs
    logging.verbose("Detecting sample bboxs (%.2f)" % corner_threshold)
    timer = common.Timer()
    sample_bboxs = self.sparse_layer.get_samples(data_x, train=False, store_shared=True)
    timer.mark()
    logging.verbose("Found sample bboxs: {}".format(
        [len(bbox) for bbox in sample_bboxs]))

    #upload sampling bounding boxs
    bboxs = self.sparse_layer.set_samples(sample_bboxs)
    timer.mark()

    #classify sampling bounding boxs
    r = list(self.detect_func(data_x))

    #get outputs
    det_pr = r[0]
    r_index = 1
    if self.use_bbox_reg:
        bbox_reg = r[r_index]
        r_index += 1

        #update bbox array: decode (dx, dy, dw, dh) regression offsets
        # (relative to each sample box's center/size) back into corner form
        bboxs_cx = 0.5 * (bboxs[:, :, :, 0] + bboxs[:, :, :, 2])
        bboxs_cy = 0.5 * (bboxs[:, :, :, 1] + bboxs[:, :, :, 3])
        bboxs_w = bboxs[:, :, :, 2] - bboxs[:, :, :, 0]
        bboxs_h = bboxs[:, :, :, 3] - bboxs[:, :, :, 1]
        predict_cx = bbox_reg[:, 0, :, :] * bboxs_w + bboxs_cx
        predict_cy = bbox_reg[:, 1, :, :] * bboxs_h + bboxs_cy
        predict_w = numpy.exp(bbox_reg[:, 2, :, :]) * bboxs_w
        predict_h = numpy.exp(bbox_reg[:, 3, :, :]) * bboxs_h
        bboxs[:, :, :, 0] = predict_cx - predict_w * 0.5
        bboxs[:, :, :, 1] = predict_cy - predict_h * 0.5
        bboxs[:, :, :, 2] = predict_cx + predict_w * 0.5
        bboxs[:, :, :, 3] = predict_cy + predict_h * 0.5
    timer.mark()

    # probability threshold + non-maximum suppression (C implementation)
    detlists = c_code.build_detections_nms(pr_threshold, nms_threshold, det_pr, bboxs,
                                           [len(s) for s in sample_bboxs])
    timer.mark()

    logging.verbose("Found detections:", [len(detlist) for detlist in detlists])
    logging.verbose(
        "FPS=%.1f, Timing (ms) - get samples: %i, upload: %i, classify: %i, build+nms %i"
        % tuple([self.batch_size / timer.current()] + timer.deltas_ms()))

    # running FPS average, excluding the first call (includes compilation)
    if not first_detect:
        global detect_time, detect_num
        detect_time += timer.current()
        detect_num += self.batch_size
        logging.info("Average FPS=%.1f" % (detect_num / detect_time))

    #results format
    results = []
    for i, detlist in enumerate(detlists):
        results.append({"detections": detlist, "meta": data_m[i]})
    return results
def build_train_func(self, solver_mode="sgd", cost_factors=[], use_acc_mode=False, skip_build=False):
    """Build the Theano training function(s) for this model.

    solver_mode: "sgd", "torch"/"nesterov" or "adam".
    cost_factors: per-cost-layer weights (defaults to 1.0 each).
    use_acc_mode: build train_begin/train_step/train_end functions that
        average parameter updates over several step calls.
    skip_build: collect self.updates only, without compiling any function.

    Populates self.updates plus self.func["train_step"] (or
    "train_begin"/"train_step"/"train_end" in acc mode, or the
    "train_fwd"/"train_bwd" lists in split mode).
    """
    #arguments to function
    logging.info(
        "Building training functions - solver: %s, use_acc_mode: %s" % (solver_mode, use_acc_mode))
    iteration = tensor.fscalar()
    learn_rate = tensor.fscalar()
    momentum = tensor.fvector()
    decay = tensor.fscalar()

    #find costs -- every layer may contribute a cost given (index, value) targets
    self.yt = []
    self.cost_list = []
    self.cost_layers = []
    self.cost_layer_names = []
    for layer in self.layers:
        yt_index = tensor.lvector("target index %i" % len(self.cost_layers))
        yt_value = tensor.fvector("target value %i" % len(self.cost_layers))
        cost = layer.cost(yt_index, yt_value)
        if not cost is None:
            self.yt += [yt_index, yt_value]
            self.cost_list.append(cost)
            self.cost_layers.append(layer)
            self.cost_layer_names.append(layer.type_name)

    self.cost_factors = [1.0] * len(self.cost_list) if len(cost_factors) == 0 else cost_factors
    assert len(self.cost_factors) == len(
        self.cost_list
    ), "Different number of cost factors (%i) and cost layers (%i)" % (len(
        self.cost_factors), len(self.cost_layers))
    logging.info("Found %i costs in model:" % len(self.cost_layers),
                 list(zip(self.cost_layer_names, self.cost_factors)))

    # total training cost = weighted sum of the per-layer costs
    self.train_cost = tensor.as_tensor_variable(0)
    for i, cost in enumerate(self.cost_list):
        self.train_cost += self.cost_factors[i] * cost

    if self.gradient_clip > 0.0:
        logging.info("Clipping gradient to [%f,%f]" % (-self.gradient_clip, self.gradient_clip))
        self.train_cost = theano.gradient.grad_clip(
            self.train_cost, -self.gradient_clip, self.gradient_clip)

    #find split points -- layers flagged has_split partition the network into
    #segments trained with separate forward/backward functions
    split_points = [0]
    self.use_split_mode = False
    for index, layer in enumerate(self.layers):
        if layer.has_split:
            self.use_split_mode = True
            split_points.append(index)
    split_points.append(len(self.layers))

    if self.use_split_mode:
        logging.verbose("Using split mode with split points:", split_points)
        self.func["train_fwd"] = []
        self.func["train_bwd"] = []

    self.updates = []
    # without splits this loop runs exactly once over the whole network
    for sp in range(len(split_points) - 1):
        logging.info("Building training functions for layers %i-%i" % (split_points[sp], split_points[sp + 1]))

        # boundary layers of this segment (None at the ends of the network);
        # only the final segment optimizes the full training cost directly
        split_start = self.layers[split_points[sp]] if sp > 0 else None
        split_end = self.layers[split_points[sp + 1]] if (
            sp + 2) < len(split_points) else None
        split_cost = self.train_cost if split_end is None else None
        split_layers = []
        for i, layer in enumerate(self.layers):
            if (i > split_points[sp]) and (i < split_points[sp + 1]):
                split_layers.append(layer)

        #determine known_grads provided by previous backward passes
        from collections import OrderedDict
        split_known_grads = OrderedDict()
        for i in range(sp + 1, len(split_points) - 1):
            split_known_grads.update(
                self.layers[split_points[i]].split_known_grads())
        if len(split_known_grads) == 0:
            split_known_grads = None

        # solver rules: each returns [(param, new_param), (state, new_state), ...]
        def get_sgd_updates(p, g):
            # classic momentum; disabled on the very first iteration
            m = theano.shared(numpy.zeros(p.shape.eval(), dtype=theano.config.floatX),
                              broadcastable=p.broadcastable, borrow=True)
            rho = tensor.switch(tensor.gt(iteration, 0), momentum[0], 0.0)
            m_update = rho * m + (1.0 - rho) * g
            p_update = p - learn_rate * m_update
            return [(p, p_update), (m, m_update)]

        def get_torch_updates(p, g):
            # Torch-style Nesterov momentum
            m = theano.shared(numpy.zeros(p.shape.eval(), dtype=theano.config.floatX),
                              broadcastable=p.broadcastable, borrow=True)
            rho = tensor.switch(tensor.gt(iteration, 0), momentum[0], 0.0)
            m_update = rho * m + g
            p_update = p - learn_rate * (g + momentum[0] * m_update)
            return [(p, p_update), (m, m_update)]

        def get_adam_updates(p, g):
            # ADAM with bias-corrected moment estimates
            eps = 1e-8
            m = theano.shared(numpy.zeros(p.shape.eval(), dtype=theano.config.floatX),
                              broadcastable=p.broadcastable, borrow=True)
            v = theano.shared(numpy.zeros(p.shape.eval(), dtype=theano.config.floatX),
                              broadcastable=p.broadcastable, borrow=True)
            m_update = momentum[0] * m + (1.0 - momentum[0]) * g
            v_update = momentum[1] * v + (1.0 - momentum[1]) * (g * g)
            m_hat = m_update / (1.0 - tensor.pow(momentum[0], iteration + 1))
            v_hat = v_update / (1.0 - tensor.pow(momentum[1], iteration + 1))
            p_update = p - learn_rate * m_hat / (tensor.sqrt(v_hat) + eps)
            return [(p, p_update), (m, m_update), (v, v_update)]

        #append parameter updates (weights take L2 decay, biases only if bias_decay)
        params = []
        params_decay = []
        for layer in split_layers:
            params += layer.weights()
            params_decay += [True] * len(layer.weights())
            params += layer.biases()
            params_decay += [False] * len(layer.biases())

        #build updates
        print("known grads:", split_known_grads)
        grads = tensor.grad(split_cost, params, known_grads=split_known_grads)
        solver_updates = []
        for p, g, p_decay in zip(params, grads, params_decay):
            #add L2 weight decay if needed
            if p_decay or self.bias_decay:
                g += decay * p
            if solver_mode == "adam":
                solver_updates += get_adam_updates(p, g)
            elif solver_mode == "torch" or solver_mode == "nesterov":
                solver_updates += get_torch_updates(p, g)
            else:
                solver_updates += get_sgd_updates(p, g)

        #append per layer updates
        local_updates = solver_updates + sum(
            [layer.updates(self.train_cost) for layer in split_layers], [])

        #all updates
        self.updates += local_updates

        #skipping actual theano function building (if you just want updates, etc)
        if skip_build:
            continue

        global debug_train
        if debug_train:
            logging.warning("WARNING: Debug mode is active!")
            from theano.compile.nanguardmode import NanGuardMode
            debug_mode = theano.compile.MonitorMode(
                post_func=debug_detect_errors)
        else:
            debug_mode = None

        if self.use_split_mode:
            # forward pass function for this segment (skipped on the last one)
            if not split_end is None:
                updates = sum(
                    [layer.split_forward() for layer in split_layers], [])
                updates += split_end.split_forward()
                print("fwd updates:", updates)
                f = theano.function([self.input], [], updates=updates,
                                    givens=[(denet.layer.get_train(), tensor.cast(1, 'int8'))],
                                    on_unused_input='ignore', mode=debug_mode)
                self.func["train_fwd"].append(f)

            # backward pass function; built last-to-first, hence insert(0, ...)
            outputs = ([self.train_cost] + self.cost_list) if split_end is None else []
            updates = sum([
                layer.split_backward(split_cost, split_known_grads)
                for layer in split_layers
            ], [])
            if not split_start is None:
                updates += split_start.split_backward(split_cost, split_known_grads)
            print("bwd updates:", updates)
            updates += local_updates
            f = theano.function([
                denet.layer.get_epoch(), iteration, learn_rate, momentum,
                decay, self.input
            ] + self.yt, outputs, updates=updates,
                                givens=[(denet.layer.get_train(), tensor.cast(1, 'int8'))],
                                on_unused_input='ignore', mode=debug_mode)
            self.func["train_bwd"].insert(0, f)

        elif use_acc_mode:
            # gradient accumulation: train_begin zeroes the accumulators, each
            # train_step adds one batch's updates, train_end applies the mean
            acc_counter = theano.shared(
                numpy.array(0, dtype=theano.config.floatX))
            begin_updates = [(acc_counter, tensor.zeros_like(acc_counter))]
            step_updates = [(acc_counter, acc_counter + 1)]
            end_updates = []
            self.acc_params = []
            for p_dest, p_src in self.updates:
                p_acc = theano.shared(numpy.zeros(
                    p_dest.shape.eval(), dtype=theano.config.floatX),
                                      broadcastable=p_dest.broadcastable,
                                      borrow=True)
                begin_updates.append((p_acc, tensor.zeros_like(p_acc)))
                step_updates.append((p_acc, p_acc + p_src))
                end_updates.append((p_dest, p_acc / acc_counter))
                self.acc_params.append(p_acc)

            logging.info(
                "Constructing parameter accumulate update functions (solver=%s)" % solver_mode)
            self.func["train_begin"] = theano.function(
                [], [], updates=begin_updates)
            self.func["train_step"] = theano.function(
                [
                    denet.layer.get_epoch(), iteration, learn_rate, momentum,
                    decay, self.input
                ] + self.yt,
                [self.train_cost] + self.cost_list,
                updates=step_updates,
                givens=[(denet.layer.get_train(), tensor.cast(1, 'int8'))],
                on_unused_input='ignore',
                allow_input_downcast=True,
                mode=debug_mode)
            self.func["train_end"] = theano.function([], [], updates=end_updates)

        else:
            logging.info(
                "Constructing parameter update function (solver=%s)" % solver_mode)
            # borrow=True avoids copying the input batch on every call
            f_input = theano.In(self.input, borrow=True)
            f_yt = [theano.In(yt, borrow=True) for yt in self.yt]
            self.func["train_step"] = theano.function(
                [
                    denet.layer.get_epoch(), iteration, learn_rate, momentum,
                    decay, f_input
                ] + f_yt,
                [self.train_cost] + self.cost_list,
                updates=self.updates,
                givens=[(denet.layer.get_train(), tensor.cast(1, 'int8'))],
                on_unused_input='ignore',
                allow_input_downcast=True,
                mode=debug_mode)

    # NOTE(review): placement reconstructed from flattened source -- this
    # references self.func["train_step"], which only exists in the non-split
    # modes; confirm the original guarded or placed this accordingly.
    logging.verbose("Exporting graph...")
    with open("graph.txt", "w") as f:
        theano.printing.debugprint(self.func["train_step"], file=f, print_type=True)
def get_precision(detections, overlap_threshold=0.5):
    """Evaluate Pascal VOC detection results: per-class AP and mean AP.

    detections: list over the test set of {"detections": [(pr, cls, bbox),
    ...], "meta": {...}} dicts. Logs (does not return) groundtruth coverage,
    per-class AP (VOC 2007 11-point interpolation) and the mean AP over the
    20 VOC classes.
    """
    #inverse class labelling
    class_labels_inv = {
        0: "aeroplane", 1: "bicycle", 2: "bird", 3: "boat", 4: "bottle",
        5: "bus", 6: "car", 7: "cat", 8: "chair", 9: "cow",
        10: "diningtable", 11: "dog", 12: "horse", 13: "motorbike",
        14: "person", 15: "pottedplant", 16: "sheep", 17: "sofa",
        18: "train", 19: "tvmonitor"
    }

    # fraction of groundtruth objects matched by at least one detection
    coverage = 0
    coverage_total = 0
    for r in detections:
        for cls_a, bbox_a in zip(r["meta"]["class"], r["meta"]["bbox"]):
            coverage_total += 1
            for _, cls_b, bbox_b in r["detections"]:
                if cls_a == cls_b and common.overlap_iou(
                        bbox_a, bbox_b) > overlap_threshold:
                    coverage += 1
                    break
    logging.info(
        "coverage: %.2f%% (%i,%i)" % (100.0 * coverage / coverage_total, coverage, coverage_total))

    #collect all detections and groundtruth detections into classes
    gts_cls = [[] for _ in range(20)]
    dts_cls = [[] for _ in range(20)]
    for image_id, r in enumerate(detections):
        for pr, cls, bbox in r["detections"]:
            dts_cls[cls].append((image_id, pr, bbox))
        for cls, bbox, difficult in zip(r["meta"]["class"], r["meta"]["bbox"],
                                        r["meta"]["image"]["difficult"]):
            gts_cls[cls].append((image_id, difficult, bbox))

    logging.warning(
        "WARNING: does not obtain exact results relative to VOCcode implementation!"
    )
    logging.info("Using overlap threshold: %.2f" % overlap_threshold)

    mean_ap = 0
    for cls in range(20):
        gts = gts_cls[cls]
        dts = dts_cls[cls]

        # "difficult" groundtruth objects are excluded from recall
        non_difficult_num = 0
        for _, diff, _ in gts:
            if not diff:
                non_difficult_num += 1

        #sort by confidence
        dts.sort(key=lambda d: -d[1])

        # mark each detection TP/FP against the highest-overlap groundtruth in
        # the same image; duplicate matches of an already-claimed groundtruth
        # count as false positives, matches of "difficult" objects are ignored
        tp = numpy.zeros((len(dts), ), dtype=numpy.int64)
        fp = numpy.zeros((len(dts), ), dtype=numpy.int64)
        gt_found = []
        for d in range(len(dts)):
            image_id, pr, bbox = dts[d]
            overlap_max = 0
            overlap_index = 0
            for gt_i, gt in enumerate(gts):
                gt_image_id, _, gt_bbox = gt
                if gt_image_id == image_id:
                    overlap = common.overlap_iou(bbox, gt_bbox)
                    if overlap > overlap_max:
                        overlap_max = overlap
                        overlap_index = gt_i
            if overlap_max >= overlap_threshold:
                if not gts[overlap_index][1]:
                    if overlap_index in gt_found:
                        fp[d] = 1
                    else:
                        gt_found.append(overlap_index)
                        tp[d] = 1
            else:
                fp[d] = 1

        # precision/recall curves from the cumulative counts
        tp = numpy.cumsum(tp)
        fp = numpy.cumsum(fp)
        recall = tp / non_difficult_num
        prec = tp / (tp + fp)

        #VOC 2007 algorithm! (11-point interpolated average precision)
        ap = 0
        for t in numpy.linspace(0.0, 1.0, 11):
            n = (recall >= t)
            p = prec[n].max() if n.any() else 0.0
            ap += p / 11

        mean_ap += ap
        logging.info(
            "%s - AP: %.4f (%i detections, %i groundtruth, %i non difficult)"
            % (class_labels_inv[cls], ap, len(dts), len(gts), non_difficult_num))

    mean_ap /= 20
    logging.info("Mean AP: %.4f" % mean_ap)
def run_worker(gpu, args, data_shape, class_labels, class_num, task_queue,
               active, epoch, learn_rate, cost, timer, data_x, data_y, data_m,
               model_update):
    """Worker-process entry point: owns one GPU and executes training tasks.

    Runs forever, pulling task names from *task_queue* ("predict",
    "model-read", "model-write", "train-begin", "train-step", "train-end",
    "done") and communicating results through the shared values/arrays
    created by the parent process.
    """
    #redirect output (unbuffered) -- one .out/.err log pair per GPU
    sys.stdout = open(gpu + ".out", 'w')
    sys.stderr = open(gpu + ".err", 'w')
    logging.init(args, flush=True)
    sys.setrecursionlimit(10000)

    #create thread to flush stdout / stderr every 5 seconds
    flush_logs()
    logging.info(gpu + ": initializing")

    #remove all openmpi variables!
    # FIX: iterate over a snapshot -- deleting entries while iterating
    # os.environ.keys() directly raises "dictionary changed size during
    # iteration" on Python 3 whenever an OMPI_ variable is present.
    for v in list(os.environ.keys()):
        if v[:5] == "OMPI_":
            del os.environ[v]

    #set compile dir and gpu (possible since theano hasn't been imported yet!)
    if not "THEANO_FLAGS" in os.environ:
        os.environ["THEANO_FLAGS"] = ""
    import socket
    os.environ["THEANO_FLAGS"] += "," + args.theano_flags + ","
    os.environ["THEANO_FLAGS"] += "device=" + gpu + ","
    os.environ["THEANO_FLAGS"] += "force_device=True,"
    os.environ["THEANO_FLAGS"] += "compiledir=~/.theano/" + socket.gethostname() + "-" + gpu + "/,"
    #os.environ["THEANO_FLAGS"] += "lib.cnmem=1,";
    os.environ["THEANO_FLAGS"] += "nvcc.flags=-D_FORCE_INLINES,"
    logging.info(gpu + ": Using THEANO_FLAGS:", os.environ["THEANO_FLAGS"])

    #initialize local model (import deferred so the THEANO_FLAGS above apply)
    import denet.model.model_cnn as model_cnn
    model = model_cnn.initialize(args, data_shape, class_labels, class_num)

    #pre-initialize training function
    use_acc_mode = args.batch_size_factor > 1 and args.use_acc_mode
    model.build_train_func(args.solver, args.cost_factors, use_acc_mode=use_acc_mode)
    if use_acc_mode:
        train_begin_func = model.func["train_begin"]
        train_end_func = model.func["train_end"]

    #begin processing loop
    iteration = 0
    while (True):
        #try to start next task immediately otherwise wait for task
        wait_time = time.time()
        try:
            task = task_queue.get(block=False)
        except queue.Empty:
            logging.verbose(gpu + ": waiting for task")
            # flag this worker as idle while blocked on the queue
            with active.get_lock():
                active.value = 0
            task = task_queue.get(block=True)
        wait_time = time.time() - wait_time
        logging.verbose(gpu + ": " + task + " (wait time=%i ms)" % (1000 * wait_time))

        #calculate updates
        ts = time.time()
        if task == "predict":
            with data_x.lock, data_y.lock:
                data_y.get_array()[...] = model.predict_output_step(data_x.get_array())

        elif task == "model-read":
            model_update.import_updates(model)

        elif task == "model-write":
            model_update.export_updates(model)

        elif task == "train-begin":
            if use_acc_mode:
                train_begin_func()
            with cost.get_lock():
                cost.value = 0

        elif task == "train-step":
            with cost.get_lock(), epoch.get_lock(), learn_rate.get_lock(), data_x.lock:
                data_meta = data_m.get(block=True)
                c, _ = model.train_step(data_x.get_array(), data_meta,
                                        epoch.value, iteration,
                                        learn_rate.value, args.learn_momentum,
                                        args.learn_decay)
                if math.isnan(c):
                    raise Exception("Encountered NaN cost for worker")
                cost.value += c
                iteration += 1

        elif task == "train-end":
            if use_acc_mode:
                train_end_func()
            # report the mean cost over the accumulated batches
            with cost.get_lock():
                cost.value /= args.batch_size_factor

        elif task == "done":
            exit(0)

        with timer.get_lock():
            timer.value = time.time() - ts
        logging.info(gpu + ": %s took %i ms" % (task, 1000 * timer.value))
def load(self, input_dir, data_format, is_training, thread_num, class_labels=None):
    """Load the Pascal VOC detection dataset (VOC2007 / VOC2012).

    input_dir: root containing VOC2007/ and VOC2012/ trees.
    data_format: "name,param,..." string; trailing params select the image
        sets to include (e.g. keys starting with "2007"/"2012" containing
        "train"/"val"/"test") plus images_per_subset / subset_num.
    is_training: if True, missing annotation XML raises an exception.
    class_labels: ignored — VOC labels are hard-coded below
        (NOTE(review): confirm the parameter is intentionally unused).

    Populates self.images ({fname, bboxs, difficult} dicts, sorted by fname),
    self.image_loader and the subset bookkeeping fields.
    """
    self.thread_num = thread_num

    #get arguments: everything after the first comma is "key=value" options
    param_str = ",".join(data_format.split(",")[1:])
    format_params = common.get_params_dict(param_str)

    #Hard coded class labels (the 20 standard VOC categories)
    self.class_labels = {
        "aeroplane": 0, "bicycle": 1, "bird": 2, "boat": 3, "bottle": 4,
        "bus": 5, "car": 6, "cat": 7, "chair": 8, "cow": 9,
        "diningtable": 10, "dog": 11, "horse": 12, "motorbike": 13,
        "person": 14, "pottedplant": 15, "sheep": 16, "sofa": 17,
        "train": 18, "tvmonitor": 19
    }

    #select datasets to include: read the image-set index file and map each
    #listed id to its JPEG path
    def get_files(data_set, image_set):
        logging.info("Loading pascal %s %s..." % (data_set, image_set))
        with open(os.path.join(input_dir, "%s/ImageSets/Main/%s.txt" % (data_set, image_set)), "r") as f:
            fnames = [os.path.join(input_dir, "%s/JPEGImages/%s.jpg" % (data_set, index.rstrip()))
                      for index in f.readlines()]
        return fnames

    #VOC 2007: the first format param starting with "2007" names the splits
    #to load (substring match on "train"/"val"/"test")
    files = []
    param = [s for s in format_params.keys() if s.startswith("2007")]
    param = param[0] if len(param) > 0 else ""
    if "train" in param:
        files += get_files("VOC2007", "train")
    if "val" in param:
        files += get_files("VOC2007", "val")
    if "test" in param:
        files += get_files("VOC2007", "test")

    #VOC 2012: same convention for params starting with "2012"
    param = [s for s in format_params.keys() if s.startswith("2012")]
    param = param[0] if len(param) > 0 else ""
    if "train" in param:
        files += get_files("VOC2012", "train")
    if "val" in param:
        files += get_files("VOC2012", "val")
    if "test" in param:
        files += get_files("VOC2012", "test")

    logging.info("Finding images / metadata")
    self.images = []
    for fname in files:
        bboxs = []
        difficult = []

        #load objects associated with image: <root>/Annotations/<id>.xml
        anno_dir = os.path.join(os.path.dirname(os.path.dirname(fname)), "Annotations")
        obj_fname = os.path.join(anno_dir, os.path.splitext(os.path.basename(fname))[0] + ".xml")
        if os.path.isfile(obj_fname):
            obj_tree = xml.parse(obj_fname).getroot()
            for obj in obj_tree.iter("object"):
                cls = self.class_labels[obj.find("name").text]
                #VOC marks hard instances as "difficult" (excluded from eval)
                diff = bool(int(obj.find("difficult").text) > 0)
                difficult.append(diff)
                #minus one due to MATLAB stupidity (VOC coords are 1-based)
                bndbox = obj.find("bndbox")
                min_x = int(bndbox.find("xmin").text) - 1
                min_y = int(bndbox.find("ymin").text) - 1
                max_x = int(bndbox.find("xmax").text) - 1
                max_y = int(bndbox.find("ymax").text) - 1
                bboxs.append((cls, (min_x, min_y, max_x, max_y)))
        elif is_training:
            raise Exception("Could not find annotations for training data!")

        self.images.append({
            "fname": fname,
            "bboxs": bboxs,
            "difficult": difficult
        })

    #sort images initially (deterministic order before any shuffling)
    self.images.sort(key=lambda im: im["fname"])

    self.image_loader = ImageLoader(thread_num, is_training, format_params)
    # self.image_loader.rgb_mean = numpy.array([0.41, 0.46, 0.48], dtype=numpy.float32)
    # self.image_loader.rgb_std = numpy.array([1,1,1], dtype=numpy.float32)

    #from Imagenet (natural image set = should have similar values)
    self.image_loader.rgb_mean = numpy.array([0.485, 0.456, 0.406], dtype=numpy.float32)
    self.image_loader.rgb_std = numpy.array([0.229, 0.224, 0.225], dtype=numpy.float32)
    self.image_loader.rgb_eigen_val = numpy.array([0.2175, 0.0188, 0.0045], dtype=numpy.float32)
    self.image_loader.rgb_eigen_vec = numpy.array(
        [[-0.5675, 0.7192, 0.4009],
         [-0.5808, -0.0045, -0.8140],
         [-0.5836, -0.6948, 0.4203]], dtype=numpy.float32)

    #subset bookkeeping: the dataset is served in chunks of subset_size images
    self.output_size = self.image_loader.crop
    self.subset_size = min(format_params.get("images_per_subset", 10000), len(self.images))
    self.subset_total_size = len(self.images)
    self.subset_num = format_params.get("subset_num", sys.maxsize)
    self.subset_num = min(self.subset_num, int(math.ceil(self.subset_total_size / self.subset_size)))
    self.subset_index = -1

    logging.info("Using Pascal VOC dataset - size:", self.subset_total_size,
                 "subset_num", self.subset_num, "images per subset:",
                 self.subset_size, self.image_loader)
def load(self, input_dir, data_format, is_training, thread_num, class_labels=None):
    """Load MSCOCO detection annotations / image lists.

    input_dir: coco root with annotations/ plus per-subset image folders.
    data_format: "name,param,..." string whose params select subsets
        (2014-train, 2014-val, 2014-test, 2015-test, 2015-test-dev) and
        images_per_subset / subset_num / bbox_only options.
    class_labels: ignored — labels are always rebuilt from the annotation
        files (NOTE(review): confirm this is intended).

    Populates self.images ({fname, bboxs, id}), self.class_labels,
    self.categories, self.image_loader and subset bookkeeping.
    Raises Exception when no subset is selected.
    """
    self.data = []
    self.thread_num = thread_num

    #everything after the first comma is "key=value" options
    param_str = ",".join(data_format.split(",")[1:])
    format_params = common.get_params_dict(param_str)

    #map format flags to coco subset names (order preserved)
    self.data_types = []
    for flag, data_type in (("2014-train", "train2014"),
                            ("2014-val", "val2014"),
                            ("2014-test", "test2014"),
                            ("2015-test", "test2015"),
                            ("2015-test-dev", "test-dev2015")):
        if format_params.get(flag, False):
            self.data_types.append(data_type)

    if len(self.data_types) == 0:
        raise Exception("please specify mscoco subset")

    #histogram of bboxs-per-image, last bin collects counts >= 31
    bbox_hist = [0] * 32
    self.images = []
    self.class_labels = {}
    self.categories = None
    for data_type in self.data_types:
        #test subsets only ship image info; train/val ship full instances
        if "test" in data_type:
            fname = os.path.join(input_dir, "annotations/image_info_%s.json" % data_type)
        else:
            fname = os.path.join(input_dir, "annotations/instances_%s.json" % data_type)
        json_data = common.json_from_file(fname)

        #get class labels: coco category ids are sparse, remap names to
        #dense indices in first-seen order
        data_categories = {}
        for json_cat in json_data["categories"]:
            data_categories[json_cat["id"]] = json_cat["name"]
            if not json_cat["name"] in self.class_labels:
                self.class_labels[json_cat["name"]] = len(self.class_labels)

        #category tables must agree across selected subsets
        assert (self.categories is None) or (self.categories == data_categories)
        self.categories = data_categories
        logging.verbose("Found %i labels:" % len(self.class_labels))

        #collect bounding boxes keyed by image id; coco boxes are
        #(x, y, w, h) - convert to (x0, y0, x1, y1)
        bboxs = {}
        for json_ann in json_data.get("annotations", []):
            cls_id = self.class_labels[self.categories[json_ann["category_id"]]]
            bbox = json_ann["bbox"]
            bboxs.setdefault(json_ann["image_id"], []).append(
                (cls_id, (bbox[0], bbox[1], bbox[0] + bbox[2], bbox[1] + bbox[3])))
        logging.verbose("Found %i bboxs" % sum(len(bbox) for bbox in bboxs.values()))

        #collect images (test-dev2015 images live in the test2015 folder)
        if data_type == "test-dev2015":
            data_type = "test2015"
        for image in json_data["images"]:
            bbox_list = bboxs.get(image["id"], [])
            bbox_hist[min(len(bbox_list), 31)] += 1
            self.images.append({
                "fname": os.path.join(input_dir, data_type, image["file_name"]),
                "bboxs": bbox_list,
                "id": image["id"]
            })

    #fix: was a bare print() (inconsistent with module-wide logging) and
    #divided by sum(bbox_hist) without guarding the empty-dataset case
    hist_total = sum(bbox_hist)
    if hist_total > 0:
        logging.info("BBox histogram (%):",
                     [round(100.0 * x / hist_total, 1) for x in bbox_hist])

    #setup image loader
    self.image_loader = ImageLoader(thread_num, is_training, format_params)

    #subset bookkeeping: dataset is served in chunks of subset_size images
    self.output_size = self.image_loader.crop
    self.images_per_subset = format_params.get("images_per_subset", 10000)
    self.subset_total_size = len(self.images)
    self.subset_num = format_params.get("subset_num", sys.maxsize)
    self.subset_num = min(self.subset_num,
                          int(math.ceil(self.subset_total_size / self.images_per_subset)))
    self.subset_index = -1
    self.subset_size = self.images_per_subset
    self.bbox_only = format_params.get("bbox_only", False)

    #only use samples with bounding boxes
    if self.image_loader.is_training and self.bbox_only:
        images_bbox = [image for image in self.images if len(image["bboxs"]) > 0]
        logging.info("Removed %i images without bboxs" % (len(self.images) - len(images_bbox)))
        self.images = images_bbox

    logging.info("Using MSCOCO dataset - size:", self.subset_total_size,
                 "subset_num", self.subset_num, "images per subset:",
                 self.subset_size, self.image_loader)
def main():
    """Command-line entry point: parse arguments, load the training (and
    optional test) dataset, build the CNN model and run the epoch loop with
    periodic evaluation and model checkpointing."""

    #load arguments:
    parser = argparse.ArgumentParser(
        description='Train a convolutional network using labelled data.')
    logging.add_arguments(parser)
    parser.add_argument("--model", required=False, default=None,
                        help="Model to continue training.")
    parser.add_argument("--cost-factors", default=[], nargs="+",
                        help="Multiplicative factors for model costs")
    #fix: "opeartions" typo in user-facing help text
    parser.add_argument("--thread-num", type=int, default=1,
                        help="Number of threads to use for supported operations "
                             "(e.g. loading/distorting datasets)")
    parser.add_argument("--extension", default="ppm", help="Image file extension")
    parser.add_argument("--train", default=None,
                        help="The folder with training / validation data")
    parser.add_argument("--test", default=None,
                        help="The folder with testing data (optional)")
    parser.add_argument("--test-epochs", type=int, default=1,
                        help="Epochs between each test evaluation")
    parser.add_argument("--test-mode", default="default", help="Mode to use for testing")
    parser.add_argument("--border-mode", default="valid",
                        help="Border mode for convolutional layers (full, valid)")
    parser.add_argument("--output-prefix", default="./model",
                        help="Output prefix for model files")
    parser.add_argument("--activation", default="relu",
                        help="Activation function used in convolution / hidden layers "
                             "(tanh, relu, leaky-relu)")
    parser.add_argument("--solver", type=str, default="nesterov", help="")
    parser.add_argument("--weight-init", nargs="+", default=["he-backward"],
                        help="Weight initialization scheme")
    parser.add_argument("--learn-rate", type=float, default=0.1,
                        help="Learning rate for weights and biases.")
    parser.add_argument("--learn-momentum", type=float, default=[0.0, 0.0], nargs="+",
                        help="Learning momentum for weights and biases (0.0 - 1.0).")
    parser.add_argument("--learn-anneal", type=float, default=1,
                        help="Annealing factor per epoch for weight and bias learning rate")
    parser.add_argument("--learn-anneal-epochs", nargs="+", type=int, default=[],
                        help="Epochs to apply learning rate annealing (default every epoch)")
    parser.add_argument("--learn-decay", type=float, default=0.0,
                        help="L2 weight decay (not applied to biases). ")
    parser.add_argument("--epochs", type=int, default=30,
                        help="The number of training epochs")
    parser.add_argument("--max-samples", type=int, default=None,
                        help="Maximum samples to load from training set")
    #fix: "batchs" typo in user-facing help text
    parser.add_argument("--batch-size", type=int, default=32,
                        help="Size of processing batches")
    parser.add_argument("--seed", type=int, default=23455,
                        help="Random Seed for weights")
    parser.add_argument("--distort-mode", default=[], nargs="+",
                        help="Distortions to apply to training data (default, cifar10, disable)")
    parser.add_argument("--disable-intermediate", default=False, action="store_true",
                        help="Disable outputting of intermediate model files")
    parser.add_argument("--augment-mirror", default=False, action="store_true",
                        help="Augment training data with horizontally mirrored copies")
    parser.add_argument("--skip-train", default=False, action="store_true",
                        help="Skip training of model")
    parser.add_argument("--skip-layer-updates", type=int, nargs="+", default=[],
                        help="Skip training updates to specified layers")
    parser.add_argument("--model-desc",
                        default=["C[100,7]", "P[2]", "C[150,4]", "P[2]",
                                 "C[250,4]", "P[2]", "C[300,1]", "R"],
                        nargs="+", type=str, help="Network layer description")
    args = parser.parse_args()

    logging.init(args)

    #set random seeds for reproducible weight init / sampling
    random.seed(args.seed)
    numpy.random.seed(args.seed)

    #load training dataset
    logging.info("Loading training data:", args.train)
    train_data = dataset.load(args.train, args.extension, is_training=True,
                              thread_num=args.thread_num)
    data_shape = train_data.get_data_shape()
    class_num = train_data.get_class_num()
    class_labels = train_data.class_labels
    logging.info("Found %i class labels:\n" % class_num, class_labels)

    #hack for reducing training data size
    if args.max_samples is not None:
        train_data.data = random.sample(train_data.data, args.max_samples)

    #mirror training data
    if args.augment_mirror:
        train_data.augment_mirror()

    logging.info("Training: %i samples" % len(train_data))

    #load test dataset (shares the training label mapping)
    test_data = None
    if args.test:
        logging.info("Loading test: " + args.test)
        test_data = dataset.load(args.test, args.extension, is_training=False,
                                 thread_num=args.thread_num, class_labels=class_labels)

    #initialize model
    model = model_cnn.initialize(args, data_shape, class_labels, class_num)
    model.build_train_func(args.solver, args.cost_factors)

    #Run training
    learn_rate = args.learn_rate
    for epoch in range(args.epochs):
        logging.info("----- Training Epoch: %i -----" % epoch)

        #perform training
        if not args.skip_train:
            logging.info("Training with solver " + args.solver + ", learning rate " +
                         str(learn_rate) + " and momentum " + str(args.learn_momentum))

            #shuffle dataset:
            train_data.shuffle()
            for subset in range(train_data.subset_num):
                timer = common.Timer()
                train_data.load_from_subset(subset)
                logging.info("Performing Gradient Descent...")
                cost = model.train_epoch(train_data, epoch, learn_rate,
                                         args.learn_momentum, args.learn_decay)
                logging.info("Training subset %i - Cost: %.3f, Took %.1f sec"
                             % (subset, cost, timer.current()))

        #anneal every epoch unless specific epochs were requested
        if len(args.learn_anneal_epochs) == 0 or (epoch + 1) in args.learn_anneal_epochs:
            logging.verbose("Annealing learning rate")
            learn_rate *= args.learn_anneal

        #perform testing on the configured schedule (and always on last epoch)
        test_error = 0
        if args.test is not None and ((epoch % args.test_epochs) == 0
                                      or epoch == (args.epochs - 1)):
            test_error, test_class_errors = compute_error(test_data, model)
            logging.info("Epoch %i test error: %.2f%% (%i samples)"
                         % (epoch, test_error, int(test_error * len(test_data) / 100.0)))
            save_results(args.output_prefix + "_epoch%03i.test" % epoch,
                         test_error, test_class_errors)

        #save intermediate models
        if not args.disable_intermediate:
            model_cnn.save_to_file(model, args.output_prefix + "_epoch%03i.mdl.gz" % (epoch))

    #save final model
    #NOTE(review): 'epoch' is undefined when --epochs 0; assumed never run
    #with zero epochs - confirm
    model_cnn.save_to_file(model, args.output_prefix + "_epoch%03i_final.mdl.gz" % epoch)
    logging.info("Finished Training")
def run_async(self):
    """Parameter-server loop: accept client connections and service
    "count" / "update" / "sync" commands until interrupted.

    Works over raw sockets or MPI depending on self.use_mpi (both set up by
    connect_clients()).  Returns 1 on exit (both clean interrupt and error).
    """
    #connect to clients (sockets, or MPI ranks when use_mpi is set)
    server_socket, client_sockets = self.connect_clients()

    #construct update object for each client / server
    client_update = shared.ModelUpdate(self.model_dims)
    server_update = shared.ModelUpdate(self.model_dims)

    #perform synchronization
    logging.info("Begin processing...")
    count = 0       # shared counter handed out to "count" requests
    sync = []       # indices of clients that have requested a "sync"
    while True:
        try:
            #get sockets which have data
            # print("Waiting for updates...")
            if self.use_mpi:
                #MPI path: one blocking receive; the sender's rank plays the
                #role of the "ready socket" below
                from mpi4py import MPI
                mpi_comm = MPI.COMM_WORLD
                mpi_status = MPI.Status()
                client_data = mpi_comm.recv(source=MPI.ANY_SOURCE,
                                            tag=MPI.ANY_TAG,
                                            status=mpi_status)
                client_json = json.loads(client_data.decode('utf-8'))
                read_sockets = [mpi_status.Get_source()]
            else:
                read_sockets, _, _ = select.select(client_sockets, [], [])

            for i, sock in enumerate(client_sockets):
                #socket is sending data (in MPI mode at most one entry
                #matches and client_json was already decoded above)
                if sock in read_sockets:
                    #read data
                    if not self.use_mpi:
                        logging.info("client %i: recieving command" % i)
                        client_json = network.recv_json(sock)

                    #get counter; "peek" reads without incrementing
                    if client_json["cmd"] == "count":
                        logging.info("count:", count, "peek:", client_json["peek"])
                        network.send_json(sock, {"count": count}, self.use_mpi)
                        if not client_json["peek"]:
                            count += 1

                    #apply client update to server updates and reply with the
                    #accumulated server state
                    elif client_json["cmd"] == "update":
                        logging.info("update")
                        client_update.import_json(client_json["data"])
                        server_update.add_delta(client_update, self.momentum)
                        network.send_json(sock, server_update.export_json(), self.use_mpi)

                    #synchronize model between all clients / server
                    elif client_json["cmd"] == "sync":
                        logging.info("sync, initial:", client_json["initial"])
                        if not i in sync:
                            sync.append(i)

                        #on 'initial' sync set server update
                        if client_json["initial"]:
                            server_update.import_json(client_json["data"])

                        #perform sync after all clients have call sync:
                        #broadcast the server state and reset the barrier
                        if len(sync) == len(client_sockets):
                            model_update = server_update.export_json()
                            for s in client_sockets:
                                network.send_json(s, model_update, self.use_mpi)
                            sync = []
                    else:
                        logging.error("ERROR: Unknown client command: ", client_json["cmd"])

        except (KeyboardInterrupt, SystemExit):
            #operator interrupt - treated as a normal shutdown
            logging.info("Done")
            return 1
        except Exception as e:
            #any other failure ends the server loop
            #NOTE(review): exception detail only - consider logging traceback
            logging.error("Encounter exception: ", e)
            return 1
def load(self, input_dir, data_format, is_training, thread_num, class_labels=None):
    """Load an Imagenet-style directory dataset with optional bbox XML.

    input_dir: directory of per-class image folders; a sibling
        "class_labels.txt" and "bbox/" directory may provide labels and
        bounding boxes, and "image_list.json" caches the scan result.
    class_labels: optional pre-built {name: index} mapping; when None it is
        read from class_labels.txt or derived from the directory names.

    Populates self.images ({fname, class, bboxs}), self.class_labels,
    self.image_loader and subset bookkeeping.
    Raises Exception if the bbox directory is needed but missing.
    """
    from .basic import DatasetFromDir

    #normalize away a single trailing slash so dirname() finds the parent
    self.input_dir = input_dir
    if self.input_dir[-1] == '/':
        self.input_dir = self.input_dir[:-1]
    self.data_format = data_format
    self.thread_num = thread_num

    #generate class labels: prefer sibling class_labels.txt, else scan dirs
    self.class_labels = class_labels
    fname = os.path.join(os.path.dirname(self.input_dir), "class_labels.txt")
    if os.path.isfile(fname) and self.class_labels is None:
        logging.info("Loading class labels from:", fname)
        self.class_labels = {}
        with open(fname, "r") as f:
            #each line: "<index> <name>"
            for line in f:
                tokens = line.rstrip('\n').split(" ")
                self.class_labels[tokens[1]] = int(tokens[0])
    elif self.class_labels is None:
        self.class_labels = DatasetFromDir.find_class_labels(input_dir)

    #check to see if buffered file list is present
    list_fname = os.path.join(input_dir, "image_list.json")
    if os.path.isfile(list_fname):
        logging.info("Loading dataset metadata:", list_fname)
        json_data = common.json_from_file(list_fname)
        if json_data.get("version", 0) < 1:
            logging.warning("Warning: image_list.json is old version, missing bounding boxs!")
            self.images = [{"fname": fname, "bboxs": []} for fname in json_data["images"]]
        else:
            self.images = json_data["images"]
    else:
        bbox_dir = os.path.join(os.path.dirname(input_dir), "bbox")
        if not os.path.isdir(bbox_dir):
            raise Exception("ERROR: cannot find bbox dir:" + bbox_dir)

        #fix: os.listdir(...).sort() returns None (list.sort is in-place),
        #so enumerate() raised TypeError - use sorted() for a sorted copy
        fnames = []
        for c in sorted(os.listdir(input_dir)):
            images_cls = DatasetFromDir.find_paths(os.path.join(input_dir, c), "*.JPEG")
            logging.info("Found %i images for class" % len(images_cls), c)
            fnames += images_cls

        logging.info("Finding bboxs in:", bbox_dir)
        self.images = []
        for i, fname in enumerate(fnames):
            logging.verbose("%i/%i" % (i, len(fnames)))
            cls_name = os.path.basename(os.path.dirname(fname))
            obj_fname = os.path.join(bbox_dir, cls_name,
                                     os.path.splitext(os.path.basename(fname))[0] + ".xml")
            bboxs = []
            if os.path.isfile(obj_fname):
                obj_tree = xml.parse(obj_fname).getroot()
                #width/height parsed but unused - kept so a malformed <size>
                #element still fails loudly (NOTE(review): confirm intent)
                size = obj_tree.find("size")
                width = int(size.find("width").text)
                height = int(size.find("height").text)
                for obj in obj_tree.iter("object"):
                    bndbox = obj.find("bndbox")
                    min_x = int(bndbox.find("xmin").text)
                    min_y = int(bndbox.find("ymin").text)
                    max_x = int(bndbox.find("xmax").text)
                    max_y = int(bndbox.find("ymax").text)
                    bboxs.append({"x0": min_x, "x1": max_x, "y0": min_y, "y1": max_y})
            self.images.append({"fname": fname, "bboxs": bboxs})

        #cache the scan; best-effort only (read-only filesystems etc.)
        try:
            logging.info("Saving dataset metadata:", list_fname)
            common.json_to_file(list_fname, {"images": self.images, "version": 1})
        except Exception as e:
            logging.warning("Warning: failed to write buffered image list - ", e)

    #add/fix fields to fit new image_loader interface
    for image in self.images:
        fname = image["fname"]
        cls = self.class_labels[os.path.basename(os.path.dirname(fname))]
        image["class"] = cls
        image["bboxs"] = [(cls, (bb["x0"], bb["y0"], bb["x1"], bb["y1"]))
                          for bb in image["bboxs"]]

    param_str = ",".join(data_format.split(",")[1:])
    format_params = common.get_params_dict(param_str)
    self.image_loader = ImageLoader(thread_num, is_training, format_params)

    #from facebook resnet implementation
    self.image_loader.rgb_mean = numpy.array([0.485, 0.456, 0.406], dtype=numpy.float32)
    self.image_loader.rgb_std = numpy.array([0.229, 0.224, 0.225], dtype=numpy.float32)
    self.image_loader.rgb_eigen_val = numpy.array([0.2175, 0.0188, 0.0045], dtype=numpy.float32)
    self.image_loader.rgb_eigen_vec = numpy.array(
        [[-0.5675, 0.7192, 0.4009],
         [-0.5808, -0.0045, -0.8140],
         [-0.5836, -0.6948, 0.4203]], dtype=numpy.float32)

    #others
    self.subset_size = format_params.get("images_per_subset", 10000)
    self.use_null_class = format_params.get("null", False)
    self.subset_num = format_params.get("subset_num", sys.maxsize)
    self.bbox_only = format_params.get("bbox_only", False)

    #only use samples with bounding boxes
    if self.image_loader.is_training and self.bbox_only:
        images_bbox = [image for image in self.images if len(image["bboxs"]) > 0]
        self.images = images_bbox

    #append null class
    if self.use_null_class and not "null" in self.class_labels:
        self.class_labels["null"] = len(self.class_labels)

    self.subset_index = -1
    self.subset_total_size = len(self.images)
    self.subset_num = min(self.subset_num,
                          int(math.ceil(self.subset_total_size / self.subset_size)))

    logging.info("Using Imagenet dataset - size:", self.subset_total_size,
                 "subset_num", self.subset_num, "images per subset:",
                 self.subset_size, self.image_loader)