def set_samples(self, sample_bboxs):
    """Install a new set of sample bounding boxes on this object.

    ``sample_bboxs`` is converted to a dense array with
    ``self.get_bbox_array``; that array is written into ``self.sample_bbox``
    through ``set_value`` and the untouched input is remembered on
    ``self.sample_bbox_list``.

    Returns:
        The dense array produced by ``self.get_bbox_array``.
    """
    stopwatch = common.Timer()

    bbox_array = self.get_bbox_array(sample_bboxs)
    self.sample_bbox.set_value(bbox_array)
    # Keep the original (non-dense) structure around as well.
    self.sample_bbox_list = sample_bboxs

    elapsed_ms = stopwatch.current_ms()
    logging.debug("Took %i ms to set_samples" % elapsed_ms)
    return bbox_array
def get_overlap_iou(obj_bboxs, sample_bboxs):
    """Return the pairwise intersection-over-union of two sets of boxes.

    Each box is read as four columns; columns 0/2 are differenced for one
    extent and columns 1/3 for the other (presumably (x0, y0, x1, y1) --
    confirm against callers).  The Theano function doing the work is compiled
    on first use and cached in the module-level ``overlap_func``.

    Returns:
        ``None`` when either input is empty, otherwise the result of the
        compiled function: one row per entry of ``obj_bboxs`` and one column
        per entry of ``sample_bboxs``.
    """
    global overlap_func

    # The function is compiled lazily -- and before the empty-input check, so
    # even a call with empty inputs triggers the one-off build.
    if overlap_func is None:
        logging.debug("Building overlap function")
        boxes_a = tensor.matrix()
        boxes_b = tensor.matrix()

        # Per-box areas.
        area_a = (boxes_a[:, 2] - boxes_a[:, 0]) * (boxes_a[:, 3] - boxes_a[:, 1])
        area_b = (boxes_b[:, 2] - boxes_b[:, 0]) * (boxes_b[:, 3] - boxes_b[:, 1])

        # Broadcast a against b to get every pairwise overlap extent; negative
        # extents (no overlap) are clamped to zero.
        right = tensor.minimum(boxes_a[:, None, 2], boxes_b[None, :, 2])
        left = tensor.maximum(boxes_a[:, None, 0], boxes_b[None, :, 0])
        overlap_w = tensor.maximum(right - left, 0)

        bottom = tensor.minimum(boxes_a[:, None, 3], boxes_b[None, :, 3])
        top = tensor.maximum(boxes_a[:, None, 1], boxes_b[None, :, 1])
        overlap_h = tensor.maximum(bottom - top, 0)

        intersection = overlap_w * overlap_h
        union = (area_a[:, None] + area_b[None, :] - intersection)
        iou = intersection / union

        overlap_func = theano.function([boxes_a, boxes_b], iou,
                                       allow_input_downcast=True)

    # len() rather than truthiness: the inputs may be numpy arrays.
    if len(obj_bboxs) == 0 or len(sample_bboxs) == 0:
        return None

    obj_array = numpy.array(obj_bboxs, dtype=numpy.float32)
    sample_array = numpy.array(sample_bboxs, dtype=numpy.float32)
    return overlap_func(obj_array, sample_array)
def get_bbox_array(self, sample_bboxs):
    """Convert ``sample_bboxs`` into a dense float32 array.

    A zero-filled array of shape
    ``(batch_size, sample_num, sample_num, 4)`` is allocated and handed to
    ``c_code.build_bbox_array`` together with ``sample_bboxs``; the helper is
    expected to fill it in place (its return value is not used here).

    Returns:
        The array after ``c_code.build_bbox_array`` has run.
    """
    stopwatch = common.Timer()

    shape = (self.batch_size, self.sample_num, self.sample_num, 4)
    dense = numpy.zeros(shape, dtype=numpy.float32)
    c_code.build_bbox_array(sample_bboxs, dense)

    elapsed_ms = stopwatch.current_ms()
    logging.debug("Took %i ms to get_bbox_array" % elapsed_ms)
    return dense
def debug_denet_export_targets(model, data_x, data_m, targets):
    """Debug helper: export ground truth and training targets as PNG files.

    For every batch element ``b``:

    * one ``%06i_gt_<label>.png`` image per distinct class in
      ``data_m[b]["class"]``, drawn from ``data_x[b]`` with the objects of
      that class;
    * for every cost layer of type ``"denet-corner"`` a
      ``%06i_l%i_corner.png`` image, and for every layer of type
      ``"denet-detect"`` a ``%06i_l%i_det.png`` image plus one debug log line
      per sample bounding box.

    ``targets`` is read as a flat sequence holding two entries per cost
    layer (index ``2 * i`` and ``2 * i + 1``).
    """
    logging.debug("DEBUGGING! Exporting targets")

    # Invert the label -> class-id mapping so ids can be turned into names.
    label_of_class = {v: k for k, v in model.class_labels.items()}

    for b in range(model.batch_size):
        meta = data_m[b]

        # export ground truth
        for cls in set(meta["class"]):
            cls_objs = [
                obj
                for obj_cls, obj in zip(meta["class"], meta["objs"])
                if obj_cls == cls
            ]
            gt_fname = "%06i_gt_%s.png" % (b, label_of_class[cls])
            common.export_activation_rgb(gt_fname, data_x[b, :, :, :], cls_objs)

        # export targets
        for index, layer in enumerate(model.cost_layers):
            # The first entry of the pair is fetched but not used below.
            yt_index = targets[index * 2 + 0]
            yt_value = targets[index * 2 + 1]
            layer_type = layer.type_name

            if layer_type == "denet-corner":
                (corner_pr,) = common.ndarray_unpack(yt_value, [layer.corner_shape])
                common.export_activation("%06i_l%i_corner.png" % (b, index),
                                         corner_pr[b, 1, :, :, :])

            elif layer_type == "denet-detect":
                (det_pr,) = common.ndarray_unpack(yt_value, [layer.det_shape])

                for i, sample in enumerate(layer.sparse_layer.sample_bboxs[b]):
                    # Flat sample index -> (i, j) position in the sample grid.
                    sample_j, sample_i = divmod(i, layer.sparse_layer.sample_num)
                    # Scale the bounding box by the model width / height.
                    bbox = sample[2]
                    bbox_px = (int(bbox[0] * model.width),
                               int(bbox[1] * model.height),
                               int(bbox[2] * model.width),
                               int(bbox[3] * model.height))
                    logging.debug("%i - sample:" % b, (sample_i, sample_j),
                                  "bbox:", bbox_px,
                                  "pr:", sample[1],
                                  "corner:", sample[0])

                common.export_activation("%06i_l%i_det.png" % (b, index),
                                         det_pr[b, :, :, :])
def get_samples(self, data_x, train=False, store_shared=False):
    """Run the corner network on ``data_x`` and build sample bounding boxes.

    The Theano corner function is compiled on first call and cached on
    ``self.corner_func``; ``train`` and ``store_shared`` therefore only take
    effect on that first call.  When ``store_shared`` is true the compiled
    function also updates ``self.corner_layer.sample_shared`` from
    ``self.corner_layer.sample``.

    If the module-level ``profile`` flag is set, the function is profiled,
    its graph is exported to ``./corner.graph`` and the process exits.

    Returns:
        Whatever ``c_code.build_samples`` returns for the computed corner
        probabilities.
    """
    global profile

    if self.corner_func is None:
        logging.verbose("Building corner function - store samples:", store_shared,
                        "train:", train)
        if store_shared:
            shared_updates = [(self.corner_layer.sample_shared,
                               self.corner_layer.sample)]
        else:
            shared_updates = []

        # Pin the train flag to a constant for this compiled function.
        train_givens = [(get_train(), tensor.cast(int(train), 'int8'))]
        self.corner_func = theano.function(
            [self.model_input],
            self.corner_layer.corner_pr,
            updates=shared_updates,
            profile=profile,
            givens=train_givens,
            on_unused_input='ignore')

    # find corners
    stopwatch = common.Timer()
    logging.debug("Running corner function")
    corner_pr = self.corner_func(data_x)

    if profile:
        logging.debug("Profiling corner function")
        theano_util.profile(self.corner_func, 10, data_x)
        theano_util.export_graph("./corner.graph", self.corner_func)
        logging.debug("Done")
        exit(0)

    # build sampling bounding boxs
    stopwatch.mark()
    logging.debug("Build samples (%i threads)" % self.thread_num)
    samples = c_code.build_samples(
        self.thread_num,
        corner_pr,
        self.corner_threshold,
        self.sample_num,
        self.corner_max,
        self.local_max,
        self.nms_threshold)
    stopwatch.mark()

    timing = (stopwatch.current_ms(), stopwatch.delta_ms(0),
              stopwatch.delta_ms(1), self.corner_max)
    logging.verbose(
        "Took %i ms to get_samples (%i model, %i build, %i max corners) " % timing)
    return samples
def run_train_epoch(args, update_client, workers, model, train_data, learn_rate):
    """Train for one epoch across all workers and return the mean batch cost.

    The epoch is processed subset by subset, as handed out by
    ``update_client.get_subset_next()`` (a negative value ends the epoch).
    While one subset is being trained on, the next one is exported in the
    background by a ``DatasetExportThread``.  Within a subset, every iteration
    gives each worker up to ``args.batch_size_factor`` mini-batches, runs the
    workers in parallel threads and then averages their model updates into
    ``model_update``.

    Args:
        args: options object; reads ``batch_size_factor``, ``output_prefix``,
            ``model_dims``, ``model_save_dt`` (minutes, ``<= 0`` disables
            intermediate saves) and ``seed``.
        update_client: provides ``epoch``, ``get_subset_next()`` and
            ``sync()``.
        workers: training workers, one thread is spawned per worker per
            iteration.
        model: model being trained; updated via ``model_update.export_updates``.
        train_data: dataset; shuffled in place at the start of the epoch.
        learn_rate: learning rate written to every worker.

    Returns:
        Sum of the per-iteration mean costs divided by the number of
        iterations actually run, or ``float("nan")`` when no iteration ran
        (previously this case raised ``ZeroDivisionError``).
    """
    import model_cnn

    logging.info("Perform train...")
    batch_size_factor = args.batch_size_factor
    output_prefix = args.output_prefix
    model_dims = args.model_dims
    model_save_dt = args.model_save_dt * 60  # minutes -> seconds

    # update learning rates:
    for worker in workers:
        with worker.learn_rate.get_lock():
            worker.learn_rate.value = learn_rate

    # randomly shuffle data before each epoch, set seed to ensure each node
    # has same data order
    random.seed(args.seed + update_client.epoch)
    train_data.shuffle()

    # NOTE(review): the initial cross-node sync is disabled in this version;
    # only the local model is imported into the update buffer.
    model_update = shared.ModelUpdate(model_dims)
    model_update.import_updates(model)
    # update_client.sync(model_update, workers, initial=True)

    def train_worker_thread(worker, indexs, data_x, data_y):
        # Push the current averaged model to the worker, train on the
        # mini-batches listed in indexs, then read the worker's model back.
        worker.wait()
        worker.model_write(model_update)
        worker.train_begin()
        for i in indexs:
            dx = data_x[i * model.batch_size:(i + 1) * model.batch_size]
            dy = data_y[i * model.batch_size:(i + 1) * model.batch_size]
            worker.train_step(dx, dy)
            worker.wait()
        worker.train_end()
        worker.model_read()
        worker.wait()

    # get subset next
    subset_next = update_client.get_subset_next()

    # start export of data
    batch_size = len(workers) * model.batch_size * batch_size_factor
    logging.info(
        "SGD batch size is %ix%ix%i = %i"
        % (batch_size_factor, len(workers), model.batch_size, batch_size))
    export_thread = DatasetExportThread(model, train_data, subset_next,
                                        batch_size, True)

    # start processing
    total_cost = 0
    total_it = 0
    subset_current = subset_next
    epoch_current = update_client.epoch
    for worker in workers:
        worker.set_epoch(epoch_current)

    timer = common.Timer()
    timer_save = common.Timer()
    while subset_next >= 0:

        # wait until export is ready
        timer.reset()
        export_thread.wait()
        data_x, data_y, data_size = export_thread.get_export()
        subset_current = subset_next
        del export_thread
        if timer.current() > 1:
            logging.warning(
                "Warning: needed an additional %.1f seconds for dataset export"
                % timer.current())

        # print training classes for checking random seed etc
        logging.debug("Sample Metas: ", data_y[0:min(3, len(data_y))])

        # start exporting next subset
        subset_next = update_client.get_subset_next()
        if subset_next >= 0:
            export_thread = DatasetExportThread(model, train_data, subset_next,
                                                batch_size, True)

        # NOTE(review): server delta tracking is disabled in this version.
        # #store initial model before changes
        # model_update_delta = model_update.copy()

        logging.info("Evaluating training function")
        timer.reset()
        batch_num = data_x.shape[0] // model.batch_size
        # Expected iteration count (floor); only used for progress display.
        it_num = batch_num // (len(workers) * batch_size_factor)
        index = 0
        subset_cost = 0
        # Iterations actually executed; differs from it_num when batch_num is
        # not a multiple of len(workers) * batch_size_factor.
        subset_it = 0
        while index < batch_num:

            total_ts = time.time()

            threads = []
            for worker in workers:
                # Hand this worker the next (up to) batch_size_factor batches.
                chunk_end = min(index + batch_size_factor, batch_num)
                worker_indexs = list(range(index, chunk_end))
                index += len(worker_indexs)

                t = threading.Thread(
                    target=train_worker_thread,
                    args=(worker, worker_indexs, data_x, data_y))
                t.start()
                threads.append((t, time.time()))

            proc_ts = []
            for t, start_ts in threads:
                t.join()
                proc_ts.append(int(1000 * (time.time() - start_ts)))

            # average models between GPUS and print batch info
            combine_ts = time.time()
            batch_cost = 0
            model_update.set_mean_init()
            for worker in workers:
                model_update.set_mean_update(worker.model_update)
                with worker.cost.get_lock():
                    batch_cost += worker.cost.value
            model_update.set_mean_finish()
            batch_cost /= len(workers)
            subset_cost += batch_cost
            subset_it += 1
            it_index = index // (len(workers) * batch_size_factor)
            combine_ts = int(1000 * (time.time() - combine_ts))

            logging.verbose("Processing times (ms):", proc_ts,
                            ", Combine time: %i ms" % combine_ts)
            logging.info(
                "Subset %i/%i, Batch It %i/%i"
                % (subset_current + 1, train_data.subset_num, it_index, it_num),
                "- Cost:", batch_cost,
                "Time: %i ms" % (1000 * (time.time() - total_ts)))

        # Divide by the number of costs actually summed; an empty subset
        # reports NaN instead of raising ZeroDivisionError.
        subset_mean = subset_cost / subset_it if subset_it else float("nan")
        logging.info(
            "Training subset %i took %0.1f sec, mean cost:"
            % (subset_current + 1, timer.current()),
            subset_mean)
        total_it += subset_it
        total_cost += subset_cost

        # update with server (if one exists)
        model_update.export_updates(model)
        # model_update_delta.set_delta(model_update)
        # update_client.update(model_update_delta, model_update, workers)

        # save intermediate models
        if timer_save.current() > model_save_dt and model_save_dt > 0:
            model_cnn.save_to_file(
                model,
                output_prefix + "_epoch%03i_subset%03i.mdl.gz"
                % (epoch_current, subset_current + 1))
            timer_save.reset()

    # perform final sync so that all nodes have the same model
    update_client.sync(model_update, workers)

    # save final models
    model_cnn.save_to_file(
        model, output_prefix + "_epoch%03i_final.mdl.gz" % (epoch_current))

    if total_it == 0:
        # Nothing was trained on (no subsets, or every subset was too small
        # to form a batch): there is no meaningful mean cost.
        logging.warning("Warning: no training iterations were run this epoch")
        return float("nan")
    return (total_cost / total_it)