class Solver:
    """Read the characters in an image with a trained SSD detector.

    The network shape (``n_class``, ``n_channel``), the class labels and the
    name of the weight file are taken from ``model.json`` inside the model
    directory; the weights themselves are loaded from that npz file.
    """

    def __init__(self, dirname=DEFAULT_MODEL_DIR, gpu=-1,
                 nms_thresh=DEFAULT_NMS_THRESH,
                 score_thresh=DEFAULT_SCORE_THRESH):
        """Build the SSD model and load its trained weights.

        Args:
            dirname: Directory containing ``model.json`` and the npz file
                that the metadata refers to.
            gpu: Device id. The model is moved to that GPU only when the
                value is zero or greater.
            nms_thresh: Forwarded to ``SSD`` as ``nms_thresh``.
            score_thresh: Forwarded to ``SSD`` as ``score_thresh``.
        """
        with open(os.path.join(dirname, "model.json"), 'r') as fp:
            metadata = json.load(fp)

        n_class = metadata['n_class']
        n_channel = metadata['n_channel']
        npz_file = metadata['file']
        self.class_labels = metadata['class_labels']

        self.model = SSD(n_class=n_class, n_channel=n_channel,
                         nms_thresh=nms_thresh, score_thresh=score_thresh,
                         grids=DEFAULT_GRIDS,
                         aspect_ratios=DEFAULT_ASPECT_RATIOS,
                         variance=DEFAULT_VARIANCE)
        chainer.serializers.load_npz(os.path.join(dirname, npz_file),
                                     self.model)

        if gpu >= 0:
            chainer.backends.cuda.get_device_from_id(gpu).use()
            self.model.to_gpu(gpu)

    @property
    def xp(self):
        """Array module (``self.model.xp``) the model currently uses."""
        return self.model.xp

    def solve(self, filename):
        """Detect and read the characters in the first frame of a file.

        The first frame is converted to grayscale, scaled to ``[0, 1]`` and
        fed to the model as a ``(1, 1, h, w)`` float32 batch. When more than
        ``NCHARS`` detections come back, only the ``NCHARS`` highest-scoring
        ones are kept. The survivors are ordered by the second bounding-box
        coordinate (presumably the left edge -- confirm against the bbox
        layout produced by ``SSD.predict``).

        Args:
            filename: Path handed to ``cv2.VideoCapture``.

        Returns:
            A tuple ``(text, bbox, score)``: the recognized string, the
            integer-rounded bounding boxes and the scores, all in the same
            sorted order.

        Raises:
            OSError: If no frame can be read from ``filename``.
        """
        xp = self.xp

        capture = cv2.VideoCapture(filename)
        try:
            ok, color_image = capture.read()
        finally:
            # Always give the file/decoder handle back, even if read() fails.
            capture.release()
        if not ok or color_image is None:
            raise OSError(
                "{}: cannot read an image frame".format(filename))

        gray_image = cv2.cvtColor(color_image, cv2.COLOR_BGR2GRAY)
        h, w = gray_image.shape[:2]
        img = xp.array(gray_image / 255.0,
                       dtype=xp.float32).reshape(1, 1, h, w)

        output = self.model.predict(img)
        # predict() returns per-image lists; only one image was submitted.
        bbox, label, score = output[0][0], output[1][0], output[2][0]

        # Bring the results to the CPU so NumPy can post-process them.
        bbox = chainer.dataset.to_device(-1, bbox)
        label = chainer.dataset.to_device(-1, label)
        score = chainer.dataset.to_device(-1, score)

        if len(label) > NCHARS:
            # Keep the NCHARS best detections, highest score first.
            indices = np.argsort(score)[-1:-NCHARS-1:-1]
            bbox = bbox[indices]
            label = label[indices]
            score = score[indices]

        # Round every coordinate to an integer via int(v + 0.5).
        bbox = np.vectorize(lambda v: int(v + 0.5), otypes=[int])(bbox)

        indices = np.argsort(bbox[:, 1])
        text = ''.join([self.class_labels[label[i]] for i in indices])
        return text, bbox[indices], score[indices]
def main():
    """Train the SSD model from the command line and save the result.

    Parses the command-line options, splits the dataset into a training part
    (the first 90 %, rounded) and a validation part (the rest), runs a Chainer
    trainer, then writes ``model.npz`` and ``model.json`` into the ``--model``
    directory.

    ``--retrain`` starts from the weights in an existing ``model.npz``;
    ``--resume`` restores the trainer from the highest-numbered
    ``snapshot_epoch_*`` file. The two options are mutually exclusive.

    Raises:
        SystemExit: With status 1 when both ``--resume`` and ``--retrain``
            are given, or when ``--retrain`` finds no ``model.npz``.
        ValueError: If the optimizer name is not one of the known choices.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--channel', type=int, default=DEFAULT_CHANNEL)
    parser.add_argument('--batchsize', type=int, default=DEFAULT_BATCHSIZE)
    parser.add_argument('--epoch', type=int, default=DEFAULT_EPOCH)
    parser.add_argument('--frequency', type=int, default=DEFAULT_FREQUENCY)
    parser.add_argument('--alpha', type=float, default=DEFAULT_ALPHA)
    parser.add_argument('--opt',
                        choices=('adam', 'adabound', 'amsgrad', 'amsbound'),
                        default=DEFAULT_OPTIMIZER)
    parser.add_argument('--gpu', type=int, default=-1)
    parser.add_argument('--model', default='model')
    parser.add_argument('--resume', action='store_true', default=False)
    parser.add_argument('--retrain', action='store_true', default=False)
    args = parser.parse_args()

    if args.resume and args.retrain:
        print('--resume and --retrain are exclusive')
        # SystemExit rather than the site-provided exit() helper, which is
        # not guaranteed to exist in every interpreter configuration.
        raise SystemExit(1)

    dataset = Dataset(DEFAULT_DATASET_DIR)
    n_data = len(dataset)
    thresh = int(n_data * 0.9 + 0.5)
    print("{} records found in the dataset. {} records will be used for training".format(n_data, thresh))

    n_class = dataset.n_class
    class_ids = dataset.class_ids
    class_labels = dataset.class_labels

    model = SSD(n_class=n_class, n_channel=args.channel,
                grids=DEFAULT_GRIDS, aspect_ratios=DEFAULT_ASPECT_RATIOS,
                nms_thresh=DEFAULT_NMS_THRESH,
                score_thresh=DEFAULT_SCORE_THRESH,
                variance=DEFAULT_VARIANCE)
    train_chain = MultiboxTrainChain(model)

    if args.gpu >= 0:
        chainer.backends.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    train = TransformDataset(dataset[:thresh], Transform(model.coder))
    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)

    test = dataset[thresh:]
    test_iter = chainer.iterators.SerialIterator(
        test, args.batchsize, repeat=False, shuffle=False)

    # Every supported optimizer is Adam with a combination of two switches:
    # name -> (adabound, amsgrad).
    optimizer_flags = {
        'adam': (False, False),
        'adabound': (True, False),
        'amsgrad': (False, True),
        'amsbound': (True, True),
    }
    if args.opt not in optimizer_flags:
        raise ValueError('invalid optimizer')
    adabound, amsgrad = optimizer_flags[args.opt]

    optimizer = chainer.optimizers.Adam(alpha=args.alpha,
                                        adabound=adabound, amsgrad=amsgrad)
    optimizer.setup(train_chain)

    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), args.model)

    log_interval = 1, 'epoch'
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    trainer.extend(extensions.PrintReport(
        ['epoch', 'iteration', 'lr',
         'main/loss', 'main/loss/loc', 'main/loss/conf',
         'validation/main/acc', 'elapsed_time']),
        trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=5))
    trainer.extend(Evaluator(test_iter, model, device=args.gpu))
    trainer.extend(
        extensions.snapshot(filename='snapshot_epoch_{.updater.epoch}'),
        trigger=(args.frequency, 'epoch'))
    trainer.extend(extensions.PlotReport(
        ['main/loss', 'main/loss/loc', 'main/loss/conf'],
        x_key='epoch', file_name='loss.png'))

    model_file = os.path.join(args.model, "model.npz")

    if args.retrain:
        if not os.path.isfile(model_file):
            print("{}: not found".format(model_file))
            raise SystemExit(1)
        print("Loading pretrained model from {}...".format(model_file))
        chainer.serializers.load_npz(model_file, model)

    if args.resume:
        # Pick the snapshot with the largest trailing epoch number.
        maxnum = -1
        for s in glob.glob(os.path.join(args.model, "snapshot_epoch_*")):
            m = re.search(r'[0-9]+$', s)
            if m:
                maxnum = max(maxnum, int(m.group(0)))
        if maxnum < 0:
            print("No snapshot file found. Ignore --resume option")
        else:
            snapshot_file = os.path.join(
                args.model, "snapshot_epoch_{}".format(maxnum))
            print("Loading the snapshot data from {}.".format(snapshot_file))
            chainer.serializers.load_npz(snapshot_file, trainer)

    trainer.run()

    print("Saving the model to {}.".format(model_file))
    chainer.serializers.save_npz(model_file, model)

    # The metadata lets a loader rebuild the same network before it reads
    # the weights from the npz file.
    metadata = {
        'file': "model.npz",
        'n_channel': args.channel,
        'n_class': n_class,
        'class_labels': class_labels,
    }
    with open(os.path.join(args.model, "model.json"), "w") as fp:
        json.dump(metadata, fp, sort_keys=True)