コード例 #1
0
def main(args):
    """Create the input blobs, build the init and prediction nets, and print
    the prediction net definition.

    Args:
        args: options object; this function reads ``args.init_net``,
            ``args.pred_net`` and ``args.cudnn``.
    """
    device_opts = deviceOpts()
    workspace.CreateBlob('conv4_norm')
    workspace.CreateBlob('data')
    # The init net result is not used in the visible part of this function.
    init_def = initNet(args.init_net, device_opts)
    net_def = createNet(args.pred_net, device_opts, use_cudnn=args.cudnn)

    # Call form of print: valid on Python 3 and prints the same single value
    # on Python 2, where the statement form was a Python-3 syntax error.
    print(net_def)
コード例 #2
0
    def testSparse(self):
        """Fit a sparse linear model through duplicated indices and check 'w'.

        Every weight of ``perfect_model`` is reachable through ``DUPLICATION``
        binary feature columns, so a weight can be gathered once or twice for
        a given sample. The net is trained for up to 2000 randomly drawn
        samples, once feeding int32 indices and once int64 indices, and blob
        'w' must then match ``perfect_model`` within ``atol=1e-2``.
        """
        # Two indices per weight, so duplicated indices are exercised.
        DUPLICATION = 2
        perfect_model = np.array([2, 6, 5, 0, 1]).astype(np.float32)
        num_weights = perfect_model.size
        np.random.seed(123)  # keep the test deterministic
        data = np.random.randint(
            2, size=(20, num_weights * DUPLICATION)
        ).astype(np.float32)
        label = np.dot(data, np.repeat(perfect_model, DUPLICATION))

        model = cnn.CNNModelHelper("NCHW", name="test")
        # Register the parameter by hand, imitating the model wrapper.
        w = model.param_init_net.ConstantFill(
            [], 'w', shape=[num_weights], value=0.0)
        model.params.append(w)
        picked = model.net.Gather([w, 'indices'], 'gather')
        out = model.ReduceFrontSum(picked, 'sum')

        sq = model.SquaredL2Distance([out, 'label'])
        loss = model.AveragedLoss(sq, "avg_loss")
        grad_map = model.AddGradientOperators([loss])
        self.assertIsInstance(grad_map['w'], core.GradientSlice)
        optimizer = self.build_optimizer(model)

        workspace.CreateBlob('indices')
        workspace.CreateBlob('label')

        # Column j of `data` addresses weight feature_to_weight[j].
        feature_to_weight = np.repeat(np.arange(num_weights), DUPLICATION)

        for indices_type in [np.int32, np.int64]:
            workspace.RunNetOnce(model.param_init_net)
            workspace.CreateNet(model.net, True)
            for _ in range(2000):
                idx = np.random.randint(data.shape[0])
                # Weight indices of the binary features set in this sample.
                indices = feature_to_weight[data[idx] == 1]
                if indices.size == 0:
                    # Nothing to gather for an all-zero sample.
                    continue
                flat_indices = indices.reshape((indices.size,))
                workspace.FeedBlob('indices', flat_indices.astype(indices_type))
                sample_label = np.array(label[idx]).astype(np.float32)
                workspace.FeedBlob('label', sample_label)
                workspace.RunNet(model.net.Proto().name)

            np.testing.assert_allclose(
                perfect_model, workspace.FetchBlob('w'), atol=1e-2)
        self.check_optimizer(optimizer)
コード例 #3
0
ファイル: loader.py プロジェクト: raiden128/Detectron
 def create_enqueue_blobs(self):
     """Create the enqueue blobs in the workspace and return their names.

     One name of the form ``<output>_enqueue_<loader_id>`` is built per
     loader output. With GPUs configured, each name is created under the
     named CUDA scope of every GPU; with zero GPUs each name is created
     once under the current scope.

     Returns:
         The list of unscoped enqueue blob names.
     """
     loader_id = self._loader_id
     enqueue_blob_names = []
     for output_name in self.get_output_names():
         enqueue_blob_names.append(
             '{}_enqueue_{}'.format(output_name, loader_id))

     if self._num_gpus == 0:
         # No GPU scopes to enter: create the blobs under the current scope.
         for name in enqueue_blob_names:
             workspace.CreateBlob(core.ScopedName(name))
         return enqueue_blob_names

     for gpu_id in range(self._num_gpus):
         with c2_utils.NamedCudaScope(gpu_id):
             for name in enqueue_blob_names:
                 workspace.CreateBlob(core.ScopedName(name))
     return enqueue_blob_names
コード例 #4
0
    def test_last_n_window_ops_shape_inference_4d_input(self):
        """Check the inferred 'output' shape of LastNWindowCollector for 4-D input.

        The inferred shape must be ``num_to_collect`` (7) followed by the
        trailing dimensions of the 'output' blob after the net has run once.
        """
        input_shape = [3, 2, 4, 5]
        num_values = functools.reduce(operator.mul, input_shape)

        collect_net = core.Net("collect_net")
        collect_net.GivenTensorFill(
            [],
            "input",
            shape=input_shape,
            values=[float(v) for v in range(num_values)],
        )

        workspace.CreateBlob("output")
        workspace.FeedBlob("next", np.array(0, dtype=np.int32))
        collect_net.LastNWindowCollector(
            ["output", "next", "input"],
            ["output", "next"],
            num_to_collect=7,
        )
        shapes, types = workspace.InferShapesAndTypes([collect_net])
        workspace.RunNetOnce(collect_net)

        trailing_dims = list(workspace.blobs["output"].shape[1:])
        expected_shape = np.array([7] + trailing_dims)
        self.assertTrue(np.array_equal(shapes["output"], expected_shape))
コード例 #5
0
def add_training_inputs(model, roidb=None):
    """Add the training input blobs and dequeue ops to an already-built model.

    Must be called *after* model_builder.create(). The net is built without
    input ops first so that failures surface before the (slow) dataset load;
    the input ops are appended here and then rotated to the front of the
    net's op list.

    Args:
        model: trainable model helper (``model.train`` must be truthy).
        roidb: optional roidb; when given, a ``RoIDataLoader`` is created and
            stored on ``model.roi_data_loader``.
    """
    assert model.train, 'Training inputs can only be added to a trainable model'
    if roidb is not None:
        # Tip: set cfg.DATA_LOADER.NUM_THREADS = 1 to make debugging easier.
        loader_cfg = cfg.DATA_LOADER
        model.roi_data_loader = RoIDataLoader(
            roidb,
            num_loaders=loader_cfg.NUM_THREADS,
            minibatch_queue_size=loader_cfg.MINIBATCH_QUEUE_SIZE,
            blobs_queue_capacity=loader_cfg.BLOBS_QUEUE_CAPACITY)

    num_ops_before = len(model.net._net.op)
    blob_names = roi_data_minibatch.get_minibatch_blob_names(is_training=True)
    for gpu_id in range(cfg.NUM_GPUS):
        with c2_utils.NamedCudaScope(gpu_id):
            for name in blob_names:
                workspace.CreateBlob(core.ScopedName(name))
            model.net.DequeueBlobs(
                model.roi_data_loader._blobs_queue_name, blob_names)

    # Op surgery: rotate the freshly appended input ops to the head of the net.
    net_ops = model.net._net.op
    num_input_ops = len(net_ops) - num_ops_before
    reordered = net_ops[-num_input_ops:] + net_ops[:-num_input_ops]
    del net_ops[:]
    net_ops.extend(reordered)
コード例 #6
0
    def test_last_n_window_ops(self):
        """Run LastNWindowCollector through three plans and check 'output'.

        The same collect net is executed with 1, 2 and then 3 iterations; after
        each plan the 'output' blob is compared with the expected selection of
        input rows.
        """
        collect_net = core.Net("collect_net")
        collect_net.GivenTensorFill(
            [],
            "input",
            shape=[3, 2],
            values=[1.0, 2.0, 3.0, 4.0, 5.0, 6.0],
        )
        input_array = np.arange(1, 7, dtype=np.float32).reshape(3, 2)

        workspace.CreateBlob("output")
        workspace.FeedBlob("next", np.array(0, dtype=np.int32))
        collect_net.LastNWindowCollector(
            ["output", "next", "input"],
            ["output", "next"],
            num_to_collect=7,
        )

        # (iterations of the plan, input rows expected in 'output' afterwards)
        expected_rows_per_plan = (
            (1, [0, 1, 2]),
            (2, [1, 2, 2, 0, 1, 2, 0]),
            (3, [2, 0, 1, 2, 2, 0, 1]),
        )
        for num_iter, expected_rows in expected_rows_per_plan:
            plan = core.Plan("collect_data")
            step = core.execution_step(
                "collect_data", [collect_net], num_iter=num_iter)
            plan.AddStep(step)
            workspace.RunPlan(plan)
            reference_result = workspace.FetchBlob("output")
            npt.assert_array_equal(input_array[expected_rows], reference_result)
コード例 #7
0
def main(opts):
    """Benchmark how long it takes to dequeue minibatches from the data loader.

    Builds a 'dag' net with one DequeueBlobs op per GPU, runs ``loader_loop``
    (optionally under cProfile), then starts the loader and times
    ``opts.num_batches`` rounds of ``opts.x_factor`` net runs each.

    Args:
        opts: options object; reads ``profiler``, ``num_batches``,
            ``x_factor`` and ``sleep_time``.
    """
    logger = logging.getLogger(__name__)
    roidb = combined_roidb_for_training(
        cfg.TRAIN.DATASETS, cfg.TRAIN.PROPOSAL_FILES)
    logger.info('{:d} roidb entries'.format(len(roidb)))
    # NOTE: the name `roi_data_loader` is looked up by the cProfile command
    # string below through locals(), so it must not be renamed.
    roi_data_loader = RoIDataLoader(
        roidb,
        num_loaders=cfg.DATA_LOADER.NUM_THREADS,
        minibatch_queue_size=cfg.DATA_LOADER.MINIBATCH_QUEUE_SIZE,
        blobs_queue_capacity=cfg.DATA_LOADER.BLOBS_QUEUE_CAPACITY
    )
    blob_names = roi_data_loader.get_output_names()

    net = core.Net('dequeue_net')
    net.type = 'dag'
    all_blobs = []
    for gpu_id in range(cfg.NUM_GPUS):
        with core.NameScope('gpu_{}'.format(gpu_id)), \
                core.DeviceScope(muji.OnGPU(gpu_id)):
            for blob_name in blob_names:
                scoped_blob = core.ScopedName(blob_name)
                all_blobs.append(scoped_blob)
                workspace.CreateBlob(scoped_blob)
                logger.info('Creating blob: {}'.format(scoped_blob))
            net.DequeueBlobs(roi_data_loader._blobs_queue_name, blob_names)
    logger.info("Protobuf:\n" + str(net.Proto()))

    if not opts.profiler:
        loader_loop(roi_data_loader)
    else:
        import cProfile
        cProfile.runctx(
            'loader_loop(roi_data_loader)', globals(), locals(),
            sort='cumulative')

    roi_data_loader.register_sigint_handler()
    roi_data_loader.start(prefill=True)
    total_time = 0
    progress_fmt = '{:d}/{:d}: Averge dequeue time: {:.3f}s  [{:d}/{:d}]'
    for batch_num in range(1, opts.num_batches + 1):
        tic = time.time()
        for _ in range(opts.x_factor):
            workspace.RunNetOnce(net)
        # Accumulate the mean time of a single dequeue in this round.
        total_time += (time.time() - tic) / opts.x_factor
        logger.info(
            progress_fmt.format(
                batch_num, opts.num_batches, total_time / batch_num,
                roi_data_loader._minibatch_queue.qsize(),
                cfg.DATA_LOADER.MINIBATCH_QUEUE_SIZE
            )
        )
        # Sleep to simulate the time taken by running a little network
        time.sleep(opts.sleep_time)
        # To inspect the dequeued data interactively:
        # blobs = workspace.FetchBlobs(all_blobs)
        # from IPython import embed; embed()
    logger.info('Shutting down data loader...')
    roi_data_loader.shutdown()
コード例 #8
0
def add_image_blob(image_blob_name='image'):
    """Load the sample image, preprocess it and feed it into a workspace blob.

    Args:
        image_blob_name: unscoped name of the blob to create and fill.

    Returns:
        ``image_blob_name`` alone when ``workspace.Blobs()`` already lists
        that name; otherwise the tuple ``(image_blob_name, image)``.
    """
    # NOTE(review): the existence check uses the unscoped name while the blob
    # is created under the scoped name, and the two return paths have
    # different shapes - confirm against callers before changing either.
    if image_blob_name in workspace.Blobs():
        return image_blob_name

    image = preproc_image(Image.open(sample_image_path))

    device_opt = core.scope.CurrentDeviceScope()
    scoped_image_blob = core.ScopedName(image_blob_name)

    # Forward the device option only when a device scope is active.
    device_kwargs = {}
    if device_opt is not None:
        device_kwargs['device_option'] = device_opt
    workspace.CreateBlob(scoped_image_blob, **device_kwargs)
    workspace.FeedBlob(scoped_image_blob, image, **device_kwargs)

    return image_blob_name, image
コード例 #9
0
def gen_param_update_builder_fun(self, model, dataset, is_train):
    """Return a builder that adds momentum-SGD update ops, or None for eval.

    For training, an ``<device>_<idx>/lr`` blob is first created for every
    xpu id in the configured range, each under a CUDA device scope.

    Args:
        model: not used by this function itself; the returned builder takes
            its own ``model`` argument.
        dataset: not used in this function.
        is_train: when falsy, ``None`` is returned and nothing is created.

    Returns:
        ``add_parameter_update_ops(model)`` or ``None``.
    """
    if not is_train:
        return None

    # from sherlok
    dist_opts = self.opts['distributed']
    first_xpu = dist_opts['first_xpu_id']
    for idx in range(first_xpu, first_xpu + dist_opts['num_xpus']):
        with core.DeviceScope(core.DeviceOption(caffe2_pb2.CUDA, idx)):
            workspace.CreateBlob('{}_{}/lr'.format(dist_opts['device'], idx))

    def add_parameter_update_ops(model):
        """Add weight decay and a Nesterov momentum-SGD update per parameter."""
        model.Iter("ITER")
        init_net = model.param_init_net
        model_param = self.opts['model_param']
        weight_decay = init_net.ConstantFill(
            [], 'weight_decay', shape=[1],
            value=model_param['weight_decay'])
        weight_decay_bn = init_net.ConstantFill(
            [], 'weight_decay_bn', shape=[1],
            value=model_param['weight_decay_bn'])
        one = init_net.ConstantFill([], "ONE", shape=[1], value=1.0)

        # Add the momentum-SGD update.
        params = model.GetParams()
        assert (len(params) > 0)

        for param in params:
            param_grad = model.param_to_grad[param]
            param_momentum = init_net.ConstantFill(
                [param], param + '_momentum', value=0.0)

            # Parameters with '_bn' in their name use the separate decay blob.
            decay = weight_decay_bn if '_bn' in str(param) else weight_decay
            model.WeightedSum([param_grad, one, param, decay], param_grad)

            # Update param_grad and param_momentum in place
            model.net.MomentumSGDUpdate(
                [param_grad, param_momentum, 'lr', param],
                [param_grad, param_momentum, param],
                momentum=0.9,
                nesterov=1)

    return add_parameter_update_ops
コード例 #10
0
ファイル: save_blobs.py プロジェクト: orestis-z/track-rcnn
def create_model(weights_file):
    """Build a training model, wire its data loader and load weights.

    Adapted from utils.train.setup_model_for_training. Input blobs and the
    dequeue op are created under ``c2_utils.NamedCudaScope(0)`` only.

    Args:
        weights_file: passed to ``nu.initialize_gpu_from_weights_file``.

    Returns:
        The model, with its net created and its data loader started.
    """
    model = model_builder.create(cfg.MODEL.TYPE, train=True)
    if cfg.MEMONGER:
        optimize_memory(model)
    # Run the model's own parameter initialisation net.
    workspace.RunNetOnce(model.param_init_net)

    roidb = combined_roidb_for_training(
        cfg.TRAIN.DATASETS, cfg.TRAIN.PROPOSAL_FILES
    )
    # Tip: set cfg.DATA_LOADER.NUM_THREADS = 1 to make debugging easier.
    loader_cfg = cfg.DATA_LOADER
    model.roi_data_loader = RoIDataLoaderSimple(
        roidb,
        num_loaders=loader_cfg.NUM_THREADS,
        minibatch_queue_size=loader_cfg.MINIBATCH_QUEUE_SIZE,
        blobs_queue_capacity=loader_cfg.BLOBS_QUEUE_CAPACITY
    )

    num_ops_before = len(model.net._net.op)
    blob_names = roi_data_minibatch.get_minibatch_blob_names(is_training=True)
    with c2_utils.NamedCudaScope(0):
        for name in blob_names:
            workspace.CreateBlob(core.ScopedName(name))
        model.net.DequeueBlobs(
            model.roi_data_loader._blobs_queue_name, blob_names
        )

    # Op surgery: rotate the freshly appended input ops to the head of the net.
    net_ops = model.net._net.op
    num_input_ops = len(net_ops) - num_ops_before
    reordered = net_ops[-num_input_ops:] + net_ops[:-num_input_ops]
    del net_ops[:]
    net_ops.extend(reordered)

    nu.initialize_gpu_from_weights_file(model, weights_file, gpu_id=0)
    nu.broadcast_parameters(model)

    workspace.CreateBlob("gpu_0/track_n_rois_two")
    workspace.CreateNet(model.net)

    # Begin producing mini-batches and enqueuing blobs.
    loader = model.roi_data_loader
    loader.register_sigint_handler()
    loader.start(prefill=True)
    return model
コード例 #11
0
def add_inputs(model, roidb=None, landb=None, proposals=None, split='train'):
    """Add input blobs and dequeue ops to an already-built model.

    Must be called *after* model_builder.create(). The net is built without
    input ops first so that failures surface before the (slow) dataset load;
    the input ops are appended here and then rotated to the front of the
    net's op list.

    Args:
        model: model helper whose net receives the input ops.
        roidb: optional roidb; when given, a ``RoIDataLoader`` is created and
            stored on ``model.roi_data_loader``.
        landb: forwarded to ``RoIDataLoader``.
        proposals: forwarded to ``RoIDataLoader``.
        split: forwarded to ``RoIDataLoader`` and used to select the
            minibatch blob names.
    """
    if roidb is not None:
        # Tip: set cfg.DATA_LOADER.NUM_THREADS = 1 to make debugging easier.
        model.roi_data_loader = RoIDataLoader(
            split=split,
            roidb=roidb,
            landb=landb,
            proposals=proposals,
            num_loaders=cfg.DATA_LOADER.NUM_THREADS)

    num_ops_before = len(model.net._net.op)
    blob_names = roi_data.minibatch_rel.get_minibatch_blob_names(split)
    word_vec_blobs = ('all_obj_word_vecs', 'all_prd_word_vecs')
    for gpu_id in range(cfg.NUM_DEVICES):
        with c2_utils.NamedCudaScope(gpu_id):
            for name in blob_names:
                workspace.CreateBlob(core.ScopedName(name))
            model.net.DequeueBlobs(
                model.roi_data_loader._blobs_queue_name, blob_names)
            for name in word_vec_blobs:
                workspace.CreateBlob(core.ScopedName(name))

    # Op surgery: rotate the freshly appended input ops to the head of the net.
    net_ops = model.net._net.op
    num_input_ops = len(net_ops) - num_ops_before
    reordered = net_ops[-num_input_ops:] + net_ops[:-num_input_ops]
    del net_ops[:]
    net_ops.extend(reordered)
コード例 #12
0
def init_net():
    """Build the test net, load its parameters and make 'gpu_0/data' hand-fed.

    The first op of the net (asserted to be 'CustomizedVideoInput') is
    removed and 'gpu_0/data' is registered as an external input, so the blob
    is fed manually instead of by the reader op.

    Returns:
        The test model's net, already created in the workspace.
    """
    workspace.GlobalInit(['caffe2', '--caffe2_log_level=0'])
    np.random.seed(cfg.RNG_SEED)

    cfg.TEST.DATA_TYPE = 'test'
    # Pick the multi-crop mode; None means neither fully-conv flag is set.
    if cfg.TEST.TEST_FULLY_CONV is True:
        multi_crop = 1
    elif cfg.TEST.TEST_FULLY_CONV_FLIP is True:
        multi_crop = 2
    else:
        multi_crop = None

    if multi_crop is None:
        cfg.TRAIN.CROP_SIZE = 224
    else:
        cfg.TRAIN.CROP_SIZE = cfg.TRAIN.JITTER_SCALES[0]
        cfg.TEST.USE_MULTI_CROP = multi_crop

    workspace.ResetWorkspace()

    test_model = model_builder_video.ModelBuilder(
        name='{}_test'.format(cfg.MODEL.MODEL_NAME),
        train=False,
        use_cudnn=True,
        cudnn_exhaustive_search=True,
        split=cfg.TEST.DATA_TYPE)
    test_model.build_model()

    test_model.net.Proto().type = 'prof_dag' if cfg.PROF_DAG else 'dag'

    workspace.RunNetOnce(test_model.param_init_net)
    net = test_model.net
    checkpoints.load_model_from_params_file_for_test(
        test_model, cfg.TEST.PARAMS_FILE)

    # Replace the `reader_val/reader_test` input with a blob that can be fed
    # with a frame sequence. Inserting an op at the head of the network would
    # need the network to be rebuilt; registering an external input is enough.
    clip_blob = core.BlobReference('gpu_0/data')
    net.AddExternalInput(clip_blob)

    # Drop the original video input op: 'gpu_0/data' used to be produced by
    # it and is fed by hand from now on.
    ops = net.Proto().op
    # assert 'reader' in ops[0].name
    assert ops[0].type == 'CustomizedVideoInput'
    del ops[0]
    workspace.CreateBlob('gpu_0/data')

    workspace.CreateNet(net)
    return net
コード例 #13
0
def get_net(data_loader, name):
    """Build a 'dag' net holding one DequeueBlobs op per GPU.

    For every GPU, the loader's output blobs are created under the
    ``gpu_<id>`` name scope and a dequeue op reading from the loader's blobs
    queue is added.

    Args:
        data_loader: object providing ``get_output_names()`` and
            ``_blobs_queue_name``.
        name: name of the net to create.

    Returns:
        The constructed ``core.Net``.
    """
    logger = logging.getLogger(__name__)
    blob_names = data_loader.get_output_names()
    net = core.Net(name)
    net.type = 'dag'
    for gpu_id in range(cfg.NUM_GPUS):
        with core.NameScope('gpu_{}'.format(gpu_id)), \
                core.DeviceScope(muji.OnGPU(gpu_id)):
            for output_name in blob_names:
                workspace.CreateBlob(core.ScopedName(output_name))
            net.DequeueBlobs(data_loader._blobs_queue_name, blob_names)
    logger.info("Protobuf:\n" + str(net.Proto()))

    return net
コード例 #14
0
    def create_threads(self):
        """Create worker threads, per-device blob queues and enqueuer threads.

        Nothing is started here; the threads are only constructed and stored
        on ``self._workers`` and ``self._enqueuers``.
        """
        # Worker threads build (partial) minibatches and put them on the
        # minibatch CPU queue (limited by the queue size).
        self._worker_ids = self.get_worker_ids()
        workers = []
        for worker_id in self._worker_ids:
            workers.append(
                threading.Thread(
                    target=self.minibatch_loader,
                    name='worker_{}'.format(worker_id),
                    args=[worker_id],
                )
            )
        self._workers = workers

        # One BlobsQueue per device, created under that device's name scope.
        # Device ids start at the root device id 0.
        prefix, device = helpers.get_prefix_and_device()
        device_ids = range(0, self._num_devices)
        for device_id in device_ids:
            with core.NameScope('{}{}'.format(prefix, device_id)):
                self.create_blobs_queue(
                    queue_name=self._blobs_queue_name,
                    num_blobs=len(self._blobs_idx_map),
                    capacity=self._device_blobs_queue_capacity)

        # One blob per (blob_name, enqueuer_thread_id) pair, named
        #  <train/test>_<blob_name>_enqueue_<enqueuer_thread_id>
        # The split prefix keeps train and test data apart when the
        # EnqueueBlobs op is used.
        blob_names = self._blobs_idx_map.keys()
        enqueue_blobs_names = []
        for idx in range(self._num_enqueuers):
            enqueue_blobs_names.append([
                '{}_{}_enqueue_{}'.format(self._split, blob_name, idx)
                for blob_name in blob_names
            ])

        for device_id in device_ids:
            # The name scope is prepended to every blob created below.
            with core.NameScope('{}{}'.format(prefix, device_id)), \
                    core.DeviceScope(core.DeviceOption(device, device_id)):
                for names_for_enqueuer in enqueue_blobs_names:
                    for blob in names_for_enqueuer:
                        workspace.CreateBlob(scope.CurrentNameScope() + blob)

        # One enqueuer thread per (device, enqueuer index) pair.
        enqueuers = []
        for device_id in device_ids:
            for idx in range(self._num_enqueuers):
                enqueuers.append(
                    threading.Thread(
                        target=self.enqueue_blobs_thread,
                        args=(device_id, enqueue_blobs_names[idx])))
        self._enqueuers = enqueuers
コード例 #15
0
    def create_threads(self):
        """Create loader processes, per-GPU blob queues and enqueuer threads.

        Nothing is started here; the workers and enqueuers are only
        constructed and stored on ``self._workers`` and ``self._enqueuers``.
        """
        # Mini-batch loaders run as separate processes; each builds
        # mini-batches and places them into the shared minibatch queue.
        loader_args = (self.shared_readonly_dict, self._minibatch_queue,
                       self._lock, self.mp_cur, self.mp_perm,
                       self.coordinator)
        workers = []
        for _ in range(self._num_workers):
            workers.append(
                multiprocessing.Process(
                    target=RoIDataLoader.minibatch_loader2,
                    args=loader_args))
        self._workers = workers

        # One BlobsQueue per GPU, created under that GPU's name scope.
        gpu_ids = range(self._num_gpus)
        for gpu_id in gpu_ids:
            with core.NameScope('gpu_{}'.format(gpu_id)):
                self.create_blobs_queue()

        # Enqueuer threads move mini-batches from the shared CPU queue to a
        # GPU blobs queue; every GPU gets its own pool of them. One blob is
        # created for each
        # (loader output, enqueuer thread, RoIDataLoader instance) triple:
        #   <loader_output>_enqueue_<enqueuer_thread_id>_<loader_id>
        blob_names = self.get_output_names()
        enqueue_blob_names = []
        for i in range(self._num_enqueuers):
            enqueue_blob_names.append([
                '{}_enqueue_{}_{}'.format(blob_name, i, self._loader_id)
                for blob_name in blob_names
            ])

        for gpu_id in gpu_ids:
            with core.NameScope('gpu_{}'.format(gpu_id)), \
                    core.DeviceScope(
                        core.DeviceOption(caffe2_pb2.CUDA, gpu_id)):
                for names_for_enqueuer in enqueue_blob_names:
                    for blob in names_for_enqueuer:
                        workspace.CreateBlob(core.ScopedName(blob))

        # Enqueueing into C2 can't be done by multiple processes, so the
        # enqueuers use the threading module.
        enqueuers = []
        for gpu_id in gpu_ids:
            for i in range(self._num_enqueuers):
                enqueuers.append(
                    threading.Thread(
                        target=self.enqueue_blobs_thread,
                        args=(gpu_id, enqueue_blob_names[i])))
        self._enqueuers = enqueuers
コード例 #16
0
ファイル: dataloader.py プロジェクト: CV-IP/CRCNN-Action
    def create_threads(self):
        """Create the worker threads, per-GPU blob queues and the enqueuer.

        Nothing is started here; the threads are only constructed and stored
        on ``self._workers`` and ``self._enqueuer``.
        """
        # "worker" threads to construct (partial) minibatches and put them on
        # minibatch queue in CPU memory (limited by queue size).
        self._worker_ids = self.get_worker_ids()
        self._workers = [
            threading.Thread(
                target=self.minibatch_loader,
                name='worker_{}'.format(worker_id),
                args=[worker_id],
            ) for worker_id in self._worker_ids
        ]

        # Create one BlobsQueue per GPU, under that GPU's name scope.
        root_gpu_id = cfg.ROOT_GPU_ID
        for gpu_id in range(root_gpu_id, root_gpu_id + self._num_gpus):
            with core.NameScope('gpu_{}'.format(gpu_id)):
                self.create_blobs_queue(
                    queue_name=self._blobs_queue_name,
                    num_blobs=len(self._blobs_idx_map),
                    capacity=self._gpu_blobs_queue_capacity)

        # One enqueue blob per blob name: <split>_<blob_name>_enqueue
        blob_names = self._blobs_idx_map.keys()
        enqueue_blobs_names = [
            '{}_{}_enqueue'.format(self._split, blob_name)
            for blob_name in blob_names
        ]
        for gpu_id in range(root_gpu_id, root_gpu_id + self._num_gpus):
            with core.NameScope('gpu_{}'.format(gpu_id)):
                with core.DeviceScope(
                        core.DeviceOption(caffe2_pb2.CUDA, gpu_id)):
                    # enqueue_blobs_names is a flat list of names. Iterating
                    # each entry a second time (as the code used to) walks the
                    # characters of the name and creates one-letter blobs
                    # instead of the enqueue blobs themselves.
                    for blob in enqueue_blobs_names:
                        scoped_blob_name = scope.CurrentNameScope() + blob
                        workspace.CreateBlob(scoped_blob_name)

        # A single enqueuer thread, constructed with GPU index 0.
        self._enqueuer = threading.Thread(target=self.enqueue_blobs_thread,
                                          args=(0, enqueue_blobs_names))
コード例 #17
0
    def test_last_n_window_ops_shape_inference(self):
        """Check the inferred 'output' shape of LastNWindowCollector for 2-D input.

        The inferred shape must be ``num_to_collect`` (7) followed by the
        second dimension of the 'output' blob after the net has run once.
        """
        collect_net = core.Net("collect_net")
        collect_net.GivenTensorFill(
            [],
            "input",
            shape=[3, 2],
            values=[1.0, 2.0, 3.0, 4.0, 5.0, 6.0],
        )

        workspace.CreateBlob("output")
        workspace.FeedBlob("next", np.array(0, dtype=np.int32))
        collect_net.LastNWindowCollector(
            ["output", "next", "input"],
            ["output", "next"],
            num_to_collect=7,
        )
        shapes, types = workspace.InferShapesAndTypes([collect_net])
        workspace.RunNetOnce(collect_net)

        row_width = workspace.blobs["output"].shape[1]
        expected_shape = np.array([7, row_width])
        self.assertTrue(np.array_equal(shapes["output"], expected_shape))
コード例 #18
0
def create_multi_gpu_blob(blob_name):
    """Create ``<prefix><idx>/<blob_name>`` for every configured device.

    Args:
        blob_name: unscoped blob name; the device prefix and index are
            prepended for each of the ``cfg.NUM_DEVICES`` devices.
    """
    prefix, device = helpers.get_prefix_and_device()
    for device_idx in range(0, cfg.NUM_DEVICES):
        device_option = core.DeviceOption(device, device_idx)
        with core.DeviceScope(device_option):
            full_name = '{}{}/{}'.format(prefix, device_idx, blob_name)
            workspace.CreateBlob(full_name)
コード例 #19
0
 def create_input_blobs_for_net(net_def):
     """Create every op input blob of ``net_def`` that the workspace lacks.

     Args:
         net_def: net definition whose ``op`` entries each expose ``input``.
     """
     # Lazily walk all inputs of all ops, in definition order.
     input_names = (name for op in net_def.op for name in op.input)
     for name in input_names:
         if workspace.HasBlob(name):
             continue
         workspace.CreateBlob(name)
コード例 #20
0
 def input_fn(model):
     """Create the input blobs, add the dequeue op and stop gradients on data.

     Reads ``blob_names``, ``queue_name`` and ``suffix`` from the enclosing
     scope.
     """
     for name in blob_names:
         scoped_name = scope.CurrentNameScope() + name
         workspace.CreateBlob(scoped_name)
     model.DequeueBlobs(queue_name, blob_names)
     # StopGradient is applied in place: same blob as input and output.
     model.StopGradient('data{}'.format(suffix), 'data{}'.format(suffix))
コード例 #21
0
#-----------------------------------------------------------------------------------------------#

from caffe2.python import workspace
from models import model_builder_video, resnet_video_org

# Initialise Caffe2 and start from an empty workspace.
workspace.GlobalInit(['caffe2', '--caffe2_log_level=0'])
workspace.ResetWorkspace()

c2_net = model_builder_video.ModelBuilder(
        name='test', train=False,
        use_cudnn=False, cudnn_exhaustive_search=False,
        split='val')

c2_net.net.Proto().type = 'dag'

# Input blobs referenced by name when the model is created below.
workspace.CreateBlob('data')
workspace.CreateBlob('labels')

c2_net, out_blob = resnet_video_org.create_model(model=c2_net, data='data', labels='labels', split='val', use_nl=args.model=='r50_nl')

workspace.RunNetOnce(c2_net.param_init_net)
workspace.CreateNet(c2_net.net)

# load pretrained weights
# NOTE(review): `wt_file` is only assigned for 'r50' and 'r50_nl' here.
if args.model=='r50':
    wt_file = 'pretrained/i3d_baseline_32x2_IN_pretrain_400k.pkl'
elif args.model=='r50_nl':
    wt_file = 'pretrained/i3d_nonlocal_32x2_IN_pretrain_400k.pkl'
# Close the weights file deterministically instead of leaking the handle.
# NOTE: pickle can execute arbitrary code while loading; only use weight
# files from a trusted source.
with open(wt_file, 'rb') as _wt_fh:
    wts = pickle.load(_wt_fh, encoding='latin')['blobs']

for key in wts:
コード例 #22
0
def add_train_inputs(model):
    """Create the data loader's output blobs and add a dequeue op for them.

    Args:
        model: model whose ``roi_data_loader`` supplies the blob names and
            the queue name, and whose net receives the DequeueBlobs op.
    """
    loader = model.roi_data_loader
    blob_names = loader.get_output_names()
    for name in blob_names:
        scoped_name = core.ScopedName(name)
        workspace.CreateBlob(scoped_name)
    model.net.DequeueBlobs(loader._blobs_queue_name, blob_names)
コード例 #23
0
def run_inference(args):
    """Classify a video clip-by-clip and show the top predictions on screen.

    Builds the model under the ``gpu_0`` name scope, loads its weights, then
    reads frames from ``args.input``. Once ``args.clip_length_rgb`` frames
    are buffered, every new frame triggers a forward pass on the sliding
    window of frames, and the five highest-scoring labels are drawn on the
    current frame. The loop ends when the video is exhausted or 'q' is
    pressed. The video capture and the OpenCV windows are released even if
    an error occurs.

    Args:
        args: options object; reads ``gpus``, ``num_gpus``, ``num_channels``,
            ``clip_length_rgb``, ``crop_size``, ``scale_w``, ``scale_h``,
            ``input``, ``db_type``, ``load_model_path``, ``features`` and
            ``labels`` (``args`` is also forwarded to ``create_model_ops``).
    """
    if args.gpus is not None:
        gpus = [int(x) for x in args.gpus.split(', ')]
        num_gpus = len(gpus)
    else:
        gpus = range(args.num_gpus)
        num_gpus = args.num_gpus

    my_arg_scope = {
        'order': 'NCHW',
        'use_cudnn': True,
        'cudnn_exhaustive_search': True
    }

    model = cnn.CNNModelHelper(
        name="Extract Features",
        **my_arg_scope
    )

    # gpu?
    if num_gpus > 0:
        log.info("Running on GPUs: {}".format(gpus))
        model._device_type = caffe2_pb2.CUDA
        model._cuda_gpu_id = 0
        model._devices = [0]

    # cpu
    else:
        log.info("Running on CPU")
        model._device_type = caffe2_pb2.CPU
        model._devices = [0]

    # create the scope
    device_opt = core.DeviceOption(model._device_type, 0)
    with core.DeviceScope(device_opt):
        with core.NameScope("{}_{}".format("gpu", 0)):
            create_model_ops(model, 1.0, args)

    # gather parameters
    batch = 1
    channels_rgb = args.num_channels
    frames_per_clip = args.clip_length_rgb
    crop_size = args.crop_size
    width = args.scale_w
    height = args.scale_h
    input_video = args.input

    # initialize the network
    workspace.CreateBlob("gpu_0/data")
    workspace.CreateBlob("gpu_0/label")
    workspace.RunNetOnce(model.param_init_net)
    workspace.CreateNet(model.net)

    if args.db_type == 'minidb':
        with core.DeviceScope(core.DeviceOption(caffe2_pb2.CPU, 0)):
            model_helper.LoadModel(args.load_model_path, args.db_type)
    elif args.db_type == 'pickle':
        model_loader.LoadModelFromPickleFile(
            model,
            args.load_model_path,
            use_gpu=False,
        )
    else:
        log.warning("Unsupported db_type: {}".format(args.db_type))

    outputs = [name.strip() for name in args.features.split(', ')]
    assert len(outputs) > 0

    input_video = cv2.VideoCapture(input_video)

    # Everything after the capture is opened runs under try/finally so the
    # capture and any OpenCV windows are always released.
    try:
        with open(args.labels) as f:
            matching_labels = np.array(json.load(f))

        clip_list = []
        label = np.empty((1)).astype('int32')

        # create windows for opencv
        cv2.namedWindow('frame', cv2.WINDOW_NORMAL)
        cv2.resizeWindow('frame', 800, 600)
        cv2.namedWindow('processed', cv2.WINDOW_NORMAL)
        cv2.moveWindow('processed', 800, 0)

        while True:
            # get a frame from the video
            video_available, frame = input_video.read()

            if not video_available:
                break

            pre_processed_frame = put_in_shape(frame, resize_to=(
                width, height), crop_to=(crop_size, crop_size))
            clip_list.append(pre_processed_frame)

            # Keep buffering until a full clip is available.
            if len(clip_list) != frames_per_clip:
                continue

            print('sending one set of images to the network!')

            # put the list of frames in the shape for the network
            input_clip = pre_process(clip_list, crop_size, crop_size)

            # Drop the oldest frame so the window slides by one frame.
            del clip_list[0]

            # send the data to the network
            workspace.FeedBlob("gpu_0/data", input_clip)
            workspace.FeedBlob("gpu_0/label", label)

            # fetch the outputs
            activations = fetch_activations(model, outputs)

            # get the score for each class
            softmax = activations['softmax']

            cv2.imshow('frame', frame)
            cv2.imshow('processed', pre_processed_frame)

            for i in range(len(softmax)):
                # Class indices ordered from highest to lowest score.
                sorted_preds = \
                    np.argsort(softmax[i])
                sorted_preds[:] = sorted_preds[::-1]

                put_text_on_image(frame, matching_labels[sorted_preds[0:5]])
                cv2.imshow('frame', frame)

            if cv2.waitKey(1) & 0xff == ord('q'):
                break
    finally:
        input_video.release()
        cv2.destroyAllWindows()
コード例 #24
0
# Build ResNet-50 on `test_model`, reading from blobs "data" and "label".
softmax, loss = resnet.create_resnet50(
    test_model,
    "data",
    num_input_channels=3,
    num_labels=1000,
    label="label",
    no_bias=True,
)

# Device option describing CUDA device 0.
device_opts = caffe2_pb2.DeviceOption()
device_opts.device_type = caffe2_pb2.CUDA
device_opts.cuda_gpu_id = 0

# Apply the device option to the net and move the init net to GPU 0.
net_def = test_model.net.Proto()
net_def.device_option.CopyFrom(device_opts)
test_model.param_init_net.RunAllOnGPU(gpu_id=0, use_cudnn=True)

workspace.CreateBlob("data")
workspace.CreateBlob("label")

workspace.RunNetOnce(test_model.param_init_net)
workspace.CreateNet(net_def)

# Feed a random batch of 100 images and 100 labels, all set to one.
workspace.FeedBlob(
    'data',
    np.random.rand(100, 3, 224, 224).astype(np.float32),
    device_opts,
)
workspace.FeedBlob(
    'label',
    np.ones((100,), dtype=np.int32),
    device_opts,
)

#start = time.time()
#for i in range(1000):
#    workspace.RunNet(net_def.name, 1)
コード例 #25
0
def add_test_inputs(model):
    """Create a workspace blob for every minibatch input blob name.

    The names come from ``roi_data.minibatch.get_minibatch_blob_names()`` and
    each one is passed through ``core.ScopedName`` before the blob is
    created. ``model`` is accepted but not used here.
    """
    for name in roi_data.minibatch.get_minibatch_blob_names():
        scoped_name = core.ScopedName(name)
        workspace.CreateBlob(scoped_name)
コード例 #26
0
 def input_fn(model):
     """Create the input blobs and wire them to the queue on ``model``.

     ``blob_names`` and ``queue_name`` are taken from the enclosing scope.
     Each name is prefixed with the current name scope before its blob is
     created; a ``DequeueBlobs`` op is then added to ``model.net`` and
     ``StopGradient`` is applied to ``'data'`` in place.
     """
     for name in blob_names:
         scoped_name = scope.CurrentNameScope() + name
         workspace.CreateBlob(scoped_name)
     model.net.DequeueBlobs(queue_name, blob_names)
     model.StopGradient('data', 'data')
コード例 #27
0
def create_blobs_if_not_existed(blob_names):
    """Create a workspace blob for each given name that is not yet present.

    The set of existing blobs is read from ``workspace.Blobs()`` once, before
    iterating. Missing names are converted with ``str`` when the blob is
    created.
    """
    already_present = set(workspace.Blobs())
    # The generator keeps the membership check and the creation interleaved
    # per item, in input order.
    missing = (name for name in blob_names if name not in already_present)
    for name in missing:
        workspace.CreateBlob(str(name))
コード例 #28
0
def main():
    """Run the training script end to end.

    Steps, in order: initialize Caffe2, set up logging, parse arguments and
    merge config overrides, log CUDA/cuDNN information, seed NumPy, build a
    test model from ``cfg.TEST.WEIGHTS`` and hand it to a ``DataLoader``,
    create the training model, create the per-GPU input blobs, optionally
    load weights, broadcast parameters, create the net, then run the
    iteration loop with periodic checkpoints. After the loop the final
    weights and the training statistics are saved.

    Takes no parameters; command-line arguments are read via ``parse_args()``.
    """
    # Initialize C2
    workspace.GlobalInit(
        ['caffe2', '--caffe2_log_level=0', '--caffe2_gpu_memory_tracking=1'])
    # Set up logging and load config options
    logger = setup_logging(__name__)
    logging.getLogger('detectron.roi_data.loader').setLevel(logging.INFO)
    args = parse_args()
    logger.info('Called with args:')
    logger.info(args)
    if args.cfg_file is not None:
        merge_cfg_from_file(args.cfg_file)
    if args.opts is not None:
        merge_cfg_from_list(args.opts)
    assert_and_infer_cfg()
    smi_output, cuda_ver, cudnn_ver = c2_utils.get_nvidia_info()
    logger.info("cuda version : {}".format(cuda_ver))
    logger.info("cudnn version: {}".format(cudnn_ver))
    logger.info("nvidia-smi output:\n{}".format(smi_output))
    logger.info('Training with config:')
    logger.info(pprint.pformat(cfg))
    # Note that while we set the numpy random seed, network training will not
    # be deterministic in general. There are sources of non-determinism that
    # cannot be removed with a reasonable execution-speed tradeoff (such as
    # certain non-deterministic cudnn functions).
    np.random.seed(cfg.RNG_SEED)
    # Build the test model; it is passed to the DataLoader below.
    logger.info("creat test model ...")
    test_model = test_engine.initialize_model_from_cfg(cfg.TEST.WEIGHTS,
                                                       gpu_id=0)
    logger.info("created test model ...")
    # NOTE(review): `root` is not defined inside this function -- presumably a
    # module-level name; confirm.
    train_data = DataLoader(root,
                            "train_id.txt",
                            cfg,
                            test_model,
                            is_train=True)
    # Create the training model.
    # NOTE(review): `output_dir` is not defined inside this function either --
    # presumably a module-level name; confirm.
    model, weights_file, start_iter, checkpoints = create_model(
        True, cfg, output_dir)
    # Debug output: list the blobs currently in the workspace.
    print(workspace.Blobs())
    # Create the input blobs under each GPU's name scope.
    blob_names = ['data_stage2', 'gt_label_stage2']
    for gpu_id in range(cfg.NUM_GPUS):
        with c2_utils.NamedCudaScope(gpu_id):
            for blob_name in blob_names:
                workspace.CreateBlob(core.ScopedName(blob_name))
    # Override random weight initialization with weights from a saved model
    if weights_file:
        nu.initialize_gpu_from_weights_file(model, weights_file, gpu_id=0)
    # Even if we're randomly initializing we still need to synchronize
    # parameters across GPUs
    nu.broadcast_parameters(model)
    workspace.CreateNet(model.net)

    logger.info('Outputs saved to: {:s}'.format(os.path.abspath(output_dir)))
    dump_proto_files(model, output_dir)

    writer = SummaryWriter(log_dir=output_dir)
    training_stats = TrainingStats(model, writer)
    # NOTE(review): this evaluates to 0 when cfg.TRAIN.SNAPSHOT_ITERS is
    # smaller than cfg.NUM_GPUS, in which case the modulo in the checkpoint
    # test below raises ZeroDivisionError -- confirm the config rules it out.
    CHECKPOINT_PERIOD = int(cfg.TRAIN.SNAPSHOT_ITERS / cfg.NUM_GPUS)
    logger.info("start train ...")
    for cur_iter in range(start_iter, cfg.SOLVER.MAX_ITER):
        # Feed the next batch into the input blobs.
        # print("{} iter starting feed data...".format(cur_iter))
        data_stage2, gt_label = train_data.next_batch()
        # NOTE(review): `gpu_id` is the value left over from the blob-creation
        # loop above (its last iteration), so the batch is fed only under that
        # one GPU's scope -- confirm this is intended when NUM_GPUS > 1.
        with c2_utils.NamedCudaScope(gpu_id):
            workspace.FeedBlob(core.ScopedName('data_stage2'), data_stage2)
            workspace.FeedBlob(core.ScopedName('gt_label_stage2'), gt_label)

        # print("workspace.RunNet(model.net.Proto().name)")
        training_stats.IterTic()
        lr = model.UpdateWorkspaceLr(cur_iter,
                                     lr_policy.get_lr_at_iter(cur_iter))
        workspace.RunNet(model.net.Proto().name)
        # Print the net once, right after its first run.
        if cur_iter == start_iter:
            nu.print_net(model)
        training_stats.IterToc()
        training_stats.UpdateIterStats(cur_iter)
        training_stats.LogIterStats(cur_iter, lr)
        writer.add_scalar('learning_rate', lr, cur_iter)

        # print("end of RunNet")
        # Periodic checkpoint; never taken on the very first iteration.
        if (cur_iter + 1) % CHECKPOINT_PERIOD == 0 and cur_iter > start_iter:
            checkpoints[cur_iter] = os.path.join(
                output_dir, 'model_iter{}.pkl'.format(cur_iter))
            nu.save_model_to_weights_file(checkpoints[cur_iter], model)

        if cur_iter == start_iter + training_stats.LOG_PERIOD:
            # Reset the iteration timer to remove outliers from the first few
            # SGD iterations
            training_stats.ResetIterTimer()

        if np.isnan(training_stats.iter_total_loss):
            handle_critical_error(model, 'Loss is NaN')

    # Save the final model
    checkpoints['final'] = os.path.join(output_dir, 'model_final.pkl')
    nu.save_model_to_weights_file(checkpoints['final'], model)
    # Save the training loss and metrics.
    state_file = os.path.join(output_dir, 'training_state.json')
    training_stats.SaveTrainingStates(state_file)
    # Execute the training run
    # NOTE(review): the loop above has already trained and saved the model;
    # this call rebinds `checkpoints` to whatever train_model() returns --
    # confirm it is not a leftover from the script this was adapted from.
    checkpoints = detectron.utils.train.train_model()
    # Test the trained model
    # NOTE(review): `test_model` here is the local bound earlier to the result
    # of test_engine.initialize_model_from_cfg, not a module-level function --
    # confirm that object is callable with these arguments.
    if not args.skip_test:
        test_model(checkpoints['final'], args.multi_gpu_testing, args.opts)
コード例 #29
0
                use_cudnn=False)
            print(
                'WARNING: This alexnet implementation can not use CUDNN for some LRN layer related reason. If you can solve this problem, a PR is welcomed.'
            )
            softmax = create_alexnet(model,
                                     'data',
                                     num_labels=1000,
                                     label=None,
                                     no_loss=True)
        else:
            raise NotImplementedError
        net_def = model.net.Proto()
        net_def.device_option.CopyFrom(device_opts)
        model.param_init_net.RunAllOnGPU(gpu_id=args.gpu, use_cudnn=True)

        workspace.CreateBlob('data')
        #  workspace.CreateBlob('label')
        workspace.RunNetOnce(model.param_init_net)
        workspace.CreateNet(net_def)
    else:
        raise NotImplementedError('%s is not supported yet' % args.network)

    t2 = time.time()
    print('Finish loading model in %.4fs' % (t2 - t1))

    t1 = time.time()
    data_list = [
        np.random.uniform(
            -1, 1, (args.batch_size, 3, im_size, im_size)).astype(np.float32)
        for i in range(int(np.ceil(1.0 * args.n_sample / args.batch_size)))
    ]
コード例 #30
0
def main(args):
    """Run a serialized Caffe2 detection model over images and export results.

    Loads ``model_init.pb`` and ``model.pb`` from ``args.model_dir``, runs
    detection on ``args.im_or_folder`` (a single image path, or every
    ``*.<args.image_ext>`` file when it is a directory) and, for each image
    with detections, writes one file into ``args.output_dir``: COCO-style
    JSON when ``args.output_type == 'coco-json'``, XML otherwise. Only
    detections whose score is at least ``args.thresh`` are written.

    Images larger than ``MAX_DIMENSION_SHORT_SIDE`` on their short side are
    downscaled for inference; exported coordinates are divided by the scale
    factor so they refer to the original image size.

    Images that cannot be read are logged and skipped. File names that do not
    match the ``<9 digits>_<5 digits>`` pattern are still processed, with an
    empty image URL and a warning.

    Args:
        args: parsed command-line arguments; reads ``cfg``, ``model_dir``,
            ``im_or_folder``, ``image_ext``, ``output_dir``, ``output_type``
            and ``thresh``.
    """
    logger = logging.getLogger(__name__)

    merge_cfg_from_file(args.cfg)
    cfg.NUM_GPUS = 1
    assert_and_infer_cfg(cache_urls=False)

    import_detectron_ops()
    init_net = caffe2_pb2.NetDef()
    predict_net = caffe2_pb2.NetDef()
    with open(os.path.join(args.model_dir, "model_init.pb"), 'rb') as f:
        init_net.ParseFromString(f.read())
    with open(os.path.join(args.model_dir, "model.pb"), 'rb') as f:
        predict_net.ParseFromString(f.read())
    workspace.ResetWorkspace()
    workspace.RunNetOnce(init_net)
    # Every predict-net input that the init net did not produce is created
    # empty before the net itself is created.
    for op in predict_net.op:
        for blob_in in op.input:
            if not workspace.HasBlob(blob_in):
                workspace.CreateBlob(blob_in)
    logger.info('Operators Are Loaded')
    workspace.CreateNet(predict_net)
    logger.info('Predictor Net Created')

    assert not cfg.MODEL.RPN_ONLY, \
        'RPN models are not supported'
    assert not cfg.TEST.PRECOMPUTED_PROPOSALS, \
        'Models that require precomputed proposals are not supported'

    if os.path.isdir(args.im_or_folder):
        im_list = glob.iglob(args.im_or_folder + '/*.' + args.image_ext)
    else:
        im_list = [args.im_or_folder]

    # makedirs (rather than mkdir) so a nested output path also works.
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)

    # The output format is fixed for the whole run.
    is_coco = args.output_type == 'coco-json'
    ext = 'json' if is_coco else 'xml'

    for i, im_name in enumerate(im_list):

        image_file_name = os.path.basename(im_name)
        out_name = os.path.join(
            args.output_dir, os.path.splitext(image_file_name)[0] + '.' + ext
        )
        logger.info('Processing {} -> {}'.format(im_name, out_name))

        # cv2.imread signals failure by returning None instead of raising.
        im = cv2.imread(im_name)
        if im is None:
            logger.error('Could not read image {}; skipping'.format(im_name))
            continue

        im_h, im_w = im.shape[:2]
        # Downscale so the short side does not exceed the configured maximum.
        scale_factor = 1.0
        short_side = min(im_h, im_w)
        if short_side > MAX_DIMENSION_SHORT_SIDE:
            scale_factor = float(MAX_DIMENSION_SHORT_SIDE) / float(short_side)

        if scale_factor != 1.0:
            im = cv2.resize(
                im,
                (int(round(float(im_w) * scale_factor)),
                 int(round(float(im_h) * scale_factor))))

        timers = defaultdict(Timer)
        t = time.time()
        cls_boxes, cls_segms, cls_keyps = im_detect_all(
            workspace, predict_net, im, None, timers=timers)
        logger.info('Inference time: {:.3f}s'.format(time.time() - t))
        for k, v in timers.items():
            logger.info(' | {}: {:.3f}s'.format(k, v.average_time))
        if i == 0:
            logger.info(
                ' \\ Note: inference on the first image will be slower than the '
                'rest (caches and auto-tuning need to warm up)'
            )

        if isinstance(cls_boxes, list):

            (boxes, segms, keyps, classes) = convert_from_cls_format(
                cls_boxes, cls_segms, cls_keyps)

            # The URL is derived from the "<dgs>_<image number>" file name.
            m = re.match('([0-9]{9})_([0-9]{5})', image_file_name)
            if m is None:
                logger.warning(
                    'File name {} does not match the expected '
                    '<9 digits>_<5 digits> pattern; image URL left empty'
                    .format(image_file_name))
                image_url = ''
            else:
                image_url = (
                    'https://das.familysearch.org/das/v2/dgs:{0}.{0}_{1}/$dist'
                    .format(m.group(1), m.group(2)))

            bboxes = make_bboxes(boxes)
            logger.info('{} lines found'.format(len(bboxes)))

            if is_coco:
                data = new_json()

                image = {
                    'license': 1,
                    'file_name': image_file_name,
                    'coco_url': image_url,
                    'height': im_h,
                    'width': im_w,
                    'date_captured': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
                    'flickr_url': image_url,
                    'id': 1
                }

                data['images'].append(image)
            else:
                data = new_xml()
                page = SubElement(data, 'Page')
                page.set('imageFilename', image_file_name)
                page.set('imageWidth', str(im_w))
                page.set('imageHeight', str(im_h))
                # One text region spanning the whole page holds the text lines.
                tr = SubElement(page, 'TextRegion')
                tr.set('id', 'region0')
                coords = SubElement(tr, 'Coords')
                coords.set('points', '0,0 {},0 {},{} 0,{}'.format(im_w, im_w, im_h, im_h))

            next_annotation_id = 1

            next_line_id = 1
            next_sep_id = 1
            next_ld_id = 1
            next_gra_id = 1

            if segms is not None:
                for j, segm in enumerate(segms):
                    mask = mask_util.decode(segm)
                    contours = measure.find_contours(mask, 0.5)
                    segmentation = []
                    (bbox, score1) = bboxes[j]
                    score2 = boxes[j, -1]
                    logging.debug('score1: {} score2: {}'.format(score1, score2))

                    if score2 >= args.thresh:
                        # Map coordinates back to the original image size.
                        bbox = [x / scale_factor for x in bbox]

                        for contour in contours:
                            # Swap the two columns of each contour point
                            # before flattening.
                            contour = np.flip(contour, axis=1)
                            seg = contour.ravel().tolist()
                            seg = [x / scale_factor for x in seg]
                            segmentation.append(seg)
                        if is_coco:
                            area = calc_area(segmentation)
                            annotation = {
                                'segmentation': segmentation,
                                'score': float(score2),
                                'area': area,
                                'iscrowd': 0,
                                'image_id': 1,
                                'bbox': bbox,
                                'category_id': classes[j],
                                'id': next_annotation_id
                            }
                            data['annotations'].append(annotation)
                            next_annotation_id += 1
                        else:
                            category = _r_category_map[classes[j]]
                            if category in ('handwritten-cursive', 'printed'):
                                elem = SubElement(tr, 'TextLine')
                                elem.set('production', category)
                                elem.set('id', 'tl' + str(next_line_id))
                                next_line_id += 1
                            elif category == 'separator':
                                elem = SubElement(page, 'SeparatorRegion')
                                elem.set('id', 'sr' + str(next_sep_id))
                                next_sep_id += 1
                            elif category == 'line-drawing':
                                elem = SubElement(page, 'LineDrawingRegion')
                                elem.set('id', 'ldr' + str(next_ld_id))
                                next_ld_id += 1
                            else:  # graphic
                                elem = SubElement(page, 'GraphicRegion')
                                elem.set('id', 'gr' + str(next_gra_id))
                                next_gra_id += 1

                            coord_string = convert_to_xml_coords(segmentation)
                            coords = SubElement(elem, 'Coords')
                            coords.set('points', coord_string)

                    else:
                        logging.info('Not keeping line with confidence {} below threshold of {}'.format(score2, args.thresh))

            with open(out_name, 'w') as outfile:
                if is_coco:
                    json.dump(data, outfile, indent=4)
                else:
                    outfile.write(prettify(data))

        else:
            logger.info('Nothing found in image {}'.format(image_file_name))