Ejemplo n.º 1
0
def main():
    """Load the Caffe attention network and serve saliency maps via ``run_model``.

    Switches Caffe to GPU mode on device 0, loads the network described by
    ``models/attention_test.prototxt`` with the weights in
    ``models/attention_final`` in ``caffe.TEST`` phase, and hands a per-image
    ``compute_saliency`` callback to ``run_model``.
    """
    # Remove the following two calls if testing on CPU.
    caffe.set_mode_gpu()
    # Choose which GPU to use.
    caffe.set_device(0)
    caffe.SGDSolver.display = 0
    # Load the network.
    net = caffe.Net('models/attention_test.prototxt', 'models/attention_final',
                    caffe.TEST)

    def compute_saliency(image_path):
        """Return the saliency map of the image at ``image_path``.

        The result is a ``uint8`` array with the same height and width as
        the image read from disk.
        """
        img = cv2.imread(image_path, cv2.IMREAD_COLOR)
        im = prepare_image(img)

        # Shape the input blob for a single image (N x C x H x W) and fill it.
        net.blobs['data'].reshape(1, *im.shape)
        net.blobs['data'].data[...] = im
        # Run the forward pass and pull out the attention map.
        res = net.forward()
        salmap = np.squeeze(res['final_attentionmap'])

        # Scale so the strongest response becomes 1. An all-zero map is left
        # untouched instead of being turned into NaNs by a 0/0 division.
        peak = np.max(salmap)
        if peak != 0:
            salmap = salmap / peak

        # Resize back to the resolution of the original image
        # (cv2 expects the size as (width, height)).
        salmap = cv2.resize(salmap, (img.shape[1], img.shape[0]),
                            interpolation=cv2.INTER_LINEAR)

        return (salmap * 255).astype(np.uint8)

    run_model(compute_saliency)
Ejemplo n.º 2
0
def main():
    """Build the requested SAM network and serve saliency maps via ``run_model``.

    Options are read from the JSON object stored in the
    ``SMILER_PARAMETER_MAP`` environment variable:

    * ``network``: ``'SAM-VGG'`` (default) or ``'SAM-ResNet'``.
    * ``do_smoothing``: forwarded to ``postprocess_predictions`` as a boolean
      that is true when the value is ``'default'``.
    * ``center_prior``: NOTE(review) - the original code branched on this
      option but both branches ran the exact same prediction, so it has never
      had any effect. The duplicate branch was collapsed; actually disabling
      the prior still needs to be implemented.

    Raises:
        NotImplementedError: if ``network`` names an unsupported model.
    """
    options = json.loads(os.environ['SMILER_PARAMETER_MAP'])
    network_string = options.get('network', 'SAM-VGG')
    do_default_smoothing = options.get('do_smoothing', 'default') == 'default'

    output_folder = '/opt/output_vol/'
    os.makedirs(output_folder, exist_ok=True)

    x = Input((3, shape_r, shape_c))
    x_maps = Input((nb_gaussian, shape_r_gt, shape_c_gt))

    # Supported networks: name -> (graph builder, weights file).
    networks = {
        "SAM-VGG": (sam_vgg, 'weights/sam-vgg_salicon_weights.pkl'),
        "SAM-ResNet": (sam_resnet, 'weights/sam-resnet_salicon_weights.pkl'),
    }
    if network_string not in networks:
        raise NotImplementedError(
            "The only supported network strings are SAM-VGG and SAM-ResNet! '{}' is unknown."
            .format(network_string))
    build_outputs, weights_path = networks[network_string]

    m = Model(input=[x, x_maps], output=build_outputs([x, x_maps]))
    print("Compiling {}...".format(network_string))
    m.compile(RMSprop(lr=1e-4),
              loss=[kl_divergence, correlation_coefficient, nss])

    print("Loading {} weights...".format(network_string))
    m.load_weights(weights_path)

    # All-zero array fed to the network's second input on every prediction.
    gaussian = np.zeros((b_s, nb_gaussian, shape_r_gt, shape_c_gt))

    def compute_saliency(image_path):
        """Return the post-processed saliency map for ``image_path``."""
        predictions = m.predict(
            [preprocess_images([image_path], shape_r, shape_c),
             gaussian])[0]

        # The image is read (flag 0) only to recover its height and width.
        original_image = cv2.imread(image_path, 0)
        return postprocess_predictions(
            predictions[0][0],
            original_image.shape[0],
            original_image.shape[1],
            do_default_smoothing=do_default_smoothing)

    run_model(compute_saliency)
Ejemplo n.º 3
0
def main():
    """Load the ``ModelBCE`` network and serve saliency maps via ``run_model``.

    The ``do_smoothing`` entry of the JSON object in the
    ``SMILER_PARAMETER_MAP`` environment variable controls the final
    Gaussian blur: it is applied when the value is ``'default'`` (also the
    fallback when the key is missing).
    """
    params = json.loads(os.environ['SMILER_PARAMETER_MAP'])
    use_default_blur = params.get('do_smoothing', 'default') == 'default'

    # Build the network, then restore the snapshot of the chosen epoch.
    model = ModelBCE(INPUT_SIZE[0], INPUT_SIZE[1], batch_size=8)
    load_weights(model.net['output'], path='gen_', epochtoload=90)

    def compute_saliency(image_path):
        """Return a ``uint8`` saliency map sized like the image at ``image_path``."""
        bgr = cv2.imread(image_path, cv2.IMREAD_COLOR)
        rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)

        # Remember the original size as (width, height), the order cv2 wants.
        original_size = (rgb.shape[1], rgb.shape[0])
        kernel = 5

        # Bring the image to the network's input resolution if necessary.
        network_shape = (model.inputHeight, model.inputWidth)
        if rgb.shape[:2] != network_shape:
            rgb = cv2.resize(rgb, (model.inputWidth, model.inputHeight),
                             interpolation=cv2.INTER_AREA)

        # Single-image batch in channel-first layout: 1 x 3 x H x W.
        float_type = theano.config.floatX
        batch = np.zeros((1, 3) + network_shape, float_type)
        batch[0] = rgb.astype(float_type).transpose(2, 0, 1)

        prediction = np.squeeze(model.predictFunction(batch))
        saliency_map = (prediction * 255).astype(np.uint8)

        # Resize back to the original size.
        saliency_map = cv2.resize(saliency_map,
                                  original_size,
                                  interpolation=cv2.INTER_CUBIC)
        # Optional smoothing.
        if use_default_blur:
            saliency_map = cv2.GaussianBlur(saliency_map, (kernel, kernel), 0)

        # Clip to the 0..255 range before returning.
        return np.clip(saliency_map, 0, 255)

    run_model(compute_saliency)
Ejemplo n.º 4
0
    # Translate the option values into the numeric arguments that are later
    # placed on the ./build/BMS command line.
    # NOTE(review): the enclosing function header and the code that reads the
    # options are outside this excerpt; the meaning of each numeric code is
    # defined by the BMS binary - confirm against its documentation.

    # Collapse the truthy/falsy whitening option into a 1/0 flag.
    whitening = 1 if whitening else 0

    # Colour-space code: 2 for the 'default' setting, 1 for anything else.
    if color_space == 'default':
        colorspace = 2
    else:
        colorspace = 1

    # Blur value: 9 for the 'default' setting, 0 otherwise
    # (presumably 0 disables smoothing - TODO confirm).
    if do_smoothing == 'default':
        blur_std = 9
    else:
        blur_std = 0

    def compute_saliency(image_path):
        """Run the external BMS binary on ``image_path`` and load its output.

        Returns the image the binary wrote to ``output_path`` as a NumPy
        array, or ``None`` when the process exits with a non-zero status.
        """
        arguments = (
            "./build/BMS", image_path, output_path, sample_step,
            dilation_width_1, dilation_width_2, blur_std, colorspace,
            whitening, max_dim,
        )
        # Every command-line argument must be passed as a string.
        exit_status = subprocess.call([str(argument) for argument in arguments])
        if exit_status != 0:
            return None

        # TODO: FIXME a hack for SMILER integration.
        return np.array(PIL.Image.open(output_path))

    run_model(compute_saliency)
Ejemplo n.º 5
0
def main():
    """Restore the ICF TensorFlow model and serve saliency maps via ``run_model``.

    The ``center_prior`` entry of the JSON object in the
    ``SMILER_PARAMETER_MAP`` environment variable selects the output: with
    ``'default'`` (also the fallback) the prediction includes the center
    bias, otherwise the ``log_density_wo_centerbias`` tensor is evaluated.
    """
    options = json.loads(os.environ['SMILER_PARAMETER_MAP'])
    center_bias_path = 'centerbias.npy'
    use_center_bias = options.get('center_prior', 'default') == 'default'

    # Load the precomputed center-bias log density (a 1024x1024 template
    # according to the original comment).
    centerbias_template = np.load(center_bias_path)
    # Floats, so the zoom factors below use true division on Python 2 as
    # well, and follow the template's real size instead of a hard-coded 1024.
    template_height = float(centerbias_template.shape[0])
    template_width = float(centerbias_template.shape[1])

    # Import the model from the TensorFlow meta-graph file.
    tf.reset_default_graph()

    check_point = 'ICF.ckpt'
    new_saver = tf.train.import_meta_graph('{}.meta'.format(check_point))

    input_tensor = tf.get_collection('input_tensor')[0]
    centerbias_tensor = tf.get_collection('centerbias_tensor')[0]
    log_density = tf.get_collection('log_density')[0]
    log_density_wo_centerbias = tf.get_collection(
        'log_density_wo_centerbias')[0]

    # Create the session once and restore the model parameters into it; the
    # same session is reused for every image.
    sess = tf.Session()
    new_saver.restore(sess, check_point)

    def compute_saliency(image_path):
        """Return ``255 * exp(log density)`` for the image at ``image_path``.

        The result is a 2-D array taken from batch index 0 and channel 0 of
        the network output.
        """
        img = imread(image_path, mode='RGB')
        height, width = img.shape[0], img.shape[1]

        # The model expects 4-D `BHWC` (batch-height-width-channel) inputs:
        # a batch of images and a batch of center-bias log densities.
        image_data = img[np.newaxis, :, :, :]  # BHWC, three channels (RGB)

        if use_center_bias:
            # Rescale the template to the image resolution.
            centerbias = zoom(centerbias_template,
                              (height / template_height,
                               width / template_width),
                              order=0,
                              mode='nearest')
            # Renormalize the log density.
            centerbias -= logsumexp(centerbias)
            # BHWC, 1 channel (log density)
            centerbias_data = centerbias[np.newaxis, :, :, np.newaxis]
            output_tensor = log_density
        else:
            # No center bias requested: skip the zoom entirely and feed an
            # all-zero placeholder of the matching shape.
            centerbias_data = np.zeros((1, height, width, 1),
                                       dtype=centerbias_template.dtype)
            output_tensor = log_density_wo_centerbias

        log_density_prediction = sess.run(
            output_tensor, {
                input_tensor: image_data,
                centerbias_tensor: centerbias_data,
            })

        # The prediction is again `BHWC`; the log density is 2-D, so `C=1`,
        # and only one image was processed, so `B=1`.
        return 255 * np.exp(log_density_prediction[0, :, :, 0])

    run_model(compute_saliency)
Ejemplo n.º 6
0
    # Load the trained SVM.
    svm_path = 'svm-slm-cntr'
    svm = load_model(svm_path)

    # Load the whitening parameters: each line of the file holds
    # space-separated floats, and the parsed table is transposed.
    whiten_path = 'whiten-slm-cntr'
    with open(whiten_path) as fp:
        # Build real lists instead of using map(): on Python 3 map() returns
        # a lazy iterator, and np.asarray would then produce an object array
        # of iterators rather than a numeric matrix.
        whitenParams = np.asarray([[float(value) for value in line.split(' ')]
                                   for line in fp]).T

    # assemble svm model
    svmModel = {'svm': svm, 'whitenParams': whitenParams}

    # True when the SVM has exactly one more feature than nFeatures
    # (presumably the extra center-bias feature - TODO confirm).
    biasToCntr = (svm.get_nr_feature() - nFeatures) == 1

    def eDNsaliency(image_path):
        """Return the eDN saliency map of ``image_path`` scaled to ``uint8``.

        The raw map is min-max normalized to the 0..255 range. A constant
        map (max equal to min) yields an all-zero result instead of dividing
        by zero.
        """
        img = misc.imread(image_path, mode='RGB')

        # compute saliency map
        model = EDNSaliencyModel(desc, svmModel, biasToCntr)

        salMap = model.saliency(img, normalize=False)

        salMap = salMap.astype('f')

        # Min-max normalize the map to 0..255.
        lowest = salMap.min()
        highest = salMap.max()
        if highest == lowest:
            # No contrast at all: the normalization is undefined (0/0).
            return np.zeros(salMap.shape, dtype=np.uint8)

        normSalMap = (255.0 / (highest - lowest) *
                      (salMap - lowest)).astype(np.uint8)

        return normSalMap

    run_model(eDNsaliency)